#!/usr/bin/perl -w

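# This program counts the elements in an XML file and prints one line per
# distinct tag path (each tag name prefixed by the names of all its ancestors
# in the hierarchy, colon-separated), followed by the total number of start tags.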

use strict;
use XML::Parser;

# Input file: taken from the first command-line argument; falls back to the
# previously hard-coded file name when no argument is given, so running the
# script without arguments still works.
my $xml_file = defined $ARGV[0] ? $ARGV[0] : "F6N15.xml";

# The 'Stream' style dispatches to the StartTag, EndTag and Text subs defined
# in this package.
my $p = XML::Parser->new(Style => 'Stream');

# State shared with the handler subs.
my $count = 0;    # total number of start tags seen
my @tag_stack;    # names of the currently open elements, outermost first
my %tag_hash;     # colon-joined tag path => number of elements with that path

    # process the file
$p->parsefile($xml_file);

    # print the results; the header names the file that was actually parsed
print "# Tags in TIGR XML file $xml_file\n\n";
foreach my $key (sort keys %tag_hash) {
    print "$key : $tag_hash{$key}\n";
}

print "\n# number of start tags = $count\n";

# end of main program

####################################
#  SUBROUTINES
###################################
sub StartTag {
    # Stream-style start-tag handler: receives the parser object and the
    # name of the element that has just been opened.
    my $expat    = shift;    # parser object; not needed here
    my $tag_name = shift;

    # One more start tag seen overall.
    $count += 1;

    # Remember the element as open; EndTag pops it again when it closes.
    push @tag_stack, $tag_name;
}

    # Stream-style end-tag handler: called with the parser object and the name
    # of the element being closed (neither argument is needed here).
    #
    # Nothing is printed at this point.  The handler only records the closed
    # element's full tag path in %tag_hash; the main program prints the
    # totals once parsing has finished.
sub EndTag {
    # @tag_stack holds every currently open element, outermost first, with
    # the element being closed on top (pushed by StartTag), so joining the
    # whole stack yields that element's colon-separated path.
    my $key_tag = join ":", @tag_stack;

    # Post-increment creates the entry with a count of 1 the first time a
    # path is seen and bumps the counter on every later occurrence.
    $tag_hash{$key_tag}++;

    # The element is now closed: drop it from the stack of open elements.
    pop @tag_stack;

    return;
}

    # Stream-style text handler.  XML::Parser's Stream style passes only the
    # parser object as an argument and makes the accumulated character data
    # available in $_.
    #
    # This program only counts tags, so the text is ignored.  The sub is kept
    # on purpose: when no Text sub is defined, the Stream style prints the
    # text to standard output instead.
sub Text {
    return;
}