#!/usr/bin/perl -w
# This program counts the tags in an XML file. Each tag is reported together
# with its full path of enclosing tags (e.g. root:parent:child) and the number
# of times that path occurs.
use strict;
use XML::Parser;
# Main program: parse one XML file and report how often each tag path occurs.
#
# The file to parse is the first command-line argument. When no argument is
# given, the historical default "F6N15.xml" is used so that existing
# invocations keep working. The same name is used in the report header, so
# the header always names the file that was actually parsed.
my $xml_file = defined $ARGV[0] ? $ARGV[0] : "F6N15.xml";

# The 'Stream' style makes the parser call the StartTag, EndTag and Text subs
# defined in this package as it walks the document.
my $p = XML::Parser->new(Style => 'Stream');

# State shared with the handler subs. These must stay file-scoped lexicals
# declared before the subs so the handlers can see them.
my $count = 0;    # total number of start tags seen
my @tag_stack;    # names of the currently open elements, outermost first
my %tag_hash;     # colon-joined tag path => number of elements with that path

# process the file
$p->parsefile($xml_file);

# print the results, one "path : count" line per distinct tag path
print "# Tags in TIGR XML file $xml_file\n\n";
foreach my $key (sort keys %tag_hash) {
    print "$key : $tag_hash{$key}\n";
}
print "\n# number of start tags = $count\n";
# end of main program
####################################
# SUBROUTINES
###################################
# StartTag -- handler invoked for every opening tag.
#
# Arguments:
#   $parser        the parser object (not used here)
#   $element_type  name of the element being opened
#
# Bumps the global start-tag counter and records the element as open by
# pushing its name onto @tag_stack; EndTag pops it again.
sub StartTag {
    my $parser       = shift;
    my $element_type = shift;

    $count += 1;

    return push @tag_stack, $element_type;
}
# EndTag -- handler invoked for every closing tag.
#
# Arguments:
#   $expat         the parser object (not used here)
#   $element_type  name of the element being closed (not used here; the
#                  same name is already on top of @tag_stack)
#
# Nothing is printed here. The handler only records one occurrence of the
# closing element's full path in %tag_hash; the main program prints the
# totals after parsing has finished.
sub EndTag {
    my ($expat, $element_type) = @_;

    # StartTag pushed every open element onto @tag_stack, so joining the
    # stack yields the path from the root down to the element being closed,
    # e.g. "root:parent:child".
    my $key_tag = join ":", @tag_stack;

    # Autovivification creates the entry on first sight, so a plain
    # increment covers both the "new path" and "seen before" cases.
    $tag_hash{$key_tag}++;

    # The element is closed now: remove it from the stack of open elements.
    pop @tag_stack;
}
# Text -- handler invoked for character data found between tags.
#
# Arguments:
#   $expat  the parser object (not used here)
#
# This program only counts tags, so text is deliberately ignored. The sub
# must nevertheless stay defined: per the XML::Parser Stream-style
# documentation, when no Text handler exists the parser prints the text
# itself. According to the same documentation the text is delivered in $_
# rather than as an argument -- confirm against the XML::Parser docs before
# giving this handler real work.
sub Text {
    my ($expat) = @_;

    # nothing to do with text in this program
    return;
}