# # demo rewrite of Clark Cooper's xmlstats program, from # http://www.xml.com/xml/pub/98/09/xml-perl.html # shows how Dispatcher interface allows all info to be collated # in a single pass. Note the #cont handler. # use strict ; package Elinfo ; sub new { my $class = shift ; bless { parent => {}, child => {}, atts => {}, count => 0, empty => 1, text => 0, seen => 0, minlev => undef, @_ } => $class ; } package XmlStats ; use vars qw(@ISA) ; use XML::Parser::Notifier::SimpleDispatcher ; @ISA = qw(XML::Parser::Notifier::SimpleDispatcher) ; sub Init { my $self = shift ; $self -> {el_} = {} ; $self -> {level_} = 0 ; $self -> {seen_} = 0 ; $self -> _rules( { '#start' => \&generic, '#cont' => sub { $self -> {root_} = $_[1] } } ) ; } sub Final { $_[0] -> {el_} } sub generic { my ($self, $gi) = (shift, shift) ; # grab correct hash unless( defined($self -> {el_} -> {$gi}) ) { $self -> {el_} -> {$gi} = new Elinfo seen => $self -> {seen_} ++ ; } my $info = $self -> {el_} -> {$gi} ; # level update my $level = $self -> {level_} ++ ; $info -> {minlev} = $level unless defined $info -> {minlev} and $info -> {minlev} < $level ; $info -> {count} ++ ; # log attributes my $atab = $info -> {atts} ; my $att ; while ( defined($att = shift @_) ) { $atab -> {$att} ++ ; shift @_ ; } # cater for text content my $text ; return { '#start' => \&generic, '#char' => sub { $info -> {empty} = 0 ; $text += length $_[1] if $_[1] =~ /\S/ }, '#cont' => sub { $info -> {empty} = 0 ; # update mutual reference counts $info -> {child} -> {$_[1]} ++ ; $_[2] -> {parent} -> {$gi} ++ }, '#end' => sub { $self -> {level_} -- ; $info -> {text} += $text if $text ; $info } } } package main ; my $subform = ' @<<<<<<<<<<<<<<< @>>>>' ; sub showtab { my ($title, $table, $dosum) = @_ ; my @list = sort keys %$table ; if (@list) { print "\n $title:\n" ; my $item ; my $sum = 0 ; foreach $item (@list) { my $count = $table -> {$item} ; $sum += $count ; formline $subform, $item, $count ; print $^A, "\n" ; $^A = '' ; } if ($dosum and @list > 1) { print " =====\n" ; formline $subform, '', $sum ; print $^A, "\n" ; $^A = '' ; } } } my $file = shift ; die qq[Can't find file "$file"] unless -f $file ; use XML::Parser::Notifier ; my ($stats) = XML::Parser::Notifier -> new( ) -> add_client( new XmlStats ) -> parsefile( $file ) ; for my $el (sort { $stats->{$a}->{minlev} <=> $stats->{$b}->{minlev} or $stats->{$a}->{seen} <=> $stats->{$b}->{seen} } keys %$stats) { my $info = $stats -> {$el} ; print "\n================\n$el: ", $info -> {count}, "\n" ; print "Had ", $info -> {text}, " bytes of character data\n" if $info -> {text} ; print "Always empty\n" if $info -> {empty} ; showtab 'Parents', $info -> {parent}, 0 ; showtab 'Children', $info -> {child}, 1 ; showtab 'Attributes', $info -> {atts}, 0 ; }