#!/usr/bin/perl -w
# NAME: attrdict.pl
# AIM: **SPECIAL** - Read the C file attrdict.c, and analyse
# 14/01/2016 geoff mclane http://geoffair.net/mperl
#
# The script scans the C source for 'const AttrVersion TY_(W3CAttrsFor_XXX)'
# tables, collects the TidyAttr_YYY entries of each table, and prints
# several cross-reference reports (elements with/without ITEMPROP, use
# count per attribute, and the elements each attribute is NOT listed in).
use strict;
use warnings;
use File::Basename; # split path ($name,$dir,$ext) = fileparse($file [, qr/\.[^.]*/] )
use Cwd;

my $os = $^O;
my $perl_dir = '/home/geoff/bin';
my $PATH_SEP = '/';
my $temp_dir = '/tmp';
# 'darwin' (macOS) also contains 'win', so it must be excluded explicitly.
if (($os =~ /win/i) && ($os !~ /^darwin/i)) {
    $perl_dir = 'C:\GTools\perl';
    $temp_dir = $perl_dir;
    $PATH_SEP = "\\";
}
unshift(@INC, $perl_dir);
# prt(), open_log() and close_log() used below are expected to be supplied
# by this library - it is not part of this file.
require 'lib_utils.pl' or die "Unable to load 'lib_utils.pl' Check paths in \@INC...\n";

# log file stuff
our ($LF);
my $pgmname = $0;
if ($pgmname =~ /[\\\/]/) {
    # keep only the last path component, whichever separator was used
    my @tmpsp = split(/[\\\/]/,$pgmname);
    $pgmname = $tmpsp[-1];
}
my $outfile = $temp_dir.$PATH_SEP."temp.$pgmname.txt";
open_log($outfile);

# user variables
my $VERS = "0.0.5 2015-01-09";
my $load_log = 0;
my $in_file = '';
my $verbosity = 0;
my $out_file = '';

# ### DEBUG ###
my $debug_on = 1;
my $def_file = 'F:\Projects\tidy-html5\src\attrdict.c';

### program variables
my @warnings = ();
my $cwd = cwd();

# Verbosity predicates - true when $verbosity has reached the given level.
sub VERB1() { return $verbosity >= 1; }
sub VERB2() { return $verbosity >= 2; }
sub VERB5() { return $verbosity >= 5; }
sub VERB9() { return $verbosity >= 9; }

# Print every warning collected by prtw(), or, at verbosity 9 or more,
# a note that none were issued.
#   $val - the pending exit value (accepted but not used here)
sub show_warnings($) {
    my ($val) = @_;
    if (@warnings) {
        prt( "\nGot ".scalar(@warnings)." WARNINGS...\n" );
        foreach my $itm (@warnings) {
            prt("$itm\n");
        }
        prt("\n");
    } elsif (VERB9()) {
        prt( "\nNo warnings issued.\n\n" );
    }
}

# Print an optional final message, show the warnings, close the log and
# exit the program.
#   $val - process exit code
#   $msg - message to print first; may be empty; a newline is added if missing
sub pgm_exit($$) {
    my ($val,$msg) = @_;
    if (length($msg)) {
        $msg .= "\n" if ($msg !~ /\n$/);
        prt($msg);
    }
    show_warnings($val);
    close_log($outfile,$load_log);
    exit($val);
}

# Print a warning now, and remember it for the summary shown at exit.
#   $tx - warning text; one trailing newline, if any, is removed before storing
sub prtw($) {
    my ($tx) = @_;
    $tx =~ s/\n$//;
    prt("$tx\n");
    push(@warnings,$tx);
}

# Names of every attribute beginning with 'ITEM' seen so far (name => 1).
my %itemattrs = ();

# Count the names in the referenced array that begin with 'ITEM', and
# record each such name in %itemattrs.
#   $ra - reference to an array of attribute names
# Returns the count.
sub get_item_count($) {
    my ($ra) = @_;
    my $cnt = 0;
    foreach my $name (@{$ra}) {
        next if ($name !~ /^ITEM/);
        $cnt++;
        $itemattrs{$name} = 1;
    }
    return $cnt;
}

# sort comparator for [name, count] pairs: count descending, then name
# ascending so that the order of equal counts never depends on the
# stability of the sort implementation.
sub mycmp_decend_n1 {
    return (${$b}[1] <=> ${$a}[1]) || (${$a}[0] cmp ${$b}[0]);
}

# Return $text left justified, space padded to at least $width characters.
# Text already longer than $width is returned unchanged (never truncated).
sub _pad {
    my ($text,$width) = @_;
    return sprintf("%-*s", $width, $text);
}

# Return the length of the longest of the given strings, 0 if none given.
sub _max_length {
    my $max = 0;
    foreach my $txt (@_) {
        my $len = length($txt);
        $max = $len if ($len > $max);
    }
    return $max;
}

# Scan the source lines for the attribute tables and build the indexes.
#   $ra_lines - reference to the array of lines read from the input file
# Returns two hash references:
#   elements   - element name => { attribute name => 1, ... }
#   attributes - attribute name => number of tables listing it
# Each table found is echoed as 'line-number: element'. ITEM* attribute
# names are also recorded in %itemattrs. Exits through pgm_exit() on a
# table name not ending in W3CAttrsFor_XXX, on a repeated element, or when
# a new table starts before the TidyAttr_UNKNOWN terminator was seen.
sub _parse_attr_tables {
    my ($ra_lines) = @_;
    my %elements = ();
    my %attributes = ();
    my $lncnt = scalar @{$ra_lines};
    prt("Line: Element\n");
    for (my $ln = 0; $ln < $lncnt; $ln++) {
        my $line = ${$ra_lines}[$ln];
        chomp $line;
        my $lnn = $ln + 1;
        if ($line =~ /\s*#\s*include\s+(.+)$/) {
            my $inc = $1;
            ### no include files
            # NOTE(review): on the collapsed original line this print may
            # have been part of the comment above (i.e. disabled) - confirm.
            prt("$lnn: $inc\n");
            next;
        }
        my ($tabname) = ($line =~ /^\s*const\s+AttrVersion\s+TY_\((.+)\)/);
        next if (!defined $tabname);

        my $elem;
        if ($tabname =~ /W3CAttrsFor_(\w+)$/) {
            $elem = $1;
        } else {
            pgm_exit(1,"Failed: $lnn: $line\n");
        }
        prt("$lnn: $elem\n");
        if (defined $elements{$elem}) {
            pgm_exit(1,"Failed:2 $lnn: $line\n");
        }

        # collect the rows of this table, up to the UNKNOWN terminator;
        # the outer loop resumes on the line after the terminator
        my %h = ();
        my $terminated = 0;
        for ($ln++; $ln < $lncnt; $ln++) {
            $line = ${$ra_lines}[$ln];
            chomp $line;
            if ($line =~ /TidyAttr_(\w+)\s*,/) {
                my $attr = $1;
                if ($attr eq 'UNKNOWN') {
                    $terminated = 1;
                    last;
                }
                $h{$attr} = 1;
                $attributes{$attr}++;
                $itemattrs{$attr} = 1 if ($attr =~ /^ITEM/);
            }
            if ($line =~ /^\s*const\s+AttrVersion\s+TY_/) {
                pgm_exit(1,"Error: $line Did not find UNKNOWN!\n");
            }
        }
        if (!$terminated) {
            prtw("WARNING: $lnn: table for [$elem] ran to end of file without TidyAttr_UNKNOWN!\n");
        }
        $elements{$elem} = \%h; # hash of attributes for each element
    }
    return (\%elements,\%attributes);
}

# Report, per element, its attribute count, whether it lists ITEMPROP and
# how many ITEM* attributes it has, followed by the without/with lists.
#   $rh_elements - element name => hash ref of its attribute names
#   $ra_elems    - reference to the sorted array of element names
#   $max_ellen   - column width used for the element names
sub _report_itemprop {
    my ($rh_elements,$ra_elems,$max_ellen) = @_;
    my $ecnt = scalar @{$ra_elems};
    my @itemarr = sort keys %itemattrs;
    my $icnt = scalar @itemarr;
    prt("First for ITEMPROP, one of $icnt ITEMxxx = ".join(" ",@itemarr)."\n");

    my $msg = _pad('Element',$max_ellen).": attrs cnt has ITEMPROP";
    my $ulin = '=' x length($msg);
    prt("$msg\n");
    prt("$ulin\n");

    my @withip = ();
    my @noip = ();
    foreach my $elem (@{$ra_elems}) {
        my $rh = ${$rh_elements}{$elem};
        my @ar = sort keys %{$rh};
        my $items = get_item_count(\@ar);
        my $cnt = scalar @ar;
        my $state = 'ok';
        if (defined ${$rh}{ITEMPROP}) {
            push(@withip,$elem);
        } else {
            $state = "No ITEMPROP";
            push(@noip,$elem);
        }
        prt(_pad($elem,$max_ellen).": attrs $cnt $state $items IS\n");
    }
    prt("$ulin\n");

    my $noipcnt = scalar @noip;
    prt("Of the $ecnt elements, $noipcnt have NO ITEMPROP, as follows -\n");
    prt(join(" ",@noip)."\n");
    my $cnt = scalar @withip;
    prt("And $cnt With ITEMPROP:\n".join(" ",@withip)."\n");
}

# List every attribute, in name order, with its use count.
#   $rh_attributes - attribute name => use count
#   $max_atlen     - column width used for the attribute names
# Returns the list of [name, count] pairs, in name order.
sub _report_attr_counts {
    my ($rh_attributes,$max_atlen) = @_;
    my @names = sort keys %{$rh_attributes};
    my $acnt = scalar @names;
    prt("\nFound $acnt attributes... and each use count...\n");

    my $msg = "cnt: "._pad("attribute",$max_atlen)." used count";
    my $ulin = '=' x length($msg);
    prt("$msg\n");
    prt("$ulin\n");

    my @pairs = ();
    my $num = 0;
    foreach my $attr (@names) {
        my $cnt = ${$rh_attributes}{$attr};
        push(@pairs,[$attr,$cnt]);
        $num++;
        prt(sprintf("%3d",$num).": "._pad($attr,$max_atlen)." $cnt\n");
    }
    prt("$ulin\n");
    return @pairs;
}

# List the attributes grouped by use count, highest count first. Each
# output line is 'count name name ... (number of names)'.
#   $ra_sorted - reference to the [name, count] pairs, already in count order
#   $ecnt      - total number of elements (for the heading only)
sub _report_count_order {
    my ($ra_sorted,$ecnt) = @_;
    my $acnt = scalar @{$ra_sorted};
    prt("\nNow $acnt attributes in count order on $ecnt elements...\n");

    my $msg = "cnt: attribute list... (cnt)";
    my $ulin = '=' x length($msg);
    prt("$msg\n");
    prt("$ulin\n");

    my $group = 0;   # names printed on the current output line
    my $prev;        # use count of the current output line
    foreach my $ra (@{$ra_sorted}) {
        my ($attr,$cnt) = @{$ra};
        if ((defined $prev) && ($prev == $cnt)) {
            prt(" $attr");
            $group++;
        } else {
            # close the previous line, if any, before starting a new count
            prt(" ($group)\n") if (defined $prev);
            prt("$cnt $attr");
            $prev = $cnt;
            $group = 1;
        }
    }
    prt(" ($group)\n");
    prt("$ulin\n");
}

# For each attribute, in count order, list the elements NOT listing it.
#   $ra_sorted   - reference to the [name, count] pairs, in count order
#   $rh_elements - element name => hash ref of its attribute names
#   $ra_elems    - reference to the sorted array of element names
#   $max_atlen   - column width used for the attribute names
sub _report_missing {
    my ($ra_sorted,$rh_elements,$ra_elems,$max_atlen) = @_;
    my $acnt = scalar @{$ra_sorted};
    my $ecnt = scalar @{$ra_elems};
    prt("\nFor each $acnt arrtibute, show what of the $ecnt elements it is NOT present in!\n");

    my $msg = _pad("Attribute",$max_atlen).": Elements List without the attribute... (cnt)";
    my $ulin = '=' x length($msg);
    prt("$msg\n");
    prt("$ulin\n");

    foreach my $ra (@{$ra_sorted}) {
        my $attr = ${$ra}[0];
        prt(_pad($attr,$max_atlen).": ");
        my $missing = 0;
        foreach my $elem (@{$ra_elems}) {
            next if (defined ${$rh_elements}{$elem}{$attr});
            prt("$elem ");
            $missing++;
        }
        prt(" ($missing)\n");
    }
    prt("$ulin\n");
}

# process the file -
# const AttrVersion TY_(W3CAttrsFor_SUP)[] =
# { TidyAttr_ALIGN, xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx|HT50|XH50 },
# { TidyAttr_UNKNOWN, 0
#
# Read the whole input file, index its attribute tables and print all the
# reports. Sets $load_log so the log is loaded when the program exits.
#   $inf - path of the C source file to analyse
# Exits through pgm_exit() if the file can not be opened.
sub process_in_file($) {
    my ($inf) = @_;
    my $fh;
    if (! open($fh, '<', $inf)) {
        pgm_exit(1,"ERROR: Unable to open file [$inf]: $!\n");
    }
    my @lines = <$fh>;
    close($fh);
    my $lncnt = scalar @lines;
    prt("Processing $lncnt lines, from [$inf]... extracting 'elements'\n");

    my ($rh_elements,$rh_attributes) = _parse_attr_tables(\@lines);

    my @elearr = sort keys %{$rh_elements}; # with hash of attrs
    my $ecnt = scalar @elearr;
    my $max_ellen = _max_length(@elearr);
    my $max_atlen = _max_length(keys %{$rh_attributes});
    prt("Found $ecnt elements... now to analyse...\n");

    _report_itemprop($rh_elements,\@elearr,$max_ellen);

    # expand - show all attributes...
    my @attarr = _report_attr_counts($rh_attributes,$max_atlen);
    my @attarrs = sort mycmp_decend_n1 @attarr;
    _report_count_order(\@attarrs,$ecnt);

    # maybe now list what an attribute is NOT in
    _report_missing(\@attarrs,$rh_elements,\@elearr,$max_atlen);

    $load_log = 1;
}

#########################################
### MAIN ###
parse_args(@ARGV);
process_in_file($in_file);
pgm_exit(0,"");
########################################

# Exit with an error unless an argument follows the current option.
#   $arg - the option being processed
#   @av  - the arguments remaining after it
sub need_arg {
    my ($arg,@av) = @_;
    pgm_exit(1,"ERROR: [$arg] must have a following argument!\n") if (!@av);
}

# Parse the command line into the file level option variables. Any number
# of leading '-' is accepted on an option, and only the leading letter(s)
# of the option name are tested. A bare argument sets the input file.
# With $debug_on set, a missing input file falls back to $def_file.
# Exits through pgm_exit() on an invalid option or an unusable input file.
sub parse_args {
    my (@av) = @_;
    my $verb = VERB2();
    while (@av) {
        my $arg = $av[0];
        if ($arg =~ /^-/) {
            my $sarg = $arg;
            $sarg =~ s/^-+//;
            if (($sarg =~ /^h/i)||($sarg eq '?')) {
                give_help();
                pgm_exit(0,"Help exit(0)");
            } elsif ($sarg =~ /^v/) {
                # \D* rather than .* so that ALL trailing digits are taken
                # as the level: -v12 is 12, not 2
                if ($sarg =~ /^v\D*(\d+)$/) {
                    $verbosity = $1;
                } else {
                    # no number - each leading 'v' bumps the level by one
                    while ($sarg =~ /^v/) {
                        $verbosity++;
                        $sarg = substr($sarg,1);
                    }
                }
                $verb = VERB2();
                prt("Verbosity = $verbosity\n") if ($verb);
            } elsif ($sarg =~ /^l/) {
                $load_log = ($sarg =~ /^ll/) ? 2 : 1;
                prt("Set to load log at end. ($load_log)\n") if ($verb);
            } elsif ($sarg =~ /^o/) {
                need_arg(@av);
                shift @av;
                $out_file = $av[0];
                prt("Set out file to [$out_file].\n") if ($verb);
            } else {
                pgm_exit(1,"ERROR: Invalid argument [$arg]! Try -?\n");
            }
        } else {
            $in_file = $arg;
            prt("Set input to [$in_file]\n") if ($verb);
        }
        shift @av;
    }

    if ($debug_on) {
        prtw("WARNING: DEBUG is ON!\n");
        if (length($in_file) == 0) {
            $in_file = $def_file;
            prt("Set DEFAULT input to [$in_file]\n");
        }
    }
    if (length($in_file) == 0) {
        pgm_exit(1,"ERROR: No input files found in command!\n");
    }
    if (! -f $in_file) {
        pgm_exit(1,"ERROR: Unable to find in file [$in_file]! Check name, location...\n");
    }
}

# Print the usage text.
sub give_help {
    prt("$pgmname: version $VERS\n");
    prt("Usage: $pgmname [options] in-file\n");
    prt("Options:\n");
    prt(" --help (-h or -?) = This help, and exit 0.\n");
    prt(" --verb[n] (-v) = Bump [or set] verbosity. def=$verbosity\n");
    prt(" --load (-l) = Load LOG at end. ($outfile)\n");
    prt(" --out (-o) = Write output to this file.\n");
}

# eof - attrdict.pl