Generated: Tue Feb 2 17:54:58 2010 from test7.pl 2006/09/18 13.5 KB.
#!perl -w use strict; use warnings; require 'logfile.pl' or die "Unable to load logfile.pl ...\n"; ####################################################### # Load of HTM tags and PHP reserved words and built-in my $html_stx = 'C:/Program Files/EditPlus 2/html.stx'; my $php_stx = 'C:/Program Files/EditPlus 2/php.stx'; # if in HTML (default) #if ($kw == 1) { my @stxHTM = (); #} elsif ($kw == 2) { my @stxATT = (); #} elsif ($kw == 3) { my @stxSPL = (); #else in PHP #if ($kw == 1) { my @stxRW = (); #} elsif ($kw == 2) { my @stxBI = (); # like @BuiltIns; #} elsif ($kw == 3) { my @stxVA = (); my %HFuncsFnd = (); # set of FOUND builtin functions my %HResWdFnd = (); # reserved words used my @AFileNames = (); # for each output file, with hash of functions my @AFileHashs = (); # for each output file, with hash of functions my %HOldbifs = (); # old BIF, from previous index, if any ######################################################### ######################################################### ######## keep the OLD index ### this is needed IF files have been DELETED ... ### VARIABLES my $oi_tblcnt = 0; my $tbl_num = 1; my $tbl_num3 = 3; my @tbl_arr = (); my @tbl_arr3 = (); my @tbl_set = (); my @tbl_set3 = (); my $no_index = 0; my $dbg20 = 0; # get_table_array() my $dbg21 = 0; my $dbg22 = 0; my $dbg23 = 0; my $dbg24 = 0; my $oi_tacnt = 0; my $oi_tacnt3 = 0; my @oi_larr = (); my @oi_larr2 = (); my @oi_hrefs = (); ### FUNCTIONS # search the @stxBI array for an entry sub is_built_in($) { my ($t) = shift; foreach my $rw (@stxBI) { if ($t eq $rw) { return 1; } } return 0; } sub transfer_old_table3() { $oi_tacnt3 = scalar @tbl_set3; if ($oi_tacnt3 > 0) { prt( "Collected $oi_tacnt3 in \@tbl_set3 ... need to intialise built-in hash ...\n" ); ## load into my %HOldbifs = (); my $elimcnt = 0; my $elimcnt2 = 0; for (my $i = 0; $i < $oi_tacnt3; $i++) { my $bif = $tbl_set3[$i][0]; my $fss = $tbl_set3[$i][1]; if (is_built_in($bif)) { # each new htm file written is kept in - # push(@AFileNames, $ind_file ); # and for each of these a new hash of built ins has been kept # push(@AFileHashs, \%th); # store the functions used ... # so these files can be (safely) eliminated, since they will be added later foreach my $nhf (@AFileNames) { if ($fss =~ /$nhf/i) { $fss =~ s/$nhf//; $elimcnt++; } } $fss = trim_line($fss); if (length($fss)) { if (exists $HOldbifs{$bif}) { prt("\nWARNING: [$bif] appears DUPLICATED ...\n had=[".$HOldbifs{$bif}."\nadding [$fss]\n\n"); $HOldbifs{$bif} .= $fss; } else { $HOldbifs{$bif} = $fss; } } else { $elimcnt2++; } } else { prt("WARNING: DISCARDING [$bif] - NOT BUILT-IN!\n"); } } my $nwcnt = scalar keys %HOldbifs; if ($elimcnt > 0) { prt( "Elimated old files $elimcnt times, avoiding $elimcnt2 bifs being added...\n" ); } prt( "Done $oi_tacnt3 in \@tbl_set3 ... now $nwcnt in \%HOldbifs ...\n" ); } } sub mark_old_index($) { my ($f) = shift; my $tsc = scalar @tbl_set; for (my $i = 0; $i < $tsc; $i++ ) { if ($tbl_set[$i][0] eq $f) { $tbl_set[$i][7] = 1; last; } } } sub get_table_array { my $fnd = 0; my $lncnt = scalar @oi_larr2; for (my $i = 0; $i < $lncnt ; $i++) { my $ln = $oi_larr2[$i]; # extract a line chomp $ln; # remove LF (\n) $ln =~ s/\r$//; # and remove CR, if present if ($ln =~ /<table.*>/i) { prt( "FOUND TABLE: [$ln] ...\n" ) if ($dbg20); $oi_tblcnt++; # bump table counter if ($oi_tblcnt == $tbl_num) { prt( "Is my TABLE [$oi_tblcnt] ...\n" ) if ($dbg20); push(@tbl_arr,$ln); if ( !($ln =~ /<\/table>/i) ) { $i++; # move to next line for ( ; $i < $lncnt; $i++) { $ln = $oi_larr2[$i]; # extract a line chomp $ln; # remove LF (\n) $ln =~ s/\r$//; # and remove CR, if present if ( $ln =~ /<\/table>/i ) { prt( "END TABLE $tbl_num: [$ln] ...\n" ) if ($dbg20); push(@tbl_arr,$ln); $fnd++; last; } push(@tbl_arr,$ln); } } } elsif ($oi_tblcnt == $tbl_num3) { prt( "Is my TABLE [$oi_tblcnt] ...\n" ) if ($dbg20); push(@tbl_arr3,$ln); if ( !($ln =~ /<\/table>/i) ) { $i++; # move to next line for ( ; $i < $lncnt; $i++) { $ln = $oi_larr2[$i]; # extract a line chomp $ln; # remove LF (\n) $ln =~ s/\r$//; # and remove CR, if present if ( $ln =~ /<\/table>/i ) { prt( "END TABLE $tbl_num3: [$ln] ...\n" ) if ($dbg20); push(@tbl_arr3,$ln); $fnd++; last; } push(@tbl_arr3,$ln); } } } } } return $fnd; } sub process_tbl_arr() { my $cc = 0; for (my $i = 0; $i < $oi_tacnt ; $i++) { my $ln = $tbl_arr[$i]; # extract a line if ($ln =~ /<td.*>/i) { while ( !($ln =~ /<\/td>/i) ) { $i++; if ($i < $oi_tacnt) { $ln .= ' '.$tbl_arr[$i]; # extract a line } else { last; } } # got begin and end of <td>...</td> block if ($ln =~ /(<td.*?>)(.*)(<\/td>)/i) { my $tds = $1; my $inb = $2; my $tde = $3; # like Line [<td><a href="adjrt01.htm">adjrt01.htm</a> <br>2006/05/23 <br>10,213</td>] = # [<td>][<a href="adjrt01.htm">adjrt01.htm</a> <br>2006/05/23 <br>10,213][</td>] ... prt( "Line [$ln] = \nBlocks [$tds][$inb][$tde] ...\n" ) if ($dbg21); ###if ($inb =~ /<a\s*href=\"(.*)\">(.*)<\/a>/) { ##if ($inb =~ /<a\s*href=\"(.*)\">(.*)<\/a>\s*<br>(\d{4}\S*)\s*<br>/i) { #if ($inb =~ /<a\s*href=\"(.*)\">(.*)<\/a>\s*<br>(\d{4}\S*)\s*<br>(\d{1}\S*)/i) { if ($inb =~ /<a\s*href=\"(.*)\">(.*)<\/a>\s*<br>(\d{4}\S*)\s*<br>(\d{1}\S*)\s*/i) { my $hrf = $1; my $fil = $2; my $dt = $3; my $sz = $4; my ($yr, $mt, $dy) = split(/\//,$dt); ###$sz =~ s/,//g; # 0 1 2 3 4 5 6 7 push(@tbl_set, [$hrf, $fil, $dt, $sz, $yr, $mt, $dy, 0]); prt("href=[$hrf], file=[$fil], date=[$dt][$yr][$mt][$dy], size=[$sz]...\n") if ($dbg22); } else { prt("HREF not found - CHECK!\n") if ($dbg22); } } } } } sub process_tbl_arr3() { my $cc = 0; my $ff = 0; # since just two columns - flip flop my $bif = ''; my $fil = ''; for (my $i = 0; $i < $oi_tacnt3 ; $i++) { my $ln = $tbl_arr3[$i]; # extract a line if ($ln =~ /<td.*>/i) { $cc = length($ln); prt( "$i - Line [$ln] $cc...\n" ) if ($dbg24); while ( !($ln =~ /<\/td>/i) ) { $i++; if ($i < $oi_tacnt3) { $ln .= ' '.$tbl_arr3[$i]; # extract a line } else { last; } } if ($cc != length($ln)) { $cc = length($ln); prt( "$i - Line [$ln] $cc...\n" ) if ($dbg24); } # got begin and end of <td>...</td> block # 2006.09.11 '?' added to STOP greedy parsing if ($ln =~ /(<td.*?>)(.*)(<\/td>)/i) { my $tds = $1; my $inb = $2; my $tde = $3; prt( "$i - td[$tds] in[$inb] te[$tde]...\n" ) if ($dbg24); if ($ff > 0) { $fil = collectoi_hrefs($inb, 1); # remove HREF $fil = trim_line($fil); if (is_built_in($bif)) { push(@tbl_set3, [$bif, $fil, 0]); prt( " push(\@tbl_set3, [$bif, $fil, 0]); ...\n" ) if ($dbg23); } else { if (($bif =~ /unused/i)||($bif =~ /missed/i)) { prt( " Advice: Skipping [$bif] ...\n" ) if ($dbg23); } else { prt( " Advice: Skipping [$bif] - NOT BUILT IN FUNCTION!\n" ); } } $ff = 0; } else { $bif = $inb; $bif =~ s/\[//; $bif =~ s/\]//; $bif = trim_line($bif); if ($bif =~ /<.*?>(.*?)<\/.*?>/) { $bif = trim_line($1); } $ff = 1; } } else { prt( "CHECK ME: Missed <td> ... </td> \n"); } } } } sub get_old_index($) { my ($ind) = shift; $oi_tacnt = 0; $oi_tacnt3 = 0; my $ln = ''; my $lncnt = 0; if (open IF, "<$ind") { @oi_larr = <IF>; # slurp it all in ... close(IF); $lncnt = scalar @oi_larr; prt( "Got $lncnt lines to process ... from [$ind]\n" ); ###write2file( join('',@oi_larr), 'tempout.txt'); $ln = tag2newline( join('',@oi_larr), 'td' ); ###$ln = tag2newline( $ln, 'br' ); @oi_larr2 = split(/\n/, $ln); ###write2file( join("\n",@oi_larr2), 'tempout3.txt'); if (get_table_array()) { $oi_tacnt = scalar @tbl_arr; $oi_tacnt3 = scalar @tbl_arr3; prt( "Got $oi_tacnt and $oi_tacnt3 lines to process ... from [$ind]...\n" ); } else { prt( "Failed to find table $tbl_num or $tbl_num3 ... in [$ind]...\n" ); } } else { prt( "Warning: Failed to open $ind ...\n" ); $no_index = 1; } if ($oi_tacnt > 0) { process_tbl_arr(); } else { prt( "Warning: Failed to load table $tbl_num ...\n" ); } if ($oi_tacnt3 > 0) { process_tbl_arr3(); } else { prt( "Warning: Failed to load table $tbl_num3 ...\n" ); } transfer_old_table3(); } ################################################################### # COPIED OUT OF htmltools.pl, since I do NOT want to include it, just now ... sub tag2newline { # ($txt2,'td'); my ($txt, $tag) = @_; my $len = length($txt); my $ntxt = ''; my $i; my $ch = ''; my $ft = ''; my $lcnt = 0; for ($i = 0; $i < $len; $i++ ) { $ch = substr($txt,$i,1); if ($lcnt && ($ch eq '<')) { $ft = $ch; $i++; for ( ; $i < $len; $i++ ) { $ch = substr($txt,$i,1); $ft .= $ch; if ($ch eq '>') { if ($ft =~ /^<$tag/i) { $ft = "\n".$ft; } last; } } $ntxt .= $ft; } else { $ntxt .= $ch; if ($ch eq "\n") { $lcnt = 0; } else { $lcnt++; } } } return $ntxt; } sub collectoi_hrefs { my ($txt,$del) = @_; my $ntxt = ''; my $len = length($txt); my $ch = ''; my $hrf = ''; my $i; for ($i = 0; $i < $len; $i++) { $ch = substr($txt,$i,1); if ($ch eq '<') { $hrf = $ch; $i++; for ( ; $i < $len; $i++) { $ch = substr($txt,$i,1); $hrf .= $ch; if ($ch eq '>') { last; } } if ($hrf =~ /^<a\s/i) { if ($del == 0) { $ntxt .= $hrf; } ### prt("Got [$hrf] ...\n"); if ($hrf =~ /href=["'](\S+)["']./i) { $hrf = $1; push(@oi_hrefs,$hrf); ### prt("Got [$hrf] ...\n"); } } elsif ($hrf =~ /^<\/a>$/i) { if ($del == 0) { $ntxt .= $hrf; } } else { $ntxt .= $hrf; } } else { $ntxt .= $ch; } } return $ntxt; } ################################################################### #################################### # Reducing a line to bare bones # Used when loading # the EditPlus 2 stx files. #################################### sub trim_line($) { my ($l) = shift; chomp $l; # remove LF $l =~ s/\r$//; # and remove CR, if present $l =~ s/\t/ /g; # tabs to a space $l =~ s/\s\s/ /g while ($l =~ /\s\s/); # duplicate space to single $l = substr($l,1) while ($l =~ /^\s/); # each off leading space $l = substr($l,0,length($l)-1) while (($l =~ /\s$/)&&(length($l))); # and trailing space return $l; } #Loading HTML stx [C:/Program Files/EditPlus 2/html.stx] ... #Got KEYWORD [HTML Tags] ... #Got KEYWORD [HTML Attributes] ... #Got KEYWORD [Special characters] ... sub load_html_stx($) { my ($fil) = shift; my $kw = 0; my $nl = ''; prt("Loading HTML stx [$fil] ...\n"); open IF, "<$fil" or mydie( "ERROR: Unable to open [$fil] ...\n" ); my @la = <IF>; close IF; foreach my $ln (@la) { chomp $ln; $ln =~ s/\r$//; if ($ln =~ /^#/) { if ($ln =~ /^#KEYWORD=(.*)/) { prt( "Got KEYWORD [$1] ...\n" ); if ($1 eq 'HTML Tags') { $kw = 1; next; } elsif ($1 eq 'HTML Attributes') { $kw = 2; next; } elsif ($1 eq 'Special characters') { $kw = 3; next; } } $kw = 0; next; } if ($kw == 1) { $nl = trim_line($ln); push(@stxHTM, $nl) if (length($ln)); } elsif ($kw == 2) { $nl = trim_line($ln); push(@stxATT, $nl) if (length($ln)); } elsif ($kw == 3) { $nl = trim_line($ln); push(@stxSPL, $nl) if (length($ln)); } } } #Loading PHP stx [C:/Program Files/EditPlus 2/php.stx] ... #Got KEYWORD [Reserved words] ... #Got KEYWORD [Built-in functions] ... #Got KEYWORD [Variables] ... sub load_php_stx($) { my ($fil) = shift; my $kw = 0; my $nl = ''; prt("Loading PHP stx [$fil] ...\n"); open IF, "<$fil" or mydie( "ERROR: Unable to open [$fil] ...\n" ); my @la = <IF>; close IF; foreach my $ln (@la) { chomp $ln; $ln =~ s/\r$//; if ($ln =~ /^#/) { if ($ln =~ /^#KEYWORD=(.*)/) { prt( "Got KEYWORD [$1] ...\n" ); if ($1 eq 'Reserved words') { $kw = 1; next; } elsif ($1 eq 'Built-in functions') { $kw = 2; next; } elsif ($1 eq 'Variables') { $kw = 3; next; } } $kw = 0; next; } elsif ($ln =~ /^;/) { # skip these 'comments' next; } if ($kw == 1) { $nl = trim_line($ln); push(@stxRW, $nl) if (length($ln)); } elsif ($kw == 2) { $nl = trim_line($ln); push(@stxBI, $nl) if (length($ln)); } elsif ($kw == 3) { $nl = trim_line($ln); push(@stxVA, $nl) if (length($ln)); } } } sub do_stx_load() { load_html_stx( $html_stx ); prt( "Loaded ".scalar @stxHTM." HTM, ".scalar @stxATT." ATT, and ".scalar @stxSPL." spls\n" ); load_php_stx( $php_stx ); prt( "Loaded ".scalar @stxRW." RW, ".scalar @stxBI." BI, and ".scalar @stxVA." vars\n" ); } my $old_ind = 'temp2/index.htm'; do_stx_load(); get_old_index($old_ind); # eof - test7.pl