Generated: Tue Feb 2 17:54:51 2010 from p2html8.pl 2005/05/10 39 KB.
#!/perl -w ### ################################################# ### p2html - perl code to HTML document format ### Works, mostly - still a SPACE-REPLACEMENT problem ... ### Geoff - geoffmclane.com - geoffair _at_ hotmail _dot_ com ### ################################################## use strict; use warnings; require "colours.pl"; require "eppearl.pl"; ### global variables my $vers = '0.0.8'; # eighth iteration ... LOOKS GOOD ... still space replacement, regex, line no, in src ... ### regex is now NOT expanded, but only by exceptchr of '/', so still some problems ... ### space is not 'exactly' maintained ... should try not to parse inside a word array ... my $WHITE_PATTERN2 = "^[ \t\n\r]*\$"; # spacey if ($var =~ /$WHITE_PATTERN2/o ) { ...} my $tab_stg = ' '; # replace tabs, with 3 spaces my $verb2 = 0; my $verb3 = 0; ### add perl.stx parsing diag log my $dbgon = 0; # 1 DOUBLES HTML OUTPUT FOR COMPARISON my $perlstx = 'C:/Program Files/EditPlus 2/perl.stx'; my $DELIMITER = '(){}[]-+*/=~!&|<>?:;.,'; my $logfil = 'templog.txt'; my @logmsgs = (); my ($OF, $IF, $LF, $STX); my $colorON = 1; my $name; my $lc = 0; my $dnpara = 1; my @lnbits; my @spbits; my @copybits; ## keep, for ORIGINAL space work 'replacement' my $chk; my $istxt = 1; my $actpunc = ''; ### store the active punctuation my %HPuncsFnd = (); # hash of Punctuation FOUND in parse my $expanOFF = 0; ### stop expansion temporarily ... my $actresword = ''; my %HResWdFnd = (); my $actfunc = ''; ### store the active built-in functions my %HFuncsFnd = (); my $actlnnum = ''; ### require "colours.pl" and "eppearl.pl"; to fill these our @PPairs; our @DolVars; our @PBPunc; our @TTset; our @PPunct; ### start of program #################### ### Get command line input ... my $infile = shift || '.'; my $outfil = shift || 'tempout.htm'; ## my $func; my @TTColrs = qw(l.blue brown l.br s.green pink mauve b.green l.brn blue white l.grey); my @TTTypes = qw(array comment unass s-quote scalar functions d-quote hash reserved other punctuation); my @TTAttrib = qw(match orange regex green color1 color2 color3 peach blue white grey); for $name (@TTAttrib) { no strict 'refs'; # allow symbol table manipulation *$name = *{uc $name} = sub { "<TT class='$name'>@_</TT>" }; ### *$name = *{uc $name} = sub { "<TT class='$name'>\n@_\n</TT>" }; } ###my @colors = qw(red blue green yellow orange purple violet); my @colors = qw(red yellow purple violet); for $name (@colors) { no strict 'refs'; # allow symbol table manipulation *$name = *{uc $name} = sub { "<FONT COLOR='$name'>@_</FONT>" }; } my $ss = 5; ##our @TTset; ##our @PPunct; ##require "colours.pl"; ##require "eppearl.pl"; my $msg = ''; my ($line, $txt); my $i = 0; my ($cnt1, $cnt2); my $inbraces = 0; my $c; my $c3; if ($infile eq '.') { die "No input file given ...\n"; } open $LF, ">$logfil" or die "Can NOT open LOG file $logfil!\n"; tolog ("$0 Started " . localtime(time()) . " ...\n"); if (! -f $infile) { die "Input file [$infile] NOT FOUND! ...\n"; } tolog ("Opening $infile ...\n"); open $IF, "<$infile" or die "Can not OPEN $infile!\n"; tolog ("Loading $infile ...\n"); my @lines = <$IF>; # slurp whole file, to an array of lines close($IF); open $OF, ">$outfil" or die "Can not create $outfil!\n"; ###### pre-process perl.stx file ###################################### open $STX, "<$perlstx" or die "Can NOT locate $perlstx file...\n"; my @stx = <$STX>; close($STX); $i = @stx; tolog ("List of $i STX file lines...\n"); my %stxh; my @ResWds = (); my @BFuncs = (); my %HResWds; my %HBFuncs; my $sw = 0; # no switch on foreach $line (@stx) { chomp $line; my $ll = length($line); # get LENGTH of file line my @a; my $k; my $v; $c = substr ($line, 0, 1); $msg = ''; if ($c eq ';') { # comment $msg = 'comment only'; } elsif ($c eq '#') { # hash item=value $msg = ' hash'; @a = split('=', $line); # get key/value ($k, $v) = @a; $k = substr($k, 1); ###$stxh{$a[0]} = $a[1]; if ( exists $stxh{$k} ) { if ($stxh{$k} eq $v) { $msg .= ' same '; } else { $msg .= ' new '; } $stxh{$k} .= '|' . $v; ###$v = $stxh{$k}; } else { $stxh{$k} = $v; } ### $msg .= ' k=' . $a[0] . ' v=' . $a[1] . '-'; ###$msg .= ' k=' . $k . ' v=' . $v . ' - '; $msg .= ' k=' . $k . ' v=' . $stxh{$k} . ' - '; #KEYWORD=Reserved words #KEYWORD=Built-in functions if ($k eq 'KEYWORD') { if ($v eq 'Reserved words') { $sw = 1; $msg .= '(ResWds)'; } elsif ($v eq 'Built-in functions') { $sw = 2; $msg .= '(BFuncs)'; } else { $sw = 0; } } } if ($ll > 1) { if ($sw == 1) { push(@ResWds, $line); if ( exists $HResWds{$line} ) { die "Duplicate RESERVE WORD [$line]\n" } $HResWds{$line} = $line; $msg .= " - rw+"; } elsif ($sw == 2) { push(@BFuncs, $line); if ( exists $HBFuncs{$line} ) { die "Duplicate BUILT-IN FUNCTION [$line]\n" } $HBFuncs{$line} = $line; $msg .= " - bf+"; } } tolog ($line . $msg . "\n") if $verb3; } $line = 'new'; if ( ! exists $HBFuncs{$line} ) { $msg = ' ++Added'; push(@BFuncs, $line); $HBFuncs{$line} = $line; tolog ($line . $msg . "\n"); } $cnt1 = @ResWds; $cnt2 = @BFuncs; tolog ("END List of $i STX file lines...rw=$cnt1 bf=$cnt2 \n"); ###### end-process perl.stx file ###################################### add_html_head( $OF, $infile ); add_html_table($OF); ### like <table align="center" width="90%" border="2" bgcolor="#eeeeee"> <tr> <td> my $lncnt = @lines; # get count my $cntlns = 0; tolog ("Processing $infile ... $lncnt lines\n"); prt ("<p>\n"); foreach $line (@lines) { $txt = $line; chomp $txt; $cntlns++; $actlnnum = get_line_num ($cntlns); tolog ("\nLine $actlnnum:[$txt]\n"); $istxt = 1; # assume text if ($txt =~ /$WHITE_PATTERN2/o ) { $txt = "</p>\n<p>\n"; # CLOSE paragraph, and open $istxt = 0; # NOT text } else { ### $txt = white(htmlise($txt)); $txt = htmlise($txt); $txt .= "<br>\n"; } if ( $istxt ) { if ($dbgon) { tolog ("Simple WHITE-ised to HTML file ...\n") if $verb2; prt ($txt); # just for COMPARISON } } else { ## if (! $istxt) { tolog ("Simple WHITE-ised to HTML file ...\n") if $verb2; prt ($txt); # just for COMPARISON } if ($istxt) { ###do_line_parse ($line); tolog ("Per line component parsing to HTML file ...\n") if $verb2; ###do_line_parse ($actlnnum . ' ' . $line); do_line_parse ($line); } } print $OF <<EOF; </td> </tr> </table> EOF tolog ("Processed $lc lines of $infile ... written to $outfil ... add tail ...\n"); add_color_samp($OF); prt ( get_parse_stats () ); add_html_tail($OF); showarrcnts(); tolog ("$0 Ended " . localtime(time()) . " ...\n"); close($OF); system $outfil; # system $logfil; sub prt { tolog (@_); print $OF @_; } ### COLOR: #00008b; sub addTTitem_simp { my ($fh, $nm, $bd, $bg) = @_; print $fh <<"EOF3"; .$nm { COLOR: $bd } EOF3 } sub addTTitem_vOK { my ($fh, $nm, $bd, $bg) = @_; print $fh <<"EOF3"; .$nm { BACKGROUND-COLOR: $bg } EOF3 } sub addTTitem_full { my ($fh, $nm, $bd, $bg) = @_; print $fh <<"EOF3"; .$nmm { COLOR: $bd; BORDER-TOP: $bd 1px solid; BORDER-LEFT-WIDTH: 1px; BORDER-LEFT-COLOR: $bd; PADDING-BOTTOM: 1px; PADDING-TOP: 1px; BORDER-BOTTOM: $bd 1px solid; WHITE-SPACE: nowrap; BACKGROUND-COLOR: $bg; BORDER-RIGHT-WIDTH: 1px; BORDER-RIGHT-COLOR: $bdd } EOF3 } sub add_html_style { my ($fh) = @_; print $fh <<"EOF1"; <style><!-- TT { FONT-FAMILY: 'Courier New'; } EOF1 ### FONT-FAMILY: 'Andale Mono', 'Lucida Console', monospace ################################# ###my @TTset = qw( match #0066ff #e8f4ff string #0000ff #ccccff ); my $nm; my $bd; my $bg; my $mx = @TTset; #### my $ss = 3; tolog ("Processing $mx / 3 styles ...\n"); tolog ( @TTset . "\n" ); my $i; ###my $additem = \&addTTitem_vOK; ###my $additem = \&addTTitem_full; ### my $add_item = \&addTTitem_simp; ## ??while (($nm, $bd, $bg) = @TTset) { for ($i = 0; $i < ($mx / $ss); $i++) { $nm = $TTset[($i*$ss)+0]; $bd = $TTset[($i*$ss)+1]; $bg = $TTset[($i*$ss)+2]; addTTitem_full ($fh, $nm, $bd, $bg); ###addTTitem ($fh, $nm, $bd, $bg); ###&add_item->($fh, $nm, $bd, $bg); } ################################### print $fh <<"EOF2"; --></style> EOF2 } sub add_html_head { my ($fh, $hdr) = @_; print $fh <<"EOF"; <html> <!-- P26.2005.05.10 geoffmclane.com perl HTML generated using p2html5.pl - --> <head> <title>$hdr</title> </head> EOF # dynamic block of style - could be put to a file ... add_html_style($fh); print $fh <<"EOF"; <body> <h1 align="center">$hdr</h1> <p align="center"><a href="perl.htm">back</a></p> EOF } sub add_html_table { my ($fh) = @_; print $OF <<EOF; <table align="center" width="90%" border="2" bgcolor="#eeeeee"> <tr> <td> EOF } sub add_html_tail { my ($fh) = @_; print $fh <<"EOF"; <p align="center"><a href="perl.htm">back</a></p> </body> </html> EOF } my @TypeColors_NOTUSED = ( ###if ($c eq '#') { # comment component - should be to end-of-line, or more ... "comment", ### $func = \&orange; ###} elsif ($c eq "'") { ## "' # does it start with quotes DOUBLE or SINGLE "s.quote", ### $func = \&green; ### } elsif ($c eq '"') { "d.quote", ### $func = \&color3; ###} elsif ($c eq '$') { # start of scalar "scalar", ### $func = \&color1; ###} elsif ($c eq '@') { # start of array "array", ### $func = \&match; ###} elsif ($c eq '%') { # start of hash "hash", ### $func = \&peach; ###} elsif ( exists $HResWds{$tx2} ) { "reserved", ### $func = \&blue; ### } elsif ( exists $HBFuncs{$tx2} ) { "functions", ### $func = \&color2; ### } else { "other" ### $func = \&white;} ); sub a2f { my ($f,$t) = @_; print $f $t; } sub n_row { ###my ($f) = @_; a2f (@_, " <tr>"); } sub n_col { ###my ($f) = @_; a2f (@_, " <td>"); } sub c_row { ###my ($f) = @_; a2f (@_, " </tr>"); } sub c_col { ###my ($f) = @_; a2f (@_, " </td>"); } ## my $func; ### my @TTColrs = qw(l.blue brown l.br s.green pink mauve b.green l.brn blue white l.grey); ### my @TTTypes = qw(array comment unass s-quote scalar functions d-quote hash reserved other punctuation); ### my @TTAttrib = qw(match orange regex green color1 color2 color3 peach blue white grey); sub add_color_samp { my ($fh) = @_; $i = 0; print $fh <<EOF; <p>Colour Key :<br>Function, Description., Colour<br> <table border="1" bgcolor="#eeeeee"> EOF ### out attributes n_row $fh; # add " <tr>\n"; # open ROW n_col $fh; # add " <td>\n"; # open COLUMN a2f $fh, "Style"; c_col $fh; # add " </td>\n"; # close COLUMN n_col $fh; # add " <td>\n"; # open COLUMN a2f $fh, "Description"; c_col $fh; # add " </td>\n"; # close COLUMN n_col $fh; # add " <td>\n"; # open COLUMN a2f $fh, "Colour"; c_col $fh; # add " </td>\n"; # close COLUMN c_row $fh; ### " </tr>\n"; # close ROW foreach $name (@TTAttrib) { ###no strict 'refs'; # allow symbol table manipulation my $fun = \&$name; ## get the function - the auto-generated sub n_row $fh; # add " <tr>\n"; # open ROW n_col $fh; # add " <td>\n"; # open COLUMN ### a2f $fh, "Attributes"; $msg = $name; $txt = $fun->($msg); a2f $fh, $txt; c_col $fh; # add " </td>\n"; # close COLUMN n_col $fh; # add " <td>\n"; # open COLUMN ### a2f $fh, "Function"; $msg = $TTTypes[$i]; $txt = $fun->($msg); a2f $fh, $txt; c_col $fh; # add " </td>\n"; # close COLUMN n_col $fh; # add " <td>\n"; # open COLUMN ### a2f $fh, "Colour"; @TTColrs $msg = $TTColrs[$i]; $txt = $fun->($msg); a2f $fh, $txt; c_col $fh; # add " </td>\n"; # close COLUMN c_row $fh; ### " </tr>\n"; # close ROW $i++; # bump to next } ### end if all print $fh <<EOF; </table> </p> EOF ### all done ... } sub tolog { print @_; print $LF @_; } sub xceptchr { my ($chr) = @_; ###if (($chr eq ':') || ($chr eq '=') || ($chr eq '|') || ($chr eq ',')) { if ( ($chr eq '/') || ($chr eq ':') || ($chr eq '|') ) { return 1; } return 0; } sub is_a_quote { my ($chr) = @_; if (($chr eq '"') || ($chr eq "'")) { return 1; } return 0; } sub get_a_quote { my ($t) = @_; my $mx = length($t); my $i; if ($t =~ /['"]/) { # match quote for ($i = 0; $i < $mx; $i++) { my $chr = substr ($t, $i, 1); if (is_a_quote($chr)) { return $chr; } } } return 0; } ### NOT passed an ALL-SPACEY line sub do_line_parse { my ($tx) = @_; chomp $tx; ### my @copybits; ## keep, for ORIGINAL space work 'replacement' my $tx2 = $tx; my $tx3; my $tx4 = htmlise($tx); ## the HTML'ISED string my $txsp = ''; # frontend SPACEY stuff ### no way ! my $txsp = $htmnbs; # frontend SPACEY stuff my $tx5; my $tx6; my $c1 = substr ($tx, 0, 1); # get and keep first char @lnbits = split (' ', $tx); # initial split spaces my $c2 = substr ($lnbits[0], 0, 1); # get POTENTIAL new first char my $pos1 = index ($tx, $c2); # get pos of first array char, in string my $gotfes = 0; # no frontend space if ($pos1 > 0) { $gotfes = 1; # mark, got frontend space $txsp = substr($tx, 0, $pos1); # get SPACEY at FRONT } my $cnt = @lnbits; # count of componets, so far my $cntorg = $cnt; # keep original SIZE, $cnt is 'adjusted' during ... my $i = 0; my $i3 = 0; my @sp11; my $nct = 0; # count AFTER array 'adjustments' ... my $ln = length($tx2); # get length of line, not soooo important my $ch = substr ($tx2, 0, 1); # get first char, for fast decisions my $c = $ch; ### copy of FIRST char ### if ($lnbits[0] =~ m/^\#/) { if ($c1 eq '#') { ####################################################### # is comment tolog ("Is comment - try ...\n"); ###$tx3 = green($tx4); $tx3 = orange($tx4); $tx3 .= "<br>\n"; prt ($tx3); ####################################################### } else { ## does not START with a # comment char #### tolog ("########### parse run one ###############################(c=$cnt)\n") if $verb2; if ($verb2) { tolog ("########### parse run one ###############################(c=$cnt)\n"); $msg = ''; foreach $tx2 (@lnbits) { $msg .= "[$tx2]"; } $msg .= "\n"; tolog ($msg); } $i3 = 0; my $ichg = 0; ### count of bit changes ### first run - to re-combine quoted text within LINE ARRAY $ichg = 0; @logmsgs = (); ### clear LOG message stack ###tolog ("{ comps $cntorg\n"); # log COUNT at start $msg = ("{ comps $cntorg\n"); # log COUNT at start push(@logmsgs,$msg); ## accumulate ### this pre-run JOINS or SPLITS = ENSURE EACH QUOTED BLOCK is in its own bucket my $icnt = 0; ### init line 'bits' counter doparsereset (); foreach $tx2 (@lnbits) { $icnt++; # PRE-BUMP THE COUNT $msg = $tx2; # set line bit $ln = length($tx2); $ch = substr($tx2, 0, 1); $i = 0; ### special +?.*^$()[]{}|\ ### if ($tx2 =~ /^['"]/ ) { ## "' # does it start with quotes d or s if (($ch eq '"')||($ch eq "'")) { $msg .= " Begin Q (l=$ln)["; $msg .= $tx2; $msg .= ']'; $i3 = 1; # set JOIN if ($ln > 1) { $i3 = 1; # set JOIN ###$tx3 = substr ($tx2, 1, $ln - 1); # get past quote $tx3 = substr ($tx2, 1); # get past quote if (($ln > 1) && ($tx3 =~ /$ch/)) { $pos1 = index ($tx3, $ch); # get position of next quote $msg .= ' and end ['; $msg .= $tx3; $msg .= "](p=$pos1)"; if ($pos1 > 0) { $tx5 = substr ($tx2, 0, ($pos1 + 1 + 1)); # get WHOLE QUOTE $tx3 = substr ($tx3, ($pos1 + 1)); # get ending text, if ANY if (length($tx3)) { ### error case ### "_","|", DONE WOULD SPLIT ["_"][,"|",]? b&e same quotes $msg .= ' quote split '; $msg .= '['; $msg .= $tx5; $msg .= ']'; $msg .= '['; $msg .= $tx3; $msg .= ']?'; $lnbits[$icnt - 1] = $tx5; # put back adjusted first @sp11 = ($tx3); ### bit-to-insert ### if ( $tx3 =~ /$ch/ ) { if ((length($tx3) > 1) && ( $tx3 =~ /['"]/ )) { ### zeek, there are more of these ... $i = 0; $tx5 = ''; while(1) { $c = substr ($tx3, $i, 1); if (($c eq '"')|| ($c eq "'") ) { last; } $i++; # bump to next if ($i >= ($ln - 1)) { $c = 0; last; } } if ($i) { if (($c eq '"')||($c eq "'")) { $tx5 = substr ($tx3, 0, $i); # get before QUOTE $tx3 = substr ($tx3, $i ); # get balance $sp11[0] = $tx5; push(@sp11,$tx3); $ichg++; } } $msg .= " found [$c] split [$tx5] [$tx3]* "; } splice (@lnbits, $icnt, 0, @sp11); # insert 1 or more new items ### splice (@lnbits, $i2, 0, $tx3); # insert 1 new items $cnt = @lnbits; ### ADJUST COUNT ITERATOR $ichg++; } } $msg .= " b&e same quotes"; $i3 = 0; } } if ($i3) { ### JOIN, until the END OF THIS QUOTE $i3 = 0; $tx6 = $tx2; ### start slurping for ($i = $icnt; $i < $cnt; $i++) { $tx3 = $lnbits[$i]; # get next $msg .= ('+[' . $tx3 . ']'); $tx6 .= ' '; # add back space $tx6 .= $tx3; ### $lnbits[$i]; $i3++; ### count 'bits' to DELETE $ichg++; ### count a CHANGE if ($tx3 =~ /$ch/) { @sp11 = (); $msg .= '-'; $pos1 = index ($tx3, $ch); # get position of next quote if ($pos1 > 0) { $tx5 = substr ($tx3, 0, $pos1); # get BEFORE QUOTE $tx3 = substr ($tx3, $pos1); # get ending text, if ANY $msg .= " *CHK [$tx5] [$tx3]???\n"; if ((length($tx3) > 1) && ( $tx3 =~ /['"]/ )) { ### zeek, there are more of these ... $i = 0; $tx5 = ''; while(1) { $c = substr ($tx3, $i, 1); if (($c eq '"')|| ($c eq "'") ) { last; } $i++; # bump to next if ($i >= ($ln - 1)) { $c = 0; last; } } if ($i) { if (($c eq '"')||($c eq "'")) { $tx5 = substr ($tx3, 0, $i); # get before QUOTE $tx3 = substr ($tx3, $i ); # get balance @sp11 = ($tx5,$tx3); $ichg++; } } } $msg .= " could split [$tx5] [$tx3]* "; } $msg .= " found end [$c] split "; last; # exit when terminator found } } $msg .= " *REPLACING [$tx2] with [$tx6]!"; $lnbits[$icnt - 1] = $tx6; # put back single quoted message splice (@lnbits, $icnt, $i3); # collapse following items $msg .= ", now joined, to its end\n"; $cnt = @lnbits; ### UPDATE THE COUNT } } elsif ($tx2 =~ /['"]/ ) { ## "' # does it CONTAIN quotes, d OR s $c = get_a_quote($tx2); $pos1 = index ($tx2, $c); # get position of next quote if (($pos1 > 0) && $c) { $msg .= " QUOTE $c split, at $pos1 "; $tx5 = substr ($tx2, 0, $pos1); # get before QUOTE $tx3 = substr ($tx2, $pos1 ); # get balance ### check back $msg .= "would replace [".$lnbits[$icnt - 1]."][$tx5]"; $lnbits[$icnt - 1] = $tx5; # fix this 'line-bit' @sp11 = ($tx3); ### add this one splice (@lnbits, $icnt, 0, @sp11); # add bucket $msg .= ", now sep [$tx5][$tx3]"; $cnt = @lnbits; ### UPDATE THE COUNT } else { die "ERROR: Handler above does BITS-OF-LINE that begin with a QUOTE!!!\n"; } } elsif ($ch eq '#') { # if line-bit starts with a perl comment ## join to end of line $i3 = 0; $tx5 = $tx2; $tx6 = $lnbits[$icnt - 1]; for ($i = $icnt; $i < $cnt; $i++) { $tx3 = $lnbits[$i]; $tx5 .= ' '; $tx5 .= $tx3; ### $lnbits[$i]; $i3++; $ichg++; } if ($i3) { $msg .= ' Joined ['; $msg .= $tx6; ### = $lnbits[$icnt - 1]; $msg .= '] to ['; $msg .= $tx5; $lnbits[$icnt - 1] = $tx5; # put back single quoted message $msg .= '] sp ' . $icnt . ' ' . $i3; splice (@lnbits, $icnt, $i3); # collapse following items $msg .= " end-of-line comment"; $cnt = @lnbits; } } else { ## not begin quote ' or ", nor begin # ... ## dealt with on NEXT iteration of line bits - left for diagnostic only ### $c = 0; $tx3 = substr($tx2,1); if (($ch eq '$') || ($ch eq '@') || ($ch eq '%')) { # start of a scalor, array, hash ... move on to next letter $c = gotdelim($tx3); ### any more in this line if ( length($tx3) && ($c) && ! xceptchr($c) ) { # got first split point, AFTER $var ... $pos1 = index ($tx3,$c); } } else { $tx3 = $tx2; ### check full line $c3 = gotdelim($tx3); if ( length($tx3) && ($c3) ) { # got first split point $pos1 = index ($tx3,$c3); } # process $tx3 } if ($c && ! xceptchr($c) ) { $msg .= ' *EXCEPTED* '; $msg .= $c; $msg .= '* '; } if ( isresword ($tx2) ) { ### exists $HResWds{$tx2} $msg .= ' *B*'; ### blue('R'); } if ( isbinfun ($tx2) ) { ## exists $HBFuncs{$tx2} $msg .= ' *P*'; } if ( $ln < 4 ) { ### tolog ( "*PUNC* CHECK [" . $tx2 . "]\n" ); if ( ispunctuat ( $tx2 ) ) { ###$actpunc = $tx2; ### store the active punctuation $msg .= ' *PUNC*'; } } } ###tolog ($msg . "\n"); $msg .= "\n"; # add end of line push(@logmsgs, $msg); ### store the LOG } # for array list of line components === ONLY DOING JOINING $nct = @lnbits; if ($cnt != $nct) { die "***FIX a COUNT UPDATE $cnt $nct $cntorg ????\n"; } if ($cntorg == $nct) { $msg = "} end comps $cntorg\n"; } else { $msg = ("} end comps $cntorg, adj. $nct " . ($cntorg - $nct) . "\n"); } push(@logmsgs, $msg); if ($ichg || $verb2) { tolog ( "Run 1 made " . $ichg . " changes in line - CHECK CHANGE\n" ); foreach $msg (@logmsgs) { tolog($msg); } } else { ### no change if ($verb2) { tolog ("No change\n"); } } @copybits = @lnbits; ### take a SNAP of this QUOTE ONLY EXPANSION ### want to RETURN the line to this SPACING, if possible ### tolog ("########### parse run two ###############################\n") if $verb2; #################### DO IT ALL NOW ################### ###tolog ("{ comps $nct\n"); # log COUNT at start @logmsgs = (); $msg = ("{ comps $nct\n"); # log COUNT at start push(@logmsgs,$msg); ## accumulate $icnt = 0; ### init line 'bits' counter $ichg = 0; doparsereset (); foreach $tx2 (@lnbits) { $icnt++; # PRE-BUMP THE COUNT $msg = $tx2; ### diag - add the bit-of-the-line to log output $ln = length($tx2); $ch = substr ($tx2, 0, 1); $i = 0; ### special +?.*^$()[]{}|\ ### if ($tx2 =~ /^['"]/ ) { ## "' # does it start with quotes d or s if (($ch eq '"')||($ch eq "'")) { ######################################### ### $msg .= " begin quote (p2)"; $i = 1; # set JOIN if ($ln > 1) { $tx3 = substr ($tx2, 1, $ln - 1); # get past quote if ( $tx3 =~ /$ch/) { $pos1 = index ($tx3, $ch); # get position of next quote if ($pos1 > 0) { $tx5 = substr ($tx2, 0, ($pos1 + 1 + 1)); # get WHOLE QUOTE $tx3 = substr ($tx3, ($pos1 + 1)); # get ending text, if ANY if (length($tx3)) { ### error case ### "_","|", DONE WOULD SPLIT ["_"][,"|",]? b&e same quotes $msg .= ' DONE WOULD SPLIT '; $msg .= '['; $msg .= $tx5; $msg .= ']'; $msg .= '['; $msg .= $tx3; $msg .= ']?'; $lnbits[$icnt - 1] = $tx5; # put back adjusted first ### if ( $tx3 =~ /$ch/ ) { if ( $tx3 =~ /['"]/ ) { ### zeek, there are more of these ... $msg .= ' *MESS if , excepted '; } splice (@lnbits, $icnt, 0, $tx3); # insert 1 new items $cnt = @lnbits; ### ADJUST COUNT ITERATOR $ichg++; } } $msg .= " b&e same quotes"; $i = 0; } } if ($i) { # should JOIN until the END $i3 = 0; for ($i = $icnt; $i < $cnt; $i++) { $tx3 = $lnbits[$i]; # get next $tx2 .= ' '; # add back space $tx2 .= $tx3; ### $lnbits[$i]; $i3++; $ichg++; if ($tx3 =~ /$ch/) { last; # exit when terminator found } } $lnbits[$icnt - 1] = $tx2; # put back single quoted message ###splice (@lnbits, $i2, $cnt - $i2); # collapse following items splice (@lnbits, $icnt, $i3); # collapse following items $msg = $tx2; $msg .= ", now joined, to its end"; $cnt = @lnbits; ### UPDATE THE COUNT } $i3++; ######################################### } elsif ($ch eq '#') { # if starts with a comment ######################################### ## should join to end of line $i3 = 0; for ($i = $icnt; $i < $cnt; $i++) { $tx3 = $lnbits[$i]; $tx2 .= ' '; $tx2 .= $tx3; ### $lnbits[$i]; $i3++; $ichg++; } $msg .= ' joined '; $msg .= $lnbits[$icnt - 1]; $msg .= ' to '; $msg .= $tx2; $lnbits[$icnt - 1] = $tx2; # put back single quoted message ###splice (@lnbits, $i2, $cnt - $i2); # collapse following items $msg .= ' sp ' . $icnt . ' ' . $i3 . '['; splice (@lnbits, $icnt, $i3); # collapse following items ### $msg = $tx2; $msg .= "], line comment"; $cnt = @lnbits; $i3++; ######################################### } else { ######################################### ## not begin quote ' or ", nor begin # ... $c = 0; $tx3 = substr($tx2,1); if (($ch eq '$') || ($ch eq '@') || ($ch eq '%')) { # start of a scalor, array, hash ... move on to next $c = gotdelim($tx3); if ( length($tx3) && ($c) && ! xceptchr($c) ) { # got first split point, AFTER $var ... $pos1 = index ($tx3,$c); if ($pos1 > 0) { $i3 = 0; $tx5 = $ch; # put first char back $tx5 .= substr ($tx3, 0, $pos1); # get up to CHAR @sp11 = ($c); $tx3 = substr ($tx3, ($pos1 + 1)); # get ending text, if any if (length($tx3)) { push(@sp11, $tx3); # put in slurp if ((($c eq '(') && (substr($tx3,0,1) eq ')')) || (($c eq '+') && (substr($tx3,0,1) eq '+')) ) { # eg check *split* [$sock->accept][(][);] $i3 = 1; # some EXCEPTIONS } } if ($i3) { $msg = '*NO* *split* ['; } else { $msg = 'DONE *split* ['; } $msg .= $tx5 . ']['; $msg .= $c . ']'; if (length($tx3)) { $msg .= '['; $msg .= $tx3 . ']'; } $msg .= "\n"; push(@logmsgs,$msg); ###tolog ($msg . "\n"); if ($i3 == 0) { $lnbits[$icnt - 1] = $tx5; # put back first split splice (@lnbits, $icnt, 0, @sp11); # insert 1 or 2 new items $cnt = @lnbits; ### ADJUST COUNT ITERATOR $ichg++; } } $msg = $tx2; # put original message back } } else { ## not begin quote ' or ", nor begin # ... ### and is NOT if (($ch eq '$') || ($ch eq '@') || ($ch eq '%')) { $tx3 = $tx2; my $c3 = gotdelim($tx3); ###if ( length($tx3) && ($c3) ) { # got first split point if ( ($ln) && ($c3) ) { # got first split point $pos1 = index ($tx3,$c3); if ( $pos1 > 0 ) { # if the first char, or ... ### we have something, a million other variations ##my $ts = '\\'; ##$ts .= $c3; ##@sp11 = split ($ts, $tx3); $tx5 = substr ($tx3, 0, $pos1); # get up to CHAR ###@sp11 = ($tx5, $c3); @sp11 = ($c3); $tx3 = substr ($tx3, ($pos1 + 1)); # get ending text, if any if (length($tx3)) { push(@sp11, $tx3); # put in slurp } ###if (($c3 ne ':') && ($c3 ne '=') && ($c3 ne '|')) { if ( ! xceptchr($c3) ) { $msg = 'done Split ['; $msg .= $tx5 . ']['; $msg .= $c3 . ']'; if (length($tx3)) { $msg .= '['; $msg .= $tx3 . ']'; } tolog ($msg . "\n"); $lnbits[$icnt - 1] = $tx5; # put back first split ###splice (@lnbits, $i2, 0, $c3); ###if (length($tx3)) { ### splice (@lnbits, ($i2+1), 0, $tx3); ###} splice (@lnbits, $icnt, 0, @sp11); # insert 1 or 2 new items ##splice (@lnbits, ($i2 - 1), 1, @sp11); # INSERT into array at this pos $cnt = @lnbits; ### ADJUST COUNT ITERATOR $ichg++; } } elsif ( $pos1 == 0 ) { $tx3 = substr ($tx3, ($pos1 + 1)); # get ending text, if any if (length($tx3)) { @sp11 = ($c3, $tx3); # put in slurp ### if (($c3 ne ':') && ($c3 ne '=') && ($c3 ne '|')) { if ( ! xceptchr($c3) ) { $msg = 'DONE SPLIT ['; $msg .= $c3 . ']['; $msg .= $tx3 . ']'; ##tolog ($msg . "\n"); $msg .= "\n"; push(@logmsgs,$msg); ###tolog (@sp11 . "\n"); ##splice (@lnbits, ($i2 - 1), 1, @sp11); # INSERT into array at this pos $lnbits[$icnt - 1] = $c3; # put back first split splice (@lnbits, $icnt, 0, $tx3); $ichg++; $cnt = @lnbits; ### ADJUST COUNT ITERATOR } } } else { ### last; die "ERROR: Unresolved POSITION - can not happen ...\n"; } } # process $tx3 } ######################################### $msg = $tx2; if ($c && ! xceptchr($c) ) { $msg .= ' *EXCEPTED* '; $msg .= $c; $msg .= '* '; } if ( isresword ($tx2) ) { ### exists $HResWds{$tx2} $msg .= ' *B*'; ### blue('R'); $i3++; } if ( isbinfun($tx2) ) { ## exists $HBFuncs{$tx2} $msg .= ' *P*'; $i3++; } if ( $ln < 4 ) { ### tolog ( "*PUNC* CHECK [" . $tx2 . "]\n" ); if ( ispunctuat ( $tx2 ) ) { $msg .= ' *PUNC*'; } } ######################################### } ### tolog ($msg . "\n"); $msg .= "\n"; push(@logmsgs,$msg); } # for array list of line components $nct = @lnbits; if ($cnt != $nct) { die "***FIX a COUNT UPDATE $cnt $nct $cntorg ????\n"; } if ($cntorg == $nct) { $msg = ("} end comps $cntorg\n"); } else { $msg = ("} end comps $cntorg, adj. $nct " . ($cntorg - $nct) . "\n"); } push(@logmsgs,$msg); if ($ichg || $verb2) { tolog ( "Run 2 Made " . $ichg . " changes in line - CHECK CHANGE\n" ); foreach $msg (@logmsgs) { tolog($msg); } } else { ### no change if ($verb2) { tolog ("Run 2 - No change\n"); } } tolog ("########### output run ###############################\n") if $verb2; ### tolog ("{{ $nct"); @logmsgs = (); $msg = ("{{ $nct"); push(@logmsgs,$msg); ### perpare for HTML output ########################### $tx3 = ''; # clear FRONTEND output ### $tx3 = $txsp; # get the FRONTEND SPACE if (($c1 eq ' ') || ($c1 eq "\t")) { die "ERROR: Missed a case above ...\n" if ! $gotfes; # MISS FRONTEND SPACE ### $tx3 .= ' '; # add last space back $tx3 = white(htmlise($txsp)); ## $tx3 = ' '; ## $tx3 = htmlise($txsp); # space to HTML if ($verb2) { $msg = "\nSpace=[\n"; $msg .= $txsp; $msg .= "]\n["; $msg .= $tx3; $msg .= ']'; tolog ($msg . "\n"); } } else { die "ERROR: Missed a case above ...\n" if $gotfes; # MISS FRONTEND SPACE } ############################################# $i3 = 0; # init COUNTER my $func; $icnt = 0; $i = 0; $ln = 0; doparsereset (); foreach $tx2 (@lnbits) { # process for OUTPUT ### we have @copybits = @lnbits; ### take a SNAP of this QUOTE ONLY EXPANSION if ($i3) { # was (length($tx3)) { ### this should REMEMBER the original 'line-spacing', and re-apply it now $tx6 = substr ($tx6, $ln); ### get next line 'bit' ### note, no actual CHECK that they are the EQUAL!!! ### if ($msg eq $tx2) { ### should work also ... if (length($tx6)) { $nct = 0; ### no SPACE addition yet } else { $icnt++; ### bump to NEXT $tx6 = $copybits[$icnt]; ### get the 'copy', for 'formatting' $i = length($tx6); ## len of COPY $c1 = substr ($tx6, 0, 1); ### and first char $nct = 1; ### add back SPACE, per original file } if ($nct) { ###$tx3 .= white(' '); # add back 'space' between LINE components $tx3 .= ' '; # add back 'space' between LINE components/bits } } else { ## first, so no space added = START 'spacer' $tx6 = $copybits[$icnt]; ### get the 'copy', for 'formatting' $i = length($tx6); ## len of COPY $c1 = substr ($tx6, 0, 1); ### and first char } $ln = length($tx2); # length this line 'bit' $c = substr ($tx2, 0, 1); # get FIRST CHAR $msg = $tx2; # get copy of the line $tx5 = htmlise($msg); # make it HTML form ### case of the first CHARACTER - established TYPE of this line bit if ($c eq '#') { # comment component - should be to end-of-line, or more ... $func = \&orange; } elsif ($c eq "'") { ## "' # does it start with quotes DOUBLE or SINGLE $func = \&green; } elsif ($c eq '"') { $func = \&color3; } elsif ($c eq '$') { # start of scalar $func = \&color1; } elsif ($c eq '@') { # start of array $func = \&match; } elsif ($c eq '%') { # start of hash $func = \&peach; } elsif ( isresword ($tx2) ) { ### exists $HResWds{$tx2} $func = \&blue; } elsif ( isbinfun ($tx2) ) { ### exists $HBFuncs{$tx2} $func = \&color2; } else { $func = \&white; # set default, white if ($ln < 4) { # if it is a short 'bit' of the line if ( ispunctuat ($tx2) ) { # check if punc $func = \&grey; # yup, switch to grey } } } $msg = $func->($tx5); # get the HTML form mainly '<' -> '<' changes $tx3 .= $msg; ###tolog (' [' . $msg . ']'); ###tolog (' [' . $tx2 . ']'); $msg = (' [' . $tx2 . ']'); push(@logmsgs,$msg); $i3++; ## count a line item $msg = $tx2; ### keep LAST line 'bit' ... } ### loop while line 'bits' ##### done line output ##### ### tolog ("}}\n"); $msg = ("}}\n"); push(@logmsgs,$msg); foreach $msg (@logmsgs) { tolog($msg); } $tx3 .= "<br>\n"; ### tolog ($tx3); prt ($tx3); ####################################################### } ### comment line summarily dealt with ... } sub htmlise { my ($txt) = @_; my $htmsps = 0; my $htmnbs = ''; # convert to HTML $txt =~ s/\t/$tab_stg /g; # substitute TAB characters $txt =~ s/"/"/g; # sub double quotes $txt =~ s/\</</g; # sub less than tag beginning $txt =~ s/\>/>/g; # and html/xml tag ending my $ln = length($txt); # get the final length if (substr ($txt, 0, 1) eq ' ') { # if starts with a space ### my $htmsps = 0; ### my $htmnbs = ' '; ## $htmsps = 0; $htmnbs = ' '; for ($htmsps = 1; $htmsps < $ln; $htmsps++) { if (substr ($txt, $htmsps, 1) ne ' ') { last; } $htmnbs .= ' ' if $htmsps > 1; } $htmsps-- if $htmsps > 1; # back off last space, if more than 1 tolog ("Replacing $htmsps with [$htmnbs] ...\n") if $verb2; $txt =~ s/ {$htmsps}/$htmnbs/; # replace (N) spaces with ' x N if ($verb2) { my (@vals) = split; while (@vals) { my ($vc) = shift (@vals); tolog ("[$vc] "); } tolog ("\n"); } } # if it was space beginning return $txt; } sub gotdelim { my ($tx) = @_; my $c; my $mx = length($DELIMITER); ### = '(){}[]-+*/=~!&|<>?:;.,'; my @ar = split (//, $DELIMITER); my $i = 0; foreach $c (@ar) { my $ts = '\\'; $ts .= $c; if ($tx =~ /$ts/) { # return 1; return $c; } $i++; } return 0; } ###my $actpunc = ''; ### store the active punctuation ###my %HPuncsFnd = (); # hash of Punctuation FOUND in parse ###my $actresword = ''; ###my %HResWdFnd = (); ###my $actfunc = ''; ### store the active built-in functions ###my %HFuncsFnd = (); ### my %HPuncsFnd = (); # hash of Punctuation FOUND in parse sub ispunctuat { my ($cp) = @_; foreach my $cc (@PPunct) { ###tolog ("Comaring [$cc] with [$cp]...\n"); if ($cc eq $cp) { $actpunc = $cp; ### store the active punctuation if ( exists $HPuncsFnd{$cp} ) { $HPuncsFnd{$cp}++; # another count } else { $HPuncsFnd{$cp} = 1; # set FOUND 1 } return 1; } } return 0; } sub isresword { my ($rw) = @_; if ( exists $HResWds{$rw} ) { $actresword = $rw; if (exists $HResWdFnd{$rw}) { $HResWdFnd{$rw}++; # another count } else { $HResWdFnd{$rw} = 1; # start count } return 1; } return 0; } sub isbinfun { my ($rw) = @_; if ( exists $HBFuncs{$rw} ) { $actfunc = $rw; if (exists $HFuncsFnd{$rw}) { tolog ( "Bumped Funcs $rw ...\n" ); $HFuncsFnd{$rw}++; # another count } else { tolog ( "Created Funcs $rw ...\n" ); $HFuncsFnd{$rw} = 1; # start count } return 1; } return 0; } sub doparsereset { my $k; $actfunc = ''; $actresword = ''; $actpunc = ''; } sub get_parse_stats { my $ms = "Parse stats<br>\n"; my $key; my $k; my $i = 0; my $at; $at = %HResWdFnd; $ms .= '<table border="1">'; $i = 0; foreach $key (keys %HResWdFnd) { ###foreach $key (keys %$at) { $i++; $ms .= '<tr>'; $ms .= '<td>'; $ms .= "$i"; $ms .= '</td>'; $ms .= '<td>'; $ms .= "$key"; $ms .= '</td>'; $ms .= '<td>'; $ms .= $HResWdFnd{$key}; ###$ms .= "$$at{$key}"; $ms .= '</td>'; $ms .= '</tr>'; $ms .= "\n"; } $ms .= '</table>'; $ms .= "List of $i used reserve words ...<br>\n"; $i = 0; $ms .= '<table border="1">'; foreach $key (keys %HFuncsFnd) { $i++; $ms .= '<tr>'; $ms .= '<td>'; $ms .= "$i"; $ms .= '</td>'; $ms .= '<td>'; $ms .= "$key"; $ms .= '</td>'; $ms .= '<td>'; $ms .= $HFuncsFnd{$key}; $ms .= '</td>'; $ms .= '</tr>'; $ms .= "\n"; } $ms .= '</table>'; $ms .= "List of $i used built-in function words ...<br>\n"; $i = 0; ### if ( exists $HPuncsFnd{$cp} ) { $ms .= '<table border="1">'; foreach $key (keys %HPuncsFnd) { $i++; $ms .= '<tr>'; $ms .= '<td>'; $ms .= "$i"; $ms .= '</td>'; $ms .= '<td>'; $ms .= htmlise($key); $ms .= '</td>'; $ms .= '<td>'; $ms .= $HPuncsFnd{$key}; $ms .= '</td>'; $ms .= '</tr>'; $ms .= "\n"; } $ms .= '</table>'; $ms .= "List of $i used punctuation ...<br>\n"; return $ms; } sub showarrcnts { my $i = @PPunct; tolog ("PPunct array count = $i\n"); $i = @PPairs; tolog ("PPairs array count = $i\n"); $i = @DolVars; tolog ("DolVars array count = $i\n"); $i = @PBPunc; tolog ("PBPunc array count = $i\n"); } sub get_line_num { my ($lnn) = @_; while (length($lnn) < 4) { $lnn = '0' . $lnn; } return $lnn; } ### EOF