Generated: Sun Aug 21 11:10:38 2011 from bldctbl.pl 2011/03/25 7.7 KB.
#!/usr/bin/perl -w # NAME: bldctbl.pl # AIM: Very specific processing of a tidy cleaned winword html to a C table use strict; use warnings; use File::Basename; # split path ($name,$dir,$ext) = fileparse($file [, qr/\.[^.]*/] ) use Cwd; my $perl_dir = 'C:\GTools\perl'; unshift(@INC, $perl_dir); require 'logfile.pl' or die "Unable to load logfile.pl ...\n"; # log file stuff my ($LF); my $pgmname = $0; if ($pgmname =~ /(\\|\/)/) { my @tmpsp = split(/(\\|\/)/,$pgmname); $pgmname = $tmpsp[-1]; } my $outfile = $perl_dir."\\temp.$pgmname.txt"; open_log($outfile); # user variables my $load_log = 1; my $in_file = ''; my $dbgon = 1; my $def_file = 'C:\Documents and Settings\Geoff McLane\My Documents\MS\temp1.htm'; ### program variables my @warnings = (); my $cwd = cwd(); my $os = $^O; sub show_warnings($) { my ($val) = @_; if (@warnings) { prt( "\nGot ".scalar @warnings." WARNINGS...\n" ); foreach my $itm (@warnings) { prt("$itm\n"); } prt("\n"); } else { prt( "\nNo warnings issued.\n\n" ); } } sub pgm_exit($$) { my ($val,$msg) = @_; if (length($msg)) { $msg .= "\n" if (!($msg =~ /\n$/)); prt($msg); } show_warnings($val); close_log($outfile,$load_log); exit($val); } sub prtw($) { my ($tx) = shift; $tx =~ s/\n$//; prt("$tx\n"); push(@warnings,$tx); } sub get_p2br($) { my $txt = shift; my $rtxt = ''; my $len = length($txt); my ($i,$ch,$tt); $tt = ''; for ($i = 0; $i < $len; $i++) { $ch = substr($txt,$i,1); $tt .= $ch; last if ($tt =~ /<p>$/); } $i++; $tt = ''; for (; $i < $len; $i++) { $ch = substr($txt,$i,1); $tt .= $ch; if ($tt =~ /<br>$/) { $tt =~ s/<br>$//; $tt = trim_all($tt); $rtxt = $tt; } } return $rtxt; } sub get_i2i($) { my $txt = shift; my $rtxt = ''; my $len = length($txt); my ($i,$ch,$tt); $tt = ''; for ($i = 0; $i < $len; $i++) { $ch = substr($txt,$i,1); $tt .= $ch; last if ($tt =~ /<i>$/); } $i++; $tt = ''; for (; $i < $len; $i++) { $ch = substr($txt,$i,1); $tt .= $ch; if ($tt =~ /<\/i>$/) { $tt =~ s/<\/i>$//; $tt = trim_all($tt); $rtxt = $tt; } } return $rtxt; } sub get_br2_p($) { my $txt = shift; my $rtxt = ''; my $len = length($txt); my ($i,$ch,$tt); $tt = ''; for ($i = 0; $i < $len; $i++) { $ch = substr($txt,$i,1); $tt .= $ch; last if ($tt =~ /<br>$/); } $i++; $tt = ''; for (; $i < $len; $i++) { $ch = substr($txt,$i,1); $tt .= $ch; if ($tt =~ /<\/p>$/) { $tt =~ s/<\/p>$//; $tt = trim_all($tt); $rtxt = $tt; } } return $rtxt; } sub process_in_file($) { my ($inf) = @_; if (! open INF, "<$inf") { pgm_exit(1,"ERROR: Unable to open file [$inf]\n"); } my @lines = <INF>; close INF; my $lncnt = scalar @lines; prt("Processing $lncnt lines, from [$inf]...\n"); my ($line,$inc,$lnn, $intbl,$intd,$intr,$tdcnt,$txt); my $tdtxt = ''; $lnn = 0; $intbl = 0; $intd = 0; $tdcnt = 0; my $txt1 = ''; my $txt2 = ''; my $txt3 = ''; my @array = (); foreach $line (@lines) { chomp $line; $line = trim_all($line); $lnn++; if ($intbl) { if ($line =~ /<\/table/i) { $intbl = 0; prt("$lnn: Exit table...\n"); } elsif ($intr) { if ($line =~ /<\/tr/) { $intr = 0; prt("$lnn: Exit </tr>\n"); } elsif ($intd) { if ($line =~ /<\/td>/) { $intd = 0; if ($tdcnt == 1) { $txt = get_p2br($tdtxt); $txt1 = $txt; $txt3 = get_br2_p($tdtxt); } else { $txt = get_i2i($tdtxt); $txt2 = $txt; if (length($txt1) && length($txt2) && length($txt3)) { push(@array, [$txt1,$txt2,$txt3]); } else { prt("FAILED: [$txt1] [$txt2] [$txt3]\n"); } } prt("$lnn: Exit </td> $txt $txt3\n"); $tdtxt = ''; if ($tdcnt == 2) { $txt1 = ''; $txt2 = ''; $txt3 = ''; } } else { $tdtxt .= ' ' if (length($tdtxt)); $tdtxt .= $line; } } else { # in <tr>, but not <td> if ($line =~ /<td/) { $intd = 1; $tdcnt++; prt("$lnn: Enter <td> $tdcnt\n"); } } } else { if ($line =~ /<tr/) { $intr = 1; prt("$lnn: Enter <tr>\n"); $tdcnt = 0; $tdtxt = ''; } } } elsif ($line =~ /<table/i) { $intbl = 1; prt("$lnn: Enter table...\n"); } } my ($key,$val); my $cnt = scalar @array; prt("List of $cnt keys...\n"); my ($min,$len); $min = 0; foreach $key (@array) { $txt1 = ${$key}[0]; $txt2 = ${$key}[2]; $len = length($txt1); $min = $len if ($len > $min); } foreach $key (@array) { $txt1 = ${$key}[0]; $txt2 = ${$key}[1]; $txt3 = ${$key}[2]; $txt1 .= ' ' while (length($txt1) < $min); prt(" { $txt1, \"$txt2\" }, // $txt3\n"); } } ######################################### ### MAIN ### parse_args(@ARGV); prt( "$pgmname: in [$cwd]: Hello, World...\n" ); process_in_file($in_file); pgm_exit(0,"Normal exit(0)"); ######################################## sub give_help { prt("$pgmname: version 0.0.1 2010-09-11\n"); prt("Usage: $pgmname [options] in-file\n"); prt("Options:\n"); prt(" --help (-h or -?) = This help, and exit 0.\n"); } sub need_arg { my ($arg,@av) = @_; pgm_exit(1,"ERROR: [$arg] must have following argument!\n") if (!@av); } sub parse_args { my (@av) = @_; my ($arg,$sarg); while (@av) { $arg = $av[0]; if ($arg =~ /^-/) { $sarg = substr($arg,1); $sarg = substr($sarg,1) while ($sarg =~ /^-/); if (($sarg =~ /^h/i)||($sarg eq '?')) { give_help(); pgm_exit(0,"Help exit(0)"); } else { pgm_exit(1,"ERROR: Invalid argument [$arg]! Try -?\n"); } } else { $in_file = $arg; prt("Set input to [$in_file]\n"); } shift @av; } if ((length($in_file) == 0) && $dbgon) { $in_file = $def_file; } if (length($in_file) == 0) { pgm_exit(1,"ERROR: No input files found in command!\n"); } if (! -f $in_file) { pgm_exit(1,"ERROR: Unable to find in file [$in_file]! Check name, location...\n"); } } # eof - template.pl