txt2htm02.pl to HTML.

Generated: Sat Oct 12 17:23:21 2013 from txt2htm02.pl 2012/10/10 16.1 KB. text copy
#!perl -w
# Module: txt2htm02.pl
# coded using EditPlus v2.12 (76)
# 16/02/2012 - Turn off $debug_on, and other tidying...
# 06/08/2011 - Some BIG updates ;=))
# 2010/04/24 - more fixes
# 2010/03/21 - some fixes, and improvements
# March, 2005 geoff mclane
# Sep 2006 - update
# Original output was the whole file as one paragraph, using <br> to divide lines
# Add option to use <pre [class="cd"]>...</pre> formatting

use strict;
use warnings;
use Cwd;
use File::Basename;  # split path ($name,$dir,$ext) = fileparse($file [, qr/\.[^.]*/] )
use File::Spec; # File::Spec->rel2abs($rel); # we are IN the SLN directory, get ABSOLUTE from RELATIVE

# Directory holding the shared helper library; it is pushed to the front of
# @INC so that 'lib_utils.pl' can be found by the require below.
my $perl_dir = 'C:\GTools\perl';
unshift(@INC, $perl_dir);
# NOTE(review): prt(), open_log(), close_log() and ut_fix_directory() are not
# defined in this file; presumably they come from lib_utils.pl - confirm.
require 'lib_utils.pl' or die "Unable to load 'lib_utils.pl' Check paths in \@INC...\n";
# log file stuff
our ($LF);
# Reduce $0 to the bare script name by keeping the last path component
# (either '\' or '/' is accepted as the separator).
my $pgmname = $0;
if ($pgmname =~ /(\\|\/)/) {
    my @tmpsp = split(/(\\|\/)/,$pgmname);
    $pgmname = $tmpsp[-1];
}
# Log file name, passed to open_log() here and to close_log() in pgm_exit().
my $outfile = $perl_dir."\\temp.$pgmname.txt";
open_log($outfile);

# Default HTML output file; replaced by the -o option in parse_args().
my $out_file = $perl_dir."\\tempt2h.htm";

# user variables
my $load_log = 0;   # set to 1 (-l) or 2 (-ll) by parse_args(); handed to close_log()
my $in_file = '';   # most recently seen input file name
my $add_urls = 0;   # set by -u; makes dsp_add_src() call add_url_links()

# When true and no input file is given, parse_args() falls back to $def_file.
my $debug_on = 0;
#my $def_file = 'C:\Documents and Settings\Geoff McLane\My Documents\unix\Ubuntu-11-nautilus-fails.txt';
my $def_file = 'C:\Projects\OpenLayers\OpenLayers-2.12\examples\temp.txt';

my $M_VERSION = "0.3";      # shown by give_help()
my $start_time = time();
my $verb2 = 0;              # gates extra trace output in dsp_add_src() and get_dir_list()
my @input_files = ();       # filled by parse_args() with every non-option argument
my $file_lines = 0;         # incremented in init_out_file() and do_this_file()
my @files = ();             # filled by get_dir_list() with the readdir() result
my @file_list = ();
my $WHITE_PATTERN = "^[ \t]*\$";        # regex source: line of only spaces/tabs (or empty)
my $tab_stg = ' &nbsp;&nbsp;&nbsp;';    # replacement for each TAB in line_to_html()
my $check_out = 0;
my $use_pre = 0;   # default to OFF
# DOCTYPE declaration written as the first thing in the output file.
my $doc_type = '<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"'."\n".
'"http://www.w3.org/TR/html4/loose.dtd">'."\n";
my $add_links = 1;          # when true, dsp_add_src() runs convert_links() on lines with a URL
my $use_fixed_font = 1;     # when true, the body is wrapped in <div class="cn">
my $verbosity = 0;          # tested by VERB1()/VERB2()/VERB5()/VERB9(); set by -v

### program variables
my @warnings = ();          # messages collected by prtw(), listed by show_warnings()
my $cwd = cwd();
my $os = $^O;

# Verbosity predicates. Each one is true once $verbosity has reached the
# level embedded in its name (1, 2, 5 or 9).
sub VERB1() {
    return 1 <= $verbosity;
}
sub VERB2() {
    return 2 <= $verbosity;
}
sub VERB5() {
    return 5 <= $verbosity;
}
sub VERB9() {
    return 9 <= $verbosity;
}

# Print every message collected in @warnings (via prt), preceded by a count
# line and followed by a blank line. Prints nothing when there are none.
# The single argument is accepted but not used.
sub show_warnings($) {
    my ($val) = @_;
    my $count = scalar @warnings;
    if ($count) {
        prt( "\nGot ".$count." WARNINGS...\n" );
        foreach my $warning (@warnings) {
            prt($warning."\n");
        }
        prt("\n");
    }
}

# Common exit path: print an optional final message, list any collected
# warnings, close the log and leave the program.
#   $val - exit status handed to exit()
#   $msg - message to print first; an empty string prints nothing
sub pgm_exit($$) {
    my ($val,$msg) = @_;
    if (length($msg)) {
        # make sure the message ends with a newline before printing it
        prt( ($msg =~ /\n$/) ? $msg : $msg."\n" );
    }
    show_warnings($val);
    close_log($outfile,$load_log);
    exit($val);
}


# Print a warning now (via prt) and remember it in @warnings so that
# show_warnings() can list it again at exit. The stored text has its
# trailing newline removed.
sub prtw($) {
    my $text = shift;
    $text =~ s/\n$//;
    prt($text."\n");
    push(@warnings, $text);
}

# Create (truncate) the HTML output file and write everything that precedes
# the converted text: DOCTYPE, <head> with the embedded style sheet, the <h1>
# title, the index link and, when $use_fixed_font is set, the opening
# <div class="cn">.
#   $out_name - path of the HTML file to create
#   $in_name  - name shown in the <title> and <h1>
# Dies if the file can not be created or closed.
# NOTE(review): $in_name is interpolated into the HTML without escaping.
sub init_out_file {
    my ($out_name, $in_name) = @_;
    prt("Creating $out_name\n");
    # three-argument open with a lexical handle: the file name can not be
    # misread as an open mode, and the handle is private to this sub
    open(my $dsp, '>', $out_name) or die "Can not create $out_name: $!\n";
    prt("Writing to $out_name ...\n") if (VERB1());
    $file_lines++;
    print $dsp $doc_type;
    print $dsp <<"EOF";

<html>

<head>
<title>$in_name to HTML</title>
<style type="text/css">
<!-- /* some style */
body { 
   background-image:url('http://geoffair.org/images/clds3.jpg');
   margin: 0cm 1cm 0cm 1cm; }
h1 {
 background:#efefef;
 border-style: solid solid solid solid;
 border-color:#d9e2e2;
 border-width:1px;
 padding:2px 2px 2px 2px;
 font-size:200%;
 text-align:center;
}
.ctr { text-align: center; }
.a { color:red; }
.b { color:#006666; }
.c { color:blue; }
.d { color:#a52a2a; }
.e { color:#9400d3; }
.f { color:#666666; }
.o { color:#008b8b; }
.v { color:#a52a2a; }
.t { color:#006600; }
.cd {
  /* top, right, bottom, left */
  padding: 0px 10px 0px 10px;
  margin: 1px 10px 1px 10px;
  background: #f0f8ff;
  border-width: 1px;
  border-style: solid solid solid solid;
  border-color: #cccccc;
  width: 90%;
  font-family:"Courier New";
}
.cn { font-family:"Courier New"; } 
// -->
</style>
</head>

<body>

<h1>$in_name to HTML</h1>

<p class="ctr"><a href="index.htm">index</a></p>

EOF

    print $dsp "<div class=\"cn\">\n" if ($use_fixed_font);
    prt("Closing $out_name.\n") if (VERB1());
    # buffered write errors only surface at close, so check it
    close($dsp) or die "Can not close $out_name: $!\n";
}

# Append the closing part of the HTML document: the end of the fixed-font
# <div> (when $use_fixed_font is set), the index link, a visible "Generated"
# footer, the same information as an HTML comment, and </body></html>.
#   $out_name - path of the HTML file to append to
#   $in_name  - input name reported in the footer
# Dies if the file can not be opened for append or closed.
sub end_out_file {
    my ($out_name, $in_name) = @_;
    prt("Appending to $out_name\n") if (VERB1());
    # three-argument open with a lexical handle (no mode injection through
    # the file name, no shared bareword handle)
    open(my $dsp, '>>', $out_name) or die "Can not append to $out_name: $!\n";
    prt("Writing to $out_name ...\n") if (VERB1());
    # take the timestamp once so the footer and the comment always agree
    my $stamp = localtime(time());
    print $dsp "</div>\n" if ($use_fixed_font);
    print $dsp "<p class=\"ctr\"><a href=\"index.htm\">index</a></p>\n";
    print $dsp '<p>Generated ';
    print $dsp $stamp;
    print $dsp ", from <b>$in_name</b>, by <b>$pgmname</b>, my Perl text-to-html \n";
    print $dsp "<a href=\"http://geoffmclane.com/mperl/samples/index.htm\">'converter'</a></p>\n";
    print $dsp '<!-- generated ';
    print $dsp $stamp;
    print $dsp ", from $in_name, by $pgmname, my text-to-html converter - geoffmclane.com -->\n";
    print $dsp <<"EOF";
</body>
</html>
EOF

    prt("Closing $out_name.\n");
    # buffered write errors only surface at close, so check it
    close($dsp) or die "Can not close $out_name: $!\n";
}



# Return the directory part of a '/'-separated path: the trailing
# '/name' component (with any run of slashes before it) is removed, so
# './dir1/dir2/file.name' gives './dir1/dir2'. When nothing can be removed
# (no slash followed by a name), '.' is returned.
sub local_dirname($) {
    my ($file) = @_;
    my $dir = $file;
    $dir =~ s,/+[^/]+$,,g;
    return ($dir eq $file) ? '.' : $dir;
}

# Append the HTML rendering of one input file to the output file.
#   $out_name - path of the HTML file (already started by init_out_file)
#   $mfile    - path of the text file to convert
# The conversion itself is done by dsp_add_src(), which is handed the open
# output handle. Dies if the output file can not be opened or closed.
sub do_this_file($$) {
    my ($out_name,$mfile) = @_;
    prt("Opening, for append $out_name\n") if (VERB1());
    # three-argument open with a lexical handle instead of the bareword DSP
    open(my $dsp, '>>', $out_name) or die "Can't append to $out_name: $!\n";
    prt("Writing to $out_name ...\n") if (VERB1());
    $file_lines++;
    dsp_add_src($dsp, $mfile);
    # buffered write errors only surface at close, so check it
    close($dsp) or die "Can't close $out_name: $!\n";
    prt("Closed $out_name.\n") if (VERB1());
}

# Wrap every whitespace-delimited word that looks like 'scheme://host...'
# in an <a target="_blank"> element whose href and text are the word itself.
# All whitespace and every other word are passed through unchanged.
sub convert_links($) {
    my ($tx) = shift;
    my $ntx = '';
    # Splitting on a captured single whitespace character keeps every
    # separator as its own piece, so joining the pieces rebuilds the line.
    foreach my $piece (split(/(\s)/, $tx, -1)) {
        if ($piece =~ /^\w+:{1}\/{1}\/{1}\w+(.+)$/) {
            $ntx .= "<a target=\"_blank\" href=\"$piece\">$piece</a>";
        } else {
            $ntx .= $piece;
        }
    }
    return $ntx;
}

# Turn every whitespace-delimited word that names an existing file into a
# link. A word qualifies when it is a plain file as given, or when it is one
# after being prefixed with $fdir. The href and link text are the word as
# written. Whitespace and all other words are copied unchanged.
#   $tx   - line of text to scan
#   $fdir - directory prefix tried in front of each word
sub add_url_links($$) {
    my ($tx,$fdir) = @_;
    my $in_len = length($tx);
    prt("Adding URLS to line [$tx]$in_len\n") if (VERB9());
    my $ntx = '';
    # split keeps each whitespace character as a separate piece
    foreach my $piece (split(/(\s)/, $tx, -1)) {
        my $is_word = (length($piece) && !($piece =~ /\s/));
        if ($is_word && ((-f $piece) || (-f $fdir.$piece))) {
            $ntx .= "<a target=\"_blank\" href=\"$piece\">$piece</a>";
        } else {
            $ntx .= $piece;
        }
    }
    my $out_len = length($ntx);
    prt("Returning line [$ntx]$out_len\n") if (VERB9());
    return $ntx;
}


# Make one line of plain text safe for HTML output:
#  - '&', '"', '<' and '>' become character entities,
#  - each TAB becomes $tab_stg,
#  - a whitespace character that follows whitespace (or a '&nbsp;') becomes
#    '&nbsp;', so runs of blanks are not collapsed by the browser.
# Returns the converted line.
sub line_to_html($) {
    my ($tx) = @_;
    $tx =~ s/&/&amp;/g; # convert '&' to '&amp;'
    $tx =~ s/\t/$tab_stg/g; # substitute TAB characters
    $tx =~ s/"/&quot;/g; # sub double quotes
    $tx =~ s/\</&lt;/g; # sub less than tag beginning
    $tx =~ s/\>/&gt;/g; # and html/xml tag ending
    my $html = '';
    foreach my $chr (split(//, $tx)) {
        my $after_blank = (($html =~ /\s$/) || ($html =~ /&nbsp;$/));
        if (($chr =~ /\s/) && $after_blank) {
            $html .= "&nbsp;";
        } else {
            $html .= $chr;
        }
    }
    return $html;
}

# Return 1 when the text contains something shaped like 'scheme://name.'
# (word characters, '://', word characters, then a dot); otherwise 0.
sub contains_a_link($) {
    my $text = $_[0];
    return ($text =~ /\w+:{1}\/{1}\/{1}\w+\./) ? 1 : 0;
}

# Convert one text file to HTML and print the result to an open handle.
#   $fh   - output file handle (already open for writing)
#   $file - path of the text file to convert
# With $use_pre set, the whole file is written inside <pre class="cd">, one
# converted line per input line. Otherwise blank lines delimit <p> paragraphs
# and lines inside a paragraph are separated by <br>.
# Each non-blank line goes through line_to_html(), then convert_links() when
# $add_links is set and the line contains a URL, then add_url_links() when
# $add_urls is set.
# If $file is not a plain file, a WARNING line is written to $fh and recorded
# with prtw(). Dies if the file exists but can not be opened.
sub dsp_add_src($$) {
    my ($fh,$file) = @_;
    my $line_num = 0;
    my $dn_para = 0;    # true while a <p> element is open
    my (@lines,$tx,$ln);
    my $need_br = 0;    # true when the next text line needs a <br> before it
    if (-f $file) {
        # three-argument open with a lexical handle: the user-supplied name
        # can not be interpreted as an open mode or a pipe
        open(my $inf, '<', $file) or die "Unable to open $file: $!\n";
        if ($use_pre) {
            print $fh '<pre class="cd">'."\n";
        }
        @lines = <$inf>;
        close($inf);
        $line_num = scalar @lines;
        prt("Reading $file ... $line_num lines...\n");
        $line_num = 0;
        my ($fnm,$fdir) = fileparse($file);
        ut_fix_directory(\$fdir);

        foreach $tx (@lines) {
            $line_num++;
            chomp $tx; # clear end of line
            $ln = length($tx);
            if ( $tx =~ /$WHITE_PATTERN/o ) {
                prt("white [$tx]$ln\n") if $verb2;
                if ($use_pre) {
                    print $fh "\n"; # 2010-04-24 - add this blank line
                } else {    # if ( !$use_pre )
                    # a blank line ends the current paragraph, if any
                    print $fh "\</p\>\n" if $dn_para;
                    $dn_para = 0;
                    $need_br = 0;
                }
            } else {
                if (!$use_pre) {
                    print $fh "\<p\>\n" if ! $dn_para;
                    $dn_para = 1;
                }
                $tx = line_to_html($tx);
                $ln = length($tx); # get the final length
                if ($use_pre) {
                    if ( $add_links && contains_a_link($tx) ) {
                        $tx = convert_links($tx);
                    }
                    if ($add_urls) {
                        $tx = add_url_links($tx,$fdir);
                    }
                    print $fh "$tx\n"; # out the line
                } else {  # if ( !$use_pre ) {
                    # NOTE(review): line_to_html() has already turned the
                    # second and later blanks of a run into '&nbsp;', so this
                    # branch looks unreachable - confirm before removing.
                    if (substr ($tx, 0, 2) eq '  ') { # if starts with 2 spaces
                        my $sps = 0;
                        my $nbs = ' &nbsp;';
                        for ($sps = 2; $sps < $ln; $sps++) {
                            if (substr ($tx, $sps, 1) ne ' ') {
                                last;
                            }
                            $nbs .= '&nbsp;' if $sps > 1;
                        }
                        $sps-- if $sps > 1; # back off last space, if more than 1
                        prt("Replacing $sps with [$nbs] ...\n") if $verb2;
                        $tx =~ s/ {$sps}/$nbs/; # replace (N) spaces with '&nbsp; x N
                        if ($verb2) {
                            my @vals = split(/\s/,$tx);
                            while (@vals) {
                                my $vc = shift (@vals);
                                prt("[$vc] ");
                            }
                            prt("\n");
                        }
                    } # if it was space beginning
                    if ( $add_links && contains_a_link($tx) ) {
                        $tx = convert_links($tx);
                    }
                    if ($add_urls) {
                        $tx = add_url_links($tx,$fdir);
                    }
                    if ($need_br) {
                        print $fh "\<br\>\n"; # out a line separator
                    }
                    print $fh "$tx\n"; # out the line
                    $need_br = 1;
                }
                prt("sig [$tx]$ln\n") if $verb2;
            }
        }
        $need_br = 0;
        if ($use_pre) {
            print $fh '</pre>'."\n";
        } else {
            # close a paragraph left open by the last text line
            print $fh "\</p\>\n" if $dn_para;
        }
        prt("Done $file ... $line_num lines ...\n");
    } else {
        print $fh "WARNING: Missed SOURCE [$file]\n";
        prtw("WARNING: Missed SOURCE [$file]\n");
    }
}


# Read the entries of directory $name into the file-level @files array and
# report how many of them are folders and how many are files.
#   $name - directory to list
# '.' and '..' are kept in @files but are not counted. With $verb2 set, each
# entry is also traced through prt(). Dies if the directory can not be opened.
sub get_dir_list
{
    my $name = shift;
    # report the directory actually requested, and the system error
    opendir(my $dh, $name) or die("Couldn't open directory [$name]: $!\n");
    @files = readdir($dh);
    closedir($dh);
    my $f_cnt = 0;
    my $d_cnt = 0;
    # count real entries only, so that total == folders + files
    my $total = grep { $_ ne '.' && $_ ne '..' } @files;
    prt("Found " . $total . " files and folders ...\n");
    foreach my $dfile (@files) {
        # readdir() returns bare names: test them inside $name, not
        # relative to the current working directory
        my $path = File::Spec->catfile($name, $dfile);
        if ( -d $path ) {
            if ($dfile eq '.' || $dfile eq '..') {
                # do nothing with DOT and DOUBLE DOT
            } else {
                $d_cnt++;
                prt("$dfile <DIR>\n") if $verb2;
            }
        } else {
            $f_cnt++;
            my $ff = $name . '\\' . $dfile;
            $ff =~ s/\\/\//g; # set *nix path separators ...
            my $sb = dirname($ff);
            $ff =~ s/\//\\/g; # set DOS path separators ...
            $sb =~ s/\//\\/g; # set DOS path separators ...
            prt("$dfile dos [$ff] [$sb] ") if $verb2;
            if ($f_cnt == 1) {
                $sb =~ s/\\/\//g; # set *nix path separators ...
                prt("[$sb]") if $verb2;
            }
            prt("\n") if $verb2;
        }
    }

    prt("Found " . $total . " - folders = $d_cnt, files = $f_cnt ...\n");
}


############################################
### MAIN ###

# Main flow: parse the command line, verify every input file exists, write
# the HTML header, append each converted input, write the footer, then hand
# the finished file to system() and exit through pgm_exit().
parse_args(@ARGV);

pgm_exit(1,"ERROR: no input files found or specified\n") if ! @input_files;

# pre-process: every named input must be an existing plain file
foreach my $inf (@input_files) {
    if (-f $inf) {
        prt("File: $inf ok\n");
    } else {
        pgm_exit(1,"ERROR: Can not locate file [$inf] ... check command ...\n");
    }
}

$in_file = $input_files[0];   # get the FIRST
my ($nm,$dr) = fileparse($in_file);
init_out_file($out_file, $nm); # abort, if no create ...

# show count in the array ... (scalar count; $#input_files is one less,
# and the items are input files, not lines)
prt("Adding ".scalar(@input_files)." file(s) to file $out_file.\n") if (VERB1());

# The loop variable is localized by foreach, so after the loop $in_file
# again holds the first input file assigned above.
foreach $in_file (@input_files) {
    do_this_file($out_file, $in_file);
}

end_out_file($out_file,$in_file);

prt("Done $out_file on " . localtime(time()) . ".\n");

# Pass the output file name to the system command processor.
# NOTE(review): presumably this opens the page in the default browser on
# Windows - confirm; the name is passed to the shell unquoted.
system($out_file);

pgm_exit(0,"");

###################################################

# Exit with an error unless an option is followed by at least one more
# command-line item. Called with the remaining argument list: the first
# element is the option itself, the rest are what follows it.
sub need_arg {
    my $arg = shift;
    my @following = @_;
    if (!@following) {
        pgm_exit(1,"ERROR: [$arg] must have a following argument!\n");
    }
}

# Process the command line.
#   @av - argument list (normally @ARGV)
# Items starting with '-' are options; any number of leading dashes is
# accepted and only the first letter is significant:
#   h or ?  print help and exit 0
#   v       verbosity: a trailing number sets it, otherwise each leading 'v' bumps it
#   l / ll  set $load_log to 1 / 2
#   o FILE  set $out_file (consumes the next item)
#   p       set $use_pre
#   u       set $add_urls
# Anything else is pushed on @input_files. Exits through pgm_exit(1, ...)
# on an unknown option, when no input file is given, or when the last input
# file named is not an existing plain file.
sub parse_args {
    my (@av) = @_;
    my ($arg,$sarg);
    while (@av) {
        $arg = $av[0];
        if ($arg =~ /^-/) {
            $sarg = substr($arg,1);
            $sarg = substr($sarg,1) while ($sarg =~ /^-/);
            if (($sarg =~ /^h/i)||($sarg eq '?')) {
                give_help();
                pgm_exit(0,"Help exit(0)");
            } elsif ($sarg =~ /^v/) {
                # non-greedy '.*?' so that ALL trailing digits are captured;
                # the greedy form kept only the last one ('-v12' gave 2)
                if ($sarg =~ /^v.*?(\d+)$/) {
                    $verbosity = $1;
                } else {
                    while ($sarg =~ /^v/) {
                        $verbosity++;
                        $sarg = substr($sarg,1);
                    }
                }
                prt("Verbosity = $verbosity\n") if (VERB1());
            } elsif ($sarg =~ /^l/) {
                if ($sarg =~ /^ll/) {
                    $load_log = 2;
                } else {
                    $load_log = 1;
                }
                prt("Set to load log at end. ($load_log)\n") if (VERB1());
            } elsif ($sarg =~ /^o/) {
                need_arg(@av);  # exits if nothing follows the option
                shift @av;
                $sarg = $av[0];
                $out_file = $sarg;
                prt("Set out file to [$out_file].\n") if (VERB1());
            } elsif ($sarg =~ /^p/) {
                $use_pre = 1;
                prt("Use <pre> tag for text.\n") if (VERB1());
            } elsif ($sarg =~ /^u/) {
                $add_urls = 1;
                prt("Set to add links to file names.\n") if (VERB1());
            } else {
                pgm_exit(1,"ERROR: Invalid argument [$arg]! Try -?\n");
            }
        } else {
            $in_file = $arg;
            prt("Added input to [$in_file]\n") if (VERB1());
            push(@input_files,$in_file);

        }
        shift @av;
    }

    # debug convenience: with no input given, fall back to the default file
    if ((length($in_file) ==  0) && $debug_on) {
        $in_file = $def_file;
        prt("Set DEFAULT input to [$in_file]\n");
        $add_urls = 1;
        push(@input_files,$in_file);
        $verbosity = 9;
    }
    if (length($in_file) ==  0) {
        pgm_exit(1,"ERROR: No input files found in command!\n");
    }
    if (! -f $in_file) {
        pgm_exit(1,"ERROR: Unable to find in file [$in_file]! Check name, location...\n");
    }
}

# Print the usage text: program name and version, the calling form, and one
# line per option (showing the current verbosity and log file name).
sub give_help {
    my @usage = (
        "$pgmname: version $M_VERSION\n",
        "Usage: $pgmname [options] in-file\n",
        "Options:\n",
        " --help  (-h or -?) = This help, and exit 0.\n",
        " --verb[n]     (-v) = Bump [or set] verbosity. def=$verbosity\n",
        " --load        (-l) = Load LOG at end. ($outfile)\n",
        " --out <file>  (-o) = Write output to this file.\n",
        " --pre         (-p) = Use <pre> tag for text.\n",
        " --urls        (-u) = Convert valid file names to url links.\n",
    );
    foreach my $usage_line (@usage) {
        prt($usage_line);
    }
}

# eof - txt2htm02.pl
index -|- top