#!perl -w # Module: txt2htm02.pl # coded using EditPlus v2.12 (76) # 16/02/2012 - Turn off $debug_on, and other tidying... # 06/08/2011 - Some BIG updates ;=)) # 2010/04/24 - more fixes # 2010/03/21 - some fixes, and improvements # March, 2005 geoff mclane # Sep 2006 - update # Original output was the whole file as one paragraph, using
to divide lines # Add option to use

...

formating use strict; use warnings; use Cwd; use File::Basename; # split path ($name,$dir,$ext) = fileparse($file [, qr/\.[^.]*/] ) use File::Spec; # File::Spec->rel2abs($rel); # we are IN the SLN directory, get ABSOLUTE from RELATIVE my $perl_dir = 'C:\GTools\perl'; unshift(@INC, $perl_dir); require 'lib_utils.pl' or die "Unable to load 'lib_utils.pl' Check paths in \@INC...\n"; # log file stuff our ($LF); my $pgmname = $0; if ($pgmname =~ /(\\|\/)/) { my @tmpsp = split(/(\\|\/)/,$pgmname); $pgmname = $tmpsp[-1]; } my $outfile = $perl_dir."\\temp.$pgmname.txt"; open_log($outfile); my $out_file = $perl_dir."\\tempt2h.htm"; # user variables my $load_log = 0; my $in_file = ''; my $add_urls = 0; my $debug_on = 0; #my $def_file = 'C:\Documents and Settings\Geoff McLane\My Documents\unix\Ubuntu-11-nautilus-fails.txt'; my $def_file = 'C:\Projects\OpenLayers\OpenLayers-2.12\examples\temp.txt'; my $M_VERSION = "0.3"; my $start_time = time(); my $verb2 = 0; my @input_files = (); my $file_lines = 0; my @files = (); my @file_list = (); my $WHITE_PATTERN = "^[ \t]*\$"; my $tab_stg = ' '; my $check_out = 0; my $use_pre = 0; # default to OFF my $doc_type = ''."\n"; my $add_links = 1; my $use_fixed_font = 1; my $verbosity = 0; ### program variables my @warnings = (); my $cwd = cwd(); my $os = $^O; sub VERB1() { return $verbosity >= 1; } sub VERB2() { return $verbosity >= 2; } sub VERB5() { return $verbosity >= 5; } sub VERB9() { return $verbosity >= 9; } sub show_warnings($) { my ($val) = @_; if (@warnings) { prt( "\nGot ".scalar @warnings." 
WARNINGS...\n" ); foreach my $itm (@warnings) { prt("$itm\n"); } prt("\n"); } else { ###prt( "\nNo warnings issued.\n\n" ); } } sub pgm_exit($$) { my ($val,$msg) = @_; if (length($msg)) { $msg .= "\n" if (!($msg =~ /\n$/)); prt($msg); } show_warnings($val); close_log($outfile,$load_log); exit($val); } sub prtw($) { my ($tx) = shift; $tx =~ s/\n$//; prt("$tx\n"); push(@warnings,$tx); } sub init_out_file { my ($out_name, $in_name) = @_; prt("Creating $out_name\n"); open(DSP, ">$out_name") || die "Can not create $out_name: $!\n"; prt("Writing to $out_name ...\n") if (VERB1()); $file_lines++; print DSP $doc_type; print DSP <<"EOF"; $in_name to HTML

$in_name to HTML

index

EOF print DSP "

\n" if ($use_fixed_font); prt("Closing $out_name.\n") if (VERB1()); close(DSP); } sub end_out_file { my ($out_name, $in_name) = @_; prt("Appending to $out_name\n") if (VERB1()); open(DSP, ">>$out_name") || die "Can not append to $out_name: $!\n"; prt("Writing to $out_name ...\n") if (VERB1()); print DSP "

\n" if ($use_fixed_font); print DSP "

index

\n"; print DSP '

Generated '; print DSP scalar localtime(time()); print DSP ", from $in_name, by $pgmname, my Perl text-to-html \n"; print DSP "'converter'

\n"; print DSP '\n"; print DSP <<"EOF"; EOF prt("Closing $out_name.\n"); close(DSP); } sub local_dirname($) { # passed a path, './dir1/dir2/file.name' returns './dir1/dir2/ my ($file) = @_; my ($sub); ($sub = $file) =~ s,/+[^/]+$,,g; $sub = '.' if $sub eq $file; return $sub; } sub do_this_file($$) { my ($out_name,$mfile) = @_; prt("Opening, for append $out_name\n") if (VERB1()); open(DSP, ">>$out_name") || die "Can't append to $out_name: $!\n"; prt("Writing to $out_name ...\n") if (VERB1()); $file_lines++; dsp_add_src(\*DSP, $mfile); close(DSP); prt("Closed $out_name.\n") if (VERB1()); } sub convert_links($) { my ($tx) = shift; my $len = length($tx); my ($i,$ch,$tag); my $ntx = ''; for ($i = 0; $i < $len; $i++) { $ch = substr($tx,$i,1); if ( !($ch =~ /\s/) ) { $tag = $ch; # start a tag $i++; # and go to next for (; $i < $len; $i++) { $ch = substr($tx,$i,1); if ($ch =~ /\s/) { # exit on space (or end of line) $i--; # back up to collect this char later last; } $tag .= $ch; # accumuate tag } if ($tag =~ /^\w+:{1}\/{1}\/{1}\w+(.+)$/) { $tag = "$tag"; } $ntx .= $tag; next; } $ntx .= $ch; } return $ntx; } sub add_url_links($$) { my ($tx,$fdir) = @_; my $len = length($tx); my ($i,$ch,$tag,$ff); my $ntx = ''; $tag = ''; prt("Adding URLS to line [$tx]$len\n") if (VERB9()); for ($i = 0; $i < $len; $i++) { $ch = substr($tx,$i,1); if ($ch =~ /\s/) { $ff = $fdir.$tag; if (length($tag)) { if ((-f $tag)||(-f $ff)) { $ntx .= "$tag"; } else { $ntx .= $tag; } } $tag = ''; $ntx .= $ch; } else { $tag .= $ch; } } $ff = $fdir.$tag; if (length($tag)) { if ((-f $tag)||(-f $ff)) { $ntx .= "$tag"; } else { $ntx .= $tag; } } $tag = ''; $len = length($ntx); prt("Returning line [$ntx]$len\n") if (VERB9()); return $ntx; } sub line_to_html($) { my ($tx) = @_; $tx =~ s/&/&/g; # convert '&' to '&' $tx =~ s/\t/$tab_stg/g; # substitute TAB characters $tx =~ s/"/"/g; # sub double quotes $tx =~ s/\/>/g; # and html/xml tag ending my ($ch,$len,$i); my $nline = ''; $ch = ''; $len = length($tx); for ($i = 
0; $i < $len; $i++) { $ch = substr($tx,$i,1); if ($ch =~ /\s/) { if (($nline =~ /\s$/) || ($nline =~ / $/)) { $ch = " "; } } $nline .= $ch; } return $nline; } sub contains_a_link($) { my ($tx) = shift; return 1 if ($tx =~ /\w+:{1}\/{1}\/{1}\w+\./); return 0; } sub dsp_add_src($$) { my ($fh,$file) = @_; my $line_num = 0; my $dn_para = 0; my (@lines,$tx,$ln); my $need_br = 0; if (-f $file) { open(INF, $file) || die "Unable to open $file!\n"; if ($use_pre) { print $fh '

'."\n";
		}
        @lines = ;
        close INF;
		$line_num = scalar @lines;
		prt("Reading $file ... $line_num lines...\n");
        $line_num = 0;
        my ($fnm,$fdir) = fileparse($file);
        ut_fix_directory(\$fdir);

        foreach $tx (@lines) {
			$line_num++;
			chomp $tx; # clear end of line
			$ln = length($tx);
			if ( $tx =~ /$WHITE_PATTERN/o ) {
				prt("white [$tx]$ln\n") if $verb2;
                if ($use_pre) {
					print $fh "\n"; # 2010-04-24 - add this blank line
                } else {    # if ( !$use_pre )
					print $fh "\\n" if $dn_para;
					$dn_para = 0;
                    $need_br = 0;
				}
			} else {
				if (!$use_pre) {
					print $fh "\\n" if ! $dn_para;
					$dn_para = 1;
				}
                $tx = line_to_html($tx);
				$ln = length($tx); # get the final length
				if ($use_pre) {
                    #if ($add_links && ($tx =~ /\s+\w+:{1}\/{1}\/{1}\w+/)) {
                    if ( $add_links && contains_a_link($tx) ) {
                        $tx = convert_links($tx);
                    }
                    if ($add_urls) {
                        $tx = add_url_links($tx,$fdir);
                    }
					print $fh "$tx\n"; # out the line
				} else {  # if ( !$use_pre ) {
					if (substr ($tx, 0, 2) eq '  ') { # if starts with 2 spaces
						my $sps = 0;
						my $nbs = '  ';
						for ($sps = 2; $sps < $ln; $sps++) {
							if (substr ($tx, $sps, 1) ne ' ') {
								last;
							}
							$nbs .= ' ' if $sps > 1;
						}
						$sps-- if $sps > 1; # back off last space, if more than 1
						prt("Replacing $sps with [$nbs] ...\n") if $verb2;
						$tx =~ s/ {$sps}/$nbs/; # replace (N) spaces with '  x N
						if ($verb2) {
							my @vals = split(/\s/,$tx);
							while (@vals) {
								my $vc = shift (@vals);
								prt("[$vc] ");
							}
							prt("\n");
						}
					} # if it was space beginning
                    #if ($add_links && ($tx =~ /\s+\w+:{1}\/{1}\/{1}\w+/)) {
                    if ( $add_links && contains_a_link($tx) ) {
                        $tx = convert_links($tx);
                    }
                    if ($add_urls) {
                        $tx = add_url_links($tx,$fdir);
                    }
                    if ($need_br) {
    					print $fh "\\n"; # out a line separator
                    }
					print $fh "$tx\n"; # out the line
                    $need_br = 1;
				}
				prt("sig [$tx]$ln\n") if $verb2;
			}
		}
        $need_br = 0;
		if ($use_pre) {
			print $fh '

'."\n"; } else { print $fh "\\n" if $dn_para; } prt("Done $file ... $line_num lines ...\n"); } else { print $fh "WARNING: Missed SOURCE [$file]\n"; prtw("WARNING: Missed SOURCE [$file]\n"); } } sub get_dir_list { my $name = shift; # put all files in the current directory in @files: # opendir(THEDIR, ".") || die("Couldn't open current directory\n"); opendir(THEDIR, $name) || die("Couldn't open current directory\n"); @files = readdir(THEDIR); closedir(THEDIR); my $f_cnt = 0; my $d_cnt = 0; prt("Found " . $#files . " files and folders ...\n"); foreach my $dfile (@files) { if ( -d $dfile ) { # if ($dfile eq '.' || $dfile eq '..') or if ($dfile =~ '^\.$' || $dfile =~ '^\.\.$') { # do nothing with DOT and DOUBLE DOT } else { $d_cnt++; prt("$dfile \n") if $verb2; } } else { $f_cnt++; my $ff = $name . '\\' . $dfile; # $ff =~ s/\//\\/g; # set DOS path separators ... $ff =~ s/\\/\//g; # set *nix path separators ... my $sb = dirname($ff); $ff =~ s/\//\\/g; # set DOS path separators ... $sb =~ s/\//\\/g; # set DOS path separators ... prt("$dfile dos [$ff] [$sb] ") if $verb2; if ($f_cnt == 1) { $sb =~ s/\\/\//g; # set *nix path separators ... prt("[$sb]") if $verb2; } prt("\n") if $verb2; } } prt("Found " . $#files . " - folders = $d_cnt, files = $f_cnt ...\n"); } ############################################ ### MAIN ### parse_args(@ARGV); pgm_exit(1,"ERROR: no input files found or specified\n") if ! @input_files; # pre-process foreach my $inf (@input_files) { if (-f $inf) { prt("File: $inf ok\n"); } else { pgm_exit(1,"ERROR: Can not locate file [$inf] ... check command ...\n"); } } $in_file = $input_files[0]; # get the FIRST my ($nm,$dr) = fileparse($in_file); init_out_file($out_file, $nm); # abort, if no create ... # show count in the array ... prt("Adding $#input_files lines to file $out_file.\n") if (VERB1()); foreach $in_file (@input_files) { do_this_file($out_file, $in_file); } end_out_file($out_file,$in_file); prt("Done $out_file on " . localtime(time()) . 
".\n"); system($out_file); pgm_exit(0,""); ################################################### sub need_arg { my ($arg,@av) = @_; pgm_exit(1,"ERROR: [$arg] must have a following argument!\n") if (!@av); } sub parse_args { my (@av) = @_; my ($arg,$sarg); while (@av) { $arg = $av[0]; if ($arg =~ /^-/) { $sarg = substr($arg,1); $sarg = substr($sarg,1) while ($sarg =~ /^-/); if (($sarg =~ /^h/i)||($sarg eq '?')) { give_help(); pgm_exit(0,"Help exit(0)"); } elsif ($sarg =~ /^v/) { if ($sarg =~ /^v.*(\d+)$/) { $verbosity = $1; } else { while ($sarg =~ /^v/) { $verbosity++; $sarg = substr($sarg,1); } } prt("Verbosity = $verbosity\n") if (VERB1()); } elsif ($sarg =~ /^l/) { if ($sarg =~ /^ll/) { $load_log = 2; } else { $load_log = 1; } prt("Set to load log at end. ($load_log)\n") if (VERB1()); } elsif ($sarg =~ /^o/) { need_arg(@av); shift @av; $sarg = $av[0]; $out_file = $sarg; prt("Set out file to [$out_file].\n") if (VERB1()); } elsif ($sarg =~ /^p/) { $use_pre = 1; prt("Use

 tag for text.\n") if (VERB1());
            } elsif ($sarg =~ /^u/) {
                $add_urls = 1;
                prt("Set to add links to file names.\n") if (VERB1());
            } else {
                pgm_exit(1,"ERROR: Invalid argument [$arg]! Try -?\n");
            }
        } else {
            $in_file = $arg;
            prt("Added input to [$in_file]\n") if (VERB1());
            push(@input_files,$in_file);

        }
        shift @av;
    }

    if ((length($in_file) ==  0) && $debug_on) {
        $in_file = $def_file;
        prt("Set DEFAULT input to [$in_file]\n");
        $add_urls = 1;
        push(@input_files,$in_file);
        $verbosity = 9;
    }
    if (length($in_file) ==  0) {
        pgm_exit(1,"ERROR: No input files found in command!\n");
    }
    if (! -f $in_file) {
        pgm_exit(1,"ERROR: Unable to find in file [$in_file]! Check name, location...\n");
    }
}

# give_help()
# Print the program name, version and the command line options through prt().
# Takes no arguments; it does not exit itself.
# The current values of $verbosity and $outfile (the log file) are shown
# in the text.
sub give_help {
    prt("$pgmname: version $M_VERSION\n");
    prt("Usage: $pgmname [options] in-file\n");
    prt("Options:\n");
    prt(" --help  (-h or -?) = This help, and exit 0.\n");
    prt(" --verb[n]     (-v) = Bump [or set] verbosity. def=$verbosity\n");
    prt(" --load        (-l) = Load LOG at end. ($outfile)\n");
    # the <file> and <pre> placeholders had been lost from the next two lines
    prt(" --out <file>  (-o) = Write output to this file.\n");
    prt(" --pre         (-p) = Use <pre> tag for text.\n");
    prt(" --urls        (-u) = Convert valid file names to url links.\n");
}

# eof - txt2htm02.pl