chkhlink1.pl to HTML.

index -|- end

Generated: Sun Apr 15 11:45:54 2012 from chkhlink1.pl 2011/11/08 26.9 KB.

#!/perl -w
# NAME: chkhlink1.pl
# AIM: Given a input FOLDER, check all the HTML found for a <a href="...."
# reference and make sure that reference EXISTS,
# either as a LOCAL file,
# or that an IP address can be obtained for the HOST if http://<something> ...
# 08/11/2011 - Account for my space saving like fgfs-src.zip is only as fgfs-src.zip.txt
# 15/09/2011 - Add -d = Directory scan, show unlinked items. In this also added 
#               searching HTML for <link href="..." ...>, and <img src="..."...>
#               and switched to using lib_utils.pl
# 08/09/2011 - Write temporary results ONLY to 'perl' directory
# 02/08/2011 - Lots of updates of the UI
# 31/05/2007 - geoff mclane - geoffair.com/mperl/index.htm
use strict;
use warnings;
use File::Basename;  # split path ($name,$dir,$ext) = fileparse($file [, qr/\.[^.]*/] )
use Socket;
use File::Spec; # File::Spec->rel2abs($rel); # we are IN the SLN directory, get ABSOLUTE from RELATIVE

my $perl_dir = 'C:\GTools\perl';
unshift(@INC, $perl_dir);
require 'lib_utils.pl' or die "Unable to load 'lib_utils.pl' ...\n";
require 'htmltools.pl' or die "Unable to load htmltools.pl ...\n";

my $VERS = "0.0.4 2011-11-08"; # file fgfs-src.zip may exist only as fgfs-src.zip.txt
# my $VERS = "0.0.3 2011-09-15"; # Add -d = Directory scan, show unlinked items.
# my $VERS = "0.0.2 2011-09-08"; # do not polute with 'temp' review files
# my $VERS = "0.0.1 2010-09-11"; # initial version

# log file stuff
my ($LF);
my $pgmname = $0;
if ($pgmname =~ /(\\|\/)/) {
    my @tmpsp = split(/(\\|\/)/,$pgmname);
    $pgmname = $tmpsp[-1];
}
my $outfile = $perl_dir."\\temp.$pgmname.txt";
open_log($outfile);

my $recurse = 1;   # recursive
my $ignfpd = 1;      # ignore FRONTPAGE folders
my @fpfolders = qw( _vti_cnf _vti_pvt _private _derived );
my $load_log = 0;
my $show_ips = 0;
my $show_script = 0;
my $dir_scan = 0; # 15/09/2011 - Add -d = Directory scan, show unlinked items.
my $in_file = "";
my @in_files = ();
my @in_dirs = ();
my $inc_text_entries = 1;

my %g_hash = ();
my @scripts = ();
my @warnings = ();
my %warned = (); # do NOT repeat warnings
my %g_dir_scans = ();
my $temp_file1 = $perl_dir.'\temp1.txt';
my $temp_file2 = $perl_dir.'\temp2.txt';
my $found_as_text = 0;

# constants
my $HR_REM = 1;
my $HR_LOC = 2;
my $HR_FIL = 3;
my $HR_FTP = 4;
my $HR_MT  = 5;
my $HR_JAV = 6;

my $debug_on = 0;
my $def_file = 'C:\HOMEPAGE\GA\unix\tar-01.htm';

# debug only bits
my $dbg1 = 0;   # show entering folder ...
my $dbg2 = 0;   # show ALL HREF entries ...
my $dbg3 = 0;   # show IP found ...
my $dbg4 = 0;   # show entered/exit script
my $dbg5 = 0;   # show 'ok' when found
my $dbg6 = 0;   # show processing lines
my $dbg7 = 0;   # show anchor count
my $dbg8 = 0;   # show unique anchor href
my $dbg9 = 0;   # show files with SCRIPTS
my $dbg10 = 0;   # show diag for get_href_srcs() ...
my $dbg11 = 0;   # show FULL filename for missing IP ...
my $dbg_12 = 0;
my $dbg_13 = 0;

my $verbosity = 0;

sub VERB1() { return ($verbosity >= 1); }
sub VERB2() { return ($verbosity >= 2); }
sub VERB5() { return ($verbosity >= 5); }
sub VERB9() { return ($verbosity >= 9); }

my %type2name = (
    $HR_REM => 'remote',
    $HR_LOC => 'local',
    $HR_FIL => 'file',
    $HR_FTP => 'fpt',
    $HR_MT  => 'mailto',
    $HR_JAV => 'java'
    );

sub get_type_href($) {
    my $typ = shift;
    if (defined $type2name{$typ}) {
        return $type2name{$typ};
    }
    return "$typ not listed";
}

##################################
sub show_warnings($) {
    my ($val) = @_;
    if (@warnings) {
        my ($itm,$cnt);
        $cnt = scalar @warnings;
        prt( "\nGot $cnt WARNINGS...\n" );
        foreach $itm (@warnings) {
            $cnt = 0;
            $cnt = $warned{$itm} if (defined $warned{$itm});
            $cnt = '' if ($cnt < 2);
           prt("$itm $cnt\n");
        }
        prt("\n");
    } else {
       ### prt( "\nNo warnings issued.\n\n" );
    }
}

sub pgm_exit($$) {
    my ($val,$msg) = @_;
    if (length($msg)) {
        $msg .= "\n" if (!($msg =~ /\n$/));
        prt($msg);
    }
    show_warnings($val);
    close_log($outfile,$load_log);
    exit($val);
}


sub prtw($) {
   my ($tx) = shift;
   $tx =~ s/\n$//;
   prt("$tx\n");
   push(@warnings,$tx);
}

sub showIPAddress {
   my ($nm) = shift;
   my @addr = gethostbyname($nm);
   my $cnt = 0;
   if( !@addr ) {
      prt( "Can't resolve [$nm]: $!\n" );
      return 0;
   }
   @addr = map { inet_ntoa($_) } @addr[4 .. $#addr];
   foreach my $k (@addr) {
      $cnt++;
      prt( "$cnt: $nm resolves to IP [$k]\n" ) if ($dbg3 || $show_ips || VERB9());
   }
   return $cnt;
}

sub add_2_scripts($$$) {
   my ($fil,$lns,$scr) = @_;
   my $sc = scalar @scripts; # per file 0
    my ($i,$cf);
   for ($i = 0; $i < $sc; $i++) {
      $cf = $scripts[$i][0];
      if ($cf eq $fil) {
         $scripts[$i][1] .= ":$lns";
         $scripts[$i][2] .= $scr;
         return 0;
      }
   }
    # start a new file of scripts
   push(@scripts, [$fil,$lns,$scr]);
   return 1;
}

sub get_href_srcs {
   my ($fil, @lns) = @_;
   my $lc = scalar @lns;
    my ($msg,$nxln,$script,$i,$i2);
   my $scnt = 0;
   my $slns = 0;   # count the SCRIPT lines
   my ($nm,$dir) = fileparse( $fil );
   prt( "Processing $lc lines from [$nm] dir=[$dir]...\n" ) if ($dbg6);
   my @isrc = ();
   my $ln = '';
   my $bal = '';
   my $inscript = 0;
   $slns = 0;
    $script = '';
   for ($i = 0; $i < $lc; $i++) {
        $i2 = $i + 1;
      $ln = $bal;
      $bal = '';
      $ln .= $lns[$i];
      chomp $ln;
      prt( "$i2: [$ln]\n" ) if ($dbg10);
      if ($inscript) {
            $script .= "\n".$ln;
         if ($ln =~ /<\/script>/i) {
            $inscript = 0;
            prt( "$i2: EXIT a SCRIPT [$ln]\n" ) if ($dbg4);
            add_2_scripts($fil,$slns,$script);
            $slns = 0;
            next;
         }
          prt( "$i2: Add to SCRIPT [$ln]\n" ) if ($dbg4);
         $slns++;
         next;
      }
      if ( $ln =~ /<a\s+(.*)/i ) {
         my $iln = $1;
         prt( "Found [$iln] ...\n" ) if ($dbg10);
         while ( !($iln =~ />/) && ($i < $lc)) {
            $i++;
                $i2 = $i + 1;
            $nxln = $lns[$i];
            chomp $nxln;
            prt( "Adding [$nxln] ...\n" ) if ($dbg10);
            $iln .= ' '.$nxln;
         }
         my $ind = index($iln, '>');
         if ($ind != -1) {
            $bal = substr($iln, $ind+1);
            $iln = substr($iln, 0, $ind+1);
         }
         #if ($iln =~ /src=\"(.+)\"/i) {
         if ($iln =~ /href=\s*\"(\S+)\"/i) {
            prt( "HREF = $1\nIn line [$iln]...\n" ) if ($dbg10);
            push(@isrc, $1);
            $scnt++;
         } else {
            if ( $iln =~ /name=\s*\"(\S+)\"/i ) {
               # ignore BOOKMARKS
            } else {
               $msg = "WARNING: HREF NOT FOUND in [$iln]...";
               prtw( "$msg\n" );
            }
         }
      } elsif ( $ln =~ /<script(.*)>/i ) {
         $inscript = 1;
         prt( "$i2: Entered a SCRIPT [$ln]\n" ) if ($dbg4);
            $script = $ln;
         $slns = 0;
         $nxln = substr($ln, 7);
         if ($nxln =~ /<\/script>/i) {
            $inscript = 0;
            prt( "$i2: And EXIT SCRIPT ...\n" ) if ($dbg4);
            add_2_scripts($fil,1,$script);
            $slns = 0;
         }
      }
   }
   if ($inscript) {
      $msg = "WARNING: EXIT WHILE IN SCRIPT in [$fil]...";
      prtw( "$msg\n" );
   }
   prt( "Returning $scnt HREF sources ...\n") if ($dbg10);
   return @isrc;
}

#   <link rel="stylesheet"
#        type="text/css"
#        href="cullam.css">
sub add_href_src($$) {
    my ($tag,$ra) = @_;
    my $len = length($tag);
    my ($ch,$i,$had_sp,$ok,$bal);
    $had_sp = 0;
    $ok = 0;
    for ($i = 0; $i < $len; $i++) {
        $ch = substr($tag,$i,1);
        if ($had_sp) {
            if ($ch =~ /\s/) {
                $had_sp++;
            } elsif ($ch =~ /h/i) {
                $bal = substr($tag,$i);
                if ($bal =~ /^href=/i) {
                    $ok = 1;
                    $i += 5;
                    $bal = substr($bal,5);
                    prt("[12] Got bal [$bal]\n") if ($dbg_12);
                    last;
                }
                $had_sp = 0;
            } else {
                $had_sp = 0;
            }
        } else {
            if ($ch =~ /\s/) {
                $had_sp = 1;
            }
        }
    }
    if ($ok) {
        $len = length($bal);
        my $src = '';
        for ($i = 0; $i < $len; $i++) {
            $ch = substr($bal,$i,1);
            last if ($ch =~ /\s/);
            $src .= $ch;
        }
        $src = strip_quotes($src);
        prt("[12] Got source [$src]\n") if ($dbg_12);
        push(@{$ra},$src);
        #### exit 1
    }
}

sub add_link_href_srcs($$$) {
   my ($fil,$ra,$rha) = @_;
   my $lc = scalar @{$ra};
    my ($line,$len,$ch,$i,$in_tag,$tag);
    $in_tag = 0;
    foreach $line (@{$ra}) {
        $line = trim_all($line);
        $len = length($line);
        next if ($len == 0);
        for ($i = 0; $i < $len; $i++) {
            $ch = substr($line,$i,1);
            if ($in_tag) {
                if ($ch eq '>') {
                    $in_tag = 0;
                    if ($tag =~ /^link\s+/i) {
                        prt("[12] Found link [$tag]\n") if ($dbg_12);
                        if ($tag =~ /href=/i) {
                            # get the HREF
                            add_href_src($tag,$rha);
                        }
                    }
                    $tag = '';
                } else {
                    $tag .= $ch;
                }
            } else {
                if ($ch eq '<') {
                    $tag = '';
                    $in_tag = 1;
                }
            }
        }
        $tag .= ' ' if (length($tag));
    }
}

# <img border="0"
#        src="images/checked_by_tidy.gif"
#        alt="checked by tidy"
#        width="32"
#        height="32">
sub add_img_src($$) {
    my ($tag,$ra) = @_;
    my $len = length($tag);
    my ($ch,$i,$had_sp,$ok,$bal);
    $had_sp = 0;
    $ok = 0;
    for ($i = 0; $i < $len; $i++) {
        $ch = substr($tag,$i,1);
        if ($had_sp) {
            if ($ch =~ /\s/) {
                $had_sp++;
            } elsif ($ch =~ /s/i) {
                $bal = substr($tag,$i);
                if ($bal =~ /^src=/i) {
                    $ok = 1;
                    $i += 4;
                    $bal = substr($bal,4);
                    prt("[13] Got bal [$bal]\n") if ($dbg_13);
                    last;
                }
                $had_sp = 0;
            } else {
                $had_sp = 0;
            }
        } else {
            if ($ch =~ /\s/) {
                $had_sp = 1;
            }
        }
    }
    if ($ok) {
        $len = length($bal);
        my $src = '';
        for ($i = 0; $i < $len; $i++) {
            $ch = substr($bal,$i,1);
            last if ($ch =~ /\s/);
            $src .= $ch;
        }
        $src = strip_quotes($src);
        prt("[13] Got source [$src]\n") if ($dbg_13);
        push(@{$ra},$src);
        #### exit 1
    }
}

sub add_img_href_srcs($$$) {
   my ($fil,$ra,$rha) = @_;
   my $lc = scalar @{$ra};
    my ($line,$len,$ch,$i,$in_tag,$tag);
    $in_tag = 0;
    foreach $line (@{$ra}) {
        $line = trim_all($line);
        $len = length($line);
        next if ($len == 0);
        for ($i = 0; $i < $len; $i++) {
            $ch = substr($line,$i,1);
            if ($in_tag) {
                if ($ch eq '>') {
                    $in_tag = 0;
                    if ($tag =~ /^img\s+/i) {
                        prt("[13] Found img [$tag]\n") if ($dbg_13);
                        if ($tag =~ /src=/i) {
                            # get the HREF
                            add_img_src($tag,$rha);
                        }
                    }
                    $tag = '';
                } else {
                    $tag .= $ch;
                }
            } else {
                if ($ch eq '<') {
                    $tag = '';
                    $in_tag = 1;
                }
            }
        }
        $tag .= ' ' if (length($tag));
    }
}


sub is_my_ext {
   my ($fil) = shift;
   my ($nm,$dir,$ext) = fileparse( $fil, qr/\.[^.]*/ );
   if ((lc($ext) eq ".htm")||(lc($ext) eq ".html")) {
      return 1;
   }
   return 0;
}

# my $ignfpd = 1;   # ignore FRONTPAGE folders
sub is_fp_folder {
   my ($inf) = shift;
   foreach my $fil (@fpfolders) {
      if (lc($inf) eq lc($fil)) {
         return 1;
      }
   }
   return 0;
}

sub mark_file_referenced($) {
    my $fil = shift;
    my $dcnt = 0;
    my $fcnt = 0;
    my ($dir,$ra,$f,$cnt,$i,$fnd,$ff);
    $fnd = 0;
    my $dfil = path_u2d($fil);
    my $qfil = quotemeta($dfil);
    foreach $dir (keys %g_dir_scans) {
        $ra = $g_dir_scans{$dir};
        $cnt = scalar @{$ra};
        $fcnt += $cnt;
        $dcnt++;
        for ($i = 0; $i < $cnt; $i++) {
            $ff = ${$ra}[$i][0];    # full file
            $f  = ${$ra}[$i][1];    # file.ext
            if (($ff eq $dfil)||($ff =~ /$qfil$/)) {
                ${$ra}[$i][2]++;    # bump count
                $fnd = 1;
                last;
            }
        }
    }
    prtw("WARNING: File [$fil] NOT found, in $dcnt directories, $fcnt files...\n") if (!$fnd);
}

sub scan_directory($$);


sub scan_directory($$) {
   my ($inf,$lev) = @_;
    my @found = ();
    my @dirs = ();
    my ($fil,$ff);
   if ( opendir( DIR, $inf ) ) {
      my @files = readdir(DIR);
      closedir DIR;
        $inf .= "\\" if ( !($inf =~ /(\\|\/)$/) );
      foreach $fil (@files) {
         next if (($fil eq ".")||($fil eq ".."));
         $ff = $inf.$fil;
         if (-d $ff) {
             next if (is_fp_folder($fil)); # ignore FRONTPAGE folders
                push(@dirs,$ff);
         } elsif (-f $ff) {
            push(@found,[$ff,$fil,0]);
         }
      }
        foreach $fil (@dirs) {
            my $ra = scan_directory($fil,$lev+1);
            push(@found,@{$ra}); # add these to list
        }
   } else {
      pgm_exit(1,"ERROR: Failed to open folder $inf ...\n");
   }
    return \@found;
}


sub process_folder($);

sub process_folder($) {
   my ($inf) = shift;
   my $fcnt = 0;
   prt( "Processing $inf folder ...\n" ) if ($dbg1);
   if ( opendir( DIR, $inf ) ) {
      my @files = readdir(DIR);
      closedir DIR;
      foreach my $fil (@files) {
         if (($fil eq ".")||($fil eq "..")) {
            next;
         }
         my $ff = $inf."\\".$fil;
         if ( -d $ff ) {
            if ($recurse) {
               next if ($ignfpd && is_fp_folder($fil)); # ignore FRONTPAGE folders
               process_folder( $ff );
            }
         } else {
            if (is_my_ext($fil)) {
               push(@in_files, $ff);
               $fcnt++;
            }
         }
      }
      prt( "Processed $inf folder finding $fcnt HTML files ...\n" );
   } else {
      prt( "ERROR: Failed to open folder $inf ...\n" );
   }
}

sub add_in_folders($) {
    my $rda = shift; #(\@in_dirs);
    my ($dir);
    foreach $dir (@{$rda}) {
        process_folder($dir);
    }
}

sub process_in_files($) {
    my $rfashift;
    my $frcnt = 0;
    my $frnf = 0;
    my $filcnt = 0;
    my ($hr_type,$ok,$msg,$file);
    my %counts = ();
    foreach $file (@{$rfa}) {
        $filcnt++;
        next if (defined $g_hash{$file});
        my ($nm,$dir) = fileparse($file);
        if ($dir_scan && !defined $g_dir_scans{$dir} ) {
            $g_dir_scans{$dir} = scan_directory($dir,0);
        }
        my @hrefs = ();
        if (open INF, "<$file") {
            my @lines = <INF>;
            close INF;
            mark_file_referenced($file) if ($dir_scan);
            #write_a_file( $temp_file1, @lines );
            #write2file(join("",@lines)."\n",$temp_file1);
            @lines = dropcomments_from_array(@lines);
            #write2file( join("\n", @lines)."\n", $temp_file2 );
            my @srcs = get_href_srcs($file, @lines);
            my $scnt = scalar @srcs;
            if ($scnt) {
                prt( "Found $scnt anchor href= in $nm ...\n" ) if ($dbg7);
                foreach my $src (@srcs) {
                    $ok = 0;
                    if ($src =~ /^http:/i) {
                        # remote HREF
                        $hr_type = $HR_REM;
                    } elsif ($src =~ /^https:/i) {
                        # remote HREF
                        $hr_type = $HR_REM;
                    } elsif ($src =~ /^ftp:/i) {
                        # remote HREF
                        $hr_type = $HR_FTP;
                    } elsif ($src =~ /^mailto:/i) {
                        # remote HREF
                        $hr_type = $HR_MT;
                    } elsif ( $src =~ /^#/ ) {
                        # local in page HREF
                        $hr_type = $HR_LOC;
                    } elsif ( $src =~ /^javascript:/i ) {
                        # a JAVASCRIPT HREF
                        $hr_type = $HR_JAV;
                    } else {
                        $frcnt++;
                        my $ind = index($src,'#');
                        if ( $ind != -1 ) {
                            $src = substr($src,0,$ind);
                        }
                        $ind = index($src,'?');
                        if ( $ind != -1 ) {
                            $src = substr($src,0,$ind);
                        }
                        my $ff = $dir.$src;
                        if ( -f $ff ) {
                            prt( "$src - ok\n" ) if ($dbg5);
                            $ok = 1;
                        } else {
                            if ($inc_text_entries && (-f $ff.".txt")) {
                                $found_as_text++; # accept this ".txt" extensions
                            } else {
                                $msg = "WARNING:1: [$src] NOT FOUND! in [$file]";
                                if (!defined $warned{$msg}) {
                                    $warned{$msg} = 1;
                                    prtw( "$msg\n" );
                                } else {
                                    $warned{$msg}++;
                                }
                                $frnf++;
                            }
                        }
                        $hr_type = $HR_FIL;
                    }
                    if ($hr_type == $HR_FIL) {
                        mark_file_referenced($src) if ($dir_scan);
                    }
                    #  OFFSETS    0    1     2        3
                    push(@hrefs, [$src,$file,$hr_type,$ok]);
                    $counts{$hr_type}++;
                }
            } else {
                prt( "Found NO HREFs in $nm ...\n" );
            }
            @srcs = ();
            add_link_href_srcs($file,\@lines,\@srcs);
            add_img_href_srcs($file,\@lines,\@srcs);
            $scnt = scalar @srcs;
            if ($scnt) {
                prt( "Found $scnt link href= or img src= in $nm ...\n" ) if ($dbg7);
                foreach my $src (@srcs) {
                    $ok = 0;
                    if ($src =~ /^http:/i) {
                        # remote HREF
                        $hr_type = $HR_REM;
                    } elsif ($src =~ /^https:/i) {
                        # remote HREF
                        $hr_type = $HR_REM;
                    } elsif ($src =~ /^ftp:/i) {
                        # remote HREF
                        $hr_type = $HR_FTP;
                    } elsif ($src =~ /^mailto:/i) {
                        # remote HREF
                        $hr_type = $HR_MT;
                    } elsif ( $src =~ /^#/ ) {
                        # local in page HREF
                        $hr_type = $HR_LOC;
                    } elsif ( $src =~ /^javascript:/i ) {
                        # a JAVASCRIPT HREF
                        $hr_type = $HR_JAV;
                    } else {
                        $frcnt++;
                        my $ind = index($src,'#');
                        if ( $ind != -1 ) {
                            $src = substr($src,0,$ind);
                        }
                        $ind = index($src,'?');
                        if ( $ind != -1 ) {
                            $src = substr($src,0,$ind);
                        }
                        my $ff = $dir.$src;
                        if ( -f $ff ) {
                            prt( "$src - ok\n" ) if ($dbg5);
                            $ok = 1;
                        } else {
                            $msg = "WARNING:2: [$src] NOT FOUND! in [$file]";
                            prtw( "$msg\n" );
                            $frnf++;
                        }
                        $hr_type = $HR_FIL;
                    }
                    if ($hr_type == $HR_FIL) {
                        mark_file_referenced($src) if ($dir_scan);
                    }
                    #  OFFSETS    0    1     2        3
                    push(@hrefs, [$src,$file,$hr_type,$ok]);
                    $counts{$hr_type}++;
                }
            } else {
                prt( "Found NO LINK HREF NOT IMG SRC in $nm ...\n" );
            }
        }
        $g_hash{$file} = [@hrefs];
    }
    #prt("Done $filcnt files, found $frcnt file references, $frnf NOT found.\n");
    $msg = '';
    foreach $hr_type (keys %counts) {
        $msg .= get_type_href($hr_type)." ".$counts{$hr_type}.", ";
    }
    $file = ($frnf ? "$frnf local NOT found" : "all local and files refs found");
    $file .= " ($found_as_text as .txt)" if ($found_as_text);
    prt("Done $filcnt files, HREFS $msg $file.\n");
}

sub process_http_refs() {
    my $hr = \%g_hash;
    my ($val,$msg);
    my ($href,$file,$hcnt2);
    my @httprefs = ();
    foreach $file (keys %{$hr}) {
        $href = ${$hr}{$file};
        foreach $val (@{$href}) {
            if (${$val}[2] == $HR_REM) {
                push(@httprefs,$val);
            }
        }
    }
    my $max_hcnt = scalar @httprefs;
    if (!$max_hcnt) {
        return;
    }
   prt( "Found $max_hcnt HREF entries ...\n" ) if ($dbg2 || VERB9());
    my %hrefs = ();
   for (my $i = 0; $i < $max_hcnt; $i++) {
      $href = $httprefs[$i][0];
      $file = $httprefs[$i][1];
      my ($nm,$dir) = fileparse($file);
      if (defined( $hrefs{$href} )) {
         $val = $hrefs{$href};
         if ($dbg11) {
            $val .= ' '.$file;
         } else {
            $val .= ' '.$nm;
         }
         $hrefs{$href} = $val;
      } else {
         if ($dbg11) {
            $val = $file;
         } else {
            $val = $nm;
         }
         $hrefs{$href} = $val;
      }
      prt( "$href in [$file]\n" ) if ($dbg2 || VERB9());
   }
   $hcnt2 = scalar keys(%hrefs);
   prt( "Found $hcnt2 different entries... of $max_hcnt total\n" );
   foreach my $key (keys %hrefs) {
      $val = $hrefs{$key};
      prt( "$key in $val\n" ) if ($dbg8);
      if ($key =~ /^http:\/\//i) {
         my $hkey = substr($key, 7);
         my @arr = split( /\//, $hkey );
         $hkey = $arr[0];
         if (showIPAddress( $hkey ) == 0) {
            $msg = "FAILED: NO IP FOR HOST [$hkey][$val]";
            prtw( "$msg\n" );
         }
      }
   }
}

sub process_scripts() {
    my $s_cnt = scalar @scripts;
   prt( "Got $s_cnt files containing SCRIPTS ...\n" );
   # push(@scripts, [$fil,$lns,$scr]);
    my ($file,$val,$scr,$i);
   for ($i = 0; $i < $s_cnt; $i++) {
      $file = $scripts[$i][0];
      $val = $scripts[$i][1];
      $scr = $scripts[$i][2];
      prt( "$file $val\n$scr\n" );
   }
}

sub process_missed() {
    my $dcnt = 0;
    my $fcnt = 0;
    my $mcnt = 0;
    my ($dir,$ra,$cnt,$i);
    my @missed = ();
    foreach $dir (keys %g_dir_scans) {
        $ra = $g_dir_scans{$dir};
        $fcnt += scalar @{$ra};
        $dcnt++;
        $cnt = scalar @{$ra};
        for ($i = 0; $i < $cnt; $i++) {
            if (${$ra}[$i][2] == 0) {
                $mcnt++;
                push(@missed, ${$ra}[$i][0]);
            }
        }
    }
    prt("Processed $dcnt directories, for $fcnt files, $mcnt missed...\n");
    foreach $dir (@missed) {
        prt(" $dir\n");
    }
}

# MAIN ===========================
#my $ra = scan_directory('C:\HOMEPAGE\GA\cullam',0);
#my $cnt = scalar @{$ra};
#prt("Found $cnt files...\n");
#exit 1;
parse_args(@ARGV);
add_in_folders(\@in_dirs);
###process_folder( $in_folder );
###push (@in_files, "C:\\HOMEPAGE\\GeoffAir\\mperl\\perl.htm");
my $in_file_cnt = scalar @in_files;
prt( "Found $in_file_cnt HTML files to process ...\n" );
process_in_files(\@in_files);
process_http_refs();
process_scripts() if ($show_script);
process_missed() if ($dir_scan);
pgm_exit(0,"");

###############################

########################################
sub give_help {
    prt("$pgmname: version $VERS\n");
    prt("Usage: $pgmname [options] in-file\n");
    prt("Options:\n");
    prt(" --help (-h or -?) = This help, and exit 0.\n");
    prt(" --dir        (-d) = Directory scan, and show 'unlinked' items.\n");
    prt(" --ip         (-i) = Show IP address after resolution.\n");
    prt(" --log        (-l) = Load LOG at end.\n");
    prt(" --show       (-s) = Show scripts found.\n");
    prt(" --verb[val]  (-v) = Bump, or set verbosity level 1 to 9\n");
}
sub need_arg {
    my ($arg,@av) = @_;
    pgm_exit(1,"ERROR: [$arg] must have following argument!\n") if (!@av);
}

sub parse_args {
    my (@av) = @_;
    my ($arg,$sarg);
    while (@av) {
        $arg = $av[0];
        if ($arg =~ /^-/) {
            $sarg = substr($arg,1);
            $sarg = substr($sarg,1) while ($sarg =~ /^-/);
            if (($sarg =~ /^h/i)||($sarg eq '?')) {
                give_help();
                pgm_exit(0,"Help exit(0)");
            } elsif ($sarg =~ /^d/) {
                $dir_scan = 1;
            } elsif ($sarg =~ /^i/) {
                $show_ips = 1;
            } elsif ($sarg =~ /^l/) {
                $load_log = 1;
            } elsif ($sarg =~ /^s/) {
                $show_script = 1;
            } elsif ($sarg =~ /^v/) {
                if ($sarg =~ /^verb/) {
                    $sarg = 'v'.substr($sarg,4);
                }
                if ($sarg =~ /^v(\d+)$/) {
                    $verbosity = $1;
                } else {
                    while ($sarg =~ /^v/) {
                        $verbosity++;
                        $sarg = substr($sarg,1);
                    }
                }
                prt("Set verbosity to $verbosity\n");
            } else {
                pgm_exit(1,"ERROR: Invalid argument [$arg]! Try -?\n");
            }
        } else {
            $in_file = File::Spec->rel2abs($arg);
            if (-f $in_file) {
                push(@in_files,$in_file);
            } elsif (-d $in_file) {
                push(@in_dirs,$in_file);
            } else {
                pgm_exit(1,"ERROR: Bare input [$arg] is NOT a file, or directory!\n");
            }
            prt("Set input to [$in_file]\n");
        }
        shift @av;
    }

    if ((length($in_file) ==  0) && $debug_on) {
        $in_file = $def_file;
        if (-f $in_file) {
            push(@in_files,$in_file);
        } elsif (-d $in_file) {
            push(@in_dirs,$in_file);
        } else {
            pgm_exit(1,"ERROR: Default input [$in_file] is NOT a file, or directory!\n");
        }
        prt("Set input to DEFAULT [$in_file]\n");
    }
    if (length($in_file) ==  0) {
        pgm_exit(1,"ERROR: No input files found in command!\n");
    }
}

# eof - chkhlink1.pl

index -|- top

checked by tidy  Valid HTML 4.01 Transitional