chklinks01.pl to HTML.

index -|- end

Generated: Tue Feb 2 17:54:24 2010 from chklinks01.pl 2007/06/18 38.5 KB.

#!/perl -w
# NAME: chklinks01.pl
# AIM: Given a input FOLDER, check all the HTML found for a <a href="...."
# reference and make sure that reference EXISTS,
# either as a LOCAL file,
# or that an IP address can be obtained for the HOST if http://<something> ...
# AND check ALL image links <img src="..."...>, if it is a LOCAL file,
# and other 'link' items, like .zip, .txt, etc.
# 02/06/2007 - geoff mclane - geoffair.com/mperl/index.htm
use strict;
use warnings;
use File::Basename;
use Socket;
unshift(@INC, 'C:/GTools/perl');
require 'logfile.pl' or die "Unable to load logfile.pl ...\n";
require 'htmltools.pl' or die "Unable to load htmltools.pl ...\n";
# for htmltools, if functions used
my @imgs = ();
my @hrefs = ();
# log file stuff
my ($LF);
my $pgmname = $0;
if ($pgmname =~ /\w{1}:\\.*/) {
   my @tmpsp = split(/\\/,$pgmname);
   $pgmname = $tmpsp[-1];
}
my $outfile = "temp.$pgmname.txt";
open_log($outfile);
prt( "$pgmname ... Hello, World ...\n" );
# SET A DEFAULT INPUT FOLDER / FILE
###my $in_folder = "C:\\HOMEPAGE\\simple\\index.htm";
my $in_folder = "C:\\HOMEPAGE\\GA\\index.html";
###my $in_folder = "C:\\HOMEPAGE\\GA\\flags\\index.htm";
###my $in_folder = "C:\\HOMEPAGE\\GeoffAir\\welcome.html";
# some FEATURES and USER variables
my $recurse = 1;   # recursive
my $ignfpd = 1;      # ignore FRONTPAGE folders
my $chkip = 1;      # check the IP address
my $showhreflinks = 0;   # show a WARNING when an IMG, ICO, etc is a REMOTE link
my $writeips = 1;   # write IP found to a file
my $ipfile = "iplinks.txt";
my @ipsfound = ();
my @html_ext = qw( .htm .html .shtml .php );
my @graf_ext = qw( .jpg .jpeg .gif .png .bmp .ico .mpg );
my @css_ext  = qw( .css );
my @script_ext = qw( .js .class .cgi );
my @fpfolders = qw( _vti_cnf _vti_pvt _private _derived );
my @excused = ( '?dir=test', '?dir=.' );
# program variables
# my @excludes = qw( cvineng2.htm );
my @excludes = qw( desktop.ini );
# NOTE: Each of these is a multidimensional array - see offset below
my @htm_files = ();   # store files found in folder
my @img_files = ();
my @css_files = ();
my @zip_files = ();
my @txt_files = ();
my @script_files = ();
my @other_files = ();
# offsets in above arrays
my $of_ff = 0;   # full file name
my $of_hr = 1;   # array ref of href links
my $of_im = 2;   # array ref of image links
my $of_lk = 3;   # linked count
my $of_sp = 4;   # spare
my @donesrcs = ();
my @doneimgs = ();
my %ext_hash = ();
my $cnt = 0;
my $file = '';
my @warnings = ();   # list of errors, warnings during running
my @httprefs = ();   # set of HREF src values
my @httpsrefs = ();
my @ftprefs = ();
my @mtrefs = ();
my $hcnt = 0;
my $href = '';
my %hrefs = ();
my $val = '';
my $msg = '';
my @scripts = ();
my $scnt = 0;
my $imgcnt = 0;
my $procnt = 0;
my $homefile = '';
my $total_hrefs = 0;
my $total_imgs = 0;
my @missed = ();
my $excusecnt = 0;
my $hrflnkcnt = 0; # $showhreflinks
# debug only bits
my $dbg1 = 0;   # show entering folder ...
my $dbg2 = 0;   # show ALL HREF entries ...
my $dbg3 = 0;   # show IP found ...
my $dbg4 = 0;   # show entered/exit script
my $dbg5 = 0;   # show 'ok' when found
my $dbg6 = 0;   # show processing lines
my $dbg7 = 0;   # show anchor count
my $dbg8 = 0;   # show unique anchor href
my $dbg9 = 0;   # show files with SCRIPTS
my $dbg10 = 0;   # show diag for get_img_srcs() ...
my $dbg11 = 0;   # in image processing show entered/exits script 
my $dbg12 = 0;   # in image processing show processing count
my $dbg13 = 0;   # in image processing show ok - found file
my $dbg14 = 0;   # in image processing show image count found
my $dbg15 = 0;   # in image processing show image count when NONE found
my $dbg16 = 0;   # show WARNINGS during run ...
my $dbg17 = 0;   # show MISSING or BLANK HREF in PHP file
my $dbg18 = 0;   # check_linkages: show 'ok', in 2nd link check
my $dbg19 = 0;   # check_local_links: show progress ...
my $dbg20 = 0;   # check_local_links: show ALL links - NONE IS ALWAYS SHOWN ...
my $dbg21 = 0;   # check_local_links: show LINK when found ...
my $dbg22 = 0;   # mark_image_link: show comparing, and comparision ...
my $dbg23 = 0;   # mark_image_link: show count of new images marked ...
my $dbg24 = 0;   # show each image file being marked
my $dbg25 = 0;   # show NO LINK FOUND
my $dbg26 = 0;   # show EACH HTML FILE BEING PROCESSED
my $dbg27 = 0;   # show EACH extesnions, and counts
parse_args(@ARGV);
if (length($in_folder) == 0) {
   mydie( "No input folder (or file) given/found in command ...\n" );
}
if (-f $in_folder) {
   ($homefile, $in_folder) = fileparse($in_folder);
   $in_folder =~ s/[\\\/]$//;
}
prt( "Checking $in_folder ...\n" );
process_folder( $in_folder );
show_found_counts();
process_file_array();
process_host_array();
############################
prt( "\n###### SHOW RESULTS ########\n" );
prt( "WARNING: $hrflnkcnt images by HREF not shown! (change \$showhreflinks)\n" ) if ($hrflnkcnt);
$scnt = scalar @scripts;
if ($scnt && $dbg9) {
   prt( "Got $scnt files containing SCRIPTS ...\n" );
   # push(@scripts, [$fil, $lns]);
   for (my $i = 0; $i < $scnt; $i++) {
      $file = $scripts[$i][0];
      $val = $scripts[$i][1];
      prt( "$file $val\n" );
   }
}
if (length($homefile)) {
   check_linkages( $homefile );
   check_local_links( $homefile );
   show_link_counts("HTML File", \@htm_files);
   show_link_counts("IMG Files", \@img_files);
   show_link_counts("CSS Files", \@css_files);
   show_link_counts("ZIP Files", \@zip_files);
   show_link_counts("TXT Files", \@txt_files);
   show_link_counts("Script Files", \@script_files);
   show_link_counts("Other Files", \@other_files);
}
$cnt = scalar @warnings;
if ($cnt) {
   prt( "\nWARNINGS FOLLOW ($cnt):\n" );
   foreach my $w (@warnings) {
      prt( "$w\n" );
   }
} else {
   prt( "No warnings ...\n" );
}
if (@missed) {
   prt( "\nMISSING FOLLOW: ".scalar @missed."\n" );
   foreach $file (@missed) {
      prt( "$file\n");
   }
}
prt( "###### END RESULTS ########\n" );
close_log($outfile,1);
exit(0);
##################################
sub process_file_array {
   my $max =scalar @htm_files;
   for (my $i = 0; $i < $max; $i++) {
      $file = $htm_files[$i][$of_ff];
      my ($nm,$dir,$ext) = fileparse( $file, qr/\.[^.]*/ );
      my $htot = 0;
      my $itot = 0;
      $procnt++;
      if (open INF, "<$file") {
         my @lines = <INF>;
         close INF;
         @lines = drop_php_from_array( @lines ) if (lc($ext) eq '.php');
         # THIS IS USING htmltool.pl - get a single line of TEXT ...
         my $txt = join( '', @lines ); # get whole text
         my @is = ret_imgs_array( $txt );
         my $ntxt = remove_script( $txt );
         $ntxt = trimblanklines($ntxt);
         @hrefs = (); # clear
         my @hr = ret_hrefs_array( $ntxt );
         ### collecthrefs( $txt, 0 );
         ### collectimgs( $txt, 0 );
         # bump the counts of HREF and IMGS collected
         $itot += scalar @is;
         $htot += scalar @hr;
         # store the references ... that is a reference to an array
         $htm_files[$i][$of_hr] = \@hr;
         $htm_files[$i][$of_im] = \@is;
         # new code to do some similar things, but while in the array
         ###@lines = drop_php_from_array( @lines ) if (lc($ext) eq '.php');
         @lines = dropcomments_from_array(@lines);
         my @srcs = get_img_srcs($file, @lines);
         $imgcnt += check_images( $file, @srcs );
         @srcs = get_href_srcs($file, @lines);
         check_hrefs( $file, @srcs );
      }
      $total_hrefs += $htot;
      $total_imgs += $itot;
      if ((($procnt % 100) == 0)||($max < 10)) {
         ###local $| = 1;
         ###prt( "\rDone $procnt HTML files ..." );
         if ($max < 10) {
            prt( "Done $file HTML file ... href,other($htot,$itot)\n" );
         } else {
            prt( "Done $procnt HTML files ... href,other ($total_hrefs,$total_imgs)\n" );
         }
      }
   }
   prt( "Completed $procnt HTML files ... Found $total_hrefs HREF, and $total_imgs IMG/OTHER tokens.\n" );
}
sub get_href_type {
   my ($src) = shift;
   if ($src =~ /^http:/i) {
      #push(@httprefs, [$src, $fil, $lnnos] );
      return 1; # remote HREF
   } elsif ($src =~ /^https:/i) {
      return 2; # remote HREF
      #push(@httpsrefs, [$src, $fil, $lnnos] );
   } elsif ($src =~ /^ftp:/i) {
      #push(@ftprefs, [$src, $fil, $lnnos] );
      return 3; # remote HREF
   } elsif ($src =~ /^mailto:/i) {
      #push(@mtrefs, [$src, $fil, $lnnos] );
      return 4; # remote HREF
   } elsif ( $src =~ /^javascript:/i ) {
      return 5; # a JAVASCRIPT HREF
   } elsif ( substr($src,0,1) eq '#') {
      # local in page HREF
      return 6;
   } else {
      my $ind = index($src,'#');
      if ( $ind != -1 ) {
         $src = substr($src,0,$ind);
      }
      $ind = index($src,'?');
      if ( $ind != -1 ) {
         $src = substr($src,0,$ind);
      }
      $src =~ s/\/$//;
      if (length($src)) {
         return 7;
      }
   }
   return 0;
}
sub get_local_href {
   my ($src) = shift;
   my $ind = index($src,'#');
   if ( $ind != -1 ) {
      $src = substr($src,0,$ind);
   }
   $ind = index($src,'?');
   if ( $ind != -1 ) {
      $src = substr($src,0,$ind);
   }
   $src =~ s/\/$//;
   return $src;
}
sub dos_2_unix($) {
   my ($du) = shift;
   $du =~ s/\\/\//g;
   return $du;
}
### my @donesrcs = ();
sub in_done_srcs {
   my ($f) = shift;
   foreach my $fd (@donesrcs) {
      if ($fd eq $f) {
         return 1;
      }
   }
   return 0;
}
sub in_done_imgs {
   my ($f) = shift;
   foreach my $fd (@doneimgs) {
      if ($fd eq $f) {
         return 1;
      }
   }
   return 0;
}
sub fix_rel_unix_path {
   my ($path) = shift;
   $path = dos_2_unix($path);
   my @a = split(/\//, $path);
   my $npath = '';
   my $max = scalar @a;
   my @na = ();
   for (my $i = 0; $i < $max; $i++) {
      my $p = $a[$i];
      if ($p eq '.') {
         # ignore this
      } elsif ($p eq '..') {
         if (@na) {
            pop @na;   # discard previous
         } else {
            prt( "WARNING: Got relative .. without previous!!!\n" );
         }
      } else {
         push(@na,$p);
      }
   }
   foreach my $pt (@na) {
      $npath .= "/" if length($npath);
      $npath .= $pt;
   }
   return $npath;
}
sub mark_image_link {
   my ($fnd, $src, $lev) = @_;
   my $fcnt = scalar @img_files;
   my $msrc = lc(dos_2_unix($src));
   prt( "Seeking [$msrc] in $fcnt images files ...\n" ) if ($dbg22);
   for (my $i = 0; $i < $fcnt; $i++) {
      my $fil = $img_files[$i][$of_ff];
      my $mfil = lc(dos_2_unix($fil));
      prt( "Comparing to $mfil ...\n" ) if ($dbg22);
      if ($msrc eq $mfil) {
         $val = $img_files[$i][$of_lk];
         $val++;
         $img_files[$i][$of_lk] = $val;
         prt( "IMG link to $fil ($i) $val\n" ) if ($dbg22);
         return 0;
      }
   }
   prt( "$src - NOT FOUND!\n" ) if ($dbg21);
   return 1;
}
sub mark_other_links {
   my ($fnd, $src, $lev) = @_;
   my $totcnt = 0;
   my $msrc = lc(dos_2_unix($src));
   my $fcnt = scalar @img_files;
   my $i;
   $totcnt += $fcnt;
   if (mark_image_link( $fnd, $src, $lev ) == 0) {
      return 0;
   }
   # maybe ZIP files
   $fcnt = scalar @zip_files;
   $totcnt += $fcnt;
   for ($i = 0; $i < $fcnt; $i++) {
      my $fil = $zip_files[$i][$of_ff];
      my $mfil = lc(dos_2_unix($fil));
      if ($msrc eq $mfil) {
         $val = $zip_files[$i][$of_lk];
         $val++;
         $zip_files[$i][$of_lk] = $val;
         prt( "ZIP link to $fil ($i) $val\n" ) if ($dbg21);
         return 0;
      }
   }
   # maybe TXT files
   $fcnt = scalar @txt_files;
   $totcnt += $fcnt;
   for ($i = 0; $i < $fcnt; $i++) {
      my $fil = $txt_files[$i][$of_ff];
      my $mfil = lc(dos_2_unix($fil));
      if ($msrc eq $mfil) {
         $val = $txt_files[$i][$of_lk];
         $val++;
         $txt_files[$i][$of_lk] = $val;
         prt( "TXT link to $fil ($i) $val\n" ) if ($dbg21);
         return 0;
      }
   }
   # maybe CSS files
   $fcnt = scalar @css_files;
   $totcnt += $fcnt;
   for ($i = 0; $i < $fcnt; $i++) {
      my $fil = $css_files[$i][$of_ff];
      my $mfil = lc(dos_2_unix($fil));
      if ($msrc eq $mfil) {
         $val = $css_files[$i][$of_lk];
         $val++;
         $css_files[$i][$of_lk] = $val;
         prt( "CSS link to $fil ($i) $val\n" ) if ($dbg21);
         return 0;
      }
   }
   # maybe SCRIPT files
   $fcnt = scalar @script_files;
   $totcnt += $fcnt;
   for ($i = 0; $i < $fcnt; $i++) {
      my $fil = $script_files[$i][$of_ff];
      my $mfil = lc(dos_2_unix($fil));
      if ($msrc eq $mfil) {
         $val = $script_files[$i][$of_lk];
         $val++;
         $script_files[$i][$of_lk] = $val;
         prt( "SCRIPT link to $fil ($i) $val\n" ) if ($dbg21);
         return 0;
      }
   }
   # OK, OTHER
   $fcnt = scalar @other_files;
   $totcnt += $fcnt;
   for ($i = 0; $i < $fcnt; $i++) {
      my $fil = $other_files[$i][$of_ff];
      my $mfil = lc(dos_2_unix($fil));
      if ($msrc eq $mfil) {
         $val = $other_files[$i][$of_lk];
         $val++;
         $other_files[$i][$of_lk] = $val;
         prt( "OTHER link to $fil ($i) $val\n" ) if ($dbg21);
         return 0;
      }
   }
   $totcnt += 1 if ($totcnt == 0);
   return $totcnt;
}
sub mark_link {
   my ($fnd, $src, $lev) = @_;
   my $fcnt = scalar @htm_files;
   my $msrc = lc(dos_2_unix($src));
   my $ff = $htm_files[$fnd][$of_ff];
   my $i = 0;
   my $totcnt = $fcnt;
   my $fil = '';
   my $mfil = '';
   my $val = 0;
   for ($i = 0; $i < $fcnt; $i++) {
      if ($i != $fnd) {
         $fil = $htm_files[$i][$of_ff];
         $mfil = lc(dos_2_unix($fil));
         if ($msrc eq $mfil) {
            $val = $htm_files[$i][$of_lk];
            $val++;
            $htm_files[$i][$of_lk] = $val;
            prt( "$ff ($fnd) linked to $fil ($i) $val\n" ) if ($dbg21);
            my $hr = $htm_files[$i][$of_hr];   # extract HREF ref.array
            my $im = $htm_files[$i][$of_im];   # extract IMAGE ref.array
            my $hrc = scalar @{$hr};
            my $imc = scalar @{$im};         # get count of images
            my $j = 0;
            my ($itmnam, $itmdir) = fileparse($fil);   # get name and path
            prt( "$lev [$fil] has $hrc href, $imc img/other links ..\n" ) if ($dbg19 || $dbg26);
            for ($j = 0; $j < $hrc; $j++) {
               my $hrf = ${$hr}[$j];
               my $hrt = get_href_type($hrf); 
               if ($hrt == 7) {
                  my $nsrc = fix_rel_unix_path($itmdir.get_local_href($hrf));
                  if ( !in_done_srcs($nsrc) ) {
                     push(@donesrcs, $nsrc);   # put it in DONE list
                     mark_link( $i, $nsrc, $lev + 1 );   # and MARK its links now
                  }
               }
            }
            $val = 0;
            prt( "$fil - Checking $imc images files ...\n") if ($dbg24);
            for ($j = 0; $j < $imc; $j++) {      # do each, in this linked file
               my $img = ${$im}[$j];         # get the image string
               my $isrc = $itmdir.$img;      # join it with the path
               my $nisrc = fix_rel_unix_path($isrc);   # fix rel, and force unix path
               prt( "Marking [$nisrc] - ".($j+1)." of $imc img/other links ..\n" ) if ($dbg19 || $dbg26);
               if ( !in_done_imgs($nisrc) ) {
                  push(@doneimgs, $nisrc);   # put it in DONE list
                  mark_other_links( $j, $nisrc, 0 );   # and MARK the link in @img_files
                  $val++;
               } else {
                  prt( "Already IN doneimgs ...\n" ) if ($dbg19 || $dbg26);
               }
            }
            prt( "$fil - Marked $val of $imc images files ...\n") if ($val && $dbg24);
            return 0;
         }
      }
   }
   # hmmmm, LINK not found in HREF files, maybe IMAGES, zip, etc ...
   $val = mark_other_links( $fnd, $src, $lev );
   if ($val) {
      $totcnt += $val;
      prt( "NO LINK FOUND HREF [$src]($msrc) in $totcnt file - $ff ($fnd) - ($lev)!\n" ) if ($dbg25);
      return 1;
   }
   return 0;
}
sub show_link_counts {
   my ($m, $hf) = @_;
   my $fcnt = scalar @{$hf};
   my $mcnt = 0;
   my $mss = "Showing LINKS for $fcnt $m files ...\n"; 
   if ($fcnt) {
      for (my $i = 0; $i < $fcnt; $i++) {
         my $fil = ${$hf}[$i][$of_ff];
         my $hrt = ${$hf}[$i][$of_lk];
         if ($hrt) {
            if ($dbg20) {
               prt( $mss ) if (length($mss));
               $mss = '';
               prt( "$i: $fil has $hrt links\n" );
            }
         } else {
            prt( $mss ) if (length($mss));
            $mss = '';
            prt( "$i: $fil has NO LINKS!\n" );
            $mcnt++;
         }
      }
      if ($mcnt) {
         prt( "Done LINKS for $fcnt $m files ... MISSED $mcnt!!!\n" );
      }
   } else {
      prt( "There are NO $m files ...\n" ) if ($dbg20);
   }
}
sub in_excused {
   my ($tx) = shift;
   foreach my $t (@excused) {
      if ($t eq $tx) {
         return 1;
      }
   }
   return 0;
}
# using the given HOME PAGE, try to TRACE ALL LINKS
sub check_local_links {
   my ($hf) = shift;
   my $fcnt = scalar @htm_files;
   my ($hfnm,$hfdir) = fileparse($hf);
   my $lchf = lc($hfnm);
   my $fnd = -1;
   my ($fil,$nm,$dir,$ext);
   if ($hfdir eq ".\\") {
      $hfdir = $in_folder."\\";
   }
   my $itmdir = '';
   my $itmnam = '';
   my $i = 0;
   my $i2 = 0;
   prt( "Checking local links, for $fcnt files. Finding $hf ...\n");
   for ($i = 0; $i < $fcnt; $i++) {
      $fil = $htm_files[$i][$of_ff];
      ($nm,$dir,$ext) = fileparse( $fil, qr/\.[^.]*/ );
      if (lc($nm.$ext) eq $lchf) {
         # have at least the NAME, but maybe not the FOLDER
         if (lc($hfdir) eq lc($dir)) {
            $fnd = $i;
            prt( "Found $nm$ext at index $fnd ... folder $dir ...\n" );
            last;
         }
      }
      ### prt( "[$nm$ext] is NOT [$lchf]  ... [$fil] $hf ...\n" );
   }
   if ($fnd == -1) {
      prt( "WARNING: check_local_links: Unable to find [$hf] ...\n" );
      return 1;
   }
   # process item 1 ...
   $procnt = 1;
   my $hr = $htm_files[$fnd][$of_hr];
   my $im = $htm_files[$fnd][$of_im];
   my $hrc = scalar @{$hr};
   my $imc = scalar @{$im};
   my $hrf = '';
   my $img = '';
   my $hrt = 0;
   my $src = '';
   my $nsrc = '';
   my $ff = '';
   my $shwerr = 0;
   my $emsg = '';
   $fil = $htm_files[$fnd][$of_ff];   # extract FULL PATH name of file ...
   $htm_files[$fnd][$of_lk] = 1;
   ($itmnam, $itmdir) = fileparse($fil);   # get name and path
   prt( "HOME [$fil] has $hrc href, $imc img/other links ..\n" ) if ($dbg19 || $dbg26);
   for ($i = 0; $i < $hrc; $i++) {
      $hrf = ${$hr}[$i];
      $hrt = get_href_type($hrf); 
      $i2 = $i + 1;
      $shwerr = 0;
      $emsg = "IN[$fil] $i2 HREF [$hrf]$hrt";
      if ($hrt == 0) {
         if (in_excused($hrf)) {
            $excusecnt++;
         } else {
            $emsg .= " CHECK UNKNOWN ****** CHECK ME ******[1]";
            push(@warnings, "WARNING: $emsg!" );
            $shwerr = 1;
         }
      } elsif ($hrt < 5) {
         $emsg .= " REMOTE";
      } elsif ($hrt == 5) {
         $emsg .= " JAVASCRIPT";
      } elsif ($hrt == 6) {
         $emsg .= " IN PAGE";
      } else {
         my $lref = get_local_href($hrf);
         $src = $itmdir.$lref;
         my $nusrc = fix_rel_unix_path($src);
         ### prt( "REL PATH [$src] to UNIX PATH [$nusrc]\n" );
         push(@donesrcs, $nusrc);   # put it in DONE list
         if ( mark_link( $fnd, $nusrc, 0 ) ) {
            $emsg .= " SITE REF [$nusrc] ***NO IN-SITE LINK***???";
            $msg = "$i2 [$fil] HREF [$hrf]$hrt SITE REF [$nusrc] ***NO IN-SITE LINK***???";
            if (-f $src) {
               $msg .= "\n*** BUT FILE EXISTS [$src] ***";
               $emsg .= "\n*** BUT FILE EXISTS [$src] ***";
               push(@warnings, "WARNING: Local HREF [$lref] in [$fil] OUTSIDE WEB! but EXISTS!" );
            } else {
               push(@missed, $msg );
               $shwerr = 1;
            }
         } else {
            $emsg .= " SITE REF [$src] ok" if ($dbg19);
         }
      }
      prt( "$emsg\n" ) if ($dbg19 || $shwerr);
   }
   prt( "HOME - Marking $imc images files ...\n") if ($dbg24);
   for ($i = 0; $i < $imc; $i++) {
      $img = ${$im}[$i];
      $src = $itmdir.$img;
      $nsrc = fix_rel_unix_path($src);
      prt( "HOME $fil - Mark $src ($nsrc) image ...\n" ) if ($dbg24);
      push(@doneimgs, $nsrc);   # put it in DONE list
      mark_other_links( $i, $nsrc, 0 );
   }
   return 0;
}
sub check_linkages {
   my ($hf) = shift;
   my $fcnt = scalar @htm_files;
   my ($hfnm,$hfdir) = fileparse($hf);
   my $lchf = lc($hfnm);
   my $fnd = -1;
   my ($fil,$nm,$dir,$ext);
   if ($hfdir eq ".\\") {
      $hfdir = $in_folder."\\";
   }
   my $itmdir = '';
   my $itmnam = '';
   my $i = 0;
   my $i2 = 0;
   prt( "Re-checking HREF and IMG/OTHER links, for $fcnt files ...\n");
   for ($i = 0; $i < $fcnt; $i++) {
      $fil = $htm_files[$i][$of_ff];
      ($nm,$dir,$ext) = fileparse( $fil, qr/\.[^.]*/ );
      if (lc($nm.$ext) eq $lchf) {
         # have at least the NAME, but maybe not the FOLDER
         if (lc($hfdir) eq lc($dir)) {
            $fnd = $i;
            prt( "Found $hf at index $fnd\n" );
            last;
         }
      }
      ### prt( "[$nm$ext] is NOT [$lchf]  ... [$fil] $hf ...\n" );
   }
   if ($fnd == -1) {
      prt( "WARNING: Unable to find [$hf] ...\n" );
      push(@warnings, "WARNING: Unable to find [$hf] ...");
      return 1;
   }
   # process item 1 ...
   $procnt = 1;
   my $hr = $htm_files[$fnd][$of_hr];
   my $im = $htm_files[$fnd][$of_im];
   my $hrc = scalar @{$hr};
   my $imc = scalar @{$im};
   my $hrf = '';
   my $img = '';
   my $hrt = 0;
   my $src = '';
   my $ff = '';
   my $shwerr = 0;
   my $emsg = '';
   $fil = $htm_files[$fnd][$of_ff];   # extract FULL PATH name of file ...
   ($itmnam, $itmdir) = fileparse($fil);   # get name and path
   prt( "\n" ) if ($dbg18);
   prt( "$procnt [$fil] has $hrc href, and $imc image links ..\n" ) if ($dbg18);
   for ($i = 0; $i < $hrc; $i++) {
      $hrf = ${$hr}[$i];
      $hrt = get_href_type($hrf); 
      $i2 = $i + 1;
      $shwerr = 0;
      $emsg = "HH[$fil] ";
      $emsg .= "$i2 HREF [$hrf]$hrt";
      if ($hrt == 0) {
         if (in_excused($hrf)) {
            $excusecnt++;
         } else {
            $emsg .= " CHECK UNKNOWN ****** CHECK ME ******[2]";
            push(@warnings, "WARNING: $emsg" );
            $shwerr = 1;
         }
      } elsif ($hrt < 5) {
         $emsg .= " REMOTE";
      } elsif ($hrt == 5) {
         $emsg .= " JAVASCRIPT";
      } elsif ($hrt == 6) {
         $emsg .= " LOCAL";
      } else {
         $src = $itmdir.get_local_href($hrf);
         if (-f $src) {
            $emsg .= " SITE REF [$src] ok";
         } else {
            $emsg .= " SITE REF [$src] ***MISSING***?[1]";
            push(@missed, $emsg );
            $shwerr = 1;
         }
      }
      prt( "$emsg\n" ) if ($dbg18 || $shwerr);
   }
   # From this beginning
   for (my $j = 0; $j < $fcnt; $j++) {
      $fil = $htm_files[$j][$of_ff];
      ($itmnam, $itmdir) = fileparse($fil);   # get name and path
      if ($j != $fnd) {
         $procnt++;
         $hr = $htm_files[$j][$of_hr];
         $im = $htm_files[$j][$of_im];
         $hrc = scalar @{$hr};
         $imc = scalar @{$im};
         prt( "\n" ) if ($dbg18);
         prt( "$procnt [$fil] has $hrc href, and $imc image links ..\n" ) if ($dbg18);
         for ($i = 0; $i < $hrc; $i++) {
            $hrf = ${$hr}[$i];
            $hrt = get_href_type($hrf); 
            $i2 = $i + 1;
            $shwerr = 0;
            $emsg = "HF[$fil] ";
            $emsg .= "$i2 HREF [$hrf]$hrt";
            if ($hrt == 0) {
               if (in_excused($hrf)) {
                  $excusecnt++;
               } else {
                  $emsg .= " CHECK UNKNOWN ****** CHECK ME ******[3]";
                  push(@warnings, "WARNING: $emsg");
                  $shwerr = 1;
               }
            } elsif ($hrt < 5) {
               $emsg .= " REMOTE";
            } elsif ($hrt == 5) {
               $emsg .= " JAVASCRIPT";
            } elsif ($hrt == 6) {
               $emsg .= " LOCAL";
            } else {
               $src = $itmdir.get_local_href($hrf);
               if (-f $src) {
                  $emsg .= " SITE REF [$src] ok";
               } else {
                  $emsg .= " SITE REF [$src] ***MISSING***?[3]";
                  push(@missed, $emsg );
                  $shwerr = 1;
               }
            }
            prt( "$emsg\n" ) if ($dbg18 || $shwerr);
         }
         for (my $i = 0; $i < $imc; $i++) {
            $img = ${$im}[$i];
            $emsg = "IF[$fil] [$img] ";
            if ($img =~ /^http:\/\/.*/i) {
               if ($showhreflinks) {
                  push(@warnings, "WARNING: IMG link is HREF $emsg [1]");
               } else {
                  $hrflnkcnt++;
               }
            } else {
               $src = $itmdir.$img;
               $shwerr = 0;
               if (-f $src) {
                  $emsg .= " IMG ok";
               } else {
                  $emsg .= " IMG ***MISSING***?[5]";
                  push(@missed, $emsg );
                  $shwerr = 1;
               }
            }
            prt( "$emsg\n" ) if ($dbg18 || $shwerr);
         }
      }
   }
   return 0;
}
sub check_images {
   my ($ifile, @srcs) = @_;
   my ($nm, $dir) = fileparse($ifile);
   my $scnt = scalar @srcs;
   if ($scnt) {
      prt( "Found $scnt imgs in $nm ...\n" ) if ($dbg14);
      for (my $i = 0; $i < $scnt; $i++) {
         my $src = $srcs[$i][0];
         my $lnn = $srcs[$i][1];
         if ($src =~ /^http:\/\//i) {
            # remote HREF
         } else {
            my $ff = $dir.$src;
            if ( -f $ff ) {
               prt( "$src - ok\n" ) if ($dbg13);
            } else {
               my $msg = "WARNING: [$src] $ifile:$lnn NOT FOUND!";
               push(@warnings, $msg);
               prt( "$msg\n" ) if ($dbg16);
            }
         }
      }
   } else {
      prt( "Found NO imgs in [$ifile] ...\n" ) if ($dbg15);
   }
   return $scnt;
}
sub get_img_srcs {
   my ($fil, @lns) = @_;
   my $lc = scalar @lns;
   my $scnt = 0;
   my ($nm,$dir) = fileparse( $fil );
   #my $sdbg12 = $dbg12;
   #my $sdbg11 = $dbg11;
   #my $sdbg16 = $dbg16;
   #my $sdbg10 = $dbg10;
   #if (lc($nm) eq 'moon.htm') {
   #   $dbg12 = 1;
   #   $dbg11 = 1;
   #   $dbg16 = 1;
   #   $dbg10 = 1;
   #}
   prt( "Processing $lc lines from [$nm] dir=[$dir]...\n" ) if ($dbg12);
   my @isrc = ();
   my $ln = '';
   my $bal = '';
   my $inscript = 0;
   my $msg = '';
   my $bgnln = 0;
   my $lnnos = '';
   for (my $i = 0; $i < $lc; $i++) {
      $ln = $bal;
      $bal = '';
      $ln .= $lns[$i];
      chomp $ln;
      prt( "$i [$ln] ...\n" ) if ($dbg10);
      if ($inscript) {
         if ($ln =~ /<\/script>/i) {
            $inscript =0;
            prt( "EXIT a SCRIPT ...\n" ) if ($dbg11);
         }
         next;
      }
      if ( $ln =~ /<img\s+(.*)/i ) {
         my $iln = $1;
         if ( $ln =~ /<script.*>/i ) {
            $msg = "WARNING: Also found SCRIPT in IMG line ...[$ln]";
            push(@warnings, $msg);
            prt( "$msg\n" ) if ($dbg16);
         }
         prt( "Found [$iln] ...\n" ) if ($dbg10);
         $bgnln = $i;
         while ( !($iln =~ />/) && ($i < $lc)) {
            $i++;
            my $nxln = $lns[$i];
            chomp $nxln;
            prt( "Adding [$nxln] ...\n" ) if ($dbg10);
            $iln .= ' ' if !($iln =~ /=$/);
            $iln .= $nxln;
         }
         $lnnos = "$bgnln:$i";
         my $ind = index($iln, '>');
         if ($ind != -1) {
            $bal = substr($iln, $ind+1);
            $iln = substr($iln, 0, $ind+1);
         }
         $iln = trim_all($iln);
         #if ($iln =~ /src=\"(.+)\"/i) {
         if ($iln =~ /src=\s*\"(\S+)\"/i) {
            prt( "SRC = $1 In line [$iln]$lnnos...\n" ) if ($dbg10);
            push(@isrc, [$1, $lnnos, $fil]);
            $scnt++;
         } elsif ($iln =~ /src=\s*(\S+)/i) {   # without QUOTES
            prt( "SRC = $1 In line [$iln]$lnnos...\n" ) if ($dbg10);
            push(@isrc, [$1, $lnnos, $fil]);
            $scnt++;
         } elsif ($iln =~ /src=\s*\'(\S+)\'/i) {   # single QUOTES
            prt( "SRC = $1 In line [$iln]$lnnos...\n" ) if ($dbg10);
            push(@isrc, [$1, $lnnos, $fil]);
            $scnt++;
         } else {
            $msg = "WARNING: SRC NOT FOUND in [$iln]$fil:$lnnos...";
            push(@warnings, $msg);
            prt( "$msg\n" ) if ($dbg16);
         }
      } elsif ( $ln =~ /<script.*>/i ) {
         $inscript = 1;
         prt( "Entered a SCRIPT ...\n" ) if ($dbg11);
         if ($ln =~ /<\/script>/i) {
            $inscript =0;
            prt( "EXIT a SCRIPT ...\n" ) if ($dbg11);
         }
      }
   }
   prt( "Returning $scnt img sources ...\n") if ($dbg10);
   #$dbg12 = $sdbg12;
   #$dbg11 = $sdbg11;
   #$dbg16 = $sdbg16;
   #$dbg10 = $sdbg10;
   return @isrc;
}
sub check_hrefs {
   my ($fil, @srcs) = @_;
   my ($fnm,$fdir,$fext) = fileparse( $fil, qr/\.[^.]*/ );
   my $scnt = scalar @srcs;
   my $isphp = (lc($fext) eq '.php');
   if ($scnt) {
      prt( "Found $scnt anchor href= in $fnm$fext ...\n" ) if ($dbg7);
      for (my $i = 0; $i < $scnt; $i++) {
         my $orgsrc = $srcs[$i][0];
         my $lnnos = $srcs[$i][1];
         my $src = $orgsrc;
         if ($src =~ /^http:/i) {
            # remote HREF
            push(@httprefs, [$src, $fil, $lnnos] );
         } elsif ($src =~ /^https:/i) {
            # remote HREF
            push(@httpsrefs, [$src, $fil, $lnnos] );
         } elsif ($src =~ /^ftp:/i) {
            # remote HREF
            push(@ftprefs, [$src, $fil, $lnnos] );
         } elsif ($src =~ /^mailto:/i) {
            # remote HREF
            push(@mtrefs, [$src, $fil, $lnnos] );
         } elsif ( $src =~ /^#/ ) {
            # local in page HREF
         } elsif ( $src =~ /^javascript:/i ) {
            # a JAVASCRIPT HREF
         } else {
            my $ind = index($src,'#');
            if ( $ind != -1 ) {
               $src = substr($src,0,$ind);
            }
            $ind = index($src,'?');
            if ( $ind != -1 ) {
               $src = substr($src,0,$ind);
            }
            $src =~ s/\/$//;
            if (length($src)) {
               my $ff = $fdir.$src;
               if ( -f $ff ) {
                  prt( "$src - ok\n" ) if ($dbg5);
               } else {
                  my $msg = "WARNING: href [$src] file NOT FOUND! in [$fil]$lnnos";
                  push(@warnings, $msg);
                  prt( "$msg\n" ) if ($dbg16);
               }
            } else {
               if ($isphp) {
                  prt( "Found BLANK HREF in PHP $fil ...\n" ) if ($dbg17);
               } else {
                  $msg = "WARNING: Found BLANK HREF in $fil ...";
                  push(@warnings, $msg);
                  prt( "$msg\n" ) if ($dbg16);
               }
            }
         }
      }
   } else {
      if ($isphp) {
         prt( "Found NO HREFs in PHP $fil ...\n" ) if ($dbg17);
      } else {
         prt( "Found NO HREFs in $fil ...\n" );
      }
   }
}
############################################################
# Only used is $chkip = 1;
# Show IP Address
# uses sockets, gethostbyname
# Return 0, if can NOT be resolved.
# else the number of IP addresses resolved.
############################################################
sub showIPAddress {
   my ($nm) = shift;
   my @addr = gethostbyname($nm);
   my $cnt = 0;
   if( !@addr ) {
      prt( "Can't resolve $nm: $!\n" );
      return 0;
   }
   @addr = map { inet_ntoa($_) } @addr[4 .. $#addr];
   foreach my $k (@addr) {
      $cnt++;
      prt( "$cnt: $nm resolves to IP [$k]\n" ) if ($dbg3);
   }
   return $cnt;
}
################################################
# Add to @scripts multidimensional array,
# if NOT already in there, when on the line
# numbers are added.
###############################################
sub add_2_scripts {
   my ($fil, $lns) = @_;
   my $sc = scalar @scripts;
   for (my $i = 0; $i < $sc; $i++) {
      my $cf = $scripts[$i][0];
      if ($cf eq $fil) {
         my $lc = $scripts[$i][1];
         $lc .= ":$lns";
         $scripts[$i][1] = $lc;
         return 0;
      }
   }
   push(@scripts, [$fil, $lns]);
   return 1;
}
####################################################
# Get HREF sources
# Given an ARRAY of file lines, check for
# anchor href="something" ...
# Return the "something" in an array
####################################################
sub get_href_srcs {
   my ($fil, @lns) = @_;
   my $lc = scalar @lns;
   my $scnt = 0;
   my $slns = 0;   # count the SCRIPT lines
   my ($nm,$dir) = fileparse( $fil );
   prt( "Processing $lc lines from [$nm] dir=[$dir]...\n" ) if ($dbg6);
   my @isrc = ();
   my $ln = '';
   my $bal = '';
   my $inscript = 0;
   $slns = 0;
   my $bgnln = 0;
   my $endln = 0;
   for (my $i = 0; $i < $lc; $i++) {
      $ln = $bal;
      $bal = '';
      $ln .= $lns[$i];
      chomp $ln;
      prt( "$i [$ln] ...\n" ) if ($dbg10);
      if ($inscript) {
         if ($ln =~ /<\/script>/i) {
            $inscript =0;
            prt( "EXIT a SCRIPT ...\n" ) if ($dbg4);
            add_2_scripts( $fil, $slns );
            $slns = 0;
            next;
         }
         $slns++;
         next;
      }
      if ( $ln =~ /<a\s+(.*)/i ) {
         my $iln = $1;
         prt( "Found [$iln] ...\n" ) if ($dbg10);
         $bgnln = $i;
         while ( !($iln =~ />/) && ($i < $lc)) {
            $i++;
            my $nxln = $lns[$i];
            chomp $nxln;
            prt( "Adding [$nxln] ...\n" ) if ($dbg10);
            $iln .= ' ' if !($iln =~ /=$/);
            $iln .= $nxln;
         }
         $endln = $i;
         my $ind = index($iln, '>');
         if ($ind != -1) {
            $bal = substr($iln, $ind+1);
            $iln = substr($iln, 0, $ind+1);
         }
         #if ($iln =~ /src=\"(.+)\"/i) {
         if ($iln =~ /href\s*=\s*\"(\S+)\"/i) {
            prt( "HREF = $1\nIn line [$iln]...\n" ) if ($dbg10);
            push(@isrc, [$1, "$bgnln:$endln"] );
            $scnt++;
         } else {
            if (( $iln =~ /name=\s*\"(\S+)\"/i )||( $iln =~ /name=(\S+)/i )) {
               # ignore BOOKMARKS
            } else {
               $msg = "WARNING: HREF NOT FOUND in [$iln]...";
               push(@warnings, $msg);
               prt( "$msg\n" ) if ($dbg16);
            }
         }
      } elsif ( $ln =~ /<script.*>/i ) {
         $inscript = 1;
         prt( "Entered a SCRIPT ...\n" ) if ($dbg4);
         $slns = 0;
         $ln = substr($ln, 7);
         if ($ln =~ /<\/script>/i) {
            $inscript =0;
            prt( "EXIT a SCRIPT ...\n" ) if ($dbg4);
            add_2_scripts( $fil, 1 );
            $slns = 0;
         }
      }
   }
   if ($inscript) {
      $msg = "WARNING: EXIT WHILE IN SCRIPT in [$fil]...";
      push(@warnings, $msg);
      prt( "$msg\n" ) if ($dbg16);
   }
   prt( "Returning $scnt HREF sources ...\n") if ($dbg10);
   return @isrc;
}
##########################################################
# Parse USER input
# Largerly still to be done
##########################################################
sub parse_args {
   my (@av) = @_;
   while (@av) {
      $in_folder = $av[0];
      prt( "Input folder set to [$in_folder]...\n" );
      shift @av;
   }
}
#########################################################
# Passed an array of extensions,
# check if this is one of them?
#########################################################
sub is_my_ext {
   my ($fil, @exts) = @_;
   my ($nm,$dir,$ext) = fileparse( $fil, qr/\.[^.]*/ );
   foreach my $ex (@exts) {
      if (lc($ex) eq lc($ext)) {
         return 1;
      }
   }
   return 0;
}
############################################
# only looking for HTM, HTML, PHP,
# could be extended to others maybe ...
############################################
sub is_htm_ext {
   my ($fil) = shift;
   return( is_my_ext($fil, @html_ext) );
}
sub is_graphic_ext {
   my ($fil) = shift;
   return( is_my_ext($fil, @graf_ext) );
}
sub is_zip_ext {
   my ($fil) = shift;
   my @arr = qw( .zip );
   return( is_my_ext($fil, @arr) );
}
sub is_css_ext {
   my ($fil) = shift;
   return( is_my_ext($fil, @css_ext) );
}
sub is_txt_ext {
   my ($fil) = shift;
   my @arr = qw( .txt );
   return( is_my_ext($fil, @arr) );
}
sub is_script_ext {
   my ($fil) = shift;
   return( is_my_ext($fil, @script_ext) );
}
################################################
# my $ignfpd = 1;   # ignore FRONTPAGE folders
################################################
sub is_fp_folder {
   my ($inf) = shift;
   foreach my $fil (@fpfolders) {
      if (lc($inf) eq lc($fil)) {
         return 1;
      }
   }
   return 0;
}
####################################
# Check if FILE is in EXCLUDE list
####################################
sub in_excludes {
   my ($fil) = shift;
   my $lcf = lc($fil);
   foreach my $f (@excludes) {
      if (lc($f) eq $lcf) {
         return 1;
      }
   }
   return 0;
}
####################################################################
# process_folder(folder) 
# Main DIRECTORY processing function
#
# Open the FOLDER given, and collect ALL files found,
# iterate into sub-directories, if $recurse is non-zero,
# and it is NOT a special FRONTPAGE (hidden) FOLDER.
#
# Files are collected into multidemensional arrays
# if NOT in @exclude list.
# if (is_htm_ext($fil)) {
#  push(@htm_files, [$ff, '', '', 0, 0] );
# } elsif (is_graphic_ext($fil)) {
#  push(@img_files, [$ff, '', '', 0, 0] );
# } elsif (is_zip_ext($fil)) {
#  push(@zip_files, [$ff, '', '', 0, 0] );
# } elsif (is_css_ext($fil)) {
#  push(@css_files, [$ff, '', '', 0, 0] );
# } elsif (is_txt_ext($fil)) {
#  push(@txt_files, [$ff, '', '', 0, 0] );
# } elsif (is_script_ext($fil)) {
#  push(@script_files, [$ff, '', '', 0, 0] );
# } else {
#  push(@other_files, [$ff, '', '', 0, 0] );
# }
#
####################################################################
sub process_folder {
   my ($inf) = shift;
   my $fcnt = 0;
   prt( "Processing $inf folder ...\n" ) if ($dbg1);
   if ( opendir( DIR, $inf ) ) {
      my @files = readdir(DIR);
      closedir DIR;
      foreach my $fil (@files) {
         if (($fil eq ".")||($fil eq "..")) {
            next;
         }
         my $ff = $inf."\\".$fil;
         if ( -d $ff ) {
            if ($recurse) {
               if ($ignfpd && is_fp_folder($fil)) {   # ignore FRONTPAGE folders
                  next;
               }
               process_folder( $ff );
            }
         } else {
            # NOTE: multidimensional arrays pushed - offsets into arrays
            # my $of_ff = 0;   # 1 - full file name
            # my $of_hr = 1;   # 2 - array ref of href links
            # my $of_im = 2;   # 3 - array ref of image links
            # my $of_lk = 3;   # 4 - linked count
            # my $of_sp = 4;   # 5 - spare
            if ( !in_excludes($fil) ) {
               my ($nm,$dir,$ext) = fileparse( $fil, qr/\.[^.]*/ );
               my $val = 0;
               $val = $ext_hash{$ext} if ( defined $ext_hash{$ext} );
               $val++;
               $ext_hash{$ext} = $val;
               if (is_htm_ext($fil)) {
                  push(@htm_files, [$ff, '', '', 0, 0] );
                  $fcnt++;
               } elsif (is_graphic_ext($fil)) {
                  push(@img_files, [$ff, '', '', 0, 0] );
               } elsif (is_zip_ext($fil)) {
                  push(@zip_files, [$ff, '', '', 0, 0] );
               } elsif (is_css_ext($fil)) {
                  push(@css_files, [$ff, '', '', 0, 0] );
               } elsif (is_txt_ext($fil)) {
                  push(@txt_files, [$ff, '', '', 0, 0] );
               } elsif (is_script_ext($fil)) {
                  push(@script_files, [$ff, '', '', 0, 0] );
               } else {
                  push(@other_files, [$ff, '', '', 0, 0] );
               }
            }
         }
      }
      prt( "Processed $inf folder finding $fcnt HTML files ...\n" ) if ($dbg1);
   } else {
      prt( "ERROR: Failed to open folder $inf ...\n" );
   }
}
##############################################
# Just to show the COUNTS in the ARRAYS
##############################################
sub show_found_counts {
   my $cnt = scalar @htm_files;
   prt( "Found $cnt HTML, ");
   $cnt = scalar @img_files;
   prt( "$cnt images, " );
   $cnt = scalar @css_files;
   prt( "$cnt css, " );
   $cnt = scalar @zip_files;
   prt( "$cnt zip, " );
   $cnt = scalar @txt_files;
   prt( "$cnt txt, " );
   $cnt = scalar @script_files;
   prt( "$cnt script, " );
   $cnt = scalar @other_files;
   prt( "and $cnt others ...\n" );
   $cnt = scalar keys %ext_hash;
   if ($dbg27) {
      prt( "$cnt extensions, and each count ...\n" );
      foreach my $key (keys %ext_hash) {
         my $val = $ext_hash{$key};
         prt( "$val $key ");
      }
      prt("\n");
   }
}
# @ipsfound = <INF>;
sub in_ips_found {
   my ($ip) = shift;
   my $lcip = lc($ip);
   foreach my $i (@ipsfound) {
      chomp $i;
      if (lc($i) eq $lcip) {
         return 1;
      }
   }
   return 0;
}
#######################################################
# Process the HTTP HREF sources
# if $chkip = 1; then attempt to resolve the IP
# addresses from the host name.
#######################################################
sub process_host_array {
   $hcnt = scalar @httprefs;
   if ($hcnt) {
      prt( "Found $hcnt HREF entries ...\n" );
      for (my $i = 0; $i < $hcnt; $i++) {
         $href = $httprefs[$i][0];
         $file = $httprefs[$i][1];
         my $lnn = $httprefs[$i][2];
         my ($nm,$dir) = fileparse($file);
         if (defined( $hrefs{$href} )) {
            $val = $hrefs{$href};
            $val .= ' '.$file;
         } else {
            $val = $file;
         }
         $val .= ":$lnn";
         $hrefs{$href} = $val;
         prt( "$href in [$file]$lnn\n" ) if ($dbg2);
      }
      $hcnt = scalar keys(%hrefs);
      prt( "Found $hcnt different entries ...\n" );
      if ($chkip) {
         my $inips = 0;
         prt( "Checking $hcnt IP addresses ... " );
         if ($writeips && ( -f $ipfile)) {
            if (open INF, "<$ipfile") {
               @ipsfound = <INF>;
               close INF;
               prt( "Have ".scalar @ipsfound." in $ipfile" );
            } else {
               prt( "Warning: Failed to open $ipfile" );
            }
         }
         prt("\n");
         $procnt = 0;
         foreach my $key (keys %hrefs) {
            $val = $hrefs{$key};
            $procnt++;
            prt( "$key in $val\n" ) if ($dbg8);
            if ($key =~ /^http:\/\//i) {
               my $hkey = substr($key, 7);
               my @arr = split( /\//, $hkey );
               $hkey = $arr[0];
               if ( !in_ips_found($hkey) ) {
                  if (showIPAddress( $hkey ) == 0) {
                     $msg = "FAILED: NO IP FOR HOST [$hkey][$val]";
                     push(@warnings, $msg);
                     prt( "$msg\n" ) if ($dbg16);
                  } elsif ($writeips) {
                     push(@ipsfound,"$hkey\n");
                  }
               } else {
                  $inips++;
               }
            }
            if (($procnt % 100) == 0) {
               prt( "Done $procnt IP Addresses ...\n" );
            }
         }
         prt( "Completed $procnt IP Addresses ... " );
         if ($writeips) {
            $val = join("\n", sort @ipsfound);
            $val = trimblanklines($val);
            write2file($val, $ipfile);
            prt( "$inips in previous. Written ".scalar @ipsfound." to $ipfile" );
         }
         prt("\n");
      }
   }
}
# eof - chklinks01.pl

index -|- top

checked by tidy  Valid HTML 4.01 Transitional