pageview02.pl to HTML.

index -|- end

Generated: Tue Feb 2 17:54:52 2010 from pageview02.pl 2006/07/06 7.9 KB.

#!/Perl
#
# AIM: Get info from net, and generate a page viewer ...
use warnings;
# NOTE(review): `use strict` is not enabled yet; audit the whole script for
# strict-cleanliness before turning it on.
use File::Basename qw(basename);
use Win32::Internet;
#use LWP::Simple;

# $HF is the handle of the generated HTML page; it is written by the main
# script and by the out_htm_* / add_form subs. $LF is declared but not
# referenced anywhere else in the visible script.
my ($LF, $HF);

# Use only the base name of the script for the log file name: $0 can carry a
# directory part (e.g. "perl C:\tools\pageview02.pl"), which would otherwise
# produce an unusable file name.
my $outfile = 'temp' . basename($0) . '.txt';
my $htmfile = 'temppage2.htm';      # generated page viewer
my $savefile = 'tempsave.htm';      # raw copy of the fetched page

# require() dies by itself when the file is missing or returns false, so no
# "or die" is needed. prt(), mydie(), open_log() and close_log() are
# presumably provided by logfile.pl -- confirm.
require "logfile.pl";
open_log($outfile);
open($HF, '>', $htmfile) or mydie("Failed to create $htmfile ...\n" );

my $testfile = 'temphtm.htm';       # input used instead of the net when $loadfile is set
prt( "$0 ... Hello, World...\n" );

# Candidate search URLs; only the one assigned to $url below is fetched.
my $yurl = 'http://search.yahoo.com/search?p=javascript+iframe+scrollTo&sm=Yahoo%21+Search&fr=FP-tab-web-t403&toggle=1&cop=&ei=UTF-8';
my $gurl = 'http://www.google.com/search?q=javascript+iframe+scrollTo';
my $gurl2 = 'http://www.google.com/search?q=javascript+iframe+scrollTo&num=100&hl=en&lr=&start=100&sa=N';
my $gurl3 = 'http://www.google.com/search?q=javascript+iframe+scrollTo&num=100&hl=en&lr=&start=200&sa=N';
# Sample of a search-engine "Results" summary line (not referenced elsewhere
# in the visible script).
my $res = 'Results <b>301</b> - <b>400</b> of about <b>12,400</b> for <b>javascript iframe scrollTo</b>.  (<b>0.46</b> seconds)&nbsp;</font>';

my $loadfile = 0;                   # 1 = read $testfile instead of fetching $url
# Hosts whose links are never added to the viewer (see in_exclude()).
my @exclude = qw( www.google.com images.google.com groups.google.com news.google.com );
my @arr = ();                       # input chunks/lines to be scanned for links
my $lc = 0;                         # number of elements in @arr
my $url = $yurl;
# Fill @arr with the text to scan: either the lines of a previously saved
# test file ($loadfile set) or the live page at $url, split on '<' so that
# each element begins with the inside of one tag.
if ($loadfile) {
   prt( "Loading file $testfile ...\n");
   if ( ! -f $testfile) {
      mydie( "ERROR: Unable to find $testfile ...\n" );
   }
   open(my $in_fh, '<', $testfile) or mydie( "ERROR: Unable to open test file \n");
   @arr = <$in_fh>;
   close($in_fh);
   $lc = scalar @arr;
   if ($lc) {
      prt( "Got $lc filest lines ...\n" );
   } else {
      # This used to call the non-existent sub "mydir", so an empty file
      # produced an "Undefined subroutine" failure instead of this message.
      mydie( "ERROR: Got no lines from file ...\n" );
   }
} else {
   prt( "Fetching $url ...\n");
   my $conn = Win32::Internet->new();
   ##my $text2 = Win32::Internet->new->FetchURL("$url");
   my $text2 = $conn->FetchURL($url);
   ##my $text2 = get($url);
   # A failed fetch presumably yields undef (confirm against the
   # Win32::Internet docs); treat it as an empty page so that length() and
   # split() below stay well-defined.
   $text2 = '' unless defined $text2;
   my $llen = length($text2);
   prt( "Got $llen characters ...\n" );
   if (open(my $save_fh, '>', $savefile)) {
      # Keep a raw copy of what was fetched.
      print {$save_fh} $text2;
      close($save_fh);
      prt( "Written to $savefile ...\n" );
   } else {
      # Could not save the copy: dump the text through prt() instead.
      prt( "\n=========================================================================\n" );
      prt( $text2 );
      prt( "\n=========================================================================\n" );
   }
   @arr = split( /</, $text2 );
   $lc = scalar @arr;
   prt( "Got $lc split lines ...\n" );
}
# Scan the input chunks for http:// links and group them by host.
#   @hrefarr - every link that passed the exclusion checks (repeats included)
#   %hurl    - host => space-separated list of the distinct links on that host
#   $rline   - accumulated text of the search engine's "Result..." summary
#   $rbgn    - true while a "Result..." summary is being accumulated
my @hrefarr = ();
my %hurl = ();
my $rline = '';
my $rbgn = 0;
foreach my $line (@arr) {
   chomp $line;
   # The attribute value is taken up to the closing quote, so a following
   # attribute written without whitespace (href="x"class="y") is not
   # swallowed into the link.
   if ($line =~ /.*href="([^"\s]+)"/i) {
      my $href = $1;
      if ($href =~ m{http://(.*)}i) {
         my $st = $1;   # everything after the scheme: host[/path...]
         if ($href =~ m|^.*(\d+\.\d+\.\d+\.\d+)/search|i)  { # 1.2.3.4
            prt( "Discarded IP [$href][$st]\n" );
         } else {
            my @arr2 = split( /\//, $st );
            # A bare "http://" leaves no host part; use '' rather than undef
            # as the hash key.
            my $nurl = defined $arr2[0] ? $arr2[0] : '';
            if (in_exclude($nurl)) {
               prt( "Discard ADD [$nurl]][$href][$st]\n" );
            } else {
               if (defined $hurl{$nurl}) {
                  # Host already seen: add the link only if it is new.
                  my @known = split(/ /, $hurl{$nurl});
                  if (grep { $_ eq $href } @known) {
                     prt( "Repeat [$nurl]\n" );
                     prt( "Discarded REPEAT [$nurl]][$href][$st]\n" );
                  } else {
                     push(@known, $href);
                     $hurl{$nurl} = join( ' ', @known );
                     prt( "Repeat [$nurl]\n" );
                     prt( "Added [$hurl{$nurl}]\n" );
                  }
               } else {
                  $hurl{$nurl} = $href;
               }
               push(@hrefarr, $href);
            }
         }
      } else {
         # Not an http:// link. The old message also printed $st, which is
         # not in scope in this branch and therefore was always empty.
         prt( "Discarded [$href]\n" );
      }
   } elsif ($rbgn) {
      # Inside a "Result..." summary: keep appending until the closing font tag.
      $rline .= ' ' . $line;
      if ($line =~ /\/font>/) {
         prt( "End RESULT line ...[$rline]\n" );
         $rbgn = 0;
      }
   } elsif ($line =~ /Result/) {
      $rbgn = 1;
      $rline = $line;
   }
}
### done it all
# Emit the viewer page: HTML head, a JavaScript array "ma" holding one
# item(link, description, label) per collected link, the selection form, and
# the closing markup.
my $kc = scalar keys %hurl;
prt( "Got $kc different sites ...\n" );
out_htm_head();
print {$HF} '<script language="javascript" type="text/javascript">', "\n";
print {$HF} "<!-- \n";
print {$HF} "var ma = new Array(\n";
my $cnt = 0;
# Sorted so that the generated page is the same from run to run (plain hash
# order is not stable).
foreach my $site (sort keys %hurl) {
   my @links = split(/ /, $hurl{$site});
   prt( "Site $site, with ". scalar(@links) . " entries ...\n" );
   foreach my $link (@links) {
      # Comma-separate the array elements (none before the first one).
      print {$HF} ",\n" if $cnt;
      $cnt++;
      prt( "   $link\n" );
      # Description shown next to the page label: the host of the link.
      # The date is only a fallback for a link with no host part; a host
      # without a trailing slash is now recognised too.
      my $st = "2006/07/03";
      if ($link =~ m{http://([^/]+)}i) {
         $st = $1;
      }
      print {$HF} 'new item( "' . _js_escape($link) . '", "' . _js_escape($st) . '", "Page '.$cnt.'" )';
   }
}
print {$HF} ");\n";
add_form();
print {$HF} "// -->\n";
print {$HF} '</script>'."\n";
out_htm_tail();
# Buffered write errors only surface at close time.
close( $HF ) or mydie( "Failed to write $htmfile ...\n" );
if ($loadfile) {
   close_log($outfile,1);
} else {
   # Hands the page to the shell; presumably this opens it with the program
   # associated with .htm files on Windows -- confirm.
   system( $htmfile );
   close_log($outfile,0);
}
exit(0);

# Escape backslashes and double quotes so that a value taken from the fetched
# page cannot terminate the double-quoted JavaScript string it is written into.
sub _js_escape {
   my ($text) = @_;
   $text =~ s/(["\\])/\\$1/g;
   return $text;
}
# Write the start of the viewer page to $HF: doctype, <head> (meta tags,
# title and the viewer's JavaScript helpers), the opening <body> tag and the
# IFRAME in which the selected pages are shown.
#
# The IFRAME paragraph is emitted after <body>; it used to be written inside
# <head>, which is not valid HTML. The heredoc is non-interpolating because
# the text is literal HTML/JavaScript.
sub out_htm_head {
   print {$HF} <<'EOF';
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<html>
<head>
<meta http-equiv="Content-Language" content="en">
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
<meta name="Generator" content="EditPlus">
<meta name="Author" content="Geoff McLane">
<meta name="Keywords" content="">
<meta name="Description" content="">
<title>Page Viewer</title>
<script language="javascript" type="text/javascript">
<!--
var displaymode = 0; // 1 to open new pages ...
var max = 0;
var curp = 0;
var curind = 0;
function item( lnk, dsc, fd ) {
 this.lnk = lnk;
 this.dsc = dsc;
 this.fd = fd;
}
function show_url( url ) {
 if ( document.getElementById && (displaymode==0) ) {
  //alert('Got (document.getElementById && (displaymode==0) ) ...');
  document.getElementById("IFrame1").src = url;
 } else if ( document.all && (displaymode==0) ) {
  //alert('Got (document.all && (displaymode==0) ) ...');
  document.all.IFrame1.src = url;
 } else {
  //alert( 'Not document.getElementById or document.all' );
  if (!window.win2||win2.closed){
   win2 = window.open( url );
  }else{ //else if win2 already exists
   win2.location = url;
   win2.focus();
  }
 }
}
function gone(){
 curind = document.jumpy.example.selectedIndex;
 var selectedurl=document.jumpy.example.options[curind].value;
 show_url( selectedurl );
 // view(selectedurl);
 curind++;
 if (curind >= max) { curind = 0; }
 document.jumpy.example.selectedIndex = curind; // select NEXT page
}
function nextPage() {
   curp++;
   if (curp >= max) { curp = 0; }
   var pg = ma[curp].lnk;
   document.jumpy.example.selectedIndex = curp;
   show_url(pg);
   // setTimeout( "nextPage();", mto );
}
// -->
</script>
</head>
<body>
<p align="center">
<iframe id="IFrame1" frameborder="1" scrolling="auto" style="width:98%;height:500px;"  src="http://geoffmclane.com/fg/index.htm">
IFRAME NOT SUPPORTED ...
</iframe>
</p>
EOF
}
# Append to $HF the JavaScript that sets "max" from the "ma" array and then
# writes the page-selection form (a <select> with one option per entry of
# "ma" plus a "Go!" button wired to gone()). Expected to be called while the
# page's <script> element is open.
sub add_form {
   my $form_script = <<'EOF';
max = ma.length;
function set_form() {
 var n;
 document.writeln('<div align="right">');
 document.writeln('<form name="jumpy" action="">');
 document.writeln('<select name="example" size="1">');
 for( n = 0; n < max; n++ ) {
  var opt = '<option value="' + ma[n].lnk;
  if( 0 == n ) {
   opt += '" selected>';
  } else {
   opt += '">';
  }
  // opt += ma[n].fd; // + " (circa " + ma[n].dsc + " - " + ma[n].lnk + ")");
  opt += ma[n].fd + ' (' + ma[n].dsc + ')'; //  + " - " + ma[n].lnk + ")");
  document.writeln(opt);
  document.writeln('</option>');
 }
 document.writeln('</select>');
 document.writeln('<input type="button" name="test" value="Go!" onClick="gone()">');
 document.writeln('</form>');
 document.writeln('</div>');
}
set_form();
EOF
   return print {$HF} $form_script;
}
# Write the closing </body> and </html> tags of the viewer page to $HF.
sub out_htm_tail {
   my @closing_tags = ('</body>', '</html>');
   return print {$HF} join("\n", @closing_tags) . "\n";
}
# Return 1 when the given host name is exactly equal to one of the entries
# in @exclude, otherwise 0.
sub in_exclude {
   my $candidate = shift;
   my $hits = grep { $_ eq $candidate } @exclude;
   return $hits ? 1 : 0;
}
## eof - pageview02.pl

index -|- top

checked by tidy  Valid HTML 4.01 Transitional