Generated: Tue Feb 2 17:54:22 2010 from autoexch01.pl 2006/08/27 4.9 KB.
#!/Perl
# autoexch01.pl
#
# Fetch the USD exchange-rate table page, reduce the HTML with the helper
# subs from the required files, pick the currency rows out of the remaining
# <td> cells and write them to the log in CSV form.
use strict;
use warnings;
use LWP::Simple;

# NOTE(review): these bare-name requires depend on the helper files being
# reachable through @INC; Perl 5.26+ no longer puts '.' in @INC by default,
# so the helpers must be on PERL5LIB / -I when run on a modern perl.
require "logfile.pl" or die "Missing logfile.pl ...\n"; # my simple log file and some other utility subs
require "htmltools.pl" or die "Missing htmltools.pl ...\n";
require "currencyISO.pl" or die "Missing currencyISO.pl ...\n";

# log file stuff
# NOTE(review): $0 is used verbatim, so invoking the script through a path
# (e.g. ./autoexch01.pl) puts that path inside the log file name - confirm
# this is intended.
my $outfile = 'temp'.$0.'.txt';

# program variables
my $URL = 'http://www.x-rates.com/d/USD/table.html';
my @currency = ();    # rows of [ name, first value, second value ], filled by extractcurrency01()

open_log($outfile);
prt( "$0 ... Hello, World...\n" );

prt("Fetching text from $URL ...\n");
my $text = get($URL);
if (!defined $text) {
    # LWP::Simple::get() returns undef when the fetch fails; there is
    # nothing to parse, so report it and stop instead of cleaning undef.
    prt( "ERROR: Failed to fetch text from URL $URL ...\n" );
    close_log($outfile,1);    # closed the same way as on the normal path
    exit(1);
}
my $tcnt = length($text);
prt( "Got $tcnt characters from URL $URL ...\n");
###prt("[$text]\n");

$text = htmlexpand($text);
my $txt2 = htmlclean01($text);
###prt( "len=".length($txt2)." [$txt2]\n");

##my $ccnt = scalar @currency;
my $ccnt = extractcurrency01($txt2);
prt( "Got $ccnt currencies ...\n" );
if ($ccnt) {
    prt( "And finally, in CSV form ...\n" );
    prt( "Currency,1 USD,2 USD\n" );
    for my $row (@currency) {
        prt( join(',', @{$row}[0 .. 2])."\n" );
    }
    prt("From [$URL] on ".(scalar localtime)."\n");
}

prt( "All done ...\n" );
close_log($outfile,1);
exit(0);

# htmlclean01($html)
#
# Pass the page text through a fixed sequence of clean-up helpers, logging
# the current text length before each step, and return the final text.
# The helpers are not defined in this file (presumably they come from
# htmltools.pl); each one is called with the current text and its return
# value becomes the text for the next step.
sub htmlclean01 {
    my ($rtxt) = shift;

    # [ log label, transformation ] pairs, applied strictly in this order.
    my @steps = (
        [ 'Drop comments <!--...--> ...',             sub { dropcomments($_[0]) } ],
        [ 'Strip <HEAD>...</HEAD> tag ...',           sub { striptag($_[0], 'HEAD') } ],
        [ 'Strip <script>...</script> tag ...',       sub { striptag($_[0], 'script') } ],
        [ 'Strip <noscript>...</noscript> tag ...',   sub { striptag($_[0], 'noscript') } ],
        [ 'Remove <font ...> tags ...',               sub { removefont($_[0]) } ],
        [ 'Remove <b> tags ...',                      sub { removetag($_[0], 'b') } ],
        [ 'Remove tr attributes ...',                 sub { removetrattrib($_[0]) } ],
        [ 'Remove td attributes ...',                 sub { removetdattrib($_[0]) } ],
        [ 'Delete <a...> & </a>',                     sub { collecthrefs($_[0], 1) } ],
        [ 'Delete <img...>',                          sub { collectimgs($_[0], 1) } ],
        [ 'Do substitutions ...',                     sub { substitutions($_[0]) } ],
        [ 'Trim blank lines ...',                     sub { trimblanklines($_[0]) } ],
        [ 'Trim inline td ...',                       sub { triminlinetd($_[0]) } ],
    );

    for my $step (@steps) {
        my ($label, $action) = @{$step};
        prt( "len=".length($rtxt)." ".$label."\n" );
        $rtxt = $action->($rtxt);
    }
    return $rtxt;
}

# extractcurrency01($txt)
#
# Scan $txt line by line for single-line <td>...</td> cells and collect the
# currency rows into the file-level @currency array as [ name, v1, v2 ].
# Returns the number of rows pushed by this call.
#
# Blank cells are ignored. The remaining cells drive a small state machine:
#   mode 0 - waiting for the cell 'click on values to see graphs'
#   mode 1 - next cell must be '1 USD'  (otherwise back to mode 0)
#   mode 2 - next cell must be 'in USD' (otherwise back to mode 0)
#   mode 3 - every three cells form one row: name, first value, second value
# A line containing </table> (and no matching <td> cell) returns to mode 0.
sub extractcurrency01 {
    my ($txt) = shift;
    return 0 unless defined $txt;

    my $mode = 0;     # state machine position, see above
    my $col  = 0;     # column index (0..2) inside the current row
    my $cur  = '';    # currency name of the row being assembled
    my $v1   = 0;     # first value of the row being assembled
    my $rcnt = 0;     # rows pushed so far

    # split() also yields a final line that has no trailing newline, so the
    # last cell of the text is not lost.
    for my $line (split /\n/, $txt) {
        if ($line =~ /.*<td.*>(.*)<\/td>/i) {
            # Copy the capture before calling the helper so any regex it
            # runs cannot change what it was handed.
            my $raw  = $1;
            my $cell = trimbothends($raw);    # trims the ends only; inner spaces must survive
            next unless defined $cell && length $cell;

            if ($mode == 3) {
                if ($col == 0) {
                    $cur = $cell;
                    $col = 1;
                }
                elsif ($col == 1) {
                    $v1  = $cell;
                    $col = 2;
                }
                else {
                    push(@currency, [$cur, $v1, $cell]);
                    $rcnt++;
                    $col = 0;
                }
            }
            elsif ($mode == 0) {
                $mode = 1 if $cell eq 'click on values to see graphs';
            }
            elsif ($mode == 1) {
                $mode = ($cell eq '1 USD') ? 2 : 0;
            }
            elsif ($mode == 2) {
                $mode = ($cell eq 'in USD') ? 3 : 0;
            }
        }
        elsif ($line =~ /<\/table>/i) {
            ###prt( "Close of TABLE ...\n" );
            $mode = 0;
            $col  = 0;    # drop any half-collected row so a later table starts aligned
        }
    }
    return $rcnt;
}

# sample CSV file
#Currency,1 USD,2 USD
#Australian Dollar,1.32135,0.756802
#Brazilian Real,2.1593,0.463113
#British Pound,0.529549,1.8884
#Canadian Dollar,1.1078,0.90269
#Chinese Yuan,7.969,0.125486
#Danish Krone,5.8435,0.17113
#Euro,0.783269,1.2767
#Hong Kong Dollar,7.7796,0.128541
#Indian Rupee,46.41,0.0215471
#Japanese Yen,117.31,0.00852442
#Malaysian Ringgit,3.6785,0.27185
#Mexican Peso,10.964,0.0912076
#New Zealand Dollar,1.57282,0.635801
#Norwegian Kroner,6.302,0.15868
#Singapore Dollar,1.5802,0.632831
#South African Rand,7.1915,0.139053
#South Korean Won,961.8,0.00103972
#Sri Lanka Rupee,103.27,0.00968335
#Swedish Krona,7.2448,0.13803
#Swiss Franc,1.2375,0.808081
#Taiwan Dollar,32.85,0.0304414
#Thai Baht,37.67,0.0265463
#Venezuelan Bolivar,2144.6,0.000466287
#From [http://www.x-rates.com/d/USD/table.html] on Sun Aug 27 11:34:18 2006
# try to fit to
# Country, Country, ISO, Rate
# eof - autoexch01.pl