htmlparse.pl to HTML.

index -|- end

Generated: Tue Feb 2 17:54:42 2010 from htmlparse.pl 2008/07/24 1.1 KB.

#!/perl -w
# NAME: htmlparse.pl
# AIM: Experiment with HTML Parser
use strict;
use warnings;
use HTML::TreeBuilder;
use HTML::Element;
require 'logfile.pl' or die "Unable to load logfile.pl ...\n";
# log file stuff
# Name the log after the script's base name only.  Everything up to the last
# directory separator ('\' or '/') is stripped, so neither a Windows path such
# as C:\dir\htmlparse.pl nor a relative/Unix-style path can put separators
# into the log file name.
(my $script_name = $0) =~ s{\A.*[\\/]}{}s;
my $outfile = 'temp.' . $script_name . '.txt';
open_log($outfile);
prt( "$0 ... Hello, World ...\n" );

# --- Part 1: parse an HTML file into a tree and dump its structure ---
##my $in_file = 'p2html8.htm';
my $in_file = 'favorites.htm';
my $tree = HTML::TreeBuilder->new;
# parse_file() yields undef (with the reason in $!) when the file cannot be
# opened; report that instead of silently dumping an empty tree.
if ( defined $tree->parse_file($in_file) ) {
    $tree->dump;
}
else {
    prt( "ERROR: Unable to parse file [$in_file]: $!\n" );
}
# Explicitly destroy the tree; delete() returns undef, clearing $tree too.
$tree = $tree->delete;

# --- Part 2: build a single <a> element by hand and inspect it ---
# Named $anchor rather than $a, because a lexical $a would mask the special
# $a variable used by sort comparators.
my $anchor = HTML::Element->new('a', href => 'http://www.perl.com/');
$anchor->push_content("The Perl Homepage");
my $tag = $anchor->tag;
prt( "$tag starts out as:" . $anchor->starttag . "\n" );
prt( "$tag ends as:" . $anchor->endtag . "\n" );
# The braces in ${tag} are required: "$tag's" would be parsed as the package
# variable $tag::s, since ' is the legacy package separator in Perl.
prt( "${tag}'s href attribute is: " . $anchor->attr('href') . "\n" );
my $links_r = $anchor->extract_links();
prt( "Hey, I found " . scalar(@$links_r) . " links.\n" );
prt( "And that, as HTML, is: " . $anchor->as_HTML . "\n" );
$anchor = $anchor->delete;

close_log($outfile,1);
exit(0);
# eof

index -|- top

checked by tidy  Valid HTML 4.01 Transitional