#!/perl -w
# NAME: parse-xml02.pl
# AIM: My attempt at my OWN XML parser
# This is the second attempt, with a complete RE-WRITE
# 27/01/2010 geoff mclane http://geoffair.net/mperl
use strict;
use warnings;
use Data::Dumper;
require 'logfile.pl' or die "Unable to load logfile.pl ...\n";
# log file stuff
my ($LF);
my $pgmname = $0;
if ($pgmname =~ /\w{1}:\\.*/) {
my @tmpsp = split(/\\/,$pgmname);
$pgmname = $tmpsp[-1];
}
my $outfile = "temp.$pgmname.txt";
open_log($outfile);
# autogeneration of functions
# auto-generate a set of functions
# my @TTColors = qw( red green blue white );
# for $name (@TTColors) {
# no strict 'refs'; # allow symbol table manipulation
# *$name = *{uc $name} = sub { "@_"; }
# }
# Options
my $add_blank_attribs = 0;
# features
my $load_log = 1;
my $add_header = 1;
#my $show_comments = 1;
my $keep_path_order = 1;
#my $exclude_blanks = 0;
my $out_xml = "tempxml5.xml";
my $out_xml2 = "tempxml7.xml";
# content hash reference strings
my $x_chr_cont = 'content';
my $x_chr_attr = 'attributes';
# element types
my $XT_HEADER = 1; # xml header
my $XT_COMMENT = 2; # comments
my $XT_DOCTYPE = 3; # doctype ]>
my $XT_CDATA = 4; # cdata
# elements
my $XT_ELE1 = 5; # simple
my $XT_ELE2 = 6; # closed
my $XT_ELE3 = 7; # complete
# stacked in an array
my $XAO_TYPE = 0;
my $XAO_TEXT = 1;
my $XAO_TAG = 2;
my $XAO_PRE = 3;
my $XAO_LNNUM = 4;
my $XAO_STACK = 5;
# options during parse
my $XO_SHOW1 = 1;
my $XO_SHOW2 = 2;
my $XO_SHOW3 = 4;
my $XO_SHOW4 = 8;
my %xml_type_names = (
$XT_HEADER => 'header',
$XT_COMMENT => 'comment',
$XT_DOCTYPE => 'doctype',
$XT_CDATA => 'cdata',
$XT_ELE1 => 'open',
$XT_ELE2 => 'close',
$XT_ELE3 => 'complete'
);
# special hash strings
my $x_cont = 'ContentArray';
my $x_warn = 'ErrorWarnings';
my $x_root = 'DocRoot';
my $x_file = 'FileName';
# information on XML
# from : http://www.w3.org/TR/REC-xml
# XML documents SHOULD begin with an XML declaration - XML HEADER
# '' - XML DOCTYPE
#my $in_file = 'tests.xml';
#my $in_file = 'test4.xml';
#my $in_file = 'test3.xml';
#my $in_file = 'test8.xml'; # has an ERROR
#my $in_file = 'test9.xml'; # has an ERROR
#my $in_file = 'C:\DTEMP\libxml2-2.6.30\result\slashdot16.xml'; # UTF-16 file
#my $in_file = 'C:\DTEMP\FG\CubeServ420.xml';
my $in_file = 'C:\FGCVS\FlightGear\data/Aircraft/B-2/B-2-set.xml';
#my $in_file = "C:\\FGCVS\\FlightGear\\data\\Aircraft\\787\\787.xml";
#my $in_file = "C:\\FGCVS\\FlightGear\\data\\Aircraft\\787\\787-set.xml";
#my $in_file = "C:\\FGCVS\\FlightGear\\data\\Aircraft\\c172p\\c172p-set.xml";
#my $in_file = "C:\\FGCVS\\FlightGear\\data\\Aircraft\\737-300\\737-300-set.xml";
#my $in_file = "tempxml6.xml";
# debug
my @warnings = ();
# ===================================================
my $test_xml = <
Title
contentitalics
EOF
# ===================================================
my $test_xml2 = <
Cessna 172P Skyhawk (1981 model)
David Megginson
production
jsb
c172p
Aircraft/c172p/Models/c172p.xml
n301dp
0
Aircraft/c172p/splash.png
false
true
-0.21
0.235
0.36
-12
Aircraft/c172p/Systems/KAP140.xml
Aircraft/c172p/c172-sound.xml
1
0
0
0
0
0
0
0.027
0.0
3
EOF
my $test_xml3 = <
]>
Hello, world!
EOF
sub prtw($) {
my ($tx) = shift;
$tx =~ s/\n$//;
prt("$tx\n");
push(@warnings,$tx);
}
sub show_warnings() {
if (@warnings) {
prt( "\nGot ".scalar @warnings." WARNINGS...\n" );
foreach my $itm (@warnings) {
prt("$itm\n");
}
prt("\n");
} else {
###prt( "No warnings issued.\n\n" );
}
}
sub pgm_exit($$) {
my ($val,$msg) = @_;
if (length($msg)) {
$msg .= "\n" if ( !($msg =~ /\n$/) );
prt($msg);
}
show_warnings();
close_log($outfile,$load_log);
exit($val);
}
##########################################################
# ### MY XML PARSER ###
sub xml_get_content_hr($$) {
my ($txt,$rat) = @_;
my %h = ();
$h{$x_chr_cont} = $txt;
if ($add_blank_attribs) {
$h{$x_chr_attr} = $rat;
} else {
if (scalar keys(%{$rat})) {
$h{$x_chr_attr} = $rat;
}
}
return \%h;
}
sub xml_get_type_name($) {
my ($typ) = shift;
if (defined $xml_type_names{$typ}) {
return $xml_type_names{$typ};
}
return 'Unknown $typ!';
}
sub ret_stack_text($) {
my ($ra) = @_;
my $rtxt = '';
foreach my $tx (@{$ra}) {
$rtxt .= '/' if (length($rtxt));
$rtxt .= ${$tx}[0];
}
return $rtxt;
}
sub ret_stack_text_simple($) {
my ($ra) = @_;
my $rtxt = '';
my ($tx);
foreach $tx (@{$ra}) {
$rtxt .= '/' if (length($rtxt));
$rtxt .= $tx;
}
return $rtxt;
}
sub check_ele_stack($$$) {
my ($res,$rw,$opts) = @_;
my $cnt = scalar @{$res};
if ($cnt) {
my $wrap = 4;
my $msg = "WARNING: Still $cnt items ON THE STACK! ";
my $tcnt = 0;
$msg .= "\n" if ($cnt > $wrap);
for (my $i = 0; $i < $cnt; $i++) {
my $refts = ${$res}[$i];
my $toptag = ${$refts}[0];
my $toplnn = ${$refts}[1];
$msg .= "[".$toptag."]($toplnn) ";
$tcnt++;
if ($tcnt > $wrap) {
$tcnt = 0;
$msg .= "\n";
}
}
push(@{$rw},$msg);
prtw("$msg\n") if ($opts & $XO_SHOW4);
}
}
# element types
# xml header
# comments
# doctype ]>
# cdata
# elements
# simple
# closed
# complete
sub scan_xml_text($$$) {
my ($fil,$text,$opts) = @_;
my $len = length($text);
my ($i,$cc,$pretxt,$lnn,$element);
my ($xitem,$pc,$ppc,$pppc);
my ($eletyp,$eleref,$pele,$plnn);
my ($stkcnt,$bgnlnn,$stktxt,$msg);
my %xmlhash = ();
my $rxmlhash = \%xmlhash;
my @elestack = ();
my @xmlarray = ();
my @warns = ();
my @error = ();
my $doc_root = '';
my $dr_line = 0;
my $doc_error = 0;
$i = 0;
$lnn = 0;
$pretxt = '';
$element = '';
$cc = '';
$pc = '';
$ppc = '';
$pppc = '';
while (($i < $len) && !$doc_error) {
$eletyp = 0;
# accumulate text before an element
$pretxt = '';
for (; $i < $len; $i++) {
$cc = substr($text,$i,1);
$lnn++ if ($cc eq "\n");
last if ($cc eq '<');
$pretxt .= $cc;
}
# accumulate the element
$bgnlnn = $lnn;
$element = '';
$i++; # bump past '<' char
for (; $i < $len; $i++) {
$pppc = $ppc;
$ppc = $pc;
$pc = $cc;
$cc = substr($text,$i,1);
$lnn++ if ($cc eq "\n");
last if (($cc eq '>')||($cc =~ /\s/)); # stop on '>' OR a SPACE, or out of chars
$element .= $cc; # accumulate element
}
next if (length($element) == 0);
prt("$lnn: Process element [$element]\n") if ($opts & $XO_SHOW1);
# determine element type
if ($element =~ /^\?xml/i) {
$eletyp = $XT_HEADER; # is xml header
$xitem = $element;
if ($cc ne '>') {
for (; $i < $len; $i++) {
$cc = substr($text,$i,1);
$lnn++ if ($cc eq "\n");
last if ($cc eq '>');
$xitem .= $cc;
}
}
# store head
# $XAO_ TYPE TEXT TAG PRE LNNUM
$stktxt = ret_stack_text(\@elestack);
push(@xmlarray, [ $eletyp, $xitem, $element, $pretxt, $bgnlnn, $stktxt ]);
prt("$lnn: Done HEADER <$xitem> END HEADER\n") if ($opts & $XO_SHOW2);
$pretxt = '';
$element = '';
$i++; # skip last '>'
} elsif ($element =~ /^!--/) {
$eletyp = $XT_COMMENT; # comment
$xitem = $element;
if (!(($cc eq '>')&&($pc eq '-')&&($ppc eq '-'))) {
$pc = '*'; # make sure not trapped by
for (; $i < $len; $i++) {
$ppc = $pc;
$pc = $cc;
$cc = substr($text,$i,1);
$lnn++ if ($cc eq "\n");
last if (($cc eq '>')&&($pc eq '-')&&($ppc eq '-'));
$xitem .= $cc;
}
}
prt("$lnn: Done COMMENT <$xitem> END COMMENT\n") if ($opts & $XO_SHOW2);
# $XAO_ TYPE TEXT TAG PRE LNNUM
$stktxt = ret_stack_text(\@elestack);
push(@xmlarray, [ $eletyp, $xitem, $element, $pretxt, $bgnlnn, $stktxt ]);
$element = '';
$pretxt = '';
$i++; # skip last '>'
} elsif ($element =~ /^!DOCTYPE/) {
$eletyp = $XT_DOCTYPE; # doctype - $cc has to be a SPACE
$xitem = $element;
if ($cc ne '>') {
for (; $i < $len; $i++) {
$cc = substr($text,$i,1);
$lnn++ if ($cc eq "\n");
last if ($cc eq '>');
$xitem .= $cc;
if ($cc eq '[') { # enter DOCTYPE comment
$i++;
for (; $i < $len; $i++) {
$pppc = $ppc;
$ppc = $pc;
$pc = $cc;
$cc = substr($text,$i,1);
$lnn++ if ($cc eq "\n");
$xitem .= $cc;
last if ($cc eq ']');
if (($cc eq '-')&&($pc eq '-')&&($ppc eq '!')&&($pppc eq '<')) {
# entered comment in doctype
$i++;
$pppc = $ppc;
$ppc = $pc;
$pc = $cc;
$cc = substr($text,$i,1);
$lnn++ if ($cc eq "\n");
$xitem .= $cc;
$i++;
for (; $i < $len; $i++) {
$pppc = $ppc;
$ppc = $pc;
$pc = $cc;
$cc = substr($text,$i,1);
$lnn++ if ($cc eq "\n");
$xitem .= $cc;
last if (($cc eq '>')&&($pc eq '-')&&($ppc eq '-'));
}
}
}
}
}
prt("$lnn: Done DOCTYPE <$xitem> END DT\n") if ($opts & $XO_SHOW2);
} else {
$msg = "ERROR:$lnn: Closed DOCTYPE> - mal-formed XML!";
push(@error,$msg);
prtw("$msg\n") if ($opts & $XO_SHOW4);
$doc_error++;
last;
}
# $XAO_ TYPE TEXT TAG PRE LNNUM
$stktxt = ret_stack_text(\@elestack);
push(@xmlarray, [ $eletyp, $xitem, $element, $pretxt, $bgnlnn, $stktxt ]);
$pretxt = '';
$element = '';
$i++; # skip last '>'
} elsif ($element =~ /^!\[CDATA\[/) {
$eletyp = $XT_CDATA; # CDATA
$xitem = $element;
if (!(($cc eq '>') && ($pc eq ']') && ($ppc eq ']'))) {
for (; $i < $len; $i++) {
$ppc = $pc;
$pc = $cc;
$cc = substr($text,$i,1);
$lnn++ if ($cc eq "\n");
last if (($cc eq '>')&&($pc eq ']')&&($ppc eq ']'));
$xitem .= $cc;
}
}
prt("$lnn: Done CDATA <$xitem> END CDATA\n") if ($opts & $XO_SHOW2);
# $XAO_ TYPE TEXT TAG PRE LNNUM
$stktxt = ret_stack_text(\@elestack);
push(@xmlarray, [ $eletyp, $xitem, $element, $pretxt, $bgnlnn, $stktxt ]);
$element = '';
$pretxt = '';
$i++; # skip last '>'
}
# =================================================================
if (length($element)) {
# an element, which may be complete ie end in '/>', and may have attributes a="b"
$xitem = $element;
if ($cc eq '>') {
if ($element =~ /^\//) {
$xitem = $element;
$element = substr($element,1); # remove leading '/' from element
$eletyp = $XT_ELE2; # open, now closed - so pop
} elsif ($pc eq '/') {
# open/closed element
$eletyp = $XT_ELE3;
} else {
$eletyp = $XT_ELE1; # open, so push
}
} else {
for (; $i < $len; $i++) {
$pc = $cc;
$cc = substr($text,$i,1);
$lnn++ if ($cc eq "\n");
last if ($cc eq '>');
$xitem .= $cc;
}
if ($element =~ /^\//) {
$element = substr($element,1);
$eletyp = $XT_ELE2; # open, now closed - so pop
} elsif ($pc eq '/') {
# open/close element
$eletyp = $XT_ELE3;
} else {
$eletyp = $XT_ELE1; # open, so push
}
}
if ($eletyp == $XT_ELE1) {
push(@elestack,[$element,$lnn]);
$stkcnt = scalar @elestack;
if ($stkcnt == 1) {
if (length($doc_root)) {
$msg = "ERROR:$lnn: Have doc root [$doc_root]($dr_line), now 2nd root [$element]($lnn)!";
push(@error,$msg);
prtw("$msg\n") if ($opts & $XO_SHOW4);
$doc_error++;
last;
}
$doc_root = $element;
$dr_line = $lnn;
}
prt("$lnn: PUSHED [$element] ($stkcnt)\n") if ($opts & $XO_SHOW3);
} elsif ($eletyp == $XT_ELE2) {
if (@elestack) {
$eleref = $elestack[-1];
$pele = ${$eleref}[0];
$plnn = ${$eleref}[1];
if ($element eq $pele) {
pop @elestack;
$stkcnt = scalar @elestack;
prt("$lnn: POPPED [$element] ($stkcnt)\n") if ($opts & $XO_SHOW3);
} else {
$msg = "WARNING:$lnn: Element [$element] NOT last. Last is [$pele]($plnn)! NO POP";
push(@warns,$msg);
prtw("$msg\n") if ($opts & $XO_SHOW4);
}
} else {
$msg = "WARNING:$lnn: Element [$element] NOT ON EMPTY STACK! NO POP";
push(@warns,$msg);
prtw("$msg\n") if ($opts & $XO_SHOW4);
}
}
# $XAO_ TYPE TEXT TAG PRE LNNUM
$stktxt = ret_stack_text(\@elestack);
push(@xmlarray, [ $eletyp, $xitem, $element, $pretxt, $bgnlnn, $stktxt ]);
$pretxt = '';
$element = '';
$i++; # skip last '>'
}
}
if ($i < $len) {
$msg = "WARNING:$lnn: Still ".($len - $i)." characters in file [$fil] NOT PARSED!";
push(@warns,$msg);
prtw("$msg\n") if ($opts & $XO_SHOW4);
}
check_ele_stack(\@elestack,\@warns,$opts); # if (!$doc_error);
# fill up the HASH with collections
# =================================
${$rxmlhash}{$x_warn} = [ \@warns, \@error ] if (@warns || @error);
${$rxmlhash}{$x_cont} = \@xmlarray;
${$rxmlhash}{$x_root} = $doc_root;
${$rxmlhash}{$x_file} = $fil;
# =================================
return $rxmlhash;
}
sub has_utf_16_BOM($) {
my ($fil) = shift;
if (open INF, "<$fil") {
binmode INF;
my $buf = "";
if ((read INF, $buf, 2) == 2) {
close INF;
my $od1 = ord(substr($buf,0,1));
my $od2 = ord(substr($buf,1,1));
if (($od1 == 0xFF)&&($od2 == 0xFE)) {
return (16+2); # LittleEndians (windows)
} elsif (($od1 == 0xFE)&&($od2 == 0xFF)) {
return (16+4); # BigEndians (unix)
} elsif ($od1 == 0) {
return 4;
} elsif ($od2 == 0) {
return 2;
}
return 1;
}
close INF;
}
return 0;
}
sub parse_xml_file($) {
my ($fil) = @_;
my $bom = has_utf_16_BOM($fil);
if (!open INF, "<$fil") {
pgm_exit(1,"ERROR: Unable to open file [$fil]!");
}
if ($bom & 2) {
binmode INF, ":encoding(UTF-16LE)";
} elsif ($bom & 4) {
binmode INF, ":encoding(UTF-16BE)";
}
my @lines = ;
close INF;
$lines[0] = substr($lines[0],1) if ($bom & 16); # move PAST the BOM
my $text = join("",@lines);
my $len = length($text);
my $lnn = scalar @lines;
prt("Processing $lnn lines, $len chars, from ");
prt("\n ") if (length($fil) > 24);
prt("[$fil]");
if ($bom & 6) {
prt(" UTF-16LE") if ($bom & 2);
prt(" UTF-16BE") if ($bom & 4);
prt("(BOM)") if ($bom & 16);
}
prt("\n");
return scan_xml_text($fil,$text,0);
}
sub get_warn_error_text($) {
my ($rh) = @_;
my $text = '';
if (defined ${$rh}{$x_warn}) {
my $rwarn = ${$rh}{$x_warn}[0];
my $rerror = ${$rh}{$x_warn}[1];
my ($err);
if (@{$rwarn}) {
foreach $err (@{$rwarn}) {
$text .= "\n";
}
}
if (@{$rerror}) {
foreach $err (@{$rerror}) {
$text .= "\n";
}
}
}
return $text;
}
sub get_xml_ref_hash_text($$) {
my ($rh,$opts) = @_;
if (!defined ${$rh}{$x_cont}) {
return "\n";
}
my $ra = ${$rh}{$x_cont};
my $cnt = scalar @{$ra};
my $text = '';
my $ind = 0;
my $in;
my ($i,$eletyp,$xitem,$element,$pretxt,$bgnlnn,$eleref,$prev);
$prev = '';
$text .= get_warn_error_text($rh);
# $XAO_ TYPE TEXT TAG PRE LNNUM
#ush(@xmlarray, [ $eletyp, $xitem, $element, $pretxt, $bgnlnn ]);
for ($i = 0; $i < $cnt; $i++) {
$eleref = ${$ra}[$i];
$eletyp = ${$eleref}[$XAO_TYPE];
$xitem = ${$eleref}[$XAO_TEXT];
$element = ${$eleref}[$XAO_TAG];
$pretxt = ${$eleref}[$XAO_PRE];
$bgnlnn = ${$eleref}[$XAO_LNNUM];
# element types
$in = ' ' x $ind;
if ($eletyp == $XT_HEADER) { # xml header
$text .= "<$xitem>\n";
} elsif ($eletyp == $XT_COMMENT) { # comments
if ($opts & 1) {
$text .= "$in<$xitem>\n";
}
} elsif ($eletyp == $XT_DOCTYPE) { # doctype ]>
$text .= "<$xitem>\n";
} elsif ($eletyp == $XT_CDATA) { # cdata
if ($opts & 2) {
$text .= "$in<$xitem>\n";
} else {
$text =~ s/\n$//;
$text .= "<$xitem>\n";
}
} elsif ($eletyp == $XT_ELE1) { # simple
$ind++;
$text .= "$in<$xitem>\n";
$prev = $element;
} elsif ($eletyp == $XT_ELE2) { # closed
$ind-- if ($ind);
$in = ' ' x $ind;
$pretxt = trim_all($pretxt);
if ($prev eq $element) {
$text =~ s/\n$//;
$text .= $pretxt if (length($pretxt) && !($pretxt =~ /^\s+$/));
$text .= "<$xitem>\n";
} else {
$text .= $in;
if (length($pretxt) && !($pretxt =~ /^\s+$/)) {
$text .= "$pretxt\n" if (length($pretxt) && !($pretxt =~ /^\s+$/));
$text .= "$in<$xitem>\n";
} else {
$text .= "<$xitem>\n";
}
}
} elsif ($eletyp == $XT_ELE3) { # complete
$text .= "$in<$xitem>\n";
}
}
return $text;
}
sub write_xml_output($$) {
my ($fil,$rh) = @_;
my $xout = get_xml_ref_hash_text($rh,0);
write2file($xout,$fil);
prt("XML written to $fil file...\n");
# prt($xout);
}
sub xml_pgm_exit($) {
my ($rh) = @_;
if (defined ${$rh}{$x_warn}) {
my $txt = get_warn_error_text($rh);
prt($txt);
pgm_exit(1,"Exit with warnings, errors...");
} else {
pgm_exit(0,"Normal exit.");
}
}
sub is_in_array_ref($$) {
my ($tag,$rarr) = @_;
my $cnt = scalar @{$rarr};
my ($i,$ele);
for ($i = 0; $i < $cnt; $i++) {
$ele = ${$rarr}[$i];
return ($i+1) if ($ele eq $tag);
}
return 0;
}
sub is_in_array_ref_0($$) {
my ($tag,$rarr) = @_;
my $cnt = scalar @{$rarr};
my ($i,$ele);
for ($i = 0; $i < $cnt; $i++) {
$ele = ${$rarr}[$i]; # extract ref
return ($i+1) if (${$ele}[0] eq $tag);
}
return 0;
}
sub get_att_ref($) {
my ($txt) = @_;
$txt = substr($txt,1) while ($txt =~ /^\s/); # clear any leading spaces
my $len = length($txt);
my ($i,$cc,$key,$val);
$i = 0;
my %h = ();
while ($i < $len) {
$key = '';
for (; $i < $len; $i++) {
$cc = substr($txt,$i,1);
if ($cc eq '=') {
$i++;
$cc = substr($txt,$i,1);
last;
}
$key .= $cc;
}
return \%h if ($cc ne '"');
$i++;
$val = '';
for (; $i < $len; $i++) {
$cc = substr($txt,$i,1);
last if ($cc eq '"');
$val .= $cc;
}
$h{$key} = $val;
$i++; # bump over 2nd inverted commas
# and eat any spaces
for (; $i < $len; $i++) {
$cc = substr($txt,$i,1);
last if ( !($cc =~ /\s/) );
}
}
return \%h;
}
sub show_ele_hash($$) {
my ($reh,$out) = @_;
my ($key,$cnt,$cont,$xitem,$msg,$attrs,$atth);
my ($k,$v,$ec);
$cnt = scalar keys(%{$reh});
prt("Show element hash - count $cnt...\n") if ($out);
$cnt = 0;
foreach $key (keys %{$reh}) {
$cnt++;
$ec = ${$reh}{$key}{'count'};
$msg = "$cnt: [$key]($ec) ";
#prt("$cnt: key [$key] ".${$reh}{$key}{'count'}." ");
$xitem = $key;
$attrs = '';
if (defined ${$reh}{$key}{'xitem'}) {
$xitem = ${$reh}{$key}{'xitem'};
$attrs = $xitem;
$attrs =~ s/$key//; # strip OFF the key
$attrs = trim_all($attrs);
#$msg .= "[$attrs] ";
}
$atth = get_att_ref($attrs);
if (!defined ${$reh}{$key}{'attrs'}) {
${$reh}{$key}{'attrs'} = $atth;
}
$msg .= "<$key";
foreach $k (keys %{$atth}) {
$v = ${$atth}{$k};
$msg .= " $k=\"$v\"";
}
$msg .= ">";
if (defined ${$reh}{$key}{'content'}) {
$cont = ${$reh}{$key}{'content'};
if (length($cont)) {
#prt("cont [$cont]");
$msg .= "$cont$key>";
} else {
$msg .=" EMPTY";
delete ${$reh}{$key};
}
} else {
$msg .= "No CONTENT!";
delete ${$reh}{$key};
}
prt("$msg\n") if ($out);
}
prt("Done element hash - count $cnt...\n") if ($out);
}
sub transfer_ele_hash($$$) {
my ($sim,$rch,$reh) = @_;
my ($key);
foreach $key (keys %{$reh}) {
${$rch}{$sim}{$key} = [ ${$reh}{$key}{'content'}, ${$reh}{$key}{'attrs'} ];
}
}
sub xml_set_hash_ref_value($$$$) {
my ($p,$h,$s,$rv) = @_;
#prt("path [$p], to set element [$s] to value [$v]\n");
my @a = split('/',$p);
my $ac = scalar @a;
my $cnt = 0;
my ($k,$ra,$pth);
foreach $k (@a) {
last if ($k eq $s);
$cnt++;
}
if ($cnt >= $ac) {
pgm_exit(1,"ERROR: PATH [$p] DOES NOT CONTAIN [$s], VALUE ${$rv}[0]\n");
}
if ($cnt == 0) {
$pth = $s;
if (!defined ${$h}{$s}) {
${$h}{$s} = [];
}
$ra = ${$h}{$s};
push(@{$ra},$rv);
${$h}{$s} = $ra;
} elsif ($cnt == 1) {
$pth = $a[0]."/".$s;
if (!defined ${$h}{$a[0]}{$s}) {
${$h}{$a[0]}{$s} = [];
}
$ra = ${$h}{$a[0]}{$s};
push(@{$ra},$rv);
${$h}{$a[0]}{$s} = $ra;
} elsif ($cnt == 2) {
$pth = $a[0]."/".$a[1]."/".$s;
if (!defined ${$h}{$a[0]}{$a[1]}{$s}) {
${$h}{$a[0]}{$a[1]}{$s} = [];
}
$ra = ${$h}{$a[0]}{$a[1]}{$s};
push(@{$ra},$rv);
${$h}{$a[0]}{$a[1]}{$s} = $ra;
} elsif ($cnt == 3) {
$pth = $a[0]."/".$a[1]."/".$a[2]."/".$s;
if (!defined ${$h}{$a[0]}{$a[1]}{$a[2]}{$s}) {
${$h}{$a[0]}{$a[1]}{$a[2]}{$s} = [];
}
$ra = ${$h}{$a[0]}{$a[1]}{$a[2]}{$s};
push(@{$ra},$rv);
${$h}{$a[0]}{$a[1]}{$a[2]}{$s} = $ra;
} elsif ($cnt == 4) {
$pth = $a[0]."/".$a[1]."/".$a[2]."/".$a[3]."/".$s;
if (!defined ${$h}{$a[0]}{$a[1]}{$a[2]}{$a[3]}{$s}) {
${$h}{$a[0]}{$a[1]}{$a[2]}{$a[3]}{$s} = [];
}
$ra = ${$h}{$a[0]}{$a[1]}{$a[2]}{$a[3]}{$s};
push(@{$ra},$rv);
${$h}{$a[0]}{$a[1]}{$a[2]}{$a[3]}{$s} = $ra;
} elsif ($cnt == 5) {
$pth = $a[0]."/".$a[1]."/".$a[2]."/".$a[3]."/".$a[4]."/".$s;
if (!defined ${$h}{$a[0]}{$a[1]}{$a[2]}{$a[3]}{$a[4]}{$s}) {
${$h}{$a[0]}{$a[1]}{$a[2]}{$a[3]}{$a[4]}{$s} = [];
}
$ra = ${$h}{$a[0]}{$a[1]}{$a[2]}{$a[3]}{$a[4]}{$s};
push(@{$ra},$rv);
${$h}{$a[0]}{$a[1]}{$a[2]}{$a[3]}{$a[4]}{$s} = $ra;
} elsif ($cnt == 6) {
$pth = $a[0]."/".$a[1]."/".$a[2]."/".$a[3]."/".$a[4]."/".$a[5]."/".$s;
if (!defined ${$h}{$a[0]}{$a[1]}{$a[2]}{$a[3]}{$a[4]}{$a[5]}{$s}) {
${$h}{$a[0]}{$a[1]}{$a[2]}{$a[3]}{$a[4]}{$a[5]}{$s} = [];
}
$ra = ${$h}{$a[0]}{$a[1]}{$a[2]}{$a[3]}{$a[4]}{$a[5]}{$s};
push(@{$ra},$rv);
${$h}{$a[0]}{$a[1]}{$a[2]}{$a[3]}{$a[4]}{$a[5]}{$s} = $ra;
} elsif ($cnt == 7) {
$pth = $a[0]."/".$a[1]."/".$a[2]."/".$a[3]."/".$a[4]."/".$a[5]."/".$a[6]."/".$s;
if (!defined ${$h}{$a[0]}{$a[1]}{$a[2]}{$a[3]}{$a[4]}{$a[5]}{$a[6]}{$s}) {
${$h}{$a[0]}{$a[1]}{$a[2]}{$a[3]}{$a[4]}{$a[5]}{$a[6]}{$s} = [];
}
$ra = ${$h}{$a[0]}{$a[1]}{$a[2]}{$a[3]}{$a[4]}{$a[5]}{$a[6]}{$s};
push(@{$ra},$rv);
${$h}{$a[0]}{$a[1]}{$a[2]}{$a[3]}{$a[4]}{$a[5]}{$a[6]}{$s} = $ra;
} elsif ($cnt == 8) {
$pth = $a[0]."/".$a[1]."/".$a[2]."/".$a[3]."/".$a[4]."/".$a[5]."/".$a[6]."/".$a[7]."/".$s;
if (!defined ${$h}{$a[0]}{$a[1]}{$a[2]}{$a[3]}{$a[4]}{$a[5]}{$a[6]}{$a[7]}{$s}) {
${$h}{$a[0]}{$a[1]}{$a[2]}{$a[3]}{$a[4]}{$a[5]}{$a[6]}{$a[7]}{$s} = [];
}
$ra = ${$h}{$a[0]}{$a[1]}{$a[2]}{$a[3]}{$a[4]}{$a[5]}{$a[6]}{$a[7]}{$s};
push(@{$ra},$rv);
${$h}{$a[0]}{$a[1]}{$a[2]}{$a[3]}{$a[4]}{$a[5]}{$a[6]}{$a[7]}{$s} = $ra;
} elsif ($cnt == 9) {
$pth = $a[0]."/".$a[1]."/".$a[2]."/".$a[3]."/".$a[4]."/".$a[5]."/".$a[6]."/".$a[7]."/".$a[8]."/".$s;
if (!defined ${$h}{$a[0]}{$a[1]}{$a[2]}{$a[3]}{$a[4]}{$a[5]}{$a[6]}{$a[7]}{$a[8]}{$s}) {
${$h}{$a[0]}{$a[1]}{$a[2]}{$a[3]}{$a[4]}{$a[5]}{$a[6]}{$a[7]}{$a[8]}{$s} = [];
}
$ra = ${$h}{$a[0]}{$a[1]}{$a[2]}{$a[3]}{$a[4]}{$a[5]}{$a[6]}{$a[7]}{$a[8]}{$s};
push(@{$ra},$rv);
${$h}{$a[0]}{$a[1]}{$a[2]}{$a[3]}{$a[4]}{$a[5]}{$a[6]}{$a[7]}{$a[8]}{$s} = $ra;
} else {
pgm_exit(1,"ERROR: Out of DEPTH - Increase in source.\n");
}
}
sub xml_get_hash_ref_content($$) {
my ($p,$h) = @_;
my @a = split('/',$p);
my $cnt = scalar @a;
my ($ra,$rh);
my $rtxt = '';
$ra = [ { } ];
if ($cnt == 1) {
if (defined ${$h}{$a[0]}) {
$ra = ${$h}{$a[0]};
}
} elsif ($cnt == 2) {
if (defined ${$h}{$a[0]}{$a[1]}) {
$ra = ${$h}{$a[0]}{$a[1]};
}
} elsif ($cnt == 3) {
if (defined ${$h}{$a[0]}{$a[1]}{$a[2]}) {
$ra = ${$h}{$a[0]}{$a[1]}{$a[2]};
}
} elsif ($cnt == 4) {
if (defined ${$h}{$a[0]}{$a[1]}{$a[2]}{$a[3]}) {
$ra = ${$h}{$a[0]}{$a[1]}{$a[2]}{$a[3]};
}
} elsif ($cnt == 5) {
if (defined ${$h}{$a[0]}{$a[1]}{$a[2]}{$a[3]}{$a[4]}) {
$ra = ${$h}{$a[0]}{$a[1]}{$a[2]}{$a[3]}{$a[4]};
}
} elsif ($cnt == 6) {
if (defined ${$h}{$a[0]}{$a[1]}{$a[2]}{$a[3]}{$a[4]}{$a[5]}) {
$ra = ${$h}{$a[0]}{$a[1]}{$a[2]}{$a[3]}{$a[4]}{$a[5]};
}
} elsif ($cnt == 7) {
if (defined ${$h}{$a[0]}{$a[1]}{$a[2]}{$a[3]}{$a[4]}{$a[5]}{$a[6]}) {
$ra = ${$h}{$a[0]}{$a[1]}{$a[2]}{$a[3]}{$a[4]}{$a[5]}{$a[6]};
}
} elsif ($cnt == 8) {
if (defined ${$h}{$a[0]}{$a[1]}{$a[2]}{$a[3]}{$a[4]}{$a[5]}{$a[6]}{$a[7]}) {
$ra = ${$h}{$a[0]}{$a[1]}{$a[2]}{$a[3]}{$a[4]}{$a[5]}{$a[6]}{$a[7]};
}
} elsif ($cnt == 9) {
if (defined ${$h}{$a[0]}{$a[1]}{$a[2]}{$a[3]}{$a[4]}{$a[5]}{$a[6]}{$a[7]}{$a[8]}) {
$ra = ${$h}{$a[0]}{$a[1]}{$a[2]}{$a[3]}{$a[4]}{$a[5]}{$a[6]}{$a[7]}{$a[8]};
}
} else {
pgm_exit(1,"ERROR: Out of DEPTH - Increase in source.\n");
}
$rh = ${$ra}[0];
$rtxt = (defined ${$rh}{$x_chr_cont}) ? ${$rh}{$x_chr_cont} : "'undef'";
return $rtxt;
} # end sub xml_get_hash_ref_content($$)
# forward reference
sub enum_hash_ref($$$);
sub enum_array_ref($$$);
sub enum_unknown_ref($$$);
sub enum_array_ref($$$) {
my ($ar,$lev,$t) = @_;
my ($k,$r,$c);
my $l2 = $lev + 1;
$c = 0;
foreach $k (@{$ar}) {
$r = ref($k);
$c++;
if ($r eq 'ARRAY') {
enum_array_ref($k,$l2,$t);
} elsif ($r eq 'HASH') {
enum_hash_ref($k,$l2,$t);
} else {
prt("$lev:A:$c:$t: $k\n");
}
}
}
sub enum_hash_ref($$$) {
my ($hr,$lev,$t) = @_;
my ($k,$v,$r,$i,$p);
my $l2 = $lev + 1;
foreach $k (keys %{$hr}) {
$v = ${$hr}{$k};
$r = ref($v);
$p = length($t) ? "$t/$k" : $k;
if ($r eq 'ARRAY') {
enum_array_ref($v,$l2,$p);
} elsif ($r eq 'HASH') {
enum_hash_ref($v,$l2,$p);
} else {
prt("$lev:H:$t: $k = $v\n");
}
}
}
sub enum_unknown_ref($$$) {
my ($hr,$lev,$t) = @_;
my $r = ref($hr);
if ($r eq 'HASH') {
enum_hash_ref($hr,$lev,$t);
} elsif ($r eq 'ARRAY') {
enum_array_ref($hr,$lev,$t);
} else {
prt("$lev:$t: $hr\n");
}
}
sub xml_get_all_children($$$$) {
my ($rh,$sim,$dep,$opts) = @_;
my %ch = ();
my %ch2 = ();
my $rch2 = \%ch2;
if (!defined ${$rh}{$x_cont}) {
return $rch2; # "\n";
}
my $ra = ${$rh}{$x_cont};
my $doc_root = ${$rh}{$x_root};
my $cnt = scalar @{$ra};
my $text = '';
my $ind = 0;
my $in;
my ($i,$eletyp,$xitem,$element,$pretxt,$bgnlnn,$eleref,$prev,$stktxt);
my (@arr,$inarr,$acnt,$diff,$i2,$typnam,$sst,$tpt,$msg,$hadyes);
my ($stkpath,$inref);
my ($topele,$topref,$attref,$topatt,$chr);
my @tagstack = ();
my %elehash = ();
my $dbg_ln = $opts;
$prev = '';
$hadyes = 0;
$inref = 0;
prt("Get all children of [$sim], depth $dep...\n") if ($dbg_ln);
$text .= get_warn_error_text($rh);
# $XAO_ TYPE TEXT TAG PRE LNNUM
#ush(@xmlarray, [ $eletyp, $xitem, $element, $pretxt, $bgnlnn ]);
for ($i = 0; $i < $cnt; $i++) {
$i2 = $i + 1;
$eleref = ${$ra}[$i];
$eletyp = ${$eleref}[$XAO_TYPE];
$xitem = ${$eleref}[$XAO_TEXT];
$element = ${$eleref}[$XAO_TAG];
$pretxt = ${$eleref}[$XAO_PRE];
$bgnlnn = ${$eleref}[$XAO_LNNUM];
$stktxt = ${$eleref}[$XAO_STACK];
next if (length($stktxt) == 0);
$sst = $stktxt;
$sst =~ s/^$doc_root\///;
$typnam = xml_get_type_name($eletyp);
# element types
$in = ' ' x $ind;
@arr = split('/',$stktxt);
$acnt = scalar @arr;
$inarr = is_in_array_ref($sim,\@arr);
$msg = "$i2: $typnam [$element][$sst]";
$msg .= " $inarr of $acnt ";
if ($inarr) {
$tpt = trim_all($pretxt);
$diff = ($acnt - $inarr);
if ($dep <= 0) {
$msg .= "YesD";
} elsif ($diff <= $dep) {
$msg .= "Yes";
} else {
$msg .= "Yes, but DEPTH!";
$inarr = 0;
}
$msg .= " $tpt" if (length($tpt));
prt("$msg\n") if ($dbg_ln);
$hadyes++;
} else {
if ($element eq $sim) {
$msg .= "NO, but YES because element is $sim";
$hadyes++;
$inarr = 1;
} else {
$msg .="NO";
$hadyes-- if ($hadyes);
}
prt("$msg\n") if ($dbg_ln);
}
next if (!$inarr);
if ($eletyp == $XT_HEADER) { # xml header
#$text .= "<$xitem>\n";
} elsif ($eletyp == $XT_COMMENT) { # comments
#if ($opts & 1) {
# $text .= "$in<$xitem>\n";
#}
} elsif ($eletyp == $XT_DOCTYPE) { # doctype ]>
#$text .= "<$xitem>\n";
} elsif ($eletyp == $XT_CDATA) { # cdata
$text .= "$i2:" if ($dbg_ln);
if ($opts & 2) {
$text .= "$in<$xitem>\n";
} else {
$text =~ s/\n$//;
$text .= "<$xitem>\n";
}
} elsif ($eletyp == $XT_ELE1) { # simple
#$ind++;
$text .= "$i2:" if ($dbg_ln);
$text .= "$in<$xitem>\n";
$prev = $xitem;
$prev =~ s/^$element//;
$attref = get_att_ref($prev);
$prev = $element;
$stkpath = ret_stack_text(\@tagstack);
push(@tagstack,[$element,$attref]);
$ind = scalar @tagstack;
if (defined $elehash{$element}) {
$elehash{$element}{'count'}++;
} else {
$elehash{$element}{'count'} = 1;
$elehash{$element}{'line'} = $i2;
$elehash{$element}{'xitem'} = $xitem;
}
} elsif ($eletyp == $XT_ELE2) { # closed
#$ind-- if ($ind);
$in = ' ' x $ind;
$pretxt = trim_all($pretxt);
$inref = is_in_array_ref_0($element,\@tagstack);
if (@tagstack) {
$ind = scalar @tagstack;
$topref = $tagstack[-1];
$topele = ${$topref}[0];
$topatt = ${$topref}[1];
if (!$inref) {
prtw("WARNING: element [$element] NOT IN STACK!\n");
next;
} elsif ($inref != $ind) {
prtw("WARNING: element [$element] NOT LAST STACK! last [$topele]\n");
next;
}
} else {
prtw("WARNING: element [$element] NOT IN EMPTY STACK!\n");
next;
}
$stkpath = ret_stack_text(\@tagstack);
pop @tagstack;
$ind = scalar @tagstack;
if (defined $elehash{$element}) {
#$elehash{$element}{'count'}-- if ($elehash{$element}{'count'});
$elehash{$element}{'content'} = $pretxt;
}
$in = ' ' x $ind;
if ($prev eq $element) {
$text =~ s/\n$//;
$text .= $pretxt if (length($pretxt) && !($pretxt =~ /^\s+$/));
$text .= "$i2:" if ($dbg_ln);
$text .= "<$xitem>\n";
} else {
$text .= $in;
if (length($pretxt) && !($pretxt =~ /^\s+$/)) {
$text .= "$pretxt\n" if (length($pretxt) && !($pretxt =~ /^\s+$/));
$text .= "$i2:" if ($dbg_ln);
$text .= "$in<$xitem>\n";
} else {
$text .= "$i2:" if ($dbg_ln);
$text .= "<$xitem>\n";
}
}
if (length($pretxt)) {
$chr = xml_get_content_hr($pretxt,$topatt);
xml_set_hash_ref_value($stkpath,$rch2,$element,$chr);
}
} elsif ($eletyp == $XT_ELE3) { # complete
$text .= "$i2:" if ($dbg_ln);
$text .= "$in<$xitem>\n";
}
}
#prt(Dumper($rch2));
#enum_hash_ref(\%ch2,0,'');
#enum_hash_ref($rch2,0,'');
prt("Debug parse text...\n$text\nEnd Debug parse text\n") if ($dbg_ln);
# ==========================================
show_ele_hash( \%elehash, 0 );
show_ele_hash( \%elehash, 0 );
transfer_ele_hash( $sim, \%ch, \%elehash );
# ==========================================
### pgm_exit(1,"TEMP EXIT");
#return \%ch;
#return \%ch2;
return $rch2;
}
sub show_child_hash($$$) {
my ($rc,$sim,$opt) = @_;
my $ri = ${$rc}{$sim};
my ($key,$rah,$ritm,$msg,$k,$v,$cont,$cnt);
$cnt = scalar keys( %{$ri} );
prt("Show $cnt children hash...\n");
foreach $key (keys %{$ri}) {
$msg = "<$key";
$ritm = ${$rc}{$sim}{$key};
$cont = ${$ritm}[0];
$rah = ${$ritm}[1];
foreach $k (keys %{$rah}) {
$v = ${$rah}{$k};
$msg .= " $k=\"$v\"";
}
$msg .= ">$cont$key>";
prt("$msg\n");
}
prt("Done $cnt children hash...\n");
}
sub show_child_hash2($$$) {
my ($rc,$sim,$opt) = @_;
my $ri = ${$rc}{$sim};
my @arr = qw(status description aero flight-model author);
my ($key,$p,$txt,$min,$len);
$min = 0;
foreach $key (@arr) {
$len = length($key);
$min = $len if ($len > $min);
}
foreach $key (@arr) {
$p = "$sim/$key";
$txt = xml_get_hash_ref_content($p,$rc);
$key .= ' ' while (length($key) < $min);
$key .= ':';
prt("$key $txt\n");
}
}
sub show_fg_sim_references($) {
my ($rh) = @_;
my $rc = xml_get_all_children($rh,"sim",0,0);
my ($txt,$cnt,$v,$h);
#enum_hash_ref($rc,0,'');
#enum_unknown_ref($rc,0,'');
#prt(Dumper($rc));
#$txt = xml_get_hash_ref_content('sim/status',$rc);
#prt("Got sim/status txt = [$txt]\n");
#$txt = xml_get_hash_ref_content('sim/status2',$rc);
#prt("Got sim/status2 txt = [$txt]\n");
#if (defined ${$rc}{'sim'}{'status'}[0]{'content'}) {
##if (defined ${$rc}{'sim'}{'status'}) {
#$v = ${$rc}{'sim'}{'status'};
#$h = ${$v}[0];
#if (defined ${$h}{'content'}) {
#$txt = ${$h}{'content'};
#prt("Got sim/status/h[content] $txt\n");
#} else {
#prt("Got sim/status $v\n");
#}
#}
#show_child_hash($rc,"sim",0);
show_child_hash2($rc,"sim",0);
}
####################################
# ### MAIN ###
#my $ref_hash = parse_xml_text("test",$test_xml2);
#my $ref_hash = parse_xml_text("test",$test_xml3);
#my $ref_hash = scan_xml_text('test',$test_xml3);
#my $ref_hash = scan_xml_text('test',$test_xml2);
parse_args(@ARGV);
my $ref_hash = parse_xml_file($in_file);
if (length($out_xml)) {
write_xml_output($out_xml,$ref_hash);
}
if (${$ref_hash}{$x_root} eq 'PropertyList') {
show_fg_sim_references($ref_hash);
}
xml_pgm_exit($ref_hash);
####################################
sub give_help {
prt("$pgmname: version 0.0.9 2010/01/29\n");
prt("Usage: $pgmname [options] input_file_name\n");
prt("Options:\n");
prt(" -h (-?) = This help, and exit.\n");
prt(" -i file = Alternate for input file name.\n");
prt(" -l = Load log at end.\n");
prt("Input file name will be parsed as an XML file.\n");
pgm_exit(0,"Help exit");
}
sub need_arg {
my ($a,@av) = @_;
if (!@av) {
pgm_exit(1,"ERROR: Arg [$a] MUST be followed by a 2nd argument! Aborting...\n");
}
}
sub parse_args {
my (@av) = @_;
my ($arg,$sarg);
while (@av) {
$arg = $av[0];
if ($arg =~ /^-/) {
$sarg = substr($arg,1);
$sarg = substr($sarg,1) while ($sarg =~ /^-/);
if (($sarg =~ /^h/i)||($sarg =~ /^\?/)) {
give_help($arg);
} elsif ($sarg =~ /^i/i) {
need_arg(@av);
shift @av;
$arg = $av[0];
$in_file = $arg;
} elsif ($sarg =~ /^l/i) {
$load_log = 1;
} else {
pgm_exit(1,"ERROR: Unknown argument [$arg]! Aborting...\n");
}
} else {
$in_file = $arg;
}
shift @av;
}
}
# eof - oarse-xml02.pl