#!/usr/bin/perl -w
# NAME: osmcut2.pl
# AIM: Read planet.osm, and extract info...
#
# Scans an OSM XML file character by character and logs the first occurrence
# of each distinct tag name, together with periodic progress lines.
# Helper routines such as prt(), open_log(), close_log(), get_nn(),
# trim_all(), space_split(), secs_HHMMSS() and lu_get_YYYYMMDD_hhmmss() are
# not defined in this file - presumably they come from lib_utils.pl.
use strict;
use warnings;
use File::Basename;  # split path ($name,$dir,$ext) = fileparse($file [, qr/\.[^.]*/] )
use File::stat;      # get file info like if ($sb = stat($fil)) { $date = $sb->mtime; and $size = $sb->size; }
use Time::HiRes qw(gettimeofday tv_interval);  # provide more accurate timings
use Cwd;

my $os = $^O;
my $perl_dir = '/home/geoff/bin';
my $PATH_SEP = '/';
my $temp_dir = '/tmp';
# Match the Windows-family $^O values explicitly: a bare /win/i would also
# match 'darwin' (macOS) and wrongly select the Windows paths there.
if ($os =~ /^(?:MSWin|cygwin)/i) {
    $perl_dir = 'C:\GTools\perl';
    $temp_dir = $perl_dir;
    $PATH_SEP = "\\";
}
unshift(@INC, $perl_dir);
require 'lib_utils.pl' or die "Unable to load 'lib_utils.pl' Check paths in \@INC...\n";

# log file stuff
our ($LF);
my $pgmname = $0;
if ($pgmname =~ /(\\|\/)/) {
    # keep only the last path component of the script name
    my @tmpsp = split(/(\\|\/)/,$pgmname);
    $pgmname = $tmpsp[-1];
}
my $outfile = $temp_dir.$PATH_SEP."temp.$pgmname.txt";
open_log($outfile);

# user variables
my $VERS = "0.0.1 2012-01-06";
my $load_log = 0;
my $in_file = '';
my $verbosity = 0;
my $debug_on = 1;   # when set, fall back to $def_file if no input is given
my $def_file = 'D:\SAVES\OSM\planet-120229.osm';
my $out_xml = '';
my $max_line = 90;

### program variables
my @warnings = ();
my $cwd = cwd();

# Verbosity predicates: true when $verbosity has reached the given level.
sub VERB1() { return $verbosity >= 1; }
sub VERB2() { return $verbosity >= 2; }
sub VERB5() { return $verbosity >= 5; }
sub VERB9() { return $verbosity >= 9; }

# show_warnings($val)
# Print every message collected in @warnings; when there are none, say so
# only at verbosity 9 or above. $val is accepted but not used.
sub show_warnings($) {
    my ($val) = @_;
    if (@warnings) {
        prt( "\nGot ".scalar @warnings." WARNINGS...\n" );
        foreach my $itm (@warnings) {
            prt("$itm\n");
        }
        prt("\n");
    } else {
        prt( "\nNo warnings issued.\n\n" ) if (VERB9());
    }
}

# pgm_exit($val,$msg)
# Print $msg (a trailing newline is added when missing), list the collected
# warnings, close the log and exit the process with status $val.
sub pgm_exit($$) {
    my ($val,$msg) = @_;
    if (length($msg)) {
        $msg .= "\n" if (!($msg =~ /\n$/));
        prt($msg);
    }
    show_warnings($val);
    close_log($outfile,$load_log);
    exit($val);
}

# prtw($tx)
# Print a warning line and remember it in @warnings for the end-of-run summary.
sub prtw($) {
    my ($tx) = shift;
    $tx =~ s/\n$//;
    prt("$tx\n");
    push(@warnings,$tx);
}

# get_hash_ref($tag)
# Return a reference to a new, empty hash. $tag is accepted but not used.
sub get_hash_ref($) {
    my $tag = shift;
    my %hash = ();
    return \%hash;
}

# process_in_file($fil)
# Read $fil line by line, walking each character through a small
# in-tag / in-quote state machine. The first time a tag name is seen
# (closing tags, 'changeset' and 'tag' elements excluded) it is logged with
# the text collected before it. A progress line with the percentage read and
# a projected total run time is logged whenever a new tag was shown and on
# every 30000th line. Exits through pgm_exit(1,...) if the file cannot be
# stat'ed or opened.
sub process_in_file($) {
    my ($fil) = @_;
    my ($INF,$date,$size,$sb);
    if ($sb = stat($fil)) {
        $date = $sb->mtime;
        $size = $sb->size;
    } else {
        pgm_exit(1,"ERROR: Unable to stat file [$fil]\n");
    }
    # three-argument open: the file name can never be parsed as a mode
    if (! open($INF, '<', $fil)) {
        pgm_exit(1,"ERROR: Unable to open file [$fil]\n");
    }
    my ($line,$len,$i,$ch);
    my $lnn = 0;        # lines read
    my $it = 0;         # true while inside <...>
    my $iq = 0;         # true while inside a quoted value within a tag
    my $qc = '';        # the quote character that opened the current value
    my $tag = '';       # tag content collected so far
    my $txt = '';       # text collected since the last tag ended
    my $tag_cnt = 0;    # tags completed
    my %shown = ();     # tag names already reported
    my $file_off = 0;   # byte position in the file after the current line
    my $show = 0;       # set when this line introduced a new tag name
    prt("Processing file $fil, ".get_nn($size)." bytes, ".lu_get_YYYYMMDD_hhmmss($date)."\n");
    my $bgn = [ gettimeofday ];
    while (<$INF>) {
        $line = $_;
        $lnn++;
        # ask the handle for the real position instead of summing line
        # lengths, which over-counted by one byte per line
        $file_off = tell($INF);
        chomp $line;
        $line = trim_all($line);
        $len = length($line);
        for ($i = 0; $i < $len; $i++) {
            $ch = substr($line,$i,1);
            if ($it) {
                if ($iq) {
                    # inside a quoted value: '>' is data, wait for the closing quote
                    $tag .= $ch;
                    $iq = 0 if ($ch eq $qc);
                } elsif ($ch eq '>') {
                    $it = 0;
                    $tag_cnt++;
                    if ($tag =~ /^\//) {
                        # end tag only
                    } elsif ($tag =~ /^changeset\s+/) {
                        # lots of these
                    } elsif ($tag =~ /^tag\s/) {
                        # quite a few
                    } else {
                        my @arr = space_split($tag);
                        my $tag1 = $arr[0];
                        # skip an empty split result and closing tags
                        if (defined $tag1 && $tag1 !~ /^\// && !defined $shown{$tag1}) {
                            $shown{$tag1} = 1;
                            prt("$lnn: [$txt] <$tag1\n");
                            $show = 1;
                        }
                    }
                    $txt = '';
                } else {
                    # a quote opens a value, except in '<!...' constructs
                    # where a stray apostrophe must not start one
                    if (($ch eq '"' || $ch eq "'") && substr($tag,0,1) ne '!') {
                        $iq = 1;
                        $qc = $ch;
                    }
                    $tag .= $ch;
                }
            } elsif ($ch eq '<') {
                $tag = '';
                $it = 1;
            } elsif ($ch =~ /\s/) {
                # drop leading white space from the collected text
                $txt .= $ch if (length($txt));
            } else {
                $txt .= $ch;
            }
        }
        if ($show || (($lnn % 30000) == 0)) {
            my $pct = '?';
            my $eta = '?';
            # guard both divisions: size and offset must be positive
            if (($size > 0) && ($file_off > 0)) {
                my $pctd = (($file_off / $size) * 100);
                $pct = int(($pctd+0.05) * 10) / 10;
                my $elap = tv_interval( $bgn, [ gettimeofday ] );
                # elapsed time scaled up to 100% = projected total run time
                $eta = secs_HHMMSS(int($elap * (100 / $pctd)+0.5));
            }
            prt("$lnn:$pct: $line $eta\n");
            $show = 0;
        }
        #if ($lnn > 1000000) {
        #    last;
        #}
    }
    close $INF;
    prt("Read $lnn lines...\n");
}

#########################################
### MAIN ###
parse_args(@ARGV);
process_in_file($in_file);
pgm_exit(0,"");
########################################

# give_help()
# Print the usage summary.
sub give_help {
    prt("$pgmname: version $VERS\n");
    prt("Usage: $pgmname [options] in-file\n");
    prt("Options:\n");
    prt(" --help (-h or -?) = This help, and exit 0.\n");
    prt(" --verb[n] (-v) = Bump [or set] verbosity. def=$verbosity\n");
    prt(" --load (-l) = Load LOG at end. ($outfile)\n");
    prt(" --out (-o) = Write output to this file.\n");
}

# need_arg($arg,@rest)
# Exit with an error unless at least one argument follows $arg.
sub need_arg {
    my ($arg,@av) = @_;
    pgm_exit(1,"ERROR: [$arg] must have a following argument!\n") if (!@av);
}

# parse_args(@ARGV)
# Decode the command line into the file-level option variables. Any number
# of leading dashes is accepted on an option; a bare argument is taken as
# the input file. Exits through pgm_exit() on help, on an invalid option, or
# when no usable input file results.
sub parse_args {
    my (@av) = @_;
    my ($arg,$sarg);
    while (@av) {
        $arg = $av[0];
        if ($arg =~ /^-/) {
            $sarg = substr($arg,1);
            $sarg = substr($sarg,1) while ($sarg =~ /^-/);
            if (($sarg =~ /^h/i)||($sarg eq '?')) {
                give_help();
                pgm_exit(0,"Help exit(0)");
            } elsif ($sarg =~ /^v/) {
                # \D* rather than .* so that every trailing digit is
                # captured: with .* '-v10' was read as level 0
                if ($sarg =~ /^v\D*(\d+)$/) {
                    $verbosity = $1;
                } else {
                    # no number given: each leading 'v' bumps the level
                    while ($sarg =~ /^v/) {
                        $verbosity++;
                        $sarg = substr($sarg,1);
                    }
                }
                prt("Verbosity = $verbosity\n") if (VERB1());
            } elsif ($sarg =~ /^l/) {
                $load_log = 1;
                prt("Set to load log at end.\n") if (VERB1());
            } elsif ($sarg =~ /^o/) {
                need_arg(@av);
                shift @av;
                $sarg = $av[0];
                $out_xml = $sarg;
                prt("Set out file to [$out_xml].\n") if (VERB1());
            } else {
                pgm_exit(1,"ERROR: Invalid argument [$arg]! Try -?\n");
            }
        } else {
            $in_file = $arg;
            prt("Set input to [$in_file]\n") if (VERB1());
        }
        shift @av;
    }

    if ((length($in_file) == 0) && $debug_on) {
        $in_file = $def_file;
        prt("Set DEFAULT input to [$in_file]\n");
        $load_log = 1;
    }
    if (length($in_file) == 0) {
        pgm_exit(1,"ERROR: No input files found in command!\n");
    }
    if (! -f $in_file) {
        pgm_exit(1,"ERROR: Unable to find in file [$in_file]! Check name, location...\n");
    }
}

# eof - template.pl