Generated: Mon Aug 16 14:14:22 2010 from getfilecounts.pl 2010/04/13 9.1 KB.
#!/perl -w # NAME: getfilecounts.pl # AIM: Given a root file, or folder, scan ALL directories, and report 'type' counts... # but in essence this is just a 'template' for further file processing # 2010/04/13 - geoff mclane - http://geoffair.net/mperl/ use strict; use warnings; use File::Basename; # split path ($name,$dir,$ext) = fileparse($file [, qr/\.[^.]*/] ) use Cwd; unshift(@INC, 'C:\GTools\perl'); require 'logfile.pl' or die "Unable to load logfile.pl ...\n"; # log file stuff my ($LF); my $pgmname = $0; if ($pgmname =~ /\w{1}:\\.*/) { my @tmpsp = split(/\\/,$pgmname); $pgmname = $tmpsp[-1]; } my $perl_dir = 'C:\GTools\perl'; my $outfile = $perl_dir."\\temp.$pgmname.txt"; open_log($outfile); # user variables my $load_log = 0; my $in_file = 'C:\HOMEPAGE\FG\index.html'; my @fpfolders = qw( aspnet_client _vti_cnf _vti_pvt _private _derived ); my @html_ext = qw( .htm .html .shtml .php ); my @graf_ext = qw( .jpg .jpeg .gif .png .bmp .ico .mpg ); my @css_ext = qw( .css ); my @script_ext = qw( .js .class .cgi ); ### program variables my @warnings = (); my $cwd = cwd(); my $os = $^O; my @g_file_array = (); my @g_xclude_dirs = (); my @g_xclude_files = (); my $g_indir = ''; my $g_infile = ''; # none yet my $g_dir_count = 0; my $g_item_count = 0; ### debug my $dbg01 = 0; # show each directory parse ### forward refs sub process_sub_dir($); sub pgm_exit($$) { my ($val,$msg) = @_; if (length($msg)) { $msg .= "\n" if (!($msg =~ /\n$/)); prt($msg) } close_log($outfile,$load_log); exit($val); } sub prtw($) { my ($tx) = shift; $tx =~ s/\n$//; prt("$tx\n"); push(@warnings,$tx); } sub show_warnings() { if (@warnings) { prt( "\nGot ".scalar @warnings." WARNINGS...\n" ); foreach my $itm (@warnings) { prt("$itm\n"); } prt("\n"); } else { prt( "\nNo warnings issued.\n\n" ); } } sub os_is_win() { return (($os eq 'MSWin32') ? 1 : 0); } sub dos_2_unix($) { my ($du) = shift; $du =~ s/\\/\//g; return $du; } sub is_fp_folder($) { my ($inf) = shift; $inf = lc($inf) if (os_is_win()); foreach my $fil (@fpfolders) { $fil = lc($fil) if (os_is_win()); return 1 if ($inf eq $fil); } return 0; } sub is_xclude_folder($) { my ($inf) = shift; $inf = lc($inf) if (os_is_win()); foreach my $fil (@g_xclude_dirs) { $fil = lc($fil) if (os_is_win()); return 1 if ($inf eq $fil); } return 0; } sub is_excluded_dir($) { my ($dir) = shift; return 1 if (is_fp_folder($dir)); return 1 if (is_xclude_folder($dir)); return 0; } sub is_user_excluded($) { my ($file) = shift; $file = lc($file) if (os_is_win()); foreach my $fil (@g_xclude_files) { $fil = lc($fil) if (os_is_win()); return 1 if ($file eq $fil); } return 0; } ######################################################### # Passed an array REF of extensions, # check if this is one of them? ######################################################### sub is_this_extent($$) { my ($ext, $rex) = @_; my $lcx = lc($ext); foreach my $x (@{$rex}) { return 1 if ($lcx eq lc($x)); } return 0; } ############################################ # only looking for HTM, HTML, PHP, # could be extended to others maybe ... ############################################ # test an EXTENSION, or form '.htm'... sub is_htm_ext($) { my ($ext) = shift; return( is_this_extent($ext,\@html_ext) ); } sub is_graf_ext($) { my ($ext) = shift; return( is_this_extent($ext,\@graf_ext) ); } sub is_zip_ext($) { my ($ext) = shift; my @arr = qw( .zip .gz ); return( is_this_extent($ext,\@arr) ); } sub is_css_ext($) { my ($ext) = shift; return( is_this_extent($ext, \@css_ext) ); } sub is_txt_ext($) { my ($ext) = shift; my @arr = qw( .txt ); return( is_this_extent($ext, \@arr) ); } sub is_script_ext($) { my ($fil) = shift; return( is_this_extent($fil, \@script_ext) ); } # test a FILE/PATH extension sub is_htm_file_ext($) { my ($fil) = shift; my ($n,$d,$e) = fileparse($fil,qr/\.[^.]*/); return( is_htm_ext($e) ); } sub is_graphic_file_ext($) { my ($fil) = shift; my ($n,$d,$e) = fileparse($fil,qr/\.[^.]*/); return( is_graf_ext($e) ); } sub is_zip_file_ext($) { my ($fil) = shift; my ($n,$d,$e) = fileparse($fil,qr/\.[^.]*/); return( is_zip_ext($e) ); } sub is_css_file_ext($) { my ($fil) = shift; my ($n,$d,$e) = fileparse($fil,qr/\.[^.]*/); return( is_css_ext($e) ); } sub is_txt_file_ext($) { my ($fil) = shift; my ($n,$d,$e) = fileparse($fil,qr/\.[^.]*/); return( is_txt_ext($e) ); } sub is_script_file_ext($) { my ($fil) = shift; my ($n,$d,$e) = fileparse($fil,qr/\.[^.]*/); return( is_script_ext($e) ); } sub get_file_ext_type($) { my ($fil) = shift; return 1 if (is_htm_file_ext($fil)); return 2 if (is_graphic_file_ext($fil)); return 3 if (is_zip_file_ext($fil)); return 4 if (is_css_file_ext($fil)); return 5 if (is_txt_file_ext($fil)); return 6 if (is_script_file_ext($fil)); return 0; } sub type_2_stg($) { my ($t) = shift; return "HTML" if ($t == 1); return "IMG" if ($t == 2); return "ZIP" if ($t == 3); return "CSS" if ($t == 4); return "TXT" if ($t == 5); return "SCRIPT" if ($t == 6); return "Other"; } sub add_2_g_file_array($) { my ($ff) = shift; push(@g_file_array, [$ff,get_file_ext_type($ff),0,0]); } sub get_g_file_array_counts() { my $len = scalar @g_file_array; my %hash = (); my ($typ,$i); for ($i = 0; $i < $len; $i++) { $typ = $g_file_array[$i][1]; $hash{$typ}++; } my $res = "Total=$len"; $len = 0; foreach $typ (keys %hash) { $i = $hash{$typ}; $res .= " ".type_2_stg($typ)."=$i"; $len += $i; } $res .= " ($len)"; return $res; } sub process_sub_dir($) { my ($indir) = @_; $indir = dos_2_unix($indir); # use ALL unix form of path pgm_exit(1,"ERROR: Unable to open directory [$indir]!\n") if ( !opendir( DIR, $indir ) ); my @files = readdir(DIR); closedir DIR; my ($fcnt,$file,$ff,@dirs); $fcnt = scalar @files; prt("Processing $fcnt files, from [$indir]...\n") if ($dbg01); $indir .= '/' if !($indir =~ /(\\|\/)$/); @dirs = (); foreach $file (@files) { next if (($file eq '.')||($file eq '..')); $g_item_count++; $ff = $indir.$file; if (-f $ff) { add_2_g_file_array($ff) if (!is_user_excluded($file)); } elsif (-d $ff) { push(@dirs,$ff) if (!is_excluded_dir($file)); $g_dir_count++; } else { pgm_exit(1,"ERROR: WHAT IS THIS? [$ff]!!\n"); } } foreach $ff (@dirs) { process_sub_dir($ff); } } sub get_all_files($) { my ($inf) = @_; $inf = dos_2_unix($inf); # use ALL unix form of path my ($infile,$indir); if (-f $inf) { ($infile,$indir) = fileparse($inf); #prt("Got file [$infile], in directory [$indir]...\n"); $indir = $cwd if ($indir =~ /^\.(\\|\/)$/); } elsif (-d $inf) { $indir = $inf; $infile = ''; # none yet } $indir .= '/' if !($indir =~ /(\\|\/)$/); pgm_exit(1,"ERROR: Unable to open directory [$indir]!\n") if ( !opendir( DIR, $indir ) ); my @files = readdir(DIR); closedir DIR; my ($fcnt,$file,$ff,@dirs); $fcnt = scalar @files; prt("Processing $fcnt files, from [$indir]...\n") if ($dbg01); @dirs = (); foreach $file (@files) { next if (($file eq '.')||($file eq '..')); $g_item_count++; $ff = $indir.$file; if (-f $ff) { add_2_g_file_array($ff) if (!is_user_excluded($file)); } elsif (-d $ff) { push(@dirs,$ff) if (!is_excluded_dir($file)); $g_dir_count++; } else { pgm_exit(1,"ERROR: WHAT IS THIS? [$ff]!!\n"); } } # now process the subdirectories, and sub-sub... foreach $ff (@dirs) { process_sub_dir($ff); } } ######################################### ### MAIN ### parse_args(@ARGV); prt( "$pgmname: in [$cwd]: In file [$in_file]...\n" ); if (-f $in_file) { ($g_infile,$g_indir) = fileparse($in_file); #prt("Got file [$infile], in directory [$indir]...\n"); $g_indir = $cwd if ($g_indir =~ /^\.(\\|\/)$/); } elsif (-d $in_file) { $g_indir = $in_file; $g_infile = ''; # none yet } else { pgm_exit(1,"ERROR: In file [$in_file] NOT valid!\n"); } get_all_files($in_file); prt("Processed $g_item_count items, $g_dir_count directories, for -\n"); prt( get_g_file_array_counts()."\n" ); pgm_exit(0,"Normal exit(0)"); ######################################## sub parse_args { my (@av) = @_; while (@av) { $in_file = $av[0]; shift @av; } } # eof - getfilecounts.pl