Generated: Tue Feb 2 17:54:22 2010 from autoword02.pl 2007/09/22 7.5 KB.
#!/Perl # Series: Automation of Microsoft Word # original from : http://www.stouk.com/documents/perl/gui/guiref/page04.htm # Purpose: Load an existing document, and write it out to a TEXT ONLY file. # I had some initial trouble getting this to function, hence the addition of # som eLOG file code, so I could review what was happening ... and there # is a lot of DEAD CODE left commented out, to remind me of the tries ... # author: geoff mclane - mailto: geoffair _at_ hotmail _dot_ com - 2006-06-12 # 22/09/2007 - added command line argument parse -in infile -out outfile # use strict; use Win32::OLE; use Win32::OLE::Const 'Microsoft Word'; my ($LOG); my $write_log = 0; my $pgmname = $0; if ($pgmname =~ /\w{1}:\\.*/) { my @tmpsp = split(/\\/,$pgmname); $pgmname = $tmpsp[-1]; } my $outfile = "temp.".$pgmname.".txt"; if ( open( $LOG, ">$outfile" ) ) { $write_log = 1; } else { $write_log = 0; prt( "WARNING: Unable to open $outfile LOG ...\n" ); } ################################################################################## # *** ALTER THIS TO POINT TO YOUR OWN TEST DOCUMENT, AND TEST TEXT FILE RESULT *** #my $infile = 'c:\tmp\test.doc'; #my $file_out = 'c:\tmp\test.txt'; my $infile = 'c:\tmp\test3.doc'; my $file_out = 'c:\tmp\test3.txt'; my $do_clean = 1; # OR ACCEPT COMMAND LINE INPUT parse_args(@ARGV); ################################################################################## my $dbg1 = 0; ### can be all in one line ### my $Word = Win32::OLE->GetActiveObject('Word.Application') || Win32::OLE->new('Word.Application', 'Quit'); ### or tested, like ... my $Word = Win32::OLE->GetActiveObject('Word.Application'); if ($Word) { prt( "Using running application found ... \n" ); } else { prt( "No running application found ... starting new ...\n" ); $Word = Win32::OLE->new('Word.Application', 'Quit'); if ($Word) { prt("Loaded Word application ...\n"); } else { mydie("YEEK! Unable to load word ... aborting \n" ); } } ###prt( "Setting word as visible ...\n" ); ###$Word->{'Visible'} = 1; # if you want to see what's going on, but it is all quite quick ;=)) prt( "Loading Word application with $infile...\n" ); #my $doc = $Word->Documents->Open({ # FileName => $infile, # ConfirmConversions => 0, # ReadOnly => 1, # AddToRecentFiles => 0, # Format => wdOpenFormatAuto}); # OR SIMPLY ... my $doc = $Word->Documents->Open($infile) || mydie("Unable to open document " . Win32::OLE->LastError()); # appears these are not needed ... #print "Setting the file name to $file_out ...\n"; #$Word->Documents->FileName($file_out); #$Word->Documents->FileFormat(1); $Word->Documents->AddToRecentFiles(0); if ( -f $file_out ) { print "Deleting previous output $file_out ...\n"; unlink $file_out; # remove any previous } # not sure this is needed, or does anything ... ###$Word->DisplayAlerts = wdAlertsNone; - no, should be $Word->{'DisplayAlerts'} = wdAlertsNone; # lots of tries that FAILED ###$Word->Documents->SaveAs( { 'FileName->'."$file_out",}); ###my $cmd = "FileName -> $file_out"; ###$Word->Documents->SaveAs( { $cmd } ); ###$Word->Documents->SaveAs( "$file_out" ); # $Word->Documents->SaveAs( FileName => "$file_out" ); # $Word->Documents->SaveAs({ # $Word->SaveAs({ # $Word->ActiveDocument->SaveAs({ # and then it WORKED with ... my $aDoc = $Word->ActiveDocument; if ($aDoc) { prt( "Doing SaveAs on ActiveDocument ...to $file_out ...\n" ); $aDoc->SaveAs({ FileName => "$file_out", FileFormat => wdFormatDOSTextLineBreaks }); if ( -f $file_out) { prt("Appears new file created ...\n"); $Word->Documents->Close; $Word->Documents->Quit; $Word->Quit; # close it all ... if ($do_clean) { if (open FIL, "<$file_out") { my @lines = <FIL>; my @nlines = (); close FIL; prt( "Got ".scalar @lines." to process ...\n" ); my $had = 0; foreach my $line (@lines) { my $ln = trim_all($line); prt( "$ln\n" ) if ($dbg1); if (($ln eq '>')||(length($ln) == 0)) { if ($had) { prt( "Had HAD ... skipped ...\n" ) if ($dbg1); next; } prt( "Set HAD ... skip next ...\n" ) if ($dbg1); $had = 1; } else { prt( "Clear HAD ... no skip ...\n" ) if ($dbg1); $had = 0; } push(@nlines, $line); } write_a_file( $file_out, @nlines ); } else { prt("Warning: unable to open file $file_out ...\n"); } } } else { prt("Warning: text file not created ...\n"); $Word->Documents->Close; $Word->Documents->Quit; $Word->Quit; # close it all ... } } else { prt("ERROR: Failed to get ActiveDocument ...\n"); $Word->Documents->Close; $Word->Documents->Quit; $Word->Quit; # close it all ... } ##$Word->Documents->Close; ##$Word->Documents->Quit; ##$Word->Quit; # close it all ... close $LOG; ### system($outfile); # ENUMERATION OF OUTPUT FORMATS # wdFormatDocument - Microsoft Word format. # wdFormatDOSText - Microsoft DOS text format. # wdFormatDOSTextLineBreaks - Microsoft DOS text with line breaks preserved. # wdFormatEncodedText - Encoded text format. # wdFormatFilteredHTML - Filtered HTML format. # wdFormatHTML - Standard HTML format. # wdFormatRTF - Rich text format (RTF). # wdFormatTemplate - Microsoft Word template format. # wdFormatText - Microsoft Windows text format. # wdFormatTextLineBreaks - Microsoft Windows text format with line breaks preserved. # wdFormatUnicodeText - Unicode text format. # wdFormatWebArchive - Web archive format. # wdFormatXML - Extensible Markup Language (XML) format. ################################ ### output and log file sub wlog { my $ml = shift; print $LOG $ml; } sub prt { my $m = shift; if ($write_log) { wlog($m); } print $m; } sub mydie { my $msg = shift; if ($write_log) { wlog($msg); } die $msg; } ############################## # argument parsing # Ensure argument exists, or die. sub require_arg { my ($arg, @arglist) = @_; mydie( "ERROR: no argument given for option '$arg' ...\n" ) if ! @arglist; } sub give_help { prt( "$pgmname [-in filepath] [-out filename] [-clean]\n" ); prt( "Default in = $infile, out = $file_out ...\n" ); mydie( "And of course you MUST have MS Word installed ...\n" ); } sub parse_args { my (@av) = @_; while (@av) { my $arg = $av[0]; if (($arg eq '?')|| ($arg eq '-?')|| ($arg eq '-h')|| ($arg eq -'H')) { give_help(); } elsif ($arg eq '-clean') { $do_clean = 1; prt( "Do clean set...\n" ); } elsif ($arg eq '-in') { require_arg(@av); shift @av; $arg = $av[0]; $infile = $arg; prt( "In file set to [$infile]\n" ); } elsif ($arg eq '-out') { require_arg(@av); shift @av; $arg = $av[0]; $file_out = $arg; prt( "Out file set to [$file_out]\n" ); } else { prt( "ERROR: Unknown command [$arg] ...\n" ); give_help(); } shift @av; } } ##################### ### file action sub write_a_file { my ($fil, @txt) = @_; open WOF, ">$fil" or mydie("ERROR: Unable to open $fil! $!\n"); print WOF @txt; close WOF; } sub trim_all { my ($ln) = shift; chomp $ln; # remove CR (\n) $ln =~ s/\r$//; # remove LF (\r) $ln =~ s/\t/ /g; # TAB(s) to a SPACE while ($ln =~ /\s\s/) { $ln =~ s/\s\s/ /g; # all double space to SINGLE } while ($ln =~ /^\s/) { $ln = substr($ln,1); # remove all LEADING space } while ($ln =~ /\s$/) { $ln = substr($ln,0, length($ln) - 1); # remove all TRAILING space } return $ln; } # eof - autoword02.pl