# TWiki Collaboration Platform, http://TWiki.org/
#
# Copyright (C) 1999-2004 Peter Thoeny, peter@thoeny.com
#
# For licensing info read license.txt file in the TWiki root.
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details, published at
# http://www.gnu.org/copyleft/gpl.html

=pod

---+ package TWiki::Contrib::OO2TWiki

Addon for converting Open Document Text (.odt) and
StarOffice/OpenOffice.org Star Writer (.sxw) word processor files to
TWiki markup. Open Document format is used by OpenOffice.org 2.0 and
KOffice Writer.

=cut

package TWiki::Contrib::OO2TWiki;

use strict;
use integer;

#Use CGI::Carp;
use Error qw( :try );
use TWiki;
use TWiki::UI;
use TWiki::UI::Upload;
use TWiki::Func;
use TWiki::Meta;
use File::Temp;
use File::Spec;
use Archive::Zip qw(:ERROR_CODES);

# Package globals:
#   $VERSION       - version string of this add-on
#   $pluginName    - add-on name; used in debug messages and to build the
#                    path of the XSLT stylesheet under the pub directory
#   $debug         - debug flag, refreshed from the OO2TWIKI_DEBUG
#                    preference by prepare() and doconvert()
#   $wikiWordRegex - regular expression fetched once at load time
use vars qw($VERSION $pluginName $debug $wikiWordRegex);

$VERSION = '0.3';
$pluginName = 'OpenOffice2TWikiAddOn'; # Name of this Plugin

#TWiki::setupRegexes(); # why is this necessary?
$wikiWordRegex = TWiki::Func::getRegularExpression('wikiWordRegex');

=pod

---++ StaticMethod convert( $session )

=convert= command handler.
This method is designed to be invoked via the =TWiki::UI::run= method.
Only =$session= is read from the argument list; the web, topic, CGI query
and user are all taken from the session object.
=cut

sub convert {
    my $session = shift;

    # Everything else the handlers need is pulled from the session object.
    my $query     = $session->{cgiQuery};
    my $webName   = $session->{webName};
    my $topicName = $session->{topicName};
    my $filePath  = $query->param( 'filepath' ) || "";
    my $userName  = $session->{user}->login();

    # hack to make TWiki::Func::* work
    $TWiki::Plugins::SESSION = $session;

    # No 'filepath' parameter means nothing has been uploaded yet, so show
    # the upload form; otherwise run the conversion on the uploaded file.
    if ($filePath =~ /^$/) {
        prepare( $session, $webName, $topicName, $userName, $query );
    } else {
        doconvert( $session, $webName, $topicName, $userName, $query );
    }
}

=pod

---++ prepare( $session, $web, $topic, $userName, $query )

Prepare for a 'convert' URL: render the upload form by re-using the
=attachnew= template with its wording and target script rewritten, and
write the resulting page to the browser.

   * =$session= - TWiki session object
   * =$web=, =$topic= - topic that will become the parent of the converted topic
   * =$userName= - login name, used for the CHANGE permission check
   * =$query= - CGI query (accepted for symmetry with =doconvert=; not read here)

CGI parameters are:

| =skin= | Skin to use in presenting pages |

=cut

sub prepare {
    my ( $session, $webName, $topic, $userName, $query ) = @_;

    my $skin = $session->getSkin();

    TWiki::UI::checkMirror( $session, $webName, $topic );
    # return if TWiki::UI::isMirror( $webName, $topic );

    TWiki::UI::checkWebExists( $session, $webName, $topic, 'view' );
    # return unless TWiki::Func::webExists( $webName, $topic );

    $debug = TWiki::Func::getPreferencesFlag( "OO2TWIKI_DEBUG" );

    # The "create link" checkbox is only usable if the user may change the
    # parent topic.
    my $wikiUserName = TWiki::Func::getWikiName( $userName );
    my $enableLink = TWiki::Func::checkAccessPermission( "CHANGE", $wikiUserName, undef, $topic, $webName );

    # Only the meta data is needed (for the 'can_render_meta' context);
    # the topic text returned alongside it is not used.
    my ( $meta ) = $session->{store}->readTopic( $session->{user}, $webName, $topic, undef );

    # modify the attach template to do a conversion
    # KoenMartens: wouldn't it be better to use a dedicated template?
    $skin = TWiki::Func::getPreferencesValue( "SKIN" ) unless ( $skin );
    my $tmpl = TWiki::Func::readTemplate( "attachnew", $skin );
    $tmpl =~ s/\bnew\b/converted/go;
    $tmpl =~ s/\bAttach\b/Upload and reparent/go; # SMELL - may not make sense with some skins
    $tmpl =~ s/%SCRIPTURLPATH%\/upload/%SCRIPTURLPATH%\/convert/go;
    $tmpl =~ s/%SCRIPTURLPATH\{"upload"\}%/%SCRIPTURLPATH\{"convert"\}%/go;
    $tmpl =~ s/%ATTACHTABLE%//go;
    $tmpl =~ s/%FILEUSER%//go;
    $session->enterContext( 'can_render_meta', $meta );
    $tmpl =~ s/%ATTACHLINKBOX%/disabled="1"/go unless $enableLink;

    $tmpl = TWiki::handleCommonTags( $session, $tmpl, $webName, $topic );
    #$tmpl = &TWiki::Render::getRenderedVersion( $session, $tmpl, $webName, $topic );
    $tmpl = $session->{renderer}->getRenderedVersion( $tmpl, $webName, $topic );
    # $tmpl = &TWiki::handleMetaTags( $webName, $topic, $tmpl, $meta );

    # Blank out the per-attachment placeholders; this is a fresh upload.
    $tmpl =~ s/%HIDEFILE%//go;
    $tmpl =~ s/%FILENAME%//go;
    $tmpl =~ s/%FILEPATH%//go;
    $tmpl =~ s/%FILECOMMENT%//go;
    $tmpl =~ s/( ?) *<\/?(nop|noautolink)\/?>\n?/$1/gois; # remove <nop> and <noautolink> tags

    TWiki::writeDebug($session,"[$$] Preparing to convert a child topic of '$webName/$topic' with $pluginName") if $debug;

    #TWiki::Func::writeHeader( TWiki::Func::getCgiQuery() ) unless -t;
    # writeCompletePage emits the page itself, so the template must not be
    # printed a second time afterwards.
    $session->writeCompletePage( $tmpl );
}

=pod

---++ doconvert( $session, $web, $topic, $userName, $query )

Perform the functions of an 'upload' url but convert the relevant parts
using XSLT and attach embedded images to the new topic.

CGI parameters passed in $query:

| =filepath= | Client side name of the uploaded file |
| =filecomment= | Comment to associate with file in parent topic |
| =createlink= | if defined, will create a link to file at end of topic |
| =hidefile= | if defined, will hide images from the attachment table |

Outline of what the code does:

   1 Derive the new topic name from the uploaded file name and pick the XSLT stylesheet named after the file extension (default =sxw.xslt=).
   1 Take edit locks on the parent topic (when linking) and on the new topic if it already exists.
   1 Copy the upload to a temporary zip file, extract =content.xml= from it and run the configured xsltproc command on it.
   1 Post-process the xsltproc output and save it as the text of the new topic.
   1 Attach the original document and every embedded picture/object to the new topic.
   1 Redirect the browser to the view URL of the new topic.

Throws =Error::Simple= when no topic name can be derived, the stylesheet is
missing, the zip cannot be read, =content.xml= cannot be extracted, or
xsltproc cannot be started / dies on a signal.

=cut

sub doconvert {
    my ( $session, $webName, $parentTopic, $userName, $query ) = @_;
    # NOTE(review): $userName is only referenced by commented-out code below.

    TWiki::UI::checkMirror( $session, $webName, $parentTopic );
    #return if TWiki::UI::isMirror( $webName, $parentTopic );

    my $meta = new TWiki::Meta;
    my $text = "";
    my $parentText = "";
    my $oopsUrl = "";
    my $filePath = $query->param( 'filepath' ) || "";
    my $newTopic = $filePath;
    # Handle on the uploaded file; it is read from, stat'ed and seek'ed below.
    my $zipFile = $query->upload( 'filepath' ) || "";
    # why doesn't this work?
    #my $zipFile = $query->tmpFileName( $filePath );
    my $fileComment = $query->param( 'filecomment' ) || "";
    my $createLink = $query->param( 'createlink' ) || "";
    my $hideFile = $query->param( 'hidefile' ) || "";
    my $stylesheet = "sxw.xslt";

    $debug = TWiki::Func::getPreferencesFlag( "OO2TWIKI_DEBUG" );
    TWiki::writeDebug($session,"[$$] Converting '$filePath' with $pluginName") if $debug;

    # try to convert it to a WikiWord
    # NOTE(review): the next substitution replaces the matched tail with
    # itself, so it does not strip anything; any directory part of the
    # client-side path survives into the steps below.
    $newTopic =~ s/([^\/\\]*$)/$1/; # remove leading path
    if ($newTopic =~ s/(.+)\.(\w+)/$1/) { # chop extension
        $stylesheet = "$2.xslt"; # use extension for the stylesheet name
    }
    $newTopic =~ s/^(\d+)(.*)/$2$1/go; # move leading digits to end Eg. 2005AnnualReport -> AnnualReport2005
    $newTopic =~ s/[\W_]+/ /go; # convert non-word chars to space
    $newTopic =~ s/(\d)([[:alpha:]])/$1 $2/go; # insert space between digit and word chars
    # Capitalise each space-separated word and glue the words together.
    $newTopic = join('', map { ucfirst } split(/\s/, $newTopic));
    # untaint
    if( $newTopic =~ /^([a-zA-Z0-9]*)$/ ) {
        $newTopic=$1;
    } else {
        $newTopic='';
    }
    # Reduce the client-side path to its last component; this is the name
    # the original document is attached under.
    $filePath=~m|([^\\/]*)$|;
    $filePath=$1;
    TWiki::writeDebug($session,"[$$] Rationalised '$filePath' to '$newTopic'") if $debug;
    throw Error::Simple("Invalid filename [$newTopic] (cannot create a WikiWord)") unless $newTopic;

    if ($createLink) {
        ( $meta, $text ) = $session->{store}->readTopic( $session->{user}, $webName, $parentTopic, undef );
        # check for oops URL in case of error:
        if( $text =~ /^http.*?\/oops/ ) {
            TWiki::Func::redirectCgiQuery( $query, $text );
            return;
        }
        # only link if it's not already linked!
        # NOTE(review): the parent's text was read into $text above, while
        # $parentText is still the empty string here, so this test is
        # always true. Presumably $parentText was meant to receive the
        # topic text - confirm.
        $createLink = $parentText !~ /$newTopic/s;
    }

    if ($createLink) {
        # lock the parent
        $oopsUrl = TWiki::Func::setTopicEditLock( $webName, $parentTopic, 1 );
        if( $oopsUrl ) {
            TWiki::Func::redirectCgiQuery( $query, $oopsUrl ); # assuming valid query
            return;
        }
        TWiki::writeDebug($session,"[$$] Parent topic '$webName/$parentTopic' locked") if $debug;
    }

    if (TWiki::Func::topicExists($webName, $newTopic)) {
        # The target topic already exists: lock it and pick up its meta data.
        $oopsUrl = TWiki::Func::setTopicEditLock( $webName, $newTopic, 1 );
        if( $oopsUrl ) {
            TWiki::Func::redirectCgiQuery( $query, $oopsUrl ); # assuming valid query
            return;
        }
        TWiki::writeDebug($session,"[$$] Topic '$webName/$newTopic' locked") if $debug;

        # read topic text to get META
        ( $meta, $text ) = $session->{store}->readTopic( $session->{user}, $webName, $newTopic, undef );
        #($meta, $text) = TWiki::Store::readTopic( $webName, $newTopic );
        # check for oops URL in case of error:
        if( $text =~ /^http.*?\/oops/ ) {
            TWiki::Func::redirectCgiQuery( $query, $text );
            return;
        }
    }

    # FIXME oops if file not found
    # Stylesheets are looked up under <pubdir>/<twiki web>/<add-on name>/.
    $stylesheet = TWiki::Func::getPubDir() . "/" . TWiki::Func::getTwikiWebname() . "/$pluginName/$stylesheet";
    TWiki::writeDebug($session,"[$$] Attempting to use '$stylesheet' for XSLT transform") if $debug;
    throw Error::Simple( "XSLT stylesheet [$stylesheet] not found") unless (-f $stylesheet);

    # Copy the upload into a named temporary .zip so it can be opened by
    # file name. Temp files are kept on disk when debugging is enabled.
    seek($zipFile,0,0);
    my $tmpZip = new File::Temp(TEMPLATE => 'OO2TWikiXXXXXXXXXX',
                                DIR => File::Spec->tmpdir(),
                                UNLINK => $debug ? 0 : 1,
                                SUFFIX => '.zip');
    my $tmpZipName = $tmpZip->filename;
    while(<$zipFile>) {
        print $tmpZip $_;
    }
    close($tmpZip);
    TWiki::writeDebug($session,"wrote upload to tmp file ".$tmpZipName);
    # close $zipFile if( $TWiki::OS eq "WINDOWS");

    my $zip = new Archive::Zip;
    throw Error::Simple( "Cannot read $tmpZipName" ) unless $zip->read($tmpZipName) == AZ_OK;
    TWiki::writeDebug($session,"[$$] Zip archive '$tmpZipName' is readable") if $debug;

    # Create a temp file for extracting the content.xml into
    my $contentFile = new File::Temp(TEMPLATE => 'OO2TWikiXXXXXXXXXX',
                                     DIR => File::Spec->tmpdir(),
                                     UNLINK => $debug ? 0 : 1,
                                     SUFFIX => '.xml');
    select($contentFile); $|=1; # autoflush otherwise we get zero file size errors
    select(STDOUT);

    # NOTE(review): the result of memberNamed() is used without a check;
    # presumably it is undef when the archive has no content.xml, in which
    # case the method call below would die before the intended
    # Error::Simple is thrown - verify against Archive::Zip.
    my $content = $zip->memberNamed('content.xml');
    throw Error::Simple( "content.xml not found in $tmpZipName") unless $content->extractToFileHandle($contentFile) == AZ_OK;
    TWiki::writeDebug($session,"[$$] content.xml extracted from '$tmpZipName' successfully") if $debug;

    # Create a temp file for output (gets re-used for attachments)
    my $outputFile = new File::Temp(TEMPLATE => 'OO2TWikiXXXXXXXXXX',
                                    DIR => File::Spec->tmpdir(),
                                    UNLINK => $debug ? 0 : 1,
                                    SUFFIX => '.tml');
    select($outputFile); $|=1; # autoflush otherwise we get zero file size errors
    select(STDOUT);

    # Transform content.xml with the stylesheet using xsltproc.
    # The command is taken from $TWiki::cfg{xsltprocCmd}.
    my @xsltprocArgs;
    push @xsltprocArgs, "--novalid", "--output" => "$outputFile", "$stylesheet", "$contentFile";
    TWiki::writeDebug($session,"[$$] Calling xsltproc with args: @xsltprocArgs") if $debug;
    # List-form system(): the arguments are passed without a shell.
    system($TWiki::cfg{xsltprocCmd}, @xsltprocArgs);
    if ($? == -1) {
        # NOTE(review): this message interpolates $TWiki::xsltprocCmd,
        # whereas the command actually run is $TWiki::cfg{xsltprocCmd}.
        throw Error::Simple( "Failed to start xsltproc ($TWiki::xsltprocCmd): $!\n");
    } elsif ($? & 127) {
        printf STDERR "child died with signal %d, %s coredump\n", ($? & 127), ($? & 128) ? 'with' : 'without';
        throw Error::Simple( "Conversion failed: '$!'");
    } else {
        # A non-zero exit status is only reported on STDERR; processing
        # continues regardless.
        printf STDERR "child exited with value %d\n", $? >> 8 unless $? >> 8 == 0;
    }
    TWiki::writeDebug($session,"[$$] xsltproc finished successfully") if $debug;

    # Read the generated markup back, line by line, and post-process it.
    $text = "";
    my %title = ();
    my $inVerbatim = 0; # true if in a verbatim block
    seek($outputFile, 0, 0);
    while (my $line = <$outputFile>) {
        # FIXME - this will only catch the first occurrence on a line
        # NOTE(review): the statement below looks truncated (an 'if' with no
        # body and an empty-looking pattern). Nothing visible ever fills
        # %title or increments $inVerbatim, so code appears to be missing
        # here - restore from the original source before relying on it.
        if ($line =~ /$/);
        $inVerbatim-- if ($line =~ /^<\/verbatim>$/);
        if ($inVerbatim) {
            # fix output escaping in verbatim blocks
            # NOTE(review): as written the first substitution deletes '<' and
            # the second replaces '&' with itself; these were presumably
            # entity-unescaping substitutions - confirm against the original.
            $line =~ s/<//go;
            $line =~ s/&/&/go;
        } else {
            next if ($line =~ /^(---\++ )?Table of Contents$/i); # remove superfluous TOC headings
            # fix consecutive styling to get around a TWiki rendering bug
            $line =~ s/\b\* \*\b/ /go;
            $line =~ s/\b= =\b/ /go;
            $line =~ s/\b== ==\b/ /go;
            $line =~ s/\b_ _\b/ /go;
            $line =~ s/\b__ __\b/ /go;
        }
        # TODO: post-process tables to vertically merge similar cells
        $text .= $line;
    }
    TWiki::writeDebug($session,"[$$] Finished reading xsltproc output") if $debug;

    # replace some common Unicode chars
    # SMELL - this should be done in xslt, but how?
    # The patterns are the UTF-8 byte sequences of the characters.
    $text =~ s/\xE2\x94\x82/│/go;
    $text =~ s/\xE2\x94\x80/─/go;
    $text =~ s/\xE2\x94\x8C/┌/go;
    $text =~ s/\xE2\x94\x90/┐/go;
    $text =~ s/\xE2\x94\x94/└/go;
    $text =~ s/\xE2\x94\x98/┘/go;
    $text =~ s/\xE2\x94\x9C/├/go;
    $text =~ s/\xE2\x94\xAC/┬/go;
    $text =~ s/\xE2\x94\xB4/┴/go;
    $text =~ s/\xE2\x80\x9C/"/go; # smart(?) quotes
    $text =~ s/\xE2\x80\x9D/"/go;
    $text =~ s/\xE2\x80\x98/'/go;
    $text =~ s/\xE2\x80\x99/'/go;
    $text =~ s/\xE2\x80\x93/-/go; # used as an en-dash
    $text =~ s/\xE2\x80\x94/-/go; # used as an em-dash
    $text =~ s/\xE2\x80\xA2/-/go; # used as a bullet-point (possibly inside a table)
    $text =~ s/\xEF\x81\xAF//go;
    $text =~ s/\xC2\xA0//go;
    # NOTE(review): the next statement is incomplete - a substitution that
    # is never closed runs straight into what looks like the tail of a
    # put( "TOPICPARENT", ... ) call (presumably on $meta). Restore the
    # missing code from the original source.
    $text =~ s/(put( "TOPICPARENT", { name => $parentTopic } );
    TWiki::writeDebug($session,$meta->stringify());

    # SMELL - need to use TWiki::Store so we can save META
    $session->{store}->saveTopic( $session->{user}, $webName, $newTopic, $text, $meta, undef );
    TWiki::writeDebug($session,"[$$] saveTopic returned") if $debug;

    # attach the original so it doesn't get lost
    # NOTE(review): $pubDir is only referenced by commented-out code below.
    my $pubDir = TWiki::Func::getPubDir() || "";
    $pubDir .= "/$webName/$newTopic";

    # SMELL - nothing equivalent in TWiki::Func
    # my @error = TWiki::UI::Upload::updateAttachment( $webName,
    #                                                  $newTopic,
    #                                                  $userName,
    #                                                  0, # no need to create links
    #                                                  0, # no point doing properties only
    #                                                  $filePath,
    #                                                  "$zipFile", # must be in string context to get filename!
    #                                                  "", # attachment name (ignored if !propsOnly)
    #                                                  0, # never hide original file
    #                                                  "Original $filePath document" );

    # check if upload has non zero size
    # NOTE(review): the size is only recorded and passed on; no check is made.
    my @stats = stat $zipFile;
    my $fileSize = $stats[7];
    my $fileDate = $stats[9];

    $session->{store}->saveAttachment( $webName, $newTopic, $filePath, $session->{user},
                                       { dontlog => !$TWiki::cfg{Log}{upload},
                                         comment => "Original $filePath document",
                                         # hide => $hideFile,
                                         createlink => $createLink,
                                         # stream => $stream,
                                         file => $tmpZipName,
                                         filepath => $filePath,
                                         filesize => $fileSize,
                                         filedate => $fileDate,
                                       } );
    # if ($error[0]) {
    #     TWiki::Func::redirectCgiQuery( TWiki::Func::getOopsUrl($webName, $newTopic, @error) );
    #     return;
    # }
    TWiki::writeDebug($session,"[$$] updateAttachment of (Original) '$filePath' to '$webName/$newTopic' was successful") if $debug;

    if ($createLink) {
        # update parentTopic - don't care if it fails
        # Collapse whitespace in the comment and trim both ends.
        $fileComment =~ s/\s+/ /go;
        $fileComment =~ s/^\s*//o;
        $fileComment =~ s/\s*$//o;
        # Names that are not WikiWords need explicit [[...]] link syntax.
        if ($newTopic =~ /^$wikiWordRegex$/) {
            $parentText .= "\t* $newTopic: $fileComment\n";
        } else {
            $parentText .= "\t* [[$newTopic]]: $fileComment\n";
        }
        # NOTE(review): $parentText holds nothing but the bullet appended
        # above, so this saves just that one line as the parent topic's
        # text - confirm this is not clobbering the existing parent content.
        TWiki::Func::saveTopicText( $webName, $parentTopic, $parentText, "", 0 );
        TWiki::Func::setTopicEditLock( $webName, $parentTopic, 0 ); # unlock parent
        TWiki::writeDebug($session,"[$$] Topic '$webName/$parentTopic' updated and unlocked") if $debug;
    }

    # extract the images and attach them to the new topic
    for my $image ($zip->membersMatching('((Pictures|ObjectReplacements)/.+)|Object \d+')) {
        # Create a temp file for extracting the image into
        my $imgFile = new File::Temp(TEMPLATE => 'OO2TWikiXXXXXXXXXX',
                                     DIR => File::Spec->tmpdir(),
                                     UNLINK => $debug ? 0 : 1,
                                     SUFFIX => '.img');
        # seek($contentFile, 0, 0);
        # truncate($contentFile, 0);
        my $returnVal = $image->extractToFileHandle($imgFile);
        TWiki::writeDebug($session,"[$$] Extracting image returned '$returnVal'") if $debug;
        if ($returnVal == AZ_OK) {
            # Attachment name: archive member name without its directory
            # part and with spaces turned into underscores.
            my $member = $image->fileName();
            $member =~ s/.*[\\\/]//go;
            $member =~ s/ /_/go;
            $member =~ /^(.*)$/; # untaint
            $member = $1;
            # Title looked up by the part of the name before the first dot.
            my $titleText = $title{ (split(/\./, $member))[0] } || 'Unknown';
            TWiki::writeDebug($session,"[$$] Extracted '$member'") if $debug;
            if (index($member, '.') < 0) {
                # No extension in the name.
                # try to determine file type
                # FIXME: should do proper file magic check
                my $imgType = '';
                binmode $imgFile;
                # Sniff three bytes at offset 28 of the extracted file.
                seek($imgFile, 28, 0);
                read($imgFile, $imgType, 3);
                if ($imgType =~ /WMF/) {
                    TWiki::writeDebug($session,"[$$] Filetype of '$member' appears to be Windows Metafile") if $debug;
                    $member .= '.wmf';
                    $image->fileName($image->fileName().'.wmf');
                } elsif (system($TWiki::cfg{RCS}{FgrepCmd}, "-q", "Microsoft Visio Drawing", "$imgFile") == 0) {
                    # NOTE(review): writeDebugTimes is called without $session,
                    # unlike every writeDebug call in this sub - confirm the
                    # intended function and arguments.
                    TWiki::writeDebugTimes("[$$] Filetype of '$member' appears to be Microsoft Visio Drawing") if $debug;
                    $member .= '.vsd';
                    $image->fileName($image->fileName().'.vsd');
                } else {
                    TWiki::writeDebugTimes("[$$] Filetype of '$member' is unknown") if $debug;
                }
            }

            # update properties only if same size as old file
            # my $propsOnly = 0;
            # if (defined -f "$pubDir/$member") {
            #     TWiki::writeDebug($session,"[$$] Testing size of '$pubDir/$member'") if $debug;
            #     $propsOnly = (-s "$imgFile" == -s "$pubDir/$member");
            #     TWiki::writeDebug($session,"[$$] Extracted '$member' has same size as '$pubDir/$member'. Updating properties only") if $debug and $propsOnly;
            #     TWiki::writeDebug($session,"[$$] propsOnly = '$propsOnly'") if $debug;
            #     TWiki::writeDebug($session,"[$$] hideFile = '$hideFile'") if $debug;
            # }
            my @stats = stat $imgFile;
            my $fileSize = $stats[7];
            my $fileDate = $stats[9];
            # Rewind so the attachment is read from the start of the file.
            seek($imgFile, 0, 0);

            $session->{store}->saveAttachment( $webName, $newTopic, $member, $session->{user},
                                               { dontlog => !$TWiki::cfg{Log}{upload},
                                                 comment => "'$titleText' extracted from '$filePath'",
                                                 hide => $hideFile,
                                                 createlink => 0,
                                                 # stream => $stream,
                                                 stream => $imgFile,
                                                 filepath => "$imgFile",
                                                 filesize => $fileSize,
                                                 filedate => $fileDate,
                                               } );

            # SMELL - nothing equivalent in TWiki::Func
            # my @error = TWiki::UI::Upload::updateAttachment( $webName,
            #                                                  $newTopic,
            #                                                  $userName,
            #                                                  0, # create links? they should be in the text already
            #                                                  $propsOnly, # properties only
            #                                                  $member,
            #                                                  "$contentFile", # must be in string context to get filename!
            #                                                  $member, # attachment name (ignored if !propsOnly)
            #                                                  $hideFile, # hide file
            #                                                  "'$titleText' extracted from '$filePath'" );
            # if ($error[0]) {
            #     TWiki::Func::redirectCgiQuery( TWiki::Func::getOopsUrl($webName, $newTopic, @error) );
            #     return;
            # }
            TWiki::writeDebug($session,"[$$] updateAttachment of '$member' to '$webName/$newTopic' was successful") if $debug;
        } else {
            # Extraction failures are logged and the member is skipped.
            TWiki::writeDebug($session,"[$$] Error extracting '$image' from '$tmpZipName' (returned $returnVal)");
        }
    }

    TWiki::Func::setTopicEditLock( $webName, $newTopic, 0 ); # unlock topic
    TWiki::writeDebug($session,"[$$] Topic '$webName/$newTopic' unlocked") if $debug;

    # finally, display the new topic and yell WOOHOO!!!
    TWiki::writeDebug($session,"[$$] Topic '$webName/$newTopic' finished successfully. Displaying topic.") if $debug;
    TWiki::Func::redirectCgiQuery( $query, TWiki::Func::getViewUrl( $webName, $newTopic ) );
}

1;