# TWiki Collaboration Platform, http://TWiki.org/
#
# Copyright (C) 1999-2004 Peter Thoeny, peter@thoeny.com
#
# For licensing info read license.txt file in the TWiki root.
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details, published at 
# http://www.gnu.org/copyleft/gpl.html
=pod

---+ package TWiki::Contrib::OO2TWiki

Addon for converting Open Document Text (.odt) and StarOffice/OpenOffice.org
Star Writer (.sxw) word processor files to TWiki markup.  Open Document format
is used by OpenOffice.org 2.0 and KOffice Writer.

=cut

package TWiki::Contrib::OO2TWiki;

use strict;
use integer;

#Use CGI::Carp;
use Error qw( :try );
use TWiki;
use TWiki::UI;
use TWiki::UI::Upload;
use TWiki::Func;
use TWiki::Meta;
use File::Temp;
use File::Spec;
use Archive::Zip qw(:ERROR_CODES);

# Package-wide state shared by the subs below.
#   $VERSION       - release number of this add-on
#   $pluginName    - name used in debug messages and in the stylesheet path
#   $debug         - set per request from the OO2TWIKI_DEBUG preference flag
#   $wikiWordRegex - regular expression obtained from TWiki::Func, used to
#                    decide whether a topic name needs [[...]] brackets
our $VERSION    = '0.4';
our $pluginName = 'OpenOffice2TWikiAddOn';  # Name of this Plugin
our $debug;
#TWiki::setupRegexes(); # why is this necessary?
our $wikiWordRegex = TWiki::Func::getRegularExpression('wikiWordRegex');

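=pod

---++ StaticMethod convert( $session )
=convert= command handler.
This method is designed to be
invoked via the =TWiki::UI::run= method. The CGI query, web name and topic
name are taken from =$session=. When the query has no =filepath= parameter
the upload form is shown; otherwise the uploaded document is converted.

=cut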
# Entry point for the 'convert' script.
#
# Pulls the CGI query, web name and topic name out of the session and then
# dispatches: an empty 'filepath' parameter means no document was submitted
# yet, so the upload form is shown (prepare); otherwise the submitted
# document is converted (doconvert).
sub convert {
    my ($session) = @_;

    my ( $cgi, $web, $topic ) =
      @{$session}{ qw( cgiQuery webName topicName ) };

    my $uploadName = $cgi->param( 'filepath' ) || "";
    my $login      = $session->{user}->login();

    # hack to make TWiki::Func::* work
    $TWiki::Plugins::SESSION = $session;

    my @handlerArgs = ( $session, $web, $topic, $login, $cgi );

    return prepare( @handlerArgs ) if $uploadName =~ /^$/;
    return doconvert( @handlerArgs );
}

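=pod

---++ prepare( $session, $webName, $topic, $userName, $query )
Prepare for a 'convert' URL by rendering the upload form (a modified copy of
the =attachnew= template). CGI parameters are:
| =skin= | Skin to use in presenting pages |

=cut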
# prepare( $session, $webName, $topic, $userName, $query )
#
# Renders the form used to upload a document for conversion. The form is the
# standard "attachnew" template with its wording and target script rewritten
# so that the submission goes to the 'convert' script instead of 'upload'.
#
#   $session  - TWiki session object
#   $webName  - web the new topic will be created in
#   $topic    - topic that will become the parent of the converted topic
#   $userName - login name of the current user
#   $query    - CGI query object (not used here; kept for a uniform signature)
#
# The finished page is sent to the browser through
# $session->writeCompletePage(); nothing useful is returned.
sub prepare {
  my ( $session, $webName, $topic, $userName, $query ) = @_;
  my $skin = $session->getSkin();

  TWiki::UI::checkMirror( $session, $webName, $topic );
  TWiki::UI::checkWebExists( $session, $webName, $topic, 'view' );

  $debug = TWiki::Func::getPreferencesFlag( "OO2TWIKI_DEBUG" );

  # The "create link" checkbox is only usable if the user may change the parent
  my $wikiUserName = TWiki::Func::getWikiName( $userName );
  my $enableLink = TWiki::Func::checkAccessPermission( "CHANGE", $wikiUserName, undef, $topic, $webName );

  # Only the meta data of the parent topic is needed (for rendering context)
  my ( $meta ) = $session->{store}->readTopic( $session->{user}, $webName, $topic, undef );

  # modify the attach template to do a conversion
  # KoenMartens: wouldn't it be better to use a dedicated template?
  $skin = TWiki::Func::getPreferencesValue( "SKIN" ) unless ( $skin );
  my $tmpl = TWiki::Func::readTemplate( "attachnew", $skin );
  $tmpl =~ s/\bnew\b/converted/g;
  $tmpl =~ s/\bAttach\b/Upload and reparent/g; # SMELL - may not make sense with some skins
  $tmpl =~ s/%SCRIPTURLPATH%\/upload/%SCRIPTURLPATH%\/convert/g;
  $tmpl =~ s/%SCRIPTURLPATH\{"upload"\}%/%SCRIPTURLPATH\{"convert"\}%/g;

  # There is no existing attachment to describe, so blank those placeholders
  $tmpl =~ s/%ATTACHTABLE%//g;
  $tmpl =~ s/%FILEUSER%//g;
  $session->enterContext( 'can_render_meta', $meta );
  $tmpl =~ s/%ATTACHLINKBOX%/disabled="1"/g unless $enableLink;
  $tmpl = TWiki::handleCommonTags( $session, $tmpl, $webName, $topic );
  $tmpl = $session->{renderer}->getRenderedVersion( $tmpl, $webName, $topic );
  $tmpl =~ s/%HIDEFILE%//g;
  $tmpl =~ s/%FILENAME%//g;
  $tmpl =~ s/%FILEPATH%//g;
  $tmpl =~ s/%FILECOMMENT%//g;
  $tmpl =~ s/( ?) *<\/?(nop|noautolink)\/?>\n?/$1/gis;   # remove <nop> and <noautolink> tags

  TWiki::writeDebug($session,"[$$] Preparing to convert a child topic of '$webName/$topic' with $pluginName") if $debug;

  # writeCompletePage sends the header and the page body, so the template
  # must not be printed a second time afterwards (that would duplicate the
  # whole page in the response).
  $session->writeCompletePage( $tmpl );
}

=pod

---++ doconvert( $session, $webName, $parentTopic, $userName, $query )
Perform the functions of an 'upload' URL, but convert the relevant parts of
the uploaded document using XSLT, save the result as a new topic and attach
the embedded images to that topic.
CGI parameters passed in $query:
| =filepath= | Client side name of the uploaded file |
| =filecomment= | Comment to associate with file in parent topic |
| =createlink= | if defined, will create a link to the new topic at end of the parent topic |
| =hidefile= | if defined, will hide images from the attachment table |

=cut
# doconvert( $session, $webName, $parentTopic, $userName, $query )
#
# Converts an uploaded .odt/.sxw document into a new topic in $webName:
#   1. derives a topic name from the uploaded file name,
#   2. extracts content.xml from the uploaded zip archive,
#   3. runs xsltproc with the stylesheet "<extension>.xslt" found in the
#      add-on's pub directory,
#   4. post-processes the result and saves it as the new topic with
#      $parentTopic as its parent,
#   5. attaches the original document and every embedded picture/object,
#   6. optionally appends a bullet linking the new topic to the parent,
#   7. redirects the browser to the new topic.
#
#   $session     - TWiki session object
#   $webName     - web in which the topic is created
#   $parentTopic - topic that becomes the parent of the new topic
#   $userName    - login name of the current user (currently unused)
#   $query       - CGI query; reads filepath, filecomment, createlink, hidefile
#
# Throws Error::Simple when the file name cannot be turned into a topic name,
# no file was uploaded, the stylesheet is missing, the archive cannot be read,
# content.xml is missing, or xsltproc cannot be started / dies on a signal.
# A non-zero xsltproc exit status is only reported on STDERR.
#
# NOTE(review): edit locks taken here are not released when an exception is
# thrown later on; cheap validation is therefore done before locking.
sub doconvert {
  my ( $session, $webName, $parentTopic, $userName, $query ) = @_;

  TWiki::UI::checkMirror( $session, $webName, $parentTopic );

  my $meta = TWiki::Meta->new();
  my $text = "";
  my $parentText = "";
  my $oopsUrl = "";
  my $filePath = $query->param( 'filepath' ) || "";
  my $zipFile = $query->upload( 'filepath' ) || "";
  my $fileComment = $query->param( 'filecomment' ) || "";
  my $createLink = $query->param( 'createlink' ) || "";
  my $hideFile = $query->param( 'hidefile' ) || "";

  $debug = TWiki::Func::getPreferencesFlag( "OO2TWIKI_DEBUG" );
  TWiki::writeDebug($session,"[$$] Converting '$filePath' with $pluginName") if $debug;

  # Some browsers send the full client-side path; keep only the last
  # component (the capture also untaints the name).
  $filePath =~ m|([^\\/]*)$|;
  $filePath = $1;

  # try to convert it to a WikiWord; the extension selects the stylesheet
  my ( $newTopic, $extension ) = _topicNameFromFileName( $filePath );
  my $stylesheet = ( defined $extension ? $extension : "sxw" ) . ".xslt";

  TWiki::writeDebug($session,"[$$] Rationalised '$filePath' to '$newTopic'") if $debug;
  throw Error::Simple("Invalid filename [$filePath] (cannot create a WikiWord)") unless $newTopic;
  throw Error::Simple("No file was uploaded for [$filePath]") unless $zipFile;

  # Validate the stylesheet before any lock is taken so that a missing
  # stylesheet does not leave topics locked.
  $stylesheet = TWiki::Func::getPubDir() . "/" . TWiki::Func::getTwikiWebname()
                . "/$pluginName/$stylesheet";
  TWiki::writeDebug($session,"[$$] Attempting to use '$stylesheet' for XSLT transform") if $debug;
  throw Error::Simple( "XSLT stylesheet [$stylesheet] not found") unless (-f $stylesheet);

  if ($createLink) {
    # Only the text of the parent is wanted here. Its meta data must not end
    # up in $meta, which is saved with the new topic.
    ( undef, $parentText ) = $session->{store}->readTopic( $session->{user}, $webName, $parentTopic, undef );
    # check for oops URL in case of error:
    if( $parentText =~ /^http.*?\/oops/ ) {
      TWiki::Func::redirectCgiQuery( $query, $parentText );
      return;
    }
    # only link if it's not already linked!
    $createLink = $parentText !~ /$newTopic/s;
  }
  if ($createLink) {
    # lock the parent
    $oopsUrl = TWiki::Func::setTopicEditLock( $webName, $parentTopic, 1 );
    if( $oopsUrl ) {
      TWiki::Func::redirectCgiQuery( $query, $oopsUrl ); # assuming valid query
      return;
    }
    TWiki::writeDebug($session,"[$$] Parent topic '$webName/$parentTopic' locked") if $debug;
  }

  if (TWiki::Func::topicExists($webName, $newTopic)) {
    $oopsUrl = TWiki::Func::setTopicEditLock( $webName, $newTopic, 1 );
    if( $oopsUrl ) {
      TWiki::Func::redirectCgiQuery( $query, $oopsUrl ); # assuming valid query
      return;
    }
    TWiki::writeDebug($session,"[$$] Topic '$webName/$newTopic' locked") if $debug;

    # read topic text to get META (existing attachments etc. are kept)
    ( $meta, $text ) = $session->{store}->readTopic( $session->{user}, $webName, $newTopic, undef );
    # check for oops URL in case of error:
    if( $text =~ /^http.*?\/oops/ ) {
      TWiki::Func::redirectCgiQuery( $query, $text );
      return;
    }
  }

  # Copy the upload into a temp file of our own. The document is a zip
  # archive, so both handles are switched to binary mode and the data is
  # copied in fixed-size chunks rather than "lines".
  seek( $zipFile, 0, 0 );
  binmode( $zipFile );
  my $tmpZip = _newTempFile( '.zip' );
  binmode( $tmpZip );
  my $tmpZipName = $tmpZip->filename;
  my $chunk;
  while ( read( $zipFile, $chunk, 65536 ) ) {
    print $tmpZip $chunk;
  }
  throw Error::Simple( "Cannot write upload to $tmpZipName: $!" ) unless close( $tmpZip );
  TWiki::writeDebug($session,"[$$] wrote upload to tmp file ".$tmpZipName) if $debug;

  my $zip = Archive::Zip->new();
  throw Error::Simple( "Cannot read $tmpZipName" ) unless $zip->read($tmpZipName) == AZ_OK;
  TWiki::writeDebug($session,"[$$] Zip archive '$tmpZipName' is readable") if $debug;

  # Create a temp file for extracting the content.xml into
  my $contentFile = _newTempFile( '.xml' );

  # memberNamed yields nothing when the archive has no content.xml
  my $content = $zip->memberNamed('content.xml');
  throw Error::Simple( "content.xml not found in $tmpZipName")
    unless $content && $content->extractToFileHandle($contentFile) == AZ_OK;
  TWiki::writeDebug($session,"[$$] content.xml extracted from '$tmpZipName' successfully") if $debug;

  # Create a temp file for the xsltproc output
  my $outputFile = _newTempFile( '.tml' );

  # Convert content.xml to TWiki markup using xsltproc
  # $TWiki::cfg{xsltprocCmd} must be set in the TWiki configuration
  my @xsltprocArgs = (
    "--novalid",
    "--output" => "$outputFile",   # string context yields the file name
    "$stylesheet",
    "$contentFile",
  );

  TWiki::writeDebug($session,"[$$] Calling xsltproc with args: @xsltprocArgs") if $debug;
  system($TWiki::cfg{xsltprocCmd}, @xsltprocArgs);
  if ($? == -1) {
    throw Error::Simple( "Failed to start xsltproc ($TWiki::cfg{xsltprocCmd}): $!\n");
  }
  elsif ($? & 127) {
    my $signal = $? & 127;
    printf STDERR "child died with signal %d, %s coredump\n",
      $signal,  ($? & 128) ? 'with' : 'without';
    throw Error::Simple( "Conversion failed: xsltproc died with signal $signal");
  }
  else {
    # a non-zero exit status is reported but does not abort the conversion
    printf STDERR "child exited with value %d\n", $? >> 8 unless $? >> 8 == 0;
  }
  TWiki::writeDebug($session,"[$$] xsltproc finished successfully") if $debug;

  # Read the generated markup line by line, collecting image titles
  $text = "";
  my %title = ();     # image base name => title, used as attachment comment
  my $inVerbatim = 0; # true if in a verbatim block
  seek($outputFile, 0, 0);
  while (my $line = <$outputFile>) {
    # FIXME - this will only catch the first occurrence on a line
    if ($line =~ /<img src=".*[\\\/](.*?)\.png" title="([ \w]+)"/) {
      # copy image title for attachment comment
      $title{$1} = $2;
    }
    $inVerbatim++ if ($line =~ /^<verbatim>$/);
    $inVerbatim-- if ($line =~ /^<\/verbatim>$/);
    if ($inVerbatim) {
      # fix output escaping in verbatim blocks
      $line =~ s/&lt;/</g;
      $line =~ s/&gt;/>/g;
      $line =~ s/&amp;/&/g;
    } else {
      next if ($line =~ /^(---\++ )?Table of Contents$/i); # remove superfluous TOC headings
      # fix consecutive styling to get around a TWiki rendering bug
      $line =~ s/\b\*  \*\b/ /g;
      $line =~ s/\b=  =\b/ /g;
      $line =~ s/\b==  ==\b/ /g;
      $line =~ s/\b_  _\b/ /g;
      $line =~ s/\b__  __\b/ /g;
    }
    # TODO: post-process tables to vertically merge similar cells
    $text .= $line;
  }
  TWiki::writeDebug($session,"[$$] Finished reading xsltproc output") if $debug;

  $text = _postProcessText( $text );
  TWiki::writeDebug($session,"[$$] Finished post-processing of xsltproc output") if $debug;

  # save newTopic
  $meta->put( "TOPICPARENT", { name => $parentTopic } );
  TWiki::writeDebug($session,$meta->stringify()) if $debug;
  # SMELL - need to use TWiki::Store so we can save META
  $session->{store}->saveTopic( $session->{user}, $webName, $newTopic, $text, $meta, undef );
  TWiki::writeDebug($session,"[$$] saveTopic returned") if $debug;

  # attach the original so it doesn't get lost
  my @stats = stat $zipFile;
  my $fileSize = $stats[7];
  my $fileDate = $stats[9];

  $session->{store}->saveAttachment( $webName,
                                     $newTopic,
                                     $filePath,
                                     $session->{user},
                                     { dontlog => !$TWiki::cfg{Log}{upload},
                                       comment => "Original $filePath document",
                                       createlink => $createLink,
                                       file => $tmpZipName,
                                       filepath => $filePath,
                                       filesize => $fileSize,
                                       filedate => $fileDate,
                                     } );
  TWiki::writeDebug($session,"[$$] updateAttachment of (Original) '$filePath' to '$webName/$newTopic' was successful") if $debug;

  if ($createLink) {
    # update parentTopic - don't care if it fails
    $fileComment =~ s/\s+/ /g;
    $fileComment =~ s/^\s*//;
    $fileComment =~ s/\s*$//;
    if ($newTopic =~ /^$wikiWordRegex$/) {
      $parentText .= "\t* $newTopic: $fileComment\n";
    } else {
      # not a WikiWord, so it needs explicit link brackets
      $parentText .= "\t* [[$newTopic]]: $fileComment\n";
    }
    TWiki::Func::saveTopicText( $webName, $parentTopic, $parentText, "", 0 );
    TWiki::Func::setTopicEditLock( $webName, $parentTopic, 0 ); # unlock parent
    TWiki::writeDebug($session,"[$$] Topic '$webName/$parentTopic' updated and unlocked") if $debug;
  }

  # extract the images and attach them to the new topic
  for my $image ($zip->membersMatching('((Pictures|ObjectReplacements)/.+)|Object \d+')) {
    # Create a temp file for extracting the image into (autoflushed, so the
    # size reported by stat below reflects everything that was written)
    my $imgFile = _newTempFile( '.img' );
    my $returnVal = $image->extractToFileHandle($imgFile);
    TWiki::writeDebug($session,"[$$] Extracting image returned '$returnVal'") if $debug;
    if ($returnVal != AZ_OK) {
      TWiki::writeDebug($session,"[$$] Error extracting '" . $image->fileName() . "' from '$tmpZipName' (returned $returnVal)");
      next;
    }

    # attachment name: last path component, spaces replaced by underscores
    my $member = $image->fileName();
    $member =~ s/.*[\\\/]//g;
    $member =~ s/ /_/g;
    $member =~ /^(.*)$/; # untaint
    $member = $1;
    my $titleText = $title{ (split(/\./, $member))[0] } || 'Unknown';
    TWiki::writeDebug($session,"[$$] Extracted '$member'") if $debug;
    if (index($member, '.') < 0) {
      # try to determine file type
      # FIXME: should do proper file magic check
      my $imgType = '';
      binmode $imgFile;
      seek($imgFile, 28, 0);
      read($imgFile, $imgType, 3);
      if ($imgType =~ /WMF/) {
        TWiki::writeDebug($session,"[$$] Filetype of '$member' appears to be Windows Metafile") if $debug;
        $member .= '.wmf';
        $image->fileName($image->fileName().'.wmf');
      }
      # NOTE(review): assumes $TWiki::cfg{RCS}{FgrepCmd} is a plain executable
      # path accepting these arguments - confirm against the configuration
      elsif (system($TWiki::cfg{RCS}{FgrepCmd}, "-q", "Microsoft Visio Drawing", "$imgFile") == 0) {
        TWiki::writeDebug($session,"[$$] Filetype of '$member' appears to be Microsoft Visio Drawing") if $debug;
        $member .= '.vsd';
        $image->fileName($image->fileName().'.vsd');
      } else {
        TWiki::writeDebug($session,"[$$] Filetype of '$member' is unknown") if $debug;
      }
    }

    # rewind so the attachment stream starts at the beginning of the image
    seek($imgFile, 0, 0);
    my @imgStats = stat $imgFile;
    my $imgSize = $imgStats[7];
    my $imgDate = $imgStats[9];
    $session->{store}->saveAttachment( $webName,
                                       $newTopic,
                                       $member,
                                       $session->{user},
                                       { dontlog => !$TWiki::cfg{Log}{upload},
                                         comment => "'$titleText' extracted from '$filePath'",
                                         hide => $hideFile,
                                         createlink => 0,
                                         stream => $imgFile,
                                         filepath => "$imgFile",
                                         filesize => $imgSize,
                                         filedate => $imgDate,
                                       } );
    TWiki::writeDebug($session,"[$$] updateAttachment of '$member' to '$webName/$newTopic' was successful") if $debug;
  }

  TWiki::Func::setTopicEditLock( $webName, $newTopic, 0 ); # unlock topic
  TWiki::writeDebug($session,"[$$] Topic '$webName/$newTopic' unlocked") if $debug;

  # finally, display the new topic and yell WOOHOO!!!
  TWiki::writeDebug($session,"[$$] Topic '$webName/$newTopic' finished successfully. Displaying topic.") if $debug;
  TWiki::Func::redirectCgiQuery( $query, TWiki::Func::getViewUrl( $webName, $newTopic ) );
}

# Creates an autoflushed temp file with the given suffix in the system temp
# directory; the file is kept on disk when $debug is set.
sub _newTempFile {
  my ( $suffix ) = @_;
  my $tmp = File::Temp->new( TEMPLATE => 'OO2TWikiXXXXXXXXXX',
                             DIR => File::Spec->tmpdir(),
                             UNLINK => $debug ? 0 : 1,
                             SUFFIX => $suffix );
  # autoflush otherwise we get zero file size errors; the previously
  # selected output handle is restored afterwards
  my $previous = select( $tmp );
  $| = 1;
  select( $previous );
  return $tmp;
}

# Derives a topic name from a file name or path and returns
# ( $topicName, $extension ); $topicName is '' when no usable name results
# and $extension is undef when the file name has none.
sub _topicNameFromFileName {
  my ( $fileName ) = @_;
  my $name = $fileName;
  my $extension;

  $name =~ s/^.*[\/\\]//s; # remove leading path
  if ($name =~ s/(.+)\.(\w+)/$1/) { # chop extension
    $extension = $2;
  }
  $name =~ s/^(\d+)(.*)/$2$1/; # move leading digits to end Eg. 2005AnnualReport -> AnnualReport2005
  $name =~ s/[\W_]+/ /g; # convert non-word chars to space
  $name =~ s/(\d)([[:alpha:]])/$1 $2/g; # insert space between digit and word chars
  $name = join('', map { ucfirst } split(/\s/, $name));

  # untaint; anything other than plain ASCII letters and digits is rejected
  my $topicName = '';
  if( $name =~ /^([a-zA-Z0-9]*)$/ ) {
    $topicName = $1;
  }
  return ( $topicName, $extension );
}

# Cleans up converted markup: replaces common UTF-8 byte sequences, strips
# leading dots from image links and marks headings preceding %TOC as NOTOC.
sub _postProcessText {
  my ( $text ) = @_;

  # replace some common Unicode chars (applied in this order)
  # SMELL - this should be done in xslt, but how?
  my @replacements = (
    [ "\xE2\x94\x82" => '&#9474;' ],
    [ "\xE2\x94\x80" => '&#9472;' ],
    [ "\xE2\x94\x8C" => '&#9484;' ],
    [ "\xE2\x94\x90" => '&#9488;' ],
    [ "\xE2\x94\x94" => '&#9492;' ],
    [ "\xE2\x94\x98" => '&#9496;' ],
    [ "\xE2\x94\x9C" => '&#9500;' ],
    [ "\xE2\x94\xAC" => '&#9516;' ],
    [ "\xE2\x94\xB4" => '&#9524;' ],
    [ "\xE2\x80\x9C" => '"' ], # smart(?) quotes
    [ "\xE2\x80\x9D" => '"' ],
    [ "\xE2\x80\x98" => "'" ],
    [ "\xE2\x80\x99" => "'" ],
    [ "\xE2\x80\x93" => '-' ], # used as an en-dash
    [ "\xE2\x80\x94" => '-' ], # used as an em-dash
    [ "\xE2\x80\xA2" => '-' ], # used as a bullet-point (possibly inside a table)
    [ "\xEF\x81\xAF" => '' ],
    [ "\xC2\xA0"     => '' ],
  );
  for my $pair (@replacements) {
    my ( $from, $to ) = @$pair;
    $text =~ s/\Q$from\E/$to/g;
  }

  $text =~ s/(<img src=\")\.+/$1/g; # don't allow relative image links
  # typically, any use of heading styles before TOC will be NOTOC headings;
  # each pass marks one more heading, so repeat until nothing changes
  1 while $text =~ s/(\n\r?---\++)( .*\n%TOC)/$1!!$2/gs;

  return $text;
}

1;
