#====================================Perl Script: Convert HTML(MSWord) to Twiki Markup===================
#!/usr/bin/perl
use Cwd;
use File::Copy;
use Archive::Zip qw( :ERROR_CODES :CONSTANTS );

# Start time; turned into the elapsed run time (seconds) after the conversion.
my $ts = time();
$versiondate="2009-06-23";
my $dir = cwd;
# Script title = $0 without a trailing ".pl" and without any leading path.
my $scripttitle = $0;
$scripttitle=~ s/\.pl$//i;
# $fromcmd stays 1 only when $0 contains no path separator; in that case the
# final summary is printed to the console instead of shown in a message box.
my $fromcmd=1;
if ($scripttitle =~ m/\/|\\/){
	my @t = split /\/|\\/, $scripttitle;
	$scripttitle = $t[$#t];
	$fromcmd=0;
}
my $msgtitle="$scripttitle, 2009 CharlieMao";
my @develper= qw(CharlieMao charliemao@hotmail.com);
# Help() is defined outside this section; its return value is used as the
# usage text appended to the message boxes below.
my $usage=&Help(@ARGV);

# Settings with their defaults. ReadConfig() (defined outside this section) is
# called with "<cwd>/_<scripttitle>.ini" and presumably overrides them -- verify.
my $MYNAME= "";						# TWiki user name; the script exits when it stays empty
my $htmldir= $dir."/DocHTML";		# root folder that is scanned for HTML files
my $DebugDir= $dir."/ZDebug";		# receives intermediate dumps when $Debug is true
my $TopicWord = "";					# when set, topic ids become "<TopicWord><2-digit file index>"
my $hdlevel=3;
my $ProcessSubFolder=1;				# false: only the first directory of the list is processed
my $Debug= 0;
my $TagsL= "";						# extra tag patterns that PresOtherFeatures re-wraps in
my $TagsR= "";						# placeholder brackets (applied only when $TagsL is non-empty)
my $tabfmt= ""; 					# text emitted in front of every generated table and the index table
my @twikiheading;					# heading markup, indexed by the HTML heading level digit
my @AuthorNameList;
my %CustomStyleFrom;				# pattern => replacement pairs applied in ConsolidateLine ...
my %CustomStyleTo;					# ... and in Finalization, both only when $csdefined is true
my $csdefined = 0;
my $Au=0;							# false: FindTAuthDate skips its title/author/date search
my @linetoberemoved;				# line patterns blanked out by RemoveSpecialText
&ReadConfig($dir."/_".$scripttitle.".ini");
if (!$MYNAME){
	MsgBox0("Please edit configuratuon first. TWiki username is not found!\n\n".
	"=========================================\n$usage\n",0,$msgtitle);
	exit;
}

# Conversion tables, filled by HTML2TWikiMarkupSpecial(). Keys are stringified
# regular expressions, values are the replacement text.
my %HTMLTag2TempStr;
my %TempStr2TWikiMarkup;
my %TWikiMarkupDoubleSymbol;
my %HTMLDirect2TWikiMarkup;
&HTML2TWikiMarkupSpecial;

# Temporary placeholder strings; Finalization() turns them back into a newline,
# table-header markup and the characters "<" / ">".
my $tablerowend= "twikitabletwiki";
my $tbhdl= "twikitableheadrowleft";
my $tbhdr= "twikitableheadrowright";
my $left= "twikiOPENbrackets";
my $right= "twikiCLOSEbrackets";

# Bullet / numbered-list markup per list level and the date-word table;
# bulletnumberanddatestr() is defined outside this section.
my @twikibullit;
my @twikinumber;
my @twikichar;
my @dstr;
&bulletnumberanddatestr;

# Symbol/entity mapping and colour names (colour names map to TWiki colour macros).
my %htmlsym;
&definehtmlsym;
my %twikicolor;
&definecolorscheme;

# Per-file working state shared between the conversion subs.
my $nofilectr = 0;			# index of the file being converted within the current directory
my $dirfrom;				# current directory (with trailing "/")
my $fileorig;				# current file name
my $filefrom;				# current file path
my $twikimarkuptitle;		# output path of the generated TWiki text
my $zipTitle;				# output path of the image zip archive
my $imagedir;
my $twikidir;				# output directory of the current file
my $filesigniture;			# short prefix put in front of copied image names
my $TitleNoExt;
my $MSWordCtr;				# word count taken from the <o:Words> element
my $tmpctr;					# word total of the most recent ListWords() call
my $hiddentext;
my $allHText;
# Per-directory result lists (indexed by $nofilectr) and anchor bookkeeping.
my @issuedate;
my @twikitopicid;
my @authorlist;
my @papertitles;
my @doclist;
my @ConvCompList;
my @HiddenList;
my %anchorlist;
my %reflist;
# Constants: prefix of generated anchors and the TWiki attachment URL macro.
my $anchstr= "#AnchRef";
my $attachstr= "%ATTACHURLPATH%";
	
# Main run: collect the directory tree below $htmldir, convert it, report the totals.
$dir = "$htmldir";
my $noSubFolder=0;
my @alldirlist= &findalldirectory($dir);
my $totalfileprocessed=0;
&StartConversion;
$ts = time()-$ts;
if ($fromcmd){
	print "\n$totalfileprocessed files are processed in $ts s.\n";
} else {
	MsgBox0("$totalfileprocessed files are processed in $ts s.\n\n\n".
	"=========================================\n$usage\n",0,$msgtitle);
}

sub StartConversion{
	# Convert the directories listed in @alldirlist, one after another.
	# With sub-folder processing switched off only the first entry is visited.
	# The number of entries considered is left in $noSubFolder.
	$noSubFolder = $ProcessSubFolder ? scalar(@alldirlist) : 1;

	my $visited = 0;
	foreach my $idx (0 .. $noSubFolder - 1) {
		my $folder = $alldirlist[$idx];
		# A folder "X_files" is skipped when a file "X.htm" exists beside it.
		(my $companion = $folder) =~ s/_files$/\.htm/i;
		next if $folder =~ m/\/twiki_/i;	# output folders written by this script
		next if -f $companion;
		$visited++;
		print "Search $folder...\n";
		&rewritehtmlforonedirectory($folder, $visited);
	}
}

#rewrite html documents from one directory for Wiki start
sub rewritehtmlforonedirectory{
	# Convert every eligible HTML file found directly inside one directory.
	#
	# Arguments:
	#   1. directory to scan; stored in the shared $dirfrom (a "/" is appended
	#      when the name is longer than 3 characters)
	#   2. running directory number, used only as prefix of the progress output
	#
	# For each file the TWiki text is written to the path FindFileTitle() leaves
	# in $twikimarkuptitle, and a word-count comparison between the HTML body and
	# the generated markup is printed and stored in @ConvCompList. Afterwards
	# index.txt (WriteIndexFile) and index_size.csv are written into the
	# directory and $totalfileprocessed grows by the number of converted files.
	# Dies when the directory or one of the files cannot be opened.
	$dirfrom = shift;
	my $kk = shift;

	opendir(my $dh, $dirfrom) or die "Unable to open directory $dirfrom \n $!";
	my @filelistindir = readdir $dh;
	closedir $dh;
	if (length($dirfrom) > 3) {
		$dirfrom="$dirfrom/";
	}
	$nofilectr = 0;

	foreach my $entry (@filelistindir) {
		$fileorig = $entry;
		$filefrom = "$dirfrom$fileorig";
		next unless ($fileorig =~ m/\.htm/i && $dirfrom !~ /twiki\_/i &&
			$fileorig !~ m/^header.htm/i && $fileorig !~ m/\.pl$/i && $fileorig !~ m/^~/i);

		my $fctr0 = $nofilectr+1;
		print "$kk-$fctr0\t$fileorig\n";
		$hiddentext = "";
		# Hidden-word counts collected by PresOtherFeatures belong to one
		# document only; without this reset they leak into the next comparison.
		%HiddenList = ();

		#read html file
		open(my $in, '<', $filefrom) or die "Unable to open file $filefrom\n $!";
		my @lines = <$in>;
		close($in);

		&FindFileTitle($fileorig);

		#find text between <body>...</body>
		my $html = join "", @lines;
		# Take the captures only from a successful match; a failed match would
		# otherwise silently reuse $1 of an earlier one.
		$MSWordCtr = ($html =~ m/<o\:Words>(\d{1,})<\/o\:Words>/is) ? $1 : 0;
		my $fulltext = "";
		if ($html =~ m/<body[^>]*>(.+?)<\/body>/is) {
			$fulltext = $1;
		} else {
			print "\tNo <body> section found in $fileorig\n";
		}
		if ($Debug) {
			my $f = $DebugDir."/_TWiki_HTML(BodyOnly).txt";
			open(my $dbg, '>', $f) or die "Unable to open file $f.\n $!";
			print $dbg $fulltext;
			close($dbg);
		}

		$fulltext = &ConsolidateLine($fulltext);
		my $StrBC = $fulltext;		# cleaned HTML, reference for the word comparison

		$fulltext = &FormatImages($fulltext,"\n");
		$fulltext = &FormatTableControl($fulltext,"$tablerowend");
		$fulltext = &FormatOutlineHeadings($fulltext,"\n");
		$fulltext = &PresOtherFeatures($fulltext);
		$fulltext = &Finalization($fulltext);
		my $heading = &FindTAuthDate($fulltext);
		my $ending = "\n\n-- Main.$MYNAME - ".localtime(time())."\n\n%COMMENT%\n";
		open(my $out, '>', $twikimarkuptitle) or die "Unable to open file $twikimarkuptitle\n $!";
		print $out $heading;
		print $out $fulltext;
		print $out $ending;
		close($out);
		if ($Debug) {
			my $f = $DebugDir."/_TWiki_Text.txt";
			copy("$twikimarkuptitle", "$f");
		}

		#Verify conversion accuracy ===============================
		my %WordsComp1 = &ListWords($StrBC);
		my $wc1 = $tmpctr;
		my %WordsComp2 = &ListWords($fulltext);
		my $wc2 = $tmpctr;
		if ($MSWordCtr ==0) {
			$MSWordCtr= $wc1;
		}
		# The percentage has to exist before it is interpolated into the CSV
		# row; an empty document must not divide by zero.
		my $per = $MSWordCtr ? sprintf("%.1f",$wc2/$MSWordCtr*100) : "0.0";
		my $wdctr = "Word Count,$MSWordCtr,$wc1,$wc2,$per%,\t$hiddentext\n";
		print "\t(WordCount)\tWord=$MSWordCtr\tHTML=$wc1\tTwiki=$wc2\t$per%\t$hiddentext\n";
		# Disabled diagnostic, kept for reference:
		#	if ($hiddentext){
		#		$allHText = &trim($allHText);
		#		$allHText =~ s/\ +?/ /isg;
		#		$allHText =~ s/\n/ /isg;
		#		$allHText =~ s/NNNNNN/\n/isg;
		#		print "$hiddentext\n$allHText\n";
		#	}

		# Words of the HTML that occur noticeably less often in the TWiki text.
		my $s = "";
		my $sp = "";
		foreach my $key (sort keys %WordsComp1){
			my $v1 = $WordsComp1{"$key"};
			my $v2 = ($WordsComp2{"$key"} || 0) + ($HiddenList{"$key"} || 0);
			$WordsComp2{"$key"} = "";	# mark as seen; what remains below is new
			my $d = $v1 - $v2;
			if (length($key)>1 && ($v1 - $v2>1) && $key !~ m/\d*?mm$/i 
				&& $key !~ m/^\d{1,}$/i
				&& $key !~ m/^\r\ef\d*?$/i 
				){
				$s .= "\t$key\t(O)=$v1\t(N)=$v2\n";
				$sp .=",$key,$v1,$v2, $d\n";
			}
		}
		# Words that only exist in the TWiki text, minus the markup vocabulary
		# the conversion itself introduces.
		my $pat = qr/table|sort|tableborder|off|tablerules|all|cellborder|br|m|toc|wikitoc|anchrefref|\d+|endcolor|footnote/;
		my $wliststr = "";
		my $ctr = 0;
		foreach my $key (sort keys %WordsComp2){
			my $d = $WordsComp2{"$key"} || 0;
			if ($d>0 && $key !~ m/$pat/){
				$ctr += $d;
				$wliststr .= "\t$key\t$d\t$ctr\n";
			}
		}
		if ($s){
			print "\tUnmatched (word)\n$s\n";
		}
		if ($wliststr){
			print "\tNew word\n$wliststr";
			&Copy2Clipboard($wliststr);
		}
		$ConvCompList[$nofilectr] = "$entry\n$wdctr$sp\n";
		#Verify conversion accuracy ===============================

		$nofilectr++;
	}
	&WriteIndexFile($dirfrom);
	$totalfileprocessed +=$nofilectr;

	my $sizefile = $dirfrom."index_size.csv";
	open(my $csv, '>', $sizefile) or die "Unable to open file $sizefile\n $!";
	print $csv "Word Count,Word,HTML,TWiki,TWiki/Word%\n";
	# Only the entries of this directory: the shared lists are never truncated
	# and may still hold rows of a previously processed directory.
	for (my $j=0; $j<$nofilectr; $j++) {
		print $csv "$ConvCompList[$j]";
	}
	close($csv);
	print "\n";
}
#===============================================
#rewrite html documents from one directory for Wiki end

sub findalldirectory{
	# Return the given directory followed by every directory below it.
	# Directories are listed level by level (breadth-first) in the order in
	# which checkdir() reports them.
	my $dir = shift;
	my @alldirlist = ($dir);
	# The result list doubles as work queue: each entry is expanded once and
	# whatever checkdir() finds is appended for a later pass.
	my $next = 0;
	while ($next <= $#alldirlist) {
		push @alldirlist, &checkdir($alldirlist[$next]);
		$next++;
	}
	return @alldirlist;
}

sub checkdir{
	# Return the paths of the sub-directories found directly inside a directory.
	#
	# Argument: directory path. A "/" is inserted between directory and entry
	#           name when the path is longer than 3 characters.
	# Returns:  list of "<directory>/<name>" paths that are directories; the
	#           entries "." and ".." are left out.
	my $thisdir = shift;
	my @filelist = &findfiles($thisdir);
	if (length($thisdir) > 3) {
		$thisdir = $thisdir."/";
	}
	my @newsub;
	foreach my $name (@filelist) {
		# Compare the entry name itself: an end-of-path pattern would also
		# drop real directories whose name merely ends in a dot.
		next if $name eq '.' || $name eq '..';
		my $path = "$thisdir$name";
		push @newsub, $path if -d $path;
	}
	return @newsub;
}

sub findfiles{
	# Return all entry names of a directory exactly as readdir reports them
	# (this includes "." and "..").
	# Dies when the directory cannot be opened.
	my $curdir = shift;
	# A lexical handle keeps this sub from clobbering a directory handle that
	# some other routine may still have open under the same bareword name.
	opendir(my $dh, $curdir) or die "Unable to open directory $curdir\n $!";
	my @filelist = readdir $dh;
	closedir $dh;
	return @filelist;
}

sub ListWords{
	# Count how often each word occurs in a text.
	# The text is lower-cased, underscores are deleted and everything between
	# "<" and ">" is dropped before the words (\w+ runs) are counted.
	# Returns the word => count hash; the total number of words is left in
	# the shared $tmpctr.
	my $text = lc(shift);
	$text =~ s/_//isg;
	$text =~ s/<.+?>//isg;

	my %count;
	$tmpctr = 0;
	foreach my $token (grep { m/\w/ } split /(\w+)/, $text) {
		$tmpctr++;
		$count{"$token"}++;
	}
	# (A disabled debugging aid used to copy the sorted table to the clipboard.)
	return %count;
}

sub FormatOutlineHeadings{
	# Replace the HTML headings of a text by TWiki heading lines.
	#
	# Arguments: the text, and the separator string put around each heading.
	# Returns:   the text with every parsable <hN ...>...</hN> replaced by
	#            "<sep><markup for level N> <number>  <Title><sep><sep>[[#WikiTOC][TOC]]<sep>".
	# The title is capitalised by OutlineWordFC(); the level markup comes from
	# @twikiheading.
	my $fulltext = shift;
	my $sep = shift;
	my @headlist = ($fulltext =~ m/(<h.+?>.+?<\/h.>)/ismg);	# original strings
	foreach my $heading (@headlist){
		my $oldstr = quotemeta("$heading");
		# A TWiki heading has to stay on one line, so flatten every newline.
		(my $flat = $heading) =~ s/\n/\ /g;
		# Candidates the detail pattern cannot parse are left untouched rather
		# than rebuilt from the captures of a previous heading.
		next unless $flat =~ m/<h([\d]{1}).*?>([\.\d]*?)(.+?)<\/h.>/ism;
		my ($level, $number, $title) = ($1, $2, $3);
		my $fsuc = &OutlineWordFC($title);
		my $newstr = "$sep$twikiheading[$level]\ $number\ \ $fsuc$sep$sep\[\[\#WikiTOC\]\[TOC\]\]$sep";
		$fulltext =~ s/$oldstr/\n$newstr$sep/ism;
	}
	return $fulltext;
}

sub FormatImages{
	# Replace <img> tags by placeholder image markup and collect the image files.
	#
	# Arguments: the text, and the separator string put in front of each image.
	# Returns:   the text with all newlines removed, two newlines after every
	#            </p>, and each image tag replaced by
	#            "<sep><sep><sep>${left}img src="<attach url>/<new name>"${right}".
	# Side effects: every referenced image that exists below $dirfrom is copied
	# into $twikidir under "<file signature><original name>" and added to the
	# zip archive $zipTitle. Dies with 'write error' when the archive cannot be
	# written.
	my $fulltext = shift;
	my $sep = shift;
	my @imagelist = ($fulltext =~ m/(<img.+?>)/ismg);
	my @copies;		# [src as written in the HTML, new attachment name]
	foreach my $tag (@imagelist){
		# Stay inside the quotes of the src attribute; a tag without a usable
		# src must not inherit the captures of the previous image.
		my ($src) = ($tag =~ m/src=\"([^\"]+)\"/);
		unless (defined $src) {
			print "Image without src attribute skipped.\t$tag\n";
			next;
		}
		# The attachment name is everything after the first "/" (the Word
		# "..._files" folder); a src without folder is used as it is.
		my $file = ($src =~ m/^.+?\/(.+)$/s) ? $1 : $src;
		my $newname = $filesigniture.$file;
		$newname =~ s/\&\#amp\;/\&/ig;
		push @copies, [$src, $newname];
		my $oldstr = quotemeta("$tag");
		my $newstr = "$sep${left}img src=\"$attachstr/$newname\"$right";
		$fulltext =~ s/$oldstr/$sep$sep$newstr/ism;
	}
	#copy and zip images
	if (@copies) {
		my $zip = Archive::Zip->new();
		foreach my $pair (@copies){
			my ($src, $ima) = @$pair;
			my $imafrom = "$dirfrom$src";
			my $imato = "$twikidir$ima";
			$imato =~ s/\%20/\ /g;
			if (-f "$imafrom"){
				copy("$imafrom","$imato");
				$zip->addFile( "$imato","$ima");
			} else {
				print "File doesn't exist.\t$imafrom\n";
			}
		}
		unless ( $zip->writeToFileNamed("$zipTitle") == AZ_OK ) {
			die 'write error';
		}
	}
	$fulltext =~ s/\n//isg;
	$fulltext =~ s/<\/p>/<\/p>\n\n/isg;
	return $fulltext;
}

sub Symbol2Unicode{
	# Translate a text through the %htmlsym table in two passes.
	# Text that contains the attachment URL macro is returned unchanged.
	$_ = shift;
	return $_ if m/$attachstr/;

	# Pass 1: numeric entities such as "&#184;" are looked up as a whole.
	my $unicode = join "", map { $htmlsym{"$_"} || $_ } split /(\&\#\d+\;)/;
	# Pass 2: every single character of the result is looked up.
	$unicode = join "", map { $htmlsym{"$_"} || $_ } split //, $unicode;
	return "$unicode";
}

sub FindFileTitle{
	# Derive titles, ids and output locations for the file being converted.
	#
	# Argument: the file name. All results go into shared variables:
	#   $papertitles/$doclist/$issuedate/$authorlist/$twikitopicid[$nofilectr],
	#   $filesigniture, $TitleNoExt, $imagedir, $twikidir, $zipTitle and
	#   $twikimarkuptitle.
	# Side effect: the output directory "twiki_<title>" is created below
	# $dirfrom, or emptied of its plain files when it already exists.
	$_ = shift;
	s/\.\w{3,4}$//i;
	$papertitles[$nofilectr] = &WordFC($_);
	$doclist[$nofilectr] = &WordFC($_).".doc(x)";
	$issuedate[$nofilectr] = "*CopyDateHere*";
	# A run of six or more digits in the file name is taken as issue date.
	if (m/([0-9]{6,})/){
		$issuedate[$nofilectr] = $1;
	}
	$authorlist[$nofilectr] = "*CopyAuthorHere*";

	s/[\d]{4,}//gi;
	# NOTE(review): inside this class "$%" may be read by Perl as the variable
	# $% rather than as the two characters -- confirm before touching it.
	s/[\&\.\-_+=*@$%^?;:!,'"\[\]\(\)\{\}]//gi;
	#metacharacters: {}[]()^$.|*+?
	$_ = lc("$_");
	my @tmp = split / /;
	$_ = uc($tmp[0]);
	# At most the first three words form the topic id and the file signature.
	my $j=@tmp;
	if (@tmp>3){
		$j=3;
	}
	$filesigniture = $tmp[0];
	for (my $i0=1; $i0<$j; $i0++){
		$_ .= ucfirst($tmp[$i0]);
		$filesigniture .= substr($tmp[$i0],0,1);
	}
	#twiki word ID
	$twikitopicid[$nofilectr] = $_;
	if ($TopicWord){
		$twikitopicid[$nofilectr] = $TopicWord.sprintf("%02d", $nofilectr);
	}
	$filesigniture = lc($filesigniture);

	$TitleNoExt = $fileorig;
	# Remove ".htm" as well as ".html"; cutting ".ht" plus one character left
	# a stray "l" behind for ".html" files.
	$TitleNoExt =~ s/\.html?//i;
	$imagedir = $dirfrom.$TitleNoExt."_files/";
	$twikidir = $dirfrom."twiki_".$TitleNoExt;
	$zipTitle = "$twikidir\\_$TitleNoExt.zip";
	if (-d $twikidir) {
		# Empty the folder of an earlier run. Deleting the files directly
		# avoids handing a mixed-slash path to the shell's "del" command.
		if (opendir(my $olddir, $twikidir)) {
			foreach my $old (readdir $olddir) {
				my $path = "$twikidir/$old";
				unlink $path if -f $path;
			}
			closedir $olddir;
		} else {
			print "Unable to clean directory $twikidir\n $!\n";
		}
	} else {
		mkdir $twikidir;
	}
	$twikidir .= "/";

	$twikimarkuptitle = $twikidir."_$TitleNoExt.txt";
}

sub issuedatecheck{
	# Tell whether a line contains something shaped like a date.
	# Returns 1 when any of the shapes below occurs anywhere in the line,
	# otherwise 0. Matching is case-insensitive.
	$_ = shift;
	my @shapes = (
		'\d+\ +\d+\ +\d+',		# number number number
		'\d+\-\d+\-\d+',		# n-n-n
		'\d+\/\d+\/\d+',		# n/n/n
		'\w+\ +\d+\ +\d+',		# word number number
		'\w+\ +\d+\,\ +\d+',	# word number, number
		'\d+\w+\ +\d+\ +\d+',	# number+word number number
		'\w+\ +\d+',			# word number
	);
	my $alternatives = join '|', @shapes;
	my $dpat = qr/($alternatives)/;
	return m/$dpat/i ? 1 : 0;
}

sub FindTAuthDate{
	# Build the header block that is written in front of the converted text.
	#
	# Argument: the converted text (only read when $Au is true).
	# Returns:  a string holding a suggested TWiki index row, the title line,
	#           author, issue date and a table-of-contents section.
	# When $Au is true the title, author and issue date stored for the current
	# file ($nofilectr) are first re-derived from the opening lines of the
	# text; otherwise the values already stored are used as they are.
	if (!$Au){
		goto continueout;
	}

	$_ = shift;
	$issuedate[$nofilectr]="";
	$authorlist[$nofilectr]= "";
	$papertitles[$nofilectr]= "";
	my $to = 40;
	
	#limit title, author and date search to first heading
	# The first of the leading 40 lines that contains three or more dashes
	# ends the search range. $to becomes j-1, so the "< $to" loops below stop
	# two lines before that line.
	my @lines = split /\n/;
	for (my $j=0; $j<$to; $j++){		
		if ($lines[$j] =~ m/\-\-\-+/){
			$to = $j-1;
			last;
		}
	}
	
	# Issue date: a line shorter than 20 characters (after upper-casing,
	# trimming and turning punctuation into spaces) with at least one word
	# that is a key of %dstr and that passes issuedatecheck().
	# "last" only leaves the word loop, so a later matching line overwrites
	# an earlier one.
	my $date = 0;
	for (my $j=0; $j<$to; $j++){
		$_ = &trim(uc($lines[$j]));
		s/[\,\;\:\'\"\?\.]{1,}/ /ig;
		if (length($_)<20){
			my @tmp = split /\ /;
			for (my $k=0; $k<@tmp; $k++){
				if ($dstr{"$tmp[$k]"}){
					my $isdate = &issuedatecheck($lines[$j]);
					if ($isdate){
						$issuedate[$nofilectr] = $lines[$j];
						$date = $j;
						last;
					}
				}
			}
		}
	}

	# Authors: every line with a word that is a key of %AuthorNameList is
	# appended, separated by "; ". $au remembers the first such line; because
	# it starts at 0, a hit on line 0 cannot be told apart from "not found".
	my $au = 0;
	for (my $j=0; $j<$to; $j++){
		$_ = &trim(uc($lines[$j]));
		s/[\,\;\:\'\"\?\.]{1,}/ /ig;
		my @tmp = split /\ /;
		for (my $k=0; $k<@tmp; $k++){
			if ($AuthorNameList{"$tmp[$k]"}){
				if (!$authorlist[$nofilectr]) {
					$authorlist[$nofilectr] .= $lines[$j];
				} else {
					$authorlist[$nofilectr] .= "; ".$lines[$j];
				}
				if (!$au){
					$au = $j;
				}
				last;
			}
			if ($au<$date && $j>=$date){
				last;
			}
		}
	}
#find title
	# The title is collected from the lines in front of the author line or the
	# date line, whichever comes first; when either index is 0 nothing is
	# collected and the fallback further down applies.
	$to = $au;
	if ($to > $date){
		$to = $date;
	}
	
	for (my $j=0; $j<$to;$j++) {
		if ($lines[$j] =~ m/\w+/){
			# split with a capturing pattern returns words and separators, so
			# this bound is on pieces, not on words alone.
			my @wctr = split /(\w+)/,$lines[$j];
			if (@wctr <40){
				if (!$papertitles[$nofilectr]){
					$papertitles[$nofilectr] .= "$lines[$j]";
				} else {
					$papertitles[$nofilectr] .= " $lines[$j]";
				}
			} else {
				last;
			}
		}
	}
	# Placeholders for everything that was not found.
	if (!$issuedate[$nofilectr]) {
		$issuedate[$nofilectr] = "*CopyDateHere*";
	} else {
		$issuedate[$nofilectr] = &trim($issuedate[$nofilectr]);
	}
	if (!$authorlist[$nofilectr]) {
		$authorlist[$nofilectr] = "*CopyAuthorHere*";
	} else {
		$authorlist[$nofilectr] = &trim($authorlist[$nofilectr]);
	}
	if (!$papertitles[$nofilectr]) {
		# Document name without its last four characters.
		# NOTE(review): FindFileTitle appends ".doc(x)" (7 characters) to that
		# name, so this leaves ".do" behind -- confirm whether that is intended.
		$papertitles[$nofilectr] = substr($doclist[$nofilectr],0,length($doclist[$nofilectr])-4);
	}
	
continueout:
	$papertitles[$nofilectr] = &WordFC($papertitles[$nofilectr]);
	$papertitles[$nofilectr] = &trim($papertitles[$nofilectr]);
	$papertitles[$nofilectr] =~ s/\%br%\ //ig;
	
	return "Suggested TWiki Topic Entry:\n\n|$issuedate[$nofilectr]|[[$twikitopicid[$nofilectr]][$papertitles[$nofilectr]]]|$authorlist[$nofilectr]|$doclist[$nofilectr]|\n=====================================\nDelete above text, and proof reading the conversion before publish.\n\n".
	"---+!! $papertitles[$nofilectr]\n\n".
	"*$authorlist[$nofilectr]*\n\n*$issuedate[$nofilectr]*\n\n#WikiTOC\n".
	"---++!! Table of Contents\n\n%TOC%\n\n------\n\n";
}

sub OutlineWordFC{
	# Capitalise the words of a heading while leaving embedded tags alone.
	# The text is trimmed, cut into tag / non-tag pieces, and only the
	# non-tag pieces are passed through WordFC().
	my $raw = shift;
	$raw = &trim($raw);
	return join "", map { m/</ ? $_ : &WordFC($_) } split /(<.*?>)/, $raw;
}

sub WordFC{
	# Change the capitalisation of a text.
	#   WordFC($text)     upper-cases the first character of every \w+ run
	#   WordFC($text, 1)  upper-cases the whole text
	my ($s, $allc) = @_;
	return uc($s) if $allc;
	return join "", map { ucfirst($_) } split /(\w+)/, $s;
}

sub Finalization{
	# Last conversion pass: turn the temporary placeholders into their final
	# text and tidy up spacing.
	#
	# Argument: the text produced by the earlier passes.
	# Returns:  the finished TWiki text.
	# Reads the placeholder strings ($left, $right, $tablerowend, $tbhdl,
	# $tbhdr, $anchstr), the anchor/reference tables and the replacement
	# tables %TempStr2TWikiMarkup, %TWikiMarkupDoubleSymbol and %CustomStyleTo.
	# The order of the substitutions matters; keep it when editing.
	my $fulltext = shift;
	#Restore HTML Style Tags
	$fulltext =~ s/$left/</ismg;
	$fulltext =~ s/$right/>/ismg;

	#restore table
	$fulltext =~ s/$tablerowend/\n/ismg;
	$fulltext =~ s/(\|\ {1,}\|)/\|\|/ismg;
	$fulltext =~ s/\|\ {1,}(.*?)/\|$1/ismg;
	$fulltext =~ s/(\+|\!)[\ ]{2,}/$1\ /ismg;
	$fulltext =~ s/$tbhdl\ *?([<\w])/  *$1/ismg;
	$fulltext =~ s/$tbhdl/  */ismg;
	$fulltext =~ s/$tbhdr/*  /ismg;

	#remove un-used reference
	foreach my $Anch (keys %anchorlist){
		next if exists $reflist{$Anch};		# something links to this anchor
		# The look-ahead keeps a longer anchor name that merely starts with
		# this one from being clipped.
		$fulltext =~ s/$anchstr$Anch(?!\w)//isg;
	}

	#remove empty tags
	$fulltext =~ s/<strike><\/strike>|<s><\/s>|<u><\/u>//ismg;

	#remove additional spaces
	$fulltext =~ s/^\ {1,}\b//ismg;
	$fulltext =~ s/^\ {1,}(\[)/$1/ismg;
	$fulltext =~ s/^\*\ +\b/\*/ismg;
	$fulltext =~ s/^\.<img/<img/ismg;

	$fulltext =~ s/<(sub|sup|u|s)> *?/<$1>/ismg;
	$fulltext =~ s/ *?<(\/sub|\/sup|\/u|\/s)>/<$1>/ismg;
	$fulltext =~ s/_\ *?_//ismg;
	$fulltext =~ s/\ +?<img/<img/ismg;
	$fulltext =~ s/\ +?\]/]/ismg;
	$fulltext =~ s/\[\ +?/[/ismg;

	# NOTE(review): the first statement replaces trailing spaces by "*"
	# instead of deleting them -- confirm that this is intended.
	$fulltext =~ s/\ {1,}$/\*/ismg;
	$fulltext =~ s/\ {1,}\*$/\*/ismg;

	#format bullets
	$fulltext =~ s/\nbullettwiki//isg;
	$fulltext =~ s/bullettwiki//isg;
	$fulltext =~ s/^([\ ]{3,}[\*a\.1]{1,})\ {1,}\b/$1 /ismg;
	$fulltext =~ s/^([\ ]{3,}[\*a\.1]{1,})\ {1,}<b>/$1 <b>/ismg;

	# temporary bold/italic/fixed-font markers -> TWiki markup characters
	foreach my $key (sort keys %TempStr2TWikiMarkup){
		my $v1 = $TempStr2TWikiMarkup{"$key"};
		$fulltext =~ s/$key/$v1/isg;
	}
	$fulltext =~ s/\_\ +\_//sig;
	$fulltext =~ s/([\_]{2})//sig;

	# adjacent markers of two styles -> the combined TWiki double symbol
	foreach my $key (keys %TWikiMarkupDoubleSymbol){
		my $v1 = $TWikiMarkupDoubleSymbol{"$key"};
		$fulltext =~ s/$key/$v1/sig;
	}
	$fulltext =~ s/(\_([^\w])\_)$/$2/ismg;

	#remove empty line
	$fulltext =~ s/^[\ \[\]<>bru%\/\*\.\:\|\^\_]+?$//ismg;

	$fulltext =~ s/^(acknowledgement.*?)$/*Acknowledgement*/ismg;

	#return custom bullet and number style if these are actually content in text
	if ($csdefined) {
		foreach my $key (keys %CustomStyleTo){
			my $v = $CustomStyleTo{"$key"};
			# Has to work on $fulltext: this sub never loads the text into $_.
			$fulltext =~ s/$key/$v/isg;
		}
	}

	#* in heading number
	$fulltext =~ s/^([\-\+|\!]{4,}\ )(\*([\d\.]{1,})\*)/$1$3 /ismg;

	$fulltext =~ s/\n{3,}/\n\n/ismg;
	return $fulltext;
}

sub ConsolidateLine{
	# First conversion pass: normalise the raw HTML body.
	#
	# Argument: the HTML body text.
	# Returns:  the text on paragraph-sized lines (two newlines after every
	#           </p>), with runs of blanks collapsed, Word-specific blocks
	#           removed and the blank after ";" or ":" inside tags removed.
	# With $Debug set, the result and the list of distinct tags are written
	# into $DebugDir (dies when those files cannot be opened).
	$_ = shift;

	#convert custom bullet and number style
	if ($csdefined) {
		foreach my $key (keys %CustomStyleFrom){
			my $v = $CustomStyleFrom{"$key"};
			s/$key/$v/isg;
		}
	}

	#change
	foreach my $key (keys %HTMLDirect2TWikiMarkup){
		my $v1 = $HTMLDirect2TWikiMarkup{"$key"};
		s/$key/$v1/isg;
	}

	# An empty paragraph after headings and section starts gives the later
	# </p>-based line splitting a place to break.
	s/(<\/h\d.*?>)/$1<p><\/p>/isg;
	s/(class=Section\d>)/$1<p><\/p>/isg;
	my $sc = chr(160);		# character 160, the Latin-1 non-breaking space
	s/$sc{1,}/\ /isg;
	s/<br clear=all>//isg;

	#remove
	s/(&nbsp;|\t|\ ){1,}/\ /isg;
	#s/\ {1,}/\ /isg;
	s/\n/ /isg;
	s/<!--\[if.*?>.+?if\]-->//isg; 		#remove HTML comment
	s/<w\:data>.+?<\/w\:data>//isg;
	s/<p class=MsoToc.*?>.+?<\/p>//isg; #remove table of contents

	#s/>\ {1,}</></isg; #causing table data loss
	s/<\/p>/<\/p>\n\n/isg;
	#========================================================
	$_= &RemoveSpecialText($_);
	#========================================================
	s/\ {1,}\n/\n/isg;
	s/\n\ {1,}/\n/isg;
	s/(\ )(<\/i>)/$2$1/isg;

	my @tags = split /(<.+?>)/;
	my %tag;	# distinct tags, only used for the debug listing
	foreach my $piece (@tags){
		if ($piece =~ m/</){
			$piece =~ s/(\;|\:)\ /$1/g;
			$tag{"$piece"} = $piece;
		}
	}

	# The separator must be an empty string. A bare // here is a pattern
	# match whose result (possibly "1") would be inserted between the pieces.
	my $s = join "", @tags;
	if ($Debug) {
		my $parafile = $DebugDir."/_TWiki_HTML(Paragraphs).txt";
		open(my $fhpara, '>', $parafile) or die "Unable to open file $parafile.\n $!";
		print $fhpara $s;
		close($fhpara);

		my $tagfile = $DebugDir."/_TWiki_HTML_Tags.txt";
		open(my $fhtags, '>', $tagfile) or die "Unable to open file $tagfile.\n $!";
		foreach my $key (sort keys %tag){
			print $fhtags "$key\n\n";
		}
		close($fhtags);
	}
	return $s;
}
		
sub PresOtherFeatures{
	# Convert the inline features of the text, line by line, and strip the
	# remaining HTML tags.
	#
	# Argument: the text (one paragraph per line).
	# Returns:  the text with
	#   - http/mailto links rewritten as [[url][label]],
	#   - hidden text (display:none;mso-hide:all) removed when its closing tag
	#     is found; its words are counted in %HiddenList,
	#   - captions wrapped in "*",
	#   - Word bookmarks turned into "<$anchstr><name>" anchor lines and
	#     REF/NOTEREF fields into links to them,
	#   - footnote/endnote references turned into #FootNote anchors and links,
	#   - Symbol-font text passed through Symbol2Unicode(),
	#   - colour spans mapped to TWiki colour macros (other colours are kept as
	#     placeholder-bracketed tags),
	#   - list paragraphs marked with "bullettwiki" plus the list markup of
	#     their level,
	#   - tags listed in %HTMLTag2TempStr replaced by temporary markers and
	#     every other tag deleted.
	# Each line is split into tag / text pieces (@list); a handler usually
	# rewrites the opening piece and then scans forward for the matching
	# closing piece. The scans use the undeclared (package) variable $k.
	$_ = shift;
	#preserve HTML link
	s/(\<a href=\"(http\:\/\/.+?)\".*?\>(.+?)\<\/a\>)/\[\[$2\]\[$3\]\]/isg;
	s/(\<a href=\"(mailto\:.+?)\"\>(.+?)\<\/a\>)/\[\[$2\]\[$3\]\]/isg;

	my @lines = split /\n/;
	for (my $i=0; $i<@lines; $i++) {
		$lines[$i] =~ s/^\ {1,}//;
		$lines[$i] =~ s/\ {1,}$//;
		$_ = "\ ".$lines[$i]."\ ";
		# Heading lines never become list items (see the bullet handler).
		my $isHd = "";
		if (m/\-\-\-\+/){
			$isHd = 1;
		}
=com
		#microsoft word automatically convert < to &lt; and > to &gt;
		my @lt = split /</;
		my @gt = split />/;
		if ($#lt != $#gt){
			$lines[$i] =~ s/</$left/;
			$lines[$i] =~ s/>/$right/;
		} else {
=cut
			my @list = split /(<.+?>)/, $lines[$i];
			my $blt=0;		# set once the line has been marked as list item
			my $italic = 0;
			for (my $j=0; $j<@list; $j++){
				if ($list[$j] !~ m/</){
					$list[$j] =~ s/REF _Ref\d{5,} \\h//ig;
				} else {
					#Remove hidden text
					if ($list[$j] =~ m/display:none;mso-hide:all/i){
						my $r1 = $j;
						my $k = $r1+1;
						my $span=1;
						my $hstr = "";
						# Count opening and closing tags until the hidden
						# element is balanced; text pieces are collected.
						while ($span && $k<@list-1) {
							if ($list[$k] =~ m/<\//i){
								$span--;
							} elsif ($list[$k] =~ m/<\b/i){
								$span++;
							} else {
								$hstr .= " $list[$k]";
							}
							$k++;
						}
						if (!$span) {#complete set found
							$hiddentext = "Hidden text removed.";
							for (my $ih=$r1; $ih<$k; $ih++){
								$list[$ih] = "";
							}
							$hstr = lc($hstr);
							$hstr =~ s/_//isg;
							$allHText .= "$hstr"."NNNNNN";
							my @htext = split /(\w+)/, $hstr;
							foreach (@htext){
								$HiddenList{"$_"}++;
							}
						} else {
							$hiddentext = "Hidden text are not removed.";						
						}
					}

					#Caption	<p class=MsoCaption>
					if ($list[$j] =~ m/MsoCaption/i){
						my $msocaption = "*";
						$list[0] = "$msocaption".$list[0];
						$list[$#list] .= "$msocaption";
					}
	
					#my %anchorlist:	%reflist;	$anchstr = "AnchRef";
					#Anchor	<a name="_Toc232315869">	<a name="_Ref231712490">
					if ($list[$j] =~ m/<a name=\"_(\w{5,})/i){
						$list[0] ="$anchstr$1\n".$list[0];
						$anchorlist{"$1"} = $1;
						#$reflist{"$1"} = $1;
					}
					
					#linked to anchor
					#<span style='mso-field-code:" NOTEREF _Ref149039334 \\h "'>
					#<span style='mso-field-code:" REF _Ref231712547 \\r \\h &#1;\\* MERGEFORMAT "'>
					if ($list[$j] =~ m/<span.+?mso-field-code\:\" (NOTEREF|REF) _(\w{5,})\ .+?>/i){
						my $s = $2;
						#$anchorlist{"$s"} = $s;
						$list[$j] ="[[$anchstr$s][";
						$reflist{"$s"} = $s;
						my $span=1;
						$k = $j+1;
						while ($span && $k<@list-1) {
							if ($list[$k] =~ m/<span/i){
								$span++;
							} elsif ($list[$k] =~ m/<\/span/i){
								$span--;
							}
							if (!$span){
								$list[$k] = "]]";
							}
							$k++;
						}
					}
	
					#Footnote	<a style='mso-footnote-id:ftn1' href="#_ftn1" name="_ftnref1" title="">
					if ($list[$j] =~ m/<a style='mso-(foot|end)note-id/i){
						(my $name) = ($list[$j] =~ m/name=\"\_([a-z]{3,}\d{1,}).+?>/i);
						(my $Ref) = ($list[$j] =~ m/href=\"\#\_([a-z]{3,}\d{1,})/i);
						if ($name){
							$list[0] ="#FootNote$name\n".$list[0];
						}					
						# NOTE(review): tests $name but inserts $Ref -- confirm.
						if ($name){
							$list[$j] ="[[#FootNote$Ref][";
						}					

						$k = $j+1;
						while ($k<@list-1) {
							if ($list[$k] =~ m/<\/span/i){
								$list[$k] = "]]";
								goto EndFootNote;
							}
							$k++;
						}
EndFootNote:
					}
=com
					#italic
					if ($list[$j] =~ m/(<i .+?>)/i){
						$italic = 1;
						$list[$j] =" _";
						my $span=1;
						$k = $j+1;
						while ($span && $k<@list-1) {
							$list[$k] = &trim($list[$k]);
							if ($list[$k] =~ m/<\//i){
								$span--;
							}
							if (!$span){
								$list[$k] = "_ ";
							}
							$k++;
						}
					}
=cut
					#Symbol to unicode
					if ($list[$j] =~ m/(<.+?font-family\:Symbol.+?>)/i){
						$list[$j] ="";
						my $span=1;
						$k = $j+1;
						while ($span  && $k<@list-1) {
							if ($list[$k] =~ m/<span/i){
								$span++;
							} elsif ($list[$k] =~ m/<\/span/i){
								$span--;
							} elsif ($list[$k] !~ m/</i){
								# Only text pieces are translated. The former
								# "!=~" was a numeric comparison against the
								# bit-negated result of a match on $_, not a
								# negated match on this piece.
								$list[$k] = &Symbol2Unicode($list[$k]);
							}
							if (!$span){
								$list[$k] = "";
							}
							$k++;
						}
					}
					#Syntax: %RED% red text %ENDCOLOR%
					#<span style='color:red|#ff00ff'>red</span>
					#In table some times the color are not saved to html. It is not this script's problem.
					if ($list[$j] =~ m/<span style='color:(\#*\w{2,})'.*?>/i) {
						my $c = $1;
						$c = uc("$c");
						$c = $twikicolor{"$c"};
						if ($c){
							$list[$j] = "$c";
						} else {
							# Unknown colour: keep the span, protected by the
							# placeholder brackets.
							$list[$j] =~ s/</$left/i;
							$list[$j] =~ s/>/$right/i;
						}
						my $span=1;
						$k = $j+1;
						while ($span && $k<@list-1) {
							if ($list[$k] =~ m/<span/i){
								$span++;
							} elsif ($list[$k] =~ m/<\/span/i){
								$span--;
							}
							if (!$span){
								if ($c){
									$list[$k] = "%ENDCOLOR%";
								} else {
									$list[$k] =~ s/</$left/i;
									$list[$k] =~ s/>/$right/i;
								}
							}
							$k++;
						}
					} 
					
					#bullet	+ number list
					#<p class=Bullet1> <span style='mso-list:Ignore'>(a)<span style='font:7.0pt "Times New Roman"'>
					if ($list[$j] =~ m/(Bullet|mso\-list\:Ignore)/i && !$blt && !$isHd) {
						# The first non-blank text piece after the tag is the
						# list label; a label of five or more characters means
						# this is not a list item after all.
						my $n = "";
						for (my $k =$j+1; $k<@list; $k++){
							if ($list[$k] !~ m/</){
								if (trim($list[$k])){
									$n = $list[$k];
									if (length($n)<5){
										$list[$k] = "";
										goto identified;
									} else {
										goto listout;
									}
								}
							}
						}
identified:						
						$list[$j] =~ m/level(\d)/i;
						$list[$j] = "bullettwiki$twikibullit[$1]";
						if ($n =~ m/[0-9]{1}/){
							$list[$j] = "bullettwiki$twikinumber[$1]";
						} elsif ($n =~ m/[a-z]{1}/){
							$list[$j] = "bullettwiki$twikichar[$1]";
						}
						$blt = 1;
					}
listout:
				}
			}
			#$lines[$i] = "".(join //, @list); #Problem: numbers were added
			$lines[$i] = "";
			for (my $j=0; $j<@list; $j++){
				$lines[$i] .= "$list[$j]";
			}
			foreach $key (sort keys %HTMLTag2TempStr){
				my $v1 = $HTMLTag2TempStr{"$key"};
				$lines[$i] =~ s/$key/$v1/ig;
			}
			
			$lines[$i] =~ s/<(s|\/s)>/${left}$1${right}/oig;
			if ($TagsL ne ""){
				$lines[$i] =~ s/$TagsL/${left}$1${right}/ig;
				$lines[$i] =~ s/$TagsR/${left}$1${right}/ig;
			}
			# Whatever tag is left at this point is dropped.
			$lines[$i] =~ s/<.+?>//ig;			
		}
=com
	}
=cut
	return join "\n", @lines;
}

sub RemoveSpecialText{
	# Blank out unwanted lines near the top of the text.
	#
	# Argument: the text.
	# Returns:  the text in which every line that matches one of the patterns
	#           in @linetoberemoved as a whole (case-insensitive) is emptied,
	#           and runs of three or more newlines are reduced to two.
	# Only the first 200 lines are examined, and the scan stops in front of the
	# first later line that contains three or more dashes; line 0 is always
	# examined.
	$_ = shift;
	my @lines = split /\n/;
	# Never walk past the real end of the text: substituting on a missing
	# element would create it and pad the result with empty lines.
	my $limit = @lines < 200 ? scalar(@lines) : 200;
	my $j = 0;
	while ($j < $limit){
		foreach my $pattern (@linetoberemoved) {
			$lines[$j] =~ s/^$pattern$//i;
		}
		$j++;
		last if $j < @lines && $lines[$j] =~ m/\-\-\-+/;
	}
	$_ = join "\n", @lines;
	s/\n{3,}/\n\n/isg;
	return $_;
}

sub rearrangedatestr{
	# Find the first date in a text and return it as "<month> <day> <year>".
	#
	# Recognised shapes (case-insensitive, anywhere in the text):
	#   y-m-d   m/d/y   d.m.y   "d month y"   "month d y"
	# Returns "  " (three empty fields joined by blanks) when no date is found.
	my $text = shift;
	$text = "" unless defined $text;
	my @formats = (
		'(?<y>\d{2,4})-(?<m>\d{1,2})-(?<d>\d{1,2})',
		'(?<m>\d{1,2})/(?<d>\d{1,2})/(?<y>\d{2,4})',
		'(?<d>\d{1,2})\.(?<m>\d{1,2})\.(?<y>\d{2,4})',
		'(?<d>\d{1,2})\ (?<m>[a-z]{1,})\ (?<y>\d{2,4})',
		'(?<m>[a-z]{1,})\ (?<d>\d{1,2})\ (?<y>\d{2,4})',
	);
	# Exactly one "|" between the alternatives: an empty alternative matches
	# at position 0 and hides every date that does not start the text.
	my $pattern = join '|', @formats;
	my ($m, $d, $y) = ("", "", "");
	if ($text =~ m{$pattern}i) {
		($m, $d, $y) = map { defined $+{$_} ? $+{$_} : "" } qw(m d y);
	}
	return "$m $d $y";
}

sub trim{
	# Return the argument without one trailing line end and without leading
	# and trailing blanks (only the space character is removed).
	# The result is also left in $_.
	($_) = @_;
	chomp;
	s/^ +//;
	s/ +$//;
	return $_;
}

sub WriteIndexFile{
	# Write "index.txt" into a directory: a TWiki table with one row per
	# converted file (issue date, linked title, authors, source document),
	# newest entry of the lists first, followed by a commented block of five
	# empty row templates.
	#
	# Argument: directory path including its trailing separator.
	# Dies when the file cannot be opened or closed.
	my $curdir = shift;
	my $line = "||[[][]]|||\n";
	my $indexfile = $curdir."index.txt";
	# Three-argument open: the path is never parsed for a mode prefix.
	open(my $fhindex, '>', $indexfile) or die "Unable to open file $indexfile\n $!";
	print $fhindex $tabfmt;
	print $fhindex "| *Issue Date* | *Title* | *Author(s)* |  *From*  |\n";
	for (my $j=$nofilectr-1; $j>=0; $j--) {
		print $fhindex "|$issuedate[$j]|[[$twikitopicid[$j]][$papertitles[$j]]]|$authorlist[$j]|$doclist[$j]|\n\n";
	}
	print $fhindex "\n\n<!--\n$line$line$line$line$line-->\n";
	# Buffered write errors only show up when the handle is closed.
	close($fhindex) or die "Unable to close file $indexfile\n $!";
}

sub MsgBox0{
	# Show a Windows message box.
	# Arguments: message text, flags, window title -- handed on in this order
	# to Win32::MsgBox(MESSAGE [, FLAGS [, TITLE]]), whose result is returned.
	use Win32;
	my ($param, $flag, $title) = @_;
	return Win32::MsgBox("$param",$flag,$title);
}

sub Copy2Clipboard{
	# Put a string on the Windows clipboard; returns what Set() returns.
	use Win32::Clipboard;
	my ($str) = @_;
	my $clipboard = Win32::Clipboard();
	return $clipboard->Set($str);
}

sub FormatTableControl{
	# Replace every <table ...>...</table> of the text by TWiki table markup.
	# Arguments: the text, and the row separator handed on to FormatOneTable().
	# Returns the text in which each table became
	# "<sep><converted table>" followed by two newlines.
	$_ = shift;
	my $sep = shift;
	my $fulltext = $_;
	my @tables = m/(<table.+?>.+?<\/table>)/ismg;	# original strings
	foreach my $table (@tables){
		my $pattern = quotemeta("$table");
		my $twiki = &FormatOneTable($table, $sep);
		$fulltext =~ s/$pattern/$sep$twiki\n\n/ism;
	}
	return "$fulltext";
}

sub formatcell{
	# Turn the content of one table cell into a TWiki cell ("|" + content).
	#
	# Arguments:
	#   1. cell content; surrounding white space is removed
	#   2. row index   - row 0 is treated as header row
	#   3. column index and
	#   4. spacing hint - both accepted for compatibility, they no longer
	#      influence the result
	# Returns "|" for an empty cell, "|<$tbhdl><content without <b> tags><$tbhdr>"
	# for header cells and "|<content>" for all other cells.
	my ($cell, $tabrow1, $col1, $sp2a) = @_;
	$cell = "" unless defined $cell;
	$cell =~ s/^\s+//;
	$cell =~ s/\s+$//;
	return "|" if length($cell) < 1;
	if (!$tabrow1) {
		# Header cells get their emphasis from the header placeholders.
		$cell =~ s/(<b>|<\/b>)//ig;
		return "|$tbhdl$cell$tbhdr";
	}
	# First column, numeric and text cells were all formatted alike; the
	# result is now returned directly instead of through a package variable.
	return "|$cell";
}

sub FormatOneTable{
	# Convert one HTML table into TWiki table markup.
	#
	# Arguments: the "<table ...>...</table>" text and the string that ends
	#            each output row.
	# Returns:   $tabfmt followed by one "|cell|cell|...|<sep>" string per row.
	# Cells are formatted by formatcell(); row 0 is the header row. A colspan
	# adds empty cells after the spanning cell, a rowspan inserts "|^" cells
	# into the rows below it.
	# NOTE(review): the last piece after the final </tr> and after the final
	# </td> of a row is deliberately not counted ($#ROWS / $#CELLS), which
	# presumes trailing text after the last closing tag -- confirm.
	$_ = shift;
	my $sep = shift;
	# Working tables are per call; as package variables they kept cells of
	# previously converted tables.
	my @newtable;	# [row][column] => formatted (or still raw, spanned) cell
	my @colctr;		# number of cells per row
	s/\n//ismg;
	my $tabtype = 1;
	if (m/colspan\=[0-9]{1,}/i || m/rowspan\=[0-9]{1,}/i){
		$tabtype = 2;
	}
	s/\|/\\|/gi;
	s/\^/\\^/gi;
	s/<(\/*)th/<$1td/gi;
	s/<\/*tab.+?>//gi;	#remove <table></table>
	s/<tr.*?>//gi;		#remove <tr>
	s/<\/td>/wikicell/gi;

	#	split table to individual cells================================
	my @ROWS = split /<\/tr>/i;
	my $norows = $#ROWS;
	my $maxcol = 0;
	for (my $tabrow=0; $tabrow<$norows; $tabrow++){
		my $col = 0;
		my @CELLS = split /wikicell/, $ROWS[$tabrow];
		#===============================
		# one empty cell per additionally spanned column
		for (my $js=0; $js<@CELLS; $js++){
			$_ = $CELLS[$js];
			if (m/colspan=([0-9]{1,})/i) {
				my $span = $1;
				for (my $ns=1; $ns<$span; $ns++){
					splice @CELLS,$js+1,0,'';
				}
			}
		}
		#===============================
		my $nocol = $#CELLS;
		$colctr[$tabrow]= $nocol;
		if ($maxcol<$nocol) {
			$maxcol = $nocol;
		}
		for (my $tabcol=0; $tabcol<$nocol; $tabcol++){
			$_ = $CELLS[$tabcol];
			my @ptag = m/<\/p>/xgi;
			if (@ptag>1) {
				s/<\/p>/%BR%/gi;
			}
			if (m/(colspan=[0-9]{1,})/i || m/(rowspan=[0-9]{1,})/i){
				# spanned cells stay raw until the expansion below
				$newtable[$tabrow][$tabcol] = $_;
			} else {
				$newtable[$tabrow][$tabcol] = &formatcell($_,$tabrow,$col,$sp2);
			}
			$col++;
		}
	}

	#	expand spanned cell============================================
	if ($tabtype >= 2) {
		for (my $tabcol=0; $tabcol<$maxcol; $tabcol++){
			for (my $tabrow=0; $tabrow<$norows; $tabrow++){
				$_ = $newtable[$tabrow][$tabcol];
				my $orig = $_;
				if (m/(colspan=[0-9]{1,})/i || m/(rowspan=[0-9]{1,})/i){
					$newtable[$tabrow][$tabcol] =
						&formatcell($_,$tabrow,$tabcol,$sp2);
				} else {
					$newtable[$tabrow][$tabcol] = $orig;
					$orig = "";
				}

				$_ = $orig;
				if ($_){
					my $nocolspan = 1;
					my $norowspan = 0;
					if (m/colspan\=([0-9]{1,}).+?rowspan\=([0-9]{1,})/i){
						$nocolspan = $1;
						$norowspan = $2;
					} elsif ( m/rowspan=([0-9]{1,})/i ) {
						$norowspan = $1;
					}

					if ($norowspan){
						#expand rows under current rows
						for (my $nrow=$tabrow+1; $nrow<$tabrow+$norowspan; $nrow++){
							my @newrow;
							for (my $nr=0; $nr<$colctr[$nrow]; $nr++){#construct new row
								$newrow[$nr] = $newtable[$nrow][$nr];
							}
							$colctr[$nrow] = $colctr[$nrow] + $nocolspan;

							for (my $cs=1; $cs<=$nocolspan; $cs++){
								if (!$tabcol) {
									unshift @newrow, '|^'; # add to beginning
								} else {
									splice @newrow,$tabcol,0,'|^'; # insert to middle
								}
							}
							for (my $nr=0; $nr<$colctr[$nrow]; $nr++){
								$newtable[$nrow][$nr] = $newrow[$nr];
							}
						}
					}
				}
			}
		}
	}

	#put table together===============================================
	$_ = "";
	for (my $tabrow=0; $tabrow<$norows; $tabrow++){
		my $rowstr = "";
		for (my $tabcol=0; $tabcol<$maxcol; $tabcol++){
			$rowstr .= defined $newtable[$tabrow][$tabcol] ? $newtable[$tabrow][$tabcol] : "";
		}
		$rowstr .= "|$sep";
		$_ .= $rowstr;
	}
	return $tabfmt."$_";
}

sub HTML2TWikiMarkupSpecial{
	# Fill the conversion tables. Keys are stringified regular expressions,
	# values the text they are replaced with:
	#   %HTMLTag2TempStr          <b>, <i>, <tt> tags   -> temporary markers
	#   %TempStr2TWikiMarkup      temporary markers     -> TWiki markup character
	#   %TWikiMarkupDoubleSymbol  adjacent style marks  -> "__" or "=="
	#   %HTMLDirect2TWikiMarkup   <br>, <hr>, %20, &amp; -> direct replacement
	#
	# TWiki markup used:
	#   b  = *   bold:   words enclosed in asterisks
	#   i  = _   italic: words enclosed in underscores
	#   tt = =   fixed font: words enclosed in equal signs
	#   __bold italic__ and ==bold fixed== use the doubled symbols
	#   separator (horizontal rule): three or more dashes at line start
	my @htmltag= qw(b i tt);
	my @twikitag= qw(* _ =);
	for (my $i=0; $i<@htmltag; $i++){
		my $tag = $htmltag[$i];
		#opening
		# \b keeps "<b" from matching <body>, <blockquote> ... and "<i" from
		# matching <img>. The i flag is part of the pattern itself because
		# flags of the statement that later interpolates the stringified
		# pattern do not reach into it.
		my $p = qr/<$tag\b.*?>/i;
		my $r = "twiki${tag}OPENtwiki";
		$HTMLTag2TempStr{"$p"} = $r;
		$r = qr/$r\ *?/;
		$TempStr2TWikiMarkup{"$r"} = $twikitag[$i];

		#closing
		$p = qr/<\/$tag>/i;
		$r = "twiki${tag}CLOSEtwiki";
		$HTMLTag2TempStr{"$p"} = $r;
		$r = qr/\ *?$r/;
		$TempStr2TWikiMarkup{"$r"} = $twikitag[$i];
	}

	my $p = qr/\_\*|\*\_/;
	$TWikiMarkupDoubleSymbol{"$p"} = "__";
	$p = qr/\*\=|\=\*/;
	$TWikiMarkupDoubleSymbol{"$p"} = "==";

	$p = qr/<br.*?>/i;
	$HTMLDirect2TWikiMarkup{"$p"} = "%BR%";
	$p = qr/<hr.*?>/i;
	$HTMLDirect2TWikiMarkup{"$p"} = "<p><\/p>\n<p>----------<\/p>\n";
	$p = qr/\%20/;
	$HTMLDirect2TWikiMarkup{"$p"} = " ";
	$p = qr/\&amp\;/;
	$HTMLDirect2TWikiMarkup{"$p"} = "\&";
}

sub definecolorscheme{
	# Register every supported colour name in the file-level %twikicolor
	# table, each mapped to the TWiki variable of the same name (NAME => %NAME%).
	#
	# For reference, the TWiki preference settings behind those variables:
	#   YELLOW #ffff00   ORANGE #ff6600   RED    #ff0000   PINK   #ff00ff
	#   PURPLE #800080   TEAL   #008080   NAVY   #000080   BLUE   #0000ff
	#   AQUA   #00ffff   LIME   #00ff00   GREEN  #008000   OLIVE  #808000
	#   MAROON #800000   BROWN  #996633   BLACK  #000000   GRAY   #808080
	#   SILVER #c0c0c0   WHITE  #ffffff   (ENDCOLOR closes the font tag)
	my @colour_names = qw(
		YELLOW ORANGE RED PINK PURPLE TEAL
		NAVY BLUE AQUA LIME GREEN OLIVE
		MAROON BROWN BLACK GRAY SILVER WHITE
	);
	@twikicolor{@colour_names} = map { '%'.$_.'%' } @colour_names;
}

sub definehtmlsym{
	# Load the file-level %htmlsym table, which maps a source character to the
	# numeric HTML entity that should replace it.
	#   - single letters map to Greek entities (e.g. "a" => "&#945;")
	#   - "&#N;" entities map to another "&#M;" entity
	# NOTE(review): the keys look like Symbol-font characters (the generated
	# help text cites alanwood.net's Symbol table) - confirm against the caller.
	# Basic Latin punctuation and digits are not remapped here.

	# Greek : Unicode U+0370 - U+03FF (880-1023); letter, code point, ...
	my @letter_pairs = qw(
		A 913  B 914  G 915  D 916  E 917  Z 918  H 919  Q 920
		I 921  K 922  L 923  M 924  N 925  X 926  O 927  P 928
		R 929  S 931  T 932  U 933  F 934  C 935  Y 936  W 937
		a 945  b 946  g 947  d 948  e 949  z 950  h 951  q 952
		i 953  k 954  l 955  m 956  n 957  x 958  o 959  p 960
		r 961  V 962  s 963  t 964  u 965  f 966  c 967  y 968
		w 969  J 977  j 981  v 982
	);

	# source entity number, replacement entity number, ...
	my @entity_pairs = (
		# Latin-1 Supplement : Unicode U+0080 - U+00FF (128-255)
		qw(176 176  177 177  180 215  184 247  210 174  211 169
		   216 172  226 174  227 169),
		# Latin Extended-B : Unicode U+0180 - U+024F (384-591)
		qw(166 402),
		# Greek : Unicode U+0370 - U+03FF (880-1023)
		qw(161 978),
		# General Punctuation : Unicode U+2000 - U+206F (8192-8303)
		qw(162 8242  164 8260  178 8243  188 8230),
		# Letterlike Symbols : Unicode U+2100 - U+214F (8448-8527)
		qw(192 8501  193 8465  194 8476  195 8472  212 8482  228 8482),
		# Currency Symbols : Unicode U+20A0 - U+20CF (8352-8399)
		qw(240 8364),
		# Arrows : Unicode U+2190 - U+21FF (8592-8703)
		qw(171 8596  172 8592  173 8593  174 8594  175 8595  191 8629
		   219 8660  220 8656  221 8657  222 8658  223 8659),
		# Mathematical Operators : Unicode U+2200 - U+22FF (8704-8959)
		qw(34 8704   36 8707   39 8717   42 8727   45 8722   64 8773
		   92 8756   94 8869   126 8764  163 8804  165 8734  179 8805
		   181 8733  182 8706  183 8729  185 8800  186 8801  187 8776
		   196 8855  197 8853  198 8709  199 8745  200 8746  201 8835
		   202 8839  203 8836  204 8834  205 8838  206 8712  207 8713
		   208 8736  209 8711  213 8719  214 8730  215 8901  217 8743
		   218 8744  229 8721  242 8747),
		# Miscellaneous Technical : Unicode U+2300 - U+23FF (8960-9215)
		qw(189 9168  190 9135  225 9001  230 9115  231 9116  232 9117
		   233 9121  234 9122  235 9123  236 9127  237 9128  238 9129
		   239 9130  241 9002  243 8992  244 9134  245 8993  246 9118
		   247 9119  248 9120  249 9124  250 9125  251 9126  252 9131
		   253 9132  254 9133),
		# Geometric Shapes : Unicode U+25A0 - U+25FF (9632-9727)
		qw(224 9674),
		# Miscellaneous Symbols : Unicode U+2600 - U+26FF (9728-9983)
		qw(167 9827  168 9830  169 9829  170 9824),
	);

	while (my ($letter, $codepoint) = splice(@letter_pairs, 0, 2)){
		$htmlsym{$letter} = '&#'.$codepoint.';';
	}
	while (my ($from, $to) = splice(@entity_pairs, 0, 2)){
		$htmlsym{'&#'.$from.';'} = '&#'.$to.';';
	}
}

#====================================Configuration File and Help=================================
sub ReadConfig{
	# Read the configuration file and store its settings in file-level variables.
	#
	# Parameter: path of the configuration file.
	# File format: a "[Section]" header followed by one or more value lines.
	# Blank lines and lines whose first non-blank character is "#" are ignored.
	# A section without any value leaves the current setting untouched.
	#
	# Sections (header is matched loosely, in this order) and what they set:
	#   MyTWikiName             $MYNAME
	#   FormatTable             $tabfmt (value plus newline)
	#   HTMLDir                 $htmldir (made absolute under $dir unless it has a drive letter)
	#   TopicWord               $TopicWord
	#   Heading                 $hdlevel and @twikiheading ("!!" appended above that level)
	#   Sub Folder              $ProcessSubFolder
	#   Debug                   $Debug (creates $DebugDir when enabled)
	#   PotentialAuthors        %AuthorNameList (upper-cased names), $Au
	#   Preserve tags           $TagsL / $TagsR (compiled opening/closing tag regexes)
	#   Text To Be Removed      @linetoberemoved (one compiled regex per line)
	#   CustomerStyleDefinition %CustomStyleFrom / %CustomStyleTo, $csdefined
	#
	# If the file does not exist a message box is shown and the script exits;
	# if it cannot be opened the script dies.
	my $file= shift;
	if (!(-f "$file")){
		print "ERROR: Config file not found: $file";
		MsgBox0("Config file not found: $file.\n\nRestore $file and try again!\n\n\n".
		"=========================================\n$usage\n",0,$msgtitle);
		exit;
	}
	open(my $fhfrom, '<', $file) or die "Unable to open file $file\n $!";
	# Keep only meaningful lines. The trimmed copy is used for classification
	# only; kept lines stay exactly as read. Comparing against "" (rather than
	# testing truth) keeps a value of "0", and not chopping blindly keeps a
	# one-character last line that has no trailing newline.
	my @texts = grep {
		(my $probe = $_) =~ s/^\s+|\s+$//g;
		$probe ne "" && $probe !~ m/^#/;
	} <$fhfrom>;
	close($fhfrom);

	# Flatten to one string so a section's value lines travel together.
	my $sep = '####';
	my $str = join "", @texts;
	$str =~ s/\n/$sep/isg;
	my @config = split /(\[.+?\])/,$str;
	my $ctr =0;
	while ($ctr<@config){
		my $header = $config[$ctr];
		$ctr++;
		next if $header !~ m/\[.+?\]/;

		# The element after a header is its value, unless the section is empty.
		my $nxt = defined $config[$ctr] ? $config[$ctr] : "";
		my $NotEmpty= $nxt;
		$NotEmpty=~ s/$sep//g;
		next if $nxt =~ m/\[.+?\]/ || $NotEmpty eq "";
		$ctr++;

		if ($header =~ m/MyTWikiName/){
			$nxt =~ s/$sep//g;
			$MYNAME=$nxt;
		} elsif ($header =~ m/FormatTable/i){
			$nxt =~ s/$sep//g;
			$tabfmt = $nxt."\n";
		} elsif ($header =~ m/HTMLDir/i){
			$nxt =~ s/$sep//g;
			$htmldir= $nxt;
			if ($htmldir !~ m/^[a-z]\:/i){
				$htmldir = $dir."/".$htmldir;
			}
			$htmldir =~ s/\\/\//g;
			$htmldir =~ s/[\\|\/]$//g;
		} elsif ($header =~ m/TopicWord/i){
			$nxt =~ s/$sep//g;
			$TopicWord= $nxt;
		} elsif ($header =~ m/Heading/i){
			$nxt =~ s/$sep//g;
			$hdlevel= $nxt;
			# Level n is "---" followed by n plus signs.
			$twikiheading[0] = "---";
			for my $level (1 .. 9){
				$twikiheading[$level] = $twikiheading[$level-1]."+";
			}
			for (my $level=$hdlevel+1; $level<=9; $level++){
				$twikiheading[$level] = $twikiheading[$level]."!!";
			}
		} elsif ($header =~ m/Sub Folder/i){
			$nxt =~ s/$sep//g;
			$ProcessSubFolder= $nxt;
		} elsif ($header =~ m/Debug/i){
			$nxt =~ s/$sep//g;
			$Debug= $nxt;
			if ($Debug && !(-d $DebugDir)) {
				mkdir $DebugDir;
			}
		} elsif ($header =~ m/PotentialAuthors/i){
			$Au= 1;
			$nxt =~ s/$sep/ /g;
			$nxt =~ s/\ +/\ /isg;
			# NOTE(review): the file declares "my @AuthorNameList" (an array);
			# this fills the separate hash %AuthorNameList - confirm which one
			# the readers of this list use.
			foreach my $name (split /\ /, uc($nxt)){
				$AuthorNameList{$name} = 1;
			}
		} elsif ($header =~ m/Preserve tags/i){
			# One tag name per line becomes an alternation: <(a|b).*?> and <(/a|/b)>.
			$nxt =~ s/^$sep//;
			$nxt =~ s/$sep$//;
			$TagsL= $nxt;
			$TagsL=~ s/$sep/\|/g;
			$TagsL="<($TagsL).*?>";
			$TagsL= qr/$TagsL/;
			$TagsR= $nxt;
			$TagsR=~ s/$sep/|\//g;
			$TagsR="/$TagsR";
			$TagsR="<($TagsR)>";
			$TagsR= qr/$TagsR/;
		} elsif ($header =~ m/Text To Be Removed/i){
			# Each line becomes a regex; blanks in it match any text.
			$nxt =~ s/$sep//;
			$nxt =~ s/$sep/\n/g;
			$nxt =~ s/\ +/\ /g;
			$nxt =~ s/\ /\.\*\?/g;
			@linetoberemoved = split /\n/, $nxt;
			foreach my $line (@linetoberemoved){
				$line = '.*?'.$line.'.*?';
				$line = qr/$line/;
			}
		} elsif ($header =~ m/CustomerStyleDefinition/i){
			# Lines are "custom = standard"; only bullet/list styles are kept.
			$csdefined = 1;
			$nxt =~ s/$sep//;
			$nxt =~ s/$sep/\n/g;
			foreach my $definition (split /\n/, $nxt){
				my @t = split /=/, $definition;
				$t[0] = &trim($t[0]);
				$t[1] = &trim($t[1]);
				if ($t[1] =~ m/Bullet|list/i) {
					my $t0 = qr/$t[0]/;
					my $t1 = qr/$t[1]/;
					$CustomStyleFrom{"$t0"} = $t[1];
					$CustomStyleTo{"$t1"} = $t[0];
				}
			}
		}
	}
}

sub bulletnumberanddatestr{
	# Prepare the list prefixes and the date-token lookup.
	#   @twikibullit / @twikinumber / @twikichar
	#       prefix for a bullet, numbered or lettered list item at nesting
	#       level 0..9; levels 0 and 1 share the same prefix and every deeper
	#       level is indented by three more blanks than the one above it.
	#   %dstr
	#       month names and abbreviations map to "M"; two-digit years 70-99
	#       and four-digit years 1900-2100 map to themselves.
	# NOTE(review): the file declares "my @dstr" (an array); this fills the
	# separate hash %dstr - confirm which one the readers use.
	$twikibullit[0] = $twikibullit[1] = "   * ";
	$twikinumber[0] = $twikinumber[1] = "   1. ";
	$twikichar[0] = $twikichar[1] = "   a. ";
	for my $level (2 .. 9){
		# Build on the previous level so the indentation really grows.
		$twikibullit[$level] = "   ".$twikibullit[$level-1];
		$twikinumber[$level] = "   ".$twikinumber[$level-1];
		$twikichar[$level] = "   ".$twikichar[$level-1];
	}
	#	Define date string list to be used searching "issue date"
	my $datestr = "JAN,FEB,MAR,APR,MAY,JUN,JUL,AUG,SEP,OCT,NOV,DEC,".
		"JANUARY,FEBRUARY,MARCH,APRIL,JUNE,JULY,AUGUST,SEPTEMBER,OCTOBER,NOVEMBER,DECEMBER";
	foreach my $month (split /,/, $datestr){
		$dstr{$month} = "M";
	}
	foreach my $year (70 .. 99, 1900 .. 2100){
		$dstr{$year} = $year;
	}
	return;
}

sub Help{
	# Build the usage text and handle the informational command-line switches.
	#
	# Parameters: the command-line arguments (the script passes @ARGV).
	# Switches, matched case-insensitively anywhere in an argument:
	#   /c    append the copyright notice
	#   /v    append the version
	#   /all  append copyright, author and version
	#   /a    append the author
	#   /i /h replace the text with the full instructions in TWiki markup
	# The text is always copied to the clipboard, and printed when $fromcmd
	# is set. If any switch matched, the script exits afterwards.
	# Returns the usage text when no switch matched.
	my $helponly= "";
	# Backslash doubled so "C:\Word2TWiki" survives double-quote interpolation.
	my $s = "Usage...\n\t>perl \"".$scripttitle.".pl\" [/c][/a][/v][/i|h|help]\n\n".
		"\t/[c]\tCopyright\n".
		"\t/[a]\tAuthor\n".
		"\t/[v]\tVersion\n".
		"\t/[i|h|help]\tGenerate this document in TWikiML\n\n".
		"*Or double click \"".$scripttitle.".pl\" from C:\\Word2TWiki.*\n";
		#"\t/[iw] Instruction to use this script in word\n\n";
	foreach (@_){
		if (m/\/c/i){
			$s .= &CopyRight();
			$helponly= 1;
		} elsif (m/\/v/i){
			$s .= &Version();
			$helponly= 1;
		} elsif (m/\/all/i){
			$s .= &CopyRight()."\n".&CodeAuthor()."\n".&Version();
			$helponly= 1;
		} elsif (m/\/a/i){
			$s .= &CodeAuthor();
			$helponly= 1;
		} elsif (m/\/i/i || m/\/h/i){
			$s = &Instruction($s);
			$helponly= 1;
		} elsif (m/\/iw/i){
			# NOTE(review): never reached - "/iw" already matches the "/i"
			# test above, and the switch is not advertised in the usage text.
			# Left disabled on purpose; it needs rework before being enabled
			# (unescaped "|" in the patterns, "\n" inside single quotes).
			my $i = &Instruction();
			$i =~ s/^[-\+ ]+?//ism;
			$i =~ s/^|//ism;
			$i =~ s/|/\t/ism;
			$s = '<html>\n<head>\n<meta http-equiv=Content-Type content="text/html; charset=windows-1252">
<title>'.$0.'</title>\n
</head>\n<body>'.$s.$i.'</body></html>\n';
			$helponly= 1;
		}
	}
	&Copy2Clipboard($s);
	if($fromcmd){
		print $s;
	}
	if ($helponly) {
    	print "\nData are also available from system clipboard!";
		exit;
	}
	return $s;
}

sub CopyRight{
	# Return the copyright and licence notice, naming the developer
	# stored in $develper[0].
	return join "",
		"Copyright (c) 2009 ", $develper[0], ". All rights reserved.\n",
		"This script is free; you can redistribute it and/or modify it under the same terms as Perl itself. ",
		"This program is distributed in the hope that it will be useful, but without any warranty; ",
		"without even the implied warranty of merchantability or fitness for a particular purpose.\n";
}

sub CodeAuthor{
	# Return two lines naming the maintainer and the original author; both
	# come from @develper (name, e-mail address).
	my ($name, $email) = @develper[0, 1];
	my $contact = $name." <".$email.">\n";
	return "Current maintainer: ".$contact."Original author: ".$contact;
}

sub Version{
	# Return the one-line description of the current version.
	return "Current Version: Ver 0.2\n";
}

sub VERSIONHISTROY{
	# Return the version history, newest first. The current version carries
	# $versiondate; older entries are fixed text. The blank line and the
	# tab-indented layout of the older entry are part of the returned string.
	my @history_lines = (
		"Ver 0.2, $versiondate",
		"",
		"\tVer 0.1, 2009-06-21",
		"\t",
	);
	return join "\n", @history_lines;
}

sub Instruction{
# Return the complete user documentation as TWiki markup.
#
# The text is fixed apart from three values taken from file-level variables:
# the script title ($scripttitle, in the installed-files table), the
# developer name ($develper[0]) and the version date ($versiondate) in the
# signature line.
# Any argument is accepted and ignored: the usage section is written out in
# the text itself rather than taken from the caller.
my $s = '
---+!! Convert MS Word Document to TWiki Markup (Window OS Only)

#TWikiTOC
%TOC%

---+ Function
   * Use VBA to convert multiple MS word documents to HTML files. Use perl script to convert any number of html files in any nember of sub-folders to TWiki markup text files (Use recursive search).

---+ Usage
[[#TWikiTOC][TOC]]

---++ Step 1: Convert Word Document to HTML File
   * Open "!ConvertWordToHTML.doc" from C:\Word2TWiki. (If the buttons do not work, please enable macro first.)
   * Use one of the options to convert word documents to HTML files, which are saved in C:\Word2TWiki\DocHTML.
      * Click "Convert All Word Documents From Folder" button if you have copied word documents to the C:\Word2TWiki\DocWord.
      * Click "Convert All Opened Files (Exclude Me)" button if you have opened all word documents.

---++ Step 2: Convert HTML File to TWiki Markup
*From command line:*
	>perl "HTML(MSWord)2TWikiMarkup.pl" [/c][/a][/v][/i|h|help]

   * /[c]	Copyright
   * /[a]	Author
   * /[v]	Version
   * /[i|h|help]	Generate this document in TWikiML

*Or double click "HTML(MSWord)2TWikiMarkup.pl" from C:\Word2TWiki.*

*Output files:*

   * TWiki Markup text files ("_FileTitle.txt") are saved in twiki_FileTitle sub-folder in the C:\Word2TWiki\DocHTML.
   * In the folder, there is a zipped image file that contains all images.

---++ Step 3: Proof Read TWiki Markup Documents.
   * Open text file "_FileTitle.txt" in twiki_FileTitle sub-folder in the C:\Word2TWiki\DocHTML.
   * Edit title, author and issue date as necessary.
   * Remove some lines in the beginning.
   * Proof read the file and correct any errors.

---+ Features
[[#TWikiTOC][TOC]]

---++ TWikiML Features
   * Headings. User can change heading levels to be included in the TOC in configuration file.
   * Italic, underline, bold, bold italic.
   * html link, email link and cross reference.
   * Unicode for symbols. (Refer to [[http://www.alanwood.net/demos/symbol.html][Unicode alternatives for Greek and special characters in HTML]])
   * Text color.
   * Simple bullet lists ("   * "), number lists ("   1. " and "   a. "). Levels are preserved if they are detectable.
   * Tables with or without rowspans and colspans.
   
---++ Other HTML Tags
   * Other HTML tags are preserved but only kept minimum. Refer to the section [Preserve tags] in configuration file, user can add or delete tags from the list.
   * Support custom bullet and number list styles. Refer to the section [CustomerStyleDefinition] in configuration file.

---++ Perl Script Feature
   * Images are zipped. No images are lost.
   * Recursive processes HTML files in sub-folders.
   * Create well formatted and clean text, minimum proof reading and correction.
   * Hidden text removed.
   * Special text removed. These texts are defined in the section [Text To Be Removed] in configuration file. User should edit this section.
   * Title, author and date are collected. Title must be in the first few lines, then followed by authors and date.

---+ Software Requirement - !ActivePerl for Windows
[[#TWikiTOC][TOC]]

   * !ActivePerl for Windows. Download new !ActivePerl Installer from http://www.activestate.com/store/activeperl/download/ and install.
      
---+ Perl Script and !ConvertWordToHTML.doc Installation
   * Download [[%ATTACHURL%/MsWordToTWikiOnWindowsAddOn.zip][MsWordToTWikiOnWindowsAddOn.zip]] file.
   * Open *MsWordToTWikiOnWindowsAddOn.zip* and extract all files including directory. Following files are installed:
   
|  *File*  |  *Directory*  |  *Comment*  |
|!ConvertWordToHTML.docm|C:\Word2TWiki|Used for MSOffice Word 2007.|
|!ConvertWordToHTML.doc|C:\Word2TWiki|Used for previous MSOffice Word.|
|'.$scripttitle.'.pl|C:\Word2TWiki|Script used to convert HTML (MS Word) document to TWiki markup.|
|_'.$scripttitle.'.ini|C:\Word2TWiki|Configuration file is used to control perl script execution. User must edit this file before using perl script.|
   
---+ Requirement to Word Document
[[#TWikiTOC][TOC]]

*To reduce conversion error, reduce proof read and edit time, the word document should use these word features*
   * Heading.
   * Caption.
   * Bullet and number List.
   * Endnote and footnote.
   * Cross Reference.
   * Do not insert table within table cell.
   * Do not add free shapes on existing image.
   * Word drawing including text boxes has to be reformted as gif, jpg, bmp, png etc.
   
---+ Bugs

   * 
   
---+ Limitation
[[#TWikiTOC][TOC]] 

   * 

   
-- Main.'.$develper[0].' - '.$versiondate;

# Disabled contact line, kept for reference:
#*For comment, suggestion or reporting bugs please contact [[mailto:'.$develper[1].']['.$develper[0].']].*';
	return $s;
}
