# # TWiki WikiClone ($wikiversion has version info) # # Copyright (C) 2000-2001 Andrea Sterbini, a.sterbini@flashnet.it # Copyright (C) 2001 Peter Thoeny, Peter@Thoeny.com # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details, published at # http://www.gnu.org/copyleft/gpl.html # # ========================= # # # Each plugin is a package that contains the subs: # # initPlugin ( $topic, $web, $user, $installWeb ) # commonTagsHandler ( $text, $topic, $web ) # startRenderingHandler( $text, $web ) # outsidePREHandler ( $text ) # insidePREHandler ( $text ) # endRenderingHandler ( $text ) # # initPlugin is required, all other are optional. # For increased performance, all handlers except initPlugin are # disabled. To enable a handler remove the leading DISABLE_ from # the function name. # # ========================= package TWiki::Plugins::SpacedWikiWordPlugin; # ========================= use vars qw( $web $topic $user $installWeb $VERSION $debug $doSpace $siteLocale ); $VERSION = '1.000'; $doSpace = 0; # default is not to space wiki words # ========================= sub initPlugin { ( $topic, $web, $user, $installWeb ) = @_; # check for Plugins.pm versions if( $TWiki::Plugins::VERSION < 1 ) { &TWiki::Func::writeWarning( "Version mismatch between SpacedWikiWordPlugin and Plugins.pm" ); return 0; } # Get plugin debug flag $debug = &TWiki::Func::getPreferencesFlag( "SPACEDWIKIWORDPLUGIN_DEBUG" ); # Is this a skin to space? my $skin = &TWiki::Func::getSkin(); my $list = &TWiki::Func::getPreferencesValue( "SPACEDWIKIWORDPLUGIN_SPACESKIN" ); $doSpace = 1 if $list =~ /\b$skin\b/; # Get param to turn spacing on/off # TBD: is there a way for a plugin to access regular option parsing??? if ( defined $ENV{'QUERY_STRING'} ) { $ENV{'QUERY_STRING'} =~ /\bspace=on\b/ and $doSpace = 1 or $ENV{'QUERY_STRING'} =~ /\bspace=off\b/ and $doSpace = 0; } # Get list of stop words not to space $dontSpaceRegExp = "TWiki"; # TWiki is hardwired if ( $doSpace ) { for $word ( split '[ ,:;]+', &TWiki::Func::getPreferencesValue( "SPACEDWIKIWORDPLUGIN_DONTSPACE" )) { $dontSpaceRegExp .= "|$word"; } } # Setup locale and regular expressions for WikiWords setupLocale(); setupRegexes(); # Plugin correctly initialized &TWiki::Func::writeDebug( "- TWiki::Plugins::SpacedWikiWord::initPlugin( $web.$topic ) does space: $doSpace" ) if $debug; return 1; } # =========================== # Read the configuration file at compile time in order to set localed BEGIN { do "TWiki.cfg"; # Do a dynamic 'use locale' for this module if( $useLocale ) { require locale; import locale (); } } # ========================= # Run-time locale setup - 'use locale' must be done in BEGIN block # for regexes and sorting to work properly, although regexes can still # work without this in 'non-locale regexes' mode (see setupRegexes routine). sub setupLocale { $siteCharset = 'ISO-8859-1'; # Defaults if locale mis-configured $siteLang = 'en'; if ( $useLocale ) { if ( not defined $siteLocale or $siteLocale !~ /[a-z]/i ) { &TWiki::Func::writeWarning ( "TWiki::Plugins::SpacedWikiWordPlugin: Locale $siteLocale unset or has no alphabetic characters" ); return; } # Extract the character set from locale and use in HTML templates # and HTTP headers $siteLocale =~ m/\.([a-z0-9_-]+)$/i; $siteCharset = $1 if defined $1; ##writeDebug "Charset is now $siteCharset"; # Extract the language - use to disable plural processing if # non-English $siteLocale =~ m/^([a-z]+)_/i; $siteLang = $1 if defined $1; ##writeDebug "Language is now $siteLang"; # Set environment variables for grep # FIXME: collate probably not necessary since all sorting is done # in Perl $ENV{'LC_CTYPE'}= $siteLocale; $ENV{'LC_COLLATE'}= $siteLocale; # Load POSIX for i18n support require POSIX; import POSIX qw( locale_h LC_CTYPE LC_COLLATE ); ##my $old_locale = setlocale(LC_CTYPE); ##writeDebug "Old locale was $old_locale"; # Set new locale my $locale = setlocale(&LC_CTYPE, $siteLocale); setlocale(&LC_COLLATE, $siteLocale); ##writeDebug "New locale is $locale"; } } # ========================= # Set up pre-compiled regexes for use in rendering. All regexes with # unchanging variables in match should use the '/o' option, even if not in a # loop, to help mod_perl, where the same code can be executed many times # without recompilation. sub setupRegexes { # Build up character class components for use in regexes. # Depends on locale mode and Perl version, and finally on # whether locale-based regexes are turned off. if ( not $useLocale or $] < 5.006 or not $localeRegexes ) { # No locales needed/working, or Perl 5.005_03 or lower, so just use # any additional national characters defined in TWiki.cfg $upperAlpha = "A-Z$upperNational"; $lowerAlpha = "a-z$lowerNational"; $numeric = '\d'; $mixedAlpha = "${upperAlpha}${lowerAlpha}"; } else { # Perl 5.6 or higher with working locales $upperAlpha = "[:upper:]"; $lowerAlpha = "[:lower:]"; $numeric = "[:digit:]"; $mixedAlpha = "[:alpha:]"; } $mixedAlphaNum = "${mixedAlpha}${numeric}"; $lowerAlphaNum = "${lowerAlpha}${numeric}"; # Compile regexes for efficiency and ease of use # Note: qr// locks in regex modes (i.e. '-xism' here) - see Friedl # book at http://regex.info/. # TWiki concept regexes $wikiWordRegex = qr/[$upperAlpha]+[$lowerAlpha]+[$upperAlpha]+[$mixedAlphaNum]*/; $webNameRegex = qr/[$upperAlpha]+[$mixedAlphaNum]*/; $defaultWebNameRegex = qr/_[${mixedAlphaNum}_]+/; $anchorRegex = qr/\#[${mixedAlphaNum}_]+/; $abbrevRegex = qr/[$upperAlpha]{3,}/; # Simplistic email regex, e.g. for WebNotify processing - no i18n # characters allowed $emailAddrRegex = qr/([A-Za-z0-9\.\+\-\_]+\@[A-Za-z0-9\.\-]+)/; # Single-character alpha-based regexes $singleUpperAlphaRegex = qr/[$upperAlpha]/; $singleLowerAlphaRegex = qr/[$lowerAlpha]/; $singleUpperAlphaNumRegex = qr/[${upperAlpha}${numeric}]/; $singleMixedAlphaNumRegex = qr/[${upperAlpha}${lowerAlpha}${numeric}]/; $singleMixedNonAlphaRegex = qr/[^${upperAlpha}${lowerAlpha}]/; $singleMixedNonAlphaNumRegex = qr/[^${upperAlpha}${lowerAlpha}${numeric}]/; # Multi-character alpha-based regexes $mixedAlphaNumRegex = qr/[${mixedAlphaNum}]*/; } sub spacedWikiWord # $_[0] is the word to space { my $dontSpaceWord; # Check for words that shouldn't be spaced out and save marker instead # Known limitation: Only identifies one non-spacable word per link /StefanLindmark if ( $_[0] =~ m/(${dontSpaceRegExp})/g ) { $dontSpaceWord = $1; # Can't add extra spaces (will break e.g. table alignment) $_[0] =~ s/^$dontSpaceWord$/=/g ; $_[0] =~ s/^$dontSpaceWord/= /g ; $_[0] =~ s/$dontSpaceWord$/ =/g ; $_[0] =~ s/$dontSpaceWord/ = /g ; } # Space out the different combinations $_[0] =~ s/([${lowerAlpha}${numeric}])([${upperAlpha}])/$1 $2/go; #lower alphanum followed by upper $_[0] =~ s/([${mixedAlpha}])([${numeric}])/$1 $2/go; #letter followed by number $_[0] =~ s/([${upperAlpha}]+)([${upperAlpha}][${lowerAlpha}${numeric}])(?!$)/$1 $2/go; #ACRONYM followed by Word $_[0] =~ s/([${upperAlpha}]+)([${upperAlpha}][a-rt-z${numeric}]$)/$1 $2/o if ( $siteLang eq 'en' ); #but not plural s at EOL if language is English # Restore saved non-spacable word from marker $_[0] =~ s/(^ )?=/$dontSpaceWord/go; return $_[0]; } # ========================= sub spaceWords { # Change WikiWord to [[WikiWord][Wiki Word]] or # change Web.WikiWord to [[Web.WikiWord][Wiki Word]] my $text = $_[0]; $text =~ s/(^|\s|\()(${webNameRegex}\.|)?(${wikiWordRegex})/"${1}[[$2$3][".spacedWikiWord("$3")."]]"/geo; return $text; } # ========================= sub startRenderingHandler { # Cut up the entire topic text and select the chunks # where WikiWords can be spaced without breaking things # # The regexp implements the following recursion # 1. Run spaceWords on entire text up until first match of any of: # - double bracket expression ("[[xxxxx]]") # - A tag pair ("xxxx") # - Generic single HTML tag (""), but not "" # - URL ("xxxx://xxxxxxxx\s") # - Anchor ("\s#xxxx\s") # - end of text # 2. Leave matched expression untouched and repeat from 1 # with remaining text from after match $_[0] =~ s/(.*?)((\[\[.*?\]\])|(<[aA] .*?<\/[aA]>)|(<(?!nop).*?>)|([^a-z][a-z]*?\:\/\/.*?\s)|(\s#\w+\s)|$)/spaceWords($1).$2/geos; } 1;