*** lib/TWiki.pm~ Sat Feb 1 19:55:21 2003
--- lib/TWiki.pm Wed Feb 26 13:28:20 2003
***************
*** 1484,1495 ****
      my $attrTemplate = extractNameValuePair( $attributes, "template" ); # undocumented
      my $attrHeader = extractNameValuePair( $attributes, "header" );
      my $attrFormat = extractNameValuePair( $attributes, "format" );
  
      return &TWiki::Search::searchWeb( "1", $attrWeb, $searchVal, $attrScope,
         $attrOrder, $attrRegex, $attrLimit, $attrReverse,
         $attrCasesensitive, $attrNosummary, $attrNosearch,
         $attrNoheader, $attrNototal, $attrBookview, $attrRenameview,
!        $attrShowlock, $attrNoEmpty, $attrTemplate, $attrHeader, $attrFormat
      );
  }
  
--- 1484,1497 ----
      my $attrTemplate = extractNameValuePair( $attributes, "template" ); # undocumented
      my $attrHeader = extractNameValuePair( $attributes, "header" );
      my $attrFormat = extractNameValuePair( $attributes, "format" );
+     my $attrHitformat = extractNameValuePair( $attributes, "hitformat" );
  
      return &TWiki::Search::searchWeb( "1", $attrWeb, $searchVal, $attrScope,
         $attrOrder, $attrRegex, $attrLimit, $attrReverse,
         $attrCasesensitive, $attrNosummary, $attrNosearch,
         $attrNoheader, $attrNototal, $attrBookview, $attrRenameview,
!        $attrShowlock, $attrNoEmpty, $attrTemplate, $attrHeader, $attrFormat,
!        $attrHitformat
      );
  }
  
*** lib/TWiki/Search.pm~ Sat Jan 4 20:36:46 2003
--- lib/TWiki/Search.pm Wed Feb 26 13:50:58 2003
***************
*** 61,66 ****
--- 61,67 ----
          $theRegex, $theLimit, $revSort, $caseSensitive, $noSummary,
          $noSearch, $noHeader, $noTotal, $doBookView, $doRenameView,
          $doShowLock, $noEmpty, $theTemplate, $theHeader, $theFormat,
+         $hitFormat,
          @junk ) = @_;
  
      ##TWiki::writeDebug "Search locale is $TWiki::siteLocale";
***************
*** 647,654 ****
                  }
                  $tempVal =~ s/\$summary/&TWiki::makeTopicSummary( $text, $topic, $thisWebName )/geos;
                  $tempVal =~ s/\$formfield\(\s*([^\)]*)\s*\)/getMetaFormField( $meta, $1 )/geos;
!                 $tempVal =~ s/\$pattern\(\s*(.*?\s*\.\*)\)/getTextPattern( $text, $1 )/geos;
!                 $tempVal =~ s/\$nop(\(\))?//gos; # remove filler, useful for nested search
                  $tempVal =~ s/\$quot(\(\))?/\"/gos; # expand double quote
                  $tempVal =~ s/\$percnt(\(\))?/\%/gos; # expand percent
                  $tempVal =~ s/\$dollar(\(\))?/\$/gos; # expand dollar
--- 648,659 ----
                  }
                  $tempVal =~ s/\$summary/&TWiki::makeTopicSummary( $text, $topic, $thisWebName )/geos;
                  $tempVal =~ s/\$formfield\(\s*([^\)]*)\s*\)/getMetaFormField( $meta, $1 )/geos;
!                 if ($hitFormat) {
!                     $tempVal = getAllHits( $text, $tempVal, $hitFormat);
!                 } else {
!                     $tempVal =~ s/\$pattern\(\s*(.*?\s*\.\*)\)/getTextPattern( $text, $1 )/geos;
!                 }
!                 $tempVal =~ s/\$nop(\(\))?//gos; # remove filler, useful for nested search
                  $tempVal =~ s/\$quot(\(\))?/\"/gos; # expand double quote
                  $tempVal =~ s/\$percnt(\(\))?/\%/gos; # expand percent
                  $tempVal =~ s/\$dollar(\(\))?/\$/gos; # expand dollar
***************
*** 803,809 ****
--- 808,874 ----
  }
  
  #=========================
+ # Build the multi-hit output for one topic.
+ #   $theText    - topic text that is scanned for hits
+ #   $thePattern - format string holding $pattern<delim>regex<delim>
+ #   $hitFormat  - template rendered once per hit; "$hit" is replaced
+ # Returns prepattern . rendered hits . postpattern, or '' when the format
+ # has no usable $pattern or no hit survives.
+ sub getAllHits
+ {
+     my( $theText, $thePattern, $hitFormat ) = @_;
+     $hitFormat = '' unless defined $hitFormat;
+ 
+     # split the format around $pattern; /s lets the parts contain newlines
+     my( $prepattern, $delims, $rest ) =
+         ( $thePattern =~ /(.*)\$pattern(.)(.*)/s );
+     return '' unless defined $delims;    # format has no $pattern
+ 
+     my $delime;
+     ( $delims, $delime ) = cnvdelims( $delims );
+ 
+     # the hit regex ends at the first closing delimiter not preceded by '\'
+     my( $hitre, $postpattern ) = ( $rest =~ /(.*?[^\\])$delime(.*)/s );
+     # bail out on an empty/unterminated pattern: an empty regex would make
+     # perl silently reuse the last successfully matched pattern
+     return '' unless defined $hitre;
+ 
+     $hitre =~ s/\\([$delims$delime])/$1/g; #unescape escaped delimiters
+     $hitre =~ s/([^\\])([\$\@\%\&\#\'\`\/])/$1\\$2/g; # escape some special chars
+ 
+     my $hitstr = '';
+     foreach my $hit ( $theText =~ /$hitre/gis ) {
+         next unless defined $hit;      # capture group that did not take part
+         next if $hit =~ /hitformat=/;  # don't show multihit search strings as hits
+         ( my $oneHit = $hitFormat ) =~ s/\$hit/$hit/g;
+         $hitstr .= $oneHit;
+     }
+     return '' unless length $hitstr;
+ 
+     $hitstr = $prepattern . $hitstr . $postpattern;
+     # a hit may itself contain a %SEARCH{...}% call; write the percent sign
+     # as an HTML entity so it is shown, not run (prevents recursive searches)
+     $hitstr =~ s/\%SEARCH/&#37;SEARCH/g;
+     return $hitstr;
+ }
+ 
+ #=========================
+ # specify pattern start and end delimiters in a way that they
+ # won't be considered metacharacters in the pattern regex
+ # (returned as the octal escapes '\NNN' of the opening and closing delimiter)
+ sub cnvdelims
+ {
+     my $delims = $_[0];
+     # brackets close with their mirror image, anything else with itself
+     my %closerOf = ( '(' => ')', '[' => ']', '{' => '}', '<' => '>' );
+     my $delime = exists $closerOf{$delims} ? $closerOf{$delims} : $delims;
+     return( sprintf( '\%.3o', ord($delims)),sprintf( '\%.3o', ord($delime)));
+ }
+ 
+ #=========================
+ 
+ 
  
  1;
  
  # EOF
*** data/TWiki/FormattedSearchinTopics.txt~ Wed Feb 26 13:23:34 2003
--- data/TWiki/FormattedSearchinTopics.txt Wed Feb 26 13:23:43 2003
***************
*** 0 ****
--- 1,103 ----
+ %META:TOPICINFO{author="guest" date="1042753216" format="1.0" version="1.3"}%
+ %TOC%
+ %STARTINCLUDE%
+ ---# TWiki Formatted Search in Topics
+ 
+ ---++ Overview
+ 
+ Normally the results of a FormattedSearch summarize each topic
+ meeting the search criteria. With the =format="..."= parameter
+ there is considerable flexibility in how the search topic
+ information can be presented. With the multihit patch to
+ TWiki, the =%SEARCH= is extended to permit formatting and
+ displaying of each location in a topic which
+ meets the specified search criteria. This is done by adding
+ a =hitformat= parameter to the =%SEARCH=, and
+ generalizing the =format= string.
+ 
+ ---++ Syntax
+ 
+ To indicate that a FormattedSearchinTopics is desired,
+ a =hitformat="..."= parameter is specified in the
+ =%SEARCH{...}%=. Whenever a hit is found in a topic, the
+ string specified by =hitformat= is used to format the hit,
+ with the string "$hit" in the string being replaced by the
+ indicated text in the hit. (See the discussion below
+ on =$pattern(...)= for how the "indicated text" is specified.)
+ + =format="..."= is used similarly to a normal FormattedSearch, + except that the =$pattern(...)= variable is mandatory and used to + specify exactly what in a topic will constitute a hit. Unlike a normal + FormattedSearch, the character following the =$pattern= need not be + a "("; it may be any character and constitutes the starting delimiter. + The ending delimiter to the =$pattern= specification will be + the same character, except '(' will have an ending delimiter ')', + '<' will have '>', '{' will have '}', and '[' will have ']'. Any use of + the starting or ending delimiters in the actual pattern must be preceded by '\'. + + This generalization of delimiters is available because, as discussed below, + every pattern string must indicate the hit by use of =(...)= within the string, + and it gets untidy adding '\' everywhere. A wise choice of delimiters + allows easier specification of a pattern string without excessive '\'s. + + Examples of pattern strings would be + <verbatim> + $pattern(abc\(def\)ghi) + $patternxabc(def)ghix + $pattern@abc(def)ghi@ + $pattern<abc(def)ghi> + </verbatim> + + The pattern string is a Perl RegularExpression, in which each nested =(...)= + (or =\(...\)= + if '(' and ')' were chosen for delimiters) specifies what will be substituted + for =$hit= in the =hitformat="..."= string if the _entire_ pattern string matches + some text in the topic. This permits only a portion of a hit to be selected. It + should be noted that the search within a topic is automatically case-insensitive. + + To prevent display of the actual =%SEARCH{...}%= string in the search results, + hits with the string "hitformat=" in them are ignored, and hits with "%SEARCH" in + them will not cause a second =%SEARCH= to be performed. 
+ + ---++ Designing a FormattedSearchinTopics + + There are several processing steps in a FormattedSearchinTopics, and different + information is available at each step for output as the results: + + * The webs specified by the =web='...'= are searched for topics matching the ="text", search="text", and topic="..."= parameters. It is these parameters (as modified by =regex="..."= parameters, etc.) which determine what topics will be inspected for hits. In essence, this is a first-level quick search to limit the topics which will be searched more carefully for hits. + * For these topics, the =hitformat= and =format= parameter strings are used to format the output as follows: + * The =hitformat= string is divided into three parts: The _prehit_ text before the "$hit" string, the "$hit" string, and the _posthit_ text after the "$hit" string. + * The =format= string is also divided into three parts: the _prepattern_ string before the =$pattern(...)=, the _pattern_ string in the =$pattern(...)=, and the _postpattern_ string after the =$pattern(...)=. + * The _pattern_ string has various _hit_ strings indicated by =(...)=. + * Taking liberties with new lines, the output of a successful FormattedSearchinTopics will be presented as follows: + + Optional header string as specified by the header="..." parameter + prepattern string for topic 1 + prehit string + hit #1 in topic 1 + posthit string + prehit string + hit #2 in topic 1 + posthit string + ... + postpattern string for topic 1 + prepattern string for topic 2 + prehit string + hit #1 in topic 2 + posthit string + ... + + + ---++ Example + + If we want to display the paragraphs wherever "GPL" is mentioned, a search like: + + + %SEARCH{ "GPL" hitformat=" * $hit
" scope="text" regex="on" nosearch="on" nototal="on" header="*Web: $web*" format="
Topic: [[$topic]]
$pattern(\([^\n\r]*GPL[^\n\r]*\))
"}% +
+ + will give the following: + + %SEARCH{ "GPL" hitformat=" * $hit
" scope="text" regex="on" nosearch="on" nototal="on" header="*Web: $web*" format="
Topic: [[$topic]]
$pattern(\([^\n\r]*GPL[^\n\r]*\))
"}% + +