Only in HTML-Scrubber-StripScripts-0.02: blib
Only in HTML-Scrubber-StripScripts-0.02: Changes
Only in HTML-Scrubber-StripScripts-0.02: Makefile
Only in HTML-Scrubber-StripScripts-0.02: Makefile.PL
Only in HTML-Scrubber-StripScripts-0.02: MANIFEST
Only in HTML-Scrubber-StripScripts-0.02: pm_to_blib
Only in HTML-Scrubber-StripScripts-0.02: README
diff -ur HTML-Scrubber-StripScripts-0.02.orig/StripScripts.pm HTML-Scrubber-StripScripts-0.02/StripScripts.pm
--- HTML-Scrubber-StripScripts-0.02.orig/StripScripts.pm 2004-03-18 12:07:31.000000000 -0500
+++ HTML-Scrubber-StripScripts-0.02/StripScripts.pm 2004-12-01 00:06:15.000000000 -0500
@@ -64,6 +64,20 @@
URLs in C<href> attributes in C<a> tags. Set this option to a true
value to allow them. Ignored unless C<Allow_href> is true.
+=item C<Allow_forms>
+
+By default, the scrubber won't allow forms or input elements. Set
+this option to a true value to allow them. If C<Allow_forms> is true, but
+C<Allow_href> is not, then input elements will be allowed, but
+forms will not.
+
+=item C<Allow_styles>
+
+By default, the scrubber won't allow attributes that specify styles or
+classes for their tags. Set this option to a true value to allow
+them. This won't allow all possible style definitions (since that
+allows javascript), but only a semi-arbitrary subset.
+
=item C<Whole_document>
By default, the scrubber will be configured to deal with a snippet
@@ -91,12 +105,20 @@
use vars qw(%re);
%re = (
+ number => qr#^-?\d+$#,   # optionally signed integer
- size => qr#^[+-]?\d+(?:\./d+)?[%*]?$#,
+ size => qr#^[+-]?\d+(?:\.\d+)?[%*]?$#,   # fixed: "\./d+" was a typo for "\.\d+", so decimal sizes never matched
color => qr#^(?:\w{2,20}|\#[\da-fA-F]{6})$#,
- word => qw#^\w*$#,
+ word => qr#^\w*$#,   # fixed: qw yielded a plain string rather than a compiled pattern
wordlist => qr#(?:[\w\-\, ]{1,200})$#,
text => qr#^[^\0]*$#,
- url => qr# (?:^ (?:https?|ftp) :// ) | (?:^ [\w\.,/-]+ $) #ix,
+ url => qr<^((https? :// [\w\-\.]{1,100} (?:\:\d{1,5})?)?   # optional scheme, host and port
+ (/ [\w\-\_\.!~*\|;+\$,]*)*   # absolute path segments
+ |
+ [\w\-\_\.!~*\|;+\$,]*   # or a single relative path segment
+ )
+ (\? [\w;\/?:@&=+\$,\[\]\-_\.!\~*\'\(\)%]*)?   # optional query string
+ (\# [\w]*)?   # optional fragment
+ $>ix,   # i flag kept, as in the pattern this replaces and in a_mailto
a_mailto => qr# (?:^ (?:https?|ftp) :// ) | (?:^ [\w\.,/-]+ $) | (?:^ mailto: [\w\-\.\+\=\*]+\@[\w\-\.]+ $) #ix,
);
@@ -109,8 +131,6 @@
@src = ( src => $re{'url'} ) if $cfg{Allow_src};
@background = ( background => $re{'url'} ) if $cfg{Allow_src};
- my %empty = ();
-
my %font_attr = (
'size' => $re{'size'},
'face' => $re{'wordlist'},
@@ -126,6 +146,14 @@
'align' => $re{'word'},
);
+ my %common_attr = (   # attribute rules shared by the tag entries in %rules
+ ( $cfg{Allow_styles} ? (   # class and style are offered only when Allow_styles is true
+ 'class' => $re{'wordlist'},
+ 'style' => $re{'word'},   # NOTE(review): reuses the 'word' pattern, so "property: value" declarations would presumably be rejected - confirm the intended subset
+ ) : () ),
+ 'id' => $re{'word'},   # id is allowed regardless of Allow_styles
+ );
+
my %cellha_attr = (
'align' => $re{'word'},
'char' => $re{'word'},
@@ -162,30 +190,38 @@
);
my %rules = (
- 'br' => { 'clear' => $re{'word'} },
- 'em' => \%empty,
- 'strong' => \%empty,
- 'dfn' => \%empty,
- 'code' => \%empty,
- 'samp' => \%empty,
- 'kbd' => \%empty,
- 'var' => \%empty,
- 'cite' => \%empty,
- 'abbr' => \%empty,
- 'acronym' => \%empty,
- 'q' => { @cite },
- 'blockquote' => { @cite },
- 'sub' => \%empty,
- 'sup' => \%empty,
- 'tt' => \%empty,
- 'i' => \%empty,
- 'b' => \%empty,
- 'big' => \%empty,
- 'small' => \%empty,
- 'u' => \%empty,
- 's' => \%empty,
- 'strike' => \%empty,
- 'font' => \%font_attr,
+ 'br' => { 'clear' => $re{'word'},
+ %common_attr,
+ },
+ 'em' => \%common_attr,
+ 'strong' => \%common_attr,
+ 'dfn' => \%common_attr,
+ 'code' => \%common_attr,
+ 'samp' => \%common_attr,
+ 'kbd' => \%common_attr,
+ 'var' => \%common_attr,
+ 'cite' => \%common_attr,
+ 'abbr' => \%common_attr,
+ 'acronym' => \%common_attr,
+ 'q' => { @cite,
+ %common_attr,
+ },
+ 'blockquote' => { @cite,
+ %common_attr,
+ },
+ 'sub' => \%common_attr,
+ 'sup' => \%common_attr,
+ 'tt' => \%common_attr,
+ 'i' => \%common_attr,
+ 'b' => \%common_attr,
+ 'big' => \%common_attr,
+ 'small' => \%common_attr,
+ 'u' => \%common_attr,
+ 's' => \%common_attr,
+ 'strike' => \%common_attr,
+ 'font' => { %font_attr,
+ %common_attr,
+ },
'table' => { 'frame' => $re{'word'},
'rules' => $re{'word'},
%texta_attr,
@@ -200,52 +236,103 @@
'bordercolorlight' => $re{'color'},
'bordercolordark' => $re{'color'},
'summary' => $re{'text'},
+ %common_attr,
},
- 'caption' => { 'align' => $re{'word'} },
- 'colgroup' => \%col_attr,
- 'col' => \%col_attr,
- 'thead' => \%cellhv_attr,
- 'tfoot' => \%cellhv_attr,
- 'tbody' => \%cellhv_attr,
+ 'caption' => { 'align' => $re{'word'},
+ %common_attr,
+ },
+ 'colgroup' => { %col_attr,
+ %common_attr,
+ },
+ 'col' => { %col_attr,
+ %common_attr,
+ },
+ 'thead' => { %cellhv_attr,
+ %common_attr,
+ },
+ 'tfoot' => { %cellhv_attr,
+ %common_attr,
+ },
+ 'tbody' => { %cellhv_attr,
+ %common_attr,
+ },
'tr' => { bgcolor => $re{'color'},
%cellhv_attr,
+ %common_attr,
},
- 'th' => \%thtd_attr,
- 'td' => \%thtd_attr,
- 'ins' => \%insdel_attr,
- 'del' => \%insdel_attr,
- 'a' => { @href },
- 'h1' => \%texta_attr,
- 'h2' => \%texta_attr,
- 'h3' => \%texta_attr,
- 'h4' => \%texta_attr,
- 'h5' => \%texta_attr,
- 'h6' => \%texta_attr,
- 'p' => \%texta_attr,
- 'div' => \%texta_attr,
- 'span' => \%texta_attr,
+ 'th' => { %thtd_attr,
+ %common_attr,
+ },
+ 'td' => { %thtd_attr,
+ %common_attr,
+ },
+ 'ins' => { %insdel_attr,
+ %common_attr,
+ },
+ 'del' => { %insdel_attr,
+ %common_attr,
+ },
+ 'a' => { @href,
+ 'name' => $re{'word'},
+ %common_attr,
+ },
+ 'h1' => { %texta_attr,   # headings, p, div and span: %texta_attr plus the common attributes
+ %common_attr,
+ },
+ 'h2' => { %texta_attr,
+ %common_attr,
+ },
+ 'h3' => { %texta_attr,
+ %common_attr,
+ },
+ 'h4' => { %texta_attr,
+ %common_attr,
+ },
+ 'h5' => { %texta_attr,
+ %common_attr,
+ },
+ 'h6' => { %texta_attr,
+ %common_attr,
+ },
+ 'p' => { %texta_attr,
+ %common_attr,
+ },
+ 'div' => { %texta_attr,
+ %common_attr,
+ },
+ 'span' => { %texta_attr,
+ %common_attr,
+ },
'ul' => { 'type' => $re{'word'},
'compact' => $re{'word'},
+ %common_attr,
},
'ol' => { 'type' => $re{'text'},
'compact' => $re{'word'},
'start' => $re{'size'},
+ %common_attr,
},
'li' => { 'type' => $re{'text'},
'value' => $re{'size'},
+ %common_attr,
},
- 'dl' => { 'compact' => $re{'word'} },
- 'dt' => \%empty,
- 'dd' => \%empty,
- 'address' => \%empty,
+ 'dl' => { 'compact' => $re{'word'},
+ %common_attr,
+ },
+ 'dt' => \%common_attr,
+ 'dd' => \%common_attr,
+ 'address' => \%common_attr,
'hr' => { %texta_attr,
'width' => $re{'size'},
- 'size ' => $re{'size'},
+ 'size' => $re{'size'},   # fixed: the key had a trailing space, so it could not match an attribute named size
'noshade' => $re{'word'},
+ %common_attr,
},
- 'pre' => { 'width' => $re{'size'} },
- 'center' => \%empty,
- 'nobr' => \%empty,
+ 'pre' => { 'width' => $re{'size'},
+ %common_attr,
+ },
+ 'center' => \%common_attr,
+ 'nobr' => \%common_attr,
'img' => { @src,
'alt' => $re{'text'},
'width' => $re{'size'},
@@ -254,17 +341,85 @@
'hspace' => $re{'size'},
'vspace' => $re{'size'},
'align' => $re{'word'},
+ %common_attr,
},
+ # Allow forms only if forms _and_ hrefs allowed
+ ( $cfg{Allow_forms} && $cfg{Allow_href} ?
+ ( 'form' => { 'action' => $re{'url'},
+ 'accept' => $re{'text'},
+ 'enctype' => $re{'text'},
+ 'method' => $re{'word'},
+ 'name' => $re{'word'},
+ 'target' => $re{'word'},
+ %common_attr,
+ },
+ ) : ()
+ ),
+ # Allow input elements if forms allowed
+ ( $cfg{Allow_forms} ?
+ ( 'button' => { 'disabled' => $re{'text'},
+ 'name' => $re{'word'},
+ 'type' => $re{'word'},
+ 'value' => $re{'text'},
+ %common_attr,
+ },
+ 'fieldset' => \%common_attr,
+ 'input' => { 'accept' => $re{'text'},
+ 'align' => $re{'text'},
+ 'alt' => $re{'text'},
+ 'checked' => $re{'text'},
+ 'disabled' => $re{'text'},
+ 'maxlength' => $re{'number'},
+ 'name' => $re{'text'},
+ 'readonly' => $re{'text'},
+ 'size' => $re{'number'},
+ @src,   # same Allow_src gate as img: an image input's src is fetched without a click
+ 'type' => $re{'text'},
+ 'value' => $re{'text'},
+ %common_attr,
+ },
+ 'label' => { 'for' => $re{'word'},
+ %common_attr,
+ },
+ 'legend' => { 'align' => $re{'word'},
+ %common_attr,
+ },
+ 'select' => { 'disabled' => $re{'text'},
+ 'multiple' => $re{'text'},
+ 'name' => $re{'text'},
+ 'size' => $re{'number'},
+ %common_attr,
+ },
+ 'optgroup' => { 'label' => $re{'text'},
+ 'disabled' => $re{'text'},
+ %common_attr,
+ },
+ 'option' => { 'disabled' => $re{'text'},
+ 'label' => $re{'text'},
+ 'selected' => $re{'text'},
+ 'value' => $re{'text'},
+ %common_attr,
+ },
+ 'textarea' => { 'cols' => $re{'number'},
+ 'rows' => $re{'number'},
+ 'disabled' => $re{'text'},
+ 'name' => $re{'text'},
+ 'readonly' => $re{'text'},
+ %common_attr,
+ },
+ ) : ()
+ ),
( $cfg{Whole_document} ?
- ( 'body' => { 'bgcolor' => $re{'color'},
+ ( 'body' => { 'bgcolor' => $re{'color'},
@background,
'link' => $re{'color'},
'vlink' => $re{'color'},
'alink' => $re{'color'},
'text' => $re{'color'},
+ %common_attr,
},
'head' => {},
- 'title' => {},
+ 'title' => \%common_attr,
'html' => {},
) : ()
),
Only in HTML-Scrubber-StripScripts-0.02: StripScripts.pm~
Only in HTML-Scrubber-StripScripts-0.02: t