Patch against HTML-Scrubber-StripScripts-0.02 (StripScripts.pm): adds the
Allow_forms and Allow_styles options, a %common_attr set merged into the
tag rules, and a fully anchored URL pattern.
Line breaks and indentation were restored from a whitespace-collapsed copy,
so apply with whitespace-insensitive matching, e.g. `patch -p1 -l`.

Only in HTML-Scrubber-StripScripts-0.02: blib
Only in HTML-Scrubber-StripScripts-0.02: Changes
Only in HTML-Scrubber-StripScripts-0.02: Makefile
Only in HTML-Scrubber-StripScripts-0.02: Makefile.PL
Only in HTML-Scrubber-StripScripts-0.02: MANIFEST
Only in HTML-Scrubber-StripScripts-0.02: pm_to_blib
Only in HTML-Scrubber-StripScripts-0.02: README
diff -ur HTML-Scrubber-StripScripts-0.02.orig/StripScripts.pm HTML-Scrubber-StripScripts-0.02/StripScripts.pm
--- HTML-Scrubber-StripScripts-0.02.orig/StripScripts.pm	2004-03-18 12:07:31.000000000 -0500
+++ HTML-Scrubber-StripScripts-0.02/StripScripts.pm	2004-12-01 00:06:15.000000000 -0500
@@ -64,6 +64,20 @@
 URLs in C<HREF> attributes in C<A> tags.  Set this option to a true
 value to allow them.  Ignored unless C<Allow_href> is true.
 
+=item C<Allow_forms>
+
+By default, the scrubber won't allow forms or input elements. Set
+this option to allow them. If C<Allow_forms> is true, but
+C<Allow_href> is not, then input elements will be allowed, but
+forms will not.
+
+=item C<Allow_styles>
+
+By default, the scrubber won't allow attributes that specify styles or
+classes for their tags. Set this option to a true value to allow
+them. This won't allow all possible style definitions (since that
+allows javascript), but only a semi-arbitrary subset.
+
 =item C<Whole_document>
 
 By default, the scrubber will be configured to deal with a snippet
@@ -91,12 +105,20 @@
 
 use vars qw(%re);
 %re = (
+    number   => qr#^-?\d+$#,    # optionally negative integer
     size     => qr#^[+-]?\d+(?:\./d+)?[%*]?$#,
     color    => qr#^(?:\w{2,20}|\#[\da-fA-F]{6})$#,
     word     => qw#^\w*$#,
     wordlist => qr#(?:[\w\-\, ]{1,200})$#,
     text     => qr#^[^\0]*$#,
-    url      => qr# (?:^ (?:https?|ftp) :// ) | (?:^ [\w\.,/-]+ $) #ix,
+    url      => qr<^((https? :// [\w\-\.]{1,100} (?:\:\d{1,5})?)?
+                     (/ [\w\-\_\.!~*\|;+\$,]*)*
+                   |
+                     [\w\-\_\.!~*\|;+\$,]*
+                   )
+                   (\? [\w;\/?:@&=+\$,\[\]\-_\.!\~*\'\(\)%]*)?
+                   (\# [\w]*)?
+                   $>x,
     a_mailto => qr# (?:^ (?:https?|ftp) :// ) | (?:^ [\w\.,/-]+ $) | (?:^ mailto: [\w\-\.\+\=\*]+\@[\w\-\.]+ $) #ix,
 );
 
@@ -109,8 +131,6 @@
     @src        = ( src        => $re{'url'} ) if $cfg{Allow_src};
     @background = ( background => $re{'url'} ) if $cfg{Allow_src};
 
-    my %empty = ();
-
     my %font_attr = (
         'size'  => $re{'size'},
         'face'  => $re{'wordlist'},
@@ -126,6 +146,14 @@
         'align' => $re{'word'},
     );
 
+    my %common_attr = (    # attributes shared by the tag rules below
+        ( $cfg{Allow_styles} ? (
+            'class' => $re{'wordlist'},
+            'style' => $re{'word'},
+          ) : () ),
+        'id' => $re{'word'},
+    );
+
     my %cellha_attr = (
         'align' => $re{'word'},
         'char'  => $re{'word'},
@@ -162,30 +190,38 @@
     );
 
     my %rules = (
-        'br'         => { 'clear' => $re{'word'} },
-        'em'         => \%empty,
-        'strong'     => \%empty,
-        'dfn'        => \%empty,
-        'code'       => \%empty,
-        'samp'       => \%empty,
-        'kbd'        => \%empty,
-        'var'        => \%empty,
-        'cite'       => \%empty,
-        'abbr'       => \%empty,
-        'acronym'    => \%empty,
-        'q'          => { @cite },
-        'blockquote' => { @cite },
-        'sub'        => \%empty,
-        'sup'        => \%empty,
-        'tt'         => \%empty,
-        'i'          => \%empty,
-        'b'          => \%empty,
-        'big'        => \%empty,
-        'small'      => \%empty,
-        'u'          => \%empty,
-        's'          => \%empty,
-        'strike'     => \%empty,
-        'font'       => \%font_attr,
+        'br'         => { 'clear' => $re{'word'},
+                          %common_attr,
+                        },
+        'em'         => \%common_attr,
+        'strong'     => \%common_attr,
+        'dfn'        => \%common_attr,
+        'code'       => \%common_attr,
+        'samp'       => \%common_attr,
+        'kbd'        => \%common_attr,
+        'var'        => \%common_attr,
+        'cite'       => \%common_attr,
+        'abbr'       => \%common_attr,
+        'acronym'    => \%common_attr,
+        'q'          => { @cite,
+                          %common_attr,
+                        },
+        'blockquote' => { @cite,
+                          %common_attr,
+                        },
+        'sub'        => \%common_attr,
+        'sup'        => \%common_attr,
+        'tt'         => \%common_attr,
+        'i'          => \%common_attr,
+        'b'          => \%common_attr,
+        'big'        => \%common_attr,
+        'small'      => \%common_attr,
+        'u'          => \%common_attr,
+        's'          => \%common_attr,
+        'strike'     => \%common_attr,
+        'font'       => { %font_attr,
+                          %common_attr,
+                        },
         'table'      => { 'frame' => $re{'word'},
                           'rules' => $re{'word'},
                           %texta_attr,
@@ -200,52 +236,103 @@
                           'bordercolorlight' => $re{'color'},
                           'bordercolordark' => $re{'color'},
                           'summary' => $re{'text'},
+                          %common_attr,
                         },
-        'caption'    => { 'align' => $re{'word'} },
-        'colgroup'   => \%col_attr,
-        'col'        => \%col_attr,
-        'thead'      => \%cellhv_attr,
-        'tfoot'      => \%cellhv_attr,
-        'tbody'      => \%cellhv_attr,
+        'caption'    => { 'align' => $re{'word'},
+                          %common_attr,
+                        },
+        'colgroup'   => { %col_attr,
+                          %common_attr,
+                        },
+        'col'        => { %col_attr,
+                          %common_attr,
+                        },
+        'thead'      => { %cellhv_attr,
+                          %common_attr,
+                        },
+        'tfoot'      => { %cellhv_attr,
+                          %common_attr,
+                        },
+        'tbody'      => { %cellhv_attr,
+                          %common_attr,
+                        },
         'tr'         => { bgcolor => $re{'color'},
                           %cellhv_attr,
+                          %common_attr,
                         },
-        'th'         => \%thtd_attr,
-        'td'         => \%thtd_attr,
-        'ins'        => \%insdel_attr,
-        'del'        => \%insdel_attr,
-        'a'          => { @href },
-        'h1'         => \%texta_attr,
-        'h2'         => \%texta_attr,
-        'h3'         => \%texta_attr,
-        'h4'         => \%texta_attr,
-        'h5'         => \%texta_attr,
-        'h6'         => \%texta_attr,
-        'p'          => \%texta_attr,
-        'div'        => \%texta_attr,
-        'span'       => \%texta_attr,
+        'th'         => { %thtd_attr,
+                          %common_attr,
+                        },
+        'td'         => { %thtd_attr,
+                          %common_attr,
+                        },
+        'ins'        => { %insdel_attr,
+                          %common_attr,
+                        },
+        'del'        => { %insdel_attr,
+                          %common_attr,
+                        },
+        'a'          => { @href,
+                          'name' => $re{'word'},
+                          %common_attr,
+                        },
+        'h1'         => { %texta_attr,
+                          %common_attr,
+                        },
+        'h2'         => { %texta_attr,
+                          %common_attr,
+                        },
+        'h3'         => { %texta_attr,
+                          %common_attr,
+                        },
+        'h4'         => { %texta_attr,
+                          %common_attr,
+                        },
+        'h5'         => { %texta_attr,
+                          %common_attr,
+                        },
+        'h6'         => { %texta_attr,
+                          %common_attr,
+                        },
+        'p'          => { %texta_attr,
+                          %common_attr,
+                        },
+        'div'        => { %texta_attr,
+                          %common_attr,
+                        },
+        'span'       => { %texta_attr,
+                          %common_attr,
+                        },
         'ul'         => { 'type' => $re{'word'},
                           'compact' => $re{'word'},
+                          %common_attr,
                         },
         'ol'         => { 'type' => $re{'text'},
                           'compact' => $re{'word'},
                           'start' => $re{'size'},
+                          %common_attr,
                         },
         'li'         => { 'type' => $re{'text'},
                           'value' => $re{'size'},
+                          %common_attr,
                         },
-        'dl'         => { 'compact' => $re{'word'} },
-        'dt'         => \%empty,
-        'dd'         => \%empty,
-        'address'    => \%empty,
+        'dl'         => { 'compact' => $re{'word'},
+                          %common_attr,
+                        },
+        'dt'         => \%common_attr,
+        'dd'         => \%common_attr,
+        'address'    => \%common_attr,
         'hr'         => { %texta_attr,
                           'width' => $re{'size'},
                           'size ' => $re{'size'},
                           'noshade' => $re{'word'},
+                          %common_attr,
                         },
-        'pre'        => { 'width' => $re{'size'} },
-        'center'     => \%empty,
-        'nobr'       => \%empty,
+        'pre'        => { 'width' => $re{'size'},
+                          %common_attr,
+                        },
+        'center'     => \%common_attr,
+        'nobr'       => \%common_attr,
         'img'        => { @src,
                           'alt' => $re{'text'},
                           'width' => $re{'size'},
@@ -254,17 +341,85 @@
                           'hspace' => $re{'size'},
                           'vspace' => $re{'size'},
                           'align' => $re{'word'},
+                          %common_attr,
                         },
+        # Allow forms only if forms _and_ hrefs allowed
+        ( $cfg{Allow_forms} && $cfg{Allow_href} ?
+          ( 'form'     => { 'action' => $re{'url'},
+                            'accept' => $re{'text'},
+                            'enctype' => $re{'text'},
+                            'method' => $re{'word'},
+                            'name' => $re{'word'},
+                            'target' => $re{'word'},
+                            %common_attr,
+                          },
+          ) : ()
+        ),
+        # Allow input elements if forms allowed
+        ( $cfg{Allow_forms} ?
+          ( 'button'   => { 'disabled' => $re{'text'},
+                            'name' => $re{'word'},
+                            'type' => $re{'word'},
+                            'value' => $re{'text'},
+                            %common_attr,
+                          },
+            'fieldset' => \%common_attr,
+            'input'    => { 'accept' => $re{'text'},
+                            'align' => $re{'text'},
+                            'alt' => $re{'text'},
+                            'checked' => $re{'text'},
+                            'disabled' => $re{'text'},
+                            'maxlength' => $re{'number'},
+                            'name' => $re{'text'},
+                            'readonly' => $re{'text'},
+                            'size' => $re{'number'},
+                            @src,    # image inputs are fetched automatically, so gate on Allow_src like 'img'
+                            'type' => $re{'text'},
+                            'value' => $re{'text'},
+                            %common_attr,
+                          },
+            'label'    => { 'for' => $re{'word'},
+                            %common_attr,
+                          },
+            'legend'   => { 'align' => $re{'word'},
+                            %common_attr,
+                          },
+            'select'   => { 'disabled' => $re{'text'},
+                            'multiple' => $re{'text'},
+                            'name' => $re{'text'},
+                            'size' => $re{'number'},
+                            %common_attr,
+                          },
+            'optgroup' => { 'label' => $re{'text'},
+                            'disabled' => $re{'text'},
+                            %common_attr,
+                          },
+            'option'   => { 'disabled' => $re{'text'},
+                            'label' => $re{'text'},
+                            'selected' => $re{'text'},
+                            'value' => $re{'text'},
+                            %common_attr,
+                          },
+            'textarea' => { 'cols' => $re{'number'},
+                            'rows' => $re{'number'},
+                            'disabled' => $re{'text'},
+                            'name' => $re{'text'},
+                            'readonly' => $re{'text'},
+                            %common_attr,
+                          },
+          ) : ()
+        ),
         ( $cfg{Whole_document} ?
-          ( 'body'     => { 'bgcolor' => $re{'color'},
+          ( 'body'     => { 'bgcolor' => $re{'color'},
                             @background,
                             'link' => $re{'color'},
                             'vlink' => $re{'color'},
                             'alink' => $re{'color'},
                             'text' => $re{'color'},
+                            %common_attr,
                           },
             'head'     => {},
-            'title'    => {},
+            'title'    => \%common_attr,
             'html'     => {},
           ) : ()
         ),
Only in HTML-Scrubber-StripScripts-0.02: StripScripts.pm~
Only in HTML-Scrubber-StripScripts-0.02: t