MediaWiki  REL1_24
SanitizerTest.php
Go to the documentation of this file.
00001 <?php
00002 
/**
 * PHPUnit tests for the static helpers of the Sanitizer class:
 * character-reference decoding, HTML tag filtering, attribute
 * decoding/filtering and inline CSS checking.
 */
class SanitizerTest extends MediaWikiTestCase {

    /**
     * Explicitly load the Sanitizer class before every test.
     */
    protected function setUp() {
        parent::setUp();

        AutoLoader::loadClass( 'Sanitizer' );
    }

    /**
     * A named entity is decoded to its UTF-8 byte sequence.
     *
     * @covers Sanitizer::decodeCharReferences
     */
    public function testDecodeNamedEntities() {
        $this->assertEquals(
            "\xc3\xa9cole",
            Sanitizer::decodeCharReferences( '&eacute;cole' ),
            'decode named entities'
        );
    }

    /**
     * Hexadecimal and decimal numeric references are both decoded.
     *
     * @covers Sanitizer::decodeCharReferences
     */
    public function testDecodeNumericEntities() {
        $this->assertEquals(
            "\xc4\x88io bonas dans l'\xc3\xa9cole!",
            Sanitizer::decodeCharReferences( "&#x108;io bonas dans l'&#233;cole!" ),
            'decode numeric entities'
        );
    }

    /**
     * Numeric and named references may appear in the same string.
     *
     * @covers Sanitizer::decodeCharReferences
     */
    public function testDecodeMixedEntities() {
        $this->assertEquals(
            "\xc4\x88io bonas dans l'\xc3\xa9cole!",
            Sanitizer::decodeCharReferences( "&#x108;io bonas dans l'&eacute;cole!" ),
            'decode mixed numeric/named entities'
        );
    }

    /**
     * Decoding is a single pass: an escaped ampersand (&amp; or &#38;)
     * followed by entity-looking text must yield that text literally,
     * not be decoded a second time.
     *
     * @covers Sanitizer::decodeCharReferences
     */
    public function testDecodeMixedComplexEntities() {
        $this->assertEquals(
            "\xc4\x88io bonas dans l'\xc3\xa9cole! (mais pas &#x108;io dans l'&eacute;cole)",
            Sanitizer::decodeCharReferences(
                "&#x108;io bonas dans l'&eacute;cole! (mais pas &amp;#x108;io dans l'&#38;eacute;cole)"
            ),
            'decode mixed complex entities'
        );
    }

    /**
     * A bare ampersand that does not start a reference is left untouched.
     *
     * @covers Sanitizer::decodeCharReferences
     */
    public function testInvalidAmpersand() {
        $this->assertEquals(
            'a & b',
            Sanitizer::decodeCharReferences( 'a & b' ),
            'Invalid ampersand'
        );
    }

    /**
     * An unknown named entity is passed through unchanged.
     *
     * @covers Sanitizer::decodeCharReferences
     */
    public function testInvalidEntities() {
        $this->assertEquals(
            '&foo;',
            Sanitizer::decodeCharReferences( '&foo;' ),
            'Invalid named entity'
        );
    }

    /**
     * An out-of-range numeric reference decodes to UTF8_REPLACEMENT.
     *
     * @covers Sanitizer::decodeCharReferences
     */
    public function testInvalidNumberedEntities() {
        $this->assertEquals(
            UTF8_REPLACEMENT,
            Sanitizer::decodeCharReferences( "&#88888888888888;" ),
            'Invalid numbered entity'
        );
    }

    /**
     * With Tidy disabled, an HTML5 tag is either escaped or kept (and
     * auto-closed) depending on the expectation supplied by the provider.
     *
     * @covers Sanitizer::removeHTMLtags
     * @dataProvider provideHtml5Tags
     *
     * @param string $tag Name of an HTML5 tag
     * @param bool $escaped Whether the sanitizer is expected to escape the tag
     */
    public function testRemovehtmltagsOnHtml5Tags( $tag, $escaped ) {
        $this->setMwGlobals( array(
            'wgUseTidy' => false
        ) );

        if ( $escaped ) {
            $this->assertEquals( "&lt;$tag&gt;",
                Sanitizer::removeHTMLtags( "<$tag>" )
            );
        } else {
            $this->assertEquals( "<$tag></$tag>\n",
                Sanitizer::removeHTMLtags( "<$tag>" )
            );
        }
    }

    /**
     * Provider for testRemovehtmltagsOnHtml5Tags().
     *
     * @return array Rows of ( tag name, whether the tag should be escaped )
     */
    public static function provideHtml5Tags() {
        $ESCAPED = true; # We want tag to be escaped
        $VERBATIM = false; # We want to keep the tag
        return array(
            array( 'data', $VERBATIM ),
            array( 'mark', $VERBATIM ),
            array( 'time', $VERBATIM ),
            array( 'video', $ESCAPED ),
        );
    }

    /**
     * Provider for testRemoveHTMLtags().
     *
     * Declared public static like every other provider in this class.
     *
     * @return array Rows of ( input HTML, expected output, message )
     */
    public static function dataRemoveHTMLtags() {
        return array(
            // former testSelfClosingTag
            array(
                '<div>Hello world</div />',
                '<div>Hello world</div>',
                'Self-closing closing div'
            ),
            // Make sure special nested HTML5 semantics are not broken
            // http://www.whatwg.org/html/text-level-semantics.html#the-kbd-element
            array(
                '<kbd><kbd>Shift</kbd>+<kbd>F3</kbd></kbd>',
                '<kbd><kbd>Shift</kbd>+<kbd>F3</kbd></kbd>',
                'Nested <kbd>.'
            ),
            // http://www.whatwg.org/html/text-level-semantics.html#the-sub-and-sup-elements
            array(
                '<var>x<sub><var>i</var></sub></var>, <var>y<sub><var>i</var></sub></var>',
                '<var>x<sub><var>i</var></sub></var>, <var>y<sub><var>i</var></sub></var>',
                'Nested <var>.'
            ),
            // http://www.whatwg.org/html/text-level-semantics.html#the-dfn-element
            array(
                '<dfn><abbr title="Garage Door Opener">GDO</abbr></dfn>',
                '<dfn><abbr title="Garage Door Opener">GDO</abbr></dfn>',
                '<abbr> inside <dfn>',
            ),
        );
    }

    /**
     * Generic input/output check of Sanitizer::removeHTMLtags() with Tidy
     * disabled.
     *
     * @covers Sanitizer::removeHTMLtags
     * @dataProvider dataRemoveHTMLtags
     *
     * @param string $input HTML handed to the sanitizer
     * @param string $output Expected sanitized HTML
     * @param string|null $msg Assertion message
     */
    public function testRemoveHTMLtags( $input, $output, $msg = null ) {
        // Go through setMwGlobals() rather than writing to $GLOBALS directly,
        // so the override does not leak into tests that run afterwards.
        $this->setMwGlobals( array(
            'wgUseTidy' => false
        ) );
        $this->assertEquals( $output, Sanitizer::removeHTMLtags( $input ), $msg );
    }

    /**
     * An attribute string is parsed into the expected name => value map.
     *
     * @covers Sanitizer::decodeTagAttributes
     * @dataProvider provideTagAttributesToDecode
     *
     * @param array $expected Expected map of attribute name => value
     * @param string $attributes Raw attribute text
     * @param string $message Assertion message
     */
    public function testDecodeTagAttributes( $expected, $attributes, $message = '' ) {
        $this->assertEquals( $expected,
            Sanitizer::decodeTagAttributes( $attributes ),
            $message
        );
    }

    /**
     * Provider for testDecodeTagAttributes().
     *
     * @return array Rows of ( expected attribute map, raw attribute text, message )
     */
    public static function provideTagAttributesToDecode() {
        return array(
            array( array( 'foo' => 'bar' ), 'foo=bar', 'Unquoted attribute' ),
            array( array( 'foo' => 'bar' ), '    foo   =   bar    ', 'Spaced attribute' ),
            array( array( 'foo' => 'bar' ), 'foo="bar"', 'Double-quoted attribute' ),
            array( array( 'foo' => 'bar' ), 'foo=\'bar\'', 'Single-quoted attribute' ),
            array(
                array( 'foo' => 'bar', 'baz' => 'foo' ),
                'foo=\'bar\'   baz="foo"',
                'Several attributes'
            ),
            array( array( ':foo' => 'bar' ), ':foo=\'bar\'', 'Leading :' ),
            array( array( '_foo' => 'bar' ), '_foo=\'bar\'', 'Leading _' ),
            array( array( 'foo' => 'bar' ), 'Foo=\'bar\'', 'Leading capital' ),
            array( array( 'foo' => 'BAR' ), 'FOO=BAR', 'Attribute keys are normalized to lowercase' ),

            # Invalid beginning
            array( array(), '-foo=bar', 'Leading - is forbidden' ),
            array( array(), '.foo=bar', 'Leading . is forbidden' ),

            # The same characters, and digits, are fine after the first character
            array( array( 'foo-bar' => 'bar' ), 'foo-bar=bar', 'A - is allowed inside the attribute' ),
            array( array( 'foo-' => 'bar' ), 'foo-=bar', 'A - is allowed as last character' ),
            array( array( 'foo.bar' => 'baz' ), 'foo.bar=baz', 'A . is allowed inside the attribute' ),
            array( array( 'foo.' => 'baz' ), 'foo.=baz', 'A . is allowed as last character' ),
            array( array( 'foo6' => 'baz' ), 'foo6=baz', 'Numbers are allowed' ),

            # This bit is more relaxed than XML rules, but some extensions use
            # it, like ProofreadPage (see bug 27539)
            array( array( '1foo' => 'baz' ), '1foo=baz', 'Leading numbers are allowed' ),
            array( array(), 'foo$=baz', 'Symbols are not allowed' ),
            array( array(), 'foo@=baz', 'Symbols are not allowed' ),
            array( array(), 'foo~=baz', 'Symbols are not allowed' ),
            array(
                array( 'foo' => '1[#^`*%w/(' ),
                'foo=1[#^`*%w/(',
                'All kind of characters are allowed as values'
            ),
            array(
                array( 'foo' => '1[#^`*%\'w/(' ),
                'foo="1[#^`*%\'w/("',
                'Single quotes are allowed if quoted by double quotes'
            ),
            array(
                array( 'foo' => '1[#^`*%"w/(' ),
                'foo=\'1[#^`*%"w/(\'',
                'Double quotes are allowed if quoted by single quotes'
            ),
            array( array( 'foo' => '&"' ), 'foo=&amp;&quot;', 'Special chars can be provided as entities' ),
            array( array( 'foo' => '&foobar;' ), 'foo=&foobar;', 'Entity-like items are accepted' ),
        );
    }

    /**
     * Deprecated presentational attributes come back from
     * Sanitizer::fixTagAttributes() unchanged, prefixed with one space.
     *
     * @covers Sanitizer::fixTagAttributes
     * @dataProvider provideDeprecatedAttributes
     *
     * @param string $inputAttr Attribute text, e.g. 'align="left"'
     * @param string $inputEl Name of the element carrying the attribute
     * @param string $message Assertion message
     */
    public function testDeprecatedAttributesUnaltered( $inputAttr, $inputEl, $message = '' ) {
        $this->assertEquals( " $inputAttr",
            Sanitizer::fixTagAttributes( $inputAttr, $inputEl ),
            $message
        );
    }

    /**
     * Provider for testDeprecatedAttributesUnaltered().
     *
     * @return array Rows of ( attribute text, element name )
     */
    public static function provideDeprecatedAttributes() {
        return array(
            array( 'clear="left"', 'br' ),
            array( 'clear="all"', 'br' ),
            array( 'width="100"', 'td' ),
            array( 'nowrap="true"', 'td' ),
            array( 'nowrap=""', 'td' ),
            array( 'align="right"', 'td' ),
            array( 'align="center"', 'table' ),
            array( 'align="left"', 'tr' ),
            array( 'align="center"', 'div' ),
            array( 'align="left"', 'h1' ),
            array( 'align="left"', 'p' ),
        );
    }

    /**
     * Sanitizer::checkCss() output for comment handling and for inputs
     * that must be rejected as insecure.
     *
     * @covers Sanitizer::checkCss
     * @dataProvider provideCssCommentsFixtures
     *
     * @param string $expected Expected return value of checkCss()
     * @param string $css CSS text to check
     * @param string $message Assertion message
     */
    public function testCssCommentsChecking( $expected, $css, $message = '' ) {
        $this->assertEquals( $expected,
            Sanitizer::checkCss( $css ),
            $message
        );
    }

    /**
     * Provider for testCssCommentsChecking().
     *
     * @return array Rows of ( expected output, input CSS [, message] )
     */
    public static function provideCssCommentsFixtures() {
        return array(
            // Valid comments spanning entire input
            array( '/**/', '/**/' ),
            array( '/* comment */', '/* comment */' ),
            // Weird stuff
            array( ' ', '/****/' ),
            array( ' ', '/* /* */' ),
            array( 'display: block;', "display:/* foo */block;" ),
            array( 'display: block;', "display:\\2f\\2a foo \\2a\\2f block;",
                'Backslash-escaped comments must be stripped (bug 28450)' ),
            array( '', '/* unfinished comment structure',
                'Remove anything after a comment-start token' ),
            array( '', "\\2f\\2a unifinished comment'",
                'Remove anything after a backslash-escaped comment-start token' ),
            // Constructs that must be replaced wholesale by the insecure-input marker
            array(
                '/* insecure input */',
                'filter: progid:DXImageTransform.Microsoft.AlphaImageLoader'
                    . '(src=\'asdf.png\',sizingMethod=\'scale\');'
            ),
            array(
                '/* insecure input */',
                '-ms-filter: "progid:DXImageTransform.Microsoft.AlphaImageLoader'
                    . '(src=\'asdf.png\',sizingMethod=\'scale\')";'
            ),
            array( '/* insecure input */', 'width: expression(1+1);' ),
            array( '/* insecure input */', 'background-image: image(asdf.png);' ),
            array( '/* insecure input */', 'background-image: -webkit-image(asdf.png);' ),
            array( '/* insecure input */', 'background-image: -moz-image(asdf.png);' ),
            array( '/* insecure input */', 'background-image: image-set("asdf.png" 1x, "asdf.png" 2x);' ),
            array(
                '/* insecure input */',
                'background-image: -webkit-image-set("asdf.png" 1x, "asdf.png" 2x);'
            ),
            array(
                '/* insecure input */',
                'background-image: -moz-image-set("asdf.png" 1x, "asdf.png" 2x);'
            ),
        );
    }

    /**
     * Provider for testAttributeSupport().
     *
     * @return array Rows of ( tag name, input attributes, expected output, message )
     */
    public static function provideAttributeSupport() {
        return array(
            array(
                'div',
                ' role="presentation"',
                ' role="presentation"',
                'Support for WAI-ARIA\'s role="presentation".'
            ),
            array( 'div', ' role="main"', '', "Other WAI-ARIA roles are currently not supported." ),
        );
    }

    /**
     * Sanitizer::fixTagAttributes() keeps supported attributes and drops
     * unsupported ones.
     *
     * @covers Sanitizer::fixTagAttributes
     * @dataProvider provideAttributeSupport
     *
     * @param string $tag Element name
     * @param string $attributes Raw attribute text
     * @param string $expected Expected sanitized attribute text
     * @param string $message Assertion message
     */
    public function testAttributeSupport( $tag, $attributes, $expected, $message ) {
        $this->assertEquals( $expected,
            Sanitizer::fixTagAttributes( $attributes, $tag ),
            $message
        );
    }
}