MediaWiki  REL1_22
SanitizerTest.php
Go to the documentation of this file.
00001 <?php
00002 
/**
 * Unit tests for the static helpers of the Sanitizer class: character
 * reference decoding, HTML tag filtering, tag attribute decoding and
 * filtering, and inline CSS checking.
 *
 * Tests that take parameters are fed by the data provider named in their
 * "@dataProvider" annotation; each provider returns an array of argument
 * lists, one per test run.
 */
class SanitizerTest extends MediaWikiTestCase {

    protected function setUp() {
        parent::setUp();

        // Load the class under test up front so every test can call it statically.
        AutoLoader::loadClass( 'Sanitizer' );
    }

    /**
     * A named entity is expected to decode to its UTF-8 byte sequence.
     *
     * @covers Sanitizer::decodeCharReferences
     */
    public function testDecodeNamedEntities() {
        $this->assertEquals(
            "\xc3\xa9cole",
            Sanitizer::decodeCharReferences( '&eacute;cole' ),
            'decode named entities'
        );
    }

    /**
     * Hexadecimal and decimal numeric references are both expected to decode.
     *
     * @covers Sanitizer::decodeCharReferences
     */
    public function testDecodeNumericEntities() {
        $this->assertEquals(
            "\xc4\x88io bonas dans l'\xc3\xa9cole!",
            Sanitizer::decodeCharReferences( "&#x108;io bonas dans l'&#233;cole!" ),
            'decode numeric entities'
        );
    }

    /**
     * Numeric and named references may appear in the same string.
     *
     * @covers Sanitizer::decodeCharReferences
     */
    public function testDecodeMixedEntities() {
        $this->assertEquals(
            "\xc4\x88io bonas dans l'\xc3\xa9cole!",
            Sanitizer::decodeCharReferences( "&#x108;io bonas dans l'&eacute;cole!" ),
            'decode mixed numeric/named entities'
        );
    }

    /**
     * An escaped ampersand ("&amp;" or "&#38;") followed by entity-like text
     * is expected to decode to a literal "&" only: the result must not be
     * decoded a second time.
     *
     * @covers Sanitizer::decodeCharReferences
     */
    public function testDecodeMixedComplexEntities() {
        $this->assertEquals(
            "\xc4\x88io bonas dans l'\xc3\xa9cole! (mais pas &#x108;io dans l'&eacute;cole)",
            Sanitizer::decodeCharReferences(
                "&#x108;io bonas dans l'&eacute;cole! (mais pas &amp;#x108;io dans l'&#38;eacute;cole)"
            ),
            'decode mixed complex entities'
        );
    }

    /**
     * A bare ampersand that does not start a reference is expected to pass
     * through unchanged.
     *
     * @covers Sanitizer::decodeCharReferences
     */
    public function testInvalidAmpersand() {
        $this->assertEquals(
            'a & b',
            Sanitizer::decodeCharReferences( 'a & b' ),
            'Invalid ampersand'
        );
    }

    /**
     * An unknown named entity is expected to pass through unchanged.
     *
     * @covers Sanitizer::decodeCharReferences
     */
    public function testInvalidEntities() {
        $this->assertEquals(
            '&foo;',
            Sanitizer::decodeCharReferences( '&foo;' ),
            'Invalid named entity'
        );
    }

    /**
     * An out-of-range numeric reference is expected to decode to the
     * UTF8_REPLACEMENT constant.
     *
     * @covers Sanitizer::decodeCharReferences
     */
    public function testInvalidNumberedEntities() {
        $this->assertEquals(
            UTF8_REPLACEMENT,
            Sanitizer::decodeCharReferences( "&#88888888888888;" ),
            'Invalid numbered entity'
        );
    }

    /**
     * @covers Sanitizer::removeHTMLtags
     * @dataProvider provideHtml5Tags
     *
     * @param string $tag Name of an HTML5 element (e.g. 'video')
     * @param bool $escaped True when the sanitizer is expected to escape the
     *   tag (e.g. '&lt;video&gt;'), false when it is expected to keep it
     */
    public function testRemovehtmltagsOnHtml5Tags( $tag, $escaped ) {
        $this->setMwGlobals( array(
            'wgUseTidy' => false
        ) );

        $actual = Sanitizer::removeHTMLtags( "<$tag>" );

        if ( $escaped ) {
            $this->assertEquals( "&lt;$tag&gt;", $actual );
        } else {
            // A kept tag is expected back with a matching close tag and a newline.
            $this->assertEquals( "<$tag></$tag>\n", $actual );
        }
    }

    /**
     * Provide HTML5 tags.
     *
     * @return array Rows of array( <tag name>, <expect escaped?> )
     */
    public static function provideHtml5Tags() {
        $ESCAPED = true; # We want tag to be escaped
        $VERBATIM = false; # We want to keep the tag
        return array(
            array( 'data', $VERBATIM ),
            array( 'mark', $VERBATIM ),
            array( 'time', $VERBATIM ),
            array( 'video', $ESCAPED ),
        );
    }

    /**
     * Provide HTML fragments for testRemoveHTMLtags.
     *
     * @return array Rows of array( <input>, <expected output>, <message> )
     */
    public static function dataRemoveHTMLtags() {
        return array(
            // former testSelfClosingTag
            array(
                '<div>Hello world</div />',
                '<div>Hello world</div>',
                'Self-closing closing div'
            ),
            // Make sure special nested HTML5 semantics are not broken
            // http://www.whatwg.org/html/text-level-semantics.html#the-kbd-element
            array(
                '<kbd><kbd>Shift</kbd>+<kbd>F3</kbd></kbd>',
                '<kbd><kbd>Shift</kbd>+<kbd>F3</kbd></kbd>',
                'Nested <kbd>.'
            ),
            // http://www.whatwg.org/html/text-level-semantics.html#the-sub-and-sup-elements
            array(
                '<var>x<sub><var>i</var></sub></var>, <var>y<sub><var>i</var></sub></var>',
                '<var>x<sub><var>i</var></sub></var>, <var>y<sub><var>i</var></sub></var>',
                'Nested <var>.'
            ),
            // http://www.whatwg.org/html/text-level-semantics.html#the-dfn-element
            array(
                '<dfn><abbr title="Garage Door Opener">GDO</abbr></dfn>',
                '<dfn><abbr title="Garage Door Opener">GDO</abbr></dfn>',
                '<abbr> inside <dfn>',
            ),
        );
    }

    /**
     * @dataProvider dataRemoveHTMLtags
     * @covers Sanitizer::removeHTMLtags
     *
     * @param string $input HTML fragment handed to the sanitizer
     * @param string $output Expected sanitized fragment
     * @param string|null $msg Assertion message
     */
    public function testRemoveHTMLtags( $input, $output, $msg = null ) {
        // Go through setMwGlobals(), like testRemovehtmltagsOnHtml5Tags() does,
        // instead of writing to $GLOBALS directly.
        $this->setMwGlobals( array(
            'wgUseTidy' => false
        ) );

        $this->assertEquals( $output, Sanitizer::removeHTMLtags( $input ), $msg );
    }

    /**
     * @dataProvider provideTagAttributesToDecode
     * @covers Sanitizer::decodeTagAttributes
     *
     * @param array $expected Expected map of attribute name => value
     * @param string $attributes Raw attribute text
     * @param string $message Assertion message
     */
    public function testDecodeTagAttributes( $expected, $attributes, $message = '' ) {
        $this->assertEquals( $expected,
            Sanitizer::decodeTagAttributes( $attributes ),
            $message
        );
    }

    /**
     * Provide raw attribute strings together with their expected decoding.
     *
     * @return array Rows of array( <expected attributes>, <raw text>, <message> )
     */
    public static function provideTagAttributesToDecode() {
        return array(
            array( array( 'foo' => 'bar' ), 'foo=bar', 'Unquoted attribute' ),
            array( array( 'foo' => 'bar' ), '    foo   =   bar    ', 'Spaced attribute' ),
            array( array( 'foo' => 'bar' ), 'foo="bar"', 'Double-quoted attribute' ),
            array( array( 'foo' => 'bar' ), 'foo=\'bar\'', 'Single-quoted attribute' ),
            array( array( 'foo' => 'bar', 'baz' => 'foo' ), 'foo=\'bar\'   baz="foo"', 'Several attributes' ),
            array( array( ':foo' => 'bar' ), ':foo=\'bar\'', 'Leading :' ),
            array( array( '_foo' => 'bar' ), '_foo=\'bar\'', 'Leading _' ),
            array( array( 'foo' => 'bar' ), 'Foo=\'bar\'', 'Leading capital' ),
            array( array( 'foo' => 'BAR' ), 'FOO=BAR', 'Attribute keys are normalized to lowercase' ),

            # Invalid beginning
            array( array(), '-foo=bar', 'Leading - is forbidden' ),
            array( array(), '.foo=bar', 'Leading . is forbidden' ),
            array( array( 'foo-bar' => 'bar' ), 'foo-bar=bar', 'A - is allowed inside the attribute' ),
            array( array( 'foo-' => 'bar' ), 'foo-=bar', 'A - is allowed as last character' ),
            array( array( 'foo.bar' => 'baz' ), 'foo.bar=baz', 'A . is allowed inside the attribute' ),
            array( array( 'foo.' => 'baz' ), 'foo.=baz', 'A . is allowed as last character' ),
            array( array( 'foo6' => 'baz' ), 'foo6=baz', 'Numbers are allowed' ),

            # This bit is more relaxed than XML rules, but some extensions use
            # it, like ProofreadPage (see bug 27539)
            array( array( '1foo' => 'baz' ), '1foo=baz', 'Leading numbers are allowed' ),
            array( array(), 'foo$=baz', 'Symbols are not allowed' ),
            array( array(), 'foo@=baz', 'Symbols are not allowed' ),
            array( array(), 'foo~=baz', 'Symbols are not allowed' ),
            array( array( 'foo' => '1[#^`*%w/(' ), 'foo=1[#^`*%w/(', 'All kind of characters are allowed as values' ),
            array( array( 'foo' => '1[#^`*%\'w/(' ), 'foo="1[#^`*%\'w/("', 'Single quotes are allowed if quoted by double quotes' ),
            array( array( 'foo' => '1[#^`*%"w/(' ), 'foo=\'1[#^`*%"w/(\'', 'Double quotes are allowed if quoted by single quotes' ),
            array( array( 'foo' => '&"' ), 'foo=&amp;&quot;', 'Special chars can be provided as entities' ),
            array( array( 'foo' => '&foobar;' ), 'foo=&foobar;', 'Entity-like items are accepted' ),
        );
    }

    /**
     * Deprecated presentational attributes are expected to come back
     * unchanged, prefixed with a single space.
     *
     * @dataProvider provideDeprecatedAttributes
     * @covers Sanitizer::fixTagAttributes
     *
     * @param string $inputAttr Attribute text, e.g. 'clear="left"'
     * @param string $inputEl Element name the attribute is attached to
     * @param string $message Assertion message
     */
    public function testDeprecatedAttributesUnaltered( $inputAttr, $inputEl, $message = '' ) {
        $this->assertEquals( " $inputAttr",
            Sanitizer::fixTagAttributes( $inputAttr, $inputEl ),
            $message
        );
    }

    /**
     * Provide deprecated attributes and the elements they are tested on.
     *
     * @return array Rows of array( <attribute>, <element>, [message] )
     */
    public static function provideDeprecatedAttributes() {
        return array(
            array( 'clear="left"', 'br' ),
            array( 'clear="all"', 'br' ),
            array( 'width="100"', 'td' ),
            array( 'nowrap="true"', 'td' ),
            array( 'nowrap=""', 'td' ),
            array( 'align="right"', 'td' ),
            array( 'align="center"', 'table' ),
            array( 'align="left"', 'tr' ),
            array( 'align="center"', 'div' ),
            array( 'align="left"', 'h1' ),
            array( 'align="left"', 'span' ),
        );
    }

    /**
     * @dataProvider provideCssCommentsFixtures
     * @covers Sanitizer::checkCss
     *
     * @param string $expected Expected result of the CSS check
     * @param string $css CSS text handed to the sanitizer
     * @param string $message Assertion message
     */
    public function testCssCommentsChecking( $expected, $css, $message = '' ) {
        $this->assertEquals( $expected,
            Sanitizer::checkCss( $css ),
            $message
        );
    }

    /**
     * Provide CSS snippets containing comments or unsafe constructs.
     *
     * @return array Rows of array( <expected>, <css>, [message] )
     */
    public static function provideCssCommentsFixtures() {
        return array(
            // Valid comments spanning entire input
            array( '/**/', '/**/' ),
            array( '/* comment */', '/* comment */' ),
            // Weird stuff
            array( ' ', '/****/' ),
            array( ' ', '/* /* */' ),
            array( 'display: block;', "display:/* foo */block;" ),
            array( 'display: block;', "display:\\2f\\2a foo \\2a\\2f block;",
                'Backslash-escaped comments must be stripped (bug 28450)' ),
            array( '', '/* unfinished comment structure',
                'Remove anything after a comment-start token' ),
            array( '', "\\2f\\2a unifinished comment'",
                'Remove anything after a backslash-escaped comment-start token' ),
            array( '/* insecure input */', 'filter: progid:DXImageTransform.Microsoft.AlphaImageLoader(src=\'asdf.png\',sizingMethod=\'scale\');' ),
            array( '/* insecure input */', '-ms-filter: "progid:DXImageTransform.Microsoft.AlphaImageLoader(src=\'asdf.png\',sizingMethod=\'scale\')";' ),
            array( '/* insecure input */', 'width: expression(1+1);' ),
            array( '/* insecure input */', 'background-image: image(asdf.png);' ),
            array( '/* insecure input */', 'background-image: -webkit-image(asdf.png);' ),
            array( '/* insecure input */', 'background-image: -moz-image(asdf.png);' ),
            array( '/* insecure input */', 'background-image: image-set("asdf.png" 1x, "asdf.png" 2x);' ),
            array( '/* insecure input */', 'background-image: -webkit-image-set("asdf.png" 1x, "asdf.png" 2x);' ),
            array( '/* insecure input */', 'background-image: -moz-image-set("asdf.png" 1x, "asdf.png" 2x);' ),
        );
    }

    /**
     * Provide cases for support, or lack of support, of specific attributes.
     *
     * @return array Rows of array( <tag>, <attributes>, <expected>, <message> )
     */
    public static function provideAttributeSupport() {
        return array(
            array( 'div', ' role="presentation"', ' role="presentation"', 'Support for WAI-ARIA\'s role="presentation".' ),
            array( 'div', ' role="main"', '', "Other WAI-ARIA roles are currently not supported." ),
        );
    }

    /**
     * @dataProvider provideAttributeSupport
     * @covers Sanitizer::fixTagAttributes
     *
     * @param string $tag Element name
     * @param string $attributes Raw attribute text
     * @param string $expected Expected attribute text after filtering
     * @param string $message Assertion message
     */
    public function testAttributeSupport( $tag, $attributes, $expected, $message ) {
        $this->assertEquals( $expected,
            Sanitizer::fixTagAttributes( $attributes, $tag ),
            $message
        );
    }
}