MediaWiki
REL1_24
|
<?php

/**
 * Unit tests for the static helpers of the Sanitizer class:
 * character-reference decoding, HTML tag filtering, attribute
 * parsing/filtering and inline CSS checking.
 */
class SanitizerTest extends MediaWikiTestCase {

	protected function setUp() {
		parent::setUp();

		// Force the Sanitizer class file to be loaded before any test runs.
		// NOTE(review): presumably this is what makes UTF8_REPLACEMENT
		// available to testInvalidNumberedEntities() -- confirm.
		AutoLoader::loadClass( 'Sanitizer' );
	}

	/**
	 * A named character reference is decoded to its UTF-8 bytes.
	 *
	 * @covers Sanitizer::decodeCharReferences
	 */
	public function testDecodeNamedEntities() {
		$this->assertEquals(
			"\xc3\xa9cole",
			Sanitizer::decodeCharReferences( '&eacute;cole' ),
			'decode named entities'
		);
	}

	/**
	 * Hexadecimal and decimal numeric references are decoded to UTF-8.
	 *
	 * @covers Sanitizer::decodeCharReferences
	 */
	public function testDecodeNumericEntities() {
		$this->assertEquals(
			"\xc4\x88io bonas dans l'\xc3\xa9cole!",
			Sanitizer::decodeCharReferences( "&#x108;io bonas dans l'&#233;cole!" ),
			'decode numeric entities'
		);
	}

	/**
	 * Numeric and named references may be mixed in the same string.
	 *
	 * @covers Sanitizer::decodeCharReferences
	 */
	public function testDecodeMixedEntities() {
		$this->assertEquals(
			"\xc4\x88io bonas dans l'\xc3\xa9cole!",
			Sanitizer::decodeCharReferences( "&#x108;io bonas dans l'&eacute;cole!" ),
			'decode mixed numeric/named entities'
		);
	}

	/**
	 * Decoding is a single pass: an escaped ampersand followed by what
	 * looks like a reference must come out as literal reference text
	 * and must not be decoded a second time.
	 *
	 * @covers Sanitizer::decodeCharReferences
	 */
	public function testDecodeMixedComplexEntities() {
		$this->assertEquals(
			"\xc4\x88io bonas dans l'\xc3\xa9cole! (mais pas &#x108;io dans l'&eacute;cole)",
			Sanitizer::decodeCharReferences(
				"&#x108;io bonas dans l'&eacute;cole! (mais pas &amp;#x108;io dans l'&#38;eacute;cole)"
			),
			'decode mixed complex entities'
		);
	}

	/**
	 * A bare ampersand that does not start a reference is left alone.
	 *
	 * @covers Sanitizer::decodeCharReferences
	 */
	public function testInvalidAmpersand() {
		$this->assertEquals(
			'a & b',
			Sanitizer::decodeCharReferences( 'a & b' ),
			'Invalid ampersand'
		);
	}

	/**
	 * An unknown named reference is passed through unchanged.
	 *
	 * @covers Sanitizer::decodeCharReferences
	 */
	public function testInvalidEntities() {
		$this->assertEquals(
			'&foo;',
			Sanitizer::decodeCharReferences( '&foo;' ),
			'Invalid named entity'
		);
	}

	/**
	 * A numeric reference far outside the Unicode range is replaced by
	 * the replacement character.
	 *
	 * @covers Sanitizer::decodeCharReferences
	 */
	public function testInvalidNumberedEntities() {
		$this->assertEquals(
			UTF8_REPLACEMENT,
			Sanitizer::decodeCharReferences( "&#88888888888888;" ),
			'Invalid numbered entity'
		);
	}

	/**
	 * Checks, per HTML5 tag, whether removeHTMLtags() keeps it (and
	 * closes it) or escapes it.
	 *
	 * @covers Sanitizer::removeHTMLtags
	 * @dataProvider provideHtml5Tags
	 *
	 * @param string $tag Name of an HTML5 tag
	 * @param bool $escaped Whether the sanitizer should escape the tag
	 */
	public function testRemovehtmltagsOnHtml5Tags( $tag, $escaped ) {
		$this->setMwGlobals( array(
			'wgUseTidy' => false
		) );

		if ( $escaped ) {
			// A rejected tag must be rendered as text, i.e. with its
			// angle brackets turned into entities.
			$this->assertEquals( "&lt;$tag&gt;",
				Sanitizer::removeHTMLtags( "<$tag>" )
			);
		} else {
			$this->assertEquals( "<$tag></$tag>\n",
				Sanitizer::removeHTMLtags( "<$tag>" )
			);
		}
	}

	/**
	 * Provider for testRemovehtmltagsOnHtml5Tags().
	 *
	 * @return array[] Rows of ( tag name, whether it must be escaped )
	 */
	public static function provideHtml5Tags() {
		$ESCAPED = true; # We want tag to be escaped
		$VERBATIM = false; # We want to keep the tag
		return array(
			array( 'data', $VERBATIM ),
			array( 'mark', $VERBATIM ),
			array( 'time', $VERBATIM ),
			array( 'video', $ESCAPED ),
		);
	}

	/**
	 * Provider for testRemoveHTMLtags().
	 *
	 * @return array[] Rows of ( input HTML, expected output, message )
	 */
	public static function dataRemoveHTMLtags() {
		return array(
			// former testSelfClosingTag
			array(
				'<div>Hello world</div />',
				'<div>Hello world</div>',
				'Self-closing closing div'
			),
			// Make sure special nested HTML5 semantics are not broken
			// http://www.whatwg.org/html/text-level-semantics.html#the-kbd-element
			array(
				'<kbd><kbd>Shift</kbd>+<kbd>F3</kbd></kbd>',
				'<kbd><kbd>Shift</kbd>+<kbd>F3</kbd></kbd>',
				'Nested <kbd>.'
			),
			// http://www.whatwg.org/html/text-level-semantics.html#the-sub-and-sup-elements
			array(
				'<var>x<sub><var>i</var></sub></var>, <var>y<sub><var>i</var></sub></var>',
				'<var>x<sub><var>i</var></sub></var>, <var>y<sub><var>i</var></sub></var>',
				'Nested <var>.'
			),
			// http://www.whatwg.org/html/text-level-semantics.html#the-dfn-element
			array(
				'<dfn><abbr title="Garage Door Opener">GDO</abbr></dfn>',
				'<dfn><abbr title="Garage Door Opener">GDO</abbr></dfn>',
				'<abbr> inside <dfn>',
			),
		);
	}

	/**
	 * @covers Sanitizer::removeHTMLtags
	 * @dataProvider dataRemoveHTMLtags
	 *
	 * @param string $input HTML handed to the sanitizer
	 * @param string $output Expected sanitized HTML
	 * @param string|null $msg Assertion message
	 */
	public function testRemoveHTMLtags( $input, $output, $msg = null ) {
		// Use setMwGlobals() rather than writing to $GLOBALS directly,
		// consistent with testRemovehtmltagsOnHtml5Tags(), so the
		// override does not leak into later tests.
		$this->setMwGlobals( array(
			'wgUseTidy' => false
		) );
		$this->assertEquals( $output, Sanitizer::removeHTMLtags( $input ), $msg );
	}

	/**
	 * @covers Sanitizer::decodeTagAttributes
	 * @dataProvider provideTagAttributesToDecode
	 *
	 * @param array $expected Expected map of attribute name => value
	 * @param string $attributes Raw attribute text to parse
	 * @param string $message Assertion message
	 */
	public function testDecodeTagAttributes( $expected, $attributes, $message = '' ) {
		$this->assertEquals( $expected,
			Sanitizer::decodeTagAttributes( $attributes ),
			$message
		);
	}

	/**
	 * Provider for testDecodeTagAttributes().
	 *
	 * @return array[] Rows of ( expected attributes, raw text, message )
	 */
	public static function provideTagAttributesToDecode() {
		return array(
			array( array( 'foo' => 'bar' ), 'foo=bar', 'Unquoted attribute' ),
			array( array( 'foo' => 'bar' ), ' foo = bar ', 'Spaced attribute' ),
			array( array( 'foo' => 'bar' ), 'foo="bar"', 'Double-quoted attribute' ),
			array( array( 'foo' => 'bar' ), 'foo=\'bar\'', 'Single-quoted attribute' ),
			// NOTE(review): the next three rows are identical as seen here;
			// presumably they once differed in the whitespace between the
			// two attributes -- confirm against the upstream file.
			array(
				array( 'foo' => 'bar', 'baz' => 'foo' ),
				'foo=\'bar\' baz="foo"',
				'Several attributes'
			),
			array(
				array( 'foo' => 'bar', 'baz' => 'foo' ),
				'foo=\'bar\' baz="foo"',
				'Several attributes'
			),
			array(
				array( 'foo' => 'bar', 'baz' => 'foo' ),
				'foo=\'bar\' baz="foo"',
				'Several attributes'
			),
			array( array( ':foo' => 'bar' ), ':foo=\'bar\'', 'Leading :' ),
			array( array( '_foo' => 'bar' ), '_foo=\'bar\'', 'Leading _' ),
			array( array( 'foo' => 'bar' ), 'Foo=\'bar\'', 'Leading capital' ),
			array( array( 'foo' => 'BAR' ), 'FOO=BAR', 'Attribute keys are normalized to lowercase' ),

			# Invalid beginning
			array( array(), '-foo=bar', 'Leading - is forbidden' ),
			array( array(), '.foo=bar', 'Leading . is forbidden' ),
			array( array( 'foo-bar' => 'bar' ), 'foo-bar=bar', 'A - is allowed inside the attribute' ),
			array( array( 'foo-' => 'bar' ), 'foo-=bar', 'A - is allowed as last character' ),
			array( array( 'foo.bar' => 'baz' ), 'foo.bar=baz', 'A . is allowed inside the attribute' ),
			array( array( 'foo.' => 'baz' ), 'foo.=baz', 'A . is allowed as last character' ),
			array( array( 'foo6' => 'baz' ), 'foo6=baz', 'Numbers are allowed' ),

			# This bit is more relaxed than XML rules, but some extensions use
			# it, like ProofreadPage (see bug 27539)
			array( array( '1foo' => 'baz' ), '1foo=baz', 'Leading numbers are allowed' ),
			array( array(), 'foo$=baz', 'Symbols are not allowed' ),
			array( array(), 'foo@=baz', 'Symbols are not allowed' ),
			array( array(), 'foo~=baz', 'Symbols are not allowed' ),
			array(
				array( 'foo' => '1[#^`*%w/(' ),
				'foo=1[#^`*%w/(',
				'All kind of characters are allowed as values'
			),
			array(
				array( 'foo' => '1[#^`*%\'w/(' ),
				'foo="1[#^`*%\'w/("',
				'Single quotes are allowed if quoted by double quotes'
			),
			array(
				array( 'foo' => '1[#^`*%"w/(' ),
				'foo=\'1[#^`*%"w/(\'',
				'Double quotes are allowed if quoted by single quotes'
			),
			// The input must carry the special characters as entities;
			// the expected value is their decoded form.
			array( array( 'foo' => '&"' ), 'foo=&amp;&quot;', 'Special chars can be provided as entities' ),
			array( array( 'foo' => '&foobar;' ), 'foo=&foobar;', 'Entity-like items are accepted' ),
		);
	}

	/**
	 * Deprecated presentational attributes must come back from
	 * fixTagAttributes() unchanged, prefixed with a single space.
	 *
	 * @covers Sanitizer::fixTagAttributes
	 * @dataProvider provideDeprecatedAttributes
	 *
	 * @param string $inputAttr Attribute text, e.g. 'align="left"'
	 * @param string $inputEl Name of the element carrying the attribute
	 * @param string $message Assertion message
	 */
	public function testDeprecatedAttributesUnaltered( $inputAttr, $inputEl, $message = '' ) {
		$this->assertEquals( " $inputAttr",
			Sanitizer::fixTagAttributes( $inputAttr, $inputEl ),
			$message
		);
	}

	/**
	 * Provider for testDeprecatedAttributesUnaltered().
	 *
	 * @return array[] Rows of ( attribute text, element name )
	 */
	public static function provideDeprecatedAttributes() {
		return array(
			array( 'clear="left"', 'br' ),
			array( 'clear="all"', 'br' ),
			array( 'width="100"', 'td' ),
			array( 'nowrap="true"', 'td' ),
			array( 'nowrap=""', 'td' ),
			array( 'align="right"', 'td' ),
			array( 'align="center"', 'table' ),
			array( 'align="left"', 'tr' ),
			array( 'align="center"', 'div' ),
			array( 'align="left"', 'h1' ),
			array( 'align="left"', 'p' ),
		);
	}

	/**
	 * @covers Sanitizer::checkCss
	 * @dataProvider provideCssCommentsFixtures
	 *
	 * @param string $expected Expected result of the CSS check
	 * @param string $css Inline CSS handed to the checker
	 * @param string $message Assertion message
	 */
	public function testCssCommentsChecking( $expected, $css, $message = '' ) {
		$this->assertEquals( $expected,
			Sanitizer::checkCss( $css ),
			$message
		);
	}

	/**
	 * Provider for testCssCommentsChecking().
	 *
	 * @return array[] Rows of ( expected output, input CSS [, message] )
	 */
	public static function provideCssCommentsFixtures() {
		return array(
			// Valid comments spanning entire input
			array( '/**/', '/**/' ),
			array( '/* comment */', '/* comment */' ),
			// Weird stuff
			array( ' ', '/****/' ),
			array( ' ', '/* /* */' ),
			array( 'display: block;', "display:/* foo */block;" ),
			array( 'display: block;', "display:\\2f\\2a foo \\2a\\2f block;",
				'Backslash-escaped comments must be stripped (bug 28450)' ),
			array( '', '/* unfinished comment structure',
				'Remove anything after a comment-start token' ),
			array( '', "\\2f\\2a unifinished comment'",
				'Remove anything after a backslash-escaped comment-start token' ),
			array(
				'/* insecure input */',
				'filter: progid:DXImageTransform.Microsoft.AlphaImageLoader'
					. '(src=\'asdf.png\',sizingMethod=\'scale\');'
			),
			array(
				'/* insecure input */',
				'-ms-filter: "progid:DXImageTransform.Microsoft.AlphaImageLoader'
					. '(src=\'asdf.png\',sizingMethod=\'scale\')";'
			),
			array( '/* insecure input */', 'width: expression(1+1);' ),
			array( '/* insecure input */', 'background-image: image(asdf.png);' ),
			array( '/* insecure input */', 'background-image: -webkit-image(asdf.png);' ),
			array( '/* insecure input */', 'background-image: -moz-image(asdf.png);' ),
			array( '/* insecure input */', 'background-image: image-set("asdf.png" 1x, "asdf.png" 2x);' ),
			array(
				'/* insecure input */',
				'background-image: -webkit-image-set("asdf.png" 1x, "asdf.png" 2x);'
			),
			array(
				'/* insecure input */',
				'background-image: -moz-image-set("asdf.png" 1x, "asdf.png" 2x);'
			),
		);
	}

	/**
	 * Provider for testAttributeSupport().
	 *
	 * @return array[] Rows of ( tag, attribute text, expected, message )
	 */
	public static function provideAttributeSupport() {
		return array(
			array(
				'div',
				' role="presentation"',
				' role="presentation"',
				'Support for WAI-ARIA\'s role="presentation".'
			),
			array( 'div', ' role="main"', '', "Other WAI-ARIA roles are currently not supported." ),
		);
	}

	/**
	 * @covers Sanitizer::fixTagAttributes
	 * @dataProvider provideAttributeSupport
	 *
	 * @param string $tag Element name
	 * @param string $attributes Raw attribute text
	 * @param string $expected Expected filtered attribute text
	 * @param string $message Assertion message
	 */
	public function testAttributeSupport( $tag, $attributes, $expected, $message ) {
		$this->assertEquals( $expected,
			Sanitizer::fixTagAttributes( $attributes, $tag ),
			$message
		);
	}
}