MediaWiki
REL1_22
|
<?php

/**
 * Unit tests for the Sanitizer class: character reference decoding,
 * HTML tag filtering, attribute parsing/whitelisting and CSS checking.
 */
class SanitizerTest extends MediaWikiTestCase {

	protected function setUp() {
		parent::setUp();

		AutoLoader::loadClass( 'Sanitizer' );
	}

	/**
	 * A named entity must be decoded to its UTF-8 byte sequence.
	 *
	 * @covers Sanitizer::decodeCharReferences
	 */
	public function testDecodeNamedEntities() {
		$this->assertEquals(
			"\xc3\xa9cole",
			Sanitizer::decodeCharReferences( '&eacute;cole' ),
			'decode named entities'
		);
	}

	/**
	 * Hexadecimal and decimal numeric references must both be decoded.
	 *
	 * @covers Sanitizer::decodeCharReferences
	 */
	public function testDecodeNumericEntities() {
		$this->assertEquals(
			"\xc4\x88io bonas dans l'\xc3\xa9cole!",
			Sanitizer::decodeCharReferences( "&#x108;io bonas dans l'&#233;cole!" ),
			'decode numeric entities'
		);
	}

	/**
	 * Numeric and named references may be mixed in the same string.
	 *
	 * @covers Sanitizer::decodeCharReferences
	 */
	public function testDecodeMixedEntities() {
		$this->assertEquals(
			"\xc4\x88io bonas dans l'\xc3\xa9cole!",
			Sanitizer::decodeCharReferences( "&#x108;io bonas dans l'&eacute;cole!" ),
			'decode mixed numeric/named entities'
		);
	}

	/**
	 * Decoding happens exactly once: a reference whose ampersand is itself
	 * escaped ("&amp;#x108;", "&#38;eacute;") must come out as the literal
	 * reference text, not as the character it names.
	 *
	 * @covers Sanitizer::decodeCharReferences
	 */
	public function testDecodeMixedComplexEntities() {
		$this->assertEquals(
			"\xc4\x88io bonas dans l'\xc3\xa9cole! (mais pas &#x108;io dans l'&eacute;cole)",
			Sanitizer::decodeCharReferences(
				"&#x108;io bonas dans l'&eacute;cole! (mais pas &amp;#x108;io dans l'&#38;eacute;cole)"
			),
			'decode mixed complex entities'
		);
	}

	/**
	 * A bare ampersand that does not start a reference is left untouched.
	 *
	 * @covers Sanitizer::decodeCharReferences
	 */
	public function testInvalidAmpersand() {
		$this->assertEquals(
			'a & b',
			Sanitizer::decodeCharReferences( 'a & b' ),
			'Invalid ampersand'
		);
	}

	/**
	 * An unknown named entity is left untouched.
	 *
	 * @covers Sanitizer::decodeCharReferences
	 */
	public function testInvalidEntities() {
		$this->assertEquals(
			'&foo;',
			Sanitizer::decodeCharReferences( '&foo;' ),
			'Invalid named entity'
		);
	}

	/**
	 * A numeric reference far outside the Unicode range must decode to the
	 * replacement character.
	 *
	 * @covers Sanitizer::decodeCharReferences
	 */
	public function testInvalidNumberedEntities() {
		$this->assertEquals(
			UTF8_REPLACEMENT,
			Sanitizer::decodeCharReferences( "&#88888888888888;" ),
			'Invalid numbered entity'
		);
	}

	/**
	 * Checks how removeHTMLtags() treats a lone HTML5 opening tag.
	 *
	 * @covers Sanitizer::removeHTMLtags
	 * @dataProvider provideHtml5Tags
	 *
	 * @param string $tag Name of an HTML5 element (e.g. 'video')
	 * @param bool $escaped True when the tag is expected to be escaped
	 *   (e.g. '&lt;video&gt;'), false when it is expected to be kept
	 */
	public function testRemovehtmltagsOnHtml5Tags( $tag, $escaped ) {
		$this->setMwGlobals( array(
			'wgUseTidy' => false
		) );

		if ( $escaped ) {
			// Disallowed tags must come back with their brackets escaped.
			$this->assertEquals( "&lt;$tag&gt;",
				Sanitizer::removeHTMLtags( "<$tag>" )
			);
		} else {
			// Allowed tags are kept and the missing closing tag is appended.
			$this->assertEquals( "<$tag></$tag>\n",
				Sanitizer::removeHTMLtags( "<$tag>" )
			);
		}
	}

	/**
	 * Provider for testRemovehtmltagsOnHtml5Tags().
	 *
	 * @return array[] Rows of ( tag name, whether the tag should be escaped )
	 */
	public static function provideHtml5Tags() {
		$ESCAPED = true; # We want tag to be escaped
		$VERBATIM = false; # We want to keep the tag
		return array(
			array( 'data', $VERBATIM ),
			array( 'mark', $VERBATIM ),
			array( 'time', $VERBATIM ),
			array( 'video', $ESCAPED ),
		);
	}

	/**
	 * Provider for testRemoveHTMLtags().
	 *
	 * Declared public static like the other providers in this class.
	 *
	 * @return array[] Rows of ( input, expected output, assertion message )
	 */
	public static function dataRemoveHTMLtags() {
		return array(
			// former testSelfClosingTag
			array(
				'<div>Hello world</div />',
				'<div>Hello world</div>',
				'Self-closing closing div'
			),
			// Make sure special nested HTML5 semantics are not broken
			// http://www.whatwg.org/html/text-level-semantics.html#the-kbd-element
			array(
				'<kbd><kbd>Shift</kbd>+<kbd>F3</kbd></kbd>',
				'<kbd><kbd>Shift</kbd>+<kbd>F3</kbd></kbd>',
				'Nested <kbd>.'
			),
			// http://www.whatwg.org/html/text-level-semantics.html#the-sub-and-sup-elements
			array(
				'<var>x<sub><var>i</var></sub></var>, <var>y<sub><var>i</var></sub></var>',
				'<var>x<sub><var>i</var></sub></var>, <var>y<sub><var>i</var></sub></var>',
				'Nested <var>.'
			),
			// http://www.whatwg.org/html/text-level-semantics.html#the-dfn-element
			array(
				'<dfn><abbr title="Garage Door Opener">GDO</abbr></dfn>',
				'<dfn><abbr title="Garage Door Opener">GDO</abbr></dfn>',
				'<abbr> inside <dfn>',
			),
		);
	}

	/**
	 * @covers Sanitizer::removeHTMLtags
	 * @dataProvider dataRemoveHTMLtags
	 *
	 * @param string $input HTML fed to the sanitizer
	 * @param string $output Expected sanitized HTML
	 * @param string|null $msg Assertion message
	 */
	public function testRemoveHTMLtags( $input, $output, $msg = null ) {
		// Use setMwGlobals() rather than writing to $GLOBALS directly, to match
		// testRemovehtmltagsOnHtml5Tags() and avoid leaking the override.
		$this->setMwGlobals( array(
			'wgUseTidy' => false
		) );
		$this->assertEquals( $output, Sanitizer::removeHTMLtags( $input ), $msg );
	}

	/**
	 * @covers Sanitizer::decodeTagAttributes
	 * @dataProvider provideTagAttributesToDecode
	 *
	 * @param array $expected Expected map of attribute name => value
	 * @param string $attributes Raw attribute string to decode
	 * @param string $message Assertion message
	 */
	public function testDecodeTagAttributes( $expected, $attributes, $message = '' ) {
		$this->assertEquals( $expected,
			Sanitizer::decodeTagAttributes( $attributes ),
			$message
		);
	}

	/**
	 * Provider for testDecodeTagAttributes().
	 *
	 * @return array[] Rows of ( expected attributes, raw attribute string, message )
	 */
	public static function provideTagAttributesToDecode() {
		return array(
			array( array( 'foo' => 'bar' ), 'foo=bar', 'Unquoted attribute' ),
			array( array( 'foo' => 'bar' ), ' foo = bar ', 'Spaced attribute' ),
			array( array( 'foo' => 'bar' ), 'foo="bar"', 'Double-quoted attribute' ),
			array( array( 'foo' => 'bar' ), 'foo=\'bar\'', 'Single-quoted attribute' ),
			// NOTE(review): the next three rows are identical as written; presumably
			// they were meant to differ in the whitespace between attributes - confirm.
			array( array( 'foo' => 'bar', 'baz' => 'foo' ), 'foo=\'bar\' baz="foo"', 'Several attributes' ),
			array( array( 'foo' => 'bar', 'baz' => 'foo' ), 'foo=\'bar\' baz="foo"', 'Several attributes' ),
			array( array( 'foo' => 'bar', 'baz' => 'foo' ), 'foo=\'bar\' baz="foo"', 'Several attributes' ),
			array( array( ':foo' => 'bar' ), ':foo=\'bar\'', 'Leading :' ),
			array( array( '_foo' => 'bar' ), '_foo=\'bar\'', 'Leading _' ),
			array( array( 'foo' => 'bar' ), 'Foo=\'bar\'', 'Leading capital' ),
			array( array( 'foo' => 'BAR' ), 'FOO=BAR', 'Attribute keys are normalized to lowercase' ),

			# Invalid beginning
			array( array(), '-foo=bar', 'Leading - is forbidden' ),
			array( array(), '.foo=bar', 'Leading . is forbidden' ),
			array( array( 'foo-bar' => 'bar' ), 'foo-bar=bar', 'A - is allowed inside the attribute' ),
			array( array( 'foo-' => 'bar' ), 'foo-=bar', 'A - is allowed inside the attribute' ),
			array( array( 'foo.bar' => 'baz' ), 'foo.bar=baz', 'A . is allowed inside the attribute' ),
			array( array( 'foo.' => 'baz' ), 'foo.=baz', 'A . is allowed as last character' ),
			array( array( 'foo6' => 'baz' ), 'foo6=baz', 'Numbers are allowed' ),

			# This bit is more relaxed than XML rules, but some extensions use
			# it, like ProofreadPage (see bug 27539)
			array( array( '1foo' => 'baz' ), '1foo=baz', 'Leading numbers are allowed' ),
			array( array(), 'foo$=baz', 'Symbols are not allowed' ),
			array( array(), 'foo@=baz', 'Symbols are not allowed' ),
			array( array(), 'foo~=baz', 'Symbols are not allowed' ),
			array( array( 'foo' => '1[#^`*%w/(' ), 'foo=1[#^`*%w/(', 'All kind of characters are allowed as values' ),
			array( array( 'foo' => '1[#^`*%\'w/(' ), 'foo="1[#^`*%\'w/("', 'Double quotes are allowed if quoted by single quotes' ),
			array( array( 'foo' => '1[#^`*%"w/(' ), 'foo=\'1[#^`*%"w/(\'', 'Single quotes are allowed if quoted by double quotes' ),
			// The value is supplied as entities and must be decoded to the raw characters.
			array( array( 'foo' => '&"' ), 'foo=&amp;&quot;', 'Special chars can be provided as entities' ),
			array( array( 'foo' => '&foobar;' ), 'foo=&foobar;', 'Entity-like items are accepted' ),
		);
	}

	/**
	 * Deprecated presentational attributes must pass through fixTagAttributes()
	 * unchanged (apart from the leading space it prepends).
	 *
	 * @covers Sanitizer::fixTagAttributes
	 * @dataProvider provideDeprecatedAttributes
	 *
	 * @param string $inputAttr Attribute string, e.g. 'align="left"'
	 * @param string $inputEl Element name the attribute is applied to
	 * @param string $message Assertion message
	 */
	public function testDeprecatedAttributesUnaltered( $inputAttr, $inputEl, $message = '' ) {
		$this->assertEquals( " $inputAttr",
			Sanitizer::fixTagAttributes( $inputAttr, $inputEl ),
			$message
		);
	}

	/**
	 * Provider for testDeprecatedAttributesUnaltered().
	 *
	 * @return array[] Rows of ( attribute string, element name )
	 */
	public static function provideDeprecatedAttributes() {
		return array(
			array( 'clear="left"', 'br' ),
			array( 'clear="all"', 'br' ),
			array( 'width="100"', 'td' ),
			array( 'nowrap="true"', 'td' ),
			array( 'nowrap=""', 'td' ),
			array( 'align="right"', 'td' ),
			array( 'align="center"', 'table' ),
			array( 'align="left"', 'tr' ),
			array( 'align="center"', 'div' ),
			array( 'align="left"', 'h1' ),
			array( 'align="left"', 'span' ),
		);
	}

	/**
	 * @covers Sanitizer::checkCss
	 * @dataProvider provideCssCommentsFixtures
	 *
	 * @param string $expected Expected result of checkCss()
	 * @param string $css CSS text to check
	 * @param string $message Assertion message
	 */
	public function testCssCommentsChecking( $expected, $css, $message = '' ) {
		$this->assertEquals( $expected,
			Sanitizer::checkCss( $css ),
			$message
		);
	}

	/**
	 * Provider for testCssCommentsChecking().
	 *
	 * @return array[] Rows of ( expected output, input CSS [, message] )
	 */
	public static function provideCssCommentsFixtures() {
		return array(
			// Valid comments spanning entire input
			array( '/**/', '/**/' ),
			array( '/* comment */', '/* comment */' ),
			// Weird stuff
			array( ' ', '/****/' ),
			array( ' ', '/* /* */' ),
			array( 'display: block;', "display:/* foo */block;" ),
			array( 'display: block;', "display:\\2f\\2a foo \\2a\\2f block;",
				'Backslash-escaped comments must be stripped (bug 28450)' ),
			array( '', '/* unfinished comment structure',
				'Remove anything after a comment-start token' ),
			array( '', "\\2f\\2a unifinished comment'",
				'Remove anything after a backslash-escaped comment-start token' ),
			array( '/* insecure input */', 'filter: progid:DXImageTransform.Microsoft.AlphaImageLoader(src=\'asdf.png\',sizingMethod=\'scale\');' ),
			array( '/* insecure input */', '-ms-filter: "progid:DXImageTransform.Microsoft.AlphaImageLoader(src=\'asdf.png\',sizingMethod=\'scale\')";' ),
			array( '/* insecure input */', 'width: expression(1+1);' ),
			array( '/* insecure input */', 'background-image: image(asdf.png);' ),
			array( '/* insecure input */', 'background-image: -webkit-image(asdf.png);' ),
			array( '/* insecure input */', 'background-image: -moz-image(asdf.png);' ),
			array( '/* insecure input */', 'background-image: image-set("asdf.png" 1x, "asdf.png" 2x);' ),
			array( '/* insecure input */', 'background-image: -webkit-image-set("asdf.png" 1x, "asdf.png" 2x);' ),
			array( '/* insecure input */', 'background-image: -moz-image-set("asdf.png" 1x, "asdf.png" 2x);' ),
		);
	}

	/**
	 * Provider for testAttributeSupport().
	 *
	 * @return array[] Rows of ( tag, input attributes, expected attributes, message )
	 */
	public static function provideAttributeSupport() {
		return array(
			array( 'div', ' role="presentation"', ' role="presentation"', 'Support for WAI-ARIA\'s role="presentation".' ),
			array( 'div', ' role="main"', '', "Other WAI-ARIA roles are currently not supported." ),
		);
	}

	/**
	 * @covers Sanitizer::fixTagAttributes
	 * @dataProvider provideAttributeSupport
	 *
	 * @param string $tag Element name
	 * @param string $attributes Raw attribute string
	 * @param string $expected Expected attribute string after sanitizing
	 * @param string $message Assertion message
	 */
	public function testAttributeSupport( $tag, $attributes, $expected, $message ) {
		$this->assertEquals( $expected,
			Sanitizer::fixTagAttributes( $attributes, $tag ),
			$message
		);
	}
}