MediaWiki  REL1_24
HtmlFormatterTest.php
Go to the documentation of this file.
00001 <?php
00002 
/**
 * Tests for HtmlFormatter: media/element removal, tag flattening and
 * preservation of entities and UTF-8 text.
 */
class HtmlFormatterTest extends MediaWikiTestCase {
    /**
     * Runs one HTML snippet through HtmlFormatter and checks both the
     * markup that remains and the elements reported as removed.
     *
     * @dataProvider getHtmlData
     *
     * @param string $input HTML snippet to process
     * @param string $expectedText Expected markup after filtering
     * @param array $expectedRemoved Expected serialisations of the removed elements
     * @param callable|bool $callback Callback that configures the formatter
     *  before filtering, or false to filter with no rules set
     */
    public function testTransform( $input, $expectedText,
        $expectedRemoved = array(), $callback = false
    ) {
        $input = self::normalize( $input );
        $formatter = new HtmlFormatter( HtmlFormatter::wrapHTML( $input ) );
        if ( $callback ) {
            $callback( $formatter );
        }
        $removedElements = $formatter->filterContent();
        $html = $formatter->getText();

        $removed = array();
        foreach ( $removedElements as $removedElement ) {
            $removed[] = self::normalize( $formatter->getText( $removedElement ) );
        }
        $expectedRemoved = array_map( array( __CLASS__, 'normalize' ), $expectedRemoved );

        // Removal order is not part of the contract, so compare as sorted lists.
        // sort() works in place and reindexes; passing its boolean return value
        // to assertEquals() (as asort() was used before) would always compare
        // true with true and never check the actual contents.
        // NOTE(review): this assertion was previously vacuous; if it now fails,
        // the fixture strings in getHtmlData() need to be checked against the
        // serialisation HtmlFormatter::getText() really produces.
        sort( $expectedRemoved );
        sort( $removed );

        $this->assertValidHtmlSnippet( $html );
        $this->assertEquals( self::normalize( $expectedText ), self::normalize( $html ) );
        $this->assertEquals( $expectedRemoved, $removed );
    }

    /**
     * Strips all line breaks so that fixtures written over several lines
     * compare equal regardless of the platform's line-ending style.
     *
     * @param string $s
     * @return string $s with every "\r" and "\n" removed
     */
    private static function normalize( $s ) {
        // "\r" is included to cope with Windows line endings.
        return str_replace( array( "\r", "\n" ), '', $s );
    }

    /**
     * Data provider for testTransform().
     *
     * Each row is: input HTML, expected output, optionally the list of
     * expected removed elements and optionally a callback that receives the
     * HtmlFormatter and sets up the rules under test.
     *
     * @return array
     */
    public function getHtmlData() {
        $removeImages = function ( HtmlFormatter $f ) {
            $f->setRemoveMedia();
        };
        $removeTags = function ( HtmlFormatter $f ) {
            $f->remove( array( 'table', '.foo', '#bar', 'div.baz' ) );
        };
        $flattenSomeStuff = function ( HtmlFormatter $f ) {
            $f->flatten( array( 's', 'div' ) );
        };
        $flattenEverything = function ( HtmlFormatter $f ) {
            $f->flattenAllTags();
        };
        return array(
            // remove images if asked
            array(
                '<img src="/foo/bar.jpg" alt="Blah"/>',
                '',
                array( '<img src="/foo/bar.jpg" alt="Blah">' ),
                $removeImages,
            ),
            // basic tag removal
            array(
                // @codingStandardsIgnoreStart Ignore long line warnings.
                '<table><tr><td>foo</td></tr></table><div class="foo">foo</div><div class="foo quux">foo</div><span id="bar">bar</span>
<strong class="foo" id="bar">foobar</strong><div class="notfoo">test</div><div class="baz"/>
<span class="baz">baz</span>',
                // @codingStandardsIgnoreEnd
                '<div class="notfoo">test</div>
<span class="baz">baz</span>',
                array(
                    '<table><tr><td>foo</td></tr></table>',
                    '<div class="foo">foo</div>',
                    '<div class="foo quux">foo</div>',
                    '<span id="bar">bar</span>',
                    '<strong class="foo" id="bar">foobar</strong>',
                    '<div class="baz"/>',
                ),
                $removeTags,
            ),
            // don't flatten tags that start like chosen ones
            array(
                '<div><s>foo</s> <span>bar</span></div>',
                'foo <span>bar</span>',
                array(),
                $flattenSomeStuff,
            ),
            // total flattening
            array(
                '<div style="foo">bar<sup>2</sup></div>',
                'bar2',
                array(),
                $flattenEverything,
            ),
            // UTF-8 preservation and security
            array(
                '<span title="&quot; \' &amp;">&lt;Тест!&gt;</span> &amp;&lt;&#38;&#0038;&#x26;&#x026;',
                '<span title="&quot; \' &amp;">&lt;Тест!&gt;</span> &amp;&lt;&amp;&amp;&amp;&amp;',
                array(),
                $removeTags, // Have some rules to trigger a DOM parse
            ),
            // https://bugzilla.wikimedia.org/show_bug.cgi?id=53086
            array(
                'Foo<sup id="cite_ref-1" class="reference"><a href="#cite_note-1">[1]</a></sup>'
                    . ' <a href="/wiki/Bar" title="Bar" class="mw-redirect">Bar</a>',
                'Foo<sup id="cite_ref-1" class="reference"><a href="#cite_note-1">[1]</a></sup>'
                    . ' <a href="/wiki/Bar" title="Bar" class="mw-redirect">Bar</a>',
            ),
        );
    }

    /**
     * With no filtering rules configured, filterContent() must not call
     * getDoc(); MockHtmlFormatter records whether that call happened.
     */
    public function testQuickProcessing() {
        $f = new MockHtmlFormatter( 'foo' );
        $f->filterContent();
        $this->assertFalse( $f->hasDoc, 'HtmlFormatter should not needlessly parse HTML' );
    }
}
00119 
/**
 * HtmlFormatter subclass used as a test spy: it records whether getDoc()
 * has been called on the instance.
 */
class MockHtmlFormatter extends HtmlFormatter {
    /** @var bool Set to true the first time getDoc() is invoked */
    public $hasDoc = false;

    /**
     * Flags the call, then defers to the parent implementation.
     *
     * @return mixed Whatever HtmlFormatter::getDoc() returns
     */
    public function getDoc() {
        // Record the call before delegating so the flag is set even if the
        // parent call does not return normally.
        $this->hasDoc = true;

        return parent::getDoc();
    }
}
00127 }