MediaWiki  REL1_24
compareParsers.php
Go to the documentation of this file.
00001 <?php
00031 require_once __DIR__ . '/dumpIterator.php';
00032 
/**
 * Maintenance script that renders each revision of a file or dump with two
 * parser classes and reports the revisions whose HTML output differs.
 *
 * The parser class names come from the required --parser1 / --parser2
 * options. A parser class that is not loaded yet is also looked up as
 * "<ClassName>.php" in the current working directory.
 */
class CompareParsers extends DumpIterator {

    /** @var int Number of revisions that were actually parsed and compared. */
    private $count = 0;

    /** @var int Number of compared revisions whose two renderings differ. */
    private $failed = 0;

    /** @var string|bool Folder receiving the wikitext of differing pages, or false. */
    private $saveFailed = false;

    /** @var bool Whether stripParameters() rewrites its input. */
    private $stripParametersEnabled = false;

    /** @var bool Whether the HTML of identical renderings is printed. */
    private $showParsedOutput = false;

    /** @var bool Whether a diff of differing renderings is printed. */
    private $showDiff = false;

    /** @var ParserOptions|null Options handed to both parsers; set by checkOptions(). */
    private $options = null;

    /**
     * Registers the description and the command line options of the script.
     */
    public function __construct() {
        parent::__construct();
        $this->mDescription = "Run a file or dump with several parsers";
        $this->addOption( 'parser1', 'The first parser to compare.', true, true );
        $this->addOption( 'parser2', 'The second parser to compare.', true, true );
        $this->addOption( 'tidy', 'Run tidy on the articles.', false, false );
        $this->addOption(
            'save-failed',
            'Folder in which articles which differ will be stored.',
            false,
            true
        );
        $this->addOption( 'show-diff', 'Show a diff of the two renderings.', false, false );
        // The value of this option is a path, so it is declared as taking an
        // argument, exactly like 'save-failed' above.
        $this->addOption(
            'diff-bin',
            'Binary to use for diffing (can also be provided by DIFF env var).',
            false,
            true
        );
        $this->addOption(
            'strip-parameters',
            'Remove parameters of html tags to increase readability.',
            false,
            false
        );
        $this->addOption(
            'show-parsed-output',
            'Show the parsed html if both Parsers give the same output.',
            false,
            false
        );
    }

    /**
     * Copies the command line options into the object state, builds the
     * ParserOptions used for both parsers and resets the counters.
     */
    public function checkOptions() {
        if ( $this->hasOption( 'save-failed' ) ) {
            $this->saveFailed = $this->getOption( 'save-failed' );
        }

        $this->stripParametersEnabled = $this->hasOption( 'strip-parameters' );
        $this->showParsedOutput = $this->hasOption( 'show-parsed-output' );

        $this->showDiff = $this->hasOption( 'show-diff' );
        if ( $this->showDiff ) {
            // getenv() returns false when DIFF is not set; only a non-empty
            // string may override the configured diff binary.
            $bin = $this->getOption( 'diff-bin', getenv( 'DIFF' ) );
            if ( is_string( $bin ) && $bin !== '' ) {
                global $wgDiff;
                $wgDiff = $bin;
            }
        }

        $user = new User();
        $this->options = ParserOptions::newFromUser( $user );

        if ( $this->hasOption( 'tidy' ) ) {
            global $wgUseTidy;
            if ( !$wgUseTidy ) {
                $this->error( 'Tidy was requested but $wgUseTidy is not set in LocalSettings.php', true );
            }
            $this->options->setTidy( true );
        }

        $this->failed = 0;
        $this->count = 0;
    }

    /**
     * Reports how many of the compared revisions rendered differently,
     * followed by the failure rate as a percentage when anything was compared.
     */
    public function conclusions() {
        $summary = "{$this->failed} failed revisions out of {$this->count}";
        if ( $this->count > 0 ) {
            // Scale the ratio to a real percentage before labelling it "%".
            $percent = round( 100 * $this->failed / $this->count, 2 );
            $summary .= " ({$percent}%)";
        }
        // Emitted as one message so the percentage stays attached to the counts.
        $this->error( $summary );
    }

    /**
     * Removes the attributes of <a> tags when --strip-parameters was given.
     *
     * @param string $text HTML to clean up
     * @return string The input unchanged when stripping is disabled, otherwise
     *   the input with every "<a ...>" reduced to "<a>"
     */
    public function stripParameters( $text ) {
        if ( !$this->stripParametersEnabled ) {
            return $text;
        }

        return preg_replace( '/(<a) [^>]+>/', '$1>', $text );
    }

    /**
     * Renders one revision with both parsers and records whether the results
     * are identical.
     *
     * Revisions without content or with a non-wikitext content model are
     * reported and skipped; they are not counted as compared.
     *
     * @param Revision $rev Revision to process (must offer getTitle() and getContent())
     */
    public function processRevision( $rev ) {
        $title = $rev->getTitle();
        $content = $rev->getContent();

        if ( !$content ) {
            $this->error( "Page {$title->getPrefixedText()} has no accessible content\n" );

            return;
        }

        if ( $content->getModel() !== CONTENT_MODEL_WIKITEXT ) {
            $this->error( "Page {$title->getPrefixedText()} does not contain wikitext "
                . "but {$content->getModel()}\n" );

            return;
        }

        // Fresh parser instances for every revision, as before.
        $parser1 = $this->newParser( $this->getOption( 'parser1' ) );
        $parser2 = $this->newParser( $this->getOption( 'parser2' ) );

        $text = strval( $content->getNativeData() );

        // This property is private and nothing else updates it, so the
        // compared revisions have to be counted here for conclusions().
        $this->count++;

        $html1 = $parser1->parse( $text, $title, $this->options )->getText();
        $html2 = $parser2->parse( $text, $title, $this->options )->getText();

        // Strict comparison: with "!=" two numeric strings such as "1e3" and
        // "1000" would be compared as numbers and wrongly considered equal.
        if ( $html1 !== $html2 ) {
            $this->failed++;
            $this->error( "Parsing for {$title->getPrefixedText()} differs\n" );

            if ( $this->saveFailed ) {
                $target = $this->saveFailed . '/' . rawurlencode( $title->getPrefixedText() ) . ".txt";
                if ( file_put_contents( $target, $text ) === false ) {
                    $this->error( "Could not save the wikitext to $target\n" );
                }
            }
            if ( $this->showDiff ) {
                $this->output( wfDiff(
                    $this->stripParameters( $html1 ),
                    $this->stripParameters( $html2 ),
                    ''
                ) );
            }
        } else {
            $this->output( $title->getPrefixedText() . "\tOK\n" );

            if ( $this->showParsedOutput ) {
                $this->output( $this->stripParameters( $html1 ) );
            }
        }
    }

    /**
     * Instantiates a parser class by name, aborting with a readable message
     * when the class cannot be loaded.
     *
     * @param string $parserName Class name given on the command line
     * @return object New instance of $parserName
     */
    private function newParser( $parserName ) {
        self::checkParserLocally( $parserName );

        if ( !class_exists( $parserName ) ) {
            // Second argument requests termination, as in the tidy check.
            $this->error( "Parser class '$parserName' could not be found", true );
        }

        return new $parserName();
    }

    /**
     * Registers "<parserName>.php" from the current folder in
     * $wgAutoloadClasses when the class is not loaded and that file exists.
     *
     * @param string $parserName Class name of the parser
     */
    private static function checkParserLocally( $parserName ) {
        /* Look for the parser in a file appropriately named in the current folder */
        if ( !class_exists( $parserName ) && file_exists( "$parserName.php" ) ) {
            global $wgAutoloadClasses;
            $wgAutoloadClasses[$parserName] = realpath( '.' ) . "/$parserName.php";
        }
    }
}
00187 
// Name of the maintenance class defined in this file. It is assigned before
// the include below so that it is already set when that file is loaded.
$maintClass = 'CompareParsers';

// NOTE(review): RUN_MAINTENANCE_IF_MAIN is defined outside this file;
// presumably it points at the script that runs $maintClass — confirm.
require_once RUN_MAINTENANCE_IF_MAIN;