MediaWiki  REL1_20
compareParsers.php
Go to the documentation of this file.
00001 <?php
00031 require_once( __DIR__ . '/dumpIterator.php' );
00032 
/**
 * Maintenance script that feeds every revision of a file or dump to two
 * parser classes and reports the pages whose rendered HTML differs.
 *
 * The two parser class names are taken from the required --parser1 and
 * --parser2 options; a class that is not already loaded may live in a file
 * called "<ClassName>.php" in the current working directory.
 */
class CompareParsers extends DumpIterator {

	/**
	 * Number of revisions handed to processRevision() so far.
	 * Private to this class, so it has to be maintained here.
	 * @var int
	 */
	private $count = 0;

	/*
	 * The properties below used to be created dynamically; they are declared
	 * public so that their visibility stays exactly what it was.
	 */

	/** @var int Number of revisions whose two renderings differed */
	public $failed = 0;

	/** @var string|bool Folder receiving the wikitext of differing pages, false to disable */
	public $saveFailed = false;

	/** @var bool Whether stripParameters() actually strips anything */
	public $stripParametersEnabled = false;

	/** @var bool Whether to print the rendering when both parsers agree */
	public $showParsedOutput = false;

	/** @var bool Whether to print a diff when the parsers disagree */
	public $showDiff = false;

	/** @var ParserOptions|null Options passed to both parsers, built in checkOptions() */
	public $options = null;

	/**
	 * Registers the description and the command line options of the script.
	 */
	public function __construct() {
		parent::__construct();
		$this->mDescription = "Run a file or dump with several parsers";
		$this->addOption( 'parser1', 'The first parser to compare.', true, true );
		$this->addOption( 'parser2', 'The second parser to compare.', true, true );
		$this->addOption( 'tidy', 'Run tidy on the articles.', false, false );
		$this->addOption( 'save-failed', 'Folder in which articles which differ will be stored.', false, true );
		$this->addOption( 'show-diff', 'Show a diff of the two renderings.', false, false );
		// diff-bin carries a path, so it must be declared as taking an argument
		$this->addOption( 'diff-bin', 'Binary to use for diffing (can also be provided by DIFF env var).', false, true );
		$this->addOption( 'strip-parameters', 'Remove parameters of html tags to increase readability.', false, false );
		$this->addOption( 'show-parsed-output', 'Show the parsed html if both Parsers give the same output.', false, false );
	}

	/**
	 * Copies the command line options into properties, builds the
	 * ParserOptions shared by both parsers and resets the failure counter.
	 */
	public function checkOptions() {
		if ( $this->hasOption( 'save-failed' ) ) {
			$this->saveFailed = $this->getOption( 'save-failed' );
		}

		$this->stripParametersEnabled = $this->hasOption( 'strip-parameters' );
		$this->showParsedOutput = $this->hasOption( 'show-parsed-output' );

		$this->showDiff = $this->hasOption( 'show-diff' );
		if ( $this->showDiff ) {
			// The DIFF environment variable is the fallback; getenv() returns
			// false when it is not set.
			$bin = $this->getOption( 'diff-bin', getenv( 'DIFF' ) );
			if ( is_string( $bin ) && $bin !== '' ) {
				global $wgDiff;
				$wgDiff = $bin;
			}
		}

		$user = new User();
		$this->options = ParserOptions::newFromUser( $user );

		if ( $this->hasOption( 'tidy' ) ) {
			global $wgUseTidy;
			if ( !$wgUseTidy ) {
				// Second argument true: abort the script
				$this->error( 'Tidy was requested but $wgUseTidy is not set in LocalSettings.php', true );
			}
			$this->options->setTidy( true );
		}

		$this->failed = 0;
	}

	/**
	 * Prints the number of differing revisions and, when at least one
	 * revision was processed, the failure rate as a percentage.
	 */
	public function conclusions() {
		$this->error( "{$this->failed} failed revisions out of {$this->count}" );
		if ( $this->count > 0 ) {
			// Scale the ratio to 0-100 so that it matches the "%" suffix
			$percent = round( 100 * $this->failed / $this->count, 2 );
			$this->output( " ({$percent}%)\n" );
		}
	}

	/**
	 * Drops the attributes of <a> tags when --strip-parameters was given;
	 * other tags are left untouched.
	 *
	 * @param $text string HTML to clean up
	 * @return string The cleaned HTML, or $text unchanged when stripping is disabled
	 */
	public function stripParameters( $text ) {
		if ( !$this->stripParametersEnabled ) {
			return $text;
		}
		return preg_replace( '/(<a) [^>]+>/', '$1>', $text );
	}

	/**
	 * Parses one revision with both parsers and reports whether the
	 * renderings are identical.
	 *
	 * @param $rev Object providing getTitle() and getText() for the revision
	 */
	public function processRevision( $rev ) {
		// Nothing else can update this private counter, and conclusions()
		// needs it to report the totals.
		$this->count++;

		$title = $rev->getTitle();
		$wikitext = $rev->getText();

		$parser1Name = $this->getOption( 'parser1' );
		$parser2Name = $this->getOption( 'parser2' );

		self::checkParserLocally( $parser1Name );
		self::checkParserLocally( $parser2Name );

		$parser1 = new $parser1Name();
		$parser2 = new $parser2Name();

		$html1 = $parser1->parse( $wikitext, $title, $this->options )->getText();
		$html2 = $parser2->parse( $wikitext, $title, $this->options )->getText();

		// Strict comparison: with != two numeric-looking strings such as
		// "1e3" and "1000" would be considered equal.
		if ( $html1 !== $html2 ) {
			$this->failed++;
			$this->error( "Parsing for {$title->getPrefixedText()} differs\n" );

			if ( $this->saveFailed ) {
				$path = $this->saveFailed . '/' . rawurlencode( $title->getPrefixedText() ) . ".txt";
				if ( file_put_contents( $path, $wikitext ) === false ) {
					$this->error( "Could not save the wikitext to {$path}\n" );
				}
			}
			if ( $this->showDiff ) {
				$this->output( wfDiff( $this->stripParameters( $html1 ), $this->stripParameters( $html2 ), '' ) );
			}
		} else {
			$this->output( $title->getPrefixedText() . "\tOK\n" );
			if ( $this->showParsedOutput ) {
				$this->output( $this->stripParameters( $html1 ) );
			}
		}
	}

	/**
	 * Registers "<parserName>.php" from the current working directory in
	 * $wgAutoloadClasses when the class is not loadable yet and that file
	 * exists.
	 *
	 * @param $parserName string Name of the parser class
	 */
	private static function checkParserLocally( $parserName ) {
		/* Look for the parser in a file appropriately named in the current folder */
		if ( !class_exists( $parserName ) && file_exists( "$parserName.php" ) ) {
			global $wgAutoloadClasses;
			$wgAutoloadClasses[ $parserName ] = realpath( '.' ) . "/$parserName.php";
		}
	}

}
00147 
// Names the class defined in this file as the maintenance class.
00148 $maintClass = "CompareParsers";
// RUN_MAINTENANCE_IF_MAIN is not defined in this file; presumably it comes from
// the maintenance framework pulled in through dumpIterator.php and points to
// the bootstrap that runs $maintClass - TODO confirm.
00149 require_once( RUN_MAINTENANCE_IF_MAIN );