MediaWiki  REL1_19
compareParsers.php
Go to the documentation of this file.
00001 <?php
00030 require_once( dirname( __FILE__ ) . '/dumpIterator.php' );
00031 
/**
 * Maintenance script that renders every revision of a file or dump with two
 * parser classes and reports the revisions whose rendered text differs.
 *
 * The parser class names are taken from the required 'parser1' and 'parser2'
 * options.
 */
class CompareParsers extends DumpIterator {

	/**
	 * Number of revisions passed to processRevision() so far.
	 * Private, so it can only be maintained by this class.
	 */
	private $count = 0;

	/*
	 * The properties below used to be created dynamically on first
	 * assignment. They are declared public so that their effective
	 * visibility does not change.
	 */

	/** Number of revisions whose two renderings differed. */
	public $failed = 0;

	/** Folder where the text of differing revisions is saved, or false to save nothing. */
	public $saveFailed = false;

	/** Whether a diff of the two renderings is printed when they differ. */
	public $showDiff = false;

	/** Whether stripParameters() actually rewrites the text it is given. */
	public $stripParametersEnabled = false;

	/** Whether the rendered text is printed when both parsers agree. */
	public $showParsedOutput = false;

	/** Options object (from ParserOptions::newFromUser) handed to both parsers. */
	public $options = null;

	/**
	 * Registers the script description and its command line options.
	 */
	public function __construct() {
		parent::__construct();
		$this->saveFailed = false;
		$this->mDescription = "Run a file or dump with several parsers";
		$this->addOption( 'parser1', 'The first parser to compare.', true, true );
		$this->addOption( 'parser2', 'The second parser to compare.', true, true );
		$this->addOption( 'tidy', 'Run tidy on the articles.', false, false );
		$this->addOption( 'save-failed', 'Folder in which articles which differ will be stored.', false, true );
		$this->addOption( 'show-diff', 'Show a diff of the two renderings.', false, false );
		// diff-bin is read as a value in checkOptions(), so it is registered
		// like the other value-carrying options (parser1, parser2, save-failed)
		// rather than like the boolean switches.
		$this->addOption( 'diff-bin', 'Binary to use for diffing (can also be provided by DIFF env var).', false, true );
		$this->addOption( 'strip-parameters', 'Remove parameters of html tags to increase readability.', false, false );
		$this->addOption( 'show-parsed-output', 'Show the parsed html if both Parsers give the same output.', false, false );
	}

	/**
	 * Copies the command line options into properties, builds the parser
	 * options object and resets the failure counter.
	 */
	public function checkOptions() {
		if ( $this->hasOption( 'save-failed' ) ) {
			$this->saveFailed = $this->getOption( 'save-failed' );
		}

		$this->stripParametersEnabled = $this->hasOption( 'strip-parameters' );
		$this->showParsedOutput = $this->hasOption( 'show-parsed-output' );

		$this->showDiff = $this->hasOption( 'show-diff' );
		if ( $this->showDiff ) {
			// Falls back to the DIFF environment variable; getenv() yields
			// false when it is unset, which the string cast turns into ''.
			$bin = $this->getOption( 'diff-bin', getenv( 'DIFF' ) );
			if ( (string)$bin !== '' ) {
				global $wgDiff;
				$wgDiff = $bin;
			}
		}

		$user = new User();
		$this->options = ParserOptions::newFromUser( $user );

		if ( $this->hasOption( 'tidy' ) ) {
			global $wgUseTidy;
			if ( !$wgUseTidy ) {
				// NOTE(review): the second argument presumably makes error() abort the script - confirm.
				$this->error( 'Tidy was requested but $wgUseTidy is not set in LocalSettings.php', true );
			}
			$this->options->setTidy( true );
		}

		$this->failed = 0;
	}

	/**
	 * Reports how many of the processed revisions were rendered differently,
	 * followed by the failure rate as a percentage when at least one revision
	 * was processed.
	 */
	public function conclusions() {
		$this->error( "{$this->failed} failed revisions out of {$this->count}" );
		if ( $this->count > 0 ) {
			// Scale the ratio to an actual percentage before appending '%'.
			$percent = round( 100 * $this->failed / $this->count, 2 );
			$this->output( " (" . $percent . "%)\n" );
		}
	}

	/**
	 * Removes the attributes of <a> tags when the 'strip-parameters' option
	 * is enabled; otherwise returns the text unchanged.
	 *
	 * @param $text String: HTML to simplify
	 * @return String
	 */
	public function stripParameters( $text ) {
		if ( !$this->stripParametersEnabled ) {
			return $text;
		}
		return preg_replace( '/(<a) [^>]+>/', '$1>', $text );
	}

	/**
	 * Parses one revision with both parsers and reports whether the
	 * renderings are identical.
	 *
	 * On a difference the failure counter is increased, the revision text is
	 * optionally saved to the 'save-failed' folder and a diff is optionally
	 * printed. On a match the title is printed with "OK", optionally followed
	 * by the rendered text.
	 *
	 * @param $rev Object providing getTitle() and getText()
	 */
	public function processRevision( $rev ) {
		// Keep our own tally: conclusions() needs it as the denominator.
		$this->count++;

		$title = $rev->getTitle();

		$parser1Name = $this->getOption( 'parser1' );
		$parser2Name = $this->getOption( 'parser2' );

		self::checkParserLocally( $parser1Name );
		self::checkParserLocally( $parser2Name );

		$parser1 = new $parser1Name();
		$parser2 = new $parser2Name();

		$output1 = $parser1->parse( $rev->getText(), $title, $this->options );
		$output2 = $parser2->parse( $rev->getText(), $title, $this->options );

		$html1 = $output1->getText();
		$html2 = $output2->getText();

		// Strict comparison: with loose comparison two different
		// numeric-looking strings (e.g. "1e3" and "1000") count as equal.
		if ( $html1 !== $html2 ) {
			$this->failed++;
			$this->error( "Parsing for {$title->getPrefixedText()} differs\n" );

			if ( $this->saveFailed ) {
				// The title is URL-encoded so it is usable as a file name.
				file_put_contents( $this->saveFailed . '/' . rawurlencode( $title->getPrefixedText() ) . ".txt", $rev->getText() );
			}
			if ( $this->showDiff ) {
				$this->output( wfDiff( $this->stripParameters( $html1 ), $this->stripParameters( $html2 ), '' ) );
			}
		} else {
			$this->output( $title->getPrefixedText() . "\tOK\n" );
			if ( $this->showParsedOutput ) {
				$this->output( $this->stripParameters( $html1 ) );
			}
		}
	}

	/**
	 * Registers "<parserName>.php" from the current working directory in
	 * $wgAutoloadClasses when the class is not already defined and such a
	 * file exists.
	 *
	 * @param $parserName String: name of the parser class
	 */
	private static function checkParserLocally( $parserName ) {
		/* Look for the parser in an appropriately named file in the current folder */
		if ( !class_exists( $parserName ) && file_exists( "$parserName.php" ) ) {
			global $wgAutoloadClasses;
			$wgAutoloadClasses[ $parserName ] = realpath( '.' ) . "/$parserName.php";
		}
	}

}
00140 
// Name the class to run, then include the file named by
// RUN_MAINTENANCE_IF_MAIN.
$maintClass = "CompareParsers";
require_once RUN_MAINTENANCE_IF_MAIN;