[ Index ]

PHP Cross Reference of MediaWiki-1.24.0

title

Body

[close]

/maintenance/ -> compareParsers.php (source)

   1  <?php
   2  /**
   3   * Take page text out of an XML dump file and render basic HTML out to files.
   4   * This is *NOT* suitable for publishing or offline use; it's intended for
   5   * running comparative tests of parsing behavior using real-world data.
   6   *
   7   * Templates etc are pulled from the local wiki database, not from the dump.
   8   *
   9   * Copyright © 2011 Platonides
  10   * https://www.mediawiki.org/
  11   *
  12   * This program is free software; you can redistribute it and/or modify
  13   * it under the terms of the GNU General Public License as published by
  14   * the Free Software Foundation; either version 2 of the License, or
  15   * (at your option) any later version.
  16   *
  17   * This program is distributed in the hope that it will be useful,
  18   * but WITHOUT ANY WARRANTY; without even the implied warranty of
  19   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  20   * GNU General Public License for more details.
  21   *
  22   * You should have received a copy of the GNU General Public License along
  23   * with this program; if not, write to the Free Software Foundation, Inc.,
  24   * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  25   * http://www.gnu.org/copyleft/gpl.html
  26   *
  27   * @file
  28   * @ingroup Maintenance
  29   */
  30  
  31  require_once  __DIR__ . '/dumpIterator.php';
  32  
  33  /**
  34   * Maintenance script to take page text out of an XML dump file and render
  35   * basic HTML out to files.
  36   *
  37   * @ingroup Maintenance
  38   */
  39  class CompareParsers extends DumpIterator {
  40  
  41      private $count = 0;
  42  
  43  	public function __construct() {
  44          parent::__construct();
  45          $this->saveFailed = false;
  46          $this->mDescription = "Run a file or dump with several parsers";
  47          $this->addOption( 'parser1', 'The first parser to compare.', true, true );
  48          $this->addOption( 'parser2', 'The second parser to compare.', true, true );
  49          $this->addOption( 'tidy', 'Run tidy on the articles.', false, false );
  50          $this->addOption(
  51              'save-failed',
  52              'Folder in which articles which differ will be stored.',
  53              false,
  54              true
  55          );
  56          $this->addOption( 'show-diff', 'Show a diff of the two renderings.', false, false );
  57          $this->addOption(
  58              'diff-bin',
  59              'Binary to use for diffing (can also be provided by DIFF env var).',
  60              false,
  61              false
  62          );
  63          $this->addOption(
  64              'strip-parameters',
  65              'Remove parameters of html tags to increase readability.',
  66              false,
  67              false
  68          );
  69          $this->addOption(
  70              'show-parsed-output',
  71              'Show the parsed html if both Parsers give the same output.',
  72              false,
  73              false
  74          );
  75      }
  76  
  77  	public function checkOptions() {
  78          if ( $this->hasOption( 'save-failed' ) ) {
  79              $this->saveFailed = $this->getOption( 'save-failed' );
  80          }
  81  
  82          $this->stripParametersEnabled = $this->hasOption( 'strip-parameters' );
  83          $this->showParsedOutput = $this->hasOption( 'show-parsed-output' );
  84  
  85          $this->showDiff = $this->hasOption( 'show-diff' );
  86          if ( $this->showDiff ) {
  87              $bin = $this->getOption( 'diff-bin', getenv( 'DIFF' ) );
  88              if ( $bin != '' ) {
  89                  global $wgDiff;
  90                  $wgDiff = $bin;
  91              }
  92          }
  93  
  94          $user = new User();
  95          $this->options = ParserOptions::newFromUser( $user );
  96  
  97          if ( $this->hasOption( 'tidy' ) ) {
  98              global $wgUseTidy;
  99              if ( !$wgUseTidy ) {
 100                  $this->error( 'Tidy was requested but $wgUseTidy is not set in LocalSettings.php', true );
 101              }
 102              $this->options->setTidy( true );
 103          }
 104  
 105          $this->failed = 0;
 106      }
 107  
 108  	public function conclusions() {
 109          $this->error( "{$this->failed} failed revisions out of {$this->count}" );
 110          if ( $this->count > 0 ) {
 111              $this->output( " (" . ( $this->failed / $this->count ) . "%)\n" );
 112          }
 113      }
 114  
 115  	function stripParameters( $text ) {
 116          if ( !$this->stripParametersEnabled ) {
 117              return $text;
 118          }
 119  
 120          return preg_replace( '/(<a) [^>]+>/', '$1>', $text );
 121      }
 122  
 123      /**
 124       * Callback function for each revision, parse with both parsers and compare
 125       * @param Revision $rev
 126       */
 127  	public function processRevision( $rev ) {
 128          $title = $rev->getTitle();
 129  
 130          $parser1Name = $this->getOption( 'parser1' );
 131          $parser2Name = $this->getOption( 'parser2' );
 132  
 133          self::checkParserLocally( $parser1Name );
 134          self::checkParserLocally( $parser2Name );
 135  
 136          $parser1 = new $parser1Name();
 137          $parser2 = new $parser2Name();
 138  
 139          $content = $rev->getContent();
 140  
 141          if ( $content->getModel() !== CONTENT_MODEL_WIKITEXT ) {
 142              $this->error( "Page {$title->getPrefixedText()} does not contain wikitext "
 143                  . "but {$content->getModel()}\n" );
 144  
 145              return;
 146          }
 147  
 148          $text = strval( $content->getNativeData() );
 149  
 150          $output1 = $parser1->parse( $text, $title, $this->options );
 151          $output2 = $parser2->parse( $text, $title, $this->options );
 152  
 153          if ( $output1->getText() != $output2->getText() ) {
 154              $this->failed++;
 155              $this->error( "Parsing for {$title->getPrefixedText()} differs\n" );
 156  
 157              if ( $this->saveFailed ) {
 158                  file_put_contents(
 159                      $this->saveFailed . '/' . rawurlencode( $title->getPrefixedText() ) . ".txt",
 160                      $text
 161                  );
 162              }
 163              if ( $this->showDiff ) {
 164                  $this->output( wfDiff(
 165                      $this->stripParameters( $output1->getText() ),
 166                      $this->stripParameters( $output2->getText() ),
 167                      ''
 168                  ) );
 169              }
 170          } else {
 171              $this->output( $title->getPrefixedText() . "\tOK\n" );
 172  
 173              if ( $this->showParsedOutput ) {
 174                  $this->output( $this->stripParameters( $output1->getText() ) );
 175              }
 176          }
 177      }
 178  
 179  	private static function checkParserLocally( $parserName ) {
 180          /* Look for the parser in a file appropiately named in the current folder */
 181          if ( !class_exists( $parserName ) && file_exists( "$parserName.php" ) ) {
 182              global $wgAutoloadClasses;
 183              $wgAutoloadClasses[$parserName] = realpath( '.' ) . "/$parserName.php";
 184          }
 185      }
 186  }
 187  
// Name the maintenance class defined in this file, then include the file
// referenced by RUN_MAINTENANCE_IF_MAIN.
// NOTE(review): presumably that file reads $maintClass and runs the script
// when this file is the entry point — confirm against the maintenance bootstrap.
$maintClass = "CompareParsers";
require_once RUN_MAINTENANCE_IF_MAIN;


Generated: Fri Nov 28 14:03:12 2014 Cross-referenced by PHPXref 0.7.1