[ Index ]

PHP Cross Reference of MediaWiki-1.24.0

title

Body

[close]

/maintenance/ -> dumpIterator.php (source)

   1  <?php
   2  /**
   3   * Take page text out of an XML dump file and perform some operation on it.
   4   * Used as a base class for CompareParsers and PreprocessDump.
   5   * We implement below the simple task of searching inside a dump.
   6   *
   7   * Copyright © 2011 Platonides
   8   * https://www.mediawiki.org/
   9   *
  10   * This program is free software; you can redistribute it and/or modify
  11   * it under the terms of the GNU General Public License as published by
  12   * the Free Software Foundation; either version 2 of the License, or
  13   * (at your option) any later version.
  14   *
  15   * This program is distributed in the hope that it will be useful,
  16   * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  18   * GNU General Public License for more details.
  19   *
  20   * You should have received a copy of the GNU General Public License along
  21   * with this program; if not, write to the Free Software Foundation, Inc.,
  22   * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  23   * http://www.gnu.org/copyleft/gpl.html
  24   *
  25   * @file
  26   * @ingroup Maintenance
  27   */
  28  
  29  require_once  __DIR__ . '/Maintenance.php';
  30  
  31  /**
  32   * Base class for interating over a dump.
  33   *
  34   * @ingroup Maintenance
  35   */
  36  abstract class DumpIterator extends Maintenance {
  37  
  38      private $count = 0;
  39      private $startTime;
  40  
  41  	public function __construct() {
  42          parent::__construct();
  43          $this->mDescription = "Does something with a dump";
  44          $this->addOption( 'file', 'File with text to run.', false, true );
  45          $this->addOption( 'dump', 'XML dump to execute all revisions.', false, true );
  46          $this->addOption( 'from', 'Article from XML dump to start from.', false, true );
  47      }
  48  
  49  	public function execute() {
  50          if ( !( $this->hasOption( 'file' ) ^ $this->hasOption( 'dump' ) ) ) {
  51              $this->error( "You must provide a file or dump", true );
  52          }
  53  
  54          $this->checkOptions();
  55  
  56          if ( $this->hasOption( 'file' ) ) {
  57              $revision = new WikiRevision;
  58  
  59              $revision->setText( file_get_contents( $this->getOption( 'file' ) ) );
  60              $revision->setTitle( Title::newFromText(
  61                  rawurldecode( basename( $this->getOption( 'file' ), '.txt' ) )
  62              ) );
  63              $this->handleRevision( $revision );
  64  
  65              return;
  66          }
  67  
  68          $this->startTime = microtime( true );
  69  
  70          if ( $this->getOption( 'dump' ) == '-' ) {
  71              $source = new ImportStreamSource( $this->getStdin() );
  72          } else {
  73              $this->error( "Sorry, I don't support dump filenames yet. "
  74                  . "Use - and provide it on stdin on the meantime.", true );
  75          }
  76          $importer = new WikiImporter( $source );
  77  
  78          $importer->setRevisionCallback(
  79              array( &$this, 'handleRevision' ) );
  80  
  81          $this->from = $this->getOption( 'from', null );
  82          $this->count = 0;
  83          $importer->doImport();
  84  
  85          $this->conclusions();
  86  
  87          $delta = microtime( true ) - $this->startTime;
  88          $this->error( "Done {$this->count} revisions in " . round( $delta, 2 ) . " seconds " );
  89          if ( $delta > 0 ) {
  90              $this->error( round( $this->count / $delta, 2 ) . " pages/sec" );
  91          }
  92  
  93          # Perform the memory_get_peak_usage() when all the other data has been
  94          # output so there's no damage if it dies. It is only available since
  95          # 5.2.0 (since 5.2.1 if you haven't compiled with --enable-memory-limit)
  96          $this->error( "Memory peak usage of " . memory_get_peak_usage() . " bytes\n" );
  97      }
  98  
  99  	public function finalSetup() {
 100          parent::finalSetup();
 101  
 102          if ( $this->getDbType() == Maintenance::DB_NONE ) {
 103              global $wgUseDatabaseMessages, $wgLocalisationCacheConf, $wgHooks;
 104              $wgUseDatabaseMessages = false;
 105              $wgLocalisationCacheConf['storeClass'] = 'LCStoreNull';
 106              $wgHooks['InterwikiLoadPrefix'][] = 'DumpIterator::disableInterwikis';
 107          }
 108      }
 109  
 110  	static function disableInterwikis( $prefix, &$data ) {
 111          # Title::newFromText will check on each namespaced article if it's an interwiki.
 112          # We always answer that it is not.
 113  
 114          return false;
 115      }
 116  
 117      /**
 118       * Callback function for each revision, child classes should override
 119       * processRevision instead.
 120       * @param DatabaseBase $rev
 121       */
 122  	public function handleRevision( $rev ) {
 123          $title = $rev->getTitle();
 124          if ( !$title ) {
 125              $this->error( "Got bogus revision with null title!" );
 126  
 127              return;
 128          }
 129  
 130          $this->count++;
 131          if ( isset( $this->from ) ) {
 132              if ( $this->from != $title ) {
 133                  return;
 134              }
 135              $this->output( "Skipped " . ( $this->count - 1 ) . " pages\n" );
 136  
 137              $this->count = 1;
 138              $this->from = null;
 139          }
 140  
 141          $this->processRevision( $rev );
 142      }
 143  
 144      /* Stub function for processing additional options */
 145  	public function checkOptions() {
 146          return;
 147      }
 148  
 149      /* Stub function for giving data about what was computed */
 150  	public function conclusions() {
 151          return;
 152      }
 153  
 154      /* Core function which does whatever the maintenance script is designed to do */
 155      abstract public function processRevision( $rev );
 156  }
 157  
 158  /**
 159   * Maintenance script that runs a regex in the revisions from a dump.
 160   *
 161   * @ingroup Maintenance
 162   */
 163  class SearchDump extends DumpIterator {
 164  
 165  	public function __construct() {
 166          parent::__construct();
 167          $this->mDescription = "Runs a regex in the revisions from a dump";
 168          $this->addOption( 'regex', 'Searching regex', true, true );
 169      }
 170  
 171  	public function getDbType() {
 172          return Maintenance::DB_NONE;
 173      }
 174  
 175      /**
 176       * @param Revision $rev
 177       */
 178  	public function processRevision( $rev ) {
 179          if ( preg_match( $this->getOption( 'regex' ), $rev->getContent()->getTextForSearchIndex() ) ) {
 180              $this->output( $rev->getTitle() . " matches at edit from " . $rev->getTimestamp() . "\n" );
 181          }
 182      }
 183  }
 184  
 185  $maintClass = "SearchDump";
 186  require_once RUN_MAINTENANCE_IF_MAIN;


Generated: Fri Nov 28 14:03:12 2014 Cross-referenced by PHPXref 0.7.1