MediaWiki  REL1_19
dumpIterator.php
Go to the documentation of this file.
00001 <?php
00028 require_once( dirname( __FILE__ ) . '/Maintenance.php' );
00029 
/**
 * Base class for maintenance scripts that run some processing over every
 * revision of either a single text file (--file) or an XML dump (--dump).
 *
 * Subclasses must implement processRevision() and may override
 * checkOptions() and conclusions().
 */
abstract class DumpIterator extends Maintenance {

	/** Number of revisions handled; restarted at 1 once the --from title is reached */
	private $count = 0;

	/** Value of wfTime() taken just before the dump import starts */
	private $startTime;

	/**
	 * Title text given with --from, or null when nothing has to be skipped.
	 * Declared explicitly (it used to be created dynamically in execute()).
	 */
	protected $from = null;

	/**
	 * Registers the options shared by every dump iterator script.
	 */
	public function __construct() {
		parent::__construct();
		$this->mDescription = "Does something with a dump";
		$this->addOption( 'file', 'File with text to run.', false, true );
		$this->addOption( 'dump', 'XML dump to execute all revisions.', false, true );
		$this->addOption( 'from', 'Article from XML dump to start from.', false, true );
	}

	/**
	 * Entry point: processes the single file given with --file, or every
	 * revision of the dump supplied on stdin (--dump -), then reports
	 * timing and memory figures through error().
	 */
	public function execute() {
		$hasFile = $this->hasOption( 'file' );
		$hasDump = $this->hasOption( 'dump' );

		# Exactly one of the two input options must be given.
		if ( !( $hasFile xor $hasDump ) ) {
			$this->error( "You must provide a file or dump", true );
		}

		$this->checkOptions();

		if ( $hasFile ) {
			$path = $this->getOption( 'file' );
			$text = file_get_contents( $path );
			if ( $text === false ) {
				# Do not hand boolean false to the revision as if it were page text.
				$this->error( "Unable to read file " . $path, true );
			}

			$revision = new WikiRevision;
			$revision->setText( $text );
			# The page title is derived from the file name, minus a '.txt' suffix.
			$revision->setTitle( Title::newFromText( rawurldecode( basename( $path, '.txt' ) ) ) );
			$this->handleRevision( $revision );
			return;
		}

		$this->startTime = wfTime();

		if ( $this->getOption( 'dump' ) !== '-' ) {
			$this->error( "Sorry, I don't support dump filenames yet. Use - and provide it on stdin on the meantime.", true );
		}
		$source = new ImportStreamSource( $this->getStdin() );
		$importer = new WikiImporter( $source );

		# Objects are handles, so no reference to $this is needed for the callback.
		$importer->setRevisionCallback( array( $this, 'handleRevision' ) );

		$this->from = $this->getOption( 'from', null );
		$this->count = 0;
		$importer->doImport();

		$this->conclusions();

		$delta = wfTime() - $this->startTime;
		$this->error( "Done {$this->count} revisions in " . round( $delta, 2 ) . " seconds " );
		if ( $delta > 0 ) {
			$this->error( round( $this->count / $delta, 2 ) . " pages/sec" );
		}

		# Perform the memory_get_peak_usage() when all the other data has been output so there's no damage if it dies.
		# It is only available since 5.2.0 (since 5.2.1 if you haven't compiled with --enable-memory-limit)
		$this->error( "Memory peak usage of " . memory_get_peak_usage() . " bytes\n" );
	}

	/**
	 * When the script declares that it needs no database, turns off
	 * database messages, switches the localisation cache store to
	 * LCStore_Null and registers the hook that disables interwiki lookups.
	 */
	public function finalSetup() {
		parent::finalSetup();

		if ( $this->getDbType() == Maintenance::DB_NONE ) {
			global $wgUseDatabaseMessages, $wgLocalisationCacheConf, $wgHooks;
			$wgUseDatabaseMessages = false;
			$wgLocalisationCacheConf['storeClass'] = 'LCStore_Null';
			$wgHooks['InterwikiLoadPrefix'][] = 'DumpIterator::disableInterwikis';
		}
	}

	/**
	 * Handler registered for the InterwikiLoadPrefix hook.
	 *
	 * @param $prefix Unused
	 * @param $data Unused
	 * @return bool Always false
	 */
	public static function disableInterwikis( $prefix, &$data ) {
		# Title::newFromText will check on each namespaced article if it's an interwiki.
		# We always answer that it is not.

		return false;
	}

	/**
	 * Callback invoked for each revision: counts it, skips everything
	 * before the --from title if one was given, then passes the revision
	 * on to processRevision().
	 *
	 * @param $rev Revision object providing getTitle()
	 */
	public function handleRevision( $rev ) {
		$title = $rev->getTitle();
		if ( !$title ) {
			$this->error( "Got bogus revision with null title!" );
			return;
		}

		$this->count++;
		if ( isset( $this->from ) ) {
			# Compare as plain strings instead of relying on loose
			# object/string (and numeric string) comparison rules.
			if ( (string)$this->from !== (string)$title ) {
				return;
			}
			$this->output( "Skipped " . ( $this->count - 1 ) . " pages\n" );

			# Start counting again from the first processed revision.
			$this->count = 1;
			$this->from = null;
		}

		$this->processRevision( $rev );
	}

	/* Stub function for processing additional options */
	public function checkOptions() {
		return;
	}

	/* Stub function for giving data about what was computed */
	public function conclusions() {
		return;
	}

	/* Core function which does whatever the maintenance script is designed to do */
	abstract public function processRevision( $rev );
}
00143 
/**
 * Dump iterator that prints the title and timestamp of every revision
 * whose text matches the regular expression given with --regex.
 */
class SearchDump extends DumpIterator {

	/**
	 * Adds the mandatory 'regex' option on top of the inherited ones.
	 */
	public function __construct() {
		parent::__construct();
		$this->mDescription = "Runs a regex in the revisions from a dump";
		$this->addOption( 'regex', 'Searching regex', true, true );
	}

	/**
	 * @return int Maintenance::DB_NONE
	 */
	public function getDbType() {
		return Maintenance::DB_NONE;
	}

	/**
	 * Rejects an unusable pattern up front. preg_match() returns false
	 * when it fails, which would otherwise be read as "no match" for
	 * every single revision.
	 */
	public function checkOptions() {
		parent::checkOptions();

		$regex = $this->getOption( 'regex' );
		if ( preg_match( $regex, '' ) === false ) {
			$this->error( "Invalid regex: " . $regex, true );
		}
	}

	/**
	 * Reports the revision if its text matches the regex.
	 *
	 * @param $rev Revision object providing getText(), getTitle() and getTimestamp()
	 */
	public function processRevision( $rev ) {
		# preg_match() yields 1 on a match, 0 on no match and false on error.
		if ( preg_match( $this->getOption( 'regex' ), $rev->getText() ) === 1 ) {
			$this->output( $rev->getTitle() . " matches at edit from " . $rev->getTimestamp() . "\n" );
		}
	}
}
00165 
# Name the class this script runs, then include the file that the
# RUN_MAINTENANCE_IF_MAIN constant points to.
$maintClass = 'SearchDump';
require_once RUN_MAINTENANCE_IF_MAIN;