MediaWiki
REL1_19
|
<?php
/**
 * Base class for maintenance scripts that run a callback over every revision
 * of a text file or an XML dump, plus SearchDump, a concrete script which
 * reports the revisions matching a regular expression.
 */

require_once( dirname( __FILE__ ) . '/Maintenance.php' );

/**
 * Feeds revisions, one at a time, to processRevision().
 *
 * Exactly one of --file (a single text file treated as one revision) or
 * --dump (an XML dump, currently only readable from stdin via "-") must be
 * given. Subclasses implement processRevision() and may override
 * checkOptions() and conclusions().
 */
abstract class DumpIterator extends Maintenance {

	/** Number of revisions seen (reset to 1 when the --from article is reached). */
	private $count = 0;

	/** Time, as returned by wfTime(), at which dump processing started. */
	private $startTime;

	/**
	 * Title given with --from; revisions are skipped until one with this
	 * title is seen, then this is reset to null. Null means "do not skip".
	 */
	protected $from = null;

	public function __construct() {
		parent::__construct();
		$this->mDescription = "Does something with a dump";
		$this->addOption( 'file', 'File with text to run.', false, true );
		$this->addOption( 'dump', 'XML dump to execute all revisions.', false, true );
		$this->addOption( 'from', 'Article from XML dump to start from.', false, true );
	}

	/**
	 * Entry point: validates the options, then processes either the single
	 * file or every revision of the dump and prints run statistics.
	 */
	public function execute() {
		$hasFile = $this->hasOption( 'file' );
		$hasDump = $this->hasOption( 'dump' );

		// Exactly one of the two sources must be given (neither or both is an error).
		if ( !( $hasFile xor $hasDump ) ) {
			$this->error( "You must provide a file or dump", true );
		}

		$this->checkOptions();

		if ( $hasFile ) {
			$this->processFile( $this->getOption( 'file' ) );
			return;
		}

		$this->startTime = wfTime();

		// error() is called with its second argument set, which aborts the script.
		if ( $this->getOption( 'dump' ) !== '-' ) {
			$this->error( "Sorry, I don't support dump filenames yet. Use - and provide it on stdin on the meantime.", true );
		}

		$importer = new WikiImporter( new ImportStreamSource( $this->getStdin() ) );
		// Objects are passed by handle, so no reference to $this is needed here.
		$importer->setRevisionCallback( array( $this, 'handleRevision' ) );

		$this->from = $this->getOption( 'from', null );
		$this->count = 0;
		$importer->doImport();

		$this->conclusions();
		$this->reportStatistics();
	}

	/**
	 * Treats a single text file as one revision and hands it to
	 * handleRevision(). The title is derived from the URL-decoded file name
	 * with a trailing ".txt" removed.
	 *
	 * @param $path String: path of the file to read
	 */
	private function processFile( $path ) {
		$text = file_get_contents( $path );
		if ( $text === false ) {
			// Do not feed boolean false to the revision as if it were page text.
			$this->error( "Unable to read file '$path'", true );
		}

		$revision = new WikiRevision;
		$revision->setText( $text );
		$revision->setTitle( Title::newFromText( rawurldecode( basename( $path, '.txt' ) ) ) );
		$this->handleRevision( $revision );
	}

	/**
	 * Prints the revision count, elapsed time, throughput and peak memory
	 * usage through error(), as the statistics have always been reported.
	 */
	private function reportStatistics() {
		$delta = wfTime() - $this->startTime;
		$this->error( "Done {$this->count} revisions in " . round( $delta, 2 ) . " seconds " );
		// Guard against division by zero on a (near) instantaneous run.
		if ( $delta > 0 ) {
			$this->error( round( $this->count / $delta, 2 ) . " pages/sec" );
		}

		# Perform the memory_get_peak_usage() when all the other data has been output so there's no damage if it dies.
		# It is only available since 5.2.0 (since 5.2.1 if you haven't compiled with --enable-memory-limit)
		$this->error( "Memory peak usage of " . memory_get_peak_usage() . " bytes\n" );
	}

	/**
	 * When the script declares that it needs no database, turns off
	 * database-backed messages, switches the localisation cache to the null
	 * store and registers disableInterwikis() on InterwikiLoadPrefix.
	 */
	public function finalSetup() {
		parent::finalSetup();

		if ( $this->getDbType() == Maintenance::DB_NONE ) {
			global $wgUseDatabaseMessages, $wgLocalisationCacheConf, $wgHooks;
			$wgUseDatabaseMessages = false;
			$wgLocalisationCacheConf['storeClass'] = 'LCStore_Null';
			$wgHooks['InterwikiLoadPrefix'][] = 'DumpIterator::disableInterwikis';
		}
	}

	/**
	 * InterwikiLoadPrefix hook handler.
	 *
	 * Title::newFromText will check on each namespaced article if it's an
	 * interwiki. We always answer that it is not.
	 *
	 * @param $prefix String: prefix being looked up (unused)
	 * @param $data Array: hook output parameter (left untouched)
	 * @return Boolean: always false
	 */
	public static function disableInterwikis( $prefix, &$data ) {
		return false;
	}

	/**
	 * Callback run for each revision. Counts it, applies the --from skip
	 * logic and forwards it to processRevision().
	 *
	 * @param $rev Object: revision exposing getTitle()
	 */
	public function handleRevision( $rev ) {
		$title = $rev->getTitle();
		if ( !$title ) {
			$this->error( "Got bogus revision with null title!" );
			return;
		}

		$this->count++;
		if ( $this->from !== null ) {
			// Compare as strings explicitly: a loose comparison would rely on
			// implicit object-to-string conversion and would treat distinct
			// numeric-looking titles (e.g. "1e3" and "1000") as equal.
			if ( (string)$this->from !== (string)$title ) {
				return;
			}
			$this->output( "Skipped " . ( $this->count - 1 ) . " pages\n" );

			// Start counting from the first revision that is actually processed.
			$this->count = 1;
			$this->from = null;
		}

		$this->processRevision( $rev );
	}

	/* Stub function for processing additional options */
	public function checkOptions() {
		return;
	}

	/* Stub function for giving data about what was computed */
	public function conclusions() {
		return;
	}

	/* Core function which does whatever the maintenance script is designed to do */
	abstract public function processRevision( $rev );
}

/**
 * Maintenance script that prints the title and timestamp of every revision
 * whose text matches the regular expression given with --regex.
 */
class SearchDump extends DumpIterator {

	public function __construct() {
		parent::__construct();
		$this->mDescription = "Runs a regex in the revisions from a dump";
		$this->addOption( 'regex', 'Searching regex', true, true );
	}

	/**
	 * This script declares that it does not need a database.
	 */
	public function getDbType() {
		return Maintenance::DB_NONE;
	}

	/**
	 * Rejects a malformed --regex up front. preg_match() returns false (not
	 * 0) on a pattern error, which would otherwise be indistinguishable from
	 * "no match" on every single revision of the dump.
	 */
	public function checkOptions() {
		$regex = $this->getOption( 'regex' );
		if ( preg_match( $regex, '' ) === false ) {
			$this->error( "Invalid regex: $regex", true );
		}
	}

	/**
	 * Reports the revision if its text matches the --regex pattern.
	 *
	 * @param $rev Object: revision exposing getText(), getTitle() and getTimestamp()
	 */
	public function processRevision( $rev ) {
		if ( preg_match( $this->getOption( 'regex' ), $rev->getText() ) ) {
			$this->output( $rev->getTitle() . " matches at edit from " . $rev->getTimestamp() . "\n" );
		}
	}
}

$maintClass = "SearchDump";
require_once( RUN_MAINTENANCE_IF_MAIN );