MediaWiki
REL1_19
|
<?php
/**
 * Import pages from an XML dump (as produced by Special:Export or
 * dumpBackup.php) into the current wiki.
 *
 * @file
 * @ingroup Maintenance
 */

require_once( dirname( __FILE__ ) . '/Maintenance.php' );

/**
 * Maintenance script that reads an XML dump from a file or from stdin and
 * feeds it through WikiImporter, optionally filtering by namespace and
 * printing progress to stderr.
 *
 * @ingroup Maintenance
 */
class BackupReader extends Maintenance {
	/** Progress is reported whenever pageCount is a multiple of this value */
	var $reportingInterval = 100;
	/** Number of pages seen so far (incremented by reportPage()) */
	var $pageCount = 0;
	/** Number of revisions and log items handled so far */
	var $revCount = 0;
	/** Number of uploads handled so far */
	var $uploadCount = 0;
	/** When true, the dump is parsed but nothing is written */
	var $dryRun = false;
	/** When true, upload data found in the dump is processed */
	var $uploads = false;
	/** Base path handed to the importer for files, or false if unset */
	var $imageBasePath = false;
	/** Array of namespace indexes to import, or false to import everything */
	var $nsFilter = false;

	/** Stream handle used by progress(); opened on php://stderr */
	var $stderr;
	/** Value of wfTime() when the import started */
	var $startTime;
	/** Value returned by WikiImporter::setRevisionCallback() */
	var $importCallback;
	/** Value returned by WikiImporter::setUploadCallback() */
	var $uploadCallback;
	/** Value returned by WikiImporter::setLogItemCallback() */
	var $logItemCallback;

	/**
	 * Registers the script description, options and the optional file argument.
	 */
	function __construct() {
		parent::__construct();
		$wrappers = stream_get_wrappers();
		$gz = in_array( 'compress.zlib', $wrappers ) ? 'ok' : '(disabled; requires PHP zlib module)';
		$bz2 = in_array( 'compress.bzip2', $wrappers ) ? 'ok' : '(disabled; requires PHP bzip2 module)';

		$this->mDescription = <<<TEXT
This script reads pages from an XML file as produced from Special:Export or
dumpBackup.php, and saves them into the current wiki.

Compressed XML files may be read directly:
  .gz $gz
  .bz2 $bz2
  .7z (if 7za executable is in PATH)

Note that for very large data sets, importDump.php may be slow; there are
alternate methods which can be much faster for full site restoration:
<http://www.mediawiki.org/wiki/Manual:Importing_XML_dumps>
TEXT;
		$this->stderr = fopen( "php://stderr", "wt" );
		$this->addOption( 'report',
			'Report position and speed after every n pages processed', false, true );
		$this->addOption( 'namespaces',
			'Import only the pages from namespaces belonging to the list of ' .
			'pipe-separated namespace names or namespace indexes', false, true );
		$this->addOption( 'dry-run', 'Parse dump without actually importing pages' );
		$this->addOption( 'debug', 'Output extra verbose debug information' );
		$this->addOption( 'uploads', 'Process file upload data if included (experimental)' );
		$this->addOption( 'no-updates', 'Disable link table updates. ' .
			'Is faster but leaves the wiki in an inconsistent state' );
		$this->addOption( 'image-base-path', 'Import files from a specified path', false, true );
		$this->addArg( 'file', 'Dump file to import [else use stdin]', false );
	}

	/**
	 * Entry point: reads the options, then imports from the given file
	 * argument or, when none was given, from stdin.
	 */
	public function execute() {
		if( wfReadOnly() ) {
			$this->error( "Wiki is in read-only mode; you'll need to disable it for import to work.", true );
		}

		$this->reportingInterval = intval( $this->getOption( 'report', 100 ) );
		if ( !$this->reportingInterval ) {
			$this->reportingInterval = 100; // avoid division by zero
		}

		$this->dryRun = $this->hasOption( 'dry-run' );
		$this->uploads = $this->hasOption( 'uploads' ); // experimental!
		if ( $this->hasOption( 'image-base-path' ) ) {
			$this->imageBasePath = $this->getOption( 'image-base-path' );
		}
		if ( $this->hasOption( 'namespaces' ) ) {
			$this->setNsfilter( explode( '|', $this->getOption( 'namespaces' ) ) );
		}

		if( $this->hasArg() ) {
			$this->importFromFile( $this->getArg() );
		} else {
			$this->importFromStdin();
		}

		$this->output( "Done!\n" );
		$this->output( "You might want to run rebuildrecentchanges.php to regenerate RecentChanges\n" );
	}

	/**
	 * Restricts the import to the given namespaces.
	 *
	 * @param $namespaces Array of namespace names or numeric indexes (as
	 *   strings); an empty array removes the filter.
	 */
	function setNsfilter( array $namespaces ) {
		if ( count( $namespaces ) == 0 ) {
			$this->nsFilter = false;
			return;
		}
		$this->nsFilter = array_unique( array_map( array( $this, 'getNsIndex' ), $namespaces ) );
	}

	/**
	 * Resolves one namespace name or numeric index to a namespace index.
	 * Aborts the script if the value matches neither.
	 *
	 * @param $namespace String: namespace name or index
	 * @return Integer namespace index
	 */
	private function getNsIndex( $namespace ) {
		global $wgContLang;
		if ( ( $result = $wgContLang->getNsIndex( $namespace ) ) !== false ) {
			return $result;
		}
		$ns = intval( $namespace );
		// Accept the value as an index only if it is a clean integer string
		// and the content language knows a namespace with that index.
		if ( strval( $ns ) === $namespace && $wgContLang->getNsText( $ns ) !== false ) {
			return $ns;
		}
		$this->error( "Unknown namespace text / index specified: $namespace", true );
	}

	/**
	 * Tells whether an object belongs to a namespace excluded by the filter.
	 * Aborts the script for any other kind of object.
	 *
	 * @param $obj Title|Revision|WikiRevision
	 * @return Boolean true if a filter is set and the namespace is not in it
	 */
	private function skippedNamespace( $obj ) {
		if ( $obj instanceof Title ) {
			$ns = $obj->getNamespace();
		} elseif ( $obj instanceof Revision ) {
			$ns = $obj->getTitle()->getNamespace();
		} elseif ( $obj instanceof WikiRevision ) {
			$ns = $obj->title->getNamespace();
		} else {
			echo wfBacktrace();
			$this->error( "Cannot get namespace of object in " . __METHOD__, true );
		}
		return is_array( $this->nsFilter ) && !in_array( $ns, $this->nsFilter );
	}

	/**
	 * Page callback registered with the importer; only counts pages.
	 *
	 * @param $page Unused
	 */
	function reportPage( $page ) {
		$this->pageCount++;
	}

	/**
	 * Revision callback registered with the importer. Counts the revision,
	 * reports progress and, unless in dry-run mode, forwards the revision to
	 * the callback stored in importCallback.
	 *
	 * @param $rev Object providing getTitle() (presumably a WikiRevision)
	 */
	function handleRevision( $rev ) {
		$title = $rev->getTitle();
		if ( !$title ) {
			$this->progress( "Got bogus revision with null title!" );
			return;
		}

		if ( $this->skippedNamespace( $title ) ) {
			return;
		}

		$this->revCount++;
		$this->report();

		if ( !$this->dryRun ) {
			call_user_func( $this->importCallback, $rev );
		}
	}

	/**
	 * Upload callback registered with the importer. Does nothing unless the
	 * 'uploads' option was given.
	 *
	 * @param $revision Object accepted by skippedNamespace() that provides
	 *   getFilename() and importUpload() (presumably a WikiRevision)
	 * @return Mixed result of the deadlockLoop() call, or null when the
	 *   upload is skipped or in dry-run mode
	 */
	function handleUpload( $revision ) {
		if ( $this->uploads ) {
			if ( $this->skippedNamespace( $revision ) ) {
				return;
			}
			$this->uploadCount++;
			// $this->report();
			$this->progress( "upload: " . $revision->getFilename() );

			if ( !$this->dryRun ) {
				// bluuuh hack
				// call_user_func( $this->uploadCallback, $revision );
				$dbw = wfGetDB( DB_MASTER );
				return $dbw->deadlockLoop( array( $revision, 'importUpload' ) );
			}
		}
	}

	/**
	 * Log item callback registered with the importer. Counts the item,
	 * reports progress and, unless in dry-run mode, forwards the item to the
	 * callback stored in logItemCallback.
	 *
	 * @param $rev Object accepted by skippedNamespace()
	 */
	function handleLogItem( $rev ) {
		if ( $this->skippedNamespace( $rev ) ) {
			return;
		}
		$this->revCount++;
		$this->report();

		if ( !$this->dryRun ) {
			call_user_func( $this->logItemCallback, $rev );
		}
	}

	/**
	 * Shows a progress report when exactly one of these holds: $final is
	 * true, or pageCount is a multiple of the reporting interval.
	 *
	 * @param $final Boolean
	 */
	function report( $final = false ) {
		if ( $final xor ( $this->pageCount % $this->reportingInterval == 0 ) ) {
			$this->showReport();
		}
	}

	/**
	 * Prints page/revision counts and rates (unless quiet), then waits for
	 * slaves and runs the pending deferred updates.
	 */
	function showReport() {
		if ( !$this->mQuiet ) {
			$delta = wfTime() - $this->startTime;
			if ( $delta ) {
				$rate = sprintf( "%.2f", $this->pageCount / $delta );
				$revrate = sprintf( "%.2f", $this->revCount / $delta );
			} else {
				$rate = '-';
				$revrate = '-';
			}
			# Logs dumps don't have page tallies
			if ( $this->pageCount ) {
				$this->progress( "$this->pageCount ($rate pages/sec $revrate revs/sec)" );
			} else {
				$this->progress( "$this->revCount ($revrate revs/sec)" );
			}
		}
		wfWaitForSlaves();
		// XXX: Don't let deferred jobs array get absurdly large (bug 24375)
		DeferredUpdates::doUpdates( 'commit' );
	}

	/**
	 * Writes one line to stderr.
	 *
	 * @param $string String: text to print (a newline is appended)
	 */
	function progress( $string ) {
		fwrite( $this->stderr, $string . "\n" );
	}

	/**
	 * Imports a dump from a file. Files ending in .gz, .bz2 or .7z are
	 * opened through the matching stream wrapper. Aborts the script if the
	 * file cannot be opened.
	 *
	 * @param $filename String: path of the dump file
	 * @return Mixed result of importFromHandle()
	 */
	function importFromFile( $filename ) {
		$source = $filename;
		if ( preg_match( '/\.gz$/', $filename ) ) {
			$source = 'compress.zlib://' . $filename;
		} elseif ( preg_match( '/\.bz2$/', $filename ) ) {
			$source = 'compress.bzip2://' . $filename;
		} elseif ( preg_match( '/\.7z$/', $filename ) ) {
			$source = 'mediawiki.compress.7z://' . $filename;
		}

		$file = fopen( $source, 'rt' );
		if ( $file === false ) {
			// Stop here instead of handing an invalid handle to the importer.
			$this->error( "Unable to open dump file for reading: $filename", true );
		}
		return $this->importFromHandle( $file );
	}

	/**
	 * Imports a dump from stdin. If stdin is a terminal, the help text is
	 * requested via maybeHelp( true ) first.
	 *
	 * @return Mixed result of importFromHandle()
	 */
	function importFromStdin() {
		$file = fopen( 'php://stdin', 'rt' );
		if( self::posix_isatty( $file ) ) {
			$this->maybeHelp( true );
		}
		return $this->importFromHandle( $file );
	}

	/**
	 * Configures a WikiImporter on the given stream and runs the import.
	 *
	 * @param $handle Resource: open stream containing the XML dump
	 * @return Mixed result of WikiImporter::doImport()
	 */
	function importFromHandle( $handle ) {
		$this->startTime = wfTime();

		$source = new ImportStreamSource( $handle );
		$importer = new WikiImporter( $source );

		if( $this->hasOption( 'debug' ) ) {
			$importer->setDebug( true );
		}
		if ( $this->hasOption( 'no-updates' ) ) {
			$importer->setNoUpdates( true );
		}

		// Route importer events through this object. The values returned by
		// the setters are kept so the handlers can call them afterwards
		// (presumably the importer's previous callbacks — confirm in WikiImporter).
		$importer->setPageCallback( array( $this, 'reportPage' ) );
		$this->importCallback = $importer->setRevisionCallback(
			array( $this, 'handleRevision' ) );
		$this->uploadCallback = $importer->setUploadCallback(
			array( $this, 'handleUpload' ) );
		$this->logItemCallback = $importer->setLogItemCallback(
			array( $this, 'handleLogItem' ) );

		if ( $this->uploads ) {
			$importer->setImportUploads( true );
		}
		if ( $this->imageBasePath ) {
			$importer->setImageBasePath( $this->imageBasePath );
		}

		if ( $this->dryRun ) {
			$importer->setPageOutCallback( null );
		}

		return $importer->doImport();
	}
}

$maintClass = 'BackupReader';
require_once( RUN_MAINTENANCE_IF_MAIN );