MediaWiki  REL1_24
backup.inc
Go to the documentation of this file.
00001 <?php
/**
 * Dump output that delegates to DumpPipeOutput with the "dbzip2" command.
 */
class DumpDBZip2Output extends DumpPipeOutput {
    /**
     * Hand the fixed command name "dbzip2" and the caller's $file to the
     * DumpPipeOutput constructor.
     *
     * @param string $file Forwarded unchanged as the second constructor
     *   argument (presumably the output file name; confirm in DumpPipeOutput)
     */
    public function __construct( $file ) {
        $command = "dbzip2";
        parent::__construct( $command, $file );
    }
}
00035 
/**
 * Command-line driver for XML dumps.
 *
 * Parses "--option[=value[:param]]" arguments into a chain of output sinks and
 * filters, runs a WikiExporter over the selected pages, revisions or logs, and
 * writes progress reports to stderr.
 */
class BackupDumper {
    /** @var bool Whether showReport() actually prints progress lines */
    public $reporting = true;
    /** @var array|null Pages handed to WikiExporter::pagesByName(); null dumps all pages */
    public $pages = null;
    /** @var bool When true, dump() does not call openStream() (no <mediawiki> and <siteinfo>) */
    public $skipHeader = false;
    /** @var bool When true, dump() does not call closeStream() (no </mediawiki>) */
    public $skipFooter = false;
    /** @var int Start of the page (or log) ID range; 0 when unset */
    public $startId = 0;
    /** @var int End of the page (or log) ID range; 0 when unset */
    public $endId = 0;
    /** @var int Start of the revision ID range; 0 when unset */
    public $revStartId = 0;
    /** @var int End of the revision ID range; 0 when unset */
    public $revEndId = 0;
    /** @var bool Copied onto the exporter's dumpUploads flag */
    public $dumpUploads = false;
    /** @var bool Copied onto the exporter's dumpUploadFileContents flag */
    public $dumpUploadFileContents = false;

    // The five properties below used to be created on the fly by the
    // constructor, initProgress() and showReport(). They are declared here so
    // the class no longer depends on dynamic properties, and kept public
    // because that is the visibility they had as dynamic properties.

    /** @var resource|null Stream opened on php://stderr by the constructor */
    public $stderr = null;
    /** @var int Highest page or revision ID, looked up by initProgress() */
    public $maxCount = 0;
    /** @var float microtime( true ) value taken by initProgress() */
    public $startTime = 0;
    /** @var int Pages counted since the previous report */
    public $pageCountPart = 0;
    /** @var int Revisions counted since the previous report */
    public $revCountPart = 0;

    /** @var int Number of revisions between periodic reports; 0 disables them */
    protected $reportingInterval = 100;
    /** @var int Pages reported so far via reportPage() */
    protected $pageCount = 0;
    /** @var int Revisions reported so far via revCount() */
    protected $revCount = 0;
    /** @var string|null Value of --server; null means use $wgDBserver */
    protected $server = null;
    /** @var object|null Sink (or filter chain) built by processArgs() */
    protected $sink = null;
    /** @var float Time of the previous report */
    protected $lastTime = 0;
    /** @var int Value of $pageCount at the previous report */
    protected $pageCountLast = 0;
    /** @var int Value of $revCount at the previous report */
    protected $revCountLast = 0;

    /** @var array Map of --output type name to class name */
    protected $outputTypes = array();
    /** @var array Map of --filter type name to class name */
    protected $filterTypes = array();

    /** @var int Process ID shown in progress lines, set by initProgress() */
    protected $ID = 0;

    /** @var DatabaseBase|null Connection set through setDb(); used instead of opening one */
    protected $forcedDb = null;

    /** @var object|null Load balancer created by backupDb() and closed by the destructor */
    protected $lb;

    // @todo Unused?
    private $stubText = false; // include rev_text_id instead of text; for 2-pass dump

    /**
     * Open stderr, register the built-in outputs and filters, then build the
     * sink from the given arguments.
     *
     * @param array $args Command-line style arguments ("--name=value:param")
     */
    public function __construct( $args ) {
        $this->stderr = fopen( "php://stderr", "wt" );

        // Built-in output and filter plugins
        $this->registerOutput( 'file', 'DumpFileOutput' );
        $this->registerOutput( 'gzip', 'DumpGZipOutput' );
        $this->registerOutput( 'bzip2', 'DumpBZip2Output' );
        $this->registerOutput( 'dbzip2', 'DumpDBZip2Output' );
        $this->registerOutput( '7zip', 'Dump7ZipOutput' );

        $this->registerFilter( 'latest', 'DumpLatestFilter' );
        $this->registerFilter( 'notalk', 'DumpNotalkFilter' );
        $this->registerFilter( 'namespace', 'DumpNamespaceFilter' );

        $this->sink = $this->processArgs( $args );
    }

    /**
     * Make an output class selectable with --output=<name>.
     *
     * @param string $name Type name used on the command line
     * @param string $class Class instantiated for that type
     */
    public function registerOutput( $name, $class ) {
        $this->outputTypes[$name] = $class;
    }

    /**
     * Make a filter class selectable with --filter=<name>.
     *
     * @param string $name Type name used on the command line
     * @param string $class Class instantiated for that type
     */
    public function registerFilter( $name, $class ) {
        $this->filterTypes[$name] = $class;
    }

    /**
     * Load a plugin and let it register itself through its static
     * register() method, which receives this dumper.
     *
     * @param string $class Plugin class providing a static register() method
     * @param string $file File to require first; skipped when empty
     */
    public function loadPlugin( $class, $file ) {
        if ( $file != '' ) {
            require_once $file;
        }
        $register = array( $class, 'register' );
        // Pass a local alias by reference rather than &$this, which does not
        // work on PHP 7.1 and later. This keeps register() implementations
        // that declare the parameter by reference working.
        $dumper = $this;
        call_user_func_array( $register, array( &$dumper ) );
    }

    /**
     * Interpret the arguments and build the output sink.
     *
     * Every --output starts a new sink; every --filter wraps the sink that is
     * current at that point. Arguments that do not look like "--name..." are
     * ignored, and unknown options go to processOption().
     *
     * @param array $args Command-line style arguments
     * @return object The single sink, or a DumpMultiWriter over all sinks
     *   when more than one --output was given
     */
    public function processArgs( $args ) {
        $sink = null;
        $sinks = array();
        foreach ( $args as $arg ) {
            $matches = array();
            if ( !preg_match( '/^--(.+?)(?:=(.+?)(?::(.+?))?)?$/', $arg, $matches ) ) {
                continue;
            }

            // The value and parameter groups are optional in the pattern;
            // missing ones become null.
            $opt = $matches[1];
            $val = isset( $matches[2] ) ? $matches[2] : null;
            $param = isset( $matches[3] ) ? $matches[3] : null;

            switch ( $opt ) {
                case "plugin":
                    $this->loadPlugin( $val, $param );
                    break;
                case "output":
                    // Park the sink built so far before starting a new one.
                    if ( !is_null( $sink ) ) {
                        $sinks[] = $sink;
                    }
                    if ( !isset( $this->outputTypes[$val] ) ) {
                        $this->fatalError( "Unrecognized output sink type '$val'" );
                    }
                    $type = $this->outputTypes[$val];
                    $sink = new $type( $param );
                    break;
                case "filter":
                    if ( is_null( $sink ) ) {
                        $sink = new DumpOutput();
                    }
                    if ( !isset( $this->filterTypes[$val] ) ) {
                        $this->fatalError( "Unrecognized filter type '$val'" );
                    }
                    $type = $this->filterTypes[$val];
                    $filter = new $type( $sink, $param );

                    // references are lame in php...
                    unset( $sink );
                    $sink = $filter;

                    break;
                case "report":
                    $this->reportingInterval = intval( $val );
                    break;
                case "server":
                    $this->server = $val;
                    break;
                case "force-normal":
                    if ( !function_exists( 'utf8_normalize' ) ) {
                        $this->fatalError( "UTF-8 normalization extension not loaded. " .
                            "Install or remove --force-normal parameter to use slower code." );
                    }
                    break;
                default:
                    $this->processOption( $opt, $val, $param );
            }
        }

        if ( is_null( $sink ) ) {
            $sink = new DumpOutput();
        }
        $sinks[] = $sink;

        if ( count( $sinks ) > 1 ) {
            return new DumpMultiWriter( $sinks );
        } else {
            return $sink;
        }
    }

    /**
     * Extension point for subclasses to add options; does nothing here.
     *
     * @param string $opt Option name without the leading "--"
     * @param string|null $val Text after "=", if any
     * @param string|null $param Text after the first ":" in the value, if any
     */
    public function processOption( $opt, $val, $param ) {
        // extension point for subclasses to add options
    }

    /**
     * Run the export and write it to the configured sink.
     *
     * @param int $history WikiExporter history mode; when the LOGS bit is set
     *   log items are dumped instead of pages
     * @param int $text Text mode passed through to WikiExporter
     */
    public function dump( $history, $text = WikiExporter::TEXT ) {
        # Notice messages will foul up your XML output even if they're
        # relatively harmless.
        if ( ini_get( 'display_errors' ) ) {
            ini_set( 'display_errors', 'stderr' );
        }

        $this->initProgress( $history );

        $db = $this->backupDb();
        $exporter = new WikiExporter( $db, $history, WikiExporter::STREAM, $text );
        $exporter->dumpUploads = $this->dumpUploads;
        $exporter->dumpUploadFileContents = $this->dumpUploadFileContents;

        // The wrapper reports every page and revision back to this object.
        $wrapper = new ExportProgressFilter( $this->sink, $this );
        $exporter->setOutputSink( $wrapper );

        if ( !$this->skipHeader ) {
            $exporter->openStream();
        }
        # Log item dumps: all or by range
        if ( $history & WikiExporter::LOGS ) {
            if ( $this->startId || $this->endId ) {
                $exporter->logsByRange( $this->startId, $this->endId );
            } else {
                $exporter->allLogs();
            }
        } elseif ( is_null( $this->pages ) ) {
            # Page dumps: all or by page ID range
            if ( $this->startId || $this->endId ) {
                $exporter->pagesByRange( $this->startId, $this->endId );
            } elseif ( $this->revStartId || $this->revEndId ) {
                $exporter->revsByRange( $this->revStartId, $this->revEndId );
            } else {
                $exporter->allPages();
            }
        } else {
            # Dump of specific pages
            $exporter->pagesByName( $this->pages );
        }

        if ( !$this->skipFooter ) {
            $exporter->closeStream();
        }

        $this->report( true );
    }

    /**
     * Reset the progress clock and look up the highest ID used as the
     * denominator for the ETA estimate.
     *
     * @param int $history WikiExporter history mode; CURRENT measures against
     *   the page table, anything else against the revision table
     */
    public function initProgress( $history = WikiExporter::FULL ) {
        $table = ( $history == WikiExporter::CURRENT ) ? 'page' : 'revision';
        $field = ( $history == WikiExporter::CURRENT ) ? 'page_id' : 'rev_id';

        $dbr = $this->forcedDb;
        if ( $this->forcedDb === null ) {
            $dbr = wfGetDB( DB_SLAVE );
        }
        $this->maxCount = $dbr->selectField( $table, "MAX($field)", '', __METHOD__ );
        $this->startTime = microtime( true );
        $this->lastTime = $this->startTime;
        $this->ID = getmypid();
    }

    /**
     * Return the connection to dump from: the one given to setDb(), or a new
     * DB_SLAVE connection in the 'dump' group from a fresh load balancer.
     *
     * @return DatabaseBase
     */
    public function backupDb() {
        if ( $this->forcedDb !== null ) {
            return $this->forcedDb;
        }

        $this->lb = wfGetLBFactory()->newMainLB();
        $db = $this->lb->getConnection( DB_SLAVE, 'dump' );

        // Discourage the server from disconnecting us if it takes a long time
        // to read out the big ol' batch query.
        $db->setSessionOptions( array( 'connTimeout' => 3600 * 24 ) );

        return $db;
    }

    /**
     * Force a specific database connection, or pass null to go back to the
     * default lookup.
     *
     * @param DatabaseBase|null $db Connection to use for progress and dumping
     */
    public function setDb( DatabaseBase $db = null ) {
        $this->forcedDb = $db;
    }

    /**
     * Close all connections of the load balancer created by backupDb(), if any.
     */
    public function __destruct() {
        if ( isset( $this->lb ) ) {
            $this->lb->closeAll();
        }
    }

    /**
     * @return string The --server value when one was given, otherwise $wgDBserver
     */
    public function backupServer() {
        global $wgDBserver;

        return $this->server
            ? $this->server
            : $wgDBserver;
    }

    /**
     * Count one finished page.
     */
    public function reportPage() {
        $this->pageCount++;
    }

    /**
     * Count one written revision and print a report when one is due.
     */
    public function revCount() {
        $this->revCount++;
        $this->report();
    }

    /**
     * Print a report when the revision count hits the reporting interval, or,
     * for the final report, when it does not (so the last line is not
     * printed twice).
     *
     * @param bool $final True for the report at the end of the dump
     */
    public function report( $final = false ) {
        $interval = $this->reportingInterval;
        // An interval of 0 (e.g. --report=0 or a non-numeric value) turns
        // periodic reports off instead of triggering a modulo by zero.
        $onInterval = $interval != 0 && ( $this->revCount % $interval == 0 );
        if ( $final xor $onInterval ) {
            $this->showReport();
        }
    }

    /**
     * Write one progress line (counts, overall and recent rates, ETA) through
     * progress(), unless reporting is switched off.
     */
    public function showReport() {
        if ( !$this->reporting ) {
            return;
        }

        $now = wfTimestamp( TS_DB );
        $nowts = microtime( true );
        $deltaAll = $nowts - $this->startTime;
        $deltaPart = $nowts - $this->lastTime;
        $this->pageCountPart = $this->pageCount - $this->pageCountLast;
        $this->revCountPart = $this->revCount - $this->revCountLast;

        $pageRate = '-';
        $revRate = '-';
        $etats = '-';
        if ( $deltaAll ) {
            $pageRate = $this->pageCount / $deltaAll;
            $revRate = $this->revCount / $deltaAll;

            // The ETA needs a non-zero maximum and at least one counted
            // revision; otherwise the estimate would divide by zero.
            $portion = $this->maxCount ? $this->revCount / $this->maxCount : 0;
            if ( $portion ) {
                $eta = $this->startTime + $deltaAll / $portion;
                $etats = wfTimestamp( TS_DB, intval( $eta ) );
            }
        }

        $pageRatePart = '-';
        $revRatePart = '-';
        if ( $deltaPart ) {
            $pageRatePart = $this->pageCountPart / $deltaPart;
            $revRatePart = $this->revCountPart / $deltaPart;
        }

        $this->progress( sprintf(
            "%s: %s (ID %d) %d pages (%0.1f|%0.1f/sec all|curr), "
                . "%d revs (%0.1f|%0.1f/sec all|curr), ETA %s [max %d]",
            $now, wfWikiID(), $this->ID, $this->pageCount, $pageRate,
            $pageRatePart, $this->revCount, $revRate, $revRatePart, $etats,
            $this->maxCount
        ) );

        // Remember the totals so the next report's "curr" rates cover only
        // the work done since this one.
        $this->lastTime = $nowts;
        $this->pageCountLast = $this->pageCount;
        $this->revCountLast = $this->revCount;
    }

    /**
     * Write a line to the stderr stream opened by the constructor.
     *
     * @param string $string Text to write; a newline is appended
     */
    public function progress( $string ) {
        fwrite( $this->stderr, $string . "\n" );
    }

    /**
     * Print a message to stderr and terminate the script with exit status 1.
     *
     * @param string $msg Message to print
     */
    public function fatalError( $msg ) {
        $this->progress( "$msg\n" );
        die( 1 );
    }
}
00382 
/**
 * Dump filter that passes page-close and revision writes to its parent
 * DumpFilter and then notifies a progress tracker about each of them.
 */
class ExportProgressFilter extends DumpFilter {
    /**
     * Progress tracker; reportPage() and revCount() are called on it.
     * Declared explicitly (it used to be created dynamically in the
     * constructor) and public, which is the visibility it had before.
     *
     * @var object
     */
    public $progress;

    /**
     * @param object &$sink Sink handed on to the DumpFilter constructor
     * @param object &$progress Tracker to notify (BackupDumper::dump() passes
     *   the dumper itself)
     */
    function __construct( &$sink, &$progress ) {
        parent::__construct( $sink );
        $this->progress = $progress;
    }

    /**
     * Forward the page-closing markup, then count the page as done.
     *
     * @param string $string Passed unchanged to the parent implementation
     */
    function writeClosePage( $string ) {
        parent::writeClosePage( $string );
        $this->progress->reportPage();
    }

    /**
     * Forward a revision, then count it on the progress tracker.
     *
     * @param object $rev Passed unchanged to the parent implementation
     * @param string $string Passed unchanged to the parent implementation
     */
    function writeRevision( $rev, $string ) {
        parent::writeRevision( $rev, $string );
        $this->progress->revCount();
    }
}