MediaWiki  REL1_21
syncFileBackend.php
Go to the documentation of this file.
00001 <?php
00024 require_once( __DIR__ . '/Maintenance.php' );
00025 
/**
 * Maintenance script that syncs one file backend with another by replaying
 * the paths recorded in the source backend's journal.
 *
 * For every journal entry in the requested ID range, the affected path is
 * either copied from the source backend to the destination backend or
 * deleted from the destination, depending on whether it currently exists
 * in the source.
 */
class SyncFileBackend extends Maintenance {
	public function __construct() {
		parent::__construct();
		$this->mDescription = "Sync one file backend with another using the journal";
		$this->addOption( 'src', 'Name of backend to sync from', true, true );
		$this->addOption( 'dst', 'Name of destination backend to sync', false, true );
		$this->addOption( 'start', 'Starting journal ID', false, true );
		$this->addOption( 'end', 'Ending journal ID', false, true );
		$this->addOption( 'posdir', 'Directory to read/record journal positions', false, true );
		$this->addOption( 'posdump', 'Just dump current journal position into the position dir.' );
		$this->addOption( 'postime', 'For position dumps, get the ID at this time', false, true );
		$this->addOption( 'verbose', 'Verbose mode', false, false, 'v' );
		$this->setBatchSize( 50 );
	}

	/**
	 * Entry point: either dump the journal position (--posdump) or sync the
	 * destination backend with the source backend over a journal ID range.
	 */
	public function execute() {
		$src = FileBackendGroup::singleton()->get( $this->getOption( 'src' ) );

		$posDir = $this->getOption( 'posdir' );
		// One position file per wiki inside the position directory
		$posFile = $posDir ? $posDir . '/' . wfWikiID() : false;

		if ( $this->hasOption( 'posdump' ) ) {
			// Just dump the current position into the specified position dir
			if ( !$this->hasOption( 'posdir' ) ) {
				$this->error( "Param posdir required!", 1 );
			}
			if ( $this->hasOption( 'postime' ) ) {
				$id = (int)$src->getJournal()->getPositionAtTime( $this->getOption( 'postime' ) );
				$this->output( "Requested journal position is $id.\n" );
			} else {
				$id = (int)$src->getJournal()->getCurrentPosition();
				$this->output( "Current journal position is $id.\n" );
			}
			if ( file_put_contents( $posFile, $id, LOCK_EX ) !== false ) {
				$this->output( "Saved journal position file.\n" );
			} else {
				// Report through error() so the failure is not hidden by --quiet
				$this->error( "Could not save journal position file." );
			}
			if ( $this->isQuiet() ) {
				print $id; // give a single machine-readable number
			}
			return;
		}

		if ( !$this->hasOption( 'dst' ) ) {
			$this->error( "Param dst required!", 1 );
		}
		$dst = FileBackendGroup::singleton()->get( $this->getOption( 'dst' ) );

		// Journal IDs are integers; normalize the raw command line strings once
		// so that the comparisons and arithmetic below never see non-numeric text.
		$start = (int)$this->getOption( 'start', 0 );
		if ( !$start && $posFile && is_dir( $posDir ) ) {
			$start = is_file( $posFile )
				? (int)trim( file_get_contents( $posFile ) )
				: 0;
			++$start; // we already did this ID, start with the next one
			$startFromPosFile = true;
		} else {
			$startFromPosFile = false;
		}
		// INF is the "no upper bound" sentinel when --end is not given
		$end = $this->hasOption( 'end' ) ? (int)$this->getOption( 'end' ) : INF;

		$this->output( "Synchronizing backend '{$dst->getName()}' to '{$src->getName()}'...\n" );
		$this->output( "Starting journal position is $start.\n" );
		if ( is_finite( $end ) ) {
			$this->output( "Ending journal position is $end.\n" );
		}

		// Periodically update the position file.
		// This is a best-effort write; the final write below reports failures.
		$callback = function( $pos ) use ( $startFromPosFile, $posFile, $start ) {
			if ( $startFromPosFile && $pos >= $start ) { // successfully advanced
				file_put_contents( $posFile, $pos, LOCK_EX );
			}
		};

		// Actually sync the dest backend with the reference backend
		$lastOKPos = $this->syncBackends( $src, $dst, $start, $end, $callback );

		// Update the sync position file
		if ( $startFromPosFile && $lastOKPos >= $start ) { // successfully advanced
			if ( file_put_contents( $posFile, $lastOKPos, LOCK_EX ) !== false ) {
				$this->output( "Updated journal position file.\n" );
			} else {
				// Report through error() so the failure is not hidden by --quiet
				$this->error( "Could not update journal position file." );
			}
		}

		if ( $lastOKPos === false ) {
			if ( !$start ) {
				$this->output( "No journal entries found.\n" );
			} else {
				$this->output( "No new journal entries found.\n" );
			}
		} else {
			$this->output( "Stopped synchronization at journal position $lastOKPos.\n" );
		}

		if ( $this->isQuiet() ) {
			print $lastOKPos; // give a single machine-readable number
		}
	}

	/**
	 * Sync $dst with $src for journal entries in the range [$start, $end],
	 * one batch at a time. Stops at the first batch that fails so that there
	 * are no gaps below the returned position.
	 *
	 * @param FileBackend $src Backend to read the journal and files from
	 * @param FileBackend $dst Backend to bring up to date
	 * @param int $start Starting journal ID
	 * @param int|float $end Ending journal ID (INF for no upper bound)
	 * @param Closure $callback Called with the last OK position after each good batch
	 * @return int|bool Last journal ID successfully synced, 0 if the very first
	 *   batch failed, or false if the first batch had no entries at all
	 */
	protected function syncBackends(
		FileBackend $src, FileBackend $dst, $start, $end, Closure $callback
	) {
		$lastOKPos = 0; // failed
		$first = true; // first batch

		if ( $start > $end ) { // sanity
			$this->error( "Error: given starting ID greater than ending ID.", 1 );
		}

		$next = null; // filled in by reference by getChangeEntries()
		do {
			$limit = min( $this->mBatchSize, $end - $start + 1 ); // don't go pass ending ID
			$this->output( "Doing id $start to " . ( $start + $limit - 1 ) . "...\n" );

			$entries = $src->getJournal()->getChangeEntries( $start, $limit, $next );
			$start = $next; // start where we left off next time
			if ( $first && !count( $entries ) ) {
				return false; // nothing to do
			}
			$first = false;

			$lastPosInBatch = 0;
			$pathsInBatch = array(); // changed paths
			foreach ( $entries as $entry ) {
				if ( $entry['op'] !== 'null' ) { // null ops are just for reference
					$pathsInBatch[$entry['path']] = 1; // remove duplicates
				}
				$lastPosInBatch = $entry['id'];
			}

			$status = $this->syncFileBatch( array_keys( $pathsInBatch ), $src, $dst );
			if ( $status->isOK() ) {
				$lastOKPos = max( $lastOKPos, $lastPosInBatch );
				$callback( $lastOKPos ); // update position file
			} else {
				$this->error( print_r( $status->getErrorsArray(), true ) );
				break; // no gaps; everything up to $lastPos must be OK
			}

			if ( !$start ) {
				$this->output( "End of journal entries.\n" );
			}
		} while ( $start && $start <= $end );

		return $lastOKPos;
	}

	/**
	 * Sync the given paths from $src to $dst: paths that exist in the source
	 * are stored (with overwrite) into the destination unless both copies are
	 * already identical; paths missing from the source are deleted from the
	 * destination.
	 *
	 * @param array $paths List of storage paths taken from the journal
	 * @param FileBackend $src
	 * @param FileBackend $dst
	 * @return Status
	 */
	protected function syncFileBatch( array $paths, FileBackend $src, FileBackend $dst ) {
		$status = Status::newGood();
		if ( !count( $paths ) ) {
			return $status; // nothing to do
		}

		// Source: convert internal backend names (FileBackendMultiWrite) to the public one
		$sPaths = $this->replaceNamePaths( $paths, $src );
		// Destination: get corresponding path name
		$dPaths = $this->replaceNamePaths( $paths, $dst );

		// Lock the live backend paths from modification.
		// The lock objects are kept in locals so they stay alive until this method returns.
		$sLock = $src->getScopedFileLocks( $sPaths, LockManager::LOCK_UW, $status );
		$eLock = $dst->getScopedFileLocks( $dPaths, LockManager::LOCK_EX, $status );
		if ( !$status->isOK() ) {
			return $status;
		}

		$ops = array();
		$fsFiles = array();
		foreach ( $sPaths as $i => $sPath ) {
			$dPath = $dPaths[$i]; // destination
			$sExists = $src->fileExists( array( 'src' => $sPath, 'latest' => 1 ) );
			if ( $sExists === true ) { // exists in source
				if ( $this->filesAreSame( $src, $dst, $sPath, $dPath ) ) {
					continue; // avoid local copies for non-FS backends
				}
				// Note: getLocalReference() is fast for FS backends
				$fsFile = $src->getLocalReference( array( 'src' => $sPath, 'latest' => 1 ) );
				if ( !$fsFile ) {
					$this->error( "Unable to sync '$dPath': could not get local copy." );
					$status->fatal( 'backend-fail-internal', $src->getName() );
					return $status;
				}
				$fsFiles[] = $fsFile; // keep TempFSFile objects alive as needed
				// Note: prepare() is usually fast for key/value backends
				$status->merge( $dst->prepare( array(
					'dir' => dirname( $dPath ), 'bypassReadOnly' => 1 ) ) );
				if ( !$status->isOK() ) {
					return $status;
				}
				$ops[] = array( 'op' => 'store',
					'src' => $fsFile->getPath(), 'dst' => $dPath, 'overwrite' => 1 );
			} elseif ( $sExists === false ) { // does not exist in source
				$ops[] = array( 'op' => 'delete', 'src' => $dPath, 'ignoreMissingSource' => 1 );
			} else { // error
				$this->error( "Unable to sync '$dPath': could not stat file." );
				$status->fatal( 'backend-fail-internal', $src->getName() );
				return $status;
			}
		}

		$startTime = microtime( true );
		$status = $dst->doQuickOperations( $ops, array( 'bypassReadOnly' => 1 ) );
		if ( !$status->isOK() ) {
			sleep( 10 ); // wait and retry copy again
			$status = $dst->doQuickOperations( $ops, array( 'bypassReadOnly' => 1 ) );
		}
		$elapsedMs = floor( ( microtime( true ) - $startTime ) * 1000 );
		if ( $status->isOK() && $this->hasOption( 'verbose' ) ) {
			$this->output( "Synchronized these file(s) [{$elapsedMs}ms]:\n" .
				implode( "\n", $dPaths ) . "\n" );
		}

		return $status;
	}

	/**
	 * Substitute the backend name in storage paths with the name of $backend.
	 *
	 * @param array|string $paths Storage path(s) of the form mwstore://<backend>/...
	 * @param FileBackend $backend Backend whose name replaces the one in the path(s)
	 * @return array|string Rewritten path(s), same shape as $paths
	 */
	protected function replaceNamePaths( $paths, FileBackend $backend ) {
		return preg_replace(
			'!^mwstore://([^/]+)!',
			StringUtils::escapeRegexReplacement( "mwstore://" . $backend->getName() ),
			$paths // string or array
		);
	}

	/**
	 * Check whether a file has the same size and SHA-1 in both backends.
	 *
	 * A lookup that fails (returns false) is never treated as a match, so a
	 * pair of failed lookups cannot make two files look identical; the caller
	 * then falls back to copying the file again, which is the safe outcome.
	 *
	 * @param FileBackend $src
	 * @param FileBackend $dst
	 * @param string $sPath Storage path in the source backend
	 * @param string $dPath Storage path in the destination backend
	 * @return bool
	 */
	protected function filesAreSame( FileBackend $src, FileBackend $dst, $sPath, $dPath ) {
		$sSize = $src->getFileSize( array( 'src' => $sPath ) );
		$dSize = $dst->getFileSize( array( 'src' => $dPath ) );
		if ( $sSize === false || $sSize !== $dSize ) {
			return false; // short-circuit: skip the more expensive hash lookups
		}

		$sSha1 = $src->getFileSha1Base36( array( 'src' => $sPath ) );
		$dSha1 = $dst->getFileSha1Base36( array( 'src' => $dPath ) );

		return ( $sSha1 !== false && $sSha1 === $dSha1 );
	}
}
00290 
// Register this script's class name, then include the maintenance bootstrap
// (presumably the runner reads $maintClass to know what to instantiate).
$maintClass = 'SyncFileBackend';
require_once RUN_MAINTENANCE_IF_MAIN;