MediaWiki  REL1_24
syncFileBackend.php
Go to the documentation of this file.
00001 <?php
00024 require_once __DIR__ . '/Maintenance.php';
00025 
/**
 * Maintenance script that syncs a destination file backend with a source
 * backend, using the change journal of the source backend to find the
 * paths that need to be copied or deleted.
 *
 * The last journal position that was synced can be kept in a per-wiki
 * position file (see the 'posdir' option) so later runs resume from there.
 */
class SyncFileBackend extends Maintenance {
    /**
     * Register the command-line options and the default batch size.
     */
    public function __construct() {
        parent::__construct();
        $this->mDescription = "Sync one file backend with another using the journal";
        $this->addOption( 'src', 'Name of backend to sync from', true, true );
        $this->addOption( 'dst', 'Name of destination backend to sync', false, true );
        $this->addOption( 'start', 'Starting journal ID', false, true );
        $this->addOption( 'end', 'Ending journal ID', false, true );
        $this->addOption( 'posdir', 'Directory to read/record journal positions', false, true );
        $this->addOption( 'posdump', 'Just dump current journal position into the position dir.' );
        $this->addOption( 'postime', 'For position dumps, get the ID at this time', false, true );
        $this->addOption( 'backoff', 'Stop at entries younger than this age (sec).', false, true );
        $this->addOption( 'verbose', 'Verbose mode', false, false, 'v' );
        $this->setBatchSize( 50 );
    }

    /**
     * Script entry point.
     *
     * With 'posdump', only writes a journal position of the source backend
     * (current, or the one at 'postime') into the position file and returns.
     * Otherwise syncs the 'dst' backend from the 'src' journal, starting at
     * 'start' (or one past the ID stored in the position file when 'start'
     * is not given) and stopping at 'end' or at the 'backoff' cut-off.
     */
    public function execute() {
        $src = FileBackendGroup::singleton()->get( $this->getOption( 'src' ) );

        $posDir = $this->getOption( 'posdir' );
        // One position file per wiki, named after the wiki ID
        $posFile = $posDir ? $posDir . '/' . wfWikiID() : false;

        if ( $this->hasOption( 'posdump' ) ) {
            // Just dump the current position into the specified position dir
            if ( !$this->hasOption( 'posdir' ) ) {
                $this->error( "Param posdir required!", 1 );
            }
            if ( $this->hasOption( 'postime' ) ) {
                $id = (int)$src->getJournal()->getPositionAtTime( $this->getOption( 'postime' ) );
                $this->output( "Requested journal position is $id.\n" );
            } else {
                $id = (int)$src->getJournal()->getCurrentPosition();
                $this->output( "Current journal position is $id.\n" );
            }
            if ( file_put_contents( $posFile, $id, LOCK_EX ) !== false ) {
                $this->output( "Saved journal position file.\n" );
            } else {
                // Report through error() so the failure is not hidden in quiet mode
                $this->error( "Could not save journal position file." );
            }
            if ( $this->isQuiet() ) {
                print $id; // give a single machine-readable number
            }

            return;
        }

        if ( !$this->hasOption( 'dst' ) ) {
            $this->error( "Param dst required!", 1 );
        }
        $dst = FileBackendGroup::singleton()->get( $this->getOption( 'dst' ) );

        // Option values arrive as strings; normalize before doing arithmetic on them
        $start = (int)$this->getOption( 'start', 0 );
        $startFromPosFile = ( !$start && $posFile && is_dir( $posDir ) );
        if ( $startFromPosFile ) {
            $start = is_file( $posFile )
                ? (int)trim( file_get_contents( $posFile ) )
                : 0;
            ++$start; // we already did this ID, start with the next one
        }

        if ( $this->hasOption( 'backoff' ) ) {
            // Stop at the journal position that was current 'backoff' seconds ago
            $time = time() - (int)$this->getOption( 'backoff', 0 );
            $end = (int)$src->getJournal()->getPositionAtTime( $time );
        } elseif ( $this->hasOption( 'end' ) ) {
            $end = (int)$this->getOption( 'end' );
        } else {
            $end = INF; // no upper bound
        }

        $this->output( "Synchronizing backend '{$dst->getName()}' to '{$src->getName()}'...\n" );
        $this->output( "Starting journal position is $start.\n" );
        if ( is_finite( $end ) ) {
            $this->output( "Ending journal position is $end.\n" );
        }

        // Periodically update the position file
        $callback = function ( $pos ) use ( $startFromPosFile, $posFile, $start ) {
            if ( $startFromPosFile && $pos >= $start ) { // successfully advanced
                file_put_contents( $posFile, $pos, LOCK_EX );
            }
        };

        // Actually sync the dest backend with the reference backend
        $lastOKPos = $this->syncBackends( $src, $dst, $start, $end, $callback );

        // Update the sync position file
        if ( $startFromPosFile && $lastOKPos >= $start ) { // successfully advanced
            if ( file_put_contents( $posFile, $lastOKPos, LOCK_EX ) !== false ) {
                $this->output( "Updated journal position file.\n" );
            } else {
                // Report through error() so the failure is not hidden in quiet mode
                $this->error( "Could not update journal position file." );
            }
        }

        if ( $lastOKPos === false ) {
            if ( !$start ) {
                $this->output( "No journal entries found.\n" );
            } else {
                $this->output( "No new journal entries found.\n" );
            }
        } else {
            $this->output( "Stopped synchronization at journal position $lastOKPos.\n" );
        }

        if ( $this->isQuiet() ) {
            print $lastOKPos; // give a single machine-readable number
        }
    }

    /**
     * Sync $dst with $src for the journal entries with IDs in [$start, $end],
     * working in batches of at most $this->mBatchSize entries and stopping
     * at the first batch that fails.
     *
     * @param FileBackend $src Backend whose journal is read
     * @param FileBackend $dst Backend that is updated
     * @param int $start Starting journal position
     * @param int|float $end Ending journal position (INF for no limit)
     * @param Closure $callback Called with the last OK position after each good batch
     * @return int|bool Highest journal position synced without error (0 if
     *   the first batch failed), or false if the first batch had no entries
     */
    protected function syncBackends(
        FileBackend $src, FileBackend $dst, $start, $end, Closure $callback
    ) {
        $lastOKPos = 0; // failed
        $first = true; // first batch

        if ( $start > $end ) { // sanity
            $this->error( "Error: given starting ID greater than ending ID.", 1 );
        }

        $next = null; // filled in by reference by getChangeEntries()
        do {
            $limit = min( $this->mBatchSize, $end - $start + 1 ); // don't go pass ending ID
            $this->output( "Doing id $start to " . ( $start + $limit - 1 ) . "...\n" );

            $entries = $src->getJournal()->getChangeEntries( $start, $limit, $next );
            $start = $next; // start where we left off next time

            $lastPosInBatch = 0;
            $entriesInRange = 0;
            $pathsInBatch = array(); // changed paths
            foreach ( $entries as $entry ) {
                // $limit caps the number of entries, not the ID range, so when
                // journal IDs have gaps the batch can reach past $end.
                if ( $entry['id'] > $end ) {
                    continue;
                }
                ++$entriesInRange;
                if ( $entry['op'] !== 'null' ) { // null ops are just for reference
                    $pathsInBatch[$entry['path']] = 1; // remove duplicates
                }
                $lastPosInBatch = $entry['id'];
            }

            if ( $first && !$entriesInRange ) {
                return false; // nothing to do
            }
            $first = false;

            $status = $this->syncFileBatch( array_keys( $pathsInBatch ), $src, $dst );
            if ( $status->isOK() ) {
                $lastOKPos = max( $lastOKPos, $lastPosInBatch );
                $callback( $lastOKPos ); // update position file
            } else {
                $this->error( print_r( $status->getErrorsArray(), true ) );
                break; // no gaps; everything up to $lastPos must be OK
            }

            if ( !$start ) {
                $this->output( "End of journal entries.\n" );
            }
        } while ( $start && $start <= $end );

        return $lastOKPos;
    }

    /**
     * Sync a batch of paths from $src to $dst: files that exist in the source
     * and differ from the destination are stored into the destination, files
     * that do not exist in the source are deleted from the destination.
     *
     * @param array $paths Storage paths (mwstore://...) taken from the journal
     * @param FileBackend $src
     * @param FileBackend $dst
     * @return Status
     */
    protected function syncFileBatch( array $paths, FileBackend $src, FileBackend $dst ) {
        $status = Status::newGood();
        if ( !count( $paths ) ) {
            return $status; // nothing to do
        }

        // Source: convert internal backend names (FileBackendMultiWrite) to the public one
        $sPaths = $this->replaceNamePaths( $paths, $src );
        // Destination: get corresponding path name
        $dPaths = $this->replaceNamePaths( $paths, $dst );

        // Lock the live backend paths from modification.
        // The lock objects are scoped, so the variables must stay alive until return.
        $sLock = $src->getScopedFileLocks( $sPaths, LockManager::LOCK_UW, $status );
        $eLock = $dst->getScopedFileLocks( $dPaths, LockManager::LOCK_EX, $status );
        if ( !$status->isOK() ) {
            return $status;
        }

        $src->preloadFileStat( array( 'srcs' => $sPaths, 'latest' => 1 ) );
        $dst->preloadFileStat( array( 'srcs' => $dPaths, 'latest' => 1 ) );

        $ops = array();
        $fsFiles = array();
        foreach ( $sPaths as $i => $sPath ) {
            $dPath = $dPaths[$i]; // destination
            $sExists = $src->fileExists( array( 'src' => $sPath, 'latest' => 1 ) );
            if ( $sExists === true ) { // exists in source
                if ( $this->filesAreSame( $src, $dst, $sPath, $dPath ) ) {
                    continue; // avoid local copies for non-FS backends
                }
                // Note: getLocalReference() is fast for FS backends
                $fsFile = $src->getLocalReference( array( 'src' => $sPath, 'latest' => 1 ) );
                if ( !$fsFile ) {
                    $this->error( "Unable to sync '$dPath': could not get local copy." );
                    $status->fatal( 'backend-fail-internal', $src->getName() );

                    return $status;
                }
                $fsFiles[] = $fsFile; // keep TempFSFile objects alive as needed
                // Note: prepare() is usually fast for key/value backends
                $status->merge( $dst->prepare( array(
                    'dir' => dirname( $dPath ), 'bypassReadOnly' => 1 ) ) );
                if ( !$status->isOK() ) {
                    return $status;
                }
                $ops[] = array( 'op' => 'store',
                    'src' => $fsFile->getPath(), 'dst' => $dPath, 'overwrite' => 1 );
            } elseif ( $sExists === false ) { // does not exist in source
                $ops[] = array( 'op' => 'delete', 'src' => $dPath, 'ignoreMissingSource' => 1 );
            } else { // error
                $this->error( "Unable to sync '$dPath': could not stat file." );
                $status->fatal( 'backend-fail-internal', $src->getName() );

                return $status;
            }
        }

        $startTime = microtime( true );
        $status = $dst->doQuickOperations( $ops, array( 'bypassReadOnly' => 1 ) );
        if ( !$status->isOK() ) {
            sleep( 10 ); // wait and retry copy again
            $status = $dst->doQuickOperations( $ops, array( 'bypassReadOnly' => 1 ) );
        }
        // Note: includes the retry delay when the first attempt failed
        $elapsedMs = floor( ( microtime( true ) - $startTime ) * 1000 );
        if ( $status->isOK() && $this->getOption( 'verbose' ) ) {
            $this->output( "Synchronized these file(s) [{$elapsedMs}ms]:\n" .
                implode( "\n", $dPaths ) . "\n" );
        }

        return $status;
    }

    /**
     * Substitute the backend name in storage paths with the name of $backend.
     *
     * @param array|string $paths One or more "mwstore://<backend>/..." paths
     * @param FileBackend $backend Backend whose name is put into the paths
     * @return array|string Same shape as $paths
     */
    protected function replaceNamePaths( $paths, FileBackend $backend ) {
        return preg_replace(
            '!^mwstore://([^/]+)!',
            StringUtils::escapeRegexReplacement( "mwstore://" . $backend->getName() ),
            $paths // string or array
        );
    }

    /**
     * Check whether a source file and a destination file have the same size
     * and the same SHA-1 hash.
     *
     * A lookup that yields false on the source side is never treated as a
     * match, so two failed lookups cannot compare as "equal" and cause the
     * file to be skipped.
     *
     * @param FileBackend $src
     * @param FileBackend $dst
     * @param string $sPath Storage path in $src
     * @param string $dPath Storage path in $dst
     * @return bool
     */
    protected function filesAreSame( FileBackend $src, FileBackend $dst, $sPath, $dPath ) {
        $sSize = $src->getFileSize( array( 'src' => $sPath ) );
        if ( $sSize === false || $sSize !== $dst->getFileSize( array( 'src' => $dPath ) ) ) {
            return false; // short-circuit before hashing
        }

        $sSha1 = $src->getFileSha1Base36( array( 'src' => $sPath ) );

        return $sSha1 !== false
            && $sSha1 === $dst->getFileSha1Base36( array( 'src' => $dPath ) );
    }
}
00304 
// Name the maintenance class defined in this file, then include the file
// referenced by RUN_MAINTENANCE_IF_MAIN.
// NOTE(review): that constant is not defined here; presumably it comes from
// Maintenance.php and runs $maintClass when this file is the entry script - confirm.
00305 $maintClass = "SyncFileBackend";
00306 require_once RUN_MAINTENANCE_IF_MAIN;