MediaWiki
REL1_24
|
<?php
/**
 * Sync one file backend with another using the source backend's journal.
 *
 * @file
 */

require_once __DIR__ . '/Maintenance.php';

/**
 * Maintenance script that replays the change journal of a source file backend
 * onto a destination backend, optionally tracking the last synced journal
 * position in a per-wiki position file.
 */
class SyncFileBackend extends Maintenance {
	/**
	 * Register the command line options and set the default batch size.
	 */
	public function __construct() {
		parent::__construct();
		$this->mDescription = "Sync one file backend with another using the journal";
		$this->addOption( 'src', 'Name of backend to sync from', true, true );
		$this->addOption( 'dst', 'Name of destination backend to sync', false, true );
		$this->addOption( 'start', 'Starting journal ID', false, true );
		$this->addOption( 'end', 'Ending journal ID', false, true );
		$this->addOption( 'posdir', 'Directory to read/record journal positions', false, true );
		$this->addOption( 'posdump', 'Just dump current journal position into the position dir.' );
		$this->addOption( 'postime', 'For position dumps, get the ID at this time', false, true );
		$this->addOption( 'backoff', 'Stop at entries younger than this age (sec).', false, true );
		$this->addOption( 'verbose', 'Verbose mode', false, false, 'v' );
		$this->setBatchSize( 50 );
	}

	/**
	 * Script entry point.
	 *
	 * With --posdump, only the journal position (current, or the one at
	 * --postime) is written to the position file and nothing is synced.
	 * Otherwise the journal range [start, end] of the source backend is
	 * applied to the destination backend and, when the starting position was
	 * taken from the position file, that file is advanced as batches succeed.
	 *
	 * In quiet mode the resulting journal position is printed as a bare value.
	 *
	 * NOTE(review): error() calls with a second argument of 1 are presumed to
	 * abort the script (see Maintenance::error) - the code below relies on that.
	 */
	public function execute() {
		$src = FileBackendGroup::singleton()->get( $this->getOption( 'src' ) );

		// The position file is named after the wiki ID inside the position directory
		$posDir = $this->getOption( 'posdir' );
		$posFile = $posDir ? $posDir . '/' . wfWikiID() : false;

		if ( $this->hasOption( 'posdump' ) ) {
			// Just dump the current position into the specified position dir
			if ( !$this->hasOption( 'posdir' ) ) {
				$this->error( "Param posdir required!", 1 );
			}
			if ( $this->hasOption( 'postime' ) ) {
				$id = (int)$src->getJournal()->getPositionAtTime( $this->getOption( 'postime' ) );
				$this->output( "Requested journal position is $id.\n" );
			} else {
				$id = (int)$src->getJournal()->getCurrentPosition();
				$this->output( "Current journal position is $id.\n" );
			}
			if ( file_put_contents( $posFile, $id, LOCK_EX ) !== false ) {
				$this->output( "Saved journal position file.\n" );
			} else {
				$this->output( "Could not save journal position file.\n" );
			}
			if ( $this->isQuiet() ) {
				print $id; // give a single machine-readable number
			}

			return;
		}

		if ( !$this->hasOption( 'dst' ) ) {
			$this->error( "Param dst required!", 1 );
		}
		$dst = FileBackendGroup::singleton()->get( $this->getOption( 'dst' ) );

		// An explicit --start wins; otherwise resume from the position file (if usable)
		$start = $this->getOption( 'start', 0 );
		if ( !$start && $posFile && is_dir( $posDir ) ) {
			$start = is_file( $posFile )
				? (int)trim( file_get_contents( $posFile ) )
				: 0;
			++$start; // we already did this ID, start with the next one
			$startFromPosFile = true;
		} else {
			$startFromPosFile = false;
		}

		// --backoff takes precedence over --end; with neither, the range is unbounded
		if ( $this->hasOption( 'backoff' ) ) {
			$time = time() - $this->getOption( 'backoff', 0 );
			$end = (int)$src->getJournal()->getPositionAtTime( $time );
		} else {
			$end = $this->getOption( 'end', INF );
		}

		$this->output( "Synchronizing backend '{$dst->getName()}' to '{$src->getName()}'...\n" );
		$this->output( "Starting journal position is $start.\n" );
		if ( is_finite( $end ) ) {
			$this->output( "Ending journal position is $end.\n" );
		}

		// Periodically update the position file (best effort; the result is not checked)
		$callback = function ( $pos ) use ( $startFromPosFile, $posFile, $start ) {
			if ( $startFromPosFile && $pos >= $start ) { // successfully advanced
				file_put_contents( $posFile, $pos, LOCK_EX );
			}
		};

		// Actually sync the dest backend with the reference backend
		$lastOKPos = $this->syncBackends( $src, $dst, $start, $end, $callback );

		// Update the sync position file
		if ( $startFromPosFile && $lastOKPos >= $start ) { // successfully advanced
			if ( file_put_contents( $posFile, $lastOKPos, LOCK_EX ) !== false ) {
				$this->output( "Updated journal position file.\n" );
			} else {
				$this->output( "Could not update journal position file.\n" );
			}
		}

		if ( $lastOKPos === false ) {
			if ( !$start ) {
				$this->output( "No journal entries found.\n" );
			} else {
				$this->output( "No new journal entries found.\n" );
			}
		} else {
			$this->output( "Stopped synchronization at journal position $lastOKPos.\n" );
		}

		if ( $this->isQuiet() ) {
			print $lastOKPos; // give a single machine-readable number
		}
	}

	/**
	 * Sync the $dst backend to the $src backend by replaying the $src journal
	 * entries from $start through $end, one batch at a time.
	 *
	 * Processing stops at the first batch that fails, so every journal ID up
	 * to the returned position has been handled without gaps.
	 *
	 * @param FileBackend $src Backend whose journal is read
	 * @param FileBackend $dst Backend that is brought up to date
	 * @param int $start Starting journal position (inclusive)
	 * @param int|float $end Ending journal position (inclusive); may be INF
	 * @param Closure $callback Invoked with the last OK position after each good batch
	 * @return int|bool Last journal ID handled successfully (0 if the first
	 *   batch failed), or false if the first batch had no entries at all
	 */
	protected function syncBackends(
		FileBackend $src, FileBackend $dst, $start, $end, Closure $callback
	) {
		$lastOKPos = 0; // failed
		$first = true; // first batch

		if ( $start > $end ) { // sanity
			$this->error( "Error: given starting ID greater than ending ID.", 1 );
		}

		do {
			$limit = min( $this->mBatchSize, $end - $start + 1 ); // don't go past ending ID
			$this->output( "Doing id $start to " . ( $start + $limit - 1 ) . "...\n" );

			// $next is supplied by getChangeEntries(); a falsy value ends the loop below
			$entries = $src->getJournal()->getChangeEntries( $start, $limit, $next );
			$start = $next; // start where we left off next time
			if ( $first && !count( $entries ) ) {
				return false; // nothing to do
			}
			$first = false;

			$lastPosInBatch = 0;
			$pathsInBatch = array(); // changed paths
			foreach ( $entries as $entry ) {
				if ( $entry['op'] !== 'null' ) { // null ops are just for reference
					$pathsInBatch[$entry['path']] = 1; // remove duplicates
				}
				$lastPosInBatch = $entry['id'];
			}

			$status = $this->syncFileBatch( array_keys( $pathsInBatch ), $src, $dst );
			if ( $status->isOK() ) {
				$lastOKPos = max( $lastOKPos, $lastPosInBatch );
				$callback( $lastOKPos ); // update position file
			} else {
				$this->error( print_r( $status->getErrorsArray(), true ) );
				break; // no gaps; everything up to $lastOKPos must be OK
			}

			if ( !$start ) {
				$this->output( "End of journal entries.\n" );
			}
		} while ( $start && $start <= $end );

		return $lastOKPos;
	}

	/**
	 * Sync particular files of backend $src to the corresponding $dst backend files.
	 *
	 * Files that exist in $src and differ from $dst are stored (overwriting);
	 * files missing from $src are deleted from $dst. The batch of operations
	 * is retried once after a 10 second pause if the first attempt fails.
	 *
	 * @param array $paths Storage paths taken from the journal entries
	 * @param FileBackend $src
	 * @param FileBackend $dst
	 * @return Status
	 */
	protected function syncFileBatch( array $paths, FileBackend $src, FileBackend $dst ) {
		$status = Status::newGood();
		if ( !count( $paths ) ) {
			return $status; // nothing to do
		}

		// Source: convert internal backend names (FileBackendMultiWrite) to the public one
		$sPaths = $this->replaceNamePaths( $paths, $src );
		// Destination: get corresponding path name
		$dPaths = $this->replaceNamePaths( $paths, $dst );

		// Lock the live backend paths from modification.
		// The lock variables are unused but must stay in scope to hold the locks.
		$sLock = $src->getScopedFileLocks( $sPaths, LockManager::LOCK_UW, $status );
		$eLock = $dst->getScopedFileLocks( $dPaths, LockManager::LOCK_EX, $status );
		if ( !$status->isOK() ) {
			return $status;
		}

		$src->preloadFileStat( array( 'srcs' => $sPaths, 'latest' => 1 ) );
		$dst->preloadFileStat( array( 'srcs' => $dPaths, 'latest' => 1 ) );

		$ops = array();
		$fsFiles = array();
		foreach ( $sPaths as $i => $sPath ) {
			$dPath = $dPaths[$i]; // destination
			$sExists = $src->fileExists( array( 'src' => $sPath, 'latest' => 1 ) );
			if ( $sExists === true ) { // exists in source
				if ( $this->filesAreSame( $src, $dst, $sPath, $dPath ) ) {
					continue; // avoid local copies for non-FS backends
				}
				// Note: getLocalReference() is fast for FS backends
				$fsFile = $src->getLocalReference( array( 'src' => $sPath, 'latest' => 1 ) );
				if ( !$fsFile ) {
					$this->error( "Unable to sync '$dPath': could not get local copy." );
					$status->fatal( 'backend-fail-internal', $src->getName() );

					return $status;
				}
				$fsFiles[] = $fsFile; // keep TempFSFile objects alive as needed
				// Note: prepare() is usually fast for key/value backends
				$status->merge( $dst->prepare( array(
					'dir' => dirname( $dPath ), 'bypassReadOnly' => 1 ) ) );
				if ( !$status->isOK() ) {
					return $status;
				}
				$ops[] = array( 'op' => 'store',
					'src' => $fsFile->getPath(), 'dst' => $dPath, 'overwrite' => 1 );
			} elseif ( $sExists === false ) { // does not exist in source
				$ops[] = array( 'op' => 'delete', 'src' => $dPath, 'ignoreMissingSource' => 1 );
			} else { // error
				$this->error( "Unable to sync '$dPath': could not stat file." );
				$status->fatal( 'backend-fail-internal', $src->getName() );

				return $status;
			}
		}

		$startTime = microtime( true );
		$status = $dst->doQuickOperations( $ops, array( 'bypassReadOnly' => 1 ) );
		if ( !$status->isOK() ) {
			sleep( 10 ); // wait and retry copy again
			$status = $dst->doQuickOperations( $ops, array( 'bypassReadOnly' => 1 ) );
		}
		// The elapsed time includes the retry pause, if any
		$elapsedMs = floor( ( microtime( true ) - $startTime ) * 1000 );
		if ( $status->isOK() && $this->getOption( 'verbose' ) ) {
			$this->output( "Synchronized these file(s) [{$elapsedMs}ms]:\n" .
				implode( "\n", $dPaths ) . "\n" );
		}

		return $status;
	}

	/**
	 * Substitute the backend name of storage paths with that of a given backend.
	 *
	 * Only the leading "mwstore://<name>" component of each path is rewritten.
	 *
	 * @param array|string $paths List of paths or single string path
	 * @param FileBackend $backend Backend whose name is put into the paths
	 * @return array|string Same shape as $paths
	 */
	protected function replaceNamePaths( $paths, FileBackend $backend ) {
		return preg_replace(
			'!^mwstore://([^/]+)!',
			StringUtils::escapeRegexReplacement( "mwstore://" . $backend->getName() ),
			$paths // string or array
		);
	}

	/**
	 * Check whether a source file and a destination file have the same content,
	 * judged by identical size and identical SHA-1 (base 36) hash.
	 *
	 * @param FileBackend $src
	 * @param FileBackend $dst
	 * @param string $sPath Storage path in $src
	 * @param string $dPath Storage path in $dst
	 * @return bool
	 */
	protected function filesAreSame( FileBackend $src, FileBackend $dst, $sPath, $dPath ) {
		return (
			( $src->getFileSize( array( 'src' => $sPath ) )
				=== $dst->getFileSize( array( 'src' => $dPath ) ) // short-circuit
			) && ( $src->getFileSha1Base36( array( 'src' => $sPath ) )
				=== $dst->getFileSha1Base36( array( 'src' => $dPath ) )
			)
		);
	}
}

$maintClass = "SyncFileBackend";
require_once RUN_MAINTENANCE_IF_MAIN;