MediaWiki
REL1_20
|
<?php
/**
 * Sync one file backend with another using the file journal of the source.
 *
 * @file
 */

require_once( __DIR__ . '/Maintenance.php' );

/**
 * Maintenance script that replays the change journal of a source file backend
 * against a destination backend, so the destination ends up with the same
 * files as the source for every path mentioned in the journal.
 */
class SyncFileBackend extends Maintenance {
	/**
	 * Register the command line options and the default batch size.
	 */
	public function __construct() {
		parent::__construct();
		$this->mDescription = "Sync one file backend with another using the journal";
		$this->addOption( 'src', 'Name of backend to sync from', true, true );
		$this->addOption( 'dst', 'Name of destination backend to sync', true, true );
		$this->addOption( 'start', 'Starting journal ID', false, true );
		$this->addOption( 'end', 'Ending journal ID', false, true );
		$this->addOption( 'posdir', 'Directory to read/record journal positions', false, true );
		$this->addOption( 'verbose', 'Verbose mode', false, false, 'v' );
		$this->setBatchSize( 50 );
	}

	/**
	 * Entry point: work out the journal range, run the sync and report the result.
	 *
	 * When no explicit --start is given and --posdir names an existing directory,
	 * the starting position is read from "<posdir>/<wiki ID>" and that file is
	 * rewritten afterwards with the last journal ID that synced successfully.
	 */
	public function execute() {
		$src = FileBackendGroup::singleton()->get( $this->getOption( 'src' ) );
		$dst = FileBackendGroup::singleton()->get( $this->getOption( 'dst' ) );

		$posDir = $this->getOption( 'posdir' );
		$posFile = $posDir ? $posDir . '/' . wfWikiID() : false;

		// Options arrive as strings; normalize to integers before doing arithmetic
		$start = (int)$this->getOption( 'start', 0 );
		if ( !$start && $posFile && is_dir( $posDir ) ) {
			$start = is_file( $posFile )
				? (int)trim( file_get_contents( $posFile ) )
				: 0;
			++$start; // we already did this ID, start with the next one
			$startFromPosFile = true;
		} else {
			$startFromPosFile = false;
		}
		// INF means "no upper bound"
		$end = $this->hasOption( 'end' ) ? (int)$this->getOption( 'end' ) : INF;

		$this->output( "Synchronizing backend '{$dst->getName()}' to '{$src->getName()}'...\n" );
		$this->output( "Starting journal position is $start.\n" );
		if ( is_finite( $end ) ) {
			$this->output( "Ending journal position is $end.\n" );
		}

		// Actually sync the dest backend with the reference backend
		$lastOKPos = $this->syncBackends( $src, $dst, $start, $end );

		// Update the sync position file
		if ( $startFromPosFile && $lastOKPos >= $start ) { // successfully advanced
			if ( file_put_contents( $posFile, $lastOKPos, LOCK_EX ) !== false ) {
				$this->output( "Updated journal position file.\n" );
			} else {
				$this->output( "Could not update journal position file.\n" );
			}
		}

		if ( $lastOKPos === false ) {
			if ( !$start ) {
				$this->output( "No journal entries found.\n" );
			} else {
				$this->output( "No new journal entries found.\n" );
			}
		} else {
			$this->output( "Stopped synchronization at journal position $lastOKPos.\n" );
		}

		if ( $this->isQuiet() ) {
			print $lastOKPos; // give a single machine-readable number
		}
	}

	/**
	 * Sync $dst to $src for all the changes recorded in the journal of $src,
	 * working through the ID range in batches of at most mBatchSize entries.
	 *
	 * Processing stops at the first batch that fails, so every journal entry
	 * up to the returned position is known to have been applied.
	 *
	 * @param $src FileBackend Backend whose journal is read
	 * @param $dst FileBackend Backend that is brought up to date
	 * @param $start integer Starting journal position
	 * @param $end integer|float Ending journal position (INF for no limit)
	 * @return integer|bool Journal position of the last entry in the last batch
	 *   that synced OK; 0 if the first batch failed; false if the first batch
	 *   had no journal entries at all
	 */
	protected function syncBackends( FileBackend $src, FileBackend $dst, $start, $end ) {
		$lastOKPos = 0; // failed
		$first = true; // first batch

		if ( $start > $end ) { // sanity
			// NOTE(review): the non-zero second argument is expected to abort the script
			$this->error( "Error: given starting ID greater than ending ID.", 1 );
		}

		$next = null; // filled in by reference by getChangeEntries()
		do {
			$limit = min( $this->mBatchSize, $end - $start + 1 ); // don't go past ending ID
			$this->output( "Doing id $start to " . ( $start + $limit - 1 ) . "...\n" );

			$entries = $src->getJournal()->getChangeEntries( $start, $limit, $next );
			$start = $next; // start where we left off next time
			if ( $first && !count( $entries ) ) {
				return false; // nothing to do
			}
			$first = false;

			$lastPosInBatch = 0;
			$pathsInBatch = array(); // changed paths
			foreach ( $entries as $entry ) {
				if ( $entry['op'] !== 'null' ) { // null ops are just for reference
					$pathsInBatch[$entry['path']] = 1; // remove duplicates
				}
				$lastPosInBatch = $entry['id'];
			}

			$status = $this->syncFileBatch( array_keys( $pathsInBatch ), $src, $dst );
			if ( $status->isOK() ) {
				$lastOKPos = max( $lastOKPos, $lastPosInBatch );
			} else {
				$this->error( print_r( $status->getErrorsArray(), true ) );
				break; // no gaps; everything up to $lastPos must be OK
			}

			if ( !$start ) {
				$this->output( "End of journal entries.\n" );
			}
		} while ( $start && $start <= $end );

		return $lastOKPos;
	}

	/**
	 * Sync particular files of backend $src to the corresponding $dst backend files.
	 *
	 * Files that exist in the source are stored (with overwrite) in the destination
	 * unless both copies already match; files missing from the source are deleted
	 * from the destination. The destination operations are retried once after a
	 * 10 second pause if the first attempt does not succeed.
	 *
	 * @param $paths Array Storage paths taken from the journal
	 * @param $src FileBackend
	 * @param $dst FileBackend
	 * @return Status
	 */
	protected function syncFileBatch( array $paths, FileBackend $src, FileBackend $dst ) {
		$status = Status::newGood();
		if ( !count( $paths ) ) {
			return $status; // nothing to do
		}

		// Source: convert internal backend names (FileBackendMultiWrite) to the public one
		$sPaths = $this->replaceNamePaths( $paths, $src );
		// Destination: get corresponding path name
		$dPaths = $this->replaceNamePaths( $paths, $dst );

		// Lock the live backend paths from modification.
		// The lock objects are never read; they are only kept in scope
		// (presumably the locks are released when these objects are destroyed).
		$sLock = $src->getScopedFileLocks( $sPaths, LockManager::LOCK_UW, $status );
		$eLock = $dst->getScopedFileLocks( $dPaths, LockManager::LOCK_EX, $status );
		if ( !$status->isOK() ) {
			return $status;
		}

		$ops = array();
		$fsFiles = array();
		foreach ( $sPaths as $i => $sPath ) {
			$dPath = $dPaths[$i]; // destination
			$sExists = $src->fileExists( array( 'src' => $sPath, 'latest' => 1 ) );
			if ( $sExists === true ) { // exists in source
				if ( $this->filesAreSame( $src, $dst, $sPath, $dPath ) ) {
					continue; // avoid local copies for non-FS backends
				}
				// Note: getLocalReference() is fast for FS backends
				$fsFile = $src->getLocalReference( array( 'src' => $sPath, 'latest' => 1 ) );
				if ( !$fsFile ) {
					$this->error( "Unable to sync '$dPath': could not get local copy." );
					$status->fatal( 'backend-fail-internal', $src->getName() );
					return $status;
				}
				$fsFiles[] = $fsFile; // keep TempFSFile objects alive as needed
				// Note: prepare() is usually fast for key/value backends
				$status->merge( $dst->prepare( array(
					'dir' => dirname( $dPath ), 'bypassReadOnly' => 1 ) ) );
				if ( !$status->isOK() ) {
					return $status;
				}
				$ops[] = array( 'op' => 'store',
					'src' => $fsFile->getPath(), 'dst' => $dPath, 'overwrite' => 1 );
			} elseif ( $sExists === false ) { // does not exist in source
				$ops[] = array( 'op' => 'delete', 'src' => $dPath, 'ignoreMissingSource' => 1 );
			} else { // error
				$this->error( "Unable to sync '$dPath': could not stat file." );
				$status->fatal( 'backend-fail-internal', $src->getName() );
				return $status;
			}
		}

		$startTime = microtime( true );
		$status = $dst->doQuickOperations( $ops, array( 'bypassReadOnly' => 1 ) );
		if ( !$status->isOK() ) {
			sleep( 10 ); // wait and retry copy again
			$status = $dst->doQuickOperations( $ops, array( 'bypassReadOnly' => 1 ) );
		}
		$elapsedMs = floor( ( microtime( true ) - $startTime ) * 1000 );
		if ( $status->isOK() && $this->getOption( 'verbose' ) ) {
			$this->output( "Synchronized these file(s) [{$elapsedMs}ms]:\n" .
				implode( "\n", $dPaths ) . "\n" );
		}

		return $status;
	}

	/**
	 * Substitute the backend name of storage paths with that of a given backend.
	 *
	 * Only the leading "mwstore://<name>" part of each path is rewritten.
	 *
	 * @param $paths Array|string List of paths or single string path
	 * @param $backend FileBackend Backend whose name is put into the paths
	 * @return Array|string
	 */
	protected function replaceNamePaths( $paths, FileBackend $backend ) {
		return preg_replace(
			'!^mwstore://([^/]+)!',
			StringUtils::escapeRegexReplacement( "mwstore://" . $backend->getName() ),
			$paths // string or array
		);
	}

	/**
	 * Check whether a source file and a destination file have the same content,
	 * judged by file size first and SHA-1 hash second.
	 *
	 * A size or hash lookup that fails (returns false) never counts as a match;
	 * otherwise two failed lookups would compare equal and the file would be
	 * skipped without ever being synced.
	 *
	 * @param $src FileBackend
	 * @param $dst FileBackend
	 * @param $sPath string Storage path in the source backend
	 * @param $dPath string Storage path in the destination backend
	 * @return bool
	 */
	protected function filesAreSame( FileBackend $src, FileBackend $dst, $sPath, $dPath ) {
		$sSize = $src->getFileSize( array( 'src' => $sPath ) );
		if ( $sSize === false || $sSize !== $dst->getFileSize( array( 'src' => $dPath ) ) ) {
			return false; // short-circuit: no need to hash
		}

		$sSha1 = $src->getFileSha1Base36( array( 'src' => $sPath ) );
		return ( $sSha1 !== false
			&& $sSha1 === $dst->getFileSha1Base36( array( 'src' => $dPath ) ) );
	}
}

$maintClass = "SyncFileBackend";
require_once( RUN_MAINTENANCE_IF_MAIN );