MediaWiki
REL1_22
|
<?php
require_once __DIR__ . '/Maintenance.php';

/**
 * Maintenance script that copies the files of one file backend into another.
 *
 * For every requested container (optionally limited to a sub-directory) the
 * source listing is walked in batches: files the destination already holds
 * are skipped, the rest are fetched as local references and stored into the
 * destination. With --syncviadelete, destination files that are absent from
 * the source listing are deleted afterwards.
 */
class CopyFileBackend extends Maintenance {
	/** @var array Destination stat results, keyed by the SHA-1 of the destination storage path */
	protected $statCache = array();

	/**
	 * Declare the command line options and the default batch size.
	 */
	public function __construct() {
		parent::__construct();
		$this->mDescription = "Copy files in one backend to another.";
		$this->addOption( 'src', 'Backend containing the source files', true, true );
		$this->addOption( 'dst', 'Backend where files should be copied to', true, true );
		$this->addOption( 'containers', 'Pipe separated list of containers', true, true );
		$this->addOption( 'subdir', 'Only do items in this child directory', false, true );
		$this->addOption( 'ratefile', 'File to check periodically for batch size', false, true );
		$this->addOption( 'prestat', 'Stat the destination files first (try to use listings)' );
		$this->addOption( 'skiphash', 'Skip SHA-1 sync checks for files' );
		$this->addOption( 'missingonly', 'Only copy files missing from destination listing' );
		$this->addOption( 'syncviadelete', 'Delete destination files missing from source listing' );
		$this->addOption( 'utf8only', 'Skip source files that do not have valid UTF-8 names' );
		$this->setBatchSize( 50 );
	}

	/**
	 * Entry point: copy (and optionally prune) each requested container.
	 *
	 * Listing failures abort the script, since error() is called with a
	 * non-zero second argument in those cases.
	 */
	public function execute() {
		$src = FileBackendGroup::singleton()->get( $this->getOption( 'src' ) );
		$dst = FileBackendGroup::singleton()->get( $this->getOption( 'dst' ) );
		$containers = explode( '|', $this->getOption( 'containers' ) );
		$subDir = rtrim( $this->getOption( 'subdir', '' ), '/' );

		$rateFile = $this->getOption( 'ratefile' );

		if ( $this->hasOption( 'utf8only' ) && !extension_loaded( 'mbstring' ) ) {
			$this->error( "Cannot check for UTF-8, mbstring extension missing.", 1 ); // die
		}

		foreach ( $containers as $container ) {
			if ( $subDir != '' ) {
				$backendRel = "$container/$subDir";
				$this->output( "Doing container '$container', directory '$subDir'...\n" );
			} else {
				$backendRel = $container;
				$this->output( "Doing container '$container'...\n" );
			}

			if ( $this->hasOption( 'missingonly' ) ) {
				$this->output( "\tBuilding list of missing files..." );
				$srcPathsRel = $this->getListingDiffRel( $src, $dst, $backendRel );
				$this->output( count( $srcPathsRel ) . " file(s) need to be copied.\n" );
			} else {
				$srcPathsRel = $src->getFileList( array(
					'dir' => $src->getRootStoragePath() . "/$backendRel",
					'adviseStat' => true // avoid HEADs
				) );
				if ( $srcPathsRel === null ) {
					$this->error( "Could not list files in $container.", 1 ); // die
				}
			}

			if ( $this->getOption( 'prestat' ) && !$this->hasOption( 'missingonly' ) ) {
				// Build the stat cache for the destination files
				$this->output( "\tBuilding destination stat cache..." );
				$dstPathsRel = $dst->getFileList( array(
					'dir' => $dst->getRootStoragePath() . "/$backendRel",
					'adviseStat' => true // avoid HEADs
				) );
				if ( $dstPathsRel === null ) {
					$this->error( "Could not list files in $container.", 1 ); // die
				}
				$this->statCache = array(); // clear
				foreach ( $dstPathsRel as $dstPathRel ) {
					$path = $dst->getRootStoragePath() . "/$backendRel/$dstPathRel";
					$this->statCache[sha1( $path )] = $dst->getFileStat( array( 'src' => $path ) );
				}
				$this->output( "done [" . count( $this->statCache ) . " file(s)]\n" );
			}

			$this->output( "\tCopying file(s)...\n" );
			// Note: $count is the number of source paths processed, which
			// includes files that copyFileBatch() decides to skip.
			$count = 0;
			$batchPaths = array();
			foreach ( $srcPathsRel as $srcPathRel ) {
				// Check up on the rate file periodically to adjust the concurrency
				if ( $rateFile && ( $count % 500 ) == 0 ) {
					$this->updateBatchSizeFromRateFile( $rateFile );
				}
				$batchPaths[$srcPathRel] = 1; // remove duplicates
				if ( count( $batchPaths ) >= $this->mBatchSize ) {
					$this->copyFileBatch( array_keys( $batchPaths ), $backendRel, $src, $dst );
					$batchPaths = array(); // done
				}
				++$count;
			}
			if ( count( $batchPaths ) ) { // left-overs
				$this->copyFileBatch( array_keys( $batchPaths ), $backendRel, $src, $dst );
			}
			$this->output( "\tCopied $count file(s).\n" );

			if ( $this->hasOption( 'syncviadelete' ) ) {
				$this->output( "\tBuilding list of excess destination files..." );
				// Arguments are swapped on purpose: list what dst has that src lacks
				$delPathsRel = $this->getListingDiffRel( $dst, $src, $backendRel );
				$this->output( count( $delPathsRel ) . " file(s) need to be deleted.\n" );

				$this->output( "\tDeleting file(s)...\n" );
				$count = 0;
				$batchPaths = array();
				foreach ( $delPathsRel as $delPathRel ) {
					// Check up on the rate file periodically to adjust the concurrency
					if ( $rateFile && ( $count % 500 ) == 0 ) {
						$this->updateBatchSizeFromRateFile( $rateFile );
					}
					$batchPaths[$delPathRel] = 1; // remove duplicates
					if ( count( $batchPaths ) >= $this->mBatchSize ) {
						$this->delFileBatch( array_keys( $batchPaths ), $backendRel, $dst );
						$batchPaths = array(); // done
					}
					++$count;
				}
				if ( count( $batchPaths ) ) { // left-overs
					$this->delFileBatch( array_keys( $batchPaths ), $backendRel, $dst );
				}

				$this->output( "\tDeleted $count file(s).\n" );
			}

			if ( $subDir != '' ) {
				$this->output( "Finished container '$container', directory '$subDir'.\n" );
			} else {
				$this->output( "Finished container '$container'.\n" );
			}
		}

		$this->output( "Done.\n" );
	}

	/**
	 * Re-read the batch size from the rate file.
	 *
	 * The value is clamped to a minimum of 1. If the file cannot be read the
	 * current batch size is kept and the problem is reported, rather than
	 * silently collapsing the batch size to 1.
	 *
	 * @param string $rateFile Path of the file holding the desired batch size
	 */
	private function updateBatchSizeFromRateFile( $rateFile ) {
		$contents = file_get_contents( $rateFile );
		if ( $contents === false ) {
			$this->error( "\tCould not read rate file '$rateFile'; " .
				"keeping batch size {$this->mBatchSize}." );
			return;
		}
		$this->mBatchSize = max( 1, (int)$contents );
		$this->output( "\tBatch size is now {$this->mBatchSize}.\n" );
	}

	/**
	 * List the relative paths that $src has under $backendRel but $dst lacks.
	 *
	 * Only the two listings are compared; file contents are not inspected.
	 * Aborts the script if either listing cannot be obtained.
	 *
	 * @param FileBackend $src Backend whose files are candidates for the result
	 * @param FileBackend $dst Backend whose listing is subtracted
	 * @param string $backendRel Container name, optionally followed by a sub-directory
	 * @return array Relative paths present in $src's listing only
	 */
	protected function getListingDiffRel( FileBackend $src, FileBackend $dst, $backendRel ) {
		$srcPathsRel = $src->getFileList( array(
			'dir' => $src->getRootStoragePath() . "/$backendRel" ) );
		if ( $srcPathsRel === null ) {
			$this->error( "Could not list files in source container.", 1 ); // die
		}
		$dstPathsRel = $dst->getFileList( array(
			'dir' => $dst->getRootStoragePath() . "/$backendRel" ) );
		if ( $dstPathsRel === null ) {
			$this->error( "Could not list files in destination container.", 1 ); // die
		}
		// Index the destination listing by the SHA-1 of each relative path
		$relFilesDstSha1 = array();
		foreach ( $dstPathsRel as $dstPathRel ) {
			$relFilesDstSha1[sha1( $dstPathRel )] = 1;
		}
		unset( $dstPathsRel ); // free
		// Collect the source paths that have no entry in that index
		$missingPathsRel = array();
		foreach ( $srcPathsRel as $srcPathRel ) {
			if ( !isset( $relFilesDstSha1[sha1( $srcPathRel )] ) ) {
				$missingPathsRel[] = $srcPathRel;
			}
		}
		unset( $srcPathsRel ); // free

		return $missingPathsRel;
	}

	/**
	 * Copy one batch of files from $src to $dst.
	 *
	 * Files are skipped (with a message) when their path is not valid UTF-8
	 * under --utf8only, when the destination already has an equivalent file,
	 * or when no usable local reference can be obtained. A failure to prepare
	 * a destination directory, or a batch store that still fails after one
	 * retry, aborts the script.
	 *
	 * @param array $srcPathsRel Paths relative to $backendRel
	 * @param string $backendRel Container name, optionally followed by a sub-directory
	 * @param FileBackend $src
	 * @param FileBackend $dst
	 */
	protected function copyFileBatch(
		array $srcPathsRel, $backendRel, FileBackend $src, FileBackend $dst
	) {
		$ops = array();
		$fsFiles = array();
		$copiedRel = array(); // for output message
		$wikiId = $src->getWikiId();

		// Download the batch of source files into backend cache...
		if ( $this->hasOption( 'missingonly' ) ) {
			$srcPaths = array();
			foreach ( $srcPathsRel as $srcPathRel ) {
				$srcPaths[] = $src->getRootStoragePath() . "/$backendRel/$srcPathRel";
			}
			$startTime = microtime( true );
			$fsFiles = $src->getLocalReferenceMulti( array( 'srcs' => $srcPaths, 'latest' => 1 ) );
			$elapsedMs = floor( ( microtime( true ) - $startTime ) * 1000 );
			$this->output( "\n\tDownloaded these file(s) [{$elapsedMs}ms]:\n\t" .
				implode( "\n\t", $srcPaths ) . "\n\n" );
		}

		// Determine what files need to be copied over...
		foreach ( $srcPathsRel as $srcPathRel ) {
			$srcPath = $src->getRootStoragePath() . "/$backendRel/$srcPathRel";
			$dstPath = $dst->getRootStoragePath() . "/$backendRel/$srcPathRel";
			if ( $this->hasOption( 'utf8only' ) && !mb_check_encoding( $srcPath, 'UTF-8' ) ) {
				$this->error( "$wikiId: Detected illegal (non-UTF8) path for $srcPath." );
				continue;
			} elseif ( !$this->hasOption( 'missingonly' )
				&& $this->filesAreSame( $src, $dst, $srcPath, $dstPath ) )
			{
				$this->output( "\tAlready have $srcPathRel.\n" );
				continue; // assume already copied...
			}
			// Reuse the pre-fetched reference when there is one for this path
			$fsFile = array_key_exists( $srcPath, $fsFiles )
				? $fsFiles[$srcPath]
				: $src->getLocalReference( array( 'src' => $srcPath, 'latest' => 1 ) );
			if ( !$fsFile ) {
				$src->clearCache( array( $srcPath ) );
				if ( $src->fileExists( array( 'src' => $srcPath, 'latest' => 1 ) ) === false ) {
					$this->error( "$wikiId: File '$srcPath' was listed but does not exist." );
				} else {
					$this->error( "$wikiId: Could not get local copy of $srcPath." );
				}
				continue;
			} elseif ( !$fsFile->exists() ) {
				// FSFileBackends just return the path for getLocalReference() and paths with
				// illegal slashes may get normalized to a different path. This can cause the
				// local reference to not exist...skip these broken files.
				$this->error( "$wikiId: Detected possible illegal path for $srcPath." );
				continue;
			}
			$fsFiles[] = $fsFile; // keep TempFSFile objects alive as needed
			// Note: prepare() is usually fast for key/value backends
			$status = $dst->prepare( array( 'dir' => dirname( $dstPath ), 'bypassReadOnly' => 1 ) );
			if ( !$status->isOK() ) {
				$this->error( print_r( $status->getErrorsArray(), true ) );
				$this->error( "$wikiId: Could not copy $srcPath to $dstPath.", 1 ); // die
			}
			$ops[] = array( 'op' => 'store',
				'src' => $fsFile->getPath(), 'dst' => $dstPath, 'overwrite' => 1 );
			$copiedRel[] = $srcPathRel;
		}

		if ( !count( $ops ) ) {
			return; // every file was skipped; nothing to send to the backend
		}

		// Copy in the batch of source files...
		$startTime = microtime( true );
		$status = $dst->doQuickOperations( $ops, array( 'bypassReadOnly' => 1 ) );
		if ( !$status->isOK() ) {
			sleep( 10 ); // wait and retry copy again
			$status = $dst->doQuickOperations( $ops, array( 'bypassReadOnly' => 1 ) );
		}
		$elapsedMs = floor( ( microtime( true ) - $startTime ) * 1000 );
		if ( !$status->isOK() ) {
			$this->error( print_r( $status->getErrorsArray(), true ) );
			$this->error( "$wikiId: Could not copy file batch.", 1 ); // die
		} else {
			$this->output( "\n\tCopied these file(s) [{$elapsedMs}ms]:\n\t" .
				implode( "\n\t", $copiedRel ) . "\n\n" );
		}
	}

	/**
	 * Delete one batch of files from $dst.
	 *
	 * The batch is retried once after a pause; if it still fails the script
	 * aborts.
	 *
	 * @param array $dstPathsRel Paths relative to $backendRel
	 * @param string $backendRel Container name, optionally followed by a sub-directory
	 * @param FileBackend $dst
	 */
	protected function delFileBatch(
		array $dstPathsRel, $backendRel, FileBackend $dst
	) {
		$ops = array();
		$deletedRel = array(); // for output message
		$wikiId = $dst->getWikiId();

		// Build one delete operation per destination file...
		foreach ( $dstPathsRel as $dstPathRel ) {
			$dstPath = $dst->getRootStoragePath() . "/$backendRel/$dstPathRel";
			$ops[] = array( 'op' => 'delete', 'src' => $dstPath );
			$deletedRel[] = $dstPathRel;
		}

		if ( !count( $ops ) ) {
			return; // nothing to delete
		}

		// Delete the batch of destination files...
		$startTime = microtime( true );
		$status = $dst->doQuickOperations( $ops, array( 'bypassReadOnly' => 1 ) );
		if ( !$status->isOK() ) {
			sleep( 10 ); // wait and retry the delete again
			$status = $dst->doQuickOperations( $ops, array( 'bypassReadOnly' => 1 ) );
		}
		$elapsedMs = floor( ( microtime( true ) - $startTime ) * 1000 );
		if ( !$status->isOK() ) {
			$this->error( print_r( $status->getErrorsArray(), true ) );
			$this->error( "$wikiId: Could not delete file batch.", 1 ); // die
		} else {
			$this->output( "\n\tDeleted these file(s) [{$elapsedMs}ms]:\n\t" .
				implode( "\n\t", $deletedRel ) . "\n\n" );
		}
	}

	/**
	 * Decide whether the destination already holds an equivalent of a source file.
	 *
	 * Both files must stat successfully and have the same size. Beyond that,
	 * with --skiphash the destination must not be older than the source;
	 * without it the SHA-1 (base 36) hashes of the two files must match.
	 *
	 * @param FileBackend $src
	 * @param FileBackend $dst
	 * @param string $sPath Source storage path
	 * @param string $dPath Destination storage path
	 * @return bool
	 */
	protected function filesAreSame( FileBackend $src, FileBackend $dst, $sPath, $dPath ) {
		$skipHash = $this->hasOption( 'skiphash' );
		$srcStat = $src->getFileStat( array( 'src' => $sPath ) );
		$dPathSha1 = sha1( $dPath );
		// Prefer the stat gathered up front (--prestat); otherwise ask the backend
		$dstStat = isset( $this->statCache[$dPathSha1] )
			? $this->statCache[$dPathSha1]
			: $dst->getFileStat( array( 'src' => $dPath ) );
		return (
			is_array( $srcStat ) // sanity check that source exists
			&& is_array( $dstStat ) // dest exists
			&& $srcStat['size'] === $dstStat['size']
			&& ( !$skipHash || $srcStat['mtime'] <= $dstStat['mtime'] )
			&& ( $skipHash || $src->getFileSha1Base36( array( 'src' => $sPath, 'latest' => 1 ) )
				=== $dst->getFileSha1Base36( array( 'src' => $dPath, 'latest' => 1 ) )
			)
		);
	}
}

$maintClass = 'CopyFileBackend';
require_once RUN_MAINTENANCE_IF_MAIN;