MediaWiki  REL1_22
copyFileBackend.php
Go to the documentation of this file.
00001 <?php
00024 require_once __DIR__ . '/Maintenance.php';
00025 
/**
 * Maintenance script that copies files from one file backend to another.
 *
 * For each container given in --containers (optionally restricted to --subdir),
 * the source backend is listed and every file is stored into the destination
 * backend under the same relative path, in batches of mBatchSize files. Files
 * whose destination copy already matches (see filesAreSame()) are skipped.
 * With --syncviadelete, destination files that do not appear in the source
 * listing are deleted after the copy pass.
 */
class CopyFileBackend extends Maintenance {
    /** @var array Destination stat results, keyed by sha1() of the full destination path */
    protected $statCache = array();

    /**
     * Declare the command line options and the default batch size.
     */
    public function __construct() {
        parent::__construct();
        $this->mDescription = "Copy files in one backend to another.";
        $this->addOption( 'src', 'Backend containing the source files', true, true );
        $this->addOption( 'dst', 'Backend where files should be copied to', true, true );
        $this->addOption( 'containers', 'Pipe separated list of containers', true, true );
        $this->addOption( 'subdir', 'Only do items in this child directory', false, true );
        $this->addOption( 'ratefile', 'File to check periodically for batch size', false, true );
        $this->addOption( 'prestat', 'Stat the destination files first (try to use listings)' );
        $this->addOption( 'skiphash', 'Skip SHA-1 sync checks for files' );
        $this->addOption( 'missingonly', 'Only copy files missing from destination listing' );
        $this->addOption( 'syncviadelete', 'Delete destination files missing from source listing' );
        $this->addOption( 'utf8only', 'Skip source files that do not have valid UTF-8 names' );
        $this->setBatchSize( 50 );
    }

    /**
     * Copy (and optionally delete) files container by container.
     *
     * Any listing failure is fatal: the script exits through error( ..., 1 ).
     */
    public function execute() {
        $src = FileBackendGroup::singleton()->get( $this->getOption( 'src' ) );
        $dst = FileBackendGroup::singleton()->get( $this->getOption( 'dst' ) );
        $containers = explode( '|', $this->getOption( 'containers' ) );
        $subDir = rtrim( $this->getOption( 'subdir', '' ), '/' );
        $rateFile = $this->getOption( 'ratefile' );

        if ( $this->hasOption( 'utf8only' ) && !extension_loaded( 'mbstring' ) ) {
            $this->error( "Cannot check for UTF-8, mbstring extension missing.", 1 ); // die
        }

        foreach ( $containers as $container ) {
            if ( $subDir !== '' ) {
                $backendRel = "$container/$subDir";
                $this->output( "Doing container '$container', directory '$subDir'...\n" );
            } else {
                $backendRel = $container;
                $this->output( "Doing container '$container'...\n" );
            }

            // Work out which source paths (relative to $backendRel) to look at
            if ( $this->hasOption( 'missingonly' ) ) {
                $this->output( "\tBuilding list of missing files..." );
                $srcPathsRel = $this->getListingDiffRel( $src, $dst, $backendRel );
                $this->output( count( $srcPathsRel ) . " file(s) need to be copied.\n" );
            } else {
                $srcPathsRel = $src->getFileList( array(
                    'dir' => $src->getRootStoragePath() . "/$backendRel",
                    'adviseStat' => true // avoid HEADs
                ) );
                if ( $srcPathsRel === null ) {
                    $this->error( "Could not list files in $container.", 1 ); // die
                }
            }

            if ( $this->getOption( 'prestat' ) && !$this->hasOption( 'missingonly' ) ) {
                // Build the stat cache for the destination files
                $this->output( "\tBuilding destination stat cache..." );
                $dstPathsRel = $dst->getFileList( array(
                    'dir' => $dst->getRootStoragePath() . "/$backendRel",
                    'adviseStat' => true // avoid HEADs
                ) );
                if ( $dstPathsRel === null ) {
                    $this->error( "Could not list files in $container.", 1 ); // die
                }
                $this->statCache = array(); // drop entries from the previous container
                foreach ( $dstPathsRel as $dstPathRel ) {
                    $path = $dst->getRootStoragePath() . "/$backendRel/$dstPathRel";
                    $this->statCache[sha1( $path )] = $dst->getFileStat( array( 'src' => $path ) );
                }
                $this->output( "done [" . count( $this->statCache ) . " file(s)]\n" );
            }

            $this->output( "\tCopying file(s)...\n" );
            $count = 0;
            $batchPaths = array();
            foreach ( $srcPathsRel as $srcPathRel ) {
                // Check up on the rate file periodically to adjust the concurrency
                if ( $rateFile && ( $count % 500 ) === 0 ) {
                    $this->refreshBatchSize( $rateFile );
                }
                $batchPaths[$srcPathRel] = 1; // keyed by path to remove duplicates
                if ( count( $batchPaths ) >= $this->mBatchSize ) {
                    $this->copyFileBatch( array_keys( $batchPaths ), $backendRel, $src, $dst );
                    $batchPaths = array(); // done
                }
                ++$count;
            }
            if ( count( $batchPaths ) ) { // left-overs
                $this->copyFileBatch( array_keys( $batchPaths ), $backendRel, $src, $dst );
            }
            $this->output( "\tCopied $count file(s).\n" );

            if ( $this->hasOption( 'syncviadelete' ) ) {
                $this->output( "\tBuilding list of excess destination files..." );
                // Swapping the arguments yields files present in $dst but not in $src
                $delPathsRel = $this->getListingDiffRel( $dst, $src, $backendRel );
                $this->output( count( $delPathsRel ) . " file(s) need to be deleted.\n" );

                $this->output( "\tDeleting file(s)...\n" );
                $count = 0;
                $batchPaths = array();
                foreach ( $delPathsRel as $delPathRel ) {
                    // Check up on the rate file periodically to adjust the concurrency
                    if ( $rateFile && ( $count % 500 ) === 0 ) {
                        $this->refreshBatchSize( $rateFile );
                    }
                    $batchPaths[$delPathRel] = 1; // keyed by path to remove duplicates
                    if ( count( $batchPaths ) >= $this->mBatchSize ) {
                        $this->delFileBatch( array_keys( $batchPaths ), $backendRel, $dst );
                        $batchPaths = array(); // done
                    }
                    ++$count;
                }
                if ( count( $batchPaths ) ) { // left-overs
                    $this->delFileBatch( array_keys( $batchPaths ), $backendRel, $dst );
                }

                $this->output( "\tDeleted $count file(s).\n" );
            }

            if ( $subDir !== '' ) {
                $this->output( "Finished container '$container', directory '$subDir'.\n" );
            } else {
                $this->output( "Finished container '$container'.\n" );
            }
        }

        $this->output( "Done.\n" );
    }

    /**
     * Re-read the batch size from the rate file.
     *
     * The file content is cast to an integer and clamped to a minimum of 1.
     * If the file cannot be read, the current batch size is kept and a
     * non-fatal error is printed, rather than silently dropping to 1.
     *
     * @param string $rateFile Path of the file holding the desired batch size
     */
    protected function refreshBatchSize( $rateFile ) {
        $contents = is_readable( $rateFile ) ? file_get_contents( $rateFile ) : false;
        if ( $contents === false ) {
            $this->error( "Could not read rate file '$rateFile'; " .
                "batch size stays at {$this->mBatchSize}." );
            return;
        }
        $this->mBatchSize = max( 1, (int)$contents );
        $this->output( "\tBatch size is now {$this->mBatchSize}.\n" );
    }

    /**
     * List the files that are in $src but not in $dst under the given directory.
     *
     * Only the listings are compared; file contents are not inspected.
     * Exits the script if either backend cannot be listed.
     *
     * @param FileBackend $src Backend whose listing is filtered
     * @param FileBackend $dst Backend whose listing is subtracted
     * @param string $backendRel Container name, optionally followed by "/<subdir>"
     * @return array Paths, relative to $backendRel, listed in $src but not in $dst
     */
    protected function getListingDiffRel( FileBackend $src, FileBackend $dst, $backendRel ) {
        $srcPathsRel = $src->getFileList( array(
            'dir' => $src->getRootStoragePath() . "/$backendRel" ) );
        if ( $srcPathsRel === null ) {
            $this->error( "Could not list files in source container.", 1 ); // die
        }
        $dstPathsRel = $dst->getFileList( array(
            'dir' => $dst->getRootStoragePath() . "/$backendRel" ) );
        if ( $dstPathsRel === null ) {
            $this->error( "Could not list files in destination container.", 1 ); // die
        }

        // Index the destination listing by the SHA-1 of each relative path
        $dstPathHashes = array();
        foreach ( $dstPathsRel as $dstPathRel ) {
            $dstPathHashes[sha1( $dstPathRel )] = 1;
        }
        unset( $dstPathsRel ); // free

        // Keep the source paths that have no entry in that index
        $missingPathsRel = array();
        foreach ( $srcPathsRel as $srcPathRel ) {
            if ( !isset( $dstPathHashes[sha1( $srcPathRel )] ) ) {
                $missingPathsRel[] = $srcPathRel;
            }
        }
        unset( $srcPathsRel ); // free

        return $missingPathsRel;
    }

    /**
     * Copy one batch of files from the source to the destination backend.
     *
     * Files are skipped (with a message) when their path is not valid UTF-8 and
     * --utf8only is set, when the destination already has an identical copy
     * (unless --missingonly is set), or when no usable local reference can be
     * obtained. The batch is stored with one doQuickOperations() call, which is
     * retried once after 10 seconds; a second failure exits the script.
     *
     * @param array $srcPathsRel Source paths relative to $backendRel
     * @param string $backendRel Container name, optionally followed by "/<subdir>"
     * @param FileBackend $src
     * @param FileBackend $dst
     */
    protected function copyFileBatch(
        array $srcPathsRel, $backendRel, FileBackend $src, FileBackend $dst
    ) {
        $ops = array();
        $fsFiles = array();
        $copiedRel = array(); // for output message
        $wikiId = $src->getWikiId();
        $srcRoot = $src->getRootStoragePath() . "/$backendRel";
        $dstRoot = $dst->getRootStoragePath() . "/$backendRel";

        // Download the batch of source files into backend cache...
        if ( $this->hasOption( 'missingonly' ) ) {
            $srcPaths = array();
            foreach ( $srcPathsRel as $srcPathRel ) {
                $srcPaths[] = "$srcRoot/$srcPathRel";
            }
            $startTime = microtime( true );
            $fsFiles = $src->getLocalReferenceMulti( array( 'srcs' => $srcPaths, 'latest' => 1 ) );
            $elapsedMs = floor( ( microtime( true ) - $startTime ) * 1000 );
            $this->output( "\n\tDownloaded these file(s) [{$elapsedMs}ms]:\n\t" .
                implode( "\n\t", $srcPaths ) . "\n\n" );
        }

        // Determine what files need to be copied over...
        foreach ( $srcPathsRel as $srcPathRel ) {
            $srcPath = "$srcRoot/$srcPathRel";
            $dstPath = "$dstRoot/$srcPathRel";
            if ( $this->hasOption( 'utf8only' ) && !mb_check_encoding( $srcPath, 'UTF-8' ) ) {
                $this->error( "$wikiId: Detected illegal (non-UTF8) path for $srcPath." );
                continue;
            }
            if ( !$this->hasOption( 'missingonly' )
                && $this->filesAreSame( $src, $dst, $srcPath, $dstPath )
            ) {
                $this->output( "\tAlready have $srcPathRel.\n" );
                continue; // assume already copied...
            }
            // Reuse the pre-downloaded reference if there is one for this path
            $fsFile = array_key_exists( $srcPath, $fsFiles )
                ? $fsFiles[$srcPath]
                : $src->getLocalReference( array( 'src' => $srcPath, 'latest' => 1 ) );
            if ( !$fsFile ) {
                $src->clearCache( array( $srcPath ) );
                if ( $src->fileExists( array( 'src' => $srcPath, 'latest' => 1 ) ) === false ) {
                    $this->error( "$wikiId: File '$srcPath' was listed but does not exist." );
                } else {
                    $this->error( "$wikiId: Could not get local copy of $srcPath." );
                }
                continue;
            }
            if ( !$fsFile->exists() ) {
                // FSFileBackends just return the path for getLocalReference() and paths with
                // illegal slashes may get normalized to a different path. This can cause the
                // local reference to not exist...skip these broken files.
                $this->error( "$wikiId: Detected possible illegal path for $srcPath." );
                continue;
            }
            $fsFiles[] = $fsFile; // keep TempFSFile objects alive as needed
            // Note: prepare() is usually fast for key/value backends
            $status = $dst->prepare( array( 'dir' => dirname( $dstPath ), 'bypassReadOnly' => 1 ) );
            if ( !$status->isOK() ) {
                $this->error( print_r( $status->getErrorsArray(), true ) );
                $this->error( "$wikiId: Could not copy $srcPath to $dstPath.", 1 ); // die
            }
            $ops[] = array( 'op' => 'store',
                'src' => $fsFile->getPath(), 'dst' => $dstPath, 'overwrite' => 1 );
            $copiedRel[] = $srcPathRel;
        }

        if ( !count( $ops ) ) {
            return; // every file was skipped; no need to call the backend
        }

        // Copy in the batch of source files...
        $startTime = microtime( true );
        $status = $dst->doQuickOperations( $ops, array( 'bypassReadOnly' => 1 ) );
        if ( !$status->isOK() ) {
            sleep( 10 ); // wait and retry copy again
            $status = $dst->doQuickOperations( $ops, array( 'bypassReadOnly' => 1 ) );
        }
        $elapsedMs = floor( ( microtime( true ) - $startTime ) * 1000 );
        if ( !$status->isOK() ) {
            $this->error( print_r( $status->getErrorsArray(), true ) );
            $this->error( "$wikiId: Could not copy file batch.", 1 ); // die
        }
        $this->output( "\n\tCopied these file(s) [{$elapsedMs}ms]:\n\t" .
            implode( "\n\t", $copiedRel ) . "\n\n" );
    }

    /**
     * Delete one batch of files from the destination backend.
     *
     * The batch is deleted with one doQuickOperations() call, which is retried
     * once after 10 seconds; a second failure exits the script.
     *
     * @param array $dstPathsRel Destination paths relative to $backendRel
     * @param string $backendRel Container name, optionally followed by "/<subdir>"
     * @param FileBackend $dst
     */
    protected function delFileBatch(
        array $dstPathsRel, $backendRel, FileBackend $dst
    ) {
        if ( !count( $dstPathsRel ) ) {
            return; // nothing to delete
        }

        $wikiId = $dst->getWikiId();
        $dstRoot = $dst->getRootStoragePath() . "/$backendRel";

        // Build one delete operation per destination file...
        $ops = array();
        foreach ( $dstPathsRel as $dstPathRel ) {
            $ops[] = array( 'op' => 'delete', 'src' => "$dstRoot/$dstPathRel" );
        }

        // Delete the batch of destination files...
        $startTime = microtime( true );
        $status = $dst->doQuickOperations( $ops, array( 'bypassReadOnly' => 1 ) );
        if ( !$status->isOK() ) {
            sleep( 10 ); // wait and retry the delete again
            $status = $dst->doQuickOperations( $ops, array( 'bypassReadOnly' => 1 ) );
        }
        $elapsedMs = floor( ( microtime( true ) - $startTime ) * 1000 );
        if ( !$status->isOK() ) {
            $this->error( print_r( $status->getErrorsArray(), true ) );
            $this->error( "$wikiId: Could not delete file batch.", 1 ); // die
        }
        $this->output( "\n\tDeleted these file(s) [{$elapsedMs}ms]:\n\t" .
            implode( "\n\t", $dstPathsRel ) . "\n\n" );
    }

    /**
     * Decide whether the destination already holds an up-to-date copy of a file.
     *
     * Both files must exist and have the same size. With --skiphash the
     * destination must additionally be at least as new as the source;
     * otherwise the SHA-1 hashes must be equal. A source hash that could not
     * be computed never counts as a match, so such files are not skipped.
     *
     * @param FileBackend $src
     * @param FileBackend $dst
     * @param string $sPath Full source storage path
     * @param string $dPath Full destination storage path
     * @return bool
     */
    protected function filesAreSame( FileBackend $src, FileBackend $dst, $sPath, $dPath ) {
        $srcStat = $src->getFileStat( array( 'src' => $sPath ) );
        // Prefer the stat gathered by --prestat, if there is one for this path
        $dPathSha1 = sha1( $dPath );
        $dstStat = isset( $this->statCache[$dPathSha1] )
            ? $this->statCache[$dPathSha1]
            : $dst->getFileStat( array( 'src' => $dPath ) );

        if ( !is_array( $srcStat ) || !is_array( $dstStat ) ) {
            return false; // source or destination does not exist
        }
        if ( $srcStat['size'] !== $dstStat['size'] ) {
            return false;
        }
        if ( $this->hasOption( 'skiphash' ) ) {
            return $srcStat['mtime'] <= $dstStat['mtime'];
        }

        $srcSha1 = $src->getFileSha1Base36( array( 'src' => $sPath, 'latest' => 1 ) );
        if ( $srcSha1 === false ) {
            // Two failed hash lookups would otherwise compare as false === false
            return false;
        }
        return $srcSha1 === $dst->getFileSha1Base36( array( 'src' => $dPath, 'latest' => 1 ) );
    }
}
00355 
// Name the maintenance class defined in this file, then include the runner file.
// NOTE(review): RUN_MAINTENANCE_IF_MAIN is defined outside this file; presumably it
// executes $maintClass only when this script is the entry point -- confirm.
00356 $maintClass = 'CopyFileBackend';
00357 require_once RUN_MAINTENANCE_IF_MAIN;