[ Index ] |
PHP Cross Reference of MediaWiki-1.24.0 |
[Summary view] [Print] [Text view]
<?php
/**
 * Copy all files in some containers of one backend to another.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 * http://www.gnu.org/copyleft/gpl.html
 *
 * @file
 * @ingroup Maintenance
 */

require_once __DIR__ . '/Maintenance.php';

/**
 * Copy all files in one container of one backend to another.
 *
 * This can also be used to re-shard the files for one backend using the
 * config of second backend. The second backend should have the same config
 * as the first, except for it having a different name and different sharding
 * configuration. The backend should be made read-only while this runs.
 * After this script finishes, the old files in the containers can be deleted.
 *
 * @ingroup Maintenance
 */
class CopyFileBackend extends Maintenance {
	/** @var array|null (path sha1 => stat) Pre-computed dst stat entries from listings */
	protected $statCache = null;

	/**
	 * Register the command line options and the default batch size (50).
	 */
	public function __construct() {
		parent::__construct();
		$this->mDescription = "Copy files in one backend to another.";
		$this->addOption( 'src', 'Backend containing the source files', true, true );
		$this->addOption( 'dst', 'Backend where files should be copied to', true, true );
		$this->addOption( 'containers', 'Pipe separated list of containers', true, true );
		$this->addOption( 'subdir', 'Only do items in this child directory', false, true );
		$this->addOption( 'ratefile', 'File to check periodically for batch size', false, true );
		$this->addOption( 'prestat', 'Stat the destination files first (try to use listings)' );
		$this->addOption( 'skiphash', 'Skip SHA-1 sync checks for files' );
		$this->addOption( 'missingonly', 'Only copy files missing from destination listing' );
		$this->addOption( 'syncviadelete', 'Delete destination files missing from source listing' );
		$this->addOption( 'utf8only', 'Skip source files that do not have valid UTF-8 names' );
		$this->setBatchSize( 50 );
	}

	/**
	 * For each requested container (optionally limited to one sub-directory):
	 * list the source files, optionally pre-build the destination stat cache,
	 * copy the files over in batches and, with --syncviadelete, delete the
	 * destination files that are absent from the source listing.
	 */
	public function execute() {
		$src = FileBackendGroup::singleton()->get( $this->getOption( 'src' ) );
		$dst = FileBackendGroup::singleton()->get( $this->getOption( 'dst' ) );
		$containers = explode( '|', $this->getOption( 'containers' ) );
		$subDir = rtrim( $this->getOption( 'subdir', '' ), '/' );

		$rateFile = $this->getOption( 'ratefile' );

		if ( $this->hasOption( 'utf8only' ) && !extension_loaded( 'mbstring' ) ) {
			$this->error( "Cannot check for UTF-8, mbstring extension missing.", 1 ); // die
		}

		foreach ( $containers as $container ) {
			if ( $subDir !== '' ) {
				$backendRel = "$container/$subDir";
				$this->output( "Doing container '$container', directory '$subDir'...\n" );
			} else {
				$backendRel = $container;
				$this->output( "Doing container '$container'...\n" );
			}

			if ( $this->hasOption( 'missingonly' ) ) {
				$this->output( "\tBuilding list of missing files..." );
				$srcPathsRel = $this->getListingDiffRel( $src, $dst, $backendRel );
				$this->output( count( $srcPathsRel ) . " file(s) need to be copied.\n" );
			} else {
				$srcPathsRel = $src->getFileList( array(
					'dir' => $src->getRootStoragePath() . "/$backendRel",
					'adviseStat' => true // avoid HEADs
				) );
				if ( $srcPathsRel === null ) {
					$this->error( "Could not list files in $container.", 1 ); // die
				}
			}

			// With --missingonly the per-file comparison is skipped entirely,
			// so a destination stat cache would never be consulted.
			if ( $this->hasOption( 'prestat' ) && !$this->hasOption( 'missingonly' ) ) {
				// Build the stat cache for the destination files
				$this->output( "\tBuilding destination stat cache..." );
				$dstPathsRel = $dst->getFileList( array(
					'dir' => $dst->getRootStoragePath() . "/$backendRel",
					'adviseStat' => true // avoid HEADs
				) );
				if ( $dstPathsRel === null ) {
					$this->error( "Could not list files in $container.", 1 ); // die
				}
				$this->statCache = array();
				foreach ( $dstPathsRel as $dstPathRel ) {
					$path = $dst->getRootStoragePath() . "/$backendRel/$dstPathRel";
					$this->statCache[sha1( $path )] = $dst->getFileStat( array( 'src' => $path ) );
				}
				$this->output( "done [" . count( $this->statCache ) . " file(s)]\n" );
			}

			$this->output( "\tCopying file(s)...\n" );
			$count = 0;
			$batchPaths = array();
			foreach ( $srcPathsRel as $srcPathRel ) {
				// Check up on the rate file periodically to adjust the concurrency
				if ( $count % 500 === 0 ) {
					$this->refreshBatchSize( $rateFile );
				}
				$batchPaths[$srcPathRel] = 1; // remove duplicates
				if ( count( $batchPaths ) >= $this->mBatchSize ) {
					$this->copyFileBatch( array_keys( $batchPaths ), $backendRel, $src, $dst );
					$batchPaths = array(); // done
				}
				++$count;
			}
			if ( count( $batchPaths ) ) { // left-overs
				$this->copyFileBatch( array_keys( $batchPaths ), $backendRel, $src, $dst );
				$batchPaths = array(); // done
			}
			$this->output( "\tCopied $count file(s).\n" );

			if ( $this->hasOption( 'syncviadelete' ) ) {
				$this->output( "\tBuilding list of excess destination files..." );
				// Arguments are swapped on purpose: files in $dst that are not in $src
				$delPathsRel = $this->getListingDiffRel( $dst, $src, $backendRel );
				$this->output( count( $delPathsRel ) . " file(s) need to be deleted.\n" );

				$this->output( "\tDeleting file(s)...\n" );
				$count = 0;
				$batchPaths = array();
				foreach ( $delPathsRel as $delPathRel ) {
					// Check up on the rate file periodically to adjust the concurrency
					if ( $count % 500 === 0 ) {
						$this->refreshBatchSize( $rateFile );
					}
					$batchPaths[$delPathRel] = 1; // remove duplicates
					if ( count( $batchPaths ) >= $this->mBatchSize ) {
						$this->delFileBatch( array_keys( $batchPaths ), $backendRel, $dst );
						$batchPaths = array(); // done
					}
					++$count;
				}
				if ( count( $batchPaths ) ) { // left-overs
					$this->delFileBatch( array_keys( $batchPaths ), $backendRel, $dst );
					$batchPaths = array(); // done
				}

				$this->output( "\tDeleted $count file(s).\n" );
			}

			if ( $subDir !== '' ) {
				$this->output( "Finished container '$container', directory '$subDir'.\n" );
			} else {
				$this->output( "Finished container '$container'.\n" );
			}
		}

		$this->output( "Done.\n" );
	}

	/**
	 * Re-read the batch size from the rate file, if one was given.
	 *
	 * The file content is cast to an integer and clamped to a minimum of 1.
	 * If the file cannot be read, the problem is reported and the current
	 * batch size is kept instead of silently dropping to 1.
	 *
	 * @param string|null $rateFile Path given via --ratefile, or null if unset
	 * @return void
	 */
	protected function refreshBatchSize( $rateFile ) {
		if ( !$rateFile ) {
			return; // no rate file configured
		}
		$contents = file_get_contents( $rateFile );
		if ( $contents === false ) {
			$this->error( "Could not read rate file '$rateFile'; " .
				"keeping batch size {$this->mBatchSize}." );

			return;
		}
		$this->mBatchSize = max( 1, (int)$contents );
		$this->output( "\tBatch size is now {$this->mBatchSize}.\n" );
	}

	/**
	 * Run a batch of quick operations with 'bypassReadOnly' set, retrying
	 * once after a 10 second pause if the first attempt is not OK.
	 *
	 * @param FileBackend $backend Backend to run the operations on
	 * @param array $ops Operation list for doQuickOperations()
	 * @return mixed The status object returned by the last doQuickOperations() call
	 */
	protected function doQuickOperationsWithRetry( FileBackend $backend, array $ops ) {
		$opts = array( 'bypassReadOnly' => 1 );
		$status = $backend->doQuickOperations( $ops, $opts );
		if ( !$status->isOK() ) {
			sleep( 10 ); // wait and retry the batch again
			$status = $backend->doQuickOperations( $ops, $opts );
		}

		return $status;
	}

	/**
	 * @param FileBackend $src
	 * @param FileBackend $dst
	 * @param string $backendRel
	 * @return array (rel paths in $src minus those in $dst)
	 */
	protected function getListingDiffRel( FileBackend $src, FileBackend $dst, $backendRel ) {
		$srcPathsRel = $src->getFileList( array(
			'dir' => $src->getRootStoragePath() . "/$backendRel" ) );
		if ( $srcPathsRel === null ) {
			$this->error( "Could not list files in source container.", 1 ); // die
		}
		$dstPathsRel = $dst->getFileList( array(
			'dir' => $dst->getRootStoragePath() . "/$backendRel" ) );
		if ( $dstPathsRel === null ) {
			$this->error( "Could not list files in destination container.", 1 ); // die
		}
		// Get the list of destination files, keyed by the SHA-1 of the relative path
		$relFilesDstSha1 = array();
		foreach ( $dstPathsRel as $dstPathRel ) {
			$relFilesDstSha1[sha1( $dstPathRel )] = 1;
		}
		unset( $dstPathsRel ); // free
		// Get the list of missing files
		$missingPathsRel = array();
		foreach ( $srcPathsRel as $srcPathRel ) {
			if ( !isset( $relFilesDstSha1[sha1( $srcPathRel )] ) ) {
				$missingPathsRel[] = $srcPathRel;
			}
		}
		unset( $srcPathsRel ); // free

		return $missingPathsRel;
	}

	/**
	 * Copy one batch of files from $src to the same relative paths in $dst.
	 *
	 * Files are skipped (with a message) when their path fails the --utf8only
	 * check, when they are already considered identical at the destination
	 * (unless --missingonly is set), or when no usable local reference can be
	 * obtained. A failed directory prepare or a failed batch store is fatal.
	 *
	 * @param array $srcPathsRel
	 * @param string $backendRel
	 * @param FileBackend $src
	 * @param FileBackend $dst
	 * @return void
	 */
	protected function copyFileBatch(
		array $srcPathsRel, $backendRel, FileBackend $src, FileBackend $dst
	) {
		$ops = array();
		$fsFiles = array();
		$copiedRel = array(); // for output message
		$wikiId = $src->getWikiId();

		// Download the batch of source files into backend cache...
		if ( $this->hasOption( 'missingonly' ) ) {
			$srcPaths = array();
			foreach ( $srcPathsRel as $srcPathRel ) {
				$srcPaths[] = $src->getRootStoragePath() . "/$backendRel/$srcPathRel";
			}
			$startTime = microtime( true );
			$fsFiles = $src->getLocalReferenceMulti( array( 'srcs' => $srcPaths, 'latest' => 1 ) );
			$elapsedMs = floor( ( microtime( true ) - $startTime ) * 1000 );
			$this->output( "\n\tDownloaded these file(s) [{$elapsedMs}ms]:\n\t" .
				implode( "\n\t", $srcPaths ) . "\n\n" );
		}

		// Determine what files need to be copied over...
		foreach ( $srcPathsRel as $srcPathRel ) {
			$srcPath = $src->getRootStoragePath() . "/$backendRel/$srcPathRel";
			$dstPath = $dst->getRootStoragePath() . "/$backendRel/$srcPathRel";
			if ( $this->hasOption( 'utf8only' ) && !mb_check_encoding( $srcPath, 'UTF-8' ) ) {
				$this->error( "$wikiId: Detected illegal (non-UTF8) path for $srcPath." );
				continue;
			} elseif ( !$this->hasOption( 'missingonly' )
				&& $this->filesAreSame( $src, $dst, $srcPath, $dstPath )
			) {
				$this->output( "\tAlready have $srcPathRel.\n" );
				continue; // assume already copied...
			}
			// Reuse the pre-downloaded reference when there is one for this path
			$fsFile = array_key_exists( $srcPath, $fsFiles )
				? $fsFiles[$srcPath]
				: $src->getLocalReference( array( 'src' => $srcPath, 'latest' => 1 ) );
			if ( !$fsFile ) {
				$src->clearCache( array( $srcPath ) );
				if ( $src->fileExists( array( 'src' => $srcPath, 'latest' => 1 ) ) === false ) {
					$this->error( "$wikiId: File '$srcPath' was listed but does not exist." );
				} else {
					$this->error( "$wikiId: Could not get local copy of $srcPath." );
				}
				continue;
			} elseif ( !$fsFile->exists() ) {
				// FSFileBackends just return the path for getLocalReference() and paths with
				// illegal slashes may get normalized to a different path. This can cause the
				// local reference to not exist...skip these broken files.
				$this->error( "$wikiId: Detected possible illegal path for $srcPath." );
				continue;
			}
			$fsFiles[] = $fsFile; // keep TempFSFile objects alive as needed
			// Note: prepare() is usually fast for key/value backends
			$status = $dst->prepare( array( 'dir' => dirname( $dstPath ), 'bypassReadOnly' => 1 ) );
			if ( !$status->isOK() ) {
				$this->error( print_r( $status->getErrorsArray(), true ) );
				$this->error( "$wikiId: Could not copy $srcPath to $dstPath.", 1 ); // die
			}
			$ops[] = array( 'op' => 'store',
				'src' => $fsFile->getPath(), 'dst' => $dstPath, 'overwrite' => 1 );
			$copiedRel[] = $srcPathRel;
		}

		if ( !count( $ops ) ) {
			return; // every file in this batch was skipped; nothing to store
		}

		// Copy in the batch of source files...
		$startTime = microtime( true );
		$status = $this->doQuickOperationsWithRetry( $dst, $ops );
		$elapsedMs = floor( ( microtime( true ) - $startTime ) * 1000 );
		if ( !$status->isOK() ) {
			$this->error( print_r( $status->getErrorsArray(), true ) );
			$this->error( "$wikiId: Could not copy file batch.", 1 ); // die
		} elseif ( count( $copiedRel ) ) {
			$this->output( "\n\tCopied these file(s) [{$elapsedMs}ms]:\n\t" .
				implode( "\n\t", $copiedRel ) . "\n\n" );
		}
	}

	/**
	 * Delete one batch of files from $dst. A failed batch delete is fatal.
	 *
	 * @param array $dstPathsRel
	 * @param string $backendRel
	 * @param FileBackend $dst
	 * @return void
	 */
	protected function delFileBatch(
		array $dstPathsRel, $backendRel, FileBackend $dst
	) {
		$ops = array();
		$deletedRel = array(); // for output message
		$wikiId = $dst->getWikiId();

		// Build the delete operation for each destination file...
		foreach ( $dstPathsRel as $dstPathRel ) {
			$dstPath = $dst->getRootStoragePath() . "/$backendRel/$dstPathRel";
			$ops[] = array( 'op' => 'delete', 'src' => $dstPath );
			$deletedRel[] = $dstPathRel;
		}

		if ( !count( $ops ) ) {
			return; // empty batch; nothing to delete
		}

		// Delete the batch of destination files...
		$startTime = microtime( true );
		$status = $this->doQuickOperationsWithRetry( $dst, $ops );
		$elapsedMs = floor( ( microtime( true ) - $startTime ) * 1000 );
		if ( !$status->isOK() ) {
			$this->error( print_r( $status->getErrorsArray(), true ) );
			$this->error( "$wikiId: Could not delete file batch.", 1 ); // die
		} elseif ( count( $deletedRel ) ) {
			$this->output( "\n\tDeleted these file(s) [{$elapsedMs}ms]:\n\t" .
				implode( "\n\t", $deletedRel ) . "\n\n" );
		}
	}

	/**
	 * Decide whether the destination already holds the same file as the source.
	 *
	 * Both files must have stat info with equal sizes. After that, the first
	 * applicable check wins: MD5 equality when both stat entries carry 'md5',
	 * an mtime comparison (source not newer than destination) when --skiphash
	 * is set, and otherwise a comparison of the SHA-1 (base 36) hashes.
	 * When the destination stat cache is populated, it is used instead of
	 * stat-ing the destination file, and an uncached path counts as missing.
	 *
	 * @param FileBackend $src
	 * @param FileBackend $dst
	 * @param string $sPath
	 * @param string $dPath
	 * @return bool
	 */
	protected function filesAreSame( FileBackend $src, FileBackend $dst, $sPath, $dPath ) {
		$skipHash = $this->hasOption( 'skiphash' );
		$srcStat = $src->getFileStat( array( 'src' => $sPath ) );
		$dPathSha1 = sha1( $dPath );
		if ( $this->statCache !== null ) {
			// All dst files are already in stat cache
			$dstStat = isset( $this->statCache[$dPathSha1] )
				? $this->statCache[$dPathSha1]
				: false;
		} else {
			$dstStat = $dst->getFileStat( array( 'src' => $dPath ) );
		}
		// Initial fast checks to see if files are obviously different
		$sameFast = (
			is_array( $srcStat ) // sanity check that source exists
			&& is_array( $dstStat ) // dest exists
			&& $srcStat['size'] === $dstStat['size']
		);
		// More thorough checks against files
		if ( !$sameFast ) {
			$same = false; // no need to look farther
		} elseif ( isset( $srcStat['md5'] ) && isset( $dstStat['md5'] ) ) {
			// If MD5 was already in the stat info, just use it.
			// This is useful as many objects stores can return this in object listing,
			// so we can use it to avoid slow per-file HEADs.
			$same = ( $srcStat['md5'] === $dstStat['md5'] );
		} elseif ( $skipHash ) {
			// This mode is good for copying to a backup location or resyncing clone
			// backends in FileBackendMultiWrite (since they get writes second, they have
			// higher timestamps). However, when copying the other way, this hits loads of
			// false positives (possibly 100%) and wastes a bunch of time on GETs/PUTs.
			$same = ( $srcStat['mtime'] <= $dstStat['mtime'] );
		} else {
			// This is the slowest method which does many per-file HEADs (unless an object
			// store tracks SHA-1 in listings).
			$same = ( $src->getFileSha1Base36( array( 'src' => $sPath, 'latest' => 1 ) )
				=== $dst->getFileSha1Base36( array( 'src' => $dPath, 'latest' => 1 ) ) );
		}

		return $same;
	}
}

$maintClass = 'CopyFileBackend';
require_once RUN_MAINTENANCE_IF_MAIN;
title
Description
Body
title
Description
Body
title
Description
Body
title
Body
Generated: Fri Nov 28 14:03:12 2014 | Cross-referenced by PHPXref 0.7.1 |