MediaWiki  REL1_24
populateImageSha1.php
Go to the documentation of this file.
00001 <?php
00024 require_once __DIR__ . '/Maintenance.php';
00025 
/**
 * Maintenance script that fills in the img_sha1 column of the image table
 * and the oi_sha1 column of the oldimage table.
 *
 * The hash of each file is obtained from its repo via getFileSha1() and then
 * written either through the Database class or, with --method=pipe, by
 * streaming UPDATE statements to a mysql command-line process.
 */
class PopulateImageSha1 extends LoggedUpdateMaintenance {
    /**
     * Set the script description and register its command-line options.
     */
    public function __construct() {
        parent::__construct();
        $this->mDescription = "Populate the img_sha1 field";
        $this->addOption( 'force', "Recalculate sha1 for rows that already have a value" );
        $this->addOption( 'multiversiononly', "Calculate only for files with several versions" );
        $this->addOption( 'method', "Use 'pipe' to pipe to mysql command line,\n" .
            "\t\tdefault uses Database class", false, true );
        $this->addOption(
            'file',
            'Fix for a specific file, without File: namespace prefixed',
            false,
            true
        );
    }

    /**
     * @return string Key identifying this update
     */
    protected function getUpdateKey() {
        return 'populate img_sha1';
    }

    /**
     * @return string Message describing that the update was already done
     */
    protected function updateSkippedMessage() {
        return 'img_sha1 column of image table already populated.';
    }

    /**
     * Entry point.
     *
     * Partial runs (a single --file, or --multiversiononly) call
     * doDBUpdates() directly so that the update log checks/saves are
     * skipped; every other run goes through parent::execute().
     */
    public function execute() {
        // Use the same "was a file given" test as doDBUpdates() so that both
        // methods always agree on whether this is a single-file run.
        $singleFile = ( $this->getOption( 'file', '' ) != '' );
        if ( $singleFile || $this->hasOption( 'multiversiononly' ) ) {
            $this->doDBUpdates(); // skip update log checks/saves
        } else {
            parent::execute();
        }
    }

    /**
     * Compute and store the SHA-1 of the selected files and of their old
     * versions.
     *
     * Which rows are processed depends on the options:
     *  - --file: only the named image row;
     *  - --multiversiononly: every distinct name found in oldimage;
     *  - --force: every image row;
     *  - otherwise: image rows whose img_sha1 is the empty string.
     *
     * @return bool True when no specific file was requested, false when
     *  only a single file was handled (such a run must not be logged)
     */
    public function doDBUpdates() {
        $method = $this->getOption( 'method', 'normal' );
        $file = $this->getOption( 'file', '' );
        $force = $this->getOption( 'force' );
        $isSingleFile = ( $file != '' );
        $isRegen = ( $force || $isSingleFile ); // forced recalculation?

        $t = -microtime( true );
        $dbw = wfGetDB( DB_MASTER );
        if ( $isSingleFile ) {
            $res = $dbw->select(
                'image',
                array( 'img_name' ),
                array( 'img_name' => $file ),
                __METHOD__
            );
            // The result is an object even when nothing matched, so the row
            // count has to be checked to detect a missing file.
            if ( !$res || $res->numRows() == 0 ) {
                $this->error( "No such file: $file", true );

                return false;
            }
            $this->output( "Populating img_sha1 field for specified files\n" );
        } elseif ( $this->hasOption( 'multiversiononly' ) ) {
            $this->output( "Populating and recalculating img_sha1 field for versioned files\n" );
            $res = $dbw->select( 'oldimage',
                array( 'img_name' => 'DISTINCT(oi_name)' ), array(), __METHOD__ );
        } else {
            if ( $force ) {
                $conds = array();
                $this->output( "Populating and recalculating img_sha1 field\n" );
            } else {
                $conds = array( 'img_sha1' => '' );
                $this->output( "Populating img_sha1 field\n" );
            }
            $res = $dbw->select( 'image', array( 'img_name' ), $conds, __METHOD__ );
        }

        $imageTable = $dbw->tableName( 'image' );
        $oldImageTable = $dbw->tableName( 'oldimage' );

        $usePipe = ( $method == 'pipe' );
        $pipe = null;
        if ( $usePipe ) {
            // Opening a pipe allows the SHA-1 operation to be done in parallel
            // with the database write operation, because the writes are queued
            // in the pipe buffer. This can improve performance by up to a
            // factor of 2.
            global $wgDBuser, $wgDBserver, $wgDBpassword, $wgDBname;
            // wfEscapeShellArg() is given both the password and the database
            // name in the last call, so both end up on the command line.
            // NOTE(review): the password is passed as a command-line argument
            // and may be visible to other local users - confirm acceptable.
            $cmd = 'mysql -u' . wfEscapeShellArg( $wgDBuser ) .
                ' -h' . wfEscapeShellArg( $wgDBserver ) .
                ' -p' . wfEscapeShellArg( $wgDBpassword, $wgDBname );
            $this->output( "Using pipe method\n" );
            $pipe = popen( $cmd, 'w' );
            if ( $pipe === false ) {
                // Without a pipe every fwrite() below would fail and no row
                // would be updated, so stop here instead.
                $this->error( "Could not open a pipe to the mysql command", true );

                return false;
            }
        }

        $numRows = $res->numRows();
        $i = 0;
        foreach ( $res as $row ) {
            if ( $i % $this->mBatchSize == 0 ) {
                $this->output( sprintf(
                    "Done %d of %d, %5.3f%%  \r", $i, $numRows, $i / $numRows * 100 ) );
                wfWaitForSlaves();
            }

            // Kept in its own variable: $file holds the --file option and is
            // still needed for the return value below.
            $localFile = wfLocalFile( $row->img_name );
            if ( !$localFile ) {
                continue;
            }

            // Upgrade the current file version...
            $sha1 = $localFile->getRepo()->getFileSha1( $localFile->getPath() );
            if ( strval( $sha1 ) !== '' ) { // file on disk and hashed properly
                if ( $isRegen && $localFile->getSha1() !== $sha1 ) {
                    // The population was probably done already. If the old SHA1
                    // does not match, then both fix the SHA1 and the metadata.
                    $localFile->upgradeRow();
                } else {
                    $sql = "UPDATE $imageTable SET img_sha1=" . $dbw->addQuotes( $sha1 ) .
                        " WHERE img_name=" . $dbw->addQuotes( $localFile->getName() );
                    if ( $usePipe ) {
                        fwrite( $pipe, "$sql;\n" );
                    } else {
                        $dbw->query( $sql, __METHOD__ );
                    }
                }
            }
            // Upgrade the old file versions...
            foreach ( $localFile->getHistory() as $oldFile ) {
                $sha1 = $oldFile->getRepo()->getFileSha1( $oldFile->getPath() );
                if ( strval( $sha1 ) !== '' ) { // file on disk and hashed properly
                    if ( $isRegen && $oldFile->getSha1() !== $sha1 ) {
                        // The population was probably done already. If the old SHA1
                        // does not match, then both fix the SHA1 and the metadata.
                        $oldFile->upgradeRow();
                    } else {
                        $sql = "UPDATE $oldImageTable SET oi_sha1=" . $dbw->addQuotes( $sha1 ) .
                            " WHERE (oi_name=" . $dbw->addQuotes( $oldFile->getName() ) . " AND" .
                            " oi_archive_name=" . $dbw->addQuotes( $oldFile->getArchiveName() ) . ")";
                        if ( $usePipe ) {
                            fwrite( $pipe, "$sql;\n" );
                        } else {
                            $dbw->query( $sql, __METHOD__ );
                        }
                    }
                }
            }
            $i++;
        }
        if ( $usePipe ) {
            fflush( $pipe );
            pclose( $pipe );
        }
        $t += microtime( true );
        $this->output( sprintf( "\nDone %d files in %.1f seconds\n", $numRows, $t ) );

        return !$isSingleFile; // we only updated *some* files, don't log
    }
}
00182 
// Name of the maintenance class defined in this file; presumably read by the
// code pulled in through RUN_MAINTENANCE_IF_MAIN - verify against Maintenance.php.
$maintClass = 'PopulateImageSha1';
require_once( RUN_MAINTENANCE_IF_MAIN );