MediaWiki  REL1_24
populateRevisionSha1.php
Go to the documentation of this file.
00001 <?php
00025 require_once __DIR__ . '/Maintenance.php';
00026 
/**
 * Maintenance script that fills in the rev_sha1 column of the revision table
 * and the ar_sha1 column of the archive table for every row where the column
 * is still an empty string.
 */
class PopulateRevisionSha1 extends LoggedUpdateMaintenance {
    public function __construct() {
        parent::__construct();
        $this->mDescription = "Populates the rev_sha1 and ar_sha1 fields";
        $this->setBatchSize( 200 );
    }

    /**
     * Key under which completion of this update is recorded.
     *
     * @return string
     */
    protected function getUpdateKey() {
        return 'populate rev_sha1';
    }

    /**
     * Populate rev_sha1 for the revision table, then ar_sha1 for the archive
     * table (rows with an ar_rev_id first, then legacy rows without one).
     *
     * @return bool False if the rev_sha1 column is missing, true once both
     *  tables have been processed.
     */
    protected function doDBUpdates() {
        $db = $this->getDB( DB_MASTER );

        if ( !$db->tableExists( 'revision' ) ) {
            // The second argument presumably asks error() to abort the script.
            $this->error( "revision table does not exist", true );
        } elseif ( !$db->tableExists( 'archive' ) ) {
            $this->error( "archive table does not exist", true );
        } elseif ( !$db->fieldExists( 'revision', 'rev_sha1', __METHOD__ ) ) {
            // Plain output: unlike error(), output() has no "die" flag, so no
            // second argument is passed here.
            $this->output( "rev_sha1 column does not exist\n\n" );

            return false;
        }

        $this->output( "Populating rev_sha1 column\n" );
        $rc = $this->doSha1Updates( 'revision', 'rev_id', 'rev' );

        $this->output( "Populating ar_sha1 column\n" );
        $ac = $this->doSha1Updates( 'archive', 'ar_rev_id', 'ar' );
        $this->output( "Populating ar_sha1 column legacy rows\n" );
        $ac += $this->doSha1LegacyUpdates();

        $this->output( "rev_sha1 and ar_sha1 population complete "
            . "[$rc revision rows, $ac archive rows].\n" );

        return true;
    }

    /**
     * Walk a table in ID ranges of mBatchSize and fill in the empty
     * {$prefix}_sha1 values, committing once per range.
     *
     * @param string $table Table to update ('revision' or 'archive')
     * @param string $idCol Integer ID column used to slice the table into ranges
     * @param string $prefix Column prefix; the updated column is "{$prefix}_sha1"
     * @return int Number of rows whose hash was written
     */
    protected function doSha1Updates( $table, $idCol, $prefix ) {
        $db = $this->getDB( DB_MASTER );
        $start = $db->selectField( $table, "MIN($idCol)", false, __METHOD__ );
        $end = $db->selectField( $table, "MAX($idCol)", false, __METHOD__ );
        if ( !$start || !$end ) {
            $this->output( "...$table table seems to be empty.\n" );

            return 0;
        }

        // The bounds end up inside a raw SQL fragment below, so make sure
        // they are plain integers no matter what the DB layer handed back.
        $start = (int)$start;
        $end = (int)$end;

        $count = 0;
        // Pad $end so that the final, possibly partial, range still satisfies
        // the loop condition and gets processed.
        $end += $this->mBatchSize - 1;
        $blockStart = $start;
        $blockEnd = $start + $this->mBatchSize - 1;
        while ( $blockEnd <= $end ) {
            $this->output( "...doing $idCol from $blockStart to $blockEnd\n" );
            $conds = array(
                "$idCol BETWEEN $blockStart AND $blockEnd",
                "$idCol IS NOT NULL",
                "{$prefix}_sha1" => '',
            );
            $res = $db->select( $table, '*', $conds, __METHOD__ );

            // One transaction per ID range.
            $db->begin( __METHOD__ );
            foreach ( $res as $row ) {
                if ( $this->upgradeRow( $row, $table, $idCol, $prefix ) ) {
                    $count++;
                }
            }
            $db->commit( __METHOD__ );

            $blockStart += $this->mBatchSize;
            $blockEnd += $this->mBatchSize;
            wfWaitForSlaves();
        }

        return $count;
    }

    /**
     * Fill in ar_sha1 for archive rows that have no ar_rev_id and therefore
     * cannot be processed in ID ranges by doSha1Updates().
     *
     * @return int Number of rows whose hash was written
     */
    protected function doSha1LegacyUpdates() {
        $count = 0;
        $db = $this->getDB( DB_MASTER );
        $res = $db->select( 'archive', '*',
            array( 'ar_rev_id IS NULL', 'ar_sha1' => '' ), __METHOD__ );

        $updateSize = 0;
        $db->begin( __METHOD__ );
        foreach ( $res as $row ) {
            if ( $this->upgradeLegacyArchiveRow( $row ) ) {
                ++$count;
            }
            // Commit after every 100 rows visited (updated or not), then
            // open a fresh transaction for the next chunk.
            if ( ++$updateSize >= 100 ) {
                $updateSize = 0;
                $db->commit( __METHOD__ );
                $this->output( "Commited row with ar_timestamp={$row->ar_timestamp}\n" );
                wfWaitForSlaves();
                $db->begin( __METHOD__ );
            }
        }
        $db->commit( __METHOD__ );

        return $count;
    }

    /**
     * Compute and store the hash for a single row identified by $idCol.
     *
     * @param stdClass $row Row selected from $table
     * @param string $table 'revision' or 'archive'
     * @param string $idCol Column whose value identifies the row in the UPDATE
     * @param string $prefix Column prefix; the updated column is "{$prefix}_sha1"
     * @return bool True if the row was updated, false if its data was unavailable
     */
    protected function upgradeRow( $row, $table, $idCol, $prefix ) {
        $db = $this->getDB( DB_MASTER );
        try {
            $rev = ( $table === 'archive' )
                ? Revision::newFromArchiveRow( $row )
                : new Revision( $row );
            $text = $rev->getSerializedData();
        } catch ( MWException $e ) {
            $this->output( "Data of revision with {$idCol}={$row->$idCol} unavailable!\n" );

            return false; // bug 22624?
        }

        if ( !is_string( $text ) ) {
            # This should not happen, but sometimes does (bug 20757)
            $this->output( "Data of revision with {$idCol}={$row->$idCol} unavailable!\n" );

            return false;
        }

        $db->update( $table,
            array( "{$prefix}_sha1" => Revision::base36Sha1( $text ) ),
            array( $idCol => $row->$idCol ),
            __METHOD__
        );

        return true;
    }

    /**
     * Compute and store the hash for a single archive row without ar_rev_id.
     *
     * @param stdClass $row Row selected from the archive table
     * @return bool True if the row was updated, false if its data was unavailable
     */
    protected function upgradeLegacyArchiveRow( $row ) {
        $db = $this->getDB( DB_MASTER );
        try {
            $rev = Revision::newFromArchiveRow( $row );
            // Loaded inside the try, like upgradeRow() does, so that one
            // unreadable row is logged and skipped rather than ending the run.
            $text = $rev->getSerializedData();
        } catch ( MWException $e ) {
            $this->output( "Text of revision with timestamp {$row->ar_timestamp} unavailable!\n" );

            return false; // bug 22624?
        }

        if ( !is_string( $text ) ) {
            # This should not happen, but sometimes does (bug 20757)
            $this->output( "Data of revision with timestamp {$row->ar_timestamp} unavailable!\n" );

            return false;
        }

        # Archive table has no PK, but (NS,title,time) should be near unique.
        # Any duplicates on those should also have duplicated text anyway.
        $db->update( 'archive',
            array( 'ar_sha1' => Revision::base36Sha1( $text ) ),
            array(
                'ar_namespace' => $row->ar_namespace,
                'ar_title' => $row->ar_title,
                'ar_timestamp' => $row->ar_timestamp,
                'ar_len' => $row->ar_len // extra sanity
            ),
            __METHOD__
        );

        return true;
    }
}
00214 
// Name of the maintenance class defined in this file; it is set before the
// runner include below so that the included code can see it.
$maintClass = 'PopulateRevisionSha1';
// NOTE(review): RUN_MAINTENANCE_IF_MAIN is defined outside this file
// (presumably by Maintenance.php) — confirm it resolves to the runner script.
require_once RUN_MAINTENANCE_IF_MAIN;