MediaWiki  REL1_22
populateRevisionSha1.php
Go to the documentation of this file.
00001 <?php
00025 require_once __DIR__ . '/Maintenance.php';
00026 
/**
 * Maintenance script that fills in the rev_sha1 column of the revision table
 * and the ar_sha1 column of the archive table for rows where the column is
 * still an empty string.
 */
class PopulateRevisionSha1 extends LoggedUpdateMaintenance {
    /**
     * Sets the script description and a default batch size of 200.
     */
    public function __construct() {
        parent::__construct();
        $this->mDescription = "Populates the rev_sha1 and ar_sha1 fields";
        $this->setBatchSize( 200 );
    }

    /**
     * @return string Key identifying this update.
     */
    protected function getUpdateKey() {
        return 'populate rev_sha1';
    }

    /**
     * Checks that the required tables and the rev_sha1 column exist, then
     * populates the revision table, the archive rows that have an ar_rev_id,
     * and finally the legacy archive rows without one.
     *
     * @return bool False when the rev_sha1 column is missing, true once all
     *   three passes have run.
     */
    protected function doDBUpdates() {
        $db = $this->getDB( DB_MASTER );

        // The second argument to error() is presumably the "die" flag, so the
        // first two branches are not expected to fall through - confirm
        // against Maintenance::error().
        if ( !$db->tableExists( 'revision' ) ) {
            $this->error( "revision table does not exist", true );
        } elseif ( !$db->tableExists( 'archive' ) ) {
            $this->error( "archive table does not exist", true );
        } elseif ( !$db->fieldExists( 'revision', 'rev_sha1', __METHOD__ ) ) {
            // NOTE(review): the second argument `true` looks copied from the
            // error() calls above; confirm it is meant as an output channel.
            $this->output( "rev_sha1 column does not exist\n\n", true );
            return false;
        }

        $this->output( "Populating rev_sha1 column\n" );
        $rc = $this->doSha1Updates( 'revision', 'rev_id', 'rev' );

        $this->output( "Populating ar_sha1 column\n" );
        $ac = $this->doSha1Updates( 'archive', 'ar_rev_id', 'ar' );
        $this->output( "Populating ar_sha1 column legacy rows\n" );
        $ac += $this->doSha1LegacyUpdates();

        $this->output( "rev_sha1 and ar_sha1 population complete [$rc revision rows, $ac archive rows].\n" );
        return true;
    }

    /**
     * Walks the ID range of a table in blocks of mBatchSize and populates the
     * SHA-1 column of every row in the block whose SHA-1 is an empty string.
     * Each block is written in its own transaction, followed by
     * wfWaitForSlaves().
     *
     * @param string $table Table name ('revision' or 'archive' in this script)
     * @param string $idCol Name of the ID column used for batching
     * @param string $prefix Column prefix; the column "{$prefix}_sha1" is written
     * @return int Number of rows for which upgradeRow() succeeded
     */
    protected function doSha1Updates( $table, $idCol, $prefix ) {
        $db = $this->getDB( DB_MASTER );
        $start = $db->selectField( $table, "MIN($idCol)", false, __METHOD__ );
        $end = $db->selectField( $table, "MAX($idCol)", false, __METHOD__ );
        if ( !$start || !$end ) {
            $this->output( "...$table table seems to be empty.\n" );
            return 0;
        }

        $count = 0;
        # Do remaining chunk: push $end out by almost one batch so that the
        # loop condition below still admits the final, partially filled block.
        $end += $this->mBatchSize - 1;
        $blockStart = $start;
        $blockEnd = $start + $this->mBatchSize - 1;
        while ( $blockEnd <= $end ) {
            $this->output( "...doing $idCol from $blockStart to $blockEnd\n" );
            $cond = "$idCol BETWEEN $blockStart AND $blockEnd
                AND $idCol IS NOT NULL AND {$prefix}_sha1 = ''";
            $res = $db->select( $table, '*', $cond, __METHOD__ );

            $db->begin( __METHOD__ );
            foreach ( $res as $row ) {
                if ( $this->upgradeRow( $row, $table, $idCol, $prefix ) ) {
                    $count++;
                }
            }
            $db->commit( __METHOD__ );

            $blockStart += $this->mBatchSize;
            $blockEnd += $this->mBatchSize;
            wfWaitForSlaves();
        }
        return $count;
    }

    /**
     * Populates ar_sha1 for archive rows that have no ar_rev_id and an empty
     * ar_sha1. The open transaction is committed after every 100 processed
     * rows, followed by wfWaitForSlaves().
     *
     * @return int Number of rows for which upgradeLegacyArchiveRow() succeeded
     */
    protected function doSha1LegacyUpdates() {
        $count = 0;
        $db = $this->getDB( DB_MASTER );
        $res = $db->select( 'archive', '*',
            array( 'ar_rev_id IS NULL', 'ar_sha1' => '' ), __METHOD__ );

        // Rows processed (successfully or not) since the last commit.
        $updateSize = 0;
        $db->begin( __METHOD__ );
        foreach ( $res as $row ) {
            if ( $this->upgradeLegacyArchiveRow( $row ) ) {
                ++$count;
            }
            if ( ++$updateSize >= 100 ) {
                $updateSize = 0;
                $db->commit( __METHOD__ );
                $this->output( "Commited row with ar_timestamp={$row->ar_timestamp}\n" );
                wfWaitForSlaves();
                $db->begin( __METHOD__ );
            }
        }
        $db->commit( __METHOD__ );
        return $count;
    }

    /**
     * Computes and stores the SHA-1 for a single row identified by its ID
     * column.
     *
     * @param object $row Row as returned by the select in doSha1Updates()
     * @param string $table Table name; 'archive' rows are loaded through
     *   Revision::newFromArchiveRow(), all others through new Revision()
     * @param string $idCol Name of the ID column used in the update condition
     * @param string $prefix Column prefix; the column "{$prefix}_sha1" is written
     * @return bool True if the row was updated, false if its data could not
     *   be loaded
     */
    protected function upgradeRow( $row, $table, $idCol, $prefix ) {
        $db = $this->getDB( DB_MASTER );
        try {
            $rev = ( $table === 'archive' )
                ? Revision::newFromArchiveRow( $row )
                : new Revision( $row );
            $text = $rev->getSerializedData();
        } catch ( MWException $e ) {
            $this->output( "Data of revision with {$idCol}={$row->$idCol} unavailable!\n" );
            return false; // bug 22624?
        }
        if ( !is_string( $text ) ) {
            # This should not happen, but sometimes does (bug 20757)
            $this->output( "Data of revision with {$idCol}={$row->$idCol} unavailable!\n" );
            return false;
        } else {
            $db->update( $table,
                array( "{$prefix}_sha1" => Revision::base36Sha1( $text ) ),
                array( $idCol => $row->$idCol ),
                __METHOD__
            );
            return true;
        }
    }

    /**
     * Computes and stores ar_sha1 for a legacy archive row, i.e. one that is
     * matched by namespace, title, timestamp and length instead of by an ID.
     *
     * @param object $row Archive row as returned by the select in
     *   doSha1LegacyUpdates()
     * @return bool True if the row was updated, false if its data could not
     *   be loaded
     */
    protected function upgradeLegacyArchiveRow( $row ) {
        $db = $this->getDB( DB_MASTER );
        try {
            $rev = Revision::newFromArchiveRow( $row );
            // Loaded inside the try block, as upgradeRow() does, so that an
            // MWException raised while fetching the data skips this row
            // instead of propagating out of the batch loop.
            $text = $rev->getSerializedData();
        } catch ( MWException $e ) {
            $this->output( "Text of revision with timestamp {$row->ar_timestamp} unavailable!\n" );
            return false; // bug 22624?
        }
        if ( !is_string( $text ) ) {
            # This should not happen, but sometimes does (bug 20757)
            $this->output( "Data of revision with timestamp {$row->ar_timestamp} unavailable!\n" );
            return false;
        } else {
            # Archive table as no PK, but (NS,title,time) should be near unique.
            # Any duplicates on those should also have duplicated text anyway.
            $db->update( 'archive',
                array( 'ar_sha1' => Revision::base36Sha1( $text ) ),
                array(
                    'ar_namespace' => $row->ar_namespace,
                    'ar_title' => $row->ar_title,
                    'ar_timestamp' => $row->ar_timestamp,
                    'ar_len' => $row->ar_len // extra sanity
                ),
                __METHOD__
            );
            return true;
        }
    }
}
00202 
// Name of the maintenance class defined in this file; presumably picked up
// by the script pulled in through RUN_MAINTENANCE_IF_MAIN - confirm there.
$maintClass = 'PopulateRevisionSha1';
require_once RUN_MAINTENANCE_IF_MAIN;