MediaWiki  REL1_19
populateRevisionSha1.php
Go to the documentation of this file.
00001 <?php
00024 require_once( dirname( __FILE__ ) . '/Maintenance.php' );
00025 
/**
 * Maintenance script that fills in the rev_sha1 column of the revision table
 * and the ar_sha1 column of the archive table for rows where it is still empty.
 *
 * The hash stored is whatever Revision::base36Sha1() returns for the raw text
 * of the revision.
 */
class PopulateRevisionSha1 extends LoggedUpdateMaintenance {
	/**
	 * Set the script description and the default batch size (200 ids per batch).
	 */
	public function __construct() {
		parent::__construct();
		$this->mDescription = "Populates the rev_sha1 and ar_sha1 fields";
		$this->setBatchSize( 200 );
	}

	/**
	 * @return string Key identifying this update to the parent class
	 */
	protected function getUpdateKey() {
		return 'populate rev_sha1';
	}

	/**
	 * Populate the revision table, then the archive table (rows with an
	 * ar_rev_id first, legacy rows without one afterwards), and print a summary.
	 *
	 * @return bool Always true when the end of the method is reached
	 */
	protected function doDBUpdates() {
		$db = $this->getDB( DB_MASTER );

		# Both tables are required. error() is called with its second argument
		# set to true, which is expected to abort the script.
		if ( !$db->tableExists( 'revision' ) ) {
			$this->error( "revision table does not exist", true );
		} elseif ( !$db->tableExists( 'archive' ) ) {
			$this->error( "archive table does not exist", true );
		}

		$this->output( "Populating rev_sha1 column\n" );
		$rc = $this->doSha1Updates( 'revision', 'rev_id', 'rev' );

		$this->output( "Populating ar_sha1 column\n" );
		$ac = $this->doSha1Updates( 'archive', 'ar_rev_id', 'ar' );
		$this->output( "Populating ar_sha1 column legacy rows\n" );
		$ac += $this->doSha1LegacyUpdates();

		$this->output( "rev_sha1 and ar_sha1 population complete [$rc revision rows, $ac archive rows].\n" );
		return true;
	}

	/**
	 * Walk the id range of a table in batches of mBatchSize ids and populate
	 * the sha1 column of every row in which it is still the empty string.
	 *
	 * @param $table string Table name ('revision' or 'archive')
	 * @param $idCol string Name of the id column used for batching
	 * @param $prefix string Column name prefix; the updated column is "{$prefix}_sha1"
	 * @return int Number of rows that were updated
	 */
	protected function doSha1Updates( $table, $idCol, $prefix ) {
		$db = $this->getDB( DB_MASTER );
		# Cast to integers so that nothing but integers is ever interpolated
		# into the raw SQL condition built below.
		$start = intval( $db->selectField( $table, "MIN($idCol)", false, __METHOD__ ) );
		$end = intval( $db->selectField( $table, "MAX($idCol)", false, __METHOD__ ) );
		if ( !$start || !$end ) {
			$this->output( "...$table table seems to be empty.\n" );
			return 0;
		}

		$count = 0;
		# Extend the upper bound so that the final, partial batch is still
		# covered by the "$blockEnd <= $end" loop condition.
		$end += $this->mBatchSize - 1;
		$blockStart = $start;
		$blockEnd = $start + $this->mBatchSize - 1;
		while ( $blockEnd <= $end ) {
			$this->output( "...doing $idCol from $blockStart to $blockEnd\n" );
			# Only rows whose hash is still empty need any work
			$cond = "$idCol BETWEEN $blockStart AND $blockEnd
				AND $idCol IS NOT NULL AND {$prefix}_sha1 = ''";
			$res = $db->select( $table, '*', $cond, __METHOD__ );

			# One transaction per batch
			$db->begin();
			foreach ( $res as $row ) {
				if ( $this->upgradeRow( $row, $table, $idCol, $prefix ) ) {
					$count++;
				}
			}
			$db->commit();

			$blockStart += $this->mBatchSize;
			$blockEnd += $this->mBatchSize;
			wfWaitForSlaves();
		}
		return $count;
	}

	/**
	 * Populate ar_sha1 for archive rows that have no ar_rev_id and therefore
	 * cannot be handled by the id-range batching of doSha1Updates().
	 *
	 * Rows whose ar_sha1 is already set are skipped, mirroring the
	 * "{$prefix}_sha1 = ''" filter in doSha1Updates(), so that a repeated or
	 * resumed run neither re-hashes nor re-counts them.
	 *
	 * @return int Number of rows that were updated
	 */
	protected function doSha1LegacyUpdates() {
		$count = 0;
		$db = $this->getDB( DB_MASTER );
		$res = $db->select(
			'archive',
			'*',
			array( 'ar_rev_id IS NULL', 'ar_sha1' => '' ),
			__METHOD__
		);

		# Commit after every 100 processed rows
		$updateSize = 0;
		$db->begin();
		foreach ( $res as $row ) {
			if ( $this->upgradeLegacyArchiveRow( $row ) ) {
				++$count;
			}
			if ( ++$updateSize >= 100 ) {
				$updateSize = 0;
				$db->commit();
				$this->output( "Commited row with ar_timestamp={$row->ar_timestamp}\n" );
				wfWaitForSlaves();
				$db->begin();
			}
		}
		# Commit whatever is left of the last, partial batch
		$db->commit();
		return $count;
	}

	/**
	 * Compute and store the hash for a single revision or archive row that is
	 * identified by an id column.
	 *
	 * @param $row Object Database row, as selected with '*' from $table
	 * @param $table string Table name ('revision' or 'archive')
	 * @param $idCol string Name of the id column used to address the row
	 * @param $prefix string Column name prefix; the updated column is "{$prefix}_sha1"
	 * @return bool True if the row was updated, false if its text was unavailable
	 */
	protected function upgradeRow( $row, $table, $idCol, $prefix ) {
		$db = $this->getDB( DB_MASTER );
		if ( $table === 'archive' ) {
			$rev = Revision::newFromArchiveRow( $row );
		} else {
			$rev = new Revision( $row );
		}
		$text = $rev->getRawText();
		if ( !is_string( $text ) ) {
			# This should not happen, but sometimes does (bug 20757)
			$this->output( "Text of revision with {$idCol}={$row->$idCol} unavailable!\n" );
			return false;
		}

		$db->update( $table,
			array( "{$prefix}_sha1" => Revision::base36Sha1( $text ) ),
			array( $idCol => $row->$idCol ),
			__METHOD__
		);
		return true;
	}

	/**
	 * Compute and store the hash for a single legacy archive row, i.e. one
	 * that has no ar_rev_id to address it by.
	 *
	 * @param $row Object Database row, as selected with '*' from the archive table
	 * @return bool True if the row was updated, false if its text was unavailable
	 */
	protected function upgradeLegacyArchiveRow( $row ) {
		$db = $this->getDB( DB_MASTER );
		$rev = Revision::newFromArchiveRow( $row );
		$text = $rev->getRawText();
		if ( !is_string( $text ) ) {
			# This should not happen, but sometimes does (bug 20757)
			$this->output( "Text of revision with timestamp {$row->ar_timestamp} unavailable!\n" );
			return false;
		}

		# Archive table has no PK, but (NS,title,time) should be near unique.
		# Any duplicates on those should also have duplicated text anyway.
		$db->update( 'archive',
			array( 'ar_sha1' => Revision::base36Sha1( $text ) ),
			array(
				'ar_namespace' => $row->ar_namespace,
				'ar_title'     => $row->ar_title,
				'ar_timestamp' => $row->ar_timestamp,
				'ar_len'       => $row->ar_len // extra sanity
			),
			__METHOD__
		);
		return true;
	}
}
00183 
# Tell the maintenance bootstrap which class this script provides
$maintClass = 'PopulateRevisionSha1';
# Hand over to the file named by RUN_MAINTENANCE_IF_MAIN
require_once RUN_MAINTENANCE_IF_MAIN;