MediaWiki  REL1_21
populateRevisionSha1.php
Go to the documentation of this file.
00001 <?php
00025 require_once( __DIR__ . '/Maintenance.php' );
00026 
/**
 * Maintenance script that fills in the rev_sha1 column of the revision table
 * and the ar_sha1 column of the archive table for rows whose hash is still
 * the empty string.
 */
class PopulateRevisionSha1 extends LoggedUpdateMaintenance {
	public function __construct() {
		parent::__construct();
		$this->mDescription = "Populates the rev_sha1 and ar_sha1 fields";
		$this->setBatchSize( 200 );
	}

	/**
	 * @return string Key under which this update is identified
	 */
	protected function getUpdateKey() {
		return 'populate rev_sha1';
	}

	/**
	 * Run the three population passes: revision rows, archive rows that have
	 * an ar_rev_id, and legacy archive rows whose ar_rev_id is NULL.
	 *
	 * @return bool False if the rev_sha1 column is missing, true once all
	 *  passes have run
	 */
	protected function doDBUpdates() {
		$db = $this->getDB( DB_MASTER );

		// NOTE(review): error() is called with its second argument set to true,
		// which is presumably fatal - confirm against Maintenance::error().
		if ( !$db->tableExists( 'revision' ) ) {
			$this->error( "revision table does not exist", true );
		} elseif ( !$db->tableExists( 'archive' ) ) {
			$this->error( "archive table does not exist", true );
		} elseif ( !$db->fieldExists( 'revision', 'rev_sha1', __METHOD__ ) ) {
			$this->output( "rev_sha1 column does not exist\n\n" );
			return false;
		}

		$this->output( "Populating rev_sha1 column\n" );
		$rc = $this->doSha1Updates( 'revision', 'rev_id', 'rev' );

		$this->output( "Populating ar_sha1 column\n" );
		$ac = $this->doSha1Updates( 'archive', 'ar_rev_id', 'ar' );
		$this->output( "Populating ar_sha1 column legacy rows\n" );
		$ac += $this->doSha1LegacyUpdates();

		$this->output( "rev_sha1 and ar_sha1 population complete [$rc revision rows, $ac archive rows].\n" );
		return true;
	}

	/**
	 * Walk a table in ID ranges of mBatchSize and hash every row in the range
	 * whose {$prefix}_sha1 column is still the empty string.
	 *
	 * @param string $table Table name ('revision' or 'archive')
	 * @param string $idCol Numeric ID column used to slice the table into ranges
	 * @param string $prefix Column prefix, so that the target is {$prefix}_sha1
	 * @return int Number of rows that were updated
	 */
	protected function doSha1Updates( $table, $idCol, $prefix ) {
		$db = $this->getDB( DB_MASTER );
		$start = $db->selectField( $table, "MIN($idCol)", false, __METHOD__ );
		$end = $db->selectField( $table, "MAX($idCol)", false, __METHOD__ );
		if ( !$start || !$end ) {
			$this->output( "...$table table seems to be empty.\n" );
			return 0;
		}

		// The bounds are interpolated into a raw SQL condition below, so make
		// sure they are plain integers whatever the DB layer handed back.
		$start = (int)$start;
		$end = (int)$end;

		$count = 0;
		# Pad the upper bound so the last, partially filled range is processed too
		$end += $this->mBatchSize - 1;
		$blockStart = $start;
		$blockEnd = $start + $this->mBatchSize - 1;
		while ( $blockEnd <= $end ) {
			$this->output( "...doing $idCol from $blockStart to $blockEnd\n" );
			$cond = "$idCol BETWEEN $blockStart AND $blockEnd
				AND $idCol IS NOT NULL AND {$prefix}_sha1 = ''";
			$res = $db->select( $table, '*', $cond, __METHOD__ );

			// One transaction per ID range
			$db->begin( __METHOD__ );
			foreach ( $res as $row ) {
				if ( $this->upgradeRow( $row, $table, $idCol, $prefix ) ) {
					$count++;
				}
			}
			$db->commit( __METHOD__ );

			$blockStart += $this->mBatchSize;
			$blockEnd += $this->mBatchSize;
			// Called after every batch, presumably so replication can catch up
			wfWaitForSlaves();
		}
		return $count;
	}

	/**
	 * Hash archive rows that have no ar_rev_id and an empty ar_sha1.
	 * These cannot be sliced by ID, so they are read in a single query and
	 * committed every 100 processed rows.
	 *
	 * @return int Number of rows that were updated
	 */
	protected function doSha1LegacyUpdates() {
		$count = 0;
		$db = $this->getDB( DB_MASTER );
		$res = $db->select( 'archive', '*',
			array( 'ar_rev_id IS NULL', 'ar_sha1' => '' ), __METHOD__ );

		$updateSize = 0;
		$db->begin( __METHOD__ );
		foreach ( $res as $row ) {
			if ( $this->upgradeLegacyArchiveRow( $row ) ) {
				++$count;
			}
			// $updateSize counts processed rows, whether or not they were updated
			if ( ++$updateSize >= 100 ) {
				$updateSize = 0;
				$db->commit( __METHOD__ );
				$this->output( "Commited row with ar_timestamp={$row->ar_timestamp}\n" );
				wfWaitForSlaves();
				$db->begin( __METHOD__ );
			}
		}
		// Flush whatever is left in the last, partial batch
		$db->commit( __METHOD__ );
		return $count;
	}

	/**
	 * Compute and store the SHA-1 of a single revision or archive row,
	 * addressed by its ID column.
	 *
	 * @param object $row Result row; must expose the property named by $idCol
	 * @param string $table Table name; 'archive' rows are loaded through
	 *  Revision::newFromArchiveRow(), anything else through new Revision()
	 * @param string $idCol Column used in the WHERE clause of the update
	 * @param string $prefix Column prefix, so that the target is {$prefix}_sha1
	 * @return bool True if the row was updated, false if its data was unavailable
	 */
	protected function upgradeRow( $row, $table, $idCol, $prefix ) {
		$db = $this->getDB( DB_MASTER );
		try {
			$rev = ( $table === 'archive' )
				? Revision::newFromArchiveRow( $row )
				: new Revision( $row );
			$text = $rev->getSerializedData();
		} catch ( MWException $e ) {
			$this->output( "Data of revision with {$idCol}={$row->$idCol} unavailable!\n" );
			return false; // bug 22624?
		}
		if ( !is_string( $text ) ) {
			# This should not happen, but sometimes does (bug 20757)
			$this->output( "Data of revision with {$idCol}={$row->$idCol} unavailable!\n" );
			return false;
		}

		$db->update( $table,
			array( "{$prefix}_sha1" => Revision::base36Sha1( $text ) ),
			array( $idCol => $row->$idCol ),
			__METHOD__
		);
		return true;
	}

	/**
	 * Compute and store the SHA-1 of a legacy archive row (one without an
	 * ar_rev_id), addressed by namespace, title, timestamp and length.
	 *
	 * @param object $row Result row from the archive table
	 * @return bool True if the row was updated, false if its data was unavailable
	 */
	protected function upgradeLegacyArchiveRow( $row ) {
		$db = $this->getDB( DB_MASTER );
		try {
			$rev = Revision::newFromArchiveRow( $row );
			// Kept inside the try, as in upgradeRow(), so that a failure to
			// load the data skips this row instead of aborting the whole run
			// in the middle of an open transaction.
			$text = $rev->getSerializedData();
		} catch ( MWException $e ) {
			$this->output( "Text of revision with timestamp {$row->ar_timestamp} unavailable!\n" );
			return false; // bug 22624?
		}
		if ( !is_string( $text ) ) {
			# This should not happen, but sometimes does (bug 20757)
			$this->output( "Data of revision with timestamp {$row->ar_timestamp} unavailable!\n" );
			return false;
		}

		# Archive table has no PK, but (NS,title,time) should be near unique.
		# Any duplicates on those should also have duplicated text anyway.
		$db->update( 'archive',
			array( 'ar_sha1' => Revision::base36Sha1( $text ) ),
			array(
				'ar_namespace' => $row->ar_namespace,
				'ar_title'     => $row->ar_title,
				'ar_timestamp' => $row->ar_timestamp,
				'ar_len'       => $row->ar_len // extra sanity
			),
			__METHOD__
		);
		return true;
	}
}
00202 
// Name the maintenance class defined in this file, then include the file that
// RUN_MAINTENANCE_IF_MAIN points to.
// NOTE(review): that constant is not defined in this file; presumably it comes
// from Maintenance.php and runs $maintClass when this script is the entry
// point - confirm there.
00203 $maintClass = "PopulateRevisionSha1";
00204 require_once( RUN_MAINTENANCE_IF_MAIN );