MediaWiki  REL1_20
populateRevisionSha1.php
Go to the documentation of this file.
<?php
// Load the maintenance framework from the sibling Maintenance.php
// (presumably where the LoggedUpdateMaintenance base class comes from — confirm).
require_once __DIR__ . '/Maintenance.php';
00026 
/**
 * Maintenance script that fills in the rev_sha1 column of the revision table
 * and the ar_sha1 column of the archive table for rows where it is still
 * the empty string.
 */
class PopulateRevisionSha1 extends LoggedUpdateMaintenance {
	public function __construct() {
		parent::__construct();
		$this->mDescription = "Populates the rev_sha1 and ar_sha1 fields";
		$this->setBatchSize( 200 );
	}

	/**
	 * @return string Key naming this update; presumably used by the parent
	 *   class to remember that the update already ran (confirm against
	 *   LoggedUpdateMaintenance).
	 */
	protected function getUpdateKey() {
		return 'populate rev_sha1';
	}

	/**
	 * Populate rev_sha1 for the revision table, then ar_sha1 for the archive
	 * table (rows with an ar_rev_id first, legacy rows without one afterwards),
	 * and print a summary of how many rows were updated.
	 *
	 * @return bool Always true once all three passes have run
	 */
	protected function doDBUpdates() {
		$db = $this->getDB( DB_MASTER );

		# Both tables are required; the second argument to error() requests
		# that the script be aborted.
		if ( !$db->tableExists( 'revision' ) ) {
			$this->error( "revision table does not exist", true );
		} elseif ( !$db->tableExists( 'archive' ) ) {
			$this->error( "archive table does not exist", true );
		}

		$this->output( "Populating rev_sha1 column\n" );
		$rc = $this->doSha1Updates( 'revision', 'rev_id', 'rev' );

		$this->output( "Populating ar_sha1 column\n" );
		$ac = $this->doSha1Updates( 'archive', 'ar_rev_id', 'ar' );
		$this->output( "Populating ar_sha1 column legacy rows\n" );
		$ac += $this->doSha1LegacyUpdates();

		$this->output( "rev_sha1 and ar_sha1 population complete [$rc revision rows, $ac archive rows].\n" );
		return true;
	}

	/**
	 * Walk a table in blocks of mBatchSize consecutive IDs and fill in the
	 * SHA-1 column of every row in which it is still empty. Each block is
	 * written inside its own transaction.
	 *
	 * @param $table string Table to update ('revision' or 'archive')
	 * @param $idCol string Numeric ID column used to walk the table
	 * @param $prefix string Column prefix; the column written is "{$prefix}_sha1"
	 * @return int Number of rows that were updated
	 */
	protected function doSha1Updates( $table, $idCol, $prefix ) {
		$db = $this->getDB( DB_MASTER );
		$start = $db->selectField( $table, "MIN($idCol)", false, __METHOD__ );
		$end = $db->selectField( $table, "MAX($idCol)", false, __METHOD__ );
		if ( !$start || !$end ) {
			$this->output( "...$table table seems to be empty.\n" );
			return 0;
		}

		$count = 0;
		# Pad the upper bound by one batch so the last, partial block is
		# still visited by the loop below.
		$end += $this->mBatchSize - 1;
		$blockStart = $start;
		$blockEnd = $start + $this->mBatchSize - 1;
		while ( $blockEnd <= $end ) {
			$this->output( "...doing $idCol from $blockStart to $blockEnd\n" );
			# Same array-style conditions as doSha1LegacyUpdates(); only rows
			# whose hash is still the empty string are fetched.
			$conds = array(
				"$idCol BETWEEN $blockStart AND $blockEnd",
				"$idCol IS NOT NULL",
				"{$prefix}_sha1" => ''
			);
			$res = $db->select( $table, '*', $conds, __METHOD__ );

			$db->begin( __METHOD__ );
			foreach ( $res as $row ) {
				if ( $this->upgradeRow( $row, $table, $idCol, $prefix ) ) {
					$count++;
				}
			}
			$db->commit( __METHOD__ );

			$blockStart += $this->mBatchSize;
			$blockEnd += $this->mBatchSize;
			wfWaitForSlaves();
		}
		return $count;
	}

	/**
	 * Fill in ar_sha1 for archive rows that have no ar_rev_id and therefore
	 * cannot be walked by ID. Work is committed every 100 processed rows.
	 *
	 * @return int Number of rows that were updated
	 */
	protected function doSha1LegacyUpdates() {
		$count = 0;
		$db = $this->getDB( DB_MASTER );
		$res = $db->select( 'archive', '*',
			array( 'ar_rev_id IS NULL', 'ar_sha1' => '' ), __METHOD__ );

		# Rows handled since the last commit, whether or not they were updated
		$updateSize = 0;
		$db->begin( __METHOD__ );
		foreach ( $res as $row ) {
			if ( $this->upgradeLegacyArchiveRow( $row ) ) {
				++$count;
			}
			if ( ++$updateSize >= 100 ) {
				$updateSize = 0;
				$db->commit( __METHOD__ );
				$this->output( "Commited row with ar_timestamp={$row->ar_timestamp}\n" );
				wfWaitForSlaves();
				$db->begin( __METHOD__ );
			}
		}
		# Commit whatever is left over from the last, partial group
		$db->commit( __METHOD__ );
		return $count;
	}

	/**
	 * Compute and store the SHA-1 of one revision or archive row that is
	 * identified by an ID column.
	 *
	 * @param $row stdClass Row as selected from $table
	 * @param $table string 'revision' or 'archive'
	 * @param $idCol string Name of the ID column identifying the row
	 * @param $prefix string Column prefix; the column written is "{$prefix}_sha1"
	 * @return bool True if the row was updated, false if its text was unavailable
	 */
	protected function upgradeRow( $row, $table, $idCol, $prefix ) {
		$db = $this->getDB( DB_MASTER );
		try {
			$rev = ( $table === 'archive' )
				? Revision::newFromArchiveRow( $row )
				: new Revision( $row );
			$text = $rev->getRawText();
		} catch ( MWException $e ) {
			$this->output( "Text of revision with {$idCol}={$row->$idCol} unavailable!\n" );
			return false; // bug 22624?
		}
		if ( !is_string( $text ) ) {
			# This should not happen, but sometimes does (bug 20757)
			$this->output( "Text of revision with {$idCol}={$row->$idCol} unavailable!\n" );
			return false;
		}

		$db->update( $table,
			array( "{$prefix}_sha1" => Revision::base36Sha1( $text ) ),
			array( $idCol => $row->$idCol ),
			__METHOD__
		);
		return true;
	}

	/**
	 * Compute and store the SHA-1 of one legacy archive row, i.e. one that
	 * has no ar_rev_id and is matched by namespace, title, timestamp and
	 * length instead.
	 *
	 * @param $row stdClass Row as selected from the archive table
	 * @return bool True if the row was updated, false if its text was unavailable
	 */
	protected function upgradeLegacyArchiveRow( $row ) {
		$db = $this->getDB( DB_MASTER );
		try {
			$rev = Revision::newFromArchiveRow( $row );
			# Loading the text is kept inside the try, exactly as upgradeRow()
			# does, so a failure while fetching it skips this row instead of
			# aborting the whole run in the middle of a transaction.
			$text = $rev->getRawText();
		} catch ( MWException $e ) {
			$this->output( "Text of revision with timestamp {$row->ar_timestamp} unavailable!\n" );
			return false; // bug 22624?
		}
		if ( !is_string( $text ) ) {
			# This should not happen, but sometimes does (bug 20757)
			$this->output( "Text of revision with timestamp {$row->ar_timestamp} unavailable!\n" );
			return false;
		}

		# Archive table has no PK, but (NS,title,time) should be near unique.
		# Any duplicates on those should also have duplicated text anyway.
		$db->update( 'archive',
			array( 'ar_sha1' => Revision::base36Sha1( $text ) ),
			array(
				'ar_namespace' => $row->ar_namespace,
				'ar_title'     => $row->ar_title,
				'ar_timestamp' => $row->ar_timestamp,
				'ar_len'       => $row->ar_len // extra sanity
			),
			__METHOD__
		);
		return true;
	}
}
00199 
// Name the class to run, then include the file named by RUN_MAINTENANCE_IF_MAIN
// (presumably the maintenance runner, defined by the framework — confirm).
$maintClass = 'PopulateRevisionSha1';
require_once RUN_MAINTENANCE_IF_MAIN;