MediaWiki
REL1_20
|
<?php
/**
 * Maintenance script that fills in the rev_sha1 column of the revision table
 * and the ar_sha1 column of the archive table for rows where it is still
 * the empty string.
 *
 * @file
 */

require_once( __DIR__ . '/Maintenance.php' );

/**
 * Computes and stores the base-36 SHA-1 of the revision text for every
 * revision/archive row whose sha1 column is still ''.
 *
 * Work is done in three passes:
 *  - revision rows, walked by rev_id in batches;
 *  - archive rows that have an ar_rev_id, walked by ar_rev_id in batches;
 *  - "legacy" archive rows whose ar_rev_id is NULL, matched by
 *    namespace/title/timestamp/length instead.
 */
class PopulateRevisionSha1 extends LoggedUpdateMaintenance {
	public function __construct() {
		parent::__construct();
		$this->mDescription = "Populates the rev_sha1 and ar_sha1 fields";
		$this->setBatchSize( 200 );
	}

	/**
	 * @return string Key identifying this update
	 */
	protected function getUpdateKey() {
		return 'populate rev_sha1';
	}

	/**
	 * Run all three population passes and report the totals.
	 *
	 * @return bool Always true once the passes have run
	 */
	protected function doDBUpdates() {
		$db = $this->getDB( DB_MASTER );

		// NOTE(review): the second argument to error() is expected to abort
		// the script (see Maintenance::error) - confirm for this branch.
		if ( !$db->tableExists( 'revision' ) ) {
			$this->error( "revision table does not exist", true );
		} elseif ( !$db->tableExists( 'archive' ) ) {
			$this->error( "archive table does not exist", true );
		}

		$this->output( "Populating rev_sha1 column\n" );
		$rc = $this->doSha1Updates( 'revision', 'rev_id', 'rev' );

		$this->output( "Populating ar_sha1 column\n" );
		$ac = $this->doSha1Updates( 'archive', 'ar_rev_id', 'ar' );
		$this->output( "Populating ar_sha1 column legacy rows\n" );
		$ac += $this->doSha1LegacyUpdates();

		$this->output( "rev_sha1 and ar_sha1 population complete [$rc revision rows, $ac archive rows].\n" );
		return true;
	}

	/**
	 * Populate the sha1 column of one table, walking its ID column in
	 * consecutive ranges of mBatchSize IDs.
	 *
	 * @param $table string Table name ('revision' or 'archive')
	 * @param $idCol string Name of the ID column used for batching
	 * @param $prefix string Column prefix; the column written is "{$prefix}_sha1"
	 * @return int Number of rows that were updated
	 */
	protected function doSha1Updates( $table, $idCol, $prefix ) {
		$db = $this->getDB( DB_MASTER );
		$start = $db->selectField( $table, "MIN($idCol)", false, __METHOD__ );
		$end = $db->selectField( $table, "MAX($idCol)", false, __METHOD__ );
		if ( !$start || !$end ) {
			// Also reached when every $idCol value is NULL, since MIN/MAX
			// then yield no usable bound.
			$this->output( "...$table table seems to be empty.\n" );
			return 0;
		}

		$count = 0;
		# Push the upper bound out by (almost) one batch so that the final,
		# partially filled block still satisfies the loop condition below.
		$end += $this->mBatchSize - 1;
		$blockStart = $start;
		$blockEnd = $start + $this->mBatchSize - 1;
		while ( $blockEnd <= $end ) {
			$this->output( "...doing $idCol from $blockStart to $blockEnd\n" );
			// Only rows in the current ID range that still lack a hash.
			$cond = "$idCol BETWEEN $blockStart AND $blockEnd
				AND $idCol IS NOT NULL AND {$prefix}_sha1 = ''";
			$res = $db->select( $table, '*', $cond, __METHOD__ );

			// One transaction per block of IDs.
			$db->begin( __METHOD__ );
			foreach ( $res as $row ) {
				if ( $this->upgradeRow( $row, $table, $idCol, $prefix ) ) {
					$count++;
				}
			}
			$db->commit( __METHOD__ );

			$blockStart += $this->mBatchSize;
			$blockEnd += $this->mBatchSize;
			wfWaitForSlaves();
		}
		return $count;
	}

	/**
	 * Populate ar_sha1 for archive rows that have no ar_rev_id.
	 *
	 * These rows cannot be batched by ID, so they are read in a single
	 * query and committed every 100 processed rows.
	 *
	 * @return int Number of rows that were updated
	 */
	protected function doSha1LegacyUpdates() {
		$count = 0;
		$db = $this->getDB( DB_MASTER );
		$res = $db->select( 'archive', '*',
			array( 'ar_rev_id IS NULL', 'ar_sha1' => '' ), __METHOD__ );

		// Rows processed (updated or not) since the last commit.
		$updateSize = 0;
		$db->begin( __METHOD__ );
		foreach ( $res as $row ) {
			if ( $this->upgradeLegacyArchiveRow( $row ) ) {
				++$count;
			}
			if ( ++$updateSize >= 100 ) {
				$updateSize = 0;
				$db->commit( __METHOD__ );
				$this->output( "Commited row with ar_timestamp={$row->ar_timestamp}\n" );
				wfWaitForSlaves();
				$db->begin( __METHOD__ );
			}
		}
		// Flush whatever is left from the last partial group.
		$db->commit( __METHOD__ );
		return $count;
	}

	/**
	 * Compute and store the sha1 for a single revision or archive row
	 * that is identified by an ID column.
	 *
	 * @param $row object Result row from the revision or archive table
	 * @param $table string Table name ('revision' or 'archive')
	 * @param $idCol string Name of the ID column used to address the row
	 * @param $prefix string Column prefix; the column written is "{$prefix}_sha1"
	 * @return bool True if the row was updated, false if its text was unavailable
	 */
	protected function upgradeRow( $row, $table, $idCol, $prefix ) {
		$db = $this->getDB( DB_MASTER );
		try {
			$rev = ( $table === 'archive' )
				? Revision::newFromArchiveRow( $row )
				: new Revision( $row );
			$text = $rev->getRawText();
		} catch ( MWException $e ) {
			$this->output( "Text of revision with {$idCol}={$row->$idCol} unavailable!\n" );
			return false; // bug 22624?
		}
		if ( !is_string( $text ) ) {
			# This should not happen, but sometimes does (bug 20757)
			$this->output( "Text of revision with {$idCol}={$row->$idCol} unavailable!\n" );
			return false;
		} else {
			$db->update( $table,
				array( "{$prefix}_sha1" => Revision::base36Sha1( $text ) ),
				array( $idCol => $row->$idCol ),
				__METHOD__
			);
			return true;
		}
	}

	/**
	 * Compute and store the sha1 for a single archive row that has no
	 * ar_rev_id and therefore has to be addressed by its other fields.
	 *
	 * @param $row object Result row from the archive table
	 * @return bool True if the row was updated, false if its text was unavailable
	 */
	protected function upgradeLegacyArchiveRow( $row ) {
		$db = $this->getDB( DB_MASTER );
		try {
			$rev = Revision::newFromArchiveRow( $row );
			// Loading the text is kept inside the try, as in upgradeRow(),
			// so a failure here is logged and the row skipped rather than
			// the exception escaping this method.
			$text = $rev->getRawText();
		} catch ( MWException $e ) {
			$this->output( "Text of revision with timestamp {$row->ar_timestamp} unavailable!\n" );
			return false; // bug 22624?
		}
		if ( !is_string( $text ) ) {
			# This should not happen, but sometimes does (bug 20757)
			$this->output( "Text of revision with timestamp {$row->ar_timestamp} unavailable!\n" );
			return false;
		} else {
			# Archive table has no PK, but (NS,title,time) should be near unique.
			# Any duplicates on those should also have duplicated text anyway.
			$db->update( 'archive',
				array( 'ar_sha1' => Revision::base36Sha1( $text ) ),
				array(
					'ar_namespace' => $row->ar_namespace,
					'ar_title' => $row->ar_title,
					'ar_timestamp' => $row->ar_timestamp,
					'ar_len' => $row->ar_len // extra sanity
				),
				__METHOD__
			);
			return true;
		}
	}
}

$maintClass = "PopulateRevisionSha1";
require_once( RUN_MAINTENANCE_IF_MAIN );