MediaWiki
REL1_24
|
<?php
/**
 * Maintenance script that fills in the rev_sha1 column of the revision table
 * and the ar_sha1 column of the archive table for rows where it is still empty.
 */

require_once __DIR__ . '/Maintenance.php';

/**
 * Populates rev_sha1 / ar_sha1 with the base-36 SHA-1 of each revision's
 * serialized data, working through the tables in batches.
 */
class PopulateRevisionSha1 extends LoggedUpdateMaintenance {
	public function __construct() {
		parent::__construct();
		$this->mDescription = "Populates the rev_sha1 and ar_sha1 fields";
		$this->setBatchSize( 200 );
	}

	/**
	 * @return string Key identifying this update
	 */
	protected function getUpdateKey() {
		return 'populate rev_sha1';
	}

	/**
	 * Run the three population passes: revision rows, archive rows that carry
	 * an ar_rev_id, and legacy archive rows whose ar_rev_id is NULL.
	 *
	 * @return bool False if the rev_sha1 column is missing, true once all
	 *  passes have run
	 */
	protected function doDBUpdates() {
		$db = $this->getDB( DB_MASTER );

		// NOTE(review): error() is called with a second argument of true, which
		// presumably aborts the script -- confirm against Maintenance::error().
		if ( !$db->tableExists( 'revision' ) ) {
			$this->error( "revision table does not exist", true );
		} elseif ( !$db->tableExists( 'archive' ) ) {
			$this->error( "archive table does not exist", true );
		} elseif ( !$db->fieldExists( 'revision', 'rev_sha1', __METHOD__ ) ) {
			$this->output( "rev_sha1 column does not exist\n\n", true );

			return false;
		}

		$this->output( "Populating rev_sha1 column\n" );
		$rc = $this->doSha1Updates( 'revision', 'rev_id', 'rev' );

		$this->output( "Populating ar_sha1 column\n" );
		$ac = $this->doSha1Updates( 'archive', 'ar_rev_id', 'ar' );
		$this->output( "Populating ar_sha1 column legacy rows\n" );
		$ac += $this->doSha1LegacyUpdates();

		$this->output( "rev_sha1 and ar_sha1 population complete "
			. "[$rc revision rows, $ac archive rows].\n" );

		return true;
	}

	/**
	 * Populate the SHA-1 column of one table, walking the ID column from its
	 * minimum to its maximum value in blocks of mBatchSize IDs. Each block is
	 * written inside its own transaction.
	 *
	 * @param string $table Table to update ('revision' or 'archive')
	 * @param string $idCol Name of the ID column used for batching
	 * @param string $prefix Column prefix; the updated column is "{$prefix}_sha1"
	 * @return int Number of rows that were updated
	 */
	protected function doSha1Updates( $table, $idCol, $prefix ) {
		$db = $this->getDB( DB_MASTER );
		$start = $db->selectField( $table, "MIN($idCol)", false, __METHOD__ );
		$end = $db->selectField( $table, "MAX($idCol)", false, __METHOD__ );
		if ( !$start || !$end ) {
			$this->output( "...$table table seems to be empty.\n" );

			return 0;
		}

		$count = 0;
		# Do remaining chunk
		$end += $this->mBatchSize - 1;
		$blockStart = $start;
		$blockEnd = $start + $this->mBatchSize - 1;
		while ( $blockEnd <= $end ) {
			$this->output( "...doing $idCol from $blockStart to $blockEnd\n" );
			// The bounds go into raw SQL, so force them to integers first;
			// $blockStart is still the string from selectField() on the first pass.
			$cond = "$idCol BETWEEN " . (int)$blockStart . " AND " . (int)$blockEnd .
				" AND $idCol IS NOT NULL AND {$prefix}_sha1 = ''";
			$res = $db->select( $table, '*', $cond, __METHOD__ );

			$db->begin( __METHOD__ );
			foreach ( $res as $row ) {
				if ( $this->upgradeRow( $row, $table, $idCol, $prefix ) ) {
					$count++;
				}
			}
			$db->commit( __METHOD__ );

			$blockStart += $this->mBatchSize;
			$blockEnd += $this->mBatchSize;
			wfWaitForSlaves();
		}

		return $count;
	}

	/**
	 * Populate ar_sha1 for archive rows that have no ar_rev_id. The rows are
	 * fetched in one query and the transaction is committed every 100 rows.
	 *
	 * @return int Number of rows that were updated
	 */
	protected function doSha1LegacyUpdates() {
		$count = 0;
		$db = $this->getDB( DB_MASTER );
		$res = $db->select( 'archive', '*',
			array( 'ar_rev_id IS NULL', 'ar_sha1' => '' ), __METHOD__ );

		$updateSize = 0;
		$db->begin( __METHOD__ );
		foreach ( $res as $row ) {
			if ( $this->upgradeLegacyArchiveRow( $row ) ) {
				++$count;
			}
			if ( ++$updateSize >= 100 ) {
				$updateSize = 0;
				$db->commit( __METHOD__ );
				$this->output( "Commited row with ar_timestamp={$row->ar_timestamp}\n" );
				wfWaitForSlaves();
				$db->begin( __METHOD__ );
			}
		}
		$db->commit( __METHOD__ );

		return $count;
	}

	/**
	 * Compute and store the SHA-1 for a single revision or archive row that
	 * can be addressed by its ID column.
	 *
	 * @param stdClass $row Row selected from $table
	 * @param string $table Table the row belongs to ('revision' or 'archive')
	 * @param string $idCol Name of the ID column identifying the row
	 * @param string $prefix Column prefix; the updated column is "{$prefix}_sha1"
	 * @return bool True if the row was updated, false if its data was unavailable
	 */
	protected function upgradeRow( $row, $table, $idCol, $prefix ) {
		$db = $this->getDB( DB_MASTER );
		try {
			$rev = ( $table === 'archive' )
				? Revision::newFromArchiveRow( $row )
				: new Revision( $row );
			$text = $rev->getSerializedData();
		} catch ( MWException $e ) {
			$this->output( "Data of revision with {$idCol}={$row->$idCol} unavailable!\n" );

			return false; // bug 22624?
		}
		if ( !is_string( $text ) ) {
			# This should not happen, but sometimes does (bug 20757)
			$this->output( "Data of revision with {$idCol}={$row->$idCol} unavailable!\n" );

			return false;
		}

		$db->update( $table,
			array( "{$prefix}_sha1" => Revision::base36Sha1( $text ) ),
			array( $idCol => $row->$idCol ),
			__METHOD__
		);

		return true;
	}

	/**
	 * Compute and store the SHA-1 for a legacy archive row (one without an
	 * ar_rev_id), matching the row by namespace, title, timestamp and length.
	 *
	 * @param stdClass $row Row selected from the archive table
	 * @return bool True if the row was updated, false if its data was unavailable
	 */
	protected function upgradeLegacyArchiveRow( $row ) {
		$db = $this->getDB( DB_MASTER );
		try {
			$rev = Revision::newFromArchiveRow( $row );
			// Loaded inside the try, as upgradeRow() does, so that a failure to
			// fetch the text skips this row instead of aborting the whole run.
			$text = $rev->getSerializedData();
		} catch ( MWException $e ) {
			$this->output( "Text of revision with timestamp {$row->ar_timestamp} unavailable!\n" );

			return false; // bug 22624?
		}
		if ( !is_string( $text ) ) {
			# This should not happen, but sometimes does (bug 20757)
			$this->output( "Data of revision with timestamp {$row->ar_timestamp} unavailable!\n" );

			return false;
		}

		# Archive table has no PK, but (NS,title,time) should be near unique.
		# Any duplicates on those should also have duplicated text anyway.
		$db->update( 'archive',
			array( 'ar_sha1' => Revision::base36Sha1( $text ) ),
			array(
				'ar_namespace' => $row->ar_namespace,
				'ar_title' => $row->ar_title,
				'ar_timestamp' => $row->ar_timestamp,
				'ar_len' => $row->ar_len // extra sanity
			),
			__METHOD__
		);

		return true;
	}
}

$maintClass = "PopulateRevisionSha1";
require_once RUN_MAINTENANCE_IF_MAIN;