MediaWiki
REL1_22
|
<?php
require_once __DIR__ . '/Maintenance.php';

/**
 * Maintenance script that fills in the rev_sha1 column of the revision table
 * and the ar_sha1 column of the archive table for rows where it is still empty.
 */
class PopulateRevisionSha1 extends LoggedUpdateMaintenance {
	public function __construct() {
		parent::__construct();
		$this->mDescription = "Populates the rev_sha1 and ar_sha1 fields";
		$this->setBatchSize( 200 );
	}

	/**
	 * @return string Key naming this update (presumably what the parent
	 *   class records once the update has completed -- confirm)
	 */
	protected function getUpdateKey() {
		return 'populate rev_sha1';
	}

	/**
	 * Check that the required tables and columns exist, then populate
	 * revision.rev_sha1, archive.ar_sha1 for rows that have an ar_rev_id,
	 * and finally archive.ar_sha1 for legacy rows without one.
	 *
	 * @return bool False if one of the sha1 columns is missing, true once
	 *   all three passes have run
	 */
	protected function doDBUpdates() {
		$db = $this->getDB( DB_MASTER );

		if ( !$db->tableExists( 'revision' ) ) {
			$this->error( "revision table does not exist", true );
		} elseif ( !$db->tableExists( 'archive' ) ) {
			$this->error( "archive table does not exist", true );
		} elseif ( !$db->fieldExists( 'revision', 'rev_sha1', __METHOD__ ) ) {
			$this->output( "rev_sha1 column does not exist\n\n" );
			return false;
		} elseif ( !$db->fieldExists( 'archive', 'ar_sha1', __METHOD__ ) ) {
			// Both columns are written below, so both have to be present
			// before any row is touched.
			$this->output( "ar_sha1 column does not exist\n\n" );
			return false;
		}

		$this->output( "Populating rev_sha1 column\n" );
		$rc = $this->doSha1Updates( 'revision', 'rev_id', 'rev' );

		$this->output( "Populating ar_sha1 column\n" );
		$ac = $this->doSha1Updates( 'archive', 'ar_rev_id', 'ar' );
		$this->output( "Populating ar_sha1 column legacy rows\n" );
		$ac += $this->doSha1LegacyUpdates();

		$this->output( "rev_sha1 and ar_sha1 population complete [$rc revision rows, $ac archive rows].\n" );
		return true;
	}

	/**
	 * Populate the sha1 column of one table, walking its ID column in
	 * blocks of $this->mBatchSize and committing once per block.
	 *
	 * @param string $table Table to update ('revision' or 'archive' here)
	 * @param string $idCol Name of the ID column used to slice the blocks
	 * @param string $prefix Column prefix; "{$prefix}_sha1" is the column filled
	 * @return int Number of rows that were updated
	 */
	protected function doSha1Updates( $table, $idCol, $prefix ) {
		$db = $this->getDB( DB_MASTER );
		$start = $db->selectField( $table, "MIN($idCol)", false, __METHOD__ );
		$end = $db->selectField( $table, "MAX($idCol)", false, __METHOD__ );
		if ( !$start || !$end ) {
			$this->output( "...$table table seems to be empty.\n" );
			return 0;
		}

		$count = 0;
		# Pad the upper bound so that the loop condition below still admits
		# the final, possibly partial, block.
		$end += $this->mBatchSize - 1;
		$blockStart = $start;
		$blockEnd = $start + $this->mBatchSize - 1;
		while ( $blockEnd <= $end ) {
			$this->output( "...doing $idCol from $blockStart to $blockEnd\n" );
			# Only rows in this block whose sha1 is still the empty string
			$cond = "$idCol BETWEEN $blockStart AND $blockEnd" .
				" AND $idCol IS NOT NULL AND {$prefix}_sha1 = ''";
			$res = $db->select( $table, '*', $cond, __METHOD__ );

			$db->begin( __METHOD__ );
			foreach ( $res as $row ) {
				if ( $this->upgradeRow( $row, $table, $idCol, $prefix ) ) {
					$count++;
				}
			}
			$db->commit( __METHOD__ );

			$blockStart += $this->mBatchSize;
			$blockEnd += $this->mBatchSize;
			wfWaitForSlaves();
		}
		return $count;
	}

	/**
	 * Populate ar_sha1 for archive rows that have no ar_rev_id, committing
	 * after every 100 rows processed.
	 *
	 * @return int Number of rows that were updated
	 */
	protected function doSha1LegacyUpdates() {
		$count = 0;
		$db = $this->getDB( DB_MASTER );
		$res = $db->select( 'archive', '*',
			array( 'ar_rev_id IS NULL', 'ar_sha1' => '' ), __METHOD__ );

		$updateSize = 0;
		$db->begin( __METHOD__ );
		foreach ( $res as $row ) {
			if ( $this->upgradeLegacyArchiveRow( $row ) ) {
				++$count;
			}
			# Counts rows processed, not rows updated, so skipped rows also
			# advance towards the next commit.
			if ( ++$updateSize >= 100 ) {
				$updateSize = 0;
				$db->commit( __METHOD__ );
				$this->output( "Commited row with ar_timestamp={$row->ar_timestamp}\n" );
				wfWaitForSlaves();
				$db->begin( __METHOD__ );
			}
		}
		$db->commit( __METHOD__ );
		return $count;
	}

	/**
	 * Compute and store the sha1 for a single row identified by its ID column.
	 *
	 * @param stdClass $row Row selected from $table
	 * @param string $table Table the row came from
	 * @param string $idCol Name of the ID column used to address the row
	 * @param string $prefix Column prefix; "{$prefix}_sha1" is the column filled
	 * @return bool True if the row was updated, false if its data was unavailable
	 */
	protected function upgradeRow( $row, $table, $idCol, $prefix ) {
		$db = $this->getDB( DB_MASTER );
		try {
			$rev = ( $table === 'archive' )
				? Revision::newFromArchiveRow( $row )
				: new Revision( $row );
			$text = $rev->getSerializedData();
		} catch ( MWException $e ) {
			$this->output( "Data of revision with {$idCol}={$row->$idCol} unavailable!\n" );
			return false; // bug 22624?
		}
		if ( !is_string( $text ) ) {
			# This should not happen, but sometimes does (bug 20757)
			$this->output( "Data of revision with {$idCol}={$row->$idCol} unavailable!\n" );
			return false;
		} else {
			$db->update( $table,
				array( "{$prefix}_sha1" => Revision::base36Sha1( $text ) ),
				array( $idCol => $row->$idCol ),
				__METHOD__
			);
			return true;
		}
	}

	/**
	 * Compute and store the sha1 for a single legacy archive row, i.e. one
	 * that cannot be addressed by ar_rev_id.
	 *
	 * @param stdClass $row Row selected from the archive table
	 * @return bool True if the row was updated, false if its data was unavailable
	 */
	protected function upgradeLegacyArchiveRow( $row ) {
		$db = $this->getDB( DB_MASTER );
		try {
			$rev = Revision::newFromArchiveRow( $row );
			# Fetched inside the try, as upgradeRow() does, so that a failure
			# to load the data skips this row instead of aborting the run.
			$text = $rev->getSerializedData();
		} catch ( MWException $e ) {
			$this->output( "Text of revision with timestamp {$row->ar_timestamp} unavailable!\n" );
			return false; // bug 22624?
		}
		if ( !is_string( $text ) ) {
			# This should not happen, but sometimes does (bug 20757)
			$this->output( "Data of revision with timestamp {$row->ar_timestamp} unavailable!\n" );
			return false;
		} else {
			# Archive table has no PK, but (NS,title,time) should be near unique.
			# Any duplicates on those should also have duplicated text anyway.
			$db->update( 'archive',
				array( 'ar_sha1' => Revision::base36Sha1( $text ) ),
				array(
					'ar_namespace' => $row->ar_namespace,
					'ar_title' => $row->ar_title,
					'ar_timestamp' => $row->ar_timestamp,
					'ar_len' => $row->ar_len // extra sanity
				),
				__METHOD__
			);
			return true;
		}
	}
}

$maintClass = "PopulateRevisionSha1";
require_once RUN_MAINTENANCE_IF_MAIN;