MediaWiki
REL1_19
|
<?php
/**
 * Fills in the rev_sha1 and ar_sha1 columns for rows where they are still empty.
 *
 * @file
 */

require_once( dirname( __FILE__ ) . '/Maintenance.php' );

/**
 * Maintenance script that populates the rev_sha1 field of the revision table
 * and the ar_sha1 field of the archive table with the base-36 SHA-1 of each
 * row's raw text.
 */
class PopulateRevisionSha1 extends LoggedUpdateMaintenance {
	public function __construct() {
		parent::__construct();
		$this->mDescription = "Populates the rev_sha1 and ar_sha1 fields";
		$this->setBatchSize( 200 );
	}

	/**
	 * Key identifying this update.
	 * NOTE(review): presumably used by LoggedUpdateMaintenance to record that
	 * the update already ran -- confirm against the parent class.
	 *
	 * @return string
	 */
	protected function getUpdateKey() {
		return 'populate rev_sha1';
	}

	/**
	 * Populate the revision table first, then the archive table: archive rows
	 * that carry an ar_rev_id, followed by legacy rows where ar_rev_id is NULL.
	 *
	 * @return bool Always true when this method returns
	 */
	protected function doDBUpdates() {
		$db = $this->getDB( DB_MASTER );

		# NOTE(review): the second argument to error() presumably aborts the
		# script, so nothing below runs if a table is missing -- confirm.
		if ( !$db->tableExists( 'revision' ) ) {
			$this->error( "revision table does not exist", true );
		} elseif ( !$db->tableExists( 'archive' ) ) {
			$this->error( "archive table does not exist", true );
		}

		$this->output( "Populating rev_sha1 column\n" );
		$rc = $this->doSha1Updates( 'revision', 'rev_id', 'rev' );

		$this->output( "Populating ar_sha1 column\n" );
		$ac = $this->doSha1Updates( 'archive', 'ar_rev_id', 'ar' );
		$this->output( "Populating ar_sha1 column legacy rows\n" );
		$ac += $this->doSha1LegacyUpdates();

		$this->output( "rev_sha1 and ar_sha1 population complete [$rc revision rows, $ac archive rows].\n" );
		return true;
	}

	/**
	 * Walk a table in ID ranges of mBatchSize and fill in the SHA-1 column of
	 * every row in each range whose SHA-1 column is still the empty string.
	 *
	 * @param $table string Table name ('revision' or 'archive')
	 * @param $idCol string Numeric ID column used to slice the table into batches
	 * @param $prefix string Column prefix; the target column is "{$prefix}_sha1"
	 * @return int Number of rows that were updated
	 */
	protected function doSha1Updates( $table, $idCol, $prefix ) {
		$db = $this->getDB( DB_MASTER );
		$start = $db->selectField( $table, "MIN($idCol)", false, __METHOD__ );
		$end = $db->selectField( $table, "MAX($idCol)", false, __METHOD__ );
		if ( !$start || !$end ) {
			$this->output( "...$table table seems to be empty.\n" );
			return 0;
		}

		$count = 0;
		# Pad the upper bound so the final, partial batch is still visited
		$end += $this->mBatchSize - 1;
		$blockStart = $start;
		$blockEnd = $start + $this->mBatchSize - 1;
		while ( $blockEnd <= $end ) {
			$this->output( "...doing $idCol from $blockStart to $blockEnd\n" );
			$cond = "$idCol BETWEEN $blockStart AND $blockEnd" .
				" AND $idCol IS NOT NULL AND {$prefix}_sha1 = ''";
			$res = $db->select( $table, '*', $cond, __METHOD__ );

			# One transaction per batch
			$db->begin();
			foreach ( $res as $row ) {
				if ( $this->upgradeRow( $row, $table, $idCol, $prefix ) ) {
					$count++;
				}
			}
			$db->commit();

			$blockStart += $this->mBatchSize;
			$blockEnd += $this->mBatchSize;
			wfWaitForSlaves();
		}
		return $count;
	}

	/**
	 * Fill in ar_sha1 for archive rows that have no ar_rev_id, committing
	 * after every 100 rows processed.
	 *
	 * Only rows whose ar_sha1 is still empty are selected, mirroring the
	 * filter in doSha1Updates(), so rows that are already populated are not
	 * re-hashed or counted again.
	 *
	 * @return int Number of rows that were updated
	 */
	protected function doSha1LegacyUpdates() {
		$count = 0;
		$db = $this->getDB( DB_MASTER );
		$res = $db->select(
			'archive',
			'*',
			array( 'ar_rev_id IS NULL', 'ar_sha1' => '' ),
			__METHOD__
		);

		$updateSize = 0;
		$db->begin();
		foreach ( $res as $row ) {
			if ( $this->upgradeLegacyArchiveRow( $row ) ) {
				++$count;
			}
			if ( ++$updateSize >= 100 ) {
				$updateSize = 0;
				$db->commit();
				$this->output( "Commited row with ar_timestamp={$row->ar_timestamp}\n" );
				wfWaitForSlaves();
				$db->begin();
			}
		}
		$db->commit();
		return $count;
	}

	/**
	 * Compute and store the SHA-1 for a single revision or archive row that
	 * can be addressed by its ID column.
	 *
	 * @param $row Object Row as returned by the SELECT in doSha1Updates()
	 * @param $table string Table name ('revision' or 'archive')
	 * @param $idCol string ID column used to address the row in the UPDATE
	 * @param $prefix string Column prefix; the target column is "{$prefix}_sha1"
	 * @return bool True if the row was updated, false if its text was unavailable
	 */
	protected function upgradeRow( $row, $table, $idCol, $prefix ) {
		$db = $this->getDB( DB_MASTER );
		if ( $table === 'archive' ) {
			$rev = Revision::newFromArchiveRow( $row );
		} else {
			$rev = new Revision( $row );
		}
		$text = $rev->getRawText();
		if ( !is_string( $text ) ) {
			# This should not happen, but sometimes does (bug 20757)
			$this->output( "Text of revision with {$idCol}={$row->$idCol} unavailable!\n" );
			return false;
		} else {
			$db->update( $table,
				array( "{$prefix}_sha1" => Revision::base36Sha1( $text ) ),
				array( $idCol => $row->$idCol ),
				__METHOD__
			);
			return true;
		}
	}

	/**
	 * Compute and store the SHA-1 for a legacy archive row, i.e. one that has
	 * no ar_rev_id and is matched by namespace, title, timestamp and length.
	 *
	 * @param $row Object Archive row as returned by doSha1LegacyUpdates()
	 * @return bool True if the row was updated, false if its text was unavailable
	 */
	protected function upgradeLegacyArchiveRow( $row ) {
		$db = $this->getDB( DB_MASTER );
		$rev = Revision::newFromArchiveRow( $row );
		$text = $rev->getRawText();
		if ( !is_string( $text ) ) {
			# This should not happen, but sometimes does (bug 20757)
			$this->output( "Text of revision with timestamp {$row->ar_timestamp} unavailable!\n" );
			return false;
		} else {
			# Archive table has no PK, but (NS,title,time) should be near unique.
			# Any duplicates on those should also have duplicated text anyway.
			$db->update( 'archive',
				array( 'ar_sha1' => Revision::base36Sha1( $text ) ),
				array(
					'ar_namespace' => $row->ar_namespace,
					'ar_title' => $row->ar_title,
					'ar_timestamp' => $row->ar_timestamp,
					'ar_len' => $row->ar_len // extra sanity
				),
				__METHOD__
			);
			return true;
		}
	}
}

$maintClass = "PopulateRevisionSha1";
require_once( RUN_MAINTENANCE_IF_MAIN );