[ Index ] |
PHP Cross Reference of MediaWiki-1.24.0 |
[Summary view] [Print] [Text view]
<?php
/**
 * Script to fix bug 20757.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 * http://www.gnu.org/copyleft/gpl.html
 *
 * @file
 * @ingroup Maintenance ExternalStorage
 */

require_once __DIR__ . '/../Maintenance.php';

/**
 * Maintenance script to fix bug 20757.
 *
 * Scans the text table for serialized HistoryBlobStub rows, looks up the
 * row each stub refers to in blob_tracking, and rewrites the stub row so
 * that it points directly at the tracked external blob.
 *
 * @ingroup Maintenance ExternalStorage
 */
class FixBug20757 extends Maintenance {
	/** @var int Number of text rows selected per batch query */
	public $batchSize = 10000;

	/** @var array Map of page ID => ( rev_text_id => rev_id ) */
	public $mapCache = array();

	/** @var int Total number of rev_text_id entries held in $mapCache */
	public $mapCacheSize = 0;

	/** @var int Entry count above which $mapCache pages are evicted */
	public $maxMapCacheSize = 1000000;

	/** @var int Number of waitForSlaves() calls between replication waits */
	public $slaveWaitInterval = 50;

	public function __construct() {
		parent::__construct();
		$this->mDescription = 'Script to fix bug 20757 assuming that blob_tracking is intact';
		$this->addOption( 'dry-run', 'Report only' );
		$this->addOption( 'start', 'old_id to start at', false, true );
	}

	/**
	 * Walk the text table in batches of $batchSize and repair every
	 * HistoryBlobStub row whose secondary row is listed in blob_tracking.
	 *
	 * With --dry-run the same checks and report lines are produced but no
	 * rows are written.
	 */
	public function execute() {
		$dbr = wfGetDB( DB_SLAVE );
		$dbw = wfGetDB( DB_MASTER );

		$dryRun = $this->getOption( 'dry-run' );
		if ( $dryRun ) {
			print "Dry run only.\n";
		}

		$startId = $this->getOption( 'start', 0 );
		$numGood = 0;
		$numFixed = 0;
		$numBad = 0;

		$totalRevs = $dbr->selectField( 'text', 'MAX(old_id)', false, __METHOD__ );

		if ( $dbr->getType() === 'mysql' ) {
			// In MySQL 4.1+, the binary field old_text has a non-working LOWER() function
			$lowerLeft = 'LOWER(CONVERT(LEFT(old_text,22) USING latin1))';
		} else {
			// Previously left undefined here, which produced an invalid query
			// on every non-MySQL backend.
			// NOTE(review): SUBSTR() chosen for portability; confirm against
			// the backends actually in use.
			$lowerLeft = 'LOWER(SUBSTR(old_text,1,22))';
		}

		while ( true ) {
			print "ID: $startId / $totalRevs\r";

			$res = $dbr->select(
				'text',
				array( 'old_id', 'old_flags', 'old_text' ),
				array(
					'old_id > ' . intval( $startId ),
					'old_flags LIKE \'%object%\' AND old_flags NOT LIKE \'%external%\'',
					"$lowerLeft = 'o:15:\"historyblobstub\"'",
				),
				__METHOD__,
				array(
					'ORDER BY' => 'old_id',
					'LIMIT' => $this->batchSize,
				)
			);

			if ( !$res->numRows() ) {
				break;
			}

			$secondaryIds = array();
			$stubs = array();

			foreach ( $res as $row ) {
				// Advance the cursor even for rows that are rejected below
				$startId = $row->old_id;

				// Basic sanity checks
				$obj = unserialize( $row->old_text );
				if ( $obj === false ) {
					print "{$row->old_id}: unrecoverable: cannot unserialize\n";
					++$numBad;
					continue;
				}

				if ( !is_object( $obj ) ) {
					print "{$row->old_id}: unrecoverable: unserialized to type " .
						gettype( $obj ) . ", possible double-serialization\n";
					++$numBad;
					continue;
				}

				if ( strtolower( get_class( $obj ) ) !== 'historyblobstub' ) {
					print "{$row->old_id}: unrecoverable: unexpected object class " .
						get_class( $obj ) . "\n";
					++$numBad;
					continue;
				}

				// Process flags
				$flags = explode( ',', $row->old_flags );
				$legacyEncoding = !( in_array( 'utf-8', $flags ) || in_array( 'utf8', $flags ) );

				// Queue the stub for future batch processing
				$id = intval( $obj->mOldId );
				$secondaryIds[] = $id;
				$stubs[$row->old_id] = array(
					'legacyEncoding' => $legacyEncoding,
					'secondaryId' => $id,
					'hash' => $obj->mHash,
				);
			}

			$secondaryIds = array_unique( $secondaryIds );

			if ( !count( $secondaryIds ) ) {
				// Every row in this batch was rejected; $startId has already moved on
				continue;
			}

			// Run the batch query on blob_tracking
			$res = $dbr->select(
				'blob_tracking',
				'*',
				array(
					'bt_text_id' => $secondaryIds,
				),
				__METHOD__
			);
			$trackedBlobs = array();
			foreach ( $res as $row ) {
				$trackedBlobs[$row->bt_text_id] = $row;
			}

			// Process the stubs
			foreach ( $stubs as $primaryId => $stub ) {
				$secondaryId = $stub['secondaryId'];
				if ( !isset( $trackedBlobs[$secondaryId] ) ) {
					// No tracked blob. Work out what went wrong
					$secondaryRow = $dbr->selectRow(
						'text',
						array( 'old_flags', 'old_text' ),
						array( 'old_id' => $secondaryId ),
						__METHOD__
					);
					if ( !$secondaryRow ) {
						print "$primaryId: unrecoverable: secondary row is missing\n";
						++$numBad;
					} elseif ( $this->isUnbrokenStub( $stub, $secondaryRow ) ) {
						// Not broken yet, and not in the tracked clusters so it won't get
						// broken by the current RCT run.
						++$numGood;
					} elseif ( strpos( $secondaryRow->old_flags, 'external' ) !== false ) {
						print "$primaryId: unrecoverable: secondary gone to {$secondaryRow->old_text}\n";
						++$numBad;
					} else {
						print "$primaryId: unrecoverable: miscellaneous corruption of secondary row\n";
						++$numBad;
					}
					unset( $stubs[$primaryId] );
					continue;
				}
				$trackRow = $trackedBlobs[$secondaryId];

				// Check that the specified text really is available in the tracked source row
				$url = "DB://{$trackRow->bt_cluster}/{$trackRow->bt_blob_id}/{$stub['hash']}";
				$text = ExternalStore::fetchFromURL( $url );
				if ( $text === false ) {
					print "$primaryId: unrecoverable: source text missing\n";
					++$numBad;
					unset( $stubs[$primaryId] );
					continue;
				}
				if ( md5( $text ) !== $stub['hash'] ) {
					print "$primaryId: unrecoverable: content hashes do not match\n";
					++$numBad;
					unset( $stubs[$primaryId] );
					continue;
				}

				// Find the page_id and rev_id
				// The page is probably the same as the page of the secondary row
				$pageId = intval( $trackRow->bt_page );
				if ( !$pageId ) {
					$revId = $pageId = 0;
				} else {
					$revId = $this->findTextIdInPage( $pageId, $primaryId );
					if ( !$revId ) {
						// Actually an orphan
						$pageId = $revId = 0;
					}
				}

				$newFlags = $stub['legacyEncoding'] ? 'external' : 'external,utf-8';

				if ( !$dryRun ) {
					// Reset the text row to point to the original copy
					$dbw->begin( __METHOD__ );
					$dbw->update(
						'text',
						// SET
						array(
							'old_flags' => $newFlags,
							'old_text' => $url
						),
						// WHERE
						array( 'old_id' => $primaryId ),
						__METHOD__
					);

					// Add a blob_tracking row so that the new reference can be recompressed
					// without needing to run trackBlobs.php again
					$dbw->insert( 'blob_tracking',
						array(
							'bt_page' => $pageId,
							'bt_rev_id' => $revId,
							'bt_text_id' => $primaryId,
							'bt_cluster' => $trackRow->bt_cluster,
							'bt_blob_id' => $trackRow->bt_blob_id,
							'bt_cgz_hash' => $stub['hash'],
							'bt_new_url' => null,
							'bt_moved' => 0,
						),
						__METHOD__
					);
					$dbw->commit( __METHOD__ );
					$this->waitForSlaves();
				}

				print "$primaryId: resolved to $url\n";
				++$numFixed;
			}
		}

		print "\n";
		print "Fixed: $numFixed\n";
		print "Unrecoverable: $numBad\n";
		print "Good stubs: $numGood\n";
	}

	/**
	 * Throttle writes: call wfWaitForSlaves() once every $slaveWaitInterval
	 * invocations.
	 *
	 * The previous condition, "++$iteration > 50 == 0" after an extra
	 * increment, parsed as "( ++$iteration > 50 ) == 0" and was therefore
	 * true on every call, so the script waited after every single row.
	 */
	public function waitForSlaves() {
		static $iteration = 0;
		if ( ++$iteration >= $this->slaveWaitInterval ) {
			wfWaitForSlaves();
			$iteration = 0;
		}
	}

	/**
	 * Find the revision of a page that uses a given text row.
	 *
	 * @param int $pageId
	 * @param int $textId
	 * @return int|null The rev_id, or null if no revision of the page has
	 *   that rev_text_id
	 */
	public function findTextIdInPage( $pageId, $textId ) {
		$ids = $this->getRevTextMap( $pageId );

		return isset( $ids[$textId] ) ? $ids[$textId] : null;
	}

	/**
	 * Get the rev_text_id => rev_id map for a page, loading it from the
	 * revision table on first use and caching it in $mapCache.
	 *
	 * @param int $pageId
	 * @return array Map of rev_text_id => rev_id
	 */
	public function getRevTextMap( $pageId ) {
		if ( !isset( $this->mapCache[$pageId] ) ) {
			// Limit cache size by dropping the oldest cached pages first
			while ( $this->mapCacheSize > $this->maxMapCacheSize ) {
				// Do not depend on wherever the internal array pointer happens to be
				reset( $this->mapCache );
				$key = key( $this->mapCache );
				if ( $key === null ) {
					// Nothing left to evict; resynchronise the counter and stop
					$this->mapCacheSize = 0;
					break;
				}
				$this->mapCacheSize -= count( $this->mapCache[$key] );
				unset( $this->mapCache[$key] );
			}

			$dbr = wfGetDB( DB_SLAVE );
			$map = array();
			$res = $dbr->select( 'revision',
				array( 'rev_id', 'rev_text_id' ),
				array( 'rev_page' => $pageId ),
				__METHOD__
			);
			foreach ( $res as $row ) {
				$map[$row->rev_text_id] = $row->rev_id;
			}
			$this->mapCache[$pageId] = $map;
			$this->mapCacheSize += count( $map );
		}

		return $this->mapCache[$pageId];
	}

	/**
	 * This is based on part of HistoryBlobStub::getText().
	 * Determine if the text can be retrieved from the row in the normal way.
	 * @param array $stub
	 * @param stdClass $secondaryRow
	 * @return bool
	 */
	public function isUnbrokenStub( $stub, $secondaryRow ) {
		$flags = explode( ',', $secondaryRow->old_flags );
		$text = $secondaryRow->old_text;
		if ( in_array( 'external', $flags ) ) {
			$url = $text;
			// A URL with no "://" separator, or nothing after it, is unusable
			$parts = explode( '://', $url, 2 );
			$path = isset( $parts[1] ) ? $parts[1] : '';
			if ( $path === '' ) {
				return false;
			}
			$text = ExternalStore::fetchFromURL( $url );
			if ( $text === false ) {
				// The fetch failed, so there is nothing to decode
				return false;
			}
		}
		if ( !in_array( 'object', $flags ) ) {
			return false;
		}

		if ( in_array( 'gzip', $flags ) ) {
			$obj = unserialize( gzinflate( $text ) );
		} else {
			$obj = unserialize( $text );
		}

		if ( !is_object( $obj ) ) {
			// Correct for old double-serialization bug.
			$obj = unserialize( $obj );
		}

		if ( !is_object( $obj ) ) {
			return false;
		}

		$obj->uncompress();
		$text = $obj->getItem( $stub['hash'] );

		return $text !== false;
	}
}

$maintClass = 'FixBug20757';
require_once RUN_MAINTENANCE_IF_MAIN;
title
Description
Body
title
Description
Body
title
Description
Body
title
Body
Generated: Fri Nov 28 14:03:12 2014 | Cross-referenced by PHPXref 0.7.1 |