[ Index ] |
PHP Cross Reference of MediaWiki-1.24.0 |
[Summary view] [Print] [Text view]
<?php
/**
 * Adds blobs from a given external storage cluster to the blob_tracking table.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 * http://www.gnu.org/copyleft/gpl.html
 *
 * @file
 * @ingroup Maintenance
 * @see wfWaitForSlaves()
 */

require __DIR__ . '/../commandLine.inc';

if ( count( $args ) < 1 ) {
	echo "Usage: php trackBlobs.php <cluster> [... <cluster>]\n";
	echo "Adds blobs from a given ES cluster to the blob_tracking table\n";
	echo "Automatically deletes the tracking table and starts from the start again when restarted.\n";

	exit( 1 );
}
$tracker = new TrackBlobs( $args );
$tracker->run();
echo "All done.\n";

/**
 * Fills the blob_tracking table with the external storage pointers ("DB://cluster/id[/hash]")
 * found in the text table for a given list of clusters and, when the GMP extension is
 * loaded, fills blob_orphans with blob rows that no text row points to.
 */
class TrackBlobs {
	/** Cluster names to scan, and the lazily built SQL condition matching their pointers */
	public $clusters, $textClause;

	/** True when GMP is available, i.e. when the orphan blob pass can run */
	public $doBlobOrphans = false;

	/** Per-cluster GMP bitfield; bit N is set once blob ID N has been seen in the text table */
	public $trackedBlobs = array();

	/** Row limit for each SELECT batch, and row count for each blob_orphans INSERT */
	public $batchSize = 1000;

	/** Number of batches between progress reports */
	public $reportingInterval = 10;

	/**
	 * @param array $clusters Names of the external storage clusters to track
	 */
	public function __construct( $clusters ) {
		$this->clusters = $clusters;
		if ( extension_loaded( 'gmp' ) ) {
			$this->doBlobOrphans = true;
			foreach ( $clusters as $cluster ) {
				$this->trackedBlobs[$cluster] = gmp_init( 0 );
			}
		} else {
			echo "Warning: the gmp extension is needed to find orphan blobs\n";
		}
	}

	/**
	 * Run every pass in order: integrity check, table reset, revision scan,
	 * orphan text scan and (only with GMP) orphan blob scan.
	 */
	public function run() {
		$this->checkIntegrity();
		$this->initTrackingTable();
		$this->trackRevisions();
		$this->trackOrphanText();
		if ( $this->doBlobOrphans ) {
			$this->findOrphanBlobs();
		}
	}

	/**
	 * Abort the script (exit code 1) if the text or archive tables contain
	 * HistoryBlobStub objects, or if the archive table contains external flags.
	 */
	public function checkIntegrity() {
		echo "Doing integrity check...\n";
		$dbr = wfGetDB( DB_SLAVE );

		// Scan for HistoryBlobStub objects in the text table (bug 20757)

		$exists = $dbr->selectField( 'text', 1,
			'old_flags LIKE \'%object%\' AND old_flags NOT LIKE \'%external%\' ' .
			'AND LOWER(CONVERT(LEFT(old_text,22) USING latin1)) = \'o:15:"historyblobstub"\'',
			__METHOD__
		);

		if ( $exists ) {
			echo "Integrity check failed: found HistoryBlobStub objects in your text table.\n" .
				"This script could destroy these objects if it continued. Run resolveStubs.php\n" .
				"to fix this.\n";
			exit( 1 );
		}

		// Scan the archive table for HistoryBlobStub objects or external flags (bug 22624)
		$flags = $dbr->selectField( 'archive', 'ar_flags',
			'ar_flags LIKE \'%external%\' OR (' .
			'ar_flags LIKE \'%object%\' ' .
			'AND LOWER(CONVERT(LEFT(ar_text,22) USING latin1)) = \'o:15:"historyblobstub"\' )',
			__METHOD__
		);

		if ( strpos( $flags, 'external' ) !== false ) {
			echo "Integrity check failed: found external storage pointers in your archive table.\n" .
				"Run normaliseArchiveTable.php to fix this.\n";
			exit( 1 );
		} elseif ( $flags ) {
			// A row matched but without the external flag, so it matched the stub branch
			echo "Integrity check failed: found HistoryBlobStub objects in your archive table.\n" .
				"These objects are probably already broken, continuing would make them\n" .
				"unrecoverable. Run \"normaliseArchiveTable.php --fix-cgz-bug\" to fix this.\n";
			exit( 1 );
		}

		echo "Integrity check OK\n";
	}

	/**
	 * Drop any existing tracking tables and recreate them from blob_tracking.sql.
	 */
	public function initTrackingTable() {
		$dbw = wfGetDB( DB_MASTER );
		// Check each table on its own: an interrupted earlier run may have left
		// only one of them behind, and dropping a missing table is an error.
		// NOTE(review): assumes blob_tracking.sql creates both tables -- confirm.
		foreach ( array( 'blob_tracking', 'blob_orphans' ) as $table ) {
			if ( $dbw->tableExists( $table ) ) {
				$dbw->query( 'DROP TABLE ' . $dbw->tableName( $table ), __METHOD__ );
			}
		}
		$dbw->sourceFile( __DIR__ . '/blob_tracking.sql' );
	}

	/**
	 * Build (once) and return the SQL condition matching old_text values that
	 * start with "DB://<cluster>/" for any of the configured clusters.
	 *
	 * @return string OR-joined list of LIKE conditions
	 */
	public function getTextClause() {
		if ( !$this->textClause ) {
			$dbr = wfGetDB( DB_SLAVE );
			$this->textClause = '';
			foreach ( $this->clusters as $cluster ) {
				if ( $this->textClause != '' ) {
					$this->textClause .= ' OR ';
				}
				$this->textClause .= 'old_text' . $dbr->buildLike( "DB://$cluster/", $dbr->anyString() );
			}
		}

		return $this->textClause;
	}

	/**
	 * Split an external storage pointer of the form DB://cluster/id or
	 * DB://cluster/id/hexhash into its parts.
	 *
	 * @param string $text Pointer text
	 * @return array|bool Array with keys 'cluster', 'id' (int) and 'hash'
	 *   (string, or null when absent), or false if $text is not a valid pointer
	 */
	public function interpretPointer( $text ) {
		if ( !preg_match( '!^DB://(\w+)/(\d+)(?:/([0-9a-fA-F]+)|)$!', $text, $m ) ) {
			return false;
		}

		return array(
			'cluster' => $m[1],
			'id' => intval( $m[2] ),
			'hash' => isset( $m[3] ) ? $m[3] : null
		);
	}

	/**
	 * Scan the revision table for rows stored in the specified clusters
	 */
	public function trackRevisions() {
		$dbw = wfGetDB( DB_MASTER );
		$dbr = wfGetDB( DB_SLAVE );

		$textClause = $this->getTextClause();
		$startId = 0;
		$endId = $dbr->selectField( 'revision', 'MAX(rev_id)', false, __METHOD__ );
		$batchesDone = 0;
		$rowsInserted = 0;

		echo "Finding revisions...\n";

		while ( true ) {
			// Page through revisions in rev_id order, resuming after the last ID seen
			$res = $dbr->select( array( 'revision', 'text' ),
				array( 'rev_id', 'rev_page', 'old_id', 'old_flags', 'old_text' ),
				array(
					'rev_id > ' . $dbr->addQuotes( $startId ),
					'rev_text_id=old_id',
					$textClause,
					'old_flags ' . $dbr->buildLike( $dbr->anyString(), 'external', $dbr->anyString() ),
				),
				__METHOD__,
				array(
					'ORDER BY' => 'rev_id',
					'LIMIT' => $this->batchSize
				)
			);
			if ( !$res->numRows() ) {
				break;
			}

			$insertBatch = array();
			foreach ( $res as $row ) {
				$startId = $row->rev_id;
				$info = $this->interpretPointer( $row->old_text );
				if ( !$info ) {
					echo "Invalid DB:// URL in rev_id {$row->rev_id}\n";
					continue;
				}
				// Strict comparison: both sides are strings, and a loose match would
				// treat distinct numeric-looking names (e.g. "10" and "1e1") as equal.
				if ( !in_array( $info['cluster'], $this->clusters, true ) ) {
					echo "Invalid cluster returned in SQL query: {$info['cluster']}\n";
					continue;
				}
				$insertBatch[] = array(
					'bt_page' => $row->rev_page,
					'bt_rev_id' => $row->rev_id,
					'bt_text_id' => $row->old_id,
					'bt_cluster' => $info['cluster'],
					'bt_blob_id' => $info['id'],
					'bt_cgz_hash' => $info['hash']
				);
				if ( $this->doBlobOrphans ) {
					gmp_setbit( $this->trackedBlobs[$info['cluster']], $info['id'] );
				}
			}
			$dbw->insert( 'blob_tracking', $insertBatch, __METHOD__ );
			$rowsInserted += count( $insertBatch );

			++$batchesDone;
			if ( $batchesDone >= $this->reportingInterval ) {
				$batchesDone = 0;
				echo "$startId / $endId\n";
				wfWaitForSlaves();
			}
		}
		echo "Found $rowsInserted revisions\n";
	}

	/**
	 * Scan the text table for orphan text
	 * Orphan text here does not imply DB corruption -- deleted text tracked by the
	 * archive table counts as orphan for our purposes.
	 */
	public function trackOrphanText() {
		# Wait until the blob_tracking table is available in the slave
		$dbw = wfGetDB( DB_MASTER );
		$dbr = wfGetDB( DB_SLAVE );
		$pos = $dbw->getMasterPos();
		$dbr->masterPosWait( $pos, 100000 );

		$textClause = $this->getTextClause();
		$startId = 0;
		$endId = $dbr->selectField( 'text', 'MAX(old_id)', false, __METHOD__ );
		$rowsInserted = 0;
		$batchesDone = 0;

		echo "Finding orphan text...\n";

		# Scan the text table for orphan text
		while ( true ) {
			// The LEFT JOIN plus "bt_text_id IS NULL" keeps only text rows that
			// have no blob_tracking row yet.
			$res = $dbr->select( array( 'text', 'blob_tracking' ),
				array( 'old_id', 'old_flags', 'old_text' ),
				array(
					'old_id>' . $dbr->addQuotes( $startId ),
					$textClause,
					'old_flags ' . $dbr->buildLike( $dbr->anyString(), 'external', $dbr->anyString() ),
					'bt_text_id IS NULL'
				),
				__METHOD__,
				array(
					'ORDER BY' => 'old_id',
					'LIMIT' => $this->batchSize
				),
				array( 'blob_tracking' => array( 'LEFT JOIN', 'bt_text_id=old_id' ) )
			);

			if ( !$res->numRows() ) {
				break;
			}

			$insertBatch = array();
			foreach ( $res as $row ) {
				$startId = $row->old_id;
				$info = $this->interpretPointer( $row->old_text );
				if ( !$info ) {
					echo "Invalid DB:// URL in old_id {$row->old_id}\n";
					continue;
				}
				// Strict comparison: both sides are strings, and a loose match would
				// treat distinct numeric-looking names (e.g. "10" and "1e1") as equal.
				if ( !in_array( $info['cluster'], $this->clusters, true ) ) {
					echo "Invalid cluster returned in SQL query\n";
					continue;
				}

				// Orphan text has no known page or revision, so both are recorded as 0
				$insertBatch[] = array(
					'bt_page' => 0,
					'bt_rev_id' => 0,
					'bt_text_id' => $row->old_id,
					'bt_cluster' => $info['cluster'],
					'bt_blob_id' => $info['id'],
					'bt_cgz_hash' => $info['hash']
				);
				if ( $this->doBlobOrphans ) {
					gmp_setbit( $this->trackedBlobs[$info['cluster']], $info['id'] );
				}
			}
			$dbw->insert( 'blob_tracking', $insertBatch, __METHOD__ );

			$rowsInserted += count( $insertBatch );
			++$batchesDone;
			if ( $batchesDone >= $this->reportingInterval ) {
				$batchesDone = 0;
				echo "$startId / $endId\n";
				wfWaitForSlaves();
			}
		}
		echo "Found $rowsInserted orphan text rows\n";
	}

	/**
	 * Scan the blobs table for rows not registered in blob_tracking (and thus not
	 * registered in the text table).
	 *
	 * Orphan blobs are indicative of DB corruption. They are inaccessible and
	 * should probably be deleted.
	 */
	public function findOrphanBlobs() {
		if ( !extension_loaded( 'gmp' ) ) {
			echo "Can't find orphan blobs, need bitfield support provided by GMP.\n";

			return;
		}

		$dbw = wfGetDB( DB_MASTER );

		foreach ( $this->clusters as $cluster ) {
			echo "Searching for orphan blobs in $cluster...\n";
			$lb = wfGetLBFactory()->getExternalLB( $cluster );
			try {
				$extDB = $lb->getConnection( DB_SLAVE );
			} catch ( DBConnectionError $e ) {
				if ( strpos( $e->error, 'Unknown database' ) !== false ) {
					echo "No database on $cluster\n";
				} else {
					echo "Error on $cluster: " . $e->getMessage() . "\n";
				}
				continue;
			}
			$table = $extDB->getLBInfo( 'blobs table' );
			if ( is_null( $table ) ) {
				$table = 'blobs';
			}
			if ( !$extDB->tableExists( $table ) ) {
				echo "No blobs table on cluster $cluster\n";
				continue;
			}
			$startId = 0;
			$batchesDone = 0;
			$actualBlobs = gmp_init( 0 );
			$endId = $extDB->selectField( $table, 'MAX(blob_id)', false, __METHOD__ );

			// Build a bitmap of actual blob rows
			while ( true ) {
				$res = $extDB->select( $table,
					array( 'blob_id' ),
					array( 'blob_id > ' . $extDB->addQuotes( $startId ) ),
					__METHOD__,
					array( 'LIMIT' => $this->batchSize, 'ORDER BY' => 'blob_id' )
				);

				if ( !$res->numRows() ) {
					break;
				}

				foreach ( $res as $row ) {
					gmp_setbit( $actualBlobs, $row->blob_id );
					// Rows arrive in blob_id order, so this ends up as the batch maximum
					$startId = $row->blob_id;
				}

				++$batchesDone;
				if ( $batchesDone >= $this->reportingInterval ) {
					$batchesDone = 0;
					echo "$startId / $endId\n";
				}
			}

			// Find actual blobs that weren't tracked by the previous passes
			// This is a set-theoretic difference A \ B, or in bitwise terms, A & ~B
			$orphans = gmp_and( $actualBlobs, gmp_com( $this->trackedBlobs[$cluster] ) );

			// Traverse the orphan list
			$insertBatch = array();
			$id = 0;
			$numOrphans = 0;
			while ( true ) {
				// Jump to the next set bit at or after $id; -1 means none is left
				$id = gmp_scan1( $orphans, $id );
				if ( $id == -1 ) {
					break;
				}
				$insertBatch[] = array(
					'bo_cluster' => $cluster,
					'bo_blob_id' => $id
				);
				// Flush once the batch holds exactly batchSize rows
				if ( count( $insertBatch ) >= $this->batchSize ) {
					$dbw->insert( 'blob_orphans', $insertBatch, __METHOD__ );
					$insertBatch = array();
				}

				++$id;
				++$numOrphans;
			}
			if ( $insertBatch ) {
				$dbw->insert( 'blob_orphans', $insertBatch, __METHOD__ );
			}
			echo "Found $numOrphans orphan(s) in $cluster\n";
		}
	}
}
title
Description
Body
title
Description
Body
title
Description
Body
title
Body
Generated: Fri Nov 28 14:03:12 2014 | Cross-referenced by PHPXref 0.7.1 |