MediaWiki  REL1_21
checkStorage.php
Go to the documentation of this file.
00001 <?php
// Command-line entry point: only runs when this file was not loaded by an
// already-initialised MediaWiki (the MEDIAWIKI constant is undefined).
if ( !defined( 'MEDIAWIKI' ) ) {
	// NOTE(review): commandLine.inc presumably populates $options and $args
	// from the command line - confirm against that file.
	require_once( __DIR__ . '/../commandLine.inc' );

	// --fix enables automatic repair; the first positional argument, if any,
	// names the XML dump used to restore damaged text.
	$fix = isset( $options['fix'] );
	$xml = isset( $args[0] ) ? $args[0] : false;

	$cs = new CheckStorage;
	$cs->check( $fix, $xml );
}
00036 
00037 
00038 // ----------------------------------------------------------------------------------
00039 
/**
 * Integrity checker for revision text storage.
 *
 * Walks the revision table in chunks of rev_id, looks up the text rows they
 * reference and reports rows that are missing, carry unrecognised flags, have
 * malformed external storage URLs or have unexpected serialized object
 * headers. With $fix enabled some problems are repaired in place, and damaged
 * text can be re-imported from an XML dump.
 */
class CheckStorage {
	/** Serialized-object prefix expected at the start of a concatenated blob */
	const CONCAT_HEADER = 'O:27:"concatenatedgziphistoryblob"';

	/**
	 * $oldIdMap: rev_id => rev_text_id for the chunk currently being checked.
	 * $errors:   error type => array keyed by the affected rev_id.
	 */
	public $oldIdMap, $errors;

	/** Lazily created ExternalStoreDB instance, null until first needed */
	public $dbStore = null;

	/** Human readable description for each key of $errors */
	public $errorDescriptions = array(
		'restore text' => 'Damaged text, need to be restored from a backup',
		'restore revision' => 'Damaged revision row, need to be restored from a backup',
		'unfixable' => 'Unexpected errors with no automated fixing method',
		'fixed' => 'Errors already fixed',
		'fixable' => 'Errors which would already be fixed if --fix was specified',
	);

	/**
	 * Run the full storage check and print a report.
	 *
	 * @param bool $fix Repair what can be repaired automatically
	 * @param string|bool $xml Path of an XML dump used to restore damaged
	 *   text; an empty string or false means no dump is available
	 */
	public function check( $fix = false, $xml = '' ) {
		$dbr = wfGetDB( DB_SLAVE );
		if ( $fix ) {
			print "Checking, will fix errors if possible...\n";
		} else {
			print "Checking...\n";
		}
		$maxRevId = $dbr->selectField( 'revision', 'MAX(rev_id)', false, __METHOD__ );
		$chunkSize = 1000;
		$flagStats = array();
		$objectStats = array();
		$knownFlags = array( 'external', 'gzip', 'object', 'utf-8' );
		$this->errors = array(
			'restore text' => array(),
			'restore revision' => array(),
			'unfixable' => array(),
			'fixed' => array(),
			'fixable' => array(),
		);

		// The bound is inclusive: with "<" the last revision was skipped
		// whenever MAX(rev_id) happened to be the first ID of a chunk.
		for ( $chunkStart = 1; $chunkStart <= $maxRevId; $chunkStart += $chunkSize ) {
			$chunkEnd = $chunkStart + $chunkSize - 1;

			// Fetch revision rows
			$this->oldIdMap = array();
			$dbr->ping();
			$res = $dbr->select( 'revision', array( 'rev_id', 'rev_text_id' ),
				array( "rev_id BETWEEN $chunkStart AND $chunkEnd" ), __METHOD__ );
			foreach ( $res as $row ) {
				$this->oldIdMap[$row->rev_id] = $row->rev_text_id;
			}
			$dbr->freeResult( $res );

			if ( !count( $this->oldIdMap ) ) {
				continue;
			}

			// Fetch old_flags. Every text ID starts out as "missing" and is
			// removed from the list once its row has been seen.
			$missingTextRows = array_flip( $this->oldIdMap );
			$externalRevs = array();
			$objectRevs = array();
			$res = $dbr->select( 'text', array( 'old_id', 'old_flags' ),
				'old_id IN (' . implode( ',', $this->oldIdMap ) . ')', __METHOD__ );
			foreach ( $res as $row ) {
				$flags = $row->old_flags;
				$id = $row->old_id;

				// Create flagStats row if it doesn't exist, then count
				$flagStats = $flagStats + array( $flags => 0 );
				$flagStats[$flags]++;

				// Not missing
				unset( $missingTextRows[$row->old_id] );

				// Check for external or object
				if ( $flags == '' ) {
					$flagArray = array();
				} else {
					$flagArray = explode( ',', $flags );
				}
				if ( in_array( 'external', $flagArray ) ) {
					$externalRevs[] = $id;
				} elseif ( in_array( 'object', $flagArray ) ) {
					$objectRevs[] = $id;
				}

				// Check for unrecognised flags
				if ( $flags == '0' ) {
					// This is a known bug from 2004
					// It's safe to just erase the old_flags field
					if ( $fix ) {
						$this->error( 'fixed', "Warning: old_flags set to 0", $id );
						$dbw = wfGetDB( DB_MASTER );
						$dbw->ping();
						$dbw->update( 'text', array( 'old_flags' => '' ),
							array( 'old_id' => $id ), __METHOD__ );
						echo "Fixed\n";
					} else {
						$this->error( 'fixable', "Warning: old_flags set to 0", $id );
					}
				} elseif ( count( array_diff( $flagArray, $knownFlags ) ) ) {
					$this->error( 'unfixable', "Error: invalid flags field \"$flags\"", $id );
				}
			}
			$dbr->freeResult( $res );

			// Output errors for any missing text rows
			foreach ( $missingTextRows as $oldId => $revId ) {
				$this->error( 'restore revision', "Error: missing text row", $oldId );
			}

			// Verify external revisions. Both maps have the shape
			// cluster => blob ID => list of old_id values pointing at it.
			$externalConcatBlobs = array();
			$externalNormalBlobs = array();
			if ( count( $externalRevs ) ) {
				$res = $dbr->select( 'text', array( 'old_id', 'old_flags', 'old_text' ),
					array( 'old_id IN (' . implode( ',', $externalRevs ) . ')' ), __METHOD__ );
				foreach ( $res as $row ) {
					$urlParts = explode( '://', $row->old_text, 2 );
					if ( count( $urlParts ) !== 2 || $urlParts[1] == '' ) {
						$this->error( 'restore text', "Error: invalid URL \"{$row->old_text}\"", $row->old_id );
						continue;
					}
					$proto = $urlParts[0];
					if ( $proto != 'DB' ) {
						$this->error( 'restore text', "Error: invalid external protocol \"$proto\"", $row->old_id );
						continue;
					}
					// Splitting "DB://cluster/id[/item]" on "/" yields
					// 'DB:', '', cluster, id and optionally item.
					$path = explode( '/', $row->old_text );
					if ( !isset( $path[3] ) || $path[2] === '' || $path[3] === '' ) {
						// Without both a cluster and a blob ID there is
						// nothing that could be looked up later on.
						$this->error( 'restore text', "Error: invalid URL \"{$row->old_text}\"", $row->old_id );
						continue;
					}
					$cluster = $path[2];
					$id = $path[3];
					if ( isset( $path[4] ) ) {
						$externalConcatBlobs[$cluster][$id][] = $row->old_id;
					} else {
						$externalNormalBlobs[$cluster][$id][] = $row->old_id;
					}
				}
				$dbr->freeResult( $res );
			}

			// Check external concat blobs for the right header
			$this->checkExternalConcatBlobs( $externalConcatBlobs );

			// Check external normal blobs for existence
			$this->checkExternalNormalBlobs( $externalNormalBlobs );

			// Check local objects
			$dbr->ping();
			$concatBlobs = array();
			$curIds = array();
			if ( count( $objectRevs ) ) {
				$headerLength = 300;
				$res = $dbr->select( 'text', array( 'old_id', 'old_flags', "LEFT(old_text, $headerLength) AS header" ),
					array( 'old_id IN (' . implode( ',', $objectRevs ) . ')' ), __METHOD__ );
				foreach ( $res as $row ) {
					$oldId = $row->old_id;
					$matches = array();
					if ( !preg_match( '/^O:(\d+):"(\w+)"/', $row->header, $matches ) ) {
						$this->error( 'restore text', "Error: invalid object header", $oldId );
						continue;
					}

					$className = strtolower( $matches[2] );
					if ( strlen( $className ) != $matches[1] ) {
						$this->error( 'restore text', "Error: invalid object header, wrong class name length", $oldId );
						continue;
					}

					$objectStats = $objectStats + array( $className => 0 );
					$objectStats[$className]++;

					switch ( $className ) {
						case 'concatenatedgziphistoryblob':
							// Good
							break;
						case 'historyblobstub':
						case 'historyblobcurstub':
							// A header filling the whole LEFT() window may
							// have been truncated, so it cannot be trusted.
							if ( strlen( $row->header ) == $headerLength ) {
								$this->error( 'unfixable', "Error: overlong stub header", $oldId );
								// "continue 2": inside a switch a plain
								// "continue" only acts like "break".
								continue 2;
							}
							// NOTE(review): unserialize() runs on data read
							// from the text table; make sure that content
							// is trusted.
							$stubObj = unserialize( $row->header );
							if ( !is_object( $stubObj ) ) {
								$this->error( 'restore text', "Error: unable to unserialize stub object", $oldId );
								continue 2;
							}
							if ( $className == 'historyblobstub' ) {
								$concatBlobs[$stubObj->mOldId][] = $oldId;
							} else {
								$curIds[$stubObj->mCurId][] = $oldId;
							}
							break;
						default:
							$this->error( 'unfixable', "Error: unrecognised object class \"$className\"", $oldId );
					}
				}
				$dbr->freeResult( $res );
			}

			// Check local concat blob validity
			$externalConcatBlobs = array();
			if ( count( $concatBlobs ) ) {
				$headerLength = 300;
				$res = $dbr->select( 'text', array( 'old_id', 'old_flags', "LEFT(old_text, $headerLength) AS header" ),
					array( 'old_id IN (' . implode( ',', array_keys( $concatBlobs ) ) . ')' ), __METHOD__ );
				foreach ( $res as $row ) {
					$flags = explode( ',', $row->old_flags );
					if ( in_array( 'external', $flags ) ) {
						// Concat blob is in external storage?
						if ( in_array( 'object', $flags ) ) {
							$urlParts = explode( '/', $row->header );
							if ( $urlParts[0] != 'DB:' ) {
								$this->error( 'unfixable', "Error: unrecognised external storage type \"{$urlParts[0]}\"", $row->old_id );
							} elseif ( !isset( $urlParts[3] ) || $urlParts[2] === '' || $urlParts[3] === '' ) {
								// Cluster or blob ID is missing from the URL
								$this->error( 'restore text', "Error: invalid external storage URL \"{$row->header}\" on concat bulk row {$row->old_id}",
									$concatBlobs[$row->old_id] );
							} else {
								$cluster = $urlParts[2];
								$id = $urlParts[3];
								if ( !isset( $externalConcatBlobs[$cluster][$id] ) ) {
									$externalConcatBlobs[$cluster][$id] = array();
								}
								$externalConcatBlobs[$cluster][$id] = array_merge(
									$externalConcatBlobs[$cluster][$id], $concatBlobs[$row->old_id]
								);
							}
						} else {
							$this->error( 'unfixable', "Error: invalid flags \"{$row->old_flags}\" on concat bulk row {$row->old_id}",
								$concatBlobs[$row->old_id] );
						}
					} elseif ( strcasecmp( substr( $row->header, 0, strlen( self::CONCAT_HEADER ) ), self::CONCAT_HEADER ) ) {
						$this->error( 'restore text', "Error: Incorrect object header for concat bulk row {$row->old_id}",
							$concatBlobs[$row->old_id] );
					} # else good

					unset( $concatBlobs[$row->old_id] );
				}
				$dbr->freeResult( $res );
			}

			// Check targets of unresolved stubs
			$this->checkExternalConcatBlobs( $externalConcatBlobs );

			// next chunk
		}

		print "\n\nErrors:\n";
		foreach ( $this->errors as $name => $errors ) {
			if ( count( $errors ) ) {
				$description = $this->errorDescriptions[$name];
				echo "$description: " . implode( ',', array_keys( $errors ) ) . "\n";
			}
		}

		if ( count( $this->errors['restore text'] ) && $fix ) {
			// The string cast makes both '' and false mean "no dump given"
			if ( (string)$xml !== '' ) {
				$this->restoreText( array_keys( $this->errors['restore text'] ), $xml );
			} else {
				echo "Can't fix text, no XML backup specified\n";
			}
		}

		print "\nFlag statistics:\n";
		$this->printStats( $flagStats );
		print "\nLocal object statistics:\n";
		$this->printStats( $objectStats );
	}

	/**
	 * Print one line per counter with its share of the total.
	 *
	 * @param array $stats Map of label => count
	 */
	private function printStats( $stats ) {
		// No division by zero: the loop body only runs when there is at
		// least one counter, and each counter is at least 1.
		$total = array_sum( $stats );
		foreach ( $stats as $label => $count ) {
			printf( "%-30s %10d %5.2f%%\n", $label, $count, $count / $total * 100 );
		}
	}

	/**
	 * Print an error message and record the affected revisions.
	 *
	 * The text row ID(s) are translated to revision IDs through $oldIdMap,
	 * so only revisions of the chunk currently loaded can be recorded.
	 *
	 * @param string $type Key of $this->errors to file the revisions under
	 * @param string $msg Message prefix to print
	 * @param int|string|array $ids One old_id or a list of them
	 */
	public function error( $type, $msg, $ids ) {
		// A one-element list is reported like a scalar ID
		if ( is_array( $ids ) && count( $ids ) == 1 ) {
			$ids = reset( $ids );
		}
		if ( is_array( $ids ) ) {
			$revIds = array();
			foreach ( $ids as $id ) {
				$revIds = array_merge( $revIds, array_keys( $this->oldIdMap, $id ) );
			}
			print "$msg in text rows " . implode( ', ', $ids ) .
				", revisions " . implode( ', ', $revIds ) . "\n";
		} else {
			$id = $ids;
			$revIds = array_keys( $this->oldIdMap, $id );
			if ( count( $revIds ) == 1 ) {
				print "$msg in old_id $id, rev_id {$revIds[0]}\n";
			} else {
				print "$msg in old_id $id, revisions " . implode( ', ', $revIds ) . "\n";
			}
		}
		// Array union keeps entries that were recorded earlier
		$this->errors[$type] = $this->errors[$type] + array_flip( $revIds );
	}

	/**
	 * Return the shared ExternalStoreDB instance, creating it on first use.
	 *
	 * @return ExternalStoreDB
	 */
	private function getDbStore() {
		if ( is_null( $this->dbStore ) ) {
			$this->dbStore = new ExternalStoreDB;
		}
		return $this->dbStore;
	}

	/**
	 * Verify that the targets of two-part external storage URLs exist and
	 * start with the concatenated blob header.
	 *
	 * @param array $externalConcatBlobs cluster => blob ID => list of old_id
	 */
	public function checkExternalConcatBlobs( $externalConcatBlobs ) {
		if ( !count( $externalConcatBlobs ) ) {
			return;
		}

		$dbStore = $this->getDbStore();
		$headerLength = strlen( self::CONCAT_HEADER );

		foreach ( $externalConcatBlobs as $cluster => $oldIds ) {
			$blobIds = array_keys( $oldIds );
			$extDb = $dbStore->getSlave( $cluster );
			$blobsTable = $dbStore->getTable( $extDb );
			$res = $extDb->select( $blobsTable,
				array( 'blob_id', "LEFT(blob_text, $headerLength) AS header" ),
				array( 'blob_id IN( ' . implode( ',', $blobIds ) . ')' ), __METHOD__ );
			foreach ( $res as $row ) {
				if ( strcasecmp( $row->header, self::CONCAT_HEADER ) ) {
					$this->error( 'restore text', "Error: invalid header on target $cluster/{$row->blob_id} of two-part ES URL",
						$oldIds[$row->blob_id] );
				}
				// Seen, so not missing
				unset( $oldIds[$row->blob_id] );
			}
			$extDb->freeResult( $res );

			// Print errors for missing blobs rows
			foreach ( $oldIds as $blobId => $oldIds2 ) {
				$this->error( 'restore text', "Error: missing target $cluster/$blobId for two-part ES URL", $oldIds2 );
			}
		}
	}

	/**
	 * Verify that the targets of one-part external storage URLs exist.
	 *
	 * @param array $externalNormalBlobs cluster => blob ID => list of old_id
	 */
	private function checkExternalNormalBlobs( $externalNormalBlobs ) {
		if ( !count( $externalNormalBlobs ) ) {
			return;
		}

		$dbStore = $this->getDbStore();

		foreach ( $externalNormalBlobs as $cluster => $xBlobIds ) {
			$blobIds = array_keys( $xBlobIds );
			$extDb = $dbStore->getSlave( $cluster );
			$blobsTable = $dbStore->getTable( $extDb );
			$res = $extDb->select( $blobsTable,
				array( 'blob_id' ),
				array( 'blob_id IN( ' . implode( ',', $blobIds ) . ')' ), __METHOD__ );
			foreach ( $res as $row ) {
				unset( $xBlobIds[$row->blob_id] );
			}
			$extDb->freeResult( $res );

			// Print errors for missing blobs rows
			foreach ( $xBlobIds as $blobId => $oldId ) {
				$this->error( 'restore text', "Error: missing target $blobId for one-part ES URL", $oldId );
			}
		}
	}

	/**
	 * Re-import the text of damaged revisions from an XML dump.
	 *
	 * The dump is first filtered down to the wanted revisions with the
	 * external "mwdumper" program, then every revision of the filtered dump
	 * is handed to importRevision().
	 *
	 * @param array $revIds Revision IDs to restore
	 * @param string $xml Path of the XML dump
	 */
	public function restoreText( $revIds, $xml ) {
		global $wgDBname;
		$tmpDir = wfTempDir();

		if ( !count( $revIds ) ) {
			return;
		}

		print "Restoring text from XML backup...\n";

		$revFileName = "$tmpDir/broken-revlist-$wgDBname";
		$filteredXmlFileName = "$tmpDir/filtered-$wgDBname.xml";

		// Write revision list
		if ( file_put_contents( $revFileName, implode( "\n", $revIds ) ) === false ) {
			echo "Error writing revision list, can't restore text\n";
			return;
		}

		// Run mwdumper
		echo "Filtering XML dump...\n";
		$exitStatus = 0;
		passthru( 'mwdumper ' .
			wfEscapeShellArg(
				"--output=file:$filteredXmlFileName",
				"--filter=revlist:$revFileName",
				$xml
			), $exitStatus
		);

		if ( $exitStatus ) {
			echo "mwdumper died with exit status $exitStatus\n";
			return;
		}

		$file = fopen( $filteredXmlFileName, 'r' );
		if ( !$file ) {
			echo "Unable to open filtered XML file\n";
			return;
		}

		// NOTE(review): presumably these pings re-establish connections
		// that timed out while mwdumper was running - confirm.
		$dbr = wfGetDB( DB_SLAVE );
		$dbw = wfGetDB( DB_MASTER );
		$dbr->ping();
		$dbw->ping();

		$source = new ImportStreamSource( $file );
		$importer = new WikiImporter( $source );
		// Objects are passed by handle, no reference to $this is needed
		$importer->setRevisionCallback( array( $this, 'importRevision' ) );
		$importer->doImport();

		// The import is finished, release the handle on the filtered dump
		if ( is_resource( $file ) ) {
			fclose( $file );
		}
	}

	/**
	 * Import callback: write the text of one dumped revision back into the
	 * text row referenced by the revision.
	 *
	 * @param object $revision Revision read from the dump; getID() and
	 *   getText() are called on it
	 * @param object $importer The importer invoking the callback (unused)
	 */
	public function importRevision( &$revision, &$importer ) {
		$id = $revision->getID();
		$text = $revision->getText();
		if ( $text === '' ) {
			// This is what happens if the revision was broken at the time the
			// dump was made. Unfortunately, it also happens if the revision was
			// legitimately blank, so there's no way to tell the difference. To
			// be safe, we'll skip it and leave it broken
			$id = $id ? $id : '';
			echo "Revision $id is blank in the dump, may have been broken before export\n";
			return;
		}

		if ( !$id ) {
			// No ID, can't import
			echo "No id tag in revision, can't import\n";
			return;
		}

		// Find text row again
		$dbr = wfGetDB( DB_SLAVE );
		$oldId = $dbr->selectField( 'revision', 'rev_text_id', array( 'rev_id' => $id ), __METHOD__ );
		if ( !$oldId ) {
			echo "Missing revision row for rev_id $id\n";
			return;
		}

		// Compress the text
		// NOTE(review): compressRevisionText() presumably rewrites $text in
		// place and returns the matching flags - confirm against Revision.
		$flags = Revision::compressRevisionText( $text );

		// Update the text row
		$dbw = wfGetDB( DB_MASTER );
		$dbw->update( 'text',
			array( 'old_flags' => $flags, 'old_text' => $text ),
			array( 'old_id' => $oldId ),
			__METHOD__, array( 'LIMIT' => 1 )
		);

		// Remove it from the unfixed list and add it to the fixed list
		unset( $this->errors['restore text'][$id] );
		$this->errors['fixed'][$id] = true;
	}
}