[ Index ]

PHP Cross Reference of MediaWiki-1.24.0

title

Body

[close]

/maintenance/storage/ -> fixBug20757.php (source)

   1  <?php
   2  /**
   3   * Script to fix bug 20757.
   4   *
   5   * This program is free software; you can redistribute it and/or modify
   6   * it under the terms of the GNU General Public License as published by
   7   * the Free Software Foundation; either version 2 of the License, or
   8   * (at your option) any later version.
   9   *
  10   * This program is distributed in the hope that it will be useful,
  11   * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13   * GNU General Public License for more details.
  14   *
  15   * You should have received a copy of the GNU General Public License along
  16   * with this program; if not, write to the Free Software Foundation, Inc.,
  17   * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  18   * http://www.gnu.org/copyleft/gpl.html
  19   *
  20   * @file
  21   * @ingroup Maintenance ExternalStorage
  22   */
  23  
  24  require_once  __DIR__ . '/../Maintenance.php';
  25  
  26  /**
  27   * Maintenance script to fix bug 20757.
  28   *
  29   * @ingroup Maintenance ExternalStorage
  30   */
  31  class FixBug20757 extends Maintenance {
  32      public $batchSize = 10000;
  33      public $mapCache = array();
  34      public $mapCacheSize = 0;
  35      public $maxMapCacheSize = 1000000;
  36  
  37  	function __construct() {
  38          parent::__construct();
  39          $this->mDescription = 'Script to fix bug 20757 assuming that blob_tracking is intact';
  40          $this->addOption( 'dry-run', 'Report only' );
  41          $this->addOption( 'start', 'old_id to start at', false, true );
  42      }
  43  
  44  	function execute() {
  45          $dbr = wfGetDB( DB_SLAVE );
  46          $dbw = wfGetDB( DB_MASTER );
  47  
  48          $dryRun = $this->getOption( 'dry-run' );
  49          if ( $dryRun ) {
  50              print "Dry run only.\n";
  51          }
  52  
  53          $startId = $this->getOption( 'start', 0 );
  54          $numGood = 0;
  55          $numFixed = 0;
  56          $numBad = 0;
  57  
  58          $totalRevs = $dbr->selectField( 'text', 'MAX(old_id)', false, __METHOD__ );
  59  
  60          if ( $dbr->getType() == 'mysql' ) {
  61              // In MySQL 4.1+, the binary field old_text has a non-working LOWER() function
  62              $lowerLeft = 'LOWER(CONVERT(LEFT(old_text,22) USING latin1))';
  63          }
  64  
  65          while ( true ) {
  66              print "ID: $startId / $totalRevs\r";
  67  
  68              $res = $dbr->select(
  69                  'text',
  70                  array( 'old_id', 'old_flags', 'old_text' ),
  71                  array(
  72                      'old_id > ' . intval( $startId ),
  73                      'old_flags LIKE \'%object%\' AND old_flags NOT LIKE \'%external%\'',
  74                      "$lowerLeft = 'o:15:\"historyblobstub\"'",
  75                  ),
  76                  __METHOD__,
  77                  array(
  78                      'ORDER BY' => 'old_id',
  79                      'LIMIT' => $this->batchSize,
  80                  )
  81              );
  82  
  83              if ( !$res->numRows() ) {
  84                  break;
  85              }
  86  
  87              $secondaryIds = array();
  88              $stubs = array();
  89  
  90              foreach ( $res as $row ) {
  91                  $startId = $row->old_id;
  92  
  93                  // Basic sanity checks
  94                  $obj = unserialize( $row->old_text );
  95                  if ( $obj === false ) {
  96                      print "{$row->old_id}: unrecoverable: cannot unserialize\n";
  97                      ++$numBad;
  98                      continue;
  99                  }
 100  
 101                  if ( !is_object( $obj ) ) {
 102                      print "{$row->old_id}: unrecoverable: unserialized to type " .
 103                          gettype( $obj ) . ", possible double-serialization\n";
 104                      ++$numBad;
 105                      continue;
 106                  }
 107  
 108                  if ( strtolower( get_class( $obj ) ) !== 'historyblobstub' ) {
 109                      print "{$row->old_id}: unrecoverable: unexpected object class " .
 110                          get_class( $obj ) . "\n";
 111                      ++$numBad;
 112                      continue;
 113                  }
 114  
 115                  // Process flags
 116                  $flags = explode( ',', $row->old_flags );
 117                  if ( in_array( 'utf-8', $flags ) || in_array( 'utf8', $flags ) ) {
 118                      $legacyEncoding = false;
 119                  } else {
 120                      $legacyEncoding = true;
 121                  }
 122  
 123                  // Queue the stub for future batch processing
 124                  $id = intval( $obj->mOldId );
 125                  $secondaryIds[] = $id;
 126                  $stubs[$row->old_id] = array(
 127                      'legacyEncoding' => $legacyEncoding,
 128                      'secondaryId' => $id,
 129                      'hash' => $obj->mHash,
 130                  );
 131              }
 132  
 133              $secondaryIds = array_unique( $secondaryIds );
 134  
 135              if ( !count( $secondaryIds ) ) {
 136                  continue;
 137              }
 138  
 139              // Run the batch query on blob_tracking
 140              $res = $dbr->select(
 141                  'blob_tracking',
 142                  '*',
 143                  array(
 144                      'bt_text_id' => $secondaryIds,
 145                  ),
 146                  __METHOD__
 147              );
 148              $trackedBlobs = array();
 149              foreach ( $res as $row ) {
 150                  $trackedBlobs[$row->bt_text_id] = $row;
 151              }
 152  
 153              // Process the stubs
 154              foreach ( $stubs as $primaryId => $stub ) {
 155                  $secondaryId = $stub['secondaryId'];
 156                  if ( !isset( $trackedBlobs[$secondaryId] ) ) {
 157                      // No tracked blob. Work out what went wrong
 158                      $secondaryRow = $dbr->selectRow(
 159                          'text',
 160                          array( 'old_flags', 'old_text' ),
 161                          array( 'old_id' => $secondaryId ),
 162                          __METHOD__
 163                      );
 164                      if ( !$secondaryRow ) {
 165                          print "$primaryId: unrecoverable: secondary row is missing\n";
 166                          ++$numBad;
 167                      } elseif ( $this->isUnbrokenStub( $stub, $secondaryRow ) ) {
 168                          // Not broken yet, and not in the tracked clusters so it won't get
 169                          // broken by the current RCT run.
 170                          ++$numGood;
 171                      } elseif ( strpos( $secondaryRow->old_flags, 'external' ) !== false ) {
 172                          print "$primaryId: unrecoverable: secondary gone to {$secondaryRow->old_text}\n";
 173                          ++$numBad;
 174                      } else {
 175                          print "$primaryId: unrecoverable: miscellaneous corruption of secondary row\n";
 176                          ++$numBad;
 177                      }
 178                      unset( $stubs[$primaryId] );
 179                      continue;
 180                  }
 181                  $trackRow = $trackedBlobs[$secondaryId];
 182  
 183                  // Check that the specified text really is available in the tracked source row
 184                  $url = "DB://{$trackRow->bt_cluster}/{$trackRow->bt_blob_id}/{$stub['hash']}";
 185                  $text = ExternalStore::fetchFromURL( $url );
 186                  if ( $text === false ) {
 187                      print "$primaryId: unrecoverable: source text missing\n";
 188                      ++$numBad;
 189                      unset( $stubs[$primaryId] );
 190                      continue;
 191                  }
 192                  if ( md5( $text ) !== $stub['hash'] ) {
 193                      print "$primaryId: unrecoverable: content hashes do not match\n";
 194                      ++$numBad;
 195                      unset( $stubs[$primaryId] );
 196                      continue;
 197                  }
 198  
 199                  // Find the page_id and rev_id
 200                  // The page is probably the same as the page of the secondary row
 201                  $pageId = intval( $trackRow->bt_page );
 202                  if ( !$pageId ) {
 203                      $revId = $pageId = 0;
 204                  } else {
 205                      $revId = $this->findTextIdInPage( $pageId, $primaryId );
 206                      if ( !$revId ) {
 207                          // Actually an orphan
 208                          $pageId = $revId = 0;
 209                      }
 210                  }
 211  
 212                  $newFlags = $stub['legacyEncoding'] ? 'external' : 'external,utf-8';
 213  
 214                  if ( !$dryRun ) {
 215                      // Reset the text row to point to the original copy
 216                      $dbw->begin( __METHOD__ );
 217                      $dbw->update(
 218                          'text',
 219                          // SET
 220                          array(
 221                              'old_flags' => $newFlags,
 222                              'old_text' => $url
 223                          ),
 224                          // WHERE
 225                          array( 'old_id' => $primaryId ),
 226                          __METHOD__
 227                      );
 228  
 229                      // Add a blob_tracking row so that the new reference can be recompressed
 230                      // without needing to run trackBlobs.php again
 231                      $dbw->insert( 'blob_tracking',
 232                          array(
 233                              'bt_page' => $pageId,
 234                              'bt_rev_id' => $revId,
 235                              'bt_text_id' => $primaryId,
 236                              'bt_cluster' => $trackRow->bt_cluster,
 237                              'bt_blob_id' => $trackRow->bt_blob_id,
 238                              'bt_cgz_hash' => $stub['hash'],
 239                              'bt_new_url' => null,
 240                              'bt_moved' => 0,
 241                          ),
 242                          __METHOD__
 243                      );
 244                      $dbw->commit( __METHOD__ );
 245                      $this->waitForSlaves();
 246                  }
 247  
 248                  print "$primaryId: resolved to $url\n";
 249                  ++$numFixed;
 250              }
 251          }
 252  
 253          print "\n";
 254          print "Fixed: $numFixed\n";
 255          print "Unrecoverable: $numBad\n";
 256          print "Good stubs: $numGood\n";
 257      }
 258  
 259  	function waitForSlaves() {
 260          static $iteration = 0;
 261          ++$iteration;
 262          if ( ++$iteration > 50 == 0 ) {
 263              wfWaitForSlaves();
 264              $iteration = 0;
 265          }
 266      }
 267  
 268  	function findTextIdInPage( $pageId, $textId ) {
 269          $ids = $this->getRevTextMap( $pageId );
 270          if ( !isset( $ids[$textId] ) ) {
 271              return null;
 272          } else {
 273              return $ids[$textId];
 274          }
 275      }
 276  
 277  	function getRevTextMap( $pageId ) {
 278          if ( !isset( $this->mapCache[$pageId] ) ) {
 279              // Limit cache size
 280              while ( $this->mapCacheSize > $this->maxMapCacheSize ) {
 281                  $key = key( $this->mapCache );
 282                  $this->mapCacheSize -= count( $this->mapCache[$key] );
 283                  unset( $this->mapCache[$key] );
 284              }
 285  
 286              $dbr = wfGetDB( DB_SLAVE );
 287              $map = array();
 288              $res = $dbr->select( 'revision',
 289                  array( 'rev_id', 'rev_text_id' ),
 290                  array( 'rev_page' => $pageId ),
 291                  __METHOD__
 292              );
 293              foreach ( $res as $row ) {
 294                  $map[$row->rev_text_id] = $row->rev_id;
 295              }
 296              $this->mapCache[$pageId] = $map;
 297              $this->mapCacheSize += count( $map );
 298          }
 299  
 300          return $this->mapCache[$pageId];
 301      }
 302  
 303      /**
 304       * This is based on part of HistoryBlobStub::getText().
 305       * Determine if the text can be retrieved from the row in the normal way.
 306       * @param array $stub
 307       * @param stdClass $secondaryRow
 308       * @return bool
 309       */
 310  	function isUnbrokenStub( $stub, $secondaryRow ) {
 311          $flags = explode( ',', $secondaryRow->old_flags );
 312          $text = $secondaryRow->old_text;
 313          if ( in_array( 'external', $flags ) ) {
 314              $url = $text;
 315              wfSuppressWarnings();
 316              list( /* $proto */, $path ) = explode( '://', $url, 2 );
 317              wfRestoreWarnings();
 318  
 319              if ( $path == "" ) {
 320                  return false;
 321              }
 322              $text = ExternalStore::fetchFromUrl( $url );
 323          }
 324          if ( !in_array( 'object', $flags ) ) {
 325              return false;
 326          }
 327  
 328          if ( in_array( 'gzip', $flags ) ) {
 329              $obj = unserialize( gzinflate( $text ) );
 330          } else {
 331              $obj = unserialize( $text );
 332          }
 333  
 334          if ( !is_object( $obj ) ) {
 335              // Correct for old double-serialization bug.
 336              $obj = unserialize( $obj );
 337          }
 338  
 339          if ( !is_object( $obj ) ) {
 340              return false;
 341          }
 342  
 343          $obj->uncompress();
 344          $text = $obj->getItem( $stub['hash'] );
 345  
 346          return $text !== false;
 347      }
 348  }
 349  
 // Name the maintenance class defined above, then include the file named by
 // RUN_MAINTENANCE_IF_MAIN.
 // NOTE(review): RUN_MAINTENANCE_IF_MAIN is not defined in this file; presumably it comes
 // from the Maintenance.php required at the top and consumes $maintClass — confirm.
 350  $maintClass = 'FixBug20757';
 351  require_once RUN_MAINTENANCE_IF_MAIN;


Generated: Fri Nov 28 14:03:12 2014 Cross-referenced by PHPXref 0.7.1