[ Index ]

PHP Cross Reference of MediaWiki-1.24.0

title

Body

[close]

/maintenance/ -> backupTextPass.inc (source)

   1  <?php
   2  /**
   3   * BackupDumper that postprocesses XML dumps from dumpBackup.php to add page text
   4   *
   5   * Copyright (C) 2005 Brion Vibber <[email protected]>
   6   * https://www.mediawiki.org/
   7   *
   8   * This program is free software; you can redistribute it and/or modify
   9   * it under the terms of the GNU General Public License as published by
  10   * the Free Software Foundation; either version 2 of the License, or
  11   * (at your option) any later version.
  12   *
  13   * This program is distributed in the hope that it will be useful,
  14   * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  16   * GNU General Public License for more details.
  17   *
  18   * You should have received a copy of the GNU General Public License along
  19   * with this program; if not, write to the Free Software Foundation, Inc.,
  20   * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  21   * http://www.gnu.org/copyleft/gpl.html
  22   *
  23   * @file
  24   * @ingroup Maintenance
  25   */
  26  
  27  require_once  __DIR__ . '/backup.inc';
  28  
  29  /**
  30   * @ingroup Maintenance
  31   */
  32  class TextPassDumper extends BackupDumper {
  33      public $prefetch = null;
  34  
  35      // when we spend more than maxTimeAllowed seconds on this run, we continue
  36      // processing until we write out the next complete page, then save output file(s),
  37      // rename it/them and open new one(s)
  38      public $maxTimeAllowed = 0; // 0 = no limit
  39  
  40      protected $input = "php://stdin";
  41      protected $history = WikiExporter::FULL;
  42      protected $fetchCount = 0;
  43      protected $prefetchCount = 0;
  44      protected $prefetchCountLast = 0;
  45      protected $fetchCountLast = 0;
  46  
  47      protected $maxFailures = 5;
  48      protected $maxConsecutiveFailedTextRetrievals = 200;
  49      protected $failureTimeout = 5; // Seconds to sleep after db failure
  50  
  51      protected $php = "php";
  52      protected $spawn = false;
  53  
  54      /**
  55       * @var bool|resource
  56       */
  57      protected $spawnProc = false;
  58  
  59      /**
  60       * @var bool|resource
  61       */
  62      protected $spawnWrite = false;
  63  
  64      /**
  65       * @var bool|resource
  66       */
  67      protected $spawnRead = false;
  68  
  69      /**
  70       * @var bool|resource
  71       */
  72      protected $spawnErr = false;
  73  
  74      protected $xmlwriterobj = false;
  75  
  76      protected $timeExceeded = false;
  77      protected $firstPageWritten = false;
  78      protected $lastPageWritten = false;
  79      protected $checkpointJustWritten = false;
  80      protected $checkpointFiles = array();
  81  
  82      /**
  83       * @var DatabaseBase
  84       */
  85      protected $db;
  86  
  /**
   * Discard the current database connection and try to obtain a fresh one.
   *
   * Any load balancer held in $this->lb has all of its connections closed and
   * is then dropped. If $this->forcedDb is set, it simply becomes $this->db.
   * Otherwise a new main load balancer is requested from the LB factory and a
   * DB_SLAVE connection in the 'dump' group is taken from it.
   *
   * Nothing is retried here; reacting to a failure is left to the caller so
   * that retries do not pile up at several layers.
   *
   * @throws MWException If the old connection is still open, or if the new
   *   load balancer or the new connection cannot be obtained.
   */
  function rotateDb() {
      // Tear down whatever the previous load balancer still holds.
      if ( isset( $this->lb ) ) {
          $this->lb->closeAll();
          unset( $this->lb );
      }

      // A forced connection always wins; there is nothing to rotate.
      if ( $this->forcedDb !== null ) {
          $this->db = $this->forcedDb;

          return;
      }

      // closeAll() above should have closed our connection as well.
      $stillOpen = isset( $this->db ) && $this->db->isOpen();
      if ( $stillOpen ) {
          throw new MWException( 'DB is set and has not been closed by the Load Balancer' );
      }

      unset( $this->db );

      // Stage 1: a fresh load balancer.
      try {
          $this->lb = wfGetLBFactory()->newMainLB();
      } catch ( Exception $lbError ) {
          $reason = $lbError->getMessage();
          throw new MWException( __METHOD__
              . " rotating DB failed to obtain new load balancer (" . $reason . ")" );
      }

      // Stage 2: a connection handed out by that load balancer.
      try {
          $this->db = $this->lb->getConnection( DB_SLAVE, 'dump' );
      } catch ( Exception $dbError ) {
          $reason = $dbError->getMessage();
          throw new MWException( __METHOD__
              . " rotating DB failed to obtain new database (" . $reason . ")" );
      }
  }
 137  
 /**
  * Initialise progress tracking and start the checkpoint timer.
  *
  * The history mode is forwarded to the parent implementation instead of
  * being dropped, so the parent sees the mode the caller asked for.
  * NOTE(review): the parent class is not visible from this file; this assumes
  * BackupDumper::initProgress() accepts the same optional argument - confirm.
  *
  * @param int $history One of the WikiExporter history constants
  */
 function initProgress( $history = WikiExporter::FULL ) {
     parent::initProgress( $history );
     // Checkpoint time limits are measured from the moment the run started.
     $this->timeOfCheckpoint = $this->startTime;
 }
 142  
 /**
  * Run the text pass: read the stub dump from $this->input and write the
  * completed dump through $this->sink.
  *
  * @param int $history Not read here; the mode stored in $this->history is
  *   what gets passed on to initProgress().
  * @param int $text Not read here.
  * @throws MWException If the stub input cannot be opened, or if
  *   finalOptionCheck() or readDump() fail.
  */
 function dump( $history, $text = WikiExporter::TEXT ) {
     // Notice messages will foul up the XML output even if they are
     // relatively harmless, so send them to stderr instead.
     if ( ini_get( 'display_errors' ) ) {
         ini_set( 'display_errors', 'stderr' );
     }

     $this->initProgress( $this->history );

     // Try to get an initial database connection so that the first call to
     // getText() does not start with a failure. If this does not work it is
     // not serious: getText() retries and can start without a connection.
     try {
         $this->rotateDb();
     } catch ( Exception $e ) {
         // Not counted as a failure; just let eventual watchdogs know.
         $this->progress( "Getting initial DB connection failed (" .
             $e->getMessage() . ")" );
     }

     $this->egress = new ExportProgressFilter( $this->sink, $this );

     // Needs $this->egress, so it cannot run any earlier.
     $this->finalOptionCheck();

     // Only needed to know how to close a stream when checkpointing.
     $this->xmlwriterobj = new XmlDumpWriter();

     $input = fopen( $this->input, "rt" );
     if ( $input === false ) {
         // Without this check the run would end "successfully" with an empty dump.
         throw new MWException( "Unable to open stub input '" . $this->input . "' for reading" );
     }

     // Release the handle on failure as well as on success.
     try {
         $this->readDump( $input );
     } catch ( Exception $e ) {
         fclose( $input );
         throw $e;
     }
     fclose( $input );

     if ( $this->spawnProc ) {
         $this->closeSpawn();
     }

     $this->report( true );
 }
 183  
 /**
  * Apply a single command-line option to this dumper.
  *
  * @param string $opt Option name
  * @param string $val Option value; for file options this is the format name
  * @param string $param Extra parameter; for file options the file URI(s)
  */
 function processOption( $opt, $val, $param ) {
     global $IP;

     // Computed for every option, although only 'prefetch' and 'stub' use it.
     $url = $this->processFileOpt( $val, $param );

     if ( $opt == 'prefetch' ) {
         require_once "$IP/maintenance/backupPrefetch.inc";
         $this->prefetch = new BaseDump( $url );
     } elseif ( $opt == 'stub' ) {
         $this->input = $url;
     } elseif ( $opt == 'maxtime' ) {
         // The given number is multiplied by 60 before being stored.
         $this->maxTimeAllowed = intval( $val ) * 60;
     } elseif ( $opt == 'checkpointfile' ) {
         $this->checkpointFiles[] = $val;
     } elseif ( $opt == 'current' ) {
         $this->history = WikiExporter::CURRENT;
     } elseif ( $opt == 'full' ) {
         $this->history = WikiExporter::FULL;
     } elseif ( $opt == 'spawn' ) {
         $this->spawn = true;
         // An optional value overrides the PHP binary used for the subprocess.
         if ( $val ) {
             $this->php = $val;
         }
     }
 }
 216  
 /**
  * Turn a ';'-separated list of file URIs into stream URIs for a given format.
  *
  * Each URI gets the stream-wrapper prefix that belongs to the format; the
  * formats "file" and anything unrecognised leave the URIs untouched.
  *
  * @param string $val Format name: "file", "gzip", "bzip2" or "7zip"
  * @param string $param One or more file URIs separated by ';'
  * @return string The rewritten URIs, again joined with ';'
  */
 function processFileOpt( $val, $param ) {
     // Wrapper prefix per format, compared in this order.
     $wrappers = array(
         'file' => '',
         'gzip' => 'compress.zlib://',
         'bzip2' => 'compress.bzip2://',
         '7zip' => 'mediawiki.compress.7z://',
     );

     // The format is the same for every URI, so resolve the prefix once.
     $prefix = '';
     foreach ( $wrappers as $format => $wrapper ) {
         if ( $val == $format ) {
             $prefix = $wrapper;
             break;
         }
     }

     $rewritten = array();
     foreach ( explode( ';', $param ) as $uri ) {
         $rewritten[] = $prefix . $uri;
     }

     return implode( ';', $rewritten );
 }
 242  
 /**
  * Print a progress line; overridden to include the prefetch ratio.
  *
  * Without a prefetch source the parent implementation is used unchanged.
  * Otherwise, if reporting is enabled, one line with page, revision and
  * prefetch rates (overall and since the previous report) plus an ETA is
  * sent to progress(), and the "last report" counters are updated.
  *
  * The ETA is only computed once both $this->maxCount and $this->revCount
  * are non-zero; before that it is shown as '-' instead of dividing by zero.
  */
 function showReport() {
     if ( !$this->prefetch ) {
         parent::showReport();

         return;
     }

     if ( $this->reporting ) {
         $now = wfTimestamp( TS_DB );
         $nowts = microtime( true );
         $deltaAll = $nowts - $this->startTime;
         $deltaPart = $nowts - $this->lastTime;
         $this->pageCountPart = $this->pageCount - $this->pageCountLast;
         $this->revCountPart = $this->revCount - $this->revCountLast;

         if ( $deltaAll ) {
             // The ETA extrapolates from the fraction of revisions handled so
             // far. With no revisions yet (or an unknown maximum) that
             // fraction is zero or undefined, so no ETA can be given.
             if ( $this->maxCount && $this->revCount ) {
                 $portion = $this->revCount / $this->maxCount;
                 $eta = $this->startTime + $deltaAll / $portion;
                 $etats = wfTimestamp( TS_DB, intval( $eta ) );
             } else {
                 $etats = '-';
             }
             if ( $this->fetchCount ) {
                 $fetchRate = 100.0 * $this->prefetchCount / $this->fetchCount;
             } else {
                 $fetchRate = '-';
             }
             $pageRate = $this->pageCount / $deltaAll;
             $revRate = $this->revCount / $deltaAll;
         } else {
             $pageRate = '-';
             $revRate = '-';
             $etats = '-';
             $fetchRate = '-';
         }
         if ( $deltaPart ) {
             // NOTE(review): this uses the totals stored at the previous
             // report, not the difference since then - confirm that this is
             // the intended meaning of the "curr" prefetch figure.
             if ( $this->fetchCountLast ) {
                 $fetchRatePart = 100.0 * $this->prefetchCountLast / $this->fetchCountLast;
             } else {
                 $fetchRatePart = '-';
             }
             $pageRatePart = $this->pageCountPart / $deltaPart;
             $revRatePart = $this->revCountPart / $deltaPart;
         } else {
             $fetchRatePart = '-';
             $pageRatePart = '-';
             $revRatePart = '-';
         }
         $this->progress( sprintf(
             "%s: %s (ID %d) %d pages (%0.1f|%0.1f/sec all|curr), "
                 . "%d revs (%0.1f|%0.1f/sec all|curr), %0.1f%%|%0.1f%% "
                 . "prefetched (all|curr), ETA %s [max %d]",
             $now, wfWikiID(), $this->ID, $this->pageCount, $pageRate,
             $pageRatePart, $this->revCount, $revRate, $revRatePart,
             $fetchRate, $fetchRatePart, $etats, $this->maxCount
         ) );

         // Remember where this report stood for the next "curr" figures.
         $this->lastTime = $nowts;
         $this->revCountLast = $this->revCount;
         $this->prefetchCountLast = $this->prefetchCount;
         $this->fetchCountLast = $this->fetchCount;
     }
 }
 305  
 /**
  * Flag that the time limit for the current checkpoint has been exceeded.
  *
  * The flag is read in endElement() once a page has been closed.
  */
 function setTimeExceeded() {
     $this->timeExceeded = true;
 }
 309  
 /**
  * Tell whether more than $this->maxTimeAllowed has passed between the last
  * checkpoint and the last progress timestamp.
  *
  * @return bool False when no limit is set (maxTimeAllowed is 0) or the
  *   limit has not been passed yet, true otherwise.
  */
 function checkIfTimeExceeded() {
     // No limit configured: never exceeded.
     if ( !$this->maxTimeAllowed ) {
         return false;
     }

     $sinceCheckpoint = $this->lastTime - $this->timeOfCheckpoint;

     return $sinceCheckpoint > $this->maxTimeAllowed;
 }
 319  
 /**
  * Validate the combination of the checkpointfile and maxtime options.
  *
  * Both must be given together, every checkpoint file name must contain
  * exactly two '%s' placeholders, and there must be one checkpoint file per
  * output file reported by $this->egress.
  *
  * @throws MWException If any of these conditions is violated
  */
 function finalOptionCheck() {
     $hasCheckpointFiles = (bool)$this->checkpointFiles;
     $hasTimeLimit = (bool)$this->maxTimeAllowed;

     // One without the other makes no sense.
     if ( $hasCheckpointFiles !== $hasTimeLimit ) {
         throw new MWException( "Options checkpointfile and maxtime must be specified together.\n" );
     }

     foreach ( $this->checkpointFiles as $checkpointFile ) {
         $count = substr_count( $checkpointFile, "%s" );
         if ( $count == 2 ) {
             continue;
         }
         throw new MWException( "Option checkpointfile must contain two '%s' "
             . "for substitution of first and last pageids, count is $count instead, "
             . "file is $checkpointFile.\n" );
     }

     if ( !$hasCheckpointFiles ) {
         return;
     }

     // Every output file needs its own checkpoint name pattern.
     $outputFiles = (array)$this->egress->getFilenames();
     if ( count( $outputFiles ) != count( $this->checkpointFiles ) ) {
         throw new MWException( "One checkpointfile must be specified "
             . "for each output option, if maxtime is used.\n" );
     }
 }
 343  
 /**
  * Feed the stub dump through an XML parser whose callbacks
  * (startElement, endElement, characterData) produce the output.
  *
  * The input is read in chunks of 512 KiB. Before each chunk the time limit
  * is checked. After the last chunk, if a time limit is in use and the first
  * output file exists, the output files are closed and renamed to their
  * checkpoint names.
  *
  * @param resource $input Open stream to read the stub dump from
  * @return bool Always true
  * @throws MWException Failure to parse XML input
  */
 function readDump( $input ) {
     // Reset the state shared with the parser callbacks.
     $this->buffer = "";
     $this->openElement = false;
     $this->atStart = true;
     $this->state = "";
     $this->lastName = "";
     $this->thisPage = 0;
     $this->thisRev = 0;

     $parser = xml_parser_create( "UTF-8" );
     xml_parser_set_option( $parser, XML_OPTION_CASE_FOLDING, false );
     xml_set_element_handler(
         $parser,
         array( $this, 'startElement' ),
         array( $this, 'endElement' )
     );
     xml_set_character_data_handler( $parser, array( $this, 'characterData' ) );

     // Bytes handed to the parser before the current chunk; used to locate
     // the error position inside the chunk when reporting a parse failure.
     $consumed = 0;
     $chunkSize = 512 * 1024;
     do {
         if ( $this->checkIfTimeExceeded() ) {
             $this->setTimeExceeded();
         }
         $chunk = fread( $input, $chunkSize );
         $parsedOk = xml_parse( $parser, $chunk, feof( $input ) );
         if ( !$parsedOk ) {
             wfDebug( "TextDumpPass::readDump encountered XML parsing error\n" );

             $byte = xml_get_current_byte_index( $parser );
             if ( is_null( $chunk ) ) {
                 $context = null;
             } else {
                 $context = '; "' . substr( $chunk, $byte - $consumed, 16 ) . '"';
             }
             $msg = wfMessage( 'xml-error-string',
                 'XML import parse failure',
                 xml_get_current_line_number( $parser ),
                 xml_get_current_column_number( $parser ),
                 $byte . $context,
                 xml_error_string( xml_get_error_code( $parser ) ) )->escaped();

             xml_parser_free( $parser );

             throw new MWException( $msg );
         }
         $consumed += strlen( $chunk );
     } while ( $chunk !== false && !feof( $input ) );

     if ( $this->maxTimeAllowed ) {
         $filenameList = (array)$this->egress->getFilenames();
         // Something was written after the last checkpoint and still needs
         // to be renamed.
         if ( file_exists( $filenameList[0] ) ) {
             // Possibly only the header and footer were written and no pages
             // at all (perhaps they were all deleted). There is no page ID 0,
             // so 0 is used for both ends in that case; the caller decides
             // what to do with a file that holds only siteinfo and mw tags.
             if ( $this->firstPageWritten ) {
                 $firstPageID = str_pad( $this->firstPageWritten, 9, "0", STR_PAD_LEFT );
                 $lastPageID = str_pad( $this->lastPageWritten, 9, "0", STR_PAD_LEFT );
             } else {
                 $firstPageID = str_pad( 0, 9, "0", STR_PAD_LEFT );
                 $lastPageID = str_pad( 0, 9, "0", STR_PAD_LEFT );
             }

             $newFilenames = array();
             for ( $i = 0, $total = count( $filenameList ); $i < $total; $i++ ) {
                 $checkpointName = sprintf( $this->checkpointFiles[$i], $firstPageID, $lastPageID );
                 $parts = pathinfo( $filenameList[$i] );
                 $newFilenames[] = $parts['dirname'] . '/' . $checkpointName;
             }
             $this->egress->closeAndRename( $newFilenames );
         }
     }
     xml_parser_free( $parser );

     return true;
 }
 423  
 /**
  * Tries to get the text for a text id, from prefetch first, then the database.
  *
  * Upon errors, retries (up to $this->maxFailures tries per call). If no
  * good text could be found even after retrying, "" is returned. If more
  * than $this->maxConsecutiveFailedTextRetrievals consecutive calls end
  * that way, an MWException is thrown.
  *
  * Text from prefetch is only accepted if its byte length passes the rev_len
  * check for revision $this->thisRev. Text from the database (directly or
  * via the spawned subprocess) is accepted as soon as it is not false.
  *
  * @param string $id The text id to get the text for (used as old_id by
  *   getTextDb())
  *
  * @return string The text for $id, or ""
  * @throws MWException
  */
 function getText( $id ) {
     global $wgContentHandlerUseDB;

     $prefetchNotTried = true; // Whether or not we already tried to get the text via prefetch.
     $text = false; // The candidate for a good text. false if no proper value.
     $failures = 0; // The number of times this invocation of getText already failed.

     // The number of times getText failed without yielding a good text in between.
     static $consecutiveFailedTextRetrievals = 0;

     $this->fetchCount++;

     // To be able to simply return on success without any bookkeeping, we
     // assume this fetch works (possibly after some retries). Nevertheless,
     // we keep the old value, so we can restore it if problems occur (see
     // after the while loop).
     $oldConsecutiveFailedTextRetrievals = $consecutiveFailedTextRetrievals;
     $consecutiveFailedTextRetrievals = 0;

     while ( $failures < $this->maxFailures ) {

         // As soon as we find a good text for the $id, we return immediately.
         // Hence, if we make it past the try catch block, we know that we
         // did not find a good text.

         try {
             // Step 1: Get some text (or reuse the one from the previous
             //         iteration if checking for plausibility failed)

             // Trying to get prefetch, if it has not been tried before
             if ( $text === false && isset( $this->prefetch ) && $prefetchNotTried ) {
                 $prefetchNotTried = false;
                 $tryIsPrefetch = true;
                 $text = $this->prefetch->prefetch( intval( $this->thisPage ),
                     intval( $this->thisRev ) );
                 if ( $text === null ) {
                     $text = false;
                 }
             }

             if ( $text === false ) {
                 // Fallback to asking the database
                 $tryIsPrefetch = false;
                 if ( $this->spawn ) {
                     $text = $this->getTextSpawned( $id );
                 } else {
                     $text = $this->getTextDb( $id );
                 }

                 // No more checks for texts from the DB for now.
                 // If we received something that is not false, we treat it
                 // as good text, regardless of whether it actually is or not.
                 if ( $text !== false ) {
                     return $text;
                 }
             }

             if ( $text === false ) {
                 throw new MWException( "Generic error while obtaining text for id " . $id );
             }

             // We received a good candidate for the text of $id via some method

             // Step 2: Checking for plausibility and return the text if it is
             //         plausible
             $revID = intval( $this->thisRev );
             if ( !isset( $this->db ) ) {
                 throw new MWException( "No database available" );
             }

             // Defaults to the actual length, so the check below passes
             // whenever no stored length applies.
             $revLength = strlen( $text );
             if ( $wgContentHandlerUseDB ) {
                 $row = $this->db->selectRow(
                     'revision',
                     array( 'rev_len', 'rev_content_model' ),
                     array( 'rev_id' => $revID ),
                     __METHOD__
                 );
                 if ( $row ) {
                     // only check the length for the wikitext content handler,
                     // it's a wasted (and failed) check otherwise
                     if ( $row->rev_content_model == CONTENT_MODEL_WIKITEXT ) {
                         $revLength = $row->rev_len;
                     }
                 }
             } else {
                 // Pass the caller name like the selectRow() call above does.
                 $revLength = $this->db->selectField(
                     'revision',
                     'rev_len',
                     array( 'rev_id' => $revID ),
                     __METHOD__
                 );
             }

             if ( strlen( $text ) == $revLength ) {
                 if ( $tryIsPrefetch ) {
                     $this->prefetchCount++;
                 }

                 return $text;
             }

             // Drop the candidate so the next iteration asks the database.
             $text = false;
             throw new MWException( "Received text is unplausible for id " . $id );
         } catch ( Exception $e ) {
             $msg = "getting/checking text " . $id . " failed (" . $e->getMessage() . ")";
             if ( $failures + 1 < $this->maxFailures ) {
                 $msg .= " (Will retry " . ( $this->maxFailures - $failures - 1 ) . " more times)";
             }
             $this->progress( $msg );
         }

         // Something went wrong; we did not get a text that was plausible :(
         $failures++;

         // A failure in a prefetch hit does not warrant resetting db connection etc.
         if ( !$tryIsPrefetch ) {
             // After backing off for some time, we try to reboot the whole
             // process as much as possible to not carry over failures from
             // one part to the other parts
             sleep( $this->failureTimeout );
             try {
                 $this->rotateDb();
                 if ( $this->spawn ) {
                     $this->closeSpawn();
                     $this->openSpawn();
                 }
             } catch ( Exception $e ) {
                 $this->progress( "Rebooting getText infrastructure failed (" . $e->getMessage() . ")" .
                     " Trying to continue anyways" );
             }
         }
     }

     // Retrieving a good text for $id failed (at least) maxFailures times.
     // We abort for this $id.

     // Restoring the consecutive failures, and maybe aborting, if the dump
     // is too broken.
     $consecutiveFailedTextRetrievals = $oldConsecutiveFailedTextRetrievals + 1;
     if ( $consecutiveFailedTextRetrievals > $this->maxConsecutiveFailedTextRetrievals ) {
         throw new MWException( "Graceful storage failure" );
     }

     return "";
 }
 578  
 /**
  * Fetch a text row from the database and return its normalized text.
  *
  * The row with old_id = $id is read from the 'text' table, turned into text
  * by Revision::getRevisionText(), stripped of carriage returns and passed
  * through $wgContLang->normalize().
  *
  * May throw a database error if, say, the server dies during the query.
  *
  * @param int $id Value to match against old_id
  * @return bool|string The normalized text, or false if
  *   Revision::getRevisionText() returned false
  * @throws MWException If no database connection is set
  */
 private function getTextDb( $id ) {
     global $wgContLang;
     if ( !isset( $this->db ) ) {
         // Separator added; the method name used to run straight into the text.
         throw new MWException( __METHOD__ . ": No database available" );
     }
     $row = $this->db->selectRow( 'text',
         array( 'old_text', 'old_flags' ),
         array( 'old_id' => $id ),
         __METHOD__ );
     $text = Revision::getRevisionText( $row );
     if ( $text === false ) {
         return false;
     }
     $stripped = str_replace( "\r", "", $text );
     $normalized = $wgContLang->normalize( $stripped );

     return $normalized;
 }
 603  
 /**
  * Fetch a text through the helper subprocess, starting it first if needed.
  *
  * Warnings are suppressed for the duration of the call. If the subprocess
  * cannot be started, false is returned right away rather than attempting to
  * write to a pipe that does not exist.
  *
  * @param int|string $id Text id to request from the subprocess
  * @return bool|string The text, or false on any failure
  */
 private function getTextSpawned( $id ) {
     wfSuppressWarnings();
     if ( !$this->spawnProc && !$this->openSpawn() ) {
         // First use and the spawn failed: there is nothing to talk to.
         wfRestoreWarnings();

         return false;
     }
     $text = $this->getTextSpawnedOnce( $id );
     wfRestoreWarnings();

     return $text;
 }
 615  
 /**
  * Start the fetchText.php helper as a subprocess and keep its pipes.
  *
  * If "$IP/../multiversion/MWScript.php" exists, the helper is started
  * through it; otherwise maintenance/fetchText.php is run directly. On
  * success the process handle and the pipes to its stdin and stdout are
  * stored in $this->spawnProc, $this->spawnWrite and $this->spawnRead. The
  * child's stderr is appended to /dev/null.
  *
  * @return bool True if the subprocess was started, false otherwise
  */
 function openSpawn() {
     global $IP;

     $multiversionScript = "$IP/../multiversion/MWScript.php";
     if ( file_exists( $multiversionScript ) ) {
         $args = array( $this->php, $multiversionScript, "fetchText.php" );
     } else {
         $args = array( $this->php, "$IP/maintenance/fetchText.php" );
     }
     $args[] = '--wiki';
     $args[] = wfWikiID();

     $cmd = implode( " ", array_map( 'wfEscapeShellArg', $args ) );

     $descriptors = array(
         0 => array( "pipe", "r" ), // child's stdin
         1 => array( "pipe", "w" ), // child's stdout
         2 => array( "file", "/dev/null", "a" ), // child's stderr
     );
     $pipes = array();

     $this->progress( "Spawning database subprocess: $cmd" );
     $this->spawnProc = proc_open( $cmd, $descriptors, $pipes );
     if ( !$this->spawnProc ) {
         $this->progress( "Subprocess spawn failed." );

         return false;
     }

     $this->spawnWrite = $pipes[0]; // -> stdin
     $this->spawnRead = $pipes[1]; // <- stdout

     return true;
 }
 656  
 /**
  * Shut down the helper subprocess and forget all handles to it.
  *
  * The pipes are closed first, then the process handle. The handle comes
  * from proc_open() in openSpawn(), so it is released with proc_close();
  * pclose() only accepts handles created by popen().
  *
  * Warnings are suppressed for the duration of the call.
  */
 private function closeSpawn() {
     wfSuppressWarnings();
     if ( $this->spawnRead ) {
         fclose( $this->spawnRead );
     }
     $this->spawnRead = false;
     if ( $this->spawnWrite ) {
         fclose( $this->spawnWrite );
     }
     $this->spawnWrite = false;
     if ( $this->spawnErr ) {
         fclose( $this->spawnErr );
     }
     $this->spawnErr = false;
     if ( $this->spawnProc ) {
         // Must come after the pipes are closed.
         proc_close( $this->spawnProc );
     }
     $this->spawnProc = false;
     wfRestoreWarnings();
 }
 677  
 /**
  * Ask the helper subprocess for one text and read its answer.
  *
  * Writes "$id\n" to the subprocess, then reads back a line with the id, a
  * line with the byte count, and that many bytes of text. The text is
  * stripped of carriage returns and passed through $wgContLang->normalize().
  *
  * @param int|string $id Text id to request
  * @return bool|string The normalized text, or false if writing or reading
  *   fails, the returned id differs from $id, the byte count is negative, or
  *   fewer bytes than announced arrive
  */
 private function getTextSpawnedOnce( $id ) {
     global $wgContLang;

     $ok = fwrite( $this->spawnWrite, "$id\n" );
     if ( !$ok ) {
         return false;
     }

     $ok = fflush( $this->spawnWrite );
     if ( !$ok ) {
         return false;
     }

     // Check that the text id they are sending is the one we asked for;
     // this avoids the out of sync revision text errors encountered in the past.
     $newId = fgets( $this->spawnRead );
     if ( $newId === false ) {
         return false;
     }
     if ( $id != intval( $newId ) ) {
         return false;
     }

     $len = fgets( $this->spawnRead );
     if ( $len === false ) {
         return false;
     }

     $nbytes = intval( $len );
     // A negative count is an actual error, not a zero-length text.
     if ( $nbytes < 0 ) {
         return false;
     }

     $text = "";

     // The subprocess may not send everything at once, so we have to loop.
     while ( $nbytes > strlen( $text ) ) {
         $buffer = fread( $this->spawnRead, $nbytes - strlen( $text ) );
         // Stop on a read error, and also when the pipe has reached EOF:
         // fread() then keeps returning "" and the loop would never end.
         if ( $buffer === false || ( $buffer === '' && feof( $this->spawnRead ) ) ) {
             break;
         }
         $text .= $buffer;
     }

     $gotbytes = strlen( $text );
     if ( $gotbytes != $nbytes ) {
         $this->progress( "Expected $nbytes bytes from database subprocess, got $gotbytes " );

         return false;
     }

     // Do normalization in the dump thread...
     $stripped = str_replace( "\r", "", $text );
     $normalized = $wgContLang->normalize( $stripped );

     return $normalized;
 }
 739  
 /**
  * XML parser callback for an opening tag.
  *
  * Flushes any pending open element, writes buffered output when a page or
  * revision starts, and records the new element as pending. For a "text"
  * element with an "id" attribute the text is looked up through getText()
  * and emitted as character data, and the element's attributes are replaced
  * by xml:space="preserve".
  *
  * @param resource $parser The XML parser
  * @param string $name Element name
  * @param array $attribs Element attributes
  */
 function startElement( $parser, $name, $attribs ) {
     $this->checkpointJustWritten = false;

     $this->clearOpenElement( null );
     $this->lastName = $name;

     switch ( $name ) {
         case 'revision':
             $this->state = $name;
             $this->egress->writeOpenPage( null, $this->buffer );
             $this->buffer = "";
             break;
         case 'page':
             $this->state = $name;
             // The first page ends the stream header collected so far.
             if ( $this->atStart ) {
                 $this->egress->writeOpenStream( $this->buffer );
                 $this->buffer = "";
                 $this->atStart = false;
             }
             break;
     }

     $isTextStub = ( $name == "text" ) && isset( $attribs['id'] );
     if ( !$isTextStub ) {
         $this->openElement = array( $name, $attribs );

         return;
     }

     // Fetch first, then register the element, then emit its content.
     $text = $this->getText( $attribs['id'] );
     $this->openElement = array( $name, array( 'xml:space' => 'preserve' ) );
     if ( strlen( $text ) > 0 ) {
         $this->characterData( $parser, $text );
     }
 }
 769  
 770  	function endElement( $parser, $name ) {
 771          $this->checkpointJustWritten = false;
 772  
 773          if ( $this->openElement ) {
 774              $this->clearOpenElement( "" );
 775          } else {
 776              $this->buffer .= "</$name>";
 777          }
 778  
 779          if ( $name == 'revision' ) {
 780              $this->egress->writeRevision( null, $this->buffer );
 781              $this->buffer = "";
 782              $this->thisRev = "";
 783          } elseif ( $name == 'page' ) {
 784              if ( !$this->firstPageWritten ) {
 785                  $this->firstPageWritten = trim( $this->thisPage );
 786              }
 787              $this->lastPageWritten = trim( $this->thisPage );
 788              if ( $this->timeExceeded ) {
 789                  $this->egress->writeClosePage( $this->buffer );
 790                  // nasty hack, we can't just write the chardata after the
 791                  // page tag, it will include leading blanks from the next line
 792                  $this->egress->sink->write( "\n" );
 793  
 794                  $this->buffer = $this->xmlwriterobj->closeStream();
 795                  $this->egress->writeCloseStream( $this->buffer );
 796  
 797                  $this->buffer = "";
 798                  $this->thisPage = "";
 799                  // this could be more than one file if we had more than one output arg
 800  
 801                  $filenameList = (array)$this->egress->getFilenames();
 802                  $newFilenames = array();
 803                  $firstPageID = str_pad( $this->firstPageWritten, 9, "0", STR_PAD_LEFT );
 804                  $lastPageID = str_pad( $this->lastPageWritten, 9, "0", STR_PAD_LEFT );
 805                  $filenamesCount = count( $filenameList );
 806                  for ( $i = 0; $i < $filenamesCount; $i++ ) {
 807                      $checkpointNameFilledIn = sprintf( $this->checkpointFiles[$i], $firstPageID, $lastPageID );
 808                      $fileinfo = pathinfo( $filenameList[$i] );
 809                      $newFilenames[] = $fileinfo['dirname'] . '/' . $checkpointNameFilledIn;
 810                  }
 811                  $this->egress->closeRenameAndReopen( $newFilenames );
 812                  $this->buffer = $this->xmlwriterobj->openStream();
 813                  $this->timeExceeded = false;
 814                  $this->timeOfCheckpoint = $this->lastTime;
 815                  $this->firstPageWritten = false;
 816                  $this->checkpointJustWritten = true;
 817              } else {
 818                  $this->egress->writeClosePage( $this->buffer );
 819                  $this->buffer = "";
 820                  $this->thisPage = "";
 821              }
 822          } elseif ( $name == 'mediawiki' ) {
 823              $this->egress->writeCloseStream( $this->buffer );
 824              $this->buffer = "";
 825          }
 826      }
 827  
 828  	function characterData( $parser, $data ) {
 829          $this->clearOpenElement( null );
 830          if ( $this->lastName == "id" ) {
 831              if ( $this->state == "revision" ) {
 832                  $this->thisRev .= $data;
 833              } elseif ( $this->state == "page" ) {
 834                  $this->thisPage .= $data;
 835              }
 836          }
 837          // have to skip the newline left over from closepagetag line of
 838          // end of checkpoint files. nasty hack!!
 839          if ( $this->checkpointJustWritten ) {
 840              if ( $data[0] == "\n" ) {
 841                  $data = substr( $data, 1 );
 842              }
 843              $this->checkpointJustWritten = false;
 844          }
 845          $this->buffer .= htmlspecialchars( $data );
 846      }
 847  
 848  	function clearOpenElement( $style ) {
 849          if ( $this->openElement ) {
 850              $this->buffer .= Xml::element( $this->openElement[0], $this->openElement[1], $style );
 851              $this->openElement = false;
 852          }
 853      }
 854  }


Generated: Fri Nov 28 14:03:12 2014 Cross-referenced by PHPXref 0.7.1