[ Index ]

PHP Cross Reference of MediaWiki-1.24.0

title

Body

[close]

/maintenance/ -> backupPrefetch.inc (source)

   1  <?php
   2  /**
   3   * Helper class for the --prefetch option of dumpTextPass.php
   4   *
   5   * Copyright © 2005 Brion Vibber <[email protected]>
   6   * https://www.mediawiki.org/
   7   *
   8   * This program is free software; you can redistribute it and/or modify
   9   * it under the terms of the GNU General Public License as published by
  10   * the Free Software Foundation; either version 2 of the License, or
  11   * (at your option) any later version.
  12   *
  13   * This program is distributed in the hope that it will be useful,
  14   * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  16   * GNU General Public License for more details.
  17   *
  18   * You should have received a copy of the GNU General Public License along
  19   * with this program; if not, write to the Free Software Foundation, Inc.,
  20   * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  21   * http://www.gnu.org/copyleft/gpl.html
  22   *
  23   * @file
  24   * @ingroup Maintenance
  25   */
  26  
  27  /**
  28   * Readahead helper for making large MediaWiki data dumps;
  29   * reads in a previous XML dump to sequentially prefetch text
  30   * records already normalized and decompressed.
  31   *
  32   * This can save load on the external database servers, hopefully.
  33   *
  34   * Assumes that dumps will be recorded in the canonical order:
  35   * - ascending by page_id
  36   * - ascending by rev_id within each page
  37   * - text contents are immutable and should not change once
  38   *   recorded, so the previous dump is a reliable source
  39   *
  40   * @ingroup Maintenance
  41   */
  42  class BaseDump {
  43      protected $reader = null;
  44      protected $atEnd = false;
  45      protected $atPageEnd = false;
  46      protected $lastPage = 0;
  47      protected $lastRev = 0;
  48      protected $infiles = null;
  49  
  50  	public function __construct( $infile ) {
  51          $this->infiles = explode( ';', $infile );
  52          $this->reader = new XMLReader();
  53          $infile = array_shift( $this->infiles );
  54          if ( defined( 'LIBXML_PARSEHUGE' ) ) {
  55              $this->reader->open( $infile, null, LIBXML_PARSEHUGE );
  56          } else {
  57              $this->reader->open( $infile );
  58          }
  59      }
  60  
  61      /**
  62       * Attempts to fetch the text of a particular page revision
  63       * from the dump stream. May return null if the page is
  64       * unavailable.
  65       *
  66       * @param int $page ID number of page to read
  67       * @param int $rev ID number of revision to read
  68       * @return string|null
  69       */
  70  	function prefetch( $page, $rev ) {
  71          $page = intval( $page );
  72          $rev = intval( $rev );
  73          while ( $this->lastPage < $page && !$this->atEnd ) {
  74              $this->debug( "BaseDump::prefetch at page $this->lastPage, looking for $page" );
  75              $this->nextPage();
  76          }
  77          if ( $this->lastPage > $page || $this->atEnd ) {
  78              $this->debug( "BaseDump::prefetch already past page $page "
  79                  . "looking for rev $rev  [$this->lastPage, $this->lastRev]" );
  80  
  81              return null;
  82          }
  83          while ( $this->lastRev < $rev && !$this->atEnd && !$this->atPageEnd ) {
  84              $this->debug( "BaseDump::prefetch at page $this->lastPage, rev $this->lastRev, "
  85                  . "looking for $page, $rev" );
  86              $this->nextRev();
  87          }
  88          if ( $this->lastRev == $rev && !$this->atEnd ) {
  89              $this->debug( "BaseDump::prefetch hit on $page, $rev [$this->lastPage, $this->lastRev]" );
  90  
  91              return $this->nextText();
  92          } else {
  93              $this->debug( "BaseDump::prefetch already past rev $rev on page $page "
  94                  . "[$this->lastPage, $this->lastRev]" );
  95  
  96              return null;
  97          }
  98      }
  99  
 100  	function debug( $str ) {
 101          wfDebug( $str . "\n" );
 102          // global $dumper;
 103          // $dumper->progress( $str );
 104      }
 105  
 106      /**
 107       * @access private
 108       */
 109  	function nextPage() {
 110          if ( $this->skipTo( 'page', 'mediawiki' ) ) {
 111              if ( $this->skipTo( 'id' ) ) {
 112                  $this->lastPage = intval( $this->nodeContents() );
 113                  $this->lastRev = 0;
 114                  $this->atPageEnd = false;
 115              }
 116          } else {
 117              $this->close();
 118              if ( count( $this->infiles ) ) {
 119                  $infile = array_shift( $this->infiles );
 120                  $this->reader->open( $infile );
 121                  $this->atEnd = false;
 122              }
 123          }
 124      }
 125  
 126      /**
 127       * @access private
 128       */
 129  	function nextRev() {
 130          if ( $this->skipTo( 'revision' ) ) {
 131              if ( $this->skipTo( 'id' ) ) {
 132                  $this->lastRev = intval( $this->nodeContents() );
 133              }
 134          } else {
 135              $this->atPageEnd = true;
 136          }
 137      }
 138  
 139      /**
 140       * @access private
 141       * @return string
 142       */
 143  	function nextText() {
 144          $this->skipTo( 'text' );
 145  
 146          return strval( $this->nodeContents() );
 147      }
 148  
 149      /**
 150       * @access private
 151       * @param string $name
 152       * @param string $parent
 153       * @return bool|null
 154       */
 155  	function skipTo( $name, $parent = 'page' ) {
 156          if ( $this->atEnd ) {
 157              return false;
 158          }
 159          while ( $this->reader->read() ) {
 160              if ( $this->reader->nodeType == XMLReader::ELEMENT
 161                  && $this->reader->name == $name
 162              ) {
 163                  return true;
 164              }
 165              if ( $this->reader->nodeType == XMLReader::END_ELEMENT
 166                  && $this->reader->name == $parent
 167              ) {
 168                  $this->debug( "BaseDump::skipTo found </$parent> searching for <$name>" );
 169  
 170                  return false;
 171              }
 172          }
 173  
 174          return $this->close();
 175      }
 176  
 177      /**
 178       * Shouldn't something like this be built-in to XMLReader?
 179       * Fetches text contents of the current element, assuming
 180       * no sub-elements or such scary things.
 181       *
 182       * @return string
 183       * @access private
 184       */
 185  	function nodeContents() {
 186          if ( $this->atEnd ) {
 187              return null;
 188          }
 189          if ( $this->reader->isEmptyElement ) {
 190              return "";
 191          }
 192          $buffer = "";
 193          while ( $this->reader->read() ) {
 194              switch ( $this->reader->nodeType ) {
 195                  case XMLReader::TEXT:
 196                  //case XMLReader::WHITESPACE:
 197                  case XMLReader::SIGNIFICANT_WHITESPACE:
 198                      $buffer .= $this->reader->value;
 199                      break;
 200                  case XMLReader::END_ELEMENT:
 201                      return $buffer;
 202              }
 203          }
 204  
 205          return $this->close();
 206      }
 207  
 208      /**
 209       * @access private
 210       * @return null
 211       */
 212  	function close() {
 213          $this->reader->close();
 214          $this->atEnd = true;
 215  
 216          return null;
 217      }
 218  }


Generated: Fri Nov 28 14:03:12 2014 Cross-referenced by PHPXref 0.7.1