[ Index ]

PHP Cross Reference of MediaWiki-1.24.0

title

Body

[close]

/includes/cache/ -> BacklinkCache.php (source)

   1  <?php
   2  /**
   3   * Class for fetching backlink lists, approximate backlink counts and
   4   * partitions.
   5   *
   6   * This program is free software; you can redistribute it and/or modify
   7   * it under the terms of the GNU General Public License as published by
   8   * the Free Software Foundation; either version 2 of the License, or
   9   * (at your option) any later version.
  10   *
  11   * This program is distributed in the hope that it will be useful,
  12   * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14   * GNU General Public License for more details.
  15   *
  16   * You should have received a copy of the GNU General Public License along
  17   * with this program; if not, write to the Free Software Foundation, Inc.,
  18   * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  19   * http://www.gnu.org/copyleft/gpl.html
  20   *
  21   * @file
  22   * @author Tim Starling
  23   * @author Aaron Schulz
  24   * @copyright © 2009, Tim Starling, Domas Mituzas
  25   * @copyright © 2010, Max Sem
  26   * @copyright © 2011, Antoine Musso
  27   */
  28  
  29  /**
  30   * Class for fetching backlink lists, approximate backlink counts and
  31   * partitions. This is a shared cache.
  32   *
  33   * Instances of this class should typically be fetched with the method
  34   * $title->getBacklinkCache().
  35   *
  36   * Ideally you should only get your backlinks from here when you think
  37   * there is some advantage in caching them. Otherwise it's just a waste
  38   * of memory.
  39   *
  40   * Introduced by r47317
  41   *
  42   * @internal documentation reviewed on 18 Mar 2011 by hashar
  43   */
  44  class BacklinkCache {
  45      /** @var ProcessCacheLRU */
  46      protected static $cache;
  47  
  48      /**
  49       * Multi dimensions array representing batches. Keys are:
  50       *  > (string) links table name
  51       *   > (int) batch size
  52       *    > 'numRows' : Number of rows for this link table
  53       *    > 'batches' : array( $start, $end )
  54       *
  55       * @see BacklinkCache::partitionResult()
  56       *
  57       * Cleared with BacklinkCache::clear()
  58       */
  59      protected $partitionCache = array();
  60  
  61      /**
  62       * Contains the whole links from a database result.
  63       * This is raw data that will be partitioned in $partitionCache
  64       *
  65       * Initialized with BacklinkCache::getLinks()
  66       * Cleared with BacklinkCache::clear()
  67       */
  68      protected $fullResultCache = array();
  69  
  70      /**
  71       * Local copy of a database object.
  72       *
  73       * Accessor: BacklinkCache::getDB()
  74       * Mutator : BacklinkCache::setDB()
  75       * Cleared with BacklinkCache::clear()
  76       */
  77      protected $db;
  78  
  79      /**
  80       * Local copy of a Title object
  81       */
  82      protected $title;
  83  
  84      const CACHE_EXPIRY = 3600;
  85  
  86      /**
  87       * Create a new BacklinkCache
  88       *
  89       * @param Title $title : Title object to create a backlink cache for
  90       */
  91  	public function __construct( Title $title ) {
  92          $this->title = $title;
  93      }
  94  
  95      /**
  96       * Create a new BacklinkCache or reuse any existing one.
  97       * Currently, only one cache instance can exist; callers that
  98       * need multiple backlink cache objects should keep them in scope.
  99       *
 100       * @param Title $title Title object to get a backlink cache for
 101       * @return BacklinkCache
 102       */
 103  	public static function get( Title $title ) {
 104          if ( !self::$cache ) { // init cache
 105              self::$cache = new ProcessCacheLRU( 1 );
 106          }
 107          $dbKey = $title->getPrefixedDBkey();
 108          if ( !self::$cache->has( $dbKey, 'obj', 3600 ) ) {
 109              self::$cache->set( $dbKey, 'obj', new self( $title ) );
 110          }
 111  
 112          return self::$cache->get( $dbKey, 'obj' );
 113      }
 114  
 115      /**
 116       * Serialization handler, diasallows to serialize the database to prevent
 117       * failures after this class is deserialized from cache with dead DB
 118       * connection.
 119       *
 120       * @return array
 121       */
 122  	function __sleep() {
 123          return array( 'partitionCache', 'fullResultCache', 'title' );
 124      }
 125  
 126      /**
 127       * Clear locally stored data and database object.
 128       */
 129  	public function clear() {
 130          $this->partitionCache = array();
 131          $this->fullResultCache = array();
 132          unset( $this->db );
 133      }
 134  
 135      /**
 136       * Set the Database object to use
 137       *
 138       * @param DatabaseBase $db
 139       */
 140  	public function setDB( $db ) {
 141          $this->db = $db;
 142      }
 143  
 144      /**
 145       * Get the slave connection to the database
 146       * When non existing, will initialize the connection.
 147       * @return DatabaseBase
 148       */
 149  	protected function getDB() {
 150          if ( !isset( $this->db ) ) {
 151              $this->db = wfGetDB( DB_SLAVE );
 152          }
 153  
 154          return $this->db;
 155      }
 156  
 157      /**
 158       * Get the backlinks for a given table. Cached in process memory only.
 159       * @param string $table
 160       * @param int|bool $startId
 161       * @param int|bool $endId
 162       * @param int|INF $max
 163       * @return TitleArrayFromResult
 164       */
 165  	public function getLinks( $table, $startId = false, $endId = false, $max = INF ) {
 166          return TitleArray::newFromResult( $this->queryLinks( $table, $startId, $endId, $max ) );
 167      }
 168  
 169      /**
 170       * Get the backlinks for a given table. Cached in process memory only.
 171       * @param string $table
 172       * @param int|bool $startId
 173       * @param int|bool $endId
 174       * @param int|INF $max
 175       * @param string $select 'all' or 'ids'
 176       * @return ResultWrapper
 177       */
 178  	protected function queryLinks( $table, $startId, $endId, $max, $select = 'all' ) {
 179          wfProfileIn( __METHOD__ );
 180  
 181          $fromField = $this->getPrefix( $table ) . '_from';
 182  
 183          if ( !$startId && !$endId && is_infinite( $max )
 184              && isset( $this->fullResultCache[$table] )
 185          ) {
 186              wfDebug( __METHOD__ . ": got results from cache\n" );
 187              $res = $this->fullResultCache[$table];
 188          } else {
 189              wfDebug( __METHOD__ . ": got results from DB\n" );
 190              $conds = $this->getConditions( $table );
 191              // Use the from field in the condition rather than the joined page_id,
 192              // because databases are stupid and don't necessarily propagate indexes.
 193              if ( $startId ) {
 194                  $conds[] = "$fromField >= " . intval( $startId );
 195              }
 196              if ( $endId ) {
 197                  $conds[] = "$fromField <= " . intval( $endId );
 198              }
 199              $options = array( 'ORDER BY' => $fromField );
 200              if ( is_finite( $max ) && $max > 0 ) {
 201                  $options['LIMIT'] = $max;
 202              }
 203  
 204              if ( $select === 'ids' ) {
 205                  // Just select from the backlink table and ignore the page JOIN
 206                  $res = $this->getDB()->select(
 207                      $table,
 208                      array( $this->getPrefix( $table ) . '_from AS page_id' ),
 209                      array_filter( $conds, function ( $clause ) { // kind of janky
 210                          return !preg_match( '/(\b|=)page_id(\b|=)/', $clause );
 211                      } ),
 212                      __METHOD__,
 213                      $options
 214                  );
 215              } else {
 216                  // Select from the backlink table and JOIN with page title information
 217                  $res = $this->getDB()->select(
 218                      array( $table, 'page' ),
 219                      array( 'page_namespace', 'page_title', 'page_id' ),
 220                      $conds,
 221                      __METHOD__,
 222                      array_merge( array( 'STRAIGHT_JOIN' ), $options )
 223                  );
 224              }
 225  
 226              if ( $select === 'all' && !$startId && !$endId && $res->numRows() < $max ) {
 227                  // The full results fit within the limit, so cache them
 228                  $this->fullResultCache[$table] = $res;
 229              } else {
 230                  wfDebug( __METHOD__ . ": results from DB were uncacheable\n" );
 231              }
 232          }
 233  
 234          wfProfileOut( __METHOD__ );
 235  
 236          return $res;
 237      }
 238  
 239      /**
 240       * Get the field name prefix for a given table
 241       * @param string $table
 242       * @throws MWException
 243       * @return null|string
 244       */
 245  	protected function getPrefix( $table ) {
 246          static $prefixes = array(
 247              'pagelinks' => 'pl',
 248              'imagelinks' => 'il',
 249              'categorylinks' => 'cl',
 250              'templatelinks' => 'tl',
 251              'redirect' => 'rd',
 252          );
 253  
 254          if ( isset( $prefixes[$table] ) ) {
 255              return $prefixes[$table];
 256          } else {
 257              $prefix = null;
 258              wfRunHooks( 'BacklinkCacheGetPrefix', array( $table, &$prefix ) );
 259              if ( $prefix ) {
 260                  return $prefix;
 261              } else {
 262                  throw new MWException( "Invalid table \"$table\" in " . __CLASS__ );
 263              }
 264          }
 265      }
 266  
 267      /**
 268       * Get the SQL condition array for selecting backlinks, with a join
 269       * on the page table.
 270       * @param string $table
 271       * @throws MWException
 272       * @return array|null
 273       */
 274  	protected function getConditions( $table ) {
 275          $prefix = $this->getPrefix( $table );
 276  
 277          switch ( $table ) {
 278              case 'pagelinks':
 279              case 'templatelinks':
 280                  $conds = array(
 281                      "{$prefix}_namespace" => $this->title->getNamespace(),
 282                      "{$prefix}_title" => $this->title->getDBkey(),
 283                      "page_id={$prefix}_from"
 284                  );
 285                  break;
 286              case 'redirect':
 287                  $conds = array(
 288                      "{$prefix}_namespace" => $this->title->getNamespace(),
 289                      "{$prefix}_title" => $this->title->getDBkey(),
 290                      $this->getDb()->makeList( array(
 291                          "{$prefix}_interwiki" => '',
 292                          "{$prefix}_interwiki IS NULL",
 293                      ), LIST_OR ),
 294                      "page_id={$prefix}_from"
 295                  );
 296                  break;
 297              case 'imagelinks':
 298              case 'categorylinks':
 299                  $conds = array(
 300                      "{$prefix}_to" => $this->title->getDBkey(),
 301                      "page_id={$prefix}_from"
 302                  );
 303                  break;
 304              default:
 305                  $conds = null;
 306                  wfRunHooks( 'BacklinkCacheGetConditions', array( $table, $this->title, &$conds ) );
 307                  if ( !$conds ) {
 308                      throw new MWException( "Invalid table \"$table\" in " . __CLASS__ );
 309                  }
 310          }
 311  
 312          return $conds;
 313      }
 314  
 315      /**
 316       * Check if there are any backlinks
 317       * @param string $table
 318       * @return bool
 319       */
 320  	public function hasLinks( $table ) {
 321          return ( $this->getNumLinks( $table, 1 ) > 0 );
 322      }
 323  
 324      /**
 325       * Get the approximate number of backlinks
 326       * @param string $table
 327       * @param int|INF $max Only count up to this many backlinks
 328       * @return int
 329       */
 330  	public function getNumLinks( $table, $max = INF ) {
 331          global $wgMemc, $wgUpdateRowsPerJob;
 332  
 333          // 1) try partition cache ...
 334          if ( isset( $this->partitionCache[$table] ) ) {
 335              $entry = reset( $this->partitionCache[$table] );
 336  
 337              return min( $max, $entry['numRows'] );
 338          }
 339  
 340          // 2) ... then try full result cache ...
 341          if ( isset( $this->fullResultCache[$table] ) ) {
 342              return min( $max, $this->fullResultCache[$table]->numRows() );
 343          }
 344  
 345          $memcKey = wfMemcKey( 'numbacklinks', md5( $this->title->getPrefixedDBkey() ), $table );
 346  
 347          // 3) ... fallback to memcached ...
 348          $count = $wgMemc->get( $memcKey );
 349          if ( $count ) {
 350              return min( $max, $count );
 351          }
 352  
 353          // 4) fetch from the database ...
 354          if ( is_infinite( $max ) ) { // no limit at all
 355              // Use partition() since it will batch the query and skip the JOIN.
 356              // Use $wgUpdateRowsPerJob just to encourage cache reuse for jobs.
 357              $this->partition( $table, $wgUpdateRowsPerJob ); // updates $this->partitionCache
 358              return $this->partitionCache[$table][$wgUpdateRowsPerJob]['numRows'];
 359          } else { // probably some sane limit
 360              // Fetch the full title info, since the caller will likely need it next
 361              $count = $this->getLinks( $table, false, false, $max )->count();
 362              if ( $count < $max ) { // full count
 363                  $wgMemc->set( $memcKey, $count, self::CACHE_EXPIRY );
 364              }
 365          }
 366  
 367          return min( $max, $count );
 368      }
 369  
 370      /**
 371       * Partition the backlinks into batches.
 372       * Returns an array giving the start and end of each range. The first
 373       * batch has a start of false, and the last batch has an end of false.
 374       *
 375       * @param string $table The links table name
 376       * @param int $batchSize
 377       * @return array
 378       */
 379  	public function partition( $table, $batchSize ) {
 380          global $wgMemc;
 381  
 382          // 1) try partition cache ...
 383          if ( isset( $this->partitionCache[$table][$batchSize] ) ) {
 384              wfDebug( __METHOD__ . ": got from partition cache\n" );
 385  
 386              return $this->partitionCache[$table][$batchSize]['batches'];
 387          }
 388  
 389          $this->partitionCache[$table][$batchSize] = false;
 390          $cacheEntry =& $this->partitionCache[$table][$batchSize];
 391  
 392          // 2) ... then try full result cache ...
 393          if ( isset( $this->fullResultCache[$table] ) ) {
 394              $cacheEntry = $this->partitionResult( $this->fullResultCache[$table], $batchSize );
 395              wfDebug( __METHOD__ . ": got from full result cache\n" );
 396  
 397              return $cacheEntry['batches'];
 398          }
 399  
 400          $memcKey = wfMemcKey(
 401              'backlinks',
 402              md5( $this->title->getPrefixedDBkey() ),
 403              $table,
 404              $batchSize
 405          );
 406  
 407          // 3) ... fallback to memcached ...
 408          $memcValue = $wgMemc->get( $memcKey );
 409          if ( is_array( $memcValue ) ) {
 410              $cacheEntry = $memcValue;
 411              wfDebug( __METHOD__ . ": got from memcached $memcKey\n" );
 412  
 413              return $cacheEntry['batches'];
 414          }
 415  
 416          // 4) ... finally fetch from the slow database :(
 417          $cacheEntry = array( 'numRows' => 0, 'batches' => array() ); // final result
 418          // Do the selects in batches to avoid client-side OOMs (bug 43452).
 419          // Use a LIMIT that plays well with $batchSize to keep equal sized partitions.
 420          $selectSize = max( $batchSize, 200000 - ( 200000 % $batchSize ) );
 421          $start = false;
 422          do {
 423              $res = $this->queryLinks( $table, $start, false, $selectSize, 'ids' );
 424              $partitions = $this->partitionResult( $res, $batchSize, false );
 425              // Merge the link count and range partitions for this chunk
 426              $cacheEntry['numRows'] += $partitions['numRows'];
 427              $cacheEntry['batches'] = array_merge( $cacheEntry['batches'], $partitions['batches'] );
 428              if ( count( $partitions['batches'] ) ) {
 429                  list( , $lEnd ) = end( $partitions['batches'] );
 430                  $start = $lEnd + 1; // pick up after this inclusive range
 431              }
 432          } while ( $partitions['numRows'] >= $selectSize );
 433          // Make sure the first range has start=false and the last one has end=false
 434          if ( count( $cacheEntry['batches'] ) ) {
 435              $cacheEntry['batches'][0][0] = false;
 436              $cacheEntry['batches'][count( $cacheEntry['batches'] ) - 1][1] = false;
 437          }
 438  
 439          // Save partitions to memcached
 440          $wgMemc->set( $memcKey, $cacheEntry, self::CACHE_EXPIRY );
 441  
 442          // Save backlink count to memcached
 443          $memcKey = wfMemcKey( 'numbacklinks', md5( $this->title->getPrefixedDBkey() ), $table );
 444          $wgMemc->set( $memcKey, $cacheEntry['numRows'], self::CACHE_EXPIRY );
 445  
 446          wfDebug( __METHOD__ . ": got from database\n" );
 447  
 448          return $cacheEntry['batches'];
 449      }
 450  
 451      /**
 452       * Partition a DB result with backlinks in it into batches
 453       * @param ResultWrapper $res Database result
 454       * @param int $batchSize
 455       * @param bool $isComplete Whether $res includes all the backlinks
 456       * @throws MWException
 457       * @return array
 458       */
 459  	protected function partitionResult( $res, $batchSize, $isComplete = true ) {
 460          $batches = array();
 461          $numRows = $res->numRows();
 462          $numBatches = ceil( $numRows / $batchSize );
 463  
 464          for ( $i = 0; $i < $numBatches; $i++ ) {
 465              if ( $i == 0 && $isComplete ) {
 466                  $start = false;
 467              } else {
 468                  $rowNum = $i * $batchSize;
 469                  $res->seek( $rowNum );
 470                  $row = $res->fetchObject();
 471                  $start = (int)$row->page_id;
 472              }
 473  
 474              if ( $i == ( $numBatches - 1 ) && $isComplete ) {
 475                  $end = false;
 476              } else {
 477                  $rowNum = min( $numRows - 1, ( $i + 1 ) * $batchSize - 1 );
 478                  $res->seek( $rowNum );
 479                  $row = $res->fetchObject();
 480                  $end = (int)$row->page_id;
 481              }
 482  
 483              # Sanity check order
 484              if ( $start && $end && $start > $end ) {
 485                  throw new MWException( __METHOD__ . ': Internal error: query result out of order' );
 486              }
 487  
 488              $batches[] = array( $start, $end );
 489          }
 490  
 491          return array( 'numRows' => $numRows, 'batches' => $batches );
 492      }
 493  }


Generated: Fri Nov 28 14:03:12 2014 Cross-referenced by PHPXref 0.7.1