MediaWiki  REL1_22
BacklinkCache.php
Go to the documentation of this file.
00001 <?php
/**
 * Per-title cache of backlink data: the rows linking to a title, the number
 * of such rows, and partitions of those rows into ranges of source page IDs.
 *
 * Lookups go through up to four layers, cheapest first: the in-object
 * partition cache, the in-object full result cache, memcached ($wgMemc),
 * and finally the database.
 *
 * Instances are normally obtained through BacklinkCache::get().
 */
class BacklinkCache {
    /**
     * Process-level cache of BacklinkCache instances, created lazily by get()
     * and keyed by the title's prefixed DB key.
     */
    protected static $cache;

    /**
     * Partition data, indexed by table name and then by batch size. Each
     * entry is an array with the keys:
     *  - 'numRows': number of backlink rows covered by the entry
     *  - 'batches': list of array( $start, $end ) page ID ranges
     *
     * Only complete entries are ever stored here.
     */
    protected $partitionCache = array();

    /**
     * Complete (unrestricted) query results, indexed by table name. Only
     * results that were fetched without a start/end ID and that did not hit
     * the row limit are stored, see queryLinks().
     */
    protected $fullResultCache = array();

    /**
     * Database handle used for queries. Lazily set by getDB() unless it was
     * injected with setDB(); removed again by clear().
     */
    protected $db;

    /**
     * The title whose backlinks this object describes.
     */
    protected $title;

    /** Expiry passed to $wgMemc->set() for the counts and partitions stored by this class */
    const CACHE_EXPIRY = 3600;

    /**
     * Create a backlink cache for a specific title. Callers that want to
     * share instances should use BacklinkCache::get() instead.
     *
     * @param Title $title Title the backlinks point to
     */
    public function __construct( Title $title ) {
        $this->title = $title;
    }

    /**
     * Get a (possibly shared) BacklinkCache instance for a title.
     *
     * Instances are kept in a static ProcessCacheLRU under the title's
     * prefixed DB key; a new instance is created when has() reports that
     * no usable entry exists.
     *
     * @param Title $title Title the backlinks point to
     * @return BacklinkCache
     */
    public static function get( Title $title ) {
        if ( !self::$cache ) { // init cache
            // NOTE(review): the argument is presumably the maximum number of
            // cached keys - confirm against ProcessCacheLRU.
            self::$cache = new ProcessCacheLRU( 1 );
        }
        $dbKey = $title->getPrefixedDBkey();
        // NOTE(review): 3600 is presumably a maximum entry age in seconds - confirm.
        if ( !self::$cache->has( $dbKey, 'obj', 3600 ) ) {
            self::$cache->set( $dbKey, 'obj', new self( $title ) );
        }
        return self::$cache->get( $dbKey, 'obj' );
    }

    /**
     * Serialization handler: only the two in-object caches and the title are
     * serialized, the database handle is left out.
     *
     * @return array Names of the properties to serialize
     */
    public function __sleep() {
        return array( 'partitionCache', 'fullResultCache', 'title' );
    }

    /**
     * Clear the in-object caches and drop the database handle, so that the
     * next getDB() call fetches a new one. Data stored in memcached is not
     * touched.
     */
    public function clear() {
        $this->partitionCache = array();
        $this->fullResultCache = array();
        unset( $this->db );
    }

    /**
     * Set the database handle to use for queries instead of the default one.
     *
     * @param mixed $db Database object; it must provide the select() and
     *   makeList() methods this class calls
     */
    public function setDB( $db ) {
        $this->db = $db;
    }

    /**
     * Get the database handle, falling back to wfGetDB( DB_SLAVE ) when none
     * has been set.
     *
     * @return mixed Database object
     */
    protected function getDB() {
        if ( !isset( $this->db ) ) {
            $this->db = wfGetDB( DB_SLAVE );
        }
        return $this->db;
    }

    /**
     * Get the backlinks for a given table as a TitleArray.
     *
     * @param string $table Links table name, see getPrefix()
     * @param int|bool $startId Lowest source page ID to include, or false for no lower bound
     * @param int|bool $endId Highest source page ID to include, or false for no upper bound
     * @param int|float $max Maximum number of rows to fetch, INF for no limit
     * @return mixed Whatever TitleArray::newFromResult() returns for the query result
     */
    public function getLinks( $table, $startId = false, $endId = false, $max = INF ) {
        return TitleArray::newFromResult( $this->queryLinks( $table, $startId, $endId, $max ) );
    }

    /**
     * Get the backlink rows for a given table, from the full result cache
     * when possible and from the database otherwise.
     *
     * The cache is only consulted for completely unrestricted requests (no
     * start ID, no end ID, infinite $max). A database result is stored in the
     * cache only if it was unbounded by ID and returned fewer rows than $max,
     * i.e. it is known to be the complete backlink list.
     *
     * @param string $table Links table name, see getPrefix()
     * @param int|bool $startId Lowest source page ID to include, or false
     * @param int|bool $endId Highest source page ID to include, or false
     * @param int|float $max Maximum number of rows to fetch, INF for no limit
     * @return mixed Query result with page_namespace, page_title and page_id
     *   fields, ordered by the table's "_from" field
     * @throws MWException If $table is not a known links table
     */
    protected function queryLinks( $table, $startId, $endId, $max ) {
        wfProfileIn( __METHOD__ );

        $fromField = $this->getPrefix( $table ) . '_from';

        if ( !$startId && !$endId && is_infinite( $max )
            && isset( $this->fullResultCache[$table] ) )
        {
            wfDebug( __METHOD__ . ": got results from cache\n" );
            $res = $this->fullResultCache[$table];
        } else {
            wfDebug( __METHOD__ . ": got results from DB\n" );
            $conds = $this->getConditions( $table );
            // Use the from field in the condition rather than the joined page_id,
            // because databases are stupid and don't necessarily propagate indexes.
            if ( $startId ) {
                $conds[] = "$fromField >= " . intval( $startId );
            }
            if ( $endId ) {
                $conds[] = "$fromField <= " . intval( $endId );
            }
            $options = array( 'STRAIGHT_JOIN', 'ORDER BY' => $fromField );
            if ( is_finite( $max ) && $max > 0 ) {
                $options['LIMIT'] = $max;
            }

            $res = $this->getDB()->select(
                array( $table, 'page' ),
                array( 'page_namespace', 'page_title', 'page_id' ),
                $conds,
                __METHOD__,
                $options
            );

            if ( !$startId && !$endId && $res->numRows() < $max ) {
                // The full results fit within the limit, so cache them
                $this->fullResultCache[$table] = $res;
            } else {
                wfDebug( __METHOD__ . ": results from DB were uncacheable\n" );
            }
        }

        wfProfileOut( __METHOD__ );
        return $res;
    }

    /**
     * Get the field name prefix for a given links table.
     *
     * Tables not known to this class are offered to the
     * 'BacklinkCacheGetPrefix' hook, which may fill in the prefix.
     *
     * @param string $table Links table name
     * @return string Field prefix without the trailing underscore, e.g. 'pl'
     * @throws MWException If the table is unknown and no hook supplied a prefix
     */
    protected function getPrefix( $table ) {
        static $prefixes = array(
            'pagelinks' => 'pl',
            'imagelinks' => 'il',
            'categorylinks' => 'cl',
            'templatelinks' => 'tl',
            'redirect' => 'rd',
        );

        if ( isset( $prefixes[$table] ) ) {
            return $prefixes[$table];
        }

        $prefix = null;
        wfRunHooks( 'BacklinkCacheGetPrefix', array( $table, &$prefix ) );
        if ( $prefix ) {
            return $prefix;
        }

        throw new MWException( "Invalid table \"$table\" in " . __CLASS__ );
    }

    /**
     * Get the SQL conditions that select the rows of a links table pointing
     * at this object's title, including the join condition against the page
     * table.
     *
     * Tables not known to this class are offered to the
     * 'BacklinkCacheGetConditions' hook, which may fill in the conditions.
     *
     * @param string $table Links table name
     * @return array Conditions suitable for the database select() call
     * @throws MWException If the table is unknown and no hook supplied conditions
     */
    protected function getConditions( $table ) {
        $prefix = $this->getPrefix( $table );

        // @todo FIXME: imagelinks and categorylinks do not rely on getNamespace,
        // they could be moved up for nicer case statements
        switch ( $table ) {
            case 'pagelinks':
            case 'templatelinks':
                $conds = array(
                    "{$prefix}_namespace" => $this->title->getNamespace(),
                    "{$prefix}_title" => $this->title->getDBkey(),
                    "page_id={$prefix}_from"
                );
                break;
            case 'redirect':
                $conds = array(
                    "{$prefix}_namespace" => $this->title->getNamespace(),
                    "{$prefix}_title" => $this->title->getDBkey(),
                    // Only redirects without an interwiki prefix (empty or NULL)
                    $this->getDB()->makeList( array(
                        "{$prefix}_interwiki" => '',
                        "{$prefix}_interwiki IS NULL",
                    ), LIST_OR ),
                    "page_id={$prefix}_from"
                );
                break;
            case 'imagelinks':
                $conds = array(
                    'il_to' => $this->title->getDBkey(),
                    'page_id=il_from'
                );
                break;
            case 'categorylinks':
                $conds = array(
                    'cl_to' => $this->title->getDBkey(),
                    'page_id=cl_from',
                );
                break;
            default:
                $conds = null;
                wfRunHooks( 'BacklinkCacheGetConditions', array( $table, $this->title, &$conds ) );
                if ( !$conds ) {
                    throw new MWException( "Invalid table \"$table\" in " . __CLASS__ );
                }
        }

        return $conds;
    }

    /**
     * Check whether there is at least one backlink in the given table.
     *
     * @param string $table Links table name
     * @return bool
     */
    public function hasLinks( $table ) {
        return ( $this->getNumLinks( $table, 1 ) > 0 );
    }

    /**
     * Get the number of backlinks in the given table, capped at $max.
     *
     * The count may come from memcached and can therefore be up to
     * CACHE_EXPIRY out of date.
     *
     * @param string $table Links table name
     * @param int|float $max Upper bound for the returned count, INF for none
     * @return int|float The smaller of $max and the known number of backlinks
     */
    public function getNumLinks( $table, $max = INF ) {
        global $wgMemc;

        // 1) try partition cache ...
        if ( isset( $this->partitionCache[$table] ) ) {
            // Every batch size entry of a table carries the same kind of row
            // count, so the first one is as good as any.
            $entry = reset( $this->partitionCache[$table] );
            return min( $max, $entry['numRows'] );
        }

        // 2) ... then try full result cache ...
        if ( isset( $this->fullResultCache[$table] ) ) {
            return min( $max, $this->fullResultCache[$table]->numRows() );
        }

        $memcKey = wfMemcKey( 'numbacklinks', md5( $this->title->getPrefixedDBkey() ), $table );

        // 3) ... fallback to memcached ...
        // NOTE(review): a cached count of 0 is falsy, so it is treated like a
        // cache miss and re-queried below. Kept as is: this may be deliberate
        // so that "no backlinks" is never served stale - confirm before changing.
        $count = $wgMemc->get( $memcKey );
        if ( $count ) {
            return min( $max, $count );
        }

        // 4) fetch from the database ...
        $count = $this->getLinks( $table, false, false, $max )->count();
        if ( $count < $max ) { // full count
            // Fewer rows than the limit means nothing was cut off, so this
            // is the real total and is worth storing.
            $wgMemc->set( $memcKey, $count, self::CACHE_EXPIRY );
        }

        return min( $max, $count );
    }

    /**
     * Partition the backlinks of the given table into batches of source page
     * ID ranges, each covering about $batchSize rows.
     *
     * The first batch has a start of false and the last batch has an end of
     * false, so that together the ranges cover every possible page ID.
     *
     * An entry is only written to the in-object partition cache once it is
     * complete; if a lookup fails with an exception, nothing is cached and
     * a later call starts from scratch.
     *
     * @param string $table Links table name
     * @param int $batchSize Desired number of rows per batch, at least 1
     * @return array List of array( $start, $end ) pairs; either bound may be false
     * @throws MWException If $batchSize is less than 1 or $table is unknown
     */
    public function partition( $table, $batchSize ) {
        global $wgMemc;

        // A batch size below 1 would lead to modulo/division by zero below
        // and to a select loop that never advances.
        if ( $batchSize < 1 ) {
            throw new MWException( __METHOD__ . ": invalid batch size \"$batchSize\"" );
        }

        // 1) try partition cache ...
        if ( isset( $this->partitionCache[$table][$batchSize] ) ) {
            wfDebug( __METHOD__ . ": got from partition cache\n" );
            return $this->partitionCache[$table][$batchSize]['batches'];
        }

        // 2) ... then try full result cache ...
        if ( isset( $this->fullResultCache[$table] ) ) {
            $cacheEntry = $this->partitionResult( $this->fullResultCache[$table], $batchSize );
            $this->partitionCache[$table][$batchSize] = $cacheEntry;
            wfDebug( __METHOD__ . ": got from full result cache\n" );
            return $cacheEntry['batches'];
        }

        $memcKey = wfMemcKey(
            'backlinks',
            md5( $this->title->getPrefixedDBkey() ),
            $table,
            $batchSize
        );

        // 3) ... fallback to memcached ...
        $memcValue = $wgMemc->get( $memcKey );
        if ( is_array( $memcValue ) ) {
            $this->partitionCache[$table][$batchSize] = $memcValue;
            wfDebug( __METHOD__ . ": got from memcached $memcKey\n" );
            return $memcValue['batches'];
        }

        // 4) ... finally fetch from the slow database :(
        $cacheEntry = array( 'numRows' => 0, 'batches' => array() ); // final result
        // Do the selects in batches to avoid client-side OOMs (bug 43452).
        // Use a LIMIT that plays well with $batchSize to keep equal sized partitions.
        $selectSize = max( $batchSize, 200000 - ( 200000 % $batchSize ) );
        $start = false;
        do {
            $res = $this->queryLinks( $table, $start, false, $selectSize );
            $partitions = $this->partitionResult( $res, $batchSize, false );
            // Merge the link count and range partitions for this chunk
            $cacheEntry['numRows'] += $partitions['numRows'];
            $cacheEntry['batches'] = array_merge( $cacheEntry['batches'], $partitions['batches'] );
            if ( count( $partitions['batches'] ) ) {
                list( $lStart, $lEnd ) = end( $partitions['batches'] );
                $start = $lEnd + 1; // pick up after this inclusive range
            }
            // A chunk that filled the whole LIMIT may have been cut off, so ask for more
        } while ( $partitions['numRows'] >= $selectSize );
        // Make sure the first range has start=false and the last one has end=false
        if ( count( $cacheEntry['batches'] ) ) {
            $cacheEntry['batches'][0][0] = false;
            $cacheEntry['batches'][count( $cacheEntry['batches'] ) - 1][1] = false;
        }

        // The entry is complete now; only at this point is it safe to cache it
        $this->partitionCache[$table][$batchSize] = $cacheEntry;

        // Save partitions to memcached
        $wgMemc->set( $memcKey, $cacheEntry, self::CACHE_EXPIRY );

        // Save backlink count to memcached
        $memcKey = wfMemcKey( 'numbacklinks', md5( $this->title->getPrefixedDBkey() ), $table );
        $wgMemc->set( $memcKey, $cacheEntry['numRows'], self::CACHE_EXPIRY );

        wfDebug( __METHOD__ . ": got from database\n" );
        return $cacheEntry['batches'];
    }

    /**
     * Partition a query result into batches of page ID ranges.
     *
     * The result must be ordered by page ID; each batch boundary is read
     * from the page_id field of the row at the corresponding offset.
     *
     * @param mixed $res Query result providing numRows(), seek() and fetchObject()
     * @param int $batchSize Number of rows per batch
     * @param bool $isComplete Whether $res holds all backlinks. If true, the
     *   first start and the last end are set to false (open ended); if false,
     *   all bounds are concrete page IDs.
     * @return array Array with the keys 'numRows' (int) and 'batches' (list
     *   of array( $start, $end ) pairs)
     * @throws MWException If a batch would start after it ends
     */
    protected function partitionResult( $res, $batchSize, $isComplete = true ) {
        $batches = array();
        $numRows = $res->numRows();
        $numBatches = ceil( $numRows / $batchSize );

        for ( $i = 0; $i < $numBatches; $i++ ) {
            if ( $i == 0 && $isComplete ) {
                $start = false;
            } else {
                // First row of this batch
                $rowNum = $i * $batchSize;
                $res->seek( $rowNum );
                $row = $res->fetchObject();
                $start = (int)$row->page_id;
            }

            if ( $i == ( $numBatches - 1 ) && $isComplete ) {
                $end = false;
            } else {
                // Last row of this batch; the final batch may be short
                $rowNum = min( $numRows - 1, ( $i + 1 ) * $batchSize - 1 );
                $res->seek( $rowNum );
                $row = $res->fetchObject();
                $end = (int)$row->page_id;
            }

            # Sanity check order
            if ( $start && $end && $start > $end ) {
                throw new MWException( __METHOD__ . ': Internal error: query result out of order' );
            }

            $batches[] = array( $start, $end );
        }

        return array( 'numRows' => $numRows, 'batches' => $batches );
    }
}