MediaWiki  REL1_20
BacklinkCache.php
Go to the documentation of this file.
00001 <?php
/**
 * Per-title cache of backlink query results.
 *
 * For one target title this class keeps, in process memory, the full result
 * set of "what links here" style queries per link table, plus the batch
 * boundaries ("partitions") computed from those results. Partition data is
 * additionally shared through $wgMemc.
 *
 * Instances are normally obtained through BacklinkCache::get().
 */
class BacklinkCache {
	/**
	 * Process-level holder of BacklinkCache instances, created lazily in get().
	 * Entries are stored under the title's prefixed DB key, property 'obj'.
	 *
	 * @var ProcessCacheLRU
	 */
	protected static $cache;

	/**
	 * Partition data, indexed as [$table][$batchSize]. Each entry is the
	 * array returned by partitionResult(): 'numRows' and 'batches'.
	 *
	 * Entries are only stored once fully computed, so an isset() check is
	 * enough to know that an entry is usable.
	 *
	 * @var array
	 */
	protected $partitionCache = array();

	/**
	 * Full (unbounded) query results, indexed by link table name. Values are
	 * whatever the database handle's select() returned.
	 *
	 * @var array
	 */
	protected $fullResultCache = array();

	/**
	 * Database handle used for the queries. Either injected with setDB() or
	 * fetched on first use with wfGetDB( DB_SLAVE ); null until then.
	 */
	protected $db;

	/**
	 * Title whose backlinks are being looked up.
	 *
	 * @var Title
	 */
	protected $title;

	/**
	 * Expiry passed to $wgMemc->set() for partition entries
	 * (presumably seconds, i.e. one hour -- confirm against the cache API).
	 */
	const CACHE_EXPIRY = 3600;

	/**
	 * Create a cache object for the given title.
	 * Callers normally use BacklinkCache::get() instead.
	 *
	 * @param $title Title: target title of the backlinks
	 */
	public function __construct( Title $title ) {
		$this->title = $title;
	}

	/**
	 * Return the BacklinkCache for a title, creating it (and the process
	 * cache that holds it) on first use.
	 *
	 * @param $title Title: target title of the backlinks
	 * @return BacklinkCache
	 */
	public static function get( Title $title ) {
		if ( !self::$cache ) { // init cache
			self::$cache = new ProcessCacheLRU( 1 );
		}
		$dbKey = $title->getPrefixedDBkey();
		if ( !self::$cache->has( $dbKey, 'obj' ) ) {
			self::$cache->set( $dbKey, 'obj', new self( $title ) );
		}
		return self::$cache->get( $dbKey, 'obj' );
	}

	/**
	 * Serialization handler: lists the properties to serialize. The database
	 * handle is deliberately left out.
	 *
	 * @return array of property names
	 */
	public function __sleep() {
		return array( 'partitionCache', 'fullResultCache', 'title' );
	}

	/**
	 * Drop all locally cached data and forget the database handle, so the
	 * next query fetches a handle again through getDB().
	 */
	public function clear() {
		$this->partitionCache = array();
		$this->fullResultCache = array();
		// Null (rather than unset) keeps the declared property in place;
		// getDB() treats both the same through isset().
		$this->db = null;
	}

	/**
	 * Set the database handle to use for all further queries.
	 *
	 * @param $db database handle offering select() and makeList()
	 */
	public function setDB( $db ) {
		$this->db = $db;
	}

	/**
	 * Get the database handle, falling back to wfGetDB( DB_SLAVE ) when
	 * none has been set.
	 *
	 * @return database handle
	 */
	protected function getDB() {
		if ( !isset( $this->db ) ) {
			$this->db = wfGetDB( DB_SLAVE );
		}

		return $this->db;
	}

	/**
	 * Get the backlinks for a table, optionally restricted to a range of
	 * source page IDs.
	 *
	 * Unrestricted results are kept in the full result cache; ranged
	 * queries always go to the database.
	 *
	 * @param $table String: link table name
	 * @param $startId Integer|false: lowest source page ID, or false for none
	 * @param $endId Integer|false: highest source page ID, or false for none
	 * @return TitleArray
	 * @throws MWException if the table is not recognised
	 */
	public function getLinks( $table, $startId = false, $endId = false ) {
		wfProfileIn( __METHOD__ );

		$fromField = $this->getPrefix( $table ) . '_from';

		if ( $startId || $endId ) {
			// Partial range, not cached
			wfDebug( __METHOD__ . ": from DB (uncacheable range)\n" );
			$conds = $this->getConditions( $table );

			// Use the from field in the condition rather than the joined page_id,
			// because databases are stupid and don't necessarily propagate indexes.
			if ( $startId ) {
				$conds[] = "$fromField >= " . intval( $startId );
			}

			if ( $endId ) {
				$conds[] = "$fromField <= " . intval( $endId );
			}

			$res = $this->selectLinks( $table, $conds, $fromField, __METHOD__ );
			$ta = TitleArray::newFromResult( $res );

			wfProfileOut( __METHOD__ );
			return $ta;
		}

		if ( !isset( $this->fullResultCache[$table] ) ) {
			wfDebug( __METHOD__ . ": from DB\n" );
			$this->fullResultCache[$table] = $this->selectLinks(
				$table,
				$this->getConditions( $table ),
				$fromField,
				__METHOD__
			);
		}

		$ta = TitleArray::newFromResult( $this->fullResultCache[$table] );

		wfProfileOut( __METHOD__ );
		return $ta;
	}

	/**
	 * Run the backlink SELECT shared by the ranged and unranged paths of
	 * getLinks(): the link table joined with 'page', ordered by source page.
	 *
	 * @param $table String: link table name
	 * @param $conds conditions as produced by getConditions()
	 * @param $fromField String: name of the table's "_from" column
	 * @param $fname String: calling method name, passed through to select()
	 * @return result of the database handle's select()
	 */
	private function selectLinks( $table, $conds, $fromField, $fname ) {
		return $this->getDB()->select(
			array( $table, 'page' ),
			array( 'page_namespace', 'page_title', 'page_id' ),
			$conds,
			$fname,
			array(
				'STRAIGHT_JOIN',
				'ORDER BY' => $fromField,
			)
		);
	}

	/**
	 * Get the column prefix for a link table. Tables not known here are
	 * offered to the 'BacklinkCacheGetPrefix' hook.
	 *
	 * @param $table String: link table name
	 * @return String: column prefix, e.g. 'pl' for 'pagelinks'
	 * @throws MWException if neither this class nor the hook knows the table
	 */
	protected function getPrefix( $table ) {
		static $prefixes = array(
			'pagelinks'     => 'pl',
			'imagelinks'    => 'il',
			'categorylinks' => 'cl',
			'templatelinks' => 'tl',
			'redirect'      => 'rd',
		);

		if ( isset( $prefixes[$table] ) ) {
			return $prefixes[$table];
		}

		$prefix = null;
		wfRunHooks( 'BacklinkCacheGetPrefix', array( $table, &$prefix ) );
		if ( $prefix ) {
			return $prefix;
		}

		throw new MWException( "Invalid table \"$table\" in " . __CLASS__ );
	}

	/**
	 * Build the conditions that select the backlinks to this title from a
	 * link table joined with 'page'. Tables not known here are offered to
	 * the 'BacklinkCacheGetConditions' hook.
	 *
	 * @param $table String: link table name
	 * @return array of conditions for select()
	 * @throws MWException if neither this class nor the hooks know the table
	 */
	protected function getConditions( $table ) {
		$prefix = $this->getPrefix( $table );

		// @todo FIXME: imagelinks and categorylinks do not rely on getNamespace,
		// they could be moved up for nicer case statements
		switch ( $table ) {
			case 'pagelinks':
			case 'templatelinks':
				$conds = array(
					"{$prefix}_namespace" => $this->title->getNamespace(),
					"{$prefix}_title"     => $this->title->getDBkey(),
					"page_id={$prefix}_from"
				);
				break;
			case 'redirect':
				$conds = array(
					"{$prefix}_namespace" => $this->title->getNamespace(),
					"{$prefix}_title"     => $this->title->getDBkey(),
					// Only rows whose interwiki field is empty or NULL
					$this->getDB()->makeList( array(
						"{$prefix}_interwiki = ''",
						"{$prefix}_interwiki is null",
					), LIST_OR ),
					"page_id={$prefix}_from"
				);
				break;
			case 'imagelinks':
				$conds = array(
					'il_to' => $this->title->getDBkey(),
					'page_id=il_from'
				);
				break;
			case 'categorylinks':
				$conds = array(
					'cl_to' => $this->title->getDBkey(),
					'page_id=cl_from',
				);
				break;
			default:
				$conds = null;
				wfRunHooks( 'BacklinkCacheGetConditions', array( $table, $this->title, &$conds ) );
				if ( !$conds ) {
					throw new MWException( "Invalid table \"$table\" in " . __CLASS__ );
				}
		}

		return $conds;
	}

	/**
	 * Get the number of backlinks for a table, using cached data if there
	 * is any and querying through getLinks() otherwise.
	 *
	 * @param $table String: link table name
	 * @return Integer
	 */
	public function getNumLinks( $table ) {
		if ( isset( $this->fullResultCache[$table] ) ) {
			return $this->fullResultCache[$table]->numRows();
		}

		if ( !empty( $this->partitionCache[$table] ) ) {
			// Any batch size will do, they all describe the same result set
			$entry = reset( $this->partitionCache[$table] );
			if ( is_array( $entry ) && isset( $entry['numRows'] ) ) {
				return $entry['numRows'];
			}
		}

		return $this->getLinks( $table )->count();
	}

	/**
	 * Split the backlinks of a table into batches of roughly $batchSize rows.
	 *
	 * Sources are tried in order: the partition cache, the full result
	 * cache, $wgMemc, and finally the database.
	 *
	 * @param $table String: link table name
	 * @param $batchSize Integer: desired number of rows per batch
	 * @return array of array( $start, $end ) page ID pairs; false marks an
	 *   open end
	 * @throws MWException on an unknown table or invalid batch size
	 */
	public function partition( $table, $batchSize ) {
		global $wgMemc;

		// 1) try partition cache ...

		if ( isset( $this->partitionCache[$table][$batchSize] ) ) {
			wfDebug( __METHOD__ . ": got from partition cache\n" );
			return $this->partitionCache[$table][$batchSize]['batches'];
		}

		// Note: nothing is written to the partition cache until an entry has
		// been fully computed. A placeholder would survive an exception thrown
		// below and be mistaken for a valid entry by later isset() checks.

		// 2) ... then try full result cache ...

		if ( isset( $this->fullResultCache[$table] ) ) {
			$cacheEntry = $this->partitionResult( $this->fullResultCache[$table], $batchSize );
			$this->partitionCache[$table][$batchSize] = $cacheEntry;
			wfDebug( __METHOD__ . ": got from full result cache\n" );

			return $cacheEntry['batches'];
		}

		// 3) ... fallback to memcached ...

		$memcKey = wfMemcKey(
			'backlinks',
			md5( $this->title->getPrefixedDBkey() ),
			$table,
			$batchSize
		);

		$memcValue = $wgMemc->get( $memcKey );

		// Only accept values with the shape partitionResult() produces
		if ( is_array( $memcValue ) && isset( $memcValue['batches'], $memcValue['numRows'] ) ) {
			$this->partitionCache[$table][$batchSize] = $memcValue;
			wfDebug( __METHOD__ . ": got from memcached $memcKey\n" );

			return $memcValue['batches'];
		}

		// 4) ... finally fetch from the slow database :(

		$this->getLinks( $table ); // fills $this->fullResultCache[$table]
		$cacheEntry = $this->partitionResult( $this->fullResultCache[$table], $batchSize );
		$this->partitionCache[$table][$batchSize] = $cacheEntry;
		// Save to memcached
		$wgMemc->set( $memcKey, $cacheEntry, self::CACHE_EXPIRY );

		wfDebug( __METHOD__ . ": got from database\n" );
		return $cacheEntry['batches'];
	}

	/**
	 * Compute batch boundaries for a result set ordered by source page.
	 *
	 * The rows are divided into ceil( numRows / batchSize ) batches of
	 * near-equal size. Each batch is array( $start, $end ) in page IDs; the
	 * first batch has $start === false and the last has $end === false.
	 *
	 * @param $res result object offering numRows(), seek() and fetchObject(),
	 *   with a page_id field on each row
	 * @param $batchSize Integer: desired number of rows per batch
	 * @return array with keys 'numRows' (Integer) and 'batches' (array)
	 * @throws MWException on an invalid batch size or out-of-order result
	 */
	protected function partitionResult( $res, $batchSize ) {
		if ( !is_numeric( $batchSize ) || $batchSize <= 0 ) {
			// Would otherwise divide by zero below
			throw new MWException( __METHOD__ . ': batch size must be a positive number' );
		}

		$batches = array();
		$numRows = $res->numRows();
		$numBatches = (int)ceil( $numRows / $batchSize );

		for ( $i = 0; $i < $numBatches; $i++ ) {
			if ( $i == 0 ) {
				$start = false;
			} else {
				$rowNum = intval( $numRows * $i / $numBatches );
				$res->seek( $rowNum );
				$row = $res->fetchObject();
				// Cast so that both boundaries are integers
				$start = (int)$row->page_id;
			}

			if ( $i == $numBatches - 1 ) {
				$end = false;
			} else {
				// End just before the page that starts the next batch
				$rowNum = intval( $numRows * ( $i + 1 ) / $numBatches );
				$res->seek( $rowNum );
				$row = $res->fetchObject();
				$end = $row->page_id - 1;
			}

			# Sanity check order
			if ( $start && $end && $start > $end ) {
				throw new MWException( __METHOD__ . ': Internal error: query result out of order' );
			}

			$batches[] = array( $start, $end );
		}

		return array( 'numRows' => $numRows, 'batches' => $batches );
	}
}