MediaWiki  REL1_21
BacklinkCache.php
Go to the documentation of this file.
00001 <?php
/**
 * Looks up and caches the pages that link to a given title ("backlinks").
 *
 * Results are kept at three levels, consulted in this order:
 *  - an in-object partition cache (row counts and batch boundaries),
 *  - an in-object full result cache (the raw result of the backlink query),
 *  - $wgMemc (row counts and batch boundaries, stored for CACHE_EXPIRY).
 *
 * Built-in link tables are pagelinks, imagelinks, categorylinks,
 * templatelinks and redirect; other tables can be supported through the
 * BacklinkCacheGetPrefix and BacklinkCacheGetConditions hooks.
 */
class BacklinkCache {
	/**
	 * Process-wide holder for BacklinkCache instances, created lazily by get().
	 *
	 * @var ProcessCacheLRU|null
	 */
	protected static $cache;

	/**
	 * Partition data, indexed as $partitionCache[$table][$batchSize].
	 * Each entry is array( 'numRows' => int, 'batches' => array of
	 * array( $start, $end ) pairs ), as built by partitionResult().
	 *
	 * @var array
	 */
	protected $partitionCache = array();

	/**
	 * Unrestricted backlink query results, indexed by table name.
	 * Each value is whatever the database object's select() returned.
	 *
	 * @var array
	 */
	protected $fullResultCache = array();

	/**
	 * Database object used for queries; null until setDB() or getDB()
	 * provides one.
	 *
	 * @var object|null
	 */
	protected $db;

	/**
	 * The title whose backlinks are being looked up.
	 *
	 * @var Title
	 */
	protected $title;

	/** Expiry value passed to $wgMemc->set() (presumably seconds - TODO confirm). */
	const CACHE_EXPIRY = 3600;

	/**
	 * Create a cache object for one title. Callers that want the shared
	 * instance should use BacklinkCache::get() instead.
	 *
	 * @param Title $title Title to find backlinks for
	 */
	public function __construct( Title $title ) {
		$this->title = $title;
	}

	/**
	 * Return the BacklinkCache for a title, creating and remembering it in
	 * the static ProcessCacheLRU when it is not already there.
	 *
	 * @param Title $title Title to find backlinks for
	 * @return BacklinkCache
	 */
	public static function get( Title $title ) {
		if ( !self::$cache ) { // init cache
			// NOTE(review): the argument 1 is presumably the maximum number of
			// keys retained - confirm against ProcessCacheLRU.
			self::$cache = new ProcessCacheLRU( 1 );
		}
		$dbKey = $title->getPrefixedDBkey();
		if ( !self::$cache->has( $dbKey, 'obj' ) ) {
			self::$cache->set( $dbKey, 'obj', new self( $title ) );
		}
		return self::$cache->get( $dbKey, 'obj' );
	}

	/**
	 * Serialization handler: only the two caches and the title are kept,
	 * the database object is left out.
	 *
	 * NOTE(review): fullResultCache holds the objects returned by select();
	 * confirm that those survive serialization in a usable state.
	 *
	 * @return array Names of the properties to serialize
	 */
	public function __sleep() {
		return array( 'partitionCache', 'fullResultCache', 'title' );
	}

	/**
	 * Drop both in-object caches and forget the database object.
	 * Entries stored in $wgMemc are not touched.
	 */
	public function clear() {
		$this->partitionCache = array();
		$this->fullResultCache = array();
		// Reset rather than unset() so the declared property keeps existing;
		// getDB() treats null as "not set" via isset().
		$this->db = null;
	}

	/**
	 * Use the given database object for all subsequent queries.
	 *
	 * @param object $db Database object providing select() and makeList()
	 */
	public function setDB( $db ) {
		$this->db = $db;
	}

	/**
	 * Get the database object, falling back to wfGetDB( DB_SLAVE ) when none
	 * has been set.
	 *
	 * @return object
	 */
	protected function getDB() {
		if ( !isset( $this->db ) ) {
			$this->db = wfGetDB( DB_SLAVE );
		}

		return $this->db;
	}

	/**
	 * Get the backlinks for a table, optionally restricted to a range of
	 * source page IDs. Only the unrestricted result is cached in the object.
	 *
	 * @param string $table Link table name
	 * @param int|bool $startId Lowest source page ID to include, or false
	 * @param int|bool $endId Highest source page ID to include, or false
	 * @return TitleArray
	 * @throws MWException If the table is not recognised
	 */
	public function getLinks( $table, $startId = false, $endId = false ) {
		// Resolve the prefix before entering the profiling section: getPrefix()
		// throws for unknown tables, which would otherwise leave wfProfileIn()
		// without a matching wfProfileOut().
		$fromField = $this->getPrefix( $table ) . '_from';

		wfProfileIn( __METHOD__ );

		if ( $startId || $endId ) {
			// Partial range, not cached
			wfDebug( __METHOD__ . ": from DB (uncacheable range)\n" );
			$conds = $this->getConditions( $table );

			// Use the from field in the condition rather than the joined page_id,
			// because databases are stupid and don't necessarily propagate indexes.
			if ( $startId ) {
				$conds[] = "$fromField >= " . intval( $startId );
			}

			if ( $endId ) {
				$conds[] = "$fromField <= " . intval( $endId );
			}

			$res = $this->selectLinks( $table, $conds, $fromField, __METHOD__ );
			$ta = TitleArray::newFromResult( $res );

			wfProfileOut( __METHOD__ );
			return $ta;
		}

		if ( !isset( $this->fullResultCache[$table] ) ) {
			wfDebug( __METHOD__ . ": from DB\n" );
			$this->fullResultCache[$table] = $this->selectLinks(
				$table,
				$this->getConditions( $table ),
				$fromField,
				__METHOD__
			);
		}

		$ta = TitleArray::newFromResult( $this->fullResultCache[$table] );

		wfProfileOut( __METHOD__ );
		return $ta;
	}

	/**
	 * Run the backlink SELECT shared by both branches of getLinks().
	 *
	 * @param string $table Link table name
	 * @param array $conds WHERE conditions, including the join condition
	 * @param string $fromField Field to order the result by
	 * @param string $fname Caller name handed to select()
	 * @return object Result of the database object's select()
	 */
	private function selectLinks( $table, $conds, $fromField, $fname ) {
		return $this->getDB()->select(
			array( $table, 'page' ),
			array( 'page_namespace', 'page_title', 'page_id' ),
			$conds,
			$fname,
			array(
				'STRAIGHT_JOIN',
				'ORDER BY' => $fromField,
			)
		);
	}

	/**
	 * Get the field name prefix for a given table. Tables that are not built
	 * in are offered to the BacklinkCacheGetPrefix hook.
	 *
	 * @param string $table Link table name
	 * @return string Prefix without trailing underscore, e.g. 'pl'
	 * @throws MWException If neither the built-in map nor the hook knows the table
	 */
	protected function getPrefix( $table ) {
		static $prefixes = array(
			'pagelinks' => 'pl',
			'imagelinks' => 'il',
			'categorylinks' => 'cl',
			'templatelinks' => 'tl',
			'redirect' => 'rd',
		);

		if ( isset( $prefixes[$table] ) ) {
			return $prefixes[$table];
		}

		$prefix = null;
		wfRunHooks( 'BacklinkCacheGetPrefix', array( $table, &$prefix ) );
		if ( $prefix ) {
			return $prefix;
		}

		throw new MWException( "Invalid table \"$table\" in " . __CLASS__ );
	}

	/**
	 * Get the SQL condition array for selecting backlinks, including the
	 * join condition against the page table. Tables that are not built in
	 * are offered to the BacklinkCacheGetConditions hook.
	 *
	 * @param string $table Link table name
	 * @return array Conditions suitable for the database object's select()
	 * @throws MWException If the table is not recognised
	 */
	protected function getConditions( $table ) {
		$prefix = $this->getPrefix( $table );

		// @todo FIXME: imagelinks and categorylinks do not rely on getNamespace,
		// they could be moved up for nicer case statements
		switch ( $table ) {
			case 'pagelinks':
			case 'templatelinks':
				$conds = array(
					"{$prefix}_namespace" => $this->title->getNamespace(),
					"{$prefix}_title" => $this->title->getDBkey(),
					"page_id={$prefix}_from"
				);
				break;
			case 'redirect':
				$conds = array(
					"{$prefix}_namespace" => $this->title->getNamespace(),
					"{$prefix}_title" => $this->title->getDBkey(),
					// Match rows whose interwiki field is empty or NULL
					$this->getDB()->makeList( array(
						"{$prefix}_interwiki" => '',
						"{$prefix}_interwiki IS NULL",
					), LIST_OR ),
					"page_id={$prefix}_from"
				);
				break;
			case 'imagelinks':
				$conds = array(
					'il_to' => $this->title->getDBkey(),
					'page_id=il_from'
				);
				break;
			case 'categorylinks':
				$conds = array(
					'cl_to' => $this->title->getDBkey(),
					'page_id=cl_from',
				);
				break;
			default:
				$conds = null;
				wfRunHooks( 'BacklinkCacheGetConditions', array( $table, $this->title, &$conds ) );
				if ( !$conds ) {
					throw new MWException( "Invalid table \"$table\" in " . __CLASS__ );
				}
		}

		return $conds;
	}

	/**
	 * Check whether the title has at least one backlink in the given table.
	 *
	 * @param string $table Link table name
	 * @return bool
	 */
	public function hasLinks( $table ) {
		return ( $this->getNumLinks( $table, 1 ) > 0 );
	}

	/**
	 * Get the number of backlinks in the given table, capped at $max.
	 *
	 * @param string $table Link table name
	 * @param int|float $max Upper bound for the returned count; INF for no bound
	 * @return int|float
	 */
	public function getNumLinks( $table, $max = INF ) {
		global $wgMemc;

		// 1) try partition cache ...
		if ( isset( $this->partitionCache[$table] ) ) {
			// Every batch size of the same table carries the same row count,
			// so the first entry is as good as any.
			$entry = reset( $this->partitionCache[$table] );
			return min( $max, $entry['numRows'] );
		}

		// 2) ... then try full result cache ...
		if ( isset( $this->fullResultCache[$table] ) ) {
			return min( $max, $this->fullResultCache[$table]->numRows() );
		}

		$memcKey = wfMemcKey( 'numbacklinks', md5( $this->title->getPrefixedDBkey() ), $table );

		// 3) ... fallback to memcached ...
		// A falsy value (including a stored count of 0) is treated as a miss.
		$count = $wgMemc->get( $memcKey );
		if ( $count ) {
			return min( $max, $count );
		}

		// 4) fetch from the database ...
		if ( is_infinite( $max ) ) { // full count
			$count = $this->getLinks( $table )->count();
			$wgMemc->set( $memcKey, $count, self::CACHE_EXPIRY );
		} else { // with limit
			// The limited count is not the real total, so it is not stored.
			$count = $this->getDB()->select(
				array( $table, 'page' ),
				'1',
				$this->getConditions( $table ),
				__METHOD__,
				array( 'LIMIT' => $max )
			)->numRows();
		}

		return $count;
	}

	/**
	 * Split the backlinks of a table into batches of roughly $batchSize rows.
	 *
	 * @param string $table Link table name
	 * @param int $batchSize Desired number of rows per batch
	 * @return array List of array( $start, $end ) page ID pairs; $start of the
	 *   first batch and $end of the last batch are false
	 * @throws MWException If the table is not recognised or $batchSize is not positive
	 */
	public function partition( $table, $batchSize ) {
		global $wgMemc;

		// 1) try partition cache ...
		if ( isset( $this->partitionCache[$table][$batchSize] ) ) {
			wfDebug( __METHOD__ . ": got from partition cache\n" );
			return $this->partitionCache[$table][$batchSize]['batches'];
		}

		// The partition cache slot is only written once an entry has actually
		// been obtained; a failure below must not leave a placeholder behind
		// that later calls would mistake for a cached result.

		// 2) ... then try full result cache ...
		if ( isset( $this->fullResultCache[$table] ) ) {
			$cacheEntry = $this->partitionResult( $this->fullResultCache[$table], $batchSize );
			$this->partitionCache[$table][$batchSize] = $cacheEntry;
			wfDebug( __METHOD__ . ": got from full result cache\n" );
			return $cacheEntry['batches'];
		}

		$memcKey = wfMemcKey(
			'backlinks',
			md5( $this->title->getPrefixedDBkey() ),
			$table,
			$batchSize
		);

		// 3) ... fallback to memcached ...
		$memcValue = $wgMemc->get( $memcKey );
		if ( is_array( $memcValue ) ) {
			$this->partitionCache[$table][$batchSize] = $memcValue;
			wfDebug( __METHOD__ . ": got from memcached $memcKey\n" );
			return $memcValue['batches'];
		}

		// 4) ... finally fetch from the slow database :(
		$this->getLinks( $table ); // fills $this->fullResultCache[$table]
		$cacheEntry = $this->partitionResult( $this->fullResultCache[$table], $batchSize );
		$this->partitionCache[$table][$batchSize] = $cacheEntry;
		// Save partitions to memcached
		$wgMemc->set( $memcKey, $cacheEntry, self::CACHE_EXPIRY );

		// Save backlink count to memcached
		$memcKey = wfMemcKey( 'numbacklinks', md5( $this->title->getPrefixedDBkey() ), $table );
		$wgMemc->set( $memcKey, $cacheEntry['numRows'], self::CACHE_EXPIRY );

		wfDebug( __METHOD__ . ": got from database\n" );
		return $cacheEntry['batches'];
	}

	/**
	 * Compute batch boundaries for a query result.
	 *
	 * The result must expose numRows(), seek() and fetchObject(), and its
	 * rows must carry page_id in ascending order.
	 *
	 * @param object $res Query result
	 * @param int $batchSize Desired number of rows per batch
	 * @return array array( 'numRows' => int, 'batches' => array of
	 *   array( $start, $end ) pairs )
	 * @throws MWException If $batchSize is not positive or the rows are out of order
	 */
	protected function partitionResult( $res, $batchSize ) {
		if ( $batchSize <= 0 ) {
			// Would otherwise divide by zero below.
			throw new MWException( __METHOD__ . ': Invalid batch size' );
		}

		$batches = array();
		$numRows = $res->numRows();
		$numBatches = (int)ceil( $numRows / $batchSize );

		for ( $i = 0; $i < $numBatches; $i++ ) {
			if ( $i == 0 ) {
				// First batch is open-ended at the bottom
				$start = false;
			} else {
				$rowNum = intval( $numRows * $i / $numBatches );
				$res->seek( $rowNum );
				$row = $res->fetchObject();
				$start = $row->page_id;
			}

			if ( $i == $numBatches - 1 ) {
				// Last batch is open-ended at the top
				$end = false;
			} else {
				// End just before the first row of the following batch
				$rowNum = intval( $numRows * ( $i + 1 ) / $numBatches );
				$res->seek( $rowNum );
				$row = $res->fetchObject();
				$end = $row->page_id - 1;
			}

			# Sanity check order
			if ( $start && $end && $start > $end ) {
				throw new MWException( __METHOD__ . ': Internal error: query result out of order' );
			}

			$batches[] = array( $start, $end );
		}

		return array( 'numRows' => $numRows, 'batches' => $batches );
	}
}