MediaWiki  REL1_19
BacklinkCache.php
Go to the documentation of this file.
<?php
/**
 * Looks up, and caches, the pages that link to a given title through one of
 * the link tables (pagelinks, imagelinks, categorylinks, templatelinks,
 * redirect, or a table registered through the BacklinkCacheGetPrefix and
 * BacklinkCacheGetConditions hooks).
 *
 * Two in-process caches are kept: the full query result per table, and the
 * partition boundaries per table and batch size. Partition boundaries are
 * additionally stored in $wgMemc.
 */
class BacklinkCache {

	/**
	 * Partition data, indexed as $partitionCache[$table][$batchSize].
	 * Each entry has the shape returned by partitionResult():
	 * array( 'numRows' => ..., 'batches' => array( array( $start, $end ), ... ) ).
	 * An entry is only stored once it has been built completely.
	 */
	protected $partitionCache = array();

	/**
	 * Unrestricted query results from getLinks(), indexed by table name.
	 * Range-limited queries are never stored here.
	 */
	protected $fullResultCache = array();

	/**
	 * Database connection. Filled lazily by getDB() with
	 * wfGetDB( DB_SLAVE ) unless one was injected through setDB().
	 */
	protected $db;

	/**
	 * The title whose backlinks are being looked up. The class calls
	 * getNamespace(), getDBkey() and getPrefixedDBkey() on it.
	 */
	protected $title;

	/** Expiry handed to $wgMemc->set() for partition entries (presumably seconds). */
	const CACHE_EXPIRY = 3600;

	/**
	 * @param $title Object the backlinks are looked up for; stored as-is.
	 */
	public function __construct( $title ) {
		$this->title = $title;
	}

	/**
	 * Serialization helper: names the properties to keep. The database
	 * connection is deliberately not in the list.
	 *
	 * @return array
	 */
	public function __sleep() {
		return array( 'partitionCache', 'fullResultCache', 'title' );
	}

	/**
	 * Drop both in-process caches and forget the database connection, so
	 * that the next getDB() call obtains one again.
	 */
	public function clear() {
		$this->partitionCache = array();
		$this->fullResultCache = array();
		unset( $this->db );
	}

	/**
	 * Inject the database connection to use instead of the default one.
	 *
	 * @param $db Database connection object
	 */
	public function setDB( $db ) {
		$this->db = $db;
	}

	/**
	 * Return the database connection, fetching wfGetDB( DB_SLAVE ) on first
	 * use if none has been set.
	 *
	 * @return Database connection object
	 */
	protected function getDB() {
		if ( !isset( $this->db ) ) {
			$this->db = wfGetDB( DB_SLAVE );
		}

		return $this->db;
	}

	/**
	 * Get the pages linking to the title through the given table.
	 *
	 * When neither bound is given, the query result is stored in (and later
	 * served from) the full result cache. When either bound is truthy the
	 * query is restricted on the table's "_from" field and is not cached.
	 *
	 * @param $table String: link table name
	 * @param $startId Integer|false: lowest "_from" id to include, or false
	 * @param $endId Integer|false: highest "_from" id to include, or false
	 * @return Result of TitleArray::newFromResult() for the matching rows
	 * @throws MWException if the table is not known (see getPrefix())
	 */
	public function getLinks( $table, $startId = false, $endId = false ) {
		wfProfileIn( __METHOD__ );

		$fromField = $this->getPrefix( $table ) . '_from';

		if ( $startId || $endId ) {
			// Partial range, not cached
			wfDebug( __METHOD__ . ": from DB (uncacheable range)\n" );
			$conds = $this->getConditions( $table );

			// Use the from field in the condition rather than the joined page_id,
			// because databases do not necessarily propagate indexes across the join.
			if ( $startId ) {
				$conds[] = "$fromField >= " . intval( $startId );
			}

			if ( $endId ) {
				$conds[] = "$fromField <= " . intval( $endId );
			}

			$res = $this->getDB()->select(
				array( $table, 'page' ),
				array( 'page_namespace', 'page_title', 'page_id' ),
				$conds,
				__METHOD__,
				array(
					'STRAIGHT_JOIN',
					'ORDER BY' => $fromField
				) );
			$ta = TitleArray::newFromResult( $res );

			wfProfileOut( __METHOD__ );
			return $ta;
		}

		// @todo FIXME: Make this a function?
		if ( !isset( $this->fullResultCache[$table] ) ) {
			wfDebug( __METHOD__ . ": from DB\n" );
			$res = $this->getDB()->select(
				array( $table, 'page' ),
				array( 'page_namespace', 'page_title', 'page_id' ),
				$this->getConditions( $table ),
				__METHOD__,
				array(
					'STRAIGHT_JOIN',
					'ORDER BY' => $fromField,
				) );
			$this->fullResultCache[$table] = $res;
		}

		$ta = TitleArray::newFromResult( $this->fullResultCache[$table] );

		wfProfileOut( __METHOD__ );
		return $ta;
	}

	/**
	 * Get the column name prefix used by a link table.
	 *
	 * Tables that are not built in are resolved through the
	 * BacklinkCacheGetPrefix hook, which may fill in $prefix.
	 *
	 * @param $table String: link table name
	 * @return String: column prefix, e.g. 'pl' for 'pagelinks'
	 * @throws MWException if neither the built-in map nor the hook yields a prefix
	 */
	protected function getPrefix( $table ) {
		static $prefixes = array(
			'pagelinks'     => 'pl',
			'imagelinks'    => 'il',
			'categorylinks' => 'cl',
			'templatelinks' => 'tl',
			'redirect'      => 'rd',
		);

		if ( isset( $prefixes[$table] ) ) {
			return $prefixes[$table];
		} else {
			$prefix = null;
			wfRunHooks( 'BacklinkCacheGetPrefix', array( $table, &$prefix ) );
			if ( $prefix ) {
				return $prefix;
			} else {
				throw new MWException( "Invalid table \"$table\" in " . __CLASS__ );
			}
		}
	}

	/**
	 * Build the WHERE conditions that select the rows of $table pointing at
	 * the title, including the join condition against the page table.
	 *
	 * Tables that are not built in are resolved through the
	 * BacklinkCacheGetConditions hook, which may fill in $conds.
	 *
	 * @param $table String: link table name
	 * @return Array of conditions suitable for the select() call in getLinks()
	 * @throws MWException if the table is unknown and no hook supplies conditions
	 */
	protected function getConditions( $table ) {
		$prefix = $this->getPrefix( $table );

		// @todo FIXME: imagelinks and categorylinks do not rely on getNamespace,
		// they could be moved up for nicer case statements
		switch ( $table ) {
			case 'pagelinks':
			case 'templatelinks':
				$conds = array(
					"{$prefix}_namespace" => $this->title->getNamespace(),
					"{$prefix}_title"     => $this->title->getDBkey(),
					"page_id={$prefix}_from"
				);
				break;
			case 'redirect':
				$conds = array(
					"{$prefix}_namespace" => $this->title->getNamespace(),
					"{$prefix}_title"     => $this->title->getDBkey(),
					// Only rows whose interwiki field is empty or NULL
					$this->getDB()->makeList( array(
						"{$prefix}_interwiki = ''",
						"{$prefix}_interwiki is null",
					), LIST_OR ),
					"page_id={$prefix}_from"
				);
				break;
			case 'imagelinks':
				$conds = array(
					'il_to' => $this->title->getDBkey(),
					'page_id=il_from'
				);
				break;
			case 'categorylinks':
				$conds = array(
					'cl_to' => $this->title->getDBkey(),
					'page_id=cl_from',
				);
				break;
			default:
				$conds = null;
				wfRunHooks( 'BacklinkCacheGetConditions', array( $table, $this->title, &$conds ) );
				if ( !$conds ) {
					throw new MWException( "Invalid table \"$table\" in " . __CLASS__ );
				}
				break;
		}

		return $conds;
	}

	/**
	 * Get the number of pages linking to the title through the given table.
	 *
	 * Answered from the full result cache if possible, then from any cached
	 * partition entry for the table, and otherwise by running getLinks().
	 *
	 * @param $table String: link table name
	 * @return Integer
	 */
	public function getNumLinks( $table ) {
		if ( isset( $this->fullResultCache[$table] ) ) {
			return $this->fullResultCache[$table]->numRows();
		}

		if ( isset( $this->partitionCache[$table] ) ) {
			// Every entry for the table carries the same row count; take the first
			$entry = reset( $this->partitionCache[$table] );
			return $entry['numRows'];
		}

		$titleArray = $this->getLinks( $table );

		return $titleArray->count();
	}

	/**
	 * Split the backlinks of the title into batches of roughly $batchSize
	 * rows and return the id range of each batch.
	 *
	 * Sources are tried in this order: in-process partition cache,
	 * in-process full result cache, $wgMemc, and finally the database.
	 * An entry is written to the partition cache only after it has been
	 * built, so a failure while loading cannot leave a broken entry behind
	 * for later partition() or getNumLinks() calls.
	 *
	 * @param $table String: link table name
	 * @param $batchSize Integer: desired number of rows per batch
	 * @return Array of array( $start, $end ) pairs; $start of the first batch
	 *         and $end of the last batch are false (open-ended)
	 * @throws MWException see getLinks() and partitionResult()
	 */
	public function partition( $table, $batchSize ) {

		// 1) try partition cache ...

		if ( isset( $this->partitionCache[$table][$batchSize] ) ) {
			wfDebug( __METHOD__ . ": got from partition cache\n" );
			return $this->partitionCache[$table][$batchSize]['batches'];
		}

		// 2) ... then try full result cache ...

		if ( isset( $this->fullResultCache[$table] ) ) {
			$cacheEntry = $this->partitionResult( $this->fullResultCache[$table], $batchSize );
			$this->partitionCache[$table][$batchSize] = $cacheEntry;
			wfDebug( __METHOD__ . ": got from full result cache\n" );

			return $cacheEntry['batches'];
		}

		// 3) ... fallback to memcached ...

		global $wgMemc;

		$memcKey = wfMemcKey(
			'backlinks',
			md5( $this->title->getPrefixedDBkey() ),
			$table,
			$batchSize
		);

		$memcValue = $wgMemc->get( $memcKey );

		if ( is_array( $memcValue ) ) {
			$cacheEntry = $memcValue;
			$this->partitionCache[$table][$batchSize] = $cacheEntry;
			wfDebug( __METHOD__ . ": got from memcached $memcKey\n" );

			return $cacheEntry['batches'];
		}

		// 4) ... finally fetch from the slow database :(

		// getLinks() without a range fills $this->fullResultCache[$table]
		$this->getLinks( $table );
		$cacheEntry = $this->partitionResult( $this->fullResultCache[$table], $batchSize );
		$this->partitionCache[$table][$batchSize] = $cacheEntry;
		// Save to memcached
		$wgMemc->set( $memcKey, $cacheEntry, self::CACHE_EXPIRY );

		wfDebug( __METHOD__ . ": got from database\n" );
		return $cacheEntry['batches'];
	}

	/**
	 * Compute batch boundaries for a query result.
	 *
	 * The result is split into ceil( numRows / batchSize ) batches. The
	 * boundaries are read from the page_id of the rows at evenly spaced
	 * offsets, which requires the result to be sorted by that id.
	 *
	 * @param $res Result object providing numRows(), seek() and fetchObject()
	 * @param $batchSize Integer: desired number of rows per batch, must be > 0
	 * @return Array with keys 'numRows' (row count of $res) and 'batches'
	 *         (list of array( $start, $end ) pairs, empty when $res has no rows)
	 * @throws MWException on a non-positive batch size or an unsorted result
	 */
	protected function partitionResult( $res, $batchSize ) {
		// Refuse a batch size that would make the division below meaningless
		if ( $batchSize <= 0 ) {
			throw new MWException( __METHOD__ . ': Batch size must be greater than zero' );
		}

		$batches = array();
		$numRows = $res->numRows();
		$numBatches = ceil( $numRows / $batchSize );

		for ( $i = 0; $i < $numBatches; $i++ ) {
			if ( $i == 0 ) {
				// First batch has no lower bound
				$start = false;
			} else {
				$rowNum = intval( $numRows * $i / $numBatches );
				$res->seek( $rowNum );
				$row = $res->fetchObject();
				$start = $row->page_id;
			}

			if ( $i == $numBatches - 1 ) {
				// Last batch has no upper bound
				$end = false;
			} else {
				// Ends just below the id that starts the next batch
				$rowNum = intval( $numRows * ( $i + 1 ) / $numBatches );
				$res->seek( $rowNum );
				$row = $res->fetchObject();
				$end = $row->page_id - 1;
			}

			# Sanity check order
			if ( $start && $end && $start > $end ) {
				throw new MWException( __METHOD__ . ': Internal error: query result out of order' );
			}

			$batches[] = array( $start, $end );
		}

		return array( 'numRows' => $numRows, 'batches' => $batches );
	}
}