MediaWiki
REL1_22
|
<?php
/**
 * Caches backlink lookups for a single title.
 *
 * For each link table the class can hold the full query result and, per batch
 * size, the partitioning of that result into page ID ranges. Link counts and
 * partitions are additionally stored through $wgMemc.
 */
class BacklinkCache {
	/**
	 * Process-level cache of BacklinkCache instances, keyed by the prefixed
	 * DB key of their title. Created lazily in get().
	 */
	protected static $cache;

	/**
	 * Partitioning results, indexed as [$table][$batchSize]. Each entry is
	 * array( 'numRows' => int, 'batches' => array of array( $start, $end ) ),
	 * as produced by partitionResult().
	 */
	protected $partitionCache = array();

	/**
	 * Unrestricted query results (no start/end ID, everything fit within the
	 * limit), indexed by table name.
	 */
	protected $fullResultCache = array();

	/**
	 * Database connection used for queries. Set lazily by getDB(), or
	 * explicitly through setDB().
	 */
	protected $db;

	/**
	 * Title whose backlinks are looked up.
	 */
	protected $title;

	/**
	 * Expiry passed to $wgMemc->set() for cached counts and partitions
	 * (presumably seconds -- confirm against the cache backend).
	 */
	const CACHE_EXPIRY = 3600;

	/**
	 * Create a cache for the given title. Use get() to obtain a shared
	 * instance instead of constructing one per call.
	 *
	 * @param Title $title Title to look up backlinks for
	 */
	public function __construct( Title $title ) {
		$this->title = $title;
	}

	/**
	 * Return the BacklinkCache for a title, reusing the instance held in the
	 * process cache when one is present for the same prefixed DB key.
	 *
	 * @param Title $title Title to look up backlinks for
	 * @return BacklinkCache
	 */
	public static function get( Title $title ) {
		if ( !self::$cache ) { // init cache
			self::$cache = new ProcessCacheLRU( 1 );
		}
		$dbKey = $title->getPrefixedDBkey();
		if ( !self::$cache->has( $dbKey, 'obj', 3600 ) ) {
			self::$cache->set( $dbKey, 'obj', new self( $title ) );
		}
		return self::$cache->get( $dbKey, 'obj' );
	}

	/**
	 * Serialization helper: lists the properties to serialize. The database
	 * connection ($db) is intentionally left out.
	 *
	 * @return array
	 */
	function __sleep() {
		return array( 'partitionCache', 'fullResultCache', 'title' );
	}

	/**
	 * Drop all locally cached results and forget the database connection.
	 * Values stored through $wgMemc are not touched.
	 */
	public function clear() {
		$this->partitionCache = array();
		$this->fullResultCache = array();
		unset( $this->db );
	}

	/**
	 * Set the database connection to use for subsequent queries.
	 *
	 * @param mixed $db Database connection object
	 */
	public function setDB( $db ) {
		$this->db = $db;
	}

	/**
	 * Get the database connection, falling back to wfGetDB( DB_SLAVE ) when
	 * none has been set.
	 *
	 * @return mixed Database connection object
	 */
	protected function getDB() {
		if ( !isset( $this->db ) ) {
			$this->db = wfGetDB( DB_SLAVE );
		}
		return $this->db;
	}

	/**
	 * Get the backlinks from a table as a TitleArray.
	 *
	 * @param string $table Link table name
	 * @param int|bool $startId Lowest "from" page ID to include, or false for no lower bound
	 * @param int|bool $endId Highest "from" page ID to include, or false for no upper bound
	 * @param int|float $max Maximum number of rows; INF for no limit
	 * @return TitleArray
	 */
	public function getLinks( $table, $startId = false, $endId = false, $max = INF ) {
		return TitleArray::newFromResult( $this->queryLinks( $table, $startId, $endId, $max ) );
	}

	/**
	 * Run (or serve from the full result cache) the backlink query for a table.
	 *
	 * The full result cache is only consulted for completely unrestricted
	 * requests, and only populated when an unbounded-range query returned
	 * fewer rows than $max, i.e. when the result is known to be complete.
	 *
	 * @param string $table Link table name
	 * @param int|bool $startId Lowest "from" page ID to include, or false
	 * @param int|bool $endId Highest "from" page ID to include, or false
	 * @param int|float $max Maximum number of rows; INF for no limit
	 * @return mixed Query result with page_namespace, page_title and page_id columns
	 * @throws MWException When the table is not recognised
	 */
	protected function queryLinks( $table, $startId, $endId, $max ) {
		wfProfileIn( __METHOD__ );

		$fromField = $this->getPrefix( $table ) . '_from';

		if ( !$startId && !$endId && is_infinite( $max )
			&& isset( $this->fullResultCache[$table] ) )
		{
			wfDebug( __METHOD__ . ": got results from cache\n" );
			$res = $this->fullResultCache[$table];
		} else {
			wfDebug( __METHOD__ . ": got results from DB\n" );
			$conds = $this->getConditions( $table );
			// Use the from field in the condition rather than the joined page_id,
			// because databases are stupid and don't necessarily propagate indexes.
			if ( $startId ) {
				$conds[] = "$fromField >= " . intval( $startId );
			}
			if ( $endId ) {
				$conds[] = "$fromField <= " . intval( $endId );
			}
			$options = array( 'STRAIGHT_JOIN', 'ORDER BY' => $fromField );
			if ( is_finite( $max ) && $max > 0 ) {
				$options['LIMIT'] = $max;
			}

			$res = $this->getDB()->select(
				array( $table, 'page' ),
				array( 'page_namespace', 'page_title', 'page_id' ),
				$conds,
				__METHOD__,
				$options
			);

			if ( !$startId && !$endId && $res->numRows() < $max ) {
				// The full results fit within the limit, so cache them
				$this->fullResultCache[$table] = $res;
			} else {
				wfDebug( __METHOD__ . ": results from DB were uncacheable\n" );
			}
		}

		wfProfileOut( __METHOD__ );
		return $res;
	}

	/**
	 * Get the column prefix for a link table. Tables not listed here are
	 * offered to the BacklinkCacheGetPrefix hook.
	 *
	 * @param string $table Link table name
	 * @return string Column prefix, e.g. 'pl' for 'pagelinks'
	 * @throws MWException When neither the built-in map nor the hook yields a prefix
	 */
	protected function getPrefix( $table ) {
		static $prefixes = array(
			'pagelinks' => 'pl',
			'imagelinks' => 'il',
			'categorylinks' => 'cl',
			'templatelinks' => 'tl',
			'redirect' => 'rd',
		);

		if ( isset( $prefixes[$table] ) ) {
			return $prefixes[$table];
		} else {
			$prefix = null;
			wfRunHooks( 'BacklinkCacheGetPrefix', array( $table, &$prefix ) );
			if ( $prefix ) {
				return $prefix;
			} else {
				throw new MWException( "Invalid table \"$table\" in " . __CLASS__ );
			}
		}
	}

	/**
	 * Build the WHERE conditions (including the join against page) that
	 * select the rows of a link table pointing at this title. Tables not
	 * handled here are offered to the BacklinkCacheGetConditions hook.
	 *
	 * @param string $table Link table name
	 * @return array Conditions for the database select() call
	 * @throws MWException When the table is unknown and the hook supplies no conditions
	 */
	protected function getConditions( $table ) {
		$prefix = $this->getPrefix( $table );

		// @todo FIXME: imagelinks and categorylinks do not rely on getNamespace,
		// they could be moved up for nicer case statements
		switch ( $table ) {
			case 'pagelinks':
			case 'templatelinks':
				$conds = array(
					"{$prefix}_namespace" => $this->title->getNamespace(),
					"{$prefix}_title" => $this->title->getDBkey(),
					"page_id={$prefix}_from"
				);
				break;
			case 'redirect':
				$conds = array(
					"{$prefix}_namespace" => $this->title->getNamespace(),
					"{$prefix}_title" => $this->title->getDBkey(),
					// Only rows whose interwiki column is empty or NULL
					$this->getDB()->makeList( array(
						"{$prefix}_interwiki" => '',
						"{$prefix}_interwiki IS NULL",
					), LIST_OR ),
					"page_id={$prefix}_from"
				);
				break;
			case 'imagelinks':
				$conds = array(
					'il_to' => $this->title->getDBkey(),
					'page_id=il_from'
				);
				break;
			case 'categorylinks':
				$conds = array(
					'cl_to' => $this->title->getDBkey(),
					'page_id=cl_from',
				);
				break;
			default:
				$conds = null;
				wfRunHooks( 'BacklinkCacheGetConditions', array( $table, $this->title, &$conds ) );
				if ( !$conds ) {
					throw new MWException( "Invalid table \"$table\" in " . __CLASS__ );
				}
		}

		return $conds;
	}

	/**
	 * Check whether the table has at least one backlink to this title.
	 *
	 * @param string $table Link table name
	 * @return bool
	 */
	public function hasLinks( $table ) {
		return ( $this->getNumLinks( $table, 1 ) > 0 );
	}

	/**
	 * Get the number of backlinks in a table, capped at $max.
	 *
	 * Sources are tried in order: partition cache, full result cache,
	 * $wgMemc, and finally the database.
	 *
	 * @param string $table Link table name
	 * @param int|float $max Upper bound on the returned count; INF for none
	 * @return int|float
	 */
	public function getNumLinks( $table, $max = INF ) {
		global $wgMemc;

		// 1) try partition cache ...
		if ( isset( $this->partitionCache[$table] ) ) {
			foreach ( $this->partitionCache[$table] as $entry ) {
				// Only trust fully built entries; anything else is skipped so
				// the lookup falls through to the next source.
				if ( is_array( $entry ) ) {
					return min( $max, $entry['numRows'] );
				}
			}
		}

		// 2) ... then try full result cache ...
		if ( isset( $this->fullResultCache[$table] ) ) {
			return min( $max, $this->fullResultCache[$table]->numRows() );
		}

		$memcKey = wfMemcKey( 'numbacklinks', md5( $this->title->getPrefixedDBkey() ), $table );

		// 3) ... fallback to memcached ...
		// A falsy value (cache miss, but also a stored count of 0) falls
		// through, so an empty result is always re-checked against the DB.
		$count = $wgMemc->get( $memcKey );
		if ( $count ) {
			return min( $max, $count );
		}

		// 4) fetch from the database ...
		$count = $this->getLinks( $table, false, false, $max )->count();
		if ( $count < $max ) { // full count
			$wgMemc->set( $memcKey, $count, self::CACHE_EXPIRY );
		}

		return min( $max, $count );
	}

	/**
	 * Partition the backlinks of a table into page ID ranges of roughly
	 * $batchSize rows each.
	 *
	 * Sources are tried in order: partition cache, full result cache,
	 * $wgMemc, and finally the database. The local partition cache is only
	 * written once a complete entry is available, so a failure part-way
	 * through (for example an MWException for an unknown table) never leaves
	 * a half-initialised entry behind for later calls.
	 *
	 * @param string $table Link table name
	 * @param int $batchSize Number of rows per batch
	 * @return array List of array( $start, $end ) pairs; the first start and
	 *  the last end are false, meaning unbounded
	 * @throws MWException When the table is not recognised or rows come back out of order
	 */
	public function partition( $table, $batchSize ) {
		global $wgMemc;

		// 1) try partition cache ...
		if ( isset( $this->partitionCache[$table][$batchSize] )
			&& is_array( $this->partitionCache[$table][$batchSize] )
		) {
			wfDebug( __METHOD__ . ": got from partition cache\n" );
			return $this->partitionCache[$table][$batchSize]['batches'];
		}

		// 2) ... then try full result cache ...
		if ( isset( $this->fullResultCache[$table] ) ) {
			$cacheEntry = $this->partitionResult( $this->fullResultCache[$table], $batchSize );
			$this->partitionCache[$table][$batchSize] = $cacheEntry;
			wfDebug( __METHOD__ . ": got from full result cache\n" );
			return $cacheEntry['batches'];
		}

		$memcKey = wfMemcKey(
			'backlinks',
			md5( $this->title->getPrefixedDBkey() ),
			$table,
			$batchSize
		);

		// 3) ... fallback to memcached ...
		$memcValue = $wgMemc->get( $memcKey );
		if ( is_array( $memcValue ) ) {
			$this->partitionCache[$table][$batchSize] = $memcValue;
			wfDebug( __METHOD__ . ": got from memcached $memcKey\n" );
			return $memcValue['batches'];
		}

		// 4) ... finally fetch from the slow database :(
		$cacheEntry = array( 'numRows' => 0, 'batches' => array() ); // final result
		// Do the selects in batches to avoid client-side OOMs (bug 43452).
		// Use a LIMIT that plays well with $batchSize to keep equal sized partitions.
		$selectSize = max( $batchSize, 200000 - ( 200000 % $batchSize ) );
		$start = false;
		do {
			$res = $this->queryLinks( $table, $start, false, $selectSize );
			$partitions = $this->partitionResult( $res, $batchSize, false );
			// Merge the link count and range partitions for this chunk
			$cacheEntry['numRows'] += $partitions['numRows'];
			$cacheEntry['batches'] = array_merge( $cacheEntry['batches'], $partitions['batches'] );
			if ( count( $partitions['batches'] ) ) {
				$lastBatch = end( $partitions['batches'] );
				$start = $lastBatch[1] + 1; // pick up after this inclusive range
			}
		} while ( $partitions['numRows'] >= $selectSize );
		// Make sure the first range has start=false and the last one has end=false
		if ( count( $cacheEntry['batches'] ) ) {
			$cacheEntry['batches'][0][0] = false;
			$cacheEntry['batches'][count( $cacheEntry['batches'] ) - 1][1] = false;
		}

		// Entry is complete: publish it to the local partition cache
		$this->partitionCache[$table][$batchSize] = $cacheEntry;

		// Save partitions to memcached
		$wgMemc->set( $memcKey, $cacheEntry, self::CACHE_EXPIRY );

		// Save backlink count to memcached
		$memcKey = wfMemcKey( 'numbacklinks', md5( $this->title->getPrefixedDBkey() ), $table );
		$wgMemc->set( $memcKey, $cacheEntry['numRows'], self::CACHE_EXPIRY );

		wfDebug( __METHOD__ . ": got from database\n" );
		return $cacheEntry['batches'];
	}

	/**
	 * Split a query result into inclusive page ID ranges of up to $batchSize
	 * rows each.
	 *
	 * @param mixed $res Query result supporting numRows(), seek() and fetchObject()
	 * @param int $batchSize Number of rows per batch
	 * @param bool $isComplete Whether $res holds every row; when true the first
	 *  range starts at false and the last range ends at false (unbounded)
	 * @return array array( 'numRows' => int, 'batches' => array of array( $start, $end ) )
	 * @throws MWException When a range's start ID is greater than its end ID
	 */
	protected function partitionResult( $res, $batchSize, $isComplete = true ) {
		$batches = array();
		$numRows = $res->numRows();
		$numBatches = ceil( $numRows / $batchSize );

		for ( $i = 0; $i < $numBatches; $i++ ) {
			if ( $i == 0 && $isComplete ) {
				$start = false;
			} else {
				// First row of this batch
				$rowNum = $i * $batchSize;
				$res->seek( $rowNum );
				$row = $res->fetchObject();
				$start = (int)$row->page_id;
			}

			if ( $i == ( $numBatches - 1 ) && $isComplete ) {
				$end = false;
			} else {
				// Last row of this batch, clamped to the last row of the result
				$rowNum = min( $numRows - 1, ( $i + 1 ) * $batchSize - 1 );
				$res->seek( $rowNum );
				$row = $res->fetchObject();
				$end = (int)$row->page_id;
			}

			# Sanity check order
			if ( $start && $end && $start > $end ) {
				throw new MWException( __METHOD__ . ': Internal error: query result out of order' );
			}

			$batches[] = array( $start, $end );
		}

		return array( 'numRows' => $numRows, 'batches' => $batches );
	}
}