MediaWiki
REL1_24
|
<?php

/**
 * Cache for the set of pages that link to a given title ("backlinks").
 *
 * Results are looked up, in order, in an in-object partition cache, an
 * in-object full result cache, the shared $wgMemc cache and finally the
 * database. Supported link tables are 'pagelinks', 'imagelinks',
 * 'categorylinks', 'templatelinks' and 'redirect'; other tables can be
 * supported through the 'BacklinkCacheGetPrefix' and
 * 'BacklinkCacheGetConditions' hooks.
 */
class BacklinkCache {
	/** @var ProcessCacheLRU Holds BacklinkCache instances; created lazily by get() */
	protected static $cache;

	/**
	 * Partition data indexed as [table][batch size]. Every stored entry is
	 * an array with the keys 'numRows' (int) and 'batches' (list of
	 * array( start, end ) ID ranges).
	 *
	 * @var array
	 */
	protected $partitionCache = array();

	/**
	 * Unrestricted query results (joined with page title data), indexed by
	 * link table name.
	 *
	 * @var array
	 */
	protected $fullResultCache = array();

	/**
	 * Database handle used for queries. Set through setDB(), otherwise
	 * obtained from wfGetDB( DB_SLAVE ) on first use.
	 *
	 * @var mixed
	 */
	protected $db;

	/** @var Title The title whose backlinks are being looked up */
	protected $title;

	/** Expiry value passed to $wgMemc->set() for cached counts and partitions */
	const CACHE_EXPIRY = 3600;

	/**
	 * Create a backlink cache for one title.
	 *
	 * @param Title $title Title to look up backlinks for
	 */
	public function __construct( Title $title ) {
		$this->title = $title;
	}

	/**
	 * Get a BacklinkCache for the given title, reusing the instance held in
	 * the static process cache when it has one for the title's prefixed DB key.
	 *
	 * @param Title $title Title to look up backlinks for
	 * @return BacklinkCache
	 */
	public static function get( Title $title ) {
		if ( !self::$cache ) { // init cache
			self::$cache = new ProcessCacheLRU( 1 );
		}
		$dbKey = $title->getPrefixedDBkey();
		if ( !self::$cache->has( $dbKey, 'obj', 3600 ) ) {
			self::$cache->set( $dbKey, 'obj', new self( $title ) );
		}

		return self::$cache->get( $dbKey, 'obj' );
	}

	/**
	 * Serialization handler: only the caches and the title are serialized,
	 * the database handle is left out.
	 *
	 * @return array Names of the properties to serialize
	 */
	public function __sleep() {
		return array( 'partitionCache', 'fullResultCache', 'title' );
	}

	/**
	 * Drop the in-object caches and forget the database handle, so that the
	 * next getDB() call fetches a new one.
	 */
	public function clear() {
		$this->partitionCache = array();
		$this->fullResultCache = array();
		// Assign null instead of unset(): getDB() only tests isset(), and
		// the declared property stays defined on the object.
		$this->db = null;
	}

	/**
	 * Set the database handle to use for all further queries.
	 *
	 * @param mixed $db Database handle
	 */
	public function setDB( $db ) {
		$this->db = $db;
	}

	/**
	 * Get the database handle, fetching a DB_SLAVE handle if none is set.
	 *
	 * @return mixed Database handle
	 */
	protected function getDB() {
		if ( !isset( $this->db ) ) {
			$this->db = wfGetDB( DB_SLAVE );
		}

		return $this->db;
	}

	/**
	 * Get the backlinks stored in a link table, as a title array.
	 *
	 * @param string $table Link table name
	 * @param int|bool $startId Lowest source page ID to include, or false for no lower bound
	 * @param int|bool $endId Highest source page ID to include, or false for no upper bound
	 * @param int|float $max Maximum number of rows; INF for no limit
	 * @return mixed Result of TitleArray::newFromResult()
	 */
	public function getLinks( $table, $startId = false, $endId = false, $max = INF ) {
		return TitleArray::newFromResult( $this->queryLinks( $table, $startId, $endId, $max ) );
	}

	/**
	 * Run (or fetch from the full result cache) the backlink query for a table.
	 *
	 * @param string $table Link table name
	 * @param int|bool $startId Lowest source page ID to include, or false for no lower bound
	 * @param int|bool $endId Highest source page ID to include, or false for no upper bound
	 * @param int|float $max Maximum number of rows; INF for no limit
	 * @param string $select 'all' to join with the page table and select
	 *  namespace, title and ID; 'ids' to select only the source page ID
	 *  (aliased as page_id) from the link table
	 * @return mixed Query result object
	 */
	protected function queryLinks( $table, $startId, $endId, $max, $select = 'all' ) {
		wfProfileIn( __METHOD__ );

		$fromField = $this->getPrefix( $table ) . '_from';

		if ( !$startId && !$endId && is_infinite( $max )
			&& isset( $this->fullResultCache[$table] )
		) {
			wfDebug( __METHOD__ . ": got results from cache\n" );
			$res = $this->fullResultCache[$table];
		} else {
			wfDebug( __METHOD__ . ": got results from DB\n" );
			$conds = $this->getConditions( $table );
			// Use the from field in the condition rather than the joined page_id,
			// because databases are stupid and don't necessarily propagate indexes.
			if ( $startId ) {
				$conds[] = "$fromField >= " . intval( $startId );
			}
			if ( $endId ) {
				$conds[] = "$fromField <= " . intval( $endId );
			}
			$options = array( 'ORDER BY' => $fromField );
			if ( is_finite( $max ) && $max > 0 ) {
				$options['LIMIT'] = $max;
			}

			if ( $select === 'ids' ) {
				// Just select from the backlink table and ignore the page JOIN
				$res = $this->getDB()->select(
					$table,
					array( $fromField . ' AS page_id' ),
					array_filter( $conds, function ( $clause ) { // kind of janky
						// Drop the join condition, since "page" is not queried here
						return !preg_match( '/(\b|=)page_id(\b|=)/', $clause );
					} ),
					__METHOD__,
					$options
				);
			} else {
				// Select from the backlink table and JOIN with page title information
				$res = $this->getDB()->select(
					array( $table, 'page' ),
					array( 'page_namespace', 'page_title', 'page_id' ),
					$conds,
					__METHOD__,
					array_merge( array( 'STRAIGHT_JOIN' ), $options )
				);
			}

			if ( $select === 'all' && !$startId && !$endId && $res->numRows() < $max ) {
				// The full results fit within the limit, so cache them
				$this->fullResultCache[$table] = $res;
			} else {
				wfDebug( __METHOD__ . ": results from DB were uncacheable\n" );
			}
		}

		wfProfileOut( __METHOD__ );

		return $res;
	}

	/**
	 * Get the field name prefix for a link table.
	 *
	 * Tables not known here are passed to the 'BacklinkCacheGetPrefix' hook.
	 *
	 * @param string $table Link table name
	 * @return string Field prefix, without the trailing underscore
	 * @throws MWException If the table is unknown and no hook supplies a prefix
	 */
	protected function getPrefix( $table ) {
		static $prefixes = array(
			'pagelinks' => 'pl',
			'imagelinks' => 'il',
			'categorylinks' => 'cl',
			'templatelinks' => 'tl',
			'redirect' => 'rd',
		);

		if ( isset( $prefixes[$table] ) ) {
			return $prefixes[$table];
		}

		$prefix = null;
		wfRunHooks( 'BacklinkCacheGetPrefix', array( $table, &$prefix ) );
		if ( $prefix ) {
			return $prefix;
		}

		throw new MWException( "Invalid table \"$table\" in " . __CLASS__ );
	}

	/**
	 * Get the query conditions selecting the rows of a link table that point
	 * at this title, including the join condition against the page table.
	 *
	 * Tables not known here are passed to the 'BacklinkCacheGetConditions' hook.
	 *
	 * @param string $table Link table name
	 * @return array Conditions for the select() call
	 * @throws MWException If the table is unknown and no hook supplies conditions
	 */
	protected function getConditions( $table ) {
		$prefix = $this->getPrefix( $table );

		switch ( $table ) {
			case 'pagelinks':
			case 'templatelinks':
				$conds = array(
					"{$prefix}_namespace" => $this->title->getNamespace(),
					"{$prefix}_title" => $this->title->getDBkey(),
					"page_id={$prefix}_from"
				);
				break;
			case 'redirect':
				$conds = array(
					"{$prefix}_namespace" => $this->title->getNamespace(),
					"{$prefix}_title" => $this->title->getDBkey(),
					// Match rows whose interwiki field is either empty or NULL
					$this->getDB()->makeList( array(
						"{$prefix}_interwiki" => '',
						"{$prefix}_interwiki IS NULL",
					), LIST_OR ),
					"page_id={$prefix}_from"
				);
				break;
			case 'imagelinks':
			case 'categorylinks':
				$conds = array(
					"{$prefix}_to" => $this->title->getDBkey(),
					"page_id={$prefix}_from"
				);
				break;
			default:
				$conds = null;
				wfRunHooks( 'BacklinkCacheGetConditions', array( $table, $this->title, &$conds ) );
				if ( !$conds ) {
					throw new MWException( "Invalid table \"$table\" in " . __CLASS__ );
				}
		}

		return $conds;
	}

	/**
	 * Check whether at least one backlink exists in a link table.
	 *
	 * @param string $table Link table name
	 * @return bool
	 */
	public function hasLinks( $table ) {
		return ( $this->getNumLinks( $table, 1 ) > 0 );
	}

	/**
	 * Get the number of backlinks in a link table, capped at $max.
	 *
	 * @param string $table Link table name
	 * @param int|float $max Highest count the caller cares about; INF for the full count
	 * @return int
	 */
	public function getNumLinks( $table, $max = INF ) {
		global $wgMemc, $wgUpdateRowsPerJob;

		// 1) try partition cache ...
		if ( isset( $this->partitionCache[$table] ) ) {
			// Any batch size will do: every entry records the same table's row count
			$entry = reset( $this->partitionCache[$table] );

			return min( $max, $entry['numRows'] );
		}

		// 2) ... then try full result cache ...
		if ( isset( $this->fullResultCache[$table] ) ) {
			return min( $max, $this->fullResultCache[$table]->numRows() );
		}

		$memcKey = wfMemcKey( 'numbacklinks', md5( $this->title->getPrefixedDBkey() ), $table );

		// 3) ... fallback to memcached ...
		$count = $wgMemc->get( $memcKey );
		if ( $count ) {
			return min( $max, $count );
		}

		// 4) fetch from the database ...
		if ( is_infinite( $max ) ) { // no limit at all
			// Use partition() since it will batch the query and skip the JOIN.
			// Use $wgUpdateRowsPerJob just to encourage cache reuse for jobs.
			$this->partition( $table, $wgUpdateRowsPerJob ); // updates $this->partitionCache
			return $this->partitionCache[$table][$wgUpdateRowsPerJob]['numRows'];
		} else { // probably some sane limit
			// Fetch the full title info, since the caller will likely need it next
			$count = $this->getLinks( $table, false, false, $max )->count();
			if ( $count < $max ) { // full count
				$wgMemc->set( $memcKey, $count, self::CACHE_EXPIRY );
			}
		}

		return min( $max, $count );
	}

	/**
	 * Split the backlinks of a link table into ranges of source page IDs
	 * holding at most $batchSize rows each.
	 *
	 * The first range has false as its start and the last one false as its
	 * end, so that together they cover every ID.
	 *
	 * @param string $table Link table name
	 * @param int $batchSize Maximum number of rows per range
	 * @return array List of array( start, end ) pairs
	 */
	public function partition( $table, $batchSize ) {
		global $wgMemc;

		// 1) try partition cache ...
		if ( isset( $this->partitionCache[$table][$batchSize] ) ) {
			wfDebug( __METHOD__ . ": got from partition cache\n" );

			return $this->partitionCache[$table][$batchSize]['batches'];
		}

		// The entry is built in a local variable and only stored once it is
		// complete. Storing a placeholder first would leave a non-array value
		// in the cache if anything below throws, and isset() would then treat
		// that placeholder as a valid hit on the next call.

		// 2) ... then try full result cache ...
		if ( isset( $this->fullResultCache[$table] ) ) {
			$cacheEntry = $this->partitionResult( $this->fullResultCache[$table], $batchSize );
			$this->partitionCache[$table][$batchSize] = $cacheEntry;
			wfDebug( __METHOD__ . ": got from full result cache\n" );

			return $cacheEntry['batches'];
		}

		$memcKey = wfMemcKey(
			'backlinks',
			md5( $this->title->getPrefixedDBkey() ),
			$table,
			$batchSize
		);

		// 3) ... fallback to memcached ...
		$memcValue = $wgMemc->get( $memcKey );
		if ( is_array( $memcValue ) ) {
			$cacheEntry = $memcValue;
			$this->partitionCache[$table][$batchSize] = $cacheEntry;
			wfDebug( __METHOD__ . ": got from memcached $memcKey\n" );

			return $cacheEntry['batches'];
		}

		// 4) ... finally fetch from the slow database :(
		$cacheEntry = array( 'numRows' => 0, 'batches' => array() ); // final result
		// Do the selects in batches to avoid client-side OOMs (bug 43452).
		// Use a LIMIT that plays well with $batchSize to keep equal sized partitions.
		$selectSize = max( $batchSize, 200000 - ( 200000 % $batchSize ) );
		$start = false;
		do {
			$res = $this->queryLinks( $table, $start, false, $selectSize, 'ids' );
			$partitions = $this->partitionResult( $res, $batchSize, false );
			// Merge the link count and range partitions for this chunk
			$cacheEntry['numRows'] += $partitions['numRows'];
			$cacheEntry['batches'] = array_merge( $cacheEntry['batches'], $partitions['batches'] );
			if ( count( $partitions['batches'] ) ) {
				list( , $lEnd ) = end( $partitions['batches'] );
				$start = $lEnd + 1; // pick up after this inclusive range
			}
		} while ( $partitions['numRows'] >= $selectSize );
		// Make sure the first range has start=false and the last one has end=false
		if ( count( $cacheEntry['batches'] ) ) {
			$cacheEntry['batches'][0][0] = false;
			$cacheEntry['batches'][count( $cacheEntry['batches'] ) - 1][1] = false;
		}

		$this->partitionCache[$table][$batchSize] = $cacheEntry;

		// Save partitions to memcached
		$wgMemc->set( $memcKey, $cacheEntry, self::CACHE_EXPIRY );

		// Save backlink count to memcached
		$memcKey = wfMemcKey( 'numbacklinks', md5( $this->title->getPrefixedDBkey() ), $table );
		$wgMemc->set( $memcKey, $cacheEntry['numRows'], self::CACHE_EXPIRY );

		wfDebug( __METHOD__ . ": got from database\n" );

		return $cacheEntry['batches'];
	}

	/**
	 * Split a query result into ranges of page IDs holding at most
	 * $batchSize rows each.
	 *
	 * @param mixed $res Query result whose rows have a page_id field, ordered by that ID
	 * @param int $batchSize Maximum number of rows per range
	 * @param bool $isComplete Whether $res holds the whole set of rows; if so,
	 *  the first range gets false as its start and the last range false as its end
	 * @return array Array with the keys 'numRows' (int) and 'batches'
	 *  (list of array( start, end ) pairs)
	 * @throws MWException If a range starts after it ends
	 */
	protected function partitionResult( $res, $batchSize, $isComplete = true ) {
		$batches = array();
		$numRows = $res->numRows();
		$numBatches = ceil( $numRows / $batchSize );

		for ( $i = 0; $i < $numBatches; $i++ ) {
			if ( $i == 0 && $isComplete ) {
				$start = false;
			} else {
				// First row of this batch
				$rowNum = $i * $batchSize;
				$res->seek( $rowNum );
				$row = $res->fetchObject();
				$start = (int)$row->page_id;
			}

			if ( $i == ( $numBatches - 1 ) && $isComplete ) {
				$end = false;
			} else {
				// Last row of this batch, clamped to the last row of the result
				$rowNum = min( $numRows - 1, ( $i + 1 ) * $batchSize - 1 );
				$res->seek( $rowNum );
				$row = $res->fetchObject();
				$end = (int)$row->page_id;
			}

			# Sanity check order
			if ( $start && $end && $start > $end ) {
				throw new MWException( __METHOD__ . ': Internal error: query result out of order' );
			}

			$batches[] = array( $start, $end );
		}

		return array( 'numRows' => $numRows, 'batches' => $batches );
	}
}