[ Index ] |
PHP Cross Reference of MediaWiki-1.24.0 |
[Summary view] [Print] [Text view]
<?php
/**
 * Class for fetching backlink lists, approximate backlink counts and
 * partitions.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 * http://www.gnu.org/copyleft/gpl.html
 *
 * @file
 * @author Tim Starling
 * @author Aaron Schulz
 * @copyright © 2009, Tim Starling, Domas Mituzas
 * @copyright © 2010, Max Sem
 * @copyright © 2011, Antoine Musso
 */

/**
 * Class for fetching backlink lists, approximate backlink counts and
 * partitions. This is a shared cache.
 *
 * Instances of this class should typically be fetched with the method
 * $title->getBacklinkCache().
 *
 * Ideally you should only get your backlinks from here when you think
 * there is some advantage in caching them. Otherwise it's just a waste
 * of memory.
 *
 * Introduced by r47317
 *
 * @internal documentation reviewed on 18 Mar 2011 by hashar
 */
class BacklinkCache {
	/** @var ProcessCacheLRU */
	protected static $cache;

	/**
	 * Multi dimensions array representing batches. Keys are:
	 *  > (string) links table name
	 *   > (int) batch size
	 *    > 'numRows' : Number of rows for this link table
	 *    > 'batches' : array( $start, $end )
	 *
	 * Only complete entries are ever stored here, so any entry that is set
	 * can be read without further checks.
	 *
	 * @see BacklinkCache::partitionResult()
	 *
	 * Cleared with BacklinkCache::clear()
	 */
	protected $partitionCache = array();

	/**
	 * Contains the whole links from a database result.
	 * This is raw data that will be partitioned in $partitionCache
	 *
	 * Initialized with BacklinkCache::queryLinks()
	 * Cleared with BacklinkCache::clear()
	 */
	protected $fullResultCache = array();

	/**
	 * Local copy of a database object.
	 *
	 * Accessor: BacklinkCache::getDB()
	 * Mutator : BacklinkCache::setDB()
	 * Cleared with BacklinkCache::clear()
	 */
	protected $db;

	/**
	 * Local copy of a Title object
	 */
	protected $title;

	/** Expiry, in seconds, used for every cache entry this class writes */
	const CACHE_EXPIRY = 3600;

	/**
	 * Create a new BacklinkCache
	 *
	 * @param Title $title : Title object to create a backlink cache for
	 */
	public function __construct( Title $title ) {
		$this->title = $title;
	}

	/**
	 * Create a new BacklinkCache or reuse any existing one.
	 * Currently, only one cache instance can exist; callers that
	 * need multiple backlink cache objects should keep them in scope.
	 *
	 * @param Title $title Title object to get a backlink cache for
	 * @return BacklinkCache
	 */
	public static function get( Title $title ) {
		if ( !self::$cache ) { // init cache
			self::$cache = new ProcessCacheLRU( 1 );
		}
		$dbKey = $title->getPrefixedDBkey();
		// Use the class constant rather than a second hard-coded copy of the expiry
		if ( !self::$cache->has( $dbKey, 'obj', self::CACHE_EXPIRY ) ) {
			self::$cache->set( $dbKey, 'obj', new self( $title ) );
		}

		return self::$cache->get( $dbKey, 'obj' );
	}

	/**
	 * Serialization handler, disallows serializing the database object to
	 * prevent failures after this class is deserialized from cache with a
	 * dead DB connection.
	 *
	 * @return array Names of the properties to serialize
	 */
	public function __sleep() {
		return array( 'partitionCache', 'fullResultCache', 'title' );
	}

	/**
	 * Clear locally stored data and database object.
	 */
	public function clear() {
		$this->partitionCache = array();
		$this->fullResultCache = array();
		// getDB() tests the property with isset(), so unsetting forces a re-fetch
		unset( $this->db );
	}

	/**
	 * Set the Database object to use
	 *
	 * @param DatabaseBase $db
	 */
	public function setDB( $db ) {
		$this->db = $db;
	}

	/**
	 * Get the slave connection to the database
	 * When non existing, will initialize the connection.
	 * @return DatabaseBase
	 */
	protected function getDB() {
		if ( !isset( $this->db ) ) {
			$this->db = wfGetDB( DB_SLAVE );
		}

		return $this->db;
	}

	/**
	 * Get the backlinks for a given table, wrapped as a title array.
	 * An unbounded, unranged result is kept in process memory and reused.
	 * @param string $table
	 * @param int|bool $startId
	 * @param int|bool $endId
	 * @param int|INF $max
	 * @return TitleArrayFromResult
	 */
	public function getLinks( $table, $startId = false, $endId = false, $max = INF ) {
		return TitleArray::newFromResult( $this->queryLinks( $table, $startId, $endId, $max ) );
	}

	/**
	 * Get the backlinks for a given table. Cached in process memory only,
	 * and only when the query was unranged, selected 'all' and returned
	 * fewer rows than $max.
	 * @param string $table
	 * @param int|bool $startId
	 * @param int|bool $endId
	 * @param int|INF $max
	 * @param string $select 'all' or 'ids'
	 * @throws MWException If $table is not a known links table
	 * @return ResultWrapper
	 */
	protected function queryLinks( $table, $startId, $endId, $max, $select = 'all' ) {
		// Resolve the prefix before profiling starts: getPrefix() throws for unknown
		// tables, and throwing after wfProfileIn() would leave the profiler unbalanced.
		$fromField = $this->getPrefix( $table ) . '_from';

		wfProfileIn( __METHOD__ );

		if ( !$startId && !$endId && is_infinite( $max )
			&& isset( $this->fullResultCache[$table] )
		) {
			wfDebug( __METHOD__ . ": got results from cache\n" );
			$res = $this->fullResultCache[$table];
		} else {
			wfDebug( __METHOD__ . ": got results from DB\n" );
			$conds = $this->getConditions( $table );
			// Use the from field in the condition rather than the joined page_id,
			// because databases are stupid and don't necessarily propagate indexes.
			if ( $startId ) {
				$conds[] = "$fromField >= " . intval( $startId );
			}
			if ( $endId ) {
				$conds[] = "$fromField <= " . intval( $endId );
			}
			$options = array( 'ORDER BY' => $fromField );
			if ( is_finite( $max ) && $max > 0 ) {
				$options['LIMIT'] = $max;
			}

			if ( $select === 'ids' ) {
				// Just select from the backlink table and ignore the page JOIN.
				// The join clause is dropped from the conditions by pattern match.
				$res = $this->getDB()->select(
					$table,
					array( "$fromField AS page_id" ),
					array_filter( $conds, function ( $clause ) { // kind of janky
						return !preg_match( '/(\b|=)page_id(\b|=)/', $clause );
					} ),
					__METHOD__,
					$options
				);
			} else {
				// Select from the backlink table and JOIN with page title information
				$res = $this->getDB()->select(
					array( $table, 'page' ),
					array( 'page_namespace', 'page_title', 'page_id' ),
					$conds,
					__METHOD__,
					array_merge( array( 'STRAIGHT_JOIN' ), $options )
				);
			}

			if ( $select === 'all' && !$startId && !$endId && $res->numRows() < $max ) {
				// The full results fit within the limit, so cache them
				$this->fullResultCache[$table] = $res;
			} else {
				wfDebug( __METHOD__ . ": results from DB were uncacheable\n" );
			}
		}

		wfProfileOut( __METHOD__ );

		return $res;
	}

	/**
	 * Get the field name prefix for a given table
	 *
	 * Tables not in the built-in map are offered to the
	 * 'BacklinkCacheGetPrefix' hook before giving up.
	 *
	 * @param string $table
	 * @throws MWException If neither the built-in map nor the hook knows $table
	 * @return string
	 */
	protected function getPrefix( $table ) {
		static $prefixes = array(
			'pagelinks' => 'pl',
			'imagelinks' => 'il',
			'categorylinks' => 'cl',
			'templatelinks' => 'tl',
			'redirect' => 'rd',
		);

		if ( isset( $prefixes[$table] ) ) {
			return $prefixes[$table];
		}

		$prefix = null;
		wfRunHooks( 'BacklinkCacheGetPrefix', array( $table, &$prefix ) );
		if ( $prefix ) {
			return $prefix;
		}

		throw new MWException( "Invalid table \"$table\" in " . __CLASS__ );
	}

	/**
	 * Get the SQL condition array for selecting backlinks, with a join
	 * on the page table.
	 *
	 * Tables without a built-in case are offered to the
	 * 'BacklinkCacheGetConditions' hook before giving up.
	 *
	 * @param string $table
	 * @throws MWException If $table is unknown and no hook supplied conditions
	 * @return array
	 */
	protected function getConditions( $table ) {
		$prefix = $this->getPrefix( $table );

		switch ( $table ) {
			case 'pagelinks':
			case 'templatelinks':
				$conds = array(
					"{$prefix}_namespace" => $this->title->getNamespace(),
					"{$prefix}_title" => $this->title->getDBkey(),
					"page_id={$prefix}_from"
				);
				break;
			case 'redirect':
				$conds = array(
					"{$prefix}_namespace" => $this->title->getNamespace(),
					"{$prefix}_title" => $this->title->getDBkey(),
					// Match rows whose interwiki field is either empty or NULL
					$this->getDB()->makeList( array(
						"{$prefix}_interwiki" => '',
						"{$prefix}_interwiki IS NULL",
					), LIST_OR ),
					"page_id={$prefix}_from"
				);
				break;
			case 'imagelinks':
			case 'categorylinks':
				$conds = array(
					"{$prefix}_to" => $this->title->getDBkey(),
					"page_id={$prefix}_from"
				);
				break;
			default:
				$conds = null;
				wfRunHooks( 'BacklinkCacheGetConditions', array( $table, $this->title, &$conds ) );
				if ( !$conds ) {
					throw new MWException( "Invalid table \"$table\" in " . __CLASS__ );
				}
		}

		return $conds;
	}

	/**
	 * Check if there are any backlinks
	 * @param string $table
	 * @return bool
	 */
	public function hasLinks( $table ) {
		return ( $this->getNumLinks( $table, 1 ) > 0 );
	}

	/**
	 * Get the approximate number of backlinks
	 * @param string $table
	 * @param int|INF $max Only count up to this many backlinks
	 * @return int
	 */
	public function getNumLinks( $table, $max = INF ) {
		global $wgMemc, $wgUpdateRowsPerJob;

		// 1) try partition cache ...
		if ( isset( $this->partitionCache[$table] ) ) {
			// Every batch size stored for this table carries the same row count,
			// so the first entry is as good as any.
			$entry = reset( $this->partitionCache[$table] );

			return min( $max, $entry['numRows'] );
		}

		// 2) ... then try full result cache ...
		if ( isset( $this->fullResultCache[$table] ) ) {
			return min( $max, $this->fullResultCache[$table]->numRows() );
		}

		$memcKey = wfMemcKey( 'numbacklinks', md5( $this->title->getPrefixedDBkey() ), $table );

		// 3) ... fallback to memcached ...
		// Any falsy value, including a stored count of 0, is treated as a miss
		// and falls through to the database.
		$count = $wgMemc->get( $memcKey );
		if ( $count ) {
			return min( $max, $count );
		}

		// 4) fetch from the database ...
		if ( is_infinite( $max ) ) { // no limit at all
			// Use partition() since it will batch the query and skip the JOIN.
			// Use $wgUpdateRowsPerJob just to encourage cache reuse for jobs.
			$this->partition( $table, $wgUpdateRowsPerJob ); // updates $this->partitionCache
			return $this->partitionCache[$table][$wgUpdateRowsPerJob]['numRows'];
		} else { // probably some sane limit
			// Fetch the full title info, since the caller will likely need it next
			$count = $this->getLinks( $table, false, false, $max )->count();
			if ( $count < $max ) { // full count
				$wgMemc->set( $memcKey, $count, self::CACHE_EXPIRY );
			}
		}

		return min( $max, $count );
	}

	/**
	 * Partition the backlinks into batches.
	 * Returns an array giving the start and end of each range. The first
	 * batch has a start of false, and the last batch has an end of false.
	 *
	 * The partition cache entry is only written once a complete result is
	 * available, so a failure part-way through (for instance a database
	 * error) leaves no half-built entry behind for later calls to trip over.
	 *
	 * @param string $table The links table name
	 * @param int $batchSize Must be at least 1
	 * @throws MWException If $batchSize is smaller than 1
	 * @return array
	 */
	public function partition( $table, $batchSize ) {
		global $wgMemc;

		// The batch size is used as a divisor below and in partitionResult()
		if ( (int)$batchSize < 1 ) {
			throw new MWException( __METHOD__ . ": Invalid batch size \"$batchSize\"" );
		}

		// 1) try partition cache ...
		if ( isset( $this->partitionCache[$table][$batchSize] ) ) {
			wfDebug( __METHOD__ . ": got from partition cache\n" );

			return $this->partitionCache[$table][$batchSize]['batches'];
		}

		// 2) ... then try full result cache ...
		if ( isset( $this->fullResultCache[$table] ) ) {
			$cacheEntry = $this->partitionResult( $this->fullResultCache[$table], $batchSize );
			$this->partitionCache[$table][$batchSize] = $cacheEntry;
			wfDebug( __METHOD__ . ": got from full result cache\n" );

			return $cacheEntry['batches'];
		}

		$memcKey = wfMemcKey(
			'backlinks',
			md5( $this->title->getPrefixedDBkey() ),
			$table,
			$batchSize
		);

		// 3) ... fallback to memcached ...
		$memcValue = $wgMemc->get( $memcKey );
		if ( is_array( $memcValue ) ) {
			$cacheEntry = $memcValue;
			$this->partitionCache[$table][$batchSize] = $cacheEntry;
			wfDebug( __METHOD__ . ": got from memcached $memcKey\n" );

			return $cacheEntry['batches'];
		}

		// 4) ... finally fetch from the slow database :(
		$cacheEntry = array( 'numRows' => 0, 'batches' => array() ); // final result
		// Do the selects in batches to avoid client-side OOMs (bug 43452).
		// Use a LIMIT that plays well with $batchSize to keep equal sized partitions.
		$selectSize = max( $batchSize, 200000 - ( 200000 % $batchSize ) );
		$start = false;
		do {
			$res = $this->queryLinks( $table, $start, false, $selectSize, 'ids' );
			$partitions = $this->partitionResult( $res, $batchSize, false );
			// Merge the link count and range partitions for this chunk
			$cacheEntry['numRows'] += $partitions['numRows'];
			$cacheEntry['batches'] = array_merge( $cacheEntry['batches'], $partitions['batches'] );
			if ( count( $partitions['batches'] ) ) {
				list( , $lEnd ) = end( $partitions['batches'] );
				$start = $lEnd + 1; // pick up after this inclusive range
			}
			// A chunk that came back full may have more rows behind it
		} while ( $partitions['numRows'] >= $selectSize );
		// Make sure the first range has start=false and the last one has end=false
		if ( count( $cacheEntry['batches'] ) ) {
			$cacheEntry['batches'][0][0] = false;
			$cacheEntry['batches'][count( $cacheEntry['batches'] ) - 1][1] = false;
		}

		// Only now that the entry is complete, publish it to the local cache
		$this->partitionCache[$table][$batchSize] = $cacheEntry;

		// Save partitions to memcached
		$wgMemc->set( $memcKey, $cacheEntry, self::CACHE_EXPIRY );

		// Save backlink count to memcached
		$memcKey = wfMemcKey( 'numbacklinks', md5( $this->title->getPrefixedDBkey() ), $table );
		$wgMemc->set( $memcKey, $cacheEntry['numRows'], self::CACHE_EXPIRY );

		wfDebug( __METHOD__ . ": got from database\n" );

		return $cacheEntry['batches'];
	}

	/**
	 * Partition a DB result with backlinks in it into batches
	 *
	 * Each batch is an array( $start, $end ) of page IDs read from the
	 * 'page_id' field of the first and last row of that batch. When
	 * $isComplete is true the very first start and the very last end are
	 * replaced with false.
	 *
	 * @param ResultWrapper $res Database result
	 * @param int $batchSize Must be at least 1
	 * @param bool $isComplete Whether $res includes all the backlinks
	 * @throws MWException If $batchSize is smaller than 1 or rows are out of order
	 * @return array With keys 'numRows' (int) and 'batches' (array of ranges)
	 */
	protected function partitionResult( $res, $batchSize, $isComplete = true ) {
		if ( (int)$batchSize < 1 ) {
			throw new MWException( __METHOD__ . ": Invalid batch size \"$batchSize\"" );
		}

		$batches = array();
		$numRows = $res->numRows();
		$numBatches = ceil( $numRows / $batchSize );

		for ( $i = 0; $i < $numBatches; $i++ ) {
			if ( $i == 0 && $isComplete ) {
				$start = false;
			} else {
				// First row of this batch
				$rowNum = $i * $batchSize;
				$res->seek( $rowNum );
				$row = $res->fetchObject();
				$start = (int)$row->page_id;
			}

			if ( $i == ( $numBatches - 1 ) && $isComplete ) {
				$end = false;
			} else {
				// Last row of this batch, clamped for a short final batch
				$rowNum = min( $numRows - 1, ( $i + 1 ) * $batchSize - 1 );
				$res->seek( $rowNum );
				$row = $res->fetchObject();
				$end = (int)$row->page_id;
			}

			# Sanity check order
			if ( $start && $end && $start > $end ) {
				throw new MWException( __METHOD__ . ': Internal error: query result out of order' );
			}

			$batches[] = array( $start, $end );
		}

		return array( 'numRows' => $numRows, 'batches' => $batches );
	}
}
title
Description
Body
title
Description
Body
title
Description
Body
title
Body
Generated: Fri Nov 28 14:03:12 2014 | Cross-referenced by PHPXref 0.7.1 |