MediaWiki
REL1_19
|
<?php
/**
 * Looks up the pages that link to a given title ("backlinks") through one of
 * the link tables, and splits those results into page_id ranges for batching.
 *
 * Unbounded query results are kept per table in $fullResultCache. Batch
 * partitions are kept per table and batch size in $partitionCache and are
 * additionally stored through $wgMemc.
 */
class BacklinkCache {

	/**
	 * Partition data, indexed as [$table][$batchSize]. Each entry is the
	 * array returned by partitionResult(): 'numRows' and 'batches'.
	 */
	protected $partitionCache = array();

	/**
	 * Unbounded query results as returned by the database select() call,
	 * indexed by table name. Only filled by getLinks() when no ID range
	 * was requested.
	 */
	protected $fullResultCache = array();

	/**
	 * Database connection used for the queries. Set through setDB(), or
	 * lazily by getDB(). Not serialized (see __sleep()).
	 */
	protected $db;

	/**
	 * Title object whose backlinks are looked up.
	 */
	protected $title;

	/**
	 * Expiry value passed to $wgMemc->set() for partition entries
	 * (presumably seconds -- confirm against the cache backend).
	 */
	const CACHE_EXPIRY = 3600;

	/**
	 * @param $title Title object to look up backlinks for
	 */
	function __construct( $title ) {
		$this->title = $title;
	}

	/**
	 * Serialization handler: lists the properties to keep. The database
	 * connection is deliberately left out.
	 *
	 * @return array
	 */
	function __sleep() {
		return array( 'partitionCache', 'fullResultCache', 'title' );
	}

	/**
	 * Drop both in-process caches and forget the database connection.
	 */
	public function clear() {
		$this->partitionCache = array();
		$this->fullResultCache = array();
		unset( $this->db );
	}

	/**
	 * Set the database connection to use for subsequent queries.
	 *
	 * @param $db Database connection object
	 */
	public function setDB( $db ) {
		$this->db = $db;
	}

	/**
	 * Get the database connection, falling back to wfGetDB( DB_SLAVE )
	 * when none has been set.
	 *
	 * @return Database connection object
	 */
	protected function getDB() {
		if ( !isset( $this->db ) ) {
			$this->db = wfGetDB( DB_SLAVE );
		}

		return $this->db;
	}

	/**
	 * Get the backlinks for a table, optionally restricted to a range of
	 * source page IDs.
	 *
	 * Ranged queries always hit the database. Unbounded queries are run
	 * once per table and the result is reused from $fullResultCache.
	 *
	 * @param $table String: link table name
	 * @param $startId Integer|false: lowest source page ID, or false for no lower bound
	 * @param $endId Integer|false: highest source page ID, or false for no upper bound
	 * @return TitleArray built by TitleArray::newFromResult()
	 * @throws MWException if $table is not a known link table
	 */
	public function getLinks( $table, $startId = false, $endId = false ) {
		wfProfileIn( __METHOD__ );

		$fromField = $this->getPrefix( $table ) . '_from';

		if ( $startId || $endId ) {
			// Partial range, not cached
			wfDebug( __METHOD__ . ": from DB (uncacheable range)\n" );
			$conds = $this->getConditions( $table );

			// Use the from field in the condition rather than the joined page_id,
			// because databases are stupid and don't necessarily propagate indexes.
			if ( $startId ) {
				$conds[] = "$fromField >= " . intval( $startId );
			}

			if ( $endId ) {
				$conds[] = "$fromField <= " . intval( $endId );
			}

			$res = $this->queryLinks( $table, $conds, $fromField, __METHOD__ );
			$ta = TitleArray::newFromResult( $res );

			wfProfileOut( __METHOD__ );
			return $ta;
		}

		if ( !isset( $this->fullResultCache[$table] ) ) {
			wfDebug( __METHOD__ . ": from DB\n" );
			$this->fullResultCache[$table] = $this->queryLinks(
				$table,
				$this->getConditions( $table ),
				$fromField,
				__METHOD__
			);
		}

		$ta = TitleArray::newFromResult( $this->fullResultCache[$table] );

		wfProfileOut( __METHOD__ );
		return $ta;
	}

	/**
	 * Run the backlink SELECT shared by the ranged and unbounded paths of
	 * getLinks(): the link table joined with 'page', ordered by the
	 * table's "from" field.
	 *
	 * @param $table String: link table name
	 * @param $conds Array: query conditions, including the join condition
	 * @param $fromField String: name of the table's "from" column
	 * @param $fname String: caller name handed to select()
	 * @return Result object returned by select()
	 */
	protected function queryLinks( $table, $conds, $fromField, $fname ) {
		return $this->getDB()->select(
			array( $table, 'page' ),
			array( 'page_namespace', 'page_title', 'page_id' ),
			$conds,
			$fname,
			array(
				'STRAIGHT_JOIN',
				'ORDER BY' => $fromField,
			)
		);
	}

	/**
	 * Get the column-name prefix for a link table. Tables not listed here
	 * can be supplied through the 'BacklinkCacheGetPrefix' hook.
	 *
	 * @param $table String: link table name
	 * @return String
	 * @throws MWException if neither the built-in list nor the hook knows the table
	 */
	protected function getPrefix( $table ) {
		static $prefixes = array(
			'pagelinks'     => 'pl',
			'imagelinks'    => 'il',
			'categorylinks' => 'cl',
			'templatelinks' => 'tl',
			'redirect'      => 'rd',
		);

		if ( isset( $prefixes[$table] ) ) {
			return $prefixes[$table];
		}

		$prefix = null;
		wfRunHooks( 'BacklinkCacheGetPrefix', array( $table, &$prefix ) );
		if ( $prefix ) {
			return $prefix;
		}

		throw new MWException( "Invalid table \"$table\" in " . __CLASS__ );
	}

	/**
	 * Build the query conditions, including the join on page_id, that
	 * select the backlinks to $this->title from the given table. Tables
	 * not handled here can be supplied through the
	 * 'BacklinkCacheGetConditions' hook.
	 *
	 * @param $table String: link table name
	 * @return Array of conditions
	 * @throws MWException if the table is unknown
	 */
	protected function getConditions( $table ) {
		$prefix = $this->getPrefix( $table );

		// @todo FIXME: imagelinks and categorylinks do not rely on getNamespace,
		// they could be moved up for nicer case statements
		switch ( $table ) {
			case 'pagelinks':
			case 'templatelinks':
				$conds = array(
					"{$prefix}_namespace" => $this->title->getNamespace(),
					"{$prefix}_title"     => $this->title->getDBkey(),
					"page_id={$prefix}_from"
				);
				break;
			case 'redirect':
				$conds = array(
					"{$prefix}_namespace" => $this->title->getNamespace(),
					"{$prefix}_title"     => $this->title->getDBkey(),
					// Only rows whose interwiki field is empty or NULL
					$this->getDB()->makeList( array(
						"{$prefix}_interwiki = ''",
						"{$prefix}_interwiki is null",
					), LIST_OR ),
					"page_id={$prefix}_from"
				);
				break;
			case 'imagelinks':
				$conds = array(
					'il_to' => $this->title->getDBkey(),
					'page_id=il_from'
				);
				break;
			case 'categorylinks':
				$conds = array(
					'cl_to' => $this->title->getDBkey(),
					'page_id=cl_from',
				);
				break;
			default:
				$conds = null;
				wfRunHooks( 'BacklinkCacheGetConditions', array( $table, $this->title, &$conds ) );
				if ( !$conds ) {
					throw new MWException( "Invalid table \"$table\" in " . __CLASS__ );
				}
		}

		return $conds;
	}

	/**
	 * Get the number of backlinks for a table, preferring cached data
	 * over a new query.
	 *
	 * @param $table String: link table name
	 * @return Integer
	 */
	public function getNumLinks( $table ) {
		if ( isset( $this->fullResultCache[$table] ) ) {
			return $this->fullResultCache[$table]->numRows();
		}

		if ( isset( $this->partitionCache[$table] ) ) {
			// Any completed partition entry carries the row count. Skip
			// anything that is not a completed entry instead of indexing
			// into it, which would yield null.
			foreach ( $this->partitionCache[$table] as $entry ) {
				if ( is_array( $entry ) && isset( $entry['numRows'] ) ) {
					return $entry['numRows'];
				}
			}
		}

		$titleArray = $this->getLinks( $table );

		return $titleArray->count();
	}

	/**
	 * Split the backlinks of a table into batches of roughly $batchSize
	 * rows, expressed as ranges of source page IDs.
	 *
	 * Sources are tried in order: the in-process partition cache, the
	 * in-process full result, $wgMemc, and finally the database.
	 *
	 * @param $table String: link table name
	 * @param $batchSize Integer: desired number of rows per batch
	 * @return Array of array( $start, $end ) pairs; false marks an open end
	 * @throws MWException if $batchSize is not positive and the batches
	 *   have to be computed
	 */
	public function partition( $table, $batchSize ) {
		// 1) try partition cache ...
		//
		// Only a completed entry (an array) counts as a hit. isset() alone
		// is also true for a boolean false left behind by an interrupted
		// run or restored from serialized state, and indexing into that
		// would return null instead of the batches.
		if ( isset( $this->partitionCache[$table][$batchSize] )
			&& is_array( $this->partitionCache[$table][$batchSize] )
		) {
			wfDebug( __METHOD__ . ": got from partition cache\n" );
			return $this->partitionCache[$table][$batchSize]['batches'];
		}

		// 2) ... then try full result cache ...
		if ( isset( $this->fullResultCache[$table] ) ) {
			$cacheEntry = $this->partitionResult( $this->fullResultCache[$table], $batchSize );
			$this->partitionCache[$table][$batchSize] = $cacheEntry;
			wfDebug( __METHOD__ . ": got from full result cache\n" );

			return $cacheEntry['batches'];
		}

		// 3) ... fallback to memcached ...
		global $wgMemc;

		$memcKey = wfMemcKey(
			'backlinks',
			md5( $this->title->getPrefixedDBkey() ),
			$table,
			$batchSize
		);

		$memcValue = $wgMemc->get( $memcKey );

		if ( is_array( $memcValue ) ) {
			$this->partitionCache[$table][$batchSize] = $memcValue;
			wfDebug( __METHOD__ . ": got from memcached $memcKey\n" );

			return $memcValue['batches'];
		}

		// 4) ... finally fetch from the slow database :(
		//
		// getLinks() fills $this->fullResultCache[$table] as a side effect.
		// The partition cache is written only after everything succeeded,
		// so an exception here leaves no half-built entry behind.
		$this->getLinks( $table );
		$cacheEntry = $this->partitionResult( $this->fullResultCache[$table], $batchSize );
		$this->partitionCache[$table][$batchSize] = $cacheEntry;
		// Save to memcached
		$wgMemc->set( $memcKey, $cacheEntry, self::CACHE_EXPIRY );

		wfDebug( __METHOD__ . ": got from database\n" );
		return $cacheEntry['batches'];
	}

	/**
	 * Partition a database result into page_id ranges. The rows are
	 * divided evenly over ceil( numRows / $batchSize ) batches.
	 *
	 * The first batch has no lower bound and the last batch no upper
	 * bound (both given as false). Every other bound is derived from the
	 * page_id of the row at the batch boundary.
	 *
	 * @param $res Result object providing numRows(), seek() and fetchObject()
	 * @param $batchSize Integer: desired number of rows per batch
	 * @return Array with keys 'numRows' (Integer) and 'batches' (Array of
	 *   array( $start, $end ) pairs)
	 * @throws MWException if $batchSize is not positive, or if the
	 *   boundaries come out in descending order
	 */
	protected function partitionResult( $res, $batchSize ) {
		// A zero batch size would divide by zero below and a negative one
		// would silently produce no batches at all; fail loudly instead.
		if ( $batchSize <= 0 ) {
			throw new MWException( __METHOD__ . ': Invalid batch size' );
		}

		$batches = array();
		$numRows = $res->numRows();
		$numBatches = (int)ceil( $numRows / $batchSize );

		for ( $i = 0; $i < $numBatches; $i++ ) {
			if ( $i == 0 ) {
				$start = false;
			} else {
				$rowNum = intval( $numRows * $i / $numBatches );
				$res->seek( $rowNum );
				$row = $res->fetchObject();
				// Normalise to an integer so both bounds have the same type
				$start = intval( $row->page_id );
			}

			if ( $i == $numBatches - 1 ) {
				$end = false;
			} else {
				$rowNum = intval( $numRows * ( $i + 1 ) / $numBatches );
				$res->seek( $rowNum );
				$row = $res->fetchObject();
				// The boundary row opens the next batch, so stop just before it
				$end = $row->page_id - 1;
			}

			# Sanity check order
			if ( $start && $end && $start > $end ) {
				throw new MWException( __METHOD__ . ': Internal error: query result out of order' );
			}

			$batches[] = array( $start, $end );
		}

		return array( 'numRows' => $numRows, 'batches' => $batches );
	}
}