MediaWiki
REL1_22
|
<?php

/**
 * Job that invalidates the cached HTML of the pages linking to a title.
 *
 * run() picks one of two modes from the job parameters:
 *   - both 'start' and 'end' are set: a partial update restricted to that range;
 *   - otherwise: a full update, which either invalidates every backlink directly
 *     or splits the work into range-limited child jobs.
 *
 * Job parameters read by this class:
 *   - table:            backlink table name handed to the backlink cache
 *   - start, end:       range bounds handed to the backlink cache's getLinks()
 *   - rootJobTimestamp: optional; when set, only rows whose page_touched is older
 *                       than this timestamp are updated
 */
class HTMLCacheUpdateJob extends Job {
	/** Backlink cache obtained from the job's title in the constructor */
	protected $blCache;

	/** Values of $wgUpdateRowsPerJob and $wgUpdateRowsPerQuery at construction time */
	protected $rowsPerJob, $rowsPerQuery;

	/**
	 * @param Title $title The title whose backlinks are to be invalidated
	 * @param array $params Job parameters (see the class description)
	 * @param int $id Job ID, passed through to the parent constructor
	 */
	public function __construct( $title, $params, $id = 0 ) {
		global $wgUpdateRowsPerJob, $wgUpdateRowsPerQuery;

		parent::__construct( 'htmlCacheUpdate', $title, $params, $id );

		$this->rowsPerJob = $wgUpdateRowsPerJob;
		$this->rowsPerQuery = $wgUpdateRowsPerQuery;
		$this->blCache = $title->getBacklinkCache();
	}

	/**
	 * Run the job, dispatching on whether a range was supplied.
	 *
	 * @return bool Result of the selected update method
	 */
	public function run() {
		if ( isset( $this->params['start'] ) && isset( $this->params['end'] ) ) {
			# This is hit when a job is actually performed
			return $this->doPartialUpdate();
		} else {
			# This is hit when the jobs have to be inserted
			return $this->doFullUpdate();
		}
	}

	/**
	 * Update all of the backlinks.
	 *
	 * Nothing is invalidated when $wgMaxBacklinksInvalidate is not false and the
	 * estimated number of backlinks exceeds it. Otherwise the backlinks are
	 * invalidated directly when there are at most 2 * rowsPerJob of them, and
	 * split into child jobs when there are more.
	 *
	 * @return bool Always true
	 */
	protected function doFullUpdate() {
		global $wgMaxBacklinksInvalidate;

		# Get an estimate of the number of rows from the BacklinkCache
		$max = max( $this->rowsPerJob * 2, $wgMaxBacklinksInvalidate ) + 1;
		$numRows = $this->blCache->getNumLinks( $this->params['table'], $max );
		if ( $wgMaxBacklinksInvalidate !== false && $numRows > $wgMaxBacklinksInvalidate ) {
			# Newline-terminated like every other debug message in this class
			wfDebug( "Skipped HTML cache invalidation of {$this->title->getPrefixedText()}.\n" );
			return true;
		}

		if ( $numRows > $this->rowsPerJob * 2 ) {
			# Do fast cached partition
			$this->insertPartitionJobs();
		} else {
			# Get the links from the DB
			$titleArray = $this->blCache->getLinks( $this->params['table'] );
			# Check if the row count estimate was correct
			if ( $titleArray->count() > $this->rowsPerJob * 2 ) {
				# Not correct, do accurate partition
				wfDebug( __METHOD__ . ": row count estimate was incorrect, repartitioning\n" );
				$this->insertJobsFromTitles( $titleArray );
			} else {
				$this->invalidateTitles( $titleArray ); // just do the query
			}
		}

		return true;
	}

	/**
	 * Update the backlinks that fall inside this job's 'start'..'end' range.
	 *
	 * @return bool Always true
	 */
	protected function doPartialUpdate() {
		$titleArray = $this->blCache->getLinks(
			$this->params['table'], $this->params['start'], $this->params['end'] );
		if ( $titleArray->count() <= $this->rowsPerJob * 2 ) {
			# This partition is small enough, do the update
			$this->invalidateTitles( $titleArray );
		} else {
			# Partitioning was excessively inaccurate. Divide the job further.
			# This can occur when a large number of links are added in a short
			# period of time, say by updating a heavily-used template.
			$this->insertJobsFromTitles( $titleArray );
		}
		return true;
	}

	/**
	 * Build a child job covering one range of this job's backlink table.
	 *
	 * @param int|bool $start Lower bound for the child job
	 * @param int|bool $end Upper bound for the child job
	 * @param array $rootJobParams Extra parameters appended for de-duplication;
	 *   they never override 'table', 'start' or 'end'
	 * @return HTMLCacheUpdateJob
	 */
	private function newRangeJob( $start, $end, array $rootJobParams ) {
		return new HTMLCacheUpdateJob( $this->title,
			array(
				'table' => $this->params['table'],
				'start' => $start,
				'end' => $end,
			) + $rootJobParams // carry over information for de-duplication
		);
	}

	/**
	 * Partition the given titles into child jobs of about rowsPerJob titles each
	 * and push them onto the job queue. If fewer than two jobs would result, the
	 * titles are invalidated directly instead.
	 *
	 * @param Traversable $titleArray Titles to partition; each item must provide
	 *   getArticleID()
	 * @param array $rootJobParams Root job parameters for the child jobs. Values
	 *   given here take precedence; anything missing is filled in from
	 *   getRootJobParams().
	 */
	protected function insertJobsFromTitles( $titleArray, $rootJobParams = array() ) {
		// Carry over any "root job" information without discarding what the caller passed
		$rootJobParams = $rootJobParams + $this->getRootJobParams();

		# This method is also reached from doFullUpdate(), where this job has no
		# 'start'/'end'. Fall back to false rather than null for the outer bounds:
		# run() tests the bounds with isset(), which is false for null, so a null
		# bound would make the edge jobs redo the full update instead of their range.
		# NOTE(review): false is assumed to mean "unbounded" to BacklinkCache::getLinks() - confirm.
		$parentStart = isset( $this->params['start'] ) ? $this->params['start'] : false;
		$parentEnd = isset( $this->params['end'] ) ? $this->params['end'] : false;

		# We make subpartitions in the sense that the start of the first job
		# will be the start of the parent partition, and the end of the last
		# job will be the end of the parent partition.
		$jobs = array();
		$start = $parentStart; # start of the current job
		$numTitles = 0;
		foreach ( $titleArray as $title ) {
			$id = $title->getArticleID();
			# $numTitles is now the number of titles in the current job not
			# including the current ID
			if ( $numTitles >= $this->rowsPerJob ) {
				# Add a job up to but not including the current ID
				$jobs[] = $this->newRangeJob( $start, $id - 1, $rootJobParams );
				$start = $id;
				$numTitles = 0;
			}
			$numTitles++;
		}
		# Last job
		$jobs[] = $this->newRangeJob( $start, $parentEnd, $rootJobParams );
		wfDebug( __METHOD__ . ": repartitioning into " . count( $jobs ) . " jobs\n" );

		if ( count( $jobs ) < 2 ) {
			# I don't think this is possible at present, but handling this case
			# makes the code a bit more robust against future code updates and
			# avoids a potential infinite loop of repartitioning
			wfDebug( __METHOD__ . ": repartitioning failed!\n" );
			$this->invalidateTitles( $titleArray );
		} else {
			JobQueueGroup::singleton()->push( $jobs );
		}
	}

	/**
	 * Push one child job per batch returned by the backlink cache's partition()
	 * for this job's table, using rowsPerJob as the batch size.
	 *
	 * @param array $rootJobParams Root job parameters for the child jobs. Values
	 *   given here take precedence; anything missing is filled in from
	 *   getRootJobParams().
	 */
	protected function insertPartitionJobs( $rootJobParams = array() ) {
		// Carry over any "root job" information without discarding what the caller passed
		$rootJobParams = $rootJobParams + $this->getRootJobParams();

		$batches = $this->blCache->partition( $this->params['table'], $this->rowsPerJob );
		if ( !count( $batches ) ) {
			return; // no jobs to insert
		}

		$jobs = array();
		foreach ( $batches as $batch ) {
			list( $start, $end ) = $batch;
			$jobs[] = $this->newRangeJob( $start, $end, $rootJobParams );
		}

		JobQueueGroup::singleton()->push( $jobs );
	}

	/**
	 * Invalidate the given titles: set page_touched on their page rows, then
	 * update squid when $wgUseSquid is set and clear the file cache when
	 * $wgUseFileCache is set.
	 *
	 * @param Traversable $titleArray Titles to invalidate; each item must provide
	 *   getArticleID()
	 */
	protected function invalidateTitles( $titleArray ) {
		global $wgUseFileCache, $wgUseSquid;

		$dbw = wfGetDB( DB_MASTER );
		$timestamp = $dbw->timestamp();

		# Get all IDs in this query into an array
		$ids = array();
		foreach ( $titleArray as $title ) {
			$ids[] = $title->getArticleID();
		}

		if ( !$ids ) {
			return;
		}

		# Don't invalidate pages that were already invalidated
		$touchedCond = isset( $this->params['rootJobTimestamp'] )
			? array( "page_touched < " .
				$dbw->addQuotes( $dbw->timestamp( $this->params['rootJobTimestamp'] ) ) )
			: array();

		# Update page_touched, at most rowsPerQuery rows per statement
		$batches = array_chunk( $ids, $this->rowsPerQuery );
		foreach ( $batches as $batch ) {
			$dbw->update( 'page',
				array( 'page_touched' => $timestamp ),
				array( 'page_id' => $batch ) + $touchedCond,
				__METHOD__
			);
		}

		# Update squid
		if ( $wgUseSquid ) {
			$u = SquidUpdate::newFromTitles( $titleArray );
			$u->doUpdate();
		}

		# Update file cache
		if ( $wgUseFileCache ) {
			foreach ( $titleArray as $title ) {
				HTMLFileCache::clearFileCache( $title );
			}
		}
	}
}