MediaWiki
REL1_21
|
<?php

/**
 * Job that invalidates the cached HTML of the pages returned by the title's
 * backlink cache for a given link table.
 *
 * A job without both 'start' and 'end' parameters is a "full update": it
 * either performs the invalidation itself (small link counts) or splits the
 * work into partition jobs. A job with both parameters set is a "partial
 * update" that handles one page ID range, splitting it again if it turns out
 * to be too large.
 *
 * Parameters read from $this->params:
 *   - table:            link table name passed to the backlink cache
 *   - start, end:       (optional) bounds of the range handled by this job
 *   - rootJobTimestamp: (optional) pages already touched at or after this
 *                       time are not touched again
 */
class HTMLCacheUpdateJob extends Job {
	/** Backlink cache object obtained from the job's title */
	protected $blCache;

	/** Values of $wgUpdateRowsPerJob and $wgUpdateRowsPerQuery at construction */
	protected $rowsPerJob, $rowsPerQuery;

	/**
	 * @param $title Title whose backlinks are to be invalidated
	 * @param $params array Job parameters, see the class description
	 * @param $id int Job id, passed through to the parent constructor
	 */
	function __construct( $title, $params, $id = 0 ) {
		global $wgUpdateRowsPerJob, $wgUpdateRowsPerQuery;

		parent::__construct( 'htmlCacheUpdate', $title, $params, $id );

		$this->rowsPerJob = $wgUpdateRowsPerJob;
		$this->rowsPerQuery = $wgUpdateRowsPerQuery;
		$this->blCache = $title->getBacklinkCache();
	}

	/**
	 * Run the job: a partial update when both range bounds are present,
	 * otherwise a full update.
	 *
	 * @return bool Always true
	 */
	public function run() {
		if ( isset( $this->params['start'] ) && isset( $this->params['end'] ) ) {
			# This is hit when a job is actually performed
			return $this->doPartialUpdate();
		} else {
			# This is hit when the jobs have to be inserted
			return $this->doFullUpdate();
		}
	}

	/**
	 * Update all of the backlinks, either directly or by queueing
	 * partition jobs when there are too many rows for a single job.
	 *
	 * @return bool Always true
	 */
	protected function doFullUpdate() {
		# Get an estimate of the number of rows from the BacklinkCache
		$numRows = $this->blCache->getNumLinks( $this->params['table'] );
		if ( $numRows > $this->rowsPerJob * 2 ) {
			# Do fast cached partition
			$this->insertPartitionJobs();
		} else {
			# Get the links from the DB
			$titleArray = $this->blCache->getLinks( $this->params['table'] );
			# Check if the row count estimate was correct
			if ( $titleArray->count() > $this->rowsPerJob * 2 ) {
				# Not correct, do accurate partition
				wfDebug( __METHOD__.": row count estimate was incorrect, repartitioning\n" );
				$this->insertJobsFromTitles( $titleArray );
			} else {
				$this->invalidateTitles( $titleArray ); // just do the query
			}
		}
		return true;
	}

	/**
	 * Update the backlinks in the range given by the 'start' and 'end'
	 * parameters, or split the range into smaller jobs if it is too large.
	 *
	 * @return bool Always true
	 */
	protected function doPartialUpdate() {
		$titleArray = $this->blCache->getLinks(
			$this->params['table'], $this->params['start'], $this->params['end'] );
		if ( $titleArray->count() <= $this->rowsPerJob * 2 ) {
			# This partition is small enough, do the update
			$this->invalidateTitles( $titleArray );
		} else {
			# Partitioning was excessively inaccurate. Divide the job further.
			# This can occur when a large number of links are added in a short
			# period of time, say by updating a heavily-used template.
			$this->insertJobsFromTitles( $titleArray );
		}
		return true;
	}

	/**
	 * Partition the current range (the 'start' and 'end' parameters, when
	 * present) using a title array that lists the links in that range, and
	 * queue the resulting jobs. If fewer than two jobs result, the titles
	 * are invalidated directly instead.
	 *
	 * @param $titleArray Iterable list of titles in the current range
	 * @param $rootJobParams array Extra "root job" parameters for the child
	 *   jobs; these take precedence over the ones carried by this job
	 * @return void
	 */
	protected function insertJobsFromTitles( $titleArray, $rootJobParams = array() ) {
		// Carry over any "root job" information; explicitly passed values win
		$rootJobParams += $this->getRootJobParams();

		# doFullUpdate() calls this method on a job that has no range. Fall
		# back to false for the missing bounds: reading the keys directly
		# raises undefined-index notices and yields null bounds, which fail
		# the isset() test in run() and send the child job back through the
		# full update path.
		# NOTE(review): false is assumed to be the backlink cache's "unbounded"
		# value for getLinks() - confirm against BacklinkCache.
		$rangeStart = isset( $this->params['start'] ) ? $this->params['start'] : false;
		$rangeEnd = isset( $this->params['end'] ) ? $this->params['end'] : false;

		# We make subpartitions in the sense that the start of the first job
		# will be the start of the parent partition, and the end of the last
		# job will be the end of the parent partition.
		$jobs = array();
		$start = $rangeStart; # start of the current job
		$numTitles = 0;
		foreach ( $titleArray as $title ) {
			$id = $title->getArticleID();
			# $numTitles is now the number of titles in the current job not
			# including the current ID
			if ( $numTitles >= $this->rowsPerJob ) {
				# Add a job up to but not including the current ID
				$jobs[] = new HTMLCacheUpdateJob( $this->title,
					array(
						'table' => $this->params['table'],
						'start' => $start,
						'end' => $id - 1
					) + $rootJobParams // carry over information for de-duplication
				);
				$start = $id;
				$numTitles = 0;
			}
			$numTitles++;
		}
		# Last job
		$jobs[] = new HTMLCacheUpdateJob( $this->title,
			array(
				'table' => $this->params['table'],
				'start' => $start,
				'end' => $rangeEnd
			) + $rootJobParams // carry over information for de-duplication
		);
		wfDebug( __METHOD__.": repartitioning into " . count( $jobs ) . " jobs\n" );

		if ( count( $jobs ) < 2 ) {
			# I don't think this is possible at present, but handling this case
			# makes the code a bit more robust against future code updates and
			# avoids a potential infinite loop of repartitioning
			wfDebug( __METHOD__.": repartitioning failed!\n" );
			$this->invalidateTitles( $titleArray );
		} else {
			JobQueueGroup::singleton()->push( $jobs );
		}
	}

	/**
	 * Queue one job per batch returned by the backlink cache's partition()
	 * for this job's table. Nothing is queued when there are no batches.
	 *
	 * @param $rootJobParams array Extra "root job" parameters for the child
	 *   jobs; these take precedence over the ones carried by this job
	 * @return void
	 */
	protected function insertPartitionJobs( $rootJobParams = array() ) {
		// Carry over any "root job" information; explicitly passed values win
		$rootJobParams += $this->getRootJobParams();

		$batches = $this->blCache->partition( $this->params['table'], $this->rowsPerJob );
		if ( !count( $batches ) ) {
			return; // no jobs to insert
		}

		$jobs = array();
		foreach ( $batches as $batch ) {
			list( $start, $end ) = $batch;
			$jobs[] = new HTMLCacheUpdateJob( $this->title,
				array(
					'table' => $this->params['table'],
					'start' => $start,
					'end' => $end,
				) + $rootJobParams // carry over information for de-duplication
			);
		}

		JobQueueGroup::singleton()->push( $jobs );
	}

	/**
	 * Invalidate an array (or iterator) of Title objects, right now:
	 * update page_touched in batches of $this->rowsPerQuery, then purge
	 * squid and the file cache when those are enabled.
	 *
	 * @param $titleArray Iterable list of titles to invalidate
	 * @return void
	 */
	protected function invalidateTitles( $titleArray ) {
		global $wgUseFileCache, $wgUseSquid;

		$dbw = wfGetDB( DB_MASTER );
		$timestamp = $dbw->timestamp();

		# Get all IDs in this query into an array
		$ids = array();
		foreach ( $titleArray as $title ) {
			$ids[] = $title->getArticleID();
		}

		if ( !$ids ) {
			return;
		}

		# Don't invalidate pages that were already invalidated
		$touchedCond = isset( $this->params['rootJobTimestamp'] )
			? array( "page_touched < " .
				$dbw->addQuotes( $dbw->timestamp( $this->params['rootJobTimestamp'] ) ) )
			: array();

		# Update page_touched
		$batches = array_chunk( $ids, $this->rowsPerQuery );
		foreach ( $batches as $batch ) {
			$dbw->update( 'page',
				array( 'page_touched' => $timestamp ),
				array( 'page_id' => $batch ) + $touchedCond,
				__METHOD__
			);
		}

		# Update squid
		if ( $wgUseSquid ) {
			$u = SquidUpdate::newFromTitles( $titleArray );
			$u->doUpdate();
		}

		# Update file cache
		if ( $wgUseFileCache ) {
			foreach ( $titleArray as $title ) {
				HTMLFileCache::clearFileCache( $title );
			}
		}
	}
}