MediaWiki  REL1_21
HTMLCacheUpdateJob.php
Go to the documentation of this file.
00001 <?php
/**
 * Job that invalidates the cached HTML of the pages linking to a title.
 *
 * The backlinks are read from the title's backlink cache for the link table
 * named in $params['table']. A job without both 'start' and 'end' parameters
 * is a "full" job: it either does the invalidation itself (few backlinks) or
 * queues one partition job per range of page IDs. A job that has both
 * parameters is a "partial" job working on that range only.
 *
 * Parameters read by this class:
 *   - table            : name of the link table to look backlinks up in
 *   - start, end       : optional page ID range handled by a partial job
 *   - rootJobTimestamp : optional; pages touched at or after it are skipped
 */
class HTMLCacheUpdateJob extends Job {
	/** Backlink cache of the job title, as returned by $title->getBacklinkCache() */
	protected $blCache;

	/** Copies of $wgUpdateRowsPerJob and $wgUpdateRowsPerQuery taken at construction */
	protected $rowsPerJob, $rowsPerQuery;

	/**
	 * @param $title  Title-like object the job is about; must provide getBacklinkCache()
	 * @param $params array Job parameters, see the class description
	 * @param $id     int   Job id, passed through to the parent constructor
	 */
	public function __construct( $title, $params, $id = 0 ) {
		global $wgUpdateRowsPerJob, $wgUpdateRowsPerQuery;

		parent::__construct( 'htmlCacheUpdate', $title, $params, $id );

		$this->rowsPerJob = $wgUpdateRowsPerJob;
		$this->rowsPerQuery = $wgUpdateRowsPerQuery;
		$this->blCache = $title->getBacklinkCache();
	}

	/**
	 * Run the job: a partial update when a range is given, a full one otherwise.
	 *
	 * @return bool Always true (both code paths return true)
	 */
	public function run() {
		if ( isset( $this->params['start'] ) && isset( $this->params['end'] ) ) {
			# This is hit when a job is actually performed
			return $this->doPartialUpdate();
		}

		# This is hit when the jobs have to be inserted
		return $this->doFullUpdate();
	}

	/**
	 * Handle a job that covers all backlinks of the title.
	 *
	 * Uses the backlink cache's row count estimate to decide between queueing
	 * pre-computed partitions and fetching the links directly. If the fetched
	 * link count contradicts the estimate, the links are split into jobs here.
	 *
	 * @return bool Always true
	 */
	protected function doFullUpdate() {
		$table = $this->params['table'];
		$maxRows = $this->rowsPerJob * 2;

		# Get an estimate of the number of rows from the BacklinkCache
		$numRows = $this->blCache->getNumLinks( $table );
		if ( $numRows > $maxRows ) {
			# Do fast cached partition
			$this->insertPartitionJobs();
			return true;
		}

		# Get the links from the DB
		$titleArray = $this->blCache->getLinks( $table );
		# Check if the row count estimate was correct
		if ( $titleArray->count() > $maxRows ) {
			# Not correct, do accurate partition
			wfDebug( __METHOD__.": row count estimate was incorrect, repartitioning\n" );
			$this->insertJobsFromTitles( $titleArray );
		} else {
			$this->invalidateTitles( $titleArray ); // just do the query
		}

		return true;
	}

	/**
	 * Handle a job restricted to the page ID range params['start']..params['end'].
	 *
	 * @return bool Always true
	 */
	protected function doPartialUpdate() {
		$titleArray = $this->blCache->getLinks(
			$this->params['table'], $this->params['start'], $this->params['end'] );

		if ( $titleArray->count() <= $this->rowsPerJob * 2 ) {
			# This partition is small enough, do the update
			$this->invalidateTitles( $titleArray );
		} else {
			# Partitioning was excessively inaccurate. Divide the job further.
			# This can occur when a large number of links are added in a short
			# period of time, say by updating a heavily-used template.
			$this->insertJobsFromTitles( $titleArray );
		}

		return true;
	}

	/**
	 * Split a list of titles into jobs of about rowsPerJob titles each and
	 * queue them. If fewer than two jobs result, the titles are invalidated
	 * right away instead of queueing anything.
	 *
	 * @param $titleArray    Traversable list of title objects providing getArticleID()
	 * @param $rootJobParams array Currently ignored: it is always replaced by
	 *                       the result of $this->getRootJobParams()
	 */
	protected function insertJobsFromTitles( $titleArray, $rootJobParams = array() ) {
		// Carry over any "root job" information
		$rootJobParams = $this->getRootJobParams();

		$table = $this->params['table'];

		# We make subpartitions in the sense that the start of the first job
		# will be the start of the parent partition, and the end of the last
		# job will be the end of the parent partition.
		#
		# A full update has no 'start'/'end' parameters. Reading them blindly
		# would raise a notice and hand null boundaries to the child jobs; null
		# fails the isset() test in run(), so such a child would redo the full
		# update instead of its own partition. Fall back to false, which does
		# pass isset().
		# NOTE(review): false is assumed to be BacklinkCache's "no bound" value
		# for getLinks() - confirm against BacklinkCache.
		$start = isset( $this->params['start'] ) ? $this->params['start'] : false;
		$end = isset( $this->params['end'] ) ? $this->params['end'] : false;

		$jobs = array();
		$numTitles = 0; # titles collected for the job currently being built
		foreach ( $titleArray as $title ) {
			$id = $title->getArticleID();
			# $numTitles is now the number of titles in the current job not
			# including the current ID
			if ( $numTitles >= $this->rowsPerJob ) {
				# Add a job up to but not including the current ID
				$jobs[] = new HTMLCacheUpdateJob( $this->title,
					array(
						'table' => $table,
						'start' => $start,
						'end'   => $id - 1
					) + $rootJobParams // carry over information for de-duplication
				);
				$start = $id;
				$numTitles = 0;
			}
			$numTitles++;
		}

		# Last job: runs up to the end of the parent partition
		$jobs[] = new HTMLCacheUpdateJob( $this->title,
			array(
				'table' => $table,
				'start' => $start,
				'end'   => $end
			) + $rootJobParams // carry over information for de-duplication
		);
		wfDebug( __METHOD__.": repartitioning into " . count( $jobs ) . " jobs\n" );

		if ( count( $jobs ) < 2 ) {
			# I don't think this is possible at present, but handling this case
			# makes the code a bit more robust against future code updates and
			# avoids a potential infinite loop of repartitioning
			wfDebug( __METHOD__.": repartitioning failed!\n" );
			$this->invalidateTitles( $titleArray );
		} else {
			JobQueueGroup::singleton()->push( $jobs );
		}
	}

	/**
	 * Queue one job per batch returned by the backlink cache's partition()
	 * for the job's link table. Nothing is queued when there are no batches.
	 *
	 * @param $rootJobParams array Currently ignored: it is always replaced by
	 *                       the result of $this->getRootJobParams()
	 */
	protected function insertPartitionJobs( $rootJobParams = array() ) {
		// Carry over any "root job" information
		$rootJobParams = $this->getRootJobParams();

		$table = $this->params['table'];
		$batches = $this->blCache->partition( $table, $this->rowsPerJob );
		if ( !count( $batches ) ) {
			return; // no jobs to insert
		}

		$jobs = array();
		foreach ( $batches as $batch ) {
			// Each batch is a ( start, end ) pair
			list( $start, $end ) = $batch;
			$jobs[] = new HTMLCacheUpdateJob( $this->title,
				array(
					'table' => $table,
					'start' => $start,
					'end'   => $end,
				) + $rootJobParams // carry over information for de-duplication
			);
		}

		JobQueueGroup::singleton()->push( $jobs );
	}

	/**
	 * Invalidate the caches of the given titles: bump page_touched, then
	 * purge squid and the file cache when those are enabled.
	 *
	 * @param $titleArray Traversable list of title objects providing getArticleID()
	 */
	protected function invalidateTitles( $titleArray ) {
		global $wgUseFileCache, $wgUseSquid;

		# Get all IDs in this query into an array
		$ids = array();
		foreach ( $titleArray as $title ) {
			$ids[] = $title->getArticleID();
		}

		if ( !$ids ) {
			# Nothing to do; bail out before asking for a master connection
			return;
		}

		$dbw = wfGetDB( DB_MASTER );
		$timestamp = $dbw->timestamp();

		# Don't invalidate pages that were already invalidated
		$touchedCond = array();
		if ( isset( $this->params['rootJobTimestamp'] ) ) {
			$rootTimestamp = $dbw->timestamp( $this->params['rootJobTimestamp'] );
			$touchedCond[] = "page_touched < " . $dbw->addQuotes( $rootTimestamp );
		}

		# Update page_touched, at most rowsPerQuery rows per query
		foreach ( array_chunk( $ids, $this->rowsPerQuery ) as $batch ) {
			$dbw->update( 'page',
				array( 'page_touched' => $timestamp ),
				array( 'page_id' => $batch ) + $touchedCond,
				__METHOD__
			);
		}

		# Update squid
		if ( $wgUseSquid ) {
			$u = SquidUpdate::newFromTitles( $titleArray );
			$u->doUpdate();
		}

		# Update file cache
		if ( $wgUseFileCache ) {
			foreach ( $titleArray as $title ) {
				HTMLFileCache::clearFileCache( $title );
			}
		}
	}
}