MediaWiki  REL1_22
HTMLCacheUpdateJob.php
Go to the documentation of this file.
00001 <?php
/**
 * Job that invalidates cached HTML for the pages the title's backlink cache
 * reports for a given link table.
 *
 * Job parameters read by this class:
 *   - table            : name of the link table handed to the backlink cache
 *   - start, end       : optional bounds of the partition this job covers;
 *                        when both are set the job performs a partial update
 *   - rootJobTimestamp : optional; when set, only pages whose page_touched is
 *                        older than this timestamp are touched again
 *
 * When both 'start' and 'end' are set the job invalidates that partition
 * (or splits it further); otherwise it either invalidates everything directly
 * or pushes partition jobs onto the job queue.
 */
class HTMLCacheUpdateJob extends Job {
    /**
     * Value of $title->getBacklinkCache() captured in the constructor
     * (presumably a BacklinkCache instance).
     */
    protected $blCache;

    /** Copy of $wgUpdateRowsPerJob: target number of titles per partition job. */
    protected $rowsPerJob;

    /** Copy of $wgUpdateRowsPerQuery: number of page IDs per UPDATE statement. */
    protected $rowsPerQuery;

    /**
     * Construct a job.
     *
     * @param object $title Title whose backlink cache is used; must provide
     *   getBacklinkCache()
     * @param array $params Job parameters (see class description)
     * @param int $id Job id, forwarded unchanged to the parent constructor
     */
    function __construct( $title, $params, $id = 0 ) {
        global $wgUpdateRowsPerJob, $wgUpdateRowsPerQuery;

        parent::__construct( 'htmlCacheUpdate', $title, $params, $id );

        $this->rowsPerJob = $wgUpdateRowsPerJob;
        $this->rowsPerQuery = $wgUpdateRowsPerQuery;
        $this->blCache = $title->getBacklinkCache();
    }

    /**
     * Run the job: a partial update when both partition bounds are present,
     * a full update (which may only enqueue partition jobs) otherwise.
     *
     * @return bool Always true
     */
    public function run() {
        if ( isset( $this->params['start'] ) && isset( $this->params['end'] ) ) {
            # This is hit when a job is actually performed
            return $this->doPartialUpdate();
        } else {
            # This is hit when the jobs have to be inserted
            return $this->doFullUpdate();
        }
    }

    /**
     * Update all of the backlinks, either directly or by enqueueing
     * partition jobs when there are too many for a single job.
     *
     * The update is skipped entirely when $wgMaxBacklinksInvalidate is not
     * false and the estimated number of links exceeds it.
     *
     * @return bool Always true
     */
    protected function doFullUpdate() {
        global $wgMaxBacklinksInvalidate;

        # Largest number of rows this job is willing to handle by itself
        $directLimit = $this->rowsPerJob * 2;

        # Get an estimate of the number of rows from the BacklinkCache.
        # The cap is one more than the largest threshold compared against
        # below, so "more than the threshold" can still be detected.
        $max = max( $directLimit, $wgMaxBacklinksInvalidate ) + 1;
        $numRows = $this->blCache->getNumLinks( $this->params['table'], $max );
        if ( $wgMaxBacklinksInvalidate !== false && $numRows > $wgMaxBacklinksInvalidate ) {
            # Trailing newline added for consistency with the other wfDebug() calls
            wfDebug( "Skipped HTML cache invalidation of {$this->title->getPrefixedText()}.\n" );
            return true;
        }

        if ( $numRows > $directLimit ) {
            # Do fast cached partition
            $this->insertPartitionJobs();
        } else {
            # Get the links from the DB
            $titleArray = $this->blCache->getLinks( $this->params['table'] );
            # Check if the row count estimate was correct
            if ( $titleArray->count() > $directLimit ) {
                # Not correct, do accurate partition
                wfDebug( __METHOD__ . ": row count estimate was incorrect, repartitioning\n" );
                $this->insertJobsFromTitles( $titleArray );
            } else {
                $this->invalidateTitles( $titleArray ); // just do the query
            }
        }

        return true;
    }

    /**
     * Update the backlinks that fall inside this job's [start, end] partition.
     * If the partition turns out to hold more than twice $rowsPerJob titles
     * it is divided into smaller jobs instead.
     *
     * @return bool Always true
     */
    protected function doPartialUpdate() {
        $titleArray = $this->blCache->getLinks(
            $this->params['table'], $this->params['start'], $this->params['end'] );
        if ( $titleArray->count() <= $this->rowsPerJob * 2 ) {
            # This partition is small enough, do the update
            $this->invalidateTitles( $titleArray );
        } else {
            # Partitioning was excessively inaccurate. Divide the job further.
            # This can occur when a large number of links are added in a short
            # period of time, say by updating a heavily-used template.
            $this->insertJobsFromTitles( $titleArray );
        }
        return true;
    }

    /**
     * Partition the given titles into jobs of roughly $rowsPerJob titles each
     * and push them onto the job queue. If fewer than two jobs result, the
     * titles are invalidated directly instead.
     *
     * @param Traversable $titleArray Titles to partition; each element must
     *   provide getArticleID(). Boundaries are derived from consecutive IDs,
     *   so the titles are presumably ordered by article ID -- confirm with
     *   the backlink cache.
     * @param array $rootJobParams NOTE(review): currently ignored -- it is
     *   overwritten with $this->getRootJobParams() on entry. Kept so the
     *   signature stays unchanged.
     */
    protected function insertJobsFromTitles( $titleArray, $rootJobParams = array() ) {
        // Carry over any "root job" information
        $rootJobParams = $this->getRootJobParams();

        # Bounds of the parent partition. When called from doFullUpdate() the
        # 'start'/'end' params are not set; fall back to false rather than
        # reading an undefined index. Unlike null, false passes the isset()
        # test in run(), so the boundary sub-jobs run as partial updates
        # instead of restarting a full update.
        # NOTE(review): assumes the backlink cache treats false as "no bound".
        $parentStart = isset( $this->params['start'] ) ? $this->params['start'] : false;
        $parentEnd = isset( $this->params['end'] ) ? $this->params['end'] : false;

        # We make subpartitions in the sense that the start of the first job
        # will be the start of the parent partition, and the end of the last
        # job will be the end of the parent partition.
        $jobs = array();
        $start = $parentStart; # start of the current job
        $numTitles = 0;
        foreach ( $titleArray as $title ) {
            $id = $title->getArticleID();
            # $numTitles is now the number of titles in the current job not
            # including the current ID
            if ( $numTitles >= $this->rowsPerJob ) {
                # Add a job up to but not including the current ID
                $jobs[] = new HTMLCacheUpdateJob( $this->title,
                    array(
                        'table' => $this->params['table'],
                        'start' => $start,
                        'end' => $id - 1
                    ) + $rootJobParams // carry over information for de-duplication
                );
                $start = $id;
                $numTitles = 0;
            }
            $numTitles++;
        }
        # Last job
        $jobs[] = new HTMLCacheUpdateJob( $this->title,
            array(
                'table' => $this->params['table'],
                'start' => $start,
                'end' => $parentEnd
            ) + $rootJobParams // carry over information for de-duplication
        );
        wfDebug( __METHOD__ . ": repartitioning into " . count( $jobs ) . " jobs\n" );

        if ( count( $jobs ) < 2 ) {
            # I don't think this is possible at present, but handling this case
            # makes the code a bit more robust against future code updates and
            # avoids a potential infinite loop of repartitioning
            wfDebug( __METHOD__ . ": repartitioning failed!\n" );
            $this->invalidateTitles( $titleArray );
        } else {
            JobQueueGroup::singleton()->push( $jobs );
        }
    }

    /**
     * Ask the backlink cache to partition the link table into batches of
     * $rowsPerJob and push one job per batch onto the job queue. Does nothing
     * when the cache returns no batches.
     *
     * @param array $rootJobParams NOTE(review): currently ignored -- it is
     *   overwritten with $this->getRootJobParams() on entry. Kept so the
     *   signature stays unchanged.
     */
    protected function insertPartitionJobs( $rootJobParams = array() ) {
        // Carry over any "root job" information
        $rootJobParams = $this->getRootJobParams();

        $batches = $this->blCache->partition( $this->params['table'], $this->rowsPerJob );
        if ( !count( $batches ) ) {
            return; // no jobs to insert
        }

        $jobs = array();
        foreach ( $batches as $batch ) {
            # Each batch is a ( start, end ) pair
            list( $start, $end ) = $batch;
            $jobs[] = new HTMLCacheUpdateJob( $this->title,
                array(
                    'table' => $this->params['table'],
                    'start' => $start,
                    'end' => $end,
                ) + $rootJobParams // carry over information for de-duplication
            );
        }

        JobQueueGroup::singleton()->push( $jobs );
    }

    /**
     * Invalidate the given titles: bump page_touched on the master DB, then
     * issue a Squid update (if $wgUseSquid) and clear the HTML file cache
     * (if $wgUseFileCache). Returns early, doing nothing, when the list
     * yields no titles.
     *
     * @param Traversable $titleArray Titles to invalidate; each element must
     *   provide getArticleID(). The collection is iterated more than once.
     */
    protected function invalidateTitles( $titleArray ) {
        global $wgUseFileCache, $wgUseSquid;

        $dbw = wfGetDB( DB_MASTER );
        $timestamp = $dbw->timestamp();

        # Get all IDs in this query into an array
        $ids = array();
        foreach ( $titleArray as $title ) {
            $ids[] = $title->getArticleID();
        }

        if ( !$ids ) {
            return;
        }

        # Don't invalidate pages that were already invalidated: when the root
        # job timestamp is known, only rows touched before it are updated.
        $touchedCond = isset( $this->params['rootJobTimestamp'] )
            ? array( "page_touched < " .
                $dbw->addQuotes( $dbw->timestamp( $this->params['rootJobTimestamp'] ) ) )
            : array();

        # Update page_touched, at most $rowsPerQuery page IDs per statement
        $batches = array_chunk( $ids, $this->rowsPerQuery );
        foreach ( $batches as $batch ) {
            $dbw->update( 'page',
                array( 'page_touched' => $timestamp ),
                # Array union: 'page_id' and the numeric key of $touchedCond
                # never collide, so both conditions are kept.
                array( 'page_id' => $batch ) + $touchedCond,
                __METHOD__
            );
        }

        # Update squid
        if ( $wgUseSquid ) {
            $u = SquidUpdate::newFromTitles( $titleArray );
            $u->doUpdate();
        }

        # Update file cache
        if ( $wgUseFileCache ) {
            foreach ( $titleArray as $title ) {
                HTMLFileCache::clearFileCache( $title );
            }
        }
    }
}