MediaWiki  REL1_20
HTMLCacheUpdate.php
Go to the documentation of this file.
00001 <?php
/**
 * Invalidates the cached HTML of the pages that a title's BacklinkCache
 * reports for a given link table.
 *
 * Small result sets are invalidated immediately: page_touched is updated and,
 * depending on configuration, Squid and the file cache are purged. Large
 * result sets are split into HTMLCacheUpdateJob jobs, each covering a range
 * of page IDs.
 */
class HTMLCacheUpdate implements DeferrableUpdate {
	/**
	 * Title object whose BacklinkCache supplies the pages to invalidate.
	 * It must provide getBacklinkCache().
	 */
	public $mTitle;

	/**
	 * $mTable: link table name handed to the BacklinkCache.
	 * $mPrefix: declared for compatibility; never assigned in this class.
	 * $mStart, $mEnd: bounds of the partition to process (page IDs, as
	 * produced by insertJobsFromTitles()); false when not set.
	 */
	public $mTable, $mPrefix, $mStart, $mEnd;

	/**
	 * $mRowsPerJob: copy of $wgUpdateRowsPerJob; a partition holding more
	 * than twice this many rows is split into jobs.
	 * $mRowsPerQuery: copy of $wgUpdateRowsPerQuery; maximum number of page
	 * IDs per UPDATE query.
	 */
	public $mRowsPerJob, $mRowsPerQuery;

	/**
	 * BacklinkCache obtained from $mTitle in the constructor.
	 * Declared explicitly so it is not created as a dynamic property.
	 */
	public $mCache;

	/**
	 * @param $titleTo Title object providing getBacklinkCache()
	 * @param $table String: link table name passed to the BacklinkCache
	 * @param $start Mixed: start of the partition, or false
	 * @param $end Mixed: end of the partition, or false
	 */
	function __construct( $titleTo, $table, $start = false, $end = false ) {
		global $wgUpdateRowsPerJob, $wgUpdateRowsPerQuery;

		$this->mTitle = $titleTo;
		$this->mTable = $table;
		$this->mStart = $start;
		$this->mEnd = $end;
		$this->mRowsPerJob = $wgUpdateRowsPerJob;
		$this->mRowsPerQuery = $wgUpdateRowsPerQuery;
		$this->mCache = $this->mTitle->getBacklinkCache();
	}

	/**
	 * Run the update.
	 *
	 * When a start or end bound is set only that partition is processed.
	 * Otherwise the work is either done directly or split into jobs,
	 * depending on how many rows are involved.
	 */
	public function doUpdate() {
		if ( $this->mStart || $this->mEnd ) {
			$this->doPartialUpdate();
			return;
		}

		# Get an estimate of the number of rows from the BacklinkCache
		$numRows = $this->mCache->getNumLinks( $this->mTable );
		if ( $numRows > $this->mRowsPerJob * 2 ) {
			# Do fast cached partition
			$this->insertJobs();
		} else {
			# Get the links from the DB
			$titleArray = $this->mCache->getLinks( $this->mTable );
			# Check if the row count estimate was correct
			if ( $titleArray->count() > $this->mRowsPerJob * 2 ) {
				# Not correct, do accurate partition
				wfDebug( __METHOD__.": row count estimate was incorrect, repartitioning\n" );
				$this->insertJobsFromTitles( $titleArray );
			} else {
				$this->invalidateTitles( $titleArray );
			}
		}
	}

	/**
	 * Process the partition bounded by $mStart and $mEnd: invalidate it
	 * directly when it is small enough, otherwise divide it into further jobs.
	 */
	protected function doPartialUpdate() {
		$titleArray = $this->mCache->getLinks( $this->mTable, $this->mStart, $this->mEnd );
		if ( $titleArray->count() <= $this->mRowsPerJob * 2 ) {
			# This partition is small enough, do the update
			$this->invalidateTitles( $titleArray );
		} else {
			# Partitioning was excessively inaccurate. Divide the job further.
			# This can occur when a large number of links are added in a short
			# period of time, say by updating a heavily-used template.
			$this->insertJobsFromTitles( $titleArray );
		}
	}

	/**
	 * Partition the given titles into jobs of about $mRowsPerJob titles each
	 * and insert them into the job queue.
	 *
	 * If fewer than two jobs result, nothing is queued and the titles are
	 * invalidated directly instead.
	 *
	 * @param $titleArray Iterable collection of objects providing
	 *   getArticleID(); it is iterated again if the fallback is taken
	 */
	protected function insertJobsFromTitles( $titleArray ) {
		# We make subpartitions in the sense that the start of the first job
		# will be the start of the parent partition, and the end of the last
		# job will be the end of the parent partition.
		$jobs = array();
		$start = $this->mStart; # start of the current job
		$numTitles = 0;
		foreach ( $titleArray as $title ) {
			$id = $title->getArticleID();
			# $numTitles is now the number of titles in the current job not
			# including the current ID
			if ( $numTitles >= $this->mRowsPerJob ) {
				# Add a job up to but not including the current ID
				$params = array(
					'table' => $this->mTable,
					'start' => $start,
					'end' => $id - 1
				);
				$jobs[] = new HTMLCacheUpdateJob( $this->mTitle, $params );
				$start = $id;
				$numTitles = 0;
			}
			$numTitles++;
		}
		# Last job
		$params = array(
			'table' => $this->mTable,
			'start' => $start,
			'end' => $this->mEnd
		);
		$jobs[] = new HTMLCacheUpdateJob( $this->mTitle, $params );
		wfDebug( __METHOD__.": repartitioning into " . count( $jobs ) . " jobs\n" );

		if ( count( $jobs ) < 2 ) {
			# I don't think this is possible at present, but handling this case
			# makes the code a bit more robust against future code updates and
			# avoids a potential infinite loop of repartitioning
			wfDebug( __METHOD__.": repartitioning failed!\n" );
			$this->invalidateTitles( $titleArray );
			return;
		}

		Job::batchInsert( $jobs );
	}

	/**
	 * Queue one job per batch returned by the BacklinkCache partitioning of
	 * $mTable. Nothing is queued when the partition result is empty.
	 */
	protected function insertJobs() {
		$batches = $this->mCache->partition( $this->mTable, $this->mRowsPerJob );
		if ( !$batches ) {
			return;
		}
		$jobs = array();
		foreach ( $batches as $batch ) {
			# Each batch is a pair: index 0 is the start, index 1 the end
			$params = array(
				'table' => $this->mTable,
				'start' => $batch[0],
				'end' => $batch[1],
			);
			$jobs[] = new HTMLCacheUpdateJob( $this->mTitle, $params );
		}
		Job::batchInsert( $jobs );
	}

	/**
	 * Invalidate the given titles: set page_touched to the current database
	 * timestamp and, if enabled, purge them from Squid and the file cache.
	 *
	 * @param $titleArray Iterable collection of objects providing
	 *   getArticleID()
	 */
	protected function invalidateTitles( $titleArray ) {
		global $wgUseFileCache, $wgUseSquid;

		# Get all IDs in this query into an array
		$ids = array();
		foreach ( $titleArray as $title ) {
			$ids[] = $title->getArticleID();
		}

		# Nothing to invalidate; return before asking for a master connection
		if ( !$ids ) {
			return;
		}

		$dbw = wfGetDB( DB_MASTER );
		$timestamp = $dbw->timestamp();

		# Update page_touched, at most $mRowsPerQuery rows per query
		$batches = array_chunk( $ids, $this->mRowsPerQuery );
		foreach ( $batches as $batch ) {
			$dbw->update( 'page',
				array( 'page_touched' => $timestamp ),
				array( 'page_id' => $batch ),
				__METHOD__
			);
		}

		# Update squid
		if ( $wgUseSquid ) {
			$u = SquidUpdate::newFromTitles( $titleArray );
			$u->doUpdate();
		}

		# Update file cache
		if ( $wgUseFileCache ) {
			foreach ( $titleArray as $title ) {
				HTMLFileCache::clearFileCache( $title );
			}
		}
	}
}
00231 
00232 
/**
 * Job that runs an HTMLCacheUpdate for one partition.
 *
 * The partition is described by the 'table', 'start' and 'end' entries of
 * the job parameters.
 */
class HTMLCacheUpdateJob extends Job {
	/** Value of $params['table'] */
	public $table;

	/** Value of $params['start'] */
	public $start;

	/** Value of $params['end'] */
	public $end;

	/**
	 * @param $title Title object, forwarded to the parent Job constructor
	 * @param $params Array with the keys 'table', 'start' and 'end'
	 * @param $id Integer: job ID, forwarded to the parent Job constructor
	 */
	public function __construct( $title, $params, $id = 0 ) {
		parent::__construct( 'htmlCacheUpdate', $title, $params, $id );

		# Mirror the partition description into same-named properties
		foreach ( array( 'table', 'start', 'end' ) as $key ) {
			$this->$key = $params[$key];
		}
	}

	/**
	 * Build an HTMLCacheUpdate for the stored partition and execute it.
	 * $this->title is presumably set by the parent Job constructor.
	 *
	 * @return Boolean: always true
	 */
	public function run() {
		$cacheUpdate = new HTMLCacheUpdate(
			$this->title,
			$this->table,
			$this->start,
			$this->end
		);
		$cacheUpdate->doUpdate();

		return true;
	}
}