MediaWiki  REL1_24
HTMLCacheUpdateJob.php
Go to the documentation of this file.
00001 <?php
/**
 * Job to purge the cached HTML of pages.
 *
 * Depending on its parameters, a job of this class does one of three things
 * when run:
 *   - 'recursive' set: partitions the backlinks of the job title into further
 *     HTMLCacheUpdateJob jobs via BacklinkJobUtils::partitionBacklinkJob()
 *   - 'pages' set: invalidates the listed pages directly (leaf job)
 *   - legacy format with 'table', 'start' and 'end' but neither of the above:
 *     converts the backlink range into leaf jobs carrying 'pages'
 *
 * Parameters read by this class:
 *   - table            : backlink table name handed to the backlink cache
 *   - recursive        : non-empty to request backlink partitioning
 *   - pages            : map of (page ID => (namespace, DB key))
 *   - start, end       : range bounds of old-style range jobs
 *   - range            : only checked for presence when deciding de-duplication
 *   - rootJobTimestamp : if set, used as the page_touched value instead of "now"
 */
class HTMLCacheUpdateJob extends Job {
    /**
     * @param Title $title Page whose backlinks (or listed pages) are purged
     * @param array|string $params Job parameters; see the class description
     */
    public function __construct( $title, $params = '' ) {
        parent::__construct( 'htmlCacheUpdate', $title, $params );
        // Base backlink purge jobs can be de-duplicated; jobs that already carry
        // a range or an explicit page list cannot.
        $this->removeDuplicates = ( !isset( $params['range'] ) && !isset( $params['pages'] ) );
    }

    /**
     * Run the job: partition, invalidate, or convert a legacy range job.
     *
     * @return bool Always true
     */
    public function run() {
        global $wgUpdateRowsPerJob, $wgUpdateRowsPerQuery;

        static $expected = array( 'recursive', 'pages' ); // new jobs have one of these

        $oldRangeJob = false;
        if ( !array_intersect( array_keys( $this->params ), $expected ) ) {
            // B/C for older job params formats that lack these fields:
            // a) base jobs with just ("table") and b) range jobs with ("table","start","end")
            if ( isset( $this->params['start'] ) && isset( $this->params['end'] ) ) {
                $oldRangeJob = true;
            } else {
                $this->params['recursive'] = true; // base job
            }
        }

        if ( !empty( $this->params['recursive'] ) ) {
            // Job to purge all (or a range of) backlink pages for a page.
            // Convert this into no more than $wgUpdateRowsPerJob HTMLCacheUpdateJob per-title
            // jobs and possibly a recursive HTMLCacheUpdateJob job for the rest of the backlinks
            $jobs = BacklinkJobUtils::partitionBacklinkJob(
                $this,
                $wgUpdateRowsPerJob,
                // NOTE(review): presumably the maximum number of titles per leaf job;
                // confirm against BacklinkJobUtils::partitionBacklinkJob()
                $wgUpdateRowsPerQuery,
                // Carry over information for de-duplication
                array( 'params' => $this->getRootJobParams() )
            );
            JobQueueGroup::singleton()->push( $jobs );
        } elseif ( isset( $this->params['pages'] ) ) {
            // Job to purge pages for a set of titles
            $this->invalidateTitles( $this->params['pages'] );
        } elseif ( $oldRangeJob ) {
            // B/C for job to purge a range of backlink pages for a given page
            $titleArray = $this->title->getBacklinkCache()->getLinks(
                $this->params['table'], $this->params['start'], $this->params['end'] );

            $pages = array(); // same format BacklinkJobUtils uses
            foreach ( $titleArray as $tl ) {
                $pages[$tl->getArticleId()] = array( $tl->getNamespace(), $tl->getDbKey() );
            }

            // array_chunk() rejects sizes below 1, so clamp a misconfigured setting
            $chunkSize = max( 1, (int)$wgUpdateRowsPerJob );

            $jobs = array();
            // The third argument (preserve_keys) MUST be true: the keys of $pages are the
            // page IDs that invalidateTitles() reads back with array_keys(). Without it
            // each chunk would be re-indexed 0..n-1 and the wrong pages would be purged.
            foreach ( array_chunk( $pages, $chunkSize, true ) as $pageChunk ) {
                $jobs[] = new HTMLCacheUpdateJob( $this->title,
                    array(
                        'table' => $this->params['table'],
                        'pages' => $pageChunk
                    ) + $this->getRootJobParams() // carry over information for de-duplication
                );
            }
            JobQueueGroup::singleton()->push( $jobs );
        }

        return true;
    }

    /**
     * Bump page_touched for the given pages, then purge the squid and file
     * cache entries of the pages whose page_touched equals the new value.
     *
     * @param array $pages Map of (page ID => (namespace, DB key)); only the
     *   keys (page IDs) are used here
     */
    protected function invalidateTitles( array $pages ) {
        global $wgUpdateRowsPerQuery, $wgUseFileCache, $wgUseSquid;

        // Get all page IDs in this query into an array
        $pageIds = array_keys( $pages );
        if ( !$pageIds ) {
            return;
        }

        $dbw = wfGetDB( DB_MASTER );

        // The page_touched field will need to be bumped for these pages.
        // Only bump it to the present time if no "rootJobTimestamp" was known.
        // If it is known, it can be used instead, which avoids invalidating output
        // that was in fact generated *after* the relevant dependency change time
        // (e.g. template edit). This is particularly useful since refreshLinks jobs
        // save back parser output and usually run alongside htmlCacheUpdate jobs;
        // their saved output would be invalidated by using the current timestamp.
        if ( isset( $this->params['rootJobTimestamp'] ) ) {
            $touchTimestamp = $this->params['rootJobTimestamp'];
        } else {
            $touchTimestamp = wfTimestampNow();
        }
        // Timestamp in the database's format; identical for every batch below
        $dbTouchTimestamp = $dbw->timestamp( $touchTimestamp );

        // array_chunk() rejects sizes below 1, so clamp a misconfigured setting
        $batchSize = max( 1, (int)$wgUpdateRowsPerQuery );

        // Update page_touched (skipping pages already touched since the root job).
        // Batching by $wgUpdateRowsPerQuery is a sanity measure; batch jobs are
        // sized by that already.
        foreach ( array_chunk( $pageIds, $batchSize ) as $batch ) {
            $dbw->update( 'page',
                array( 'page_touched' => $dbTouchTimestamp ),
                array( 'page_id' => $batch,
                    // don't invalidate pages that were already invalidated
                    "page_touched < " . $dbw->addQuotes( $dbTouchTimestamp )
                ),
                __METHOD__
            );
        }
        // Get the list of affected pages (races only mean something else did the purge)
        $titleArray = TitleArray::newFromResult( $dbw->select(
            'page',
            array( 'page_namespace', 'page_title' ),
            array( 'page_id' => $pageIds, 'page_touched' => $dbTouchTimestamp ),
            __METHOD__
        ) );

        // Update squid
        if ( $wgUseSquid ) {
            $u = SquidUpdate::newFromTitles( $titleArray );
            $u->doUpdate();
        }

        // Update file cache
        if ( $wgUseFileCache ) {
            foreach ( $titleArray as $title ) {
                HTMLFileCache::clearFileCache( $title );
            }
        }
    }

    /**
     * @return int Number of entries in the 'pages' parameter, or 1 if it is not set
     */
    public function workItemCount() {
        return isset( $this->params['pages'] ) ? count( $this->params['pages'] ) : 1;
    }
}