MediaWiki  REL1_24
RefreshLinksJob.php
<?php
class RefreshLinksJob extends Job {
    const PARSE_THRESHOLD_SEC = 1.0;

    function __construct( $title, $params = '' ) {
        parent::__construct( 'refreshLinks', $title, $params );
        // Base backlink update jobs and per-title update jobs can be de-duplicated.
        // If template A changes twice before any jobs run, a clean queue will have:
        //      (A base, A base)
        // The second job is ignored by the queue on insertion.
        // Suppose many pages use template A, and that template itself uses template B.
        // An edit to both will first create two base jobs. A clean FIFO queue will have:
        //      (A base, B base)
        // When these jobs run, the queue will have per-title and remnant partition jobs:
        //      (titleX,titleY,titleZ,...,A remnant,titleM,titleN,titleO,...,B remnant)
        // Some of these jobs will be the same, and will automatically be ignored by
        // the queue upon insertion. Some title jobs will run before the duplicate is
        // inserted, so the work will still be done twice in those cases. More titles
        // can be de-duplicated as the remnant jobs continue to be broken down. How well
        // this works depends on $wgUpdateRowsPerJob; it works best when either the pages
        // have few backlinks and/or the backlink sets for pages A and B are almost
        // identical. (See the illustrative sketch after this constructor.)
        $this->removeDuplicates = !isset( $params['range'] )
            && ( !isset( $params['pages'] ) || count( $params['pages'] ) == 1 );
    }
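
    // Illustrative sketch (not part of the original file): how the constructor's
    // de-duplication flag relates to common params shapes. The titles and page IDs
    // below are hypothetical examples.
    //
    //   new RefreshLinksJob( $title, array( 'recursive' => true, 'table' => 'templatelinks' ) );
    //       => removeDuplicates is true  (base job: no 'range', no 'pages')
    //   new RefreshLinksJob( $title, array( 'pages' => array( 17 => array( 0, 'Foo' ) ) ) );
    //       => removeDuplicates is true  (single-title job)
    //   new RefreshLinksJob( $title, array( 'pages' => array( /* many entries */ ) ) );
    //       => removeDuplicates is false (multi-title batch; batches rarely repeat exactly)
    //   new RefreshLinksJob( $title, array( 'recursive' => true, 'range' => $range ) );
    //       => removeDuplicates is false (remnant range job; ranges differ between jobs)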

    function run() {
        global $wgUpdateRowsPerJob;

        // Job to update all (or a range of) backlink pages for a page
        if ( !empty( $this->params['recursive'] ) ) {
            // Carry over information for de-duplication
            $extraParams = $this->getRootJobParams();
            // Avoid slave lag when fetching templates.
            // When the outermost job is run, we know that the caller that enqueued it must have
            // committed the relevant changes to the DB by now. At that point, record the master
            // position and pass it along as the job recursively breaks into smaller range jobs.
            // Hopefully, when leaf jobs are popped, the slaves will have reached that position.
            if ( isset( $this->params['masterPos'] ) ) {
                $extraParams['masterPos'] = $this->params['masterPos'];
            } elseif ( wfGetLB()->getServerCount() > 1 ) {
                $extraParams['masterPos'] = wfGetLB()->getMasterPos();
            } else {
                $extraParams['masterPos'] = false;
            }
            // Convert this into no more than $wgUpdateRowsPerJob RefreshLinks per-title
            // jobs and possibly a recursive RefreshLinks job for the rest of the backlinks
            $jobs = BacklinkJobUtils::partitionBacklinkJob(
                $this,
                $wgUpdateRowsPerJob,
                1, // job-per-title
                array( 'params' => $extraParams )
            );
            JobQueueGroup::singleton()->push( $jobs );
        // Job to update link tables for a set of titles
        } elseif ( isset( $this->params['pages'] ) ) {
            foreach ( $this->params['pages'] as $pageId => $nsAndKey ) {
                list( $ns, $dbKey ) = $nsAndKey;
                $this->runForTitle( Title::makeTitleSafe( $ns, $dbKey ) );
            }
        // Job to update link tables for a given title
        } else {
            $this->runForTitle( $this->title );
        }

        return true;
    }
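
    // Illustrative sketch (an assumption about the REL1_24 output shape of
    // BacklinkJobUtils::partitionBacklinkJob, shown for orientation only): the
    // partitioning in run() turns one recursive job into jobs roughly of the form
    //
    //   RefreshLinksJob( $title, array( 'pages' => array( <id> => array( <ns>, <dbkey> ) ) ) + $extraParams )
    //     ... up to $wgUpdateRowsPerJob such single-title jobs ...
    //   RefreshLinksJob( $title, array( 'recursive' => true, 'range' => <remaining backlinks> ) + $extraParams )
    //
    // The remnant 'range' job re-enters run() and is partitioned again until the
    // backlink set is exhausted; the single-title jobs fall through to runForTitle().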

    protected function runForTitle( Title $title = null ) {
        $linkCache = LinkCache::singleton();
        $linkCache->clear();

        if ( is_null( $title ) ) {
            $this->setLastError( "refreshLinks: Invalid title" );
            return false;
        }

        // Wait for the DB of the current/next slave DB handle to catch up to the master.
        // This way, we get the correct page_latest for templates or files that just changed
        // milliseconds ago, having triggered this job to begin with.
        if ( isset( $this->params['masterPos'] ) && $this->params['masterPos'] !== false ) {
            wfGetLB()->waitFor( $this->params['masterPos'] );
        }

        $page = WikiPage::factory( $title );

        // Fetch the current revision...
        $revision = Revision::newFromTitle( $title, false, Revision::READ_NORMAL );
        if ( !$revision ) {
            $this->setLastError( "refreshLinks: Article not found {$title->getPrefixedDBkey()}" );
            return false; // XXX: what if it was just deleted?
        }
        $content = $revision->getContent( Revision::RAW );
        if ( !$content ) {
            // If there is no content, pretend the content is empty
            $content = $revision->getContentHandler()->makeEmptyContent();
        }

        $parserOutput = false;
        $parserOptions = $page->makeParserOptions( 'canonical' );
        // If page_touched changed after this root job was enqueued (allowing for some
        // slave lag skew), then it is likely that any views of the pages have already
        // triggered re-parses which are now in the cache. That cached output can be
        // reused to avoid expensive parsing in some cases.
        if ( isset( $this->params['rootJobTimestamp'] ) ) {
            $skewedTimestamp = wfTimestamp( TS_UNIX, $this->params['rootJobTimestamp'] ) + 5;
            if ( $page->getLinksTimestamp() > wfTimestamp( TS_MW, $skewedTimestamp ) ) {
                // Something already updated the backlinks since this job was made
                return true;
            }
            if ( $page->getTouched() > wfTimestamp( TS_MW, $skewedTimestamp ) ) {
                $parserOutput = ParserCache::singleton()->getDirty( $page, $parserOptions );
                // Compare the cache time and the skewed timestamp in the same (UNIX) format
                if ( $parserOutput
                    && wfTimestamp( TS_UNIX, $parserOutput->getCacheTime() ) <= $skewedTimestamp
                ) {
                    $parserOutput = false; // too stale
                }
            }
        }
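        // Illustrative example of the skew arithmetic above (values hypothetical):
        // a root job with rootJobTimestamp "20140101000000" gives
        //   $skewedTimestamp = wfTimestamp( TS_UNIX, '20140101000000' ) + 5; // 1388534400 + 5
        //   wfTimestamp( TS_MW, $skewedTimestamp );                          // "20140101000005"
        // so only pages touched (or re-parsed) after that 5-second grace window
        // are treated as already up to date.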
        // Parse the current revision if no suitable cached output was found above...
        if ( $parserOutput === false ) {
            $start = microtime( true );
            // Revision ID must be passed to the parser output to get revision variables correct
            $parserOutput = $content->getParserOutput(
                $title, $revision->getId(), $parserOptions, false );
            $elapsed = microtime( true ) - $start;
            // If it took a long time to render, then save this back to the cache to avoid
            // wasted CPU by other Apache processes or job runners. We don't want to always
            // save to cache as this can cause high cache I/O and LRU churn when a template
            // changes.
            if ( $elapsed >= self::PARSE_THRESHOLD_SEC
                && $page->isParserCacheUsed( $parserOptions, $revision->getId() )
                && $parserOutput->isCacheable()
            ) {
                $ctime = wfTimestamp( TS_MW, (int)$start ); // cache time
                ParserCache::singleton()->save(
                    $parserOutput, $page, $parserOptions, $ctime, $revision->getId()
                );
            }
        }

        $updates = $content->getSecondaryDataUpdates( $title, null, false, $parserOutput );
        DataUpdate::runUpdates( $updates );

        InfoAction::invalidateCache( $title );

        return true;
    }
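
    // Illustrative sketch (an assumption about typical wikitext behaviour, not part of
    // this file): the secondary data updates run above usually include a LinksUpdate,
    // roughly equivalent to
    //
    //   $updates = array( new LinksUpdate( $title, $parserOutput, false /* recursive */ ) );
    //   DataUpdate::runUpdates( $updates );
    //
    // which rewrites rows in link tables such as pagelinks, templatelinks and
    // categorylinks for this title.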

    public function getDeduplicationInfo() {
        $info = parent::getDeduplicationInfo();
        if ( is_array( $info['params'] ) ) {
            // Don't let highly unique "masterPos" values ruin duplicate detection
            unset( $info['params']['masterPos'] );
            // For per-page jobs, the job title is that of the template that changed
            // (or similar), so remove that since it ruins duplicate detection
            if ( isset( $info['params']['pages'] ) ) {
                unset( $info['namespace'] );
                unset( $info['title'] );
            }
        }

        return $info;
    }
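
    // Illustrative sketch (an assumption about the array shape returned by the parent
    // Job::getDeduplicationInfo() in REL1_24; field values hypothetical): after the
    // stripping above, a single-page job's info resembles
    //
    //   array(
    //       'type'   => 'refreshLinks',
    //       'params' => array( 'pages' => array( 17 => array( 0, 'Foo' ) ), /* ... */ ),
    //   )
    //
    // with 'namespace', 'title', 'masterPos' and the root-job fields removed, so two
    // such jobs for the same page hash identically regardless of which template edit
    // enqueued them.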

    public function workItemCount() {
        return isset( $this->params['pages'] ) ? count( $this->params['pages'] ) : 1;
    }
}
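
// Illustrative usage sketch (not part of the original file). This mirrors how core
// code such as LinksUpdate enqueues a recursive refreshLinks job after a template
// edit in REL1_24; the title and root-job key below are hypothetical examples.
$title = Title::makeTitle( NS_TEMPLATE, 'Example' );
$job = new RefreshLinksJob(
    $title,
    array(
        'table' => 'templatelinks', // backlink table to traverse
        'recursive' => true, // partitioned into per-title jobs in run()
    ) + Job::newRootJobParams( // rootJobSignature/rootJobTimestamp for de-duplication
        "refreshlinks:templatelinks:{$title->getPrefixedText()}"
    )
);
JobQueueGroup::singleton()->push( $job );
JobQueueGroup::singleton()->deduplicateRootJob( $job );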