MediaWiki
REL1_24
<?php

class RefreshLinksJob extends Job {
	const PARSE_THRESHOLD_SEC = 1.0;

	function __construct( $title, $params = '' ) {
		parent::__construct( 'refreshLinks', $title, $params );
		// Base backlink update jobs and per-title update jobs can be de-duplicated.
		// If template A changes twice before any jobs run, a clean queue will have:
		//		(A base, A base)
		// The second job is ignored by the queue on insertion.
		// Suppose many pages use template A, and that template itself uses template B.
		// An edit to both will first create two base jobs. A clean FIFO queue will have:
		//		(A base, B base)
		// When these jobs run, the queue will have per-title and remnant partition jobs:
		//		(titleX,titleY,titleZ,...,A remnant,titleM,titleN,titleO,...,B remnant)
		// Some of these jobs will be the same, and will automatically be ignored by
		// the queue upon insertion. Some title jobs will run before the duplicate is
		// inserted, so the work will still be done twice in those cases. More titles
		// can be de-duplicated as the remnant jobs continue to be broken down. This
		// works best when $wgUpdateRowsPerJob is small, and either the pages have few
		// backlinks and/or the backlink sets for pages A and B are almost identical.
		$this->removeDuplicates = !isset( $params['range'] )
			&& ( !isset( $params['pages'] ) || count( $params['pages'] ) == 1 );
	}

	function run() {
		global $wgUpdateRowsPerJob;

		// Job to update all (or a range of) backlink pages for a page
		if ( !empty( $this->params['recursive'] ) ) {
			// Carry over information for de-duplication
			$extraParams = $this->getRootJobParams();
			// Avoid slave lag when fetching templates.
			// When the outermost job is run, we know that the caller that enqueued it must have
			// committed the relevant changes to the DB by now. At that point, record the master
			// position and pass it along as the job recursively breaks into smaller range jobs.
			// Hopefully, when leaf jobs are popped, the slaves will have reached that position.
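			// Illustration with hypothetical numbers: if $wgUpdateRowsPerJob is 300 and the
			// template has 1000 backlinks, the partition step below yields roughly 300
			// per-title jobs plus one remnant job whose 'range' param covers the other
			// 700 titles; the remnant re-partitions itself the same way when it runs.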
			if ( isset( $this->params['masterPos'] ) ) {
				$extraParams['masterPos'] = $this->params['masterPos'];
			} elseif ( wfGetLB()->getServerCount() > 1 ) {
				$extraParams['masterPos'] = wfGetLB()->getMasterPos();
			} else {
				$extraParams['masterPos'] = false;
			}
			// Convert this into no more than $wgUpdateRowsPerJob RefreshLinks per-title
			// jobs and possibly a recursive RefreshLinks job for the rest of the backlinks
			$jobs = BacklinkJobUtils::partitionBacklinkJob(
				$this,
				$wgUpdateRowsPerJob,
				1, // job-per-title
				array( 'params' => $extraParams )
			);
			JobQueueGroup::singleton()->push( $jobs );
		// Job to update link tables for a set of titles
		} elseif ( isset( $this->params['pages'] ) ) {
			foreach ( $this->params['pages'] as $pageId => $nsAndKey ) {
				list( $ns, $dbKey ) = $nsAndKey;
				$this->runForTitle( Title::makeTitleSafe( $ns, $dbKey ) );
			}
		// Job to update link tables for a given title
		} else {
			$this->runForTitle( $this->title );
		}

		return true;
	}

	protected function runForTitle( Title $title = null ) {
		$linkCache = LinkCache::singleton();
		$linkCache->clear();

		if ( is_null( $title ) ) {
			$this->setLastError( "refreshLinks: Invalid title" );
			return false;
		}

		// Wait for the DB of the current/next slave DB handle to catch up to the master.
		// This way, we get the correct page_latest for templates or files that just changed
		// milliseconds ago, having triggered this job to begin with.
		if ( isset( $this->params['masterPos'] ) && $this->params['masterPos'] !== false ) {
			wfGetLB()->waitFor( $this->params['masterPos'] );
		}

		$page = WikiPage::factory( $title );

		// Fetch the current revision...
		$revision = Revision::newFromTitle( $title, false, Revision::READ_NORMAL );
		if ( !$revision ) {
			$this->setLastError( "refreshLinks: Article not found {$title->getPrefixedDBkey()}" );
			return false; // XXX: what if it was just deleted?
		}
		$content = $revision->getContent( Revision::RAW );
		if ( !$content ) {
			// If there is no content, pretend the content is empty
			$content = $revision->getContentHandler()->makeEmptyContent();
		}

		$parserOutput = false;
		$parserOptions = $page->makeParserOptions( 'canonical' );
		// If page_touched changed after this root job (with a good slave lag skew factor),
		// then it is likely that any views of the pages already resulted in re-parses which
		// are now in cache. This can be reused to avoid expensive parsing in some cases.
		if ( isset( $this->params['rootJobTimestamp'] ) ) {
			$skewedTimestamp = wfTimestamp( TS_UNIX, $this->params['rootJobTimestamp'] ) + 5;
			if ( $page->getLinksTimestamp() > wfTimestamp( TS_MW, $skewedTimestamp ) ) {
				// Something already updated the backlinks since this job was made
				return true;
			}
			if ( $page->getTouched() > wfTimestamp( TS_MW, $skewedTimestamp ) ) {
				$parserOutput = ParserCache::singleton()->getDirty( $page, $parserOptions );
				// Compare as UNIX timestamps; getCacheTime() returns a TS_MW string
				if ( $parserOutput
					&& wfTimestamp( TS_UNIX, $parserOutput->getCacheTime() ) <= $skewedTimestamp
				) {
					$parserOutput = false; // too stale
				}
			}
		}
		// Fetch the current revision and parse it if necessary...
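		// Note on the parse below: the final "false" argument is Content::getParserOutput()'s
		// $generateHtml flag; only the links metadata in the ParserOutput is needed here.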
		if ( $parserOutput === false ) {
			$start = microtime( true );
			// Revision ID must be passed to the parser output to get revision variables correct
			$parserOutput = $content->getParserOutput(
				$title, $revision->getId(), $parserOptions, false );
			$elapsed = microtime( true ) - $start;
			// If it took a long time to render, then save this back to the cache to avoid
			// wasted CPU by other apaches or job runners. We don't want to always save to
			// cache as this can cause high cache I/O and LRU churn when a template changes.
			if ( $elapsed >= self::PARSE_THRESHOLD_SEC
				&& $page->isParserCacheUsed( $parserOptions, $revision->getId() )
				&& $parserOutput->isCacheable()
			) {
				$ctime = wfTimestamp( TS_MW, (int)$start ); // cache time
				ParserCache::singleton()->save(
					$parserOutput, $page, $parserOptions, $ctime, $revision->getId()
				);
			}
		}

		$updates = $content->getSecondaryDataUpdates( $title, null, false, $parserOutput );
		DataUpdate::runUpdates( $updates );

		InfoAction::invalidateCache( $title );

		return true;
	}

	public function getDeduplicationInfo() {
		$info = parent::getDeduplicationInfo();
		if ( is_array( $info['params'] ) ) {
			// Don't let highly unique "masterPos" values ruin duplicate detection
			unset( $info['params']['masterPos'] );
			// For per-page jobs, the job title is that of the template that changed
			// (or similar), so remove that since it ruins duplicate detection
			if ( isset( $info['params']['pages'] ) ) {
				unset( $info['namespace'] );
				unset( $info['title'] );
			}
		}

		return $info;
	}

	public function workItemCount() {
		return isset( $this->params['pages'] ) ? count( $this->params['pages'] ) : 1;
	}
}
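A minimal usage sketch (modeled on LinksUpdate::queueRecursiveJobs() in this branch; the
root-job key string and the choice of the templatelinks table are illustrative):

	// Enqueue a recursive refreshLinks job for every page that embeds $title.
	// Job::newRootJobParams() supplies the rootJobSignature/rootJobTimestamp
	// values that the constructor and getDeduplicationInfo() above rely on.
	$job = new RefreshLinksJob(
		$title,
		array(
			'table' => 'templatelinks', // backlink table to traverse
			'recursive' => true, // take the partitioning branch of run()
		) + Job::newRootJobParams(
			"refreshlinks:templatelinks:{$title->getPrefixedText()}"
		)
	);
	JobQueueGroup::singleton()->push( $job );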