[ Index ] |
PHP Cross Reference of MediaWiki-1.24.0 |
[Summary view] [Print] [Text view]
<?php
/**
 * Job to update link tables for pages
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 * http://www.gnu.org/copyleft/gpl.html
 *
 * @file
 * @ingroup JobQueue
 */

/**
 * Job to update link tables for pages
 *
 * This job comes in a few variants:
 *   - a) Recursive jobs to update links for backlink pages for a given title.
 *        These jobs have (recursive:true,table:<table>) set.
 *   - b) Jobs to update links for a set of pages (the job title is ignored).
 *        These jobs have (pages:(<page ID>:(<namespace>,<title>),...) set.
 *   - c) Jobs to update links for a single page (the job title)
 *        These jobs need no extra fields set.
 *
 * @ingroup JobQueue
 */
class RefreshLinksJob extends Job {
	/**
	 * Minimum time (in seconds) a fresh parse must take before runForTitle()
	 * saves the resulting parser output back to the parser cache.
	 */
	const PARSE_THRESHOLD_SEC = 1.0;

	/**
	 * @param Title $title Job title, handed to the parent constructor
	 * @param array|string $params Job parameters; the keys read by this class are
	 *   'recursive', 'range', 'pages', 'masterPos' and 'rootJobTimestamp'
	 */
	public function __construct( $title, $params = '' ) {
		parent::__construct( 'refreshLinks', $title, $params );
		// Base backlink update jobs and per-title update jobs can be de-duplicated.
		// If template A changes twice before any jobs run, a clean queue will have:
		//     (A base, A base)
		// The second job is ignored by the queue on insertion.
		// Suppose, many pages use template A, and that template itself uses template B.
		// An edit to both will first create two base jobs. A clean FIFO queue will have:
		//     (A base, B base)
		// When these jobs run, the queue will have per-title and remnant partition jobs:
		//     (titleX,titleY,titleZ,...,A remnant,titleM,titleN,titleO,...,B remnant)
		// Some of these jobs will be the same, and will automatically be ignored by
		// the queue upon insertion. Some title jobs will run before the duplicate is
		// inserted, so the work will still be done twice in those cases. More titles
		// can be de-duplicated as the remnant jobs continue to be broken down. This
		// works best when $wgUpdateRowsPerJob, and either the pages have few backlinks
		// and/or the backlink sets for pages A and B are almost identical.
		$this->removeDuplicates = !isset( $params['range'] )
			&& ( !isset( $params['pages'] ) || count( $params['pages'] ) == 1 );
	}

	/**
	 * Run the job, dispatching on which variant it is (recursive backlink job,
	 * multi-page job, or single-title job).
	 *
	 * Per-title failures are recorded through setLastError() by runForTitle();
	 * they do not change the return value of this method.
	 *
	 * @return bool Always true
	 */
	public function run() {
		global $wgUpdateRowsPerJob;

		if ( !empty( $this->params['recursive'] ) ) {
			// Job to update all (or a range of) backlink pages for a page.
			// Carry over information for de-duplication
			$extraParams = $this->getRootJobParams();
			// Avoid slave lag when fetching templates.
			// When the outermost job is run, we know that the caller that enqueued it must have
			// committed the relevant changes to the DB by now. At that point, record the master
			// position and pass it along as the job recursively breaks into smaller range jobs.
			// Hopefully, when leaf jobs are popped, the slaves will have reached that position.
			if ( isset( $this->params['masterPos'] ) ) {
				$extraParams['masterPos'] = $this->params['masterPos'];
			} elseif ( wfGetLB()->getServerCount() > 1 ) {
				$extraParams['masterPos'] = wfGetLB()->getMasterPos();
			} else {
				$extraParams['masterPos'] = false;
			}
			// Convert this into no more than $wgUpdateRowsPerJob RefreshLinks per-title
			// jobs and possibly a recursive RefreshLinks job for the rest of the backlinks
			$jobs = BacklinkJobUtils::partitionBacklinkJob(
				$this,
				$wgUpdateRowsPerJob,
				1, // job-per-title
				array( 'params' => $extraParams )
			);
			JobQueueGroup::singleton()->push( $jobs );
		} elseif ( isset( $this->params['pages'] ) ) {
			// Job to update link tables for a set of titles.
			// The array keys (page IDs) are not needed here, only (namespace, DB key).
			foreach ( $this->params['pages'] as $nsAndKey ) {
				list( $ns, $dbKey ) = $nsAndKey;
				$this->runForTitle( Title::makeTitleSafe( $ns, $dbKey ) );
			}
		} else {
			// Job to update link tables for a given title
			$this->runForTitle( $this->title );
		}

		return true;
	}

	/**
	 * Refresh the link tables for one page.
	 *
	 * Reuses a sufficiently fresh parser cache entry when the job carries a
	 * 'rootJobTimestamp'; otherwise parses the current revision.
	 *
	 * @param Title|null $title Page to refresh; null is reported as an invalid title
	 * @return bool False if the title is null or no revision is found, true otherwise
	 */
	protected function runForTitle( Title $title = null ) {
		$linkCache = LinkCache::singleton();
		$linkCache->clear();

		if ( is_null( $title ) ) {
			$this->setLastError( "refreshLinks: Invalid title" );
			return false;
		}

		// Wait for the DB of the current/next slave DB handle to catch up to the master.
		// This way, we get the correct page_latest for templates or files that just changed
		// milliseconds ago, having triggered this job to begin with.
		if ( isset( $this->params['masterPos'] ) && $this->params['masterPos'] !== false ) {
			wfGetLB()->waitFor( $this->params['masterPos'] );
		}

		$page = WikiPage::factory( $title );

		// Fetch the current revision...
		$revision = Revision::newFromTitle( $title, false, Revision::READ_NORMAL );
		if ( !$revision ) {
			$this->setLastError( "refreshLinks: Article not found {$title->getPrefixedDBkey()}" );
			return false; // XXX: what if it was just deleted?
		}
		$content = $revision->getContent( Revision::RAW );
		if ( !$content ) {
			// If there is no content, pretend the content is empty
			$content = $revision->getContentHandler()->makeEmptyContent();
		}

		$parserOutput = false;
		$parserOptions = $page->makeParserOptions( 'canonical' );
		// If page_touched changed after this root job (with a good slave lag skew factor),
		// then it is likely that any views of the pages already resulted in re-parses which
		// are now in cache. This can be reused to avoid expensive parsing in some cases.
		if ( isset( $this->params['rootJobTimestamp'] ) ) {
			// Root job time plus a 5 second skew allowance, expressed in TS_MW form so
			// that every comparison below is made between timestamps of the same format.
			$skewedUnix = wfTimestamp( TS_UNIX, $this->params['rootJobTimestamp'] ) + 5;
			$skewedTimestamp = wfTimestamp( TS_MW, $skewedUnix );
			if ( $page->getLinksTimestamp() > $skewedTimestamp ) {
				// Something already updated the backlinks since this job was made
				return true;
			}
			if ( $page->getTouched() > $skewedTimestamp ) {
				$parserOutput = ParserCache::singleton()->getDirty( $page, $parserOptions );
				// NOTE(review): this assumes getCacheTime() yields a TS_MW timestamp, the
				// same format this class passes to ParserCache::save() below. Comparing it
				// against raw UNIX seconds would never flag an entry as stale.
				if ( $parserOutput && $parserOutput->getCacheTime() <= $skewedTimestamp ) {
					$parserOutput = false; // too stale
				}
			}
		}
		// Parse the current revision if no usable cached output was found...
		if ( !$parserOutput ) {
			$start = microtime( true );
			// Revision ID must be passed to the parser output to get revision variables correct
			$parserOutput = $content->getParserOutput(
				$title, $revision->getId(), $parserOptions, false );
			$elapsed = microtime( true ) - $start;
			// If it took a long time to render, then save this back to the cache to avoid
			// wasted CPU by other apaches or job runners. We don't want to always save to
			// cache as this can cause high cache I/O and LRU churn when a template changes.
			if ( $elapsed >= self::PARSE_THRESHOLD_SEC
				&& $page->isParserCacheUsed( $parserOptions, $revision->getId() )
				&& $parserOutput->isCacheable()
			) {
				$ctime = wfTimestamp( TS_MW, (int)$start ); // cache time
				ParserCache::singleton()->save(
					$parserOutput, $page, $parserOptions, $ctime, $revision->getId()
				);
			}
		}

		$updates = $content->getSecondaryDataUpdates( $title, null, false, $parserOutput );
		DataUpdate::runUpdates( $updates );

		InfoAction::invalidateCache( $title );

		return true;
	}

	/**
	 * Build the de-duplication info for this job, dropping the fields that would
	 * make otherwise identical jobs look different.
	 *
	 * @return array Info from the parent class, minus 'masterPos' in the params
	 *   and, for per-pages jobs, minus the job's own namespace and title
	 */
	public function getDeduplicationInfo() {
		$info = parent::getDeduplicationInfo();
		if ( is_array( $info['params'] ) ) {
			// Don't let highly unique "masterPos" values ruin duplicate detection
			unset( $info['params']['masterPos'] );
			// For per-pages jobs, the job title is that of the template that changed
			// (or similar), so remove that since it ruins duplicate detection.
			// 'pages' is a job parameter, so it must be looked up under 'params'.
			if ( isset( $info['params']['pages'] ) ) {
				unset( $info['namespace'] );
				unset( $info['title'] );
			}
		}

		return $info;
	}

	/**
	 * @return int Number of pages listed in the 'pages' parameter, or 1 if it is not set
	 */
	public function workItemCount() {
		return isset( $this->params['pages'] ) ? count( $this->params['pages'] ) : 1;
	}
}
title
Description
Body
title
Description
Body
title
Description
Body
title
Body
Generated: Fri Nov 28 14:03:12 2014 | Cross-referenced by PHPXref 0.7.1 |