MediaWiki
REL1_20
|
<?php

/**
 * Job that re-parses one page and runs the secondary data updates that the
 * parse produces.
 */
class RefreshLinksJob extends Job {

	/**
	 * Registers the job under the 'refreshLinks' type.
	 *
	 * @param $title Target page, handed to the parent Job constructor
	 * @param $params Job parameters; run() reads the optional 'masterPos' key
	 * @param $id Job id, handed to the parent Job constructor
	 */
	public function __construct( $title, $params = '', $id = 0 ) {
		parent::__construct( 'refreshLinks', $title, $params, $id );
	}

	/**
	 * Re-parse the job's page and run the resulting data updates.
	 *
	 * @return bool False (with $this->error set) when the title is null or no
	 *   revision can be loaded for it; true otherwise.
	 */
	public function run() {
		wfProfileIn( __METHOD__ );

		// Start from an empty link cache.
		LinkCache::singleton()->clear();

		if ( $this->title === null ) {
			$this->error = "refreshLinks: Invalid title";
			wfProfileOut( __METHOD__ );
			return false;
		}

		# If the job carries a master position, wait for the slave DB handle to
		# catch up to it, so that page_latest is correct for templates or files
		# that changed only milliseconds ago and triggered this job.
		if ( isset( $this->params['masterPos'] ) ) {
			wfGetLB()->waitFor( $this->params['masterPos'] );
		}

		$rev = Revision::newFromTitle( $this->title, false, Revision::READ_NORMAL );
		if ( !$rev ) {
			$dbKey = $this->title->getPrefixedDBkey();
			$this->error = 'refreshLinks: Article not found "' . $dbKey . '"';
			wfProfileOut( __METHOD__ );
			return false; // XXX: what if it was just deleted?
		}

		self::runForTitleInternal( $this->title, $rev, __METHOD__ );

		wfProfileOut( __METHOD__ );
		return true;
	}

	/**
	 * Parse the text of a revision and run the secondary data updates
	 * reported by the parser output.
	 *
	 * @param $title Title Page the revision belongs to
	 * @param $revision Revision Revision whose text is parsed
	 * @param $fname string Prefix for the "-parse" and "-update" profiling sections
	 */
	public static function runForTitleInternal( Title $title, Revision $revision, $fname ) {
		global $wgParser, $wgContLang;

		$parseSection = "{$fname}-parse";
		$updateSection = "{$fname}-update";

		wfProfileIn( $parseSection );
		// Parse with options built from a fresh User object and the content language.
		$parserOptions = ParserOptions::newFromUserAndLang( new User, $wgContLang );
		$text = $revision->getText();
		$revId = $revision->getId();
		$output = $wgParser->parse( $text, $title, $parserOptions, true, true, $revId );
		wfProfileOut( $parseSection );

		wfProfileIn( $updateSection );
		DataUpdate::runUpdates( $output->getSecondaryDataUpdates( $title, false ) );
		wfProfileOut( $updateSection );
	}
}

/**
 * Job that refreshes the pages linking to a title, restricted to the range
 * given by the 'start' and 'end' parameters. Depending on how many pages are
 * found and on the SAPI in use, it either re-parses them directly or queues
 * smaller jobs.
 */
class RefreshLinksJob2 extends Job {
	/** Largest number of pages a single job will re-parse itself. */
	const MAX_TITLES_RUN = 10;

	/**
	 * Registers the job under the 'refreshLinks2' type.
	 *
	 * @param $title Page whose backlinks are refreshed, handed to the parent
	 * @param $params Job parameters; run() requires 'start' and 'end' and
	 *   reads the optional 'table' and 'masterPos' keys
	 * @param $id Job id, handed to the parent Job constructor
	 */
	public function __construct( $title, $params, $id = 0 ) {
		parent::__construct( 'refreshLinks2', $title, $params, $id );
	}

	/**
	 * Process the backlinks of the job's title that fall in the requested range.
	 *
	 * @return bool False (with $this->error set) when the title is null or
	 *   the 'start'/'end' parameters are missing; true otherwise.
	 */
	public function run() {
		wfProfileIn( __METHOD__ );

		// Start from an empty link cache.
		LinkCache::singleton()->clear();

		if ( $this->title === null ) {
			$this->error = "refreshLinks2: Invalid title";
			wfProfileOut( __METHOD__ );
			return false;
		}
		if ( !isset( $this->params['start'], $this->params['end'] ) ) {
			$this->error = "refreshLinks2: Invalid params";
			wfProfileOut( __METHOD__ );
			return false;
		}

		// Back compat for pre-r94435 jobs
		$table = 'templatelinks';
		if ( isset( $this->params['table'] ) ) {
			$table = $this->params['table'];
		}

		// Avoid slave lag when fetching templates
		$masterPos = false;
		if ( isset( $this->params['masterPos'] ) ) {
			$masterPos = $this->params['masterPos'];
		} elseif ( wfGetLB()->getServerCount() > 1 ) {
			$masterPos = wfGetLB()->getMasterPos();
		}

		$backlinks = $this->title->getBacklinkCache()->getLinks(
			$table, $this->params['start'], $this->params['end'] );

		if ( $backlinks->count() > self::MAX_TITLES_RUN ) {
			# Parsing too many pages in one job can starve other jobs, so split
			# the work into smaller jobs. The master position is passed along so
			# the runners that later pop these jobs spend less time waiting for
			# slaves to catch up, since time will have passed by then.
			$queued = array();
			$batchStart = 0; // article id of the first page in the current batch
			$batchEnd = 0; // article id of the last page seen in the current batch
			$batchCount = 0; // pages collected in the current batch
			foreach ( $backlinks as $page ) {
				$pageId = $page->getArticleId();
				if ( $batchCount === 0 ) {
					$batchStart = $pageId;
				}
				$batchEnd = $pageId;
				$batchCount++;
				if ( $batchCount >= self::MAX_TITLES_RUN ) {
					$queued[] = $this->newRangeJob( $table, $batchStart, $batchEnd, $masterPos );
					$batchStart = 0;
					$batchEnd = 0;
					$batchCount = 0;
				}
			}
			if ( $batchCount > 0 ) { // group remaining pages into a job
				$queued[] = $this->newRangeJob( $table, $batchStart, $batchEnd, $masterPos );
			}
			Job::batchInsert( $queued );
		} elseif ( php_sapi_name() !== 'cli' ) {
			# Not suitable for page load triggered job running!
			# Gracefully switch to refreshLinks jobs if this happens.
			$queued = array();
			foreach ( $backlinks as $page ) {
				$queued[] = new RefreshLinksJob( $page, array( 'masterPos' => $masterPos ) );
			}
			Job::batchInsert( $queued );
		} else {
			# Wait for the slave DB handle to catch up to the master position, so
			# that page_latest is correct for templates or files that changed
			# only milliseconds ago and triggered this job.
			if ( $masterPos ) {
				wfGetLB()->waitFor( $masterPos );
			}
			# Re-parse each page that transcludes this page and update their tracking links...
			foreach ( $backlinks as $page ) {
				$rev = Revision::newFromTitle( $page, false, Revision::READ_NORMAL );
				if ( $rev ) {
					RefreshLinksJob::runForTitleInternal( $page, $rev, __METHOD__ );
					wfWaitForSlaves();
				} else {
					// No revision: record the error and skip this page
					$this->error = 'refreshLinks: Article not found "' .
						$page->getPrefixedDBkey() . '"';
				}
			}
		}

		wfProfileOut( __METHOD__ );
		return true;
	}

	/**
	 * Build a RefreshLinksJob2 for this job's title covering one id range.
	 *
	 * @param $table Value for the 'table' parameter of the new job
	 * @param $start Value for the 'start' parameter of the new job
	 * @param $end Value for the 'end' parameter of the new job
	 * @param $masterPos Value for the 'masterPos' parameter of the new job
	 * @return RefreshLinksJob2
	 */
	private function newRangeJob( $table, $start, $end, $masterPos ) {
		return new RefreshLinksJob2( $this->title, array(
			'table' => $table,
			'start' => $start,
			'end' => $end,
			'masterPos' => $masterPos
		) );
	}
}