MediaWiki  REL1_20
RefreshLinksJob.php
Go to the documentation of this file.
00001 <?php
/**
 * Background job that re-parses a single page and runs the secondary data
 * updates produced by that parse.
 */
class RefreshLinksJob extends Job {

	/**
	 * @param $title Title page whose links are refreshed
	 * @param $params array|string job parameters; run() reads the optional
	 *   'masterPos' entry
	 * @param $id int job id, handed to the parent constructor unchanged
	 */
	function __construct( $title, $params = '', $id = 0 ) {
		parent::__construct( 'refreshLinks', $title, $params, $id );
	}

	/**
	 * Run the refreshLinks job for $this->title.
	 *
	 * @return boolean true on success; false (with $this->error set) when the
	 *   title is null or no revision can be loaded for it
	 */
	function run() {
		wfProfileIn( __METHOD__ );

		$linkCache = LinkCache::singleton();
		$linkCache->clear();

		if ( is_null( $this->title ) ) {
			$this->error = "refreshLinks: Invalid title";
			wfProfileOut( __METHOD__ );
			return false;
		}

		# Wait for the DB of the current/next slave DB handle to catch up to the master.
		# This way, we get the correct page_latest for templates or files that just changed
		# milliseconds ago, having triggered this job to begin with.
		#
		# RefreshLinksJob2 enqueues these jobs with 'masterPos' => false when it has no
		# master position. isset() is true for false, so test for a non-empty value
		# instead; this matches the truthiness check RefreshLinksJob2 does before waiting.
		if ( !empty( $this->params['masterPos'] ) ) {
			wfGetLB()->waitFor( $this->params['masterPos'] );
		}

		$revision = Revision::newFromTitle( $this->title, false, Revision::READ_NORMAL );
		if ( !$revision ) {
			$this->error = 'refreshLinks: Article not found "' .
				$this->title->getPrefixedDBkey() . '"';
			wfProfileOut( __METHOD__ );
			return false; // XXX: what if it was just deleted?
		}

		self::runForTitleInternal( $this->title, $revision, __METHOD__ );

		wfProfileOut( __METHOD__ );
		return true;
	}

	/**
	 * Parse the text of $revision in the context of $title and run the
	 * secondary data updates of the resulting parser output.
	 *
	 * The parse uses the global parser with options built from a fresh User
	 * object and the content language.
	 *
	 * @param $title Title page being refreshed
	 * @param $revision Revision revision whose text and id are parsed
	 * @param $fname string caller name; used as the prefix of the '-parse'
	 *   and '-update' profiling sections
	 */
	public static function runForTitleInternal( Title $title, Revision $revision, $fname ) {
		global $wgParser, $wgContLang;

		wfProfileIn( $fname . '-parse' );
		$options = ParserOptions::newFromUserAndLang( new User, $wgContLang );
		$parserOutput = $wgParser->parse(
			$revision->getText(), $title, $options, true, true, $revision->getId() );
		wfProfileOut( $fname . '-parse' );

		wfProfileIn( $fname . '-update' );
		$updates = $parserOutput->getSecondaryDataUpdates( $title, false );
		DataUpdate::runUpdates( $updates );
		wfProfileOut( $fname . '-update' );
	}
}
00087 
/**
 * Background job that refreshes the links of the pages returned by the
 * backlink cache of $this->title for a given table and start/end range.
 * Depending on the number of pages and on the PHP SAPI, it either splits
 * itself into smaller jobs, delegates to RefreshLinksJob, or re-parses the
 * pages directly.
 */
class RefreshLinksJob2 extends Job {
	/** Upper bound on the number of pages a single job re-parses itself */
	const MAX_TITLES_RUN = 10;

	/**
	 * @param $title Title page whose backlinks are refreshed
	 * @param $params array job parameters: 'start' and 'end' are required by
	 *   run(); 'table' and 'masterPos' are optional
	 * @param $id int job id, handed to the parent constructor unchanged
	 */
	function __construct( $title, $params, $id = 0 ) {
		parent::__construct( 'refreshLinks2', $title, $params, $id );
	}

	/**
	 * Run the refreshLinks2 job.
	 *
	 * @return boolean false (with $this->error set) when the title is null or
	 *   the 'start'/'end' parameters are missing; true otherwise
	 */
	function run() {
		wfProfileIn( __METHOD__ );

		LinkCache::singleton()->clear();

		if ( is_null( $this->title ) ) {
			$this->error = "refreshLinks2: Invalid title";
			wfProfileOut( __METHOD__ );
			return false;
		}
		if ( !isset( $this->params['start'], $this->params['end'] ) ) {
			$this->error = "refreshLinks2: Invalid params";
			wfProfileOut( __METHOD__ );
			return false;
		}

		// Back compat for pre-r94435 jobs, which carry no 'table' parameter
		if ( isset( $this->params['table'] ) ) {
			$table = $this->params['table'];
		} else {
			$table = 'templatelinks';
		}

		// Avoid slave lag when fetching templates: reuse the position stored in
		// the job if there is one, otherwise ask the load balancer for it when
		// more than one server is configured.
		$masterPos = false;
		if ( isset( $this->params['masterPos'] ) ) {
			$masterPos = $this->params['masterPos'];
		} elseif ( wfGetLB()->getServerCount() > 1 ) {
			$masterPos = wfGetLB()->getMasterPos();
		}

		$titles = $this->title->getBacklinkCache()->getLinks(
			$table, $this->params['start'], $this->params['end'] );

		if ( $titles->count() > self::MAX_TITLES_RUN ) {
			# We don't want to parse too many pages per job as it can starve other jobs.
			# If there are too many pages to parse, break this up into smaller jobs. By
			# passing in the master position here we can cut down on the time spent
			# waiting for slaves to catch up by the runners handling these jobs, since
			# time will have passed between now and when they pop these jobs off the queue.
			$batchStart = 0; // article id of the first page in the current batch
			$batchEnd   = 0; // article id of the last page seen in the current batch
			$batchCount = 0; // number of pages in the current batch
			$splitJobs  = array();
			foreach ( $titles as $backlink ) {
				if ( $batchCount === 0 ) {
					// First page of a new batch
					$batchStart = $backlink->getArticleId();
				}
				$batchEnd = $backlink->getArticleId();
				$batchCount++;
				if ( $batchCount >= self::MAX_TITLES_RUN ) {
					$splitJobs[] = $this->newRangeJob( $table, $batchStart, $batchEnd, $masterPos );
					$batchStart = $batchEnd = $batchCount = 0;
				}
			}
			if ( $batchCount > 0 ) {
				// Group the remaining pages into one last job
				$splitJobs[] = $this->newRangeJob( $table, $batchStart, $batchEnd, $masterPos );
			}
			Job::batchInsert( $splitJobs );
		} elseif ( php_sapi_name() != 'cli' ) {
			# Not suitable for page load triggered job running!
			# Gracefully switch to refreshLinks jobs if this happens.
			$singleJobs = array();
			foreach ( $titles as $backlink ) {
				$singleJobs[] = new RefreshLinksJob( $backlink, array( 'masterPos' => $masterPos ) );
			}
			Job::batchInsert( $singleJobs );
		} else {
			# Wait for the DB of the current/next slave DB handle to catch up to the master.
			# This way, we get the correct page_latest for templates or files that just
			# changed milliseconds ago, having triggered this job to begin with.
			if ( $masterPos ) {
				wfGetLB()->waitFor( $masterPos );
			}
			# Re-parse each page that transcludes this page and update their tracking links...
			foreach ( $titles as $backlink ) {
				$revision = Revision::newFromTitle( $backlink, false, Revision::READ_NORMAL );
				if ( $revision ) {
					RefreshLinksJob::runForTitleInternal( $backlink, $revision, __METHOD__ );
					wfWaitForSlaves();
				} else {
					// Record the miss and move on to the next page
					$this->error = 'refreshLinks: Article not found "' .
						$backlink->getPrefixedDBkey() . '"';
				}
			}
		}

		wfProfileOut( __METHOD__ );
		return true;
	}

	/**
	 * Build a refreshLinks2 job for $this->title covering one start/end range.
	 *
	 * @param $table string value for the 'table' parameter
	 * @param $start int value for the 'start' parameter
	 * @param $end int value for the 'end' parameter
	 * @param $masterPos mixed value for the 'masterPos' parameter
	 * @return RefreshLinksJob2
	 */
	private function newRangeJob( $table, $start, $end, $masterPos ) {
		return new RefreshLinksJob2( $this->title, array(
			'table'     => $table,
			'start'     => $start,
			'end'       => $end,
			'masterPos' => $masterPos
		) );
	}
}