MediaWiki  REL1_22
RefreshLinksJob.php
Go to the documentation of this file.
00001 <?php
/**
 * Job of type "refreshLinks".
 *
 * Loads a revision for one title via Revision::newFromTitle() and re-runs the
 * secondary data updates derived from that revision's content.
 */
class RefreshLinksJob extends Job {
    /**
     * @param Title $title Forwarded to Job::__construct()
     * @param array|string $params Job parameters; run() reads the optional "masterPos" entry
     * @param int $id Forwarded to Job::__construct()
     */
    function __construct( $title, $params = '', $id = 0 ) {
        parent::__construct( 'refreshLinks', $title, $params, $id );
        // The job is expensive, so flag it for duplicate removal
        $this->removeDuplicates = true;
    }

    /**
     * Execute the job for $this->title.
     *
     * @return bool False (with $this->error set) if the title is null or no
     *   revision could be loaded for it; true once the updates have been run
     */
    function run() {
        LinkCache::singleton()->clear();

        if ( $this->title === null ) {
            $this->error = "refreshLinks: Invalid title";
            return false;
        }

        # When a master position was handed to this job, let the slave DB catch up
        # to it before reading. Templates or files that changed only milliseconds
        # ago (and triggered this job) are then seen with the correct page_latest.
        $masterPos = isset( $this->params['masterPos'] ) ? $this->params['masterPos'] : false;
        if ( $masterPos !== false ) {
            wfGetLB()->waitFor( $masterPos );
        }

        $revision = Revision::newFromTitle( $this->title, false, Revision::READ_NORMAL );
        if ( $revision ) {
            self::runForTitleInternal( $this->title, $revision, __METHOD__ );

            return true;
        }

        $dbKey = $this->title->getPrefixedDBkey();
        $this->error = 'refreshLinks: Article not found "' . $dbKey . '"';

        return false; // XXX: what if it was just deleted?
    }

    /**
     * Same as the parent's de-duplication info, minus the "masterPos" parameter.
     *
     * @return array
     */
    public function getDeduplicationInfo() {
        $dedupInfo = parent::getDeduplicationInfo();
        if ( !is_array( $dedupInfo['params'] ) ) {
            return $dedupInfo;
        }
        // "masterPos" values are highly unique and would ruin duplicate detection
        unset( $dedupInfo['params']['masterPos'] );

        return $dedupInfo;
    }

    /**
     * Parse the given revision's content and run the resulting secondary data
     * updates, then invalidate the InfoAction cache for the title.
     *
     * @param Title $title
     * @param Revision $revision
     * @param string $fname Name handed to the wfProfileIn()/wfProfileOut() pair
     */
    public static function runForTitleInternal( Title $title, Revision $revision, $fname ) {
        wfProfileIn( $fname );

        // A revision without content is treated as if its content were empty
        $content = $revision->getContent( Revision::RAW )
            ?: $revision->getContentHandler()->makeEmptyContent();

        // The parser output needs the revision ID so that revision variables come out right
        $revId = $revision->getId();
        $parserOutput = $content->getParserOutput( $title, $revId, null, false );

        DataUpdate::runUpdates(
            $content->getSecondaryDataUpdates( $title, null, false, $parserOutput )
        );

        InfoAction::invalidateCache( $title );

        wfProfileOut( $fname );
    }
}
00105 
/**
 * Job of type "refreshLinks2".
 *
 * Turns the backlinks of a title (looked up through its BacklinkCache) into
 * per-title RefreshLinksJob jobs. When the title has more than
 * $wgUpdateRowsPerJob links, it first splits the work into "start"/"end"
 * range jobs of this same class.
 */
class RefreshLinksJob2 extends Job {
    /**
     * @param Title $title Forwarded to Job::__construct()
     * @param array $params Job parameters; "table", "start", "end" and "masterPos" are read
     * @param int $id Forwarded to Job::__construct()
     */
    function __construct( $title, $params, $id = 0 ) {
        parent::__construct( 'refreshLinks2', $title, $params, $id );
        // Only base jobs (no range given) for large templates can easily be de-duplicated
        $this->removeDuplicates = !( isset( $params['start'] ) || isset( $params['end'] ) );
    }

    /**
     * Execute the job: build the follow-up jobs and push them to the queue.
     *
     * @return bool False (with $this->error set) if the title is null, true otherwise
     */
    function run() {
        global $wgUpdateRowsPerJob;

        LinkCache::singleton()->clear();

        if ( $this->title === null ) {
            $this->error = "refreshLinks2: Invalid title";
            return false;
        }

        // Back compat for pre-r94435 jobs
        $table = 'templatelinks';
        if ( isset( $this->params['table'] ) ) {
            $table = $this->params['table'];
        }

        // Avoid slave lag when fetching templates.
        // By the time the outermost job runs, whoever enqueued it must already have
        // committed the relevant changes. Record the master position at that point and
        // hand it down as the job recursively breaks into smaller range jobs, in the
        // hope that the slaves have reached that position once leaf jobs are popped.
        if ( isset( $this->params['masterPos'] ) ) {
            $masterPos = $this->params['masterPos'];
        } else {
            $masterPos = wfGetLB()->getServerCount() > 1
                ? wfGetLB()->getMasterPos()
                : false;
        }

        $backlinkCache = $this->title->getBacklinkCache();

        # A partition job (both range ends given) goes straight to leaf jobs. So does a
        # base job whose link count fits into a single batch. The link count is only
        # looked up for base jobs.
        $isPartitionJob = isset( $this->params['start'], $this->params['end'] );
        if ( $isPartitionJob
            || $backlinkCache->getNumLinks( $table, $wgUpdateRowsPerJob + 1 ) <= $wgUpdateRowsPerJob
        ) {
            $jobs = $this->getSingleTitleJobs( $table, $masterPos );
        } else {
            # Base job with too many links: queue one partition job per range
            $jobs = array();
            foreach ( $backlinkCache->partition( $table, $wgUpdateRowsPerJob ) as $range ) {
                list( $from, $to ) = $range;
                $rangeParams = array(
                    'table' => $table,
                    'start' => $from,
                    'end' => $to,
                    'masterPos' => $masterPos,
                ) + $this->getRootJobParams(); // carry over information for de-duplication
                $jobs[] = new self( $this->title, $rangeParams );
            }
        }

        if ( count( $jobs ) > 0 ) {
            JobQueueGroup::singleton()->push( $jobs );
        }

        return true;
    }

    /**
     * Build one RefreshLinksJob per title linking to $this->title within this
     * job's "start"/"end" range.
     *
     * @param string $table Link table name passed to BacklinkCache::getLinks()
     * @param mixed $masterPos Master position handed on to each job, or false
     * @return array List of RefreshLinksJob objects
     */
    protected function getSingleTitleJobs( $table, $masterPos ) {
        # Base jobs carry no "start"/"end" fields; false is used in their place
        $from = isset( $this->params['start'] ) ? $this->params['start'] : false;
        $to = isset( $this->params['end'] ) ? $this->params['end'] : false;

        $backlinks = $this->title->getBacklinkCache()->getLinks( $table, $from, $to );

        # Emit single-page refresh links jobs, which helps job de-duplication.
        # If many pages use template A, and template A itself uses template B, an
        # edit to both creates many duplicate jobs. Roughly, whichever
        # "RefreshLinksJob" for a page runs first removes the duplicates. One page's
        # job may still be popped while the other's is buried inside a refreshLinks2
        # job.
        $leafJobs = array();
        foreach ( $backlinks as $backlinkTitle ) {
            // Root job params are carried over for de-duplication
            $leafParams = array( 'masterPos' => $masterPos ) + $this->getRootJobParams();
            $leafJobs[] = new RefreshLinksJob( $backlinkTitle, $leafParams );
        }

        return $leafJobs;
    }

    /**
     * Same as the parent's de-duplication info, minus the "masterPos" parameter.
     *
     * @return array
     */
    public function getDeduplicationInfo() {
        $dedupInfo = parent::getDeduplicationInfo();
        if ( !is_array( $dedupInfo['params'] ) ) {
            return $dedupInfo;
        }
        // "masterPos" values are highly unique and would ruin duplicate detection
        unset( $dedupInfo['params']['masterPos'] );

        return $dedupInfo;
    }
}