MediaWiki  REL1_21
RefreshLinksJob.php
Go to the documentation of this file.
00001 <?php
/**
 * Job of type "refreshLinks": re-parses the latest revision of a single title
 * and runs the secondary data updates derived from that parse.
 */
class RefreshLinksJob extends Job {
	/**
	 * @param Title $title Page whose secondary data should be refreshed
	 * @param array|string $params Job parameters; run() reads the optional "masterPos" entry
	 * @param int $id Job ID, passed through to the parent constructor
	 */
	function __construct( $title, $params = '', $id = 0 ) {
		parent::__construct( 'refreshLinks', $title, $params, $id );
		$this->removeDuplicates = true; // job is expensive
	}

	/**
	 * Run the refresh for $this->title.
	 *
	 * On failure, $this->error is set to a message describing the problem.
	 *
	 * @return bool False if the title is null or no revision could be loaded, true otherwise
	 */
	function run() {
		wfProfileIn( __METHOD__ );

		$linkCache = LinkCache::singleton();
		$linkCache->clear();

		if ( is_null( $this->title ) ) {
			$this->error = "refreshLinks: Invalid title";
			wfProfileOut( __METHOD__ );
			return false;
		}

		# Wait for the DB of the current/next slave DB handle to catch up to the master.
		# This way, we get the correct page_latest for templates or files that just changed
		# milliseconds ago, having triggered this job to begin with.
		#
		# RefreshLinksJob2 sets "masterPos" to false when it has no position to pass on.
		# isset() is true for false, so check for a non-empty value to avoid handing a
		# non-position to the load balancer.
		if ( !empty( $this->params['masterPos'] ) ) {
			wfGetLB()->waitFor( $this->params['masterPos'] );
		}

		$revision = Revision::newFromTitle( $this->title, false, Revision::READ_NORMAL );
		if ( !$revision ) {
			$this->error = 'refreshLinks: Article not found "' .
				$this->title->getPrefixedDBkey() . '"';
			wfProfileOut( __METHOD__ );
			return false; // XXX: what if it was just deleted?
		}

		self::runForTitleInternal( $this->title, $revision, __METHOD__ );

		wfProfileOut( __METHOD__ );
		return true;
	}

	/**
	 * Get the job description used for duplicate detection, without "masterPos".
	 *
	 * @return array Parent's de-duplication info with params['masterPos'] removed
	 */
	public function getDeduplicationInfo() {
		$info = parent::getDeduplicationInfo();
		// Don't let highly unique "masterPos" values ruin duplicate detection
		if ( is_array( $info['params'] ) ) {
			unset( $info['params']['masterPos'] );
		}
		return $info;
	}

	/**
	 * Parse the content of the given revision and run its secondary data updates.
	 *
	 * @param Title $title Title the updates are generated for
	 * @param Revision $revision Revision whose content is parsed
	 * @param string $fname Profiling section name passed to wfProfileIn()/wfProfileOut()
	 */
	public static function runForTitleInternal( Title $title, Revision $revision, $fname ) {
		wfProfileIn( $fname );
		$content = $revision->getContent( Revision::RAW );

		if ( !$content ) {
			// if there is no content, pretend the content is empty
			$content = $revision->getContentHandler()->makeEmptyContent();
		}

		// Revision ID must be passed to the parser output to get revision variables correct
		$parserOutput = $content->getParserOutput( $title, $revision->getId(), null, false );

		$updates = $content->getSecondaryDataUpdates( $title, null, false, $parserOutput );
		DataUpdate::runUpdates( $updates );
		wfProfileOut( $fname );
	}
}
00107 
/**
 * Job of type "refreshLinks2": fans out over the pages that link to a title,
 * queueing either per-page RefreshLinksJob jobs or smaller range-limited
 * RefreshLinksJob2 jobs.
 */
class RefreshLinksJob2 extends Job {
	/**
	 * @param Title $title Title whose backlinks are processed
	 * @param array $params Job parameters; "table", "start", "end" and "masterPos" are read
	 * @param int $id Job ID, passed through to the parent constructor
	 */
	function __construct( $title, $params, $id = 0 ) {
		parent::__construct( 'refreshLinks2', $title, $params, $id );
	}

	/**
	 * Build the follow-up jobs for this job and push them onto the queue.
	 *
	 * On failure, $this->error is set to a message describing the problem.
	 *
	 * @return bool False if the title is null, true otherwise
	 */
	function run() {
		global $wgUpdateRowsPerJob;

		wfProfileIn( __METHOD__ );

		LinkCache::singleton()->clear();

		if ( is_null( $this->title ) ) {
			$this->error = "refreshLinks2: Invalid title";
			wfProfileOut( __METHOD__ );
			return false;
		}

		// Jobs queued before r94435 carry no "table" parameter
		$table = 'templatelinks';
		if ( isset( $this->params['table'] ) ) {
			$table = $this->params['table'];
		}

		// Slave lag avoidance for template fetches: by the time the outermost job runs,
		// whoever enqueued it must already have committed its changes. The master
		// position is recorded at that point and handed down through every job this
		// one splits into, in the hope that the slaves have caught up to it once the
		// leaf jobs get popped.
		$masterPos = false;
		if ( isset( $this->params['masterPos'] ) ) {
			$masterPos = $this->params['masterPos'];
		} elseif ( wfGetLB()->getServerCount() > 1 ) {
			$masterPos = wfGetLB()->getMasterPos();
		}

		$backlinkCache = $this->title->getBacklinkCache();

		// A job with a "start"/"end" range is a partition job and always expands into
		// leaf jobs. A base job does the same only when it has few enough backlinks;
		// otherwise it is split into partition jobs.
		$isPartitionJob = isset( $this->params['start'] ) && isset( $this->params['end'] );
		if ( $isPartitionJob || $backlinkCache->getNumLinks( $table ) <= $wgUpdateRowsPerJob ) {
			$newJobs = $this->getSingleTitleJobs( $table, $masterPos );
		} else {
			$newJobs = array();
			$ranges = $backlinkCache->partition( $table, $wgUpdateRowsPerJob );
			foreach ( $ranges as $range ) {
				list( $rangeStart, $rangeEnd ) = $range;
				$rangeParams = array(
					'table'            => $table,
					'start'            => $rangeStart,
					'end'              => $rangeEnd,
					'masterPos'        => $masterPos,
				);
				// Root job parameters are carried along for de-duplication
				$newJobs[] = new RefreshLinksJob2(
					$this->title,
					$rangeParams + $this->getRootJobParams()
				);
			}
		}

		if ( count( $newJobs ) > 0 ) {
			JobQueueGroup::singleton()->push( $newJobs );
		}

		wfProfileOut( __METHOD__ );
		return true;
	}

	/**
	 * Create one RefreshLinksJob per backlinking title in this job's range.
	 *
	 * @param string $table Link table name handed to the backlink cache
	 * @param mixed $masterPos Master position stored in each job's "masterPos" parameter
	 * @return array List of RefreshLinksJob objects
	 */
	protected function getSingleTitleJobs( $table, $masterPos ) {
		// Base jobs have no "start"/"end"; false is passed in that case
		$rangeStart = false;
		if ( isset( $this->params['start'] ) ) {
			$rangeStart = $this->params['start'];
		}
		$rangeEnd = false;
		if ( isset( $this->params['end'] ) ) {
			$rangeEnd = $this->params['end'];
		}

		$backlinks = $this->title->getBacklinkCache()->getLinks( $table, $rangeStart, $rangeEnd );

		// Each backlink becomes its own single-page refresh job. That works well in
		// sapi mode and in any case helps job de-duplication: when many pages use
		// template A, which in turn uses template B, editing both creates lots of
		// duplicate jobs. Roughly, whichever "RefreshLinksJob" for a page runs first
		// removes the duplicates - although one page's job may be popped while the
		// other's is still buried inside a refreshLinks2 job.
		$singleJobs = array();
		foreach ( $backlinks as $backlinkTitle ) {
			// Root job parameters are carried along for de-duplication
			$jobParams = array( 'masterPos' => $masterPos ) + $this->getRootJobParams();
			$singleJobs[] = new RefreshLinksJob( $backlinkTitle, $jobParams );
		}
		return $singleJobs;
	}

	/**
	 * Get the job description used for duplicate detection, without "masterPos".
	 *
	 * @return array Parent's de-duplication info with params['masterPos'] removed
	 */
	public function getDeduplicationInfo() {
		$dedupInfo = parent::getDeduplicationInfo();
		if ( !is_array( $dedupInfo['params'] ) ) {
			return $dedupInfo;
		}
		// "masterPos" values are highly unique and would defeat duplicate detection
		unset( $dedupInfo['params']['masterPos'] );
		return $dedupInfo;
	}
}
00226 }