[ Index ]

PHP Cross Reference of MediaWiki-1.24.0

title

Body

[close]

/includes/jobqueue/jobs/ -> RefreshLinksJob.php (source)

   1  <?php
   2  /**
   3   * Job to update link tables for pages
   4   *
   5   * This program is free software; you can redistribute it and/or modify
   6   * it under the terms of the GNU General Public License as published by
   7   * the Free Software Foundation; either version 2 of the License, or
   8   * (at your option) any later version.
   9   *
  10   * This program is distributed in the hope that it will be useful,
  11   * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13   * GNU General Public License for more details.
  14   *
  15   * You should have received a copy of the GNU General Public License along
  16   * with this program; if not, write to the Free Software Foundation, Inc.,
  17   * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  18   * http://www.gnu.org/copyleft/gpl.html
  19   *
  20   * @file
  21   * @ingroup JobQueue
  22   */
  23  
  24  /**
  25   * Job to update link tables for pages
  26   *
  27   * This job comes in a few variants:
  28   *   - a) Recursive jobs to update links for backlink pages for a given title.
  29   *        These jobs have (recursive:true,table:<table>) set.
  30   *   - b) Jobs to update links for a set of pages (the job title is ignored).
  31   *        These jobs have (pages:(<page ID>:(<namespace>,<title>),...)) set.
  32   *   - c) Jobs to update links for a single page (the job title)
  33   *        These jobs need no extra fields set.
  34   *
  35   * @ingroup JobQueue
  36   */
  class RefreshLinksJob extends Job {
      /**
       * Parse duration (in seconds) at or above which freshly generated parser
       * output is written back to the parser cache by runForTitle().
       */
      const PARSE_THRESHOLD_SEC = 1.0;

      /**
       * @param Title $title Job title (the changed page for recursive/per-pages jobs,
       *   or the page to refresh for single-page jobs)
       * @param array|string $params Job parameters; the keys read by this class are
       *   'recursive', 'range', 'pages', 'masterPos' and 'rootJobTimestamp'
       */
      public function __construct( $title, $params = '' ) {
          parent::__construct( 'refreshLinks', $title, $params );
          // Base backlink update jobs and per-title update jobs can be de-duplicated.
          // If template A changes twice before any jobs run, a clean queue will have:
          //        (A base, A base)
          // The second job is ignored by the queue on insertion.
          // Suppose many pages use template A, and that template itself uses template B.
          // An edit to both will first create two base jobs. A clean FIFO queue will have:
          //        (A base, B base)
          // When these jobs run, the queue will have per-title and remnant partition jobs:
          //        (titleX,titleY,titleZ,...,A remnant,titleM,titleN,titleO,...,B remnant)
          // Some of these jobs will be the same, and will automatically be ignored by
          // the queue upon insertion. Some title jobs will run before the duplicate is
          // inserted, so the work will still be done twice in those cases. More titles
          // can be de-duplicated as the remnant jobs continue to be broken down. How well
          // this works depends on $wgUpdateRowsPerJob; it works best when the pages have
          // few backlinks and/or the backlink sets for pages A and B are almost identical.
          //
          // Range (remnant) jobs and multi-page jobs are never de-duplicated.
          $this->removeDuplicates = !isset( $params['range'] )
              && ( !isset( $params['pages'] ) || count( $params['pages'] ) == 1 );
      }

      /**
       * Run the job, dispatching on which variant it is:
       *  - recursive: partition the backlinks into per-title jobs (plus possibly a
       *    remnant recursive job) and push them onto the queue;
       *  - pages: refresh links for every listed page;
       *  - otherwise: refresh links for the job title.
       *
       * @return bool Always true; per-title failures are recorded via setLastError()
       */
      public function run() {
          global $wgUpdateRowsPerJob;

          // Job to update all (or a range of) backlink pages for a page
          if ( !empty( $this->params['recursive'] ) ) {
              // Carry over information for de-duplication
              $extraParams = $this->getRootJobParams();
              // Avoid slave lag when fetching templates.
              // When the outermost job is run, we know that the caller that enqueued it must have
              // committed the relevant changes to the DB by now. At that point, record the master
              // position and pass it along as the job recursively breaks into smaller range jobs.
              // Hopefully, when leaf jobs are popped, the slaves will have reached that position.
              if ( isset( $this->params['masterPos'] ) ) {
                  $extraParams['masterPos'] = $this->params['masterPos'];
              } elseif ( wfGetLB()->getServerCount() > 1 ) {
                  $extraParams['masterPos'] = wfGetLB()->getMasterPos();
              } else {
                  $extraParams['masterPos'] = false;
              }
              // Convert this into no more than $wgUpdateRowsPerJob RefreshLinks per-title
              // jobs and possibly a recursive RefreshLinks job for the rest of the backlinks
              $jobs = BacklinkJobUtils::partitionBacklinkJob(
                  $this,
                  $wgUpdateRowsPerJob,
                  1, // job-per-title
                  array( 'params' => $extraParams )
              );
              JobQueueGroup::singleton()->push( $jobs );
          // Job to update link tables for a set of titles
          } elseif ( isset( $this->params['pages'] ) ) {
              // Entries are keyed by page ID; only the (namespace, DB key) pair is needed
              foreach ( $this->params['pages'] as $nsAndKey ) {
                  list( $ns, $dbKey ) = $nsAndKey;
                  $this->runForTitle( Title::makeTitleSafe( $ns, $dbKey ) );
              }
          // Job to update link tables for a given title
          } else {
              $this->runForTitle( $this->title );
          }

          return true;
      }

      /**
       * Refresh the link tables for a single page, re-using cached parser output
       * when it is known to be recent enough and parsing the current revision otherwise.
       *
       * @param Title|null $title Page to refresh; null is reported as an invalid title
       * @return bool False (with the last error set) if the title is invalid or has no
       *   current revision; true otherwise
       */
      protected function runForTitle( Title $title = null ) {
          $linkCache = LinkCache::singleton();
          $linkCache->clear();

          if ( is_null( $title ) ) {
              $this->setLastError( "refreshLinks: Invalid title" );
              return false;
          }

          // Wait for the DB of the current/next slave DB handle to catch up to the master.
          // This way, we get the correct page_latest for templates or files that just changed
          // milliseconds ago, having triggered this job to begin with.
          if ( isset( $this->params['masterPos'] ) && $this->params['masterPos'] !== false ) {
              wfGetLB()->waitFor( $this->params['masterPos'] );
          }

          $page = WikiPage::factory( $title );

          // Fetch the current revision...
          $revision = Revision::newFromTitle( $title, false, Revision::READ_NORMAL );
          if ( !$revision ) {
              $this->setLastError( "refreshLinks: Article not found {$title->getPrefixedDBkey()}" );
              return false; // XXX: what if it was just deleted?
          }
          $content = $revision->getContent( Revision::RAW );
          if ( !$content ) {
              // If there is no content, pretend the content is empty
              $content = $revision->getContentHandler()->makeEmptyContent();
          }

          $parserOutput = false;
          $parserOptions = $page->makeParserOptions( 'canonical' );
          // If page_touched changed after this root job (with a good slave lag skew factor),
          // then it is likely that any views of the pages already resulted in re-parses which
          // are now in cache. This can be reused to avoid expensive parsing in some cases.
          if ( isset( $this->params['rootJobTimestamp'] ) ) {
              // Root job time plus a 5 second skew allowance, normalised to TS_MW once so
              // that every comparison below is made between values of the same format.
              $skewedTimestamp = wfTimestamp(
                  TS_MW,
                  wfTimestamp( TS_UNIX, $this->params['rootJobTimestamp'] ) + 5
              );
              if ( $page->getLinksTimestamp() > $skewedTimestamp ) {
                  // Something already updated the backlinks since this job was made
                  return true;
              }
              if ( $page->getTouched() > $skewedTimestamp ) {
                  $parserOutput = ParserCache::singleton()->getDirty( $page, $parserOptions );
                  // Normalise the cache time too; comparing it against a raw UNIX
                  // timestamp would never classify the cached output as stale.
                  if ( $parserOutput
                      && wfTimestamp( TS_MW, $parserOutput->getCacheTime() ) <= $skewedTimestamp
                  ) {
                      $parserOutput = false; // too stale
                  }
              }
          }
          // Parse the current revision if no usable cached output was found...
          if ( !$parserOutput ) {
              $start = microtime( true );
              // Revision ID must be passed to the parser output to get revision variables correct
              $parserOutput = $content->getParserOutput(
                  $title, $revision->getId(), $parserOptions, false );
              $elapsed = microtime( true ) - $start;
              // If it took a long time to render, then save this back to the cache to avoid
              // wasted CPU by other apaches or job runners. We don't want to always save to
              // cache as this can cause high cache I/O and LRU churn when a template changes.
              if ( $elapsed >= self::PARSE_THRESHOLD_SEC
                  && $page->isParserCacheUsed( $parserOptions, $revision->getId() )
                  && $parserOutput->isCacheable()
              ) {
                  $ctime = wfTimestamp( TS_MW, (int)$start ); // cache time
                  ParserCache::singleton()->save(
                      $parserOutput, $page, $parserOptions, $ctime, $revision->getId()
                  );
              }
          }

          $updates = $content->getSecondaryDataUpdates( $title, null, false, $parserOutput );
          DataUpdate::runUpdates( $updates );

          InfoAction::invalidateCache( $title );

          return true;
      }

      /**
       * Strip the fields that would make otherwise-identical jobs look distinct.
       *
       * @return array Parent de-duplication info without 'masterPos' and, for
       *   per-pages jobs, without the job's own namespace/title
       */
      public function getDeduplicationInfo() {
          $info = parent::getDeduplicationInfo();
          if ( is_array( $info['params'] ) ) {
              // Don't let highly unique "masterPos" values ruin duplicate detection
              unset( $info['params']['masterPos'] );
              // For per-pages jobs, the job title is that of the template that changed
              // (or similar), so remove that since it ruins duplicate detection.
              // 'pages' is a job parameter, so it has to be looked up under 'params'.
              if ( isset( $info['params']['pages'] ) ) {
                  unset( $info['namespace'] );
                  unset( $info['title'] );
              }
          }

          return $info;
      }

      /**
       * @return int Number of pages this job refreshes directly: the size of the
       *   'pages' parameter when set, otherwise 1
       */
      public function workItemCount() {
          return isset( $this->params['pages'] ) ? count( $this->params['pages'] ) : 1;
      }
  }


Generated: Fri Nov 28 14:03:12 2014 Cross-referenced by PHPXref 0.7.1