[ Index ]

PHP Cross Reference of MediaWiki-1.24.0

title

Body

[close]

/maintenance/ -> purgeChangedPages.php (source)

   1  <?php
   2  /**
   3   * Send purge requests for pages edited in date range to squid/varnish.
   4   *
   5   * @section LICENSE
   6   * This program is free software; you can redistribute it and/or modify
   7   * it under the terms of the GNU General Public License as published by
   8   * the Free Software Foundation; either version 2 of the License, or
   9   * (at your option) any later version.
  10   *
  11   * This program is distributed in the hope that it will be useful,
  12   * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14   * GNU General Public License for more details.
  15   *
  16   * You should have received a copy of the GNU General Public License along
  17   * with this program; if not, write to the Free Software Foundation, Inc.,
  18   * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  19   * http://www.gnu.org/copyleft/gpl.html
  20   *
  21   * @file
  22   * @ingroup Maintenance
  23   */
  24  
  25  require_once  __DIR__ . '/Maintenance.php';
  26  
  27  /**
  28   * Maintenance script that sends purge requests for pages edited in a date
  29   * range to squid/varnish.
  30   *
  31   * Can be used to recover from an HTCP message partition or other major cache
  32   * layer interruption.
  33   *
  34   * @ingroup Maintenance
  35   */
  36  class PurgeChangedPages extends Maintenance {
  37  
  38  	public function __construct() {
  39          parent::__construct();
  40          $this->mDescription = 'Send purge requests for edits in date range to squid/varnish';
  41          $this->addOption( 'starttime', 'Starting timestamp', true, true );
  42          $this->addOption( 'endtime', 'Ending timestamp', true, true );
  43          $this->addOption( 'htcp-dest', 'HTCP announcement destination (IP:port)', false, true );
  44          $this->addOption( 'sleep-per-batch', 'Milliseconds to sleep between batches', false, true );
  45          $this->addOption( 'dry-run', 'Do not send purge requests' );
  46          $this->addOption( 'verbose', 'Show more output', false, false, 'v' );
  47          $this->setBatchSize( 100 );
  48      }
  49  
  50  	public function execute() {
  51          global $wgHTCPRouting;
  52  
  53          if ( $this->hasOption( 'htcp-dest' ) ) {
  54              $parts = explode( ':', $this->getOption( 'htcp-dest' ) );
  55              if ( count( $parts ) < 2 ) {
  56                  // Add default htcp port
  57                  $parts[] = '4827';
  58              }
  59  
  60              // Route all HTCP messages to provided host:port
  61              $wgHTCPRouting = array(
  62                  '' => array( 'host' => $parts[0], 'port' => $parts[1] ),
  63              );
  64              if ( $this->hasOption( 'verbose' ) ) {
  65                  $this->output( "HTCP broadcasts to {$parts[0]}:{$parts[1]}\n" );
  66              }
  67          }
  68  
  69          $dbr = $this->getDB( DB_SLAVE );
  70          $minTime = $dbr->timestamp( $this->getOption( 'starttime' ) );
  71          $maxTime = $dbr->timestamp( $this->getOption( 'endtime' ) );
  72  
  73          if ( $maxTime < $minTime ) {
  74              $this->error( "\nERROR: starttime after endtime\n" );
  75              $this->maybeHelp( true );
  76          }
  77  
  78          $stuckCount = 0; // loop breaker
  79          while ( true ) {
  80              // Adjust bach size if we are stuck in a second that had many changes
  81              $bSize = $this->mBatchSize + ( $stuckCount * $this->mBatchSize );
  82  
  83              $res = $dbr->select(
  84                  array( 'page', 'revision' ),
  85                  array(
  86                      'rev_timestamp',
  87                      'page_namespace',
  88                      'page_title',
  89                  ),
  90                  array(
  91                      "rev_timestamp > " . $dbr->addQuotes( $minTime ),
  92                      "rev_timestamp <= " . $dbr->addQuotes( $maxTime ),
  93                      // Only get rows where the revision is the latest for the page.
  94                      // Other revisions would be duplicate and we don't need to purge if
  95                      // there has been an edit after the interesting time window.
  96                      "page_latest = rev_id",
  97                  ),
  98                  __METHOD__,
  99                  array( 'ORDER BY' => 'rev_timestamp', 'LIMIT' => $bSize ),
 100                  array(
 101                      'page' => array( 'INNER JOIN', 'rev_page=page_id' ),
 102                  )
 103              );
 104  
 105              if ( !$res->numRows() ) {
 106                  // nothing more found so we are done
 107                  break;
 108              }
 109  
 110              // Kludge to not get stuck in loops for batches with the same timestamp
 111              list( $rows, $lastTime ) = $this->pageableSortedRows( $res, 'rev_timestamp', $bSize );
 112              if ( !count( $rows ) ) {
 113                  ++$stuckCount;
 114                  continue;
 115              }
 116              // Reset suck counter
 117              $stuckCount = 0;
 118  
 119              $this->output( "Processing changes from {$minTime} to {$lastTime}.\n" );
 120  
 121              // Advance past the last row next time
 122              $minTime = $lastTime;
 123  
 124              // Create list of URLs from page_namespace + page_title
 125              $urls = array();
 126              foreach ( $rows as $row ) {
 127                  $title = Title::makeTitle( $row->page_namespace, $row->page_title );
 128                  $urls[] = $title->getInternalURL();
 129              }
 130  
 131              if ( $this->hasOption( 'dry-run' ) || $this->hasOption( 'verbose' ) ) {
 132                  $this->output( implode( "\n", $urls ) . "\n" );
 133                  if ( $this->hasOption( 'dry-run' ) ) {
 134                      continue;
 135                  }
 136              }
 137  
 138              // Send batch of purge requests out to squids
 139              $squid = new SquidUpdate( $urls, count( $urls ) );
 140              $squid->doUpdate();
 141  
 142              if ( $this->hasOption( 'sleep-per-batch' ) ) {
 143                  // sleep-per-batch is milliseconds, usleep wants micro seconds.
 144                  usleep( 1000 * (int)$this->getOption( 'sleep-per-batch' ) );
 145              }
 146          }
 147  
 148          $this->output( "Done!\n" );
 149      }
 150  
 151      /**
 152       * Remove all the rows in a result set with the highest value for column
 153       * $column unless the number of rows is less $limit. This returns the new
 154       * array of rows and the highest value of column $column for the rows left.
 155       * The ordering of rows is maintained.
 156       *
 157       * This is useful for paging on mostly-unique values that may sometimes
 158       * have large clumps of identical values. It should be safe to do the next
 159       * query on items with a value higher than the highest of the rows returned here.
 160       * If this returns an empty array for a non-empty query result, then all the rows
 161       * had the same column value and the query should be repeated with a higher LIMIT.
 162       *
 163       * @todo move this elsewhere
 164       *
 165       * @param ResultWrapper $res Query result sorted by $column (ascending)
 166       * @param string $column
 167       * @param int $limit
 168       * @return array (array of rows, string column value)
 169       */
 170  	protected function pageableSortedRows( ResultWrapper $res, $column, $limit ) {
 171          $rows = iterator_to_array( $res, false );
 172          $count = count( $rows );
 173          if ( !$count ) {
 174              return array( array(), null ); // nothing to do
 175          } elseif ( $count < $limit ) {
 176              return array( $rows, $rows[$count - 1]->$column ); // no more rows left
 177          }
 178          $lastValue = $rows[$count - 1]->$column; // should be the highest
 179          for ( $i = $count - 1; $i >= 0; --$i ) {
 180              if ( $rows[$i]->$column === $lastValue ) {
 181                  unset( $rows[$i] );
 182              } else {
 183                  break;
 184              }
 185          }
 186          $lastValueLeft = count( $rows ) ? $rows[count( $rows ) - 1]->$column : null;
 187  
 188          return array( $rows, $lastValueLeft );
 189      }
 190  }
 191  
 192  $maintClass = "PurgeChangedPages";
 193  require_once RUN_MAINTENANCE_IF_MAIN;


Generated: Fri Nov 28 14:03:12 2014 Cross-referenced by PHPXref 0.7.1