MediaWiki
REL1_22
|
<?php
/**
 * Send purge requests to squid/varnish for pages edited within a date range.
 *
 * @file
 */

require_once __DIR__ . '/Maintenance.php';

/**
 * Maintenance script that looks up pages whose latest revision falls inside
 * a given time window and sends purge requests for their URLs in batches.
 */
class PurgeChangedPages extends Maintenance {

	/**
	 * Register the script description, its command line options and the
	 * default batch size.
	 */
	public function __construct() {
		parent::__construct();
		$this->mDescription = 'Send purge requests for edits in date range to squid/varnish';

		// Time window (both flagged as required, both take a value)
		$this->addOption( 'starttime', 'Starting timestamp', true, true );
		$this->addOption( 'endtime', 'Ending timestamp', true, true );

		// Optional tuning knobs
		$this->addOption( 'htcp-dest', 'HTCP announcement destination (IP:port)', false, true );
		$this->addOption( 'sleep-per-batch', 'Milliseconds to sleep between batches', false, true );
		$this->addOption( 'dry-run', 'Do not send purge requests' );
		$this->addOption( 'verbose', 'Show more output', false, false, 'v' );

		$this->setBatchSize( 100 );
	}

	/**
	 * Walk through the time window in batches ordered by rev_timestamp and
	 * purge the URL of every page whose latest revision lies in the window.
	 *
	 * Each query selects rows with rev_timestamp strictly greater than the
	 * last processed timestamp, so a batch must never end in the middle of a
	 * group of rows sharing one timestamp; pageableSortedRows() takes care of
	 * that, and the batch is enlarged whenever a whole batch shares a single
	 * timestamp.
	 */
	public function execute() {
		if ( $this->hasOption( 'htcp-dest' ) ) {
			$this->setHtcpDestination( $this->getOption( 'htcp-dest' ) );
		}

		$dbr = $this->getDB( DB_SLAVE );
		$minTime = $dbr->timestamp( $this->getOption( 'starttime' ) );
		$maxTime = $dbr->timestamp( $this->getOption( 'endtime' ) );

		if ( $maxTime < $minTime ) {
			$this->error( "\nERROR: starttime after endtime\n" );
			// NOTE(review): presumably prints the usage text and aborts; confirm in Maintenance
			$this->maybeHelp( true );
		}

		// These do not change between batches, so look them up once.
		$isDryRun = $this->hasOption( 'dry-run' );
		$listUrls = $isDryRun || $this->hasOption( 'verbose' );
		$tables = array( 'page', 'revision' );
		$fields = array(
			'rev_timestamp',
			'page_namespace',
			'page_title',
		);
		$joinConds = array(
			'page' => array( 'INNER JOIN', 'rev_page=page_id' ),
		);

		// Number of consecutive batches that consisted of a single timestamp (loop breaker)
		$stuckCount = 0;
		while ( true ) {
			// Grow the batch by one base batch size for every stuck iteration
			$limit = ( $stuckCount + 1 ) * $this->mBatchSize;

			$conds = array(
				"rev_timestamp > " . $dbr->addQuotes( $minTime ),
				"rev_timestamp <= " . $dbr->addQuotes( $maxTime ),
				// Only get rows where the revision is the latest for the page.
				// Other revisions would be duplicate and we don't need to purge if
				// there has been an edit after the interesting time window.
				"page_latest = rev_id",
			);
			$res = $dbr->select(
				$tables,
				$fields,
				$conds,
				__METHOD__,
				array( 'ORDER BY' => 'rev_timestamp', 'LIMIT' => $limit ),
				$joinConds
			);

			if ( !$res->numRows() ) {
				// Nothing more found, so we are done
				break;
			}

			// Kludge to not get stuck in loops for batches with the same timestamp
			list( $rows, $lastTime ) = $this->pageableSortedRows( $res, 'rev_timestamp', $limit );
			if ( !count( $rows ) ) {
				// The whole batch shared one timestamp; retry with a larger limit
				++$stuckCount;
				continue;
			}
			// Progress was made, so reset the stuck counter
			$stuckCount = 0;

			$this->output( "Processing changes from {$minTime} to {$lastTime}.\n" );

			// Advance past the last row next time
			$minTime = $lastTime;

			$urls = $this->buildPurgeUrls( $rows );

			if ( $listUrls ) {
				$this->output( implode( "\n", $urls ) . "\n" );
			}
			if ( $isDryRun ) {
				// Dry runs neither send purges nor sleep
				continue;
			}

			// Send batch of purge requests out to squids
			$update = new SquidUpdate( $urls, count( $urls ) );
			$update->doUpdate();

			if ( $this->hasOption( 'sleep-per-batch' ) ) {
				// sleep-per-batch is milliseconds, usleep wants microseconds.
				$pauseMs = (int)$this->getOption( 'sleep-per-batch' );
				usleep( 1000 * $pauseMs );
			}
		}

		$this->output( "Done!\n" );
	}

	/**
	 * Route all HTCP messages to the given destination by overwriting
	 * $wgHTCPRouting with a single catch-all rule.
	 *
	 * @param string $dest Destination as "host" or "host:port"; port 4827 is
	 *   used when no port is given
	 */
	private function setHtcpDestination( $dest ) {
		global $wgHTCPRouting;

		$parts = explode( ':', $dest );
		if ( count( $parts ) < 2 ) {
			// Add default htcp port
			$parts[] = '4827';
		}

		// Route all HTCP messages to provided host:port
		$wgHTCPRouting = array(
			'' => array( 'host' => $parts[0], 'port' => $parts[1] ),
		);

		if ( $this->hasOption( 'verbose' ) ) {
			$this->output( "HTCP broadcasts to {$parts[0]}:{$parts[1]}\n" );
		}
	}

	/**
	 * Create the list of URLs to purge from page_namespace + page_title.
	 *
	 * @param array $rows Row objects carrying page_namespace and page_title
	 * @return array Internal URL of each row's title, in row order
	 */
	private function buildPurgeUrls( array $rows ) {
		$urls = array();
		foreach ( $rows as $row ) {
			$urls[] = Title::makeTitle( $row->page_namespace, $row->page_title )->getInternalURL();
		}
		return $urls;
	}

	/**
	 * Trim a result set, sorted ascending on $column, so that paging can
	 * safely resume with a "$column > last value" condition.
	 *
	 * If the result has fewer rows than $limit, all rows are returned. If it
	 * filled the limit, more rows sharing the final $column value may exist
	 * beyond it, so every trailing row carrying that value is dropped. The
	 * returned row list is empty when all rows share one value.
	 *
	 * @param ResultWrapper $res Query result ordered by $column
	 * @param string $column Name of the column the result is sorted on
	 * @param int $limit The LIMIT the query was run with
	 * @return array Two elements: the list of rows to process, and the
	 *   $column value of the last of those rows (null if the list is empty)
	 */
	protected function pageableSortedRows( ResultWrapper $res, $column, $limit ) {
		$rows = iterator_to_array( $res, false );
		$count = count( $rows );
		if ( !$count ) {
			// Nothing to do
			return array( array(), null );
		}

		// Should be the highest value, as the rows are sorted
		$lastValue = $rows[$count - 1]->$column;
		if ( $count < $limit ) {
			// No more rows left beyond this result
			return array( $rows, $lastValue );
		}

		// Find how many leading rows remain once the trailing run of
		// rows equal to the last value has been cut off.
		$keep = $count;
		while ( $keep > 0 && $rows[$keep - 1]->$column === $lastValue ) {
			--$keep;
		}
		$rows = array_slice( $rows, 0, $keep );

		$lastValueLeft = $keep ? $rows[$keep - 1]->$column : null;
		return array( $rows, $lastValueLeft );
	}
}

$maintClass = "PurgeChangedPages";
require_once RUN_MAINTENANCE_IF_MAIN;