[ Index ] |
PHP Cross Reference of MediaWiki-1.24.0 |
[Summary view] [Print] [Text view]
<?php
/**
 * Send purge requests for pages edited in date range to squid/varnish.
 *
 * @section LICENSE
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 * http://www.gnu.org/copyleft/gpl.html
 *
 * @file
 * @ingroup Maintenance
 */

require_once __DIR__ . '/Maintenance.php';

/**
 * Maintenance script that sends purge requests to squid/varnish for every
 * page whose latest revision falls inside a given timestamp window.
 *
 * Can be used to recover from an HTCP message partition or other major cache
 * layer interruption.
 *
 * @ingroup Maintenance
 */
class PurgeChangedPages extends Maintenance {

	/**
	 * Register the script description, its command line options and the
	 * default batch size of 100.
	 */
	public function __construct() {
		parent::__construct();
		$this->mDescription = 'Send purge requests for edits in date range to squid/varnish';

		// Window boundaries; execute() reads both unconditionally.
		$this->addOption( 'starttime', 'Starting timestamp', true, true );
		$this->addOption( 'endtime', 'Ending timestamp', true, true );

		// Options that execute() only acts on when they are present.
		$this->addOption( 'htcp-dest', 'HTCP announcement destination (IP:port)', false, true );
		$this->addOption( 'sleep-per-batch', 'Milliseconds to sleep between batches', false, true );
		$this->addOption( 'dry-run', 'Do not send purge requests' );
		$this->addOption( 'verbose', 'Show more output', false, false, 'v' );

		$this->setBatchSize( 100 );
	}

	/**
	 * Walk the latest revisions in (starttime, endtime] in ascending
	 * rev_timestamp order, one batch at a time, and send a purge for the
	 * internal URL of each page found.
	 *
	 * With 'dry-run' the URLs are printed and nothing is purged; with
	 * 'verbose' they are printed and purged.
	 */
	public function execute() {
		$dryRun = $this->hasOption( 'dry-run' );
		$verbose = $this->hasOption( 'verbose' );

		// sleep-per-batch is given in milliseconds, usleep() wants microseconds.
		$pauseMicros = $this->hasOption( 'sleep-per-batch' )
			? 1000 * (int)$this->getOption( 'sleep-per-batch' )
			: null;

		if ( $this->hasOption( 'htcp-dest' ) ) {
			$this->routeHtcpTo( $this->getOption( 'htcp-dest' ), $verbose );
		}

		$dbr = $this->getDB( DB_SLAVE );
		$minTime = $dbr->timestamp( $this->getOption( 'starttime' ) );
		$maxTime = $dbr->timestamp( $this->getOption( 'endtime' ) );

		if ( $minTime > $maxTime ) {
			$this->error( "\nERROR: starttime after endtime\n" );
			// NOTE(review): maybeHelp( true ) is presumably expected to show the
			// usage text and stop the script here - confirm against Maintenance.
			$this->maybeHelp( true );
		}

		// Number of consecutive queries whose rows all shared one timestamp.
		$stalledRounds = 0;
		while ( true ) {
			// Widen the batch by one base batch size for every stalled round, so
			// a second with very many changes eventually fits in a single query.
			$limit = ( $stalledRounds + 1 ) * $this->mBatchSize;

			$conds = array(
				"rev_timestamp > " . $dbr->addQuotes( $minTime ),
				"rev_timestamp <= " . $dbr->addQuotes( $maxTime ),
				// Only the latest revision of each page is of interest: older
				// revisions would be duplicates, and a page edited again after
				// the window does not need purging.
				"page_latest = rev_id",
			);
			$res = $dbr->select(
				array( 'page', 'revision' ),
				array( 'rev_timestamp', 'page_namespace', 'page_title' ),
				$conds,
				__METHOD__,
				array( 'ORDER BY' => 'rev_timestamp', 'LIMIT' => $limit ),
				array( 'page' => array( 'INNER JOIN', 'rev_page=page_id' ) )
			);

			if ( !$res->numRows() ) {
				// Nothing left inside the window.
				break;
			}

			// Drop the trailing rows that share the highest timestamp so that the
			// next query can safely resume strictly after $lastTime.
			list( $rows, $lastTime ) = $this->pageableSortedRows( $res, 'rev_timestamp', $limit );
			if ( !$rows ) {
				// Every row had the same timestamp: retry with a bigger LIMIT.
				++$stalledRounds;
				continue;
			}
			$stalledRounds = 0;

			$this->output( "Processing changes from {$minTime} to {$lastTime}.\n" );

			// The next round starts after the last row handled in this one.
			$minTime = $lastTime;

			// Turn each (page_namespace, page_title) pair into its internal URL.
			$urls = array();
			foreach ( $rows as $row ) {
				$urls[] = Title::makeTitle( $row->page_namespace, $row->page_title )
					->getInternalURL();
			}

			if ( $dryRun || $verbose ) {
				$this->output( implode( "\n", $urls ) . "\n" );
			}
			if ( $dryRun ) {
				continue;
			}

			// Send this batch of purge requests out to the squids.
			$purge = new SquidUpdate( $urls, count( $urls ) );
			$purge->doUpdate();

			if ( $pauseMicros !== null ) {
				usleep( $pauseMicros );
			}
		}

		$this->output( "Done!\n" );
	}

	/**
	 * Point $wgHTCPRouting at a single destination so that all HTCP messages
	 * are routed to it.
	 *
	 * @param string $dest Destination as "host" or "host:port"; the default
	 *   HTCP port 4827 is used when no port is given
	 * @param bool $announce Whether to print the chosen destination
	 */
	private function routeHtcpTo( $dest, $announce ) {
		global $wgHTCPRouting;

		$pieces = explode( ':', $dest );
		$host = $pieces[0];
		$port = isset( $pieces[1] ) ? $pieces[1] : '4827';

		$wgHTCPRouting = array(
			'' => array( 'host' => $host, 'port' => $port ),
		);

		if ( $announce ) {
			$this->output( "HTCP broadcasts to {$host}:{$port}\n" );
		}
	}

	/**
	 * Strip from a sorted result set every trailing row that carries the
	 * highest value of $column, unless the set holds fewer than $limit rows
	 * (in which case nothing is stripped). Returns the remaining rows, in
	 * their original order, together with the highest $column value among
	 * them.
	 *
	 * This helps when paging on mostly-unique values that occasionally come
	 * in large clumps of identical values: the follow-up query can ask for
	 * values strictly greater than the returned one without skipping rows.
	 * An empty row list for a non-empty result means that all rows shared one
	 * value, and the query should be repeated with a higher LIMIT.
	 *
	 * @todo move this elsewhere
	 *
	 * @param ResultWrapper $res Query result sorted by $column (ascending)
	 * @param string $column
	 * @param int $limit
	 * @return array (array of rows, string column value)
	 */
	protected function pageableSortedRows( ResultWrapper $res, $column, $limit ) {
		$rows = iterator_to_array( $res, false );
		$total = count( $rows );
		if ( $total === 0 ) {
			// Nothing to do
			return array( array(), null );
		}

		// The result is sorted ascending, so the last row should hold the maximum.
		$highest = $rows[$total - 1]->$column;
		if ( $total < $limit ) {
			// Short page: no more rows are left, keep everything.
			return array( $rows, $highest );
		}

		// Walk backwards over the run of rows that share the highest value.
		$keep = $total;
		while ( $keep > 0 && $rows[$keep - 1]->$column === $highest ) {
			--$keep;
		}

		$rows = array_slice( $rows, 0, $keep );
		$lastValueLeft = $keep > 0 ? $rows[$keep - 1]->$column : null;

		return array( $rows, $lastValueLeft );
	}
}

$maintClass = "PurgeChangedPages";
require_once RUN_MAINTENANCE_IF_MAIN;
title
Description
Body
title
Description
Body
title
Description
Body
title
Body
Generated: Fri Nov 28 14:03:12 2014 | Cross-referenced by PHPXref 0.7.1 |