[ Index ] |
PHP Cross Reference of MediaWiki-1.24.0 |
[Summary view] [Print] [Text view]
<?php
/**
 * Refresh link tables.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 * http://www.gnu.org/copyleft/gpl.html
 *
 * @file
 * @ingroup Maintenance
 */

require_once __DIR__ . '/Maintenance.php';

/**
 * Maintenance script to refresh link tables.
 *
 * Depending on the options given, it re-parses pages to rebuild the redirect
 * table and/or the secondary link tables, and then purges rows in the link
 * tables whose source page no longer exists.
 *
 * @ingroup Maintenance
 */
class RefreshLinks extends Maintenance {
	/**
	 * Register the command line options and arguments of the script and
	 * set the default batch size to 100.
	 */
	public function __construct() {
		parent::__construct();
		$this->mDescription = "Refresh link tables";
		$this->addOption( 'dfn-only', 'Delete links from nonexistent articles only' );
		$this->addOption( 'new-only', 'Only affect articles with just a single edit' );
		$this->addOption( 'redirects-only', 'Only fix redirects, not all links' );
		$this->addOption( 'old-redirects-only', 'Only fix redirects with no redirect table entry' );
		$this->addOption( 'm', 'Maximum replication lag', false, true );
		$this->addOption( 'e', 'Last page id to refresh', false, true );
		$this->addArg( 'start', 'Page_id to start from, default 1', false );
		$this->setBatchSize( 100 );
	}

	/**
	 * Script entry point.
	 *
	 * Unless --dfn-only was given, refreshes links/redirects according to the
	 * other options. In every case, finishes by deleting link table rows that
	 * belong to nonexistent pages.
	 */
	public function execute() {
		$max = $this->getOption( 'm', 0 );
		if ( !$this->hasOption( 'dfn-only' ) ) {
			$start = $this->getArg( 0, 1 );
			$new = $this->getOption( 'new-only', false );
			$end = $this->getOption( 'e', 0 );
			$redir = $this->getOption( 'redirects-only', false );
			$oldRedir = $this->getOption( 'old-redirects-only', false );
			$this->doRefreshLinks( $start, $new, $max, $end, $redir, $oldRedir );
		}
		$this->deleteLinksFromNonexistent( $max, $this->mBatchSize );
	}

	/**
	 * Do the actual link refreshing.
	 *
	 * Three mutually exclusive modes, checked in this order:
	 *  - $oldRedirectsOnly: fix pages flagged as redirects that have no
	 *    redirect table row;
	 *  - $newOnly: fix pages with page_is_new = 1;
	 *  - otherwise: walk every page_id from $start to $end, first fixing
	 *    redirects and then (unless $redirectsOnly) the links tables.
	 *
	 * @param int $start Page_id to start from
	 * @param bool $newOnly Only do pages with 1 edit
	 * @param int $maxLag Max DB replication lag (accepted for interface
	 *   compatibility; not read by this method)
	 * @param int $end Page_id to stop at, 0 meaning "no upper bound"
	 * @param bool $redirectsOnly Only fix redirects
	 * @param bool $oldRedirectsOnly Only fix redirects without redirect entries
	 */
	private function doRefreshLinks( $start, $newOnly = false, $maxLag = false,
		$end = 0, $redirectsOnly = false, $oldRedirectsOnly = false
	) {
		global $wgParser, $wgUseTidy;

		$reportingInterval = 100;
		$dbr = wfGetDB( DB_SLAVE );
		// Both bounds come from the command line and are interpolated into raw
		// SQL conditions below, so force them to integers first.
		$start = intval( $start );
		$end = intval( $end );

		// Give extensions a chance to optimize settings
		wfRunHooks( 'MaintenanceRefreshLinksInit', array( $this ) );

		# Don't generate extension images (e.g. Timeline)
		$wgParser->clearTagHooks();

		# Don't use HTML tidy
		$wgUseTidy = false;

		$what = $redirectsOnly ? "redirects" : "links";

		if ( $oldRedirectsOnly ) {
			# This entire code path is cut-and-pasted from below. Hurrah.

			$conds = array(
				"page_is_redirect=1",
				"rd_from IS NULL"
			);

			if ( $end == 0 ) {
				$conds[] = "page_id >= $start";
			} else {
				$conds[] = "page_id BETWEEN $start AND $end";
			}

			$res = $dbr->select(
				array( 'page', 'redirect' ),
				'page_id',
				$conds,
				__METHOD__,
				array(),
				array( 'redirect' => array( "LEFT JOIN", "page_id=rd_from" ) )
			);
			$num = $res->numRows();
			$this->output( "Refreshing $num old redirects from $start...\n" );

			$i = 0;

			foreach ( $res as $row ) {
				if ( !( ++$i % $reportingInterval ) ) {
					$this->output( "$i\n" );
					wfWaitForSlaves();
				}
				$this->fixRedirect( $row->page_id );
			}
		} elseif ( $newOnly ) {
			$this->output( "Refreshing $what from " );

			$conds = array(
				'page_is_new' => 1,
				"page_id >= $start"
			);
			if ( $end ) {
				// Honour the "last page id" option in this mode as well
				$conds[] = "page_id <= $end";
			}

			$res = $dbr->select( 'page',
				array( 'page_id' ),
				$conds,
				__METHOD__
			);
			$num = $res->numRows();
			$this->output( "$num new articles...\n" );

			$i = 0;
			foreach ( $res as $row ) {
				if ( !( ++$i % $reportingInterval ) ) {
					$this->output( "$i\n" );
					wfWaitForSlaves();
				}
				if ( $redirectsOnly ) {
					$this->fixRedirect( $row->page_id );
				} else {
					self::fixLinksFromArticle( $row->page_id );
				}
			}
		} else {
			if ( !$end ) {
				// No explicit end: go up to the highest id known to either table.
				// Cast the results so an empty table yields 0 rather than a
				// non-integer value.
				$maxPage = $dbr->selectField( 'page', 'max(page_id)', false );
				$maxRD = $dbr->selectField( 'redirect', 'max(rd_from)', false );
				$end = max( intval( $maxPage ), intval( $maxRD ) );
			}
			$this->output( "Refreshing redirects table.\n" );
			$this->output( "Starting from page_id $start of $end.\n" );

			for ( $id = $start; $id <= $end; $id++ ) {

				if ( !( $id % $reportingInterval ) ) {
					$this->output( "$id\n" );
					wfWaitForSlaves();
				}
				$this->fixRedirect( $id );
			}

			if ( !$redirectsOnly ) {
				$this->output( "Refreshing links tables.\n" );
				$this->output( "Starting from page_id $start of $end.\n" );

				for ( $id = $start; $id <= $end; $id++ ) {

					if ( !( $id % $reportingInterval ) ) {
						$this->output( "$id\n" );
						wfWaitForSlaves();
					}
					self::fixLinksFromArticle( $id );
				}
			}
		}
	}

	/**
	 * Update the redirect entry for a given page.
	 *
	 * This method bypasses the "redirect" table to get the redirect target,
	 * and parses the page's content to fetch it. This makes sure that
	 * the redirect target is up to date and valid.
	 * This is particularly useful when modifying namespaces to be sure the
	 * entry in the "redirect" table points to the correct page and not to an
	 * invalid one.
	 *
	 * @param int $id The page ID to check
	 */
	private function fixRedirect( $id ) {
		$page = WikiPage::newFromID( $id );
		$dbw = wfGetDB( DB_MASTER );

		if ( $page === null ) {
			// This page doesn't exist (any more)
			// Delete any redirect table entry for it
			$dbw->delete( 'redirect', array( 'rd_from' => $id ),
				__METHOD__ );

			return;
		}

		$rt = null;
		$content = $page->getContent( Revision::RAW );
		if ( $content !== null ) {
			$rt = $content->getUltimateRedirectTarget();
		}

		if ( $rt === null ) {
			// The page is not a redirect
			// Delete any redirect table entry for it
			$dbw->delete( 'redirect', array( 'rd_from' => $id ), __METHOD__ );
			$fieldValue = 0;
		} else {
			$page->insertRedirectEntry( $rt );
			$fieldValue = 1;
		}

		// Update the page table to be sure it is in a consistent state
		$dbw->update( 'page', array( 'page_is_redirect' => $fieldValue ),
			array( 'page_id' => $id ), __METHOD__ );
	}

	/**
	 * Run LinksUpdate for all links on a given page_id
	 *
	 * Clears the LinkCache singleton, then runs the content's secondary data
	 * updates between an explicit begin and commit on the master connection.
	 * Does nothing further if the page or its content cannot be loaded.
	 *
	 * @param int $id The page_id
	 */
	public static function fixLinksFromArticle( $id ) {
		$page = WikiPage::newFromID( $id );

		LinkCache::singleton()->clear();

		if ( $page === null ) {
			return;
		}

		$content = $page->getContent( Revision::RAW );
		if ( $content === null ) {
			return;
		}

		$dbw = wfGetDB( DB_MASTER );
		$dbw->begin( __METHOD__ );

		$updates = $content->getSecondaryDataUpdates( $page->getTitle() );
		DataUpdate::runUpdates( $updates );

		$dbw->commit( __METHOD__ );
	}

	/**
	 * Removes non-existing links from pages from pagelinks, imagelinks,
	 * categorylinks, templatelinks, externallinks, interwikilinks, langlinks,
	 * redirect and page_props tables.
	 *
	 * Rows are read through a separate, unbuffered connection obtained from a
	 * new load balancer, and deleted on the master in batches.
	 *
	 * @param int $maxLag Accepted for interface compatibility; not read by
	 *   this method
	 * @param int $batchSize The size of deletion batches (values below 1 are
	 *   treated as 1)
	 *
	 * @author Merlijn van Deen <[email protected]>
	 */
	private function deleteLinksFromNonexistent( $maxLag = 0, $batchSize = 100 ) {
		// A batch size of 0 would make the modulo below divide by zero
		$batchSize = max( 1, intval( $batchSize ) );

		wfWaitForSlaves();

		$dbw = wfGetDB( DB_MASTER );

		$lb = wfGetLBFactory()->newMainLB();
		$dbr = $lb->getConnection( DB_SLAVE );
		$dbr->bufferResults( false );

		$linksTables = array( // table name => page_id field
			'pagelinks' => 'pl_from',
			'imagelinks' => 'il_from',
			'categorylinks' => 'cl_from',
			'templatelinks' => 'tl_from',
			'externallinks' => 'el_from',
			'iwlinks' => 'iwl_from',
			'langlinks' => 'll_from',
			'redirect' => 'rd_from',
			'page_props' => 'pp_page',
		);

		foreach ( $linksTables as $table => $field ) {
			$this->output( "Retrieving illegal entries from $table... " );

			// SELECT DISTINCT( $field ) FROM $table LEFT JOIN page ON $field=page_id WHERE page_id IS NULL;
			$results = $dbr->select(
				array( $table, 'page' ),
				$field,
				array( 'page_id' => null ),
				__METHOD__,
				'DISTINCT',
				array( 'page' => array( 'LEFT JOIN', "$field=page_id" ) )
			);

			$counter = 0;
			$list = array();
			$this->output( "0.." );
			foreach ( $results as $row ) {
				$counter++;
				$list[] = $row->$field;
				if ( ( $counter % $batchSize ) == 0 ) {
					wfWaitForSlaves();
					$dbw->delete( $table, array( $field => $list ), __METHOD__ );

					$this->output( $counter . ".." );
					$list = array();
				}
			}
			$this->output( $counter );
			// Flush whatever is left over from the last, partial batch
			if ( count( $list ) > 0 ) {
				$dbw->delete( $table, array( $field => $list ), __METHOD__ );
			}
			$this->output( "\n" );
			wfWaitForSlaves();
		}
		$lb->closeAll();
	}
}

$maintClass = 'RefreshLinks';
require_once RUN_MAINTENANCE_IF_MAIN;
title
Description
Body
title
Description
Body
title
Description
Body
title
Body
Generated: Fri Nov 28 14:03:12 2014 | Cross-referenced by PHPXref 0.7.1 |