MediaWiki
REL1_22
|
<?php
/**
 * Maintenance script that rebuilds the redirect table and the link tables,
 * and purges link-table rows whose source page no longer exists.
 */

require_once __DIR__ . '/Maintenance.php';

/**
 * Refreshes redirect/link table entries for a range of page ids.
 */
class RefreshLinks extends Maintenance {
	/**
	 * Register the command-line options and the default batch size.
	 */
	public function __construct() {
		parent::__construct();
		$this->mDescription = "Refresh link tables";
		$this->addOption( 'dfn-only', 'Delete links from nonexistent articles only' );
		$this->addOption( 'new-only', 'Only affect articles with just a single edit' );
		$this->addOption( 'redirects-only', 'Only fix redirects, not all links' );
		$this->addOption( 'old-redirects-only', 'Only fix redirects with no redirect table entry' );
		$this->addOption( 'm', 'Maximum replication lag', false, true );
		$this->addOption( 'e', 'Last page id to refresh', false, true );
		$this->addArg( 'start', 'Page_id to start from, default 1', false );
		$this->setBatchSize( 100 );
	}

	/**
	 * Entry point: refresh links unless --dfn-only was given, then always
	 * delete link rows that point from nonexistent pages.
	 */
	public function execute() {
		$max = $this->getOption( 'm', 0 );
		if ( !$this->hasOption( 'dfn-only' ) ) {
			$start = $this->getArg( 0, 1 );
			$new = $this->getOption( 'new-only', false );
			$end = $this->getOption( 'e', 0 );
			$redir = $this->getOption( 'redirects-only', false );
			$oldRedir = $this->getOption( 'old-redirects-only', false );
			$this->doRefreshLinks( $start, $new, $max, $end, $redir, $oldRedir );
		}
		$this->deleteLinksFromNonexistent( $max, $this->mBatchSize );
	}

	/**
	 * Build the SQL condition restricting page_id to the requested range.
	 *
	 * Both bounds are forced to integers here because the result is
	 * interpolated directly into a query.
	 *
	 * @param int $start First page id (inclusive)
	 * @param int $end Last page id (inclusive); 0 means "no upper bound"
	 * @return string SQL condition fragment
	 */
	private static function pageIdRangeCond( $start, $end ) {
		$start = intval( $start );
		$end = intval( $end );

		if ( $end == 0 ) {
			return "page_id >= $start";
		}

		return "page_id BETWEEN $start AND $end";
	}

	/**
	 * Do the actual link refreshing.
	 *
	 * Exactly one of three modes runs, checked in this order:
	 * old-redirects-only, new-only, or the default full pass over every
	 * page id from $start to $end.
	 *
	 * @param int $start Page id to start from
	 * @param bool $newOnly Only process pages flagged page_is_new
	 * @param int|bool $maxLag Maximum replication lag; accepted for
	 *   interface compatibility but not consulted by this method
	 * @param int $end Last page id to process; 0 means no explicit limit
	 * @param bool $redirectsOnly Only fix the redirect table, not all links
	 * @param bool $oldRedirectsOnly Only fix redirect pages that have no
	 *   redirect table entry
	 */
	private function doRefreshLinks( $start, $newOnly = false, $maxLag = false,
		$end = 0, $redirectsOnly = false, $oldRedirectsOnly = false ) {
		global $wgParser, $wgUseTidy;

		$reportingInterval = 100;
		$dbr = wfGetDB( DB_SLAVE );

		// Both bounds originate from the command line and end up inside
		// SQL fragments, so normalise them to integers up front.
		$start = intval( $start );
		$end = intval( $end );

		// Give extensions a chance to optimize settings
		wfRunHooks( 'MaintenanceRefreshLinksInit', array( $this ) );

		# Don't generate extension images (e.g. Timeline)
		$wgParser->clearTagHooks();

		# Don't use HTML tidy
		$wgUseTidy = false;

		if ( $oldRedirectsOnly ) {
			// Redirect pages for which the LEFT JOIN finds no redirect row
			$conds = array(
				"page_is_redirect=1",
				"rd_from IS NULL",
				self::pageIdRangeCond( $start, $end ),
			);

			$res = $dbr->select(
				array( 'page', 'redirect' ),
				'page_id',
				$conds,
				__METHOD__,
				array(),
				array( 'redirect' => array( "LEFT JOIN", "page_id=rd_from" ) )
			);
			$num = $res->numRows();
			$this->output( "Refreshing $num old redirects from $start...\n" );

			$i = 0;
			foreach ( $res as $row ) {
				if ( !( ++$i % $reportingInterval ) ) {
					$this->output( "$i\n" );
					wfWaitForSlaves();
				}
				$this->fixRedirect( $row->page_id );
			}
		} elseif ( $newOnly ) {
			$what = $redirectsOnly ? "redirects" : "links";
			$this->output( "Refreshing $what from " );

			// Honour the -e upper bound here as well, consistent with the
			// old-redirects-only path above.
			$res = $dbr->select( 'page',
				array( 'page_id' ),
				array(
					'page_is_new' => 1,
					self::pageIdRangeCond( $start, $end ),
				),
				__METHOD__
			);
			$num = $res->numRows();
			$this->output( "$num new articles...\n" );

			$i = 0;
			foreach ( $res as $row ) {
				if ( !( ++$i % $reportingInterval ) ) {
					$this->output( "$i\n" );
					wfWaitForSlaves();
				}
				if ( $redirectsOnly ) {
					$this->fixRedirect( $row->page_id );
				} else {
					self::fixLinksFromArticle( $row->page_id );
				}
			}
		} else {
			if ( !$end ) {
				// No explicit limit: cover the highest id known to either
				// table so redirect rows of deleted pages are visited too.
				$maxPage = $dbr->selectField( 'page', 'max(page_id)', false );
				$maxRD = $dbr->selectField( 'redirect', 'max(rd_from)', false );
				$end = max( $maxPage, $maxRD );
			}
			$this->output( "Refreshing redirects table.\n" );
			$this->output( "Starting from page_id $start of $end.\n" );

			for ( $id = $start; $id <= $end; $id++ ) {
				if ( !( $id % $reportingInterval ) ) {
					$this->output( "$id\n" );
					wfWaitForSlaves();
				}
				$this->fixRedirect( $id );
			}

			if ( !$redirectsOnly ) {
				$this->output( "Refreshing links tables.\n" );
				$this->output( "Starting from page_id $start of $end.\n" );

				for ( $id = $start; $id <= $end; $id++ ) {
					if ( !( $id % $reportingInterval ) ) {
						$this->output( "$id\n" );
						wfWaitForSlaves();
					}
					self::fixLinksFromArticle( $id );
				}
			}
		}
	}

	/**
	 * Bring the redirect table entry and the page_is_redirect flag of one
	 * page in line with the page's current content.
	 *
	 * - Page does not exist: its redirect row is deleted.
	 * - Page has no redirect target: its redirect row is deleted and
	 *   page_is_redirect is set to 0.
	 * - Page has a redirect target: the redirect row is inserted via
	 *   WikiPage::insertRedirectEntry() and page_is_redirect is set to 1.
	 *
	 * @param int $id The page id to check
	 */
	private function fixRedirect( $id ) {
		$page = WikiPage::newFromID( $id );
		$dbw = wfGetDB( DB_MASTER );

		if ( $page === null ) {
			// This page doesn't exist (any more)
			// Delete any redirect table entry for it
			$dbw->delete( 'redirect', array( 'rd_from' => $id ),
				__METHOD__ );
			return;
		}

		$rt = null;
		$content = $page->getContent( Revision::RAW );
		if ( $content !== null ) {
			$rt = $content->getUltimateRedirectTarget();
		}

		if ( $rt === null ) {
			// The page is not a redirect
			// Delete any redirect table entry for it
			$dbw->delete( 'redirect', array( 'rd_from' => $id ), __METHOD__ );
			$fieldValue = 0;
		} else {
			$page->insertRedirectEntry( $rt );
			$fieldValue = 1;
		}

		// Update the page table to be sure it is in a consistent state
		$dbw->update( 'page', array( 'page_is_redirect' => $fieldValue ),
			array( 'page_id' => $id ), __METHOD__ );
	}

	/**
	 * Run the secondary data updates for one page.
	 *
	 * The link cache is cleared on every call, even when the page turns
	 * out not to exist. Pages that do not exist or have no content are
	 * skipped. The updates run between an explicit begin() and commit()
	 * on the master connection.
	 *
	 * @param int $id The page id
	 */
	public static function fixLinksFromArticle( $id ) {
		$page = WikiPage::newFromID( $id );

		LinkCache::singleton()->clear();

		if ( $page === null ) {
			return;
		}

		$content = $page->getContent( Revision::RAW );
		if ( $content === null ) {
			return;
		}

		$dbw = wfGetDB( DB_MASTER );
		$dbw->begin( __METHOD__ );

		$updates = $content->getSecondaryDataUpdates( $page->getTitle() );
		DataUpdate::runUpdates( $updates );

		$dbw->commit( __METHOD__ );
	}

	/**
	 * Remove rows from the link tables whose source page id has no
	 * matching row in the page table.
	 *
	 * Offending ids are read from a dedicated load balancer's slave
	 * connection with result buffering switched off, and deleted on the
	 * master in groups of $batchSize.
	 *
	 * @param int $maxLag Maximum replication lag; accepted for interface
	 *   compatibility but not consulted by this method
	 * @param int $batchSize Number of ids to collect per DELETE; values
	 *   below 1 are treated as 1
	 */
	private function deleteLinksFromNonexistent( $maxLag = 0, $batchSize = 100 ) {
		// A batch size of 0 would make the modulo below divide by zero
		$batchSize = max( 1, intval( $batchSize ) );

		wfWaitForSlaves();

		$dbw = wfGetDB( DB_MASTER );

		$lb = wfGetLBFactory()->newMainLB();
		$dbr = $lb->getConnection( DB_SLAVE );
		$dbr->bufferResults( false );

		$linksTables = array( // table name => page_id field
			'pagelinks' => 'pl_from',
			'imagelinks' => 'il_from',
			'categorylinks' => 'cl_from',
			'templatelinks' => 'tl_from',
			'externallinks' => 'el_from',
			'iwlinks' => 'iwl_from',
			'langlinks' => 'll_from',
			'redirect' => 'rd_from',
			'page_props' => 'pp_page',
		);

		foreach ( $linksTables as $table => $field ) {
			$this->output( "Retrieving illegal entries from $table... " );

			// SELECT DISTINCT( $field ) FROM $table LEFT JOIN page ON $field=page_id WHERE page_id IS NULL;
			$results = $dbr->select(
				array( $table, 'page' ),
				$field,
				array( 'page_id' => null ),
				__METHOD__,
				'DISTINCT',
				array( 'page' => array( 'LEFT JOIN', "$field=page_id" ) )
			);

			$counter = 0;
			$list = array();
			$this->output( "0.." );
			foreach ( $results as $row ) {
				$counter++;
				$list[] = $row->$field;
				if ( ( $counter % $batchSize ) == 0 ) {
					// Flush a full batch
					wfWaitForSlaves();
					$dbw->delete( $table, array( $field => $list ), __METHOD__ );

					$this->output( $counter . ".." );
					$list = array();
				}
			}
			$this->output( $counter );
			if ( count( $list ) > 0 ) {
				// Flush the final, partially filled batch
				$dbw->delete( $table, array( $field => $list ), __METHOD__ );
			}
			$this->output( "\n" );
			wfWaitForSlaves();
		}
		$lb->closeAll();
	}
}

$maintClass = 'RefreshLinks';
require_once RUN_MAINTENANCE_IF_MAIN;