MediaWiki
REL1_24
|
<?php
/**
 * Maintenance script that refreshes the link tables.
 */

require_once __DIR__ . '/Maintenance.php';

/**
 * Rebuilds redirect and link table entries for a range of pages, then removes
 * link table rows whose source page id has no matching row in the page table.
 */
class RefreshLinks extends Maintenance {
	public function __construct() {
		parent::__construct();
		$this->mDescription = "Refresh link tables";
		$this->addOption( 'dfn-only', 'Delete links from nonexistent articles only' );
		$this->addOption( 'new-only', 'Only affect articles with just a single edit' );
		$this->addOption( 'redirects-only', 'Only fix redirects, not all links' );
		$this->addOption( 'old-redirects-only', 'Only fix redirects with no redirect table entry' );
		$this->addOption( 'm', 'Maximum replication lag', false, true );
		$this->addOption( 'e', 'Last page id to refresh', false, true );
		$this->addArg( 'start', 'Page_id to start from, default 1', false );
		$this->setBatchSize( 100 );
	}

	/**
	 * Entry point: refresh links (unless --dfn-only was given), then delete
	 * link table rows that point from nonexistent pages.
	 */
	public function execute() {
		$max = $this->getOption( 'm', 0 );
		if ( !$this->hasOption( 'dfn-only' ) ) {
			$start = $this->getArg( 0, 1 );
			$new = $this->getOption( 'new-only', false );
			$end = $this->getOption( 'e', 0 );
			$redir = $this->getOption( 'redirects-only', false );
			$oldRedir = $this->getOption( 'old-redirects-only', false );
			$this->doRefreshLinks( $start, $new, $max, $end, $redir, $oldRedir );
		}
		$this->deleteLinksFromNonexistent( $max, $this->mBatchSize );
	}

	/**
	 * Do the actual link refreshing.
	 *
	 * @param int|string $start Page_id to start from
	 * @param bool $newOnly Only refresh pages flagged page_is_new
	 * @param int|bool $maxLag Max DB replication lag (currently unused here)
	 * @param int|string $end Page_id to stop at; 0 means "no upper bound" for the
	 *   query-driven modes and "highest known id" for the full scan
	 * @param bool $redirectsOnly Only fix redirects, leave other link tables alone
	 * @param bool $oldRedirectsOnly Only fix redirects lacking a redirect table row
	 */
	private function doRefreshLinks( $start, $newOnly = false, $maxLag = false,
		$end = 0, $redirectsOnly = false, $oldRedirectsOnly = false
	) {
		global $wgParser, $wgUseTidy;

		$reportingInterval = 100;
		$dbr = wfGetDB( DB_SLAVE );

		// Both bounds originate from the command line and are interpolated into
		// raw SQL conditions below, so force them to integers first.
		$start = intval( $start );
		$end = intval( $end );

		// Give extensions a chance to optimize settings
		wfRunHooks( 'MaintenanceRefreshLinksInit', array( $this ) );

		# Don't generate extension images (e.g. Timeline)
		$wgParser->clearTagHooks();

		# Don't use HTML tidy
		$wgUseTidy = false;

		$what = $redirectsOnly ? "redirects" : "links";

		// Page id restriction shared by the two query-driven modes, so that the
		// 'e' option is honoured consistently.
		$idRange = ( $end == 0 )
			? "page_id >= $start"
			: "page_id BETWEEN $start AND $end";

		if ( $oldRedirectsOnly ) {
			// Pages flagged as redirects that have no row in the redirect table
			$conds = array(
				"page_is_redirect=1",
				"rd_from IS NULL",
				$idRange
			);

			$res = $dbr->select(
				array( 'page', 'redirect' ),
				'page_id',
				$conds,
				__METHOD__,
				array(),
				array( 'redirect' => array( "LEFT JOIN", "page_id=rd_from" ) )
			);
			$num = $res->numRows();
			$this->output( "Refreshing $num old redirects from $start...\n" );

			$i = 0;

			foreach ( $res as $row ) {
				// Report progress and wait for slaves every $reportingInterval rows
				if ( !( ++$i % $reportingInterval ) ) {
					$this->output( "$i\n" );
					wfWaitForSlaves();
				}
				$this->fixRedirect( $row->page_id );
			}
		} elseif ( $newOnly ) {
			$this->output( "Refreshing $what from " );
			$res = $dbr->select( 'page',
				array( 'page_id' ),
				array(
					'page_is_new' => 1,
					$idRange ),
				__METHOD__
			);
			$num = $res->numRows();
			$this->output( "$num new articles...\n" );

			$i = 0;
			foreach ( $res as $row ) {
				if ( !( ++$i % $reportingInterval ) ) {
					$this->output( "$i\n" );
					wfWaitForSlaves();
				}
				if ( $redirectsOnly ) {
					$this->fixRedirect( $row->page_id );
				} else {
					self::fixLinksFromArticle( $row->page_id );
				}
			}
		} else {
			if ( !$end ) {
				// No explicit end: scan up to the highest id known to either table
				$maxPage = $dbr->selectField( 'page', 'max(page_id)', false );
				$maxRD = $dbr->selectField( 'redirect', 'max(rd_from)', false );
				$end = max( $maxPage, $maxRD );
			}
			$this->output( "Refreshing redirects table.\n" );
			$this->output( "Starting from page_id $start of $end.\n" );

			for ( $id = $start; $id <= $end; $id++ ) {

				if ( !( $id % $reportingInterval ) ) {
					$this->output( "$id\n" );
					wfWaitForSlaves();
				}
				$this->fixRedirect( $id );
			}

			if ( !$redirectsOnly ) {
				$this->output( "Refreshing links tables.\n" );
				$this->output( "Starting from page_id $start of $end.\n" );

				for ( $id = $start; $id <= $end; $id++ ) {

					if ( !( $id % $reportingInterval ) ) {
						$this->output( "$id\n" );
						wfWaitForSlaves();
					}
					self::fixLinksFromArticle( $id );
				}
			}
		}
	}

	/**
	 * Bring the redirect table row and the page_is_redirect flag for one page
	 * id in line with the page's current content.
	 *
	 * If the page does not exist, or its content has no redirect target, any
	 * redirect table row for the id is deleted. Otherwise a redirect entry is
	 * inserted for the target. For existing pages the page_is_redirect field
	 * is then updated to match.
	 *
	 * @param int $id The page ID to check
	 */
	private function fixRedirect( $id ) {
		$page = WikiPage::newFromID( $id );
		$dbw = wfGetDB( DB_MASTER );

		if ( $page === null ) {
			// This page doesn't exist (any more)
			// Delete any redirect table entry for it
			$dbw->delete( 'redirect', array( 'rd_from' => $id ),
				__METHOD__ );

			return;
		}

		$rt = null;
		$content = $page->getContent( Revision::RAW );
		if ( $content !== null ) {
			$rt = $content->getUltimateRedirectTarget();
		}

		if ( $rt === null ) {
			// The page is not a redirect
			// Delete any redirect table entry for it
			$dbw->delete( 'redirect', array( 'rd_from' => $id ), __METHOD__ );
			$fieldValue = 0;
		} else {
			$page->insertRedirectEntry( $rt );
			$fieldValue = 1;
		}

		// Update the page table to be sure it is in a consistent state
		$dbw->update( 'page', array( 'page_is_redirect' => $fieldValue ),
			array( 'page_id' => $id ), __METHOD__ );
	}

	/**
	 * Run the secondary data updates of the page with the given id, wrapped
	 * in a master DB transaction.
	 *
	 * The link cache is cleared on every call. Nothing else happens when the
	 * page does not exist or has no content.
	 *
	 * @param int $id The page_id
	 */
	public static function fixLinksFromArticle( $id ) {
		$page = WikiPage::newFromID( $id );

		LinkCache::singleton()->clear();

		if ( $page === null ) {
			return;
		}

		$content = $page->getContent( Revision::RAW );
		if ( $content === null ) {
			return;
		}

		$dbw = wfGetDB( DB_MASTER );
		$dbw->begin( __METHOD__ );

		$updates = $content->getSecondaryDataUpdates( $page->getTitle() );
		DataUpdate::runUpdates( $updates );

		$dbw->commit( __METHOD__ );
	}

	/**
	 * Remove rows from the link tables whose source page id has no matching
	 * row in the page table.
	 *
	 * Ids are read through a separate, unbuffered slave connection and deleted
	 * on the master in batches.
	 *
	 * @param int $maxLag Max DB replication lag (currently unused here)
	 * @param int $batchSize Number of ids to delete per query; values below 1
	 *   are treated as 1
	 */
	private function deleteLinksFromNonexistent( $maxLag = 0, $batchSize = 100 ) {
		// Guard the modulo below against a zero or negative batch size
		$batchSize = max( 1, intval( $batchSize ) );

		wfWaitForSlaves();

		$dbw = wfGetDB( DB_MASTER );

		// Dedicated load balancer so the unbuffered read connection can be
		// closed independently at the end
		$lb = wfGetLBFactory()->newMainLB();
		$dbr = $lb->getConnection( DB_SLAVE );
		$dbr->bufferResults( false );

		$linksTables = array( // table name => page_id field
			'pagelinks' => 'pl_from',
			'imagelinks' => 'il_from',
			'categorylinks' => 'cl_from',
			'templatelinks' => 'tl_from',
			'externallinks' => 'el_from',
			'iwlinks' => 'iwl_from',
			'langlinks' => 'll_from',
			'redirect' => 'rd_from',
			'page_props' => 'pp_page',
		);

		foreach ( $linksTables as $table => $field ) {
			$this->output( "Retrieving illegal entries from $table... " );

			// SELECT DISTINCT( $field ) FROM $table LEFT JOIN page ON $field=page_id WHERE page_id IS NULL;
			$results = $dbr->select(
				array( $table, 'page' ),
				$field,
				array( 'page_id' => null ),
				__METHOD__,
				'DISTINCT',
				array( 'page' => array( 'LEFT JOIN', "$field=page_id" ) )
			);

			$counter = 0;
			$list = array();
			$this->output( "0.." );
			foreach ( $results as $row ) {
				$counter++;
				$list[] = $row->$field;
				if ( ( $counter % $batchSize ) == 0 ) {
					// Batch is full: flush it to the master
					wfWaitForSlaves();
					$dbw->delete( $table, array( $field => $list ), __METHOD__ );

					$this->output( $counter . ".." );
					$list = array();
				}
			}
			$this->output( $counter );
			// Flush whatever is left over from the last partial batch
			if ( count( $list ) > 0 ) {
				$dbw->delete( $table, array( $field => $list ), __METHOD__ );
			}
			$this->output( "\n" );
			wfWaitForSlaves();
		}
		$lb->closeAll();
	}
}

$maintClass = 'RefreshLinks';
require_once RUN_MAINTENANCE_IF_MAIN;