MediaWiki
REL1_20
|
<?php
/**
 * Maintenance script that refreshes the link tables.
 *
 * Depending on the options given it re-checks redirects and/or re-parses
 * pages to rebuild their link table rows, and then removes link table rows
 * whose source page no longer exists.
 */

require_once( __DIR__ . '/Maintenance.php' );

/**
 * Maintenance script to refresh link tables.
 */
class RefreshLinks extends Maintenance {
	/**
	 * Register the description, options and arguments of the script.
	 */
	public function __construct() {
		parent::__construct();
		$this->mDescription = "Refresh link tables";
		$this->addOption( 'dfn-only', 'Delete links from nonexistent articles only' );
		$this->addOption( 'new-only', 'Only affect articles with just a single edit' );
		$this->addOption( 'redirects-only', 'Only fix redirects, not all links' );
		$this->addOption( 'old-redirects-only', 'Only fix redirects with no redirect table entry' );
		$this->addOption( 'm', 'Maximum replication lag', false, true );
		$this->addOption( 'e', 'Last page id to refresh', false, true );
		$this->addArg( 'start', 'Page_id to start from, default 1', false );
		$this->setBatchSize( 100 );
	}

	/**
	 * Entry point: refresh links (unless --dfn-only was given) and then
	 * always delete link rows that point from nonexistent pages.
	 */
	public function execute() {
		$max = $this->getOption( 'm', 0 );
		if ( !$this->hasOption( 'dfn-only' ) ) {
			$start = $this->getArg( 0, 1 );
			$new = $this->getOption( 'new-only', false );
			$end = $this->getOption( 'e', 0 );
			$redir = $this->getOption( 'redirects-only', false );
			$oldRedir = $this->getOption( 'old-redirects-only', false );
			$this->doRefreshLinks( $start, $new, $max, $end, $redir, $oldRedir );
		}
		$this->deleteLinksFromNonexistent( $max, $this->mBatchSize );
	}

	/**
	 * Build the SQL condition restricting page_id to the requested range.
	 *
	 * Both bounds are cast to int here so the returned fragment is always
	 * safe to embed in a query, whatever the caller passed in.
	 *
	 * @param $start int First page_id to include
	 * @param $end int Last page_id to include, 0 for no upper bound
	 * @return string SQL condition on page_id
	 */
	private function pageIdRangeCond( $start, $end ) {
		$start = intval( $start );
		$end = intval( $end );

		return $end === 0
			? "page_id >= $start"
			: "page_id BETWEEN $start AND $end";
	}

	/**
	 * Do the actual link refreshing.
	 *
	 * @param $start int Page_id to start from
	 * @param $newOnly bool Only process pages flagged page_is_new
	 * @param $maxLag int Currently unused; kept so existing callers keep working
	 * @param $end int Page_id to stop at, 0 for no upper bound
	 * @param $redirectsOnly bool Only fix redirects
	 * @param $oldRedirectsOnly bool Only fix redirects without a redirect table row
	 */
	private function doRefreshLinks( $start, $newOnly = false, $maxLag = false,
		$end = 0, $redirectsOnly = false, $oldRedirectsOnly = false ) {
		global $wgParser, $wgUseTidy;

		$reportingInterval = 100;
		$dbr = wfGetDB( DB_SLAVE );
		// Both bounds come from the command line and end up inside SQL
		// fragments and loop bounds, so force them to integers up front.
		$start = intval( $start );
		$end = intval( $end );

		// Give extensions a chance to optimize settings
		wfRunHooks( 'MaintenanceRefreshLinksInit', array( $this ) );

		# Don't generate extension images (e.g. Timeline)
		$wgParser->clearTagHooks();

		# Don't use HTML tidy
		$wgUseTidy = false;

		$what = $redirectsOnly ? "redirects" : "links";

		if ( $oldRedirectsOnly ) {
			# Redirect pages that have no matching row in the redirect table
			$conds = array(
				"page_is_redirect=1",
				"rd_from IS NULL",
				$this->pageIdRangeCond( $start, $end ),
			);

			$res = $dbr->select(
				array( 'page', 'redirect' ),
				'page_id',
				$conds,
				__METHOD__,
				array(),
				array( 'redirect' => array( "LEFT JOIN", "page_id=rd_from" ) )
			);
			$num = $dbr->numRows( $res );
			$this->output( "Refreshing $num old redirects from $start...\n" );

			$i = 0;

			foreach ( $res as $row ) {
				if ( !( ++$i % $reportingInterval ) ) {
					$this->output( "$i\n" );
					wfWaitForSlaves();
				}
				$this->fixRedirect( $row->page_id );
			}
		} elseif ( $newOnly ) {
			$this->output( "Refreshing $what from " );
			$res = $dbr->select( 'page',
				array( 'page_id' ),
				array(
					'page_is_new' => 1,
					// Honour the 'e' (last page id) option here as well;
					// with $end == 0 this is just "page_id >= $start".
					$this->pageIdRangeCond( $start, $end ),
				),
				__METHOD__
			);
			$num = $dbr->numRows( $res );
			$this->output( "$num new articles...\n" );

			$i = 0;
			foreach ( $res as $row ) {
				if ( !( ++$i % $reportingInterval ) ) {
					$this->output( "$i\n" );
					wfWaitForSlaves();
				}
				if ( $redirectsOnly ) {
					$this->fixRedirect( $row->page_id );
				} else {
					self::fixLinksFromArticle( $row->page_id );
				}
			}
		} else {
			if ( $end === 0 ) {
				// No explicit end: go up to the highest id known to either table.
				// Cast to int because selectField() may not return a number
				// when the table is empty.
				$maxPage = $dbr->selectField( 'page', 'max(page_id)', false );
				$maxRD = $dbr->selectField( 'redirect', 'max(rd_from)', false );
				$end = max( intval( $maxPage ), intval( $maxRD ) );
			}
			$this->output( "Refreshing redirects table.\n" );
			$this->output( "Starting from page_id $start of $end.\n" );

			for ( $id = $start; $id <= $end; $id++ ) {

				if ( !( $id % $reportingInterval ) ) {
					$this->output( "$id\n" );
					wfWaitForSlaves();
				}
				$this->fixRedirect( $id );
			}

			if ( !$redirectsOnly ) {
				$this->output( "Refreshing links table.\n" );
				$this->output( "Starting from page_id $start of $end.\n" );

				for ( $id = $start; $id <= $end; $id++ ) {

					if ( !( $id % $reportingInterval ) ) {
						$this->output( "$id\n" );
						wfWaitForSlaves();
					}
					self::fixLinksFromArticle( $id );
				}
			}
		}
	}

	/**
	 * Fix the redirect table entry for a given page id.
	 *
	 * Deletes the redirect row when the page does not exist or is not a
	 * redirect. For actual redirects nothing is written here directly.
	 * NOTE(review): presumably WikiPage::getRedirectTarget() is what
	 * (re)creates a missing redirect row - confirm against WikiPage.
	 *
	 * @param $id int The page_id of the page to check
	 */
	private function fixRedirect( $id ) {
		$page = WikiPage::newFromID( $id );
		$dbw = wfGetDB( DB_MASTER );

		if ( $page === null ) {
			// This page doesn't exist (any more)
			// Delete any redirect table entry for it
			$dbw->delete( 'redirect', array( 'rd_from' => $id ),
				__METHOD__ );
			return;
		}

		$rt = $page->getRedirectTarget();

		if ( $rt === null ) {
			// The page is not a redirect
			// Delete any redirect table entry for it
			$dbw->delete( 'redirect', array( 'rd_from' => $id ),
				__METHOD__ );
		}
	}

	/**
	 * Re-parse a page and run a LinksUpdate for it.
	 *
	 * Does nothing beyond clearing the LinkCache when the page does not
	 * exist or its raw text cannot be fetched.
	 *
	 * @param $id int The page_id of the page to refresh
	 */
	public static function fixLinksFromArticle( $id ) {
		global $wgParser, $wgContLang;

		$page = WikiPage::newFromID( $id );

		// Cleared on every call, including for nonexistent pages
		LinkCache::singleton()->clear();

		if ( $page === null ) {
			return;
		}

		$text = $page->getRawText();
		if ( $text === false ) {
			return;
		}

		$dbw = wfGetDB( DB_MASTER );
		$dbw->begin( __METHOD__ );

		$options = ParserOptions::newFromUserAndLang( new User, $wgContLang );
		$parserOutput = $wgParser->parse( $text, $page->getTitle(), $options, true, true, $page->getLatest() );
		$update = new LinksUpdate( $page->getTitle(), $parserOutput, false );
		$update->doUpdate();

		$dbw->commit( __METHOD__ );
	}

	/**
	 * Remove rows from the link tables whose source page id has no
	 * matching row in the page table.
	 *
	 * Offending ids are read through an unbuffered query on a connection
	 * from a separate load balancer and deleted on the master in batches.
	 *
	 * @param $maxLag int Currently unused; kept so existing callers keep working
	 * @param $batchSize int Number of ids to collect before each delete
	 */
	private function deleteLinksFromNonexistent( $maxLag = 0, $batchSize = 100 ) {
		wfWaitForSlaves();

		$dbw = wfGetDB( DB_MASTER );

		// Dedicated connection with unbuffered results
		// NOTE(review): presumably separate so the streaming read does not
		// block other queries on the shared slave connection - confirm.
		$lb = wfGetLBFactory()->newMainLB();
		$dbr = $lb->getConnection( DB_SLAVE );
		$dbr->bufferResults( false );

		$linksTables = array( // table name => page_id field
			'pagelinks' => 'pl_from',
			'imagelinks' => 'il_from',
			'categorylinks' => 'cl_from',
			'templatelinks' => 'tl_from',
			'externallinks' => 'el_from',
			'iwlinks' => 'iwl_from',
			'langlinks' => 'll_from',
			'redirect' => 'rd_from',
			'page_props' => 'pp_page',
		);

		foreach ( $linksTables as $table => $field ) {
			$this->output( "Retrieving illegal entries from $table... " );

			// SELECT DISTINCT( $field ) FROM $table LEFT JOIN page ON $field=page_id WHERE page_id IS NULL;
			$results = $dbr->select( array( $table, 'page' ),
				$field,
				array( 'page_id' => null ),
				__METHOD__,
				'DISTINCT',
				array( 'page' => array( 'LEFT JOIN', "$field=page_id" ) )
			);

			$counter = 0;
			$list = array();
			$this->output( "0.." );
			foreach ( $results as $row ) {
				$counter++;
				$list[] = $row->$field;
				if ( ( $counter % $batchSize ) == 0 ) {
					// Full batch collected: wait for the slaves, then delete it
					wfWaitForSlaves();
					$dbw->delete( $table, array( $field => $list ), __METHOD__ );

					$this->output( $counter . ".." );
					$list = array();
				}
			}
			$this->output( $counter );
			// Delete whatever is left over from the last, partial batch
			if ( count( $list ) > 0 ) {
				$dbw->delete( $table, array( $field => $list ), __METHOD__ );
			}
			$this->output( "\n" );
			wfWaitForSlaves();
		}
		$lb->closeAll();
	}
}

$maintClass = 'RefreshLinks';
require_once( RUN_MAINTENANCE_IF_MAIN );