MediaWiki
REL1_19
|
<?php
/**
 * Maintenance script that refreshes the link tables and removes link rows
 * left behind by pages that no longer exist.
 */

require_once( dirname( __FILE__ ) . '/Maintenance.php' );

class RefreshLinks extends Maintenance {
	/**
	 * Register the script description, its options, the optional 'start'
	 * argument and the default batch size.
	 */
	public function __construct() {
		parent::__construct();
		$this->mDescription = "Refresh link tables";
		$this->addOption( 'dfn-only', 'Delete links from nonexistent articles only' );
		$this->addOption( 'new-only', 'Only affect articles with just a single edit' );
		$this->addOption( 'redirects-only', 'Only fix redirects, not all links' );
		$this->addOption( 'old-redirects-only', 'Only fix redirects with no redirect table entry' );
		$this->addOption( 'm', 'Maximum replication lag', false, true );
		$this->addOption( 'e', 'Last page id to refresh', false, true );
		$this->addArg( 'start', 'Page_id to start from, default 1', false );
		$this->setBatchSize( 100 );
	}

	/**
	 * Entry point. Unless 'dfn-only' was given, refresh the requested range
	 * of pages first; the cleanup of links from nonexistent pages always runs.
	 */
	public function execute() {
		$max = $this->getOption( 'm', 0 );
		if ( !$this->hasOption( 'dfn-only' ) ) {
			$start = $this->getArg( 0, 1 );
			$new = $this->getOption( 'new-only', false );
			$end = $this->getOption( 'e', 0 );
			$redir = $this->getOption( 'redirects-only', false );
			$oldRedir = $this->getOption( 'old-redirects-only', false );
			$this->doRefreshLinks( $start, $new, $max, $end, $redir, $oldRedir );
		}
		$this->deleteLinksFromNonexistent( $max, $this->mBatchSize );
	}

	/**
	 * Build the SQL condition restricting page_id to the requested range.
	 *
	 * Both bounds are forced to integers here so the returned fragment is
	 * safe to hand to the database layer as a raw condition.
	 *
	 * @param $start Integer: first page_id (inclusive)
	 * @param $end Integer: last page_id (inclusive); 0 means no upper bound
	 * @return String SQL condition on page_id
	 */
	private function pageIdRangeCond( $start, $end ) {
		$start = intval( $start );
		$end = intval( $end );

		if ( $end === 0 ) {
			return "page_id >= $start";
		}
		return "page_id BETWEEN $start AND $end";
	}

	/**
	 * Do the actual link refreshing.
	 *
	 * Exactly one of three modes runs: old redirects only, new pages only,
	 * or every page id in [$start, $end].
	 *
	 * @param $start Integer: page_id to start from
	 * @param $newOnly Boolean: only look at pages with page_is_new = 1
	 * @param $maxLag Integer: accepted for interface compatibility; not used
	 *   by this method
	 * @param $end Integer: last page_id to refresh; 0 means no upper bound
	 * @param $redirectsOnly Boolean: only fix the redirect table, not links
	 * @param $oldRedirectsOnly Boolean: only fix redirect pages that have no
	 *   row in the redirect table
	 */
	private function doRefreshLinks( $start, $newOnly = false, $maxLag = false,
		$end = 0, $redirectsOnly = false, $oldRedirectsOnly = false ) {
		global $wgParser, $wgUseTidy;

		$reportingInterval = 100;
		$dbr = wfGetDB( DB_SLAVE );
		$start = intval( $start );
		// $end comes straight from the command line and is interpolated into
		// SQL below, so it gets the same integer cast as $start.
		$end = intval( $end );

		// Give extensions a chance to optimize settings
		wfRunHooks( 'MaintenanceRefreshLinksInit', array( $this ) );

		# Don't generate extension images (e.g. Timeline)
		$wgParser->clearTagHooks();

		# Don't use HTML tidy
		$wgUseTidy = false;

		$what = $redirectsOnly ? "redirects" : "links";

		if ( $oldRedirectsOnly ) {
			# Redirect pages that are missing their redirect table row
			$conds = array(
				"page_is_redirect=1",
				"rd_from IS NULL",
				$this->pageIdRangeCond( $start, $end ),
			);

			$res = $dbr->select(
				array( 'page', 'redirect' ),
				'page_id',
				$conds,
				__METHOD__,
				array(),
				array( 'redirect' => array( "LEFT JOIN", "page_id=rd_from" ) )
			);
			$num = $dbr->numRows( $res );
			$this->output( "Refreshing $num old redirects from $start...\n" );

			$i = 0;

			foreach ( $res as $row ) {
				if ( !( ++$i % $reportingInterval ) ) {
					$this->output( "$i\n" );
					wfWaitForSlaves();
				}
				$this->fixRedirect( $row->page_id );
			}
		} elseif ( $newOnly ) {
			$this->output( "Refreshing $what from " );
			// Honour the 'e' option here as well; with $end == 0 the
			// condition is the plain "page_id >= $start".
			$res = $dbr->select( 'page',
				array( 'page_id' ),
				array(
					'page_is_new' => 1,
					$this->pageIdRangeCond( $start, $end ) ),
				__METHOD__
			);
			$num = $dbr->numRows( $res );
			$this->output( "$num new articles...\n" );

			$i = 0;
			foreach ( $res as $row ) {
				if ( !( ++$i % $reportingInterval ) ) {
					$this->output( "$i\n" );
					wfWaitForSlaves();
				}
				if ( $redirectsOnly ) {
					$this->fixRedirect( $row->page_id );
				} else {
					self::fixLinksFromArticle( $row->page_id );
				}
			}
		} else {
			if ( !$end ) {
				// No explicit end: cover every id known to either table
				$maxPage = $dbr->selectField( 'page', 'max(page_id)', false );
				$maxRD = $dbr->selectField( 'redirect', 'max(rd_from)', false );
				$end = max( $maxPage, $maxRD );
			}
			$this->output( "Refreshing redirects table.\n" );
			$this->output( "Starting from page_id $start of $end.\n" );

			for ( $id = $start; $id <= $end; $id++ ) {

				if ( !( $id % $reportingInterval ) ) {
					$this->output( "$id\n" );
					wfWaitForSlaves();
				}
				$this->fixRedirect( $id );
			}

			if ( !$redirectsOnly ) {
				$this->output( "Refreshing links table.\n" );
				$this->output( "Starting from page_id $start of $end.\n" );

				for ( $id = $start; $id <= $end; $id++ ) {

					if ( !( $id % $reportingInterval ) ) {
						$this->output( "$id\n" );
						wfWaitForSlaves();
					}
					self::fixLinksFromArticle( $id );
				}
			}
		}
	}

	/**
	 * Bring the redirect table in line with the page identified by $id.
	 *
	 * The redirect row for $id is deleted when the page does not exist or
	 * when it has no redirect target.
	 * NOTE(review): for genuine redirects nothing is written here; this
	 * presumably relies on WikiPage::getRedirectTarget() to populate the
	 * redirect table as a side effect - confirm against WikiPage.
	 *
	 * @param $id Integer: the page_id to check
	 */
	private function fixRedirect( $id ) {
		$title = Title::newFromID( $id );
		$dbw = wfGetDB( DB_MASTER );

		if ( is_null( $title ) ) {
			// This page doesn't exist (any more)
			// Delete any redirect table entry for it
			$dbw->delete( 'redirect', array( 'rd_from' => $id ),
				__METHOD__ );
			return;
		}

		$page = WikiPage::factory( $title );
		$rt = $page->getRedirectTarget();

		if ( $rt === null ) {
			// $title is not a redirect
			// Delete any redirect table entry for it
			$dbw->delete( 'redirect', array( 'rd_from' => $id ),
				__METHOD__ );
		}
	}

	/**
	 * Re-parse the revision returned by Revision::newFromTitle() for the
	 * page and run a LinksUpdate on the parser output, inside a
	 * begin()/commit() pair on the master connection.
	 *
	 * Does nothing (apart from clearing the link cache) when the page id
	 * has no title or the title has no revision.
	 *
	 * @param $id Integer: the page_id to refresh
	 */
	public static function fixLinksFromArticle( $id ) {
		global $wgParser;

		$title = Title::newFromID( $id );
		$dbw = wfGetDB( DB_MASTER );

		LinkCache::singleton()->clear();

		if ( is_null( $title ) ) {
			return;
		}

		$revision = Revision::newFromTitle( $title );
		if ( !$revision ) {
			return;
		}

		$dbw->begin();

		$options = new ParserOptions;
		$parserOutput = $wgParser->parse( $revision->getText(), $title, $options, true, true, $revision->getId() );
		$update = new LinksUpdate( $title, $parserOutput, false );
		$update->doUpdate();
		$dbw->commit();
	}

	/**
	 * Remove rows from the link tables whose source page id has no matching
	 * row in the page table.
	 *
	 * For every table the offending ids are read over a dedicated slave
	 * connection with result buffering switched off, and deleted on the
	 * master in groups of $batchSize.
	 *
	 * @param $maxLag Integer: accepted for interface compatibility; not used
	 *   by this method
	 * @param $batchSize Integer: number of ids deleted per DELETE query
	 */
	private function deleteLinksFromNonexistent( $maxLag = 0, $batchSize = 100 ) {
		wfWaitForSlaves();

		$dbw = wfGetDB( DB_MASTER );

		// Separate load balancer so the unbuffered read connection is not
		// the one returned by wfGetDB()
		$lb = wfGetLBFactory()->newMainLB();
		$dbr = $lb->getConnection( DB_SLAVE );
		$dbr->bufferResults( false );

		$linksTables = array( // table name => page_id field
			'pagelinks' => 'pl_from',
			'imagelinks' => 'il_from',
			'categorylinks' => 'cl_from',
			'templatelinks' => 'tl_from',
			'externallinks' => 'el_from',
			'iwlinks' => 'iwl_from',
			'langlinks' => 'll_from',
			'redirect' => 'rd_from',
			'page_props' => 'pp_page',
		);

		foreach ( $linksTables as $table => $field ) {
			$this->output( "Retrieving illegal entries from $table... " );

			// SELECT DISTINCT( $field ) FROM $table LEFT JOIN page ON $field=page_id WHERE page_id IS NULL;
			$results = $dbr->select( array( $table, 'page' ),
				$field,
				array( 'page_id' => null ),
				__METHOD__,
				'DISTINCT',
				array( 'page' => array( 'LEFT JOIN', "$field=page_id" ) )
			);

			$counter = 0;
			$list = array();
			$this->output( "0.." );
			foreach ( $results as $row ) {
				$counter++;
				$list[] = $row->$field;
				if ( ( $counter % $batchSize ) == 0 ) {
					wfWaitForSlaves();
					$dbw->delete( $table, array( $field => $list ), __METHOD__ );

					$this->output( $counter . ".." );
					$list = array();
				}
			}
			$this->output( $counter );
			// Flush the final, partially filled batch
			if ( count( $list ) > 0 ) {
				$dbw->delete( $table, array( $field => $list ), __METHOD__ );
			}
			$this->output( "\n" );
		}
		$lb->closeAll();
	}
}

$maintClass = 'RefreshLinks';
require_once( RUN_MAINTENANCE_IF_MAIN );