MediaWiki  REL1_24
convertLinks.php
Go to the documentation of this file.
00001 <?php
00024 require_once __DIR__ . '/Maintenance.php';
00025 
00034 class ConvertLinks extends Maintenance {
00035     private $logPerformance;
00036 
00037     public function __construct() {
00038         parent::__construct();
00039         $this->mDescription =
00040             "Convert from the old links schema (string->ID) to the new schema (ID->ID)."
00041                 . "The wiki should be put into read-only mode while this script executes";
00042 
00043         $this->addArg( 'logperformance', "Log performance to perfLogFilename.", false );
00044         $this->addArg(
00045             'perfLogFilename',
00046             "Filename where performance is logged if --logperformance was set "
00047                 . "(defaults to 'convLinksPerf.txt').",
00048             false
00049         );
00050         $this->addArg(
00051             'keep-links-table',
00052             "Don't overwrite the old links table with the new one, leave the new table at links_temp.",
00053             false
00054         );
00055         $this->addArg(
00056             'nokeys',
00057             /* (What about InnoDB?) */
00058             "Don't create keys, and so allow duplicates in the new links table.\n"
00059                 . "This gives a huge speed improvement for very large links tables which are MyISAM.",
00060             false
00061         );
00062     }
00063 
00064     public function getDbType() {
00065         return Maintenance::DB_ADMIN;
00066     }
00067 
00068     public function execute() {
00069         $dbw = wfGetDB( DB_MASTER );
00070 
00071         $type = $dbw->getType();
00072         if ( $type != 'mysql' ) {
00073             $this->output( "Link table conversion not necessary for $type\n" );
00074 
00075             return;
00076         }
00077 
00078         global $wgContLang;
00079 
00080         # counters etc
00081         $numBadLinks = $curRowsRead = 0;
00082 
00083         # total tuples INSERTed into links_temp
00084         $totalTuplesInserted = 0;
00085 
00086         # whether or not to give progress reports while reading IDs from cur table
00087         $reportCurReadProgress = true;
00088 
00089         # number of rows between progress reports
00090         $curReadReportInterval = 1000;
00091 
00092         # whether or not to give progress reports during conversion
00093         $reportLinksConvProgress = true;
00094 
00095         # number of rows per INSERT
00096         $linksConvInsertInterval = 1000;
00097 
00098         $initialRowOffset = 0;
00099 
00100         # not used yet; highest row number from links table to process
00101         # $finalRowOffset = 0;
00102 
00103         $overwriteLinksTable = !$this->hasOption( 'keep-links-table' );
00104         $noKeys = $this->hasOption( 'noKeys' );
00105         $this->logPerformance = $this->hasOption( 'logperformance' );
00106         $perfLogFilename = $this->getArg( 'perfLogFilename', "convLinksPerf.txt" );
00107 
00108         # --------------------------------------------------------------------
00109 
00110         list( $cur, $links, $links_temp, $links_backup ) =
00111             $dbw->tableNamesN( 'cur', 'links', 'links_temp', 'links_backup' );
00112 
00113         if ( $dbw->tableExists( 'pagelinks' ) ) {
00114             $this->output( "...have pagelinks; skipping old links table updates\n" );
00115 
00116             return;
00117         }
00118 
00119         $res = $dbw->query( "SELECT l_from FROM $links LIMIT 1" );
00120         if ( $dbw->fieldType( $res, 0 ) == "int" ) {
00121             $this->output( "Schema already converted\n" );
00122 
00123             return;
00124         }
00125 
00126         $res = $dbw->query( "SELECT COUNT(*) AS count FROM $links" );
00127         $row = $dbw->fetchObject( $res );
00128         $numRows = $row->count;
00129         $dbw->freeResult( $res );
00130 
00131         if ( $numRows == 0 ) {
00132             $this->output( "Updating schema (no rows to convert)...\n" );
00133             $this->createTempTable();
00134         } else {
00135             $fh = false;
00136             if ( $this->logPerformance ) {
00137                 $fh = fopen( $perfLogFilename, "w" );
00138                 if ( !$fh ) {
00139                     $this->error( "Couldn't open $perfLogFilename" );
00140                     $this->logPerformance = false;
00141                 }
00142             }
00143             $baseTime = $startTime = microtime( true );
00144             # Create a title -> cur_id map
00145             $this->output( "Loading IDs from $cur table...\n" );
00146             $this->performanceLog( $fh, "Reading $numRows rows from cur table...\n" );
00147             $this->performanceLog( $fh, "rows read vs seconds elapsed:\n" );
00148 
00149             $dbw->bufferResults( false );
00150             $res = $dbw->query( "SELECT cur_namespace,cur_title,cur_id FROM $cur" );
00151             $ids = array();
00152 
00153             foreach ( $res as $row ) {
00154                 $title = $row->cur_title;
00155                 if ( $row->cur_namespace ) {
00156                     $title = $wgContLang->getNsText( $row->cur_namespace ) . ":$title";
00157                 }
00158                 $ids[$title] = $row->cur_id;
00159                 $curRowsRead++;
00160                 if ( $reportCurReadProgress ) {
00161                     if ( ( $curRowsRead % $curReadReportInterval ) == 0 ) {
00162                         $this->performanceLog(
00163                             $fh,
00164                             $curRowsRead . " " . ( microtime( true ) - $baseTime ) . "\n"
00165                         );
00166                         $this->output( "\t$curRowsRead rows of $cur table read.\n" );
00167                     }
00168                 }
00169             }
00170             $dbw->freeResult( $res );
00171             $dbw->bufferResults( true );
00172             $this->output( "Finished loading IDs.\n\n" );
00173             $this->performanceLog(
00174                 $fh,
00175                 "Took " . ( microtime( true ) - $baseTime ) . " seconds to load IDs.\n\n"
00176             );
00177 
00178             # --------------------------------------------------------------------
00179 
00180             # Now, step through the links table (in chunks of $linksConvInsertInterval rows),
00181             # convert, and write to the new table.
00182             $this->createTempTable();
00183             $this->performanceLog( $fh, "Resetting timer.\n\n" );
00184             $baseTime = microtime( true );
00185             $this->output( "Processing $numRows rows from $links table...\n" );
00186             $this->performanceLog( $fh, "Processing $numRows rows from $links table...\n" );
00187             $this->performanceLog( $fh, "rows inserted vs seconds elapsed:\n" );
00188 
00189             for ( $rowOffset = $initialRowOffset; $rowOffset < $numRows;
00190                 $rowOffset += $linksConvInsertInterval
00191             ) {
00192                 $sqlRead = "SELECT * FROM $links ";
00193                 $sqlRead = $dbw->limitResult( $sqlRead, $linksConvInsertInterval, $rowOffset );
00194                 $res = $dbw->query( $sqlRead );
00195                 if ( $noKeys ) {
00196                     $sqlWrite = array( "INSERT INTO $links_temp (l_from,l_to) VALUES " );
00197                 } else {
00198                     $sqlWrite = array( "INSERT IGNORE INTO $links_temp (l_from,l_to) VALUES " );
00199                 }
00200 
00201                 $tuplesAdded = 0; # no tuples added to INSERT yet
00202                 foreach ( $res as $row ) {
00203                     $fromTitle = $row->l_from;
00204                     if ( array_key_exists( $fromTitle, $ids ) ) { # valid title
00205                         $from = $ids[$fromTitle];
00206                         $to = $row->l_to;
00207                         if ( $tuplesAdded != 0 ) {
00208                             $sqlWrite[] = ",";
00209                         }
00210                         $sqlWrite[] = "($from,$to)";
00211                         $tuplesAdded++;
00212                     } else { # invalid title
00213                         $numBadLinks++;
00214                     }
00215                 }
00216                 $dbw->freeResult( $res );
00217                 # $this->output( "rowOffset: $rowOffset\ttuplesAdded: "
00218                 #   . "$tuplesAdded\tnumBadLinks: $numBadLinks\n" );
00219                 if ( $tuplesAdded != 0 ) {
00220                     if ( $reportLinksConvProgress ) {
00221                         $this->output( "Inserting $tuplesAdded tuples into $links_temp..." );
00222                     }
00223                     $dbw->query( implode( "", $sqlWrite ) );
00224                     $totalTuplesInserted += $tuplesAdded;
00225                     if ( $reportLinksConvProgress ) {
00226                         $this->output( " done. Total $totalTuplesInserted tuples inserted.\n" );
00227                         $this->performanceLog(
00228                             $fh,
00229                             $totalTuplesInserted . " " . ( microtime( true ) - $baseTime ) . "\n"
00230                         );
00231                     }
00232                 }
00233             }
00234             $this->output( "$totalTuplesInserted valid titles and "
00235                 . "$numBadLinks invalid titles were processed.\n\n" );
00236             $this->performanceLog(
00237                 $fh,
00238                 "$totalTuplesInserted valid titles and $numBadLinks invalid titles were processed.\n"
00239             );
00240             $this->performanceLog(
00241                 $fh,
00242                 "Total execution time: " . ( microtime( true ) - $startTime ) . " seconds.\n"
00243             );
00244             if ( $this->logPerformance ) {
00245                 fclose( $fh );
00246             }
00247         }
00248         # --------------------------------------------------------------------
00249 
00250         if ( $overwriteLinksTable ) {
00251             # Check for existing links_backup, and delete it if it exists.
00252             $this->output( "Dropping backup links table if it exists..." );
00253             $dbw->query( "DROP TABLE IF EXISTS $links_backup", __METHOD__ );
00254             $this->output( " done.\n" );
00255 
00256             # Swap in the new table, and move old links table to links_backup
00257             $this->output( "Swapping tables '$links' to '$links_backup'; '$links_temp' to '$links'..." );
00258             $dbw->query( "RENAME TABLE links TO $links_backup, $links_temp TO $links", __METHOD__ );
00259             $this->output( " done.\n\n" );
00260 
00261             $this->output( "Conversion complete. The old table remains at $links_backup;\n" );
00262             $this->output( "delete at your leisure.\n" );
00263         } else {
00264             $this->output( "Conversion complete.  The converted table is at $links_temp;\n" );
00265             $this->output( "the original links table is unchanged.\n" );
00266         }
00267     }
00268 
00269     private function createTempTable() {
00270         $dbConn = wfGetDB( DB_MASTER );
00271 
00272         if ( !( $dbConn->isOpen() ) ) {
00273             $this->output( "Opening connection to database failed.\n" );
00274 
00275             return;
00276         }
00277         $links_temp = $dbConn->tableName( 'links_temp' );
00278 
00279         $this->output( "Dropping temporary links table if it exists..." );
00280         $dbConn->query( "DROP TABLE IF EXISTS $links_temp" );
00281         $this->output( " done.\n" );
00282 
00283         $this->output( "Creating temporary links table..." );
00284         if ( $this->hasOption( 'noKeys' ) ) {
00285             $dbConn->query( "CREATE TABLE $links_temp ( " .
00286                 "l_from int(8) unsigned NOT NULL default '0', " .
00287                 "l_to int(8) unsigned NOT NULL default '0')" );
00288         } else {
00289             $dbConn->query( "CREATE TABLE $links_temp ( " .
00290                 "l_from int(8) unsigned NOT NULL default '0', " .
00291                 "l_to int(8) unsigned NOT NULL default '0', " .
00292                 "UNIQUE KEY l_from(l_from,l_to), " .
00293                 "KEY (l_to))" );
00294         }
00295         $this->output( " done.\n\n" );
00296     }
00297 
00298     private function performanceLog( $fh, $text ) {
00299         if ( $this->logPerformance ) {
00300             fwrite( $fh, $text );
00301         }
00302     }
00303 }
00304 
00305 $maintClass = "ConvertLinks";
00306 require_once RUN_MAINTENANCE_IF_MAIN;