MediaWiki  REL1_19
convertLinks.php
Go to the documentation of this file.
00001 <?php
00025 require_once( dirname( __FILE__ ) . '/Maintenance.php' );
00026 
00027 class ConvertLinks extends Maintenance {
00028         private $logPerformance;
00029 
00030         public function __construct() {
00031                 parent::__construct();
00032                 $this->mDescription = "Convert from the old links schema (string->ID) to the new schema (ID->ID)
00033 The wiki should be put into read-only mode while this script executes";
00034 
00035                 $this->addArg( 'logperformance', "Log performance to perfLogFilename.", false );
00036                 $this->addArg( 'perfLogFilename', "Filename where performance is logged if --logperformance was set (defaults to 'convLinksPerf.txt').", false );
00037                 $this->addArg( 'keep-links-table', "Don't overwrite the old links table with the new one, leave the new table at links_temp.", false );
00038                 $this->addArg( 'nokeys', "Don't create keys, and so allow duplicates in the new links table.\n
00039 This gives a huge speed improvement for very large links tables which are MyISAM." /* (What about InnoDB?) */, false );
00040         }
00041 
00042         public function getDbType() {
00043                 return Maintenance::DB_ADMIN;
00044         }
00045 
00046         public function execute() {
00047                 $dbw = wfGetDB( DB_MASTER );
00048 
00049                 $type = $dbw->getType();
00050                 if ( $type != 'mysql' ) {
00051                         $this->output( "Link table conversion not necessary for $type\n" );
00052                         return;
00053                 }
00054 
00055                 global $wgContLang;
00056 
00057                 $numBadLinks = $curRowsRead = 0; # counters etc
00058                 $totalTuplesInserted = 0; # total tuples INSERTed into links_temp
00059 
00060                 $reportCurReadProgress = true; # whether or not to give progress reports while reading IDs from cur table
00061                 $curReadReportInterval = 1000; # number of rows between progress reports
00062 
00063                 $reportLinksConvProgress = true; # whether or not to give progress reports during conversion
00064                 $linksConvInsertInterval = 1000; # number of rows per INSERT
00065 
00066                 $initialRowOffset = 0;
00067                 # $finalRowOffset = 0; # not used yet; highest row number from links table to process
00068 
00069                 $overwriteLinksTable = !$this->hasOption( 'keep-links-table' );
00070                 $noKeys = $this->hasOption( 'noKeys' );
00071                 $this->logPerformance = $this->hasOption( 'logperformance' );
00072                 $perfLogFilename = $this->getArg( 'perfLogFilename', "convLinksPerf.txt" );
00073 
00074                 # --------------------------------------------------------------------
00075 
00076                 list ( $cur, $links, $links_temp, $links_backup ) = $dbw->tableNamesN( 'cur', 'links', 'links_temp', 'links_backup' );
00077 
00078                 if( $dbw->tableExists( 'pagelinks' ) ) {
00079                         $this->output( "...have pagelinks; skipping old links table updates\n" );
00080                         return;
00081                 }
00082 
00083                 $res = $dbw->query( "SELECT l_from FROM $links LIMIT 1" );
00084                 if ( $dbw->fieldType( $res, 0 ) == "int" ) {
00085                         $this->output( "Schema already converted\n" );
00086                         return;
00087                 }
00088 
00089                 $res = $dbw->query( "SELECT COUNT(*) AS count FROM $links" );
00090                 $row = $dbw->fetchObject( $res );
00091                 $numRows = $row->count;
00092                 $dbw->freeResult( $res );
00093 
00094                 if ( $numRows == 0 ) {
00095                         $this->output( "Updating schema (no rows to convert)...\n" );
00096                         $this->createTempTable();
00097                 } else {
00098                         $fh = false;
00099                         if ( $this->logPerformance ) {
00100                                 $fh = fopen ( $perfLogFilename, "w" );
00101                                 if ( !$fh ) {
00102                                         $this->error( "Couldn't open $perfLogFilename" );
00103                                         $this->logPerformance = false;
00104                                 }
00105                         }
00106                         $baseTime = $startTime = $this->getMicroTime();
00107                         # Create a title -> cur_id map
00108                         $this->output( "Loading IDs from $cur table...\n" );
00109                         $this->performanceLog ( $fh, "Reading $numRows rows from cur table...\n" );
00110                         $this->performanceLog ( $fh, "rows read vs seconds elapsed:\n" );
00111 
00112                         $dbw->bufferResults( false );
00113                         $res = $dbw->query( "SELECT cur_namespace,cur_title,cur_id FROM $cur" );
00114                         $ids = array();
00115 
00116                         foreach ( $res as $row ) {
00117                                 $title = $row->cur_title;
00118                                 if ( $row->cur_namespace ) {
00119                                         $title = $wgContLang->getNsText( $row->cur_namespace ) . ":$title";
00120                                 }
00121                                 $ids[$title] = $row->cur_id;
00122                                 $curRowsRead++;
00123                                 if ( $reportCurReadProgress ) {
00124                                         if ( ( $curRowsRead % $curReadReportInterval ) == 0 ) {
00125                                                 $this->performanceLog( $fh, $curRowsRead . " " . ( $this->getMicroTime() - $baseTime ) . "\n" );
00126                                                 $this->output( "\t$curRowsRead rows of $cur table read.\n" );
00127                                         }
00128                                 }
00129                         }
00130                         $dbw->freeResult( $res );
00131                         $dbw->bufferResults( true );
00132                         $this->output( "Finished loading IDs.\n\n" );
00133                         $this->performanceLog( $fh, "Took " . ( $this->getMicroTime() - $baseTime ) . " seconds to load IDs.\n\n" );
00134 
00135                         # --------------------------------------------------------------------
00136 
00137                         # Now, step through the links table (in chunks of $linksConvInsertInterval rows),
00138                         # convert, and write to the new table.
00139                         $this->createTempTable();
00140                         $this->performanceLog( $fh, "Resetting timer.\n\n" );
00141                         $baseTime = $this->getMicroTime();
00142                         $this->output( "Processing $numRows rows from $links table...\n" );
00143                         $this->performanceLog( $fh, "Processing $numRows rows from $links table...\n" );
00144                         $this->performanceLog( $fh, "rows inserted vs seconds elapsed:\n" );
00145 
00146                         for ( $rowOffset = $initialRowOffset; $rowOffset < $numRows; $rowOffset += $linksConvInsertInterval ) {
00147                                 $sqlRead = "SELECT * FROM $links ";
00148                                 $sqlRead = $dbw->limitResult( $sqlRead, $linksConvInsertInterval, $rowOffset );
00149                                 $res = $dbw->query( $sqlRead );
00150                                 if ( $noKeys ) {
00151                                         $sqlWrite = array( "INSERT INTO $links_temp (l_from,l_to) VALUES " );
00152                                 } else {
00153                                         $sqlWrite = array( "INSERT IGNORE INTO $links_temp (l_from,l_to) VALUES " );
00154                                 }
00155 
00156                                 $tuplesAdded = 0; # no tuples added to INSERT yet
00157                                 foreach ( $res as $row ) {
00158                                         $fromTitle = $row->l_from;
00159                                         if ( array_key_exists( $fromTitle, $ids ) ) { # valid title
00160                                                 $from = $ids[$fromTitle];
00161                                                 $to = $row->l_to;
00162                                                 if ( $tuplesAdded != 0 ) {
00163                                                         $sqlWrite[] = ",";
00164                                                 }
00165                                                 $sqlWrite[] = "($from,$to)";
00166                                                 $tuplesAdded++;
00167                                         } else { # invalid title
00168                                                 $numBadLinks++;
00169                                         }
00170                                 }
00171                                 $dbw->freeResult( $res );
00172                                 # $this->output( "rowOffset: $rowOffset\ttuplesAdded: $tuplesAdded\tnumBadLinks: $numBadLinks\n" );
00173                                 if ( $tuplesAdded != 0  ) {
00174                                         if ( $reportLinksConvProgress ) {
00175                                                 $this->output( "Inserting $tuplesAdded tuples into $links_temp..." );
00176                                         }
00177                                         $dbw->query( implode( "", $sqlWrite ) );
00178                                         $totalTuplesInserted += $tuplesAdded;
00179                                         if ( $reportLinksConvProgress )
00180                                                 $this->output( " done. Total $totalTuplesInserted tuples inserted.\n" );
00181                                                 $this->performanceLog( $fh, $totalTuplesInserted . " " . ( $this->getMicroTime() - $baseTime ) . "\n"  );
00182                                 }
00183                         }
00184                         $this->output( "$totalTuplesInserted valid titles and $numBadLinks invalid titles were processed.\n\n" );
00185                         $this->performanceLog( $fh, "$totalTuplesInserted valid titles and $numBadLinks invalid titles were processed.\n" );
00186                         $this->performanceLog( $fh, "Total execution time: " . ( $this->getMicroTime() - $startTime ) . " seconds.\n" );
00187                         if ( $this->logPerformance ) {
00188                                 fclose ( $fh );
00189                         }
00190                 }
00191                 # --------------------------------------------------------------------
00192 
00193                 if ( $overwriteLinksTable ) {
00194                         # Check for existing links_backup, and delete it if it exists.
00195                         $this->output( "Dropping backup links table if it exists..." );
00196                         $dbw->query( "DROP TABLE IF EXISTS $links_backup", __METHOD__ );
00197                         $this->output( " done.\n" );
00198 
00199                         # Swap in the new table, and move old links table to links_backup
00200                         $this->output( "Swapping tables '$links' to '$links_backup'; '$links_temp' to '$links'..." );
00201                         $dbw->query( "RENAME TABLE links TO $links_backup, $links_temp TO $links", __METHOD__ );
00202                         $this->output( " done.\n\n" );
00203 
00204                         $dbw->close();
00205                         $this->output( "Conversion complete. The old table remains at $links_backup;\n" );
00206                         $this->output( "delete at your leisure.\n" );
00207                 } else {
00208                         $this->output( "Conversion complete.  The converted table is at $links_temp;\n" );
00209                         $this->output( "the original links table is unchanged.\n" );
00210                 }
00211         }
00212 
00213         private function createTempTable() {
00214                 $dbConn = wfGetDB( DB_MASTER );
00215 
00216                 if ( !( $dbConn->isOpen() ) ) {
00217                         $this->output( "Opening connection to database failed.\n" );
00218                         return;
00219                 }
00220                 $links_temp = $dbConn->tableName( 'links_temp' );
00221 
00222                 $this->output( "Dropping temporary links table if it exists..." );
00223                 $dbConn->query( "DROP TABLE IF EXISTS $links_temp" );
00224                 $this->output( " done.\n" );
00225 
00226                 $this->output( "Creating temporary links table..." );
00227                 if ( $this->hasOption( 'noKeys' ) ) {
00228                         $dbConn->query( "CREATE TABLE $links_temp ( " .
00229                         "l_from int(8) unsigned NOT NULL default '0', " .
00230                         "l_to int(8) unsigned NOT NULL default '0')" );
00231                 } else {
00232                         $dbConn->query( "CREATE TABLE $links_temp ( " .
00233                         "l_from int(8) unsigned NOT NULL default '0', " .
00234                         "l_to int(8) unsigned NOT NULL default '0', " .
00235                         "UNIQUE KEY l_from(l_from,l_to), " .
00236                         "KEY (l_to))" );
00237                 }
00238                 $this->output( " done.\n\n" );
00239         }
00240 
00241         private function performanceLog( $fh, $text ) {
00242                 if ( $this->logPerformance ) {
00243                         fwrite( $fh, $text );
00244                 }
00245         }
00246 
00247         private function getMicroTime() { # return time in seconds, with microsecond accuracy
00248                 list( $usec, $sec ) = explode( " ", microtime() );
00249                 return ( (float)$usec + (float)$sec );
00250         }
00251 }
00252 
00253 $maintClass = "ConvertLinks";
00254 require_once( RUN_MAINTENANCE_IF_MAIN );