[ Index ]

PHP Cross Reference of MediaWiki-1.24.0

title

Body

[close]

/maintenance/ -> convertLinks.php (source)

   1  <?php
   2  /**
   3   * Convert from the old links schema (string->ID) to the new schema (ID->ID).
   4   *
   5   * This program is free software; you can redistribute it and/or modify
   6   * it under the terms of the GNU General Public License as published by
   7   * the Free Software Foundation; either version 2 of the License, or
   8   * (at your option) any later version.
   9   *
  10   * This program is distributed in the hope that it will be useful,
  11   * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13   * GNU General Public License for more details.
  14   *
  15   * You should have received a copy of the GNU General Public License along
  16   * with this program; if not, write to the Free Software Foundation, Inc.,
  17   * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  18   * http://www.gnu.org/copyleft/gpl.html
  19   *
  20   * @file
  21   * @ingroup Maintenance
  22   */
  23  
  24  require_once  __DIR__ . '/Maintenance.php';
  25  
  26  /**
  27   * Maintenance script to convert from the old links schema (string->ID)
  28   * to the new schema (ID->ID).
  29   *
  30   * The wiki should be put into read-only mode while this script executes.
  31   *
  32   * @ingroup Maintenance
  33   */
  34  class ConvertLinks extends Maintenance {
  35      private $logPerformance;
  36  
  37  	public function __construct() {
  38          parent::__construct();
  39          $this->mDescription =
  40              "Convert from the old links schema (string->ID) to the new schema (ID->ID)."
  41                  . "The wiki should be put into read-only mode while this script executes";
  42  
  43          $this->addArg( 'logperformance', "Log performance to perfLogFilename.", false );
  44          $this->addArg(
  45              'perfLogFilename',
  46              "Filename where performance is logged if --logperformance was set "
  47                  . "(defaults to 'convLinksPerf.txt').",
  48              false
  49          );
  50          $this->addArg(
  51              'keep-links-table',
  52              "Don't overwrite the old links table with the new one, leave the new table at links_temp.",
  53              false
  54          );
  55          $this->addArg(
  56              'nokeys',
  57              /* (What about InnoDB?) */
  58              "Don't create keys, and so allow duplicates in the new links table.\n"
  59                  . "This gives a huge speed improvement for very large links tables which are MyISAM.",
  60              false
  61          );
  62      }
  63  
  64  	public function getDbType() {
  65          return Maintenance::DB_ADMIN;
  66      }
  67  
  68  	public function execute() {
  69          $dbw = wfGetDB( DB_MASTER );
  70  
  71          $type = $dbw->getType();
  72          if ( $type != 'mysql' ) {
  73              $this->output( "Link table conversion not necessary for $type\n" );
  74  
  75              return;
  76          }
  77  
  78          global $wgContLang;
  79  
  80          # counters etc
  81          $numBadLinks = $curRowsRead = 0;
  82  
  83          # total tuples INSERTed into links_temp
  84          $totalTuplesInserted = 0;
  85  
  86          # whether or not to give progress reports while reading IDs from cur table
  87          $reportCurReadProgress = true;
  88  
  89          # number of rows between progress reports
  90          $curReadReportInterval = 1000;
  91  
  92          # whether or not to give progress reports during conversion
  93          $reportLinksConvProgress = true;
  94  
  95          # number of rows per INSERT
  96          $linksConvInsertInterval = 1000;
  97  
  98          $initialRowOffset = 0;
  99  
 100          # not used yet; highest row number from links table to process
 101          # $finalRowOffset = 0;
 102  
 103          $overwriteLinksTable = !$this->hasOption( 'keep-links-table' );
 104          $noKeys = $this->hasOption( 'noKeys' );
 105          $this->logPerformance = $this->hasOption( 'logperformance' );
 106          $perfLogFilename = $this->getArg( 'perfLogFilename', "convLinksPerf.txt" );
 107  
 108          # --------------------------------------------------------------------
 109  
 110          list( $cur, $links, $links_temp, $links_backup ) =
 111              $dbw->tableNamesN( 'cur', 'links', 'links_temp', 'links_backup' );
 112  
 113          if ( $dbw->tableExists( 'pagelinks' ) ) {
 114              $this->output( "...have pagelinks; skipping old links table updates\n" );
 115  
 116              return;
 117          }
 118  
 119          $res = $dbw->query( "SELECT l_from FROM $links LIMIT 1" );
 120          if ( $dbw->fieldType( $res, 0 ) == "int" ) {
 121              $this->output( "Schema already converted\n" );
 122  
 123              return;
 124          }
 125  
 126          $res = $dbw->query( "SELECT COUNT(*) AS count FROM $links" );
 127          $row = $dbw->fetchObject( $res );
 128          $numRows = $row->count;
 129          $dbw->freeResult( $res );
 130  
 131          if ( $numRows == 0 ) {
 132              $this->output( "Updating schema (no rows to convert)...\n" );
 133              $this->createTempTable();
 134          } else {
 135              $fh = false;
 136              if ( $this->logPerformance ) {
 137                  $fh = fopen( $perfLogFilename, "w" );
 138                  if ( !$fh ) {
 139                      $this->error( "Couldn't open $perfLogFilename" );
 140                      $this->logPerformance = false;
 141                  }
 142              }
 143              $baseTime = $startTime = microtime( true );
 144              # Create a title -> cur_id map
 145              $this->output( "Loading IDs from $cur table...\n" );
 146              $this->performanceLog( $fh, "Reading $numRows rows from cur table...\n" );
 147              $this->performanceLog( $fh, "rows read vs seconds elapsed:\n" );
 148  
 149              $dbw->bufferResults( false );
 150              $res = $dbw->query( "SELECT cur_namespace,cur_title,cur_id FROM $cur" );
 151              $ids = array();
 152  
 153              foreach ( $res as $row ) {
 154                  $title = $row->cur_title;
 155                  if ( $row->cur_namespace ) {
 156                      $title = $wgContLang->getNsText( $row->cur_namespace ) . ":$title";
 157                  }
 158                  $ids[$title] = $row->cur_id;
 159                  $curRowsRead++;
 160                  if ( $reportCurReadProgress ) {
 161                      if ( ( $curRowsRead % $curReadReportInterval ) == 0 ) {
 162                          $this->performanceLog(
 163                              $fh,
 164                              $curRowsRead . " " . ( microtime( true ) - $baseTime ) . "\n"
 165                          );
 166                          $this->output( "\t$curRowsRead rows of $cur table read.\n" );
 167                      }
 168                  }
 169              }
 170              $dbw->freeResult( $res );
 171              $dbw->bufferResults( true );
 172              $this->output( "Finished loading IDs.\n\n" );
 173              $this->performanceLog(
 174                  $fh,
 175                  "Took " . ( microtime( true ) - $baseTime ) . " seconds to load IDs.\n\n"
 176              );
 177  
 178              # --------------------------------------------------------------------
 179  
 180              # Now, step through the links table (in chunks of $linksConvInsertInterval rows),
 181              # convert, and write to the new table.
 182              $this->createTempTable();
 183              $this->performanceLog( $fh, "Resetting timer.\n\n" );
 184              $baseTime = microtime( true );
 185              $this->output( "Processing $numRows rows from $links table...\n" );
 186              $this->performanceLog( $fh, "Processing $numRows rows from $links table...\n" );
 187              $this->performanceLog( $fh, "rows inserted vs seconds elapsed:\n" );
 188  
 189              for ( $rowOffset = $initialRowOffset; $rowOffset < $numRows;
 190                  $rowOffset += $linksConvInsertInterval
 191              ) {
 192                  $sqlRead = "SELECT * FROM $links ";
 193                  $sqlRead = $dbw->limitResult( $sqlRead, $linksConvInsertInterval, $rowOffset );
 194                  $res = $dbw->query( $sqlRead );
 195                  if ( $noKeys ) {
 196                      $sqlWrite = array( "INSERT INTO $links_temp (l_from,l_to) VALUES " );
 197                  } else {
 198                      $sqlWrite = array( "INSERT IGNORE INTO $links_temp (l_from,l_to) VALUES " );
 199                  }
 200  
 201                  $tuplesAdded = 0; # no tuples added to INSERT yet
 202                  foreach ( $res as $row ) {
 203                      $fromTitle = $row->l_from;
 204                      if ( array_key_exists( $fromTitle, $ids ) ) { # valid title
 205                          $from = $ids[$fromTitle];
 206                          $to = $row->l_to;
 207                          if ( $tuplesAdded != 0 ) {
 208                              $sqlWrite[] = ",";
 209                          }
 210                          $sqlWrite[] = "($from,$to)";
 211                          $tuplesAdded++;
 212                      } else { # invalid title
 213                          $numBadLinks++;
 214                      }
 215                  }
 216                  $dbw->freeResult( $res );
 217                  # $this->output( "rowOffset: $rowOffset\ttuplesAdded: "
 218                  #    . "$tuplesAdded\tnumBadLinks: $numBadLinks\n" );
 219                  if ( $tuplesAdded != 0 ) {
 220                      if ( $reportLinksConvProgress ) {
 221                          $this->output( "Inserting $tuplesAdded tuples into $links_temp..." );
 222                      }
 223                      $dbw->query( implode( "", $sqlWrite ) );
 224                      $totalTuplesInserted += $tuplesAdded;
 225                      if ( $reportLinksConvProgress ) {
 226                          $this->output( " done. Total $totalTuplesInserted tuples inserted.\n" );
 227                          $this->performanceLog(
 228                              $fh,
 229                              $totalTuplesInserted . " " . ( microtime( true ) - $baseTime ) . "\n"
 230                          );
 231                      }
 232                  }
 233              }
 234              $this->output( "$totalTuplesInserted valid titles and "
 235                  . "$numBadLinks invalid titles were processed.\n\n" );
 236              $this->performanceLog(
 237                  $fh,
 238                  "$totalTuplesInserted valid titles and $numBadLinks invalid titles were processed.\n"
 239              );
 240              $this->performanceLog(
 241                  $fh,
 242                  "Total execution time: " . ( microtime( true ) - $startTime ) . " seconds.\n"
 243              );
 244              if ( $this->logPerformance ) {
 245                  fclose( $fh );
 246              }
 247          }
 248          # --------------------------------------------------------------------
 249  
 250          if ( $overwriteLinksTable ) {
 251              # Check for existing links_backup, and delete it if it exists.
 252              $this->output( "Dropping backup links table if it exists..." );
 253              $dbw->query( "DROP TABLE IF EXISTS $links_backup", __METHOD__ );
 254              $this->output( " done.\n" );
 255  
 256              # Swap in the new table, and move old links table to links_backup
 257              $this->output( "Swapping tables '$links' to '$links_backup'; '$links_temp' to '$links'..." );
 258              $dbw->query( "RENAME TABLE links TO $links_backup, $links_temp TO $links", __METHOD__ );
 259              $this->output( " done.\n\n" );
 260  
 261              $this->output( "Conversion complete. The old table remains at $links_backup;\n" );
 262              $this->output( "delete at your leisure.\n" );
 263          } else {
 264              $this->output( "Conversion complete.  The converted table is at $links_temp;\n" );
 265              $this->output( "the original links table is unchanged.\n" );
 266          }
 267      }
 268  
 269  	private function createTempTable() {
 270          $dbConn = wfGetDB( DB_MASTER );
 271  
 272          if ( !( $dbConn->isOpen() ) ) {
 273              $this->output( "Opening connection to database failed.\n" );
 274  
 275              return;
 276          }
 277          $links_temp = $dbConn->tableName( 'links_temp' );
 278  
 279          $this->output( "Dropping temporary links table if it exists..." );
 280          $dbConn->query( "DROP TABLE IF EXISTS $links_temp" );
 281          $this->output( " done.\n" );
 282  
 283          $this->output( "Creating temporary links table..." );
 284          if ( $this->hasOption( 'noKeys' ) ) {
 285              $dbConn->query( "CREATE TABLE $links_temp ( " .
 286                  "l_from int(8) unsigned NOT NULL default '0', " .
 287                  "l_to int(8) unsigned NOT NULL default '0')" );
 288          } else {
 289              $dbConn->query( "CREATE TABLE $links_temp ( " .
 290                  "l_from int(8) unsigned NOT NULL default '0', " .
 291                  "l_to int(8) unsigned NOT NULL default '0', " .
 292                  "UNIQUE KEY l_from(l_from,l_to), " .
 293                  "KEY (l_to))" );
 294          }
 295          $this->output( " done.\n\n" );
 296      }
 297  
 298  	private function performanceLog( $fh, $text ) {
 299          if ( $this->logPerformance ) {
 300              fwrite( $fh, $text );
 301          }
 302      }
 303  }
 304  
 305  $maintClass = "ConvertLinks"; // Name of the class defined above; presumably read by the maintenance runner to decide what to instantiate - confirm in Maintenance.php
 306  require_once RUN_MAINTENANCE_IF_MAIN; // Includes the file named by the RUN_MAINTENANCE_IF_MAIN constant (defined outside this file)


Generated: Fri Nov 28 14:03:12 2014 Cross-referenced by PHPXref 0.7.1