[ Index ]

PHP Cross Reference of MediaWiki-1.24.0

title

Body

[close]

/maintenance/postgres/ -> mediawiki_mysql2postgres.pl (source)

   1  #!/usr/bin/perl
   2  
   3  ## Convert data from a MySQL mediawiki database into a Postgres mediawiki database
   4  
   5  ## NOTE: It is probably easier to dump your wiki using maintenance/dumpBackup.php
   6  ## and then import it with maintenance/importDump.php
   7  
   8  ## If having UTF-8 problems, there are reports that adding --compatible=postgresql
   9  ## may help.
  10  
  11  use strict;
  12  use warnings;
  13  use Data::Dumper;
  14  use Getopt::Long;
  15  
  16  use vars qw(%table %tz %special @torder $COM);
  17  my $VERSION = '1.2';
  18  
  19  ## The following options can be changed via command line arguments:
  20  my $MYSQLDB       = '';
  21  my $MYSQLUSER     = '';
  22  
  23  ## If the following are zero-length, we omit their arguments entirely:
  24  my $MYSQLHOST     = '';
  25  my $MYSQLPASSWORD = '';
  26  my $MYSQLSOCKET   = '';
  27  
  28  ## Name of the dump file created
  29  my $MYSQLDUMPFILE = 'mediawiki_upgrade.pg';
  30  
  31  ## How verbose should this script be (0, 1, or 2)
  32  my $verbose = 0;
  33  
  34  my $help = 0;
  35  
  36  my $USAGE = "
  37  Usage: $0 --db=<dbname> --user=<user> [OPTION]...
  38  Example: $0 --db=wikidb --user=wikiuser --pass=sushi
  39  
  40  Converts a MediaWiki schema from MySQL to Postgres
  41  Options:
  42    db       Name of the MySQL database
  43    user     MySQL database username
  44    pass     MySQL database password
  45    host     MySQL database host
  46    socket   MySQL database socket
  47    verbose  Verbosity, increases with multiple uses
  48  ";
  49  
  50  GetOptions
  51      (
  52       'db=s'     => \$MYSQLDB,
  53       'user=s'   => \$MYSQLUSER,
  54       'pass=s'   => \$MYSQLPASSWORD,
  55       'host=s'   => \$MYSQLHOST,
  56       'socket=s' => \$MYSQLSOCKET,
  57       'verbose+' => \$verbose,
  58       'help'     => \$help,
  59   );
  60  
  61  die $USAGE
  62      if ! length $MYSQLDB
  63      or ! length $MYSQLUSER
  64      or $help;
  65  
  66  ## The Postgres schema file: should not be changed
  67  my $PG_SCHEMA = 'tables.sql';
  68  
  69  ## What version we default to when we can't parse the old schema
  70  my $MW_DEFAULT_VERSION = 110;
  71  
  72  ## Try and find a working version of mysqldump
  73  $verbose and warn "Locating the mysqldump executable\n";
  74  my @MYSQLDUMP = ('/usr/local/bin/mysqldump', '/usr/bin/mysqldump');
  75  my $MYSQLDUMP;
  76  for my $mytry (@MYSQLDUMP) {
  77      next if ! -e $mytry;
  78      -x $mytry or die qq{Not an executable file: "$mytry"\n};
  79      my $version = qx{$mytry -V};
  80      $version =~ /^mysqldump\s+Ver\s+\d+/ or die qq{Program at "$mytry" does not act like mysqldump\n};
  81      $MYSQLDUMP = $mytry;
  82  }
  83  $MYSQLDUMP or die qq{Could not find the mysqldump program\n};
  84  
  85  ## Flags we use for mysqldump
  86  my @MYSQLDUMPARGS = qw(
  87  --skip-lock-tables
  88  --complete-insert
  89  --skip-extended-insert
  90  --skip-add-drop-table
  91  --skip-add-locks
  92  --skip-disable-keys
  93  --skip-set-charset
  94  --skip-comments
  95  --skip-quote-names
  96  );
  97  
  98  
  99  $verbose and warn "Checking that mysqldump can handle our flags\n";
 100  ## Make sure this version can handle all the flags we want.
 101  ## Combine with user dump below
 102  my $MYSQLDUMPARGS = join ' ' => @MYSQLDUMPARGS;
 103  ## Argh. Any way to make this work on Win32?
 104  my $version = qx{$MYSQLDUMP $MYSQLDUMPARGS 2>&1};
 105  if ($version =~ /unknown option/) {
 106      die qq{Sorry, you need to use a newer version of the mysqldump program than the one at "$MYSQLDUMP"\n};
 107  }
 108  
 109  push @MYSQLDUMPARGS, "--user=$MYSQLUSER";
 110  length $MYSQLPASSWORD and push @MYSQLDUMPARGS, "--password=$MYSQLPASSWORD";
 111  length $MYSQLHOST and push @MYSQLDUMPARGS, "--host=$MYSQLHOST";
 112  
 113  ## Open the dump file to hold the mysqldump output
 114  open my $mdump, '+>', $MYSQLDUMPFILE or die qq{Could not open "$MYSQLDUMPFILE": $!\n};
 115  print qq{Writing file "$MYSQLDUMPFILE"\n};
 116  
 117  open my $mfork2, '-|' or exec $MYSQLDUMP, @MYSQLDUMPARGS, '--no-data', $MYSQLDB;
 118  my $oldselect = select $mdump;
 119  
 120  print while <$mfork2>;
 121  
 122  ## Slurp in the current schema
 123  my $current_schema;
 124  seek $mdump, 0, 0;
 125  {
 126      local $/;
 127      $current_schema = <$mdump>;
 128  }
 129  seek $mdump, 0, 0;
 130  truncate $mdump, 0;
 131  
 132  warn qq{Trying to determine database version...\n} if $verbose;
 133  
 134  my $current_version = 0;
 135  if ($current_schema =~ /CREATE TABLE \S+cur /) {
 136      $current_version = 103;
 137  }
 138  elsif ($current_schema =~ /CREATE TABLE \S+brokenlinks /) {
 139      $current_version = 104;
 140  }
 141  elsif ($current_schema !~ /CREATE TABLE \S+templatelinks /) {
 142      $current_version = 105;
 143  }
 144  elsif ($current_schema !~ /CREATE TABLE \S+validate /) {
 145      $current_version = 106;
 146  }
 147  elsif ($current_schema !~ /ipb_auto tinyint/) {
 148      $current_version = 107;
 149  }
 150  elsif ($current_schema !~ /CREATE TABLE \S+profiling /) {
 151      $current_version = 108;
 152  }
 153  elsif ($current_schema !~ /CREATE TABLE \S+querycachetwo /) {
 154      $current_version = 109;
 155  }
 156  else {
 157      $current_version = $MW_DEFAULT_VERSION;
 158  }
 159  
 160  if (!$current_version) {
 161      warn qq{WARNING! Could not figure out the old version, assuming MediaWiki $MW_DEFAULT_VERSION\n};
 162      $current_version = $MW_DEFAULT_VERSION;
 163  }
 164  
 165  ## Check for a table prefix:
 166  my $table_prefix = '';
 167  if ($current_schema =~ /CREATE TABLE (\S+)querycache /) {
 168      $table_prefix = $1;
 169  }
 170  
 171  warn qq{Old schema is from MediaWiki version $current_version\n} if $verbose;
 172  warn qq{Table prefix is "$table_prefix"\n} if $verbose and length $table_prefix;
 173  
 174  $verbose and warn qq{Writing file "$MYSQLDUMPFILE"\n};
 175  my $now = scalar localtime;
 176  my $conninfo = '';
 177  $MYSQLHOST and $conninfo .= "\n--   host      $MYSQLHOST";
 178  $MYSQLSOCKET and $conninfo .= "\n--   socket    $MYSQLSOCKET";
 179  
 180  print qq{
 181  -- Dump of MySQL Mediawiki tables for import into a Postgres Mediawiki schema
 182  -- Performed by the program: $0
 183  -- Version: $VERSION
 184  -- Author: Greg Sabino Mullane <greg\@turnstep.com> Comments welcome
 185  --
 186  -- This file was created: $now
 187  -- Executable used: $MYSQLDUMP
 188  -- Connection information:
 189  --   database:  $MYSQLDB
 190  --   user:      $MYSQLUSER$conninfo
 191  
 192  -- This file can be imported manually with psql like so:
 193  -- psql -p port# -h hostname -U username -f $MYSQLDUMPFILE databasename
 194  -- This will overwrite any existing MediaWiki information, so be careful
 195  
 196  };
 197  
 198  ## psql specific stuff
 199  print q{
 200  \\set ON_ERROR_STOP
 201  BEGIN;
 202  SET client_min_messages = 'WARNING';
 203  SET timezone = 'GMT';
 204  SET DateStyle = 'ISO, YMD';
 205  };
 206  
 207  warn qq{Reading in the Postgres schema information\n} if $verbose;
 208  open my $schema, '<', $PG_SCHEMA
 209      or die qq{Could not open "$PG_SCHEMA": make sure this script is run from maintenance/postgres/\n};
 210  my $t;
 211  while (<$schema>) {
 212      if (/CREATE TABLE\s+(\S+)/) {
 213          $t = $1;
 214          $table{$t}={};
 215          $verbose > 1 and warn qq{  Found table $t\n};
 216      }
 217      elsif (/^ +(\w+)\s+TIMESTAMP/) {
 218          $tz{$t}{$1}++;
 219          $verbose > 1 and warn qq{    Got a timestamp for column $1\n};
 220      }
 221      elsif (/REFERENCES\s*([^( ]+)/) {
 222          my $ref = $1;
 223          exists $table{$ref} or die qq{No parent table $ref found for $t\n};
 224          $table{$t}{$ref}++;
 225      }
 226  }
 227  close $schema or die qq{Could not close "$PG_SCHEMA": $!\n};
 228  
 229  ## Read in special cases and table/version information
 230  $verbose and warn qq{Reading in schema exception information\n};
 231  my %version_tables;
 232  while (<DATA>) {
 233      if (/^VERSION\s+(\d+\.\d+):\s+(.+)/) {
 234          my $list = join '|' => split /\s+/ => $2;
 235          $version_tables{$1} = qr{\b$list\b};
 236          next;
 237      }
 238      next unless /^(\w+)\s*(.*)/;
 239      $special{$1} = $2||'';
 240      $special{$2} = $1 if length $2;
 241  }
 242  
 243  ## Determine the order of tables based on foreign key constraints
 244  $verbose and warn qq{Figuring out order of tables to dump\n};
 245  my %dumped;
 246  my $bail = 0;
 247  {
 248      my $found=0;
 249      T: for my $t (sort keys %table) {
 250          next if exists $dumped{$t} and $dumped{$t} >= 1;
 251          $found=1;
 252          for my $dep (sort keys %{$table{$t}}) {
 253              next T if ! exists $dumped{$dep} or $dumped{$dep} < 0;
 254          }
 255          $dumped{$t} = -1 if ! exists $dumped{$t};
 256          ## Skip certain tables that are not imported
 257          next if exists $special{$t} and !$special{$t};
 258          push @torder, $special{$t} || $t;
 259      }
 260      last if !$found;
 261      push @torder, '---';
 262      for (values %dumped) { $_+=2; }
 263      die "Too many loops!\n" if $bail++ > 1000;
 264      redo;
 265  }
 266  
 267  ## Prepare the Postgres database for the move
 268  $verbose and warn qq{Writing Postgres transformation information\n};
 269  
 270  print "\n-- Empty out all existing tables\n";
 271  $verbose and warn qq{Writing truncates to empty existing tables\n};
 272  
 273  
 274  for my $t (@torder, 'objectcache', 'querycache') {
 275      next if $t eq '---';
 276      my $tname = $special{$t}||$t;
 277      printf qq{TRUNCATE TABLE %-20s CASCADE;\n}, qq{"$tname"};
 278  }
 279  print "\n\n";
 280  
 281  print qq{-- Temporarily rename pagecontent to "$table_prefix}text"\n};
 282  print qq{ALTER TABLE pagecontent RENAME TO "$table_prefix}text";\n\n};
 283  
 284  print qq{-- Allow rc_ip to contain empty string, will convert at end\n};
 285  print qq{ALTER TABLE recentchanges ALTER rc_ip TYPE text USING host(rc_ip);\n\n};
 286  
 287  print "-- Changing all timestamp fields to handle raw integers\n";
 288  for my $t (sort keys %tz) {
 289      next if $t eq 'archive2';
 290      for my $c (sort keys %{$tz{$t}}) {
 291          printf "ALTER TABLE %-18s ALTER %-25s TYPE TEXT;\n", $t, $c;
 292      }
 293  }
 294  print "\n";
 295  
 296  print q{
 297  INSERT INTO page VALUES (0,-1,'Dummy Page','',0,0,0,default,now(),0,10);
 298  };
 299  
 300  ## If we have a table _prefix, we need to temporarily rename all of our Postgres
 301  ## tables temporarily for the import. Perhaps consider making this an auto-schema
 302  ## thing in the future.
 303  if (length $table_prefix) {
 304      print qq{\n\n-- Temporarily renaming tables to accomodate the table_prefix "$table_prefix"\n\n};
 305      for my $t (@torder) {
 306          next if $t eq '---' or $t eq 'text' or $t eq 'user';
 307          my $tname = $special{$t}||$t;
 308          printf qq{ALTER TABLE %-18s RENAME TO "$table_prefix}$tname";\n}, qq{"$tname"};
 309      }
 310  }
 311  
 312  
 313  ## Try and dump the ill-named "user" table:
 314  ## We do this table alone because "user" is a reserved word.
 315  print q{
 316  
 317  SET escape_string_warning TO 'off';
 318  \\o /dev/null
 319  
 320  -- Postgres uses a table name of "mwuser" instead of "user"
 321  
 322  -- Create a dummy user to satisfy fk contraints especially with revisions
 323  SELECT setval('user_user_id_seq',0,'false');
 324  INSERT INTO mwuser
 325    VALUES (DEFAULT,'Anonymous','',NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,now(),now());
 326  
 327  };
 328  
 329  push @MYSQLDUMPARGS, '--no-create-info';
 330  
 331  $verbose and warn qq{Dumping "user" table\n};
 332  $verbose > 2 and warn Dumper \@MYSQLDUMPARGS;
 333  my $usertable = "$table_prefix}user";
 334  open my $mfork, '-|' or exec $MYSQLDUMP, @MYSQLDUMPARGS, $MYSQLDB, $usertable;
 335  ## Unfortunately, there is no easy way to catch errors
 336  my $numusers = 0;
 337  while (<$mfork>) {
 338      ++$numusers and print if s/INSERT INTO $usertable/INSERT INTO mwuser/;
 339  }
 340  close $mfork;
 341  if ($numusers < 1) {
 342      warn qq{No users found, probably a connection error.\n};
 343      print qq{ERROR: No users found, connection failed, or table "$usertable" does not exist. Dump aborted.\n};
 344      close $mdump or die qq{Could not close "$MYSQLDUMPFILE": $!\n};
 345      exit;
 346  }
 347  print "\n-- Users loaded: $numusers\n\n-- Loading rest of the mediawiki schema:\n";
 348  
 349  warn qq{Dumping all other tables from the MySQL schema\n} if $verbose;
 350  
 351  ## Dump the rest of the tables, in chunks based on constraints
 352  ## We do not need the user table:
 353  my @dumplist = grep { $_ ne 'user'} @torder;
 354  my @alist;
 355  {
 356      undef @alist;
 357      PICKATABLE: {
 358          my $tname = shift @dumplist;
 359          ## XXX Make this dynamic below
 360          for my $ver (sort {$b <=> $a } keys %version_tables) {
 361              redo PICKATABLE if $tname =~ $version_tables{$ver};
 362          }
 363          $tname = "$table_prefix}$tname" if length $table_prefix;
 364          next if $tname !~ /^\w/;
 365          push @alist, $tname;
 366          $verbose and warn "  $tname...\n";
 367          pop @alist and last if index($alist[-1],'---') >= 0;
 368          redo if @dumplist;
 369      }
 370  
 371      ## Dump everything else
 372      open my $mfork2, '-|' or exec $MYSQLDUMP, @MYSQLDUMPARGS, $MYSQLDB, @alist;
 373      print while <$mfork2>;
 374      close $mfork2;
 375      warn qq{Finished dumping from MySQL\n} if $verbose;
 376  
 377      redo if @dumplist;
 378  }
 379  
 380  warn qq{Writing information to return Postgres database to normal\n} if $verbose;
 381  print qq{ALTER TABLE "$table_prefix}text" RENAME TO pagecontent;\n};
 382  print qq{ALTER TABLE $table_prefix}recentchanges ALTER rc_ip TYPE cidr USING\n};
 383  print qq{  CASE WHEN rc_ip = '' THEN NULL ELSE rc_ip::cidr END;\n};
 384  
 385  ## Return tables to their original names if a table prefix was used.
 386  if (length $table_prefix) {
 387      print qq{\n\n-- Renaming tables by removing table prefix "$table_prefix"\n\n};
 388      my $maxsize = 18;
 389      for (@torder) {
 390          $maxsize = length "$_$table_prefix" if length "$_$table_prefix" > $maxsize;
 391      }
 392      for my $t (@torder) {
 393          next if $t eq '---' or $t eq 'text' or $t eq 'user';
 394          my $tname = $special{$t}||$t;
 395          printf qq{ALTER TABLE %*s RENAME TO "$tname";\n}, $maxsize+1, qq{"$table_prefix}$tname"};
 396      }
 397  }
 398  
 399  print qq{\n\n--Returning timestamps to normal\n};
 400  for my $t (sort keys %tz) {
 401      next if $t eq 'archive2';
 402      for my $c (sort keys %{$tz{$t}}) {
 403          printf "ALTER TABLE %-18s ALTER %-25s TYPE timestamptz\n".
 404                  "  USING TO_TIMESTAMP($c,'YYYYMMDDHHMISS');\n", $t, $c;
 405      }
 406  }
 407  
 408  ## Reset sequences
 409  print q{
 410  SELECT setval('filearchive_fa_id_seq', 1+coalesce(max(fa_id)  ,0),false) FROM filearchive;
 411  SELECT setval('ipblocks_ipb_id_seq',   1+coalesce(max(ipb_id) ,0),false) FROM ipblocks;
 412  SELECT setval('job_job_id_seq',        1+coalesce(max(job_id) ,0),false) FROM job;
 413  SELECT setval('logging_log_id_seq',    1+coalesce(max(log_id) ,0),false) FROM logging;
 414  SELECT setval('page_page_id_seq',      1+coalesce(max(page_id),0),false) FROM page;
 415  SELECT setval('page_restrictions_pr_id_seq', 1+coalesce(max(pr_id)  ,0),false) FROM page_restrictions;
 416  SELECT setval('recentchanges_rc_id_seq',     1+coalesce(max(rc_id)  ,0),false) FROM recentchanges;
 417  SELECT setval('revision_rev_id_seq',         1+coalesce(max(rev_id) ,0),false) FROM revision;
 418  SELECT setval('text_old_id_seq',       1+coalesce(max(old_id) ,0),false) FROM pagecontent;
 419  SELECT setval('user_user_id_seq',      1+coalesce(max(user_id),0),false) FROM mwuser;
 420  };
 421  
 422  print "COMMIT;\n\\o\n\n-- End of dump\n\n";
 423  select $oldselect;
 424  close $mdump or die qq{Could not close "$MYSQLDUMPFILE": $!\n};
 425  exit;
 426  
 427  
 428  __DATA__
 429  ## Known remappings: either indicate the MySQL name,
 430  ## or leave blank if it should be skipped
 431  pagecontent text
 432  mwuser user
 433  archive2
 434  profiling
 435  objectcache
 436  
 437  ## Which tables to ignore depending on the version
 438  VERSION 1.6: externallinks job templatelinks transcache
 439  VERSION 1.7: filearchive langlinks querycache_info
 440  VERSION 1.9: querycachetwo page_restrictions redirect
 441  


Generated: Fri Nov 28 14:03:12 2014 Cross-referenced by PHPXref 0.7.1