[ Index ] |
PHP Cross Reference of MediaWiki-1.24.0 |
[Summary view] [Print] [Text view]
1 #!/usr/bin/perl 2 3 ## Convert data from a MySQL mediawiki database into a Postgres mediawiki database 4 5 ## NOTE: It is probably easier to dump your wiki using maintenance/dumpBackup.php 6 ## and then import it with maintenance/importDump.php 7 8 ## If having UTF-8 problems, there are reports that adding --compatible=postgresql 9 ## may help. 10 11 use strict; 12 use warnings; 13 use Data::Dumper; 14 use Getopt::Long; 15 16 use vars qw(%table %tz %special @torder $COM); 17 my $VERSION = '1.2'; 18 19 ## The following options can be changed via command line arguments: 20 my $MYSQLDB = ''; 21 my $MYSQLUSER = ''; 22 23 ## If the following are zero-length, we omit their arguments entirely: 24 my $MYSQLHOST = ''; 25 my $MYSQLPASSWORD = ''; 26 my $MYSQLSOCKET = ''; 27 28 ## Name of the dump file created 29 my $MYSQLDUMPFILE = 'mediawiki_upgrade.pg'; 30 31 ## How verbose should this script be (0, 1, or 2) 32 my $verbose = 0; 33 34 my $help = 0; 35 36 my $USAGE = " 37 Usage: $0 --db=<dbname> --user=<user> [OPTION]... 38 Example: $0 --db=wikidb --user=wikiuser --pass=sushi 39 40 Converts a MediaWiki schema from MySQL to Postgres 41 Options: 42 db Name of the MySQL database 43 user MySQL database username 44 pass MySQL database password 45 host MySQL database host 46 socket MySQL database socket 47 verbose Verbosity, increases with multiple uses 48 "; 49 50 GetOptions 51 ( 52 'db=s' => \$MYSQLDB, 53 'user=s' => \$MYSQLUSER, 54 'pass=s' => \$MYSQLPASSWORD, 55 'host=s' => \$MYSQLHOST, 56 'socket=s' => \$MYSQLSOCKET, 57 'verbose+' => \$verbose, 58 'help' => \$help, 59 ); 60 61 die $USAGE 62 if ! length $MYSQLDB 63 or ! length $MYSQLUSER 64 or $help; 65 66 ## The Postgres schema file: should not be changed 67 my $PG_SCHEMA = 'tables.sql'; 68 69 ## What version we default to when we can't parse the old schema 70 my $MW_DEFAULT_VERSION = 110; 71 72 ## Try and find a working version of mysqldump 73 $verbose and warn "Locating the mysqldump executable\n"; 74 my @MYSQLDUMP = ('/usr/local/bin/mysqldump', '/usr/bin/mysqldump'); 75 my $MYSQLDUMP; 76 for my $mytry (@MYSQLDUMP) { 77 next if ! -e $mytry; 78 -x $mytry or die qq{Not an executable file: "$mytry"\n}; 79 my $version = qx{$mytry -V}; 80 $version =~ /^mysqldump\s+Ver\s+\d+/ or die qq{Program at "$mytry" does not act like mysqldump\n}; 81 $MYSQLDUMP = $mytry; 82 } 83 $MYSQLDUMP or die qq{Could not find the mysqldump program\n}; 84 85 ## Flags we use for mysqldump 86 my @MYSQLDUMPARGS = qw( 87 --skip-lock-tables 88 --complete-insert 89 --skip-extended-insert 90 --skip-add-drop-table 91 --skip-add-locks 92 --skip-disable-keys 93 --skip-set-charset 94 --skip-comments 95 --skip-quote-names 96 ); 97 98 99 $verbose and warn "Checking that mysqldump can handle our flags\n"; 100 ## Make sure this version can handle all the flags we want. 101 ## Combine with user dump below 102 my $MYSQLDUMPARGS = join ' ' => @MYSQLDUMPARGS; 103 ## Argh. Any way to make this work on Win32? 104 my $version = qx{$MYSQLDUMP $MYSQLDUMPARGS 2>&1}; 105 if ($version =~ /unknown option/) { 106 die qq{Sorry, you need to use a newer version of the mysqldump program than the one at "$MYSQLDUMP"\n}; 107 } 108 109 push @MYSQLDUMPARGS, "--user=$MYSQLUSER"; 110 length $MYSQLPASSWORD and push @MYSQLDUMPARGS, "--password=$MYSQLPASSWORD"; 111 length $MYSQLHOST and push @MYSQLDUMPARGS, "--host=$MYSQLHOST"; 112 113 ## Open the dump file to hold the mysqldump output 114 open my $mdump, '+>', $MYSQLDUMPFILE or die qq{Could not open "$MYSQLDUMPFILE": $!\n}; 115 print qq{Writing file "$MYSQLDUMPFILE"\n}; 116 117 open my $mfork2, '-|' or exec $MYSQLDUMP, @MYSQLDUMPARGS, '--no-data', $MYSQLDB; 118 my $oldselect = select $mdump; 119 120 print while <$mfork2>; 121 122 ## Slurp in the current schema 123 my $current_schema; 124 seek $mdump, 0, 0; 125 { 126 local $/; 127 $current_schema = <$mdump>; 128 } 129 seek $mdump, 0, 0; 130 truncate $mdump, 0; 131 132 warn qq{Trying to determine database version...\n} if $verbose; 133 134 my $current_version = 0; 135 if ($current_schema =~ /CREATE TABLE \S+cur /) { 136 $current_version = 103; 137 } 138 elsif ($current_schema =~ /CREATE TABLE \S+brokenlinks /) { 139 $current_version = 104; 140 } 141 elsif ($current_schema !~ /CREATE TABLE \S+templatelinks /) { 142 $current_version = 105; 143 } 144 elsif ($current_schema !~ /CREATE TABLE \S+validate /) { 145 $current_version = 106; 146 } 147 elsif ($current_schema !~ /ipb_auto tinyint/) { 148 $current_version = 107; 149 } 150 elsif ($current_schema !~ /CREATE TABLE \S+profiling /) { 151 $current_version = 108; 152 } 153 elsif ($current_schema !~ /CREATE TABLE \S+querycachetwo /) { 154 $current_version = 109; 155 } 156 else { 157 $current_version = $MW_DEFAULT_VERSION; 158 } 159 160 if (!$current_version) { 161 warn qq{WARNING! Could not figure out the old version, assuming MediaWiki $MW_DEFAULT_VERSION\n}; 162 $current_version = $MW_DEFAULT_VERSION; 163 } 164 165 ## Check for a table prefix: 166 my $table_prefix = ''; 167 if ($current_schema =~ /CREATE TABLE (\S+)querycache /) { 168 $table_prefix = $1; 169 } 170 171 warn qq{Old schema is from MediaWiki version $current_version\n} if $verbose; 172 warn qq{Table prefix is "$table_prefix"\n} if $verbose and length $table_prefix; 173 174 $verbose and warn qq{Writing file "$MYSQLDUMPFILE"\n}; 175 my $now = scalar localtime; 176 my $conninfo = ''; 177 $MYSQLHOST and $conninfo .= "\n-- host $MYSQLHOST"; 178 $MYSQLSOCKET and $conninfo .= "\n-- socket $MYSQLSOCKET"; 179 180 print qq{ 181 -- Dump of MySQL Mediawiki tables for import into a Postgres Mediawiki schema 182 -- Performed by the program: $0 183 -- Version: $VERSION 184 -- Author: Greg Sabino Mullane <greg\@turnstep.com> Comments welcome 185 -- 186 -- This file was created: $now 187 -- Executable used: $MYSQLDUMP 188 -- Connection information: 189 -- database: $MYSQLDB 190 -- user: $MYSQLUSER$conninfo 191 192 -- This file can be imported manually with psql like so: 193 -- psql -p port# -h hostname -U username -f $MYSQLDUMPFILE databasename 194 -- This will overwrite any existing MediaWiki information, so be careful 195 196 }; 197 198 ## psql specific stuff 199 print q{ 200 \\set ON_ERROR_STOP 201 BEGIN; 202 SET client_min_messages = 'WARNING'; 203 SET timezone = 'GMT'; 204 SET DateStyle = 'ISO, YMD'; 205 }; 206 207 warn qq{Reading in the Postgres schema information\n} if $verbose; 208 open my $schema, '<', $PG_SCHEMA 209 or die qq{Could not open "$PG_SCHEMA": make sure this script is run from maintenance/postgres/\n}; 210 my $t; 211 while (<$schema>) { 212 if (/CREATE TABLE\s+(\S+)/) { 213 $t = $1; 214 $table{$t}={}; 215 $verbose > 1 and warn qq{ Found table $t\n}; 216 } 217 elsif (/^ +(\w+)\s+TIMESTAMP/) { 218 $tz{$t}{$1}++; 219 $verbose > 1 and warn qq{ Got a timestamp for column $1\n}; 220 } 221 elsif (/REFERENCES\s*([^( ]+)/) { 222 my $ref = $1; 223 exists $table{$ref} or die qq{No parent table $ref found for $t\n}; 224 $table{$t}{$ref}++; 225 } 226 } 227 close $schema or die qq{Could not close "$PG_SCHEMA": $!\n}; 228 229 ## Read in special cases and table/version information 230 $verbose and warn qq{Reading in schema exception information\n}; 231 my %version_tables; 232 while (<DATA>) { 233 if (/^VERSION\s+(\d+\.\d+):\s+(.+)/) { 234 my $list = join '|' => split /\s+/ => $2; 235 $version_tables{$1} = qr{\b$list\b}; 236 next; 237 } 238 next unless /^(\w+)\s*(.*)/; 239 $special{$1} = $2||''; 240 $special{$2} = $1 if length $2; 241 } 242 243 ## Determine the order of tables based on foreign key constraints 244 $verbose and warn qq{Figuring out order of tables to dump\n}; 245 my %dumped; 246 my $bail = 0; 247 { 248 my $found=0; 249 T: for my $t (sort keys %table) { 250 next if exists $dumped{$t} and $dumped{$t} >= 1; 251 $found=1; 252 for my $dep (sort keys %{$table{$t}}) { 253 next T if ! exists $dumped{$dep} or $dumped{$dep} < 0; 254 } 255 $dumped{$t} = -1 if ! exists $dumped{$t}; 256 ## Skip certain tables that are not imported 257 next if exists $special{$t} and !$special{$t}; 258 push @torder, $special{$t} || $t; 259 } 260 last if !$found; 261 push @torder, '---'; 262 for (values %dumped) { $_+=2; } 263 die "Too many loops!\n" if $bail++ > 1000; 264 redo; 265 } 266 267 ## Prepare the Postgres database for the move 268 $verbose and warn qq{Writing Postgres transformation information\n}; 269 270 print "\n-- Empty out all existing tables\n"; 271 $verbose and warn qq{Writing truncates to empty existing tables\n}; 272 273 274 for my $t (@torder, 'objectcache', 'querycache') { 275 next if $t eq '---'; 276 my $tname = $special{$t}||$t; 277 printf qq{TRUNCATE TABLE %-20s CASCADE;\n}, qq{"$tname"}; 278 } 279 print "\n\n"; 280 281 print qq{-- Temporarily rename pagecontent to "$table_prefix}text"\n}; 282 print qq{ALTER TABLE pagecontent RENAME TO "$table_prefix}text";\n\n}; 283 284 print qq{-- Allow rc_ip to contain empty string, will convert at end\n}; 285 print qq{ALTER TABLE recentchanges ALTER rc_ip TYPE text USING host(rc_ip);\n\n}; 286 287 print "-- Changing all timestamp fields to handle raw integers\n"; 288 for my $t (sort keys %tz) { 289 next if $t eq 'archive2'; 290 for my $c (sort keys %{$tz{$t}}) { 291 printf "ALTER TABLE %-18s ALTER %-25s TYPE TEXT;\n", $t, $c; 292 } 293 } 294 print "\n"; 295 296 print q{ 297 INSERT INTO page VALUES (0,-1,'Dummy Page','',0,0,0,default,now(),0,10); 298 }; 299 300 ## If we have a table _prefix, we need to temporarily rename all of our Postgres 301 ## tables temporarily for the import. Perhaps consider making this an auto-schema 302 ## thing in the future. 303 if (length $table_prefix) { 304 print qq{\n\n-- Temporarily renaming tables to accomodate the table_prefix "$table_prefix"\n\n}; 305 for my $t (@torder) { 306 next if $t eq '---' or $t eq 'text' or $t eq 'user'; 307 my $tname = $special{$t}||$t; 308 printf qq{ALTER TABLE %-18s RENAME TO "$table_prefix}$tname";\n}, qq{"$tname"}; 309 } 310 } 311 312 313 ## Try and dump the ill-named "user" table: 314 ## We do this table alone because "user" is a reserved word. 315 print q{ 316 317 SET escape_string_warning TO 'off'; 318 \\o /dev/null 319 320 -- Postgres uses a table name of "mwuser" instead of "user" 321 322 -- Create a dummy user to satisfy fk contraints especially with revisions 323 SELECT setval('user_user_id_seq',0,'false'); 324 INSERT INTO mwuser 325 VALUES (DEFAULT,'Anonymous','',NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,now(),now()); 326 327 }; 328 329 push @MYSQLDUMPARGS, '--no-create-info'; 330 331 $verbose and warn qq{Dumping "user" table\n}; 332 $verbose > 2 and warn Dumper \@MYSQLDUMPARGS; 333 my $usertable = "$table_prefix}user"; 334 open my $mfork, '-|' or exec $MYSQLDUMP, @MYSQLDUMPARGS, $MYSQLDB, $usertable; 335 ## Unfortunately, there is no easy way to catch errors 336 my $numusers = 0; 337 while (<$mfork>) { 338 ++$numusers and print if s/INSERT INTO $usertable/INSERT INTO mwuser/; 339 } 340 close $mfork; 341 if ($numusers < 1) { 342 warn qq{No users found, probably a connection error.\n}; 343 print qq{ERROR: No users found, connection failed, or table "$usertable" does not exist. Dump aborted.\n}; 344 close $mdump or die qq{Could not close "$MYSQLDUMPFILE": $!\n}; 345 exit; 346 } 347 print "\n-- Users loaded: $numusers\n\n-- Loading rest of the mediawiki schema:\n"; 348 349 warn qq{Dumping all other tables from the MySQL schema\n} if $verbose; 350 351 ## Dump the rest of the tables, in chunks based on constraints 352 ## We do not need the user table: 353 my @dumplist = grep { $_ ne 'user'} @torder; 354 my @alist; 355 { 356 undef @alist; 357 PICKATABLE: { 358 my $tname = shift @dumplist; 359 ## XXX Make this dynamic below 360 for my $ver (sort {$b <=> $a } keys %version_tables) { 361 redo PICKATABLE if $tname =~ $version_tables{$ver}; 362 } 363 $tname = "$table_prefix}$tname" if length $table_prefix; 364 next if $tname !~ /^\w/; 365 push @alist, $tname; 366 $verbose and warn " $tname...\n"; 367 pop @alist and last if index($alist[-1],'---') >= 0; 368 redo if @dumplist; 369 } 370 371 ## Dump everything else 372 open my $mfork2, '-|' or exec $MYSQLDUMP, @MYSQLDUMPARGS, $MYSQLDB, @alist; 373 print while <$mfork2>; 374 close $mfork2; 375 warn qq{Finished dumping from MySQL\n} if $verbose; 376 377 redo if @dumplist; 378 } 379 380 warn qq{Writing information to return Postgres database to normal\n} if $verbose; 381 print qq{ALTER TABLE "$table_prefix}text" RENAME TO pagecontent;\n}; 382 print qq{ALTER TABLE $table_prefix}recentchanges ALTER rc_ip TYPE cidr USING\n}; 383 print qq{ CASE WHEN rc_ip = '' THEN NULL ELSE rc_ip::cidr END;\n}; 384 385 ## Return tables to their original names if a table prefix was used. 386 if (length $table_prefix) { 387 print qq{\n\n-- Renaming tables by removing table prefix "$table_prefix"\n\n}; 388 my $maxsize = 18; 389 for (@torder) { 390 $maxsize = length "$_$table_prefix" if length "$_$table_prefix" > $maxsize; 391 } 392 for my $t (@torder) { 393 next if $t eq '---' or $t eq 'text' or $t eq 'user'; 394 my $tname = $special{$t}||$t; 395 printf qq{ALTER TABLE %*s RENAME TO "$tname";\n}, $maxsize+1, qq{"$table_prefix}$tname"}; 396 } 397 } 398 399 print qq{\n\n--Returning timestamps to normal\n}; 400 for my $t (sort keys %tz) { 401 next if $t eq 'archive2'; 402 for my $c (sort keys %{$tz{$t}}) { 403 printf "ALTER TABLE %-18s ALTER %-25s TYPE timestamptz\n". 404 " USING TO_TIMESTAMP($c,'YYYYMMDDHHMISS');\n", $t, $c; 405 } 406 } 407 408 ## Reset sequences 409 print q{ 410 SELECT setval('filearchive_fa_id_seq', 1+coalesce(max(fa_id) ,0),false) FROM filearchive; 411 SELECT setval('ipblocks_ipb_id_seq', 1+coalesce(max(ipb_id) ,0),false) FROM ipblocks; 412 SELECT setval('job_job_id_seq', 1+coalesce(max(job_id) ,0),false) FROM job; 413 SELECT setval('logging_log_id_seq', 1+coalesce(max(log_id) ,0),false) FROM logging; 414 SELECT setval('page_page_id_seq', 1+coalesce(max(page_id),0),false) FROM page; 415 SELECT setval('page_restrictions_pr_id_seq', 1+coalesce(max(pr_id) ,0),false) FROM page_restrictions; 416 SELECT setval('recentchanges_rc_id_seq', 1+coalesce(max(rc_id) ,0),false) FROM recentchanges; 417 SELECT setval('revision_rev_id_seq', 1+coalesce(max(rev_id) ,0),false) FROM revision; 418 SELECT setval('text_old_id_seq', 1+coalesce(max(old_id) ,0),false) FROM pagecontent; 419 SELECT setval('user_user_id_seq', 1+coalesce(max(user_id),0),false) FROM mwuser; 420 }; 421 422 print "COMMIT;\n\\o\n\n-- End of dump\n\n"; 423 select $oldselect; 424 close $mdump or die qq{Could not close "$MYSQLDUMPFILE": $!\n}; 425 exit; 426 427 428 __DATA__ 429 ## Known remappings: either indicate the MySQL name, 430 ## or leave blank if it should be skipped 431 pagecontent text 432 mwuser user 433 archive2 434 profiling 435 objectcache 436 437 ## Which tables to ignore depending on the version 438 VERSION 1.6: externallinks job templatelinks transcache 439 VERSION 1.7: filearchive langlinks querycache_info 440 VERSION 1.9: querycachetwo page_restrictions redirect 441
title
Description
Body
title
Description
Body
title
Description
Body
title
Body
Generated: Fri Nov 28 14:03:12 2014 | Cross-referenced by PHPXref 0.7.1 |