PHPXRef 0.7.1 : MediaWiki-1.24.0 : /maintenance/postgres/compare

[Summary view] [Print] [Text view]
   1  #!/usr/bin/perl
   2  
   3  ## Rough check that the base and postgres "tables.sql" are in sync
   4  ## Should be run from maintenance/postgres
   5  ## Checks a few other things as well...
   6  
   7  use strict;
   8  use warnings;
   9  use Data::Dumper;
  10  use Cwd;
  11  
  12  #check_valid_sql();
  13  
  14  my @old = ('../tables.sql');
  15  my $new = 'tables.sql';
  16  my @xfile;
  17  
  18  ## Read in exceptions and other metadata
  19  my %ok;
  20  while (<DATA>) {
  21      next unless /^(\w+)\s*:\s*([^#]+)/;
  22      my ($name,$val) = ($1,$2);
  23      chomp $val;
  24      if ($name eq 'RENAME') {
  25          die "Invalid rename\n" unless $val =~ /(\w+)\s+(\w+)/;
  26          $ok{OLD}{$1} = $2;
  27          $ok{NEW}{$2} = $1;
  28          next;
  29      }
  30      if ($name eq 'XFILE') {
  31          push @xfile, $val;
  32          next;
  33      }
  34      for (split /\s+/ => $val) {
  35          $ok{$name}{$_} = 0;
  36      }
  37  }
  38  
  39  my $datatype = join '|' => qw(
  40  bool
  41  tinyint smallint int bigint real float
  42  tinytext mediumtext text char varchar varbinary binary
  43  timestamp datetime
  44  tinyblob mediumblob blob
  45  );
  46  $datatype .= q{|ENUM\([\"\w\', ]+\)};
  47  $datatype = qr{($datatype)};
  48  
  49  my $typeval = qr{(\(\d+\))?};
  50  
  51  my $typeval2 = qr{ signed| unsigned| binary| NOT NULL| NULL| PRIMARY KEY| AUTO_INCREMENT| default ['\-\d\w"]+| REFERENCES .+CASCADE};
  52  
  53  my $indextype = join '|' => qw(INDEX KEY FULLTEXT), 'PRIMARY KEY', 'UNIQUE INDEX', 'UNIQUE KEY';
  54  $indextype = qr{$indextype};
  55  
  56  my $engine = qr{TYPE|ENGINE};
  57  
  58  my $tabletype = qr{InnoDB|MyISAM|HEAP|HEAP MAX_ROWS=\d+|InnoDB MAX_ROWS=\d+ AVG_ROW_LENGTH=\d+};
  59  
  60  my $charset = qr{utf8|binary};
  61  
  62  open my $newfh, '<', $new or die qq{Could not open $new: $!\n};
  63  
  64  
  65  my ($table,%old);
  66  
  67  ## Read in the xfiles
  68  my %xinfo;
  69  for my $xfile (@xfile) {
  70      print "Loading $xfile\n";
  71      my $info = parse_sql($xfile);
  72      for (keys %$info) {
  73          $xinfo{$_} = $info->{$_};
  74      }
  75  }
  76  
  77  for my $oldfile (@old) {
  78      print "Loading $oldfile\n";
  79      my $info = parse_sql($oldfile);
  80      for (keys %xinfo) {
  81          $info->{$_} = $xinfo{$_};
  82      }
  83      $old{$oldfile} = $info;
  84  }
  85  
  86  sub parse_sql {
  87  
  88      my $oldfile = shift;
  89  
  90      open my $oldfh, '<', $oldfile or die qq{Could not open $oldfile: $!\n};
  91  
  92      my %info;
  93      while (<$oldfh>) {
  94          next if /^\s*\-\-/ or /^\s+$/;
  95          s/\s*\-\- [\w ]+$//;
  96          chomp;
  97  
  98          if (/CREATE\s*TABLE/i) {
  99              if (m{^CREATE TABLE /\*_\*/(\w+) \($}) {
 100                  $table = $1;
 101              }
 102              elsif (m{^CREATE TABLE /\*\$wgDBprefix\*/(\w+) \($}) {
 103                  $table = $1;
 104              }
 105              else {
 106                  die qq{Invalid CREATE TABLE at line $. of $oldfile\n};
 107              }
 108              $info{$table}{name}=$table;
 109          }
 110          elsif (m{^\) /\*\$wgDBTableOptions\*/}) {
 111              $info{$table}{engine} = 'ENGINE';
 112              $info{$table}{type} = 'variable';
 113          }
 114          elsif (/^\) ($engine)=($tabletype);$/) {
 115              $info{$table}{engine}=$1;
 116              $info{$table}{type}=$2;
 117          }
 118          elsif (/^\) ($engine)=($tabletype), DEFAULT CHARSET=($charset);$/) {
 119              $info{$table}{engine}=$1;
 120              $info{$table}{type}=$2;
 121              $info{$table}{charset}=$3;
 122          }
 123          elsif (/^  (\w+) $datatype$typeval$typeval2{0,4},?$/) {
 124              $info{$table}{column}{$1} = $2;
 125              my $extra = $3 || '';
 126              $info{$table}{columnfull}{$1} = "$2$extra";
 127          }
 128          elsif (m{^  UNIQUE KEY (\w+) \((.+?)\)}) {
 129          }
 130          elsif (m{^CREATE (?:UNIQUE )?(?:FULLTEXT )?INDEX /\*i\*/(\w+) ON /\*_\*/(\w+) \((.+?)\);}) {
 131          }
 132          elsif (m{^\s*PRIMARY KEY \([\w,]+\)}) {
 133          }
 134          else {
 135              die "Cannot parse line $. of $oldfile:\n$_\n";
 136          }
 137  
 138      }
 139      close $oldfh or die qq{Could not close "$oldfile": $!\n};
 140  
 141      return \%info;
 142  
 143  } ## end of parse_sql
 144  
 145  for my $oldfile (@old) {
 146  
 147  ## Begin non-standard indent
 148  
 149  ## MySQL sanity checks
 150  for my $table (sort keys %{$old{$oldfile}}) {
 151      my $t = $old{$oldfile}{$table};
 152      if ($t->{engine} eq 'TYPE') {
 153          die "Invalid engine for $oldfile: $t->{engine}\n" unless $t->{name} eq 'profiling';
 154      }
 155      my $charset = $t->{charset} || '';
 156      if ($oldfile !~ /binary/ and $charset eq 'binary') {
 157          die "Invalid charset for $oldfile: $charset\n";
 158      }
 159  }
 160  
 161  my $dtypelist = join '|' => qw(
 162  SMALLINT INTEGER BIGINT NUMERIC SERIAL
 163  TEXT CHAR VARCHAR
 164  BYTEA
 165  TIMESTAMPTZ
 166  CIDR
 167  );
 168  my $dtype = qr{($dtypelist)};
 169  my %new;
 170  my ($infunction,$inview,$inrule,$lastcomma) = (0,0,0,0);
 171  my %custom_type;
 172  seek $newfh, 0, 0;
 173  while (<$newfh>) {
 174      next if /^\s*\-\-/ or /^\s*$/;
 175      s/\s*\-\- [\w ']+$//;
 176      next if /^BEGIN;/ or /^SET / or /^COMMIT;/;
 177      next if /^CREATE SEQUENCE/;
 178      next if /^CREATE(?: UNIQUE)? INDEX/;
 179      next if /^CREATE FUNCTION/;
 180      next if /^CREATE TRIGGER/ or /^  FOR EACH ROW/;
 181      next if /^INSERT INTO/ or /^  VALUES \(/;
 182      next if /^ALTER TABLE/;
 183      next if /^DROP SEQUENCE/;
 184      next if /^DROP FUNCTION/;
 185  
 186      if (/^CREATE TYPE (\w+)/) {
 187          die "Type $1 declared more than once!\n" if $custom_type{$1}++;
 188          $dtype = qr{($dtypelist|$1)};
 189          next;
 190      }
 191  
 192      chomp;
 193  
 194      if (/^\$mw\$;?$/) {
 195          $infunction = $infunction ? 0 : 1;
 196          next;
 197      }
 198      next if $infunction;
 199  
 200      next if /^CREATE VIEW/ and $inview = 1;
 201      if ($inview) {
 202          /;$/ and $inview = 0;
 203          next;
 204      }
 205  
 206      next if /^CREATE RULE/ and $inrule = 1;
 207      if ($inrule) {
 208          /;$/ and $inrule = 0;
 209          next;
 210      }
 211  
 212      if (/^CREATE TABLE "?(\w+)"? \($/) {
 213          $table = $1;
 214          $new{$table}{name}=$table;
 215          $lastcomma = 1;
 216      }
 217      elsif (/^\);$/) {
 218          if ($lastcomma) {
 219              warn "Stray comma before line $.\n";
 220          }
 221      }
 222      elsif (/^  (\w+) +$dtype.*?(,?)(?: --.*)?$/) {
 223          $new{$table}{column}{$1} = $2;
 224          if (!$lastcomma) {
 225              print "Missing comma before line $. of $new\n";
 226          }
 227          $lastcomma = $3 ? 1 : 0;
 228      }
 229      elsif (m{^\s*PRIMARY KEY \([\w,]+\)}) {
 230          $lastcomma = 0;
 231      }
 232      else {
 233          die "Cannot parse line $. of $new:\n$_\n";
 234      }
 235  }
 236  
 237  ## Which column types are okay to map from mysql to postgres?
 238  my $COLMAP = q{
 239  ## INTS:
 240  tinyint SMALLINT
 241  int INTEGER SERIAL
 242  smallint SMALLINT
 243  bigint BIGINT
 244  real NUMERIC
 245  float NUMERIC
 246  
 247  ## TEXT:
 248  varchar(15) TEXT
 249  varchar(32) TEXT
 250  varchar(70) TEXT
 251  varchar(255) TEXT
 252  varchar TEXT
 253  text TEXT
 254  tinytext TEXT
 255  ENUM TEXT
 256  
 257  ## TIMESTAMPS:
 258  varbinary(14) TIMESTAMPTZ
 259  binary(14) TIMESTAMPTZ
 260  datetime TIMESTAMPTZ
 261  timestamp TIMESTAMPTZ
 262  
 263  ## BYTEA:
 264  mediumblob BYTEA
 265  
 266  ## OTHER:
 267  bool SMALLINT # Sigh
 268  
 269  };
 270  ## Allow specific exceptions to the above
 271  my $COLMAPOK = q{
 272  ## User inputted text strings:
 273  ar_comment      tinyblob       TEXT
 274  fa_description  tinyblob       TEXT
 275  img_description tinyblob       TEXT
 276  ipb_reason      tinyblob       TEXT
 277  log_action      varbinary(32)  TEXT
 278  log_type        varbinary(32)  TEXT
 279  oi_description  tinyblob       TEXT
 280  rev_comment     tinyblob       TEXT
 281  rc_log_action   varbinary(255) TEXT
 282  rc_log_type     varbinary(255) TEXT
 283  
 284  ## Simple text-only strings:
 285  ar_flags          tinyblob       TEXT
 286  cf_name           varbinary(255) TEXT
 287  cf_value          blob           TEXT
 288  ar_sha1           varbinary(32)  TEXT
 289  cl_collation      varbinary(32)  TEXT
 290  cl_sortkey        varbinary(230) TEXT
 291  ct_params         blob           TEXT
 292  fa_minor_mime     varbinary(100) TEXT
 293  fa_storage_group  varbinary(16)  TEXT # Just 'deleted' for now, should stay plain text
 294  fa_storage_key    varbinary(64)  TEXT # sha1 plus text extension
 295  ipb_address       tinyblob       TEXT # IP address or username
 296  ipb_range_end     tinyblob       TEXT # hexadecimal
 297  ipb_range_start   tinyblob       TEXT # hexadecimal
 298  img_minor_mime    varbinary(100) TEXT
 299  lc_lang           varbinary(32)  TEXT
 300  lc_value          varbinary(32)  TEXT
 301  img_sha1          varbinary(32)  TEXT
 302  iw_wikiid         varchar(64)    TEXT
 303  job_cmd           varbinary(60)  TEXT # Should we limit to 60 as well?
 304  keyname           varbinary(255) TEXT # No tablename prefix (objectcache)
 305  ll_lang           varbinary(20)  TEXT # Language code
 306  lc_value          mediumblob     TEXT
 307  log_params        blob           TEXT # LF separated list of args
 308  log_type          varbinary(10)  TEXT
 309  ls_field          varbinary(32)  TEXT
 310  md_deps           mediumblob     TEXT # JSON
 311  md_module         varbinary(255) TEXT
 312  md_skin           varbinary(32)  TEXT
 313  mr_blob           mediumblob     TEXT # JSON
 314  mr_lang           varbinary(32)  TEXT
 315  mr_resource       varbinary(255) TEXT
 316  mrl_message       varbinary(255) TEXT
 317  mrl_resource      varbinary(255) TEXT
 318  oi_minor_mime     varbinary(100) TEXT
 319  oi_sha1           varbinary(32)  TEXT
 320  old_flags         tinyblob       TEXT
 321  old_text          mediumblob     TEXT
 322  pp_propname       varbinary(60)  TEXT
 323  pp_value          blob           TEXT
 324  page_restrictions tinyblob       TEXT # CSV string
 325  pf_server         varchar(30)    TEXT
 326  pr_level          varbinary(60)  TEXT
 327  pr_type           varbinary(60)  TEXT
 328  pt_create_perm    varbinary(60)  TEXT
 329  pt_reason         tinyblob       TEXT
 330  qc_type           varbinary(32)  TEXT
 331  qcc_type          varbinary(32)  TEXT
 332  qci_type          varbinary(32)  TEXT
 333  rc_params         blob           TEXT
 334  rev_sha1          varbinary(32)  TEXT
 335  rlc_to_blob       blob           TEXT
 336  ts_tags           blob           TEXT
 337  ufg_group         varbinary(32)  TEXT
 338  ug_group          varbinary(32)  TEXT
 339  ul_value          blob           TEXT
 340  up_property       varbinary(255) TEXT
 341  up_value          blob           TEXT
 342  us_sha1           varchar(31)    TEXT
 343  us_source_type    varchar(50)    TEXT
 344  us_status         varchar(50)    TEXT
 345  user_email_token  binary(32)     TEXT
 346  user_ip           varbinary(40)  TEXT
 347  user_newpassword  tinyblob       TEXT
 348  user_options      blob           TEXT
 349  user_password     tinyblob       TEXT
 350  user_token        binary(32)     TEXT
 351  iwl_prefix      varbinary(20)  TEXT
 352  
 353  ## Text URLs:
 354  el_index blob           TEXT
 355  el_to    blob           TEXT
 356  iw_api   blob           TEXT
 357  iw_url   blob           TEXT
 358  tb_url   blob           TEXT
 359  tc_url   varbinary(255) TEXT
 360  
 361  ## Deprecated or not yet used:
 362  ar_text     mediumblob TEXT
 363  job_params  blob       TEXT
 364  log_deleted tinyint    INTEGER # Not used yet, but keep it INTEGER for safety
 365  rc_type     tinyint    CHAR
 366  
 367  ## Number tweaking:
 368  fa_bits   int SMALLINT # bits per pixel
 369  fa_height int SMALLINT
 370  fa_width  int SMALLINT # Hope we don't see an image this wide...
 371  hc_id     int BIGINT   # Odd that site_stats is all bigint...
 372  img_bits  int SMALLINT # bits per image should stay sane
 373  oi_bits   int SMALLINT
 374  
 375  ## True binary fields, usually due to gzdeflate and/or serialize:
 376  math_inputhash  varbinary(16) BYTEA
 377  math_outputhash varbinary(16) BYTEA
 378  
 379  ## Namespaces: not need for such a high range
 380  ar_namespace     int SMALLINT
 381  job_namespace    int SMALLINT
 382  log_namespace    int SMALLINT
 383  page_namespace   int SMALLINT
 384  pl_namespace     int SMALLINT
 385  pt_namespace     int SMALLINT
 386  qc_namespace     int SMALLINT
 387  rc_namespace     int SMALLINT
 388  rd_namespace     int SMALLINT
 389  rlc_to_namespace int SMALLINT
 390  tl_namespace     int SMALLINT
 391  wl_namespace     int SMALLINT
 392  
 393  ## Easy enough to change if a wiki ever does grow this big:
 394  ss_active_users  bigint INTEGER
 395  ss_good_articles bigint INTEGER
 396  ss_total_edits   bigint INTEGER
 397  ss_total_pages   bigint INTEGER
 398  ss_total_views   bigint INTEGER
 399  ss_users         bigint INTEGER
 400  
 401  ## True IP - keep an eye on these, coders tend to make textual assumptions
 402  rc_ip varbinary(40) CIDR # Want to keep an eye on this
 403  
 404  ## Others:
 405  tc_time int TIMESTAMPTZ
 406  
 407  
 408  };
 409  
 410  my %colmap;
 411  for (split /\n/ => $COLMAP) {
 412      next unless /^\w/;
 413      s/(.*?)#.*/$1/;
 414      my ($col,@maps) = split / +/, $_;
 415      for (@maps) {
 416          $colmap{$col}{$_} = 1;
 417      }
 418  }
 419  
 420  my %colmapok;
 421  for (split /\n/ => $COLMAPOK) {
 422      next unless /^\w/;
 423      my ($col,$old,$new) = split / +/, $_;
 424      $colmapok{$col}{$old}{$new} = 1;
 425  }
 426  
 427  ## Old but not new
 428  for my $t (sort keys %{$old{$oldfile}}) {
 429      if (!exists $new{$t} and !exists $ok{OLD}{$t}) {
 430          print "Table not in $new: $t\n";
 431          next;
 432      }
 433      next if exists $ok{OLD}{$t} and !$ok{OLD}{$t};
 434      my $newt = exists $ok{OLD}{$t} ? $ok{OLD}{$t} : $t;
 435      my $oldcol = $old{$oldfile}{$t}{column};
 436      my $oldcolfull = $old{$oldfile}{$t}{columnfull};
 437      my $newcol = $new{$newt}{column};
 438      for my $c (keys %$oldcol) {
 439          if (!exists $newcol->{$c}) {
 440              print "Column $t.$c not in $new\n";
 441              next;
 442          }
 443      }
 444      for my $c (sort keys %$newcol) {
 445          if (!exists $oldcol->{$c}) {
 446              print "Column $t.$c not in $oldfile\n";
 447              next;
 448          }
 449          ## Column types (roughly) match up?
 450          my $new = $newcol->{$c};
 451          my $old = $oldcolfull->{$c};
 452  
 453          ## Known exceptions:
 454          next if exists $colmapok{$c}{$old}{$new};
 455  
 456          $old =~ s/ENUM.*/ENUM/;
 457  
 458          next if $old eq 'ENUM' and $new eq 'media_type';
 459  
 460          if (! exists $colmap{$old}{$new}) {
 461              print "Column types for $t.$c do not match: $old does not map to $new\n";
 462          }
 463      }
 464  }
 465  ## New but not old:
 466  for (sort keys %new) {
 467      if (!exists $old{$oldfile}{$_} and !exists $ok{NEW}{$_}) {
 468          print "Not in $oldfile: $_\n";
 469          next;
 470      }
 471  }
 472  
 473  
 474  } ## end each file to be parsed
 475  
 476  
 477  sub check_valid_sql {
 478  
 479      ## Check for a few common problems in most php files
 480  
 481      my $olddir = getcwd();
 482      chdir("../..");
 483      for my $basedir (qw/includes extensions/) {
 484          scan_dir($basedir);
 485      }
 486      chdir $olddir;
 487  
 488      return;
 489  
 490  } ## end of check_valid_sql
 491  
 492  
 493  sub scan_dir {
 494  
 495      my $dir = shift;
 496  
 497      opendir my $dh, $dir or die qq{Could not opendir $dir: $!\n};
 498      #print "Scanning $dir...\n";
 499      for my $file (grep { -f "$dir/$_" and /\.php$/ } readdir $dh) {
 500          find_problems("$dir/$file");
 501      }
 502      rewinddir $dh;
 503      for my $subdir (grep { -d "$dir/$_" and ! /\./ } readdir $dh) {
 504          scan_dir("$dir/$subdir");
 505      }
 506      closedir $dh or die qq{Closedir failed: $!\n};
 507      return;
 508  
 509  } ## end of scan_dir
 510  
 511  sub find_problems {
 512  
 513      my $file = shift;
 514      open my $fh, '<', $file or die qq{Could not open "$file": $!\n};
 515      my $lastline = '';
 516      my $inarray = 0;
 517      while (<$fh>) {
 518          if (/FORCE INDEX/ and $file !~ /Database\w*\.php/) {
 519              warn "Found FORCE INDEX string at line $. of $file\n";
 520          }
 521          if (/REPLACE INTO/ and $file !~ /Database\w*\.php/) {
 522              warn "Found REPLACE INTO string at line $. of $file\n";
 523          }
 524          if (/\bIF\s*\(/ and $file !~ /DatabaseMySQL\.php/) {
 525              warn "Found IF string at line $. of $file\n";
 526          }
 527          if (/\bCONCAT\b/ and $file !~ /Database\w*\.php/) {
 528              warn "Found CONCAT string at line $. of $file\n";
 529          }
 530          if (/\bGROUP\s+BY\s*\d\b/i and $file !~ /Database\w*\.php/) {
 531              warn "Found GROUP BY # at line $. of $file\n";
 532          }
 533          if (/wfGetDB\s*\(\s+\)/io) {
 534              warn "wfGETDB is missing parameters at line $. of $file\n";
 535          }
 536          if (/=\s*array\s*\(\s*$/) {
 537              $inarray = 1;
 538              next;
 539          }
 540          if ($inarray) {
 541              if (/\s*\);\s*$/) {
 542                  $inarray = 0;
 543                  next;
 544              }
 545              next if ! /\w/ or /array\(\s*$/ or /^\s*#/ or m{^\s*//};
 546              if (! /,/) {
 547                  my $nextline = <$fh>;
 548                  last if ! defined $nextline;
 549                  if ($nextline =~ /^\s*\)[;,]/) {
 550                      $inarray = 0;
 551                      next;
 552                  }
 553                  #warn "Array is missing a comma? Line $. of $file\n";
 554              }
 555          }
 556      }
 557      close $fh or die qq{Could not close "$file": $!\n};
 558      return;
 559  
 560  } ## end of find_problems
 561  
 562  
 563  __DATA__
 564  ## Known exceptions
 565  OLD: searchindex          ## We use tsearch2 directly on the page table instead
 566  RENAME: user mwuser       ## Reserved word causing lots of problems
 567  RENAME: text pagecontent  ## Reserved word
 568  XFILE: ../archives/patch-profiling.sql
PHP Cross Reference of MediaWiki-1.24.0

/maintenance/postgres/ -> compare_schemas.pl (source)