[ Index ]

PHP Cross Reference of MediaWiki-1.24.0

title

Body

[close]

/includes/diff/ -> DairikiDiff.php (source)

   1  <?php
   2  /**
   3   * A PHP diff engine for phpwiki. (Taken from phpwiki-1.3.3)
   4   *
   5   * Copyright © 2000, 2001 Geoffrey T. Dairiki <[email protected]>
   6   * You may copy this code freely under the conditions of the GPL.
   7   *
   8   * This program is free software; you can redistribute it and/or modify
   9   * it under the terms of the GNU General Public License as published by
  10   * the Free Software Foundation; either version 2 of the License, or
  11   * (at your option) any later version.
  12   *
  13   * This program is distributed in the hope that it will be useful,
  14   * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  16   * GNU General Public License for more details.
  17   *
  18   * You should have received a copy of the GNU General Public License along
  19   * with this program; if not, write to the Free Software Foundation, Inc.,
  20   * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  21   * http://www.gnu.org/copyleft/gpl.html
  22   *
  23   * @file
  24   * @ingroup DifferenceEngine
  25   * @defgroup DifferenceEngine DifferenceEngine
  26   */
  27  
  28  /**
  29   * @todo document
  30   * @private
  31   * @ingroup DifferenceEngine
  32   */
  33  abstract class DiffOp {
  34  
  35      /**
  36       * @var string
  37       */
  38      public $type;
  39  
  40      /**
  41       * @var string[]
  42       */
  43      public $orig;
  44  
  45      /**
  46       * @var string[]
  47       */
  48      public $closing;
  49  
  50      /**
  51       * @return string
  52       */
  53  	public function getType() {
  54          return $this->type;
  55      }
  56  
  57      /**
  58       * @return string[]
  59       */
  60  	public function getOrig() {
  61          return $this->orig;
  62      }
  63  
  64      /**
  65       * @param int $i
  66       * @return string|null
  67       */
  68  	public function getClosing( $i = null ) {
  69          if ( $i === null ) {
  70              return $this->closing;
  71          }
  72          if ( array_key_exists( $i, $this->closing ) ) {
  73              return $this->closing[$i];
  74          }
  75          return null;
  76      }
  77  
  78      abstract public function reverse();
  79  
  80      /**
  81       * @return int
  82       */
  83  	public function norig() {
  84          return $this->orig ? count( $this->orig ) : 0;
  85      }
  86  
  87      /**
  88       * @return int
  89       */
  90  	public function nclosing() {
  91          return $this->closing ? count( $this->closing ) : 0;
  92      }
  93  }
  94  
  95  /**
  96   * @todo document
  97   * @private
  98   * @ingroup DifferenceEngine
  99   */
 100  class DiffOpCopy extends DiffOp {
 101      public $type = 'copy';
 102  
 103  	public function __construct( $orig, $closing = false ) {
 104          if ( !is_array( $closing ) ) {
 105              $closing = $orig;
 106          }
 107          $this->orig = $orig;
 108          $this->closing = $closing;
 109      }
 110  
 111      /**
 112       * @return DiffOpCopy
 113       */
 114  	public function reverse() {
 115          return new DiffOpCopy( $this->closing, $this->orig );
 116      }
 117  }
 118  
 119  /**
 120   * @todo document
 121   * @private
 122   * @ingroup DifferenceEngine
 123   */
 124  class DiffOpDelete extends DiffOp {
 125      public $type = 'delete';
 126  
 127  	public function __construct( $lines ) {
 128          $this->orig = $lines;
 129          $this->closing = false;
 130      }
 131  
 132      /**
 133       * @return DiffOpAdd
 134       */
 135  	public function reverse() {
 136          return new DiffOpAdd( $this->orig );
 137      }
 138  }
 139  
 140  /**
 141   * @todo document
 142   * @private
 143   * @ingroup DifferenceEngine
 144   */
 145  class DiffOpAdd extends DiffOp {
 146      public $type = 'add';
 147  
 148  	public function __construct( $lines ) {
 149          $this->closing = $lines;
 150          $this->orig = false;
 151      }
 152  
 153      /**
 154       * @return DiffOpDelete
 155       */
 156  	public function reverse() {
 157          return new DiffOpDelete( $this->closing );
 158      }
 159  }
 160  
 161  /**
 162   * @todo document
 163   * @private
 164   * @ingroup DifferenceEngine
 165   */
 166  class DiffOpChange extends DiffOp {
 167      public $type = 'change';
 168  
 169  	public function __construct( $orig, $closing ) {
 170          $this->orig = $orig;
 171          $this->closing = $closing;
 172      }
 173  
 174      /**
 175       * @return DiffOpChange
 176       */
 177  	public function reverse() {
 178          return new DiffOpChange( $this->closing, $this->orig );
 179      }
 180  }
 181  
 182  /**
 183   * Class used internally by Diff to actually compute the diffs.
 184   *
 185   * The algorithm used here is mostly lifted from the perl module
 186   * Algorithm::Diff (version 1.06) by Ned Konz, which is available at:
 187   *     http://www.perl.com/CPAN/authors/id/N/NE/NEDKONZ/Algorithm-Diff-1.06.zip
 188   *
 189   * More ideas are taken from:
 190   *     http://www.ics.uci.edu/~eppstein/161/960229.html
 191   *
 192   * Some ideas are (and a bit of code) are from from analyze.c, from GNU
 193   * diffutils-2.7, which can be found at:
 194   *     ftp://gnudist.gnu.org/pub/gnu/diffutils/diffutils-2.7.tar.gz
 195   *
 196   * closingly, some ideas (subdivision by NCHUNKS > 2, and some optimizations)
 197   * are my own.
 198   *
 199   * Line length limits for robustness added by Tim Starling, 2005-08-31
 200   * Alternative implementation added by Guy Van den Broeck, 2008-07-30
 201   *
 202   * @author Geoffrey T. Dairiki, Tim Starling, Guy Van den Broeck
 203   * @private
 204   * @ingroup DifferenceEngine
 205   */
 206  class DiffEngine {
 207      const MAX_XREF_LENGTH = 10000;
 208  
 209      protected $xchanged, $ychanged;
 210  
 211      protected $xv = array(), $yv = array();
 212      protected $xind = array(), $yind = array();
 213  
 214      protected $seq = array(), $in_seq = array();
 215  
 216      protected $lcs = 0;
 217  
 218      /**
 219       * @param string[] $from_lines
 220       * @param string[] $to_lines
 221       *
 222       * @return DiffOp[]
 223       */
 224  	public function diff( $from_lines, $to_lines ) {
 225          wfProfileIn( __METHOD__ );
 226  
 227          // Diff and store locally
 228          $this->diffLocal( $from_lines, $to_lines );
 229  
 230          // Merge edits when possible
 231          $this->shiftBoundaries( $from_lines, $this->xchanged, $this->ychanged );
 232          $this->shiftBoundaries( $to_lines, $this->ychanged, $this->xchanged );
 233  
 234          // Compute the edit operations.
 235          $n_from = count( $from_lines );
 236          $n_to = count( $to_lines );
 237  
 238          $edits = array();
 239          $xi = $yi = 0;
 240          while ( $xi < $n_from || $yi < $n_to ) {
 241              assert( '$yi < $n_to || $this->xchanged[$xi]' );
 242              assert( '$xi < $n_from || $this->ychanged[$yi]' );
 243  
 244              // Skip matching "snake".
 245              $copy = array();
 246              while ( $xi < $n_from && $yi < $n_to
 247                  && !$this->xchanged[$xi] && !$this->ychanged[$yi]
 248              ) {
 249                  $copy[] = $from_lines[$xi++];
 250                  ++$yi;
 251              }
 252              if ( $copy ) {
 253                  $edits[] = new DiffOpCopy( $copy );
 254              }
 255  
 256              // Find deletes & adds.
 257              $delete = array();
 258              while ( $xi < $n_from && $this->xchanged[$xi] ) {
 259                  $delete[] = $from_lines[$xi++];
 260              }
 261  
 262              $add = array();
 263              while ( $yi < $n_to && $this->ychanged[$yi] ) {
 264                  $add[] = $to_lines[$yi++];
 265              }
 266  
 267              if ( $delete && $add ) {
 268                  $edits[] = new DiffOpChange( $delete, $add );
 269              } elseif ( $delete ) {
 270                  $edits[] = new DiffOpDelete( $delete );
 271              } elseif ( $add ) {
 272                  $edits[] = new DiffOpAdd( $add );
 273              }
 274          }
 275          wfProfileOut( __METHOD__ );
 276  
 277          return $edits;
 278      }
 279  
 280      /**
 281       * @param string[] $from_lines
 282       * @param string[] $to_lines
 283       */
 284  	private function diffLocal( $from_lines, $to_lines ) {
 285          global $wgExternalDiffEngine;
 286          wfProfileIn( __METHOD__ );
 287  
 288          if ( $wgExternalDiffEngine == 'wikidiff3' ) {
 289              // wikidiff3
 290              $wikidiff3 = new WikiDiff3();
 291              $wikidiff3->diff( $from_lines, $to_lines );
 292              $this->xchanged = $wikidiff3->removed;
 293              $this->ychanged = $wikidiff3->added;
 294              unset( $wikidiff3 );
 295          } else {
 296              // old diff
 297              $n_from = count( $from_lines );
 298              $n_to = count( $to_lines );
 299              $this->xchanged = $this->ychanged = array();
 300              $this->xv = $this->yv = array();
 301              $this->xind = $this->yind = array();
 302              unset( $this->seq );
 303              unset( $this->in_seq );
 304              unset( $this->lcs );
 305  
 306              // Skip leading common lines.
 307              for ( $skip = 0; $skip < $n_from && $skip < $n_to; $skip++ ) {
 308                  if ( $from_lines[$skip] !== $to_lines[$skip] ) {
 309                      break;
 310                  }
 311                  $this->xchanged[$skip] = $this->ychanged[$skip] = false;
 312              }
 313              // Skip trailing common lines.
 314              $xi = $n_from;
 315              $yi = $n_to;
 316              for ( $endskip = 0; --$xi > $skip && --$yi > $skip; $endskip++ ) {
 317                  if ( $from_lines[$xi] !== $to_lines[$yi] ) {
 318                      break;
 319                  }
 320                  $this->xchanged[$xi] = $this->ychanged[$yi] = false;
 321              }
 322  
 323              // Ignore lines which do not exist in both files.
 324              for ( $xi = $skip; $xi < $n_from - $endskip; $xi++ ) {
 325                  $xhash[$this->lineHash( $from_lines[$xi] )] = 1;
 326              }
 327  
 328              for ( $yi = $skip; $yi < $n_to - $endskip; $yi++ ) {
 329                  $line = $to_lines[$yi];
 330                  if ( ( $this->ychanged[$yi] = empty( $xhash[$this->lineHash( $line )] ) ) ) {
 331                      continue;
 332                  }
 333                  $yhash[$this->lineHash( $line )] = 1;
 334                  $this->yv[] = $line;
 335                  $this->yind[] = $yi;
 336              }
 337              for ( $xi = $skip; $xi < $n_from - $endskip; $xi++ ) {
 338                  $line = $from_lines[$xi];
 339                  if ( ( $this->xchanged[$xi] = empty( $yhash[$this->lineHash( $line )] ) ) ) {
 340                      continue;
 341                  }
 342                  $this->xv[] = $line;
 343                  $this->xind[] = $xi;
 344              }
 345  
 346              // Find the LCS.
 347              $this->compareSeq( 0, count( $this->xv ), 0, count( $this->yv ) );
 348          }
 349          wfProfileOut( __METHOD__ );
 350      }
 351  
 352      /**
 353       * Returns the whole line if it's small enough, or the MD5 hash otherwise
 354       *
 355       * @param string $line
 356       *
 357       * @return string
 358       */
 359  	private function lineHash( $line ) {
 360          if ( strlen( $line ) > self::MAX_XREF_LENGTH ) {
 361              return md5( $line );
 362          } else {
 363              return $line;
 364          }
 365      }
 366  
 367      /**
 368       * Divide the Largest Common Subsequence (LCS) of the sequences
 369       * [XOFF, XLIM) and [YOFF, YLIM) into NCHUNKS approximately equally
 370       * sized segments.
 371       *
 372       * Returns (LCS, PTS). LCS is the length of the LCS. PTS is an
 373       * array of NCHUNKS+1 (X, Y) indexes giving the diving points between
 374       * sub sequences.  The first sub-sequence is contained in [X0, X1),
 375       * [Y0, Y1), the second in [X1, X2), [Y1, Y2) and so on.  Note
 376       * that (X0, Y0) == (XOFF, YOFF) and
 377       * (X[NCHUNKS], Y[NCHUNKS]) == (XLIM, YLIM).
 378       *
 379       * This function assumes that the first lines of the specified portions
 380       * of the two files do not match, and likewise that the last lines do not
 381       * match.  The caller must trim matching lines from the beginning and end
 382       * of the portions it is going to specify.
 383       *
 384       * @param int $xoff
 385       * @param int $xlim
 386       * @param int $yoff
 387       * @param int $ylim
 388       * @param int $nchunks
 389       *
 390       * @return array List of two elements, integer and array[].
 391       */
 392  	private function diag( $xoff, $xlim, $yoff, $ylim, $nchunks ) {
 393          $flip = false;
 394  
 395          if ( $xlim - $xoff > $ylim - $yoff ) {
 396              // Things seems faster (I'm not sure I understand why)
 397              // when the shortest sequence in X.
 398              $flip = true;
 399              list( $xoff, $xlim, $yoff, $ylim ) = array( $yoff, $ylim, $xoff, $xlim );
 400          }
 401  
 402          if ( $flip ) {
 403              for ( $i = $ylim - 1; $i >= $yoff; $i-- ) {
 404                  $ymatches[$this->xv[$i]][] = $i;
 405              }
 406          } else {
 407              for ( $i = $ylim - 1; $i >= $yoff; $i-- ) {
 408                  $ymatches[$this->yv[$i]][] = $i;
 409              }
 410          }
 411  
 412          $this->lcs = 0;
 413          $this->seq[0] = $yoff - 1;
 414          $this->in_seq = array();
 415          $ymids[0] = array();
 416  
 417          $numer = $xlim - $xoff + $nchunks - 1;
 418          $x = $xoff;
 419          for ( $chunk = 0; $chunk < $nchunks; $chunk++ ) {
 420              if ( $chunk > 0 ) {
 421                  for ( $i = 0; $i <= $this->lcs; $i++ ) {
 422                      $ymids[$i][$chunk - 1] = $this->seq[$i];
 423                  }
 424              }
 425  
 426              $x1 = $xoff + (int)( ( $numer + ( $xlim - $xoff ) * $chunk ) / $nchunks );
 427              // @codingStandardsIgnoreFile Ignore Squiz.WhiteSpace.SemicolonSpacing.Incorrect
 428              for ( ; $x < $x1; $x++ ) {
 429                  // @codingStandardsIgnoreEnd
 430                  $line = $flip ? $this->yv[$x] : $this->xv[$x];
 431                  if ( empty( $ymatches[$line] ) ) {
 432                      continue;
 433                  }
 434  
 435                  $k = 0;
 436                  $matches = $ymatches[$line];
 437                  reset( $matches );
 438                  while ( list( , $y ) = each( $matches ) ) {
 439                      if ( empty( $this->in_seq[$y] ) ) {
 440                          $k = $this->lcsPos( $y );
 441                          assert( '$k > 0' );
 442                          $ymids[$k] = $ymids[$k - 1];
 443                          break;
 444                      }
 445                  }
 446  
 447                  while ( list( , $y ) = each( $matches ) ) {
 448                      if ( $y > $this->seq[$k - 1] ) {
 449                          assert( '$y < $this->seq[$k]' );
 450                          // Optimization: this is a common case:
 451                          //    next match is just replacing previous match.
 452                          $this->in_seq[$this->seq[$k]] = false;
 453                          $this->seq[$k] = $y;
 454                          $this->in_seq[$y] = 1;
 455                      } elseif ( empty( $this->in_seq[$y] ) ) {
 456                          $k = $this->lcsPos( $y );
 457                          assert( '$k > 0' );
 458                          $ymids[$k] = $ymids[$k - 1];
 459                      }
 460                  }
 461              }
 462          }
 463  
 464          $seps[] = $flip ? array( $yoff, $xoff ) : array( $xoff, $yoff );
 465          $ymid = $ymids[$this->lcs];
 466          for ( $n = 0; $n < $nchunks - 1; $n++ ) {
 467              $x1 = $xoff + (int)( ( $numer + ( $xlim - $xoff ) * $n ) / $nchunks );
 468              $y1 = $ymid[$n] + 1;
 469              $seps[] = $flip ? array( $y1, $x1 ) : array( $x1, $y1 );
 470          }
 471          $seps[] = $flip ? array( $ylim, $xlim ) : array( $xlim, $ylim );
 472  
 473          return array( $this->lcs, $seps );
 474      }
 475  
 476      /**
 477       * @param int $ypos
 478       *
 479       * @return int
 480       */
 481  	private function lcsPos( $ypos ) {
 482          $end = $this->lcs;
 483          if ( $end == 0 || $ypos > $this->seq[$end] ) {
 484              $this->seq[++$this->lcs] = $ypos;
 485              $this->in_seq[$ypos] = 1;
 486  
 487              return $this->lcs;
 488          }
 489  
 490          $beg = 1;
 491          while ( $beg < $end ) {
 492              $mid = (int)( ( $beg + $end ) / 2 );
 493              if ( $ypos > $this->seq[$mid] ) {
 494                  $beg = $mid + 1;
 495              } else {
 496                  $end = $mid;
 497              }
 498          }
 499  
 500          assert( '$ypos != $this->seq[$end]' );
 501  
 502          $this->in_seq[$this->seq[$end]] = false;
 503          $this->seq[$end] = $ypos;
 504          $this->in_seq[$ypos] = 1;
 505  
 506          return $end;
 507      }
 508  
 509      /**
 510       * Find LCS of two sequences.
 511       *
 512       * The results are recorded in the vectors $this->{x,y}changed[], by
 513       * storing a 1 in the element for each line that is an insertion
 514       * or deletion (ie. is not in the LCS).
 515       *
 516       * The subsequence of file 0 is [XOFF, XLIM) and likewise for file 1.
 517       *
 518       * Note that XLIM, YLIM are exclusive bounds.
 519       * All line numbers are origin-0 and discarded lines are not counted.
 520       *
 521       * @param int $xoff
 522       * @param int $xlim
 523       * @param int $yoff
 524       * @param int $ylim
 525       */
 526  	private function compareSeq( $xoff, $xlim, $yoff, $ylim ) {
 527          // Slide down the bottom initial diagonal.
 528          while ( $xoff < $xlim && $yoff < $ylim && $this->xv[$xoff] == $this->yv[$yoff] ) {
 529              ++$xoff;
 530              ++$yoff;
 531          }
 532  
 533          // Slide up the top initial diagonal.
 534          while ( $xlim > $xoff && $ylim > $yoff
 535              && $this->xv[$xlim - 1] == $this->yv[$ylim - 1]
 536          ) {
 537              --$xlim;
 538              --$ylim;
 539          }
 540  
 541          if ( $xoff == $xlim || $yoff == $ylim ) {
 542              $lcs = 0;
 543          } else {
 544              // This is ad hoc but seems to work well.
 545              // $nchunks = sqrt(min($xlim - $xoff, $ylim - $yoff) / 2.5);
 546              // $nchunks = max(2,min(8,(int)$nchunks));
 547              $nchunks = min( 7, $xlim - $xoff, $ylim - $yoff ) + 1;
 548              list( $lcs, $seps ) = $this->diag( $xoff, $xlim, $yoff, $ylim, $nchunks );
 549          }
 550  
 551          if ( $lcs == 0 ) {
 552              // X and Y sequences have no common subsequence:
 553              // mark all changed.
 554              while ( $yoff < $ylim ) {
 555                  $this->ychanged[$this->yind[$yoff++]] = 1;
 556              }
 557              while ( $xoff < $xlim ) {
 558                  $this->xchanged[$this->xind[$xoff++]] = 1;
 559              }
 560          } else {
 561              // Use the partitions to split this problem into subproblems.
 562              reset( $seps );
 563              $pt1 = $seps[0];
 564              while ( $pt2 = next( $seps ) ) {
 565                  $this->compareSeq( $pt1[0], $pt2[0], $pt1[1], $pt2[1] );
 566                  $pt1 = $pt2;
 567              }
 568          }
 569      }
 570  
 571      /**
 572       * Adjust inserts/deletes of identical lines to join changes
 573       * as much as possible.
 574       *
 575       * We do something when a run of changed lines include a
 576       * line at one end and has an excluded, identical line at the other.
 577       * We are free to choose which identical line is included.
 578       * `compareseq' usually chooses the one at the beginning,
 579       * but usually it is cleaner to consider the following identical line
 580       * to be the "change".
 581       *
 582       * This is extracted verbatim from analyze.c (GNU diffutils-2.7).
 583       */
 584  	private function shiftBoundaries( $lines, &$changed, $other_changed ) {
 585          wfProfileIn( __METHOD__ );
 586          $i = 0;
 587          $j = 0;
 588  
 589          assert( 'count($lines) == count($changed)' );
 590          $len = count( $lines );
 591          $other_len = count( $other_changed );
 592  
 593          while ( 1 ) {
 594              /*
 595               * Scan forwards to find beginning of another run of changes.
 596               * Also keep track of the corresponding point in the other file.
 597               *
 598               * Throughout this code, $i and $j are adjusted together so that
 599               * the first $i elements of $changed and the first $j elements
 600               * of $other_changed both contain the same number of zeros
 601               * (unchanged lines).
 602               * Furthermore, $j is always kept so that $j == $other_len or
 603               * $other_changed[$j] == false.
 604               */
 605              while ( $j < $other_len && $other_changed[$j] ) {
 606                  $j++;
 607              }
 608  
 609              while ( $i < $len && !$changed[$i] ) {
 610                  assert( '$j < $other_len && ! $other_changed[$j]' );
 611                  $i++;
 612                  $j++;
 613                  while ( $j < $other_len && $other_changed[$j] ) {
 614                      $j++;
 615                  }
 616              }
 617  
 618              if ( $i == $len ) {
 619                  break;
 620              }
 621  
 622              $start = $i;
 623  
 624              // Find the end of this run of changes.
 625              while ( ++$i < $len && $changed[$i] ) {
 626                  continue;
 627              }
 628  
 629              do {
 630                  /*
 631                   * Record the length of this run of changes, so that
 632                   * we can later determine whether the run has grown.
 633                   */
 634                  $runlength = $i - $start;
 635  
 636                  /*
 637                   * Move the changed region back, so long as the
 638                   * previous unchanged line matches the last changed one.
 639                   * This merges with previous changed regions.
 640                   */
 641                  while ( $start > 0 && $lines[$start - 1] == $lines[$i - 1] ) {
 642                      $changed[--$start] = 1;
 643                      $changed[--$i] = false;
 644                      while ( $start > 0 && $changed[$start - 1] ) {
 645                          $start--;
 646                      }
 647                      assert( '$j > 0' );
 648                      while ( $other_changed[--$j] ) {
 649                          continue;
 650                      }
 651                      assert( '$j >= 0 && !$other_changed[$j]' );
 652                  }
 653  
 654                  /*
 655                   * Set CORRESPONDING to the end of the changed run, at the last
 656                   * point where it corresponds to a changed run in the other file.
 657                   * CORRESPONDING == LEN means no such point has been found.
 658                   */
 659                  $corresponding = $j < $other_len ? $i : $len;
 660  
 661                  /*
 662                   * Move the changed region forward, so long as the
 663                   * first changed line matches the following unchanged one.
 664                   * This merges with following changed regions.
 665                   * Do this second, so that if there are no merges,
 666                   * the changed region is moved forward as far as possible.
 667                   */
 668                  while ( $i < $len && $lines[$start] == $lines[$i] ) {
 669                      $changed[$start++] = false;
 670                      $changed[$i++] = 1;
 671                      while ( $i < $len && $changed[$i] ) {
 672                          $i++;
 673                      }
 674  
 675                      assert( '$j < $other_len && ! $other_changed[$j]' );
 676                      $j++;
 677                      if ( $j < $other_len && $other_changed[$j] ) {
 678                          $corresponding = $i;
 679                          while ( $j < $other_len && $other_changed[$j] ) {
 680                              $j++;
 681                          }
 682                      }
 683                  }
 684              } while ( $runlength != $i - $start );
 685  
 686              /*
 687               * If possible, move the fully-merged run of changes
 688               * back to a corresponding run in the other file.
 689               */
 690              while ( $corresponding < $i ) {
 691                  $changed[--$start] = 1;
 692                  $changed[--$i] = 0;
 693                  assert( '$j > 0' );
 694                  while ( $other_changed[--$j] ) {
 695                      continue;
 696                  }
 697                  assert( '$j >= 0 && !$other_changed[$j]' );
 698              }
 699          }
 700          wfProfileOut( __METHOD__ );
 701      }
 702  }
 703  
 704  /**
 705   * Class representing a 'diff' between two sequences of strings.
 706   * @todo document
 707   * @private
 708   * @ingroup DifferenceEngine
 709   */
 710  class Diff {
 711  
 712      /**
 713       * @var DiffOp[]
 714       */
 715      public $edits;
 716  
 717      /**
 718       * Constructor.
 719       * Computes diff between sequences of strings.
 720       *
 721       * @param string[] $from_lines An array of strings.
 722       *   Typically these are lines from a file.
 723       * @param string[] $to_lines An array of strings.
 724       */
 725  	public function __construct( $from_lines, $to_lines ) {
 726          $eng = new DiffEngine;
 727          $this->edits = $eng->diff( $from_lines, $to_lines );
 728      }
 729  
 730      /**
 731       * @return DiffOp[]
 732       */
 733  	public function getEdits() {
 734          return $this->edits;
 735      }
 736  
 737      /**
 738       * Compute reversed Diff.
 739       *
 740       * SYNOPSIS:
 741       *
 742       *    $diff = new Diff($lines1, $lines2);
 743       *    $rev = $diff->reverse();
 744       *
 745       * @return Object A Diff object representing the inverse of the
 746       *   original diff.
 747       */
 748  	public function reverse() {
 749          $rev = $this;
 750          $rev->edits = array();
 751          /** @var DiffOp $edit */
 752          foreach ( $this->edits as $edit ) {
 753              $rev->edits[] = $edit->reverse();
 754          }
 755  
 756          return $rev;
 757      }
 758  
 759      /**
 760       * Check for empty diff.
 761       *
 762       * @return bool True if two sequences were identical.
 763       */
 764  	public function isEmpty() {
 765          foreach ( $this->edits as $edit ) {
 766              if ( $edit->type != 'copy' ) {
 767                  return false;
 768              }
 769          }
 770  
 771          return true;
 772      }
 773  
 774      /**
 775       * Compute the length of the Longest Common Subsequence (LCS).
 776       *
 777       * This is mostly for diagnostic purposed.
 778       *
 779       * @return int The length of the LCS.
 780       */
 781  	public function lcs() {
 782          $lcs = 0;
 783          foreach ( $this->edits as $edit ) {
 784              if ( $edit->type == 'copy' ) {
 785                  $lcs += count( $edit->orig );
 786              }
 787          }
 788  
 789          return $lcs;
 790      }
 791  
 792      /**
 793       * Get the original set of lines.
 794       *
 795       * This reconstructs the $from_lines parameter passed to the
 796       * constructor.
 797       *
 798       * @return string[] The original sequence of strings.
 799       */
 800  	public function orig() {
 801          $lines = array();
 802  
 803          foreach ( $this->edits as $edit ) {
 804              if ( $edit->orig ) {
 805                  array_splice( $lines, count( $lines ), 0, $edit->orig );
 806              }
 807          }
 808  
 809          return $lines;
 810      }
 811  
 812      /**
 813       * Get the closing set of lines.
 814       *
 815       * This reconstructs the $to_lines parameter passed to the
 816       * constructor.
 817       *
 818       * @return string[] The sequence of strings.
 819       */
 820  	public function closing() {
 821          $lines = array();
 822  
 823          foreach ( $this->edits as $edit ) {
 824              if ( $edit->closing ) {
 825                  array_splice( $lines, count( $lines ), 0, $edit->closing );
 826              }
 827          }
 828  
 829          return $lines;
 830      }
 831  }
 832  
 833  /**
 834   * @todo document, bad name.
 835   * @private
 836   * @ingroup DifferenceEngine
 837   */
 838  class MappedDiff extends Diff {
 839      /**
 840       * Constructor.
 841       *
 842       * Computes diff between sequences of strings.
 843       *
 844       * This can be used to compute things like
 845       * case-insensitve diffs, or diffs which ignore
 846       * changes in white-space.
 847       *
 848       * @param string[] $from_lines An array of strings.
 849       *   Typically these are lines from a file.
 850       * @param string[] $to_lines An array of strings.
 851       * @param string[] $mapped_from_lines This array should
 852       *   have the same size number of elements as $from_lines.
 853       *   The elements in $mapped_from_lines and
 854       *   $mapped_to_lines are what is actually compared
 855       *   when computing the diff.
 856       * @param string[] $mapped_to_lines This array should
 857       *   have the same number of elements as $to_lines.
 858       */
 859  	public function __construct( $from_lines, $to_lines,
 860          $mapped_from_lines, $mapped_to_lines ) {
 861          wfProfileIn( __METHOD__ );
 862  
 863          assert( 'count( $from_lines ) == count( $mapped_from_lines )' );
 864          assert( 'count( $to_lines ) == count( $mapped_to_lines )' );
 865  
 866          parent::__construct( $mapped_from_lines, $mapped_to_lines );
 867  
 868          $xi = $yi = 0;
 869          $editCount = count( $this->edits );
 870          for ( $i = 0; $i < $editCount; $i++ ) {
 871              $orig = &$this->edits[$i]->orig;
 872              if ( is_array( $orig ) ) {
 873                  $orig = array_slice( $from_lines, $xi, count( $orig ) );
 874                  $xi += count( $orig );
 875              }
 876  
 877              $closing = &$this->edits[$i]->closing;
 878              if ( is_array( $closing ) ) {
 879                  $closing = array_slice( $to_lines, $yi, count( $closing ) );
 880                  $yi += count( $closing );
 881              }
 882          }
 883          wfProfileOut( __METHOD__ );
 884      }
 885  }
 886  
 887  /**
 888   * Additions by Axel Boldt follow, partly taken from diff.php, phpwiki-1.3.3
 889   */
 890  
 891  /**
 892   * @todo document
 893   * @private
 894   * @ingroup DifferenceEngine
 895   */
 896  class HWLDFWordAccumulator {
 897      public $insClass = ' class="diffchange diffchange-inline"';
 898      public $delClass = ' class="diffchange diffchange-inline"';
 899  
 900      private $lines = array();
 901      private $line = '';
 902      private $group = '';
 903      private $tag = '';
 904  
 905      /**
 906       * @param string $new_tag
 907       */
 908  	private function flushGroup( $new_tag ) {
 909          if ( $this->group !== '' ) {
 910              if ( $this->tag == 'ins' ) {
 911                  $this->line .= "<ins{$this->insClass}>" .
 912                      htmlspecialchars( $this->group ) . '</ins>';
 913              } elseif ( $this->tag == 'del' ) {
 914                  $this->line .= "<del{$this->delClass}>" .
 915                      htmlspecialchars( $this->group ) . '</del>';
 916              } else {
 917                  $this->line .= htmlspecialchars( $this->group );
 918              }
 919          }
 920          $this->group = '';
 921          $this->tag = $new_tag;
 922      }
 923  
 924      /**
 925       * @param string $new_tag
 926       */
 927  	private function flushLine( $new_tag ) {
 928          $this->flushGroup( $new_tag );
 929          if ( $this->line != '' ) {
 930              array_push( $this->lines, $this->line );
 931          } else {
 932              # make empty lines visible by inserting an NBSP
 933              array_push( $this->lines, '&#160;' );
 934          }
 935          $this->line = '';
 936      }
 937  
 938      /**
 939       * @param string[] $words
 940       * @param string $tag
 941       */
 942  	public function addWords( $words, $tag = '' ) {
 943          if ( $tag != $this->tag ) {
 944              $this->flushGroup( $tag );
 945          }
 946  
 947          foreach ( $words as $word ) {
 948              // new-line should only come as first char of word.
 949              if ( $word == '' ) {
 950                  continue;
 951              }
 952              if ( $word[0] == "\n" ) {
 953                  $this->flushLine( $tag );
 954                  $word = substr( $word, 1 );
 955              }
 956              assert( '!strstr( $word, "\n" )' );
 957              $this->group .= $word;
 958          }
 959      }
 960  
 961      /**
 962       * @return string[]
 963       */
 964  	public function getLines() {
 965          $this->flushLine( '~done' );
 966  
 967          return $this->lines;
 968      }
 969  }
 970  
 971  /**
 972   * @todo document
 973   * @private
 974   * @ingroup DifferenceEngine
 975   */
 976  class WordLevelDiff extends MappedDiff {
 977      const MAX_LINE_LENGTH = 10000;
 978  
 979      /**
 980       * @param string[] $orig_lines
 981       * @param string[] $closing_lines
 982       */
 983  	public function __construct( $orig_lines, $closing_lines ) {
 984          wfProfileIn( __METHOD__ );
 985  
 986          list( $orig_words, $orig_stripped ) = $this->split( $orig_lines );
 987          list( $closing_words, $closing_stripped ) = $this->split( $closing_lines );
 988  
 989          parent::__construct( $orig_words, $closing_words,
 990              $orig_stripped, $closing_stripped );
 991          wfProfileOut( __METHOD__ );
 992      }
 993  
 994      /**
 995       * @param string[] $lines
 996       *
 997       * @return array[]
 998       */
 999  	private function split( $lines ) {
1000          wfProfileIn( __METHOD__ );
1001  
1002          $words = array();
1003          $stripped = array();
1004          $first = true;
1005          foreach ( $lines as $line ) {
1006              # If the line is too long, just pretend the entire line is one big word
1007              # This prevents resource exhaustion problems
1008              if ( $first ) {
1009                  $first = false;
1010              } else {
1011                  $words[] = "\n";
1012                  $stripped[] = "\n";
1013              }
1014              if ( strlen( $line ) > self::MAX_LINE_LENGTH ) {
1015                  $words[] = $line;
1016                  $stripped[] = $line;
1017              } else {
1018                  $m = array();
1019                  if ( preg_match_all( '/ ( [^\S\n]+ | [0-9_A-Za-z\x80-\xff]+ | . ) (?: (?!< \n) [^\S\n])? /xs',
1020                      $line, $m )
1021                  ) {
1022                      foreach ( $m[0] as $word ) {
1023                          $words[] = $word;
1024                      }
1025                      foreach ( $m[1] as $stripped_word ) {
1026                          $stripped[] = $stripped_word;
1027                      }
1028                  }
1029              }
1030          }
1031          wfProfileOut( __METHOD__ );
1032  
1033          return array( $words, $stripped );
1034      }
1035  
1036      /**
1037       * @return string[]
1038       */
1039  	public function orig() {
1040          wfProfileIn( __METHOD__ );
1041          $orig = new HWLDFWordAccumulator;
1042  
1043          foreach ( $this->edits as $edit ) {
1044              if ( $edit->type == 'copy' ) {
1045                  $orig->addWords( $edit->orig );
1046              } elseif ( $edit->orig ) {
1047                  $orig->addWords( $edit->orig, 'del' );
1048              }
1049          }
1050          $lines = $orig->getLines();
1051          wfProfileOut( __METHOD__ );
1052  
1053          return $lines;
1054      }
1055  
1056      /**
1057       * @return string[]
1058       */
1059  	public function closing() {
1060          wfProfileIn( __METHOD__ );
1061          $closing = new HWLDFWordAccumulator;
1062  
1063          foreach ( $this->edits as $edit ) {
1064              if ( $edit->type == 'copy' ) {
1065                  $closing->addWords( $edit->closing );
1066              } elseif ( $edit->closing ) {
1067                  $closing->addWords( $edit->closing, 'ins' );
1068              }
1069          }
1070          $lines = $closing->getLines();
1071          wfProfileOut( __METHOD__ );
1072  
1073          return $lines;
1074      }
1075  
1076  }


Generated: Fri Nov 28 14:03:12 2014 Cross-referenced by PHPXref 0.7.1