[ Index ] |
PHP Cross Reference of MediaWiki-1.24.0 |
[Summary view] [Print] [Text view]
1 <?php 2 /** 3 * A PHP diff engine for phpwiki. (Taken from phpwiki-1.3.3) 4 * 5 * Copyright © 2000, 2001 Geoffrey T. Dairiki <[email protected]> 6 * You may copy this code freely under the conditions of the GPL. 7 * 8 * This program is free software; you can redistribute it and/or modify 9 * it under the terms of the GNU General Public License as published by 10 * the Free Software Foundation; either version 2 of the License, or 11 * (at your option) any later version. 12 * 13 * This program is distributed in the hope that it will be useful, 14 * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 * GNU General Public License for more details. 17 * 18 * You should have received a copy of the GNU General Public License along 19 * with this program; if not, write to the Free Software Foundation, Inc., 20 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 21 * http://www.gnu.org/copyleft/gpl.html 22 * 23 * @file 24 * @ingroup DifferenceEngine 25 * @defgroup DifferenceEngine DifferenceEngine 26 */ 27 28 /** 29 * @todo document 30 * @private 31 * @ingroup DifferenceEngine 32 */ 33 abstract class DiffOp { 34 35 /** 36 * @var string 37 */ 38 public $type; 39 40 /** 41 * @var string[] 42 */ 43 public $orig; 44 45 /** 46 * @var string[] 47 */ 48 public $closing; 49 50 /** 51 * @return string 52 */ 53 public function getType() { 54 return $this->type; 55 } 56 57 /** 58 * @return string[] 59 */ 60 public function getOrig() { 61 return $this->orig; 62 } 63 64 /** 65 * @param int $i 66 * @return string|null 67 */ 68 public function getClosing( $i = null ) { 69 if ( $i === null ) { 70 return $this->closing; 71 } 72 if ( array_key_exists( $i, $this->closing ) ) { 73 return $this->closing[$i]; 74 } 75 return null; 76 } 77 78 abstract public function reverse(); 79 80 /** 81 * @return int 82 */ 83 public function norig() { 84 return $this->orig ? count( $this->orig ) : 0; 85 } 86 87 /** 88 * @return int 89 */ 90 public function nclosing() { 91 return $this->closing ? count( $this->closing ) : 0; 92 } 93 } 94 95 /** 96 * @todo document 97 * @private 98 * @ingroup DifferenceEngine 99 */ 100 class DiffOpCopy extends DiffOp { 101 public $type = 'copy'; 102 103 public function __construct( $orig, $closing = false ) { 104 if ( !is_array( $closing ) ) { 105 $closing = $orig; 106 } 107 $this->orig = $orig; 108 $this->closing = $closing; 109 } 110 111 /** 112 * @return DiffOpCopy 113 */ 114 public function reverse() { 115 return new DiffOpCopy( $this->closing, $this->orig ); 116 } 117 } 118 119 /** 120 * @todo document 121 * @private 122 * @ingroup DifferenceEngine 123 */ 124 class DiffOpDelete extends DiffOp { 125 public $type = 'delete'; 126 127 public function __construct( $lines ) { 128 $this->orig = $lines; 129 $this->closing = false; 130 } 131 132 /** 133 * @return DiffOpAdd 134 */ 135 public function reverse() { 136 return new DiffOpAdd( $this->orig ); 137 } 138 } 139 140 /** 141 * @todo document 142 * @private 143 * @ingroup DifferenceEngine 144 */ 145 class DiffOpAdd extends DiffOp { 146 public $type = 'add'; 147 148 public function __construct( $lines ) { 149 $this->closing = $lines; 150 $this->orig = false; 151 } 152 153 /** 154 * @return DiffOpDelete 155 */ 156 public function reverse() { 157 return new DiffOpDelete( $this->closing ); 158 } 159 } 160 161 /** 162 * @todo document 163 * @private 164 * @ingroup DifferenceEngine 165 */ 166 class DiffOpChange extends DiffOp { 167 public $type = 'change'; 168 169 public function __construct( $orig, $closing ) { 170 $this->orig = $orig; 171 $this->closing = $closing; 172 } 173 174 /** 175 * @return DiffOpChange 176 */ 177 public function reverse() { 178 return new DiffOpChange( $this->closing, $this->orig ); 179 } 180 } 181 182 /** 183 * Class used internally by Diff to actually compute the diffs. 184 * 185 * The algorithm used here is mostly lifted from the perl module 186 * Algorithm::Diff (version 1.06) by Ned Konz, which is available at: 187 * http://www.perl.com/CPAN/authors/id/N/NE/NEDKONZ/Algorithm-Diff-1.06.zip 188 * 189 * More ideas are taken from: 190 * http://www.ics.uci.edu/~eppstein/161/960229.html 191 * 192 * Some ideas are (and a bit of code) are from from analyze.c, from GNU 193 * diffutils-2.7, which can be found at: 194 * ftp://gnudist.gnu.org/pub/gnu/diffutils/diffutils-2.7.tar.gz 195 * 196 * closingly, some ideas (subdivision by NCHUNKS > 2, and some optimizations) 197 * are my own. 198 * 199 * Line length limits for robustness added by Tim Starling, 2005-08-31 200 * Alternative implementation added by Guy Van den Broeck, 2008-07-30 201 * 202 * @author Geoffrey T. Dairiki, Tim Starling, Guy Van den Broeck 203 * @private 204 * @ingroup DifferenceEngine 205 */ 206 class DiffEngine { 207 const MAX_XREF_LENGTH = 10000; 208 209 protected $xchanged, $ychanged; 210 211 protected $xv = array(), $yv = array(); 212 protected $xind = array(), $yind = array(); 213 214 protected $seq = array(), $in_seq = array(); 215 216 protected $lcs = 0; 217 218 /** 219 * @param string[] $from_lines 220 * @param string[] $to_lines 221 * 222 * @return DiffOp[] 223 */ 224 public function diff( $from_lines, $to_lines ) { 225 wfProfileIn( __METHOD__ ); 226 227 // Diff and store locally 228 $this->diffLocal( $from_lines, $to_lines ); 229 230 // Merge edits when possible 231 $this->shiftBoundaries( $from_lines, $this->xchanged, $this->ychanged ); 232 $this->shiftBoundaries( $to_lines, $this->ychanged, $this->xchanged ); 233 234 // Compute the edit operations. 235 $n_from = count( $from_lines ); 236 $n_to = count( $to_lines ); 237 238 $edits = array(); 239 $xi = $yi = 0; 240 while ( $xi < $n_from || $yi < $n_to ) { 241 assert( '$yi < $n_to || $this->xchanged[$xi]' ); 242 assert( '$xi < $n_from || $this->ychanged[$yi]' ); 243 244 // Skip matching "snake". 245 $copy = array(); 246 while ( $xi < $n_from && $yi < $n_to 247 && !$this->xchanged[$xi] && !$this->ychanged[$yi] 248 ) { 249 $copy[] = $from_lines[$xi++]; 250 ++$yi; 251 } 252 if ( $copy ) { 253 $edits[] = new DiffOpCopy( $copy ); 254 } 255 256 // Find deletes & adds. 257 $delete = array(); 258 while ( $xi < $n_from && $this->xchanged[$xi] ) { 259 $delete[] = $from_lines[$xi++]; 260 } 261 262 $add = array(); 263 while ( $yi < $n_to && $this->ychanged[$yi] ) { 264 $add[] = $to_lines[$yi++]; 265 } 266 267 if ( $delete && $add ) { 268 $edits[] = new DiffOpChange( $delete, $add ); 269 } elseif ( $delete ) { 270 $edits[] = new DiffOpDelete( $delete ); 271 } elseif ( $add ) { 272 $edits[] = new DiffOpAdd( $add ); 273 } 274 } 275 wfProfileOut( __METHOD__ ); 276 277 return $edits; 278 } 279 280 /** 281 * @param string[] $from_lines 282 * @param string[] $to_lines 283 */ 284 private function diffLocal( $from_lines, $to_lines ) { 285 global $wgExternalDiffEngine; 286 wfProfileIn( __METHOD__ ); 287 288 if ( $wgExternalDiffEngine == 'wikidiff3' ) { 289 // wikidiff3 290 $wikidiff3 = new WikiDiff3(); 291 $wikidiff3->diff( $from_lines, $to_lines ); 292 $this->xchanged = $wikidiff3->removed; 293 $this->ychanged = $wikidiff3->added; 294 unset( $wikidiff3 ); 295 } else { 296 // old diff 297 $n_from = count( $from_lines ); 298 $n_to = count( $to_lines ); 299 $this->xchanged = $this->ychanged = array(); 300 $this->xv = $this->yv = array(); 301 $this->xind = $this->yind = array(); 302 unset( $this->seq ); 303 unset( $this->in_seq ); 304 unset( $this->lcs ); 305 306 // Skip leading common lines. 307 for ( $skip = 0; $skip < $n_from && $skip < $n_to; $skip++ ) { 308 if ( $from_lines[$skip] !== $to_lines[$skip] ) { 309 break; 310 } 311 $this->xchanged[$skip] = $this->ychanged[$skip] = false; 312 } 313 // Skip trailing common lines. 314 $xi = $n_from; 315 $yi = $n_to; 316 for ( $endskip = 0; --$xi > $skip && --$yi > $skip; $endskip++ ) { 317 if ( $from_lines[$xi] !== $to_lines[$yi] ) { 318 break; 319 } 320 $this->xchanged[$xi] = $this->ychanged[$yi] = false; 321 } 322 323 // Ignore lines which do not exist in both files. 324 for ( $xi = $skip; $xi < $n_from - $endskip; $xi++ ) { 325 $xhash[$this->lineHash( $from_lines[$xi] )] = 1; 326 } 327 328 for ( $yi = $skip; $yi < $n_to - $endskip; $yi++ ) { 329 $line = $to_lines[$yi]; 330 if ( ( $this->ychanged[$yi] = empty( $xhash[$this->lineHash( $line )] ) ) ) { 331 continue; 332 } 333 $yhash[$this->lineHash( $line )] = 1; 334 $this->yv[] = $line; 335 $this->yind[] = $yi; 336 } 337 for ( $xi = $skip; $xi < $n_from - $endskip; $xi++ ) { 338 $line = $from_lines[$xi]; 339 if ( ( $this->xchanged[$xi] = empty( $yhash[$this->lineHash( $line )] ) ) ) { 340 continue; 341 } 342 $this->xv[] = $line; 343 $this->xind[] = $xi; 344 } 345 346 // Find the LCS. 347 $this->compareSeq( 0, count( $this->xv ), 0, count( $this->yv ) ); 348 } 349 wfProfileOut( __METHOD__ ); 350 } 351 352 /** 353 * Returns the whole line if it's small enough, or the MD5 hash otherwise 354 * 355 * @param string $line 356 * 357 * @return string 358 */ 359 private function lineHash( $line ) { 360 if ( strlen( $line ) > self::MAX_XREF_LENGTH ) { 361 return md5( $line ); 362 } else { 363 return $line; 364 } 365 } 366 367 /** 368 * Divide the Largest Common Subsequence (LCS) of the sequences 369 * [XOFF, XLIM) and [YOFF, YLIM) into NCHUNKS approximately equally 370 * sized segments. 371 * 372 * Returns (LCS, PTS). LCS is the length of the LCS. PTS is an 373 * array of NCHUNKS+1 (X, Y) indexes giving the diving points between 374 * sub sequences. The first sub-sequence is contained in [X0, X1), 375 * [Y0, Y1), the second in [X1, X2), [Y1, Y2) and so on. Note 376 * that (X0, Y0) == (XOFF, YOFF) and 377 * (X[NCHUNKS], Y[NCHUNKS]) == (XLIM, YLIM). 378 * 379 * This function assumes that the first lines of the specified portions 380 * of the two files do not match, and likewise that the last lines do not 381 * match. The caller must trim matching lines from the beginning and end 382 * of the portions it is going to specify. 383 * 384 * @param int $xoff 385 * @param int $xlim 386 * @param int $yoff 387 * @param int $ylim 388 * @param int $nchunks 389 * 390 * @return array List of two elements, integer and array[]. 391 */ 392 private function diag( $xoff, $xlim, $yoff, $ylim, $nchunks ) { 393 $flip = false; 394 395 if ( $xlim - $xoff > $ylim - $yoff ) { 396 // Things seems faster (I'm not sure I understand why) 397 // when the shortest sequence in X. 398 $flip = true; 399 list( $xoff, $xlim, $yoff, $ylim ) = array( $yoff, $ylim, $xoff, $xlim ); 400 } 401 402 if ( $flip ) { 403 for ( $i = $ylim - 1; $i >= $yoff; $i-- ) { 404 $ymatches[$this->xv[$i]][] = $i; 405 } 406 } else { 407 for ( $i = $ylim - 1; $i >= $yoff; $i-- ) { 408 $ymatches[$this->yv[$i]][] = $i; 409 } 410 } 411 412 $this->lcs = 0; 413 $this->seq[0] = $yoff - 1; 414 $this->in_seq = array(); 415 $ymids[0] = array(); 416 417 $numer = $xlim - $xoff + $nchunks - 1; 418 $x = $xoff; 419 for ( $chunk = 0; $chunk < $nchunks; $chunk++ ) { 420 if ( $chunk > 0 ) { 421 for ( $i = 0; $i <= $this->lcs; $i++ ) { 422 $ymids[$i][$chunk - 1] = $this->seq[$i]; 423 } 424 } 425 426 $x1 = $xoff + (int)( ( $numer + ( $xlim - $xoff ) * $chunk ) / $nchunks ); 427 // @codingStandardsIgnoreFile Ignore Squiz.WhiteSpace.SemicolonSpacing.Incorrect 428 for ( ; $x < $x1; $x++ ) { 429 // @codingStandardsIgnoreEnd 430 $line = $flip ? $this->yv[$x] : $this->xv[$x]; 431 if ( empty( $ymatches[$line] ) ) { 432 continue; 433 } 434 435 $k = 0; 436 $matches = $ymatches[$line]; 437 reset( $matches ); 438 while ( list( , $y ) = each( $matches ) ) { 439 if ( empty( $this->in_seq[$y] ) ) { 440 $k = $this->lcsPos( $y ); 441 assert( '$k > 0' ); 442 $ymids[$k] = $ymids[$k - 1]; 443 break; 444 } 445 } 446 447 while ( list( , $y ) = each( $matches ) ) { 448 if ( $y > $this->seq[$k - 1] ) { 449 assert( '$y < $this->seq[$k]' ); 450 // Optimization: this is a common case: 451 // next match is just replacing previous match. 452 $this->in_seq[$this->seq[$k]] = false; 453 $this->seq[$k] = $y; 454 $this->in_seq[$y] = 1; 455 } elseif ( empty( $this->in_seq[$y] ) ) { 456 $k = $this->lcsPos( $y ); 457 assert( '$k > 0' ); 458 $ymids[$k] = $ymids[$k - 1]; 459 } 460 } 461 } 462 } 463 464 $seps[] = $flip ? array( $yoff, $xoff ) : array( $xoff, $yoff ); 465 $ymid = $ymids[$this->lcs]; 466 for ( $n = 0; $n < $nchunks - 1; $n++ ) { 467 $x1 = $xoff + (int)( ( $numer + ( $xlim - $xoff ) * $n ) / $nchunks ); 468 $y1 = $ymid[$n] + 1; 469 $seps[] = $flip ? array( $y1, $x1 ) : array( $x1, $y1 ); 470 } 471 $seps[] = $flip ? array( $ylim, $xlim ) : array( $xlim, $ylim ); 472 473 return array( $this->lcs, $seps ); 474 } 475 476 /** 477 * @param int $ypos 478 * 479 * @return int 480 */ 481 private function lcsPos( $ypos ) { 482 $end = $this->lcs; 483 if ( $end == 0 || $ypos > $this->seq[$end] ) { 484 $this->seq[++$this->lcs] = $ypos; 485 $this->in_seq[$ypos] = 1; 486 487 return $this->lcs; 488 } 489 490 $beg = 1; 491 while ( $beg < $end ) { 492 $mid = (int)( ( $beg + $end ) / 2 ); 493 if ( $ypos > $this->seq[$mid] ) { 494 $beg = $mid + 1; 495 } else { 496 $end = $mid; 497 } 498 } 499 500 assert( '$ypos != $this->seq[$end]' ); 501 502 $this->in_seq[$this->seq[$end]] = false; 503 $this->seq[$end] = $ypos; 504 $this->in_seq[$ypos] = 1; 505 506 return $end; 507 } 508 509 /** 510 * Find LCS of two sequences. 511 * 512 * The results are recorded in the vectors $this->{x,y}changed[], by 513 * storing a 1 in the element for each line that is an insertion 514 * or deletion (ie. is not in the LCS). 515 * 516 * The subsequence of file 0 is [XOFF, XLIM) and likewise for file 1. 517 * 518 * Note that XLIM, YLIM are exclusive bounds. 519 * All line numbers are origin-0 and discarded lines are not counted. 520 * 521 * @param int $xoff 522 * @param int $xlim 523 * @param int $yoff 524 * @param int $ylim 525 */ 526 private function compareSeq( $xoff, $xlim, $yoff, $ylim ) { 527 // Slide down the bottom initial diagonal. 528 while ( $xoff < $xlim && $yoff < $ylim && $this->xv[$xoff] == $this->yv[$yoff] ) { 529 ++$xoff; 530 ++$yoff; 531 } 532 533 // Slide up the top initial diagonal. 534 while ( $xlim > $xoff && $ylim > $yoff 535 && $this->xv[$xlim - 1] == $this->yv[$ylim - 1] 536 ) { 537 --$xlim; 538 --$ylim; 539 } 540 541 if ( $xoff == $xlim || $yoff == $ylim ) { 542 $lcs = 0; 543 } else { 544 // This is ad hoc but seems to work well. 545 // $nchunks = sqrt(min($xlim - $xoff, $ylim - $yoff) / 2.5); 546 // $nchunks = max(2,min(8,(int)$nchunks)); 547 $nchunks = min( 7, $xlim - $xoff, $ylim - $yoff ) + 1; 548 list( $lcs, $seps ) = $this->diag( $xoff, $xlim, $yoff, $ylim, $nchunks ); 549 } 550 551 if ( $lcs == 0 ) { 552 // X and Y sequences have no common subsequence: 553 // mark all changed. 554 while ( $yoff < $ylim ) { 555 $this->ychanged[$this->yind[$yoff++]] = 1; 556 } 557 while ( $xoff < $xlim ) { 558 $this->xchanged[$this->xind[$xoff++]] = 1; 559 } 560 } else { 561 // Use the partitions to split this problem into subproblems. 562 reset( $seps ); 563 $pt1 = $seps[0]; 564 while ( $pt2 = next( $seps ) ) { 565 $this->compareSeq( $pt1[0], $pt2[0], $pt1[1], $pt2[1] ); 566 $pt1 = $pt2; 567 } 568 } 569 } 570 571 /** 572 * Adjust inserts/deletes of identical lines to join changes 573 * as much as possible. 574 * 575 * We do something when a run of changed lines include a 576 * line at one end and has an excluded, identical line at the other. 577 * We are free to choose which identical line is included. 578 * `compareseq' usually chooses the one at the beginning, 579 * but usually it is cleaner to consider the following identical line 580 * to be the "change". 581 * 582 * This is extracted verbatim from analyze.c (GNU diffutils-2.7). 583 */ 584 private function shiftBoundaries( $lines, &$changed, $other_changed ) { 585 wfProfileIn( __METHOD__ ); 586 $i = 0; 587 $j = 0; 588 589 assert( 'count($lines) == count($changed)' ); 590 $len = count( $lines ); 591 $other_len = count( $other_changed ); 592 593 while ( 1 ) { 594 /* 595 * Scan forwards to find beginning of another run of changes. 596 * Also keep track of the corresponding point in the other file. 597 * 598 * Throughout this code, $i and $j are adjusted together so that 599 * the first $i elements of $changed and the first $j elements 600 * of $other_changed both contain the same number of zeros 601 * (unchanged lines). 602 * Furthermore, $j is always kept so that $j == $other_len or 603 * $other_changed[$j] == false. 604 */ 605 while ( $j < $other_len && $other_changed[$j] ) { 606 $j++; 607 } 608 609 while ( $i < $len && !$changed[$i] ) { 610 assert( '$j < $other_len && ! $other_changed[$j]' ); 611 $i++; 612 $j++; 613 while ( $j < $other_len && $other_changed[$j] ) { 614 $j++; 615 } 616 } 617 618 if ( $i == $len ) { 619 break; 620 } 621 622 $start = $i; 623 624 // Find the end of this run of changes. 625 while ( ++$i < $len && $changed[$i] ) { 626 continue; 627 } 628 629 do { 630 /* 631 * Record the length of this run of changes, so that 632 * we can later determine whether the run has grown. 633 */ 634 $runlength = $i - $start; 635 636 /* 637 * Move the changed region back, so long as the 638 * previous unchanged line matches the last changed one. 639 * This merges with previous changed regions. 640 */ 641 while ( $start > 0 && $lines[$start - 1] == $lines[$i - 1] ) { 642 $changed[--$start] = 1; 643 $changed[--$i] = false; 644 while ( $start > 0 && $changed[$start - 1] ) { 645 $start--; 646 } 647 assert( '$j > 0' ); 648 while ( $other_changed[--$j] ) { 649 continue; 650 } 651 assert( '$j >= 0 && !$other_changed[$j]' ); 652 } 653 654 /* 655 * Set CORRESPONDING to the end of the changed run, at the last 656 * point where it corresponds to a changed run in the other file. 657 * CORRESPONDING == LEN means no such point has been found. 658 */ 659 $corresponding = $j < $other_len ? $i : $len; 660 661 /* 662 * Move the changed region forward, so long as the 663 * first changed line matches the following unchanged one. 664 * This merges with following changed regions. 665 * Do this second, so that if there are no merges, 666 * the changed region is moved forward as far as possible. 667 */ 668 while ( $i < $len && $lines[$start] == $lines[$i] ) { 669 $changed[$start++] = false; 670 $changed[$i++] = 1; 671 while ( $i < $len && $changed[$i] ) { 672 $i++; 673 } 674 675 assert( '$j < $other_len && ! $other_changed[$j]' ); 676 $j++; 677 if ( $j < $other_len && $other_changed[$j] ) { 678 $corresponding = $i; 679 while ( $j < $other_len && $other_changed[$j] ) { 680 $j++; 681 } 682 } 683 } 684 } while ( $runlength != $i - $start ); 685 686 /* 687 * If possible, move the fully-merged run of changes 688 * back to a corresponding run in the other file. 689 */ 690 while ( $corresponding < $i ) { 691 $changed[--$start] = 1; 692 $changed[--$i] = 0; 693 assert( '$j > 0' ); 694 while ( $other_changed[--$j] ) { 695 continue; 696 } 697 assert( '$j >= 0 && !$other_changed[$j]' ); 698 } 699 } 700 wfProfileOut( __METHOD__ ); 701 } 702 } 703 704 /** 705 * Class representing a 'diff' between two sequences of strings. 706 * @todo document 707 * @private 708 * @ingroup DifferenceEngine 709 */ 710 class Diff { 711 712 /** 713 * @var DiffOp[] 714 */ 715 public $edits; 716 717 /** 718 * Constructor. 719 * Computes diff between sequences of strings. 720 * 721 * @param string[] $from_lines An array of strings. 722 * Typically these are lines from a file. 723 * @param string[] $to_lines An array of strings. 724 */ 725 public function __construct( $from_lines, $to_lines ) { 726 $eng = new DiffEngine; 727 $this->edits = $eng->diff( $from_lines, $to_lines ); 728 } 729 730 /** 731 * @return DiffOp[] 732 */ 733 public function getEdits() { 734 return $this->edits; 735 } 736 737 /** 738 * Compute reversed Diff. 739 * 740 * SYNOPSIS: 741 * 742 * $diff = new Diff($lines1, $lines2); 743 * $rev = $diff->reverse(); 744 * 745 * @return Object A Diff object representing the inverse of the 746 * original diff. 747 */ 748 public function reverse() { 749 $rev = $this; 750 $rev->edits = array(); 751 /** @var DiffOp $edit */ 752 foreach ( $this->edits as $edit ) { 753 $rev->edits[] = $edit->reverse(); 754 } 755 756 return $rev; 757 } 758 759 /** 760 * Check for empty diff. 761 * 762 * @return bool True if two sequences were identical. 763 */ 764 public function isEmpty() { 765 foreach ( $this->edits as $edit ) { 766 if ( $edit->type != 'copy' ) { 767 return false; 768 } 769 } 770 771 return true; 772 } 773 774 /** 775 * Compute the length of the Longest Common Subsequence (LCS). 776 * 777 * This is mostly for diagnostic purposed. 778 * 779 * @return int The length of the LCS. 780 */ 781 public function lcs() { 782 $lcs = 0; 783 foreach ( $this->edits as $edit ) { 784 if ( $edit->type == 'copy' ) { 785 $lcs += count( $edit->orig ); 786 } 787 } 788 789 return $lcs; 790 } 791 792 /** 793 * Get the original set of lines. 794 * 795 * This reconstructs the $from_lines parameter passed to the 796 * constructor. 797 * 798 * @return string[] The original sequence of strings. 799 */ 800 public function orig() { 801 $lines = array(); 802 803 foreach ( $this->edits as $edit ) { 804 if ( $edit->orig ) { 805 array_splice( $lines, count( $lines ), 0, $edit->orig ); 806 } 807 } 808 809 return $lines; 810 } 811 812 /** 813 * Get the closing set of lines. 814 * 815 * This reconstructs the $to_lines parameter passed to the 816 * constructor. 817 * 818 * @return string[] The sequence of strings. 819 */ 820 public function closing() { 821 $lines = array(); 822 823 foreach ( $this->edits as $edit ) { 824 if ( $edit->closing ) { 825 array_splice( $lines, count( $lines ), 0, $edit->closing ); 826 } 827 } 828 829 return $lines; 830 } 831 } 832 833 /** 834 * @todo document, bad name. 835 * @private 836 * @ingroup DifferenceEngine 837 */ 838 class MappedDiff extends Diff { 839 /** 840 * Constructor. 841 * 842 * Computes diff between sequences of strings. 843 * 844 * This can be used to compute things like 845 * case-insensitve diffs, or diffs which ignore 846 * changes in white-space. 847 * 848 * @param string[] $from_lines An array of strings. 849 * Typically these are lines from a file. 850 * @param string[] $to_lines An array of strings. 851 * @param string[] $mapped_from_lines This array should 852 * have the same size number of elements as $from_lines. 853 * The elements in $mapped_from_lines and 854 * $mapped_to_lines are what is actually compared 855 * when computing the diff. 856 * @param string[] $mapped_to_lines This array should 857 * have the same number of elements as $to_lines. 858 */ 859 public function __construct( $from_lines, $to_lines, 860 $mapped_from_lines, $mapped_to_lines ) { 861 wfProfileIn( __METHOD__ ); 862 863 assert( 'count( $from_lines ) == count( $mapped_from_lines )' ); 864 assert( 'count( $to_lines ) == count( $mapped_to_lines )' ); 865 866 parent::__construct( $mapped_from_lines, $mapped_to_lines ); 867 868 $xi = $yi = 0; 869 $editCount = count( $this->edits ); 870 for ( $i = 0; $i < $editCount; $i++ ) { 871 $orig = &$this->edits[$i]->orig; 872 if ( is_array( $orig ) ) { 873 $orig = array_slice( $from_lines, $xi, count( $orig ) ); 874 $xi += count( $orig ); 875 } 876 877 $closing = &$this->edits[$i]->closing; 878 if ( is_array( $closing ) ) { 879 $closing = array_slice( $to_lines, $yi, count( $closing ) ); 880 $yi += count( $closing ); 881 } 882 } 883 wfProfileOut( __METHOD__ ); 884 } 885 } 886 887 /** 888 * Additions by Axel Boldt follow, partly taken from diff.php, phpwiki-1.3.3 889 */ 890 891 /** 892 * @todo document 893 * @private 894 * @ingroup DifferenceEngine 895 */ 896 class HWLDFWordAccumulator { 897 public $insClass = ' class="diffchange diffchange-inline"'; 898 public $delClass = ' class="diffchange diffchange-inline"'; 899 900 private $lines = array(); 901 private $line = ''; 902 private $group = ''; 903 private $tag = ''; 904 905 /** 906 * @param string $new_tag 907 */ 908 private function flushGroup( $new_tag ) { 909 if ( $this->group !== '' ) { 910 if ( $this->tag == 'ins' ) { 911 $this->line .= "<ins{$this->insClass}>" . 912 htmlspecialchars( $this->group ) . '</ins>'; 913 } elseif ( $this->tag == 'del' ) { 914 $this->line .= "<del{$this->delClass}>" . 915 htmlspecialchars( $this->group ) . '</del>'; 916 } else { 917 $this->line .= htmlspecialchars( $this->group ); 918 } 919 } 920 $this->group = ''; 921 $this->tag = $new_tag; 922 } 923 924 /** 925 * @param string $new_tag 926 */ 927 private function flushLine( $new_tag ) { 928 $this->flushGroup( $new_tag ); 929 if ( $this->line != '' ) { 930 array_push( $this->lines, $this->line ); 931 } else { 932 # make empty lines visible by inserting an NBSP 933 array_push( $this->lines, ' ' ); 934 } 935 $this->line = ''; 936 } 937 938 /** 939 * @param string[] $words 940 * @param string $tag 941 */ 942 public function addWords( $words, $tag = '' ) { 943 if ( $tag != $this->tag ) { 944 $this->flushGroup( $tag ); 945 } 946 947 foreach ( $words as $word ) { 948 // new-line should only come as first char of word. 949 if ( $word == '' ) { 950 continue; 951 } 952 if ( $word[0] == "\n" ) { 953 $this->flushLine( $tag ); 954 $word = substr( $word, 1 ); 955 } 956 assert( '!strstr( $word, "\n" )' ); 957 $this->group .= $word; 958 } 959 } 960 961 /** 962 * @return string[] 963 */ 964 public function getLines() { 965 $this->flushLine( '~done' ); 966 967 return $this->lines; 968 } 969 } 970 971 /** 972 * @todo document 973 * @private 974 * @ingroup DifferenceEngine 975 */ 976 class WordLevelDiff extends MappedDiff { 977 const MAX_LINE_LENGTH = 10000; 978 979 /** 980 * @param string[] $orig_lines 981 * @param string[] $closing_lines 982 */ 983 public function __construct( $orig_lines, $closing_lines ) { 984 wfProfileIn( __METHOD__ ); 985 986 list( $orig_words, $orig_stripped ) = $this->split( $orig_lines ); 987 list( $closing_words, $closing_stripped ) = $this->split( $closing_lines ); 988 989 parent::__construct( $orig_words, $closing_words, 990 $orig_stripped, $closing_stripped ); 991 wfProfileOut( __METHOD__ ); 992 } 993 994 /** 995 * @param string[] $lines 996 * 997 * @return array[] 998 */ 999 private function split( $lines ) { 1000 wfProfileIn( __METHOD__ ); 1001 1002 $words = array(); 1003 $stripped = array(); 1004 $first = true; 1005 foreach ( $lines as $line ) { 1006 # If the line is too long, just pretend the entire line is one big word 1007 # This prevents resource exhaustion problems 1008 if ( $first ) { 1009 $first = false; 1010 } else { 1011 $words[] = "\n"; 1012 $stripped[] = "\n"; 1013 } 1014 if ( strlen( $line ) > self::MAX_LINE_LENGTH ) { 1015 $words[] = $line; 1016 $stripped[] = $line; 1017 } else { 1018 $m = array(); 1019 if ( preg_match_all( '/ ( [^\S\n]+ | [0-9_A-Za-z\x80-\xff]+ | . ) (?: (?!< \n) [^\S\n])? /xs', 1020 $line, $m ) 1021 ) { 1022 foreach ( $m[0] as $word ) { 1023 $words[] = $word; 1024 } 1025 foreach ( $m[1] as $stripped_word ) { 1026 $stripped[] = $stripped_word; 1027 } 1028 } 1029 } 1030 } 1031 wfProfileOut( __METHOD__ ); 1032 1033 return array( $words, $stripped ); 1034 } 1035 1036 /** 1037 * @return string[] 1038 */ 1039 public function orig() { 1040 wfProfileIn( __METHOD__ ); 1041 $orig = new HWLDFWordAccumulator; 1042 1043 foreach ( $this->edits as $edit ) { 1044 if ( $edit->type == 'copy' ) { 1045 $orig->addWords( $edit->orig ); 1046 } elseif ( $edit->orig ) { 1047 $orig->addWords( $edit->orig, 'del' ); 1048 } 1049 } 1050 $lines = $orig->getLines(); 1051 wfProfileOut( __METHOD__ ); 1052 1053 return $lines; 1054 } 1055 1056 /** 1057 * @return string[] 1058 */ 1059 public function closing() { 1060 wfProfileIn( __METHOD__ ); 1061 $closing = new HWLDFWordAccumulator; 1062 1063 foreach ( $this->edits as $edit ) { 1064 if ( $edit->type == 'copy' ) { 1065 $closing->addWords( $edit->closing ); 1066 } elseif ( $edit->closing ) { 1067 $closing->addWords( $edit->closing, 'ins' ); 1068 } 1069 } 1070 $lines = $closing->getLines(); 1071 wfProfileOut( __METHOD__ ); 1072 1073 return $lines; 1074 } 1075 1076 }
title
Description
Body
title
Description
Body
title
Description
Body
title
Body
Generated: Fri Nov 28 14:03:12 2014 | Cross-referenced by PHPXref 0.7.1 |