MediaWiki
REL1_22
|
00001 <?php 00033 class _DiffOp { 00034 var $type; 00035 var $orig; 00036 var $closing; 00037 00038 function reverse() { 00039 trigger_error( 'pure virtual', E_USER_ERROR ); 00040 } 00041 00045 function norig() { 00046 return $this->orig ? count( $this->orig ) : 0; 00047 } 00048 00052 function nclosing() { 00053 return $this->closing ? count( $this->closing ) : 0; 00054 } 00055 } 00056 00062 class _DiffOp_Copy extends _DiffOp { 00063 var $type = 'copy'; 00064 00065 function __construct( $orig, $closing = false ) { 00066 if ( !is_array( $closing ) ) { 00067 $closing = $orig; 00068 } 00069 $this->orig = $orig; 00070 $this->closing = $closing; 00071 } 00072 00076 function reverse() { 00077 return new _DiffOp_Copy( $this->closing, $this->orig ); 00078 } 00079 } 00080 00086 class _DiffOp_Delete extends _DiffOp { 00087 var $type = 'delete'; 00088 00089 function __construct( $lines ) { 00090 $this->orig = $lines; 00091 $this->closing = false; 00092 } 00093 00097 function reverse() { 00098 return new _DiffOp_Add( $this->orig ); 00099 } 00100 } 00101 00107 class _DiffOp_Add extends _DiffOp { 00108 var $type = 'add'; 00109 00110 function __construct( $lines ) { 00111 $this->closing = $lines; 00112 $this->orig = false; 00113 } 00114 00118 function reverse() { 00119 return new _DiffOp_Delete( $this->closing ); 00120 } 00121 } 00122 00128 class _DiffOp_Change extends _DiffOp { 00129 var $type = 'change'; 00130 00131 function __construct( $orig, $closing ) { 00132 $this->orig = $orig; 00133 $this->closing = $closing; 00134 } 00135 00139 function reverse() { 00140 return new _DiffOp_Change( $this->closing, $this->orig ); 00141 } 00142 } 00143 00168 class _DiffEngine { 00169 00170 const MAX_XREF_LENGTH = 10000; 00171 00172 protected $xchanged, $ychanged; 00173 00174 protected $xv = array(), $yv = array(); 00175 protected $xind = array(), $yind = array(); 00176 00177 protected $seq = array(), $in_seq = array(); 00178 00179 protected $lcs = 0; 00180 00186 function diff( $from_lines, $to_lines ) { 00187 wfProfileIn( __METHOD__ ); 00188 00189 // Diff and store locally 00190 $this->diff_local( $from_lines, $to_lines ); 00191 00192 // Merge edits when possible 00193 $this->_shift_boundaries( $from_lines, $this->xchanged, $this->ychanged ); 00194 $this->_shift_boundaries( $to_lines, $this->ychanged, $this->xchanged ); 00195 00196 // Compute the edit operations. 00197 $n_from = count( $from_lines ); 00198 $n_to = count( $to_lines ); 00199 00200 $edits = array(); 00201 $xi = $yi = 0; 00202 while ( $xi < $n_from || $yi < $n_to ) { 00203 assert( '$yi < $n_to || $this->xchanged[$xi]' ); 00204 assert( '$xi < $n_from || $this->ychanged[$yi]' ); 00205 00206 // Skip matching "snake". 00207 $copy = array(); 00208 while ( $xi < $n_from && $yi < $n_to 00209 && !$this->xchanged[$xi] && !$this->ychanged[$yi] ) { 00210 $copy[] = $from_lines[$xi++]; 00211 ++$yi; 00212 } 00213 if ( $copy ) { 00214 $edits[] = new _DiffOp_Copy( $copy ); 00215 } 00216 00217 // Find deletes & adds. 00218 $delete = array(); 00219 while ( $xi < $n_from && $this->xchanged[$xi] ) { 00220 $delete[] = $from_lines[$xi++]; 00221 } 00222 00223 $add = array(); 00224 while ( $yi < $n_to && $this->ychanged[$yi] ) { 00225 $add[] = $to_lines[$yi++]; 00226 } 00227 00228 if ( $delete && $add ) { 00229 $edits[] = new _DiffOp_Change( $delete, $add ); 00230 } elseif ( $delete ) { 00231 $edits[] = new _DiffOp_Delete( $delete ); 00232 } elseif ( $add ) { 00233 $edits[] = new _DiffOp_Add( $add ); 00234 } 00235 } 00236 wfProfileOut( __METHOD__ ); 00237 return $edits; 00238 } 00239 00244 function diff_local( $from_lines, $to_lines ) { 00245 global $wgExternalDiffEngine; 00246 wfProfileIn( __METHOD__ ); 00247 00248 if ( $wgExternalDiffEngine == 'wikidiff3' ) { 00249 // wikidiff3 00250 $wikidiff3 = new WikiDiff3(); 00251 $wikidiff3->diff( $from_lines, $to_lines ); 00252 $this->xchanged = $wikidiff3->removed; 00253 $this->ychanged = $wikidiff3->added; 00254 unset( $wikidiff3 ); 00255 } else { 00256 // old diff 00257 $n_from = count( $from_lines ); 00258 $n_to = count( $to_lines ); 00259 $this->xchanged = $this->ychanged = array(); 00260 $this->xv = $this->yv = array(); 00261 $this->xind = $this->yind = array(); 00262 unset( $this->seq ); 00263 unset( $this->in_seq ); 00264 unset( $this->lcs ); 00265 00266 // Skip leading common lines. 00267 for ( $skip = 0; $skip < $n_from && $skip < $n_to; $skip++ ) { 00268 if ( $from_lines[$skip] !== $to_lines[$skip] ) { 00269 break; 00270 } 00271 $this->xchanged[$skip] = $this->ychanged[$skip] = false; 00272 } 00273 // Skip trailing common lines. 00274 $xi = $n_from; 00275 $yi = $n_to; 00276 for ( $endskip = 0; --$xi > $skip && --$yi > $skip; $endskip++ ) { 00277 if ( $from_lines[$xi] !== $to_lines[$yi] ) { 00278 break; 00279 } 00280 $this->xchanged[$xi] = $this->ychanged[$yi] = false; 00281 } 00282 00283 // Ignore lines which do not exist in both files. 00284 for ( $xi = $skip; $xi < $n_from - $endskip; $xi++ ) { 00285 $xhash[$this->_line_hash( $from_lines[$xi] )] = 1; 00286 } 00287 00288 for ( $yi = $skip; $yi < $n_to - $endskip; $yi++ ) { 00289 $line = $to_lines[$yi]; 00290 if ( ( $this->ychanged[$yi] = empty( $xhash[$this->_line_hash( $line )] ) ) ) { 00291 continue; 00292 } 00293 $yhash[$this->_line_hash( $line )] = 1; 00294 $this->yv[] = $line; 00295 $this->yind[] = $yi; 00296 } 00297 for ( $xi = $skip; $xi < $n_from - $endskip; $xi++ ) { 00298 $line = $from_lines[$xi]; 00299 if ( ( $this->xchanged[$xi] = empty( $yhash[$this->_line_hash( $line )] ) ) ) { 00300 continue; 00301 } 00302 $this->xv[] = $line; 00303 $this->xind[] = $xi; 00304 } 00305 00306 // Find the LCS. 00307 $this->_compareseq( 0, count( $this->xv ), 0, count( $this->yv ) ); 00308 } 00309 wfProfileOut( __METHOD__ ); 00310 } 00311 00317 function _line_hash( $line ) { 00318 if ( strlen( $line ) > self::MAX_XREF_LENGTH ) { 00319 return md5( $line ); 00320 } else { 00321 return $line; 00322 } 00323 } 00324 00348 function _diag( $xoff, $xlim, $yoff, $ylim, $nchunks ) { 00349 $flip = false; 00350 00351 if ( $xlim - $xoff > $ylim - $yoff ) { 00352 // Things seems faster (I'm not sure I understand why) 00353 // when the shortest sequence in X. 00354 $flip = true; 00355 list( $xoff, $xlim, $yoff, $ylim ) = array( $yoff, $ylim, $xoff, $xlim ); 00356 } 00357 00358 if ( $flip ) { 00359 for ( $i = $ylim - 1; $i >= $yoff; $i-- ) { 00360 $ymatches[$this->xv[$i]][] = $i; 00361 } 00362 } else { 00363 for ( $i = $ylim - 1; $i >= $yoff; $i-- ) { 00364 $ymatches[$this->yv[$i]][] = $i; 00365 } 00366 } 00367 00368 $this->lcs = 0; 00369 $this->seq[0] = $yoff - 1; 00370 $this->in_seq = array(); 00371 $ymids[0] = array(); 00372 00373 $numer = $xlim - $xoff + $nchunks - 1; 00374 $x = $xoff; 00375 for ( $chunk = 0; $chunk < $nchunks; $chunk++ ) { 00376 if ( $chunk > 0 ) { 00377 for ( $i = 0; $i <= $this->lcs; $i++ ) { 00378 $ymids[$i][$chunk -1] = $this->seq[$i]; 00379 } 00380 } 00381 00382 $x1 = $xoff + (int)( ( $numer + ( $xlim -$xoff ) * $chunk ) / $nchunks ); 00383 for ( ; $x < $x1; $x++ ) { 00384 $line = $flip ? $this->yv[$x] : $this->xv[$x]; 00385 if ( empty( $ymatches[$line] ) ) { 00386 continue; 00387 } 00388 $matches = $ymatches[$line]; 00389 reset( $matches ); 00390 while ( list( , $y ) = each( $matches ) ) { 00391 if ( empty( $this->in_seq[$y] ) ) { 00392 $k = $this->_lcs_pos( $y ); 00393 assert( '$k > 0' ); 00394 $ymids[$k] = $ymids[$k -1]; 00395 break; 00396 } 00397 } 00398 while ( list( , $y ) = each( $matches ) ) { 00399 if ( $y > $this->seq[$k -1] ) { 00400 assert( '$y < $this->seq[$k]' ); 00401 // Optimization: this is a common case: 00402 // next match is just replacing previous match. 00403 $this->in_seq[$this->seq[$k]] = false; 00404 $this->seq[$k] = $y; 00405 $this->in_seq[$y] = 1; 00406 } elseif ( empty( $this->in_seq[$y] ) ) { 00407 $k = $this->_lcs_pos( $y ); 00408 assert( '$k > 0' ); 00409 $ymids[$k] = $ymids[$k -1]; 00410 } 00411 } 00412 } 00413 } 00414 00415 $seps[] = $flip ? array( $yoff, $xoff ) : array( $xoff, $yoff ); 00416 $ymid = $ymids[$this->lcs]; 00417 for ( $n = 0; $n < $nchunks - 1; $n++ ) { 00418 $x1 = $xoff + (int)( ( $numer + ( $xlim - $xoff ) * $n ) / $nchunks ); 00419 $y1 = $ymid[$n] + 1; 00420 $seps[] = $flip ? array( $y1, $x1 ) : array( $x1, $y1 ); 00421 } 00422 $seps[] = $flip ? array( $ylim, $xlim ) : array( $xlim, $ylim ); 00423 00424 return array( $this->lcs, $seps ); 00425 } 00426 00431 function _lcs_pos( $ypos ) { 00432 $end = $this->lcs; 00433 if ( $end == 0 || $ypos > $this->seq[$end] ) { 00434 $this->seq[++$this->lcs] = $ypos; 00435 $this->in_seq[$ypos] = 1; 00436 return $this->lcs; 00437 } 00438 00439 $beg = 1; 00440 while ( $beg < $end ) { 00441 $mid = (int)( ( $beg + $end ) / 2 ); 00442 if ( $ypos > $this->seq[$mid] ) { 00443 $beg = $mid + 1; 00444 } else { 00445 $end = $mid; 00446 } 00447 } 00448 00449 assert( '$ypos != $this->seq[$end]' ); 00450 00451 $this->in_seq[$this->seq[$end]] = false; 00452 $this->seq[$end] = $ypos; 00453 $this->in_seq[$ypos] = 1; 00454 return $end; 00455 } 00456 00473 function _compareseq( $xoff, $xlim, $yoff, $ylim ) { 00474 // Slide down the bottom initial diagonal. 00475 while ( $xoff < $xlim && $yoff < $ylim && $this->xv[$xoff] == $this->yv[$yoff] ) { 00476 ++$xoff; 00477 ++$yoff; 00478 } 00479 00480 // Slide up the top initial diagonal. 00481 while ( $xlim > $xoff && $ylim > $yoff 00482 && $this->xv[$xlim - 1] == $this->yv[$ylim - 1] ) { 00483 --$xlim; 00484 --$ylim; 00485 } 00486 00487 if ( $xoff == $xlim || $yoff == $ylim ) { 00488 $lcs = 0; 00489 } else { 00490 // This is ad hoc but seems to work well. 00491 // $nchunks = sqrt(min($xlim - $xoff, $ylim - $yoff) / 2.5); 00492 // $nchunks = max(2,min(8,(int)$nchunks)); 00493 $nchunks = min( 7, $xlim - $xoff, $ylim - $yoff ) + 1; 00494 list( $lcs, $seps ) = $this->_diag( $xoff, $xlim, $yoff, $ylim, $nchunks ); 00495 } 00496 00497 if ( $lcs == 0 ) { 00498 // X and Y sequences have no common subsequence: 00499 // mark all changed. 00500 while ( $yoff < $ylim ) { 00501 $this->ychanged[$this->yind[$yoff++]] = 1; 00502 } 00503 while ( $xoff < $xlim ) { 00504 $this->xchanged[$this->xind[$xoff++]] = 1; 00505 } 00506 } else { 00507 // Use the partitions to split this problem into subproblems. 00508 reset( $seps ); 00509 $pt1 = $seps[0]; 00510 while ( $pt2 = next( $seps ) ) { 00511 $this->_compareseq( $pt1[0], $pt2[0], $pt1[1], $pt2[1] ); 00512 $pt1 = $pt2; 00513 } 00514 } 00515 } 00516 00530 function _shift_boundaries( $lines, &$changed, $other_changed ) { 00531 wfProfileIn( __METHOD__ ); 00532 $i = 0; 00533 $j = 0; 00534 00535 assert( 'count($lines) == count($changed)' ); 00536 $len = count( $lines ); 00537 $other_len = count( $other_changed ); 00538 00539 while ( 1 ) { 00540 /* 00541 * Scan forwards to find beginning of another run of changes. 00542 * Also keep track of the corresponding point in the other file. 00543 * 00544 * Throughout this code, $i and $j are adjusted together so that 00545 * the first $i elements of $changed and the first $j elements 00546 * of $other_changed both contain the same number of zeros 00547 * (unchanged lines). 00548 * Furthermore, $j is always kept so that $j == $other_len or 00549 * $other_changed[$j] == false. 00550 */ 00551 while ( $j < $other_len && $other_changed[$j] ) { 00552 $j++; 00553 } 00554 00555 while ( $i < $len && ! $changed[$i] ) { 00556 assert( '$j < $other_len && ! $other_changed[$j]' ); 00557 $i++; 00558 $j++; 00559 while ( $j < $other_len && $other_changed[$j] ) { 00560 $j++; 00561 } 00562 } 00563 00564 if ( $i == $len ) { 00565 break; 00566 } 00567 00568 $start = $i; 00569 00570 // Find the end of this run of changes. 00571 while ( ++$i < $len && $changed[$i] ) { 00572 continue; 00573 } 00574 00575 do { 00576 /* 00577 * Record the length of this run of changes, so that 00578 * we can later determine whether the run has grown. 00579 */ 00580 $runlength = $i - $start; 00581 00582 /* 00583 * Move the changed region back, so long as the 00584 * previous unchanged line matches the last changed one. 00585 * This merges with previous changed regions. 00586 */ 00587 while ( $start > 0 && $lines[$start - 1] == $lines[$i - 1] ) { 00588 $changed[--$start] = 1; 00589 $changed[--$i] = false; 00590 while ( $start > 0 && $changed[$start - 1] ) { 00591 $start--; 00592 } 00593 assert( '$j > 0' ); 00594 while ( $other_changed[--$j] ) { 00595 continue; 00596 } 00597 assert( '$j >= 0 && !$other_changed[$j]' ); 00598 } 00599 00600 /* 00601 * Set CORRESPONDING to the end of the changed run, at the last 00602 * point where it corresponds to a changed run in the other file. 00603 * CORRESPONDING == LEN means no such point has been found. 00604 */ 00605 $corresponding = $j < $other_len ? $i : $len; 00606 00607 /* 00608 * Move the changed region forward, so long as the 00609 * first changed line matches the following unchanged one. 00610 * This merges with following changed regions. 00611 * Do this second, so that if there are no merges, 00612 * the changed region is moved forward as far as possible. 00613 */ 00614 while ( $i < $len && $lines[$start] == $lines[$i] ) { 00615 $changed[$start++] = false; 00616 $changed[$i++] = 1; 00617 while ( $i < $len && $changed[$i] ) { 00618 $i++; 00619 } 00620 00621 assert( '$j < $other_len && ! $other_changed[$j]' ); 00622 $j++; 00623 if ( $j < $other_len && $other_changed[$j] ) { 00624 $corresponding = $i; 00625 while ( $j < $other_len && $other_changed[$j] ) { 00626 $j++; 00627 } 00628 } 00629 } 00630 } while ( $runlength != $i - $start ); 00631 00632 /* 00633 * If possible, move the fully-merged run of changes 00634 * back to a corresponding run in the other file. 00635 */ 00636 while ( $corresponding < $i ) { 00637 $changed[--$start] = 1; 00638 $changed[--$i] = 0; 00639 assert( '$j > 0' ); 00640 while ( $other_changed[--$j] ) { 00641 continue; 00642 } 00643 assert( '$j >= 0 && !$other_changed[$j]' ); 00644 } 00645 } 00646 wfProfileOut( __METHOD__ ); 00647 } 00648 } 00649 00656 class Diff { 00657 var $edits; 00658 00667 function __construct( $from_lines, $to_lines ) { 00668 $eng = new _DiffEngine; 00669 $this->edits = $eng->diff( $from_lines, $to_lines ); 00670 // $this->_check($from_lines, $to_lines); 00671 } 00672 00683 function reverse() { 00684 $rev = $this; 00685 $rev->edits = array(); 00686 foreach ( $this->edits as $edit ) { 00687 $rev->edits[] = $edit->reverse(); 00688 } 00689 return $rev; 00690 } 00691 00697 function isEmpty() { 00698 foreach ( $this->edits as $edit ) { 00699 if ( $edit->type != 'copy' ) { 00700 return false; 00701 } 00702 } 00703 return true; 00704 } 00705 00713 function lcs() { 00714 $lcs = 0; 00715 foreach ( $this->edits as $edit ) { 00716 if ( $edit->type == 'copy' ) { 00717 $lcs += count( $edit->orig ); 00718 } 00719 } 00720 return $lcs; 00721 } 00722 00731 function orig() { 00732 $lines = array(); 00733 00734 foreach ( $this->edits as $edit ) { 00735 if ( $edit->orig ) { 00736 array_splice( $lines, count( $lines ), 0, $edit->orig ); 00737 } 00738 } 00739 return $lines; 00740 } 00741 00750 function closing() { 00751 $lines = array(); 00752 00753 foreach ( $this->edits as $edit ) { 00754 if ( $edit->closing ) { 00755 array_splice( $lines, count( $lines ), 0, $edit->closing ); 00756 } 00757 } 00758 return $lines; 00759 } 00760 00768 function _check( $from_lines, $to_lines ) { 00769 wfProfileIn( __METHOD__ ); 00770 if ( serialize( $from_lines ) != serialize( $this->orig() ) ) { 00771 trigger_error( "Reconstructed original doesn't match", E_USER_ERROR ); 00772 } 00773 if ( serialize( $to_lines ) != serialize( $this->closing() ) ) { 00774 trigger_error( "Reconstructed closing doesn't match", E_USER_ERROR ); 00775 } 00776 00777 $rev = $this->reverse(); 00778 if ( serialize( $to_lines ) != serialize( $rev->orig() ) ) { 00779 trigger_error( "Reversed original doesn't match", E_USER_ERROR ); 00780 } 00781 if ( serialize( $from_lines ) != serialize( $rev->closing() ) ) { 00782 trigger_error( "Reversed closing doesn't match", E_USER_ERROR ); 00783 } 00784 00785 $prevtype = 'none'; 00786 foreach ( $this->edits as $edit ) { 00787 if ( $prevtype == $edit->type ) { 00788 trigger_error( 'Edit sequence is non-optimal', E_USER_ERROR ); 00789 } 00790 $prevtype = $edit->type; 00791 } 00792 00793 $lcs = $this->lcs(); 00794 trigger_error( 'Diff okay: LCS = ' . $lcs, E_USER_NOTICE ); 00795 wfProfileOut( __METHOD__ ); 00796 } 00797 } 00798 00804 class MappedDiff extends Diff { 00828 function __construct( $from_lines, $to_lines, 00829 $mapped_from_lines, $mapped_to_lines ) { 00830 wfProfileIn( __METHOD__ ); 00831 00832 assert( 'count( $from_lines ) == count( $mapped_from_lines )' ); 00833 assert( 'count( $to_lines ) == count( $mapped_to_lines )' ); 00834 00835 parent::__construct( $mapped_from_lines, $mapped_to_lines ); 00836 00837 $xi = $yi = 0; 00838 for ( $i = 0; $i < count( $this->edits ); $i++ ) { 00839 $orig = &$this->edits[$i]->orig; 00840 if ( is_array( $orig ) ) { 00841 $orig = array_slice( $from_lines, $xi, count( $orig ) ); 00842 $xi += count( $orig ); 00843 } 00844 00845 $closing = &$this->edits[$i]->closing; 00846 if ( is_array( $closing ) ) { 00847 $closing = array_slice( $to_lines, $yi, count( $closing ) ); 00848 $yi += count( $closing ); 00849 } 00850 } 00851 wfProfileOut( __METHOD__ ); 00852 } 00853 } 00854 00865 class DiffFormatter { 00872 var $leading_context_lines = 0; 00873 00880 var $trailing_context_lines = 0; 00881 00888 function format( $diff ) { 00889 wfProfileIn( __METHOD__ ); 00890 00891 $xi = $yi = 1; 00892 $block = false; 00893 $context = array(); 00894 00895 $nlead = $this->leading_context_lines; 00896 $ntrail = $this->trailing_context_lines; 00897 00898 $this->_start_diff(); 00899 00900 foreach ( $diff->edits as $edit ) { 00901 if ( $edit->type == 'copy' ) { 00902 if ( is_array( $block ) ) { 00903 if ( count( $edit->orig ) <= $nlead + $ntrail ) { 00904 $block[] = $edit; 00905 } else { 00906 if ( $ntrail ) { 00907 $context = array_slice( $edit->orig, 0, $ntrail ); 00908 $block[] = new _DiffOp_Copy( $context ); 00909 } 00910 $this->_block( $x0, $ntrail + $xi - $x0, 00911 $y0, $ntrail + $yi - $y0, 00912 $block ); 00913 $block = false; 00914 } 00915 } 00916 $context = $edit->orig; 00917 } else { 00918 if ( !is_array( $block ) ) { 00919 $context = array_slice( $context, count( $context ) - $nlead ); 00920 $x0 = $xi - count( $context ); 00921 $y0 = $yi - count( $context ); 00922 $block = array(); 00923 if ( $context ) { 00924 $block[] = new _DiffOp_Copy( $context ); 00925 } 00926 } 00927 $block[] = $edit; 00928 } 00929 00930 if ( $edit->orig ) { 00931 $xi += count( $edit->orig ); 00932 } 00933 if ( $edit->closing ) { 00934 $yi += count( $edit->closing ); 00935 } 00936 } 00937 00938 if ( is_array( $block ) ) { 00939 $this->_block( $x0, $xi - $x0, 00940 $y0, $yi - $y0, 00941 $block ); 00942 } 00943 00944 $end = $this->_end_diff(); 00945 wfProfileOut( __METHOD__ ); 00946 return $end; 00947 } 00948 00956 function _block( $xbeg, $xlen, $ybeg, $ylen, &$edits ) { 00957 wfProfileIn( __METHOD__ ); 00958 $this->_start_block( $this->_block_header( $xbeg, $xlen, $ybeg, $ylen ) ); 00959 foreach ( $edits as $edit ) { 00960 if ( $edit->type == 'copy' ) { 00961 $this->_context( $edit->orig ); 00962 } elseif ( $edit->type == 'add' ) { 00963 $this->_added( $edit->closing ); 00964 } elseif ( $edit->type == 'delete' ) { 00965 $this->_deleted( $edit->orig ); 00966 } elseif ( $edit->type == 'change' ) { 00967 $this->_changed( $edit->orig, $edit->closing ); 00968 } else { 00969 trigger_error( 'Unknown edit type', E_USER_ERROR ); 00970 } 00971 } 00972 $this->_end_block(); 00973 wfProfileOut( __METHOD__ ); 00974 } 00975 00976 function _start_diff() { 00977 ob_start(); 00978 } 00979 00983 function _end_diff() { 00984 $val = ob_get_contents(); 00985 ob_end_clean(); 00986 return $val; 00987 } 00988 00996 function _block_header( $xbeg, $xlen, $ybeg, $ylen ) { 00997 if ( $xlen > 1 ) { 00998 $xbeg .= ',' . ( $xbeg + $xlen - 1 ); 00999 } 01000 if ( $ylen > 1 ) { 01001 $ybeg .= ',' . ( $ybeg + $ylen - 1 ); 01002 } 01003 01004 return $xbeg . ( $xlen ? ( $ylen ? 'c' : 'd' ) : 'a' ) . $ybeg; 01005 } 01006 01007 function _start_block( $header ) { 01008 echo $header . "\n"; 01009 } 01010 01011 function _end_block() { 01012 } 01013 01018 function _lines( $lines, $prefix = ' ' ) { 01019 foreach ( $lines as $line ) { 01020 echo "$prefix $line\n"; 01021 } 01022 } 01023 01027 function _context( $lines ) { 01028 $this->_lines( $lines ); 01029 } 01030 01034 function _added( $lines ) { 01035 $this->_lines( $lines, '>' ); 01036 } 01037 01041 function _deleted( $lines ) { 01042 $this->_lines( $lines, '<' ); 01043 } 01044 01049 function _changed( $orig, $closing ) { 01050 $this->_deleted( $orig ); 01051 echo "---\n"; 01052 $this->_added( $closing ); 01053 } 01054 } 01055 01060 class UnifiedDiffFormatter extends DiffFormatter { 01061 var $leading_context_lines = 2; 01062 var $trailing_context_lines = 2; 01063 01067 function _added( $lines ) { 01068 $this->_lines( $lines, '+' ); 01069 } 01070 01074 function _deleted( $lines ) { 01075 $this->_lines( $lines, '-' ); 01076 } 01077 01082 function _changed( $orig, $closing ) { 01083 $this->_deleted( $orig ); 01084 $this->_added( $closing ); 01085 } 01086 01094 function _block_header( $xbeg, $xlen, $ybeg, $ylen ) { 01095 return "@@ -$xbeg,$xlen +$ybeg,$ylen @@"; 01096 } 01097 } 01098 01103 class ArrayDiffFormatter extends DiffFormatter { 01104 01109 function format( $diff ) { 01110 $oldline = 1; 01111 $newline = 1; 01112 $retval = array(); 01113 foreach ( $diff->edits as $edit ) { 01114 switch ( $edit->type ) { 01115 case 'add': 01116 foreach ( $edit->closing as $l ) { 01117 $retval[] = array( 01118 'action' => 'add', 01119 'new' => $l, 01120 'newline' => $newline++ 01121 ); 01122 } 01123 break; 01124 case 'delete': 01125 foreach ( $edit->orig as $l ) { 01126 $retval[] = array( 01127 'action' => 'delete', 01128 'old' => $l, 01129 'oldline' => $oldline++, 01130 ); 01131 } 01132 break; 01133 case 'change': 01134 foreach ( $edit->orig as $i => $l ) { 01135 $retval[] = array( 01136 'action' => 'change', 01137 'old' => $l, 01138 'new' => isset( $edit->closing[$i] ) ? $edit->closing[$i] : null, 01139 'oldline' => $oldline++, 01140 'newline' => $newline++, 01141 ); 01142 } 01143 break; 01144 case 'copy': 01145 $oldline += count( $edit->orig ); 01146 $newline += count( $edit->orig ); 01147 } 01148 } 01149 return $retval; 01150 } 01151 } 01152 01157 define( 'NBSP', ' ' ); // iso-8859-x non-breaking space. 01158 01164 class _HWLDF_WordAccumulator { 01165 function __construct() { 01166 $this->_lines = array(); 01167 $this->_line = ''; 01168 $this->_group = ''; 01169 $this->_tag = ''; 01170 } 01171 01175 function _flushGroup( $new_tag ) { 01176 if ( $this->_group !== '' ) { 01177 if ( $this->_tag == 'ins' ) { 01178 $this->_line .= '<ins class="diffchange diffchange-inline">' . 01179 htmlspecialchars( $this->_group ) . '</ins>'; 01180 } elseif ( $this->_tag == 'del' ) { 01181 $this->_line .= '<del class="diffchange diffchange-inline">' . 01182 htmlspecialchars( $this->_group ) . '</del>'; 01183 } else { 01184 $this->_line .= htmlspecialchars( $this->_group ); 01185 } 01186 } 01187 $this->_group = ''; 01188 $this->_tag = $new_tag; 01189 } 01190 01194 function _flushLine( $new_tag ) { 01195 $this->_flushGroup( $new_tag ); 01196 if ( $this->_line != '' ) { 01197 array_push( $this->_lines, $this->_line ); 01198 } else { 01199 # make empty lines visible by inserting an NBSP 01200 array_push( $this->_lines, NBSP ); 01201 } 01202 $this->_line = ''; 01203 } 01204 01209 function addWords( $words, $tag = '' ) { 01210 if ( $tag != $this->_tag ) { 01211 $this->_flushGroup( $tag ); 01212 } 01213 01214 foreach ( $words as $word ) { 01215 // new-line should only come as first char of word. 01216 if ( $word == '' ) { 01217 continue; 01218 } 01219 if ( $word[0] == "\n" ) { 01220 $this->_flushLine( $tag ); 01221 $word = substr( $word, 1 ); 01222 } 01223 assert( '!strstr( $word, "\n" )' ); 01224 $this->_group .= $word; 01225 } 01226 } 01227 01231 function getLines() { 01232 $this->_flushLine( '~done' ); 01233 return $this->_lines; 01234 } 01235 } 01236 01242 class WordLevelDiff extends MappedDiff { 01243 const MAX_LINE_LENGTH = 10000; 01244 01249 function __construct( $orig_lines, $closing_lines ) { 01250 wfProfileIn( __METHOD__ ); 01251 01252 list( $orig_words, $orig_stripped ) = $this->_split( $orig_lines ); 01253 list( $closing_words, $closing_stripped ) = $this->_split( $closing_lines ); 01254 01255 parent::__construct( $orig_words, $closing_words, 01256 $orig_stripped, $closing_stripped ); 01257 wfProfileOut( __METHOD__ ); 01258 } 01259 01264 function _split( $lines ) { 01265 wfProfileIn( __METHOD__ ); 01266 01267 $words = array(); 01268 $stripped = array(); 01269 $first = true; 01270 foreach ( $lines as $line ) { 01271 # If the line is too long, just pretend the entire line is one big word 01272 # This prevents resource exhaustion problems 01273 if ( $first ) { 01274 $first = false; 01275 } else { 01276 $words[] = "\n"; 01277 $stripped[] = "\n"; 01278 } 01279 if ( strlen( $line ) > self::MAX_LINE_LENGTH ) { 01280 $words[] = $line; 01281 $stripped[] = $line; 01282 } else { 01283 $m = array(); 01284 if ( preg_match_all( '/ ( [^\S\n]+ | [0-9_A-Za-z\x80-\xff]+ | . ) (?: (?!< \n) [^\S\n])? /xs', 01285 $line, $m ) ) 01286 { 01287 foreach ( $m[0] as $word ) { 01288 $words[] = $word; 01289 } 01290 foreach ( $m[1] as $stripped_word ) { 01291 $stripped[] = $stripped_word; 01292 } 01293 } 01294 } 01295 } 01296 wfProfileOut( __METHOD__ ); 01297 return array( $words, $stripped ); 01298 } 01299 01303 function orig() { 01304 wfProfileIn( __METHOD__ ); 01305 $orig = new _HWLDF_WordAccumulator; 01306 01307 foreach ( $this->edits as $edit ) { 01308 if ( $edit->type == 'copy' ) { 01309 $orig->addWords( $edit->orig ); 01310 } elseif ( $edit->orig ) { 01311 $orig->addWords( $edit->orig, 'del' ); 01312 } 01313 } 01314 $lines = $orig->getLines(); 01315 wfProfileOut( __METHOD__ ); 01316 return $lines; 01317 } 01318 01322 function closing() { 01323 wfProfileIn( __METHOD__ ); 01324 $closing = new _HWLDF_WordAccumulator; 01325 01326 foreach ( $this->edits as $edit ) { 01327 if ( $edit->type == 'copy' ) { 01328 $closing->addWords( $edit->closing ); 01329 } elseif ( $edit->closing ) { 01330 $closing->addWords( $edit->closing, 'ins' ); 01331 } 01332 } 01333 $lines = $closing->getLines(); 01334 wfProfileOut( __METHOD__ ); 01335 return $lines; 01336 } 01337 } 01338 01345 class TableDiffFormatter extends DiffFormatter { 01346 function __construct() { 01347 $this->leading_context_lines = 2; 01348 $this->trailing_context_lines = 2; 01349 } 01350 01356 public static function escapeWhiteSpace( $msg ) { 01357 $msg = preg_replace( '/^ /m', '  ', $msg ); 01358 $msg = preg_replace( '/ $/m', '  ', $msg ); 01359 $msg = preg_replace( '/ /', '  ', $msg ); 01360 return $msg; 01361 } 01362 01370 function _block_header( $xbeg, $xlen, $ybeg, $ylen ) { 01371 $r = '<tr><td colspan="2" class="diff-lineno"><!--LINE ' . $xbeg . "--></td>\n" . 01372 '<td colspan="2" class="diff-lineno"><!--LINE ' . $ybeg . "--></td></tr>\n"; 01373 return $r; 01374 } 01375 01379 function _start_block( $header ) { 01380 echo $header; 01381 } 01382 01383 function _end_block() { 01384 } 01385 01386 function _lines( $lines, $prefix = ' ', $color = 'white' ) { 01387 } 01388 01394 function addedLine( $line ) { 01395 return $this->wrapLine( '+', 'diff-addedline', $line ); 01396 } 01397 01403 function deletedLine( $line ) { 01404 return $this->wrapLine( '−', 'diff-deletedline', $line ); 01405 } 01406 01412 function contextLine( $line ) { 01413 return $this->wrapLine( ' ', 'diff-context', $line ); 01414 } 01415 01422 private function wrapLine( $marker, $class, $line ) { 01423 if ( $line !== '' ) { 01424 // The <div> wrapper is needed for 'overflow: auto' style to scroll properly 01425 $line = Xml::tags( 'div', null, $this->escapeWhiteSpace( $line ) ); 01426 } 01427 return "<td class='diff-marker'>$marker</td><td class='$class'>$line</td>"; 01428 } 01429 01433 function emptyLine() { 01434 return '<td colspan="2"> </td>'; 01435 } 01436 01440 function _added( $lines ) { 01441 foreach ( $lines as $line ) { 01442 echo '<tr>' . $this->emptyLine() . 01443 $this->addedLine( '<ins class="diffchange">' . 01444 htmlspecialchars( $line ) . '</ins>' ) . "</tr>\n"; 01445 } 01446 } 01447 01451 function _deleted( $lines ) { 01452 foreach ( $lines as $line ) { 01453 echo '<tr>' . $this->deletedLine( '<del class="diffchange">' . 01454 htmlspecialchars( $line ) . '</del>' ) . 01455 $this->emptyLine() . "</tr>\n"; 01456 } 01457 } 01458 01462 function _context( $lines ) { 01463 foreach ( $lines as $line ) { 01464 echo '<tr>' . 01465 $this->contextLine( htmlspecialchars( $line ) ) . 01466 $this->contextLine( htmlspecialchars( $line ) ) . "</tr>\n"; 01467 } 01468 } 01469 01474 function _changed( $orig, $closing ) { 01475 wfProfileIn( __METHOD__ ); 01476 01477 $diff = new WordLevelDiff( $orig, $closing ); 01478 $del = $diff->orig(); 01479 $add = $diff->closing(); 01480 01481 # Notice that WordLevelDiff returns HTML-escaped output. 01482 # Hence, we will be calling addedLine/deletedLine without HTML-escaping. 01483 01484 while ( $line = array_shift( $del ) ) { 01485 $aline = array_shift( $add ); 01486 echo '<tr>' . $this->deletedLine( $line ) . 01487 $this->addedLine( $aline ) . "</tr>\n"; 01488 } 01489 foreach ( $add as $line ) { # If any leftovers 01490 echo '<tr>' . $this->emptyLine() . 01491 $this->addedLine( $line ) . "</tr>\n"; 01492 } 01493 wfProfileOut( __METHOD__ ); 01494 } 01495 }