MediaWiki  REL1_21
DairikiDiff.php
Go to the documentation of this file.
00001 <?php
00033 class _DiffOp {
00034         var $type;
00035         var $orig;
00036         var $closing;
00037 
00038         function reverse() {
00039                 trigger_error( 'pure virtual', E_USER_ERROR );
00040         }
00041 
00045         function norig() {
00046                 return $this->orig ? count( $this->orig ) : 0;
00047         }
00048 
00052         function nclosing() {
00053                 return $this->closing ? count( $this->closing ) : 0;
00054         }
00055 }
00056 
00062 class _DiffOp_Copy extends _DiffOp {
00063         var $type = 'copy';
00064 
00065         function __construct( $orig, $closing = false ) {
00066                 if ( !is_array( $closing ) ) {
00067                         $closing = $orig;
00068                 }
00069                 $this->orig = $orig;
00070                 $this->closing = $closing;
00071         }
00072 
00076         function reverse() {
00077                 return new _DiffOp_Copy( $this->closing, $this->orig );
00078         }
00079 }
00080 
00086 class _DiffOp_Delete extends _DiffOp {
00087         var $type = 'delete';
00088 
00089         function __construct( $lines ) {
00090                 $this->orig = $lines;
00091                 $this->closing = false;
00092         }
00093 
00097         function reverse() {
00098                 return new _DiffOp_Add( $this->orig );
00099         }
00100 }
00101 
00107 class _DiffOp_Add extends _DiffOp {
00108         var $type = 'add';
00109 
00110         function __construct( $lines ) {
00111                 $this->closing = $lines;
00112                 $this->orig = false;
00113         }
00114 
00118         function reverse() {
00119                 return new _DiffOp_Delete( $this->closing );
00120         }
00121 }
00122 
00128 class _DiffOp_Change extends _DiffOp {
00129         var $type = 'change';
00130 
00131         function __construct( $orig, $closing ) {
00132                 $this->orig = $orig;
00133                 $this->closing = $closing;
00134         }
00135 
00139         function reverse() {
00140                 return new _DiffOp_Change( $this->closing, $this->orig );
00141         }
00142 }
00143 
00168 class _DiffEngine {
00169 
00170         const MAX_XREF_LENGTH = 10000;
00171 
00172         protected $xchanged, $ychanged;
00173 
00174         protected $xv = array(), $yv = array();
00175         protected $xind = array(), $yind = array();
00176 
00177         protected $seq = array(), $in_seq = array();
00178 
00179         protected $lcs = 0;
00180 
00186         function diff ( $from_lines, $to_lines ) {
00187                 wfProfileIn( __METHOD__ );
00188 
00189                 // Diff and store locally
00190                 $this->diff_local( $from_lines, $to_lines );
00191 
00192                 // Merge edits when possible
00193                 $this->_shift_boundaries( $from_lines, $this->xchanged, $this->ychanged );
00194                 $this->_shift_boundaries( $to_lines, $this->ychanged, $this->xchanged );
00195 
00196                 // Compute the edit operations.
00197                 $n_from = count( $from_lines );
00198                 $n_to = count( $to_lines );
00199 
00200                 $edits = array();
00201                 $xi = $yi = 0;
00202                 while ( $xi < $n_from || $yi < $n_to ) {
00203                         assert( '$yi < $n_to || $this->xchanged[$xi]' );
00204                         assert( '$xi < $n_from || $this->ychanged[$yi]' );
00205 
00206                         // Skip matching "snake".
00207                         $copy = array();
00208                         while ( $xi < $n_from && $yi < $n_to
00209                         && !$this->xchanged[$xi] && !$this->ychanged[$yi] ) {
00210                                 $copy[] = $from_lines[$xi++];
00211                                 ++$yi;
00212                         }
00213                         if ( $copy ) {
00214                                 $edits[] = new _DiffOp_Copy( $copy );
00215                         }
00216 
00217                         // Find deletes & adds.
00218                         $delete = array();
00219                         while ( $xi < $n_from && $this->xchanged[$xi] ) {
00220                                 $delete[] = $from_lines[$xi++];
00221                         }
00222 
00223                         $add = array();
00224                         while ( $yi < $n_to && $this->ychanged[$yi] ) {
00225                                 $add[] = $to_lines[$yi++];
00226                         }
00227 
00228                         if ( $delete && $add ) {
00229                                 $edits[] = new _DiffOp_Change( $delete, $add );
00230                         } elseif ( $delete ) {
00231                                 $edits[] = new _DiffOp_Delete( $delete );
00232                         } elseif ( $add ) {
00233                                 $edits[] = new _DiffOp_Add( $add );
00234                         }
00235                 }
00236                 wfProfileOut( __METHOD__ );
00237                 return $edits;
00238         }
00239 
00244         function diff_local ( $from_lines, $to_lines ) {
00245                 global $wgExternalDiffEngine;
00246                 wfProfileIn( __METHOD__ );
00247 
00248                 if ( $wgExternalDiffEngine == 'wikidiff3' ) {
00249                         // wikidiff3
00250                         $wikidiff3 = new WikiDiff3();
00251                         $wikidiff3->diff( $from_lines, $to_lines );
00252                         $this->xchanged = $wikidiff3->removed;
00253                         $this->ychanged = $wikidiff3->added;
00254                         unset( $wikidiff3 );
00255                 } else {
00256                         // old diff
00257                         $n_from = count( $from_lines );
00258                         $n_to = count( $to_lines );
00259                         $this->xchanged = $this->ychanged = array();
00260                         $this->xv = $this->yv = array();
00261                         $this->xind = $this->yind = array();
00262                         unset( $this->seq );
00263                         unset( $this->in_seq );
00264                         unset( $this->lcs );
00265 
00266                         // Skip leading common lines.
00267                         for ( $skip = 0; $skip < $n_from && $skip < $n_to; $skip++ ) {
00268                                 if ( $from_lines[$skip] !== $to_lines[$skip] ) {
00269                                         break;
00270                                 }
00271                                 $this->xchanged[$skip] = $this->ychanged[$skip] = false;
00272                         }
00273                         // Skip trailing common lines.
00274                         $xi = $n_from; $yi = $n_to;
00275                         for ( $endskip = 0; --$xi > $skip && --$yi > $skip; $endskip++ ) {
00276                                 if ( $from_lines[$xi] !== $to_lines[$yi] ) {
00277                                         break;
00278                                 }
00279                                 $this->xchanged[$xi] = $this->ychanged[$yi] = false;
00280                         }
00281 
00282                         // Ignore lines which do not exist in both files.
00283                         for ( $xi = $skip; $xi < $n_from - $endskip; $xi++ ) {
00284                                 $xhash[$this->_line_hash( $from_lines[$xi] )] = 1;
00285                         }
00286 
00287                         for ( $yi = $skip; $yi < $n_to - $endskip; $yi++ ) {
00288                                 $line = $to_lines[$yi];
00289                                 if ( ( $this->ychanged[$yi] = empty( $xhash[$this->_line_hash( $line )] ) ) ) {
00290                                         continue;
00291                                 }
00292                                 $yhash[$this->_line_hash( $line )] = 1;
00293                                 $this->yv[] = $line;
00294                                 $this->yind[] = $yi;
00295                         }
00296                         for ( $xi = $skip; $xi < $n_from - $endskip; $xi++ ) {
00297                                 $line = $from_lines[$xi];
00298                                 if ( ( $this->xchanged[$xi] = empty( $yhash[$this->_line_hash( $line )] ) ) ) {
00299                                         continue;
00300                                 }
00301                                 $this->xv[] = $line;
00302                                 $this->xind[] = $xi;
00303                         }
00304 
00305                         // Find the LCS.
00306                         $this->_compareseq( 0, count( $this->xv ), 0, count( $this->yv ) );
00307                 }
00308                 wfProfileOut( __METHOD__ );
00309         }
00310 
00316         function _line_hash( $line ) {
00317                 if ( strlen( $line ) > self::MAX_XREF_LENGTH ) {
00318                         return md5( $line );
00319                 } else {
00320                         return $line;
00321                 }
00322         }
00323 
00347         function _diag( $xoff, $xlim, $yoff, $ylim, $nchunks ) {
00348                 $flip = false;
00349 
00350                 if ( $xlim - $xoff > $ylim - $yoff ) {
00351                         // Things seems faster (I'm not sure I understand why)
00352                         // when the shortest sequence in X.
00353                         $flip = true;
00354                         list( $xoff, $xlim, $yoff, $ylim ) = array( $yoff, $ylim, $xoff, $xlim );
00355                 }
00356 
00357                 if ( $flip ) {
00358                         for ( $i = $ylim - 1; $i >= $yoff; $i-- ) {
00359                                 $ymatches[$this->xv[$i]][] = $i;
00360                         }
00361                 } else {
00362                         for ( $i = $ylim - 1; $i >= $yoff; $i-- ) {
00363                                 $ymatches[$this->yv[$i]][] = $i;
00364                         }
00365                 }
00366 
00367                 $this->lcs = 0;
00368                 $this->seq[0] = $yoff - 1;
00369                 $this->in_seq = array();
00370                 $ymids[0] = array();
00371 
00372                 $numer = $xlim - $xoff + $nchunks - 1;
00373                 $x = $xoff;
00374                 for ( $chunk = 0; $chunk < $nchunks; $chunk++ ) {
00375                         if ( $chunk > 0 ) {
00376                                 for ( $i = 0; $i <= $this->lcs; $i++ ) {
00377                                         $ymids[$i][$chunk -1] = $this->seq[$i];
00378                                 }
00379                         }
00380 
00381                         $x1 = $xoff + (int)( ( $numer + ( $xlim -$xoff ) * $chunk ) / $nchunks );
00382                         for ( ; $x < $x1; $x++ ) {
00383                                 $line = $flip ? $this->yv[$x] : $this->xv[$x];
00384                                 if ( empty( $ymatches[$line] ) ) {
00385                                         continue;
00386                                 }
00387                                 $matches = $ymatches[$line];
00388                                 reset( $matches );
00389                                 while ( list( , $y ) = each( $matches ) ) {
00390                                         if ( empty( $this->in_seq[$y] ) ) {
00391                                                 $k = $this->_lcs_pos( $y );
00392                                                 assert( '$k > 0' );
00393                                                 $ymids[$k] = $ymids[$k -1];
00394                                                 break;
00395                                         }
00396                                 }
00397                                 while ( list ( , $y ) = each( $matches ) ) {
00398                                         if ( $y > $this->seq[$k -1] ) {
00399                                                 assert( '$y < $this->seq[$k]' );
00400                                                 // Optimization: this is a common case:
00401                                                 //      next match is just replacing previous match.
00402                                                 $this->in_seq[$this->seq[$k]] = false;
00403                                                 $this->seq[$k] = $y;
00404                                                 $this->in_seq[$y] = 1;
00405                                         } elseif ( empty( $this->in_seq[$y] ) ) {
00406                                                 $k = $this->_lcs_pos( $y );
00407                                                 assert( '$k > 0' );
00408                                                 $ymids[$k] = $ymids[$k -1];
00409                                         }
00410                                 }
00411                         }
00412                 }
00413 
00414                 $seps[] = $flip ? array( $yoff, $xoff ) : array( $xoff, $yoff );
00415                 $ymid = $ymids[$this->lcs];
00416                 for ( $n = 0; $n < $nchunks - 1; $n++ ) {
00417                         $x1 = $xoff + (int)( ( $numer + ( $xlim - $xoff ) * $n ) / $nchunks );
00418                         $y1 = $ymid[$n] + 1;
00419                         $seps[] = $flip ? array( $y1, $x1 ) : array( $x1, $y1 );
00420                 }
00421                 $seps[] = $flip ? array( $ylim, $xlim ) : array( $xlim, $ylim );
00422 
00423                 return array( $this->lcs, $seps );
00424         }
00425 
00430         function _lcs_pos( $ypos ) {
00431                 $end = $this->lcs;
00432                 if ( $end == 0 || $ypos > $this->seq[$end] ) {
00433                         $this->seq[++$this->lcs] = $ypos;
00434                         $this->in_seq[$ypos] = 1;
00435                         return $this->lcs;
00436                 }
00437 
00438                 $beg = 1;
00439                 while ( $beg < $end ) {
00440                         $mid = (int)( ( $beg + $end ) / 2 );
00441                         if ( $ypos > $this->seq[$mid] ) {
00442                                 $beg = $mid + 1;
00443                         } else {
00444                                 $end = $mid;
00445                         }
00446                 }
00447 
00448                 assert( '$ypos != $this->seq[$end]' );
00449 
00450                 $this->in_seq[$this->seq[$end]] = false;
00451                 $this->seq[$end] = $ypos;
00452                 $this->in_seq[$ypos] = 1;
00453                 return $end;
00454         }
00455 
00472         function _compareseq ( $xoff, $xlim, $yoff, $ylim ) {
00473                 // Slide down the bottom initial diagonal.
00474                 while ( $xoff < $xlim && $yoff < $ylim
00475                 && $this->xv[$xoff] == $this->yv[$yoff] ) {
00476                         ++$xoff;
00477                         ++$yoff;
00478                 }
00479 
00480                 // Slide up the top initial diagonal.
00481                 while ( $xlim > $xoff && $ylim > $yoff
00482                 && $this->xv[$xlim - 1] == $this->yv[$ylim - 1] ) {
00483                         --$xlim;
00484                         --$ylim;
00485                 }
00486 
00487                 if ( $xoff == $xlim || $yoff == $ylim ) {
00488                         $lcs = 0;
00489                 } else {
00490                         // This is ad hoc but seems to work well.
00491                         // $nchunks = sqrt(min($xlim - $xoff, $ylim - $yoff) / 2.5);
00492                         // $nchunks = max(2,min(8,(int)$nchunks));
00493                         $nchunks = min( 7, $xlim - $xoff, $ylim - $yoff ) + 1;
00494                         list ( $lcs, $seps ) = $this->_diag( $xoff, $xlim, $yoff, $ylim, $nchunks );
00495                 }
00496 
00497                 if ( $lcs == 0 ) {
00498                         // X and Y sequences have no common subsequence:
00499                         // mark all changed.
00500                         while ( $yoff < $ylim ) {
00501                                 $this->ychanged[$this->yind[$yoff++]] = 1;
00502                         }
00503                         while ( $xoff < $xlim ) {
00504                                 $this->xchanged[$this->xind[$xoff++]] = 1;
00505                         }
00506                 } else {
00507                         // Use the partitions to split this problem into subproblems.
00508                         reset( $seps );
00509                         $pt1 = $seps[0];
00510                         while ( $pt2 = next( $seps ) ) {
00511                                 $this->_compareseq ( $pt1[0], $pt2[0], $pt1[1], $pt2[1] );
00512                                 $pt1 = $pt2;
00513                         }
00514                 }
00515         }
00516 
00530         function _shift_boundaries( $lines, &$changed, $other_changed ) {
00531                 wfProfileIn( __METHOD__ );
00532                 $i = 0;
00533                 $j = 0;
00534 
00535                 assert( 'count($lines) == count($changed)' );
00536                 $len = count( $lines );
00537                 $other_len = count( $other_changed );
00538 
00539                 while ( 1 ) {
00540                         /*
00541                          * Scan forwards to find beginning of another run of changes.
00542                          * Also keep track of the corresponding point in the other file.
00543                          *
00544                          * Throughout this code, $i and $j are adjusted together so that
00545                          * the first $i elements of $changed and the first $j elements
00546                          * of $other_changed both contain the same number of zeros
00547                          * (unchanged lines).
00548                          * Furthermore, $j is always kept so that $j == $other_len or
00549                          * $other_changed[$j] == false.
00550                          */
00551                         while ( $j < $other_len && $other_changed[$j] ) {
00552                                 $j++;
00553                         }
00554 
00555                         while ( $i < $len && ! $changed[$i] ) {
00556                                 assert( '$j < $other_len && ! $other_changed[$j]' );
00557                                 $i++; $j++;
00558                                 while ( $j < $other_len && $other_changed[$j] )
00559                                 $j++;
00560                         }
00561 
00562                         if ( $i == $len ) {
00563                                 break;
00564                         }
00565 
00566                         $start = $i;
00567 
00568                         // Find the end of this run of changes.
00569                         while ( ++$i < $len && $changed[$i] ) {
00570                                 continue;
00571                         }
00572 
00573                         do {
00574                                 /*
00575                                  * Record the length of this run of changes, so that
00576                                  * we can later determine whether the run has grown.
00577                                  */
00578                                 $runlength = $i - $start;
00579 
00580                                 /*
00581                                  * Move the changed region back, so long as the
00582                                  * previous unchanged line matches the last changed one.
00583                                  * This merges with previous changed regions.
00584                                  */
00585                                 while ( $start > 0 && $lines[$start - 1] == $lines[$i - 1] ) {
00586                                         $changed[--$start] = 1;
00587                                         $changed[--$i] = false;
00588                                         while ( $start > 0 && $changed[$start - 1] ) {
00589                                                 $start--;
00590                                         }
00591                                         assert( '$j > 0' );
00592                                         while ( $other_changed[--$j] ) {
00593                                                 continue;
00594                                         }
00595                                         assert( '$j >= 0 && !$other_changed[$j]' );
00596                                 }
00597 
00598                                 /*
00599                                  * Set CORRESPONDING to the end of the changed run, at the last
00600                                  * point where it corresponds to a changed run in the other file.
00601                                  * CORRESPONDING == LEN means no such point has been found.
00602                                  */
00603                                 $corresponding = $j < $other_len ? $i : $len;
00604 
00605                                 /*
00606                                  * Move the changed region forward, so long as the
00607                                  * first changed line matches the following unchanged one.
00608                                  * This merges with following changed regions.
00609                                  * Do this second, so that if there are no merges,
00610                                  * the changed region is moved forward as far as possible.
00611                                  */
00612                                 while ( $i < $len && $lines[$start] == $lines[$i] ) {
00613                                         $changed[$start++] = false;
00614                                         $changed[$i++] = 1;
00615                                         while ( $i < $len && $changed[$i] ) {
00616                                                 $i++;
00617                                         }
00618 
00619                                         assert( '$j < $other_len && ! $other_changed[$j]' );
00620                                         $j++;
00621                                         if ( $j < $other_len && $other_changed[$j] ) {
00622                                                 $corresponding = $i;
00623                                                 while ( $j < $other_len && $other_changed[$j] ) {
00624                                                         $j++;
00625                                                 }
00626                                         }
00627                                 }
00628                         } while ( $runlength != $i - $start );
00629 
00630                         /*
00631                          * If possible, move the fully-merged run of changes
00632                          * back to a corresponding run in the other file.
00633                          */
00634                         while ( $corresponding < $i ) {
00635                                 $changed[--$start] = 1;
00636                                 $changed[--$i] = 0;
00637                                 assert( '$j > 0' );
00638                                 while ( $other_changed[--$j] ) {
00639                                         continue;
00640                                 }
00641                                 assert( '$j >= 0 && !$other_changed[$j]' );
00642                         }
00643                 }
00644                 wfProfileOut( __METHOD__ );
00645         }
00646 }
00647 
00654 class Diff {
00655         var $edits;
00656 
00665         function __construct( $from_lines, $to_lines ) {
00666                 $eng = new _DiffEngine;
00667                 $this->edits = $eng->diff( $from_lines, $to_lines );
00668                 // $this->_check($from_lines, $to_lines);
00669         }
00670 
00681         function reverse() {
00682                 $rev = $this;
00683                 $rev->edits = array();
00684                 foreach ( $this->edits as $edit ) {
00685                         $rev->edits[] = $edit->reverse();
00686                 }
00687                 return $rev;
00688         }
00689 
00695         function isEmpty() {
00696                 foreach ( $this->edits as $edit ) {
00697                         if ( $edit->type != 'copy' ) {
00698                                 return false;
00699                         }
00700                 }
00701                 return true;
00702         }
00703 
00711         function lcs() {
00712                 $lcs = 0;
00713                 foreach ( $this->edits as $edit ) {
00714                         if ( $edit->type == 'copy' ) {
00715                                 $lcs += count( $edit->orig );
00716                         }
00717                 }
00718                 return $lcs;
00719         }
00720 
00729         function orig() {
00730                 $lines = array();
00731 
00732                 foreach ( $this->edits as $edit ) {
00733                         if ( $edit->orig ) {
00734                                 array_splice( $lines, count( $lines ), 0, $edit->orig );
00735                         }
00736                 }
00737                 return $lines;
00738         }
00739 
00748         function closing() {
00749                 $lines = array();
00750 
00751                 foreach ( $this->edits as $edit ) {
00752                         if ( $edit->closing ) {
00753                                 array_splice( $lines, count( $lines ), 0, $edit->closing );
00754                         }
00755                 }
00756                 return $lines;
00757         }
00758 
00766         function _check( $from_lines, $to_lines ) {
00767                 wfProfileIn( __METHOD__ );
00768                 if ( serialize( $from_lines ) != serialize( $this->orig() ) ) {
00769                         trigger_error( "Reconstructed original doesn't match", E_USER_ERROR );
00770                 }
00771                 if ( serialize( $to_lines ) != serialize( $this->closing() ) ) {
00772                         trigger_error( "Reconstructed closing doesn't match", E_USER_ERROR );
00773                 }
00774 
00775                 $rev = $this->reverse();
00776                 if ( serialize( $to_lines ) != serialize( $rev->orig() ) ) {
00777                         trigger_error( "Reversed original doesn't match", E_USER_ERROR );
00778                 }
00779                 if ( serialize( $from_lines ) != serialize( $rev->closing() ) ) {
00780                         trigger_error( "Reversed closing doesn't match", E_USER_ERROR );
00781                 }
00782 
00783                 $prevtype = 'none';
00784                 foreach ( $this->edits as $edit ) {
00785                         if ( $prevtype == $edit->type ) {
00786                                 trigger_error( 'Edit sequence is non-optimal', E_USER_ERROR );
00787                         }
00788                         $prevtype = $edit->type;
00789                 }
00790 
00791                 $lcs = $this->lcs();
00792                 trigger_error( 'Diff okay: LCS = ' . $lcs, E_USER_NOTICE );
00793                 wfProfileOut( __METHOD__ );
00794         }
00795 }
00796 
00802 class MappedDiff extends Diff {
00826         function __construct( $from_lines, $to_lines,
00827                 $mapped_from_lines, $mapped_to_lines ) {
00828                 wfProfileIn( __METHOD__ );
00829 
00830                 assert( 'count( $from_lines ) == count( $mapped_from_lines )' );
00831                 assert( 'count( $to_lines ) == count( $mapped_to_lines )' );
00832 
00833                 parent::__construct( $mapped_from_lines, $mapped_to_lines );
00834 
00835                 $xi = $yi = 0;
00836                 for ( $i = 0; $i < count( $this->edits ); $i++ ) {
00837                         $orig = &$this->edits[$i]->orig;
00838                         if ( is_array( $orig ) ) {
00839                                 $orig = array_slice( $from_lines, $xi, count( $orig ) );
00840                                 $xi += count( $orig );
00841                         }
00842 
00843                         $closing = &$this->edits[$i]->closing;
00844                         if ( is_array( $closing ) ) {
00845                                 $closing = array_slice( $to_lines, $yi, count( $closing ) );
00846                                 $yi += count( $closing );
00847                         }
00848                 }
00849                 wfProfileOut( __METHOD__ );
00850         }
00851 }
00852 
00863 class DiffFormatter {
00870         var $leading_context_lines = 0;
00871 
00878         var $trailing_context_lines = 0;
00879 
00886         function format( $diff ) {
00887                 wfProfileIn( __METHOD__ );
00888 
00889                 $xi = $yi = 1;
00890                 $block = false;
00891                 $context = array();
00892 
00893                 $nlead = $this->leading_context_lines;
00894                 $ntrail = $this->trailing_context_lines;
00895 
00896                 $this->_start_diff();
00897 
00898                 foreach ( $diff->edits as $edit ) {
00899                         if ( $edit->type == 'copy' ) {
00900                                 if ( is_array( $block ) ) {
00901                                         if ( count( $edit->orig ) <= $nlead + $ntrail ) {
00902                                                 $block[] = $edit;
00903                                         } else {
00904                                                 if ( $ntrail ) {
00905                                                         $context = array_slice( $edit->orig, 0, $ntrail );
00906                                                         $block[] = new _DiffOp_Copy( $context );
00907                                                 }
00908                                                 $this->_block( $x0, $ntrail + $xi - $x0,
00909                                                         $y0, $ntrail + $yi - $y0,
00910                                                         $block );
00911                                                 $block = false;
00912                                         }
00913                                 }
00914                                 $context = $edit->orig;
00915                         } else {
00916                                 if ( !is_array( $block ) ) {
00917                                         $context = array_slice( $context, count( $context ) - $nlead );
00918                                         $x0 = $xi - count( $context );
00919                                         $y0 = $yi - count( $context );
00920                                         $block = array();
00921                                         if ( $context ) {
00922                                                 $block[] = new _DiffOp_Copy( $context );
00923                                         }
00924                                 }
00925                                 $block[] = $edit;
00926                         }
00927 
00928                         if ( $edit->orig ) {
00929                                 $xi += count( $edit->orig );
00930                         }
00931                         if ( $edit->closing ) {
00932                                 $yi += count( $edit->closing );
00933                         }
00934                 }
00935 
00936                 if ( is_array( $block ) ) {
00937                         $this->_block( $x0, $xi - $x0,
00938                                 $y0, $yi - $y0,
00939                                 $block );
00940                 }
00941 
00942                 $end = $this->_end_diff();
00943                 wfProfileOut( __METHOD__ );
00944                 return $end;
00945         }
00946 
00954         function _block( $xbeg, $xlen, $ybeg, $ylen, &$edits ) {
00955                 wfProfileIn( __METHOD__ );
00956                 $this->_start_block( $this->_block_header( $xbeg, $xlen, $ybeg, $ylen ) );
00957                 foreach ( $edits as $edit ) {
00958                         if ( $edit->type == 'copy' ) {
00959                                 $this->_context( $edit->orig );
00960                         } elseif ( $edit->type == 'add' ) {
00961                                 $this->_added( $edit->closing );
00962                         } elseif ( $edit->type == 'delete' ) {
00963                                 $this->_deleted( $edit->orig );
00964                         } elseif ( $edit->type == 'change' ) {
00965                                 $this->_changed( $edit->orig, $edit->closing );
00966                         } else {
00967                                 trigger_error( 'Unknown edit type', E_USER_ERROR );
00968                         }
00969                 }
00970                 $this->_end_block();
00971                 wfProfileOut( __METHOD__ );
00972         }
00973 
00974         function _start_diff() {
00975                 ob_start();
00976         }
00977 
00981         function _end_diff() {
00982                 $val = ob_get_contents();
00983                 ob_end_clean();
00984                 return $val;
00985         }
00986 
00994         function _block_header( $xbeg, $xlen, $ybeg, $ylen ) {
00995                 if ( $xlen > 1 ) {
00996                         $xbeg .= ',' . ( $xbeg + $xlen - 1 );
00997                 }
00998                 if ( $ylen > 1 ) {
00999                         $ybeg .= ',' . ( $ybeg + $ylen - 1 );
01000                 }
01001 
01002                 return $xbeg . ( $xlen ? ( $ylen ? 'c' : 'd' ) : 'a' ) . $ybeg;
01003         }
01004 
01005         function _start_block( $header ) {
01006                 echo $header . "\n";
01007         }
01008 
01009         function _end_block() {
01010         }
01011 
01016         function _lines( $lines, $prefix = ' ' ) {
01017                 foreach ( $lines as $line ) {
01018                         echo "$prefix $line\n";
01019                 }
01020         }
01021 
01025         function _context( $lines ) {
01026                 $this->_lines( $lines );
01027         }
01028 
01032         function _added( $lines ) {
01033                 $this->_lines( $lines, '>' );
01034         }
01035 
01039         function _deleted( $lines ) {
01040                 $this->_lines( $lines, '<' );
01041         }
01042 
01047         function _changed( $orig, $closing ) {
01048                 $this->_deleted( $orig );
01049                 echo "---\n";
01050                 $this->_added( $closing );
01051         }
01052 }
01053 
01058 class UnifiedDiffFormatter extends DiffFormatter {
01059         var $leading_context_lines = 2;
01060         var $trailing_context_lines = 2;
01061 
01065         function _added( $lines ) {
01066                 $this->_lines( $lines, '+' );
01067         }
01068 
01072         function _deleted( $lines ) {
01073                 $this->_lines( $lines, '-' );
01074         }
01075 
01080         function _changed( $orig, $closing ) {
01081                 $this->_deleted( $orig );
01082                 $this->_added( $closing );
01083         }
01084 
01092         function _block_header( $xbeg, $xlen, $ybeg, $ylen ) {
01093                 return "@@ -$xbeg,$xlen +$ybeg,$ylen @@";
01094         }
01095 }
01096 
01101 class ArrayDiffFormatter extends DiffFormatter {
01102 
01107         function format( $diff ) {
01108                 $oldline = 1;
01109                 $newline = 1;
01110                 $retval = array();
01111                 foreach ( $diff->edits as $edit ) {
01112                         switch( $edit->type ) {
01113                                 case 'add':
01114                                         foreach ( $edit->closing as $l ) {
01115                                                 $retval[] = array(
01116                                                         'action' => 'add',
01117                                                         'new' => $l,
01118                                                         'newline' => $newline++
01119                                                 );
01120                                         }
01121                                         break;
01122                                 case 'delete':
01123                                         foreach ( $edit->orig as $l ) {
01124                                                 $retval[] = array(
01125                                                         'action' => 'delete',
01126                                                         'old' => $l,
01127                                                         'oldline' => $oldline++,
01128                                                 );
01129                                         }
01130                                         break;
01131                                 case 'change':
01132                                         foreach ( $edit->orig as $i => $l ) {
01133                                                 $retval[] = array(
01134                                                         'action' => 'change',
01135                                                         'old' => $l,
01136                                                         'new' => isset( $edit->closing[$i] ) ? $edit->closing[$i] : null,
01137                                                         'oldline' => $oldline++,
01138                                                         'newline' => $newline++,
01139                                                 );
01140                                         }
01141                                         break;
01142                                 case 'copy':
01143                                         $oldline += count( $edit->orig );
01144                                         $newline += count( $edit->orig );
01145                         }
01146                 }
01147                 return $retval;
01148         }
01149 }
01150 
01155 define( 'NBSP', '&#160;' ); // iso-8859-x non-breaking space.
01156 
01162 class _HWLDF_WordAccumulator {
01163         function __construct() {
01164                 $this->_lines = array();
01165                 $this->_line = '';
01166                 $this->_group = '';
01167                 $this->_tag = '';
01168         }
01169 
01173         function _flushGroup( $new_tag ) {
01174                 if ( $this->_group !== '' ) {
01175                         if ( $this->_tag == 'ins' ) {
01176                                 $this->_line .= '<ins class="diffchange diffchange-inline">' .
01177                                                 htmlspecialchars( $this->_group ) . '</ins>';
01178                         } elseif ( $this->_tag == 'del' ) {
01179                                 $this->_line .= '<del class="diffchange diffchange-inline">' .
01180                                                 htmlspecialchars( $this->_group ) . '</del>';
01181                         } else {
01182                                 $this->_line .= htmlspecialchars( $this->_group );
01183                         }
01184                 }
01185                 $this->_group = '';
01186                 $this->_tag = $new_tag;
01187         }
01188 
01192         function _flushLine( $new_tag ) {
01193                 $this->_flushGroup( $new_tag );
01194                 if ( $this->_line != '' ) {
01195                         array_push( $this->_lines, $this->_line );
01196                 } else {
01197                         # make empty lines visible by inserting an NBSP
01198                         array_push( $this->_lines, NBSP );
01199                 }
01200                 $this->_line = '';
01201         }
01202 
01207         function addWords ( $words, $tag = '' ) {
01208                 if ( $tag != $this->_tag ) {
01209                         $this->_flushGroup( $tag );
01210                 }
01211 
01212                 foreach ( $words as $word ) {
01213                         // new-line should only come as first char of word.
01214                         if ( $word == '' ) {
01215                                 continue;
01216                         }
01217                         if ( $word[0] == "\n" ) {
01218                                 $this->_flushLine( $tag );
01219                                 $word = substr( $word, 1 );
01220                         }
01221                         assert( '!strstr( $word, "\n" )' );
01222                         $this->_group .= $word;
01223                 }
01224         }
01225 
01229         function getLines() {
01230                 $this->_flushLine( '~done' );
01231                 return $this->_lines;
01232         }
01233 }
01234 
01240 class WordLevelDiff extends MappedDiff {
01241         const MAX_LINE_LENGTH = 10000;
01242 
01247         function __construct ( $orig_lines, $closing_lines ) {
01248                 wfProfileIn( __METHOD__ );
01249 
01250                 list( $orig_words, $orig_stripped ) = $this->_split( $orig_lines );
01251                 list( $closing_words, $closing_stripped ) = $this->_split( $closing_lines );
01252 
01253                 parent::__construct( $orig_words, $closing_words,
01254                 $orig_stripped, $closing_stripped );
01255                 wfProfileOut( __METHOD__ );
01256         }
01257 
01262         function _split( $lines ) {
01263                 wfProfileIn( __METHOD__ );
01264 
01265                 $words = array();
01266                 $stripped = array();
01267                 $first = true;
01268                 foreach ( $lines as $line ) {
01269                         # If the line is too long, just pretend the entire line is one big word
01270                         # This prevents resource exhaustion problems
01271                         if ( $first ) {
01272                                 $first = false;
01273                         } else {
01274                                 $words[] = "\n";
01275                                 $stripped[] = "\n";
01276                         }
01277                         if ( strlen( $line ) > self::MAX_LINE_LENGTH ) {
01278                                 $words[] = $line;
01279                                 $stripped[] = $line;
01280                         } else {
01281                                 $m = array();
01282                                 if ( preg_match_all( '/ ( [^\S\n]+ | [0-9_A-Za-z\x80-\xff]+ | . ) (?: (?!< \n) [^\S\n])? /xs',
01283                                         $line, $m ) )
01284                                 {
01285                                         $words = array_merge( $words, $m[0] );
01286                                         $stripped = array_merge( $stripped, $m[1] );
01287                                 }
01288                         }
01289                 }
01290                 wfProfileOut( __METHOD__ );
01291                 return array( $words, $stripped );
01292         }
01293 
01297         function orig() {
01298                 wfProfileIn( __METHOD__ );
01299                 $orig = new _HWLDF_WordAccumulator;
01300 
01301                 foreach ( $this->edits as $edit ) {
01302                         if ( $edit->type == 'copy' ) {
01303                                 $orig->addWords( $edit->orig );
01304                         } elseif ( $edit->orig ) {
01305                                 $orig->addWords( $edit->orig, 'del' );
01306                         }
01307                 }
01308                 $lines = $orig->getLines();
01309                 wfProfileOut( __METHOD__ );
01310                 return $lines;
01311         }
01312 
01316         function closing() {
01317                 wfProfileIn( __METHOD__ );
01318                 $closing = new _HWLDF_WordAccumulator;
01319 
01320                 foreach ( $this->edits as $edit ) {
01321                         if ( $edit->type == 'copy' ) {
01322                                 $closing->addWords( $edit->closing );
01323                         } elseif ( $edit->closing ) {
01324                                 $closing->addWords( $edit->closing, 'ins' );
01325                         }
01326                 }
01327                 $lines = $closing->getLines();
01328                 wfProfileOut( __METHOD__ );
01329                 return $lines;
01330         }
01331 }
01332 
01339 class TableDiffFormatter extends DiffFormatter {
01340         function __construct() {
01341                 $this->leading_context_lines = 2;
01342                 $this->trailing_context_lines = 2;
01343         }
01344 
01350         public static function escapeWhiteSpace( $msg ) {
01351                 $msg = preg_replace( '/^ /m', '&#160; ', $msg );
01352                 $msg = preg_replace( '/ $/m', ' &#160;', $msg );
01353                 $msg = preg_replace( '/  /', '&#160; ', $msg );
01354                 return $msg;
01355         }
01356 
01364         function _block_header( $xbeg, $xlen, $ybeg, $ylen ) {
01365                 $r = '<tr><td colspan="2" class="diff-lineno"><!--LINE ' . $xbeg . "--></td>\n" .
01366                         '<td colspan="2" class="diff-lineno"><!--LINE ' . $ybeg . "--></td></tr>\n";
01367                 return $r;
01368         }
01369 
01373         function _start_block( $header ) {
01374                 echo $header;
01375         }
01376 
01377         function _end_block() {
01378         }
01379 
01380         function _lines( $lines, $prefix = ' ', $color = 'white' ) {
01381         }
01382 
01388         function addedLine( $line ) {
01389                 return $this->wrapLine( '+', 'diff-addedline', $line );
01390         }
01391 
01397         function deletedLine( $line ) {
01398                 return $this->wrapLine( '−', 'diff-deletedline', $line );
01399         }
01400 
01406         function contextLine( $line ) {
01407                 return $this->wrapLine( '&#160;', 'diff-context', $line );
01408         }
01409 
01416         private function wrapLine( $marker, $class, $line ) {
01417                 if ( $line !== '' ) {
01418                         // The <div> wrapper is needed for 'overflow: auto' style to scroll properly
01419                         $line = Xml::tags( 'div', null, $this->escapeWhiteSpace( $line ) );
01420                 }
01421                 return "<td class='diff-marker'>$marker</td><td class='$class'>$line</td>";
01422         }
01423 
01427         function emptyLine() {
01428                 return '<td colspan="2">&#160;</td>';
01429         }
01430 
01434         function _added( $lines ) {
01435                 foreach ( $lines as $line ) {
01436                         echo '<tr>' . $this->emptyLine() .
01437                         $this->addedLine( '<ins class="diffchange">' .
01438                         htmlspecialchars( $line ) . '</ins>' ) . "</tr>\n";
01439                 }
01440         }
01441 
01445         function _deleted( $lines ) {
01446                 foreach ( $lines as $line ) {
01447                         echo '<tr>' . $this->deletedLine( '<del class="diffchange">' .
01448                         htmlspecialchars( $line ) . '</del>' ) .
01449                         $this->emptyLine() . "</tr>\n";
01450                 }
01451         }
01452 
01456         function _context( $lines ) {
01457                 foreach ( $lines as $line ) {
01458                         echo '<tr>' .
01459                         $this->contextLine( htmlspecialchars( $line ) ) .
01460                         $this->contextLine( htmlspecialchars( $line ) ) . "</tr>\n";
01461                 }
01462         }
01463 
01468         function _changed( $orig, $closing ) {
01469                 wfProfileIn( __METHOD__ );
01470 
01471                 $diff = new WordLevelDiff( $orig, $closing );
01472                 $del = $diff->orig();
01473                 $add = $diff->closing();
01474 
01475                 # Notice that WordLevelDiff returns HTML-escaped output.
01476                 # Hence, we will be calling addedLine/deletedLine without HTML-escaping.
01477 
01478                 while ( $line = array_shift( $del ) ) {
01479                         $aline = array_shift( $add );
01480                         echo '<tr>' . $this->deletedLine( $line ) .
01481                         $this->addedLine( $aline ) . "</tr>\n";
01482                 }
01483                 foreach ( $add as $line ) {     # If any leftovers
01484                         echo '<tr>' . $this->emptyLine() .
01485                         $this->addedLine( $line ) . "</tr>\n";
01486                 }
01487                 wfProfileOut( __METHOD__ );
01488         }
01489 }