MediaWiki  REL1_19
DairikiDiff.php
Go to the documentation of this file.
00001 <?php
00018 class _DiffOp {
00019         var $type;
00020         var $orig;
00021         var $closing;
00022 
00023         function reverse() {
00024                 trigger_error( 'pure virtual', E_USER_ERROR );
00025         }
00026 
00030         function norig() {
00031                 return $this->orig ? sizeof( $this->orig ) : 0;
00032         }
00033 
00037         function nclosing() {
00038                 return $this->closing ? sizeof( $this->closing ) : 0;
00039         }
00040 }
00041 
00047 class _DiffOp_Copy extends _DiffOp {
00048         var $type = 'copy';
00049 
00050         function __construct( $orig, $closing = false ) {
00051                 if ( !is_array( $closing ) ) {
00052                         $closing = $orig;
00053                 }
00054                 $this->orig = $orig;
00055                 $this->closing = $closing;
00056         }
00057 
00061         function reverse() {
00062                 return new _DiffOp_Copy( $this->closing, $this->orig );
00063         }
00064 }
00065 
00071 class _DiffOp_Delete extends _DiffOp {
00072         var $type = 'delete';
00073 
00074         function __construct( $lines ) {
00075                 $this->orig = $lines;
00076                 $this->closing = false;
00077         }
00078 
00082         function reverse() {
00083                 return new _DiffOp_Add( $this->orig );
00084         }
00085 }
00086 
00092 class _DiffOp_Add extends _DiffOp {
00093         var $type = 'add';
00094 
00095         function __construct( $lines ) {
00096                 $this->closing = $lines;
00097                 $this->orig = false;
00098         }
00099 
00103         function reverse() {
00104                 return new _DiffOp_Delete( $this->closing );
00105         }
00106 }
00107 
00113 class _DiffOp_Change extends _DiffOp {
00114         var $type = 'change';
00115 
00116         function __construct( $orig, $closing ) {
00117                 $this->orig = $orig;
00118                 $this->closing = $closing;
00119         }
00120 
00124         function reverse() {
00125                 return new _DiffOp_Change( $this->closing, $this->orig );
00126         }
00127 }
00128 
00153 class _DiffEngine {
00154 
00155         const MAX_XREF_LENGTH =  10000;
00156 
00157         protected $xchanged, $ychanged;
00158 
00159         protected $xv = array(), $yv = array();
00160         protected $xind = array(), $yind = array();
00161 
00162         protected $seq = array(), $in_seq = array();
00163 
00164         protected $lcs = 0;
00165 
00171         function diff ( $from_lines, $to_lines ) {
00172                 wfProfileIn( __METHOD__ );
00173 
00174                 // Diff and store locally
00175                 $this->diff_local( $from_lines, $to_lines );
00176 
00177                 // Merge edits when possible
00178                 $this->_shift_boundaries( $from_lines, $this->xchanged, $this->ychanged );
00179                 $this->_shift_boundaries( $to_lines, $this->ychanged, $this->xchanged );
00180 
00181                 // Compute the edit operations.
00182                 $n_from = sizeof( $from_lines );
00183                 $n_to = sizeof( $to_lines );
00184 
00185                 $edits = array();
00186                 $xi = $yi = 0;
00187                 while ( $xi < $n_from || $yi < $n_to ) {
00188                         assert( $yi < $n_to || $this->xchanged[$xi] );
00189                         assert( $xi < $n_from || $this->ychanged[$yi] );
00190 
00191                         // Skip matching "snake".
00192                         $copy = array();
00193                         while ( $xi < $n_from && $yi < $n_to
00194                         && !$this->xchanged[$xi] && !$this->ychanged[$yi] ) {
00195                                 $copy[] = $from_lines[$xi++];
00196                                 ++$yi;
00197                         }
00198                         if ( $copy ) {
00199                                 $edits[] = new _DiffOp_Copy( $copy );
00200                         }
00201 
00202                         // Find deletes & adds.
00203                         $delete = array();
00204                         while ( $xi < $n_from && $this->xchanged[$xi] ) {
00205                                 $delete[] = $from_lines[$xi++];
00206                         }
00207 
00208                         $add = array();
00209                         while ( $yi < $n_to && $this->ychanged[$yi] )  {
00210                                 $add[] = $to_lines[$yi++];
00211                         }
00212 
00213                         if ( $delete && $add ) {
00214                                 $edits[] = new _DiffOp_Change( $delete, $add );
00215                         } elseif ( $delete ) {
00216                                 $edits[] = new _DiffOp_Delete( $delete );
00217                         } elseif ( $add ) {
00218                                 $edits[] = new _DiffOp_Add( $add );
00219                         }
00220                 }
00221                 wfProfileOut( __METHOD__ );
00222                 return $edits;
00223         }
00224 
00229         function diff_local ( $from_lines, $to_lines ) {
00230                 global $wgExternalDiffEngine;
00231                 wfProfileIn( __METHOD__ );
00232 
00233                 if ( $wgExternalDiffEngine == 'wikidiff3' ) {
00234                         // wikidiff3
00235                         $wikidiff3 = new WikiDiff3();
00236                         $wikidiff3->diff( $from_lines, $to_lines );
00237                         $this->xchanged = $wikidiff3->removed;
00238                         $this->ychanged = $wikidiff3->added;
00239                         unset( $wikidiff3 );
00240                 } else {
00241                         // old diff
00242                         $n_from = sizeof( $from_lines );
00243                         $n_to = sizeof( $to_lines );
00244                         $this->xchanged = $this->ychanged = array();
00245                         $this->xv = $this->yv = array();
00246                         $this->xind = $this->yind = array();
00247                         unset( $this->seq );
00248                         unset( $this->in_seq );
00249                         unset( $this->lcs );
00250 
00251                         // Skip leading common lines.
00252                         for ( $skip = 0; $skip < $n_from && $skip < $n_to; $skip++ ) {
00253                                 if ( $from_lines[$skip] !== $to_lines[$skip] ) {
00254                                         break;
00255                                 }
00256                                 $this->xchanged[$skip] = $this->ychanged[$skip] = false;
00257                         }
00258                         // Skip trailing common lines.
00259                         $xi = $n_from; $yi = $n_to;
00260                         for ( $endskip = 0; --$xi > $skip && --$yi > $skip; $endskip++ ) {
00261                                 if ( $from_lines[$xi] !== $to_lines[$yi] ) {
00262                                         break;
00263                                 }
00264                                 $this->xchanged[$xi] = $this->ychanged[$yi] = false;
00265                         }
00266 
00267                         // Ignore lines which do not exist in both files.
00268                         for ( $xi = $skip; $xi < $n_from - $endskip; $xi++ ) {
00269                                 $xhash[$this->_line_hash( $from_lines[$xi] )] = 1;
00270                         }
00271 
00272                         for ( $yi = $skip; $yi < $n_to - $endskip; $yi++ ) {
00273                                 $line = $to_lines[$yi];
00274                                 if ( ( $this->ychanged[$yi] = empty( $xhash[$this->_line_hash( $line )] ) ) ) {
00275                                         continue;
00276                                 }
00277                                 $yhash[$this->_line_hash( $line )] = 1;
00278                                 $this->yv[] = $line;
00279                                 $this->yind[] = $yi;
00280                         }
00281                         for ( $xi = $skip; $xi < $n_from - $endskip; $xi++ ) {
00282                                 $line = $from_lines[$xi];
00283                                 if ( ( $this->xchanged[$xi] = empty( $yhash[$this->_line_hash( $line )] ) ) ) {
00284                                         continue;
00285                                 }
00286                                 $this->xv[] = $line;
00287                                 $this->xind[] = $xi;
00288                         }
00289 
00290                         // Find the LCS.
00291                         $this->_compareseq( 0, sizeof( $this->xv ), 0, sizeof( $this->yv ) );
00292                 }
00293                 wfProfileOut( __METHOD__ );
00294         }
00295 
00301         function _line_hash( $line ) {
00302                 if ( strlen( $line ) > self::MAX_XREF_LENGTH ) {
00303                         return md5( $line );
00304                 } else {
00305                         return $line;
00306                 }
00307         }
00308 
00332         function _diag( $xoff, $xlim, $yoff, $ylim, $nchunks ) {
00333                 $flip = false;
00334 
00335                 if ( $xlim - $xoff > $ylim - $yoff ) {
00336                         // Things seems faster (I'm not sure I understand why)
00337                         // when the shortest sequence in X.
00338                         $flip = true;
00339                         list( $xoff, $xlim, $yoff, $ylim ) = array( $yoff, $ylim, $xoff, $xlim );
00340                 }
00341 
00342                 if ( $flip ) {
00343                         for ( $i = $ylim - 1; $i >= $yoff; $i-- ) {
00344                                 $ymatches[$this->xv[$i]][] = $i;
00345                         }
00346                 } else {
00347                         for ( $i = $ylim - 1; $i >= $yoff; $i-- ) {
00348                                 $ymatches[$this->yv[$i]][] = $i;
00349                         }
00350                 }
00351 
00352                 $this->lcs = 0;
00353                 $this->seq[0] = $yoff - 1;
00354                 $this->in_seq = array();
00355                 $ymids[0] = array();
00356 
00357                 $numer = $xlim - $xoff + $nchunks - 1;
00358                 $x = $xoff;
00359                 for ( $chunk = 0; $chunk < $nchunks; $chunk++ ) {
00360                         if ( $chunk > 0 ) {
00361                                 for ( $i = 0; $i <= $this->lcs; $i++ ) {
00362                                         $ymids[$i][$chunk -1] = $this->seq[$i];
00363                                 }
00364                         }
00365 
00366                         $x1 = $xoff + (int)( ( $numer + ( $xlim -$xoff ) * $chunk ) / $nchunks );
00367                         for ( ; $x < $x1; $x++ ) {
00368                                 $line = $flip ? $this->yv[$x] : $this->xv[$x];
00369                                 if ( empty( $ymatches[$line] ) ) {
00370                                         continue;
00371                                 }
00372                                 $matches = $ymatches[$line];
00373                                 reset( $matches );
00374                                 while ( list( , $y ) = each( $matches ) ) {
00375                                         if ( empty( $this->in_seq[$y] ) ) {
00376                                                 $k = $this->_lcs_pos( $y );
00377                                                 assert( $k > 0 );
00378                                                 $ymids[$k] = $ymids[$k -1];
00379                                                 break;
00380                                         }
00381                                 }
00382                                 while ( list ( , $y ) = each( $matches ) ) {
00383                                         if ( $y > $this->seq[$k -1] ) {
00384                                                 assert( $y < $this->seq[$k] );
00385                                                 // Optimization: this is a common case:
00386                                                 //      next match is just replacing previous match.
00387                                                 $this->in_seq[$this->seq[$k]] = false;
00388                                                 $this->seq[$k] = $y;
00389                                                 $this->in_seq[$y] = 1;
00390                                         } elseif ( empty( $this->in_seq[$y] ) ) {
00391                                                 $k = $this->_lcs_pos( $y );
00392                                                 assert( $k > 0 );
00393                                                 $ymids[$k] = $ymids[$k -1];
00394                                         }
00395                                 }
00396                         }
00397                 }
00398 
00399                 $seps[] = $flip ? array( $yoff, $xoff ) : array( $xoff, $yoff );
00400                 $ymid = $ymids[$this->lcs];
00401                 for ( $n = 0; $n < $nchunks - 1; $n++ ) {
00402                         $x1 = $xoff + (int)( ( $numer + ( $xlim - $xoff ) * $n ) / $nchunks );
00403                         $y1 = $ymid[$n] + 1;
00404                         $seps[] = $flip ? array( $y1, $x1 ) : array( $x1, $y1 );
00405                 }
00406                 $seps[] = $flip ? array( $ylim, $xlim ) : array( $xlim, $ylim );
00407 
00408                 return array( $this->lcs, $seps );
00409         }
00410 
00415         function _lcs_pos( $ypos ) {
00416                 $end = $this->lcs;
00417                 if ( $end == 0 || $ypos > $this->seq[$end] ) {
00418                         $this->seq[++$this->lcs] = $ypos;
00419                         $this->in_seq[$ypos] = 1;
00420                         return $this->lcs;
00421                 }
00422 
00423                 $beg = 1;
00424                 while ( $beg < $end ) {
00425                         $mid = (int)( ( $beg + $end ) / 2 );
00426                         if ( $ypos > $this->seq[$mid] ) {
00427                                 $beg = $mid + 1;
00428                         } else {
00429                                 $end = $mid;
00430                         }
00431                 }
00432 
00433                 assert( $ypos != $this->seq[$end] );
00434 
00435                 $this->in_seq[$this->seq[$end]] = false;
00436                 $this->seq[$end] = $ypos;
00437                 $this->in_seq[$ypos] = 1;
00438                 return $end;
00439         }
00440 
00457         function _compareseq ( $xoff, $xlim, $yoff, $ylim ) {
00458                 // Slide down the bottom initial diagonal.
00459                 while ( $xoff < $xlim && $yoff < $ylim
00460                 && $this->xv[$xoff] == $this->yv[$yoff] ) {
00461                         ++$xoff;
00462                         ++$yoff;
00463                 }
00464 
00465                 // Slide up the top initial diagonal.
00466                 while ( $xlim > $xoff && $ylim > $yoff
00467                 && $this->xv[$xlim - 1] == $this->yv[$ylim - 1] ) {
00468                         --$xlim;
00469                         --$ylim;
00470                 }
00471 
00472                 if ( $xoff == $xlim || $yoff == $ylim ) {
00473                         $lcs = 0;
00474                 } else {
00475                         // This is ad hoc but seems to work well.
00476                         // $nchunks = sqrt(min($xlim - $xoff, $ylim - $yoff) / 2.5);
00477                         // $nchunks = max(2,min(8,(int)$nchunks));
00478                         $nchunks = min( 7, $xlim - $xoff, $ylim - $yoff ) + 1;
00479                         list ( $lcs, $seps )
00480                         = $this->_diag( $xoff, $xlim, $yoff, $ylim, $nchunks );
00481                 }
00482 
00483                 if ( $lcs == 0 ) {
00484                         // X and Y sequences have no common subsequence:
00485                         // mark all changed.
00486                         while ( $yoff < $ylim ) {
00487                                 $this->ychanged[$this->yind[$yoff++]] = 1;
00488                         }
00489                         while ( $xoff < $xlim ) {
00490                                 $this->xchanged[$this->xind[$xoff++]] = 1;
00491                         }
00492                 } else {
00493                         // Use the partitions to split this problem into subproblems.
00494                         reset( $seps );
00495                         $pt1 = $seps[0];
00496                         while ( $pt2 = next( $seps ) ) {
00497                                 $this->_compareseq ( $pt1[0], $pt2[0], $pt1[1], $pt2[1] );
00498                                 $pt1 = $pt2;
00499                         }
00500                 }
00501         }
00502 
00516         function _shift_boundaries( $lines, &$changed, $other_changed ) {
00517                 wfProfileIn( __METHOD__ );
00518                 $i = 0;
00519                 $j = 0;
00520 
00521                 assert( 'sizeof($lines) == sizeof($changed)' );
00522                 $len = sizeof( $lines );
00523                 $other_len = sizeof( $other_changed );
00524 
00525                 while ( 1 ) {
00526                         /*
00527                          * Scan forwards to find beginning of another run of changes.
00528                          * Also keep track of the corresponding point in the other file.
00529                          *
00530                          * Throughout this code, $i and $j are adjusted together so that
00531                          * the first $i elements of $changed and the first $j elements
00532                          * of $other_changed both contain the same number of zeros
00533                          * (unchanged lines).
00534                          * Furthermore, $j is always kept so that $j == $other_len or
00535                          * $other_changed[$j] == false.
00536                          */
00537                         while ( $j < $other_len && $other_changed[$j] ) {
00538                                 $j++;
00539                         }
00540 
00541                         while ( $i < $len && ! $changed[$i] ) {
00542                                 assert( '$j < $other_len && ! $other_changed[$j]' );
00543                                 $i++; $j++;
00544                                 while ( $j < $other_len && $other_changed[$j] )
00545                                 $j++;
00546                         }
00547 
00548                         if ( $i == $len ) {
00549                                 break;
00550                         }
00551 
00552                         $start = $i;
00553 
00554                         // Find the end of this run of changes.
00555                         while ( ++$i < $len && $changed[$i] ) {
00556                                 continue;
00557                         }
00558 
00559                         do {
00560                                 /*
00561                                  * Record the length of this run of changes, so that
00562                                  * we can later determine whether the run has grown.
00563                                  */
00564                                 $runlength = $i - $start;
00565 
00566                                 /*
00567                                  * Move the changed region back, so long as the
00568                                  * previous unchanged line matches the last changed one.
00569                                  * This merges with previous changed regions.
00570                                  */
00571                                 while ( $start > 0 && $lines[$start - 1] == $lines[$i - 1] ) {
00572                                         $changed[--$start] = 1;
00573                                         $changed[--$i] = false;
00574                                         while ( $start > 0 && $changed[$start - 1] ) {
00575                                                 $start--;
00576                                         }
00577                                         assert( '$j > 0' );
00578                                         while ( $other_changed[--$j] ) {
00579                                                 continue;
00580                                         }
00581                                         assert( '$j >= 0 && !$other_changed[$j]' );
00582                                 }
00583 
00584                                 /*
00585                                  * Set CORRESPONDING to the end of the changed run, at the last
00586                                  * point where it corresponds to a changed run in the other file.
00587                                  * CORRESPONDING == LEN means no such point has been found.
00588                                  */
00589                                 $corresponding = $j < $other_len ? $i : $len;
00590 
00591                                 /*
00592                                  * Move the changed region forward, so long as the
00593                                  * first changed line matches the following unchanged one.
00594                                  * This merges with following changed regions.
00595                                  * Do this second, so that if there are no merges,
00596                                  * the changed region is moved forward as far as possible.
00597                                  */
00598                                 while ( $i < $len && $lines[$start] == $lines[$i] ) {
00599                                         $changed[$start++] = false;
00600                                         $changed[$i++] = 1;
00601                                         while ( $i < $len && $changed[$i] ) {
00602                                                 $i++;
00603                                         }
00604 
00605                                         assert( '$j < $other_len && ! $other_changed[$j]' );
00606                                         $j++;
00607                                         if ( $j < $other_len && $other_changed[$j] ) {
00608                                                 $corresponding = $i;
00609                                                 while ( $j < $other_len && $other_changed[$j] ) {
00610                                                         $j++;
00611                                                 }
00612                                         }
00613                                 }
00614                         } while ( $runlength != $i - $start );
00615 
00616                         /*
00617                          * If possible, move the fully-merged run of changes
00618                          * back to a corresponding run in the other file.
00619                          */
00620                         while ( $corresponding < $i ) {
00621                                 $changed[--$start] = 1;
00622                                 $changed[--$i] = 0;
00623                                 assert( '$j > 0' );
00624                                 while ( $other_changed[--$j] ) {
00625                                         continue;
00626                                 }
00627                                 assert( '$j >= 0 && !$other_changed[$j]' );
00628                         }
00629                 }
00630                 wfProfileOut( __METHOD__ );
00631         }
00632 }
00633 
00640 class Diff {
00641         var $edits;
00642 
00651         function __construct( $from_lines, $to_lines ) {
00652                 $eng = new _DiffEngine;
00653                 $this->edits = $eng->diff( $from_lines, $to_lines );
00654                 // $this->_check($from_lines, $to_lines);
00655         }
00656 
00667         function reverse() {
00668                 $rev = $this;
00669                 $rev->edits = array();
00670                 foreach ( $this->edits as $edit ) {
00671                         $rev->edits[] = $edit->reverse();
00672                 }
00673                 return $rev;
00674         }
00675 
00681         function isEmpty() {
00682                 foreach ( $this->edits as $edit ) {
00683                         if ( $edit->type != 'copy' ) {
00684                                 return false;
00685                         }
00686                 }
00687                 return true;
00688         }
00689 
00697         function lcs() {
00698                 $lcs = 0;
00699                 foreach ( $this->edits as $edit ) {
00700                         if ( $edit->type == 'copy' ) {
00701                                 $lcs += sizeof( $edit->orig );
00702                         }
00703                 }
00704                 return $lcs;
00705         }
00706 
00715         function orig() {
00716                 $lines = array();
00717 
00718                 foreach ( $this->edits as $edit ) {
00719                         if ( $edit->orig ) {
00720                                 array_splice( $lines, sizeof( $lines ), 0, $edit->orig );
00721                         }
00722                 }
00723                 return $lines;
00724         }
00725 
00734         function closing() {
00735                 $lines = array();
00736 
00737                 foreach ( $this->edits as $edit ) {
00738                         if ( $edit->closing ) {
00739                                 array_splice( $lines, sizeof( $lines ), 0, $edit->closing );
00740                         }
00741                 }
00742                 return $lines;
00743         }
00744 
00752         function _check( $from_lines, $to_lines ) {
00753                 wfProfileIn( __METHOD__ );
00754                 if ( serialize( $from_lines ) != serialize( $this->orig() ) ) {
00755                         trigger_error( "Reconstructed original doesn't match", E_USER_ERROR );
00756                 }
00757                 if ( serialize( $to_lines ) != serialize( $this->closing() ) ) {
00758                         trigger_error( "Reconstructed closing doesn't match", E_USER_ERROR );
00759                 }
00760 
00761                 $rev = $this->reverse();
00762                 if ( serialize( $to_lines ) != serialize( $rev->orig() ) ) {
00763                         trigger_error( "Reversed original doesn't match", E_USER_ERROR );
00764                 }
00765                 if ( serialize( $from_lines ) != serialize( $rev->closing() ) ) {
00766                         trigger_error( "Reversed closing doesn't match", E_USER_ERROR );
00767                 }
00768 
00769 
00770                 $prevtype = 'none';
00771                 foreach ( $this->edits as $edit ) {
00772                         if ( $prevtype == $edit->type ) {
00773                                 trigger_error( 'Edit sequence is non-optimal', E_USER_ERROR );
00774                         }
00775                         $prevtype = $edit->type;
00776                 }
00777 
00778                 $lcs = $this->lcs();
00779                 trigger_error( 'Diff okay: LCS = ' . $lcs, E_USER_NOTICE );
00780                 wfProfileOut( __METHOD__ );
00781         }
00782 }
00783 
00789 class MappedDiff extends Diff {
00813         function __construct( $from_lines, $to_lines,
00814                 $mapped_from_lines, $mapped_to_lines ) {
00815                 wfProfileIn( __METHOD__ );
00816 
00817                 assert( sizeof( $from_lines ) == sizeof( $mapped_from_lines ) );
00818                 assert( sizeof( $to_lines ) == sizeof( $mapped_to_lines ) );
00819 
00820                 parent::__construct( $mapped_from_lines, $mapped_to_lines );
00821 
00822                 $xi = $yi = 0;
00823                 for ( $i = 0; $i < sizeof( $this->edits ); $i++ ) {
00824                         $orig = &$this->edits[$i]->orig;
00825                         if ( is_array( $orig ) ) {
00826                                 $orig = array_slice( $from_lines, $xi, sizeof( $orig ) );
00827                                 $xi += sizeof( $orig );
00828                         }
00829 
00830                         $closing = &$this->edits[$i]->closing;
00831                         if ( is_array( $closing ) ) {
00832                                 $closing = array_slice( $to_lines, $yi, sizeof( $closing ) );
00833                                 $yi += sizeof( $closing );
00834                         }
00835                 }
00836                 wfProfileOut( __METHOD__ );
00837         }
00838 }
00839 
00850 class DiffFormatter {
00857         var $leading_context_lines = 0;
00858 
00865         var $trailing_context_lines = 0;
00866 
00873         function format( $diff ) {
00874                 wfProfileIn( __METHOD__ );
00875 
00876                 $xi = $yi = 1;
00877                 $block = false;
00878                 $context = array();
00879 
00880                 $nlead = $this->leading_context_lines;
00881                 $ntrail = $this->trailing_context_lines;
00882 
00883                 $this->_start_diff();
00884 
00885                 foreach ( $diff->edits as $edit ) {
00886                         if ( $edit->type == 'copy' ) {
00887                                 if ( is_array( $block ) ) {
00888                                         if ( sizeof( $edit->orig ) <= $nlead + $ntrail ) {
00889                                                 $block[] = $edit;
00890                                         } else {
00891                                                 if ( $ntrail ) {
00892                                                         $context = array_slice( $edit->orig, 0, $ntrail );
00893                                                         $block[] = new _DiffOp_Copy( $context );
00894                                                 }
00895                                                 $this->_block( $x0, $ntrail + $xi - $x0,
00896                                                         $y0, $ntrail + $yi - $y0,
00897                                                         $block );
00898                                                 $block = false;
00899                                         }
00900                                 }
00901                                 $context = $edit->orig;
00902                         } else {
00903                                 if ( !is_array( $block ) ) {
00904                                         $context = array_slice( $context, sizeof( $context ) - $nlead );
00905                                         $x0 = $xi - sizeof( $context );
00906                                         $y0 = $yi - sizeof( $context );
00907                                         $block = array();
00908                                         if ( $context ) {
00909                                                 $block[] = new _DiffOp_Copy( $context );
00910                                         }
00911                                 }
00912                                 $block[] = $edit;
00913                         }
00914 
00915                         if ( $edit->orig ) {
00916                                 $xi += sizeof( $edit->orig );
00917                         }
00918                         if ( $edit->closing ) {
00919                                 $yi += sizeof( $edit->closing );
00920                         }
00921                 }
00922 
00923                 if ( is_array( $block ) ) {
00924                         $this->_block( $x0, $xi - $x0,
00925                                 $y0, $yi - $y0,
00926                                 $block );
00927                 }
00928 
00929                 $end = $this->_end_diff();
00930                 wfProfileOut( __METHOD__ );
00931                 return $end;
00932         }
00933 
00941         function _block( $xbeg, $xlen, $ybeg, $ylen, &$edits ) {
00942                 wfProfileIn( __METHOD__ );
00943                 $this->_start_block( $this->_block_header( $xbeg, $xlen, $ybeg, $ylen ) );
00944                 foreach ( $edits as $edit ) {
00945                         if ( $edit->type == 'copy' ) {
00946                                 $this->_context( $edit->orig );
00947                         } elseif ( $edit->type == 'add' ) {
00948                                 $this->_added( $edit->closing );
00949                         } elseif ( $edit->type == 'delete' ) {
00950                                 $this->_deleted( $edit->orig );
00951                         } elseif ( $edit->type == 'change' ) {
00952                                 $this->_changed( $edit->orig, $edit->closing );
00953                         } else {
00954                                 trigger_error( 'Unknown edit type', E_USER_ERROR );
00955                         }
00956                 }
00957                 $this->_end_block();
00958                 wfProfileOut( __METHOD__ );
00959         }
00960 
00961         function _start_diff() {
00962                 ob_start();
00963         }
00964 
00968         function _end_diff() {
00969                 $val = ob_get_contents();
00970                 ob_end_clean();
00971                 return $val;
00972         }
00973 
00981         function _block_header( $xbeg, $xlen, $ybeg, $ylen ) {
00982                 if ( $xlen > 1 ) {
00983                         $xbeg .= ',' . ( $xbeg + $xlen - 1 );
00984                 }
00985                 if ( $ylen > 1 ) {
00986                         $ybeg .= ',' . ( $ybeg + $ylen - 1 );
00987                 }
00988 
00989                 return $xbeg . ( $xlen ? ( $ylen ? 'c' : 'd' ) : 'a' ) . $ybeg;
00990         }
00991 
00992         function _start_block( $header ) {
00993                 echo $header . "\n";
00994         }
00995 
00996         function _end_block() {
00997         }
00998 
01003         function _lines( $lines, $prefix = ' ' ) {
01004                 foreach ( $lines as $line ) {
01005                         echo "$prefix $line\n";
01006                 }
01007         }
01008 
01012         function _context( $lines ) {
01013                 $this->_lines( $lines );
01014         }
01015 
01019         function _added( $lines ) {
01020                 $this->_lines( $lines, '>' );
01021         }
01022 
01026         function _deleted( $lines ) {
01027                 $this->_lines( $lines, '<' );
01028         }
01029 
01034         function _changed( $orig, $closing ) {
01035                 $this->_deleted( $orig );
01036                 echo "---\n";
01037                 $this->_added( $closing );
01038         }
01039 }
01040 
01045 class UnifiedDiffFormatter extends DiffFormatter {
01046         var $leading_context_lines = 2;
01047         var $trailing_context_lines = 2;
01048 
01052         function _added( $lines ) {
01053                 $this->_lines( $lines, '+' );
01054         }
01055 
01059         function _deleted( $lines ) {
01060                 $this->_lines( $lines, '-' );
01061         }
01062 
01067         function _changed( $orig, $closing ) {
01068                 $this->_deleted( $orig );
01069                 $this->_added( $closing );
01070         }
01071 
01079         function _block_header( $xbeg, $xlen, $ybeg, $ylen ) {
01080                 return "@@ -$xbeg,$xlen +$ybeg,$ylen @@";
01081         }
01082 }
01083 
01088 class ArrayDiffFormatter extends DiffFormatter {
01089 
01094         function format( $diff ) {
01095                 $oldline = 1;
01096                 $newline = 1;
01097                 $retval = array();
01098                 foreach ( $diff->edits as $edit ) {
01099                         switch( $edit->type ) {
01100                                 case 'add':
01101                                         foreach ( $edit->closing as $l ) {
01102                                                 $retval[] = array(
01103                                                         'action' => 'add',
01104                                                         'new' => $l,
01105                                                         'newline' => $newline++
01106                                                 );
01107                                         }
01108                                         break;
01109                                 case 'delete':
01110                                         foreach ( $edit->orig as $l ) {
01111                                                 $retval[] = array(
01112                                                         'action' => 'delete',
01113                                                         'old' => $l,
01114                                                         'oldline' => $oldline++,
01115                                                 );
01116                                         }
01117                                         break;
01118                                 case 'change':
01119                                         foreach ( $edit->orig as $i => $l ) {
01120                                                 $retval[] = array(
01121                                                         'action' => 'change',
01122                                                         'old' => $l,
01123                                                         'new' => isset( $edit->closing[$i] ) ? $edit->closing[$i] : null,
01124                                                         'oldline' => $oldline++,
01125                                                         'newline' => $newline++,
01126                                                 );
01127                                         }
01128                                         break;
01129                                 case 'copy':
01130                                         $oldline += count( $edit->orig );
01131                                         $newline += count( $edit->orig );
01132                         }
01133                 }
01134                 return $retval;
01135         }
01136 }
01137 
01142 define( 'NBSP', '&#160;' ); // iso-8859-x non-breaking space.
01143 
01149 class _HWLDF_WordAccumulator {
01150         function __construct() {
01151                 $this->_lines = array();
01152                 $this->_line = '';
01153                 $this->_group = '';
01154                 $this->_tag = '';
01155         }
01156 
01160         function _flushGroup( $new_tag ) {
01161                 if ( $this->_group !== '' ) {
01162                         if ( $this->_tag == 'ins' ) {
01163                                 $this->_line .= '<ins class="diffchange diffchange-inline">' .
01164                                                 htmlspecialchars( $this->_group ) . '</ins>';
01165                         } elseif ( $this->_tag == 'del' ) {
01166                                 $this->_line .= '<del class="diffchange diffchange-inline">' .
01167                                                 htmlspecialchars( $this->_group ) . '</del>';
01168                         } else {
01169                                 $this->_line .= htmlspecialchars( $this->_group );
01170                         }
01171                 }
01172                 $this->_group = '';
01173                 $this->_tag = $new_tag;
01174         }
01175 
01179         function _flushLine( $new_tag ) {
01180                 $this->_flushGroup( $new_tag );
01181                 if ( $this->_line != '' ) {
01182                         array_push( $this->_lines, $this->_line );
01183                 } else {
01184                         # make empty lines visible by inserting an NBSP
01185                         array_push( $this->_lines, NBSP );
01186                 }
01187                 $this->_line = '';
01188         }
01189 
01194         function addWords ( $words, $tag = '' ) {
01195                 if ( $tag != $this->_tag ) {
01196                         $this->_flushGroup( $tag );
01197                 }
01198 
01199                 foreach ( $words as $word ) {
01200                         // new-line should only come as first char of word.
01201                         if ( $word == '' ) {
01202                                 continue;
01203                         }
01204                         if ( $word[0] == "\n" ) {
01205                                 $this->_flushLine( $tag );
01206                                 $word = substr( $word, 1 );
01207                         }
01208                         assert( !strstr( $word, "\n" ) );
01209                         $this->_group .= $word;
01210                 }
01211         }
01212 
01216         function getLines() {
01217                 $this->_flushLine( '~done' );
01218                 return $this->_lines;
01219         }
01220 }
01221 
01227 class WordLevelDiff extends MappedDiff {
01228         const MAX_LINE_LENGTH = 10000;
01229 
01234         function __construct ( $orig_lines, $closing_lines ) {
01235                 wfProfileIn( __METHOD__ );
01236 
01237                 list( $orig_words, $orig_stripped ) = $this->_split( $orig_lines );
01238                 list( $closing_words, $closing_stripped ) = $this->_split( $closing_lines );
01239 
01240                 parent::__construct( $orig_words, $closing_words,
01241                 $orig_stripped, $closing_stripped );
01242                 wfProfileOut( __METHOD__ );
01243         }
01244 
01249         function _split( $lines ) {
01250                 wfProfileIn( __METHOD__ );
01251 
01252                 $words = array();
01253                 $stripped = array();
01254                 $first = true;
01255                 foreach ( $lines as $line ) {
01256                         # If the line is too long, just pretend the entire line is one big word
01257                         # This prevents resource exhaustion problems
01258                         if ( $first ) {
01259                                 $first = false;
01260                         } else {
01261                                 $words[] = "\n";
01262                                 $stripped[] = "\n";
01263                         }
01264                         if ( strlen( $line ) > self::MAX_LINE_LENGTH ) {
01265                                 $words[] = $line;
01266                                 $stripped[] = $line;
01267                         } else {
01268                                 $m = array();
01269                                 if ( preg_match_all( '/ ( [^\S\n]+ | [0-9_A-Za-z\x80-\xff]+ | . ) (?: (?!< \n) [^\S\n])? /xs',
01270                                         $line, $m ) )
01271                                 {
01272                                         $words = array_merge( $words, $m[0] );
01273                                         $stripped = array_merge( $stripped, $m[1] );
01274                                 }
01275                         }
01276                 }
01277                 wfProfileOut( __METHOD__ );
01278                 return array( $words, $stripped );
01279         }
01280 
01284         function orig() {
01285                 wfProfileIn( __METHOD__ );
01286                 $orig = new _HWLDF_WordAccumulator;
01287 
01288                 foreach ( $this->edits as $edit ) {
01289                         if ( $edit->type == 'copy' ) {
01290                                 $orig->addWords( $edit->orig );
01291                         } elseif ( $edit->orig ) {
01292                                 $orig->addWords( $edit->orig, 'del' );
01293                         }
01294                 }
01295                 $lines = $orig->getLines();
01296                 wfProfileOut( __METHOD__ );
01297                 return $lines;
01298         }
01299 
01303         function closing() {
01304                 wfProfileIn( __METHOD__ );
01305                 $closing = new _HWLDF_WordAccumulator;
01306 
01307                 foreach ( $this->edits as $edit ) {
01308                         if ( $edit->type == 'copy' ) {
01309                                 $closing->addWords( $edit->closing );
01310                         } elseif ( $edit->closing ) {
01311                                 $closing->addWords( $edit->closing, 'ins' );
01312                         }
01313                 }
01314                 $lines = $closing->getLines();
01315                 wfProfileOut( __METHOD__ );
01316                 return $lines;
01317         }
01318 }
01319 
01326 class TableDiffFormatter extends DiffFormatter {
01327         function __construct() {
01328                 $this->leading_context_lines = 2;
01329                 $this->trailing_context_lines = 2;
01330         }
01331 
01337         public static function escapeWhiteSpace( $msg ) {
01338                 $msg = preg_replace( '/^ /m', '&#160; ', $msg );
01339                 $msg = preg_replace( '/ $/m', ' &#160;', $msg );
01340                 $msg = preg_replace( '/  /', '&#160; ', $msg );
01341                 return $msg;
01342         }
01343 
01351         function _block_header( $xbeg, $xlen, $ybeg, $ylen ) {
01352                 $r = '<tr><td colspan="2" class="diff-lineno"><!--LINE ' . $xbeg . "--></td>\n" .
01353                   '<td colspan="2" class="diff-lineno"><!--LINE ' . $ybeg . "--></td></tr>\n";
01354                 return $r;
01355         }
01356 
01360         function _start_block( $header ) {
01361                 echo $header;
01362         }
01363 
01364         function _end_block() {
01365         }
01366 
01367         function _lines( $lines, $prefix = ' ', $color = 'white' ) {
01368         }
01369 
01375         function addedLine( $line ) {
01376                 return $this->wrapLine( '+', 'diff-addedline', $line );
01377         }
01378 
01384         function deletedLine( $line ) {
01385                 return $this->wrapLine( '−', 'diff-deletedline', $line );
01386         }
01387 
01393         function contextLine( $line ) {
01394                 return $this->wrapLine( '&#160;', 'diff-context', $line );
01395         }
01396 
01403         private function wrapLine( $marker, $class, $line ) {
01404                 if ( $line !== '' ) {
01405                         // The <div> wrapper is needed for 'overflow: auto' style to scroll properly
01406                         $line = Xml::tags( 'div', null, $this->escapeWhiteSpace( $line ) );
01407                 }
01408                 return "<td class='diff-marker'>$marker</td><td class='$class'>$line</td>";
01409         }
01410 
01414         function emptyLine() {
01415                 return '<td colspan="2">&#160;</td>';
01416         }
01417 
01421         function _added( $lines ) {
01422                 foreach ( $lines as $line ) {
01423                         echo '<tr>' . $this->emptyLine() .
01424                         $this->addedLine( '<ins class="diffchange">' .
01425                         htmlspecialchars( $line ) . '</ins>' ) . "</tr>\n";
01426                 }
01427         }
01428 
01432         function _deleted( $lines ) {
01433                 foreach ( $lines as $line ) {
01434                         echo '<tr>' . $this->deletedLine( '<del class="diffchange">' .
01435                         htmlspecialchars( $line ) . '</del>' ) .
01436                         $this->emptyLine() . "</tr>\n";
01437                 }
01438         }
01439 
01443         function _context( $lines ) {
01444                 foreach ( $lines as $line ) {
01445                         echo '<tr>' .
01446                         $this->contextLine( htmlspecialchars( $line ) ) .
01447                         $this->contextLine( htmlspecialchars( $line ) ) . "</tr>\n";
01448                 }
01449         }
01450 
01455         function _changed( $orig, $closing ) {
01456                 wfProfileIn( __METHOD__ );
01457 
01458                 $diff = new WordLevelDiff( $orig, $closing );
01459                 $del = $diff->orig();
01460                 $add = $diff->closing();
01461 
01462                 # Notice that WordLevelDiff returns HTML-escaped output.
01463                 # Hence, we will be calling addedLine/deletedLine without HTML-escaping.
01464 
01465                 while ( $line = array_shift( $del ) ) {
01466                         $aline = array_shift( $add );
01467                         echo '<tr>' . $this->deletedLine( $line ) .
01468                         $this->addedLine( $aline ) . "</tr>\n";
01469                 }
01470                 foreach ( $add as $line ) {     # If any leftovers
01471                         echo '<tr>' . $this->emptyLine() .
01472                         $this->addedLine( $line ) . "</tr>\n";
01473                 }
01474                 wfProfileOut( __METHOD__ );
01475         }
01476 }