MediaWiki  REL1_22
DairikiDiff.php
Go to the documentation of this file.
00001 <?php
00033 class _DiffOp {
00034     var $type;
00035     var $orig;
00036     var $closing;
00037 
00038     function reverse() {
00039         trigger_error( 'pure virtual', E_USER_ERROR );
00040     }
00041 
00045     function norig() {
00046         return $this->orig ? count( $this->orig ) : 0;
00047     }
00048 
00052     function nclosing() {
00053         return $this->closing ? count( $this->closing ) : 0;
00054     }
00055 }
00056 
00062 class _DiffOp_Copy extends _DiffOp {
00063     var $type = 'copy';
00064 
00065     function __construct( $orig, $closing = false ) {
00066         if ( !is_array( $closing ) ) {
00067             $closing = $orig;
00068         }
00069         $this->orig = $orig;
00070         $this->closing = $closing;
00071     }
00072 
00076     function reverse() {
00077         return new _DiffOp_Copy( $this->closing, $this->orig );
00078     }
00079 }
00080 
00086 class _DiffOp_Delete extends _DiffOp {
00087     var $type = 'delete';
00088 
00089     function __construct( $lines ) {
00090         $this->orig = $lines;
00091         $this->closing = false;
00092     }
00093 
00097     function reverse() {
00098         return new _DiffOp_Add( $this->orig );
00099     }
00100 }
00101 
00107 class _DiffOp_Add extends _DiffOp {
00108     var $type = 'add';
00109 
00110     function __construct( $lines ) {
00111         $this->closing = $lines;
00112         $this->orig = false;
00113     }
00114 
00118     function reverse() {
00119         return new _DiffOp_Delete( $this->closing );
00120     }
00121 }
00122 
00128 class _DiffOp_Change extends _DiffOp {
00129     var $type = 'change';
00130 
00131     function __construct( $orig, $closing ) {
00132         $this->orig = $orig;
00133         $this->closing = $closing;
00134     }
00135 
00139     function reverse() {
00140         return new _DiffOp_Change( $this->closing, $this->orig );
00141     }
00142 }
00143 
00168 class _DiffEngine {
00169 
00170     const MAX_XREF_LENGTH = 10000;
00171 
00172     protected $xchanged, $ychanged;
00173 
00174     protected $xv = array(), $yv = array();
00175     protected $xind = array(), $yind = array();
00176 
00177     protected $seq = array(), $in_seq = array();
00178 
00179     protected $lcs = 0;
00180 
00186     function diff( $from_lines, $to_lines ) {
00187         wfProfileIn( __METHOD__ );
00188 
00189         // Diff and store locally
00190         $this->diff_local( $from_lines, $to_lines );
00191 
00192         // Merge edits when possible
00193         $this->_shift_boundaries( $from_lines, $this->xchanged, $this->ychanged );
00194         $this->_shift_boundaries( $to_lines, $this->ychanged, $this->xchanged );
00195 
00196         // Compute the edit operations.
00197         $n_from = count( $from_lines );
00198         $n_to = count( $to_lines );
00199 
00200         $edits = array();
00201         $xi = $yi = 0;
00202         while ( $xi < $n_from || $yi < $n_to ) {
00203             assert( '$yi < $n_to || $this->xchanged[$xi]' );
00204             assert( '$xi < $n_from || $this->ychanged[$yi]' );
00205 
00206             // Skip matching "snake".
00207             $copy = array();
00208             while ( $xi < $n_from && $yi < $n_to
00209             && !$this->xchanged[$xi] && !$this->ychanged[$yi] ) {
00210                 $copy[] = $from_lines[$xi++];
00211                 ++$yi;
00212             }
00213             if ( $copy ) {
00214                 $edits[] = new _DiffOp_Copy( $copy );
00215             }
00216 
00217             // Find deletes & adds.
00218             $delete = array();
00219             while ( $xi < $n_from && $this->xchanged[$xi] ) {
00220                 $delete[] = $from_lines[$xi++];
00221             }
00222 
00223             $add = array();
00224             while ( $yi < $n_to && $this->ychanged[$yi] ) {
00225                 $add[] = $to_lines[$yi++];
00226             }
00227 
00228             if ( $delete && $add ) {
00229                 $edits[] = new _DiffOp_Change( $delete, $add );
00230             } elseif ( $delete ) {
00231                 $edits[] = new _DiffOp_Delete( $delete );
00232             } elseif ( $add ) {
00233                 $edits[] = new _DiffOp_Add( $add );
00234             }
00235         }
00236         wfProfileOut( __METHOD__ );
00237         return $edits;
00238     }
00239 
00244     function diff_local( $from_lines, $to_lines ) {
00245         global $wgExternalDiffEngine;
00246         wfProfileIn( __METHOD__ );
00247 
00248         if ( $wgExternalDiffEngine == 'wikidiff3' ) {
00249             // wikidiff3
00250             $wikidiff3 = new WikiDiff3();
00251             $wikidiff3->diff( $from_lines, $to_lines );
00252             $this->xchanged = $wikidiff3->removed;
00253             $this->ychanged = $wikidiff3->added;
00254             unset( $wikidiff3 );
00255         } else {
00256             // old diff
00257             $n_from = count( $from_lines );
00258             $n_to = count( $to_lines );
00259             $this->xchanged = $this->ychanged = array();
00260             $this->xv = $this->yv = array();
00261             $this->xind = $this->yind = array();
00262             unset( $this->seq );
00263             unset( $this->in_seq );
00264             unset( $this->lcs );
00265 
00266             // Skip leading common lines.
00267             for ( $skip = 0; $skip < $n_from && $skip < $n_to; $skip++ ) {
00268                 if ( $from_lines[$skip] !== $to_lines[$skip] ) {
00269                     break;
00270                 }
00271                 $this->xchanged[$skip] = $this->ychanged[$skip] = false;
00272             }
00273             // Skip trailing common lines.
00274             $xi = $n_from;
00275             $yi = $n_to;
00276             for ( $endskip = 0; --$xi > $skip && --$yi > $skip; $endskip++ ) {
00277                 if ( $from_lines[$xi] !== $to_lines[$yi] ) {
00278                     break;
00279                 }
00280                 $this->xchanged[$xi] = $this->ychanged[$yi] = false;
00281             }
00282 
00283             // Ignore lines which do not exist in both files.
00284             for ( $xi = $skip; $xi < $n_from - $endskip; $xi++ ) {
00285                 $xhash[$this->_line_hash( $from_lines[$xi] )] = 1;
00286             }
00287 
00288             for ( $yi = $skip; $yi < $n_to - $endskip; $yi++ ) {
00289                 $line = $to_lines[$yi];
00290                 if ( ( $this->ychanged[$yi] = empty( $xhash[$this->_line_hash( $line )] ) ) ) {
00291                     continue;
00292                 }
00293                 $yhash[$this->_line_hash( $line )] = 1;
00294                 $this->yv[] = $line;
00295                 $this->yind[] = $yi;
00296             }
00297             for ( $xi = $skip; $xi < $n_from - $endskip; $xi++ ) {
00298                 $line = $from_lines[$xi];
00299                 if ( ( $this->xchanged[$xi] = empty( $yhash[$this->_line_hash( $line )] ) ) ) {
00300                     continue;
00301                 }
00302                 $this->xv[] = $line;
00303                 $this->xind[] = $xi;
00304             }
00305 
00306             // Find the LCS.
00307             $this->_compareseq( 0, count( $this->xv ), 0, count( $this->yv ) );
00308         }
00309         wfProfileOut( __METHOD__ );
00310     }
00311 
00317     function _line_hash( $line ) {
00318         if ( strlen( $line ) > self::MAX_XREF_LENGTH ) {
00319             return md5( $line );
00320         } else {
00321             return $line;
00322         }
00323     }
00324 
00348     function _diag( $xoff, $xlim, $yoff, $ylim, $nchunks ) {
00349         $flip = false;
00350 
00351         if ( $xlim - $xoff > $ylim - $yoff ) {
00352             // Things seems faster (I'm not sure I understand why)
00353             // when the shortest sequence in X.
00354             $flip = true;
00355             list( $xoff, $xlim, $yoff, $ylim ) = array( $yoff, $ylim, $xoff, $xlim );
00356         }
00357 
00358         if ( $flip ) {
00359             for ( $i = $ylim - 1; $i >= $yoff; $i-- ) {
00360                 $ymatches[$this->xv[$i]][] = $i;
00361             }
00362         } else {
00363             for ( $i = $ylim - 1; $i >= $yoff; $i-- ) {
00364                 $ymatches[$this->yv[$i]][] = $i;
00365             }
00366         }
00367 
00368         $this->lcs = 0;
00369         $this->seq[0] = $yoff - 1;
00370         $this->in_seq = array();
00371         $ymids[0] = array();
00372 
00373         $numer = $xlim - $xoff + $nchunks - 1;
00374         $x = $xoff;
00375         for ( $chunk = 0; $chunk < $nchunks; $chunk++ ) {
00376             if ( $chunk > 0 ) {
00377                 for ( $i = 0; $i <= $this->lcs; $i++ ) {
00378                     $ymids[$i][$chunk -1] = $this->seq[$i];
00379                 }
00380             }
00381 
00382             $x1 = $xoff + (int)( ( $numer + ( $xlim -$xoff ) * $chunk ) / $nchunks );
00383             for ( ; $x < $x1; $x++ ) {
00384                 $line = $flip ? $this->yv[$x] : $this->xv[$x];
00385                 if ( empty( $ymatches[$line] ) ) {
00386                     continue;
00387                 }
00388                 $matches = $ymatches[$line];
00389                 reset( $matches );
00390                 while ( list( , $y ) = each( $matches ) ) {
00391                     if ( empty( $this->in_seq[$y] ) ) {
00392                         $k = $this->_lcs_pos( $y );
00393                         assert( '$k > 0' );
00394                         $ymids[$k] = $ymids[$k -1];
00395                         break;
00396                     }
00397                 }
00398                 while ( list( , $y ) = each( $matches ) ) {
00399                     if ( $y > $this->seq[$k -1] ) {
00400                         assert( '$y < $this->seq[$k]' );
00401                         // Optimization: this is a common case:
00402                         //  next match is just replacing previous match.
00403                         $this->in_seq[$this->seq[$k]] = false;
00404                         $this->seq[$k] = $y;
00405                         $this->in_seq[$y] = 1;
00406                     } elseif ( empty( $this->in_seq[$y] ) ) {
00407                         $k = $this->_lcs_pos( $y );
00408                         assert( '$k > 0' );
00409                         $ymids[$k] = $ymids[$k -1];
00410                     }
00411                 }
00412             }
00413         }
00414 
00415         $seps[] = $flip ? array( $yoff, $xoff ) : array( $xoff, $yoff );
00416         $ymid = $ymids[$this->lcs];
00417         for ( $n = 0; $n < $nchunks - 1; $n++ ) {
00418             $x1 = $xoff + (int)( ( $numer + ( $xlim - $xoff ) * $n ) / $nchunks );
00419             $y1 = $ymid[$n] + 1;
00420             $seps[] = $flip ? array( $y1, $x1 ) : array( $x1, $y1 );
00421         }
00422         $seps[] = $flip ? array( $ylim, $xlim ) : array( $xlim, $ylim );
00423 
00424         return array( $this->lcs, $seps );
00425     }
00426 
00431     function _lcs_pos( $ypos ) {
00432         $end = $this->lcs;
00433         if ( $end == 0 || $ypos > $this->seq[$end] ) {
00434             $this->seq[++$this->lcs] = $ypos;
00435             $this->in_seq[$ypos] = 1;
00436             return $this->lcs;
00437         }
00438 
00439         $beg = 1;
00440         while ( $beg < $end ) {
00441             $mid = (int)( ( $beg + $end ) / 2 );
00442             if ( $ypos > $this->seq[$mid] ) {
00443                 $beg = $mid + 1;
00444             } else {
00445                 $end = $mid;
00446             }
00447         }
00448 
00449         assert( '$ypos != $this->seq[$end]' );
00450 
00451         $this->in_seq[$this->seq[$end]] = false;
00452         $this->seq[$end] = $ypos;
00453         $this->in_seq[$ypos] = 1;
00454         return $end;
00455     }
00456 
00473     function _compareseq( $xoff, $xlim, $yoff, $ylim ) {
00474         // Slide down the bottom initial diagonal.
00475         while ( $xoff < $xlim && $yoff < $ylim && $this->xv[$xoff] == $this->yv[$yoff] ) {
00476             ++$xoff;
00477             ++$yoff;
00478         }
00479 
00480         // Slide up the top initial diagonal.
00481         while ( $xlim > $xoff && $ylim > $yoff
00482         && $this->xv[$xlim - 1] == $this->yv[$ylim - 1] ) {
00483             --$xlim;
00484             --$ylim;
00485         }
00486 
00487         if ( $xoff == $xlim || $yoff == $ylim ) {
00488             $lcs = 0;
00489         } else {
00490             // This is ad hoc but seems to work well.
00491             // $nchunks = sqrt(min($xlim - $xoff, $ylim - $yoff) / 2.5);
00492             // $nchunks = max(2,min(8,(int)$nchunks));
00493             $nchunks = min( 7, $xlim - $xoff, $ylim - $yoff ) + 1;
00494             list( $lcs, $seps ) = $this->_diag( $xoff, $xlim, $yoff, $ylim, $nchunks );
00495         }
00496 
00497         if ( $lcs == 0 ) {
00498             // X and Y sequences have no common subsequence:
00499             // mark all changed.
00500             while ( $yoff < $ylim ) {
00501                 $this->ychanged[$this->yind[$yoff++]] = 1;
00502             }
00503             while ( $xoff < $xlim ) {
00504                 $this->xchanged[$this->xind[$xoff++]] = 1;
00505             }
00506         } else {
00507             // Use the partitions to split this problem into subproblems.
00508             reset( $seps );
00509             $pt1 = $seps[0];
00510             while ( $pt2 = next( $seps ) ) {
00511                 $this->_compareseq( $pt1[0], $pt2[0], $pt1[1], $pt2[1] );
00512                 $pt1 = $pt2;
00513             }
00514         }
00515     }
00516 
00530     function _shift_boundaries( $lines, &$changed, $other_changed ) {
00531         wfProfileIn( __METHOD__ );
00532         $i = 0;
00533         $j = 0;
00534 
00535         assert( 'count($lines) == count($changed)' );
00536         $len = count( $lines );
00537         $other_len = count( $other_changed );
00538 
00539         while ( 1 ) {
00540             /*
00541              * Scan forwards to find beginning of another run of changes.
00542              * Also keep track of the corresponding point in the other file.
00543              *
00544              * Throughout this code, $i and $j are adjusted together so that
00545              * the first $i elements of $changed and the first $j elements
00546              * of $other_changed both contain the same number of zeros
00547              * (unchanged lines).
00548              * Furthermore, $j is always kept so that $j == $other_len or
00549              * $other_changed[$j] == false.
00550              */
00551             while ( $j < $other_len && $other_changed[$j] ) {
00552                 $j++;
00553             }
00554 
00555             while ( $i < $len && ! $changed[$i] ) {
00556                 assert( '$j < $other_len && ! $other_changed[$j]' );
00557                 $i++;
00558                 $j++;
00559                 while ( $j < $other_len && $other_changed[$j] ) {
00560                     $j++;
00561                 }
00562             }
00563 
00564             if ( $i == $len ) {
00565                 break;
00566             }
00567 
00568             $start = $i;
00569 
00570             // Find the end of this run of changes.
00571             while ( ++$i < $len && $changed[$i] ) {
00572                 continue;
00573             }
00574 
00575             do {
00576                 /*
00577                  * Record the length of this run of changes, so that
00578                  * we can later determine whether the run has grown.
00579                  */
00580                 $runlength = $i - $start;
00581 
00582                 /*
00583                  * Move the changed region back, so long as the
00584                  * previous unchanged line matches the last changed one.
00585                  * This merges with previous changed regions.
00586                  */
00587                 while ( $start > 0 && $lines[$start - 1] == $lines[$i - 1] ) {
00588                     $changed[--$start] = 1;
00589                     $changed[--$i] = false;
00590                     while ( $start > 0 && $changed[$start - 1] ) {
00591                         $start--;
00592                     }
00593                     assert( '$j > 0' );
00594                     while ( $other_changed[--$j] ) {
00595                         continue;
00596                     }
00597                     assert( '$j >= 0 && !$other_changed[$j]' );
00598                 }
00599 
00600                 /*
00601                  * Set CORRESPONDING to the end of the changed run, at the last
00602                  * point where it corresponds to a changed run in the other file.
00603                  * CORRESPONDING == LEN means no such point has been found.
00604                  */
00605                 $corresponding = $j < $other_len ? $i : $len;
00606 
00607                 /*
00608                  * Move the changed region forward, so long as the
00609                  * first changed line matches the following unchanged one.
00610                  * This merges with following changed regions.
00611                  * Do this second, so that if there are no merges,
00612                  * the changed region is moved forward as far as possible.
00613                  */
00614                 while ( $i < $len && $lines[$start] == $lines[$i] ) {
00615                     $changed[$start++] = false;
00616                     $changed[$i++] = 1;
00617                     while ( $i < $len && $changed[$i] ) {
00618                         $i++;
00619                     }
00620 
00621                     assert( '$j < $other_len && ! $other_changed[$j]' );
00622                     $j++;
00623                     if ( $j < $other_len && $other_changed[$j] ) {
00624                         $corresponding = $i;
00625                         while ( $j < $other_len && $other_changed[$j] ) {
00626                             $j++;
00627                         }
00628                     }
00629                 }
00630             } while ( $runlength != $i - $start );
00631 
00632             /*
00633              * If possible, move the fully-merged run of changes
00634              * back to a corresponding run in the other file.
00635              */
00636             while ( $corresponding < $i ) {
00637                 $changed[--$start] = 1;
00638                 $changed[--$i] = 0;
00639                 assert( '$j > 0' );
00640                 while ( $other_changed[--$j] ) {
00641                     continue;
00642                 }
00643                 assert( '$j >= 0 && !$other_changed[$j]' );
00644             }
00645         }
00646         wfProfileOut( __METHOD__ );
00647     }
00648 }
00649 
00656 class Diff {
00657     var $edits;
00658 
00667     function __construct( $from_lines, $to_lines ) {
00668         $eng = new _DiffEngine;
00669         $this->edits = $eng->diff( $from_lines, $to_lines );
00670         // $this->_check($from_lines, $to_lines);
00671     }
00672 
00683     function reverse() {
00684         $rev = $this;
00685         $rev->edits = array();
00686         foreach ( $this->edits as $edit ) {
00687             $rev->edits[] = $edit->reverse();
00688         }
00689         return $rev;
00690     }
00691 
00697     function isEmpty() {
00698         foreach ( $this->edits as $edit ) {
00699             if ( $edit->type != 'copy' ) {
00700                 return false;
00701             }
00702         }
00703         return true;
00704     }
00705 
00713     function lcs() {
00714         $lcs = 0;
00715         foreach ( $this->edits as $edit ) {
00716             if ( $edit->type == 'copy' ) {
00717                 $lcs += count( $edit->orig );
00718             }
00719         }
00720         return $lcs;
00721     }
00722 
00731     function orig() {
00732         $lines = array();
00733 
00734         foreach ( $this->edits as $edit ) {
00735             if ( $edit->orig ) {
00736                 array_splice( $lines, count( $lines ), 0, $edit->orig );
00737             }
00738         }
00739         return $lines;
00740     }
00741 
00750     function closing() {
00751         $lines = array();
00752 
00753         foreach ( $this->edits as $edit ) {
00754             if ( $edit->closing ) {
00755                 array_splice( $lines, count( $lines ), 0, $edit->closing );
00756             }
00757         }
00758         return $lines;
00759     }
00760 
00768     function _check( $from_lines, $to_lines ) {
00769         wfProfileIn( __METHOD__ );
00770         if ( serialize( $from_lines ) != serialize( $this->orig() ) ) {
00771             trigger_error( "Reconstructed original doesn't match", E_USER_ERROR );
00772         }
00773         if ( serialize( $to_lines ) != serialize( $this->closing() ) ) {
00774             trigger_error( "Reconstructed closing doesn't match", E_USER_ERROR );
00775         }
00776 
00777         $rev = $this->reverse();
00778         if ( serialize( $to_lines ) != serialize( $rev->orig() ) ) {
00779             trigger_error( "Reversed original doesn't match", E_USER_ERROR );
00780         }
00781         if ( serialize( $from_lines ) != serialize( $rev->closing() ) ) {
00782             trigger_error( "Reversed closing doesn't match", E_USER_ERROR );
00783         }
00784 
00785         $prevtype = 'none';
00786         foreach ( $this->edits as $edit ) {
00787             if ( $prevtype == $edit->type ) {
00788                 trigger_error( 'Edit sequence is non-optimal', E_USER_ERROR );
00789             }
00790             $prevtype = $edit->type;
00791         }
00792 
00793         $lcs = $this->lcs();
00794         trigger_error( 'Diff okay: LCS = ' . $lcs, E_USER_NOTICE );
00795         wfProfileOut( __METHOD__ );
00796     }
00797 }
00798 
00804 class MappedDiff extends Diff {
00828     function __construct( $from_lines, $to_lines,
00829         $mapped_from_lines, $mapped_to_lines ) {
00830         wfProfileIn( __METHOD__ );
00831 
00832         assert( 'count( $from_lines ) == count( $mapped_from_lines )' );
00833         assert( 'count( $to_lines ) == count( $mapped_to_lines )' );
00834 
00835         parent::__construct( $mapped_from_lines, $mapped_to_lines );
00836 
00837         $xi = $yi = 0;
00838         for ( $i = 0; $i < count( $this->edits ); $i++ ) {
00839             $orig = &$this->edits[$i]->orig;
00840             if ( is_array( $orig ) ) {
00841                 $orig = array_slice( $from_lines, $xi, count( $orig ) );
00842                 $xi += count( $orig );
00843             }
00844 
00845             $closing = &$this->edits[$i]->closing;
00846             if ( is_array( $closing ) ) {
00847                 $closing = array_slice( $to_lines, $yi, count( $closing ) );
00848                 $yi += count( $closing );
00849             }
00850         }
00851         wfProfileOut( __METHOD__ );
00852     }
00853 }
00854 
00865 class DiffFormatter {
00872     var $leading_context_lines = 0;
00873 
00880     var $trailing_context_lines = 0;
00881 
00888     function format( $diff ) {
00889         wfProfileIn( __METHOD__ );
00890 
00891         $xi = $yi = 1;
00892         $block = false;
00893         $context = array();
00894 
00895         $nlead = $this->leading_context_lines;
00896         $ntrail = $this->trailing_context_lines;
00897 
00898         $this->_start_diff();
00899 
00900         foreach ( $diff->edits as $edit ) {
00901             if ( $edit->type == 'copy' ) {
00902                 if ( is_array( $block ) ) {
00903                     if ( count( $edit->orig ) <= $nlead + $ntrail ) {
00904                         $block[] = $edit;
00905                     } else {
00906                         if ( $ntrail ) {
00907                             $context = array_slice( $edit->orig, 0, $ntrail );
00908                             $block[] = new _DiffOp_Copy( $context );
00909                         }
00910                         $this->_block( $x0, $ntrail + $xi - $x0,
00911                             $y0, $ntrail + $yi - $y0,
00912                             $block );
00913                         $block = false;
00914                     }
00915                 }
00916                 $context = $edit->orig;
00917             } else {
00918                 if ( !is_array( $block ) ) {
00919                     $context = array_slice( $context, count( $context ) - $nlead );
00920                     $x0 = $xi - count( $context );
00921                     $y0 = $yi - count( $context );
00922                     $block = array();
00923                     if ( $context ) {
00924                         $block[] = new _DiffOp_Copy( $context );
00925                     }
00926                 }
00927                 $block[] = $edit;
00928             }
00929 
00930             if ( $edit->orig ) {
00931                 $xi += count( $edit->orig );
00932             }
00933             if ( $edit->closing ) {
00934                 $yi += count( $edit->closing );
00935             }
00936         }
00937 
00938         if ( is_array( $block ) ) {
00939             $this->_block( $x0, $xi - $x0,
00940                 $y0, $yi - $y0,
00941                 $block );
00942         }
00943 
00944         $end = $this->_end_diff();
00945         wfProfileOut( __METHOD__ );
00946         return $end;
00947     }
00948 
00956     function _block( $xbeg, $xlen, $ybeg, $ylen, &$edits ) {
00957         wfProfileIn( __METHOD__ );
00958         $this->_start_block( $this->_block_header( $xbeg, $xlen, $ybeg, $ylen ) );
00959         foreach ( $edits as $edit ) {
00960             if ( $edit->type == 'copy' ) {
00961                 $this->_context( $edit->orig );
00962             } elseif ( $edit->type == 'add' ) {
00963                 $this->_added( $edit->closing );
00964             } elseif ( $edit->type == 'delete' ) {
00965                 $this->_deleted( $edit->orig );
00966             } elseif ( $edit->type == 'change' ) {
00967                 $this->_changed( $edit->orig, $edit->closing );
00968             } else {
00969                 trigger_error( 'Unknown edit type', E_USER_ERROR );
00970             }
00971         }
00972         $this->_end_block();
00973         wfProfileOut( __METHOD__ );
00974     }
00975 
00976     function _start_diff() {
00977         ob_start();
00978     }
00979 
00983     function _end_diff() {
00984         $val = ob_get_contents();
00985         ob_end_clean();
00986         return $val;
00987     }
00988 
00996     function _block_header( $xbeg, $xlen, $ybeg, $ylen ) {
00997         if ( $xlen > 1 ) {
00998             $xbeg .= ',' . ( $xbeg + $xlen - 1 );
00999         }
01000         if ( $ylen > 1 ) {
01001             $ybeg .= ',' . ( $ybeg + $ylen - 1 );
01002         }
01003 
01004         return $xbeg . ( $xlen ? ( $ylen ? 'c' : 'd' ) : 'a' ) . $ybeg;
01005     }
01006 
01007     function _start_block( $header ) {
01008         echo $header . "\n";
01009     }
01010 
01011     function _end_block() {
01012     }
01013 
01018     function _lines( $lines, $prefix = ' ' ) {
01019         foreach ( $lines as $line ) {
01020             echo "$prefix $line\n";
01021         }
01022     }
01023 
01027     function _context( $lines ) {
01028         $this->_lines( $lines );
01029     }
01030 
01034     function _added( $lines ) {
01035         $this->_lines( $lines, '>' );
01036     }
01037 
01041     function _deleted( $lines ) {
01042         $this->_lines( $lines, '<' );
01043     }
01044 
01049     function _changed( $orig, $closing ) {
01050         $this->_deleted( $orig );
01051         echo "---\n";
01052         $this->_added( $closing );
01053     }
01054 }
01055 
01060 class UnifiedDiffFormatter extends DiffFormatter {
01061     var $leading_context_lines = 2;
01062     var $trailing_context_lines = 2;
01063 
01067     function _added( $lines ) {
01068         $this->_lines( $lines, '+' );
01069     }
01070 
01074     function _deleted( $lines ) {
01075         $this->_lines( $lines, '-' );
01076     }
01077 
01082     function _changed( $orig, $closing ) {
01083         $this->_deleted( $orig );
01084         $this->_added( $closing );
01085     }
01086 
01094     function _block_header( $xbeg, $xlen, $ybeg, $ylen ) {
01095         return "@@ -$xbeg,$xlen +$ybeg,$ylen @@";
01096     }
01097 }
01098 
01103 class ArrayDiffFormatter extends DiffFormatter {
01104 
01109     function format( $diff ) {
01110         $oldline = 1;
01111         $newline = 1;
01112         $retval = array();
01113         foreach ( $diff->edits as $edit ) {
01114             switch ( $edit->type ) {
01115                 case 'add':
01116                     foreach ( $edit->closing as $l ) {
01117                         $retval[] = array(
01118                             'action' => 'add',
01119                             'new' => $l,
01120                             'newline' => $newline++
01121                         );
01122                     }
01123                     break;
01124                 case 'delete':
01125                     foreach ( $edit->orig as $l ) {
01126                         $retval[] = array(
01127                             'action' => 'delete',
01128                             'old' => $l,
01129                             'oldline' => $oldline++,
01130                         );
01131                     }
01132                     break;
01133                 case 'change':
01134                     foreach ( $edit->orig as $i => $l ) {
01135                         $retval[] = array(
01136                             'action' => 'change',
01137                             'old' => $l,
01138                             'new' => isset( $edit->closing[$i] ) ? $edit->closing[$i] : null,
01139                             'oldline' => $oldline++,
01140                             'newline' => $newline++,
01141                         );
01142                     }
01143                     break;
01144                 case 'copy':
01145                     $oldline += count( $edit->orig );
01146                     $newline += count( $edit->orig );
01147             }
01148         }
01149         return $retval;
01150     }
01151 }
01152 
01157 define( 'NBSP', '&#160;' ); // iso-8859-x non-breaking space.
01158 
01164 class _HWLDF_WordAccumulator {
01165     function __construct() {
01166         $this->_lines = array();
01167         $this->_line = '';
01168         $this->_group = '';
01169         $this->_tag = '';
01170     }
01171 
01175     function _flushGroup( $new_tag ) {
01176         if ( $this->_group !== '' ) {
01177             if ( $this->_tag == 'ins' ) {
01178                 $this->_line .= '<ins class="diffchange diffchange-inline">' .
01179                         htmlspecialchars( $this->_group ) . '</ins>';
01180             } elseif ( $this->_tag == 'del' ) {
01181                 $this->_line .= '<del class="diffchange diffchange-inline">' .
01182                         htmlspecialchars( $this->_group ) . '</del>';
01183             } else {
01184                 $this->_line .= htmlspecialchars( $this->_group );
01185             }
01186         }
01187         $this->_group = '';
01188         $this->_tag = $new_tag;
01189     }
01190 
01194     function _flushLine( $new_tag ) {
01195         $this->_flushGroup( $new_tag );
01196         if ( $this->_line != '' ) {
01197             array_push( $this->_lines, $this->_line );
01198         } else {
01199             # make empty lines visible by inserting an NBSP
01200             array_push( $this->_lines, NBSP );
01201         }
01202         $this->_line = '';
01203     }
01204 
01209     function addWords( $words, $tag = '' ) {
01210         if ( $tag != $this->_tag ) {
01211             $this->_flushGroup( $tag );
01212         }
01213 
01214         foreach ( $words as $word ) {
01215             // new-line should only come as first char of word.
01216             if ( $word == '' ) {
01217                 continue;
01218             }
01219             if ( $word[0] == "\n" ) {
01220                 $this->_flushLine( $tag );
01221                 $word = substr( $word, 1 );
01222             }
01223             assert( '!strstr( $word, "\n" )' );
01224             $this->_group .= $word;
01225         }
01226     }
01227 
01231     function getLines() {
01232         $this->_flushLine( '~done' );
01233         return $this->_lines;
01234     }
01235 }
01236 
01242 class WordLevelDiff extends MappedDiff {
01243     const MAX_LINE_LENGTH = 10000;
01244 
01249     function __construct( $orig_lines, $closing_lines ) {
01250         wfProfileIn( __METHOD__ );
01251 
01252         list( $orig_words, $orig_stripped ) = $this->_split( $orig_lines );
01253         list( $closing_words, $closing_stripped ) = $this->_split( $closing_lines );
01254 
01255         parent::__construct( $orig_words, $closing_words,
01256         $orig_stripped, $closing_stripped );
01257         wfProfileOut( __METHOD__ );
01258     }
01259 
01264     function _split( $lines ) {
01265         wfProfileIn( __METHOD__ );
01266 
01267         $words = array();
01268         $stripped = array();
01269         $first = true;
01270         foreach ( $lines as $line ) {
01271             # If the line is too long, just pretend the entire line is one big word
01272             # This prevents resource exhaustion problems
01273             if ( $first ) {
01274                 $first = false;
01275             } else {
01276                 $words[] = "\n";
01277                 $stripped[] = "\n";
01278             }
01279             if ( strlen( $line ) > self::MAX_LINE_LENGTH ) {
01280                 $words[] = $line;
01281                 $stripped[] = $line;
01282             } else {
01283                 $m = array();
01284                 if ( preg_match_all( '/ ( [^\S\n]+ | [0-9_A-Za-z\x80-\xff]+ | . ) (?: (?!< \n) [^\S\n])? /xs',
01285                     $line, $m ) )
01286                 {
01287                     foreach ( $m[0] as $word ) {
01288                         $words[] = $word;
01289                     }
01290                     foreach ( $m[1] as $stripped_word ) {
01291                         $stripped[] = $stripped_word;
01292                     }
01293                 }
01294             }
01295         }
01296         wfProfileOut( __METHOD__ );
01297         return array( $words, $stripped );
01298     }
01299 
01303     function orig() {
01304         wfProfileIn( __METHOD__ );
01305         $orig = new _HWLDF_WordAccumulator;
01306 
01307         foreach ( $this->edits as $edit ) {
01308             if ( $edit->type == 'copy' ) {
01309                 $orig->addWords( $edit->orig );
01310             } elseif ( $edit->orig ) {
01311                 $orig->addWords( $edit->orig, 'del' );
01312             }
01313         }
01314         $lines = $orig->getLines();
01315         wfProfileOut( __METHOD__ );
01316         return $lines;
01317     }
01318 
01322     function closing() {
01323         wfProfileIn( __METHOD__ );
01324         $closing = new _HWLDF_WordAccumulator;
01325 
01326         foreach ( $this->edits as $edit ) {
01327             if ( $edit->type == 'copy' ) {
01328                 $closing->addWords( $edit->closing );
01329             } elseif ( $edit->closing ) {
01330                 $closing->addWords( $edit->closing, 'ins' );
01331             }
01332         }
01333         $lines = $closing->getLines();
01334         wfProfileOut( __METHOD__ );
01335         return $lines;
01336     }
01337 }
01338 
01345 class TableDiffFormatter extends DiffFormatter {
01346     function __construct() {
01347         $this->leading_context_lines = 2;
01348         $this->trailing_context_lines = 2;
01349     }
01350 
01356     public static function escapeWhiteSpace( $msg ) {
01357         $msg = preg_replace( '/^ /m', '&#160; ', $msg );
01358         $msg = preg_replace( '/ $/m', ' &#160;', $msg );
01359         $msg = preg_replace( '/  /', '&#160; ', $msg );
01360         return $msg;
01361     }
01362 
01370     function _block_header( $xbeg, $xlen, $ybeg, $ylen ) {
01371         $r = '<tr><td colspan="2" class="diff-lineno"><!--LINE ' . $xbeg . "--></td>\n" .
01372             '<td colspan="2" class="diff-lineno"><!--LINE ' . $ybeg . "--></td></tr>\n";
01373         return $r;
01374     }
01375 
01379     function _start_block( $header ) {
01380         echo $header;
01381     }
01382 
01383     function _end_block() {
01384     }
01385 
01386     function _lines( $lines, $prefix = ' ', $color = 'white' ) {
01387     }
01388 
01394     function addedLine( $line ) {
01395         return $this->wrapLine( '+', 'diff-addedline', $line );
01396     }
01397 
01403     function deletedLine( $line ) {
01404         return $this->wrapLine( '−', 'diff-deletedline', $line );
01405     }
01406 
01412     function contextLine( $line ) {
01413         return $this->wrapLine( '&#160;', 'diff-context', $line );
01414     }
01415 
01422     private function wrapLine( $marker, $class, $line ) {
01423         if ( $line !== '' ) {
01424             // The <div> wrapper is needed for 'overflow: auto' style to scroll properly
01425             $line = Xml::tags( 'div', null, $this->escapeWhiteSpace( $line ) );
01426         }
01427         return "<td class='diff-marker'>$marker</td><td class='$class'>$line</td>";
01428     }
01429 
01433     function emptyLine() {
01434         return '<td colspan="2">&#160;</td>';
01435     }
01436 
01440     function _added( $lines ) {
01441         foreach ( $lines as $line ) {
01442             echo '<tr>' . $this->emptyLine() .
01443             $this->addedLine( '<ins class="diffchange">' .
01444             htmlspecialchars( $line ) . '</ins>' ) . "</tr>\n";
01445         }
01446     }
01447 
01451     function _deleted( $lines ) {
01452         foreach ( $lines as $line ) {
01453             echo '<tr>' . $this->deletedLine( '<del class="diffchange">' .
01454             htmlspecialchars( $line ) . '</del>' ) .
01455             $this->emptyLine() . "</tr>\n";
01456         }
01457     }
01458 
01462     function _context( $lines ) {
01463         foreach ( $lines as $line ) {
01464             echo '<tr>' .
01465             $this->contextLine( htmlspecialchars( $line ) ) .
01466             $this->contextLine( htmlspecialchars( $line ) ) . "</tr>\n";
01467         }
01468     }
01469 
01474     function _changed( $orig, $closing ) {
01475         wfProfileIn( __METHOD__ );
01476 
01477         $diff = new WordLevelDiff( $orig, $closing );
01478         $del = $diff->orig();
01479         $add = $diff->closing();
01480 
01481         # Notice that WordLevelDiff returns HTML-escaped output.
01482         # Hence, we will be calling addedLine/deletedLine without HTML-escaping.
01483 
01484         while ( $line = array_shift( $del ) ) {
01485             $aline = array_shift( $add );
01486             echo '<tr>' . $this->deletedLine( $line ) .
01487             $this->addedLine( $aline ) . "</tr>\n";
01488         }
01489         foreach ( $add as $line ) { # If any leftovers
01490             echo '<tr>' . $this->emptyLine() .
01491             $this->addedLine( $line ) . "</tr>\n";
01492         }
01493         wfProfileOut( __METHOD__ );
01494     }
01495 }