[ Index ] |
PHP Cross Reference of moodle-2.8 |
[Summary view] [Print] [Text view]
1 <?php 2 /** 3 * Standard diff function plus some extras for handling XHTML diffs. 4 * @copyright © 2007 The Open University 5 * @author [email protected] 6 * @license http://www.gnu.org/copyleft/gpl.html GNU Public License 7 * @package ouwiki 8 *//** */ 9 10 // Standard diff 11 //////////////// 12 13 /** 14 * Basic diff utility function, using standard diff algorithm. 15 * 16 * Based on Bell Laboratories Computing Science Technical Report #41, 17 * July 1976, Hunt & McIlroy, Appendix A.1 and A.3. 18 * 19 * http://www.cs.dartmouth.edu/~doug/diff.ps 20 * 21 * @param array $file1 Array of lines in file 1. The first line in the file 22 * MUST BE INDEX 1 NOT ZERO!! 23 * @param array $file2 Array of lines in file 2, again starting from 1. 24 * @return array An array with one entry (again 1-based) for each line in 25 * file 1, with its corresponding position in file 2 or 0 if it isn't there. 26 */ 27 function ouwiki_diff_internal($file1,$file2) { 28 // Basic variables 29 $n=count($file2); 30 $m=count($file1); 31 32 // Special-case for empty file2 which otherwise causes error 33 if($n==0) 34 { 35 $result=array(); 36 for($i=1;$i<=$m;$i++) 37 { 38 $result[$i]=0; 39 } 40 return $result; 41 } 42 43 // Step 1 Build list of elements 44 ///////// 45 46 $V=array(); 47 for($j=1;$j<=$n;$j++) { 48 $V[$j]=new StdClass; 49 $V[$j]->serial=$j; 50 $V[$j]->hash=crc32($file2[$j]); 51 } 52 53 // Step 2 Sort by hash,serial 54 ///////// 55 56 usort($V,"ouwiki_diff_sort_v"); 57 58 // Make it start from 1 again 59 array_unshift($V,'bogus'); 60 unset($V[0]); 61 62 // $V is now an array including the line number 'serial' and hash 63 // of each line in file 2, sorted by hash and then serial. 64 65 // Step 3 Equivalence classes 66 ///////// 67 68 $E=array(); 69 $E[0]=new StdClass; 70 $E[0]->serial=0; 71 $E[0]->last=true; 72 for($j=1;$j<=$n;$j++) { 73 $E[$j]=new StdClass; 74 $E[$j]->serial=$V[$j]->serial; 75 $E[$j]->last=$j===$n || $V[$j]->hash!==$V[$j+1]->hash; 76 } 77 78 // E is now an array sorted the same way as $V which includes 79 // the line number 'serial' and whether or not that is the 'last' 80 // line in the given equivalence class, i.e. set of identical lines 81 82 // Step 4 For each line in file1, finds start of equivalence class 83 ///////// 84 $P=array(); 85 for($i=1;$i<=$m;$i++) { 86 // Find matching last entry from equivalence list 87 $P[$i]=ouwiki_diff_find_last($V,$E,crc32($file1[$i])); 88 } 89 90 // P is now an array that finds the index (within $V) of the *first* 91 // matching line in $V (referencing file 2, but not a line number, 92 // because sorted in $V order) for each line in file 1. In other words 93 // if you were to start at the P-value in $V and continue through, you 94 // would find all the lines from file 2 that are equal to the given line 95 // from file 1. 96 97 // Step 5 Initialise vector of candidates 98 ///////// 99 100 // I do not trust PHP references further than I can throw them (preferably 101 // at the idiot who came up with the idea) so I am using a separate array 102 // to store candidates and all references are integers into that. 103 104 $candidates=array(); 105 $candidates[0]=new StdClass; 106 $candidates[0]->a=0; 107 $candidates[0]->b=0; 108 $candidates[0]->previous=null; 109 $candidates[1]=new StdClass; 110 $candidates[1]->a=$m+1; 111 $candidates[1]->b=$n+1; 112 $candidates[1]->previous=null; 113 114 $K=array(); 115 $K[0]=0; // Ref to candidate 0 116 $K[1]=1; // Ref to candidate 1 117 $k=0; 118 119 // Step 6 Merge stage 120 ///////// 121 122 for($i=1;$i<=$m;$i++) { 123 if($P[$i]!==0) { 124 ouwiki_diff_merge($K,$k,$i,$E,$P[$i],$candidates); 125 } 126 } 127 128 // Step 7 129 ///////// 130 131 $J=array(); 132 for($i=1;$i<=$m;$i++) { 133 $J[$i]=0; 134 } 135 136 // Step 8 Follow candidate chain to make nice representation 137 ///////// 138 139 $index=$K[$k]; 140 while(!is_null($index)) { 141 // Stop when we reach the first, dummy candidate 142 if($candidates[$index]->a!=0) { 143 $J[$candidates[$index]->a]=$candidates[$index]->b; 144 } 145 $index=$candidates[$index]->previous; 146 } 147 148 // Step 9 Get rid of 'jackpots' (hash collisions) 149 ///////// 150 151 for($i=1;$i<=$m;$i++) { 152 if($J[$i]!=0 && $file1[$i]!=$file2[$J[$i]]) { 153 $J[$i]=0; 154 } 155 } 156 157 // Done! (Maybe.) 158 return $J; 159 } 160 161 // Functions needed by parts of the algorithm 162 ///////////////////////////////////////////// 163 164 // Merge, from step 7 (Appendix A.3) 165 function ouwiki_diff_merge(&$K,&$k,$i,&$E,$p,&$candidates) { 166 $r=0; 167 $c=$K[0]; 168 169 while(true) { 170 $j=$E[$p]->serial; // Paper says 'i' but this is wrong (OCR) 171 172 // Binary search in $K from $r to $k 173 $min=$r; 174 $max=$k+1; 175 176 while(true) { 177 $try = (int)(($min+$max)/2); 178 if($candidates[$K[$try]]->b >= $j) { 179 $max=$try; 180 } else if($candidates[$K[$try+1]]->b <= $j) { 181 $min=$try+1; 182 } else { // $try is less and $try+1 is more 183 $s=$try; 184 break; 185 } 186 if($max<=$min) { 187 $s=-1; 188 break; 189 } 190 } 191 192 if($s>-1) { 193 if($candidates[$K[$s+1]]->b > $j) { 194 // Create new candidate 195 $index=count($candidates); 196 $candidates[$index]=new StdClass; 197 $candidates[$index]->a=$i; 198 $candidates[$index]->b=$j; 199 $candidates[$index]->previous=$K[$s]; 200 $K[$r]=$c; 201 $r=$s+1; 202 $c=$index; // Or should this go before? 203 } 204 205 if($s===$k) { 206 $K[$k+2]=$K[$k+1]; 207 $k++; 208 break; 209 } 210 } 211 212 if($E[$p]->last) { 213 break; 214 } 215 216 $p++; 217 } 218 $K[$r]=$c; 219 220 } 221 222 // From Step 2 223 function ouwiki_diff_sort_v($a,$b) { 224 if($a->hash < $b->hash) { 225 return -1; 226 } else if($a->hash > $b->hash) { 227 return 1; 228 } else if($a->serial < $b->serial) { 229 return -1; 230 } else if($a->serial > $b->serial) { 231 return 1; 232 } else { 233 return 0; 234 } 235 } 236 237 // From Step 4 238 function ouwiki_diff_find_last(&$V,&$E,$hash) { 239 // Binary search in $V until we find something with $hash 240 241 // Min = 1, array is 1-indexed 242 $min=1; 243 // Max = 1 higher than highest key 244 end($V); 245 $max=key($V)+1; 246 while(true) { 247 $try = (int)(($min+$max)/2); 248 if($V[$try]->hash > $hash) { 249 $max=$try; 250 } else if($V[$try]->hash < $hash) { 251 $min=$try+1; 252 } else { // Equal 253 break; 254 } 255 if($max<=$min) { 256 // No matching line 257 return 0; 258 } 259 } 260 261 // Now check back in $E to find the first line of that equivalence class 262 for($j=$try;!$E[$j-1]->last;$j--) ; 263 return $j; 264 } 265 266 /////////////////////////// 267 268 269 /** 270 * Class representing one 'line' of HTML content for the purpose of 271 * text comparison. 272 */ 273 class ouwiki_line { 274 /** Array of ouwiki_words */ 275 var $words=array(); 276 277 /** 278 * Construct line object based on a chunk of text. 279 * @param string $data Text data that makes up this 'line'. (May include line breaks etc.) 280 * @param int $linepos Position number for first character in text 281 */ 282 function ouwiki_line($data,$linepos) { 283 // 1. Turn things we don't want into spaces (so that positioning stays same) 284 285 // Whitespace replaced with space 286 $data=preg_replace('/\s/',' ',$data); 287 288 // Various ways of writing non-breaking space replaced with space 289 // Note that using a single param for replace only works because all 290 // the search strings are 6 characters long 291 $data=str_replace(array(' ',' ',' '),' ',$data); 292 293 // Tags replaced with equal number of spaces 294 $data=preg_replace_callback('/<.*?'.'>/',create_function( 295 '$matches','return preg_replace("/./"," ",$matches[0]);'),$data); 296 297 // 2. Analyse string so that each space-separated thing 298 // is counted as a 'word' (note these may not be real words, 299 // for instance words may include punctuation at either end) 300 $pos=0; 301 while(true) { 302 // Find a non-space 303 $strlendata = strlen($data); 304 for(;$pos < $strlendata && substr($data,$pos,1)===' ';$pos++) ; 305 if($pos==$strlendata) { 306 // No more content 307 break; 308 } 309 310 // Aaaand find the next space after that 311 $space2=strpos($data,' ',$pos); 312 if($space2===false) { 313 // No more spaces? Everything left must be a word 314 $this->words[]=new ouwiki_word(substr($data,$pos),$pos+$linepos); 315 break; 316 } else { 317 $this->words[]=new ouwiki_word(substr($data,$pos,$space2-$pos),$pos+$linepos); 318 $pos=$space2; 319 } 320 } 321 } 322 323 /** 324 * @return string Normalised string representation of this line object 325 */ 326 function get_as_string() { 327 $result=''; 328 foreach($this->words as $word) { 329 if($result!=='') { 330 $result.=' '; 331 } 332 $result.=$word->word; 333 } 334 return $result; 335 } 336 337 /** 338 * Static function converts lines to strings. 339 * @param array $lines Array of ouwiki_line 340 * @return array Array of strings 341 */ 342 static function get_as_strings($lines) { 343 $strings=array(); 344 foreach($lines as $key=>$value) { 345 $strings[$key]=$value->get_as_string(); 346 } 347 return $strings; 348 } 349 350 351 /** 352 * @return True if there are no words in the line 353 */ 354 function is_empty() { 355 return count($this->words)===0; 356 } 357 } 358 359 /** 360 * Represents single word for html comparison. Note that words 361 * are just chunks of plain text and may not be actual words; 362 * they could include punctuation or (if there was e.g. a span 363 * in the middle of something) even be part-words. 364 */ 365 class ouwiki_word { 366 /** Word as plain string */ 367 var $word; 368 /** Start position in original xhtml */ 369 var $start; 370 371 function ouwiki_word($word,$start) { 372 $this->word=$word; 373 $this->start=$start; 374 } 375 } 376 377 /** 378 * Prepares XHTML content for text difference comparison. 379 * @param string $content XHTML content [NO SLASHES] 380 * @return array Array of ouwiki_line objects 381 */ 382 function ouwiki_diff_html_to_lines($content) { 383 // These functions are a pain mostly because PHP preg_* don't provide 384 // proper information as to the start/end position of matches. As a 385 // consequence there is a lot of hackery going down. At every point we 386 // replace things with spaces rather than getting rid, in order to store 387 // positions within original content. 388 389 // Get rid of all script, style, object tags (that might contain non-text 390 // outside tags) 391 $content=preg_replace_callback( 392 '^(<script .*?</script>)|(<object .*?</object>)|(<style .*?</style>)^i',create_function( 393 '$matches','return preg_replace("/./"," ",$matches[0]);'),$content); 394 395 // Get rid of all ` symbols as we are going to use these for a marker later. 396 $content=preg_replace('/[`]/',' ',$content); 397 398 // Put line breaks on block tags. Mark each line break with ` symbol 399 $blocktags=array('p','div','h1','h2','h3','h4','h5','h6','td','li'); 400 $taglist=''; 401 foreach($blocktags as $blocktag) { 402 if($taglist!=='') { 403 $taglist.='|'; 404 } 405 $taglist.="<$blocktag>|<\\/$blocktag>"; 406 } 407 $content=preg_replace_callback('/(('.$taglist.')\s*)+/i',create_function( 408 '$matches','return "`".preg_replace("/./"," ",substr($matches[0],1));'),$content); 409 410 // Now go through splitting each line 411 $lines=array(); $index=1; 412 $pos=0; 413 while($pos<strlen($content)) { 414 $nextline=strpos($content,'`',$pos); 415 if($nextline===false) { 416 // No more line breaks? Take content to end 417 $nextline=strlen($content); 418 } 419 420 $linestr=substr($content,$pos,$nextline-$pos); 421 $line=new ouwiki_line($linestr,$pos); 422 if(!$line->is_empty()) { 423 $lines[$index++]=$line; 424 } 425 $pos=$nextline+1; 426 } 427 return $lines; 428 } 429 430 /** 431 * Represents a changed area of file and where it is located in the 432 * two source files. 433 */ 434 class ouwiki_change_range { 435 var $file1start,$file1count; 436 var $file2start,$file2count; 437 } 438 439 /** 440 * A more logical representation of the results from ouwiki_internal_diff() 441 */ 442 class ouwiki_changes { 443 444 /** Array of indexes (in file 2) of added lines */ 445 var $adds; 446 447 /** Array of indexes (in file 1) of deleted lines */ 448 var $deletes; 449 450 /** Array of changed ranges */ 451 var $changes; 452 453 /** 454 * @param array $diff Array from line indices in file1 455 * to indices in file2. All indices 1-based. 456 * @param int $count2 Number of lines in file2 457 */ 458 function ouwiki_changes($diff,$count2) { 459 // Find deleted lines 460 $this->deletes=self::internal_find_deletes($diff,$count2); 461 462 // Added lines work the same way after the comparison is 463 // reversed. 464 $this->adds=self::internal_find_deletes( 465 ouwiki_diff_internal_flip($diff,$count2),count($diff)); 466 467 // Changed ranges are all the other lines from file 1 that 468 // weren't found in file 2 but aren't deleted, and the 469 // corresponding lines from file 2 (between the equivalent 470 // 'found' lines). 471 $this->changes=array(); 472 $matchbefore=0; 473 $inrange=-1; $lastrange=-1; 474 foreach($diff as $index1=>$index2) { 475 // Changed line if this isn't in 'deleted' section and 476 // doesn't have a match in file2. 477 if($index2===0 && !in_array($index1,$this->deletes)) { 478 if($inrange===-1) { 479 // Not already in a range, start a new one at array end 480 $inrange=count($this->changes); 481 $this->changes[$inrange]=new ouwiki_change_range; 482 $this->changes[$inrange]->file1start=$index1; 483 $this->changes[$inrange]->file1count=1; 484 $this->changes[$inrange]->file2start=$matchbefore+1; // Last valid from file2 485 $this->changes[$inrange]->file2count=0; 486 $lastrange=$inrange; 487 } else { 488 // One more line that gets added to the range 489 $this->changes[$inrange]->file1count++; 490 } 491 } else { 492 // Not in a range any more 493 $inrange=-1; 494 // If we have a line match... 495 if($index2!==0) { 496 // Remember this line as next range must start after it 497 $matchbefore=$index2; 498 // If last range is still looking for a number, fill that in too 499 if($lastrange!==-1) { 500 $this->changes[$lastrange]->file2count=$index2 501 -$this->changes[$lastrange]->file2start; 502 $lastrange=-1; 503 } 504 } 505 } 506 } 507 // Unfinished range in file2 gets end of file 508 if($lastrange!==-1) { 509 $this->changes[$lastrange]->file2count=$count2 510 -$this->changes[$lastrange]->file2start+1; 511 } 512 } 513 514 /** 515 * Find deleted lines. These are lines in file1 that 516 * cannot be present even in modified form in file2 517 * because we have matching lines around them. 518 * O(n) algorithm. 519 * @param array $diff Array of file1->file2 indexes 520 * @param int $count2 Count of lines in file2 521 */ 522 function internal_find_deletes($diff,$count2) { 523 $deletes=array(); 524 525 // 1. Create a new array that includes the lowest-valued 526 // index2 value below each run of 0s. 527 // I.e. if our array is say 1,2,0,0,0,3,0 then the 528 // resulting array will be -,-,3,3,3,-,0 529 $squidges=array(); 530 $lowest=0; 531 $countdiff = count($diff); 532 for($index1=$countdiff;$index1>=1;$index1--) { 533 $index2=$diff[$index1]; 534 if($index2===0) { 535 $squidges[$index1]=$lowest; 536 } else { 537 $lowest=$index2; 538 } 539 } 540 541 // 2. OK now we can use this new array to work out 542 // items that are known to be deleted because we 543 // have matching items either side 544 $highest=0; 545 foreach($diff as $index1=>$index2) { 546 if($index2===0) { 547 if($highest===$count2 || $highest+1===$squidges[$index1]) { 548 // Yep! Definitely deleted. 549 $deletes[]=$index1; 550 } 551 } else { 552 $highest=$index2; 553 } 554 } 555 return $deletes; 556 } 557 } 558 559 /** 560 * Flips around the array returned by ouwiki_diff_internal 561 * so that it refers to lines from the other file. 562 * @param array $diff Array of index1=>index2 563 * @param int $count2 Count of lines in file 2 564 * @return array Flipped version 565 */ 566 function ouwiki_diff_internal_flip($diff,$count2) { 567 $flip=array(); 568 for($i=1;$i<=$count2;$i++) { 569 $flip[$i]=0; 570 } 571 foreach($diff as $index1=>$index2) { 572 if($index2!==0) { 573 $flip[$index2]=$index1; 574 } 575 } 576 return $flip; 577 } 578 579 /** 580 * Compares two files based initially on lines and then on words within the lines that 581 * differ. 582 * @param array $lines1 Array of ouwiki_line 583 * @param array $lines2 Array of ouwiki_line 584 * @return array (deleted,added); deleted and added are arrays of ouwiki_word with 585 * position numbers from $lines1 and $lines2 respectively 586 */ 587 function ouwiki_diff_words($lines1,$lines2) { 588 // Prepare arrays 589 $deleted=array(); 590 $added=array(); 591 // Get line difference 592 $linediff=ouwiki_diff( 593 ouwiki_line::get_as_strings($lines1), 594 ouwiki_line::get_as_strings($lines2)); 595 596 // Handle lines that were entirely deleted 597 foreach($linediff->deletes as $deletedline) { 598 $deleted = array_merge($deleted, $lines1[$deletedline]->words); 599 } 600 // And ones that were entirely added 601 foreach($linediff->adds as $addedline) { 602 $added = array_merge($added, $lines2[$addedline]->words); 603 } 604 605 // Changes get diffed at the individual-word level 606 foreach($linediff->changes as $changerange) { 607 // Build list of all words in each side of the range 608 $file1words=array(); 609 for($index=$changerange->file1start; 610 $index<$changerange->file1start+$changerange->file1count;$index++) { 611 foreach($lines1[$index]->words as $word) { 612 $file1words[]=$word; 613 } 614 } 615 $file2words=array(); 616 for($index=$changerange->file2start; 617 $index<$changerange->file2start+$changerange->file2count;$index++) { 618 foreach($lines2[$index]->words as $word) { 619 $file2words[]=$word; 620 } 621 } 622 623 // Make arrays 1-based 624 array_unshift($file1words,'dummy'); 625 unset($file1words[0]); 626 array_unshift($file2words,'dummy'); 627 unset($file2words[0]); 628 629 // Convert word lists into plain strings 630 $file1strings=array(); 631 foreach($file1words as $index=>$word) { 632 $file1strings[$index]=$word->word; 633 } 634 $file2strings=array(); 635 foreach($file2words as $index=>$word) { 636 $file2strings[$index]=$word->word; 637 } 638 639 // Run diff on strings 640 $worddiff=ouwiki_diff($file1strings,$file2strings); 641 foreach($worddiff->adds as $index) { 642 $added[]=$file2words[$index]; 643 } 644 foreach($worddiff->deletes as $index) { 645 $deleted[]=$file1words[$index]; 646 } 647 foreach($worddiff->changes as $changerange) { 648 for($index=$changerange->file1start; 649 $index<$changerange->file1start+$changerange->file1count;$index++) { 650 $deleted[]=$file1words[$index]; 651 } 652 for($index=$changerange->file2start; 653 $index<$changerange->file2start+$changerange->file2count;$index++) { 654 $added[]=$file2words[$index]; 655 } 656 } 657 } 658 659 return array($deleted,$added); 660 } 661 662 /** 663 * Runs diff and interprets results into ouwiki_changes object. 664 * @param array $file1 Array of lines in file 1. The first line in the file 665 * MUST BE INDEX 1 NOT ZERO!! 666 * @param array $file2 Array of lines in file 2, again starting from 1. 667 * @return ouwiki_changes Object describing changes 668 */ 669 function ouwiki_diff($file1,$file2) { 670 return new ouwiki_changes(ouwiki_diff_internal($file1,$file2),count($file2)); 671 } 672 673 /** 674 * Adds HTML span elements to $html around the words listed in $words. 675 * @param string $html HTML content 676 * @param array $words Array of ouwiki_word to mark 677 * @param string $markerclass Name of class for span element 678 * @return HTML with markup added 679 */ 680 function ouwiki_diff_add_markers($html,$words,$markerclass,$beforetext,$aftertext) { 681 // Sort words by start position 682 usort($words, create_function('$a,$b','return $a->start-$b->start;')); 683 684 // Add marker for each word. We use an odd tag name which will 685 // be replaced by span later, this for ease of replacing 686 $spanstart="<ouwiki_diff_add_markers>"; 687 $pos=0; 688 $result=''; 689 foreach($words as $word) { 690 // Add everything up to the word 691 $result.=substr($html,$pos,$word->start-$pos); 692 // Add word 693 $result.=$spanstart.$word->word.'</ouwiki_diff_add_markers>'; 694 // Update position 695 $pos=$word->start+strlen($word->word); 696 } 697 698 // Add everything after last word 699 $result.=substr($html,$pos); 700 701 // If we end a marker then immediately start one, get rid of 702 // both the end and start 703 $result=preg_replace('^</ouwiki_diff_add_markers>(\s*)<ouwiki_diff_add_markers>^','$1',$result); 704 705 // Turn markers into proper span 706 $result=preg_replace('^<ouwiki_diff_add_markers>^',$beforetext.'<span class="'.$markerclass.'">',$result); 707 $result=preg_replace('^</ouwiki_diff_add_markers>^','</span>'.$aftertext,$result); 708 709 return $result; 710 } 711 712 /** 713 * Compares two HTML files. (This is the main function that everything else supports.) 714 * @param string $html1 XHTML for file 1 715 * @param string $html2 XHTML for file 2 716 * @return array ($result1,$result2) to be displayed indicating the differences 717 */ 718 function ouwiki_diff_html($html1,$html2) { 719 $lines1=ouwiki_diff_html_to_lines($html1); 720 $lines2=ouwiki_diff_html_to_lines($html2); 721 list($deleted,$added)=ouwiki_diff_words($lines1,$lines2); 722 $result1=ouwiki_diff_add_markers($html1,$deleted,'ouw_deleted', 723 '<strong class="accesshide">'.get_string('deletedbegins','wiki').'</strong>', 724 '<strong class="accesshide">'.get_string('deletedends','wiki').'</strong>'); 725 $result2=ouwiki_diff_add_markers($html2,$added,'ouw_added', 726 '<strong class="accesshide">'.get_string('addedbegins','wiki').'</strong>', 727 '<strong class="accesshide">'.get_string('addedends','wiki').'</strong>'); 728 return array($result1,$result2); 729 } 730
title
Description
Body
title
Description
Body
title
Description
Body
title
Body
Generated: Fri Nov 28 20:29:05 2014 | Cross-referenced by PHPXref 0.7.1 |