[ Index ] |
PHP Cross Reference of Phabricator |
[Summary view] [Print] [Text view]
1 <?php 2 /** 3 * Diff Match and Patch 4 * 5 * Copyright 2006 Google Inc. 6 * http://code.google.com/p/google-diff-match-patch/ 7 * 8 * php port by Tobias Buschor shwups.ch 9 * 10 * Licensed under the Apache License, Version 2.0 (the "License"); 11 * you may not use this file except in compliance with the License. 12 * You may obtain a copy of the License at 13 * 14 * http://www.apache.org/licenses/LICENSE-2.0 15 * 16 * Unless required by applicable law or agreed to in writing, software 17 * distributed under the License is distributed on an "AS IS" BASIS, 18 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 19 * See the License for the specific language governing permissions and 20 * limitations under the License. 21 */ 22 23 /** 24 * @fileoverview Computes the difference between two texts to create a patch. 25 * Applies the patch onto another text, allowing for errors. 26 * @author [email protected] (Neil Fraser) 27 */ 28 29 /** 30 * Class containing the diff, match and patch methods. 31 * @constructor 32 */ 33 class diff_match_patch { 34 35 // Defaults. 36 // Redefine these in your program to override the defaults. 37 38 // Number of seconds to map a diff before giving up (0 for infinity). 39 public $Diff_Timeout = 1.0; 40 // Cost of an empty edit operation in terms of edit characters. 41 public $Diff_EditCost = 4; 42 // The size beyond which the double-ended diff activates. 43 // Double-ending is twice as fast, but less accurate. 44 public $Diff_DualThreshold = 32; 45 // At what point is no match declared (0.0 = perfection, 1.0 = very loose). 46 public $Match_Threshold = 0.5; 47 // How far to search for a match (0 = exact location, 1000+ = broad match). 48 // A match this many characters away from the expected location will add 49 // 1.0 to the score (0.0 is a perfect match). 50 public $Match_Distance = 1000; 51 // When deleting a large block of text (over ~64 characters), how close does 52 // the contents have to match the expected contents. (0.0 = perfection, 53 // 1.0 = very loose). Note that Match_Threshold controls how closely the 54 // end points of a delete need to match. 55 public $Patch_DeleteThreshold = 0.5; 56 // Chunk size for context length. 57 public $Patch_Margin = 4; 58 59 /** 60 * Compute the number of bits in an int. 61 * The normal answer for JavaScript is 32. 62 * @return {number} Max bits 63 64 function getMaxBits() { 65 var maxbits = 0; 66 var oldi = 1; 67 var newi = 2; 68 while (oldi != newi) { 69 maxbits++; 70 oldi = newi; 71 newi = newi << 1; 72 } 73 return maxbits; 74 } 75 // How many bits in a number? 76 this.Match_MaxBits = getMaxBits(); 77 */ 78 // DIFF FUNCTIONS 79 80 /** 81 * Find the differences between two texts. Simplifies the problem by stripping 82 * any common prefix or suffix off the texts before diffing. 83 * @param {string} text1 Old string to be diffed. 84 * @param {string} text2 New string to be diffed. 85 * @param {boolean} opt_checklines Optional speedup flag. If present and false, 86 * then don't run a line-level diff first to identify the changed areas. 87 * Defaults to true, which does a faster, slightly less optimal diff 88 * @return {Array.<Array.<number|string>>} Array of diff tuples. 89 */ 90 function diff_main($text1, $text2, $checklines = true) { 91 // Check for equality (speedup) 92 if ($text1 === $text2) { 93 return array ( array ( DIFF_EQUAL, $text1) ); 94 } 95 96 // Trim off common prefix (speedup) 97 $commonlength = $this->diff_commonPrefix($text1, $text2); 98 $commonprefix = mb_substr($text1, 0, $commonlength); 99 $text1 = mb_substr($text1, $commonlength); 100 $text2 = mb_substr($text2, $commonlength); 101 102 // Trim off common suffix (speedup) 103 $commonlength = $this->diff_commonSuffix($text1, $text2); 104 $commonsuffix = mb_substr($text1, mb_strlen($text1) - $commonlength); 105 $text1 = mb_substr($text1, 0, mb_strlen($text1) - $commonlength); 106 $text2 = mb_substr($text2, 0, mb_strlen($text2) - $commonlength); 107 108 // Compute the diff on the middle block 109 $diffs = $this->diff_compute($text1, $text2, $checklines); 110 111 // Restore the prefix and suffix 112 if ($commonprefix !== '') { 113 array_unshift($diffs, array ( DIFF_EQUAL, $commonprefix )); 114 } 115 if ($commonsuffix !== '') { 116 array_push($diffs, array ( DIFF_EQUAL, $commonsuffix )); 117 } 118 $this->diff_cleanupMerge($diffs); 119 return $diffs; 120 } 121 122 /** 123 * Find the differences between two texts. Assumes that the texts do not 124 * have any common prefix or suffix. 125 * @param {string} text1 Old string to be diffed. 126 * @param {string} text2 New string to be diffed. 127 * @param {boolean} checklines Speedup flag. If false, then don't run a 128 * line-level diff first to identify the changed areas. 129 * If true, then run a faster, slightly less optimal diff 130 * @return {Array.<Array.<number|string>>} Array of diff tuples. 131 * @private 132 */ 133 function diff_compute($text1, $text2, $checklines) { 134 135 if ($text1 === '') { 136 // Just add some text (speedup) 137 return array ( array ( DIFF_INSERT, $text2 ) ); 138 } 139 140 if ($text2 === '') { 141 // Just delete some text (speedup) 142 return array ( array ( DIFF_DELETE, $text1 ) ); 143 } 144 145 $longtext = mb_strlen($text1) > mb_strlen($text2) ? $text1 : $text2; 146 $shorttext = mb_strlen($text1) > mb_strlen($text2) ? $text2 : $text1; 147 $i = mb_strpos($longtext, $shorttext); 148 if ($i !== false) { 149 // Shorter text is inside the longer text (speedup) 150 $diffs = array ( 151 array ( DIFF_INSERT, mb_substr($longtext, 0, $i) ), 152 array ( DIFF_EQUAL, $shorttext ), 153 array ( DIFF_INSERT, mb_substr($longtext, $i +mb_strlen($shorttext)) ) 154 ); 155 156 // Swap insertions for deletions if diff is reversed. 157 if (mb_strlen($text1) > mb_strlen($text2)) { 158 $diffs[0][0] = $diffs[2][0] = DIFF_DELETE; 159 } 160 return $diffs; 161 } 162 $longtext = $shorttext = null; // Garbage collect 163 164 // Check to see if the problem can be split in two. 165 $hm = $this->diff_halfMatch($text1, $text2); 166 if ($hm) { 167 // A half-match was found, sort out the return data. 168 $text1_a = $hm[0]; 169 $text1_b = $hm[1]; 170 $text2_a = $hm[2]; 171 $text2_b = $hm[3]; 172 $mid_common = $hm[4]; 173 // Send both pairs off for separate processing. 174 $diffs_a = $this->diff_main($text1_a, $text2_a, $checklines); 175 $diffs_b = $this->diff_main($text1_b, $text2_b, $checklines); 176 // Merge the results. 177 return array_merge($diffs_a, array ( 178 array ( 179 DIFF_EQUAL, 180 $mid_common 181 ) 182 ), $diffs_b); 183 } 184 185 // Perform a real diff. 186 if ($checklines && (mb_strlen($text1) < 100 || mb_strlen($text2) < 100)) { 187 // Too trivial for the overhead. 188 $checklines = false; 189 } 190 $linearray = null; 191 if ($checklines) { 192 // Scan the text on a line-by-line basis first. 193 $a = $this->diff_linesToChars($text1, $text2); 194 $text1 = $a[0]; 195 $text2 = $a[1]; 196 $linearray = $a[2]; 197 } 198 $diffs = $this->diff_map($text1, $text2); 199 if (!$diffs) { 200 // No acceptable result. 201 $diffs = array ( 202 array ( 203 DIFF_DELETE, 204 $text1 205 ), 206 array ( 207 DIFF_INSERT, 208 $text2 209 ) 210 ); 211 } 212 if ($checklines) { 213 // Convert the diff back to original text. 214 $this->diff_charsToLines($diffs, $linearray); 215 // Eliminate freak matches (e.g. blank lines) 216 $this->diff_cleanupSemantic($diffs); 217 218 // Rediff any replacement blocks, this time character-by-character. 219 // Add a dummy entry at the end. 220 array_push($diffs, array ( 221 DIFF_EQUAL, 222 '' 223 )); 224 $pointer = 0; 225 $count_delete = 0; 226 $count_insert = 0; 227 $text_delete = ''; 228 $text_insert = ''; 229 while ($pointer < count($diffs)) { 230 switch ($diffs[$pointer][0]) { 231 case DIFF_INSERT : 232 $count_insert++; 233 $text_insert .= $diffs[$pointer][1]; 234 break; 235 case DIFF_DELETE : 236 $count_delete++; 237 $text_delete .= $diffs[$pointer][1]; 238 break; 239 case DIFF_EQUAL : 240 // Upon reaching an equality, check for prior redundancies. 241 if ($count_delete >= 1 && $count_insert >= 1) { 242 // Delete the offending records and add the merged ones. 243 $a = $this->diff_main($text_delete, $text_insert, false); 244 array_splice($diffs, $pointer - $count_delete - $count_insert, $count_delete + $count_insert); 245 246 $pointer = $pointer - $count_delete - $count_insert; 247 for ($j = count($a) - 1; $j >= 0; $j--) { 248 array_splice($diffs, $pointer, 0, array($a[$j])); 249 } 250 $pointer = $pointer +count($a); 251 } 252 $count_insert = 0; 253 $count_delete = 0; 254 $text_delete = ''; 255 $text_insert = ''; 256 break; 257 } 258 $pointer++; 259 } 260 array_pop($diffs); // Remove the dummy entry at the end. 261 } 262 return $diffs; 263 } 264 265 /** 266 * Split two texts into an array of strings. Reduce the texts to a string of 267 * hashes where each Unicode character represents one line. 268 * @param {string} text1 First string. 269 * @param {string} text2 Second string. 270 * @return {Array.<string|Array.<string>>} Three element Array, containing the 271 * encoded text1, the encoded text2 and the array of unique strings. The 272 * zeroth element of the array of unique strings is intentionally blank. 273 * @private 274 */ 275 function diff_linesToChars($text1, $text2) { 276 $lineArray = array(); // e.g. lineArray[4] == 'Hello\n' 277 $lineHash = array(); // e.g. lineHash['Hello\n'] == 4 278 279 // '\x00' is a valid character, but various debuggers don't like it. 280 // So we'll insert a junk entry to avoid generating a null character. 281 $lineArray[0] = ''; 282 283 $chars1 = $this->diff_linesToCharsMunge($text1, $lineArray, $lineHash); 284 $chars2 = $this->diff_linesToCharsMunge($text2, $lineArray, $lineHash); 285 return array ( 286 $chars1, 287 $chars2, 288 $lineArray 289 ); 290 } 291 292 /** 293 * Split a text into an array of strings. Reduce the texts to a string of 294 * hashes where each Unicode character represents one line. 295 * Modifies linearray and linehash through being a closure. 296 * @param {string} text String to encode 297 * @return {string} Encoded string 298 * @private 299 */ 300 function diff_linesToCharsMunge($text, &$lineArray, &$lineHash) { 301 $chars = ''; 302 // Walk the text, pulling out a mb_substring for each line. 303 // text.split('\n') would would temporarily double our memory footprint. 304 // Modifying text would create many large strings to garbage collect. 305 $lineStart = 0; 306 $lineEnd = -1; 307 // Keeping our own length variable is faster than looking it up. 308 $lineArrayLength = count($lineArray); 309 while ($lineEnd < mb_strlen($text) - 1) { 310 $lineEnd = mb_strpos($text, "\n", $lineStart); 311 if ($lineEnd === false) { 312 $lineEnd = mb_strlen($text) - 1; 313 } 314 $line = mb_substr($text, $lineStart, $lineEnd +1 -$lineStart); 315 $lineStart = $lineEnd +1; 316 317 if ( isset($lineHash[$line]) ) { 318 $chars .= mb_chr($lineHash[$line]); 319 } else { 320 $chars .= mb_chr($lineArrayLength); 321 $lineHash[$line] = $lineArrayLength; 322 $lineArray[$lineArrayLength++] = $line; 323 } 324 } 325 return $chars; 326 } 327 /** 328 * Rehydrate the text in a diff from a string of line hashes to real lines of 329 * text. 330 * @param {Array.<Array.<number|string>>} diffs Array of diff tuples. 331 * @param {Array.<string>} lineArray Array of unique strings. 332 * @private 333 */ 334 function diff_charsToLines(&$diffs, $lineArray) { 335 for ($x = 0; $x < count($diffs); $x++) { 336 $chars = $diffs[$x][1]; 337 $text = array (); 338 for ($y = 0; $y < mb_strlen($chars); $y++) { 339 $text[$y] = $lineArray[charCodeAt($chars, $y)]; 340 } 341 $diffs[$x][1] = implode('',$text); 342 } 343 } 344 345 /** 346 * Explore the intersection points between the two texts. 347 * @param {string} text1 Old string to be diffed. 348 * @param {string} text2 New string to be diffed. 349 * @return {Array.<Array.<number|string>>?} Array of diff tuples or null if no 350 * diff available. 351 * @private 352 */ 353 function diff_map($text1, $text2) { 354 // Don't run for too long. 355 $ms_end = microtime(true) + $this->Diff_Timeout; 356 357 // Cache the text lengths to prevent multiple calls. 358 $text1_length = mb_strlen($text1); 359 $text2_length = mb_strlen($text2); 360 $max_d = $text1_length + $text2_length -1; 361 $doubleEnd = $this->Diff_DualThreshold * 2 < $max_d; 362 $v_map1 = array(); 363 $v_map2 = array(); 364 $v1 = array(); 365 $v2 = array(); 366 $v1[1] = 0; 367 $v2[1] = 0; 368 $x = null; 369 $y = null; 370 $footstep = null; // Used to track overlapping paths. 371 $footsteps = array(); 372 $done = false; 373 // Safari 1.x doesn't have hasOwnProperty 374 //? $hasOwnProperty = !!(footsteps.hasOwnProperty); 375 // If the total number of characters is odd, then the front path will collide 376 // with the reverse path. 377 $front = ($text1_length + $text2_length) % 2; 378 for ($d = 0; $d < $max_d; $d++) { 379 // Bail out if timeout reached. 380 if ($this->Diff_Timeout > 0 && microtime(true) > $ms_end) { 381 return null; // zzz 382 } 383 384 // Walk the front path one step. 385 $v_map1[$d] = array (); 386 for ($k = -$d; $k <= $d; $k += 2) { 387 if ($k == -$d || $k != $d && $v1[$k -1] < $v1[$k +1]) { 388 $x = $v1[$k +1]; 389 } else { 390 $x = $v1[$k -1] + 1; 391 } 392 $y = $x - $k; 393 if ($doubleEnd) { 394 $footstep = $x . ',' . $y; 395 if ($front && isset ($footsteps[$footstep])) { 396 $done = true; 397 } 398 if (!$front) { 399 $footsteps[$footstep] = $d; 400 } 401 } 402 while (!$done && ($x < $text1_length) && ($y < $text2_length) && (mb_substr($text1, $x, 1) == mb_substr($text2, $y, 1)) ) { 403 $x++; 404 $y++; 405 if ($doubleEnd) { 406 $footstep = $x . ',' . $y; 407 if ($front && isset ($footsteps[$footstep])) { 408 $done = true; 409 } 410 if (!$front) { 411 $footsteps[$footstep] = $d; 412 } 413 } 414 } 415 $v1[$k] = $x; 416 $v_map1[$d][$x . ',' . $y] = true; 417 if ($x == $text1_length && $y == $text2_length) { 418 // Reached the end in single-path mode. 419 return $this->diff_path1($v_map1, $text1, $text2); 420 } 421 elseif ($done) { 422 // Front path ran over reverse path. 423 424 $v_map2 = array_slice($v_map2, 0, $footsteps[$footstep] + 1); 425 $a = $this->diff_path1($v_map1, mb_substr($text1, 0, $x), mb_substr($text2, 0, $y)); 426 427 return array_merge($a, $this->diff_path2($v_map2, mb_substr($text1, $x), mb_substr($text2, $y))); 428 } 429 } 430 431 if ($doubleEnd) { 432 // Walk the reverse path one step. 433 $v_map2[$d] = array(); 434 for ($k = -$d; $k <= $d; $k += 2) { 435 if ($k == -$d || $k != $d && $v2[$k -1] < $v2[$k +1]) { 436 $x = $v2[$k +1]; 437 } else { 438 $x = $v2[$k -1] + 1; 439 } 440 $y = $x - $k; 441 $footstep = ($text1_length - $x) . ',' . ($text2_length - $y); 442 if (!$front && isset ($footsteps[$footstep])) { 443 $done = true; 444 } 445 if ($front) { 446 $footsteps[$footstep] = $d; 447 } 448 while (!$done && $x < $text1_length && $y < $text2_length && mb_substr($text1, $text1_length - $x -1, 1) == mb_substr($text2, $text2_length - $y -1, 1) ) { 449 $x++; 450 $y++; 451 $footstep = ($text1_length - $x) . ',' . ($text2_length - $y); 452 if (!$front && isset ($footsteps[$footstep])) { 453 $done = true; 454 } 455 if ($front) { 456 $footsteps[$footstep] = $d; 457 } 458 } 459 $v2[$k] = $x; 460 $v_map2[$d][$x . ',' . $y] = true; 461 if ($done) { 462 // Reverse path ran over front path. 463 $v_map1 = array_slice($v_map1, 0, $footsteps[$footstep] + 1); 464 $a = $this->diff_path1($v_map1, mb_substr($text1, 0, $text1_length - $x), mb_substr($text2, 0, $text2_length - $y)); 465 return array_merge($a, $this->diff_path2($v_map2, mb_substr($text1, $text1_length - $x), mb_substr($text2, $text2_length - $y))); 466 } 467 } 468 } 469 } 470 // Number of diffs equals number of characters, no commonality at all. 471 return null; 472 } 473 474 /** 475 * Work from the middle back to the start to determine the path. 476 * @param {Array.<Object>} v_map Array of paths.ers 477 * @param {string} text1 Old string fragment to be diffed. 478 * @param {string} text2 New string fragment to be diffed. 479 * @return {Array.<Array.<number|string>>} Array of diff tuples. 480 * @private 481 */ 482 function diff_path1($v_map, $text1, $text2) { 483 $path = array (); 484 $x = mb_strlen($text1); 485 $y = mb_strlen($text2); 486 /** @type {number?} */ 487 $last_op = null; 488 for ($d = count($v_map) - 2; $d >= 0; $d--) { 489 while (1) { 490 if (isset ($v_map[$d][($x -1) . ',' . $y])) { 491 $x--; 492 if ($last_op === DIFF_DELETE) { 493 $path[0][1] = mb_substr($text1, $x, 1) . $path[0][1]; 494 } else { 495 array_unshift($path, array ( 496 DIFF_DELETE, 497 mb_substr($text1, $x, 1) 498 )); 499 } 500 $last_op = DIFF_DELETE; 501 break; 502 } elseif (isset ($v_map[$d][$x . ',' . ($y -1)])) { 503 $y--; 504 if ($last_op === DIFF_INSERT) { 505 $path[0][1] = mb_substr($text2, $y, 1) . $path[0][1]; 506 } else { 507 array_unshift($path, array ( 508 DIFF_INSERT, 509 mb_substr($text2, $y, 1) 510 )); 511 } 512 $last_op = DIFF_INSERT; 513 break; 514 } else { 515 $x--; 516 $y--; 517 //if (text1.charAt(x) != text2.charAt(y)) { 518 // throw new Error('No diagonal. Can\'t happen. (diff_path1)'); 519 //} 520 if ($last_op === DIFF_EQUAL) { 521 $path[0][1] = mb_substr($text1, $x, 1) . $path[0][1]; 522 } else { 523 array_unshift($path, array ( 524 DIFF_EQUAL, 525 mb_substr($text1, $x, 1) 526 )); 527 } 528 $last_op = DIFF_EQUAL; 529 } 530 } 531 } 532 return $path; 533 } 534 535 /** 536 * Work from the middle back to the end to determine the path. 537 * @param {Array.<Object>} v_map Array of paths. 538 * @param {string} text1 Old string fragment to be diffed. 539 * @param {string} text2 New string fragment to be diffed. 540 * @return {Array.<Array.<number|string>>} Array of diff tuples. 541 * @private 542 */ 543 function diff_path2($v_map, $text1, $text2) { 544 $path = array (); 545 $pathLength = 0; 546 $x = mb_strlen($text1); 547 $y = mb_strlen($text2); 548 /** @type {number?} */ 549 $last_op = null; 550 for ($d = count($v_map) - 2; $d >= 0; $d--) { 551 while (1) { 552 if (isset ($v_map[$d][($x -1) . ',' . $y])) { 553 $x--; 554 if ($last_op === DIFF_DELETE) { 555 $path[$pathLength -1][1] .= $text1[mb_strlen($text1) - $x -1]; 556 } else { 557 $path[$pathLength++] = array ( 558 DIFF_DELETE, 559 $text1[mb_strlen($text1) - $x -1] 560 ); 561 } 562 $last_op = DIFF_DELETE; 563 break; 564 } 565 elseif (isset ($v_map[$d][$x . ',' . ($y -1)])) { 566 $y--; 567 if ($last_op === DIFF_INSERT) { 568 $path[$pathLength -1][1] .= $text2[mb_strlen($text2) - $y -1]; 569 } else { 570 $path[$pathLength++] = array ( 571 DIFF_INSERT, 572 $text2[mb_strlen($text2) - $y -1] 573 ); 574 } 575 $last_op = DIFF_INSERT; 576 break; 577 } else { 578 $x--; 579 $y--; 580 //if (text1.charAt(text1.length - x - 1) != 581 // text2.charAt(text2.length - y - 1)) { 582 // throw new Error('No diagonal. Can\'t happen. (diff_path2)'); 583 //} 584 if ($last_op === DIFF_EQUAL) { 585 $path[$pathLength -1][1] .= $text1[mb_strlen($text1) - $x -1]; 586 } else { 587 $path[$pathLength++] = array ( 588 DIFF_EQUAL, 589 $text1[mb_strlen($text1) - $x -1] 590 ); 591 } 592 $last_op = DIFF_EQUAL; 593 } 594 } 595 } 596 return $path; 597 } 598 599 /** 600 * Determine the common prefix of two strings 601 * @param {string} text1 First string. 602 * @param {string} text2 Second string. 603 * @return {number} The number of characters common to the start of each 604 * string. 605 */ 606 function diff_commonPrefix($text1, $text2) { 607 for ($i = 0; 1; $i++) { 608 $t1 = mb_substr($text1, $i, 1); 609 $t2 = mb_substr($text2, $i, 1); 610 if($t1==='' || $t2==='' || $t1 !== $t2 ){ 611 return $i; 612 } 613 } 614 } 615 616 /** 617 * Determine the common suffix of two strings 618 * @param {string} text1 First string. 619 * @param {string} text2 Second string. 620 * @return {number} The number of characters common to the end of each string. 621 */ 622 function diff_commonSuffix($text1, $text2) { 623 return $this->diff_commonPrefix( strrev($text1), strrev($text2) ); 624 } 625 626 /** 627 * Do the two texts share a mb_substring which is at least half the length of the 628 * longer text? 629 * @param {string} text1 First string. 630 * @param {string} text2 Second string. 631 * @return {Array.<string>?} Five element Array, containing the prefix of 632 * text1, the suffix of text1, the prefix of text2, the suffix of 633 * text2 and the common middle. Or null if there was no match. 634 */ 635 function diff_halfMatch($text1, $text2) { 636 $longtext = mb_strlen($text1) > mb_strlen($text2) ? $text1 : $text2; 637 $shorttext = mb_strlen($text1) > mb_strlen($text2) ? $text2 : $text1; 638 if (mb_strlen($longtext) < 10 || mb_strlen($shorttext) < 1) { 639 return null; // Pointless. 640 } 641 642 // First check if the second quarter is the seed for a half-match. 643 $hm1 = $this->diff_halfMatchI($longtext, $shorttext, ceil(mb_strlen($longtext) / 4)); 644 // Check again based on the third quarter. 645 $hm2 = $this->diff_halfMatchI($longtext, $shorttext, ceil(mb_strlen($longtext) / 2)); 646 647 if (!$hm1 && !$hm2) { 648 return null; 649 } elseif (!$hm2) { 650 $hm = $hm1; 651 } elseif (!$hm1) { 652 $hm = $hm2; 653 } else { 654 // Both matched. Select the longest. 655 $hm = mb_strlen($hm1[4]) > mb_strlen($hm2[4]) ? $hm1 : $hm2; 656 } 657 658 // A half-match was found, sort out the return data. 659 if (mb_strlen($text1) > mb_strlen($text2)) { 660 $text1_a = $hm[0]; 661 $text1_b = $hm[1]; 662 $text2_a = $hm[2]; 663 $text2_b = $hm[3]; 664 } else { 665 $text2_a = $hm[0]; 666 $text2_b = $hm[1]; 667 $text1_a = $hm[2]; 668 $text1_b = $hm[3]; 669 } 670 $mid_common = $hm[4]; 671 return array( $text1_a, $text1_b, $text2_a, $text2_b, $mid_common ); 672 } 673 674 /** 675 * Does a mb_substring of shorttext exist within longtext such that the mb_substring 676 * is at least half the length of longtext? 677 * Closure, but does not reference any external variables. 678 * @param {string} longtext Longer string. 679 * @param {string} shorttext Shorter string. 680 * @param {number} i Start index of quarter length mb_substring within longtext 681 * @return {Array.<string>?} Five element Array, containing the prefix of 682 * longtext, the suffix of longtext, the prefix of shorttext, the suffix 683 * of shorttext and the common middle. Or null if there was no match. 684 * @private 685 */ 686 function diff_halfMatchI($longtext, $shorttext, $i) { 687 // Start with a 1/4 length mb_substring at position i as a seed. 688 $seed = mb_substr($longtext, $i, floor(mb_strlen($longtext) / 4)); 689 690 $j = -1; 691 $best_common = ''; 692 $best_longtext_a = null; 693 $best_longtext_b = null; 694 $best_shorttext_a = null; 695 $best_shorttext_b = null; 696 while ( ($j = mb_strpos($shorttext, $seed, $j + 1)) !== false ) { 697 $prefixLength = $this->diff_commonPrefix(mb_substr($longtext, $i), mb_substr($shorttext, $j)); 698 $suffixLength = $this->diff_commonSuffix(mb_substr($longtext, 0, $i), mb_substr($shorttext, 0, $j)); 699 if (mb_strlen($best_common) < $suffixLength + $prefixLength) { 700 $best_common = mb_substr($shorttext, $j - $suffixLength, $suffixLength) . mb_substr($shorttext, $j, $prefixLength); 701 $best_longtext_a = mb_substr($longtext, 0, $i - $suffixLength); 702 $best_longtext_b = mb_substr($longtext, $i + $prefixLength); 703 $best_shorttext_a = mb_substr($shorttext, 0, $j - $suffixLength); 704 $best_shorttext_b = mb_substr($shorttext, $j + $prefixLength); 705 } 706 } 707 if (mb_strlen($best_common) >= mb_strlen($longtext) / 2) { 708 return array ( 709 $best_longtext_a, 710 $best_longtext_b, 711 $best_shorttext_a, 712 $best_shorttext_b, 713 $best_common 714 ); 715 } else { 716 return null; 717 } 718 } 719 720 /** 721 * Reduce the number of edits by eliminating semantically trivial equalities. 722 * @param {Array.<Array.<number|string>>} diffs Array of diff tuples. 723 */ 724 function diff_cleanupSemantic(&$diffs) { 725 $changes = false; 726 $equalities = array (); // Stack of indices where equalities are found. 727 $equalitiesLength = 0; // Keeping our own length var is faster in JS. 728 $lastequality = null; // Always equal to equalities[equalitiesLength-1][1] 729 $pointer = 0; // Index of current position. 730 // Number of characters that changed prior to the equality. 731 $length_changes1 = 0; 732 // Number of characters that changed after the equality. 733 $length_changes2 = 0; 734 while ($pointer < count($diffs)) { 735 if ($diffs[$pointer][0] == DIFF_EQUAL) { // equality found 736 $equalities[$equalitiesLength++] = $pointer; 737 $length_changes1 = $length_changes2; 738 $length_changes2 = 0; 739 $lastequality = $diffs[$pointer][1]; 740 } else { // an insertion or deletion 741 $length_changes2 += mb_strlen($diffs[$pointer][1]); 742 if ($lastequality !== null && (mb_strlen($lastequality) <= $length_changes1) && (mb_strlen($lastequality) <= $length_changes2)) { 743 // Duplicate record 744 $zzz_diffs = array_splice($diffs, $equalities[$equalitiesLength -1], 0, array(array ( 745 DIFF_DELETE, 746 $lastequality 747 ))); 748 // Change second copy to insert. 749 $diffs[$equalities[$equalitiesLength -1] + 1][0] = DIFF_INSERT; 750 // Throw away the equality we just deleted. 751 $equalitiesLength--; 752 // Throw away the previous equality (it needs to be reevaluated). 753 $equalitiesLength--; 754 $pointer = $equalitiesLength > 0 ? $equalities[$equalitiesLength -1] : -1; 755 $length_changes1 = 0; // Reset the counters. 756 $length_changes2 = 0; 757 $lastequality = null; 758 $changes = true; 759 } 760 } 761 $pointer++; 762 } 763 if ($changes) { 764 $this->diff_cleanupMerge($diffs); 765 } 766 $this->diff_cleanupSemanticLossless($diffs); 767 } 768 769 /** 770 * Look for single edits surrounded on both sides by equalities 771 * which can be shifted sideways to align the edit to a word boundary. 772 * e.g: The c<ins>at c</ins>ame. -> The <ins>cat </ins>came. 773 * @param {Array.<Array.<number|string>>} diffs Array of diff tuples. 774 */ 775 function diff_cleanupSemanticLossless(&$diffs) { 776 777 $pointer = 1; 778 // Intentionally ignore the first and last element (don't need checking). 779 while ($pointer < count($diffs) - 1) { 780 if ($diffs[$pointer -1][0] == DIFF_EQUAL && $diffs[$pointer +1][0] == DIFF_EQUAL) { 781 // This is a single edit surrounded by equalities. 782 $equality1 = $diffs[$pointer -1][1]; 783 $edit = $diffs[$pointer][1]; 784 $equality2 = $diffs[$pointer +1][1]; 785 786 // First, shift the edit as far left as possible. 787 $commonOffset = $this->diff_commonSuffix($equality1, $edit); 788 if ($commonOffset !== '') { 789 $commonString = mb_substr($edit, mb_strlen($edit) - $commonOffset); 790 $equality1 = mb_substr($equality1, 0, mb_strlen($equality1) - $commonOffset); 791 $edit = $commonString . mb_substr($edit, 0, mb_strlen($edit) - $commonOffset); 792 $equality2 = $commonString . $equality2; 793 } 794 795 // Second, step character by character right, looking for the best fit. 796 $bestEquality1 = $equality1; 797 $bestEdit = $edit; 798 $bestEquality2 = $equality2; 799 $bestScore = $this->diff_cleanupSemanticScore($equality1, $edit) + $this->diff_cleanupSemanticScore($edit, $equality2); 800 while (isset($equality2[0]) && $edit[0] === $equality2[0]) { 801 $equality1 .= $edit[0]; 802 $edit = mb_substr($edit, 1) . $equality2[0]; 803 $equality2 = mb_substr($equality2, 1); 804 $score = $this->diff_cleanupSemanticScore($equality1, $edit) + $this->diff_cleanupSemanticScore($edit, $equality2); 805 // The >= encourages trailing rather than leading whitespace on edits. 806 if ($score >= $bestScore) { 807 $bestScore = $score; 808 $bestEquality1 = $equality1; 809 $bestEdit = $edit; 810 $bestEquality2 = $equality2; 811 } 812 } 813 814 if ($diffs[$pointer -1][1] != $bestEquality1) { 815 // We have an improvement, save it back to the diff. 816 if ($bestEquality1) { 817 $diffs[$pointer -1][1] = $bestEquality1; 818 } else { 819 $zzz_diffs = array_splice($diffs, $pointer -1, 1); 820 $pointer--; 821 } 822 $diffs[$pointer][1] = $bestEdit; 823 if ($bestEquality2) { 824 $diffs[$pointer +1][1] = $bestEquality2; 825 } else { 826 $zzz_diffs = array_splice($diffs, $pointer +1, 1); 827 $pointer--; 828 } 829 } 830 } 831 $pointer++; 832 } 833 } 834 835 /** 836 * Given two strings, compute a score representing whether the internal 837 * boundary falls on logical boundaries. 838 * Scores range from 5 (best) to 0 (worst). 839 * Closure, makes reference to regex patterns defined above. 840 * @param {string} one First string 841 * @param {string} two Second string 842 * @return {number} The score. 843 */ 844 function diff_cleanupSemanticScore($one, $two) { 845 // Define some regex patterns for matching boundaries. 846 $punctuation = '/[^a-zA-Z0-9]/'; 847 $whitespace = '/\s/'; 848 $linebreak = '/[\r\n]/'; 849 $blanklineEnd = '/\n\r?\n$/'; 850 $blanklineStart = '/^\r?\n\r?\n/'; 851 852 if (!$one || !$two) { 853 // Edges are the best. 854 return 5; 855 } 856 857 // Each port of this function behaves slightly differently due to 858 // subtle differences in each language's definition of things like 859 // 'whitespace'. Since this function's purpose is largely cosmetic, 860 // the choice has been made to use each language's native features 861 // rather than force total conformity. 862 $score = 0; 863 // One point for non-alphanumeric. 864 if (preg_match($punctuation, $one[mb_strlen($one) - 1]) || preg_match($punctuation, $two[0])) { 865 $score++; 866 // Two points for whitespace. 867 if (preg_match($whitespace, $one[mb_strlen($one) - 1] ) || preg_match($whitespace, $two[0])) { 868 $score++; 869 // Three points for line breaks. 870 if (preg_match($linebreak, $one[mb_strlen($one) - 1]) || preg_match($linebreak, $two[0])) { 871 $score++; 872 // Four points for blank lines. 873 if (preg_match($blanklineEnd, $one) || preg_match($blanklineStart, $two)) { 874 $score++; 875 } 876 } 877 } 878 } 879 return $score; 880 } 881 882 /** 883 * Reduce the number of edits by eliminating operationally trivial equalities. 884 * @param {Array.<Array.<number|string>>} diffs Array of diff tuples. 885 */ 886 function diff_cleanupEfficiency(&$diffs) { 887 $changes = false; 888 $equalities = array (); // Stack of indices where equalities are found. 889 $equalitiesLength = 0; // Keeping our own length var is faster in JS. 890 $lastequality = ''; // Always equal to equalities[equalitiesLength-1][1] 891 $pointer = 0; // Index of current position. 892 // Is there an insertion operation before the last equality. 893 $pre_ins = false; 894 // Is there a deletion operation before the last equality. 895 $pre_del = false; 896 // Is there an insertion operation after the last equality. 897 $post_ins = false; 898 // Is there a deletion operation after the last equality. 899 $post_del = false; 900 while ($pointer < count($diffs)) { 901 if ($diffs[$pointer][0] == DIFF_EQUAL) { // equality found 902 if (mb_strlen($diffs[$pointer][1]) < $this->Diff_EditCost && ($post_ins || $post_del)) { 903 // Candidate found. 904 $equalities[$equalitiesLength++] = $pointer; 905 $pre_ins = $post_ins; 906 $pre_del = $post_del; 907 $lastequality = $diffs[$pointer][1]; 908 } else { 909 // Not a candidate, and can never become one. 910 $equalitiesLength = 0; 911 $lastequality = ''; 912 } 913 $post_ins = $post_del = false; 914 } else { // an insertion or deletion 915 if ($diffs[$pointer][0] == DIFF_DELETE) { 916 $post_del = true; 917 } else { 918 $post_ins = true; 919 } 920 /* 921 * Five types to be split: 922 * <ins>A</ins><del>B</del>XY<ins>C</ins><del>D</del> 923 * <ins>A</ins>X<ins>C</ins><del>D</del> 924 * <ins>A</ins><del>B</del>X<ins>C</ins> 925 * <ins>A</del>X<ins>C</ins><del>D</del> 926 * <ins>A</ins><del>B</del>X<del>C</del> 927 */ 928 if ($lastequality && (($pre_ins && $pre_del && $post_ins && $post_del) || ((mb_strlen($lastequality) < $this->Diff_EditCost / 2) && ($pre_ins + $pre_del + $post_ins + $post_del) == 3))) { 929 // Duplicate record 930 $zzz_diffs = array_splice($diffs, $equalities[$equalitiesLength -1], 0, array(array ( 931 DIFF_DELETE, 932 $lastequality 933 ))); 934 // Change second copy to insert. 935 $diffs[$equalities[$equalitiesLength -1] + 1][0] = DIFF_INSERT; 936 $equalitiesLength--; // Throw away the equality we just deleted; 937 $lastequality = ''; 938 if ($pre_ins && $pre_del) { 939 // No changes made which could affect previous entry, keep going. 940 $post_ins = $post_del = true; 941 $equalitiesLength = 0; 942 } else { 943 $equalitiesLength--; // Throw away the previous equality; 944 $pointer = $equalitiesLength > 0 ? $equalities[$equalitiesLength -1] : -1; 945 $post_ins = $post_del = false; 946 } 947 $changes = true; 948 } 949 } 950 $pointer++; 951 } 952 953 if ($changes) { 954 $this->diff_cleanupMerge($diffs); 955 } 956 } 957 958 /** 959 * Reorder and merge like edit sections. Merge equalities. 960 * Any edit section can move as long as it doesn't cross an equality. 961 * @param {Array.<Array.<number|string>>} diffs Array of diff tuples. 962 */ 963 function diff_cleanupMerge(&$diffs) { 964 array_push($diffs, array ( DIFF_EQUAL, '' )); // Add a dummy entry at the end. 965 $pointer = 0; 966 $count_delete = 0; 967 $count_insert = 0; 968 $text_delete = ''; 969 $text_insert = ''; 970 $commonlength = null; 971 while ($pointer < count($diffs)) { 972 switch ($diffs[$pointer][0]) { 973 case DIFF_INSERT : 974 $count_insert++; 975 $text_insert .= $diffs[$pointer][1]; 976 $pointer++; 977 break; 978 case DIFF_DELETE : 979 $count_delete++; 980 $text_delete .= $diffs[$pointer][1]; 981 $pointer++; 982 break; 983 case DIFF_EQUAL : 984 // Upon reaching an equality, check for prior redundancies. 985 if ($count_delete !== 0 || $count_insert !== 0) { 986 if ($count_delete !== 0 && $count_insert !== 0) { 987 // Factor out any common prefixies. 988 $commonlength = $this->diff_commonPrefix($text_insert, $text_delete); 989 if ($commonlength !== 0) { 990 if (($pointer - $count_delete - $count_insert) > 0 && $diffs[$pointer - $count_delete - $count_insert -1][0] == DIFF_EQUAL) { 991 $diffs[$pointer - $count_delete - $count_insert -1][1] .= mb_substr($text_insert, 0, $commonlength); 992 } else { 993 array_splice($diffs, 0, 0, array(array ( 994 DIFF_EQUAL, 995 mb_substr($text_insert, 0, $commonlength) 996 ))); 997 $pointer++; 998 } 999 $text_insert = mb_substr($text_insert, $commonlength); 1000 $text_delete = mb_substr($text_delete, $commonlength); 1001 } 1002 // Factor out any common suffixies. 1003 $commonlength = $this->diff_commonSuffix($text_insert, $text_delete); 1004 if ($commonlength !== 0) { 1005 $diffs[$pointer][1] = mb_substr($text_insert, mb_strlen($text_insert) - $commonlength) . $diffs[$pointer][1]; 1006 $text_insert = mb_substr($text_insert, 0, mb_strlen($text_insert) - $commonlength); 1007 $text_delete = mb_substr($text_delete, 0, mb_strlen($text_delete) - $commonlength); 1008 } 1009 } 1010 // Delete the offending records and add the merged ones. 1011 if ($count_delete === 0) { 1012 array_splice($diffs, $pointer-$count_delete-$count_insert, $count_delete+$count_insert, array(array( 1013 DIFF_INSERT, 1014 $text_insert 1015 ))); 1016 } elseif ($count_insert === 0) { 1017 array_splice($diffs, $pointer-$count_delete-$count_insert, $count_delete+$count_insert, array(array( 1018 DIFF_DELETE, 1019 $text_delete 1020 ))); 1021 } else { 1022 array_splice($diffs, $pointer-$count_delete-$count_insert, $count_delete+$count_insert, array(array( 1023 DIFF_DELETE, 1024 $text_delete 1025 ), array ( 1026 DIFF_INSERT, 1027 $text_insert 1028 ))); 1029 } 1030 $pointer = $pointer - $count_delete - $count_insert + ($count_delete ? 1 : 0) + ($count_insert ? 1 : 0) + 1; 1031 } elseif ($pointer !== 0 && $diffs[$pointer -1][0] == DIFF_EQUAL) { 1032 // Merge this equality with the previous one. 1033 $diffs[$pointer -1][1] .= $diffs[$pointer][1]; 1034 array_splice($diffs, $pointer, 1); 1035 } else { 1036 $pointer++; 1037 } 1038 $count_insert = 0; 1039 $count_delete = 0; 1040 $text_delete = ''; 1041 $text_insert = ''; 1042 break; 1043 } 1044 } 1045 if ($diffs[count($diffs) - 1][1] === '') { 1046 array_pop($diffs); // Remove the dummy entry at the end. 1047 } 1048 1049 // Second pass: look for single edits surrounded on both sides by equalities 1050 // which can be shifted sideways to eliminate an equality. 1051 // e.g: A<ins>BA</ins>C -> <ins>AB</ins>AC 1052 $changes = false; 1053 $pointer = 1; 1054 // Intentionally ignore the first and last element (don't need checking). 1055 while ($pointer < count($diffs) - 1) { 1056 if ($diffs[$pointer-1][0] == DIFF_EQUAL && $diffs[$pointer+1][0] == DIFF_EQUAL) { 1057 // This is a single edit surrounded by equalities. 1058 if ( mb_substr($diffs[$pointer][1], mb_strlen($diffs[$pointer][1]) - mb_strlen($diffs[$pointer -1][1])) == $diffs[$pointer -1][1]) { 1059 // Shift the edit over the previous equality. 1060 $diffs[$pointer][1] = $diffs[$pointer -1][1] . mb_substr($diffs[$pointer][1], 0, mb_strlen($diffs[$pointer][1]) - mb_strlen($diffs[$pointer -1][1])); 1061 $diffs[$pointer +1][1] = $diffs[$pointer -1][1] . $diffs[$pointer +1][1]; 1062 array_splice($diffs, $pointer -1, 1); 1063 $changes = true; 1064 } elseif (mb_substr($diffs[$pointer][1], 0, mb_strlen($diffs[$pointer +1][1])) == $diffs[$pointer +1][1]) { 1065 // Shift the edit over the next equality. 1066 $diffs[$pointer -1][1] .= $diffs[$pointer +1][1]; 1067 1068 $diffs[$pointer][1] = mb_substr($diffs[$pointer][1], mb_strlen($diffs[$pointer +1][1])) . $diffs[$pointer +1][1]; 1069 array_splice($diffs, $pointer +1, 1); 1070 $changes = true; 1071 } 1072 } 1073 $pointer++; 1074 } 1075 // If shifts were made, the diff needs reordering and another shift sweep. 1076 if ($changes) { 1077 $this->diff_cleanupMerge($diffs); 1078 } 1079 } 1080 1081 /** 1082 * loc is a location in text1, compute and return the equivalent location in 1083 * text2. 1084 * e.g. 'The cat' vs 'The big cat', 1->1, 5->8 1085 * @param {Array.<Array.<number|string>>} diffs Array of diff tuples. 1086 * @param {number} loc Location within text1. 1087 * @return {number} Location within text2. 1088 */ 1089 function diff_xIndex($diffs, $loc) { 1090 $chars1 = 0; 1091 $chars2 = 0; 1092 $last_chars1 = 0; 1093 $last_chars2 = 0; 1094 for ($x = 0; $x < count($diffs); $x++) { 1095 if ($diffs[$x][0] !== DIFF_INSERT) { // Equality or deletion. 1096 $chars1 += mb_strlen($diffs[$x][1]); 1097 } 1098 if ($diffs[$x][0] !== DIFF_DELETE) { // Equality or insertion. 1099 $chars2 += mb_strlen($diffs[$x][1]); 1100 } 1101 if ($chars1 > $loc) { // Overshot the location. 1102 break; 1103 } 1104 $last_chars1 = $chars1; 1105 $last_chars2 = $chars2; 1106 } 1107 // Was the location was deleted? 1108 if (count($diffs) != $x && $diffs[$x][0] === DIFF_DELETE) { 1109 return $last_chars2; 1110 } 1111 // Add the remaining character length. 1112 return $last_chars2 + ($loc - $last_chars1); 1113 } 1114 1115 /** 1116 * Convert a diff array into a pretty HTML report. 1117 * @param {Array.<Array.<number|string>>} diffs Array of diff tuples. 1118 * @return {string} HTML representation. 1119 */ 1120 function diff_prettyHtml($diffs) { 1121 $html = array (); 1122 $i = 0; 1123 for ($x = 0; $x < count($diffs); $x++) { 1124 $op = $diffs[$x][0]; // Operation (insert, delete, equal) 1125 $data = $diffs[$x][1]; // Text of change. 1126 $text = preg_replace(array ( 1127 '/&/', 1128 '/</', 1129 '/>/', 1130 "/\n/" 1131 ), array ( 1132 '&', 1133 '<', 1134 '>', 1135 '¶<BR>' 1136 ), $data); 1137 1138 switch ($op) { 1139 case DIFF_INSERT : 1140 $html[$x] = '<INS STYLE="background:#E6FFE6;" TITLE="i=' . $i . '">' . $text . '</INS>'; 1141 break; 1142 case DIFF_DELETE : 1143 $html[$x] = '<DEL STYLE="background:#FFE6E6;" TITLE="i=' . $i . '">' . $text . '</DEL>'; 1144 break; 1145 case DIFF_EQUAL : 1146 $html[$x] = '<SPAN TITLE="i=' . $i . '">' . $text . '</SPAN>'; 1147 break; 1148 } 1149 if ($op !== DIFF_DELETE) { 1150 $i += mb_strlen($data); 1151 } 1152 } 1153 return implode('',$html); 1154 } 1155 1156 /** 1157 * Compute and return the source text (all equalities and deletions). 1158 * @param {Array.<Array.<number|string>>} diffs Array of diff tuples. 1159 * @return {string} Source text. 1160 */ 1161 function diff_text1($diffs) { 1162 $text = array (); 1163 for ($x = 0; $x < count($diffs); $x++) { 1164 if ($diffs[$x][0] !== DIFF_INSERT) { 1165 $text[$x] = $diffs[$x][1]; 1166 } 1167 } 1168 return implode('',$text); 1169 } 1170 1171 /** 1172 * Compute and return the destination text (all equalities and insertions). 1173 * @param {Array.<Array.<number|string>>} diffs Array of diff tuples. 1174 * @return {string} Destination text. 1175 */ 1176 function diff_text2($diffs) { 1177 $text = array (); 1178 for ($x = 0; $x < count($diffs); $x++) { 1179 if ($diffs[$x][0] !== DIFF_DELETE) { 1180 $text[$x] = $diffs[$x][1]; 1181 } 1182 } 1183 return implode('',$text); 1184 } 1185 1186 /** 1187 * Compute the Levenshtein distance; the number of inserted, deleted or 1188 * substituted characters. 1189 * @param {Array.<Array.<number|string>>} diffs Array of diff tuples. 1190 * @return {number} Number of changes. 1191 */ 1192 function diff_levenshtein($diffs) { 1193 $levenshtein = 0; 1194 $insertions = 0; 1195 $deletions = 0; 1196 for ($x = 0; $x < count($diffs); $x++) { 1197 $op = $diffs[$x][0]; 1198 $data = $diffs[$x][1]; 1199 switch ($op) { 1200 case DIFF_INSERT : 1201 $insertions += mb_strlen($data); 1202 break; 1203 case DIFF_DELETE : 1204 $deletions += mb_strlen($data); 1205 break; 1206 case DIFF_EQUAL : 1207 // A deletion and an insertion is one substitution. 1208 $levenshtein += max($insertions, $deletions); 1209 $insertions = 0; 1210 $deletions = 0; 1211 break; 1212 } 1213 } 1214 $levenshtein += max($insertions, $deletions); 1215 return $levenshtein; 1216 } 1217 1218 /** 1219 * Crush the diff into an encoded string which describes the operations 1220 * required to transform text1 into text2. 1221 * E.g. =3\t-2\t+ing -> Keep 3 chars, delete 2 chars, insert 'ing'. 1222 * Operations are tab-separated. Inserted text is escaped using %xx notation. 1223 * @param {Array.<Array.<number|string>>} diffs Array of diff tuples. 1224 * @return {string} Delta text. 1225 */ 1226 function diff_toDelta($diffs) { 1227 $text = array (); 1228 for ($x = 0; $x < count($diffs); $x++) { 1229 switch ($diffs[$x][0]) { 1230 case DIFF_INSERT : 1231 $text[$x] = '+' .encodeURI($diffs[$x][1]); 1232 break; 1233 case DIFF_DELETE : 1234 $text[$x] = '-' .mb_strlen($diffs[$x][1]); 1235 break; 1236 case DIFF_EQUAL : 1237 $text[$x] = '=' .mb_strlen($diffs[$x][1]); 1238 break; 1239 } 1240 } 1241 return str_replace('%20', ' ', implode("\t", $text)); 1242 } 1243 1244 /** 1245 * Given the original text1, and an encoded string which describes the 1246 * operations required to transform text1 into text2, compute the full diff. 1247 * @param {string} text1 Source string for the diff. 1248 * @param {string} delta Delta text. 1249 * @return {Array.<Array.<number|string>>} Array of diff tuples. 1250 * @throws {Error} If invalid input. 1251 */ 1252 function diff_fromDelta($text1, $delta) { 1253 $diffs = array (); 1254 $diffsLength = 0; // Keeping our own length var is faster in JS. 1255 $pointer = 0; // Cursor in text1 1256 $tokens = preg_split("/\t/", $delta); 1257 1258 for ($x = 0; $x < count($tokens); $x++) { 1259 // Each token begins with a one character parameter which specifies the 1260 // operation of this token (delete, insert, equality). 1261 $param = mb_substr($tokens[$x], 1); 1262 switch ($tokens[$x][0]) { 1263 case '+' : 1264 try { 1265 $diffs[$diffsLength++] = array ( 1266 DIFF_INSERT, 1267 decodeURI($param) 1268 ); 1269 } catch (Exception $ex) { 1270 echo_Exception('Illegal escape in diff_fromDelta: ' . $param); 1271 // Malformed URI sequence. 1272 } 1273 break; 1274 case '-' : 1275 // Fall through. 1276 case '=' : 1277 $n = (int) $param; 1278 if ($n < 0) { 1279 echo_Exception('Invalid number in diff_fromDelta: ' . $param); 1280 } 1281 $text = mb_substr($text1, $pointer, $n); 1282 $pointer += $n; 1283 if ($tokens[$x][0] == '=') { 1284 $diffs[$diffsLength++] = array ( 1285 DIFF_EQUAL, 1286 $text 1287 ); 1288 } else { 1289 $diffs[$diffsLength++] = array ( 1290 DIFF_DELETE, 1291 $text 1292 ); 1293 } 1294 break; 1295 default : 1296 // Blank tokens are ok (from a trailing \t). 1297 // Anything else is an error. 1298 if ($tokens[$x]) { 1299 echo_Exception('Invalid diff operation in diff_fromDelta: ' . $tokens[$x]); 1300 } 1301 } 1302 } 1303 if ($pointer != mb_strlen($text1)) { 1304 // throw new Exception('Delta length (' . $pointer . ') does not equal source text length (' . mb_strlen($text1) . ').'); 1305 echo_Exception('Delta length (' . $pointer . ') does not equal source text length (' . mb_strlen($text1) . ').'); 1306 } 1307 return $diffs; 1308 } 1309 1310 // MATCH FUNCTIONS 1311 1312 /** 1313 * Locate the best instance of 'pattern' in 'text' near 'loc'. 1314 * @param {string} text The text to search. 1315 * @param {string} pattern The pattern to search for. 1316 * @param {number} loc The location to search around. 1317 * @return {number} Best match index or -1. 1318 */ 1319 function match_main($text, $pattern, $loc) { 1320 $loc = max(0, min($loc, mb_strlen($text))); 1321 if ($text == $pattern) { 1322 // Shortcut (potentially not guaranteed by the algorithm) 1323 return 0; 1324 } 1325 elseif (!mb_strlen($text)) { 1326 // Nothing to match. 1327 return -1; 1328 } 1329 elseif (mb_substr($text, $loc, mb_strlen($pattern)) == $pattern) { 1330 // Perfect match at the perfect spot! (Includes case of null pattern) 1331 return $loc; 1332 } else { 1333 // Do a fuzzy compare. 1334 return $this->match_bitap($text, $pattern, $loc); 1335 } 1336 } 1337 1338 /** 1339 * Locate the best instance of 'pattern' in 'text' near 'loc' using the 1340 * Bitap algorithm. 1341 * @param {string} text The text to search. 1342 * @param {string} pattern The pattern to search for. 1343 * @param {number} loc The location to search around. 1344 * @return {number} Best match index or -1. 1345 * @private 1346 */ 1347 function match_bitap($text, $pattern, $loc) { 1348 if (mb_strlen($pattern) > Match_MaxBits) { 1349 echo_Exception('Pattern too long for this browser.'); 1350 } 1351 1352 // Initialise the alphabet. 1353 $s = $this->match_alphabet($pattern); 1354 1355 // Highest score beyond which we give up. 1356 $score_threshold = $this->Match_Threshold; 1357 1358 // Is there a nearby exact match? (speedup) 1359 $best_loc = mb_strpos($text, $pattern, $loc); 1360 if ($best_loc !== false) { 1361 $score_threshold = min($this->match_bitapScore(0, $best_loc, $pattern, $loc), $score_threshold); 1362 } 1363 1364 // What about in the other direction? (speedup) 1365 $best_loc = mb_strrpos( $text, $pattern, min($loc + mb_strlen($pattern), mb_strlen($text)) ); 1366 if ($best_loc !== false) { 1367 $score_threshold = min($this->match_bitapScore(0, $best_loc, $pattern, $loc), $score_threshold); 1368 } 1369 1370 // Initialise the bit arrays. 1371 $matchmask = 1 << (mb_strlen($pattern) - 1); 1372 $best_loc = -1; 1373 1374 $bin_min = null; 1375 $bin_mid = null; 1376 $bin_max = mb_strlen($pattern) + mb_strlen($text); 1377 $last_rd = null; 1378 for ($d = 0; $d < mb_strlen($pattern); $d++) { 1379 // Scan for the best match; each iteration allows for one more error. 1380 // Run a binary search to determine how far from 'loc' we can stray at this 1381 // error level. 1382 $bin_min = 0; 1383 $bin_mid = $bin_max; 1384 while ($bin_min < $bin_mid) { 1385 if ($this->match_bitapScore($d, $loc + $bin_mid, $pattern, $loc) <= $score_threshold) { 1386 $bin_min = $bin_mid; 1387 } else { 1388 $bin_max = $bin_mid; 1389 } 1390 $bin_mid = floor(($bin_max - $bin_min) / 2 + $bin_min); 1391 } 1392 // Use the result from this iteration as the maximum for the next. 1393 $bin_max = $bin_mid; 1394 $start = max(1, $loc - $bin_mid +1); 1395 $finish = min($loc + $bin_mid, mb_strlen($text)) + mb_strlen($pattern); 1396 1397 $rd = Array ( 1398 $finish +2 1399 ); 1400 $rd[$finish +1] = (1 << $d) - 1; 1401 for ($j = $finish; $j >= $start; $j--) { 1402 // The alphabet (s) is a sparse hash, so the following line generates 1403 // warnings. 1404 @ $charMatch = $s[ $text[$j -1] ]; 1405 if ($d === 0) { // First pass: exact match. 1406 $rd[$j] = (($rd[$j +1] << 1) | 1) & $charMatch; 1407 } else { // Subsequent passes: fuzzy match. 1408 $rd[$j] = (($rd[$j +1] << 1) | 1) & $charMatch | ((($last_rd[$j +1] | $last_rd[$j]) << 1) | 1) | $last_rd[$j +1]; 1409 } 1410 if ($rd[$j] & $matchmask) { 1411 $score = $this->match_bitapScore($d, $j -1, $pattern, $loc); 1412 // This match will almost certainly be better than any existing match. 1413 // But check anyway. 1414 if ($score <= $score_threshold) { 1415 // Told you so. 1416 $score_threshold = $score; 1417 $best_loc = $j -1; 1418 if ($best_loc > $loc) { 1419 // When passing loc, don't exceed our current distance from loc. 1420 $start = max(1, 2 * $loc - $best_loc); 1421 } else { 1422 // Already passed loc, downhill from here on in. 1423 break; 1424 } 1425 } 1426 } 1427 } 1428 // No hope for a (better) match at greater error levels. 1429 if ($this->match_bitapScore($d +1, $loc, $pattern, $loc) > $score_threshold) { 1430 break; 1431 } 1432 $last_rd = $rd; 1433 } 1434 return (int)$best_loc; 1435 } 1436 1437 /** 1438 * Compute and return the score for a match with e errors and x location. 1439 * Accesses loc and pattern through being a closure. 1440 * @param {number} e Number of errors in match. 1441 * @param {number} x Location of match. 1442 * @return {number} Overall score for match (0.0 = good, 1.0 = bad). 1443 * @private 1444 */ 1445 function match_bitapScore($e, $x, $pattern, $loc) { 1446 $accuracy = $e / mb_strlen($pattern); 1447 $proximity = abs($loc - $x); 1448 if (!$this->Match_Distance) { 1449 // Dodge divide by zero error. 1450 return $proximity ? 1.0 : $accuracy; 1451 } 1452 return $accuracy + ($proximity / $this->Match_Distance); 1453 } 1454 1455 /** 1456 * Initialise the alphabet for the Bitap algorithm. 1457 * @param {string} pattern The text to encode. 1458 * @return {Object} Hash of character locations. 1459 * @private 1460 */ 1461 function match_alphabet($pattern) { 1462 $s = array (); 1463 for ($i = 0; $i < mb_strlen($pattern); $i++) { 1464 $s[ $pattern[$i] ] = 0; 1465 } 1466 for ($i = 0; $i < mb_strlen($pattern); $i++) { 1467 $s[ $pattern[$i] ] |= 1 << (mb_strlen($pattern) - $i - 1); 1468 } 1469 return $s; 1470 } 1471 1472 // PATCH FUNCTIONS 1473 1474 /** 1475 * Increase the context until it is unique, 1476 * but don't let the pattern expand beyond Match_MaxBits. 1477 * @param {patch_obj} patch The patch to grow. 1478 * @param {string} text Source text. 1479 * @private 1480 */ 1481 function patch_addContext($patch, $text) { 1482 $pattern = mb_substr($text, $patch->start2, $patch->length1 ); 1483 $previousPattern = null; 1484 $padding = 0; 1485 $i = 0; 1486 while ( 1487 ( mb_strlen($pattern) === 0 // Javascript's indexOf/lastIndexOd return 0/strlen respectively if pattern = '' 1488 || mb_strpos($text, $pattern) !== mb_strrpos($text, $pattern) 1489 ) 1490 && $pattern !== $previousPattern // avoid infinte loop 1491 && mb_strlen($pattern) < Match_MaxBits - $this->Patch_Margin - $this->Patch_Margin ) { 1492 $padding += $this->Patch_Margin; 1493 $previousPattern = $pattern; 1494 $pattern = mb_substr($text, max($patch->start2 - $padding,0), ($patch->start2 + $patch->length1 + $padding) - max($patch->start2 - $padding,0) ); 1495 } 1496 // Add one chunk for good luck. 1497 $padding += $this->Patch_Margin; 1498 // Add the prefix. 1499 $prefix = mb_substr($text, max($patch->start2 - $padding,0), $patch->start2 - max($patch->start2 - $padding,0) ); 1500 if ($prefix!=='') { 1501 array_unshift($patch->diffs, array ( 1502 DIFF_EQUAL, 1503 $prefix 1504 )); 1505 } 1506 // Add the suffix. 1507 $suffix = mb_substr($text, $patch->start2 + $patch->length1, ($patch->start2 + $patch->length1 + $padding) - ($patch->start2 + $patch->length1) ); 1508 if ($suffix!=='') { 1509 array_push($patch->diffs, array ( 1510 DIFF_EQUAL, 1511 $suffix 1512 )); 1513 } 1514 1515 // Roll back the start points. 1516 $patch->start1 -= mb_strlen($prefix); 1517 $patch->start2 -= mb_strlen($prefix); 1518 // Extend the lengths. 1519 $patch->length1 += mb_strlen($prefix) + mb_strlen($suffix); 1520 $patch->length2 += mb_strlen($prefix) + mb_strlen($suffix); 1521 } 1522 1523 /** 1524 * Compute a list of patches to turn text1 into text2. 1525 * Use diffs if provided, otherwise compute it ourselves. 1526 * There are four ways to call this function, depending on what data is 1527 * available to the caller: 1528 * Method 1: 1529 * a = text1, b = text2 1530 * Method 2: 1531 * a = diffs 1532 * Method 3 (optimal): 1533 * a = text1, b = diffs 1534 * Method 4 (deprecated, use method 3): 1535 * a = text1, b = text2, c = diffs 1536 * 1537 * @param {string|Array.<Array.<number|string>>} a text1 (methods 1,3,4) or 1538 * Array of diff tuples for text1 to text2 (method 2). 1539 * @param {string|Array.<Array.<number|string>>} opt_b text2 (methods 1,4) or 1540 * Array of diff tuples for text1 to text2 (method 3) or undefined (method 2). 1541 * @param {string|Array.<Array.<number|string>>} opt_c Array of diff tuples for 1542 * text1 to text2 (method 4) or undefined (methods 1,2,3). 1543 * @return {Array.<patch_obj>} Array of patch objects. 1544 */ 1545 function patch_make($a, $opt_b = null, $opt_c = null ) { 1546 if (is_string($a) && is_string($opt_b) && $opt_c === null ) { 1547 // Method 1: text1, text2 1548 // Compute diffs from text1 and text2. 1549 $text1 = $a; 1550 $diffs = $this->diff_main($text1, $opt_b, true); 1551 if ( count($diffs) > 2) { 1552 $this->diff_cleanupSemantic($diffs); 1553 $this->diff_cleanupEfficiency($diffs); 1554 } 1555 } elseif ( is_array($a) && $opt_b === null && $opt_c === null) { 1556 // Method 2: diffs 1557 // Compute text1 from diffs. 1558 $diffs = $a; 1559 $text1 = $this->diff_text1($diffs); 1560 } elseif ( is_string($a) && is_array($opt_b) && $opt_c === null) { 1561 // Method 3: text1, diffs 1562 $text1 = $a; 1563 $diffs = $opt_b; 1564 } elseif ( is_string($a) && is_string($opt_b) && is_array($opt_c) ) { 1565 // Method 4: text1, text2, diffs 1566 // text2 is not used. 1567 $text1 = $a; 1568 $diffs = $opt_c; 1569 } else { 1570 echo_Exception('Unknown call format to patch_make.'); 1571 } 1572 1573 if ( count($diffs) === 0) { 1574 return array(); // Get rid of the null case. 1575 } 1576 $patches = array(); 1577 $patch = new patch_obj(); 1578 $patchDiffLength = 0; // Keeping our own length var is faster in JS. 1579 $char_count1 = 0; // Number of characters into the text1 string. 1580 $char_count2 = 0; // Number of characters into the text2 string. 1581 // Start with text1 (prepatch_text) and apply the diffs until we arrive at 1582 // text2 (postpatch_text). We recreate the patches one by one to determine 1583 // context info. 1584 $prepatch_text = $text1; 1585 $postpatch_text = $text1; 1586 for ($x = 0; $x < count($diffs); $x++) { 1587 $diff_type = $diffs[$x][0]; 1588 $diff_text = $diffs[$x][1]; 1589 1590 if (!$patchDiffLength && $diff_type !== DIFF_EQUAL) { 1591 // A new patch starts here. 1592 $patch->start1 = $char_count1; 1593 $patch->start2 = $char_count2; 1594 } 1595 1596 switch ($diff_type) { 1597 case DIFF_INSERT : 1598 $patch->diffs[$patchDiffLength++] = $diffs[$x]; 1599 $patch->length2 += mb_strlen($diff_text); 1600 $postpatch_text = mb_substr($postpatch_text, 0, $char_count2) . $diff_text . mb_substr($postpatch_text,$char_count2); 1601 break; 1602 case DIFF_DELETE : 1603 $patch->length1 += mb_strlen($diff_text); 1604 $patch->diffs[$patchDiffLength++] = $diffs[$x]; 1605 $postpatch_text = mb_substr($postpatch_text, 0, $char_count2) . mb_substr($postpatch_text, $char_count2 + mb_strlen($diff_text) ); 1606 break; 1607 case DIFF_EQUAL : 1608 if ( mb_strlen($diff_text) <= 2 * $this->Patch_Margin && $patchDiffLength && count($diffs) != $x + 1) { 1609 // Small equality inside a patch. 1610 $patch->diffs[$patchDiffLength++] = $diffs[$x]; 1611 $patch->length1 += mb_strlen($diff_text); 1612 $patch->length2 += mb_strlen($diff_text); 1613 } elseif ( mb_strlen($diff_text) >= 2 * $this->Patch_Margin ) { 1614 // Time for a new patch. 1615 if ($patchDiffLength) { 1616 $this->patch_addContext($patch, $prepatch_text); 1617 array_push($patches,$patch); 1618 $patch = new patch_obj(); 1619 $patchDiffLength = 0; 1620 // Unlike Unidiff, our patch lists have a rolling context. 1621 // http://code.google.com/p/google-diff-match-patch/wiki/Unidiff 1622 // Update prepatch text & pos to reflect the application of the 1623 // just completed patch. 1624 $prepatch_text = $postpatch_text; 1625 $char_count1 = $char_count2; 1626 } 1627 } 1628 break; 1629 } 1630 1631 // Update the current character count. 1632 if ($diff_type !== DIFF_INSERT) { 1633 $char_count1 += mb_strlen($diff_text); 1634 } 1635 if ($diff_type !== DIFF_DELETE) { 1636 $char_count2 += mb_strlen($diff_text); 1637 } 1638 } 1639 // Pick up the leftover patch if not empty. 1640 if ($patchDiffLength) { 1641 $this->patch_addContext($patch, $prepatch_text); 1642 array_push($patches, $patch); 1643 } 1644 1645 return $patches; 1646 } 1647 1648 /** 1649 * Given an array of patches, return another array that is identical. 1650 * @param {Array.<patch_obj>} patches Array of patch objects. 1651 * @return {Array.<patch_obj>} Array of patch objects. 1652 */ 1653 function patch_deepCopy($patches) { 1654 // Making deep copies is hard in JavaScript. 1655 $patchesCopy = array(); 1656 for ($x = 0; $x < count($patches); $x++) { 1657 $patch = $patches[$x]; 1658 $patchCopy = new patch_obj(); 1659 for ($y = 0; $y < count($patch->diffs); $y++) { 1660 $patchCopy->diffs[$y] = $patch->diffs[$y]; // ?? . slice(); 1661 } 1662 $patchCopy->start1 = $patch->start1; 1663 $patchCopy->start2 = $patch->start2; 1664 $patchCopy->length1 = $patch->length1; 1665 $patchCopy->length2 = $patch->length2; 1666 $patchesCopy[$x] = $patchCopy; 1667 } 1668 return $patchesCopy; 1669 } 1670 1671 /** 1672 * Merge a set of patches onto the text. Return a patched text, as well 1673 * as a list of true/false values indicating which patches were applied. 1674 * @param {Array.<patch_obj>} patches Array of patch objects. 1675 * @param {string} text Old text. 1676 * @return {Array.<string|Array.<boolean>>} Two element Array, containing the 1677 * new text and an array of boolean values. 1678 */ 1679 function patch_apply($patches, $text) { 1680 if ( count($patches) == 0) { 1681 return array($text,array()); 1682 } 1683 1684 // Deep copy the patches so that no changes are made to originals. 1685 $patches = $this->patch_deepCopy($patches); 1686 1687 $nullPadding = $this->patch_addPadding($patches); 1688 $text = $nullPadding . $text . $nullPadding; 1689 1690 $this->patch_splitMax($patches); 1691 // delta keeps track of the offset between the expected and actual location 1692 // of the previous patch. If there are patches expected at positions 10 and 1693 // 20, but the first patch was found at 12, delta is 2 and the second patch 1694 // has an effective expected position of 22. 1695 $delta = 0; 1696 $results = array(); 1697 for ($x = 0; $x < count($patches) ; $x++) { 1698 $expected_loc = $patches[$x]->start2 + $delta; 1699 $text1 = $this->diff_text1($patches[$x]->diffs); 1700 $start_loc = null; 1701 $end_loc = -1; 1702 if (mb_strlen($text1) > Match_MaxBits) { 1703 // patch_splitMax will only provide an oversized pattern in the case of 1704 // a monster delete. 1705 $start_loc = $this->match_main($text, mb_substr($text1, 0, Match_MaxBits ), $expected_loc); 1706 if ($start_loc != -1) { 1707 $end_loc = $this->match_main($text, mb_substr($text1,mb_strlen($text1) - Match_MaxBits), $expected_loc + mb_strlen($text1) - Match_MaxBits); 1708 if ($end_loc == -1 || $start_loc >= $end_loc) { 1709 // Can't find valid trailing context. Drop this patch. 1710 $start_loc = -1; 1711 } 1712 } 1713 } else { 1714 $start_loc = $this->match_main($text, $text1, $expected_loc); 1715 } 1716 if ($start_loc == -1) { 1717 // No match found. :( 1718 $results[$x] = false; 1719 // Subtract the delta for this failed patch from subsequent patches. 1720 $delta -= $patches[$x]->length2 - $patches[$x]->length1; 1721 } else { 1722 // Found a match. :) 1723 $results[$x] = true; 1724 $delta = $start_loc - $expected_loc; 1725 $text2 = null; 1726 if ($end_loc == -1) { 1727 $text2 = mb_substr($text, $start_loc, mb_strlen($text1) ); 1728 } else { 1729 $text2 = mb_substr($text, $start_loc, $end_loc + Match_MaxBits - $start_loc); 1730 } 1731 if ($text1 == $text2) { 1732 // Perfect match, just shove the replacement text in. 1733 $text = mb_substr($text, 0, $start_loc) . $this->diff_text2($patches[$x]->diffs) . mb_substr($text,$start_loc + mb_strlen($text1) ); 1734 } else { 1735 // Imperfect match. Run a diff to get a framework of equivalent 1736 // indices. 1737 $diffs = $this->diff_main($text1, $text2, false); 1738 if ( mb_strlen($text1) > Match_MaxBits && $this->diff_levenshtein($diffs) / mb_strlen($text1) > $this->Patch_DeleteThreshold) { 1739 // The end points match, but the content is unacceptably bad. 1740 $results[$x] = false; 1741 } else { 1742 $this->diff_cleanupSemanticLossless($diffs); 1743 $index1 = 0; 1744 $index2 = NULL; 1745 for ($y = 0; $y < count($patches[$x]->diffs); $y++) { 1746 $mod = $patches[$x]->diffs[$y]; 1747 if ($mod[0] !== DIFF_EQUAL) { 1748 $index2 = $this->diff_xIndex($diffs, $index1); 1749 } 1750 if ($mod[0] === DIFF_INSERT) { // Insertion 1751 $text = mb_substr($text, 0, $start_loc + $index2) . $mod[1] . mb_substr($text, $start_loc + $index2); 1752 } elseif ($mod[0] === DIFF_DELETE) { // Deletion 1753 $text = mb_substr($text, 0, $start_loc + $index2) . mb_substr($text,$start_loc + $this->diff_xIndex($diffs, $index1 + mb_strlen($mod[1]) )); 1754 } 1755 if ($mod[0] !== DIFF_DELETE) { 1756 $index1 += mb_strlen($mod[1]); 1757 } 1758 } 1759 } 1760 } 1761 } 1762 } 1763 // Strip the padding off. 1764 $text = mb_substr($text, mb_strlen($nullPadding), mb_strlen($text) - 2*mb_strlen($nullPadding) ); 1765 return array($text, $results); 1766 } 1767 1768 /** 1769 * Add some padding on text start and end so that edges can match something. 1770 * Intended to be called only from within patch_apply. 1771 * @param {Array.<patch_obj>} patches Array of patch objects. 1772 * @return {string} The padding string added to each side. 1773 */ 1774 function patch_addPadding(&$patches){ 1775 $paddingLength = $this->Patch_Margin; 1776 $nullPadding = ''; 1777 for ($x = 1; $x <= $paddingLength; $x++) { 1778 $nullPadding .= mb_chr($x); 1779 } 1780 1781 // Bump all the patches forward. 1782 for ($x = 0; $x < count($patches); $x++) { 1783 $patches[$x]->start1 += $paddingLength; 1784 $patches[$x]->start2 += $paddingLength; 1785 } 1786 1787 // Add some padding on start of first diff. 1788 $patch = &$patches[0]; 1789 $diffs = &$patch->diffs; 1790 if (count($diffs) == 0 || $diffs[0][0] != DIFF_EQUAL) { 1791 // Add nullPadding equality. 1792 array_unshift($diffs, array(DIFF_EQUAL, $nullPadding)); 1793 $patch->start1 -= $paddingLength; // Should be 0. 1794 $patch->start2 -= $paddingLength; // Should be 0. 1795 $patch->length1 += $paddingLength; 1796 $patch->length2 += $paddingLength; 1797 } elseif ($paddingLength > mb_strlen($diffs[0][1]) ) { 1798 // Grow first equality. 1799 $extraLength = $paddingLength - mb_strlen($diffs[0][1]); 1800 $diffs[0][1] = mb_substr( $nullPadding , mb_strlen($diffs[0][1]) ) . $diffs[0][1]; 1801 $patch->start1 -= $extraLength; 1802 $patch->start2 -= $extraLength; 1803 $patch->length1 += $extraLength; 1804 $patch->length2 += $extraLength; 1805 } 1806 1807 // Add some padding on end of last diff. 1808 $patch = &$patches[count($patches) - 1]; 1809 $diffs = &$patch->diffs; 1810 if ( count($diffs) == 0 || $diffs[ count($diffs) - 1][0] != DIFF_EQUAL) { 1811 // Add nullPadding equality. 1812 array_push($diffs, array(DIFF_EQUAL, $nullPadding) ); 1813 $patch->length1 += $paddingLength; 1814 $patch->length2 += $paddingLength; 1815 } elseif ($paddingLength > mb_strlen( $diffs[count($diffs)-1][1] ) ) { 1816 // Grow last equality. 1817 $extraLength = $paddingLength - mb_strlen( $diffs[count($diffs)-1][1] ); 1818 $diffs[ count($diffs)-1][1] .= mb_substr($nullPadding,0,$extraLength); 1819 $patch->length1 += $extraLength; 1820 $patch->length2 += $extraLength; 1821 } 1822 1823 return $nullPadding; 1824 } 1825 1826 /** 1827 * Look through the patches and break up any which are longer than the maximum 1828 * limit of the match algorithm. 1829 * @param {Array.<patch_obj>} patches Array of patch objects. 1830 */ 1831 function patch_splitMax(&$patches) { 1832 for ($x = 0; $x < count($patches); $x++) { 1833 if ( $patches[$x]->length1 > Match_MaxBits) { 1834 $bigpatch = $patches[$x]; 1835 // Remove the big old patch. 1836 array_splice($patches,$x--,1); 1837 $patch_size = Match_MaxBits; 1838 $start1 = $bigpatch->start1; 1839 $start2 = $bigpatch->start2; 1840 $precontext = ''; 1841 while ( count($bigpatch->diffs) !== 0) { 1842 // Create one of several smaller patches. 1843 $patch = new patch_obj(); 1844 $empty = true; 1845 $patch->start1 = $start1 - mb_strlen($precontext); 1846 $patch->start2 = $start2 - mb_strlen($precontext); 1847 if ($precontext !== '') { 1848 $patch->length1 = $patch->length2 = mb_strlen($precontext); 1849 array_push($patch->diffs, array(DIFF_EQUAL, $precontext) ); 1850 } 1851 while ( count($bigpatch->diffs) !== 0 && $patch->length1 < $patch_size - $this->Patch_Margin) { 1852 $diff_type = $bigpatch->diffs[0][0]; 1853 $diff_text = $bigpatch->diffs[0][1]; 1854 if ($diff_type === DIFF_INSERT) { 1855 // Insertions are harmless. 1856 $patch->length2 += mb_strlen($diff_text); 1857 $start2 += mb_strlen($diff_text); 1858 array_push($patch->diffs, array_shift($bigpatch->diffs) ); 1859 $empty = false; 1860 } else 1861 if ($diff_type === DIFF_DELETE && count($patch->diffs) == 1 && $patch->diffs[0][0] == DIFF_EQUAL && (mb_strlen($diff_text) > 2 * $patch_size) ) { 1862 // This is a large deletion. Let it pass in one chunk. 1863 $patch->length1 += mb_strlen($diff_text); 1864 $start1 += mb_strlen($diff_text); 1865 $empty = false; 1866 array_push( $patch->diffs, array($diff_type, $diff_text) ); 1867 array_shift($bigpatch->diffs); 1868 } else { 1869 // Deletion or equality. Only take as much as we can stomach. 1870 $diff_text = mb_substr($diff_text, 0, $patch_size - $patch->length1 - $this->Patch_Margin); 1871 $patch->length1 += mb_strlen($diff_text); 1872 $start1 += mb_strlen($diff_text); 1873 if ($diff_type === DIFF_EQUAL) { 1874 $patch->length2 += mb_strlen($diff_text); 1875 $start2 += mb_strlen($diff_text); 1876 } else { 1877 $empty = false; 1878 } 1879 array_push($patch->diffs, array($diff_type, $diff_text) ); 1880 if ($diff_text == $bigpatch->diffs[0][1]) { 1881 array_shift($bigpatch->diffs); 1882 } else { 1883 $bigpatch->diffs[0][1] = mb_substr( $bigpatch->diffs[0][1],mb_strlen($diff_text) ); 1884 } 1885 } 1886 } 1887 // Compute the head context for the next patch. 1888 $precontext = $this->diff_text2($patch->diffs); 1889 $precontext = mb_substr($precontext, mb_strlen($precontext)-$this->Patch_Margin); 1890 // Append the end context for this patch. 1891 $postcontext = mb_substr( $this->diff_text1($bigpatch->diffs), 0, $this->Patch_Margin ); 1892 if ($postcontext !== '') { 1893 $patch->length1 += mb_strlen($postcontext); 1894 $patch->length2 += mb_strlen($postcontext); 1895 if ( count($patch->diffs) !== 0 && $patch->diffs[ count($patch->diffs) - 1][0] === DIFF_EQUAL) { 1896 $patch->diffs[ count($patch->diffs) - 1][1] .= $postcontext; 1897 } else { 1898 array_push($patch->diffs, array(DIFF_EQUAL, $postcontext)); 1899 } 1900 } 1901 if (!$empty) { 1902 array_splice($patches, ++$x, 0, array($patch)); 1903 } 1904 } 1905 } 1906 } 1907 } 1908 1909 /** 1910 * Take a list of patches and return a textual representation. 1911 * @param {Array.<patch_obj>} patches Array of patch objects. 1912 * @return {string} Text representation of patches. 1913 */ 1914 function patch_toText($patches) { 1915 $text = array(); 1916 for ($x = 0; $x < count($patches) ; $x++) { 1917 $text[$x] = $patches[$x]; 1918 } 1919 return implode('',$text); 1920 } 1921 1922 /** 1923 * Parse a textual representation of patches and return a list of patch objects. 1924 * @param {string} textline Text representation of patches. 1925 * @return {Array.<patch_obj>} Array of patch objects. 1926 * @throws {Error} If invalid input. 1927 */ 1928 function patch_fromText($textline) { 1929 $patches = array(); 1930 if ($textline === '') { 1931 return $patches; 1932 } 1933 $text = explode("\n",$textline); 1934 foreach($text as $i=>$t){ if($t===''){ unset($text[$i]); } } 1935 $textPointer = 0; 1936 while ($textPointer < count($text) ) { 1937 $m = null; 1938 preg_match('/^@@ -(\d+),?(\d*) \+(\d+),?(\d*) @@$/',$text[$textPointer],$m); 1939 if (!$m) { 1940 echo_Exception('Invalid patch string: ' . $text[$textPointer]); 1941 } 1942 $patch = new patch_obj(); 1943 array_push($patches, $patch); 1944 @$patch->start1 = (int)$m[1]; 1945 if (@$m[2] === '') { 1946 $patch->start1--; 1947 $patch->length1 = 1; 1948 } elseif ( @$m[2] == '0') { 1949 $patch->length1 = 0; 1950 } else { 1951 $patch->start1--; 1952 @$patch->length1 = (int)$m[2]; 1953 } 1954 1955 @$patch->start2 = (int)$m[3]; 1956 if (@$m[4] === '') { 1957 $patch->start2--; 1958 $patch->length2 = 1; 1959 } elseif ( @$m[4] == '0') { 1960 $patch->length2 = 0; 1961 } else { 1962 $patch->start2--; 1963 @$patch->length2 = (int)$m[4]; 1964 } 1965 $textPointer++; 1966 1967 while ($textPointer < count($text) ) { 1968 $sign = $text[$textPointer][0]; 1969 try { 1970 $line = decodeURI( mb_substr($text[$textPointer],1) ); 1971 } catch (Exception $ex) { 1972 // Malformed URI sequence. 1973 throw new Exception('Illegal escape in patch_fromText: ' . $line); 1974 } 1975 if ($sign == '-') { 1976 // Deletion. 1977 array_push( $patch->diffs, array(DIFF_DELETE, $line) ); 1978 } elseif ($sign == '+') { 1979 // Insertion. 1980 array_push($patch->diffs, array(DIFF_INSERT, $line) ); 1981 } elseif ($sign == ' ') { 1982 // Minor equality. 1983 array_push($patch->diffs, array(DIFF_EQUAL, $line) ); 1984 } elseif ($sign == '@') { 1985 // Start of next patch. 1986 break; 1987 } elseif ($sign === '') { 1988 // Blank line? Whatever. 1989 } else { 1990 // WTF? 1991 echo_Exception('Invalid patch mode "' . $sign . '" in: ' . $line); 1992 } 1993 $textPointer++; 1994 } 1995 } 1996 return $patches; 1997 } 1998 } 1999 2000 /** 2001 * Class representing one patch operation. 2002 * @constructor 2003 */ 2004 class patch_obj { 2005 /** @type {Array.<Array.<number|string>>} */ 2006 public $diffs = array(); 2007 /** @type {number?} */ 2008 public $start1 = null; 2009 /** @type {number?} */ 2010 public $start2 = null; 2011 /** @type {number} */ 2012 public $length1 = 0; 2013 /** @type {number} */ 2014 public $length2 = 0; 2015 2016 /** 2017 * Emmulate GNU diff's format. 2018 * Header: @@ -382,8 +481,9 @@ 2019 * Indicies are printed as 1-based, not 0-based. 2020 * @return {string} The GNU diff string. 2021 */ 2022 function toString() { 2023 if ($this->length1 === 0) { 2024 $coords1 = $this->start1 . ',0'; 2025 } elseif ($this->length1 == 1) { 2026 $coords1 = $this->start1 + 1; 2027 } else { 2028 $coords1 = ($this->start1 + 1) . ',' . $this->length1; 2029 } 2030 if ($this->length2 === 0) { 2031 $coords2 = $this->start2 . ',0'; 2032 } elseif ($this->length2 == 1) { 2033 $coords2 = $this->start2 + 1; 2034 } else { 2035 $coords2 = ($this->start2 + 1) . ',' . $this->length2; 2036 } 2037 $text = array ( '@@ -' . $coords1 . ' +' . $coords2 . " @@\n" ); 2038 2039 // Escape the body of the patch with %xx notation. 2040 for ($x = 0; $x < count($this->diffs); $x++) { 2041 switch ($this->diffs[$x][0]) { 2042 case DIFF_INSERT : 2043 $op = '+'; 2044 break; 2045 case DIFF_DELETE : 2046 $op = '-'; 2047 break; 2048 case DIFF_EQUAL : 2049 $op = ' '; 2050 break; 2051 } 2052 $text[$x +1] = $op . encodeURI($this->diffs[$x][1]) . "\n"; 2053 } 2054 return str_replace('%20', ' ', implode('',$text)); 2055 } 2056 function __toString(){ 2057 return $this->toString(); 2058 } 2059 } 2060 2061 define('DIFF_DELETE', -1); 2062 define('DIFF_INSERT', 1); 2063 define('DIFF_EQUAL', 0); 2064 2065 define('Match_MaxBits', PHP_INT_SIZE * 8); 2066 2067 2068 function charCodeAt($str, $pos) { 2069 return mb_ord(mb_substr($str, $pos, 1)); 2070 } 2071 function mb_ord($v) { 2072 $k = mb_convert_encoding($v, 'UCS-2LE', 'UTF-8'); 2073 $k1 = ord(substr($k, 0, 1)); 2074 $k2 = ord(substr($k, 1, 1)); 2075 return $k2 * 256 + $k1; 2076 } 2077 function mb_chr($num){ 2078 return mb_convert_encoding('&#'.intval($num).';', 'UTF-8', 'HTML-ENTITIES'); 2079 } 2080 2081 /** 2082 * as in javascript encodeURI() following the MDN description 2083 * 2084 * @link https://developer.mozilla.org/en/JavaScript/Reference/Global_Objects/encodeURI 2085 * @param $url 2086 * @return string 2087 */ 2088 function encodeURI($url) { 2089 return strtr(rawurlencode($url), array ( 2090 '%3B' => ';', '%2C' => ',', '%2F' => '/', '%3F' => '?', '%3A' => ':', '%40' => '@', '%26' => '&', '%3D' => '=', 2091 '%2B' => '+', '%24' => '$', '%21' => '!', '%2A' => '*', '%27' => '\'', '%28' => '(', '%29' => ')', '%23' => '#', 2092 )); 2093 } 2094 2095 function decodeURI($encoded) { 2096 static $dontDecode; 2097 if (!$dontDecode) { 2098 $table = array ( 2099 '%3B' => ';', '%2C' => ',', '%2F' => '/', '%3F' => '?', '%3A' => ':', '%40' => '@', '%26' => '&', '%3D' => '=', 2100 '%2B' => '+', '%24' => '$', '%21' => '!', '%2A' => '*', '%27' => '\'', '%28' => '(', '%29' => ')', '%23' => '#', 2101 ); 2102 $dontDecode = array(); 2103 foreach ($table as $k => $v) { 2104 $dontDecode[$k] = encodeURI($k); 2105 } 2106 } 2107 return rawurldecode(strtr($encoded, $dontDecode)); 2108 } 2109 2110 function echo_Exception($str){ 2111 global $lastException; 2112 $lastException = $str; 2113 echo $str; 2114 } 2115 //mb_internal_encoding("UTF-8"); 2116 2117 ?>
title
Description
Body
title
Description
Body
title
Description
Body
title
Body
Generated: Sun Nov 30 09:20:46 2014 | Cross-referenced by PHPXref 0.7.1 |