[ Index ]

PHP Cross Reference of Phabricator

title

Body

[close]

/src/applications/differential/parser/ -> DifferentialHunkParser.php (source)

   1  <?php
   2  
   3  final class DifferentialHunkParser {
   4  
   5    private $oldLines;
   6    private $newLines;
   7    private $intraLineDiffs;
   8    private $visibleLinesMask;
   9    private $whitespaceMode;
  10  
  /**
   * Build a map of the old-file line numbers on which hunks begin, leaving
   * out any hunk that begins on line 1. This data structure is used to decide
   * when to render "Context not available." in diffs with multiple hunks.
   *
   * @param  list<DifferentialHunk>  Hunks to inspect.
   * @return dict<int, bool>  Map of lines where hunks start, other than line 1.
   */
  public function getHunkStartLines(array $hunks) {
    assert_instances_of($hunks, 'DifferentialHunk');

    $start_lines = array();
    foreach ($hunks as $hunk) {
      $offset = $hunk->getOldOffset();
      if ($offset <= 1) {
        // A hunk starting at the top of the file needs no marker.
        continue;
      }
      $start_lines[$offset] = true;
    }

    return $start_lines;
  }
  31  
  /**
   * Store the visible-lines mask.
   *
   * @param  map<int, int>  Mask as built by generateVisibileLinesMask().
   * @return this
   */
  private function setVisibleLinesMask($mask) {
    $this->visibleLinesMask = $mask;
    return $this;
  }

  /**
   * Return the stored visible-lines mask.
   *
   * @return map<int, int>  The previously generated mask.
   * @throws Exception  If no mask has been stored yet.
   */
  public function getVisibleLinesMask() {
    $mask = $this->visibleLinesMask;
    if ($mask !== null) {
      return $mask;
    }

    throw new Exception(
      'You must generateVisibileLinesMask before accessing this data.');
  }
  44  
  /**
   * Store the intraline diffs.
   *
   * @param  map  Intraline diffs, keyed as built by generateIntraLineDiffs().
   * @return this
   */
  private function setIntraLineDiffs($intra_line_diffs) {
    $this->intraLineDiffs = $intra_line_diffs;
    return $this;
  }

  /**
   * Return the stored intraline diffs.
   *
   * @return map  The previously generated intraline diffs.
   * @throws Exception  If no intraline diffs have been stored yet.
   */
  public function getIntraLineDiffs() {
    $diffs = $this->intraLineDiffs;
    if ($diffs !== null) {
      return $diffs;
    }

    throw new Exception(
      'You must generateIntraLineDiffs before accessing this data.');
  }
  57  
  /**
   * Store the list of new-side line records.
   *
   * @param  list  Line records (or `null` padding entries).
   * @return this
   */
  private function setNewLines($new_lines) {
    $this->newLines = $new_lines;
    return $this;
  }

  /**
   * Return the list of new-side line records.
   *
   * @return list  The stored new-side lines.
   * @throws Exception  If no line data has been stored yet.
   */
  public function getNewLines() {
    $lines = $this->newLines;
    if ($lines !== null) {
      return $lines;
    }

    throw new Exception(
      'You must parseHunksForLineData before accessing this data.');
  }
  70  
  /**
   * Store the list of old-side line records.
   *
   * @param  list  Line records (or `null` padding entries).
   * @return this
   */
  private function setOldLines($old_lines) {
    $this->oldLines = $old_lines;
    return $this;
  }

  /**
   * Return the list of old-side line records.
   *
   * @return list  The stored old-side lines.
   * @throws Exception  If no line data has been stored yet.
   */
  public function getOldLines() {
    $lines = $this->oldLines;
    if ($lines !== null) {
      return $lines;
    }

    throw new Exception(
      'You must parseHunksForLineData before accessing this data.');
  }
  83  
  /**
   * Build a map from old-side line number to that line's change type.
   * Empty (padding) entries in the old line list are ignored.
   *
   * @return map  Change type keyed by line number.
   */
  public function getOldLineTypeMap() {
    $type_map = array();
    foreach ($this->getOldLines() as $line) {
      if ($line) {
        $type_map[$line['line']] = $line['type'];
      }
    }
    return $type_map;
  }
  95  
  /**
   * Overwrite the change type of every old-side line from a map of line
   * number to type (the shape produced by getOldLineTypeMap()).
   *
   * Empty entries in the line list (the `null` padding added by
   * reparseHunksForSpecialAttributes()) are left untouched, mirroring how
   * getOldLineTypeMap() skips them.
   *
   * @param  map  Change type keyed by old-side line number.
   * @return this
   */
  public function setOldLineTypeMap(array $map) {
    $lines = $this->getOldLines();
    foreach ($lines as $key => $data) {
      if (!$data) {
        // Assigning ['type'] on a null entry would silently turn the padding
        // into an array and make it look like a real line to later passes.
        continue;
      }
      $lines[$key]['type'] = $map[$data['line']];
    }
    $this->oldLines = $lines;
    return $this;
  }
 104  
 105    public function getNewLineTypeMap() {
 106      $map = array();
 107      $new = $this->getNewLines();
 108      foreach ($new as $n) {
 109        if (!$n) {
 110          continue;
 111        }
 112        $map[$n['line']] = $n['type'];
 113      }
 114      return $map;
 115    }
 116  
 117    public function setNewLineTypeMap(array $map) {
 118      $lines = $this->getNewLines();
 119      foreach ($lines as $key => $data) {
 120        $lines[$key]['type'] = $map[$data['line']];
 121      }
 122      $this->newLines = $lines;
 123      return $this;
 124    }
 125  
 126  
 127    public function setWhitespaceMode($white_space_mode) {
 128      $this->whitespaceMode = $white_space_mode;
 129      return $this;
 130    }
 131  
 132    private function getWhitespaceMode() {
 133      if ($this->whitespaceMode === null) {
 134        throw new Exception(
 135          'You must setWhitespaceMode before accessing this data.'
 136        );
 137      }
 138      return $this->whitespaceMode;
 139    }
 140  
 141    public function getIsDeleted() {
 142      foreach ($this->getNewLines() as $line) {
 143        if ($line) {
 144          // At least one new line, so the entire file wasn't deleted.
 145          return false;
 146        }
 147      }
 148  
 149      foreach ($this->getOldLines() as $line) {
 150        if ($line) {
 151          // No new lines, at least one old line; the entire file was deleted.
 152          return true;
 153        }
 154      }
 155  
 156      // This is an empty file.
 157      return false;
 158    }
 159  
 160    /**
 161     * Returns true if the hunks change any text, not just whitespace.
 162     */
 163    public function getHasTextChanges() {
 164      return $this->getHasChanges('text');
 165    }
 166  
 167    /**
 168     * Returns true if the hunks change anything, including whitespace.
 169     */
 170    public function getHasAnyChanges() {
 171      return $this->getHasChanges('any');
 172    }
 173  
 174    private function getHasChanges($filter) {
 175      if ($filter !== 'any' && $filter !== 'text') {
 176        throw new Exception("Unknown change filter '{$filter}'.");
 177      }
 178  
 179      $old = $this->getOldLines();
 180      $new = $this->getNewLines();
 181  
 182      $is_any = ($filter === 'any');
 183  
 184      foreach ($old as $key => $o) {
 185        $n = $new[$key];
 186        if ($o === null || $n === null) {
 187          // One side is missing, and it's impossible for both sides to be null,
 188          // so the other side must have something, and thus the two sides are
 189          // different and the file has been changed under any type of filter.
 190          return true;
 191        }
 192  
 193        if ($o['type'] !== $n['type']) {
 194          // The types are different, so either the underlying text is actually
 195          // different or whatever whitespace rules we're using consider them
 196          // different.
 197          return true;
 198        }
 199  
 200        if ($o['text'] !== $n['text']) {
 201          if ($is_any) {
 202            // The text is different, so there's a change.
 203            return true;
 204          } else if (trim($o['text']) !== trim($n['text'])) {
 205            return true;
 206          }
 207        }
 208      }
 209  
 210      // No changes anywhere in the file.
 211      return false;
 212    }
 213  
 214  
 215    /**
 216     * This function takes advantage of the parsing work done in
 217     * @{method:parseHunksForLineData} and continues the struggle to hammer this
 218     * data into something we can display to a user.
 219     *
 220     * In particular, this function re-parses the hunks to make them equivalent
 221     * in length for easy rendering, adding `null` as necessary to pad the
 222     * length.
 223     *
 224     * Anyhoo, this function is not particularly well-named but I try.
 225     *
 226     * NOTE: this function must be called after
 227     * @{method:parseHunksForLineData}.
 228     */
 229    public function reparseHunksForSpecialAttributes() {
 230      $rebuild_old = array();
 231      $rebuild_new = array();
 232  
 233      $old_lines = array_reverse($this->getOldLines());
 234      $new_lines = array_reverse($this->getNewLines());
 235  
 236      while (count($old_lines) || count($new_lines)) {
 237        $old_line_data = array_pop($old_lines);
 238        $new_line_data = array_pop($new_lines);
 239  
 240        if ($old_line_data) {
 241          $o_type = $old_line_data['type'];
 242        } else {
 243          $o_type = null;
 244        }
 245  
 246        if ($new_line_data) {
 247          $n_type = $new_line_data['type'];
 248        } else {
 249          $n_type = null;
 250        }
 251  
 252        // This line does not exist in the new file.
 253        if (($o_type != null) && ($n_type == null)) {
 254          $rebuild_old[] = $old_line_data;
 255          $rebuild_new[] = null;
 256          if ($new_line_data) {
 257            array_push($new_lines, $new_line_data);
 258          }
 259          continue;
 260        }
 261  
 262        // This line does not exist in the old file.
 263        if (($n_type != null) && ($o_type == null)) {
 264          $rebuild_old[] = null;
 265          $rebuild_new[] = $new_line_data;
 266          if ($old_line_data) {
 267            array_push($old_lines, $old_line_data);
 268          }
 269          continue;
 270        }
 271  
 272        $rebuild_old[] = $old_line_data;
 273        $rebuild_new[] = $new_line_data;
 274      }
 275  
 276      $this->setOldLines($rebuild_old);
 277      $this->setNewLines($rebuild_new);
 278  
 279      $this->updateChangeTypesForWhitespaceMode();
 280  
 281      return $this;
 282    }
 283  
 284    private function updateChangeTypesForWhitespaceMode() {
 285      $mode = $this->getWhitespaceMode();
 286  
 287      $mode_show_all = DifferentialChangesetParser::WHITESPACE_SHOW_ALL;
 288      if ($mode === $mode_show_all) {
 289        // If we're showing all whitespace, we don't need to perform any updates.
 290        return;
 291      }
 292  
 293      $mode_trailing = DifferentialChangesetParser::WHITESPACE_IGNORE_TRAILING;
 294      $is_trailing = ($mode === $mode_trailing);
 295  
 296      $new = $this->getNewLines();
 297      $old = $this->getOldLines();
 298      foreach ($old as $key => $o) {
 299        $n = $new[$key];
 300  
 301        if (!$o || !$n) {
 302          continue;
 303        }
 304  
 305        if ($is_trailing) {
 306          // In "trailing" mode, we need to identify lines which are marked
 307          // changed but differ only by trailing whitespace. We mark these lines
 308          // unchanged.
 309          if ($o['type'] != $n['type']) {
 310            if (rtrim($o['text']) === rtrim($n['text'])) {
 311              $old[$key]['type'] = null;
 312              $new[$key]['type'] = null;
 313            }
 314          }
 315        } else {
 316          // In "ignore most" and "ignore all" modes, we need to identify lines
 317          // which are marked unchanged but have internal whitespace changes.
 318          // We want to ignore leading and trailing whitespace changes only, not
 319          // internal whitespace changes (`diff` doesn't have a mode for this, so
 320          // we have to fix it here). If the text is marked unchanged but the
 321          // old and new text differs by internal space, mark the lines changed.
 322          if ($o['type'] === null && $n['type'] === null) {
 323            if ($o['text'] !== $n['text']) {
 324              if (trim($o['text']) !== trim($n['text'])) {
 325                $old[$key]['type'] = '-';
 326                $new[$key]['type'] = '+';
 327              }
 328            }
 329          }
 330        }
 331      }
 332  
 333      $this->setOldLines($old);
 334      $this->setNewLines($new);
 335  
 336      return $this;
 337    }
 338  
 339    public function generateIntraLineDiffs() {
 340      $old = $this->getOldLines();
 341      $new = $this->getNewLines();
 342  
 343      $diffs = array();
 344      foreach ($old as $key => $o) {
 345        $n = $new[$key];
 346  
 347        if (!$o || !$n) {
 348          continue;
 349        }
 350  
 351        if ($o['type'] != $n['type']) {
 352          $diffs[$key] = ArcanistDiffUtils::generateIntralineDiff(
 353            $o['text'],
 354            $n['text']);
 355        }
 356      }
 357  
 358      $this->setIntraLineDiffs($diffs);
 359  
 360      return $this;
 361    }
 362  
 363    public function generateVisibileLinesMask() {
 364      $lines_context = DifferentialChangesetParser::LINES_CONTEXT;
 365      $old = $this->getOldLines();
 366      $new = $this->getNewLines();
 367      $max_length = max(count($old), count($new));
 368      $visible = false;
 369      $last = 0;
 370      $mask = array();
 371      for ($cursor = -$lines_context; $cursor < $max_length; $cursor++) {
 372        $offset = $cursor + $lines_context;
 373        if ((isset($old[$offset]) && $old[$offset]['type']) ||
 374            (isset($new[$offset]) && $new[$offset]['type'])) {
 375          $visible = true;
 376          $last = $offset;
 377        } else if ($cursor > $last + $lines_context) {
 378          $visible = false;
 379        }
 380        if ($visible && $cursor > 0) {
 381          $mask[$cursor] = 1;
 382        }
 383      }
 384  
 385      $this->setVisibleLinesMask($mask);
 386  
 387      return $this;
 388    }
 389  
 390    public function getOldCorpus() {
 391      return $this->getCorpus($this->getOldLines());
 392    }
 393  
 394    public function getNewCorpus() {
 395      return $this->getCorpus($this->getNewLines());
 396    }
 397  
 398    private function getCorpus(array $lines) {
 399  
 400      $corpus = array();
 401      foreach ($lines as $l) {
 402        if ($l['type'] != '\\') {
 403          if ($l['text'] === null) {
 404            // There's no text on this side of the diff, but insert a placeholder
 405            // newline so the highlighted line numbers match up.
 406            $corpus[] = "\n";
 407          } else {
 408            $corpus[] = $l['text'];
 409          }
 410        }
 411      }
 412      return $corpus;
 413    }
 414  
 415    public function parseHunksForLineData(array $hunks) {
 416      assert_instances_of($hunks, 'DifferentialHunk');
 417  
 418      $old_lines = array();
 419      $new_lines = array();
 420      foreach ($hunks as $hunk) {
 421  
 422        $lines = $hunk->getChanges();
 423        $lines = phutil_split_lines($lines);
 424  
 425        $line_type_map = array();
 426        $line_text = array();
 427        foreach ($lines as $line_index => $line) {
 428          if (isset($line[0])) {
 429            $char = $line[0];
 430            switch ($char) {
 431              case ' ':
 432                $line_type_map[$line_index] = null;
 433                $line_text[$line_index] = substr($line, 1);
 434                break;
 435              case "\r":
 436              case "\n":
 437                // NOTE: Normally, the first character is a space, plus, minus or
 438                // backslash, but it may be a newline if it used to be a space and
 439                // trailing whitespace has been stripped via email transmission or
 440                // some similar mechanism. In these cases, we essentially pretend
 441                // the missing space is still there.
 442                $line_type_map[$line_index] = null;
 443                $line_text[$line_index] = $line;
 444                break;
 445              case '+':
 446              case '-':
 447              case '\\':
 448                $line_type_map[$line_index] = $char;
 449                $line_text[$line_index] = substr($line, 1);
 450                break;
 451              default:
 452                throw new Exception(
 453                  pht(
 454                    'Unexpected leading character "%s" at line index %s!',
 455                    $char,
 456                    $line_index));
 457            }
 458          } else {
 459            $line_type_map[$line_index] = null;
 460            $line_text[$line_index] = '';
 461          }
 462        }
 463  
 464        $old_line = $hunk->getOldOffset();
 465        $new_line = $hunk->getNewOffset();
 466  
 467        $num_lines = count($lines);
 468        for ($cursor = 0; $cursor < $num_lines; $cursor++) {
 469          $type = $line_type_map[$cursor];
 470          $data = array(
 471            'type'  => $type,
 472            'text'  => $line_text[$cursor],
 473            'line'  => $new_line,
 474          );
 475          if ($type == '\\') {
 476            $type = $line_type_map[$cursor - 1];
 477            $data['text'] = ltrim($data['text']);
 478          }
 479          switch ($type) {
 480            case '+':
 481              $new_lines[] = $data;
 482              ++$new_line;
 483              break;
 484            case '-':
 485              $data['line'] = $old_line;
 486              $old_lines[] = $data;
 487              ++$old_line;
 488              break;
 489            default:
 490              $new_lines[] = $data;
 491              $data['line'] = $old_line;
 492              $old_lines[] = $data;
 493              ++$new_line;
 494              ++$old_line;
 495              break;
 496          }
 497        }
 498      }
 499  
 500      $this->setOldLines($old_lines);
 501      $this->setNewLines($new_lines);
 502  
 503      return $this;
 504    }
 505  
  /**
   * Compute which lines should be highlighted on each side when comparing two
   * diffs, using hunks that describe a "diff of diffs".
   *
   * Each line of `$changeset_hunks` is read as two marker characters followed
   * by content: the first is the change type in the diff of diffs, the second
   * is the change type in the original diff.
   *
   * NOTE(review): lines shorter than two characters are not guarded against
   * here; presumably the caller guarantees the format. Confirm.
   *
   * @param  list<DifferentialHunk>  Hunks of the diff of diffs.
   * @param  list<DifferentialHunk>  Hunks used to translate old-side
   *                                 positions (see computeOffsets()).
   * @param  list<DifferentialHunk>  Hunks used to translate new-side
   *                                 positions (see computeOffsets()).
   * @return pair<list, list>  Highlighted old lines, highlighted new lines.
   */
  public function parseHunksForHighlightMasks(
    array $changeset_hunks,
    array $old_hunks,
    array $new_hunks) {
    assert_instances_of($changeset_hunks, 'DifferentialHunk');
    assert_instances_of($old_hunks,       'DifferentialHunk');
    assert_instances_of($new_hunks,       'DifferentialHunk');

    // Put changes side by side.
    $olds = array();
    $news = array();
    foreach ($changeset_hunks as $hunk) {
      // Running positions on each side, starting from the hunk's offsets.
      $n_old = $hunk->getOldOffset();
      $n_new = $hunk->getNewOffset();
      $changes = phutil_split_lines($hunk->getChanges());
      foreach ($changes as $line) {
        $diff_type = $line[0]; // Change type in diff of diffs.
        $orig_type = $line[1]; // Change type in the original diff.
        if ($diff_type == ' ') {
          // Use the same key for lines that are next to each other.
          // Storing null under a shared key on both sides realigns the two
          // lists, so later appended entries on either side start at the
          // same index.
          $key = max(last_key($olds), last_key($news)) + 1;
          $olds[$key] = null;
          $news[$key] = null;
        } else if ($diff_type == '-') {
          $olds[] = array($n_old, $orig_type);
        } else if ($diff_type == '+') {
          $news[] = array($n_new, $orig_type);
        }
        // Advance each side's position for lines present on that side,
        // except lines the original diff marked as removed.
        if (($diff_type == '-' || $diff_type == ' ') && $orig_type != '-') {
          $n_old++;
        }
        if (($diff_type == '+' || $diff_type == ' ') && $orig_type != '-') {
          $n_new++;
        }
      }
    }

    // Lookup tables from sequential position to line number on each side.
    $offsets_old = $this->computeOffsets($old_hunks);
    $offsets_new = $this->computeOffsets($new_hunks);

    // Highlight lines that were added on each side or removed on the other
    // side.
    $highlight_old = array();
    $highlight_new = array();
    $last = max(last_key($olds), last_key($news));
    for ($i = 0; $i <= $last; $i++) {
      // isset() is false for the null alignment entries, so those are skipped.
      if (isset($olds[$i])) {
        list($n, $type) = $olds[$i];
        if ($type == '+' ||
            ($type == ' ' && isset($news[$i]) && $news[$i][1] != ' ')) {
          $highlight_old[] = $offsets_old[$n];
        }
      }
      if (isset($news[$i])) {
        list($n, $type) = $news[$i];
        if ($type == '+' ||
            ($type == ' ' && isset($olds[$i]) && $olds[$i][1] != ' ')) {
          $highlight_new[] = $offsets_new[$n];
        }
      }
    }

    return array($highlight_old, $highlight_new);
  }
 570  
  /**
   * Build a unified-diff style excerpt ("@@ -a,b +c,d @@" header followed by
   * hunk lines) covering a range of lines on one side of the diff, optionally
   * widened by extra context lines.
   *
   * Every hunk whose range on the selected side touches the requested range
   * contributes one header and one body to the result.
   *
   * @param  list<DifferentialHunk>  Hunks to extract from.
   * @param  bool    True to select lines by new-side position, false for
   *                 old-side position.
   * @param  int     First line number of the requested range.
   * @param  int     Added to the start position to obtain the end position.
   * @param  int     Number of extra positions to include before and after.
   * @return string  Headers and hunk lines joined with newlines; empty
   *                 string if no hunk contributed.
   */
  public function makeContextDiff(
    array $hunks,
    $is_new,
    $line_number,
    $line_length,
    $add_context) {

    assert_instances_of($hunks, 'DifferentialHunk');

    $context = array();

    // Marker character of lines that belong only to the selected side.
    if ($is_new) {
      $prefix = '+';
    } else {
      $prefix = '-';
    }

    foreach ($hunks as $hunk) {
      if ($is_new) {
        $offset = $hunk->getNewOffset();
        $length = $hunk->getNewLen();
      } else {
        $offset = $hunk->getOldOffset();
        $length = $hunk->getOldLen();
      }
      // Convert the requested range to positions relative to the hunk start.
      $start = $line_number - $offset;
      $end = $start + $line_length;
      // We need to go in if $start == $length, because the last line
      // might be a "\No newline at end of file" marker, which we want
      // to show if the additional context is > 0.
      if ($start <= $length && $end >= 0) {
        $start = $start - $add_context;
        $end = $end + $add_context;
        $hunk_content = array();
        // Per-side bookkeeping, keyed by side marker: the current position
        // within the hunk, and the first and last positions actually emitted.
        $hunk_pos = array( '-' => 0, '+' => 0 );
        $hunk_offset = array( '-' => null, '+' => null );
        $hunk_last = array( '-' => null, '+' => null );
        foreach (explode("\n", $hunk->getChanges()) as $line) {
          $in_common = strncmp($line, ' ', 1) === 0;
          $in_old = strncmp($line, '-', 1) === 0 || $in_common;
          $in_new = strncmp($line, '+', 1) === 0 || $in_common;
          $in_selected = strncmp($line, $prefix, 1) === 0;
          // Lines that are neither on the selected side nor common are
          // candidates for skipping at the range boundaries.
          $skip = !$in_selected && !$in_common;
          if ($hunk_pos[$prefix] <= $end) {
            if ($start <= $hunk_pos[$prefix]) {
              // Within the range: keep the line, unless it is a skip
              // candidate sitting exactly on the start or end position.
              if (!$skip || ($hunk_pos[$prefix] != $start &&
                $hunk_pos[$prefix] != $end)) {
                  if ($in_old) {
                    if ($hunk_offset['-'] === null) {
                      $hunk_offset['-'] = $hunk_pos['-'];
                    }
                    $hunk_last['-'] = $hunk_pos['-'];
                  }
                  if ($in_new) {
                    if ($hunk_offset['+'] === null) {
                      $hunk_offset['+'] = $hunk_pos['+'];
                    }
                    $hunk_last['+'] = $hunk_pos['+'];
                  }

                  $hunk_content[] = $line;
                }
            }
            // Positions stop advancing once the selected side passes $end.
            if ($in_old) { ++$hunk_pos['-']; }
            if ($in_new) { ++$hunk_pos['+']; }
          }
        }
        // Emit a header only if at least one line was kept; each side's
        // range is included only if a line from that side was kept.
        if ($hunk_offset['-'] !== null || $hunk_offset['+'] !== null) {
          $header = '@@';
          if ($hunk_offset['-'] !== null) {
            $header .= ' -'.($hunk->getOldOffset() + $hunk_offset['-']).
              ','.($hunk_last['-'] - $hunk_offset['-'] + 1);
          }
          if ($hunk_offset['+'] !== null) {
            $header .= ' +'.($hunk->getNewOffset() + $hunk_offset['+']).
              ','.($hunk_last['+'] - $hunk_offset['+'] + 1);
          }
          $header .= ' @@';
          $context[] = $header;
          $context[] = implode("\n", $hunk_content);
        }
      }
    }
    return implode("\n", $context);
  }
 656  
 657    private function computeOffsets(array $hunks) {
 658      assert_instances_of($hunks, 'DifferentialHunk');
 659  
 660      $offsets = array();
 661      $n = 1;
 662      foreach ($hunks as $hunk) {
 663        for ($i = 0; $i < $hunk->getNewLen(); $i++) {
 664          $offsets[$n] = $hunk->getNewOffset() + $i;
 665          $n++;
 666        }
 667      }
 668      return $offsets;
 669    }
 670  }


Generated: Sun Nov 30 09:20:46 2014 Cross-referenced by PHPXref 0.7.1