[ Index ]

PHP Cross Reference of Phabricator

title

Body

[close]

/src/applications/repository/engine/ -> PhabricatorRepositoryDiscoveryEngine.php (source)

   1  <?php
   2  
   3  /**
   4   * @task discovery  Discovering Repositories
   5   * @task svn        Discovering Subversion Repositories
   6   * @task git        Discovering Git Repositories
   7   * @task hg         Discovering Mercurial Repositories
   8   * @task internal   Internals
   9   */
  10  final class PhabricatorRepositoryDiscoveryEngine
  11    extends PhabricatorRepositoryEngine {
  12  
        // When truthy, isKnownCommit() never consults the database and
        // recordCommit() logs each commit it writes as repaired.
  13    private $repairMode;
        // Map of commit identifier => true for commits known to exist. Filled
        // from the database by fillCommitCache() and by discoverCommits().
  14    private $commitCache = array();
        // Map of commit identifier => true for refs found during the current
        // discovery pass. Reset by discoverCommits().
  15    private $workingSet = array();
  16  
        // fillCommitCache() trims $commitCache down to this many entries.
  17    const MAX_COMMIT_CACHE_SIZE = 2048;
  18  
  19  
  20  /* -(  Discovering Repositories  )------------------------------------------- */
  21  
  22  
        /**
         * Turn repair mode on or off.
         *
         * While the flag is truthy, isKnownCommit() answers only from the
         * in-memory caches instead of querying the database, and recordCommit()
         * logs each commit it manages to write as repaired.
         *
         * @param   bool  True to enable repair mode (only truthiness is tested).
         * @return  this
         */
  23    public function setRepairMode($repair_mode) {
  24      $this->repairMode = $repair_mode;
  25      return $this;
  26    }
  27  
  28  
        /**
         * Get the repair mode flag.
         *
         * @return  bool|null  Value passed to setRepairMode(), or null if it
         *                     was never called.
         */
  29    public function getRepairMode() {
  30      return $this->repairMode;
  31    }
  32  
  33  
  34    /**
         * Discover new commits in the repository, record them, and return them.
         *
         * Dispatches to the Subversion, Mercurial or Git discovery routine based
         * on the repository's version control system. Each discovered ref is then
         * written with recordCommit() and marked as known in the commit cache.
         *
  35     * @task discovery
         *
         * @return  list<PhabricatorRepositoryCommitRef> Refs found by this pass.
         * @throws  Exception If the version control system is not recognized.
  36     */
  37    public function discoverCommits() {
  38      $repository = $this->getRepository();
  39  
  40      $vcs = $repository->getVersionControlSystem();
  41      switch ($vcs) {
  42        case PhabricatorRepositoryType::REPOSITORY_TYPE_SVN:
  43          $refs = $this->discoverSubversionCommits();
  44          break;
  45        case PhabricatorRepositoryType::REPOSITORY_TYPE_MERCURIAL:
  46          $refs = $this->discoverMercurialCommits();
  47          break;
  48        case PhabricatorRepositoryType::REPOSITORY_TYPE_GIT:
  49          $refs = $this->discoverGitCommits();
  50          break;
  51        default:
  52          throw new Exception("Unknown VCS '{$vcs}'!");
  53      }
  54  
  55      // Clear the working set cache.
  56      $this->workingSet = array();
  57  
  58      // Record discovered commits and mark them in the cache.
  59      foreach ($refs as $ref) {
  60        $this->recordCommit(
  61          $repository,
  62          $ref->getIdentifier(),
  63          $ref->getEpoch(),
  64          $ref->getCanCloseImmediately(),
  65          $ref->getParents());
  66  
            // NOTE: Entries added here are not trimmed; only fillCommitCache()
            // enforces MAX_COMMIT_CACHE_SIZE.
  67        $this->commitCache[$ref->getIdentifier()] = true;
  68      }
  69  
  70      return $refs;
  71    }
  72  
  73  
  74  /* -(  Discovering Git Repositories  )--------------------------------------- */
  75  
  76  
  77    /**
         * Discover unknown commits reachable from the origin branches of a Git
         * repository.
         *
         * Untracked branches and branches whose head is already known are
         * skipped. For every other branch the ancestry of the head is walked
         * with discoverStreamAncestry() until known commits are reached.
         *
  78     * @task git
         *
         * @return  list<PhabricatorRepositoryCommitRef> Refs from all examined
         *          branches, concatenated in branch order.
  79     */
  80    private function discoverGitCommits() {
  81      $repository = $this->getRepository();
  82  
          // Only non-hosted repositories have their origin checked.
          // verifyGitOrigin() is defined outside this file.
  83      if (!$repository->isHosted()) {
  84        $this->verifyGitOrigin($repository);
  85      }
  86  
  87      $branches = id(new DiffusionLowLevelGitRefQuery())
  88        ->setRepository($repository)
  89        ->withIsOriginBranch(true)
  90        ->execute();
  91  
  92      if (!$branches) {
  93        // This repository has no branches at all, so we don't need to do
  94        // anything. Generally, this means the repository is empty.
  95        return array();
  96      }
  97  
          // Put autoclose branches first, then reduce the refs to a map of
          // short branch name => head commit identifier.
  98      $branches = $this->sortBranches($branches);
  99      $branches = mpull($branches, 'getCommitIdentifier', 'getShortName');
 100  
 101      $this->log(
 102        pht(
 103          'Discovering commits in repository %s.',
 104          $repository->getCallsign()));
 105  
          // Look up every branch head with one query, before the loop below
          // asks about them one at a time.
 106      $this->fillCommitCache(array_values($branches));
 107  
 108      $refs = array();
 109      foreach ($branches as $name => $commit) {
 110        $this->log(pht('Examining branch "%s", at "%s".', $name, $commit));
 111  
 112        if (!$repository->shouldTrackBranch($name)) {
 113          $this->log(pht('Skipping, branch is untracked.'));
 114          continue;
 115        }
 116  
 117        if ($this->isKnownCommit($commit)) {
 118          $this->log(pht('Skipping, HEAD is known.'));
 119          continue;
 120        }
 121  
 122        $this->log(pht('Looking for new commits.'));
 123  
 124        $branch_refs = $this->discoverStreamAncestry(
 125          new PhabricatorGitGraphStream($repository, $commit),
 126          $commit,
 127          $repository->shouldAutocloseBranch($name));
 128  
            // Add these refs to the working set so later branches stop walking
            // when they reach them.
 129        $this->didDiscoverRefs($branch_refs);
 130  
 131        $refs[] = $branch_refs;
 132      }
 133  
 134      return array_mergev($refs);
 135    }
 136  
 137  
 138  /* -(  Discovering Subversion Repositories  )-------------------------------- */
 139  
 140  
 141    /**
         * Discover unknown revisions of a Subversion repository.
         *
         * Runs "svn log" backwards from HEAD in growing batches until a known
         * revision, revision 1, or the beginning of the path's history is
         * reached. Already-known revisions are then dropped from the result.
         *
 142     * @task svn
         *
         * @return  list<PhabricatorRepositoryCommitRef> Unknown revisions, lowest
         *          revision number first.
         * @throws  CommandException If "svn log" fails for any reason other than
         *          the path not existing at the requested revision.
 143     */
 144    private function discoverSubversionCommits() {
 145      $repository = $this->getRepository();
 146  
 147      if (!$repository->isHosted()) {
 148        $this->verifySubversionRoot($repository);
 149      }
 150  
          // $upper_bound is the lowest revision seen so far; null until the
          // first log entry has been read.
 151      $upper_bound = null;
 152      $limit = 1;
 153      $refs = array();
 154      do {
 155        // Find all the unknown commits on this path. Note that we permit
 156        // importing an SVN subdirectory rather than the entire repository, so
 157        // commits may be nonsequential.
 158  
 159        if ($upper_bound === null) {
 160          $at_rev = 'HEAD';
 161        } else {
 162          $at_rev = ($upper_bound - 1);
 163        }
 164  
 165        try {
 166          list($xml, $stderr) = $repository->execxRemoteCommand(
 167            'log --xml --quiet --limit %d %s',
 168            $limit,
 169            $repository->getSubversionBaseURI($at_rev));
 170        } catch (CommandException $ex) {
 171          $stderr = $ex->getStdErr();
 172          if (preg_match('/(path|File) not found/', $stderr)) {
 173            // We've gone all the way back through history and this path was not
 174            // affected by earlier commits.
 175            break;
 176          }
 177          throw $ex;
 178        }
 179  
 180        $xml = phutil_utf8ize($xml);
 181        $log = new SimpleXMLElement($xml);
 182        foreach ($log->logentry as $entry) {
 183          $identifier = (int)$entry['revision'];
 184          $epoch = (int)strtotime((string)$entry->date[0]);
              // Keyed by revision number, so a revision seen twice is stored
              // once.
 185          $refs[$identifier] = id(new PhabricatorRepositoryCommitRef())
 186            ->setIdentifier($identifier)
 187            ->setEpoch($epoch)
 188            ->setCanCloseImmediately(true);
 189  
 190          if ($upper_bound === null) {
 191            $upper_bound = $identifier;
 192          } else {
 193            $upper_bound = min($upper_bound, $identifier);
 194          }
 195        }
 196  
 197        // Fetch 1, 2, 4, 8, ... 256 logs at a time. This allows us to initially
 198        // import large repositories fairly quickly, while pulling only as much
 199        // data as we need in the common case (when we've already imported the
 200        // repository and are just grabbing one commit at a time).
 201        $limit = min($limit * 2, 256);
 202  
 203      } while ($upper_bound > 1 && !$this->isKnownCommit($upper_bound));
 204  
          // Order newest-first, drop already-known revisions from the old end,
          // then flip the list so the oldest unknown revision comes first.
 205      krsort($refs);
 206      while ($refs && $this->isKnownCommit(last($refs)->getIdentifier())) {
 207        array_pop($refs);
 208      }
 209      $refs = array_reverse($refs);
 210  
 211      $this->didDiscoverRefs($refs);
 212  
 213      return $refs;
 214    }
 215  
 216  
        /**
         * Verify that the configured Subversion URI points at the root of the
         * remote repository.
         *
         * Runs "svn info --xml" against the configured URI, reads the reported
         * repository root, and compares the normalized paths of the two URIs.
         *
         * @task svn
         *
         * @param   PhabricatorRepository Repository to verify.
         * @return  void
         * @throws  Exception If the normalized paths differ.
         */
 217    private function verifySubversionRoot(PhabricatorRepository $repository) {
 218      list($xml) = $repository->execxRemoteCommand(
 219        'info --xml %s',
 220        $repository->getSubversionPathURI());
 221  
 222      $xml = phutil_utf8ize($xml);
 223      $xml = new SimpleXMLElement($xml);
 224  
 225      $remote_root = (string)($xml->entry[0]->repository[0]->root[0]);
 226      $expect_root = $repository->getSubversionPathURI();
 227  
 228      $normal_type_svn = PhabricatorRepositoryURINormalizer::TYPE_SVN;
 229  
 230      $remote_normal = id(new PhabricatorRepositoryURINormalizer(
 231        $normal_type_svn,
 232        $remote_root))->getNormalizedPath();
 233  
 234      $expect_normal = id(new PhabricatorRepositoryURINormalizer(
 235        $normal_type_svn,
 236        $expect_root))->getNormalizedPath();
 237  
 238      if ($remote_normal != $expect_normal) {
 239        throw new Exception(
 240          pht(
 241            'Repository "%s" does not have a correctly configured remote URI. '.
 242            'The remote URI for a Subversion repository MUST point at the '.
 243            'repository root. The root for this repository is "%s", but the '.
 244            'configured URI is "%s". To resolve this error, set the remote URI '.
 245            'to point at the repository root. If you want to import only part '.
 246            'of a Subversion repository, use the "Import Only" option.',
 247            $repository->getCallsign(),
 248            $remote_root,
 249            $expect_root));
 250      }
 251    }
 252  
 253  
 254  /* -(  Discovering Mercurial Repositories  )--------------------------------- */
 255  
 256  
 257    /**
         * Discover unknown commits reachable from the branch heads of a
         * Mercurial repository.
         *
         * Untracked branches and heads which are already known are skipped. For
         * every other head the ancestry is walked with discoverStreamAncestry().
         * Every discovered ref is marked as closeable immediately.
         *
 258     * @task hg
         *
         * @return  list<PhabricatorRepositoryCommitRef> Refs from all examined
         *          heads, concatenated in head order.
 259     */
 260    private function discoverMercurialCommits() {
 261      $repository = $this->getRepository();
 262  
 263      $branches = id(new DiffusionLowLevelMercurialBranchesQuery())
 264        ->setRepository($repository)
 265        ->execute();
 266  
          // Look up every head with one query, before the loop below asks about
          // them one at a time.
 267      $this->fillCommitCache(mpull($branches, 'getCommitIdentifier'));
 268  
 269      $refs = array();
 270      foreach ($branches as $branch) {
 271        // NOTE: Mercurial branches may have multiple heads, so the names may
 272        // not be unique.
 273        $name = $branch->getShortName();
 274        $commit = $branch->getCommitIdentifier();
 275  
 276        $this->log(pht('Examining branch "%s" head "%s".', $name, $commit));
 277        if (!$repository->shouldTrackBranch($name)) {
 278          $this->log(pht('Skipping, branch is untracked.'));
 279          continue;
 280        }
 281  
 282        if ($this->isKnownCommit($commit)) {
 283          $this->log(pht('Skipping, this head is a known commit.'));
 284          continue;
 285        }
 286  
 287        $this->log(pht('Looking for new commits.'));
 288  
            // The inline assignment below only labels the argument; the
            // variable is not read afterwards.
 289        $branch_refs = $this->discoverStreamAncestry(
 290          new PhabricatorMercurialGraphStream($repository, $commit),
 291          $commit,
 292          $close_immediately = true);
 293  
 294        $this->didDiscoverRefs($branch_refs);
 295  
 296        $refs[] = $branch_refs;
 297      }
 298  
 299      return array_mergev($refs);
 300    }
 301  
 302  
 303  /* -(  Internals  )---------------------------------------------------------- */
 304  
 305  
        /**
         * Walk the ancestry of a commit and build refs for every commit which is
         * not already known.
         *
         * The walk starts at the given commit and follows parents reported by
         * the stream, stopping at parents for which isKnownCommit() is true.
         *
         * @task internal
         *
         * @param   PhabricatorRepositoryGraphStream Source of parents and dates.
         * @param   string Identifier of the commit to start from.
         * @param   bool   Value stored on each ref with setCanCloseImmediately().
         * @return  list<PhabricatorRepositoryCommitRef> Refs in the order
         *          produced by reduceGraph().
         */
 306    private function discoverStreamAncestry(
 307      PhabricatorRepositoryGraphStream $stream,
 308      $commit,
 309      $close_immediately) {
 310  
          // $discover is the stack of commits still to visit. $graph maps each
          // visited commit to a map of its unknown parents. $seen keeps a parent
          // from being queued more than once.
 311      $discover = array($commit);
 312      $graph = array();
 313      $seen = array();
 314  
 315      // Find all the reachable, undiscovered commits. Build a graph of the
 316      // edges.
 317      while ($discover) {
 318        $target = array_pop($discover);
 319  
 320        if (empty($graph[$target])) {
 321          $graph[$target] = array();
 322        }
 323  
 324        $parents = $stream->getParents($target);
 325        foreach ($parents as $parent) {
 326          if ($this->isKnownCommit($parent)) {
 327            continue;
 328          }
 329  
 330          $graph[$target][$parent] = true;
 331  
 332          if (empty($seen[$parent])) {
 333            $seen[$parent] = true;
 334            $discover[] = $parent;
 335          }
 336        }
 337      }
 338  
 339      // Now, sort them topologically.
 340      $commits = $this->reduceGraph($graph);
 341  
 342      $refs = array();
 343      foreach ($commits as $commit) {
 344        $refs[] = id(new PhabricatorRepositoryCommitRef())
 345          ->setIdentifier($commit)
 346          ->setEpoch($stream->getCommitDate($commit))
 347          ->setCanCloseImmediately($close_immediately)
 348          ->setParents($stream->getParents($commit));
 349      }
 350  
 351      return $refs;
 352    }
 353  
 354  
        /**
         * Order the commits of an ancestry graph using a
         * PhutilDirectedScalarGraph.
         *
         * @task internal
         *
         * @param   map<string, map<string, bool>> Map of commit identifier to a
         *          map whose keys are that commit's parent identifiers.
         * @return  list<string> Commit identifiers, in the reverse of the order
         *          returned by getTopographicallySortedNodes().
         */
 355    private function reduceGraph(array $edges) {
          // Convert each parent map into a plain list of parent identifiers.
 356      foreach ($edges as $commit => $parents) {
 357        $edges[$commit] = array_keys($parents);
 358      }
 359  
 360      $graph = new PhutilDirectedScalarGraph();
 361      $graph->addNodes($edges);
 362  
 363      $commits = $graph->getTopographicallySortedNodes();
 364  
 365      // NOTE: We want the most ancestral nodes first, so we need to reverse the
 366      // list we get out of AbstractDirectedGraph.
 367      $commits = array_reverse($commits);
 368  
 369      return $commits;
 370    }
 371  
 372  
        /**
         * Test whether a commit has already been discovered.
         *
         * Checks the commit cache and the working set first. Outside of repair
         * mode, a miss falls through to a database lookup via fillCommitCache().
         *
         * @task internal
         *
         * @param   string|int Commit identifier.
         * @return  bool True if the commit is known.
         */
 373    private function isKnownCommit($identifier) {
 374      if (isset($this->commitCache[$identifier])) {
 375        return true;
 376      }
 377  
 378      if (isset($this->workingSet[$identifier])) {
 379        return true;
 380      }
 381  
 382      if ($this->repairMode) {
 383        // In repair mode, rediscover the entire repository, ignoring the
 384        // database state. We can hit the local cache above, but if we miss it
 385        // stop the script from going to the database cache.
 386        return false;
 387      }
 388  
 389      $this->fillCommitCache(array($identifier));
 390  
 391      return isset($this->commitCache[$identifier]);
 392    }
 393  
        /**
         * Load the given identifiers from the database and mark the ones which
         * exist as known in the commit cache.
         *
         * Afterwards the cache is trimmed to at most MAX_COMMIT_CACHE_SIZE
         * entries by dropping the entries which were inserted first.
         *
         * @task internal
         *
         * @param   list<string|int> Commit identifiers to look up.
         * @return  void
         */
 394    private function fillCommitCache(array $identifiers) {
 395      if (!$identifiers) {
 396        return;
 397      }
 398  
 399      $commits = id(new PhabricatorRepositoryCommit())->loadAllWhere(
 400        'repositoryID = %d AND commitIdentifier IN (%Ls)',
 401        $this->getRepository()->getID(),
 402        $identifiers);
 403  
 404      foreach ($commits as $commit) {
 405        $this->commitCache[$commit->getCommitIdentifier()] = true;
 406      }
 407  
          // NOTE: Do not trim with array_shift(): it renumbers integer keys from
          // zero, and Subversion identifiers are integers, so the cache would
          // forget real revisions and report revisions 0..N as known. Slice with
          // preserved keys instead, which also trims in a single pass.
 408      $overflow = count($this->commitCache) - self::MAX_COMMIT_CACHE_SIZE;
 409      if ($overflow > 0) {
            $this->commitCache = array_slice(
              $this->commitCache,
              $overflow,
              self::MAX_COMMIT_CACHE_SIZE,
              true);
 410      }
 411    }
 412  
 413    /**
 414     * Sort branches so we process closeable branches first. This makes the
 415     * whole import process a little cheaper, since we can close these commits
 416     * the first time through rather than catching them in the refs step.
         *
         * Branches keep their original relative order within each group.
 417     *
 418     * @task internal
 419     *
 420     * @param   list<DiffusionRepositoryRef> List of branch heads.
 421     * @return  list<DiffusionRepositoryRef> Sorted list of branch heads.
 422     */
 423    private function sortBranches(array $branches) {
 424      $repository = $this->getRepository();
 425  
          // Autoclose branches go in the head group, all others in the tail.
 426      $head_branches = array();
 427      $tail_branches = array();
 428      foreach ($branches as $branch) {
 429        $name = $branch->getShortName();
 430  
 431        if ($repository->shouldAutocloseBranch($name)) {
 432          $head_branches[] = $branch;
 433        } else {
 434          $tail_branches[] = $branch;
 435        }
 436      }
 437  
 438      return array_merge($head_branches, $tail_branches);
 439    }
 440  
 441  
        /**
         * Write a newly discovered commit to the database and queue it for
         * import.
         *
         * Saves the commit row, an empty commit data row and the parent edges in
         * one transaction. It then schedules the message parser task, updates
         * the repository summary row and dispatches a "did discover commit"
         * event. A duplicate key failure is swallowed.
         *
         * @task internal
         *
         * @param   PhabricatorRepository Repository the commit belongs to.
         * @param   string|int Commit identifier.
         * @param   int    Commit epoch.
         * @param   bool   If true, the commit is saved with import status
         *                 IMPORTED_CLOSEABLE.
         * @param   list<string> Identifiers of the parent commits.
         * @return  void
         * @throws  Exception If a parent has no row in the commit table.
         */
 442    private function recordCommit(
 443      PhabricatorRepository $repository,
 444      $commit_identifier,
 445      $epoch,
 446      $close_immediately,
 447      array $parents) {
 448  
 449      $commit = new PhabricatorRepositoryCommit();
 450      $commit->setRepositoryID($repository->getID());
 451      $commit->setCommitIdentifier($commit_identifier);
 452      $commit->setEpoch($epoch);
 453      if ($close_immediately) {
 454        $commit->setImportStatus(PhabricatorRepositoryCommit::IMPORTED_CLOSEABLE);
 455      }
 456  
 457      $data = new PhabricatorRepositoryCommitData();
 458  
 459      $conn_w = $repository->establishConnection('w');
 460  
 461      try {
 462  
 463        // If this commit has parents, look up their IDs. The parent commits
 464        // should always exist already.
 465  
 466        $parent_ids = array();
 467        if ($parents) {
 468          $parent_rows = queryfx_all(
 469            $conn_w,
 470            'SELECT id, commitIdentifier FROM %T
 471              WHERE commitIdentifier IN (%Ls) AND repositoryID = %d',
 472            $commit->getTableName(),
 473            $parents,
 474            $repository->getID());
 475  
 476          $parent_map = ipull($parent_rows, 'id', 'commitIdentifier');
 477  
 478          foreach ($parents as $parent) {
 479            if (empty($parent_map[$parent])) {
 480              throw new Exception(
 481                pht('Unable to identify parent "%s"!', $parent));
 482            }
 483            $parent_ids[] = $parent_map[$parent];
 484          }
 485        } else {
 486          // Write an explicit 0 so we can distinguish between "really no
 487          // parents" and "data not available".
 488          if (!$repository->isSVN()) {
 489            $parent_ids = array(0);
 490          }
 491        }
 492  
 493        $commit->openTransaction();
 494          $commit->save();
 495  
 496          $data->setCommitID($commit->getID());
 497          $data->save();
 498  
 499          foreach ($parent_ids as $parent_id) {
 500            queryfx(
 501              $conn_w,
 502              'INSERT IGNORE INTO %T (childCommitID, parentCommitID)
 503                VALUES (%d, %d)',
 504              PhabricatorRepository::TABLE_PARENTS,
 505              $commit->getID(),
 506              $parent_id);
 507          }
 508        $commit->saveTransaction();
 509  
 510        $this->insertTask($repository, $commit);
 511  
            // Insert or update the summary row: count the commit, and move
            // lastCommitID and epoch forward only if this commit is newer.
 512        queryfx(
 513          $conn_w,
 514          'INSERT INTO %T (repositoryID, size, lastCommitID, epoch)
 515            VALUES (%d, 1, %d, %d)
 516            ON DUPLICATE KEY UPDATE
 517              size = size + 1,
 518              lastCommitID =
 519                IF(VALUES(epoch) > epoch, VALUES(lastCommitID), lastCommitID),
 520              epoch = IF(VALUES(epoch) > epoch, VALUES(epoch), epoch)',
 521          PhabricatorRepository::TABLE_SUMMARY,
 522          $repository->getID(),
 523          $commit->getID(),
 524          $epoch);
 525  
 526        if ($this->repairMode) {
 527          // Normally, the query should throw a duplicate key exception. If we
 528          // reach this in repair mode, we've actually performed a repair.
 529          $this->log(pht('Repaired commit "%s".', $commit_identifier));
 530        }
 531  
 532        PhutilEventEngine::dispatchEvent(
 533          new PhabricatorEvent(
 534            PhabricatorEventType::TYPE_DIFFUSION_DIDDISCOVERCOMMIT,
 535            array(
 536              'repository'  => $repository,
 537              'commit'      => $commit,
 538            )));
 539  
 540  
 541  
 542      } catch (AphrontDuplicateKeyQueryException $ex) {
            // NOTE(review): This assumes the exception was raised while the
            // transaction opened above was still open. Confirm that nothing
            // after saveTransaction() can raise it.
 543        $commit->killTransaction();
 544        // Ignore. This can happen because we discover the same new commit
 545        // more than once when looking at history, or because of races or
 546        // data inconsistency or cosmic radiation; in any case, we're still
 547        // in a good state if we ignore the failure.
 548      }
 549    }
 550  
        /**
         * Add refs to the working set, so isKnownCommit() reports them as known
         * for the rest of the current discovery pass.
         *
         * @task internal
         *
         * @param   list<PhabricatorRepositoryCommitRef> Refs just discovered.
         * @return  void
         */
 551    private function didDiscoverRefs(array $refs) {
 552      foreach ($refs as $ref) {
 553        $this->workingSet[$ref->getIdentifier()] = true;
 554      }
 555    }
 556  
        /**
         * Schedule the commit message parser worker for a commit.
         *
         * The worker class is chosen from the repository's version control
         * system. The commit ID is added to the task data under "commitID".
         *
         * @task internal
         *
         * @param   PhabricatorRepository Repository the commit belongs to.
         * @param   PhabricatorRepositoryCommit Saved commit to parse.
         * @param   map<string, wild> Optional additional task data.
         * @return  void
         * @throws  Exception If the version control system is not recognized.
         */
 557    private function insertTask(
 558      PhabricatorRepository $repository,
 559      PhabricatorRepositoryCommit $commit,
 560      $data = array()) {
 561  
 562      $vcs = $repository->getVersionControlSystem();
 563      switch ($vcs) {
 564        case PhabricatorRepositoryType::REPOSITORY_TYPE_GIT:
 565          $class = 'PhabricatorRepositoryGitCommitMessageParserWorker';
 566          break;
 567        case PhabricatorRepositoryType::REPOSITORY_TYPE_SVN:
 568          $class = 'PhabricatorRepositorySvnCommitMessageParserWorker';
 569          break;
 570        case PhabricatorRepositoryType::REPOSITORY_TYPE_MERCURIAL:
 571          $class = 'PhabricatorRepositoryMercurialCommitMessageParserWorker';
 572          break;
 573        default:
 574          throw new Exception("Unknown repository type '{$vcs}'!");
 575      }
 576  
 577      $data['commitID'] = $commit->getID();
 578  
 579      PhabricatorWorker::scheduleTask($class, $data);
 580    }
 581  
 582  }


Generated: Sun Nov 30 09:20:46 2014 Cross-referenced by PHPXref 0.7.1