[ Index ] |
PHP Cross Reference of Phabricator |
[Summary view] [Print] [Text view]
<?php

/**
 * Discovers commits in a repository which have not been recorded yet, and
 * records them.
 *
 * The engine asks the version control system for the current heads, walks
 * history backward until it reaches commits it already knows about, and then
 * writes a commit row (plus parser task) for each newly found commit.
 *
 * @task discover   Discovering Repositories
 * @task svn        Discovering Subversion Repositories
 * @task git        Discovering Git Repositories
 * @task hg         Discovering Mercurial Repositories
 * @task internal   Internals
 */
final class PhabricatorRepositoryDiscoveryEngine
  extends PhabricatorRepositoryEngine {

  private $repairMode;

  // Map of commit identifier => true for commits known to be recorded.
  // Bounded (approximately) by MAX_COMMIT_CACHE_SIZE; see fillCommitCache().
  private $commitCache = array();

  // Map of commit identifier => true for commits found during the current
  // discoverCommits() pass which have not been recorded yet.
  private $workingSet = array();

  const MAX_COMMIT_CACHE_SIZE = 2048;


/* -(  Discovering Repositories  )------------------------------------------- */


  /**
   * Enable or disable repair mode. In repair mode, @{method:isKnownCommit}
   * does not consult the database, so commits are rediscovered.
   *
   * @param bool True to enable repair mode.
   * @return this
   * @task discover
   */
  public function setRepairMode($repair_mode) {
    $this->repairMode = $repair_mode;
    return $this;
  }


  /**
   * @return bool Current repair mode setting.
   * @task discover
   */
  public function getRepairMode() {
    return $this->repairMode;
  }


  /**
   * Discover and record all new commits in the repository.
   *
   * @return list<PhabricatorRepositoryCommitRef> Refs for the commits which
   *   were discovered on this pass.
   * @task discover
   */
  public function discoverCommits() {
    $repository = $this->getRepository();

    $vcs = $repository->getVersionControlSystem();
    switch ($vcs) {
      case PhabricatorRepositoryType::REPOSITORY_TYPE_SVN:
        $refs = $this->discoverSubversionCommits();
        break;
      case PhabricatorRepositoryType::REPOSITORY_TYPE_MERCURIAL:
        $refs = $this->discoverMercurialCommits();
        break;
      case PhabricatorRepositoryType::REPOSITORY_TYPE_GIT:
        $refs = $this->discoverGitCommits();
        break;
      default:
        throw new Exception("Unknown VCS '{$vcs}'!");
    }

    // Clear the working set cache.
    $this->workingSet = array();

    // Record discovered commits and mark them in the cache.
    foreach ($refs as $ref) {
      $this->recordCommit(
        $repository,
        $ref->getIdentifier(),
        $ref->getEpoch(),
        $ref->getCanCloseImmediately(),
        $ref->getParents());

      $this->commitCache[$ref->getIdentifier()] = true;
    }

    return $refs;
  }


/* -(  Discovering Git Repositories  )--------------------------------------- */


  /**
   * Discover new commits reachable from tracked Git branch heads.
   *
   * @return list<PhabricatorRepositoryCommitRef> Newly discovered refs.
   * @task git
   */
  private function discoverGitCommits() {
    $repository = $this->getRepository();

    if (!$repository->isHosted()) {
      $this->verifyGitOrigin($repository);
    }

    $branches = id(new DiffusionLowLevelGitRefQuery())
      ->setRepository($repository)
      ->withIsOriginBranch(true)
      ->execute();

    if (!$branches) {
      // This repository has no branches at all, so we don't need to do
      // anything. Generally, this means the repository is empty.
      return array();
    }

    $branches = $this->sortBranches($branches);
    $branches = mpull($branches, 'getCommitIdentifier', 'getShortName');

    $this->log(
      pht(
        'Discovering commits in repository %s.',
        $repository->getCallsign()));

    // Warm the cache for every branch head with a single query.
    $this->fillCommitCache(array_values($branches));

    $refs = array();
    foreach ($branches as $name => $commit) {
      $this->log(pht('Examining branch "%s", at "%s".', $name, $commit));

      if (!$repository->shouldTrackBranch($name)) {
        $this->log(pht('Skipping, branch is untracked.'));
        continue;
      }

      if ($this->isKnownCommit($commit)) {
        $this->log(pht('Skipping, HEAD is known.'));
        continue;
      }

      $this->log(pht('Looking for new commits.'));

      $branch_refs = $this->discoverStreamAncestry(
        new PhabricatorGitGraphStream($repository, $commit),
        $commit,
        $repository->shouldAutocloseBranch($name));

      // Mark these commits as seen so later branches stop when they reach
      // them instead of discovering them again.
      $this->didDiscoverRefs($branch_refs);

      $refs[] = $branch_refs;
    }

    return array_mergev($refs);
  }


/* -(  Discovering Subversion Repositories  )-------------------------------- */


  /**
   * Discover new Subversion commits by reading `svn log` backward from HEAD
   * until a known revision (or the start of history) is reached.
   *
   * @return list<PhabricatorRepositoryCommitRef> Newly discovered refs,
   *   oldest first.
   * @task svn
   */
  private function discoverSubversionCommits() {
    $repository = $this->getRepository();

    if (!$repository->isHosted()) {
      $this->verifySubversionRoot($repository);
    }

    $upper_bound = null;
    $limit = 1;
    $refs = array();
    do {
      // Find all the unknown commits on this path. Note that we permit
      // importing an SVN subdirectory rather than the entire repository, so
      // commits may be nonsequential.

      if ($upper_bound === null) {
        $at_rev = 'HEAD';
      } else {
        $at_rev = ($upper_bound - 1);
      }

      try {
        list($xml, $stderr) = $repository->execxRemoteCommand(
          'log --xml --quiet --limit %d %s',
          $limit,
          $repository->getSubversionBaseURI($at_rev));
      } catch (CommandException $ex) {
        $stderr = $ex->getStdErr();
        if (preg_match('/(path|File) not found/', $stderr)) {
          // We've gone all the way back through history and this path was not
          // affected by earlier commits.
          break;
        }
        throw $ex;
      }

      $xml = phutil_utf8ize($xml);
      $log = new SimpleXMLElement($xml);
      foreach ($log->logentry as $entry) {
        $identifier = (int)$entry['revision'];
        $epoch = (int)strtotime((string)$entry->date[0]);
        $refs[$identifier] = id(new PhabricatorRepositoryCommitRef())
          ->setIdentifier($identifier)
          ->setEpoch($epoch)
          ->setCanCloseImmediately(true);

        // Track the oldest revision seen so far; the next batch starts just
        // below it.
        if ($upper_bound === null) {
          $upper_bound = $identifier;
        } else {
          $upper_bound = min($upper_bound, $identifier);
        }
      }

      // Discover 2, 4, 8, ... 256 logs at a time. This allows us to initially
      // import large repositories fairly quickly, while pulling only as much
      // data as we need in the common case (when we've already imported the
      // repository and are just grabbing one commit at a time).
      $limit = min($limit * 2, 256);

    } while ($upper_bound > 1 && !$this->isKnownCommit($upper_bound));

    // Order newest-first, drop known commits off the old end of the list,
    // then flip so the result is oldest-first.
    krsort($refs);
    while ($refs && $this->isKnownCommit(last($refs)->getIdentifier())) {
      array_pop($refs);
    }
    $refs = array_reverse($refs);

    $this->didDiscoverRefs($refs);

    return $refs;
  }


  /**
   * Verify that the configured Subversion URI points at the repository root,
   * by comparing it (after normalization) with the root reported by
   * `svn info`.
   *
   * @param PhabricatorRepository Repository to verify.
   * @return void
   * @throws Exception If the normalized URIs do not match.
   * @task svn
   */
  private function verifySubversionRoot(PhabricatorRepository $repository) {
    list($xml) = $repository->execxRemoteCommand(
      'info --xml %s',
      $repository->getSubversionPathURI());

    $xml = phutil_utf8ize($xml);
    $xml = new SimpleXMLElement($xml);

    $remote_root = (string)($xml->entry[0]->repository[0]->root[0]);
    $expect_root = $repository->getSubversionPathURI();

    $normal_type_svn = PhabricatorRepositoryURINormalizer::TYPE_SVN;

    $remote_normal = id(new PhabricatorRepositoryURINormalizer(
      $normal_type_svn,
      $remote_root))->getNormalizedPath();

    $expect_normal = id(new PhabricatorRepositoryURINormalizer(
      $normal_type_svn,
      $expect_root))->getNormalizedPath();

    if ($remote_normal != $expect_normal) {
      throw new Exception(
        pht(
          'Repository "%s" does not have a correctly configured remote URI. '.
          'The remote URI for a Subversion repository MUST point at the '.
          'repository root. The root for this repository is "%s", but the '.
          'configured URI is "%s". To resolve this error, set the remote URI '.
          'to point at the repository root. If you want to import only part '.
          'of a Subversion repository, use the "Import Only" option.',
          $repository->getCallsign(),
          $remote_root,
          $expect_root));
    }
  }


/* -(  Discovering Mercurial Repositories  )--------------------------------- */


  /**
   * Discover new commits reachable from tracked Mercurial branch heads.
   *
   * @return list<PhabricatorRepositoryCommitRef> Newly discovered refs.
   * @task hg
   */
  private function discoverMercurialCommits() {
    $repository = $this->getRepository();

    $branches = id(new DiffusionLowLevelMercurialBranchesQuery())
      ->setRepository($repository)
      ->execute();

    $this->fillCommitCache(mpull($branches, 'getCommitIdentifier'));

    $refs = array();
    foreach ($branches as $branch) {
      // NOTE: Mercurial branches may have multiple heads, so the names may
      // not be unique.
      $name = $branch->getShortName();
      $commit = $branch->getCommitIdentifier();

      $this->log(pht('Examining branch "%s" head "%s".', $name, $commit));
      if (!$repository->shouldTrackBranch($name)) {
        $this->log(pht('Skipping, branch is untracked.'));
        continue;
      }

      if ($this->isKnownCommit($commit)) {
        $this->log(pht('Skipping, this head is a known commit.'));
        continue;
      }

      $this->log(pht('Looking for new commits.'));

      $branch_refs = $this->discoverStreamAncestry(
        new PhabricatorMercurialGraphStream($repository, $commit),
        $commit,
        $close_immediately = true);

      $this->didDiscoverRefs($branch_refs);

      $refs[] = $branch_refs;
    }

    return array_mergev($refs);
  }


/* -(  Internals  )---------------------------------------------------------- */


  /**
   * Walk ancestry from a head commit, collecting every reachable commit which
   * is not already known, and return refs for them with ancestors first.
   *
   * @param PhabricatorRepositoryGraphStream Stream used to look up parents
   *   and commit dates.
   * @param string Identifier of the head commit to start from.
   * @param bool Value to pass to `setCanCloseImmediately()` on each ref.
   * @return list<PhabricatorRepositoryCommitRef> Discovered refs.
   * @task internal
   */
  private function discoverStreamAncestry(
    PhabricatorRepositoryGraphStream $stream,
    $commit,
    $close_immediately) {

    $discover = array($commit);
    $graph = array();
    $seen = array();

    // Find all the reachable, undiscovered commits. Build a graph of the
    // edges.
    while ($discover) {
      $target = array_pop($discover);

      if (empty($graph[$target])) {
        $graph[$target] = array();
      }

      $parents = $stream->getParents($target);
      foreach ($parents as $parent) {
        if ($this->isKnownCommit($parent)) {
          continue;
        }

        $graph[$target][$parent] = true;

        if (empty($seen[$parent])) {
          $seen[$parent] = true;
          $discover[] = $parent;
        }
      }
    }

    // Now, sort them topographically.
    $commits = $this->reduceGraph($graph);

    $refs = array();
    foreach ($commits as $commit) {
      $refs[] = id(new PhabricatorRepositoryCommitRef())
        ->setIdentifier($commit)
        ->setEpoch($stream->getCommitDate($commit))
        ->setCanCloseImmediately($close_immediately)
        ->setParents($stream->getParents($commit));
    }

    return $refs;
  }


  /**
   * Convert a map of commit => (map of parent => true) edges into a list of
   * commits ordered with the most ancestral commits first.
   *
   * @param map<string, map<string, bool>> Edges from commits to parents.
   * @return list<string> Sorted commit identifiers.
   * @task internal
   */
  private function reduceGraph(array $edges) {
    foreach ($edges as $commit => $parents) {
      $edges[$commit] = array_keys($parents);
    }

    $graph = new PhutilDirectedScalarGraph();
    $graph->addNodes($edges);

    $commits = $graph->getTopographicallySortedNodes();

    // NOTE: We want the most ancestral nodes first, so we need to reverse the
    // list we get out of AbstractDirectedGraph.
    $commits = array_reverse($commits);

    return $commits;
  }


  /**
   * Test whether a commit is already recorded, or has already been found
   * during the current discovery pass.
   *
   * @param string|int Commit identifier.
   * @return bool True if the commit does not need to be discovered.
   * @task internal
   */
  private function isKnownCommit($identifier) {
    if (isset($this->commitCache[$identifier])) {
      return true;
    }

    if (isset($this->workingSet[$identifier])) {
      return true;
    }

    if ($this->repairMode) {
      // In repair mode, rediscover the entire repository, ignoring the
      // database state. We can hit the local cache above, but if we miss it
      // stop the script from going to the database cache.
      return false;
    }

    $this->fillCommitCache(array($identifier));

    return isset($this->commitCache[$identifier]);
  }


  /**
   * Load which of the given identifiers exist as recorded commits in this
   * repository and add them to the commit cache, then trim the cache back to
   * at most MAX_COMMIT_CACHE_SIZE entries (oldest entries are evicted first).
   *
   * @param list<string|int> Commit identifiers to look up.
   * @return void
   * @task internal
   */
  private function fillCommitCache(array $identifiers) {
    if (!$identifiers) {
      return;
    }

    $commits = id(new PhabricatorRepositoryCommit())->loadAllWhere(
      'repositoryID = %d AND commitIdentifier IN (%Ls)',
      $this->getRepository()->getID(),
      $identifiers);

    foreach ($commits as $commit) {
      $this->commitCache[$commit->getCommitIdentifier()] = true;
    }

    // Evict the oldest entries once the cache grows past its cap.
    //
    // NOTE: This must preserve keys. array_shift() renumbers integer keys
    // starting from zero, and Subversion revision identifiers are integer
    // keys, so shifting would remap the cache onto revisions 0..N and make
    // isKnownCommit() report the wrong revisions as known.
    $overflow = count($this->commitCache) - self::MAX_COMMIT_CACHE_SIZE;
    if ($overflow > 0) {
      $this->commitCache = array_slice(
        $this->commitCache,
        $overflow,
        null,
        true);
    }
  }


  /**
   * Sort branches so we process closeable branches first. This makes the
   * whole import process a little cheaper, since we can close these commits
   * the first time through rather than catching them in the refs step.
   *
   * @task internal
   *
   * @param   list<DiffusionRepositoryRef> List of branch heads.
   * @return  list<DiffusionRepositoryRef> Sorted list of branch heads.
   */
  private function sortBranches(array $branches) {
    $repository = $this->getRepository();

    $head_branches = array();
    $tail_branches = array();
    foreach ($branches as $branch) {
      $name = $branch->getShortName();

      if ($repository->shouldAutocloseBranch($name)) {
        $head_branches[] = $branch;
      } else {
        $tail_branches[] = $branch;
      }
    }

    return array_merge($head_branches, $tail_branches);
  }


  /**
   * Write a discovered commit to the database: the commit row, its data row,
   * its parent edges, a message parser task, and the repository summary row.
   * Dispatches a "did discover commit" event afterward.
   *
   * A duplicate key failure is treated as "already recorded" and ignored.
   *
   * @param PhabricatorRepository Repository the commit belongs to.
   * @param string|int Commit identifier.
   * @param int Commit epoch.
   * @param bool If true, the commit is marked IMPORTED_CLOSEABLE.
   * @param list<string> Identifiers of the commit's parents.
   * @return void
   * @throws Exception If a parent commit can not be found in the database.
   * @task internal
   */
  private function recordCommit(
    PhabricatorRepository $repository,
    $commit_identifier,
    $epoch,
    $close_immediately,
    array $parents) {

    $commit = new PhabricatorRepositoryCommit();
    $commit->setRepositoryID($repository->getID());
    $commit->setCommitIdentifier($commit_identifier);
    $commit->setEpoch($epoch);
    if ($close_immediately) {
      $commit->setImportStatus(PhabricatorRepositoryCommit::IMPORTED_CLOSEABLE);
    }

    $data = new PhabricatorRepositoryCommitData();

    $conn_w = $repository->establishConnection('w');

    try {

      // If this commit has parents, look up their IDs. The parent commits
      // should always exist already.

      $parent_ids = array();
      if ($parents) {
        $parent_rows = queryfx_all(
          $conn_w,
          'SELECT id, commitIdentifier FROM %T
            WHERE commitIdentifier IN (%Ls) AND repositoryID = %d',
          $commit->getTableName(),
          $parents,
          $repository->getID());

        $parent_map = ipull($parent_rows, 'id', 'commitIdentifier');

        foreach ($parents as $parent) {
          if (empty($parent_map[$parent])) {
            throw new Exception(
              pht('Unable to identify parent "%s"!', $parent));
          }
          $parent_ids[] = $parent_map[$parent];
        }
      } else {
        // Write an explicit 0 so we can distinguish between "really no
        // parents" and "data not available".
        if (!$repository->isSVN()) {
          $parent_ids = array(0);
        }
      }

      // NOTE(review): Only duplicate key failures are caught below. Any other
      // exception raised between here and saveTransaction() propagates
      // without killTransaction() being called -- confirm callers cope.
      $commit->openTransaction();
        $commit->save();

        $data->setCommitID($commit->getID());
        $data->save();

        foreach ($parent_ids as $parent_id) {
          queryfx(
            $conn_w,
            'INSERT IGNORE INTO %T (childCommitID, parentCommitID)
              VALUES (%d, %d)',
            PhabricatorRepository::TABLE_PARENTS,
            $commit->getID(),
            $parent_id);
        }
      $commit->saveTransaction();

      $this->insertTask($repository, $commit);

      // Bump the repository summary: one more commit, and move the "last
      // commit" pointer forward only if this commit is newer.
      queryfx(
        $conn_w,
        'INSERT INTO %T (repositoryID, size, lastCommitID, epoch)
          VALUES (%d, 1, %d, %d)
          ON DUPLICATE KEY UPDATE
            size = size + 1,
            lastCommitID =
              IF(VALUES(epoch) > epoch, VALUES(lastCommitID), lastCommitID),
            epoch = IF(VALUES(epoch) > epoch, VALUES(epoch), epoch)',
        PhabricatorRepository::TABLE_SUMMARY,
        $repository->getID(),
        $commit->getID(),
        $epoch);

      if ($this->repairMode) {
        // Normally, the query should throw a duplicate key exception. If we
        // reach this in repair mode, we've actually performed a repair.
        $this->log(pht('Repaired commit "%s".', $commit_identifier));
      }

      PhutilEventEngine::dispatchEvent(
        new PhabricatorEvent(
          PhabricatorEventType::TYPE_DIFFUSION_DIDDISCOVERCOMMIT,
          array(
            'repository'  => $repository,
            'commit'      => $commit,
          )));

    } catch (AphrontDuplicateKeyQueryException $ex) {
      $commit->killTransaction();
      // Ignore. This can happen because we discover the same new commit
      // more than once when looking at history, or because of races or
      // data inconsistency or cosmic radiation; in any case, we're still
      // in a good state if we ignore the failure.
    }
  }


  /**
   * Add discovered refs to the working set, so @{method:isKnownCommit}
   * treats them as known for the remainder of the current pass.
   *
   * @param list<PhabricatorRepositoryCommitRef> Discovered refs.
   * @return void
   * @task internal
   */
  private function didDiscoverRefs(array $refs) {
    foreach ($refs as $ref) {
      $this->workingSet[$ref->getIdentifier()] = true;
    }
  }


  /**
   * Schedule the commit message parser worker appropriate for the
   * repository's version control system.
   *
   * @param PhabricatorRepository Repository the commit belongs to.
   * @param PhabricatorRepositoryCommit Saved commit to parse.
   * @param map<string, wild> Extra task data; `commitID` is always set from
   *   the commit.
   * @return void
   * @throws Exception If the repository type is not recognized.
   * @task internal
   */
  private function insertTask(
    PhabricatorRepository $repository,
    PhabricatorRepositoryCommit $commit,
    $data = array()) {

    $vcs = $repository->getVersionControlSystem();
    switch ($vcs) {
      case PhabricatorRepositoryType::REPOSITORY_TYPE_GIT:
        $class = 'PhabricatorRepositoryGitCommitMessageParserWorker';
        break;
      case PhabricatorRepositoryType::REPOSITORY_TYPE_SVN:
        $class = 'PhabricatorRepositorySvnCommitMessageParserWorker';
        break;
      case PhabricatorRepositoryType::REPOSITORY_TYPE_MERCURIAL:
        $class = 'PhabricatorRepositoryMercurialCommitMessageParserWorker';
        break;
      default:
        throw new Exception("Unknown repository type '{$vcs}'!");
    }

    $data['commitID'] = $commit->getID();

    PhabricatorWorker::scheduleTask($class, $data);
  }

}
title
Description
Body
title
Description
Body
title
Description
Body
title
Body
Generated: Sun Nov 30 09:20:46 2014 | Cross-referenced by PHPXref 0.7.1 |