[ Index ] |
PHP Cross Reference of Phabricator |
[Summary view] [Print] [Text view]
<?php

/**
 * Workflow behind the `generate` command. For each documentation book it
 * builds the atom cache, builds the graph cache, and then publishes the
 * resulting documentation.
 *
 * NOTE(review): getConfig(), getAllConfig(), getBookConfigPath() and
 * readBookConfiguration() are not defined in this class; presumably they are
 * provided by DivinerWorkflow. Confirm against the parent class.
 */
final class DivinerGenerateWorkflow extends DivinerWorkflow {

  /**
   * Lazily-built atom cache for the book currently being generated. Reset to
   * null at the start of generateBook() so each book gets its own cache.
   */
  private $atomCache;

  /**
   * Declare the workflow name, synopsis and the `clean` / `book` arguments.
   */
  public function didConstruct() {
    $this
      ->setName('generate')
      ->setSynopsis(pht('Generate documentation.'))
      ->setArguments(
        array(
          array(
            'name' => 'clean',
            'help' => 'Clear the caches before generating documentation.',
          ),
          array(
            'name' => 'book',
            'param' => 'path',
            'help' => 'Path to a Diviner book configuration.',
          ),
        ));
  }

  /**
   * Get the atom cache for the current book, constructing it on first use.
   *
   * The cache directory is `<root>/.divinercache/<name>`, built from the
   * 'root' and 'name' configuration values.
   *
   * @return DivinerAtomCache Cache for the current book.
   */
  protected function getAtomCache() {
    if (!$this->atomCache) {
      $book_root = $this->getConfig('root');
      $book_name = $this->getConfig('name');
      $cache_directory = $book_root.'/.divinercache/'.$book_name;
      $this->atomCache = new DivinerAtomCache($cache_directory);
    }
    return $this->atomCache;
  }

  /**
   * Write a message, followed by a newline, to the console's error stream.
   *
   * @param string Message to write.
   * @return void
   */
  protected function log($message) {
    $console = PhutilConsole::getConsole();
    $console->writeErr($message."\n");
  }

  /**
   * Generate documentation for the book given with `--book`, or, if none was
   * given, for every file with the suffix "book" found beneath the current
   * working directory.
   *
   * @param PhutilArgumentParser Parsed command line arguments.
   * @return void
   * @throws PhutilArgumentUsageException If no book was specified and none
   *   could be found beneath the current directory.
   */
  public function execute(PhutilArgumentParser $args) {
    $book = $args->getArg('book');
    if ($book) {
      $books = array($book);
    } else {
      $cwd = getcwd();
      $this->log(pht('FINDING DOCUMENTATION BOOKS'));
      $books = id(new FileFinder($cwd))
        ->withType('f')
        ->withSuffix('book')
        ->find();

      if (!$books) {
        throw new PhutilArgumentUsageException(
          pht(
            "There are no Diviner '.book' files anywhere beneath the ".
            "current directory. Use '--book <book>' to specify a ".
            "documentation book to generate."));
      } else {
        $this->log(pht('Found %s book(s).', new PhutilNumber(count($books))));
      }
    }

    foreach ($books as $book) {
      $short_name = basename($book);

      $this->log(pht('Generating book "%s"...', $short_name));
      $this->generateBook($book, $args);
      $this->log(pht('Completed generation of "%s".', $short_name)."\n");
    }
  }

  /**
   * Generate documentation for a single book: read its configuration,
   * optionally clear its caches, rebuild the atom and graph caches, and
   * publish.
   *
   * @param string Path to the book configuration file.
   * @param PhutilArgumentParser Parsed command line arguments; the `clean`
   *   flag is read from here.
   * @return void
   */
  private function generateBook($book, PhutilArgumentParser $args) {
    // Drop any cache left over from a previously generated book, so that
    // getAtomCache() rebuilds it from this book's configuration.
    $this->atomCache = null;

    $this->readBookConfiguration($book);

    if ($args->getArg('clean')) {
      $this->log(pht('CLEARING CACHES'));
      $this->getAtomCache()->delete();
      $this->log(pht('Done.')."\n");
    }

    // The major challenge of documentation generation is one of dependency
    // management. When regenerating documentation, we want to do the smallest
    // amount of work we can, so that regenerating documentation after minor
    // changes is quick.
    //
    // ATOM CACHE
    //
    // In the first stage, we find all the direct changes to source code since
    // the last run. This stage relies on two data structures:
    //
    //  - File Hash Map: map<file_hash, node_hash>
    //  - Atom Map: map<node_hash, true>
    //
    // First, we hash all the source files in the project to detect any which
    // have changed since the previous run (i.e., their hash is not present in
    // the File Hash Map). If a file's content hash appears in the map, it has
    // not changed, so we don't need to reparse it.
    //
    // We break the contents of each file into "atoms", which represent a unit
    // of source code (like a function, method, class or file). Each atom has a
    // "node hash" based on the content of the atom: if a function definition
    // changes, the node hash of the atom changes too. The primary output of
    // the atom cache is a list of node hashes which exist in the project. This
    // is the Atom Map. The node hash depends only on the definition of the atom
    // and the atomizer implementation. It ends with an "N", for "node".
    //
    // (We need the Atom Map in addition to the File Hash Map because each file
    // may have several atoms in it (e.g., multiple functions, or a class and
    // its methods). The File Hash Map contains an exhaustive list of all atoms
    // with type "file", but not child atoms of those top-level atoms.)
    //
    // GRAPH CACHE
    //
    // We now know which atoms exist, and can compare the Atom Map to some
    // existing cache to figure out what has changed. However, this isn't
    // sufficient to figure out which documentation actually needs to be
    // regenerated, because atoms depend on other atoms. For example, if "B
    // extends A" and the definition for A changes, we need to regenerate the
    // documentation in B. Similarly, if X links to Y and Y changes, we should
    // regenerate X. (In both these cases, the documentation for the connected
    // atom may not actually change, but in some cases it will, and the extra
    // work we need to do is generally very small compared to the size of the
    // project.)
    //
    // To figure out which other nodes have changed, we compute a "graph hash"
    // for each node. This hash combines the "node hash" with the node hashes
    // of connected nodes. Our primary output is a list of graph hashes, which
    // a documentation generator can use to easily determine what work needs
    // to be done by comparing the list with a list of cached graph hashes,
    // then generating documentation for new hashes and deleting documentation
    // for missing hashes. The graph hash ends with a "G", for "graph".
    //
    // In this stage, we rely on three data structures:
    //
    //  - Symbol Map: map<node_hash, symbol_hash>
    //  - Edge Map: map<node_hash, list<symbol_hash>>
    //  - Graph Map: map<node_hash, graph_hash>
    //
    // Calculating the graph hash requires several steps, because we need to
    // figure out which nodes an atom is attached to. The atom contains symbolic
    // references to other nodes by name (e.g., "extends SomeClass") in the form
    // of DivinerAtomRefs. We can also build a symbolic reference for any atom
    // from the atom itself. Each DivinerAtomRef generates a symbol hash,
    // which ends with an "S", for "symbol".
    //
    // First, we update the symbol map. We remove (and mark dirty) any symbols
    // associated with node hashes which no longer exist (e.g., old/dead nodes).
    // Second, we add (and mark dirty) any symbols associated with new nodes.
    // We also add edges defined by new nodes to the graph.
    //
    // We initialize a list of dirty nodes to the list of new nodes, then
    // find all nodes connected to dirty symbols and add them to the dirty
    // node list. This list now contains every node with a new or changed
    // graph hash.
    //
    // We walk the dirty list and compute the new graph hashes, adding them
    // to the graph hash map. This Graph Map can then be passed to an actual
    // documentation generator, which can compare the graph hashes to a list
    // of already-generated graph hashes and easily assess which documents need
    // to be regenerated and which can be deleted.

    $this->buildAtomCache();
    $this->buildGraphCache();

    $this->publishDocumentation($args->getArg('clean'));
  }


/* -(  Atom Cache  )--------------------------------------------------------- */


  /**
   * Bring the atom cache up to date: find the project's files, drop cache
   * entries for file hashes that no longer exist, atomize any files whose
   * hash is not cached yet, and save the atoms.
   *
   * @return void
   */
  private function buildAtomCache() {
    $this->log(pht('BUILDING ATOM CACHE'));

    $file_hashes = $this->findFilesInProject();

    $this->log(pht('Found %d file(s) in project.', count($file_hashes)));

    $this->deleteDeadAtoms($file_hashes);

    $atomize = $this->getFilesToAtomize($file_hashes);

    $this->log(pht('Found %d unatomized, uncached file(s).', count($atomize)));

    $file_atomizers = $this->getAtomizersForFiles($atomize);

    $this->log(pht('Found %d file(s) to atomize.', count($file_atomizers)));

    $futures = $this->buildAtomizerFutures($file_atomizers);

    $this->log(pht('Atomizing %d file(s).', count($file_atomizers)));

    if ($futures) {
      $this->resolveAtomizerFutures($futures, $file_hashes);
      $this->log(pht('Atomization complete.'));
    } else {
      $this->log(pht('Atom cache is up to date, no files to atomize.'));
    }

    $this->log(pht('Writing atom cache.'));

    $this->getAtomCache()->saveAtoms();

    $this->log(pht('Done.')."\n");
  }

  /**
   * Choose an atomizer class for each file.
   *
   * Files matching any 'exclude' pattern are skipped. Files which match no
   * rule are omitted from the result.
   *
   * @param list<string> File paths to consider.
   * @return map<string, string> Map from file path to atomizer class name.
   * @throws Exception If a rule is not a valid regular expression.
   */
  private function getAtomizersForFiles(array $files) {
    $rules = $this->getRules();
    $exclude = $this->getExclude();

    $atomizers = array();

    foreach ($files as $file) {
      foreach ($exclude as $pattern) {
        if (preg_match($pattern, $file)) {
          // Excluded: move on to the next file.
          continue 2;
        }
      }

      foreach ($rules as $rule => $atomizer) {
        $ok = preg_match($rule, $file);
        if ($ok === false) {
          throw new Exception(
            "Rule '{$rule}' is not a valid regular expression.");
        }
        if ($ok) {
          // Every rule is tried, so when several rules match a file the last
          // matching rule wins.
          //
          // NOTE(review): the original code had a bare "continue;" here,
          // which is a no-op as the final statement of the loop body. It may
          // have been meant as "continue 2;" (first match wins). Behavior is
          // left unchanged; confirm the intent before altering it.
          $atomizers[$file] = $atomizer;
        }
      }
    }

    return $atomizers;
  }

  /**
   * Get the map of filename patterns to atomizer class names, from the
   * 'rules' configuration value. Defaults to handling `.diviner` and `.php`
   * files.
   *
   * @return map<string, string> Map from regular expression to atomizer
   *   class name.
   */
  private function getRules() {
    $rules = $this->getConfig('rules', array(
      '/\\.diviner$/' => 'DivinerArticleAtomizer',
      '/\\.php$/' => 'DivinerPHPAtomizer',
    ));

    return $rules;
  }

  /**
   * Get the list of exclusion patterns from the 'exclude' configuration
   * value. The value is cast to an array, so a single pattern string is
   * accepted too.
   *
   * @return list<string> Regular expressions for files to skip.
   */
  private function getExclude() {
    $exclude = (array)$this->getConfig('exclude', array());
    return $exclude;
  }


  /**
   * Find the files beneath the book root (excluding paths matching `*` + `/.*`)
   * and compute a hash for each one.
   *
   * @return map<string, string> Map from path (made readable relative to the
   *   book root) to file hash. Each hash ends with "F".
   */
  private function findFilesInProject() {
    $raw_hashes = id(new FileFinder($this->getConfig('root')))
      ->excludePath('*/.*')
      ->withType('f')
      ->setGenerateChecksums(true)
      ->find();

    $version = $this->getDivinerAtomWorldVersion();

    $file_hashes = array();
    foreach ($raw_hashes as $file => $md5_hash) {
      $rel_file = Filesystem::readablePath($file, $this->getConfig('root'));
      // We want the hash to change if the file moves or Diviner gets updated,
      // not just if the file content changes. Derive a hash from everything
      // we care about.
      $file_hashes[$rel_file] = md5("{$rel_file}\0{$md5_hash}\0{$version}").'F';
    }

    return $file_hashes;
  }

  /**
   * Remove entries from the cached file hash map whose hash does not belong
   * to any file currently in the project.
   *
   * @param map<string, string> Map from file path to current file hash.
   * @return void
   */
  private function deleteDeadAtoms(array $file_hashes) {
    $atom_cache = $this->getAtomCache();

    $hash_to_file = array_flip($file_hashes);
    foreach ($atom_cache->getFileHashMap() as $hash => $atom) {
      // Test for key presence, not truthiness: the flipped map's values are
      // file paths, and a path of "0" is falsy. Using empty() here would
      // treat such a file as dead on every run.
      if (!isset($hash_to_file[$hash])) {
        $atom_cache->deleteFileHash($hash);
      }
    }
  }

  /**
   * Select the files whose current hash is not present in the atom cache.
   *
   * @param map<string, string> Map from file path to current file hash.
   * @return list<string> Paths of files which need to be atomized.
   */
  private function getFilesToAtomize(array $file_hashes) {
    $atom_cache = $this->getAtomCache();

    $atomize = array();
    foreach ($file_hashes as $file => $hash) {
      if (!$atom_cache->fileHashExists($hash)) {
        $atomize[] = $file;
      }
    }

    return $atomize;
  }

  /**
   * Build the subprocess futures which atomize files.
   *
   * Files are grouped by atomizer class and split into chunks of up to 32;
   * each chunk becomes one `bin/diviner atomize` invocation, with its
   * working directory set to the book's 'root'.
   *
   * @param map<string, string> Map from file path to atomizer class name.
   * @return list<ExecFuture> Unresolved futures, one per chunk.
   */
  private function buildAtomizerFutures(array $file_atomizers) {
    $atomizers = array();
    foreach ($file_atomizers as $file => $atomizer) {
      $atomizers[$atomizer][] = $file;
    }

    $root = dirname(phutil_get_library_root('phabricator'));
    $config_root = $this->getConfig('root');

    // This bar tracks construction of the futures (counted in files), not
    // their resolution; resolveAtomizerFutures() shows its own bar.
    $bar = id(new PhutilConsoleProgressBar())
      ->setTotal(count($file_atomizers));

    $futures = array();
    foreach ($atomizers as $class => $files) {
      foreach (array_chunk($files, 32) as $chunk) {
        $future = new ExecFuture(
          '%s atomize --ugly --book %s --atomizer %s -- %Ls',
          $root.'/bin/diviner',
          $this->getBookConfigPath(),
          $class,
          $chunk);
        $future->setCWD($config_root);

        $futures[] = $future;

        $bar->update(count($chunk));
      }
    }

    $bar->done();

    return $futures;
  }

  /**
   * Resolve atomizer futures, at most 4 at a time, and store the atoms they
   * emit in the atom cache. Atoms of type "file" are also recorded in the
   * file hash map.
   *
   * If a future fails, the exception is logged with phlog() and processing
   * continues with the remaining futures.
   *
   * @param list<Future> Futures from buildAtomizerFutures().
   * @param map<string, string> Map from file path to current file hash.
   * @return void
   */
  private function resolveAtomizerFutures(array $futures, array $file_hashes) {
    assert_instances_of($futures, 'Future');

    $atom_cache = $this->getAtomCache();
    $bar = id(new PhutilConsoleProgressBar())
      ->setTotal(count($futures));
    foreach (Futures($futures)->limit(4) as $key => $future) {
      try {
        $atoms = $future->resolveJSON();

        foreach ($atoms as $atom) {
          if ($atom['type'] == DivinerAtom::TYPE_FILE) {
            $file_hash = $file_hashes[$atom['file']];
            $atom_cache->addFileHash($file_hash, $atom['hash']);
          }
          $atom_cache->addAtom($atom);
        }
      } catch (Exception $e) {
        phlog($e);
      }

      $bar->update(1);
    }
    $bar->done();
  }


  /**
   * Get a global version number, which changes whenever any atom or atomizer
   * implementation changes in a way which is not backward-compatible.
   *
   * The version is derived from the atom serialization version, the
   * configured rules, and the version reported by each concrete
   * DivinerAtomizer subclass.
   *
   * @return string MD5 hash of the combined version information.
   */
  private function getDivinerAtomWorldVersion() {
    $version = array();
    $version['atom'] = DivinerAtom::getAtomSerializationVersion();
    $version['rules'] = $this->getRules();

    $atomizers = id(new PhutilSymbolLoader())
      ->setAncestorClass('DivinerAtomizer')
      ->setConcreteOnly(true)
      ->selectAndLoadSymbols();

    $atomizer_versions = array();
    foreach ($atomizers as $atomizer) {
      $atomizer_versions[$atomizer['name']] = call_user_func(
        array(
          $atomizer['name'],
          'getAtomizerVersion',
        ));
    }

    // Sort by class name so the serialized form does not depend on the order
    // in which the atomizers were loaded.
    ksort($atomizer_versions);
    $version['atomizers'] = $atomizer_versions;

    return md5(serialize($version));
  }


/* -(  Graph Cache  )-------------------------------------------------------- */


  /**
   * Bring the symbol, edge and graph maps up to date with the atom map, then
   * recompute graph hashes for every atom affected by a change.
   *
   * @return void
   */
  private function buildGraphCache() {
    $this->log(pht('BUILDING GRAPH CACHE'));

    $atom_cache = $this->getAtomCache();
    $symbol_map = $atom_cache->getSymbolMap();
    $atoms = $atom_cache->getAtomMap();

    $dirty_symbols = array();
    $dirty_nhashes = array();

    // Nodes which have a symbol but are no longer in the atom map are gone.
    $del_atoms = array_diff_key($symbol_map, $atoms);
    $this->log(pht('Found %d obsolete atom(s) in graph.', count($del_atoms)));
    foreach ($del_atoms as $nhash => $shash) {
      $atom_cache->deleteSymbol($nhash);
      $dirty_symbols[$shash] = true;

      $atom_cache->deleteEdges($nhash);
      $atom_cache->deleteGraph($nhash);
    }

    // Nodes in the atom map which have no symbol yet are new.
    $new_atoms = array_diff_key($atoms, $symbol_map);
    $this->log(pht('Found %d new atom(s) in graph.', count($new_atoms)));
    foreach ($new_atoms as $nhash => $ignored) {
      $shash = $this->computeSymbolHash($nhash);
      $atom_cache->addSymbol($nhash, $shash);
      $dirty_symbols[$shash] = true;

      $atom_cache->addEdges(
        $nhash,
        $this->getEdges($nhash));

      $dirty_nhashes[$nhash] = true;
    }

    $this->log(pht('Propagating changes through the graph.'));

    // Find all the nodes which point at a dirty node, and dirty them. Then
    // find all the nodes which point at those nodes and dirty them, and so
    // on. (This is slightly overkill since we probably don't need to propagate
    // dirtiness across documentation "links" between symbols, but we do want
    // to propagate it across "extends", and we suffer only a little bit of
    // collateral damage by over-dirtying as long as the documentation isn't
    // too well-connected.)

    $symbol_stack = array_keys($dirty_symbols);
    while ($symbol_stack) {
      $symbol_hash = array_pop($symbol_stack);

      foreach ($atom_cache->getEdgesWithDestination($symbol_hash) as $edge) {
        $dirty_nhashes[$edge] = true;
        $src_hash = $this->computeSymbolHash($edge);
        // Only push symbols we have not seen, so the walk terminates.
        if (empty($dirty_symbols[$src_hash])) {
          $dirty_symbols[$src_hash] = true;
          $symbol_stack[] = $src_hash;
        }
      }
    }

    $this->log(pht('Found %d affected atoms.', count($dirty_nhashes)));

    foreach ($dirty_nhashes as $nhash => $ignored) {
      $atom_cache->addGraph($nhash, $this->computeGraphHash($nhash));
    }

    $this->log(pht('Writing graph cache.'));

    $atom_cache->saveGraph();
    $atom_cache->saveEdges();
    $atom_cache->saveSymbols();

    $this->log(pht('Done.')."\n");
  }

  /**
   * Compute the symbol hash for an atom, from the atom's 'ref' dictionary.
   *
   * @param string Node hash of the atom.
   * @return string Symbol hash.
   * @throws Exception If the atom cache has no atom with this node hash.
   */
  private function computeSymbolHash($node_hash) {
    $atom_cache = $this->getAtomCache();
    $atom = $atom_cache->getAtom($node_hash);

    if (!$atom) {
      throw new Exception("No such atom with node hash '{$node_hash}'!");
    }

    $ref = DivinerAtomRef::newFromDictionary($atom['ref']);
    return $ref->toHash();
  }

  /**
   * Get the symbol hashes an atom depends on: its own symbol, plus every
   * 'extends' and 'links' reference which points into the same book.
   *
   * @param string Node hash of the atom.
   * @return list<string> Unique symbol hashes.
   */
  private function getEdges($node_hash) {
    $atom_cache = $this->getAtomCache();
    $atom = $atom_cache->getAtom($node_hash);

    $refs = array();

    // Make the atom depend on its own symbol, so that all atoms with the same
    // symbol are dirtied (e.g., if a codebase defines the function "f()"
    // several times, all of them should be dirtied when one is dirtied).
    //
    // NOTE(review): this builds the ref from the whole atom dictionary, while
    // computeSymbolHash() builds it from $atom['ref']. Confirm both produce
    // the same symbol hash.
    $refs[DivinerAtomRef::newFromDictionary($atom)->toHash()] = true;

    foreach (array_merge($atom['extends'], $atom['links']) as $ref_dict) {
      $ref = DivinerAtomRef::newFromDictionary($ref_dict);
      if ($ref->getBook() == $atom['book']) {
        $refs[$ref->toHash()] = true;
      }
    }

    return array_keys($refs);
  }

  /**
   * Compute the graph hash for an atom from its own hash and its sorted
   * edge list.
   *
   * @param string Node hash of the atom.
   * @return string Graph hash, ending with "G".
   */
  private function computeGraphHash($node_hash) {
    $atom_cache = $this->getAtomCache();
    $atom = $atom_cache->getAtom($node_hash);

    $edges = $this->getEdges($node_hash);
    // Sort so the hash does not depend on edge discovery order.
    sort($edges);

    $inputs = array(
      'atomHash' => $atom['hash'],
      'edges' => $edges,
    );

    return md5(serialize($inputs)).'G';
  }


  /**
   * Hand the graph hashes to a DivinerLivePublisher, which publishes the
   * corresponding atoms using the default renderer.
   *
   * @param bool Value of the `clean` flag, passed to the publisher's
   *   setDropCaches().
   * @return void
   */
  private function publishDocumentation($clean) {
    $atom_cache = $this->getAtomCache();
    $graph_map = $atom_cache->getGraphMap();

    $this->log(pht('PUBLISHING DOCUMENTATION'));

    $publisher = new DivinerLivePublisher();
    $publisher->setDropCaches($clean);
    $publisher->setConfig($this->getAllConfig());
    $publisher->setAtomCache($atom_cache);
    $publisher->setRenderer(new DivinerDefaultRenderer());
    $publisher->publishAtoms(array_values($graph_map));

    $this->log(pht('Done.'));
  }


}
title
Description
Body
title
Description
Body
title
Description
Body
title
Body
Generated: Sun Nov 30 09:20:46 2014 | Cross-referenced by PHPXref 0.7.1 |