[ Index ]

PHP Cross Reference of Phabricator

title

Body

[close]

/src/applications/diviner/workflow/ -> DivinerGenerateWorkflow.php (source)

   1  <?php
   2  
   3  final class DivinerGenerateWorkflow extends DivinerWorkflow {
   4  
   5    private $atomCache;
   6  
   7    public function didConstruct() {
   8      $this
   9        ->setName('generate')
  10        ->setSynopsis(pht('Generate documentation.'))
  11        ->setArguments(
  12          array(
  13            array(
  14              'name' => 'clean',
  15              'help' => 'Clear the caches before generating documentation.',
  16            ),
  17            array(
  18              'name' => 'book',
  19              'param' => 'path',
  20              'help' => 'Path to a Diviner book configuration.',
  21            ),
  22          ));
  23    }
  24  
  /**
   * Return the atom cache for the current book, creating it on first use.
   *
   * The cache directory is `<root>/.divinercache/<name>`, where `root` and
   * `name` are read from the book configuration.
   *
   * @return DivinerAtomCache Cache for the currently loaded book.
   */
  protected function getAtomCache() {
    if ($this->atomCache) {
      return $this->atomCache;
    }

    $root = $this->getConfig('root');
    $name = $this->getConfig('name');

    $this->atomCache = new DivinerAtomCache($root.'/.divinercache/'.$name);

    return $this->atomCache;
  }
  34  
  /**
   * Write a progress message, followed by a newline, to the console's error
   * stream.
   *
   * The message is passed as a "%s" argument rather than as the format
   * pattern itself. Messages logged by this workflow embed book file names,
   * which may contain "%" or console markup characters; those must be
   * printed literally instead of being interpreted as formatting.
   *
   * @param string $message Text to print.
   * @return void
   */
  protected function log($message) {
    $console = PhutilConsole::getConsole();
    $console->writeErr("%s\n", $message);
  }
  39  
  /**
   * Generate documentation for one or more books.
   *
   * When `--book` is given, only that book is generated. Otherwise, files
   * with the "book" suffix are searched for beneath the current working
   * directory and each one found is generated in turn.
   *
   * @param PhutilArgumentParser $args Parsed command-line arguments.
   * @return void
   * @throws PhutilArgumentUsageException If no book was specified and none
   *   can be found beneath the current directory.
   */
  public function execute(PhutilArgumentParser $args) {
    $requested_book = $args->getArg('book');

    if ($requested_book) {
      $books = array($requested_book);
    } else {
      $cwd = getcwd();
      $this->log(pht('FINDING DOCUMENTATION BOOKS'));

      $books = id(new FileFinder($cwd))
        ->withType('f')
        ->withSuffix('book')
        ->find();

      if (!$books) {
        throw new PhutilArgumentUsageException(
          pht(
            "There are no Diviner '.book' files anywhere beneath the ".
            "current directory. Use '--book <book>' to specify a ".
            "documentation book to generate."));
      }

      $this->log(pht('Found %s book(s).', new PhutilNumber(count($books))));
    }

    foreach ($books as $book_path) {
      // Only the file name is shown in progress output.
      $label = basename($book_path);

      $this->log(pht('Generating book "%s"...', $label));
      $this->generateBook($book_path, $args);
      $this->log(pht('Completed generation of "%s".', $label)."\n");
    }
  }
  71  
  /**
   * Generate a single documentation book: load its configuration, optionally
   * clear its caches, rebuild the atom and graph caches, then publish.
   *
   * @param string $book Path to the book configuration file.
   * @param PhutilArgumentParser $args Parsed command-line arguments; the
   *   `clean` flag is read from here.
   * @return void
   */
  private function generateBook($book, PhutilArgumentParser $args) {
    // Each book has its own cache directory, so drop any cache object left
    // over from a previously generated book before loading this one.
    $this->atomCache = null;
    $this->readBookConfiguration($book);

    $clean = $args->getArg('clean');
    if ($clean) {
      $this->log(pht('CLEARING CACHES'));
      $this->getAtomCache()->delete();
      $this->log(pht('Done.')."\n");
    }

    // The major challenge of documentation generation is one of dependency
    // management. When regenerating documentation, we want to do the smallest
    // amount of work we can, so that regenerating documentation after minor
    // changes is quick.
    //
    // ATOM CACHE
    //
    // In the first stage, we find all the direct changes to source code since
    // the last run. This stage relies on two data structures:
    //
    //  - File Hash Map: map<file_hash, node_hash>
    //  - Atom Map: map<node_hash, true>
    //
    // First, we hash all the source files in the project to detect any which
    // have changed since the previous run (i.e., their hash is not present in
    // the File Hash Map). If a file's content hash appears in the map, it has
    // not changed, so we don't need to reparse it.
    //
    // We break the contents of each file into "atoms", which represent a unit
    // of source code (like a function, method, class or file). Each atom has a
    // "node hash" based on the content of the atom: if a function definition
    // changes, the node hash of the atom changes too. The primary output of
    // the atom cache is a list of node hashes which exist in the project. This
    // is the Atom Map. The node hash depends only on the definition of the atom
    // and the atomizer implementation. It ends with an "N", for "node".
    //
    // (We need the Atom Map in addition to the File Hash Map because each file
    // may have several atoms in it (e.g., multiple functions, or a class and
    // its methods). The File Hash Map contains an exhaustive list of all atoms
    // with type "file", but not child atoms of those top-level atoms.)
    //
    // GRAPH CACHE
    //
    // We now know which atoms exist, and can compare the Atom Map to some
    // existing cache to figure out what has changed. However, this isn't
    // sufficient to figure out which documentation actually needs to be
    // regenerated, because atoms depend on other atoms. For example, if "B
    // extends A" and the definition for A changes, we need to regenerate the
    // documentation in B. Similarly, if X links to Y and Y changes, we should
    // regenerate X. (In both these cases, the documentation for the connected
    // atom may not actually change, but in some cases it will, and the extra
    // work we need to do is generally very small compared to the size of the
    // project.)
    //
    // To figure out which other nodes have changed, we compute a "graph hash"
    // for each node. This hash combines the "node hash" with the node hashes
    // of connected nodes. Our primary output is a list of graph hashes, which
    // a documentation generator can use to easily determine what work needs
    // to be done by comparing the list with a list of cached graph hashes,
    // then generating documentation for new hashes and deleting documentation
    // for missing hashes. The graph hash ends with a "G", for "graph".
    //
    // In this stage, we rely on three data structures:
    //
    //  - Symbol Map: map<node_hash, symbol_hash>
    //  - Edge Map: map<node_hash, list<symbol_hash>>
    //  - Graph Map: map<node_hash, graph_hash>
    //
    // Calculating the graph hash requires several steps, because we need to
    // figure out which nodes an atom is attached to. The atom contains symbolic
    // references to other nodes by name (e.g., "extends SomeClass") in the form
    // of DivinerAtomRefs. We can also build a symbolic reference for any atom
    // from the atom itself. Each DivinerAtomRef generates a symbol hash,
    // which ends with an "S", for "symbol".
    //
    // First, we update the symbol map. We remove (and mark dirty) any symbols
    // associated with node hashes which no longer exist (e.g., old/dead nodes).
    // Second, we add (and mark dirty) any symbols associated with new nodes.
    // We also add edges defined by new nodes to the graph.
    //
    // We initialize a list of dirty nodes to the list of new nodes, then
    // find all nodes connected to dirty symbols and add them to the dirty
    // node list. This list now contains every node with a new or changed
    // graph hash.
    //
    // We walk the dirty list and compute the new graph hashes, adding them
    // to the graph hash map. This Graph Map can then be passed to an actual
    // documentation generator, which can compare the graph hashes to a list
    // of already-generated graph hashes and easily assess which documents need
    // to be regenerated and which can be deleted.

    $this->buildAtomCache();
    $this->buildGraphCache();

    $this->publishDocumentation($clean);
  }
 169  
 170  /* -(  Atom Cache  )--------------------------------------------------------- */
 171  
 172    private function buildAtomCache() {
 173      $this->log(pht('BUILDING ATOM CACHE'));
 174  
 175      $file_hashes = $this->findFilesInProject();
 176  
 177      $this->log(pht('Found %d file(s) in project.', count($file_hashes)));
 178  
 179      $this->deleteDeadAtoms($file_hashes);
 180  
 181      $atomize = $this->getFilesToAtomize($file_hashes);
 182  
 183      $this->log(pht('Found %d unatomized, uncached file(s).', count($atomize)));
 184  
 185      $file_atomizers = $this->getAtomizersForFiles($atomize);
 186  
 187      $this->log(pht('Found %d file(s) to atomize.', count($file_atomizers)));
 188  
 189      $futures = $this->buildAtomizerFutures($file_atomizers);
 190  
 191      $this->log(pht('Atomizing %d file(s).', count($file_atomizers)));
 192  
 193      if ($futures) {
 194        $this->resolveAtomizerFutures($futures, $file_hashes);
 195        $this->log(pht('Atomization complete.'));
 196      } else {
 197        $this->log(pht('Atom cache is up to date, no files to atomize.'));
 198      }
 199  
 200      $this->log(pht('Writing atom cache.'));
 201  
 202      $this->getAtomCache()->saveAtoms();
 203  
 204      $this->log(pht('Done.')."\n");
 205    }
 206  
 207    private function getAtomizersForFiles(array $files) {
 208      $rules = $this->getRules();
 209      $exclude = $this->getExclude();
 210  
 211      $atomizers = array();
 212  
 213      foreach ($files as $file) {
 214        foreach ($exclude as $pattern) {
 215          if (preg_match($pattern, $file)) {
 216            continue 2;
 217          }
 218        }
 219  
 220        foreach ($rules as $rule => $atomizer) {
 221          $ok = preg_match($rule, $file);
 222          if ($ok === false) {
 223            throw new Exception(
 224              "Rule '{$rule}' is not a valid regular expression.");
 225          }
 226          if ($ok) {
 227            $atomizers[$file] = $atomizer;
 228            continue;
 229          }
 230        }
 231      }
 232  
 233      return $atomizers;
 234    }
 235  
 236    private function getRules() {
 237      $rules = $this->getConfig('rules', array(
 238        '/\\.diviner$/' => 'DivinerArticleAtomizer',
 239        '/\\.php$/' => 'DivinerPHPAtomizer',
 240      ));
 241  
 242      return $rules;
 243    }
 244  
 245    private function getExclude() {
 246      $exclude = (array)$this->getConfig('exclude', array());
 247      return $exclude;
 248    }
 249  
 250  
 251    private function findFilesInProject() {
 252      $raw_hashes = id(new FileFinder($this->getConfig('root')))
 253        ->excludePath('*/.*')
 254        ->withType('f')
 255        ->setGenerateChecksums(true)
 256        ->find();
 257  
 258      $version = $this->getDivinerAtomWorldVersion();
 259  
 260      $file_hashes = array();
 261      foreach ($raw_hashes as $file => $md5_hash) {
 262        $rel_file = Filesystem::readablePath($file, $this->getConfig('root'));
 263        // We want the hash to change if the file moves or Diviner gets updated,
 264        // not just if the file content changes. Derive a hash from everything
 265        // we care about.
 266        $file_hashes[$rel_file] = md5("{$rel_file}\0{$md5_hash}\0{$version}").'F';
 267      }
 268  
 269      return $file_hashes;
 270    }
 271  
 272    private function deleteDeadAtoms(array $file_hashes) {
 273      $atom_cache = $this->getAtomCache();
 274  
 275      $hash_to_file = array_flip($file_hashes);
 276      foreach ($atom_cache->getFileHashMap() as $hash => $atom) {
 277        if (empty($hash_to_file[$hash])) {
 278          $atom_cache->deleteFileHash($hash);
 279        }
 280      }
 281    }
 282  
 283    private function getFilesToAtomize(array $file_hashes) {
 284      $atom_cache = $this->getAtomCache();
 285  
 286      $atomize = array();
 287      foreach ($file_hashes as $file => $hash) {
 288        if (!$atom_cache->fileHashExists($hash)) {
 289          $atomize[] = $file;
 290        }
 291      }
 292  
 293      return $atomize;
 294    }
 295  
 296    private function buildAtomizerFutures(array $file_atomizers) {
 297      $atomizers = array();
 298      foreach ($file_atomizers as $file => $atomizer) {
 299        $atomizers[$atomizer][] = $file;
 300      }
 301  
 302      $root = dirname(phutil_get_library_root('phabricator'));
 303      $config_root = $this->getConfig('root');
 304  
 305      $bar = id(new PhutilConsoleProgressBar())
 306        ->setTotal(count($file_atomizers));
 307  
 308      $futures = array();
 309      foreach ($atomizers as $class => $files) {
 310        foreach (array_chunk($files, 32) as $chunk) {
 311          $future = new ExecFuture(
 312            '%s atomize --ugly --book %s --atomizer %s -- %Ls',
 313            $root.'/bin/diviner',
 314            $this->getBookConfigPath(),
 315            $class,
 316            $chunk);
 317          $future->setCWD($config_root);
 318  
 319          $futures[] = $future;
 320  
 321          $bar->update(count($chunk));
 322        }
 323      }
 324  
 325      $bar->done();
 326  
 327      return $futures;
 328    }
 329  
 330    private function resolveAtomizerFutures(array $futures, array $file_hashes) {
 331      assert_instances_of($futures, 'Future');
 332  
 333      $atom_cache = $this->getAtomCache();
 334      $bar = id(new PhutilConsoleProgressBar())
 335        ->setTotal(count($futures));
 336      foreach (Futures($futures)->limit(4) as $key => $future) {
 337        try {
 338          $atoms = $future->resolveJSON();
 339  
 340          foreach ($atoms as $atom) {
 341            if ($atom['type'] == DivinerAtom::TYPE_FILE) {
 342              $file_hash = $file_hashes[$atom['file']];
 343              $atom_cache->addFileHash($file_hash, $atom['hash']);
 344            }
 345            $atom_cache->addAtom($atom);
 346          }
 347        } catch (Exception $e) {
 348          phlog($e);
 349        }
 350  
 351        $bar->update(1);
 352      }
 353      $bar->done();
 354    }
 355  
 356  
 357    /**
 358     * Get a global version number, which changes whenever any atom or atomizer
 359     * implementation changes in a way which is not backward-compatible.
 360     */
 361    private function getDivinerAtomWorldVersion() {
 362      $version = array();
 363      $version['atom'] = DivinerAtom::getAtomSerializationVersion();
 364      $version['rules'] = $this->getRules();
 365  
 366      $atomizers = id(new PhutilSymbolLoader())
 367        ->setAncestorClass('DivinerAtomizer')
 368        ->setConcreteOnly(true)
 369        ->selectAndLoadSymbols();
 370  
 371      $atomizer_versions = array();
 372      foreach ($atomizers as $atomizer) {
 373        $atomizer_versions[$atomizer['name']] = call_user_func(
 374          array(
 375            $atomizer['name'],
 376            'getAtomizerVersion',
 377          ));
 378      }
 379  
 380      ksort($atomizer_versions);
 381      $version['atomizers'] = $atomizer_versions;
 382  
 383      return md5(serialize($version));
 384    }
 385  
 386  
 387  /* -(  Graph Cache  )-------------------------------------------------------- */
 388  
 389  
 390    private function buildGraphCache() {
 391      $this->log(pht('BUILDING GRAPH CACHE'));
 392  
 393      $atom_cache = $this->getAtomCache();
 394      $symbol_map = $atom_cache->getSymbolMap();
 395      $atoms = $atom_cache->getAtomMap();
 396  
 397      $dirty_symbols = array();
 398      $dirty_nhashes = array();
 399  
 400      $del_atoms = array_diff_key($symbol_map, $atoms);
 401      $this->log(pht('Found %d obsolete atom(s) in graph.', count($del_atoms)));
 402      foreach ($del_atoms as $nhash => $shash) {
 403        $atom_cache->deleteSymbol($nhash);
 404        $dirty_symbols[$shash] = true;
 405  
 406        $atom_cache->deleteEdges($nhash);
 407        $atom_cache->deleteGraph($nhash);
 408      }
 409  
 410      $new_atoms = array_diff_key($atoms, $symbol_map);
 411      $this->log(pht('Found %d new atom(s) in graph.', count($new_atoms)));
 412      foreach ($new_atoms as $nhash => $ignored) {
 413        $shash = $this->computeSymbolHash($nhash);
 414        $atom_cache->addSymbol($nhash, $shash);
 415        $dirty_symbols[$shash] = true;
 416  
 417        $atom_cache->addEdges(
 418          $nhash,
 419          $this->getEdges($nhash));
 420  
 421        $dirty_nhashes[$nhash] = true;
 422      }
 423  
 424      $this->log(pht('Propagating changes through the graph.'));
 425  
 426      // Find all the nodes which point at a dirty node, and dirty them. Then
 427      // find all the nodes which point at those nodes and dirty them, and so
 428      // on. (This is slightly overkill since we probably don't need to propagate
 429      // dirtiness across documentation "links" between symbols, but we do want
 430      // to propagate it across "extends", and we suffer only a little bit of
 431      // collateral damage by over-dirtying as long as the documentation isn't
 432      // too well-connected.)
 433  
 434      $symbol_stack = array_keys($dirty_symbols);
 435      while ($symbol_stack) {
 436        $symbol_hash = array_pop($symbol_stack);
 437  
 438        foreach ($atom_cache->getEdgesWithDestination($symbol_hash) as $edge) {
 439          $dirty_nhashes[$edge] = true;
 440          $src_hash = $this->computeSymbolHash($edge);
 441          if (empty($dirty_symbols[$src_hash])) {
 442            $dirty_symbols[$src_hash] = true;
 443            $symbol_stack[] = $src_hash;
 444          }
 445        }
 446      }
 447  
 448      $this->log(pht('Found %d affected atoms.', count($dirty_nhashes)));
 449  
 450      foreach ($dirty_nhashes as $nhash => $ignored) {
 451        $atom_cache->addGraph($nhash, $this->computeGraphHash($nhash));
 452      }
 453  
 454      $this->log(pht('Writing graph cache.'));
 455  
 456      $atom_cache->saveGraph();
 457      $atom_cache->saveEdges();
 458      $atom_cache->saveSymbols();
 459  
 460      $this->log(pht('Done.')."\n");
 461    }
 462  
 463    private function computeSymbolHash($node_hash) {
 464      $atom_cache = $this->getAtomCache();
 465      $atom = $atom_cache->getAtom($node_hash);
 466  
 467      if (!$atom) {
 468        throw new Exception("No such atom with node hash '{$node_hash}'!");
 469      }
 470  
 471      $ref = DivinerAtomRef::newFromDictionary($atom['ref']);
 472      return $ref->toHash();
 473    }
 474  
 475    private function getEdges($node_hash) {
 476      $atom_cache = $this->getAtomCache();
 477      $atom = $atom_cache->getAtom($node_hash);
 478  
 479      $refs = array();
 480  
 481      // Make the atom depend on its own symbol, so that all atoms with the same
 482      // symbol are dirtied (e.g., if a codebase defines the function "f()"
 483      // several times, all of them should be dirtied when one is dirtied).
 484      $refs[DivinerAtomRef::newFromDictionary($atom)->toHash()] = true;
 485  
 486      foreach (array_merge($atom['extends'], $atom['links']) as $ref_dict) {
 487        $ref = DivinerAtomRef::newFromDictionary($ref_dict);
 488        if ($ref->getBook() == $atom['book']) {
 489          $refs[$ref->toHash()] = true;
 490        }
 491      }
 492  
 493      return array_keys($refs);
 494    }
 495  
 496    private function computeGraphHash($node_hash) {
 497      $atom_cache = $this->getAtomCache();
 498      $atom = $atom_cache->getAtom($node_hash);
 499  
 500      $edges = $this->getEdges($node_hash);
 501      sort($edges);
 502  
 503      $inputs = array(
 504        'atomHash' => $atom['hash'],
 505        'edges' => $edges,
 506      );
 507  
 508      return md5(serialize($inputs)).'G';
 509    }
 510  
 511  
 512    private function publishDocumentation($clean) {
 513      $atom_cache = $this->getAtomCache();
 514      $graph_map = $atom_cache->getGraphMap();
 515  
 516      $this->log(pht('PUBLISHING DOCUMENTATION'));
 517  
 518      $publisher = new DivinerLivePublisher();
 519      $publisher->setDropCaches($clean);
 520      $publisher->setConfig($this->getAllConfig());
 521      $publisher->setAtomCache($atom_cache);
 522      $publisher->setRenderer(new DivinerDefaultRenderer());
 523      $publisher->publishAtoms(array_values($graph_map));
 524  
 525      $this->log(pht('Done.'));
 526    }
 527  
 528  
 529  }


Generated: Sun Nov 30 09:20:46 2014 Cross-referenced by PHPXref 0.7.1