
Source Code for Module epydoc.docparser

   1  # epydoc -- Source code parsing 
   2  # 
   3  # Copyright (C) 2005 Edward Loper 
   4  # Author: Edward Loper <[email protected]> 
   5  # URL: <http://epydoc.sf.net> 
   6  # 
   7  # $Id: docparser.py 1511 2007-02-16 02:08:51Z dvarrazzo $ 
   8   
   9  """ 
  10  Extract API documentation about python objects by parsing their source 
  11  code. 
  12   
  13  The function L{parse_docs()}, which provides the main interface 
  14  of this module, reads and parses the Python source code for a 
  15  module, and uses it to create an L{APIDoc} object containing 
  16  the API documentation for the variables and values defined in 
  17  that module. 
  18   
  19  Currently, C{parse_docs()} extracts documentation from the following 
  20  source code constructions: 
  21   
  22    - module docstring 
  23    - import statements 
  24    - class definition blocks 
  25    - function definition blocks 
  26    - assignment statements 
  27      - simple assignment statements 
  28      - assignment statements with multiple C{'='}s 
  29      - assignment statements with unpacked left-hand sides 
  30      - assignment statements that wrap a function in classmethod 
  31        or staticmethod. 
  32      - assignment to special variables __path__, __all__, and 
  33        __docformat__. 
  34    - delete statements 
  35   
  36  C{parse_docs()} does not yet support the following source code 
  37  constructions: 
  38   
  39    - assignment statements that create properties 
  40   
  41  By default, C{parse_docs()} will explore the contents of top-level 
  42  C{try} and C{if} blocks.  If desired, C{parse_docs()} can also 
  43  be configured to explore the contents of C{while} and C{for} blocks. 
  44  (See the configuration constants, below.) 
  45   
  46  @todo: Make it possible to extend the functionality of C{parse_docs()}, 
  47         by replacing process_line with a dispatch table that can be 
  48         customized (similarly to C{docintrospector.register_introspector()}). 
  49  """ 
  50  __docformat__ = 'epytext en' 
  51   
  52  ###################################################################### 
  53  ## Imports 
  54  ###################################################################### 
  55   
  56  # Python source code parsing: 
  57  import token, tokenize 
  58  # Finding modules: 
  59  import imp 
  60  # File services: 
  61  import os, os.path, sys 
  62  # Unicode: 
  63  import codecs 
  64  # API documentation encoding: 
  65  from epydoc.apidoc import * 
  66  # For looking up the docs of builtins: 
  67  import __builtin__, exceptions 
  68  import epydoc.docintrospecter  
  69  # Misc utility functions: 
  70  from epydoc.util import * 
  71  # Backwards compatibility 
  72  from epydoc.compat import * 
  73   
  74  ###################################################################### 
  75  ## Doc Parser 
  76  ###################################################################### 
  77   
  78  class ParseError(Exception):
  79      """
  80      An exception that is used to signify that C{docparser} encountered
  81      syntactically invalid Python code while processing a Python source
  82      file.
  83      """
  84
  85  _moduledoc_cache = {}
  86  """A cache of C{ModuleDoc}s that we've already created.
  87  C{_moduledoc_cache} is a dictionary mapping from filenames to
  88  C{ValueDoc} objects.
  89  @type: C{dict}"""
  90
  91  #////////////////////////////////////////////////////////////
  92  # Configuration Constants
  93  #////////////////////////////////////////////////////////////
  94
  95  #{ Configuration Constants: Control Flow
  96  PARSE_TRY_BLOCKS = True
  97  """Should the contents of C{try} blocks be examined?"""
  98  PARSE_EXCEPT_BLOCKS = True
  99  """Should the contents of C{except} blocks be examined?"""
 100  PARSE_FINALLY_BLOCKS = True
 101  """Should the contents of C{finally} blocks be examined?"""
 102  PARSE_IF_BLOCKS = True
 103  """Should the contents of C{if} blocks be examined?"""
 104  PARSE_ELSE_BLOCKS = True
 105  """Should the contents of C{else} and C{elif} blocks be examined?"""
 106  PARSE_WHILE_BLOCKS = False
 107  """Should the contents of C{while} blocks be examined?"""
 108  PARSE_FOR_BLOCKS = False
 109  """Should the contents of C{for} blocks be examined?"""
 110
 111  #{ Configuration Constants: Imports
 112  IMPORT_HANDLING = 'link'
 113  """What should C{docparser} do when it encounters an import
 114  statement?
 115    - C{'link'}: Create variabledoc objects with imported_from pointers
 116      to the source object.
 117    - C{'parse'}: Parse the imported file, to find the actual
 118      documentation for the imported object. (This will fall back
 119      to the 'link' behavior if the imported file can't be parsed,
 120      e.g., if it's a builtin.)
 121  """
 122
 123  IMPORT_STAR_HANDLING = 'parse'
 124  """When C{docparser} encounters a C{'from M{m} import *'}
 125  statement, and is unable to parse C{M{m}} (either because
 126  L{IMPORT_HANDLING}=C{'link'}, or because parsing failed), how
 127  should it determine the list of identifiers exported by C{M{m}}?
 128    - C{'ignore'}: ignore the import statement, and don't create
 129      any new variables.
 130    - C{'parse'}: parse it to find a list of the identifiers that it
 131      exports. (This will fall back to the 'ignore' behavior if the
 132      imported file can't be parsed, e.g., if it's a builtin.)
 133    - C{'introspect'}: import the module and introspect it (using C{dir})
 134      to find a list of the identifiers that it exports. (This will
 135      fall back to the 'ignore' behavior if the imported file can't
 136      be parsed, e.g., if it's a builtin.)
 137  """
 138
 139  DEFAULT_DECORATOR_BEHAVIOR = 'transparent'
 140  """When C{docparser} encounters an unknown decorator, what should
 141  it do to the documentation of the decorated function?
 142    - C{'transparent'}: leave the function's documentation as-is.
 143    - C{'opaque'}: replace the function's documentation with an
 144      empty C{ValueDoc} object, reflecting the fact that we have no
 145      knowledge about what value the decorator returns.
 146  """
 147
 148  BASE_HANDLING = 'parse'  # 'link'
 149  """What should C{docparser} do when it encounters a base class that
 150  was imported from another module?
 151    - C{'link'}: Create a valuedoc with a C{proxy_for} pointer to the
 152      base class.
 153    - C{'parse'}: Parse the file containing the base class, to find
 154      the actual documentation for it. (This will fall back to the
 155      'link' behavior if the imported file can't be parsed, e.g., if
 156      it's a builtin.)
 157  """
 158
 159  #{ Configuration Constants: Comment docstrings
 160  COMMENT_DOCSTRING_MARKER = '#:'
 161  """The prefix used to mark comments that contain attribute
 162  docstrings for variables."""
 163
 164  #{ Configuration Constants: Grouping
 165  START_GROUP_MARKER = '#{'
 166  """The prefix used to mark a comment that starts a group. This marker
 167  should be followed (on the same line) by the name of the group.
 168  Following a start-group comment, all variables defined at the same
 169  indentation level will be assigned to this group name, until the
 170  parser reaches the end of the file, a matching end-group comment, or
 171  another start-group comment at the same indentation level.
 172  """
 173
 174  END_GROUP_MARKER = '#}'
 175  """The prefix used to mark a comment that ends a group. See
 176  L{START_GROUP_MARKER}."""
 177
 178  #/////////////////////////////////////////////////////////////////
 179  #{ Module parser
 180  #/////////////////////////////////////////////////////////////////
 181
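
# The comment-docstring and grouping markers defined above might be used in
# a documented module as follows (a hypothetical sketch; 'CACHE_SIZE' is an
# invented variable):
#
#     #{ Limits
#     #: The maximum number of entries kept in the cache.
#     CACHE_SIZE = 100
#     #}
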
 182  def parse_docs(filename=None, name=None, context=None, is_script=False):
 183      """
 184      Generate the API documentation for a specified object by
 185      parsing Python source files, and return it as a L{ValueDoc}.
 186      The object to generate documentation for may be specified
 187      using the C{filename} parameter I{or} the C{name} parameter.
 188      (It is an error to specify both a filename and a name; or to
 189      specify neither a filename nor a name.)
 190
 191      @param filename: The name of the file that contains the python
 192          source code for a package, module, or script. If
 193          C{filename} is specified, then C{parse_docs()} will return a
 194          C{ModuleDoc} describing its contents.
 195      @param name: The fully-qualified python dotted name of any
 196          value (including packages, modules, classes, and
 197          functions). C{parse_docs()} will automatically figure out
 198          which module(s) it needs to parse in order to find the
 199          documentation for the specified object.
 200      @param context: The API documentation for the package that
 201          contains C{filename}. If no context is given, then
 202          C{filename} is assumed to contain a top-level module or
 203          package. It is an error to specify a C{context} if the
 204          C{name} argument is used.
 205      @rtype: L{ValueDoc}
 206      """
 207      # Always introspect __builtins__ & exceptions (e.g., in case
 208      # they're used as base classes.)
 209      epydoc.docintrospecter.introspect_docs(__builtin__)
 210      epydoc.docintrospecter.introspect_docs(exceptions)
 211
 212      # If our input is a python object name, then delegate to
 213      # _find().
 214      if filename is None and name is not None:
 215          if context:
 216              raise ValueError("context should only be specified together "
 217                               "with filename, not with name.")
 218          name = DottedName(name)
 219          val_doc = _find(name)
 220          if val_doc.canonical_name is UNKNOWN:
 221              val_doc.canonical_name = name
 222          return val_doc
 223
 224      # If our input is a filename, then create a ModuleDoc for it,
 225      # and use process_file() to populate its attributes.
 226      elif filename is not None and name is None:
 227          # Use a python source version, if possible.
 228          if not is_script:
 229              try: filename = py_src_filename(filename)
 230              except ValueError, e: raise ImportError('%s' % e)
 231
 232          # Check the cache, first.
 233          if _moduledoc_cache.has_key(filename):
 234              return _moduledoc_cache[filename]
 235
 236          log.info("Parsing %s" % filename)
 237
 238          # If the context wasn't provided, then check if the file is in
 239          # a package directory. If so, then update basedir & name to
 240          # contain the topmost package's directory and the fully
 241          # qualified name for this file. (This update assumes the
 242          # default value of __path__ for the parent packages; if the
 243          # parent packages override their __path__s, then this can
 244          # cause us not to find the value.)
 245          if context is None and not is_script:
 246              basedir = os.path.split(filename)[0]
 247              name = os.path.splitext(os.path.split(filename)[1])[0]
 248              if name == '__init__':
 249                  basedir, name = os.path.split(basedir)
 250              context = _parse_package(basedir)
 251
 252          # Figure out the canonical name of the module we're parsing.
 253          if not is_script:
 254              module_name, is_pkg = _get_module_name(filename, context)
 255          else:
 256              module_name = DottedName(munge_script_name(filename))
 257              is_pkg = False
 258
 259          # Create a new ModuleDoc for the module, & add it to the cache.
 260          module_doc = ModuleDoc(canonical_name=module_name, variables={},
 261                                 sort_spec=[], imports=[],
 262                                 filename=filename, package=context,
 263                                 is_package=is_pkg, submodules=[],
 264                                 docs_extracted_by='parser')
 265          module_doc.defining_module = module_doc
 266          _moduledoc_cache[filename] = module_doc
 267
 268          # Set the module's __path__ to its default value.
 269          if is_pkg:
 270              module_doc.path = [os.path.split(module_doc.filename)[0]]
 271
 272          # Add this module to the parent package's list of submodules.
 273          if context is not None:
 274              context.submodules.append(module_doc)
 275
 276          # Tokenize & process the contents of the module's source file.
 277          try:
 278              process_file(module_doc)
 279          except tokenize.TokenError, e:
 280              msg, (srow, scol) = e.args
 281              raise ParseError('Error during parsing: %s '
 282                               '(%s, line %d, char %d)' %
 283                               (msg, module_doc.filename, srow, scol))
 284          except IndentationError, e:
 285              raise ParseError('Error during parsing: %s (%s)' %
 286                               (e, module_doc.filename))
 287
 288          # Handle any special variables (__path__, __docformat__, etc.)
 289          handle_special_module_vars(module_doc)
 290
 291          # Return the completed ModuleDoc
 292          return module_doc
 293      else:
 294          raise ValueError("Expected exactly one of the following "
 295                           "arguments: name, filename")
296
 297  def _parse_package(package_dir):
 298      """
 299      If the given directory is a package directory, then parse its
 300      __init__.py file (and the __init__.py files of all ancestor
 301      packages); and return its C{ModuleDoc}.
 302      """
 303      if not is_package_dir(package_dir):
 304          return None
 305      parent_dir = os.path.split(package_dir)[0]
 306      parent_doc = _parse_package(parent_dir)
 307      package_file = os.path.join(package_dir, '__init__')
 308      return parse_docs(filename=package_file, context=parent_doc)
 309
 310  # Special vars:
 311  # C{__docformat__}, C{__all__}, and C{__path__}.
 312  def handle_special_module_vars(module_doc):
 313      # If __docformat__ is defined, parse its value.
 314      toktree = _module_var_toktree(module_doc, '__docformat__')
 315      if toktree is not None:
 316          try: module_doc.docformat = parse_string(toktree)
 317          except: pass
 318          del module_doc.variables['__docformat__']
 319
 320      # If __all__ is defined, parse its value.
 321      toktree = _module_var_toktree(module_doc, '__all__')
 322      if toktree is not None:
 323          try:
 324              public_names = set(parse_string_list(toktree))
 325              for name, var_doc in module_doc.variables.items():
 326                  if name in public_names:
 327                      var_doc.is_public = True
 328                      if not isinstance(var_doc, ModuleDoc):
 329                          var_doc.is_imported = False
 330                  else:
 331                      var_doc.is_public = False
 332          except ParseError:
 333              # If we couldn't parse the list, give precedence to introspection.
 334              for name, var_doc in module_doc.variables.items():
 335                  if not isinstance(var_doc, ModuleDoc):
 336                      var_doc.is_imported = UNKNOWN
 337          del module_doc.variables['__all__']
 338
 339      # If __path__ is defined, then extract its value (pkgs only)
 340      if module_doc.is_package:
 341          toktree = _module_var_toktree(module_doc, '__path__')
 342          if toktree is not None:
 343              try:
 344                  module_doc.path = parse_string_list(toktree)
 345              except ParseError:
 346                  pass # [xx]
 347              del module_doc.variables['__path__']
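
# For example, a module whose source contains (hypothetical snippet)
#
#     __docformat__ = 'restructuredtext'
#     __all__ = ['public_func']
#
# gets its C{docformat} attribute set to 'restructuredtext', while every
# variable other than 'public_func' is marked non-public and
# '__docformat__'/'__all__' themselves are removed from the variable list.
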
348
 349  def _module_var_toktree(module_doc, name):
 350      var_doc = module_doc.variables.get(name)
 351      if (var_doc is None or var_doc.value in (None, UNKNOWN) or
 352          var_doc.value.toktree is UNKNOWN):
 353          return None
 354      else:
 355          return var_doc.value.toktree
 356
 357  #////////////////////////////////////////////////////////////
 358  #{ Module Lookup
 359  #////////////////////////////////////////////////////////////
 360
 361  def _find(name, package_doc=None):
 362      """
 363      Return the API documentation for the object whose name is
 364      C{name}. C{package_doc}, if specified, is the API
 365      documentation for the package containing the named object.
 366      """
 367      # If we're inside a package, then find the package's path.
 368      if package_doc is None:
 369          path = None
 370      elif package_doc.path is not UNKNOWN:
 371          path = package_doc.path
 372      else:
 373          path = [os.path.split(package_doc.filename)[0]]
 374
 375      # The leftmost identifier in `name` should be a module or
 376      # package on the given path; find it and parse it.
 377      filename = _get_filename(name[0], path)
 378      module_doc = parse_docs(filename, context=package_doc)
 379
 380      # If the name just has one identifier, then the module we just
 381      # parsed is the object we're looking for; return it.
 382      if len(name) == 1: return module_doc
 383
 384      # Otherwise, we're looking for something inside the module.
 385      # First, check to see if it's in a variable (but ignore
 386      # variables that just contain imported submodules).
 387      if not _is_submodule_import_var(module_doc, name[1]):
 388          try: return _find_in_namespace(name[1:], module_doc)
 389          except ImportError: pass
 390
 391      # If not, then check to see if it's in a subpackage.
 392      if module_doc.is_package:
 393          return _find(name[1:], module_doc)
 394
 395      # If it's not in a variable or a subpackage, then we can't
 396      # find it.
 397      raise ImportError('Could not find value')
398
 399  def _is_submodule_import_var(module_doc, var_name):
 400      """
 401      Return true if C{var_name} is the name of a variable in
 402      C{module_doc} that just contains an C{imported_from} link to a
 403      submodule of the same name. (I.e., is a variable created when
 404      a package imports one of its own submodules.)
 405      """
 406      var_doc = module_doc.variables.get(var_name)
 407      full_var_name = DottedName(module_doc.canonical_name, var_name)
 408      return (var_doc is not None and
 409              var_doc.imported_from == full_var_name)
410
 411  def _find_in_namespace(name, namespace_doc):
 412      if name[0] not in namespace_doc.variables:
 413          raise ImportError('Could not find value')
 414
 415      # Look up the variable in the namespace.
 416      var_doc = namespace_doc.variables[name[0]]
 417      if var_doc.value is UNKNOWN:
 418          raise ImportError('Could not find value')
 419      val_doc = var_doc.value
 420
 421      # If the variable's value was imported, then follow its
 422      # alias link.
 423      if var_doc.imported_from not in (None, UNKNOWN):
 424          return _find(var_doc.imported_from+name[1:])
 425
 426      # Otherwise, if the name has one identifier, then this is the
 427      # value we're looking for; return it.
 428      elif len(name) == 1:
 429          return val_doc
 430
 431      # Otherwise, if this value is a namespace, look inside it.
 432      elif isinstance(val_doc, NamespaceDoc):
 433          return _find_in_namespace(name[1:], val_doc)
 434
 435      # Otherwise, we ran into a dead end.
 436      else:
 437          raise ImportError('Could not find value')
438
 439  def _get_filename(identifier, path=None):
 440      if path is UNKNOWN: path = None
 441      try:
 442          fp, filename, (s,m,typ) = imp.find_module(identifier, path)
 443          if fp is not None: fp.close()
 444      except ImportError:
 445          raise ImportError, 'No Python source file found.'
 446
 447      if typ == imp.PY_SOURCE:
 448          return filename
 449      elif typ == imp.PY_COMPILED:
 450          # See if we can find a corresponding non-compiled version.
 451          filename = re.sub('.py\w$', '.py', filename)
 452          if not os.path.exists(filename):
 453              raise ImportError, 'No Python source file found.'
 454          return filename
 455      elif typ == imp.PKG_DIRECTORY:
 456          filename = os.path.join(filename, '__init__.py')
 457          if not os.path.exists(filename):
 458              filename = os.path.join(filename, '__init__.pyw')
 459              if not os.path.exists(filename):
 460                  raise ImportError, 'No package file found.'
 461          return filename
 462      elif typ == imp.C_BUILTIN:
 463          raise ImportError, 'No Python source file for builtin modules.'
 464      elif typ == imp.C_EXTENSION:
 465          raise ImportError, 'No Python source file for c extensions.'
 466      else:
 467          raise ImportError, 'No Python source file found.'
 468
 469  #/////////////////////////////////////////////////////////////////
 470  #{ File tokenization loop
 471  #/////////////////////////////////////////////////////////////////
 472
473 -def process_file(module_doc):
474 """ 475 Read the given C{ModuleDoc}'s file, and add variables 476 corresponding to any objects defined in that file. In 477 particular, read and tokenize C{module_doc.filename}, and 478 process each logical line using L{process_line()}. 479 """ 480 # Keep track of the current line number: 481 lineno = None 482 483 # Use this list to collect the tokens on a single logical line: 484 line_toks = [] 485 486 # This list contains one APIDoc for each indentation level. 487 # The first element is the APIDoc for the module, and each 488 # subsequent element is the APIDoc for the object at that 489 # indentation level. The final element of the list is the 490 # C{APIDoc} for the entity that we're currently processing. 491 parent_docs = [module_doc] 492 493 # The APIDoc for the object that was defined by the previous 494 # line, if any; or None otherwise. This is used to update 495 # parent_docs when we encounter an indent; and to decide what 496 # object (if any) is described by a docstring. 497 prev_line_doc = module_doc 498 499 # A list of comments that occur before or on the current 500 # logical line, used to build the comment docstring. Each 501 # element is a tuple (comment_text, comment_lineno). 502 comments = [] 503 504 # A list of decorator lines that occur before the current 505 # logical line. This is used so we can process a function 506 # declaration line and its decorators all at once. 507 decorators = [] 508 509 # A list of group names, one for each indentation level. This is 510 # used to keep track groups that are defined by comment markers 511 # START_GROUP_MARKER and END_GROUP_MARKER. 512 groups = [None] 513 514 # When we encounter a comment start group marker, set this to the 515 # name of the group; but wait until we're ready to process the 516 # next line before we actually set groups[-1] to this value. This 517 # is necessary because at the top of a block, the tokenizer gives 518 # us comments before the INDENT token; but if we encounter a group 519 # start marker at the top of a block, then we want it to apply 520 # inside that block, not outside it. 521 start_group = None 522 523 # Check if the source file declares an encoding. 524 encoding = get_module_encoding(module_doc.filename) 525 526 # The token-eating loop: 527 try: 528 module_file = codecs.open(module_doc.filename, 'rU', encoding) 529 except LookupError: 530 log.warning("Unknown encoding %r for %s; using the default" 531 "encoding instead (iso-8859-1)" % 532 (encoding, module_doc.filename)) 533 encoding = 'iso-8859-1' 534 module_file = codecs.open(module_doc.filename, 'rU', encoding) 535 tok_iter = tokenize.generate_tokens(module_file.readline) 536 for toktype, toktext, (srow,scol), (erow,ecol), line_str in tok_iter: 537 # BOM encoding marker: ignore. 538 if (toktype == token.ERRORTOKEN and 539 (toktext == u'\ufeff' or 540 toktext.encode(encoding) == '\xef\xbb\xbf')): 541 pass 542 543 # Error token: abort 544 elif toktype == token.ERRORTOKEN: 545 raise ParseError('Error during parsing: invalid syntax ' 546 '(%s, line %d, char %d: %r)' % 547 (module_doc.filename, srow, scol, toktext)) 548 549 # Indent token: update the parent_doc stack. 550 elif toktype == token.INDENT: 551 if prev_line_doc is None: 552 parent_docs.append(parent_docs[-1]) 553 else: 554 parent_docs.append(prev_line_doc) 555 groups.append(None) 556 557 # Dedent token: update the parent_doc stack. 
558 elif toktype == token.DEDENT: 559 if line_toks == []: 560 parent_docs.pop() 561 groups.pop() 562 else: 563 # This *should* only happen if the file ends on an 564 # indented line, with no final newline. 565 # (otherwise, this is the wrong thing to do.) 566 pass 567 568 # Line-internal newline token: if we're still at the start of 569 # the logical line, and we've seen one or more comment lines, 570 # then discard them: blank lines are not allowed between a 571 # comment block and the thing it describes. 572 elif toktype == tokenize.NL: 573 if comments and not line_toks: 574 log.warning('Ignoring docstring comment block followed by ' 575 'a blank line in %r on line %r' % 576 (module_doc.filename, srow-1)) 577 comments = [] 578 579 # Comment token: add to comments if appropriate. 580 elif toktype == tokenize.COMMENT: 581 if toktext.startswith(COMMENT_DOCSTRING_MARKER): 582 comment_line = toktext[len(COMMENT_DOCSTRING_MARKER):].rstrip() 583 if comment_line.startswith(" "): 584 comment_line = comment_line[1:] 585 comments.append( [comment_line, srow]) 586 elif toktext.startswith(START_GROUP_MARKER): 587 start_group = toktext[len(START_GROUP_MARKER):].strip() 588 elif toktext.startswith(END_GROUP_MARKER): 589 for i in range(len(groups)-1, -1, -1): 590 if groups[i]: 591 groups[i] = None 592 break 593 else: 594 log.warning("Got group end marker without a corresponding " 595 "start marker in %r on line %r" % 596 (module_doc.filename, srow)) 597 598 # Normal token: Add it to line_toks. (If it's a non-unicode 599 # string literal, then we need to re-encode using the file's 600 # encoding, to get back to the original 8-bit data; and then 601 # convert that string with 8-bit data to a 7-bit ascii 602 # representation.) 603 elif toktype != token.NEWLINE and toktype != token.ENDMARKER: 604 if lineno is None: lineno = srow 605 if toktype == token.STRING: 606 str_prefixes = re.match('[^\'"]*', toktext).group() 607 if 'u' not in str_prefixes: 608 s = toktext.encode(encoding) 609 toktext = decode_with_backslashreplace(s) 610 line_toks.append( (toktype, toktext) ) 611 612 # Decorator line: add it to the decorators list. 613 elif line_toks and line_toks[0] == (token.OP, '@'): 614 decorators.append(shallow_parse(line_toks)) 615 line_toks = [] 616 617 # End of line token, but nothing to do. 618 elif line_toks == []: 619 pass 620 621 # End of line token: parse the logical line & process it. 622 else: 623 if start_group: 624 groups[-1] = start_group 625 start_group = None 626 627 if parent_docs[-1] != 'skip_block': 628 try: 629 prev_line_doc = process_line( 630 shallow_parse(line_toks), parent_docs, prev_line_doc, 631 lineno, comments, decorators, encoding) 632 except ParseError, e: 633 raise ParseError('Error during parsing: invalid ' 634 'syntax (%s, line %d) -- %s' % 635 (module_doc.filename, lineno, e)) 636 except KeyboardInterrupt, e: raise 637 except Exception, e: 638 log.error('Internal error during parsing (%s, line ' 639 '%s):\n%s' % (module_doc.filename, lineno, e)) 640 raise 641 642 # grouping... 643 if groups[-1] and prev_line_doc not in (None, 'skip_block'): 644 if isinstance(prev_line_doc, VariableDoc): 645 # prev_line_doc's container will only be 646 # UNKNOWN if it's an instance variable that 647 # didn't have a doc-comment, but might still 648 # be followed by a docstring. Since we 649 # tokenize in order, we can't do lookahead to 650 # see if the variable will have a comment; but 651 # it should only be added to the container if 652 # it does. 
So we defer the grouping of that 653 # to be handled by process_docstring instead. 654 if prev_line_doc.container is not UNKNOWN: 655 add_to_group(prev_line_doc.container, 656 prev_line_doc, groups[-1]) 657 elif isinstance(parent_docs[-1], NamespaceDoc): 658 add_to_group(parent_docs[-1], prev_line_doc, 659 groups[-1]) 660 else: 661 prev_line_doc = None 662 663 # Reset line contents. 664 line_toks = [] 665 lineno = None 666 comments = [] 667 decorators = []
668
 669  def add_to_group(container, api_doc, group_name):
 670      if container.group_specs is UNKNOWN:
 671          container.group_specs = []
 672
 673      if isinstance(api_doc, VariableDoc):
 674          var_name = api_doc.name
 675      else:
 676          if api_doc.canonical_name is UNKNOWN: log.debug('ouch', `api_doc`)
 677          var_name = api_doc.canonical_name[-1]
 678
 679      for (name, group_vars) in container.group_specs:
 680          if name == group_name:
 681              group_vars.append(var_name)
 682              return
 683      else:
 684          container.group_specs.append( (group_name, [var_name]) )
685
 686  def script_guard(line):
 687      """Detect the idiomatic trick C{if __name__ == "__main__":}"""
 688      return (len(line) == 5
 689              and line[1][1] == '__name__' # this is the most selective
 690              and line[0][1] == 'if'
 691              and line[2][1] == '=='
 692              and line[4][1] == ':'
 693              and line[3][1][1:-1] == '__main__')
 694
 695  #/////////////////////////////////////////////////////////////////
 696  #{ Shallow parser
 697  #/////////////////////////////////////////////////////////////////
 698
 699  def shallow_parse(line_toks):
 700      """
 701      Given a flat list of tokens, return a nested tree structure
 702      (called a X{token tree}), whose leaves are identical to the
 703      original list, but whose structure reflects the structure
 704      implied by the grouping tokens (i.e., parentheses, braces, and
 705      brackets). If the parentheses, braces, and brackets do not
 706      match, or are not balanced, then raise a ParseError.
 707
 708      In other words, group a flat token sequence into nested lists (group parens).
 709      """
 710      stack = [[]]
 711      parens = []
 712      for tok in line_toks:
 713          toktype, toktext = tok
 714          if toktext in ('(','[','{'):
 715              parens.append(tok)
 716              stack.append([tok])
 717          elif toktext in ('}',']',')'):
 718              if not parens:
 719                  raise ParseError('Unbalanced parens')
 720              left_paren = parens.pop()[1]
 721              if left_paren+toktext not in ('()', '[]', '{}'):
 722                  raise ParseError('Mismatched parens')
 723              lst = stack.pop()
 724              lst.append(tok)
 725              stack[-1].append(lst)
 726          else:
 727              stack[-1].append(tok)
 728      if len(stack) != 1 or len(parens) != 0:
 729          raise ParseError('Unbalanced parens')
 730      return stack[0]
 731
 732  #/////////////////////////////////////////////////////////////////
 733  #{ Line processing
 734  #/////////////////////////////////////////////////////////////////
 735  # The methods process_*() are used to handle lines.
 736
 737  def process_line(line, parent_docs, prev_line_doc, lineno,
 738                   comments, decorators, encoding):
 739      """
 740      @return: C{new-doc}, C{decorator}..?
 741      """
 742      args = (line, parent_docs, prev_line_doc, lineno,
 743              comments, decorators, encoding)
 744
 745      if not line: # blank line.
 746          return None
 747      elif (token.OP, ':') in line[:-1]:
 748          return process_one_line_block(*args)
 749      elif (token.OP, ';') in line:
 750          return process_multi_stmt(*args)
 751      elif line[0] == (token.NAME, 'def'):
 752          return process_funcdef(*args)
 753      elif line[0] == (token.OP, '@'):
 754          return process_funcdef(*args)
 755      elif line[0] == (token.NAME, 'class'):
 756          return process_classdef(*args)
 757      elif line[0] == (token.NAME, 'import'):
 758          return process_import(*args)
 759      elif line[0] == (token.NAME, 'from'):
 760          return process_from_import(*args)
 761      elif line[0] == (token.NAME, 'del'):
 762          return process_del(*args)
 763      elif len(line)==1 and line[0][0] == token.STRING:
 764          return process_docstring(*args)
 765      elif (token.OP, '=') in line:
 766          return process_assignment(*args)
 767      elif (line[0][0] == token.NAME and
 768            line[0][1] in CONTROL_FLOW_KEYWORDS):
 769          return process_control_flow_line(*args)
 770      else:
 771          return None
 772      # [xx] do something with control structures like for/if?
 773
 774  #/////////////////////////////////////////////////////////////////
 775  # Line handler: control flow
 776  #/////////////////////////////////////////////////////////////////
 777
 778  CONTROL_FLOW_KEYWORDS = [
 779      #: A list of the control flow keywords. If a line begins with
 780      #: one of these keywords, then it should be handled by
 781      #: C{process_control_flow_line}.
 782      'if', 'elif', 'else', 'while', 'for', 'try', 'except', 'finally']
 783
784 -def process_control_flow_line(line, parent_docs, prev_line_doc, 785 lineno, comments, decorators, encoding):
786 keyword = line[0][1] 787 788 # If it's a 'for' block: create the loop variable. 789 if keyword == 'for' and PARSE_FOR_BLOCKS: 790 loopvar_name = parse_dotted_name( 791 split_on(line[1:], (token.NAME, 'in'))[0]) 792 parent = get_lhs_parent(loopvar_name, parent_docs) 793 if parent is not None: 794 var_doc = VariableDoc(name=loopvar_name[-1], is_alias=False, 795 is_imported=False, is_instvar=False, 796 docs_extracted_by='parser') 797 set_variable(parent, var_doc) 798 799 if ((keyword == 'if' and PARSE_IF_BLOCKS and not script_guard(line)) or 800 (keyword == 'elif' and PARSE_ELSE_BLOCKS) or 801 (keyword == 'else' and PARSE_ELSE_BLOCKS) or 802 (keyword == 'while' and PARSE_WHILE_BLOCKS) or 803 (keyword == 'for' and PARSE_FOR_BLOCKS) or 804 (keyword == 'try' and PARSE_TRY_BLOCKS) or 805 (keyword == 'except' and PARSE_EXCEPT_BLOCKS) or 806 (keyword == 'finally' and PARSE_FINALLY_BLOCKS)): 807 # Return "None" to indicate that we should process the 808 # block using the same context that we were already in. 809 return None 810 else: 811 # Return 'skip_block' to indicate that we should ignore 812 # the contents of this block. 813 return 'skip_block'
814 815 #///////////////////////////////////////////////////////////////// 816 # Line handler: imports 817 #///////////////////////////////////////////////////////////////// 818 # [xx] I could optionally add ValueDoc's for the imported 819 # variables with proxy_for set to the imported source; but 820 # I don't think I gain much of anything by doing so. 821
822 -def process_import(line, parent_docs, prev_line_doc, lineno, 823 comments, decorators, encoding):
824 if not isinstance(parent_docs[-1], NamespaceDoc): return 825 826 names = split_on(line[1:], (token.OP, ',')) 827 828 for name in names: 829 name_pieces = split_on(name, (token.NAME, 'as')) 830 if len(name_pieces) == 1: 831 src_name = parse_dotted_name(name_pieces[0]) 832 _import_var(src_name, parent_docs) 833 elif len(name_pieces) == 2: 834 if len(name_pieces[1]) != 1: 835 raise ParseError('Expected identifier after "as"') 836 src_name = parse_dotted_name(name_pieces[0]) 837 var_name = parse_name(name_pieces[1][0]) 838 _import_var_as(src_name, var_name, parent_docs) 839 else: 840 raise ParseError('Multiple "as" tokens in import')
841
 842  def process_from_import(line, parent_docs, prev_line_doc, lineno,
 843                          comments, decorators, encoding):
 844      if not isinstance(parent_docs[-1], NamespaceDoc): return
 845
 846      pieces = split_on(line[1:], (token.NAME, 'import'))
 847      if len(pieces) != 2 or not pieces[0] or not pieces[1]:
 848          raise ParseError("Bad from-import")
 849      lhs, rhs = pieces
 850
 851      # The RHS might be parenthesized, as specified by PEP 328:
 852      # http://www.python.org/peps/pep-0328.html
 853      if (len(rhs) == 1 and isinstance(rhs[0], list) and
 854          rhs[0][0] == (token.OP, '(') and rhs[0][-1] == (token.OP, ')')):
 855          rhs = rhs[0][1:-1]
 856
 857      # >>> from __future__ import nested_scopes
 858      if lhs == [(token.NAME, '__future__')]:
 859          return
 860
 861      # >>> from sys import *
 862      elif rhs == [(token.OP, '*')]:
 863          src_name = parse_dotted_name(lhs)
 864          _process_fromstar_import(src_name, parent_docs)
 865
 866      # >>> from os.path import join, split
 867      else:
 868          src_name = parse_dotted_name(lhs)
 869          parts = split_on(rhs, (token.OP, ','))
 870          for part in parts:
 871              # from m import x
 872              if len(part) == 1:
 873                  var_name = parse_name(part[0])
 874                  _import_var_as(DottedName(src_name, var_name),
 875                                 var_name, parent_docs)
 876
 877              # from m import x as y
 878              elif len(part) == 3 and part[1] == (token.NAME, 'as'):
 879                  orig_name = parse_name(part[0])
 880                  var_name = parse_name(part[2])
 881                  _import_var_as(DottedName(src_name, orig_name),
 882                                 var_name, parent_docs)
 883
 884              else:
 885                  raise ParseError("Bad from-import")
886
 887  def _process_fromstar_import(src, parent_docs):
 888      """
 889      Handle a statement of the form:
 890          >>> from <src> import *
 891
 892      If L{IMPORT_HANDLING} is C{'parse'}, then first try to parse
 893      the module C{M{<src>}}, and copy all of its exported variables
 894      to C{parent_docs[-1]}.
 895
 896      Otherwise, try to determine the names of the variables exported by
 897      C{M{<src>}}, and create a new variable for each export. If
 898      L{IMPORT_STAR_HANDLING} is C{'parse'}, then the list of exports is
 899      found by parsing C{M{<src>}}; if it is C{'introspect'}, then the
 900      list of exports is found by importing and introspecting
 901      C{M{<src>}}.
 902      """
 903      # Record the import
 904      parent_docs[0].imports.append(src) # mark that it's .*??
 905
 906      if not isinstance(parent_docs[-1], NamespaceDoc): return
 907
 908      # If src is package-local, then convert it to a global name.
 909      src = _global_name(src, parent_docs)
 910
 911      # [xx] add check for if we already have the source docs in our
 912      # cache??
 913
 914      if (IMPORT_HANDLING == 'parse' or
 915          IMPORT_STAR_HANDLING == 'parse'): # [xx] is this ok?
 916          try: module_doc = _find(src)
 917          except ImportError: module_doc = None
 918          if isinstance(module_doc, ModuleDoc):
 919              for name, imp_var in module_doc.variables.items():
 920                  # [xx] this is not exactly correct, but close. It
 921                  # does the wrong thing if a __var__ is explicitly
 922                  # listed in __all__.
 923                  if (imp_var.is_public and
 924                      not (name.startswith('__') and name.endswith('__'))):
 925                      var_doc = _add_import_var(DottedName(src, name), name,
 926                                                parent_docs[-1])
 927                      if IMPORT_HANDLING == 'parse':
 928                          var_doc.value = imp_var.value
 929
 930      # If we got here, then either IMPORT_HANDLING='link' or we
 931      # failed to parse the `src` module.
 932      if IMPORT_STAR_HANDLING == 'introspect':
 933          try: module = __import__(str(src), {}, {}, [0])
 934          except: return # We couldn't import it.
 935          if module is None: return # We couldn't import it.
 936          if hasattr(module, '__all__'):
 937              names = list(module.__all__)
 938          else:
 939              names = [n for n in dir(module) if not n.startswith('_')]
 940          for name in names:
 941              _add_import_var(DottedName(src, name), name, parent_docs[-1])
942
943 -def _import_var(name, parent_docs):
944 """ 945 Handle a statement of the form: 946 >>> import <name> 947 948 If L{IMPORT_HANDLING} is C{'parse'}, then first try to find 949 the value by parsing; and create an appropriate variable in 950 parentdoc. 951 952 Otherwise, add a variable for the imported variable. (More than 953 one variable may be created for cases like C{'import a.b'}, where 954 we need to create a variable C{'a'} in parentdoc containing a 955 proxy module; and a variable C{'b'} in the proxy module. 956 """ 957 # Record the import 958 parent_docs[0].imports.append(name) 959 960 if not isinstance(parent_docs[-1], NamespaceDoc): return 961 962 # If name is package-local, then convert it to a global name. 963 src = _global_name(name, parent_docs) 964 src_prefix = src[:len(src)-len(name)] 965 966 # [xx] add check for if we already have the source docs in our 967 # cache?? 968 969 if IMPORT_HANDLING == 'parse': 970 # Check to make sure that we can actually find the value. 971 try: val_doc = _find(src) 972 except ImportError: val_doc = None 973 if val_doc is not None: 974 # We found it; but it's not the value itself we want to 975 # import, but the module containing it; so import that 976 # module (=top_mod) and create a variable for it. 977 top_mod = src_prefix+name[0] 978 var_doc = _add_import_var(top_mod, name[0], parent_docs[-1]) 979 var_doc.value = _find(DottedName(name[0])) 980 return 981 982 # If we got here, then either IMPORT_HANDLING='link', or we 983 # did not successfully find the value's docs by parsing; use 984 # a variable with an UNKNOWN value. 985 986 # Create any necessary intermediate proxy module values. 987 container = parent_docs[-1] 988 for i, identifier in enumerate(name[:-1]): 989 if (identifier not in container.variables or 990 not isinstance(container.variables[identifier], ModuleDoc)): 991 var_doc = _add_import_var(name[:i+1], identifier, container) 992 var_doc.value = ModuleDoc(variables={}, sort_spec=[], 993 proxy_for=src_prefix+name[:i+1], 994 submodules={}, 995 docs_extracted_by='parser') 996 container = container.variables[identifier].value 997 998 # Add the variable to the container. 999 _add_import_var(src, name[-1], container)
1000
1001 -def _import_var_as(src, name, parent_docs):
1002 """ 1003 Handle a statement of the form: 1004 >>> import src as name 1005 1006 If L{IMPORT_HANDLING} is C{'parse'}, then first try to find 1007 the value by parsing; and create an appropriate variable in 1008 parentdoc. 1009 1010 Otherwise, create a variables with its C{imported_from} attribute 1011 pointing to the imported object. 1012 """ 1013 # Record the import 1014 parent_docs[0].imports.append(src) 1015 1016 if not isinstance(parent_docs[-1], NamespaceDoc): return 1017 1018 # If src is package-local, then convert it to a global name. 1019 src = _global_name(src, parent_docs) 1020 1021 if IMPORT_HANDLING == 'parse': 1022 # Parse the value and create a variable for it. 1023 try: val_doc = _find(src) 1024 except ImportError: val_doc = None 1025 if val_doc is not None: 1026 var_doc = VariableDoc(name=name, value=val_doc, 1027 is_imported=True, is_alias=False, 1028 imported_from=src, 1029 docs_extracted_by='parser') 1030 set_variable(parent_docs[-1], var_doc) 1031 return 1032 1033 # If we got here, then either IMPORT_HANDLING='link', or we 1034 # did not successfully find the value's docs by parsing; use a 1035 # variable with a proxy value. 1036 _add_import_var(src, name, parent_docs[-1])
1037
1038  def _add_import_var(src, name, container):
1039      """
1040      Add a new imported variable named C{name} to C{container}, with
1041      C{imported_from=src}.
1042      """
1043      var_doc = VariableDoc(name=name, is_imported=True, is_alias=False,
1044                            imported_from=src, docs_extracted_by='parser')
1045      set_variable(container, var_doc)
1046      return var_doc
1047
1048 -def _global_name(name, parent_docs):
1049 """ 1050 If the given name is package-local (relative to the current 1051 context, as determined by C{parent_docs}), then convert it 1052 to a global name. 1053 """ 1054 # Get the containing package from parent_docs. 1055 if parent_docs[0].is_package: 1056 package = parent_docs[0] 1057 else: 1058 package = parent_docs[0].package 1059 1060 # Check each package (from closest to furthest) to see if it 1061 # contains a module named name[0]; if so, then treat `name` as 1062 # relative to that package. 1063 while package not in (None, UNKNOWN): 1064 try: 1065 fp = imp.find_module(name[0], package.path)[0] 1066 if fp is not None: fp.close() 1067 except ImportError: 1068 # No submodule found here; try the next package up. 1069 package = package.package 1070 continue 1071 # A submodule was found; return its name. 1072 return package.canonical_name + name 1073 1074 # We didn't find any package containing `name`; so just return 1075 # `name` as-is. 1076 return name
1077
1078  #/////////////////////////////////////////////////////////////////
1079  # Line handler: assignment
1080  #/////////////////////////////////////////////////////////////////
1081
1082 -def process_assignment(line, parent_docs, prev_line_doc, lineno, 1083 comments, decorators, encoding):
1084 # Divide the assignment statement into its pieces. 1085 pieces = split_on(line, (token.OP, '=')) 1086 1087 lhs_pieces = pieces[:-1] 1088 rhs = pieces[-1] 1089 1090 # Decide whether the variable is an instance variable or not. 1091 # If it's an instance var, then discard the value. 1092 is_instvar = lhs_is_instvar(lhs_pieces, parent_docs) 1093 1094 # if it's not an instance var, and we're not in a namespace, 1095 # then it's just a local var -- so ignore it. 1096 if not (is_instvar or isinstance(parent_docs[-1], NamespaceDoc)): 1097 return None 1098 1099 # Evaluate the right hand side. 1100 if not is_instvar: 1101 rhs_val, is_alias = rhs_to_valuedoc(rhs, parent_docs) 1102 else: 1103 rhs_val, is_alias = UNKNOWN, False 1104 1105 # Assign the right hand side value to each left hand side. 1106 # (Do the rightmost assignment first) 1107 lhs_pieces.reverse() 1108 for lhs in lhs_pieces: 1109 # Try treating the LHS as a simple dotted name. 1110 try: lhs_name = parse_dotted_name(lhs) 1111 except: lhs_name = None 1112 if lhs_name is not None: 1113 lhs_parent = get_lhs_parent(lhs_name, parent_docs) 1114 if lhs_parent is None: continue 1115 1116 # Skip a special class variable. 1117 if lhs_name[-1] == '__slots__': 1118 continue 1119 1120 # Create the VariableDoc. 1121 var_doc = VariableDoc(name=lhs_name[-1], value=rhs_val, 1122 is_imported=False, is_alias=is_alias, 1123 is_instvar=is_instvar, 1124 docs_extracted_by='parser') 1125 # Extract a docstring from the comments, when present, 1126 # but only if there's a single LHS. 1127 if len(lhs_pieces) == 1: 1128 add_docstring_from_comments(var_doc, comments) 1129 1130 # Assign the variable to the containing namespace, 1131 # *unless* the variable is an instance variable 1132 # without a comment docstring. In that case, we'll 1133 # only want to add it if we later discover that it's 1134 # followed by a variable docstring. If it is, then 1135 # process_docstring will take care of adding it to the 1136 # containing clas. (This is a little hackish, but 1137 # unfortunately is necessary because we won't know if 1138 # this assignment line is followed by a docstring 1139 # until later.) 1140 if (not is_instvar) or comments: 1141 set_variable(lhs_parent, var_doc, True) 1142 1143 # If it's the only var, then return the VarDoc for use 1144 # as the new `prev_line_doc`. 1145 if (len(lhs_pieces) == 1 and 1146 (len(lhs_name) == 1 or is_instvar)): 1147 return var_doc 1148 1149 # Otherwise, the LHS must be a complex expression; use 1150 # dotted_names_in() to decide what variables it contains, 1151 # and create VariableDoc's for all of them (with UNKNOWN 1152 # value). 1153 else: 1154 for lhs_name in dotted_names_in(lhs_pieces): 1155 lhs_parent = get_lhs_parent(lhs_name, parent_docs) 1156 if lhs_parent is None: continue 1157 var_doc = VariableDoc(name=lhs_name[-1], 1158 is_imported=False, 1159 is_alias=is_alias, 1160 is_instvar=is_instvar, 1161 docs_extracted_by='parser') 1162 set_variable(lhs_parent, var_doc, True) 1163 1164 # If we have multiple left-hand-sides, then all but the 1165 # rightmost one are considered aliases. 1166 is_alias = True
1167 1168
1169  def lhs_is_instvar(lhs_pieces, parent_docs):
1170      if not isinstance(parent_docs[-1], RoutineDoc):
1171          return False
1172      # make sure that lhs_pieces is <self>.<name>, where <self> is
1173      # the name of the first arg to the containing routinedoc, and
1174      # <name> is a simple name.
1175      posargs = parent_docs[-1].posargs
1176      if posargs is UNKNOWN: return False
1177      if not (len(lhs_pieces)==1 and len(posargs) > 0 and
1178              len(lhs_pieces[0]) == 3 and
1179              lhs_pieces[0][0] == (token.NAME, posargs[0]) and
1180              lhs_pieces[0][1] == (token.OP, '.') and
1181              lhs_pieces[0][2][0] == token.NAME):
1182          return False
1183      # Make sure we're in an instance method, and not a
1184      # module-level function.
1185      for i in range(len(parent_docs)-1, -1, -1):
1186          if isinstance(parent_docs[i], ClassDoc):
1187              return True
1188          elif parent_docs[i] != parent_docs[-1]:
1189              return False
1190      return False
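
# As a sketch: inside C{def __init__(self, x): self.x = x}, the assignment's
# left-hand side reaches this function as a single piece of three tokens,
# roughly
#
#     [[(token.NAME, 'self'), (token.OP, '.'), (token.NAME, 'x')]]
#
# which matches the <self>.<name> pattern tested above, so (when the
# enclosing function sits inside a class) the target is treated as an
# instance variable.
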
1191
1192 -def rhs_to_valuedoc(rhs, parent_docs):
1193 # Dotted variable: 1194 try: 1195 rhs_name = parse_dotted_name(rhs) 1196 rhs_val = lookup_value(rhs_name, parent_docs) 1197 if rhs_val is not None and rhs_val is not UNKNOWN: 1198 return rhs_val, True 1199 except ParseError: 1200 pass 1201 1202 # Decorators: 1203 if (len(rhs)==2 and rhs[0][0] == token.NAME and 1204 isinstance(rhs[1], list)): 1205 arg_val, _ = rhs_to_valuedoc(rhs[1][1:-1], parent_docs) 1206 if isinstance(arg_val, RoutineDoc): 1207 doc = apply_decorator(DottedName(rhs[0][1]), arg_val) 1208 doc.canonical_name = UNKNOWN 1209 doc.parse_repr = pp_toktree(rhs) 1210 return doc, False 1211 1212 # Nothing else to do: make a val with the source as its repr. 1213 return GenericValueDoc(parse_repr=pp_toktree(rhs), toktree=rhs, 1214 defining_module=parent_docs[0], 1215 docs_extracted_by='parser'), False
1216
1217 -def get_lhs_parent(lhs_name, parent_docs):
1218 assert isinstance(lhs_name, DottedName) 1219 1220 # For instance vars inside an __init__ method: 1221 if isinstance(parent_docs[-1], RoutineDoc): 1222 for i in range(len(parent_docs)-1, -1, -1): 1223 if isinstance(parent_docs[i], ClassDoc): 1224 return parent_docs[i] 1225 else: 1226 raise ValueError("%r is not a namespace or method" % 1227 parent_docs[-1]) 1228 1229 # For local variables: 1230 if len(lhs_name) == 1: 1231 return parent_docs[-1] 1232 1233 # For non-local variables: 1234 return lookup_value(lhs_name.container(), parent_docs)
1235
1236  #/////////////////////////////////////////////////////////////////
1237  # Line handler: single-line blocks
1238  #/////////////////////////////////////////////////////////////////
1239
1240 -def process_one_line_block(line, parent_docs, prev_line_doc, lineno, 1241 comments, decorators, encoding):
1242 """ 1243 The line handler for single-line blocks, such as: 1244 1245 >>> def f(x): return x*2 1246 1247 This handler calls L{process_line} twice: once for the tokens 1248 up to and including the colon, and once for the remaining 1249 tokens. The comment docstring is applied to the first line 1250 only. 1251 @return: C{None} 1252 """ 1253 i = line.index((token.OP, ':')) 1254 doc1 = process_line(line[:i+1], parent_docs, prev_line_doc, 1255 lineno, comments, decorators, encoding) 1256 doc2 = process_line(line[i+1:], parent_docs+[doc1], 1257 doc1, lineno, None, [], encoding) 1258 return doc1
1259
1260  #/////////////////////////////////////////////////////////////////
1261  # Line handler: semicolon-separated statements
1262  #/////////////////////////////////////////////////////////////////
1263
1264 -def process_multi_stmt(line, parent_docs, prev_line_doc, lineno, 1265 comments, decorators, encoding):
1266 """ 1267 The line handler for semicolon-separated statements, such as: 1268 1269 >>> x=1; y=2; z=3 1270 1271 This handler calls L{process_line} once for each statement. 1272 The comment docstring is not passed on to any of the 1273 sub-statements. 1274 @return: C{None} 1275 """ 1276 for statement in split_on(line, (token.OP, ';')): 1277 if not statement: continue 1278 doc = process_line(statement, parent_docs, prev_line_doc, 1279 lineno, None, decorators, encoding) 1280 prev_line_doc = doc 1281 decorators = [] 1282 return None
1283
1284  #/////////////////////////////////////////////////////////////////
1285  # Line handler: delete statements
1286  #/////////////////////////////////////////////////////////////////
1287
1288 -def process_del(line, parent_docs, prev_line_doc, lineno, 1289 comments, decorators, encoding):
1290 """ 1291 The line handler for delete statements, such as: 1292 1293 >>> del x, y.z 1294 1295 This handler calls L{del_variable} for each dotted variable in 1296 the variable list. The variable list may be nested. Complex 1297 expressions in the variable list (such as C{x[3]}) are ignored. 1298 @return: C{None} 1299 """ 1300 # If we're not in a namespace, then ignore it. 1301 parent_doc = parent_docs[-1] 1302 if not isinstance(parent_doc, NamespaceDoc): return 1303 1304 var_list = split_on(line[1:], (token.OP, ',')) 1305 for var_name in dotted_names_in(var_list): 1306 del_variable(parent_docs[-1], var_name) 1307 1308 return None
1309
1310  #/////////////////////////////////////////////////////////////////
1311  # Line handler: docstrings
1312  #/////////////////////////////////////////////////////////////////
1313
1314 -def process_docstring(line, parent_docs, prev_line_doc, lineno, 1315 comments, decorators, encoding):
1316 """ 1317 The line handler for bare string literals. If 1318 C{prev_line_doc} is not C{None}, then the string literal is 1319 added to that C{APIDoc} as a docstring. If it already has a 1320 docstring (from comment docstrings), then the new docstring 1321 will be appended to the old one. 1322 """ 1323 if prev_line_doc is None: return 1324 docstring = parse_string(line) 1325 1326 # If the docstring is a str, then convert it to unicode. 1327 # According to a strict reading of PEP 263, this might not be the 1328 # right thing to do; but it will almost always be what the 1329 # module's author intended. 1330 if isinstance(docstring, str): 1331 try: 1332 docstring = docstring.decode(encoding) 1333 except UnicodeDecodeError: 1334 # If decoding failed, then fall back on using 1335 # decode_with_backslashreplace, which will map e.g. 1336 # "\xe9" -> u"\\xe9". 1337 docstring = decode_with_backslashreplace(docstring) 1338 log.warning("While parsing %s: docstring is not a unicode " 1339 "string, but it contains non-ascii data." % 1340 prev_line_doc.canonical_name) 1341 1342 # If the modified APIDoc is an instance variable, and it has 1343 # not yet been added to its class's C{variables} list, 1344 # then add it now. This is done here, rather than in the 1345 # process_assignment() call that created the variable, because 1346 # we only want to add instance variables if they have an 1347 # associated docstring. (For more info, see the comment above 1348 # the set_variable() call in process_assignment().) 1349 added_instvar = False 1350 if (isinstance(prev_line_doc, VariableDoc) and 1351 prev_line_doc.is_instvar and 1352 prev_line_doc.docstring in (None, UNKNOWN)): 1353 for i in range(len(parent_docs)-1, -1, -1): 1354 if isinstance(parent_docs[i], ClassDoc): 1355 set_variable(parent_docs[i], prev_line_doc, True) 1356 added_instvar = True 1357 break 1358 1359 if prev_line_doc.docstring not in (None, UNKNOWN): 1360 log.warning("%s has both a comment-docstring and a normal " 1361 "(string) docstring; ignoring the comment-" 1362 "docstring." % prev_line_doc.canonical_name) 1363 1364 prev_line_doc.docstring = docstring 1365 prev_line_doc.docstring_lineno = lineno 1366 1367 # If the modified APIDoc is an instance variable, and we added it 1368 # to the class's variables list here, then it still needs to be 1369 # grouped too; so return it for use as the new "prev_line_doc." 1370 if added_instvar: 1371 return prev_line_doc
1372
1373
1374  #/////////////////////////////////////////////////////////////////
1375  # Line handler: function declarations
1376  #/////////////////////////////////////////////////////////////////
1377
1378 -def process_funcdef(line, parent_docs, prev_line_doc, lineno, 1379 comments, decorators, encoding):
1380 """ 1381 The line handler for function declaration lines, such as: 1382 1383 >>> def f(a, b=22, (c,d)): 1384 1385 This handler creates and initializes a new C{VariableDoc} 1386 containing a C{RoutineDoc}, adds the C{VariableDoc} to the 1387 containing namespace, and returns the C{RoutineDoc}. 1388 """ 1389 # Check syntax. 1390 if len(line) != 4 or line[3] != (token.OP, ':'): 1391 raise ParseError("Bad function definition line") 1392 1393 # If we're not in a namespace, then ignore it. 1394 parent_doc = parent_docs[-1] 1395 if not isinstance(parent_doc, NamespaceDoc): return 1396 1397 # Get the function's name 1398 func_name = parse_name(line[1]) 1399 canonical_name = DottedName(parent_doc.canonical_name, func_name) 1400 1401 # Create the function's RoutineDoc. 1402 func_doc = RoutineDoc(canonical_name=canonical_name, 1403 defining_module=parent_docs[0], 1404 lineno=lineno, docs_extracted_by='parser') 1405 1406 # Process the signature. 1407 init_arglist(func_doc, line[2]) 1408 1409 # If the preceeding comment includes a docstring, then add it. 1410 add_docstring_from_comments(func_doc, comments) 1411 1412 # Apply any decorators. 1413 func_doc.decorators = [pp_toktree(deco[1:]) for deco in decorators] 1414 decorators.reverse() 1415 for decorator in decorators: 1416 try: 1417 deco_name = parse_dotted_name(decorator[1:]) 1418 except ParseError: 1419 deco_name = None 1420 if func_doc.canonical_name is not UNKNOWN: 1421 deco_repr = '%s(%s)' % (pp_toktree(decorator[1:]), 1422 func_doc.canonical_name) 1423 elif func_doc.parse_repr not in (None, UNKNOWN): 1424 # [xx] this case should be improved.. when will func_doc 1425 # have a known parse_repr?? 1426 deco_repr = '%s(%s)' % (pp_toktree(decorator[1:]), 1427 func_doc.parse_repr) 1428 else: 1429 deco_repr = UNKNOWN 1430 func_doc = apply_decorator(deco_name, func_doc) 1431 func_doc.parse_repr = deco_repr 1432 # [XX] Is there a reson the following should be done? It 1433 # causes the grouping code to break. Presumably the canonical 1434 # name should remain valid if we're just applying a standard 1435 # decorator. 1436 #func_doc.canonical_name = UNKNOWN 1437 1438 # Add a variable to the containing namespace. 1439 var_doc = VariableDoc(name=func_name, value=func_doc, 1440 is_imported=False, is_alias=False, 1441 docs_extracted_by='parser') 1442 set_variable(parent_doc, var_doc) 1443 1444 # Return the new ValueDoc. 1445 return func_doc
1446
1447 -def apply_decorator(decorator_name, func_doc):
1448 # [xx] what if func_doc is not a RoutineDoc? 1449 if decorator_name == DottedName('staticmethod'): 1450 return StaticMethodDoc(**func_doc.__dict__) 1451 elif decorator_name == DottedName('classmethod'): 1452 return ClassMethodDoc(**func_doc.__dict__) 1453 elif DEFAULT_DECORATOR_BEHAVIOR == 'transparent': 1454 return func_doc.__class__(**func_doc.__dict__) # make a copy. 1455 elif DEFAULT_DECORATOR_BEHAVIOR == 'opaque': 1456 return GenericValueDoc(docs_extracted_by='parser') 1457 else: 1458 raise ValueError, 'Bad value for DEFAULT_DECORATOR_BEHAVIOR'
1459
1460  def init_arglist(func_doc, arglist):
1461      if not isinstance(arglist, list) or arglist[0] != (token.OP, '('):
1462          raise ParseError("Bad argument list")
1463
1464      # Initialize to defaults.
1465      func_doc.posargs = []
1466      func_doc.posarg_defaults = []
1467      func_doc.vararg = None
1468      func_doc.kwarg = None
1469
1470      # Divide the arglist into individual args.
1471      args = split_on(arglist[1:-1], (token.OP, ','))
1472
1473      # Keyword argument.
1474      if args and args[-1][0] == (token.OP, '**'):
1475          if len(args[-1]) != 2 or args[-1][1][0] != token.NAME:
1476              raise ParseError("Expected name after ** in argument list")
1477          func_doc.kwarg = args[-1][1][1]
1478          args.pop()
1479
1480      # Vararg argument.
1481      if args and args[-1][0] == (token.OP, '*'):
1482          if len(args[-1]) != 2 or args[-1][1][0] != token.NAME:
1483              raise ParseError("Expected name after * in argument list")
1484          func_doc.vararg = args[-1][1][1]
1485          args.pop()
1486
1487      # Positional arguments.
1488      for arg in args:
1489          func_doc.posargs.append(parse_funcdef_arg(arg[0]))
1490          if len(arg) == 1:
1491              func_doc.posarg_defaults.append(None)
1492          elif arg[1] != (token.OP, '=') or len(arg) == 2:
1493              raise ParseError("Bad argument list")
1494          else:
1495              default_repr = pp_toktree(arg[2:], 'tight')
1496              default_val = GenericValueDoc(parse_repr=default_repr,
1497                                            docs_extracted_by='parser')
1498              func_doc.posarg_defaults.append(default_val)
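
# A sketch of the values extracted for a signature such as
# C{def f(a, b=22, *args, **kw)} once its argument token tree is processed:
#
#     func_doc.posargs          ==  ['a', 'b']
#     func_doc.posarg_defaults  ==  [None, <GenericValueDoc for '22'>]
#     func_doc.vararg           ==  'args'
#     func_doc.kwarg            ==  'kw'
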
1499  
1500  #/////////////////////////////////////////////////////////////////
1501  # Line handler: class declarations
1502  #/////////////////////////////////////////////////////////////////
1503  
1504  def process_classdef(line, parent_docs, prev_line_doc, lineno,
1505                       comments, decorators, encoding):
1506      """
1507      The line handler for class declaration lines, such as:
1508  
1509          >>> class Foo(Bar, Baz):
1510  
1511      This handler creates and initializes a new C{VariableDoc}
1512      containing a C{ClassDoc}, adds the C{VariableDoc} to the
1513      containing namespace, and returns the C{ClassDoc}.
1514      """
1515      # Check syntax
1516      if len(line)<3 or len(line)>4 or line[-1] != (token.OP, ':'):
1517          raise ParseError("Bad class definition line")
1518  
1519      # If we're not in a namespace, then ignore it.
1520      parent_doc = parent_docs[-1]
1521      if not isinstance(parent_doc, NamespaceDoc): return
1522  
1523      # Get the class's name
1524      class_name = parse_name(line[1])
1525      canonical_name = DottedName(parent_doc.canonical_name, class_name)
1526  
1527      # Create the class's ClassDoc & VariableDoc.
1528      class_doc = ClassDoc(variables={}, sort_spec=[],
1529                           bases=[], subclasses=[],
1530                           canonical_name=canonical_name,
1531                           defining_module=parent_docs[0],
1532                           docs_extracted_by='parser')
1533      var_doc = VariableDoc(name=class_name, value=class_doc,
1534                            is_imported=False, is_alias=False,
1535                            docs_extracted_by='parser')
1536  
1537      # Add the bases.
1538      if len(line) == 4:
1539          if (not isinstance(line[2], list) or
1540              line[2][0] != (token.OP, '(')):
1541              raise ParseError("Expected base list")
1542          try:
1543              for base_name in parse_classdef_bases(line[2]):
1544                  class_doc.bases.append(find_base(base_name, parent_docs))
1545          except ParseError, e:
1546              log.warning("Unable to extract the base list for %s: %s" %
1547                          (canonical_name, e))
1548              class_doc.bases = UNKNOWN
1549      else:
1550          class_doc.bases = []
1551  
1552      # Register ourselves as a subclass to our bases.
1553      if class_doc.bases is not UNKNOWN:
1554          for basedoc in class_doc.bases:
1555              if isinstance(basedoc, ClassDoc):
1556                  # This test avoids listing a subclass twice when both
1557                  # introspection and parsing are used.
1558                  # [XXX] This check only works because currently parsing is
1559                  # always performed just after introspection of the same
1560                  # class.  A more complete fix should be independent of
1561                  # calling order; probably the subclasses list should be
1562                  # replaced by a ClassDoc set or a {name: ClassDoc} mapping.
1563                  if (basedoc.subclasses
1564                      and basedoc.subclasses[-1].canonical_name
1565                          != class_doc.canonical_name):
1566                      basedoc.subclasses.append(class_doc)
1567  
1568      # If the preceding comment includes a docstring, then add it.
1569      add_docstring_from_comments(class_doc, comments)
1570  
1571      # Add the VariableDoc to our container.
1572      set_variable(parent_doc, var_doc)
1573  
1574      return class_doc
1575
1576  def _proxy_base(**attribs):
1577      return ClassDoc(variables={}, sort_spec=[], bases=[], subclasses=[],
1578                      docs_extracted_by='parser', **attribs)
1579
1580  def find_base(name, parent_docs):
1581      assert isinstance(name, DottedName)
1582  
1583      # Find the variable containing the base.
1584      base_var = lookup_variable(name, parent_docs)
1585      if base_var is None:
1586          # If we didn't find it, then it must have been imported.
1587          # First, check if it looks like it's contained in any
1588          # known imported variable:
1589          if len(name) > 1:
1590              src = lookup_name(name[0], parent_docs)
1591              if (src is not None and
1592                  src.imported_from not in (None, UNKNOWN)):
1593                  base_src = DottedName(src.imported_from, name[1:])
1594                  base_var = VariableDoc(name=name[-1], is_imported=True,
1595                                         is_alias=False,
1596                                         imported_from=base_src,
1597                                         docs_extracted_by='parser')
1598          # Otherwise, it must have come from an "import *" statement
1599          # (or from magic, such as direct manipulation of the module's
1600          # dictionary), so we don't know where it came from.  So
1601          # there's nothing left but to use an empty proxy.
1602          if base_var is None:
1603              return _proxy_base(parse_repr=str(name))
1604              #raise ParseError("Could not find %s" % name)
1605  
1606      # If the variable has a value, return that value.
1607      if base_var.value is not UNKNOWN:
1608          return base_var.value
1609  
1610      # Otherwise, if BASE_HANDLING is 'parse', try parsing the docs for
1611      # the base class; if that fails, or if BASE_HANDLING is 'link',
1612      # just make a proxy object.
1613      if base_var.imported_from not in (None, UNKNOWN):
1614          if BASE_HANDLING == 'parse':
1615              old_sys_path = sys.path
1616              try:
1617                  dirname = os.path.split(parent_docs[0].filename)[0]
1618                  sys.path = [dirname] + sys.path
1619                  try:
1620                      return parse_docs(name=str(base_var.imported_from))
1621                  except ParseError:
1622                      log.info('Unable to parse base', base_var.imported_from)
1623                  except ImportError:
1624                      log.info('Unable to find base', base_var.imported_from)
1625              finally:
1626                  sys.path = old_sys_path
1627  
1628          # Either BASE_HANDLING='link' or parsing the base class failed;
1629          # return a proxy value for the base class.
1630          return _proxy_base(proxy_for=base_var.imported_from)
1631      else:
1632          return _proxy_base(parse_repr=str(name))
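When the base cannot be resolved at all, find_base() falls back to an empty proxy ClassDoc. A small sketch, assuming epydoc is importable and that ModuleDoc accepts these keyword attributes; 'ExternalBase' is a made-up name:

    from epydoc.apidoc import DottedName, ModuleDoc, ClassDoc
    from epydoc.docparser import find_base

    module = ModuleDoc(canonical_name=DottedName('m'), variables={},
                       sort_spec=[], docs_extracted_by='parser')

    # 'ExternalBase' is not defined in the module (or in the builtins),
    # so an empty proxy ClassDoc comes back carrying only a parse_repr.
    proxy = find_base(DottedName('ExternalBase'), [module])
    assert isinstance(proxy, ClassDoc)
    assert proxy.parse_repr == 'ExternalBase'
    assert proxy.bases == []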
1633  
1634  #/////////////////////////////////////////////////////////////////
1635  #{ Parsing
1636  #/////////////////////////////////////////////////////////////////
1637  def dotted_names_in(elt_list):
1638      """
1639      Return a list of all simple dotted names in the given
1640      expression.
1641      """
1642      names = []
1643      while elt_list:
1644          elt = elt_list.pop()
1645          if len(elt) == 1 and isinstance(elt[0], list):
1646              # Nested list: process the contents
1647              elt_list.extend(split_on(elt[0][1:-1], (token.OP, ',')))
1648          else:
1649              try:
1650                  names.append(parse_dotted_name(elt))
1651              except ParseError:
1652                  pass # complex expression -- ignore
1653      return names
1654
1655  def parse_name(elt, strip_parens=False):
1656      """
1657      If the given token tree element is a name token, then return
1658      that name as a string.  Otherwise, raise ParseError.
1659      @param strip_parens: If true, then if elt is a single name
1660          enclosed in parentheses, then return that name.
1661      """
1662      if strip_parens and isinstance(elt, list):
1663          while (isinstance(elt, list) and len(elt) == 3 and
1664                 elt[0] == (token.OP, '(') and
1665                 elt[-1] == (token.OP, ')')):
1666              elt = elt[1]
1667      if isinstance(elt, list) or elt[0] != token.NAME:
1668          raise ParseError("Bad name")
1669      return elt[1]
1670
1671  def parse_dotted_name(elt_list, strip_parens=True):
1672      """
1673      @bug: does not handle 'x.(y).z'
1674      """
1675      if len(elt_list) == 0: raise ParseError("Bad dotted name")
1676  
1677      # Handle ((x.y).z).  (If the contents of the parens include
1678      # anything other than dotted names, such as (x,y), then we'll
1679      # catch it below and raise a ParseError.)
1680      while (isinstance(elt_list[0], list) and
1681             len(elt_list[0]) >= 3 and
1682             elt_list[0][0] == (token.OP, '(') and
1683             elt_list[0][-1] == (token.OP, ')')):
1684          elt_list[:1] = elt_list[0][1:-1]
1685  
1686      if len(elt_list) % 2 != 1: raise ParseError("Bad dotted name")
1687      name = DottedName(parse_name(elt_list[0], True))
1688      for i in range(2, len(elt_list), 2):
1689          dot, identifier = elt_list[i-1], elt_list[i]
1690          if dot != (token.OP, '.'):
1691              raise ParseError("Bad dotted name")
1692          name = DottedName(name, parse_name(identifier, True))
1693      return name
1694
1695  def split_on(elt_list, split_tok):
1696      # [xx] add code to guarantee each elt is non-empty.
1697      result = [[]]
1698      for elt in elt_list:
1699          if elt == split_tok:
1700              if result[-1] == []: raise ParseError("Empty element from split")
1701              result.append([])
1702          else:
1703              result[-1].append(elt)
1704      if result[-1] == []: result.pop()
1705      return result
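For example (assuming epydoc is importable), split_on() turns a comma-separated token list into a list of token groups, and rejects empty elements:

    import token
    from epydoc.docparser import split_on, ParseError

    toks = [(token.NAME, 'x'), (token.OP, ','), (token.NAME, 'y')]
    assert split_on(toks, (token.OP, ',')) == [[(token.NAME, 'x')],
                                               [(token.NAME, 'y')]]

    # A leading or doubled separator raises ParseError.
    try:
        split_on([(token.OP, ','), (token.NAME, 'x')], (token.OP, ','))
    except ParseError:
        pass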
1706
1707  def parse_funcdef_arg(elt):
1708      """
1709      If the given tree token element contains a valid function
1710      definition argument (i.e., an identifier token or nested list
1711      of identifiers), then return a corresponding string identifier
1712      or nested list of string identifiers.  Otherwise, raise a
1713      ParseError.
1714      """
1715      if isinstance(elt, list):
1716          if elt[0] == (token.OP, '('):
1717              if len(elt) == 3:
1718                  return parse_funcdef_arg(elt[1])
1719              else:
1720                  return [parse_funcdef_arg(e)
1721                          for e in elt[1:-1]
1722                          if e != (token.OP, ',')]
1723          else:
1724              raise ParseError("Bad argument -- expected name or tuple")
1725      elif elt[0] == token.NAME:
1726          return elt[1]
1727      else:
1728          raise ParseError("Bad argument -- expected name or tuple")
1729
1730  def parse_classdef_bases(elt):
1731      """
1732      If the given tree token element contains a valid base list
1733      (that contains only dotted names), then return a corresponding
1734      list of L{DottedName}s.  Otherwise, raise a ParseError.
1735  
1736      @bug: Does not handle either of::
1737          - class A( (base.in.parens) ): pass
1738          - class B( (lambda:calculated.base)() ): pass
1739      """
1740      if (not isinstance(elt, list) or
1741          elt[0] != (token.OP, '(')):
1742          raise ParseError("Bad base list")
1743  
1744      return [parse_dotted_name(n)
1745              for n in split_on(elt[1:-1], (token.OP, ','))]
1746  
1747  # Used by: base list; 'del'; ...
1748  def parse_dotted_name_list(elt_list):
1749      """
1750      If the given list of tree token elements contains a
1751      comma-separated list of dotted names, then return a
1752      corresponding list of L{DottedName} objects.  Otherwise, raise
1753      ParseError.
1754      """
1755      names = []
1756  
1757      state = 0
1758      for elt in elt_list:
1759          # State 0 -- Expecting a name, or end of arglist
1760          if state == 0:
1761              # Make sure it's a name
1762              if isinstance(elt, tuple) and elt[0] == token.NAME:
1763                  names.append(DottedName(elt[1]))
1764                  state = 1
1765              else:
1766                  raise ParseError("Expected a name")
1767          # State 1 -- Expecting comma, period, or end of arglist
1768          elif state == 1:
1769              if elt == (token.OP, '.'):
1770                  state = 2
1771              elif elt == (token.OP, ','):
1772                  state = 0
1773              else:
1774                  raise ParseError("Expected '.' or ',' or end of list")
1775          # State 2 -- Continuation of dotted name.
1776          elif state == 2:
1777              if isinstance(elt, tuple) and elt[0] == token.NAME:
1778                  names[-1] = DottedName(names[-1], elt[1])
1779                  state = 1
1780              else:
1781                  raise ParseError("Expected a name")
1782      if state == 2:
1783          raise ParseError("Expected a name")
1784      return names
1785
1786  def parse_string(elt_list):
1787      if len(elt_list) == 1 and elt_list[0][0] == token.STRING:
1788          # [xx] use something safer here?  But it needs to deal with
1789          # any string type (eg r"foo\bar" etc).
1790          return eval(elt_list[0][1])
1791      else:
1792          raise ParseError("Expected a string")
1793  
1794  # ['1', 'b', 'c']
1795  def parse_string_list(elt_list):
1796      if (len(elt_list) == 1 and isinstance(elt_list[0], list) and
1797          elt_list[0][0][1] in ('(', '[')):
1798          elt_list = elt_list[0][1:-1]
1799  
1800      string_list = []
1801      for string_elt in split_on(elt_list, (token.OP, ',')):
1802          string_list.append(parse_string(string_elt))
1803  
1804      return string_list
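For example (assuming epydoc is importable), an __all__-style list literal parses as follows:

    import token
    from epydoc.docparser import parse_string, parse_string_list

    assert parse_string([(token.STRING, "'hello'")]) == 'hello'

    # The token tree for "['a', 'b']" -- a single nested list element.
    elts = [[(token.OP, '['),
             (token.STRING, "'a'"), (token.OP, ','), (token.STRING, "'b'"),
             (token.OP, ']')]]
    assert parse_string_list(elts) == ['a', 'b']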
1805  
1806  #/////////////////////////////////////////////////////////////////
1807  #{ Variable Manipulation
1808  #/////////////////////////////////////////////////////////////////
1809  
1810  def set_variable(namespace, var_doc, preserve_docstring=False):
1811      """
1812      Add var_doc to namespace.  If namespace already contains a
1813      variable with the same name, then discard the old variable.  If
1814      C{preserve_docstring} is true, then keep the old variable's
1815      docstring when overwriting a variable.
1816      """
1817      # Only namespaces can contain variables; silently ignore others.
1818      if not isinstance(namespace, NamespaceDoc):
1819          return
1820      # If we already have a variable with this name, then remove the
1821      # old VariableDoc from the sort_spec list; and if we gave its
1822      # value a canonical name, then delete it.
1823      if var_doc.name in namespace.variables:
1824          namespace.sort_spec.remove(var_doc.name)
1825          old_var_doc = namespace.variables[var_doc.name]
1826          if (old_var_doc.is_alias == False and
1827              old_var_doc.value is not UNKNOWN):
1828              old_var_doc.value.canonical_name = UNKNOWN
1829          if (preserve_docstring and var_doc.docstring in (None, UNKNOWN) and
1830              old_var_doc.docstring not in (None, UNKNOWN)):
1831              var_doc.docstring = old_var_doc.docstring
1832              var_doc.docstring_lineno = old_var_doc.docstring_lineno
1833      # Add the variable to the namespace.
1834      namespace.variables[var_doc.name] = var_doc
1835      namespace.sort_spec.append(var_doc.name)
1836      assert var_doc.container is UNKNOWN
1837      var_doc.container = namespace
1838
1839  def del_variable(namespace, name):
1840      if not isinstance(namespace, NamespaceDoc):
1841          return
1842  
1843      if name[0] in namespace.variables:
1844          if len(name) == 1:
1845              var_doc = namespace.variables[name[0]]
1846              namespace.sort_spec.remove(name[0])
1847              del namespace.variables[name[0]]
1848              if not var_doc.is_alias and var_doc.value is not UNKNOWN:
1849                  var_doc.value.canonical_name = UNKNOWN
1850          else:
1851              del_variable(namespace.variables[name[0]].value, name[1:])
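A hedged sketch of the variable bookkeeping, assuming epydoc is importable and that ModuleDoc accepts these keyword attributes; the names are illustrative:

    from epydoc.apidoc import (DottedName, ModuleDoc, VariableDoc,
                               GenericValueDoc)
    from epydoc.docparser import set_variable, del_variable

    mod = ModuleDoc(canonical_name=DottedName('m'), variables={},
                    sort_spec=[], docs_extracted_by='parser')
    var = VariableDoc(name='x',
                      value=GenericValueDoc(parse_repr='42',
                                            docs_extracted_by='parser'),
                      is_imported=False, is_alias=False,
                      docs_extracted_by='parser')

    set_variable(mod, var)
    assert mod.variables['x'] is var and mod.sort_spec == ['x']

    del_variable(mod, DottedName('x'))
    assert 'x' not in mod.variables and mod.sort_spec == []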
1852  
1853  #/////////////////////////////////////////////////////////////////
1854  #{ Name Lookup
1855  #/////////////////////////////////////////////////////////////////
1856  
1857  def lookup_name(identifier, parent_docs):
1858      """
1859      Find and return the documentation for the variable named by
1860      the given identifier.
1861  
1862      @rtype: L{VariableDoc} or C{None}
1863      """
1864      # We need to check 3 namespaces: locals, globals, and builtins.
1865      # Note that this is true even if we're in a version of python with
1866      # nested scopes, because nested scope lookup does not apply to
1867      # nested class definitions, and we're not worried about variables
1868      # in nested functions.
1869      if not isinstance(identifier, basestring):
1870          raise TypeError('identifier must be a string')
1871  
1872      # Locals
1873      if isinstance(parent_docs[-1], NamespaceDoc):
1874          if parent_docs[-1].variables.has_key(identifier):
1875              return parent_docs[-1].variables[identifier]
1876  
1877      # Globals (aka the containing module)
1878      if isinstance(parent_docs[0], NamespaceDoc):
1879          if parent_docs[0].variables.has_key(identifier):
1880              return parent_docs[0].variables[identifier]
1881  
1882      # Builtins
1883      builtins = epydoc.docintrospecter.introspect_docs(__builtin__)
1884      if isinstance(builtins, NamespaceDoc):
1885          if builtins.variables.has_key(identifier):
1886              return builtins.variables[identifier]
1887  
1888      # We didn't find it; return None.
1889      return None
1890
1891  def lookup_variable(dotted_name, parent_docs):
1892      assert isinstance(dotted_name, DottedName)
1893      # If it's a simple identifier, use lookup_name.
1894      if len(dotted_name) == 1:
1895          return lookup_name(dotted_name[0], parent_docs)
1896  
1897      # If it's a dotted name with multiple pieces, look up the
1898      # namespace containing the var (=parent) first; and then
1899      # look for the var in that namespace.
1900      else:
1901          parent = lookup_value(dotted_name[:-1], parent_docs)
1902          if (isinstance(parent, NamespaceDoc) and
1903              dotted_name[-1] in parent.variables):
1904              return parent.variables[dotted_name[-1]]
1905          else:
1906              return None # var not found.
1907
1908  def lookup_value(dotted_name, parent_docs):
1909      """
1910      Find and return the documentation for the value contained in
1911      the variable with the given name in the current namespace.
1912      """
1913      assert isinstance(dotted_name, DottedName)
1914      var_doc = lookup_name(dotted_name[0], parent_docs)
1915  
1916      for i in range(1, len(dotted_name)):
1917          if var_doc is None: return None
1918  
1919          if isinstance(var_doc.value, NamespaceDoc):
1920              var_dict = var_doc.value.variables
1921          elif (var_doc.value is UNKNOWN and
1922                var_doc.imported_from not in (None, UNKNOWN)):
1923              src_name = var_doc.imported_from + dotted_name[i:]
1924              # [xx] do I want to create a proxy here??
1925              return GenericValueDoc(proxy_for=src_name,
1926                                     parse_repr=str(dotted_name),
1927                                     docs_extracted_by='parser')
1928          else:
1929              return None
1930  
1931          var_doc = var_dict.get(dotted_name[i])
1932  
1933      if var_doc is None: return None
1934      return var_doc.value
1935  
1936  #/////////////////////////////////////////////////////////////////
1937  #{ Docstring Comments
1938  #/////////////////////////////////////////////////////////////////
1939  
1940  def add_docstring_from_comments(api_doc, comments):
1941      if api_doc is None or not comments: return
1942      api_doc.docstring = '\n'.join([line for (line, lineno) in comments])
1943      api_doc.docstring_lineno = comments[0][1]
1944  
1945  #/////////////////////////////////////////////////////////////////
1946  #{ Tree tokens
1947  #/////////////////////////////////////////////////////////////////
1948  
1949  def _join_toktree(s1, s2, spacing='normal'):
1950      # Join them.  s1 = left side; s2 = right side.
1951      if (s2=='' or s1=='' or
1952          s1 in ('-','`') or s2 in ('}',']',')','`',':') or
1953          s2[0] in ('.',',') or s1[-1] in ('(','[','{','.','\n',' ') or
1954          (s2[0] == '(' and s1[-1] not in (',','='))):
1955          return '%s%s' % (s1,s2)
1956      elif (spacing=='tight' and
1957            (s1[-1] in '+-*/=,' or s2[0] in '+-*/=,')):
1958          return '%s%s' % (s1, s2)
1959      else:
1960          return '%s %s' % (s1, s2)
1961
1962  def _pp_toktree_add_piece(spacing, pieces, piece):
1963      s1 = pieces[-1]
1964      s2 = piece
1965  
1966      if (s2=='' or s1=='' or
1967          s1 in ('-','`') or s2 in ('}',']',')','`',':') or
1968          s2[0] in ('.',',') or s1[-1] in ('(','[','{','.','\n',' ') or
1969          (s2[0] == '(' and s1[-1] not in (',','='))):
1970          pass
1971      elif (spacing=='tight' and
1972            (s1[-1] in '+-*/=,' or s2[0] in '+-*/=,')):
1973          pass
1974      else:
1975          pieces.append(' ')
1976  
1977      pieces.append(piece)
1978
1979  def pp_toktree(elts, spacing='normal', indent=0):
1980      pieces = ['']
1981      _pp_toktree(elts, spacing, indent, pieces)
1982      return ''.join(pieces)
1983
1984  def _pp_toktree(elts, spacing, indent, pieces):
1985      add_piece = _pp_toktree_add_piece
1986  
1987      for elt in elts:
1988          # Put a blank line before class & def statements.
1989          if elt == (token.NAME, 'class') or elt == (token.NAME, 'def'):
1990              add_piece(spacing, pieces, '\n%s' % (' '*indent))
1991  
1992          if isinstance(elt, tuple):
1993              if elt[0] == token.NEWLINE:
1994                  add_piece(spacing, pieces, ' '+elt[1])
1995                  add_piece(spacing, pieces, '\n%s' % (' '*indent))
1996              elif elt[0] == token.INDENT:
1997                  add_piece(spacing, pieces, ' ')
1998                  indent += 1
1999              elif elt[0] == token.DEDENT:
2000                  assert pieces[-1] == ' '
2001                  pieces.pop()
2002                  indent -= 1
2003              elif elt[0] == tokenize.COMMENT:
2004                  add_piece(spacing, pieces, elt[1].rstrip() + '\n')
2005                  add_piece(spacing, pieces, ' '*indent)
2006              else:
2007                  add_piece(spacing, pieces, elt[1])
2008          else:
2009              _pp_toktree(elt, spacing, indent, pieces)
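A small sketch of the pretty-printer's spacing modes, assuming epydoc is importable:

    import token
    from epydoc.docparser import pp_toktree

    stmt = [(token.NAME, 'del'), (token.NAME, 'x')]
    assert pp_toktree(stmt) == 'del x'

    expr = [(token.NAME, 'x'), (token.OP, '='),
            (token.NUMBER, '1'), (token.OP, '+'), (token.NUMBER, '2')]
    assert pp_toktree(expr, 'tight') == 'x=1+2'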
2010  
2011  #/////////////////////////////////////////////////////////////////
2012  #{ Helper Functions
2013  #/////////////////////////////////////////////////////////////////
2014  
2015  def get_module_encoding(filename):
2016      """
2017      @see: U{PEP 263<http://www.python.org/peps/pep-0263.html>}
2018      """
2019      module_file = open(filename, 'rU')
2020      try:
2021          lines = [module_file.readline() for i in range(2)]
2022          if lines[0].startswith('\xef\xbb\xbf'):
2023              return 'utf-8'
2024          else:
2025              for line in lines:
2026                  m = re.search("coding[:=]\s*([-\w.]+)", line)
2027                  if m: return m.group(1)
2028  
2029          # Fall back on Python's default encoding.
2030          return 'iso-8859-1' # aka 'latin-1'
2031      finally:
2032          module_file.close()
2033
2034  def _get_module_name(filename, package_doc):
2035      """
2036      Return (dotted_name, is_package)
2037      """
2038      name = re.sub(r'\.py\w?$', '', os.path.split(filename)[1])
2039      if name == '__init__':
2040          is_package = True
2041          name = os.path.split(os.path.split(filename)[0])[1]
2042      else:
2043          is_package = False
2044  
2045      # [XX] if the module contains a script, then `name` may not
2046      # necessarily be a valid identifier -- which will cause
2047      # DottedName to raise an exception.  Is that what I want?
2048      if package_doc is None:
2049          dotted_name = DottedName(name)
2050      else:
2051          dotted_name = DottedName(package_doc.canonical_name, name)
2052  
2053      # Check if the module looks like it's shadowed by a variable.
2054      # If so, then add a "'" to the end of its canonical name, to
2055      # distinguish it from the variable.
2056      if package_doc is not None and name in package_doc.variables:
2057          vardoc = package_doc.variables[name]
2058          if (vardoc.value not in (None, UNKNOWN) and
2059              vardoc.imported_from != dotted_name):
2060              log.warning("Module %s might be shadowed by a variable with "
2061                          "the same name." % dotted_name)
2062              dotted_name = DottedName(str(dotted_name)+"'")
2063  
2064      return dotted_name, is_package
2065
2066  def flatten(lst, out=None):
2067      """
2068      @return: a flat list containing the leaves of the given nested
2069          list.
2070      @param lst: The nested list that should be flattened.
2071      """
2072      if out is None: out = []
2073      for elt in lst:
2074          if isinstance(elt, (list, tuple)):
2075              flatten(elt, out)
2076          else:
2077              out.append(elt)
2078      return out
2079