Package epydoc :: Module docstringparser
[hide private]
[frames] | no frames]

Source Code for Module epydoc.docstringparser

   1  # epydoc -- Docstring processing 
   2  # 
   3  # Copyright (C) 2005 Edward Loper 
   4  # Author: Edward Loper <[email protected]> 
   5  # URL: <http://epydoc.sf.net> 
   6  # 
   7  # $Id: docstringparser.py 1529 2007-02-18 12:34:08Z dvarrazzo $ 
   8   
   9  """ 
  10  Parse docstrings and handle any fields it defines, such as C{@type} 
  11  and C{@author}.  Fields are used to describe specific information 
  12  about an object.  There are two classes of fields: X{simple fields} 
  13  and X{special fields}. 
  14   
  15  Simple fields are fields that get stored directly in an C{APIDoc}'s 
  16  metadata dictionary, without any special processing.  The set of 
  17  simple fields is defined by the list L{STANDARD_FIELDS}, whose 
  18  elements are L{DocstringField}s. 
  19   
  20  Special fields are fields that perform some sort of processing on the 
  21  C{APIDoc}, or add information to attributes other than the metadata 
  22  dictionary.  Special fields are handled by field handler 
  23  functions, which are registered using L{register_field_handler}. 
  24  """ 
  25  __docformat__ = 'epytext en' 
  26   
  27   
  28  ###################################################################### 
  29  ## Imports 
  30  ###################################################################### 
  31   
  32  import re, sys 
  33  from epydoc import markup 
  34  from epydoc.markup import epytext 
  35  from epydoc.apidoc import * 
  36  from epydoc.docintrospecter import introspect_docstring_lineno 
  37  from epydoc.util import py_src_filename 
  38  from epydoc import log 
  39  import epydoc.docparser 
  40  import __builtin__, exceptions 
  41   
  42  ###################################################################### 
  43  # Docstring Fields 
  44  ###################################################################### 
  45   
class DocstringField:
    """
    A simple docstring field, which can be used to describe specific
    information about an object, such as its author or its version.
    Simple docstring fields are fields that take no arguments, and
    are displayed as simple sections.

    @ivar tags: The set of tags that can be used to identify this
        field.  Always stored as a tuple; the first tag is the one
        shown by C{repr()}.
    @ivar singular: The label that should be used to identify this
        field in the output, if the field contains one value.
    @ivar plural: The label that should be used to identify this
        field in the output, if the field contains multiple values.
        Defaults to the singular label.
    @ivar short: If true, then multiple values should be combined
        into a single comma-delimited list.  If false, then
        multiple values should be listed separately in a bulleted
        list.
    @ivar multivalue: If true, then multiple values may be given
        for this field; if false, then this field can only take a
        single value, and a warning should be issued if it is
        redefined.
    @ivar takes_arg: If true, then this field expects an argument;
        and a separate field section will be constructed for each
        argument value.  The label (and plural label) should include
        a '%s' to mark where the argument's string rep should be
        added.
    @ivar varnames: The names of variables (such as C{__version__})
        whose values L{add_metadata_from_var} may use as values for
        this field.  Always a list (empty if none were given).
    """
    def __init__(self, tags, label, plural=None,
                 short=0, multivalue=1, takes_arg=0,
                 varnames=None):
        # Use isinstance() rather than exact type comparison so that
        # subclasses of list/tuple/str are accepted as well.
        if isinstance(tags, (list, tuple)):
            self.tags = tuple(tags)
        elif isinstance(tags, str):
            self.tags = (tags,)
        else:
            raise TypeError('Bad tags: %s' % tags)
        self.singular = label
        if plural is None:
            self.plural = label
        else:
            self.plural = plural
        self.multivalue = multivalue
        self.short = short
        self.takes_arg = takes_arg
        self.varnames = varnames or []

    def __cmp__(self, other):
        # Fields are ordered (and considered equal) by their tags;
        # anything that is not a DocstringField compares as unequal.
        if not isinstance(other, DocstringField):
            return -1
        return cmp(self.tags, other.tags)

    def __hash__(self):
        # Consistent with __cmp__: fields with equal tags hash alike.
        return hash(self.tags)

    def __repr__(self):
        return '<Field: %s>' % self.tags[0]

# Each entry below is a DocstringField(tags, label[, plural], ...).
# The first tag in each list is the one used by the field's repr();
# the remaining tags are alternative spellings accepted for the same
# field.  `varnames` lists the variables whose values may supply the
# field's value (see add_metadata_from_var).
STANDARD_FIELDS = [
    #: A list of the standard simple fields accepted by epydoc.  This
    #: list can be augmented at run-time by a docstring with the special
    #: C{@deffield} field.  The order in which fields are listed here
    #: determines the order in which they will be displayed in the
    #: output.

    # If it's deprecated, put that first.
    DocstringField(['deprecated', 'depreciated'],
             'Deprecated', multivalue=0, varnames=['__deprecated__']),

    # Status info
    DocstringField(['version'], 'Version', multivalue=0,
                   varnames=['__version__']),
    DocstringField(['date'], 'Date', multivalue=0,
                   varnames=['__date__']),
    DocstringField(['status'], 'Status', multivalue=0),

    # Bibliographic Info
    DocstringField(['author', 'authors'], 'Author', 'Authors', short=1,
                   varnames=['__author__', '__authors__']),
    DocstringField(['contact'], 'Contact', 'Contacts', short=1,
                   varnames=['__contact__']),
    DocstringField(['organization', 'org'],
                   'Organization', 'Organizations'),
    DocstringField(['copyright', '(c)'], 'Copyright', multivalue=0,
                   varnames=['__copyright__']),
    DocstringField(['license'], 'License', multivalue=0,
                   varnames=['__license__']),

    # Various warnings etc.
    DocstringField(['bug'], 'Bug', 'Bugs'),
    DocstringField(['warning', 'warn'], 'Warning', 'Warnings'),
    DocstringField(['attention'], 'Attention'),
    DocstringField(['note'], 'Note', 'Notes'),

    # Formal conditions
    DocstringField(['requires', 'require', 'requirement'], 'Requires'),
    DocstringField(['precondition', 'precond'],
                   'Precondition', 'Preconditions'),
    DocstringField(['postcondition', 'postcond'],
                   'Postcondition', 'Postconditions'),
    DocstringField(['invariant'], 'Invariant'),

    # When was it introduced (version # or date)
    DocstringField(['since'], 'Since', multivalue=0),

    # Changes made
    DocstringField(['change', 'changed'], 'Change Log'),

    # Crossreferences
    DocstringField(['see', 'seealso'], 'See Also', short=1),

    # Future Work
    DocstringField(['todo'], 'To Do', takes_arg=True),

    # Permissions (used by zope-based projects)
    DocstringField(['permission', 'permissions'], 'Permission', 'Permissions')
    ]

######################################################################
#{ Docstring Parsing
######################################################################

# Used by get_docformat() whenever the defining module does not
# declare a usable docformat of its own.
DEFAULT_DOCFORMAT = 'epytext'
"""The name of the default markup languge used to process docstrings."""

# [xx] keep track of which ones we've already done, in case we're
# asked to process one twice?  e.g., for @include we might have to
# parse the included docstring earlier than we might otherwise..??

def parse_docstring(api_doc, docindex):
    """
    Process the given C{APIDoc}'s docstring: fill in its C{descr} and
    C{summary} attributes, and apply every field found in the
    docstring.  For a class, the fields that describe the constructor
    are applied to the class's C{__init__} routine instead.

    @param api_doc: The documentation object whose docstring should
        be processed.  It is modified in place.
    @param docindex: A DocIndex, used to find the containing
        module (to look up the docformat); and to find any
        user docfields defined by containing objects.
    """
    # Initialized metadata means this docstring was already handled.
    # That is expected for __init__ routines (which are parsed ahead
    # of time while their class is processed); for anything else,
    # leave a debug note.
    if api_doc.metadata is not UNKNOWN:
        parsed_ahead = (isinstance(api_doc, RoutineDoc)
                        and api_doc.canonical_name[-1] == '__init__')
        if not parsed_ahead:
            log.debug("%s's docstring processed twice" % api_doc.canonical_name)
        return

    initialize_api_doc(api_doc)

    # Without a docstring there is nothing further to do.
    if api_doc.docstring in (None, UNKNOWN):
        return

    # Strip the docstring's leading indentation.
    api_doc.docstring = unindent_docstring(api_doc.docstring)

    # A signature given in the docstring overrides whatever signature
    # was obtained via introspection/parsing.
    if isinstance(api_doc, RoutineDoc):
        parse_function_signature(api_doc)

    # Parse the markup.  Problems are collected as `ParseError`
    # objects in parse_errors rather than raised.
    parse_errors = []
    docformat = get_docformat(api_doc, docindex)
    parsed_docstring = markup.parse(api_doc.docstring, docformat,
                                    parse_errors)

    # Separate the description from the list of fields.
    descr, fields = parsed_docstring.split_fields(parse_errors)
    api_doc.descr = descr

    field_warnings = []

    def apply_fields(target_doc, field_list):
        # Apply each field to target_doc; a ValueError raised while
        # processing a field is recorded as a warning.
        for fld in field_list:
            try:
                process_field(target_doc, docindex, fld.tag(),
                              fld.arg(), fld.body())
            except ValueError:
                field_warnings.append(str(sys.exc_info()[1]))

    # Constructor fields written in the class docstring are moved to
    # the class's __init__.  This assumes that a class docstring is
    # parsed before the docstring of that class's __init__.
    if isinstance(api_doc, ClassDoc):
        initvar = api_doc.variables.get('__init__')
        if initvar and initvar.value not in (None, UNKNOWN):
            # Parse the __init__ docstring ahead of time.
            init_api_doc = initvar.value
            parse_docstring(init_api_doc, docindex)

            parse_function_signature(init_api_doc, api_doc)
            init_fields = split_init_fields(fields, field_warnings)
            apply_fields(init_api_doc, init_fields)

    # Apply the (remaining) fields to api_doc itself.
    apply_fields(api_doc, fields)

    # Every type field should correspond to a documented parameter.
    check_type_fields(api_doc, field_warnings)

    # Pick up special variables (e.g., __version__).
    if isinstance(api_doc, NamespaceDoc):
        for field in STANDARD_FIELDS + user_docfields(api_doc, docindex):
            add_metadata_from_var(api_doc, field)

    # Derive a summary from the description, if none was given.
    if api_doc.summary is None and api_doc.descr is not None:
        api_doc.summary, api_doc.other_docs = api_doc.descr.summary()

    # A routine with no summary but with a return description gets a
    # summary built from that return description.
    if (isinstance(api_doc, RoutineDoc) and api_doc.summary is None and
        api_doc.return_descr is not None):
        return_summary, return_other = api_doc.return_descr.summary()
        api_doc.summary = RETURN_PDS + return_summary
        api_doc.other_docs = return_other

    # [XX] Make sure we don't have types/param descrs for unknown
    # vars/params?

    # Report any errors that occurred.
    report_errors(api_doc, docindex, parse_errors, field_warnings)
268
def add_metadata_from_var(api_doc, field):
    """
    Add metadata for C{field} to C{api_doc}, taken from the values of
    the variables named in C{field.varnames} (e.g. C{__version__}).

    For each such variable that C{api_doc} contains and whose value is
    known, the value is extracted either from the variable's Python
    value or, failing that, from its parse tree.  Each extracted value
    is appended to C{api_doc.metadata} as a C{(field, varname,
    parsed_docstring)} tuple.  A variable that contributed at least
    one value and has no docstring of its own is then removed from
    C{api_doc.variables}.

    @param api_doc: The documentation object to update in place.
    @param field: The L{DocstringField} to look for values for.
    """
    # A single-valued field keeps the value it already has.
    if not field.multivalue:
        for (existing_field, _arg, _descr) in api_doc.metadata:
            if field == existing_field:
                return # We already have a value for this metadata.

    for varname in field.varnames:
        # Check if api_doc has a variable w/ the given name.
        if varname not in api_doc.variables: continue
        var_doc = api_doc.variables[varname]
        if var_doc.value is UNKNOWN: continue
        val_doc = var_doc.value
        value = []

        # Try extracting the value from the pyval.
        ok_types = (basestring, int, float, bool, type(None))
        if val_doc.pyval is not UNKNOWN:
            if isinstance(val_doc.pyval, ok_types):
                value = [val_doc.pyval]
            elif field.multivalue:
                # A list/tuple is accepted only if every element has
                # one of the simple types above.
                if isinstance(val_doc.pyval, (tuple, list)):
                    for elt in val_doc.pyval:
                        if not isinstance(elt, ok_types): break
                    else:
                        value = list(val_doc.pyval)

        # Try extracting the value from the parse tree.
        elif val_doc.toktree is not UNKNOWN:
            # Best effort: a parse tree that is not a single string
            # simply yields no value here.
            try: value = [epydoc.docparser.parse_string(val_doc.toktree)]
            except KeyboardInterrupt: raise
            except: pass
            if field.multivalue and not value:
                # Unlike the single-string attempt above, a failure to
                # parse a list of strings is propagated to the caller
                # (the original wrapped this call in a try block that
                # just re-raised).
                value = epydoc.docparser.parse_string_list(val_doc.toktree)

        # Add any values that we found.
        for elt in value:
            if isinstance(elt, str):
                elt = decode_with_backslashreplace(elt)
            else:
                elt = unicode(elt)
            elt = epytext.ParsedEpytextDocstring(
                epytext.parse_as_para(elt))
            api_doc.metadata.append( (field, varname, elt) )

        # Remove the variable (unless it's documented).  This is done
        # once, after all of its values have been added: deleting it
        # once per value would raise a KeyError on the second value of
        # a multi-valued variable such as C{__author__ = ['a', 'b']}.
        if value and var_doc.docstring in (None, UNKNOWN):
            api_doc.variables.pop(varname, None)
317
def initialize_api_doc(api_doc):
    """A helper function for L{parse_docstring()}: give every
    attribute that C{parse_docstring()} will write to an initial
    value, provided the attribute is still C{UNKNOWN}.  Attributes
    that already hold a value are left untouched."""
    def no_value():
        return None

    # (attribute name, factory producing its initial value).  The
    # factories make sure each object gets its own list/dict.
    pending = [('descr', no_value),
               ('summary', no_value),
               ('metadata', list)]
    if isinstance(api_doc, RoutineDoc):
        pending += [('arg_descrs', list),
                    ('arg_types', dict),
                    ('return_descr', no_value),
                    ('return_type', no_value),
                    ('exception_descrs', list)]
    if isinstance(api_doc, (VariableDoc, PropertyDoc)):
        pending.append(('type_descr', no_value))
    if isinstance(api_doc, NamespaceDoc):
        pending += [('group_specs', list),
                    ('sort_spec', list)]

    for attr_name, make_initial in pending:
        if getattr(api_doc, attr_name) is UNKNOWN:
            setattr(api_doc, attr_name, make_initial())
346
def split_init_fields(fields, warnings):
    """
    Take the fields that describe the constructor out of a class
    docstring's field list.

    Fields without an argument stay in C{fields}, in their original
    order.  Fields with an argument are grouped by argument and then
    distributed: variable fields (and fields with unrecognized tags)
    go back to the end of C{fields}; parameter and exception fields
    go to the returned list; a C{type} field follows the variable
    and/or the parameter it belongs to.

    @param fields: The fields to process.  The list will be modified
        in place.
    @type fields: C{list} of L{markup.Field}
    @param warnings: A list to emit processing warnings
    @type warnings: C{list}
    @return: The C{fields} items to be applied to the C{__init__} method
    @rtype: C{list} of L{markup.Field}
    """
    init_fields = []

    # Group the fields that have an argument by that argument,
    # remembering the order in which the arguments first appeared.
    grouped = {}
    arg_sequence = []
    without_arg = []
    for fld in fields:
        key = fld.arg()
        if key is None:
            without_arg.append(fld)
            continue
        if key not in grouped:
            grouped[key] = []
            arg_sequence.append(key)
        grouped[key].append(fld)
    # Only the argument-less fields remain (same list object).
    fields[:] = without_arg

    # For each argument allow at most one variable and one parameter,
    # and at most one type for each of them.
    for arg in arg_sequence:
        var_field = par_field = None
        var_type = par_type = None

        for fld in grouped[arg]:
            tag = fld.tag()

            if tag in VARIABLE_TAGS:
                if var_field is not None:
                    warnings.append(
                        "There is more than one variable named '%s'"
                        % arg)
                else:
                    var_field = fld
                    fields.append(fld)

            elif tag in PARAMETER_TAGS:
                if par_field is not None:
                    warnings.append(
                        "There is more than one parameter named '%s'"
                        % arg)
                else:
                    par_field = fld
                    init_fields.append(fld)

            elif tag == 'type':
                if var_field is None and par_field is None:
                    # The type precedes the object(s) it describes, so
                    # it is a candidate for both.
                    var_type = par_type = fld
                else:
                    if var_field is not None and var_type is None:
                        var_type = fld
                    if par_field is not None and par_type is None:
                        par_type = fld

            elif tag in EXCEPTION_TAGS:
                init_fields.append(fld)

            else:
                # Unexpected field: leave it with the class.
                fields.append(fld)

        # Emit each selected type after its object.  A type whose
        # object never showed up is dropped.
        # [xx] warn about type w/o object?
        if var_type is not None and var_field is not None:
            fields.append(var_type)
        if par_type is not None and par_field is not None:
            init_fields.append(par_type)

    return init_fields
431
def report_errors(api_doc, docindex, parse_errors, field_warnings):
    """A helper function for L{parse_docstring()}: log the markup
    errors and field warnings that were collected while processing
    C{api_doc}'s docstring, as one block headed by the file name,
    the docstring's line number (when known) and the object's name.
    Nothing is logged if both lists are empty."""
    if not (parse_errors or field_warnings):
        return

    # Name of the item with the problems, and the file name of the
    # module that defines it.
    name = api_doc.canonical_name
    module = api_doc.defining_module
    filename = '??'
    if module is not UNKNOWN and module.filename not in (None, UNKNOWN):
        try: filename = py_src_filename(module.filename)
        except: filename = module.filename

    # [xx] Don't report markup errors for standard builtins.
    if isinstance(api_doc, ValueDoc) and api_doc != module:
        if api_doc.pyval in __builtin__.__dict__.values():
            return
        if (module not in (None, UNKNOWN) and
            module.pyval in (__builtin__, exceptions)):
            return

    # Line on which the docstring starts (None if it can't be found).
    startline = api_doc.docstring_lineno
    if startline in (None, UNKNOWN):
        startline = introspect_docstring_lineno(api_doc)
    if startline in (None, UNKNOWN):
        startline = None

    # Block header.
    header_parts = ['File %s, ' % filename]
    if startline is not None:
        header_parts.append('line %d, ' % startline)
    header_parts.append('in %s' % name)
    log.start_block(''.join(header_parts))

    # Parse errors; identical messages are reported only once.
    if startline is None:
        # No line numbers available: drop duplicates, keeping the
        # order of first appearance.
        seen = set()
        for error in parse_errors:
            message = error.descr()
            if message not in seen:
                log.docstring_warning(message)
                seen.add(message)
    else:
        # Collect the line numbers of each distinct message, then
        # report the messages ordered by their smallest line number.
        linenums_by_message = {}
        for error in parse_errors:
            error.set_linenum_offset(startline)
            message = error.descr()
            linenums_by_message.setdefault(message, []).append(error.linenum())
        ordered = sorted(linenums_by_message.items(),
                         key=lambda item: min(item[1]))
        for message, linenums in ordered:
            known = [n for n in linenums if n is not None]
            if len(known) == 0:
                log.docstring_warning(message)
            elif len(known) == 1:
                log.docstring_warning("Line %s: %s" % (known[0], message))
            else:
                joined = ', '.join(['%s' % l for l in known])
                log.docstring_warning("Lines %s: %s" % (joined, message))

    # Field warnings.
    for warning in field_warnings:
        log.docstring_warning(warning)

    # Close the block.
    log.end_block()

# Prepended by parse_docstring() to the summary of a routine's return
# description when the routine has no summary of its own.
RETURN_PDS = markup.parse('Returns:', markup='epytext')
"""A ParsedDocstring containing the text 'Returns'. This is used to
construct summary descriptions for routines that have empty C{descr},
but non-empty C{return_descr}."""

######################################################################
#{ Field Processing Error Messages
######################################################################

# Message templates for the ValueErrors raised while processing
# fields.  Each one is filled in with the `%` operator: most take the
# field tag; REDEFINED takes a description of what was redefined;
# BAD_PARAM takes a (tag, parameter names) pair.
UNEXPECTED_ARG = '%r did not expect an argument'
EXPECTED_ARG = '%r expected an argument'
EXPECTED_SINGLE_ARG = '%r expected a single argument'
BAD_CONTEXT = 'Invalid context for %r'
REDEFINED = 'Redefinition of %s'
UNKNOWN_TAG = 'Unknown field tag %r'
BAD_PARAM = '@%s for unknown parameter %s'

######################################################################
#{ Field Processing
######################################################################

def process_field(api_doc, docindex, tag, arg, descr):
    """
    Apply one field to C{api_doc}.  A tag that has a registered
    handler (see L{register_field_handler}) is passed to that
    handler.  Any other tag is looked up among the standard simple
    fields and the user-defined fields that are visible from
    C{api_doc}; if found, the field is recorded in
    C{api_doc.metadata}.

    @param tag: The field's tag, such as C{'author'}
    @param arg: The field's optional argument
    @param descr: The description following the field tag and
        argument.
    @raise ValueError: If a problem was encountered while processing
        the field (including an unknown tag).  The C{ValueError}'s
        string argument is an explanation of the problem, which
        should be displayed as a warning message.
    """
    if tag not in _field_dispatch_table:
        # standard simple fields & user-defined fields
        candidates = STANDARD_FIELDS + user_docfields(api_doc, docindex)
        for field in candidates:
            if tag not in field.tags:
                continue
            # [xx] check if it's redefined if it's not multivalue??
            if not field.takes_arg:
                _check(api_doc, tag, arg, expect_arg=False)
            api_doc.metadata.append((field, arg, descr))
            return

        # Nothing knows this tag: report a warning.
        raise ValueError(UNKNOWN_TAG % tag)

    # standard special fields
    _field_dispatch_table[tag](api_doc, docindex, tag, arg, descr)
563
def user_docfields(api_doc, docindex):
    """
    Return a list of user defined fields that can be used for the
    given object.  This list is taken from the given C{api_doc}, and
    any of its containing C{NamepaceDoc}s, from the innermost
    container outwards.

    @param api_doc: The object whose usable fields should be listed.
    @param docindex: Used to look up the documentation of each
        container by name (via C{get_valdoc}).
    @return: A new list containing the entries of the
        C{extra_docstring_fields} of C{api_doc} and of each of its
        containers that could be found.

    @note: We assume here that a parent's docstring will always be
        parsed before its childrens'.  This is indeed the case when we
        are called via L{docbuilder.build_doc_index()}.  If a child's
        docstring is parsed before its parents, then its parent won't
        yet have had its C{extra_docstring_fields} attribute
        initialized.
    """
    docfields = []
    # Get any docfields from `api_doc` itself
    if api_doc.extra_docstring_fields not in (None, UNKNOWN):
        docfields += api_doc.extra_docstring_fields
    # Get any docfields from `api_doc`'s ancestors.  The name is
    # shortened by one container on every step; previously every
    # step looked up the immediate container again, so more distant
    # ancestors were never consulted and the parent's fields were
    # added repeatedly.
    # NOTE(review): assumes container() returns the enclosing name
    # (the name without its last component) -- confirm.
    ancestor_name = api_doc.canonical_name
    for _ in range(len(api_doc.canonical_name) - 1):
        ancestor_name = ancestor_name.container()
        ancestor = docindex.get_valdoc(ancestor_name)
        if ancestor is not None \
           and ancestor.extra_docstring_fields not in (None, UNKNOWN):
            docfields += ancestor.extra_docstring_fields
    return docfields

# Maps each special field tag to the handler function registered
# for it; consulted by process_field().
_field_dispatch_table = {}
def register_field_handler(handler, *field_tags):
    """
    Make C{handler} the function that processes each of the given
    field tags, replacing any handler previously registered for a
    tag.  A handler is called as:

        >>> def field_handler(api_doc, docindex, tag, arg, descr):
        ...     '''update api_doc in response to the field.'''

    C{api_doc} is the documentation object to update; C{docindex}
    is a L{DocIndex} that can be used to look up the documentation
    for related objects; C{tag} is the field tag that was used;
    C{arg} is the optional argument; and C{descr} is the
    description following the field tag and argument.
    """
    _field_dispatch_table.update(dict.fromkeys(field_tags, handler))
607 608 ###################################################################### 609 #{ Field Handler Functions 610 ###################################################################### 611
def process_summary_field(api_doc, docindex, tag, arg, descr):
    """Use C{descr} as C{api_doc}'s summary.  The field takes no
    argument, and may not be given when a summary is already set.

    @raise ValueError: If an argument was given, or if
        C{api_doc.summary} has already been set."""
    _check(api_doc, tag, arg, expect_arg=False)
    if api_doc.summary is None:
        api_doc.summary = descr
    else:
        raise ValueError(REDEFINED % tag)
618
def process_include_field(api_doc, docindex, tag, arg, descr):
    """Intended to copy the docstring contents from the object named
    in C{descr}.  This is not implemented: after checking that no
    argument was given, the handler always fails.

    @raise ValueError: Always."""
    _check(api_doc, tag, arg, expect_arg=False)
    # Open design questions:
    #   a. just append the descr to our own
    #   b. append descr and update metadata
    #   c. append descr and process all fields.
    # Whichever is chosen, errors that are found should be marked as
    # coming from an imported docstring.
    #
    # How does this interact with documentation inheritance??
    message = '%s not implemented yet' % tag
    raise ValueError(message)
631
def process_undocumented_field(api_doc, docindex, tag, arg, descr):
    """Remove any documentation for the variables named in C{descr}.
    Each name may contain C{*} as a wildcard.  For a module, the
    submodules with a matching name are removed too, together with
    every entry of C{docindex.root} that such a submodule dominates.

    @raise ValueError: If C{api_doc} is not a namespace, or if an
        argument was given."""
    _check(api_doc, tag, arg, context=NamespaceDoc, expect_arg=False)
    is_module = isinstance(api_doc, ModuleDoc)

    for ident in _descr_to_identifiers(descr):
        name_pattern = re.compile('^%s$' % ident.replace('*', '(.*)'))

        # Drop the matching variables from `variables`.
        doomed = [var_name for var_name in list(api_doc.variables)
                  if name_pattern.match(var_name)]
        for var_name in doomed:
            api_doc.variables.pop(var_name, None)

        if not is_module:
            continue

        # For modules, drop the submodules that match as well.
        removed = set([sub for sub in api_doc.submodules
                       if name_pattern.match(sub.canonical_name[-1])])
        if not removed:
            continue
        api_doc.submodules = [sub for sub in api_doc.submodules
                              if sub not in removed]

        # Everything below a removed submodule has to go from the
        # docindex root too.  E.g., if module x declares y to be
        # undocumented, then x.y.z should also be undocumented.
        for elt in docindex.root[:]:
            for sub in removed:
                if sub.canonical_name.dominates(elt.canonical_name):
                    docindex.root.remove(elt)
657
def process_group_field(api_doc, docindex, tag, arg, descr):
    """Add a group called C{arg}, made up of the variables whose
    names are listed in C{descr}, to C{api_doc.group_specs}.

    @raise ValueError: If C{api_doc} is not a namespace, or if no
        argument was given."""
    _check(api_doc, tag, arg, context=NamespaceDoc, expect_arg=True)
    members = _descr_to_identifiers(descr)
    group_spec = (arg, members)
    api_doc.group_specs.append(group_spec)
663 # [xx] should this also set sort order? 664
def process_deffield_field(api_doc, docindex, tag, arg, descr):
    """Define a new custom field from C{arg} and C{descr}, and add it
    to C{api_doc.extra_docstring_fields}.  The variable C{__<arg>__}
    is registered as a source of values for the new field.

    @raise ValueError: If no argument was given, or if the field
        definition is rejected (reported as C{'Bad <tag>: ...'})."""
    _check(api_doc, tag, arg, expect_arg=True)
    if api_doc.extra_docstring_fields is UNKNOWN:
        api_doc.extra_docstring_fields = []
    try:
        new_field = _descr_to_docstring_field(arg, descr)
        new_field.varnames.append("__%s__" % arg)
        api_doc.extra_docstring_fields.append(new_field)
    except ValueError:
        # Re-raise with the tag prepended to the explanation.
        raise ValueError('Bad %s: %s' % (tag, sys.exc_info()[1]))
676
def process_raise_field(api_doc, docindex, tag, arg, descr):
    """Note in C{api_doc.exception_descrs} that the routine can raise
    the exception named C{arg}, described by C{descr}.  The name is
    stored as a C{DottedName} when it is a valid one, and as the
    plain argument otherwise.

    @raise ValueError: If C{api_doc} is not a routine, or if the
        argument is missing or contains a space."""
    _check(api_doc, tag, arg, context=RoutineDoc, expect_arg='single')
    exc_name = arg
    try:
        exc_name = DottedName(arg)
    except DottedName.InvalidDottedName:
        pass # keep the raw argument
    api_doc.exception_descrs.append((exc_name, descr))
684
def process_sort_field(api_doc, docindex, tag, arg, descr):
    """Put the identifiers listed in C{descr} at the front of
    C{api_doc.sort_spec}.

    @raise ValueError: If C{api_doc} is not a namespace, or if an
        argument was given."""
    _check(api_doc, tag, arg, context=NamespaceDoc, expect_arg=False)
    leading = _descr_to_identifiers(descr)
    api_doc.sort_spec = leading + api_doc.sort_spec
688 689 # [xx] should I notice when they give a type for an unknown var?
def process_type_field(api_doc, docindex, tag, arg, descr):
    """Record a type description.  What is described depends on the
    kind of C{api_doc}: in a namespace, the variable named C{arg}; for
    a variable or property, the object itself (no argument allowed);
    for a routine, the parameter named C{arg}.

    @raise ValueError: If the argument does not fit the context, if
        the type was already given, or if C{api_doc} is of none of
        these kinds."""
    # In namespace, "@type var: ..." describes the type of a var.
    if isinstance(api_doc, NamespaceDoc):
        _check(api_doc, tag, arg, expect_arg='single')
        set_var_type(api_doc, arg, descr)
        return

    # For variables & properties, "@type: ..." describes the variable.
    if isinstance(api_doc, (VariableDoc, PropertyDoc)):
        _check(api_doc, tag, arg, expect_arg=False)
        if api_doc.type_descr is not None:
            raise ValueError(REDEFINED % tag)
        api_doc.type_descr = descr
        return

    # For routines, "@type param: ..." describes a parameter.
    if isinstance(api_doc, RoutineDoc):
        _check(api_doc, tag, arg, expect_arg='single')
        if arg in api_doc.arg_types:
            raise ValueError(REDEFINED % ('type for '+arg))
        api_doc.arg_types[arg] = descr
        return

    raise ValueError(BAD_CONTEXT % tag)
712
def process_var_field(api_doc, docindex, tag, arg, descr):
    """Use C{descr} as the description of each module variable named
    in C{arg} (names may be separated by C{:}, C{;}, C{,} or spaces).

    @raise ValueError: If C{api_doc} is not a module, if no argument
        was given, or if a variable already has a description."""
    _check(api_doc, tag, arg, context=ModuleDoc, expect_arg=True)
    var_names = re.split('[:;, ] *', arg)
    for var_name in var_names:
        set_var_descr(api_doc, var_name, descr)
717
def process_cvar_field(api_doc, docindex, tag, arg, descr):
    """Handle C{@cvar}.  Inside the docstring of a variable that
    belongs to a class, the field (without argument) describes that
    variable.  Otherwise it must appear in a class, with an argument
    naming the variable(s) described.  Either way the variable is
    marked as not being an instance variable.

    @raise ValueError: If the context or the argument is invalid, or
        if a named variable already has a description."""
    in_class_variable = (isinstance(api_doc, VariableDoc) and
                         isinstance(api_doc.container, ClassDoc))

    if not in_class_variable:
        # @cvar used in a class: the argument names the variables.
        _check(api_doc, tag, arg, context=ClassDoc, expect_arg=True)
        for var_name in re.split('[:;, ] *', arg):
            set_var_descr(api_doc, var_name, descr)
            api_doc.variables[var_name].is_instvar = False
        return

    # @cvar used *within* a variable: it is the variable's own
    # description, and the variable is a class variable.
    _check(api_doc, tag, arg, expect_arg=False)
    api_doc.is_instvar = False
    api_doc.descr = markup.ConcatenatedDocstring(api_doc.descr, descr)
    api_doc.summary, api_doc.other_docs = descr.summary()
734
def process_ivar_field(api_doc, docindex, tag, arg, descr):
    """Handle C{@ivar}.  Inside the docstring of a variable that
    belongs to a class, the field (without argument) describes that
    variable.  Otherwise it must appear in a class, with an argument
    naming the variable(s) described.  Either way the variable is
    marked as an instance variable.

    @raise ValueError: If the context or the argument is invalid, or
        if a named variable already has a description."""
    in_class_variable = (isinstance(api_doc, VariableDoc) and
                         isinstance(api_doc.container, ClassDoc))

    if not in_class_variable:
        # @ivar used in a class: the argument names the variables.
        _check(api_doc, tag, arg, context=ClassDoc, expect_arg=True)
        for var_name in re.split('[:;, ] *', arg):
            set_var_descr(api_doc, var_name, descr)
            api_doc.variables[var_name].is_instvar = True
        return

    # @ivar used *within* a variable: it is the variable's own
    # description, and the variable is an instance variable.
    _check(api_doc, tag, arg, expect_arg=False)
    # require that there be no other descr?
    api_doc.is_instvar = True
    api_doc.descr = markup.ConcatenatedDocstring(api_doc.descr, descr)
    api_doc.summary, api_doc.other_docs = descr.summary()
752 753 # [xx] '@return: foo' used to get used as a descr if no other 754 # descr was present. is that still true?
def process_return_field(api_doc, docindex, tag, arg, descr):
    """Use C{descr} as the description of the routine's return value.

    @raise ValueError: If C{api_doc} is not a routine, if an argument
        was given, or if the return value was already described."""
    _check(api_doc, tag, arg, context=RoutineDoc, expect_arg=False)
    if api_doc.return_descr is None:
        api_doc.return_descr = descr
    else:
        raise ValueError(REDEFINED % 'return value description')
760
def process_rtype_field(api_doc, docindex, tag, arg, descr):
    """Use C{descr} as the description of the routine's return type.

    @raise ValueError: If C{api_doc} is not a routine, if an argument
        was given, or if the return type was already described."""
    _check(api_doc, tag, arg, context=RoutineDoc, expect_arg=False)
    if api_doc.return_type is None:
        api_doc.return_type = descr
    else:
        raise ValueError(REDEFINED % 'return value type')
766
def process_arg_field(api_doc, docindex, tag, arg, descr):
    """Record C{descr} as the description of the parameter(s) named
    in C{arg}.  The description is recorded even when some of the
    names are not part of the routine's signature; in that case a
    C{ValueError} is raised afterwards so that a warning is issued.

    @raise ValueError: If C{api_doc} is not a routine, if no argument
        was given, or if a named parameter is not in the signature."""
    _check(api_doc, tag, arg, context=RoutineDoc, expect_arg=True)
    param_names = re.split('[:;, ] *', arg)
    api_doc.arg_descrs.append((param_names, descr))

    # The check is skipped when the signature is unknown or is just
    # '...'.
    signature_args = api_doc.all_args()
    if signature_args in (['...'], UNKNOWN):
        return
    unknown = ['"%s"' % param for param in param_names
               if param not in signature_args]
    if unknown:
        raise ValueError(BAD_PARAM % (tag, ', '.join(unknown)))
778
def process_kwarg_field(api_doc, docindex, tag, arg, descr):
    """Record C{descr} as the description of the keyword parameter(s)
    named in C{arg}.  Unlike ordinary parameters, the names are not
    compared against the routine's signature.

    @raise ValueError: If C{api_doc} is not a routine, or if no
        argument was given."""
    # [xx] these should -not- be checked if they exist..
    # and listed separately or not??
    _check(api_doc, tag, arg, context=RoutineDoc, expect_arg=True)
    keyword_names = re.split('[:;, ] *', arg)
    entry = (keyword_names, descr)
    api_doc.arg_descrs.append(entry)

# Register the handler for each special field.  Every tag listed
# after a handler is an accepted spelling of the same field;
# process_field() looks handlers up in this registry first.
register_field_handler(process_group_field, 'group')
register_field_handler(process_deffield_field, 'deffield', 'newfield')
register_field_handler(process_sort_field, 'sort')
register_field_handler(process_summary_field, 'summary')
register_field_handler(process_undocumented_field, 'undocumented')
register_field_handler(process_include_field, 'include')
register_field_handler(process_var_field, 'var', 'variable')
register_field_handler(process_type_field, 'type')
register_field_handler(process_cvar_field, 'cvar', 'cvariable')
register_field_handler(process_ivar_field, 'ivar', 'ivariable')
register_field_handler(process_return_field, 'return', 'returns')
register_field_handler(process_rtype_field, 'rtype', 'returntype')
register_field_handler(process_arg_field, 'arg', 'argument',
                                          'parameter', 'param')
register_field_handler(process_kwarg_field, 'kwarg', 'keyword', 'kwparam')
register_field_handler(process_raise_field, 'raise', 'raises',
                                            'except', 'exception')

# The tag groups below are used by split_init_fields() to decide
# which fields of a class docstring belong to the class and which to
# its __init__ method.

# Tags related to function parameters
PARAMETER_TAGS = ('arg', 'argument', 'parameter', 'param',
                  'kwarg', 'keyword', 'kwparam')

# Tags related to variables in a class
VARIABLE_TAGS = ('cvar', 'cvariable', 'ivar', 'ivariable')

# Tags related to exceptions
EXCEPTION_TAGS = ('raise', 'raises', 'except', 'exception')

######################################################################
#{ Helper Functions
######################################################################

def check_type_fields(api_doc, field_warnings):
    """For a routine, verify that every parameter that was given a
    type is either part of the signature or has a description; append
    a warning to C{field_warnings} for each one that is neither.
    Objects other than routines are not checked."""
    if not isinstance(api_doc, RoutineDoc):
        return
    for arg in api_doc.arg_types:
        if arg in api_doc.all_args():
            continue
        # Not in the signature: a described parameter (e.g. a
        # keyword) is acceptable as well.
        described = False
        for names, descr in api_doc.arg_descrs:
            if arg in names:
                described = True
                break
        if not described:
            field_warnings.append(BAD_PARAM % ('type', '"%s"' % arg))
829
def set_var_descr(api_doc, ident, descr):
    """
    Attach the description C{descr} to the variable named C{ident} in
    C{api_doc.variables}.  A new L{VariableDoc} is created for the
    variable if C{api_doc} does not have one yet.  If the variable has
    no summary, the summary (and C{other_docs}) are taken from the
    description.

    @raise ValueError: If the variable already has a description.
    """
    variables = api_doc.variables
    if ident not in variables:
        variables[ident] = VariableDoc(
            container=api_doc,
            name=ident,
            canonical_name=api_doc.canonical_name+ident)
    var_doc = variables[ident]

    # A description may only be supplied once per variable.
    already_described = var_doc.descr not in (None, UNKNOWN)
    if already_described:
        raise ValueError(REDEFINED % ('description for '+ident))

    var_doc.descr = descr
    if var_doc.summary in (None, UNKNOWN):
        summary, other_docs = var_doc.descr.summary()
        var_doc.summary = summary
        var_doc.other_docs = other_docs
842
def set_var_type(api_doc, ident, descr):
    """
    Attach the type description C{descr} to the variable named
    C{ident} in C{api_doc.variables}.  A new L{VariableDoc} is created
    for the variable if C{api_doc} does not have one yet.

    @raise ValueError: If the variable already has a type description.
    """
    variables = api_doc.variables
    if ident not in variables:
        variables[ident] = VariableDoc(
            container=api_doc,
            name=ident,
            canonical_name=api_doc.canonical_name+ident)
    var_doc = variables[ident]

    # A type may only be supplied once per variable.
    already_typed = var_doc.type_descr not in (None, UNKNOWN)
    if already_typed:
        raise ValueError(REDEFINED % ('type for '+ident))
    var_doc.type_descr = descr
853
854 -def _check(api_doc, tag, arg, context=None, expect_arg=None):
855 if context is not None: 856 if not isinstance(api_doc, context): 857 raise ValueError(BAD_CONTEXT % tag) 858 if expect_arg is not None: 859 if expect_arg == True: 860 if arg is None: 861 raise ValueError(EXPECTED_ARG % tag) 862 elif expect_arg == False: 863 if arg is not None: 864 raise ValueError(UNEXPECTED_ARG % tag) 865 elif expect_arg == 'single': 866 if (arg is None or ' ' in arg): 867 raise ValueError(EXPECTED_SINGLE_ARG % tag) 868 else: 869 assert 0, 'bad value for expect_arg'
870
def get_docformat(api_doc, docindex):
    """
    Return the name of the markup language that should be used to
    parse the API documentation for the given object.

    The name is taken from the C{docformat} of the module that defines
    C{api_doc}; if that module or its docformat is not known,
    L{DEFAULT_DOCFORMAT} is used.  The value is lower-cased and only
    its first whitespace-separated word is kept, so that a value such
    as C{'epytext en'} yields C{'epytext'}.

    @param api_doc: The object whose markup language is wanted.
    @param docindex: Not used by this function.
    @return: The markup language name, or L{DEFAULT_DOCFORMAT}
        (unmodified) if the docformat value cannot be normalized.
    """
    # Find the module that defines api_doc.
    module = api_doc.defining_module
    # Look up its docformat.
    if module is not UNKNOWN and module.docformat not in (None, UNKNOWN):
        docformat = module.docformat
    else:
        docformat = DEFAULT_DOCFORMAT
    # Convert to lower case & strip region codes.
    try:
        return docformat.lower().split()[0]
    except (AttributeError, IndexError, TypeError):
        # Non-string or empty/blank docformat.  Only these errors are
        # caught, so that KeyboardInterrupt and SystemExit are not
        # silently swallowed.
        return DEFAULT_DOCFORMAT
886
887 -def unindent_docstring(docstring):
888 # [xx] copied from inspect.getdoc(); we can't use inspect.getdoc() 889 # itself, since it expects an object, not a string. 890 891 if not docstring: return '' 892 lines = docstring.expandtabs().split('\n') 893 894 # Find minimum indentation of any non-blank lines after first line. 895 margin = sys.maxint 896 for line in lines[1:]: 897 content = len(line.lstrip()) 898 if content: 899 indent = len(line) - content 900 margin = min(margin, indent) 901 # Remove indentation. 902 if lines: 903 lines[0] = lines[0].lstrip() 904 if margin < sys.maxint: 905 for i in range(1, len(lines)): lines[i] = lines[i][margin:] 906 # Remove any trailing (but not leading!) blank lines. 907 while lines and not lines[-1]: 908 lines.pop() 909 #while lines and not lines[0]: 910 # lines.pop(0) 911 return '\n'.join(lines)
912 913 _IDENTIFIER_LIST_REGEXP = re.compile(r'^[\w.\*]+([\s,:;]\s*[\w.\*]+)*$')
914 -def _descr_to_identifiers(descr):
915 """ 916 Given a C{ParsedDocstring} that contains a list of identifiers, 917 return a list of those identifiers. This is used by fields such 918 as C{@group} and C{@sort}, which expect lists of identifiers as 919 their values. To extract the identifiers, the docstring is first 920 converted to plaintext, and then split. The plaintext content of 921 the docstring must be a a list of identifiers, separated by 922 spaces, commas, colons, or semicolons. 923 924 @rtype: C{list} of C{string} 925 @return: A list of the identifier names contained in C{descr}. 926 @type descr: L{markup.ParsedDocstring} 927 @param descr: A C{ParsedDocstring} containing a list of 928 identifiers. 929 @raise ValueError: If C{descr} does not contain a valid list of 930 identifiers. 931 """ 932 idents = descr.to_plaintext(None).strip() 933 idents = re.sub(r'\s+', ' ', idents) 934 if not _IDENTIFIER_LIST_REGEXP.match(idents): 935 raise ValueError, 'Bad Identifier list: %r' % idents 936 rval = re.split('[:;, ] *', idents) 937 return rval
938
939 -def _descr_to_docstring_field(arg, descr):
940 tags = [s.lower() for s in re.split('[:;, ] *', arg)] 941 descr = descr.to_plaintext(None).strip() 942 args = re.split('[:;,] *', descr) 943 if len(args) == 0 or len(args) > 3: 944 raise ValueError, 'Wrong number of arguments' 945 singular = args[0] 946 if len(args) >= 2: plural = args[1] 947 else: plural = None 948 short = 0 949 if len(args) >= 3: 950 if args[2] == 'short': short = 1 951 else: raise ValueError('Bad arg 2 (expected "short")') 952 return DocstringField(tags, singular, plural, short)
953 954 ###################################################################### 955 #{ Function Signature Extraction 956 ###################################################################### 957 958 # [XX] todo: add optional type modifiers? 959 _SIGNATURE_RE = re.compile( 960 # Class name (for builtin methods) 961 r'^\s*((?P<self>\w+)\.)?' + 962 # The function name (must match exactly) [XX] not anymore! 963 r'(?P<func>\w+)' + 964 # The parameters 965 r'\((?P<params>(\s*\[?\s*\*{0,2}[\w\-\.]+(=.+?)?'+ 966 r'(\s*\[?\s*,\s*\]?\s*\*{0,2}[\w\-\.]+(=.+?)?)*\]*)?)\s*\)' + 967 # The return value (optional) 968 r'(\s*(->)\s*(?P<return>\S.*?))?'+ 969 # The end marker 970 r'\s*(\n|\s+(--|<=+>)\s+|$|\.\s+|\.\n)') 971 """A regular expression that is used to extract signatures from 972 docstrings.""" 973
def parse_function_signature(func_doc, doc_source=None):
    """
    Construct the signature for a builtin function or method from
    its docstring.  If the docstring uses the standard convention
    of including a signature in the first line of the docstring
    (and formats that signature according to standard
    conventions), then it will be used to extract a signature, and
    the signature text is removed from the docstring.

    @param func_doc: The target object where to store parsed signature. Also
        container of the docstring to parse if doc_source is C{None}
    @type func_doc: L{RoutineDoc}
    @param doc_source: Contains the docstring to parse. If C{None}, parse
        C{func_doc} docstring instead
    @type doc_source: L{APIDoc}
    @rtype: C{bool}
    @return: C{True} if a signature was found and stored in
        C{func_doc}; C{False} if there is no docstring, if it does not
        start with a signature, or if the signature's optional-parameter
        brackets are malformed.
    """
    if doc_source is None:
        doc_source = func_doc

    # If there's no docstring, then don't do anything.
    if not doc_source.docstring:
        return False

    m = _SIGNATURE_RE.match(doc_source.docstring)
    if m is None:
        return False

    # The function name in the signature is deliberately not compared
    # with func_doc's own name: when the signature comes from a class
    # docstring it names the class rather than __init__.

    params = m.group('params')
    rtype = m.group('return')
    selfparam = m.group('self')

    # Extract the parameters from the signature.
    func_doc.posargs = []
    func_doc.vararg = None
    func_doc.kwarg = None
    if func_doc.posarg_defaults is UNKNOWN:
        func_doc.posarg_defaults = []
    if params:
        # Figure out which parameters are optional: each pass removes
        # one innermost "[...]" group and marks every parameter inside
        # it with a "=..." placeholder default.
        while '[' in params or ']' in params:
            m2 = re.match(r'(.*)\[([^\[\]]+)\](.*)', params)
            if not m2:
                # Unbalanced or empty brackets.
                return False
            (start, mid, end) = m2.groups()
            mid = re.sub(r'((,|^)\s*[\w\-\.]+)', r'\1=...', mid)
            params = start+mid+end

        # An optional parameter that already had an explicit default
        # now reads "name=...=value"; drop the placeholder.  The dots
        # must be escaped: unescaped, the pattern matches any three
        # characters between two "=" signs, so "a=1,b=2" would be
        # collapsed to "a=2" and parameter b would be lost.
        params = re.sub(r'=\.\.\.=', r'=', params)
        for name in params.split(','):
            if '=' in name:
                (name, default_repr) = name.split('=', 1)
                default = GenericValueDoc(parse_repr=default_repr)
            else:
                default = None
            name = name.strip()
            if name == '...':
                func_doc.vararg = '...'
            elif name.startswith('**'):
                func_doc.kwarg = name[2:]
            elif name.startswith('*'):
                func_doc.vararg = name[1:]
            else:
                func_doc.posargs.append(name)
                # Add a default slot for the new positional argument,
                # or overwrite an already existing slot when the
                # signature supplies an explicit default.
                if len(func_doc.posarg_defaults) < len(func_doc.posargs):
                    func_doc.posarg_defaults.append(default)
                elif default is not None:
                    argnum = len(func_doc.posargs)-1
                    func_doc.posarg_defaults[argnum] = default

    # Extract the return type/value from the signature
    if rtype:
        func_doc.return_descr = markup.parse(rtype, 'plaintext')

    # Add the self parameter, if it was specified.
    if selfparam:
        func_doc.posargs.insert(0, selfparam)
        func_doc.posarg_defaults.insert(0, None)

    # Remove the signature from the docstring.
    doc_source.docstring = doc_source.docstring[m.end():]

    # We found a signature.
    return True
1066