Package epydoc :: Package markup :: Module epytext
[hide private]
[frames] | [no frames]

Source Code for Module epydoc.markup.epytext

   1  # 
   2  # epytext.py: epydoc formatted docstring parsing 
   3  # Edward Loper 
   4  # 
   5  # Created [04/10/01 12:00 AM] 
   6  # $Id: epytext.py 1552 2007-02-26 23:12:41Z edloper $ 
   7  # 
   8   
   9  """ 
  10  Parser for epytext strings.  Epytext is a lightweight markup whose 
  11  primary intended application is Python documentation strings.  This 
  12  parser converts Epytext strings to a simple DOM-like representation 
  13  (encoded as a tree of L{Element} objects and strings).  Epytext 
  14  strings can contain the following X{structural blocks}: 
  15   
  16      - X{epytext}: The top-level element of the DOM tree. 
  17      - X{para}: A paragraph of text.  Paragraphs contain no newlines,  
  18        and all spaces are soft. 
  19      - X{section}: A section or subsection. 
  20      - X{field}: A tagged field.  These fields provide information 
  21        about specific aspects of a Python object, such as the 
  22        description of a function's parameter, or the author of a 
  23        module. 
  24      - X{literalblock}: A block of literal text.  This text should be 
  25        displayed as it would be displayed in plaintext.  The 
  26        parser removes the appropriate amount of leading whitespace  
  27        from each line in the literal block. 
  28      - X{doctestblock}: A block containing sample python code, 
  29        formatted according to the specifications of the C{doctest} 
  30        module. 
  31      - X{ulist}: An unordered list. 
  32      - X{olist}: An ordered list. 
  33      - X{li}: A list item.  This tag is used both for unordered list 
  34        items and for ordered list items. 
  35   
  36  Additionally, the following X{inline regions} may be used within 
  37  C{para} blocks: 
  38       
  39      - X{code}:   Source code and identifiers. 
  40      - X{math}:   Mathematical expressions. 
  41      - X{index}:  A term which should be included in an index, if one 
  42                   is generated. 
  43      - X{italic}: Italicized text. 
  44      - X{bold}:   Bold-faced text. 
  45      - X{uri}:    A Universal Resource Indicator (URI) or Universal 
  46                   Resource Locator (URL) 
  47      - X{link}:   A Python identifier which should be hyperlinked to 
  48                   the named object's documentation, when possible. 
  49   
   50  The returned DOM tree will conform to the following Document Type 
  51  Description:: 
  52   
  53     <!ENTITY % colorized '(code | math | index | italic | 
  54                            bold | uri | link | symbol)*'> 
  55   
  56     <!ELEMENT epytext ((para | literalblock | doctestblock | 
  57                        section | ulist | olist)*, fieldlist?)> 
  58   
  59     <!ELEMENT para (#PCDATA | %colorized;)*> 
  60   
  61     <!ELEMENT section (para | listblock | doctestblock | 
  62                        section | ulist | olist)+> 
  63   
  64     <!ELEMENT fieldlist (field+)> 
   65     <!ELEMENT field (tag, arg?, (para | listblock | doctestblock | 
   66                                  ulist | olist)+)> 
  67     <!ELEMENT tag (#PCDATA)> 
  68     <!ELEMENT arg (#PCDATA)> 
  69      
  70     <!ELEMENT literalblock (#PCDATA | %colorized;)*> 
  71     <!ELEMENT doctestblock (#PCDATA)> 
  72   
  73     <!ELEMENT ulist (li+)> 
  74     <!ELEMENT olist (li+)> 
  75     <!ELEMENT li (para | literalblock | doctestblock | ulist | olist)+> 
  76     <!ATTLIST li bullet NMTOKEN #IMPLIED> 
  77     <!ATTLIST olist start NMTOKEN #IMPLIED> 
  78   
  79     <!ELEMENT uri     (name, target)> 
  80     <!ELEMENT link    (name, target)> 
  81     <!ELEMENT name    (#PCDATA | %colorized;)*> 
  82     <!ELEMENT target  (#PCDATA)> 
  83      
  84     <!ELEMENT code    (#PCDATA | %colorized;)*> 
  85     <!ELEMENT math    (#PCDATA | %colorized;)*> 
  86     <!ELEMENT italic  (#PCDATA | %colorized;)*> 
  87     <!ELEMENT bold    (#PCDATA | %colorized;)*> 
  88     <!ELEMENT indexed (#PCDATA | %colorized;)> 
  89     <!ATTLIST code style CDATA #IMPLIED> 
  90   
  91     <!ELEMENT symbol (#PCDATA)> 
  92   
   93  @var SYMBOLS: A list of the escape symbols that are supported 
  94        by epydoc.  Currently the following symbols are supported: 
  95  <<<SYMBOLS>>> 
  96  """ 
  97  # Note: the symbol list is appended to the docstring automatically, 
  98  # below. 
  99   
 100  __docformat__ = 'epytext en' 
 101   
 102  # Code organization.. 
 103  #   1. parse() 
 104  #   2. tokenize() 
 105  #   3. colorize() 
 106  #   4. helpers 
 107  #   5. testing 
 108   
 109  import re, string, types, sys, os.path 
 110  from epydoc.markup import * 
 111  from epydoc.util import wordwrap, plaintext_to_html, plaintext_to_latex 
 112  from epydoc.markup.doctest import doctest_to_html, doctest_to_latex 
 113   
 114  ################################################## 
 115  ## DOM-Like Encoding 
 116  ################################################## 
 117   
class Element:
    """
    A lightweight DOM-style node used to represent parsed epytext
    documents.  A document is a tree whose interior nodes are
    C{Element}s and whose leaves are plain strings.  Every node
    carries a I{tag}, an ordered list of children, and a mapping of
    string attribute names to string values.
    """
    def __init__(self, tag, *children, **attribs):
        # The element type, e.g. 'para' or 'epytext'.
        self.tag = tag
        # Ordered child nodes: a mix of strings and Elements.
        self.children = list(children)
        # Attribute name -> attribute value for this element.
        self.attribs = attribs

    def __str__(self):
        """
        Render this element (recursively) using XML notation.
        @bug: Doesn't escape '<' or '&' or '>'.
        """
        attr_text = ''
        for item in self.attribs.items():
            attr_text += ' %s=%r' % item
        body = ''.join([str(kid) for kid in self.children])
        return '<%s%s>%s</%s>' % (self.tag, attr_text, body, self.tag)

    def __repr__(self):
        parts = [', %r' % kid for kid in self.children]
        parts += [', %s=%r' % item for item in self.attribs.items()]
        return 'Element(%s%s)' % (self.tag, ''.join(parts))
155 156 ################################################## 157 ## Constants 158 ################################################## 159 160 # The possible heading underline characters, listed in order of 161 # heading depth. 162 _HEADING_CHARS = "=-~" 163 164 # Escape codes. These should be needed very rarely. 165 _ESCAPES = {'lb':'{', 'rb': '}'} 166 167 # Symbols. These can be generated via S{...} escapes. 168 SYMBOLS = [ 169 # Arrows 170 '<-', '->', '^', 'v', 171 172 # Greek letters 173 'alpha', 'beta', 'gamma', 'delta', 'epsilon', 'zeta', 174 'eta', 'theta', 'iota', 'kappa', 'lambda', 'mu', 175 'nu', 'xi', 'omicron', 'pi', 'rho', 'sigma', 176 'tau', 'upsilon', 'phi', 'chi', 'psi', 'omega', 177 'Alpha', 'Beta', 'Gamma', 'Delta', 'Epsilon', 'Zeta', 178 'Eta', 'Theta', 'Iota', 'Kappa', 'Lambda', 'Mu', 179 'Nu', 'Xi', 'Omicron', 'Pi', 'Rho', 'Sigma', 180 'Tau', 'Upsilon', 'Phi', 'Chi', 'Psi', 'Omega', 181 182 # HTML character entities 183 'larr', 'rarr', 'uarr', 'darr', 'harr', 'crarr', 184 'lArr', 'rArr', 'uArr', 'dArr', 'hArr', 185 'copy', 'times', 'forall', 'exist', 'part', 186 'empty', 'isin', 'notin', 'ni', 'prod', 'sum', 187 'prop', 'infin', 'ang', 'and', 'or', 'cap', 'cup', 188 'int', 'there4', 'sim', 'cong', 'asymp', 'ne', 189 'equiv', 'le', 'ge', 'sub', 'sup', 'nsub', 190 'sube', 'supe', 'oplus', 'otimes', 'perp', 191 192 # Alternate (long) names 193 'infinity', 'integral', 'product', 194 '>=', '<=', 195 ] 196 # Convert to a dictionary, for quick lookup 197 _SYMBOLS = {} 198 for symbol in SYMBOLS: _SYMBOLS[symbol] = 1 199 200 # Add symbols to the docstring. 201 symblist = ' ' 202 symblist += ';\n '.join([' - C{E{S}{%s}}=S{%s}' % (symbol, symbol) 203 for symbol in SYMBOLS]) 204 __doc__ = __doc__.replace('<<<SYMBOLS>>>', symblist) 205 del symbol, symblist 206 207 # Tags for colorizing text. 
208 _COLORIZING_TAGS = { 209 'C': 'code', 210 'M': 'math', 211 'X': 'indexed', 212 'I': 'italic', 213 'B': 'bold', 214 'U': 'uri', 215 'L': 'link', # A Python identifier that should be linked to 216 'E': 'escape', # escapes characters or creates symbols 217 'S': 'symbol', 218 'G': 'graph', 219 } 220 221 # Which tags can use "link syntax" (e.g., U{Python<www.python.org>})? 222 _LINK_COLORIZING_TAGS = ['link', 'uri'] 223 224 ################################################## 225 ## Structuring (Top Level) 226 ################################################## 227
def parse(str, errors = None):
    """
    Return a DOM tree encoding the contents of an epytext string.  Any
    errors generated during parsing will be stored in C{errors}.

    @param str: The epytext string to parse.
    @type str: C{string}
    @param errors: A list where any errors generated during parsing
        will be stored.  If no list is specified, then fatal errors
        will generate exceptions, and non-fatal errors will be
        ignored.
    @type errors: C{list} of L{ParseError}
    @return: a DOM tree encoding the contents of an epytext string,
        or C{None} if non-fatal errors were encountered and no
        C{errors} list was provided.
    @rtype: C{Element}
    @raise ParseError: If C{errors} is C{None} and an error is
        encountered while parsing.
    """
    # Initialize errors list.  (Use `is None`, not `== None`.)
    if errors is None:
        errors = []
        raise_on_error = 1
    else:
        raise_on_error = 0

    # Preprocess the string: normalize CRLF line endings and expand
    # tabs.  Note: the original called string.expandtabs(str), which
    # no longer exists in the Python 3 `string` module; the equivalent
    # str method works on both Python 2 and 3.
    str = re.sub('\r\n', '\n', str)
    str = str.expandtabs()

    # Tokenize the input string.
    tokens = _tokenize(str, errors)

    # Have we encountered a field yet?
    encountered_field = 0

    # Create a document to hold the epytext.
    doc = Element('epytext')

    # Maintain two parallel stacks: one contains DOM elements, and
    # gives the ancestors of the current block.  The other contains
    # indentation values, and gives the indentation of the
    # corresponding DOM elements.  An indentation of "None" reflects
    # an unknown indentation.  However, the indentation must be
    # greater than, or greater than or equal to, the indentation of
    # the prior element (depending on what type of DOM element it
    # corresponds to).  No 2 consecutive indent_stack values will
    # ever be "None."  Use initial dummy elements in the stack, so we
    # don't have to worry about bounds checking.
    stack = [None, doc]
    indent_stack = [-1, None]

    for token in tokens:
        # Pop any completed blocks off the stack.
        _pop_completed_blocks(token, stack, indent_stack)

        # Dispatch on the token type.
        if token.tag == Token.PARA:
            _add_para(doc, token, stack, indent_stack, errors)
        elif token.tag == Token.HEADING:
            _add_section(doc, token, stack, indent_stack, errors)
        elif token.tag == Token.LBLOCK:
            stack[-1].children.append(token.to_dom(doc))
        elif token.tag == Token.DTBLOCK:
            stack[-1].children.append(token.to_dom(doc))
        elif token.tag == Token.BULLET:
            _add_list(doc, token, stack, indent_stack, errors)
        else:
            assert 0, 'Unknown token type: '+token.tag

        # Check if the DOM element we just added was a field.  Fields
        # must come last; anything structural after the field list
        # (stack depth <= 3) is an error.
        if stack[-1].tag == 'field':
            encountered_field = 1
        elif encountered_field == 1:
            if len(stack) <= 3:
                estr = ("Fields must be the final elements in an "+
                        "epytext string.")
                errors.append(StructuringError(estr, token.startline))

    # If there was a fatal error, then signal it!
    if any(e.is_fatal() for e in errors):
        if raise_on_error:
            raise errors[0]
        else:
            return None

    # Return the top-level epytext DOM element.
    return doc
328
329 -def _pop_completed_blocks(token, stack, indent_stack):
330 """ 331 Pop any completed blocks off the stack. This includes any 332 blocks that we have dedented past, as well as any list item 333 blocks that we've dedented to. The top element on the stack 334 should only be a list if we're about to start a new list 335 item (i.e., if the next token is a bullet). 336 """ 337 indent = token.indent 338 if indent != None: 339 while (len(stack) > 2): 340 pop = 0 341 342 # Dedent past a block 343 if indent_stack[-1]!=None and indent<indent_stack[-1]: pop=1 344 elif indent_stack[-1]==None and indent<indent_stack[-2]: pop=1 345 346 # Dedent to a list item, if it is follwed by another list 347 # item with the same indentation. 348 elif (token.tag == 'bullet' and indent==indent_stack[-2] and 349 stack[-1].tag in ('li', 'field')): pop=1 350 351 # End of a list (no more list items available) 352 elif (stack[-1].tag in ('ulist', 'olist') and 353 (token.tag != 'bullet' or token.contents[-1] == ':')): 354 pop=1 355 356 # Pop the block, if it's complete. Otherwise, we're done. 357 if pop == 0: return 358 stack.pop() 359 indent_stack.pop()
360
def _add_para(doc, para_token, stack, indent_stack, errors):
    """Colorize the given paragraph, and add it to the DOM tree."""
    # An unknown parent indentation adopts this paragraph's indent.
    if indent_stack[-1] is None:
        indent_stack[-1] = para_token.indent
    if para_token.indent != indent_stack[-1]:
        # Indentation mismatch: report, and drop the paragraph.
        errors.append(StructuringError("Improper paragraph indentation.",
                                       para_token.startline))
        return
    # Colorize the paragraph and attach it to the current block.
    stack[-1].children.append(_colorize(doc, para_token, errors))
374
def _add_section(doc, heading_token, stack, indent_stack, errors):
    """Add a new section to the DOM tree, with the given heading."""
    # An unknown parent indentation adopts this heading's indent;
    # otherwise the indents must agree.
    if indent_stack[-1] is None:
        indent_stack[-1] = heading_token.indent
    elif indent_stack[-1] != heading_token.indent:
        errors.append(StructuringError("Improper heading indentation.",
                                       heading_token.startline))

    # Headings may only be nested inside other sections.
    for ancestor in stack[2:]:
        if ancestor.tag != "section":
            errors.append(StructuringError(
                "Headings must occur at the top level.",
                heading_token.startline))
            break

    # A heading at level k lives at stack depth k+2 (two dummy
    # entries precede the sections).
    depth = heading_token.level + 2
    if depth > len(stack):
        errors.append(StructuringError(
            "Wrong underline character for heading.",
            heading_token.startline))

    # Truncate both stacks so the new section attaches at its level.
    del stack[depth:]
    del indent_stack[depth:]

    # Colorize the heading.
    heading_elt = _colorize(doc, heading_token, errors, 'heading')

    # Add the section's and heading's DOM elements.
    section_elt = Element("section")
    stack[-1].children.append(section_elt)
    stack.append(section_elt)
    section_elt.children.append(heading_elt)
    indent_stack.append(None)
407
def _add_list(doc, bullet_token, stack, indent_stack, errors):
    """
    Add a new list item or field to the DOM tree, with the given
    bullet or field tag.  When necessary, create the associated
    list.

    @param doc: The top-level 'epytext' element (unused here, kept
        for signature consistency with the other _add_* helpers).
    @param bullet_token: The BULLET token whose contents determine
        the list type ('-' ulist, '.' olist, ':' fieldlist).
    @param stack: The stack of open DOM elements.
    @param indent_stack: The parallel stack of indentations.
    @param errors: List where L{StructuringError}s are appended.
    """
    # Determine what type of bullet it is.
    if bullet_token.contents[-1] == '-':
        list_type = 'ulist'
    elif bullet_token.contents[-1] == '.':
        list_type = 'olist'
    elif bullet_token.contents[-1] == ':':
        list_type = 'fieldlist'
    else:
        raise AssertionError('Bad Bullet: %r' % bullet_token.contents)

    # Is this a new list?
    newlist = 0
    if stack[-1].tag != list_type:
        newlist = 1
    elif list_type == 'olist' and stack[-1].tag == 'olist':
        # An ordered-list item continues the current list only if its
        # number follows directly from the previous item's number.
        old_listitem = stack[-1].children[-1]
        old_bullet = old_listitem.attribs.get("bullet").split('.')[:-1]
        new_bullet = bullet_token.contents.split('.')[:-1]
        if (new_bullet[:-1] != old_bullet[:-1] or
            int(new_bullet[-1]) != int(old_bullet[-1])+1):
            newlist = 1

    # Create the new list.
    if newlist:
        # BUGFIX: was `stack[-1].tag is 'fieldlist'` -- identity
        # comparison with a string literal is unreliable; use `==`.
        if stack[-1].tag == 'fieldlist':
            # The new list item is not a field list item (since this
            # is a new list); but it's indented the same as the field
            # list.  This either means that they forgot to indent the
            # list, or they are trying to put something after the
            # field list.  The first one seems more likely, so we'll
            # just warn about that (to avoid confusion).
            estr = "Lists must be indented."
            errors.append(StructuringError(estr, bullet_token.startline))
        if stack[-1].tag in ('ulist', 'olist', 'fieldlist'):
            stack.pop()
            indent_stack.pop()

        if (list_type != 'fieldlist' and indent_stack[-1] is not None and
            bullet_token.indent == indent_stack[-1]):
            # Ignore this error if there's text on the same line as
            # the comment-opening quote -- epydoc can't reliably
            # determine the indentation for that line.
            if bullet_token.startline != 1 or bullet_token.indent != 0:
                estr = "Lists must be indented."
                errors.append(StructuringError(estr, bullet_token.startline))

        if list_type == 'fieldlist':
            # Fieldlist should be at the top-level.
            for tok in stack[2:]:
                if tok.tag != "section":
                    estr = "Fields must be at the top level."
                    errors.append(
                        StructuringError(estr, bullet_token.startline))
                    break
            stack[2:] = []
            indent_stack[2:] = []

        # Add the new list.
        lst = Element(list_type)
        stack[-1].children.append(lst)
        stack.append(lst)
        indent_stack.append(bullet_token.indent)
        if list_type == 'olist':
            start = bullet_token.contents.split('.')[:-1]
            # BUGFIX: the original compared the *list* `start` to the
            # string '1', which is always unequal, so the "start"
            # attribute was emitted even for lists beginning at 1.
            # Only record a non-default start value.
            if start[-1] != '1':
                lst.attribs["start"] = start[-1]

    # Fields are treated somewhat specially: A "fieldlist"
    # node is created to make the parsing simpler, but fields
    # are adjoined directly into the "epytext" node, not into
    # the "fieldlist" node.
    if list_type == 'fieldlist':
        li = Element("field")
        token_words = bullet_token.contents[1:-1].split(None, 1)
        tag_elt = Element("tag")
        tag_elt.children.append(token_words[0])
        li.children.append(tag_elt)

        if len(token_words) > 1:
            arg_elt = Element("arg")
            arg_elt.children.append(token_words[1])
            li.children.append(arg_elt)
    else:
        li = Element("li")
        if list_type == 'olist':
            li.attribs["bullet"] = bullet_token.contents

    # Add the bullet.
    stack[-1].children.append(li)
    stack.append(li)
    indent_stack.append(None)
505 506 ################################################## 507 ## Tokenization 508 ################################################## 509
class Token:
    """
    An intermediate data structure used while building the structuring
    DOM tree for a formatted docstring.  There are five kinds of
    C{Token}: paragraphs, literal blocks, doctest blocks, headings,
    and bullets.

    The text of each C{Token} is kept, normalized, in C{contents}:
    paragraphs are collapsed to a single line with soft spaces, and
    literal/doctest blocks have their leading whitespace stripped.
    Each C{Token} also records an indentation level (C{indent}), which
    the structuring procedure uses to assemble hierarchical blocks.

    @type tag: C{string}
    @ivar tag: This C{Token}'s type; one of the class constants
        C{PARA}, C{LBLOCK}, C{DTBLOCK}, C{HEADING}, or C{BULLET}.
    @type startline: C{int}
    @ivar startline: The line on which this C{Token} begins; only
        used when issuing errors.
    @type contents: C{string}
    @ivar contents: The normalized text contained in this C{Token}.
    @type indent: C{int} or C{None}
    @ivar indent: The indentation level of this C{Token} (in number
        of leading spaces).  C{None} means unknown indentation; used
        for list items and fields that begin with one-line paragraphs.
    @type level: C{int} or C{None}
    @ivar level: The heading-level of this C{Token} if it is a
        heading (valid values: 0, 1, 2); C{None} otherwise.

    @type PARA: C{string}
    @cvar PARA: The C{tag} value for paragraph C{Token}s.
    @type LBLOCK: C{string}
    @cvar LBLOCK: The C{tag} value for literal C{Token}s.
    @type DTBLOCK: C{string}
    @cvar DTBLOCK: The C{tag} value for doctest C{Token}s.
    @type HEADING: C{string}
    @cvar HEADING: The C{tag} value for heading C{Token}s.
    @type BULLET: C{string}
    @cvar BULLET: The C{tag} value for bullet C{Token}s; also used
        for field tags, which behave syntactically like list items.
    """
    # The possible token types.
    PARA = "para"
    LBLOCK = "literalblock"
    DTBLOCK = "doctestblock"
    HEADING = "heading"
    BULLET = "bullet"

    def __init__(self, tag, startline, contents, indent, level=None):
        """
        Create a new C{Token}.

        @param tag: The type of the new C{Token}.
        @type tag: C{string}
        @param startline: The line on which the new C{Token} begins.
        @type startline: C{int}
        @param contents: The normalized contents of the new C{Token}.
        @type contents: C{string}
        @param indent: The indentation of the new C{Token} (in number
            of leading spaces); C{None} if unknown.
        @type indent: C{int} or C{None}
        @param level: The heading-level of this C{Token} if it is a
            heading; C{None}, otherwise.
        @type level: C{int} or C{None}
        """
        self.tag = tag
        self.startline = startline
        self.contents = contents
        self.indent = indent
        self.level = level

    def __repr__(self):
        """
        @rtype: C{string}
        @return: the formal representation of this C{Token}, of the
            form::
                <Token: para at line 12>
        """
        return '<Token: %s at line %s>' % (self.tag, self.startline)

    def to_dom(self, doc):
        """
        @return: a DOM representation of this C{Token}: an element
            tagged with this token's tag, whose single child is this
            token's contents.
        @rtype: L{Element}
        """
        element = Element(self.tag)
        element.children.append(self.contents)
        return element
618 619 # Construct regular expressions for recognizing bullets. These are 620 # global so they don't have to be reconstructed each time we tokenize 621 # a docstring. 622 _ULIST_BULLET = '[-]( +|$)' 623 _OLIST_BULLET = '(\d+[.])+( +|$)' 624 _FIELD_BULLET = '@\w+( [^{}:\n]+)?:' 625 _BULLET_RE = re.compile(_ULIST_BULLET + '|' + 626 _OLIST_BULLET + '|' + 627 _FIELD_BULLET) 628 _LIST_BULLET_RE = re.compile(_ULIST_BULLET + '|' + _OLIST_BULLET) 629 _FIELD_BULLET_RE = re.compile(_FIELD_BULLET) 630 del _ULIST_BULLET, _OLIST_BULLET, _FIELD_BULLET 631
def _tokenize_doctest(lines, start, block_indent, tokens, errors):
    """
    Construct a L{Token} containing the doctest block starting at
    C{lines[start]}, and append it to C{tokens}.  C{block_indent}
    should be the indentation of the doctest block.  Any errors
    generated while tokenizing the doctest block will be appended to
    C{errors}.

    @param lines: The list of lines to be tokenized
    @param start: The index into C{lines} of the first line of the
        doctest block to be tokenized.
    @param block_indent: The indentation of C{lines[start]}.  This is
        the indentation of the doctest block.
    @param tokens: The output list to which the new C{Token} is
        appended.
    @param errors: A list where any errors generated during parsing
        will be stored.  If no list is specified, then errors will
        generate exceptions.
    @return: The line number of the first line following the doctest
        block.

    @type lines: C{list} of C{string}
    @type start: C{int}
    @type block_indent: C{int}
    @type tokens: C{list} of L{Token}
    @type errors: C{list} of L{ParseError}
    @rtype: C{int}
    """
    # If they dedent past block_indent, keep track of the minimum
    # indentation.  This is used when removing leading indentation
    # from the lines of the doctest block.
    min_indent = block_indent

    linenum = start + 1
    while linenum < len(lines):
        # Find the indentation of this line.
        line = lines[linenum]
        indent = len(line) - len(line.lstrip())

        # A blank line ends the doctest block.
        if indent == len(line): break

        # A dedent past block_indent is an error; record it but keep
        # scanning (min_indent tracks how far out the text strayed).
        if indent < block_indent:
            min_indent = min(min_indent, indent)
            estr = 'Improper doctest block indentation.'
            errors.append(TokenizationError(estr, linenum))

        # Go on to the next line.
        linenum += 1

    # Add the token (with min_indent leading columns stripped from
    # every line), and return the linenum after the token ends.
    contents = [line[min_indent:] for line in lines[start:linenum]]
    contents = '\n'.join(contents)
    tokens.append(Token(Token.DTBLOCK, start, contents, block_indent))
    return linenum
686
def _tokenize_literal(lines, start, block_indent, tokens, errors):
    """
    Construct a L{Token} containing the literal block starting at
    C{lines[start]}, and append it to C{tokens}.  C{block_indent}
    should be the indentation of the literal block.  Any errors
    generated while tokenizing the literal block will be appended to
    C{errors}.

    @param lines: The list of lines to be tokenized
    @param start: The index into C{lines} of the first line of the
        literal block to be tokenized.
    @param block_indent: The indentation of C{lines[start]}.  This is
        the indentation of the literal block.
    @param tokens: The output list to which the new C{Token} is
        appended.
    @param errors: A list of the errors generated by parsing.  Any
        new errors generated while tokenizing this block will be
        appended to this list.
    @return: The line number of the first line following the literal
        block.

    @type lines: C{list} of C{string}
    @type start: C{int}
    @type block_indent: C{int}
    @type tokens: C{list} of L{Token}
    @type errors: C{list} of L{ParseError}
    @rtype: C{int}
    """
    linenum = start + 1
    while linenum < len(lines):
        # Find the indentation of this line.
        line = lines[linenum]
        indent = len(line) - len(line.lstrip())

        # A dedent to (or past) block_indent ends the literal block.
        # (Ignore blank lines, though.)
        if len(line) != indent and indent <= block_indent:
            break

        # Go on to the next line.
        linenum += 1

    # Add the token, and return the linenum after the token ends.
    # One extra column (block_indent+1) is stripped, since the block's
    # body is indented relative to the introducing paragraph; leading
    # and trailing blank lines are trimmed off.
    contents = [line[block_indent+1:] for line in lines[start:linenum]]
    contents = '\n'.join(contents)
    contents = re.sub('(\A[ \n]*\n)|(\n[ \n]*\Z)', '', contents)
    tokens.append(Token(Token.LBLOCK, start, contents, block_indent))
    return linenum
733
def _tokenize_listart(lines, start, bullet_indent, tokens, errors):
    """
    Construct L{Token}s for the bullet and the first paragraph of the
    list item (or field) starting at C{lines[start]}, and append them
    to C{tokens}.  C{bullet_indent} should be the indentation of the
    list item.  Any errors generated while tokenizing will be
    appended to C{errors}.

    @param lines: The list of lines to be tokenized
    @param start: The index into C{lines} of the first line of the
        list item to be tokenized.
    @param bullet_indent: The indentation of C{lines[start]}.  This is
        the indentation of the list item.
    @param tokens: The output list to which the new C{Token}s are
        appended (a BULLET token, then possibly a PARA token).
    @param errors: A list of the errors generated by parsing.  Any
        new errors generated while tokenizing this paragraph
        will be appended to this list.
    @return: The line number of the first line following the list
        item's first paragraph.

    @type lines: C{list} of C{string}
    @type start: C{int}
    @type bullet_indent: C{int}
    @type tokens: C{list} of L{Token}
    @type errors: C{list} of L{ParseError}
    @rtype: C{int}
    """
    linenum = start + 1
    para_indent = None
    # Does the bullet line itself end in '::' (literal-block marker)?
    doublecolon = lines[start].rstrip()[-2:] == '::'

    # Get the contents of the bullet.
    para_start = _BULLET_RE.match(lines[start], bullet_indent).end()
    bcontents = lines[start][bullet_indent:para_start].strip()

    while linenum < len(lines):
        # Find the indentation of this line.
        line = lines[linenum]
        indent = len(line) - len(line.lstrip())

        # "::" markers end paragraphs.
        if doublecolon: break
        if line.rstrip()[-2:] == '::': doublecolon = 1

        # A blank line ends the token.
        if indent == len(line): break

        # Dedenting past bullet_indent ends the list item.
        if indent < bullet_indent: break

        # A line beginning with a bullet ends the token.
        if _BULLET_RE.match(line, indent): break

        # If this is the second line, adopt its indentation as the
        # paragraph indentation.
        if para_indent == None: para_indent = indent

        # A change in indentation ends the token.
        if indent != para_indent: break

        # Go on to the next line.
        linenum += 1

    # Add the bullet token.
    tokens.append(Token(Token.BULLET, start, bcontents, bullet_indent))

    # Add the paragraph token: the remainder of the bullet line plus
    # any continuation lines, joined with soft spaces.
    pcontents = ([lines[start][para_start:].strip()] +
                 [line.strip() for line in lines[start+1:linenum]])
    pcontents = ' '.join(pcontents).strip()
    if pcontents:
        tokens.append(Token(Token.PARA, start, pcontents, para_indent))

    # Return the linenum after the paragraph token ends.
    return linenum
808
def _tokenize_para(lines, start, para_indent, tokens, errors):
    """
    Construct a L{Token} containing the paragraph starting at
    C{lines[start]}, and append it to C{tokens}.  C{para_indent}
    should be the indentation of the paragraph.  If the paragraph
    turns out to be a two-line heading (text plus a matching
    underline of C{_HEADING_CHARS}), a HEADING token is produced
    instead.  Any errors generated while tokenizing the paragraph
    will be appended to C{errors}.

    @param lines: The list of lines to be tokenized
    @param start: The index into C{lines} of the first line of the
        paragraph to be tokenized.
    @param para_indent: The indentation of C{lines[start]}.  This is
        the indentation of the paragraph.
    @param tokens: The output list to which the new C{Token} is
        appended.
    @param errors: A list of the errors generated by parsing.  Any
        new errors generated while tokenizing this paragraph
        will be appended to this list.
    @return: The line number of the first line following the
        paragraph.

    @type lines: C{list} of C{string}
    @type start: C{int}
    @type para_indent: C{int}
    @type tokens: C{list} of L{Token}
    @type errors: C{list} of L{ParseError}
    @rtype: C{int}
    """
    linenum = start + 1
    doublecolon = 0
    while linenum < len(lines):
        # Find the indentation of this line.
        line = lines[linenum]
        indent = len(line) - len(line.lstrip())

        # "::" markers end paragraphs.
        if doublecolon: break
        if line.rstrip()[-2:] == '::': doublecolon = 1

        # Blank lines end paragraphs.
        if indent == len(line): break

        # Indentation changes end paragraphs.
        if indent != para_indent: break

        # List bullets end paragraphs.
        if _BULLET_RE.match(line, indent): break

        # Check for mal-formatted field items (an '@' that didn't
        # parse as a field bullet).
        if line[indent] == '@':
            estr = "Possible mal-formatted field item."
            errors.append(TokenizationError(estr, linenum, is_fatal=0))

        # Go on to the next line.
        linenum += 1

    contents = [line.strip() for line in lines[start:linenum]]

    # Does this token look like a heading?  It must be exactly two
    # lines, the second made of a heading character, with lengths
    # within 5 characters of each other.
    if ((len(contents) < 2) or
        (contents[1][0] not in _HEADING_CHARS) or
        (abs(len(contents[0])-len(contents[1])) > 5)):
        looks_like_heading = 0
    else:
        looks_like_heading = 1
        # The underline must repeat a single character.
        for char in contents[1]:
            if char != contents[1][0]:
                looks_like_heading = 0
                break

    if looks_like_heading:
        if len(contents[0]) != len(contents[1]):
            # Close but not exact: warn, then fall through and treat
            # the two lines as an ordinary paragraph.
            estr = ("Possible heading typo: the number of "+
                    "underline characters must match the "+
                    "number of heading characters.")
            errors.append(TokenizationError(estr, start, is_fatal=0))
        else:
            # The underline character determines the heading level.
            level = _HEADING_CHARS.index(contents[1][0])
            tokens.append(Token(Token.HEADING, start,
                                contents[0], para_indent, level))
            return start+2

    # Add the paragraph token, and return the linenum after it ends.
    contents = ' '.join(contents)
    tokens.append(Token(Token.PARA, start, contents, para_indent))
    return linenum
893
894 -def _tokenize(str, errors):
895 """ 896 Split a given formatted docstring into an ordered list of 897 C{Token}s, according to the epytext markup rules. 898 899 @param str: The epytext string 900 @type str: C{string} 901 @param errors: A list where any errors generated during parsing 902 will be stored. If no list is specified, then errors will 903 generate exceptions. 904 @type errors: C{list} of L{ParseError} 905 @return: a list of the C{Token}s that make up the given string. 906 @rtype: C{list} of L{Token} 907 """ 908 tokens = [] 909 lines = str.split('\n') 910 911 # Scan through the lines, determining what @type of token we're 912 # dealing with, and tokenizing it, as appropriate. 913 linenum = 0 914 while linenum < len(lines): 915 # Get the current line and its indentation. 916 line = lines[linenum] 917 indent = len(line)-len(line.lstrip()) 918 919 if indent == len(line): 920 # Ignore blank lines. 921 linenum += 1 922 continue 923 elif line[indent:indent+4] == '>>> ': 924 # blocks starting with ">>> " are doctest block tokens. 925 linenum = _tokenize_doctest(lines, linenum, indent, 926 tokens, errors) 927 elif _BULLET_RE.match(line, indent): 928 # blocks starting with a bullet are LI start tokens. 929 linenum = _tokenize_listart(lines, linenum, indent, 930 tokens, errors) 931 if tokens[-1].indent != None: 932 indent = tokens[-1].indent 933 else: 934 # Check for mal-formatted field items. 935 if line[indent] == '@': 936 estr = "Possible mal-formatted field item." 937 errors.append(TokenizationError(estr, linenum, is_fatal=0)) 938 939 # anything else is either a paragraph or a heading. 940 linenum = _tokenize_para(lines, linenum, indent, tokens, errors) 941 942 # Paragraph tokens ending in '::' initiate literal blocks. 943 if (tokens[-1].tag == Token.PARA and 944 tokens[-1].contents[-2:] == '::'): 945 tokens[-1].contents = tokens[-1].contents[:-1] 946 linenum = _tokenize_literal(lines, linenum, indent, tokens, errors) 947 948 return tokens
949 950 951 ################################################## 952 ## Inline markup ("colorizing") 953 ################################################## 954 955 # Assorted regular expressions used for colorizing. 956 _BRACE_RE = re.compile('{|}') 957 _TARGET_RE = re.compile('^(.*?)\s*<(?:URI:|URL:)?([^<>]+)>$') 958
959 -def _colorize(doc, token, errors, tagName='para'):
960 """ 961 Given a string containing the contents of a paragraph, produce a 962 DOM C{Element} encoding that paragraph. Colorized regions are 963 represented using DOM C{Element}s, and text is represented using 964 DOM C{Text}s. 965 966 @param errors: A list of errors. Any newly generated errors will 967 be appended to this list. 968 @type errors: C{list} of C{string} 969 970 @param tagName: The element tag for the DOM C{Element} that should 971 be generated. 972 @type tagName: C{string} 973 974 @return: a DOM C{Element} encoding the given paragraph. 975 @returntype: C{Element} 976 """ 977 str = token.contents 978 linenum = 0 979 980 # Maintain a stack of DOM elements, containing the ancestors of 981 # the text currently being analyzed. New elements are pushed when 982 # "{" is encountered, and old elements are popped when "}" is 983 # encountered. 984 stack = [Element(tagName)] 985 986 # This is just used to make error-reporting friendlier. It's a 987 # stack parallel to "stack" containing the index of each element's 988 # open brace. 989 openbrace_stack = [0] 990 991 # Process the string, scanning for '{' and '}'s. start is the 992 # index of the first unprocessed character. Each time through the 993 # loop, we process the text from the first unprocessed character 994 # to the next open or close brace. 995 start = 0 996 while 1: 997 match = _BRACE_RE.search(str, start) 998 if match == None: break 999 end = match.start() 1000 1001 # Open braces start new colorizing elements. When preceeded 1002 # by a capital letter, they specify a colored region, as 1003 # defined by the _COLORIZING_TAGS dictionary. Otherwise, 1004 # use a special "literal braces" element (with tag "litbrace"), 1005 # and convert them to literal braces once we find the matching 1006 # close-brace. 
1007 if match.group() == '{': 1008 if (end>0) and 'A' <= str[end-1] <= 'Z': 1009 if (end-1) > start: 1010 stack[-1].children.append(str[start:end-1]) 1011 if not _COLORIZING_TAGS.has_key(str[end-1]): 1012 estr = "Unknown inline markup tag." 1013 errors.append(ColorizingError(estr, token, end-1)) 1014 stack.append(Element('unknown')) 1015 else: 1016 tag = _COLORIZING_TAGS[str[end-1]] 1017 stack.append(Element(tag)) 1018 else: 1019 if end > start: 1020 stack[-1].children.append(str[start:end]) 1021 stack.append(Element('litbrace')) 1022 openbrace_stack.append(end) 1023 stack[-2].children.append(stack[-1]) 1024 1025 # Close braces end colorizing elements. 1026 elif match.group() == '}': 1027 # Check for (and ignore) unbalanced braces. 1028 if len(stack) <= 1: 1029 estr = "Unbalanced '}'." 1030 errors.append(ColorizingError(estr, token, end)) 1031 start = end + 1 1032 continue 1033 1034 # Add any remaining text. 1035 if end > start: 1036 stack[-1].children.append(str[start:end]) 1037 1038 # Special handling for symbols: 1039 if stack[-1].tag == 'symbol': 1040 if (len(stack[-1].children) != 1 or 1041 not isinstance(stack[-1].children[0], basestring)): 1042 estr = "Invalid symbol code." 1043 errors.append(ColorizingError(estr, token, end)) 1044 else: 1045 symb = stack[-1].children[0] 1046 if _SYMBOLS.has_key(symb): 1047 # It's a symbol 1048 stack[-2].children[-1] = Element('symbol', symb) 1049 else: 1050 estr = "Invalid symbol code." 1051 errors.append(ColorizingError(estr, token, end)) 1052 1053 # Special handling for escape elements: 1054 if stack[-1].tag == 'escape': 1055 if (len(stack[-1].children) != 1 or 1056 not isinstance(stack[-1].children[0], basestring)): 1057 estr = "Invalid escape code." 
1058 errors.append(ColorizingError(estr, token, end)) 1059 else: 1060 escp = stack[-1].children[0] 1061 if _ESCAPES.has_key(escp): 1062 # It's an escape from _ESCPAES 1063 stack[-2].children[-1] = _ESCAPES[escp] 1064 elif len(escp) == 1: 1065 # It's a single-character escape (eg E{.}) 1066 stack[-2].children[-1] = escp 1067 else: 1068 estr = "Invalid escape code." 1069 errors.append(ColorizingError(estr, token, end)) 1070 1071 # Special handling for literal braces elements: 1072 if stack[-1].tag == 'litbrace': 1073 stack[-2].children[-1:] = ['{'] + stack[-1].children + ['}'] 1074 1075 # Special handling for graphs: 1076 if stack[-1].tag == 'graph': 1077 _colorize_graph(doc, stack[-1], token, end, errors) 1078 1079 # Special handling for link-type elements: 1080 if stack[-1].tag in _LINK_COLORIZING_TAGS: 1081 _colorize_link(doc, stack[-1], token, end, errors) 1082 1083 # Pop the completed element. 1084 openbrace_stack.pop() 1085 stack.pop() 1086 1087 start = end+1 1088 1089 # Add any final text. 1090 if start < len(str): 1091 stack[-1].children.append(str[start:]) 1092 1093 if len(stack) != 1: 1094 estr = "Unbalanced '{'." 1095 errors.append(ColorizingError(estr, token, openbrace_stack[-1])) 1096 1097 return stack[0]
1098 1099 GRAPH_TYPES = ['classtree', 'packagetree', 'importgraph', 'callgraph'] 1100
1101 -def _colorize_graph(doc, graph, token, end, errors):
1102 """ 1103 Eg:: 1104 G{classtree} 1105 G{classtree x, y, z} 1106 G{importgraph} 1107 """ 1108 bad_graph_spec = False 1109 1110 children = graph.children[:] 1111 graph.children = [] 1112 1113 if len(children) != 1 or not isinstance(children[0], basestring): 1114 bad_graph_spec = "Bad graph specification" 1115 else: 1116 pieces = children[0].split(None, 1) 1117 graphtype = pieces[0].replace(':','').strip().lower() 1118 if graphtype in GRAPH_TYPES: 1119 if len(pieces) == 2: 1120 if re.match(r'\s*:?\s*([\w\.]+\s*,?\s*)*', pieces[1]): 1121 args = pieces[1].replace(',', ' ').replace(':','').split() 1122 else: 1123 bad_graph_spec = "Bad graph arg list" 1124 else: 1125 args = [] 1126 else: 1127 bad_graph_spec = ("Bad graph type %s -- use one of %s" % 1128 (pieces[0], ', '.join(GRAPH_TYPES))) 1129 1130 if bad_graph_spec: 1131 errors.append(ColorizingError(bad_graph_spec, token, end)) 1132 graph.children.append('none') 1133 graph.children.append('') 1134 return 1135 1136 graph.children.append(graphtype) 1137 for arg in args: 1138 graph.children.append(arg)
1139 1187 1188 ################################################## 1189 ## Formatters 1190 ################################################## 1191
1192 -def to_epytext(tree, indent=0, seclevel=0):
1193 """ 1194 Convert a DOM document encoding epytext back to an epytext string. 1195 This is the inverse operation from L{parse}. I.e., assuming there 1196 are no errors, the following is true: 1197 - C{parse(to_epytext(tree)) == tree} 1198 1199 The inverse is true, except that whitespace, line wrapping, and 1200 character escaping may be done differently. 1201 - C{to_epytext(parse(str)) == str} (approximately) 1202 1203 @param tree: A DOM document encoding of an epytext string. 1204 @type tree: C{Element} 1205 @param indent: The indentation for the string representation of 1206 C{tree}. Each line of the returned string will begin with 1207 C{indent} space characters. 1208 @type indent: C{int} 1209 @param seclevel: The section level that C{tree} appears at. This 1210 is used to generate section headings. 1211 @type seclevel: C{int} 1212 @return: The epytext string corresponding to C{tree}. 1213 @rtype: C{string} 1214 """ 1215 if isinstance(tree, basestring): 1216 str = re.sub(r'\{', '\0', tree) 1217 str = re.sub(r'\}', '\1', str) 1218 return str 1219 1220 if tree.tag == 'epytext': indent -= 2 1221 if tree.tag == 'section': seclevel += 1 1222 variables = [to_epytext(c, indent+2, seclevel) for c in tree.children] 1223 childstr = ''.join(variables) 1224 1225 # Clean up for literal blocks (add the double "::" back) 1226 childstr = re.sub(':(\s*)\2', '::\\1', childstr) 1227 1228 if tree.tag == 'para': 1229 str = wordwrap(childstr, indent)+'\n' 1230 str = re.sub(r'((^|\n)\s*\d+)\.', r'\1E{.}', str) 1231 str = re.sub(r'((^|\n)\s*)-', r'\1E{-}', str) 1232 str = re.sub(r'((^|\n)\s*)@', r'\1E{@}', str) 1233 str = re.sub(r'::(\s*($|\n))', r'E{:}E{:}\1', str) 1234 str = re.sub('\0', 'E{lb}', str) 1235 str = re.sub('\1', 'E{rb}', str) 1236 return str 1237 elif tree.tag == 'li': 1238 bullet = tree.attribs.get('bullet') or '-' 1239 return indent*' '+ bullet + ' ' + childstr.lstrip() 1240 elif tree.tag == 'heading': 1241 str = re.sub('\0', 'E{lb}',childstr) 1242 str = 
re.sub('\1', 'E{rb}', str) 1243 uline = len(childstr)*_HEADING_CHARS[seclevel-1] 1244 return (indent-2)*' ' + str + '\n' + (indent-2)*' '+uline+'\n' 1245 elif tree.tag == 'doctestblock': 1246 str = re.sub('\0', '{', childstr) 1247 str = re.sub('\1', '}', str) 1248 lines = [' '+indent*' '+line for line in str.split('\n')] 1249 return '\n'.join(lines) + '\n\n' 1250 elif tree.tag == 'literalblock': 1251 str = re.sub('\0', '{', childstr) 1252 str = re.sub('\1', '}', str) 1253 lines = [(indent+1)*' '+line for line in str.split('\n')] 1254 return '\2' + '\n'.join(lines) + '\n\n' 1255 elif tree.tag == 'field': 1256 numargs = 0 1257 while tree.children[numargs+1].tag == 'arg': numargs += 1 1258 tag = variables[0] 1259 args = variables[1:1+numargs] 1260 body = variables[1+numargs:] 1261 str = (indent)*' '+'@'+variables[0] 1262 if args: str += '(' + ', '.join(args) + ')' 1263 return str + ':\n' + ''.join(body) 1264 elif tree.tag == 'target': 1265 return '<%s>' % childstr 1266 elif tree.tag in ('fieldlist', 'tag', 'arg', 'epytext', 1267 'section', 'olist', 'ulist', 'name'): 1268 return childstr 1269 elif tree.tag == 'symbol': 1270 return 'E{%s}' % childstr 1271 elif tree.tag == 'graph': 1272 return 'G{%s}' % ' '.join(variables) 1273 else: 1274 for (tag, name) in _COLORIZING_TAGS.items(): 1275 if name == tree.tag: 1276 return '%s{%s}' % (tag, childstr) 1277 raise ValueError('Unknown DOM element %r' % tree.tag)
1278 1279 SYMBOL_TO_PLAINTEXT = { 1280 'crarr': '\\', 1281 } 1282
1283 -def to_plaintext(tree, indent=0, seclevel=0):
1284 """ 1285 Convert a DOM document encoding epytext to a string representation. 1286 This representation is similar to the string generated by 1287 C{to_epytext}, but C{to_plaintext} removes inline markup, prints 1288 escaped characters in unescaped form, etc. 1289 1290 @param tree: A DOM document encoding of an epytext string. 1291 @type tree: C{Element} 1292 @param indent: The indentation for the string representation of 1293 C{tree}. Each line of the returned string will begin with 1294 C{indent} space characters. 1295 @type indent: C{int} 1296 @param seclevel: The section level that C{tree} appears at. This 1297 is used to generate section headings. 1298 @type seclevel: C{int} 1299 @return: The epytext string corresponding to C{tree}. 1300 @rtype: C{string} 1301 """ 1302 if isinstance(tree, basestring): return tree 1303 1304 if tree.tag == 'section': seclevel += 1 1305 1306 # Figure out the child indent level. 1307 if tree.tag == 'epytext': cindent = indent 1308 elif tree.tag == 'li' and tree.attribs.get('bullet'): 1309 cindent = indent + 1 + len(tree.attribs.get('bullet')) 1310 else: 1311 cindent = indent + 2 1312 variables = [to_plaintext(c, cindent, seclevel) for c in tree.children] 1313 childstr = ''.join(variables) 1314 1315 if tree.tag == 'para': 1316 return wordwrap(childstr, indent)+'\n' 1317 elif tree.tag == 'li': 1318 # We should be able to use getAttribute here; but there's no 1319 # convenient way to test if an element has an attribute.. 
1320 bullet = tree.attribs.get('bullet') or '-' 1321 return indent*' ' + bullet + ' ' + childstr.lstrip() 1322 elif tree.tag == 'heading': 1323 uline = len(childstr)*_HEADING_CHARS[seclevel-1] 1324 return ((indent-2)*' ' + childstr + '\n' + 1325 (indent-2)*' ' + uline + '\n') 1326 elif tree.tag == 'doctestblock': 1327 lines = [(indent+2)*' '+line for line in childstr.split('\n')] 1328 return '\n'.join(lines) + '\n\n' 1329 elif tree.tag == 'literalblock': 1330 lines = [(indent+1)*' '+line for line in childstr.split('\n')] 1331 return '\n'.join(lines) + '\n\n' 1332 elif tree.tag == 'fieldlist': 1333 return childstr 1334 elif tree.tag == 'field': 1335 numargs = 0 1336 while tree.children[numargs+1].tag == 'arg': numargs += 1 1337 tag = variables[0] 1338 args = variables[1:1+numargs] 1339 body = variables[1+numargs:] 1340 str = (indent)*' '+'@'+variables[0] 1341 if args: str += '(' + ', '.join(args) + ')' 1342 return str + ':\n' + ''.join(body) 1343 elif tree.tag == 'uri': 1344 if len(variables) != 2: raise ValueError('Bad URI ') 1345 elif variables[0] == variables[1]: return '<%s>' % variables[1] 1346 else: return '%r<%s>' % (variables[0], variables[1]) 1347 elif tree.tag == 'link': 1348 if len(variables) != 2: raise ValueError('Bad Link') 1349 return '%s' % variables[0] 1350 elif tree.tag in ('olist', 'ulist'): 1351 # [xx] always use condensed lists. 1352 ## Use a condensed list if each list item is 1 line long. 1353 #for child in variables: 1354 # if child.count('\n') > 2: return childstr 1355 return childstr.replace('\n\n', '\n')+'\n' 1356 elif tree.tag == 'symbol': 1357 return '%s' % SYMBOL_TO_PLAINTEXT.get(childstr, childstr) 1358 elif tree.tag == 'graph': 1359 return '<<%s graph: %s>>' % (variables[0], ', '.join(variables[1:])) 1360 else: 1361 # Assume that anything else can be passed through. 1362 return childstr
1363
1364 -def to_debug(tree, indent=4, seclevel=0):
1365 """ 1366 Convert a DOM document encoding epytext back to an epytext string, 1367 annotated with extra debugging information. This function is 1368 similar to L{to_epytext}, but it adds explicit information about 1369 where different blocks begin, along the left margin. 1370 1371 @param tree: A DOM document encoding of an epytext string. 1372 @type tree: C{Element} 1373 @param indent: The indentation for the string representation of 1374 C{tree}. Each line of the returned string will begin with 1375 C{indent} space characters. 1376 @type indent: C{int} 1377 @param seclevel: The section level that C{tree} appears at. This 1378 is used to generate section headings. 1379 @type seclevel: C{int} 1380 @return: The epytext string corresponding to C{tree}. 1381 @rtype: C{string} 1382 """ 1383 if isinstance(tree, basestring): 1384 str = re.sub(r'\{', '\0', tree) 1385 str = re.sub(r'\}', '\1', str) 1386 return str 1387 1388 if tree.tag == 'section': seclevel += 1 1389 variables = [to_debug(c, indent+2, seclevel) for c in tree.children] 1390 childstr = ''.join(variables) 1391 1392 # Clean up for literal blocks (add the double "::" back) 1393 childstr = re.sub(':( *\n \|\n)\2', '::\\1', childstr) 1394 1395 if tree.tag == 'para': 1396 str = wordwrap(childstr, indent-6, 69)+'\n' 1397 str = re.sub(r'((^|\n)\s*\d+)\.', r'\1E{.}', str) 1398 str = re.sub(r'((^|\n)\s*)-', r'\1E{-}', str) 1399 str = re.sub(r'((^|\n)\s*)@', r'\1E{@}', str) 1400 str = re.sub(r'::(\s*($|\n))', r'E{:}E{:}\1', str) 1401 str = re.sub('\0', 'E{lb}', str) 1402 str = re.sub('\1', 'E{rb}', str) 1403 lines = str.rstrip().split('\n') 1404 lines[0] = ' P>|' + lines[0] 1405 lines[1:] = [' |'+l for l in lines[1:]] 1406 return '\n'.join(lines)+'\n |\n' 1407 elif tree.tag == 'li': 1408 bullet = tree.attribs.get('bullet') or '-' 1409 return ' LI>|'+ (indent-6)*' '+ bullet + ' ' + childstr[6:].lstrip() 1410 elif tree.tag in ('olist', 'ulist'): 1411 return 'LIST>|'+(indent-4)*' '+childstr[indent+2:] 1412 elif 
tree.tag == 'heading': 1413 str = re.sub('\0', 'E{lb}', childstr) 1414 str = re.sub('\1', 'E{rb}', str) 1415 uline = len(childstr)*_HEADING_CHARS[seclevel-1] 1416 return ('SEC'+`seclevel`+'>|'+(indent-8)*' ' + str + '\n' + 1417 ' |'+(indent-8)*' ' + uline + '\n') 1418 elif tree.tag == 'doctestblock': 1419 str = re.sub('\0', '{', childstr) 1420 str = re.sub('\1', '}', str) 1421 lines = [' |'+(indent-4)*' '+line for line in str.split('\n')] 1422 lines[0] = 'DTST>'+lines[0][5:] 1423 return '\n'.join(lines) + '\n |\n' 1424 elif tree.tag == 'literalblock': 1425 str = re.sub('\0', '{', childstr) 1426 str = re.sub('\1', '}', str) 1427 lines = [' |'+(indent-5)*' '+line for line in str.split('\n')] 1428 lines[0] = ' LIT>'+lines[0][5:] 1429 return '\2' + '\n'.join(lines) + '\n |\n' 1430 elif tree.tag == 'field': 1431 numargs = 0 1432 while tree.children[numargs+1].tag == 'arg': numargs += 1 1433 tag = variables[0] 1434 args = variables[1:1+numargs] 1435 body = variables[1+numargs:] 1436 str = ' FLD>|'+(indent-6)*' '+'@'+variables[0] 1437 if args: str += '(' + ', '.join(args) + ')' 1438 return str + ':\n' + ''.join(body) 1439 elif tree.tag == 'target': 1440 return '<%s>' % childstr 1441 elif tree.tag in ('fieldlist', 'tag', 'arg', 'epytext', 1442 'section', 'olist', 'ulist', 'name'): 1443 return childstr 1444 elif tree.tag == 'symbol': 1445 return 'E{%s}' % childstr 1446 elif tree.tag == 'graph': 1447 return 'G{%s}' % ' '.join(variables) 1448 else: 1449 for (tag, name) in _COLORIZING_TAGS.items(): 1450 if name == tree.tag: 1451 return '%s{%s}' % (tag, childstr) 1452 raise ValueError('Unknown DOM element %r' % tree.tag)
1453 1454 ################################################## 1455 ## Top-Level Wrapper function 1456 ##################################################
1457 -def pparse(str, show_warnings=1, show_errors=1, stream=sys.stderr):
1458 """ 1459 Pretty-parse the string. This parses the string, and catches any 1460 warnings or errors produced. Any warnings and errors are 1461 displayed, and the resulting DOM parse structure is returned. 1462 1463 @param str: The string to parse. 1464 @type str: C{string} 1465 @param show_warnings: Whether or not to display non-fatal errors 1466 generated by parsing C{str}. 1467 @type show_warnings: C{boolean} 1468 @param show_errors: Whether or not to display fatal errors 1469 generated by parsing C{str}. 1470 @type show_errors: C{boolean} 1471 @param stream: The stream that warnings and errors should be 1472 written to. 1473 @type stream: C{stream} 1474 @return: a DOM document encoding the contents of C{str}. 1475 @rtype: C{Element} 1476 @raise SyntaxError: If any fatal errors were encountered. 1477 """ 1478 errors = [] 1479 confused = 0 1480 try: 1481 val = parse(str, errors) 1482 warnings = [e for e in errors if not e.is_fatal()] 1483 errors = [e for e in errors if e.is_fatal()] 1484 except: 1485 confused = 1 1486 1487 if not show_warnings: warnings = [] 1488 warnings.sort() 1489 errors.sort() 1490 if warnings: 1491 print >>stream, '='*SCRWIDTH 1492 print >>stream, "WARNINGS" 1493 print >>stream, '-'*SCRWIDTH 1494 for warning in warnings: 1495 print >>stream, warning.as_warning() 1496 print >>stream, '='*SCRWIDTH 1497 if errors and show_errors: 1498 if not warnings: print >>stream, '='*SCRWIDTH 1499 print >>stream, "ERRORS" 1500 print >>stream, '-'*SCRWIDTH 1501 for error in errors: 1502 print >>stream, error 1503 print >>stream, '='*SCRWIDTH 1504 1505 if confused: raise 1506 elif errors: raise SyntaxError('Encountered Errors') 1507 else: return val
1508 1509 ################################################## 1510 ## Parse Errors 1511 ################################################## 1512
1513 -class TokenizationError(ParseError):
1514 """ 1515 An error generated while tokenizing a formatted documentation 1516 string. 1517 """
1518
1519 -class StructuringError(ParseError):
1520 """ 1521 An error generated while structuring a formatted documentation 1522 string. 1523 """
1524
1525 -class ColorizingError(ParseError):
1526 """ 1527 An error generated while colorizing a paragraph. 1528 """
1529 - def __init__(self, descr, token, charnum, is_fatal=1):
1530 """ 1531 Construct a new colorizing exception. 1532 1533 @param descr: A short description of the error. 1534 @type descr: C{string} 1535 @param token: The token where the error occured 1536 @type token: L{Token} 1537 @param charnum: The character index of the position in 1538 C{token} where the error occured. 1539 @type charnum: C{int} 1540 """ 1541 ParseError.__init__(self, descr, token.startline, is_fatal) 1542 self.token = token 1543 self.charnum = charnum
1544 1545 CONTEXT_RANGE = 20
1546 - def descr(self):
1547 RANGE = self.CONTEXT_RANGE 1548 if self.charnum <= RANGE: 1549 left = self.token.contents[0:self.charnum] 1550 else: 1551 left = '...'+self.token.contents[self.charnum-RANGE:self.charnum] 1552 if (len(self.token.contents)-self.charnum) <= RANGE: 1553 right = self.token.contents[self.charnum:] 1554 else: 1555 right = (self.token.contents[self.charnum:self.charnum+RANGE] 1556 + '...') 1557 return ('%s\n\n%s%s\n%s^' % (self._descr, left, right, ' '*len(left)))
1558 1559 ################################################## 1560 ## Convenience parsers 1561 ################################################## 1562
1563 -def parse_as_literal(str):
1564 """ 1565 Return a DOM document matching the epytext DTD, containing a 1566 single literal block. That literal block will include the 1567 contents of the given string. This method is typically used as a 1568 fall-back when the parser fails. 1569 1570 @param str: The string which should be enclosed in a literal 1571 block. 1572 @type str: C{string} 1573 1574 @return: A DOM document containing C{str} in a single literal 1575 block. 1576 @rtype: C{Element} 1577 """ 1578 return Element('epytext', Element('literalblock', str))
1579
1580 -def parse_as_para(str):
1581 """ 1582 Return a DOM document matching the epytext DTD, containing a 1583 single paragraph. That paragraph will include the contents of the 1584 given string. This can be used to wrap some forms of 1585 automatically generated information (such as type names) in 1586 paragraphs. 1587 1588 @param str: The string which should be enclosed in a paragraph. 1589 @type str: C{string} 1590 1591 @return: A DOM document containing C{str} in a single paragraph. 1592 @rtype: C{Element} 1593 """ 1594 return Element('epytext', Element('para', str))
1595 1596 ################################################################# 1597 ## SUPPORT FOR EPYDOC 1598 ################################################################# 1599
1600 -def parse_docstring(docstring, errors, **options):
1601 """ 1602 Parse the given docstring, which is formatted using epytext; and 1603 return a C{ParsedDocstring} representation of its contents. 1604 @param docstring: The docstring to parse 1605 @type docstring: C{string} 1606 @param errors: A list where any errors generated during parsing 1607 will be stored. 1608 @type errors: C{list} of L{ParseError} 1609 @param options: Extra options. Unknown options are ignored. 1610 Currently, no extra options are defined. 1611 @rtype: L{ParsedDocstring} 1612 """ 1613 return ParsedEpytextDocstring(parse(docstring, errors))
1614
1615 -class ParsedEpytextDocstring(ParsedDocstring):
1616 SYMBOL_TO_HTML = { 1617 # Symbols 1618 '<-': '&larr;', '->': '&rarr;', '^': '&uarr;', 'v': '&darr;', 1619 1620 # Greek letters 1621 'alpha': '&alpha;', 'beta': '&beta;', 'gamma': '&gamma;', 1622 'delta': '&delta;', 'epsilon': '&epsilon;', 'zeta': '&zeta;', 1623 'eta': '&eta;', 'theta': '&theta;', 'iota': '&iota;', 1624 'kappa': '&kappa;', 'lambda': '&lambda;', 'mu': '&mu;', 1625 'nu': '&nu;', 'xi': '&xi;', 'omicron': '&omicron;', 1626 'pi': '&pi;', 'rho': '&rho;', 'sigma': '&sigma;', 1627 'tau': '&tau;', 'upsilon': '&upsilon;', 'phi': '&phi;', 1628 'chi': '&chi;', 'psi': '&psi;', 'omega': '&omega;', 1629 'Alpha': '&Alpha;', 'Beta': '&Beta;', 'Gamma': '&Gamma;', 1630 'Delta': '&Delta;', 'Epsilon': '&Epsilon;', 'Zeta': '&Zeta;', 1631 'Eta': '&Eta;', 'Theta': '&Theta;', 'Iota': '&Iota;', 1632 'Kappa': '&Kappa;', 'Lambda': '&Lambda;', 'Mu': '&Mu;', 1633 'Nu': '&Nu;', 'Xi': '&Xi;', 'Omicron': '&Omicron;', 1634 'Pi': '&Pi;', 'Rho': '&Rho;', 'Sigma': '&Sigma;', 1635 'Tau': '&Tau;', 'Upsilon': '&Upsilon;', 'Phi': '&Phi;', 1636 'Chi': '&Chi;', 'Psi': '&Psi;', 'Omega': '&Omega;', 1637 1638 # HTML character entities 1639 'larr': '&larr;', 'rarr': '&rarr;', 'uarr': '&uarr;', 1640 'darr': '&darr;', 'harr': '&harr;', 'crarr': '&crarr;', 1641 'lArr': '&lArr;', 'rArr': '&rArr;', 'uArr': '&uArr;', 1642 'dArr': '&dArr;', 'hArr': '&hArr;', 1643 'copy': '&copy;', 'times': '&times;', 'forall': '&forall;', 1644 'exist': '&exist;', 'part': '&part;', 1645 'empty': '&empty;', 'isin': '&isin;', 'notin': '&notin;', 1646 'ni': '&ni;', 'prod': '&prod;', 'sum': '&sum;', 1647 'prop': '&prop;', 'infin': '&infin;', 'ang': '&ang;', 1648 'and': '&and;', 'or': '&or;', 'cap': '&cap;', 'cup': '&cup;', 1649 'int': '&int;', 'there4': '&there4;', 'sim': '&sim;', 1650 'cong': '&cong;', 'asymp': '&asymp;', 'ne': '&ne;', 1651 'equiv': '&equiv;', 'le': '&le;', 'ge': '&ge;', 1652 'sub': '&sub;', 'sup': '&sup;', 'nsub': '&nsub;', 1653 'sube': '&sube;', 'supe': '&supe;', 'oplus': '&oplus;', 1654 'otimes': 
'&otimes;', 'perp': '&perp;', 1655 1656 # Alternate (long) names 1657 'infinity': '&infin;', 'integral': '&int;', 'product': '&prod;', 1658 '<=': '&le;', '>=': '&ge;', 1659 } 1660 1661 SYMBOL_TO_LATEX = { 1662 # Symbols 1663 '<-': r'\(\leftarrow\)', '->': r'\(\rightarrow\)', 1664 '^': r'\(\uparrow\)', 'v': r'\(\downarrow\)', 1665 1666 # Greek letters (use lower case when upcase not available) 1667 1668 'alpha': r'\(\alpha\)', 'beta': r'\(\beta\)', 'gamma': 1669 r'\(\gamma\)', 'delta': r'\(\delta\)', 'epsilon': 1670 r'\(\epsilon\)', 'zeta': r'\(\zeta\)', 'eta': r'\(\eta\)', 1671 'theta': r'\(\theta\)', 'iota': r'\(\iota\)', 'kappa': 1672 r'\(\kappa\)', 'lambda': r'\(\lambda\)', 'mu': r'\(\mu\)', 1673 'nu': r'\(\nu\)', 'xi': r'\(\xi\)', 'omicron': r'\(o\)', 'pi': 1674 r'\(\pi\)', 'rho': r'\(\rho\)', 'sigma': r'\(\sigma\)', 'tau': 1675 r'\(\tau\)', 'upsilon': r'\(\upsilon\)', 'phi': r'\(\phi\)', 1676 'chi': r'\(\chi\)', 'psi': r'\(\psi\)', 'omega': 1677 r'\(\omega\)', 1678 1679 'Alpha': r'\(\alpha\)', 'Beta': r'\(\beta\)', 'Gamma': 1680 r'\(\Gamma\)', 'Delta': r'\(\Delta\)', 'Epsilon': 1681 r'\(\epsilon\)', 'Zeta': r'\(\zeta\)', 'Eta': r'\(\eta\)', 1682 'Theta': r'\(\Theta\)', 'Iota': r'\(\iota\)', 'Kappa': 1683 r'\(\kappa\)', 'Lambda': r'\(\Lambda\)', 'Mu': r'\(\mu\)', 1684 'Nu': r'\(\nu\)', 'Xi': r'\(\Xi\)', 'Omicron': r'\(o\)', 'Pi': 1685 r'\(\Pi\)', 'ho': r'\(\rho\)', 'Sigma': r'\(\Sigma\)', 'Tau': 1686 r'\(\tau\)', 'Upsilon': r'\(\Upsilon\)', 'Phi': r'\(\Phi\)', 1687 'Chi': r'\(\chi\)', 'Psi': r'\(\Psi\)', 'Omega': 1688 r'\(\Omega\)', 1689 1690 # HTML character entities 1691 'larr': r'\(\leftarrow\)', 'rarr': r'\(\rightarrow\)', 'uarr': 1692 r'\(\uparrow\)', 'darr': r'\(\downarrow\)', 'harr': 1693 r'\(\leftrightarrow\)', 'crarr': r'\(\hookleftarrow\)', 1694 'lArr': r'\(\Leftarrow\)', 'rArr': r'\(\Rightarrow\)', 'uArr': 1695 r'\(\Uparrow\)', 'dArr': r'\(\Downarrow\)', 'hArr': 1696 r'\(\Leftrightarrow\)', 'copy': r'{\textcopyright}', 1697 'times': r'\(\times\)', 
'forall': r'\(\forall\)', 'exist': 1698 r'\(\exists\)', 'part': r'\(\partial\)', 'empty': 1699 r'\(\emptyset\)', 'isin': r'\(\in\)', 'notin': r'\(\notin\)', 1700 'ni': r'\(\ni\)', 'prod': r'\(\prod\)', 'sum': r'\(\sum\)', 1701 'prop': r'\(\propto\)', 'infin': r'\(\infty\)', 'ang': 1702 r'\(\angle\)', 'and': r'\(\wedge\)', 'or': r'\(\vee\)', 'cap': 1703 r'\(\cap\)', 'cup': r'\(\cup\)', 'int': r'\(\int\)', 'there4': 1704 r'\(\therefore\)', 'sim': r'\(\sim\)', 'cong': r'\(\cong\)', 1705 'asymp': r'\(\approx\)', 'ne': r'\(\ne\)', 'equiv': 1706 r'\(\equiv\)', 'le': r'\(\le\)', 'ge': r'\(\ge\)', 'sub': 1707 r'\(\subset\)', 'sup': r'\(\supset\)', 'nsub': r'\(\supset\)', 1708 'sube': r'\(\subseteq\)', 'supe': r'\(\supseteq\)', 'oplus': 1709 r'\(\oplus\)', 'otimes': r'\(\otimes\)', 'perp': r'\(\perp\)', 1710 1711 # Alternate (long) names 1712 'infinity': r'\(\infty\)', 'integral': r'\(\int\)', 'product': 1713 r'\(\prod\)', '<=': r'\(\le\)', '>=': r'\(\ge\)', 1714 } 1715
1716 - def __init__(self, dom_tree):
1717 self._tree = dom_tree 1718 # Caching: 1719 self._html = self._latex = self._plaintext = None 1720 self._terms = None
1721
1722 - def __str__(self):
1723 return str(self._tree)
1724
1725 - def to_html(self, docstring_linker, directory=None, docindex=None, 1726 context=None, **options):
1727 if self._html is not None: return self._html 1728 if self._tree is None: return '' 1729 indent = options.get('indent', 0) 1730 self._html = self._to_html(self._tree, docstring_linker, directory, 1731 docindex, context, indent) 1732 return self._html
1733
1734 - def to_latex(self, docstring_linker, **options):
1735 if self._latex is not None: return self._latex 1736 if self._tree is None: return '' 1737 indent = options.get('indent', 0) 1738 self._hyperref = options.get('hyperref', 1) 1739 self._latex = self._to_latex(self._tree, docstring_linker, indent) 1740 return self._latex
1741
1742 - def to_plaintext(self, docstring_linker, **options):
1743 # [XX] don't cache -- different options might be used!! 1744 #if self._plaintext is not None: return self._plaintext 1745 if self._tree is None: return '' 1746 if 'indent' in options: 1747 self._plaintext = to_plaintext(self._tree, 1748 indent=options['indent']) 1749 else: 1750 self._plaintext = to_plaintext(self._tree) 1751 return self._plaintext
1752
1753 - def _index_term_key(self, tree):
1754 str = to_plaintext(tree) 1755 str = re.sub(r'\s\s+', '-', str) 1756 return "index-"+re.sub("[^a-zA-Z0-9]", "_", str)
1757
def _to_html(self, tree, linker, directory, docindex, context,
             indent=0, seclevel=0):
    """
    Recursively translate the DOM node C{tree} (an L{Element} or a
    plain string) into an HTML string.

    @param linker: used to resolve cross-references and index terms.
    @param directory: output directory; used only to write graph
        image files for C{graph} nodes.
    @param indent: current indentation level of the generated HTML.
    @param seclevel: current section nesting depth; selects the
        C{<hN>} heading level.
    @raise ValueError: if C{tree} has an unrecognized tag.
    @raise AssertionError: if a C{fieldlist} node is encountered
        (fields should have been split off before rendering).
    """
    # Plain strings are leaf text: just escape them for HTML.
    if isinstance(tree, basestring):
        return plaintext_to_html(tree)

    if tree.tag == 'epytext': indent -= 2
    if tree.tag == 'section': seclevel += 1

    # Recursively render all children first (two extra indent levels).
    variables = [self._to_html(c, linker, directory, docindex, context,
                               indent+2, seclevel)
                 for c in tree.children]

    # Get rid of unnecessary <P>...</P> tags; they introduce extra
    # space on most browsers that we don't want.
    # NOTE(review): the [5+indent:-5] slice offsets assume the exact
    # shape of the string built by the 'para' branch below (wordwrap
    # of '<p>...</p>' at the child indent) -- confirm before touching
    # either piece of code.
    for i in range(len(variables)-1):
        if (not isinstance(tree.children[i], basestring) and
            tree.children[i].tag == 'para' and
            (isinstance(tree.children[i+1], basestring) or
             tree.children[i+1].tag != 'para')):
            variables[i] = ' '*(indent+2)+variables[i][5+indent:-5]+'\n'
    if (tree.children and
        not isinstance(tree.children[-1], basestring) and
        tree.children[-1].tag == 'para'):
        variables[-1] = ' '*(indent+2)+variables[-1][5+indent:-5]+'\n'

    # Construct the HTML string for the variables.
    childstr = ''.join(variables)

    # Perform the appropriate action for the DOM tree type.
    if tree.tag == 'para':
        return wordwrap('<p>%s</p>' % childstr, indent)
    elif tree.tag == 'code':
        # An optional 'style' attribute selects a CSS class.
        style = tree.attribs.get('style')
        if style:
            return '<code class="%s">%s</code>' % (style, childstr)
        else:
            return '<code>%s</code>' % childstr
    elif tree.tag == 'uri':
        # children are [link text, target]; see the 'uri' element layout.
        return ('<a href="%s" target="_top">%s</a>' %
                (variables[1], variables[0]))
    elif tree.tag == 'link':
        return linker.translate_identifier_xref(variables[1], variables[0])
    elif tree.tag == 'italic':
        return '<i>%s</i>' % childstr
    elif tree.tag == 'math':
        return '<i class="math">%s</i>' % childstr
    elif tree.tag == 'indexed':
        # Wrap the term in a fresh epytext tree so the linker can
        # render it independently.
        term = Element('epytext', *tree.children, **tree.attribs)
        return linker.translate_indexterm(ParsedEpytextDocstring(term))
        #term_key = self._index_term_key(tree)
        #return linker.translate_indexterm(childstr, term_key)
    elif tree.tag == 'bold':
        return '<b>%s</b>' % childstr
    elif tree.tag == 'ulist':
        return '%s<ul>\n%s%s</ul>\n' % (indent*' ', childstr, indent*' ')
    elif tree.tag == 'olist':
        # 'start' attribute (if any) carries the first item number.
        start = tree.attribs.get('start') or ''
        return ('%s<ol start="%s">\n%s%s</ol>\n' %
                (indent*' ', start, childstr, indent*' '))
    elif tree.tag == 'li':
        return indent*' '+'<li>\n%s%s</li>\n' % (childstr, indent*' ')
    elif tree.tag == 'heading':
        return ('%s<h%s class="heading">%s</h%s>\n' %
                ((indent-2)*' ', seclevel, childstr, seclevel))
    elif tree.tag == 'literalblock':
        return '<pre class="literalblock">\n%s\n</pre>\n' % childstr
    elif tree.tag == 'doctestblock':
        # Doctest blocks hold a single text child; colorize it.
        return doctest_to_html(tree.children[0].strip())
    elif tree.tag == 'fieldlist':
        raise AssertionError("There should not be any field lists left")
    elif tree.tag in ('epytext', 'section', 'tag', 'arg',
                      'name', 'target', 'html'):
        # Structural/wrapper nodes contribute no markup of their own.
        return childstr
    elif tree.tag == 'symbol':
        symbol = tree.children[0]
        return self.SYMBOL_TO_HTML.get(symbol, '[%s]' % symbol)
    elif tree.tag == 'graph':
        # Generate the graph.
        graph = self._build_graph(variables[0], variables[1:], linker,
                                  docindex, context)
        if not graph: return ''
        # Write the graph.
        image_url = '%s.gif' % graph.uid
        image_file = os.path.join(directory, image_url)
        return graph.to_html(image_file, image_url)
    else:
        raise ValueError('Unknown epytext DOM element %r' % tree.tag)
1846 1847 #GRAPH_TYPES = ['classtree', 'packagetree', 'importgraph']
def _build_graph(self, graph_type, graph_args, linker,
                 docindex, context):
    """
    Build and return a dot graph of the kind named by C{graph_type}
    (C{'classtree'}, C{'packagetree'}, C{'importgraph'} or
    C{'callgraph'}), rooted at the objects named in C{graph_args}
    (falling back to C{context} where applicable).  Returns C{None},
    after logging a warning, when the graph cannot be constructed.
    """
    # NOTE: the dotgraph imports stay inside each branch (as in the
    # surrounding code base) so they only happen when actually needed.
    if graph_type == 'classtree':
        if graph_args:
            roots = [docindex.find(name, context)
                     for name in graph_args]
        elif isinstance(context, ClassDoc):
            roots = [context]
        else:
            log.warning("Could not construct class tree: you must "
                        "specify one or more base classes.")
            return None
        from epydoc.docwriter.dotgraph import class_tree_graph
        return class_tree_graph(roots, linker, context)
    if graph_type == 'packagetree':
        from epydoc.apidoc import ModuleDoc
        if graph_args:
            roots = [docindex.find(name, context)
                     for name in graph_args]
        elif isinstance(context, ModuleDoc):
            roots = [context]
        else:
            log.warning("Could not construct package tree: you must "
                        "specify one or more root packages.")
            return None
        from epydoc.docwriter.dotgraph import package_tree_graph
        return package_tree_graph(roots, linker, context)
    if graph_type == 'importgraph':
        from epydoc.apidoc import ModuleDoc
        # Import graphs always cover every documented module.
        candidates = [d for d in docindex.root
                      if isinstance(d, ModuleDoc)]
        from epydoc.docwriter.dotgraph import import_graph
        return import_graph(candidates, docindex, linker, context)
    if graph_type == 'callgraph':
        if graph_args:
            targets = [docindex.find(name, context) for name in graph_args]
            targets = [doc for doc in targets if doc is not None]
        else:
            targets = [context]
        from epydoc.docwriter.dotgraph import call_graph
        return call_graph(targets, docindex, linker, context)
    # Unknown type: warn and fall through, returning None implicitly.
    log.warning("Unknown graph type %s" % graph_type)
1892 1893
def _to_latex(self, tree, linker, indent=0, seclevel=0, breakany=0):
    """
    Recursively translate the DOM node C{tree} (an L{Element} or a
    plain string) into a LaTeX string.

    @param linker: used to resolve cross-references and index terms.
    @param indent: current indentation level of the generated LaTeX.
    @param seclevel: current section nesting depth.
    @param breakany: passed through to C{plaintext_to_latex}; allows
        line breaks at any character in leaf text.
    @raise ValueError: if a C{uri} or C{link} node does not have
        exactly two rendered children.
    """
    # Plain strings are leaf text: escape them for LaTeX.
    if isinstance(tree, basestring):
        return plaintext_to_latex(tree, breakany=breakany)

    if tree.tag == 'section': seclevel += 1

    # Figure out the child indent level.
    if tree.tag == 'epytext': cindent = indent
    else: cindent = indent + 2
    variables = [self._to_latex(c, linker, cindent, seclevel, breakany)
                 for c in tree.children]
    childstr = ''.join(variables)

    if tree.tag == 'para':
        return wordwrap(childstr, indent)+'\n'
    elif tree.tag == 'code':
        return '\\texttt{%s}' % childstr
    elif tree.tag == 'uri':
        if len(variables) != 2: raise ValueError('Bad URI ')
        # self._hyperref is set by to_latex() from its options.
        if self._hyperref:
            # ~ and # should not be escaped in the URI.
            # Undo the escaping that plaintext_to_latex applied to the
            # raw target text, so hyperref receives a literal URI.
            uri = tree.children[1].children[0]
            uri = uri.replace('{\\textasciitilde}', '~')
            uri = uri.replace('\\#', '#')
            if variables[0] == variables[1]:
                # Link text equals the target: render just the link.
                return '\\href{%s}{\\textit{%s}}' % (uri, variables[1])
            else:
                return ('%s\\footnote{\\href{%s}{%s}}' %
                        (variables[0], uri, variables[1]))
        else:
            if variables[0] == variables[1]:
                return '\\textit{%s}' % variables[1]
            else:
                return '%s\\footnote{%s}' % (variables[0], variables[1])
    elif tree.tag == 'link':
        if len(variables) != 2: raise ValueError('Bad Link')
        return linker.translate_identifier_xref(variables[1], variables[0])
    elif tree.tag == 'italic':
        return '\\textit{%s}' % childstr
    elif tree.tag == 'math':
        # Math is rendered as italic text, not LaTeX math mode.
        return '\\textit{%s}' % childstr
    elif tree.tag == 'indexed':
        # Wrap the term so the linker can render it independently.
        term = Element('epytext', *tree.children, **tree.attribs)
        return linker.translate_indexterm(ParsedEpytextDocstring(term))
    elif tree.tag == 'bold':
        return '\\textbf{%s}' % childstr
    elif tree.tag == 'li':
        return indent*' ' + '\\item ' + childstr.lstrip()
    elif tree.tag == 'heading':
        return ' '*(indent-2) + '(section) %s\n\n' % childstr
    elif tree.tag == 'doctestblock':
        return doctest_to_latex(tree.children[0].strip())
    elif tree.tag == 'literalblock':
        return '\\begin{alltt}\n%s\\end{alltt}\n\n' % childstr
    elif tree.tag == 'fieldlist':
        # Field lists are normally split off before rendering.
        return indent*' '+'{omitted fieldlist}\n'
    elif tree.tag == 'olist':
        return (' '*indent + '\\begin{enumerate}\n\n' +
                ' '*indent + '\\setlength{\\parskip}{0.5ex}\n' +
                childstr +
                ' '*indent + '\\end{enumerate}\n\n')
    elif tree.tag == 'ulist':
        return (' '*indent + '\\begin{itemize}\n' +
                ' '*indent + '\\setlength{\\parskip}{0.6ex}\n' +
                childstr +
                ' '*indent + '\\end{itemize}\n\n')
    elif tree.tag == 'symbol':
        symbol = tree.children[0]
        return self.SYMBOL_TO_LATEX.get(symbol, '[%s]' % symbol)
    elif tree.tag == 'graph':
        # Graphs are not supported in LaTeX output; emit a placeholder.
        return '(GRAPH)'
        #raise ValueError, 'graph not implemented yet for latex'
    else:
        # Assume that anything else can be passed through.
        return childstr
1969
def summary(self):
    """
    Return a tuple C{(summary, long_docs)} where C{summary} is a
    C{ParsedEpytextDocstring} containing the first sentence of this
    docstring's first paragraph, and C{long_docs} is true if the
    docstring contains further material beyond that sentence
    (field lists excluded).  If no tree has been parsed, returns
    C{(self, False)}.
    """
    if self._tree is None: return self, False
    tree = self._tree
    doc = Element('epytext')

    # Find the first paragraph.
    variables = tree.children
    while (len(variables) > 0) and (variables[0].tag != 'para'):
        # Descend into structural containers; skip anything else.
        if variables[0].tag in ('section', 'ulist', 'olist', 'li'):
            variables = variables[0].children
        else:
            variables = variables[1:]

    # Special case: if the docstring contains a single literal block,
    # then try extracting the summary from it.
    if (len(variables) == 0 and len(tree.children) == 1 and
        tree.children[0].tag == 'literalblock'):
        # NOTE(review): 'str' shadows the builtin here; kept as-is to
        # avoid changing code in a documentation pass.
        # Take the text up to the first blank line.
        str = re.split(r'\n\s*(\n|$).*',
                       tree.children[0].children[0], 1)[0]
        variables = [Element('para')]
        variables[0].children.append(str)

    # If we didn't find a paragraph, return an empty epytext.
    if len(variables) == 0: return ParsedEpytextDocstring(doc), False

    # Is there anything else, excluding tags, after the first variable?
    long_docs = False
    for var in variables[1:]:
        if isinstance(var, Element) and var.tag == 'fieldlist':
            continue
        long_docs = True
        break

    # Extract the first sentence.
    parachildren = variables[0].children
    para = Element('para')
    doc.children.append(para)
    for parachild in parachildren:
        if isinstance(parachild, basestring):
            # Non-greedy match up to the first period followed by
            # whitespace or end-of-string.
            m = re.match(r'(\s*[\w\W]*?\.)(\s|$)', parachild)
            if m:
                para.children.append(m.group(1))
                # More children after this one means there is more text.
                long_docs |= parachild is not parachildren[-1]
                if not long_docs:
                    # Any non-whitespace remainder also counts as more.
                    other = parachild[m.end():]
                    if other and not other.isspace():
                        long_docs = True
                return ParsedEpytextDocstring(doc), long_docs
        # No sentence boundary yet: keep the child and continue.
        para.children.append(parachild)

    return ParsedEpytextDocstring(doc), long_docs
2021
def split_fields(self, errors=None):
    """
    Split this docstring into its description and its fields,
    returning a tuple C{(description, fields)} where C{description}
    is a C{ParsedEpytextDocstring} (or C{None} if the description is
    empty) and C{fields} is a list of L{Field}s.  The C{errors}
    argument is accepted for interface compatibility but unused.
    """
    if self._tree is None:
        return (self, ())
    # Work on a shallow copy so the cached tree is left untouched.
    tree = Element(self._tree.tag, *self._tree.children,
                   **self._tree.attribs)
    fields = []

    # A trailing non-empty 'fieldlist' node holds all the fields.
    has_fieldlist = (tree.children and
                     tree.children[-1].tag == 'fieldlist' and
                     tree.children[-1].children)
    if has_fieldlist:
        field_nodes = tree.children[-1].children
        del tree.children[-1]
        for node in field_nodes:
            # The first child is the tag element.
            tag = node.children[0].children[0].lower()
            del node.children[0]
            # An optional argument element may follow the tag.
            if node.children and node.children[0].tag == 'arg':
                arg = node.children[0].children[0]
                del node.children[0]
            else:
                arg = None
            # Re-root what remains as a standalone epytext tree.
            node.tag = 'epytext'
            fields.append(Field(tag, arg, ParsedEpytextDocstring(node)))

    # Whatever remains of the tree is the description.
    if tree.children and tree.children[0].children:
        return ParsedEpytextDocstring(tree), fields
    else:
        return None, fields
2055 2056
def index_terms(self):
    """
    Return the list of index terms contained in this docstring,
    computing it on first use and caching it for later calls.
    """
    if self._terms is not None:
        return self._terms
    collected = []
    self._index_terms(self._tree, collected)
    self._terms = collected
    return self._terms
2062
2063 - def _index_terms(self, tree, terms):
2064 if tree is None or isinstance(tree, basestring): 2065 return 2066 2067 if tree.tag == 'indexed': 2068 term = Element('epytext', *tree.children, **tree.attribs) 2069 terms.append(ParsedEpytextDocstring(term)) 2070 2071 # Look for index items in child nodes. 2072 for child in tree.children: 2073 self._index_terms(child, terms)
2074