1
2
3
4
5
6
7
8
9 """
10 Parser for epytext strings. Epytext is a lightweight markup whose
11 primary intended application is Python documentation strings. This
12 parser converts Epytext strings to a simple DOM-like representation
13 (encoded as a tree of L{Element} objects and strings). Epytext
14 strings can contain the following X{structural blocks}:
15
16 - X{epytext}: The top-level element of the DOM tree.
17 - X{para}: A paragraph of text. Paragraphs contain no newlines,
18 and all spaces are soft.
19 - X{section}: A section or subsection.
20 - X{field}: A tagged field. These fields provide information
21 about specific aspects of a Python object, such as the
22 description of a function's parameter, or the author of a
23 module.
24 - X{literalblock}: A block of literal text. This text should be
25 displayed as it would be displayed in plaintext. The
26 parser removes the appropriate amount of leading whitespace
27 from each line in the literal block.
28 - X{doctestblock}: A block containing sample python code,
29 formatted according to the specifications of the C{doctest}
30 module.
31 - X{ulist}: An unordered list.
32 - X{olist}: An ordered list.
33 - X{li}: A list item. This tag is used both for unordered list
34 items and for ordered list items.
35
36 Additionally, the following X{inline regions} may be used within
37 C{para} blocks:
38
39 - X{code}: Source code and identifiers.
40 - X{math}: Mathematical expressions.
41 - X{index}: A term which should be included in an index, if one
42 is generated.
43 - X{italic}: Italicized text.
44 - X{bold}: Bold-faced text.
45 - X{uri}: A Uniform Resource Identifier (URI) or Uniform
46 Resource Locator (URL).
47 - X{link}: A Python identifier which should be hyperlinked to
48 the named object's documentation, when possible.
49
50 The returned DOM tree will conform to the following Document Type
51 Description::
52
53 <!ENTITY % colorized '(code | math | index | italic |
54 bold | uri | link | symbol)*'>
55
56 <!ELEMENT epytext ((para | literalblock | doctestblock |
57 section | ulist | olist)*, fieldlist?)>
58
59 <!ELEMENT para (#PCDATA | %colorized;)*>
60
61 <!ELEMENT section (para | literalblock | doctestblock |
62 section | ulist | olist)+>
63
64 <!ELEMENT fieldlist (field+)>
65 <!ELEMENT field (tag, arg?, (para | literalblock | doctestblock |
66 ulist | olist)+)>
67 <!ELEMENT tag (#PCDATA)>
68 <!ELEMENT arg (#PCDATA)>
69
70 <!ELEMENT literalblock (#PCDATA | %colorized;)*>
71 <!ELEMENT doctestblock (#PCDATA)>
72
73 <!ELEMENT ulist (li+)>
74 <!ELEMENT olist (li+)>
75 <!ELEMENT li (para | literalblock | doctestblock | ulist | olist)+>
76 <!ATTLIST li bullet NMTOKEN #IMPLIED>
77 <!ATTLIST olist start NMTOKEN #IMPLIED>
78
79 <!ELEMENT uri (name, target)>
80 <!ELEMENT link (name, target)>
81 <!ELEMENT name (#PCDATA | %colorized;)*>
82 <!ELEMENT target (#PCDATA)>
83
84 <!ELEMENT code (#PCDATA | %colorized;)*>
85 <!ELEMENT math (#PCDATA | %colorized;)*>
86 <!ELEMENT italic (#PCDATA | %colorized;)*>
87 <!ELEMENT bold (#PCDATA | %colorized;)*>
88 <!ELEMENT indexed (#PCDATA | %colorized;)*>
89 <!ATTLIST code style CDATA #IMPLIED>
90
91 <!ELEMENT symbol (#PCDATA)>
92
93 @var SYMBOLS: A list of the escape symbols that are supported
94 by epydoc. Currently the following symbols are supported:
95 <<<SYMBOLS>>>
96 """
97
98
99
100 __docformat__ = 'epytext en'
101
102
103
104
105
106
107
108
109 import re, string, types, sys, os.path
110 from epydoc.markup import *
111 from epydoc.util import wordwrap, plaintext_to_html, plaintext_to_latex
112 from epydoc.markup.doctest import doctest_to_html, doctest_to_latex
113
114
115
116
117
118 class Element:
119 """
120 A very simple DOM-like representation for parsed epytext
121 documents. Each epytext document is encoded as a tree whose nodes
122 are L{Element} objects, and whose leaves are C{string}s. Each
123 node is marked by a I{tag} and zero or more I{attributes}. Each
124 attribute is a mapping from a string key to a string value.
125 """
126 def __init__(self, tag, *children, **attribs):
127 self.tag = tag
128 """A string tag indicating the type of this element.
129 @type: C{string}"""
130
131 self.children = list(children)
132 """A list of the children of this element.
133 @type: C{list} of (C{string} or C{Element})"""
134
135 self.attribs = attribs
136 """A dictionary mapping attribute names to attribute values
137 for this element.
138 @type: C{dict} from C{string} to C{string}"""
139
140 def __str__(self):
141 """
142 Return a string representation of this element, using XML
143 notation.
144 @bug: Doesn't escape '<' or '&' or '>'.
145 """
146 attribs = ''.join([' %s=%r' % t for t in self.attribs.items()])
147 return ('<%s%s>' % (self.tag, attribs) +
148 ''.join([str(child) for child in self.children]) +
149 '</%s>' % self.tag)
150
151 def __repr__(self):
152 attribs = ''.join([', %s=%r' % t for t in self.attribs.items()])
153 args = ''.join([', %r' % c for c in self.children])
154 return 'Element(%s%s%s)' % (self.tag, args, attribs)
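
# Editor's illustration (not part of the original source): building a
# small tree by hand with the Element class above, and rendering it with
# str() and repr().
#
#     >>> e = Element('para', 'Hello ', Element('bold', 'world'), '!')
#     >>> str(e)
#     '<para>Hello <bold>world</bold>!</para>'
#     >>> repr(Element('li', 'item', bullet='-'))
#     "Element(li, 'item', bullet='-')"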
155
156
157
158
159
160
161
162 _HEADING_CHARS = "=-~"
163
164
165 _ESCAPES = {'lb':'{', 'rb': '}'}
166
167
168 SYMBOLS = [
169
170 '<-', '->', '^', 'v',
171
172
173 'alpha', 'beta', 'gamma', 'delta', 'epsilon', 'zeta',
174 'eta', 'theta', 'iota', 'kappa', 'lambda', 'mu',
175 'nu', 'xi', 'omicron', 'pi', 'rho', 'sigma',
176 'tau', 'upsilon', 'phi', 'chi', 'psi', 'omega',
177 'Alpha', 'Beta', 'Gamma', 'Delta', 'Epsilon', 'Zeta',
178 'Eta', 'Theta', 'Iota', 'Kappa', 'Lambda', 'Mu',
179 'Nu', 'Xi', 'Omicron', 'Pi', 'Rho', 'Sigma',
180 'Tau', 'Upsilon', 'Phi', 'Chi', 'Psi', 'Omega',
181
182
183 'larr', 'rarr', 'uarr', 'darr', 'harr', 'crarr',
184 'lArr', 'rArr', 'uArr', 'dArr', 'hArr',
185 'copy', 'times', 'forall', 'exist', 'part',
186 'empty', 'isin', 'notin', 'ni', 'prod', 'sum',
187 'prop', 'infin', 'ang', 'and', 'or', 'cap', 'cup',
188 'int', 'there4', 'sim', 'cong', 'asymp', 'ne',
189 'equiv', 'le', 'ge', 'sub', 'sup', 'nsub',
190 'sube', 'supe', 'oplus', 'otimes', 'perp',
191
192
193 'infinity', 'integral', 'product',
194 '>=', '<=',
195 ]
196
197 _SYMBOLS = {}
198 for symbol in SYMBOLS: _SYMBOLS[symbol] = 1
199
200
201 symblist = ' '
202 symblist += ';\n '.join([' - C{E{S}{%s}}=S{%s}' % (symbol, symbol)
203 for symbol in SYMBOLS])
204 __doc__ = __doc__.replace('<<<SYMBOLS>>>', symblist)
205 del symbol, symblist
206
207
208 _COLORIZING_TAGS = {
209 'C': 'code',
210 'M': 'math',
211 'X': 'indexed',
212 'I': 'italic',
213 'B': 'bold',
214 'U': 'uri',
215 'L': 'link',
216 'E': 'escape',
217 'S': 'symbol',
218 'G': 'graph',
219 }
220
221
222 _LINK_COLORIZING_TAGS = ['link', 'uri']
223
224
225
226
227
228 def parse(str, errors = None):
229 """
230 Return a DOM tree encoding the contents of an epytext string. Any
231 errors generated during parsing will be stored in C{errors}.
232
233 @param str: The epytext string to parse.
234 @type str: C{string}
235 @param errors: A list where any errors generated during parsing
236 will be stored. If no list is specified, then fatal errors
237 will generate exceptions, and non-fatal errors will be
238 ignored.
239 @type errors: C{list} of L{ParseError}
240 @return: a DOM tree encoding the contents of an epytext string.
241 @rtype: C{Element}
242 @raise ParseError: If C{errors} is C{None} and an error is
243 encountered while parsing.
244 """
245
246 if errors == None:
247 errors = []
248 raise_on_error = 1
249 else:
250 raise_on_error = 0
251
252
253 str = re.sub('\015\012', '\012', str)
254 str = string.expandtabs(str)
255
256
257 tokens = _tokenize(str, errors)
258
259
260 encountered_field = 0
261
262
263 doc = Element('epytext')
264
265
266
267
268
269
270
271
272
273
274
275 stack = [None, doc]
276 indent_stack = [-1, None]
277
278 for token in tokens:
279
280
281
282
283
284
285
286 _pop_completed_blocks(token, stack, indent_stack)
287
288
289 if token.tag == Token.PARA:
290 _add_para(doc, token, stack, indent_stack, errors)
291
292
293 elif token.tag == Token.HEADING:
294 _add_section(doc, token, stack, indent_stack, errors)
295
296
297 elif token.tag == Token.LBLOCK:
298 stack[-1].children.append(token.to_dom(doc))
299
300
301 elif token.tag == Token.DTBLOCK:
302 stack[-1].children.append(token.to_dom(doc))
303
304
305 elif token.tag == Token.BULLET:
306 _add_list(doc, token, stack, indent_stack, errors)
307 else:
308 assert 0, 'Unknown token type: '+token.tag
309
310
311 if stack[-1].tag == 'field':
312 encountered_field = 1
313 elif encountered_field == 1:
314 if len(stack) <= 3:
315 estr = ("Fields must be the final elements in an "+
316 "epytext string.")
317 errors.append(StructuringError(estr, token.startline))
318
319
320 if len([e for e in errors if e.is_fatal()]) > 0:
321 if raise_on_error:
322 raise errors[0]
323 else:
324 return None
325
326
327 return doc
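
# Editor's illustration (not part of the original source): error
# handling in parse().  When a list is supplied, fatal errors are
# collected in it and parse() returns None instead of raising.
#
#     >>> errs = []
#     >>> print parse("A stray closing brace } here.", errs)
#     None
#     >>> len(errs)
#     1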
328
329 def _pop_completed_blocks(token, stack, indent_stack):
330 """
331 Pop any completed blocks off the stack. This includes any
332 blocks that we have dedented past, as well as any list item
333 blocks that we've dedented to. The top element on the stack
334 should only be a list if we're about to start a new list
335 item (i.e., if the next token is a bullet).
336 """
337 indent = token.indent
338 if indent != None:
339 while (len(stack) > 2):
340 pop = 0
341
342
343 if indent_stack[-1]!=None and indent<indent_stack[-1]: pop=1
344 elif indent_stack[-1]==None and indent<indent_stack[-2]: pop=1
345
346
347
348 elif (token.tag == 'bullet' and indent==indent_stack[-2] and
349 stack[-1].tag in ('li', 'field')): pop=1
350
351
352 elif (stack[-1].tag in ('ulist', 'olist') and
353 (token.tag != 'bullet' or token.contents[-1] == ':')):
354 pop=1
355
356
357 if pop == 0: return
358 stack.pop()
359 indent_stack.pop()
360
361 def _add_para(doc, para_token, stack, indent_stack, errors):
362 """Colorize the given paragraph, and add it to the DOM tree."""
363
364
365 if indent_stack[-1] == None:
366 indent_stack[-1] = para_token.indent
367 if para_token.indent == indent_stack[-1]:
368
369 para = _colorize(doc, para_token, errors)
370 stack[-1].children.append(para)
371 else:
372 estr = "Improper paragraph indentation."
373 errors.append(StructuringError(estr, para_token.startline))
374
375 def _add_section(doc, heading_token, stack, indent_stack, errors):
376 """Add a new section to the DOM tree, with the given heading."""
377 if indent_stack[-1] == None:
378 indent_stack[-1] = heading_token.indent
379 elif indent_stack[-1] != heading_token.indent:
380 estr = "Improper heading indentation."
381 errors.append(StructuringError(estr, heading_token.startline))
382
383
384 for tok in stack[2:]:
385 if tok.tag != "section":
386 estr = "Headings must occur at the top level."
387 errors.append(StructuringError(estr, heading_token.startline))
388 break
389 if (heading_token.level+2) > len(stack):
390 estr = "Wrong underline character for heading."
391 errors.append(StructuringError(estr, heading_token.startline))
392
393
394
395 stack[heading_token.level+2:] = []
396 indent_stack[heading_token.level+2:] = []
397
398
399 head = _colorize(doc, heading_token, errors, 'heading')
400
401
402 sec = Element("section")
403 stack[-1].children.append(sec)
404 stack.append(sec)
405 sec.children.append(head)
406 indent_stack.append(None)
407
408 def _add_list(doc, bullet_token, stack, indent_stack, errors):
409 """
410 Add a new list item or field to the DOM tree, with the given
411 bullet or field tag. When necessary, create the associated
412 list.
413 """
414
415 if bullet_token.contents[-1] == '-':
416 list_type = 'ulist'
417 elif bullet_token.contents[-1] == '.':
418 list_type = 'olist'
419 elif bullet_token.contents[-1] == ':':
420 list_type = 'fieldlist'
421 else:
422 raise AssertionError('Bad Bullet: %r' % bullet_token.contents)
423
424
425 newlist = 0
426 if stack[-1].tag != list_type:
427 newlist = 1
428 elif list_type == 'olist' and stack[-1].tag == 'olist':
429 old_listitem = stack[-1].children[-1]
430 old_bullet = old_listitem.attribs.get("bullet").split('.')[:-1]
431 new_bullet = bullet_token.contents.split('.')[:-1]
432 if (new_bullet[:-1] != old_bullet[:-1] or
433 int(new_bullet[-1]) != int(old_bullet[-1])+1):
434 newlist = 1
435
436
437 if newlist:
438 if stack[-1].tag == 'fieldlist':
439
440
441
442
443
444
445 estr = "Lists must be indented."
446 errors.append(StructuringError(estr, bullet_token.startline))
447 if stack[-1].tag in ('ulist', 'olist', 'fieldlist'):
448 stack.pop()
449 indent_stack.pop()
450
451 if (list_type != 'fieldlist' and indent_stack[-1] is not None and
452 bullet_token.indent == indent_stack[-1]):
453
454
455
456 if bullet_token.startline != 1 or bullet_token.indent != 0:
457 estr = "Lists must be indented."
458 errors.append(StructuringError(estr, bullet_token.startline))
459
460 if list_type == 'fieldlist':
461
462 for tok in stack[2:]:
463 if tok.tag != "section":
464 estr = "Fields must be at the top level."
465 errors.append(
466 StructuringError(estr, bullet_token.startline))
467 break
468 stack[2:] = []
469 indent_stack[2:] = []
470
471
472 lst = Element(list_type)
473 stack[-1].children.append(lst)
474 stack.append(lst)
475 indent_stack.append(bullet_token.indent)
476 if list_type == 'olist':
477 start = bullet_token.contents.split('.')[:-1]
478 if start[-1] != '1':
479 lst.attribs["start"] = start[-1]
480
481
482
483
484
485 if list_type == 'fieldlist':
486 li = Element("field")
487 token_words = bullet_token.contents[1:-1].split(None, 1)
488 tag_elt = Element("tag")
489 tag_elt.children.append(token_words[0])
490 li.children.append(tag_elt)
491
492 if len(token_words) > 1:
493 arg_elt = Element("arg")
494 arg_elt.children.append(token_words[1])
495 li.children.append(arg_elt)
496 else:
497 li = Element("li")
498 if list_type == 'olist':
499 li.attribs["bullet"] = bullet_token.contents
500
501
502 stack[-1].children.append(li)
503 stack.append(li)
504 indent_stack.append(None)
505
506
507
508
509
510 class Token:
511 """
512 C{Token}s are an intermediate data structure used while
513 constructing the structuring DOM tree for a formatted docstring.
514 There are five types of C{Token}:
515
516 - Paragraphs
517 - Literal blocks
518 - Doctest blocks
519 - Headings
520 - Bullets
521
522 The text contained in each C{Token} is stored in the
523 C{contents} variable. The string in this variable has been
524 normalized. For paragraphs, this means that it has been converted
525 into a single line of text, with newline/indentation replaced by
526 single spaces. For literal blocks and doctest blocks, this means
527 that the appropriate amount of leading whitespace has been removed
528 from each line.
529
530 Each C{Token} has an indentation level associated with it,
531 stored in the C{indent} variable. This indentation level is used
532 by the structuring procedure to assemble hierarchical blocks.
533
534 @type tag: C{string}
535 @ivar tag: This C{Token}'s type. Possible values are C{Token.PARA}
536 (paragraph), C{Token.LBLOCK} (literal block), C{Token.DTBLOCK}
537 (doctest block), C{Token.HEADING} (heading), and C{Token.BULLET} (bullet).
538
539 @type startline: C{int}
540 @ivar startline: The line on which this C{Token} begins. This
541 line number is only used for issuing errors.
542
543 @type contents: C{string}
544 @ivar contents: The normalized text contained in this C{Token}.
545
546 @type indent: C{int} or C{None}
547 @ivar indent: The indentation level of this C{Token} (in
548 number of leading spaces). A value of C{None} indicates an
549 unknown indentation; this is used for list items and fields
550 that begin with one-line paragraphs.
551
552 @type level: C{int} or C{None}
553 @ivar level: The heading-level of this C{Token} if it is a
554 heading; C{None}, otherwise. Valid heading levels are 0, 1,
555 and 2.
556
557 @type PARA: C{string}
558 @cvar PARA: The C{tag} value for paragraph C{Token}s.
559 @type LBLOCK: C{string}
560 @cvar LBLOCK: The C{tag} value for literal C{Token}s.
561 @type DTBLOCK: C{string}
562 @cvar DTBLOCK: The C{tag} value for doctest C{Token}s.
563 @type HEADING: C{string}
564 @cvar HEADING: The C{tag} value for heading C{Token}s.
565 @type BULLET: C{string}
566 @cvar BULLET: The C{tag} value for bullet C{Token}s. This C{tag}
567 value is also used for field tag C{Token}s, since fields
568 function syntactically the same as list items.
569 """
570
571 PARA = "para"
572 LBLOCK = "literalblock"
573 DTBLOCK = "doctestblock"
574 HEADING = "heading"
575 BULLET = "bullet"
576
577 def __init__(self, tag, startline, contents, indent, level=None):
578 """
579 Create a new C{Token}.
580
581 @param tag: The type of the new C{Token}.
582 @type tag: C{string}
583 @param startline: The line on which the new C{Token} begins.
584 @type startline: C{int}
585 @param contents: The normalized contents of the new C{Token}.
586 @type contents: C{string}
587 @param indent: The indentation of the new C{Token} (in number
588 of leading spaces). A value of C{None} indicates an
589 unknown indentation.
590 @type indent: C{int} or C{None}
591 @param level: The heading-level of this C{Token} if it is a
592 heading; C{None}, otherwise.
593 @type level: C{int} or C{None}
594 """
595 self.tag = tag
596 self.startline = startline
597 self.contents = contents
598 self.indent = indent
599 self.level = level
600
601 def __repr__(self):
602 """
603 @rtype: C{string}
604 @return: the formal representation of this C{Token}.
605 C{Token}s have formal representations of the form::
606 <Token: para at line 12>
607 """
608 return '<Token: %s at line %s>' % (self.tag, self.startline)
609
610 def to_dom(self, doc=None):
611 """
612 @return: a DOM representation of this C{Token}.
613 @rtype: L{Element}
614 """
615 e = Element(self.tag)
616 e.children.append(self.contents)
617 return e
618
619
620
621
622 _ULIST_BULLET = '[-]( +|$)'
623 _OLIST_BULLET = '(\d+[.])+( +|$)'
624 _FIELD_BULLET = '@\w+( [^{}:\n]+)?:'
625 _BULLET_RE = re.compile(_ULIST_BULLET + '|' +
626 _OLIST_BULLET + '|' +
627 _FIELD_BULLET)
628 _LIST_BULLET_RE = re.compile(_ULIST_BULLET + '|' + _OLIST_BULLET)
629 _FIELD_BULLET_RE = re.compile(_FIELD_BULLET)
630 del _ULIST_BULLET, _OLIST_BULLET, _FIELD_BULLET
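
# Editor's illustration (not part of the original source): what the
# bullet regexps above recognize.
#
#     >>> bool(_LIST_BULLET_RE.match('- an unordered item'))
#     True
#     >>> bool(_LIST_BULLET_RE.match('1.2. a nested ordered item'))
#     True
#     >>> bool(_FIELD_BULLET_RE.match('@param x: a field'))
#     True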
631
632 def _tokenize_doctest(lines, start, block_indent, tokens, errors):
633 """
634 Construct a L{Token} containing the doctest block starting at
635 C{lines[start]}, and append it to C{tokens}. C{block_indent}
636 should be the indentation of the doctest block. Any errors
637 generated while tokenizing the doctest block will be appended to
638 C{errors}.
639
640 @param lines: The list of lines to be tokenized
641 @param start: The index into C{lines} of the first line of the
642 doctest block to be tokenized.
643 @param block_indent: The indentation of C{lines[start]}. This is
644 the indentation of the doctest block.
645 @param errors: A list where any errors generated during parsing
646 will be stored. If no list is specified, then errors will
647 generate exceptions.
648 @return: The line number of the first line following the doctest
649 block.
650
651 @type lines: C{list} of C{string}
652 @type start: C{int}
653 @type block_indent: C{int}
654 @type tokens: C{list} of L{Token}
655 @type errors: C{list} of L{ParseError}
656 @rtype: C{int}
657 """
658
659
660
661 min_indent = block_indent
662
663 linenum = start + 1
664 while linenum < len(lines):
665
666 line = lines[linenum]
667 indent = len(line) - len(line.lstrip())
668
669
670 if indent == len(line): break
671
672
673 if indent < block_indent:
674 min_indent = min(min_indent, indent)
675 estr = 'Improper doctest block indentation.'
676 errors.append(TokenizationError(estr, linenum))
677
678
679 linenum += 1
680
681
682 contents = [line[min_indent:] for line in lines[start:linenum]]
683 contents = '\n'.join(contents)
684 tokens.append(Token(Token.DTBLOCK, start, contents, block_indent))
685 return linenum
686
687 def _tokenize_literal(lines, start, block_indent, tokens, errors):
688 """
689 Construct a L{Token} containing the literal block starting at
690 C{lines[start]}, and append it to C{tokens}. C{block_indent}
691 should be the indentation of the literal block. Any errors
692 generated while tokenizing the literal block will be appended to
693 C{errors}.
694
695 @param lines: The list of lines to be tokenized
696 @param start: The index into C{lines} of the first line of the
697 literal block to be tokenized.
698 @param block_indent: The indentation of C{lines[start]}. This is
699 the indentation of the literal block.
700 @param errors: A list of the errors generated by parsing. Any
701 new errors generated while tokenizing this literal block
702 will be appended to this list.
703 @return: The line number of the first line following the literal
704 block.
705
706 @type lines: C{list} of C{string}
707 @type start: C{int}
708 @type block_indent: C{int}
709 @type tokens: C{list} of L{Token}
710 @type errors: C{list} of L{ParseError}
711 @rtype: C{int}
712 """
713 linenum = start + 1
714 while linenum < len(lines):
715
716 line = lines[linenum]
717 indent = len(line) - len(line.lstrip())
718
719
720
721 if len(line) != indent and indent <= block_indent:
722 break
723
724
725 linenum += 1
726
727
728 contents = [line[block_indent+1:] for line in lines[start:linenum]]
729 contents = '\n'.join(contents)
730 contents = re.sub('(\A[ \n]*\n)|(\n[ \n]*\Z)', '', contents)
731 tokens.append(Token(Token.LBLOCK, start, contents, block_indent))
732 return linenum
733
734 def _tokenize_listart(lines, start, bullet_indent, tokens, errors):
735 """
736 Construct L{Token}s for the bullet and the first paragraph of the
737 list item (or field) starting at C{lines[start]}, and append them
738 to C{tokens}. C{bullet_indent} should be the indentation of the
739 list item. Any errors generated while tokenizing will be
740 appended to C{errors}.
741
742 @param lines: The list of lines to be tokenized
743 @param start: The index into C{lines} of the first line of the
744 list item to be tokenized.
745 @param bullet_indent: The indentation of C{lines[start]}. This is
746 the indentation of the list item.
747 @param errors: A list of the errors generated by parsing. Any
748 new errors generated while tokenizing this list item
749 will be appended to this list.
750 @return: The line number of the first line following the list
751 item's first paragraph.
752
753 @type lines: C{list} of C{string}
754 @type start: C{int}
755 @type bullet_indent: C{int}
756 @type tokens: C{list} of L{Token}
757 @type errors: C{list} of L{ParseError}
758 @rtype: C{int}
759 """
760 linenum = start + 1
761 para_indent = None
762 doublecolon = lines[start].rstrip()[-2:] == '::'
763
764
765 para_start = _BULLET_RE.match(lines[start], bullet_indent).end()
766 bcontents = lines[start][bullet_indent:para_start].strip()
767
768 while linenum < len(lines):
769
770 line = lines[linenum]
771 indent = len(line) - len(line.lstrip())
772
773
774 if doublecolon: break
775 if line.rstrip()[-2:] == '::': doublecolon = 1
776
777
778 if indent == len(line): break
779
780
781 if indent < bullet_indent: break
782
783
784 if _BULLET_RE.match(line, indent): break
785
786
787
788 if para_indent == None: para_indent = indent
789
790
791 if indent != para_indent: break
792
793
794 linenum += 1
795
796
797 tokens.append(Token(Token.BULLET, start, bcontents, bullet_indent))
798
799
800 pcontents = ([lines[start][para_start:].strip()] +
801 [line.strip() for line in lines[start+1:linenum]])
802 pcontents = ' '.join(pcontents).strip()
803 if pcontents:
804 tokens.append(Token(Token.PARA, start, pcontents, para_indent))
805
806
807 return linenum
808
809 def _tokenize_para(lines, start, para_indent, tokens, errors):
810 """
811 Construct a L{Token} containing the paragraph starting at
812 C{lines[start]}, and append it to C{tokens}. C{para_indent}
813 should be the indentation of the paragraph. Any errors
814 generated while tokenizing the paragraph will be appended to
815 C{errors}.
816
817 @param lines: The list of lines to be tokenized
818 @param start: The index into C{lines} of the first line of the
819 paragraph to be tokenized.
820 @param para_indent: The indentation of C{lines[start]}. This is
821 the indentation of the paragraph.
822 @param errors: A list of the errors generated by parsing. Any
823 new errors generated while tokenizing this paragraph
824 will be appended to this list.
825 @return: The line number of the first line following the
826 paragraph.
827
828 @type lines: C{list} of C{string}
829 @type start: C{int}
830 @type para_indent: C{int}
831 @type tokens: C{list} of L{Token}
832 @type errors: C{list} of L{ParseError}
833 @rtype: C{int}
834 """
835 linenum = start + 1
836 doublecolon = 0
837 while linenum < len(lines):
838
839 line = lines[linenum]
840 indent = len(line) - len(line.lstrip())
841
842
843 if doublecolon: break
844 if line.rstrip()[-2:] == '::': doublecolon = 1
845
846
847 if indent == len(line): break
848
849
850 if indent != para_indent: break
851
852
853 if _BULLET_RE.match(line, indent): break
854
855
856 if line[indent] == '@':
857 estr = "Possible mal-formatted field item."
858 errors.append(TokenizationError(estr, linenum, is_fatal=0))
859
860
861 linenum += 1
862
863 contents = [line.strip() for line in lines[start:linenum]]
864
865
866 if ((len(contents) < 2) or
867 (contents[1][0] not in _HEADING_CHARS) or
868 (abs(len(contents[0])-len(contents[1])) > 5)):
869 looks_like_heading = 0
870 else:
871 looks_like_heading = 1
872 for char in contents[1]:
873 if char != contents[1][0]:
874 looks_like_heading = 0
875 break
876
877 if looks_like_heading:
878 if len(contents[0]) != len(contents[1]):
879 estr = ("Possible heading typo: the number of "+
880 "underline characters must match the "+
881 "number of heading characters.")
882 errors.append(TokenizationError(estr, start, is_fatal=0))
883 else:
884 level = _HEADING_CHARS.index(contents[1][0])
885 tokens.append(Token(Token.HEADING, start,
886 contents[0], para_indent, level))
887 return start+2
888
889
890 contents = ' '.join(contents)
891 tokens.append(Token(Token.PARA, start, contents, para_indent))
892 return linenum
893
894 def _tokenize(str, errors):
895 """
896 Split a given formatted docstring into an ordered list of
897 C{Token}s, according to the epytext markup rules.
898
899 @param str: The epytext string
900 @type str: C{string}
901 @param errors: A list where any errors generated during parsing
902 will be stored. If no list is specified, then errors will
903 generate exceptions.
904 @type errors: C{list} of L{ParseError}
905 @return: a list of the C{Token}s that make up the given string.
906 @rtype: C{list} of L{Token}
907 """
908 tokens = []
909 lines = str.split('\n')
910
911
912
913 linenum = 0
914 while linenum < len(lines):
915
916 line = lines[linenum]
917 indent = len(line)-len(line.lstrip())
918
919 if indent == len(line):
920
921 linenum += 1
922 continue
923 elif line[indent:indent+4] == '>>> ':
924
925 linenum = _tokenize_doctest(lines, linenum, indent,
926 tokens, errors)
927 elif _BULLET_RE.match(line, indent):
928
929 linenum = _tokenize_listart(lines, linenum, indent,
930 tokens, errors)
931 if tokens[-1].indent != None:
932 indent = tokens[-1].indent
933 else:
934
935 if line[indent] == '@':
936 estr = "Possible mal-formatted field item."
937 errors.append(TokenizationError(estr, linenum, is_fatal=0))
938
939
940 linenum = _tokenize_para(lines, linenum, indent, tokens, errors)
941
942
943 if (tokens[-1].tag == Token.PARA and
944 tokens[-1].contents[-2:] == '::'):
945 tokens[-1].contents = tokens[-1].contents[:-1]
946 linenum = _tokenize_literal(lines, linenum, indent, tokens, errors)
947
948 return tokens
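
# Editor's illustration (not part of the original source): how a small
# docstring is split into tokens by _tokenize().
#
#     >>> toks = _tokenize("Intro para.\n\n  - item one\n  - item two\n", [])
#     >>> for tok in toks:
#     ...     print tok
#     <Token: para at line 0>
#     <Token: bullet at line 2>
#     <Token: para at line 2>
#     <Token: bullet at line 3>
#     <Token: para at line 3>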
949
950
951
952
953
954
955
956 _BRACE_RE = re.compile('{|}')
957 _TARGET_RE = re.compile('^(.*?)\s*<(?:URI:|URL:)?([^<>]+)>$')
958
959 def _colorize(doc, token, errors, tagName='para'):
960 """
961 Given a string containing the contents of a paragraph, produce a
962 DOM C{Element} encoding that paragraph. Colorized regions are
963 represented using DOM C{Element}s, and text is represented using
964 DOM C{Text}s.
965
966 @param errors: A list of errors. Any newly generated errors will
967 be appended to this list.
968 @type errors: C{list} of L{ParseError}
969
970 @param tagName: The element tag for the DOM C{Element} that should
971 be generated.
972 @type tagName: C{string}
973
974 @return: a DOM C{Element} encoding the given paragraph.
975 @returntype: C{Element}
976 """
977 str = token.contents
978 linenum = 0
979
980
981
982
983
984 stack = [Element(tagName)]
985
986
987
988
989 openbrace_stack = [0]
990
991
992
993
994
995 start = 0
996 while 1:
997 match = _BRACE_RE.search(str, start)
998 if match == None: break
999 end = match.start()
1000
1001
1002
1003
1004
1005
1006
1007 if match.group() == '{':
1008 if (end>0) and 'A' <= str[end-1] <= 'Z':
1009 if (end-1) > start:
1010 stack[-1].children.append(str[start:end-1])
1011 if not _COLORIZING_TAGS.has_key(str[end-1]):
1012 estr = "Unknown inline markup tag."
1013 errors.append(ColorizingError(estr, token, end-1))
1014 stack.append(Element('unknown'))
1015 else:
1016 tag = _COLORIZING_TAGS[str[end-1]]
1017 stack.append(Element(tag))
1018 else:
1019 if end > start:
1020 stack[-1].children.append(str[start:end])
1021 stack.append(Element('litbrace'))
1022 openbrace_stack.append(end)
1023 stack[-2].children.append(stack[-1])
1024
1025
1026 elif match.group() == '}':
1027
1028 if len(stack) <= 1:
1029 estr = "Unbalanced '}'."
1030 errors.append(ColorizingError(estr, token, end))
1031 start = end + 1
1032 continue
1033
1034
1035 if end > start:
1036 stack[-1].children.append(str[start:end])
1037
1038
1039 if stack[-1].tag == 'symbol':
1040 if (len(stack[-1].children) != 1 or
1041 not isinstance(stack[-1].children[0], basestring)):
1042 estr = "Invalid symbol code."
1043 errors.append(ColorizingError(estr, token, end))
1044 else:
1045 symb = stack[-1].children[0]
1046 if _SYMBOLS.has_key(symb):
1047
1048 stack[-2].children[-1] = Element('symbol', symb)
1049 else:
1050 estr = "Invalid symbol code."
1051 errors.append(ColorizingError(estr, token, end))
1052
1053
1054 if stack[-1].tag == 'escape':
1055 if (len(stack[-1].children) != 1 or
1056 not isinstance(stack[-1].children[0], basestring)):
1057 estr = "Invalid escape code."
1058 errors.append(ColorizingError(estr, token, end))
1059 else:
1060 escp = stack[-1].children[0]
1061 if _ESCAPES.has_key(escp):
1062
1063 stack[-2].children[-1] = _ESCAPES[escp]
1064 elif len(escp) == 1:
1065
1066 stack[-2].children[-1] = escp
1067 else:
1068 estr = "Invalid escape code."
1069 errors.append(ColorizingError(estr, token, end))
1070
1071
1072 if stack[-1].tag == 'litbrace':
1073 stack[-2].children[-1:] = ['{'] + stack[-1].children + ['}']
1074
1075
1076 if stack[-1].tag == 'graph':
1077 _colorize_graph(doc, stack[-1], token, end, errors)
1078
1079
1080 if stack[-1].tag in _LINK_COLORIZING_TAGS:
1081 _colorize_link(doc, stack[-1], token, end, errors)
1082
1083
1084 openbrace_stack.pop()
1085 stack.pop()
1086
1087 start = end+1
1088
1089
1090 if start < len(str):
1091 stack[-1].children.append(str[start:])
1092
1093 if len(stack) != 1:
1094 estr = "Unbalanced '{'."
1095 errors.append(ColorizingError(estr, token, openbrace_stack[-1]))
1096
1097 return stack[0]
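
# Editor's illustration (not part of the original source): colorizing a
# paragraph token directly.  The doc argument is only consulted for
# graph and link regions, so None suffices for plain inline markup.
#
#     >>> tok = Token(Token.PARA, 0, 'x S{rarr} y, E{lb}literalE{rb}', 0)
#     >>> print _colorize(None, tok, [])
#     <para>x <symbol>rarr</symbol> y, {literal}</para>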
1098
1099 GRAPH_TYPES = ['classtree', 'packagetree', 'importgraph', 'callgraph']
1100
1101 def _colorize_graph(doc, graph, token, end, errors):
1102 """
1103 Eg::
1104 G{classtree}
1105 G{classtree x, y, z}
1106 G{importgraph}
1107 """
1108 bad_graph_spec = False
1109
1110 children = graph.children[:]
1111 graph.children = []
1112
1113 if len(children) != 1 or not isinstance(children[0], basestring):
1114 bad_graph_spec = "Bad graph specification"
1115 else:
1116 pieces = children[0].split(None, 1)
1117 graphtype = pieces[0].replace(':','').strip().lower()
1118 if graphtype in GRAPH_TYPES:
1119 if len(pieces) == 2:
1120 if re.match(r'\s*:?\s*([\w\.]+\s*,?\s*)*', pieces[1]):
1121 args = pieces[1].replace(',', ' ').replace(':','').split()
1122 else:
1123 bad_graph_spec = "Bad graph arg list"
1124 else:
1125 args = []
1126 else:
1127 bad_graph_spec = ("Bad graph type %s -- use one of %s" %
1128 (pieces[0], ', '.join(GRAPH_TYPES)))
1129
1130 if bad_graph_spec:
1131 errors.append(ColorizingError(bad_graph_spec, token, end))
1132 graph.children.append('none')
1133 graph.children.append('')
1134 return
1135
1136 graph.children.append(graphtype)
1137 for arg in args:
1138 graph.children.append(arg)
1139
1140 def _colorize_link(doc, link, token, end, errors):
1139
1141 variables = link.children[:]
1142
1143
1144 if len(variables)==0 or not isinstance(variables[-1], basestring):
1145 estr = "Bad %s target." % link.tag
1146 errors.append(ColorizingError(estr, token, end))
1147 return
1148
1149
1150 match2 = _TARGET_RE.match(variables[-1])
1151 if match2:
1152 (text, target) = match2.groups()
1153 variables[-1] = text
1154
1155 elif len(variables) == 1:
1156 target = variables[0]
1157 else:
1158 estr = "Bad %s target." % link.tag
1159 errors.append(ColorizingError(estr, token, end))
1160 return
1161
1162
1163 name_elt = Element('name', *variables)
1164
1165
1166
1167 target = re.sub(r'\s', '', target)
1168 if link.tag=='uri':
1169 if not re.match(r'\w+:', target):
1170 if re.match(r'\w+@(\w+)(\.\w+)*', target):
1171 target = 'mailto:' + target
1172 else:
1173 target = 'http://'+target
1174 elif link.tag=='link':
1175
1176 target = re.sub(r'\(.*\)$', '', target)
1177 if not re.match(r'^[a-zA-Z_]\w*(\.[a-zA-Z_]\w*)*$', target):
1178 estr = "Bad link target."
1179 errors.append(ColorizingError(estr, token, end))
1180 return
1181
1182
1183 target_elt = Element('target', target)
1184
1185
1186 link.children = [name_elt, target_elt]
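
# Editor's illustration (not part of the original source): how
# _colorize_link() normalizes a U{...} target when a paragraph is
# parsed.  'www.example.com' is a placeholder address for the example.
#
#     >>> print parse("See U{the site <www.example.com>}.", [])
#     <epytext><para>See <uri><name>the site</name><target>http://www.example.com</target></uri>.</para></epytext>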
1187
1188
1189
1190
1191
1192 def to_epytext(tree, indent=0, seclevel=0):
1193 """
1194 Convert a DOM document encoding epytext back to an epytext string.
1195 This is the inverse operation from L{parse}. I.e., assuming there
1196 are no errors, the following is true:
1197 - C{parse(to_epytext(tree)) == tree}
1198
1199 The inverse is true, except that whitespace, line wrapping, and
1200 character escaping may be done differently.
1201 - C{to_epytext(parse(str)) == str} (approximately)
1202
1203 @param tree: A DOM document encoding of an epytext string.
1204 @type tree: C{Element}
1205 @param indent: The indentation for the string representation of
1206 C{tree}. Each line of the returned string will begin with
1207 C{indent} space characters.
1208 @type indent: C{int}
1209 @param seclevel: The section level that C{tree} appears at. This
1210 is used to generate section headings.
1211 @type seclevel: C{int}
1212 @return: The epytext string corresponding to C{tree}.
1213 @rtype: C{string}
1214 """
1215 if isinstance(tree, basestring):
1216 str = re.sub(r'\{', '\0', tree)
1217 str = re.sub(r'\}', '\1', str)
1218 return str
1219
1220 if tree.tag == 'epytext': indent -= 2
1221 if tree.tag == 'section': seclevel += 1
1222 variables = [to_epytext(c, indent+2, seclevel) for c in tree.children]
1223 childstr = ''.join(variables)
1224
1225
1226 childstr = re.sub(':(\s*)\2', '::\\1', childstr)
1227
1228 if tree.tag == 'para':
1229 str = wordwrap(childstr, indent)+'\n'
1230 str = re.sub(r'((^|\n)\s*\d+)\.', r'\1E{.}', str)
1231 str = re.sub(r'((^|\n)\s*)-', r'\1E{-}', str)
1232 str = re.sub(r'((^|\n)\s*)@', r'\1E{@}', str)
1233 str = re.sub(r'::(\s*($|\n))', r'E{:}E{:}\1', str)
1234 str = re.sub('\0', 'E{lb}', str)
1235 str = re.sub('\1', 'E{rb}', str)
1236 return str
1237 elif tree.tag == 'li':
1238 bullet = tree.attribs.get('bullet') or '-'
1239 return indent*' '+ bullet + ' ' + childstr.lstrip()
1240 elif tree.tag == 'heading':
1241 str = re.sub('\0', 'E{lb}',childstr)
1242 str = re.sub('\1', 'E{rb}', str)
1243 uline = len(childstr)*_HEADING_CHARS[seclevel-1]
1244 return (indent-2)*' ' + str + '\n' + (indent-2)*' '+uline+'\n'
1245 elif tree.tag == 'doctestblock':
1246 str = re.sub('\0', '{', childstr)
1247 str = re.sub('\1', '}', str)
1248 lines = [' '+indent*' '+line for line in str.split('\n')]
1249 return '\n'.join(lines) + '\n\n'
1250 elif tree.tag == 'literalblock':
1251 str = re.sub('\0', '{', childstr)
1252 str = re.sub('\1', '}', str)
1253 lines = [(indent+1)*' '+line for line in str.split('\n')]
1254 return '\2' + '\n'.join(lines) + '\n\n'
1255 elif tree.tag == 'field':
1256 numargs = 0
1257 while tree.children[numargs+1].tag == 'arg': numargs += 1
1258 tag = variables[0]
1259 args = variables[1:1+numargs]
1260 body = variables[1+numargs:]
1261 str = (indent)*' '+'@'+variables[0]
1262 if args: str += '(' + ', '.join(args) + ')'
1263 return str + ':\n' + ''.join(body)
1264 elif tree.tag == 'target':
1265 return '<%s>' % childstr
1266 elif tree.tag in ('fieldlist', 'tag', 'arg', 'epytext',
1267 'section', 'olist', 'ulist', 'name'):
1268 return childstr
1269 elif tree.tag == 'symbol':
1270 return 'E{%s}' % childstr
1271 elif tree.tag == 'graph':
1272 return 'G{%s}' % ' '.join(variables)
1273 else:
1274 for (tag, name) in _COLORIZING_TAGS.items():
1275 if name == tree.tag:
1276 return '%s{%s}' % (tag, childstr)
1277 raise ValueError('Unknown DOM element %r' % tree.tag)
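
# Editor's illustration (not part of the original source): the
# approximate round-trip property documented above for to_epytext().
#
#     >>> tree = parse("A B{bold} word.", [])
#     >>> to_epytext(tree).strip()
#     'A B{bold} word.'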
1278
1279 SYMBOL_TO_PLAINTEXT = {
1280 'crarr': '\\',
1281 }
1282
1283 def to_plaintext(tree, indent=0, seclevel=0):
1284 """
1285 Convert a DOM document encoding epytext to a string representation.
1286 This representation is similar to the string generated by
1287 C{to_epytext}, but C{to_plaintext} removes inline markup, prints
1288 escaped characters in unescaped form, etc.
1289
1290 @param tree: A DOM document encoding of an epytext string.
1291 @type tree: C{Element}
1292 @param indent: The indentation for the string representation of
1293 C{tree}. Each line of the returned string will begin with
1294 C{indent} space characters.
1295 @type indent: C{int}
1296 @param seclevel: The section level that C{tree} appears at. This
1297 is used to generate section headings.
1298 @type seclevel: C{int}
1299 @return: The epytext string corresponding to C{tree}.
1300 @rtype: C{string}
1301 """
1302 if isinstance(tree, basestring): return tree
1303
1304 if tree.tag == 'section': seclevel += 1
1305
1306
1307 if tree.tag == 'epytext': cindent = indent
1308 elif tree.tag == 'li' and tree.attribs.get('bullet'):
1309 cindent = indent + 1 + len(tree.attribs.get('bullet'))
1310 else:
1311 cindent = indent + 2
1312 variables = [to_plaintext(c, cindent, seclevel) for c in tree.children]
1313 childstr = ''.join(variables)
1314
1315 if tree.tag == 'para':
1316 return wordwrap(childstr, indent)+'\n'
1317 elif tree.tag == 'li':
1318
1319
1320 bullet = tree.attribs.get('bullet') or '-'
1321 return indent*' ' + bullet + ' ' + childstr.lstrip()
1322 elif tree.tag == 'heading':
1323 uline = len(childstr)*_HEADING_CHARS[seclevel-1]
1324 return ((indent-2)*' ' + childstr + '\n' +
1325 (indent-2)*' ' + uline + '\n')
1326 elif tree.tag == 'doctestblock':
1327 lines = [(indent+2)*' '+line for line in childstr.split('\n')]
1328 return '\n'.join(lines) + '\n\n'
1329 elif tree.tag == 'literalblock':
1330 lines = [(indent+1)*' '+line for line in childstr.split('\n')]
1331 return '\n'.join(lines) + '\n\n'
1332 elif tree.tag == 'fieldlist':
1333 return childstr
1334 elif tree.tag == 'field':
1335 numargs = 0
1336 while tree.children[numargs+1].tag == 'arg': numargs += 1
1337 tag = variables[0]
1338 args = variables[1:1+numargs]
1339 body = variables[1+numargs:]
1340 str = (indent)*' '+'@'+variables[0]
1341 if args: str += '(' + ', '.join(args) + ')'
1342 return str + ':\n' + ''.join(body)
1343 elif tree.tag == 'uri':
1344 if len(variables) != 2: raise ValueError('Bad URI ')
1345 elif variables[0] == variables[1]: return '<%s>' % variables[1]
1346 else: return '%r<%s>' % (variables[0], variables[1])
1347 elif tree.tag == 'link':
1348 if len(variables) != 2: raise ValueError('Bad Link')
1349 return '%s' % variables[0]
1350 elif tree.tag in ('olist', 'ulist'):
1351
1352
1353
1354
1355 return childstr.replace('\n\n', '\n')+'\n'
1356 elif tree.tag == 'symbol':
1357 return '%s' % SYMBOL_TO_PLAINTEXT.get(childstr, childstr)
1358 elif tree.tag == 'graph':
1359 return '<<%s graph: %s>>' % (variables[0], ', '.join(variables[1:]))
1360 else:
1361
1362 return childstr
1363
1364 def to_debug(tree, indent=4, seclevel=0):
1365 """
1366 Convert a DOM document encoding epytext back to an epytext string,
1367 annotated with extra debugging information. This function is
1368 similar to L{to_epytext}, but it adds explicit information about
1369 where different blocks begin, along the left margin.
1370
1371 @param tree: A DOM document encoding of an epytext string.
1372 @type tree: C{Element}
1373 @param indent: The indentation for the string representation of
1374 C{tree}. Each line of the returned string will begin with
1375 C{indent} space characters.
1376 @type indent: C{int}
1377 @param seclevel: The section level that C{tree} appears at. This
1378 is used to generate section headings.
1379 @type seclevel: C{int}
1380 @return: The epytext string corresponding to C{tree}.
1381 @rtype: C{string}
1382 """
1383 if isinstance(tree, basestring):
1384 str = re.sub(r'\{', '\0', tree)
1385 str = re.sub(r'\}', '\1', str)
1386 return str
1387
1388 if tree.tag == 'section': seclevel += 1
1389 variables = [to_debug(c, indent+2, seclevel) for c in tree.children]
1390 childstr = ''.join(variables)
1391
1392
1393 childstr = re.sub(':( *\n \|\n)\2', '::\\1', childstr)
1394
1395 if tree.tag == 'para':
1396 str = wordwrap(childstr, indent-6, 69)+'\n'
1397 str = re.sub(r'((^|\n)\s*\d+)\.', r'\1E{.}', str)
1398 str = re.sub(r'((^|\n)\s*)-', r'\1E{-}', str)
1399 str = re.sub(r'((^|\n)\s*)@', r'\1E{@}', str)
1400 str = re.sub(r'::(\s*($|\n))', r'E{:}E{:}\1', str)
1401 str = re.sub('\0', 'E{lb}', str)
1402 str = re.sub('\1', 'E{rb}', str)
1403 lines = str.rstrip().split('\n')
1404 lines[0] = ' P>|' + lines[0]
1405 lines[1:] = [' |'+l for l in lines[1:]]
1406 return '\n'.join(lines)+'\n |\n'
1407 elif tree.tag == 'li':
1408 bullet = tree.attribs.get('bullet') or '-'
1409 return ' LI>|'+ (indent-6)*' '+ bullet + ' ' + childstr[6:].lstrip()
1410 elif tree.tag in ('olist', 'ulist'):
1411 return 'LIST>|'+(indent-4)*' '+childstr[indent+2:]
1412 elif tree.tag == 'heading':
1413 str = re.sub('\0', 'E{lb}', childstr)
1414 str = re.sub('\1', 'E{rb}', str)
1415 uline = len(childstr)*_HEADING_CHARS[seclevel-1]
1416 return ('SEC'+`seclevel`+'>|'+(indent-8)*' ' + str + '\n' +
1417 ' |'+(indent-8)*' ' + uline + '\n')
1418 elif tree.tag == 'doctestblock':
1419 str = re.sub('\0', '{', childstr)
1420 str = re.sub('\1', '}', str)
1421 lines = [' |'+(indent-4)*' '+line for line in str.split('\n')]
1422 lines[0] = 'DTST>'+lines[0][5:]
1423 return '\n'.join(lines) + '\n |\n'
1424 elif tree.tag == 'literalblock':
1425 str = re.sub('\0', '{', childstr)
1426 str = re.sub('\1', '}', str)
1427 lines = [' |'+(indent-5)*' '+line for line in str.split('\n')]
1428 lines[0] = ' LIT>'+lines[0][5:]
1429 return '\2' + '\n'.join(lines) + '\n |\n'
1430 elif tree.tag == 'field':
1431 numargs = 0
1432 while tree.children[numargs+1].tag == 'arg': numargs += 1
1433 tag = variables[0]
1434 args = variables[1:1+numargs]
1435 body = variables[1+numargs:]
1436 str = ' FLD>|'+(indent-6)*' '+'@'+variables[0]
1437 if args: str += '(' + ', '.join(args) + ')'
1438 return str + ':\n' + ''.join(body)
1439 elif tree.tag == 'target':
1440 return '<%s>' % childstr
1441 elif tree.tag in ('fieldlist', 'tag', 'arg', 'epytext',
1442 'section', 'olist', 'ulist', 'name'):
1443 return childstr
1444 elif tree.tag == 'symbol':
1445 return 'E{%s}' % childstr
1446 elif tree.tag == 'graph':
1447 return 'G{%s}' % ' '.join(variables)
1448 else:
1449 for (tag, name) in _COLORIZING_TAGS.items():
1450 if name == tree.tag:
1451 return '%s{%s}' % (tag, childstr)
1452 raise ValueError('Unknown DOM element %r' % tree.tag)
1453
1454
1455
1456
1457 def pparse(str, show_warnings=1, show_errors=1, stream=sys.stderr):
1458 """
1459 Pretty-parse the string. This parses the string, and catches any
1460 warnings or errors produced. Any warnings and errors are
1461 displayed, and the resulting DOM parse structure is returned.
1462
1463 @param str: The string to parse.
1464 @type str: C{string}
1465 @param show_warnings: Whether or not to display non-fatal errors
1466 generated by parsing C{str}.
1467 @type show_warnings: C{boolean}
1468 @param show_errors: Whether or not to display fatal errors
1469 generated by parsing C{str}.
1470 @type show_errors: C{boolean}
1471 @param stream: The stream that warnings and errors should be
1472 written to.
1473 @type stream: C{stream}
1474 @return: a DOM document encoding the contents of C{str}.
1475 @rtype: C{Element}
1476 @raise SyntaxError: If any fatal errors were encountered.
1477 """
1478 errors = []
1479 confused = 0
1480 try:
1481 val = parse(str, errors)
1482 warnings = [e for e in errors if not e.is_fatal()]
1483 errors = [e for e in errors if e.is_fatal()]
1484 except:
1485 confused = 1
1486
1487 if not show_warnings: warnings = []
1488 warnings.sort()
1489 errors.sort()
1490 if warnings:
1491 print >>stream, '='*SCRWIDTH
1492 print >>stream, "WARNINGS"
1493 print >>stream, '-'*SCRWIDTH
1494 for warning in warnings:
1495 print >>stream, warning.as_warning()
1496 print >>stream, '='*SCRWIDTH
1497 if errors and show_errors:
1498 if not warnings: print >>stream, '='*SCRWIDTH
1499 print >>stream, "ERRORS"
1500 print >>stream, '-'*SCRWIDTH
1501 for error in errors:
1502 print >>stream, error
1503 print >>stream, '='*SCRWIDTH
1504
1505 if confused: raise
1506 elif errors: raise SyntaxError('Encountered Errors')
1507 else: return val
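
# Editor's illustration (not part of the original source): pparse() on
# clean input simply returns the DOM tree, with nothing written to the
# stream.
#
#     >>> tree = pparse("Plain text with B{bold}.")
#     >>> tree.tag
#     'epytext'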
1508
1509
1510
1511
1512
1513 class TokenizationError(ParseError):
1514 """
1515 An error generated while tokenizing a formatted documentation
1516 string.
1517 """
1518
1519 class StructuringError(ParseError):
1520 """
1521 An error generated while structuring a formatted documentation
1522 string.
1523 """
1524
1525 class ColorizingError(ParseError):
1526 """
1527 An error generated while colorizing a paragraph.
1528 """
1529 def __init__(self, descr, token, charnum, is_fatal=1):
1530 """
1531 Construct a new colorizing exception.
1532
1533 @param descr: A short description of the error.
1534 @type descr: C{string}
1535 @param token: The token where the error occurred
1536 @type token: L{Token}
1537 @param charnum: The character index of the position in
1538 C{token} where the error occurred.
1539 @type charnum: C{int}
1540 """
1541 ParseError.__init__(self, descr, token.startline, is_fatal)
1542 self.token = token
1543 self.charnum = charnum
1544
1545 CONTEXT_RANGE = 20
1546 def descr(self):
1547 RANGE = self.CONTEXT_RANGE
1548 if self.charnum <= RANGE:
1549 left = self.token.contents[0:self.charnum]
1550 else:
1551 left = '...'+self.token.contents[self.charnum-RANGE:self.charnum]
1552 if (len(self.token.contents)-self.charnum) <= RANGE:
1553 right = self.token.contents[self.charnum:]
1554 else:
1555 right = (self.token.contents[self.charnum:self.charnum+RANGE]
1556 + '...')
1557 return ('%s\n\n%s%s\n%s^' % (self._descr, left, right, ' '*len(left)))
1558
1559
1560
1561
1562
1563 def parse_as_literal(str):
1564 """
1565 Return a DOM document matching the epytext DTD, containing a
1566 single literal block. That literal block will include the
1567 contents of the given string. This method is typically used as a
1568 fall-back when the parser fails.
1569
1570 @param str: The string which should be enclosed in a literal
1571 block.
1572 @type str: C{string}
1573
1574 @return: A DOM document containing C{str} in a single literal
1575 block.
1576 @rtype: C{Element}
1577 """
1578 return Element('epytext', Element('literalblock', str))
1579
1580 def parse_as_para(str):
1581 """
1582 Return a DOM document matching the epytext DTD, containing a
1583 single paragraph. That paragraph will include the contents of the
1584 given string. This can be used to wrap some forms of
1585 automatically generated information (such as type names) in
1586 paragraphs.
1587
1588 @param str: The string which should be enclosed in a paragraph.
1589 @type str: C{string}
1590
1591 @return: A DOM document containing C{str} in a single paragraph.
1592 @rtype: C{Element}
1593 """
1594 return Element('epytext', Element('para', str))
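
# Editor's illustration (not part of the original source): wrapping an
# automatically generated string in a single paragraph.
#
#     >>> print parse_as_para('int or None')
#     <epytext><para>int or None</para></epytext>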
1595
1596
1597
1598
1599
1600 def parse_docstring(docstring, errors, **options):
1601 """
1602 Parse the given docstring, which is formatted using epytext; and
1603 return a C{ParsedDocstring} representation of its contents.
1604 @param docstring: The docstring to parse
1605 @type docstring: C{string}
1606 @param errors: A list where any errors generated during parsing
1607 will be stored.
1608 @type errors: C{list} of L{ParseError}
1609 @param options: Extra options. Unknown options are ignored.
1610 Currently, no extra options are defined.
1611 @rtype: L{ParsedDocstring}
1612 """
1613 return ParsedEpytextDocstring(parse(docstring, errors))
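
# Editor's illustration (not part of the original source): the
# high-level entry point.  ParsedEpytextDocstring.to_plaintext() below
# does not consult the linker argument, so None can be passed here.
#
#     >>> pd = parse_docstring("Uses C{x} and B{y}.", [])
#     >>> pd.to_plaintext(None).strip()
#     'Uses x and y.'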
1614
1615 class ParsedEpytextDocstring(ParsedDocstring):
1616 SYMBOL_TO_HTML = {
1617
1618 '<-': '←', '->': '→', '^': '↑', 'v': '↓',
1619
1620
1621 'alpha': 'α', 'beta': 'β', 'gamma': 'γ',
1622 'delta': 'δ', 'epsilon': 'ε', 'zeta': 'ζ',
1623 'eta': 'η', 'theta': 'θ', 'iota': 'ι',
1624 'kappa': 'κ', 'lambda': 'λ', 'mu': 'μ',
1625 'nu': 'ν', 'xi': 'ξ', 'omicron': 'ο',
1626 'pi': 'π', 'rho': 'ρ', 'sigma': 'σ',
1627 'tau': 'τ', 'upsilon': 'υ', 'phi': 'φ',
1628 'chi': 'χ', 'psi': 'ψ', 'omega': 'ω',
1629 'Alpha': 'Α', 'Beta': 'Β', 'Gamma': 'Γ',
1630 'Delta': 'Δ', 'Epsilon': 'Ε', 'Zeta': 'Ζ',
1631 'Eta': 'Η', 'Theta': 'Θ', 'Iota': 'Ι',
1632 'Kappa': 'Κ', 'Lambda': 'Λ', 'Mu': 'Μ',
1633 'Nu': 'Ν', 'Xi': 'Ξ', 'Omicron': 'Ο',
1634 'Pi': 'Π', 'Rho': 'Ρ', 'Sigma': 'Σ',
1635 'Tau': 'Τ', 'Upsilon': 'Υ', 'Phi': 'Φ',
1636 'Chi': 'Χ', 'Psi': 'Ψ', 'Omega': 'Ω',
1637
1638
1639 'larr': '←', 'rarr': '→', 'uarr': '↑',
1640 'darr': '↓', 'harr': '↔', 'crarr': '↵',
1641 'lArr': '⇐', 'rArr': '⇒', 'uArr': '⇑',
1642 'dArr': '⇓', 'hArr': '⇔',
1643 'copy': '©', 'times': '×', 'forall': '∀',
1644 'exist': '∃', 'part': '∂',
1645 'empty': '∅', 'isin': '∈', 'notin': '∉',
1646 'ni': '∋', 'prod': '∏', 'sum': '∑',
1647 'prop': '∝', 'infin': '∞', 'ang': '∠',
1648 'and': '∧', 'or': '∨', 'cap': '∩', 'cup': '∪',
1649 'int': '∫', 'there4': '∴', 'sim': '∼',
1650 'cong': '≅', 'asymp': '≈', 'ne': '≠',
1651 'equiv': '≡', 'le': '≤', 'ge': '≥',
1652 'sub': '⊂', 'sup': '⊃', 'nsub': '⊄',
1653 'sube': '⊆', 'supe': '⊇', 'oplus': '⊕',
1654 'otimes': '⊗', 'perp': '⊥',
1655
1656
1657 'infinity': '∞', 'integral': '∫', 'product': '∏',
1658 '<=': '≤', '>=': '≥',
1659 }
1660
1661 SYMBOL_TO_LATEX = {
1662
1663 '<-': r'\(\leftarrow\)', '->': r'\(\rightarrow\)',
1664 '^': r'\(\uparrow\)', 'v': r'\(\downarrow\)',
1665
1666
1667
1668 'alpha': r'\(\alpha\)', 'beta': r'\(\beta\)', 'gamma':
1669 r'\(\gamma\)', 'delta': r'\(\delta\)', 'epsilon':
1670 r'\(\epsilon\)', 'zeta': r'\(\zeta\)', 'eta': r'\(\eta\)',
1671 'theta': r'\(\theta\)', 'iota': r'\(\iota\)', 'kappa':
1672 r'\(\kappa\)', 'lambda': r'\(\lambda\)', 'mu': r'\(\mu\)',
1673 'nu': r'\(\nu\)', 'xi': r'\(\xi\)', 'omicron': r'\(o\)', 'pi':
1674 r'\(\pi\)', 'rho': r'\(\rho\)', 'sigma': r'\(\sigma\)', 'tau':
1675 r'\(\tau\)', 'upsilon': r'\(\upsilon\)', 'phi': r'\(\phi\)',
1676 'chi': r'\(\chi\)', 'psi': r'\(\psi\)', 'omega':
1677 r'\(\omega\)',
1678
1679 'Alpha': r'\(\alpha\)', 'Beta': r'\(\beta\)', 'Gamma':
1680 r'\(\Gamma\)', 'Delta': r'\(\Delta\)', 'Epsilon':
1681 r'\(\epsilon\)', 'Zeta': r'\(\zeta\)', 'Eta': r'\(\eta\)',
1682 'Theta': r'\(\Theta\)', 'Iota': r'\(\iota\)', 'Kappa':
1683 r'\(\kappa\)', 'Lambda': r'\(\Lambda\)', 'Mu': r'\(\mu\)',
1684 'Nu': r'\(\nu\)', 'Xi': r'\(\Xi\)', 'Omicron': r'\(o\)', 'Pi':
1685 r'\(\Pi\)', 'Rho': r'\(\rho\)', 'Sigma': r'\(\Sigma\)', 'Tau':
1686 r'\(\tau\)', 'Upsilon': r'\(\Upsilon\)', 'Phi': r'\(\Phi\)',
1687 'Chi': r'\(\chi\)', 'Psi': r'\(\Psi\)', 'Omega':
1688 r'\(\Omega\)',
1689
1690
1691 'larr': r'\(\leftarrow\)', 'rarr': r'\(\rightarrow\)', 'uarr':
1692 r'\(\uparrow\)', 'darr': r'\(\downarrow\)', 'harr':
1693 r'\(\leftrightarrow\)', 'crarr': r'\(\hookleftarrow\)',
1694 'lArr': r'\(\Leftarrow\)', 'rArr': r'\(\Rightarrow\)', 'uArr':
1695 r'\(\Uparrow\)', 'dArr': r'\(\Downarrow\)', 'hArr':
1696 r'\(\Leftrightarrow\)', 'copy': r'{\textcopyright}',
1697 'times': r'\(\times\)', 'forall': r'\(\forall\)', 'exist':
1698 r'\(\exists\)', 'part': r'\(\partial\)', 'empty':
1699 r'\(\emptyset\)', 'isin': r'\(\in\)', 'notin': r'\(\notin\)',
1700 'ni': r'\(\ni\)', 'prod': r'\(\prod\)', 'sum': r'\(\sum\)',
1701 'prop': r'\(\propto\)', 'infin': r'\(\infty\)', 'ang':
1702 r'\(\angle\)', 'and': r'\(\wedge\)', 'or': r'\(\vee\)', 'cap':
1703 r'\(\cap\)', 'cup': r'\(\cup\)', 'int': r'\(\int\)', 'there4':
1704 r'\(\therefore\)', 'sim': r'\(\sim\)', 'cong': r'\(\cong\)',
1705 'asymp': r'\(\approx\)', 'ne': r'\(\ne\)', 'equiv':
1706 r'\(\equiv\)', 'le': r'\(\le\)', 'ge': r'\(\ge\)', 'sub':
1707 r'\(\subset\)', 'sup': r'\(\supset\)', 'nsub': r'\(\not\subset\)',
1708 'sube': r'\(\subseteq\)', 'supe': r'\(\supseteq\)', 'oplus':
1709 r'\(\oplus\)', 'otimes': r'\(\otimes\)', 'perp': r'\(\perp\)',
1710
1711
1712 'infinity': r'\(\infty\)', 'integral': r'\(\int\)', 'product':
1713 r'\(\prod\)', '<=': r'\(\le\)', '>=': r'\(\ge\)',
1714 }
1715
1716 def __init__(self, dom_tree):
1717 self._tree = dom_tree
1718
1719 self._html = self._latex = self._plaintext = None
1720 self._terms = None
1721
1722 def __str__(self):
1723 return str(self._tree)
1724
1725 def to_html(self, docstring_linker, directory=None, docindex=None,
1726 context=None, **options):
1727 if self._html is not None: return self._html
1728 if self._tree is None: return ''
1729 indent = options.get('indent', 0)
1730 self._html = self._to_html(self._tree, docstring_linker, directory,
1731 docindex, context, indent)
1732 return self._html
1733
1734 def to_latex(self, docstring_linker, **options):
1735 if self._latex is not None: return self._latex
1736 if self._tree is None: return ''
1737 indent = options.get('indent', 0)
1738 self._hyperref = options.get('hyperref', 1)
1739 self._latex = self._to_latex(self._tree, docstring_linker, indent)
1740 return self._latex
1741
1742 def to_plaintext(self, docstring_linker, **options):
1743
1744
1745 if self._tree is None: return ''
1746 if 'indent' in options:
1747 self._plaintext = to_plaintext(self._tree,
1748 indent=options['indent'])
1749 else:
1750 self._plaintext = to_plaintext(self._tree)
1751 return self._plaintext
1752
1753 def _index_term_key(self, tree):
1754 str = to_plaintext(tree)
1755 str = re.sub(r'\s\s+', '-', str)
1756 return "index-"+re.sub("[^a-zA-Z0-9]", "_", str)
1757
1758 def _to_html(self, tree, linker, directory, docindex, context,
1759 indent=0, seclevel=0):
1760 if isinstance(tree, basestring):
1761 return plaintext_to_html(tree)
1762
1763 if tree.tag == 'epytext': indent -= 2
1764 if tree.tag == 'section': seclevel += 1
1765
1766
1767 variables = [self._to_html(c, linker, directory, docindex, context,
1768 indent+2, seclevel)
1769 for c in tree.children]
1770
1771
1772
1773 for i in range(len(variables)-1):
1774 if (not isinstance(tree.children[i], basestring) and
1775 tree.children[i].tag == 'para' and
1776 (isinstance(tree.children[i+1], basestring) or
1777 tree.children[i+1].tag != 'para')):
1778 variables[i] = ' '*(indent+2)+variables[i][5+indent:-5]+'\n'
1779 if (tree.children and
1780 not isinstance(tree.children[-1], basestring) and
1781 tree.children[-1].tag == 'para'):
1782 variables[-1] = ' '*(indent+2)+variables[-1][5+indent:-5]+'\n'
1783
1784
1785 childstr = ''.join(variables)
1786
1787
1788 if tree.tag == 'para':
1789 return wordwrap('<p>%s</p>' % childstr, indent)
1790 elif tree.tag == 'code':
1791 style = tree.attribs.get('style')
1792 if style:
1793 return '<code class="%s">%s</code>' % (style, childstr)
1794 else:
1795 return '<code>%s</code>' % childstr
1796 elif tree.tag == 'uri':
1797 return ('<a href="%s" target="_top">%s</a>' %
1798 (variables[1], variables[0]))
1799 elif tree.tag == 'link':
1800 return linker.translate_identifier_xref(variables[1], variables[0])
1801 elif tree.tag == 'italic':
1802 return '<i>%s</i>' % childstr
1803 elif tree.tag == 'math':
1804 return '<i class="math">%s</i>' % childstr
1805 elif tree.tag == 'indexed':
1806 term = Element('epytext', *tree.children, **tree.attribs)
1807 return linker.translate_indexterm(ParsedEpytextDocstring(term))
1808
1809
1810 elif tree.tag == 'bold':
1811 return '<b>%s</b>' % childstr
1812 elif tree.tag == 'ulist':
1813 return '%s<ul>\n%s%s</ul>\n' % (indent*' ', childstr, indent*' ')
1814 elif tree.tag == 'olist':
1815 start = tree.attribs.get('start') or ''
1816 return ('%s<ol start="%s">\n%s%s</ol>\n' %
1817 (indent*' ', start, childstr, indent*' '))
1818 elif tree.tag == 'li':
1819 return indent*' '+'<li>\n%s%s</li>\n' % (childstr, indent*' ')
1820 elif tree.tag == 'heading':
1821 return ('%s<h%s class="heading">%s</h%s>\n' %
1822 ((indent-2)*' ', seclevel, childstr, seclevel))
1823 elif tree.tag == 'literalblock':
1824 return '<pre class="literalblock">\n%s\n</pre>\n' % childstr
1825 elif tree.tag == 'doctestblock':
1826 return doctest_to_html(tree.children[0].strip())
1827 elif tree.tag == 'fieldlist':
1828 raise AssertionError("There should not be any field lists left")
1829 elif tree.tag in ('epytext', 'section', 'tag', 'arg',
1830 'name', 'target', 'html'):
1831 return childstr
1832 elif tree.tag == 'symbol':
1833 symbol = tree.children[0]
1834 return self.SYMBOL_TO_HTML.get(symbol, '[%s]' % symbol)
1835 elif tree.tag == 'graph':
1836
1837 graph = self._build_graph(variables[0], variables[1:], linker,
1838 docindex, context)
1839 if not graph: return ''
1840
            image_url = '%s.gif' % graph.uid
            image_file = os.path.join(directory, image_url)
            return graph.to_html(image_file, image_url)
        else:
            raise ValueError('Unknown epytext DOM element %r' % tree.tag)

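    # Construct a dot graph for one of the graph directives
    # (classtree, packagetree, importgraph or callgraph), using the
    # graph factories from epydoc.docwriter.dotgraph.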
    def _build_graph(self, graph_type, graph_args, linker,
                     docindex, context):
        if graph_type == 'classtree':
            from epydoc.apidoc import ClassDoc
            if graph_args:
                bases = [docindex.find(name, context)
                         for name in graph_args]
            elif isinstance(context, ClassDoc):
                bases = [context]
            else:
                log.warning("Could not construct class tree: you must "
                            "specify one or more base classes.")
                return None
            from epydoc.docwriter.dotgraph import class_tree_graph
            return class_tree_graph(bases, linker, context)
        elif graph_type == 'packagetree':
            from epydoc.apidoc import ModuleDoc
            if graph_args:
                packages = [docindex.find(name, context)
                            for name in graph_args]
            elif isinstance(context, ModuleDoc):
                packages = [context]
            else:
                log.warning("Could not construct package tree: you must "
                            "specify one or more root packages.")
                return None
            from epydoc.docwriter.dotgraph import package_tree_graph
            return package_tree_graph(packages, linker, context)
        elif graph_type == 'importgraph':
            from epydoc.apidoc import ModuleDoc
            modules = [d for d in docindex.root if isinstance(d, ModuleDoc)]
            from epydoc.docwriter.dotgraph import import_graph
            return import_graph(modules, docindex, linker, context)

        elif graph_type == 'callgraph':
            if graph_args:
                docs = [docindex.find(name, context) for name in graph_args]
                docs = [doc for doc in docs if doc is not None]
            else:
                docs = [context]
            from epydoc.docwriter.dotgraph import call_graph
            return call_graph(docs, docindex, linker, context)
        else:
            log.warning("Unknown graph type %s" % graph_type)

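    # Render the DOM tree as LaTeX.  seclevel tracks section nesting;
    # breakany is passed through to plaintext_to_latex when escaping
    # plain-text leaves.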
    def _to_latex(self, tree, linker, indent=0, seclevel=0, breakany=0):
        if isinstance(tree, basestring):
            return plaintext_to_latex(tree, breakany=breakany)

        if tree.tag == 'section': seclevel += 1

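        # Compute the indentation for child nodes, then render them.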
        if tree.tag == 'epytext': cindent = indent
        else: cindent = indent + 2
        variables = [self._to_latex(c, linker, cindent, seclevel, breakany)
                     for c in tree.children]
        childstr = ''.join(variables)

        if tree.tag == 'para':
            return wordwrap(childstr, indent)+'\n'
        elif tree.tag == 'code':
            return '\\texttt{%s}' % childstr
        elif tree.tag == 'uri':
            if len(variables) != 2: raise ValueError('Bad URI')
            if self._hyperref:
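                # Undo the escaping of "~" and "#" so the raw URI can
                # be passed to \href.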
                uri = tree.children[1].children[0]
                uri = uri.replace('{\\textasciitilde}', '~')
                uri = uri.replace('\\#', '#')
                if variables[0] == variables[1]:
                    return '\\href{%s}{\\textit{%s}}' % (uri, variables[1])
                else:
                    return ('%s\\footnote{\\href{%s}{%s}}' %
                            (variables[0], uri, variables[1]))
            else:
                if variables[0] == variables[1]:
                    return '\\textit{%s}' % variables[1]
                else:
                    return '%s\\footnote{%s}' % (variables[0], variables[1])
        elif tree.tag == 'link':
            if len(variables) != 2: raise ValueError('Bad Link')
            return linker.translate_identifier_xref(variables[1], variables[0])
        elif tree.tag == 'italic':
            return '\\textit{%s}' % childstr
        elif tree.tag == 'math':
            return '\\textit{%s}' % childstr
        elif tree.tag == 'indexed':
            term = Element('epytext', *tree.children, **tree.attribs)
            return linker.translate_indexterm(ParsedEpytextDocstring(term))
        elif tree.tag == 'bold':
            return '\\textbf{%s}' % childstr
        elif tree.tag == 'li':
            return indent*' ' + '\\item ' + childstr.lstrip()
        elif tree.tag == 'heading':
            return ' '*(indent-2) + '(section) %s\n\n' % childstr
        elif tree.tag == 'doctestblock':
            return doctest_to_latex(tree.children[0].strip())
        elif tree.tag == 'literalblock':
            return '\\begin{alltt}\n%s\\end{alltt}\n\n' % childstr
        elif tree.tag == 'fieldlist':
            return indent*' '+'{omitted fieldlist}\n'
        elif tree.tag == 'olist':
            return (' '*indent + '\\begin{enumerate}\n\n' +
                    ' '*indent + '\\setlength{\\parskip}{0.5ex}\n' +
                    childstr +
                    ' '*indent + '\\end{enumerate}\n\n')
        elif tree.tag == 'ulist':
            return (' '*indent + '\\begin{itemize}\n' +
                    ' '*indent + '\\setlength{\\parskip}{0.6ex}\n' +
                    childstr +
                    ' '*indent + '\\end{itemize}\n\n')
        elif tree.tag == 'symbol':
            symbol = tree.children[0]
            return self.SYMBOL_TO_LATEX.get(symbol, '[%s]' % symbol)
        elif tree.tag == 'graph':
            return '(GRAPH)'
        else:
            return childstr

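    # Extract a summary: the first sentence of the first paragraph.
    # Returns a (summary, other_docs) pair, where other_docs is True
    # if the docstring contains documentation beyond that sentence.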
    def summary(self):
        if self._tree is None: return self, False
        tree = self._tree
        doc = Element('epytext')

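        # Find the first paragraph, descending into sections and lists.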
        variables = tree.children
        while (len(variables) > 0) and (variables[0].tag != 'para'):
            if variables[0].tag in ('section', 'ulist', 'olist', 'li'):
                variables = variables[0].children
            else:
                variables = variables[1:]

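        # Special case: if the docstring is a single literal block, use
        # its text up to the first blank line as the summary.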
        if (len(variables) == 0 and len(tree.children) == 1 and
            tree.children[0].tag == 'literalblock'):
            text = re.split(r'\n\s*(\n|$).*',
                            tree.children[0].children[0], 1)[0]
            variables = [Element('para')]
            variables[0].children.append(text)

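        # If there is no paragraph at all, return an empty summary.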
        if len(variables) == 0: return ParsedEpytextDocstring(doc), False

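        # Anything other than a trailing field list after the first
        # paragraph means there is further documentation.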
        long_docs = False
        for var in variables[1:]:
            if isinstance(var, Element) and var.tag == 'fieldlist':
                continue
            long_docs = True
            break

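        # Take the first sentence of the paragraph: everything up to
        # the first period that is followed by whitespace (or the end
        # of the string).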
        parachildren = variables[0].children
        para = Element('para')
        doc.children.append(para)
        for parachild in parachildren:
            if isinstance(parachild, basestring):
                m = re.match(r'(\s*[\w\W]*?\.)(\s|$)', parachild)
                if m:
                    para.children.append(m.group(1))
                    long_docs |= parachild is not parachildren[-1]
                    if not long_docs:
                        other = parachild[m.end():]
                        if other and not other.isspace():
                            long_docs = True
                    return ParsedEpytextDocstring(doc), long_docs
            para.children.append(parachild)

        return ParsedEpytextDocstring(doc), long_docs

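    # Split the docstring into a body and a list of Field objects, one
    # per entry in its trailing field list.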
    def split_fields(self, errors=None):
        if self._tree is None: return (self, ())
        tree = Element(self._tree.tag, *self._tree.children,
                       **self._tree.attribs)
        fields = []

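        # If the last child is a (non-empty) field list, detach it and
        # convert each of its entries into a Field.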
        if (tree.children and
            tree.children[-1].tag == 'fieldlist' and
            tree.children[-1].children):
            field_nodes = tree.children[-1].children
            del tree.children[-1]

            for field in field_nodes:
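                # The first child is the tag element; record its text
                # (lowercased) and remove it.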
                tag = field.children[0].children[0].lower()
                del field.children[0]

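                # An optional argument element may follow the tag.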
                if field.children and field.children[0].tag == 'arg':
                    arg = field.children[0].children[0]
                    del field.children[0]
                else:
                    arg = None

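                # Re-tag the field body as a top-level epytext element
                # so it can be rendered as a docstring of its own.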
                field.tag = 'epytext'
                fields.append(Field(tag, arg, ParsedEpytextDocstring(field)))

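        # Return the remaining body (or None if it is now empty)
        # together with the extracted fields.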
        if tree.children and tree.children[0].children:
            return ParsedEpytextDocstring(tree), fields
        else:
            return None, fields

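    # Return the docstring's index terms ('indexed' elements) as
    # ParsedEpytextDocstring objects; the result is cached after the
    # first call.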
    def index_terms(self):
        if self._terms is None:
            self._terms = []
            self._index_terms(self._tree, self._terms)
        return self._terms

    def _index_terms(self, tree, terms):
        if tree is None or isinstance(tree, basestring):
            return

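        # An 'indexed' element contributes its contents as a new term.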
        if tree.tag == 'indexed':
            term = Element('epytext', *tree.children, **tree.attribs)
            terms.append(ParsedEpytextDocstring(term))

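        # Recurse into child elements.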
        for child in tree.children:
            self._index_terms(child, terms)