epydoc.markup

#
# epydoc package file
#
# A python documentation Module
# Edward Loper
#
# $Id: __init__.py 1444 2007-02-10 19:26:54Z dvarrazzo $
#

"""
Markup language support for docstrings.  Each submodule defines a
parser for a single markup language.  These parsers convert an
object's docstring to a L{ParsedDocstring}, a standard intermediate
representation that can be used to generate output.
C{ParsedDocstring}s support the following operations:
  - output generation (L{to_plaintext()<ParsedDocstring.to_plaintext>},
    L{to_html()<ParsedDocstring.to_html>}, and
    L{to_latex()<ParsedDocstring.to_latex>}).
  - Summarization (L{summary()<ParsedDocstring.summary>}).
  - Field extraction (L{split_fields()<ParsedDocstring.split_fields>}).
  - Index term extraction (L{index_terms()<ParsedDocstring.index_terms>}).

The L{parse()} function provides a single interface to the
C{epydoc.markup} package: it takes a docstring and the name of a
markup language; delegates to the appropriate parser; and returns the
parsed docstring (along with any errors or warnings that were
generated).

The C{ParsedDocstring} output generation methods (C{to_M{format}()})
use a L{DocstringLinker} to link the docstring output with the rest of
the documentation that epydoc generates.  C{DocstringLinker}s are
currently responsible for translating two kinds of crossreference:
  - index terms (L{translate_indexterm()
    <DocstringLinker.translate_indexterm>}).
  - identifier crossreferences (L{translate_identifier_xref()
    <DocstringLinker.translate_identifier_xref>}).

A parsed docstring's fields can be extracted using the
L{ParsedDocstring.split_fields()} method.  This method divides a
docstring into its main body and a list of L{Field}s, each of which
encodes a single field.  The fields' bodies are encoded as
C{ParsedDocstring}s.

Markup errors are represented using L{ParseError}s.  These exception
classes record information about the cause, location, and severity of
each error.

@sort: parse, ParsedDocstring, Field, DocstringLinker
@group Errors and Warnings: ParseError
@group Utility Functions: parse_type_of
@var SCRWIDTH: The default width with which text will be wrapped
      when formatting the output of the parser.
@type SCRWIDTH: C{int}
@var _parse_warnings: Used by L{_parse_warn}.
"""
__docformat__ = 'epytext en'

import re, types, sys
from epydoc import log
from epydoc.util import plaintext_to_html, plaintext_to_latex
import epydoc
from epydoc.compat import *

##################################################
## Contents
##################################################
#
# 1. parse() dispatcher
# 2. ParsedDocstring abstract base class
# 3. Field class
# 4. Docstring Linker
# 5. ParseError exceptions
# 6. Misc helpers
#

##################################################
## Dispatcher
##################################################

# Maps a lower-case markup language name to its parser.  A value is
# either the dotted name of a module that defines a parse_docstring()
# function (parse() imports it on first use and then stores the
# function here in place of the name), or the parse function itself
# (this is what register_markup_language() stores).
_markup_language_registry = {
    'restructuredtext': 'epydoc.markup.restructuredtext',
    'epytext': 'epydoc.markup.epytext',
    'plaintext': 'epydoc.markup.plaintext',
    'javadoc': 'epydoc.markup.javadoc',
    }


def register_markup_language(name, parse_function):
    """
    Make a new markup language available to L{parse()}.

    @param name: The name under which the markup language is
        registered.  It should be a simple identifier, such as
        C{'epytext'} or C{'restructuredtext'}.  The name is stored in
        lower case, so markup language names are case insensitive.

    @param parse_function: The function that parses docstrings
        written in this markup language and returns a
        L{ParsedDocstring}.  It should have the following signature:

            >>> def parse(s, errors):
            ...     'returns a ParsedDocstring'

        Where:
            - C{s} is the string to parse.  (C{s} will be a unicode
              string.)
            - C{errors} is a list; any errors that are generated
              during docstring parsing should be appended to this
              list (as L{ParseError} objects).
    """
    registry_key = name.lower()
    _markup_language_registry[registry_key] = parse_function


# The names of all markup languages that parse() has actually
# dispatched to so far.
MARKUP_LANGUAGES_USED = set()

def _parse_as_plaintext(docstring, errors, **options):
    """
    Parse C{docstring} with the plaintext parser.  This is the
    fallback used by L{parse()} whenever the requested markup
    language cannot be used.
    """
    # Imported lazily, at the point of use.
    import epydoc.markup.plaintext as plaintext
    return plaintext.parse_docstring(docstring, errors, **options)

def parse(docstring, markup='plaintext', errors=None, **options):
    """
    Parse the given docstring, and use it to construct a
    C{ParsedDocstring}.  If any fatal C{ParseError}s are encountered
    while parsing the docstring, then the docstring will be rendered
    as plaintext, instead.

    @type docstring: C{string}
    @param docstring: The docstring to encode.
    @type markup: C{string}
    @param markup: The name of the markup language that is used by
        the docstring.  If the markup language is not supported, then
        the docstring will be treated as plaintext.  The markup name
        is case-insensitive.
    @param errors: A list where any errors generated during parsing
        will be stored.  If no list is specified, then fatal errors
        will generate exceptions, and non-fatal errors will be
        ignored.
    @type errors: C{list} of L{ParseError}
    @param options: Extra keyword arguments; they are passed through
        unchanged to the markup language's parse function.
    @rtype: L{ParsedDocstring}
    @return: A L{ParsedDocstring} that encodes the contents of
        C{docstring}.
    @raise ParseError: If C{errors} is C{None} and an error is
        encountered while parsing.
    """
    # Initialize errors list.
    raise_on_error = (errors is None)
    if errors is None: errors = []

    # Normalize the markup language name.
    markup = markup.lower()

    # Is the markup language valid?
    if not re.match(r'\w+', markup):
        _parse_warn('Bad markup language name %r.  Treating '
                    'docstrings as plaintext.' % markup)
        return _parse_as_plaintext(docstring, errors, **options)

    # Is the markup language supported?
    if markup not in _markup_language_registry:
        _parse_warn('Unsupported markup language %r.  Treating '
                    'docstrings as plaintext.' % markup)
        return _parse_as_plaintext(docstring, errors, **options)

    # Get the parse function.
    parse_docstring = _markup_language_registry[markup]

    # If it's a string, then it names a module to import the parse
    # function from.  __import__ is used rather than exec'ing a
    # built-up "from ... import" statement: it evaluates no generated
    # code, and it does not depend on exec being able to rebind a
    # function's local variable (which Python 3 does not allow).
    if isinstance(parse_docstring, basestring):
        module_name = parse_docstring
        try:
            module = __import__(module_name, {}, {}, ['parse_docstring'])
            if not hasattr(module, 'parse_docstring'):
                # Same failure mode as "from module import name".
                raise ImportError('cannot import name parse_docstring')
            parse_docstring = module.parse_docstring
        except ImportError:
            # sys.exc_info() keeps this valid on Python 2 and 3 alike.
            e = sys.exc_info()[1]
            _parse_warn('Error importing %s for markup language %s: %s' %
                        (module_name, markup, e))
            return _parse_as_plaintext(docstring, errors, **options)
        # Cache the function so the import happens only once.
        _markup_language_registry[markup] = parse_docstring

    # Keep track of which markup languages have been used so far.
    MARKUP_LANGUAGES_USED.add(markup)

    # Parse the docstring.
    try: parsed_docstring = parse_docstring(docstring, errors, **options)
    except KeyboardInterrupt: raise
    except Exception:
        if epydoc.DEBUG: raise
        e = sys.exc_info()[1]
        log.error('Internal error while parsing a docstring: %s; '
                  'treating docstring as plaintext' % e)
        return _parse_as_plaintext(docstring, errors, **options)

    # Check for fatal errors.
    fatal_errors = [err for err in errors if err.is_fatal()]
    if fatal_errors and raise_on_error: raise fatal_errors[0]
    if fatal_errors:
        return _parse_as_plaintext(docstring, errors, **options)

    return parsed_docstring


# only issue each warning once: every message that has already been
# logged is stored here as a key (the value is always 1).
_parse_warnings = {}

def _parse_warn(estr):
    """
    Print a warning message.  If the given error has already been
    printed, then do nothing.

    @param estr: The warning message.  It is also the key used to
        decide whether the warning was already issued.
    @type estr: C{string}
    """
    # No 'global' statement is needed: the dictionary is only
    # mutated, never rebound.
    if estr in _parse_warnings: return
    _parse_warnings[estr] = 1
    log.warning(estr)


##################################################
## ParsedDocstring
##################################################

class ParsedDocstring:
    """
    A standard intermediate representation for parsed docstrings that
    can be used to generate output.  Parsed docstrings are produced by
    markup parsers (such as L{epytext.parse} or L{javadoc.parse}).
    C{ParsedDocstring}s support several kinds of operation:
      - output generation (L{to_plaintext()}, L{to_html()}, and
        L{to_latex()}).
      - Summarization (L{summary()}).
      - Field extraction (L{split_fields()}).
      - Index term extraction (L{index_terms()}).

    The output generation methods (C{to_M{format}()}) use a
    L{DocstringLinker} to link the docstring output with the rest
    of the documentation that epydoc generates.

    Subclassing
    ===========
    The only method that a subclass is I{required} to implement is
    L{to_plaintext()}; but it is often useful to override the other
    methods.  The default behavior of each method is described below:
      - C{to_I{format}}: Calls C{to_plaintext}, and uses the string it
        returns to generate verbatim output.
      - C{summary}: Returns C{(self, False)} (i.e., the entire
        docstring, with nothing left over).
      - C{split_fields}: Returns C{(self, [])} (i.e., extracts no
        fields).
      - C{index_terms}: Returns C{[]} (i.e., extracts no index terms).

    If and when epydoc adds more output formats, new C{to_I{format}}
    methods will be added to this base class; but they will always
    be given a default implementation.
    """
    def split_fields(self, errors=None):
        """
        Split this docstring into its body and its fields.

        @return: A tuple C{(M{body}, M{fields})}, where C{M{body}} is
            the main body of this docstring, and C{M{fields}} is a list
            of its fields.  If the resulting body is empty, return
            C{None} for the body.
        @rtype: C{(L{ParsedDocstring}, list of L{Field})}
        @param errors: A list where any errors generated during
            splitting will be stored.  If no list is specified, then
            errors will be ignored.
        @type errors: C{list} of L{ParseError}
        """
        # Default behavior:
        return self, []

    def summary(self):
        """
        @return: A pair consisting of a short summary of this docstring and a
            boolean value indicating whether there is further documentation
            in addition to the summary. Typically, the summary consists of the
            first sentence of the docstring.
        @rtype: (L{ParsedDocstring}, C{bool})
        """
        # Default behavior:
        return self, False

    def concatenate(self, other):
        """
        @return: A new parsed docstring containing the concatenation
            of this docstring and C{other}.
        @raise ValueError: If the two parsed docstrings are
            incompatible.
        """
        return ConcatenatedDocstring(self, other)

    def __add__(self, other):
        """
        Support C{docstring1 + docstring2} as a shorthand for
        L{concatenate()}.
        """
        return self.concatenate(other)

    def to_html(self, docstring_linker, **options):
        """
        Translate this docstring to HTML.

        @param docstring_linker: An HTML translator for crossreference
            links into and out of the docstring.
        @type docstring_linker: L{DocstringLinker}
        @param options: Any extra options for the output.  Unknown
            options are ignored.
        @return: An HTML fragment that encodes this docstring.
        @rtype: C{string}
        """
        # Default behavior:
        plaintext = plaintext_to_html(self.to_plaintext(docstring_linker))
        return '<pre class="literalblock">\n%s\n</pre>\n' % plaintext

    def to_latex(self, docstring_linker, **options):
        """
        Translate this docstring to LaTeX.

        @param docstring_linker: A LaTeX translator for crossreference
            links into and out of the docstring.
        @type docstring_linker: L{DocstringLinker}
        @param options: Any extra options for the output.  Unknown
            options are ignored.
        @return: A LaTeX fragment that encodes this docstring.
        @rtype: C{string}
        """
        # Default behavior:
        plaintext = plaintext_to_latex(self.to_plaintext(docstring_linker))
        return '\\begin{alltt}\n%s\\end{alltt}\n\n' % plaintext

    def to_plaintext(self, docstring_linker, **options):
        """
        Translate this docstring to plaintext.

        @param docstring_linker: A plaintext translator for
            crossreference links into and out of the docstring.
        @type docstring_linker: L{DocstringLinker}
        @param options: Any extra options for the output.  Unknown
            options are ignored.
        @return: A plaintext fragment that encodes this docstring.
        @rtype: C{string}
        @raise NotImplementedError: Always, in this base class;
            subclasses must override this method.
        """
        # The call form of raise is valid on both Python 2 and 3.
        raise NotImplementedError('ParsedDocstring.to_plaintext()')

    def index_terms(self):
        """
        @return: The list of index terms that are defined in this
            docstring.  Each of these items will be added to the index
            page of the documentation.
        @rtype: C{list} of C{ParsedDocstring}
        """
        # Default behavior:
        return []


##################################################
## Concatenated Docstring
##################################################

class ConcatenatedDocstring:
    """
    A sequence of parsed docstrings that is treated as a single
    docstring.  Every operation is delegated to the component
    docstrings, in order, and the results are combined.
    """
    def __init__(self, *parsed_docstrings):
        """
        @param parsed_docstrings: The docstrings to concatenate, in
            order.  C{None} entries are dropped.
        """
        self._parsed_docstrings = [pds for pds in parsed_docstrings
                                   if pds is not None]

    def split_fields(self, errors=None):
        """
        Split every component docstring into its body and fields.

        @param errors: A list where any errors generated during
            splitting will be stored.  It is handed on to each
            component docstring.
        @return: A tuple C{(M{body}, M{fields})}, where C{M{body}} is
            the concatenation of the components' bodies and
            C{M{fields}} is the list of all their fields.
        """
        bodies = []
        fields = []
        for doc in self._parsed_docstrings:
            # Forward the caller's error list; otherwise errors found
            # while splitting a component would be silently lost.
            b, f = doc.split_fields(errors)
            bodies.append(b)
            fields.extend(f)

        # Components whose body is None are dropped by the constructor.
        return ConcatenatedDocstring(*bodies), fields

    def summary(self):
        """
        @return: The summary of the first component docstring.  If
            there are no components, then C{(self, False)}.
        """
        if not self._parsed_docstrings:
            # Nothing to summarize: mirror the pair shape of the
            # component summaries instead of raising IndexError.
            return self, False
        return self._parsed_docstrings[0].summary()

    def to_html(self, docstring_linker, **options):
        """
        @return: The HTML output of every component, joined in order.
        """
        return ''.join([doc.to_html(docstring_linker, **options)
                        for doc in self._parsed_docstrings])

    def to_latex(self, docstring_linker, **options):
        """
        @return: The LaTeX output of every component, joined in order.
        """
        return ''.join([doc.to_latex(docstring_linker, **options)
                        for doc in self._parsed_docstrings])

    def to_plaintext(self, docstring_linker, **options):
        """
        @return: The plaintext output of every component, joined in
            order.
        """
        return ''.join([doc.to_plaintext(docstring_linker, **options)
                        for doc in self._parsed_docstrings])

    def index_terms(self):
        """
        @return: The index terms of every component, in order.
        """
        terms = []
        for doc in self._parsed_docstrings:
            terms.extend(doc.index_terms())
        return terms


##################################################
## Fields
##################################################

class Field:
    """
    A single field of a docstring.  Fields describe one specific
    aspect of an object, such as a parameter of a function or the
    author of a module.  A field is made up of three parts:
      - The tag, which says what kind of information the field
        encodes.
      - The argument, which says which object the field describes.
        It is either C{None} or a C{string}.
      - The body, which holds the field's information.

    The tag is stored downcased and stripped; the argument (when
    there is one) is stored stripped.
    """
    def __init__(self, tag, arg, body):
        lowered_tag = tag.lower()
        self._tag = lowered_tag.strip()
        if arg is not None:
            arg = arg.strip()
        self._arg = arg
        self._body = body

    def tag(self):
        """
        @return: The tag of this field.
        @rtype: C{string}
        """
        return self._tag

    def arg(self):
        """
        @return: The argument of this field; C{None} when the field
            has no argument.
        @rtype: C{string} or C{None}
        """
        return self._arg

    def body(self):
        """
        @return: The body of this field.
        @rtype: L{ParsedDocstring}
        """
        return self._body

    def __repr__(self):
        if self._arg is not None:
            return '<Field @%s %s: ...>' % (self._tag, self._arg)
        return '<Field @%s: ...>' % self._tag


##################################################
## Docstring Linker (resolves crossreferences)
##################################################

class DocstringLinker:
    """
    A translator for crossreference links into and out of a
    C{ParsedDocstring}.  C{DocstringLinker} is used by
    C{ParsedDocstring} to convert these crossreference links into
    appropriate output formats.  For example,
    C{ParsedDocstring.to_html} expects a C{DocstringLinker} that
    converts crossreference links to HTML.

    Both methods of this base class raise C{NotImplementedError};
    subclasses are expected to override them.
    """
    def translate_indexterm(self, indexterm):
        """
        Translate an index term to the appropriate output format.  The
        output will typically include a crossreference anchor.

        @type indexterm: L{ParsedDocstring}
        @param indexterm: The index term to translate.
        @rtype: C{string}
        @return: The translated index term.
        @raise NotImplementedError: Always, in this base class.
        """
        # The call form of raise is valid on both Python 2 and 3.
        raise NotImplementedError('DocstringLinker.translate_indexterm()')

    def translate_identifier_xref(self, identifier, label=None):
        """
        Translate a crossreference link to a Python identifier to the
        appropriate output format.  The output will typically include
        a reference or pointer to the crossreference target.

        @type identifier: C{string}
        @param identifier: The name of the Python identifier that
            should be linked to.
        @type label: C{string} or C{None}
        @param label: The label that should be used for the identifier,
            if it's different from the name of the identifier.
        @rtype: C{string}
        @return: The translated crossreference link.
        @raise NotImplementedError: Always, in this base class.
        """
        # The message names this method (it used to name a
        # non-existent translate_xref()).
        raise NotImplementedError(
            'DocstringLinker.translate_identifier_xref()')


##################################################
## ParseError exceptions
##################################################


class ParseError(Exception):
    """
    The base class for errors generated while parsing docstrings.

    @ivar _linenum: The line on which the error occurred within the
        docstring.  The linenum of the first line is 0.  C{None} if
        the line is unknown.
    @type _linenum: C{int} or C{None}
    @ivar _offset: The line number where the docstring begins.  This
        offset is added to C{_linenum} when displaying the line number
        of the error.  Default value: 1.
    @type _offset: C{int}
    @ivar _descr: A description of the error.
    @type _descr: C{string}
    @ivar _fatal: True if this is a fatal error.
    @type _fatal: C{boolean}
    """
    def __init__(self, descr, linenum=None, is_fatal=1):
        """
        @type descr: C{string}
        @param descr: A description of the error.
        @type linenum: C{int}
        @param linenum: The line on which the error occurred within
            the docstring.  The linenum of the first line is 0.
        @type is_fatal: C{boolean}
        @param is_fatal: True if this is a fatal error.
        """
        self._descr = descr
        self._linenum = linenum
        self._fatal = is_fatal
        self._offset = 1

    def is_fatal(self):
        """
        @return: true if this is a fatal error.  If an error is fatal,
            then epydoc should ignore the output of the parser, and
            parse the docstring as plaintext.
        @rtype: C{boolean}
        """
        return self._fatal

    def linenum(self):
        """
        @return: The line number on which the error occurred (including
            any offset).  If the line number is unknown, then return
            C{None}.
        @rtype: C{int} or C{None}
        """
        if self._linenum is None: return None
        else: return self._offset + self._linenum

    def set_linenum_offset(self, offset):
        """
        Set the line number offset for this error.  This offset is the
        line number where the docstring begins.  This offset is added
        to C{_linenum} when displaying the line number of the error.

        @param offset: The new line number offset.
        @type offset: C{int}
        @rtype: C{None}
        """
        self._offset = offset

    def descr(self):
        """
        @return: The description of this error.
        @rtype: C{string}
        """
        return self._descr

    def __str__(self):
        """
        Return a string representation of this C{ParseError}.  This
        string contains a description of the error, preceded by the
        line on which it occurred if that line is known.

        @return: the informal representation of this C{ParseError}.
        @rtype: C{string}
        """
        if self._linenum is not None:
            return 'Line %s: %s' % (self._linenum+self._offset, self.descr())
        else:
            return self.descr()

    def __repr__(self):
        """
        Return the formal representation of this C{ParseError}.
        C{ParseError}s have formal representations of the form::
           <ParseError on line 12>

        If the line of the error is unknown, then the line number
        offset alone is shown.

        @return: the formal representation of this C{ParseError}.
        @rtype: C{string}
        """
        if self._linenum is None:
            return '<ParseError on line %d>' % self._offset
        else:
            return '<ParseError on line %d>' % (self._linenum+self._offset)

    def _sort_key(self):
        """
        @return: A key that orders errors by L{linenum()}.  Errors
            whose line is unknown sort before all others, so that the
            key never has to compare C{None} with an C{int}.
        @rtype: C{tuple}
        """
        linenum = self.linenum()
        if linenum is None: return (0, 0)
        return (1, linenum)

    def __cmp__(self, other):
        """
        Compare two C{ParseError}s, based on their line number.
          - Return -1 if C{self.linenum<other.linenum}
          - Return +1 if C{self.linenum>other.linenum}
          - Return 0 if C{self.linenum==other.linenum}.
        An error whose line number is unknown is ordered before any
        error whose line number is known.  The return value is
        undefined if C{other} is not a ParseError.

        @rtype: C{int}
        """
        if not isinstance(other, ParseError): return -1000
        mine, theirs = self._sort_key(), other._sort_key()
        # Equivalent to cmp(mine, theirs), without needing the cmp()
        # builtin.
        return (mine > theirs) - (mine < theirs)

    def __lt__(self, other):
        """
        Order C{ParseError}s consistently with L{__cmp__}, so that
        lists of errors can be sorted even where C{__cmp__} is not
        consulted.
        """
        if not isinstance(other, ParseError): return NotImplemented
        return self._sort_key() < other._sort_key()


##################################################
## Misc helpers
##################################################
# These are used by multiple markup parsers


def parse_type_of(obj):
    """
    Build a parsed docstring that names the type of C{obj}.

    For an object whose type is C{types.InstanceType}, the result is
    a link whose name is the name of the object's class and whose
    target is the string form of that class.  For any other object,
    the result is the name of the object's type, marked up as code.

    @param obj: The object whose type should be returned as DOM document.
    @type obj: any
    @return: A C{ParsedDocstring} that encodes the type of the given
        object.
    @rtype: L{ParsedDocstring}
    """
    # This is a bit hackish; oh well. :)
    from epydoc.markup.epytext import ParsedEpytextDocstring
    from xml.dom.minidom import Document

    dom = Document()

    def add_child(parent, tag_name):
        # Create a <tag_name> element, attach it under parent, and
        # hand it back.
        element = dom.createElement(tag_name)
        parent.appendChild(element)
        return element

    def add_text(parent, text):
        parent.appendChild(dom.createTextNode(text))

    # Skeleton of the document: <epytext><para>...</para></epytext>
    root = add_child(dom, 'epytext')
    paragraph = add_child(root, 'para')

    if type(obj) is not types.InstanceType:
        code_elt = add_child(paragraph, 'code')
        add_text(code_elt, type(obj).__name__)
    else:
        link_elt = add_child(paragraph, 'link')
        name_elt = add_child(link_elt, 'name')
        target_elt = add_child(link_elt, 'target')
        add_text(name_elt, str(obj.__class__.__name__))
        add_text(target_elt, str(obj.__class__))

    return ParsedEpytextDocstring(dom)


623

Source Code for Package epydoc.markup