Package epydoc :: Package markup :: Module pyval_repr
[hide private]
[frames | no frames]

Source Code for Module epydoc.markup.pyval_repr

  1  # epydoc -- Marked-up Representations for Python Values 
  2  # 
  3  # Copyright (C) 2005 Edward Loper 
  4  # Author: Edward Loper <[email protected]> 
  5  # URL: <http://epydoc.sf.net> 
  6  # 
  7  # $Id: apidoc.py 1448 2007-02-11 00:05:34Z dvarrazzo $ 
  8   
  9  """ 
 10  Syntax highlighter for Python values.  Currently provides special 
 11  colorization support for: 
 12   
 13    - lists, tuples, sets, frozensets, dicts 
 14    - numbers 
 15    - strings 
 16    - compiled regexps 
 17   
 18  The highlighter also takes care of line-wrapping, and automatically 
 19  stops generating repr output as soon as it has exceeded the specified 
 20  number of lines (which should make it faster than pprint for large 
 21  values).  It does I{not} bother to do automatic cycle detection, 
 22  because maxlines is typically around 5, so it's really not worth it. 
 23   
 24  The syntax-highlighted output is encoded using a 
 25  L{ParsedEpytextDocstring}, which can then be used to generate output in 
 26  a variety of formats. 
 27  """ 
 28  __docformat__ = 'epytext en' 
 29   
 30  # Implementation note: we use exact tests for classes (list, etc) 
 31  # rather than using isinstance, because subclasses might override 
 32  # __repr__. 
 33   
 34  import types, re 
 35  import epydoc.apidoc 
 36  from epydoc.util import decode_with_backslashreplace 
 37  from epydoc.util import plaintext_to_html, plaintext_to_latex 
 38  from epydoc.compat import * 
 39  import sre_parse, sre_constants 
 40   
 41  from epydoc.markup.epytext import Element, ParsedEpytextDocstring 
 42   
def is_re_pattern(pyval):
    """
    Return true if C{pyval} is a compiled regular expression object.

    The exact type of C{pyval} is compared against the type of an object
    returned by C{re.compile()}.  Comparing the real type (instead of
    matching the type's C{__name__} against a hard-coded string) keeps
    the check independent of how a given interpreter version names its
    pattern type, and avoids false positives for unrelated classes that
    merely share that name.

    @param pyval: The value to test.
    @rtype: C{bool}
    """
    # re.compile() caches compiled patterns, so this lookup is cheap.
    return type(pyval) is type(re.compile(''))
45
46 -class _ColorizerState:
47 """ 48 An object uesd to keep track of the current state of the pyval 49 colorizer. The L{mark()}/L{restore()} methods can be used to set 50 a backup point, and restore back to that backup point. This is 51 used by several colorization methods that first try colorizing 52 their object on a single line (setting linebreakok=False); and 53 then fall back on a multi-line output if that fails. The L{score} 54 variable is used to keep track of a 'score', reflecting how good 55 we think this repr is. E.g., unhelpful values like '<Foo instance 56 at 0x12345>' get low scores. If the score is too low, we'll use 57 the parse-derived repr instead. 58 """
59 - def __init__(self):
60 self.result = [] 61 self.charpos = 0 62 self.lineno = 1 63 self.linebreakok = True 64 65 #: How good this represention is? 66 self.score = 0
67
68 - def mark(self):
69 return (len(self.result), self.charpos, 70 self.lineno, self.linebreakok, self.score)
71
72 - def restore(self, mark):
73 n, self.charpos, self.lineno, self.linebreakok, self.score = mark 74 del self.result[n:]
75
76 -class _Maxlines(Exception):
77 """A control-flow exception that is raised when PyvalColorizer 78 exeeds the maximum number of allowed lines."""
79
80 -class _Linebreak(Exception):
81 """A control-flow exception that is raised when PyvalColorizer 82 generates a string containing a newline, but the state object's 83 linebreakok variable is False."""
84
class ColorizedPyvalRepr(ParsedEpytextDocstring):
    """
    A L{ParsedEpytextDocstring} that additionally records how good and
    how complete the colorized repr it holds is.

    @ivar score: A score, evaluating how good this repr is.
    @ivar is_complete: True if this colorized repr completely describes
       the object.
    """
    def __init__(self, tree, score, is_complete):
        # Initialize the base class first, then attach the extra
        # bookkeeping attributes.
        ParsedEpytextDocstring.__init__(self, tree)
        self.is_complete = is_complete
        self.score = score
95
def colorize_pyval(pyval, parse_repr=None, min_score=None,
                   linelen=75, maxlines=5, linebreakok=True, sort=True):
    """
    Convenience wrapper: build a L{PyvalColorizer} configured with
    C{linelen}, C{maxlines}, C{linebreakok} and C{sort}, and return the
    result of its C{colorize(pyval, parse_repr, min_score)} method.
    """
    colorizer = PyvalColorizer(linelen, maxlines, linebreakok, sort)
    return colorizer.colorize(pyval, parse_repr, min_score)
100
class PyvalColorizer:
    """
    Syntax highlighter for Python values.

    @ivar linelen: The maximum number of characters on an output line;
        longer lines are wrapped by L{_output}.
    @ivar maxlines: The maximum number of output lines; L{_output}
        raises L{_Maxlines} when this would be exceeded.
    @ivar linebreakok: Whether the generated repr may span more than
        one line.
    @ivar sort: Whether sets, frozensets and dict items should be
        sorted (when possible) before being colorized.
    """

    def __init__(self, linelen=75, maxlines=5, linebreakok=True, sort=True):
        self.linelen = linelen
        self.maxlines = maxlines
        self.linebreakok = linebreakok
        self.sort = sort

    #////////////////////////////////////////////////////////////
    # Colorization Tags & other constants
    #////////////////////////////////////////////////////////////

    GROUP_TAG = 'variable-group'     # e.g., "[" and "]"
    COMMA_TAG = 'variable-op'        # The "," that separates elements
    COLON_TAG = 'variable-op'        # The ":" in dictionaries
    CONST_TAG = None                 # None, True, False
    NUMBER_TAG = None                # ints, floats, etc
    QUOTE_TAG = 'variable-quote'     # Quotes around strings.
    STRING_TAG = 'variable-string'   # Body of string literals

    RE_CHAR_TAG = None
    RE_GROUP_TAG = 're-group'
    RE_REF_TAG = 're-ref'
    RE_OP_TAG = 're-op'
    RE_FLAGS_TAG = 're-flags'

    ELLIPSIS = Element('code', u'...', style='variable-ellipsis')
    LINEWRAP = Element('symbol', u'crarr')
    UNKNOWN_REPR = Element('code', u'??', style='variable-unknown')

    GENERIC_OBJECT_RE = re.compile(r'^<.* at 0x[0-9a-f]+>$', re.IGNORECASE)

    #////////////////////////////////////////////////////////////
    # Entry Point
    #////////////////////////////////////////////////////////////

    def colorize(self, pyval, parse_repr=None, min_score=None):
        """
        Colorize C{pyval}, or C{parse_repr} if C{pyval} is C{UNKNOWN}.

        @param pyval: The value to colorize, or
            C{epydoc.apidoc.UNKNOWN}.
        @param parse_repr: A fallback string that is output (untagged)
            when C{pyval} is C{UNKNOWN}, or when the colorized C{pyval}
            scores below C{min_score}.
        @param min_score: If not C{None}, the minimum score that the
            colorized C{pyval} must reach; below it, C{parse_repr} (if
            available) is colorized instead.
        @return: A L{ColorizedPyvalRepr} describing the given pyval.
        """
        UNKNOWN = epydoc.apidoc.UNKNOWN
        # Create an object to keep track of the colorization.
        state = _ColorizerState()
        state.linebreakok = self.linebreakok
        # Colorize the value.  If we reach maxlines, then add on an
        # ellipsis marker and call it a day.
        try:
            if pyval is not UNKNOWN:
                self._colorize(pyval, state)
            elif parse_repr not in (None, UNKNOWN):
                self._output(parse_repr, None, state)
            else:
                state.result.append(PyvalColorizer.UNKNOWN_REPR)
            is_complete = True
        except (_Maxlines, _Linebreak):
            if self.linebreakok:
                state.result.append('\n')
                state.result.append(self.ELLIPSIS)
            else:
                # Drop a trailing line-wrap marker (guarding against an
                # empty result list), then make room for the ellipsis.
                if state.result and state.result[-1] is self.LINEWRAP:
                    state.result.pop()
                self._trim_result(state.result, 3)
                state.result.append(self.ELLIPSIS)
            is_complete = False
        # If we didn't score high enough, then try again.
        if (pyval is not UNKNOWN and parse_repr not in (None, UNKNOWN)
            and min_score is not None and state.score < min_score):
            return self.colorize(UNKNOWN, parse_repr)
        # Put it all together.
        tree = Element('epytext', *state.result)
        return ColorizedPyvalRepr(tree, state.score, is_complete)

    def _colorize(self, pyval, state):
        """
        Dispatch on the exact type of C{pyval} and append its colorized
        repr to C{state}.  Each value colorized adds one to the score;
        a failing C{repr()} or a generic '<... at 0x...>' repr lowers
        it.
        """
        pyval_type = type(pyval)
        state.score += 1

        if pyval is None or pyval is True or pyval is False:
            self._output(unicode(pyval), self.CONST_TAG, state)
        elif pyval_type in (int, float, long, types.ComplexType):
            self._output(unicode(pyval), self.NUMBER_TAG, state)
        elif pyval_type is str:
            self._colorize_str(pyval, state, '', 'string-escape')
        elif pyval_type is unicode:
            self._colorize_str(pyval, state, 'u', 'unicode-escape')
        elif pyval_type is list:
            self._multiline(self._colorize_iter, pyval, state, '[', ']')
        elif pyval_type is tuple:
            self._multiline(self._colorize_iter, pyval, state, '(', ')')
        elif pyval_type is set:
            self._multiline(self._colorize_iter, self._sort(pyval),
                            state, 'set([', '])')
        elif pyval_type is frozenset:
            self._multiline(self._colorize_iter, self._sort(pyval),
                            state, 'frozenset([', '])')
        elif pyval_type is dict:
            self._multiline(self._colorize_dict, self._sort(pyval.items()),
                            state, '{', '}')
        elif is_re_pattern(pyval):
            self._colorize_re(pyval, state)
        else:
            # Fall back on repr().  This is deliberately best-effort:
            # any failure of a user-defined __repr__ (other than a
            # keyboard interrupt) just yields the "unknown" marker.
            try:
                pyval_repr = repr(pyval)
                if not isinstance(pyval_repr, (str, unicode)):
                    pyval_repr = unicode(pyval_repr)
                pyval_repr_ok = True
            except KeyboardInterrupt:
                raise
            except:
                pyval_repr_ok = False
                state.score -= 100

            if pyval_repr_ok:
                if self.GENERIC_OBJECT_RE.match(pyval_repr):
                    state.score -= 5
                self._output(pyval_repr, None, state)
            else:
                state.result.append(self.UNKNOWN_REPR)

    def _sort(self, items):
        """
        Return C{items} sorted if C{self.sort} is true and the items
        can be sorted; otherwise return C{items} unchanged.
        """
        if not self.sort: return items
        try: return sorted(items)
        except KeyboardInterrupt: raise
        except: return items

    def _trim_result(self, result, num_chars):
        """
        Remove the last C{num_chars} characters from the end of the
        C{result} list, in place.  Entries that become empty are
        dropped.  Stops early if C{result} runs out of entries.

        NOTE(review): a trailing L{Element} is edited in place via its
        C{children} list.  If that element were one of the shared
        class-level constants (e.g. L{UNKNOWN_REPR}), the constant
        itself would be modified -- confirm whether that can occur.
        """
        while num_chars > 0:
            if not result: return
            if isinstance(result[-1], Element):
                assert len(result[-1].children) == 1
                trim = min(num_chars, len(result[-1].children[0]))
                result[-1].children[0] = result[-1].children[0][:-trim]
                if not result[-1].children[0]: result.pop()
                num_chars -= trim
            else:
                trim = min(num_chars, len(result[-1]))
                result[-1] = result[-1][:-trim]
                if not result[-1]: result.pop()
                num_chars -= trim

    #////////////////////////////////////////////////////////////
    # Object Colorization Functions
    #////////////////////////////////////////////////////////////

    def _multiline(self, func, pyval, state, *args):
        """
        Helper for container-type colorizers.  First, try calling
        C{func(pyval, state, *args)} with linebreakok set to false;
        and if that fails, then try again with it set to true.

        If line breaks were not allowed to begin with, the
        L{_Linebreak} exception is re-raised instead of retrying.
        """
        linebreakok = state.linebreakok
        mark = state.mark()

        try:
            state.linebreakok = False
            func(pyval, state, *args)
            state.linebreakok = linebreakok

        except _Linebreak:
            if not linebreakok:
                raise
            # The mark includes the original linebreakok value, so
            # restoring it also re-enables line breaks for the retry.
            state.restore(mark)
            func(pyval, state, *args)

    def _output_item_separator(self, state, indent):
        """
        Output the separator between two container items: a comma
        followed by a newline and C{indent} spaces when line breaks
        are allowed, or a comma and a single space otherwise.
        """
        if state.linebreakok:
            self._output(',', self.COMMA_TAG, state)
            self._output('\n'+' '*indent, None, state)
        else:
            self._output(', ', self.COMMA_TAG, state)

    def _colorize_iter(self, pyval, state, prefix, suffix):
        """
        Colorize each element of the iterable C{pyval}, surrounded by
        the group markers C{prefix} and C{suffix}.
        """
        self._output(prefix, self.GROUP_TAG, state)
        # Continuation lines are aligned with the first element.
        indent = state.charpos
        for i, elt in enumerate(pyval):
            if i>=1:
                self._output_item_separator(state, indent)
            self._colorize(elt, state)
        self._output(suffix, self.GROUP_TAG, state)

    def _colorize_dict(self, items, state, prefix, suffix):
        """
        Colorize each C{(key, value)} pair in C{items} as C{key: value},
        surrounded by the group markers C{prefix} and C{suffix}.
        """
        self._output(prefix, self.GROUP_TAG, state)
        # Continuation lines are aligned with the first key.
        indent = state.charpos
        for i, (key, val) in enumerate(items):
            if i>=1:
                self._output_item_separator(state, indent)
            self._colorize(key, state)
            self._output(': ', self.COLON_TAG, state)
            self._colorize(val, state)
        self._output(suffix, self.GROUP_TAG, state)

    def _colorize_str(self, pyval, state, prefix, encoding):
        """
        Colorize the string C{pyval}.  C{prefix} is output before the
        opening quote (e.g. C{'u'}), and each line of the body is
        escaped with C{line.encode(encoding)}.
        """
        # Decide which quote to use.
        if '\n' in pyval and state.linebreakok: quote = "'''"
        else: quote = "'"
        # Divide the string into lines.
        if state.linebreakok:
            lines = pyval.split('\n')
        else:
            lines = [pyval]
        # Open quote.
        self._output(prefix+quote, self.QUOTE_TAG, state)
        # Body
        for i, line in enumerate(lines):
            if i>0: self._output('\n', None, state)
            self._output(line.encode(encoding), self.STRING_TAG, state)
        # Close quote.
        self._output(quote, self.QUOTE_TAG, state)

    def _colorize_re(self, pyval, state):
        """
        Colorize the compiled regexp C{pyval} as a C{re.compile(r'...')}
        expression, re-parsing its pattern with C{sre_parse}.
        """
        # Extract the flag & pattern from the regexp.
        pat, flags = pyval.pattern, pyval.flags
        # If the pattern is a string, decode it to unicode.
        if isinstance(pat, str):
            pat = decode_with_backslashreplace(pat)
        # Parse the regexp pattern.
        tree = sre_parse.parse(pat, flags)
        # Map group numbers to their names.
        groups = dict([(num,name) for (name,num) in
                       tree.pattern.groupdict.items()])
        # Colorize it!
        self._output("re.compile(r'", None, state)
        self._colorize_re_flags(tree.pattern.flags, state)
        self._colorize_re_tree(tree, state, True, groups)
        self._output("')", None, state)

    def _colorize_re_flags(self, flags, state):
        """
        If any flags are set, output them as an inline C{(?...)} group
        built from the flag characters in C{sre_parse.FLAGS}.
        """
        if flags:
            flags = [c for (c,n) in sorted(sre_parse.FLAGS.items())
                     if (n&flags)]
            flags = '(?%s)' % ''.join(flags)
            self._output(flags, self.RE_FLAGS_TAG, state)

    def _colorize_re_tree(self, tree, state, noparen, groups):
        """
        Recursively colorize the parsed regexp C{tree}.

        @param tree: A sequence of C{(op, args)} elements, as produced
            by C{sre_parse.parse}.
        @param noparen: If false, and C{tree} contains more than one
            element, the output is wrapped in parentheses.
        @param groups: A dictionary mapping group numbers to names.
        """
        assert noparen in (True, False)
        if len(tree) > 1 and not noparen:
            self._output('(', self.RE_GROUP_TAG, state)
        for elt in tree:
            op = elt[0]
            args = elt[1]

            if op == sre_constants.LITERAL:
                c = unichr(args)
                # Add any appropriate escaping.
                if c in '.^$\\*+?{}[]|()\'': c = '\\'+c
                elif c == '\t': c = '\\t'
                elif c == '\r': c = '\\r'
                elif c == '\n': c = '\\n'
                elif c == '\f': c = '\\f'
                elif c == '\v': c = '\\v'
                elif ord(c) > 0xffff: c = r'\U%08x' % ord(c)
                elif ord(c) > 0xff: c = r'\u%04x' % ord(c)
                elif ord(c)<32 or ord(c)>=127: c = r'\x%02x' % ord(c)
                self._output(c, self.RE_CHAR_TAG, state)

            elif op == sre_constants.ANY:
                self._output('.', self.RE_CHAR_TAG, state)

            elif op == sre_constants.BRANCH:
                if args[0] is not None:
                    raise ValueError('Branch expected None arg but got %s'
                                     % args[0])
                for i, item in enumerate(args[1]):
                    if i > 0:
                        self._output('|', self.RE_OP_TAG, state)
                    self._colorize_re_tree(item, state, True, groups)

            elif op == sre_constants.IN:
                if (len(args) == 1 and args[0][0] == sre_constants.CATEGORY):
                    self._colorize_re_tree(args, state, False, groups)
                else:
                    self._output('[', self.RE_GROUP_TAG, state)
                    self._colorize_re_tree(args, state, True, groups)
                    self._output(']', self.RE_GROUP_TAG, state)

            elif op == sre_constants.CATEGORY:
                if args == sre_constants.CATEGORY_DIGIT: val = r'\d'
                elif args == sre_constants.CATEGORY_NOT_DIGIT: val = r'\D'
                elif args == sre_constants.CATEGORY_SPACE: val = r'\s'
                elif args == sre_constants.CATEGORY_NOT_SPACE: val = r'\S'
                elif args == sre_constants.CATEGORY_WORD: val = r'\w'
                elif args == sre_constants.CATEGORY_NOT_WORD: val = r'\W'
                else: raise ValueError('Unknown category %s' % args)
                self._output(val, self.RE_CHAR_TAG, state)

            elif op == sre_constants.AT:
                if args == sre_constants.AT_BEGINNING_STRING: val = r'\A'
                elif args == sre_constants.AT_BEGINNING: val = r'^'
                elif args == sre_constants.AT_END: val = r'$'
                elif args == sre_constants.AT_BOUNDARY: val = r'\b'
                elif args == sre_constants.AT_NON_BOUNDARY: val = r'\B'
                elif args == sre_constants.AT_END_STRING: val = r'\Z'
                else: raise ValueError('Unknown position %s' % args)
                self._output(val, self.RE_CHAR_TAG, state)

            elif op in (sre_constants.MAX_REPEAT, sre_constants.MIN_REPEAT):
                minrpt = args[0]
                maxrpt = args[1]
                if maxrpt == sre_constants.MAXREPEAT:
                    if minrpt == 0:   val = '*'
                    elif minrpt == 1: val = '+'
                    else: val = '{%d,}' % (minrpt)
                elif minrpt == 0:
                    if maxrpt == 1: val = '?'
                    else: val = '{,%d}' % (maxrpt)
                elif minrpt == maxrpt:
                    val = '{%d}' % (maxrpt)
                else:
                    val = '{%d,%d}' % (minrpt, maxrpt)
                if op == sre_constants.MIN_REPEAT:
                    val += '?'

                self._colorize_re_tree(args[2], state, False, groups)
                self._output(val, self.RE_OP_TAG, state)

            elif op == sre_constants.SUBPATTERN:
                if args[0] is None:
                    self._output('(?:', self.RE_GROUP_TAG, state)
                elif args[0] in groups:
                    self._output('(?P<', self.RE_GROUP_TAG, state)
                    self._output(groups[args[0]], self.RE_REF_TAG, state)
                    self._output('>', self.RE_GROUP_TAG, state)
                elif isinstance(args[0], (int, long)):
                    # This is cheating:
                    self._output('(', self.RE_GROUP_TAG, state)
                else:
                    self._output('(?P<', self.RE_GROUP_TAG, state)
                    self._output(args[0], self.RE_REF_TAG, state)
                    self._output('>', self.RE_GROUP_TAG, state)
                self._colorize_re_tree(args[1], state, True, groups)
                self._output(')', self.RE_GROUP_TAG, state)

            elif op == sre_constants.GROUPREF:
                self._output('\\%d' % args, self.RE_REF_TAG, state)

            elif op == sre_constants.RANGE:
                self._colorize_re_tree( ((sre_constants.LITERAL, args[0]),),
                                        state, False, groups )
                self._output('-', self.RE_OP_TAG, state)
                self._colorize_re_tree( ((sre_constants.LITERAL, args[1]),),
                                        state, False, groups )

            elif op == sre_constants.NEGATE:
                self._output('^', self.RE_OP_TAG, state)

            elif op == sre_constants.ASSERT:
                if args[0] > 0:
                    self._output('(?=', self.RE_GROUP_TAG, state)
                else:
                    self._output('(?<=', self.RE_GROUP_TAG, state)
                self._colorize_re_tree(args[1], state, True, groups)
                self._output(')', self.RE_GROUP_TAG, state)

            elif op == sre_constants.ASSERT_NOT:
                if args[0] > 0:
                    self._output('(?!', self.RE_GROUP_TAG, state)
                else:
                    self._output('(?<!', self.RE_GROUP_TAG, state)
                self._colorize_re_tree(args[1], state, True, groups)
                self._output(')', self.RE_GROUP_TAG, state)

            elif op == sre_constants.NOT_LITERAL:
                self._output('[^', self.RE_GROUP_TAG, state)
                self._colorize_re_tree( ((sre_constants.LITERAL, args),),
                                        state, False, groups )
                self._output(']', self.RE_GROUP_TAG, state)
            else:
                # No import visible in this module binds `log`, so it
                # is imported here where it is needed.
                # NOTE(review): assumes the epydoc package provides a
                # `log` module with an `error()` function -- confirm.
                from epydoc import log
                # `elt` is a tuple, so wrap it to format it as a single
                # value rather than as multiple format arguments.
                log.error("Error colorizing regexp: unknown elt %r" % (elt,))
        if len(tree) > 1 and not noparen:
            self._output(')', self.RE_GROUP_TAG, state)

    #////////////////////////////////////////////////////////////
    # Output function
    #////////////////////////////////////////////////////////////

    def _output(self, s, tag, state):
        """
        Add the string `s` to the result list, tagging its contents
        with tag `tag`.  Any lines that go beyond `self.linelen` will
        be line-wrapped.  If the total number of lines exceeds
        `self.maxlines`, then raise a `_Maxlines` exception.  If a new
        line is needed while `state.linebreakok` is false, then raise
        a `_Linebreak` exception.
        """
        # Make sure the string is unicode.
        if isinstance(s, str):
            s = decode_with_backslashreplace(s)

        # Split the string into segments.  The first segment is the
        # content to add to the current line, and the remaining
        # segments are new lines.
        segments = s.split('\n')

        for i, segment in enumerate(segments):
            # If this isn't the first segment, then add a newline to
            # split it from the previous segment.
            if i > 0:
                if (state.lineno+1) > self.maxlines:
                    raise _Maxlines()
                if not state.linebreakok:
                    raise _Linebreak()
                state.result.append(u'\n')
                state.lineno += 1
                state.charpos = 0

            # If the segment fits on the current line, then just call
            # markup to tag it, and store the result.
            if state.charpos + len(segment) <= self.linelen:
                state.charpos += len(segment)
                if tag:
                    segment = Element('code', segment, style=tag)
                state.result.append(segment)

            # If the segment doesn't fit on the current line, then
            # line-wrap it, and insert the remainder of the line into
            # the segments list that we're iterating over.  (We'll go
            # to the beginning of the next line at the start of the
            # next iteration through the loop.)
            else:
                split = self.linelen-state.charpos
                segments.insert(i+1, segment[split:])
                segment = segment[:split]
                if tag:
                    segment = Element('code', segment, style=tag)
                state.result += [segment, self.LINEWRAP]
526