1
2
3
4
5
6
7
8
9 """
10 Syntax highlighter for Python values. Currently provides special
11 colorization support for:
12
13 - lists, tuples, sets, frozensets, dicts
14 - numbers
15 - strings
16 - compiled regexps
17
18 The highlighter also takes care of line-wrapping, and automatically
19 stops generating repr output as soon as it has exceeded the specified
20 number of lines (which should make it faster than pprint for large
21 values). It does I{not} bother to do automatic cycle detection,
22 because maxlines is typically around 5, so it's really not worth it.
23
24 The syntax-highlighted output is encoded using a
25 L{ParsedEpytextDocstring}, which can then be used to generate output in
26 a variety of formats.
27 """
28 __docformat__ = 'epytext en'
29
30
31
32
33
34 import types, re
35 import epydoc.apidoc
36 from epydoc.util import decode_with_backslashreplace
37 from epydoc.util import plaintext_to_html, plaintext_to_latex
38 from epydoc.compat import *
39 import sre_parse, sre_constants
40
41 from epydoc.markup.epytext import Element, ParsedEpytextDocstring
42
44 return type(pyval).__name__ == 'SRE_Pattern'
45
47 """
48 An object used to keep track of the current state of the pyval
49 colorizer. The L{mark()}/L{restore()} methods can be used to set
50 a backup point, and restore back to that backup point. This is
51 used by several colorization methods that first try colorizing
52 their object on a single line (setting linebreakok=False); and
53 then fall back on a multi-line output if that fails. The L{score}
54 variable is used to keep track of a 'score', reflecting how good
55 we think this repr is. E.g., unhelpful values like '<Foo instance
56 at 0x12345>' get low scores. If the score is too low, we'll use
57 the parse-derived repr instead.
58 """
60 self.result = []
61 self.charpos = 0
62 self.lineno = 1
63 self.linebreakok = True
64
65
66 self.score = 0
67
69 return (len(self.result), self.charpos,
70 self.lineno, self.linebreakok, self.score)
71
73 n, self.charpos, self.lineno, self.linebreakok, self.score = mark
74 del self.result[n:]
75
77 """A control-flow exception that is raised when PyvalColorizer
78 exceeds the maximum number of allowed lines."""
79
81 """A control-flow exception that is raised when PyvalColorizer
82 generates a string containing a newline, but the state object's
83 linebreakok variable is False."""
84
86 """
87 @ivar score: A score, evaluating how good this repr is.
88 @ivar is_complete: True if this colorized repr completely describes
89 the object.
90 """
91 - def __init__(self, tree, score, is_complete):
95
96 -def colorize_pyval(pyval, parse_repr=None, min_score=None,
97 linelen=75, maxlines=5, linebreakok=True, sort=True):
100
102 """
103 Syntax highlighter for Python values.
104 """
105
def __init__(self, linelen=75, maxlines=5, linebreakok=True, sort=True):
    """
    Record the formatting options used by this colorizer.

    @param linelen: Maximum number of characters placed on one output
        line before the output is line-wrapped.
    @param maxlines: Maximum number of output lines; colorization is
        cut short once this limit would be exceeded.
    @param linebreakok: Initial value for the colorizer state's
        C{linebreakok} flag, i.e. whether the generated repr may
        contain newlines.
    @param sort: If true, the elements of sets, frozensets and dicts
        are sorted (when they can be) before being colorized.
    """
    self.linelen = linelen
    self.maxlines = maxlines
    self.linebreakok = linebreakok
    self.sort = sort
111
112
113
114
115
# Style names attached to the 'code' elements that the colorizer
# emits.  A tag of None means the text is appended as a plain string
# rather than being wrapped in an element.
GROUP_TAG = 'variable-group'     # e.g., "[" and "]"
COMMA_TAG = 'variable-op'        # The "," that separates elements
COLON_TAG = 'variable-op'        # The ":" in dictionaries
CONST_TAG = None                 # None, True, False
NUMBER_TAG = None                # ints, floats, etc
QUOTE_TAG = 'variable-quote'     # Quotes around strings.
STRING_TAG = 'variable-string'   # Body of string literals

# Style names used when colorizing compiled regular expressions.
RE_CHAR_TAG = None
RE_GROUP_TAG = 're-group'
RE_REF_TAG = 're-ref'
RE_OP_TAG = 're-op'
RE_FLAGS_TAG = 're-flags'

# Marker elements: appended when output is cut short, when a long line
# is wrapped, and when no usable repr could be produced, respectively.
ELLIPSIS = Element('code', u'...', style='variable-ellipsis')
LINEWRAP = Element('symbol', u'crarr')
UNKNOWN_REPR = Element('code', u'??', style='variable-unknown')

# Matches uninformative reprs such as '<Foo instance at 0x12345>';
# a repr that matches has its score lowered.
GENERIC_OBJECT_RE = re.compile(r'^<.* at 0x[0-9a-f]+>$', re.IGNORECASE)
135
136
137
138
139
def colorize(self, pyval, parse_repr=None, min_score=None):
    """
    Colorize C{pyval} and package the result.

    @param pyval: The value to colorize, or C{epydoc.apidoc.UNKNOWN}
        if no value is available.
    @param parse_repr: A fallback textual representation.  It is
        output (untagged) in place of the value when C{pyval} is
        C{UNKNOWN}, or when the colorized value scores below
        C{min_score}.  C{None} or C{UNKNOWN} means there is no
        fallback.
    @param min_score: If not C{None}, the lowest score that is
        accepted for the colorized C{pyval} when a fallback
        C{parse_repr} is available.
    @return: A L{ColorizedPyvalRepr} describing the given pyval.
    """
    UNKNOWN = epydoc.apidoc.UNKNOWN

    # Create an object to keep track of the colorization; it starts
    # out with this colorizer's line-break policy.
    state = _ColorizerState()
    state.linebreakok = self.linebreakok

    # Colorize the value.  If we reach maxlines (or hit a line break
    # that is not allowed), the helpers raise an exception and we add
    # an ellipsis marker instead of the rest of the output.
    try:
        if pyval is not UNKNOWN:
            self._colorize(pyval, state)
        elif parse_repr not in (None, UNKNOWN):
            self._output(parse_repr, None, state)
        else:
            state.result.append(PyvalColorizer.UNKNOWN_REPR)
        is_complete = True
    except (_Maxlines, _Linebreak):
        if self.linebreakok:
            # Put the ellipsis on a line of its own.
            state.result.append('\n')
            state.result.append(self.ELLIPSIS)
        else:
            # Single-line mode: drop a trailing line-wrap marker and
            # make room for the three-character ellipsis.  The
            # emptiness check avoids an IndexError should nothing
            # have been emitted before the exception was raised.
            if state.result and state.result[-1] is self.LINEWRAP:
                state.result.pop()
            self._trim_result(state.result, 3)
            state.result.append(self.ELLIPSIS)
        is_complete = False

    # If we didn't score high enough, then try again using the
    # parse-derived repr.  min_score is not passed along, so this
    # recursion happens at most once.
    if (pyval is not UNKNOWN and parse_repr not in (None, UNKNOWN)
            and min_score is not None and state.score < min_score):
        return self.colorize(UNKNOWN, parse_repr)

    # Put it all together.
    tree = Element('epytext', *state.result)
    return ColorizedPyvalRepr(tree, state.score, is_complete)
175
177 pyval_type = type(pyval)
178 state.score += 1
179
180 if pyval is None or pyval is True or pyval is False:
181 self._output(unicode(pyval), self.CONST_TAG, state)
182 elif pyval_type in (int, float, long, types.ComplexType):
183 self._output(unicode(pyval), self.NUMBER_TAG, state)
184 elif pyval_type is str:
185 self._colorize_str(pyval, state, '', 'string-escape')
186 elif pyval_type is unicode:
187 self._colorize_str(pyval, state, 'u', 'unicode-escape')
188 elif pyval_type is list:
189 self._multiline(self._colorize_iter, pyval, state, '[', ']')
190 elif pyval_type is tuple:
191 self._multiline(self._colorize_iter, pyval, state, '(', ')')
192 elif pyval_type is set:
193 self._multiline(self._colorize_iter, self._sort(pyval),
194 state, 'set([', '])')
195 elif pyval_type is frozenset:
196 self._multiline(self._colorize_iter, self._sort(pyval),
197 state, 'frozenset([', '])')
198 elif pyval_type is dict:
199 self._multiline(self._colorize_dict, self._sort(pyval.items()),
200 state, '{', '}')
201 elif is_re_pattern(pyval):
202 self._colorize_re(pyval, state)
203 else:
204 try:
205 pyval_repr = repr(pyval)
206 if not isinstance(pyval_repr, (str, unicode)):
207 pyval_repr = unicode(pyval_repr)
208 pyval_repr_ok = True
209 except KeyboardInterrupt:
210 raise
211 except:
212 pyval_repr_ok = False
213 state.score -= 100
214
215 if pyval_repr_ok:
216 if self.GENERIC_OBJECT_RE.match(pyval_repr):
217 state.score -= 5
218 self._output(pyval_repr, None, state)
219 else:
220 state.result.append(self.UNKNOWN_REPR)
221
223 if not self.sort: return items
224 try: return sorted(items)
225 except KeyboardInterrupt: raise
226 except: return items
227
229 while num_chars > 0:
230 if not result: return
231 if isinstance(result[-1], Element):
232 assert len(result[-1].children) == 1
233 trim = min(num_chars, len(result[-1].children[0]))
234 result[-1].children[0] = result[-1].children[0][:-trim]
235 if not result[-1].children[0]: result.pop()
236 num_chars -= trim
237 else:
238 trim = min(num_chars, len(result[-1]))
239 result[-1] = result[-1][:-trim]
240 if not result[-1]: result.pop()
241 num_chars -= trim
242
243
244
245
246
248 """
249 Helper for container-type colorizers. First, try calling
250 C{func(pyval, state, *args)} with linebreakok set to false;
251 and if that fails, then try again with it set to true.
252 """
253 linebreakok = state.linebreakok
254 mark = state.mark()
255
256 try:
257 state.linebreakok = False
258 func(pyval, state, *args)
259 state.linebreakok = linebreakok
260
261 except _Linebreak:
262 if not linebreakok:
263 raise
264 state.restore(mark)
265 func(pyval, state, *args)
266
268 self._output(prefix, self.GROUP_TAG, state)
269 indent = state.charpos
270 for i, elt in enumerate(pyval):
271 if i>=1:
272 if state.linebreakok:
273 self._output(',', self.COMMA_TAG, state)
274 self._output('\n'+' '*indent, None, state)
275 else:
276 self._output(', ', self.COMMA_TAG, state)
277 self._colorize(elt, state)
278 self._output(suffix, self.GROUP_TAG, state)
279
281 self._output(prefix, self.GROUP_TAG, state)
282 indent = state.charpos
283 for i, (key, val) in enumerate(items):
284 if i>=1:
285 if state.linebreakok:
286 self._output(',', self.COMMA_TAG, state)
287 self._output('\n'+' '*indent, None, state)
288 else:
289 self._output(', ', self.COMMA_TAG, state)
290 self._colorize(key, state)
291 self._output(': ', self.COLON_TAG, state)
292 self._colorize(val, state)
293 self._output(suffix, self.GROUP_TAG, state)
294
296
297 if '\n' in pyval and state.linebreakok: quote = "'''"
298 else: quote = "'"
299
300 if state.linebreakok:
301 lines = pyval.split('\n')
302 else:
303 lines = [pyval]
304
305 self._output(prefix+quote, self.QUOTE_TAG, state)
306
307 for i, line in enumerate(lines):
308 if i>0: self._output('\n', None, state)
309 self._output(line.encode(encoding), self.STRING_TAG, state)
310
311 self._output(quote, self.QUOTE_TAG, state)
312
314
315 pat, flags = pyval.pattern, pyval.flags
316
317 if isinstance(pat, str):
318 pat = decode_with_backslashreplace(pat)
319
320 tree = sre_parse.parse(pat, flags)
321 groups = dict([(num,name) for (name,num) in
322 tree.pattern.groupdict.items()])
323
324 self._output("re.compile(r'", None, state)
325 self._colorize_re_flags(tree.pattern.flags, state)
326 self._colorize_re_tree(tree, state, True, groups)
327 self._output("')", None, state)
328
330 if flags:
331 flags = [c for (c,n) in sorted(sre_parse.FLAGS.items())
332 if (n&flags)]
333 flags = '(?%s)' % ''.join(flags)
334 self._output(flags, self.RE_FLAGS_TAG, state)
335
337 assert noparen in (True, False)
338 if len(tree) > 1 and not noparen:
339 self._output('(', self.RE_GROUP_TAG, state)
340 for elt in tree:
341 op = elt[0]
342 args = elt[1]
343
344 if op == sre_constants.LITERAL:
345 c = unichr(args)
346
347 if c in '.^$\\*+?{}[]|()\'': c = '\\'+c
348 elif c == '\t': c = '\\t'
349 elif c == '\r': c = '\\r'
350 elif c == '\n': c = '\\n'
351 elif c == '\f': c = '\\f'
352 elif c == '\v': c = '\\v'
353 elif ord(c) > 0xffff: c = r'\U%08x' % ord(c)
354 elif ord(c) > 0xff: c = r'\u%04x' % ord(c)
355 elif ord(c)<32 or ord(c)>=127: c = r'\x%02x' % ord(c)
356 self._output(c, self.RE_CHAR_TAG, state)
357
358 elif op == sre_constants.ANY:
359 self._output('.', self.RE_CHAR_TAG, state)
360
361 elif op == sre_constants.BRANCH:
362 if args[0] is not None:
363 raise ValueError('Branch expected None arg but got %s'
364 % args[0])
365 for i, item in enumerate(args[1]):
366 if i > 0:
367 self._output('|', self.RE_OP_TAG, state)
368 self._colorize_re_tree(item, state, True, groups)
369
370 elif op == sre_constants.IN:
371 if (len(args) == 1 and args[0][0] == sre_constants.CATEGORY):
372 self._colorize_re_tree(args, state, False, groups)
373 else:
374 self._output('[', self.RE_GROUP_TAG, state)
375 self._colorize_re_tree(args, state, True, groups)
376 self._output(']', self.RE_GROUP_TAG, state)
377
378 elif op == sre_constants.CATEGORY:
379 if args == sre_constants.CATEGORY_DIGIT: val = r'\d'
380 elif args == sre_constants.CATEGORY_NOT_DIGIT: val = r'\D'
381 elif args == sre_constants.CATEGORY_SPACE: val = r'\s'
382 elif args == sre_constants.CATEGORY_NOT_SPACE: val = r'\S'
383 elif args == sre_constants.CATEGORY_WORD: val = r'\w'
384 elif args == sre_constants.CATEGORY_NOT_WORD: val = r'\W'
385 else: raise ValueError('Unknown category %s' % args)
386 self._output(val, self.RE_CHAR_TAG, state)
387
388 elif op == sre_constants.AT:
389 if args == sre_constants.AT_BEGINNING_STRING: val = r'\A'
390 elif args == sre_constants.AT_BEGINNING: val = r'^'
391 elif args == sre_constants.AT_END: val = r'$'
392 elif args == sre_constants.AT_BOUNDARY: val = r'\b'
393 elif args == sre_constants.AT_NON_BOUNDARY: val = r'\B'
394 elif args == sre_constants.AT_END_STRING: val = r'\Z'
395 else: raise ValueError('Unknown position %s' % args)
396 self._output(val, self.RE_CHAR_TAG, state)
397
398 elif op in (sre_constants.MAX_REPEAT, sre_constants.MIN_REPEAT):
399 minrpt = args[0]
400 maxrpt = args[1]
401 if maxrpt == sre_constants.MAXREPEAT:
402 if minrpt == 0: val = '*'
403 elif minrpt == 1: val = '+'
404 else: val = '{%d,}' % (minrpt)
405 elif minrpt == 0:
406 if maxrpt == 1: val = '?'
407 else: val = '{,%d}' % (maxrpt)
408 elif minrpt == maxrpt:
409 val = '{%d}' % (maxrpt)
410 else:
411 val = '{%d,%d}' % (minrpt, maxrpt)
412 if op == sre_constants.MIN_REPEAT:
413 val += '?'
414
415 self._colorize_re_tree(args[2], state, False, groups)
416 self._output(val, self.RE_OP_TAG, state)
417
418 elif op == sre_constants.SUBPATTERN:
419 if args[0] is None:
420 self._output('(?:', self.RE_GROUP_TAG, state)
421 elif args[0] in groups:
422 self._output('(?P<', self.RE_GROUP_TAG, state)
423 self._output(groups[args[0]], self.RE_REF_TAG, state)
424 self._output('>', self.RE_GROUP_TAG, state)
425 elif isinstance(args[0], (int, long)):
426
427 self._output('(', self.RE_GROUP_TAG, state)
428 else:
429 self._output('(?P<', self.RE_GROUP_TAG, state)
430 self._output(args[0], self.RE_REF_TAG, state)
431 self._output('>', self.RE_GROUP_TAG, state)
432 self._colorize_re_tree(args[1], state, True, groups)
433 self._output(')', self.RE_GROUP_TAG, state)
434
435 elif op == sre_constants.GROUPREF:
436 self._output('\\%d' % args, self.RE_REF_TAG, state)
437
438 elif op == sre_constants.RANGE:
439 self._colorize_re_tree( ((sre_constants.LITERAL, args[0]),),
440 state, False, groups )
441 self._output('-', self.RE_OP_TAG, state)
442 self._colorize_re_tree( ((sre_constants.LITERAL, args[1]),),
443 state, False, groups )
444
445 elif op == sre_constants.NEGATE:
446 self._output('^', self.RE_OP_TAG, state)
447
448 elif op == sre_constants.ASSERT:
449 if args[0] > 0:
450 self._output('(?=', self.RE_GROUP_TAG, state)
451 else:
452 self._output('(?<=', self.RE_GROUP_TAG, state)
453 self._colorize_re_tree(args[1], state, True, groups)
454 self._output(')', self.RE_GROUP_TAG, state)
455
456 elif op == sre_constants.ASSERT_NOT:
457 if args[0] > 0:
458 self._output('(?!', self.RE_GROUP_TAG, state)
459 else:
460 self._output('(?<!', self.RE_GROUP_TAG, state)
461 self._colorize_re_tree(args[1], state, True, groups)
462 self._output(')', self.RE_GROUP_TAG, state)
463
464 elif op == sre_constants.NOT_LITERAL:
465 self._output('[^', self.RE_GROUP_TAG, state)
466 self._colorize_re_tree( ((sre_constants.LITERAL, args),),
467 state, False, groups )
468 self._output(']', self.RE_GROUP_TAG, state)
469 else:
470 log.error("Error colorizing regexp: unknown elt %r" % elt)
471 if len(tree) > 1 and not noparen:
472 self._output(')', self.RE_GROUP_TAG, state)
473
474
475
476
477
479 """
480 Add the string `s` to the result list, tagging its contents
481 with tag `tag`. Any lines that go beyond `self.linelen` will
482 be line-wrapped. If the total number of lines exceeds
483 `self.maxlines`, then raise a `_Maxlines` exception.
484 """
485
486 if isinstance(s, str):
487 s = decode_with_backslashreplace(s)
488
489
490
491
492 segments = s.split('\n')
493
494 for i, segment in enumerate(segments):
495
496
497 if i > 0:
498 if (state.lineno+1) > self.maxlines:
499 raise _Maxlines()
500 if not state.linebreakok:
501 raise _Linebreak()
502 state.result.append(u'\n')
503 state.lineno += 1
504 state.charpos = 0
505
506
507
508 if state.charpos + len(segment) <= self.linelen:
509 state.charpos += len(segment)
510 if tag:
511 segment = Element('code', segment, style=tag)
512 state.result.append(segment)
513
514
515
516
517
518
519 else:
520 split = self.linelen-state.charpos
521 segments.insert(i+1, segment[split:])
522 segment = segment[:split]
523 if tag:
524 segment = Element('code', segment, style=tag)
525 state.result += [segment, self.LINEWRAP]
526