epydoc.markup.pyval

102 """ 103 Syntax highlighter for Python values. 104 """ 105

106 - def __init__(self, linelen=75, maxlines=5, linebreakok=True, sort=True):

107 self.linelen = linelen 108 self.maxlines = maxlines 109 self.linebreakok = linebreakok 110 self.sort = sort

#////////////////////////////////////////////////////////////
# Colorization Tags & other constants
#////////////////////////////////////////////////////////////

# Style tags handed to _output().  A tag of None means the text is
# emitted as a plain string instead of being wrapped in an Element.
GROUP_TAG = 'variable-group'     # e.g., "[" and "]"
COMMA_TAG = 'variable-op'        # The "," that separates elements
COLON_TAG = 'variable-op'        # The ":" in dictionaries
CONST_TAG = None                 # None, True, False
NUMBER_TAG = None                # ints, floats, etc
QUOTE_TAG = 'variable-quote'     # Quotes around strings.
STRING_TAG = 'variable-string'   # Body of string literals

# Style tags used while colorizing regular expressions.
RE_CHAR_TAG = None               # Literal characters, categories, anchors
RE_GROUP_TAG = 're-group'        # Grouping syntax: "(", "(?:", "[", ")"
RE_REF_TAG = 're-ref'            # Group names and back-references
RE_OP_TAG = 're-op'              # Operators: "|", "*", "+", "-", "^"
RE_FLAGS_TAG = 're-flags'        # The leading "(?...)" flags group

# Marker elements appended directly to the result list.
ELLIPSIS = Element('code', u'...', style='variable-ellipsis')   # output was cut short
LINEWRAP = Element('symbol', u'crarr')                          # a line was wrapped by _output()
UNKNOWN_REPR = Element('code', u'??', style='variable-unknown') # no usable value or repr

# Matches default-style reprs such as "<Foo object at 0x12ab>";
# _colorize() lowers the score of values whose repr looks like this.
GENERIC_OBJECT_RE = re.compile(r'^<.* at 0x[0-9a-f]+>$', re.IGNORECASE)

#////////////////////////////////////////////////////////////
# Entry Point
#////////////////////////////////////////////////////////////

def colorize(self, pyval, parse_repr=None, min_score=None):
    """
    Build the colorized representation of a Python value.

    If C{pyval} is known it is colorized directly; otherwise
    C{parse_repr} (a source-level repr string) is emitted as-is, and
    if neither is available the "unknown" marker is used.  When both
    are given and the score reached for C{pyval} is below
    C{min_score}, the result is recomputed from C{parse_repr} alone.

    @return: A L{ColorizedPyvalRepr} describing the given pyval.
    """
    UNKNOWN = epydoc.apidoc.UNKNOWN
    have_pyval = pyval is not UNKNOWN
    have_repr = parse_repr not in (None, UNKNOWN)

    # This object keeps track of the colorization as it proceeds.
    state = _ColorizerState()
    state.linebreakok = self.linebreakok

    # Colorize the value; running out of room is reported through
    # an exception, in which case we finish with an ellipsis marker.
    is_complete = True
    try:
        if have_pyval:
            self._colorize(pyval, state)
        elif have_repr:
            self._output(parse_repr, None, state)
        else:
            state.result.append(PyvalColorizer.UNKNOWN_REPR)
    except (_Maxlines, _Linebreak):
        is_complete = False
        if not self.linebreakok:
            # Single-line mode: drop a trailing wrap marker and make
            # room for the three characters of the ellipsis.
            if state.result[-1] is self.LINEWRAP:
                state.result.pop()
            self._trim_result(state.result, 3)
        else:
            state.result.append('\n')
        state.result.append(self.ELLIPSIS)

    # A low-scoring value is replaced by its parsed repr.
    if (have_pyval and have_repr and
        min_score is not None and state.score < min_score):
        return self.colorize(UNKNOWN, parse_repr)

    # Put it all together.
    tree = Element('epytext', *state.result)
    return ColorizedPyvalRepr(tree, state.score, is_complete)

175

def _colorize(self, pyval, state):
    """
    Colorize a single value, dispatching on its exact type.

    Every value visited adds one to C{state.score}.  Values with no
    dedicated handler fall back to C{repr()}: a repr that fails
    costs 100 points and is shown as the "unknown" marker, and a
    generic C{<... at 0x...>} repr costs 5 points.
    """
    kind = type(pyval)
    state.score += 1

    if pyval is None or pyval is True or pyval is False:
        self._output(unicode(pyval), self.CONST_TAG, state)
    elif kind in (int, float, long, types.ComplexType):
        self._output(unicode(pyval), self.NUMBER_TAG, state)
    elif kind is str:
        self._colorize_str(pyval, state, '', 'string-escape')
    elif kind is unicode:
        self._colorize_str(pyval, state, 'u', 'unicode-escape')
    elif kind is list:
        self._multiline(self._colorize_iter, pyval, state, '[', ']')
    elif kind is tuple:
        self._multiline(self._colorize_iter, pyval, state, '(', ')')
    elif kind is set:
        self._multiline(self._colorize_iter, self._sort(pyval),
                        state, 'set([', '])')
    elif kind is frozenset:
        self._multiline(self._colorize_iter, self._sort(pyval),
                        state, 'frozenset([', '])')
    elif kind is dict:
        self._multiline(self._colorize_dict, self._sort(pyval.items()),
                        state, '{', '}')
    elif is_re_pattern(pyval):
        self._colorize_re(pyval, state)
    else:
        # Generic fallback: show whatever repr() gives us.
        try:
            text = repr(pyval)
            if not isinstance(text, (str, unicode)):
                text = unicode(text)
        except KeyboardInterrupt:
            raise
        except:
            # A broken __repr__ must not abort colorization.
            state.score -= 100
            state.result.append(self.UNKNOWN_REPR)
            return

        if self.GENERIC_OBJECT_RE.match(text):
            state.score -= 5
        self._output(text, None, state)

221

222 - def _sort(self, items):

223 if not self.sort: return items 224 try: return sorted(items) 225 except KeyboardInterrupt: raise 226 except: return items

227

def _trim_result(self, result, num_chars):
    """
    Remove the last C{num_chars} characters from C{result}, in place.

    C{result} is a list of strings and single-child C{Element}s;
    entries that become empty are dropped.  Stops early if the list
    runs out.
    """
    # NOTE(review): Element children are edited in place; if a shared
    # marker such as UNKNOWN_REPR can end up at the tail of the list,
    # this would alter it for later results -- confirm with callers.
    remaining = num_chars
    while remaining > 0 and result:
        tail = result[-1]
        if isinstance(tail, Element):
            assert len(tail.children) == 1
            cut = min(remaining, len(tail.children[0]))
            tail.children[0] = tail.children[0][:-cut]
            exhausted = not tail.children[0]
        else:
            cut = min(remaining, len(tail))
            result[-1] = tail[:-cut]
            exhausted = not result[-1]
        if exhausted:
            result.pop()
        remaining -= cut

242 243 #//////////////////////////////////////////////////////////// 244 # Object Colorization Functions 245 #//////////////////////////////////////////////////////////// 246

247 - def _multiline(self, func, pyval, state, *args):

248 """ 249 Helper for container-type colorizers. First, try calling 250 C{func(pyval, state, *args)} with linebreakok set to false; 251 and if that fails, then try again with it set to true. 252 """ 253 linebreakok = state.linebreakok 254 mark = state.mark() 255 256 try: 257 state.linebreakok = False 258 func(pyval, state, *args) 259 state.linebreakok = linebreakok 260 261 except _Linebreak: 262 if not linebreakok: 263 raise 264 state.restore(mark) 265 func(pyval, state, *args)

266

267 - def _colorize_iter(self, pyval, state, prefix, suffix):

268 self._output(prefix, self.GROUP_TAG, state) 269 indent = state.charpos 270 for i, elt in enumerate(pyval): 271 if i>=1: 272 if state.linebreakok: 273 self._output(',', self.COMMA_TAG, state) 274 self._output('\n'+' '*indent, None, state) 275 else: 276 self._output(', ', self.COMMA_TAG, state) 277 self._colorize(elt, state) 278 self._output(suffix, self.GROUP_TAG, state)

279

280 - def _colorize_dict(self, items, state, prefix, suffix):

281 self._output(prefix, self.GROUP_TAG, state) 282 indent = state.charpos 283 for i, (key, val) in enumerate(items): 284 if i>=1: 285 if state.linebreakok: 286 self._output(',', self.COMMA_TAG, state) 287 self._output('\n'+' '*indent, None, state) 288 else: 289 self._output(', ', self.COMMA_TAG, state) 290 self._colorize(key, state) 291 self._output(': ', self.COLON_TAG, state) 292 self._colorize(val, state) 293 self._output(suffix, self.GROUP_TAG, state)

294

295 - def _colorize_str(self, pyval, state, prefix, encoding):

296 # Decide which quote to use. 297 if '\n' in pyval and state.linebreakok: quote = "'''" 298 else: quote = "'" 299 # Divide the string into lines. 300 if state.linebreakok: 301 lines = pyval.split('\n') 302 else: 303 lines = [pyval] 304 # Open quote. 305 self._output(prefix+quote, self.QUOTE_TAG, state) 306 # Body 307 for i, line in enumerate(lines): 308 if i>0: self._output('\n', None, state) 309 self._output(line.encode(encoding), self.STRING_TAG, state) 310 # Close quote. 311 self._output(quote, self.QUOTE_TAG, state)

312

def _colorize_re(self, pyval, state):
    """
    Colorize a compiled regular expression as
    C{re.compile(r'...')}, re-parsing its pattern so that flags and
    pattern syntax can be highlighted.
    """
    # Pull the pattern text and flags out of the compiled regexp.
    pattern_text = pyval.pattern
    flags = pyval.flags
    # Byte-string patterns are decoded to unicode first.
    if isinstance(pattern_text, str):
        pattern_text = decode_with_backslashreplace(pattern_text)
    # Parse the regexp pattern.
    tree = sre_parse.parse(pattern_text, flags)
    # Invert the name->number table so groups can be looked up by
    # number while walking the tree.
    groups = {}
    for (name, num) in tree.pattern.groupdict.items():
        groups[num] = name
    # Colorize it!
    self._output("re.compile(r'", None, state)
    self._colorize_re_flags(tree.pattern.flags, state)
    self._colorize_re_tree(tree, state, True, groups)
    self._output("')", None, state)

328

329 - def _colorize_re_flags(self, flags, state):

330 if flags: 331 flags = [c for (c,n) in sorted(sre_parse.FLAGS.items()) 332 if (n&flags)] 333 flags = '(?%s)' % ''.join(flags) 334 self._output(flags, self.RE_FLAGS_TAG, state)

335

336 - def _colorize_re_tree(self, tree, state, noparen, groups):

337 assert noparen in (True, False) 338 if len(tree) > 1 and not noparen: 339 self._output('(', self.RE_GROUP_TAG, state) 340 for elt in tree: 341 op = elt[0] 342 args = elt[1] 343 344 if op == sre_constants.LITERAL: 345 c = unichr(args) 346 # Add any appropriate escaping. 347 if c in '.^$\\*+?{}[]|()\'': c = '\\'+c 348 elif c == '\t': c = '\\t' 349 elif c == '\r': c = '\\r' 350 elif c == '\n': c = '\\n' 351 elif c == '\f': c = '\\f' 352 elif c == '\v': c = '\\v' 353 elif ord(c) > 0xffff: c = r'\U%08x' % ord(c) 354 elif ord(c) > 0xff: c = r'\u%04x' % ord(c) 355 elif ord(c)<32 or ord(c)>=127: c = r'\x%02x' % ord(c) 356 self._output(c, self.RE_CHAR_TAG, state) 357 358 elif op == sre_constants.ANY: 359 self._output('.', self.RE_CHAR_TAG, state) 360 361 elif op == sre_constants.BRANCH: 362 if args[0] is not None: 363 raise ValueError('Branch expected None arg but got %s' 364 % args[0]) 365 for i, item in enumerate(args[1]): 366 if i > 0: 367 self._output('|', self.RE_OP_TAG, state) 368 self._colorize_re_tree(item, state, True, groups) 369 370 elif op == sre_constants.IN: 371 if (len(args) == 1 and args[0][0] == sre_constants.CATEGORY): 372 self._colorize_re_tree(args, state, False, groups) 373 else: 374 self._output('[', self.RE_GROUP_TAG, state) 375 self._colorize_re_tree(args, state, True, groups) 376 self._output(']', self.RE_GROUP_TAG, state) 377 378 elif op == sre_constants.CATEGORY: 379 if args == sre_constants.CATEGORY_DIGIT: val = r'\d' 380 elif args == sre_constants.CATEGORY_NOT_DIGIT: val = r'\D' 381 elif args == sre_constants.CATEGORY_SPACE: val = r'\s' 382 elif args == sre_constants.CATEGORY_NOT_SPACE: val = r'\S' 383 elif args == sre_constants.CATEGORY_WORD: val = r'\w' 384 elif args == sre_constants.CATEGORY_NOT_WORD: val = r'\W' 385 else: raise ValueError('Unknown category %s' % args) 386 self._output(val, self.RE_CHAR_TAG, state) 387 388 elif op == sre_constants.AT: 389 if args == sre_constants.AT_BEGINNING_STRING: val = r'\A' 390 elif args == 
sre_constants.AT_BEGINNING: val = r'^' 391 elif args == sre_constants.AT_END: val = r'$' 392 elif args == sre_constants.AT_BOUNDARY: val = r'\b' 393 elif args == sre_constants.AT_NON_BOUNDARY: val = r'\B' 394 elif args == sre_constants.AT_END_STRING: val = r'\Z' 395 else: raise ValueError('Unknown position %s' % args) 396 self._output(val, self.RE_CHAR_TAG, state) 397 398 elif op in (sre_constants.MAX_REPEAT, sre_constants.MIN_REPEAT): 399 minrpt = args[0] 400 maxrpt = args[1] 401 if maxrpt == sre_constants.MAXREPEAT: 402 if minrpt == 0: val = '*' 403 elif minrpt == 1: val = '+' 404 else: val = '{%d,}' % (minrpt) 405 elif minrpt == 0: 406 if maxrpt == 1: val = '?' 407 else: val = '{,%d}' % (maxrpt) 408 elif minrpt == maxrpt: 409 val = '{%d}' % (maxrpt) 410 else: 411 val = '{%d,%d}' % (minrpt, maxrpt) 412 if op == sre_constants.MIN_REPEAT: 413 val += '?' 414 415 self._colorize_re_tree(args[2], state, False, groups) 416 self._output(val, self.RE_OP_TAG, state) 417 418 elif op == sre_constants.SUBPATTERN: 419 if args[0] is None: 420 self._output('(?:', self.RE_GROUP_TAG, state) 421 elif args[0] in groups: 422 self._output('(?P<', self.RE_GROUP_TAG, state) 423 self._output(groups[args[0]], self.RE_REF_TAG, state) 424 self._output('>', self.RE_GROUP_TAG, state) 425 elif isinstance(args[0], (int, long)): 426 # This is cheating: 427 self._output('(', self.RE_GROUP_TAG, state) 428 else: 429 self._output('(?P<', self.RE_GROUP_TAG, state) 430 self._output(args[0], self.RE_REF_TAG, state) 431 self._output('>', self.RE_GROUP_TAG, state) 432 self._colorize_re_tree(args[1], state, True, groups) 433 self._output(')', self.RE_GROUP_TAG, state) 434 435 elif op == sre_constants.GROUPREF: 436 self._output('\\%d' % args, self.RE_REF_TAG, state) 437 438 elif op == sre_constants.RANGE: 439 self._colorize_re_tree( ((sre_constants.LITERAL, args[0]),), 440 state, False, groups ) 441 self._output('-', self.RE_OP_TAG, state) 442 self._colorize_re_tree( ((sre_constants.LITERAL, args[1]),), 
443 state, False, groups ) 444 445 elif op == sre_constants.NEGATE: 446 self._output('^', self.RE_OP_TAG, state) 447 448 elif op == sre_constants.ASSERT: 449 if args[0] > 0: 450 self._output('(?=', self.RE_GROUP_TAG, state) 451 else: 452 self._output('(?<=', self.RE_GROUP_TAG, state) 453 self._colorize_re_tree(args[1], state, True, groups) 454 self._output(')', self.RE_GROUP_TAG, state) 455 456 elif op == sre_constants.ASSERT_NOT: 457 if args[0] > 0: 458 self._output('(?!', self.RE_GROUP_TAG, state) 459 else: 460 self._output('(?<!', self.RE_GROUP_TAG, state) 461 self._colorize_re_tree(args[1], state, True, groups) 462 self._output(')', self.RE_GROUP_TAG, state) 463 464 elif op == sre_constants.NOT_LITERAL: 465 self._output('[^', self.RE_GROUP_TAG, state) 466 self._colorize_re_tree( ((sre_constants.LITERAL, args),), 467 state, False, groups ) 468 self._output(']', self.RE_GROUP_TAG, state) 469 else: 470 log.error("Error colorizing regexp: unknown elt %r" % elt) 471 if len(tree) > 1 and not noparen: 472 self._output(')', self.RE_GROUP_TAG, state)

473 474 #//////////////////////////////////////////////////////////// 475 # Output function 476 #//////////////////////////////////////////////////////////// 477

def _output(self, s, tag, state):
    """
    Add the string `s` to the result list, tagging its contents
    with tag `tag`.  Any lines that go beyond `self.linelen` will
    be line-wrapped.  If the total number of lines exceeds
    `self.maxlines`, then raise a `_Maxlines` exception; if a new
    line is needed while `state.linebreakok` is false, raise a
    `_Linebreak` exception.
    """
    # Make sure the string is unicode.
    if isinstance(s, str):
        s = decode_with_backslashreplace(s)

    # Work list of line pieces.  The first piece continues the
    # current line; every later piece starts a new line.  Wrapping
    # pushes the overflow back onto this list as the next piece.
    pieces = s.split('\n')

    index = 0
    while index < len(pieces):
        piece = pieces[index]

        # Every piece after the first begins on a fresh line.
        if index > 0:
            if (state.lineno+1) > self.maxlines:
                raise _Maxlines()
            if not state.linebreakok:
                raise _Linebreak()
            state.result.append(u'\n')
            state.lineno += 1
            state.charpos = 0

        room = self.linelen - state.charpos
        if len(piece) <= room:
            # The piece fits on the current line: tag it and store
            # the result.
            state.charpos += len(piece)
            if tag:
                piece = Element('code', piece, style=tag)
            state.result.append(piece)
        else:
            # The piece doesn't fit: keep what fits, mark the wrap,
            # and queue the remainder.  (We'll go to the beginning
            # of the next line at the start of the next pass through
            # the loop.)
            pieces.insert(index+1, piece[room:])
            piece = piece[:room]
            if tag:
                piece = Element('code', piece, style=tag)
            state.result.extend([piece, self.LINEWRAP])

        index += 1

Source Code for Module epydoc.markup.pyval_repr