1
2
3
4
5
6
7
8
9 """
10 Syntax highlighting for doctest blocks. This module defines two
11 functions, L{doctest_to_html()} and L{doctest_to_latex()}, which can
12 be used to perform syntax highlighting on doctest blocks. It also
13 defines the more general C{colorize_doctest()}, which could be used to
14 do syntax highlighting on doctest blocks with other output formats.
15 (Both C{doctest_to_html()} and C{doctest_to_latex()} are defined using
16 C{colorize_doctest()}.)
17 """
18 __docformat__ = 'epytext en'
19
20 import re
21 from epydoc.util import plaintext_to_html, plaintext_to_latex
22
# Public API of this module: the two convenience functions followed by
# the colorizer base class and its concrete subclasses.
__all__ = [
    'doctest_to_html',
    'doctest_to_latex',
    'DoctestColorizer',
    'XMLDoctestColorizer',
    'HTMLDoctestColorizer',
    'LaTeXDoctestColorizer',
]
26
def doctest_to_html(s):
    """
    Syntax-highlight the doctest string C{s} and return HTML code.

    The work is delegated to L{HTMLDoctestColorizer.colorize_doctest}.
    The returned code is a C{<pre>} block with class=py-doctest, in
    which highlighted pieces are marked with these css classes:

      - C{py-prompt} -- the Python PS1 prompt (>>>)
      - C{py-more} -- the Python PS2 prompt (...)
      - C{py-keyword} -- a Python keyword (for, if, etc.)
      - C{py-builtin} -- a Python builtin name (abs, dir, etc.)
      - C{py-string} -- a string literal
      - C{py-comment} -- a comment
      - C{py-except} -- an exception traceback (up to the next >>>)
      - C{py-output} -- the output from a doctest block.
      - C{py-defname} -- the name of a function or class defined by
        a C{def} or C{class} statement.

    @param s: The doctest block to colorize.
    @return: The HTML rendering of C{s}.
    """
    colorizer = HTMLDoctestColorizer()
    return colorizer.colorize_doctest(s)
46
def doctest_to_latex(s):
    r"""
    Perform syntax highlighting on the given doctest string, and
    return the resulting LaTeX code.  This code consists of an
    C{alltt} environment.  Syntax highlighting is performed using
    the following new latex commands, which must be defined externally:

      - C{\pysrcprompt} -- the Python PS1 prompt (>>>)
      - C{\pysrcmore} -- the Python PS2 prompt (...)
      - C{\pysrckeyword} -- a Python keyword (for, if, etc.)
      - C{\pysrcbuiltin} -- a Python builtin name (abs, dir, etc.)
      - C{\pysrcstring} -- a string literal
      - C{\pysrccomment} -- a comment
      - C{\pysrcexcept} -- an exception traceback (up to the next >>>)
      - C{\pysrcoutput} -- the output from a doctest block.
      - C{\pysrcdefname} -- the name of a function or class defined by
        a C{def} or C{class} statement.

    @param s: The doctest block to colorize.
    @return: The LaTeX rendering of C{s}, as produced by
        L{LaTeXDoctestColorizer.colorize_doctest}.
    """
    # The docstring is a raw string: it names LaTeX commands, and "\p"
    # is not a valid escape sequence in an ordinary string literal.
    return LaTeXDoctestColorizer().colorize_doctest(s)
65
class DoctestColorizer:
    """
    An abstract base class for performing syntax highlighting on
    doctest blocks and other bits of Python code.  Subclasses should
    provide definitions for:

      - The L{markup()} method, which takes a substring and a tag, and
        returns a colorized version of the substring.
      - The L{PREFIX} and L{SUFFIX} variables, which will be added
        to the beginning and end of the strings returned by
        L{colorize_codeblock} and L{colorize_doctest}.
    """

    #: String added to the beginning of the strings returned by
    #: L{colorize_codeblock} and L{colorize_doctest}.  Subclasses must
    #: replace the C{None} placeholder with a real string.
    PREFIX = None

    #: String added to the end of the strings returned by
    #: L{colorize_codeblock} and L{colorize_doctest}.  Subclasses must
    #: replace the C{None} placeholder with a real string.
    SUFFIX = None

    #: Names that are highlighted as keywords.  Every line of the literal
    #: ends with a space: adjacent string literals are concatenated with
    #: no separator, so without it the last word of one line fuses with
    #: the first word of the next (e.g. "raiseassert").  C{with} is
    #: included as well.
    _KEYWORDS = ("and del for is raise "
                 "assert elif from lambda return "
                 "break else global not try "
                 "class except if or while "
                 "continue exec import pass yield "
                 "def finally in print as with").split()

    #: Names that are highlighted as builtins.  C{__builtins__} may be
    #: either the builtins module or that module's dictionary (the latter
    #: is the usual case in an imported module), so both are handled;
    #: calling C{dir()} on the dictionary would list dict methods instead
    #: of builtin names.
    _BUILTINS = sorted([_BI for _BI in (__builtins__
                                        if isinstance(__builtins__, dict)
                                        else dir(__builtins__))
                        if not _BI.startswith('__')])

    #: Regexp source matching any keyword as a whole word.
    _KEYWORD_GRP = '|'.join([r'\b%s\b' % _KW for _KW in _KEYWORDS])

    #: Regexp source matching any builtin as a whole word, unless it is
    #: preceded by a dot (i.e. used as an attribute name).
    _BUILTIN_GRP = (r'(?<!\.)(?:%s)' % '|'.join([r'\b%s\b' % _BI
                                                 for _BI in _BUILTINS]))

    #: Regexp source matching triple- and single-quoted string literals.
    _STRING_GRP = '|'.join(
        [r'("""("""|.*?((?!").)"""))', r'("("|.*?((?!").)"))',
         r"('''('''|.*?[^\\']'''))", r"('('|.*?[^\\']'))"])

    #: Regexp source matching a comment, up to the end of its line.
    _COMMENT_GRP = '(#.*?$)'

    #: Regexp source matching a PS1 prompt (>>>) at the start of a line.
    _PROMPT1_GRP = r'^[ \t]*>>>(?:[ \t]|$)'

    #: Regexp source matching a PS2 prompt (...) at the start of a line.
    _PROMPT2_GRP = r'^[ \t]*\.\.\.(?:[ \t]|$)'

    #: Regexp source matching C{def NAME} or C{class NAME}.
    _DEFINE_GRP = r'\b(?:def|class)[ \t]+\w+'

    #: Compiled regexp matching either kind of prompt.
    PROMPT_RE = re.compile('(%s|%s)' % (_PROMPT1_GRP, _PROMPT2_GRP),
                           re.MULTILINE | re.DOTALL)

    #: Compiled regexp matching a PS2 prompt.
    PROMPT2_RE = re.compile('(%s)' % _PROMPT2_GRP,
                            re.MULTILINE | re.DOTALL)

    #: Compiled regexp matching expected output that begins with a
    #: traceback header; it consumes everything after the header too.
    EXCEPT_RE = re.compile(r'^[ \t]*Traceback \(most recent call last\):.*',
                           re.DOTALL | re.MULTILINE)

    #: Compiled regexp matching a doctest directive comment.
    DOCTEST_DIRECTIVE_RE = re.compile(r'#[ \t]*doctest:.*')

    #: Compiled regexp driving the colorization.  Group 1 is the
    #: uncolored text preceding a token, group 2 is the token itself, and
    #: exactly one named group tells L{subfunc} which kind of token it is
    #: (C{EOS} is the empty match at the end of the string).
    DOCTEST_RE = re.compile(
        r'(.*?)((?P<STRING>%s)|(?P<COMMENT>%s)|(?P<DEFINE>%s)|'
        r'(?P<KEYWORD>%s)|(?P<BUILTIN>%s)|'
        r'(?P<PROMPT1>%s)|(?P<PROMPT2>%s)|(?P<EOS>\Z))' % (
            _STRING_GRP, _COMMENT_GRP, _DEFINE_GRP, _KEYWORD_GRP,
            _BUILTIN_GRP, _PROMPT1_GRP, _PROMPT2_GRP),
        re.MULTILINE | re.DOTALL)

    #: Compiled regexp splitting a doctest block into examples; each
    #: match has a C{source} group (PS1 line plus PS2 lines) and a
    #: C{want} group (the expected output that follows).
    DOCTEST_EXAMPLE_RE = re.compile(r'''
        # Source consists of a PS1 line followed by zero or more PS2 lines.
        (?P<source>
            (?:^(?P<indent> [ ]*) >>>    .*)    # PS1 line
            (?:\n           [ ]*  \.\.\. .*)*   # PS2 lines
          \n?)
        # Want consists of any non-blank lines that do not start with PS1.
        (?P<want> (?:(?![ ]*$)    # Not a blank line
                     (?![ ]*>>>)  # Not a line starting with PS1
                     .*$\n?       # But any other line
                  )*)
        ''', re.MULTILINE | re.VERBOSE)

    def colorize_inline(self, s):
        """
        Colorize a string containing Python code.  Do not add the
        L{PREFIX} and L{SUFFIX} strings to the returned value.  This
        method is intended for generating syntax-highlighted strings
        that are appropriate for inclusion as inline expressions.

        @param s: The Python source text to colorize.
        @return: C{s} with every token passed through L{markup}.
        """
        return self.DOCTEST_RE.sub(self.subfunc, s)

    def colorize_codeblock(self, s):
        """
        Colorize a string containing only Python code.  Unlike
        L{colorize_doctest}, the string is not split into doctest
        examples first; it is colorized as a whole and wrapped in
        L{PREFIX} and L{SUFFIX}.

        @param s: The Python source text to colorize.
        @return: C{PREFIX + colorized text + SUFFIX}.
        """
        body = self.DOCTEST_RE.sub(self.subfunc, s)
        return self.PREFIX + body + self.SUFFIX

    def colorize_doctest(self, s, strip_directives=False):
        """
        Colorize a string containing one or more doctest examples.

        Text between examples is copied through unchanged; example
        source is colorized token by token; expected output is marked
        up line by line as C{'except'} (when it starts with a traceback
        header) or C{'output'}.

        @param s: The doctest block to colorize.
        @param strip_directives: If true, remove C{# doctest: ...}
            directive comments from the example source before
            colorizing it.
        @return: C{PREFIX + colorized text + SUFFIX}.
        """
        output = []
        charno = 0
        for m in self.DOCTEST_EXAMPLE_RE.finditer(s):
            pysrc, want = m.group('source', 'want')
            if strip_directives:
                pysrc = self.DOCTEST_DIRECTIVE_RE.sub('', pysrc)

            # Text between the previous example and this one.
            output.append(s[charno:m.start()])

            # The example's source code.
            output.append(self.DOCTEST_RE.sub(self.subfunc, pysrc))

            # The example's expected output, if any.
            if want:
                if self.EXCEPT_RE.match(want):
                    tag = 'except'
                else:
                    tag = 'output'
                # append() keeps the chunk whole; "output += <str>" would
                # extend the list one character at a time.
                output.append('\n'.join([self.markup(line, tag)
                                         for line in want.split('\n')]))

            charno = m.end()

        # Whatever follows the last example.
        output.append(s[charno:])

        return self.PREFIX + ''.join(output) + self.SUFFIX

    def subfunc(self, match):
        """
        Substitution callback for L{DOCTEST_RE}: return the marked-up
        replacement for one match.

        @param match: A match object produced by L{DOCTEST_RE}.
        @return: The marked-up text preceding the token, followed by
            the marked-up token.
        @raise AssertionError: If no named group took part in the match.
        """
        other, text = match.group(1, 2)

        # Mark up the uncolored text line by line, so that no single
        # markup() call spans a newline.
        if other:
            other = '\n'.join([self.markup(line, 'other')
                               for line in other.split('\n')])

        # Tokens that are passed to markup() in one piece.
        for group, tag in (('PROMPT1', 'prompt'), ('PROMPT2', 'more'),
                           ('KEYWORD', 'keyword'), ('BUILTIN', 'builtin'),
                           ('COMMENT', 'comment')):
            if match.group(group):
                return other + self.markup(text, tag)

        if match.group('STRING'):
            if '\n' not in text:
                return other + self.markup(text, 'string')
            # A multi-line string: continuation lines may begin with a
            # PS2 prompt, which is tagged separately from the string.
            pieces = []
            for line in text.split('\n'):
                prompt = self.PROMPT2_RE.match(line)
                if prompt:
                    # Split at the end of the matched prompt rather than
                    # at a fixed column, so indented prompts work too.
                    head = line[:prompt.end()]
                    tail = line[prompt.end():]
                    piece = self.markup(head, 'more')
                    if tail:
                        piece += self.markup(tail, 'string')
                    pieces.append(piece)
                elif line:
                    pieces.append(self.markup(line, 'string'))
                else:
                    pieces.append('')
            return other + '\n'.join(pieces)

        if match.group('DEFINE'):
            m = re.match(r'(?P<def>\w+)(?P<space>\s+)(?P<name>\w+)', text)
            return other + (self.markup(m.group('def'), 'keyword') +
                            self.markup(m.group('space'), 'other') +
                            self.markup(m.group('name'), 'defname'))

        # EOS matches the empty string, so test against None explicitly.
        if match.group('EOS') is not None:
            return other

        raise AssertionError('Unexpected match!')

    def markup(self, s, tag):
        """
        Apply syntax highlighting to a single substring from a doctest
        block.  C{s} is the substring, and C{tag} is the tag that
        should be applied to the substring.  C{tag} will be one of the
        following strings:

          - C{prompt} -- the Python PS1 prompt (>>>)
          - C{more} -- the Python PS2 prompt (...)
          - C{keyword} -- a Python keyword (for, if, etc.)
          - C{builtin} -- a Python builtin name (abs, dir, etc.)
          - C{string} -- a string literal
          - C{comment} -- a comment
          - C{except} -- an exception traceback (up to the next >>>)
          - C{output} -- the output from a doctest block.
          - C{defname} -- the name of a function or class defined by
            a C{def} or C{class} statement.
          - C{other} -- anything else (does *not* include output.)

        @raise AssertionError: Always; subclasses must override this
            method.
        """
        raise AssertionError("Abstract method")
277
class XMLDoctestColorizer(DoctestColorizer):
    """
    A subclass of DoctestColorizer that generates XML-like output.
    This class is mainly intended to be used for testing purposes.
    """
    PREFIX = '<colorized>\n'
    SUFFIX = '</colorized>\n'

    def markup(self, s, tag):
        """
        Return C{s} with XML special characters escaped, wrapped in a
        C{<tag>...</tag>} element unless C{tag} is C{'other'}.

        @param s: The substring to mark up.
        @param tag: The tag name to wrap C{s} in.
        @return: The escaped (and, except for C{'other'}, wrapped) text.
        """
        # '&' must be escaped first, otherwise the ampersands introduced
        # by the other two replacements would be escaped a second time.
        s = s.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')
        if tag == 'other':
            return s
        return '<%s>%s</%s>' % (tag, s, tag)
289
291 """A subclass of DoctestColorizer that generates HTML output."""
292 PREFIX = '<pre class="py-doctest">\n'
293 SUFFIX = '</pre>\n'
300
302 """A subclass of DoctestColorizer that generates LaTeX output."""
303 PREFIX = '\\begin{alltt}\n'
304 SUFFIX = '\\end{alltt}\n'
310