A parser for HTML and XHTML.
|
|
# Matches a single '&' or '<' character.
# NOTE(review): presumably used to locate the next character that needs
# special handling while scanning ordinary text -- confirm against the parser.
interesting_normal = re.compile(
    r'[&<]'
)
|
|
|
# Matches a '<' that is immediately followed by '/' or by the end of the
# string; group 1 holds the '/' (or is empty at end of input).
# NOTE(review): presumably used while inside CDATA-style content, where only
# a possible end tag is of interest -- confirm against the parser.
interesting_cdata = re.compile(
    r'<(/|\Z)'
)
|
|
|
# Matches '&' followed by an ASCII letter or '#', i.e. the first two
# characters of something that could be an entity or character reference.
# NOTE(review): presumably used to detect a reference that has started but
# is not yet complete in the buffered input -- confirm against the parser.
incomplete = re.compile(
    r'&[a-zA-Z#]'
)
|
|
|
# Matches a named entity reference: '&', then a name (captured as group 1)
# that starts with an ASCII letter and continues with letters, digits, '-'
# or '.', then exactly one character that is not an ASCII letter or digit.
# The terminating character is part of the match, so a name that runs to
# the very end of the input does not match.
entityref = re.compile(
    r'&([a-zA-Z][-\.a-zA-Z0-9]*)[^a-zA-Z0-9]'
)
|
|
|
charref = re.compile(r'&#(?:[0-9]+|[xX][0-9a-fA-F]+)[^0-9a-fA-...
|
|
|
# Matches '<' immediately followed by an ASCII letter -- the opening of a
# start tag (an end tag's '</' or a '<' followed by a digit does not match).
starttagopen = re.compile(
    r'<[a-zA-Z]'
)
|
|
|
# Matches a single '>' character.
# NOTE(review): judging by the name, this locates the end of a processing
# instruction -- confirm against the parser.
piclose = re.compile(
    r'>'
)
|
|
|
# Matches the end of a comment: two hyphens, optional whitespace, then '>'.
# Whitespace between '--' and '>' is tolerated; any other character there
# prevents a match.
commentclose = re.compile(
    r'--\s*>'
)
|
|
|
# Matches a tag name: an ASCII letter followed by any run of ASCII letters,
# digits, '-', '.', ':' or '_'.
tagfind = re.compile(
    r'[a-zA-Z][-\.a-zA-Z0-9:_]*'
)
|
|
|
attrfind = re.compile(r'\s*([a-zA-Z_][-\.:a-zA-Z_0-9]*)(\s*=\s...
|
|
|
locatestarttagend = re.compile(r'(?x)<[a-zA-Z][-\.a-zA-Z0-9:_]...
|
|
|
# Matches a single '>' character.
# NOTE(review): judging by the name, this locates the '>' that terminates an
# end tag -- confirm against the parser.
endendtag = re.compile(
    r'>'
)
|
|
|
# Matches a complete end tag: '</', optional whitespace, the tag name
# (captured as group 1: an ASCII letter followed by letters, digits, '-',
# '.', ':' or '_'), optional whitespace, then '>'.
endtagfind = re.compile(
    r'</\s*([a-zA-Z][-\.a-zA-Z0-9:_]*)\s*>'
)
|