Package nltk :: Package etree :: Module ElementPath
[hide private]
[frames] | no frames]

Source Code for Module nltk.etree.ElementPath

  1  # 
  2  # ElementTree 
  3  # $Id: ElementPath.py 1858 2004-06-17 21:31:41Z Fredrik $ 
  4  # 
  5  # limited xpath support for element trees 
  6  # 
  7  # history: 
  8  # 2003-05-23 fl   created 
  9  # 2003-05-28 fl   added support for // etc 
 10  # 2003-08-27 fl   fixed parsing of periods in element names 
 11  # 
 12  # Copyright (c) 2003-2004 by Fredrik Lundh.  All rights reserved. 
 13  # 
 14  # fredrik@pythonware.com 
 15  # http://www.pythonware.com 
 16  # 
 17  # -------------------------------------------------------------------- 
 18  # The ElementTree toolkit is 
 19  # 
 20  # Copyright (c) 1999-2004 by Fredrik Lundh 
 21  # 
 22  # By obtaining, using, and/or copying this software and/or its 
 23  # associated documentation, you agree that you have read, understood, 
 24  # and will comply with the following terms and conditions: 
 25  # 
 26  # Permission to use, copy, modify, and distribute this software and 
 27  # its associated documentation for any purpose and without fee is 
 28  # hereby granted, provided that the above copyright notice appears in 
 29  # all copies, and that both that copyright notice and this permission 
 30  # notice appear in supporting documentation, and that the name of 
 31  # Secret Labs AB or the author not be used in advertising or publicity 
 32  # pertaining to distribution of the software without specific, written 
 33  # prior permission. 
 34  # 
 35  # SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD 
 36  # TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT- 
 37  # ABILITY AND FITNESS.  IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR 
 38  # BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY 
 39  # DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, 
 40  # WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS 
 41  # ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE 
 42  # OF THIS SOFTWARE. 
 43  # -------------------------------------------------------------------- 
 44   
 45  # Licensed to PSF under a Contributor Agreement. 
 46  # See http://www.python.org/2.4/license for licensing details. 
 47   
 48  ## 
 49  # Implementation module for XPath support.  There's usually no reason 
 50  # to import this module directly; the <b>ElementTree</b> does this for 
 51  # you, if needed. 
 52  ## 
 53   
 54  import re 
 55   
 56  xpath_tokenizer = re.compile( 
 57      "(::|\.\.|\(\)|[/.*:\[\]\(\)@=])|((?:\{[^}]+\})?[^/:\[\]\(\)@=\s]+)|\s+" 
 58      ).findall 
 59   
##
# Marker type for the "//" (descendant-or-self) step.  It carries no
# state; code in this module creates an instance for each "//" found in
# a path and recognises those steps later with isinstance().

class xpath_descendant_or_self:
    pass

##
# Wrapper for a compiled XPath.
#
# Instances expose two attributes:
#   path -- list of steps; each step is a tag name string, the string
#           "*", or an xpath_descendant_or_self instance (for "//").
#   tag  -- the single step when the path consists of exactly one string
#           step, otherwise None.  find() and findtext() use it to scan
#           the direct children without building a node set.

class Path:

    ##
    # Create a Path instance from an XPath expression.
    #
    # @param path The XPath expression, as a string.
    # @exception SyntaxError If the expression starts with "/", ends
    #     with "//", contains an unsupported operator, or is missing a
    #     "/" separator between two steps.

    def __init__(self, path):
        tokens = xpath_tokenizer(path)
        # the current version supports 'path/path'-style expressions only
        self.path = []
        self.tag = None
        if tokens and tokens[0][0] == "/":
            raise SyntaxError("cannot use absolute path on element")
        while tokens:
            op, tag = tokens.pop(0)
            if tag or op == "*":
                self.path.append(tag or op)
            elif op == ".":
                pass
            elif op == "/":
                # a "/" found where a step was expected is the second
                # half of "//"; the step that follows comes right after
                # it, so do not look for a separator here
                self.path.append(xpath_descendant_or_self())
                continue
            else:
                raise SyntaxError("unsupported path syntax (%s)" % op)
            # every step other than "//" must be followed by a "/"
            # separator or by the end of the expression
            if tokens:
                op, tag = tokens.pop(0)
                if op != "/":
                    raise SyntaxError(
                        "expected path separator (%s)" % (op or tag)
                        )
        if self.path and isinstance(self.path[-1], xpath_descendant_or_self):
            raise SyntaxError("path cannot end with //")
        if len(self.path) == 1 and isinstance(self.path[0], str):
            self.tag = self.path[0]

    ##
    # Find first matching object.
    #
    # @param element The element to search from.
    # @return The first matching element, or None if nothing matches.

    def find(self, element):
        tag = self.tag
        if tag is None:
            nodeset = self.findall(element)
            if not nodeset:
                return None
            return nodeset[0]
        for elem in element:
            # "*" has to match any child here, exactly as it does in
            # findall(); a plain tag comparison would never succeed
            if tag == "*" or elem.tag == tag:
                return elem
        return None

    ##
    # Find text for first matching object.
    #
    # @param element The element to search from.
    # @param default Value to return when nothing matches.
    # @return The text of the first matching element ("" when that
    #     element's text is empty or None), or the default value when
    #     nothing matches.

    def findtext(self, element, default=None):
        tag = self.tag
        if tag is None:
            nodeset = self.findall(element)
            if not nodeset:
                return default
            return nodeset[0].text or ""
        for elem in element:
            # keep the wildcard handling in step with find()/findall()
            if tag == "*" or elem.tag == tag:
                return elem.text or ""
        return default

    ##
    # Find all matching objects.
    #
    # @param element The element to search from.
    # @return A list of matching elements; empty if nothing matches.
    #     For a path without steps (such as ".") the list holds the
    #     element itself.

    def findall(self, element):
        steps = self.path
        nodeset = [element]
        index = 0
        while index < len(steps):
            step = steps[index]
            index = index + 1
            matches = []
            if isinstance(step, xpath_descendant_or_self):
                # "//" consumes the tag step that follows it, if any;
                # without one (invalid path) every descendant is taken
                tag = None
                if index < len(steps) and isinstance(steps[index], str):
                    tag = steps[index]
                    index = index + 1
                for node in nodeset:
                    # prefer iter() where the element provides it and
                    # fall back to the older getiterator() spelling
                    walk = getattr(node, "iter", None) or node.getiterator
                    found = list(walk(tag))
                    # the walk may report the start node itself first;
                    # only elements below it count as matches
                    if found and found[0] is node:
                        del found[0]
                    matches.extend(found)
            else:
                for node in nodeset:
                    for child in node:
                        if step == "*" or child.tag == step:
                            matches.append(child)
            if not matches:
                return []
            nodeset = matches
        return nodeset

# Compiled Path objects, keyed by the path expression they were built from.
_cache = {}

##
# (Internal) Compile path.
#
# Returns the cached Path for this expression when there is one.
# Otherwise a new Path is built and stored; if the cache already holds
# 100 or more entries it is emptied first, which keeps its size bounded.
#
# @param path The path expression; it is used as a dictionary key.
# @return A Path instance for the expression.

def _compile(path):
    compiled = _cache.get(path)
    if compiled is None:
        compiled = Path(path)
        if len(_cache) >= 100:
            _cache.clear()
        _cache[path] = compiled
    return compiled

##
# Find first matching object.
#
# @param element The element to search from.
# @param path The path expression; it is compiled with _compile().
# @return The result of the compiled path's find() for the element.

def find(element, path):
    compiled = _compile(path)
    return compiled.find(element)

##
# Find text for first matching object.
#
# @param element The element to search from.
# @param path The path expression; it is compiled with _compile().
# @param default Value passed on to the compiled path's findtext().
# @return The result of the compiled path's findtext() for the element.

def findtext(element, path, default=None):
    compiled = _compile(path)
    return compiled.findtext(element, default)

##
# Find all matching objects.
#
# @param element The element to search from.
# @param path The path expression; it is compiled with _compile().
# @return The result of the compiled path's findall() for the element.

def findall(element, path):
    compiled = _compile(path)
    return compiled.findall(element)