Package nltk :: Module olac
[hide private]
[frames] | no frames]

Source Code for Module nltk.olac

 1  # Natural Language Toolkit: Support for OLAC Metadata 
 2  # 
 3  # Copyright (C) 2001-2008 NLTK Project 
 4  # Author: Steven Bird <[email protected]> 
 5  # URL: <http://nltk.org> 
 6  # For license information, see LICENSE.TXT 
 7   
 8   
 9  from StringIO import StringIO 
10   
def read_olac(xml):
    """
    Read an OLAC XML record and return a list of attributes.

    Each direct child of the record's root element contributes one
    C{(tag, attrib, text)} tuple, in document order.  C{lxml} is used
    when it can be imported; otherwise the standard library's
    C{xml.etree.ElementTree} is used instead.

    @param xml: XML string for conversion
    @type xml: C{string}
    @return: one C{(tag, attrib, text)} tuple per child of the root element
    @rtype: C{list} of C{tuple}
    """
    try:
        from lxml import etree
    except ImportError:
        # Only parse()/getroot(), element iteration and the tag/attrib/text
        # attributes are used below, and ElementTree provides all of them.
        from xml.etree import ElementTree as etree

    root = etree.parse(StringIO(xml)).getroot()
    # Iterate the element directly: getchildren() is deprecated in lxml and
    # is not available in recent versions of xml.etree.ElementTree.
    return [(element.tag, element.attrib, element.text) for element in root]
23
def pprint_olac(xml):
    """
    Pretty-print an OLAC XML record, one line per child element.

    Each line holds the element tag, left-justified in a 12-character
    column and followed by a colon; then the element text, when it is
    non-empty; then C{(type=code)} built from the element's C{type} and
    C{code} attributes, when both are present.  The parts are separated
    by single spaces.

    @param xml: XML string for conversion
    @type xml: C{string}
    """
    for tag, attrib, text in read_olac(xml):
        fields = ["%-12s" % tag + ':']
        if text:
            fields.append(text)
        # An element may carry attributes other than type/code; indexing
        # them unconditionally would raise KeyError for such elements.
        if 'type' in attrib and 'code' in attrib:
            fields.append("(%s=%s)" % (attrib['type'], attrib['code']))
        # A single parenthesised argument is valid both as a Python 2
        # print statement and as a Python 3 print() call.
        print(" ".join(fields))
32
def demo():
    """
    Pretty-print the OLAC record found at C{corpora/treebank/olac.xml}.

    The file is located through C{nltk.data.find}, read in full, and its
    contents are passed to L{pprint_olac}.
    """
    import nltk.data

    path = nltk.data.find('corpora/treebank/olac.xml')
    # Close the file deterministically instead of relying on garbage
    # collection to release the handle.
    with open(path) as stream:
        xml = stream.read()
    pprint_olac(xml)
# Names exported by ``from nltk.olac import *``.
__all__ = ['read_olac', 'pprint_olac']

# Run the demonstration when this module is executed as a script.
if __name__ == '__main__':
    demo()