#!/usr/bin/env python

"""
python_201_rparser_plex.py

A recursive descent parser example.
This example uses Plex to implement a tokenizer.

The grammar:

    Prog ::= Command | Command Prog
    Command ::= Func_call
    Func_call ::= Term '(' Func_call_list ')'
    Func_call_list ::= Func_call | Func_call ',' Func_call_list
    Term ::= word

where "word" is a name token produced by the tokenizer: a letter
followed by any number of letters or digits.

"""

import sys, string, types
import getopt
import Plex

## from IPython.Shell import IPShellEmbed
## ipshell = IPShellEmbed((),
##     banner = '>>>>>>>> Into IPython >>>>>>>>',
##     exit_msg = '<<<<<<<< Out of IPython <<<<<<<<')

#
# Constants
#

# AST node types
NoneNodeType = 0
ProgNodeType = 1
CommandNodeType = 2
FuncCallNodeType = 3
FuncCallListNodeType = 4
TermNodeType = 5

# Token types
NoneTokType = 0
LParTokType = 1
RParTokType = 2
WordTokType = 3
CommaTokType = 4
EOFTokType = 5

# Dictionary to map node type values to node type names
NodeTypeDict = {
    NoneNodeType: 'NoneNodeType',
    ProgNodeType: 'ProgNodeType',
    CommandNodeType: 'CommandNodeType',
    FuncCallNodeType: 'FuncCallNodeType',
    FuncCallListNodeType: 'FuncCallListNodeType',
    TermNodeType: 'TermNodeType',
    }


#
# Representation of a node in the AST (abstract syntax tree).
#
class ASTNode(object):
    """A node of the abstract syntax tree.

    Attributes:
        nodeType -- one of the *NodeType constants (a key of NodeTypeDict).
        children -- list of the extra positional constructor arguments, in
            order.  A child may be another ASTNode, a list of ASTNodes, or
            any other value (for example the text of a word token).
    """

    def __init__(self, nodeType, *args):
        self.nodeType = nodeType
        self.children = list(args)

    def show(self, level):
        """Write this node and its subtree to stdout.

        level is the nesting depth of this node; children are shown one
        level deeper.  Raises KeyError if nodeType is not in NodeTypeDict.
        """
        self.showLevel(level)
        print('Node -- Type %s' % NodeTypeDict[self.nodeType])
        level += 1
        for child in self.children:
            if isinstance(child, ASTNode):
                child.show(level)
            elif isinstance(child, list):
                # Items of a list child are expected to be ASTNode instances.
                for item in child:
                    item.show(level)
            else:
                self.showLevel(level)
                print('Child: %s' % (child,))

    def showLevel(self, level):
        """Write the indentation for nesting depth `level` (no newline)."""
        # Two spaces per level: the same output the original
        # "print ' '," loop produced through the print statement's
        # soft-space rule, but valid on both Python 2 and Python 3.
        sys.stdout.write('  ' * level)


#
# The recursive descent parser class.
#   Contains the "recognizer" methods, which implement the grammar
#   rules (above), one recognizer method for each production rule.
# class ProgParser: def __init__(self): pass def parseFile(self, infileName): self.tokens = None self.tokenType = NoneTokType self.token = '' self.lineNo = -1 self.infile = file(infileName, 'r') self.tokens = genTokens(self.infile, infileName) try: self.tokenType, self.token, self.lineNo = self.tokens.next() except StopIteration: raise RuntimeError, 'Empty file' result = self.prog_reco() self.infile.close() self.infile = None return result def parseStream(self, instream): self.tokens = None self.tokenType = NoneTokType self.token = '' self.lineNo = -1 self.tokens = genTokens(self.instream, '') try: self.tokenType, self.token, self.lineNo = self.tokens.next() except StopIteration: raise RuntimeError, 'Empty stream' result = self.prog_reco() self.infile.close() self.infile = None return result def prog_reco(self): commandList = [] while 1: result = self.command_reco() if not result: break commandList.append(result) return ASTNode(ProgNodeType, commandList) def command_reco(self): if self.tokenType == EOFTokType: return None result = self.func_call_reco() return ASTNode(CommandNodeType, result) def func_call_reco(self): if self.tokenType == WordTokType: term = ASTNode(TermNodeType, self.token) self.tokenType, self.token, self.lineNo = self.tokens.next() if self.tokenType == LParTokType: self.tokenType, self.token, self.lineNo = self.tokens.next() result = self.func_call_list_reco() if result: if self.tokenType == RParTokType: self.tokenType, self.token, self.lineNo = \ self.tokens.next() return ASTNode(FuncCallNodeType, term, result) else: raise ParseError(self.lineNo, 'missing right paren') else: raise ParseError(self.lineNo, 'bad func call list') else: raise ParseError(self.lineNo, 'missing left paren') else: return None def func_call_list_reco(self): terms = [] while 1: result = self.func_call_reco() if not result: break terms.append(result) if self.tokenType != CommaTokType: break self.tokenType, self.token, self.lineNo = self.tokens.next() return 
ASTNode(FuncCallListNodeType, terms) # # The parse error exception class. # class ParseError(Exception): def __init__(self, lineNo, msg): RuntimeError.__init__(self, msg) self.lineNo = lineNo self.msg = msg def getLineNo(self): return self.lineNo def getMsg(self): return self.msg # # Generate the tokens. # Usage - example # gen = genTokens(infile) # tokType, tok, lineNo = gen.next() # ... def genTokens(infile, infileName): letter = Plex.Range("AZaz") digit = Plex.Range("09") name = letter + Plex.Rep(letter | digit) lpar = Plex.Str('(') rpar = Plex.Str(')') comma = Plex.Str(',') comment = Plex.Str("#") + Plex.Rep(Plex.AnyBut("\n")) space = Plex.Any(" \t\n") lexicon = Plex.Lexicon([ (name, 'word'), (lpar, 'lpar'), (rpar, 'rpar'), (comma, 'comma'), (comment, Plex.IGNORE), (space, Plex.IGNORE), ]) scanner = Plex.Scanner(lexicon, infile, infileName) while 1: tokenType, token = scanner.read() name, lineNo, columnNo = scanner.position() if tokenType == None: tokType = EOFTokType token = None elif tokenType == 'word': tokType = WordTokType elif tokenType == 'lpar': tokType = LParTokType elif tokenType == 'rpar': tokType = RParTokType elif tokenType == 'comma': tokType = CommaTokType else: tokType = NoneTokType tok = token yield (tokType, tok, lineNo) def test(infileName): parser = ProgParser() #ipshell('(test) #1\nCtrl-D to exit') result = None try: result = parser.parseFile(infileName) except ParseError, exp: sys.stderr.write('ParseError: (%d) %s\n' % \ (exp.getLineNo(), exp.getMsg())) if result: result.show(0) USAGE_TEXT = """ Usage: python python_201_rparser_plex.py [options] Options: -h, --help Display this help message. 
Example: python python_201_rparser_plex.py myfile.txt """ def usage(): print USAGE_TEXT sys.exit(-1) def main(): args = sys.argv[1:] try: opts, args = getopt.getopt(args, 'h', ['help']) except: usage() for opt, val in opts: if opt in ('-h', '--help'): usage() if len(args) != 1: usage() infileName = args[0] test(infileName) if __name__ == '__main__': main() #import pdb #pdb.run('main()')