#!/usr/bin/env python """ python_201_parser_ply.py A parser example. This example uses PLY to implement a lexer and parser. The grammar: Prog ::= Command* Command ::= Func_call Func_call ::= Term '(' Func_call_list ')' Func_call_list ::= Func_call* Term = """ import sys, types import getopt import lex import yacc # # Globals # startlinepos = 0 # # Constants # # AST node types NoneNodeType = 0 ProgNodeType = 1 CommandNodeType = 2 CommandListNodeType = 3 FuncCallNodeType = 4 FuncCallListNodeType = 5 TermNodeType = 6 # Dictionary to map node type values to node type names NodeTypeDict = { NoneNodeType: 'NoneNodeType', ProgNodeType: 'ProgNodeType', CommandNodeType: 'CommandNodeType', CommandListNodeType: 'CommandListNodeType', FuncCallNodeType: 'FuncCallNodeType', FuncCallListNodeType: 'FuncCallListNodeType', TermNodeType: 'TermNodeType', } # # Representation of a node in the AST (abstract syntax tree). # class ASTNode: def __init__(self, nodeType, *args): self.nodeType = nodeType self.children = [] for item in args: self.children.append(item) def append(self, item): self.children.append(item) def show(self, level): self.showLevel(level) print 'Node -- Type: %s' % NodeTypeDict[self.nodeType] level += 1 for child in self.children: if isinstance(child, ASTNode): child.show(level) elif type(child) == types.ListType: for item in child: item.show(level) else: self.showLevel(level) print 'Value:', child def showLevel(self, level): for idx in range(level): print ' ', # # Exception classes # class LexerError(Exception): def __init__(self, msg, lineno, columnno): self.msg = msg self.lineno = lineno self.columnno = columnno def show(self): sys.stderr.write('Lexer error (%d, %d) %s\n' % \ (self.lineno, self.columnno, self.msg)) class ParserError(Exception): def __init__(self, msg, lineno, columnno): self.msg = msg self.lineno = lineno self.columnno = columnno def show(self): sys.stderr.write('Parser error (%d, %d) %s\n' % \ (self.lineno, self.columnno, self.msg)) # # Lexer specification # tokens = ( 'NAME', 'LPAR','RPAR', 'COMMA', ) # Tokens t_LPAR = r'\(' t_RPAR = r'\)' t_COMMA = r'\,' t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*' # Ignore whitespace t_ignore = ' \t' # Ignore comments ('#' to end of line) def t_COMMENT(t): r'\#[^\n]*' pass def t_newline(t): r'\n+' global startlinepos startlinepos = t.lexer.lexpos - 1 t.lineno += t.value.count("\n") def t_error(t): global startlinepos msg = "Illegal character '%s'" % (t.value[0]) columnno = t.lexer.lexpos - startlinepos raise LexerError(msg, t.lineno, columnno) # # Parser specification # def p_prog(t): 'prog : command_list' t[0] = ASTNode(ProgNodeType, t[1]) def p_command_list_1(t): 'command_list : command' t[0] = ASTNode(CommandListNodeType, t[1]) def p_command_list_2(t): 'command_list : command_list command' t[1].append(t[2]) t[0] = t[1] def p_command(t): 'command : func_call' t[0] = ASTNode(CommandNodeType, t[1]) def p_func_call_1(t): 'func_call : term LPAR RPAR' t[0] = ASTNode(FuncCallNodeType, t[1]) def p_func_call_2(t): 'func_call : term LPAR func_call_list RPAR' t[0] = ASTNode(FuncCallNodeType, t[1], t[3]) def p_func_call_list_1(t): 'func_call_list : func_call' t[0] = ASTNode(FuncCallListNodeType, t[1]) def p_func_call_list_2(t): 'func_call_list : func_call_list COMMA func_call' t[1].append(t[3]) t[0] = t[1] def p_term(t): 'term : NAME' t[0] = ASTNode(TermNodeType, t[1]) def p_error(t): global startlinepos msg = "Syntax error at '%s'" % t.value columnno = t.lexer.lexpos - startlinepos raise ParserError(msg, t.lineno, columnno) # # Parse the input and display the AST (abstract syntax tree) # def parse(infileName): startlinepos = 0 # Build the lexer lex.lex(debug=1) # Build the parser yacc.yacc() # Read the input infile = file(infileName, 'r') content = infile.read() infile.close() try: # Do the parse result = yacc.parse(content) # Display the AST result.show(0) except LexerError, exp: exp.show() except ParserError, exp: exp.show() USAGE_TEXT = """ Usage: python python_201_parser_ply.py [options] Options: -h, --help Display this help message. Example: python python_201_parser_ply.py testfile.prog """ def usage(): print USAGE_TEXT sys.exit(-1) def main(): args = sys.argv[1:] try: opts, args = getopt.getopt(args, 'h', ['help']) except: usage() relink = 1 for opt, val in opts: if opt in ('-h', '--help'): usage() if len(args) != 1: usage() infileName = args[0] parse(infileName) if __name__ == '__main__': main() #import pdb #pdb.run('main()')