LLVM API Documentation
00001 //===-- llvm/MC/MCAsmLexer.h - Abstract Asm Lexer Interface -----*- C++ -*-===// 00002 // 00003 // The LLVM Compiler Infrastructure 00004 // 00005 // This file is distributed under the University of Illinois Open Source 00006 // License. See LICENSE.TXT for details. 00007 // 00008 //===----------------------------------------------------------------------===// 00009 00010 #ifndef LLVM_MC_MCPARSER_MCASMLEXER_H 00011 #define LLVM_MC_MCPARSER_MCASMLEXER_H 00012 00013 #include "llvm/ADT/APInt.h" 00014 #include "llvm/ADT/StringRef.h" 00015 #include "llvm/Support/Compiler.h" 00016 #include "llvm/Support/DataTypes.h" 00017 #include "llvm/Support/SMLoc.h" 00018 00019 namespace llvm { 00020 00021 /// AsmToken - Target independent representation for an assembler token. 00022 class AsmToken { 00023 public: 00024 enum TokenKind { 00025 // Markers 00026 Eof, Error, 00027 00028 // String values. 00029 Identifier, 00030 String, 00031 00032 // Integer values. 00033 Integer, 00034 BigNum, // larger than 64 bits 00035 00036 // Real values. 00037 Real, 00038 00039 // No-value. 00040 EndOfStatement, 00041 Colon, 00042 Space, 00043 Plus, Minus, Tilde, 00044 Slash, // '/' 00045 BackSlash, // '\' 00046 LParen, RParen, LBrac, RBrac, LCurly, RCurly, 00047 Star, Dot, Comma, Dollar, Equal, EqualEqual, 00048 00049 Pipe, PipePipe, Caret, 00050 Amp, AmpAmp, Exclaim, ExclaimEqual, Percent, Hash, 00051 Less, LessEqual, LessLess, LessGreater, 00052 Greater, GreaterEqual, GreaterGreater, At 00053 }; 00054 00055 private: 00056 TokenKind Kind; 00057 00058 /// A reference to the entire token contents; this is always a pointer into 00059 /// a memory buffer owned by the source manager. 00060 StringRef Str; 00061 00062 APInt IntVal; 00063 00064 public: 00065 AsmToken() {} 00066 AsmToken(TokenKind _Kind, StringRef _Str, APInt _IntVal) 00067 : Kind(_Kind), Str(_Str), IntVal(_IntVal) {} 00068 AsmToken(TokenKind _Kind, StringRef _Str, int64_t _IntVal = 0) 00069 : Kind(_Kind), Str(_Str), IntVal(64, _IntVal, true) {} 00070 00071 TokenKind getKind() const { return Kind; } 00072 bool is(TokenKind K) const { return Kind == K; } 00073 bool isNot(TokenKind K) const { return Kind != K; } 00074 00075 SMLoc getLoc() const; 00076 SMLoc getEndLoc() const; 00077 00078 /// getStringContents - Get the contents of a string token (without quotes). 00079 StringRef getStringContents() const { 00080 assert(Kind == String && "This token isn't a string!"); 00081 return Str.slice(1, Str.size() - 1); 00082 } 00083 00084 /// getIdentifier - Get the identifier string for the current token, which 00085 /// should be an identifier or a string. This gets the portion of the string 00086 /// which should be used as the identifier, e.g., it does not include the 00087 /// quotes on strings. 00088 StringRef getIdentifier() const { 00089 if (Kind == Identifier) 00090 return getString(); 00091 return getStringContents(); 00092 } 00093 00094 /// getString - Get the string for the current token, this includes all 00095 /// characters (for example, the quotes on strings) in the token. 00096 /// 00097 /// The returned StringRef points into the source manager's memory buffer, and 00098 /// is safe to store across calls to Lex(). 00099 StringRef getString() const { return Str; } 00100 00101 // FIXME: Don't compute this in advance, it makes every token larger, and is 00102 // also not generally what we want (it is nicer for recovery etc. to lex 123br 00103 // as a single token, then diagnose as an invalid number). 00104 int64_t getIntVal() const { 00105 assert(Kind == Integer && "This token isn't an integer!"); 00106 return IntVal.getZExtValue(); 00107 } 00108 00109 APInt getAPIntVal() const { 00110 assert((Kind == Integer || Kind == BigNum) && 00111 "This token isn't an integer!"); 00112 return IntVal; 00113 } 00114 }; 00115 00116 /// MCAsmLexer - Generic assembler lexer interface, for use by target specific 00117 /// assembly lexers. 00118 class MCAsmLexer { 00119 /// The current token, stored in the base class for faster access. 00120 AsmToken CurTok; 00121 00122 /// The location and description of the current error 00123 SMLoc ErrLoc; 00124 std::string Err; 00125 00126 MCAsmLexer(const MCAsmLexer &) LLVM_DELETED_FUNCTION; 00127 void operator=(const MCAsmLexer &) LLVM_DELETED_FUNCTION; 00128 protected: // Can only create subclasses. 00129 const char *TokStart; 00130 bool SkipSpace; 00131 bool AllowAtInIdentifier; 00132 00133 MCAsmLexer(); 00134 00135 virtual AsmToken LexToken() = 0; 00136 00137 void SetError(const SMLoc &errLoc, const std::string &err) { 00138 ErrLoc = errLoc; 00139 Err = err; 00140 } 00141 00142 public: 00143 virtual ~MCAsmLexer(); 00144 00145 /// Lex - Consume the next token from the input stream and return it. 00146 /// 00147 /// The lexer will continuosly return the end-of-file token once the end of 00148 /// the main input file has been reached. 00149 const AsmToken &Lex() { 00150 return CurTok = LexToken(); 00151 } 00152 00153 virtual StringRef LexUntilEndOfStatement() = 0; 00154 00155 /// getLoc - Get the current source location. 00156 SMLoc getLoc() const; 00157 00158 /// getTok - Get the current (last) lexed token. 00159 const AsmToken &getTok() { 00160 return CurTok; 00161 } 00162 00163 /// peekTok - Look ahead at the next token to be lexed. 00164 virtual const AsmToken peekTok(bool ShouldSkipSpace = true) = 0; 00165 00166 /// getErrLoc - Get the current error location 00167 const SMLoc &getErrLoc() { 00168 return ErrLoc; 00169 } 00170 00171 /// getErr - Get the current error string 00172 const std::string &getErr() { 00173 return Err; 00174 } 00175 00176 /// getKind - Get the kind of current token. 00177 AsmToken::TokenKind getKind() const { return CurTok.getKind(); } 00178 00179 /// is - Check if the current token has kind \p K. 00180 bool is(AsmToken::TokenKind K) const { return CurTok.is(K); } 00181 00182 /// isNot - Check if the current token has kind \p K. 00183 bool isNot(AsmToken::TokenKind K) const { return CurTok.isNot(K); } 00184 00185 /// setSkipSpace - Set whether spaces should be ignored by the lexer 00186 void setSkipSpace(bool val) { SkipSpace = val; } 00187 00188 bool getAllowAtInIdentifier() { return AllowAtInIdentifier; } 00189 void setAllowAtInIdentifier(bool v) { AllowAtInIdentifier = v; } 00190 }; 00191 00192 } // End llvm namespace 00193 00194 #endif