LLVM API Documentation

MCAsmLexer.h
Go to the documentation of this file.
00001 //===-- llvm/MC/MCAsmLexer.h - Abstract Asm Lexer Interface -----*- C++ -*-===//
00002 //
00003 //                     The LLVM Compiler Infrastructure
00004 //
00005 // This file is distributed under the University of Illinois Open Source
00006 // License. See LICENSE.TXT for details.
00007 //
00008 //===----------------------------------------------------------------------===//
00009 
00010 #ifndef LLVM_MC_MCPARSER_MCASMLEXER_H
00011 #define LLVM_MC_MCPARSER_MCASMLEXER_H
00012 
00013 #include "llvm/ADT/APInt.h"
00014 #include "llvm/ADT/StringRef.h"
00015 #include "llvm/Support/Compiler.h"
00016 #include "llvm/Support/DataTypes.h"
00017 #include "llvm/Support/SMLoc.h"
00018 
00019 namespace llvm {
00020 
00021 /// AsmToken - Target independent representation for an assembler token.
00022 class AsmToken {
00023 public:
00024   enum TokenKind {
00025     // Markers
00026     Eof, Error,
00027 
00028     // String values.
00029     Identifier,
00030     String,
00031 
00032     // Integer values.
00033     Integer,
00034     BigNum, // larger than 64 bits
00035 
00036     // Real values.
00037     Real,
00038 
00039     // No-value.
00040     EndOfStatement,
00041     Colon,
00042     Space,
00043     Plus, Minus, Tilde,
00044     Slash,    // '/'
00045     BackSlash, // '\'
00046     LParen, RParen, LBrac, RBrac, LCurly, RCurly,
00047     Star, Dot, Comma, Dollar, Equal, EqualEqual,
00048 
00049     Pipe, PipePipe, Caret,
00050     Amp, AmpAmp, Exclaim, ExclaimEqual, Percent, Hash,
00051     Less, LessEqual, LessLess, LessGreater,
00052     Greater, GreaterEqual, GreaterGreater, At
00053   };
00054 
00055 private:
00056   TokenKind Kind;
00057 
00058   /// A reference to the entire token contents; this is always a pointer into
00059   /// a memory buffer owned by the source manager.
00060   StringRef Str;
00061 
00062   APInt IntVal;
00063 
00064 public:
00065   AsmToken() {}
00066   AsmToken(TokenKind _Kind, StringRef _Str, APInt _IntVal)
00067     : Kind(_Kind), Str(_Str), IntVal(_IntVal) {}
00068   AsmToken(TokenKind _Kind, StringRef _Str, int64_t _IntVal = 0)
00069     : Kind(_Kind), Str(_Str), IntVal(64, _IntVal, true) {}
00070 
00071   TokenKind getKind() const { return Kind; }
00072   bool is(TokenKind K) const { return Kind == K; }
00073   bool isNot(TokenKind K) const { return Kind != K; }
00074 
00075   SMLoc getLoc() const;
00076   SMLoc getEndLoc() const;
00077 
00078   /// getStringContents - Get the contents of a string token (without quotes).
00079   StringRef getStringContents() const {
00080     assert(Kind == String && "This token isn't a string!");
00081     return Str.slice(1, Str.size() - 1);
00082   }
00083 
00084   /// getIdentifier - Get the identifier string for the current token, which
00085   /// should be an identifier or a string. This gets the portion of the string
00086   /// which should be used as the identifier, e.g., it does not include the
00087   /// quotes on strings.
00088   StringRef getIdentifier() const {
00089     if (Kind == Identifier)
00090       return getString();
00091     return getStringContents();
00092   }
00093 
00094   /// getString - Get the string for the current token, this includes all
00095   /// characters (for example, the quotes on strings) in the token.
00096   ///
00097   /// The returned StringRef points into the source manager's memory buffer, and
00098   /// is safe to store across calls to Lex().
00099   StringRef getString() const { return Str; }
00100 
00101   // FIXME: Don't compute this in advance, it makes every token larger, and is
00102   // also not generally what we want (it is nicer for recovery etc. to lex 123br
00103   // as a single token, then diagnose as an invalid number).
00104   int64_t getIntVal() const {
00105     assert(Kind == Integer && "This token isn't an integer!");
00106     return IntVal.getZExtValue();
00107   }
00108 
00109   APInt getAPIntVal() const {
00110     assert((Kind == Integer || Kind == BigNum) &&
00111            "This token isn't an integer!");
00112     return IntVal;
00113   }
00114 };
00115 
00116 /// MCAsmLexer - Generic assembler lexer interface, for use by target specific
00117 /// assembly lexers.
00118 class MCAsmLexer {
00119   /// The current token, stored in the base class for faster access.
00120   AsmToken CurTok;
00121 
00122   /// The location and description of the current error
00123   SMLoc ErrLoc;
00124   std::string Err;
00125 
00126   MCAsmLexer(const MCAsmLexer &) LLVM_DELETED_FUNCTION;
00127   void operator=(const MCAsmLexer &) LLVM_DELETED_FUNCTION;
00128 protected: // Can only create subclasses.
00129   const char *TokStart;
00130   bool SkipSpace;
00131   bool AllowAtInIdentifier;
00132 
00133   MCAsmLexer();
00134 
00135   virtual AsmToken LexToken() = 0;
00136 
00137   void SetError(const SMLoc &errLoc, const std::string &err) {
00138     ErrLoc = errLoc;
00139     Err = err;
00140   }
00141 
00142 public:
00143   virtual ~MCAsmLexer();
00144 
00145   /// Lex - Consume the next token from the input stream and return it.
00146   ///
00147   /// The lexer will continuosly return the end-of-file token once the end of
00148   /// the main input file has been reached.
00149   const AsmToken &Lex() {
00150     return CurTok = LexToken();
00151   }
00152 
00153   virtual StringRef LexUntilEndOfStatement() = 0;
00154 
00155   /// getLoc - Get the current source location.
00156   SMLoc getLoc() const;
00157 
00158   /// getTok - Get the current (last) lexed token.
00159   const AsmToken &getTok() {
00160     return CurTok;
00161   }
00162 
00163   /// peekTok - Look ahead at the next token to be lexed.
00164   virtual const AsmToken peekTok(bool ShouldSkipSpace = true) = 0;
00165 
00166   /// getErrLoc - Get the current error location
00167   const SMLoc &getErrLoc() {
00168     return ErrLoc;
00169   }
00170 
00171   /// getErr - Get the current error string
00172   const std::string &getErr() {
00173     return Err;
00174   }
00175 
00176   /// getKind - Get the kind of current token.
00177   AsmToken::TokenKind getKind() const { return CurTok.getKind(); }
00178 
00179   /// is - Check if the current token has kind \p K.
00180   bool is(AsmToken::TokenKind K) const { return CurTok.is(K); }
00181 
00182   /// isNot - Check if the current token has kind \p K.
00183   bool isNot(AsmToken::TokenKind K) const { return CurTok.isNot(K); }
00184 
00185   /// setSkipSpace - Set whether spaces should be ignored by the lexer
00186   void setSkipSpace(bool val) { SkipSpace = val; }
00187 
00188   bool getAllowAtInIdentifier() { return AllowAtInIdentifier; }
00189   void setAllowAtInIdentifier(bool v) { AllowAtInIdentifier = v; }
00190 };
00191 
00192 } // End llvm namespace
00193 
00194 #endif