clang API Documentation
00001 //===--- Token.h - Token interface ------------------------------*- C++ -*-===// 00002 // 00003 // The LLVM Compiler Infrastructure 00004 // 00005 // This file is distributed under the University of Illinois Open Source 00006 // License. See LICENSE.TXT for details. 00007 // 00008 //===----------------------------------------------------------------------===// 00009 // 00010 // This file defines the Token interface. 00011 // 00012 //===----------------------------------------------------------------------===// 00013 00014 #ifndef LLVM_CLANG_LEX_TOKEN_H 00015 #define LLVM_CLANG_LEX_TOKEN_H 00016 00017 #include "clang/Basic/OperatorKinds.h" 00018 #include "clang/Basic/SourceLocation.h" 00019 #include "clang/Basic/TemplateKinds.h" 00020 #include "clang/Basic/TokenKinds.h" 00021 #include "llvm/ADT/StringRef.h" 00022 #include <cstdlib> 00023 00024 namespace clang { 00025 00026 class IdentifierInfo; 00027 00028 /// Token - This structure provides full information about a lexed token. 00029 /// It is not intended to be space efficient, it is intended to return as much 00030 /// information as possible about each returned token. This is expected to be 00031 /// compressed into a smaller form if memory footprint is important. 00032 /// 00033 /// The parser can create a special "annotation token" representing a stream of 00034 /// tokens that were parsed and semantically resolved, e.g.: "foo::MyClass<int>" 00035 /// can be represented by a single typename annotation token that carries 00036 /// information about the SourceRange of the tokens and the type object. 00037 class Token { 00038 /// The location of the token. 00039 SourceLocation Loc; 00040 00041 // Conceptually these next two fields could be in a union. However, this 00042 // causes gcc 4.2 to pessimize LexTokenInternal, a very performance critical 00043 // routine. Keeping as separate members with casts until a more beautiful fix 00044 // presents itself. 00045 00046 /// UintData - This holds either the length of the token text, when 00047 /// a normal token, or the end of the SourceRange when an annotation 00048 /// token. 00049 unsigned UintData; 00050 00051 /// PtrData - This is a union of four different pointer types, which depends 00052 /// on what type of token this is: 00053 /// Identifiers, keywords, etc: 00054 /// This is an IdentifierInfo*, which contains the uniqued identifier 00055 /// spelling. 00056 /// Literals: isLiteral() returns true. 00057 /// This is a pointer to the start of the token in a text buffer, which 00058 /// may be dirty (have trigraphs / escaped newlines). 00059 /// Annotations (resolved type names, C++ scopes, etc): isAnnotation(). 00060 /// This is a pointer to sema-specific data for the annotation token. 00061 /// Other: 00062 /// This is null. 00063 void *PtrData; 00064 00065 /// Kind - The actual flavor of token this is. 00066 tok::TokenKind Kind; 00067 00068 /// Flags - Bits we track about this token, members of the TokenFlags enum. 00069 unsigned char Flags; 00070 public: 00071 00072 // Various flags set per token: 00073 enum TokenFlags { 00074 StartOfLine = 0x01, // At start of line or only after whitespace 00075 // (considering the line after macro expansion). 00076 LeadingSpace = 0x02, // Whitespace exists before this token (considering 00077 // whitespace after macro expansion). 00078 DisableExpand = 0x04, // This identifier may never be macro expanded. 00079 NeedsCleaning = 0x08, // Contained an escaped newline or trigraph. 00080 LeadingEmptyMacro = 0x10, // Empty macro exists before this token. 00081 HasUDSuffix = 0x20, // This string or character literal has a ud-suffix. 00082 HasUCN = 0x40, // This identifier contains a UCN. 00083 IgnoredComma = 0x80 // This comma is not a macro argument separator (MS). 00084 }; 00085 00086 tok::TokenKind getKind() const { return Kind; } 00087 void setKind(tok::TokenKind K) { Kind = K; } 00088 00089 /// is/isNot - Predicates to check if this token is a specific kind, as in 00090 /// "if (Tok.is(tok::l_brace)) {...}". 00091 bool is(tok::TokenKind K) const { return Kind == K; } 00092 bool isNot(tok::TokenKind K) const { return Kind != K; } 00093 00094 /// \brief Return true if this is a raw identifier (when lexing 00095 /// in raw mode) or a non-keyword identifier (when lexing in non-raw mode). 00096 bool isAnyIdentifier() const { 00097 return tok::isAnyIdentifier(getKind()); 00098 } 00099 00100 /// \brief Return true if this is a "literal", like a numeric 00101 /// constant, string, etc. 00102 bool isLiteral() const { 00103 return tok::isLiteral(getKind()); 00104 } 00105 00106 /// \brief Return true if this is any of tok::annot_* kind tokens. 00107 bool isAnnotation() const { 00108 return tok::isAnnotation(getKind()); 00109 } 00110 00111 /// \brief Return a source location identifier for the specified 00112 /// offset in the current file. 00113 SourceLocation getLocation() const { return Loc; } 00114 unsigned getLength() const { 00115 assert(!isAnnotation() && "Annotation tokens have no length field"); 00116 return UintData; 00117 } 00118 00119 void setLocation(SourceLocation L) { Loc = L; } 00120 void setLength(unsigned Len) { 00121 assert(!isAnnotation() && "Annotation tokens have no length field"); 00122 UintData = Len; 00123 } 00124 00125 SourceLocation getAnnotationEndLoc() const { 00126 assert(isAnnotation() && "Used AnnotEndLocID on non-annotation token"); 00127 return SourceLocation::getFromRawEncoding(UintData); 00128 } 00129 void setAnnotationEndLoc(SourceLocation L) { 00130 assert(isAnnotation() && "Used AnnotEndLocID on non-annotation token"); 00131 UintData = L.getRawEncoding(); 00132 } 00133 00134 SourceLocation getLastLoc() const { 00135 return isAnnotation() ? getAnnotationEndLoc() : getLocation(); 00136 } 00137 00138 /// \brief SourceRange of the group of tokens that this annotation token 00139 /// represents. 00140 SourceRange getAnnotationRange() const { 00141 return SourceRange(getLocation(), getAnnotationEndLoc()); 00142 } 00143 void setAnnotationRange(SourceRange R) { 00144 setLocation(R.getBegin()); 00145 setAnnotationEndLoc(R.getEnd()); 00146 } 00147 00148 const char *getName() const { return tok::getTokenName(Kind); } 00149 00150 /// \brief Reset all flags to cleared. 00151 void startToken() { 00152 Kind = tok::unknown; 00153 Flags = 0; 00154 PtrData = nullptr; 00155 UintData = 0; 00156 Loc = SourceLocation(); 00157 } 00158 00159 IdentifierInfo *getIdentifierInfo() const { 00160 assert(isNot(tok::raw_identifier) && 00161 "getIdentifierInfo() on a tok::raw_identifier token!"); 00162 assert(!isAnnotation() && 00163 "getIdentifierInfo() on an annotation token!"); 00164 if (isLiteral()) return nullptr; 00165 return (IdentifierInfo*) PtrData; 00166 } 00167 void setIdentifierInfo(IdentifierInfo *II) { 00168 PtrData = (void*) II; 00169 } 00170 00171 /// getRawIdentifier - For a raw identifier token (i.e., an identifier 00172 /// lexed in raw mode), returns a reference to the text substring in the 00173 /// buffer if known. 00174 StringRef getRawIdentifier() const { 00175 assert(is(tok::raw_identifier)); 00176 return StringRef(reinterpret_cast<const char *>(PtrData), getLength()); 00177 } 00178 void setRawIdentifierData(const char *Ptr) { 00179 assert(is(tok::raw_identifier)); 00180 PtrData = const_cast<char*>(Ptr); 00181 } 00182 00183 /// getLiteralData - For a literal token (numeric constant, string, etc), this 00184 /// returns a pointer to the start of it in the text buffer if known, null 00185 /// otherwise. 00186 const char *getLiteralData() const { 00187 assert(isLiteral() && "Cannot get literal data of non-literal"); 00188 return reinterpret_cast<const char*>(PtrData); 00189 } 00190 void setLiteralData(const char *Ptr) { 00191 assert(isLiteral() && "Cannot set literal data of non-literal"); 00192 PtrData = const_cast<char*>(Ptr); 00193 } 00194 00195 void *getAnnotationValue() const { 00196 assert(isAnnotation() && "Used AnnotVal on non-annotation token"); 00197 return PtrData; 00198 } 00199 void setAnnotationValue(void *val) { 00200 assert(isAnnotation() && "Used AnnotVal on non-annotation token"); 00201 PtrData = val; 00202 } 00203 00204 /// \brief Set the specified flag. 00205 void setFlag(TokenFlags Flag) { 00206 Flags |= Flag; 00207 } 00208 00209 /// \brief Unset the specified flag. 00210 void clearFlag(TokenFlags Flag) { 00211 Flags &= ~Flag; 00212 } 00213 00214 /// \brief Return the internal represtation of the flags. 00215 /// 00216 /// This is only intended for low-level operations such as writing tokens to 00217 /// disk. 00218 unsigned getFlags() const { 00219 return Flags; 00220 } 00221 00222 /// \brief Set a flag to either true or false. 00223 void setFlagValue(TokenFlags Flag, bool Val) { 00224 if (Val) 00225 setFlag(Flag); 00226 else 00227 clearFlag(Flag); 00228 } 00229 00230 /// isAtStartOfLine - Return true if this token is at the start of a line. 00231 /// 00232 bool isAtStartOfLine() const { return (Flags & StartOfLine) ? true : false; } 00233 00234 /// \brief Return true if this token has whitespace before it. 00235 /// 00236 bool hasLeadingSpace() const { return (Flags & LeadingSpace) ? true : false; } 00237 00238 /// \brief Return true if this identifier token should never 00239 /// be expanded in the future, due to C99 6.10.3.4p2. 00240 bool isExpandDisabled() const { 00241 return (Flags & DisableExpand) ? true : false; 00242 } 00243 00244 /// \brief Return true if we have an ObjC keyword identifier. 00245 bool isObjCAtKeyword(tok::ObjCKeywordKind objcKey) const; 00246 00247 /// \brief Return the ObjC keyword kind. 00248 tok::ObjCKeywordKind getObjCKeywordID() const; 00249 00250 /// \brief Return true if this token has trigraphs or escaped newlines in it. 00251 bool needsCleaning() const { return (Flags & NeedsCleaning) ? true : false; } 00252 00253 /// \brief Return true if this token has an empty macro before it. 00254 /// 00255 bool hasLeadingEmptyMacro() const { 00256 return (Flags & LeadingEmptyMacro) ? true : false; 00257 } 00258 00259 /// \brief Return true if this token is a string or character literal which 00260 /// has a ud-suffix. 00261 bool hasUDSuffix() const { return (Flags & HasUDSuffix) ? true : false; } 00262 00263 /// Returns true if this token contains a universal character name. 00264 bool hasUCN() const { return (Flags & HasUCN) ? true : false; } 00265 }; 00266 00267 /// \brief Information about the conditional stack (\#if directives) 00268 /// currently active. 00269 struct PPConditionalInfo { 00270 /// \brief Location where the conditional started. 00271 SourceLocation IfLoc; 00272 00273 /// \brief True if this was contained in a skipping directive, e.g., 00274 /// in a "\#if 0" block. 00275 bool WasSkipping; 00276 00277 /// \brief True if we have emitted tokens already, and now we're in 00278 /// an \#else block or something. Only useful in Skipping blocks. 00279 bool FoundNonSkip; 00280 00281 /// \brief True if we've seen a \#else in this block. If so, 00282 /// \#elif/\#else directives are not allowed. 00283 bool FoundElse; 00284 }; 00285 00286 } // end namespace clang 00287 00288 namespace llvm { 00289 template <> 00290 struct isPodLike<clang::Token> { static const bool value = true; }; 00291 } // end namespace llvm 00292 00293 #endif