clang API Documentation
00001 //===--- LiteralSupport.h ---------------------------------------*- C++ -*-===// 00002 // 00003 // The LLVM Compiler Infrastructure 00004 // 00005 // This file is distributed under the University of Illinois Open Source 00006 // License. See LICENSE.TXT for details. 00007 // 00008 //===----------------------------------------------------------------------===// 00009 // 00010 // This file defines the NumericLiteralParser, CharLiteralParser, and 00011 // StringLiteralParser interfaces. 00012 // 00013 //===----------------------------------------------------------------------===// 00014 00015 #ifndef LLVM_CLANG_LEX_LITERALSUPPORT_H 00016 #define LLVM_CLANG_LEX_LITERALSUPPORT_H 00017 00018 #include "clang/Basic/CharInfo.h" 00019 #include "clang/Basic/LLVM.h" 00020 #include "clang/Basic/TokenKinds.h" 00021 #include "llvm/ADT/APFloat.h" 00022 #include "llvm/ADT/SmallString.h" 00023 #include "llvm/ADT/StringRef.h" 00024 #include "llvm/Support/DataTypes.h" 00025 00026 namespace clang { 00027 00028 class DiagnosticsEngine; 00029 class Preprocessor; 00030 class Token; 00031 class SourceLocation; 00032 class TargetInfo; 00033 class SourceManager; 00034 class LangOptions; 00035 00036 /// Copy characters from Input to Buf, expanding any UCNs. 00037 void expandUCNs(SmallVectorImpl<char> &Buf, StringRef Input); 00038 00039 /// NumericLiteralParser - This performs strict semantic analysis of the content 00040 /// of a ppnumber, classifying it as either integer, floating, or erroneous, 00041 /// determines the radix of the value and can convert it to a useful value. 00042 class NumericLiteralParser { 00043 Preprocessor &PP; // needed for diagnostics 00044 00045 const char *const ThisTokBegin; 00046 const char *const ThisTokEnd; 00047 const char *DigitsBegin, *SuffixBegin; // markers 00048 const char *s; // cursor 00049 00050 unsigned radix; 00051 00052 bool saw_exponent, saw_period, saw_ud_suffix; 00053 00054 SmallString<32> UDSuffixBuf; 00055 00056 public: 00057 NumericLiteralParser(StringRef TokSpelling, 00058 SourceLocation TokLoc, 00059 Preprocessor &PP); 00060 bool hadError; 00061 bool isUnsigned; 00062 bool isLong; // This is *not* set for long long. 00063 bool isLongLong; 00064 bool isFloat; // 1.0f 00065 bool isImaginary; // 1.0i 00066 uint8_t MicrosoftInteger; // Microsoft suffix extension i8, i16, i32, or i64. 00067 00068 bool isIntegerLiteral() const { 00069 return !saw_period && !saw_exponent; 00070 } 00071 bool isFloatingLiteral() const { 00072 return saw_period || saw_exponent; 00073 } 00074 00075 bool hasUDSuffix() const { 00076 return saw_ud_suffix; 00077 } 00078 StringRef getUDSuffix() const { 00079 assert(saw_ud_suffix); 00080 return UDSuffixBuf; 00081 } 00082 unsigned getUDSuffixOffset() const { 00083 assert(saw_ud_suffix); 00084 return SuffixBegin - ThisTokBegin; 00085 } 00086 00087 static bool isValidUDSuffix(const LangOptions &LangOpts, StringRef Suffix); 00088 00089 unsigned getRadix() const { return radix; } 00090 00091 /// GetIntegerValue - Convert this numeric literal value to an APInt that 00092 /// matches Val's input width. If there is an overflow (i.e., if the unsigned 00093 /// value read is larger than the APInt's bits will hold), set Val to the low 00094 /// bits of the result and return true. Otherwise, return false. 00095 bool GetIntegerValue(llvm::APInt &Val); 00096 00097 /// GetFloatValue - Convert this numeric literal to a floating value, using 00098 /// the specified APFloat fltSemantics (specifying float, double, etc). 00099 /// The optional bool isExact (passed-by-reference) has its value 00100 /// set to true if the returned APFloat can represent the number in the 00101 /// literal exactly, and false otherwise. 00102 llvm::APFloat::opStatus GetFloatValue(llvm::APFloat &Result); 00103 00104 private: 00105 00106 void ParseNumberStartingWithZero(SourceLocation TokLoc); 00107 00108 static bool isDigitSeparator(char C) { return C == '\''; } 00109 00110 enum CheckSeparatorKind { CSK_BeforeDigits, CSK_AfterDigits }; 00111 00112 /// \brief Ensure that we don't have a digit separator here. 00113 void checkSeparator(SourceLocation TokLoc, const char *Pos, 00114 CheckSeparatorKind IsAfterDigits); 00115 00116 /// SkipHexDigits - Read and skip over any hex digits, up to End. 00117 /// Return a pointer to the first non-hex digit or End. 00118 const char *SkipHexDigits(const char *ptr) { 00119 while (ptr != ThisTokEnd && (isHexDigit(*ptr) || isDigitSeparator(*ptr))) 00120 ptr++; 00121 return ptr; 00122 } 00123 00124 /// SkipOctalDigits - Read and skip over any octal digits, up to End. 00125 /// Return a pointer to the first non-hex digit or End. 00126 const char *SkipOctalDigits(const char *ptr) { 00127 while (ptr != ThisTokEnd && 00128 ((*ptr >= '0' && *ptr <= '7') || isDigitSeparator(*ptr))) 00129 ptr++; 00130 return ptr; 00131 } 00132 00133 /// SkipDigits - Read and skip over any digits, up to End. 00134 /// Return a pointer to the first non-hex digit or End. 00135 const char *SkipDigits(const char *ptr) { 00136 while (ptr != ThisTokEnd && (isDigit(*ptr) || isDigitSeparator(*ptr))) 00137 ptr++; 00138 return ptr; 00139 } 00140 00141 /// SkipBinaryDigits - Read and skip over any binary digits, up to End. 00142 /// Return a pointer to the first non-binary digit or End. 00143 const char *SkipBinaryDigits(const char *ptr) { 00144 while (ptr != ThisTokEnd && 00145 (*ptr == '0' || *ptr == '1' || isDigitSeparator(*ptr))) 00146 ptr++; 00147 return ptr; 00148 } 00149 00150 }; 00151 00152 /// CharLiteralParser - Perform interpretation and semantic analysis of a 00153 /// character literal. 00154 class CharLiteralParser { 00155 uint64_t Value; 00156 tok::TokenKind Kind; 00157 bool IsMultiChar; 00158 bool HadError; 00159 SmallString<32> UDSuffixBuf; 00160 unsigned UDSuffixOffset; 00161 public: 00162 CharLiteralParser(const char *begin, const char *end, 00163 SourceLocation Loc, Preprocessor &PP, 00164 tok::TokenKind kind); 00165 00166 bool hadError() const { return HadError; } 00167 bool isAscii() const { return Kind == tok::char_constant; } 00168 bool isWide() const { return Kind == tok::wide_char_constant; } 00169 bool isUTF16() const { return Kind == tok::utf16_char_constant; } 00170 bool isUTF32() const { return Kind == tok::utf32_char_constant; } 00171 bool isMultiChar() const { return IsMultiChar; } 00172 uint64_t getValue() const { return Value; } 00173 StringRef getUDSuffix() const { return UDSuffixBuf; } 00174 unsigned getUDSuffixOffset() const { 00175 assert(!UDSuffixBuf.empty() && "no ud-suffix"); 00176 return UDSuffixOffset; 00177 } 00178 }; 00179 00180 /// StringLiteralParser - This decodes string escape characters and performs 00181 /// wide string analysis and Translation Phase #6 (concatenation of string 00182 /// literals) (C99 5.1.1.2p1). 00183 class StringLiteralParser { 00184 const SourceManager &SM; 00185 const LangOptions &Features; 00186 const TargetInfo &Target; 00187 DiagnosticsEngine *Diags; 00188 00189 unsigned MaxTokenLength; 00190 unsigned SizeBound; 00191 unsigned CharByteWidth; 00192 tok::TokenKind Kind; 00193 SmallString<512> ResultBuf; 00194 char *ResultPtr; // cursor 00195 SmallString<32> UDSuffixBuf; 00196 unsigned UDSuffixToken; 00197 unsigned UDSuffixOffset; 00198 public: 00199 StringLiteralParser(ArrayRef<Token> StringToks, 00200 Preprocessor &PP, bool Complain = true); 00201 StringLiteralParser(ArrayRef<Token> StringToks, 00202 const SourceManager &sm, const LangOptions &features, 00203 const TargetInfo &target, 00204 DiagnosticsEngine *diags = nullptr) 00205 : SM(sm), Features(features), Target(target), Diags(diags), 00206 MaxTokenLength(0), SizeBound(0), CharByteWidth(0), Kind(tok::unknown), 00207 ResultPtr(ResultBuf.data()), hadError(false), Pascal(false) { 00208 init(StringToks); 00209 } 00210 00211 00212 bool hadError; 00213 bool Pascal; 00214 00215 StringRef GetString() const { 00216 return StringRef(ResultBuf.data(), GetStringLength()); 00217 } 00218 unsigned GetStringLength() const { return ResultPtr-ResultBuf.data(); } 00219 00220 unsigned GetNumStringChars() const { 00221 return GetStringLength() / CharByteWidth; 00222 } 00223 /// getOffsetOfStringByte - This function returns the offset of the 00224 /// specified byte of the string data represented by Token. This handles 00225 /// advancing over escape sequences in the string. 00226 /// 00227 /// If the Diagnostics pointer is non-null, then this will do semantic 00228 /// checking of the string literal and emit errors and warnings. 00229 unsigned getOffsetOfStringByte(const Token &TheTok, unsigned ByteNo) const; 00230 00231 bool isAscii() const { return Kind == tok::string_literal; } 00232 bool isWide() const { return Kind == tok::wide_string_literal; } 00233 bool isUTF8() const { return Kind == tok::utf8_string_literal; } 00234 bool isUTF16() const { return Kind == tok::utf16_string_literal; } 00235 bool isUTF32() const { return Kind == tok::utf32_string_literal; } 00236 bool isPascal() const { return Pascal; } 00237 00238 StringRef getUDSuffix() const { return UDSuffixBuf; } 00239 00240 /// Get the index of a token containing a ud-suffix. 00241 unsigned getUDSuffixToken() const { 00242 assert(!UDSuffixBuf.empty() && "no ud-suffix"); 00243 return UDSuffixToken; 00244 } 00245 /// Get the spelling offset of the first byte of the ud-suffix. 00246 unsigned getUDSuffixOffset() const { 00247 assert(!UDSuffixBuf.empty() && "no ud-suffix"); 00248 return UDSuffixOffset; 00249 } 00250 00251 private: 00252 void init(ArrayRef<Token> StringToks); 00253 bool CopyStringFragment(const Token &Tok, const char *TokBegin, 00254 StringRef Fragment); 00255 void DiagnoseLexingError(SourceLocation Loc); 00256 }; 00257 00258 } // end namespace clang 00259 00260 #endif