clang API Documentation
00001 //===--- clang/Basic/CharInfo.h - Classifying ASCII Characters ------------===// 00002 // 00003 // The LLVM Compiler Infrastructure 00004 // 00005 // This file is distributed under the University of Illinois Open Source 00006 // License. See LICENSE.TXT for details. 00007 // 00008 //===----------------------------------------------------------------------===// 00009 00010 #ifndef LLVM_CLANG_BASIC_CHARINFO_H 00011 #define LLVM_CLANG_BASIC_CHARINFO_H 00012 00013 #include "clang/Basic/LLVM.h" 00014 #include "llvm/ADT/StringRef.h" 00015 #include "llvm/Support/Compiler.h" 00016 #include "llvm/Support/DataTypes.h" 00017 00018 namespace clang { 00019 namespace charinfo { 00020 extern const uint16_t InfoTable[256]; 00021 00022 enum { 00023 CHAR_HORZ_WS = 0x0001, // '\t', '\f', '\v'. Note, no '\0' 00024 CHAR_VERT_WS = 0x0002, // '\r', '\n' 00025 CHAR_SPACE = 0x0004, // ' ' 00026 CHAR_DIGIT = 0x0008, // 0-9 00027 CHAR_XLETTER = 0x0010, // a-f,A-F 00028 CHAR_UPPER = 0x0020, // A-Z 00029 CHAR_LOWER = 0x0040, // a-z 00030 CHAR_UNDER = 0x0080, // _ 00031 CHAR_PERIOD = 0x0100, // . 00032 CHAR_RAWDEL = 0x0200, // {}[]#<>%:;?*+-/^&|~!=,"' 00033 CHAR_PUNCT = 0x0400 // `$@() 00034 }; 00035 00036 enum { 00037 CHAR_XUPPER = CHAR_XLETTER | CHAR_UPPER, 00038 CHAR_XLOWER = CHAR_XLETTER | CHAR_LOWER 00039 }; 00040 } // end namespace charinfo 00041 00042 /// Returns true if this is an ASCII character. 00043 LLVM_READNONE static inline bool isASCII(char c) { 00044 return static_cast<unsigned char>(c) <= 127; 00045 } 00046 00047 /// Returns true if this is a valid first character of a C identifier, 00048 /// which is [a-zA-Z_]. 00049 LLVM_READONLY static inline bool isIdentifierHead(unsigned char c, 00050 bool AllowDollar = false) { 00051 using namespace charinfo; 00052 if (InfoTable[c] & (CHAR_UPPER|CHAR_LOWER|CHAR_UNDER)) 00053 return true; 00054 return AllowDollar && c == '$'; 00055 } 00056 00057 /// Returns true if this is a body character of a C identifier, 00058 /// which is [a-zA-Z0-9_]. 00059 LLVM_READONLY static inline bool isIdentifierBody(unsigned char c, 00060 bool AllowDollar = false) { 00061 using namespace charinfo; 00062 if (InfoTable[c] & (CHAR_UPPER|CHAR_LOWER|CHAR_DIGIT|CHAR_UNDER)) 00063 return true; 00064 return AllowDollar && c == '$'; 00065 } 00066 00067 /// Returns true if this character is horizontal ASCII whitespace: 00068 /// ' ', '\\t', '\\f', '\\v'. 00069 /// 00070 /// Note that this returns false for '\\0'. 00071 LLVM_READONLY static inline bool isHorizontalWhitespace(unsigned char c) { 00072 using namespace charinfo; 00073 return (InfoTable[c] & (CHAR_HORZ_WS|CHAR_SPACE)) != 0; 00074 } 00075 00076 /// Returns true if this character is vertical ASCII whitespace: '\\n', '\\r'. 00077 /// 00078 /// Note that this returns false for '\\0'. 00079 LLVM_READONLY static inline bool isVerticalWhitespace(unsigned char c) { 00080 using namespace charinfo; 00081 return (InfoTable[c] & CHAR_VERT_WS) != 0; 00082 } 00083 00084 /// Return true if this character is horizontal or vertical ASCII whitespace: 00085 /// ' ', '\\t', '\\f', '\\v', '\\n', '\\r'. 00086 /// 00087 /// Note that this returns false for '\\0'. 00088 LLVM_READONLY static inline bool isWhitespace(unsigned char c) { 00089 using namespace charinfo; 00090 return (InfoTable[c] & (CHAR_HORZ_WS|CHAR_VERT_WS|CHAR_SPACE)) != 0; 00091 } 00092 00093 /// Return true if this character is an ASCII digit: [0-9] 00094 LLVM_READONLY static inline bool isDigit(unsigned char c) { 00095 using namespace charinfo; 00096 return (InfoTable[c] & CHAR_DIGIT) != 0; 00097 } 00098 00099 /// Return true if this character is a lowercase ASCII letter: [a-z] 00100 LLVM_READONLY static inline bool isLowercase(unsigned char c) { 00101 using namespace charinfo; 00102 return (InfoTable[c] & CHAR_LOWER) != 0; 00103 } 00104 00105 /// Return true if this character is an uppercase ASCII letter: [A-Z] 00106 LLVM_READONLY static inline bool isUppercase(unsigned char c) { 00107 using namespace charinfo; 00108 return (InfoTable[c] & CHAR_UPPER) != 0; 00109 } 00110 00111 /// Return true if this character is an ASCII letter: [a-zA-Z] 00112 LLVM_READONLY static inline bool isLetter(unsigned char c) { 00113 using namespace charinfo; 00114 return (InfoTable[c] & (CHAR_UPPER|CHAR_LOWER)) != 0; 00115 } 00116 00117 /// Return true if this character is an ASCII letter or digit: [a-zA-Z0-9] 00118 LLVM_READONLY static inline bool isAlphanumeric(unsigned char c) { 00119 using namespace charinfo; 00120 return (InfoTable[c] & (CHAR_DIGIT|CHAR_UPPER|CHAR_LOWER)) != 0; 00121 } 00122 00123 /// Return true if this character is an ASCII hex digit: [0-9a-fA-F] 00124 LLVM_READONLY static inline bool isHexDigit(unsigned char c) { 00125 using namespace charinfo; 00126 return (InfoTable[c] & (CHAR_DIGIT|CHAR_XLETTER)) != 0; 00127 } 00128 00129 /// Return true if this character is an ASCII punctuation character. 00130 /// 00131 /// Note that '_' is both a punctuation character and an identifier character! 00132 LLVM_READONLY static inline bool isPunctuation(unsigned char c) { 00133 using namespace charinfo; 00134 return (InfoTable[c] & (CHAR_UNDER|CHAR_PERIOD|CHAR_RAWDEL|CHAR_PUNCT)) != 0; 00135 } 00136 00137 /// Return true if this character is an ASCII printable character; that is, a 00138 /// character that should take exactly one column to print in a fixed-width 00139 /// terminal. 00140 LLVM_READONLY static inline bool isPrintable(unsigned char c) { 00141 using namespace charinfo; 00142 return (InfoTable[c] & (CHAR_UPPER|CHAR_LOWER|CHAR_PERIOD|CHAR_PUNCT| 00143 CHAR_DIGIT|CHAR_UNDER|CHAR_RAWDEL|CHAR_SPACE)) != 0; 00144 } 00145 00146 /// Return true if this is the body character of a C preprocessing number, 00147 /// which is [a-zA-Z0-9_.]. 00148 LLVM_READONLY static inline bool isPreprocessingNumberBody(unsigned char c) { 00149 using namespace charinfo; 00150 return (InfoTable[c] & 00151 (CHAR_UPPER|CHAR_LOWER|CHAR_DIGIT|CHAR_UNDER|CHAR_PERIOD)) != 0; 00152 } 00153 00154 /// Return true if this is the body character of a C++ raw string delimiter. 00155 LLVM_READONLY static inline bool isRawStringDelimBody(unsigned char c) { 00156 using namespace charinfo; 00157 return (InfoTable[c] & (CHAR_UPPER|CHAR_LOWER|CHAR_PERIOD| 00158 CHAR_DIGIT|CHAR_UNDER|CHAR_RAWDEL)) != 0; 00159 } 00160 00161 00162 /// Converts the given ASCII character to its lowercase equivalent. 00163 /// 00164 /// If the character is not an uppercase character, it is returned as is. 00165 LLVM_READONLY static inline char toLowercase(char c) { 00166 if (isUppercase(c)) 00167 return c + 'a' - 'A'; 00168 return c; 00169 } 00170 00171 /// Converts the given ASCII character to its uppercase equivalent. 00172 /// 00173 /// If the character is not a lowercase character, it is returned as is. 00174 LLVM_READONLY static inline char toUppercase(char c) { 00175 if (isLowercase(c)) 00176 return c + 'A' - 'a'; 00177 return c; 00178 } 00179 00180 00181 /// Return true if this is a valid ASCII identifier. 00182 /// 00183 /// Note that this is a very simple check; it does not accept '$' or UCNs as 00184 /// valid identifier characters. 00185 LLVM_READONLY static inline bool isValidIdentifier(StringRef S) { 00186 if (S.empty() || !isIdentifierHead(S[0])) 00187 return false; 00188 00189 for (StringRef::iterator I = S.begin(), E = S.end(); I != E; ++I) 00190 if (!isIdentifierBody(*I)) 00191 return false; 00192 00193 return true; 00194 } 00195 00196 } // end namespace clang 00197 00198 #endif