clang API Documentation

CharInfo.h
Go to the documentation of this file.
00001 //===--- clang/Basic/CharInfo.h - Classifying ASCII Characters ------------===//
00002 //
00003 //                     The LLVM Compiler Infrastructure
00004 //
00005 // This file is distributed under the University of Illinois Open Source
00006 // License. See LICENSE.TXT for details.
00007 //
00008 //===----------------------------------------------------------------------===//
00009 
00010 #ifndef LLVM_CLANG_BASIC_CHARINFO_H
00011 #define LLVM_CLANG_BASIC_CHARINFO_H
00012 
00013 #include "clang/Basic/LLVM.h"
00014 #include "llvm/ADT/StringRef.h"
00015 #include "llvm/Support/Compiler.h"
00016 #include "llvm/Support/DataTypes.h"
00017 
00018 namespace clang {
namespace charinfo {
  /// Classification table indexed by (unsigned) character value. Each entry
  /// is a bitwise OR of the CHAR_* flags declared below. Only declared here.
  extern const uint16_t InfoTable[256];

  /// Primary character classes; every class occupies its own bit so classes
  /// can be combined into masks.
  enum {
    CHAR_HORZ_WS  = 1 << 0,   // '\t', '\f', '\v'.  Note, no '\0'
    CHAR_VERT_WS  = 1 << 1,   // '\r', '\n'
    CHAR_SPACE    = 1 << 2,   // ' '
    CHAR_DIGIT    = 1 << 3,   // 0-9
    CHAR_XLETTER  = 1 << 4,   // a-f,A-F
    CHAR_UPPER    = 1 << 5,   // A-Z
    CHAR_LOWER    = 1 << 6,   // a-z
    CHAR_UNDER    = 1 << 7,   // _
    CHAR_PERIOD   = 1 << 8,   // .
    CHAR_RAWDEL   = 1 << 9,   // {}[]#<>%:;?*+-/^&|~!=,"'
    CHAR_PUNCT    = 1 << 10   // `$@()
  };

  /// Convenience combinations: a hex letter that is also upper/lower case.
  enum {
    CHAR_XUPPER = CHAR_UPPER | CHAR_XLETTER,
    CHAR_XLOWER = CHAR_LOWER | CHAR_XLETTER
  };
} // end namespace charinfo
00041 
00042 /// Returns true if this is an ASCII character.
00043 LLVM_READNONE static inline bool isASCII(char c) {
00044   return static_cast<unsigned char>(c) <= 127;
00045 }
00046 
00047 /// Returns true if this is a valid first character of a C identifier,
00048 /// which is [a-zA-Z_].
00049 LLVM_READONLY static inline bool isIdentifierHead(unsigned char c,
00050                                                   bool AllowDollar = false) {
00051   using namespace charinfo;
00052   if (InfoTable[c] & (CHAR_UPPER|CHAR_LOWER|CHAR_UNDER))
00053     return true;
00054   return AllowDollar && c == '$';
00055 }
00056 
00057 /// Returns true if this is a body character of a C identifier,
00058 /// which is [a-zA-Z0-9_].
00059 LLVM_READONLY static inline bool isIdentifierBody(unsigned char c,
00060                                                   bool AllowDollar = false) {
00061   using namespace charinfo;
00062   if (InfoTable[c] & (CHAR_UPPER|CHAR_LOWER|CHAR_DIGIT|CHAR_UNDER))
00063     return true;
00064   return AllowDollar && c == '$';
00065 }
00066 
00067 /// Returns true if this character is horizontal ASCII whitespace:
00068 /// ' ', '\\t', '\\f', '\\v'.
00069 ///
00070 /// Note that this returns false for '\\0'.
00071 LLVM_READONLY static inline bool isHorizontalWhitespace(unsigned char c) {
00072   using namespace charinfo;
00073   return (InfoTable[c] & (CHAR_HORZ_WS|CHAR_SPACE)) != 0;
00074 }
00075 
00076 /// Returns true if this character is vertical ASCII whitespace: '\\n', '\\r'.
00077 ///
00078 /// Note that this returns false for '\\0'.
00079 LLVM_READONLY static inline bool isVerticalWhitespace(unsigned char c) {
00080   using namespace charinfo;
00081   return (InfoTable[c] & CHAR_VERT_WS) != 0;
00082 }
00083 
00084 /// Return true if this character is horizontal or vertical ASCII whitespace:
00085 /// ' ', '\\t', '\\f', '\\v', '\\n', '\\r'.
00086 ///
00087 /// Note that this returns false for '\\0'.
00088 LLVM_READONLY static inline bool isWhitespace(unsigned char c) {
00089   using namespace charinfo;
00090   return (InfoTable[c] & (CHAR_HORZ_WS|CHAR_VERT_WS|CHAR_SPACE)) != 0;
00091 }
00092 
00093 /// Return true if this character is an ASCII digit: [0-9]
00094 LLVM_READONLY static inline bool isDigit(unsigned char c) {
00095   using namespace charinfo;
00096   return (InfoTable[c] & CHAR_DIGIT) != 0;
00097 }
00098 
00099 /// Return true if this character is a lowercase ASCII letter: [a-z]
00100 LLVM_READONLY static inline bool isLowercase(unsigned char c) {
00101   using namespace charinfo;
00102   return (InfoTable[c] & CHAR_LOWER) != 0;
00103 }
00104 
00105 /// Return true if this character is an uppercase ASCII letter: [A-Z]
00106 LLVM_READONLY static inline bool isUppercase(unsigned char c) {
00107   using namespace charinfo;
00108   return (InfoTable[c] & CHAR_UPPER) != 0;
00109 }
00110 
00111 /// Return true if this character is an ASCII letter: [a-zA-Z]
00112 LLVM_READONLY static inline bool isLetter(unsigned char c) {
00113   using namespace charinfo;
00114   return (InfoTable[c] & (CHAR_UPPER|CHAR_LOWER)) != 0;
00115 }
00116 
00117 /// Return true if this character is an ASCII letter or digit: [a-zA-Z0-9]
00118 LLVM_READONLY static inline bool isAlphanumeric(unsigned char c) {
00119   using namespace charinfo;
00120   return (InfoTable[c] & (CHAR_DIGIT|CHAR_UPPER|CHAR_LOWER)) != 0;
00121 }
00122 
00123 /// Return true if this character is an ASCII hex digit: [0-9a-fA-F]
00124 LLVM_READONLY static inline bool isHexDigit(unsigned char c) {
00125   using namespace charinfo;
00126   return (InfoTable[c] & (CHAR_DIGIT|CHAR_XLETTER)) != 0;
00127 }
00128 
00129 /// Return true if this character is an ASCII punctuation character.
00130 ///
00131 /// Note that '_' is both a punctuation character and an identifier character!
00132 LLVM_READONLY static inline bool isPunctuation(unsigned char c) {
00133   using namespace charinfo;
00134   return (InfoTable[c] & (CHAR_UNDER|CHAR_PERIOD|CHAR_RAWDEL|CHAR_PUNCT)) != 0;
00135 }
00136 
00137 /// Return true if this character is an ASCII printable character; that is, a
00138 /// character that should take exactly one column to print in a fixed-width
00139 /// terminal.
00140 LLVM_READONLY static inline bool isPrintable(unsigned char c) {
00141   using namespace charinfo;
00142   return (InfoTable[c] & (CHAR_UPPER|CHAR_LOWER|CHAR_PERIOD|CHAR_PUNCT|
00143                           CHAR_DIGIT|CHAR_UNDER|CHAR_RAWDEL|CHAR_SPACE)) != 0;
00144 }
00145 
00146 /// Return true if this is the body character of a C preprocessing number,
00147 /// which is [a-zA-Z0-9_.].
00148 LLVM_READONLY static inline bool isPreprocessingNumberBody(unsigned char c) {
00149   using namespace charinfo;
00150   return (InfoTable[c] &
00151           (CHAR_UPPER|CHAR_LOWER|CHAR_DIGIT|CHAR_UNDER|CHAR_PERIOD)) != 0;
00152 }
00153 
00154 /// Return true if this is the body character of a C++ raw string delimiter.
00155 LLVM_READONLY static inline bool isRawStringDelimBody(unsigned char c) {
00156   using namespace charinfo;
00157   return (InfoTable[c] & (CHAR_UPPER|CHAR_LOWER|CHAR_PERIOD|
00158                           CHAR_DIGIT|CHAR_UNDER|CHAR_RAWDEL)) != 0;
00159 }
00160 
00161 
00162 /// Converts the given ASCII character to its lowercase equivalent.
00163 ///
00164 /// If the character is not an uppercase character, it is returned as is.
00165 LLVM_READONLY static inline char toLowercase(char c) {
00166   if (isUppercase(c))
00167     return c + 'a' - 'A';
00168   return c;
00169 }
00170 
00171 /// Converts the given ASCII character to its uppercase equivalent.
00172 ///
00173 /// If the character is not a lowercase character, it is returned as is.
00174 LLVM_READONLY static inline char toUppercase(char c) {
00175   if (isLowercase(c))
00176     return c + 'A' - 'a';
00177   return c;
00178 }
00179 
00180 
00181 /// Return true if this is a valid ASCII identifier.
00182 ///
00183 /// Note that this is a very simple check; it does not accept '$' or UCNs as
00184 /// valid identifier characters.
00185 LLVM_READONLY static inline bool isValidIdentifier(StringRef S) {
00186   if (S.empty() || !isIdentifierHead(S[0]))
00187     return false;
00188 
00189   for (StringRef::iterator I = S.begin(), E = S.end(); I != E; ++I)
00190     if (!isIdentifierBody(*I))
00191       return false;
00192 
00193   return true;
00194 }
00195 
00196 } // end namespace clang
00197 
00198 #endif