clang API Documentation

IdentifierTable.h
Go to the documentation of this file.
00001 //===--- IdentifierTable.h - Hash table for identifier lookup ---*- C++ -*-===//
00002 //
00003 //                     The LLVM Compiler Infrastructure
00004 //
00005 // This file is distributed under the University of Illinois Open Source
00006 // License. See LICENSE.TXT for details.
00007 //
00008 //===----------------------------------------------------------------------===//
00009 ///
00010 /// \file
00011 /// \brief Defines the clang::IdentifierInfo, clang::IdentifierTable, and
00012 /// clang::Selector interfaces.
00013 ///
00014 //===----------------------------------------------------------------------===//
00015 
00016 #ifndef LLVM_CLANG_BASIC_IDENTIFIERTABLE_H
00017 #define LLVM_CLANG_BASIC_IDENTIFIERTABLE_H
00018 
00019 #include "clang/Basic/LLVM.h"
00020 #include "clang/Basic/TokenKinds.h"
00021 #include "llvm/ADT/StringMap.h"
00022 #include "llvm/ADT/StringRef.h"
00023 #include <cassert>
00024 #include <string>
00025 
00026 namespace llvm {
00027   template <typename T> struct DenseMapInfo;
00028 }
00029 
00030 namespace clang {
00031   class LangOptions;
00032   class IdentifierInfo;
00033   class IdentifierTable;
00034   class SourceLocation;
00035   class MultiKeywordSelector; // private class used by Selector
00036   class DeclarationName;      // AST class that stores declaration names
00037 
00038   /// \brief An (IdentifierInfo pointer, SourceLocation) pair.
00039   typedef std::pair<IdentifierInfo*, SourceLocation> IdentifierLocPair;
00040 
00041 
00042 /// One of these records is kept for each identifier that
00043 /// is lexed.  This contains information about whether the token was \#define'd,
00044 /// is a language keyword, or if it is a front-end token of some sort (e.g. a
00045 /// variable or function name).  The preprocessor keeps this information in a
00046 /// set, and all tok::identifier tokens have a pointer to one of these.
00047 class IdentifierInfo {
00048   unsigned TokenID            : 9; // Front-end token ID or tok::identifier.
00049   // Objective-C keyword ('protocol' in '@protocol') or builtin (__builtin_inf).
00050   // First NUM_OBJC_KEYWORDS values are for Objective-C, the remaining values
00051   // are for builtins.
00052   unsigned ObjCOrBuiltinID    :11;
00053   bool HasMacro               : 1; // True if there is a #define for this.
00054   bool HadMacro               : 1; // True if there was a #define for this.
00055   bool IsExtension            : 1; // True if identifier is a lang extension.
00056   bool IsCXX11CompatKeyword   : 1; // True if identifier is a keyword in C++11.
00057   bool IsPoisoned             : 1; // True if identifier is poisoned.
00058   bool IsCPPOperatorKeyword   : 1; // True if ident is a C++ operator keyword.
00059   bool NeedsHandleIdentifier  : 1; // See "RecomputeNeedsHandleIdentifier".
00060   bool IsFromAST              : 1; // True if identifier was loaded (at least
00061                                    // partially) from an AST file.
00062   bool ChangedAfterLoad       : 1; // True if identifier has changed from the
00063                                    // definition loaded from an AST file.
00064   bool RevertedTokenID        : 1; // True if RevertTokenIDToIdentifier was
00065                                    // called.
00066   bool OutOfDate              : 1; // True if there may be additional
00067                                    // information about this identifier
00068                                    // stored externally.
00069   bool IsModulesImport        : 1; // True if this is the 'import' contextual
00070                                    // keyword.
00071   // 32-bit word is filled (9 + 11 + twelve 1-bit flags).
00072 
00073   void *FETokenInfo;               // Managed by the language front-end.
00074   llvm::StringMapEntry<IdentifierInfo*> *Entry; // Holds the name, or null.
00075 
00076   IdentifierInfo(const IdentifierInfo&) LLVM_DELETED_FUNCTION;
00077   void operator=(const IdentifierInfo&) LLVM_DELETED_FUNCTION;
00078 
00079   friend class IdentifierTable;
00080
00081 public:
00082   IdentifierInfo();
00083 
00084 
00085   /// \brief Return true if this is the identifier for the specified string.
00086   ///
00087   /// This is intended to be used for string literals only: II->isStr("foo").
00088   template <std::size_t StrLen>
00089   bool isStr(const char (&Str)[StrLen]) const {
00090     return getLength() == StrLen-1 && !memcmp(getNameStart(), Str, StrLen-1);
00091   }
00092 
00093   /// \brief Return the beginning of the actual null-terminated string for this
00094   /// identifier.
00095   ///
00096   const char *getNameStart() const {
00097     if (Entry) return Entry->getKeyData();
00098     // FIXME: This is gross. It would be best not to embed specific details
00099     // of the PTH file format here.
00100     // The 'this' pointer really points to a
00101     // std::pair<IdentifierInfo, const char*>, where internal pointer
00102     // points to the external string data.
00103     typedef std::pair<IdentifierInfo, const char*> actualtype;
00104     return ((const actualtype*) this)->second;
00105   }
00106 
00107   /// \brief Efficiently return the length of this identifier info.
00108   ///
00109   unsigned getLength() const {
00110     if (Entry) return Entry->getKeyLength();
00111     // FIXME: This is gross; PTH file-format details should not live here.
00112     // 'this' really points to a std::pair<IdentifierInfo, const char*> whose
00113     // second member points at the string data, preceded by two length bytes.
00114     // Read them as unsigned char so bytes >= 0x80 are not sign-extended.
00115     typedef std::pair<IdentifierInfo, const char*> actualtype;
00116     const unsigned char *p =
00117       (const unsigned char*) (((const actualtype*) this)->second - 2);
00118     return (((unsigned) p[0]) | (((unsigned) p[1]) << 8)) - 1;
00119   }
00120 
00121   /// \brief Return the actual identifier string.
00122   StringRef getName() const {
00123     return StringRef(getNameStart(), getLength());
00124   }
00125 
00126   /// \brief Return true if this identifier is \#defined to some other value.
00127   bool hasMacroDefinition() const {
00128     return HasMacro;
00129   }
00130   void setHasMacroDefinition(bool Val) {
00131     if (HasMacro == Val) return;     // Unchanged: nothing to recompute.
00132 
00133     HasMacro = Val;
00134     if (Val) {
00135       NeedsHandleIdentifier = true;
00136       HadMacro = true;               // Sticky: this setter never clears it.
00137     } else {
00138       RecomputeNeedsHandleIdentifier();
00139     }
00140   }
00141   /// \brief Returns true if this identifier was \#defined to some value at any
00142   /// moment. In this case there should be an entry for the identifier in the
00143   /// macro history table in Preprocessor.
00144   bool hadMacroDefinition() const {
00145     return HadMacro;
00146   }
00147 
00148   /// If this is a source-language token (e.g. 'for'), this API
00149   /// can be used to cause the lexer to map identifiers to source-language
00150   /// tokens.
00151   tok::TokenKind getTokenID() const { return (tok::TokenKind)TokenID; }
00152 
00153   /// \brief True if RevertTokenIDToIdentifier() was called.
00154   bool hasRevertedTokenIDToIdentifier() const { return RevertedTokenID; }
00155 
00156   /// \brief Revert TokenID to tok::identifier; used for GNU libstdc++ 4.2
00157   /// compatibility.
00158   ///
00159   /// TokenID is normally read-only but there are 2 instances where we revert it
00160   /// to tok::identifier for libstdc++ 4.2. Keep track of when this happens
00161   /// using this method so we can inform serialization about it.
00162   void RevertTokenIDToIdentifier() {
00163     assert(TokenID != tok::identifier && "Already at tok::identifier");
00164     TokenID = tok::identifier;
00165     RevertedTokenID = true;
00166   }
00167 
00168   /// \brief Return the preprocessor keyword ID for this identifier.
00169   ///
00170   /// For example, "define" will return tok::pp_define.
00171   tok::PPKeywordKind getPPKeywordID() const;
00172 
00173   /// \brief Return the Objective-C keyword ID for this identifier.
00174   ///
00175   /// For example, 'class' will return tok::objc_class if ObjC is enabled.
00176   tok::ObjCKeywordKind getObjCKeywordID() const {
00177     if (ObjCOrBuiltinID < tok::NUM_OBJC_KEYWORDS)
00178       return tok::ObjCKeywordKind(ObjCOrBuiltinID);
00179     else
00180       return tok::objc_not_keyword;
00181   }
00182   void setObjCKeywordID(tok::ObjCKeywordKind ID) { ObjCOrBuiltinID = ID; }
00183 
00184   /// \brief Return a value indicating whether this is a builtin function.
00185   ///
00186   /// 0 is not-built-in.  1 is builtin-for-some-nonprimary-target.
00187   /// 2+ are specific builtin functions.
00188   unsigned getBuiltinID() const {
00189     if (ObjCOrBuiltinID >= tok::NUM_OBJC_KEYWORDS)
00190       return ObjCOrBuiltinID - tok::NUM_OBJC_KEYWORDS;
00191     else
00192       return 0;
00193   }
00194   void setBuiltinID(unsigned ID) {
00195     ObjCOrBuiltinID = ID + tok::NUM_OBJC_KEYWORDS;
00196     assert(ObjCOrBuiltinID - unsigned(tok::NUM_OBJC_KEYWORDS) == ID
00197            && "ID too large for field!");
00198   }
00199 
00200   unsigned getObjCOrBuiltinID() const { return ObjCOrBuiltinID; }
00201   void setObjCOrBuiltinID(unsigned ID) { ObjCOrBuiltinID = ID; }
00202 
00203   /// isExtensionToken/setIsExtensionToken - Track whether or not this
00204   /// language token is an extension.  This controls extension warnings, and is
00205   /// only valid if a custom token ID is set.
00206   bool isExtensionToken() const { return IsExtension; }
00207   void setIsExtensionToken(bool Val) {
00208     IsExtension = Val;
00209     if (Val)
00210       NeedsHandleIdentifier = true;
00211     else
00212       RecomputeNeedsHandleIdentifier();
00213   }
00214 
00215   /// is/setIsCXX11CompatKeyword - Initialize information about whether or not
00216   /// this language token is a keyword in C++11. This controls compatibility
00217   /// warnings, and is only true when not parsing C++11. Once a compatibility
00218   /// problem has been diagnosed with this keyword, the flag will be cleared.
00219   bool isCXX11CompatKeyword() const { return IsCXX11CompatKeyword; }
00220   void setIsCXX11CompatKeyword(bool Val) {
00221     IsCXX11CompatKeyword = Val;
00222     if (Val)
00223       NeedsHandleIdentifier = true;
00224     else
00225       RecomputeNeedsHandleIdentifier();
00226   }
00227 
00228   /// setIsPoisoned - Mark this identifier as poisoned.  After poisoning, the
00229   /// Preprocessor will emit an error every time this token is used.
00230   void setIsPoisoned(bool Value = true) {
00231     IsPoisoned = Value;
00232     if (Value)
00233       NeedsHandleIdentifier = true;
00234     else
00235       RecomputeNeedsHandleIdentifier();
00236   }
00237 
00238   /// \brief Return true if this token has been poisoned.
00239   bool isPoisoned() const { return IsPoisoned; }
00240 
00241   /// isCPlusPlusOperatorKeyword/setIsCPlusPlusOperatorKeyword controls whether
00242   /// this identifier is a C++ alternate representation of an operator.
00243   void setIsCPlusPlusOperatorKeyword(bool Val = true) {
00244     IsCPPOperatorKeyword = Val;
00245     if (Val)
00246       NeedsHandleIdentifier = true;
00247     else
00248       RecomputeNeedsHandleIdentifier();
00249   }
00250   bool isCPlusPlusOperatorKeyword() const { return IsCPPOperatorKeyword; }
00251 
00252   /// getFETokenInfo/setFETokenInfo - The language front-end is allowed to
00253   /// associate arbitrary metadata with this token.
00254   template<typename T>
00255   T *getFETokenInfo() const { return static_cast<T*>(FETokenInfo); }
00256   void setFETokenInfo(void *T) { FETokenInfo = T; }
00257 
00258   /// \brief Return true if the Preprocessor::HandleIdentifier must be called
00259   /// on a token of this identifier.
00260   ///
00261   /// If this returns false, we know that HandleIdentifier will not affect
00262   /// the token.
00263   bool isHandleIdentifierCase() const { return NeedsHandleIdentifier; }
00264 
00265   /// \brief Return true if the identifier in its current state was loaded
00266   /// from an AST file.
00267   bool isFromAST() const { return IsFromAST; }
00268 
00269   void setIsFromAST() { IsFromAST = true; }
00270 
00271   /// \brief Determine whether this identifier has changed since it was loaded
00272   /// from an AST file.
00273   bool hasChangedSinceDeserialization() const {
00274     return ChangedAfterLoad;
00275   }
00276
00277   /// \brief Note that this identifier has changed since it was loaded from
00278   /// an AST file.
00279   void setChangedSinceDeserialization() {
00280     ChangedAfterLoad = true;
00281   }
00282 
00283   /// \brief Determine whether the information for this identifier is out of
00284   /// date with respect to the external source.
00285   bool isOutOfDate() const { return OutOfDate; }
00286
00287   /// \brief Set whether the information for this identifier is out of
00288   /// date with respect to the external source.
00289   void setOutOfDate(bool OOD) {
00290     OutOfDate = OOD;
00291     if (OOD)
00292       NeedsHandleIdentifier = true;
00293     else
00294       RecomputeNeedsHandleIdentifier();
00295   }
00296
00297   /// \brief Determine whether this is the contextual keyword \c import.
00298   bool isModulesImport() const { return IsModulesImport; }
00299
00300   /// \brief Set whether this identifier is the contextual keyword \c import.
00301   void setModulesImport(bool I) {
00302     IsModulesImport = I;
00303     if (I)
00304       NeedsHandleIdentifier = true;
00305     else
00306       RecomputeNeedsHandleIdentifier();
00307   }
00308
00309 private:
00310   /// The Preprocessor::HandleIdentifier does several special (but rare)
00311   /// things to identifiers of various sorts.  For example, it changes the
00312   /// \c for keyword token from tok::identifier to tok::for.
00313   ///
00314   /// This method is very tied to the definition of HandleIdentifier.  Any
00315   /// change to it should be reflected here.
00316   void RecomputeNeedsHandleIdentifier() {
00317     NeedsHandleIdentifier =
00318       (isPoisoned() || hasMacroDefinition() || isCPlusPlusOperatorKeyword() ||
00319        isExtensionToken() || isCXX11CompatKeyword() || isOutOfDate() ||
00320        isModulesImport());
00321   }
00322 };
00323 
00324 /// \brief Scoped guard that sets an identifier's poisoned flag on
00325 /// construction and puts the previous value back on destruction.
00326 ///
00327 /// \p II may be null; a guard built from a null pointer does nothing.
00328 class PoisonIdentifierRAIIObject {
00329   IdentifierInfo *const II;
00330   const bool OldValue;
00331 public:
00332   PoisonIdentifierRAIIObject(IdentifierInfo *II, bool NewValue)
00333     : II(II), OldValue(II && II->isPoisoned()) {
00334     if (!II) return;
00335     II->setIsPoisoned(NewValue);
00336   }
00337 
00338   ~PoisonIdentifierRAIIObject() {
00339     if (!II) return;
00340     II->setIsPoisoned(OldValue);
00341   }
00342 };
00343 
00344 /// \brief An iterator that walks over all of the known identifiers
00345 /// in the lookup table.
00346 ///
00347 /// Since this iterator uses an abstract interface via virtual
00348 /// functions, it uses an object-oriented interface rather than the
00349 /// more standard C++ STL iterator interface. In this OO-style
00350 /// iteration, the single function \c Next() provides dereference,
00351 /// advance, and end-of-sequence checking in a single
00352 /// operation. Subclasses of this iterator type will provide the
00353 /// actual functionality.
00354 class IdentifierIterator {
00355 private:  // Copy operations are private and marked LLVM_DELETED_FUNCTION.
00356   IdentifierIterator(const IdentifierIterator &) LLVM_DELETED_FUNCTION;
00357   void operator=(const IdentifierIterator &) LLVM_DELETED_FUNCTION;
00358 
00359 protected:  // Construction is reserved for subclasses.
00360   IdentifierIterator() { }
00361   
00362 public:
00363   virtual ~IdentifierIterator();
00364 
00365   /// \brief Retrieve the next string in the identifier table and
00366   /// advance the iterator to the following string.
00367   ///
00368   /// \returns The next string in the identifier table. If there is
00369   /// no such string, returns an empty \c StringRef.
00370   virtual StringRef Next() = 0;
00371 };
00372 
00373 /// \brief Provides lookups to, and iteration over, IdentifierInfo objects.
00374 class IdentifierInfoLookup {
00375 public:
00376   virtual ~IdentifierInfoLookup();
00377 
00378   /// \brief Return the IdentifierInfo for the specified named identifier.
00379   ///
00380   /// Unlike the version in IdentifierTable, this returns a pointer instead
00381   /// of a reference.  If the pointer is null then the IdentifierInfo cannot
00382   /// be found.
00383   virtual IdentifierInfo* get(StringRef Name) = 0;
00384 
00385   /// \brief Retrieve an iterator into the set of all identifiers
00386   /// known to this identifier lookup source.
00387   ///
00388   /// This routine provides access to all of the identifiers known to
00389   /// the identifier lookup, allowing access to the contents of the
00390   /// identifiers without introducing the overhead of constructing
00391   /// IdentifierInfo objects for each.
00392   ///
00393   /// \returns A new iterator into the set of known identifiers. The
00394   /// caller is responsible for deleting this iterator.
00395   virtual IdentifierIterator *getIdentifiers();  // Not pure, unlike get().
00396 };
00397 
00398 /// \brief An abstract class used to resolve numerical identifier
00399 /// references (meaningful only to some external source) into
00400 /// IdentifierInfo pointers.
00401 class ExternalIdentifierLookup {
00402 public:
00403   virtual ~ExternalIdentifierLookup();
00404 
00405   /// \brief Return the identifier associated with the given ID number.
00406   ///
00407   /// The ID 0 is associated with the null identifier.
00408   virtual IdentifierInfo *GetIdentifier(unsigned ID) = 0;
00409 };
00410 
00411 /// \brief Maps identifier spellings to their IdentifierInfo nodes.
00412 ///
00413 /// The table does nothing else, yet it is an extremely performance-critical
00414 /// piece of the code: each occurrence of every identifier goes through
00415 /// here when lexed.
00416 class IdentifierTable {
00417   // Shark shows that using MallocAllocator is *much* slower than using this
00418   // BumpPtrAllocator!
00419   typedef llvm::StringMap<IdentifierInfo*, llvm::BumpPtrAllocator> HashTableTy;
00420   HashTableTy HashTable;
00421 
00422   IdentifierInfoLookup* ExternalLookup;
00423 
00424 public:
00425   /// \brief Build the identifier table and populate it with info about the
00426   /// keywords of the language selected by \p LangOpts.
00427   IdentifierTable(const LangOptions &LangOpts,
00428                   IdentifierInfoLookup* externalLookup = nullptr);
00429 
00430   /// \brief Install the external identifier lookup mechanism.
00431   void setExternalIdentifierLookup(IdentifierInfoLookup *IILookup) {
00432     ExternalLookup = IILookup;
00433   }
00434 
00435   /// \brief Return the external identifier lookup object, if any.
00436   IdentifierInfoLookup *getExternalIdentifierLookup() const {
00437     return ExternalLookup;
00438   }
00439   /// \brief The hash table's allocator; get()/getOwn() allocate from it.
00440   llvm::BumpPtrAllocator& getAllocator() {
00441     return HashTable.getAllocator();
00442   }
00443 
00444   /// \brief Return the identifier token info for the specified named
00445   /// identifier, creating it if neither the table nor the external lookup has it.
00446   IdentifierInfo &get(StringRef Name) {
00447     llvm::StringMapEntry<IdentifierInfo*> &Slot =
00448       HashTable.GetOrCreateValue(Name);
00449 
00450     // Fast path: the table already holds an IdentifierInfo for this name.
00451     if (IdentifierInfo *Known = Slot.getValue())
00452       return *Known;
00453 
00454     // No entry; if we have an external lookup, look there first.
00455     IdentifierInfo *Info = nullptr;
00456     if (ExternalLookup)
00457       Info = ExternalLookup->get(Name);
00458 
00459     if (!Info) {
00460       // Lookups failed, make a new IdentifierInfo.
00461       void *Storage = getAllocator().Allocate<IdentifierInfo>();
00462       Info = new (Storage) IdentifierInfo();
00463 
00464       // Make sure getName() knows how to find the IdentifierInfo
00465       // contents.
00466       Info->Entry = &Slot;
00467     }
00468 
00469     // Either way, record the result in the StringMap so later lookups
00470     // hit the fast path above.
00471     Slot.setValue(Info);
00472     return *Info;
00473   }
00474 
00475   IdentifierInfo &get(StringRef Name, tok::TokenKind TokenCode) {
00476     IdentifierInfo &Info = get(Name);
00477     Info.TokenID = TokenCode;
00478     assert(Info.TokenID == (unsigned) TokenCode && "TokenCode too large");
00479     return Info;
00480   }
00481 
00482   /// \brief Gets an IdentifierInfo for the given name without consulting
00483   ///        external sources.
00484   ///
00485   /// This is a version of get() meant for external sources that want to
00486   /// introduce or modify an identifier. If they called get(), they would
00487   /// likely end up in a recursion.
00488   IdentifierInfo &getOwn(StringRef Name) {
00489     llvm::StringMapEntry<IdentifierInfo*> &Slot =
00490       HashTable.GetOrCreateValue(Name);
00491 
00492     // An identifier that is already in the table is returned untouched.
00493     if (IdentifierInfo *Known = Slot.getValue())
00494       return *Known;
00495 
00496     // Lookups failed, make a new IdentifierInfo.
00497     void *Storage = getAllocator().Allocate<IdentifierInfo>();
00498     IdentifierInfo *Fresh = new (Storage) IdentifierInfo();
00499     Slot.setValue(Fresh);
00500 
00501     // Make sure getName() knows how to find the IdentifierInfo
00502     // contents.
00503     Fresh->Entry = &Slot;
00504 
00505     // If this is the 'import' contextual keyword, mark it as such.
00506     if (Name.equals("import"))
00507       Fresh->setModulesImport(true);
00508 
00509     return *Fresh;
00510   }
00511 
00512   typedef HashTableTy::const_iterator iterator;
00513   typedef HashTableTy::const_iterator const_iterator;
00514 
00515   iterator begin() const { return HashTable.begin(); }
00516   iterator end() const   { return HashTable.end(); }
00517   unsigned size() const { return HashTable.size(); }
00518 
00519   /// \brief Print some statistics to stderr that indicate how well the
00520   /// hashing is doing.
00521   void PrintStats() const;
00522 
00523   void AddKeywords(const LangOptions &LangOpts);
00524 };
00525 
00526 /// \brief A family of Objective-C methods.
00527 ///
00528 /// These families have no inherent meaning in the language, but are
00529 /// nonetheless central enough in the existing implementations to
00530 /// merit direct AST support.  While, in theory, arbitrary methods can
00531 /// be considered to form families, we focus here on the methods
00532 /// involving allocation and retain-count management, as these are the
00533 /// most "core" and the most likely to be useful to diverse clients
00534 /// without extra information.
00535 ///
00536 /// Both selectors and actual method declarations may be classified
00537 /// into families.  Method families may impose additional restrictions
00538 /// beyond their selector name; for example, a method called '_init'
00539 /// that returns void is not considered to be in the 'init' family
00540 /// (but would be if it returned 'id').  It is also possible to
00541 /// explicitly change or remove a method's family.  Therefore the
00542 /// method's family should be considered the single source of truth.
00543 enum ObjCMethodFamily {
00544   /// \brief No particular method family.
00545   OMF_None,
00546 
00547   // Selectors in these families may have arbitrary arity, may be
00548   // written with arbitrary leading underscores, and may have
00549   // additional CamelCase "words" in their first selector chunk
00550   // following the family name.
00551   OMF_alloc,
00552   OMF_copy,
00553   OMF_init,
00554   OMF_mutableCopy,
00555   OMF_new,
00556 
00557   // These families are singletons consisting only of the nullary
00558   // selector with the given name.
00559   OMF_autorelease,
00560   OMF_dealloc,
00561   OMF_finalize,
00562   OMF_release,
00563   OMF_retain,
00564   OMF_retainCount,
00565   OMF_self,
00566   OMF_initialize,
00567 
00568   // The performSelector family.
00569   OMF_performSelector  // Last enumerator; its implicit value is 14.
00570 };
00571 
00572 /// Enough bits to store any enumerator in ObjCMethodFamily or
00573 /// InvalidObjCMethodFamily.
00574 enum { ObjCMethodFamilyBitWidth = 4 };
00575 
00576 /// \brief An invalid value of ObjCMethodFamily: all bits set, i.e. 15.
00577 enum { InvalidObjCMethodFamily = (1 << ObjCMethodFamilyBitWidth) - 1 };
00578 
00579 /// \brief A family of Objective-C methods.
00580 ///
00581 /// These are families of methods whose result type is initially 'id', but
00582 /// are candidates for the result type to be changed to 'instancetype'.
00583 enum ObjCInstanceTypeFamily {
00584   OIT_None,
00585   OIT_Array,
00586   OIT_Dictionary,
00587   OIT_Singleton,
00588   OIT_Init,
00589   OIT_ReturnsSelf
00590 };
00591 
00592 enum ObjCStringFormatFamily {  // Result of Selector::getStringFormatFamily().
00593   SFF_None,
00594   SFF_NSString,
00595   SFF_CFString
00596 };
00597 
00598 /// \brief Smart pointer class that efficiently represents Objective-C method
00599 /// names.
00600 ///
00601 /// This class will either point to an IdentifierInfo or a
00602 /// MultiKeywordSelector (which is private). This enables us to optimize
00603 /// selectors that take no arguments and selectors that take 1 argument, which
00604 /// accounts for 78% of all selectors in Cocoa.h.
00605 class Selector {
00606   friend class Diagnostic;
00607 
00608   enum IdentifierInfoFlag {
00609     // Empty selector = 0.
00610     ZeroArg  = 0x1,
00611     OneArg   = 0x2,
00612     MultiArg = 0x3,
00613     ArgFlags = ZeroArg|OneArg
00614   };
00615   uintptr_t InfoPtr; // Pointer bits; the low ArgFlags bits hold the flag.
00616 
00617   Selector(IdentifierInfo *II, unsigned nArgs) {
00618     InfoPtr = reinterpret_cast<uintptr_t>(II);
00619     assert((InfoPtr & ArgFlags) == 0 &&"Insufficiently aligned IdentifierInfo");
00620     assert(nArgs < 2 && "nArgs not equal to 0/1");
00621     InfoPtr |= nArgs+1;  // 0 args -> ZeroArg, 1 arg -> OneArg.
00622   }
00623   Selector(MultiKeywordSelector *SI) {
00624     InfoPtr = reinterpret_cast<uintptr_t>(SI);
00625     assert((InfoPtr & ArgFlags) == 0 &&
00626            "Insufficiently aligned MultiKeywordSelector");
00627     InfoPtr |= MultiArg;
00628   }
00629 
00630   IdentifierInfo *getAsIdentifierInfo() const {
00631     if (getIdentifierInfoFlag() < MultiArg)
00632       return reinterpret_cast<IdentifierInfo *>(InfoPtr & ~ArgFlags);
00633     return nullptr;
00634   }
00635   MultiKeywordSelector *getMultiKeywordSelector() const {
00636     return reinterpret_cast<MultiKeywordSelector *>(InfoPtr & ~ArgFlags);
00637   }
00638
00639   unsigned getIdentifierInfoFlag() const {
00640     return InfoPtr & ArgFlags;
00641   }
00642 
00643   static ObjCMethodFamily getMethodFamilyImpl(Selector sel);
00644   static ObjCStringFormatFamily getStringFormatFamilyImpl(Selector sel);
00645 
00646 public:
00647   friend class SelectorTable; // only the SelectorTable can create these
00648   friend class DeclarationName; // and the AST's DeclarationName.
00649 
00650   /// The default ctor should only be used when creating data structures that
00651   ///  will contain selectors.
00652   Selector() : InfoPtr(0) {}
00653   Selector(uintptr_t V) : InfoPtr(V) {}
00654 
00655   /// operator==/!= - Indicate whether the specified selectors are identical.
00656   bool operator==(Selector RHS) const {
00657     return InfoPtr == RHS.InfoPtr;
00658   }
00659   bool operator!=(Selector RHS) const {
00660     return InfoPtr != RHS.InfoPtr;
00661   }
00662   void *getAsOpaquePtr() const {
00663     return reinterpret_cast<void*>(InfoPtr);
00664   }
00665 
00666   /// \brief Determine whether this is the empty selector.
00667   bool isNull() const { return InfoPtr == 0; }
00668 
00669   // Predicates to identify the selector type.
00670   bool isKeywordSelector() const {
00671     return getIdentifierInfoFlag() != ZeroArg;
00672   }
00673   bool isUnarySelector() const {
00674     return getIdentifierInfoFlag() == ZeroArg;
00675   }
00676   unsigned getNumArgs() const;
00677
00678
00679   /// \brief Retrieve the identifier at a given position in the selector.
00680   ///
00681   /// Note that the identifier pointer returned may be NULL. Clients that only
00682   /// care about the text of the identifier string, and not the specific,
00683   /// uniqued identifier pointer, should use \c getNameForSlot(), which returns
00684   /// an empty string when the identifier pointer would be NULL.
00685   ///
00686   /// \param argIndex The index for which we want to retrieve the identifier.
00687   /// This index shall be less than \c getNumArgs() unless this is a keyword
00688   /// selector, in which case 0 is the only permissible value.
00689   ///
00690   /// \returns the uniqued identifier for this slot, or NULL if this slot has
00691   /// no corresponding identifier.
00692   IdentifierInfo *getIdentifierInfoForSlot(unsigned argIndex) const;
00693
00694   /// \brief Retrieve the name at a given position in the selector.
00695   ///
00696   /// \param argIndex The index for which we want to retrieve the name.
00697   /// This index shall be less than \c getNumArgs() unless this is a keyword
00698   /// selector, in which case 0 is the only permissible value.
00699   ///
00700   /// \returns the name for this slot, which may be the empty string if no
00701   /// name was supplied.
00702   StringRef getNameForSlot(unsigned argIndex) const;
00703
00704   /// \brief Derive the full selector name (e.g. "foo:bar:") and return
00705   /// it as an std::string.
00706   std::string getAsString() const;
00707 
00708   /// \brief Prints the full selector name (e.g. "foo:bar:").
00709   void print(llvm::raw_ostream &OS) const;
00710 
00711   /// \brief Derive the conventional family of this method.
00712   ObjCMethodFamily getMethodFamily() const {
00713     return getMethodFamilyImpl(*this);
00714   }
00715
00716   ObjCStringFormatFamily getStringFormatFamily() const {
00717     return getStringFormatFamilyImpl(*this);
00718   }
00719
00720   static Selector getEmptyMarker() {
00721     return Selector(uintptr_t(-1));
00722   }
00723   static Selector getTombstoneMarker() {
00724     return Selector(uintptr_t(-2));
00725   }
00726
00727   static ObjCInstanceTypeFamily getInstTypeMethodFamily(Selector sel);
00728 };
00729 
00730 /// \brief This table allows us to fully hide how we implement
00731 /// multi-keyword caching.
00732 class SelectorTable {
00733   void *Impl;  // Actually a SelectorTableImpl
00734   SelectorTable(const SelectorTable &) LLVM_DELETED_FUNCTION;
00735   void operator=(const SelectorTable &) LLVM_DELETED_FUNCTION;
00736 public:
00737   SelectorTable();
00738   ~SelectorTable();
00739 
00740   /// \brief Can create any sort of selector.
00741   ///
00742   /// \p NumArgs indicates whether this is a no argument selector "foo", a
00743   /// single argument selector "foo:" or multi-argument "foo:bar:".
00744   Selector getSelector(unsigned NumArgs, IdentifierInfo **IIV);
00745 
00746   Selector getUnarySelector(IdentifierInfo *ID) {
00747     return Selector(ID, 1);  // Builds a selector with an argument count of 1.
00748   }
00749   Selector getNullarySelector(IdentifierInfo *ID) {
00750     return Selector(ID, 0);  // Builds a selector with an argument count of 0.
00751   }
00752 
00753   /// \brief Return the total amount of memory allocated for managing selectors.
00754   size_t getTotalMemory() const;
00755 
00756   /// \brief Return the default setter name for the given identifier.
00757   ///
00758   /// This is "set" + \p Name where the initial character of \p Name
00759   /// has been capitalized.
00760   static SmallString<64> constructSetterName(StringRef Name);
00761 
00762   /// \brief Return the default setter selector for the given identifier.
00763   ///
00764   /// This is "set" + \p Name where the initial character of \p Name
00765   /// has been capitalized.
00766   static Selector constructSetterSelector(IdentifierTable &Idents,
00767                                           SelectorTable &SelTable,
00768                                           const IdentifierInfo *Name);
00769 };
00770 
00771 /// DeclarationNameExtra - Common base of the MultiKeywordSelector,
00772 /// CXXSpecialName, and CXXOperatorIdName classes, all of which are
00773 /// private classes that describe different kinds of names.
00774 class DeclarationNameExtra {
00775 public:
00776   /// ExtraKind - The kind of "extra" information stored in the
00777   /// DeclarationName. See @c ExtraKindOrNumArgs for an explanation of
00778   /// how these enumerator values are used.
00779   enum ExtraKind {
00780     CXXConstructor = 0,
00781     CXXDestructor,
00782     CXXConversionFunction,
00783 #define OVERLOADED_OPERATOR(Name,Spelling,Token,Unary,Binary,MemberOnly) \
00784     CXXOperator##Name,
00785 #include "clang/Basic/OperatorKinds.def"
00786     CXXLiteralOperator,
00787     CXXUsingDirective,
00788     NUM_EXTRA_KINDS
00789   };
00790 
00791   /// ExtraKindOrNumArgs - Either the kind of C++ special name or
00792   /// operator-id (if the value is one of the CXX* enumerators of
00793   /// ExtraKind), in which case the DeclarationNameExtra is also a
00794   /// CXXSpecialName (for CXXConstructor, CXXDestructor, or
00795   /// CXXConversionFunction), CXXOperatorIdName, or CXXLiteralOperatorName;
00796   /// it may also be the name common to C++ using-directives
00797   /// (CXXUsingDirective); otherwise it is NUM_EXTRA_KINDS+NumArgs, where
00798   /// NumArgs is the number of arguments in the Objective-C selector, in
00799   /// which case the DeclarationNameExtra is also a MultiKeywordSelector.
00800   unsigned ExtraKindOrNumArgs;
00801 };
00802 
00803 }  // end namespace clang
00804 
00805 namespace llvm {
00806 /// Define DenseMapInfo so that Selectors can be used as keys in DenseMaps and
00807 /// DenseSets.  The empty and tombstone keys are Selector's own marker values.
00808 template <>
00809 struct DenseMapInfo<clang::Selector> {
00810   static inline clang::Selector getEmptyKey() {
00811     return clang::Selector::getEmptyMarker();
00812   }
00813   static inline clang::Selector getTombstoneKey() {
00814     return clang::Selector::getTombstoneMarker();
00815   }
00816 
00817   static unsigned getHashValue(clang::Selector S);
00818 
00819   static bool isEqual(clang::Selector LHS, clang::Selector RHS) {
00820     return LHS == RHS;
00821   }
00822 };
00823 
00824 template <>  // Specializes isPodLike so that value is true for Selector.
00825 struct isPodLike<clang::Selector> { static const bool value = true; };
00826 
00827 template <typename T> class PointerLikeTypeTraits;
00828 
00829 template<>  // Converts a Selector to and from its opaque pointer value.
00830 class PointerLikeTypeTraits<clang::Selector> {
00831 public:
00832   static inline const void *getAsVoidPointer(clang::Selector P) {
00833     return P.getAsOpaquePtr();
00834   }
00835   static inline clang::Selector getFromVoidPointer(const void *P) {
00836     return clang::Selector(reinterpret_cast<uintptr_t>(P));
00837   }
00838   enum { NumLowBitsAvailable = 0 };  // No low bits are advertised as free.
00839 };
00840 
00841 // Provide PointerLikeTypeTraits for IdentifierInfo pointers, which
00842 // are not guaranteed to be 8-byte aligned; only one low bit is advertised.
00843 template<>
00844 class PointerLikeTypeTraits<clang::IdentifierInfo*> {
00845 public:
00846   static inline void *getAsVoidPointer(clang::IdentifierInfo* P) {
00847     return P;
00848   }
00849   static inline clang::IdentifierInfo *getFromVoidPointer(void *P) {
00850     return static_cast<clang::IdentifierInfo*>(P);
00851   }
00852   enum { NumLowBitsAvailable = 1 };
00853 };
00854 
00855 template<>  // Const-pointer counterpart of the IdentifierInfo* traits.
00856 class PointerLikeTypeTraits<const clang::IdentifierInfo*> {
00857 public:
00858   static inline const void *getAsVoidPointer(const clang::IdentifierInfo* P) {
00859     return P;
00860   }
00861   static inline const clang::IdentifierInfo *getFromVoidPointer(const void *P) {
00862     return static_cast<const clang::IdentifierInfo*>(P);
00863   }
00864   enum { NumLowBitsAvailable = 1 };
00865 };
00866 
00867 }  // end namespace llvm
00868 #endif