Home | History | Annotate | Line # | Download | only in Basic
      1 //===- IdentifierTable.h - Hash table for identifier lookup -----*- C++ -*-===//
      2 //
      3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
      4 // See https://llvm.org/LICENSE.txt for license information.
      5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
      6 //
      7 //===----------------------------------------------------------------------===//
      8 //
      9 /// \file
     10 /// Defines the clang::IdentifierInfo, clang::IdentifierTable, and
     11 /// clang::Selector interfaces.
     12 //
     13 //===----------------------------------------------------------------------===//
     14 
     15 #ifndef LLVM_CLANG_BASIC_IDENTIFIERTABLE_H
     16 #define LLVM_CLANG_BASIC_IDENTIFIERTABLE_H
     17 
     18 #include "clang/Basic/LLVM.h"
     19 #include "clang/Basic/TokenKinds.h"
     20 #include "llvm/ADT/DenseMapInfo.h"
     21 #include "llvm/ADT/SmallString.h"
     22 #include "llvm/ADT/StringMap.h"
     23 #include "llvm/ADT/StringRef.h"
     24 #include "llvm/Support/Allocator.h"
     25 #include "llvm/Support/PointerLikeTypeTraits.h"
     26 #include "llvm/Support/type_traits.h"
     27 #include <cassert>
     28 #include <cstddef>
     29 #include <cstdint>
     30 #include <cstring>
     31 #include <string>
     32 #include <utility>
     33 
     34 namespace clang {
     35 
     36 class DeclarationName;
     37 class DeclarationNameTable;
     38 class IdentifierInfo;
     39 class LangOptions;
     40 class MultiKeywordSelector;
     41 class SourceLocation;
     42 
/// Classification of an identifier with respect to the names reserved for
/// the implementation; this is the result type of
/// IdentifierInfo::isReserved().
enum class ReservedIdentifierStatus {
  /// The identifier is not reserved.
  NotReserved = 0,

  /// Reserved: starts with an underscore at global scope.
  StartsWithUnderscoreAtGlobalScope,

  /// Reserved: starts with two underscores.
  StartsWithDoubleUnderscore,

  /// Reserved: starts with an underscore followed by a capital letter.
  StartsWithUnderscoreFollowedByCapitalLetter,

  /// Reserved: contains two consecutive underscores.
  ContainsDoubleUnderscore,
};
     50 
     51 /// A simple pair of identifier info and location.
     52 using IdentifierLocPair = std::pair<IdentifierInfo *, SourceLocation>;
     53 
/// Alignment, in bytes, of IdentifierInfo and related classes.
///
/// Eight-byte alignment leaves the low 3 bits of a pointer to one of these
/// classes free, which DeclarationName relies on.
enum { IdentifierInfoAlignment = 8 };

/// Width, in bits, of the IdentifierInfo bit-field that stores either an
/// Objective-C keyword ID or a builtin ID.
static constexpr int ObjCOrBuiltinIDBits = 15;
     60 
     61 /// One of these records is kept for each identifier that
     62 /// is lexed.  This contains information about whether the token was \#define'd,
     63 /// is a language keyword, or if it is a front-end token of some sort (e.g. a
     64 /// variable or function name).  The preprocessor keeps this information in a
     65 /// set, and all tok::identifier tokens have a pointer to one of these.
     66 /// It is aligned to 8 bytes because DeclarationName needs the lower 3 bits.
     67 class alignas(IdentifierInfoAlignment) IdentifierInfo {
     68   friend class IdentifierTable;
     69 
     70   // Front-end token ID or tok::identifier.
     71   unsigned TokenID : 9;
     72 
     73   // ObjC keyword ('protocol' in '@protocol') or builtin (__builtin_inf).
     74   // First NUM_OBJC_KEYWORDS values are for Objective-C,
     75   // the remaining values are for builtins.
     76   unsigned ObjCOrBuiltinID : ObjCOrBuiltinIDBits;
     77 
     78   // True if there is a #define for this.
     79   unsigned HasMacro : 1;
     80 
     81   // True if there was a #define for this.
     82   unsigned HadMacro : 1;
     83 
     84   // True if the identifier is a language extension.
     85   unsigned IsExtension : 1;
     86 
     87   // True if the identifier is a keyword in a newer or proposed Standard.
     88   unsigned IsFutureCompatKeyword : 1;
     89 
     90   // True if the identifier is poisoned.
     91   unsigned IsPoisoned : 1;
     92 
     93   // True if the identifier is a C++ operator keyword.
     94   unsigned IsCPPOperatorKeyword : 1;
     95 
     96   // Internal bit set by the member function RecomputeNeedsHandleIdentifier.
     97   // See comment about RecomputeNeedsHandleIdentifier for more info.
     98   unsigned NeedsHandleIdentifier : 1;
     99 
    100   // True if the identifier was loaded (at least partially) from an AST file.
    101   unsigned IsFromAST : 1;
    102 
    103   // True if the identifier has changed from the definition
    104   // loaded from an AST file.
    105   unsigned ChangedAfterLoad : 1;
    106 
    107   // True if the identifier's frontend information has changed from the
    108   // definition loaded from an AST file.
    109   unsigned FEChangedAfterLoad : 1;
    110 
    111   // True if revertTokenIDToIdentifier was called.
    112   unsigned RevertedTokenID : 1;
    113 
    114   // True if there may be additional information about
    115   // this identifier stored externally.
    116   unsigned OutOfDate : 1;
    117 
    118   // True if this is the 'import' contextual keyword.
    119   unsigned IsModulesImport : 1;
    120 
    121   // True if this is a mangled OpenMP variant name.
    122   unsigned IsMangledOpenMPVariantName : 1;
    123 
    124   // 28 bits left in a 64-bit word.
    125 
    126   // Managed by the language front-end.
    127   void *FETokenInfo = nullptr;
    128 
    129   llvm::StringMapEntry<IdentifierInfo *> *Entry = nullptr;
    130 
    131   IdentifierInfo()
    132       : TokenID(tok::identifier), ObjCOrBuiltinID(0), HasMacro(false),
    133         HadMacro(false), IsExtension(false), IsFutureCompatKeyword(false),
    134         IsPoisoned(false), IsCPPOperatorKeyword(false),
    135         NeedsHandleIdentifier(false), IsFromAST(false), ChangedAfterLoad(false),
    136         FEChangedAfterLoad(false), RevertedTokenID(false), OutOfDate(false),
    137         IsModulesImport(false), IsMangledOpenMPVariantName(false) {}
    138 
    139 public:
    140   IdentifierInfo(const IdentifierInfo &) = delete;
    141   IdentifierInfo &operator=(const IdentifierInfo &) = delete;
    142   IdentifierInfo(IdentifierInfo &&) = delete;
    143   IdentifierInfo &operator=(IdentifierInfo &&) = delete;
    144 
    145   /// Return true if this is the identifier for the specified string.
    146   ///
    147   /// This is intended to be used for string literals only: II->isStr("foo").
    148   template <std::size_t StrLen>
    149   bool isStr(const char (&Str)[StrLen]) const {
    150     return getLength() == StrLen-1 &&
    151            memcmp(getNameStart(), Str, StrLen-1) == 0;
    152   }
    153 
    154   /// Return true if this is the identifier for the specified StringRef.
    155   bool isStr(llvm::StringRef Str) const {
    156     llvm::StringRef ThisStr(getNameStart(), getLength());
    157     return ThisStr == Str;
    158   }
    159 
    160   /// Return the beginning of the actual null-terminated string for this
    161   /// identifier.
    162   const char *getNameStart() const { return Entry->getKeyData(); }
    163 
    164   /// Efficiently return the length of this identifier info.
    165   unsigned getLength() const { return Entry->getKeyLength(); }
    166 
    167   /// Return the actual identifier string.
    168   StringRef getName() const {
    169     return StringRef(getNameStart(), getLength());
    170   }
    171 
    172   /// Return true if this identifier is \#defined to some other value.
    173   /// \note The current definition may be in a module and not currently visible.
    174   bool hasMacroDefinition() const {
    175     return HasMacro;
    176   }
    177   void setHasMacroDefinition(bool Val) {
    178     if (HasMacro == Val) return;
    179 
    180     HasMacro = Val;
    181     if (Val) {
    182       NeedsHandleIdentifier = true;
    183       HadMacro = true;
    184     } else {
    185       RecomputeNeedsHandleIdentifier();
    186     }
    187   }
    188   /// Returns true if this identifier was \#defined to some value at any
    189   /// moment. In this case there should be an entry for the identifier in the
    190   /// macro history table in Preprocessor.
    191   bool hadMacroDefinition() const {
    192     return HadMacro;
    193   }
    194 
    195   /// If this is a source-language token (e.g. 'for'), this API
    196   /// can be used to cause the lexer to map identifiers to source-language
    197   /// tokens.
    198   tok::TokenKind getTokenID() const { return (tok::TokenKind)TokenID; }
    199 
    200   /// True if revertTokenIDToIdentifier() was called.
    201   bool hasRevertedTokenIDToIdentifier() const { return RevertedTokenID; }
    202 
    203   /// Revert TokenID to tok::identifier; used for GNU libstdc++ 4.2
    204   /// compatibility.
    205   ///
    206   /// TokenID is normally read-only but there are 2 instances where we revert it
    207   /// to tok::identifier for libstdc++ 4.2. Keep track of when this happens
    208   /// using this method so we can inform serialization about it.
    209   void revertTokenIDToIdentifier() {
    210     assert(TokenID != tok::identifier && "Already at tok::identifier");
    211     TokenID = tok::identifier;
    212     RevertedTokenID = true;
    213   }
    214   void revertIdentifierToTokenID(tok::TokenKind TK) {
    215     assert(TokenID == tok::identifier && "Should be at tok::identifier");
    216     TokenID = TK;
    217     RevertedTokenID = false;
    218   }
    219 
    220   /// Return the preprocessor keyword ID for this identifier.
    221   ///
    222   /// For example, "define" will return tok::pp_define.
    223   tok::PPKeywordKind getPPKeywordID() const;
    224 
    225   /// Return the Objective-C keyword ID for the this identifier.
    226   ///
    227   /// For example, 'class' will return tok::objc_class if ObjC is enabled.
    228   tok::ObjCKeywordKind getObjCKeywordID() const {
    229     if (ObjCOrBuiltinID < tok::NUM_OBJC_KEYWORDS)
    230       return tok::ObjCKeywordKind(ObjCOrBuiltinID);
    231     else
    232       return tok::objc_not_keyword;
    233   }
    234   void setObjCKeywordID(tok::ObjCKeywordKind ID) { ObjCOrBuiltinID = ID; }
    235 
    236   /// Return a value indicating whether this is a builtin function.
    237   ///
    238   /// 0 is not-built-in. 1+ are specific builtin functions.
    239   unsigned getBuiltinID() const {
    240     if (ObjCOrBuiltinID >= tok::NUM_OBJC_KEYWORDS)
    241       return ObjCOrBuiltinID - tok::NUM_OBJC_KEYWORDS;
    242     else
    243       return 0;
    244   }
    245   void setBuiltinID(unsigned ID) {
    246     ObjCOrBuiltinID = ID + tok::NUM_OBJC_KEYWORDS;
    247     assert(ObjCOrBuiltinID - unsigned(tok::NUM_OBJC_KEYWORDS) == ID
    248            && "ID too large for field!");
    249   }
    250 
    251   unsigned getObjCOrBuiltinID() const { return ObjCOrBuiltinID; }
    252   void setObjCOrBuiltinID(unsigned ID) { ObjCOrBuiltinID = ID; }
    253 
    254   /// get/setExtension - Initialize information about whether or not this
    255   /// language token is an extension.  This controls extension warnings, and is
    256   /// only valid if a custom token ID is set.
    257   bool isExtensionToken() const { return IsExtension; }
    258   void setIsExtensionToken(bool Val) {
    259     IsExtension = Val;
    260     if (Val)
    261       NeedsHandleIdentifier = true;
    262     else
    263       RecomputeNeedsHandleIdentifier();
    264   }
    265 
    266   /// is/setIsFutureCompatKeyword - Initialize information about whether or not
    267   /// this language token is a keyword in a newer or proposed Standard. This
    268   /// controls compatibility warnings, and is only true when not parsing the
    269   /// corresponding Standard. Once a compatibility problem has been diagnosed
    270   /// with this keyword, the flag will be cleared.
    271   bool isFutureCompatKeyword() const { return IsFutureCompatKeyword; }
    272   void setIsFutureCompatKeyword(bool Val) {
    273     IsFutureCompatKeyword = Val;
    274     if (Val)
    275       NeedsHandleIdentifier = true;
    276     else
    277       RecomputeNeedsHandleIdentifier();
    278   }
    279 
    280   /// setIsPoisoned - Mark this identifier as poisoned.  After poisoning, the
    281   /// Preprocessor will emit an error every time this token is used.
    282   void setIsPoisoned(bool Value = true) {
    283     IsPoisoned = Value;
    284     if (Value)
    285       NeedsHandleIdentifier = true;
    286     else
    287       RecomputeNeedsHandleIdentifier();
    288   }
    289 
    290   /// Return true if this token has been poisoned.
    291   bool isPoisoned() const { return IsPoisoned; }
    292 
    293   /// isCPlusPlusOperatorKeyword/setIsCPlusPlusOperatorKeyword controls whether
    294   /// this identifier is a C++ alternate representation of an operator.
    295   void setIsCPlusPlusOperatorKeyword(bool Val = true) {
    296     IsCPPOperatorKeyword = Val;
    297   }
    298   bool isCPlusPlusOperatorKeyword() const { return IsCPPOperatorKeyword; }
    299 
    300   /// Return true if this token is a keyword in the specified language.
    301   bool isKeyword(const LangOptions &LangOpts) const;
    302 
    303   /// Return true if this token is a C++ keyword in the specified
    304   /// language.
    305   bool isCPlusPlusKeyword(const LangOptions &LangOpts) const;
    306 
    307   /// Get and set FETokenInfo. The language front-end is allowed to associate
    308   /// arbitrary metadata with this token.
    309   void *getFETokenInfo() const { return FETokenInfo; }
    310   void setFETokenInfo(void *T) { FETokenInfo = T; }
    311 
    312   /// Return true if the Preprocessor::HandleIdentifier must be called
    313   /// on a token of this identifier.
    314   ///
    315   /// If this returns false, we know that HandleIdentifier will not affect
    316   /// the token.
    317   bool isHandleIdentifierCase() const { return NeedsHandleIdentifier; }
    318 
    319   /// Return true if the identifier in its current state was loaded
    320   /// from an AST file.
    321   bool isFromAST() const { return IsFromAST; }
    322 
    323   void setIsFromAST() { IsFromAST = true; }
    324 
    325   /// Determine whether this identifier has changed since it was loaded
    326   /// from an AST file.
    327   bool hasChangedSinceDeserialization() const {
    328     return ChangedAfterLoad;
    329   }
    330 
    331   /// Note that this identifier has changed since it was loaded from
    332   /// an AST file.
    333   void setChangedSinceDeserialization() {
    334     ChangedAfterLoad = true;
    335   }
    336 
    337   /// Determine whether the frontend token information for this
    338   /// identifier has changed since it was loaded from an AST file.
    339   bool hasFETokenInfoChangedSinceDeserialization() const {
    340     return FEChangedAfterLoad;
    341   }
    342 
    343   /// Note that the frontend token information for this identifier has
    344   /// changed since it was loaded from an AST file.
    345   void setFETokenInfoChangedSinceDeserialization() {
    346     FEChangedAfterLoad = true;
    347   }
    348 
    349   /// Determine whether the information for this identifier is out of
    350   /// date with respect to the external source.
    351   bool isOutOfDate() const { return OutOfDate; }
    352 
    353   /// Set whether the information for this identifier is out of
    354   /// date with respect to the external source.
    355   void setOutOfDate(bool OOD) {
    356     OutOfDate = OOD;
    357     if (OOD)
    358       NeedsHandleIdentifier = true;
    359     else
    360       RecomputeNeedsHandleIdentifier();
    361   }
    362 
    363   /// Determine whether this is the contextual keyword \c import.
    364   bool isModulesImport() const { return IsModulesImport; }
    365 
    366   /// Set whether this identifier is the contextual keyword \c import.
    367   void setModulesImport(bool I) {
    368     IsModulesImport = I;
    369     if (I)
    370       NeedsHandleIdentifier = true;
    371     else
    372       RecomputeNeedsHandleIdentifier();
    373   }
    374 
    375   /// Determine whether this is the mangled name of an OpenMP variant.
    376   bool isMangledOpenMPVariantName() const { return IsMangledOpenMPVariantName; }
    377 
    378   /// Set whether this is the mangled name of an OpenMP variant.
    379   void setMangledOpenMPVariantName(bool I) { IsMangledOpenMPVariantName = I; }
    380 
    381   /// Return true if this identifier is an editor placeholder.
    382   ///
    383   /// Editor placeholders are produced by the code-completion engine and are
    384   /// represented as characters between '<#' and '#>' in the source code. An
    385   /// example of auto-completed call with a placeholder parameter is shown
    386   /// below:
    387   /// \code
    388   ///   function(<#int x#>);
    389   /// \endcode
    390   bool isEditorPlaceholder() const {
    391     return getName().startswith("<#") && getName().endswith("#>");
    392   }
    393 
    394   /// Determine whether \p this is a name reserved for the implementation (C99
    395   /// 7.1.3, C++ [lib.global.names]).
    396   ReservedIdentifierStatus isReserved(const LangOptions &LangOpts) const;
    397 
    398   /// Provide less than operator for lexicographical sorting.
    399   bool operator<(const IdentifierInfo &RHS) const {
    400     return getName() < RHS.getName();
    401   }
    402 
    403 private:
    404   /// The Preprocessor::HandleIdentifier does several special (but rare)
    405   /// things to identifiers of various sorts.  For example, it changes the
    406   /// \c for keyword token from tok::identifier to tok::for.
    407   ///
    408   /// This method is very tied to the definition of HandleIdentifier.  Any
    409   /// change to it should be reflected here.
    410   void RecomputeNeedsHandleIdentifier() {
    411     NeedsHandleIdentifier = isPoisoned() || hasMacroDefinition() ||
    412                             isExtensionToken() || isFutureCompatKeyword() ||
    413                             isOutOfDate() || isModulesImport();
    414   }
    415 };
    416 
    417 /// An RAII object for [un]poisoning an identifier within a scope.
    418 ///
    419 /// \p II is allowed to be null, in which case objects of this type have
    420 /// no effect.
    421 class PoisonIdentifierRAIIObject {
    422   IdentifierInfo *const II;
    423   const bool OldValue;
    424 
    425 public:
    426   PoisonIdentifierRAIIObject(IdentifierInfo *II, bool NewValue)
    427     : II(II), OldValue(II ? II->isPoisoned() : false) {
    428     if(II)
    429       II->setIsPoisoned(NewValue);
    430   }
    431 
    432   ~PoisonIdentifierRAIIObject() {
    433     if(II)
    434       II->setIsPoisoned(OldValue);
    435   }
    436 };
    437 
    438 /// An iterator that walks over all of the known identifiers
    439 /// in the lookup table.
    440 ///
    441 /// Since this iterator uses an abstract interface via virtual
    442 /// functions, it uses an object-oriented interface rather than the
    443 /// more standard C++ STL iterator interface. In this OO-style
    444 /// iteration, the single function \c Next() provides dereference,
    445 /// advance, and end-of-sequence checking in a single
    446 /// operation. Subclasses of this iterator type will provide the
    447 /// actual functionality.
    448 class IdentifierIterator {
    449 protected:
    450   IdentifierIterator() = default;
    451 
    452 public:
    453   IdentifierIterator(const IdentifierIterator &) = delete;
    454   IdentifierIterator &operator=(const IdentifierIterator &) = delete;
    455 
    456   virtual ~IdentifierIterator();
    457 
    458   /// Retrieve the next string in the identifier table and
    459   /// advances the iterator for the following string.
    460   ///
    461   /// \returns The next string in the identifier table. If there is
    462   /// no such string, returns an empty \c StringRef.
    463   virtual StringRef Next() = 0;
    464 };
    465 
    466 /// Provides lookups to, and iteration over, IdentiferInfo objects.
    467 class IdentifierInfoLookup {
    468 public:
    469   virtual ~IdentifierInfoLookup();
    470 
    471   /// Return the IdentifierInfo for the specified named identifier.
    472   ///
    473   /// Unlike the version in IdentifierTable, this returns a pointer instead
    474   /// of a reference.  If the pointer is null then the IdentifierInfo cannot
    475   /// be found.
    476   virtual IdentifierInfo* get(StringRef Name) = 0;
    477 
    478   /// Retrieve an iterator into the set of all identifiers
    479   /// known to this identifier lookup source.
    480   ///
    481   /// This routine provides access to all of the identifiers known to
    482   /// the identifier lookup, allowing access to the contents of the
    483   /// identifiers without introducing the overhead of constructing
    484   /// IdentifierInfo objects for each.
    485   ///
    486   /// \returns A new iterator into the set of known identifiers. The
    487   /// caller is responsible for deleting this iterator.
    488   virtual IdentifierIterator *getIdentifiers();
    489 };
    490 
    491 /// Implements an efficient mapping from strings to IdentifierInfo nodes.
    492 ///
    493 /// This has no other purpose, but this is an extremely performance-critical
    494 /// piece of the code, as each occurrence of every identifier goes through
    495 /// here when lexed.
    496 class IdentifierTable {
    497   // Shark shows that using MallocAllocator is *much* slower than using this
    498   // BumpPtrAllocator!
    499   using HashTableTy = llvm::StringMap<IdentifierInfo *, llvm::BumpPtrAllocator>;
    500   HashTableTy HashTable;
    501 
    502   IdentifierInfoLookup* ExternalLookup;
    503 
    504 public:
    505   /// Create the identifier table.
    506   explicit IdentifierTable(IdentifierInfoLookup *ExternalLookup = nullptr);
    507 
    508   /// Create the identifier table, populating it with info about the
    509   /// language keywords for the language specified by \p LangOpts.
    510   explicit IdentifierTable(const LangOptions &LangOpts,
    511                            IdentifierInfoLookup *ExternalLookup = nullptr);
    512 
    513   /// Set the external identifier lookup mechanism.
    514   void setExternalIdentifierLookup(IdentifierInfoLookup *IILookup) {
    515     ExternalLookup = IILookup;
    516   }
    517 
    518   /// Retrieve the external identifier lookup object, if any.
    519   IdentifierInfoLookup *getExternalIdentifierLookup() const {
    520     return ExternalLookup;
    521   }
    522 
    523   llvm::BumpPtrAllocator& getAllocator() {
    524     return HashTable.getAllocator();
    525   }
    526 
    527   /// Return the identifier token info for the specified named
    528   /// identifier.
    529   IdentifierInfo &get(StringRef Name) {
    530     auto &Entry = *HashTable.insert(std::make_pair(Name, nullptr)).first;
    531 
    532     IdentifierInfo *&II = Entry.second;
    533     if (II) return *II;
    534 
    535     // No entry; if we have an external lookup, look there first.
    536     if (ExternalLookup) {
    537       II = ExternalLookup->get(Name);
    538       if (II)
    539         return *II;
    540     }
    541 
    542     // Lookups failed, make a new IdentifierInfo.
    543     void *Mem = getAllocator().Allocate<IdentifierInfo>();
    544     II = new (Mem) IdentifierInfo();
    545 
    546     // Make sure getName() knows how to find the IdentifierInfo
    547     // contents.
    548     II->Entry = &Entry;
    549 
    550     return *II;
    551   }
    552 
    553   IdentifierInfo &get(StringRef Name, tok::TokenKind TokenCode) {
    554     IdentifierInfo &II = get(Name);
    555     II.TokenID = TokenCode;
    556     assert(II.TokenID == (unsigned) TokenCode && "TokenCode too large");
    557     return II;
    558   }
    559 
    560   /// Gets an IdentifierInfo for the given name without consulting
    561   ///        external sources.
    562   ///
    563   /// This is a version of get() meant for external sources that want to
    564   /// introduce or modify an identifier. If they called get(), they would
    565   /// likely end up in a recursion.
    566   IdentifierInfo &getOwn(StringRef Name) {
    567     auto &Entry = *HashTable.insert(std::make_pair(Name, nullptr)).first;
    568 
    569     IdentifierInfo *&II = Entry.second;
    570     if (II)
    571       return *II;
    572 
    573     // Lookups failed, make a new IdentifierInfo.
    574     void *Mem = getAllocator().Allocate<IdentifierInfo>();
    575     II = new (Mem) IdentifierInfo();
    576 
    577     // Make sure getName() knows how to find the IdentifierInfo
    578     // contents.
    579     II->Entry = &Entry;
    580 
    581     // If this is the 'import' contextual keyword, mark it as such.
    582     if (Name.equals("import"))
    583       II->setModulesImport(true);
    584 
    585     return *II;
    586   }
    587 
    588   using iterator = HashTableTy::const_iterator;
    589   using const_iterator = HashTableTy::const_iterator;
    590 
    591   iterator begin() const { return HashTable.begin(); }
    592   iterator end() const   { return HashTable.end(); }
    593   unsigned size() const  { return HashTable.size(); }
    594 
    595   iterator find(StringRef Name) const { return HashTable.find(Name); }
    596 
    597   /// Print some statistics to stderr that indicate how well the
    598   /// hashing is doing.
    599   void PrintStats() const;
    600 
    601   /// Populate the identifier table with info about the language keywords
    602   /// for the language specified by \p LangOpts.
    603   void AddKeywords(const LangOptions &LangOpts);
    604 };
    605 
/// A family of Objective-C methods.
///
/// These families have no inherent meaning in the language, but are
/// nonetheless central enough in the existing implementations to
/// merit direct AST support.  While, in theory, arbitrary methods can
/// be considered to form families, we focus here on the methods
/// involving allocation and retain-count management, as these are the
/// most "core" and the most likely to be useful to diverse clients
/// without extra information.
///
/// Both selectors and actual method declarations may be classified
/// into families.  Method families may impose additional restrictions
/// beyond their selector name; for example, a method called '_init'
/// that returns void is not considered to be in the 'init' family
/// (but would be if it returned 'id').  It is also possible to
/// explicitly change or remove a method's family.  Therefore the
/// method's family should be considered the single source of truth.
enum ObjCMethodFamily {
  /// No particular method family.
  OMF_None,

  // Selectors in these families may have arbitrary arity, may be
  // written with arbitrary leading underscores, and may have
  // additional CamelCase "words" in their first selector chunk
  // following the family name.
  OMF_alloc,
  OMF_copy,
  OMF_init,
  OMF_mutableCopy,
  OMF_new,

  // These families are singletons consisting only of the nullary
  // selector with the given name.
  OMF_autorelease,
  OMF_dealloc,
  OMF_finalize,
  OMF_release,
  OMF_retain,
  OMF_retainCount,
  OMF_self,
  OMF_initialize,

  // performSelector families
  // NOTE: this must remain the last enumerator; the static_assert below
  // uses it to check that every family fits in ObjCMethodFamilyBitWidth bits.
  OMF_performSelector
};

/// Enough bits to store any enumerator in ObjCMethodFamily or
/// InvalidObjCMethodFamily.
enum { ObjCMethodFamilyBitWidth = 4 };

/// An invalid value of ObjCMethodFamily: the all-ones pattern of
/// ObjCMethodFamilyBitWidth bits.
enum { InvalidObjCMethodFamily = (1 << ObjCMethodFamilyBitWidth) - 1 };

// Enforce the documented invariant: every real family must be representable
// in ObjCMethodFamilyBitWidth bits without colliding with the invalid
// sentinel.
static_assert(static_cast<unsigned>(OMF_performSelector) <
                  static_cast<unsigned>(InvalidObjCMethodFamily),
              "ObjCMethodFamilyBitWidth is too small for ObjCMethodFamily");
    658 
/// A family of Objective-C methods.
///
/// These are families of methods whose result type is initially 'id', but
/// which are candidates for having the result type changed to
/// 'instancetype'.
enum ObjCInstanceTypeFamily {
  OIT_None,
  OIT_Array,
  OIT_Dictionary,
  OIT_Singleton,
  OIT_Init,
  OIT_ReturnsSelf
};
    671 
/// String-format family classification; this is the result type of
/// Selector::getStringFormatFamilyImpl().
enum ObjCStringFormatFamily {
  /// No string-format family.
  SFF_None,
  SFF_NSString,
  SFF_CFString
};
    677 
    678 /// Smart pointer class that efficiently represents Objective-C method
    679 /// names.
    680 ///
    681 /// This class will either point to an IdentifierInfo or a
    682 /// MultiKeywordSelector (which is private). This enables us to optimize
    683 /// selectors that take no arguments and selectors that take 1 argument, which
    684 /// accounts for 78% of all selectors in Cocoa.h.
    685 class Selector {
    686   friend class Diagnostic;
    687   friend class SelectorTable; // only the SelectorTable can create these
    688   friend class DeclarationName; // and the AST's DeclarationName.
    689 
    690   enum IdentifierInfoFlag {
    691     // Empty selector = 0. Note that these enumeration values must
    692     // correspond to the enumeration values of DeclarationName::StoredNameKind
    693     ZeroArg  = 0x01,
    694     OneArg   = 0x02,
    695     MultiArg = 0x07,
    696     ArgFlags = 0x07
    697   };
    698 
    699   /// A pointer to the MultiKeywordSelector or IdentifierInfo. We use the low
    700   /// three bits of InfoPtr to store an IdentifierInfoFlag. Note that in any
    701   /// case IdentifierInfo and MultiKeywordSelector are already aligned to
    702   /// 8 bytes even on 32 bits archs because of DeclarationName.
    703   uintptr_t InfoPtr = 0;
    704 
    705   Selector(IdentifierInfo *II, unsigned nArgs) {
    706     InfoPtr = reinterpret_cast<uintptr_t>(II);
    707     assert((InfoPtr & ArgFlags) == 0 &&"Insufficiently aligned IdentifierInfo");
    708     assert(nArgs < 2 && "nArgs not equal to 0/1");
    709     InfoPtr |= nArgs+1;
    710   }
    711 
    712   Selector(MultiKeywordSelector *SI) {
    713     InfoPtr = reinterpret_cast<uintptr_t>(SI);
    714     assert((InfoPtr & ArgFlags) == 0 &&"Insufficiently aligned IdentifierInfo");
    715     InfoPtr |= MultiArg;
    716   }
    717 
    718   IdentifierInfo *getAsIdentifierInfo() const {
    719     if (getIdentifierInfoFlag() < MultiArg)
    720       return reinterpret_cast<IdentifierInfo *>(InfoPtr & ~ArgFlags);
    721     return nullptr;
    722   }
    723 
    724   MultiKeywordSelector *getMultiKeywordSelector() const {
    725     return reinterpret_cast<MultiKeywordSelector *>(InfoPtr & ~ArgFlags);
    726   }
    727 
    728   unsigned getIdentifierInfoFlag() const {
    729     return InfoPtr & ArgFlags;
    730   }
    731 
    732   static ObjCMethodFamily getMethodFamilyImpl(Selector sel);
    733 
    734   static ObjCStringFormatFamily getStringFormatFamilyImpl(Selector sel);
    735 
    736 public:
    737   /// The default ctor should only be used when creating data structures that
    738   ///  will contain selectors.
    739   Selector() = default;
    740   explicit Selector(uintptr_t V) : InfoPtr(V) {}
    741 
    742   /// operator==/!= - Indicate whether the specified selectors are identical.
    743   bool operator==(Selector RHS) const {
    744     return InfoPtr == RHS.InfoPtr;
    745   }
    746   bool operator!=(Selector RHS) const {
    747     return InfoPtr != RHS.InfoPtr;
    748   }
    749 
    750   void *getAsOpaquePtr() const {
            // Exposes the raw InfoPtr value (pointer bits plus flag bits) as a
            // void pointer; nothing is masked off.
    751     return reinterpret_cast<void*>(InfoPtr);
    752   }
    753 
    754   /// Determine whether this is the null selector (InfoPtr == 0). This is
            /// distinct from getEmptyMarker(), whose InfoPtr is uintptr_t(-1).
    755   bool isNull() const { return InfoPtr == 0; }
    756 
    757   // Predicates to identify the selector type from its flag bits.
    758   bool isKeywordSelector() const {
            // True for every flag value other than ZeroArg.
    759     return getIdentifierInfoFlag() != ZeroArg;
    760   }
    761
    762   bool isUnarySelector() const {
            // True only when the flag is ZeroArg, i.e. the exact complement of
            // isKeywordSelector().
    763     return getIdentifierInfoFlag() == ZeroArg;
    764   }
    765 
    766   /// Return whether this selector is the specific keyword selector described
            /// by \p Names.
    767   bool isKeywordSelector(ArrayRef<StringRef> Names) const;
    768
    769   /// Return whether this selector is the specific unary selector described
            /// by \p Name.
    770   bool isUnarySelector(StringRef Name) const;
    771
            /// Only declared here. NOTE(review): presumably the number of arguments
            /// the selector takes — confirm against the definition.
    772   unsigned getNumArgs() const;
    773 
    774   /// Retrieve the identifier at a given position in the selector.
    775   ///
    776   /// Note that the identifier pointer returned may be null. Clients that only
    777   /// care about the text of the identifier string, and not the specific,
    778   /// uniqued identifier pointer, should use \c getNameForSlot(), which returns
    779   /// an empty string when the identifier pointer would be null.
    780   ///
    781   /// \param argIndex The index for which we want to retrieve the identifier.
    782   /// This index shall be less than \c getNumArgs() unless this is a keyword
    783   /// selector, in which case 0 is the only permissible value.
            /// NOTE(review): the exact selector kinds covered by this exception
            /// could not be checked here, as the function is only declared in this
            /// header — confirm against the definition.
    784   ///
    785   /// \returns the uniqued identifier for this slot, or null if this slot has
    786   /// no corresponding identifier.
    787   IdentifierInfo *getIdentifierInfoForSlot(unsigned argIndex) const;
    788 
    789   /// Retrieve the name at a given position in the selector, as text.
    790   ///
    791   /// \param argIndex The index for which we want to retrieve the name.
    792   /// This index shall be less than \c getNumArgs() unless this is a keyword
    793   /// selector, in which case 0 is the only permissible value.
    794   ///
    795   /// \returns the name for this slot, which may be the empty string if no
    796   /// name was supplied. Unlike \c getIdentifierInfoForSlot(), the result is a
            /// StringRef rather than a pointer that may be null.
    797   StringRef getNameForSlot(unsigned argIndex) const;
    798 
    799   /// Derive the full selector name (e.g. "foo:bar:") and return
    800   /// it as a std::string.
    801   std::string getAsString() const;
    802
    803   /// Print the full selector name (e.g. "foo:bar:") to \p OS.
    804   void print(llvm::raw_ostream &OS) const;
    805
            /// Only declared here. NOTE(review): presumably a debugging aid that
            /// prints the selector — confirm the output stream against the definition.
    806   void dump() const;
    807 
    808   /// Derive the conventional family of this method by forwarding this
            /// selector to getMethodFamilyImpl().
    809   ObjCMethodFamily getMethodFamily() const {
    810     return getMethodFamilyImpl(*this);
    811   }
    812
            /// Derive the string-format family of this selector by forwarding it to
            /// getStringFormatFamilyImpl().
    813   ObjCStringFormatFamily getStringFormatFamily() const {
    814     return getStringFormatFamilyImpl(*this);
    815   }
    816 
    817   static Selector getEmptyMarker() {
            // Sentinel whose InfoPtr is uintptr_t(-1); DenseMapInfo<Selector> uses
            // it as the empty key.
    818     return Selector(uintptr_t(-1));
    819   }
    820
    821   static Selector getTombstoneMarker() {
            // Sentinel whose InfoPtr is uintptr_t(-2); DenseMapInfo<Selector> uses
            // it as the tombstone key.
    822     return Selector(uintptr_t(-2));
    823   }
    824 
    825   static ObjCInstanceTypeFamily getInstTypeMethodFamily(Selector sel);
            // ^ Only declared here. NOTE(review): presumably classifies sel into an
            //   ObjCInstanceTypeFamily — confirm against the definition.
    826 };
    827 
    828 /// This table allows us to fully hide how we implement
    829 /// multi-keyword caching. It hands out Selector values and cannot be copied.
    830 class SelectorTable {
    831   // Actually a SelectorTableImpl; held as void* so the implementation type
          // does not have to be named in this class.
    832   void *Impl;
    833
    834 public:
    835   SelectorTable();
          // Copying is disabled (copy constructor and copy assignment are deleted).
    836   SelectorTable(const SelectorTable &) = delete;
    837   SelectorTable &operator=(const SelectorTable &) = delete;
    838   ~SelectorTable();
    839
    840   /// Can create any sort of selector.
    841   ///
    842   /// \p NumArgs indicates whether this is a no argument selector "foo", a
    843   /// single argument selector "foo:" or multi-argument "foo:bar:".
          /// NOTE(review): \p IIV presumably points at the identifiers that make up
          /// the selector — confirm its expected length against the definition.
    844   Selector getSelector(unsigned NumArgs, IdentifierInfo **IIV);
    845
          /// Build a selector for \p ID with an argument count of 1, directly and
          /// without consulting Impl.
    846   Selector getUnarySelector(IdentifierInfo *ID) {
    847     return Selector(ID, 1);
    848   }
    849
          /// Build a selector for \p ID with an argument count of 0, directly and
          /// without consulting Impl.
    850   Selector getNullarySelector(IdentifierInfo *ID) {
    851     return Selector(ID, 0);
    852   }
    853
    854   /// Return the total amount of memory allocated for managing selectors.
    855   size_t getTotalMemory() const;
    856
    857   /// Return the default setter name for the given identifier.
    858   ///
    859   /// This is "set" + \p Name where the initial character of \p Name
    860   /// has been capitalized.
    861   static SmallString<64> constructSetterName(StringRef Name);
    862
    863   /// Return the default setter selector for the given identifier.
    864   ///
    865   /// This is "set" + \p Name where the initial character of \p Name
    866   /// has been capitalized. \p Idents and \p SelTable are the tables passed
          /// in by the caller for building the result.
    867   static Selector constructSetterSelector(IdentifierTable &Idents,
    868                                           SelectorTable &SelTable,
    869                                           const IdentifierInfo *Name);
    870
    871   /// Return the property name for the given setter selector \p Sel.
    872   static std::string getPropertyNameFromSetterSelector(Selector Sel);
    873 };
    874 
    875 namespace detail {
    876 
    877 /// DeclarationNameExtra is used as a base of various uncommon special names.
    878 /// This class is needed since DeclarationName has not enough space to store
    879 /// the kind of every possible names. Therefore the kind of common names is
    880 /// stored directly in DeclarationName, and the kind of uncommon names is
    881 /// stored in DeclarationNameExtra. It is aligned to 8 bytes because
    882 /// DeclarationName needs the lower 3 bits to store the kind of common names.
    883 /// DeclarationNameExtra is tightly coupled to DeclarationName and any change
    884 /// here is very likely to require changes in DeclarationName(Table).
    885 class alignas(IdentifierInfoAlignment) DeclarationNameExtra {
    886   friend class clang::DeclarationName;
    887   friend class clang::DeclarationNameTable;
    888 
    889 protected:
    890   /// The kind of "extra" information stored in the DeclarationName. See
    891   /// @c ExtraKindOrNumArgs for an explanation of how these enumerator values
    892   /// are used. Note that DeclarationName depends on the numerical values
    893   /// of the enumerators in this enum. See DeclarationName::StoredNameKind
    894   /// for more info.
    895   enum ExtraKind {
    896     CXXDeductionGuideName,
    897     CXXLiteralOperatorName,
    898     CXXUsingDirective,
    899     ObjCMultiArgSelector
    900   };
    901 
    902   /// ExtraKindOrNumArgs has one of the following meaning:
    903   ///  * The kind of an uncommon C++ special name. This DeclarationNameExtra
    904   ///    is in this case in fact either a CXXDeductionGuideNameExtra or
    905   ///    a CXXLiteralOperatorIdName.
    906   ///
    907   ///  * It may be also name common to C++ using-directives (CXXUsingDirective),
    908   ///
    909   ///  * Otherwise it is ObjCMultiArgSelector+NumArgs, where NumArgs is
    910   ///    the number of arguments in the Objective-C selector, in which
    911   ///    case the DeclarationNameExtra is also a MultiKeywordSelector.
    912   unsigned ExtraKindOrNumArgs;
    913 
    914   DeclarationNameExtra(ExtraKind Kind) : ExtraKindOrNumArgs(Kind) {}
    915   DeclarationNameExtra(unsigned NumArgs)
    916       : ExtraKindOrNumArgs(ObjCMultiArgSelector + NumArgs) {}
    917 
    918   /// Return the corresponding ExtraKind.
    919   ExtraKind getKind() const {
    920     return static_cast<ExtraKind>(ExtraKindOrNumArgs >
    921                                           (unsigned)ObjCMultiArgSelector
    922                                       ? (unsigned)ObjCMultiArgSelector
    923                                       : ExtraKindOrNumArgs);
    924   }
    925 
    926   /// Return the number of arguments in an ObjC selector. Only valid when this
    927   /// is indeed an ObjCMultiArgSelector.
    928   unsigned getNumArgs() const {
    929     assert(ExtraKindOrNumArgs >= (unsigned)ObjCMultiArgSelector &&
    930            "getNumArgs called but this is not an ObjC selector!");
    931     return ExtraKindOrNumArgs - (unsigned)ObjCMultiArgSelector;
    932   }
    933 };
    934 
    935 } // namespace detail
    936 
    937 }  // namespace clang
    938 
    939 namespace llvm {
    940 
    941 /// Define DenseMapInfo so that Selectors can be used as keys in DenseMap and
    942 /// DenseSets. The empty and tombstone keys are the Selector marker values.
    943 template <>
    944 struct DenseMapInfo<clang::Selector> {
          /// Empty key: the selector returned by Selector::getEmptyMarker().
    945   static clang::Selector getEmptyKey() {
    946     return clang::Selector::getEmptyMarker();
    947   }
    948
          /// Tombstone key: the selector returned by Selector::getTombstoneMarker().
    949   static clang::Selector getTombstoneKey() {
    950     return clang::Selector::getTombstoneMarker();
    951   }
    952
          /// Hash of \p S; only declared here.
    953   static unsigned getHashValue(clang::Selector S);
    954
          /// Key equality is plain Selector equality (operator==).
    955   static bool isEqual(clang::Selector LHS, clang::Selector RHS) {
    956     return LHS == RHS;
    957   }
    958 };
    959 
    960 template<>
    961 struct PointerLikeTypeTraits<clang::Selector> {
          // Lets a Selector round-trip through a void pointer via its opaque
          // pointer representation.
    962   static const void *getAsVoidPointer(clang::Selector P) {
    963     return P.getAsOpaquePtr();
    964   }
    965
    966   static clang::Selector getFromVoidPointer(const void *P) {
    967     return clang::Selector(reinterpret_cast<uintptr_t>(P));
    968   }
    969
          // No low bits are advertised as free: the opaque pointer is the raw
          // Selector value, which already carries the selector's flag bits.
    970   static constexpr int NumLowBitsAvailable = 0;
    971 };
    972 
    973 // Provide PointerLikeTypeTraits for IdentifierInfo pointers, which
    974 // are not guaranteed to be 8-byte aligned.
        // NOTE(review): comments on clang::Selector in this file state that
        // IdentifierInfo is aligned to 8 bytes, which seems at odds with the
        // sentence above — confirm which one is current before changing
        // NumLowBitsAvailable.
    975 template<>
    976 struct PointerLikeTypeTraits<clang::IdentifierInfo*> {
    977   static void *getAsVoidPointer(clang::IdentifierInfo* P) {
    978     return P;
    979   }
    980
    981   static clang::IdentifierInfo *getFromVoidPointer(void *P) {
    982     return static_cast<clang::IdentifierInfo*>(P);
    983   }
    984
          // Only a single low bit is advertised as available.
    985   static constexpr int NumLowBitsAvailable = 1;
    986 };
    987 
    988 template<>
    989 struct PointerLikeTypeTraits<const clang::IdentifierInfo*> {
          // Const counterpart of the IdentifierInfo* traits: same conversions on
          // const pointers and the same number of available low bits.
    990   static const void *getAsVoidPointer(const clang::IdentifierInfo* P) {
    991     return P;
    992   }
    993
    994   static const clang::IdentifierInfo *getFromVoidPointer(const void *P) {
    995     return static_cast<const clang::IdentifierInfo*>(P);
    996   }
    997
          // Only a single low bit is advertised as available.
    998   static constexpr int NumLowBitsAvailable = 1;
    999 };
   1000 
   1001 } // namespace llvm
   1002 
   1003 #endif // LLVM_CLANG_BASIC_IDENTIFIERTABLE_H
   1004