Home | History | Annotate | Line # | Download | only in Support
      1 //===-- Support/FoldingSet.cpp - Uniquing Hash Set --------------*- C++ -*-===//
      2 //
      3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
      4 // See https://llvm.org/LICENSE.txt for license information.
      5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
      6 //
      7 //===----------------------------------------------------------------------===//
      8 //
      9 // This file implements a hash set that can be used to remove duplication of
     10 // nodes in a graph.
     11 //
     12 //===----------------------------------------------------------------------===//
     13 
     14 #include "llvm/ADT/FoldingSet.h"
     15 #include "llvm/ADT/Hashing.h"
     16 #include "llvm/ADT/StringRef.h"
     17 #include "llvm/Support/Allocator.h"
     18 #include "llvm/Support/ErrorHandling.h"
     19 #include "llvm/Support/Host.h"
     20 #include "llvm/Support/MathExtras.h"
     21 #include <cassert>
     22 #include <cstring>
     23 using namespace llvm;
     24 
     25 //===----------------------------------------------------------------------===//
     26 // FoldingSetNodeIDRef Implementation
     27 
     28 /// ComputeHash - Compute a strong hash value for this FoldingSetNodeIDRef,
     29 /// used to lookup the node in the FoldingSetBase.
     30 unsigned FoldingSetNodeIDRef::ComputeHash() const {
     31   return static_cast<unsigned>(hash_combine_range(Data, Data+Size));
     32 }
     33 
     34 bool FoldingSetNodeIDRef::operator==(FoldingSetNodeIDRef RHS) const {
     35   if (Size != RHS.Size) return false;
     36   return memcmp(Data, RHS.Data, Size*sizeof(*Data)) == 0;
     37 }
     38 
     39 /// Used to compare the "ordering" of two nodes as defined by the
     40 /// profiled bits and their ordering defined by memcmp().
     41 bool FoldingSetNodeIDRef::operator<(FoldingSetNodeIDRef RHS) const {
     42   if (Size != RHS.Size)
     43     return Size < RHS.Size;
     44   return memcmp(Data, RHS.Data, Size*sizeof(*Data)) < 0;
     45 }
     46 
     47 //===----------------------------------------------------------------------===//
     48 // FoldingSetNodeID Implementation
     49 
     50 /// Add* - Add various data types to Bit data.
     51 ///
     52 void FoldingSetNodeID::AddPointer(const void *Ptr) {
     53   // Note: this adds pointers to the hash using sizes and endianness that
     54   // depend on the host. It doesn't matter, however, because hashing on
     55   // pointer values is inherently unstable. Nothing should depend on the
     56   // ordering of nodes in the folding set.
     57   static_assert(sizeof(uintptr_t) <= sizeof(unsigned long long),
     58                 "unexpected pointer size");
     59   AddInteger(reinterpret_cast<uintptr_t>(Ptr));
     60 }
     61 void FoldingSetNodeID::AddInteger(signed I) {
     62   Bits.push_back(I);
     63 }
     64 void FoldingSetNodeID::AddInteger(unsigned I) {
     65   Bits.push_back(I);
     66 }
     67 void FoldingSetNodeID::AddInteger(long I) {
     68   AddInteger((unsigned long)I);
     69 }
     70 void FoldingSetNodeID::AddInteger(unsigned long I) {
     71   if (sizeof(long) == sizeof(int))
     72     AddInteger(unsigned(I));
     73   else if (sizeof(long) == sizeof(long long)) {
     74     AddInteger((unsigned long long)I);
     75   } else {
     76     llvm_unreachable("unexpected sizeof(long)");
     77   }
     78 }
     79 void FoldingSetNodeID::AddInteger(long long I) {
     80   AddInteger((unsigned long long)I);
     81 }
     82 void FoldingSetNodeID::AddInteger(unsigned long long I) {
     83   AddInteger(unsigned(I));
     84   AddInteger(unsigned(I >> 32));
     85 }
     86 
     87 void FoldingSetNodeID::AddString(StringRef String) {
     88   unsigned Size =  String.size();
     89 
     90   unsigned NumInserts = 1 + divideCeil(Size, 4);
     91   Bits.reserve(Bits.size() + NumInserts);
     92 
     93   Bits.push_back(Size);
     94   if (!Size) return;
     95 
     96   unsigned Units = Size / 4;
     97   unsigned Pos = 0;
     98   const unsigned *Base = (const unsigned*) String.data();
     99 
    100   // If the string is aligned do a bulk transfer.
    101   if (!((intptr_t)Base & 3)) {
    102     Bits.append(Base, Base + Units);
    103     Pos = (Units + 1) * 4;
    104   } else {
    105     // Otherwise do it the hard way.
    106     // To be compatible with above bulk transfer, we need to take endianness
    107     // into account.
    108     static_assert(sys::IsBigEndianHost || sys::IsLittleEndianHost,
    109                   "Unexpected host endianness");
    110     if (sys::IsBigEndianHost) {
    111       for (Pos += 4; Pos <= Size; Pos += 4) {
    112         unsigned V = ((unsigned char)String[Pos - 4] << 24) |
    113                      ((unsigned char)String[Pos - 3] << 16) |
    114                      ((unsigned char)String[Pos - 2] << 8) |
    115                       (unsigned char)String[Pos - 1];
    116         Bits.push_back(V);
    117       }
    118     } else {  // Little-endian host
    119       for (Pos += 4; Pos <= Size; Pos += 4) {
    120         unsigned V = ((unsigned char)String[Pos - 1] << 24) |
    121                      ((unsigned char)String[Pos - 2] << 16) |
    122                      ((unsigned char)String[Pos - 3] << 8) |
    123                       (unsigned char)String[Pos - 4];
    124         Bits.push_back(V);
    125       }
    126     }
    127   }
    128 
    129   // With the leftover bits.
    130   unsigned V = 0;
    131   // Pos will have overshot size by 4 - #bytes left over.
    132   // No need to take endianness into account here - this is always executed.
    133   switch (Pos - Size) {
    134   case 1: V = (V << 8) | (unsigned char)String[Size - 3]; LLVM_FALLTHROUGH;
    135   case 2: V = (V << 8) | (unsigned char)String[Size - 2]; LLVM_FALLTHROUGH;
    136   case 3: V = (V << 8) | (unsigned char)String[Size - 1]; break;
    137   default: return; // Nothing left.
    138   }
    139 
    140   Bits.push_back(V);
    141 }
    142 
    143 // AddNodeID - Adds the Bit data of another ID to *this.
    144 void FoldingSetNodeID::AddNodeID(const FoldingSetNodeID &ID) {
    145   Bits.append(ID.Bits.begin(), ID.Bits.end());
    146 }
    147 
    148 /// ComputeHash - Compute a strong hash value for this FoldingSetNodeID, used to
    149 /// lookup the node in the FoldingSetBase.
    150 unsigned FoldingSetNodeID::ComputeHash() const {
    151   return FoldingSetNodeIDRef(Bits.data(), Bits.size()).ComputeHash();
    152 }
    153 
    154 /// operator== - Used to compare two nodes to each other.
    155 ///
    156 bool FoldingSetNodeID::operator==(const FoldingSetNodeID &RHS) const {
    157   return *this == FoldingSetNodeIDRef(RHS.Bits.data(), RHS.Bits.size());
    158 }
    159 
    160 /// operator== - Used to compare two nodes to each other.
    161 ///
    162 bool FoldingSetNodeID::operator==(FoldingSetNodeIDRef RHS) const {
    163   return FoldingSetNodeIDRef(Bits.data(), Bits.size()) == RHS;
    164 }
    165 
    166 /// Used to compare the "ordering" of two nodes as defined by the
    167 /// profiled bits and their ordering defined by memcmp().
    168 bool FoldingSetNodeID::operator<(const FoldingSetNodeID &RHS) const {
    169   return *this < FoldingSetNodeIDRef(RHS.Bits.data(), RHS.Bits.size());
    170 }
    171 
    172 bool FoldingSetNodeID::operator<(FoldingSetNodeIDRef RHS) const {
    173   return FoldingSetNodeIDRef(Bits.data(), Bits.size()) < RHS;
    174 }
    175 
    176 /// Intern - Copy this node's data to a memory region allocated from the
    177 /// given allocator and return a FoldingSetNodeIDRef describing the
    178 /// interned data.
    179 FoldingSetNodeIDRef
    180 FoldingSetNodeID::Intern(BumpPtrAllocator &Allocator) const {
    181   unsigned *New = Allocator.Allocate<unsigned>(Bits.size());
    182   std::uninitialized_copy(Bits.begin(), Bits.end(), New);
    183   return FoldingSetNodeIDRef(New, Bits.size());
    184 }
    185 
    186 //===----------------------------------------------------------------------===//
    187 /// Helper functions for FoldingSetBase.
    188 
    189 /// GetNextPtr - In order to save space, each bucket is a
    190 /// singly-linked-list. In order to make deletion more efficient, we make
    191 /// the list circular, so we can delete a node without computing its hash.
    192 /// The problem with this is that the start of the hash buckets are not
    193 /// Nodes.  If NextInBucketPtr is a bucket pointer, this method returns null:
    194 /// use GetBucketPtr when this happens.
    195 static FoldingSetBase::Node *GetNextPtr(void *NextInBucketPtr) {
    196   // The low bit is set if this is the pointer back to the bucket.
    197   if (reinterpret_cast<intptr_t>(NextInBucketPtr) & 1)
    198     return nullptr;
    199 
    200   return static_cast<FoldingSetBase::Node*>(NextInBucketPtr);
    201 }
    202 
    203 
    204 /// testing.
    205 static void **GetBucketPtr(void *NextInBucketPtr) {
    206   intptr_t Ptr = reinterpret_cast<intptr_t>(NextInBucketPtr);
    207   assert((Ptr & 1) && "Not a bucket pointer");
    208   return reinterpret_cast<void**>(Ptr & ~intptr_t(1));
    209 }
    210 
    211 /// GetBucketFor - Hash the specified node ID and return the hash bucket for
    212 /// the specified ID.
    213 static void **GetBucketFor(unsigned Hash, void **Buckets, unsigned NumBuckets) {
    214   // NumBuckets is always a power of 2.
    215   unsigned BucketNum = Hash & (NumBuckets-1);
    216   return Buckets + BucketNum;
    217 }
    218 
    219 /// AllocateBuckets - Allocated initialized bucket memory.
    220 static void **AllocateBuckets(unsigned NumBuckets) {
    221   void **Buckets = static_cast<void**>(safe_calloc(NumBuckets + 1,
    222                                                    sizeof(void*)));
    223   // Set the very last bucket to be a non-null "pointer".
    224   Buckets[NumBuckets] = reinterpret_cast<void*>(-1);
    225   return Buckets;
    226 }
    227 
    228 //===----------------------------------------------------------------------===//
    229 // FoldingSetBase Implementation
    230 
    231 FoldingSetBase::FoldingSetBase(unsigned Log2InitSize) {
    232   assert(5 < Log2InitSize && Log2InitSize < 32 &&
    233          "Initial hash table size out of range");
    234   NumBuckets = 1 << Log2InitSize;
    235   Buckets = AllocateBuckets(NumBuckets);
    236   NumNodes = 0;
    237 }
    238 
    239 FoldingSetBase::FoldingSetBase(FoldingSetBase &&Arg)
    240     : Buckets(Arg.Buckets), NumBuckets(Arg.NumBuckets), NumNodes(Arg.NumNodes) {
    241   Arg.Buckets = nullptr;
    242   Arg.NumBuckets = 0;
    243   Arg.NumNodes = 0;
    244 }
    245 
    246 FoldingSetBase &FoldingSetBase::operator=(FoldingSetBase &&RHS) {
    247   free(Buckets); // This may be null if the set is in a moved-from state.
    248   Buckets = RHS.Buckets;
    249   NumBuckets = RHS.NumBuckets;
    250   NumNodes = RHS.NumNodes;
    251   RHS.Buckets = nullptr;
    252   RHS.NumBuckets = 0;
    253   RHS.NumNodes = 0;
    254   return *this;
    255 }
    256 
    257 FoldingSetBase::~FoldingSetBase() {
    258   free(Buckets);
    259 }
    260 
    261 void FoldingSetBase::clear() {
    262   // Set all but the last bucket to null pointers.
    263   memset(Buckets, 0, NumBuckets*sizeof(void*));
    264 
    265   // Set the very last bucket to be a non-null "pointer".
    266   Buckets[NumBuckets] = reinterpret_cast<void*>(-1);
    267 
    268   // Reset the node count to zero.
    269   NumNodes = 0;
    270 }
    271 
    272 void FoldingSetBase::GrowBucketCount(unsigned NewBucketCount,
    273                                      const FoldingSetInfo &Info) {
    274   assert((NewBucketCount > NumBuckets) &&
    275          "Can't shrink a folding set with GrowBucketCount");
    276   assert(isPowerOf2_32(NewBucketCount) && "Bad bucket count!");
    277   void **OldBuckets = Buckets;
    278   unsigned OldNumBuckets = NumBuckets;
    279 
    280   // Clear out new buckets.
    281   Buckets = AllocateBuckets(NewBucketCount);
    282   // Set NumBuckets only if allocation of new buckets was successful.
    283   NumBuckets = NewBucketCount;
    284   NumNodes = 0;
    285 
    286   // Walk the old buckets, rehashing nodes into their new place.
    287   FoldingSetNodeID TempID;
    288   for (unsigned i = 0; i != OldNumBuckets; ++i) {
    289     void *Probe = OldBuckets[i];
    290     if (!Probe) continue;
    291     while (Node *NodeInBucket = GetNextPtr(Probe)) {
    292       // Figure out the next link, remove NodeInBucket from the old link.
    293       Probe = NodeInBucket->getNextInBucket();
    294       NodeInBucket->SetNextInBucket(nullptr);
    295 
    296       // Insert the node into the new bucket, after recomputing the hash.
    297       InsertNode(NodeInBucket,
    298                  GetBucketFor(Info.ComputeNodeHash(this, NodeInBucket, TempID),
    299                               Buckets, NumBuckets),
    300                  Info);
    301       TempID.clear();
    302     }
    303   }
    304 
    305   free(OldBuckets);
    306 }
    307 
    308 /// GrowHashTable - Double the size of the hash table and rehash everything.
    309 ///
    310 void FoldingSetBase::GrowHashTable(const FoldingSetInfo &Info) {
    311   GrowBucketCount(NumBuckets * 2, Info);
    312 }
    313 
    314 void FoldingSetBase::reserve(unsigned EltCount, const FoldingSetInfo &Info) {
    315   // This will give us somewhere between EltCount / 2 and
    316   // EltCount buckets.  This puts us in the load factor
    317   // range of 1.0 - 2.0.
    318   if(EltCount < capacity())
    319     return;
    320   GrowBucketCount(PowerOf2Floor(EltCount), Info);
    321 }
    322 
    323 /// FindNodeOrInsertPos - Look up the node specified by ID.  If it exists,
    324 /// return it.  If not, return the insertion token that will make insertion
    325 /// faster.
    326 FoldingSetBase::Node *FoldingSetBase::FindNodeOrInsertPos(
    327     const FoldingSetNodeID &ID, void *&InsertPos, const FoldingSetInfo &Info) {
    328   unsigned IDHash = ID.ComputeHash();
    329   void **Bucket = GetBucketFor(IDHash, Buckets, NumBuckets);
    330   void *Probe = *Bucket;
    331 
    332   InsertPos = nullptr;
    333 
    334   FoldingSetNodeID TempID;
    335   while (Node *NodeInBucket = GetNextPtr(Probe)) {
    336     if (Info.NodeEquals(this, NodeInBucket, ID, IDHash, TempID))
    337       return NodeInBucket;
    338     TempID.clear();
    339 
    340     Probe = NodeInBucket->getNextInBucket();
    341   }
    342 
    343   // Didn't find the node, return null with the bucket as the InsertPos.
    344   InsertPos = Bucket;
    345   return nullptr;
    346 }
    347 
    348 /// InsertNode - Insert the specified node into the folding set, knowing that it
    349 /// is not already in the map.  InsertPos must be obtained from
    350 /// FindNodeOrInsertPos.
    351 void FoldingSetBase::InsertNode(Node *N, void *InsertPos,
    352                                 const FoldingSetInfo &Info) {
    353   assert(!N->getNextInBucket());
    354   // Do we need to grow the hashtable?
    355   if (NumNodes+1 > capacity()) {
    356     GrowHashTable(Info);
    357     FoldingSetNodeID TempID;
    358     InsertPos = GetBucketFor(Info.ComputeNodeHash(this, N, TempID), Buckets,
    359                              NumBuckets);
    360   }
    361 
    362   ++NumNodes;
    363 
    364   /// The insert position is actually a bucket pointer.
    365   void **Bucket = static_cast<void**>(InsertPos);
    366 
    367   void *Next = *Bucket;
    368 
    369   // If this is the first insertion into this bucket, its next pointer will be
    370   // null.  Pretend as if it pointed to itself, setting the low bit to indicate
    371   // that it is a pointer to the bucket.
    372   if (!Next)
    373     Next = reinterpret_cast<void*>(reinterpret_cast<intptr_t>(Bucket)|1);
    374 
    375   // Set the node's next pointer, and make the bucket point to the node.
    376   N->SetNextInBucket(Next);
    377   *Bucket = N;
    378 }
    379 
    380 /// RemoveNode - Remove a node from the folding set, returning true if one was
    381 /// removed or false if the node was not in the folding set.
    382 bool FoldingSetBase::RemoveNode(Node *N) {
    383   // Because each bucket is a circular list, we don't need to compute N's hash
    384   // to remove it.
    385   void *Ptr = N->getNextInBucket();
    386   if (!Ptr) return false;  // Not in folding set.
    387 
    388   --NumNodes;
    389   N->SetNextInBucket(nullptr);
    390 
    391   // Remember what N originally pointed to, either a bucket or another node.
    392   void *NodeNextPtr = Ptr;
    393 
    394   // Chase around the list until we find the node (or bucket) which points to N.
    395   while (true) {
    396     if (Node *NodeInBucket = GetNextPtr(Ptr)) {
    397       // Advance pointer.
    398       Ptr = NodeInBucket->getNextInBucket();
    399 
    400       // We found a node that points to N, change it to point to N's next node,
    401       // removing N from the list.
    402       if (Ptr == N) {
    403         NodeInBucket->SetNextInBucket(NodeNextPtr);
    404         return true;
    405       }
    406     } else {
    407       void **Bucket = GetBucketPtr(Ptr);
    408       Ptr = *Bucket;
    409 
    410       // If we found that the bucket points to N, update the bucket to point to
    411       // whatever is next.
    412       if (Ptr == N) {
    413         *Bucket = NodeNextPtr;
    414         return true;
    415       }
    416     }
    417   }
    418 }
    419 
    420 /// GetOrInsertNode - If there is an existing simple Node exactly
    421 /// equal to the specified node, return it.  Otherwise, insert 'N' and it
    422 /// instead.
    423 FoldingSetBase::Node *
    424 FoldingSetBase::GetOrInsertNode(FoldingSetBase::Node *N,
    425                                 const FoldingSetInfo &Info) {
    426   FoldingSetNodeID ID;
    427   Info.GetNodeProfile(this, N, ID);
    428   void *IP;
    429   if (Node *E = FindNodeOrInsertPos(ID, IP, Info))
    430     return E;
    431   InsertNode(N, IP, Info);
    432   return N;
    433 }
    434 
    435 //===----------------------------------------------------------------------===//
    436 // FoldingSetIteratorImpl Implementation
    437 
    438 FoldingSetIteratorImpl::FoldingSetIteratorImpl(void **Bucket) {
    439   // Skip to the first non-null non-self-cycle bucket.
    440   while (*Bucket != reinterpret_cast<void*>(-1) &&
    441          (!*Bucket || !GetNextPtr(*Bucket)))
    442     ++Bucket;
    443 
    444   NodePtr = static_cast<FoldingSetNode*>(*Bucket);
    445 }
    446 
    447 void FoldingSetIteratorImpl::advance() {
    448   // If there is another link within this bucket, go to it.
    449   void *Probe = NodePtr->getNextInBucket();
    450 
    451   if (FoldingSetNode *NextNodeInBucket = GetNextPtr(Probe))
    452     NodePtr = NextNodeInBucket;
    453   else {
    454     // Otherwise, this is the last link in this bucket.
    455     void **Bucket = GetBucketPtr(Probe);
    456 
    457     // Skip to the next non-null non-self-cycle bucket.
    458     do {
    459       ++Bucket;
    460     } while (*Bucket != reinterpret_cast<void*>(-1) &&
    461              (!*Bucket || !GetNextPtr(*Bucket)));
    462 
    463     NodePtr = static_cast<FoldingSetNode*>(*Bucket);
    464   }
    465 }
    466 
    467 //===----------------------------------------------------------------------===//
    468 // FoldingSetBucketIteratorImpl Implementation
    469 
    470 FoldingSetBucketIteratorImpl::FoldingSetBucketIteratorImpl(void **Bucket) {
    471   Ptr = (!*Bucket || !GetNextPtr(*Bucket)) ? (void*) Bucket : *Bucket;
    472 }
    473