Home | History | Annotate | Line # | Download | only in Native
      1 //===- HashTable.h - PDB Hash Table -----------------------------*- C++ -*-===//
      2 //
      3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
      4 // See https://llvm.org/LICENSE.txt for license information.
      5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
      6 //
      7 //===----------------------------------------------------------------------===//
      8 
      9 #ifndef LLVM_DEBUGINFO_PDB_NATIVE_HASHTABLE_H
     10 #define LLVM_DEBUGINFO_PDB_NATIVE_HASHTABLE_H
     11 
     12 #include "llvm/ADT/SparseBitVector.h"
     13 #include "llvm/ADT/iterator.h"
     14 #include "llvm/DebugInfo/PDB/Native/RawError.h"
     15 #include "llvm/Support/BinaryStreamReader.h"
     16 #include "llvm/Support/BinaryStreamWriter.h"
     17 #include "llvm/Support/Endian.h"
     18 #include "llvm/Support/Error.h"
     19 #include <cstdint>
     20 #include <iterator>
     21 #include <utility>
     22 #include <vector>
     23 
     24 namespace llvm {
     25 
     26 class BinaryStreamReader;
     27 class BinaryStreamWriter;
     28 
     29 namespace pdb {
     30 
     31 Error readSparseBitVector(BinaryStreamReader &Stream, SparseBitVector<> &V);
     32 Error writeSparseBitVector(BinaryStreamWriter &Writer, SparseBitVector<> &Vec);
     33 
     34 template <typename ValueT> class HashTable;
     35 
     36 template <typename ValueT>
     37 class HashTableIterator
     38     : public iterator_facade_base<HashTableIterator<ValueT>,
     39                                   std::forward_iterator_tag,
     40                                   const std::pair<uint32_t, ValueT>> {
     41   friend HashTable<ValueT>;
     42 
     43   HashTableIterator(const HashTable<ValueT> &Map, uint32_t Index,
     44                     bool IsEnd)
     45       : Map(&Map), Index(Index), IsEnd(IsEnd) {}
     46 
     47 public:
     48   HashTableIterator(const HashTable<ValueT> &Map) : Map(&Map) {
     49     int I = Map.Present.find_first();
     50     if (I == -1) {
     51       Index = 0;
     52       IsEnd = true;
     53     } else {
     54       Index = static_cast<uint32_t>(I);
     55       IsEnd = false;
     56     }
     57   }
     58 
     59   HashTableIterator(const HashTableIterator &R) = default;
     60   HashTableIterator &operator=(const HashTableIterator &R) {
     61     Map = R.Map;
     62     return *this;
     63   }
     64   bool operator==(const HashTableIterator &R) const {
     65     if (IsEnd && R.IsEnd)
     66       return true;
     67     if (IsEnd != R.IsEnd)
     68       return false;
     69 
     70     return (Map == R.Map) && (Index == R.Index);
     71   }
     72   const std::pair<uint32_t, ValueT> &operator*() const {
     73     assert(Map->Present.test(Index));
     74     return Map->Buckets[Index];
     75   }
     76 
     77   // Implement postfix op++ in terms of prefix op++ by using the superclass
     78   // implementation.
     79   using iterator_facade_base<HashTableIterator<ValueT>,
     80                              std::forward_iterator_tag,
     81                              const std::pair<uint32_t, ValueT>>::operator++;
     82   HashTableIterator &operator++() {
     83     while (Index < Map->Buckets.size()) {
     84       ++Index;
     85       if (Map->Present.test(Index))
     86         return *this;
     87     }
     88 
     89     IsEnd = true;
     90     return *this;
     91   }
     92 
     93 private:
     94   bool isEnd() const { return IsEnd; }
     95   uint32_t index() const { return Index; }
     96 
     97   const HashTable<ValueT> *Map;
     98   uint32_t Index;
     99   bool IsEnd;
    100 };
    101 
    102 template <typename ValueT>
    103 class HashTable {
    104   struct Header {
    105     support::ulittle32_t Size;
    106     support::ulittle32_t Capacity;
    107   };
    108 
    109   using BucketList = std::vector<std::pair<uint32_t, ValueT>>;
    110 
    111 public:
    112   using const_iterator = HashTableIterator<ValueT>;
    113   friend const_iterator;
    114 
    115   HashTable() { Buckets.resize(8); }
    116   explicit HashTable(uint32_t Capacity) {
    117     Buckets.resize(Capacity);
    118   }
    119 
    120   Error load(BinaryStreamReader &Stream) {
    121     const Header *H;
    122     if (auto EC = Stream.readObject(H))
    123       return EC;
    124     if (H->Capacity == 0)
    125       return make_error<RawError>(raw_error_code::corrupt_file,
    126                                   "Invalid Hash Table Capacity");
    127     if (H->Size > maxLoad(H->Capacity))
    128       return make_error<RawError>(raw_error_code::corrupt_file,
    129                                   "Invalid Hash Table Size");
    130 
    131     Buckets.resize(H->Capacity);
    132 
    133     if (auto EC = readSparseBitVector(Stream, Present))
    134       return EC;
    135     if (Present.count() != H->Size)
    136       return make_error<RawError>(raw_error_code::corrupt_file,
    137                                   "Present bit vector does not match size!");
    138 
    139     if (auto EC = readSparseBitVector(Stream, Deleted))
    140       return EC;
    141     if (Present.intersects(Deleted))
    142       return make_error<RawError>(raw_error_code::corrupt_file,
    143                                   "Present bit vector intersects deleted!");
    144 
    145     for (uint32_t P : Present) {
    146       if (auto EC = Stream.readInteger(Buckets[P].first))
    147         return EC;
    148       const ValueT *Value;
    149       if (auto EC = Stream.readObject(Value))
    150         return EC;
    151       Buckets[P].second = *Value;
    152     }
    153 
    154     return Error::success();
    155   }
    156 
    157   uint32_t calculateSerializedLength() const {
    158     uint32_t Size = sizeof(Header);
    159 
    160     constexpr int BitsPerWord = 8 * sizeof(uint32_t);
    161 
    162     int NumBitsP = Present.find_last() + 1;
    163     int NumBitsD = Deleted.find_last() + 1;
    164 
    165     uint32_t NumWordsP = alignTo(NumBitsP, BitsPerWord) / BitsPerWord;
    166     uint32_t NumWordsD = alignTo(NumBitsD, BitsPerWord) / BitsPerWord;
    167 
    168     // Present bit set number of words (4 bytes), followed by that many actual
    169     // words (4 bytes each).
    170     Size += sizeof(uint32_t);
    171     Size += NumWordsP * sizeof(uint32_t);
    172 
    173     // Deleted bit set number of words (4 bytes), followed by that many actual
    174     // words (4 bytes each).
    175     Size += sizeof(uint32_t);
    176     Size += NumWordsD * sizeof(uint32_t);
    177 
    178     // One (Key, ValueT) pair for each entry Present.
    179     Size += (sizeof(uint32_t) + sizeof(ValueT)) * size();
    180 
    181     return Size;
    182   }
    183 
    184   Error commit(BinaryStreamWriter &Writer) const {
    185     Header H;
    186     H.Size = size();
    187     H.Capacity = capacity();
    188     if (auto EC = Writer.writeObject(H))
    189       return EC;
    190 
    191     if (auto EC = writeSparseBitVector(Writer, Present))
    192       return EC;
    193 
    194     if (auto EC = writeSparseBitVector(Writer, Deleted))
    195       return EC;
    196 
    197     for (const auto &Entry : *this) {
    198       if (auto EC = Writer.writeInteger(Entry.first))
    199         return EC;
    200       if (auto EC = Writer.writeObject(Entry.second))
    201         return EC;
    202     }
    203     return Error::success();
    204   }
    205 
    206   void clear() {
    207     Buckets.resize(8);
    208     Present.clear();
    209     Deleted.clear();
    210   }
    211 
    212   bool empty() const { return size() == 0; }
    213   uint32_t capacity() const { return Buckets.size(); }
    214   uint32_t size() const { return Present.count(); }
    215 
    216   const_iterator begin() const { return const_iterator(*this); }
    217   const_iterator end() const { return const_iterator(*this, 0, true); }
    218 
    219   /// Find the entry whose key has the specified hash value, using the specified
    220   /// traits defining hash function and equality.
    221   template <typename Key, typename TraitsT>
    222   const_iterator find_as(const Key &K, TraitsT &Traits) const {
    223     uint32_t H = Traits.hashLookupKey(K) % capacity();
    224     uint32_t I = H;
    225     Optional<uint32_t> FirstUnused;
    226     do {
    227       if (isPresent(I)) {
    228         if (Traits.storageKeyToLookupKey(Buckets[I].first) == K)
    229           return const_iterator(*this, I, false);
    230       } else {
    231         if (!FirstUnused)
    232           FirstUnused = I;
    233         // Insertion occurs via linear probing from the slot hint, and will be
    234         // inserted at the first empty / deleted location.  Therefore, if we are
    235         // probing and find a location that is neither present nor deleted, then
    236         // nothing must have EVER been inserted at this location, and thus it is
    237         // not possible for a matching value to occur later.
    238         if (!isDeleted(I))
    239           break;
    240       }
    241       I = (I + 1) % capacity();
    242     } while (I != H);
    243 
    244     // The only way FirstUnused would not be set is if every single entry in the
    245     // table were Present.  But this would violate the load factor constraints
    246     // that we impose, so it should never happen.
    247     assert(FirstUnused);
    248     return const_iterator(*this, *FirstUnused, true);
    249   }
    250 
    251   /// Set the entry using a key type that the specified Traits can convert
    252   /// from a real key to an internal key.
    253   template <typename Key, typename TraitsT>
    254   bool set_as(const Key &K, ValueT V, TraitsT &Traits) {
    255     return set_as_internal(K, std::move(V), Traits, None);
    256   }
    257 
    258   template <typename Key, typename TraitsT>
    259   ValueT get(const Key &K, TraitsT &Traits) const {
    260     auto Iter = find_as(K, Traits);
    261     assert(Iter != end());
    262     return (*Iter).second;
    263   }
    264 
    265 protected:
    266   bool isPresent(uint32_t K) const { return Present.test(K); }
    267   bool isDeleted(uint32_t K) const { return Deleted.test(K); }
    268 
    269   BucketList Buckets;
    270   mutable SparseBitVector<> Present;
    271   mutable SparseBitVector<> Deleted;
    272 
    273 private:
    274   /// Set the entry using a key type that the specified Traits can convert
    275   /// from a real key to an internal key.
    276   template <typename Key, typename TraitsT>
    277   bool set_as_internal(const Key &K, ValueT V, TraitsT &Traits,
    278                        Optional<uint32_t> InternalKey) {
    279     auto Entry = find_as(K, Traits);
    280     if (Entry != end()) {
    281       assert(isPresent(Entry.index()));
    282       assert(Traits.storageKeyToLookupKey(Buckets[Entry.index()].first) == K);
    283       // We're updating, no need to do anything special.
    284       Buckets[Entry.index()].second = V;
    285       return false;
    286     }
    287 
    288     auto &B = Buckets[Entry.index()];
    289     assert(!isPresent(Entry.index()));
    290     assert(Entry.isEnd());
    291     B.first = InternalKey ? *InternalKey : Traits.lookupKeyToStorageKey(K);
    292     B.second = V;
    293     Present.set(Entry.index());
    294     Deleted.reset(Entry.index());
    295 
    296     grow(Traits);
    297 
    298     assert((find_as(K, Traits)) != end());
    299     return true;
    300   }
    301 
    302   static uint32_t maxLoad(uint32_t capacity) { return capacity * 2 / 3 + 1; }
    303 
    304   template <typename TraitsT>
    305   void grow(TraitsT &Traits) {
    306     uint32_t S = size();
    307     uint32_t MaxLoad = maxLoad(capacity());
    308     if (S < maxLoad(capacity()))
    309       return;
    310     assert(capacity() != UINT32_MAX && "Can't grow Hash table!");
    311 
    312     uint32_t NewCapacity = (capacity() <= INT32_MAX) ? MaxLoad * 2 : UINT32_MAX;
    313 
    314     // Growing requires rebuilding the table and re-hashing every item.  Make a
    315     // copy with a larger capacity, insert everything into the copy, then swap
    316     // it in.
    317     HashTable NewMap(NewCapacity);
    318     for (auto I : Present) {
    319       auto LookupKey = Traits.storageKeyToLookupKey(Buckets[I].first);
    320       NewMap.set_as_internal(LookupKey, Buckets[I].second, Traits,
    321                              Buckets[I].first);
    322     }
    323 
    324     Buckets.swap(NewMap.Buckets);
    325     std::swap(Present, NewMap.Present);
    326     std::swap(Deleted, NewMap.Deleted);
    327     assert(capacity() == NewCapacity);
    328     assert(size() == S);
    329   }
    330 };
    331 
    332 } // end namespace pdb
    333 
    334 } // end namespace llvm
    335 
    336 #endif // LLVM_DEBUGINFO_PDB_NATIVE_HASHTABLE_H
    337