Home | History | Annotate | Line # | Download | only in Support
      1 //===- BinaryStreamArray.h - Array backed by an arbitrary stream *- C++ -*-===//
      2 //
      3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
      4 // See https://llvm.org/LICENSE.txt for license information.
      5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
      6 //
      7 //===----------------------------------------------------------------------===//
      8 ///
      9 /// \file
     10 /// Lightweight arrays that are backed by an arbitrary BinaryStream.  This file
     11 /// provides two different array implementations.
     12 ///
     13 ///     VarStreamArray - Arrays of variable length records.  The user specifies
     14 ///       an Extractor type that can extract a record from a given offset and
     15 ///       return the number of bytes consumed by the record.
     16 ///
     17 ///     FixedStreamArray - Arrays of fixed length records.  This is similar in
     18 ///       spirit to ArrayRef<T>, but since it is backed by a BinaryStream, the
     19 ///       elements of the array need not be laid out in contiguous memory.
     20 ///
     21 
     22 #ifndef LLVM_SUPPORT_BINARYSTREAMARRAY_H
     23 #define LLVM_SUPPORT_BINARYSTREAMARRAY_H
     24 
     25 #include "llvm/ADT/ArrayRef.h"
     26 #include "llvm/ADT/iterator.h"
     27 #include "llvm/Support/Alignment.h"
     28 #include "llvm/Support/BinaryStreamRef.h"
     29 #include "llvm/Support/Error.h"
     30 #include <cassert>
     31 #include <cstdint>
     32 
     33 namespace llvm {
     34 
     35 /// VarStreamArrayExtractor is intended to be specialized to provide customized
     36 /// extraction logic.  On input it receives a BinaryStreamRef pointing to the
     37 /// beginning of the next record, but where the length of the record is not yet
     38 /// known.  Upon completion, it should return an appropriate Error instance if
     39 /// a record could not be extracted, or if one could be extracted it should
     40 /// return success and set Len to the number of bytes this record occupied in
     41 /// the underlying stream, and it should fill out the fields of the value type
     42 /// Item appropriately to represent the current record.
     43 ///
     44 /// You can specialize this template for your own custom value types to avoid
     45 /// having to specify a second template argument to VarStreamArray (documented
     46 /// below).
     47 template <typename T> struct VarStreamArrayExtractor {
     48   // Method intentionally deleted.  You must provide an explicit specialization
     49   // with the following method implemented.
     50   Error operator()(BinaryStreamRef Stream, uint32_t &Len,
     51                    T &Item) const = delete;
     52 };
     53 
     54 /// VarStreamArray represents an array of variable length records backed by a
     55 /// stream.  This could be a contiguous sequence of bytes in memory, it could
     56 /// be a file on disk, or it could be a PDB stream where bytes are stored as
     57 /// discontiguous blocks in a file.  Usually it is desirable to treat arrays
     58 /// as contiguous blocks of memory, but doing so with large PDB files, for
     59 /// example, could mean allocating huge amounts of memory just to allow
     60 /// re-ordering of stream data to be contiguous before iterating over it.  By
     61 /// abstracting this out, we need not duplicate this memory, and we can
     62 /// iterate over arrays in arbitrarily formatted streams.  Elements are parsed
     63 /// lazily on iteration, so there is no upfront cost associated with building
     64 /// or copying a VarStreamArray, no matter how large it may be.
     65 ///
     66 /// You create a VarStreamArray by specifying a ValueType and an Extractor type.
     67 /// If you do not specify an Extractor type, you are expected to specialize
     68 /// VarStreamArrayExtractor<T> for your ValueType.
     69 ///
     70 /// By default an Extractor is default constructed in the class, but in some
     71 /// cases you might find it useful for an Extractor to maintain state across
     72 /// extractions.  In this case you can provide your own Extractor through a
     73 /// secondary constructor.  The following examples show various ways of
     74 /// creating a VarStreamArray.
     75 ///
     76 ///       // Will use VarStreamArrayExtractor<MyType> as the extractor.
     77 ///       VarStreamArray<MyType> MyTypeArray;
     78 ///
     79 ///       // Will use a default-constructed MyExtractor as the extractor.
     80 ///       VarStreamArray<MyType, MyExtractor> MyTypeArray2;
     81 ///
     82 ///       // Will use the specific instance of MyExtractor provided.
     83 ///       // MyExtractor need not be default-constructible in this case.
     84 ///       MyExtractor E(SomeContext);
     85 ///       VarStreamArray<MyType, MyExtractor> MyTypeArray3(E);
     86 ///
     87 
     88 template <typename ValueType, typename Extractor> class VarStreamArrayIterator;
     89 
     90 template <typename ValueType,
     91           typename Extractor = VarStreamArrayExtractor<ValueType>>
     92 class VarStreamArray {
     93   friend class VarStreamArrayIterator<ValueType, Extractor>;
     94 
     95 public:
     96   typedef VarStreamArrayIterator<ValueType, Extractor> Iterator;
     97 
     98   VarStreamArray() = default;
     99 
    100   explicit VarStreamArray(const Extractor &E) : E(E) {}
    101 
    102   explicit VarStreamArray(BinaryStreamRef Stream, uint32_t Skew = 0)
    103       : Stream(Stream), Skew(Skew) {}
    104 
    105   VarStreamArray(BinaryStreamRef Stream, const Extractor &E, uint32_t Skew = 0)
    106       : Stream(Stream), E(E), Skew(Skew) {}
    107 
    108   Iterator begin(bool *HadError = nullptr) const {
    109     return Iterator(*this, E, Skew, nullptr);
    110   }
    111 
    112   bool valid() const { return Stream.valid(); }
    113 
    114   uint32_t skew() const { return Skew; }
    115   Iterator end() const { return Iterator(E); }
    116 
    117   bool empty() const { return Stream.getLength() == 0; }
    118 
    119   VarStreamArray<ValueType, Extractor> substream(uint32_t Begin,
    120                                                  uint32_t End) const {
    121     assert(Begin >= Skew);
    122     // We should never cut off the beginning of the stream since it might be
    123     // skewed, meaning the initial bytes are important.
    124     BinaryStreamRef NewStream = Stream.slice(0, End);
    125     return {NewStream, E, Begin};
    126   }
    127 
    128   /// given an offset into the array's underlying stream, return an
    129   /// iterator to the record at that offset.  This is considered unsafe
    130   /// since the behavior is undefined if \p Offset does not refer to the
    131   /// beginning of a valid record.
    132   Iterator at(uint32_t Offset) const {
    133     return Iterator(*this, E, Offset, nullptr);
    134   }
    135 
    136   const Extractor &getExtractor() const { return E; }
    137   Extractor &getExtractor() { return E; }
    138 
    139   BinaryStreamRef getUnderlyingStream() const { return Stream; }
    140   void setUnderlyingStream(BinaryStreamRef NewStream, uint32_t NewSkew = 0) {
    141     Stream = NewStream;
    142     Skew = NewSkew;
    143   }
    144 
    145   void drop_front() { Skew += begin()->length(); }
    146 
    147 private:
    148   BinaryStreamRef Stream;
    149   Extractor E;
    150   uint32_t Skew = 0;
    151 };
    152 
    153 template <typename ValueType, typename Extractor>
    154 class VarStreamArrayIterator
    155     : public iterator_facade_base<VarStreamArrayIterator<ValueType, Extractor>,
    156                                   std::forward_iterator_tag, ValueType> {
    157   typedef VarStreamArrayIterator<ValueType, Extractor> IterType;
    158   typedef VarStreamArray<ValueType, Extractor> ArrayType;
    159 
    160 public:
    161   VarStreamArrayIterator(const ArrayType &Array, const Extractor &E,
    162                          uint32_t Offset, bool *HadError)
    163       : IterRef(Array.Stream.drop_front(Offset)), Extract(E),
    164         Array(&Array), AbsOffset(Offset), HadError(HadError) {
    165     if (IterRef.getLength() == 0)
    166       moveToEnd();
    167     else {
    168       auto EC = Extract(IterRef, ThisLen, ThisValue);
    169       if (EC) {
    170         consumeError(std::move(EC));
    171         markError();
    172       }
    173     }
    174   }
    175 
    176   VarStreamArrayIterator() = default;
    177   explicit VarStreamArrayIterator(const Extractor &E) : Extract(E) {}
    178   ~VarStreamArrayIterator() = default;
    179 
    180   bool operator==(const IterType &R) const {
    181     if (Array && R.Array) {
    182       // Both have a valid array, make sure they're same.
    183       assert(Array == R.Array);
    184       return IterRef == R.IterRef;
    185     }
    186 
    187     // Both iterators are at the end.
    188     if (!Array && !R.Array)
    189       return true;
    190 
    191     // One is not at the end and one is.
    192     return false;
    193   }
    194 
    195   const ValueType &operator*() const {
    196     assert(Array && !HasError);
    197     return ThisValue;
    198   }
    199 
    200   ValueType &operator*() {
    201     assert(Array && !HasError);
    202     return ThisValue;
    203   }
    204 
    205   IterType &operator+=(unsigned N) {
    206     for (unsigned I = 0; I < N; ++I) {
    207       // We are done with the current record, discard it so that we are
    208       // positioned at the next record.
    209       AbsOffset += ThisLen;
    210       IterRef = IterRef.drop_front(ThisLen);
    211       if (IterRef.getLength() == 0) {
    212         // There is nothing after the current record, we must make this an end
    213         // iterator.
    214         moveToEnd();
    215       } else {
    216         // There is some data after the current record.
    217         auto EC = Extract(IterRef, ThisLen, ThisValue);
    218         if (EC) {
    219           consumeError(std::move(EC));
    220           markError();
    221         } else if (ThisLen == 0) {
    222           // An empty record? Make this an end iterator.
    223           moveToEnd();
    224         }
    225       }
    226     }
    227     return *this;
    228   }
    229 
    230   uint32_t offset() const { return AbsOffset; }
    231   uint32_t getRecordLength() const { return ThisLen; }
    232 
    233 private:
    234   void moveToEnd() {
    235     Array = nullptr;
    236     ThisLen = 0;
    237   }
    238   void markError() {
    239     moveToEnd();
    240     HasError = true;
    241     if (HadError != nullptr)
    242       *HadError = true;
    243   }
    244 
    245   ValueType ThisValue;
    246   BinaryStreamRef IterRef;
    247   Extractor Extract;
    248   const ArrayType *Array{nullptr};
    249   uint32_t ThisLen{0};
    250   uint32_t AbsOffset{0};
    251   bool HasError{false};
    252   bool *HadError{nullptr};
    253 };
    254 
    255 template <typename T> class FixedStreamArrayIterator;
    256 
    257 /// FixedStreamArray is similar to VarStreamArray, except with each record
    258 /// having a fixed-length.  As with VarStreamArray, there is no upfront
    259 /// cost associated with building or copying a FixedStreamArray, as the
    260 /// memory for each element is not read from the backing stream until that
    261 /// element is iterated.
    262 template <typename T> class FixedStreamArray {
    263   friend class FixedStreamArrayIterator<T>;
    264 
    265 public:
    266   typedef FixedStreamArrayIterator<T> Iterator;
    267 
    268   FixedStreamArray() = default;
    269   explicit FixedStreamArray(BinaryStreamRef Stream) : Stream(Stream) {
    270     assert(Stream.getLength() % sizeof(T) == 0);
    271   }
    272 
    273   bool operator==(const FixedStreamArray<T> &Other) const {
    274     return Stream == Other.Stream;
    275   }
    276 
    277   bool operator!=(const FixedStreamArray<T> &Other) const {
    278     return !(*this == Other);
    279   }
    280 
    281   FixedStreamArray(const FixedStreamArray &) = default;
    282   FixedStreamArray &operator=(const FixedStreamArray &) = default;
    283 
    284   const T &operator[](uint32_t Index) const {
    285     assert(Index < size());
    286     uint32_t Off = Index * sizeof(T);
    287     ArrayRef<uint8_t> Data;
    288     if (auto EC = Stream.readBytes(Off, sizeof(T), Data)) {
    289       assert(false && "Unexpected failure reading from stream");
    290       // This should never happen since we asserted that the stream length was
    291       // an exact multiple of the element size.
    292       consumeError(std::move(EC));
    293     }
    294     assert(isAddrAligned(Align::Of<T>(), Data.data()));
    295     return *reinterpret_cast<const T *>(Data.data());
    296   }
    297 
    298   uint32_t size() const { return Stream.getLength() / sizeof(T); }
    299 
    300   bool empty() const { return size() == 0; }
    301 
    302   FixedStreamArrayIterator<T> begin() const {
    303     return FixedStreamArrayIterator<T>(*this, 0);
    304   }
    305 
    306   FixedStreamArrayIterator<T> end() const {
    307     return FixedStreamArrayIterator<T>(*this, size());
    308   }
    309 
    310   const T &front() const { return *begin(); }
    311   const T &back() const {
    312     FixedStreamArrayIterator<T> I = end();
    313     return *(--I);
    314   }
    315 
    316   BinaryStreamRef getUnderlyingStream() const { return Stream; }
    317 
    318 private:
    319   BinaryStreamRef Stream;
    320 };
    321 
    322 template <typename T>
    323 class FixedStreamArrayIterator
    324     : public iterator_facade_base<FixedStreamArrayIterator<T>,
    325                                   std::random_access_iterator_tag, const T> {
    326 
    327 public:
    328   FixedStreamArrayIterator(const FixedStreamArray<T> &Array, uint32_t Index)
    329       : Array(Array), Index(Index) {}
    330 
    331   FixedStreamArrayIterator<T>(const FixedStreamArrayIterator<T> &Other)
    332       : Array(Other.Array), Index(Other.Index) {}
    333   FixedStreamArrayIterator<T> &
    334   operator=(const FixedStreamArrayIterator<T> &Other) {
    335     Array = Other.Array;
    336     Index = Other.Index;
    337     return *this;
    338   }
    339 
    340   const T &operator*() const { return Array[Index]; }
    341   const T &operator*() { return Array[Index]; }
    342 
    343   bool operator==(const FixedStreamArrayIterator<T> &R) const {
    344     assert(Array == R.Array);
    345     return (Index == R.Index) && (Array == R.Array);
    346   }
    347 
    348   FixedStreamArrayIterator<T> &operator+=(std::ptrdiff_t N) {
    349     Index += N;
    350     return *this;
    351   }
    352 
    353   FixedStreamArrayIterator<T> &operator-=(std::ptrdiff_t N) {
    354     assert(std::ptrdiff_t(Index) >= N);
    355     Index -= N;
    356     return *this;
    357   }
    358 
    359   std::ptrdiff_t operator-(const FixedStreamArrayIterator<T> &R) const {
    360     assert(Array == R.Array);
    361     assert(Index >= R.Index);
    362     return Index - R.Index;
    363   }
    364 
    365   bool operator<(const FixedStreamArrayIterator<T> &RHS) const {
    366     assert(Array == RHS.Array);
    367     return Index < RHS.Index;
    368   }
    369 
    370 private:
    371   FixedStreamArray<T> Array;
    372   uint32_t Index;
    373 };
    374 
    375 } // namespace llvm
    376 
    377 #endif // LLVM_SUPPORT_BINARYSTREAMARRAY_H
    378