Home | History | Annotate | Line # | Download | only in ProfileData
      1 //===- InstrProfReader.h - Instrumented profiling readers -------*- C++ -*-===//
      2 //
      3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
      4 // See https://llvm.org/LICENSE.txt for license information.
      5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
      6 //
      7 //===----------------------------------------------------------------------===//
      8 //
      9 // This file contains support for reading profiling data for instrumentation
     10 // based PGO and coverage.
     11 //
     12 //===----------------------------------------------------------------------===//
     13 
     14 #ifndef LLVM_PROFILEDATA_INSTRPROFREADER_H
     15 #define LLVM_PROFILEDATA_INSTRPROFREADER_H
     16 
     17 #include "llvm/ADT/ArrayRef.h"
     18 #include "llvm/ADT/StringRef.h"
     19 #include "llvm/IR/ProfileSummary.h"
     20 #include "llvm/ProfileData/InstrProf.h"
     21 #include "llvm/Support/Endian.h"
     22 #include "llvm/Support/Error.h"
     23 #include "llvm/Support/LineIterator.h"
     24 #include "llvm/Support/MemoryBuffer.h"
     25 #include "llvm/Support/OnDiskHashTable.h"
     26 #include "llvm/Support/SwapByteOrder.h"
     27 #include <algorithm>
     28 #include <cassert>
     29 #include <cstddef>
     30 #include <cstdint>
     31 #include <iterator>
     32 #include <memory>
     33 #include <utility>
     34 #include <vector>
     35 
     36 namespace llvm {
     37 
     38 class InstrProfReader;
     39 
     40 /// A file format agnostic iterator over profiling data.
     41 class InstrProfIterator {
     42 public:
     43   using iterator_category = std::input_iterator_tag;
     44   using value_type = NamedInstrProfRecord;
     45   using difference_type = std::ptrdiff_t;
     46   using pointer = value_type *;
     47   using reference = value_type &;
     48 
     49 private:
     50   InstrProfReader *Reader = nullptr;
     51   value_type Record;
     52 
     53   void Increment();
     54 
     55 public:
     56   InstrProfIterator() = default;
     57   InstrProfIterator(InstrProfReader *Reader) : Reader(Reader) { Increment(); }
     58 
     59   InstrProfIterator &operator++() { Increment(); return *this; }
     60   bool operator==(const InstrProfIterator &RHS) const {
     61     return Reader == RHS.Reader;
     62   }
     63   bool operator!=(const InstrProfIterator &RHS) const {
     64     return Reader != RHS.Reader;
     65   }
     66   value_type &operator*() { return Record; }
     67   value_type *operator->() { return &Record; }
     68 };
     69 
     70 /// Base class and interface for reading profiling data of any known instrprof
     71 /// format. Provides an iterator over NamedInstrProfRecords.
     72 class InstrProfReader {
     73   instrprof_error LastError = instrprof_error::success;
     74 
     75 public:
     76   InstrProfReader() = default;
     77   virtual ~InstrProfReader() = default;
     78 
     79   /// Read the header.  Required before reading first record.
     80   virtual Error readHeader() = 0;
     81 
     82   /// Read a single record.
     83   virtual Error readNextRecord(NamedInstrProfRecord &Record) = 0;
     84 
     85   /// Iterator over profile data.
     86   InstrProfIterator begin() { return InstrProfIterator(this); }
     87   InstrProfIterator end() { return InstrProfIterator(); }
     88 
     89   virtual bool isIRLevelProfile() const = 0;
     90 
     91   virtual bool hasCSIRLevelProfile() const = 0;
     92 
     93   virtual bool instrEntryBBEnabled() const = 0;
     94 
     95   /// Return the PGO symtab. There are three different readers:
     96   /// Raw, Text, and Indexed profile readers. The first two types
     97   /// of readers are used only by llvm-profdata tool, while the indexed
     98   /// profile reader is also used by llvm-cov tool and the compiler (
     99   /// backend or frontend). Since creating PGO symtab can create
    100   /// significant runtime and memory overhead (as it touches data
    101   /// for the whole program), InstrProfSymtab for the indexed profile
    102   /// reader should be created on demand and it is recommended to be
    103   /// only used for dumping purpose with llvm-proftool, not with the
    104   /// compiler.
    105   virtual InstrProfSymtab &getSymtab() = 0;
    106 
    107   /// Compute the sum of counts and return in Sum.
    108   void accumulateCounts(CountSumOrPercent &Sum, bool IsCS);
    109 
    110 protected:
    111   std::unique_ptr<InstrProfSymtab> Symtab;
    112 
    113   /// Set the current error and return same.
    114   Error error(instrprof_error Err) {
    115     LastError = Err;
    116     if (Err == instrprof_error::success)
    117       return Error::success();
    118     return make_error<InstrProfError>(Err);
    119   }
    120 
    121   Error error(Error &&E) { return error(InstrProfError::take(std::move(E))); }
    122 
    123   /// Clear the current error and return a successful one.
    124   Error success() { return error(instrprof_error::success); }
    125 
    126 public:
    127   /// Return true if the reader has finished reading the profile data.
    128   bool isEOF() { return LastError == instrprof_error::eof; }
    129 
    130   /// Return true if the reader encountered an error reading profiling data.
    131   bool hasError() { return LastError != instrprof_error::success && !isEOF(); }
    132 
    133   /// Get the current error.
    134   Error getError() {
    135     if (hasError())
    136       return make_error<InstrProfError>(LastError);
    137     return Error::success();
    138   }
    139 
    140   /// Factory method to create an appropriately typed reader for the given
    141   /// instrprof file.
    142   static Expected<std::unique_ptr<InstrProfReader>> create(const Twine &Path);
    143 
    144   static Expected<std::unique_ptr<InstrProfReader>>
    145   create(std::unique_ptr<MemoryBuffer> Buffer);
    146 };
    147 
    148 /// Reader for the simple text based instrprof format.
    149 ///
    150 /// This format is a simple text format that's suitable for test data. Records
    151 /// are separated by one or more blank lines, and record fields are separated by
    152 /// new lines.
    153 ///
    154 /// Each record consists of a function name, a function hash, a number of
    155 /// counters, and then each counter value, in that order.
    156 class TextInstrProfReader : public InstrProfReader {
    157 private:
    158   /// The profile data file contents.
    159   std::unique_ptr<MemoryBuffer> DataBuffer;
    160   /// Iterator over the profile data.
    161   line_iterator Line;
    162   bool IsIRLevelProfile = false;
    163   bool HasCSIRLevelProfile = false;
    164   bool InstrEntryBBEnabled = false;
    165 
    166   Error readValueProfileData(InstrProfRecord &Record);
    167 
    168 public:
    169   TextInstrProfReader(std::unique_ptr<MemoryBuffer> DataBuffer_)
    170       : DataBuffer(std::move(DataBuffer_)), Line(*DataBuffer, true, '#') {}
    171   TextInstrProfReader(const TextInstrProfReader &) = delete;
    172   TextInstrProfReader &operator=(const TextInstrProfReader &) = delete;
    173 
    174   /// Return true if the given buffer is in text instrprof format.
    175   static bool hasFormat(const MemoryBuffer &Buffer);
    176 
    177   bool isIRLevelProfile() const override { return IsIRLevelProfile; }
    178 
    179   bool hasCSIRLevelProfile() const override { return HasCSIRLevelProfile; }
    180 
    181   bool instrEntryBBEnabled() const override { return InstrEntryBBEnabled; }
    182 
    183   /// Read the header.
    184   Error readHeader() override;
    185 
    186   /// Read a single record.
    187   Error readNextRecord(NamedInstrProfRecord &Record) override;
    188 
    189   InstrProfSymtab &getSymtab() override {
    190     assert(Symtab.get());
    191     return *Symtab.get();
    192   }
    193 };
    194 
    195 /// Reader for the raw instrprof binary format from runtime.
    196 ///
    197 /// This format is a raw memory dump of the instrumentation-baed profiling data
    198 /// from the runtime.  It has no index.
    199 ///
    200 /// Templated on the unsigned type whose size matches pointers on the platform
    201 /// that wrote the profile.
    202 template <class IntPtrT>
    203 class RawInstrProfReader : public InstrProfReader {
    204 private:
    205   /// The profile data file contents.
    206   std::unique_ptr<MemoryBuffer> DataBuffer;
    207   bool ShouldSwapBytes;
    208   // The value of the version field of the raw profile data header. The lower 56
    209   // bits specifies the format version and the most significant 8 bits specify
    210   // the variant types of the profile.
    211   uint64_t Version;
    212   uint64_t CountersDelta;
    213   uint64_t NamesDelta;
    214   const RawInstrProf::ProfileData<IntPtrT> *Data;
    215   const RawInstrProf::ProfileData<IntPtrT> *DataEnd;
    216   const uint64_t *CountersStart;
    217   const char *NamesStart;
    218   uint64_t NamesSize;
    219   // After value profile is all read, this pointer points to
    220   // the header of next profile data (if exists)
    221   const uint8_t *ValueDataStart;
    222   uint32_t ValueKindLast;
    223   uint32_t CurValueDataSize;
    224 
    225 public:
    226   RawInstrProfReader(std::unique_ptr<MemoryBuffer> DataBuffer)
    227       : DataBuffer(std::move(DataBuffer)) {}
    228   RawInstrProfReader(const RawInstrProfReader &) = delete;
    229   RawInstrProfReader &operator=(const RawInstrProfReader &) = delete;
    230 
    231   static bool hasFormat(const MemoryBuffer &DataBuffer);
    232   Error readHeader() override;
    233   Error readNextRecord(NamedInstrProfRecord &Record) override;
    234 
    235   bool isIRLevelProfile() const override {
    236     return (Version & VARIANT_MASK_IR_PROF) != 0;
    237   }
    238 
    239   bool hasCSIRLevelProfile() const override {
    240     return (Version & VARIANT_MASK_CSIR_PROF) != 0;
    241   }
    242 
    243   bool instrEntryBBEnabled() const override {
    244     return (Version & VARIANT_MASK_INSTR_ENTRY) != 0;
    245   }
    246 
    247   InstrProfSymtab &getSymtab() override {
    248     assert(Symtab.get());
    249     return *Symtab.get();
    250   }
    251 
    252 private:
    253   Error createSymtab(InstrProfSymtab &Symtab);
    254   Error readNextHeader(const char *CurrentPos);
    255   Error readHeader(const RawInstrProf::Header &Header);
    256 
    257   template <class IntT> IntT swap(IntT Int) const {
    258     return ShouldSwapBytes ? sys::getSwappedBytes(Int) : Int;
    259   }
    260 
    261   support::endianness getDataEndianness() const {
    262     support::endianness HostEndian = getHostEndianness();
    263     if (!ShouldSwapBytes)
    264       return HostEndian;
    265     if (HostEndian == support::little)
    266       return support::big;
    267     else
    268       return support::little;
    269   }
    270 
    271   inline uint8_t getNumPaddingBytes(uint64_t SizeInBytes) {
    272     return 7 & (sizeof(uint64_t) - SizeInBytes % sizeof(uint64_t));
    273   }
    274 
    275   Error readName(NamedInstrProfRecord &Record);
    276   Error readFuncHash(NamedInstrProfRecord &Record);
    277   Error readRawCounts(InstrProfRecord &Record);
    278   Error readValueProfilingData(InstrProfRecord &Record);
    279   bool atEnd() const { return Data == DataEnd; }
    280 
    281   void advanceData() {
    282     Data++;
    283     ValueDataStart += CurValueDataSize;
    284   }
    285 
    286   const char *getNextHeaderPos() const {
    287       assert(atEnd());
    288       return (const char *)ValueDataStart;
    289   }
    290 
    291   /// Get the offset of \p CounterPtr from the start of the counters section of
    292   /// the profile. The offset has units of "number of counters", i.e. increasing
    293   /// the offset by 1 corresponds to an increase in the *byte offset* by 8.
    294   ptrdiff_t getCounterOffset(IntPtrT CounterPtr) const {
    295     return (swap(CounterPtr) - CountersDelta) / sizeof(uint64_t);
    296   }
    297 
    298   const uint64_t *getCounter(ptrdiff_t Offset) const {
    299     return CountersStart + Offset;
    300   }
    301 
    302   StringRef getName(uint64_t NameRef) const {
    303     return Symtab->getFuncName(swap(NameRef));
    304   }
    305 };
    306 
    307 using RawInstrProfReader32 = RawInstrProfReader<uint32_t>;
    308 using RawInstrProfReader64 = RawInstrProfReader<uint64_t>;
    309 
    310 namespace IndexedInstrProf {
    311 
    312 enum class HashT : uint32_t;
    313 
    314 } // end namespace IndexedInstrProf
    315 
    316 /// Trait for lookups into the on-disk hash table for the binary instrprof
    317 /// format.
    318 class InstrProfLookupTrait {
    319   std::vector<NamedInstrProfRecord> DataBuffer;
    320   IndexedInstrProf::HashT HashType;
    321   unsigned FormatVersion;
    322   // Endianness of the input value profile data.
    323   // It should be LE by default, but can be changed
    324   // for testing purpose.
    325   support::endianness ValueProfDataEndianness = support::little;
    326 
    327 public:
    328   InstrProfLookupTrait(IndexedInstrProf::HashT HashType, unsigned FormatVersion)
    329       : HashType(HashType), FormatVersion(FormatVersion) {}
    330 
    331   using data_type = ArrayRef<NamedInstrProfRecord>;
    332 
    333   using internal_key_type = StringRef;
    334   using external_key_type = StringRef;
    335   using hash_value_type = uint64_t;
    336   using offset_type = uint64_t;
    337 
    338   static bool EqualKey(StringRef A, StringRef B) { return A == B; }
    339   static StringRef GetInternalKey(StringRef K) { return K; }
    340   static StringRef GetExternalKey(StringRef K) { return K; }
    341 
    342   hash_value_type ComputeHash(StringRef K);
    343 
    344   static std::pair<offset_type, offset_type>
    345   ReadKeyDataLength(const unsigned char *&D) {
    346     using namespace support;
    347 
    348     offset_type KeyLen = endian::readNext<offset_type, little, unaligned>(D);
    349     offset_type DataLen = endian::readNext<offset_type, little, unaligned>(D);
    350     return std::make_pair(KeyLen, DataLen);
    351   }
    352 
    353   StringRef ReadKey(const unsigned char *D, offset_type N) {
    354     return StringRef((const char *)D, N);
    355   }
    356 
    357   bool readValueProfilingData(const unsigned char *&D,
    358                               const unsigned char *const End);
    359   data_type ReadData(StringRef K, const unsigned char *D, offset_type N);
    360 
    361   // Used for testing purpose only.
    362   void setValueProfDataEndianness(support::endianness Endianness) {
    363     ValueProfDataEndianness = Endianness;
    364   }
    365 };
    366 
    367 struct InstrProfReaderIndexBase {
    368   virtual ~InstrProfReaderIndexBase() = default;
    369 
    370   // Read all the profile records with the same key pointed to the current
    371   // iterator.
    372   virtual Error getRecords(ArrayRef<NamedInstrProfRecord> &Data) = 0;
    373 
    374   // Read all the profile records with the key equal to FuncName
    375   virtual Error getRecords(StringRef FuncName,
    376                                      ArrayRef<NamedInstrProfRecord> &Data) = 0;
    377   virtual void advanceToNextKey() = 0;
    378   virtual bool atEnd() const = 0;
    379   virtual void setValueProfDataEndianness(support::endianness Endianness) = 0;
    380   virtual uint64_t getVersion() const = 0;
    381   virtual bool isIRLevelProfile() const = 0;
    382   virtual bool hasCSIRLevelProfile() const = 0;
    383   virtual bool instrEntryBBEnabled() const = 0;
    384   virtual Error populateSymtab(InstrProfSymtab &) = 0;
    385 };
    386 
    387 using OnDiskHashTableImplV3 =
    388     OnDiskIterableChainedHashTable<InstrProfLookupTrait>;
    389 
    390 template <typename HashTableImpl>
    391 class InstrProfReaderItaniumRemapper;
    392 
    393 template <typename HashTableImpl>
    394 class InstrProfReaderIndex : public InstrProfReaderIndexBase {
    395 private:
    396   std::unique_ptr<HashTableImpl> HashTable;
    397   typename HashTableImpl::data_iterator RecordIterator;
    398   uint64_t FormatVersion;
    399 
    400   friend class InstrProfReaderItaniumRemapper<HashTableImpl>;
    401 
    402 public:
    403   InstrProfReaderIndex(const unsigned char *Buckets,
    404                        const unsigned char *const Payload,
    405                        const unsigned char *const Base,
    406                        IndexedInstrProf::HashT HashType, uint64_t Version);
    407   ~InstrProfReaderIndex() override = default;
    408 
    409   Error getRecords(ArrayRef<NamedInstrProfRecord> &Data) override;
    410   Error getRecords(StringRef FuncName,
    411                    ArrayRef<NamedInstrProfRecord> &Data) override;
    412   void advanceToNextKey() override { RecordIterator++; }
    413 
    414   bool atEnd() const override {
    415     return RecordIterator == HashTable->data_end();
    416   }
    417 
    418   void setValueProfDataEndianness(support::endianness Endianness) override {
    419     HashTable->getInfoObj().setValueProfDataEndianness(Endianness);
    420   }
    421 
    422   uint64_t getVersion() const override { return GET_VERSION(FormatVersion); }
    423 
    424   bool isIRLevelProfile() const override {
    425     return (FormatVersion & VARIANT_MASK_IR_PROF) != 0;
    426   }
    427 
    428   bool hasCSIRLevelProfile() const override {
    429     return (FormatVersion & VARIANT_MASK_CSIR_PROF) != 0;
    430   }
    431 
    432   bool instrEntryBBEnabled() const override {
    433     return (FormatVersion & VARIANT_MASK_INSTR_ENTRY) != 0;
    434   }
    435 
    436   Error populateSymtab(InstrProfSymtab &Symtab) override {
    437     return Symtab.create(HashTable->keys());
    438   }
    439 };
    440 
    441 /// Name matcher supporting fuzzy matching of symbol names to names in profiles.
    442 class InstrProfReaderRemapper {
    443 public:
    444   virtual ~InstrProfReaderRemapper() {}
    445   virtual Error populateRemappings() { return Error::success(); }
    446   virtual Error getRecords(StringRef FuncName,
    447                            ArrayRef<NamedInstrProfRecord> &Data) = 0;
    448 };
    449 
    450 /// Reader for the indexed binary instrprof format.
    451 class IndexedInstrProfReader : public InstrProfReader {
    452 private:
    453   /// The profile data file contents.
    454   std::unique_ptr<MemoryBuffer> DataBuffer;
    455   /// The profile remapping file contents.
    456   std::unique_ptr<MemoryBuffer> RemappingBuffer;
    457   /// The index into the profile data.
    458   std::unique_ptr<InstrProfReaderIndexBase> Index;
    459   /// The profile remapping file contents.
    460   std::unique_ptr<InstrProfReaderRemapper> Remapper;
    461   /// Profile summary data.
    462   std::unique_ptr<ProfileSummary> Summary;
    463   /// Context sensitive profile summary data.
    464   std::unique_ptr<ProfileSummary> CS_Summary;
    465   // Index to the current record in the record array.
    466   unsigned RecordIndex;
    467 
    468   // Read the profile summary. Return a pointer pointing to one byte past the
    469   // end of the summary data if it exists or the input \c Cur.
    470   // \c UseCS indicates whether to use the context-sensitive profile summary.
    471   const unsigned char *readSummary(IndexedInstrProf::ProfVersion Version,
    472                                    const unsigned char *Cur, bool UseCS);
    473 
    474 public:
    475   IndexedInstrProfReader(
    476       std::unique_ptr<MemoryBuffer> DataBuffer,
    477       std::unique_ptr<MemoryBuffer> RemappingBuffer = nullptr)
    478       : DataBuffer(std::move(DataBuffer)),
    479         RemappingBuffer(std::move(RemappingBuffer)), RecordIndex(0) {}
    480   IndexedInstrProfReader(const IndexedInstrProfReader &) = delete;
    481   IndexedInstrProfReader &operator=(const IndexedInstrProfReader &) = delete;
    482 
    483   /// Return the profile version.
    484   uint64_t getVersion() const { return Index->getVersion(); }
    485   bool isIRLevelProfile() const override { return Index->isIRLevelProfile(); }
    486   bool hasCSIRLevelProfile() const override {
    487     return Index->hasCSIRLevelProfile();
    488   }
    489 
    490   bool instrEntryBBEnabled() const override {
    491     return Index->instrEntryBBEnabled();
    492   }
    493 
    494   /// Return true if the given buffer is in an indexed instrprof format.
    495   static bool hasFormat(const MemoryBuffer &DataBuffer);
    496 
    497   /// Read the file header.
    498   Error readHeader() override;
    499   /// Read a single record.
    500   Error readNextRecord(NamedInstrProfRecord &Record) override;
    501 
    502   /// Return the NamedInstrProfRecord associated with FuncName and FuncHash
    503   Expected<InstrProfRecord> getInstrProfRecord(StringRef FuncName,
    504                                                uint64_t FuncHash);
    505 
    506   /// Fill Counts with the profile data for the given function name.
    507   Error getFunctionCounts(StringRef FuncName, uint64_t FuncHash,
    508                           std::vector<uint64_t> &Counts);
    509 
    510   /// Return the maximum of all known function counts.
    511   /// \c UseCS indicates whether to use the context-sensitive count.
    512   uint64_t getMaximumFunctionCount(bool UseCS) {
    513     if (UseCS) {
    514       assert(CS_Summary && "No context sensitive profile summary");
    515       return CS_Summary->getMaxFunctionCount();
    516     } else {
    517       assert(Summary && "No profile summary");
    518       return Summary->getMaxFunctionCount();
    519     }
    520   }
    521 
    522   /// Factory method to create an indexed reader.
    523   static Expected<std::unique_ptr<IndexedInstrProfReader>>
    524   create(const Twine &Path, const Twine &RemappingPath = "");
    525 
    526   static Expected<std::unique_ptr<IndexedInstrProfReader>>
    527   create(std::unique_ptr<MemoryBuffer> Buffer,
    528          std::unique_ptr<MemoryBuffer> RemappingBuffer = nullptr);
    529 
    530   // Used for testing purpose only.
    531   void setValueProfDataEndianness(support::endianness Endianness) {
    532     Index->setValueProfDataEndianness(Endianness);
    533   }
    534 
    535   // See description in the base class. This interface is designed
    536   // to be used by llvm-profdata (for dumping). Avoid using this when
    537   // the client is the compiler.
    538   InstrProfSymtab &getSymtab() override;
    539 
    540   /// Return the profile summary.
    541   /// \c UseCS indicates whether to use the context-sensitive summary.
    542   ProfileSummary &getSummary(bool UseCS) {
    543     if (UseCS) {
    544       assert(CS_Summary && "No context sensitive summary");
    545       return *(CS_Summary.get());
    546     } else {
    547       assert(Summary && "No profile summary");
    548       return *(Summary.get());
    549     }
    550   }
    551 };
    552 
    553 } // end namespace llvm
    554 
    555 #endif // LLVM_PROFILEDATA_INSTRPROFREADER_H
    556