Home | History | Annotate | Line # | Download | only in ProfileData
      1 //===- SampleProfReader.cpp - Read LLVM sample profile data ---------------===//
      2 //
      3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
      4 // See https://llvm.org/LICENSE.txt for license information.
      5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
      6 //
      7 //===----------------------------------------------------------------------===//
      8 //
      9 // This file implements the class that reads LLVM sample profiles. It
     10 // supports three file formats: text, binary and gcov.
     11 //
     12 // The textual representation is useful for debugging and testing purposes. The
     13 // binary representation is more compact, resulting in smaller file sizes.
     14 //
     15 // The gcov encoding is the one generated by GCC's AutoFDO profile creation
     16 // tool (https://github.com/google/autofdo)
     17 //
     18 // All three encodings can be used interchangeably as an input sample profile.
     19 //
     20 //===----------------------------------------------------------------------===//
     21 
     22 #include "llvm/ProfileData/SampleProfReader.h"
     23 #include "llvm/ADT/DenseMap.h"
     24 #include "llvm/ADT/STLExtras.h"
     25 #include "llvm/ADT/StringRef.h"
     26 #include "llvm/IR/ProfileSummary.h"
     27 #include "llvm/ProfileData/ProfileCommon.h"
     28 #include "llvm/ProfileData/SampleProf.h"
     29 #include "llvm/Support/Compression.h"
     30 #include "llvm/Support/ErrorOr.h"
     31 #include "llvm/Support/LEB128.h"
     32 #include "llvm/Support/LineIterator.h"
     33 #include "llvm/Support/MD5.h"
     34 #include "llvm/Support/MemoryBuffer.h"
     35 #include "llvm/Support/raw_ostream.h"
     36 #include <algorithm>
     37 #include <cstddef>
     38 #include <cstdint>
     39 #include <limits>
     40 #include <memory>
     41 #include <set>
     42 #include <system_error>
     43 #include <vector>
     44 
     45 using namespace llvm;
     46 using namespace sampleprof;
     47 
     48 /// Dump the function profile for \p FName.
     49 ///
     50 /// \param FName Name of the function to print.
     51 /// \param OS Stream to emit the output to.
     52 void SampleProfileReader::dumpFunctionProfile(StringRef FName,
     53                                               raw_ostream &OS) {
     54   OS << "Function: " << FName << ": " << Profiles[FName];
     55 }
     56 
     57 /// Dump all the function profiles found on stream \p OS.
     58 void SampleProfileReader::dump(raw_ostream &OS) {
     59   for (const auto &I : Profiles)
     60     dumpFunctionProfile(I.getKey(), OS);
     61 }
     62 
     63 /// Parse \p Input as function head.
     64 ///
     65 /// Parse one line of \p Input, and update function name in \p FName,
     66 /// function's total sample count in \p NumSamples, function's entry
     67 /// count in \p NumHeadSamples.
     68 ///
     69 /// \returns true if parsing is successful.
     70 static bool ParseHead(const StringRef &Input, StringRef &FName,
     71                       uint64_t &NumSamples, uint64_t &NumHeadSamples) {
     72   if (Input[0] == ' ')
     73     return false;
     74   size_t n2 = Input.rfind(':');
     75   size_t n1 = Input.rfind(':', n2 - 1);
     76   FName = Input.substr(0, n1);
     77   if (Input.substr(n1 + 1, n2 - n1 - 1).getAsInteger(10, NumSamples))
     78     return false;
     79   if (Input.substr(n2 + 1).getAsInteger(10, NumHeadSamples))
     80     return false;
     81   return true;
     82 }
     83 
/// Returns true if line offset \p L is legal, i.e. it fits in 16 bits.
static bool isOffsetLegal(unsigned L) {
  const unsigned MaxLegalOffset = 0xffffu;
  return L <= MaxLegalOffset;
}
     86 
     87 /// Parse \p Input that contains metadata.
     88 /// Possible metadata:
     89 /// - CFG Checksum information:
     90 ///     !CFGChecksum: 12345
     91 /// - CFG Checksum information:
     92 ///     !Attributes: 1
     93 /// Stores the FunctionHash (a.k.a. CFG Checksum) into \p FunctionHash.
     94 static bool parseMetadata(const StringRef &Input, uint64_t &FunctionHash,
     95                           uint32_t &Attributes) {
     96   if (Input.startswith("!CFGChecksum:")) {
     97     StringRef CFGInfo = Input.substr(strlen("!CFGChecksum:")).trim();
     98     return !CFGInfo.getAsInteger(10, FunctionHash);
     99   }
    100 
    101   if (Input.startswith("!Attributes:")) {
    102     StringRef Attrib = Input.substr(strlen("!Attributes:")).trim();
    103     return !Attrib.getAsInteger(10, Attributes);
    104   }
    105 
    106   return false;
    107 }
    108 
/// Kind of an indented (non-header) line in the text profile, as classified
/// by ParseLine.
enum class LineType {
  /// Line whose payload does not start with a digit: `callee:NUM`.
  CallSiteProfile = 0,
  /// Line whose payload starts with a digit: `NUM[ target:NUM]*`.
  BodyProfile = 1,
  /// Depth-1 line starting with '!'.
  Metadata = 2,
};
    114 
    115 /// Parse \p Input as line sample.
    116 ///
    117 /// \param Input input line.
    118 /// \param LineTy Type of this line.
    119 /// \param Depth the depth of the inline stack.
    120 /// \param NumSamples total samples of the line/inlined callsite.
    121 /// \param LineOffset line offset to the start of the function.
    122 /// \param Discriminator discriminator of the line.
    123 /// \param TargetCountMap map from indirect call target to count.
    124 /// \param FunctionHash the function's CFG hash, used by pseudo probe.
    125 ///
    126 /// returns true if parsing is successful.
    127 static bool ParseLine(const StringRef &Input, LineType &LineTy, uint32_t &Depth,
    128                       uint64_t &NumSamples, uint32_t &LineOffset,
    129                       uint32_t &Discriminator, StringRef &CalleeName,
    130                       DenseMap<StringRef, uint64_t> &TargetCountMap,
    131                       uint64_t &FunctionHash, uint32_t &Attributes) {
    132   for (Depth = 0; Input[Depth] == ' '; Depth++)
    133     ;
    134   if (Depth == 0)
    135     return false;
    136 
    137   if (Depth == 1 && Input[Depth] == '!') {
    138     LineTy = LineType::Metadata;
    139     return parseMetadata(Input.substr(Depth), FunctionHash, Attributes);
    140   }
    141 
    142   size_t n1 = Input.find(':');
    143   StringRef Loc = Input.substr(Depth, n1 - Depth);
    144   size_t n2 = Loc.find('.');
    145   if (n2 == StringRef::npos) {
    146     if (Loc.getAsInteger(10, LineOffset) || !isOffsetLegal(LineOffset))
    147       return false;
    148     Discriminator = 0;
    149   } else {
    150     if (Loc.substr(0, n2).getAsInteger(10, LineOffset))
    151       return false;
    152     if (Loc.substr(n2 + 1).getAsInteger(10, Discriminator))
    153       return false;
    154   }
    155 
    156   StringRef Rest = Input.substr(n1 + 2);
    157   if (isDigit(Rest[0])) {
    158     LineTy = LineType::BodyProfile;
    159     size_t n3 = Rest.find(' ');
    160     if (n3 == StringRef::npos) {
    161       if (Rest.getAsInteger(10, NumSamples))
    162         return false;
    163     } else {
    164       if (Rest.substr(0, n3).getAsInteger(10, NumSamples))
    165         return false;
    166     }
    167     // Find call targets and their sample counts.
    168     // Note: In some cases, there are symbols in the profile which are not
    169     // mangled. To accommodate such cases, use colon + integer pairs as the
    170     // anchor points.
    171     // An example:
    172     // _M_construct<char *>:1000 string_view<std::allocator<char> >:437
    173     // ":1000" and ":437" are used as anchor points so the string above will
    174     // be interpreted as
    175     // target: _M_construct<char *>
    176     // count: 1000
    177     // target: string_view<std::allocator<char> >
    178     // count: 437
    179     while (n3 != StringRef::npos) {
    180       n3 += Rest.substr(n3).find_first_not_of(' ');
    181       Rest = Rest.substr(n3);
    182       n3 = Rest.find_first_of(':');
    183       if (n3 == StringRef::npos || n3 == 0)
    184         return false;
    185 
    186       StringRef Target;
    187       uint64_t count, n4;
    188       while (true) {
    189         // Get the segment after the current colon.
    190         StringRef AfterColon = Rest.substr(n3 + 1);
    191         // Get the target symbol before the current colon.
    192         Target = Rest.substr(0, n3);
    193         // Check if the word after the current colon is an integer.
    194         n4 = AfterColon.find_first_of(' ');
    195         n4 = (n4 != StringRef::npos) ? n3 + n4 + 1 : Rest.size();
    196         StringRef WordAfterColon = Rest.substr(n3 + 1, n4 - n3 - 1);
    197         if (!WordAfterColon.getAsInteger(10, count))
    198           break;
    199 
    200         // Try to find the next colon.
    201         uint64_t n5 = AfterColon.find_first_of(':');
    202         if (n5 == StringRef::npos)
    203           return false;
    204         n3 += n5 + 1;
    205       }
    206 
    207       // An anchor point is found. Save the {target, count} pair
    208       TargetCountMap[Target] = count;
    209       if (n4 == Rest.size())
    210         break;
    211       // Change n3 to the next blank space after colon + integer pair.
    212       n3 = n4;
    213     }
    214   } else {
    215     LineTy = LineType::CallSiteProfile;
    216     size_t n3 = Rest.find_last_of(':');
    217     CalleeName = Rest.substr(0, n3);
    218     if (Rest.substr(n3 + 1).getAsInteger(10, NumSamples))
    219       return false;
    220   }
    221   return true;
    222 }
    223 
    224 /// Load samples from a text file.
    225 ///
    226 /// See the documentation at the top of the file for an explanation of
    227 /// the expected format.
    228 ///
    229 /// \returns true if the file was loaded successfully, false otherwise.
    230 std::error_code SampleProfileReaderText::readImpl() {
    231   line_iterator LineIt(*Buffer, /*SkipBlanks=*/true, '#');
    232   sampleprof_error Result = sampleprof_error::success;
    233 
    234   InlineCallStack InlineStack;
    235   uint32_t ProbeProfileCount = 0;
    236 
    237   // SeenMetadata tracks whether we have processed metadata for the current
    238   // top-level function profile.
    239   bool SeenMetadata = false;
    240 
    241   for (; !LineIt.is_at_eof(); ++LineIt) {
    242     if ((*LineIt)[(*LineIt).find_first_not_of(' ')] == '#')
    243       continue;
    244     // Read the header of each function.
    245     //
    246     // Note that for function identifiers we are actually expecting
    247     // mangled names, but we may not always get them. This happens when
    248     // the compiler decides not to emit the function (e.g., it was inlined
    249     // and removed). In this case, the binary will not have the linkage
    250     // name for the function, so the profiler will emit the function's
    251     // unmangled name, which may contain characters like ':' and '>' in its
    252     // name (member functions, templates, etc).
    253     //
    254     // The only requirement we place on the identifier, then, is that it
    255     // should not begin with a number.
    256     if ((*LineIt)[0] != ' ') {
    257       uint64_t NumSamples, NumHeadSamples;
    258       StringRef FName;
    259       if (!ParseHead(*LineIt, FName, NumSamples, NumHeadSamples)) {
    260         reportError(LineIt.line_number(),
    261                     "Expected 'mangled_name:NUM:NUM', found " + *LineIt);
    262         return sampleprof_error::malformed;
    263       }
    264       SeenMetadata = false;
    265       SampleContext FContext(FName);
    266       if (FContext.hasContext())
    267         ++CSProfileCount;
    268       Profiles[FContext] = FunctionSamples();
    269       FunctionSamples &FProfile = Profiles[FContext];
    270       FProfile.setName(FContext.getNameWithoutContext());
    271       FProfile.setContext(FContext);
    272       MergeResult(Result, FProfile.addTotalSamples(NumSamples));
    273       MergeResult(Result, FProfile.addHeadSamples(NumHeadSamples));
    274       InlineStack.clear();
    275       InlineStack.push_back(&FProfile);
    276     } else {
    277       uint64_t NumSamples;
    278       StringRef FName;
    279       DenseMap<StringRef, uint64_t> TargetCountMap;
    280       uint32_t Depth, LineOffset, Discriminator;
    281       LineType LineTy;
    282       uint64_t FunctionHash = 0;
    283       uint32_t Attributes = 0;
    284       if (!ParseLine(*LineIt, LineTy, Depth, NumSamples, LineOffset,
    285                      Discriminator, FName, TargetCountMap, FunctionHash,
    286                      Attributes)) {
    287         reportError(LineIt.line_number(),
    288                     "Expected 'NUM[.NUM]: NUM[ mangled_name:NUM]*', found " +
    289                         *LineIt);
    290         return sampleprof_error::malformed;
    291       }
    292       if (SeenMetadata && LineTy != LineType::Metadata) {
    293         // Metadata must be put at the end of a function profile.
    294         reportError(LineIt.line_number(),
    295                     "Found non-metadata after metadata: " + *LineIt);
    296         return sampleprof_error::malformed;
    297       }
    298       while (InlineStack.size() > Depth) {
    299         InlineStack.pop_back();
    300       }
    301       switch (LineTy) {
    302       case LineType::CallSiteProfile: {
    303         FunctionSamples &FSamples = InlineStack.back()->functionSamplesAt(
    304             LineLocation(LineOffset, Discriminator))[std::string(FName)];
    305         FSamples.setName(FName);
    306         MergeResult(Result, FSamples.addTotalSamples(NumSamples));
    307         InlineStack.push_back(&FSamples);
    308         break;
    309       }
    310       case LineType::BodyProfile: {
    311         while (InlineStack.size() > Depth) {
    312           InlineStack.pop_back();
    313         }
    314         FunctionSamples &FProfile = *InlineStack.back();
    315         for (const auto &name_count : TargetCountMap) {
    316           MergeResult(Result, FProfile.addCalledTargetSamples(
    317                                   LineOffset, Discriminator, name_count.first,
    318                                   name_count.second));
    319         }
    320         MergeResult(Result, FProfile.addBodySamples(LineOffset, Discriminator,
    321                                                     NumSamples));
    322         break;
    323       }
    324       case LineType::Metadata: {
    325         FunctionSamples &FProfile = *InlineStack.back();
    326         if (FunctionHash) {
    327           FProfile.setFunctionHash(FunctionHash);
    328           ++ProbeProfileCount;
    329         }
    330         if (Attributes)
    331           FProfile.getContext().setAllAttributes(Attributes);
    332         SeenMetadata = true;
    333         break;
    334       }
    335       }
    336     }
    337   }
    338 
    339   assert((CSProfileCount == 0 || CSProfileCount == Profiles.size()) &&
    340          "Cannot have both context-sensitive and regular profile");
    341   ProfileIsCS = (CSProfileCount > 0);
    342   assert((ProbeProfileCount == 0 || ProbeProfileCount == Profiles.size()) &&
    343          "Cannot have both probe-based profiles and regular profiles");
    344   ProfileIsProbeBased = (ProbeProfileCount > 0);
    345   FunctionSamples::ProfileIsProbeBased = ProfileIsProbeBased;
    346   FunctionSamples::ProfileIsCS = ProfileIsCS;
    347 
    348   if (Result == sampleprof_error::success)
    349     computeSummary();
    350 
    351   return Result;
    352 }
    353 
    354 bool SampleProfileReaderText::hasFormat(const MemoryBuffer &Buffer) {
    355   bool result = false;
    356 
    357   // Check that the first non-comment line is a valid function header.
    358   line_iterator LineIt(Buffer, /*SkipBlanks=*/true, '#');
    359   if (!LineIt.is_at_eof()) {
    360     if ((*LineIt)[0] != ' ') {
    361       uint64_t NumSamples, NumHeadSamples;
    362       StringRef FName;
    363       result = ParseHead(*LineIt, FName, NumSamples, NumHeadSamples);
    364     }
    365   }
    366 
    367   return result;
    368 }
    369 
    370 template <typename T> ErrorOr<T> SampleProfileReaderBinary::readNumber() {
    371   unsigned NumBytesRead = 0;
    372   std::error_code EC;
    373   uint64_t Val = decodeULEB128(Data, &NumBytesRead);
    374 
    375   if (Val > std::numeric_limits<T>::max())
    376     EC = sampleprof_error::malformed;
    377   else if (Data + NumBytesRead > End)
    378     EC = sampleprof_error::truncated;
    379   else
    380     EC = sampleprof_error::success;
    381 
    382   if (EC) {
    383     reportError(0, EC.message());
    384     return EC;
    385   }
    386 
    387   Data += NumBytesRead;
    388   return static_cast<T>(Val);
    389 }
    390 
    391 ErrorOr<StringRef> SampleProfileReaderBinary::readString() {
    392   std::error_code EC;
    393   StringRef Str(reinterpret_cast<const char *>(Data));
    394   if (Data + Str.size() + 1 > End) {
    395     EC = sampleprof_error::truncated;
    396     reportError(0, EC.message());
    397     return EC;
    398   }
    399 
    400   Data += Str.size() + 1;
    401   return Str;
    402 }
    403 
    404 template <typename T>
    405 ErrorOr<T> SampleProfileReaderBinary::readUnencodedNumber() {
    406   std::error_code EC;
    407 
    408   if (Data + sizeof(T) > End) {
    409     EC = sampleprof_error::truncated;
    410     reportError(0, EC.message());
    411     return EC;
    412   }
    413 
    414   using namespace support;
    415   T Val = endian::readNext<T, little, unaligned>(Data);
    416   return Val;
    417 }
    418 
    419 template <typename T>
    420 inline ErrorOr<uint32_t> SampleProfileReaderBinary::readStringIndex(T &Table) {
    421   std::error_code EC;
    422   auto Idx = readNumber<uint32_t>();
    423   if (std::error_code EC = Idx.getError())
    424     return EC;
    425   if (*Idx >= Table.size())
    426     return sampleprof_error::truncated_name_table;
    427   return *Idx;
    428 }
    429 
    430 ErrorOr<StringRef> SampleProfileReaderBinary::readStringFromTable() {
    431   auto Idx = readStringIndex(NameTable);
    432   if (std::error_code EC = Idx.getError())
    433     return EC;
    434 
    435   return NameTable[*Idx];
    436 }
    437 
    438 ErrorOr<StringRef> SampleProfileReaderExtBinaryBase::readStringFromTable() {
    439   if (!FixedLengthMD5)
    440     return SampleProfileReaderBinary::readStringFromTable();
    441 
    442   // read NameTable index.
    443   auto Idx = readStringIndex(NameTable);
    444   if (std::error_code EC = Idx.getError())
    445     return EC;
    446 
    447   // Check whether the name to be accessed has been accessed before,
    448   // if not, read it from memory directly.
    449   StringRef &SR = NameTable[*Idx];
    450   if (SR.empty()) {
    451     const uint8_t *SavedData = Data;
    452     Data = MD5NameMemStart + ((*Idx) * sizeof(uint64_t));
    453     auto FID = readUnencodedNumber<uint64_t>();
    454     if (std::error_code EC = FID.getError())
    455       return EC;
    456     // Save the string converted from uint64_t in MD5StringBuf. All the
    457     // references to the name are all StringRefs refering to the string
    458     // in MD5StringBuf.
    459     MD5StringBuf->push_back(std::to_string(*FID));
    460     SR = MD5StringBuf->back();
    461     Data = SavedData;
    462   }
    463   return SR;
    464 }
    465 
    466 ErrorOr<StringRef> SampleProfileReaderCompactBinary::readStringFromTable() {
    467   auto Idx = readStringIndex(NameTable);
    468   if (std::error_code EC = Idx.getError())
    469     return EC;
    470 
    471   return StringRef(NameTable[*Idx]);
    472 }
    473 
    474 std::error_code
    475 SampleProfileReaderBinary::readProfile(FunctionSamples &FProfile) {
    476   auto NumSamples = readNumber<uint64_t>();
    477   if (std::error_code EC = NumSamples.getError())
    478     return EC;
    479   FProfile.addTotalSamples(*NumSamples);
    480 
    481   // Read the samples in the body.
    482   auto NumRecords = readNumber<uint32_t>();
    483   if (std::error_code EC = NumRecords.getError())
    484     return EC;
    485 
    486   for (uint32_t I = 0; I < *NumRecords; ++I) {
    487     auto LineOffset = readNumber<uint64_t>();
    488     if (std::error_code EC = LineOffset.getError())
    489       return EC;
    490 
    491     if (!isOffsetLegal(*LineOffset)) {
    492       return std::error_code();
    493     }
    494 
    495     auto Discriminator = readNumber<uint64_t>();
    496     if (std::error_code EC = Discriminator.getError())
    497       return EC;
    498 
    499     auto NumSamples = readNumber<uint64_t>();
    500     if (std::error_code EC = NumSamples.getError())
    501       return EC;
    502 
    503     auto NumCalls = readNumber<uint32_t>();
    504     if (std::error_code EC = NumCalls.getError())
    505       return EC;
    506 
    507     for (uint32_t J = 0; J < *NumCalls; ++J) {
    508       auto CalledFunction(readStringFromTable());
    509       if (std::error_code EC = CalledFunction.getError())
    510         return EC;
    511 
    512       auto CalledFunctionSamples = readNumber<uint64_t>();
    513       if (std::error_code EC = CalledFunctionSamples.getError())
    514         return EC;
    515 
    516       FProfile.addCalledTargetSamples(*LineOffset, *Discriminator,
    517                                       *CalledFunction, *CalledFunctionSamples);
    518     }
    519 
    520     FProfile.addBodySamples(*LineOffset, *Discriminator, *NumSamples);
    521   }
    522 
    523   // Read all the samples for inlined function calls.
    524   auto NumCallsites = readNumber<uint32_t>();
    525   if (std::error_code EC = NumCallsites.getError())
    526     return EC;
    527 
    528   for (uint32_t J = 0; J < *NumCallsites; ++J) {
    529     auto LineOffset = readNumber<uint64_t>();
    530     if (std::error_code EC = LineOffset.getError())
    531       return EC;
    532 
    533     auto Discriminator = readNumber<uint64_t>();
    534     if (std::error_code EC = Discriminator.getError())
    535       return EC;
    536 
    537     auto FName(readStringFromTable());
    538     if (std::error_code EC = FName.getError())
    539       return EC;
    540 
    541     FunctionSamples &CalleeProfile = FProfile.functionSamplesAt(
    542         LineLocation(*LineOffset, *Discriminator))[std::string(*FName)];
    543     CalleeProfile.setName(*FName);
    544     if (std::error_code EC = readProfile(CalleeProfile))
    545       return EC;
    546   }
    547 
    548   return sampleprof_error::success;
    549 }
    550 
    551 std::error_code
    552 SampleProfileReaderBinary::readFuncProfile(const uint8_t *Start) {
    553   Data = Start;
    554   auto NumHeadSamples = readNumber<uint64_t>();
    555   if (std::error_code EC = NumHeadSamples.getError())
    556     return EC;
    557 
    558   auto FName(readStringFromTable());
    559   if (std::error_code EC = FName.getError())
    560     return EC;
    561 
    562   SampleContext FContext(*FName);
    563   Profiles[FContext] = FunctionSamples();
    564   FunctionSamples &FProfile = Profiles[FContext];
    565   FProfile.setName(FContext.getNameWithoutContext());
    566   FProfile.setContext(FContext);
    567   FProfile.addHeadSamples(*NumHeadSamples);
    568 
    569   if (FContext.hasContext())
    570     CSProfileCount++;
    571 
    572   if (std::error_code EC = readProfile(FProfile))
    573     return EC;
    574   return sampleprof_error::success;
    575 }
    576 
    577 std::error_code SampleProfileReaderBinary::readImpl() {
    578   while (!at_eof()) {
    579     if (std::error_code EC = readFuncProfile(Data))
    580       return EC;
    581   }
    582 
    583   return sampleprof_error::success;
    584 }
    585 
    586 std::error_code SampleProfileReaderExtBinaryBase::readOneSection(
    587     const uint8_t *Start, uint64_t Size, const SecHdrTableEntry &Entry) {
    588   Data = Start;
    589   End = Start + Size;
    590   switch (Entry.Type) {
    591   case SecProfSummary:
    592     if (std::error_code EC = readSummary())
    593       return EC;
    594     if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagPartial))
    595       Summary->setPartialProfile(true);
    596     if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagFullContext))
    597       FunctionSamples::ProfileIsCS = ProfileIsCS = true;
    598     break;
    599   case SecNameTable: {
    600     FixedLengthMD5 =
    601         hasSecFlag(Entry, SecNameTableFlags::SecFlagFixedLengthMD5);
    602     bool UseMD5 = hasSecFlag(Entry, SecNameTableFlags::SecFlagMD5Name);
    603     assert((!FixedLengthMD5 || UseMD5) &&
    604            "If FixedLengthMD5 is true, UseMD5 has to be true");
    605     FunctionSamples::HasUniqSuffix =
    606         hasSecFlag(Entry, SecNameTableFlags::SecFlagUniqSuffix);
    607     if (std::error_code EC = readNameTableSec(UseMD5))
    608       return EC;
    609     break;
    610   }
    611   case SecLBRProfile:
    612     if (std::error_code EC = readFuncProfiles())
    613       return EC;
    614     break;
    615   case SecFuncOffsetTable:
    616     if (std::error_code EC = readFuncOffsetTable())
    617       return EC;
    618     break;
    619   case SecFuncMetadata: {
    620     ProfileIsProbeBased =
    621         hasSecFlag(Entry, SecFuncMetadataFlags::SecFlagIsProbeBased);
    622     FunctionSamples::ProfileIsProbeBased = ProfileIsProbeBased;
    623     bool HasAttribute =
    624         hasSecFlag(Entry, SecFuncMetadataFlags::SecFlagHasAttribute);
    625     if (std::error_code EC = readFuncMetadata(HasAttribute))
    626       return EC;
    627     break;
    628   }
    629   case SecProfileSymbolList:
    630     if (std::error_code EC = readProfileSymbolList())
    631       return EC;
    632     break;
    633   default:
    634     if (std::error_code EC = readCustomSection(Entry))
    635       return EC;
    636     break;
    637   }
    638   return sampleprof_error::success;
    639 }
    640 
    641 bool SampleProfileReaderExtBinaryBase::collectFuncsFromModule() {
    642   if (!M)
    643     return false;
    644   FuncsToUse.clear();
    645   for (auto &F : *M)
    646     FuncsToUse.insert(FunctionSamples::getCanonicalFnName(F));
    647   return true;
    648 }
    649 
    650 std::error_code SampleProfileReaderExtBinaryBase::readFuncOffsetTable() {
    651   // If there are more than one FuncOffsetTable, the profile read associated
    652   // with previous FuncOffsetTable has to be done before next FuncOffsetTable
    653   // is read.
    654   FuncOffsetTable.clear();
    655 
    656   auto Size = readNumber<uint64_t>();
    657   if (std::error_code EC = Size.getError())
    658     return EC;
    659 
    660   FuncOffsetTable.reserve(*Size);
    661   for (uint32_t I = 0; I < *Size; ++I) {
    662     auto FName(readStringFromTable());
    663     if (std::error_code EC = FName.getError())
    664       return EC;
    665 
    666     auto Offset = readNumber<uint64_t>();
    667     if (std::error_code EC = Offset.getError())
    668       return EC;
    669 
    670     FuncOffsetTable[*FName] = *Offset;
    671   }
    672   return sampleprof_error::success;
    673 }
    674 
    675 std::error_code SampleProfileReaderExtBinaryBase::readFuncProfiles() {
    676   // Collect functions used by current module if the Reader has been
    677   // given a module.
    678   // collectFuncsFromModule uses FunctionSamples::getCanonicalFnName
    679   // which will query FunctionSamples::HasUniqSuffix, so it has to be
    680   // called after FunctionSamples::HasUniqSuffix is set, i.e. after
    681   // NameTable section is read.
    682   bool LoadFuncsToBeUsed = collectFuncsFromModule();
    683 
    684   // When LoadFuncsToBeUsed is false, load all the function profiles.
    685   const uint8_t *Start = Data;
    686   if (!LoadFuncsToBeUsed) {
    687     while (Data < End) {
    688       if (std::error_code EC = readFuncProfile(Data))
    689         return EC;
    690     }
    691     assert(Data == End && "More data is read than expected");
    692   } else {
    693     // Load function profiles on demand.
    694     if (Remapper) {
    695       for (auto Name : FuncsToUse) {
    696         Remapper->insert(Name);
    697       }
    698     }
    699 
    700     if (useMD5()) {
    701       for (auto Name : FuncsToUse) {
    702         auto GUID = std::to_string(MD5Hash(Name));
    703         auto iter = FuncOffsetTable.find(StringRef(GUID));
    704         if (iter == FuncOffsetTable.end())
    705           continue;
    706         const uint8_t *FuncProfileAddr = Start + iter->second;
    707         assert(FuncProfileAddr < End && "out of LBRProfile section");
    708         if (std::error_code EC = readFuncProfile(FuncProfileAddr))
    709           return EC;
    710       }
    711     } else if (FunctionSamples::ProfileIsCS) {
    712       // Compute the ordered set of names, so we can
    713       // get all context profiles under a subtree by
    714       // iterating through the ordered names.
    715       struct Comparer {
    716         // Ignore the closing ']' when ordering context
    717         bool operator()(const StringRef &L, const StringRef &R) const {
    718           return L.substr(0, L.size() - 1) < R.substr(0, R.size() - 1);
    719         }
    720       };
    721       std::set<StringRef, Comparer> OrderedNames;
    722       for (auto Name : FuncOffsetTable) {
    723         OrderedNames.insert(Name.first);
    724       }
    725 
    726       // For each function in current module, load all
    727       // context profiles for the function.
    728       for (auto NameOffset : FuncOffsetTable) {
    729         StringRef ContextName = NameOffset.first;
    730         SampleContext FContext(ContextName);
    731         auto FuncName = FContext.getNameWithoutContext();
    732         if (!FuncsToUse.count(FuncName) &&
    733             (!Remapper || !Remapper->exist(FuncName)))
    734           continue;
    735 
    736         // For each context profile we need, try to load
    737         // all context profile in the subtree. This can
    738         // help profile guided importing for ThinLTO.
    739         auto It = OrderedNames.find(ContextName);
    740         while (It != OrderedNames.end() &&
    741                It->startswith(ContextName.substr(0, ContextName.size() - 1))) {
    742           const uint8_t *FuncProfileAddr = Start + FuncOffsetTable[*It];
    743           assert(FuncProfileAddr < End && "out of LBRProfile section");
    744           if (std::error_code EC = readFuncProfile(FuncProfileAddr))
    745             return EC;
    746           // Remove loaded context profile so we won't
    747           // load it repeatedly.
    748           It = OrderedNames.erase(It);
    749         }
    750       }
    751     } else {
    752       for (auto NameOffset : FuncOffsetTable) {
    753         SampleContext FContext(NameOffset.first);
    754         auto FuncName = FContext.getNameWithoutContext();
    755         if (!FuncsToUse.count(FuncName) &&
    756             (!Remapper || !Remapper->exist(FuncName)))
    757           continue;
    758         const uint8_t *FuncProfileAddr = Start + NameOffset.second;
    759         assert(FuncProfileAddr < End && "out of LBRProfile section");
    760         if (std::error_code EC = readFuncProfile(FuncProfileAddr))
    761           return EC;
    762       }
    763     }
    764     Data = End;
    765   }
    766   assert((CSProfileCount == 0 || CSProfileCount == Profiles.size()) &&
    767          "Cannot have both context-sensitive and regular profile");
    768   assert(ProfileIsCS == (CSProfileCount > 0) &&
    769          "Section flag should be consistent with actual profile");
    770   return sampleprof_error::success;
    771 }
    772 
    773 std::error_code SampleProfileReaderExtBinaryBase::readProfileSymbolList() {
    774   if (!ProfSymList)
    775     ProfSymList = std::make_unique<ProfileSymbolList>();
    776 
    777   if (std::error_code EC = ProfSymList->read(Data, End - Data))
    778     return EC;
    779 
    780   Data = End;
    781   return sampleprof_error::success;
    782 }
    783 
    784 std::error_code SampleProfileReaderExtBinaryBase::decompressSection(
    785     const uint8_t *SecStart, const uint64_t SecSize,
    786     const uint8_t *&DecompressBuf, uint64_t &DecompressBufSize) {
    787   Data = SecStart;
    788   End = SecStart + SecSize;
    789   auto DecompressSize = readNumber<uint64_t>();
    790   if (std::error_code EC = DecompressSize.getError())
    791     return EC;
    792   DecompressBufSize = *DecompressSize;
    793 
    794   auto CompressSize = readNumber<uint64_t>();
    795   if (std::error_code EC = CompressSize.getError())
    796     return EC;
    797 
    798   if (!llvm::zlib::isAvailable())
    799     return sampleprof_error::zlib_unavailable;
    800 
    801   StringRef CompressedStrings(reinterpret_cast<const char *>(Data),
    802                               *CompressSize);
    803   char *Buffer = Allocator.Allocate<char>(DecompressBufSize);
    804   size_t UCSize = DecompressBufSize;
    805   llvm::Error E =
    806       zlib::uncompress(CompressedStrings, Buffer, UCSize);
    807   if (E)
    808     return sampleprof_error::uncompress_failed;
    809   DecompressBuf = reinterpret_cast<const uint8_t *>(Buffer);
    810   return sampleprof_error::success;
    811 }
    812 
    813 std::error_code SampleProfileReaderExtBinaryBase::readImpl() {
    814   const uint8_t *BufStart =
    815       reinterpret_cast<const uint8_t *>(Buffer->getBufferStart());
    816 
    817   for (auto &Entry : SecHdrTable) {
    818     // Skip empty section.
    819     if (!Entry.Size)
    820       continue;
    821 
    822     // Skip sections without context when SkipFlatProf is true.
    823     if (SkipFlatProf && hasSecFlag(Entry, SecCommonFlags::SecFlagFlat))
    824       continue;
    825 
    826     const uint8_t *SecStart = BufStart + Entry.Offset;
    827     uint64_t SecSize = Entry.Size;
    828 
    829     // If the section is compressed, decompress it into a buffer
    830     // DecompressBuf before reading the actual data. The pointee of
    831     // 'Data' will be changed to buffer hold by DecompressBuf
    832     // temporarily when reading the actual data.
    833     bool isCompressed = hasSecFlag(Entry, SecCommonFlags::SecFlagCompress);
    834     if (isCompressed) {
    835       const uint8_t *DecompressBuf;
    836       uint64_t DecompressBufSize;
    837       if (std::error_code EC = decompressSection(
    838               SecStart, SecSize, DecompressBuf, DecompressBufSize))
    839         return EC;
    840       SecStart = DecompressBuf;
    841       SecSize = DecompressBufSize;
    842     }
    843 
    844     if (std::error_code EC = readOneSection(SecStart, SecSize, Entry))
    845       return EC;
    846     if (Data != SecStart + SecSize)
    847       return sampleprof_error::malformed;
    848 
    849     // Change the pointee of 'Data' from DecompressBuf to original Buffer.
    850     if (isCompressed) {
    851       Data = BufStart + Entry.Offset;
    852       End = BufStart + Buffer->getBufferSize();
    853     }
    854   }
    855 
    856   return sampleprof_error::success;
    857 }
    858 
    859 std::error_code SampleProfileReaderCompactBinary::readImpl() {
    860   // Collect functions used by current module if the Reader has been
    861   // given a module.
    862   bool LoadFuncsToBeUsed = collectFuncsFromModule();
    863 
    864   std::vector<uint64_t> OffsetsToUse;
    865   if (!LoadFuncsToBeUsed) {
    866     // load all the function profiles.
    867     for (auto FuncEntry : FuncOffsetTable) {
    868       OffsetsToUse.push_back(FuncEntry.second);
    869     }
    870   } else {
    871     // load function profiles on demand.
    872     for (auto Name : FuncsToUse) {
    873       auto GUID = std::to_string(MD5Hash(Name));
    874       auto iter = FuncOffsetTable.find(StringRef(GUID));
    875       if (iter == FuncOffsetTable.end())
    876         continue;
    877       OffsetsToUse.push_back(iter->second);
    878     }
    879   }
    880 
    881   for (auto Offset : OffsetsToUse) {
    882     const uint8_t *SavedData = Data;
    883     if (std::error_code EC = readFuncProfile(
    884             reinterpret_cast<const uint8_t *>(Buffer->getBufferStart()) +
    885             Offset))
    886       return EC;
    887     Data = SavedData;
    888   }
    889   return sampleprof_error::success;
    890 }
    891 
    892 std::error_code SampleProfileReaderRawBinary::verifySPMagic(uint64_t Magic) {
    893   if (Magic == SPMagic())
    894     return sampleprof_error::success;
    895   return sampleprof_error::bad_magic;
    896 }
    897 
    898 std::error_code SampleProfileReaderExtBinary::verifySPMagic(uint64_t Magic) {
    899   if (Magic == SPMagic(SPF_Ext_Binary))
    900     return sampleprof_error::success;
    901   return sampleprof_error::bad_magic;
    902 }
    903 
    904 std::error_code
    905 SampleProfileReaderCompactBinary::verifySPMagic(uint64_t Magic) {
    906   if (Magic == SPMagic(SPF_Compact_Binary))
    907     return sampleprof_error::success;
    908   return sampleprof_error::bad_magic;
    909 }
    910 
    911 std::error_code SampleProfileReaderBinary::readNameTable() {
    912   auto Size = readNumber<uint32_t>();
    913   if (std::error_code EC = Size.getError())
    914     return EC;
    915   NameTable.reserve(*Size + NameTable.size());
    916   for (uint32_t I = 0; I < *Size; ++I) {
    917     auto Name(readString());
    918     if (std::error_code EC = Name.getError())
    919       return EC;
    920     NameTable.push_back(*Name);
    921   }
    922 
    923   return sampleprof_error::success;
    924 }
    925 
    926 std::error_code SampleProfileReaderExtBinaryBase::readMD5NameTable() {
    927   auto Size = readNumber<uint64_t>();
    928   if (std::error_code EC = Size.getError())
    929     return EC;
    930   MD5StringBuf = std::make_unique<std::vector<std::string>>();
    931   MD5StringBuf->reserve(*Size);
    932   if (FixedLengthMD5) {
    933     // Preallocate and initialize NameTable so we can check whether a name
    934     // index has been read before by checking whether the element in the
    935     // NameTable is empty, meanwhile readStringIndex can do the boundary
    936     // check using the size of NameTable.
    937     NameTable.resize(*Size + NameTable.size());
    938 
    939     MD5NameMemStart = Data;
    940     Data = Data + (*Size) * sizeof(uint64_t);
    941     return sampleprof_error::success;
    942   }
    943   NameTable.reserve(*Size);
    944   for (uint32_t I = 0; I < *Size; ++I) {
    945     auto FID = readNumber<uint64_t>();
    946     if (std::error_code EC = FID.getError())
    947       return EC;
    948     MD5StringBuf->push_back(std::to_string(*FID));
    949     // NameTable is a vector of StringRef. Here it is pushing back a
    950     // StringRef initialized with the last string in MD5stringBuf.
    951     NameTable.push_back(MD5StringBuf->back());
    952   }
    953   return sampleprof_error::success;
    954 }
    955 
    956 std::error_code SampleProfileReaderExtBinaryBase::readNameTableSec(bool IsMD5) {
    957   if (IsMD5)
    958     return readMD5NameTable();
    959   return SampleProfileReaderBinary::readNameTable();
    960 }
    961 
    962 std::error_code
    963 SampleProfileReaderExtBinaryBase::readFuncMetadata(bool ProfileHasAttribute) {
    964   while (Data < End) {
    965     auto FName(readStringFromTable());
    966     if (std::error_code EC = FName.getError())
    967       return EC;
    968 
    969     SampleContext FContext(*FName);
    970     bool ProfileInMap = Profiles.count(FContext);
    971 
    972     if (ProfileIsProbeBased) {
    973       auto Checksum = readNumber<uint64_t>();
    974       if (std::error_code EC = Checksum.getError())
    975         return EC;
    976       if (ProfileInMap)
    977         Profiles[FContext].setFunctionHash(*Checksum);
    978     }
    979 
    980     if (ProfileHasAttribute) {
    981       auto Attributes = readNumber<uint32_t>();
    982       if (std::error_code EC = Attributes.getError())
    983         return EC;
    984       if (ProfileInMap)
    985         Profiles[FContext].getContext().setAllAttributes(*Attributes);
    986     }
    987   }
    988 
    989   assert(Data == End && "More data is read than expected");
    990   return sampleprof_error::success;
    991 }
    992 
    993 std::error_code SampleProfileReaderCompactBinary::readNameTable() {
    994   auto Size = readNumber<uint64_t>();
    995   if (std::error_code EC = Size.getError())
    996     return EC;
    997   NameTable.reserve(*Size);
    998   for (uint32_t I = 0; I < *Size; ++I) {
    999     auto FID = readNumber<uint64_t>();
   1000     if (std::error_code EC = FID.getError())
   1001       return EC;
   1002     NameTable.push_back(std::to_string(*FID));
   1003   }
   1004   return sampleprof_error::success;
   1005 }
   1006 
   1007 std::error_code
   1008 SampleProfileReaderExtBinaryBase::readSecHdrTableEntry(uint32_t Idx) {
   1009   SecHdrTableEntry Entry;
   1010   auto Type = readUnencodedNumber<uint64_t>();
   1011   if (std::error_code EC = Type.getError())
   1012     return EC;
   1013   Entry.Type = static_cast<SecType>(*Type);
   1014 
   1015   auto Flags = readUnencodedNumber<uint64_t>();
   1016   if (std::error_code EC = Flags.getError())
   1017     return EC;
   1018   Entry.Flags = *Flags;
   1019 
   1020   auto Offset = readUnencodedNumber<uint64_t>();
   1021   if (std::error_code EC = Offset.getError())
   1022     return EC;
   1023   Entry.Offset = *Offset;
   1024 
   1025   auto Size = readUnencodedNumber<uint64_t>();
   1026   if (std::error_code EC = Size.getError())
   1027     return EC;
   1028   Entry.Size = *Size;
   1029 
   1030   Entry.LayoutIndex = Idx;
   1031   SecHdrTable.push_back(std::move(Entry));
   1032   return sampleprof_error::success;
   1033 }
   1034 
   1035 std::error_code SampleProfileReaderExtBinaryBase::readSecHdrTable() {
   1036   auto EntryNum = readUnencodedNumber<uint64_t>();
   1037   if (std::error_code EC = EntryNum.getError())
   1038     return EC;
   1039 
   1040   for (uint32_t i = 0; i < (*EntryNum); i++)
   1041     if (std::error_code EC = readSecHdrTableEntry(i))
   1042       return EC;
   1043 
   1044   return sampleprof_error::success;
   1045 }
   1046 
   1047 std::error_code SampleProfileReaderExtBinaryBase::readHeader() {
   1048   const uint8_t *BufStart =
   1049       reinterpret_cast<const uint8_t *>(Buffer->getBufferStart());
   1050   Data = BufStart;
   1051   End = BufStart + Buffer->getBufferSize();
   1052 
   1053   if (std::error_code EC = readMagicIdent())
   1054     return EC;
   1055 
   1056   if (std::error_code EC = readSecHdrTable())
   1057     return EC;
   1058 
   1059   return sampleprof_error::success;
   1060 }
   1061 
   1062 uint64_t SampleProfileReaderExtBinaryBase::getSectionSize(SecType Type) {
   1063   uint64_t Size = 0;
   1064   for (auto &Entry : SecHdrTable) {
   1065     if (Entry.Type == Type)
   1066       Size += Entry.Size;
   1067   }
   1068   return Size;
   1069 }
   1070 
   1071 uint64_t SampleProfileReaderExtBinaryBase::getFileSize() {
   1072   // Sections in SecHdrTable is not necessarily in the same order as
   1073   // sections in the profile because section like FuncOffsetTable needs
   1074   // to be written after section LBRProfile but needs to be read before
   1075   // section LBRProfile, so we cannot simply use the last entry in
   1076   // SecHdrTable to calculate the file size.
   1077   uint64_t FileSize = 0;
   1078   for (auto &Entry : SecHdrTable) {
   1079     FileSize = std::max(Entry.Offset + Entry.Size, FileSize);
   1080   }
   1081   return FileSize;
   1082 }
   1083 
   1084 static std::string getSecFlagsStr(const SecHdrTableEntry &Entry) {
   1085   std::string Flags;
   1086   if (hasSecFlag(Entry, SecCommonFlags::SecFlagCompress))
   1087     Flags.append("{compressed,");
   1088   else
   1089     Flags.append("{");
   1090 
   1091   if (hasSecFlag(Entry, SecCommonFlags::SecFlagFlat))
   1092     Flags.append("flat,");
   1093 
   1094   switch (Entry.Type) {
   1095   case SecNameTable:
   1096     if (hasSecFlag(Entry, SecNameTableFlags::SecFlagFixedLengthMD5))
   1097       Flags.append("fixlenmd5,");
   1098     else if (hasSecFlag(Entry, SecNameTableFlags::SecFlagMD5Name))
   1099       Flags.append("md5,");
   1100     if (hasSecFlag(Entry, SecNameTableFlags::SecFlagUniqSuffix))
   1101       Flags.append("uniq,");
   1102     break;
   1103   case SecProfSummary:
   1104     if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagPartial))
   1105       Flags.append("partial,");
   1106     if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagFullContext))
   1107       Flags.append("context,");
   1108     break;
   1109   default:
   1110     break;
   1111   }
   1112   char &last = Flags.back();
   1113   if (last == ',')
   1114     last = '}';
   1115   else
   1116     Flags.append("}");
   1117   return Flags;
   1118 }
   1119 
   1120 bool SampleProfileReaderExtBinaryBase::dumpSectionInfo(raw_ostream &OS) {
   1121   uint64_t TotalSecsSize = 0;
   1122   for (auto &Entry : SecHdrTable) {
   1123     OS << getSecName(Entry.Type) << " - Offset: " << Entry.Offset
   1124        << ", Size: " << Entry.Size << ", Flags: " << getSecFlagsStr(Entry)
   1125        << "\n";
   1126     ;
   1127     TotalSecsSize += Entry.Size;
   1128   }
   1129   uint64_t HeaderSize = SecHdrTable.front().Offset;
   1130   assert(HeaderSize + TotalSecsSize == getFileSize() &&
   1131          "Size of 'header + sections' doesn't match the total size of profile");
   1132 
   1133   OS << "Header Size: " << HeaderSize << "\n";
   1134   OS << "Total Sections Size: " << TotalSecsSize << "\n";
   1135   OS << "File Size: " << getFileSize() << "\n";
   1136   return true;
   1137 }
   1138 
   1139 std::error_code SampleProfileReaderBinary::readMagicIdent() {
   1140   // Read and check the magic identifier.
   1141   auto Magic = readNumber<uint64_t>();
   1142   if (std::error_code EC = Magic.getError())
   1143     return EC;
   1144   else if (std::error_code EC = verifySPMagic(*Magic))
   1145     return EC;
   1146 
   1147   // Read the version number.
   1148   auto Version = readNumber<uint64_t>();
   1149   if (std::error_code EC = Version.getError())
   1150     return EC;
   1151   else if (*Version != SPVersion())
   1152     return sampleprof_error::unsupported_version;
   1153 
   1154   return sampleprof_error::success;
   1155 }
   1156 
   1157 std::error_code SampleProfileReaderBinary::readHeader() {
   1158   Data = reinterpret_cast<const uint8_t *>(Buffer->getBufferStart());
   1159   End = Data + Buffer->getBufferSize();
   1160 
   1161   if (std::error_code EC = readMagicIdent())
   1162     return EC;
   1163 
   1164   if (std::error_code EC = readSummary())
   1165     return EC;
   1166 
   1167   if (std::error_code EC = readNameTable())
   1168     return EC;
   1169   return sampleprof_error::success;
   1170 }
   1171 
   1172 std::error_code SampleProfileReaderCompactBinary::readHeader() {
   1173   SampleProfileReaderBinary::readHeader();
   1174   if (std::error_code EC = readFuncOffsetTable())
   1175     return EC;
   1176   return sampleprof_error::success;
   1177 }
   1178 
   1179 std::error_code SampleProfileReaderCompactBinary::readFuncOffsetTable() {
   1180   auto TableOffset = readUnencodedNumber<uint64_t>();
   1181   if (std::error_code EC = TableOffset.getError())
   1182     return EC;
   1183 
   1184   const uint8_t *SavedData = Data;
   1185   const uint8_t *TableStart =
   1186       reinterpret_cast<const uint8_t *>(Buffer->getBufferStart()) +
   1187       *TableOffset;
   1188   Data = TableStart;
   1189 
   1190   auto Size = readNumber<uint64_t>();
   1191   if (std::error_code EC = Size.getError())
   1192     return EC;
   1193 
   1194   FuncOffsetTable.reserve(*Size);
   1195   for (uint32_t I = 0; I < *Size; ++I) {
   1196     auto FName(readStringFromTable());
   1197     if (std::error_code EC = FName.getError())
   1198       return EC;
   1199 
   1200     auto Offset = readNumber<uint64_t>();
   1201     if (std::error_code EC = Offset.getError())
   1202       return EC;
   1203 
   1204     FuncOffsetTable[*FName] = *Offset;
   1205   }
   1206   End = TableStart;
   1207   Data = SavedData;
   1208   return sampleprof_error::success;
   1209 }
   1210 
   1211 bool SampleProfileReaderCompactBinary::collectFuncsFromModule() {
   1212   if (!M)
   1213     return false;
   1214   FuncsToUse.clear();
   1215   for (auto &F : *M)
   1216     FuncsToUse.insert(FunctionSamples::getCanonicalFnName(F));
   1217   return true;
   1218 }
   1219 
   1220 std::error_code SampleProfileReaderBinary::readSummaryEntry(
   1221     std::vector<ProfileSummaryEntry> &Entries) {
   1222   auto Cutoff = readNumber<uint64_t>();
   1223   if (std::error_code EC = Cutoff.getError())
   1224     return EC;
   1225 
   1226   auto MinBlockCount = readNumber<uint64_t>();
   1227   if (std::error_code EC = MinBlockCount.getError())
   1228     return EC;
   1229 
   1230   auto NumBlocks = readNumber<uint64_t>();
   1231   if (std::error_code EC = NumBlocks.getError())
   1232     return EC;
   1233 
   1234   Entries.emplace_back(*Cutoff, *MinBlockCount, *NumBlocks);
   1235   return sampleprof_error::success;
   1236 }
   1237 
   1238 std::error_code SampleProfileReaderBinary::readSummary() {
   1239   auto TotalCount = readNumber<uint64_t>();
   1240   if (std::error_code EC = TotalCount.getError())
   1241     return EC;
   1242 
   1243   auto MaxBlockCount = readNumber<uint64_t>();
   1244   if (std::error_code EC = MaxBlockCount.getError())
   1245     return EC;
   1246 
   1247   auto MaxFunctionCount = readNumber<uint64_t>();
   1248   if (std::error_code EC = MaxFunctionCount.getError())
   1249     return EC;
   1250 
   1251   auto NumBlocks = readNumber<uint64_t>();
   1252   if (std::error_code EC = NumBlocks.getError())
   1253     return EC;
   1254 
   1255   auto NumFunctions = readNumber<uint64_t>();
   1256   if (std::error_code EC = NumFunctions.getError())
   1257     return EC;
   1258 
   1259   auto NumSummaryEntries = readNumber<uint64_t>();
   1260   if (std::error_code EC = NumSummaryEntries.getError())
   1261     return EC;
   1262 
   1263   std::vector<ProfileSummaryEntry> Entries;
   1264   for (unsigned i = 0; i < *NumSummaryEntries; i++) {
   1265     std::error_code EC = readSummaryEntry(Entries);
   1266     if (EC != sampleprof_error::success)
   1267       return EC;
   1268   }
   1269   Summary = std::make_unique<ProfileSummary>(
   1270       ProfileSummary::PSK_Sample, Entries, *TotalCount, *MaxBlockCount, 0,
   1271       *MaxFunctionCount, *NumBlocks, *NumFunctions);
   1272 
   1273   return sampleprof_error::success;
   1274 }
   1275 
   1276 bool SampleProfileReaderRawBinary::hasFormat(const MemoryBuffer &Buffer) {
   1277   const uint8_t *Data =
   1278       reinterpret_cast<const uint8_t *>(Buffer.getBufferStart());
   1279   uint64_t Magic = decodeULEB128(Data);
   1280   return Magic == SPMagic();
   1281 }
   1282 
   1283 bool SampleProfileReaderExtBinary::hasFormat(const MemoryBuffer &Buffer) {
   1284   const uint8_t *Data =
   1285       reinterpret_cast<const uint8_t *>(Buffer.getBufferStart());
   1286   uint64_t Magic = decodeULEB128(Data);
   1287   return Magic == SPMagic(SPF_Ext_Binary);
   1288 }
   1289 
   1290 bool SampleProfileReaderCompactBinary::hasFormat(const MemoryBuffer &Buffer) {
   1291   const uint8_t *Data =
   1292       reinterpret_cast<const uint8_t *>(Buffer.getBufferStart());
   1293   uint64_t Magic = decodeULEB128(Data);
   1294   return Magic == SPMagic(SPF_Compact_Binary);
   1295 }
   1296 
   1297 std::error_code SampleProfileReaderGCC::skipNextWord() {
   1298   uint32_t dummy;
   1299   if (!GcovBuffer.readInt(dummy))
   1300     return sampleprof_error::truncated;
   1301   return sampleprof_error::success;
   1302 }
   1303 
   1304 template <typename T> ErrorOr<T> SampleProfileReaderGCC::readNumber() {
   1305   if (sizeof(T) <= sizeof(uint32_t)) {
   1306     uint32_t Val;
   1307     if (GcovBuffer.readInt(Val) && Val <= std::numeric_limits<T>::max())
   1308       return static_cast<T>(Val);
   1309   } else if (sizeof(T) <= sizeof(uint64_t)) {
   1310     uint64_t Val;
   1311     if (GcovBuffer.readInt64(Val) && Val <= std::numeric_limits<T>::max())
   1312       return static_cast<T>(Val);
   1313   }
   1314 
   1315   std::error_code EC = sampleprof_error::malformed;
   1316   reportError(0, EC.message());
   1317   return EC;
   1318 }
   1319 
   1320 ErrorOr<StringRef> SampleProfileReaderGCC::readString() {
   1321   StringRef Str;
   1322   if (!GcovBuffer.readString(Str))
   1323     return sampleprof_error::truncated;
   1324   return Str;
   1325 }
   1326 
   1327 std::error_code SampleProfileReaderGCC::readHeader() {
   1328   // Read the magic identifier.
   1329   if (!GcovBuffer.readGCDAFormat())
   1330     return sampleprof_error::unrecognized_format;
   1331 
   1332   // Read the version number. Note - the GCC reader does not validate this
   1333   // version, but the profile creator generates v704.
   1334   GCOV::GCOVVersion version;
   1335   if (!GcovBuffer.readGCOVVersion(version))
   1336     return sampleprof_error::unrecognized_format;
   1337 
   1338   if (version != GCOV::V407)
   1339     return sampleprof_error::unsupported_version;
   1340 
   1341   // Skip the empty integer.
   1342   if (std::error_code EC = skipNextWord())
   1343     return EC;
   1344 
   1345   return sampleprof_error::success;
   1346 }
   1347 
   1348 std::error_code SampleProfileReaderGCC::readSectionTag(uint32_t Expected) {
   1349   uint32_t Tag;
   1350   if (!GcovBuffer.readInt(Tag))
   1351     return sampleprof_error::truncated;
   1352 
   1353   if (Tag != Expected)
   1354     return sampleprof_error::malformed;
   1355 
   1356   if (std::error_code EC = skipNextWord())
   1357     return EC;
   1358 
   1359   return sampleprof_error::success;
   1360 }
   1361 
   1362 std::error_code SampleProfileReaderGCC::readNameTable() {
   1363   if (std::error_code EC = readSectionTag(GCOVTagAFDOFileNames))
   1364     return EC;
   1365 
   1366   uint32_t Size;
   1367   if (!GcovBuffer.readInt(Size))
   1368     return sampleprof_error::truncated;
   1369 
   1370   for (uint32_t I = 0; I < Size; ++I) {
   1371     StringRef Str;
   1372     if (!GcovBuffer.readString(Str))
   1373       return sampleprof_error::truncated;
   1374     Names.push_back(std::string(Str));
   1375   }
   1376 
   1377   return sampleprof_error::success;
   1378 }
   1379 
   1380 std::error_code SampleProfileReaderGCC::readFunctionProfiles() {
   1381   if (std::error_code EC = readSectionTag(GCOVTagAFDOFunction))
   1382     return EC;
   1383 
   1384   uint32_t NumFunctions;
   1385   if (!GcovBuffer.readInt(NumFunctions))
   1386     return sampleprof_error::truncated;
   1387 
   1388   InlineCallStack Stack;
   1389   for (uint32_t I = 0; I < NumFunctions; ++I)
   1390     if (std::error_code EC = readOneFunctionProfile(Stack, true, 0))
   1391       return EC;
   1392 
   1393   computeSummary();
   1394   return sampleprof_error::success;
   1395 }
   1396 
   1397 std::error_code SampleProfileReaderGCC::readOneFunctionProfile(
   1398     const InlineCallStack &InlineStack, bool Update, uint32_t Offset) {
   1399   uint64_t HeadCount = 0;
   1400   if (InlineStack.size() == 0)
   1401     if (!GcovBuffer.readInt64(HeadCount))
   1402       return sampleprof_error::truncated;
   1403 
   1404   uint32_t NameIdx;
   1405   if (!GcovBuffer.readInt(NameIdx))
   1406     return sampleprof_error::truncated;
   1407 
   1408   StringRef Name(Names[NameIdx]);
   1409 
   1410   uint32_t NumPosCounts;
   1411   if (!GcovBuffer.readInt(NumPosCounts))
   1412     return sampleprof_error::truncated;
   1413 
   1414   uint32_t NumCallsites;
   1415   if (!GcovBuffer.readInt(NumCallsites))
   1416     return sampleprof_error::truncated;
   1417 
   1418   FunctionSamples *FProfile = nullptr;
   1419   if (InlineStack.size() == 0) {
   1420     // If this is a top function that we have already processed, do not
   1421     // update its profile again.  This happens in the presence of
   1422     // function aliases.  Since these aliases share the same function
   1423     // body, there will be identical replicated profiles for the
   1424     // original function.  In this case, we simply not bother updating
   1425     // the profile of the original function.
   1426     FProfile = &Profiles[Name];
   1427     FProfile->addHeadSamples(HeadCount);
   1428     if (FProfile->getTotalSamples() > 0)
   1429       Update = false;
   1430   } else {
   1431     // Otherwise, we are reading an inlined instance. The top of the
   1432     // inline stack contains the profile of the caller. Insert this
   1433     // callee in the caller's CallsiteMap.
   1434     FunctionSamples *CallerProfile = InlineStack.front();
   1435     uint32_t LineOffset = Offset >> 16;
   1436     uint32_t Discriminator = Offset & 0xffff;
   1437     FProfile = &CallerProfile->functionSamplesAt(
   1438         LineLocation(LineOffset, Discriminator))[std::string(Name)];
   1439   }
   1440   FProfile->setName(Name);
   1441 
   1442   for (uint32_t I = 0; I < NumPosCounts; ++I) {
   1443     uint32_t Offset;
   1444     if (!GcovBuffer.readInt(Offset))
   1445       return sampleprof_error::truncated;
   1446 
   1447     uint32_t NumTargets;
   1448     if (!GcovBuffer.readInt(NumTargets))
   1449       return sampleprof_error::truncated;
   1450 
   1451     uint64_t Count;
   1452     if (!GcovBuffer.readInt64(Count))
   1453       return sampleprof_error::truncated;
   1454 
   1455     // The line location is encoded in the offset as:
   1456     //   high 16 bits: line offset to the start of the function.
   1457     //   low 16 bits: discriminator.
   1458     uint32_t LineOffset = Offset >> 16;
   1459     uint32_t Discriminator = Offset & 0xffff;
   1460 
   1461     InlineCallStack NewStack;
   1462     NewStack.push_back(FProfile);
   1463     llvm::append_range(NewStack, InlineStack);
   1464     if (Update) {
   1465       // Walk up the inline stack, adding the samples on this line to
   1466       // the total sample count of the callers in the chain.
   1467       for (auto CallerProfile : NewStack)
   1468         CallerProfile->addTotalSamples(Count);
   1469 
   1470       // Update the body samples for the current profile.
   1471       FProfile->addBodySamples(LineOffset, Discriminator, Count);
   1472     }
   1473 
   1474     // Process the list of functions called at an indirect call site.
   1475     // These are all the targets that a function pointer (or virtual
   1476     // function) resolved at runtime.
   1477     for (uint32_t J = 0; J < NumTargets; J++) {
   1478       uint32_t HistVal;
   1479       if (!GcovBuffer.readInt(HistVal))
   1480         return sampleprof_error::truncated;
   1481 
   1482       if (HistVal != HIST_TYPE_INDIR_CALL_TOPN)
   1483         return sampleprof_error::malformed;
   1484 
   1485       uint64_t TargetIdx;
   1486       if (!GcovBuffer.readInt64(TargetIdx))
   1487         return sampleprof_error::truncated;
   1488       StringRef TargetName(Names[TargetIdx]);
   1489 
   1490       uint64_t TargetCount;
   1491       if (!GcovBuffer.readInt64(TargetCount))
   1492         return sampleprof_error::truncated;
   1493 
   1494       if (Update)
   1495         FProfile->addCalledTargetSamples(LineOffset, Discriminator,
   1496                                          TargetName, TargetCount);
   1497     }
   1498   }
   1499 
   1500   // Process all the inlined callers into the current function. These
   1501   // are all the callsites that were inlined into this function.
   1502   for (uint32_t I = 0; I < NumCallsites; I++) {
   1503     // The offset is encoded as:
   1504     //   high 16 bits: line offset to the start of the function.
   1505     //   low 16 bits: discriminator.
   1506     uint32_t Offset;
   1507     if (!GcovBuffer.readInt(Offset))
   1508       return sampleprof_error::truncated;
   1509     InlineCallStack NewStack;
   1510     NewStack.push_back(FProfile);
   1511     llvm::append_range(NewStack, InlineStack);
   1512     if (std::error_code EC = readOneFunctionProfile(NewStack, Update, Offset))
   1513       return EC;
   1514   }
   1515 
   1516   return sampleprof_error::success;
   1517 }
   1518 
   1519 /// Read a GCC AutoFDO profile.
   1520 ///
   1521 /// This format is generated by the Linux Perf conversion tool at
   1522 /// https://github.com/google/autofdo.
   1523 std::error_code SampleProfileReaderGCC::readImpl() {
   1524   // Read the string table.
   1525   if (std::error_code EC = readNameTable())
   1526     return EC;
   1527 
   1528   // Read the source profile.
   1529   if (std::error_code EC = readFunctionProfiles())
   1530     return EC;
   1531 
   1532   return sampleprof_error::success;
   1533 }
   1534 
   1535 bool SampleProfileReaderGCC::hasFormat(const MemoryBuffer &Buffer) {
   1536   StringRef Magic(reinterpret_cast<const char *>(Buffer.getBufferStart()));
   1537   return Magic == "adcg*704";
   1538 }
   1539 
   1540 void SampleProfileReaderItaniumRemapper::applyRemapping(LLVMContext &Ctx) {
   1541   // If the reader uses MD5 to represent string, we can't remap it because
   1542   // we don't know what the original function names were.
   1543   if (Reader.useMD5()) {
   1544     Ctx.diagnose(DiagnosticInfoSampleProfile(
   1545         Reader.getBuffer()->getBufferIdentifier(),
   1546         "Profile data remapping cannot be applied to profile data "
   1547         "in compact format (original mangled names are not available).",
   1548         DS_Warning));
   1549     return;
   1550   }
   1551 
   1552   // CSSPGO-TODO: Remapper is not yet supported.
   1553   // We will need to remap the entire context string.
   1554   assert(Remappings && "should be initialized while creating remapper");
   1555   for (auto &Sample : Reader.getProfiles()) {
   1556     DenseSet<StringRef> NamesInSample;
   1557     Sample.second.findAllNames(NamesInSample);
   1558     for (auto &Name : NamesInSample)
   1559       if (auto Key = Remappings->insert(Name))
   1560         NameMap.insert({Key, Name});
   1561   }
   1562 
   1563   RemappingApplied = true;
   1564 }
   1565 
   1566 Optional<StringRef>
   1567 SampleProfileReaderItaniumRemapper::lookUpNameInProfile(StringRef Fname) {
   1568   if (auto Key = Remappings->lookup(Fname))
   1569     return NameMap.lookup(Key);
   1570   return None;
   1571 }
   1572 
   1573 /// Prepare a memory buffer for the contents of \p Filename.
   1574 ///
   1575 /// \returns an error code indicating the status of the buffer.
   1576 static ErrorOr<std::unique_ptr<MemoryBuffer>>
   1577 setupMemoryBuffer(const Twine &Filename) {
   1578   auto BufferOrErr = MemoryBuffer::getFileOrSTDIN(Filename, /*IsText=*/true);
   1579   if (std::error_code EC = BufferOrErr.getError())
   1580     return EC;
   1581   auto Buffer = std::move(BufferOrErr.get());
   1582 
   1583   // Sanity check the file.
   1584   if (uint64_t(Buffer->getBufferSize()) > std::numeric_limits<uint32_t>::max())
   1585     return sampleprof_error::too_large;
   1586 
   1587   return std::move(Buffer);
   1588 }
   1589 
   1590 /// Create a sample profile reader based on the format of the input file.
   1591 ///
   1592 /// \param Filename The file to open.
   1593 ///
   1594 /// \param C The LLVM context to use to emit diagnostics.
   1595 ///
   1596 /// \param RemapFilename The file used for profile remapping.
   1597 ///
   1598 /// \returns an error code indicating the status of the created reader.
   1599 ErrorOr<std::unique_ptr<SampleProfileReader>>
   1600 SampleProfileReader::create(const std::string Filename, LLVMContext &C,
   1601                             const std::string RemapFilename) {
   1602   auto BufferOrError = setupMemoryBuffer(Filename);
   1603   if (std::error_code EC = BufferOrError.getError())
   1604     return EC;
   1605   return create(BufferOrError.get(), C, RemapFilename);
   1606 }
   1607 
   1608 /// Create a sample profile remapper from the given input, to remap the
   1609 /// function names in the given profile data.
   1610 ///
   1611 /// \param Filename The file to open.
   1612 ///
   1613 /// \param Reader The profile reader the remapper is going to be applied to.
   1614 ///
   1615 /// \param C The LLVM context to use to emit diagnostics.
   1616 ///
   1617 /// \returns an error code indicating the status of the created reader.
   1618 ErrorOr<std::unique_ptr<SampleProfileReaderItaniumRemapper>>
   1619 SampleProfileReaderItaniumRemapper::create(const std::string Filename,
   1620                                            SampleProfileReader &Reader,
   1621                                            LLVMContext &C) {
   1622   auto BufferOrError = setupMemoryBuffer(Filename);
   1623   if (std::error_code EC = BufferOrError.getError())
   1624     return EC;
   1625   return create(BufferOrError.get(), Reader, C);
   1626 }
   1627 
   1628 /// Create a sample profile remapper from the given input, to remap the
   1629 /// function names in the given profile data.
   1630 ///
   1631 /// \param B The memory buffer to create the reader from (assumes ownership).
   1632 ///
   1633 /// \param C The LLVM context to use to emit diagnostics.
   1634 ///
   1635 /// \param Reader The profile reader the remapper is going to be applied to.
   1636 ///
   1637 /// \returns an error code indicating the status of the created reader.
   1638 ErrorOr<std::unique_ptr<SampleProfileReaderItaniumRemapper>>
   1639 SampleProfileReaderItaniumRemapper::create(std::unique_ptr<MemoryBuffer> &B,
   1640                                            SampleProfileReader &Reader,
   1641                                            LLVMContext &C) {
   1642   auto Remappings = std::make_unique<SymbolRemappingReader>();
   1643   if (Error E = Remappings->read(*B.get())) {
   1644     handleAllErrors(
   1645         std::move(E), [&](const SymbolRemappingParseError &ParseError) {
   1646           C.diagnose(DiagnosticInfoSampleProfile(B->getBufferIdentifier(),
   1647                                                  ParseError.getLineNum(),
   1648                                                  ParseError.getMessage()));
   1649         });
   1650     return sampleprof_error::malformed;
   1651   }
   1652 
   1653   return std::make_unique<SampleProfileReaderItaniumRemapper>(
   1654       std::move(B), std::move(Remappings), Reader);
   1655 }
   1656 
   1657 /// Create a sample profile reader based on the format of the input data.
   1658 ///
   1659 /// \param B The memory buffer to create the reader from (assumes ownership).
   1660 ///
   1661 /// \param C The LLVM context to use to emit diagnostics.
   1662 ///
   1663 /// \param RemapFilename The file used for profile remapping.
   1664 ///
   1665 /// \returns an error code indicating the status of the created reader.
   1666 ErrorOr<std::unique_ptr<SampleProfileReader>>
   1667 SampleProfileReader::create(std::unique_ptr<MemoryBuffer> &B, LLVMContext &C,
   1668                             const std::string RemapFilename) {
   1669   std::unique_ptr<SampleProfileReader> Reader;
   1670   if (SampleProfileReaderRawBinary::hasFormat(*B))
   1671     Reader.reset(new SampleProfileReaderRawBinary(std::move(B), C));
   1672   else if (SampleProfileReaderExtBinary::hasFormat(*B))
   1673     Reader.reset(new SampleProfileReaderExtBinary(std::move(B), C));
   1674   else if (SampleProfileReaderCompactBinary::hasFormat(*B))
   1675     Reader.reset(new SampleProfileReaderCompactBinary(std::move(B), C));
   1676   else if (SampleProfileReaderGCC::hasFormat(*B))
   1677     Reader.reset(new SampleProfileReaderGCC(std::move(B), C));
   1678   else if (SampleProfileReaderText::hasFormat(*B))
   1679     Reader.reset(new SampleProfileReaderText(std::move(B), C));
   1680   else
   1681     return sampleprof_error::unrecognized_format;
   1682 
   1683   if (!RemapFilename.empty()) {
   1684     auto ReaderOrErr =
   1685         SampleProfileReaderItaniumRemapper::create(RemapFilename, *Reader, C);
   1686     if (std::error_code EC = ReaderOrErr.getError()) {
   1687       std::string Msg = "Could not create remapper: " + EC.message();
   1688       C.diagnose(DiagnosticInfoSampleProfile(RemapFilename, Msg));
   1689       return EC;
   1690     }
   1691     Reader->Remapper = std::move(ReaderOrErr.get());
   1692   }
   1693 
   1694   FunctionSamples::Format = Reader->getFormat();
   1695   if (std::error_code EC = Reader->readHeader()) {
   1696     return EC;
   1697   }
   1698 
   1699   return std::move(Reader);
   1700 }
   1701 
   1702 // For text and GCC file formats, we compute the summary after reading the
   1703 // profile. Binary format has the profile summary in its header.
   1704 void SampleProfileReader::computeSummary() {
   1705   SampleProfileSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs);
   1706   Summary = Builder.computeSummaryForProfiles(Profiles);
   1707 }
   1708