Home | History | Annotate | Line # | Download | only in ProfileData
      1 //===- SampleProf.h - Sampling profiling format support ---------*- C++ -*-===//
      2 //
      3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
      4 // See https://llvm.org/LICENSE.txt for license information.
      5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
      6 //
      7 //===----------------------------------------------------------------------===//
      8 //
      9 // This file contains common definitions used in the reading and writing of
     10 // sample profile data.
     11 //
     12 //===----------------------------------------------------------------------===//
     13 
     14 #ifndef LLVM_PROFILEDATA_SAMPLEPROF_H
     15 #define LLVM_PROFILEDATA_SAMPLEPROF_H
     16 
     17 #include "llvm/ADT/DenseSet.h"
     18 #include "llvm/ADT/SmallVector.h"
     19 #include "llvm/ADT/StringMap.h"
     20 #include "llvm/ADT/StringRef.h"
     21 #include "llvm/ADT/StringSet.h"
     22 #include "llvm/IR/Function.h"
     23 #include "llvm/IR/GlobalValue.h"
     24 #include "llvm/IR/Module.h"
     25 #include "llvm/Support/Allocator.h"
     26 #include "llvm/Support/Debug.h"
     27 #include "llvm/Support/ErrorOr.h"
     28 #include "llvm/Support/MathExtras.h"
     29 #include "llvm/Support/raw_ostream.h"
     30 #include <algorithm>
     31 #include <cstdint>
     32 #include <map>
     33 #include <set>
     34 #include <string>
     35 #include <system_error>
     36 #include <utility>
     37 
     38 namespace llvm {
     39 
     40 const std::error_category &sampleprof_category();
     41 
     42 enum class sampleprof_error {
     43   success = 0,
     44   bad_magic,
     45   unsupported_version,
     46   too_large,
     47   truncated,
     48   malformed,
     49   unrecognized_format,
     50   unsupported_writing_format,
     51   truncated_name_table,
     52   not_implemented,
     53   counter_overflow,
     54   ostream_seek_unsupported,
     55   compress_failed,
     56   uncompress_failed,
     57   zlib_unavailable,
     58   hash_mismatch
     59 };
     60 
     61 inline std::error_code make_error_code(sampleprof_error E) {
     62   return std::error_code(static_cast<int>(E), sampleprof_category());
     63 }
     64 
     65 inline sampleprof_error MergeResult(sampleprof_error &Accumulator,
     66                                     sampleprof_error Result) {
     67   // Prefer first error encountered as later errors may be secondary effects of
     68   // the initial problem.
     69   if (Accumulator == sampleprof_error::success &&
     70       Result != sampleprof_error::success)
     71     Accumulator = Result;
     72   return Accumulator;
     73 }
     74 
     75 } // end namespace llvm
     76 
     77 namespace std {
     78 
     79 template <>
     80 struct is_error_code_enum<llvm::sampleprof_error> : std::true_type {};
     81 
     82 } // end namespace std
     83 
     84 namespace llvm {
     85 namespace sampleprof {
     86 
     87 enum SampleProfileFormat {
     88   SPF_None = 0,
     89   SPF_Text = 0x1,
     90   SPF_Compact_Binary = 0x2,
     91   SPF_GCC = 0x3,
     92   SPF_Ext_Binary = 0x4,
     93   SPF_Binary = 0xff
     94 };
     95 
     96 static inline uint64_t SPMagic(SampleProfileFormat Format = SPF_Binary) {
     97   return uint64_t('S') << (64 - 8) | uint64_t('P') << (64 - 16) |
     98          uint64_t('R') << (64 - 24) | uint64_t('O') << (64 - 32) |
     99          uint64_t('F') << (64 - 40) | uint64_t('4') << (64 - 48) |
    100          uint64_t('2') << (64 - 56) | uint64_t(Format);
    101 }
    102 
    103 /// Get the proper representation of a string according to whether the
    104 /// current Format uses MD5 to represent the string.
    105 static inline StringRef getRepInFormat(StringRef Name, bool UseMD5,
    106                                        std::string &GUIDBuf) {
    107   if (Name.empty())
    108     return Name;
    109   GUIDBuf = std::to_string(Function::getGUID(Name));
    110   return UseMD5 ? StringRef(GUIDBuf) : Name;
    111 }
    112 
    113 static inline uint64_t SPVersion() { return 103; }
    114 
    115 // Section Type used by SampleProfileExtBinaryBaseReader and
    116 // SampleProfileExtBinaryBaseWriter. Never change the existing
    117 // value of enum. Only append new ones.
    118 enum SecType {
    119   SecInValid = 0,
    120   SecProfSummary = 1,
    121   SecNameTable = 2,
    122   SecProfileSymbolList = 3,
    123   SecFuncOffsetTable = 4,
    124   SecFuncMetadata = 5,
    125   // marker for the first type of profile.
    126   SecFuncProfileFirst = 32,
    127   SecLBRProfile = SecFuncProfileFirst
    128 };
    129 
    130 static inline std::string getSecName(SecType Type) {
    131   switch (Type) {
    132   case SecInValid:
    133     return "InvalidSection";
    134   case SecProfSummary:
    135     return "ProfileSummarySection";
    136   case SecNameTable:
    137     return "NameTableSection";
    138   case SecProfileSymbolList:
    139     return "ProfileSymbolListSection";
    140   case SecFuncOffsetTable:
    141     return "FuncOffsetTableSection";
    142   case SecFuncMetadata:
    143     return "FunctionMetadata";
    144   case SecLBRProfile:
    145     return "LBRProfileSection";
    146   }
    147   llvm_unreachable("A SecType has no name for output");
    148 }
    149 
    150 // Entry type of section header table used by SampleProfileExtBinaryBaseReader
    151 // and SampleProfileExtBinaryBaseWriter.
    152 struct SecHdrTableEntry {
    153   SecType Type;
    154   uint64_t Flags;
    155   uint64_t Offset;
    156   uint64_t Size;
    157   // The index indicating the location of the current entry in
    158   // SectionHdrLayout table.
    159   uint32_t LayoutIndex;
    160 };
    161 
    162 // Flags common for all sections are defined here. In SecHdrTableEntry::Flags,
    163 // common flags will be saved in the lower 32bits and section specific flags
    164 // will be saved in the higher 32 bits.
    165 enum class SecCommonFlags : uint32_t {
    166   SecFlagInValid = 0,
    167   SecFlagCompress = (1 << 0),
    168   // Indicate the section contains only profile without context.
    169   SecFlagFlat = (1 << 1)
    170 };
    171 
    172 // Section specific flags are defined here.
    173 // !!!Note: Everytime a new enum class is created here, please add
    174 // a new check in verifySecFlag.
    175 enum class SecNameTableFlags : uint32_t {
    176   SecFlagInValid = 0,
    177   SecFlagMD5Name = (1 << 0),
    178   // Store MD5 in fixed length instead of ULEB128 so NameTable can be
    179   // accessed like an array.
    180   SecFlagFixedLengthMD5 = (1 << 1),
    181   // Profile contains ".__uniq." suffix name. Compiler shouldn't strip
    182   // the suffix when doing profile matching when seeing the flag.
    183   SecFlagUniqSuffix = (1 << 2)
    184 };
    185 enum class SecProfSummaryFlags : uint32_t {
    186   SecFlagInValid = 0,
    187   /// SecFlagPartial means the profile is for common/shared code.
    188   /// The common profile is usually merged from profiles collected
    189   /// from running other targets.
    190   SecFlagPartial = (1 << 0),
    191   /// SecFlagContext means this is context-sensitive profile for
    192   /// CSSPGO
    193   SecFlagFullContext = (1 << 1)
    194 };
    195 
    196 enum class SecFuncMetadataFlags : uint32_t {
    197   SecFlagInvalid = 0,
    198   SecFlagIsProbeBased = (1 << 0),
    199   SecFlagHasAttribute = (1 << 1)
    200 };
    201 
    202 // Verify section specific flag is used for the correct section.
    203 template <class SecFlagType>
    204 static inline void verifySecFlag(SecType Type, SecFlagType Flag) {
    205   // No verification is needed for common flags.
    206   if (std::is_same<SecCommonFlags, SecFlagType>())
    207     return;
    208 
    209   // Verification starts here for section specific flag.
    210   bool IsFlagLegal = false;
    211   switch (Type) {
    212   case SecNameTable:
    213     IsFlagLegal = std::is_same<SecNameTableFlags, SecFlagType>();
    214     break;
    215   case SecProfSummary:
    216     IsFlagLegal = std::is_same<SecProfSummaryFlags, SecFlagType>();
    217     break;
    218   case SecFuncMetadata:
    219     IsFlagLegal = std::is_same<SecFuncMetadataFlags, SecFlagType>();
    220     break;
    221   default:
    222     break;
    223   }
    224   if (!IsFlagLegal)
    225     llvm_unreachable("Misuse of a flag in an incompatible section");
    226 }
    227 
    228 template <class SecFlagType>
    229 static inline void addSecFlag(SecHdrTableEntry &Entry, SecFlagType Flag) {
    230   verifySecFlag(Entry.Type, Flag);
    231   auto FVal = static_cast<uint64_t>(Flag);
    232   bool IsCommon = std::is_same<SecCommonFlags, SecFlagType>();
    233   Entry.Flags |= IsCommon ? FVal : (FVal << 32);
    234 }
    235 
    236 template <class SecFlagType>
    237 static inline void removeSecFlag(SecHdrTableEntry &Entry, SecFlagType Flag) {
    238   verifySecFlag(Entry.Type, Flag);
    239   auto FVal = static_cast<uint64_t>(Flag);
    240   bool IsCommon = std::is_same<SecCommonFlags, SecFlagType>();
    241   Entry.Flags &= ~(IsCommon ? FVal : (FVal << 32));
    242 }
    243 
    244 template <class SecFlagType>
    245 static inline bool hasSecFlag(const SecHdrTableEntry &Entry, SecFlagType Flag) {
    246   verifySecFlag(Entry.Type, Flag);
    247   auto FVal = static_cast<uint64_t>(Flag);
    248   bool IsCommon = std::is_same<SecCommonFlags, SecFlagType>();
    249   return Entry.Flags & (IsCommon ? FVal : (FVal << 32));
    250 }
    251 
    252 /// Represents the relative location of an instruction.
    253 ///
    254 /// Instruction locations are specified by the line offset from the
    255 /// beginning of the function (marked by the line where the function
    256 /// header is) and the discriminator value within that line.
    257 ///
    258 /// The discriminator value is useful to distinguish instructions
    259 /// that are on the same line but belong to different basic blocks
    260 /// (e.g., the two post-increment instructions in "if (p) x++; else y++;").
    261 struct LineLocation {
    262   LineLocation(uint32_t L, uint32_t D) : LineOffset(L), Discriminator(D) {}
    263 
    264   void print(raw_ostream &OS) const;
    265   void dump() const;
    266 
    267   bool operator<(const LineLocation &O) const {
    268     return LineOffset < O.LineOffset ||
    269            (LineOffset == O.LineOffset && Discriminator < O.Discriminator);
    270   }
    271 
    272   bool operator==(const LineLocation &O) const {
    273     return LineOffset == O.LineOffset && Discriminator == O.Discriminator;
    274   }
    275 
    276   bool operator!=(const LineLocation &O) const {
    277     return LineOffset != O.LineOffset || Discriminator != O.Discriminator;
    278   }
    279 
    280   uint32_t LineOffset;
    281   uint32_t Discriminator;
    282 };
    283 
    284 raw_ostream &operator<<(raw_ostream &OS, const LineLocation &Loc);
    285 
    286 /// Representation of a single sample record.
    287 ///
    288 /// A sample record is represented by a positive integer value, which
    289 /// indicates how frequently was the associated line location executed.
    290 ///
    291 /// Additionally, if the associated location contains a function call,
    292 /// the record will hold a list of all the possible called targets. For
    293 /// direct calls, this will be the exact function being invoked. For
    294 /// indirect calls (function pointers, virtual table dispatch), this
    295 /// will be a list of one or more functions.
    296 class SampleRecord {
    297 public:
    298   using CallTarget = std::pair<StringRef, uint64_t>;
    299   struct CallTargetComparator {
    300     bool operator()(const CallTarget &LHS, const CallTarget &RHS) const {
    301       if (LHS.second != RHS.second)
    302         return LHS.second > RHS.second;
    303 
    304       return LHS.first < RHS.first;
    305     }
    306   };
    307 
    308   using SortedCallTargetSet = std::set<CallTarget, CallTargetComparator>;
    309   using CallTargetMap = StringMap<uint64_t>;
    310   SampleRecord() = default;
    311 
    312   /// Increment the number of samples for this record by \p S.
    313   /// Optionally scale sample count \p S by \p Weight.
    314   ///
    315   /// Sample counts accumulate using saturating arithmetic, to avoid wrapping
    316   /// around unsigned integers.
    317   sampleprof_error addSamples(uint64_t S, uint64_t Weight = 1) {
    318     bool Overflowed;
    319     NumSamples = SaturatingMultiplyAdd(S, Weight, NumSamples, &Overflowed);
    320     return Overflowed ? sampleprof_error::counter_overflow
    321                       : sampleprof_error::success;
    322   }
    323 
    324   /// Add called function \p F with samples \p S.
    325   /// Optionally scale sample count \p S by \p Weight.
    326   ///
    327   /// Sample counts accumulate using saturating arithmetic, to avoid wrapping
    328   /// around unsigned integers.
    329   sampleprof_error addCalledTarget(StringRef F, uint64_t S,
    330                                    uint64_t Weight = 1) {
    331     uint64_t &TargetSamples = CallTargets[F];
    332     bool Overflowed;
    333     TargetSamples =
    334         SaturatingMultiplyAdd(S, Weight, TargetSamples, &Overflowed);
    335     return Overflowed ? sampleprof_error::counter_overflow
    336                       : sampleprof_error::success;
    337   }
    338 
    339   /// Return true if this sample record contains function calls.
    340   bool hasCalls() const { return !CallTargets.empty(); }
    341 
    342   uint64_t getSamples() const { return NumSamples; }
    343   const CallTargetMap &getCallTargets() const { return CallTargets; }
    344   const SortedCallTargetSet getSortedCallTargets() const {
    345     return SortCallTargets(CallTargets);
    346   }
    347 
    348   /// Sort call targets in descending order of call frequency.
    349   static const SortedCallTargetSet SortCallTargets(const CallTargetMap &Targets) {
    350     SortedCallTargetSet SortedTargets;
    351     for (const auto &I : Targets) {
    352       SortedTargets.emplace(I.first(), I.second);
    353     }
    354     return SortedTargets;
    355   }
    356 
    357   /// Prorate call targets by a distribution factor.
    358   static const CallTargetMap adjustCallTargets(const CallTargetMap &Targets,
    359                                                float DistributionFactor) {
    360     CallTargetMap AdjustedTargets;
    361     for (const auto &I : Targets) {
    362       AdjustedTargets[I.first()] = I.second * DistributionFactor;
    363     }
    364     return AdjustedTargets;
    365   }
    366 
    367   /// Merge the samples in \p Other into this record.
    368   /// Optionally scale sample counts by \p Weight.
    369   sampleprof_error merge(const SampleRecord &Other, uint64_t Weight = 1);
    370   void print(raw_ostream &OS, unsigned Indent) const;
    371   void dump() const;
    372 
    373 private:
    374   uint64_t NumSamples = 0;
    375   CallTargetMap CallTargets;
    376 };
    377 
    378 raw_ostream &operator<<(raw_ostream &OS, const SampleRecord &Sample);
    379 
    380 // State of context associated with FunctionSamples
    381 enum ContextStateMask {
    382   UnknownContext = 0x0,   // Profile without context
    383   RawContext = 0x1,       // Full context profile from input profile
    384   SyntheticContext = 0x2, // Synthetic context created for context promotion
    385   InlinedContext = 0x4,   // Profile for context that is inlined into caller
    386   MergedContext = 0x8     // Profile for context merged into base profile
    387 };
    388 
    389 // Attribute of context associated with FunctionSamples
    390 enum ContextAttributeMask {
    391   ContextNone = 0x0,
    392   ContextWasInlined = 0x1,      // Leaf of context was inlined in previous build
    393   ContextShouldBeInlined = 0x2, // Leaf of context should be inlined
    394 };
    395 
    396 // Sample context for FunctionSamples. It consists of the calling context,
    397 // the function name and context state. Internally sample context is represented
    398 // using StringRef, which is also the input for constructing a `SampleContext`.
    399 // It can accept and represent both full context string as well as context-less
    400 // function name.
    401 // Example of full context string (note the wrapping `[]`):
    402 //    `[main:3 @ _Z5funcAi:1 @ _Z8funcLeafi]`
    403 // Example of context-less function name (same as AutoFDO):
    404 //    `_Z8funcLeafi`
    405 class SampleContext {
    406 public:
    407   SampleContext() : State(UnknownContext), Attributes(ContextNone) {}
    408   SampleContext(StringRef ContextStr, ContextStateMask CState = UnknownContext)
    409       : Attributes(ContextNone) {
    410     setContext(ContextStr, CState);
    411   }
    412 
    413   // Promote context by removing top frames (represented by `ContextStrToRemove`).
    414   // Note that with string representation of context, the promotion is effectively
    415   // a substr operation with `ContextStrToRemove` removed from left.
    416   void promoteOnPath(StringRef ContextStrToRemove) {
    417     assert(FullContext.startswith(ContextStrToRemove));
    418 
    419     // Remove leading context and frame separator " @ ".
    420     FullContext = FullContext.substr(ContextStrToRemove.size() + 3);
    421     CallingContext = CallingContext.substr(ContextStrToRemove.size() + 3);
    422   }
    423 
    424   // Split the top context frame (left-most substr) from context.
    425   static std::pair<StringRef, StringRef>
    426   splitContextString(StringRef ContextStr) {
    427     return ContextStr.split(" @ ");
    428   }
    429 
    430   // Decode context string for a frame to get function name and location.
    431   // `ContextStr` is in the form of `FuncName:StartLine.Discriminator`.
    432   static void decodeContextString(StringRef ContextStr, StringRef &FName,
    433                                   LineLocation &LineLoc) {
    434     // Get function name
    435     auto EntrySplit = ContextStr.split(':');
    436     FName = EntrySplit.first;
    437 
    438     LineLoc = {0, 0};
    439     if (!EntrySplit.second.empty()) {
    440       // Get line offset, use signed int for getAsInteger so string will
    441       // be parsed as signed.
    442       int LineOffset = 0;
    443       auto LocSplit = EntrySplit.second.split('.');
    444       LocSplit.first.getAsInteger(10, LineOffset);
    445       LineLoc.LineOffset = LineOffset;
    446 
    447       // Get discriminator
    448       if (!LocSplit.second.empty())
    449         LocSplit.second.getAsInteger(10, LineLoc.Discriminator);
    450     }
    451   }
    452 
    453   operator StringRef() const { return FullContext; }
    454   bool hasAttribute(ContextAttributeMask A) { return Attributes & (uint32_t)A; }
    455   void setAttribute(ContextAttributeMask A) { Attributes |= (uint32_t)A; }
    456   uint32_t getAllAttributes() { return Attributes; }
    457   void setAllAttributes(uint32_t A) { Attributes = A; }
    458   bool hasState(ContextStateMask S) { return State & (uint32_t)S; }
    459   void setState(ContextStateMask S) { State |= (uint32_t)S; }
    460   void clearState(ContextStateMask S) { State &= (uint32_t)~S; }
    461   bool hasContext() const { return State != UnknownContext; }
    462   bool isBaseContext() const { return CallingContext.empty(); }
    463   StringRef getNameWithoutContext() const { return Name; }
    464   StringRef getCallingContext() const { return CallingContext; }
    465   StringRef getNameWithContext() const { return FullContext; }
    466 
    467 private:
    468   // Give a context string, decode and populate internal states like
    469   // Function name, Calling context and context state. Example of input
    470   // `ContextStr`: `[main:3 @ _Z5funcAi:1 @ _Z8funcLeafi]`
    471   void setContext(StringRef ContextStr, ContextStateMask CState) {
    472     assert(!ContextStr.empty());
    473     // Note that `[]` wrapped input indicates a full context string, otherwise
    474     // it's treated as context-less function name only.
    475     bool HasContext = ContextStr.startswith("[");
    476     if (!HasContext && CState == UnknownContext) {
    477       State = UnknownContext;
    478       Name = FullContext = ContextStr;
    479     } else {
    480       // Assume raw context profile if unspecified
    481       if (CState == UnknownContext)
    482         State = RawContext;
    483       else
    484         State = CState;
    485 
    486       // Remove encapsulating '[' and ']' if any
    487       if (HasContext)
    488         FullContext = ContextStr.substr(1, ContextStr.size() - 2);
    489       else
    490         FullContext = ContextStr;
    491 
    492       // Caller is to the left of callee in context string
    493       auto NameContext = FullContext.rsplit(" @ ");
    494       if (NameContext.second.empty()) {
    495         Name = NameContext.first;
    496         CallingContext = NameContext.second;
    497       } else {
    498         Name = NameContext.second;
    499         CallingContext = NameContext.first;
    500       }
    501     }
    502   }
    503 
    504   // Full context string including calling context and leaf function name
    505   StringRef FullContext;
    506   // Function name for the associated sample profile
    507   StringRef Name;
    508   // Calling context (leaf function excluded) for the associated sample profile
    509   StringRef CallingContext;
    510   // State of the associated sample profile
    511   uint32_t State;
    512   // Attribute of the associated sample profile
    513   uint32_t Attributes;
    514 };
    515 
    516 class FunctionSamples;
    517 class SampleProfileReaderItaniumRemapper;
    518 
    519 using BodySampleMap = std::map<LineLocation, SampleRecord>;
    520 // NOTE: Using a StringMap here makes parsed profiles consume around 17% more
    521 // memory, which is *very* significant for large profiles.
    522 using FunctionSamplesMap = std::map<std::string, FunctionSamples, std::less<>>;
    523 using CallsiteSampleMap = std::map<LineLocation, FunctionSamplesMap>;
    524 
    525 /// Representation of the samples collected for a function.
    526 ///
    527 /// This data structure contains all the collected samples for the body
    528 /// of a function. Each sample corresponds to a LineLocation instance
    529 /// within the body of the function.
    530 class FunctionSamples {
    531 public:
    532   FunctionSamples() = default;
    533 
    534   void print(raw_ostream &OS = dbgs(), unsigned Indent = 0) const;
    535   void dump() const;
    536 
    537   sampleprof_error addTotalSamples(uint64_t Num, uint64_t Weight = 1) {
    538     bool Overflowed;
    539     TotalSamples =
    540         SaturatingMultiplyAdd(Num, Weight, TotalSamples, &Overflowed);
    541     return Overflowed ? sampleprof_error::counter_overflow
    542                       : sampleprof_error::success;
    543   }
    544 
    545   void setTotalSamples(uint64_t Num) { TotalSamples = Num; }
    546 
    547   sampleprof_error addHeadSamples(uint64_t Num, uint64_t Weight = 1) {
    548     bool Overflowed;
    549     TotalHeadSamples =
    550         SaturatingMultiplyAdd(Num, Weight, TotalHeadSamples, &Overflowed);
    551     return Overflowed ? sampleprof_error::counter_overflow
    552                       : sampleprof_error::success;
    553   }
    554 
    555   sampleprof_error addBodySamples(uint32_t LineOffset, uint32_t Discriminator,
    556                                   uint64_t Num, uint64_t Weight = 1) {
    557     return BodySamples[LineLocation(LineOffset, Discriminator)].addSamples(
    558         Num, Weight);
    559   }
    560 
    561   sampleprof_error addCalledTargetSamples(uint32_t LineOffset,
    562                                           uint32_t Discriminator,
    563                                           StringRef FName, uint64_t Num,
    564                                           uint64_t Weight = 1) {
    565     return BodySamples[LineLocation(LineOffset, Discriminator)].addCalledTarget(
    566         FName, Num, Weight);
    567   }
    568 
    569   sampleprof_error addBodySamplesForProbe(uint32_t Index, uint64_t Num,
    570                                           uint64_t Weight = 1) {
    571     SampleRecord S;
    572     S.addSamples(Num, Weight);
    573     return BodySamples[LineLocation(Index, 0)].merge(S, Weight);
    574   }
    575 
    576   /// Return the number of samples collected at the given location.
    577   /// Each location is specified by \p LineOffset and \p Discriminator.
    578   /// If the location is not found in profile, return error.
    579   ErrorOr<uint64_t> findSamplesAt(uint32_t LineOffset,
    580                                   uint32_t Discriminator) const {
    581     const auto &ret = BodySamples.find(LineLocation(LineOffset, Discriminator));
    582     if (ret == BodySamples.end()) {
    583       // For CSSPGO, in order to conserve profile size, we no longer write out
    584       // locations profile for those not hit during training, so we need to
    585       // treat them as zero instead of error here.
    586       if (FunctionSamples::ProfileIsCS || FunctionSamples::ProfileIsProbeBased)
    587         return 0;
    588       return std::error_code();
    589     } else {
    590       // Return error for an invalid sample count which is usually assigned to
    591       // dangling probe.
    592       if (FunctionSamples::ProfileIsProbeBased &&
    593           ret->second.getSamples() == FunctionSamples::InvalidProbeCount)
    594         return std::error_code();
    595       return ret->second.getSamples();
    596     }
    597   }
    598 
    599   /// Returns the call target map collected at a given location.
    600   /// Each location is specified by \p LineOffset and \p Discriminator.
    601   /// If the location is not found in profile, return error.
    602   ErrorOr<SampleRecord::CallTargetMap>
    603   findCallTargetMapAt(uint32_t LineOffset, uint32_t Discriminator) const {
    604     const auto &ret = BodySamples.find(LineLocation(LineOffset, Discriminator));
    605     if (ret == BodySamples.end())
    606       return std::error_code();
    607     return ret->second.getCallTargets();
    608   }
    609 
    610   /// Returns the call target map collected at a given location specified by \p
    611   /// CallSite. If the location is not found in profile, return error.
    612   ErrorOr<SampleRecord::CallTargetMap>
    613   findCallTargetMapAt(const LineLocation &CallSite) const {
    614     const auto &Ret = BodySamples.find(CallSite);
    615     if (Ret == BodySamples.end())
    616       return std::error_code();
    617     return Ret->second.getCallTargets();
    618   }
    619 
    620   /// Return the function samples at the given callsite location.
    621   FunctionSamplesMap &functionSamplesAt(const LineLocation &Loc) {
    622     return CallsiteSamples[Loc];
    623   }
    624 
    625   /// Returns the FunctionSamplesMap at the given \p Loc.
    626   const FunctionSamplesMap *
    627   findFunctionSamplesMapAt(const LineLocation &Loc) const {
    628     auto iter = CallsiteSamples.find(Loc);
    629     if (iter == CallsiteSamples.end())
    630       return nullptr;
    631     return &iter->second;
    632   }
    633 
    634   /// Returns a pointer to FunctionSamples at the given callsite location
    635   /// \p Loc with callee \p CalleeName. If no callsite can be found, relax
    636   /// the restriction to return the FunctionSamples at callsite location
    637   /// \p Loc with the maximum total sample count. If \p Remapper is not
    638   /// nullptr, use \p Remapper to find FunctionSamples with equivalent name
    639   /// as \p CalleeName.
    640   const FunctionSamples *
    641   findFunctionSamplesAt(const LineLocation &Loc, StringRef CalleeName,
    642                         SampleProfileReaderItaniumRemapper *Remapper) const;
    643 
    644   bool empty() const { return TotalSamples == 0; }
    645 
    646   /// Return the total number of samples collected inside the function.
    647   uint64_t getTotalSamples() const { return TotalSamples; }
    648 
    649   /// Return the total number of branch samples that have the function as the
    650   /// branch target. This should be equivalent to the sample of the first
    651   /// instruction of the symbol. But as we directly get this info for raw
    652   /// profile without referring to potentially inaccurate debug info, this
    653   /// gives more accurate profile data and is preferred for standalone symbols.
    654   uint64_t getHeadSamples() const { return TotalHeadSamples; }
    655 
    656   /// Return the sample count of the first instruction of the function.
    657   /// The function can be either a standalone symbol or an inlined function.
    658   uint64_t getEntrySamples() const {
    659     if (FunctionSamples::ProfileIsCS && getHeadSamples()) {
    660       // For CS profile, if we already have more accurate head samples
    661       // counted by branch sample from caller, use them as entry samples.
    662       return getHeadSamples();
    663     }
    664     uint64_t Count = 0;
    665     // Use either BodySamples or CallsiteSamples which ever has the smaller
    666     // lineno.
    667     if (!BodySamples.empty() &&
    668         (CallsiteSamples.empty() ||
    669          BodySamples.begin()->first < CallsiteSamples.begin()->first))
    670       Count = BodySamples.begin()->second.getSamples();
    671     else if (!CallsiteSamples.empty()) {
    672       // An indirect callsite may be promoted to several inlined direct calls.
    673       // We need to get the sum of them.
    674       for (const auto &N_FS : CallsiteSamples.begin()->second)
    675         Count += N_FS.second.getEntrySamples();
    676     }
    677     // Return at least 1 if total sample is not 0.
    678     return Count ? Count : TotalSamples > 0;
    679   }
    680 
    681   /// Return all the samples collected in the body of the function.
    682   const BodySampleMap &getBodySamples() const { return BodySamples; }
    683 
    684   /// Return all the callsite samples collected in the body of the function.
    685   const CallsiteSampleMap &getCallsiteSamples() const {
    686     return CallsiteSamples;
    687   }
    688 
    689   /// Return the maximum of sample counts in a function body including functions
    690   /// inlined in it.
    691   uint64_t getMaxCountInside() const {
    692     uint64_t MaxCount = 0;
    693     for (const auto &L : getBodySamples())
    694       MaxCount = std::max(MaxCount, L.second.getSamples());
    695     for (const auto &C : getCallsiteSamples())
    696       for (const FunctionSamplesMap::value_type &F : C.second)
    697         MaxCount = std::max(MaxCount, F.second.getMaxCountInside());
    698     return MaxCount;
    699   }
    700 
    701   /// Merge the samples in \p Other into this one.
    702   /// Optionally scale samples by \p Weight.
    703   sampleprof_error merge(const FunctionSamples &Other, uint64_t Weight = 1) {
    704     sampleprof_error Result = sampleprof_error::success;
    705     Name = Other.getName();
    706     if (!GUIDToFuncNameMap)
    707       GUIDToFuncNameMap = Other.GUIDToFuncNameMap;
    708     if (Context.getNameWithContext().empty())
    709       Context = Other.getContext();
    710     if (FunctionHash == 0) {
    711       // Set the function hash code for the target profile.
    712       FunctionHash = Other.getFunctionHash();
    713     } else if (FunctionHash != Other.getFunctionHash()) {
    714       // The two profiles coming with different valid hash codes indicates
    715       // either:
    716       // 1. They are same-named static functions from different compilation
    717       // units (without using -unique-internal-linkage-names), or
    718       // 2. They are really the same function but from different compilations.
    719       // Let's bail out in either case for now, which means one profile is
    720       // dropped.
    721       return sampleprof_error::hash_mismatch;
    722     }
    723 
    724     MergeResult(Result, addTotalSamples(Other.getTotalSamples(), Weight));
    725     MergeResult(Result, addHeadSamples(Other.getHeadSamples(), Weight));
    726     for (const auto &I : Other.getBodySamples()) {
    727       const LineLocation &Loc = I.first;
    728       const SampleRecord &Rec = I.second;
    729       MergeResult(Result, BodySamples[Loc].merge(Rec, Weight));
    730     }
    731     for (const auto &I : Other.getCallsiteSamples()) {
    732       const LineLocation &Loc = I.first;
    733       FunctionSamplesMap &FSMap = functionSamplesAt(Loc);
    734       for (const auto &Rec : I.second)
    735         MergeResult(Result, FSMap[Rec.first].merge(Rec.second, Weight));
    736     }
    737     return Result;
    738   }
    739 
    740   /// Recursively traverses all children, if the total sample count of the
    741   /// corresponding function is no less than \p Threshold, add its corresponding
    742   /// GUID to \p S. Also traverse the BodySamples to add hot CallTarget's GUID
    743   /// to \p S.
    744   void findInlinedFunctions(DenseSet<GlobalValue::GUID> &S,
    745                             const StringMap<Function *> &SymbolMap,
    746                             uint64_t Threshold) const {
    747     if (TotalSamples <= Threshold)
    748       return;
    749     auto isDeclaration = [](const Function *F) {
    750       return !F || F->isDeclaration();
    751     };
    752     if (isDeclaration(SymbolMap.lookup(getFuncName()))) {
    753       // Add to the import list only when it's defined out of module.
    754       S.insert(getGUID(Name));
    755     }
    756     // Import hot CallTargets, which may not be available in IR because full
    757     // profile annotation cannot be done until backend compilation in ThinLTO.
    758     for (const auto &BS : BodySamples)
    759       for (const auto &TS : BS.second.getCallTargets())
    760         if (TS.getValue() > Threshold) {
    761           const Function *Callee = SymbolMap.lookup(getFuncName(TS.getKey()));
    762           if (isDeclaration(Callee))
    763             S.insert(getGUID(TS.getKey()));
    764         }
    765     for (const auto &CS : CallsiteSamples)
    766       for (const auto &NameFS : CS.second)
    767         NameFS.second.findInlinedFunctions(S, SymbolMap, Threshold);
    768   }
    769 
    770   /// Set the name of the function.
    771   void setName(StringRef FunctionName) { Name = FunctionName; }
    772 
    773   /// Return the function name.
    774   StringRef getName() const { return Name; }
    775 
    776   /// Return function name with context.
    777   StringRef getNameWithContext() const {
    778     return FunctionSamples::ProfileIsCS ? Context.getNameWithContext() : Name;
    779   }
    780 
    781   /// Return the original function name.
    782   StringRef getFuncName() const { return getFuncName(Name); }
    783 
    784   void setFunctionHash(uint64_t Hash) { FunctionHash = Hash; }
    785 
    786   uint64_t getFunctionHash() const { return FunctionHash; }
    787 
    788   /// Return the canonical name for a function, taking into account
    789   /// suffix elision policy attributes.
    790   static StringRef getCanonicalFnName(const Function &F) {
    791     auto AttrName = "sample-profile-suffix-elision-policy";
    792     auto Attr = F.getFnAttribute(AttrName).getValueAsString();
    793     return getCanonicalFnName(F.getName(), Attr);
    794   }
    795 
    796   /// Name suffixes which canonicalization should handle to avoid
    797   /// profile mismatch.
    798   static constexpr const char *LLVMSuffix = ".llvm.";
    799   static constexpr const char *PartSuffix = ".part.";
    800   static constexpr const char *UniqSuffix = ".__uniq.";
    801 
    802   static StringRef getCanonicalFnName(StringRef FnName,
    803                                       StringRef Attr = "selected") {
    804     // Note the sequence of the suffixes in the knownSuffixes array matters.
    805     // If suffix "A" is appended after the suffix "B", "A" should be in front
    806     // of "B" in knownSuffixes.
    807     const char *knownSuffixes[] = {LLVMSuffix, PartSuffix, UniqSuffix};
    808     if (Attr == "" || Attr == "all") {
    809       return FnName.split('.').first;
    810     } else if (Attr == "selected") {
    811       StringRef Cand(FnName);
    812       for (const auto &Suf : knownSuffixes) {
    813         StringRef Suffix(Suf);
    814         // If the profile contains ".__uniq." suffix, don't strip the
    815         // suffix for names in the IR.
    816         if (Suffix == UniqSuffix && FunctionSamples::HasUniqSuffix)
    817           continue;
    818         auto It = Cand.rfind(Suffix);
    819         if (It == StringRef::npos)
    820           continue;
    821         auto Dit = Cand.rfind('.');
    822         if (Dit == It + Suffix.size() - 1)
    823           Cand = Cand.substr(0, It);
    824       }
    825       return Cand;
    826     } else if (Attr == "none") {
    827       return FnName;
    828     } else {
    829       assert(false && "internal error: unknown suffix elision policy");
    830     }
    831     return FnName;
    832   }
    833 
    834   /// Translate \p Name into its original name.
    835   /// When profile doesn't use MD5, \p Name needs no translation.
    836   /// When profile uses MD5, \p Name in current FunctionSamples
    837   /// is actually GUID of the original function name. getFuncName will
    838   /// translate \p Name in current FunctionSamples into its original name
    839   /// by looking up in the function map GUIDToFuncNameMap.
    840   /// If the original name doesn't exist in the map, return empty StringRef.
    841   StringRef getFuncName(StringRef Name) const {
    842     if (!UseMD5)
    843       return Name;
    844 
    845     assert(GUIDToFuncNameMap && "GUIDToFuncNameMap needs to be popluated first");
    846     return GUIDToFuncNameMap->lookup(std::stoull(Name.data()));
    847   }
    848 
    849   /// Returns the line offset to the start line of the subprogram.
    850   /// We assume that a single function will not exceed 65535 LOC.
    851   static unsigned getOffset(const DILocation *DIL);
    852 
    853   /// Returns a unique call site identifier for a given debug location of a call
    854   /// instruction. This is wrapper of two scenarios, the probe-based profile and
    855   /// regular profile, to hide implementation details from the sample loader and
    856   /// the context tracker.
    857   static LineLocation getCallSiteIdentifier(const DILocation *DIL);
    858 
    859   /// Get the FunctionSamples of the inline instance where DIL originates
    860   /// from.
    861   ///
    862   /// The FunctionSamples of the instruction (Machine or IR) associated to
    863   /// \p DIL is the inlined instance in which that instruction is coming from.
    864   /// We traverse the inline stack of that instruction, and match it with the
    865   /// tree nodes in the profile.
    866   ///
    867   /// \returns the FunctionSamples pointer to the inlined instance.
    868   /// If \p Remapper is not nullptr, it will be used to find matching
    869   /// FunctionSamples with not exactly the same but equivalent name.
    870   const FunctionSamples *findFunctionSamples(
    871       const DILocation *DIL,
    872       SampleProfileReaderItaniumRemapper *Remapper = nullptr) const;
    873 
    874   // The invalid sample count is used to represent samples collected for a
    875   // dangling probe.
    876   static constexpr uint64_t InvalidProbeCount = UINT64_MAX;
    877 
    878   static bool ProfileIsProbeBased;
    879 
    880   static bool ProfileIsCS;
    881 
    882   SampleContext &getContext() const { return Context; }
    883 
    884   void setContext(const SampleContext &FContext) { Context = FContext; }
    885 
    886   static SampleProfileFormat Format;
    887 
    888   /// Whether the profile uses MD5 to represent string.
    889   static bool UseMD5;
    890 
    891   /// Whether the profile contains any ".__uniq." suffix in a name.
    892   static bool HasUniqSuffix;
    893 
    894   /// GUIDToFuncNameMap saves the mapping from GUID to the symbol name, for
    895   /// all the function symbols defined or declared in current module.
    896   DenseMap<uint64_t, StringRef> *GUIDToFuncNameMap = nullptr;
    897 
    898   // Assume the input \p Name is a name coming from FunctionSamples itself.
    899   // If UseMD5 is true, the name is already a GUID and we
    900   // don't want to return the GUID of GUID.
    901   static uint64_t getGUID(StringRef Name) {
    902     return UseMD5 ? std::stoull(Name.data()) : Function::getGUID(Name);
    903   }
    904 
    905   // Find all the names in the current FunctionSamples including names in
    906   // all the inline instances and names of call targets.
    907   void findAllNames(DenseSet<StringRef> &NameSet) const;
    908 
    909 private:
    910   /// Mangled name of the function.
    911   StringRef Name;
    912 
    913   /// CFG hash value for the function.
    914   uint64_t FunctionHash = 0;
    915 
    916   /// Calling context for function profile
    917   mutable SampleContext Context;
    918 
    919   /// Total number of samples collected inside this function.
    920   ///
    921   /// Samples are cumulative, they include all the samples collected
    922   /// inside this function and all its inlined callees.
    923   uint64_t TotalSamples = 0;
    924 
    925   /// Total number of samples collected at the head of the function.
    926   /// This is an approximation of the number of calls made to this function
    927   /// at runtime.
    928   uint64_t TotalHeadSamples = 0;
    929 
    930   /// Map instruction locations to collected samples.
    931   ///
    932   /// Each entry in this map contains the number of samples
    933   /// collected at the corresponding line offset. All line locations
    934   /// are an offset from the start of the function.
    935   BodySampleMap BodySamples;
    936 
    937   /// Map call sites to collected samples for the called function.
    938   ///
    939   /// Each entry in this map corresponds to all the samples
    940   /// collected for the inlined function call at the given
    941   /// location. For example, given:
    942   ///
    943   ///     void foo() {
    944   ///  1    bar();
    945   ///  ...
    946   ///  8    baz();
    947   ///     }
    948   ///
    949   /// If the bar() and baz() calls were inlined inside foo(), this
    950   /// map will contain two entries.  One for all the samples collected
    951   /// in the call to bar() at line offset 1, the other for all the samples
    952   /// collected in the call to baz() at line offset 8.
    953   CallsiteSampleMap CallsiteSamples;
    954 };
    955 
    956 raw_ostream &operator<<(raw_ostream &OS, const FunctionSamples &FS);
    957 
    958 /// Sort a LocationT->SampleT map by LocationT.
    959 ///
    960 /// It produces a sorted list of <LocationT, SampleT> records by ascending
    961 /// order of LocationT.
    962 template <class LocationT, class SampleT> class SampleSorter {
    963 public:
    964   using SamplesWithLoc = std::pair<const LocationT, SampleT>;
    965   using SamplesWithLocList = SmallVector<const SamplesWithLoc *, 20>;
    966 
    967   SampleSorter(const std::map<LocationT, SampleT> &Samples) {
    968     for (const auto &I : Samples)
    969       V.push_back(&I);
    970     llvm::stable_sort(V, [](const SamplesWithLoc *A, const SamplesWithLoc *B) {
    971       return A->first < B->first;
    972     });
    973   }
    974 
    975   const SamplesWithLocList &get() const { return V; }
    976 
    977 private:
    978   SamplesWithLocList V;
    979 };
    980 
    981 /// SampleContextTrimmer impelements helper functions to trim, merge cold
    982 /// context profiles. It also supports context profile canonicalization to make
    983 /// sure ProfileMap's key is consistent with FunctionSample's name/context.
    984 class SampleContextTrimmer {
    985 public:
    986   SampleContextTrimmer(StringMap<FunctionSamples> &Profiles)
    987       : ProfileMap(Profiles){};
    988   // Trim and merge cold context profile when requested.
    989   void trimAndMergeColdContextProfiles(uint64_t ColdCountThreshold,
    990                                        bool TrimColdContext = true,
    991                                        bool MergeColdContext = true);
    992   // Canonicalize context profile name and attributes.
    993   void canonicalizeContextProfiles();
    994 
    995 private:
    996   StringMap<FunctionSamples> &ProfileMap;
    997 };
    998 
    999 /// ProfileSymbolList records the list of function symbols shown up
   1000 /// in the binary used to generate the profile. It is useful to
   1001 /// to discriminate a function being so cold as not to shown up
   1002 /// in the profile and a function newly added.
   1003 class ProfileSymbolList {
   1004 public:
   1005   /// copy indicates whether we need to copy the underlying memory
   1006   /// for the input Name.
   1007   void add(StringRef Name, bool copy = false) {
   1008     if (!copy) {
   1009       Syms.insert(Name);
   1010       return;
   1011     }
   1012     Syms.insert(Name.copy(Allocator));
   1013   }
   1014 
   1015   bool contains(StringRef Name) { return Syms.count(Name); }
   1016 
   1017   void merge(const ProfileSymbolList &List) {
   1018     for (auto Sym : List.Syms)
   1019       add(Sym, true);
   1020   }
   1021 
   1022   unsigned size() { return Syms.size(); }
   1023 
   1024   void setToCompress(bool TC) { ToCompress = TC; }
   1025   bool toCompress() { return ToCompress; }
   1026 
   1027   std::error_code read(const uint8_t *Data, uint64_t ListSize);
   1028   std::error_code write(raw_ostream &OS);
   1029   void dump(raw_ostream &OS = dbgs()) const;
   1030 
   1031 private:
   1032   // Determine whether or not to compress the symbol list when
   1033   // writing it into profile. The variable is unused when the symbol
   1034   // list is read from an existing profile.
   1035   bool ToCompress = false;
   1036   DenseSet<StringRef> Syms;
   1037   BumpPtrAllocator Allocator;
   1038 };
   1039 
   1040 } // end namespace sampleprof
   1041 } // end namespace llvm
   1042 
   1043 #endif // LLVM_PROFILEDATA_SAMPLEPROF_H
   1044