Home | History | Annotate | Line # | Download | only in llvm-profdata
      1 //===- llvm-profdata.cpp - LLVM profile data tool -------------------------===//
      2 //
      3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
      4 // See https://llvm.org/LICENSE.txt for license information.
      5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
      6 //
      7 //===----------------------------------------------------------------------===//
      8 //
      9 // llvm-profdata merges .profdata files.
     10 //
     11 //===----------------------------------------------------------------------===//
     12 
     13 #include "llvm/ADT/SmallSet.h"
     14 #include "llvm/ADT/SmallVector.h"
     15 #include "llvm/ADT/StringRef.h"
     16 #include "llvm/IR/LLVMContext.h"
     17 #include "llvm/ProfileData/InstrProfReader.h"
     18 #include "llvm/ProfileData/InstrProfWriter.h"
     19 #include "llvm/ProfileData/ProfileCommon.h"
     20 #include "llvm/ProfileData/SampleProfReader.h"
     21 #include "llvm/ProfileData/SampleProfWriter.h"
     22 #include "llvm/Support/CommandLine.h"
     23 #include "llvm/Support/Errc.h"
     24 #include "llvm/Support/FileSystem.h"
     25 #include "llvm/Support/Format.h"
     26 #include "llvm/Support/FormattedStream.h"
     27 #include "llvm/Support/InitLLVM.h"
     28 #include "llvm/Support/MemoryBuffer.h"
     29 #include "llvm/Support/Path.h"
     30 #include "llvm/Support/ThreadPool.h"
     31 #include "llvm/Support/Threading.h"
     32 #include "llvm/Support/WithColor.h"
     33 #include "llvm/Support/raw_ostream.h"
     34 #include <algorithm>
     35 
     36 using namespace llvm;
     37 
/// Output profile formats understood by this tool.
///
/// The enumerator order mirrors the entries of the FormatMap table defined
/// later in this file, so the numeric values are spelled out explicitly.
enum ProfileFormat {
  PF_None = 0,
  PF_Text = 1,
  PF_Compact_Binary = 2,
  PF_Ext_Binary = 3,
  PF_GCC = 4,
  PF_Binary = 5
};
     46 
     47 static void warn(Twine Message, std::string Whence = "",
     48                  std::string Hint = "") {
     49   WithColor::warning();
     50   if (!Whence.empty())
     51     errs() << Whence << ": ";
     52   errs() << Message << "\n";
     53   if (!Hint.empty())
     54     WithColor::note() << Hint << "\n";
     55 }
     56 
     57 static void warn(Error E, StringRef Whence = "") {
     58   if (E.isA<InstrProfError>()) {
     59     handleAllErrors(std::move(E), [&](const InstrProfError &IPE) {
     60       warn(IPE.message(), std::string(Whence), std::string(""));
     61     });
     62   }
     63 }
     64 
     65 static void exitWithError(Twine Message, std::string Whence = "",
     66                           std::string Hint = "") {
     67   WithColor::error();
     68   if (!Whence.empty())
     69     errs() << Whence << ": ";
     70   errs() << Message << "\n";
     71   if (!Hint.empty())
     72     WithColor::note() << Hint << "\n";
     73   ::exit(1);
     74 }
     75 
     76 static void exitWithError(Error E, StringRef Whence = "") {
     77   if (E.isA<InstrProfError>()) {
     78     handleAllErrors(std::move(E), [&](const InstrProfError &IPE) {
     79       instrprof_error instrError = IPE.get();
     80       StringRef Hint = "";
     81       if (instrError == instrprof_error::unrecognized_format) {
     82         // Hint for common error of forgetting --sample for sample profiles.
     83         Hint = "Perhaps you forgot to use the --sample option?";
     84       }
     85       exitWithError(IPE.message(), std::string(Whence), std::string(Hint));
     86     });
     87   }
     88 
     89   exitWithError(toString(std::move(E)), std::string(Whence));
     90 }
     91 
     92 static void exitWithErrorCode(std::error_code EC, StringRef Whence = "") {
     93   exitWithError(EC.message(), std::string(Whence));
     94 }
     95 
namespace {
/// Profile kind selector: instr or sample.
enum ProfileKinds {
  instr = 0,
  sample = 1
};

/// Selects how an invalid input is treated; see warnOrExitGivenError(), which
/// exits for failIfAnyAreInvalid and only warns otherwise.
enum FailureMode {
  failIfAnyAreInvalid = 0,
  failIfAllAreInvalid = 1
};
} // namespace
    100 
    101 static void warnOrExitGivenError(FailureMode FailMode, std::error_code EC,
    102                                  StringRef Whence = "") {
    103   if (FailMode == failIfAnyAreInvalid)
    104     exitWithErrorCode(EC, Whence);
    105   else
    106     warn(EC.message(), std::string(Whence));
    107 }
    108 
    109 static void handleMergeWriterError(Error E, StringRef WhenceFile = "",
    110                                    StringRef WhenceFunction = "",
    111                                    bool ShowHint = true) {
    112   if (!WhenceFile.empty())
    113     errs() << WhenceFile << ": ";
    114   if (!WhenceFunction.empty())
    115     errs() << WhenceFunction << ": ";
    116 
    117   auto IPE = instrprof_error::success;
    118   E = handleErrors(std::move(E),
    119                    [&IPE](std::unique_ptr<InstrProfError> E) -> Error {
    120                      IPE = E->get();
    121                      return Error(std::move(E));
    122                    });
    123   errs() << toString(std::move(E)) << "\n";
    124 
    125   if (ShowHint) {
    126     StringRef Hint = "";
    127     if (IPE != instrprof_error::success) {
    128       switch (IPE) {
    129       case instrprof_error::hash_mismatch:
    130       case instrprof_error::count_mismatch:
    131       case instrprof_error::value_site_count_mismatch:
    132         Hint = "Make sure that all profile data to be merged is generated "
    133                "from the same binary.";
    134         break;
    135       default:
    136         break;
    137       }
    138     }
    139 
    140     if (!Hint.empty())
    141       errs() << Hint << "\n";
    142   }
    143 }
    144 
    145 namespace {
    146 /// A remapper from original symbol names to new symbol names based on a file
    147 /// containing a list of mappings from old name to new name.
    148 class SymbolRemapper {
    149   std::unique_ptr<MemoryBuffer> File;
    150   DenseMap<StringRef, StringRef> RemappingTable;
    151 
    152 public:
    153   /// Build a SymbolRemapper from a file containing a list of old/new symbols.
    154   static std::unique_ptr<SymbolRemapper> create(StringRef InputFile) {
    155     auto BufOrError = MemoryBuffer::getFileOrSTDIN(InputFile);
    156     if (!BufOrError)
    157       exitWithErrorCode(BufOrError.getError(), InputFile);
    158 
    159     auto Remapper = std::make_unique<SymbolRemapper>();
    160     Remapper->File = std::move(BufOrError.get());
    161 
    162     for (line_iterator LineIt(*Remapper->File, /*SkipBlanks=*/true, '#');
    163          !LineIt.is_at_eof(); ++LineIt) {
    164       std::pair<StringRef, StringRef> Parts = LineIt->split(' ');
    165       if (Parts.first.empty() || Parts.second.empty() ||
    166           Parts.second.count(' ')) {
    167         exitWithError("unexpected line in remapping file",
    168                       (InputFile + ":" + Twine(LineIt.line_number())).str(),
    169                       "expected 'old_symbol new_symbol'");
    170       }
    171       Remapper->RemappingTable.insert(Parts);
    172     }
    173     return Remapper;
    174   }
    175 
    176   /// Attempt to map the given old symbol into a new symbol.
    177   ///
    178   /// \return The new symbol, or \p Name if no such symbol was found.
    179   StringRef operator()(StringRef Name) {
    180     StringRef New = RemappingTable.lookup(Name);
    181     return New.empty() ? Name : New;
    182   }
    183 };
    184 }
    185 
    186 struct WeightedFile {
    187   std::string Filename;
    188   uint64_t Weight;
    189 };
    190 typedef SmallVector<WeightedFile, 5> WeightedFileVector;
    191 
    192 /// Keep track of merged data and reported errors.
    193 struct WriterContext {
    194   std::mutex Lock;
    195   InstrProfWriter Writer;
    196   std::vector<std::pair<Error, std::string>> Errors;
    197   std::mutex &ErrLock;
    198   SmallSet<instrprof_error, 4> &WriterErrorCodes;
    199 
    200   WriterContext(bool IsSparse, std::mutex &ErrLock,
    201                 SmallSet<instrprof_error, 4> &WriterErrorCodes)
    202       : Lock(), Writer(IsSparse), Errors(), ErrLock(ErrLock),
    203         WriterErrorCodes(WriterErrorCodes) {}
    204 };
    205 
    206 /// Computer the overlap b/w profile BaseFilename and TestFileName,
    207 /// and store the program level result to Overlap.
    208 static void overlapInput(const std::string &BaseFilename,
    209                          const std::string &TestFilename, WriterContext *WC,
    210                          OverlapStats &Overlap,
    211                          const OverlapFuncFilters &FuncFilter,
    212                          raw_fd_ostream &OS, bool IsCS) {
    213   auto ReaderOrErr = InstrProfReader::create(TestFilename);
    214   if (Error E = ReaderOrErr.takeError()) {
    215     // Skip the empty profiles by returning sliently.
    216     instrprof_error IPE = InstrProfError::take(std::move(E));
    217     if (IPE != instrprof_error::empty_raw_profile)
    218       WC->Errors.emplace_back(make_error<InstrProfError>(IPE), TestFilename);
    219     return;
    220   }
    221 
    222   auto Reader = std::move(ReaderOrErr.get());
    223   for (auto &I : *Reader) {
    224     OverlapStats FuncOverlap(OverlapStats::FunctionLevel);
    225     FuncOverlap.setFuncInfo(I.Name, I.Hash);
    226 
    227     WC->Writer.overlapRecord(std::move(I), Overlap, FuncOverlap, FuncFilter);
    228     FuncOverlap.dump(OS);
    229   }
    230 }
    231 
    232 /// Load an input into a writer context.
    233 static void loadInput(const WeightedFile &Input, SymbolRemapper *Remapper,
    234                       WriterContext *WC) {
    235   std::unique_lock<std::mutex> CtxGuard{WC->Lock};
    236 
    237   // Copy the filename, because llvm::ThreadPool copied the input "const
    238   // WeightedFile &" by value, making a reference to the filename within it
    239   // invalid outside of this packaged task.
    240   std::string Filename = Input.Filename;
    241 
    242   auto ReaderOrErr = InstrProfReader::create(Input.Filename);
    243   if (Error E = ReaderOrErr.takeError()) {
    244     // Skip the empty profiles by returning sliently.
    245     instrprof_error IPE = InstrProfError::take(std::move(E));
    246     if (IPE != instrprof_error::empty_raw_profile)
    247       WC->Errors.emplace_back(make_error<InstrProfError>(IPE), Filename);
    248     return;
    249   }
    250 
    251   auto Reader = std::move(ReaderOrErr.get());
    252   bool IsIRProfile = Reader->isIRLevelProfile();
    253   bool HasCSIRProfile = Reader->hasCSIRLevelProfile();
    254   if (Error E = WC->Writer.setIsIRLevelProfile(IsIRProfile, HasCSIRProfile)) {
    255     consumeError(std::move(E));
    256     WC->Errors.emplace_back(
    257         make_error<StringError>(
    258             "Merge IR generated profile with Clang generated profile.",
    259             std::error_code()),
    260         Filename);
    261     return;
    262   }
    263   WC->Writer.setInstrEntryBBEnabled(Reader->instrEntryBBEnabled());
    264 
    265   for (auto &I : *Reader) {
    266     if (Remapper)
    267       I.Name = (*Remapper)(I.Name);
    268     const StringRef FuncName = I.Name;
    269     bool Reported = false;
    270     WC->Writer.addRecord(std::move(I), Input.Weight, [&](Error E) {
    271       if (Reported) {
    272         consumeError(std::move(E));
    273         return;
    274       }
    275       Reported = true;
    276       // Only show hint the first time an error occurs.
    277       instrprof_error IPE = InstrProfError::take(std::move(E));
    278       std::unique_lock<std::mutex> ErrGuard{WC->ErrLock};
    279       bool firstTime = WC->WriterErrorCodes.insert(IPE).second;
    280       handleMergeWriterError(make_error<InstrProfError>(IPE), Input.Filename,
    281                              FuncName, firstTime);
    282     });
    283   }
    284   if (Reader->hasError())
    285     if (Error E = Reader->getError())
    286       WC->Errors.emplace_back(std::move(E), Filename);
    287 }
    288 
    289 /// Merge the \p Src writer context into \p Dst.
    290 static void mergeWriterContexts(WriterContext *Dst, WriterContext *Src) {
    291   for (auto &ErrorPair : Src->Errors)
    292     Dst->Errors.push_back(std::move(ErrorPair));
    293   Src->Errors.clear();
    294 
    295   Dst->Writer.mergeRecordsFromWriter(std::move(Src->Writer), [&](Error E) {
    296     instrprof_error IPE = InstrProfError::take(std::move(E));
    297     std::unique_lock<std::mutex> ErrGuard{Dst->ErrLock};
    298     bool firstTime = Dst->WriterErrorCodes.insert(IPE).second;
    299     if (firstTime)
    300       warn(toString(make_error<InstrProfError>(IPE)));
    301   });
    302 }
    303 
    304 static void writeInstrProfile(StringRef OutputFilename,
    305                               ProfileFormat OutputFormat,
    306                               InstrProfWriter &Writer) {
    307   std::error_code EC;
    308   raw_fd_ostream Output(OutputFilename.data(), EC,
    309                         OutputFormat == PF_Text ? sys::fs::OF_TextWithCRLF
    310                                                 : sys::fs::OF_None);
    311   if (EC)
    312     exitWithErrorCode(EC, OutputFilename);
    313 
    314   if (OutputFormat == PF_Text) {
    315     if (Error E = Writer.writeText(Output))
    316       warn(std::move(E));
    317   } else {
    318     if (Error E = Writer.write(Output))
    319       warn(std::move(E));
    320   }
    321 }
    322 
    323 static void mergeInstrProfile(const WeightedFileVector &Inputs,
    324                               SymbolRemapper *Remapper,
    325                               StringRef OutputFilename,
    326                               ProfileFormat OutputFormat, bool OutputSparse,
    327                               unsigned NumThreads, FailureMode FailMode) {
    328   if (OutputFilename.compare("-") == 0)
    329     exitWithError("Cannot write indexed profdata format to stdout.");
    330 
    331   if (OutputFormat != PF_Binary && OutputFormat != PF_Compact_Binary &&
    332       OutputFormat != PF_Ext_Binary && OutputFormat != PF_Text)
    333     exitWithError("Unknown format is specified.");
    334 
    335   std::mutex ErrorLock;
    336   SmallSet<instrprof_error, 4> WriterErrorCodes;
    337 
    338   // If NumThreads is not specified, auto-detect a good default.
    339   if (NumThreads == 0)
    340     NumThreads = std::min(hardware_concurrency().compute_thread_count(),
    341                           unsigned((Inputs.size() + 1) / 2));
    342   // FIXME: There's a bug here, where setting NumThreads = Inputs.size() fails
    343   // the merge_empty_profile.test because the InstrProfWriter.ProfileKind isn't
    344   // merged, thus the emitted file ends up with a PF_Unknown kind.
    345 
    346   // Initialize the writer contexts.
    347   SmallVector<std::unique_ptr<WriterContext>, 4> Contexts;
    348   for (unsigned I = 0; I < NumThreads; ++I)
    349     Contexts.emplace_back(std::make_unique<WriterContext>(
    350         OutputSparse, ErrorLock, WriterErrorCodes));
    351 
    352   if (NumThreads == 1) {
    353     for (const auto &Input : Inputs)
    354       loadInput(Input, Remapper, Contexts[0].get());
    355   } else {
    356     ThreadPool Pool(hardware_concurrency(NumThreads));
    357 
    358     // Load the inputs in parallel (N/NumThreads serial steps).
    359     unsigned Ctx = 0;
    360     for (const auto &Input : Inputs) {
    361       Pool.async(loadInput, Input, Remapper, Contexts[Ctx].get());
    362       Ctx = (Ctx + 1) % NumThreads;
    363     }
    364     Pool.wait();
    365 
    366     // Merge the writer contexts together (~ lg(NumThreads) serial steps).
    367     unsigned Mid = Contexts.size() / 2;
    368     unsigned End = Contexts.size();
    369     assert(Mid > 0 && "Expected more than one context");
    370     do {
    371       for (unsigned I = 0; I < Mid; ++I)
    372         Pool.async(mergeWriterContexts, Contexts[I].get(),
    373                    Contexts[I + Mid].get());
    374       Pool.wait();
    375       if (End & 1) {
    376         Pool.async(mergeWriterContexts, Contexts[0].get(),
    377                    Contexts[End - 1].get());
    378         Pool.wait();
    379       }
    380       End = Mid;
    381       Mid /= 2;
    382     } while (Mid > 0);
    383   }
    384 
    385   // Handle deferred errors encountered during merging. If the number of errors
    386   // is equal to the number of inputs the merge failed.
    387   unsigned NumErrors = 0;
    388   for (std::unique_ptr<WriterContext> &WC : Contexts) {
    389     for (auto &ErrorPair : WC->Errors) {
    390       ++NumErrors;
    391       warn(toString(std::move(ErrorPair.first)), ErrorPair.second);
    392     }
    393   }
    394   if (NumErrors == Inputs.size() ||
    395       (NumErrors > 0 && FailMode == failIfAnyAreInvalid))
    396     exitWithError("No profiles could be merged.");
    397 
    398   writeInstrProfile(OutputFilename, OutputFormat, Contexts[0]->Writer);
    399 }
    400 
    401 /// The profile entry for a function in instrumentation profile.
    402 struct InstrProfileEntry {
    403   uint64_t MaxCount = 0;
    404   float ZeroCounterRatio = 0.0;
    405   InstrProfRecord *ProfRecord;
    406   InstrProfileEntry(InstrProfRecord *Record);
    407   InstrProfileEntry() = default;
    408 };
    409 
    410 InstrProfileEntry::InstrProfileEntry(InstrProfRecord *Record) {
    411   ProfRecord = Record;
    412   uint64_t CntNum = Record->Counts.size();
    413   uint64_t ZeroCntNum = 0;
    414   for (size_t I = 0; I < CntNum; ++I) {
    415     MaxCount = std::max(MaxCount, Record->Counts[I]);
    416     ZeroCntNum += !Record->Counts[I];
    417   }
    418   ZeroCounterRatio = (float)ZeroCntNum / CntNum;
    419 }
    420 
    421 /// Either set all the counters in the instr profile entry \p IFE to -1
    422 /// in order to drop the profile or scale up the counters in \p IFP to
    423 /// be above hot threshold. We use the ratio of zero counters in the
    424 /// profile of a function to decide the profile is helpful or harmful
    425 /// for performance, and to choose whether to scale up or drop it.
    426 static void updateInstrProfileEntry(InstrProfileEntry &IFE,
    427                                     uint64_t HotInstrThreshold,
    428                                     float ZeroCounterThreshold) {
    429   InstrProfRecord *ProfRecord = IFE.ProfRecord;
    430   if (!IFE.MaxCount || IFE.ZeroCounterRatio > ZeroCounterThreshold) {
    431     // If all or most of the counters of the function are zero, the
    432     // profile is unaccountable and shuld be dropped. Reset all the
    433     // counters to be -1 and PGO profile-use will drop the profile.
    434     // All counters being -1 also implies that the function is hot so
    435     // PGO profile-use will also set the entry count metadata to be
    436     // above hot threshold.
    437     for (size_t I = 0; I < ProfRecord->Counts.size(); ++I)
    438       ProfRecord->Counts[I] = -1;
    439     return;
    440   }
    441 
    442   // Scale up the MaxCount to be multiple times above hot threshold.
    443   const unsigned MultiplyFactor = 3;
    444   uint64_t Numerator = HotInstrThreshold * MultiplyFactor;
    445   uint64_t Denominator = IFE.MaxCount;
    446   ProfRecord->scale(Numerator, Denominator, [&](instrprof_error E) {
    447     warn(toString(make_error<InstrProfError>(E)));
    448   });
    449 }
    450 
/// Positions within ProfileSummaryBuilder::DefaultCutoffs that
/// adjustInstrProfile() uses to derive its cold and hot count thresholds.
constexpr uint64_t ColdPercentileIdx = 15;
constexpr uint64_t HotPercentileIdx = 11;
    453 
    454 /// Adjust the instr profile in \p WC based on the sample profile in
    455 /// \p Reader.
    456 static void
    457 adjustInstrProfile(std::unique_ptr<WriterContext> &WC,
    458                    std::unique_ptr<sampleprof::SampleProfileReader> &Reader,
    459                    unsigned SupplMinSizeThreshold, float ZeroCounterThreshold,
    460                    unsigned InstrProfColdThreshold) {
    461   // Function to its entry in instr profile.
    462   StringMap<InstrProfileEntry> InstrProfileMap;
    463   InstrProfSummaryBuilder IPBuilder(ProfileSummaryBuilder::DefaultCutoffs);
    464   for (auto &PD : WC->Writer.getProfileData()) {
    465     // Populate IPBuilder.
    466     for (const auto &PDV : PD.getValue()) {
    467       InstrProfRecord Record = PDV.second;
    468       IPBuilder.addRecord(Record);
    469     }
    470 
    471     // If a function has multiple entries in instr profile, skip it.
    472     if (PD.getValue().size() != 1)
    473       continue;
    474 
    475     // Initialize InstrProfileMap.
    476     InstrProfRecord *R = &PD.getValue().begin()->second;
    477     InstrProfileMap[PD.getKey()] = InstrProfileEntry(R);
    478   }
    479 
    480   ProfileSummary InstrPS = *IPBuilder.getSummary();
    481   ProfileSummary SamplePS = Reader->getSummary();
    482 
    483   // Compute cold thresholds for instr profile and sample profile.
    484   uint64_t ColdSampleThreshold =
    485       ProfileSummaryBuilder::getEntryForPercentile(
    486           SamplePS.getDetailedSummary(),
    487           ProfileSummaryBuilder::DefaultCutoffs[ColdPercentileIdx])
    488           .MinCount;
    489   uint64_t HotInstrThreshold =
    490       ProfileSummaryBuilder::getEntryForPercentile(
    491           InstrPS.getDetailedSummary(),
    492           ProfileSummaryBuilder::DefaultCutoffs[HotPercentileIdx])
    493           .MinCount;
    494   uint64_t ColdInstrThreshold =
    495       InstrProfColdThreshold
    496           ? InstrProfColdThreshold
    497           : ProfileSummaryBuilder::getEntryForPercentile(
    498                 InstrPS.getDetailedSummary(),
    499                 ProfileSummaryBuilder::DefaultCutoffs[ColdPercentileIdx])
    500                 .MinCount;
    501 
    502   // Find hot/warm functions in sample profile which is cold in instr profile
    503   // and adjust the profiles of those functions in the instr profile.
    504   for (const auto &PD : Reader->getProfiles()) {
    505     StringRef FName = PD.getKey();
    506     const sampleprof::FunctionSamples &FS = PD.getValue();
    507     auto It = InstrProfileMap.find(FName);
    508     if (FS.getHeadSamples() > ColdSampleThreshold &&
    509         It != InstrProfileMap.end() &&
    510         It->second.MaxCount <= ColdInstrThreshold &&
    511         FS.getBodySamples().size() >= SupplMinSizeThreshold) {
    512       updateInstrProfileEntry(It->second, HotInstrThreshold,
    513                               ZeroCounterThreshold);
    514     }
    515   }
    516 }
    517 
    518 /// The main function to supplement instr profile with sample profile.
    519 /// \Inputs contains the instr profile. \p SampleFilename specifies the
    520 /// sample profile. \p OutputFilename specifies the output profile name.
    521 /// \p OutputFormat specifies the output profile format. \p OutputSparse
    522 /// specifies whether to generate sparse profile. \p SupplMinSizeThreshold
    523 /// specifies the minimal size for the functions whose profile will be
    524 /// adjusted. \p ZeroCounterThreshold is the threshold to check whether
    525 /// a function contains too many zero counters and whether its profile
    526 /// should be dropped. \p InstrProfColdThreshold is the user specified
    527 /// cold threshold which will override the cold threshold got from the
    528 /// instr profile summary.
    529 static void supplementInstrProfile(
    530     const WeightedFileVector &Inputs, StringRef SampleFilename,
    531     StringRef OutputFilename, ProfileFormat OutputFormat, bool OutputSparse,
    532     unsigned SupplMinSizeThreshold, float ZeroCounterThreshold,
    533     unsigned InstrProfColdThreshold) {
    534   if (OutputFilename.compare("-") == 0)
    535     exitWithError("Cannot write indexed profdata format to stdout.");
    536   if (Inputs.size() != 1)
    537     exitWithError("Expect one input to be an instr profile.");
    538   if (Inputs[0].Weight != 1)
    539     exitWithError("Expect instr profile doesn't have weight.");
    540 
    541   StringRef InstrFilename = Inputs[0].Filename;
    542 
    543   // Read sample profile.
    544   LLVMContext Context;
    545   auto ReaderOrErr =
    546       sampleprof::SampleProfileReader::create(SampleFilename.str(), Context);
    547   if (std::error_code EC = ReaderOrErr.getError())
    548     exitWithErrorCode(EC, SampleFilename);
    549   auto Reader = std::move(ReaderOrErr.get());
    550   if (std::error_code EC = Reader->read())
    551     exitWithErrorCode(EC, SampleFilename);
    552 
    553   // Read instr profile.
    554   std::mutex ErrorLock;
    555   SmallSet<instrprof_error, 4> WriterErrorCodes;
    556   auto WC = std::make_unique<WriterContext>(OutputSparse, ErrorLock,
    557                                             WriterErrorCodes);
    558   loadInput(Inputs[0], nullptr, WC.get());
    559   if (WC->Errors.size() > 0)
    560     exitWithError(std::move(WC->Errors[0].first), InstrFilename);
    561 
    562   adjustInstrProfile(WC, Reader, SupplMinSizeThreshold, ZeroCounterThreshold,
    563                      InstrProfColdThreshold);
    564   writeInstrProfile(OutputFilename, OutputFormat, WC->Writer);
    565 }
    566 
    567 /// Make a copy of the given function samples with all symbol names remapped
    568 /// by the provided symbol remapper.
    569 static sampleprof::FunctionSamples
    570 remapSamples(const sampleprof::FunctionSamples &Samples,
    571              SymbolRemapper &Remapper, sampleprof_error &Error) {
    572   sampleprof::FunctionSamples Result;
    573   Result.setName(Remapper(Samples.getName()));
    574   Result.addTotalSamples(Samples.getTotalSamples());
    575   Result.addHeadSamples(Samples.getHeadSamples());
    576   for (const auto &BodySample : Samples.getBodySamples()) {
    577     Result.addBodySamples(BodySample.first.LineOffset,
    578                           BodySample.first.Discriminator,
    579                           BodySample.second.getSamples());
    580     for (const auto &Target : BodySample.second.getCallTargets()) {
    581       Result.addCalledTargetSamples(BodySample.first.LineOffset,
    582                                     BodySample.first.Discriminator,
    583                                     Remapper(Target.first()), Target.second);
    584     }
    585   }
    586   for (const auto &CallsiteSamples : Samples.getCallsiteSamples()) {
    587     sampleprof::FunctionSamplesMap &Target =
    588         Result.functionSamplesAt(CallsiteSamples.first);
    589     for (const auto &Callsite : CallsiteSamples.second) {
    590       sampleprof::FunctionSamples Remapped =
    591           remapSamples(Callsite.second, Remapper, Error);
    592       MergeResult(Error,
    593                   Target[std::string(Remapped.getName())].merge(Remapped));
    594     }
    595   }
    596   return Result;
    597 }
    598 
    599 static sampleprof::SampleProfileFormat FormatMap[] = {
    600     sampleprof::SPF_None,
    601     sampleprof::SPF_Text,
    602     sampleprof::SPF_Compact_Binary,
    603     sampleprof::SPF_Ext_Binary,
    604     sampleprof::SPF_GCC,
    605     sampleprof::SPF_Binary};
    606 
    607 static std::unique_ptr<MemoryBuffer>
    608 getInputFileBuf(const StringRef &InputFile) {
    609   if (InputFile == "")
    610     return {};
    611 
    612   auto BufOrError = MemoryBuffer::getFileOrSTDIN(InputFile);
    613   if (!BufOrError)
    614     exitWithErrorCode(BufOrError.getError(), InputFile);
    615 
    616   return std::move(*BufOrError);
    617 }
    618 
    619 static void populateProfileSymbolList(MemoryBuffer *Buffer,
    620                                       sampleprof::ProfileSymbolList &PSL) {
    621   if (!Buffer)
    622     return;
    623 
    624   SmallVector<StringRef, 32> SymbolVec;
    625   StringRef Data = Buffer->getBuffer();
    626   Data.split(SymbolVec, '\n', /*MaxSplit=*/-1, /*KeepEmpty=*/false);
    627 
    628   for (StringRef symbol : SymbolVec)
    629     PSL.add(symbol);
    630 }
    631 
    632 static void handleExtBinaryWriter(sampleprof::SampleProfileWriter &Writer,
    633                                   ProfileFormat OutputFormat,
    634                                   MemoryBuffer *Buffer,
    635                                   sampleprof::ProfileSymbolList &WriterList,
    636                                   bool CompressAllSections, bool UseMD5,
    637                                   bool GenPartialProfile) {
    638   populateProfileSymbolList(Buffer, WriterList);
    639   if (WriterList.size() > 0 && OutputFormat != PF_Ext_Binary)
    640     warn("Profile Symbol list is not empty but the output format is not "
    641          "ExtBinary format. The list will be lost in the output. ");
    642 
    643   Writer.setProfileSymbolList(&WriterList);
    644 
    645   if (CompressAllSections) {
    646     if (OutputFormat != PF_Ext_Binary)
    647       warn("-compress-all-section is ignored. Specify -extbinary to enable it");
    648     else
    649       Writer.setToCompressAllSections();
    650   }
    651   if (UseMD5) {
    652     if (OutputFormat != PF_Ext_Binary)
    653       warn("-use-md5 is ignored. Specify -extbinary to enable it");
    654     else
    655       Writer.setUseMD5();
    656   }
    657   if (GenPartialProfile) {
    658     if (OutputFormat != PF_Ext_Binary)
    659       warn("-gen-partial-profile is ignored. Specify -extbinary to enable it");
    660     else
    661       Writer.setPartialProfile();
    662   }
    663 }
    664 
    665 static void
    666 mergeSampleProfile(const WeightedFileVector &Inputs, SymbolRemapper *Remapper,
    667                    StringRef OutputFilename, ProfileFormat OutputFormat,
    668                    StringRef ProfileSymbolListFile, bool CompressAllSections,
    669                    bool UseMD5, bool GenPartialProfile,
    670                    bool SampleMergeColdContext, bool SampleTrimColdContext,
    671                    FailureMode FailMode) {
    672   using namespace sampleprof;
    673   StringMap<FunctionSamples> ProfileMap;
    674   SmallVector<std::unique_ptr<sampleprof::SampleProfileReader>, 5> Readers;
    675   LLVMContext Context;
    676   sampleprof::ProfileSymbolList WriterList;
    677   Optional<bool> ProfileIsProbeBased;
    678   Optional<bool> ProfileIsCS;
    679   for (const auto &Input : Inputs) {
    680     auto ReaderOrErr = SampleProfileReader::create(Input.Filename, Context);
    681     if (std::error_code EC = ReaderOrErr.getError()) {
    682       warnOrExitGivenError(FailMode, EC, Input.Filename);
    683       continue;
    684     }
    685 
    686     // We need to keep the readers around until after all the files are
    687     // read so that we do not lose the function names stored in each
    688     // reader's memory. The function names are needed to write out the
    689     // merged profile map.
    690     Readers.push_back(std::move(ReaderOrErr.get()));
    691     const auto Reader = Readers.back().get();
    692     if (std::error_code EC = Reader->read()) {
    693       warnOrExitGivenError(FailMode, EC, Input.Filename);
    694       Readers.pop_back();
    695       continue;
    696     }
    697 
    698     StringMap<FunctionSamples> &Profiles = Reader->getProfiles();
    699     if (ProfileIsProbeBased.hasValue() &&
    700         ProfileIsProbeBased != FunctionSamples::ProfileIsProbeBased)
    701       exitWithError(
    702           "cannot merge probe-based profile with non-probe-based profile");
    703     ProfileIsProbeBased = FunctionSamples::ProfileIsProbeBased;
    704     if (ProfileIsCS.hasValue() && ProfileIsCS != FunctionSamples::ProfileIsCS)
    705       exitWithError("cannot merge CS profile with non-CS profile");
    706     ProfileIsCS = FunctionSamples::ProfileIsCS;
    707     for (StringMap<FunctionSamples>::iterator I = Profiles.begin(),
    708                                               E = Profiles.end();
    709          I != E; ++I) {
    710       sampleprof_error Result = sampleprof_error::success;
    711       FunctionSamples Remapped =
    712           Remapper ? remapSamples(I->second, *Remapper, Result)
    713                    : FunctionSamples();
    714       FunctionSamples &Samples = Remapper ? Remapped : I->second;
    715       StringRef FName = Samples.getNameWithContext();
    716       MergeResult(Result, ProfileMap[FName].merge(Samples, Input.Weight));
    717       if (Result != sampleprof_error::success) {
    718         std::error_code EC = make_error_code(Result);
    719         handleMergeWriterError(errorCodeToError(EC), Input.Filename, FName);
    720       }
    721     }
    722 
    723     std::unique_ptr<sampleprof::ProfileSymbolList> ReaderList =
    724         Reader->getProfileSymbolList();
    725     if (ReaderList)
    726       WriterList.merge(*ReaderList);
    727   }
    728 
    729   if (ProfileIsCS && (SampleMergeColdContext || SampleTrimColdContext)) {
    730     // Use threshold calculated from profile summary unless specified.
    731     SampleProfileSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs);
    732     auto Summary = Builder.computeSummaryForProfiles(ProfileMap);
    733     uint64_t SampleProfColdThreshold =
    734         ProfileSummaryBuilder::getColdCountThreshold(
    735             (Summary->getDetailedSummary()));
    736 
    737     // Trim and merge cold context profile using cold threshold above;
    738     SampleContextTrimmer(ProfileMap)
    739         .trimAndMergeColdContextProfiles(SampleProfColdThreshold,
    740                                          SampleTrimColdContext,
    741                                          SampleMergeColdContext);
    742   }
    743 
    744   auto WriterOrErr =
    745       SampleProfileWriter::create(OutputFilename, FormatMap[OutputFormat]);
    746   if (std::error_code EC = WriterOrErr.getError())
    747     exitWithErrorCode(EC, OutputFilename);
    748 
    749   auto Writer = std::move(WriterOrErr.get());
    750   // WriterList will have StringRef refering to string in Buffer.
    751   // Make sure Buffer lives as long as WriterList.
    752   auto Buffer = getInputFileBuf(ProfileSymbolListFile);
    753   handleExtBinaryWriter(*Writer, OutputFormat, Buffer.get(), WriterList,
    754                         CompressAllSections, UseMD5, GenPartialProfile);
    755   if (std::error_code EC = Writer->write(ProfileMap))
    756     exitWithErrorCode(std::move(EC));
    757 }
    758 
    759 static WeightedFile parseWeightedFile(const StringRef &WeightedFilename) {
    760   StringRef WeightStr, FileName;
    761   std::tie(WeightStr, FileName) = WeightedFilename.split(',');
    762 
    763   uint64_t Weight;
    764   if (WeightStr.getAsInteger(10, Weight) || Weight < 1)
    765     exitWithError("Input weight must be a positive integer.");
    766 
    767   return {std::string(FileName), Weight};
    768 }
    769 
    770 static void addWeightedInput(WeightedFileVector &WNI, const WeightedFile &WF) {
    771   StringRef Filename = WF.Filename;
    772   uint64_t Weight = WF.Weight;
    773 
    774   // If it's STDIN just pass it on.
    775   if (Filename == "-") {
    776     WNI.push_back({std::string(Filename), Weight});
    777     return;
    778   }
    779 
    780   llvm::sys::fs::file_status Status;
    781   llvm::sys::fs::status(Filename, Status);
    782   if (!llvm::sys::fs::exists(Status))
    783     exitWithErrorCode(make_error_code(errc::no_such_file_or_directory),
    784                       Filename);
    785   // If it's a source file, collect it.
    786   if (llvm::sys::fs::is_regular_file(Status)) {
    787     WNI.push_back({std::string(Filename), Weight});
    788     return;
    789   }
    790 
    791   if (llvm::sys::fs::is_directory(Status)) {
    792     std::error_code EC;
    793     for (llvm::sys::fs::recursive_directory_iterator F(Filename, EC), E;
    794          F != E && !EC; F.increment(EC)) {
    795       if (llvm::sys::fs::is_regular_file(F->path())) {
    796         addWeightedInput(WNI, {F->path(), Weight});
    797       }
    798     }
    799     if (EC)
    800       exitWithErrorCode(EC, Filename);
    801   }
    802 }
    803 
    804 static void parseInputFilenamesFile(MemoryBuffer *Buffer,
    805                                     WeightedFileVector &WFV) {
    806   if (!Buffer)
    807     return;
    808 
    809   SmallVector<StringRef, 8> Entries;
    810   StringRef Data = Buffer->getBuffer();
    811   Data.split(Entries, '\n', /*MaxSplit=*/-1, /*KeepEmpty=*/false);
    812   for (const StringRef &FileWeightEntry : Entries) {
    813     StringRef SanitizedEntry = FileWeightEntry.trim(" \t\v\f\r");
    814     // Skip comments.
    815     if (SanitizedEntry.startswith("#"))
    816       continue;
    817     // If there's no comma, it's an unweighted profile.
    818     else if (SanitizedEntry.find(',') == StringRef::npos)
    819       addWeightedInput(WFV, {std::string(SanitizedEntry), 1});
    820     else
    821       addWeightedInput(WFV, parseWeightedFile(SanitizedEntry));
    822   }
    823 }
    824 
/// Entry point for merging profiles.
///
/// Declares the command-line options, parses \p argc / \p argv, gathers the
/// weighted input list from the positional arguments, the -weighted-input
/// options and the -input-files list, and then dispatches to
/// supplementInstrProfile(), mergeInstrProfile() or mergeSampleProfile().
/// Returns 0 on every path that returns; errors are reported through the
/// exitWith* helpers.
static int merge_main(int argc, const char *argv[]) {
  // --- Input selection -----------------------------------------------------
  cl::list<std::string> InputFilenames(cl::Positional,
                                       cl::desc("<filename...>"));
  cl::list<std::string> WeightedInputFilenames("weighted-input",
                                               cl::desc("<weight>,<filename>"));
  cl::opt<std::string> InputFilenamesFile(
      "input-files", cl::init(""),
      cl::desc("Path to file containing newline-separated "
               "[<weight>,]<filename> entries"));
  cl::alias InputFilenamesFileA("f", cl::desc("Alias for --input-files"),
                                cl::aliasopt(InputFilenamesFile));
  cl::opt<bool> DumpInputFileList(
      "dump-input-file-list", cl::init(false), cl::Hidden,
      cl::desc("Dump the list of input files and their weights, then exit"));
  cl::opt<std::string> RemappingFile("remapping-file", cl::value_desc("file"),
                                     cl::desc("Symbol remapping file"));
  cl::alias RemappingFileA("r", cl::desc("Alias for --remapping-file"),
                           cl::aliasopt(RemappingFile));
  // --- Output selection ----------------------------------------------------
  cl::opt<std::string> OutputFilename("output", cl::value_desc("output"),
                                      cl::init("-"), cl::Required,
                                      cl::desc("Output file"));
  cl::alias OutputFilenameA("o", cl::desc("Alias for --output"),
                            cl::aliasopt(OutputFilename));
  cl::opt<ProfileKinds> ProfileKind(
      cl::desc("Profile kind:"), cl::init(instr),
      cl::values(clEnumVal(instr, "Instrumentation profile (default)"),
                 clEnumVal(sample, "Sample profile")));
  cl::opt<ProfileFormat> OutputFormat(
      cl::desc("Format of output profile"), cl::init(PF_Binary),
      cl::values(
          clEnumValN(PF_Binary, "binary", "Binary encoding (default)"),
          clEnumValN(PF_Compact_Binary, "compbinary",
                     "Compact binary encoding"),
          clEnumValN(PF_Ext_Binary, "extbinary", "Extensible binary encoding"),
          clEnumValN(PF_Text, "text", "Text encoding"),
          clEnumValN(PF_GCC, "gcc",
                     "GCC encoding (only meaningful for -sample)")));
  cl::opt<FailureMode> FailureMode(
      "failure-mode", cl::init(failIfAnyAreInvalid), cl::desc("Failure mode:"),
      cl::values(clEnumValN(failIfAnyAreInvalid, "any",
                            "Fail if any profile is invalid."),
                 clEnumValN(failIfAllAreInvalid, "all",
                            "Fail only if all profiles are invalid.")));
  cl::opt<bool> OutputSparse("sparse", cl::init(false),
      cl::desc("Generate a sparse profile (only meaningful for -instr)"));
  cl::opt<unsigned> NumThreads(
      "num-threads", cl::init(0),
      cl::desc("Number of merge threads to use (default: autodetect)"));
  cl::alias NumThreadsA("j", cl::desc("Alias for --num-threads"),
                        cl::aliasopt(NumThreads));
  // --- Options forwarded to mergeSampleProfile() ----------------------------
  cl::opt<std::string> ProfileSymbolListFile(
      "prof-sym-list", cl::init(""),
      cl::desc("Path to file containing the list of function symbols "
               "used to populate profile symbol list"));
  cl::opt<bool> CompressAllSections(
      "compress-all-sections", cl::init(false), cl::Hidden,
      cl::desc("Compress all sections when writing the profile (only "
               "meaningful for -extbinary)"));
  cl::opt<bool> UseMD5(
      "use-md5", cl::init(false), cl::Hidden,
      cl::desc("Choose to use MD5 to represent string in name table (only "
               "meaningful for -extbinary)"));
  cl::opt<bool> SampleMergeColdContext(
      "sample-merge-cold-context", cl::init(false), cl::Hidden,
      cl::desc(
          "Merge context sample profiles whose count is below cold threshold"));
  cl::opt<bool> SampleTrimColdContext(
      "sample-trim-cold-context", cl::init(false), cl::Hidden,
      cl::desc(
          "Trim context sample profiles whose count is below cold threshold"));
  cl::opt<bool> GenPartialProfile(
      "gen-partial-profile", cl::init(false), cl::Hidden,
      cl::desc("Generate a partial profile (only meaningful for -extbinary)"));
  // --- Options forwarded to supplementInstrProfile() ------------------------
  cl::opt<std::string> SupplInstrWithSample(
      "supplement-instr-with-sample", cl::init(""), cl::Hidden,
      cl::desc("Supplement an instr profile with sample profile, to correct "
               "the profile unrepresentativeness issue. The sample "
               "profile is the input of the flag. Output will be in instr "
               "format (The flag only works with -instr)"));
  cl::opt<float> ZeroCounterThreshold(
      "zero-counter-threshold", cl::init(0.7), cl::Hidden,
      cl::desc("For the function which is cold in instr profile but hot in "
               "sample profile, if the ratio of the number of zero counters "
               "divided by the the total number of counters is above the "
               "threshold, the profile of the function will be regarded as "
               "being harmful for performance and will be dropped. "));
  cl::opt<unsigned> SupplMinSizeThreshold(
      "suppl-min-size-threshold", cl::init(10), cl::Hidden,
      cl::desc("If the size of a function is smaller than the threshold, "
               "assume it can be inlined by PGO early inliner and it won't "
               "be adjusted based on sample profile. "));
  cl::opt<unsigned> InstrProfColdThreshold(
      "instr-prof-cold-threshold", cl::init(0), cl::Hidden,
      cl::desc("User specified cold threshold for instr profile which will "
               "override the cold threshold got from profile summary. "));

  cl::ParseCommandLineOptions(argc, argv, "LLVM profile data merger\n");

  // Positional inputs get weight 1; -weighted-input entries carry their own.
  WeightedFileVector WeightedInputs;
  for (StringRef Filename : InputFilenames)
    addWeightedInput(WeightedInputs, {std::string(Filename), 1});
  for (StringRef WeightedFilename : WeightedInputFilenames)
    addWeightedInput(WeightedInputs, parseWeightedFile(WeightedFilename));

  // Make sure that the file buffer stays alive for the duration of the
  // weighted input vector's lifetime.
  auto Buffer = getInputFileBuf(InputFilenamesFile);
  parseInputFilenamesFile(Buffer.get(), WeightedInputs);

  if (WeightedInputs.empty())
    exitWithError("No input files specified. See " +
                  sys::path::filename(argv[0]) + " -help");

  // Hidden debugging aid: print "<weight>,<filename>" per input and stop.
  if (DumpInputFileList) {
    for (auto &WF : WeightedInputs)
      outs() << WF.Weight << "," << WF.Filename << "\n";
    return 0;
  }

  // Remapper stays null when no remapping file was given.
  std::unique_ptr<SymbolRemapper> Remapper;
  if (!RemappingFile.empty())
    Remapper = SymbolRemapper::create(RemappingFile);

  // Supplementing takes precedence over a plain merge and does not use the
  // remapper.
  if (!SupplInstrWithSample.empty()) {
    if (ProfileKind != instr)
      exitWithError(
          "-supplement-instr-with-sample can only work with -instr. ");

    supplementInstrProfile(WeightedInputs, SupplInstrWithSample, OutputFilename,
                           OutputFormat, OutputSparse, SupplMinSizeThreshold,
                           ZeroCounterThreshold, InstrProfColdThreshold);
    return 0;
  }

  if (ProfileKind == instr)
    mergeInstrProfile(WeightedInputs, Remapper.get(), OutputFilename,
                      OutputFormat, OutputSparse, NumThreads, FailureMode);
  else
    mergeSampleProfile(WeightedInputs, Remapper.get(), OutputFilename,
                       OutputFormat, ProfileSymbolListFile, CompressAllSections,
                       UseMD5, GenPartialProfile, SampleMergeColdContext,
                       SampleTrimColdContext, FailureMode);

  return 0;
}
    970 
    971 /// Computer the overlap b/w profile BaseFilename and profile TestFilename.
    972 static void overlapInstrProfile(const std::string &BaseFilename,
    973                                 const std::string &TestFilename,
    974                                 const OverlapFuncFilters &FuncFilter,
    975                                 raw_fd_ostream &OS, bool IsCS) {
    976   std::mutex ErrorLock;
    977   SmallSet<instrprof_error, 4> WriterErrorCodes;
    978   WriterContext Context(false, ErrorLock, WriterErrorCodes);
    979   WeightedFile WeightedInput{BaseFilename, 1};
    980   OverlapStats Overlap;
    981   Error E = Overlap.accumulateCounts(BaseFilename, TestFilename, IsCS);
    982   if (E)
    983     exitWithError(std::move(E), "Error in getting profile count sums");
    984   if (Overlap.Base.CountSum < 1.0f) {
    985     OS << "Sum of edge counts for profile " << BaseFilename << " is 0.\n";
    986     exit(0);
    987   }
    988   if (Overlap.Test.CountSum < 1.0f) {
    989     OS << "Sum of edge counts for profile " << TestFilename << " is 0.\n";
    990     exit(0);
    991   }
    992   loadInput(WeightedInput, nullptr, &Context);
    993   overlapInput(BaseFilename, TestFilename, &Context, Overlap, FuncFilter, OS,
    994                IsCS);
    995   Overlap.dump(OS);
    996 }
    997 
    998 namespace {
    999 struct SampleOverlapStats {
   1000   StringRef BaseName;
   1001   StringRef TestName;
   1002   // Number of overlap units
   1003   uint64_t OverlapCount;
   1004   // Total samples of overlap units
   1005   uint64_t OverlapSample;
   1006   // Number of and total samples of units that only present in base or test
   1007   // profile
   1008   uint64_t BaseUniqueCount;
   1009   uint64_t BaseUniqueSample;
   1010   uint64_t TestUniqueCount;
   1011   uint64_t TestUniqueSample;
   1012   // Number of units and total samples in base or test profile
   1013   uint64_t BaseCount;
   1014   uint64_t BaseSample;
   1015   uint64_t TestCount;
   1016   uint64_t TestSample;
   1017   // Number of and total samples of units that present in at least one profile
   1018   uint64_t UnionCount;
   1019   uint64_t UnionSample;
   1020   // Weighted similarity
   1021   double Similarity;
   1022   // For SampleOverlapStats instances representing functions, weights of the
   1023   // function in base and test profiles
   1024   double BaseWeight;
   1025   double TestWeight;
   1026 
   1027   SampleOverlapStats()
   1028       : OverlapCount(0), OverlapSample(0), BaseUniqueCount(0),
   1029         BaseUniqueSample(0), TestUniqueCount(0), TestUniqueSample(0),
   1030         BaseCount(0), BaseSample(0), TestCount(0), TestSample(0), UnionCount(0),
   1031         UnionSample(0), Similarity(0.0), BaseWeight(0.0), TestWeight(0.0) {}
   1032 };
   1033 } // end anonymous namespace
   1034 
namespace {
/// Sample statistics gathered for one function.
struct FuncSampleStats {
  // Sum of the sample counts.
  uint64_t SampleSum = 0;
  // Largest single sample count.
  uint64_t MaxSample = 0;
  // Number of sample counts that reached the hot threshold.
  uint64_t HotBlockCount = 0;

  /// All statistics start at zero.
  FuncSampleStats() {}

  /// Start from previously computed statistics.
  FuncSampleStats(uint64_t SampleSum, uint64_t MaxSample,
                  uint64_t HotBlockCount) {
    this->SampleSum = SampleSum;
    this->MaxSample = MaxSample;
    this->HotBlockCount = HotBlockCount;
  }
};
} // end anonymous namespace
   1047 
namespace {
/// Result of comparing the current elements of two sorted maps:
///  - MS_Match:        both iterators refer to equal keys;
///  - MS_FirstUnique:  the current key exists only in the first map;
///  - MS_SecondUnique: the current key exists only in the second map;
///  - MS_None:         no comparison is available, either because none has
///                     been made yet or because both maps are exhausted.
enum MatchStatus { MS_Match, MS_FirstUnique, MS_SecondUnique, MS_None };

// Class for updating merging steps for two sorted maps. The class should be
// instantiated with a map iterator type.
template <class T> class MatchStep {
public:
  MatchStep() = delete;

  MatchStep(T FirstIter, T FirstEnd, T SecondIter, T SecondEnd)
      : FirstIter(FirstIter), FirstEnd(FirstEnd), SecondIter(SecondIter),
        SecondEnd(SecondEnd), Status(MS_None) {}

  bool areBothFinished() const {
    return (FirstIter == FirstEnd && SecondIter == SecondEnd);
  }

  bool isFirstFinished() const { return FirstIter == FirstEnd; }

  bool isSecondFinished() const { return SecondIter == SecondEnd; }

  /// Advance one step based on the previous match status unless the previous
  /// status is MS_None. Then update Status based on the comparison between two
  /// container iterators at the current step. If the previous status is
  /// MS_None, either no comparison has been made yet or both containers are
  /// already exhausted, so we simply update Status without advancing the
  /// iterators. Once both containers are exhausted Status becomes MS_None,
  /// which makes any further call a no-op.
  void updateOneStep();

  T getFirstIter() const { return FirstIter; }

  T getSecondIter() const { return SecondIter; }

  MatchStatus getMatchStatus() const { return Status; }

private:
  // Current iterator and end iterator of the first container.
  T FirstIter;
  T FirstEnd;
  // Current iterator and end iterator of the second container.
  T SecondIter;
  T SecondEnd;
  // Match status of the current step.
  MatchStatus Status;
};
} // end anonymous namespace

template <class T> void MatchStep<T>::updateOneStep() {
  // Consume whatever the previous step reported.
  switch (Status) {
  case MS_Match:
    ++FirstIter;
    ++SecondIter;
    break;
  case MS_FirstUnique:
    ++FirstIter;
    break;
  case MS_SecondUnique:
    ++SecondIter;
    break;
  case MS_None:
    break;
  }

  // Update Status according to iterators at the current step.
  if (areBothFinished()) {
    // Drop the stale status: leaving it in place would make another call
    // increment an iterator that already sits at its end.
    Status = MS_None;
    return;
  }
  // At least one side still has elements from here on.
  if (isSecondFinished())
    Status = MS_FirstUnique;
  else if (isFirstFinished())
    Status = MS_SecondUnique;
  else if (FirstIter->first < SecondIter->first)
    Status = MS_FirstUnique;
  else if (SecondIter->first < FirstIter->first)
    Status = MS_SecondUnique;
  else
    Status = MS_Match;
}
   1122 
   1123 // Return the sum of line/block samples, the max line/block sample, and the
   1124 // number of line/block samples above the given threshold in a function
   1125 // including its inlinees.
   1126 static void getFuncSampleStats(const sampleprof::FunctionSamples &Func,
   1127                                FuncSampleStats &FuncStats,
   1128                                uint64_t HotThreshold) {
   1129   for (const auto &L : Func.getBodySamples()) {
   1130     uint64_t Sample = L.second.getSamples();
   1131     FuncStats.SampleSum += Sample;
   1132     FuncStats.MaxSample = std::max(FuncStats.MaxSample, Sample);
   1133     if (Sample >= HotThreshold)
   1134       ++FuncStats.HotBlockCount;
   1135   }
   1136 
   1137   for (const auto &C : Func.getCallsiteSamples()) {
   1138     for (const auto &F : C.second)
   1139       getFuncSampleStats(F.second, FuncStats, HotThreshold);
   1140   }
   1141 }
   1142 
   1143 /// Predicate that determines if a function is hot with a given threshold. We
   1144 /// keep it separate from its callsites for possible extension in the future.
   1145 static bool isFunctionHot(const FuncSampleStats &FuncStats,
   1146                           uint64_t HotThreshold) {
   1147   // We intentionally compare the maximum sample count in a function with the
   1148   // HotThreshold to get an approximate determination on hot functions.
   1149   return (FuncStats.MaxSample >= HotThreshold);
   1150 }
   1151 
   1152 namespace {
   1153 class SampleOverlapAggregator {
   1154 public:
   1155   SampleOverlapAggregator(const std::string &BaseFilename,
   1156                           const std::string &TestFilename,
   1157                           double LowSimilarityThreshold, double Epsilon,
   1158                           const OverlapFuncFilters &FuncFilter)
   1159       : BaseFilename(BaseFilename), TestFilename(TestFilename),
   1160         LowSimilarityThreshold(LowSimilarityThreshold), Epsilon(Epsilon),
   1161         FuncFilter(FuncFilter) {}
   1162 
   1163   /// Detect 0-sample input profile and report to output stream. This interface
   1164   /// should be called after loadProfiles().
   1165   bool detectZeroSampleProfile(raw_fd_ostream &OS) const;
   1166 
   1167   /// Write out function-level similarity statistics for functions specified by
   1168   /// options --function, --value-cutoff, and --similarity-cutoff.
   1169   void dumpFuncSimilarity(raw_fd_ostream &OS) const;
   1170 
   1171   /// Write out program-level similarity and overlap statistics.
   1172   void dumpProgramSummary(raw_fd_ostream &OS) const;
   1173 
   1174   /// Write out hot-function and hot-block statistics for base_profile,
   1175   /// test_profile, and their overlap. For both cases, the overlap HO is
   1176   /// calculated as follows:
   1177   ///    Given the number of functions (or blocks) that are hot in both profiles
   1178   ///    HCommon and the number of functions (or blocks) that are hot in at
   1179   ///    least one profile HUnion, HO = HCommon / HUnion.
   1180   void dumpHotFuncAndBlockOverlap(raw_fd_ostream &OS) const;
   1181 
   1182   /// This function tries matching functions in base and test profiles. For each
   1183   /// pair of matched functions, it aggregates the function-level
   1184   /// similarity into a profile-level similarity. It also dump function-level
   1185   /// similarity information of functions specified by --function,
   1186   /// --value-cutoff, and --similarity-cutoff options. The program-level
   1187   /// similarity PS is computed as follows:
   1188   ///     Given function-level similarity FS(A) for all function A, the
   1189   ///     weight of function A in base profile WB(A), and the weight of function
   1190   ///     A in test profile WT(A), compute PS(base_profile, test_profile) =
   1191   ///     sum_A(FS(A) * avg(WB(A), WT(A))) ranging in [0.0f to 1.0f] with 0.0
   1192   ///     meaning no-overlap.
   1193   void computeSampleProfileOverlap(raw_fd_ostream &OS);
   1194 
   1195   /// Initialize ProfOverlap with the sum of samples in base and test
   1196   /// profiles. This function also computes and keeps the sum of samples and
   1197   /// max sample counts of each function in BaseStats and TestStats for later
   1198   /// use to avoid re-computations.
   1199   void initializeSampleProfileOverlap();
   1200 
   1201   /// Load profiles specified by BaseFilename and TestFilename.
   1202   std::error_code loadProfiles();
   1203 
   1204 private:
   1205   SampleOverlapStats ProfOverlap;
   1206   SampleOverlapStats HotFuncOverlap;
   1207   SampleOverlapStats HotBlockOverlap;
   1208   std::string BaseFilename;
   1209   std::string TestFilename;
   1210   std::unique_ptr<sampleprof::SampleProfileReader> BaseReader;
   1211   std::unique_ptr<sampleprof::SampleProfileReader> TestReader;
   1212   // BaseStats and TestStats hold FuncSampleStats for each function, with
   1213   // function name as the key.
   1214   StringMap<FuncSampleStats> BaseStats;
   1215   StringMap<FuncSampleStats> TestStats;
   1216   // Low similarity threshold in floating point number
   1217   double LowSimilarityThreshold;
   1218   // Block samples above BaseHotThreshold or TestHotThreshold are considered hot
   1219   // for tracking hot blocks.
   1220   uint64_t BaseHotThreshold;
   1221   uint64_t TestHotThreshold;
   1222   // A small threshold used to round the results of floating point accumulations
   1223   // to resolve imprecision.
   1224   const double Epsilon;
   1225   std::multimap<double, SampleOverlapStats, std::greater<double>>
   1226       FuncSimilarityDump;
   1227   // FuncFilter carries specifications in options --value-cutoff and
   1228   // --function.
   1229   OverlapFuncFilters FuncFilter;
   1230   // Column offsets for printing the function-level details table.
   1231   static const unsigned int TestWeightCol = 15;
   1232   static const unsigned int SimilarityCol = 30;
   1233   static const unsigned int OverlapCol = 43;
   1234   static const unsigned int BaseUniqueCol = 53;
   1235   static const unsigned int TestUniqueCol = 67;
   1236   static const unsigned int BaseSampleCol = 81;
   1237   static const unsigned int TestSampleCol = 96;
   1238   static const unsigned int FuncNameCol = 111;
   1239 
   1240   /// Return a similarity of two line/block sample counters in the same
   1241   /// function in base and test profiles. The line/block-similarity BS(i) is
   1242   /// computed as follows:
   1243   ///    For an offsets i, given the sample count at i in base profile BB(i),
   1244   ///    the sample count at i in test profile BT(i), the sum of sample counts
   1245   ///    in this function in base profile SB, and the sum of sample counts in
   1246   ///    this function in test profile ST, compute BS(i) = 1.0 - fabs(BB(i)/SB -
   1247   ///    BT(i)/ST), ranging in [0.0f to 1.0f] with 0.0 meaning no-overlap.
   1248   double computeBlockSimilarity(uint64_t BaseSample, uint64_t TestSample,
   1249                                 const SampleOverlapStats &FuncOverlap) const;
   1250 
   1251   void updateHotBlockOverlap(uint64_t BaseSample, uint64_t TestSample,
   1252                              uint64_t HotBlockCount);
   1253 
   1254   void getHotFunctions(const StringMap<FuncSampleStats> &ProfStats,
   1255                        StringMap<FuncSampleStats> &HotFunc,
   1256                        uint64_t HotThreshold) const;
   1257 
   1258   void computeHotFuncOverlap();
   1259 
   1260   /// This function updates statistics in FuncOverlap, HotBlockOverlap, and
   1261   /// Difference for two sample units in a matched function according to the
   1262   /// given match status.
   1263   void updateOverlapStatsForFunction(uint64_t BaseSample, uint64_t TestSample,
   1264                                      uint64_t HotBlockCount,
   1265                                      SampleOverlapStats &FuncOverlap,
   1266                                      double &Difference, MatchStatus Status);
   1267 
   1268   /// This function updates statistics in FuncOverlap, HotBlockOverlap, and
   1269   /// Difference for unmatched callees that only present in one profile in a
   1270   /// matched caller function.
   1271   void updateForUnmatchedCallee(const sampleprof::FunctionSamples &Func,
   1272                                 SampleOverlapStats &FuncOverlap,
   1273                                 double &Difference, MatchStatus Status);
   1274 
   1275   /// This function updates sample overlap statistics of an overlap function in
   1276   /// base and test profile. It also calculates a function-internal similarity
   1277   /// FIS as follows:
   1278   ///    For offsets i that have samples in at least one profile in this
   1279   ///    function A, given BS(i) returned by computeBlockSimilarity(), compute
   1280   ///    FIS(A) = (2.0 - sum_i(1.0 - BS(i))) / 2, ranging in [0.0f to 1.0f] with
   1281   ///    0.0 meaning no overlap.
   1282   double computeSampleFunctionInternalOverlap(
   1283       const sampleprof::FunctionSamples &BaseFunc,
   1284       const sampleprof::FunctionSamples &TestFunc,
   1285       SampleOverlapStats &FuncOverlap);
   1286 
   1287   /// Function-level similarity (FS) is a weighted value over function internal
   1288   /// similarity (FIS). This function computes a function's FS from its FIS by
   1289   /// applying the weight.
   1290   double weightForFuncSimilarity(double FuncSimilarity, uint64_t BaseFuncSample,
   1291                                  uint64_t TestFuncSample) const;
   1292 
   1293   /// The function-level similarity FS(A) for a function A is computed as
   1294   /// follows:
   1295   ///     Compute a function-internal similarity FIS(A) by
   1296   ///     computeSampleFunctionInternalOverlap(). Then, with the weight of
   1297   ///     function A in base profile WB(A), and the weight of function A in test
   1298   ///     profile WT(A), compute FS(A) = FIS(A) * (1.0 - fabs(WB(A) - WT(A)))
   1299   ///     ranging in [0.0f to 1.0f] with 0.0 meaning no overlap.
   1300   double
   1301   computeSampleFunctionOverlap(const sampleprof::FunctionSamples *BaseFunc,
   1302                                const sampleprof::FunctionSamples *TestFunc,
   1303                                SampleOverlapStats *FuncOverlap,
   1304                                uint64_t BaseFuncSample,
   1305                                uint64_t TestFuncSample);
   1306 
   1307   /// Profile-level similarity (PS) is a weighted aggregate over function-level
   1308   /// similarities (FS). This method weights the FS value by the function
   1309   /// weights in the base and test profiles for the aggregation.
   1310   double weightByImportance(double FuncSimilarity, uint64_t BaseFuncSample,
   1311                             uint64_t TestFuncSample) const;
   1312 };
   1313 } // end anonymous namespace
   1314 
   1315 bool SampleOverlapAggregator::detectZeroSampleProfile(
   1316     raw_fd_ostream &OS) const {
   1317   bool HaveZeroSample = false;
   1318   if (ProfOverlap.BaseSample == 0) {
   1319     OS << "Sum of sample counts for profile " << BaseFilename << " is 0.\n";
   1320     HaveZeroSample = true;
   1321   }
   1322   if (ProfOverlap.TestSample == 0) {
   1323     OS << "Sum of sample counts for profile " << TestFilename << " is 0.\n";
   1324     HaveZeroSample = true;
   1325   }
   1326   return HaveZeroSample;
   1327 }
   1328 
   1329 double SampleOverlapAggregator::computeBlockSimilarity(
   1330     uint64_t BaseSample, uint64_t TestSample,
   1331     const SampleOverlapStats &FuncOverlap) const {
   1332   double BaseFrac = 0.0;
   1333   double TestFrac = 0.0;
   1334   if (FuncOverlap.BaseSample > 0)
   1335     BaseFrac = static_cast<double>(BaseSample) / FuncOverlap.BaseSample;
   1336   if (FuncOverlap.TestSample > 0)
   1337     TestFrac = static_cast<double>(TestSample) / FuncOverlap.TestSample;
   1338   return 1.0 - std::fabs(BaseFrac - TestFrac);
   1339 }
   1340 
   1341 void SampleOverlapAggregator::updateHotBlockOverlap(uint64_t BaseSample,
   1342                                                     uint64_t TestSample,
   1343                                                     uint64_t HotBlockCount) {
   1344   bool IsBaseHot = (BaseSample >= BaseHotThreshold);
   1345   bool IsTestHot = (TestSample >= TestHotThreshold);
   1346   if (!IsBaseHot && !IsTestHot)
   1347     return;
   1348 
   1349   HotBlockOverlap.UnionCount += HotBlockCount;
   1350   if (IsBaseHot)
   1351     HotBlockOverlap.BaseCount += HotBlockCount;
   1352   if (IsTestHot)
   1353     HotBlockOverlap.TestCount += HotBlockCount;
   1354   if (IsBaseHot && IsTestHot)
   1355     HotBlockOverlap.OverlapCount += HotBlockCount;
   1356 }
   1357 
   1358 void SampleOverlapAggregator::getHotFunctions(
   1359     const StringMap<FuncSampleStats> &ProfStats,
   1360     StringMap<FuncSampleStats> &HotFunc, uint64_t HotThreshold) const {
   1361   for (const auto &F : ProfStats) {
   1362     if (isFunctionHot(F.second, HotThreshold))
   1363       HotFunc.try_emplace(F.first(), F.second);
   1364   }
   1365 }
   1366 
   1367 void SampleOverlapAggregator::computeHotFuncOverlap() {
   1368   StringMap<FuncSampleStats> BaseHotFunc;
   1369   getHotFunctions(BaseStats, BaseHotFunc, BaseHotThreshold);
   1370   HotFuncOverlap.BaseCount = BaseHotFunc.size();
   1371 
   1372   StringMap<FuncSampleStats> TestHotFunc;
   1373   getHotFunctions(TestStats, TestHotFunc, TestHotThreshold);
   1374   HotFuncOverlap.TestCount = TestHotFunc.size();
   1375   HotFuncOverlap.UnionCount = HotFuncOverlap.TestCount;
   1376 
   1377   for (const auto &F : BaseHotFunc) {
   1378     if (TestHotFunc.count(F.first()))
   1379       ++HotFuncOverlap.OverlapCount;
   1380     else
   1381       ++HotFuncOverlap.UnionCount;
   1382   }
   1383 }
   1384 
   1385 void SampleOverlapAggregator::updateOverlapStatsForFunction(
   1386     uint64_t BaseSample, uint64_t TestSample, uint64_t HotBlockCount,
   1387     SampleOverlapStats &FuncOverlap, double &Difference, MatchStatus Status) {
   1388   assert(Status != MS_None &&
   1389          "Match status should be updated before updating overlap statistics");
   1390   if (Status == MS_FirstUnique) {
   1391     TestSample = 0;
   1392     FuncOverlap.BaseUniqueSample += BaseSample;
   1393   } else if (Status == MS_SecondUnique) {
   1394     BaseSample = 0;
   1395     FuncOverlap.TestUniqueSample += TestSample;
   1396   } else {
   1397     ++FuncOverlap.OverlapCount;
   1398   }
   1399 
   1400   FuncOverlap.UnionSample += std::max(BaseSample, TestSample);
   1401   FuncOverlap.OverlapSample += std::min(BaseSample, TestSample);
   1402   Difference +=
   1403       1.0 - computeBlockSimilarity(BaseSample, TestSample, FuncOverlap);
   1404   updateHotBlockOverlap(BaseSample, TestSample, HotBlockCount);
   1405 }
   1406 
   1407 void SampleOverlapAggregator::updateForUnmatchedCallee(
   1408     const sampleprof::FunctionSamples &Func, SampleOverlapStats &FuncOverlap,
   1409     double &Difference, MatchStatus Status) {
   1410   assert((Status == MS_FirstUnique || Status == MS_SecondUnique) &&
   1411          "Status must be either of the two unmatched cases");
   1412   FuncSampleStats FuncStats;
   1413   if (Status == MS_FirstUnique) {
   1414     getFuncSampleStats(Func, FuncStats, BaseHotThreshold);
   1415     updateOverlapStatsForFunction(FuncStats.SampleSum, 0,
   1416                                   FuncStats.HotBlockCount, FuncOverlap,
   1417                                   Difference, Status);
   1418   } else {
   1419     getFuncSampleStats(Func, FuncStats, TestHotThreshold);
   1420     updateOverlapStatsForFunction(0, FuncStats.SampleSum,
   1421                                   FuncStats.HotBlockCount, FuncOverlap,
   1422                                   Difference, Status);
   1423   }
   1424 }
   1425 
   1426 double SampleOverlapAggregator::computeSampleFunctionInternalOverlap(
   1427     const sampleprof::FunctionSamples &BaseFunc,
   1428     const sampleprof::FunctionSamples &TestFunc,
   1429     SampleOverlapStats &FuncOverlap) {
   1430 
   1431   using namespace sampleprof;
   1432 
   1433   double Difference = 0;
   1434 
   1435   // Accumulate Difference for regular line/block samples in the function.
   1436   // We match them through sort-merge join algorithm because
   1437   // FunctionSamples::getBodySamples() returns a map of sample counters ordered
   1438   // by their offsets.
   1439   MatchStep<BodySampleMap::const_iterator> BlockIterStep(
   1440       BaseFunc.getBodySamples().cbegin(), BaseFunc.getBodySamples().cend(),
   1441       TestFunc.getBodySamples().cbegin(), TestFunc.getBodySamples().cend());
   1442   BlockIterStep.updateOneStep();
   1443   while (!BlockIterStep.areBothFinished()) {
   1444     uint64_t BaseSample =
   1445         BlockIterStep.isFirstFinished()
   1446             ? 0
   1447             : BlockIterStep.getFirstIter()->second.getSamples();
   1448     uint64_t TestSample =
   1449         BlockIterStep.isSecondFinished()
   1450             ? 0
   1451             : BlockIterStep.getSecondIter()->second.getSamples();
   1452     updateOverlapStatsForFunction(BaseSample, TestSample, 1, FuncOverlap,
   1453                                   Difference, BlockIterStep.getMatchStatus());
   1454 
   1455     BlockIterStep.updateOneStep();
   1456   }
   1457 
   1458   // Accumulate Difference for callsite lines in the function. We match
   1459   // them through sort-merge algorithm because
   1460   // FunctionSamples::getCallsiteSamples() returns a map of callsite records
   1461   // ordered by their offsets.
   1462   MatchStep<CallsiteSampleMap::const_iterator> CallsiteIterStep(
   1463       BaseFunc.getCallsiteSamples().cbegin(),
   1464       BaseFunc.getCallsiteSamples().cend(),
   1465       TestFunc.getCallsiteSamples().cbegin(),
   1466       TestFunc.getCallsiteSamples().cend());
   1467   CallsiteIterStep.updateOneStep();
   1468   while (!CallsiteIterStep.areBothFinished()) {
   1469     MatchStatus CallsiteStepStatus = CallsiteIterStep.getMatchStatus();
   1470     assert(CallsiteStepStatus != MS_None &&
   1471            "Match status should be updated before entering loop body");
   1472 
   1473     if (CallsiteStepStatus != MS_Match) {
   1474       auto Callsite = (CallsiteStepStatus == MS_FirstUnique)
   1475                           ? CallsiteIterStep.getFirstIter()
   1476                           : CallsiteIterStep.getSecondIter();
   1477       for (const auto &F : Callsite->second)
   1478         updateForUnmatchedCallee(F.second, FuncOverlap, Difference,
   1479                                  CallsiteStepStatus);
   1480     } else {
   1481       // There may be multiple inlinees at the same offset, so we need to try
   1482       // matching all of them. This match is implemented through sort-merge
   1483       // algorithm because callsite records at the same offset are ordered by
   1484       // function names.
   1485       MatchStep<FunctionSamplesMap::const_iterator> CalleeIterStep(
   1486           CallsiteIterStep.getFirstIter()->second.cbegin(),
   1487           CallsiteIterStep.getFirstIter()->second.cend(),
   1488           CallsiteIterStep.getSecondIter()->second.cbegin(),
   1489           CallsiteIterStep.getSecondIter()->second.cend());
   1490       CalleeIterStep.updateOneStep();
   1491       while (!CalleeIterStep.areBothFinished()) {
   1492         MatchStatus CalleeStepStatus = CalleeIterStep.getMatchStatus();
   1493         if (CalleeStepStatus != MS_Match) {
   1494           auto Callee = (CalleeStepStatus == MS_FirstUnique)
   1495                             ? CalleeIterStep.getFirstIter()
   1496                             : CalleeIterStep.getSecondIter();
   1497           updateForUnmatchedCallee(Callee->second, FuncOverlap, Difference,
   1498                                    CalleeStepStatus);
   1499         } else {
   1500           // An inlined function can contain other inlinees inside, so compute
   1501           // the Difference recursively.
   1502           Difference += 2.0 - 2 * computeSampleFunctionInternalOverlap(
   1503                                       CalleeIterStep.getFirstIter()->second,
   1504                                       CalleeIterStep.getSecondIter()->second,
   1505                                       FuncOverlap);
   1506         }
   1507         CalleeIterStep.updateOneStep();
   1508       }
   1509     }
   1510     CallsiteIterStep.updateOneStep();
   1511   }
   1512 
   1513   // Difference reflects the total differences of line/block samples in this
   1514   // function and ranges in [0.0f to 2.0f]. Take (2.0 - Difference) / 2 to
   1515   // reflect the similarity between function profiles in [0.0f to 1.0f].
   1516   return (2.0 - Difference) / 2;
   1517 }
   1518 
   1519 double SampleOverlapAggregator::weightForFuncSimilarity(
   1520     double FuncInternalSimilarity, uint64_t BaseFuncSample,
   1521     uint64_t TestFuncSample) const {
   1522   // Compute the weight as the distance between the function weights in two
   1523   // profiles.
   1524   double BaseFrac = 0.0;
   1525   double TestFrac = 0.0;
   1526   assert(ProfOverlap.BaseSample > 0 &&
   1527          "Total samples in base profile should be greater than 0");
   1528   BaseFrac = static_cast<double>(BaseFuncSample) / ProfOverlap.BaseSample;
   1529   assert(ProfOverlap.TestSample > 0 &&
   1530          "Total samples in test profile should be greater than 0");
   1531   TestFrac = static_cast<double>(TestFuncSample) / ProfOverlap.TestSample;
   1532   double WeightDistance = std::fabs(BaseFrac - TestFrac);
   1533 
   1534   // Take WeightDistance into the similarity.
   1535   return FuncInternalSimilarity * (1 - WeightDistance);
   1536 }
   1537 
   1538 double
   1539 SampleOverlapAggregator::weightByImportance(double FuncSimilarity,
   1540                                             uint64_t BaseFuncSample,
   1541                                             uint64_t TestFuncSample) const {
   1542 
   1543   double BaseFrac = 0.0;
   1544   double TestFrac = 0.0;
   1545   assert(ProfOverlap.BaseSample > 0 &&
   1546          "Total samples in base profile should be greater than 0");
   1547   BaseFrac = static_cast<double>(BaseFuncSample) / ProfOverlap.BaseSample / 2.0;
   1548   assert(ProfOverlap.TestSample > 0 &&
   1549          "Total samples in test profile should be greater than 0");
   1550   TestFrac = static_cast<double>(TestFuncSample) / ProfOverlap.TestSample / 2.0;
   1551   return FuncSimilarity * (BaseFrac + TestFrac);
   1552 }
   1553 
   1554 double SampleOverlapAggregator::computeSampleFunctionOverlap(
   1555     const sampleprof::FunctionSamples *BaseFunc,
   1556     const sampleprof::FunctionSamples *TestFunc,
   1557     SampleOverlapStats *FuncOverlap, uint64_t BaseFuncSample,
   1558     uint64_t TestFuncSample) {
   1559   // Default function internal similarity before weighted, meaning two functions
   1560   // has no overlap.
   1561   const double DefaultFuncInternalSimilarity = 0;
   1562   double FuncSimilarity;
   1563   double FuncInternalSimilarity;
   1564 
   1565   // If BaseFunc or TestFunc is nullptr, it means the functions do not overlap.
   1566   // In this case, we use DefaultFuncInternalSimilarity as the function internal
   1567   // similarity.
   1568   if (!BaseFunc || !TestFunc) {
   1569     FuncInternalSimilarity = DefaultFuncInternalSimilarity;
   1570   } else {
   1571     assert(FuncOverlap != nullptr &&
   1572            "FuncOverlap should be provided in this case");
   1573     FuncInternalSimilarity = computeSampleFunctionInternalOverlap(
   1574         *BaseFunc, *TestFunc, *FuncOverlap);
   1575     // Now, FuncInternalSimilarity may be a little less than 0 due to
   1576     // imprecision of floating point accumulations. Make it zero if the
   1577     // difference is below Epsilon.
   1578     FuncInternalSimilarity = (std::fabs(FuncInternalSimilarity - 0) < Epsilon)
   1579                                  ? 0
   1580                                  : FuncInternalSimilarity;
   1581   }
   1582   FuncSimilarity = weightForFuncSimilarity(FuncInternalSimilarity,
   1583                                            BaseFuncSample, TestFuncSample);
   1584   return FuncSimilarity;
   1585 }
   1586 
   1587 void SampleOverlapAggregator::computeSampleProfileOverlap(raw_fd_ostream &OS) {
   1588   using namespace sampleprof;
   1589 
   1590   StringMap<const FunctionSamples *> BaseFuncProf;
   1591   const auto &BaseProfiles = BaseReader->getProfiles();
   1592   for (const auto &BaseFunc : BaseProfiles) {
   1593     BaseFuncProf.try_emplace(BaseFunc.second.getNameWithContext(),
   1594                              &(BaseFunc.second));
   1595   }
   1596   ProfOverlap.UnionCount = BaseFuncProf.size();
   1597 
   1598   const auto &TestProfiles = TestReader->getProfiles();
   1599   for (const auto &TestFunc : TestProfiles) {
   1600     SampleOverlapStats FuncOverlap;
   1601     FuncOverlap.TestName = TestFunc.second.getNameWithContext();
   1602     assert(TestStats.count(FuncOverlap.TestName) &&
   1603            "TestStats should have records for all functions in test profile "
   1604            "except inlinees");
   1605     FuncOverlap.TestSample = TestStats[FuncOverlap.TestName].SampleSum;
   1606 
   1607     const auto Match = BaseFuncProf.find(FuncOverlap.TestName);
   1608     if (Match == BaseFuncProf.end()) {
   1609       const FuncSampleStats &FuncStats = TestStats[FuncOverlap.TestName];
   1610       ++ProfOverlap.TestUniqueCount;
   1611       ProfOverlap.TestUniqueSample += FuncStats.SampleSum;
   1612       FuncOverlap.TestUniqueSample = FuncStats.SampleSum;
   1613 
   1614       updateHotBlockOverlap(0, FuncStats.SampleSum, FuncStats.HotBlockCount);
   1615 
   1616       double FuncSimilarity = computeSampleFunctionOverlap(
   1617           nullptr, nullptr, nullptr, 0, FuncStats.SampleSum);
   1618       ProfOverlap.Similarity +=
   1619           weightByImportance(FuncSimilarity, 0, FuncStats.SampleSum);
   1620 
   1621       ++ProfOverlap.UnionCount;
   1622       ProfOverlap.UnionSample += FuncStats.SampleSum;
   1623     } else {
   1624       ++ProfOverlap.OverlapCount;
   1625 
   1626       // Two functions match with each other. Compute function-level overlap and
   1627       // aggregate them into profile-level overlap.
   1628       FuncOverlap.BaseName = Match->second->getNameWithContext();
   1629       assert(BaseStats.count(FuncOverlap.BaseName) &&
   1630              "BaseStats should have records for all functions in base profile "
   1631              "except inlinees");
   1632       FuncOverlap.BaseSample = BaseStats[FuncOverlap.BaseName].SampleSum;
   1633 
   1634       FuncOverlap.Similarity = computeSampleFunctionOverlap(
   1635           Match->second, &TestFunc.second, &FuncOverlap, FuncOverlap.BaseSample,
   1636           FuncOverlap.TestSample);
   1637       ProfOverlap.Similarity +=
   1638           weightByImportance(FuncOverlap.Similarity, FuncOverlap.BaseSample,
   1639                              FuncOverlap.TestSample);
   1640       ProfOverlap.OverlapSample += FuncOverlap.OverlapSample;
   1641       ProfOverlap.UnionSample += FuncOverlap.UnionSample;
   1642 
   1643       // Accumulate the percentage of base unique and test unique samples into
   1644       // ProfOverlap.
   1645       ProfOverlap.BaseUniqueSample += FuncOverlap.BaseUniqueSample;
   1646       ProfOverlap.TestUniqueSample += FuncOverlap.TestUniqueSample;
   1647 
   1648       // Remove matched base functions for later reporting functions not found
   1649       // in test profile.
   1650       BaseFuncProf.erase(Match);
   1651     }
   1652 
   1653     // Print function-level similarity information if specified by options.
   1654     assert(TestStats.count(FuncOverlap.TestName) &&
   1655            "TestStats should have records for all functions in test profile "
   1656            "except inlinees");
   1657     if (TestStats[FuncOverlap.TestName].MaxSample >= FuncFilter.ValueCutoff ||
   1658         (Match != BaseFuncProf.end() &&
   1659          FuncOverlap.Similarity < LowSimilarityThreshold) ||
   1660         (Match != BaseFuncProf.end() && !FuncFilter.NameFilter.empty() &&
   1661          FuncOverlap.BaseName.find(FuncFilter.NameFilter) !=
   1662              FuncOverlap.BaseName.npos)) {
   1663       assert(ProfOverlap.BaseSample > 0 &&
   1664              "Total samples in base profile should be greater than 0");
   1665       FuncOverlap.BaseWeight =
   1666           static_cast<double>(FuncOverlap.BaseSample) / ProfOverlap.BaseSample;
   1667       assert(ProfOverlap.TestSample > 0 &&
   1668              "Total samples in test profile should be greater than 0");
   1669       FuncOverlap.TestWeight =
   1670           static_cast<double>(FuncOverlap.TestSample) / ProfOverlap.TestSample;
   1671       FuncSimilarityDump.emplace(FuncOverlap.BaseWeight, FuncOverlap);
   1672     }
   1673   }
   1674 
   1675   // Traverse through functions in base profile but not in test profile.
   1676   for (const auto &F : BaseFuncProf) {
   1677     assert(BaseStats.count(F.second->getNameWithContext()) &&
   1678            "BaseStats should have records for all functions in base profile "
   1679            "except inlinees");
   1680     const FuncSampleStats &FuncStats =
   1681         BaseStats[F.second->getNameWithContext()];
   1682     ++ProfOverlap.BaseUniqueCount;
   1683     ProfOverlap.BaseUniqueSample += FuncStats.SampleSum;
   1684 
   1685     updateHotBlockOverlap(FuncStats.SampleSum, 0, FuncStats.HotBlockCount);
   1686 
   1687     double FuncSimilarity = computeSampleFunctionOverlap(
   1688         nullptr, nullptr, nullptr, FuncStats.SampleSum, 0);
   1689     ProfOverlap.Similarity +=
   1690         weightByImportance(FuncSimilarity, FuncStats.SampleSum, 0);
   1691 
   1692     ProfOverlap.UnionSample += FuncStats.SampleSum;
   1693   }
   1694 
   1695   // Now, ProfSimilarity may be a little greater than 1 due to imprecision
   1696   // of floating point accumulations. Make it 1.0 if the difference is below
   1697   // Epsilon.
   1698   ProfOverlap.Similarity = (std::fabs(ProfOverlap.Similarity - 1) < Epsilon)
   1699                                ? 1
   1700                                : ProfOverlap.Similarity;
   1701 
   1702   computeHotFuncOverlap();
   1703 }
   1704 
   1705 void SampleOverlapAggregator::initializeSampleProfileOverlap() {
   1706   const auto &BaseProf = BaseReader->getProfiles();
   1707   for (const auto &I : BaseProf) {
   1708     ++ProfOverlap.BaseCount;
   1709     FuncSampleStats FuncStats;
   1710     getFuncSampleStats(I.second, FuncStats, BaseHotThreshold);
   1711     ProfOverlap.BaseSample += FuncStats.SampleSum;
   1712     BaseStats.try_emplace(I.second.getNameWithContext(), FuncStats);
   1713   }
   1714 
   1715   const auto &TestProf = TestReader->getProfiles();
   1716   for (const auto &I : TestProf) {
   1717     ++ProfOverlap.TestCount;
   1718     FuncSampleStats FuncStats;
   1719     getFuncSampleStats(I.second, FuncStats, TestHotThreshold);
   1720     ProfOverlap.TestSample += FuncStats.SampleSum;
   1721     TestStats.try_emplace(I.second.getNameWithContext(), FuncStats);
   1722   }
   1723 
   1724   ProfOverlap.BaseName = StringRef(BaseFilename);
   1725   ProfOverlap.TestName = StringRef(TestFilename);
   1726 }
   1727 
   1728 void SampleOverlapAggregator::dumpFuncSimilarity(raw_fd_ostream &OS) const {
   1729   using namespace sampleprof;
   1730 
   1731   if (FuncSimilarityDump.empty())
   1732     return;
   1733 
   1734   formatted_raw_ostream FOS(OS);
   1735   FOS << "Function-level details:\n";
   1736   FOS << "Base weight";
   1737   FOS.PadToColumn(TestWeightCol);
   1738   FOS << "Test weight";
   1739   FOS.PadToColumn(SimilarityCol);
   1740   FOS << "Similarity";
   1741   FOS.PadToColumn(OverlapCol);
   1742   FOS << "Overlap";
   1743   FOS.PadToColumn(BaseUniqueCol);
   1744   FOS << "Base unique";
   1745   FOS.PadToColumn(TestUniqueCol);
   1746   FOS << "Test unique";
   1747   FOS.PadToColumn(BaseSampleCol);
   1748   FOS << "Base samples";
   1749   FOS.PadToColumn(TestSampleCol);
   1750   FOS << "Test samples";
   1751   FOS.PadToColumn(FuncNameCol);
   1752   FOS << "Function name\n";
   1753   for (const auto &F : FuncSimilarityDump) {
   1754     double OverlapPercent =
   1755         F.second.UnionSample > 0
   1756             ? static_cast<double>(F.second.OverlapSample) / F.second.UnionSample
   1757             : 0;
   1758     double BaseUniquePercent =
   1759         F.second.BaseSample > 0
   1760             ? static_cast<double>(F.second.BaseUniqueSample) /
   1761                   F.second.BaseSample
   1762             : 0;
   1763     double TestUniquePercent =
   1764         F.second.TestSample > 0
   1765             ? static_cast<double>(F.second.TestUniqueSample) /
   1766                   F.second.TestSample
   1767             : 0;
   1768 
   1769     FOS << format("%.2f%%", F.second.BaseWeight * 100);
   1770     FOS.PadToColumn(TestWeightCol);
   1771     FOS << format("%.2f%%", F.second.TestWeight * 100);
   1772     FOS.PadToColumn(SimilarityCol);
   1773     FOS << format("%.2f%%", F.second.Similarity * 100);
   1774     FOS.PadToColumn(OverlapCol);
   1775     FOS << format("%.2f%%", OverlapPercent * 100);
   1776     FOS.PadToColumn(BaseUniqueCol);
   1777     FOS << format("%.2f%%", BaseUniquePercent * 100);
   1778     FOS.PadToColumn(TestUniqueCol);
   1779     FOS << format("%.2f%%", TestUniquePercent * 100);
   1780     FOS.PadToColumn(BaseSampleCol);
   1781     FOS << F.second.BaseSample;
   1782     FOS.PadToColumn(TestSampleCol);
   1783     FOS << F.second.TestSample;
   1784     FOS.PadToColumn(FuncNameCol);
   1785     FOS << F.second.TestName << "\n";
   1786   }
   1787 }
   1788 
   1789 void SampleOverlapAggregator::dumpProgramSummary(raw_fd_ostream &OS) const {
   1790   OS << "Profile overlap infomation for base_profile: " << ProfOverlap.BaseName
   1791      << " and test_profile: " << ProfOverlap.TestName << "\nProgram level:\n";
   1792 
   1793   OS << "  Whole program profile similarity: "
   1794      << format("%.3f%%", ProfOverlap.Similarity * 100) << "\n";
   1795 
   1796   assert(ProfOverlap.UnionSample > 0 &&
   1797          "Total samples in two profile should be greater than 0");
   1798   double OverlapPercent =
   1799       static_cast<double>(ProfOverlap.OverlapSample) / ProfOverlap.UnionSample;
   1800   assert(ProfOverlap.BaseSample > 0 &&
   1801          "Total samples in base profile should be greater than 0");
   1802   double BaseUniquePercent = static_cast<double>(ProfOverlap.BaseUniqueSample) /
   1803                              ProfOverlap.BaseSample;
   1804   assert(ProfOverlap.TestSample > 0 &&
   1805          "Total samples in test profile should be greater than 0");
   1806   double TestUniquePercent = static_cast<double>(ProfOverlap.TestUniqueSample) /
   1807                              ProfOverlap.TestSample;
   1808 
   1809   OS << "  Whole program sample overlap: "
   1810      << format("%.3f%%", OverlapPercent * 100) << "\n";
   1811   OS << "    percentage of samples unique in base profile: "
   1812      << format("%.3f%%", BaseUniquePercent * 100) << "\n";
   1813   OS << "    percentage of samples unique in test profile: "
   1814      << format("%.3f%%", TestUniquePercent * 100) << "\n";
   1815   OS << "    total samples in base profile: " << ProfOverlap.BaseSample << "\n"
   1816      << "    total samples in test profile: " << ProfOverlap.TestSample << "\n";
   1817 
   1818   assert(ProfOverlap.UnionCount > 0 &&
   1819          "There should be at least one function in two input profiles");
   1820   double FuncOverlapPercent =
   1821       static_cast<double>(ProfOverlap.OverlapCount) / ProfOverlap.UnionCount;
   1822   OS << "  Function overlap: " << format("%.3f%%", FuncOverlapPercent * 100)
   1823      << "\n";
   1824   OS << "    overlap functions: " << ProfOverlap.OverlapCount << "\n";
   1825   OS << "    functions unique in base profile: " << ProfOverlap.BaseUniqueCount
   1826      << "\n";
   1827   OS << "    functions unique in test profile: " << ProfOverlap.TestUniqueCount
   1828      << "\n";
   1829 }
   1830 
   1831 void SampleOverlapAggregator::dumpHotFuncAndBlockOverlap(
   1832     raw_fd_ostream &OS) const {
   1833   assert(HotFuncOverlap.UnionCount > 0 &&
   1834          "There should be at least one hot function in two input profiles");
   1835   OS << "  Hot-function overlap: "
   1836      << format("%.3f%%", static_cast<double>(HotFuncOverlap.OverlapCount) /
   1837                              HotFuncOverlap.UnionCount * 100)
   1838      << "\n";
   1839   OS << "    overlap hot functions: " << HotFuncOverlap.OverlapCount << "\n";
   1840   OS << "    hot functions unique in base profile: "
   1841      << HotFuncOverlap.BaseCount - HotFuncOverlap.OverlapCount << "\n";
   1842   OS << "    hot functions unique in test profile: "
   1843      << HotFuncOverlap.TestCount - HotFuncOverlap.OverlapCount << "\n";
   1844 
   1845   assert(HotBlockOverlap.UnionCount > 0 &&
   1846          "There should be at least one hot block in two input profiles");
   1847   OS << "  Hot-block overlap: "
   1848      << format("%.3f%%", static_cast<double>(HotBlockOverlap.OverlapCount) /
   1849                              HotBlockOverlap.UnionCount * 100)
   1850      << "\n";
   1851   OS << "    overlap hot blocks: " << HotBlockOverlap.OverlapCount << "\n";
   1852   OS << "    hot blocks unique in base profile: "
   1853      << HotBlockOverlap.BaseCount - HotBlockOverlap.OverlapCount << "\n";
   1854   OS << "    hot blocks unique in test profile: "
   1855      << HotBlockOverlap.TestCount - HotBlockOverlap.OverlapCount << "\n";
   1856 }
   1857 
   1858 std::error_code SampleOverlapAggregator::loadProfiles() {
   1859   using namespace sampleprof;
   1860 
   1861   LLVMContext Context;
   1862   auto BaseReaderOrErr = SampleProfileReader::create(BaseFilename, Context);
   1863   if (std::error_code EC = BaseReaderOrErr.getError())
   1864     exitWithErrorCode(EC, BaseFilename);
   1865 
   1866   auto TestReaderOrErr = SampleProfileReader::create(TestFilename, Context);
   1867   if (std::error_code EC = TestReaderOrErr.getError())
   1868     exitWithErrorCode(EC, TestFilename);
   1869 
   1870   BaseReader = std::move(BaseReaderOrErr.get());
   1871   TestReader = std::move(TestReaderOrErr.get());
   1872 
   1873   if (std::error_code EC = BaseReader->read())
   1874     exitWithErrorCode(EC, BaseFilename);
   1875   if (std::error_code EC = TestReader->read())
   1876     exitWithErrorCode(EC, TestFilename);
   1877   if (BaseReader->profileIsProbeBased() != TestReader->profileIsProbeBased())
   1878     exitWithError(
   1879         "cannot compare probe-based profile with non-probe-based profile");
   1880   if (BaseReader->profileIsCS() != TestReader->profileIsCS())
   1881     exitWithError("cannot compare CS profile with non-CS profile");
   1882 
   1883   // Load BaseHotThreshold and TestHotThreshold as 99-percentile threshold in
   1884   // profile summary.
   1885   const uint64_t HotCutoff = 990000;
   1886   ProfileSummary &BasePS = BaseReader->getSummary();
   1887   for (const auto &SummaryEntry : BasePS.getDetailedSummary()) {
   1888     if (SummaryEntry.Cutoff == HotCutoff) {
   1889       BaseHotThreshold = SummaryEntry.MinCount;
   1890       break;
   1891     }
   1892   }
   1893 
   1894   ProfileSummary &TestPS = TestReader->getSummary();
   1895   for (const auto &SummaryEntry : TestPS.getDetailedSummary()) {
   1896     if (SummaryEntry.Cutoff == HotCutoff) {
   1897       TestHotThreshold = SummaryEntry.MinCount;
   1898       break;
   1899     }
   1900   }
   1901   return std::error_code();
   1902 }
   1903 
   1904 void overlapSampleProfile(const std::string &BaseFilename,
   1905                           const std::string &TestFilename,
   1906                           const OverlapFuncFilters &FuncFilter,
   1907                           uint64_t SimilarityCutoff, raw_fd_ostream &OS) {
   1908   using namespace sampleprof;
   1909 
   1910   // We use 0.000005 to initialize OverlapAggr.Epsilon because the final metrics
   1911   // report 2--3 places after decimal point in percentage numbers.
   1912   SampleOverlapAggregator OverlapAggr(
   1913       BaseFilename, TestFilename,
   1914       static_cast<double>(SimilarityCutoff) / 1000000, 0.000005, FuncFilter);
   1915   if (std::error_code EC = OverlapAggr.loadProfiles())
   1916     exitWithErrorCode(EC);
   1917 
   1918   OverlapAggr.initializeSampleProfileOverlap();
   1919   if (OverlapAggr.detectZeroSampleProfile(OS))
   1920     return;
   1921 
   1922   OverlapAggr.computeSampleProfileOverlap(OS);
   1923 
   1924   OverlapAggr.dumpProgramSummary(OS);
   1925   OverlapAggr.dumpHotFuncAndBlockOverlap(OS);
   1926   OverlapAggr.dumpFuncSimilarity(OS);
   1927 }
   1928 
   1929 static int overlap_main(int argc, const char *argv[]) {
   1930   cl::opt<std::string> BaseFilename(cl::Positional, cl::Required,
   1931                                     cl::desc("<base profile file>"));
   1932   cl::opt<std::string> TestFilename(cl::Positional, cl::Required,
   1933                                     cl::desc("<test profile file>"));
   1934   cl::opt<std::string> Output("output", cl::value_desc("output"), cl::init("-"),
   1935                               cl::desc("Output file"));
   1936   cl::alias OutputA("o", cl::desc("Alias for --output"), cl::aliasopt(Output));
   1937   cl::opt<bool> IsCS(
   1938       "cs", cl::init(false),
   1939       cl::desc("For context sensitive PGO counts. Does not work with CSSPGO."));
   1940   cl::opt<unsigned long long> ValueCutoff(
   1941       "value-cutoff", cl::init(-1),
   1942       cl::desc(
   1943           "Function level overlap information for every function (with calling "
   1944           "context for csspgo) in test "
   1945           "profile with max count value greater then the parameter value"));
   1946   cl::opt<std::string> FuncNameFilter(
   1947       "function",
   1948       cl::desc("Function level overlap information for matching functions. For "
   1949                "CSSPGO this takes a a function name with calling context"));
   1950   cl::opt<unsigned long long> SimilarityCutoff(
   1951       "similarity-cutoff", cl::init(0),
   1952       cl::desc("For sample profiles, list function names (with calling context "
   1953                "for csspgo) for overlapped functions "
   1954                "with similarities below the cutoff (percentage times 10000)."));
   1955   cl::opt<ProfileKinds> ProfileKind(
   1956       cl::desc("Profile kind:"), cl::init(instr),
   1957       cl::values(clEnumVal(instr, "Instrumentation profile (default)"),
   1958                  clEnumVal(sample, "Sample profile")));
   1959   cl::ParseCommandLineOptions(argc, argv, "LLVM profile data overlap tool\n");
   1960 
   1961   std::error_code EC;
   1962   raw_fd_ostream OS(Output.data(), EC, sys::fs::OF_TextWithCRLF);
   1963   if (EC)
   1964     exitWithErrorCode(EC, Output);
   1965 
   1966   if (ProfileKind == instr)
   1967     overlapInstrProfile(BaseFilename, TestFilename,
   1968                         OverlapFuncFilters{ValueCutoff, FuncNameFilter}, OS,
   1969                         IsCS);
   1970   else
   1971     overlapSampleProfile(BaseFilename, TestFilename,
   1972                          OverlapFuncFilters{ValueCutoff, FuncNameFilter},
   1973                          SimilarityCutoff, OS);
   1974 
   1975   return 0;
   1976 }
   1977 
/// Totals accumulated while walking the value sites of one value kind.
struct ValueSitesStats {
  /// Number of value sites visited.
  uint64_t TotalNumValueSites = 0;
  /// Number of visited sites that carry at least one value.
  uint64_t TotalNumValueSitesWithValueProfile = 0;
  /// Number of values summed over all visited sites.
  uint64_t TotalNumValues = 0;
  /// Entry K holds the number of sites that carry exactly K + 1 values.
  std::vector<unsigned> ValueSitesHistogram;
};
   1987 
   1988 static void traverseAllValueSites(const InstrProfRecord &Func, uint32_t VK,
   1989                                   ValueSitesStats &Stats, raw_fd_ostream &OS,
   1990                                   InstrProfSymtab *Symtab) {
   1991   uint32_t NS = Func.getNumValueSites(VK);
   1992   Stats.TotalNumValueSites += NS;
   1993   for (size_t I = 0; I < NS; ++I) {
   1994     uint32_t NV = Func.getNumValueDataForSite(VK, I);
   1995     std::unique_ptr<InstrProfValueData[]> VD = Func.getValueForSite(VK, I);
   1996     Stats.TotalNumValues += NV;
   1997     if (NV) {
   1998       Stats.TotalNumValueSitesWithValueProfile++;
   1999       if (NV > Stats.ValueSitesHistogram.size())
   2000         Stats.ValueSitesHistogram.resize(NV, 0);
   2001       Stats.ValueSitesHistogram[NV - 1]++;
   2002     }
   2003 
   2004     uint64_t SiteSum = 0;
   2005     for (uint32_t V = 0; V < NV; V++)
   2006       SiteSum += VD[V].Count;
   2007     if (SiteSum == 0)
   2008       SiteSum = 1;
   2009 
   2010     for (uint32_t V = 0; V < NV; V++) {
   2011       OS << "\t[ " << format("%2u", I) << ", ";
   2012       if (Symtab == nullptr)
   2013         OS << format("%4" PRIu64, VD[V].Value);
   2014       else
   2015         OS << Symtab->getFuncName(VD[V].Value);
   2016       OS << ", " << format("%10" PRId64, VD[V].Count) << " ] ("
   2017          << format("%.2f%%", (VD[V].Count * 100.0 / SiteSum)) << ")\n";
   2018     }
   2019   }
   2020 }
   2021 
   2022 static void showValueSitesStats(raw_fd_ostream &OS, uint32_t VK,
   2023                                 ValueSitesStats &Stats) {
   2024   OS << "  Total number of sites: " << Stats.TotalNumValueSites << "\n";
   2025   OS << "  Total number of sites with values: "
   2026      << Stats.TotalNumValueSitesWithValueProfile << "\n";
   2027   OS << "  Total number of profiled values: " << Stats.TotalNumValues << "\n";
   2028 
   2029   OS << "  Value sites histogram:\n\tNumTargets, SiteCount\n";
   2030   for (unsigned I = 0; I < Stats.ValueSitesHistogram.size(); I++) {
   2031     if (Stats.ValueSitesHistogram[I] > 0)
   2032       OS << "\t" << I + 1 << ", " << Stats.ValueSitesHistogram[I] << "\n";
   2033   }
   2034 }
   2035 
   2036 static int showInstrProfile(const std::string &Filename, bool ShowCounts,
   2037                             uint32_t TopN, bool ShowIndirectCallTargets,
   2038                             bool ShowMemOPSizes, bool ShowDetailedSummary,
   2039                             std::vector<uint32_t> DetailedSummaryCutoffs,
   2040                             bool ShowAllFunctions, bool ShowCS,
   2041                             uint64_t ValueCutoff, bool OnlyListBelow,
   2042                             const std::string &ShowFunction, bool TextFormat,
   2043                             raw_fd_ostream &OS) {
   2044   auto ReaderOrErr = InstrProfReader::create(Filename);
   2045   std::vector<uint32_t> Cutoffs = std::move(DetailedSummaryCutoffs);
   2046   if (ShowDetailedSummary && Cutoffs.empty()) {
   2047     Cutoffs = {800000, 900000, 950000, 990000, 999000, 999900, 999990};
   2048   }
   2049   InstrProfSummaryBuilder Builder(std::move(Cutoffs));
   2050   if (Error E = ReaderOrErr.takeError())
   2051     exitWithError(std::move(E), Filename);
   2052 
   2053   auto Reader = std::move(ReaderOrErr.get());
   2054   bool IsIRInstr = Reader->isIRLevelProfile();
   2055   size_t ShownFunctions = 0;
   2056   size_t BelowCutoffFunctions = 0;
   2057   int NumVPKind = IPVK_Last - IPVK_First + 1;
   2058   std::vector<ValueSitesStats> VPStats(NumVPKind);
   2059 
   2060   auto MinCmp = [](const std::pair<std::string, uint64_t> &v1,
   2061                    const std::pair<std::string, uint64_t> &v2) {
   2062     return v1.second > v2.second;
   2063   };
   2064 
   2065   std::priority_queue<std::pair<std::string, uint64_t>,
   2066                       std::vector<std::pair<std::string, uint64_t>>,
   2067                       decltype(MinCmp)>
   2068       HottestFuncs(MinCmp);
   2069 
   2070   if (!TextFormat && OnlyListBelow) {
   2071     OS << "The list of functions with the maximum counter less than "
   2072        << ValueCutoff << ":\n";
   2073   }
   2074 
   2075   // Add marker so that IR-level instrumentation round-trips properly.
   2076   if (TextFormat && IsIRInstr)
   2077     OS << ":ir\n";
   2078 
   2079   for (const auto &Func : *Reader) {
   2080     if (Reader->isIRLevelProfile()) {
   2081       bool FuncIsCS = NamedInstrProfRecord::hasCSFlagInHash(Func.Hash);
   2082       if (FuncIsCS != ShowCS)
   2083         continue;
   2084     }
   2085     bool Show =
   2086         ShowAllFunctions || (!ShowFunction.empty() &&
   2087                              Func.Name.find(ShowFunction) != Func.Name.npos);
   2088 
   2089     bool doTextFormatDump = (Show && TextFormat);
   2090 
   2091     if (doTextFormatDump) {
   2092       InstrProfSymtab &Symtab = Reader->getSymtab();
   2093       InstrProfWriter::writeRecordInText(Func.Name, Func.Hash, Func, Symtab,
   2094                                          OS);
   2095       continue;
   2096     }
   2097 
   2098     assert(Func.Counts.size() > 0 && "function missing entry counter");
   2099     Builder.addRecord(Func);
   2100 
   2101     uint64_t FuncMax = 0;
   2102     uint64_t FuncSum = 0;
   2103     for (size_t I = 0, E = Func.Counts.size(); I < E; ++I) {
   2104       if (Func.Counts[I] == (uint64_t)-1)
   2105         continue;
   2106       FuncMax = std::max(FuncMax, Func.Counts[I]);
   2107       FuncSum += Func.Counts[I];
   2108     }
   2109 
   2110     if (FuncMax < ValueCutoff) {
   2111       ++BelowCutoffFunctions;
   2112       if (OnlyListBelow) {
   2113         OS << "  " << Func.Name << ": (Max = " << FuncMax
   2114            << " Sum = " << FuncSum << ")\n";
   2115       }
   2116       continue;
   2117     } else if (OnlyListBelow)
   2118       continue;
   2119 
   2120     if (TopN) {
   2121       if (HottestFuncs.size() == TopN) {
   2122         if (HottestFuncs.top().second < FuncMax) {
   2123           HottestFuncs.pop();
   2124           HottestFuncs.emplace(std::make_pair(std::string(Func.Name), FuncMax));
   2125         }
   2126       } else
   2127         HottestFuncs.emplace(std::make_pair(std::string(Func.Name), FuncMax));
   2128     }
   2129 
   2130     if (Show) {
   2131       if (!ShownFunctions)
   2132         OS << "Counters:\n";
   2133 
   2134       ++ShownFunctions;
   2135 
   2136       OS << "  " << Func.Name << ":\n"
   2137          << "    Hash: " << format("0x%016" PRIx64, Func.Hash) << "\n"
   2138          << "    Counters: " << Func.Counts.size() << "\n";
   2139       if (!IsIRInstr)
   2140         OS << "    Function count: " << Func.Counts[0] << "\n";
   2141 
   2142       if (ShowIndirectCallTargets)
   2143         OS << "    Indirect Call Site Count: "
   2144            << Func.getNumValueSites(IPVK_IndirectCallTarget) << "\n";
   2145 
   2146       uint32_t NumMemOPCalls = Func.getNumValueSites(IPVK_MemOPSize);
   2147       if (ShowMemOPSizes && NumMemOPCalls > 0)
   2148         OS << "    Number of Memory Intrinsics Calls: " << NumMemOPCalls
   2149            << "\n";
   2150 
   2151       if (ShowCounts) {
   2152         OS << "    Block counts: [";
   2153         size_t Start = (IsIRInstr ? 0 : 1);
   2154         for (size_t I = Start, E = Func.Counts.size(); I < E; ++I) {
   2155           OS << (I == Start ? "" : ", ") << Func.Counts[I];
   2156         }
   2157         OS << "]\n";
   2158       }
   2159 
   2160       if (ShowIndirectCallTargets) {
   2161         OS << "    Indirect Target Results:\n";
   2162         traverseAllValueSites(Func, IPVK_IndirectCallTarget,
   2163                               VPStats[IPVK_IndirectCallTarget], OS,
   2164                               &(Reader->getSymtab()));
   2165       }
   2166 
   2167       if (ShowMemOPSizes && NumMemOPCalls > 0) {
   2168         OS << "    Memory Intrinsic Size Results:\n";
   2169         traverseAllValueSites(Func, IPVK_MemOPSize, VPStats[IPVK_MemOPSize], OS,
   2170                               nullptr);
   2171       }
   2172     }
   2173   }
   2174   if (Reader->hasError())
   2175     exitWithError(Reader->getError(), Filename);
   2176 
   2177   if (TextFormat)
   2178     return 0;
   2179   std::unique_ptr<ProfileSummary> PS(Builder.getSummary());
   2180   bool IsIR = Reader->isIRLevelProfile();
   2181   OS << "Instrumentation level: " << (IsIR ? "IR" : "Front-end");
   2182   if (IsIR)
   2183     OS << "  entry_first = " << Reader->instrEntryBBEnabled();
   2184   OS << "\n";
   2185   if (ShowAllFunctions || !ShowFunction.empty())
   2186     OS << "Functions shown: " << ShownFunctions << "\n";
   2187   OS << "Total functions: " << PS->getNumFunctions() << "\n";
   2188   if (ValueCutoff > 0) {
   2189     OS << "Number of functions with maximum count (< " << ValueCutoff
   2190        << "): " << BelowCutoffFunctions << "\n";
   2191     OS << "Number of functions with maximum count (>= " << ValueCutoff
   2192        << "): " << PS->getNumFunctions() - BelowCutoffFunctions << "\n";
   2193   }
   2194   OS << "Maximum function count: " << PS->getMaxFunctionCount() << "\n";
   2195   OS << "Maximum internal block count: " << PS->getMaxInternalCount() << "\n";
   2196 
   2197   if (TopN) {
   2198     std::vector<std::pair<std::string, uint64_t>> SortedHottestFuncs;
   2199     while (!HottestFuncs.empty()) {
   2200       SortedHottestFuncs.emplace_back(HottestFuncs.top());
   2201       HottestFuncs.pop();
   2202     }
   2203     OS << "Top " << TopN
   2204        << " functions with the largest internal block counts: \n";
   2205     for (auto &hotfunc : llvm::reverse(SortedHottestFuncs))
   2206       OS << "  " << hotfunc.first << ", max count = " << hotfunc.second << "\n";
   2207   }
   2208 
   2209   if (ShownFunctions && ShowIndirectCallTargets) {
   2210     OS << "Statistics for indirect call sites profile:\n";
   2211     showValueSitesStats(OS, IPVK_IndirectCallTarget,
   2212                         VPStats[IPVK_IndirectCallTarget]);
   2213   }
   2214 
   2215   if (ShownFunctions && ShowMemOPSizes) {
   2216     OS << "Statistics for memory intrinsic calls sizes profile:\n";
   2217     showValueSitesStats(OS, IPVK_MemOPSize, VPStats[IPVK_MemOPSize]);
   2218   }
   2219 
   2220   if (ShowDetailedSummary) {
   2221     OS << "Total number of blocks: " << PS->getNumCounts() << "\n";
   2222     OS << "Total count: " << PS->getTotalCount() << "\n";
   2223     PS->printDetailedSummary(OS);
   2224   }
   2225   return 0;
   2226 }
   2227 
   2228 static void showSectionInfo(sampleprof::SampleProfileReader *Reader,
   2229                             raw_fd_ostream &OS) {
   2230   if (!Reader->dumpSectionInfo(OS)) {
   2231     WithColor::warning() << "-show-sec-info-only is only supported for "
   2232                          << "sample profile in extbinary format and is "
   2233                          << "ignored for other formats.\n";
   2234     return;
   2235   }
   2236 }
   2237 
   2238 namespace {
   2239 struct HotFuncInfo {
   2240   StringRef FuncName;
   2241   uint64_t TotalCount;
   2242   double TotalCountPercent;
   2243   uint64_t MaxCount;
   2244   uint64_t EntryCount;
   2245 
   2246   HotFuncInfo()
   2247       : FuncName(), TotalCount(0), TotalCountPercent(0.0f), MaxCount(0),
   2248         EntryCount(0) {}
   2249 
   2250   HotFuncInfo(StringRef FN, uint64_t TS, double TSP, uint64_t MS, uint64_t ES)
   2251       : FuncName(FN), TotalCount(TS), TotalCountPercent(TSP), MaxCount(MS),
   2252         EntryCount(ES) {}
   2253 };
   2254 } // namespace
   2255 
   2256 // Print out detailed information about hot functions in PrintValues vector.
   2257 // Users specify titles and offset of every columns through ColumnTitle and
   2258 // ColumnOffset. The size of ColumnTitle and ColumnOffset need to be the same
   2259 // and at least 4. Besides, users can optionally give a HotFuncMetric string to
   2260 // print out or let it be an empty string.
   2261 static void dumpHotFunctionList(const std::vector<std::string> &ColumnTitle,
   2262                                 const std::vector<int> &ColumnOffset,
   2263                                 const std::vector<HotFuncInfo> &PrintValues,
   2264                                 uint64_t HotFuncCount, uint64_t TotalFuncCount,
   2265                                 uint64_t HotProfCount, uint64_t TotalProfCount,
   2266                                 const std::string &HotFuncMetric,
   2267                                 raw_fd_ostream &OS) {
   2268   assert(ColumnOffset.size() == ColumnTitle.size() &&
   2269          "ColumnOffset and ColumnTitle should have the same size");
   2270   assert(ColumnTitle.size() >= 4 &&
   2271          "ColumnTitle should have at least 4 elements");
   2272   assert(TotalFuncCount > 0 &&
   2273          "There should be at least one function in the profile");
   2274   double TotalProfPercent = 0;
   2275   if (TotalProfCount > 0)
   2276     TotalProfPercent = static_cast<double>(HotProfCount) / TotalProfCount * 100;
   2277 
   2278   formatted_raw_ostream FOS(OS);
   2279   FOS << HotFuncCount << " out of " << TotalFuncCount
   2280       << " functions with profile ("
   2281       << format("%.2f%%",
   2282                 (static_cast<double>(HotFuncCount) / TotalFuncCount * 100))
   2283       << ") are considered hot functions";
   2284   if (!HotFuncMetric.empty())
   2285     FOS << " (" << HotFuncMetric << ")";
   2286   FOS << ".\n";
   2287   FOS << HotProfCount << " out of " << TotalProfCount << " profile counts ("
   2288       << format("%.2f%%", TotalProfPercent) << ") are from hot functions.\n";
   2289 
   2290   for (size_t I = 0; I < ColumnTitle.size(); ++I) {
   2291     FOS.PadToColumn(ColumnOffset[I]);
   2292     FOS << ColumnTitle[I];
   2293   }
   2294   FOS << "\n";
   2295 
   2296   for (const HotFuncInfo &R : PrintValues) {
   2297     FOS.PadToColumn(ColumnOffset[0]);
   2298     FOS << R.TotalCount << " (" << format("%.2f%%", R.TotalCountPercent) << ")";
   2299     FOS.PadToColumn(ColumnOffset[1]);
   2300     FOS << R.MaxCount;
   2301     FOS.PadToColumn(ColumnOffset[2]);
   2302     FOS << R.EntryCount;
   2303     FOS.PadToColumn(ColumnOffset[3]);
   2304     FOS << R.FuncName << "\n";
   2305   }
   2306 }
   2307 
   2308 static int
   2309 showHotFunctionList(const StringMap<sampleprof::FunctionSamples> &Profiles,
   2310                     ProfileSummary &PS, raw_fd_ostream &OS) {
   2311   using namespace sampleprof;
   2312 
   2313   const uint32_t HotFuncCutoff = 990000;
   2314   auto &SummaryVector = PS.getDetailedSummary();
   2315   uint64_t MinCountThreshold = 0;
   2316   for (const ProfileSummaryEntry &SummaryEntry : SummaryVector) {
   2317     if (SummaryEntry.Cutoff == HotFuncCutoff) {
   2318       MinCountThreshold = SummaryEntry.MinCount;
   2319       break;
   2320     }
   2321   }
   2322 
   2323   // Traverse all functions in the profile and keep only hot functions.
   2324   // The following loop also calculates the sum of total samples of all
   2325   // functions.
   2326   std::multimap<uint64_t, std::pair<const FunctionSamples *, const uint64_t>,
   2327                 std::greater<uint64_t>>
   2328       HotFunc;
   2329   uint64_t ProfileTotalSample = 0;
   2330   uint64_t HotFuncSample = 0;
   2331   uint64_t HotFuncCount = 0;
   2332 
   2333   for (const auto &I : Profiles) {
   2334     FuncSampleStats FuncStats;
   2335     const FunctionSamples &FuncProf = I.second;
   2336     ProfileTotalSample += FuncProf.getTotalSamples();
   2337     getFuncSampleStats(FuncProf, FuncStats, MinCountThreshold);
   2338 
   2339     if (isFunctionHot(FuncStats, MinCountThreshold)) {
   2340       HotFunc.emplace(FuncProf.getTotalSamples(),
   2341                       std::make_pair(&(I.second), FuncStats.MaxSample));
   2342       HotFuncSample += FuncProf.getTotalSamples();
   2343       ++HotFuncCount;
   2344     }
   2345   }
   2346 
   2347   std::vector<std::string> ColumnTitle{"Total sample (%)", "Max sample",
   2348                                        "Entry sample", "Function name"};
   2349   std::vector<int> ColumnOffset{0, 24, 42, 58};
   2350   std::string Metric =
   2351       std::string("max sample >= ") + std::to_string(MinCountThreshold);
   2352   std::vector<HotFuncInfo> PrintValues;
   2353   for (const auto &FuncPair : HotFunc) {
   2354     const FunctionSamples &Func = *FuncPair.second.first;
   2355     double TotalSamplePercent =
   2356         (ProfileTotalSample > 0)
   2357             ? (Func.getTotalSamples() * 100.0) / ProfileTotalSample
   2358             : 0;
   2359     PrintValues.emplace_back(HotFuncInfo(
   2360         Func.getNameWithContext(), Func.getTotalSamples(), TotalSamplePercent,
   2361         FuncPair.second.second, Func.getEntrySamples()));
   2362   }
   2363   dumpHotFunctionList(ColumnTitle, ColumnOffset, PrintValues, HotFuncCount,
   2364                       Profiles.size(), HotFuncSample, ProfileTotalSample,
   2365                       Metric, OS);
   2366 
   2367   return 0;
   2368 }
   2369 
   2370 static int showSampleProfile(const std::string &Filename, bool ShowCounts,
   2371                              bool ShowAllFunctions, bool ShowDetailedSummary,
   2372                              const std::string &ShowFunction,
   2373                              bool ShowProfileSymbolList,
   2374                              bool ShowSectionInfoOnly, bool ShowHotFuncList,
   2375                              raw_fd_ostream &OS) {
   2376   using namespace sampleprof;
   2377   LLVMContext Context;
   2378   auto ReaderOrErr = SampleProfileReader::create(Filename, Context);
   2379   if (std::error_code EC = ReaderOrErr.getError())
   2380     exitWithErrorCode(EC, Filename);
   2381 
   2382   auto Reader = std::move(ReaderOrErr.get());
   2383 
   2384   if (ShowSectionInfoOnly) {
   2385     showSectionInfo(Reader.get(), OS);
   2386     return 0;
   2387   }
   2388 
   2389   if (std::error_code EC = Reader->read())
   2390     exitWithErrorCode(EC, Filename);
   2391 
   2392   if (ShowAllFunctions || ShowFunction.empty())
   2393     Reader->dump(OS);
   2394   else
   2395     Reader->dumpFunctionProfile(ShowFunction, OS);
   2396 
   2397   if (ShowProfileSymbolList) {
   2398     std::unique_ptr<sampleprof::ProfileSymbolList> ReaderList =
   2399         Reader->getProfileSymbolList();
   2400     ReaderList->dump(OS);
   2401   }
   2402 
   2403   if (ShowDetailedSummary) {
   2404     auto &PS = Reader->getSummary();
   2405     PS.printSummary(OS);
   2406     PS.printDetailedSummary(OS);
   2407   }
   2408 
   2409   if (ShowHotFuncList)
   2410     showHotFunctionList(Reader->getProfiles(), Reader->getSummary(), OS);
   2411 
   2412   return 0;
   2413 }
   2414 
   2415 static int show_main(int argc, const char *argv[]) {
   2416   cl::opt<std::string> Filename(cl::Positional, cl::Required,
   2417                                 cl::desc("<profdata-file>"));
   2418 
   2419   cl::opt<bool> ShowCounts("counts", cl::init(false),
   2420                            cl::desc("Show counter values for shown functions"));
   2421   cl::opt<bool> TextFormat(
   2422       "text", cl::init(false),
   2423       cl::desc("Show instr profile data in text dump format"));
   2424   cl::opt<bool> ShowIndirectCallTargets(
   2425       "ic-targets", cl::init(false),
   2426       cl::desc("Show indirect call site target values for shown functions"));
   2427   cl::opt<bool> ShowMemOPSizes(
   2428       "memop-sizes", cl::init(false),
   2429       cl::desc("Show the profiled sizes of the memory intrinsic calls "
   2430                "for shown functions"));
   2431   cl::opt<bool> ShowDetailedSummary("detailed-summary", cl::init(false),
   2432                                     cl::desc("Show detailed profile summary"));
   2433   cl::list<uint32_t> DetailedSummaryCutoffs(
   2434       cl::CommaSeparated, "detailed-summary-cutoffs",
   2435       cl::desc(
   2436           "Cutoff percentages (times 10000) for generating detailed summary"),
   2437       cl::value_desc("800000,901000,999999"));
   2438   cl::opt<bool> ShowHotFuncList(
   2439       "hot-func-list", cl::init(false),
   2440       cl::desc("Show profile summary of a list of hot functions"));
   2441   cl::opt<bool> ShowAllFunctions("all-functions", cl::init(false),
   2442                                  cl::desc("Details for every function"));
   2443   cl::opt<bool> ShowCS("showcs", cl::init(false),
   2444                        cl::desc("Show context sensitive counts"));
   2445   cl::opt<std::string> ShowFunction("function",
   2446                                     cl::desc("Details for matching functions"));
   2447 
   2448   cl::opt<std::string> OutputFilename("output", cl::value_desc("output"),
   2449                                       cl::init("-"), cl::desc("Output file"));
   2450   cl::alias OutputFilenameA("o", cl::desc("Alias for --output"),
   2451                             cl::aliasopt(OutputFilename));
   2452   cl::opt<ProfileKinds> ProfileKind(
   2453       cl::desc("Profile kind:"), cl::init(instr),
   2454       cl::values(clEnumVal(instr, "Instrumentation profile (default)"),
   2455                  clEnumVal(sample, "Sample profile")));
   2456   cl::opt<uint32_t> TopNFunctions(
   2457       "topn", cl::init(0),
   2458       cl::desc("Show the list of functions with the largest internal counts"));
   2459   cl::opt<uint32_t> ValueCutoff(
   2460       "value-cutoff", cl::init(0),
   2461       cl::desc("Set the count value cutoff. Functions with the maximum count "
   2462                "less than this value will not be printed out. (Default is 0)"));
   2463   cl::opt<bool> OnlyListBelow(
   2464       "list-below-cutoff", cl::init(false),
   2465       cl::desc("Only output names of functions whose max count values are "
   2466                "below the cutoff value"));
   2467   cl::opt<bool> ShowProfileSymbolList(
   2468       "show-prof-sym-list", cl::init(false),
   2469       cl::desc("Show profile symbol list if it exists in the profile. "));
   2470   cl::opt<bool> ShowSectionInfoOnly(
   2471       "show-sec-info-only", cl::init(false),
   2472       cl::desc("Show the information of each section in the sample profile. "
   2473                "The flag is only usable when the sample profile is in "
   2474                "extbinary format"));
   2475 
   2476   cl::ParseCommandLineOptions(argc, argv, "LLVM profile data summary\n");
   2477 
   2478   if (OutputFilename.empty())
   2479     OutputFilename = "-";
   2480 
   2481   if (Filename == OutputFilename) {
   2482     errs() << sys::path::filename(argv[0])
   2483            << ": Input file name cannot be the same as the output file name!\n";
   2484     return 1;
   2485   }
   2486 
   2487   std::error_code EC;
   2488   raw_fd_ostream OS(OutputFilename.data(), EC, sys::fs::OF_TextWithCRLF);
   2489   if (EC)
   2490     exitWithErrorCode(EC, OutputFilename);
   2491 
   2492   if (ShowAllFunctions && !ShowFunction.empty())
   2493     WithColor::warning() << "-function argument ignored: showing all functions\n";
   2494 
   2495   if (ProfileKind == instr)
   2496     return showInstrProfile(Filename, ShowCounts, TopNFunctions,
   2497                             ShowIndirectCallTargets, ShowMemOPSizes,
   2498                             ShowDetailedSummary, DetailedSummaryCutoffs,
   2499                             ShowAllFunctions, ShowCS, ValueCutoff,
   2500                             OnlyListBelow, ShowFunction, TextFormat, OS);
   2501   else
   2502     return showSampleProfile(Filename, ShowCounts, ShowAllFunctions,
   2503                              ShowDetailedSummary, ShowFunction,
   2504                              ShowProfileSymbolList, ShowSectionInfoOnly,
   2505                              ShowHotFuncList, OS);
   2506 }
   2507 
   2508 int main(int argc, const char *argv[]) {
   2509   InitLLVM X(argc, argv);
   2510 
   2511   StringRef ProgName(sys::path::filename(argv[0]));
   2512   if (argc > 1) {
   2513     int (*func)(int, const char *[]) = nullptr;
   2514 
   2515     if (strcmp(argv[1], "merge") == 0)
   2516       func = merge_main;
   2517     else if (strcmp(argv[1], "show") == 0)
   2518       func = show_main;
   2519     else if (strcmp(argv[1], "overlap") == 0)
   2520       func = overlap_main;
   2521 
   2522     if (func) {
   2523       std::string Invocation(ProgName.str() + " " + argv[1]);
   2524       argv[1] = Invocation.c_str();
   2525       return func(argc - 1, argv + 1);
   2526     }
   2527 
   2528     if (strcmp(argv[1], "-h") == 0 || strcmp(argv[1], "-help") == 0 ||
   2529         strcmp(argv[1], "--help") == 0) {
   2530 
   2531       errs() << "OVERVIEW: LLVM profile data tools\n\n"
   2532              << "USAGE: " << ProgName << " <command> [args...]\n"
   2533              << "USAGE: " << ProgName << " <command> -help\n\n"
   2534              << "See each individual command --help for more details.\n"
   2535              << "Available commands: merge, show, overlap\n";
   2536       return 0;
   2537     }
   2538   }
   2539 
   2540   if (argc < 2)
   2541     errs() << ProgName << ": No command specified!\n";
   2542   else
   2543     errs() << ProgName << ": Unknown command!\n";
   2544 
   2545   errs() << "USAGE: " << ProgName << " <merge|show|overlap> [args...]\n";
   2546   return 1;
   2547 }
   2548