Home | History | Annotate | Line # | Download | only in IPO
      1 //===- PartialInlining.cpp - Inline parts of functions --------------------===//
      2 //
      3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
      4 // See https://llvm.org/LICENSE.txt for license information.
      5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
      6 //
      7 //===----------------------------------------------------------------------===//
      8 //
      9 // This pass performs partial inlining, typically by inlining an if statement
     10 // that surrounds the body of the function.
     11 //
     12 //===----------------------------------------------------------------------===//
     13 
     14 #include "llvm/Transforms/IPO/PartialInlining.h"
     15 #include "llvm/ADT/DenseMap.h"
     16 #include "llvm/ADT/DenseSet.h"
     17 #include "llvm/ADT/None.h"
     18 #include "llvm/ADT/Optional.h"
     19 #include "llvm/ADT/STLExtras.h"
     20 #include "llvm/ADT/SmallVector.h"
     21 #include "llvm/ADT/Statistic.h"
     22 #include "llvm/Analysis/BlockFrequencyInfo.h"
     23 #include "llvm/Analysis/BranchProbabilityInfo.h"
     24 #include "llvm/Analysis/InlineCost.h"
     25 #include "llvm/Analysis/LoopInfo.h"
     26 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
     27 #include "llvm/Analysis/ProfileSummaryInfo.h"
     28 #include "llvm/Analysis/TargetLibraryInfo.h"
     29 #include "llvm/Analysis/TargetTransformInfo.h"
     30 #include "llvm/IR/Attributes.h"
     31 #include "llvm/IR/BasicBlock.h"
     32 #include "llvm/IR/CFG.h"
     33 #include "llvm/IR/DebugLoc.h"
     34 #include "llvm/IR/DiagnosticInfo.h"
     35 #include "llvm/IR/Dominators.h"
     36 #include "llvm/IR/Function.h"
     37 #include "llvm/IR/InstrTypes.h"
     38 #include "llvm/IR/Instruction.h"
     39 #include "llvm/IR/Instructions.h"
     40 #include "llvm/IR/IntrinsicInst.h"
     41 #include "llvm/IR/Intrinsics.h"
     42 #include "llvm/IR/Module.h"
     43 #include "llvm/IR/User.h"
     44 #include "llvm/InitializePasses.h"
     45 #include "llvm/Pass.h"
     46 #include "llvm/Support/BlockFrequency.h"
     47 #include "llvm/Support/BranchProbability.h"
     48 #include "llvm/Support/Casting.h"
     49 #include "llvm/Support/CommandLine.h"
     50 #include "llvm/Support/ErrorHandling.h"
     51 #include "llvm/Transforms/IPO.h"
     52 #include "llvm/Transforms/Utils/Cloning.h"
     53 #include "llvm/Transforms/Utils/CodeExtractor.h"
     54 #include "llvm/Transforms/Utils/ValueMapper.h"
     55 #include <algorithm>
     56 #include <cassert>
     57 #include <cstdint>
     58 #include <functional>
     59 #include <iterator>
     60 #include <memory>
     61 #include <tuple>
     62 #include <vector>
     63 
     64 using namespace llvm;
     65 
     66 #define DEBUG_TYPE "partial-inlining"
     67 
// Counters maintained by this pass. NumColdRegionsFound is incremented once
// for every cold single-entry/single-exit candidate recorded by
// computeOutliningColdRegionsInfo.
STATISTIC(NumPartialInlined,
          "Number of callsites functions partially inlined into.");
STATISTIC(NumColdOutlinePartialInlined,
          "Number of times functions with cold outlined regions were "
          "partially inlined into its caller(s).");
STATISTIC(NumColdRegionsFound,
          "Number of cold single entry/exit regions found.");
STATISTIC(NumColdRegionsOutlined,
          "Number of cold single entry/exit regions outlined.");
     77 
     78 // Command line option to disable partial-inlining. The default is false:
     79 static cl::opt<bool>
     80     DisablePartialInlining("disable-partial-inlining", cl::init(false),
     81                            cl::Hidden, cl::desc("Disable partial inlining"));
     82 // Command line option to disable multi-region partial-inlining. The default is
     83 // false:
     84 static cl::opt<bool> DisableMultiRegionPartialInline(
     85     "disable-mr-partial-inlining", cl::init(false), cl::Hidden,
     86     cl::desc("Disable multi-region partial inlining"));
     87 
     88 // Command line option to force outlining in regions with live exit variables.
     89 // The default is false:
     90 static cl::opt<bool>
     91     ForceLiveExit("pi-force-live-exit-outline", cl::init(false), cl::Hidden,
     92                cl::desc("Force outline regions with live exits"));
     93 
     94 // Command line option to enable marking outline functions with Cold Calling
     95 // Convention. The default is false:
     96 static cl::opt<bool>
     97     MarkOutlinedColdCC("pi-mark-coldcc", cl::init(false), cl::Hidden,
     98                        cl::desc("Mark outline function calls with ColdCC"));
     99 
    100 // This is an option used by testing:
    101 static cl::opt<bool> SkipCostAnalysis("skip-partial-inlining-cost-analysis",
    102                                       cl::init(false), cl::ZeroOrMore,
    103                                       cl::ReallyHidden,
    104                                       cl::desc("Skip Cost Analysis"));
    105 // Used to determine if a cold region is worth outlining based on
    106 // its inlining cost compared to the original function.  Default is set at 10%.
    107 // ie. if the cold region reduces the inlining cost of the original function by
    108 // at least 10%.
    109 static cl::opt<float> MinRegionSizeRatio(
    110     "min-region-size-ratio", cl::init(0.1), cl::Hidden,
    111     cl::desc("Minimum ratio comparing relative sizes of each "
    112              "outline candidate and original function"));
    113 // Used to tune the minimum number of execution counts needed in the predecessor
    114 // block to the cold edge. ie. confidence interval.
    115 static cl::opt<unsigned>
    116     MinBlockCounterExecution("min-block-execution", cl::init(100), cl::Hidden,
    117                              cl::desc("Minimum block executions to consider "
    118                                       "its BranchProbabilityInfo valid"));
    119 // Used to determine when an edge is considered cold. Default is set to 10%. ie.
    120 // if the branch probability is 10% or less, then it is deemed as 'cold'.
    121 static cl::opt<float> ColdBranchRatio(
    122     "cold-branch-ratio", cl::init(0.1), cl::Hidden,
    123     cl::desc("Minimum BranchProbability to consider a region cold."));
    124 
    125 static cl::opt<unsigned> MaxNumInlineBlocks(
    126     "max-num-inline-blocks", cl::init(5), cl::Hidden,
    127     cl::desc("Max number of blocks to be partially inlined"));
    128 
    129 // Command line option to set the maximum number of partial inlining allowed
    130 // for the module. The default value of -1 means no limit.
    131 static cl::opt<int> MaxNumPartialInlining(
    132     "max-partial-inlining", cl::init(-1), cl::Hidden, cl::ZeroOrMore,
    133     cl::desc("Max number of partial inlining. The default is unlimited"));
    134 
    135 // Used only when PGO or user annotated branch data is absent. It is
    136 // the least value that is used to weigh the outline region. If BFI
    137 // produces larger value, the BFI value will be used.
    138 static cl::opt<int>
    139     OutlineRegionFreqPercent("outline-region-freq-percent", cl::init(75),
    140                              cl::Hidden, cl::ZeroOrMore,
    141                              cl::desc("Relative frequency of outline region to "
    142                                       "the entry block"));
    143 
    144 static cl::opt<unsigned> ExtraOutliningPenalty(
    145     "partial-inlining-extra-penalty", cl::init(0), cl::Hidden,
    146     cl::desc("A debug option to add additional penalty to the computed one."));
    147 
    148 namespace {
    149 
    150 struct FunctionOutliningInfo {
    151   FunctionOutliningInfo() = default;
    152 
    153   // Returns the number of blocks to be inlined including all blocks
    154   // in Entries and one return block.
    155   unsigned getNumInlinedBlocks() const { return Entries.size() + 1; }
    156 
    157   // A set of blocks including the function entry that guard
    158   // the region to be outlined.
    159   SmallVector<BasicBlock *, 4> Entries;
    160 
    161   // The return block that is not included in the outlined region.
    162   BasicBlock *ReturnBlock = nullptr;
    163 
    164   // The dominating block of the region to be outlined.
    165   BasicBlock *NonReturnBlock = nullptr;
    166 
    167   // The set of blocks in Entries that that are predecessors to ReturnBlock
    168   SmallVector<BasicBlock *, 4> ReturnBlockPreds;
    169 };
    170 
    171 struct FunctionOutliningMultiRegionInfo {
    172   FunctionOutliningMultiRegionInfo()
    173       : ORI() {}
    174 
    175   // Container for outline regions
    176   struct OutlineRegionInfo {
    177     OutlineRegionInfo(ArrayRef<BasicBlock *> Region,
    178                       BasicBlock *EntryBlock, BasicBlock *ExitBlock,
    179                       BasicBlock *ReturnBlock)
    180         : Region(Region.begin(), Region.end()), EntryBlock(EntryBlock),
    181           ExitBlock(ExitBlock), ReturnBlock(ReturnBlock) {}
    182     SmallVector<BasicBlock *, 8> Region;
    183     BasicBlock *EntryBlock;
    184     BasicBlock *ExitBlock;
    185     BasicBlock *ReturnBlock;
    186   };
    187 
    188   SmallVector<OutlineRegionInfo, 4> ORI;
    189 };
    190 
    191 struct PartialInlinerImpl {
    192 
    193   PartialInlinerImpl(
    194       function_ref<AssumptionCache &(Function &)> GetAC,
    195       function_ref<AssumptionCache *(Function &)> LookupAC,
    196       function_ref<TargetTransformInfo &(Function &)> GTTI,
    197       function_ref<const TargetLibraryInfo &(Function &)> GTLI,
    198       ProfileSummaryInfo &ProfSI,
    199       function_ref<BlockFrequencyInfo &(Function &)> GBFI = nullptr)
    200       : GetAssumptionCache(GetAC), LookupAssumptionCache(LookupAC),
    201         GetTTI(GTTI), GetBFI(GBFI), GetTLI(GTLI), PSI(ProfSI) {}
    202 
    203   bool run(Module &M);
    204   // Main part of the transformation that calls helper functions to find
    205   // outlining candidates, clone & outline the function, and attempt to
    206   // partially inline the resulting function. Returns true if
    207   // inlining was successful, false otherwise.  Also returns the outline
    208   // function (only if we partially inlined early returns) as there is a
    209   // possibility to further "peel" early return statements that were left in the
    210   // outline function due to code size.
    211   std::pair<bool, Function *> unswitchFunction(Function &F);
    212 
    213   // This class speculatively clones the function to be partial inlined.
    214   // At the end of partial inlining, the remaining callsites to the cloned
    215   // function that are not partially inlined will be fixed up to reference
    216   // the original function, and the cloned function will be erased.
    217   struct FunctionCloner {
    218     // Two constructors, one for single region outlining, the other for
    219     // multi-region outlining.
    220     FunctionCloner(Function *F, FunctionOutliningInfo *OI,
    221                    OptimizationRemarkEmitter &ORE,
    222                    function_ref<AssumptionCache *(Function &)> LookupAC,
    223                    function_ref<TargetTransformInfo &(Function &)> GetTTI);
    224     FunctionCloner(Function *F, FunctionOutliningMultiRegionInfo *OMRI,
    225                    OptimizationRemarkEmitter &ORE,
    226                    function_ref<AssumptionCache *(Function &)> LookupAC,
    227                    function_ref<TargetTransformInfo &(Function &)> GetTTI);
    228 
    229     ~FunctionCloner();
    230 
    231     // Prepare for function outlining: making sure there is only
    232     // one incoming edge from the extracted/outlined region to
    233     // the return block.
    234     void normalizeReturnBlock() const;
    235 
    236     // Do function outlining for cold regions.
    237     bool doMultiRegionFunctionOutlining();
    238     // Do function outlining for region after early return block(s).
    239     // NOTE: For vararg functions that do the vararg handling in the outlined
    240     //       function, we temporarily generate IR that does not properly
    241     //       forward varargs to the outlined function. Calling InlineFunction
    242     //       will update calls to the outlined functions to properly forward
    243     //       the varargs.
    244     Function *doSingleRegionFunctionOutlining();
    245 
    246     Function *OrigFunc = nullptr;
    247     Function *ClonedFunc = nullptr;
    248 
    249     typedef std::pair<Function *, BasicBlock *> FuncBodyCallerPair;
    250     // Keep track of Outlined Functions and the basic block they're called from.
    251     SmallVector<FuncBodyCallerPair, 4> OutlinedFunctions;
    252 
    253     // ClonedFunc is inlined in one of its callers after function
    254     // outlining.
    255     bool IsFunctionInlined = false;
    256     // The cost of the region to be outlined.
    257     InstructionCost OutlinedRegionCost = 0;
    258     // ClonedOI is specific to outlining non-early return blocks.
    259     std::unique_ptr<FunctionOutliningInfo> ClonedOI = nullptr;
    260     // ClonedOMRI is specific to outlining cold regions.
    261     std::unique_ptr<FunctionOutliningMultiRegionInfo> ClonedOMRI = nullptr;
    262     std::unique_ptr<BlockFrequencyInfo> ClonedFuncBFI = nullptr;
    263     OptimizationRemarkEmitter &ORE;
    264     function_ref<AssumptionCache *(Function &)> LookupAC;
    265     function_ref<TargetTransformInfo &(Function &)> GetTTI;
    266   };
    267 
    268 private:
    269   int NumPartialInlining = 0;
    270   function_ref<AssumptionCache &(Function &)> GetAssumptionCache;
    271   function_ref<AssumptionCache *(Function &)> LookupAssumptionCache;
    272   function_ref<TargetTransformInfo &(Function &)> GetTTI;
    273   function_ref<BlockFrequencyInfo &(Function &)> GetBFI;
    274   function_ref<const TargetLibraryInfo &(Function &)> GetTLI;
    275   ProfileSummaryInfo &PSI;
    276 
    277   // Return the frequency of the OutlininingBB relative to F's entry point.
    278   // The result is no larger than 1 and is represented using BP.
    279   // (Note that the outlined region's 'head' block can only have incoming
    280   // edges from the guarding entry blocks).
    281   BranchProbability
    282   getOutliningCallBBRelativeFreq(FunctionCloner &Cloner) const;
    283 
    284   // Return true if the callee of CB should be partially inlined with
    285   // profit.
    286   bool shouldPartialInline(CallBase &CB, FunctionCloner &Cloner,
    287                            BlockFrequency WeightedOutliningRcost,
    288                            OptimizationRemarkEmitter &ORE) const;
    289 
    290   // Try to inline DuplicateFunction (cloned from F with call to
    291   // the OutlinedFunction into its callers. Return true
    292   // if there is any successful inlining.
    293   bool tryPartialInline(FunctionCloner &Cloner);
    294 
    295   // Compute the mapping from use site of DuplicationFunction to the enclosing
    296   // BB's profile count.
    297   void
    298   computeCallsiteToProfCountMap(Function *DuplicateFunction,
    299                                 DenseMap<User *, uint64_t> &SiteCountMap) const;
    300 
    301   bool isLimitReached() const {
    302     return (MaxNumPartialInlining != -1 &&
    303             NumPartialInlining >= MaxNumPartialInlining);
    304   }
    305 
    306   static CallBase *getSupportedCallBase(User *U) {
    307     if (isa<CallInst>(U) || isa<InvokeInst>(U))
    308       return cast<CallBase>(U);
    309     llvm_unreachable("All uses must be calls");
    310     return nullptr;
    311   }
    312 
    313   static CallBase *getOneCallSiteTo(Function &F) {
    314     User *User = *F.user_begin();
    315     return getSupportedCallBase(User);
    316   }
    317 
    318   std::tuple<DebugLoc, BasicBlock *> getOneDebugLoc(Function &F) const {
    319     CallBase *CB = getOneCallSiteTo(F);
    320     DebugLoc DLoc = CB->getDebugLoc();
    321     BasicBlock *Block = CB->getParent();
    322     return std::make_tuple(DLoc, Block);
    323   }
    324 
    325   // Returns the costs associated with function outlining:
    326   // - The first value is the non-weighted runtime cost for making the call
    327   //   to the outlined function, including the addtional  setup cost in the
    328   //    outlined function itself;
    329   // - The second value is the estimated size of the new call sequence in
    330   //   basic block Cloner.OutliningCallBB;
    331   std::tuple<InstructionCost, InstructionCost>
    332   computeOutliningCosts(FunctionCloner &Cloner) const;
    333 
    334   // Compute the 'InlineCost' of block BB. InlineCost is a proxy used to
    335   // approximate both the size and runtime cost (Note that in the current
    336   // inline cost analysis, there is no clear distinction there either).
    337   static InstructionCost computeBBInlineCost(BasicBlock *BB,
    338                                              TargetTransformInfo *TTI);
    339 
    340   std::unique_ptr<FunctionOutliningInfo>
    341   computeOutliningInfo(Function &F) const;
    342 
    343   std::unique_ptr<FunctionOutliningMultiRegionInfo>
    344   computeOutliningColdRegionsInfo(Function &F,
    345                                   OptimizationRemarkEmitter &ORE) const;
    346 };
    347 
    348 struct PartialInlinerLegacyPass : public ModulePass {
    349   static char ID; // Pass identification, replacement for typeid
    350 
    351   PartialInlinerLegacyPass() : ModulePass(ID) {
    352     initializePartialInlinerLegacyPassPass(*PassRegistry::getPassRegistry());
    353   }
    354 
    355   void getAnalysisUsage(AnalysisUsage &AU) const override {
    356     AU.addRequired<AssumptionCacheTracker>();
    357     AU.addRequired<ProfileSummaryInfoWrapperPass>();
    358     AU.addRequired<TargetTransformInfoWrapperPass>();
    359     AU.addRequired<TargetLibraryInfoWrapperPass>();
    360   }
    361 
    362   bool runOnModule(Module &M) override {
    363     if (skipModule(M))
    364       return false;
    365 
    366     AssumptionCacheTracker *ACT = &getAnalysis<AssumptionCacheTracker>();
    367     TargetTransformInfoWrapperPass *TTIWP =
    368         &getAnalysis<TargetTransformInfoWrapperPass>();
    369     ProfileSummaryInfo &PSI =
    370         getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
    371 
    372     auto GetAssumptionCache = [&ACT](Function &F) -> AssumptionCache & {
    373       return ACT->getAssumptionCache(F);
    374     };
    375 
    376     auto LookupAssumptionCache = [ACT](Function &F) -> AssumptionCache * {
    377       return ACT->lookupAssumptionCache(F);
    378     };
    379 
    380     auto GetTTI = [&TTIWP](Function &F) -> TargetTransformInfo & {
    381       return TTIWP->getTTI(F);
    382     };
    383 
    384     auto GetTLI = [this](Function &F) -> TargetLibraryInfo & {
    385       return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
    386     };
    387 
    388     return PartialInlinerImpl(GetAssumptionCache, LookupAssumptionCache, GetTTI,
    389                               GetTLI, PSI)
    390         .run(M);
    391   }
    392 };
    393 
    394 } // end anonymous namespace
    395 
    396 std::unique_ptr<FunctionOutliningMultiRegionInfo>
    397 PartialInlinerImpl::computeOutliningColdRegionsInfo(
    398     Function &F, OptimizationRemarkEmitter &ORE) const {
    399   BasicBlock *EntryBlock = &F.front();
    400 
    401   DominatorTree DT(F);
    402   LoopInfo LI(DT);
    403   BranchProbabilityInfo BPI(F, LI);
    404   std::unique_ptr<BlockFrequencyInfo> ScopedBFI;
    405   BlockFrequencyInfo *BFI;
    406   if (!GetBFI) {
    407     ScopedBFI.reset(new BlockFrequencyInfo(F, BPI, LI));
    408     BFI = ScopedBFI.get();
    409   } else
    410     BFI = &(GetBFI(F));
    411 
    412   // Return if we don't have profiling information.
    413   if (!PSI.hasInstrumentationProfile())
    414     return std::unique_ptr<FunctionOutliningMultiRegionInfo>();
    415 
    416   std::unique_ptr<FunctionOutliningMultiRegionInfo> OutliningInfo =
    417       std::make_unique<FunctionOutliningMultiRegionInfo>();
    418 
    419   auto IsSingleExit =
    420       [&ORE](SmallVectorImpl<BasicBlock *> &BlockList) -> BasicBlock * {
    421     BasicBlock *ExitBlock = nullptr;
    422     for (auto *Block : BlockList) {
    423       for (BasicBlock *Succ : successors(Block)) {
    424         if (!is_contained(BlockList, Succ)) {
    425           if (ExitBlock) {
    426             ORE.emit([&]() {
    427               return OptimizationRemarkMissed(DEBUG_TYPE, "MultiExitRegion",
    428                                               &Succ->front())
    429                      << "Region dominated by "
    430                      << ore::NV("Block", BlockList.front()->getName())
    431                      << " has more than one region exit edge.";
    432             });
    433             return nullptr;
    434           }
    435 
    436           ExitBlock = Block;
    437         }
    438       }
    439     }
    440     return ExitBlock;
    441   };
    442 
    443   auto BBProfileCount = [BFI](BasicBlock *BB) {
    444     return BFI->getBlockProfileCount(BB)
    445                ? BFI->getBlockProfileCount(BB).getValue()
    446                : 0;
    447   };
    448 
    449   // Use the same computeBBInlineCost function to compute the cost savings of
    450   // the outlining the candidate region.
    451   TargetTransformInfo *FTTI = &GetTTI(F);
    452   InstructionCost OverallFunctionCost = 0;
    453   for (auto &BB : F)
    454     OverallFunctionCost += computeBBInlineCost(&BB, FTTI);
    455 
    456   LLVM_DEBUG(dbgs() << "OverallFunctionCost = " << OverallFunctionCost
    457                     << "\n";);
    458 
    459   InstructionCost MinOutlineRegionCost = OverallFunctionCost.map(
    460       [&](auto Cost) { return Cost * MinRegionSizeRatio; });
    461 
    462   BranchProbability MinBranchProbability(
    463       static_cast<int>(ColdBranchRatio * MinBlockCounterExecution),
    464       MinBlockCounterExecution);
    465   bool ColdCandidateFound = false;
    466   BasicBlock *CurrEntry = EntryBlock;
    467   std::vector<BasicBlock *> DFS;
    468   DenseMap<BasicBlock *, bool> VisitedMap;
    469   DFS.push_back(CurrEntry);
    470   VisitedMap[CurrEntry] = true;
    471 
    472   // Use Depth First Search on the basic blocks to find CFG edges that are
    473   // considered cold.
    474   // Cold regions considered must also have its inline cost compared to the
    475   // overall inline cost of the original function.  The region is outlined only
    476   // if it reduced the inline cost of the function by 'MinOutlineRegionCost' or
    477   // more.
    478   while (!DFS.empty()) {
    479     auto *ThisBB = DFS.back();
    480     DFS.pop_back();
    481     // Only consider regions with predecessor blocks that are considered
    482     // not-cold (default: part of the top 99.99% of all block counters)
    483     // AND greater than our minimum block execution count (default: 100).
    484     if (PSI.isColdBlock(ThisBB, BFI) ||
    485         BBProfileCount(ThisBB) < MinBlockCounterExecution)
    486       continue;
    487     for (auto SI = succ_begin(ThisBB); SI != succ_end(ThisBB); ++SI) {
    488       if (VisitedMap[*SI])
    489         continue;
    490       VisitedMap[*SI] = true;
    491       DFS.push_back(*SI);
    492       // If branch isn't cold, we skip to the next one.
    493       BranchProbability SuccProb = BPI.getEdgeProbability(ThisBB, *SI);
    494       if (SuccProb > MinBranchProbability)
    495         continue;
    496 
    497       LLVM_DEBUG(dbgs() << "Found cold edge: " << ThisBB->getName() << "->"
    498                         << SI->getName()
    499                         << "\nBranch Probability = " << SuccProb << "\n";);
    500 
    501       SmallVector<BasicBlock *, 8> DominateVector;
    502       DT.getDescendants(*SI, DominateVector);
    503       assert(!DominateVector.empty() &&
    504              "SI should be reachable and have at least itself as descendant");
    505 
    506       // We can only outline single entry regions (for now).
    507       if (!DominateVector.front()->hasNPredecessors(1)) {
    508         LLVM_DEBUG(dbgs() << "ABORT: Block " << SI->getName()
    509                           << " doesn't have a single predecessor in the "
    510                              "dominator tree\n";);
    511         continue;
    512       }
    513 
    514       BasicBlock *ExitBlock = nullptr;
    515       // We can only outline single exit regions (for now).
    516       if (!(ExitBlock = IsSingleExit(DominateVector))) {
    517         LLVM_DEBUG(dbgs() << "ABORT: Block " << SI->getName()
    518                           << " doesn't have a unique successor\n";);
    519         continue;
    520       }
    521 
    522       InstructionCost OutlineRegionCost = 0;
    523       for (auto *BB : DominateVector)
    524         OutlineRegionCost += computeBBInlineCost(BB, &GetTTI(*BB->getParent()));
    525 
    526       LLVM_DEBUG(dbgs() << "OutlineRegionCost = " << OutlineRegionCost
    527                         << "\n";);
    528 
    529       if (!SkipCostAnalysis && OutlineRegionCost < MinOutlineRegionCost) {
    530         ORE.emit([&]() {
    531           return OptimizationRemarkAnalysis(DEBUG_TYPE, "TooCostly",
    532                                             &SI->front())
    533                  << ore::NV("Callee", &F)
    534                  << " inline cost-savings smaller than "
    535                  << ore::NV("Cost", MinOutlineRegionCost);
    536         });
    537 
    538         LLVM_DEBUG(dbgs() << "ABORT: Outline region cost is smaller than "
    539                           << MinOutlineRegionCost << "\n";);
    540         continue;
    541       }
    542 
    543       // For now, ignore blocks that belong to a SISE region that is a
    544       // candidate for outlining.  In the future, we may want to look
    545       // at inner regions because the outer region may have live-exit
    546       // variables.
    547       for (auto *BB : DominateVector)
    548         VisitedMap[BB] = true;
    549 
    550       // ReturnBlock here means the block after the outline call
    551       BasicBlock *ReturnBlock = ExitBlock->getSingleSuccessor();
    552       FunctionOutliningMultiRegionInfo::OutlineRegionInfo RegInfo(
    553           DominateVector, DominateVector.front(), ExitBlock, ReturnBlock);
    554       OutliningInfo->ORI.push_back(RegInfo);
    555       LLVM_DEBUG(dbgs() << "Found Cold Candidate starting at block: "
    556                         << DominateVector.front()->getName() << "\n";);
    557       ColdCandidateFound = true;
    558       NumColdRegionsFound++;
    559     }
    560   }
    561 
    562   if (ColdCandidateFound)
    563     return OutliningInfo;
    564 
    565   return std::unique_ptr<FunctionOutliningMultiRegionInfo>();
    566 }
    567 
    568 std::unique_ptr<FunctionOutliningInfo>
    569 PartialInlinerImpl::computeOutliningInfo(Function &F) const {
    570   BasicBlock *EntryBlock = &F.front();
    571   BranchInst *BR = dyn_cast<BranchInst>(EntryBlock->getTerminator());
    572   if (!BR || BR->isUnconditional())
    573     return std::unique_ptr<FunctionOutliningInfo>();
    574 
    575   // Returns true if Succ is BB's successor
    576   auto IsSuccessor = [](BasicBlock *Succ, BasicBlock *BB) {
    577     return is_contained(successors(BB), Succ);
    578   };
    579 
    580   auto IsReturnBlock = [](BasicBlock *BB) {
    581     Instruction *TI = BB->getTerminator();
    582     return isa<ReturnInst>(TI);
    583   };
    584 
    585   auto GetReturnBlock = [&](BasicBlock *Succ1, BasicBlock *Succ2) {
    586     if (IsReturnBlock(Succ1))
    587       return std::make_tuple(Succ1, Succ2);
    588     if (IsReturnBlock(Succ2))
    589       return std::make_tuple(Succ2, Succ1);
    590 
    591     return std::make_tuple<BasicBlock *, BasicBlock *>(nullptr, nullptr);
    592   };
    593 
    594   // Detect a triangular shape:
    595   auto GetCommonSucc = [&](BasicBlock *Succ1, BasicBlock *Succ2) {
    596     if (IsSuccessor(Succ1, Succ2))
    597       return std::make_tuple(Succ1, Succ2);
    598     if (IsSuccessor(Succ2, Succ1))
    599       return std::make_tuple(Succ2, Succ1);
    600 
    601     return std::make_tuple<BasicBlock *, BasicBlock *>(nullptr, nullptr);
    602   };
    603 
    604   std::unique_ptr<FunctionOutliningInfo> OutliningInfo =
    605       std::make_unique<FunctionOutliningInfo>();
    606 
    607   BasicBlock *CurrEntry = EntryBlock;
    608   bool CandidateFound = false;
    609   do {
    610     // The number of blocks to be inlined has already reached
    611     // the limit. When MaxNumInlineBlocks is set to 0 or 1, this
    612     // disables partial inlining for the function.
    613     if (OutliningInfo->getNumInlinedBlocks() >= MaxNumInlineBlocks)
    614       break;
    615 
    616     if (succ_size(CurrEntry) != 2)
    617       break;
    618 
    619     BasicBlock *Succ1 = *succ_begin(CurrEntry);
    620     BasicBlock *Succ2 = *(succ_begin(CurrEntry) + 1);
    621 
    622     BasicBlock *ReturnBlock, *NonReturnBlock;
    623     std::tie(ReturnBlock, NonReturnBlock) = GetReturnBlock(Succ1, Succ2);
    624 
    625     if (ReturnBlock) {
    626       OutliningInfo->Entries.push_back(CurrEntry);
    627       OutliningInfo->ReturnBlock = ReturnBlock;
    628       OutliningInfo->NonReturnBlock = NonReturnBlock;
    629       CandidateFound = true;
    630       break;
    631     }
    632 
    633     BasicBlock *CommSucc, *OtherSucc;
    634     std::tie(CommSucc, OtherSucc) = GetCommonSucc(Succ1, Succ2);
    635 
    636     if (!CommSucc)
    637       break;
    638 
    639     OutliningInfo->Entries.push_back(CurrEntry);
    640     CurrEntry = OtherSucc;
    641   } while (true);
    642 
    643   if (!CandidateFound)
    644     return std::unique_ptr<FunctionOutliningInfo>();
    645 
    646   // Do sanity check of the entries: threre should not
    647   // be any successors (not in the entry set) other than
    648   // {ReturnBlock, NonReturnBlock}
    649   assert(OutliningInfo->Entries[0] == &F.front() &&
    650          "Function Entry must be the first in Entries vector");
    651   DenseSet<BasicBlock *> Entries;
    652   for (BasicBlock *E : OutliningInfo->Entries)
    653     Entries.insert(E);
    654 
    655   // Returns true of BB has Predecessor which is not
    656   // in Entries set.
    657   auto HasNonEntryPred = [Entries](BasicBlock *BB) {
    658     for (auto *Pred : predecessors(BB)) {
    659       if (!Entries.count(Pred))
    660         return true;
    661     }
    662     return false;
    663   };
    664   auto CheckAndNormalizeCandidate =
    665       [Entries, HasNonEntryPred](FunctionOutliningInfo *OutliningInfo) {
    666         for (BasicBlock *E : OutliningInfo->Entries) {
    667           for (auto *Succ : successors(E)) {
    668             if (Entries.count(Succ))
    669               continue;
    670             if (Succ == OutliningInfo->ReturnBlock)
    671               OutliningInfo->ReturnBlockPreds.push_back(E);
    672             else if (Succ != OutliningInfo->NonReturnBlock)
    673               return false;
    674           }
    675           // There should not be any outside incoming edges either:
    676           if (HasNonEntryPred(E))
    677             return false;
    678         }
    679         return true;
    680       };
    681 
    682   if (!CheckAndNormalizeCandidate(OutliningInfo.get()))
    683     return std::unique_ptr<FunctionOutliningInfo>();
    684 
    685   // Now further growing the candidate's inlining region by
    686   // peeling off dominating blocks from the outlining region:
    687   while (OutliningInfo->getNumInlinedBlocks() < MaxNumInlineBlocks) {
    688     BasicBlock *Cand = OutliningInfo->NonReturnBlock;
    689     if (succ_size(Cand) != 2)
    690       break;
    691 
    692     if (HasNonEntryPred(Cand))
    693       break;
    694 
    695     BasicBlock *Succ1 = *succ_begin(Cand);
    696     BasicBlock *Succ2 = *(succ_begin(Cand) + 1);
    697 
    698     BasicBlock *ReturnBlock, *NonReturnBlock;
    699     std::tie(ReturnBlock, NonReturnBlock) = GetReturnBlock(Succ1, Succ2);
    700     if (!ReturnBlock || ReturnBlock != OutliningInfo->ReturnBlock)
    701       break;
    702 
    703     if (NonReturnBlock->getSinglePredecessor() != Cand)
    704       break;
    705 
    706     // Now grow and update OutlininigInfo:
    707     OutliningInfo->Entries.push_back(Cand);
    708     OutliningInfo->NonReturnBlock = NonReturnBlock;
    709     OutliningInfo->ReturnBlockPreds.push_back(Cand);
    710     Entries.insert(Cand);
    711   }
    712 
    713   return OutliningInfo;
    714 }
    715 
    716 // Check if there is PGO data or user annotated branch data:
    717 static bool hasProfileData(const Function &F, const FunctionOutliningInfo &OI) {
    718   if (F.hasProfileData())
    719     return true;
    720   // Now check if any of the entry block has MD_prof data:
    721   for (auto *E : OI.Entries) {
    722     BranchInst *BR = dyn_cast<BranchInst>(E->getTerminator());
    723     if (!BR || BR->isUnconditional())
    724       continue;
    725     uint64_t T, F;
    726     if (BR->extractProfMetadata(T, F))
    727       return true;
    728   }
    729   return false;
    730 }
    731 
    732 BranchProbability PartialInlinerImpl::getOutliningCallBBRelativeFreq(
    733     FunctionCloner &Cloner) const {
    734   BasicBlock *OutliningCallBB = Cloner.OutlinedFunctions.back().second;
    735   auto EntryFreq =
    736       Cloner.ClonedFuncBFI->getBlockFreq(&Cloner.ClonedFunc->getEntryBlock());
    737   auto OutliningCallFreq =
    738       Cloner.ClonedFuncBFI->getBlockFreq(OutliningCallBB);
    739   // FIXME Hackery needed because ClonedFuncBFI is based on the function BEFORE
    740   // we outlined any regions, so we may encounter situations where the
    741   // OutliningCallFreq is *slightly* bigger than the EntryFreq.
    742   if (OutliningCallFreq.getFrequency() > EntryFreq.getFrequency())
    743     OutliningCallFreq = EntryFreq;
    744 
    745   auto OutlineRegionRelFreq = BranchProbability::getBranchProbability(
    746       OutliningCallFreq.getFrequency(), EntryFreq.getFrequency());
    747 
    748   if (hasProfileData(*Cloner.OrigFunc, *Cloner.ClonedOI.get()))
    749     return OutlineRegionRelFreq;
    750 
    751   // When profile data is not available, we need to be conservative in
    752   // estimating the overall savings. Static branch prediction can usually
    753   // guess the branch direction right (taken/non-taken), but the guessed
    754   // branch probability is usually not biased enough. In case when the
    755   // outlined region is predicted to be likely, its probability needs
    756   // to be made higher (more biased) to not under-estimate the cost of
    757   // function outlining. On the other hand, if the outlined region
    758   // is predicted to be less likely, the predicted probablity is usually
    759   // higher than the actual. For instance, the actual probability of the
    760   // less likely target is only 5%, but the guessed probablity can be
    761   // 40%. In the latter case, there is no need for further adjustement.
    762   // FIXME: add an option for this.
    763   if (OutlineRegionRelFreq < BranchProbability(45, 100))
    764     return OutlineRegionRelFreq;
    765 
    766   OutlineRegionRelFreq = std::max(
    767       OutlineRegionRelFreq, BranchProbability(OutlineRegionFreqPercent, 100));
    768 
    769   return OutlineRegionRelFreq;
    770 }
    771 
    772 bool PartialInlinerImpl::shouldPartialInline(
    773     CallBase &CB, FunctionCloner &Cloner, BlockFrequency WeightedOutliningRcost,
    774     OptimizationRemarkEmitter &ORE) const {
    775   using namespace ore;
    776 
    777   Function *Callee = CB.getCalledFunction();
    778   assert(Callee == Cloner.ClonedFunc);
    779 
    780   if (SkipCostAnalysis)
    781     return isInlineViable(*Callee).isSuccess();
    782 
    783   Function *Caller = CB.getCaller();
    784   auto &CalleeTTI = GetTTI(*Callee);
    785   bool RemarksEnabled =
    786       Callee->getContext().getDiagHandlerPtr()->isMissedOptRemarkEnabled(
    787           DEBUG_TYPE);
    788   InlineCost IC =
    789       getInlineCost(CB, getInlineParams(), CalleeTTI, GetAssumptionCache,
    790                     GetTLI, GetBFI, &PSI, RemarksEnabled ? &ORE : nullptr);
    791 
    792   if (IC.isAlways()) {
    793     ORE.emit([&]() {
    794       return OptimizationRemarkAnalysis(DEBUG_TYPE, "AlwaysInline", &CB)
    795              << NV("Callee", Cloner.OrigFunc)
    796              << " should always be fully inlined, not partially";
    797     });
    798     return false;
    799   }
    800 
    801   if (IC.isNever()) {
    802     ORE.emit([&]() {
    803       return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline", &CB)
    804              << NV("Callee", Cloner.OrigFunc) << " not partially inlined into "
    805              << NV("Caller", Caller)
    806              << " because it should never be inlined (cost=never)";
    807     });
    808     return false;
    809   }
    810 
    811   if (!IC) {
    812     ORE.emit([&]() {
    813       return OptimizationRemarkAnalysis(DEBUG_TYPE, "TooCostly", &CB)
    814              << NV("Callee", Cloner.OrigFunc) << " not partially inlined into "
    815              << NV("Caller", Caller) << " because too costly to inline (cost="
    816              << NV("Cost", IC.getCost()) << ", threshold="
    817              << NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")";
    818     });
    819     return false;
    820   }
    821   const DataLayout &DL = Caller->getParent()->getDataLayout();
    822 
    823   // The savings of eliminating the call:
    824   int NonWeightedSavings = getCallsiteCost(CB, DL);
    825   BlockFrequency NormWeightedSavings(NonWeightedSavings);
    826 
    827   // Weighted saving is smaller than weighted cost, return false
    828   if (NormWeightedSavings < WeightedOutliningRcost) {
    829     ORE.emit([&]() {
    830       return OptimizationRemarkAnalysis(DEBUG_TYPE, "OutliningCallcostTooHigh",
    831                                         &CB)
    832              << NV("Callee", Cloner.OrigFunc) << " not partially inlined into "
    833              << NV("Caller", Caller) << " runtime overhead (overhead="
    834              << NV("Overhead", (unsigned)WeightedOutliningRcost.getFrequency())
    835              << ", savings="
    836              << NV("Savings", (unsigned)NormWeightedSavings.getFrequency())
    837              << ")"
    838              << " of making the outlined call is too high";
    839     });
    840 
    841     return false;
    842   }
    843 
    844   ORE.emit([&]() {
    845     return OptimizationRemarkAnalysis(DEBUG_TYPE, "CanBePartiallyInlined", &CB)
    846            << NV("Callee", Cloner.OrigFunc) << " can be partially inlined into "
    847            << NV("Caller", Caller) << " with cost=" << NV("Cost", IC.getCost())
    848            << " (threshold="
    849            << NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")";
    850   });
    851   return true;
    852 }
    853 
    854 // TODO: Ideally  we should share Inliner's InlineCost Analysis code.
    855 // For now use a simplified version. The returned 'InlineCost' will be used
    856 // to esimate the size cost as well as runtime cost of the BB.
    857 InstructionCost
    858 PartialInlinerImpl::computeBBInlineCost(BasicBlock *BB,
    859                                         TargetTransformInfo *TTI) {
    860   InstructionCost InlineCost = 0;
    861   const DataLayout &DL = BB->getParent()->getParent()->getDataLayout();
    862   for (Instruction &I : BB->instructionsWithoutDebug()) {
    863     // Skip free instructions.
    864     switch (I.getOpcode()) {
    865     case Instruction::BitCast:
    866     case Instruction::PtrToInt:
    867     case Instruction::IntToPtr:
    868     case Instruction::Alloca:
    869     case Instruction::PHI:
    870       continue;
    871     case Instruction::GetElementPtr:
    872       if (cast<GetElementPtrInst>(&I)->hasAllZeroIndices())
    873         continue;
    874       break;
    875     default:
    876       break;
    877     }
    878 
    879     if (I.isLifetimeStartOrEnd())
    880       continue;
    881 
    882     if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
    883       Intrinsic::ID IID = II->getIntrinsicID();
    884       SmallVector<Type *, 4> Tys;
    885       FastMathFlags FMF;
    886       for (Value *Val : II->args())
    887         Tys.push_back(Val->getType());
    888 
    889       if (auto *FPMO = dyn_cast<FPMathOperator>(II))
    890         FMF = FPMO->getFastMathFlags();
    891 
    892       IntrinsicCostAttributes ICA(IID, II->getType(), Tys, FMF);
    893       InlineCost += TTI->getIntrinsicInstrCost(ICA, TTI::TCK_SizeAndLatency);
    894       continue;
    895     }
    896 
    897     if (CallInst *CI = dyn_cast<CallInst>(&I)) {
    898       InlineCost += getCallsiteCost(*CI, DL);
    899       continue;
    900     }
    901 
    902     if (InvokeInst *II = dyn_cast<InvokeInst>(&I)) {
    903       InlineCost += getCallsiteCost(*II, DL);
    904       continue;
    905     }
    906 
    907     if (SwitchInst *SI = dyn_cast<SwitchInst>(&I)) {
    908       InlineCost += (SI->getNumCases() + 1) * InlineConstants::InstrCost;
    909       continue;
    910     }
    911     InlineCost += InlineConstants::InstrCost;
    912   }
    913 
    914   return InlineCost;
    915 }
    916 
    917 std::tuple<InstructionCost, InstructionCost>
    918 PartialInlinerImpl::computeOutliningCosts(FunctionCloner &Cloner) const {
    919   InstructionCost OutliningFuncCallCost = 0, OutlinedFunctionCost = 0;
    920   for (auto FuncBBPair : Cloner.OutlinedFunctions) {
    921     Function *OutlinedFunc = FuncBBPair.first;
    922     BasicBlock* OutliningCallBB = FuncBBPair.second;
    923     // Now compute the cost of the call sequence to the outlined function
    924     // 'OutlinedFunction' in BB 'OutliningCallBB':
    925     auto *OutlinedFuncTTI = &GetTTI(*OutlinedFunc);
    926     OutliningFuncCallCost +=
    927         computeBBInlineCost(OutliningCallBB, OutlinedFuncTTI);
    928 
    929     // Now compute the cost of the extracted/outlined function itself:
    930     for (BasicBlock &BB : *OutlinedFunc)
    931       OutlinedFunctionCost += computeBBInlineCost(&BB, OutlinedFuncTTI);
    932   }
    933   assert(OutlinedFunctionCost >= Cloner.OutlinedRegionCost &&
    934          "Outlined function cost should be no less than the outlined region");
    935 
    936   // The code extractor introduces a new root and exit stub blocks with
    937   // additional unconditional branches. Those branches will be eliminated
    938   // later with bb layout. The cost should be adjusted accordingly:
    939   OutlinedFunctionCost -=
    940       2 * InlineConstants::InstrCost * Cloner.OutlinedFunctions.size();
    941 
    942   InstructionCost OutliningRuntimeOverhead =
    943       OutliningFuncCallCost +
    944       (OutlinedFunctionCost - Cloner.OutlinedRegionCost) +
    945       ExtraOutliningPenalty.getValue();
    946 
    947   return std::make_tuple(OutliningFuncCallCost, OutliningRuntimeOverhead);
    948 }
    949 
    950 // Create the callsite to profile count map which is
    951 // used to update the original function's entry count,
    952 // after the function is partially inlined into the callsite.
    953 void PartialInlinerImpl::computeCallsiteToProfCountMap(
    954     Function *DuplicateFunction,
    955     DenseMap<User *, uint64_t> &CallSiteToProfCountMap) const {
    956   std::vector<User *> Users(DuplicateFunction->user_begin(),
    957                             DuplicateFunction->user_end());
    958   Function *CurrentCaller = nullptr;
    959   std::unique_ptr<BlockFrequencyInfo> TempBFI;
    960   BlockFrequencyInfo *CurrentCallerBFI = nullptr;
    961 
    962   auto ComputeCurrBFI = [&,this](Function *Caller) {
    963       // For the old pass manager:
    964       if (!GetBFI) {
    965         DominatorTree DT(*Caller);
    966         LoopInfo LI(DT);
    967         BranchProbabilityInfo BPI(*Caller, LI);
    968         TempBFI.reset(new BlockFrequencyInfo(*Caller, BPI, LI));
    969         CurrentCallerBFI = TempBFI.get();
    970       } else {
    971         // New pass manager:
    972         CurrentCallerBFI = &(GetBFI(*Caller));
    973       }
    974   };
    975 
    976   for (User *User : Users) {
    977     CallBase *CB = getSupportedCallBase(User);
    978     Function *Caller = CB->getCaller();
    979     if (CurrentCaller != Caller) {
    980       CurrentCaller = Caller;
    981       ComputeCurrBFI(Caller);
    982     } else {
    983       assert(CurrentCallerBFI && "CallerBFI is not set");
    984     }
    985     BasicBlock *CallBB = CB->getParent();
    986     auto Count = CurrentCallerBFI->getBlockProfileCount(CallBB);
    987     if (Count)
    988       CallSiteToProfCountMap[User] = *Count;
    989     else
    990       CallSiteToProfCountMap[User] = 0;
    991   }
    992 }
    993 
    994 PartialInlinerImpl::FunctionCloner::FunctionCloner(
    995     Function *F, FunctionOutliningInfo *OI, OptimizationRemarkEmitter &ORE,
    996     function_ref<AssumptionCache *(Function &)> LookupAC,
    997     function_ref<TargetTransformInfo &(Function &)> GetTTI)
    998     : OrigFunc(F), ORE(ORE), LookupAC(LookupAC), GetTTI(GetTTI) {
    999   ClonedOI = std::make_unique<FunctionOutliningInfo>();
   1000 
   1001   // Clone the function, so that we can hack away on it.
   1002   ValueToValueMapTy VMap;
   1003   ClonedFunc = CloneFunction(F, VMap);
   1004 
   1005   ClonedOI->ReturnBlock = cast<BasicBlock>(VMap[OI->ReturnBlock]);
   1006   ClonedOI->NonReturnBlock = cast<BasicBlock>(VMap[OI->NonReturnBlock]);
   1007   for (BasicBlock *BB : OI->Entries)
   1008     ClonedOI->Entries.push_back(cast<BasicBlock>(VMap[BB]));
   1009 
   1010   for (BasicBlock *E : OI->ReturnBlockPreds) {
   1011     BasicBlock *NewE = cast<BasicBlock>(VMap[E]);
   1012     ClonedOI->ReturnBlockPreds.push_back(NewE);
   1013   }
   1014   // Go ahead and update all uses to the duplicate, so that we can just
   1015   // use the inliner functionality when we're done hacking.
   1016   F->replaceAllUsesWith(ClonedFunc);
   1017 }
   1018 
   1019 PartialInlinerImpl::FunctionCloner::FunctionCloner(
   1020     Function *F, FunctionOutliningMultiRegionInfo *OI,
   1021     OptimizationRemarkEmitter &ORE,
   1022     function_ref<AssumptionCache *(Function &)> LookupAC,
   1023     function_ref<TargetTransformInfo &(Function &)> GetTTI)
   1024     : OrigFunc(F), ORE(ORE), LookupAC(LookupAC), GetTTI(GetTTI) {
   1025   ClonedOMRI = std::make_unique<FunctionOutliningMultiRegionInfo>();
   1026 
   1027   // Clone the function, so that we can hack away on it.
   1028   ValueToValueMapTy VMap;
   1029   ClonedFunc = CloneFunction(F, VMap);
   1030 
   1031   // Go through all Outline Candidate Regions and update all BasicBlock
   1032   // information.
   1033   for (FunctionOutliningMultiRegionInfo::OutlineRegionInfo RegionInfo :
   1034        OI->ORI) {
   1035     SmallVector<BasicBlock *, 8> Region;
   1036     for (BasicBlock *BB : RegionInfo.Region)
   1037       Region.push_back(cast<BasicBlock>(VMap[BB]));
   1038 
   1039     BasicBlock *NewEntryBlock = cast<BasicBlock>(VMap[RegionInfo.EntryBlock]);
   1040     BasicBlock *NewExitBlock = cast<BasicBlock>(VMap[RegionInfo.ExitBlock]);
   1041     BasicBlock *NewReturnBlock = nullptr;
   1042     if (RegionInfo.ReturnBlock)
   1043       NewReturnBlock = cast<BasicBlock>(VMap[RegionInfo.ReturnBlock]);
   1044     FunctionOutliningMultiRegionInfo::OutlineRegionInfo MappedRegionInfo(
   1045         Region, NewEntryBlock, NewExitBlock, NewReturnBlock);
   1046     ClonedOMRI->ORI.push_back(MappedRegionInfo);
   1047   }
   1048   // Go ahead and update all uses to the duplicate, so that we can just
   1049   // use the inliner functionality when we're done hacking.
   1050   F->replaceAllUsesWith(ClonedFunc);
   1051 }
   1052 
   1053 void PartialInlinerImpl::FunctionCloner::normalizeReturnBlock() const {
   1054   auto GetFirstPHI = [](BasicBlock *BB) {
   1055     BasicBlock::iterator I = BB->begin();
   1056     PHINode *FirstPhi = nullptr;
   1057     while (I != BB->end()) {
   1058       PHINode *Phi = dyn_cast<PHINode>(I);
   1059       if (!Phi)
   1060         break;
   1061       if (!FirstPhi) {
   1062         FirstPhi = Phi;
   1063         break;
   1064       }
   1065     }
   1066     return FirstPhi;
   1067   };
   1068 
   1069   // Shouldn't need to normalize PHIs if we're not outlining non-early return
   1070   // blocks.
   1071   if (!ClonedOI)
   1072     return;
   1073 
   1074   // Special hackery is needed with PHI nodes that have inputs from more than
   1075   // one extracted block.  For simplicity, just split the PHIs into a two-level
   1076   // sequence of PHIs, some of which will go in the extracted region, and some
   1077   // of which will go outside.
   1078   BasicBlock *PreReturn = ClonedOI->ReturnBlock;
   1079   // only split block when necessary:
   1080   PHINode *FirstPhi = GetFirstPHI(PreReturn);
   1081   unsigned NumPredsFromEntries = ClonedOI->ReturnBlockPreds.size();
   1082 
   1083   if (!FirstPhi || FirstPhi->getNumIncomingValues() <= NumPredsFromEntries + 1)
   1084     return;
   1085 
   1086   auto IsTrivialPhi = [](PHINode *PN) -> Value * {
   1087     Value *CommonValue = PN->getIncomingValue(0);
   1088     if (all_of(PN->incoming_values(),
   1089                [&](Value *V) { return V == CommonValue; }))
   1090       return CommonValue;
   1091     return nullptr;
   1092   };
   1093 
   1094   ClonedOI->ReturnBlock = ClonedOI->ReturnBlock->splitBasicBlock(
   1095       ClonedOI->ReturnBlock->getFirstNonPHI()->getIterator());
   1096   BasicBlock::iterator I = PreReturn->begin();
   1097   Instruction *Ins = &ClonedOI->ReturnBlock->front();
   1098   SmallVector<Instruction *, 4> DeadPhis;
   1099   while (I != PreReturn->end()) {
   1100     PHINode *OldPhi = dyn_cast<PHINode>(I);
   1101     if (!OldPhi)
   1102       break;
   1103 
   1104     PHINode *RetPhi =
   1105         PHINode::Create(OldPhi->getType(), NumPredsFromEntries + 1, "", Ins);
   1106     OldPhi->replaceAllUsesWith(RetPhi);
   1107     Ins = ClonedOI->ReturnBlock->getFirstNonPHI();
   1108 
   1109     RetPhi->addIncoming(&*I, PreReturn);
   1110     for (BasicBlock *E : ClonedOI->ReturnBlockPreds) {
   1111       RetPhi->addIncoming(OldPhi->getIncomingValueForBlock(E), E);
   1112       OldPhi->removeIncomingValue(E);
   1113     }
   1114 
   1115     // After incoming values splitting, the old phi may become trivial.
   1116     // Keeping the trivial phi can introduce definition inside the outline
   1117     // region which is live-out, causing necessary overhead (load, store
   1118     // arg passing etc).
   1119     if (auto *OldPhiVal = IsTrivialPhi(OldPhi)) {
   1120       OldPhi->replaceAllUsesWith(OldPhiVal);
   1121       DeadPhis.push_back(OldPhi);
   1122     }
   1123     ++I;
   1124   }
   1125   for (auto *DP : DeadPhis)
   1126     DP->eraseFromParent();
   1127 
   1128   for (auto *E : ClonedOI->ReturnBlockPreds)
   1129     E->getTerminator()->replaceUsesOfWith(PreReturn, ClonedOI->ReturnBlock);
   1130 }
   1131 
   1132 bool PartialInlinerImpl::FunctionCloner::doMultiRegionFunctionOutlining() {
   1133 
   1134   auto ComputeRegionCost =
   1135       [&](SmallVectorImpl<BasicBlock *> &Region) -> InstructionCost {
   1136     InstructionCost Cost = 0;
   1137     for (BasicBlock* BB : Region)
   1138       Cost += computeBBInlineCost(BB, &GetTTI(*BB->getParent()));
   1139     return Cost;
   1140   };
   1141 
   1142   assert(ClonedOMRI && "Expecting OutlineInfo for multi region outline");
   1143 
   1144   if (ClonedOMRI->ORI.empty())
   1145     return false;
   1146 
   1147   // The CodeExtractor needs a dominator tree.
   1148   DominatorTree DT;
   1149   DT.recalculate(*ClonedFunc);
   1150 
   1151   // Manually calculate a BlockFrequencyInfo and BranchProbabilityInfo.
   1152   LoopInfo LI(DT);
   1153   BranchProbabilityInfo BPI(*ClonedFunc, LI);
   1154   ClonedFuncBFI.reset(new BlockFrequencyInfo(*ClonedFunc, BPI, LI));
   1155 
   1156   // Cache and recycle the CodeExtractor analysis to avoid O(n^2) compile-time.
   1157   CodeExtractorAnalysisCache CEAC(*ClonedFunc);
   1158 
   1159   SetVector<Value *> Inputs, Outputs, Sinks;
   1160   for (FunctionOutliningMultiRegionInfo::OutlineRegionInfo RegionInfo :
   1161        ClonedOMRI->ORI) {
   1162     InstructionCost CurrentOutlinedRegionCost =
   1163         ComputeRegionCost(RegionInfo.Region);
   1164 
   1165     CodeExtractor CE(RegionInfo.Region, &DT, /*AggregateArgs*/ false,
   1166                      ClonedFuncBFI.get(), &BPI,
   1167                      LookupAC(*RegionInfo.EntryBlock->getParent()),
   1168                      /* AllowVarargs */ false);
   1169 
   1170     CE.findInputsOutputs(Inputs, Outputs, Sinks);
   1171 
   1172     LLVM_DEBUG({
   1173       dbgs() << "inputs: " << Inputs.size() << "\n";
   1174       dbgs() << "outputs: " << Outputs.size() << "\n";
   1175       for (Value *value : Inputs)
   1176         dbgs() << "value used in func: " << *value << "\n";
   1177       for (Value *output : Outputs)
   1178         dbgs() << "instr used in func: " << *output << "\n";
   1179     });
   1180 
   1181     // Do not extract regions that have live exit variables.
   1182     if (Outputs.size() > 0 && !ForceLiveExit)
   1183       continue;
   1184 
   1185     if (Function *OutlinedFunc = CE.extractCodeRegion(CEAC)) {
   1186       CallBase *OCS = PartialInlinerImpl::getOneCallSiteTo(*OutlinedFunc);
   1187       BasicBlock *OutliningCallBB = OCS->getParent();
   1188       assert(OutliningCallBB->getParent() == ClonedFunc);
   1189       OutlinedFunctions.push_back(std::make_pair(OutlinedFunc,OutliningCallBB));
   1190       NumColdRegionsOutlined++;
   1191       OutlinedRegionCost += CurrentOutlinedRegionCost;
   1192 
   1193       if (MarkOutlinedColdCC) {
   1194         OutlinedFunc->setCallingConv(CallingConv::Cold);
   1195         OCS->setCallingConv(CallingConv::Cold);
   1196       }
   1197     } else
   1198       ORE.emit([&]() {
   1199         return OptimizationRemarkMissed(DEBUG_TYPE, "ExtractFailed",
   1200                                         &RegionInfo.Region.front()->front())
   1201                << "Failed to extract region at block "
   1202                << ore::NV("Block", RegionInfo.Region.front());
   1203       });
   1204   }
   1205 
   1206   return !OutlinedFunctions.empty();
   1207 }
   1208 
   1209 Function *
   1210 PartialInlinerImpl::FunctionCloner::doSingleRegionFunctionOutlining() {
   1211   // Returns true if the block is to be partial inlined into the caller
   1212   // (i.e. not to be extracted to the out of line function)
   1213   auto ToBeInlined = [&, this](BasicBlock *BB) {
   1214     return BB == ClonedOI->ReturnBlock ||
   1215            llvm::is_contained(ClonedOI->Entries, BB);
   1216   };
   1217 
   1218   assert(ClonedOI && "Expecting OutlineInfo for single region outline");
   1219   // The CodeExtractor needs a dominator tree.
   1220   DominatorTree DT;
   1221   DT.recalculate(*ClonedFunc);
   1222 
   1223   // Manually calculate a BlockFrequencyInfo and BranchProbabilityInfo.
   1224   LoopInfo LI(DT);
   1225   BranchProbabilityInfo BPI(*ClonedFunc, LI);
   1226   ClonedFuncBFI.reset(new BlockFrequencyInfo(*ClonedFunc, BPI, LI));
   1227 
   1228   // Gather up the blocks that we're going to extract.
   1229   std::vector<BasicBlock *> ToExtract;
   1230   auto *ClonedFuncTTI = &GetTTI(*ClonedFunc);
   1231   ToExtract.push_back(ClonedOI->NonReturnBlock);
   1232   OutlinedRegionCost += PartialInlinerImpl::computeBBInlineCost(
   1233       ClonedOI->NonReturnBlock, ClonedFuncTTI);
   1234   for (BasicBlock &BB : *ClonedFunc)
   1235     if (!ToBeInlined(&BB) && &BB != ClonedOI->NonReturnBlock) {
   1236       ToExtract.push_back(&BB);
   1237       // FIXME: the code extractor may hoist/sink more code
   1238       // into the outlined function which may make the outlining
   1239       // overhead (the difference of the outlined function cost
   1240       // and OutliningRegionCost) look larger.
   1241       OutlinedRegionCost += computeBBInlineCost(&BB, ClonedFuncTTI);
   1242     }
   1243 
   1244   // Extract the body of the if.
   1245   CodeExtractorAnalysisCache CEAC(*ClonedFunc);
   1246   Function *OutlinedFunc =
   1247       CodeExtractor(ToExtract, &DT, /*AggregateArgs*/ false,
   1248                     ClonedFuncBFI.get(), &BPI, LookupAC(*ClonedFunc),
   1249                     /* AllowVarargs */ true)
   1250           .extractCodeRegion(CEAC);
   1251 
   1252   if (OutlinedFunc) {
   1253     BasicBlock *OutliningCallBB =
   1254         PartialInlinerImpl::getOneCallSiteTo(*OutlinedFunc)->getParent();
   1255     assert(OutliningCallBB->getParent() == ClonedFunc);
   1256     OutlinedFunctions.push_back(std::make_pair(OutlinedFunc, OutliningCallBB));
   1257   } else
   1258     ORE.emit([&]() {
   1259       return OptimizationRemarkMissed(DEBUG_TYPE, "ExtractFailed",
   1260                                       &ToExtract.front()->front())
   1261              << "Failed to extract region at block "
   1262              << ore::NV("Block", ToExtract.front());
   1263     });
   1264 
   1265   return OutlinedFunc;
   1266 }
   1267 
   1268 PartialInlinerImpl::FunctionCloner::~FunctionCloner() {
   1269   // Ditch the duplicate, since we're done with it, and rewrite all remaining
   1270   // users (function pointers, etc.) back to the original function.
   1271   ClonedFunc->replaceAllUsesWith(OrigFunc);
   1272   ClonedFunc->eraseFromParent();
   1273   if (!IsFunctionInlined) {
   1274     // Remove each function that was speculatively created if there is no
   1275     // reference.
   1276     for (auto FuncBBPair : OutlinedFunctions) {
   1277       Function *Func = FuncBBPair.first;
   1278       Func->eraseFromParent();
   1279     }
   1280   }
   1281 }
   1282 
   1283 std::pair<bool, Function *> PartialInlinerImpl::unswitchFunction(Function &F) {
   1284   if (F.hasAddressTaken())
   1285     return {false, nullptr};
   1286 
   1287   // Let inliner handle it
   1288   if (F.hasFnAttribute(Attribute::AlwaysInline))
   1289     return {false, nullptr};
   1290 
   1291   if (F.hasFnAttribute(Attribute::NoInline))
   1292     return {false, nullptr};
   1293 
   1294   if (PSI.isFunctionEntryCold(&F))
   1295     return {false, nullptr};
   1296 
   1297   if (F.users().empty())
   1298     return {false, nullptr};
   1299 
   1300   OptimizationRemarkEmitter ORE(&F);
   1301 
   1302   // Only try to outline cold regions if we have a profile summary, which
   1303   // implies we have profiling information.
   1304   if (PSI.hasProfileSummary() && F.hasProfileData() &&
   1305       !DisableMultiRegionPartialInline) {
   1306     std::unique_ptr<FunctionOutliningMultiRegionInfo> OMRI =
   1307         computeOutliningColdRegionsInfo(F, ORE);
   1308     if (OMRI) {
   1309       FunctionCloner Cloner(&F, OMRI.get(), ORE, LookupAssumptionCache, GetTTI);
   1310 
   1311       LLVM_DEBUG({
   1312         dbgs() << "HotCountThreshold = " << PSI.getHotCountThreshold() << "\n";
   1313         dbgs() << "ColdCountThreshold = " << PSI.getColdCountThreshold()
   1314                << "\n";
   1315       });
   1316 
   1317       bool DidOutline = Cloner.doMultiRegionFunctionOutlining();
   1318 
   1319       if (DidOutline) {
   1320         LLVM_DEBUG({
   1321           dbgs() << ">>>>>> Outlined (Cloned) Function >>>>>>\n";
   1322           Cloner.ClonedFunc->print(dbgs());
   1323           dbgs() << "<<<<<< Outlined (Cloned) Function <<<<<<\n";
   1324         });
   1325 
   1326         if (tryPartialInline(Cloner))
   1327           return {true, nullptr};
   1328       }
   1329     }
   1330   }
   1331 
   1332   // Fall-thru to regular partial inlining if we:
   1333   //    i) can't find any cold regions to outline, or
   1334   //   ii) can't inline the outlined function anywhere.
   1335   std::unique_ptr<FunctionOutliningInfo> OI = computeOutliningInfo(F);
   1336   if (!OI)
   1337     return {false, nullptr};
   1338 
   1339   FunctionCloner Cloner(&F, OI.get(), ORE, LookupAssumptionCache, GetTTI);
   1340   Cloner.normalizeReturnBlock();
   1341 
   1342   Function *OutlinedFunction = Cloner.doSingleRegionFunctionOutlining();
   1343 
   1344   if (!OutlinedFunction)
   1345     return {false, nullptr};
   1346 
   1347   if (tryPartialInline(Cloner))
   1348     return {true, OutlinedFunction};
   1349 
   1350   return {false, nullptr};
   1351 }
   1352 
   1353 bool PartialInlinerImpl::tryPartialInline(FunctionCloner &Cloner) {
   1354   if (Cloner.OutlinedFunctions.empty())
   1355     return false;
   1356 
   1357   int SizeCost = 0;
   1358   BlockFrequency WeightedRcost;
   1359   int NonWeightedRcost;
   1360 
   1361   auto OutliningCosts = computeOutliningCosts(Cloner);
   1362   assert(std::get<0>(OutliningCosts).isValid() &&
   1363          std::get<1>(OutliningCosts).isValid() && "Expected valid costs");
   1364 
   1365   SizeCost = *std::get<0>(OutliningCosts).getValue();
   1366   NonWeightedRcost = *std::get<1>(OutliningCosts).getValue();
   1367 
   1368   // Only calculate RelativeToEntryFreq when we are doing single region
   1369   // outlining.
   1370   BranchProbability RelativeToEntryFreq;
   1371   if (Cloner.ClonedOI)
   1372     RelativeToEntryFreq = getOutliningCallBBRelativeFreq(Cloner);
   1373   else
   1374     // RelativeToEntryFreq doesn't make sense when we have more than one
   1375     // outlined call because each call will have a different relative frequency
   1376     // to the entry block.  We can consider using the average, but the
   1377     // usefulness of that information is questionable. For now, assume we never
   1378     // execute the calls to outlined functions.
   1379     RelativeToEntryFreq = BranchProbability(0, 1);
   1380 
   1381   WeightedRcost = BlockFrequency(NonWeightedRcost) * RelativeToEntryFreq;
   1382 
   1383   // The call sequence(s) to the outlined function(s) are larger than the sum of
   1384   // the original outlined region size(s), it does not increase the chances of
   1385   // inlining the function with outlining (The inliner uses the size increase to
   1386   // model the cost of inlining a callee).
   1387   if (!SkipCostAnalysis && Cloner.OutlinedRegionCost < SizeCost) {
   1388     OptimizationRemarkEmitter OrigFuncORE(Cloner.OrigFunc);
   1389     DebugLoc DLoc;
   1390     BasicBlock *Block;
   1391     std::tie(DLoc, Block) = getOneDebugLoc(*Cloner.ClonedFunc);
   1392     OrigFuncORE.emit([&]() {
   1393       return OptimizationRemarkAnalysis(DEBUG_TYPE, "OutlineRegionTooSmall",
   1394                                         DLoc, Block)
   1395              << ore::NV("Function", Cloner.OrigFunc)
   1396              << " not partially inlined into callers (Original Size = "
   1397              << ore::NV("OutlinedRegionOriginalSize", Cloner.OutlinedRegionCost)
   1398              << ", Size of call sequence to outlined function = "
   1399              << ore::NV("NewSize", SizeCost) << ")";
   1400     });
   1401     return false;
   1402   }
   1403 
   1404   assert(Cloner.OrigFunc->users().empty() &&
   1405          "F's users should all be replaced!");
   1406 
   1407   std::vector<User *> Users(Cloner.ClonedFunc->user_begin(),
   1408                             Cloner.ClonedFunc->user_end());
   1409 
   1410   DenseMap<User *, uint64_t> CallSiteToProfCountMap;
   1411   auto CalleeEntryCount = Cloner.OrigFunc->getEntryCount();
   1412   if (CalleeEntryCount)
   1413     computeCallsiteToProfCountMap(Cloner.ClonedFunc, CallSiteToProfCountMap);
   1414 
   1415   uint64_t CalleeEntryCountV =
   1416       (CalleeEntryCount ? CalleeEntryCount.getCount() : 0);
   1417 
   1418   bool AnyInline = false;
   1419   for (User *User : Users) {
   1420     CallBase *CB = getSupportedCallBase(User);
   1421 
   1422     if (isLimitReached())
   1423       continue;
   1424 
   1425     OptimizationRemarkEmitter CallerORE(CB->getCaller());
   1426     if (!shouldPartialInline(*CB, Cloner, WeightedRcost, CallerORE))
   1427       continue;
   1428 
   1429     // Construct remark before doing the inlining, as after successful inlining
   1430     // the callsite is removed.
   1431     OptimizationRemark OR(DEBUG_TYPE, "PartiallyInlined", CB);
   1432     OR << ore::NV("Callee", Cloner.OrigFunc) << " partially inlined into "
   1433        << ore::NV("Caller", CB->getCaller());
   1434 
   1435     InlineFunctionInfo IFI(nullptr, GetAssumptionCache, &PSI);
   1436     // We can only forward varargs when we outlined a single region, else we
   1437     // bail on vararg functions.
   1438     if (!InlineFunction(*CB, IFI, nullptr, true,
   1439                         (Cloner.ClonedOI ? Cloner.OutlinedFunctions.back().first
   1440                                          : nullptr))
   1441              .isSuccess())
   1442       continue;
   1443 
   1444     CallerORE.emit(OR);
   1445 
   1446     // Now update the entry count:
   1447     if (CalleeEntryCountV && CallSiteToProfCountMap.count(User)) {
   1448       uint64_t CallSiteCount = CallSiteToProfCountMap[User];
   1449       CalleeEntryCountV -= std::min(CalleeEntryCountV, CallSiteCount);
   1450     }
   1451 
   1452     AnyInline = true;
   1453     NumPartialInlining++;
   1454     // Update the stats
   1455     if (Cloner.ClonedOI)
   1456       NumPartialInlined++;
   1457     else
   1458       NumColdOutlinePartialInlined++;
   1459   }
   1460 
   1461   if (AnyInline) {
   1462     Cloner.IsFunctionInlined = true;
   1463     if (CalleeEntryCount)
   1464       Cloner.OrigFunc->setEntryCount(
   1465           CalleeEntryCount.setCount(CalleeEntryCountV));
   1466     OptimizationRemarkEmitter OrigFuncORE(Cloner.OrigFunc);
   1467     OrigFuncORE.emit([&]() {
   1468       return OptimizationRemark(DEBUG_TYPE, "PartiallyInlined", Cloner.OrigFunc)
   1469              << "Partially inlined into at least one caller";
   1470     });
   1471   }
   1472 
   1473   return AnyInline;
   1474 }
   1475 
   1476 bool PartialInlinerImpl::run(Module &M) {
   1477   if (DisablePartialInlining)
   1478     return false;
   1479 
   1480   std::vector<Function *> Worklist;
   1481   Worklist.reserve(M.size());
   1482   for (Function &F : M)
   1483     if (!F.use_empty() && !F.isDeclaration())
   1484       Worklist.push_back(&F);
   1485 
   1486   bool Changed = false;
   1487   while (!Worklist.empty()) {
   1488     Function *CurrFunc = Worklist.back();
   1489     Worklist.pop_back();
   1490 
   1491     if (CurrFunc->use_empty())
   1492       continue;
   1493 
   1494     bool Recursive = false;
   1495     for (User *U : CurrFunc->users())
   1496       if (Instruction *I = dyn_cast<Instruction>(U))
   1497         if (I->getParent()->getParent() == CurrFunc) {
   1498           Recursive = true;
   1499           break;
   1500         }
   1501     if (Recursive)
   1502       continue;
   1503 
   1504     std::pair<bool, Function *> Result = unswitchFunction(*CurrFunc);
   1505     if (Result.second)
   1506       Worklist.push_back(Result.second);
   1507     Changed |= Result.first;
   1508   }
   1509 
   1510   return Changed;
   1511 }
   1512 
// Definition of the legacy pass's static ID member.
// NOTE(review): presumably only the address of ID is used to identify the
// pass, so the initial value is irrelevant — confirm against the legacy pass
// manager documentation.
char PartialInlinerLegacyPass::ID = 0;

// Legacy pass manager registration for PartialInlinerLegacyPass under the
// command-line name "partial-inliner", declaring the four wrapper passes /
// trackers listed below as dependencies.
// NOTE(review): the two trailing `false` arguments are presumably the
// "CFG only" and "is analysis" flags of INITIALIZE_PASS_* — confirm.
INITIALIZE_PASS_BEGIN(PartialInlinerLegacyPass, "partial-inliner",
                      "Partial Inliner", false, false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(PartialInlinerLegacyPass, "partial-inliner",
                    "Partial Inliner", false, false)
   1523 
   1524 ModulePass *llvm::createPartialInliningPass() {
   1525   return new PartialInlinerLegacyPass();
   1526 }
   1527 
   1528 PreservedAnalyses PartialInlinerPass::run(Module &M,
   1529                                           ModuleAnalysisManager &AM) {
   1530   auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
   1531 
   1532   auto GetAssumptionCache = [&FAM](Function &F) -> AssumptionCache & {
   1533     return FAM.getResult<AssumptionAnalysis>(F);
   1534   };
   1535 
   1536   auto LookupAssumptionCache = [&FAM](Function &F) -> AssumptionCache * {
   1537     return FAM.getCachedResult<AssumptionAnalysis>(F);
   1538   };
   1539 
   1540   auto GetBFI = [&FAM](Function &F) -> BlockFrequencyInfo & {
   1541     return FAM.getResult<BlockFrequencyAnalysis>(F);
   1542   };
   1543 
   1544   auto GetTTI = [&FAM](Function &F) -> TargetTransformInfo & {
   1545     return FAM.getResult<TargetIRAnalysis>(F);
   1546   };
   1547 
   1548   auto GetTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
   1549     return FAM.getResult<TargetLibraryAnalysis>(F);
   1550   };
   1551 
   1552   ProfileSummaryInfo &PSI = AM.getResult<ProfileSummaryAnalysis>(M);
   1553 
   1554   if (PartialInlinerImpl(GetAssumptionCache, LookupAssumptionCache, GetTTI,
   1555                          GetTLI, PSI, GetBFI)
   1556           .run(M))
   1557     return PreservedAnalyses::none();
   1558   return PreservedAnalyses::all();
   1559 }
   1560