Home | History | Annotate | Line # | Download | only in Scalar
      1 //===-- LICM.cpp - Loop Invariant Code Motion Pass ------------------------===//
      2 //
      3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
      4 // See https://llvm.org/LICENSE.txt for license information.
      5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
      6 //
      7 //===----------------------------------------------------------------------===//
      8 //
      9 // This pass performs loop invariant code motion, attempting to remove as much
     10 // code from the body of a loop as possible.  It does this by either hoisting
     11 // code into the preheader block, or by sinking code to the exit blocks if it is
     12 // safe.  This pass also promotes must-aliased memory locations in the loop to
     13 // live in registers, thus hoisting and sinking "invariant" loads and stores.
     14 //
     15 // Hoisting operations out of loops is a canonicalization transform.  It
     16 // enables and simplifies subsequent optimizations in the middle-end.
     17 // Rematerialization of hoisted instructions to reduce register pressure is the
     18 // responsibility of the back-end, which has more accurate information about
     19 // register pressure and also handles other optimizations than LICM that
     20 // increase live-ranges.
     21 //
     22 // This pass uses alias analysis for two purposes:
     23 //
     24 //  1. Moving loop invariant loads and calls out of loops.  If we can determine
     25 //     that a load or call inside of a loop never aliases anything stored to,
     26 //     we can hoist it or sink it like any other instruction.
     27 //  2. Scalar Promotion of Memory - If there is a store instruction inside of
     28 //     the loop, we try to move the store to happen AFTER the loop instead of
     29 //     inside of the loop.  This can only happen if a few conditions are true:
     30 //       A. The pointer stored through is loop invariant
     31 //       B. There are no stores or loads in the loop which _may_ alias the
     32 //          pointer.  There are no calls in the loop which mod/ref the pointer.
     33 //     If these conditions are true, we can promote the loads and stores in the
     34 //     loop of the pointer to use a temporary alloca'd variable.  We then use
     35 //     the SSAUpdater to construct the appropriate SSA form for the value.
     36 //
     37 //===----------------------------------------------------------------------===//
     38 
     39 #include "llvm/Transforms/Scalar/LICM.h"
     40 #include "llvm/ADT/SetOperations.h"
     41 #include "llvm/ADT/Statistic.h"
     42 #include "llvm/Analysis/AliasAnalysis.h"
     43 #include "llvm/Analysis/AliasSetTracker.h"
     44 #include "llvm/Analysis/BasicAliasAnalysis.h"
     45 #include "llvm/Analysis/BlockFrequencyInfo.h"
     46 #include "llvm/Analysis/CaptureTracking.h"
     47 #include "llvm/Analysis/ConstantFolding.h"
     48 #include "llvm/Analysis/GlobalsModRef.h"
     49 #include "llvm/Analysis/GuardUtils.h"
     50 #include "llvm/Analysis/LazyBlockFrequencyInfo.h"
     51 #include "llvm/Analysis/Loads.h"
     52 #include "llvm/Analysis/LoopInfo.h"
     53 #include "llvm/Analysis/LoopIterator.h"
     54 #include "llvm/Analysis/LoopPass.h"
     55 #include "llvm/Analysis/MemoryBuiltins.h"
     56 #include "llvm/Analysis/MemorySSA.h"
     57 #include "llvm/Analysis/MemorySSAUpdater.h"
     58 #include "llvm/Analysis/MustExecute.h"
     59 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
     60 #include "llvm/Analysis/ScalarEvolution.h"
     61 #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
     62 #include "llvm/Analysis/TargetLibraryInfo.h"
     63 #include "llvm/Analysis/ValueTracking.h"
     64 #include "llvm/IR/CFG.h"
     65 #include "llvm/IR/Constants.h"
     66 #include "llvm/IR/DataLayout.h"
     67 #include "llvm/IR/DebugInfoMetadata.h"
     68 #include "llvm/IR/DerivedTypes.h"
     69 #include "llvm/IR/Dominators.h"
     70 #include "llvm/IR/Instructions.h"
     71 #include "llvm/IR/IntrinsicInst.h"
     72 #include "llvm/IR/LLVMContext.h"
     73 #include "llvm/IR/Metadata.h"
     74 #include "llvm/IR/PatternMatch.h"
     75 #include "llvm/IR/PredIteratorCache.h"
     76 #include "llvm/InitializePasses.h"
     77 #include "llvm/Support/CommandLine.h"
     78 #include "llvm/Support/Debug.h"
     79 #include "llvm/Support/raw_ostream.h"
     80 #include "llvm/Transforms/Scalar.h"
     81 #include "llvm/Transforms/Scalar/LoopPassManager.h"
     82 #include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
     83 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
     84 #include "llvm/Transforms/Utils/Local.h"
     85 #include "llvm/Transforms/Utils/LoopUtils.h"
     86 #include "llvm/Transforms/Utils/SSAUpdater.h"
     87 #include <algorithm>
     88 #include <utility>
     89 using namespace llvm;
     90 
     91 #define DEBUG_TYPE "licm"
     92 
     93 STATISTIC(NumCreatedBlocks, "Number of blocks created");
     94 STATISTIC(NumClonedBranches, "Number of branches cloned");
     95 STATISTIC(NumSunk, "Number of instructions sunk out of loop");
     96 STATISTIC(NumHoisted, "Number of instructions hoisted out of loop");
     97 STATISTIC(NumMovedLoads, "Number of load insts hoisted or sunk");
     98 STATISTIC(NumMovedCalls, "Number of call insts hoisted or sunk");
     99 STATISTIC(NumPromoted, "Number of memory locations promoted to registers");
    100 
    101 /// Memory promotion is enabled by default.
    102 static cl::opt<bool>
    103     DisablePromotion("disable-licm-promotion", cl::Hidden, cl::init(false),
    104                      cl::desc("Disable memory promotion in LICM pass"));
    105 
    106 static cl::opt<bool> ControlFlowHoisting(
    107     "licm-control-flow-hoisting", cl::Hidden, cl::init(false),
    108     cl::desc("Enable control flow (and PHI) hoisting in LICM"));
    109 
    110 static cl::opt<unsigned> HoistSinkColdnessThreshold(
    111     "licm-coldness-threshold", cl::Hidden, cl::init(4),
    112     cl::desc("Relative coldness Threshold of hoisting/sinking destination "
    113              "block for LICM to be considered beneficial"));
    114 
    115 static cl::opt<uint32_t> MaxNumUsesTraversed(
    116     "licm-max-num-uses-traversed", cl::Hidden, cl::init(8),
    117     cl::desc("Max num uses visited for identifying load "
    118              "invariance in loop using invariant start (default = 8)"));
    119 
    120 // Default value of zero implies we use the regular alias set tracker mechanism
    121 // instead of the cross product using AA to identify aliasing of the memory
    122 // location we are interested in.
    123 static cl::opt<int>
    124 LICMN2Theshold("licm-n2-threshold", cl::Hidden, cl::init(0),
    125                cl::desc("How many instruction to cross product using AA"));
    126 
    127 // Experimental option to allow imprecision in LICM in pathological cases, in
    128 // exchange for faster compile. This is to be removed if MemorySSA starts to
    129 // address the same issue. This flag applies only when LICM uses MemorySSA
    130 // instead on AliasSetTracker. LICM calls MemorySSAWalker's
    131 // getClobberingMemoryAccess, up to the value of the Cap, getting perfect
    132 // accuracy. Afterwards, LICM will call into MemorySSA's getDefiningAccess,
    133 // which may not be precise, since optimizeUses is capped. The result is
    134 // correct, but we may not get as "far up" as possible to get which access is
    135 // clobbering the one queried.
    136 cl::opt<unsigned> llvm::SetLicmMssaOptCap(
    137     "licm-mssa-optimization-cap", cl::init(100), cl::Hidden,
    138     cl::desc("Enable imprecision in LICM in pathological cases, in exchange "
    139              "for faster compile. Caps the MemorySSA clobbering calls."));
    140 
    141 // Experimentally, memory promotion carries less importance than sinking and
    142 // hoisting. Limit when we do promotion when using MemorySSA, in order to save
    143 // compile time.
    144 cl::opt<unsigned> llvm::SetLicmMssaNoAccForPromotionCap(
    145     "licm-mssa-max-acc-promotion", cl::init(250), cl::Hidden,
    146     cl::desc("[LICM & MemorySSA] When MSSA in LICM is disabled, this has no "
    147              "effect. When MSSA in LICM is enabled, then this is the maximum "
    148              "number of accesses allowed to be present in a loop in order to "
    149              "enable memory promotion."));
    150 
    151 static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI);
    152 static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop,
    153                                   const LoopSafetyInfo *SafetyInfo,
    154                                   TargetTransformInfo *TTI, bool &FreeInLoop);
    155 static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
    156                   BasicBlock *Dest, ICFLoopSafetyInfo *SafetyInfo,
    157                   MemorySSAUpdater *MSSAU, ScalarEvolution *SE,
    158                   OptimizationRemarkEmitter *ORE);
    159 static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
    160                  BlockFrequencyInfo *BFI, const Loop *CurLoop,
    161                  ICFLoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU,
    162                  OptimizationRemarkEmitter *ORE);
    163 static bool isSafeToExecuteUnconditionally(Instruction &Inst,
    164                                            const DominatorTree *DT,
    165                                            const TargetLibraryInfo *TLI,
    166                                            const Loop *CurLoop,
    167                                            const LoopSafetyInfo *SafetyInfo,
    168                                            OptimizationRemarkEmitter *ORE,
    169                                            const Instruction *CtxI = nullptr);
    170 static bool pointerInvalidatedByLoop(MemoryLocation MemLoc,
    171                                      AliasSetTracker *CurAST, Loop *CurLoop,
    172                                      AAResults *AA);
    173 static bool pointerInvalidatedByLoopWithMSSA(MemorySSA *MSSA, MemoryUse *MU,
    174                                              Loop *CurLoop, Instruction &I,
    175                                              SinkAndHoistLICMFlags &Flags);
    176 static bool pointerInvalidatedByBlockWithMSSA(BasicBlock &BB, MemorySSA &MSSA,
    177                                               MemoryUse &MU);
    178 static Instruction *cloneInstructionInExitBlock(
    179     Instruction &I, BasicBlock &ExitBlock, PHINode &PN, const LoopInfo *LI,
    180     const LoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU);
    181 
    182 static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo,
    183                              AliasSetTracker *AST, MemorySSAUpdater *MSSAU);
    184 
    185 static void moveInstructionBefore(Instruction &I, Instruction &Dest,
    186                                   ICFLoopSafetyInfo &SafetyInfo,
    187                                   MemorySSAUpdater *MSSAU, ScalarEvolution *SE);
    188 
    189 static void foreachMemoryAccess(MemorySSA *MSSA, Loop *L,
    190                                 function_ref<void(Instruction *)> Fn);
    191 static SmallVector<SmallSetVector<Value *, 8>, 0>
    192 collectPromotionCandidates(MemorySSA *MSSA, AliasAnalysis *AA, Loop *L);
    193 
    194 namespace {
    195 struct LoopInvariantCodeMotion {
    196   bool runOnLoop(Loop *L, AAResults *AA, LoopInfo *LI, DominatorTree *DT,
    197                  BlockFrequencyInfo *BFI, TargetLibraryInfo *TLI,
    198                  TargetTransformInfo *TTI, ScalarEvolution *SE, MemorySSA *MSSA,
    199                  OptimizationRemarkEmitter *ORE);
    200 
    201   LoopInvariantCodeMotion(unsigned LicmMssaOptCap,
    202                           unsigned LicmMssaNoAccForPromotionCap)
    203       : LicmMssaOptCap(LicmMssaOptCap),
    204         LicmMssaNoAccForPromotionCap(LicmMssaNoAccForPromotionCap) {}
    205 
    206 private:
    207   unsigned LicmMssaOptCap;
    208   unsigned LicmMssaNoAccForPromotionCap;
    209 
    210   std::unique_ptr<AliasSetTracker>
    211   collectAliasInfoForLoop(Loop *L, LoopInfo *LI, AAResults *AA);
    212 };
    213 
    214 struct LegacyLICMPass : public LoopPass {
    215   static char ID; // Pass identification, replacement for typeid
    216   LegacyLICMPass(
    217       unsigned LicmMssaOptCap = SetLicmMssaOptCap,
    218       unsigned LicmMssaNoAccForPromotionCap = SetLicmMssaNoAccForPromotionCap)
    219       : LoopPass(ID), LICM(LicmMssaOptCap, LicmMssaNoAccForPromotionCap) {
    220     initializeLegacyLICMPassPass(*PassRegistry::getPassRegistry());
    221   }
    222 
    223   bool runOnLoop(Loop *L, LPPassManager &LPM) override {
    224     if (skipLoop(L))
    225       return false;
    226 
    227     LLVM_DEBUG(dbgs() << "Perform LICM on Loop with header at block "
    228                       << L->getHeader()->getNameOrAsOperand() << "\n");
    229 
    230     auto *SE = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>();
    231     MemorySSA *MSSA = EnableMSSALoopDependency
    232                           ? (&getAnalysis<MemorySSAWrapperPass>().getMSSA())
    233                           : nullptr;
    234     bool hasProfileData = L->getHeader()->getParent()->hasProfileData();
    235     BlockFrequencyInfo *BFI =
    236         hasProfileData ? &getAnalysis<LazyBlockFrequencyInfoPass>().getBFI()
    237                        : nullptr;
    238     // For the old PM, we can't use OptimizationRemarkEmitter as an analysis
    239     // pass. Function analyses need to be preserved across loop transformations
    240     // but ORE cannot be preserved (see comment before the pass definition).
    241     OptimizationRemarkEmitter ORE(L->getHeader()->getParent());
    242     return LICM.runOnLoop(
    243         L, &getAnalysis<AAResultsWrapperPass>().getAAResults(),
    244         &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(),
    245         &getAnalysis<DominatorTreeWrapperPass>().getDomTree(), BFI,
    246         &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(
    247             *L->getHeader()->getParent()),
    248         &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
    249             *L->getHeader()->getParent()),
    250         SE ? &SE->getSE() : nullptr, MSSA, &ORE);
    251   }
    252 
    253   /// This transformation requires natural loop information & requires that
    254   /// loop preheaders be inserted into the CFG...
    255   ///
    256   void getAnalysisUsage(AnalysisUsage &AU) const override {
    257     AU.addPreserved<DominatorTreeWrapperPass>();
    258     AU.addPreserved<LoopInfoWrapperPass>();
    259     AU.addRequired<TargetLibraryInfoWrapperPass>();
    260     if (EnableMSSALoopDependency) {
    261       AU.addRequired<MemorySSAWrapperPass>();
    262       AU.addPreserved<MemorySSAWrapperPass>();
    263     }
    264     AU.addRequired<TargetTransformInfoWrapperPass>();
    265     getLoopAnalysisUsage(AU);
    266     LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU);
    267     AU.addPreserved<LazyBlockFrequencyInfoPass>();
    268     AU.addPreserved<LazyBranchProbabilityInfoPass>();
    269   }
    270 
    271 private:
    272   LoopInvariantCodeMotion LICM;
    273 };
    274 } // namespace
    275 
    276 PreservedAnalyses LICMPass::run(Loop &L, LoopAnalysisManager &AM,
    277                                 LoopStandardAnalysisResults &AR, LPMUpdater &) {
    278   // For the new PM, we also can't use OptimizationRemarkEmitter as an analysis
    279   // pass.  Function analyses need to be preserved across loop transformations
    280   // but ORE cannot be preserved (see comment before the pass definition).
    281   OptimizationRemarkEmitter ORE(L.getHeader()->getParent());
    282 
    283   LoopInvariantCodeMotion LICM(LicmMssaOptCap, LicmMssaNoAccForPromotionCap);
    284   if (!LICM.runOnLoop(&L, &AR.AA, &AR.LI, &AR.DT, AR.BFI, &AR.TLI, &AR.TTI,
    285                       &AR.SE, AR.MSSA, &ORE))
    286     return PreservedAnalyses::all();
    287 
    288   auto PA = getLoopPassPreservedAnalyses();
    289 
    290   PA.preserve<DominatorTreeAnalysis>();
    291   PA.preserve<LoopAnalysis>();
    292   if (AR.MSSA)
    293     PA.preserve<MemorySSAAnalysis>();
    294 
    295   return PA;
    296 }
    297 
    298 char LegacyLICMPass::ID = 0;
    299 INITIALIZE_PASS_BEGIN(LegacyLICMPass, "licm", "Loop Invariant Code Motion",
    300                       false, false)
    301 INITIALIZE_PASS_DEPENDENCY(LoopPass)
    302 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
    303 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
    304 INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
    305 INITIALIZE_PASS_DEPENDENCY(LazyBFIPass)
    306 INITIALIZE_PASS_END(LegacyLICMPass, "licm", "Loop Invariant Code Motion", false,
    307                     false)
    308 
    309 Pass *llvm::createLICMPass() { return new LegacyLICMPass(); }
    310 Pass *llvm::createLICMPass(unsigned LicmMssaOptCap,
    311                            unsigned LicmMssaNoAccForPromotionCap) {
    312   return new LegacyLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap);
    313 }
    314 
    315 llvm::SinkAndHoistLICMFlags::SinkAndHoistLICMFlags(bool IsSink, Loop *L,
    316                                                    MemorySSA *MSSA)
    317     : SinkAndHoistLICMFlags(SetLicmMssaOptCap, SetLicmMssaNoAccForPromotionCap,
    318                             IsSink, L, MSSA) {}
    319 
    320 llvm::SinkAndHoistLICMFlags::SinkAndHoistLICMFlags(
    321     unsigned LicmMssaOptCap, unsigned LicmMssaNoAccForPromotionCap, bool IsSink,
    322     Loop *L, MemorySSA *MSSA)
    323     : LicmMssaOptCap(LicmMssaOptCap),
    324       LicmMssaNoAccForPromotionCap(LicmMssaNoAccForPromotionCap),
    325       IsSink(IsSink) {
    326   assert(((L != nullptr) == (MSSA != nullptr)) &&
    327          "Unexpected values for SinkAndHoistLICMFlags");
    328   if (!MSSA)
    329     return;
    330 
    331   unsigned AccessCapCount = 0;
    332   for (auto *BB : L->getBlocks())
    333     if (const auto *Accesses = MSSA->getBlockAccesses(BB))
    334       for (const auto &MA : *Accesses) {
    335         (void)MA;
    336         ++AccessCapCount;
    337         if (AccessCapCount > LicmMssaNoAccForPromotionCap) {
    338           NoOfMemAccTooLarge = true;
    339           return;
    340         }
    341       }
    342 }
    343 
    344 /// Hoist expressions out of the specified loop. Note, alias info for inner
    345 /// loop is not preserved so it is not a good idea to run LICM multiple
    346 /// times on one loop.
    347 bool LoopInvariantCodeMotion::runOnLoop(
    348     Loop *L, AAResults *AA, LoopInfo *LI, DominatorTree *DT,
    349     BlockFrequencyInfo *BFI, TargetLibraryInfo *TLI, TargetTransformInfo *TTI,
    350     ScalarEvolution *SE, MemorySSA *MSSA, OptimizationRemarkEmitter *ORE) {
    351   bool Changed = false;
    352 
    353   assert(L->isLCSSAForm(*DT) && "Loop is not in LCSSA form.");
    354 
    355   // If this loop has metadata indicating that LICM is not to be performed then
    356   // just exit.
    357   if (hasDisableLICMTransformsHint(L)) {
    358     return false;
    359   }
    360 
    361   std::unique_ptr<AliasSetTracker> CurAST;
    362   std::unique_ptr<MemorySSAUpdater> MSSAU;
    363   std::unique_ptr<SinkAndHoistLICMFlags> Flags;
    364 
    365   // Don't sink stores from loops with coroutine suspend instructions.
    366   // LICM would sink instructions into the default destination of
    367   // the coroutine switch. The default destination of the switch is to
    368   // handle the case where the coroutine is suspended, by which point the
    369   // coroutine frame may have been destroyed. No instruction can be sunk there.
    370   // FIXME: This would unfortunately hurt the performance of coroutines, however
    371   // there is currently no general solution for this. Similar issues could also
    372   // potentially happen in other passes where instructions are being moved
    373   // across that edge.
    374   bool HasCoroSuspendInst = llvm::any_of(L->getBlocks(), [](BasicBlock *BB) {
    375     return llvm::any_of(*BB, [](Instruction &I) {
    376       IntrinsicInst *II = dyn_cast<IntrinsicInst>(&I);
    377       return II && II->getIntrinsicID() == Intrinsic::coro_suspend;
    378     });
    379   });
    380 
    381   if (!MSSA) {
    382     LLVM_DEBUG(dbgs() << "LICM: Using Alias Set Tracker.\n");
    383     CurAST = collectAliasInfoForLoop(L, LI, AA);
    384     Flags = std::make_unique<SinkAndHoistLICMFlags>(
    385         LicmMssaOptCap, LicmMssaNoAccForPromotionCap, /*IsSink=*/true);
    386   } else {
    387     LLVM_DEBUG(dbgs() << "LICM: Using MemorySSA.\n");
    388     MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
    389     Flags = std::make_unique<SinkAndHoistLICMFlags>(
    390         LicmMssaOptCap, LicmMssaNoAccForPromotionCap, /*IsSink=*/true, L, MSSA);
    391   }
    392 
    393   // Get the preheader block to move instructions into...
    394   BasicBlock *Preheader = L->getLoopPreheader();
    395 
    396   // Compute loop safety information.
    397   ICFLoopSafetyInfo SafetyInfo;
    398   SafetyInfo.computeLoopSafetyInfo(L);
    399 
    400   // We want to visit all of the instructions in this loop... that are not parts
    401   // of our subloops (they have already had their invariants hoisted out of
    402   // their loop, into this loop, so there is no need to process the BODIES of
    403   // the subloops).
    404   //
    405   // Traverse the body of the loop in depth first order on the dominator tree so
    406   // that we are guaranteed to see definitions before we see uses.  This allows
    407   // us to sink instructions in one pass, without iteration.  After sinking
    408   // instructions, we perform another pass to hoist them out of the loop.
    409   if (L->hasDedicatedExits())
    410     Changed |=
    411         sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, BFI, TLI, TTI, L,
    412                    CurAST.get(), MSSAU.get(), &SafetyInfo, *Flags.get(), ORE);
    413   Flags->setIsSink(false);
    414   if (Preheader)
    415     Changed |= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, BFI, TLI, L,
    416                            CurAST.get(), MSSAU.get(), SE, &SafetyInfo,
    417                            *Flags.get(), ORE);
    418 
    419   // Now that all loop invariants have been removed from the loop, promote any
    420   // memory references to scalars that we can.
    421   // Don't sink stores from loops without dedicated block exits. Exits
    422   // containing indirect branches are not transformed by loop simplify,
    423   // make sure we catch that. An additional load may be generated in the
    424   // preheader for SSA updater, so also avoid sinking when no preheader
    425   // is available.
    426   if (!DisablePromotion && Preheader && L->hasDedicatedExits() &&
    427       !Flags->tooManyMemoryAccesses() && !HasCoroSuspendInst) {
    428     // Figure out the loop exits and their insertion points
    429     SmallVector<BasicBlock *, 8> ExitBlocks;
    430     L->getUniqueExitBlocks(ExitBlocks);
    431 
    432     // We can't insert into a catchswitch.
    433     bool HasCatchSwitch = llvm::any_of(ExitBlocks, [](BasicBlock *Exit) {
    434       return isa<CatchSwitchInst>(Exit->getTerminator());
    435     });
    436 
    437     if (!HasCatchSwitch) {
    438       SmallVector<Instruction *, 8> InsertPts;
    439       SmallVector<MemoryAccess *, 8> MSSAInsertPts;
    440       InsertPts.reserve(ExitBlocks.size());
    441       if (MSSAU)
    442         MSSAInsertPts.reserve(ExitBlocks.size());
    443       for (BasicBlock *ExitBlock : ExitBlocks) {
    444         InsertPts.push_back(&*ExitBlock->getFirstInsertionPt());
    445         if (MSSAU)
    446           MSSAInsertPts.push_back(nullptr);
    447       }
    448 
    449       PredIteratorCache PIC;
    450 
    451       bool Promoted = false;
    452       if (CurAST.get()) {
    453         // Loop over all of the alias sets in the tracker object.
    454         for (AliasSet &AS : *CurAST) {
    455           // We can promote this alias set if it has a store, if it is a "Must"
    456           // alias set, if the pointer is loop invariant, and if we are not
    457           // eliminating any volatile loads or stores.
    458           if (AS.isForwardingAliasSet() || !AS.isMod() || !AS.isMustAlias() ||
    459               !L->isLoopInvariant(AS.begin()->getValue()))
    460             continue;
    461 
    462           assert(
    463               !AS.empty() &&
    464               "Must alias set should have at least one pointer element in it!");
    465 
    466           SmallSetVector<Value *, 8> PointerMustAliases;
    467           for (const auto &ASI : AS)
    468             PointerMustAliases.insert(ASI.getValue());
    469 
    470           Promoted |= promoteLoopAccessesToScalars(
    471               PointerMustAliases, ExitBlocks, InsertPts, MSSAInsertPts, PIC, LI,
    472               DT, TLI, L, CurAST.get(), MSSAU.get(), &SafetyInfo, ORE);
    473         }
    474       } else {
    475         // Promoting one set of accesses may make the pointers for another set
    476         // loop invariant, so run this in a loop (with the MaybePromotable set
    477         // decreasing in size over time).
    478         bool LocalPromoted;
    479         do {
    480           LocalPromoted = false;
    481           for (const SmallSetVector<Value *, 8> &PointerMustAliases :
    482                collectPromotionCandidates(MSSA, AA, L)) {
    483             LocalPromoted |= promoteLoopAccessesToScalars(
    484                 PointerMustAliases, ExitBlocks, InsertPts, MSSAInsertPts, PIC,
    485                 LI, DT, TLI, L, /*AST*/nullptr, MSSAU.get(), &SafetyInfo, ORE);
    486           }
    487           Promoted |= LocalPromoted;
    488         } while (LocalPromoted);
    489       }
    490 
    491       // Once we have promoted values across the loop body we have to
    492       // recursively reform LCSSA as any nested loop may now have values defined
    493       // within the loop used in the outer loop.
    494       // FIXME: This is really heavy handed. It would be a bit better to use an
    495       // SSAUpdater strategy during promotion that was LCSSA aware and reformed
    496       // it as it went.
    497       if (Promoted)
    498         formLCSSARecursively(*L, *DT, LI, SE);
    499 
    500       Changed |= Promoted;
    501     }
    502   }
    503 
    504   // Check that neither this loop nor its parent have had LCSSA broken. LICM is
    505   // specifically moving instructions across the loop boundary and so it is
    506   // especially in need of sanity checking here.
    507   assert(L->isLCSSAForm(*DT) && "Loop not left in LCSSA form after LICM!");
    508   assert((L->isOutermost() || L->getParentLoop()->isLCSSAForm(*DT)) &&
    509          "Parent loop not left in LCSSA form after LICM!");
    510 
    511   if (MSSAU.get() && VerifyMemorySSA)
    512     MSSAU->getMemorySSA()->verifyMemorySSA();
    513 
    514   if (Changed && SE)
    515     SE->forgetLoopDispositions(L);
    516   return Changed;
    517 }
    518 
    519 /// Walk the specified region of the CFG (defined by all blocks dominated by
    520 /// the specified block, and that are in the current loop) in reverse depth
    521 /// first order w.r.t the DominatorTree.  This allows us to visit uses before
    522 /// definitions, allowing us to sink a loop body in one pass without iteration.
    523 ///
    524 bool llvm::sinkRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI,
    525                       DominatorTree *DT, BlockFrequencyInfo *BFI,
    526                       TargetLibraryInfo *TLI, TargetTransformInfo *TTI,
    527                       Loop *CurLoop, AliasSetTracker *CurAST,
    528                       MemorySSAUpdater *MSSAU, ICFLoopSafetyInfo *SafetyInfo,
    529                       SinkAndHoistLICMFlags &Flags,
    530                       OptimizationRemarkEmitter *ORE) {
    531 
    532   // Verify inputs.
    533   assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr &&
    534          CurLoop != nullptr && SafetyInfo != nullptr &&
    535          "Unexpected input to sinkRegion.");
    536   assert(((CurAST != nullptr) ^ (MSSAU != nullptr)) &&
    537          "Either AliasSetTracker or MemorySSA should be initialized.");
    538 
    539   // We want to visit children before parents. We will enque all the parents
    540   // before their children in the worklist and process the worklist in reverse
    541   // order.
    542   SmallVector<DomTreeNode *, 16> Worklist = collectChildrenInLoop(N, CurLoop);
    543 
    544   bool Changed = false;
    545   for (DomTreeNode *DTN : reverse(Worklist)) {
    546     BasicBlock *BB = DTN->getBlock();
    547     // Only need to process the contents of this block if it is not part of a
    548     // subloop (which would already have been processed).
    549     if (inSubLoop(BB, CurLoop, LI))
    550       continue;
    551 
    552     for (BasicBlock::iterator II = BB->end(); II != BB->begin();) {
    553       Instruction &I = *--II;
    554 
    555       // The instruction is not used in the loop if it is dead.  In this case,
    556       // we just delete it instead of sinking it.
    557       if (isInstructionTriviallyDead(&I, TLI)) {
    558         LLVM_DEBUG(dbgs() << "LICM deleting dead inst: " << I << '\n');
    559         salvageKnowledge(&I);
    560         salvageDebugInfo(I);
    561         ++II;
    562         eraseInstruction(I, *SafetyInfo, CurAST, MSSAU);
    563         Changed = true;
    564         continue;
    565       }
    566 
    567       // Check to see if we can sink this instruction to the exit blocks
    568       // of the loop.  We can do this if the all users of the instruction are
    569       // outside of the loop.  In this case, it doesn't even matter if the
    570       // operands of the instruction are loop invariant.
    571       //
    572       bool FreeInLoop = false;
    573       if (!I.mayHaveSideEffects() &&
    574           isNotUsedOrFreeInLoop(I, CurLoop, SafetyInfo, TTI, FreeInLoop) &&
    575           canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, MSSAU, true, &Flags,
    576                              ORE)) {
    577         if (sink(I, LI, DT, BFI, CurLoop, SafetyInfo, MSSAU, ORE)) {
    578           if (!FreeInLoop) {
    579             ++II;
    580             salvageDebugInfo(I);
    581             eraseInstruction(I, *SafetyInfo, CurAST, MSSAU);
    582           }
    583           Changed = true;
    584         }
    585       }
    586     }
    587   }
    588   if (MSSAU && VerifyMemorySSA)
    589     MSSAU->getMemorySSA()->verifyMemorySSA();
    590   return Changed;
    591 }
    592 
    593 namespace {
    594 // This is a helper class for hoistRegion to make it able to hoist control flow
    595 // in order to be able to hoist phis. The way this works is that we initially
    596 // start hoisting to the loop preheader, and when we see a loop invariant branch
    597 // we make note of this. When we then come to hoist an instruction that's
    598 // conditional on such a branch we duplicate the branch and the relevant control
    599 // flow, then hoist the instruction into the block corresponding to its original
    600 // block in the duplicated control flow.
    601 class ControlFlowHoister {
    602 private:
    603   // Information about the loop we are hoisting from
    604   LoopInfo *LI;
    605   DominatorTree *DT;
    606   Loop *CurLoop;
    607   MemorySSAUpdater *MSSAU;
    608 
    609   // A map of blocks in the loop to the block their instructions will be hoisted
    610   // to.
    611   DenseMap<BasicBlock *, BasicBlock *> HoistDestinationMap;
    612 
    613   // The branches that we can hoist, mapped to the block that marks a
    614   // convergence point of their control flow.
    615   DenseMap<BranchInst *, BasicBlock *> HoistableBranches;
    616 
    617 public:
    618   ControlFlowHoister(LoopInfo *LI, DominatorTree *DT, Loop *CurLoop,
    619                      MemorySSAUpdater *MSSAU)
    620       : LI(LI), DT(DT), CurLoop(CurLoop), MSSAU(MSSAU) {}
    621 
    622   void registerPossiblyHoistableBranch(BranchInst *BI) {
    623     // We can only hoist conditional branches with loop invariant operands.
    624     if (!ControlFlowHoisting || !BI->isConditional() ||
    625         !CurLoop->hasLoopInvariantOperands(BI))
    626       return;
    627 
    628     // The branch destinations need to be in the loop, and we don't gain
    629     // anything by duplicating conditional branches with duplicate successors,
    630     // as it's essentially the same as an unconditional branch.
    631     BasicBlock *TrueDest = BI->getSuccessor(0);
    632     BasicBlock *FalseDest = BI->getSuccessor(1);
    633     if (!CurLoop->contains(TrueDest) || !CurLoop->contains(FalseDest) ||
    634         TrueDest == FalseDest)
    635       return;
    636 
    637     // We can hoist BI if one branch destination is the successor of the other,
    638     // or both have common successor which we check by seeing if the
    639     // intersection of their successors is non-empty.
    640     // TODO: This could be expanded to allowing branches where both ends
    641     // eventually converge to a single block.
    642     SmallPtrSet<BasicBlock *, 4> TrueDestSucc, FalseDestSucc;
    643     TrueDestSucc.insert(succ_begin(TrueDest), succ_end(TrueDest));
    644     FalseDestSucc.insert(succ_begin(FalseDest), succ_end(FalseDest));
    645     BasicBlock *CommonSucc = nullptr;
    646     if (TrueDestSucc.count(FalseDest)) {
    647       CommonSucc = FalseDest;
    648     } else if (FalseDestSucc.count(TrueDest)) {
    649       CommonSucc = TrueDest;
    650     } else {
    651       set_intersect(TrueDestSucc, FalseDestSucc);
    652       // If there's one common successor use that.
    653       if (TrueDestSucc.size() == 1)
    654         CommonSucc = *TrueDestSucc.begin();
    655       // If there's more than one pick whichever appears first in the block list
    656       // (we can't use the value returned by TrueDestSucc.begin() as it's
    657       // unpredicatable which element gets returned).
    658       else if (!TrueDestSucc.empty()) {
    659         Function *F = TrueDest->getParent();
    660         auto IsSucc = [&](BasicBlock &BB) { return TrueDestSucc.count(&BB); };
    661         auto It = llvm::find_if(*F, IsSucc);
    662         assert(It != F->end() && "Could not find successor in function");
    663         CommonSucc = &*It;
    664       }
    665     }
    666     // The common successor has to be dominated by the branch, as otherwise
    667     // there will be some other path to the successor that will not be
    668     // controlled by this branch so any phi we hoist would be controlled by the
    669     // wrong condition. This also takes care of avoiding hoisting of loop back
    670     // edges.
    671     // TODO: In some cases this could be relaxed if the successor is dominated
    672     // by another block that's been hoisted and we can guarantee that the
    673     // control flow has been replicated exactly.
    674     if (CommonSucc && DT->dominates(BI, CommonSucc))
    675       HoistableBranches[BI] = CommonSucc;
    676   }
    677 
    678   bool canHoistPHI(PHINode *PN) {
    679     // The phi must have loop invariant operands.
    680     if (!ControlFlowHoisting || !CurLoop->hasLoopInvariantOperands(PN))
    681       return false;
    682     // We can hoist phis if the block they are in is the target of hoistable
    683     // branches which cover all of the predecessors of the block.
    684     SmallPtrSet<BasicBlock *, 8> PredecessorBlocks;
    685     BasicBlock *BB = PN->getParent();
    686     for (BasicBlock *PredBB : predecessors(BB))
    687       PredecessorBlocks.insert(PredBB);
    688     // If we have less predecessor blocks than predecessors then the phi will
    689     // have more than one incoming value for the same block which we can't
    690     // handle.
    691     // TODO: This could be handled be erasing some of the duplicate incoming
    692     // values.
    693     if (PredecessorBlocks.size() != pred_size(BB))
    694       return false;
    695     for (auto &Pair : HoistableBranches) {
    696       if (Pair.second == BB) {
    697         // Which blocks are predecessors via this branch depends on if the
    698         // branch is triangle-like or diamond-like.
    699         if (Pair.first->getSuccessor(0) == BB) {
    700           PredecessorBlocks.erase(Pair.first->getParent());
    701           PredecessorBlocks.erase(Pair.first->getSuccessor(1));
    702         } else if (Pair.first->getSuccessor(1) == BB) {
    703           PredecessorBlocks.erase(Pair.first->getParent());
    704           PredecessorBlocks.erase(Pair.first->getSuccessor(0));
    705         } else {
    706           PredecessorBlocks.erase(Pair.first->getSuccessor(0));
    707           PredecessorBlocks.erase(Pair.first->getSuccessor(1));
    708         }
    709       }
    710     }
    711     // PredecessorBlocks will now be empty if for every predecessor of BB we
    712     // found a hoistable branch source.
    713     return PredecessorBlocks.empty();
    714   }
    715 
    716   BasicBlock *getOrCreateHoistedBlock(BasicBlock *BB) {
    717     if (!ControlFlowHoisting)
    718       return CurLoop->getLoopPreheader();
    719     // If BB has already been hoisted, return that
    720     if (HoistDestinationMap.count(BB))
    721       return HoistDestinationMap[BB];
    722 
    723     // Check if this block is conditional based on a pending branch
    724     auto HasBBAsSuccessor =
    725         [&](DenseMap<BranchInst *, BasicBlock *>::value_type &Pair) {
    726           return BB != Pair.second && (Pair.first->getSuccessor(0) == BB ||
    727                                        Pair.first->getSuccessor(1) == BB);
    728         };
    729     auto It = llvm::find_if(HoistableBranches, HasBBAsSuccessor);
    730 
    731     // If not involved in a pending branch, hoist to preheader
    732     BasicBlock *InitialPreheader = CurLoop->getLoopPreheader();
    733     if (It == HoistableBranches.end()) {
    734       LLVM_DEBUG(dbgs() << "LICM using "
    735                         << InitialPreheader->getNameOrAsOperand()
    736                         << " as hoist destination for "
    737                         << BB->getNameOrAsOperand() << "\n");
    738       HoistDestinationMap[BB] = InitialPreheader;
    739       return InitialPreheader;
    740     }
    741     BranchInst *BI = It->first;
    742     assert(std::find_if(++It, HoistableBranches.end(), HasBBAsSuccessor) ==
    743                HoistableBranches.end() &&
    744            "BB is expected to be the target of at most one branch");
    745 
    746     LLVMContext &C = BB->getContext();
    747     BasicBlock *TrueDest = BI->getSuccessor(0);
    748     BasicBlock *FalseDest = BI->getSuccessor(1);
    749     BasicBlock *CommonSucc = HoistableBranches[BI];
    750     BasicBlock *HoistTarget = getOrCreateHoistedBlock(BI->getParent());
    751 
    752     // Create hoisted versions of blocks that currently don't have them
    753     auto CreateHoistedBlock = [&](BasicBlock *Orig) {
    754       if (HoistDestinationMap.count(Orig))
    755         return HoistDestinationMap[Orig];
    756       BasicBlock *New =
    757           BasicBlock::Create(C, Orig->getName() + ".licm", Orig->getParent());
    758       HoistDestinationMap[Orig] = New;
    759       DT->addNewBlock(New, HoistTarget);
    760       if (CurLoop->getParentLoop())
    761         CurLoop->getParentLoop()->addBasicBlockToLoop(New, *LI);
    762       ++NumCreatedBlocks;
    763       LLVM_DEBUG(dbgs() << "LICM created " << New->getName()
    764                         << " as hoist destination for " << Orig->getName()
    765                         << "\n");
    766       return New;
    767     };
    768     BasicBlock *HoistTrueDest = CreateHoistedBlock(TrueDest);
    769     BasicBlock *HoistFalseDest = CreateHoistedBlock(FalseDest);
    770     BasicBlock *HoistCommonSucc = CreateHoistedBlock(CommonSucc);
    771 
    772     // Link up these blocks with branches.
    773     if (!HoistCommonSucc->getTerminator()) {
    774       // The new common successor we've generated will branch to whatever that
    775       // hoist target branched to.
    776       BasicBlock *TargetSucc = HoistTarget->getSingleSuccessor();
    777       assert(TargetSucc && "Expected hoist target to have a single successor");
    778       HoistCommonSucc->moveBefore(TargetSucc);
    779       BranchInst::Create(TargetSucc, HoistCommonSucc);
    780     }
    781     if (!HoistTrueDest->getTerminator()) {
    782       HoistTrueDest->moveBefore(HoistCommonSucc);
    783       BranchInst::Create(HoistCommonSucc, HoistTrueDest);
    784     }
    785     if (!HoistFalseDest->getTerminator()) {
    786       HoistFalseDest->moveBefore(HoistCommonSucc);
    787       BranchInst::Create(HoistCommonSucc, HoistFalseDest);
    788     }
    789 
    790     // If BI is being cloned to what was originally the preheader then
    791     // HoistCommonSucc will now be the new preheader.
    792     if (HoistTarget == InitialPreheader) {
    793       // Phis in the loop header now need to use the new preheader.
    794       InitialPreheader->replaceSuccessorsPhiUsesWith(HoistCommonSucc);
    795       if (MSSAU)
    796         MSSAU->wireOldPredecessorsToNewImmediatePredecessor(
    797             HoistTarget->getSingleSuccessor(), HoistCommonSucc, {HoistTarget});
    798       // The new preheader dominates the loop header.
    799       DomTreeNode *PreheaderNode = DT->getNode(HoistCommonSucc);
    800       DomTreeNode *HeaderNode = DT->getNode(CurLoop->getHeader());
    801       DT->changeImmediateDominator(HeaderNode, PreheaderNode);
    802       // The preheader hoist destination is now the new preheader, with the
    803       // exception of the hoist destination of this branch.
    804       for (auto &Pair : HoistDestinationMap)
    805         if (Pair.second == InitialPreheader && Pair.first != BI->getParent())
    806           Pair.second = HoistCommonSucc;
    807     }
    808 
    809     // Now finally clone BI.
    810     ReplaceInstWithInst(
    811         HoistTarget->getTerminator(),
    812         BranchInst::Create(HoistTrueDest, HoistFalseDest, BI->getCondition()));
    813     ++NumClonedBranches;
    814 
    815     assert(CurLoop->getLoopPreheader() &&
    816            "Hoisting blocks should not have destroyed preheader");
    817     return HoistDestinationMap[BB];
    818   }
    819 };
    820 } // namespace
    821 
    822 // Hoisting/sinking instruction out of a loop isn't always beneficial. It's only
    823 // only worthwhile if the destination block is actually colder than current
    824 // block.
    825 static bool worthSinkOrHoistInst(Instruction &I, BasicBlock *DstBlock,
    826                                  OptimizationRemarkEmitter *ORE,
    827                                  BlockFrequencyInfo *BFI) {
    828   // Check block frequency only when runtime profile is available
    829   // to avoid pathological cases. With static profile, lean towards
    830   // hosting because it helps canonicalize the loop for vectorizer.
    831   if (!DstBlock->getParent()->hasProfileData())
    832     return true;
    833 
    834   if (!HoistSinkColdnessThreshold || !BFI)
    835     return true;
    836 
    837   BasicBlock *SrcBlock = I.getParent();
    838   if (BFI->getBlockFreq(DstBlock).getFrequency() / HoistSinkColdnessThreshold >
    839       BFI->getBlockFreq(SrcBlock).getFrequency()) {
    840     ORE->emit([&]() {
    841       return OptimizationRemarkMissed(DEBUG_TYPE, "SinkHoistInst", &I)
    842              << "failed to sink or hoist instruction because containing block "
    843                 "has lower frequency than destination block";
    844     });
    845     return false;
    846   }
    847 
    848   return true;
    849 }
    850 
    851 /// Walk the specified region of the CFG (defined by all blocks dominated by
    852 /// the specified block, and that are in the current loop) in depth first
    853 /// order w.r.t the DominatorTree.  This allows us to visit definitions before
    854 /// uses, allowing us to hoist a loop body in one pass without iteration.
    855 ///
    856 bool llvm::hoistRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI,
    857                        DominatorTree *DT, BlockFrequencyInfo *BFI,
    858                        TargetLibraryInfo *TLI, Loop *CurLoop,
    859                        AliasSetTracker *CurAST, MemorySSAUpdater *MSSAU,
    860                        ScalarEvolution *SE, ICFLoopSafetyInfo *SafetyInfo,
    861                        SinkAndHoistLICMFlags &Flags,
    862                        OptimizationRemarkEmitter *ORE) {
    863   // Verify inputs.
    864   assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr &&
    865          CurLoop != nullptr && SafetyInfo != nullptr &&
    866          "Unexpected input to hoistRegion.");
    867   assert(((CurAST != nullptr) ^ (MSSAU != nullptr)) &&
    868          "Either AliasSetTracker or MemorySSA should be initialized.");
    869 
    870   ControlFlowHoister CFH(LI, DT, CurLoop, MSSAU);
    871 
    872   // Keep track of instructions that have been hoisted, as they may need to be
    873   // re-hoisted if they end up not dominating all of their uses.
    874   SmallVector<Instruction *, 16> HoistedInstructions;
    875 
    876   // For PHI hoisting to work we need to hoist blocks before their successors.
    877   // We can do this by iterating through the blocks in the loop in reverse
    878   // post-order.
    879   LoopBlocksRPO Worklist(CurLoop);
    880   Worklist.perform(LI);
    881   bool Changed = false;
    882   for (BasicBlock *BB : Worklist) {
    883     // Only need to process the contents of this block if it is not part of a
    884     // subloop (which would already have been processed).
    885     if (inSubLoop(BB, CurLoop, LI))
    886       continue;
    887 
    888     for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E;) {
    889       Instruction &I = *II++;
    890       // Try constant folding this instruction.  If all the operands are
    891       // constants, it is technically hoistable, but it would be better to
    892       // just fold it.
    893       if (Constant *C = ConstantFoldInstruction(
    894               &I, I.getModule()->getDataLayout(), TLI)) {
    895         LLVM_DEBUG(dbgs() << "LICM folding inst: " << I << "  --> " << *C
    896                           << '\n');
    897         if (CurAST)
    898           CurAST->copyValue(&I, C);
    899         // FIXME MSSA: Such replacements may make accesses unoptimized (D51960).
    900         I.replaceAllUsesWith(C);
    901         if (isInstructionTriviallyDead(&I, TLI))
    902           eraseInstruction(I, *SafetyInfo, CurAST, MSSAU);
    903         Changed = true;
    904         continue;
    905       }
    906 
    907       // Try hoisting the instruction out to the preheader.  We can only do
    908       // this if all of the operands of the instruction are loop invariant and
    909       // if it is safe to hoist the instruction. We also check block frequency
    910       // to make sure instruction only gets hoisted into colder blocks.
    911       // TODO: It may be safe to hoist if we are hoisting to a conditional block
    912       // and we have accurately duplicated the control flow from the loop header
    913       // to that block.
    914       if (CurLoop->hasLoopInvariantOperands(&I) &&
    915           canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, MSSAU, true, &Flags,
    916                              ORE) &&
    917           worthSinkOrHoistInst(I, CurLoop->getLoopPreheader(), ORE, BFI) &&
    918           isSafeToExecuteUnconditionally(
    919               I, DT, TLI, CurLoop, SafetyInfo, ORE,
    920               CurLoop->getLoopPreheader()->getTerminator())) {
    921         hoist(I, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo,
    922               MSSAU, SE, ORE);
    923         HoistedInstructions.push_back(&I);
    924         Changed = true;
    925         continue;
    926       }
    927 
    928       // Attempt to remove floating point division out of the loop by
    929       // converting it to a reciprocal multiplication.
    930       if (I.getOpcode() == Instruction::FDiv && I.hasAllowReciprocal() &&
    931           CurLoop->isLoopInvariant(I.getOperand(1))) {
    932         auto Divisor = I.getOperand(1);
    933         auto One = llvm::ConstantFP::get(Divisor->getType(), 1.0);
    934         auto ReciprocalDivisor = BinaryOperator::CreateFDiv(One, Divisor);
    935         ReciprocalDivisor->setFastMathFlags(I.getFastMathFlags());
    936         SafetyInfo->insertInstructionTo(ReciprocalDivisor, I.getParent());
    937         ReciprocalDivisor->insertBefore(&I);
    938 
    939         auto Product =
    940             BinaryOperator::CreateFMul(I.getOperand(0), ReciprocalDivisor);
    941         Product->setFastMathFlags(I.getFastMathFlags());
    942         SafetyInfo->insertInstructionTo(Product, I.getParent());
    943         Product->insertAfter(&I);
    944         I.replaceAllUsesWith(Product);
    945         eraseInstruction(I, *SafetyInfo, CurAST, MSSAU);
    946 
    947         hoist(*ReciprocalDivisor, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB),
    948               SafetyInfo, MSSAU, SE, ORE);
    949         HoistedInstructions.push_back(ReciprocalDivisor);
    950         Changed = true;
    951         continue;
    952       }
    953 
    954       auto IsInvariantStart = [&](Instruction &I) {
    955         using namespace PatternMatch;
    956         return I.use_empty() &&
    957                match(&I, m_Intrinsic<Intrinsic::invariant_start>());
    958       };
    959       auto MustExecuteWithoutWritesBefore = [&](Instruction &I) {
    960         return SafetyInfo->isGuaranteedToExecute(I, DT, CurLoop) &&
    961                SafetyInfo->doesNotWriteMemoryBefore(I, CurLoop);
    962       };
    963       if ((IsInvariantStart(I) || isGuard(&I)) &&
    964           CurLoop->hasLoopInvariantOperands(&I) &&
    965           MustExecuteWithoutWritesBefore(I)) {
    966         hoist(I, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo,
    967               MSSAU, SE, ORE);
    968         HoistedInstructions.push_back(&I);
    969         Changed = true;
    970         continue;
    971       }
    972 
    973       if (PHINode *PN = dyn_cast<PHINode>(&I)) {
    974         if (CFH.canHoistPHI(PN)) {
    975           // Redirect incoming blocks first to ensure that we create hoisted
    976           // versions of those blocks before we hoist the phi.
    977           for (unsigned int i = 0; i < PN->getNumIncomingValues(); ++i)
    978             PN->setIncomingBlock(
    979                 i, CFH.getOrCreateHoistedBlock(PN->getIncomingBlock(i)));
    980           hoist(*PN, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo,
    981                 MSSAU, SE, ORE);
    982           assert(DT->dominates(PN, BB) && "Conditional PHIs not expected");
    983           Changed = true;
    984           continue;
    985         }
    986       }
    987 
    988       // Remember possibly hoistable branches so we can actually hoist them
    989       // later if needed.
    990       if (BranchInst *BI = dyn_cast<BranchInst>(&I))
    991         CFH.registerPossiblyHoistableBranch(BI);
    992     }
    993   }
    994 
    995   // If we hoisted instructions to a conditional block they may not dominate
    996   // their uses that weren't hoisted (such as phis where some operands are not
    997   // loop invariant). If so make them unconditional by moving them to their
    998   // immediate dominator. We iterate through the instructions in reverse order
    999   // which ensures that when we rehoist an instruction we rehoist its operands,
   1000   // and also keep track of where in the block we are rehoisting to to make sure
   1001   // that we rehoist instructions before the instructions that use them.
   1002   Instruction *HoistPoint = nullptr;
   1003   if (ControlFlowHoisting) {
   1004     for (Instruction *I : reverse(HoistedInstructions)) {
   1005       if (!llvm::all_of(I->uses(),
   1006                         [&](Use &U) { return DT->dominates(I, U); })) {
   1007         BasicBlock *Dominator =
   1008             DT->getNode(I->getParent())->getIDom()->getBlock();
   1009         if (!HoistPoint || !DT->dominates(HoistPoint->getParent(), Dominator)) {
   1010           if (HoistPoint)
   1011             assert(DT->dominates(Dominator, HoistPoint->getParent()) &&
   1012                    "New hoist point expected to dominate old hoist point");
   1013           HoistPoint = Dominator->getTerminator();
   1014         }
   1015         LLVM_DEBUG(dbgs() << "LICM rehoisting to "
   1016                           << HoistPoint->getParent()->getNameOrAsOperand()
   1017                           << ": " << *I << "\n");
   1018         moveInstructionBefore(*I, *HoistPoint, *SafetyInfo, MSSAU, SE);
   1019         HoistPoint = I;
   1020         Changed = true;
   1021       }
   1022     }
   1023   }
   1024   if (MSSAU && VerifyMemorySSA)
   1025     MSSAU->getMemorySSA()->verifyMemorySSA();
   1026 
   1027     // Now that we've finished hoisting make sure that LI and DT are still
   1028     // valid.
   1029 #ifdef EXPENSIVE_CHECKS
   1030   if (Changed) {
   1031     assert(DT->verify(DominatorTree::VerificationLevel::Fast) &&
   1032            "Dominator tree verification failed");
   1033     LI->verify(*DT);
   1034   }
   1035 #endif
   1036 
   1037   return Changed;
   1038 }
   1039 
   1040 // Return true if LI is invariant within scope of the loop. LI is invariant if
   1041 // CurLoop is dominated by an invariant.start representing the same memory
   1042 // location and size as the memory location LI loads from, and also the
   1043 // invariant.start has no uses.
   1044 static bool isLoadInvariantInLoop(LoadInst *LI, DominatorTree *DT,
   1045                                   Loop *CurLoop) {
   1046   Value *Addr = LI->getOperand(0);
   1047   const DataLayout &DL = LI->getModule()->getDataLayout();
   1048   const TypeSize LocSizeInBits = DL.getTypeSizeInBits(LI->getType());
   1049 
   1050   // It is not currently possible for clang to generate an invariant.start
   1051   // intrinsic with scalable vector types because we don't support thread local
   1052   // sizeless types and we don't permit sizeless types in structs or classes.
   1053   // Furthermore, even if support is added for this in future the intrinsic
   1054   // itself is defined to have a size of -1 for variable sized objects. This
   1055   // makes it impossible to verify if the intrinsic envelops our region of
   1056   // interest. For example, both <vscale x 32 x i8> and <vscale x 16 x i8>
   1057   // types would have a -1 parameter, but the former is clearly double the size
   1058   // of the latter.
   1059   if (LocSizeInBits.isScalable())
   1060     return false;
   1061 
   1062   // if the type is i8 addrspace(x)*, we know this is the type of
   1063   // llvm.invariant.start operand
   1064   auto *PtrInt8Ty = PointerType::get(Type::getInt8Ty(LI->getContext()),
   1065                                      LI->getPointerAddressSpace());
   1066   unsigned BitcastsVisited = 0;
   1067   // Look through bitcasts until we reach the i8* type (this is invariant.start
   1068   // operand type).
   1069   while (Addr->getType() != PtrInt8Ty) {
   1070     auto *BC = dyn_cast<BitCastInst>(Addr);
   1071     // Avoid traversing high number of bitcast uses.
   1072     if (++BitcastsVisited > MaxNumUsesTraversed || !BC)
   1073       return false;
   1074     Addr = BC->getOperand(0);
   1075   }
   1076 
   1077   unsigned UsesVisited = 0;
   1078   // Traverse all uses of the load operand value, to see if invariant.start is
   1079   // one of the uses, and whether it dominates the load instruction.
   1080   for (auto *U : Addr->users()) {
   1081     // Avoid traversing for Load operand with high number of users.
   1082     if (++UsesVisited > MaxNumUsesTraversed)
   1083       return false;
   1084     IntrinsicInst *II = dyn_cast<IntrinsicInst>(U);
   1085     // If there are escaping uses of invariant.start instruction, the load maybe
   1086     // non-invariant.
   1087     if (!II || II->getIntrinsicID() != Intrinsic::invariant_start ||
   1088         !II->use_empty())
   1089       continue;
   1090     ConstantInt *InvariantSize = cast<ConstantInt>(II->getArgOperand(0));
   1091     // The intrinsic supports having a -1 argument for variable sized objects
   1092     // so we should check for that here.
   1093     if (InvariantSize->isNegative())
   1094       continue;
   1095     uint64_t InvariantSizeInBits = InvariantSize->getSExtValue() * 8;
   1096     // Confirm the invariant.start location size contains the load operand size
   1097     // in bits. Also, the invariant.start should dominate the load, and we
   1098     // should not hoist the load out of a loop that contains this dominating
   1099     // invariant.start.
   1100     if (LocSizeInBits.getFixedSize() <= InvariantSizeInBits &&
   1101         DT->properlyDominates(II->getParent(), CurLoop->getHeader()))
   1102       return true;
   1103   }
   1104 
   1105   return false;
   1106 }
   1107 
   1108 namespace {
   1109 /// Return true if-and-only-if we know how to (mechanically) both hoist and
   1110 /// sink a given instruction out of a loop.  Does not address legality
   1111 /// concerns such as aliasing or speculation safety.
   1112 bool isHoistableAndSinkableInst(Instruction &I) {
   1113   // Only these instructions are hoistable/sinkable.
   1114   return (isa<LoadInst>(I) || isa<StoreInst>(I) || isa<CallInst>(I) ||
   1115           isa<FenceInst>(I) || isa<CastInst>(I) || isa<UnaryOperator>(I) ||
   1116           isa<BinaryOperator>(I) || isa<SelectInst>(I) ||
   1117           isa<GetElementPtrInst>(I) || isa<CmpInst>(I) ||
   1118           isa<InsertElementInst>(I) || isa<ExtractElementInst>(I) ||
   1119           isa<ShuffleVectorInst>(I) || isa<ExtractValueInst>(I) ||
   1120           isa<InsertValueInst>(I) || isa<FreezeInst>(I));
   1121 }
   1122 /// Return true if all of the alias sets within this AST are known not to
   1123 /// contain a Mod, or if MSSA knows there are no MemoryDefs in the loop.
   1124 bool isReadOnly(AliasSetTracker *CurAST, const MemorySSAUpdater *MSSAU,
   1125                 const Loop *L) {
   1126   if (CurAST) {
   1127     for (AliasSet &AS : *CurAST) {
   1128       if (!AS.isForwardingAliasSet() && AS.isMod()) {
   1129         return false;
   1130       }
   1131     }
   1132     return true;
   1133   } else { /*MSSAU*/
   1134     for (auto *BB : L->getBlocks())
   1135       if (MSSAU->getMemorySSA()->getBlockDefs(BB))
   1136         return false;
   1137     return true;
   1138   }
   1139 }
   1140 
   1141 /// Return true if I is the only Instruction with a MemoryAccess in L.
   1142 bool isOnlyMemoryAccess(const Instruction *I, const Loop *L,
   1143                         const MemorySSAUpdater *MSSAU) {
   1144   for (auto *BB : L->getBlocks())
   1145     if (auto *Accs = MSSAU->getMemorySSA()->getBlockAccesses(BB)) {
   1146       int NotAPhi = 0;
   1147       for (const auto &Acc : *Accs) {
   1148         if (isa<MemoryPhi>(&Acc))
   1149           continue;
   1150         const auto *MUD = cast<MemoryUseOrDef>(&Acc);
   1151         if (MUD->getMemoryInst() != I || NotAPhi++ == 1)
   1152           return false;
   1153       }
   1154     }
   1155   return true;
   1156 }
   1157 }
   1158 
   1159 bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
   1160                               Loop *CurLoop, AliasSetTracker *CurAST,
   1161                               MemorySSAUpdater *MSSAU,
   1162                               bool TargetExecutesOncePerLoop,
   1163                               SinkAndHoistLICMFlags *Flags,
   1164                               OptimizationRemarkEmitter *ORE) {
   1165   assert(((CurAST != nullptr) ^ (MSSAU != nullptr)) &&
   1166          "Either AliasSetTracker or MemorySSA should be initialized.");
   1167 
   1168   // If we don't understand the instruction, bail early.
   1169   if (!isHoistableAndSinkableInst(I))
   1170     return false;
   1171 
   1172   MemorySSA *MSSA = MSSAU ? MSSAU->getMemorySSA() : nullptr;
   1173   if (MSSA)
   1174     assert(Flags != nullptr && "Flags cannot be null.");
   1175 
   1176   // Loads have extra constraints we have to verify before we can hoist them.
   1177   if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
   1178     if (!LI->isUnordered())
   1179       return false; // Don't sink/hoist volatile or ordered atomic loads!
   1180 
   1181     // Loads from constant memory are always safe to move, even if they end up
   1182     // in the same alias set as something that ends up being modified.
   1183     if (AA->pointsToConstantMemory(LI->getOperand(0)))
   1184       return true;
   1185     if (LI->hasMetadata(LLVMContext::MD_invariant_load))
   1186       return true;
   1187 
   1188     if (LI->isAtomic() && !TargetExecutesOncePerLoop)
   1189       return false; // Don't risk duplicating unordered loads
   1190 
   1191     // This checks for an invariant.start dominating the load.
   1192     if (isLoadInvariantInLoop(LI, DT, CurLoop))
   1193       return true;
   1194 
   1195     bool Invalidated;
   1196     if (CurAST)
   1197       Invalidated = pointerInvalidatedByLoop(MemoryLocation::get(LI), CurAST,
   1198                                              CurLoop, AA);
   1199     else
   1200       Invalidated = pointerInvalidatedByLoopWithMSSA(
   1201           MSSA, cast<MemoryUse>(MSSA->getMemoryAccess(LI)), CurLoop, I, *Flags);
   1202     // Check loop-invariant address because this may also be a sinkable load
   1203     // whose address is not necessarily loop-invariant.
   1204     if (ORE && Invalidated && CurLoop->isLoopInvariant(LI->getPointerOperand()))
   1205       ORE->emit([&]() {
   1206         return OptimizationRemarkMissed(
   1207                    DEBUG_TYPE, "LoadWithLoopInvariantAddressInvalidated", LI)
   1208                << "failed to move load with loop-invariant address "
   1209                   "because the loop may invalidate its value";
   1210       });
   1211 
   1212     return !Invalidated;
   1213   } else if (CallInst *CI = dyn_cast<CallInst>(&I)) {
   1214     // Don't sink or hoist dbg info; it's legal, but not useful.
   1215     if (isa<DbgInfoIntrinsic>(I))
   1216       return false;
   1217 
   1218     // Don't sink calls which can throw.
   1219     if (CI->mayThrow())
   1220       return false;
   1221 
   1222     // Convergent attribute has been used on operations that involve
   1223     // inter-thread communication which results are implicitly affected by the
   1224     // enclosing control flows. It is not safe to hoist or sink such operations
   1225     // across control flow.
   1226     if (CI->isConvergent())
   1227       return false;
   1228 
   1229     using namespace PatternMatch;
   1230     if (match(CI, m_Intrinsic<Intrinsic::assume>()))
   1231       // Assumes don't actually alias anything or throw
   1232       return true;
   1233 
   1234     if (match(CI, m_Intrinsic<Intrinsic::experimental_widenable_condition>()))
   1235       // Widenable conditions don't actually alias anything or throw
   1236       return true;
   1237 
   1238     // Handle simple cases by querying alias analysis.
   1239     FunctionModRefBehavior Behavior = AA->getModRefBehavior(CI);
   1240     if (Behavior == FMRB_DoesNotAccessMemory)
   1241       return true;
   1242     if (AAResults::onlyReadsMemory(Behavior)) {
   1243       // A readonly argmemonly function only reads from memory pointed to by
   1244       // it's arguments with arbitrary offsets.  If we can prove there are no
   1245       // writes to this memory in the loop, we can hoist or sink.
   1246       if (AAResults::onlyAccessesArgPointees(Behavior)) {
   1247         // TODO: expand to writeable arguments
   1248         for (Value *Op : CI->arg_operands())
   1249           if (Op->getType()->isPointerTy()) {
   1250             bool Invalidated;
   1251             if (CurAST)
   1252               Invalidated = pointerInvalidatedByLoop(
   1253                   MemoryLocation::getBeforeOrAfter(Op), CurAST, CurLoop, AA);
   1254             else
   1255               Invalidated = pointerInvalidatedByLoopWithMSSA(
   1256                   MSSA, cast<MemoryUse>(MSSA->getMemoryAccess(CI)), CurLoop, I,
   1257                   *Flags);
   1258             if (Invalidated)
   1259               return false;
   1260           }
   1261         return true;
   1262       }
   1263 
   1264       // If this call only reads from memory and there are no writes to memory
   1265       // in the loop, we can hoist or sink the call as appropriate.
   1266       if (isReadOnly(CurAST, MSSAU, CurLoop))
   1267         return true;
   1268     }
   1269 
   1270     // FIXME: This should use mod/ref information to see if we can hoist or
   1271     // sink the call.
   1272 
   1273     return false;
   1274   } else if (auto *FI = dyn_cast<FenceInst>(&I)) {
   1275     // Fences alias (most) everything to provide ordering.  For the moment,
   1276     // just give up if there are any other memory operations in the loop.
   1277     if (CurAST) {
   1278       auto Begin = CurAST->begin();
   1279       assert(Begin != CurAST->end() && "must contain FI");
   1280       if (std::next(Begin) != CurAST->end())
   1281         // constant memory for instance, TODO: handle better
   1282         return false;
   1283       auto *UniqueI = Begin->getUniqueInstruction();
   1284       if (!UniqueI)
   1285         // other memory op, give up
   1286         return false;
   1287       (void)FI; // suppress unused variable warning
   1288       assert(UniqueI == FI && "AS must contain FI");
   1289       return true;
   1290     } else // MSSAU
   1291       return isOnlyMemoryAccess(FI, CurLoop, MSSAU);
   1292   } else if (auto *SI = dyn_cast<StoreInst>(&I)) {
   1293     if (!SI->isUnordered())
   1294       return false; // Don't sink/hoist volatile or ordered atomic store!
   1295 
   1296     // We can only hoist a store that we can prove writes a value which is not
   1297     // read or overwritten within the loop.  For those cases, we fallback to
   1298     // load store promotion instead.  TODO: We can extend this to cases where
   1299     // there is exactly one write to the location and that write dominates an
   1300     // arbitrary number of reads in the loop.
   1301     if (CurAST) {
   1302       auto &AS = CurAST->getAliasSetFor(MemoryLocation::get(SI));
   1303 
   1304       if (AS.isRef() || !AS.isMustAlias())
   1305         // Quick exit test, handled by the full path below as well.
   1306         return false;
   1307       auto *UniqueI = AS.getUniqueInstruction();
   1308       if (!UniqueI)
   1309         // other memory op, give up
   1310         return false;
   1311       assert(UniqueI == SI && "AS must contain SI");
   1312       return true;
   1313     } else { // MSSAU
   1314       if (isOnlyMemoryAccess(SI, CurLoop, MSSAU))
   1315         return true;
   1316       // If there are more accesses than the Promotion cap or no "quota" to
   1317       // check clobber, then give up as we're not walking a list that long.
   1318       if (Flags->tooManyMemoryAccesses() || Flags->tooManyClobberingCalls())
   1319         return false;
   1320       // If there are interfering Uses (i.e. their defining access is in the
   1321       // loop), or ordered loads (stored as Defs!), don't move this store.
   1322       // Could do better here, but this is conservatively correct.
   1323       // TODO: Cache set of Uses on the first walk in runOnLoop, update when
   1324       // moving accesses. Can also extend to dominating uses.
   1325       auto *SIMD = MSSA->getMemoryAccess(SI);
   1326       for (auto *BB : CurLoop->getBlocks())
   1327         if (auto *Accesses = MSSA->getBlockAccesses(BB)) {
   1328           for (const auto &MA : *Accesses)
   1329             if (const auto *MU = dyn_cast<MemoryUse>(&MA)) {
   1330               auto *MD = MU->getDefiningAccess();
   1331               if (!MSSA->isLiveOnEntryDef(MD) &&
   1332                   CurLoop->contains(MD->getBlock()))
   1333                 return false;
   1334               // Disable hoisting past potentially interfering loads. Optimized
   1335               // Uses may point to an access outside the loop, as getClobbering
   1336               // checks the previous iteration when walking the backedge.
   1337               // FIXME: More precise: no Uses that alias SI.
   1338               if (!Flags->getIsSink() && !MSSA->dominates(SIMD, MU))
   1339                 return false;
   1340             } else if (const auto *MD = dyn_cast<MemoryDef>(&MA)) {
   1341               if (auto *LI = dyn_cast<LoadInst>(MD->getMemoryInst())) {
   1342                 (void)LI; // Silence warning.
   1343                 assert(!LI->isUnordered() && "Expected unordered load");
   1344                 return false;
   1345               }
   1346               // Any call, while it may not be clobbering SI, it may be a use.
   1347               if (auto *CI = dyn_cast<CallInst>(MD->getMemoryInst())) {
   1348                 // Check if the call may read from the memory location written
   1349                 // to by SI. Check CI's attributes and arguments; the number of
   1350                 // such checks performed is limited above by NoOfMemAccTooLarge.
   1351                 ModRefInfo MRI = AA->getModRefInfo(CI, MemoryLocation::get(SI));
   1352                 if (isModOrRefSet(MRI))
   1353                   return false;
   1354               }
   1355             }
   1356         }
   1357       auto *Source = MSSA->getSkipSelfWalker()->getClobberingMemoryAccess(SI);
   1358       Flags->incrementClobberingCalls();
   1359       // If there are no clobbering Defs in the loop, store is safe to hoist.
   1360       return MSSA->isLiveOnEntryDef(Source) ||
   1361              !CurLoop->contains(Source->getBlock());
   1362     }
   1363   }
   1364 
   1365   assert(!I.mayReadOrWriteMemory() && "unhandled aliasing");
   1366 
   1367   // We've established mechanical ability and aliasing, it's up to the caller
   1368   // to check fault safety
   1369   return true;
   1370 }
   1371 
   1372 /// Returns true if a PHINode is a trivially replaceable with an
   1373 /// Instruction.
   1374 /// This is true when all incoming values are that instruction.
   1375 /// This pattern occurs most often with LCSSA PHI nodes.
   1376 ///
   1377 static bool isTriviallyReplaceablePHI(const PHINode &PN, const Instruction &I) {
   1378   for (const Value *IncValue : PN.incoming_values())
   1379     if (IncValue != &I)
   1380       return false;
   1381 
   1382   return true;
   1383 }
   1384 
   1385 /// Return true if the instruction is free in the loop.
   1386 static bool isFreeInLoop(const Instruction &I, const Loop *CurLoop,
   1387                          const TargetTransformInfo *TTI) {
   1388 
   1389   if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&I)) {
   1390     if (TTI->getUserCost(GEP, TargetTransformInfo::TCK_SizeAndLatency) !=
   1391         TargetTransformInfo::TCC_Free)
   1392       return false;
   1393     // For a GEP, we cannot simply use getUserCost because currently it
   1394     // optimistically assume that a GEP will fold into addressing mode
   1395     // regardless of its users.
   1396     const BasicBlock *BB = GEP->getParent();
   1397     for (const User *U : GEP->users()) {
   1398       const Instruction *UI = cast<Instruction>(U);
   1399       if (CurLoop->contains(UI) &&
   1400           (BB != UI->getParent() ||
   1401            (!isa<StoreInst>(UI) && !isa<LoadInst>(UI))))
   1402         return false;
   1403     }
   1404     return true;
   1405   } else
   1406     return TTI->getUserCost(&I, TargetTransformInfo::TCK_SizeAndLatency) ==
   1407            TargetTransformInfo::TCC_Free;
   1408 }
   1409 
   1410 /// Return true if the only users of this instruction are outside of
   1411 /// the loop. If this is true, we can sink the instruction to the exit
   1412 /// blocks of the loop.
   1413 ///
   1414 /// We also return true if the instruction could be folded away in lowering.
   1415 /// (e.g.,  a GEP can be folded into a load as an addressing mode in the loop).
   1416 static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop,
   1417                                   const LoopSafetyInfo *SafetyInfo,
   1418                                   TargetTransformInfo *TTI, bool &FreeInLoop) {
   1419   const auto &BlockColors = SafetyInfo->getBlockColors();
   1420   bool IsFree = isFreeInLoop(I, CurLoop, TTI);
   1421   for (const User *U : I.users()) {
   1422     const Instruction *UI = cast<Instruction>(U);
   1423     if (const PHINode *PN = dyn_cast<PHINode>(UI)) {
   1424       const BasicBlock *BB = PN->getParent();
   1425       // We cannot sink uses in catchswitches.
   1426       if (isa<CatchSwitchInst>(BB->getTerminator()))
   1427         return false;
   1428 
   1429       // We need to sink a callsite to a unique funclet.  Avoid sinking if the
   1430       // phi use is too muddled.
   1431       if (isa<CallInst>(I))
   1432         if (!BlockColors.empty() &&
   1433             BlockColors.find(const_cast<BasicBlock *>(BB))->second.size() != 1)
   1434           return false;
   1435     }
   1436 
   1437     if (CurLoop->contains(UI)) {
   1438       if (IsFree) {
   1439         FreeInLoop = true;
   1440         continue;
   1441       }
   1442       return false;
   1443     }
   1444   }
   1445   return true;
   1446 }
   1447 
   1448 static Instruction *cloneInstructionInExitBlock(
   1449     Instruction &I, BasicBlock &ExitBlock, PHINode &PN, const LoopInfo *LI,
   1450     const LoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU) {
   1451   Instruction *New;
   1452   if (auto *CI = dyn_cast<CallInst>(&I)) {
   1453     const auto &BlockColors = SafetyInfo->getBlockColors();
   1454 
   1455     // Sinking call-sites need to be handled differently from other
   1456     // instructions.  The cloned call-site needs a funclet bundle operand
   1457     // appropriate for its location in the CFG.
   1458     SmallVector<OperandBundleDef, 1> OpBundles;
   1459     for (unsigned BundleIdx = 0, BundleEnd = CI->getNumOperandBundles();
   1460          BundleIdx != BundleEnd; ++BundleIdx) {
   1461       OperandBundleUse Bundle = CI->getOperandBundleAt(BundleIdx);
   1462       if (Bundle.getTagID() == LLVMContext::OB_funclet)
   1463         continue;
   1464 
   1465       OpBundles.emplace_back(Bundle);
   1466     }
   1467 
   1468     if (!BlockColors.empty()) {
   1469       const ColorVector &CV = BlockColors.find(&ExitBlock)->second;
   1470       assert(CV.size() == 1 && "non-unique color for exit block!");
   1471       BasicBlock *BBColor = CV.front();
   1472       Instruction *EHPad = BBColor->getFirstNonPHI();
   1473       if (EHPad->isEHPad())
   1474         OpBundles.emplace_back("funclet", EHPad);
   1475     }
   1476 
   1477     New = CallInst::Create(CI, OpBundles);
   1478   } else {
   1479     New = I.clone();
   1480   }
   1481 
   1482   ExitBlock.getInstList().insert(ExitBlock.getFirstInsertionPt(), New);
   1483   if (!I.getName().empty())
   1484     New->setName(I.getName() + ".le");
   1485 
   1486   if (MSSAU && MSSAU->getMemorySSA()->getMemoryAccess(&I)) {
   1487     // Create a new MemoryAccess and let MemorySSA set its defining access.
   1488     MemoryAccess *NewMemAcc = MSSAU->createMemoryAccessInBB(
   1489         New, nullptr, New->getParent(), MemorySSA::Beginning);
   1490     if (NewMemAcc) {
   1491       if (auto *MemDef = dyn_cast<MemoryDef>(NewMemAcc))
   1492         MSSAU->insertDef(MemDef, /*RenameUses=*/true);
   1493       else {
   1494         auto *MemUse = cast<MemoryUse>(NewMemAcc);
   1495         MSSAU->insertUse(MemUse, /*RenameUses=*/true);
   1496       }
   1497     }
   1498   }
   1499 
   1500   // Build LCSSA PHI nodes for any in-loop operands (if legal).  Note that
   1501   // this is particularly cheap because we can rip off the PHI node that we're
   1502   // replacing for the number and blocks of the predecessors.
   1503   // OPT: If this shows up in a profile, we can instead finish sinking all
   1504   // invariant instructions, and then walk their operands to re-establish
   1505   // LCSSA. That will eliminate creating PHI nodes just to nuke them when
   1506   // sinking bottom-up.
   1507   for (Use &Op : New->operands())
   1508     if (LI->wouldBeOutOfLoopUseRequiringLCSSA(Op.get(), PN.getParent())) {
   1509       auto *OInst = cast<Instruction>(Op.get());
   1510       PHINode *OpPN =
   1511         PHINode::Create(OInst->getType(), PN.getNumIncomingValues(),
   1512                         OInst->getName() + ".lcssa", &ExitBlock.front());
   1513       for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i)
   1514         OpPN->addIncoming(OInst, PN.getIncomingBlock(i));
   1515       Op = OpPN;
   1516     }
   1517   return New;
   1518 }
   1519 
   1520 static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo,
   1521                              AliasSetTracker *AST, MemorySSAUpdater *MSSAU) {
   1522   if (AST)
   1523     AST->deleteValue(&I);
   1524   if (MSSAU)
   1525     MSSAU->removeMemoryAccess(&I);
   1526   SafetyInfo.removeInstruction(&I);
   1527   I.eraseFromParent();
   1528 }
   1529 
   1530 static void moveInstructionBefore(Instruction &I, Instruction &Dest,
   1531                                   ICFLoopSafetyInfo &SafetyInfo,
   1532                                   MemorySSAUpdater *MSSAU,
   1533                                   ScalarEvolution *SE) {
   1534   SafetyInfo.removeInstruction(&I);
   1535   SafetyInfo.insertInstructionTo(&I, Dest.getParent());
   1536   I.moveBefore(&Dest);
   1537   if (MSSAU)
   1538     if (MemoryUseOrDef *OldMemAcc = cast_or_null<MemoryUseOrDef>(
   1539             MSSAU->getMemorySSA()->getMemoryAccess(&I)))
   1540       MSSAU->moveToPlace(OldMemAcc, Dest.getParent(),
   1541                          MemorySSA::BeforeTerminator);
   1542   if (SE)
   1543     SE->forgetValue(&I);
   1544 }
   1545 
   1546 static Instruction *sinkThroughTriviallyReplaceablePHI(
   1547     PHINode *TPN, Instruction *I, LoopInfo *LI,
   1548     SmallDenseMap<BasicBlock *, Instruction *, 32> &SunkCopies,
   1549     const LoopSafetyInfo *SafetyInfo, const Loop *CurLoop,
   1550     MemorySSAUpdater *MSSAU) {
   1551   assert(isTriviallyReplaceablePHI(*TPN, *I) &&
   1552          "Expect only trivially replaceable PHI");
   1553   BasicBlock *ExitBlock = TPN->getParent();
   1554   Instruction *New;
   1555   auto It = SunkCopies.find(ExitBlock);
   1556   if (It != SunkCopies.end())
   1557     New = It->second;
   1558   else
   1559     New = SunkCopies[ExitBlock] = cloneInstructionInExitBlock(
   1560         *I, *ExitBlock, *TPN, LI, SafetyInfo, MSSAU);
   1561   return New;
   1562 }
   1563 
   1564 static bool canSplitPredecessors(PHINode *PN, LoopSafetyInfo *SafetyInfo) {
   1565   BasicBlock *BB = PN->getParent();
   1566   if (!BB->canSplitPredecessors())
   1567     return false;
   1568   // It's not impossible to split EHPad blocks, but if BlockColors already exist
   1569   // it require updating BlockColors for all offspring blocks accordingly. By
   1570   // skipping such corner case, we can make updating BlockColors after splitting
   1571   // predecessor fairly simple.
   1572   if (!SafetyInfo->getBlockColors().empty() && BB->getFirstNonPHI()->isEHPad())
   1573     return false;
   1574   for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
   1575     BasicBlock *BBPred = *PI;
   1576     if (isa<IndirectBrInst>(BBPred->getTerminator()) ||
   1577         isa<CallBrInst>(BBPred->getTerminator()))
   1578       return false;
   1579   }
   1580   return true;
   1581 }
   1582 
   1583 static void splitPredecessorsOfLoopExit(PHINode *PN, DominatorTree *DT,
   1584                                         LoopInfo *LI, const Loop *CurLoop,
   1585                                         LoopSafetyInfo *SafetyInfo,
   1586                                         MemorySSAUpdater *MSSAU) {
   1587 #ifndef NDEBUG
   1588   SmallVector<BasicBlock *, 32> ExitBlocks;
   1589   CurLoop->getUniqueExitBlocks(ExitBlocks);
   1590   SmallPtrSet<BasicBlock *, 32> ExitBlockSet(ExitBlocks.begin(),
   1591                                              ExitBlocks.end());
   1592 #endif
   1593   BasicBlock *ExitBB = PN->getParent();
   1594   assert(ExitBlockSet.count(ExitBB) && "Expect the PHI is in an exit block.");
   1595 
   1596   // Split predecessors of the loop exit to make instructions in the loop are
   1597   // exposed to exit blocks through trivially replaceable PHIs while keeping the
   1598   // loop in the canonical form where each predecessor of each exit block should
   1599   // be contained within the loop. For example, this will convert the loop below
   1600   // from
   1601   //
   1602   // LB1:
   1603   //   %v1 =
   1604   //   br %LE, %LB2
   1605   // LB2:
   1606   //   %v2 =
   1607   //   br %LE, %LB1
   1608   // LE:
   1609   //   %p = phi [%v1, %LB1], [%v2, %LB2] <-- non-trivially replaceable
   1610   //
   1611   // to
   1612   //
   1613   // LB1:
   1614   //   %v1 =
   1615   //   br %LE.split, %LB2
   1616   // LB2:
   1617   //   %v2 =
   1618   //   br %LE.split2, %LB1
   1619   // LE.split:
   1620   //   %p1 = phi [%v1, %LB1]  <-- trivially replaceable
   1621   //   br %LE
   1622   // LE.split2:
   1623   //   %p2 = phi [%v2, %LB2]  <-- trivially replaceable
   1624   //   br %LE
   1625   // LE:
   1626   //   %p = phi [%p1, %LE.split], [%p2, %LE.split2]
   1627   //
   1628   const auto &BlockColors = SafetyInfo->getBlockColors();
   1629   SmallSetVector<BasicBlock *, 8> PredBBs(pred_begin(ExitBB), pred_end(ExitBB));
   1630   while (!PredBBs.empty()) {
   1631     BasicBlock *PredBB = *PredBBs.begin();
   1632     assert(CurLoop->contains(PredBB) &&
   1633            "Expect all predecessors are in the loop");
   1634     if (PN->getBasicBlockIndex(PredBB) >= 0) {
   1635       BasicBlock *NewPred = SplitBlockPredecessors(
   1636           ExitBB, PredBB, ".split.loop.exit", DT, LI, MSSAU, true);
   1637       // Since we do not allow splitting EH-block with BlockColors in
   1638       // canSplitPredecessors(), we can simply assign predecessor's color to
   1639       // the new block.
   1640       if (!BlockColors.empty())
   1641         // Grab a reference to the ColorVector to be inserted before getting the
   1642         // reference to the vector we are copying because inserting the new
   1643         // element in BlockColors might cause the map to be reallocated.
   1644         SafetyInfo->copyColors(NewPred, PredBB);
   1645     }
   1646     PredBBs.remove(PredBB);
   1647   }
   1648 }
   1649 
   1650 /// When an instruction is found to only be used outside of the loop, this
   1651 /// function moves it to the exit blocks and patches up SSA form as needed.
   1652 /// This method is guaranteed to remove the original instruction from its
   1653 /// position, and may either delete it or move it to outside of the loop.
   1654 ///
   1655 static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
   1656                  BlockFrequencyInfo *BFI, const Loop *CurLoop,
   1657                  ICFLoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU,
   1658                  OptimizationRemarkEmitter *ORE) {
   1659   bool Changed = false;
   1660   LLVM_DEBUG(dbgs() << "LICM sinking instruction: " << I << "\n");
   1661 
   1662   // Iterate over users to be ready for actual sinking. Replace users via
   1663   // unreachable blocks with undef and make all user PHIs trivially replaceable.
   1664   SmallPtrSet<Instruction *, 8> VisitedUsers;
   1665   for (Value::user_iterator UI = I.user_begin(), UE = I.user_end(); UI != UE;) {
   1666     auto *User = cast<Instruction>(*UI);
   1667     Use &U = UI.getUse();
   1668     ++UI;
   1669 
   1670     if (VisitedUsers.count(User) || CurLoop->contains(User))
   1671       continue;
   1672 
   1673     if (!DT->isReachableFromEntry(User->getParent())) {
   1674       U = UndefValue::get(I.getType());
   1675       Changed = true;
   1676       continue;
   1677     }
   1678 
   1679     // The user must be a PHI node.
   1680     PHINode *PN = cast<PHINode>(User);
   1681 
   1682     // Surprisingly, instructions can be used outside of loops without any
   1683     // exits.  This can only happen in PHI nodes if the incoming block is
   1684     // unreachable.
   1685     BasicBlock *BB = PN->getIncomingBlock(U);
   1686     if (!DT->isReachableFromEntry(BB)) {
   1687       U = UndefValue::get(I.getType());
   1688       Changed = true;
   1689       continue;
   1690     }
   1691 
   1692     VisitedUsers.insert(PN);
   1693     if (isTriviallyReplaceablePHI(*PN, I))
   1694       continue;
   1695 
   1696     if (!canSplitPredecessors(PN, SafetyInfo))
   1697       return Changed;
   1698 
   1699     // Split predecessors of the PHI so that we can make users trivially
   1700     // replaceable.
   1701     splitPredecessorsOfLoopExit(PN, DT, LI, CurLoop, SafetyInfo, MSSAU);
   1702 
   1703     // Should rebuild the iterators, as they may be invalidated by
   1704     // splitPredecessorsOfLoopExit().
   1705     UI = I.user_begin();
   1706     UE = I.user_end();
   1707   }
   1708 
   1709   if (VisitedUsers.empty())
   1710     return Changed;
   1711 
   1712   ORE->emit([&]() {
   1713     return OptimizationRemark(DEBUG_TYPE, "InstSunk", &I)
   1714            << "sinking " << ore::NV("Inst", &I);
   1715   });
   1716   if (isa<LoadInst>(I))
   1717     ++NumMovedLoads;
   1718   else if (isa<CallInst>(I))
   1719     ++NumMovedCalls;
   1720   ++NumSunk;
   1721 
   1722 #ifndef NDEBUG
   1723   SmallVector<BasicBlock *, 32> ExitBlocks;
   1724   CurLoop->getUniqueExitBlocks(ExitBlocks);
   1725   SmallPtrSet<BasicBlock *, 32> ExitBlockSet(ExitBlocks.begin(),
   1726                                              ExitBlocks.end());
   1727 #endif
   1728 
   1729   // Clones of this instruction. Don't create more than one per exit block!
   1730   SmallDenseMap<BasicBlock *, Instruction *, 32> SunkCopies;
   1731 
   1732   // If this instruction is only used outside of the loop, then all users are
   1733   // PHI nodes in exit blocks due to LCSSA form. Just RAUW them with clones of
   1734   // the instruction.
   1735   // First check if I is worth sinking for all uses. Sink only when it is worth
   1736   // across all uses.
   1737   SmallSetVector<User*, 8> Users(I.user_begin(), I.user_end());
   1738   SmallVector<PHINode *, 8> ExitPNs;
   1739   for (auto *UI : Users) {
   1740     auto *User = cast<Instruction>(UI);
   1741 
   1742     if (CurLoop->contains(User))
   1743       continue;
   1744 
   1745     PHINode *PN = cast<PHINode>(User);
   1746     assert(ExitBlockSet.count(PN->getParent()) &&
   1747            "The LCSSA PHI is not in an exit block!");
   1748     if (!worthSinkOrHoistInst(I, PN->getParent(), ORE, BFI)) {
   1749       return Changed;
   1750     }
   1751 
   1752     ExitPNs.push_back(PN);
   1753   }
   1754 
   1755   for (auto *PN : ExitPNs) {
   1756 
   1757     // The PHI must be trivially replaceable.
   1758     Instruction *New = sinkThroughTriviallyReplaceablePHI(
   1759         PN, &I, LI, SunkCopies, SafetyInfo, CurLoop, MSSAU);
   1760     PN->replaceAllUsesWith(New);
   1761     eraseInstruction(*PN, *SafetyInfo, nullptr, nullptr);
   1762     Changed = true;
   1763   }
   1764   return Changed;
   1765 }
   1766 
   1767 /// When an instruction is found to only use loop invariant operands that
   1768 /// is safe to hoist, this instruction is called to do the dirty work.
   1769 ///
   1770 static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
   1771                   BasicBlock *Dest, ICFLoopSafetyInfo *SafetyInfo,
   1772                   MemorySSAUpdater *MSSAU, ScalarEvolution *SE,
   1773                   OptimizationRemarkEmitter *ORE) {
   1774   LLVM_DEBUG(dbgs() << "LICM hoisting to " << Dest->getNameOrAsOperand() << ": "
   1775                     << I << "\n");
   1776   ORE->emit([&]() {
   1777     return OptimizationRemark(DEBUG_TYPE, "Hoisted", &I) << "hoisting "
   1778                                                          << ore::NV("Inst", &I);
   1779   });
   1780 
   1781   // Metadata can be dependent on conditions we are hoisting above.
   1782   // Conservatively strip all metadata on the instruction unless we were
   1783   // guaranteed to execute I if we entered the loop, in which case the metadata
   1784   // is valid in the loop preheader.
   1785   if (I.hasMetadataOtherThanDebugLoc() &&
   1786       // The check on hasMetadataOtherThanDebugLoc is to prevent us from burning
   1787       // time in isGuaranteedToExecute if we don't actually have anything to
   1788       // drop.  It is a compile time optimization, not required for correctness.
   1789       !SafetyInfo->isGuaranteedToExecute(I, DT, CurLoop))
   1790     I.dropUnknownNonDebugMetadata();
   1791 
   1792   if (isa<PHINode>(I))
   1793     // Move the new node to the end of the phi list in the destination block.
   1794     moveInstructionBefore(I, *Dest->getFirstNonPHI(), *SafetyInfo, MSSAU, SE);
   1795   else
   1796     // Move the new node to the destination block, before its terminator.
   1797     moveInstructionBefore(I, *Dest->getTerminator(), *SafetyInfo, MSSAU, SE);
   1798 
   1799   I.updateLocationAfterHoist();
   1800 
   1801   if (isa<LoadInst>(I))
   1802     ++NumMovedLoads;
   1803   else if (isa<CallInst>(I))
   1804     ++NumMovedCalls;
   1805   ++NumHoisted;
   1806 }
   1807 
   1808 /// Only sink or hoist an instruction if it is not a trapping instruction,
   1809 /// or if the instruction is known not to trap when moved to the preheader.
   1810 /// or if it is a trapping instruction and is guaranteed to execute.
   1811 static bool isSafeToExecuteUnconditionally(Instruction &Inst,
   1812                                            const DominatorTree *DT,
   1813                                            const TargetLibraryInfo *TLI,
   1814                                            const Loop *CurLoop,
   1815                                            const LoopSafetyInfo *SafetyInfo,
   1816                                            OptimizationRemarkEmitter *ORE,
   1817                                            const Instruction *CtxI) {
   1818   if (isSafeToSpeculativelyExecute(&Inst, CtxI, DT, TLI))
   1819     return true;
   1820 
   1821   bool GuaranteedToExecute =
   1822       SafetyInfo->isGuaranteedToExecute(Inst, DT, CurLoop);
   1823 
   1824   if (!GuaranteedToExecute) {
   1825     auto *LI = dyn_cast<LoadInst>(&Inst);
   1826     if (LI && CurLoop->isLoopInvariant(LI->getPointerOperand()))
   1827       ORE->emit([&]() {
   1828         return OptimizationRemarkMissed(
   1829                    DEBUG_TYPE, "LoadWithLoopInvariantAddressCondExecuted", LI)
   1830                << "failed to hoist load with loop-invariant address "
   1831                   "because load is conditionally executed";
   1832       });
   1833   }
   1834 
   1835   return GuaranteedToExecute;
   1836 }
   1837 
   1838 namespace {
   1839 class LoopPromoter : public LoadAndStorePromoter {
   1840   Value *SomePtr; // Designated pointer to store to.
   1841   const SmallSetVector<Value *, 8> &PointerMustAliases;
   1842   SmallVectorImpl<BasicBlock *> &LoopExitBlocks;
   1843   SmallVectorImpl<Instruction *> &LoopInsertPts;
   1844   SmallVectorImpl<MemoryAccess *> &MSSAInsertPts;
   1845   PredIteratorCache &PredCache;
   1846   AliasSetTracker *AST;
   1847   MemorySSAUpdater *MSSAU;
   1848   LoopInfo &LI;
   1849   DebugLoc DL;
   1850   int Alignment;
   1851   bool UnorderedAtomic;
   1852   AAMDNodes AATags;
   1853   ICFLoopSafetyInfo &SafetyInfo;
   1854 
   1855   // We're about to add a use of V in a loop exit block.  Insert an LCSSA phi
   1856   // (if legal) if doing so would add an out-of-loop use to an instruction
   1857   // defined in-loop.
   1858   Value *maybeInsertLCSSAPHI(Value *V, BasicBlock *BB) const {
   1859     if (!LI.wouldBeOutOfLoopUseRequiringLCSSA(V, BB))
   1860       return V;
   1861 
   1862     Instruction *I = cast<Instruction>(V);
   1863     // We need to create an LCSSA PHI node for the incoming value and
   1864     // store that.
   1865     PHINode *PN = PHINode::Create(I->getType(), PredCache.size(BB),
   1866                                   I->getName() + ".lcssa", &BB->front());
   1867     for (BasicBlock *Pred : PredCache.get(BB))
   1868       PN->addIncoming(I, Pred);
   1869     return PN;
   1870   }
   1871 
   1872 public:
   1873   LoopPromoter(Value *SP, ArrayRef<const Instruction *> Insts, SSAUpdater &S,
   1874                const SmallSetVector<Value *, 8> &PMA,
   1875                SmallVectorImpl<BasicBlock *> &LEB,
   1876                SmallVectorImpl<Instruction *> &LIP,
   1877                SmallVectorImpl<MemoryAccess *> &MSSAIP, PredIteratorCache &PIC,
   1878                AliasSetTracker *ast, MemorySSAUpdater *MSSAU, LoopInfo &li,
   1879                DebugLoc dl, int alignment, bool UnorderedAtomic,
   1880                const AAMDNodes &AATags, ICFLoopSafetyInfo &SafetyInfo)
   1881       : LoadAndStorePromoter(Insts, S), SomePtr(SP), PointerMustAliases(PMA),
   1882         LoopExitBlocks(LEB), LoopInsertPts(LIP), MSSAInsertPts(MSSAIP),
   1883         PredCache(PIC), AST(ast), MSSAU(MSSAU), LI(li), DL(std::move(dl)),
   1884         Alignment(alignment), UnorderedAtomic(UnorderedAtomic), AATags(AATags),
   1885         SafetyInfo(SafetyInfo) {}
   1886 
   1887   bool isInstInList(Instruction *I,
   1888                     const SmallVectorImpl<Instruction *> &) const override {
   1889     Value *Ptr;
   1890     if (LoadInst *LI = dyn_cast<LoadInst>(I))
   1891       Ptr = LI->getOperand(0);
   1892     else
   1893       Ptr = cast<StoreInst>(I)->getPointerOperand();
   1894     return PointerMustAliases.count(Ptr);
   1895   }
   1896 
   1897   void doExtraRewritesBeforeFinalDeletion() override {
   1898     // Insert stores after in the loop exit blocks.  Each exit block gets a
   1899     // store of the live-out values that feed them.  Since we've already told
   1900     // the SSA updater about the defs in the loop and the preheader
   1901     // definition, it is all set and we can start using it.
   1902     for (unsigned i = 0, e = LoopExitBlocks.size(); i != e; ++i) {
   1903       BasicBlock *ExitBlock = LoopExitBlocks[i];
   1904       Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock);
   1905       LiveInValue = maybeInsertLCSSAPHI(LiveInValue, ExitBlock);
   1906       Value *Ptr = maybeInsertLCSSAPHI(SomePtr, ExitBlock);
   1907       Instruction *InsertPos = LoopInsertPts[i];
   1908       StoreInst *NewSI = new StoreInst(LiveInValue, Ptr, InsertPos);
   1909       if (UnorderedAtomic)
   1910         NewSI->setOrdering(AtomicOrdering::Unordered);
   1911       NewSI->setAlignment(Align(Alignment));
   1912       NewSI->setDebugLoc(DL);
   1913       if (AATags)
   1914         NewSI->setAAMetadata(AATags);
   1915 
   1916       if (MSSAU) {
   1917         MemoryAccess *MSSAInsertPoint = MSSAInsertPts[i];
   1918         MemoryAccess *NewMemAcc;
   1919         if (!MSSAInsertPoint) {
   1920           NewMemAcc = MSSAU->createMemoryAccessInBB(
   1921               NewSI, nullptr, NewSI->getParent(), MemorySSA::Beginning);
   1922         } else {
   1923           NewMemAcc =
   1924               MSSAU->createMemoryAccessAfter(NewSI, nullptr, MSSAInsertPoint);
   1925         }
   1926         MSSAInsertPts[i] = NewMemAcc;
   1927         MSSAU->insertDef(cast<MemoryDef>(NewMemAcc), true);
   1928         // FIXME: true for safety, false may still be correct.
   1929       }
   1930     }
   1931   }
   1932 
   1933   void replaceLoadWithValue(LoadInst *LI, Value *V) const override {
   1934     // Update alias analysis.
   1935     if (AST)
   1936       AST->copyValue(LI, V);
   1937   }
   1938   void instructionDeleted(Instruction *I) const override {
   1939     SafetyInfo.removeInstruction(I);
   1940     if (AST)
   1941       AST->deleteValue(I);
   1942     if (MSSAU)
   1943       MSSAU->removeMemoryAccess(I);
   1944   }
   1945 };
   1946 
   1947 bool isNotCapturedBeforeOrInLoop(const Value *V, const Loop *L,
   1948                                  DominatorTree *DT) {
   1949   // We can perform the captured-before check against any instruction in the
   1950   // loop header, as the loop header is reachable from any instruction inside
   1951   // the loop.
   1952   // TODO: ReturnCaptures=true shouldn't be necessary here.
   1953   return !PointerMayBeCapturedBefore(V, /* ReturnCaptures */ true,
   1954                                      /* StoreCaptures */ true,
   1955                                      L->getHeader()->getTerminator(), DT);
   1956 }
   1957 
   1958 /// Return true iff we can prove that a caller of this function can not inspect
   1959 /// the contents of the provided object in a well defined program.
   1960 bool isKnownNonEscaping(Value *Object, const Loop *L,
   1961                         const TargetLibraryInfo *TLI, DominatorTree *DT) {
   1962   if (isa<AllocaInst>(Object))
   1963     // Since the alloca goes out of scope, we know the caller can't retain a
   1964     // reference to it and be well defined.  Thus, we don't need to check for
   1965     // capture.
   1966     return true;
   1967 
   1968   // For all other objects we need to know that the caller can't possibly
   1969   // have gotten a reference to the object.  There are two components of
   1970   // that:
   1971   //   1) Object can't be escaped by this function.  This is what
   1972   //      PointerMayBeCaptured checks.
   1973   //   2) Object can't have been captured at definition site.  For this, we
   1974   //      need to know the return value is noalias.  At the moment, we use a
   1975   //      weaker condition and handle only AllocLikeFunctions (which are
   1976   //      known to be noalias).  TODO
   1977   return isAllocLikeFn(Object, TLI) &&
   1978          isNotCapturedBeforeOrInLoop(Object, L, DT);
   1979 }
   1980 
   1981 } // namespace
   1982 
   1983 /// Try to promote memory values to scalars by sinking stores out of the
   1984 /// loop and moving loads to before the loop.  We do this by looping over
   1985 /// the stores in the loop, looking for stores to Must pointers which are
   1986 /// loop invariant.
   1987 ///
   1988 bool llvm::promoteLoopAccessesToScalars(
   1989     const SmallSetVector<Value *, 8> &PointerMustAliases,
   1990     SmallVectorImpl<BasicBlock *> &ExitBlocks,
   1991     SmallVectorImpl<Instruction *> &InsertPts,
   1992     SmallVectorImpl<MemoryAccess *> &MSSAInsertPts, PredIteratorCache &PIC,
   1993     LoopInfo *LI, DominatorTree *DT, const TargetLibraryInfo *TLI,
   1994     Loop *CurLoop, AliasSetTracker *CurAST, MemorySSAUpdater *MSSAU,
   1995     ICFLoopSafetyInfo *SafetyInfo, OptimizationRemarkEmitter *ORE) {
   1996   // Verify inputs.
   1997   assert(LI != nullptr && DT != nullptr && CurLoop != nullptr &&
   1998          SafetyInfo != nullptr &&
   1999          "Unexpected Input to promoteLoopAccessesToScalars");
   2000 
   2001   Value *SomePtr = *PointerMustAliases.begin();
   2002   BasicBlock *Preheader = CurLoop->getLoopPreheader();
   2003 
   2004   // It is not safe to promote a load/store from the loop if the load/store is
   2005   // conditional.  For example, turning:
   2006   //
   2007   //    for () { if (c) *P += 1; }
   2008   //
   2009   // into:
   2010   //
   2011   //    tmp = *P;  for () { if (c) tmp +=1; } *P = tmp;
   2012   //
   2013   // is not safe, because *P may only be valid to access if 'c' is true.
   2014   //
   2015   // The safety property divides into two parts:
   2016   // p1) The memory may not be dereferenceable on entry to the loop.  In this
   2017   //    case, we can't insert the required load in the preheader.
   2018   // p2) The memory model does not allow us to insert a store along any dynamic
   2019   //    path which did not originally have one.
   2020   //
   2021   // If at least one store is guaranteed to execute, both properties are
   2022   // satisfied, and promotion is legal.
   2023   //
   2024   // This, however, is not a necessary condition. Even if no store/load is
   2025   // guaranteed to execute, we can still establish these properties.
   2026   // We can establish (p1) by proving that hoisting the load into the preheader
   2027   // is safe (i.e. proving dereferenceability on all paths through the loop). We
   2028   // can use any access within the alias set to prove dereferenceability,
   2029   // since they're all must alias.
   2030   //
   2031   // There are two ways establish (p2):
   2032   // a) Prove the location is thread-local. In this case the memory model
   2033   // requirement does not apply, and stores are safe to insert.
   2034   // b) Prove a store dominates every exit block. In this case, if an exit
   2035   // blocks is reached, the original dynamic path would have taken us through
   2036   // the store, so inserting a store into the exit block is safe. Note that this
   2037   // is different from the store being guaranteed to execute. For instance,
   2038   // if an exception is thrown on the first iteration of the loop, the original
   2039   // store is never executed, but the exit blocks are not executed either.
   2040 
   2041   bool DereferenceableInPH = false;
   2042   bool SafeToInsertStore = false;
   2043 
   2044   SmallVector<Instruction *, 64> LoopUses;
   2045 
   2046   // We start with an alignment of one and try to find instructions that allow
   2047   // us to prove better alignment.
   2048   Align Alignment;
   2049   // Keep track of which types of access we see
   2050   bool SawUnorderedAtomic = false;
   2051   bool SawNotAtomic = false;
   2052   AAMDNodes AATags;
   2053 
   2054   const DataLayout &MDL = Preheader->getModule()->getDataLayout();
   2055 
   2056   bool IsKnownThreadLocalObject = false;
   2057   if (SafetyInfo->anyBlockMayThrow()) {
   2058     // If a loop can throw, we have to insert a store along each unwind edge.
   2059     // That said, we can't actually make the unwind edge explicit. Therefore,
   2060     // we have to prove that the store is dead along the unwind edge.  We do
   2061     // this by proving that the caller can't have a reference to the object
   2062     // after return and thus can't possibly load from the object.
   2063     Value *Object = getUnderlyingObject(SomePtr);
   2064     if (!isKnownNonEscaping(Object, CurLoop, TLI, DT))
   2065       return false;
   2066     // Subtlety: Alloca's aren't visible to callers, but *are* potentially
   2067     // visible to other threads if captured and used during their lifetimes.
   2068     IsKnownThreadLocalObject = !isa<AllocaInst>(Object);
   2069   }
   2070 
   2071   // Check that all of the pointers in the alias set have the same type.  We
   2072   // cannot (yet) promote a memory location that is loaded and stored in
   2073   // different sizes.  While we are at it, collect alignment and AA info.
   2074   for (Value *ASIV : PointerMustAliases) {
   2075     // Check that all of the pointers in the alias set have the same type.  We
   2076     // cannot (yet) promote a memory location that is loaded and stored in
   2077     // different sizes.
   2078     if (SomePtr->getType() != ASIV->getType())
   2079       return false;
   2080 
   2081     for (User *U : ASIV->users()) {
   2082       // Ignore instructions that are outside the loop.
   2083       Instruction *UI = dyn_cast<Instruction>(U);
   2084       if (!UI || !CurLoop->contains(UI))
   2085         continue;
   2086 
   2087       // If there is an non-load/store instruction in the loop, we can't promote
   2088       // it.
   2089       if (LoadInst *Load = dyn_cast<LoadInst>(UI)) {
   2090         if (!Load->isUnordered())
   2091           return false;
   2092 
   2093         SawUnorderedAtomic |= Load->isAtomic();
   2094         SawNotAtomic |= !Load->isAtomic();
   2095 
   2096         Align InstAlignment = Load->getAlign();
   2097 
   2098         // Note that proving a load safe to speculate requires proving
   2099         // sufficient alignment at the target location.  Proving it guaranteed
   2100         // to execute does as well.  Thus we can increase our guaranteed
   2101         // alignment as well.
   2102         if (!DereferenceableInPH || (InstAlignment > Alignment))
   2103           if (isSafeToExecuteUnconditionally(*Load, DT, TLI, CurLoop,
   2104                                              SafetyInfo, ORE,
   2105                                              Preheader->getTerminator())) {
   2106             DereferenceableInPH = true;
   2107             Alignment = std::max(Alignment, InstAlignment);
   2108           }
   2109       } else if (const StoreInst *Store = dyn_cast<StoreInst>(UI)) {
   2110         // Stores *of* the pointer are not interesting, only stores *to* the
   2111         // pointer.
   2112         if (UI->getOperand(1) != ASIV)
   2113           continue;
   2114         if (!Store->isUnordered())
   2115           return false;
   2116 
   2117         SawUnorderedAtomic |= Store->isAtomic();
   2118         SawNotAtomic |= !Store->isAtomic();
   2119 
   2120         // If the store is guaranteed to execute, both properties are satisfied.
   2121         // We may want to check if a store is guaranteed to execute even if we
   2122         // already know that promotion is safe, since it may have higher
   2123         // alignment than any other guaranteed stores, in which case we can
   2124         // raise the alignment on the promoted store.
   2125         Align InstAlignment = Store->getAlign();
   2126 
   2127         if (!DereferenceableInPH || !SafeToInsertStore ||
   2128             (InstAlignment > Alignment)) {
   2129           if (SafetyInfo->isGuaranteedToExecute(*UI, DT, CurLoop)) {
   2130             DereferenceableInPH = true;
   2131             SafeToInsertStore = true;
   2132             Alignment = std::max(Alignment, InstAlignment);
   2133           }
   2134         }
   2135 
   2136         // If a store dominates all exit blocks, it is safe to sink.
   2137         // As explained above, if an exit block was executed, a dominating
   2138         // store must have been executed at least once, so we are not
   2139         // introducing stores on paths that did not have them.
   2140         // Note that this only looks at explicit exit blocks. If we ever
   2141         // start sinking stores into unwind edges (see above), this will break.
   2142         if (!SafeToInsertStore)
   2143           SafeToInsertStore = llvm::all_of(ExitBlocks, [&](BasicBlock *Exit) {
   2144             return DT->dominates(Store->getParent(), Exit);
   2145           });
   2146 
   2147         // If the store is not guaranteed to execute, we may still get
   2148         // deref info through it.
   2149         if (!DereferenceableInPH) {
   2150           DereferenceableInPH = isDereferenceableAndAlignedPointer(
   2151               Store->getPointerOperand(), Store->getValueOperand()->getType(),
   2152               Store->getAlign(), MDL, Preheader->getTerminator(), DT, TLI);
   2153         }
   2154       } else
   2155         return false; // Not a load or store.
   2156 
   2157       // Merge the AA tags.
   2158       if (LoopUses.empty()) {
   2159         // On the first load/store, just take its AA tags.
   2160         UI->getAAMetadata(AATags);
   2161       } else if (AATags) {
   2162         UI->getAAMetadata(AATags, /* Merge = */ true);
   2163       }
   2164 
   2165       LoopUses.push_back(UI);
   2166     }
   2167   }
   2168 
   2169   // If we found both an unordered atomic instruction and a non-atomic memory
   2170   // access, bail.  We can't blindly promote non-atomic to atomic since we
   2171   // might not be able to lower the result.  We can't downgrade since that
   2172   // would violate memory model.  Also, align 0 is an error for atomics.
   2173   if (SawUnorderedAtomic && SawNotAtomic)
   2174     return false;
   2175 
   2176   // If we're inserting an atomic load in the preheader, we must be able to
   2177   // lower it.  We're only guaranteed to be able to lower naturally aligned
   2178   // atomics.
   2179   auto *SomePtrElemType = SomePtr->getType()->getPointerElementType();
   2180   if (SawUnorderedAtomic &&
   2181       Alignment < MDL.getTypeStoreSize(SomePtrElemType))
   2182     return false;
   2183 
   2184   // If we couldn't prove we can hoist the load, bail.
   2185   if (!DereferenceableInPH)
   2186     return false;
   2187 
   2188   // We know we can hoist the load, but don't have a guaranteed store.
   2189   // Check whether the location is thread-local. If it is, then we can insert
   2190   // stores along paths which originally didn't have them without violating the
   2191   // memory model.
   2192   if (!SafeToInsertStore) {
   2193     if (IsKnownThreadLocalObject)
   2194       SafeToInsertStore = true;
   2195     else {
   2196       Value *Object = getUnderlyingObject(SomePtr);
   2197       SafeToInsertStore =
   2198           (isAllocLikeFn(Object, TLI) || isa<AllocaInst>(Object)) &&
   2199           isNotCapturedBeforeOrInLoop(Object, CurLoop, DT);
   2200     }
   2201   }
   2202 
   2203   // If we've still failed to prove we can sink the store, give up.
   2204   if (!SafeToInsertStore)
   2205     return false;
   2206 
   2207   // Otherwise, this is safe to promote, lets do it!
   2208   LLVM_DEBUG(dbgs() << "LICM: Promoting value stored to in loop: " << *SomePtr
   2209                     << '\n');
   2210   ORE->emit([&]() {
   2211     return OptimizationRemark(DEBUG_TYPE, "PromoteLoopAccessesToScalar",
   2212                               LoopUses[0])
   2213            << "Moving accesses to memory location out of the loop";
   2214   });
   2215   ++NumPromoted;
   2216 
   2217   // Look at all the loop uses, and try to merge their locations.
   2218   std::vector<const DILocation *> LoopUsesLocs;
   2219   for (auto U : LoopUses)
   2220     LoopUsesLocs.push_back(U->getDebugLoc().get());
   2221   auto DL = DebugLoc(DILocation::getMergedLocations(LoopUsesLocs));
   2222 
   2223   // We use the SSAUpdater interface to insert phi nodes as required.
   2224   SmallVector<PHINode *, 16> NewPHIs;
   2225   SSAUpdater SSA(&NewPHIs);
   2226   LoopPromoter Promoter(SomePtr, LoopUses, SSA, PointerMustAliases, ExitBlocks,
   2227                         InsertPts, MSSAInsertPts, PIC, CurAST, MSSAU, *LI, DL,
   2228                         Alignment.value(), SawUnorderedAtomic, AATags,
   2229                         *SafetyInfo);
   2230 
   2231   // Set up the preheader to have a definition of the value.  It is the live-out
   2232   // value from the preheader that uses in the loop will use.
   2233   LoadInst *PreheaderLoad = new LoadInst(
   2234       SomePtr->getType()->getPointerElementType(), SomePtr,
   2235       SomePtr->getName() + ".promoted", Preheader->getTerminator());
   2236   if (SawUnorderedAtomic)
   2237     PreheaderLoad->setOrdering(AtomicOrdering::Unordered);
   2238   PreheaderLoad->setAlignment(Alignment);
   2239   PreheaderLoad->setDebugLoc(DebugLoc());
   2240   if (AATags)
   2241     PreheaderLoad->setAAMetadata(AATags);
   2242   SSA.AddAvailableValue(Preheader, PreheaderLoad);
   2243 
   2244   if (MSSAU) {
   2245     MemoryAccess *PreheaderLoadMemoryAccess = MSSAU->createMemoryAccessInBB(
   2246         PreheaderLoad, nullptr, PreheaderLoad->getParent(), MemorySSA::End);
   2247     MemoryUse *NewMemUse = cast<MemoryUse>(PreheaderLoadMemoryAccess);
   2248     MSSAU->insertUse(NewMemUse, /*RenameUses=*/true);
   2249   }
   2250 
   2251   if (MSSAU && VerifyMemorySSA)
   2252     MSSAU->getMemorySSA()->verifyMemorySSA();
   2253   // Rewrite all the loads in the loop and remember all the definitions from
   2254   // stores in the loop.
   2255   Promoter.run(LoopUses);
   2256 
   2257   if (MSSAU && VerifyMemorySSA)
   2258     MSSAU->getMemorySSA()->verifyMemorySSA();
   2259   // If the SSAUpdater didn't use the load in the preheader, just zap it now.
   2260   if (PreheaderLoad->use_empty())
   2261     eraseInstruction(*PreheaderLoad, *SafetyInfo, CurAST, MSSAU);
   2262 
   2263   return true;
   2264 }
   2265 
   2266 static void foreachMemoryAccess(MemorySSA *MSSA, Loop *L,
   2267                                 function_ref<void(Instruction *)> Fn) {
   2268   for (const BasicBlock *BB : L->blocks())
   2269     if (const auto *Accesses = MSSA->getBlockAccesses(BB))
   2270       for (const auto &Access : *Accesses)
   2271         if (const auto *MUD = dyn_cast<MemoryUseOrDef>(&Access))
   2272           Fn(MUD->getMemoryInst());
   2273 }
   2274 
   2275 static SmallVector<SmallSetVector<Value *, 8>, 0>
   2276 collectPromotionCandidates(MemorySSA *MSSA, AliasAnalysis *AA, Loop *L) {
   2277   AliasSetTracker AST(*AA);
   2278 
   2279   auto IsPotentiallyPromotable = [L](const Instruction *I) {
   2280     if (const auto *SI = dyn_cast<StoreInst>(I))
   2281       return L->isLoopInvariant(SI->getPointerOperand());
   2282     if (const auto *LI = dyn_cast<LoadInst>(I))
   2283       return L->isLoopInvariant(LI->getPointerOperand());
   2284     return false;
   2285   };
   2286 
   2287   // Populate AST with potentially promotable accesses and remove them from
   2288   // MaybePromotable, so they will not be checked again on the next iteration.
   2289   SmallPtrSet<Value *, 16> AttemptingPromotion;
   2290   foreachMemoryAccess(MSSA, L, [&](Instruction *I) {
   2291     if (IsPotentiallyPromotable(I)) {
   2292       AttemptingPromotion.insert(I);
   2293       AST.add(I);
   2294     }
   2295   });
   2296 
   2297   // We're only interested in must-alias sets that contain a mod.
   2298   SmallVector<const AliasSet *, 8> Sets;
   2299   for (AliasSet &AS : AST)
   2300     if (!AS.isForwardingAliasSet() && AS.isMod() && AS.isMustAlias())
   2301       Sets.push_back(&AS);
   2302 
   2303   if (Sets.empty())
   2304     return {}; // Nothing to promote...
   2305 
   2306   // Discard any sets for which there is an aliasing non-promotable access.
   2307   foreachMemoryAccess(MSSA, L, [&](Instruction *I) {
   2308     if (AttemptingPromotion.contains(I))
   2309       return;
   2310 
   2311     llvm::erase_if(Sets, [&](const AliasSet *AS) {
   2312       return AS->aliasesUnknownInst(I, *AA);
   2313     });
   2314   });
   2315 
   2316   SmallVector<SmallSetVector<Value *, 8>, 0> Result;
   2317   for (const AliasSet *Set : Sets) {
   2318     SmallSetVector<Value *, 8> PointerMustAliases;
   2319     for (const auto &ASI : *Set)
   2320       PointerMustAliases.insert(ASI.getValue());
   2321     Result.push_back(std::move(PointerMustAliases));
   2322   }
   2323 
   2324   return Result;
   2325 }
   2326 
   2327 /// Returns an owning pointer to an alias set which incorporates aliasing info
   2328 /// from L and all subloops of L.
   2329 std::unique_ptr<AliasSetTracker>
   2330 LoopInvariantCodeMotion::collectAliasInfoForLoop(Loop *L, LoopInfo *LI,
   2331                                                  AAResults *AA) {
   2332   auto CurAST = std::make_unique<AliasSetTracker>(*AA);
   2333 
   2334   // Add everything from all the sub loops.
   2335   for (Loop *InnerL : L->getSubLoops())
   2336     for (BasicBlock *BB : InnerL->blocks())
   2337       CurAST->add(*BB);
   2338 
   2339   // And merge in this loop (without anything from inner loops).
   2340   for (BasicBlock *BB : L->blocks())
   2341     if (LI->getLoopFor(BB) == L)
   2342       CurAST->add(*BB);
   2343 
   2344   return CurAST;
   2345 }
   2346 
   2347 static bool pointerInvalidatedByLoop(MemoryLocation MemLoc,
   2348                                      AliasSetTracker *CurAST, Loop *CurLoop,
   2349                                      AAResults *AA) {
   2350   // First check to see if any of the basic blocks in CurLoop invalidate *V.
   2351   bool isInvalidatedAccordingToAST = CurAST->getAliasSetFor(MemLoc).isMod();
   2352 
   2353   if (!isInvalidatedAccordingToAST || !LICMN2Theshold)
   2354     return isInvalidatedAccordingToAST;
   2355 
   2356   // Check with a diagnostic analysis if we can refine the information above.
   2357   // This is to identify the limitations of using the AST.
   2358   // The alias set mechanism used by LICM has a major weakness in that it
   2359   // combines all things which may alias into a single set *before* asking
   2360   // modref questions. As a result, a single readonly call within a loop will
   2361   // collapse all loads and stores into a single alias set and report
   2362   // invalidation if the loop contains any store. For example, readonly calls
   2363   // with deopt states have this form and create a general alias set with all
   2364   // loads and stores.  In order to get any LICM in loops containing possible
   2365   // deopt states we need a more precise invalidation of checking the mod ref
   2366   // info of each instruction within the loop and LI. This has a complexity of
   2367   // O(N^2), so currently, it is used only as a diagnostic tool since the
   2368   // default value of LICMN2Threshold is zero.
   2369 
   2370   // Don't look at nested loops.
   2371   if (CurLoop->begin() != CurLoop->end())
   2372     return true;
   2373 
   2374   int N = 0;
   2375   for (BasicBlock *BB : CurLoop->getBlocks())
   2376     for (Instruction &I : *BB) {
   2377       if (N >= LICMN2Theshold) {
   2378         LLVM_DEBUG(dbgs() << "Alasing N2 threshold exhausted for "
   2379                           << *(MemLoc.Ptr) << "\n");
   2380         return true;
   2381       }
   2382       N++;
   2383       auto Res = AA->getModRefInfo(&I, MemLoc);
   2384       if (isModSet(Res)) {
   2385         LLVM_DEBUG(dbgs() << "Aliasing failed on " << I << " for "
   2386                           << *(MemLoc.Ptr) << "\n");
   2387         return true;
   2388       }
   2389     }
   2390   LLVM_DEBUG(dbgs() << "Aliasing okay for " << *(MemLoc.Ptr) << "\n");
   2391   return false;
   2392 }
   2393 
   2394 bool pointerInvalidatedByLoopWithMSSA(MemorySSA *MSSA, MemoryUse *MU,
   2395                                       Loop *CurLoop, Instruction &I,
   2396                                       SinkAndHoistLICMFlags &Flags) {
   2397   // For hoisting, use the walker to determine safety
   2398   if (!Flags.getIsSink()) {
   2399     MemoryAccess *Source;
   2400     // See declaration of SetLicmMssaOptCap for usage details.
   2401     if (Flags.tooManyClobberingCalls())
   2402       Source = MU->getDefiningAccess();
   2403     else {
   2404       Source = MSSA->getSkipSelfWalker()->getClobberingMemoryAccess(MU);
   2405       Flags.incrementClobberingCalls();
   2406     }
   2407     return !MSSA->isLiveOnEntryDef(Source) &&
   2408            CurLoop->contains(Source->getBlock());
   2409   }
   2410 
   2411   // For sinking, we'd need to check all Defs below this use. The getClobbering
   2412   // call will look on the backedge of the loop, but will check aliasing with
   2413   // the instructions on the previous iteration.
   2414   // For example:
   2415   // for (i ... )
   2416   //   load a[i] ( Use (LoE)
   2417   //   store a[i] ( 1 = Def (2), with 2 = Phi for the loop.
   2418   //   i++;
   2419   // The load sees no clobbering inside the loop, as the backedge alias check
   2420   // does phi translation, and will check aliasing against store a[i-1].
   2421   // However sinking the load outside the loop, below the store is incorrect.
   2422 
   2423   // For now, only sink if there are no Defs in the loop, and the existing ones
   2424   // precede the use and are in the same block.
   2425   // FIXME: Increase precision: Safe to sink if Use post dominates the Def;
   2426   // needs PostDominatorTreeAnalysis.
   2427   // FIXME: More precise: no Defs that alias this Use.
   2428   if (Flags.tooManyMemoryAccesses())
   2429     return true;
   2430   for (auto *BB : CurLoop->getBlocks())
   2431     if (pointerInvalidatedByBlockWithMSSA(*BB, *MSSA, *MU))
   2432       return true;
   2433   // When sinking, the source block may not be part of the loop so check it.
   2434   if (!CurLoop->contains(&I))
   2435     return pointerInvalidatedByBlockWithMSSA(*I.getParent(), *MSSA, *MU);
   2436 
   2437   return false;
   2438 }
   2439 
   2440 bool pointerInvalidatedByBlockWithMSSA(BasicBlock &BB, MemorySSA &MSSA,
   2441                                        MemoryUse &MU) {
   2442   if (const auto *Accesses = MSSA.getBlockDefs(&BB))
   2443     for (const auto &MA : *Accesses)
   2444       if (const auto *MD = dyn_cast<MemoryDef>(&MA))
   2445         if (MU.getBlock() != MD->getBlock() || !MSSA.locallyDominates(MD, &MU))
   2446           return true;
   2447   return false;
   2448 }
   2449 
   2450 /// Little predicate that returns true if the specified basic block is in
   2451 /// a subloop of the current one, not the current one itself.
   2452 ///
   2453 static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI) {
   2454   assert(CurLoop->contains(BB) && "Only valid if BB is IN the loop");
   2455   return LI->getLoopFor(BB) != CurLoop;
   2456 }
   2457