Home | History | Annotate | Line # | Download | only in Scalar
      1 //===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===//
      2 //
      3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
      4 // See https://llvm.org/LICENSE.txt for license information.
      5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
      6 //
      7 //===----------------------------------------------------------------------===//
      8 //
      9 // This transformation analyzes and transforms the induction variables (and
     10 // computations derived from them) into simpler forms suitable for subsequent
     11 // analysis and transformation.
     12 //
     13 // If the trip count of a loop is computable, this pass also makes the following
     14 // changes:
     15 //   1. The exit condition for the loop is canonicalized to compare the
     16 //      induction value against the exit value.  This turns loops like:
     17 //        'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)'
     18 //   2. Any use outside of the loop of an expression derived from the indvar
     19 //      is changed to compute the derived value outside of the loop, eliminating
     20 //      the dependence on the exit value of the induction variable.  If the only
     21 //      purpose of the loop is to compute the exit value of some derived
     22 //      expression, this transformation will make the loop dead.
     23 //
     24 //===----------------------------------------------------------------------===//
     25 
     26 #include "llvm/Transforms/Scalar/IndVarSimplify.h"
     27 #include "llvm/ADT/APFloat.h"
     28 #include "llvm/ADT/APInt.h"
     29 #include "llvm/ADT/ArrayRef.h"
     30 #include "llvm/ADT/DenseMap.h"
     31 #include "llvm/ADT/None.h"
     32 #include "llvm/ADT/Optional.h"
     33 #include "llvm/ADT/STLExtras.h"
     34 #include "llvm/ADT/SmallPtrSet.h"
     35 #include "llvm/ADT/SmallSet.h"
     36 #include "llvm/ADT/SmallVector.h"
     37 #include "llvm/ADT/Statistic.h"
     38 #include "llvm/ADT/iterator_range.h"
     39 #include "llvm/Analysis/LoopInfo.h"
     40 #include "llvm/Analysis/LoopPass.h"
     41 #include "llvm/Analysis/MemorySSA.h"
     42 #include "llvm/Analysis/MemorySSAUpdater.h"
     43 #include "llvm/Analysis/ScalarEvolution.h"
     44 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
     45 #include "llvm/Analysis/TargetLibraryInfo.h"
     46 #include "llvm/Analysis/TargetTransformInfo.h"
     47 #include "llvm/Analysis/ValueTracking.h"
     48 #include "llvm/IR/BasicBlock.h"
     49 #include "llvm/IR/Constant.h"
     50 #include "llvm/IR/ConstantRange.h"
     51 #include "llvm/IR/Constants.h"
     52 #include "llvm/IR/DataLayout.h"
     53 #include "llvm/IR/DerivedTypes.h"
     54 #include "llvm/IR/Dominators.h"
     55 #include "llvm/IR/Function.h"
     56 #include "llvm/IR/IRBuilder.h"
     57 #include "llvm/IR/InstrTypes.h"
     58 #include "llvm/IR/Instruction.h"
     59 #include "llvm/IR/Instructions.h"
     60 #include "llvm/IR/IntrinsicInst.h"
     61 #include "llvm/IR/Intrinsics.h"
     62 #include "llvm/IR/Module.h"
     63 #include "llvm/IR/Operator.h"
     64 #include "llvm/IR/PassManager.h"
     65 #include "llvm/IR/PatternMatch.h"
     66 #include "llvm/IR/Type.h"
     67 #include "llvm/IR/Use.h"
     68 #include "llvm/IR/User.h"
     69 #include "llvm/IR/Value.h"
     70 #include "llvm/IR/ValueHandle.h"
     71 #include "llvm/InitializePasses.h"
     72 #include "llvm/Pass.h"
     73 #include "llvm/Support/Casting.h"
     74 #include "llvm/Support/CommandLine.h"
     75 #include "llvm/Support/Compiler.h"
     76 #include "llvm/Support/Debug.h"
     77 #include "llvm/Support/ErrorHandling.h"
     78 #include "llvm/Support/MathExtras.h"
     79 #include "llvm/Support/raw_ostream.h"
     80 #include "llvm/Transforms/Scalar.h"
     81 #include "llvm/Transforms/Scalar/LoopPassManager.h"
     82 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
     83 #include "llvm/Transforms/Utils/Local.h"
     84 #include "llvm/Transforms/Utils/LoopUtils.h"
     85 #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
     86 #include "llvm/Transforms/Utils/SimplifyIndVar.h"
     87 #include <cassert>
     88 #include <cstdint>
     89 #include <utility>
     90 
     91 using namespace llvm;
     92 
     93 #define DEBUG_TYPE "indvars"
     94 
     95 STATISTIC(NumWidened     , "Number of indvars widened");
     96 STATISTIC(NumReplaced    , "Number of exit values replaced");
     97 STATISTIC(NumLFTR        , "Number of loop exit tests replaced");
     98 STATISTIC(NumElimExt     , "Number of IV sign/zero extends eliminated");
     99 STATISTIC(NumElimIV      , "Number of congruent IVs eliminated");
    100 
    101 // Trip count verification can be enabled by default under NDEBUG if we
    102 // implement a strong expression equivalence checker in SCEV. Until then, we
    103 // use the verify-indvars flag, which may assert in some cases.
    104 static cl::opt<bool> VerifyIndvars(
    105     "verify-indvars", cl::Hidden,
    106     cl::desc("Verify the ScalarEvolution result after running indvars. Has no "
    107              "effect in release builds. (Note: this adds additional SCEV "
    108              "queries potentially changing the analysis result)"));
    109 
    110 static cl::opt<ReplaceExitVal> ReplaceExitValue(
    111     "replexitval", cl::Hidden, cl::init(OnlyCheapRepl),
    112     cl::desc("Choose the strategy to replace exit value in IndVarSimplify"),
    113     cl::values(clEnumValN(NeverRepl, "never", "never replace exit value"),
    114                clEnumValN(OnlyCheapRepl, "cheap",
    115                           "only replace exit value when the cost is cheap"),
    116                clEnumValN(NoHardUse, "noharduse",
    117                           "only replace exit values when loop def likely dead"),
    118                clEnumValN(AlwaysRepl, "always",
    119                           "always replace exit value whenever possible")));
    120 
    121 static cl::opt<bool> UsePostIncrementRanges(
    122   "indvars-post-increment-ranges", cl::Hidden,
    123   cl::desc("Use post increment control-dependent ranges in IndVarSimplify"),
    124   cl::init(true));
    125 
    126 static cl::opt<bool>
    127 DisableLFTR("disable-lftr", cl::Hidden, cl::init(false),
    128             cl::desc("Disable Linear Function Test Replace optimization"));
    129 
    130 static cl::opt<bool>
    131 LoopPredication("indvars-predicate-loops", cl::Hidden, cl::init(true),
    132                 cl::desc("Predicate conditions in read only loops"));
    133 
    134 static cl::opt<bool>
    135 AllowIVWidening("indvars-widen-indvars", cl::Hidden, cl::init(true),
    136                 cl::desc("Allow widening of indvars to eliminate s/zext"));
    137 
    138 namespace {
    139 
    140 struct RewritePhi;
    141 
    142 class IndVarSimplify {
    143   LoopInfo *LI;
    144   ScalarEvolution *SE;
    145   DominatorTree *DT;
    146   const DataLayout &DL;
    147   TargetLibraryInfo *TLI;
    148   const TargetTransformInfo *TTI;
    149   std::unique_ptr<MemorySSAUpdater> MSSAU;
    150 
    151   SmallVector<WeakTrackingVH, 16> DeadInsts;
    152   bool WidenIndVars;
    153 
    154   bool handleFloatingPointIV(Loop *L, PHINode *PH);
    155   bool rewriteNonIntegerIVs(Loop *L);
    156 
    157   bool simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI);
    158   /// Try to eliminate loop exits based on analyzeable exit counts
    159   bool optimizeLoopExits(Loop *L, SCEVExpander &Rewriter);
    160   /// Try to form loop invariant tests for loop exits by changing how many
    161   /// iterations of the loop run when that is unobservable.
    162   bool predicateLoopExits(Loop *L, SCEVExpander &Rewriter);
    163 
    164   bool rewriteFirstIterationLoopExitValues(Loop *L);
    165 
    166   bool linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
    167                                  const SCEV *ExitCount,
    168                                  PHINode *IndVar, SCEVExpander &Rewriter);
    169 
    170   bool sinkUnusedInvariants(Loop *L);
    171 
    172 public:
    173   IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
    174                  const DataLayout &DL, TargetLibraryInfo *TLI,
    175                  TargetTransformInfo *TTI, MemorySSA *MSSA, bool WidenIndVars)
    176       : LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI),
    177         WidenIndVars(WidenIndVars) {
    178     if (MSSA)
    179       MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
    180   }
    181 
    182   bool run(Loop *L);
    183 };
    184 
    185 } // end anonymous namespace
    186 
    187 //===----------------------------------------------------------------------===//
    188 // rewriteNonIntegerIVs and helpers. Prefer integer IVs.
    189 //===----------------------------------------------------------------------===//
    190 
    191 /// Convert APF to an integer, if possible.
    192 static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
    193   bool isExact = false;
    194   // See if we can convert this to an int64_t
    195   uint64_t UIntVal;
    196   if (APF.convertToInteger(makeMutableArrayRef(UIntVal), 64, true,
    197                            APFloat::rmTowardZero, &isExact) != APFloat::opOK ||
    198       !isExact)
    199     return false;
    200   IntVal = UIntVal;
    201   return true;
    202 }
    203 
    204 /// If the loop has floating induction variable then insert corresponding
    205 /// integer induction variable if possible.
    206 /// For example,
    207 /// for(double i = 0; i < 10000; ++i)
    208 ///   bar(i)
    209 /// is converted into
    210 /// for(int i = 0; i < 10000; ++i)
    211 ///   bar((double)i);
    212 bool IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
    213   unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
    214   unsigned BackEdge     = IncomingEdge^1;
    215 
    216   // Check incoming value.
    217   auto *InitValueVal = dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));
    218 
    219   int64_t InitValue;
    220   if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
    221     return false;
    222 
    223   // Check IV increment. Reject this PN if increment operation is not
    224   // an add or increment value can not be represented by an integer.
    225   auto *Incr = dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
    226   if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd) return false;
    227 
    228   // If this is not an add of the PHI with a constantfp, or if the constant fp
    229   // is not an integer, bail out.
    230   ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
    231   int64_t IncValue;
    232   if (IncValueVal == nullptr || Incr->getOperand(0) != PN ||
    233       !ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
    234     return false;
    235 
    236   // Check Incr uses. One user is PN and the other user is an exit condition
    237   // used by the conditional terminator.
    238   Value::user_iterator IncrUse = Incr->user_begin();
    239   Instruction *U1 = cast<Instruction>(*IncrUse++);
    240   if (IncrUse == Incr->user_end()) return false;
    241   Instruction *U2 = cast<Instruction>(*IncrUse++);
    242   if (IncrUse != Incr->user_end()) return false;
    243 
    244   // Find exit condition, which is an fcmp.  If it doesn't exist, or if it isn't
    245   // only used by a branch, we can't transform it.
    246   FCmpInst *Compare = dyn_cast<FCmpInst>(U1);
    247   if (!Compare)
    248     Compare = dyn_cast<FCmpInst>(U2);
    249   if (!Compare || !Compare->hasOneUse() ||
    250       !isa<BranchInst>(Compare->user_back()))
    251     return false;
    252 
    253   BranchInst *TheBr = cast<BranchInst>(Compare->user_back());
    254 
    255   // We need to verify that the branch actually controls the iteration count
    256   // of the loop.  If not, the new IV can overflow and no one will notice.
    257   // The branch block must be in the loop and one of the successors must be out
    258   // of the loop.
    259   assert(TheBr->isConditional() && "Can't use fcmp if not conditional");
    260   if (!L->contains(TheBr->getParent()) ||
    261       (L->contains(TheBr->getSuccessor(0)) &&
    262        L->contains(TheBr->getSuccessor(1))))
    263     return false;
    264 
    265   // If it isn't a comparison with an integer-as-fp (the exit value), we can't
    266   // transform it.
    267   ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
    268   int64_t ExitValue;
    269   if (ExitValueVal == nullptr ||
    270       !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
    271     return false;
    272 
    273   // Find new predicate for integer comparison.
    274   CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE;
    275   switch (Compare->getPredicate()) {
    276   default: return false;  // Unknown comparison.
    277   case CmpInst::FCMP_OEQ:
    278   case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break;
    279   case CmpInst::FCMP_ONE:
    280   case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break;
    281   case CmpInst::FCMP_OGT:
    282   case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break;
    283   case CmpInst::FCMP_OGE:
    284   case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break;
    285   case CmpInst::FCMP_OLT:
    286   case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break;
    287   case CmpInst::FCMP_OLE:
    288   case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;
    289   }
    290 
    291   // We convert the floating point induction variable to a signed i32 value if
    292   // we can.  This is only safe if the comparison will not overflow in a way
    293   // that won't be trapped by the integer equivalent operations.  Check for this
    294   // now.
    295   // TODO: We could use i64 if it is native and the range requires it.
    296 
    297   // The start/stride/exit values must all fit in signed i32.
    298   if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue))
    299     return false;
    300 
    301   // If not actually striding (add x, 0.0), avoid touching the code.
    302   if (IncValue == 0)
    303     return false;
    304 
    305   // Positive and negative strides have different safety conditions.
    306   if (IncValue > 0) {
    307     // If we have a positive stride, we require the init to be less than the
    308     // exit value.
    309     if (InitValue >= ExitValue)
    310       return false;
    311 
    312     uint32_t Range = uint32_t(ExitValue-InitValue);
    313     // Check for infinite loop, either:
    314     // while (i <= Exit) or until (i > Exit)
    315     if (NewPred == CmpInst::ICMP_SLE || NewPred == CmpInst::ICMP_SGT) {
    316       if (++Range == 0) return false;  // Range overflows.
    317     }
    318 
    319     unsigned Leftover = Range % uint32_t(IncValue);
    320 
    321     // If this is an equality comparison, we require that the strided value
    322     // exactly land on the exit value, otherwise the IV condition will wrap
    323     // around and do things the fp IV wouldn't.
    324     if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
    325         Leftover != 0)
    326       return false;
    327 
    328     // If the stride would wrap around the i32 before exiting, we can't
    329     // transform the IV.
    330     if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue)
    331       return false;
    332   } else {
    333     // If we have a negative stride, we require the init to be greater than the
    334     // exit value.
    335     if (InitValue <= ExitValue)
    336       return false;
    337 
    338     uint32_t Range = uint32_t(InitValue-ExitValue);
    339     // Check for infinite loop, either:
    340     // while (i >= Exit) or until (i < Exit)
    341     if (NewPred == CmpInst::ICMP_SGE || NewPred == CmpInst::ICMP_SLT) {
    342       if (++Range == 0) return false;  // Range overflows.
    343     }
    344 
    345     unsigned Leftover = Range % uint32_t(-IncValue);
    346 
    347     // If this is an equality comparison, we require that the strided value
    348     // exactly land on the exit value, otherwise the IV condition will wrap
    349     // around and do things the fp IV wouldn't.
    350     if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
    351         Leftover != 0)
    352       return false;
    353 
    354     // If the stride would wrap around the i32 before exiting, we can't
    355     // transform the IV.
    356     if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue)
    357       return false;
    358   }
    359 
    360   IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext());
    361 
    362   // Insert new integer induction variable.
    363   PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName()+".int", PN);
    364   NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue),
    365                       PN->getIncomingBlock(IncomingEdge));
    366 
    367   Value *NewAdd =
    368     BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue),
    369                               Incr->getName()+".int", Incr);
    370   NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge));
    371 
    372   ICmpInst *NewCompare = new ICmpInst(TheBr, NewPred, NewAdd,
    373                                       ConstantInt::get(Int32Ty, ExitValue),
    374                                       Compare->getName());
    375 
    376   // In the following deletions, PN may become dead and may be deleted.
    377   // Use a WeakTrackingVH to observe whether this happens.
    378   WeakTrackingVH WeakPH = PN;
    379 
    380   // Delete the old floating point exit comparison.  The branch starts using the
    381   // new comparison.
    382   NewCompare->takeName(Compare);
    383   Compare->replaceAllUsesWith(NewCompare);
    384   RecursivelyDeleteTriviallyDeadInstructions(Compare, TLI, MSSAU.get());
    385 
    386   // Delete the old floating point increment.
    387   Incr->replaceAllUsesWith(UndefValue::get(Incr->getType()));
    388   RecursivelyDeleteTriviallyDeadInstructions(Incr, TLI, MSSAU.get());
    389 
    390   // If the FP induction variable still has uses, this is because something else
    391   // in the loop uses its value.  In order to canonicalize the induction
    392   // variable, we chose to eliminate the IV and rewrite it in terms of an
    393   // int->fp cast.
    394   //
    395   // We give preference to sitofp over uitofp because it is faster on most
    396   // platforms.
    397   if (WeakPH) {
    398     Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
    399                                  &*PN->getParent()->getFirstInsertionPt());
    400     PN->replaceAllUsesWith(Conv);
    401     RecursivelyDeleteTriviallyDeadInstructions(PN, TLI, MSSAU.get());
    402   }
    403   return true;
    404 }
    405 
    406 bool IndVarSimplify::rewriteNonIntegerIVs(Loop *L) {
    407   // First step.  Check to see if there are any floating-point recurrences.
    408   // If there are, change them into integer recurrences, permitting analysis by
    409   // the SCEV routines.
    410   BasicBlock *Header = L->getHeader();
    411 
    412   SmallVector<WeakTrackingVH, 8> PHIs;
    413   for (PHINode &PN : Header->phis())
    414     PHIs.push_back(&PN);
    415 
    416   bool Changed = false;
    417   for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
    418     if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i]))
    419       Changed |= handleFloatingPointIV(L, PN);
    420 
    421   // If the loop previously had floating-point IV, ScalarEvolution
    422   // may not have been able to compute a trip count. Now that we've done some
    423   // re-writing, the trip count may be computable.
    424   if (Changed)
    425     SE->forgetLoop(L);
    426   return Changed;
    427 }
    428 
    429 //===---------------------------------------------------------------------===//
    430 // rewriteFirstIterationLoopExitValues: Rewrite loop exit values if we know
    431 // they will exit at the first iteration.
    432 //===---------------------------------------------------------------------===//
    433 
    434 /// Check to see if this loop has loop invariant conditions which lead to loop
    435 /// exits. If so, we know that if the exit path is taken, it is at the first
    436 /// loop iteration. This lets us predict exit values of PHI nodes that live in
    437 /// loop header.
    438 bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
    439   // Verify the input to the pass is already in LCSSA form.
    440   assert(L->isLCSSAForm(*DT));
    441 
    442   SmallVector<BasicBlock *, 8> ExitBlocks;
    443   L->getUniqueExitBlocks(ExitBlocks);
    444 
    445   bool MadeAnyChanges = false;
    446   for (auto *ExitBB : ExitBlocks) {
    447     // If there are no more PHI nodes in this exit block, then no more
    448     // values defined inside the loop are used on this path.
    449     for (PHINode &PN : ExitBB->phis()) {
    450       for (unsigned IncomingValIdx = 0, E = PN.getNumIncomingValues();
    451            IncomingValIdx != E; ++IncomingValIdx) {
    452         auto *IncomingBB = PN.getIncomingBlock(IncomingValIdx);
    453 
    454         // Can we prove that the exit must run on the first iteration if it
    455         // runs at all?  (i.e. early exits are fine for our purposes, but
    456         // traces which lead to this exit being taken on the 2nd iteration
    457         // aren't.)  Note that this is about whether the exit branch is
    458         // executed, not about whether it is taken.
    459         if (!L->getLoopLatch() ||
    460             !DT->dominates(IncomingBB, L->getLoopLatch()))
    461           continue;
    462 
    463         // Get condition that leads to the exit path.
    464         auto *TermInst = IncomingBB->getTerminator();
    465 
    466         Value *Cond = nullptr;
    467         if (auto *BI = dyn_cast<BranchInst>(TermInst)) {
    468           // Must be a conditional branch, otherwise the block
    469           // should not be in the loop.
    470           Cond = BI->getCondition();
    471         } else if (auto *SI = dyn_cast<SwitchInst>(TermInst))
    472           Cond = SI->getCondition();
    473         else
    474           continue;
    475 
    476         if (!L->isLoopInvariant(Cond))
    477           continue;
    478 
    479         auto *ExitVal = dyn_cast<PHINode>(PN.getIncomingValue(IncomingValIdx));
    480 
    481         // Only deal with PHIs in the loop header.
    482         if (!ExitVal || ExitVal->getParent() != L->getHeader())
    483           continue;
    484 
    485         // If ExitVal is a PHI on the loop header, then we know its
    486         // value along this exit because the exit can only be taken
    487         // on the first iteration.
    488         auto *LoopPreheader = L->getLoopPreheader();
    489         assert(LoopPreheader && "Invalid loop");
    490         int PreheaderIdx = ExitVal->getBasicBlockIndex(LoopPreheader);
    491         if (PreheaderIdx != -1) {
    492           assert(ExitVal->getParent() == L->getHeader() &&
    493                  "ExitVal must be in loop header");
    494           MadeAnyChanges = true;
    495           PN.setIncomingValue(IncomingValIdx,
    496                               ExitVal->getIncomingValue(PreheaderIdx));
    497         }
    498       }
    499     }
    500   }
    501   return MadeAnyChanges;
    502 }
    503 
    504 //===----------------------------------------------------------------------===//
    505 //  IV Widening - Extend the width of an IV to cover its widest uses.
    506 //===----------------------------------------------------------------------===//
    507 
    508 /// Update information about the induction variable that is extended by this
    509 /// sign or zero extend operation. This is used to determine the final width of
    510 /// the IV before actually widening it.
    511 static void visitIVCast(CastInst *Cast, WideIVInfo &WI,
    512                         ScalarEvolution *SE,
    513                         const TargetTransformInfo *TTI) {
    514   bool IsSigned = Cast->getOpcode() == Instruction::SExt;
    515   if (!IsSigned && Cast->getOpcode() != Instruction::ZExt)
    516     return;
    517 
    518   Type *Ty = Cast->getType();
    519   uint64_t Width = SE->getTypeSizeInBits(Ty);
    520   if (!Cast->getModule()->getDataLayout().isLegalInteger(Width))
    521     return;
    522 
    523   // Check that `Cast` actually extends the induction variable (we rely on this
    524   // later).  This takes care of cases where `Cast` is extending a truncation of
    525   // the narrow induction variable, and thus can end up being narrower than the
    526   // "narrow" induction variable.
    527   uint64_t NarrowIVWidth = SE->getTypeSizeInBits(WI.NarrowIV->getType());
    528   if (NarrowIVWidth >= Width)
    529     return;
    530 
    531   // Cast is either an sext or zext up to this point.
    532   // We should not widen an indvar if arithmetics on the wider indvar are more
    533   // expensive than those on the narrower indvar. We check only the cost of ADD
    534   // because at least an ADD is required to increment the induction variable. We
    535   // could compute more comprehensively the cost of all instructions on the
    536   // induction variable when necessary.
    537   if (TTI &&
    538       TTI->getArithmeticInstrCost(Instruction::Add, Ty) >
    539           TTI->getArithmeticInstrCost(Instruction::Add,
    540                                       Cast->getOperand(0)->getType())) {
    541     return;
    542   }
    543 
    544   if (!WI.WidestNativeType) {
    545     WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
    546     WI.IsSigned = IsSigned;
    547     return;
    548   }
    549 
    550   // We extend the IV to satisfy the sign of its first user, arbitrarily.
    551   if (WI.IsSigned != IsSigned)
    552     return;
    553 
    554   if (Width > SE->getTypeSizeInBits(WI.WidestNativeType))
    555     WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
    556 }
    557 
    558 //===----------------------------------------------------------------------===//
    559 //  Live IV Reduction - Minimize IVs live across the loop.
    560 //===----------------------------------------------------------------------===//
    561 
    562 //===----------------------------------------------------------------------===//
    563 //  Simplification of IV users based on SCEV evaluation.
    564 //===----------------------------------------------------------------------===//
    565 
    566 namespace {
    567 
    568 class IndVarSimplifyVisitor : public IVVisitor {
    569   ScalarEvolution *SE;
    570   const TargetTransformInfo *TTI;
    571   PHINode *IVPhi;
    572 
    573 public:
    574   WideIVInfo WI;
    575 
    576   IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV,
    577                         const TargetTransformInfo *TTI,
    578                         const DominatorTree *DTree)
    579     : SE(SCEV), TTI(TTI), IVPhi(IV) {
    580     DT = DTree;
    581     WI.NarrowIV = IVPhi;
    582   }
    583 
    584   // Implement the interface used by simplifyUsersOfIV.
    585   void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); }
    586 };
    587 
    588 } // end anonymous namespace
    589 
    590 /// Iteratively perform simplification on a worklist of IV users. Each
    591 /// successive simplification may push more users which may themselves be
    592 /// candidates for simplification.
    593 ///
    594 /// Sign/Zero extend elimination is interleaved with IV simplification.
    595 bool IndVarSimplify::simplifyAndExtend(Loop *L,
    596                                        SCEVExpander &Rewriter,
    597                                        LoopInfo *LI) {
    598   SmallVector<WideIVInfo, 8> WideIVs;
    599 
    600   auto *GuardDecl = L->getBlocks()[0]->getModule()->getFunction(
    601           Intrinsic::getName(Intrinsic::experimental_guard));
    602   bool HasGuards = GuardDecl && !GuardDecl->use_empty();
    603 
    604   SmallVector<PHINode*, 8> LoopPhis;
    605   for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
    606     LoopPhis.push_back(cast<PHINode>(I));
    607   }
    608   // Each round of simplification iterates through the SimplifyIVUsers worklist
    609   // for all current phis, then determines whether any IVs can be
    610   // widened. Widening adds new phis to LoopPhis, inducing another round of
    611   // simplification on the wide IVs.
    612   bool Changed = false;
    613   while (!LoopPhis.empty()) {
    614     // Evaluate as many IV expressions as possible before widening any IVs. This
    615     // forces SCEV to set no-wrap flags before evaluating sign/zero
    616     // extension. The first time SCEV attempts to normalize sign/zero extension,
    617     // the result becomes final. So for the most predictable results, we delay
    618     // evaluation of sign/zero extend evaluation until needed, and avoid running
    619     // other SCEV based analysis prior to simplifyAndExtend.
    620     do {
    621       PHINode *CurrIV = LoopPhis.pop_back_val();
    622 
    623       // Information about sign/zero extensions of CurrIV.
    624       IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT);
    625 
    626       Changed |= simplifyUsersOfIV(CurrIV, SE, DT, LI, TTI, DeadInsts, Rewriter,
    627                                    &Visitor);
    628 
    629       if (Visitor.WI.WidestNativeType) {
    630         WideIVs.push_back(Visitor.WI);
    631       }
    632     } while(!LoopPhis.empty());
    633 
    634     // Continue if we disallowed widening.
    635     if (!WidenIndVars)
    636       continue;
    637 
    638     for (; !WideIVs.empty(); WideIVs.pop_back()) {
    639       unsigned ElimExt;
    640       unsigned Widened;
    641       if (PHINode *WidePhi = createWideIV(WideIVs.back(), LI, SE, Rewriter,
    642                                           DT, DeadInsts, ElimExt, Widened,
    643                                           HasGuards, UsePostIncrementRanges)) {
    644         NumElimExt += ElimExt;
    645         NumWidened += Widened;
    646         Changed = true;
    647         LoopPhis.push_back(WidePhi);
    648       }
    649     }
    650   }
    651   return Changed;
    652 }
    653 
    654 //===----------------------------------------------------------------------===//
    655 //  linearFunctionTestReplace and its kin. Rewrite the loop exit condition.
    656 //===----------------------------------------------------------------------===//
    657 
    658 /// Given an Value which is hoped to be part of an add recurance in the given
    659 /// loop, return the associated Phi node if so.  Otherwise, return null.  Note
    660 /// that this is less general than SCEVs AddRec checking.
    661 static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L) {
    662   Instruction *IncI = dyn_cast<Instruction>(IncV);
    663   if (!IncI)
    664     return nullptr;
    665 
    666   switch (IncI->getOpcode()) {
    667   case Instruction::Add:
    668   case Instruction::Sub:
    669     break;
    670   case Instruction::GetElementPtr:
    671     // An IV counter must preserve its type.
    672     if (IncI->getNumOperands() == 2)
    673       break;
    674     LLVM_FALLTHROUGH;
    675   default:
    676     return nullptr;
    677   }
    678 
    679   PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0));
    680   if (Phi && Phi->getParent() == L->getHeader()) {
    681     if (L->isLoopInvariant(IncI->getOperand(1)))
    682       return Phi;
    683     return nullptr;
    684   }
    685   if (IncI->getOpcode() == Instruction::GetElementPtr)
    686     return nullptr;
    687 
    688   // Allow add/sub to be commuted.
    689   Phi = dyn_cast<PHINode>(IncI->getOperand(1));
    690   if (Phi && Phi->getParent() == L->getHeader()) {
    691     if (L->isLoopInvariant(IncI->getOperand(0)))
    692       return Phi;
    693   }
    694   return nullptr;
    695 }
    696 
    697 /// Whether the current loop exit test is based on this value.  Currently this
    698 /// is limited to a direct use in the loop condition.
    699 static bool isLoopExitTestBasedOn(Value *V, BasicBlock *ExitingBB) {
    700   BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
    701   ICmpInst *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
    702   // TODO: Allow non-icmp loop test.
    703   if (!ICmp)
    704     return false;
    705 
    706   // TODO: Allow indirect use.
    707   return ICmp->getOperand(0) == V || ICmp->getOperand(1) == V;
    708 }
    709 
    710 /// linearFunctionTestReplace policy. Return true unless we can show that the
    711 /// current exit test is already sufficiently canonical.
    712 static bool needsLFTR(Loop *L, BasicBlock *ExitingBB) {
    713   assert(L->getLoopLatch() && "Must be in simplified form");
    714 
    715   // Avoid converting a constant or loop invariant test back to a runtime
    716   // test.  This is critical for when SCEV's cached ExitCount is less precise
    717   // than the current IR (such as after we've proven a particular exit is
    718   // actually dead and thus the BE count never reaches our ExitCount.)
    719   BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
    720   if (L->isLoopInvariant(BI->getCondition()))
    721     return false;
    722 
    723   // Do LFTR to simplify the exit condition to an ICMP.
    724   ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
    725   if (!Cond)
    726     return true;
    727 
    728   // Do LFTR to simplify the exit ICMP to EQ/NE
    729   ICmpInst::Predicate Pred = Cond->getPredicate();
    730   if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ)
    731     return true;
    732 
    733   // Look for a loop invariant RHS
    734   Value *LHS = Cond->getOperand(0);
    735   Value *RHS = Cond->getOperand(1);
    736   if (!L->isLoopInvariant(RHS)) {
    737     if (!L->isLoopInvariant(LHS))
    738       return true;
    739     std::swap(LHS, RHS);
    740   }
    741   // Look for a simple IV counter LHS
    742   PHINode *Phi = dyn_cast<PHINode>(LHS);
    743   if (!Phi)
    744     Phi = getLoopPhiForCounter(LHS, L);
    745 
    746   if (!Phi)
    747     return true;
    748 
    749   // Do LFTR if PHI node is defined in the loop, but is *not* a counter.
    750   int Idx = Phi->getBasicBlockIndex(L->getLoopLatch());
    751   if (Idx < 0)
    752     return true;
    753 
    754   // Do LFTR if the exit condition's IV is *not* a simple counter.
    755   Value *IncV = Phi->getIncomingValue(Idx);
    756   return Phi != getLoopPhiForCounter(IncV, L);
    757 }
    758 
    759 /// Return true if undefined behavior would provable be executed on the path to
    760 /// OnPathTo if Root produced a posion result.  Note that this doesn't say
    761 /// anything about whether OnPathTo is actually executed or whether Root is
    762 /// actually poison.  This can be used to assess whether a new use of Root can
    763 /// be added at a location which is control equivalent with OnPathTo (such as
    764 /// immediately before it) without introducing UB which didn't previously
    765 /// exist.  Note that a false result conveys no information.
    766 static bool mustExecuteUBIfPoisonOnPathTo(Instruction *Root,
    767                                           Instruction *OnPathTo,
    768                                           DominatorTree *DT) {
    769   // Basic approach is to assume Root is poison, propagate poison forward
    770   // through all users we can easily track, and then check whether any of those
    771   // users are provable UB and must execute before out exiting block might
    772   // exit.
    773 
    774   // The set of all recursive users we've visited (which are assumed to all be
    775   // poison because of said visit)
    776   SmallSet<const Value *, 16> KnownPoison;
    777   SmallVector<const Instruction*, 16> Worklist;
    778   Worklist.push_back(Root);
    779   while (!Worklist.empty()) {
    780     const Instruction *I = Worklist.pop_back_val();
    781 
    782     // If we know this must trigger UB on a path leading our target.
    783     if (mustTriggerUB(I, KnownPoison) && DT->dominates(I, OnPathTo))
    784       return true;
    785 
    786     // If we can't analyze propagation through this instruction, just skip it
    787     // and transitive users.  Safe as false is a conservative result.
    788     if (!propagatesPoison(cast<Operator>(I)) && I != Root)
    789       continue;
    790 
    791     if (KnownPoison.insert(I).second)
    792       for (const User *User : I->users())
    793         Worklist.push_back(cast<Instruction>(User));
    794   }
    795 
    796   // Might be non-UB, or might have a path we couldn't prove must execute on
    797   // way to exiting bb.
    798   return false;
    799 }
    800 
    801 /// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils
    802 /// down to checking that all operands are constant and listing instructions
    803 /// that may hide undef.
    804 static bool hasConcreteDefImpl(Value *V, SmallPtrSetImpl<Value*> &Visited,
    805                                unsigned Depth) {
    806   if (isa<Constant>(V))
    807     return !isa<UndefValue>(V);
    808 
    809   if (Depth >= 6)
    810     return false;
    811 
    812   // Conservatively handle non-constant non-instructions. For example, Arguments
    813   // may be undef.
    814   Instruction *I = dyn_cast<Instruction>(V);
    815   if (!I)
    816     return false;
    817 
    818   // Load and return values may be undef.
    819   if(I->mayReadFromMemory() || isa<CallInst>(I) || isa<InvokeInst>(I))
    820     return false;
    821 
    822   // Optimistically handle other instructions.
    823   for (Value *Op : I->operands()) {
    824     if (!Visited.insert(Op).second)
    825       continue;
    826     if (!hasConcreteDefImpl(Op, Visited, Depth+1))
    827       return false;
    828   }
    829   return true;
    830 }
    831 
    832 /// Return true if the given value is concrete. We must prove that undef can
    833 /// never reach it.
    834 ///
    835 /// TODO: If we decide that this is a good approach to checking for undef, we
    836 /// may factor it into a common location.
    837 static bool hasConcreteDef(Value *V) {
    838   SmallPtrSet<Value*, 8> Visited;
    839   Visited.insert(V);
    840   return hasConcreteDefImpl(V, Visited, 0);
    841 }
    842 
    843 /// Return true if this IV has any uses other than the (soon to be rewritten)
    844 /// loop exit test.
    845 static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) {
    846   int LatchIdx = Phi->getBasicBlockIndex(LatchBlock);
    847   Value *IncV = Phi->getIncomingValue(LatchIdx);
    848 
    849   for (User *U : Phi->users())
    850     if (U != Cond && U != IncV) return false;
    851 
    852   for (User *U : IncV->users())
    853     if (U != Cond && U != Phi) return false;
    854   return true;
    855 }
    856 
    857 /// Return true if the given phi is a "counter" in L.  A counter is an
    858 /// add recurance (of integer or pointer type) with an arbitrary start, and a
    859 /// step of 1.  Note that L must have exactly one latch.
    860 static bool isLoopCounter(PHINode* Phi, Loop *L,
    861                           ScalarEvolution *SE) {
    862   assert(Phi->getParent() == L->getHeader());
    863   assert(L->getLoopLatch());
    864 
    865   if (!SE->isSCEVable(Phi->getType()))
    866     return false;
    867 
    868   const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
    869   if (!AR || AR->getLoop() != L || !AR->isAffine())
    870     return false;
    871 
    872   const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
    873   if (!Step || !Step->isOne())
    874     return false;
    875 
    876   int LatchIdx = Phi->getBasicBlockIndex(L->getLoopLatch());
    877   Value *IncV = Phi->getIncomingValue(LatchIdx);
    878   return (getLoopPhiForCounter(IncV, L) == Phi &&
    879           isa<SCEVAddRecExpr>(SE->getSCEV(IncV)));
    880 }
    881 
    882 /// Search the loop header for a loop counter (anadd rec w/step of one)
    883 /// suitable for use by LFTR.  If multiple counters are available, select the
    884 /// "best" one based profitable heuristics.
    885 ///
    886 /// BECount may be an i8* pointer type. The pointer difference is already
    887 /// valid count without scaling the address stride, so it remains a pointer
    888 /// expression as far as SCEV is concerned.
    889 static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB,
    890                                 const SCEV *BECount,
    891                                 ScalarEvolution *SE, DominatorTree *DT) {
    892   uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());
    893 
    894   Value *Cond = cast<BranchInst>(ExitingBB->getTerminator())->getCondition();
    895 
    896   // Loop over all of the PHI nodes, looking for a simple counter.
    897   PHINode *BestPhi = nullptr;
    898   const SCEV *BestInit = nullptr;
    899   BasicBlock *LatchBlock = L->getLoopLatch();
    900   assert(LatchBlock && "Must be in simplified form");
    901   const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
    902 
    903   for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
    904     PHINode *Phi = cast<PHINode>(I);
    905     if (!isLoopCounter(Phi, L, SE))
    906       continue;
    907 
    908     // Avoid comparing an integer IV against a pointer Limit.
    909     if (BECount->getType()->isPointerTy() && !Phi->getType()->isPointerTy())
    910       continue;
    911 
    912     const auto *AR = cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
    913 
    914     // AR may be a pointer type, while BECount is an integer type.
    915     // AR may be wider than BECount. With eq/ne tests overflow is immaterial.
    916     // AR may not be a narrower type, or we may never exit.
    917     uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType());
    918     if (PhiWidth < BCWidth || !DL.isLegalInteger(PhiWidth))
    919       continue;
    920 
    921     // Avoid reusing a potentially undef value to compute other values that may
    922     // have originally had a concrete definition.
    923     if (!hasConcreteDef(Phi)) {
    924       // We explicitly allow unknown phis as long as they are already used by
    925       // the loop exit test.  This is legal since performing LFTR could not
    926       // increase the number of undef users.
    927       Value *IncPhi = Phi->getIncomingValueForBlock(LatchBlock);
    928       if (!isLoopExitTestBasedOn(Phi, ExitingBB) &&
    929           !isLoopExitTestBasedOn(IncPhi, ExitingBB))
    930         continue;
    931     }
    932 
    933     // Avoid introducing undefined behavior due to poison which didn't exist in
    934     // the original program.  (Annoyingly, the rules for poison and undef
    935     // propagation are distinct, so this does NOT cover the undef case above.)
    936     // We have to ensure that we don't introduce UB by introducing a use on an
    937     // iteration where said IV produces poison.  Our strategy here differs for
    938     // pointers and integer IVs.  For integers, we strip and reinfer as needed,
    939     // see code in linearFunctionTestReplace.  For pointers, we restrict
    940     // transforms as there is no good way to reinfer inbounds once lost.
    941     if (!Phi->getType()->isIntegerTy() &&
    942         !mustExecuteUBIfPoisonOnPathTo(Phi, ExitingBB->getTerminator(), DT))
    943       continue;
    944 
    945     const SCEV *Init = AR->getStart();
    946 
    947     if (BestPhi && !AlmostDeadIV(BestPhi, LatchBlock, Cond)) {
    948       // Don't force a live loop counter if another IV can be used.
    949       if (AlmostDeadIV(Phi, LatchBlock, Cond))
    950         continue;
    951 
    952       // Prefer to count-from-zero. This is a more "canonical" counter form. It
    953       // also prefers integer to pointer IVs.
    954       if (BestInit->isZero() != Init->isZero()) {
    955         if (BestInit->isZero())
    956           continue;
    957       }
    958       // If two IVs both count from zero or both count from nonzero then the
    959       // narrower is likely a dead phi that has been widened. Use the wider phi
    960       // to allow the other to be eliminated.
    961       else if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType()))
    962         continue;
    963     }
    964     BestPhi = Phi;
    965     BestInit = Init;
    966   }
    967   return BestPhi;
    968 }
    969 
    970 /// Insert an IR expression which computes the value held by the IV IndVar
    971 /// (which must be an loop counter w/unit stride) after the backedge of loop L
    972 /// is taken ExitCount times.
    973 static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB,
    974                            const SCEV *ExitCount, bool UsePostInc, Loop *L,
    975                            SCEVExpander &Rewriter, ScalarEvolution *SE) {
    976   assert(isLoopCounter(IndVar, L, SE));
    977   const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
    978   const SCEV *IVInit = AR->getStart();
    979 
    980   // IVInit may be a pointer while ExitCount is an integer when FindLoopCounter
    981   // finds a valid pointer IV. Sign extend ExitCount in order to materialize a
    982   // GEP. Avoid running SCEVExpander on a new pointer value, instead reusing
    983   // the existing GEPs whenever possible.
    984   if (IndVar->getType()->isPointerTy() &&
    985       !ExitCount->getType()->isPointerTy()) {
    986     // IVOffset will be the new GEP offset that is interpreted by GEP as a
    987     // signed value. ExitCount on the other hand represents the loop trip count,
    988     // which is an unsigned value. FindLoopCounter only allows induction
    989     // variables that have a positive unit stride of one. This means we don't
    990     // have to handle the case of negative offsets (yet) and just need to zero
    991     // extend ExitCount.
    992     Type *OfsTy = SE->getEffectiveSCEVType(IVInit->getType());
    993     const SCEV *IVOffset = SE->getTruncateOrZeroExtend(ExitCount, OfsTy);
    994     if (UsePostInc)
    995       IVOffset = SE->getAddExpr(IVOffset, SE->getOne(OfsTy));
    996 
    997     // Expand the code for the iteration count.
    998     assert(SE->isLoopInvariant(IVOffset, L) &&
    999            "Computed iteration count is not loop invariant!");
   1000 
   1001     // We could handle pointer IVs other than i8*, but we need to compensate for
   1002     // gep index scaling.
   1003     assert(SE->getSizeOfExpr(IntegerType::getInt64Ty(IndVar->getContext()),
   1004                              cast<PointerType>(IndVar->getType())
   1005                                  ->getElementType())->isOne() &&
   1006            "unit stride pointer IV must be i8*");
   1007 
   1008     const SCEV *IVLimit = SE->getAddExpr(IVInit, IVOffset);
   1009     BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
   1010     return Rewriter.expandCodeFor(IVLimit, IndVar->getType(), BI);
   1011   } else {
   1012     // In any other case, convert both IVInit and ExitCount to integers before
   1013     // comparing. This may result in SCEV expansion of pointers, but in practice
   1014     // SCEV will fold the pointer arithmetic away as such:
   1015     // BECount = (IVEnd - IVInit - 1) => IVLimit = IVInit (postinc).
   1016     //
   1017     // Valid Cases: (1) both integers is most common; (2) both may be pointers
   1018     // for simple memset-style loops.
   1019     //
   1020     // IVInit integer and ExitCount pointer would only occur if a canonical IV
   1021     // were generated on top of case #2, which is not expected.
   1022 
   1023     assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride");
   1024     // For unit stride, IVCount = Start + ExitCount with 2's complement
   1025     // overflow.
   1026 
   1027     // For integer IVs, truncate the IV before computing IVInit + BECount,
   1028     // unless we know apriori that the limit must be a constant when evaluated
   1029     // in the bitwidth of the IV.  We prefer (potentially) keeping a truncate
   1030     // of the IV in the loop over a (potentially) expensive expansion of the
   1031     // widened exit count add(zext(add)) expression.
   1032     if (SE->getTypeSizeInBits(IVInit->getType())
   1033         > SE->getTypeSizeInBits(ExitCount->getType())) {
   1034       if (isa<SCEVConstant>(IVInit) && isa<SCEVConstant>(ExitCount))
   1035         ExitCount = SE->getZeroExtendExpr(ExitCount, IVInit->getType());
   1036       else
   1037         IVInit = SE->getTruncateExpr(IVInit, ExitCount->getType());
   1038     }
   1039 
   1040     const SCEV *IVLimit = SE->getAddExpr(IVInit, ExitCount);
   1041 
   1042     if (UsePostInc)
   1043       IVLimit = SE->getAddExpr(IVLimit, SE->getOne(IVLimit->getType()));
   1044 
   1045     // Expand the code for the iteration count.
   1046     assert(SE->isLoopInvariant(IVLimit, L) &&
   1047            "Computed iteration count is not loop invariant!");
   1048     // Ensure that we generate the same type as IndVar, or a smaller integer
   1049     // type. In the presence of null pointer values, we have an integer type
   1050     // SCEV expression (IVInit) for a pointer type IV value (IndVar).
   1051     Type *LimitTy = ExitCount->getType()->isPointerTy() ?
   1052       IndVar->getType() : ExitCount->getType();
   1053     BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
   1054     return Rewriter.expandCodeFor(IVLimit, LimitTy, BI);
   1055   }
   1056 }
   1057 
   1058 /// This method rewrites the exit condition of the loop to be a canonical !=
   1059 /// comparison against the incremented loop induction variable.  This pass is
   1060 /// able to rewrite the exit tests of any loop where the SCEV analysis can
   1061 /// determine a loop-invariant trip count of the loop, which is actually a much
   1062 /// broader range than just linear tests.
   1063 bool IndVarSimplify::
   1064 linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
   1065                           const SCEV *ExitCount,
   1066                           PHINode *IndVar, SCEVExpander &Rewriter) {
   1067   assert(L->getLoopLatch() && "Loop no longer in simplified form?");
   1068   assert(isLoopCounter(IndVar, L, SE));
   1069   Instruction * const IncVar =
   1070     cast<Instruction>(IndVar->getIncomingValueForBlock(L->getLoopLatch()));
   1071 
   1072   // Initialize CmpIndVar to the preincremented IV.
   1073   Value *CmpIndVar = IndVar;
   1074   bool UsePostInc = false;
   1075 
   1076   // If the exiting block is the same as the backedge block, we prefer to
   1077   // compare against the post-incremented value, otherwise we must compare
   1078   // against the preincremented value.
   1079   if (ExitingBB == L->getLoopLatch()) {
   1080     // For pointer IVs, we chose to not strip inbounds which requires us not
   1081     // to add a potentially UB introducing use.  We need to either a) show
   1082     // the loop test we're modifying is already in post-inc form, or b) show
   1083     // that adding a use must not introduce UB.
   1084     bool SafeToPostInc =
   1085         IndVar->getType()->isIntegerTy() ||
   1086         isLoopExitTestBasedOn(IncVar, ExitingBB) ||
   1087         mustExecuteUBIfPoisonOnPathTo(IncVar, ExitingBB->getTerminator(), DT);
   1088     if (SafeToPostInc) {
   1089       UsePostInc = true;
   1090       CmpIndVar = IncVar;
   1091     }
   1092   }
   1093 
   1094   // It may be necessary to drop nowrap flags on the incrementing instruction
   1095   // if either LFTR moves from a pre-inc check to a post-inc check (in which
   1096   // case the increment might have previously been poison on the last iteration
   1097   // only) or if LFTR switches to a different IV that was previously dynamically
   1098   // dead (and as such may be arbitrarily poison). We remove any nowrap flags
   1099   // that SCEV didn't infer for the post-inc addrec (even if we use a pre-inc
   1100   // check), because the pre-inc addrec flags may be adopted from the original
   1101   // instruction, while SCEV has to explicitly prove the post-inc nowrap flags.
   1102   // TODO: This handling is inaccurate for one case: If we switch to a
   1103   // dynamically dead IV that wraps on the first loop iteration only, which is
   1104   // not covered by the post-inc addrec. (If the new IV was not dynamically
   1105   // dead, it could not be poison on the first iteration in the first place.)
   1106   if (auto *BO = dyn_cast<BinaryOperator>(IncVar)) {
   1107     const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IncVar));
   1108     if (BO->hasNoUnsignedWrap())
   1109       BO->setHasNoUnsignedWrap(AR->hasNoUnsignedWrap());
   1110     if (BO->hasNoSignedWrap())
   1111       BO->setHasNoSignedWrap(AR->hasNoSignedWrap());
   1112   }
   1113 
   1114   Value *ExitCnt = genLoopLimit(
   1115       IndVar, ExitingBB, ExitCount, UsePostInc, L, Rewriter, SE);
   1116   assert(ExitCnt->getType()->isPointerTy() ==
   1117              IndVar->getType()->isPointerTy() &&
   1118          "genLoopLimit missed a cast");
   1119 
   1120   // Insert a new icmp_ne or icmp_eq instruction before the branch.
   1121   BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
   1122   ICmpInst::Predicate P;
   1123   if (L->contains(BI->getSuccessor(0)))
   1124     P = ICmpInst::ICMP_NE;
   1125   else
   1126     P = ICmpInst::ICMP_EQ;
   1127 
   1128   IRBuilder<> Builder(BI);
   1129 
   1130   // The new loop exit condition should reuse the debug location of the
   1131   // original loop exit condition.
   1132   if (auto *Cond = dyn_cast<Instruction>(BI->getCondition()))
   1133     Builder.SetCurrentDebugLocation(Cond->getDebugLoc());
   1134 
   1135   // For integer IVs, if we evaluated the limit in the narrower bitwidth to
   1136   // avoid the expensive expansion of the limit expression in the wider type,
   1137   // emit a truncate to narrow the IV to the ExitCount type.  This is safe
   1138   // since we know (from the exit count bitwidth), that we can't self-wrap in
   1139   // the narrower type.
   1140   unsigned CmpIndVarSize = SE->getTypeSizeInBits(CmpIndVar->getType());
   1141   unsigned ExitCntSize = SE->getTypeSizeInBits(ExitCnt->getType());
   1142   if (CmpIndVarSize > ExitCntSize) {
   1143     assert(!CmpIndVar->getType()->isPointerTy() &&
   1144            !ExitCnt->getType()->isPointerTy());
   1145 
   1146     // Before resorting to actually inserting the truncate, use the same
   1147     // reasoning as from SimplifyIndvar::eliminateTrunc to see if we can extend
   1148     // the other side of the comparison instead.  We still evaluate the limit
   1149     // in the narrower bitwidth, we just prefer a zext/sext outside the loop to
   1150     // a truncate within in.
   1151     bool Extended = false;
   1152     const SCEV *IV = SE->getSCEV(CmpIndVar);
   1153     const SCEV *TruncatedIV = SE->getTruncateExpr(SE->getSCEV(CmpIndVar),
   1154                                                   ExitCnt->getType());
   1155     const SCEV *ZExtTrunc =
   1156       SE->getZeroExtendExpr(TruncatedIV, CmpIndVar->getType());
   1157 
   1158     if (ZExtTrunc == IV) {
   1159       Extended = true;
   1160       ExitCnt = Builder.CreateZExt(ExitCnt, IndVar->getType(),
   1161                                    "wide.trip.count");
   1162     } else {
   1163       const SCEV *SExtTrunc =
   1164         SE->getSignExtendExpr(TruncatedIV, CmpIndVar->getType());
   1165       if (SExtTrunc == IV) {
   1166         Extended = true;
   1167         ExitCnt = Builder.CreateSExt(ExitCnt, IndVar->getType(),
   1168                                      "wide.trip.count");
   1169       }
   1170     }
   1171 
   1172     if (Extended) {
   1173       bool Discard;
   1174       L->makeLoopInvariant(ExitCnt, Discard);
   1175     } else
   1176       CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(),
   1177                                       "lftr.wideiv");
   1178   }
   1179   LLVM_DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
   1180                     << "      LHS:" << *CmpIndVar << '\n'
   1181                     << "       op:\t" << (P == ICmpInst::ICMP_NE ? "!=" : "==")
   1182                     << "\n"
   1183                     << "      RHS:\t" << *ExitCnt << "\n"
   1184                     << "ExitCount:\t" << *ExitCount << "\n"
   1185                     << "  was: " << *BI->getCondition() << "\n");
   1186 
   1187   Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond");
   1188   Value *OrigCond = BI->getCondition();
   1189   // It's tempting to use replaceAllUsesWith here to fully replace the old
   1190   // comparison, but that's not immediately safe, since users of the old
   1191   // comparison may not be dominated by the new comparison. Instead, just
   1192   // update the branch to use the new comparison; in the common case this
   1193   // will make old comparison dead.
   1194   BI->setCondition(Cond);
   1195   DeadInsts.emplace_back(OrigCond);
   1196 
   1197   ++NumLFTR;
   1198   return true;
   1199 }
   1200 
   1201 //===----------------------------------------------------------------------===//
   1202 //  sinkUnusedInvariants. A late subpass to cleanup loop preheaders.
   1203 //===----------------------------------------------------------------------===//
   1204 
   1205 /// If there's a single exit block, sink any loop-invariant values that
   1206 /// were defined in the preheader but not used inside the loop into the
   1207 /// exit block to reduce register pressure in the loop.
   1208 bool IndVarSimplify::sinkUnusedInvariants(Loop *L) {
   1209   BasicBlock *ExitBlock = L->getExitBlock();
   1210   if (!ExitBlock) return false;
   1211 
   1212   BasicBlock *Preheader = L->getLoopPreheader();
   1213   if (!Preheader) return false;
   1214 
   1215   bool MadeAnyChanges = false;
   1216   BasicBlock::iterator InsertPt = ExitBlock->getFirstInsertionPt();
   1217   BasicBlock::iterator I(Preheader->getTerminator());
   1218   while (I != Preheader->begin()) {
   1219     --I;
   1220     // New instructions were inserted at the end of the preheader.
   1221     if (isa<PHINode>(I))
   1222       break;
   1223 
   1224     // Don't move instructions which might have side effects, since the side
   1225     // effects need to complete before instructions inside the loop.  Also don't
   1226     // move instructions which might read memory, since the loop may modify
   1227     // memory. Note that it's okay if the instruction might have undefined
   1228     // behavior: LoopSimplify guarantees that the preheader dominates the exit
   1229     // block.
   1230     if (I->mayHaveSideEffects() || I->mayReadFromMemory())
   1231       continue;
   1232 
   1233     // Skip debug info intrinsics.
   1234     if (isa<DbgInfoIntrinsic>(I))
   1235       continue;
   1236 
   1237     // Skip eh pad instructions.
   1238     if (I->isEHPad())
   1239       continue;
   1240 
   1241     // Don't sink alloca: we never want to sink static alloca's out of the
   1242     // entry block, and correctly sinking dynamic alloca's requires
   1243     // checks for stacksave/stackrestore intrinsics.
   1244     // FIXME: Refactor this check somehow?
   1245     if (isa<AllocaInst>(I))
   1246       continue;
   1247 
   1248     // Determine if there is a use in or before the loop (direct or
   1249     // otherwise).
   1250     bool UsedInLoop = false;
   1251     for (Use &U : I->uses()) {
   1252       Instruction *User = cast<Instruction>(U.getUser());
   1253       BasicBlock *UseBB = User->getParent();
   1254       if (PHINode *P = dyn_cast<PHINode>(User)) {
   1255         unsigned i =
   1256           PHINode::getIncomingValueNumForOperand(U.getOperandNo());
   1257         UseBB = P->getIncomingBlock(i);
   1258       }
   1259       if (UseBB == Preheader || L->contains(UseBB)) {
   1260         UsedInLoop = true;
   1261         break;
   1262       }
   1263     }
   1264 
   1265     // If there is, the def must remain in the preheader.
   1266     if (UsedInLoop)
   1267       continue;
   1268 
   1269     // Otherwise, sink it to the exit block.
   1270     Instruction *ToMove = &*I;
   1271     bool Done = false;
   1272 
   1273     if (I != Preheader->begin()) {
   1274       // Skip debug info intrinsics.
   1275       do {
   1276         --I;
   1277       } while (isa<DbgInfoIntrinsic>(I) && I != Preheader->begin());
   1278 
   1279       if (isa<DbgInfoIntrinsic>(I) && I == Preheader->begin())
   1280         Done = true;
   1281     } else {
   1282       Done = true;
   1283     }
   1284 
   1285     MadeAnyChanges = true;
   1286     ToMove->moveBefore(*ExitBlock, InsertPt);
   1287     if (Done) break;
   1288     InsertPt = ToMove->getIterator();
   1289   }
   1290 
   1291   return MadeAnyChanges;
   1292 }
   1293 
   1294 static void replaceExitCond(BranchInst *BI, Value *NewCond,
   1295                             SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
   1296   auto *OldCond = BI->getCondition();
   1297   BI->setCondition(NewCond);
   1298   if (OldCond->use_empty())
   1299     DeadInsts.emplace_back(OldCond);
   1300 }
   1301 
   1302 static void foldExit(const Loop *L, BasicBlock *ExitingBB, bool IsTaken,
   1303                      SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
   1304   BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
   1305   bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
   1306   auto *OldCond = BI->getCondition();
   1307   auto *NewCond =
   1308       ConstantInt::get(OldCond->getType(), IsTaken ? ExitIfTrue : !ExitIfTrue);
   1309   replaceExitCond(BI, NewCond, DeadInsts);
   1310 }
   1311 
   1312 static void replaceWithInvariantCond(
   1313     const Loop *L, BasicBlock *ExitingBB, ICmpInst::Predicate InvariantPred,
   1314     const SCEV *InvariantLHS, const SCEV *InvariantRHS, SCEVExpander &Rewriter,
   1315     SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
   1316   BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
   1317   Rewriter.setInsertPoint(BI);
   1318   auto *LHSV = Rewriter.expandCodeFor(InvariantLHS);
   1319   auto *RHSV = Rewriter.expandCodeFor(InvariantRHS);
   1320   bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
   1321   if (ExitIfTrue)
   1322     InvariantPred = ICmpInst::getInversePredicate(InvariantPred);
   1323   IRBuilder<> Builder(BI);
   1324   auto *NewCond = Builder.CreateICmp(InvariantPred, LHSV, RHSV,
   1325                                      BI->getCondition()->getName());
   1326   replaceExitCond(BI, NewCond, DeadInsts);
   1327 }
   1328 
   1329 static bool optimizeLoopExitWithUnknownExitCount(
   1330     const Loop *L, BranchInst *BI, BasicBlock *ExitingBB,
   1331     const SCEV *MaxIter, bool Inverted, bool SkipLastIter,
   1332     ScalarEvolution *SE, SCEVExpander &Rewriter,
   1333     SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
   1334   ICmpInst::Predicate Pred;
   1335   Value *LHS, *RHS;
   1336   using namespace PatternMatch;
   1337   BasicBlock *TrueSucc, *FalseSucc;
   1338   if (!match(BI, m_Br(m_ICmp(Pred, m_Value(LHS), m_Value(RHS)),
   1339                       m_BasicBlock(TrueSucc), m_BasicBlock(FalseSucc))))
   1340     return false;
   1341 
   1342   assert((L->contains(TrueSucc) != L->contains(FalseSucc)) &&
   1343          "Not a loop exit!");
   1344 
   1345   // 'LHS pred RHS' should now mean that we stay in loop.
   1346   if (L->contains(FalseSucc))
   1347     Pred = CmpInst::getInversePredicate(Pred);
   1348 
   1349   // If we are proving loop exit, invert the predicate.
   1350   if (Inverted)
   1351     Pred = CmpInst::getInversePredicate(Pred);
   1352 
   1353   const SCEV *LHSS = SE->getSCEVAtScope(LHS, L);
   1354   const SCEV *RHSS = SE->getSCEVAtScope(RHS, L);
   1355   // Can we prove it to be trivially true?
   1356   if (SE->isKnownPredicateAt(Pred, LHSS, RHSS, BI)) {
   1357     foldExit(L, ExitingBB, Inverted, DeadInsts);
   1358     return true;
   1359   }
   1360   // Further logic works for non-inverted condition only.
   1361   if (Inverted)
   1362     return false;
   1363 
   1364   auto *ARTy = LHSS->getType();
   1365   auto *MaxIterTy = MaxIter->getType();
   1366   // If possible, adjust types.
   1367   if (SE->getTypeSizeInBits(ARTy) > SE->getTypeSizeInBits(MaxIterTy))
   1368     MaxIter = SE->getZeroExtendExpr(MaxIter, ARTy);
   1369   else if (SE->getTypeSizeInBits(ARTy) < SE->getTypeSizeInBits(MaxIterTy)) {
   1370     const SCEV *MinusOne = SE->getMinusOne(ARTy);
   1371     auto *MaxAllowedIter = SE->getZeroExtendExpr(MinusOne, MaxIterTy);
   1372     if (SE->isKnownPredicateAt(ICmpInst::ICMP_ULE, MaxIter, MaxAllowedIter, BI))
   1373       MaxIter = SE->getTruncateExpr(MaxIter, ARTy);
   1374   }
   1375 
   1376   if (SkipLastIter) {
   1377     const SCEV *One = SE->getOne(MaxIter->getType());
   1378     MaxIter = SE->getMinusSCEV(MaxIter, One);
   1379   }
   1380 
   1381   // Check if there is a loop-invariant predicate equivalent to our check.
   1382   auto LIP = SE->getLoopInvariantExitCondDuringFirstIterations(Pred, LHSS, RHSS,
   1383                                                                L, BI, MaxIter);
   1384   if (!LIP)
   1385     return false;
   1386 
   1387   // Can we prove it to be trivially true?
   1388   if (SE->isKnownPredicateAt(LIP->Pred, LIP->LHS, LIP->RHS, BI))
   1389     foldExit(L, ExitingBB, Inverted, DeadInsts);
   1390   else
   1391     replaceWithInvariantCond(L, ExitingBB, LIP->Pred, LIP->LHS, LIP->RHS,
   1392                              Rewriter, DeadInsts);
   1393 
   1394   return true;
   1395 }
   1396 
   1397 bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
   1398   SmallVector<BasicBlock*, 16> ExitingBlocks;
   1399   L->getExitingBlocks(ExitingBlocks);
   1400 
   1401   // Remove all exits which aren't both rewriteable and execute on every
   1402   // iteration.
   1403   llvm::erase_if(ExitingBlocks, [&](BasicBlock *ExitingBB) {
   1404     // If our exitting block exits multiple loops, we can only rewrite the
   1405     // innermost one.  Otherwise, we're changing how many times the innermost
   1406     // loop runs before it exits.
   1407     if (LI->getLoopFor(ExitingBB) != L)
   1408       return true;
   1409 
   1410     // Can't rewrite non-branch yet.
   1411     BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
   1412     if (!BI)
   1413       return true;
   1414 
   1415     // If already constant, nothing to do.
   1416     if (isa<Constant>(BI->getCondition()))
   1417       return true;
   1418 
   1419     // Likewise, the loop latch must be dominated by the exiting BB.
   1420     if (!DT->dominates(ExitingBB, L->getLoopLatch()))
   1421       return true;
   1422 
   1423     return false;
   1424   });
   1425 
   1426   if (ExitingBlocks.empty())
   1427     return false;
   1428 
   1429   // Get a symbolic upper bound on the loop backedge taken count.
   1430   const SCEV *MaxExitCount = SE->getSymbolicMaxBackedgeTakenCount(L);
   1431   if (isa<SCEVCouldNotCompute>(MaxExitCount))
   1432     return false;
   1433 
   1434   // Visit our exit blocks in order of dominance. We know from the fact that
   1435   // all exits must dominate the latch, so there is a total dominance order
   1436   // between them.
   1437   llvm::sort(ExitingBlocks, [&](BasicBlock *A, BasicBlock *B) {
   1438                // std::sort sorts in ascending order, so we want the inverse of
   1439                // the normal dominance relation.
   1440                if (A == B) return false;
   1441                if (DT->properlyDominates(A, B))
   1442                  return true;
   1443                else {
   1444                  assert(DT->properlyDominates(B, A) &&
   1445                         "expected total dominance order!");
   1446                  return false;
   1447                }
   1448   });
   1449 #ifdef ASSERT
   1450   for (unsigned i = 1; i < ExitingBlocks.size(); i++) {
   1451     assert(DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]));
   1452   }
   1453 #endif
   1454 
   1455   bool Changed = false;
   1456   bool SkipLastIter = false;
   1457   SmallSet<const SCEV*, 8> DominatingExitCounts;
   1458   for (BasicBlock *ExitingBB : ExitingBlocks) {
   1459     const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
   1460     if (isa<SCEVCouldNotCompute>(ExitCount)) {
   1461       // Okay, we do not know the exit count here. Can we at least prove that it
   1462       // will remain the same within iteration space?
   1463       auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
   1464       auto OptimizeCond = [&](bool Inverted, bool SkipLastIter) {
   1465         return optimizeLoopExitWithUnknownExitCount(
   1466             L, BI, ExitingBB, MaxExitCount, Inverted, SkipLastIter, SE,
   1467             Rewriter, DeadInsts);
   1468       };
   1469 
   1470       // TODO: We might have proved that we can skip the last iteration for
   1471       // this check. In this case, we only want to check the condition on the
   1472       // pre-last iteration (MaxExitCount - 1). However, there is a nasty
   1473       // corner case:
   1474       //
   1475       //   for (i = len; i != 0; i--) { ... check (i ult X) ... }
   1476       //
   1477       // If we could not prove that len != 0, then we also could not prove that
   1478       // (len - 1) is not a UINT_MAX. If we simply query (len - 1), then
   1479       // OptimizeCond will likely not prove anything for it, even if it could
   1480       // prove the same fact for len.
   1481       //
   1482       // As a temporary solution, we query both last and pre-last iterations in
   1483       // hope that we will be able to prove triviality for at least one of
   1484       // them. We can stop querying MaxExitCount for this case once SCEV
   1485       // understands that (MaxExitCount - 1) will not overflow here.
   1486       if (OptimizeCond(false, false) || OptimizeCond(true, false))
   1487         Changed = true;
   1488       else if (SkipLastIter)
   1489         if (OptimizeCond(false, true) || OptimizeCond(true, true))
   1490           Changed = true;
   1491       continue;
   1492     }
   1493 
   1494     if (MaxExitCount == ExitCount)
   1495       // If the loop has more than 1 iteration, all further checks will be
   1496       // executed 1 iteration less.
   1497       SkipLastIter = true;
   1498 
   1499     // If we know we'd exit on the first iteration, rewrite the exit to
   1500     // reflect this.  This does not imply the loop must exit through this
   1501     // exit; there may be an earlier one taken on the first iteration.
   1502     // TODO: Given we know the backedge can't be taken, we should go ahead
   1503     // and break it.  Or at least, kill all the header phis and simplify.
   1504     if (ExitCount->isZero()) {
   1505       foldExit(L, ExitingBB, true, DeadInsts);
   1506       Changed = true;
   1507       continue;
   1508     }
   1509 
   1510     // If we end up with a pointer exit count, bail.  Note that we can end up
   1511     // with a pointer exit count for one exiting block, and not for another in
   1512     // the same loop.
   1513     if (!ExitCount->getType()->isIntegerTy() ||
   1514         !MaxExitCount->getType()->isIntegerTy())
   1515       continue;
   1516 
   1517     Type *WiderType =
   1518       SE->getWiderType(MaxExitCount->getType(), ExitCount->getType());
   1519     ExitCount = SE->getNoopOrZeroExtend(ExitCount, WiderType);
   1520     MaxExitCount = SE->getNoopOrZeroExtend(MaxExitCount, WiderType);
   1521     assert(MaxExitCount->getType() == ExitCount->getType());
   1522 
   1523     // Can we prove that some other exit must be taken strictly before this
   1524     // one?
   1525     if (SE->isLoopEntryGuardedByCond(L, CmpInst::ICMP_ULT,
   1526                                      MaxExitCount, ExitCount)) {
   1527       foldExit(L, ExitingBB, false, DeadInsts);
   1528       Changed = true;
   1529       continue;
   1530     }
   1531 
   1532     // As we run, keep track of which exit counts we've encountered.  If we
   1533     // find a duplicate, we've found an exit which would have exited on the
   1534     // exiting iteration, but (from the visit order) strictly follows another
   1535     // which does the same and is thus dead.
   1536     if (!DominatingExitCounts.insert(ExitCount).second) {
   1537       foldExit(L, ExitingBB, false, DeadInsts);
   1538       Changed = true;
   1539       continue;
   1540     }
   1541 
   1542     // TODO: There might be another oppurtunity to leverage SCEV's reasoning
   1543     // here.  If we kept track of the min of dominanting exits so far, we could
   1544     // discharge exits with EC >= MDEC. This is less powerful than the existing
   1545     // transform (since later exits aren't considered), but potentially more
   1546     // powerful for any case where SCEV can prove a >=u b, but neither a == b
   1547     // or a >u b.  Such a case is not currently known.
   1548   }
   1549   return Changed;
   1550 }
   1551 
   1552 bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
   1553   SmallVector<BasicBlock*, 16> ExitingBlocks;
   1554   L->getExitingBlocks(ExitingBlocks);
   1555 
   1556   // Finally, see if we can rewrite our exit conditions into a loop invariant
   1557   // form. If we have a read-only loop, and we can tell that we must exit down
   1558   // a path which does not need any of the values computed within the loop, we
   1559   // can rewrite the loop to exit on the first iteration.  Note that this
   1560   // doesn't either a) tell us the loop exits on the first iteration (unless
   1561   // *all* exits are predicateable) or b) tell us *which* exit might be taken.
   1562   // This transformation looks a lot like a restricted form of dead loop
   1563   // elimination, but restricted to read-only loops and without neccesssarily
   1564   // needing to kill the loop entirely.
   1565   if (!LoopPredication)
   1566     return false;
   1567 
   1568   // Note: ExactBTC is the exact backedge taken count *iff* the loop exits
   1569   // through *explicit* control flow.  We have to eliminate the possibility of
   1570   // implicit exits (see below) before we know it's truly exact.
   1571   const SCEV *ExactBTC = SE->getBackedgeTakenCount(L);
   1572   if (isa<SCEVCouldNotCompute>(ExactBTC) ||
   1573       !SE->isLoopInvariant(ExactBTC, L) ||
   1574       !isSafeToExpand(ExactBTC, *SE))
   1575     return false;
   1576 
   1577   // If we end up with a pointer exit count, bail.  It may be unsized.
   1578   if (!ExactBTC->getType()->isIntegerTy())
   1579     return false;
   1580 
   1581   auto BadExit = [&](BasicBlock *ExitingBB) {
   1582     // If our exiting block exits multiple loops, we can only rewrite the
   1583     // innermost one.  Otherwise, we're changing how many times the innermost
   1584     // loop runs before it exits.
   1585     if (LI->getLoopFor(ExitingBB) != L)
   1586       return true;
   1587 
   1588     // Can't rewrite non-branch yet.
   1589     BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
   1590     if (!BI)
   1591       return true;
   1592 
   1593     // If already constant, nothing to do.
   1594     if (isa<Constant>(BI->getCondition()))
   1595       return true;
   1596 
   1597     // If the exit block has phis, we need to be able to compute the values
   1598     // within the loop which contains them.  This assumes trivially lcssa phis
   1599     // have already been removed; TODO: generalize
   1600     BasicBlock *ExitBlock =
   1601     BI->getSuccessor(L->contains(BI->getSuccessor(0)) ? 1 : 0);
   1602     if (!ExitBlock->phis().empty())
   1603       return true;
   1604 
   1605     const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
   1606     if (isa<SCEVCouldNotCompute>(ExitCount) ||
   1607         !SE->isLoopInvariant(ExitCount, L) ||
   1608         !isSafeToExpand(ExitCount, *SE))
   1609       return true;
   1610 
   1611     // If we end up with a pointer exit count, bail.  It may be unsized.
   1612     if (!ExitCount->getType()->isIntegerTy())
   1613       return true;
   1614 
   1615     return false;
   1616   };
   1617 
   1618   // If we have any exits which can't be predicated themselves, than we can't
   1619   // predicate any exit which isn't guaranteed to execute before it.  Consider
   1620   // two exits (a) and (b) which would both exit on the same iteration.  If we
   1621   // can predicate (b), but not (a), and (a) preceeds (b) along some path, then
   1622   // we could convert a loop from exiting through (a) to one exiting through
   1623   // (b).  Note that this problem exists only for exits with the same exit
   1624   // count, and we could be more aggressive when exit counts are known inequal.
   1625   llvm::sort(ExitingBlocks,
   1626             [&](BasicBlock *A, BasicBlock *B) {
   1627               // std::sort sorts in ascending order, so we want the inverse of
   1628               // the normal dominance relation, plus a tie breaker for blocks
   1629               // unordered by dominance.
   1630               if (DT->properlyDominates(A, B)) return true;
   1631               if (DT->properlyDominates(B, A)) return false;
   1632               return A->getName() < B->getName();
   1633             });
   1634   // Check to see if our exit blocks are a total order (i.e. a linear chain of
   1635   // exits before the backedge).  If they aren't, reasoning about reachability
   1636   // is complicated and we choose not to for now.
   1637   for (unsigned i = 1; i < ExitingBlocks.size(); i++)
   1638     if (!DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]))
   1639       return false;
   1640 
   1641   // Given our sorted total order, we know that exit[j] must be evaluated
   1642   // after all exit[i] such j > i.
   1643   for (unsigned i = 0, e = ExitingBlocks.size(); i < e; i++)
   1644     if (BadExit(ExitingBlocks[i])) {
   1645       ExitingBlocks.resize(i);
   1646       break;
   1647     }
   1648 
   1649   if (ExitingBlocks.empty())
   1650     return false;
   1651 
   1652   // We rely on not being able to reach an exiting block on a later iteration
   1653   // then it's statically compute exit count.  The implementaton of
   1654   // getExitCount currently has this invariant, but assert it here so that
   1655   // breakage is obvious if this ever changes..
   1656   assert(llvm::all_of(ExitingBlocks, [&](BasicBlock *ExitingBB) {
   1657         return DT->dominates(ExitingBB, L->getLoopLatch());
   1658       }));
   1659 
   1660   // At this point, ExitingBlocks consists of only those blocks which are
   1661   // predicatable.  Given that, we know we have at least one exit we can
   1662   // predicate if the loop is doesn't have side effects and doesn't have any
   1663   // implicit exits (because then our exact BTC isn't actually exact).
   1664   // @Reviewers - As structured, this is O(I^2) for loop nests.  Any
   1665   // suggestions on how to improve this?  I can obviously bail out for outer
   1666   // loops, but that seems less than ideal.  MemorySSA can find memory writes,
   1667   // is that enough for *all* side effects?
   1668   for (BasicBlock *BB : L->blocks())
   1669     for (auto &I : *BB)
   1670       // TODO:isGuaranteedToTransfer
   1671       if (I.mayHaveSideEffects())
   1672         return false;
   1673 
   1674   bool Changed = false;
   1675   // Finally, do the actual predication for all predicatable blocks.  A couple
   1676   // of notes here:
   1677   // 1) We don't bother to constant fold dominated exits with identical exit
   1678   //    counts; that's simply a form of CSE/equality propagation and we leave
   1679   //    it for dedicated passes.
   1680   // 2) We insert the comparison at the branch.  Hoisting introduces additional
   1681   //    legality constraints and we leave that to dedicated logic.  We want to
   1682   //    predicate even if we can't insert a loop invariant expression as
   1683   //    peeling or unrolling will likely reduce the cost of the otherwise loop
   1684   //    varying check.
   1685   Rewriter.setInsertPoint(L->getLoopPreheader()->getTerminator());
   1686   IRBuilder<> B(L->getLoopPreheader()->getTerminator());
   1687   Value *ExactBTCV = nullptr; // Lazily generated if needed.
   1688   for (BasicBlock *ExitingBB : ExitingBlocks) {
   1689     const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
   1690 
   1691     auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
   1692     Value *NewCond;
   1693     if (ExitCount == ExactBTC) {
   1694       NewCond = L->contains(BI->getSuccessor(0)) ?
   1695         B.getFalse() : B.getTrue();
   1696     } else {
   1697       Value *ECV = Rewriter.expandCodeFor(ExitCount);
   1698       if (!ExactBTCV)
   1699         ExactBTCV = Rewriter.expandCodeFor(ExactBTC);
   1700       Value *RHS = ExactBTCV;
   1701       if (ECV->getType() != RHS->getType()) {
   1702         Type *WiderTy = SE->getWiderType(ECV->getType(), RHS->getType());
   1703         ECV = B.CreateZExt(ECV, WiderTy);
   1704         RHS = B.CreateZExt(RHS, WiderTy);
   1705       }
   1706       auto Pred = L->contains(BI->getSuccessor(0)) ?
   1707         ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
   1708       NewCond = B.CreateICmp(Pred, ECV, RHS);
   1709     }
   1710     Value *OldCond = BI->getCondition();
   1711     BI->setCondition(NewCond);
   1712     if (OldCond->use_empty())
   1713       DeadInsts.emplace_back(OldCond);
   1714     Changed = true;
   1715   }
   1716 
   1717   return Changed;
   1718 }
   1719 
   1720 //===----------------------------------------------------------------------===//
   1721 //  IndVarSimplify driver. Manage several subpasses of IV simplification.
   1722 //===----------------------------------------------------------------------===//
   1723 
   1724 bool IndVarSimplify::run(Loop *L) {
   1725   // We need (and expect!) the incoming loop to be in LCSSA.
   1726   assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
   1727          "LCSSA required to run indvars!");
   1728 
   1729   // If LoopSimplify form is not available, stay out of trouble. Some notes:
   1730   //  - LSR currently only supports LoopSimplify-form loops. Indvars'
   1731   //    canonicalization can be a pessimization without LSR to "clean up"
   1732   //    afterwards.
   1733   //  - We depend on having a preheader; in particular,
   1734   //    Loop::getCanonicalInductionVariable only supports loops with preheaders,
   1735   //    and we're in trouble if we can't find the induction variable even when
   1736   //    we've manually inserted one.
   1737   //  - LFTR relies on having a single backedge.
   1738   if (!L->isLoopSimplifyForm())
   1739     return false;
   1740 
   1741 #ifndef NDEBUG
   1742   // Used below for a consistency check only
   1743   // Note: Since the result returned by ScalarEvolution may depend on the order
   1744   // in which previous results are added to its cache, the call to
   1745   // getBackedgeTakenCount() may change following SCEV queries.
   1746   const SCEV *BackedgeTakenCount;
   1747   if (VerifyIndvars)
   1748     BackedgeTakenCount = SE->getBackedgeTakenCount(L);
   1749 #endif
   1750 
   1751   bool Changed = false;
   1752   // If there are any floating-point recurrences, attempt to
   1753   // transform them to use integer recurrences.
   1754   Changed |= rewriteNonIntegerIVs(L);
   1755 
   1756   // Create a rewriter object which we'll use to transform the code with.
   1757   SCEVExpander Rewriter(*SE, DL, "indvars");
   1758 #ifndef NDEBUG
   1759   Rewriter.setDebugType(DEBUG_TYPE);
   1760 #endif
   1761 
   1762   // Eliminate redundant IV users.
   1763   //
   1764   // Simplification works best when run before other consumers of SCEV. We
   1765   // attempt to avoid evaluating SCEVs for sign/zero extend operations until
   1766   // other expressions involving loop IVs have been evaluated. This helps SCEV
   1767   // set no-wrap flags before normalizing sign/zero extension.
   1768   Rewriter.disableCanonicalMode();
   1769   Changed |= simplifyAndExtend(L, Rewriter, LI);
   1770 
   1771   // Check to see if we can compute the final value of any expressions
   1772   // that are recurrent in the loop, and substitute the exit values from the
   1773   // loop into any instructions outside of the loop that use the final values
   1774   // of the current expressions.
   1775   if (ReplaceExitValue != NeverRepl) {
   1776     if (int Rewrites = rewriteLoopExitValues(L, LI, TLI, SE, TTI, Rewriter, DT,
   1777                                              ReplaceExitValue, DeadInsts)) {
   1778       NumReplaced += Rewrites;
   1779       Changed = true;
   1780     }
   1781   }
   1782 
   1783   // Eliminate redundant IV cycles.
   1784   NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts);
   1785 
   1786   // Try to eliminate loop exits based on analyzeable exit counts
   1787   if (optimizeLoopExits(L, Rewriter))  {
   1788     Changed = true;
   1789     // Given we've changed exit counts, notify SCEV
   1790     // Some nested loops may share same folded exit basic block,
   1791     // thus we need to notify top most loop.
   1792     SE->forgetTopmostLoop(L);
   1793   }
   1794 
   1795   // Try to form loop invariant tests for loop exits by changing how many
   1796   // iterations of the loop run when that is unobservable.
   1797   if (predicateLoopExits(L, Rewriter)) {
   1798     Changed = true;
   1799     // Given we've changed exit counts, notify SCEV
   1800     SE->forgetLoop(L);
   1801   }
   1802 
   1803   // If we have a trip count expression, rewrite the loop's exit condition
   1804   // using it.
   1805   if (!DisableLFTR) {
   1806     BasicBlock *PreHeader = L->getLoopPreheader();
   1807     BranchInst *PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator());
   1808 
   1809     SmallVector<BasicBlock*, 16> ExitingBlocks;
   1810     L->getExitingBlocks(ExitingBlocks);
   1811     for (BasicBlock *ExitingBB : ExitingBlocks) {
   1812       // Can't rewrite non-branch yet.
   1813       if (!isa<BranchInst>(ExitingBB->getTerminator()))
   1814         continue;
   1815 
   1816       // If our exitting block exits multiple loops, we can only rewrite the
   1817       // innermost one.  Otherwise, we're changing how many times the innermost
   1818       // loop runs before it exits.
   1819       if (LI->getLoopFor(ExitingBB) != L)
   1820         continue;
   1821 
   1822       if (!needsLFTR(L, ExitingBB))
   1823         continue;
   1824 
   1825       const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
   1826       if (isa<SCEVCouldNotCompute>(ExitCount))
   1827         continue;
   1828 
   1829       // This was handled above, but as we form SCEVs, we can sometimes refine
   1830       // existing ones; this allows exit counts to be folded to zero which
   1831       // weren't when optimizeLoopExits saw them.  Arguably, we should iterate
   1832       // until stable to handle cases like this better.
   1833       if (ExitCount->isZero())
   1834         continue;
   1835 
   1836       PHINode *IndVar = FindLoopCounter(L, ExitingBB, ExitCount, SE, DT);
   1837       if (!IndVar)
   1838         continue;
   1839 
   1840       // Avoid high cost expansions.  Note: This heuristic is questionable in
   1841       // that our definition of "high cost" is not exactly principled.
   1842       if (Rewriter.isHighCostExpansion(ExitCount, L, SCEVCheapExpansionBudget,
   1843                                        TTI, PreHeaderBR))
   1844         continue;
   1845 
   1846       // Check preconditions for proper SCEVExpander operation. SCEV does not
   1847       // express SCEVExpander's dependencies, such as LoopSimplify. Instead
   1848       // any pass that uses the SCEVExpander must do it. This does not work
   1849       // well for loop passes because SCEVExpander makes assumptions about
   1850       // all loops, while LoopPassManager only forces the current loop to be
   1851       // simplified.
   1852       //
   1853       // FIXME: SCEV expansion has no way to bail out, so the caller must
   1854       // explicitly check any assumptions made by SCEV. Brittle.
   1855       const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(ExitCount);
   1856       if (!AR || AR->getLoop()->getLoopPreheader())
   1857         Changed |= linearFunctionTestReplace(L, ExitingBB,
   1858                                              ExitCount, IndVar,
   1859                                              Rewriter);
   1860     }
   1861   }
   1862   // Clear the rewriter cache, because values that are in the rewriter's cache
   1863   // can be deleted in the loop below, causing the AssertingVH in the cache to
   1864   // trigger.
   1865   Rewriter.clear();
   1866 
   1867   // Now that we're done iterating through lists, clean up any instructions
   1868   // which are now dead.
   1869   while (!DeadInsts.empty()) {
   1870     Value *V = DeadInsts.pop_back_val();
   1871 
   1872     if (PHINode *PHI = dyn_cast_or_null<PHINode>(V))
   1873       Changed |= RecursivelyDeleteDeadPHINode(PHI, TLI, MSSAU.get());
   1874     else if (Instruction *Inst = dyn_cast_or_null<Instruction>(V))
   1875       Changed |=
   1876           RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI, MSSAU.get());
   1877   }
   1878 
   1879   // The Rewriter may not be used from this point on.
   1880 
   1881   // Loop-invariant instructions in the preheader that aren't used in the
   1882   // loop may be sunk below the loop to reduce register pressure.
   1883   Changed |= sinkUnusedInvariants(L);
   1884 
   1885   // rewriteFirstIterationLoopExitValues does not rely on the computation of
   1886   // trip count and therefore can further simplify exit values in addition to
   1887   // rewriteLoopExitValues.
   1888   Changed |= rewriteFirstIterationLoopExitValues(L);
   1889 
   1890   // Clean up dead instructions.
   1891   Changed |= DeleteDeadPHIs(L->getHeader(), TLI, MSSAU.get());
   1892 
   1893   // Check a post-condition.
   1894   assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
   1895          "Indvars did not preserve LCSSA!");
   1896 
   1897   // Verify that LFTR, and any other change have not interfered with SCEV's
   1898   // ability to compute trip count.  We may have *changed* the exit count, but
   1899   // only by reducing it.
   1900 #ifndef NDEBUG
   1901   if (VerifyIndvars && !isa<SCEVCouldNotCompute>(BackedgeTakenCount)) {
   1902     SE->forgetLoop(L);
   1903     const SCEV *NewBECount = SE->getBackedgeTakenCount(L);
   1904     if (SE->getTypeSizeInBits(BackedgeTakenCount->getType()) <
   1905         SE->getTypeSizeInBits(NewBECount->getType()))
   1906       NewBECount = SE->getTruncateOrNoop(NewBECount,
   1907                                          BackedgeTakenCount->getType());
   1908     else
   1909       BackedgeTakenCount = SE->getTruncateOrNoop(BackedgeTakenCount,
   1910                                                  NewBECount->getType());
   1911     assert(!SE->isKnownPredicate(ICmpInst::ICMP_ULT, BackedgeTakenCount,
   1912                                  NewBECount) && "indvars must preserve SCEV");
   1913   }
   1914   if (VerifyMemorySSA && MSSAU)
   1915     MSSAU->getMemorySSA()->verifyMemorySSA();
   1916 #endif
   1917 
   1918   return Changed;
   1919 }
   1920 
   1921 PreservedAnalyses IndVarSimplifyPass::run(Loop &L, LoopAnalysisManager &AM,
   1922                                           LoopStandardAnalysisResults &AR,
   1923                                           LPMUpdater &) {
   1924   Function *F = L.getHeader()->getParent();
   1925   const DataLayout &DL = F->getParent()->getDataLayout();
   1926 
   1927   IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI, AR.MSSA,
   1928                      WidenIndVars && AllowIVWidening);
   1929   if (!IVS.run(&L))
   1930     return PreservedAnalyses::all();
   1931 
   1932   auto PA = getLoopPassPreservedAnalyses();
   1933   PA.preserveSet<CFGAnalyses>();
   1934   if (AR.MSSA)
   1935     PA.preserve<MemorySSAAnalysis>();
   1936   return PA;
   1937 }
   1938 
   1939 namespace {
   1940 
   1941 struct IndVarSimplifyLegacyPass : public LoopPass {
   1942   static char ID; // Pass identification, replacement for typeid
   1943 
   1944   IndVarSimplifyLegacyPass() : LoopPass(ID) {
   1945     initializeIndVarSimplifyLegacyPassPass(*PassRegistry::getPassRegistry());
   1946   }
   1947 
   1948   bool runOnLoop(Loop *L, LPPassManager &LPM) override {
   1949     if (skipLoop(L))
   1950       return false;
   1951 
   1952     auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
   1953     auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
   1954     auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
   1955     auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
   1956     auto *TLI = TLIP ? &TLIP->getTLI(*L->getHeader()->getParent()) : nullptr;
   1957     auto *TTIP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>();
   1958     auto *TTI = TTIP ? &TTIP->getTTI(*L->getHeader()->getParent()) : nullptr;
   1959     const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
   1960     auto *MSSAAnalysis = getAnalysisIfAvailable<MemorySSAWrapperPass>();
   1961     MemorySSA *MSSA = nullptr;
   1962     if (MSSAAnalysis)
   1963       MSSA = &MSSAAnalysis->getMSSA();
   1964 
   1965     IndVarSimplify IVS(LI, SE, DT, DL, TLI, TTI, MSSA, AllowIVWidening);
   1966     return IVS.run(L);
   1967   }
   1968 
   1969   void getAnalysisUsage(AnalysisUsage &AU) const override {
   1970     AU.setPreservesCFG();
   1971     AU.addPreserved<MemorySSAWrapperPass>();
   1972     getLoopAnalysisUsage(AU);
   1973   }
   1974 };
   1975 
   1976 } // end anonymous namespace
   1977 
   1978 char IndVarSimplifyLegacyPass::ID = 0;
   1979 
   1980 INITIALIZE_PASS_BEGIN(IndVarSimplifyLegacyPass, "indvars",
   1981                       "Induction Variable Simplification", false, false)
   1982 INITIALIZE_PASS_DEPENDENCY(LoopPass)
   1983 INITIALIZE_PASS_END(IndVarSimplifyLegacyPass, "indvars",
   1984                     "Induction Variable Simplification", false, false)
   1985 
   1986 Pass *llvm::createIndVarSimplifyPass() {
   1987   return new IndVarSimplifyLegacyPass();
   1988 }
   1989