Home | History | Annotate | Line # | Download | only in Scalar
      1 //===- EarlyCSE.cpp - Simple and fast CSE pass ----------------------------===//
      2 //
      3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
      4 // See https://llvm.org/LICENSE.txt for license information.
      5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
      6 //
      7 //===----------------------------------------------------------------------===//
      8 //
      9 // This pass performs a simple dominator tree walk that eliminates trivially
     10 // redundant instructions.
     11 //
     12 //===----------------------------------------------------------------------===//
     13 
     14 #include "llvm/Transforms/Scalar/EarlyCSE.h"
     15 #include "llvm/ADT/DenseMapInfo.h"
     16 #include "llvm/ADT/Hashing.h"
     17 #include "llvm/ADT/STLExtras.h"
     18 #include "llvm/ADT/ScopedHashTable.h"
     19 #include "llvm/ADT/SetVector.h"
     20 #include "llvm/ADT/SmallVector.h"
     21 #include "llvm/ADT/Statistic.h"
     22 #include "llvm/Analysis/AssumptionCache.h"
     23 #include "llvm/Analysis/GlobalsModRef.h"
     24 #include "llvm/Analysis/GuardUtils.h"
     25 #include "llvm/Analysis/InstructionSimplify.h"
     26 #include "llvm/Analysis/MemorySSA.h"
     27 #include "llvm/Analysis/MemorySSAUpdater.h"
     28 #include "llvm/Analysis/TargetLibraryInfo.h"
     29 #include "llvm/Analysis/TargetTransformInfo.h"
     30 #include "llvm/Analysis/ValueTracking.h"
     31 #include "llvm/IR/BasicBlock.h"
     32 #include "llvm/IR/Constants.h"
     33 #include "llvm/IR/DataLayout.h"
     34 #include "llvm/IR/Dominators.h"
     35 #include "llvm/IR/Function.h"
     36 #include "llvm/IR/InstrTypes.h"
     37 #include "llvm/IR/Instruction.h"
     38 #include "llvm/IR/Instructions.h"
     39 #include "llvm/IR/IntrinsicInst.h"
     40 #include "llvm/IR/Intrinsics.h"
     41 #include "llvm/IR/LLVMContext.h"
     42 #include "llvm/IR/PassManager.h"
     43 #include "llvm/IR/PatternMatch.h"
     44 #include "llvm/IR/Type.h"
     45 #include "llvm/IR/Use.h"
     46 #include "llvm/IR/Value.h"
     47 #include "llvm/InitializePasses.h"
     48 #include "llvm/Pass.h"
     49 #include "llvm/Support/Allocator.h"
     50 #include "llvm/Support/AtomicOrdering.h"
     51 #include "llvm/Support/Casting.h"
     52 #include "llvm/Support/Debug.h"
     53 #include "llvm/Support/DebugCounter.h"
     54 #include "llvm/Support/RecyclingAllocator.h"
     55 #include "llvm/Support/raw_ostream.h"
     56 #include "llvm/Transforms/Scalar.h"
     57 #include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
     58 #include "llvm/Transforms/Utils/GuardUtils.h"
     59 #include "llvm/Transforms/Utils/Local.h"
     60 #include <cassert>
     61 #include <deque>
     62 #include <memory>
     63 #include <utility>
     64 
     65 using namespace llvm;
     66 using namespace llvm::PatternMatch;
     67 
     68 #define DEBUG_TYPE "early-cse"
     69 
     70 STATISTIC(NumSimplify, "Number of instructions simplified or DCE'd");
     71 STATISTIC(NumCSE,      "Number of instructions CSE'd");
     72 STATISTIC(NumCSECVP,   "Number of compare instructions CVP'd");
     73 STATISTIC(NumCSELoad,  "Number of load instructions CSE'd");
     74 STATISTIC(NumCSECall,  "Number of call instructions CSE'd");
     75 STATISTIC(NumDSE,      "Number of trivial dead stores removed");
     76 
     77 DEBUG_COUNTER(CSECounter, "early-cse",
     78               "Controls which instructions are removed");
     79 
     80 static cl::opt<unsigned> EarlyCSEMssaOptCap(
     81     "earlycse-mssa-optimization-cap", cl::init(500), cl::Hidden,
     82     cl::desc("Enable imprecision in EarlyCSE in pathological cases, in exchange "
     83              "for faster compile. Caps the MemorySSA clobbering calls."));
     84 
     85 static cl::opt<bool> EarlyCSEDebugHash(
     86     "earlycse-debug-hash", cl::init(false), cl::Hidden,
     87     cl::desc("Perform extra assertion checking to verify that SimpleValue's hash "
     88              "function is well-behaved w.r.t. its isEqual predicate"));
     89 
     90 //===----------------------------------------------------------------------===//
     91 // SimpleValue
     92 //===----------------------------------------------------------------------===//
     93 
     94 namespace {
     95 
     96 /// Struct representing the available values in the scoped hash table.
     97 struct SimpleValue {
     98   Instruction *Inst;
     99 
    100   SimpleValue(Instruction *I) : Inst(I) {
    101     assert((isSentinel() || canHandle(I)) && "Inst can't be handled!");
    102   }
    103 
    104   bool isSentinel() const {
    105     return Inst == DenseMapInfo<Instruction *>::getEmptyKey() ||
    106            Inst == DenseMapInfo<Instruction *>::getTombstoneKey();
    107   }
    108 
    109   static bool canHandle(Instruction *Inst) {
    110     // This can only handle non-void readnone functions.
    111     // Also handled are constrained intrinsic that look like the types
    112     // of instruction handled below (UnaryOperator, etc.).
    113     if (CallInst *CI = dyn_cast<CallInst>(Inst)) {
    114       if (Function *F = CI->getCalledFunction()) {
    115         switch ((Intrinsic::ID)F->getIntrinsicID()) {
    116         case Intrinsic::experimental_constrained_fadd:
    117         case Intrinsic::experimental_constrained_fsub:
    118         case Intrinsic::experimental_constrained_fmul:
    119         case Intrinsic::experimental_constrained_fdiv:
    120         case Intrinsic::experimental_constrained_frem:
    121         case Intrinsic::experimental_constrained_fptosi:
    122         case Intrinsic::experimental_constrained_sitofp:
    123         case Intrinsic::experimental_constrained_fptoui:
    124         case Intrinsic::experimental_constrained_uitofp:
    125         case Intrinsic::experimental_constrained_fcmp:
    126         case Intrinsic::experimental_constrained_fcmps: {
    127           auto *CFP = cast<ConstrainedFPIntrinsic>(CI);
    128           return CFP->isDefaultFPEnvironment();
    129         }
    130         }
    131       }
    132       return CI->doesNotAccessMemory() && !CI->getType()->isVoidTy();
    133     }
    134     return isa<CastInst>(Inst) || isa<UnaryOperator>(Inst) ||
    135            isa<BinaryOperator>(Inst) || isa<GetElementPtrInst>(Inst) ||
    136            isa<CmpInst>(Inst) || isa<SelectInst>(Inst) ||
    137            isa<ExtractElementInst>(Inst) || isa<InsertElementInst>(Inst) ||
    138            isa<ShuffleVectorInst>(Inst) || isa<ExtractValueInst>(Inst) ||
    139            isa<InsertValueInst>(Inst) || isa<FreezeInst>(Inst);
    140   }
    141 };
    142 
    143 } // end anonymous namespace
    144 
    145 namespace llvm {
    146 
    147 template <> struct DenseMapInfo<SimpleValue> {
    148   static inline SimpleValue getEmptyKey() {
    149     return DenseMapInfo<Instruction *>::getEmptyKey();
    150   }
    151 
    152   static inline SimpleValue getTombstoneKey() {
    153     return DenseMapInfo<Instruction *>::getTombstoneKey();
    154   }
    155 
    156   static unsigned getHashValue(SimpleValue Val);
    157   static bool isEqual(SimpleValue LHS, SimpleValue RHS);
    158 };
    159 
    160 } // end namespace llvm
    161 
    162 /// Match a 'select' including an optional 'not's of the condition.
    163 static bool matchSelectWithOptionalNotCond(Value *V, Value *&Cond, Value *&A,
    164                                            Value *&B,
    165                                            SelectPatternFlavor &Flavor) {
    166   // Return false if V is not even a select.
    167   if (!match(V, m_Select(m_Value(Cond), m_Value(A), m_Value(B))))
    168     return false;
    169 
    170   // Look through a 'not' of the condition operand by swapping A/B.
    171   Value *CondNot;
    172   if (match(Cond, m_Not(m_Value(CondNot)))) {
    173     Cond = CondNot;
    174     std::swap(A, B);
    175   }
    176 
    177   // Match canonical forms of min/max. We are not using ValueTracking's
    178   // more powerful matchSelectPattern() because it may rely on instruction flags
    179   // such as "nsw". That would be incompatible with the current hashing
    180   // mechanism that may remove flags to increase the likelihood of CSE.
    181 
    182   Flavor = SPF_UNKNOWN;
    183   CmpInst::Predicate Pred;
    184 
    185   if (!match(Cond, m_ICmp(Pred, m_Specific(A), m_Specific(B)))) {
    186     // Check for commuted variants of min/max by swapping predicate.
    187     // If we do not match the standard or commuted patterns, this is not a
    188     // recognized form of min/max, but it is still a select, so return true.
    189     if (!match(Cond, m_ICmp(Pred, m_Specific(B), m_Specific(A))))
    190       return true;
    191     Pred = ICmpInst::getSwappedPredicate(Pred);
    192   }
    193 
    194   switch (Pred) {
    195   case CmpInst::ICMP_UGT: Flavor = SPF_UMAX; break;
    196   case CmpInst::ICMP_ULT: Flavor = SPF_UMIN; break;
    197   case CmpInst::ICMP_SGT: Flavor = SPF_SMAX; break;
    198   case CmpInst::ICMP_SLT: Flavor = SPF_SMIN; break;
    199   // Non-strict inequalities.
    200   case CmpInst::ICMP_ULE: Flavor = SPF_UMIN; break;
    201   case CmpInst::ICMP_UGE: Flavor = SPF_UMAX; break;
    202   case CmpInst::ICMP_SLE: Flavor = SPF_SMIN; break;
    203   case CmpInst::ICMP_SGE: Flavor = SPF_SMAX; break;
    204   default: break;
    205   }
    206 
    207   return true;
    208 }
    209 
    210 static unsigned getHashValueImpl(SimpleValue Val) {
    211   Instruction *Inst = Val.Inst;
    212   // Hash in all of the operands as pointers.
    213   if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Inst)) {
    214     Value *LHS = BinOp->getOperand(0);
    215     Value *RHS = BinOp->getOperand(1);
    216     if (BinOp->isCommutative() && BinOp->getOperand(0) > BinOp->getOperand(1))
    217       std::swap(LHS, RHS);
    218 
    219     return hash_combine(BinOp->getOpcode(), LHS, RHS);
    220   }
    221 
    222   if (CmpInst *CI = dyn_cast<CmpInst>(Inst)) {
    223     // Compares can be commuted by swapping the comparands and
    224     // updating the predicate.  Choose the form that has the
    225     // comparands in sorted order, or in the case of a tie, the
    226     // one with the lower predicate.
    227     Value *LHS = CI->getOperand(0);
    228     Value *RHS = CI->getOperand(1);
    229     CmpInst::Predicate Pred = CI->getPredicate();
    230     CmpInst::Predicate SwappedPred = CI->getSwappedPredicate();
    231     if (std::tie(LHS, Pred) > std::tie(RHS, SwappedPred)) {
    232       std::swap(LHS, RHS);
    233       Pred = SwappedPred;
    234     }
    235     return hash_combine(Inst->getOpcode(), Pred, LHS, RHS);
    236   }
    237 
    238   // Hash general selects to allow matching commuted true/false operands.
    239   SelectPatternFlavor SPF;
    240   Value *Cond, *A, *B;
    241   if (matchSelectWithOptionalNotCond(Inst, Cond, A, B, SPF)) {
    242     // Hash min/max (cmp + select) to allow for commuted operands.
    243     // Min/max may also have non-canonical compare predicate (eg, the compare for
    244     // smin may use 'sgt' rather than 'slt'), and non-canonical operands in the
    245     // compare.
    246     // TODO: We should also detect FP min/max.
    247     if (SPF == SPF_SMIN || SPF == SPF_SMAX ||
    248         SPF == SPF_UMIN || SPF == SPF_UMAX) {
    249       if (A > B)
    250         std::swap(A, B);
    251       return hash_combine(Inst->getOpcode(), SPF, A, B);
    252     }
    253 
    254     // Hash general selects to allow matching commuted true/false operands.
    255 
    256     // If we do not have a compare as the condition, just hash in the condition.
    257     CmpInst::Predicate Pred;
    258     Value *X, *Y;
    259     if (!match(Cond, m_Cmp(Pred, m_Value(X), m_Value(Y))))
    260       return hash_combine(Inst->getOpcode(), Cond, A, B);
    261 
    262     // Similar to cmp normalization (above) - canonicalize the predicate value:
    263     // select (icmp Pred, X, Y), A, B --> select (icmp InvPred, X, Y), B, A
    264     if (CmpInst::getInversePredicate(Pred) < Pred) {
    265       Pred = CmpInst::getInversePredicate(Pred);
    266       std::swap(A, B);
    267     }
    268     return hash_combine(Inst->getOpcode(), Pred, X, Y, A, B);
    269   }
    270 
    271   if (CastInst *CI = dyn_cast<CastInst>(Inst))
    272     return hash_combine(CI->getOpcode(), CI->getType(), CI->getOperand(0));
    273 
    274   if (FreezeInst *FI = dyn_cast<FreezeInst>(Inst))
    275     return hash_combine(FI->getOpcode(), FI->getOperand(0));
    276 
    277   if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(Inst))
    278     return hash_combine(EVI->getOpcode(), EVI->getOperand(0),
    279                         hash_combine_range(EVI->idx_begin(), EVI->idx_end()));
    280 
    281   if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(Inst))
    282     return hash_combine(IVI->getOpcode(), IVI->getOperand(0),
    283                         IVI->getOperand(1),
    284                         hash_combine_range(IVI->idx_begin(), IVI->idx_end()));
    285 
    286   assert((isa<CallInst>(Inst) || isa<GetElementPtrInst>(Inst) ||
    287           isa<ExtractElementInst>(Inst) || isa<InsertElementInst>(Inst) ||
    288           isa<ShuffleVectorInst>(Inst) || isa<UnaryOperator>(Inst) ||
    289           isa<FreezeInst>(Inst)) &&
    290          "Invalid/unknown instruction");
    291 
    292   // Handle intrinsics with commutative operands.
    293   // TODO: Extend this to handle intrinsics with >2 operands where the 1st
    294   //       2 operands are commutative.
    295   auto *II = dyn_cast<IntrinsicInst>(Inst);
    296   if (II && II->isCommutative() && II->getNumArgOperands() == 2) {
    297     Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
    298     if (LHS > RHS)
    299       std::swap(LHS, RHS);
    300     return hash_combine(II->getOpcode(), LHS, RHS);
    301   }
    302 
    303   // gc.relocate is 'special' call: its second and third operands are
    304   // not real values, but indices into statepoint's argument list.
    305   // Get values they point to.
    306   if (const GCRelocateInst *GCR = dyn_cast<GCRelocateInst>(Inst))
    307     return hash_combine(GCR->getOpcode(), GCR->getOperand(0),
    308                         GCR->getBasePtr(), GCR->getDerivedPtr());
    309 
    310   // Mix in the opcode.
    311   return hash_combine(
    312       Inst->getOpcode(),
    313       hash_combine_range(Inst->value_op_begin(), Inst->value_op_end()));
    314 }
    315 
    316 unsigned DenseMapInfo<SimpleValue>::getHashValue(SimpleValue Val) {
    317 #ifndef NDEBUG
    318   // If -earlycse-debug-hash was specified, return a constant -- this
    319   // will force all hashing to collide, so we'll exhaustively search
    320   // the table for a match, and the assertion in isEqual will fire if
    321   // there's a bug causing equal keys to hash differently.
    322   if (EarlyCSEDebugHash)
    323     return 0;
    324 #endif
    325   return getHashValueImpl(Val);
    326 }
    327 
    328 static bool isEqualImpl(SimpleValue LHS, SimpleValue RHS) {
    329   Instruction *LHSI = LHS.Inst, *RHSI = RHS.Inst;
    330 
    331   if (LHS.isSentinel() || RHS.isSentinel())
    332     return LHSI == RHSI;
    333 
    334   if (LHSI->getOpcode() != RHSI->getOpcode())
    335     return false;
    336   if (LHSI->isIdenticalToWhenDefined(RHSI))
    337     return true;
    338 
    339   // If we're not strictly identical, we still might be a commutable instruction
    340   if (BinaryOperator *LHSBinOp = dyn_cast<BinaryOperator>(LHSI)) {
    341     if (!LHSBinOp->isCommutative())
    342       return false;
    343 
    344     assert(isa<BinaryOperator>(RHSI) &&
    345            "same opcode, but different instruction type?");
    346     BinaryOperator *RHSBinOp = cast<BinaryOperator>(RHSI);
    347 
    348     // Commuted equality
    349     return LHSBinOp->getOperand(0) == RHSBinOp->getOperand(1) &&
    350            LHSBinOp->getOperand(1) == RHSBinOp->getOperand(0);
    351   }
    352   if (CmpInst *LHSCmp = dyn_cast<CmpInst>(LHSI)) {
    353     assert(isa<CmpInst>(RHSI) &&
    354            "same opcode, but different instruction type?");
    355     CmpInst *RHSCmp = cast<CmpInst>(RHSI);
    356     // Commuted equality
    357     return LHSCmp->getOperand(0) == RHSCmp->getOperand(1) &&
    358            LHSCmp->getOperand(1) == RHSCmp->getOperand(0) &&
    359            LHSCmp->getSwappedPredicate() == RHSCmp->getPredicate();
    360   }
    361 
    362   // TODO: Extend this for >2 args by matching the trailing N-2 args.
    363   auto *LII = dyn_cast<IntrinsicInst>(LHSI);
    364   auto *RII = dyn_cast<IntrinsicInst>(RHSI);
    365   if (LII && RII && LII->getIntrinsicID() == RII->getIntrinsicID() &&
    366       LII->isCommutative() && LII->getNumArgOperands() == 2) {
    367     return LII->getArgOperand(0) == RII->getArgOperand(1) &&
    368            LII->getArgOperand(1) == RII->getArgOperand(0);
    369   }
    370 
    371   // See comment above in `getHashValue()`.
    372   if (const GCRelocateInst *GCR1 = dyn_cast<GCRelocateInst>(LHSI))
    373     if (const GCRelocateInst *GCR2 = dyn_cast<GCRelocateInst>(RHSI))
    374       return GCR1->getOperand(0) == GCR2->getOperand(0) &&
    375              GCR1->getBasePtr() == GCR2->getBasePtr() &&
    376              GCR1->getDerivedPtr() == GCR2->getDerivedPtr();
    377 
    378   // Min/max can occur with commuted operands, non-canonical predicates,
    379   // and/or non-canonical operands.
    380   // Selects can be non-trivially equivalent via inverted conditions and swaps.
    381   SelectPatternFlavor LSPF, RSPF;
    382   Value *CondL, *CondR, *LHSA, *RHSA, *LHSB, *RHSB;
    383   if (matchSelectWithOptionalNotCond(LHSI, CondL, LHSA, LHSB, LSPF) &&
    384       matchSelectWithOptionalNotCond(RHSI, CondR, RHSA, RHSB, RSPF)) {
    385     if (LSPF == RSPF) {
    386       // TODO: We should also detect FP min/max.
    387       if (LSPF == SPF_SMIN || LSPF == SPF_SMAX ||
    388           LSPF == SPF_UMIN || LSPF == SPF_UMAX)
    389         return ((LHSA == RHSA && LHSB == RHSB) ||
    390                 (LHSA == RHSB && LHSB == RHSA));
    391 
    392       // select Cond, A, B <--> select not(Cond), B, A
    393       if (CondL == CondR && LHSA == RHSA && LHSB == RHSB)
    394         return true;
    395     }
    396 
    397     // If the true/false operands are swapped and the conditions are compares
    398     // with inverted predicates, the selects are equal:
    399     // select (icmp Pred, X, Y), A, B <--> select (icmp InvPred, X, Y), B, A
    400     //
    401     // This also handles patterns with a double-negation in the sense of not +
    402     // inverse, because we looked through a 'not' in the matching function and
    403     // swapped A/B:
    404     // select (cmp Pred, X, Y), A, B <--> select (not (cmp InvPred, X, Y)), B, A
    405     //
    406     // This intentionally does NOT handle patterns with a double-negation in
    407     // the sense of not + not, because doing so could result in values
    408     // comparing
    409     // as equal that hash differently in the min/max cases like:
    410     // select (cmp slt, X, Y), X, Y <--> select (not (not (cmp slt, X, Y))), X, Y
    411     //   ^ hashes as min                  ^ would not hash as min
    412     // In the context of the EarlyCSE pass, however, such cases never reach
    413     // this code, as we simplify the double-negation before hashing the second
    414     // select (and so still succeed at CSEing them).
    415     if (LHSA == RHSB && LHSB == RHSA) {
    416       CmpInst::Predicate PredL, PredR;
    417       Value *X, *Y;
    418       if (match(CondL, m_Cmp(PredL, m_Value(X), m_Value(Y))) &&
    419           match(CondR, m_Cmp(PredR, m_Specific(X), m_Specific(Y))) &&
    420           CmpInst::getInversePredicate(PredL) == PredR)
    421         return true;
    422     }
    423   }
    424 
    425   return false;
    426 }
    427 
    428 bool DenseMapInfo<SimpleValue>::isEqual(SimpleValue LHS, SimpleValue RHS) {
    429   // These comparisons are nontrivial, so assert that equality implies
    430   // hash equality (DenseMap demands this as an invariant).
    431   bool Result = isEqualImpl(LHS, RHS);
    432   assert(!Result || (LHS.isSentinel() && LHS.Inst == RHS.Inst) ||
    433          getHashValueImpl(LHS) == getHashValueImpl(RHS));
    434   return Result;
    435 }
    436 
    437 //===----------------------------------------------------------------------===//
    438 // CallValue
    439 //===----------------------------------------------------------------------===//
    440 
    441 namespace {
    442 
    443 /// Struct representing the available call values in the scoped hash
    444 /// table.
    445 struct CallValue {
    446   Instruction *Inst;
    447 
    448   CallValue(Instruction *I) : Inst(I) {
    449     assert((isSentinel() || canHandle(I)) && "Inst can't be handled!");
    450   }
    451 
    452   bool isSentinel() const {
    453     return Inst == DenseMapInfo<Instruction *>::getEmptyKey() ||
    454            Inst == DenseMapInfo<Instruction *>::getTombstoneKey();
    455   }
    456 
    457   static bool canHandle(Instruction *Inst) {
    458     // Don't value number anything that returns void.
    459     if (Inst->getType()->isVoidTy())
    460       return false;
    461 
    462     CallInst *CI = dyn_cast<CallInst>(Inst);
    463     if (!CI || !CI->onlyReadsMemory())
    464       return false;
    465     return true;
    466   }
    467 };
    468 
    469 } // end anonymous namespace
    470 
    471 namespace llvm {
    472 
    473 template <> struct DenseMapInfo<CallValue> {
    474   static inline CallValue getEmptyKey() {
    475     return DenseMapInfo<Instruction *>::getEmptyKey();
    476   }
    477 
    478   static inline CallValue getTombstoneKey() {
    479     return DenseMapInfo<Instruction *>::getTombstoneKey();
    480   }
    481 
    482   static unsigned getHashValue(CallValue Val);
    483   static bool isEqual(CallValue LHS, CallValue RHS);
    484 };
    485 
    486 } // end namespace llvm
    487 
    488 unsigned DenseMapInfo<CallValue>::getHashValue(CallValue Val) {
    489   Instruction *Inst = Val.Inst;
    490 
    491   // Hash all of the operands as pointers and mix in the opcode.
    492   return hash_combine(
    493       Inst->getOpcode(),
    494       hash_combine_range(Inst->value_op_begin(), Inst->value_op_end()));
    495 }
    496 
    497 bool DenseMapInfo<CallValue>::isEqual(CallValue LHS, CallValue RHS) {
    498   Instruction *LHSI = LHS.Inst, *RHSI = RHS.Inst;
    499   if (LHS.isSentinel() || RHS.isSentinel())
    500     return LHSI == RHSI;
    501 
    502   return LHSI->isIdenticalTo(RHSI);
    503 }
    504 
    505 //===----------------------------------------------------------------------===//
    506 // EarlyCSE implementation
    507 //===----------------------------------------------------------------------===//
    508 
    509 namespace {
    510 
    511 /// A simple and fast domtree-based CSE pass.
    512 ///
    513 /// This pass does a simple depth-first walk over the dominator tree,
    514 /// eliminating trivially redundant instructions and using instsimplify to
    515 /// canonicalize things as it goes. It is intended to be fast and catch obvious
    516 /// cases so that instcombine and other passes are more effective. It is
    517 /// expected that a later pass of GVN will catch the interesting/hard cases.
    518 class EarlyCSE {
    519 public:
    520   const TargetLibraryInfo &TLI;
    521   const TargetTransformInfo &TTI;
    522   DominatorTree &DT;
    523   AssumptionCache &AC;
    524   const SimplifyQuery SQ;
    525   MemorySSA *MSSA;
    526   std::unique_ptr<MemorySSAUpdater> MSSAUpdater;
    527 
    528   using AllocatorTy =
    529       RecyclingAllocator<BumpPtrAllocator,
    530                          ScopedHashTableVal<SimpleValue, Value *>>;
    531   using ScopedHTType =
    532       ScopedHashTable<SimpleValue, Value *, DenseMapInfo<SimpleValue>,
    533                       AllocatorTy>;
    534 
    535   /// A scoped hash table of the current values of all of our simple
    536   /// scalar expressions.
    537   ///
    538   /// As we walk down the domtree, we look to see if instructions are in this:
    539   /// if so, we replace them with what we find, otherwise we insert them so
    540   /// that dominated values can succeed in their lookup.
    541   ScopedHTType AvailableValues;
    542 
    543   /// A scoped hash table of the current values of previously encountered
    544   /// memory locations.
    545   ///
    546   /// This allows us to get efficient access to dominating loads or stores when
    547   /// we have a fully redundant load.  In addition to the most recent load, we
    548   /// keep track of a generation count of the read, which is compared against
    549   /// the current generation count.  The current generation count is incremented
    550   /// after every possibly writing memory operation, which ensures that we only
    551   /// CSE loads with other loads that have no intervening store.  Ordering
    552   /// events (such as fences or atomic instructions) increment the generation
    553   /// count as well; essentially, we model these as writes to all possible
    554   /// locations.  Note that atomic and/or volatile loads and stores can be
    555   /// present the table; it is the responsibility of the consumer to inspect
    556   /// the atomicity/volatility if needed.
    557   struct LoadValue {
    558     Instruction *DefInst = nullptr;
    559     unsigned Generation = 0;
    560     int MatchingId = -1;
    561     bool IsAtomic = false;
    562 
    563     LoadValue() = default;
    564     LoadValue(Instruction *Inst, unsigned Generation, unsigned MatchingId,
    565               bool IsAtomic)
    566         : DefInst(Inst), Generation(Generation), MatchingId(MatchingId),
    567           IsAtomic(IsAtomic) {}
    568   };
    569 
    570   using LoadMapAllocator =
    571       RecyclingAllocator<BumpPtrAllocator,
    572                          ScopedHashTableVal<Value *, LoadValue>>;
    573   using LoadHTType =
    574       ScopedHashTable<Value *, LoadValue, DenseMapInfo<Value *>,
    575                       LoadMapAllocator>;
    576 
    577   LoadHTType AvailableLoads;
    578 
    579   // A scoped hash table mapping memory locations (represented as typed
    580   // addresses) to generation numbers at which that memory location became
    581   // (henceforth indefinitely) invariant.
    582   using InvariantMapAllocator =
    583       RecyclingAllocator<BumpPtrAllocator,
    584                          ScopedHashTableVal<MemoryLocation, unsigned>>;
    585   using InvariantHTType =
    586       ScopedHashTable<MemoryLocation, unsigned, DenseMapInfo<MemoryLocation>,
    587                       InvariantMapAllocator>;
    588   InvariantHTType AvailableInvariants;
    589 
    590   /// A scoped hash table of the current values of read-only call
    591   /// values.
    592   ///
    593   /// It uses the same generation count as loads.
    594   using CallHTType =
    595       ScopedHashTable<CallValue, std::pair<Instruction *, unsigned>>;
    596   CallHTType AvailableCalls;
    597 
    598   /// This is the current generation of the memory value.
    599   unsigned CurrentGeneration = 0;
    600 
    601   /// Set up the EarlyCSE runner for a particular function.
    602   EarlyCSE(const DataLayout &DL, const TargetLibraryInfo &TLI,
    603            const TargetTransformInfo &TTI, DominatorTree &DT,
    604            AssumptionCache &AC, MemorySSA *MSSA)
    605       : TLI(TLI), TTI(TTI), DT(DT), AC(AC), SQ(DL, &TLI, &DT, &AC), MSSA(MSSA),
    606         MSSAUpdater(std::make_unique<MemorySSAUpdater>(MSSA)) {}
    607 
    608   bool run();
    609 
    610 private:
    611   unsigned ClobberCounter = 0;
    612   // Almost a POD, but needs to call the constructors for the scoped hash
    613   // tables so that a new scope gets pushed on. These are RAII so that the
    614   // scope gets popped when the NodeScope is destroyed.
    615   class NodeScope {
    616   public:
    617     NodeScope(ScopedHTType &AvailableValues, LoadHTType &AvailableLoads,
    618               InvariantHTType &AvailableInvariants, CallHTType &AvailableCalls)
    619       : Scope(AvailableValues), LoadScope(AvailableLoads),
    620         InvariantScope(AvailableInvariants), CallScope(AvailableCalls) {}
    621     NodeScope(const NodeScope &) = delete;
    622     NodeScope &operator=(const NodeScope &) = delete;
    623 
    624   private:
    625     ScopedHTType::ScopeTy Scope;
    626     LoadHTType::ScopeTy LoadScope;
    627     InvariantHTType::ScopeTy InvariantScope;
    628     CallHTType::ScopeTy CallScope;
    629   };
    630 
    631   // Contains all the needed information to create a stack for doing a depth
    632   // first traversal of the tree. This includes scopes for values, loads, and
    633   // calls as well as the generation. There is a child iterator so that the
    634   // children do not need to be store separately.
    635   class StackNode {
    636   public:
    637     StackNode(ScopedHTType &AvailableValues, LoadHTType &AvailableLoads,
    638               InvariantHTType &AvailableInvariants, CallHTType &AvailableCalls,
    639               unsigned cg, DomTreeNode *n, DomTreeNode::const_iterator child,
    640               DomTreeNode::const_iterator end)
    641         : CurrentGeneration(cg), ChildGeneration(cg), Node(n), ChildIter(child),
    642           EndIter(end),
    643           Scopes(AvailableValues, AvailableLoads, AvailableInvariants,
    644                  AvailableCalls)
    645           {}
    646     StackNode(const StackNode &) = delete;
    647     StackNode &operator=(const StackNode &) = delete;
    648 
    649     // Accessors.
    650     unsigned currentGeneration() const { return CurrentGeneration; }
    651     unsigned childGeneration() const { return ChildGeneration; }
    652     void childGeneration(unsigned generation) { ChildGeneration = generation; }
    653     DomTreeNode *node() { return Node; }
    654     DomTreeNode::const_iterator childIter() const { return ChildIter; }
    655 
    656     DomTreeNode *nextChild() {
    657       DomTreeNode *child = *ChildIter;
    658       ++ChildIter;
    659       return child;
    660     }
    661 
    662     DomTreeNode::const_iterator end() const { return EndIter; }
    663     bool isProcessed() const { return Processed; }
    664     void process() { Processed = true; }
    665 
    666   private:
    667     unsigned CurrentGeneration;
    668     unsigned ChildGeneration;
    669     DomTreeNode *Node;
    670     DomTreeNode::const_iterator ChildIter;
    671     DomTreeNode::const_iterator EndIter;
    672     NodeScope Scopes;
    673     bool Processed = false;
    674   };
    675 
    676   /// Wrapper class to handle memory instructions, including loads,
    677   /// stores and intrinsic loads and stores defined by the target.
    678   class ParseMemoryInst {
    679   public:
    680     ParseMemoryInst(Instruction *Inst, const TargetTransformInfo &TTI)
    681       : Inst(Inst) {
    682       if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
    683         IntrID = II->getIntrinsicID();
    684         if (TTI.getTgtMemIntrinsic(II, Info))
    685           return;
    686         if (isHandledNonTargetIntrinsic(IntrID)) {
    687           switch (IntrID) {
    688           case Intrinsic::masked_load:
    689             Info.PtrVal = Inst->getOperand(0);
    690             Info.MatchingId = Intrinsic::masked_load;
    691             Info.ReadMem = true;
    692             Info.WriteMem = false;
    693             Info.IsVolatile = false;
    694             break;
    695           case Intrinsic::masked_store:
    696             Info.PtrVal = Inst->getOperand(1);
    697             // Use the ID of masked load as the "matching id". This will
    698             // prevent matching non-masked loads/stores with masked ones
    699             // (which could be done), but at the moment, the code here
    700             // does not support matching intrinsics with non-intrinsics,
    701             // so keep the MatchingIds specific to masked instructions
    702             // for now (TODO).
    703             Info.MatchingId = Intrinsic::masked_load;
    704             Info.ReadMem = false;
    705             Info.WriteMem = true;
    706             Info.IsVolatile = false;
    707             break;
    708           }
    709         }
    710       }
    711     }
    712 
    713     Instruction *get() { return Inst; }
    714     const Instruction *get() const { return Inst; }
    715 
    716     bool isLoad() const {
    717       if (IntrID != 0)
    718         return Info.ReadMem;
    719       return isa<LoadInst>(Inst);
    720     }
    721 
    722     bool isStore() const {
    723       if (IntrID != 0)
    724         return Info.WriteMem;
    725       return isa<StoreInst>(Inst);
    726     }
    727 
    728     bool isAtomic() const {
    729       if (IntrID != 0)
    730         return Info.Ordering != AtomicOrdering::NotAtomic;
    731       return Inst->isAtomic();
    732     }
    733 
    734     bool isUnordered() const {
    735       if (IntrID != 0)
    736         return Info.isUnordered();
    737 
    738       if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
    739         return LI->isUnordered();
    740       } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
    741         return SI->isUnordered();
    742       }
    743       // Conservative answer
    744       return !Inst->isAtomic();
    745     }
    746 
    747     bool isVolatile() const {
    748       if (IntrID != 0)
    749         return Info.IsVolatile;
    750 
    751       if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
    752         return LI->isVolatile();
    753       } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
    754         return SI->isVolatile();
    755       }
    756       // Conservative answer
    757       return true;
    758     }
    759 
    760     bool isInvariantLoad() const {
    761       if (auto *LI = dyn_cast<LoadInst>(Inst))
    762         return LI->hasMetadata(LLVMContext::MD_invariant_load);
    763       return false;
    764     }
    765 
    766     bool isValid() const { return getPointerOperand() != nullptr; }
    767 
    768     // For regular (non-intrinsic) loads/stores, this is set to -1. For
    769     // intrinsic loads/stores, the id is retrieved from the corresponding
    770     // field in the MemIntrinsicInfo structure.  That field contains
    771     // non-negative values only.
    772     int getMatchingId() const {
    773       if (IntrID != 0)
    774         return Info.MatchingId;
    775       return -1;
    776     }
    777 
    778     Value *getPointerOperand() const {
    779       if (IntrID != 0)
    780         return Info.PtrVal;
    781       return getLoadStorePointerOperand(Inst);
    782     }
    783 
    784     bool mayReadFromMemory() const {
    785       if (IntrID != 0)
    786         return Info.ReadMem;
    787       return Inst->mayReadFromMemory();
    788     }
    789 
    790     bool mayWriteToMemory() const {
    791       if (IntrID != 0)
    792         return Info.WriteMem;
    793       return Inst->mayWriteToMemory();
    794     }
    795 
    796   private:
    797     Intrinsic::ID IntrID = 0;
    798     MemIntrinsicInfo Info;
    799     Instruction *Inst;
    800   };
    801 
    802   // This function is to prevent accidentally passing a non-target
    803   // intrinsic ID to TargetTransformInfo.
    804   static bool isHandledNonTargetIntrinsic(Intrinsic::ID ID) {
    805     switch (ID) {
    806     case Intrinsic::masked_load:
    807     case Intrinsic::masked_store:
    808       return true;
    809     }
    810     return false;
    811   }
    812   static bool isHandledNonTargetIntrinsic(const Value *V) {
    813     if (auto *II = dyn_cast<IntrinsicInst>(V))
    814       return isHandledNonTargetIntrinsic(II->getIntrinsicID());
    815     return false;
    816   }
    817 
    818   bool processNode(DomTreeNode *Node);
    819 
    820   bool handleBranchCondition(Instruction *CondInst, const BranchInst *BI,
    821                              const BasicBlock *BB, const BasicBlock *Pred);
    822 
    823   Value *getMatchingValue(LoadValue &InVal, ParseMemoryInst &MemInst,
    824                           unsigned CurrentGeneration);
    825 
    826   bool overridingStores(const ParseMemoryInst &Earlier,
    827                         const ParseMemoryInst &Later);
    828 
    829   Value *getOrCreateResult(Value *Inst, Type *ExpectedType) const {
    830     if (auto *LI = dyn_cast<LoadInst>(Inst))
    831       return LI;
    832     if (auto *SI = dyn_cast<StoreInst>(Inst))
    833       return SI->getValueOperand();
    834     assert(isa<IntrinsicInst>(Inst) && "Instruction not supported");
    835     auto *II = cast<IntrinsicInst>(Inst);
    836     if (isHandledNonTargetIntrinsic(II->getIntrinsicID()))
    837       return getOrCreateResultNonTargetMemIntrinsic(II, ExpectedType);
    838     return TTI.getOrCreateResultFromMemIntrinsic(II, ExpectedType);
    839   }
    840 
    841   Value *getOrCreateResultNonTargetMemIntrinsic(IntrinsicInst *II,
    842                                                 Type *ExpectedType) const {
    843     switch (II->getIntrinsicID()) {
    844     case Intrinsic::masked_load:
    845       return II;
    846     case Intrinsic::masked_store:
    847       return II->getOperand(0);
    848     }
    849     return nullptr;
    850   }
    851 
    852   /// Return true if the instruction is known to only operate on memory
    853   /// provably invariant in the given "generation".
    854   bool isOperatingOnInvariantMemAt(Instruction *I, unsigned GenAt);
    855 
    856   bool isSameMemGeneration(unsigned EarlierGeneration, unsigned LaterGeneration,
    857                            Instruction *EarlierInst, Instruction *LaterInst);
    858 
    859   bool isNonTargetIntrinsicMatch(const IntrinsicInst *Earlier,
    860                                  const IntrinsicInst *Later) {
    861     auto IsSubmask = [](const Value *Mask0, const Value *Mask1) {
    862       // Is Mask0 a submask of Mask1?
    863       if (Mask0 == Mask1)
    864         return true;
    865       if (isa<UndefValue>(Mask0) || isa<UndefValue>(Mask1))
    866         return false;
    867       auto *Vec0 = dyn_cast<ConstantVector>(Mask0);
    868       auto *Vec1 = dyn_cast<ConstantVector>(Mask1);
    869       if (!Vec0 || !Vec1)
    870         return false;
    871       assert(Vec0->getType() == Vec1->getType() &&
    872              "Masks should have the same type");
    873       for (int i = 0, e = Vec0->getNumOperands(); i != e; ++i) {
    874         Constant *Elem0 = Vec0->getOperand(i);
    875         Constant *Elem1 = Vec1->getOperand(i);
    876         auto *Int0 = dyn_cast<ConstantInt>(Elem0);
    877         if (Int0 && Int0->isZero())
    878           continue;
    879         auto *Int1 = dyn_cast<ConstantInt>(Elem1);
    880         if (Int1 && !Int1->isZero())
    881           continue;
    882         if (isa<UndefValue>(Elem0) || isa<UndefValue>(Elem1))
    883           return false;
    884         if (Elem0 == Elem1)
    885           continue;
    886         return false;
    887       }
    888       return true;
    889     };
    890     auto PtrOp = [](const IntrinsicInst *II) {
    891       if (II->getIntrinsicID() == Intrinsic::masked_load)
    892         return II->getOperand(0);
    893       if (II->getIntrinsicID() == Intrinsic::masked_store)
    894         return II->getOperand(1);
    895       llvm_unreachable("Unexpected IntrinsicInst");
    896     };
    897     auto MaskOp = [](const IntrinsicInst *II) {
    898       if (II->getIntrinsicID() == Intrinsic::masked_load)
    899         return II->getOperand(2);
    900       if (II->getIntrinsicID() == Intrinsic::masked_store)
    901         return II->getOperand(3);
    902       llvm_unreachable("Unexpected IntrinsicInst");
    903     };
    904     auto ThruOp = [](const IntrinsicInst *II) {
    905       if (II->getIntrinsicID() == Intrinsic::masked_load)
    906         return II->getOperand(3);
    907       llvm_unreachable("Unexpected IntrinsicInst");
    908     };
    909 
    910     if (PtrOp(Earlier) != PtrOp(Later))
    911       return false;
    912 
    913     Intrinsic::ID IDE = Earlier->getIntrinsicID();
    914     Intrinsic::ID IDL = Later->getIntrinsicID();
    915     // We could really use specific intrinsic classes for masked loads
    916     // and stores in IntrinsicInst.h.
    917     if (IDE == Intrinsic::masked_load && IDL == Intrinsic::masked_load) {
    918       // Trying to replace later masked load with the earlier one.
    919       // Check that the pointers are the same, and
    920       // - masks and pass-throughs are the same, or
    921       // - replacee's pass-through is "undef" and replacer's mask is a
    922       //   super-set of the replacee's mask.
    923       if (MaskOp(Earlier) == MaskOp(Later) && ThruOp(Earlier) == ThruOp(Later))
    924         return true;
    925       if (!isa<UndefValue>(ThruOp(Later)))
    926         return false;
    927       return IsSubmask(MaskOp(Later), MaskOp(Earlier));
    928     }
    929     if (IDE == Intrinsic::masked_store && IDL == Intrinsic::masked_load) {
    930       // Trying to replace a load of a stored value with the store's value.
    931       // Check that the pointers are the same, and
    932       // - load's mask is a subset of store's mask, and
    933       // - load's pass-through is "undef".
    934       if (!IsSubmask(MaskOp(Later), MaskOp(Earlier)))
    935         return false;
    936       return isa<UndefValue>(ThruOp(Later));
    937     }
    938     if (IDE == Intrinsic::masked_load && IDL == Intrinsic::masked_store) {
    939       // Trying to remove a store of the loaded value.
    940       // Check that the pointers are the same, and
    941       // - store's mask is a subset of the load's mask.
    942       return IsSubmask(MaskOp(Later), MaskOp(Earlier));
    943     }
    944     if (IDE == Intrinsic::masked_store && IDL == Intrinsic::masked_store) {
    945       // Trying to remove a dead store (earlier).
    946       // Check that the pointers are the same,
    947       // - the to-be-removed store's mask is a subset of the other store's
    948       //   mask.
    949       return IsSubmask(MaskOp(Earlier), MaskOp(Later));
    950     }
    951     return false;
    952   }
    953 
    954   void removeMSSA(Instruction &Inst) {
    955     if (!MSSA)
    956       return;
    957     if (VerifyMemorySSA)
    958       MSSA->verifyMemorySSA();
    959     // Removing a store here can leave MemorySSA in an unoptimized state by
    960     // creating MemoryPhis that have identical arguments and by creating
    961     // MemoryUses whose defining access is not an actual clobber. The phi case
    962     // is handled by MemorySSA when passing OptimizePhis = true to
    963     // removeMemoryAccess.  The non-optimized MemoryUse case is lazily updated
    964     // by MemorySSA's getClobberingMemoryAccess.
    965     MSSAUpdater->removeMemoryAccess(&Inst, true);
    966   }
    967 };
    968 
    969 } // end anonymous namespace
    970 
    971 /// Determine if the memory referenced by LaterInst is from the same heap
    972 /// version as EarlierInst.
    973 /// This is currently called in two scenarios:
    974 ///
    975 ///   load p
    976 ///   ...
    977 ///   load p
    978 ///
    979 /// and
    980 ///
    981 ///   x = load p
    982 ///   ...
    983 ///   store x, p
    984 ///
    985 /// in both cases we want to verify that there are no possible writes to the
    986 /// memory referenced by p between the earlier and later instruction.
    987 bool EarlyCSE::isSameMemGeneration(unsigned EarlierGeneration,
    988                                    unsigned LaterGeneration,
    989                                    Instruction *EarlierInst,
    990                                    Instruction *LaterInst) {
    991   // Check the simple memory generation tracking first.
    992   if (EarlierGeneration == LaterGeneration)
    993     return true;
    994 
    995   if (!MSSA)
    996     return false;
    997 
    998   // If MemorySSA has determined that one of EarlierInst or LaterInst does not
    999   // read/write memory, then we can safely return true here.
   1000   // FIXME: We could be more aggressive when checking doesNotAccessMemory(),
   1001   // onlyReadsMemory(), mayReadFromMemory(), and mayWriteToMemory() in this pass
   1002   // by also checking the MemorySSA MemoryAccess on the instruction.  Initial
   1003   // experiments suggest this isn't worthwhile, at least for C/C++ code compiled
   1004   // with the default optimization pipeline.
   1005   auto *EarlierMA = MSSA->getMemoryAccess(EarlierInst);
   1006   if (!EarlierMA)
   1007     return true;
   1008   auto *LaterMA = MSSA->getMemoryAccess(LaterInst);
   1009   if (!LaterMA)
   1010     return true;
   1011 
   1012   // Since we know LaterDef dominates LaterInst and EarlierInst dominates
   1013   // LaterInst, if LaterDef dominates EarlierInst then it can't occur between
   1014   // EarlierInst and LaterInst and neither can any other write that potentially
   1015   // clobbers LaterInst.
   1016   MemoryAccess *LaterDef;
   1017   if (ClobberCounter < EarlyCSEMssaOptCap) {
   1018     LaterDef = MSSA->getWalker()->getClobberingMemoryAccess(LaterInst);
   1019     ClobberCounter++;
   1020   } else
   1021     LaterDef = LaterMA->getDefiningAccess();
   1022 
   1023   return MSSA->dominates(LaterDef, EarlierMA);
   1024 }
   1025 
   1026 bool EarlyCSE::isOperatingOnInvariantMemAt(Instruction *I, unsigned GenAt) {
   1027   // A location loaded from with an invariant_load is assumed to *never* change
   1028   // within the visible scope of the compilation.
   1029   if (auto *LI = dyn_cast<LoadInst>(I))
   1030     if (LI->hasMetadata(LLVMContext::MD_invariant_load))
   1031       return true;
   1032 
   1033   auto MemLocOpt = MemoryLocation::getOrNone(I);
   1034   if (!MemLocOpt)
   1035     // "target" intrinsic forms of loads aren't currently known to
   1036     // MemoryLocation::get.  TODO
   1037     return false;
   1038   MemoryLocation MemLoc = *MemLocOpt;
   1039   if (!AvailableInvariants.count(MemLoc))
   1040     return false;
   1041 
   1042   // Is the generation at which this became invariant older than the
   1043   // current one?
   1044   return AvailableInvariants.lookup(MemLoc) <= GenAt;
   1045 }
   1046 
   1047 bool EarlyCSE::handleBranchCondition(Instruction *CondInst,
   1048                                      const BranchInst *BI, const BasicBlock *BB,
   1049                                      const BasicBlock *Pred) {
   1050   assert(BI->isConditional() && "Should be a conditional branch!");
   1051   assert(BI->getCondition() == CondInst && "Wrong condition?");
   1052   assert(BI->getSuccessor(0) == BB || BI->getSuccessor(1) == BB);
   1053   auto *TorF = (BI->getSuccessor(0) == BB)
   1054                    ? ConstantInt::getTrue(BB->getContext())
   1055                    : ConstantInt::getFalse(BB->getContext());
   1056   auto MatchBinOp = [](Instruction *I, unsigned Opcode, Value *&LHS,
   1057                        Value *&RHS) {
   1058     if (Opcode == Instruction::And &&
   1059         match(I, m_LogicalAnd(m_Value(LHS), m_Value(RHS))))
   1060       return true;
   1061     else if (Opcode == Instruction::Or &&
   1062              match(I, m_LogicalOr(m_Value(LHS), m_Value(RHS))))
   1063       return true;
   1064     return false;
   1065   };
   1066   // If the condition is AND operation, we can propagate its operands into the
   1067   // true branch. If it is OR operation, we can propagate them into the false
   1068   // branch.
   1069   unsigned PropagateOpcode =
   1070       (BI->getSuccessor(0) == BB) ? Instruction::And : Instruction::Or;
   1071 
   1072   bool MadeChanges = false;
   1073   SmallVector<Instruction *, 4> WorkList;
   1074   SmallPtrSet<Instruction *, 4> Visited;
   1075   WorkList.push_back(CondInst);
   1076   while (!WorkList.empty()) {
   1077     Instruction *Curr = WorkList.pop_back_val();
   1078 
   1079     AvailableValues.insert(Curr, TorF);
   1080     LLVM_DEBUG(dbgs() << "EarlyCSE CVP: Add conditional value for '"
   1081                       << Curr->getName() << "' as " << *TorF << " in "
   1082                       << BB->getName() << "\n");
   1083     if (!DebugCounter::shouldExecute(CSECounter)) {
   1084       LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
   1085     } else {
   1086       // Replace all dominated uses with the known value.
   1087       if (unsigned Count = replaceDominatedUsesWith(Curr, TorF, DT,
   1088                                                     BasicBlockEdge(Pred, BB))) {
   1089         NumCSECVP += Count;
   1090         MadeChanges = true;
   1091       }
   1092     }
   1093 
   1094     Value *LHS, *RHS;
   1095     if (MatchBinOp(Curr, PropagateOpcode, LHS, RHS))
   1096       for (auto &Op : { LHS, RHS })
   1097         if (Instruction *OPI = dyn_cast<Instruction>(Op))
   1098           if (SimpleValue::canHandle(OPI) && Visited.insert(OPI).second)
   1099             WorkList.push_back(OPI);
   1100   }
   1101 
   1102   return MadeChanges;
   1103 }
   1104 
   1105 Value *EarlyCSE::getMatchingValue(LoadValue &InVal, ParseMemoryInst &MemInst,
   1106                                   unsigned CurrentGeneration) {
   1107   if (InVal.DefInst == nullptr)
   1108     return nullptr;
   1109   if (InVal.MatchingId != MemInst.getMatchingId())
   1110     return nullptr;
   1111   // We don't yet handle removing loads with ordering of any kind.
   1112   if (MemInst.isVolatile() || !MemInst.isUnordered())
   1113     return nullptr;
   1114   // We can't replace an atomic load with one which isn't also atomic.
   1115   if (MemInst.isLoad() && !InVal.IsAtomic && MemInst.isAtomic())
   1116     return nullptr;
   1117   // The value V returned from this function is used differently depending
   1118   // on whether MemInst is a load or a store. If it's a load, we will replace
   1119   // MemInst with V, if it's a store, we will check if V is the same as the
   1120   // available value.
   1121   bool MemInstMatching = !MemInst.isLoad();
   1122   Instruction *Matching = MemInstMatching ? MemInst.get() : InVal.DefInst;
   1123   Instruction *Other = MemInstMatching ? InVal.DefInst : MemInst.get();
   1124 
   1125   // For stores check the result values before checking memory generation
   1126   // (otherwise isSameMemGeneration may crash).
   1127   Value *Result = MemInst.isStore()
   1128                       ? getOrCreateResult(Matching, Other->getType())
   1129                       : nullptr;
   1130   if (MemInst.isStore() && InVal.DefInst != Result)
   1131     return nullptr;
   1132 
   1133   // Deal with non-target memory intrinsics.
   1134   bool MatchingNTI = isHandledNonTargetIntrinsic(Matching);
   1135   bool OtherNTI = isHandledNonTargetIntrinsic(Other);
   1136   if (OtherNTI != MatchingNTI)
   1137     return nullptr;
   1138   if (OtherNTI && MatchingNTI) {
   1139     if (!isNonTargetIntrinsicMatch(cast<IntrinsicInst>(InVal.DefInst),
   1140                                    cast<IntrinsicInst>(MemInst.get())))
   1141       return nullptr;
   1142   }
   1143 
   1144   if (!isOperatingOnInvariantMemAt(MemInst.get(), InVal.Generation) &&
   1145       !isSameMemGeneration(InVal.Generation, CurrentGeneration, InVal.DefInst,
   1146                            MemInst.get()))
   1147     return nullptr;
   1148 
   1149   if (!Result)
   1150     Result = getOrCreateResult(Matching, Other->getType());
   1151   return Result;
   1152 }
   1153 
   1154 bool EarlyCSE::overridingStores(const ParseMemoryInst &Earlier,
   1155                                 const ParseMemoryInst &Later) {
   1156   // Can we remove Earlier store because of Later store?
   1157 
   1158   assert(Earlier.isUnordered() && !Earlier.isVolatile() &&
   1159          "Violated invariant");
   1160   if (Earlier.getPointerOperand() != Later.getPointerOperand())
   1161     return false;
   1162   if (Earlier.getMatchingId() != Later.getMatchingId())
   1163     return false;
   1164   // At the moment, we don't remove ordered stores, but do remove
   1165   // unordered atomic stores.  There's no special requirement (for
   1166   // unordered atomics) about removing atomic stores only in favor of
   1167   // other atomic stores since we were going to execute the non-atomic
   1168   // one anyway and the atomic one might never have become visible.
   1169   if (!Earlier.isUnordered() || !Later.isUnordered())
   1170     return false;
   1171 
   1172   // Deal with non-target memory intrinsics.
   1173   bool ENTI = isHandledNonTargetIntrinsic(Earlier.get());
   1174   bool LNTI = isHandledNonTargetIntrinsic(Later.get());
   1175   if (ENTI && LNTI)
   1176     return isNonTargetIntrinsicMatch(cast<IntrinsicInst>(Earlier.get()),
   1177                                      cast<IntrinsicInst>(Later.get()));
   1178 
   1179   // Because of the check above, at least one of them is false.
   1180   // For now disallow matching intrinsics with non-intrinsics,
   1181   // so assume that the stores match if neither is an intrinsic.
   1182   return ENTI == LNTI;
   1183 }
   1184 
   1185 bool EarlyCSE::processNode(DomTreeNode *Node) {
   1186   bool Changed = false;
   1187   BasicBlock *BB = Node->getBlock();
   1188 
   1189   // If this block has a single predecessor, then the predecessor is the parent
   1190   // of the domtree node and all of the live out memory values are still current
   1191   // in this block.  If this block has multiple predecessors, then they could
   1192   // have invalidated the live-out memory values of our parent value.  For now,
   1193   // just be conservative and invalidate memory if this block has multiple
   1194   // predecessors.
   1195   if (!BB->getSinglePredecessor())
   1196     ++CurrentGeneration;
   1197 
   1198   // If this node has a single predecessor which ends in a conditional branch,
   1199   // we can infer the value of the branch condition given that we took this
   1200   // path.  We need the single predecessor to ensure there's not another path
   1201   // which reaches this block where the condition might hold a different
   1202   // value.  Since we're adding this to the scoped hash table (like any other
   1203   // def), it will have been popped if we encounter a future merge block.
   1204   if (BasicBlock *Pred = BB->getSinglePredecessor()) {
   1205     auto *BI = dyn_cast<BranchInst>(Pred->getTerminator());
   1206     if (BI && BI->isConditional()) {
   1207       auto *CondInst = dyn_cast<Instruction>(BI->getCondition());
   1208       if (CondInst && SimpleValue::canHandle(CondInst))
   1209         Changed |= handleBranchCondition(CondInst, BI, BB, Pred);
   1210     }
   1211   }
   1212 
   1213   /// LastStore - Keep track of the last non-volatile store that we saw... for
   1214   /// as long as there in no instruction that reads memory.  If we see a store
   1215   /// to the same location, we delete the dead store.  This zaps trivial dead
   1216   /// stores which can occur in bitfield code among other things.
   1217   Instruction *LastStore = nullptr;
   1218 
   1219   // See if any instructions in the block can be eliminated.  If so, do it.  If
   1220   // not, add them to AvailableValues.
   1221   for (Instruction &Inst : make_early_inc_range(BB->getInstList())) {
   1222     // Dead instructions should just be removed.
   1223     if (isInstructionTriviallyDead(&Inst, &TLI)) {
   1224       LLVM_DEBUG(dbgs() << "EarlyCSE DCE: " << Inst << '\n');
   1225       if (!DebugCounter::shouldExecute(CSECounter)) {
   1226         LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
   1227         continue;
   1228       }
   1229 
   1230       salvageKnowledge(&Inst, &AC);
   1231       salvageDebugInfo(Inst);
   1232       removeMSSA(Inst);
   1233       Inst.eraseFromParent();
   1234       Changed = true;
   1235       ++NumSimplify;
   1236       continue;
   1237     }
   1238 
   1239     // Skip assume intrinsics, they don't really have side effects (although
   1240     // they're marked as such to ensure preservation of control dependencies),
   1241     // and this pass will not bother with its removal. However, we should mark
   1242     // its condition as true for all dominated blocks.
   1243     if (auto *Assume = dyn_cast<AssumeInst>(&Inst)) {
   1244       auto *CondI = dyn_cast<Instruction>(Assume->getArgOperand(0));
   1245       if (CondI && SimpleValue::canHandle(CondI)) {
   1246         LLVM_DEBUG(dbgs() << "EarlyCSE considering assumption: " << Inst
   1247                           << '\n');
   1248         AvailableValues.insert(CondI, ConstantInt::getTrue(BB->getContext()));
   1249       } else
   1250         LLVM_DEBUG(dbgs() << "EarlyCSE skipping assumption: " << Inst << '\n');
   1251       continue;
   1252     }
   1253 
   1254     // Likewise, noalias intrinsics don't actually write.
   1255     if (match(&Inst,
   1256               m_Intrinsic<Intrinsic::experimental_noalias_scope_decl>())) {
   1257       LLVM_DEBUG(dbgs() << "EarlyCSE skipping noalias intrinsic: " << Inst
   1258                         << '\n');
   1259       continue;
   1260     }
   1261 
   1262     // Skip sideeffect intrinsics, for the same reason as assume intrinsics.
   1263     if (match(&Inst, m_Intrinsic<Intrinsic::sideeffect>())) {
   1264       LLVM_DEBUG(dbgs() << "EarlyCSE skipping sideeffect: " << Inst << '\n');
   1265       continue;
   1266     }
   1267 
   1268     // We can skip all invariant.start intrinsics since they only read memory,
   1269     // and we can forward values across it. For invariant starts without
   1270     // invariant ends, we can use the fact that the invariantness never ends to
   1271     // start a scope in the current generaton which is true for all future
   1272     // generations.  Also, we dont need to consume the last store since the
   1273     // semantics of invariant.start allow us to perform   DSE of the last
   1274     // store, if there was a store following invariant.start. Consider:
   1275     //
   1276     // store 30, i8* p
   1277     // invariant.start(p)
   1278     // store 40, i8* p
   1279     // We can DSE the store to 30, since the store 40 to invariant location p
   1280     // causes undefined behaviour.
   1281     if (match(&Inst, m_Intrinsic<Intrinsic::invariant_start>())) {
   1282       // If there are any uses, the scope might end.
   1283       if (!Inst.use_empty())
   1284         continue;
   1285       MemoryLocation MemLoc =
   1286           MemoryLocation::getForArgument(&cast<CallInst>(Inst), 1, TLI);
   1287       // Don't start a scope if we already have a better one pushed
   1288       if (!AvailableInvariants.count(MemLoc))
   1289         AvailableInvariants.insert(MemLoc, CurrentGeneration);
   1290       continue;
   1291     }
   1292 
   1293     if (isGuard(&Inst)) {
   1294       if (auto *CondI =
   1295               dyn_cast<Instruction>(cast<CallInst>(Inst).getArgOperand(0))) {
   1296         if (SimpleValue::canHandle(CondI)) {
   1297           // Do we already know the actual value of this condition?
   1298           if (auto *KnownCond = AvailableValues.lookup(CondI)) {
   1299             // Is the condition known to be true?
   1300             if (isa<ConstantInt>(KnownCond) &&
   1301                 cast<ConstantInt>(KnownCond)->isOne()) {
   1302               LLVM_DEBUG(dbgs()
   1303                          << "EarlyCSE removing guard: " << Inst << '\n');
   1304               salvageKnowledge(&Inst, &AC);
   1305               removeMSSA(Inst);
   1306               Inst.eraseFromParent();
   1307               Changed = true;
   1308               continue;
   1309             } else
   1310               // Use the known value if it wasn't true.
   1311               cast<CallInst>(Inst).setArgOperand(0, KnownCond);
   1312           }
   1313           // The condition we're on guarding here is true for all dominated
   1314           // locations.
   1315           AvailableValues.insert(CondI, ConstantInt::getTrue(BB->getContext()));
   1316         }
   1317       }
   1318 
   1319       // Guard intrinsics read all memory, but don't write any memory.
   1320       // Accordingly, don't update the generation but consume the last store (to
   1321       // avoid an incorrect DSE).
   1322       LastStore = nullptr;
   1323       continue;
   1324     }
   1325 
   1326     // If the instruction can be simplified (e.g. X+0 = X) then replace it with
   1327     // its simpler value.
   1328     if (Value *V = SimplifyInstruction(&Inst, SQ)) {
   1329       LLVM_DEBUG(dbgs() << "EarlyCSE Simplify: " << Inst << "  to: " << *V
   1330                         << '\n');
   1331       if (!DebugCounter::shouldExecute(CSECounter)) {
   1332         LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
   1333       } else {
   1334         bool Killed = false;
   1335         if (!Inst.use_empty()) {
   1336           Inst.replaceAllUsesWith(V);
   1337           Changed = true;
   1338         }
   1339         if (isInstructionTriviallyDead(&Inst, &TLI)) {
   1340           salvageKnowledge(&Inst, &AC);
   1341           removeMSSA(Inst);
   1342           Inst.eraseFromParent();
   1343           Changed = true;
   1344           Killed = true;
   1345         }
   1346         if (Changed)
   1347           ++NumSimplify;
   1348         if (Killed)
   1349           continue;
   1350       }
   1351     }
   1352 
   1353     // If this is a simple instruction that we can value number, process it.
   1354     if (SimpleValue::canHandle(&Inst)) {
   1355       // See if the instruction has an available value.  If so, use it.
   1356       if (Value *V = AvailableValues.lookup(&Inst)) {
   1357         LLVM_DEBUG(dbgs() << "EarlyCSE CSE: " << Inst << "  to: " << *V
   1358                           << '\n');
   1359         if (!DebugCounter::shouldExecute(CSECounter)) {
   1360           LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
   1361           continue;
   1362         }
   1363         if (auto *I = dyn_cast<Instruction>(V))
   1364           I->andIRFlags(&Inst);
   1365         Inst.replaceAllUsesWith(V);
   1366         salvageKnowledge(&Inst, &AC);
   1367         removeMSSA(Inst);
   1368         Inst.eraseFromParent();
   1369         Changed = true;
   1370         ++NumCSE;
   1371         continue;
   1372       }
   1373 
   1374       // Otherwise, just remember that this value is available.
   1375       AvailableValues.insert(&Inst, &Inst);
   1376       continue;
   1377     }
   1378 
   1379     ParseMemoryInst MemInst(&Inst, TTI);
   1380     // If this is a non-volatile load, process it.
   1381     if (MemInst.isValid() && MemInst.isLoad()) {
   1382       // (conservatively) we can't peak past the ordering implied by this
   1383       // operation, but we can add this load to our set of available values
   1384       if (MemInst.isVolatile() || !MemInst.isUnordered()) {
   1385         LastStore = nullptr;
   1386         ++CurrentGeneration;
   1387       }
   1388 
   1389       if (MemInst.isInvariantLoad()) {
   1390         // If we pass an invariant load, we know that memory location is
   1391         // indefinitely constant from the moment of first dereferenceability.
   1392         // We conservatively treat the invariant_load as that moment.  If we
   1393         // pass a invariant load after already establishing a scope, don't
   1394         // restart it since we want to preserve the earliest point seen.
   1395         auto MemLoc = MemoryLocation::get(&Inst);
   1396         if (!AvailableInvariants.count(MemLoc))
   1397           AvailableInvariants.insert(MemLoc, CurrentGeneration);
   1398       }
   1399 
   1400       // If we have an available version of this load, and if it is the right
   1401       // generation or the load is known to be from an invariant location,
   1402       // replace this instruction.
   1403       //
   1404       // If either the dominating load or the current load are invariant, then
   1405       // we can assume the current load loads the same value as the dominating
   1406       // load.
   1407       LoadValue InVal = AvailableLoads.lookup(MemInst.getPointerOperand());
   1408       if (Value *Op = getMatchingValue(InVal, MemInst, CurrentGeneration)) {
   1409         LLVM_DEBUG(dbgs() << "EarlyCSE CSE LOAD: " << Inst
   1410                           << "  to: " << *InVal.DefInst << '\n');
   1411         if (!DebugCounter::shouldExecute(CSECounter)) {
   1412           LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
   1413           continue;
   1414         }
   1415         if (!Inst.use_empty())
   1416           Inst.replaceAllUsesWith(Op);
   1417         salvageKnowledge(&Inst, &AC);
   1418         removeMSSA(Inst);
   1419         Inst.eraseFromParent();
   1420         Changed = true;
   1421         ++NumCSELoad;
   1422         continue;
   1423       }
   1424 
   1425       // Otherwise, remember that we have this instruction.
   1426       AvailableLoads.insert(MemInst.getPointerOperand(),
   1427                             LoadValue(&Inst, CurrentGeneration,
   1428                                       MemInst.getMatchingId(),
   1429                                       MemInst.isAtomic()));
   1430       LastStore = nullptr;
   1431       continue;
   1432     }
   1433 
   1434     // If this instruction may read from memory or throw (and potentially read
   1435     // from memory in the exception handler), forget LastStore.  Load/store
   1436     // intrinsics will indicate both a read and a write to memory.  The target
   1437     // may override this (e.g. so that a store intrinsic does not read from
   1438     // memory, and thus will be treated the same as a regular store for
   1439     // commoning purposes).
   1440     if ((Inst.mayReadFromMemory() || Inst.mayThrow()) &&
   1441         !(MemInst.isValid() && !MemInst.mayReadFromMemory()))
   1442       LastStore = nullptr;
   1443 
   1444     // If this is a read-only call, process it.
   1445     if (CallValue::canHandle(&Inst)) {
   1446       // If we have an available version of this call, and if it is the right
   1447       // generation, replace this instruction.
   1448       std::pair<Instruction *, unsigned> InVal = AvailableCalls.lookup(&Inst);
   1449       if (InVal.first != nullptr &&
   1450           isSameMemGeneration(InVal.second, CurrentGeneration, InVal.first,
   1451                               &Inst)) {
   1452         LLVM_DEBUG(dbgs() << "EarlyCSE CSE CALL: " << Inst
   1453                           << "  to: " << *InVal.first << '\n');
   1454         if (!DebugCounter::shouldExecute(CSECounter)) {
   1455           LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
   1456           continue;
   1457         }
   1458         if (!Inst.use_empty())
   1459           Inst.replaceAllUsesWith(InVal.first);
   1460         salvageKnowledge(&Inst, &AC);
   1461         removeMSSA(Inst);
   1462         Inst.eraseFromParent();
   1463         Changed = true;
   1464         ++NumCSECall;
   1465         continue;
   1466       }
   1467 
   1468       // Otherwise, remember that we have this instruction.
   1469       AvailableCalls.insert(&Inst, std::make_pair(&Inst, CurrentGeneration));
   1470       continue;
   1471     }
   1472 
   1473     // A release fence requires that all stores complete before it, but does
   1474     // not prevent the reordering of following loads 'before' the fence.  As a
   1475     // result, we don't need to consider it as writing to memory and don't need
   1476     // to advance the generation.  We do need to prevent DSE across the fence,
   1477     // but that's handled above.
   1478     if (auto *FI = dyn_cast<FenceInst>(&Inst))
   1479       if (FI->getOrdering() == AtomicOrdering::Release) {
   1480         assert(Inst.mayReadFromMemory() && "relied on to prevent DSE above");
   1481         continue;
   1482       }
   1483 
   1484     // write back DSE - If we write back the same value we just loaded from
   1485     // the same location and haven't passed any intervening writes or ordering
   1486     // operations, we can remove the write.  The primary benefit is in allowing
   1487     // the available load table to remain valid and value forward past where
   1488     // the store originally was.
   1489     if (MemInst.isValid() && MemInst.isStore()) {
   1490       LoadValue InVal = AvailableLoads.lookup(MemInst.getPointerOperand());
   1491       if (InVal.DefInst &&
   1492           InVal.DefInst == getMatchingValue(InVal, MemInst, CurrentGeneration)) {
   1493         // It is okay to have a LastStore to a different pointer here if MemorySSA
   1494         // tells us that the load and store are from the same memory generation.
   1495         // In that case, LastStore should keep its present value since we're
   1496         // removing the current store.
   1497         assert((!LastStore ||
   1498                 ParseMemoryInst(LastStore, TTI).getPointerOperand() ==
   1499                     MemInst.getPointerOperand() ||
   1500                 MSSA) &&
   1501                "can't have an intervening store if not using MemorySSA!");
   1502         LLVM_DEBUG(dbgs() << "EarlyCSE DSE (writeback): " << Inst << '\n');
   1503         if (!DebugCounter::shouldExecute(CSECounter)) {
   1504           LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
   1505           continue;
   1506         }
   1507         salvageKnowledge(&Inst, &AC);
   1508         removeMSSA(Inst);
   1509         Inst.eraseFromParent();
   1510         Changed = true;
   1511         ++NumDSE;
   1512         // We can avoid incrementing the generation count since we were able
   1513         // to eliminate this store.
   1514         continue;
   1515       }
   1516     }
   1517 
   1518     // Okay, this isn't something we can CSE at all.  Check to see if it is
   1519     // something that could modify memory.  If so, our available memory values
   1520     // cannot be used so bump the generation count.
   1521     if (Inst.mayWriteToMemory()) {
   1522       ++CurrentGeneration;
   1523 
   1524       if (MemInst.isValid() && MemInst.isStore()) {
   1525         // We do a trivial form of DSE if there are two stores to the same
   1526         // location with no intervening loads.  Delete the earlier store.
   1527         if (LastStore) {
   1528           if (overridingStores(ParseMemoryInst(LastStore, TTI), MemInst)) {
   1529             LLVM_DEBUG(dbgs() << "EarlyCSE DEAD STORE: " << *LastStore
   1530                               << "  due to: " << Inst << '\n');
   1531             if (!DebugCounter::shouldExecute(CSECounter)) {
   1532               LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
   1533             } else {
   1534               salvageKnowledge(&Inst, &AC);
   1535               removeMSSA(*LastStore);
   1536               LastStore->eraseFromParent();
   1537               Changed = true;
   1538               ++NumDSE;
   1539               LastStore = nullptr;
   1540             }
   1541           }
   1542           // fallthrough - we can exploit information about this store
   1543         }
   1544 
   1545         // Okay, we just invalidated anything we knew about loaded values.  Try
   1546         // to salvage *something* by remembering that the stored value is a live
   1547         // version of the pointer.  It is safe to forward from volatile stores
   1548         // to non-volatile loads, so we don't have to check for volatility of
   1549         // the store.
   1550         AvailableLoads.insert(MemInst.getPointerOperand(),
   1551                               LoadValue(&Inst, CurrentGeneration,
   1552                                         MemInst.getMatchingId(),
   1553                                         MemInst.isAtomic()));
   1554 
   1555         // Remember that this was the last unordered store we saw for DSE. We
   1556         // don't yet handle DSE on ordered or volatile stores since we don't
   1557         // have a good way to model the ordering requirement for following
   1558         // passes  once the store is removed.  We could insert a fence, but
   1559         // since fences are slightly stronger than stores in their ordering,
   1560         // it's not clear this is a profitable transform. Another option would
   1561         // be to merge the ordering with that of the post dominating store.
   1562         if (MemInst.isUnordered() && !MemInst.isVolatile())
   1563           LastStore = &Inst;
   1564         else
   1565           LastStore = nullptr;
   1566       }
   1567     }
   1568   }
   1569 
   1570   return Changed;
   1571 }
   1572 
   1573 bool EarlyCSE::run() {
   1574   // Note, deque is being used here because there is significant performance
   1575   // gains over vector when the container becomes very large due to the
   1576   // specific access patterns. For more information see the mailing list
   1577   // discussion on this:
   1578   // http://lists.llvm.org/pipermail/llvm-commits/Week-of-Mon-20120116/135228.html
   1579   std::deque<StackNode *> nodesToProcess;
   1580 
   1581   bool Changed = false;
   1582 
   1583   // Process the root node.
   1584   nodesToProcess.push_back(new StackNode(
   1585       AvailableValues, AvailableLoads, AvailableInvariants, AvailableCalls,
   1586       CurrentGeneration, DT.getRootNode(),
   1587       DT.getRootNode()->begin(), DT.getRootNode()->end()));
   1588 
   1589   assert(!CurrentGeneration && "Create a new EarlyCSE instance to rerun it.");
   1590 
   1591   // Process the stack.
   1592   while (!nodesToProcess.empty()) {
   1593     // Grab the first item off the stack. Set the current generation, remove
   1594     // the node from the stack, and process it.
   1595     StackNode *NodeToProcess = nodesToProcess.back();
   1596 
   1597     // Initialize class members.
   1598     CurrentGeneration = NodeToProcess->currentGeneration();
   1599 
   1600     // Check if the node needs to be processed.
   1601     if (!NodeToProcess->isProcessed()) {
   1602       // Process the node.
   1603       Changed |= processNode(NodeToProcess->node());
   1604       NodeToProcess->childGeneration(CurrentGeneration);
   1605       NodeToProcess->process();
   1606     } else if (NodeToProcess->childIter() != NodeToProcess->end()) {
   1607       // Push the next child onto the stack.
   1608       DomTreeNode *child = NodeToProcess->nextChild();
   1609       nodesToProcess.push_back(
   1610           new StackNode(AvailableValues, AvailableLoads, AvailableInvariants,
   1611                         AvailableCalls, NodeToProcess->childGeneration(),
   1612                         child, child->begin(), child->end()));
   1613     } else {
   1614       // It has been processed, and there are no more children to process,
   1615       // so delete it and pop it off the stack.
   1616       delete NodeToProcess;
   1617       nodesToProcess.pop_back();
   1618     }
   1619   } // while (!nodes...)
   1620 
   1621   return Changed;
   1622 }
   1623 
   1624 PreservedAnalyses EarlyCSEPass::run(Function &F,
   1625                                     FunctionAnalysisManager &AM) {
   1626   auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
   1627   auto &TTI = AM.getResult<TargetIRAnalysis>(F);
   1628   auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
   1629   auto &AC = AM.getResult<AssumptionAnalysis>(F);
   1630   auto *MSSA =
   1631       UseMemorySSA ? &AM.getResult<MemorySSAAnalysis>(F).getMSSA() : nullptr;
   1632 
   1633   EarlyCSE CSE(F.getParent()->getDataLayout(), TLI, TTI, DT, AC, MSSA);
   1634 
   1635   if (!CSE.run())
   1636     return PreservedAnalyses::all();
   1637 
   1638   PreservedAnalyses PA;
   1639   PA.preserveSet<CFGAnalyses>();
   1640   if (UseMemorySSA)
   1641     PA.preserve<MemorySSAAnalysis>();
   1642   return PA;
   1643 }
   1644 
   1645 namespace {
   1646 
   1647 /// A simple and fast domtree-based CSE pass.
   1648 ///
   1649 /// This pass does a simple depth-first walk over the dominator tree,
   1650 /// eliminating trivially redundant instructions and using instsimplify to
   1651 /// canonicalize things as it goes. It is intended to be fast and catch obvious
   1652 /// cases so that instcombine and other passes are more effective. It is
   1653 /// expected that a later pass of GVN will catch the interesting/hard cases.
   1654 template<bool UseMemorySSA>
   1655 class EarlyCSELegacyCommonPass : public FunctionPass {
   1656 public:
   1657   static char ID;
   1658 
   1659   EarlyCSELegacyCommonPass() : FunctionPass(ID) {
   1660     if (UseMemorySSA)
   1661       initializeEarlyCSEMemSSALegacyPassPass(*PassRegistry::getPassRegistry());
   1662     else
   1663       initializeEarlyCSELegacyPassPass(*PassRegistry::getPassRegistry());
   1664   }
   1665 
   1666   bool runOnFunction(Function &F) override {
   1667     if (skipFunction(F))
   1668       return false;
   1669 
   1670     auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
   1671     auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
   1672     auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
   1673     auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
   1674     auto *MSSA =
   1675         UseMemorySSA ? &getAnalysis<MemorySSAWrapperPass>().getMSSA() : nullptr;
   1676 
   1677     EarlyCSE CSE(F.getParent()->getDataLayout(), TLI, TTI, DT, AC, MSSA);
   1678 
   1679     return CSE.run();
   1680   }
   1681 
   1682   void getAnalysisUsage(AnalysisUsage &AU) const override {
   1683     AU.addRequired<AssumptionCacheTracker>();
   1684     AU.addRequired<DominatorTreeWrapperPass>();
   1685     AU.addRequired<TargetLibraryInfoWrapperPass>();
   1686     AU.addRequired<TargetTransformInfoWrapperPass>();
   1687     if (UseMemorySSA) {
   1688       AU.addRequired<AAResultsWrapperPass>();
   1689       AU.addRequired<MemorySSAWrapperPass>();
   1690       AU.addPreserved<MemorySSAWrapperPass>();
   1691     }
   1692     AU.addPreserved<GlobalsAAWrapperPass>();
   1693     AU.addPreserved<AAResultsWrapperPass>();
   1694     AU.setPreservesCFG();
   1695   }
   1696 };
   1697 
   1698 } // end anonymous namespace
   1699 
   1700 using EarlyCSELegacyPass = EarlyCSELegacyCommonPass</*UseMemorySSA=*/false>;
   1701 
   1702 template<>
   1703 char EarlyCSELegacyPass::ID = 0;
   1704 
   1705 INITIALIZE_PASS_BEGIN(EarlyCSELegacyPass, "early-cse", "Early CSE", false,
   1706                       false)
   1707 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
   1708 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
   1709 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
   1710 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
   1711 INITIALIZE_PASS_END(EarlyCSELegacyPass, "early-cse", "Early CSE", false, false)
   1712 
   1713 using EarlyCSEMemSSALegacyPass =
   1714     EarlyCSELegacyCommonPass</*UseMemorySSA=*/true>;
   1715 
   1716 template<>
   1717 char EarlyCSEMemSSALegacyPass::ID = 0;
   1718 
   1719 FunctionPass *llvm::createEarlyCSEPass(bool UseMemorySSA) {
   1720   if (UseMemorySSA)
   1721     return new EarlyCSEMemSSALegacyPass();
   1722   else
   1723     return new EarlyCSELegacyPass();
   1724 }
   1725 
   1726 INITIALIZE_PASS_BEGIN(EarlyCSEMemSSALegacyPass, "early-cse-memssa",
   1727                       "Early CSE w/ MemorySSA", false, false)
   1728 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
   1729 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
   1730 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
   1731 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
   1732 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
   1733 INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
   1734 INITIALIZE_PASS_END(EarlyCSEMemSSALegacyPass, "early-cse-memssa",
   1735                     "Early CSE w/ MemorySSA", false, false)
   1736