Home | History | Annotate | Line # | Download | only in Analysis
      1 //===- MemoryDependenceAnalysis.cpp - Mem Deps Implementation -------------===//
      2 //
      3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
      4 // See https://llvm.org/LICENSE.txt for license information.
      5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
      6 //
      7 //===----------------------------------------------------------------------===//
      8 //
      9 // This file implements an analysis that determines, for a given memory
     10 // operation, what preceding memory operations it depends on.  It builds on
     11 // alias analysis information, and tries to provide a lazy, caching interface to
     12 // a common kind of alias information query.
     13 //
     14 //===----------------------------------------------------------------------===//
     15 
     16 #include "llvm/Analysis/MemoryDependenceAnalysis.h"
     17 #include "llvm/ADT/DenseMap.h"
     18 #include "llvm/ADT/STLExtras.h"
     19 #include "llvm/ADT/SmallPtrSet.h"
     20 #include "llvm/ADT/SmallVector.h"
     21 #include "llvm/ADT/Statistic.h"
     22 #include "llvm/Analysis/AliasAnalysis.h"
     23 #include "llvm/Analysis/AssumptionCache.h"
     24 #include "llvm/Analysis/MemoryBuiltins.h"
     25 #include "llvm/Analysis/MemoryLocation.h"
     26 #include "llvm/Analysis/PHITransAddr.h"
     27 #include "llvm/Analysis/PhiValues.h"
     28 #include "llvm/Analysis/TargetLibraryInfo.h"
     29 #include "llvm/Analysis/ValueTracking.h"
     30 #include "llvm/IR/Attributes.h"
     31 #include "llvm/IR/BasicBlock.h"
     32 #include "llvm/IR/Constants.h"
     33 #include "llvm/IR/DataLayout.h"
     34 #include "llvm/IR/DerivedTypes.h"
     35 #include "llvm/IR/Dominators.h"
     36 #include "llvm/IR/Function.h"
     37 #include "llvm/IR/InstrTypes.h"
     38 #include "llvm/IR/Instruction.h"
     39 #include "llvm/IR/Instructions.h"
     40 #include "llvm/IR/IntrinsicInst.h"
     41 #include "llvm/IR/LLVMContext.h"
     42 #include "llvm/IR/Metadata.h"
     43 #include "llvm/IR/Module.h"
     44 #include "llvm/IR/PredIteratorCache.h"
     45 #include "llvm/IR/Type.h"
     46 #include "llvm/IR/Use.h"
     47 #include "llvm/IR/User.h"
     48 #include "llvm/IR/Value.h"
     49 #include "llvm/InitializePasses.h"
     50 #include "llvm/Pass.h"
     51 #include "llvm/Support/AtomicOrdering.h"
     52 #include "llvm/Support/Casting.h"
     53 #include "llvm/Support/CommandLine.h"
     54 #include "llvm/Support/Compiler.h"
     55 #include "llvm/Support/Debug.h"
     56 #include "llvm/Support/MathExtras.h"
     57 #include <algorithm>
     58 #include <cassert>
     59 #include <cstdint>
     60 #include <iterator>
     61 #include <utility>
     62 
     63 using namespace llvm;
     64 
     65 #define DEBUG_TYPE "memdep"
     66 
     67 STATISTIC(NumCacheNonLocal, "Number of fully cached non-local responses");
     68 STATISTIC(NumCacheDirtyNonLocal, "Number of dirty cached non-local responses");
     69 STATISTIC(NumUncacheNonLocal, "Number of uncached non-local responses");
     70 
     71 STATISTIC(NumCacheNonLocalPtr,
     72           "Number of fully cached non-local ptr responses");
     73 STATISTIC(NumCacheDirtyNonLocalPtr,
     74           "Number of cached, but dirty, non-local ptr responses");
     75 STATISTIC(NumUncacheNonLocalPtr, "Number of uncached non-local ptr responses");
     76 STATISTIC(NumCacheCompleteNonLocalPtr,
     77           "Number of block queries that were completely cached");
     78 
     79 // Limit for the number of instructions to scan in a block.
     80 
     81 static cl::opt<unsigned> BlockScanLimit(
     82     "memdep-block-scan-limit", cl::Hidden, cl::init(100),
     83     cl::desc("The number of instructions to scan in a block in memory "
     84              "dependency analysis (default = 100)"));
     85 
     86 static cl::opt<unsigned>
     87     BlockNumberLimit("memdep-block-number-limit", cl::Hidden, cl::init(1000),
     88                      cl::desc("The number of blocks to scan during memory "
     89                               "dependency analysis (default = 1000)"));
     90 
     91 // Limit on the number of memdep results to process.
     92 static const unsigned int NumResultsLimit = 100;
     93 
     94 /// This is a helper function that removes Val from 'Inst's set in ReverseMap.
     95 ///
     96 /// If the set becomes empty, remove Inst's entry.
     97 template <typename KeyTy>
     98 static void
     99 RemoveFromReverseMap(DenseMap<Instruction *, SmallPtrSet<KeyTy, 4>> &ReverseMap,
    100                      Instruction *Inst, KeyTy Val) {
    101   typename DenseMap<Instruction *, SmallPtrSet<KeyTy, 4>>::iterator InstIt =
    102       ReverseMap.find(Inst);
    103   assert(InstIt != ReverseMap.end() && "Reverse map out of sync?");
    104   bool Found = InstIt->second.erase(Val);
    105   assert(Found && "Invalid reverse map!");
    106   (void)Found;
    107   if (InstIt->second.empty())
    108     ReverseMap.erase(InstIt);
    109 }
    110 
    111 /// If the given instruction references a specific memory location, fill in Loc
    112 /// with the details, otherwise set Loc.Ptr to null.
    113 ///
    114 /// Returns a ModRefInfo value describing the general behavior of the
    115 /// instruction.
    116 static ModRefInfo GetLocation(const Instruction *Inst, MemoryLocation &Loc,
    117                               const TargetLibraryInfo &TLI) {
    118   if (const LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
    119     if (LI->isUnordered()) {
    120       Loc = MemoryLocation::get(LI);
    121       return ModRefInfo::Ref;
    122     }
    123     if (LI->getOrdering() == AtomicOrdering::Monotonic) {
    124       Loc = MemoryLocation::get(LI);
    125       return ModRefInfo::ModRef;
    126     }
    127     Loc = MemoryLocation();
    128     return ModRefInfo::ModRef;
    129   }
    130 
    131   if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
    132     if (SI->isUnordered()) {
    133       Loc = MemoryLocation::get(SI);
    134       return ModRefInfo::Mod;
    135     }
    136     if (SI->getOrdering() == AtomicOrdering::Monotonic) {
    137       Loc = MemoryLocation::get(SI);
    138       return ModRefInfo::ModRef;
    139     }
    140     Loc = MemoryLocation();
    141     return ModRefInfo::ModRef;
    142   }
    143 
    144   if (const VAArgInst *V = dyn_cast<VAArgInst>(Inst)) {
    145     Loc = MemoryLocation::get(V);
    146     return ModRefInfo::ModRef;
    147   }
    148 
    149   if (const CallInst *CI = isFreeCall(Inst, &TLI)) {
    150     // calls to free() deallocate the entire structure
    151     Loc = MemoryLocation::getAfter(CI->getArgOperand(0));
    152     return ModRefInfo::Mod;
    153   }
    154 
    155   if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
    156     switch (II->getIntrinsicID()) {
    157     case Intrinsic::lifetime_start:
    158     case Intrinsic::lifetime_end:
    159     case Intrinsic::invariant_start:
    160       Loc = MemoryLocation::getForArgument(II, 1, TLI);
    161       // These intrinsics don't really modify the memory, but returning Mod
    162       // will allow them to be handled conservatively.
    163       return ModRefInfo::Mod;
    164     case Intrinsic::invariant_end:
    165       Loc = MemoryLocation::getForArgument(II, 2, TLI);
    166       // These intrinsics don't really modify the memory, but returning Mod
    167       // will allow them to be handled conservatively.
    168       return ModRefInfo::Mod;
    169     case Intrinsic::masked_load:
    170       Loc = MemoryLocation::getForArgument(II, 0, TLI);
    171       return ModRefInfo::Ref;
    172     case Intrinsic::masked_store:
    173       Loc = MemoryLocation::getForArgument(II, 1, TLI);
    174       return ModRefInfo::Mod;
    175     default:
    176       break;
    177     }
    178   }
    179 
    180   // Otherwise, just do the coarse-grained thing that always works.
    181   if (Inst->mayWriteToMemory())
    182     return ModRefInfo::ModRef;
    183   if (Inst->mayReadFromMemory())
    184     return ModRefInfo::Ref;
    185   return ModRefInfo::NoModRef;
    186 }
    187 
    188 /// Private helper for finding the local dependencies of a call site.
    189 MemDepResult MemoryDependenceResults::getCallDependencyFrom(
    190     CallBase *Call, bool isReadOnlyCall, BasicBlock::iterator ScanIt,
    191     BasicBlock *BB) {
    192   unsigned Limit = getDefaultBlockScanLimit();
    193 
    194   // Walk backwards through the block, looking for dependencies.
    195   while (ScanIt != BB->begin()) {
    196     Instruction *Inst = &*--ScanIt;
    197     // Debug intrinsics don't cause dependences and should not affect Limit
    198     if (isa<DbgInfoIntrinsic>(Inst))
    199       continue;
    200 
    201     // Limit the amount of scanning we do so we don't end up with quadratic
    202     // running time on extreme testcases.
    203     --Limit;
    204     if (!Limit)
    205       return MemDepResult::getUnknown();
    206 
    207     // If this inst is a memory op, get the pointer it accessed
    208     MemoryLocation Loc;
    209     ModRefInfo MR = GetLocation(Inst, Loc, TLI);
    210     if (Loc.Ptr) {
    211       // A simple instruction.
    212       if (isModOrRefSet(AA.getModRefInfo(Call, Loc)))
    213         return MemDepResult::getClobber(Inst);
    214       continue;
    215     }
    216 
    217     if (auto *CallB = dyn_cast<CallBase>(Inst)) {
    218       // If these two calls do not interfere, look past it.
    219       if (isNoModRef(AA.getModRefInfo(Call, CallB))) {
    220         // If the two calls are the same, return Inst as a Def, so that
    221         // Call can be found redundant and eliminated.
    222         if (isReadOnlyCall && !isModSet(MR) &&
    223             Call->isIdenticalToWhenDefined(CallB))
    224           return MemDepResult::getDef(Inst);
    225 
    226         // Otherwise if the two calls don't interact (e.g. CallB is readnone)
    227         // keep scanning.
    228         continue;
    229       } else
    230         return MemDepResult::getClobber(Inst);
    231     }
    232 
    233     // If we could not obtain a pointer for the instruction and the instruction
    234     // touches memory then assume that this is a dependency.
    235     if (isModOrRefSet(MR))
    236       return MemDepResult::getClobber(Inst);
    237   }
    238 
    239   // No dependence found.  If this is the entry block of the function, it is
    240   // unknown, otherwise it is non-local.
    241   if (BB != &BB->getParent()->getEntryBlock())
    242     return MemDepResult::getNonLocal();
    243   return MemDepResult::getNonFuncLocal();
    244 }
    245 
    246 MemDepResult MemoryDependenceResults::getPointerDependencyFrom(
    247     const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt,
    248     BasicBlock *BB, Instruction *QueryInst, unsigned *Limit,
    249     BatchAAResults &BatchAA) {
    250   MemDepResult InvariantGroupDependency = MemDepResult::getUnknown();
    251   if (QueryInst != nullptr) {
    252     if (auto *LI = dyn_cast<LoadInst>(QueryInst)) {
    253       InvariantGroupDependency = getInvariantGroupPointerDependency(LI, BB);
    254 
    255       if (InvariantGroupDependency.isDef())
    256         return InvariantGroupDependency;
    257     }
    258   }
    259   MemDepResult SimpleDep = getSimplePointerDependencyFrom(
    260       MemLoc, isLoad, ScanIt, BB, QueryInst, Limit, BatchAA);
    261   if (SimpleDep.isDef())
    262     return SimpleDep;
    263   // Non-local invariant group dependency indicates there is non local Def
    264   // (it only returns nonLocal if it finds nonLocal def), which is better than
    265   // local clobber and everything else.
    266   if (InvariantGroupDependency.isNonLocal())
    267     return InvariantGroupDependency;
    268 
    269   assert(InvariantGroupDependency.isUnknown() &&
    270          "InvariantGroupDependency should be only unknown at this point");
    271   return SimpleDep;
    272 }
    273 
    274 MemDepResult MemoryDependenceResults::getPointerDependencyFrom(
    275     const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt,
    276     BasicBlock *BB, Instruction *QueryInst, unsigned *Limit) {
    277   BatchAAResults BatchAA(AA);
    278   return getPointerDependencyFrom(MemLoc, isLoad, ScanIt, BB, QueryInst, Limit,
    279                                   BatchAA);
    280 }
    281 
    282 MemDepResult
    283 MemoryDependenceResults::getInvariantGroupPointerDependency(LoadInst *LI,
    284                                                             BasicBlock *BB) {
    285 
    286   if (!LI->hasMetadata(LLVMContext::MD_invariant_group))
    287     return MemDepResult::getUnknown();
    288 
    289   // Take the ptr operand after all casts and geps 0. This way we can search
    290   // cast graph down only.
    291   Value *LoadOperand = LI->getPointerOperand()->stripPointerCasts();
    292 
    293   // It's is not safe to walk the use list of global value, because function
    294   // passes aren't allowed to look outside their functions.
    295   // FIXME: this could be fixed by filtering instructions from outside
    296   // of current function.
    297   if (isa<GlobalValue>(LoadOperand))
    298     return MemDepResult::getUnknown();
    299 
    300   // Queue to process all pointers that are equivalent to load operand.
    301   SmallVector<const Value *, 8> LoadOperandsQueue;
    302   LoadOperandsQueue.push_back(LoadOperand);
    303 
    304   Instruction *ClosestDependency = nullptr;
    305   // Order of instructions in uses list is unpredictible. In order to always
    306   // get the same result, we will look for the closest dominance.
    307   auto GetClosestDependency = [this](Instruction *Best, Instruction *Other) {
    308     assert(Other && "Must call it with not null instruction");
    309     if (Best == nullptr || DT.dominates(Best, Other))
    310       return Other;
    311     return Best;
    312   };
    313 
    314   // FIXME: This loop is O(N^2) because dominates can be O(n) and in worst case
    315   // we will see all the instructions. This should be fixed in MSSA.
    316   while (!LoadOperandsQueue.empty()) {
    317     const Value *Ptr = LoadOperandsQueue.pop_back_val();
    318     assert(Ptr && !isa<GlobalValue>(Ptr) &&
    319            "Null or GlobalValue should not be inserted");
    320 
    321     for (const Use &Us : Ptr->uses()) {
    322       auto *U = dyn_cast<Instruction>(Us.getUser());
    323       if (!U || U == LI || !DT.dominates(U, LI))
    324         continue;
    325 
    326       // Bitcast or gep with zeros are using Ptr. Add to queue to check it's
    327       // users.      U = bitcast Ptr
    328       if (isa<BitCastInst>(U)) {
    329         LoadOperandsQueue.push_back(U);
    330         continue;
    331       }
    332       // Gep with zeros is equivalent to bitcast.
    333       // FIXME: we are not sure if some bitcast should be canonicalized to gep 0
    334       // or gep 0 to bitcast because of SROA, so there are 2 forms. When
    335       // typeless pointers will be ready then both cases will be gone
    336       // (and this BFS also won't be needed).
    337       if (auto *GEP = dyn_cast<GetElementPtrInst>(U))
    338         if (GEP->hasAllZeroIndices()) {
    339           LoadOperandsQueue.push_back(U);
    340           continue;
    341         }
    342 
    343       // If we hit load/store with the same invariant.group metadata (and the
    344       // same pointer operand) we can assume that value pointed by pointer
    345       // operand didn't change.
    346       if ((isa<LoadInst>(U) ||
    347            (isa<StoreInst>(U) &&
    348             cast<StoreInst>(U)->getPointerOperand() == Ptr)) &&
    349           U->hasMetadata(LLVMContext::MD_invariant_group))
    350         ClosestDependency = GetClosestDependency(ClosestDependency, U);
    351     }
    352   }
    353 
    354   if (!ClosestDependency)
    355     return MemDepResult::getUnknown();
    356   if (ClosestDependency->getParent() == BB)
    357     return MemDepResult::getDef(ClosestDependency);
    358   // Def(U) can't be returned here because it is non-local. If local
    359   // dependency won't be found then return nonLocal counting that the
    360   // user will call getNonLocalPointerDependency, which will return cached
    361   // result.
    362   NonLocalDefsCache.try_emplace(
    363       LI, NonLocalDepResult(ClosestDependency->getParent(),
    364                             MemDepResult::getDef(ClosestDependency), nullptr));
    365   ReverseNonLocalDefsCache[ClosestDependency].insert(LI);
    366   return MemDepResult::getNonLocal();
    367 }
    368 
    369 MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom(
    370     const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt,
    371     BasicBlock *BB, Instruction *QueryInst, unsigned *Limit,
    372     BatchAAResults &BatchAA) {
    373   bool isInvariantLoad = false;
    374 
    375   unsigned DefaultLimit = getDefaultBlockScanLimit();
    376   if (!Limit)
    377     Limit = &DefaultLimit;
    378 
    379   // We must be careful with atomic accesses, as they may allow another thread
    380   //   to touch this location, clobbering it. We are conservative: if the
    381   //   QueryInst is not a simple (non-atomic) memory access, we automatically
    382   //   return getClobber.
    383   // If it is simple, we know based on the results of
    384   // "Compiler testing via a theory of sound optimisations in the C11/C++11
    385   //   memory model" in PLDI 2013, that a non-atomic location can only be
    386   //   clobbered between a pair of a release and an acquire action, with no
    387   //   access to the location in between.
    388   // Here is an example for giving the general intuition behind this rule.
    389   // In the following code:
    390   //   store x 0;
    391   //   release action; [1]
    392   //   acquire action; [4]
    393   //   %val = load x;
    394   // It is unsafe to replace %val by 0 because another thread may be running:
    395   //   acquire action; [2]
    396   //   store x 42;
    397   //   release action; [3]
    398   // with synchronization from 1 to 2 and from 3 to 4, resulting in %val
    399   // being 42. A key property of this program however is that if either
    400   // 1 or 4 were missing, there would be a race between the store of 42
    401   // either the store of 0 or the load (making the whole program racy).
    402   // The paper mentioned above shows that the same property is respected
    403   // by every program that can detect any optimization of that kind: either
    404   // it is racy (undefined) or there is a release followed by an acquire
    405   // between the pair of accesses under consideration.
    406 
    407   // If the load is invariant, we "know" that it doesn't alias *any* write. We
    408   // do want to respect mustalias results since defs are useful for value
    409   // forwarding, but any mayalias write can be assumed to be noalias.
    410   // Arguably, this logic should be pushed inside AliasAnalysis itself.
    411   if (isLoad && QueryInst) {
    412     LoadInst *LI = dyn_cast<LoadInst>(QueryInst);
    413     if (LI && LI->hasMetadata(LLVMContext::MD_invariant_load))
    414       isInvariantLoad = true;
    415   }
    416 
    417   // Return "true" if and only if the instruction I is either a non-simple
    418   // load or a non-simple store.
    419   auto isNonSimpleLoadOrStore = [](Instruction *I) -> bool {
    420     if (auto *LI = dyn_cast<LoadInst>(I))
    421       return !LI->isSimple();
    422     if (auto *SI = dyn_cast<StoreInst>(I))
    423       return !SI->isSimple();
    424     return false;
    425   };
    426 
    427   // Return "true" if I is not a load and not a store, but it does access
    428   // memory.
    429   auto isOtherMemAccess = [](Instruction *I) -> bool {
    430     return !isa<LoadInst>(I) && !isa<StoreInst>(I) && I->mayReadOrWriteMemory();
    431   };
    432 
    433   // Walk backwards through the basic block, looking for dependencies.
    434   while (ScanIt != BB->begin()) {
    435     Instruction *Inst = &*--ScanIt;
    436 
    437     if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst))
    438       // Debug intrinsics don't (and can't) cause dependencies.
    439       if (isa<DbgInfoIntrinsic>(II))
    440         continue;
    441 
    442     // Limit the amount of scanning we do so we don't end up with quadratic
    443     // running time on extreme testcases.
    444     --*Limit;
    445     if (!*Limit)
    446       return MemDepResult::getUnknown();
    447 
    448     if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
    449       // If we reach a lifetime begin or end marker, then the query ends here
    450       // because the value is undefined.
    451       Intrinsic::ID ID = II->getIntrinsicID();
    452       switch (ID) {
    453       case Intrinsic::lifetime_start: {
    454         // FIXME: This only considers queries directly on the invariant-tagged
    455         // pointer, not on query pointers that are indexed off of them.  It'd
    456         // be nice to handle that at some point (the right approach is to use
    457         // GetPointerBaseWithConstantOffset).
    458         MemoryLocation ArgLoc = MemoryLocation::getAfter(II->getArgOperand(1));
    459         if (BatchAA.isMustAlias(ArgLoc, MemLoc))
    460           return MemDepResult::getDef(II);
    461         continue;
    462       }
    463       case Intrinsic::masked_load:
    464       case Intrinsic::masked_store: {
    465         MemoryLocation Loc;
    466         /*ModRefInfo MR =*/ GetLocation(II, Loc, TLI);
    467         AliasResult R = BatchAA.alias(Loc, MemLoc);
    468         if (R == AliasResult::NoAlias)
    469           continue;
    470         if (R == AliasResult::MustAlias)
    471           return MemDepResult::getDef(II);
    472         if (ID == Intrinsic::masked_load)
    473           continue;
    474         return MemDepResult::getClobber(II);
    475       }
    476       }
    477     }
    478 
    479     // Values depend on loads if the pointers are must aliased.  This means
    480     // that a load depends on another must aliased load from the same value.
    481     // One exception is atomic loads: a value can depend on an atomic load that
    482     // it does not alias with when this atomic load indicates that another
    483     // thread may be accessing the location.
    484     if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
    485       // While volatile access cannot be eliminated, they do not have to clobber
    486       // non-aliasing locations, as normal accesses, for example, can be safely
    487       // reordered with volatile accesses.
    488       if (LI->isVolatile()) {
    489         if (!QueryInst)
    490           // Original QueryInst *may* be volatile
    491           return MemDepResult::getClobber(LI);
    492         if (QueryInst->isVolatile())
    493           // Ordering required if QueryInst is itself volatile
    494           return MemDepResult::getClobber(LI);
    495         // Otherwise, volatile doesn't imply any special ordering
    496       }
    497 
    498       // Atomic loads have complications involved.
    499       // A Monotonic (or higher) load is OK if the query inst is itself not
    500       // atomic.
    501       // FIXME: This is overly conservative.
    502       if (LI->isAtomic() && isStrongerThanUnordered(LI->getOrdering())) {
    503         if (!QueryInst || isNonSimpleLoadOrStore(QueryInst) ||
    504             isOtherMemAccess(QueryInst))
    505           return MemDepResult::getClobber(LI);
    506         if (LI->getOrdering() != AtomicOrdering::Monotonic)
    507           return MemDepResult::getClobber(LI);
    508       }
    509 
    510       MemoryLocation LoadLoc = MemoryLocation::get(LI);
    511 
    512       // If we found a pointer, check if it could be the same as our pointer.
    513       AliasResult R = BatchAA.alias(LoadLoc, MemLoc);
    514 
    515       if (isLoad) {
    516         if (R == AliasResult::NoAlias)
    517           continue;
    518 
    519         // Must aliased loads are defs of each other.
    520         if (R == AliasResult::MustAlias)
    521           return MemDepResult::getDef(Inst);
    522 
    523         // If we have a partial alias, then return this as a clobber for the
    524         // client to handle.
    525         if (R == AliasResult::PartialAlias && R.hasOffset()) {
    526           ClobberOffsets[LI] = R.getOffset();
    527           return MemDepResult::getClobber(Inst);
    528         }
    529 
    530         // Random may-alias loads don't depend on each other without a
    531         // dependence.
    532         continue;
    533       }
    534 
    535       // Stores don't depend on other no-aliased accesses.
    536       if (R == AliasResult::NoAlias)
    537         continue;
    538 
    539       // Stores don't alias loads from read-only memory.
    540       if (BatchAA.pointsToConstantMemory(LoadLoc))
    541         continue;
    542 
    543       // Stores depend on may/must aliased loads.
    544       return MemDepResult::getDef(Inst);
    545     }
    546 
    547     if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
    548       // Atomic stores have complications involved.
    549       // A Monotonic store is OK if the query inst is itself not atomic.
    550       // FIXME: This is overly conservative.
    551       if (!SI->isUnordered() && SI->isAtomic()) {
    552         if (!QueryInst || isNonSimpleLoadOrStore(QueryInst) ||
    553             isOtherMemAccess(QueryInst))
    554           return MemDepResult::getClobber(SI);
    555         if (SI->getOrdering() != AtomicOrdering::Monotonic)
    556           return MemDepResult::getClobber(SI);
    557       }
    558 
    559       // FIXME: this is overly conservative.
    560       // While volatile access cannot be eliminated, they do not have to clobber
    561       // non-aliasing locations, as normal accesses can for example be reordered
    562       // with volatile accesses.
    563       if (SI->isVolatile())
    564         if (!QueryInst || isNonSimpleLoadOrStore(QueryInst) ||
    565             isOtherMemAccess(QueryInst))
    566           return MemDepResult::getClobber(SI);
    567 
    568       // If alias analysis can tell that this store is guaranteed to not modify
    569       // the query pointer, ignore it.  Use getModRefInfo to handle cases where
    570       // the query pointer points to constant memory etc.
    571       if (!isModOrRefSet(BatchAA.getModRefInfo(SI, MemLoc)))
    572         continue;
    573 
    574       // Ok, this store might clobber the query pointer.  Check to see if it is
    575       // a must alias: in this case, we want to return this as a def.
    576       // FIXME: Use ModRefInfo::Must bit from getModRefInfo call above.
    577       MemoryLocation StoreLoc = MemoryLocation::get(SI);
    578 
    579       // If we found a pointer, check if it could be the same as our pointer.
    580       AliasResult R = BatchAA.alias(StoreLoc, MemLoc);
    581 
    582       if (R == AliasResult::NoAlias)
    583         continue;
    584       if (R == AliasResult::MustAlias)
    585         return MemDepResult::getDef(Inst);
    586       if (isInvariantLoad)
    587         continue;
    588       return MemDepResult::getClobber(Inst);
    589     }
    590 
    591     // If this is an allocation, and if we know that the accessed pointer is to
    592     // the allocation, return Def.  This means that there is no dependence and
    593     // the access can be optimized based on that.  For example, a load could
    594     // turn into undef.  Note that we can bypass the allocation itself when
    595     // looking for a clobber in many cases; that's an alias property and is
    596     // handled by BasicAA.
    597     if (isa<AllocaInst>(Inst) || isNoAliasFn(Inst, &TLI)) {
    598       const Value *AccessPtr = getUnderlyingObject(MemLoc.Ptr);
    599       if (AccessPtr == Inst || BatchAA.isMustAlias(Inst, AccessPtr))
    600         return MemDepResult::getDef(Inst);
    601     }
    602 
    603     if (isInvariantLoad)
    604       continue;
    605 
    606     // A release fence requires that all stores complete before it, but does
    607     // not prevent the reordering of following loads or stores 'before' the
    608     // fence.  As a result, we look past it when finding a dependency for
    609     // loads.  DSE uses this to find preceding stores to delete and thus we
    610     // can't bypass the fence if the query instruction is a store.
    611     if (FenceInst *FI = dyn_cast<FenceInst>(Inst))
    612       if (isLoad && FI->getOrdering() == AtomicOrdering::Release)
    613         continue;
    614 
    615     // See if this instruction (e.g. a call or vaarg) mod/ref's the pointer.
    616     ModRefInfo MR = BatchAA.getModRefInfo(Inst, MemLoc);
    617     // If necessary, perform additional analysis.
    618     if (isModAndRefSet(MR))
    619       MR = BatchAA.callCapturesBefore(Inst, MemLoc, &DT);
    620     switch (clearMust(MR)) {
    621     case ModRefInfo::NoModRef:
    622       // If the call has no effect on the queried pointer, just ignore it.
    623       continue;
    624     case ModRefInfo::Mod:
    625       return MemDepResult::getClobber(Inst);
    626     case ModRefInfo::Ref:
    627       // If the call is known to never store to the pointer, and if this is a
    628       // load query, we can safely ignore it (scan past it).
    629       if (isLoad)
    630         continue;
    631       LLVM_FALLTHROUGH;
    632     default:
    633       // Otherwise, there is a potential dependence.  Return a clobber.
    634       return MemDepResult::getClobber(Inst);
    635     }
    636   }
    637 
    638   // No dependence found.  If this is the entry block of the function, it is
    639   // unknown, otherwise it is non-local.
    640   if (BB != &BB->getParent()->getEntryBlock())
    641     return MemDepResult::getNonLocal();
    642   return MemDepResult::getNonFuncLocal();
    643 }
    644 
    645 MemDepResult MemoryDependenceResults::getDependency(Instruction *QueryInst) {
    646   ClobberOffsets.clear();
    647   Instruction *ScanPos = QueryInst;
    648 
    649   // Check for a cached result
    650   MemDepResult &LocalCache = LocalDeps[QueryInst];
    651 
    652   // If the cached entry is non-dirty, just return it.  Note that this depends
    653   // on MemDepResult's default constructing to 'dirty'.
    654   if (!LocalCache.isDirty())
    655     return LocalCache;
    656 
    657   // Otherwise, if we have a dirty entry, we know we can start the scan at that
    658   // instruction, which may save us some work.
    659   if (Instruction *Inst = LocalCache.getInst()) {
    660     ScanPos = Inst;
    661 
    662     RemoveFromReverseMap(ReverseLocalDeps, Inst, QueryInst);
    663   }
    664 
    665   BasicBlock *QueryParent = QueryInst->getParent();
    666 
    667   // Do the scan.
    668   if (BasicBlock::iterator(QueryInst) == QueryParent->begin()) {
    669     // No dependence found. If this is the entry block of the function, it is
    670     // unknown, otherwise it is non-local.
    671     if (QueryParent != &QueryParent->getParent()->getEntryBlock())
    672       LocalCache = MemDepResult::getNonLocal();
    673     else
    674       LocalCache = MemDepResult::getNonFuncLocal();
    675   } else {
    676     MemoryLocation MemLoc;
    677     ModRefInfo MR = GetLocation(QueryInst, MemLoc, TLI);
    678     if (MemLoc.Ptr) {
    679       // If we can do a pointer scan, make it happen.
    680       bool isLoad = !isModSet(MR);
    681       if (auto *II = dyn_cast<IntrinsicInst>(QueryInst))
    682         isLoad |= II->getIntrinsicID() == Intrinsic::lifetime_start;
    683 
    684       LocalCache =
    685           getPointerDependencyFrom(MemLoc, isLoad, ScanPos->getIterator(),
    686                                    QueryParent, QueryInst, nullptr);
    687     } else if (auto *QueryCall = dyn_cast<CallBase>(QueryInst)) {
    688       bool isReadOnly = AA.onlyReadsMemory(QueryCall);
    689       LocalCache = getCallDependencyFrom(QueryCall, isReadOnly,
    690                                          ScanPos->getIterator(), QueryParent);
    691     } else
    692       // Non-memory instruction.
    693       LocalCache = MemDepResult::getUnknown();
    694   }
    695 
    696   // Remember the result!
    697   if (Instruction *I = LocalCache.getInst())
    698     ReverseLocalDeps[I].insert(QueryInst);
    699 
    700   return LocalCache;
    701 }
    702 
    703 #ifndef NDEBUG
    704 /// This method is used when -debug is specified to verify that cache arrays
    705 /// are properly kept sorted.
    706 static void AssertSorted(MemoryDependenceResults::NonLocalDepInfo &Cache,
    707                          int Count = -1) {
    708   if (Count == -1)
    709     Count = Cache.size();
    710   assert(std::is_sorted(Cache.begin(), Cache.begin() + Count) &&
    711          "Cache isn't sorted!");
    712 }
    713 #endif
    714 
    715 const MemoryDependenceResults::NonLocalDepInfo &
    716 MemoryDependenceResults::getNonLocalCallDependency(CallBase *QueryCall) {
    717   assert(getDependency(QueryCall).isNonLocal() &&
    718          "getNonLocalCallDependency should only be used on calls with "
    719          "non-local deps!");
    720   PerInstNLInfo &CacheP = NonLocalDeps[QueryCall];
    721   NonLocalDepInfo &Cache = CacheP.first;
    722 
    723   // This is the set of blocks that need to be recomputed.  In the cached case,
    724   // this can happen due to instructions being deleted etc. In the uncached
    725   // case, this starts out as the set of predecessors we care about.
    726   SmallVector<BasicBlock *, 32> DirtyBlocks;
    727 
    728   if (!Cache.empty()) {
    729     // Okay, we have a cache entry.  If we know it is not dirty, just return it
    730     // with no computation.
    731     if (!CacheP.second) {
    732       ++NumCacheNonLocal;
    733       return Cache;
    734     }
    735 
    736     // If we already have a partially computed set of results, scan them to
    737     // determine what is dirty, seeding our initial DirtyBlocks worklist.
    738     for (auto &Entry : Cache)
    739       if (Entry.getResult().isDirty())
    740         DirtyBlocks.push_back(Entry.getBB());
    741 
    742     // Sort the cache so that we can do fast binary search lookups below.
    743     llvm::sort(Cache);
    744 
    745     ++NumCacheDirtyNonLocal;
    746     // cerr << "CACHED CASE: " << DirtyBlocks.size() << " dirty: "
    747     //     << Cache.size() << " cached: " << *QueryInst;
    748   } else {
    749     // Seed DirtyBlocks with each of the preds of QueryInst's block.
    750     BasicBlock *QueryBB = QueryCall->getParent();
    751     append_range(DirtyBlocks, PredCache.get(QueryBB));
    752     ++NumUncacheNonLocal;
    753   }
    754 
    755   // isReadonlyCall - If this is a read-only call, we can be more aggressive.
    756   bool isReadonlyCall = AA.onlyReadsMemory(QueryCall);
    757 
    758   SmallPtrSet<BasicBlock *, 32> Visited;
    759 
    760   unsigned NumSortedEntries = Cache.size();
    761   LLVM_DEBUG(AssertSorted(Cache));
    762 
    763   // Iterate while we still have blocks to update.
    764   while (!DirtyBlocks.empty()) {
    765     BasicBlock *DirtyBB = DirtyBlocks.pop_back_val();
    766 
    767     // Already processed this block?
    768     if (!Visited.insert(DirtyBB).second)
    769       continue;
    770 
    771     // Do a binary search to see if we already have an entry for this block in
    772     // the cache set.  If so, find it.
    773     LLVM_DEBUG(AssertSorted(Cache, NumSortedEntries));
    774     NonLocalDepInfo::iterator Entry =
    775         std::upper_bound(Cache.begin(), Cache.begin() + NumSortedEntries,
    776                          NonLocalDepEntry(DirtyBB));
    777     if (Entry != Cache.begin() && std::prev(Entry)->getBB() == DirtyBB)
    778       --Entry;
    779 
    780     NonLocalDepEntry *ExistingResult = nullptr;
    781     if (Entry != Cache.begin() + NumSortedEntries &&
    782         Entry->getBB() == DirtyBB) {
    783       // If we already have an entry, and if it isn't already dirty, the block
    784       // is done.
    785       if (!Entry->getResult().isDirty())
    786         continue;
    787 
    788       // Otherwise, remember this slot so we can update the value.
    789       ExistingResult = &*Entry;
    790     }
    791 
    792     // If the dirty entry has a pointer, start scanning from it so we don't have
    793     // to rescan the entire block.
    794     BasicBlock::iterator ScanPos = DirtyBB->end();
    795     if (ExistingResult) {
    796       if (Instruction *Inst = ExistingResult->getResult().getInst()) {
    797         ScanPos = Inst->getIterator();
    798         // We're removing QueryInst's use of Inst.
    799         RemoveFromReverseMap<Instruction *>(ReverseNonLocalDeps, Inst,
    800                                             QueryCall);
    801       }
    802     }
    803 
    804     // Find out if this block has a local dependency for QueryInst.
    805     MemDepResult Dep;
    806 
    807     if (ScanPos != DirtyBB->begin()) {
    808       Dep = getCallDependencyFrom(QueryCall, isReadonlyCall, ScanPos, DirtyBB);
    809     } else if (DirtyBB != &DirtyBB->getParent()->getEntryBlock()) {
    810       // No dependence found.  If this is the entry block of the function, it is
    811       // a clobber, otherwise it is unknown.
    812       Dep = MemDepResult::getNonLocal();
    813     } else {
    814       Dep = MemDepResult::getNonFuncLocal();
    815     }
    816 
    817     // If we had a dirty entry for the block, update it.  Otherwise, just add
    818     // a new entry.
    819     if (ExistingResult)
    820       ExistingResult->setResult(Dep);
    821     else
    822       Cache.push_back(NonLocalDepEntry(DirtyBB, Dep));
    823 
    824     // If the block has a dependency (i.e. it isn't completely transparent to
    825     // the value), remember the association!
    826     if (!Dep.isNonLocal()) {
    827       // Keep the ReverseNonLocalDeps map up to date so we can efficiently
    828       // update this when we remove instructions.
    829       if (Instruction *Inst = Dep.getInst())
    830         ReverseNonLocalDeps[Inst].insert(QueryCall);
    831     } else {
    832 
    833       // If the block *is* completely transparent to the load, we need to check
    834       // the predecessors of this block.  Add them to our worklist.
    835       append_range(DirtyBlocks, PredCache.get(DirtyBB));
    836     }
    837   }
    838 
    839   return Cache;
    840 }
    841 
    842 void MemoryDependenceResults::getNonLocalPointerDependency(
    843     Instruction *QueryInst, SmallVectorImpl<NonLocalDepResult> &Result) {
    844   const MemoryLocation Loc = MemoryLocation::get(QueryInst);
    845   bool isLoad = isa<LoadInst>(QueryInst);
    846   BasicBlock *FromBB = QueryInst->getParent();
    847   assert(FromBB);
    848 
    849   assert(Loc.Ptr->getType()->isPointerTy() &&
    850          "Can't get pointer deps of a non-pointer!");
    851   Result.clear();
    852   {
    853     // Check if there is cached Def with invariant.group.
    854     auto NonLocalDefIt = NonLocalDefsCache.find(QueryInst);
    855     if (NonLocalDefIt != NonLocalDefsCache.end()) {
    856       Result.push_back(NonLocalDefIt->second);
    857       ReverseNonLocalDefsCache[NonLocalDefIt->second.getResult().getInst()]
    858           .erase(QueryInst);
    859       NonLocalDefsCache.erase(NonLocalDefIt);
    860       return;
    861     }
    862   }
    863   // This routine does not expect to deal with volatile instructions.
    864   // Doing so would require piping through the QueryInst all the way through.
    865   // TODO: volatiles can't be elided, but they can be reordered with other
    866   // non-volatile accesses.
    867 
    868   // We currently give up on any instruction which is ordered, but we do handle
    869   // atomic instructions which are unordered.
    870   // TODO: Handle ordered instructions
    871   auto isOrdered = [](Instruction *Inst) {
    872     if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
    873       return !LI->isUnordered();
    874     } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
    875       return !SI->isUnordered();
    876     }
    877     return false;
    878   };
    879   if (QueryInst->isVolatile() || isOrdered(QueryInst)) {
    880     Result.push_back(NonLocalDepResult(FromBB, MemDepResult::getUnknown(),
    881                                        const_cast<Value *>(Loc.Ptr)));
    882     return;
    883   }
    884   const DataLayout &DL = FromBB->getModule()->getDataLayout();
    885   PHITransAddr Address(const_cast<Value *>(Loc.Ptr), DL, &AC);
    886 
    887   // This is the set of blocks we've inspected, and the pointer we consider in
    888   // each block.  Because of critical edges, we currently bail out if querying
    889   // a block with multiple different pointers.  This can happen during PHI
    890   // translation.
    891   DenseMap<BasicBlock *, Value *> Visited;
    892   if (getNonLocalPointerDepFromBB(QueryInst, Address, Loc, isLoad, FromBB,
    893                                    Result, Visited, true))
    894     return;
    895   Result.clear();
    896   Result.push_back(NonLocalDepResult(FromBB, MemDepResult::getUnknown(),
    897                                      const_cast<Value *>(Loc.Ptr)));
    898 }
    899 
    900 /// Compute the memdep value for BB with Pointer/PointeeSize using either
    901 /// cached information in Cache or by doing a lookup (which may use dirty cache
    902 /// info if available).
    903 ///
    904 /// If we do a lookup, add the result to the cache.
    905 MemDepResult MemoryDependenceResults::GetNonLocalInfoForBlock(
    906     Instruction *QueryInst, const MemoryLocation &Loc, bool isLoad,
    907     BasicBlock *BB, NonLocalDepInfo *Cache, unsigned NumSortedEntries,
    908     BatchAAResults &BatchAA) {
    909 
    910   bool isInvariantLoad = false;
    911 
    912   if (LoadInst *LI = dyn_cast_or_null<LoadInst>(QueryInst))
    913     isInvariantLoad = LI->getMetadata(LLVMContext::MD_invariant_load);
    914 
    915   // Do a binary search to see if we already have an entry for this block in
    916   // the cache set.  If so, find it.
    917   NonLocalDepInfo::iterator Entry = std::upper_bound(
    918       Cache->begin(), Cache->begin() + NumSortedEntries, NonLocalDepEntry(BB));
    919   if (Entry != Cache->begin() && (Entry - 1)->getBB() == BB)
    920     --Entry;
    921 
    922   NonLocalDepEntry *ExistingResult = nullptr;
    923   if (Entry != Cache->begin() + NumSortedEntries && Entry->getBB() == BB)
    924     ExistingResult = &*Entry;
    925 
    926   // Use cached result for invariant load only if there is no dependency for non
    927   // invariant load. In this case invariant load can not have any dependency as
    928   // well.
    929   if (ExistingResult && isInvariantLoad &&
    930       !ExistingResult->getResult().isNonFuncLocal())
    931     ExistingResult = nullptr;
    932 
    933   // If we have a cached entry, and it is non-dirty, use it as the value for
    934   // this dependency.
    935   if (ExistingResult && !ExistingResult->getResult().isDirty()) {
    936     ++NumCacheNonLocalPtr;
    937     return ExistingResult->getResult();
    938   }
    939 
    940   // Otherwise, we have to scan for the value.  If we have a dirty cache
    941   // entry, start scanning from its position, otherwise we scan from the end
    942   // of the block.
    943   BasicBlock::iterator ScanPos = BB->end();
    944   if (ExistingResult && ExistingResult->getResult().getInst()) {
    945     assert(ExistingResult->getResult().getInst()->getParent() == BB &&
    946            "Instruction invalidated?");
    947     ++NumCacheDirtyNonLocalPtr;
    948     ScanPos = ExistingResult->getResult().getInst()->getIterator();
    949 
    950     // Eliminating the dirty entry from 'Cache', so update the reverse info.
    951     ValueIsLoadPair CacheKey(Loc.Ptr, isLoad);
    952     RemoveFromReverseMap(ReverseNonLocalPtrDeps, &*ScanPos, CacheKey);
    953   } else {
    954     ++NumUncacheNonLocalPtr;
    955   }
    956 
    957   // Scan the block for the dependency.
    958   MemDepResult Dep = getPointerDependencyFrom(Loc, isLoad, ScanPos, BB,
    959                                               QueryInst, nullptr, BatchAA);
    960 
    961   // Don't cache results for invariant load.
    962   if (isInvariantLoad)
    963     return Dep;
    964 
    965   // If we had a dirty entry for the block, update it.  Otherwise, just add
    966   // a new entry.
    967   if (ExistingResult)
    968     ExistingResult->setResult(Dep);
    969   else
    970     Cache->push_back(NonLocalDepEntry(BB, Dep));
    971 
    972   // If the block has a dependency (i.e. it isn't completely transparent to
    973   // the value), remember the reverse association because we just added it
    974   // to Cache!
    975   if (!Dep.isDef() && !Dep.isClobber())
    976     return Dep;
    977 
    978   // Keep the ReverseNonLocalPtrDeps map up to date so we can efficiently
    979   // update MemDep when we remove instructions.
    980   Instruction *Inst = Dep.getInst();
    981   assert(Inst && "Didn't depend on anything?");
    982   ValueIsLoadPair CacheKey(Loc.Ptr, isLoad);
    983   ReverseNonLocalPtrDeps[Inst].insert(CacheKey);
    984   return Dep;
    985 }
    986 
    987 /// Sort the NonLocalDepInfo cache, given a certain number of elements in the
    988 /// array that are already properly ordered.
    989 ///
    990 /// This is optimized for the case when only a few entries are added.
    991 static void
    992 SortNonLocalDepInfoCache(MemoryDependenceResults::NonLocalDepInfo &Cache,
    993                          unsigned NumSortedEntries) {
    994   switch (Cache.size() - NumSortedEntries) {
    995   case 0:
    996     // done, no new entries.
    997     break;
    998   case 2: {
    999     // Two new entries, insert the last one into place.
   1000     NonLocalDepEntry Val = Cache.back();
   1001     Cache.pop_back();
   1002     MemoryDependenceResults::NonLocalDepInfo::iterator Entry =
   1003         std::upper_bound(Cache.begin(), Cache.end() - 1, Val);
   1004     Cache.insert(Entry, Val);
   1005     LLVM_FALLTHROUGH;
   1006   }
   1007   case 1:
   1008     // One new entry, Just insert the new value at the appropriate position.
   1009     if (Cache.size() != 1) {
   1010       NonLocalDepEntry Val = Cache.back();
   1011       Cache.pop_back();
   1012       MemoryDependenceResults::NonLocalDepInfo::iterator Entry =
   1013           llvm::upper_bound(Cache, Val);
   1014       Cache.insert(Entry, Val);
   1015     }
   1016     break;
   1017   default:
   1018     // Added many values, do a full scale sort.
   1019     llvm::sort(Cache);
   1020     break;
   1021   }
   1022 }
   1023 
   1024 /// Perform a dependency query based on pointer/pointeesize starting at the end
   1025 /// of StartBB.
   1026 ///
   1027 /// Add any clobber/def results to the results vector and keep track of which
   1028 /// blocks are visited in 'Visited'.
   1029 ///
   1030 /// This has special behavior for the first block queries (when SkipFirstBlock
   1031 /// is true).  In this special case, it ignores the contents of the specified
   1032 /// block and starts returning dependence info for its predecessors.
   1033 ///
   1034 /// This function returns true on success, or false to indicate that it could
   1035 /// not compute dependence information for some reason.  This should be treated
   1036 /// as a clobber dependence on the first instruction in the predecessor block.
   1037 bool MemoryDependenceResults::getNonLocalPointerDepFromBB(
   1038     Instruction *QueryInst, const PHITransAddr &Pointer,
   1039     const MemoryLocation &Loc, bool isLoad, BasicBlock *StartBB,
   1040     SmallVectorImpl<NonLocalDepResult> &Result,
   1041     DenseMap<BasicBlock *, Value *> &Visited, bool SkipFirstBlock,
   1042     bool IsIncomplete) {
   1043   // Look up the cached info for Pointer.
   1044   ValueIsLoadPair CacheKey(Pointer.getAddr(), isLoad);
   1045 
   1046   // Set up a temporary NLPI value. If the map doesn't yet have an entry for
   1047   // CacheKey, this value will be inserted as the associated value. Otherwise,
   1048   // it'll be ignored, and we'll have to check to see if the cached size and
   1049   // aa tags are consistent with the current query.
   1050   NonLocalPointerInfo InitialNLPI;
   1051   InitialNLPI.Size = Loc.Size;
   1052   InitialNLPI.AATags = Loc.AATags;
   1053 
   1054   bool isInvariantLoad = false;
   1055   if (LoadInst *LI = dyn_cast_or_null<LoadInst>(QueryInst))
   1056     isInvariantLoad = LI->getMetadata(LLVMContext::MD_invariant_load);
   1057 
   1058   // Get the NLPI for CacheKey, inserting one into the map if it doesn't
   1059   // already have one.
   1060   std::pair<CachedNonLocalPointerInfo::iterator, bool> Pair =
   1061       NonLocalPointerDeps.insert(std::make_pair(CacheKey, InitialNLPI));
   1062   NonLocalPointerInfo *CacheInfo = &Pair.first->second;
   1063 
   1064   // If we already have a cache entry for this CacheKey, we may need to do some
   1065   // work to reconcile the cache entry and the current query.
   1066   // Invariant loads don't participate in caching. Thus no need to reconcile.
   1067   if (!isInvariantLoad && !Pair.second) {
   1068     if (CacheInfo->Size != Loc.Size) {
   1069       bool ThrowOutEverything;
   1070       if (CacheInfo->Size.hasValue() && Loc.Size.hasValue()) {
   1071         // FIXME: We may be able to do better in the face of results with mixed
   1072         // precision. We don't appear to get them in practice, though, so just
   1073         // be conservative.
   1074         ThrowOutEverything =
   1075             CacheInfo->Size.isPrecise() != Loc.Size.isPrecise() ||
   1076             CacheInfo->Size.getValue() < Loc.Size.getValue();
   1077       } else {
   1078         // For our purposes, unknown size > all others.
   1079         ThrowOutEverything = !Loc.Size.hasValue();
   1080       }
   1081 
   1082       if (ThrowOutEverything) {
   1083         // The query's Size is greater than the cached one. Throw out the
   1084         // cached data and proceed with the query at the greater size.
   1085         CacheInfo->Pair = BBSkipFirstBlockPair();
   1086         CacheInfo->Size = Loc.Size;
   1087         for (auto &Entry : CacheInfo->NonLocalDeps)
   1088           if (Instruction *Inst = Entry.getResult().getInst())
   1089             RemoveFromReverseMap(ReverseNonLocalPtrDeps, Inst, CacheKey);
   1090         CacheInfo->NonLocalDeps.clear();
   1091         // The cache is cleared (in the above line) so we will have lost
   1092         // information about blocks we have already visited. We therefore must
   1093         // assume that the cache information is incomplete.
   1094         IsIncomplete = true;
   1095       } else {
   1096         // This query's Size is less than the cached one. Conservatively restart
   1097         // the query using the greater size.
   1098         return getNonLocalPointerDepFromBB(
   1099             QueryInst, Pointer, Loc.getWithNewSize(CacheInfo->Size), isLoad,
   1100             StartBB, Result, Visited, SkipFirstBlock, IsIncomplete);
   1101       }
   1102     }
   1103 
   1104     // If the query's AATags are inconsistent with the cached one,
   1105     // conservatively throw out the cached data and restart the query with
   1106     // no tag if needed.
   1107     if (CacheInfo->AATags != Loc.AATags) {
   1108       if (CacheInfo->AATags) {
   1109         CacheInfo->Pair = BBSkipFirstBlockPair();
   1110         CacheInfo->AATags = AAMDNodes();
   1111         for (auto &Entry : CacheInfo->NonLocalDeps)
   1112           if (Instruction *Inst = Entry.getResult().getInst())
   1113             RemoveFromReverseMap(ReverseNonLocalPtrDeps, Inst, CacheKey);
   1114         CacheInfo->NonLocalDeps.clear();
   1115         // The cache is cleared (in the above line) so we will have lost
   1116         // information about blocks we have already visited. We therefore must
   1117         // assume that the cache information is incomplete.
   1118         IsIncomplete = true;
   1119       }
   1120       if (Loc.AATags)
   1121         return getNonLocalPointerDepFromBB(
   1122             QueryInst, Pointer, Loc.getWithoutAATags(), isLoad, StartBB, Result,
   1123             Visited, SkipFirstBlock, IsIncomplete);
   1124     }
   1125   }
   1126 
   1127   NonLocalDepInfo *Cache = &CacheInfo->NonLocalDeps;
   1128 
   1129   // If we have valid cached information for exactly the block we are
   1130   // investigating, just return it with no recomputation.
   1131   // Don't use cached information for invariant loads since it is valid for
   1132   // non-invariant loads only.
   1133   if (!IsIncomplete && !isInvariantLoad &&
   1134       CacheInfo->Pair == BBSkipFirstBlockPair(StartBB, SkipFirstBlock)) {
   1135     // We have a fully cached result for this query then we can just return the
   1136     // cached results and populate the visited set.  However, we have to verify
   1137     // that we don't already have conflicting results for these blocks.  Check
   1138     // to ensure that if a block in the results set is in the visited set that
   1139     // it was for the same pointer query.
   1140     if (!Visited.empty()) {
   1141       for (auto &Entry : *Cache) {
   1142         DenseMap<BasicBlock *, Value *>::iterator VI =
   1143             Visited.find(Entry.getBB());
   1144         if (VI == Visited.end() || VI->second == Pointer.getAddr())
   1145           continue;
   1146 
   1147         // We have a pointer mismatch in a block.  Just return false, saying
   1148         // that something was clobbered in this result.  We could also do a
   1149         // non-fully cached query, but there is little point in doing this.
   1150         return false;
   1151       }
   1152     }
   1153 
   1154     Value *Addr = Pointer.getAddr();
   1155     for (auto &Entry : *Cache) {
   1156       Visited.insert(std::make_pair(Entry.getBB(), Addr));
   1157       if (Entry.getResult().isNonLocal()) {
   1158         continue;
   1159       }
   1160 
   1161       if (DT.isReachableFromEntry(Entry.getBB())) {
   1162         Result.push_back(
   1163             NonLocalDepResult(Entry.getBB(), Entry.getResult(), Addr));
   1164       }
   1165     }
   1166     ++NumCacheCompleteNonLocalPtr;
   1167     return true;
   1168   }
   1169 
   1170   // Otherwise, either this is a new block, a block with an invalid cache
   1171   // pointer or one that we're about to invalidate by putting more info into
   1172   // it than its valid cache info.  If empty and not explicitly indicated as
   1173   // incomplete, the result will be valid cache info, otherwise it isn't.
   1174   //
   1175   // Invariant loads don't affect cache in any way thus no need to update
   1176   // CacheInfo as well.
   1177   if (!isInvariantLoad) {
   1178     if (!IsIncomplete && Cache->empty())
   1179       CacheInfo->Pair = BBSkipFirstBlockPair(StartBB, SkipFirstBlock);
   1180     else
   1181       CacheInfo->Pair = BBSkipFirstBlockPair();
   1182   }
   1183 
   1184   SmallVector<BasicBlock *, 32> Worklist;
   1185   Worklist.push_back(StartBB);
   1186 
   1187   // PredList used inside loop.
   1188   SmallVector<std::pair<BasicBlock *, PHITransAddr>, 16> PredList;
   1189 
   1190   // Keep track of the entries that we know are sorted.  Previously cached
   1191   // entries will all be sorted.  The entries we add we only sort on demand (we
   1192   // don't insert every element into its sorted position).  We know that we
   1193   // won't get any reuse from currently inserted values, because we don't
   1194   // revisit blocks after we insert info for them.
   1195   unsigned NumSortedEntries = Cache->size();
   1196   unsigned WorklistEntries = BlockNumberLimit;
   1197   bool GotWorklistLimit = false;
   1198   LLVM_DEBUG(AssertSorted(*Cache));
   1199 
   1200   BatchAAResults BatchAA(AA);
   1201   while (!Worklist.empty()) {
   1202     BasicBlock *BB = Worklist.pop_back_val();
   1203 
   1204     // If we do process a large number of blocks it becomes very expensive and
   1205     // likely it isn't worth worrying about
   1206     if (Result.size() > NumResultsLimit) {
   1207       Worklist.clear();
   1208       // Sort it now (if needed) so that recursive invocations of
   1209       // getNonLocalPointerDepFromBB and other routines that could reuse the
   1210       // cache value will only see properly sorted cache arrays.
   1211       if (Cache && NumSortedEntries != Cache->size()) {
   1212         SortNonLocalDepInfoCache(*Cache, NumSortedEntries);
   1213       }
   1214       // Since we bail out, the "Cache" set won't contain all of the
   1215       // results for the query.  This is ok (we can still use it to accelerate
   1216       // specific block queries) but we can't do the fastpath "return all
   1217       // results from the set".  Clear out the indicator for this.
   1218       CacheInfo->Pair = BBSkipFirstBlockPair();
   1219       return false;
   1220     }
   1221 
   1222     // Skip the first block if we have it.
   1223     if (!SkipFirstBlock) {
   1224       // Analyze the dependency of *Pointer in FromBB.  See if we already have
   1225       // been here.
   1226       assert(Visited.count(BB) && "Should check 'visited' before adding to WL");
   1227 
   1228       // Get the dependency info for Pointer in BB.  If we have cached
   1229       // information, we will use it, otherwise we compute it.
   1230       LLVM_DEBUG(AssertSorted(*Cache, NumSortedEntries));
   1231       MemDepResult Dep = GetNonLocalInfoForBlock(QueryInst, Loc, isLoad, BB,
   1232                                                  Cache, NumSortedEntries,
   1233                                                  BatchAA);
   1234 
   1235       // If we got a Def or Clobber, add this to the list of results.
   1236       if (!Dep.isNonLocal()) {
   1237         if (DT.isReachableFromEntry(BB)) {
   1238           Result.push_back(NonLocalDepResult(BB, Dep, Pointer.getAddr()));
   1239           continue;
   1240         }
   1241       }
   1242     }
   1243 
   1244     // If 'Pointer' is an instruction defined in this block, then we need to do
   1245     // phi translation to change it into a value live in the predecessor block.
   1246     // If not, we just add the predecessors to the worklist and scan them with
   1247     // the same Pointer.
   1248     if (!Pointer.NeedsPHITranslationFromBlock(BB)) {
   1249       SkipFirstBlock = false;
   1250       SmallVector<BasicBlock *, 16> NewBlocks;
   1251       for (BasicBlock *Pred : PredCache.get(BB)) {
   1252         // Verify that we haven't looked at this block yet.
   1253         std::pair<DenseMap<BasicBlock *, Value *>::iterator, bool> InsertRes =
   1254             Visited.insert(std::make_pair(Pred, Pointer.getAddr()));
   1255         if (InsertRes.second) {
   1256           // First time we've looked at *PI.
   1257           NewBlocks.push_back(Pred);
   1258           continue;
   1259         }
   1260 
   1261         // If we have seen this block before, but it was with a different
   1262         // pointer then we have a phi translation failure and we have to treat
   1263         // this as a clobber.
   1264         if (InsertRes.first->second != Pointer.getAddr()) {
   1265           // Make sure to clean up the Visited map before continuing on to
   1266           // PredTranslationFailure.
   1267           for (unsigned i = 0; i < NewBlocks.size(); i++)
   1268             Visited.erase(NewBlocks[i]);
   1269           goto PredTranslationFailure;
   1270         }
   1271       }
   1272       if (NewBlocks.size() > WorklistEntries) {
   1273         // Make sure to clean up the Visited map before continuing on to
   1274         // PredTranslationFailure.
   1275         for (unsigned i = 0; i < NewBlocks.size(); i++)
   1276           Visited.erase(NewBlocks[i]);
   1277         GotWorklistLimit = true;
   1278         goto PredTranslationFailure;
   1279       }
   1280       WorklistEntries -= NewBlocks.size();
   1281       Worklist.append(NewBlocks.begin(), NewBlocks.end());
   1282       continue;
   1283     }
   1284 
   1285     // We do need to do phi translation, if we know ahead of time we can't phi
   1286     // translate this value, don't even try.
   1287     if (!Pointer.IsPotentiallyPHITranslatable())
   1288       goto PredTranslationFailure;
   1289 
   1290     // We may have added values to the cache list before this PHI translation.
   1291     // If so, we haven't done anything to ensure that the cache remains sorted.
   1292     // Sort it now (if needed) so that recursive invocations of
   1293     // getNonLocalPointerDepFromBB and other routines that could reuse the cache
   1294     // value will only see properly sorted cache arrays.
   1295     if (Cache && NumSortedEntries != Cache->size()) {
   1296       SortNonLocalDepInfoCache(*Cache, NumSortedEntries);
   1297       NumSortedEntries = Cache->size();
   1298     }
   1299     Cache = nullptr;
   1300 
   1301     PredList.clear();
   1302     for (BasicBlock *Pred : PredCache.get(BB)) {
   1303       PredList.push_back(std::make_pair(Pred, Pointer));
   1304 
   1305       // Get the PHI translated pointer in this predecessor.  This can fail if
   1306       // not translatable, in which case the getAddr() returns null.
   1307       PHITransAddr &PredPointer = PredList.back().second;
   1308       PredPointer.PHITranslateValue(BB, Pred, &DT, /*MustDominate=*/false);
   1309       Value *PredPtrVal = PredPointer.getAddr();
   1310 
   1311       // Check to see if we have already visited this pred block with another
   1312       // pointer.  If so, we can't do this lookup.  This failure can occur
   1313       // with PHI translation when a critical edge exists and the PHI node in
   1314       // the successor translates to a pointer value different than the
   1315       // pointer the block was first analyzed with.
   1316       std::pair<DenseMap<BasicBlock *, Value *>::iterator, bool> InsertRes =
   1317           Visited.insert(std::make_pair(Pred, PredPtrVal));
   1318 
   1319       if (!InsertRes.second) {
   1320         // We found the pred; take it off the list of preds to visit.
   1321         PredList.pop_back();
   1322 
   1323         // If the predecessor was visited with PredPtr, then we already did
   1324         // the analysis and can ignore it.
   1325         if (InsertRes.first->second == PredPtrVal)
   1326           continue;
   1327 
   1328         // Otherwise, the block was previously analyzed with a different
   1329         // pointer.  We can't represent the result of this case, so we just
   1330         // treat this as a phi translation failure.
   1331 
   1332         // Make sure to clean up the Visited map before continuing on to
   1333         // PredTranslationFailure.
   1334         for (unsigned i = 0, n = PredList.size(); i < n; ++i)
   1335           Visited.erase(PredList[i].first);
   1336 
   1337         goto PredTranslationFailure;
   1338       }
   1339     }
   1340 
   1341     // Actually process results here; this need to be a separate loop to avoid
   1342     // calling getNonLocalPointerDepFromBB for blocks we don't want to return
   1343     // any results for.  (getNonLocalPointerDepFromBB will modify our
   1344     // datastructures in ways the code after the PredTranslationFailure label
   1345     // doesn't expect.)
   1346     for (unsigned i = 0, n = PredList.size(); i < n; ++i) {
   1347       BasicBlock *Pred = PredList[i].first;
   1348       PHITransAddr &PredPointer = PredList[i].second;
   1349       Value *PredPtrVal = PredPointer.getAddr();
   1350 
   1351       bool CanTranslate = true;
   1352       // If PHI translation was unable to find an available pointer in this
   1353       // predecessor, then we have to assume that the pointer is clobbered in
   1354       // that predecessor.  We can still do PRE of the load, which would insert
   1355       // a computation of the pointer in this predecessor.
   1356       if (!PredPtrVal)
   1357         CanTranslate = false;
   1358 
   1359       // FIXME: it is entirely possible that PHI translating will end up with
   1360       // the same value.  Consider PHI translating something like:
   1361       // X = phi [x, bb1], [y, bb2].  PHI translating for bb1 doesn't *need*
   1362       // to recurse here, pedantically speaking.
   1363 
   1364       // If getNonLocalPointerDepFromBB fails here, that means the cached
   1365       // result conflicted with the Visited list; we have to conservatively
   1366       // assume it is unknown, but this also does not block PRE of the load.
   1367       if (!CanTranslate ||
   1368           !getNonLocalPointerDepFromBB(QueryInst, PredPointer,
   1369                                       Loc.getWithNewPtr(PredPtrVal), isLoad,
   1370                                       Pred, Result, Visited)) {
   1371         // Add the entry to the Result list.
   1372         NonLocalDepResult Entry(Pred, MemDepResult::getUnknown(), PredPtrVal);
   1373         Result.push_back(Entry);
   1374 
   1375         // Since we had a phi translation failure, the cache for CacheKey won't
   1376         // include all of the entries that we need to immediately satisfy future
   1377         // queries.  Mark this in NonLocalPointerDeps by setting the
   1378         // BBSkipFirstBlockPair pointer to null.  This requires reuse of the
   1379         // cached value to do more work but not miss the phi trans failure.
   1380         NonLocalPointerInfo &NLPI = NonLocalPointerDeps[CacheKey];
   1381         NLPI.Pair = BBSkipFirstBlockPair();
   1382         continue;
   1383       }
   1384     }
   1385 
   1386     // Refresh the CacheInfo/Cache pointer so that it isn't invalidated.
   1387     CacheInfo = &NonLocalPointerDeps[CacheKey];
   1388     Cache = &CacheInfo->NonLocalDeps;
   1389     NumSortedEntries = Cache->size();
   1390 
   1391     // Since we did phi translation, the "Cache" set won't contain all of the
   1392     // results for the query.  This is ok (we can still use it to accelerate
   1393     // specific block queries) but we can't do the fastpath "return all
   1394     // results from the set"  Clear out the indicator for this.
   1395     CacheInfo->Pair = BBSkipFirstBlockPair();
   1396     SkipFirstBlock = false;
   1397     continue;
   1398 
   1399   PredTranslationFailure:
   1400     // The following code is "failure"; we can't produce a sane translation
   1401     // for the given block.  It assumes that we haven't modified any of
   1402     // our datastructures while processing the current block.
   1403 
   1404     if (!Cache) {
   1405       // Refresh the CacheInfo/Cache pointer if it got invalidated.
   1406       CacheInfo = &NonLocalPointerDeps[CacheKey];
   1407       Cache = &CacheInfo->NonLocalDeps;
   1408       NumSortedEntries = Cache->size();
   1409     }
   1410 
   1411     // Since we failed phi translation, the "Cache" set won't contain all of the
   1412     // results for the query.  This is ok (we can still use it to accelerate
   1413     // specific block queries) but we can't do the fastpath "return all
   1414     // results from the set".  Clear out the indicator for this.
   1415     CacheInfo->Pair = BBSkipFirstBlockPair();
   1416 
   1417     // If *nothing* works, mark the pointer as unknown.
   1418     //
   1419     // If this is the magic first block, return this as a clobber of the whole
   1420     // incoming value.  Since we can't phi translate to one of the predecessors,
   1421     // we have to bail out.
   1422     if (SkipFirstBlock)
   1423       return false;
   1424 
   1425     // Results of invariant loads are not cached thus no need to update cached
   1426     // information.
   1427     if (!isInvariantLoad) {
   1428       for (NonLocalDepEntry &I : llvm::reverse(*Cache)) {
   1429         if (I.getBB() != BB)
   1430           continue;
   1431 
   1432         assert((GotWorklistLimit || I.getResult().isNonLocal() ||
   1433                 !DT.isReachableFromEntry(BB)) &&
   1434                "Should only be here with transparent block");
   1435 
   1436         I.setResult(MemDepResult::getUnknown());
   1437 
   1438 
   1439         break;
   1440       }
   1441     }
   1442     (void)GotWorklistLimit;
   1443     // Go ahead and report unknown dependence.
   1444     Result.push_back(
   1445         NonLocalDepResult(BB, MemDepResult::getUnknown(), Pointer.getAddr()));
   1446   }
   1447 
   1448   // Okay, we're done now.  If we added new values to the cache, re-sort it.
   1449   SortNonLocalDepInfoCache(*Cache, NumSortedEntries);
   1450   LLVM_DEBUG(AssertSorted(*Cache));
   1451   return true;
   1452 }
   1453 
   1454 /// If P exists in CachedNonLocalPointerInfo or NonLocalDefsCache, remove it.
   1455 void MemoryDependenceResults::RemoveCachedNonLocalPointerDependencies(
   1456     ValueIsLoadPair P) {
   1457 
   1458   // Most of the time this cache is empty.
   1459   if (!NonLocalDefsCache.empty()) {
   1460     auto it = NonLocalDefsCache.find(P.getPointer());
   1461     if (it != NonLocalDefsCache.end()) {
   1462       RemoveFromReverseMap(ReverseNonLocalDefsCache,
   1463                            it->second.getResult().getInst(), P.getPointer());
   1464       NonLocalDefsCache.erase(it);
   1465     }
   1466 
   1467     if (auto *I = dyn_cast<Instruction>(P.getPointer())) {
   1468       auto toRemoveIt = ReverseNonLocalDefsCache.find(I);
   1469       if (toRemoveIt != ReverseNonLocalDefsCache.end()) {
   1470         for (const auto *entry : toRemoveIt->second)
   1471           NonLocalDefsCache.erase(entry);
   1472         ReverseNonLocalDefsCache.erase(toRemoveIt);
   1473       }
   1474     }
   1475   }
   1476 
   1477   CachedNonLocalPointerInfo::iterator It = NonLocalPointerDeps.find(P);
   1478   if (It == NonLocalPointerDeps.end())
   1479     return;
   1480 
   1481   // Remove all of the entries in the BB->val map.  This involves removing
   1482   // instructions from the reverse map.
   1483   NonLocalDepInfo &PInfo = It->second.NonLocalDeps;
   1484 
   1485   for (unsigned i = 0, e = PInfo.size(); i != e; ++i) {
   1486     Instruction *Target = PInfo[i].getResult().getInst();
   1487     if (!Target)
   1488       continue; // Ignore non-local dep results.
   1489     assert(Target->getParent() == PInfo[i].getBB());
   1490 
   1491     // Eliminating the dirty entry from 'Cache', so update the reverse info.
   1492     RemoveFromReverseMap(ReverseNonLocalPtrDeps, Target, P);
   1493   }
   1494 
   1495   // Remove P from NonLocalPointerDeps (which deletes NonLocalDepInfo).
   1496   NonLocalPointerDeps.erase(It);
   1497 }
   1498 
   1499 void MemoryDependenceResults::invalidateCachedPointerInfo(Value *Ptr) {
   1500   // If Ptr isn't really a pointer, just ignore it.
   1501   if (!Ptr->getType()->isPointerTy())
   1502     return;
   1503   // Flush store info for the pointer.
   1504   RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(Ptr, false));
   1505   // Flush load info for the pointer.
   1506   RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(Ptr, true));
   1507   // Invalidate phis that use the pointer.
   1508   PV.invalidateValue(Ptr);
   1509 }
   1510 
   1511 void MemoryDependenceResults::invalidateCachedPredecessors() {
   1512   PredCache.clear();
   1513 }
   1514 
   1515 void MemoryDependenceResults::removeInstruction(Instruction *RemInst) {
   1516   // Walk through the Non-local dependencies, removing this one as the value
   1517   // for any cached queries.
   1518   NonLocalDepMapType::iterator NLDI = NonLocalDeps.find(RemInst);
   1519   if (NLDI != NonLocalDeps.end()) {
   1520     NonLocalDepInfo &BlockMap = NLDI->second.first;
   1521     for (auto &Entry : BlockMap)
   1522       if (Instruction *Inst = Entry.getResult().getInst())
   1523         RemoveFromReverseMap(ReverseNonLocalDeps, Inst, RemInst);
   1524     NonLocalDeps.erase(NLDI);
   1525   }
   1526 
   1527   // If we have a cached local dependence query for this instruction, remove it.
   1528   LocalDepMapType::iterator LocalDepEntry = LocalDeps.find(RemInst);
   1529   if (LocalDepEntry != LocalDeps.end()) {
   1530     // Remove us from DepInst's reverse set now that the local dep info is gone.
   1531     if (Instruction *Inst = LocalDepEntry->second.getInst())
   1532       RemoveFromReverseMap(ReverseLocalDeps, Inst, RemInst);
   1533 
   1534     // Remove this local dependency info.
   1535     LocalDeps.erase(LocalDepEntry);
   1536   }
   1537 
   1538   // If we have any cached dependencies on this instruction, remove
   1539   // them.
   1540 
   1541   // If the instruction is a pointer, remove it from both the load info and the
   1542   // store info.
   1543   if (RemInst->getType()->isPointerTy()) {
   1544     RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(RemInst, false));
   1545     RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(RemInst, true));
   1546   } else {
   1547     // Otherwise, if the instructions is in the map directly, it must be a load.
   1548     // Remove it.
   1549     auto toRemoveIt = NonLocalDefsCache.find(RemInst);
   1550     if (toRemoveIt != NonLocalDefsCache.end()) {
   1551       assert(isa<LoadInst>(RemInst) &&
   1552              "only load instructions should be added directly");
   1553       const Instruction *DepV = toRemoveIt->second.getResult().getInst();
   1554       ReverseNonLocalDefsCache.find(DepV)->second.erase(RemInst);
   1555       NonLocalDefsCache.erase(toRemoveIt);
   1556     }
   1557   }
   1558 
   1559   // Loop over all of the things that depend on the instruction we're removing.
   1560   SmallVector<std::pair<Instruction *, Instruction *>, 8> ReverseDepsToAdd;
   1561 
   1562   // If we find RemInst as a clobber or Def in any of the maps for other values,
   1563   // we need to replace its entry with a dirty version of the instruction after
   1564   // it.  If RemInst is a terminator, we use a null dirty value.
   1565   //
   1566   // Using a dirty version of the instruction after RemInst saves having to scan
   1567   // the entire block to get to this point.
   1568   MemDepResult NewDirtyVal;
   1569   if (!RemInst->isTerminator())
   1570     NewDirtyVal = MemDepResult::getDirty(&*++RemInst->getIterator());
   1571 
   1572   ReverseDepMapType::iterator ReverseDepIt = ReverseLocalDeps.find(RemInst);
   1573   if (ReverseDepIt != ReverseLocalDeps.end()) {
   1574     // RemInst can't be the terminator if it has local stuff depending on it.
   1575     assert(!ReverseDepIt->second.empty() && !RemInst->isTerminator() &&
   1576            "Nothing can locally depend on a terminator");
   1577 
   1578     for (Instruction *InstDependingOnRemInst : ReverseDepIt->second) {
   1579       assert(InstDependingOnRemInst != RemInst &&
   1580              "Already removed our local dep info");
   1581 
   1582       LocalDeps[InstDependingOnRemInst] = NewDirtyVal;
   1583 
   1584       // Make sure to remember that new things depend on NewDepInst.
   1585       assert(NewDirtyVal.getInst() &&
   1586              "There is no way something else can have "
   1587              "a local dep on this if it is a terminator!");
   1588       ReverseDepsToAdd.push_back(
   1589           std::make_pair(NewDirtyVal.getInst(), InstDependingOnRemInst));
   1590     }
   1591 
   1592     ReverseLocalDeps.erase(ReverseDepIt);
   1593 
   1594     // Add new reverse deps after scanning the set, to avoid invalidating the
   1595     // 'ReverseDeps' reference.
   1596     while (!ReverseDepsToAdd.empty()) {
   1597       ReverseLocalDeps[ReverseDepsToAdd.back().first].insert(
   1598           ReverseDepsToAdd.back().second);
   1599       ReverseDepsToAdd.pop_back();
   1600     }
   1601   }
   1602 
   1603   ReverseDepIt = ReverseNonLocalDeps.find(RemInst);
   1604   if (ReverseDepIt != ReverseNonLocalDeps.end()) {
   1605     for (Instruction *I : ReverseDepIt->second) {
   1606       assert(I != RemInst && "Already removed NonLocalDep info for RemInst");
   1607 
   1608       PerInstNLInfo &INLD = NonLocalDeps[I];
   1609       // The information is now dirty!
   1610       INLD.second = true;
   1611 
   1612       for (auto &Entry : INLD.first) {
   1613         if (Entry.getResult().getInst() != RemInst)
   1614           continue;
   1615 
   1616         // Convert to a dirty entry for the subsequent instruction.
   1617         Entry.setResult(NewDirtyVal);
   1618 
   1619         if (Instruction *NextI = NewDirtyVal.getInst())
   1620           ReverseDepsToAdd.push_back(std::make_pair(NextI, I));
   1621       }
   1622     }
   1623 
   1624     ReverseNonLocalDeps.erase(ReverseDepIt);
   1625 
   1626     // Add new reverse deps after scanning the set, to avoid invalidating 'Set'
   1627     while (!ReverseDepsToAdd.empty()) {
   1628       ReverseNonLocalDeps[ReverseDepsToAdd.back().first].insert(
   1629           ReverseDepsToAdd.back().second);
   1630       ReverseDepsToAdd.pop_back();
   1631     }
   1632   }
   1633 
   1634   // If the instruction is in ReverseNonLocalPtrDeps then it appears as a
   1635   // value in the NonLocalPointerDeps info.
   1636   ReverseNonLocalPtrDepTy::iterator ReversePtrDepIt =
   1637       ReverseNonLocalPtrDeps.find(RemInst);
   1638   if (ReversePtrDepIt != ReverseNonLocalPtrDeps.end()) {
   1639     SmallVector<std::pair<Instruction *, ValueIsLoadPair>, 8>
   1640         ReversePtrDepsToAdd;
   1641 
   1642     for (ValueIsLoadPair P : ReversePtrDepIt->second) {
   1643       assert(P.getPointer() != RemInst &&
   1644              "Already removed NonLocalPointerDeps info for RemInst");
   1645 
   1646       NonLocalDepInfo &NLPDI = NonLocalPointerDeps[P].NonLocalDeps;
   1647 
   1648       // The cache is not valid for any specific block anymore.
   1649       NonLocalPointerDeps[P].Pair = BBSkipFirstBlockPair();
   1650 
   1651       // Update any entries for RemInst to use the instruction after it.
   1652       for (auto &Entry : NLPDI) {
   1653         if (Entry.getResult().getInst() != RemInst)
   1654           continue;
   1655 
   1656         // Convert to a dirty entry for the subsequent instruction.
   1657         Entry.setResult(NewDirtyVal);
   1658 
   1659         if (Instruction *NewDirtyInst = NewDirtyVal.getInst())
   1660           ReversePtrDepsToAdd.push_back(std::make_pair(NewDirtyInst, P));
   1661       }
   1662 
   1663       // Re-sort the NonLocalDepInfo.  Changing the dirty entry to its
   1664       // subsequent value may invalidate the sortedness.
   1665       llvm::sort(NLPDI);
   1666     }
   1667 
   1668     ReverseNonLocalPtrDeps.erase(ReversePtrDepIt);
   1669 
   1670     while (!ReversePtrDepsToAdd.empty()) {
   1671       ReverseNonLocalPtrDeps[ReversePtrDepsToAdd.back().first].insert(
   1672           ReversePtrDepsToAdd.back().second);
   1673       ReversePtrDepsToAdd.pop_back();
   1674     }
   1675   }
   1676 
   1677   // Invalidate phis that use the removed instruction.
   1678   PV.invalidateValue(RemInst);
   1679 
   1680   assert(!NonLocalDeps.count(RemInst) && "RemInst got reinserted?");
   1681   LLVM_DEBUG(verifyRemoved(RemInst));
   1682 }
   1683 
   1684 /// Verify that the specified instruction does not occur in our internal data
   1685 /// structures.
   1686 ///
   1687 /// This function verifies by asserting in debug builds.
   1688 void MemoryDependenceResults::verifyRemoved(Instruction *D) const {
   1689 #ifndef NDEBUG
   1690   for (const auto &DepKV : LocalDeps) {
   1691     assert(DepKV.first != D && "Inst occurs in data structures");
   1692     assert(DepKV.second.getInst() != D && "Inst occurs in data structures");
   1693   }
   1694 
   1695   for (const auto &DepKV : NonLocalPointerDeps) {
   1696     assert(DepKV.first.getPointer() != D && "Inst occurs in NLPD map key");
   1697     for (const auto &Entry : DepKV.second.NonLocalDeps)
   1698       assert(Entry.getResult().getInst() != D && "Inst occurs as NLPD value");
   1699   }
   1700 
   1701   for (const auto &DepKV : NonLocalDeps) {
   1702     assert(DepKV.first != D && "Inst occurs in data structures");
   1703     const PerInstNLInfo &INLD = DepKV.second;
   1704     for (const auto &Entry : INLD.first)
   1705       assert(Entry.getResult().getInst() != D &&
   1706              "Inst occurs in data structures");
   1707   }
   1708 
   1709   for (const auto &DepKV : ReverseLocalDeps) {
   1710     assert(DepKV.first != D && "Inst occurs in data structures");
   1711     for (Instruction *Inst : DepKV.second)
   1712       assert(Inst != D && "Inst occurs in data structures");
   1713   }
   1714 
   1715   for (const auto &DepKV : ReverseNonLocalDeps) {
   1716     assert(DepKV.first != D && "Inst occurs in data structures");
   1717     for (Instruction *Inst : DepKV.second)
   1718       assert(Inst != D && "Inst occurs in data structures");
   1719   }
   1720 
   1721   for (const auto &DepKV : ReverseNonLocalPtrDeps) {
   1722     assert(DepKV.first != D && "Inst occurs in rev NLPD map");
   1723 
   1724     for (ValueIsLoadPair P : DepKV.second)
   1725       assert(P != ValueIsLoadPair(D, false) && P != ValueIsLoadPair(D, true) &&
   1726              "Inst occurs in ReverseNonLocalPtrDeps map");
   1727   }
   1728 #endif
   1729 }
   1730 
   1731 AnalysisKey MemoryDependenceAnalysis::Key;
   1732 
   1733 MemoryDependenceAnalysis::MemoryDependenceAnalysis()
   1734     : DefaultBlockScanLimit(BlockScanLimit) {}
   1735 
   1736 MemoryDependenceResults
   1737 MemoryDependenceAnalysis::run(Function &F, FunctionAnalysisManager &AM) {
   1738   auto &AA = AM.getResult<AAManager>(F);
   1739   auto &AC = AM.getResult<AssumptionAnalysis>(F);
   1740   auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
   1741   auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
   1742   auto &PV = AM.getResult<PhiValuesAnalysis>(F);
   1743   return MemoryDependenceResults(AA, AC, TLI, DT, PV, DefaultBlockScanLimit);
   1744 }
   1745 
   1746 char MemoryDependenceWrapperPass::ID = 0;
   1747 
   1748 INITIALIZE_PASS_BEGIN(MemoryDependenceWrapperPass, "memdep",
   1749                       "Memory Dependence Analysis", false, true)
   1750 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
   1751 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
   1752 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
   1753 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
   1754 INITIALIZE_PASS_DEPENDENCY(PhiValuesWrapperPass)
   1755 INITIALIZE_PASS_END(MemoryDependenceWrapperPass, "memdep",
   1756                     "Memory Dependence Analysis", false, true)
   1757 
   1758 MemoryDependenceWrapperPass::MemoryDependenceWrapperPass() : FunctionPass(ID) {
   1759   initializeMemoryDependenceWrapperPassPass(*PassRegistry::getPassRegistry());
   1760 }
   1761 
   1762 MemoryDependenceWrapperPass::~MemoryDependenceWrapperPass() = default;
   1763 
   1764 void MemoryDependenceWrapperPass::releaseMemory() {
   1765   MemDep.reset();
   1766 }
   1767 
   1768 void MemoryDependenceWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
   1769   AU.setPreservesAll();
   1770   AU.addRequired<AssumptionCacheTracker>();
   1771   AU.addRequired<DominatorTreeWrapperPass>();
   1772   AU.addRequired<PhiValuesWrapperPass>();
   1773   AU.addRequiredTransitive<AAResultsWrapperPass>();
   1774   AU.addRequiredTransitive<TargetLibraryInfoWrapperPass>();
   1775 }
   1776 
   1777 bool MemoryDependenceResults::invalidate(Function &F, const PreservedAnalyses &PA,
   1778                                FunctionAnalysisManager::Invalidator &Inv) {
   1779   // Check whether our analysis is preserved.
   1780   auto PAC = PA.getChecker<MemoryDependenceAnalysis>();
   1781   if (!PAC.preserved() && !PAC.preservedSet<AllAnalysesOn<Function>>())
   1782     // If not, give up now.
   1783     return true;
   1784 
   1785   // Check whether the analyses we depend on became invalid for any reason.
   1786   if (Inv.invalidate<AAManager>(F, PA) ||
   1787       Inv.invalidate<AssumptionAnalysis>(F, PA) ||
   1788       Inv.invalidate<DominatorTreeAnalysis>(F, PA) ||
   1789       Inv.invalidate<PhiValuesAnalysis>(F, PA))
   1790     return true;
   1791 
   1792   // Otherwise this analysis result remains valid.
   1793   return false;
   1794 }
   1795 
   1796 unsigned MemoryDependenceResults::getDefaultBlockScanLimit() const {
   1797   return DefaultBlockScanLimit;
   1798 }
   1799 
   1800 bool MemoryDependenceWrapperPass::runOnFunction(Function &F) {
   1801   auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
   1802   auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
   1803   auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
   1804   auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
   1805   auto &PV = getAnalysis<PhiValuesWrapperPass>().getResult();
   1806   MemDep.emplace(AA, AC, TLI, DT, PV, BlockScanLimit);
   1807   return false;
   1808 }
   1809