Home | History | Annotate | Line # | Download | only in Scalar
      1 //===- InferAddressSpace.cpp - --------------------------------------------===//
      2 //
      3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
      4 // See https://llvm.org/LICENSE.txt for license information.
      5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
      6 //
      7 //===----------------------------------------------------------------------===//
      8 //
      9 // CUDA C/C++ includes memory space designation as variable type qualifers (such
     10 // as __global__ and __shared__). Knowing the space of a memory access allows
     11 // CUDA compilers to emit faster PTX loads and stores. For example, a load from
     12 // shared memory can be translated to `ld.shared` which is roughly 10% faster
     13 // than a generic `ld` on an NVIDIA Tesla K40c.
     14 //
     15 // Unfortunately, type qualifiers only apply to variable declarations, so CUDA
     16 // compilers must infer the memory space of an address expression from
     17 // type-qualified variables.
     18 //
     19 // LLVM IR uses non-zero (so-called) specific address spaces to represent memory
     20 // spaces (e.g. addrspace(3) means shared memory). The Clang frontend
     21 // places only type-qualified variables in specific address spaces, and then
     22 // conservatively `addrspacecast`s each type-qualified variable to addrspace(0)
     23 // (so-called the generic address space) for other instructions to use.
     24 //
     25 // For example, the Clang translates the following CUDA code
     26 //   __shared__ float a[10];
     27 //   float v = a[i];
     28 // to
     29 //   %0 = addrspacecast [10 x float] addrspace(3)* @a to [10 x float]*
     30 //   %1 = gep [10 x float], [10 x float]* %0, i64 0, i64 %i
     31 //   %v = load float, float* %1 ; emits ld.f32
     32 // @a is in addrspace(3) since it's type-qualified, but its use from %1 is
     33 // redirected to %0 (the generic version of @a).
     34 //
     35 // The optimization implemented in this file propagates specific address spaces
     36 // from type-qualified variable declarations to its users. For example, it
     37 // optimizes the above IR to
     38 //   %1 = gep [10 x float] addrspace(3)* @a, i64 0, i64 %i
     39 //   %v = load float addrspace(3)* %1 ; emits ld.shared.f32
     40 // propagating the addrspace(3) from @a to %1. As the result, the NVPTX
     41 // codegen is able to emit ld.shared.f32 for %v.
     42 //
     43 // Address space inference works in two steps. First, it uses a data-flow
     44 // analysis to infer as many generic pointers as possible to point to only one
     45 // specific address space. In the above example, it can prove that %1 only
     46 // points to addrspace(3). This algorithm was published in
     47 //   CUDA: Compiling and optimizing for a GPU platform
     48 //   Chakrabarti, Grover, Aarts, Kong, Kudlur, Lin, Marathe, Murphy, Wang
     49 //   ICCS 2012
     50 //
     51 // Then, address space inference replaces all refinable generic pointers with
     52 // equivalent specific pointers.
     53 //
     54 // The major challenge of implementing this optimization is handling PHINodes,
     55 // which may create loops in the data flow graph. This brings two complications.
     56 //
     57 // First, the data flow analysis in Step 1 needs to be circular. For example,
     58 //     %generic.input = addrspacecast float addrspace(3)* %input to float*
     59 //   loop:
     60 //     %y = phi [ %generic.input, %y2 ]
     61 //     %y2 = getelementptr %y, 1
     62 //     %v = load %y2
     63 //     br ..., label %loop, ...
     64 // proving %y specific requires proving both %generic.input and %y2 specific,
     65 // but proving %y2 specific circles back to %y. To address this complication,
     66 // the data flow analysis operates on a lattice:
     67 //   uninitialized > specific address spaces > generic.
     68 // All address expressions (our implementation only considers phi, bitcast,
     69 // addrspacecast, and getelementptr) start with the uninitialized address space.
     70 // The monotone transfer function moves the address space of a pointer down a
     71 // lattice path from uninitialized to specific and then to generic. A join
     72 // operation of two different specific address spaces pushes the expression down
     73 // to the generic address space. The analysis completes once it reaches a fixed
     74 // point.
     75 //
     76 // Second, IR rewriting in Step 2 also needs to be circular. For example,
     77 // converting %y to addrspace(3) requires the compiler to know the converted
     78 // %y2, but converting %y2 needs the converted %y. To address this complication,
     79 // we break these cycles using "undef" placeholders. When converting an
     80 // instruction `I` to a new address space, if its operand `Op` is not converted
     81 // yet, we let `I` temporarily use `undef` and fix all the uses of undef later.
     82 // For instance, our algorithm first converts %y to
     83 //   %y' = phi float addrspace(3)* [ %input, undef ]
     84 // Then, it converts %y2 to
     85 //   %y2' = getelementptr %y', 1
     86 // Finally, it fixes the undef in %y' so that
     87 //   %y' = phi float addrspace(3)* [ %input, %y2' ]
     88 //
     89 //===----------------------------------------------------------------------===//
     90 
     91 #include "llvm/Transforms/Scalar/InferAddressSpaces.h"
     92 #include "llvm/ADT/ArrayRef.h"
     93 #include "llvm/ADT/DenseMap.h"
     94 #include "llvm/ADT/DenseSet.h"
     95 #include "llvm/ADT/None.h"
     96 #include "llvm/ADT/Optional.h"
     97 #include "llvm/ADT/SetVector.h"
     98 #include "llvm/ADT/SmallVector.h"
     99 #include "llvm/Analysis/TargetTransformInfo.h"
    100 #include "llvm/IR/BasicBlock.h"
    101 #include "llvm/IR/Constant.h"
    102 #include "llvm/IR/Constants.h"
    103 #include "llvm/IR/Function.h"
    104 #include "llvm/IR/IRBuilder.h"
    105 #include "llvm/IR/InstIterator.h"
    106 #include "llvm/IR/Instruction.h"
    107 #include "llvm/IR/Instructions.h"
    108 #include "llvm/IR/IntrinsicInst.h"
    109 #include "llvm/IR/Intrinsics.h"
    110 #include "llvm/IR/LLVMContext.h"
    111 #include "llvm/IR/Operator.h"
    112 #include "llvm/IR/PassManager.h"
    113 #include "llvm/IR/Type.h"
    114 #include "llvm/IR/Use.h"
    115 #include "llvm/IR/User.h"
    116 #include "llvm/IR/Value.h"
    117 #include "llvm/IR/ValueHandle.h"
    118 #include "llvm/Pass.h"
    119 #include "llvm/Support/Casting.h"
    120 #include "llvm/Support/CommandLine.h"
    121 #include "llvm/Support/Compiler.h"
    122 #include "llvm/Support/Debug.h"
    123 #include "llvm/Support/ErrorHandling.h"
    124 #include "llvm/Support/raw_ostream.h"
    125 #include "llvm/Transforms/Scalar.h"
    126 #include "llvm/Transforms/Utils/Local.h"
    127 #include "llvm/Transforms/Utils/ValueMapper.h"
    128 #include <cassert>
    129 #include <iterator>
    130 #include <limits>
    131 #include <utility>
    132 #include <vector>
    133 
    134 #define DEBUG_TYPE "infer-address-spaces"
    135 
    136 using namespace llvm;
    137 
    138 static cl::opt<bool> AssumeDefaultIsFlatAddressSpace(
    139     "assume-default-is-flat-addrspace", cl::init(false), cl::ReallyHidden,
    140     cl::desc("The default address space is assumed as the flat address space. "
    141              "This is mainly for test purpose."));
    142 
    143 static const unsigned UninitializedAddressSpace =
    144     std::numeric_limits<unsigned>::max();
    145 
    146 namespace {
    147 
    148 using ValueToAddrSpaceMapTy = DenseMap<const Value *, unsigned>;
    149 using PostorderStackTy = llvm::SmallVector<PointerIntPair<Value *, 1, bool>, 4>;
    150 
    151 class InferAddressSpaces : public FunctionPass {
    152   unsigned FlatAddrSpace = 0;
    153 
    154 public:
    155   static char ID;
    156 
    157   InferAddressSpaces() :
    158     FunctionPass(ID), FlatAddrSpace(UninitializedAddressSpace) {}
    159   InferAddressSpaces(unsigned AS) : FunctionPass(ID), FlatAddrSpace(AS) {}
    160 
    161   void getAnalysisUsage(AnalysisUsage &AU) const override {
    162     AU.setPreservesCFG();
    163     AU.addRequired<TargetTransformInfoWrapperPass>();
    164   }
    165 
    166   bool runOnFunction(Function &F) override;
    167 };
    168 
    169 class InferAddressSpacesImpl {
    170   const TargetTransformInfo *TTI = nullptr;
    171   const DataLayout *DL = nullptr;
    172 
    173   /// Target specific address space which uses of should be replaced if
    174   /// possible.
    175   unsigned FlatAddrSpace = 0;
    176 
    177   // Returns the new address space of V if updated; otherwise, returns None.
    178   Optional<unsigned>
    179   updateAddressSpace(const Value &V,
    180                      const ValueToAddrSpaceMapTy &InferredAddrSpace) const;
    181 
    182   // Tries to infer the specific address space of each address expression in
    183   // Postorder.
    184   void inferAddressSpaces(ArrayRef<WeakTrackingVH> Postorder,
    185                           ValueToAddrSpaceMapTy *InferredAddrSpace) const;
    186 
    187   bool isSafeToCastConstAddrSpace(Constant *C, unsigned NewAS) const;
    188 
    189   Value *cloneInstructionWithNewAddressSpace(
    190       Instruction *I, unsigned NewAddrSpace,
    191       const ValueToValueMapTy &ValueWithNewAddrSpace,
    192       SmallVectorImpl<const Use *> *UndefUsesToFix) const;
    193 
    194   // Changes the flat address expressions in function F to point to specific
    195   // address spaces if InferredAddrSpace says so. Postorder is the postorder of
    196   // all flat expressions in the use-def graph of function F.
    197   bool rewriteWithNewAddressSpaces(
    198       const TargetTransformInfo &TTI, ArrayRef<WeakTrackingVH> Postorder,
    199       const ValueToAddrSpaceMapTy &InferredAddrSpace, Function *F) const;
    200 
    201   void appendsFlatAddressExpressionToPostorderStack(
    202       Value *V, PostorderStackTy &PostorderStack,
    203       DenseSet<Value *> &Visited) const;
    204 
    205   bool rewriteIntrinsicOperands(IntrinsicInst *II,
    206                                 Value *OldV, Value *NewV) const;
    207   void collectRewritableIntrinsicOperands(IntrinsicInst *II,
    208                                           PostorderStackTy &PostorderStack,
    209                                           DenseSet<Value *> &Visited) const;
    210 
    211   std::vector<WeakTrackingVH> collectFlatAddressExpressions(Function &F) const;
    212 
    213   Value *cloneValueWithNewAddressSpace(
    214     Value *V, unsigned NewAddrSpace,
    215     const ValueToValueMapTy &ValueWithNewAddrSpace,
    216     SmallVectorImpl<const Use *> *UndefUsesToFix) const;
    217   unsigned joinAddressSpaces(unsigned AS1, unsigned AS2) const;
    218 
    219 public:
    220   InferAddressSpacesImpl(const TargetTransformInfo *TTI, unsigned FlatAddrSpace)
    221       : TTI(TTI), FlatAddrSpace(FlatAddrSpace) {}
    222   bool run(Function &F);
    223 };
    224 
    225 } // end anonymous namespace
    226 
    227 char InferAddressSpaces::ID = 0;
    228 
    229 namespace llvm {
    230 
    231 void initializeInferAddressSpacesPass(PassRegistry &);
    232 
    233 } // end namespace llvm
    234 
    235 INITIALIZE_PASS(InferAddressSpaces, DEBUG_TYPE, "Infer address spaces",
    236                 false, false)
    237 
    238 // Check whether that's no-op pointer bicast using a pair of
    239 // `ptrtoint`/`inttoptr` due to the missing no-op pointer bitcast over
    240 // different address spaces.
    241 static bool isNoopPtrIntCastPair(const Operator *I2P, const DataLayout &DL,
    242                                  const TargetTransformInfo *TTI) {
    243   assert(I2P->getOpcode() == Instruction::IntToPtr);
    244   auto *P2I = dyn_cast<Operator>(I2P->getOperand(0));
    245   if (!P2I || P2I->getOpcode() != Instruction::PtrToInt)
    246     return false;
    247   // Check it's really safe to treat that pair of `ptrtoint`/`inttoptr` as a
    248   // no-op cast. Besides checking both of them are no-op casts, as the
    249   // reinterpreted pointer may be used in other pointer arithmetic, we also
    250   // need to double-check that through the target-specific hook. That ensures
    251   // the underlying target also agrees that's a no-op address space cast and
    252   // pointer bits are preserved.
    253   // The current IR spec doesn't have clear rules on address space casts,
    254   // especially a clear definition for pointer bits in non-default address
    255   // spaces. It would be undefined if that pointer is dereferenced after an
    256   // invalid reinterpret cast. Also, due to the unclearness for the meaning of
    257   // bits in non-default address spaces in the current spec, the pointer
    258   // arithmetic may also be undefined after invalid pointer reinterpret cast.
    259   // However, as we confirm through the target hooks that it's a no-op
    260   // addrspacecast, it doesn't matter since the bits should be the same.
    261   return CastInst::isNoopCast(Instruction::CastOps(I2P->getOpcode()),
    262                               I2P->getOperand(0)->getType(), I2P->getType(),
    263                               DL) &&
    264          CastInst::isNoopCast(Instruction::CastOps(P2I->getOpcode()),
    265                               P2I->getOperand(0)->getType(), P2I->getType(),
    266                               DL) &&
    267          TTI->isNoopAddrSpaceCast(
    268              P2I->getOperand(0)->getType()->getPointerAddressSpace(),
    269              I2P->getType()->getPointerAddressSpace());
    270 }
    271 
    272 // Returns true if V is an address expression.
    273 // TODO: Currently, we consider only phi, bitcast, addrspacecast, and
    274 // getelementptr operators.
    275 static bool isAddressExpression(const Value &V, const DataLayout &DL,
    276                                 const TargetTransformInfo *TTI) {
    277   const Operator *Op = dyn_cast<Operator>(&V);
    278   if (!Op)
    279     return false;
    280 
    281   switch (Op->getOpcode()) {
    282   case Instruction::PHI:
    283     assert(Op->getType()->isPointerTy());
    284     return true;
    285   case Instruction::BitCast:
    286   case Instruction::AddrSpaceCast:
    287   case Instruction::GetElementPtr:
    288     return true;
    289   case Instruction::Select:
    290     return Op->getType()->isPointerTy();
    291   case Instruction::Call: {
    292     const IntrinsicInst *II = dyn_cast<IntrinsicInst>(&V);
    293     return II && II->getIntrinsicID() == Intrinsic::ptrmask;
    294   }
    295   case Instruction::IntToPtr:
    296     return isNoopPtrIntCastPair(Op, DL, TTI);
    297   default:
    298     // That value is an address expression if it has an assumed address space.
    299     return TTI->getAssumedAddrSpace(&V) != UninitializedAddressSpace;
    300   }
    301 }
    302 
    303 // Returns the pointer operands of V.
    304 //
    305 // Precondition: V is an address expression.
    306 static SmallVector<Value *, 2>
    307 getPointerOperands(const Value &V, const DataLayout &DL,
    308                    const TargetTransformInfo *TTI) {
    309   const Operator &Op = cast<Operator>(V);
    310   switch (Op.getOpcode()) {
    311   case Instruction::PHI: {
    312     auto IncomingValues = cast<PHINode>(Op).incoming_values();
    313     return SmallVector<Value *, 2>(IncomingValues.begin(),
    314                                    IncomingValues.end());
    315   }
    316   case Instruction::BitCast:
    317   case Instruction::AddrSpaceCast:
    318   case Instruction::GetElementPtr:
    319     return {Op.getOperand(0)};
    320   case Instruction::Select:
    321     return {Op.getOperand(1), Op.getOperand(2)};
    322   case Instruction::Call: {
    323     const IntrinsicInst &II = cast<IntrinsicInst>(Op);
    324     assert(II.getIntrinsicID() == Intrinsic::ptrmask &&
    325            "unexpected intrinsic call");
    326     return {II.getArgOperand(0)};
    327   }
    328   case Instruction::IntToPtr: {
    329     assert(isNoopPtrIntCastPair(&Op, DL, TTI));
    330     auto *P2I = cast<Operator>(Op.getOperand(0));
    331     return {P2I->getOperand(0)};
    332   }
    333   default:
    334     llvm_unreachable("Unexpected instruction type.");
    335   }
    336 }
    337 
    338 bool InferAddressSpacesImpl::rewriteIntrinsicOperands(IntrinsicInst *II,
    339                                                       Value *OldV,
    340                                                       Value *NewV) const {
    341   Module *M = II->getParent()->getParent()->getParent();
    342 
    343   switch (II->getIntrinsicID()) {
    344   case Intrinsic::objectsize: {
    345     Type *DestTy = II->getType();
    346     Type *SrcTy = NewV->getType();
    347     Function *NewDecl =
    348         Intrinsic::getDeclaration(M, II->getIntrinsicID(), {DestTy, SrcTy});
    349     II->setArgOperand(0, NewV);
    350     II->setCalledFunction(NewDecl);
    351     return true;
    352   }
    353   case Intrinsic::ptrmask:
    354     // This is handled as an address expression, not as a use memory operation.
    355     return false;
    356   default: {
    357     Value *Rewrite = TTI->rewriteIntrinsicWithAddressSpace(II, OldV, NewV);
    358     if (!Rewrite)
    359       return false;
    360     if (Rewrite != II)
    361       II->replaceAllUsesWith(Rewrite);
    362     return true;
    363   }
    364   }
    365 }
    366 
    367 void InferAddressSpacesImpl::collectRewritableIntrinsicOperands(
    368     IntrinsicInst *II, PostorderStackTy &PostorderStack,
    369     DenseSet<Value *> &Visited) const {
    370   auto IID = II->getIntrinsicID();
    371   switch (IID) {
    372   case Intrinsic::ptrmask:
    373   case Intrinsic::objectsize:
    374     appendsFlatAddressExpressionToPostorderStack(II->getArgOperand(0),
    375                                                  PostorderStack, Visited);
    376     break;
    377   default:
    378     SmallVector<int, 2> OpIndexes;
    379     if (TTI->collectFlatAddressOperands(OpIndexes, IID)) {
    380       for (int Idx : OpIndexes) {
    381         appendsFlatAddressExpressionToPostorderStack(II->getArgOperand(Idx),
    382                                                      PostorderStack, Visited);
    383       }
    384     }
    385     break;
    386   }
    387 }
    388 
    389 // Returns all flat address expressions in function F. The elements are
    390 // If V is an unvisited flat address expression, appends V to PostorderStack
    391 // and marks it as visited.
    392 void InferAddressSpacesImpl::appendsFlatAddressExpressionToPostorderStack(
    393     Value *V, PostorderStackTy &PostorderStack,
    394     DenseSet<Value *> &Visited) const {
    395   assert(V->getType()->isPointerTy());
    396 
    397   // Generic addressing expressions may be hidden in nested constant
    398   // expressions.
    399   if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
    400     // TODO: Look in non-address parts, like icmp operands.
    401     if (isAddressExpression(*CE, *DL, TTI) && Visited.insert(CE).second)
    402       PostorderStack.emplace_back(CE, false);
    403 
    404     return;
    405   }
    406 
    407   if (V->getType()->getPointerAddressSpace() == FlatAddrSpace &&
    408       isAddressExpression(*V, *DL, TTI)) {
    409     if (Visited.insert(V).second) {
    410       PostorderStack.emplace_back(V, false);
    411 
    412       Operator *Op = cast<Operator>(V);
    413       for (unsigned I = 0, E = Op->getNumOperands(); I != E; ++I) {
    414         if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Op->getOperand(I))) {
    415           if (isAddressExpression(*CE, *DL, TTI) && Visited.insert(CE).second)
    416             PostorderStack.emplace_back(CE, false);
    417         }
    418       }
    419     }
    420   }
    421 }
    422 
    423 // Returns all flat address expressions in function F. The elements are ordered
    424 // ordered in postorder.
    425 std::vector<WeakTrackingVH>
    426 InferAddressSpacesImpl::collectFlatAddressExpressions(Function &F) const {
    427   // This function implements a non-recursive postorder traversal of a partial
    428   // use-def graph of function F.
    429   PostorderStackTy PostorderStack;
    430   // The set of visited expressions.
    431   DenseSet<Value *> Visited;
    432 
    433   auto PushPtrOperand = [&](Value *Ptr) {
    434     appendsFlatAddressExpressionToPostorderStack(Ptr, PostorderStack,
    435                                                  Visited);
    436   };
    437 
    438   // Look at operations that may be interesting accelerate by moving to a known
    439   // address space. We aim at generating after loads and stores, but pure
    440   // addressing calculations may also be faster.
    441   for (Instruction &I : instructions(F)) {
    442     if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {
    443       if (!GEP->getType()->isVectorTy())
    444         PushPtrOperand(GEP->getPointerOperand());
    445     } else if (auto *LI = dyn_cast<LoadInst>(&I))
    446       PushPtrOperand(LI->getPointerOperand());
    447     else if (auto *SI = dyn_cast<StoreInst>(&I))
    448       PushPtrOperand(SI->getPointerOperand());
    449     else if (auto *RMW = dyn_cast<AtomicRMWInst>(&I))
    450       PushPtrOperand(RMW->getPointerOperand());
    451     else if (auto *CmpX = dyn_cast<AtomicCmpXchgInst>(&I))
    452       PushPtrOperand(CmpX->getPointerOperand());
    453     else if (auto *MI = dyn_cast<MemIntrinsic>(&I)) {
    454       // For memset/memcpy/memmove, any pointer operand can be replaced.
    455       PushPtrOperand(MI->getRawDest());
    456 
    457       // Handle 2nd operand for memcpy/memmove.
    458       if (auto *MTI = dyn_cast<MemTransferInst>(MI))
    459         PushPtrOperand(MTI->getRawSource());
    460     } else if (auto *II = dyn_cast<IntrinsicInst>(&I))
    461       collectRewritableIntrinsicOperands(II, PostorderStack, Visited);
    462     else if (ICmpInst *Cmp = dyn_cast<ICmpInst>(&I)) {
    463       // FIXME: Handle vectors of pointers
    464       if (Cmp->getOperand(0)->getType()->isPointerTy()) {
    465         PushPtrOperand(Cmp->getOperand(0));
    466         PushPtrOperand(Cmp->getOperand(1));
    467       }
    468     } else if (auto *ASC = dyn_cast<AddrSpaceCastInst>(&I)) {
    469       if (!ASC->getType()->isVectorTy())
    470         PushPtrOperand(ASC->getPointerOperand());
    471     } else if (auto *I2P = dyn_cast<IntToPtrInst>(&I)) {
    472       if (isNoopPtrIntCastPair(cast<Operator>(I2P), *DL, TTI))
    473         PushPtrOperand(
    474             cast<PtrToIntInst>(I2P->getOperand(0))->getPointerOperand());
    475     }
    476   }
    477 
    478   std::vector<WeakTrackingVH> Postorder; // The resultant postorder.
    479   while (!PostorderStack.empty()) {
    480     Value *TopVal = PostorderStack.back().getPointer();
    481     // If the operands of the expression on the top are already explored,
    482     // adds that expression to the resultant postorder.
    483     if (PostorderStack.back().getInt()) {
    484       if (TopVal->getType()->getPointerAddressSpace() == FlatAddrSpace)
    485         Postorder.push_back(TopVal);
    486       PostorderStack.pop_back();
    487       continue;
    488     }
    489     // Otherwise, adds its operands to the stack and explores them.
    490     PostorderStack.back().setInt(true);
    491     // Skip values with an assumed address space.
    492     if (TTI->getAssumedAddrSpace(TopVal) == UninitializedAddressSpace) {
    493       for (Value *PtrOperand : getPointerOperands(*TopVal, *DL, TTI)) {
    494         appendsFlatAddressExpressionToPostorderStack(PtrOperand, PostorderStack,
    495                                                      Visited);
    496       }
    497     }
    498   }
    499   return Postorder;
    500 }
    501 
    502 // A helper function for cloneInstructionWithNewAddressSpace. Returns the clone
    503 // of OperandUse.get() in the new address space. If the clone is not ready yet,
    504 // returns an undef in the new address space as a placeholder.
    505 static Value *operandWithNewAddressSpaceOrCreateUndef(
    506     const Use &OperandUse, unsigned NewAddrSpace,
    507     const ValueToValueMapTy &ValueWithNewAddrSpace,
    508     SmallVectorImpl<const Use *> *UndefUsesToFix) {
    509   Value *Operand = OperandUse.get();
    510 
    511   Type *NewPtrTy =
    512       Operand->getType()->getPointerElementType()->getPointerTo(NewAddrSpace);
    513 
    514   if (Constant *C = dyn_cast<Constant>(Operand))
    515     return ConstantExpr::getAddrSpaceCast(C, NewPtrTy);
    516 
    517   if (Value *NewOperand = ValueWithNewAddrSpace.lookup(Operand))
    518     return NewOperand;
    519 
    520   UndefUsesToFix->push_back(&OperandUse);
    521   return UndefValue::get(NewPtrTy);
    522 }
    523 
    524 // Returns a clone of `I` with its operands converted to those specified in
    525 // ValueWithNewAddrSpace. Due to potential cycles in the data flow graph, an
    526 // operand whose address space needs to be modified might not exist in
    527 // ValueWithNewAddrSpace. In that case, uses undef as a placeholder operand and
    528 // adds that operand use to UndefUsesToFix so that caller can fix them later.
    529 //
    530 // Note that we do not necessarily clone `I`, e.g., if it is an addrspacecast
    531 // from a pointer whose type already matches. Therefore, this function returns a
    532 // Value* instead of an Instruction*.
    533 //
    534 // This may also return nullptr in the case the instruction could not be
    535 // rewritten.
    536 Value *InferAddressSpacesImpl::cloneInstructionWithNewAddressSpace(
    537     Instruction *I, unsigned NewAddrSpace,
    538     const ValueToValueMapTy &ValueWithNewAddrSpace,
    539     SmallVectorImpl<const Use *> *UndefUsesToFix) const {
    540   Type *NewPtrType =
    541       I->getType()->getPointerElementType()->getPointerTo(NewAddrSpace);
    542 
    543   if (I->getOpcode() == Instruction::AddrSpaceCast) {
    544     Value *Src = I->getOperand(0);
    545     // Because `I` is flat, the source address space must be specific.
    546     // Therefore, the inferred address space must be the source space, according
    547     // to our algorithm.
    548     assert(Src->getType()->getPointerAddressSpace() == NewAddrSpace);
    549     if (Src->getType() != NewPtrType)
    550       return new BitCastInst(Src, NewPtrType);
    551     return Src;
    552   }
    553 
    554   if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
    555     // Technically the intrinsic ID is a pointer typed argument, so specially
    556     // handle calls early.
    557     assert(II->getIntrinsicID() == Intrinsic::ptrmask);
    558     Value *NewPtr = operandWithNewAddressSpaceOrCreateUndef(
    559         II->getArgOperandUse(0), NewAddrSpace, ValueWithNewAddrSpace,
    560         UndefUsesToFix);
    561     Value *Rewrite =
    562         TTI->rewriteIntrinsicWithAddressSpace(II, II->getArgOperand(0), NewPtr);
    563     if (Rewrite) {
    564       assert(Rewrite != II && "cannot modify this pointer operation in place");
    565       return Rewrite;
    566     }
    567 
    568     return nullptr;
    569   }
    570 
    571   unsigned AS = TTI->getAssumedAddrSpace(I);
    572   if (AS != UninitializedAddressSpace) {
    573     // For the assumed address space, insert an `addrspacecast` to make that
    574     // explicit.
    575     auto *NewPtrTy = I->getType()->getPointerElementType()->getPointerTo(AS);
    576     auto *NewI = new AddrSpaceCastInst(I, NewPtrTy);
    577     NewI->insertAfter(I);
    578     return NewI;
    579   }
    580 
    581   // Computes the converted pointer operands.
    582   SmallVector<Value *, 4> NewPointerOperands;
    583   for (const Use &OperandUse : I->operands()) {
    584     if (!OperandUse.get()->getType()->isPointerTy())
    585       NewPointerOperands.push_back(nullptr);
    586     else
    587       NewPointerOperands.push_back(operandWithNewAddressSpaceOrCreateUndef(
    588                                      OperandUse, NewAddrSpace, ValueWithNewAddrSpace, UndefUsesToFix));
    589   }
    590 
    591   switch (I->getOpcode()) {
    592   case Instruction::BitCast:
    593     return new BitCastInst(NewPointerOperands[0], NewPtrType);
    594   case Instruction::PHI: {
    595     assert(I->getType()->isPointerTy());
    596     PHINode *PHI = cast<PHINode>(I);
    597     PHINode *NewPHI = PHINode::Create(NewPtrType, PHI->getNumIncomingValues());
    598     for (unsigned Index = 0; Index < PHI->getNumIncomingValues(); ++Index) {
    599       unsigned OperandNo = PHINode::getOperandNumForIncomingValue(Index);
    600       NewPHI->addIncoming(NewPointerOperands[OperandNo],
    601                           PHI->getIncomingBlock(Index));
    602     }
    603     return NewPHI;
    604   }
    605   case Instruction::GetElementPtr: {
    606     GetElementPtrInst *GEP = cast<GetElementPtrInst>(I);
    607     GetElementPtrInst *NewGEP = GetElementPtrInst::Create(
    608         GEP->getSourceElementType(), NewPointerOperands[0],
    609         SmallVector<Value *, 4>(GEP->indices()));
    610     NewGEP->setIsInBounds(GEP->isInBounds());
    611     return NewGEP;
    612   }
    613   case Instruction::Select:
    614     assert(I->getType()->isPointerTy());
    615     return SelectInst::Create(I->getOperand(0), NewPointerOperands[1],
    616                               NewPointerOperands[2], "", nullptr, I);
    617   case Instruction::IntToPtr: {
    618     assert(isNoopPtrIntCastPair(cast<Operator>(I), *DL, TTI));
    619     Value *Src = cast<Operator>(I->getOperand(0))->getOperand(0);
    620     assert(Src->getType()->getPointerAddressSpace() == NewAddrSpace);
    621     if (Src->getType() != NewPtrType)
    622       return new BitCastInst(Src, NewPtrType);
    623     return Src;
    624   }
    625   default:
    626     llvm_unreachable("Unexpected opcode");
    627   }
    628 }
    629 
    630 // Similar to cloneInstructionWithNewAddressSpace, returns a clone of the
    631 // constant expression `CE` with its operands replaced as specified in
    632 // ValueWithNewAddrSpace.
    633 static Value *cloneConstantExprWithNewAddressSpace(
    634     ConstantExpr *CE, unsigned NewAddrSpace,
    635     const ValueToValueMapTy &ValueWithNewAddrSpace, const DataLayout *DL,
    636     const TargetTransformInfo *TTI) {
    637   Type *TargetType =
    638     CE->getType()->getPointerElementType()->getPointerTo(NewAddrSpace);
    639 
    640   if (CE->getOpcode() == Instruction::AddrSpaceCast) {
    641     // Because CE is flat, the source address space must be specific.
    642     // Therefore, the inferred address space must be the source space according
    643     // to our algorithm.
    644     assert(CE->getOperand(0)->getType()->getPointerAddressSpace() ==
    645            NewAddrSpace);
    646     return ConstantExpr::getBitCast(CE->getOperand(0), TargetType);
    647   }
    648 
    649   if (CE->getOpcode() == Instruction::BitCast) {
    650     if (Value *NewOperand = ValueWithNewAddrSpace.lookup(CE->getOperand(0)))
    651       return ConstantExpr::getBitCast(cast<Constant>(NewOperand), TargetType);
    652     return ConstantExpr::getAddrSpaceCast(CE, TargetType);
    653   }
    654 
    655   if (CE->getOpcode() == Instruction::Select) {
    656     Constant *Src0 = CE->getOperand(1);
    657     Constant *Src1 = CE->getOperand(2);
    658     if (Src0->getType()->getPointerAddressSpace() ==
    659         Src1->getType()->getPointerAddressSpace()) {
    660 
    661       return ConstantExpr::getSelect(
    662           CE->getOperand(0), ConstantExpr::getAddrSpaceCast(Src0, TargetType),
    663           ConstantExpr::getAddrSpaceCast(Src1, TargetType));
    664     }
    665   }
    666 
    667   if (CE->getOpcode() == Instruction::IntToPtr) {
    668     assert(isNoopPtrIntCastPair(cast<Operator>(CE), *DL, TTI));
    669     Constant *Src = cast<ConstantExpr>(CE->getOperand(0))->getOperand(0);
    670     assert(Src->getType()->getPointerAddressSpace() == NewAddrSpace);
    671     return ConstantExpr::getBitCast(Src, TargetType);
    672   }
    673 
    674   // Computes the operands of the new constant expression.
    675   bool IsNew = false;
    676   SmallVector<Constant *, 4> NewOperands;
    677   for (unsigned Index = 0; Index < CE->getNumOperands(); ++Index) {
    678     Constant *Operand = CE->getOperand(Index);
    679     // If the address space of `Operand` needs to be modified, the new operand
    680     // with the new address space should already be in ValueWithNewAddrSpace
    681     // because (1) the constant expressions we consider (i.e. addrspacecast,
    682     // bitcast, and getelementptr) do not incur cycles in the data flow graph
    683     // and (2) this function is called on constant expressions in postorder.
    684     if (Value *NewOperand = ValueWithNewAddrSpace.lookup(Operand)) {
    685       IsNew = true;
    686       NewOperands.push_back(cast<Constant>(NewOperand));
    687       continue;
    688     }
    689     if (auto CExpr = dyn_cast<ConstantExpr>(Operand))
    690       if (Value *NewOperand = cloneConstantExprWithNewAddressSpace(
    691               CExpr, NewAddrSpace, ValueWithNewAddrSpace, DL, TTI)) {
    692         IsNew = true;
    693         NewOperands.push_back(cast<Constant>(NewOperand));
    694         continue;
    695       }
    696     // Otherwise, reuses the old operand.
    697     NewOperands.push_back(Operand);
    698   }
    699 
    700   // If !IsNew, we will replace the Value with itself. However, replaced values
    701   // are assumed to wrapped in a addrspace cast later so drop it now.
    702   if (!IsNew)
    703     return nullptr;
    704 
    705   if (CE->getOpcode() == Instruction::GetElementPtr) {
    706     // Needs to specify the source type while constructing a getelementptr
    707     // constant expression.
    708     return CE->getWithOperands(
    709       NewOperands, TargetType, /*OnlyIfReduced=*/false,
    710       NewOperands[0]->getType()->getPointerElementType());
    711   }
    712 
    713   return CE->getWithOperands(NewOperands, TargetType);
    714 }
    715 
    716 // Returns a clone of the value `V`, with its operands replaced as specified in
    717 // ValueWithNewAddrSpace. This function is called on every flat address
    718 // expression whose address space needs to be modified, in postorder.
    719 //
    720 // See cloneInstructionWithNewAddressSpace for the meaning of UndefUsesToFix.
    721 Value *InferAddressSpacesImpl::cloneValueWithNewAddressSpace(
    722     Value *V, unsigned NewAddrSpace,
    723     const ValueToValueMapTy &ValueWithNewAddrSpace,
    724     SmallVectorImpl<const Use *> *UndefUsesToFix) const {
    725   // All values in Postorder are flat address expressions.
    726   assert(V->getType()->getPointerAddressSpace() == FlatAddrSpace &&
    727          isAddressExpression(*V, *DL, TTI));
    728 
    729   if (Instruction *I = dyn_cast<Instruction>(V)) {
    730     Value *NewV = cloneInstructionWithNewAddressSpace(
    731       I, NewAddrSpace, ValueWithNewAddrSpace, UndefUsesToFix);
    732     if (Instruction *NewI = dyn_cast_or_null<Instruction>(NewV)) {
    733       if (NewI->getParent() == nullptr) {
    734         NewI->insertBefore(I);
    735         NewI->takeName(I);
    736       }
    737     }
    738     return NewV;
    739   }
    740 
    741   return cloneConstantExprWithNewAddressSpace(
    742       cast<ConstantExpr>(V), NewAddrSpace, ValueWithNewAddrSpace, DL, TTI);
    743 }
    744 
    745 // Defines the join operation on the address space lattice (see the file header
    746 // comments).
    747 unsigned InferAddressSpacesImpl::joinAddressSpaces(unsigned AS1,
    748                                                    unsigned AS2) const {
    749   if (AS1 == FlatAddrSpace || AS2 == FlatAddrSpace)
    750     return FlatAddrSpace;
    751 
    752   if (AS1 == UninitializedAddressSpace)
    753     return AS2;
    754   if (AS2 == UninitializedAddressSpace)
    755     return AS1;
    756 
    757   // The join of two different specific address spaces is flat.
    758   return (AS1 == AS2) ? AS1 : FlatAddrSpace;
    759 }
    760 
    761 bool InferAddressSpacesImpl::run(Function &F) {
    762   DL = &F.getParent()->getDataLayout();
    763 
    764   if (AssumeDefaultIsFlatAddressSpace)
    765     FlatAddrSpace = 0;
    766 
    767   if (FlatAddrSpace == UninitializedAddressSpace) {
    768     FlatAddrSpace = TTI->getFlatAddressSpace();
    769     if (FlatAddrSpace == UninitializedAddressSpace)
    770       return false;
    771   }
    772 
    773   // Collects all flat address expressions in postorder.
    774   std::vector<WeakTrackingVH> Postorder = collectFlatAddressExpressions(F);
    775 
    776   // Runs a data-flow analysis to refine the address spaces of every expression
    777   // in Postorder.
    778   ValueToAddrSpaceMapTy InferredAddrSpace;
    779   inferAddressSpaces(Postorder, &InferredAddrSpace);
    780 
    781   // Changes the address spaces of the flat address expressions who are inferred
    782   // to point to a specific address space.
    783   return rewriteWithNewAddressSpaces(*TTI, Postorder, InferredAddrSpace, &F);
    784 }
    785 
    786 // Constants need to be tracked through RAUW to handle cases with nested
    787 // constant expressions, so wrap values in WeakTrackingVH.
    788 void InferAddressSpacesImpl::inferAddressSpaces(
    789     ArrayRef<WeakTrackingVH> Postorder,
    790     ValueToAddrSpaceMapTy *InferredAddrSpace) const {
    791   SetVector<Value *> Worklist(Postorder.begin(), Postorder.end());
    792   // Initially, all expressions are in the uninitialized address space.
    793   for (Value *V : Postorder)
    794     (*InferredAddrSpace)[V] = UninitializedAddressSpace;
    795 
    796   while (!Worklist.empty()) {
    797     Value *V = Worklist.pop_back_val();
    798 
    799     // Tries to update the address space of the stack top according to the
    800     // address spaces of its operands.
    801     LLVM_DEBUG(dbgs() << "Updating the address space of\n  " << *V << '\n');
    802     Optional<unsigned> NewAS = updateAddressSpace(*V, *InferredAddrSpace);
    803     if (!NewAS.hasValue())
    804       continue;
    805     // If any updates are made, grabs its users to the worklist because
    806     // their address spaces can also be possibly updated.
    807     LLVM_DEBUG(dbgs() << "  to " << NewAS.getValue() << '\n');
    808     (*InferredAddrSpace)[V] = NewAS.getValue();
    809 
    810     for (Value *User : V->users()) {
    811       // Skip if User is already in the worklist.
    812       if (Worklist.count(User))
    813         continue;
    814 
    815       auto Pos = InferredAddrSpace->find(User);
    816       // Our algorithm only updates the address spaces of flat address
    817       // expressions, which are those in InferredAddrSpace.
    818       if (Pos == InferredAddrSpace->end())
    819         continue;
    820 
    821       // Function updateAddressSpace moves the address space down a lattice
    822       // path. Therefore, nothing to do if User is already inferred as flat (the
    823       // bottom element in the lattice).
    824       if (Pos->second == FlatAddrSpace)
    825         continue;
    826 
    827       Worklist.insert(User);
    828     }
    829   }
    830 }
    831 
    832 Optional<unsigned> InferAddressSpacesImpl::updateAddressSpace(
    833     const Value &V, const ValueToAddrSpaceMapTy &InferredAddrSpace) const {
    834   assert(InferredAddrSpace.count(&V));
    835 
    836   // The new inferred address space equals the join of the address spaces
    837   // of all its pointer operands.
    838   unsigned NewAS = UninitializedAddressSpace;
    839 
    840   const Operator &Op = cast<Operator>(V);
    841   if (Op.getOpcode() == Instruction::Select) {
    842     Value *Src0 = Op.getOperand(1);
    843     Value *Src1 = Op.getOperand(2);
    844 
    845     auto I = InferredAddrSpace.find(Src0);
    846     unsigned Src0AS = (I != InferredAddrSpace.end()) ?
    847       I->second : Src0->getType()->getPointerAddressSpace();
    848 
    849     auto J = InferredAddrSpace.find(Src1);
    850     unsigned Src1AS = (J != InferredAddrSpace.end()) ?
    851       J->second : Src1->getType()->getPointerAddressSpace();
    852 
    853     auto *C0 = dyn_cast<Constant>(Src0);
    854     auto *C1 = dyn_cast<Constant>(Src1);
    855 
    856     // If one of the inputs is a constant, we may be able to do a constant
    857     // addrspacecast of it. Defer inferring the address space until the input
    858     // address space is known.
    859     if ((C1 && Src0AS == UninitializedAddressSpace) ||
    860         (C0 && Src1AS == UninitializedAddressSpace))
    861       return None;
    862 
    863     if (C0 && isSafeToCastConstAddrSpace(C0, Src1AS))
    864       NewAS = Src1AS;
    865     else if (C1 && isSafeToCastConstAddrSpace(C1, Src0AS))
    866       NewAS = Src0AS;
    867     else
    868       NewAS = joinAddressSpaces(Src0AS, Src1AS);
    869   } else {
    870     unsigned AS = TTI->getAssumedAddrSpace(&V);
    871     if (AS != UninitializedAddressSpace) {
    872       // Use the assumed address space directly.
    873       NewAS = AS;
    874     } else {
    875       // Otherwise, infer the address space from its pointer operands.
    876       for (Value *PtrOperand : getPointerOperands(V, *DL, TTI)) {
    877         auto I = InferredAddrSpace.find(PtrOperand);
    878         unsigned OperandAS =
    879             I != InferredAddrSpace.end()
    880                 ? I->second
    881                 : PtrOperand->getType()->getPointerAddressSpace();
    882 
    883         // join(flat, *) = flat. So we can break if NewAS is already flat.
    884         NewAS = joinAddressSpaces(NewAS, OperandAS);
    885         if (NewAS == FlatAddrSpace)
    886           break;
    887       }
    888     }
    889   }
    890 
    891   unsigned OldAS = InferredAddrSpace.lookup(&V);
    892   assert(OldAS != FlatAddrSpace);
    893   if (OldAS == NewAS)
    894     return None;
    895   return NewAS;
    896 }
    897 
    898 /// \p returns true if \p U is the pointer operand of a memory instruction with
    899 /// a single pointer operand that can have its address space changed by simply
    900 /// mutating the use to a new value. If the memory instruction is volatile,
    901 /// return true only if the target allows the memory instruction to be volatile
    902 /// in the new address space.
    903 static bool isSimplePointerUseValidToReplace(const TargetTransformInfo &TTI,
    904                                              Use &U, unsigned AddrSpace) {
    905   User *Inst = U.getUser();
    906   unsigned OpNo = U.getOperandNo();
    907   bool VolatileIsAllowed = false;
    908   if (auto *I = dyn_cast<Instruction>(Inst))
    909     VolatileIsAllowed = TTI.hasVolatileVariant(I, AddrSpace);
    910 
    911   if (auto *LI = dyn_cast<LoadInst>(Inst))
    912     return OpNo == LoadInst::getPointerOperandIndex() &&
    913            (VolatileIsAllowed || !LI->isVolatile());
    914 
    915   if (auto *SI = dyn_cast<StoreInst>(Inst))
    916     return OpNo == StoreInst::getPointerOperandIndex() &&
    917            (VolatileIsAllowed || !SI->isVolatile());
    918 
    919   if (auto *RMW = dyn_cast<AtomicRMWInst>(Inst))
    920     return OpNo == AtomicRMWInst::getPointerOperandIndex() &&
    921            (VolatileIsAllowed || !RMW->isVolatile());
    922 
    923   if (auto *CmpX = dyn_cast<AtomicCmpXchgInst>(Inst))
    924     return OpNo == AtomicCmpXchgInst::getPointerOperandIndex() &&
    925            (VolatileIsAllowed || !CmpX->isVolatile());
    926 
    927   return false;
    928 }
    929 
    930 /// Update memory intrinsic uses that require more complex processing than
    931 /// simple memory instructions. Thse require re-mangling and may have multiple
    932 /// pointer operands.
    933 static bool handleMemIntrinsicPtrUse(MemIntrinsic *MI, Value *OldV,
    934                                      Value *NewV) {
    935   IRBuilder<> B(MI);
    936   MDNode *TBAA = MI->getMetadata(LLVMContext::MD_tbaa);
    937   MDNode *ScopeMD = MI->getMetadata(LLVMContext::MD_alias_scope);
    938   MDNode *NoAliasMD = MI->getMetadata(LLVMContext::MD_noalias);
    939 
    940   if (auto *MSI = dyn_cast<MemSetInst>(MI)) {
    941     B.CreateMemSet(NewV, MSI->getValue(), MSI->getLength(),
    942                    MaybeAlign(MSI->getDestAlignment()),
    943                    false, // isVolatile
    944                    TBAA, ScopeMD, NoAliasMD);
    945   } else if (auto *MTI = dyn_cast<MemTransferInst>(MI)) {
    946     Value *Src = MTI->getRawSource();
    947     Value *Dest = MTI->getRawDest();
    948 
    949     // Be careful in case this is a self-to-self copy.
    950     if (Src == OldV)
    951       Src = NewV;
    952 
    953     if (Dest == OldV)
    954       Dest = NewV;
    955 
    956     if (isa<MemCpyInst>(MTI)) {
    957       MDNode *TBAAStruct = MTI->getMetadata(LLVMContext::MD_tbaa_struct);
    958       B.CreateMemCpy(Dest, MTI->getDestAlign(), Src, MTI->getSourceAlign(),
    959                      MTI->getLength(),
    960                      false, // isVolatile
    961                      TBAA, TBAAStruct, ScopeMD, NoAliasMD);
    962     } else {
    963       assert(isa<MemMoveInst>(MTI));
    964       B.CreateMemMove(Dest, MTI->getDestAlign(), Src, MTI->getSourceAlign(),
    965                       MTI->getLength(),
    966                       false, // isVolatile
    967                       TBAA, ScopeMD, NoAliasMD);
    968     }
    969   } else
    970     llvm_unreachable("unhandled MemIntrinsic");
    971 
    972   MI->eraseFromParent();
    973   return true;
    974 }
    975 
    976 // \p returns true if it is OK to change the address space of constant \p C with
    977 // a ConstantExpr addrspacecast.
    978 bool InferAddressSpacesImpl::isSafeToCastConstAddrSpace(Constant *C,
    979                                                         unsigned NewAS) const {
    980   assert(NewAS != UninitializedAddressSpace);
    981 
    982   unsigned SrcAS = C->getType()->getPointerAddressSpace();
    983   if (SrcAS == NewAS || isa<UndefValue>(C))
    984     return true;
    985 
    986   // Prevent illegal casts between different non-flat address spaces.
    987   if (SrcAS != FlatAddrSpace && NewAS != FlatAddrSpace)
    988     return false;
    989 
    990   if (isa<ConstantPointerNull>(C))
    991     return true;
    992 
    993   if (auto *Op = dyn_cast<Operator>(C)) {
    994     // If we already have a constant addrspacecast, it should be safe to cast it
    995     // off.
    996     if (Op->getOpcode() == Instruction::AddrSpaceCast)
    997       return isSafeToCastConstAddrSpace(cast<Constant>(Op->getOperand(0)), NewAS);
    998 
    999     if (Op->getOpcode() == Instruction::IntToPtr &&
   1000         Op->getType()->getPointerAddressSpace() == FlatAddrSpace)
   1001       return true;
   1002   }
   1003 
   1004   return false;
   1005 }
   1006 
   1007 static Value::use_iterator skipToNextUser(Value::use_iterator I,
   1008                                           Value::use_iterator End) {
   1009   User *CurUser = I->getUser();
   1010   ++I;
   1011 
   1012   while (I != End && I->getUser() == CurUser)
   1013     ++I;
   1014 
   1015   return I;
   1016 }
   1017 
   1018 bool InferAddressSpacesImpl::rewriteWithNewAddressSpaces(
   1019     const TargetTransformInfo &TTI, ArrayRef<WeakTrackingVH> Postorder,
   1020     const ValueToAddrSpaceMapTy &InferredAddrSpace, Function *F) const {
   1021   // For each address expression to be modified, creates a clone of it with its
   1022   // pointer operands converted to the new address space. Since the pointer
   1023   // operands are converted, the clone is naturally in the new address space by
   1024   // construction.
   1025   ValueToValueMapTy ValueWithNewAddrSpace;
   1026   SmallVector<const Use *, 32> UndefUsesToFix;
   1027   for (Value* V : Postorder) {
   1028     unsigned NewAddrSpace = InferredAddrSpace.lookup(V);
   1029 
   1030     // In some degenerate cases (e.g. invalid IR in unreachable code), we may
   1031     // not even infer the value to have its original address space.
   1032     if (NewAddrSpace == UninitializedAddressSpace)
   1033       continue;
   1034 
   1035     if (V->getType()->getPointerAddressSpace() != NewAddrSpace) {
   1036       Value *New = cloneValueWithNewAddressSpace(
   1037           V, NewAddrSpace, ValueWithNewAddrSpace, &UndefUsesToFix);
   1038       if (New)
   1039         ValueWithNewAddrSpace[V] = New;
   1040     }
   1041   }
   1042 
   1043   if (ValueWithNewAddrSpace.empty())
   1044     return false;
   1045 
   1046   // Fixes all the undef uses generated by cloneInstructionWithNewAddressSpace.
   1047   for (const Use *UndefUse : UndefUsesToFix) {
   1048     User *V = UndefUse->getUser();
   1049     User *NewV = cast_or_null<User>(ValueWithNewAddrSpace.lookup(V));
   1050     if (!NewV)
   1051       continue;
   1052 
   1053     unsigned OperandNo = UndefUse->getOperandNo();
   1054     assert(isa<UndefValue>(NewV->getOperand(OperandNo)));
   1055     NewV->setOperand(OperandNo, ValueWithNewAddrSpace.lookup(UndefUse->get()));
   1056   }
   1057 
   1058   SmallVector<Instruction *, 16> DeadInstructions;
   1059 
   1060   // Replaces the uses of the old address expressions with the new ones.
   1061   for (const WeakTrackingVH &WVH : Postorder) {
   1062     assert(WVH && "value was unexpectedly deleted");
   1063     Value *V = WVH;
   1064     Value *NewV = ValueWithNewAddrSpace.lookup(V);
   1065     if (NewV == nullptr)
   1066       continue;
   1067 
   1068     LLVM_DEBUG(dbgs() << "Replacing the uses of " << *V << "\n  with\n  "
   1069                       << *NewV << '\n');
   1070 
   1071     if (Constant *C = dyn_cast<Constant>(V)) {
   1072       Constant *Replace = ConstantExpr::getAddrSpaceCast(cast<Constant>(NewV),
   1073                                                          C->getType());
   1074       if (C != Replace) {
   1075         LLVM_DEBUG(dbgs() << "Inserting replacement const cast: " << Replace
   1076                           << ": " << *Replace << '\n');
   1077         C->replaceAllUsesWith(Replace);
   1078         V = Replace;
   1079       }
   1080     }
   1081 
   1082     Value::use_iterator I, E, Next;
   1083     for (I = V->use_begin(), E = V->use_end(); I != E; ) {
   1084       Use &U = *I;
   1085 
   1086       // Some users may see the same pointer operand in multiple operands. Skip
   1087       // to the next instruction.
   1088       I = skipToNextUser(I, E);
   1089 
   1090       if (isSimplePointerUseValidToReplace(
   1091               TTI, U, V->getType()->getPointerAddressSpace())) {
   1092         // If V is used as the pointer operand of a compatible memory operation,
   1093         // sets the pointer operand to NewV. This replacement does not change
   1094         // the element type, so the resultant load/store is still valid.
   1095         U.set(NewV);
   1096         continue;
   1097       }
   1098 
   1099       User *CurUser = U.getUser();
   1100       // Skip if the current user is the new value itself.
   1101       if (CurUser == NewV)
   1102         continue;
   1103       // Handle more complex cases like intrinsic that need to be remangled.
   1104       if (auto *MI = dyn_cast<MemIntrinsic>(CurUser)) {
   1105         if (!MI->isVolatile() && handleMemIntrinsicPtrUse(MI, V, NewV))
   1106           continue;
   1107       }
   1108 
   1109       if (auto *II = dyn_cast<IntrinsicInst>(CurUser)) {
   1110         if (rewriteIntrinsicOperands(II, V, NewV))
   1111           continue;
   1112       }
   1113 
   1114       if (isa<Instruction>(CurUser)) {
   1115         if (ICmpInst *Cmp = dyn_cast<ICmpInst>(CurUser)) {
   1116           // If we can infer that both pointers are in the same addrspace,
   1117           // transform e.g.
   1118           //   %cmp = icmp eq float* %p, %q
   1119           // into
   1120           //   %cmp = icmp eq float addrspace(3)* %new_p, %new_q
   1121 
   1122           unsigned NewAS = NewV->getType()->getPointerAddressSpace();
   1123           int SrcIdx = U.getOperandNo();
   1124           int OtherIdx = (SrcIdx == 0) ? 1 : 0;
   1125           Value *OtherSrc = Cmp->getOperand(OtherIdx);
   1126 
   1127           if (Value *OtherNewV = ValueWithNewAddrSpace.lookup(OtherSrc)) {
   1128             if (OtherNewV->getType()->getPointerAddressSpace() == NewAS) {
   1129               Cmp->setOperand(OtherIdx, OtherNewV);
   1130               Cmp->setOperand(SrcIdx, NewV);
   1131               continue;
   1132             }
   1133           }
   1134 
   1135           // Even if the type mismatches, we can cast the constant.
   1136           if (auto *KOtherSrc = dyn_cast<Constant>(OtherSrc)) {
   1137             if (isSafeToCastConstAddrSpace(KOtherSrc, NewAS)) {
   1138               Cmp->setOperand(SrcIdx, NewV);
   1139               Cmp->setOperand(OtherIdx,
   1140                 ConstantExpr::getAddrSpaceCast(KOtherSrc, NewV->getType()));
   1141               continue;
   1142             }
   1143           }
   1144         }
   1145 
   1146         if (AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(CurUser)) {
   1147           unsigned NewAS = NewV->getType()->getPointerAddressSpace();
   1148           if (ASC->getDestAddressSpace() == NewAS) {
   1149             if (ASC->getType()->getPointerElementType() !=
   1150                 NewV->getType()->getPointerElementType()) {
   1151               NewV = CastInst::Create(Instruction::BitCast, NewV,
   1152                                       ASC->getType(), "", ASC);
   1153             }
   1154             ASC->replaceAllUsesWith(NewV);
   1155             DeadInstructions.push_back(ASC);
   1156             continue;
   1157           }
   1158         }
   1159 
   1160         // Otherwise, replaces the use with flat(NewV).
   1161         if (Instruction *Inst = dyn_cast<Instruction>(V)) {
   1162           // Don't create a copy of the original addrspacecast.
   1163           if (U == V && isa<AddrSpaceCastInst>(V))
   1164             continue;
   1165 
   1166           BasicBlock::iterator InsertPos = std::next(Inst->getIterator());
   1167           while (isa<PHINode>(InsertPos))
   1168             ++InsertPos;
   1169           U.set(new AddrSpaceCastInst(NewV, V->getType(), "", &*InsertPos));
   1170         } else {
   1171           U.set(ConstantExpr::getAddrSpaceCast(cast<Constant>(NewV),
   1172                                                V->getType()));
   1173         }
   1174       }
   1175     }
   1176 
   1177     if (V->use_empty()) {
   1178       if (Instruction *I = dyn_cast<Instruction>(V))
   1179         DeadInstructions.push_back(I);
   1180     }
   1181   }
   1182 
   1183   for (Instruction *I : DeadInstructions)
   1184     RecursivelyDeleteTriviallyDeadInstructions(I);
   1185 
   1186   return true;
   1187 }
   1188 
   1189 bool InferAddressSpaces::runOnFunction(Function &F) {
   1190   if (skipFunction(F))
   1191     return false;
   1192 
   1193   return InferAddressSpacesImpl(
   1194              &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F),
   1195              FlatAddrSpace)
   1196       .run(F);
   1197 }
   1198 
   1199 FunctionPass *llvm::createInferAddressSpacesPass(unsigned AddressSpace) {
   1200   return new InferAddressSpaces(AddressSpace);
   1201 }
   1202 
   1203 InferAddressSpacesPass::InferAddressSpacesPass()
   1204     : FlatAddrSpace(UninitializedAddressSpace) {}
   1205 InferAddressSpacesPass::InferAddressSpacesPass(unsigned AddressSpace)
   1206     : FlatAddrSpace(AddressSpace) {}
   1207 
   1208 PreservedAnalyses InferAddressSpacesPass::run(Function &F,
   1209                                               FunctionAnalysisManager &AM) {
   1210   bool Changed =
   1211       InferAddressSpacesImpl(&AM.getResult<TargetIRAnalysis>(F), FlatAddrSpace)
   1212           .run(F);
   1213   if (Changed) {
   1214     PreservedAnalyses PA;
   1215     PA.preserveSet<CFGAnalyses>();
   1216     return PA;
   1217   }
   1218   return PreservedAnalyses::all();
   1219 }
   1220