Home | History | Annotate | Line # | Download | only in AMDGPU
      1 //=== AMDGPUPrintfRuntimeBinding.cpp - OpenCL printf implementation -------===//
      2 //
      3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
      4 // See https://llvm.org/LICENSE.txt for license information.
      5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
      6 //
      7 //===----------------------------------------------------------------------===//
      8 // \file
      9 //
// This pass binds printf calls to a kernel argument pointer that the runtime
// later binds to a buffer.
     12 //
     13 // This pass traverses the functions in the module and converts
     14 // each call to printf to a sequence of operations that
     15 // store the following into the printf buffer:
     16 // - format string (passed as a module's metadata unique ID)
     17 // - bitwise copies of printf arguments
     18 // The backend passes will need to store metadata in the kernel
     19 //===----------------------------------------------------------------------===//
     20 
     21 #include "AMDGPU.h"
     22 #include "llvm/Analysis/InstructionSimplify.h"
     23 #include "llvm/Analysis/TargetLibraryInfo.h"
     24 #include "llvm/IR/Dominators.h"
     25 #include "llvm/IR/IRBuilder.h"
     26 #include "llvm/IR/Instructions.h"
     27 #include "llvm/InitializePasses.h"
     28 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
     29 
     30 using namespace llvm;
     31 
     32 #define DEBUG_TYPE "printfToRuntime"
     33 #define DWORD_ALIGN 4
     34 
     35 namespace {
     36 class AMDGPUPrintfRuntimeBinding final : public ModulePass {
     37 
     38 public:
     39   static char ID;
     40 
     41   explicit AMDGPUPrintfRuntimeBinding();
     42 
     43 private:
     44   bool runOnModule(Module &M) override;
     45 
     46   void getAnalysisUsage(AnalysisUsage &AU) const override {
     47     AU.addRequired<TargetLibraryInfoWrapperPass>();
     48     AU.addRequired<DominatorTreeWrapperPass>();
     49   }
     50 };
     51 
     52 class AMDGPUPrintfRuntimeBindingImpl {
     53 public:
     54   AMDGPUPrintfRuntimeBindingImpl(
     55       function_ref<const DominatorTree &(Function &)> GetDT,
     56       function_ref<const TargetLibraryInfo &(Function &)> GetTLI)
     57       : GetDT(GetDT), GetTLI(GetTLI) {}
     58   bool run(Module &M);
     59 
     60 private:
     61   void getConversionSpecifiers(SmallVectorImpl<char> &OpConvSpecifiers,
     62                                StringRef fmt, size_t num_ops) const;
     63 
     64   bool shouldPrintAsStr(char Specifier, Type *OpType) const;
     65   bool lowerPrintfForGpu(Module &M);
     66 
     67   Value *simplify(Instruction *I, const TargetLibraryInfo *TLI,
     68                   const DominatorTree *DT) {
     69     return SimplifyInstruction(I, {*TD, TLI, DT});
     70   }
     71 
     72   const DataLayout *TD;
     73   function_ref<const DominatorTree &(Function &)> GetDT;
     74   function_ref<const TargetLibraryInfo &(Function &)> GetTLI;
     75   SmallVector<CallInst *, 32> Printfs;
     76 };
     77 } // namespace
     78 
     79 char AMDGPUPrintfRuntimeBinding::ID = 0;
     80 
     81 INITIALIZE_PASS_BEGIN(AMDGPUPrintfRuntimeBinding,
     82                       "amdgpu-printf-runtime-binding", "AMDGPU Printf lowering",
     83                       false, false)
     84 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
     85 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
     86 INITIALIZE_PASS_END(AMDGPUPrintfRuntimeBinding, "amdgpu-printf-runtime-binding",
     87                     "AMDGPU Printf lowering", false, false)
     88 
     89 char &llvm::AMDGPUPrintfRuntimeBindingID = AMDGPUPrintfRuntimeBinding::ID;
     90 
     91 namespace llvm {
     92 ModulePass *createAMDGPUPrintfRuntimeBinding() {
     93   return new AMDGPUPrintfRuntimeBinding();
     94 }
     95 } // namespace llvm
     96 
     97 AMDGPUPrintfRuntimeBinding::AMDGPUPrintfRuntimeBinding() : ModulePass(ID) {
     98   initializeAMDGPUPrintfRuntimeBindingPass(*PassRegistry::getPassRegistry());
     99 }
    100 
    101 void AMDGPUPrintfRuntimeBindingImpl::getConversionSpecifiers(
    102     SmallVectorImpl<char> &OpConvSpecifiers, StringRef Fmt,
    103     size_t NumOps) const {
    104   // not all format characters are collected.
    105   // At this time the format characters of interest
    106   // are %p and %s, which use to know if we
    107   // are either storing a literal string or a
    108   // pointer to the printf buffer.
    109   static const char ConvSpecifiers[] = "cdieEfgGaosuxXp";
    110   size_t CurFmtSpecifierIdx = 0;
    111   size_t PrevFmtSpecifierIdx = 0;
    112 
    113   while ((CurFmtSpecifierIdx = Fmt.find_first_of(
    114               ConvSpecifiers, CurFmtSpecifierIdx)) != StringRef::npos) {
    115     bool ArgDump = false;
    116     StringRef CurFmt = Fmt.substr(PrevFmtSpecifierIdx,
    117                                   CurFmtSpecifierIdx - PrevFmtSpecifierIdx);
    118     size_t pTag = CurFmt.find_last_of("%");
    119     if (pTag != StringRef::npos) {
    120       ArgDump = true;
    121       while (pTag && CurFmt[--pTag] == '%') {
    122         ArgDump = !ArgDump;
    123       }
    124     }
    125 
    126     if (ArgDump)
    127       OpConvSpecifiers.push_back(Fmt[CurFmtSpecifierIdx]);
    128 
    129     PrevFmtSpecifierIdx = ++CurFmtSpecifierIdx;
    130   }
    131 }
    132 
    133 bool AMDGPUPrintfRuntimeBindingImpl::shouldPrintAsStr(char Specifier,
    134                                                       Type *OpType) const {
    135   if (Specifier != 's')
    136     return false;
    137   const PointerType *PT = dyn_cast<PointerType>(OpType);
    138   if (!PT || PT->getAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS)
    139     return false;
    140   Type *ElemType = PT->getContainedType(0);
    141   if (ElemType->getTypeID() != Type::IntegerTyID)
    142     return false;
    143   IntegerType *ElemIType = cast<IntegerType>(ElemType);
    144   return ElemIType->getBitWidth() == 8;
    145 }
    146 
    147 bool AMDGPUPrintfRuntimeBindingImpl::lowerPrintfForGpu(Module &M) {
    148   LLVMContext &Ctx = M.getContext();
    149   IRBuilder<> Builder(Ctx);
    150   Type *I32Ty = Type::getInt32Ty(Ctx);
    151   unsigned UniqID = 0;
    152   // NB: This is important for this string size to be divizable by 4
    153   const char NonLiteralStr[4] = "???";
    154 
    155   for (auto CI : Printfs) {
    156     unsigned NumOps = CI->getNumArgOperands();
    157 
    158     SmallString<16> OpConvSpecifiers;
    159     Value *Op = CI->getArgOperand(0);
    160 
    161     if (auto LI = dyn_cast<LoadInst>(Op)) {
    162       Op = LI->getPointerOperand();
    163       for (auto Use : Op->users()) {
    164         if (auto SI = dyn_cast<StoreInst>(Use)) {
    165           Op = SI->getValueOperand();
    166           break;
    167         }
    168       }
    169     }
    170 
    171     if (auto I = dyn_cast<Instruction>(Op)) {
    172       Value *Op_simplified =
    173           simplify(I, &GetTLI(*I->getFunction()), &GetDT(*I->getFunction()));
    174       if (Op_simplified)
    175         Op = Op_simplified;
    176     }
    177 
    178     ConstantExpr *ConstExpr = dyn_cast<ConstantExpr>(Op);
    179 
    180     if (ConstExpr) {
    181       GlobalVariable *GVar = dyn_cast<GlobalVariable>(ConstExpr->getOperand(0));
    182 
    183       StringRef Str("unknown");
    184       if (GVar && GVar->hasInitializer()) {
    185         auto *Init = GVar->getInitializer();
    186         if (auto *CA = dyn_cast<ConstantDataArray>(Init)) {
    187           if (CA->isString())
    188             Str = CA->getAsCString();
    189         } else if (isa<ConstantAggregateZero>(Init)) {
    190           Str = "";
    191         }
    192         //
    193         // we need this call to ascertain
    194         // that we are printing a string
    195         // or a pointer. It takes out the
    196         // specifiers and fills up the first
    197         // arg
    198         getConversionSpecifiers(OpConvSpecifiers, Str, NumOps - 1);
    199       }
    200       // Add metadata for the string
    201       std::string AStreamHolder;
    202       raw_string_ostream Sizes(AStreamHolder);
    203       int Sum = DWORD_ALIGN;
    204       Sizes << CI->getNumArgOperands() - 1;
    205       Sizes << ':';
    206       for (unsigned ArgCount = 1; ArgCount < CI->getNumArgOperands() &&
    207                                   ArgCount <= OpConvSpecifiers.size();
    208            ArgCount++) {
    209         Value *Arg = CI->getArgOperand(ArgCount);
    210         Type *ArgType = Arg->getType();
    211         unsigned ArgSize = TD->getTypeAllocSizeInBits(ArgType);
    212         ArgSize = ArgSize / 8;
    213         //
    214         // ArgSize by design should be a multiple of DWORD_ALIGN,
    215         // expand the arguments that do not follow this rule.
    216         //
    217         if (ArgSize % DWORD_ALIGN != 0) {
    218           llvm::Type *ResType = llvm::Type::getInt32Ty(Ctx);
    219           auto *LLVMVecType = llvm::dyn_cast<llvm::FixedVectorType>(ArgType);
    220           int NumElem = LLVMVecType ? LLVMVecType->getNumElements() : 1;
    221           if (LLVMVecType && NumElem > 1)
    222             ResType = llvm::FixedVectorType::get(ResType, NumElem);
    223           Builder.SetInsertPoint(CI);
    224           Builder.SetCurrentDebugLocation(CI->getDebugLoc());
    225           if (OpConvSpecifiers[ArgCount - 1] == 'x' ||
    226               OpConvSpecifiers[ArgCount - 1] == 'X' ||
    227               OpConvSpecifiers[ArgCount - 1] == 'u' ||
    228               OpConvSpecifiers[ArgCount - 1] == 'o')
    229             Arg = Builder.CreateZExt(Arg, ResType);
    230           else
    231             Arg = Builder.CreateSExt(Arg, ResType);
    232           ArgType = Arg->getType();
    233           ArgSize = TD->getTypeAllocSizeInBits(ArgType);
    234           ArgSize = ArgSize / 8;
    235           CI->setOperand(ArgCount, Arg);
    236         }
    237         if (OpConvSpecifiers[ArgCount - 1] == 'f') {
    238           ConstantFP *FpCons = dyn_cast<ConstantFP>(Arg);
    239           if (FpCons)
    240             ArgSize = 4;
    241           else {
    242             FPExtInst *FpExt = dyn_cast<FPExtInst>(Arg);
    243             if (FpExt && FpExt->getType()->isDoubleTy() &&
    244                 FpExt->getOperand(0)->getType()->isFloatTy())
    245               ArgSize = 4;
    246           }
    247         }
    248         if (shouldPrintAsStr(OpConvSpecifiers[ArgCount - 1], ArgType)) {
    249           if (auto *ConstExpr = dyn_cast<ConstantExpr>(Arg)) {
    250             auto *GV = dyn_cast<GlobalVariable>(ConstExpr->getOperand(0));
    251             if (GV && GV->hasInitializer()) {
    252               Constant *Init = GV->getInitializer();
    253               bool IsZeroValue = Init->isZeroValue();
    254               auto *CA = dyn_cast<ConstantDataArray>(Init);
    255               if (IsZeroValue || (CA && CA->isString())) {
    256                 size_t SizeStr =
    257                     IsZeroValue ? 1 : (strlen(CA->getAsCString().data()) + 1);
    258                 size_t Rem = SizeStr % DWORD_ALIGN;
    259                 size_t NSizeStr = 0;
    260                 LLVM_DEBUG(dbgs() << "Printf string original size = " << SizeStr
    261                                   << '\n');
    262                 if (Rem) {
    263                   NSizeStr = SizeStr + (DWORD_ALIGN - Rem);
    264                 } else {
    265                   NSizeStr = SizeStr;
    266                 }
    267                 ArgSize = NSizeStr;
    268               }
    269             } else {
    270               ArgSize = sizeof(NonLiteralStr);
    271             }
    272           } else {
    273             ArgSize = sizeof(NonLiteralStr);
    274           }
    275         }
    276         LLVM_DEBUG(dbgs() << "Printf ArgSize (in buffer) = " << ArgSize
    277                           << " for type: " << *ArgType << '\n');
    278         Sizes << ArgSize << ':';
    279         Sum += ArgSize;
    280       }
    281       LLVM_DEBUG(dbgs() << "Printf format string in source = " << Str.str()
    282                         << '\n');
    283       for (size_t I = 0; I < Str.size(); ++I) {
    284         // Rest of the C escape sequences (e.g. \') are handled correctly
    285         // by the MDParser
    286         switch (Str[I]) {
    287         case '\a':
    288           Sizes << "\\a";
    289           break;
    290         case '\b':
    291           Sizes << "\\b";
    292           break;
    293         case '\f':
    294           Sizes << "\\f";
    295           break;
    296         case '\n':
    297           Sizes << "\\n";
    298           break;
    299         case '\r':
    300           Sizes << "\\r";
    301           break;
    302         case '\v':
    303           Sizes << "\\v";
    304           break;
    305         case ':':
    306           // ':' cannot be scanned by Flex, as it is defined as a delimiter
    307           // Replace it with it's octal representation \72
    308           Sizes << "\\72";
    309           break;
    310         default:
    311           Sizes << Str[I];
    312           break;
    313         }
    314       }
    315 
    316       // Insert the printf_alloc call
    317       Builder.SetInsertPoint(CI);
    318       Builder.SetCurrentDebugLocation(CI->getDebugLoc());
    319 
    320       AttributeList Attr = AttributeList::get(Ctx, AttributeList::FunctionIndex,
    321                                               Attribute::NoUnwind);
    322 
    323       Type *SizetTy = Type::getInt32Ty(Ctx);
    324 
    325       Type *Tys_alloc[1] = {SizetTy};
    326       Type *I8Ptr = PointerType::get(Type::getInt8Ty(Ctx), 1);
    327       FunctionType *FTy_alloc = FunctionType::get(I8Ptr, Tys_alloc, false);
    328       FunctionCallee PrintfAllocFn =
    329           M.getOrInsertFunction(StringRef("__printf_alloc"), FTy_alloc, Attr);
    330 
    331       LLVM_DEBUG(dbgs() << "Printf metadata = " << Sizes.str() << '\n');
    332       std::string fmtstr = itostr(++UniqID) + ":" + Sizes.str().c_str();
    333       MDString *fmtStrArray = MDString::get(Ctx, fmtstr);
    334 
    335       // Instead of creating global variables, the
    336       // printf format strings are extracted
    337       // and passed as metadata. This avoids
    338       // polluting llvm's symbol tables in this module.
    339       // Metadata is going to be extracted
    340       // by the backend passes and inserted
    341       // into the OpenCL binary as appropriate.
    342       StringRef amd("llvm.printf.fmts");
    343       NamedMDNode *metaD = M.getOrInsertNamedMetadata(amd);
    344       MDNode *myMD = MDNode::get(Ctx, fmtStrArray);
    345       metaD->addOperand(myMD);
    346       Value *sumC = ConstantInt::get(SizetTy, Sum, false);
    347       SmallVector<Value *, 1> alloc_args;
    348       alloc_args.push_back(sumC);
    349       CallInst *pcall =
    350           CallInst::Create(PrintfAllocFn, alloc_args, "printf_alloc_fn", CI);
    351 
    352       //
    353       // Insert code to split basicblock with a
    354       // piece of hammock code.
    355       // basicblock splits after buffer overflow check
    356       //
    357       ConstantPointerNull *zeroIntPtr =
    358           ConstantPointerNull::get(PointerType::get(Type::getInt8Ty(Ctx), 1));
    359       auto *cmp = cast<ICmpInst>(Builder.CreateICmpNE(pcall, zeroIntPtr, ""));
    360       if (!CI->use_empty()) {
    361         Value *result =
    362             Builder.CreateSExt(Builder.CreateNot(cmp), I32Ty, "printf_res");
    363         CI->replaceAllUsesWith(result);
    364       }
    365       SplitBlock(CI->getParent(), cmp);
    366       Instruction *Brnch =
    367           SplitBlockAndInsertIfThen(cmp, cmp->getNextNode(), false);
    368 
    369       Builder.SetInsertPoint(Brnch);
    370 
    371       // store unique printf id in the buffer
    372       //
    373       SmallVector<Value *, 1> ZeroIdxList;
    374       ConstantInt *zeroInt =
    375           ConstantInt::get(Ctx, APInt(32, StringRef("0"), 10));
    376       ZeroIdxList.push_back(zeroInt);
    377 
    378       GetElementPtrInst *BufferIdx = GetElementPtrInst::Create(
    379           nullptr, pcall, ZeroIdxList, "PrintBuffID", Brnch);
    380 
    381       Type *idPointer = PointerType::get(I32Ty, AMDGPUAS::GLOBAL_ADDRESS);
    382       Value *id_gep_cast =
    383           new BitCastInst(BufferIdx, idPointer, "PrintBuffIdCast", Brnch);
    384 
    385       new StoreInst(ConstantInt::get(I32Ty, UniqID), id_gep_cast, Brnch);
    386 
    387       SmallVector<Value *, 2> FourthIdxList;
    388       ConstantInt *fourInt =
    389           ConstantInt::get(Ctx, APInt(32, StringRef("4"), 10));
    390 
    391       FourthIdxList.push_back(fourInt); // 1st 4 bytes hold the printf_id
    392       // the following GEP is the buffer pointer
    393       BufferIdx = GetElementPtrInst::Create(nullptr, pcall, FourthIdxList,
    394                                             "PrintBuffGep", Brnch);
    395 
    396       Type *Int32Ty = Type::getInt32Ty(Ctx);
    397       Type *Int64Ty = Type::getInt64Ty(Ctx);
    398       for (unsigned ArgCount = 1; ArgCount < CI->getNumArgOperands() &&
    399                                   ArgCount <= OpConvSpecifiers.size();
    400            ArgCount++) {
    401         Value *Arg = CI->getArgOperand(ArgCount);
    402         Type *ArgType = Arg->getType();
    403         SmallVector<Value *, 32> WhatToStore;
    404         if (ArgType->isFPOrFPVectorTy() && !isa<VectorType>(ArgType)) {
    405           Type *IType = (ArgType->isFloatTy()) ? Int32Ty : Int64Ty;
    406           if (OpConvSpecifiers[ArgCount - 1] == 'f') {
    407             if (auto *FpCons = dyn_cast<ConstantFP>(Arg)) {
    408               APFloat Val(FpCons->getValueAPF());
    409               bool Lost = false;
    410               Val.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,
    411                           &Lost);
    412               Arg = ConstantFP::get(Ctx, Val);
    413               IType = Int32Ty;
    414             } else if (auto *FpExt = dyn_cast<FPExtInst>(Arg)) {
    415               if (FpExt->getType()->isDoubleTy() &&
    416                   FpExt->getOperand(0)->getType()->isFloatTy()) {
    417                 Arg = FpExt->getOperand(0);
    418                 IType = Int32Ty;
    419               }
    420             }
    421           }
    422           Arg = new BitCastInst(Arg, IType, "PrintArgFP", Brnch);
    423           WhatToStore.push_back(Arg);
    424         } else if (ArgType->getTypeID() == Type::PointerTyID) {
    425           if (shouldPrintAsStr(OpConvSpecifiers[ArgCount - 1], ArgType)) {
    426             const char *S = NonLiteralStr;
    427             if (auto *ConstExpr = dyn_cast<ConstantExpr>(Arg)) {
    428               auto *GV = dyn_cast<GlobalVariable>(ConstExpr->getOperand(0));
    429               if (GV && GV->hasInitializer()) {
    430                 Constant *Init = GV->getInitializer();
    431                 bool IsZeroValue = Init->isZeroValue();
    432                 auto *CA = dyn_cast<ConstantDataArray>(Init);
    433                 if (IsZeroValue || (CA && CA->isString())) {
    434                   S = IsZeroValue ? "" : CA->getAsCString().data();
    435                 }
    436               }
    437             }
    438             size_t SizeStr = strlen(S) + 1;
    439             size_t Rem = SizeStr % DWORD_ALIGN;
    440             size_t NSizeStr = 0;
    441             if (Rem) {
    442               NSizeStr = SizeStr + (DWORD_ALIGN - Rem);
    443             } else {
    444               NSizeStr = SizeStr;
    445             }
    446             if (S[0]) {
    447               char *MyNewStr = new char[NSizeStr]();
    448               strcpy(MyNewStr, S);
    449               int NumInts = NSizeStr / 4;
    450               int CharC = 0;
    451               while (NumInts) {
    452                 int ANum = *(int *)(MyNewStr + CharC);
    453                 CharC += 4;
    454                 NumInts--;
    455                 Value *ANumV = ConstantInt::get(Int32Ty, ANum, false);
    456                 WhatToStore.push_back(ANumV);
    457               }
    458               delete[] MyNewStr;
    459             } else {
    460               // Empty string, give a hint to RT it is no NULL
    461               Value *ANumV = ConstantInt::get(Int32Ty, 0xFFFFFF00, false);
    462               WhatToStore.push_back(ANumV);
    463             }
    464           } else {
    465             uint64_t Size = TD->getTypeAllocSizeInBits(ArgType);
    466             assert((Size == 32 || Size == 64) && "unsupported size");
    467             Type *DstType = (Size == 32) ? Int32Ty : Int64Ty;
    468             Arg = new PtrToIntInst(Arg, DstType, "PrintArgPtr", Brnch);
    469             WhatToStore.push_back(Arg);
    470           }
    471         } else if (isa<FixedVectorType>(ArgType)) {
    472           Type *IType = NULL;
    473           uint32_t EleCount = cast<FixedVectorType>(ArgType)->getNumElements();
    474           uint32_t EleSize = ArgType->getScalarSizeInBits();
    475           uint32_t TotalSize = EleCount * EleSize;
    476           if (EleCount == 3) {
    477             ShuffleVectorInst *Shuffle =
    478                 new ShuffleVectorInst(Arg, Arg, ArrayRef<int>{0, 1, 2, 2});
    479             Shuffle->insertBefore(Brnch);
    480             Arg = Shuffle;
    481             ArgType = Arg->getType();
    482             TotalSize += EleSize;
    483           }
    484           switch (EleSize) {
    485           default:
    486             EleCount = TotalSize / 64;
    487             IType = Type::getInt64Ty(ArgType->getContext());
    488             break;
    489           case 8:
    490             if (EleCount >= 8) {
    491               EleCount = TotalSize / 64;
    492               IType = Type::getInt64Ty(ArgType->getContext());
    493             } else if (EleCount >= 3) {
    494               EleCount = 1;
    495               IType = Type::getInt32Ty(ArgType->getContext());
    496             } else {
    497               EleCount = 1;
    498               IType = Type::getInt16Ty(ArgType->getContext());
    499             }
    500             break;
    501           case 16:
    502             if (EleCount >= 3) {
    503               EleCount = TotalSize / 64;
    504               IType = Type::getInt64Ty(ArgType->getContext());
    505             } else {
    506               EleCount = 1;
    507               IType = Type::getInt32Ty(ArgType->getContext());
    508             }
    509             break;
    510           }
    511           if (EleCount > 1) {
    512             IType = FixedVectorType::get(IType, EleCount);
    513           }
    514           Arg = new BitCastInst(Arg, IType, "PrintArgVect", Brnch);
    515           WhatToStore.push_back(Arg);
    516         } else {
    517           WhatToStore.push_back(Arg);
    518         }
    519         for (unsigned I = 0, E = WhatToStore.size(); I != E; ++I) {
    520           Value *TheBtCast = WhatToStore[I];
    521           unsigned ArgSize =
    522               TD->getTypeAllocSizeInBits(TheBtCast->getType()) / 8;
    523           SmallVector<Value *, 1> BuffOffset;
    524           BuffOffset.push_back(ConstantInt::get(I32Ty, ArgSize));
    525 
    526           Type *ArgPointer = PointerType::get(TheBtCast->getType(), 1);
    527           Value *CastedGEP =
    528               new BitCastInst(BufferIdx, ArgPointer, "PrintBuffPtrCast", Brnch);
    529           StoreInst *StBuff = new StoreInst(TheBtCast, CastedGEP, Brnch);
    530           LLVM_DEBUG(dbgs() << "inserting store to printf buffer:\n"
    531                             << *StBuff << '\n');
    532           (void)StBuff;
    533           if (I + 1 == E && ArgCount + 1 == CI->getNumArgOperands())
    534             break;
    535           BufferIdx = GetElementPtrInst::Create(nullptr, BufferIdx, BuffOffset,
    536                                                 "PrintBuffNextPtr", Brnch);
    537           LLVM_DEBUG(dbgs() << "inserting gep to the printf buffer:\n"
    538                             << *BufferIdx << '\n');
    539         }
    540       }
    541     }
    542   }
    543 
    544   // erase the printf calls
    545   for (auto CI : Printfs)
    546     CI->eraseFromParent();
    547 
    548   Printfs.clear();
    549   return true;
    550 }
    551 
    552 bool AMDGPUPrintfRuntimeBindingImpl::run(Module &M) {
    553   Triple TT(M.getTargetTriple());
    554   if (TT.getArch() == Triple::r600)
    555     return false;
    556 
    557   auto PrintfFunction = M.getFunction("printf");
    558   if (!PrintfFunction)
    559     return false;
    560 
    561   for (auto &U : PrintfFunction->uses()) {
    562     if (auto *CI = dyn_cast<CallInst>(U.getUser())) {
    563       if (CI->isCallee(&U))
    564         Printfs.push_back(CI);
    565     }
    566   }
    567 
    568   if (Printfs.empty())
    569     return false;
    570 
    571   if (auto HostcallFunction = M.getFunction("__ockl_hostcall_internal")) {
    572     for (auto &U : HostcallFunction->uses()) {
    573       if (auto *CI = dyn_cast<CallInst>(U.getUser())) {
    574         M.getContext().emitError(
    575             CI, "Cannot use both printf and hostcall in the same module");
    576       }
    577     }
    578   }
    579 
    580   TD = &M.getDataLayout();
    581 
    582   return lowerPrintfForGpu(M);
    583 }
    584 
    585 bool AMDGPUPrintfRuntimeBinding::runOnModule(Module &M) {
    586   auto GetDT = [this](Function &F) -> DominatorTree & {
    587     return this->getAnalysis<DominatorTreeWrapperPass>(F).getDomTree();
    588   };
    589   auto GetTLI = [this](Function &F) -> TargetLibraryInfo & {
    590     return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
    591   };
    592 
    593   return AMDGPUPrintfRuntimeBindingImpl(GetDT, GetTLI).run(M);
    594 }
    595 
    596 PreservedAnalyses
    597 AMDGPUPrintfRuntimeBindingPass::run(Module &M, ModuleAnalysisManager &AM) {
    598   FunctionAnalysisManager &FAM =
    599       AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
    600   auto GetDT = [&FAM](Function &F) -> DominatorTree & {
    601     return FAM.getResult<DominatorTreeAnalysis>(F);
    602   };
    603   auto GetTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
    604     return FAM.getResult<TargetLibraryAnalysis>(F);
    605   };
    606   bool Changed = AMDGPUPrintfRuntimeBindingImpl(GetDT, GetTLI).run(M);
    607   return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
    608 }
    609