1 //===- OpenMPIRBuilder.cpp - Builder for LLVM-IR for OpenMP directives ----===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// \file 9 /// 10 /// This file implements the OpenMPIRBuilder class, which is used as a 11 /// convenient way to create LLVM instructions for OpenMP directives. 12 /// 13 //===----------------------------------------------------------------------===// 14 15 #include "llvm/Frontend/OpenMP/OMPIRBuilder.h" 16 17 #include "llvm/ADT/StringRef.h" 18 #include "llvm/ADT/Triple.h" 19 #include "llvm/IR/CFG.h" 20 #include "llvm/IR/DebugInfo.h" 21 #include "llvm/IR/IRBuilder.h" 22 #include "llvm/IR/MDBuilder.h" 23 #include "llvm/Support/CommandLine.h" 24 #include "llvm/Support/Error.h" 25 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 26 #include "llvm/Transforms/Utils/CodeExtractor.h" 27 28 #include <sstream> 29 30 #define DEBUG_TYPE "openmp-ir-builder" 31 32 using namespace llvm; 33 using namespace omp; 34 35 static cl::opt<bool> 36 OptimisticAttributes("openmp-ir-builder-optimistic-attributes", cl::Hidden, 37 cl::desc("Use optimistic attributes describing " 38 "'as-if' properties of runtime calls."), 39 cl::init(false)); 40 41 void OpenMPIRBuilder::addAttributes(omp::RuntimeFunction FnID, Function &Fn) { 42 LLVMContext &Ctx = Fn.getContext(); 43 44 // Get the function's current attributes. 45 auto Attrs = Fn.getAttributes(); 46 auto FnAttrs = Attrs.getFnAttributes(); 47 auto RetAttrs = Attrs.getRetAttributes(); 48 SmallVector<AttributeSet, 4> ArgAttrs; 49 for (size_t ArgNo = 0; ArgNo < Fn.arg_size(); ++ArgNo) 50 ArgAttrs.emplace_back(Attrs.getParamAttributes(ArgNo)); 51 52 #define OMP_ATTRS_SET(VarName, AttrSet) AttributeSet VarName = AttrSet; 53 #include "llvm/Frontend/OpenMP/OMPKinds.def" 54 55 // Add attributes to the function declaration. 56 switch (FnID) { 57 #define OMP_RTL_ATTRS(Enum, FnAttrSet, RetAttrSet, ArgAttrSets) \ 58 case Enum: \ 59 FnAttrs = FnAttrs.addAttributes(Ctx, FnAttrSet); \ 60 RetAttrs = RetAttrs.addAttributes(Ctx, RetAttrSet); \ 61 for (size_t ArgNo = 0; ArgNo < ArgAttrSets.size(); ++ArgNo) \ 62 ArgAttrs[ArgNo] = \ 63 ArgAttrs[ArgNo].addAttributes(Ctx, ArgAttrSets[ArgNo]); \ 64 Fn.setAttributes(AttributeList::get(Ctx, FnAttrs, RetAttrs, ArgAttrs)); \ 65 break; 66 #include "llvm/Frontend/OpenMP/OMPKinds.def" 67 default: 68 // Attributes are optional. 69 break; 70 } 71 } 72 73 FunctionCallee 74 OpenMPIRBuilder::getOrCreateRuntimeFunction(Module &M, RuntimeFunction FnID) { 75 FunctionType *FnTy = nullptr; 76 Function *Fn = nullptr; 77 78 // Try to find the declation in the module first. 79 switch (FnID) { 80 #define OMP_RTL(Enum, Str, IsVarArg, ReturnType, ...) \ 81 case Enum: \ 82 FnTy = FunctionType::get(ReturnType, ArrayRef<Type *>{__VA_ARGS__}, \ 83 IsVarArg); \ 84 Fn = M.getFunction(Str); \ 85 break; 86 #include "llvm/Frontend/OpenMP/OMPKinds.def" 87 } 88 89 if (!Fn) { 90 // Create a new declaration if we need one. 91 switch (FnID) { 92 #define OMP_RTL(Enum, Str, ...) \ 93 case Enum: \ 94 Fn = Function::Create(FnTy, GlobalValue::ExternalLinkage, Str, M); \ 95 break; 96 #include "llvm/Frontend/OpenMP/OMPKinds.def" 97 } 98 99 // Add information if the runtime function takes a callback function 100 if (FnID == OMPRTL___kmpc_fork_call || FnID == OMPRTL___kmpc_fork_teams) { 101 if (!Fn->hasMetadata(LLVMContext::MD_callback)) { 102 LLVMContext &Ctx = Fn->getContext(); 103 MDBuilder MDB(Ctx); 104 // Annotate the callback behavior of the runtime function: 105 // - The callback callee is argument number 2 (microtask). 106 // - The first two arguments of the callback callee are unknown (-1). 107 // - All variadic arguments to the runtime function are passed to the 108 // callback callee. 109 Fn->addMetadata( 110 LLVMContext::MD_callback, 111 *MDNode::get(Ctx, {MDB.createCallbackEncoding( 112 2, {-1, -1}, /* VarArgsArePassed */ true)})); 113 } 114 } 115 116 LLVM_DEBUG(dbgs() << "Created OpenMP runtime function " << Fn->getName() 117 << " with type " << *Fn->getFunctionType() << "\n"); 118 addAttributes(FnID, *Fn); 119 120 } else { 121 LLVM_DEBUG(dbgs() << "Found OpenMP runtime function " << Fn->getName() 122 << " with type " << *Fn->getFunctionType() << "\n"); 123 } 124 125 assert(Fn && "Failed to create OpenMP runtime function"); 126 127 // Cast the function to the expected type if necessary 128 Constant *C = ConstantExpr::getBitCast(Fn, FnTy->getPointerTo()); 129 return {FnTy, C}; 130 } 131 132 Function *OpenMPIRBuilder::getOrCreateRuntimeFunctionPtr(RuntimeFunction FnID) { 133 FunctionCallee RTLFn = getOrCreateRuntimeFunction(M, FnID); 134 auto *Fn = dyn_cast<llvm::Function>(RTLFn.getCallee()); 135 assert(Fn && "Failed to create OpenMP runtime function pointer"); 136 return Fn; 137 } 138 139 void OpenMPIRBuilder::initialize() { initializeTypes(M); } 140 141 void OpenMPIRBuilder::finalize(Function *Fn, bool AllowExtractorSinking) { 142 SmallPtrSet<BasicBlock *, 32> ParallelRegionBlockSet; 143 SmallVector<BasicBlock *, 32> Blocks; 144 SmallVector<OutlineInfo, 16> DeferredOutlines; 145 for (OutlineInfo &OI : OutlineInfos) { 146 // Skip functions that have not finalized yet; may happen with nested 147 // function generation. 148 if (Fn && OI.getFunction() != Fn) { 149 DeferredOutlines.push_back(OI); 150 continue; 151 } 152 153 ParallelRegionBlockSet.clear(); 154 Blocks.clear(); 155 OI.collectBlocks(ParallelRegionBlockSet, Blocks); 156 157 Function *OuterFn = OI.getFunction(); 158 CodeExtractorAnalysisCache CEAC(*OuterFn); 159 CodeExtractor Extractor(Blocks, /* DominatorTree */ nullptr, 160 /* AggregateArgs */ false, 161 /* BlockFrequencyInfo */ nullptr, 162 /* BranchProbabilityInfo */ nullptr, 163 /* AssumptionCache */ nullptr, 164 /* AllowVarArgs */ true, 165 /* AllowAlloca */ true, 166 /* Suffix */ ".omp_par"); 167 168 LLVM_DEBUG(dbgs() << "Before outlining: " << *OuterFn << "\n"); 169 LLVM_DEBUG(dbgs() << "Entry " << OI.EntryBB->getName() 170 << " Exit: " << OI.ExitBB->getName() << "\n"); 171 assert(Extractor.isEligible() && 172 "Expected OpenMP outlining to be possible!"); 173 174 Function *OutlinedFn = Extractor.extractCodeRegion(CEAC); 175 176 LLVM_DEBUG(dbgs() << "After outlining: " << *OuterFn << "\n"); 177 LLVM_DEBUG(dbgs() << " Outlined function: " << *OutlinedFn << "\n"); 178 assert(OutlinedFn->getReturnType()->isVoidTy() && 179 "OpenMP outlined functions should not return a value!"); 180 181 // For compability with the clang CG we move the outlined function after the 182 // one with the parallel region. 183 OutlinedFn->removeFromParent(); 184 M.getFunctionList().insertAfter(OuterFn->getIterator(), OutlinedFn); 185 186 // Remove the artificial entry introduced by the extractor right away, we 187 // made our own entry block after all. 188 { 189 BasicBlock &ArtificialEntry = OutlinedFn->getEntryBlock(); 190 assert(ArtificialEntry.getUniqueSuccessor() == OI.EntryBB); 191 assert(OI.EntryBB->getUniquePredecessor() == &ArtificialEntry); 192 if (AllowExtractorSinking) { 193 // Move instructions from the to-be-deleted ArtificialEntry to the entry 194 // basic block of the parallel region. CodeExtractor may have sunk 195 // allocas/bitcasts for values that are solely used in the outlined 196 // region and do not escape. 197 assert(!ArtificialEntry.empty() && 198 "Expected instructions to sink in the outlined region"); 199 for (BasicBlock::iterator It = ArtificialEntry.begin(), 200 End = ArtificialEntry.end(); 201 It != End;) { 202 Instruction &I = *It; 203 It++; 204 205 if (I.isTerminator()) 206 continue; 207 208 I.moveBefore(*OI.EntryBB, OI.EntryBB->getFirstInsertionPt()); 209 } 210 } 211 OI.EntryBB->moveBefore(&ArtificialEntry); 212 ArtificialEntry.eraseFromParent(); 213 } 214 assert(&OutlinedFn->getEntryBlock() == OI.EntryBB); 215 assert(OutlinedFn && OutlinedFn->getNumUses() == 1); 216 217 // Run a user callback, e.g. to add attributes. 218 if (OI.PostOutlineCB) 219 OI.PostOutlineCB(*OutlinedFn); 220 } 221 222 // Remove work items that have been completed. 223 OutlineInfos = std::move(DeferredOutlines); 224 } 225 226 OpenMPIRBuilder::~OpenMPIRBuilder() { 227 assert(OutlineInfos.empty() && "There must be no outstanding outlinings"); 228 } 229 230 Value *OpenMPIRBuilder::getOrCreateIdent(Constant *SrcLocStr, 231 IdentFlag LocFlags, 232 unsigned Reserve2Flags) { 233 // Enable "C-mode". 234 LocFlags |= OMP_IDENT_FLAG_KMPC; 235 236 Value *&Ident = 237 IdentMap[{SrcLocStr, uint64_t(LocFlags) << 31 | Reserve2Flags}]; 238 if (!Ident) { 239 Constant *I32Null = ConstantInt::getNullValue(Int32); 240 Constant *IdentData[] = { 241 I32Null, ConstantInt::get(Int32, uint32_t(LocFlags)), 242 ConstantInt::get(Int32, Reserve2Flags), I32Null, SrcLocStr}; 243 Constant *Initializer = ConstantStruct::get( 244 cast<StructType>(IdentPtr->getPointerElementType()), IdentData); 245 246 // Look for existing encoding of the location + flags, not needed but 247 // minimizes the difference to the existing solution while we transition. 248 for (GlobalVariable &GV : M.getGlobalList()) 249 if (GV.getType() == IdentPtr && GV.hasInitializer()) 250 if (GV.getInitializer() == Initializer) 251 return Ident = &GV; 252 253 auto *GV = new GlobalVariable(M, IdentPtr->getPointerElementType(), 254 /* isConstant = */ true, 255 GlobalValue::PrivateLinkage, Initializer); 256 GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); 257 GV->setAlignment(Align(8)); 258 Ident = GV; 259 } 260 return Builder.CreatePointerCast(Ident, IdentPtr); 261 } 262 263 Type *OpenMPIRBuilder::getLanemaskType() { 264 LLVMContext &Ctx = M.getContext(); 265 Triple triple(M.getTargetTriple()); 266 267 // This test is adequate until deviceRTL has finer grained lane widths 268 return triple.isAMDGCN() ? Type::getInt64Ty(Ctx) : Type::getInt32Ty(Ctx); 269 } 270 271 Constant *OpenMPIRBuilder::getOrCreateSrcLocStr(StringRef LocStr) { 272 Constant *&SrcLocStr = SrcLocStrMap[LocStr]; 273 if (!SrcLocStr) { 274 Constant *Initializer = 275 ConstantDataArray::getString(M.getContext(), LocStr); 276 277 // Look for existing encoding of the location, not needed but minimizes the 278 // difference to the existing solution while we transition. 279 for (GlobalVariable &GV : M.getGlobalList()) 280 if (GV.isConstant() && GV.hasInitializer() && 281 GV.getInitializer() == Initializer) 282 return SrcLocStr = ConstantExpr::getPointerCast(&GV, Int8Ptr); 283 284 SrcLocStr = Builder.CreateGlobalStringPtr(LocStr, /* Name */ "", 285 /* AddressSpace */ 0, &M); 286 } 287 return SrcLocStr; 288 } 289 290 Constant *OpenMPIRBuilder::getOrCreateSrcLocStr(StringRef FunctionName, 291 StringRef FileName, 292 unsigned Line, 293 unsigned Column) { 294 SmallString<128> Buffer; 295 Buffer.push_back(';'); 296 Buffer.append(FileName); 297 Buffer.push_back(';'); 298 Buffer.append(FunctionName); 299 Buffer.push_back(';'); 300 Buffer.append(std::to_string(Line)); 301 Buffer.push_back(';'); 302 Buffer.append(std::to_string(Column)); 303 Buffer.push_back(';'); 304 Buffer.push_back(';'); 305 return getOrCreateSrcLocStr(Buffer.str()); 306 } 307 308 Constant *OpenMPIRBuilder::getOrCreateDefaultSrcLocStr() { 309 return getOrCreateSrcLocStr(";unknown;unknown;0;0;;"); 310 } 311 312 Constant * 313 OpenMPIRBuilder::getOrCreateSrcLocStr(const LocationDescription &Loc) { 314 DILocation *DIL = Loc.DL.get(); 315 if (!DIL) 316 return getOrCreateDefaultSrcLocStr(); 317 StringRef FileName = M.getName(); 318 if (DIFile *DIF = DIL->getFile()) 319 if (Optional<StringRef> Source = DIF->getSource()) 320 FileName = *Source; 321 StringRef Function = DIL->getScope()->getSubprogram()->getName(); 322 Function = 323 !Function.empty() ? Function : Loc.IP.getBlock()->getParent()->getName(); 324 return getOrCreateSrcLocStr(Function, FileName, DIL->getLine(), 325 DIL->getColumn()); 326 } 327 328 Value *OpenMPIRBuilder::getOrCreateThreadID(Value *Ident) { 329 return Builder.CreateCall( 330 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_global_thread_num), Ident, 331 "omp_global_thread_num"); 332 } 333 334 OpenMPIRBuilder::InsertPointTy 335 OpenMPIRBuilder::createBarrier(const LocationDescription &Loc, Directive DK, 336 bool ForceSimpleCall, bool CheckCancelFlag) { 337 if (!updateToLocation(Loc)) 338 return Loc.IP; 339 return emitBarrierImpl(Loc, DK, ForceSimpleCall, CheckCancelFlag); 340 } 341 342 OpenMPIRBuilder::InsertPointTy 343 OpenMPIRBuilder::emitBarrierImpl(const LocationDescription &Loc, Directive Kind, 344 bool ForceSimpleCall, bool CheckCancelFlag) { 345 // Build call __kmpc_cancel_barrier(loc, thread_id) or 346 // __kmpc_barrier(loc, thread_id); 347 348 IdentFlag BarrierLocFlags; 349 switch (Kind) { 350 case OMPD_for: 351 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_FOR; 352 break; 353 case OMPD_sections: 354 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_SECTIONS; 355 break; 356 case OMPD_single: 357 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_SINGLE; 358 break; 359 case OMPD_barrier: 360 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_EXPL; 361 break; 362 default: 363 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL; 364 break; 365 } 366 367 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); 368 Value *Args[] = {getOrCreateIdent(SrcLocStr, BarrierLocFlags), 369 getOrCreateThreadID(getOrCreateIdent(SrcLocStr))}; 370 371 // If we are in a cancellable parallel region, barriers are cancellation 372 // points. 373 // TODO: Check why we would force simple calls or to ignore the cancel flag. 374 bool UseCancelBarrier = 375 !ForceSimpleCall && isLastFinalizationInfoCancellable(OMPD_parallel); 376 377 Value *Result = 378 Builder.CreateCall(getOrCreateRuntimeFunctionPtr( 379 UseCancelBarrier ? OMPRTL___kmpc_cancel_barrier 380 : OMPRTL___kmpc_barrier), 381 Args); 382 383 if (UseCancelBarrier && CheckCancelFlag) 384 emitCancelationCheckImpl(Result, OMPD_parallel); 385 386 return Builder.saveIP(); 387 } 388 389 OpenMPIRBuilder::InsertPointTy 390 OpenMPIRBuilder::createCancel(const LocationDescription &Loc, 391 Value *IfCondition, 392 omp::Directive CanceledDirective) { 393 if (!updateToLocation(Loc)) 394 return Loc.IP; 395 396 // LLVM utilities like blocks with terminators. 397 auto *UI = Builder.CreateUnreachable(); 398 399 Instruction *ThenTI = UI, *ElseTI = nullptr; 400 if (IfCondition) 401 SplitBlockAndInsertIfThenElse(IfCondition, UI, &ThenTI, &ElseTI); 402 Builder.SetInsertPoint(ThenTI); 403 404 Value *CancelKind = nullptr; 405 switch (CanceledDirective) { 406 #define OMP_CANCEL_KIND(Enum, Str, DirectiveEnum, Value) \ 407 case DirectiveEnum: \ 408 CancelKind = Builder.getInt32(Value); \ 409 break; 410 #include "llvm/Frontend/OpenMP/OMPKinds.def" 411 default: 412 llvm_unreachable("Unknown cancel kind!"); 413 } 414 415 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); 416 Value *Ident = getOrCreateIdent(SrcLocStr); 417 Value *Args[] = {Ident, getOrCreateThreadID(Ident), CancelKind}; 418 Value *Result = Builder.CreateCall( 419 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_cancel), Args); 420 421 // The actual cancel logic is shared with others, e.g., cancel_barriers. 422 emitCancelationCheckImpl(Result, CanceledDirective); 423 424 // Update the insertion point and remove the terminator we introduced. 425 Builder.SetInsertPoint(UI->getParent()); 426 UI->eraseFromParent(); 427 428 return Builder.saveIP(); 429 } 430 431 void OpenMPIRBuilder::emitCancelationCheckImpl( 432 Value *CancelFlag, omp::Directive CanceledDirective) { 433 assert(isLastFinalizationInfoCancellable(CanceledDirective) && 434 "Unexpected cancellation!"); 435 436 // For a cancel barrier we create two new blocks. 437 BasicBlock *BB = Builder.GetInsertBlock(); 438 BasicBlock *NonCancellationBlock; 439 if (Builder.GetInsertPoint() == BB->end()) { 440 // TODO: This branch will not be needed once we moved to the 441 // OpenMPIRBuilder codegen completely. 442 NonCancellationBlock = BasicBlock::Create( 443 BB->getContext(), BB->getName() + ".cont", BB->getParent()); 444 } else { 445 NonCancellationBlock = SplitBlock(BB, &*Builder.GetInsertPoint()); 446 BB->getTerminator()->eraseFromParent(); 447 Builder.SetInsertPoint(BB); 448 } 449 BasicBlock *CancellationBlock = BasicBlock::Create( 450 BB->getContext(), BB->getName() + ".cncl", BB->getParent()); 451 452 // Jump to them based on the return value. 453 Value *Cmp = Builder.CreateIsNull(CancelFlag); 454 Builder.CreateCondBr(Cmp, NonCancellationBlock, CancellationBlock, 455 /* TODO weight */ nullptr, nullptr); 456 457 // From the cancellation block we finalize all variables and go to the 458 // post finalization block that is known to the FiniCB callback. 459 Builder.SetInsertPoint(CancellationBlock); 460 auto &FI = FinalizationStack.back(); 461 FI.FiniCB(Builder.saveIP()); 462 463 // The continuation block is where code generation continues. 464 Builder.SetInsertPoint(NonCancellationBlock, NonCancellationBlock->begin()); 465 } 466 467 IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel( 468 const LocationDescription &Loc, InsertPointTy OuterAllocaIP, 469 BodyGenCallbackTy BodyGenCB, PrivatizeCallbackTy PrivCB, 470 FinalizeCallbackTy FiniCB, Value *IfCondition, Value *NumThreads, 471 omp::ProcBindKind ProcBind, bool IsCancellable) { 472 if (!updateToLocation(Loc)) 473 return Loc.IP; 474 475 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); 476 Value *Ident = getOrCreateIdent(SrcLocStr); 477 Value *ThreadID = getOrCreateThreadID(Ident); 478 479 if (NumThreads) { 480 // Build call __kmpc_push_num_threads(&Ident, global_tid, num_threads) 481 Value *Args[] = { 482 Ident, ThreadID, 483 Builder.CreateIntCast(NumThreads, Int32, /*isSigned*/ false)}; 484 Builder.CreateCall( 485 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_push_num_threads), Args); 486 } 487 488 if (ProcBind != OMP_PROC_BIND_default) { 489 // Build call __kmpc_push_proc_bind(&Ident, global_tid, proc_bind) 490 Value *Args[] = { 491 Ident, ThreadID, 492 ConstantInt::get(Int32, unsigned(ProcBind), /*isSigned=*/true)}; 493 Builder.CreateCall( 494 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_push_proc_bind), Args); 495 } 496 497 BasicBlock *InsertBB = Builder.GetInsertBlock(); 498 Function *OuterFn = InsertBB->getParent(); 499 500 // Save the outer alloca block because the insertion iterator may get 501 // invalidated and we still need this later. 502 BasicBlock *OuterAllocaBlock = OuterAllocaIP.getBlock(); 503 504 // Vector to remember instructions we used only during the modeling but which 505 // we want to delete at the end. 506 SmallVector<Instruction *, 4> ToBeDeleted; 507 508 // Change the location to the outer alloca insertion point to create and 509 // initialize the allocas we pass into the parallel region. 510 Builder.restoreIP(OuterAllocaIP); 511 AllocaInst *TIDAddr = Builder.CreateAlloca(Int32, nullptr, "tid.addr"); 512 AllocaInst *ZeroAddr = Builder.CreateAlloca(Int32, nullptr, "zero.addr"); 513 514 // If there is an if condition we actually use the TIDAddr and ZeroAddr in the 515 // program, otherwise we only need them for modeling purposes to get the 516 // associated arguments in the outlined function. In the former case, 517 // initialize the allocas properly, in the latter case, delete them later. 518 if (IfCondition) { 519 Builder.CreateStore(Constant::getNullValue(Int32), TIDAddr); 520 Builder.CreateStore(Constant::getNullValue(Int32), ZeroAddr); 521 } else { 522 ToBeDeleted.push_back(TIDAddr); 523 ToBeDeleted.push_back(ZeroAddr); 524 } 525 526 // Create an artificial insertion point that will also ensure the blocks we 527 // are about to split are not degenerated. 528 auto *UI = new UnreachableInst(Builder.getContext(), InsertBB); 529 530 Instruction *ThenTI = UI, *ElseTI = nullptr; 531 if (IfCondition) 532 SplitBlockAndInsertIfThenElse(IfCondition, UI, &ThenTI, &ElseTI); 533 534 BasicBlock *ThenBB = ThenTI->getParent(); 535 BasicBlock *PRegEntryBB = ThenBB->splitBasicBlock(ThenTI, "omp.par.entry"); 536 BasicBlock *PRegBodyBB = 537 PRegEntryBB->splitBasicBlock(ThenTI, "omp.par.region"); 538 BasicBlock *PRegPreFiniBB = 539 PRegBodyBB->splitBasicBlock(ThenTI, "omp.par.pre_finalize"); 540 BasicBlock *PRegExitBB = 541 PRegPreFiniBB->splitBasicBlock(ThenTI, "omp.par.exit"); 542 543 auto FiniCBWrapper = [&](InsertPointTy IP) { 544 // Hide "open-ended" blocks from the given FiniCB by setting the right jump 545 // target to the region exit block. 546 if (IP.getBlock()->end() == IP.getPoint()) { 547 IRBuilder<>::InsertPointGuard IPG(Builder); 548 Builder.restoreIP(IP); 549 Instruction *I = Builder.CreateBr(PRegExitBB); 550 IP = InsertPointTy(I->getParent(), I->getIterator()); 551 } 552 assert(IP.getBlock()->getTerminator()->getNumSuccessors() == 1 && 553 IP.getBlock()->getTerminator()->getSuccessor(0) == PRegExitBB && 554 "Unexpected insertion point for finalization call!"); 555 return FiniCB(IP); 556 }; 557 558 FinalizationStack.push_back({FiniCBWrapper, OMPD_parallel, IsCancellable}); 559 560 // Generate the privatization allocas in the block that will become the entry 561 // of the outlined function. 562 Builder.SetInsertPoint(PRegEntryBB->getTerminator()); 563 InsertPointTy InnerAllocaIP = Builder.saveIP(); 564 565 AllocaInst *PrivTIDAddr = 566 Builder.CreateAlloca(Int32, nullptr, "tid.addr.local"); 567 Instruction *PrivTID = Builder.CreateLoad(Int32, PrivTIDAddr, "tid"); 568 569 // Add some fake uses for OpenMP provided arguments. 570 ToBeDeleted.push_back(Builder.CreateLoad(Int32, TIDAddr, "tid.addr.use")); 571 Instruction *ZeroAddrUse = Builder.CreateLoad(Int32, ZeroAddr, 572 "zero.addr.use"); 573 ToBeDeleted.push_back(ZeroAddrUse); 574 575 // ThenBB 576 // | 577 // V 578 // PRegionEntryBB <- Privatization allocas are placed here. 579 // | 580 // V 581 // PRegionBodyBB <- BodeGen is invoked here. 582 // | 583 // V 584 // PRegPreFiniBB <- The block we will start finalization from. 585 // | 586 // V 587 // PRegionExitBB <- A common exit to simplify block collection. 588 // 589 590 LLVM_DEBUG(dbgs() << "Before body codegen: " << *OuterFn << "\n"); 591 592 // Let the caller create the body. 593 assert(BodyGenCB && "Expected body generation callback!"); 594 InsertPointTy CodeGenIP(PRegBodyBB, PRegBodyBB->begin()); 595 BodyGenCB(InnerAllocaIP, CodeGenIP, *PRegPreFiniBB); 596 597 LLVM_DEBUG(dbgs() << "After body codegen: " << *OuterFn << "\n"); 598 599 FunctionCallee RTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_fork_call); 600 if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) { 601 if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) { 602 llvm::LLVMContext &Ctx = F->getContext(); 603 MDBuilder MDB(Ctx); 604 // Annotate the callback behavior of the __kmpc_fork_call: 605 // - The callback callee is argument number 2 (microtask). 606 // - The first two arguments of the callback callee are unknown (-1). 607 // - All variadic arguments to the __kmpc_fork_call are passed to the 608 // callback callee. 609 F->addMetadata( 610 llvm::LLVMContext::MD_callback, 611 *llvm::MDNode::get( 612 Ctx, {MDB.createCallbackEncoding(2, {-1, -1}, 613 /* VarArgsArePassed */ true)})); 614 } 615 } 616 617 OutlineInfo OI; 618 OI.PostOutlineCB = [=](Function &OutlinedFn) { 619 // Add some known attributes. 620 OutlinedFn.addParamAttr(0, Attribute::NoAlias); 621 OutlinedFn.addParamAttr(1, Attribute::NoAlias); 622 OutlinedFn.addFnAttr(Attribute::NoUnwind); 623 OutlinedFn.addFnAttr(Attribute::NoRecurse); 624 625 assert(OutlinedFn.arg_size() >= 2 && 626 "Expected at least tid and bounded tid as arguments"); 627 unsigned NumCapturedVars = 628 OutlinedFn.arg_size() - /* tid & bounded tid */ 2; 629 630 CallInst *CI = cast<CallInst>(OutlinedFn.user_back()); 631 CI->getParent()->setName("omp_parallel"); 632 Builder.SetInsertPoint(CI); 633 634 // Build call __kmpc_fork_call(Ident, n, microtask, var1, .., varn); 635 Value *ForkCallArgs[] = { 636 Ident, Builder.getInt32(NumCapturedVars), 637 Builder.CreateBitCast(&OutlinedFn, ParallelTaskPtr)}; 638 639 SmallVector<Value *, 16> RealArgs; 640 RealArgs.append(std::begin(ForkCallArgs), std::end(ForkCallArgs)); 641 RealArgs.append(CI->arg_begin() + /* tid & bound tid */ 2, CI->arg_end()); 642 643 Builder.CreateCall(RTLFn, RealArgs); 644 645 LLVM_DEBUG(dbgs() << "With fork_call placed: " 646 << *Builder.GetInsertBlock()->getParent() << "\n"); 647 648 InsertPointTy ExitIP(PRegExitBB, PRegExitBB->end()); 649 650 // Initialize the local TID stack location with the argument value. 651 Builder.SetInsertPoint(PrivTID); 652 Function::arg_iterator OutlinedAI = OutlinedFn.arg_begin(); 653 Builder.CreateStore(Builder.CreateLoad(Int32, OutlinedAI), PrivTIDAddr); 654 655 // If no "if" clause was present we do not need the call created during 656 // outlining, otherwise we reuse it in the serialized parallel region. 657 if (!ElseTI) { 658 CI->eraseFromParent(); 659 } else { 660 661 // If an "if" clause was present we are now generating the serialized 662 // version into the "else" branch. 663 Builder.SetInsertPoint(ElseTI); 664 665 // Build calls __kmpc_serialized_parallel(&Ident, GTid); 666 Value *SerializedParallelCallArgs[] = {Ident, ThreadID}; 667 Builder.CreateCall( 668 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_serialized_parallel), 669 SerializedParallelCallArgs); 670 671 // OutlinedFn(>id, &zero, CapturedStruct); 672 CI->removeFromParent(); 673 Builder.Insert(CI); 674 675 // __kmpc_end_serialized_parallel(&Ident, GTid); 676 Value *EndArgs[] = {Ident, ThreadID}; 677 Builder.CreateCall( 678 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_serialized_parallel), 679 EndArgs); 680 681 LLVM_DEBUG(dbgs() << "With serialized parallel region: " 682 << *Builder.GetInsertBlock()->getParent() << "\n"); 683 } 684 685 for (Instruction *I : ToBeDeleted) 686 I->eraseFromParent(); 687 }; 688 689 // Adjust the finalization stack, verify the adjustment, and call the 690 // finalize function a last time to finalize values between the pre-fini 691 // block and the exit block if we left the parallel "the normal way". 692 auto FiniInfo = FinalizationStack.pop_back_val(); 693 (void)FiniInfo; 694 assert(FiniInfo.DK == OMPD_parallel && 695 "Unexpected finalization stack state!"); 696 697 Instruction *PRegPreFiniTI = PRegPreFiniBB->getTerminator(); 698 699 InsertPointTy PreFiniIP(PRegPreFiniBB, PRegPreFiniTI->getIterator()); 700 FiniCB(PreFiniIP); 701 702 OI.EntryBB = PRegEntryBB; 703 OI.ExitBB = PRegExitBB; 704 705 SmallPtrSet<BasicBlock *, 32> ParallelRegionBlockSet; 706 SmallVector<BasicBlock *, 32> Blocks; 707 OI.collectBlocks(ParallelRegionBlockSet, Blocks); 708 709 // Ensure a single exit node for the outlined region by creating one. 710 // We might have multiple incoming edges to the exit now due to finalizations, 711 // e.g., cancel calls that cause the control flow to leave the region. 712 BasicBlock *PRegOutlinedExitBB = PRegExitBB; 713 PRegExitBB = SplitBlock(PRegExitBB, &*PRegExitBB->getFirstInsertionPt()); 714 PRegOutlinedExitBB->setName("omp.par.outlined.exit"); 715 Blocks.push_back(PRegOutlinedExitBB); 716 717 CodeExtractorAnalysisCache CEAC(*OuterFn); 718 CodeExtractor Extractor(Blocks, /* DominatorTree */ nullptr, 719 /* AggregateArgs */ false, 720 /* BlockFrequencyInfo */ nullptr, 721 /* BranchProbabilityInfo */ nullptr, 722 /* AssumptionCache */ nullptr, 723 /* AllowVarArgs */ true, 724 /* AllowAlloca */ true, 725 /* Suffix */ ".omp_par"); 726 727 // Find inputs to, outputs from the code region. 728 BasicBlock *CommonExit = nullptr; 729 SetVector<Value *> Inputs, Outputs, SinkingCands, HoistingCands; 730 Extractor.findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit); 731 Extractor.findInputsOutputs(Inputs, Outputs, SinkingCands); 732 733 LLVM_DEBUG(dbgs() << "Before privatization: " << *OuterFn << "\n"); 734 735 FunctionCallee TIDRTLFn = 736 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_global_thread_num); 737 738 auto PrivHelper = [&](Value &V) { 739 if (&V == TIDAddr || &V == ZeroAddr) 740 return; 741 742 SetVector<Use *> Uses; 743 for (Use &U : V.uses()) 744 if (auto *UserI = dyn_cast<Instruction>(U.getUser())) 745 if (ParallelRegionBlockSet.count(UserI->getParent())) 746 Uses.insert(&U); 747 748 // __kmpc_fork_call expects extra arguments as pointers. If the input 749 // already has a pointer type, everything is fine. Otherwise, store the 750 // value onto stack and load it back inside the to-be-outlined region. This 751 // will ensure only the pointer will be passed to the function. 752 // FIXME: if there are more than 15 trailing arguments, they must be 753 // additionally packed in a struct. 754 Value *Inner = &V; 755 if (!V.getType()->isPointerTy()) { 756 IRBuilder<>::InsertPointGuard Guard(Builder); 757 LLVM_DEBUG(llvm::dbgs() << "Forwarding input as pointer: " << V << "\n"); 758 759 Builder.restoreIP(OuterAllocaIP); 760 Value *Ptr = 761 Builder.CreateAlloca(V.getType(), nullptr, V.getName() + ".reloaded"); 762 763 // Store to stack at end of the block that currently branches to the entry 764 // block of the to-be-outlined region. 765 Builder.SetInsertPoint(InsertBB, 766 InsertBB->getTerminator()->getIterator()); 767 Builder.CreateStore(&V, Ptr); 768 769 // Load back next to allocations in the to-be-outlined region. 770 Builder.restoreIP(InnerAllocaIP); 771 Inner = Builder.CreateLoad(V.getType(), Ptr); 772 } 773 774 Value *ReplacementValue = nullptr; 775 CallInst *CI = dyn_cast<CallInst>(&V); 776 if (CI && CI->getCalledFunction() == TIDRTLFn.getCallee()) { 777 ReplacementValue = PrivTID; 778 } else { 779 Builder.restoreIP( 780 PrivCB(InnerAllocaIP, Builder.saveIP(), V, *Inner, ReplacementValue)); 781 assert(ReplacementValue && 782 "Expected copy/create callback to set replacement value!"); 783 if (ReplacementValue == &V) 784 return; 785 } 786 787 for (Use *UPtr : Uses) 788 UPtr->set(ReplacementValue); 789 }; 790 791 // Reset the inner alloca insertion as it will be used for loading the values 792 // wrapped into pointers before passing them into the to-be-outlined region. 793 // Configure it to insert immediately after the fake use of zero address so 794 // that they are available in the generated body and so that the 795 // OpenMP-related values (thread ID and zero address pointers) remain leading 796 // in the argument list. 797 InnerAllocaIP = IRBuilder<>::InsertPoint( 798 ZeroAddrUse->getParent(), ZeroAddrUse->getNextNode()->getIterator()); 799 800 // Reset the outer alloca insertion point to the entry of the relevant block 801 // in case it was invalidated. 802 OuterAllocaIP = IRBuilder<>::InsertPoint( 803 OuterAllocaBlock, OuterAllocaBlock->getFirstInsertionPt()); 804 805 for (Value *Input : Inputs) { 806 LLVM_DEBUG(dbgs() << "Captured input: " << *Input << "\n"); 807 PrivHelper(*Input); 808 } 809 LLVM_DEBUG({ 810 for (Value *Output : Outputs) 811 LLVM_DEBUG(dbgs() << "Captured output: " << *Output << "\n"); 812 }); 813 assert(Outputs.empty() && 814 "OpenMP outlining should not produce live-out values!"); 815 816 LLVM_DEBUG(dbgs() << "After privatization: " << *OuterFn << "\n"); 817 LLVM_DEBUG({ 818 for (auto *BB : Blocks) 819 dbgs() << " PBR: " << BB->getName() << "\n"; 820 }); 821 822 // Register the outlined info. 823 addOutlineInfo(std::move(OI)); 824 825 InsertPointTy AfterIP(UI->getParent(), UI->getParent()->end()); 826 UI->eraseFromParent(); 827 828 return AfterIP; 829 } 830 831 void OpenMPIRBuilder::emitFlush(const LocationDescription &Loc) { 832 // Build call void __kmpc_flush(ident_t *loc) 833 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); 834 Value *Args[] = {getOrCreateIdent(SrcLocStr)}; 835 836 Builder.CreateCall(getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_flush), Args); 837 } 838 839 void OpenMPIRBuilder::createFlush(const LocationDescription &Loc) { 840 if (!updateToLocation(Loc)) 841 return; 842 emitFlush(Loc); 843 } 844 845 void OpenMPIRBuilder::emitTaskwaitImpl(const LocationDescription &Loc) { 846 // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 847 // global_tid); 848 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); 849 Value *Ident = getOrCreateIdent(SrcLocStr); 850 Value *Args[] = {Ident, getOrCreateThreadID(Ident)}; 851 852 // Ignore return result until untied tasks are supported. 853 Builder.CreateCall(getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_taskwait), 854 Args); 855 } 856 857 void OpenMPIRBuilder::createTaskwait(const LocationDescription &Loc) { 858 if (!updateToLocation(Loc)) 859 return; 860 emitTaskwaitImpl(Loc); 861 } 862 863 void OpenMPIRBuilder::emitTaskyieldImpl(const LocationDescription &Loc) { 864 // Build call __kmpc_omp_taskyield(loc, thread_id, 0); 865 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); 866 Value *Ident = getOrCreateIdent(SrcLocStr); 867 Constant *I32Null = ConstantInt::getNullValue(Int32); 868 Value *Args[] = {Ident, getOrCreateThreadID(Ident), I32Null}; 869 870 Builder.CreateCall(getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_taskyield), 871 Args); 872 } 873 874 void OpenMPIRBuilder::createTaskyield(const LocationDescription &Loc) { 875 if (!updateToLocation(Loc)) 876 return; 877 emitTaskyieldImpl(Loc); 878 } 879 880 OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createSections( 881 const LocationDescription &Loc, InsertPointTy AllocaIP, 882 ArrayRef<StorableBodyGenCallbackTy> SectionCBs, PrivatizeCallbackTy PrivCB, 883 FinalizeCallbackTy FiniCB, bool IsCancellable, bool IsNowait) { 884 if (!updateToLocation(Loc)) 885 return Loc.IP; 886 887 auto FiniCBWrapper = [&](InsertPointTy IP) { 888 if (IP.getBlock()->end() != IP.getPoint()) 889 return FiniCB(IP); 890 // This must be done otherwise any nested constructs using FinalizeOMPRegion 891 // will fail because that function requires the Finalization Basic Block to 892 // have a terminator, which is already removed by EmitOMPRegionBody. 893 // IP is currently at cancelation block. 894 // We need to backtrack to the condition block to fetch 895 // the exit block and create a branch from cancelation 896 // to exit block. 897 IRBuilder<>::InsertPointGuard IPG(Builder); 898 Builder.restoreIP(IP); 899 auto *CaseBB = IP.getBlock()->getSinglePredecessor(); 900 auto *CondBB = CaseBB->getSinglePredecessor()->getSinglePredecessor(); 901 auto *ExitBB = CondBB->getTerminator()->getSuccessor(1); 902 Instruction *I = Builder.CreateBr(ExitBB); 903 IP = InsertPointTy(I->getParent(), I->getIterator()); 904 return FiniCB(IP); 905 }; 906 907 FinalizationStack.push_back({FiniCBWrapper, OMPD_sections, IsCancellable}); 908 909 // Each section is emitted as a switch case 910 // Each finalization callback is handled from clang.EmitOMPSectionDirective() 911 // -> OMP.createSection() which generates the IR for each section 912 // Iterate through all sections and emit a switch construct: 913 // switch (IV) { 914 // case 0: 915 // <SectionStmt[0]>; 916 // break; 917 // ... 918 // case <NumSection> - 1: 919 // <SectionStmt[<NumSection> - 1]>; 920 // break; 921 // } 922 // ... 923 // section_loop.after: 924 // <FiniCB>; 925 auto LoopBodyGenCB = [&](InsertPointTy CodeGenIP, Value *IndVar) { 926 auto *CurFn = CodeGenIP.getBlock()->getParent(); 927 auto *ForIncBB = CodeGenIP.getBlock()->getSingleSuccessor(); 928 auto *ForExitBB = CodeGenIP.getBlock() 929 ->getSinglePredecessor() 930 ->getTerminator() 931 ->getSuccessor(1); 932 SwitchInst *SwitchStmt = Builder.CreateSwitch(IndVar, ForIncBB); 933 Builder.restoreIP(CodeGenIP); 934 unsigned CaseNumber = 0; 935 for (auto SectionCB : SectionCBs) { 936 auto *CaseBB = BasicBlock::Create(M.getContext(), 937 "omp_section_loop.body.case", CurFn); 938 SwitchStmt->addCase(Builder.getInt32(CaseNumber), CaseBB); 939 Builder.SetInsertPoint(CaseBB); 940 SectionCB(InsertPointTy(), Builder.saveIP(), *ForExitBB); 941 CaseNumber++; 942 } 943 // remove the existing terminator from body BB since there can be no 944 // terminators after switch/case 945 CodeGenIP.getBlock()->getTerminator()->eraseFromParent(); 946 }; 947 // Loop body ends here 948 // LowerBound, UpperBound, and STride for createCanonicalLoop 949 Type *I32Ty = Type::getInt32Ty(M.getContext()); 950 Value *LB = ConstantInt::get(I32Ty, 0); 951 Value *UB = ConstantInt::get(I32Ty, SectionCBs.size()); 952 Value *ST = ConstantInt::get(I32Ty, 1); 953 llvm::CanonicalLoopInfo *LoopInfo = createCanonicalLoop( 954 Loc, LoopBodyGenCB, LB, UB, ST, true, false, AllocaIP, "section_loop"); 955 LoopInfo = createStaticWorkshareLoop(Loc, LoopInfo, AllocaIP, true); 956 BasicBlock *LoopAfterBB = LoopInfo->getAfter(); 957 Instruction *SplitPos = LoopAfterBB->getTerminator(); 958 if (!isa_and_nonnull<BranchInst>(SplitPos)) 959 SplitPos = new UnreachableInst(Builder.getContext(), LoopAfterBB); 960 // ExitBB after LoopAfterBB because LoopAfterBB is used for FinalizationCB, 961 // which requires a BB with branch 962 BasicBlock *ExitBB = 963 LoopAfterBB->splitBasicBlock(SplitPos, "omp_sections.end"); 964 SplitPos->eraseFromParent(); 965 966 // Apply the finalization callback in LoopAfterBB 967 auto FiniInfo = FinalizationStack.pop_back_val(); 968 assert(FiniInfo.DK == OMPD_sections && 969 "Unexpected finalization stack state!"); 970 Builder.SetInsertPoint(LoopAfterBB->getTerminator()); 971 FiniInfo.FiniCB(Builder.saveIP()); 972 Builder.SetInsertPoint(ExitBB); 973 974 return Builder.saveIP(); 975 } 976 977 OpenMPIRBuilder::InsertPointTy 978 OpenMPIRBuilder::createSection(const LocationDescription &Loc, 979 BodyGenCallbackTy BodyGenCB, 980 FinalizeCallbackTy FiniCB) { 981 if (!updateToLocation(Loc)) 982 return Loc.IP; 983 984 auto FiniCBWrapper = [&](InsertPointTy IP) { 985 if (IP.getBlock()->end() != IP.getPoint()) 986 return FiniCB(IP); 987 // This must be done otherwise any nested constructs using FinalizeOMPRegion 988 // will fail because that function requires the Finalization Basic Block to 989 // have a terminator, which is already removed by EmitOMPRegionBody. 990 // IP is currently at cancelation block. 991 // We need to backtrack to the condition block to fetch 992 // the exit block and create a branch from cancelation 993 // to exit block. 994 IRBuilder<>::InsertPointGuard IPG(Builder); 995 Builder.restoreIP(IP); 996 auto *CaseBB = Loc.IP.getBlock(); 997 auto *CondBB = CaseBB->getSinglePredecessor()->getSinglePredecessor(); 998 auto *ExitBB = CondBB->getTerminator()->getSuccessor(1); 999 Instruction *I = Builder.CreateBr(ExitBB); 1000 IP = InsertPointTy(I->getParent(), I->getIterator()); 1001 return FiniCB(IP); 1002 }; 1003 1004 Directive OMPD = Directive::OMPD_sections; 1005 // Since we are using Finalization Callback here, HasFinalize 1006 // and IsCancellable have to be true 1007 return EmitOMPInlinedRegion(OMPD, nullptr, nullptr, BodyGenCB, FiniCBWrapper, 1008 /*Conditional*/ false, /*hasFinalize*/ true, 1009 /*IsCancellable*/ true); 1010 } 1011 1012 OpenMPIRBuilder::InsertPointTy 1013 OpenMPIRBuilder::createMaster(const LocationDescription &Loc, 1014 BodyGenCallbackTy BodyGenCB, 1015 FinalizeCallbackTy FiniCB) { 1016 1017 if (!updateToLocation(Loc)) 1018 return Loc.IP; 1019 1020 Directive OMPD = Directive::OMPD_master; 1021 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); 1022 Value *Ident = getOrCreateIdent(SrcLocStr); 1023 Value *ThreadId = getOrCreateThreadID(Ident); 1024 Value *Args[] = {Ident, ThreadId}; 1025 1026 Function *EntryRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_master); 1027 Instruction *EntryCall = Builder.CreateCall(EntryRTLFn, Args); 1028 1029 Function *ExitRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_master); 1030 Instruction *ExitCall = Builder.CreateCall(ExitRTLFn, Args); 1031 1032 return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB, 1033 /*Conditional*/ true, /*hasFinalize*/ true); 1034 } 1035 1036 OpenMPIRBuilder::InsertPointTy 1037 OpenMPIRBuilder::createMasked(const LocationDescription &Loc, 1038 BodyGenCallbackTy BodyGenCB, 1039 FinalizeCallbackTy FiniCB, Value *Filter) { 1040 if (!updateToLocation(Loc)) 1041 return Loc.IP; 1042 1043 Directive OMPD = Directive::OMPD_masked; 1044 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); 1045 Value *Ident = getOrCreateIdent(SrcLocStr); 1046 Value *ThreadId = getOrCreateThreadID(Ident); 1047 Value *Args[] = {Ident, ThreadId, Filter}; 1048 Value *ArgsEnd[] = {Ident, ThreadId}; 1049 1050 Function *EntryRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_masked); 1051 Instruction *EntryCall = Builder.CreateCall(EntryRTLFn, Args); 1052 1053 Function *ExitRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_masked); 1054 Instruction *ExitCall = Builder.CreateCall(ExitRTLFn, ArgsEnd); 1055 1056 return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB, 1057 /*Conditional*/ true, /*hasFinalize*/ true); 1058 } 1059 1060 CanonicalLoopInfo *OpenMPIRBuilder::createLoopSkeleton( 1061 DebugLoc DL, Value *TripCount, Function *F, BasicBlock *PreInsertBefore, 1062 BasicBlock *PostInsertBefore, const Twine &Name) { 1063 Module *M = F->getParent(); 1064 LLVMContext &Ctx = M->getContext(); 1065 Type *IndVarTy = TripCount->getType(); 1066 1067 // Create the basic block structure. 1068 BasicBlock *Preheader = 1069 BasicBlock::Create(Ctx, "omp_" + Name + ".preheader", F, PreInsertBefore); 1070 BasicBlock *Header = 1071 BasicBlock::Create(Ctx, "omp_" + Name + ".header", F, PreInsertBefore); 1072 BasicBlock *Cond = 1073 BasicBlock::Create(Ctx, "omp_" + Name + ".cond", F, PreInsertBefore); 1074 BasicBlock *Body = 1075 BasicBlock::Create(Ctx, "omp_" + Name + ".body", F, PreInsertBefore); 1076 BasicBlock *Latch = 1077 BasicBlock::Create(Ctx, "omp_" + Name + ".inc", F, PostInsertBefore); 1078 BasicBlock *Exit = 1079 BasicBlock::Create(Ctx, "omp_" + Name + ".exit", F, PostInsertBefore); 1080 BasicBlock *After = 1081 BasicBlock::Create(Ctx, "omp_" + Name + ".after", F, PostInsertBefore); 1082 1083 // Use specified DebugLoc for new instructions. 1084 Builder.SetCurrentDebugLocation(DL); 1085 1086 Builder.SetInsertPoint(Preheader); 1087 Builder.CreateBr(Header); 1088 1089 Builder.SetInsertPoint(Header); 1090 PHINode *IndVarPHI = Builder.CreatePHI(IndVarTy, 2, "omp_" + Name + ".iv"); 1091 IndVarPHI->addIncoming(ConstantInt::get(IndVarTy, 0), Preheader); 1092 Builder.CreateBr(Cond); 1093 1094 Builder.SetInsertPoint(Cond); 1095 Value *Cmp = 1096 Builder.CreateICmpULT(IndVarPHI, TripCount, "omp_" + Name + ".cmp"); 1097 Builder.CreateCondBr(Cmp, Body, Exit); 1098 1099 Builder.SetInsertPoint(Body); 1100 Builder.CreateBr(Latch); 1101 1102 Builder.SetInsertPoint(Latch); 1103 Value *Next = Builder.CreateAdd(IndVarPHI, ConstantInt::get(IndVarTy, 1), 1104 "omp_" + Name + ".next", /*HasNUW=*/true); 1105 Builder.CreateBr(Header); 1106 IndVarPHI->addIncoming(Next, Latch); 1107 1108 Builder.SetInsertPoint(Exit); 1109 Builder.CreateBr(After); 1110 1111 // Remember and return the canonical control flow. 1112 LoopInfos.emplace_front(); 1113 CanonicalLoopInfo *CL = &LoopInfos.front(); 1114 1115 CL->Preheader = Preheader; 1116 CL->Header = Header; 1117 CL->Cond = Cond; 1118 CL->Body = Body; 1119 CL->Latch = Latch; 1120 CL->Exit = Exit; 1121 CL->After = After; 1122 1123 CL->IsValid = true; 1124 1125 #ifndef NDEBUG 1126 CL->assertOK(); 1127 #endif 1128 return CL; 1129 } 1130 1131 CanonicalLoopInfo * 1132 OpenMPIRBuilder::createCanonicalLoop(const LocationDescription &Loc, 1133 LoopBodyGenCallbackTy BodyGenCB, 1134 Value *TripCount, const Twine &Name) { 1135 BasicBlock *BB = Loc.IP.getBlock(); 1136 BasicBlock *NextBB = BB->getNextNode(); 1137 1138 CanonicalLoopInfo *CL = createLoopSkeleton(Loc.DL, TripCount, BB->getParent(), 1139 NextBB, NextBB, Name); 1140 BasicBlock *After = CL->getAfter(); 1141 1142 // If location is not set, don't connect the loop. 1143 if (updateToLocation(Loc)) { 1144 // Split the loop at the insertion point: Branch to the preheader and move 1145 // every following instruction to after the loop (the After BB). Also, the 1146 // new successor is the loop's after block. 1147 Builder.CreateBr(CL->Preheader); 1148 After->getInstList().splice(After->begin(), BB->getInstList(), 1149 Builder.GetInsertPoint(), BB->end()); 1150 After->replaceSuccessorsPhiUsesWith(BB, After); 1151 } 1152 1153 // Emit the body content. We do it after connecting the loop to the CFG to 1154 // avoid that the callback encounters degenerate BBs. 1155 BodyGenCB(CL->getBodyIP(), CL->getIndVar()); 1156 1157 #ifndef NDEBUG 1158 CL->assertOK(); 1159 #endif 1160 return CL; 1161 } 1162 1163 CanonicalLoopInfo *OpenMPIRBuilder::createCanonicalLoop( 1164 const LocationDescription &Loc, LoopBodyGenCallbackTy BodyGenCB, 1165 Value *Start, Value *Stop, Value *Step, bool IsSigned, bool InclusiveStop, 1166 InsertPointTy ComputeIP, const Twine &Name) { 1167 1168 // Consider the following difficulties (assuming 8-bit signed integers): 1169 // * Adding \p Step to the loop counter which passes \p Stop may overflow: 1170 // DO I = 1, 100, 50 1171 /// * A \p Step of INT_MIN cannot not be normalized to a positive direction: 1172 // DO I = 100, 0, -128 1173 1174 // Start, Stop and Step must be of the same integer type. 1175 auto *IndVarTy = cast<IntegerType>(Start->getType()); 1176 assert(IndVarTy == Stop->getType() && "Stop type mismatch"); 1177 assert(IndVarTy == Step->getType() && "Step type mismatch"); 1178 1179 LocationDescription ComputeLoc = 1180 ComputeIP.isSet() ? LocationDescription(ComputeIP, Loc.DL) : Loc; 1181 updateToLocation(ComputeLoc); 1182 1183 ConstantInt *Zero = ConstantInt::get(IndVarTy, 0); 1184 ConstantInt *One = ConstantInt::get(IndVarTy, 1); 1185 1186 // Like Step, but always positive. 1187 Value *Incr = Step; 1188 1189 // Distance between Start and Stop; always positive. 1190 Value *Span; 1191 1192 // Condition whether there are no iterations are executed at all, e.g. because 1193 // UB < LB. 1194 Value *ZeroCmp; 1195 1196 if (IsSigned) { 1197 // Ensure that increment is positive. If not, negate and invert LB and UB. 1198 Value *IsNeg = Builder.CreateICmpSLT(Step, Zero); 1199 Incr = Builder.CreateSelect(IsNeg, Builder.CreateNeg(Step), Step); 1200 Value *LB = Builder.CreateSelect(IsNeg, Stop, Start); 1201 Value *UB = Builder.CreateSelect(IsNeg, Start, Stop); 1202 Span = Builder.CreateSub(UB, LB, "", false, true); 1203 ZeroCmp = Builder.CreateICmp( 1204 InclusiveStop ? CmpInst::ICMP_SLT : CmpInst::ICMP_SLE, UB, LB); 1205 } else { 1206 Span = Builder.CreateSub(Stop, Start, "", true); 1207 ZeroCmp = Builder.CreateICmp( 1208 InclusiveStop ? CmpInst::ICMP_ULT : CmpInst::ICMP_ULE, Stop, Start); 1209 } 1210 1211 Value *CountIfLooping; 1212 if (InclusiveStop) { 1213 CountIfLooping = Builder.CreateAdd(Builder.CreateUDiv(Span, Incr), One); 1214 } else { 1215 // Avoid incrementing past stop since it could overflow. 1216 Value *CountIfTwo = Builder.CreateAdd( 1217 Builder.CreateUDiv(Builder.CreateSub(Span, One), Incr), One); 1218 Value *OneCmp = Builder.CreateICmp( 1219 InclusiveStop ? CmpInst::ICMP_ULT : CmpInst::ICMP_ULE, Span, Incr); 1220 CountIfLooping = Builder.CreateSelect(OneCmp, One, CountIfTwo); 1221 } 1222 Value *TripCount = Builder.CreateSelect(ZeroCmp, Zero, CountIfLooping, 1223 "omp_" + Name + ".tripcount"); 1224 1225 auto BodyGen = [=](InsertPointTy CodeGenIP, Value *IV) { 1226 Builder.restoreIP(CodeGenIP); 1227 Value *Span = Builder.CreateMul(IV, Step); 1228 Value *IndVar = Builder.CreateAdd(Span, Start); 1229 BodyGenCB(Builder.saveIP(), IndVar); 1230 }; 1231 LocationDescription LoopLoc = ComputeIP.isSet() ? Loc.IP : Builder.saveIP(); 1232 return createCanonicalLoop(LoopLoc, BodyGen, TripCount, Name); 1233 } 1234 1235 // Returns an LLVM function to call for initializing loop bounds using OpenMP 1236 // static scheduling depending on `type`. Only i32 and i64 are supported by the 1237 // runtime. Always interpret integers as unsigned similarly to 1238 // CanonicalLoopInfo. 1239 static FunctionCallee getKmpcForStaticInitForType(Type *Ty, Module &M, 1240 OpenMPIRBuilder &OMPBuilder) { 1241 unsigned Bitwidth = Ty->getIntegerBitWidth(); 1242 if (Bitwidth == 32) 1243 return OMPBuilder.getOrCreateRuntimeFunction( 1244 M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_init_4u); 1245 if (Bitwidth == 64) 1246 return OMPBuilder.getOrCreateRuntimeFunction( 1247 M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_init_8u); 1248 llvm_unreachable("unknown OpenMP loop iterator bitwidth"); 1249 } 1250 1251 // Sets the number of loop iterations to the given value. This value must be 1252 // valid in the condition block (i.e., defined in the preheader) and is 1253 // interpreted as an unsigned integer. 1254 void setCanonicalLoopTripCount(CanonicalLoopInfo *CLI, Value *TripCount) { 1255 Instruction *CmpI = &CLI->getCond()->front(); 1256 assert(isa<CmpInst>(CmpI) && "First inst must compare IV with TripCount"); 1257 CmpI->setOperand(1, TripCount); 1258 CLI->assertOK(); 1259 } 1260 1261 CanonicalLoopInfo *OpenMPIRBuilder::createStaticWorkshareLoop( 1262 const LocationDescription &Loc, CanonicalLoopInfo *CLI, 1263 InsertPointTy AllocaIP, bool NeedsBarrier, Value *Chunk) { 1264 // Set up the source location value for OpenMP runtime. 1265 if (!updateToLocation(Loc)) 1266 return nullptr; 1267 1268 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); 1269 Value *SrcLoc = getOrCreateIdent(SrcLocStr); 1270 1271 // Declare useful OpenMP runtime functions. 1272 Value *IV = CLI->getIndVar(); 1273 Type *IVTy = IV->getType(); 1274 FunctionCallee StaticInit = getKmpcForStaticInitForType(IVTy, M, *this); 1275 FunctionCallee StaticFini = 1276 getOrCreateRuntimeFunction(M, omp::OMPRTL___kmpc_for_static_fini); 1277 1278 // Allocate space for computed loop bounds as expected by the "init" function. 1279 Builder.restoreIP(AllocaIP); 1280 Type *I32Type = Type::getInt32Ty(M.getContext()); 1281 Value *PLastIter = Builder.CreateAlloca(I32Type, nullptr, "p.lastiter"); 1282 Value *PLowerBound = Builder.CreateAlloca(IVTy, nullptr, "p.lowerbound"); 1283 Value *PUpperBound = Builder.CreateAlloca(IVTy, nullptr, "p.upperbound"); 1284 Value *PStride = Builder.CreateAlloca(IVTy, nullptr, "p.stride"); 1285 1286 // At the end of the preheader, prepare for calling the "init" function by 1287 // storing the current loop bounds into the allocated space. A canonical loop 1288 // always iterates from 0 to trip-count with step 1. Note that "init" expects 1289 // and produces an inclusive upper bound. 1290 Builder.SetInsertPoint(CLI->getPreheader()->getTerminator()); 1291 Constant *Zero = ConstantInt::get(IVTy, 0); 1292 Constant *One = ConstantInt::get(IVTy, 1); 1293 Builder.CreateStore(Zero, PLowerBound); 1294 Value *UpperBound = Builder.CreateSub(CLI->getTripCount(), One); 1295 Builder.CreateStore(UpperBound, PUpperBound); 1296 Builder.CreateStore(One, PStride); 1297 1298 if (!Chunk) 1299 Chunk = One; 1300 1301 Value *ThreadNum = getOrCreateThreadID(SrcLoc); 1302 1303 Constant *SchedulingType = 1304 ConstantInt::get(I32Type, static_cast<int>(OMPScheduleType::Static)); 1305 1306 // Call the "init" function and update the trip count of the loop with the 1307 // value it produced. 1308 Builder.CreateCall(StaticInit, 1309 {SrcLoc, ThreadNum, SchedulingType, PLastIter, PLowerBound, 1310 PUpperBound, PStride, One, Chunk}); 1311 Value *LowerBound = Builder.CreateLoad(IVTy, PLowerBound); 1312 Value *InclusiveUpperBound = Builder.CreateLoad(IVTy, PUpperBound); 1313 Value *TripCountMinusOne = Builder.CreateSub(InclusiveUpperBound, LowerBound); 1314 Value *TripCount = Builder.CreateAdd(TripCountMinusOne, One); 1315 setCanonicalLoopTripCount(CLI, TripCount); 1316 1317 // Update all uses of the induction variable except the one in the condition 1318 // block that compares it with the actual upper bound, and the increment in 1319 // the latch block. 1320 // TODO: this can eventually move to CanonicalLoopInfo or to a new 1321 // CanonicalLoopInfoUpdater interface. 1322 Builder.SetInsertPoint(CLI->getBody(), CLI->getBody()->getFirstInsertionPt()); 1323 Value *UpdatedIV = Builder.CreateAdd(IV, LowerBound); 1324 IV->replaceUsesWithIf(UpdatedIV, [&](Use &U) { 1325 auto *Instr = dyn_cast<Instruction>(U.getUser()); 1326 return !Instr || 1327 (Instr->getParent() != CLI->getCond() && 1328 Instr->getParent() != CLI->getLatch() && Instr != UpdatedIV); 1329 }); 1330 1331 // In the "exit" block, call the "fini" function. 1332 Builder.SetInsertPoint(CLI->getExit(), 1333 CLI->getExit()->getTerminator()->getIterator()); 1334 Builder.CreateCall(StaticFini, {SrcLoc, ThreadNum}); 1335 1336 // Add the barrier if requested. 1337 if (NeedsBarrier) 1338 createBarrier(LocationDescription(Builder.saveIP(), Loc.DL), 1339 omp::Directive::OMPD_for, /* ForceSimpleCall */ false, 1340 /* CheckCancelFlag */ false); 1341 1342 CLI->assertOK(); 1343 return CLI; 1344 } 1345 1346 CanonicalLoopInfo *OpenMPIRBuilder::createWorkshareLoop( 1347 const LocationDescription &Loc, CanonicalLoopInfo *CLI, 1348 InsertPointTy AllocaIP, bool NeedsBarrier) { 1349 // Currently only supports static schedules. 1350 return createStaticWorkshareLoop(Loc, CLI, AllocaIP, NeedsBarrier); 1351 } 1352 1353 /// Returns an LLVM function to call for initializing loop bounds using OpenMP 1354 /// dynamic scheduling depending on `type`. Only i32 and i64 are supported by 1355 /// the runtime. Always interpret integers as unsigned similarly to 1356 /// CanonicalLoopInfo. 1357 static FunctionCallee 1358 getKmpcForDynamicInitForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder) { 1359 unsigned Bitwidth = Ty->getIntegerBitWidth(); 1360 if (Bitwidth == 32) 1361 return OMPBuilder.getOrCreateRuntimeFunction( 1362 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_init_4u); 1363 if (Bitwidth == 64) 1364 return OMPBuilder.getOrCreateRuntimeFunction( 1365 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_init_8u); 1366 llvm_unreachable("unknown OpenMP loop iterator bitwidth"); 1367 } 1368 1369 /// Returns an LLVM function to call for updating the next loop using OpenMP 1370 /// dynamic scheduling depending on `type`. Only i32 and i64 are supported by 1371 /// the runtime. Always interpret integers as unsigned similarly to 1372 /// CanonicalLoopInfo. 1373 static FunctionCallee 1374 getKmpcForDynamicNextForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder) { 1375 unsigned Bitwidth = Ty->getIntegerBitWidth(); 1376 if (Bitwidth == 32) 1377 return OMPBuilder.getOrCreateRuntimeFunction( 1378 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_next_4u); 1379 if (Bitwidth == 64) 1380 return OMPBuilder.getOrCreateRuntimeFunction( 1381 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_next_8u); 1382 llvm_unreachable("unknown OpenMP loop iterator bitwidth"); 1383 } 1384 1385 OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createDynamicWorkshareLoop( 1386 const LocationDescription &Loc, CanonicalLoopInfo *CLI, 1387 InsertPointTy AllocaIP, OMPScheduleType SchedType, bool NeedsBarrier, 1388 Value *Chunk) { 1389 // Set up the source location value for OpenMP runtime. 1390 Builder.SetCurrentDebugLocation(Loc.DL); 1391 1392 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); 1393 Value *SrcLoc = getOrCreateIdent(SrcLocStr); 1394 1395 // Declare useful OpenMP runtime functions. 1396 Value *IV = CLI->getIndVar(); 1397 Type *IVTy = IV->getType(); 1398 FunctionCallee DynamicInit = getKmpcForDynamicInitForType(IVTy, M, *this); 1399 FunctionCallee DynamicNext = getKmpcForDynamicNextForType(IVTy, M, *this); 1400 1401 // Allocate space for computed loop bounds as expected by the "init" function. 1402 Builder.restoreIP(AllocaIP); 1403 Type *I32Type = Type::getInt32Ty(M.getContext()); 1404 Value *PLastIter = Builder.CreateAlloca(I32Type, nullptr, "p.lastiter"); 1405 Value *PLowerBound = Builder.CreateAlloca(IVTy, nullptr, "p.lowerbound"); 1406 Value *PUpperBound = Builder.CreateAlloca(IVTy, nullptr, "p.upperbound"); 1407 Value *PStride = Builder.CreateAlloca(IVTy, nullptr, "p.stride"); 1408 1409 // At the end of the preheader, prepare for calling the "init" function by 1410 // storing the current loop bounds into the allocated space. A canonical loop 1411 // always iterates from 0 to trip-count with step 1. Note that "init" expects 1412 // and produces an inclusive upper bound. 1413 BasicBlock *PreHeader = CLI->getPreheader(); 1414 Builder.SetInsertPoint(PreHeader->getTerminator()); 1415 Constant *One = ConstantInt::get(IVTy, 1); 1416 Builder.CreateStore(One, PLowerBound); 1417 Value *UpperBound = CLI->getTripCount(); 1418 Builder.CreateStore(UpperBound, PUpperBound); 1419 Builder.CreateStore(One, PStride); 1420 1421 BasicBlock *Header = CLI->getHeader(); 1422 BasicBlock *Exit = CLI->getExit(); 1423 BasicBlock *Cond = CLI->getCond(); 1424 InsertPointTy AfterIP = CLI->getAfterIP(); 1425 1426 // The CLI will be "broken" in the code below, as the loop is no longer 1427 // a valid canonical loop. 1428 1429 if (!Chunk) 1430 Chunk = One; 1431 1432 Value *ThreadNum = getOrCreateThreadID(SrcLoc); 1433 1434 OMPScheduleType DynamicSchedType = 1435 SchedType | OMPScheduleType::ModifierNonmonotonic; 1436 Constant *SchedulingType = 1437 ConstantInt::get(I32Type, static_cast<int>(DynamicSchedType)); 1438 1439 // Call the "init" function. 1440 Builder.CreateCall(DynamicInit, 1441 {SrcLoc, ThreadNum, SchedulingType, /* LowerBound */ One, 1442 UpperBound, /* step */ One, Chunk}); 1443 1444 // An outer loop around the existing one. 1445 BasicBlock *OuterCond = BasicBlock::Create( 1446 PreHeader->getContext(), Twine(PreHeader->getName()) + ".outer.cond", 1447 PreHeader->getParent()); 1448 // This needs to be 32-bit always, so can't use the IVTy Zero above. 1449 Builder.SetInsertPoint(OuterCond, OuterCond->getFirstInsertionPt()); 1450 Value *Res = 1451 Builder.CreateCall(DynamicNext, {SrcLoc, ThreadNum, PLastIter, 1452 PLowerBound, PUpperBound, PStride}); 1453 Constant *Zero32 = ConstantInt::get(I32Type, 0); 1454 Value *MoreWork = Builder.CreateCmp(CmpInst::ICMP_NE, Res, Zero32); 1455 Value *LowerBound = 1456 Builder.CreateSub(Builder.CreateLoad(IVTy, PLowerBound), One, "lb"); 1457 Builder.CreateCondBr(MoreWork, Header, Exit); 1458 1459 // Change PHI-node in loop header to use outer cond rather than preheader, 1460 // and set IV to the LowerBound. 1461 Instruction *Phi = &Header->front(); 1462 auto *PI = cast<PHINode>(Phi); 1463 PI->setIncomingBlock(0, OuterCond); 1464 PI->setIncomingValue(0, LowerBound); 1465 1466 // Then set the pre-header to jump to the OuterCond 1467 Instruction *Term = PreHeader->getTerminator(); 1468 auto *Br = cast<BranchInst>(Term); 1469 Br->setSuccessor(0, OuterCond); 1470 1471 // Modify the inner condition: 1472 // * Use the UpperBound returned from the DynamicNext call. 1473 // * jump to the loop outer loop when done with one of the inner loops. 1474 Builder.SetInsertPoint(Cond, Cond->getFirstInsertionPt()); 1475 UpperBound = Builder.CreateLoad(IVTy, PUpperBound, "ub"); 1476 Instruction *Comp = &*Builder.GetInsertPoint(); 1477 auto *CI = cast<CmpInst>(Comp); 1478 CI->setOperand(1, UpperBound); 1479 // Redirect the inner exit to branch to outer condition. 1480 Instruction *Branch = &Cond->back(); 1481 auto *BI = cast<BranchInst>(Branch); 1482 assert(BI->getSuccessor(1) == Exit); 1483 BI->setSuccessor(1, OuterCond); 1484 1485 // Add the barrier if requested. 1486 if (NeedsBarrier) { 1487 Builder.SetInsertPoint(&Exit->back()); 1488 createBarrier(LocationDescription(Builder.saveIP(), Loc.DL), 1489 omp::Directive::OMPD_for, /* ForceSimpleCall */ false, 1490 /* CheckCancelFlag */ false); 1491 } 1492 1493 return AfterIP; 1494 } 1495 1496 /// Make \p Source branch to \p Target. 1497 /// 1498 /// Handles two situations: 1499 /// * \p Source already has an unconditional branch. 1500 /// * \p Source is a degenerate block (no terminator because the BB is 1501 /// the current head of the IR construction). 1502 static void redirectTo(BasicBlock *Source, BasicBlock *Target, DebugLoc DL) { 1503 if (Instruction *Term = Source->getTerminator()) { 1504 auto *Br = cast<BranchInst>(Term); 1505 assert(!Br->isConditional() && 1506 "BB's terminator must be an unconditional branch (or degenerate)"); 1507 BasicBlock *Succ = Br->getSuccessor(0); 1508 Succ->removePredecessor(Source, /*KeepOneInputPHIs=*/true); 1509 Br->setSuccessor(0, Target); 1510 return; 1511 } 1512 1513 auto *NewBr = BranchInst::Create(Target, Source); 1514 NewBr->setDebugLoc(DL); 1515 } 1516 1517 /// Redirect all edges that branch to \p OldTarget to \p NewTarget. That is, 1518 /// after this \p OldTarget will be orphaned. 1519 static void redirectAllPredecessorsTo(BasicBlock *OldTarget, 1520 BasicBlock *NewTarget, DebugLoc DL) { 1521 for (BasicBlock *Pred : make_early_inc_range(predecessors(OldTarget))) 1522 redirectTo(Pred, NewTarget, DL); 1523 } 1524 1525 /// Determine which blocks in \p BBs are reachable from outside and remove the 1526 /// ones that are not reachable from the function. 1527 static void removeUnusedBlocksFromParent(ArrayRef<BasicBlock *> BBs) { 1528 SmallPtrSet<BasicBlock *, 6> BBsToErase{BBs.begin(), BBs.end()}; 1529 auto HasRemainingUses = [&BBsToErase](BasicBlock *BB) { 1530 for (Use &U : BB->uses()) { 1531 auto *UseInst = dyn_cast<Instruction>(U.getUser()); 1532 if (!UseInst) 1533 continue; 1534 if (BBsToErase.count(UseInst->getParent())) 1535 continue; 1536 return true; 1537 } 1538 return false; 1539 }; 1540 1541 while (true) { 1542 bool Changed = false; 1543 for (BasicBlock *BB : make_early_inc_range(BBsToErase)) { 1544 if (HasRemainingUses(BB)) { 1545 BBsToErase.erase(BB); 1546 Changed = true; 1547 } 1548 } 1549 if (!Changed) 1550 break; 1551 } 1552 1553 SmallVector<BasicBlock *, 7> BBVec(BBsToErase.begin(), BBsToErase.end()); 1554 DeleteDeadBlocks(BBVec); 1555 } 1556 1557 CanonicalLoopInfo * 1558 OpenMPIRBuilder::collapseLoops(DebugLoc DL, ArrayRef<CanonicalLoopInfo *> Loops, 1559 InsertPointTy ComputeIP) { 1560 assert(Loops.size() >= 1 && "At least one loop required"); 1561 size_t NumLoops = Loops.size(); 1562 1563 // Nothing to do if there is already just one loop. 1564 if (NumLoops == 1) 1565 return Loops.front(); 1566 1567 CanonicalLoopInfo *Outermost = Loops.front(); 1568 CanonicalLoopInfo *Innermost = Loops.back(); 1569 BasicBlock *OrigPreheader = Outermost->getPreheader(); 1570 BasicBlock *OrigAfter = Outermost->getAfter(); 1571 Function *F = OrigPreheader->getParent(); 1572 1573 // Setup the IRBuilder for inserting the trip count computation. 1574 Builder.SetCurrentDebugLocation(DL); 1575 if (ComputeIP.isSet()) 1576 Builder.restoreIP(ComputeIP); 1577 else 1578 Builder.restoreIP(Outermost->getPreheaderIP()); 1579 1580 // Derive the collapsed' loop trip count. 1581 // TODO: Find common/largest indvar type. 1582 Value *CollapsedTripCount = nullptr; 1583 for (CanonicalLoopInfo *L : Loops) { 1584 Value *OrigTripCount = L->getTripCount(); 1585 if (!CollapsedTripCount) { 1586 CollapsedTripCount = OrigTripCount; 1587 continue; 1588 } 1589 1590 // TODO: Enable UndefinedSanitizer to diagnose an overflow here. 1591 CollapsedTripCount = Builder.CreateMul(CollapsedTripCount, OrigTripCount, 1592 {}, /*HasNUW=*/true); 1593 } 1594 1595 // Create the collapsed loop control flow. 1596 CanonicalLoopInfo *Result = 1597 createLoopSkeleton(DL, CollapsedTripCount, F, 1598 OrigPreheader->getNextNode(), OrigAfter, "collapsed"); 1599 1600 // Build the collapsed loop body code. 1601 // Start with deriving the input loop induction variables from the collapsed 1602 // one, using a divmod scheme. To preserve the original loops' order, the 1603 // innermost loop use the least significant bits. 1604 Builder.restoreIP(Result->getBodyIP()); 1605 1606 Value *Leftover = Result->getIndVar(); 1607 SmallVector<Value *> NewIndVars; 1608 NewIndVars.set_size(NumLoops); 1609 for (int i = NumLoops - 1; i >= 1; --i) { 1610 Value *OrigTripCount = Loops[i]->getTripCount(); 1611 1612 Value *NewIndVar = Builder.CreateURem(Leftover, OrigTripCount); 1613 NewIndVars[i] = NewIndVar; 1614 1615 Leftover = Builder.CreateUDiv(Leftover, OrigTripCount); 1616 } 1617 // Outermost loop gets all the remaining bits. 1618 NewIndVars[0] = Leftover; 1619 1620 // Construct the loop body control flow. 1621 // We progressively construct the branch structure following in direction of 1622 // the control flow, from the leading in-between code, the loop nest body, the 1623 // trailing in-between code, and rejoining the collapsed loop's latch. 1624 // ContinueBlock and ContinuePred keep track of the source(s) of next edge. If 1625 // the ContinueBlock is set, continue with that block. If ContinuePred, use 1626 // its predecessors as sources. 1627 BasicBlock *ContinueBlock = Result->getBody(); 1628 BasicBlock *ContinuePred = nullptr; 1629 auto ContinueWith = [&ContinueBlock, &ContinuePred, DL](BasicBlock *Dest, 1630 BasicBlock *NextSrc) { 1631 if (ContinueBlock) 1632 redirectTo(ContinueBlock, Dest, DL); 1633 else 1634 redirectAllPredecessorsTo(ContinuePred, Dest, DL); 1635 1636 ContinueBlock = nullptr; 1637 ContinuePred = NextSrc; 1638 }; 1639 1640 // The code before the nested loop of each level. 1641 // Because we are sinking it into the nest, it will be executed more often 1642 // that the original loop. More sophisticated schemes could keep track of what 1643 // the in-between code is and instantiate it only once per thread. 1644 for (size_t i = 0; i < NumLoops - 1; ++i) 1645 ContinueWith(Loops[i]->getBody(), Loops[i + 1]->getHeader()); 1646 1647 // Connect the loop nest body. 1648 ContinueWith(Innermost->getBody(), Innermost->getLatch()); 1649 1650 // The code after the nested loop at each level. 1651 for (size_t i = NumLoops - 1; i > 0; --i) 1652 ContinueWith(Loops[i]->getAfter(), Loops[i - 1]->getLatch()); 1653 1654 // Connect the finished loop to the collapsed loop latch. 1655 ContinueWith(Result->getLatch(), nullptr); 1656 1657 // Replace the input loops with the new collapsed loop. 1658 redirectTo(Outermost->getPreheader(), Result->getPreheader(), DL); 1659 redirectTo(Result->getAfter(), Outermost->getAfter(), DL); 1660 1661 // Replace the input loop indvars with the derived ones. 1662 for (size_t i = 0; i < NumLoops; ++i) 1663 Loops[i]->getIndVar()->replaceAllUsesWith(NewIndVars[i]); 1664 1665 // Remove unused parts of the input loops. 1666 SmallVector<BasicBlock *, 12> OldControlBBs; 1667 OldControlBBs.reserve(6 * Loops.size()); 1668 for (CanonicalLoopInfo *Loop : Loops) 1669 Loop->collectControlBlocks(OldControlBBs); 1670 removeUnusedBlocksFromParent(OldControlBBs); 1671 1672 #ifndef NDEBUG 1673 Result->assertOK(); 1674 #endif 1675 return Result; 1676 } 1677 1678 std::vector<CanonicalLoopInfo *> 1679 OpenMPIRBuilder::tileLoops(DebugLoc DL, ArrayRef<CanonicalLoopInfo *> Loops, 1680 ArrayRef<Value *> TileSizes) { 1681 assert(TileSizes.size() == Loops.size() && 1682 "Must pass as many tile sizes as there are loops"); 1683 int NumLoops = Loops.size(); 1684 assert(NumLoops >= 1 && "At least one loop to tile required"); 1685 1686 CanonicalLoopInfo *OutermostLoop = Loops.front(); 1687 CanonicalLoopInfo *InnermostLoop = Loops.back(); 1688 Function *F = OutermostLoop->getBody()->getParent(); 1689 BasicBlock *InnerEnter = InnermostLoop->getBody(); 1690 BasicBlock *InnerLatch = InnermostLoop->getLatch(); 1691 1692 // Collect original trip counts and induction variable to be accessible by 1693 // index. Also, the structure of the original loops is not preserved during 1694 // the construction of the tiled loops, so do it before we scavenge the BBs of 1695 // any original CanonicalLoopInfo. 1696 SmallVector<Value *, 4> OrigTripCounts, OrigIndVars; 1697 for (CanonicalLoopInfo *L : Loops) { 1698 OrigTripCounts.push_back(L->getTripCount()); 1699 OrigIndVars.push_back(L->getIndVar()); 1700 } 1701 1702 // Collect the code between loop headers. These may contain SSA definitions 1703 // that are used in the loop nest body. To be usable with in the innermost 1704 // body, these BasicBlocks will be sunk into the loop nest body. That is, 1705 // these instructions may be executed more often than before the tiling. 1706 // TODO: It would be sufficient to only sink them into body of the 1707 // corresponding tile loop. 1708 SmallVector<std::pair<BasicBlock *, BasicBlock *>, 4> InbetweenCode; 1709 for (int i = 0; i < NumLoops - 1; ++i) { 1710 CanonicalLoopInfo *Surrounding = Loops[i]; 1711 CanonicalLoopInfo *Nested = Loops[i + 1]; 1712 1713 BasicBlock *EnterBB = Surrounding->getBody(); 1714 BasicBlock *ExitBB = Nested->getHeader(); 1715 InbetweenCode.emplace_back(EnterBB, ExitBB); 1716 } 1717 1718 // Compute the trip counts of the floor loops. 1719 Builder.SetCurrentDebugLocation(DL); 1720 Builder.restoreIP(OutermostLoop->getPreheaderIP()); 1721 SmallVector<Value *, 4> FloorCount, FloorRems; 1722 for (int i = 0; i < NumLoops; ++i) { 1723 Value *TileSize = TileSizes[i]; 1724 Value *OrigTripCount = OrigTripCounts[i]; 1725 Type *IVType = OrigTripCount->getType(); 1726 1727 Value *FloorTripCount = Builder.CreateUDiv(OrigTripCount, TileSize); 1728 Value *FloorTripRem = Builder.CreateURem(OrigTripCount, TileSize); 1729 1730 // 0 if tripcount divides the tilesize, 1 otherwise. 1731 // 1 means we need an additional iteration for a partial tile. 1732 // 1733 // Unfortunately we cannot just use the roundup-formula 1734 // (tripcount + tilesize - 1)/tilesize 1735 // because the summation might overflow. We do not want introduce undefined 1736 // behavior when the untiled loop nest did not. 1737 Value *FloorTripOverflow = 1738 Builder.CreateICmpNE(FloorTripRem, ConstantInt::get(IVType, 0)); 1739 1740 FloorTripOverflow = Builder.CreateZExt(FloorTripOverflow, IVType); 1741 FloorTripCount = 1742 Builder.CreateAdd(FloorTripCount, FloorTripOverflow, 1743 "omp_floor" + Twine(i) + ".tripcount", true); 1744 1745 // Remember some values for later use. 1746 FloorCount.push_back(FloorTripCount); 1747 FloorRems.push_back(FloorTripRem); 1748 } 1749 1750 // Generate the new loop nest, from the outermost to the innermost. 1751 std::vector<CanonicalLoopInfo *> Result; 1752 Result.reserve(NumLoops * 2); 1753 1754 // The basic block of the surrounding loop that enters the nest generated 1755 // loop. 1756 BasicBlock *Enter = OutermostLoop->getPreheader(); 1757 1758 // The basic block of the surrounding loop where the inner code should 1759 // continue. 1760 BasicBlock *Continue = OutermostLoop->getAfter(); 1761 1762 // Where the next loop basic block should be inserted. 1763 BasicBlock *OutroInsertBefore = InnermostLoop->getExit(); 1764 1765 auto EmbeddNewLoop = 1766 [this, DL, F, InnerEnter, &Enter, &Continue, &OutroInsertBefore]( 1767 Value *TripCount, const Twine &Name) -> CanonicalLoopInfo * { 1768 CanonicalLoopInfo *EmbeddedLoop = createLoopSkeleton( 1769 DL, TripCount, F, InnerEnter, OutroInsertBefore, Name); 1770 redirectTo(Enter, EmbeddedLoop->getPreheader(), DL); 1771 redirectTo(EmbeddedLoop->getAfter(), Continue, DL); 1772 1773 // Setup the position where the next embedded loop connects to this loop. 1774 Enter = EmbeddedLoop->getBody(); 1775 Continue = EmbeddedLoop->getLatch(); 1776 OutroInsertBefore = EmbeddedLoop->getLatch(); 1777 return EmbeddedLoop; 1778 }; 1779 1780 auto EmbeddNewLoops = [&Result, &EmbeddNewLoop](ArrayRef<Value *> TripCounts, 1781 const Twine &NameBase) { 1782 for (auto P : enumerate(TripCounts)) { 1783 CanonicalLoopInfo *EmbeddedLoop = 1784 EmbeddNewLoop(P.value(), NameBase + Twine(P.index())); 1785 Result.push_back(EmbeddedLoop); 1786 } 1787 }; 1788 1789 EmbeddNewLoops(FloorCount, "floor"); 1790 1791 // Within the innermost floor loop, emit the code that computes the tile 1792 // sizes. 1793 Builder.SetInsertPoint(Enter->getTerminator()); 1794 SmallVector<Value *, 4> TileCounts; 1795 for (int i = 0; i < NumLoops; ++i) { 1796 CanonicalLoopInfo *FloorLoop = Result[i]; 1797 Value *TileSize = TileSizes[i]; 1798 1799 Value *FloorIsEpilogue = 1800 Builder.CreateICmpEQ(FloorLoop->getIndVar(), FloorCount[i]); 1801 Value *TileTripCount = 1802 Builder.CreateSelect(FloorIsEpilogue, FloorRems[i], TileSize); 1803 1804 TileCounts.push_back(TileTripCount); 1805 } 1806 1807 // Create the tile loops. 1808 EmbeddNewLoops(TileCounts, "tile"); 1809 1810 // Insert the inbetween code into the body. 1811 BasicBlock *BodyEnter = Enter; 1812 BasicBlock *BodyEntered = nullptr; 1813 for (std::pair<BasicBlock *, BasicBlock *> P : InbetweenCode) { 1814 BasicBlock *EnterBB = P.first; 1815 BasicBlock *ExitBB = P.second; 1816 1817 if (BodyEnter) 1818 redirectTo(BodyEnter, EnterBB, DL); 1819 else 1820 redirectAllPredecessorsTo(BodyEntered, EnterBB, DL); 1821 1822 BodyEnter = nullptr; 1823 BodyEntered = ExitBB; 1824 } 1825 1826 // Append the original loop nest body into the generated loop nest body. 1827 if (BodyEnter) 1828 redirectTo(BodyEnter, InnerEnter, DL); 1829 else 1830 redirectAllPredecessorsTo(BodyEntered, InnerEnter, DL); 1831 redirectAllPredecessorsTo(InnerLatch, Continue, DL); 1832 1833 // Replace the original induction variable with an induction variable computed 1834 // from the tile and floor induction variables. 1835 Builder.restoreIP(Result.back()->getBodyIP()); 1836 for (int i = 0; i < NumLoops; ++i) { 1837 CanonicalLoopInfo *FloorLoop = Result[i]; 1838 CanonicalLoopInfo *TileLoop = Result[NumLoops + i]; 1839 Value *OrigIndVar = OrigIndVars[i]; 1840 Value *Size = TileSizes[i]; 1841 1842 Value *Scale = 1843 Builder.CreateMul(Size, FloorLoop->getIndVar(), {}, /*HasNUW=*/true); 1844 Value *Shift = 1845 Builder.CreateAdd(Scale, TileLoop->getIndVar(), {}, /*HasNUW=*/true); 1846 OrigIndVar->replaceAllUsesWith(Shift); 1847 } 1848 1849 // Remove unused parts of the original loops. 1850 SmallVector<BasicBlock *, 12> OldControlBBs; 1851 OldControlBBs.reserve(6 * Loops.size()); 1852 for (CanonicalLoopInfo *Loop : Loops) 1853 Loop->collectControlBlocks(OldControlBBs); 1854 removeUnusedBlocksFromParent(OldControlBBs); 1855 1856 #ifndef NDEBUG 1857 for (CanonicalLoopInfo *GenL : Result) 1858 GenL->assertOK(); 1859 #endif 1860 return Result; 1861 } 1862 1863 OpenMPIRBuilder::InsertPointTy 1864 OpenMPIRBuilder::createCopyPrivate(const LocationDescription &Loc, 1865 llvm::Value *BufSize, llvm::Value *CpyBuf, 1866 llvm::Value *CpyFn, llvm::Value *DidIt) { 1867 if (!updateToLocation(Loc)) 1868 return Loc.IP; 1869 1870 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); 1871 Value *Ident = getOrCreateIdent(SrcLocStr); 1872 Value *ThreadId = getOrCreateThreadID(Ident); 1873 1874 llvm::Value *DidItLD = Builder.CreateLoad(Builder.getInt32Ty(), DidIt); 1875 1876 Value *Args[] = {Ident, ThreadId, BufSize, CpyBuf, CpyFn, DidItLD}; 1877 1878 Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_copyprivate); 1879 Builder.CreateCall(Fn, Args); 1880 1881 return Builder.saveIP(); 1882 } 1883 1884 OpenMPIRBuilder::InsertPointTy 1885 OpenMPIRBuilder::createSingle(const LocationDescription &Loc, 1886 BodyGenCallbackTy BodyGenCB, 1887 FinalizeCallbackTy FiniCB, llvm::Value *DidIt) { 1888 1889 if (!updateToLocation(Loc)) 1890 return Loc.IP; 1891 1892 // If needed (i.e. not null), initialize `DidIt` with 0 1893 if (DidIt) { 1894 Builder.CreateStore(Builder.getInt32(0), DidIt); 1895 } 1896 1897 Directive OMPD = Directive::OMPD_single; 1898 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); 1899 Value *Ident = getOrCreateIdent(SrcLocStr); 1900 Value *ThreadId = getOrCreateThreadID(Ident); 1901 Value *Args[] = {Ident, ThreadId}; 1902 1903 Function *EntryRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_single); 1904 Instruction *EntryCall = Builder.CreateCall(EntryRTLFn, Args); 1905 1906 Function *ExitRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_single); 1907 Instruction *ExitCall = Builder.CreateCall(ExitRTLFn, Args); 1908 1909 // generates the following: 1910 // if (__kmpc_single()) { 1911 // .... single region ... 1912 // __kmpc_end_single 1913 // } 1914 1915 return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB, 1916 /*Conditional*/ true, /*hasFinalize*/ true); 1917 } 1918 1919 OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createCritical( 1920 const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, 1921 FinalizeCallbackTy FiniCB, StringRef CriticalName, Value *HintInst) { 1922 1923 if (!updateToLocation(Loc)) 1924 return Loc.IP; 1925 1926 Directive OMPD = Directive::OMPD_critical; 1927 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); 1928 Value *Ident = getOrCreateIdent(SrcLocStr); 1929 Value *ThreadId = getOrCreateThreadID(Ident); 1930 Value *LockVar = getOMPCriticalRegionLock(CriticalName); 1931 Value *Args[] = {Ident, ThreadId, LockVar}; 1932 1933 SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args), std::end(Args)); 1934 Function *RTFn = nullptr; 1935 if (HintInst) { 1936 // Add Hint to entry Args and create call 1937 EnterArgs.push_back(HintInst); 1938 RTFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_critical_with_hint); 1939 } else { 1940 RTFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_critical); 1941 } 1942 Instruction *EntryCall = Builder.CreateCall(RTFn, EnterArgs); 1943 1944 Function *ExitRTLFn = 1945 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_critical); 1946 Instruction *ExitCall = Builder.CreateCall(ExitRTLFn, Args); 1947 1948 return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB, 1949 /*Conditional*/ false, /*hasFinalize*/ true); 1950 } 1951 1952 OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::EmitOMPInlinedRegion( 1953 Directive OMPD, Instruction *EntryCall, Instruction *ExitCall, 1954 BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, bool Conditional, 1955 bool HasFinalize, bool IsCancellable) { 1956 1957 if (HasFinalize) 1958 FinalizationStack.push_back({FiniCB, OMPD, IsCancellable}); 1959 1960 // Create inlined region's entry and body blocks, in preparation 1961 // for conditional creation 1962 BasicBlock *EntryBB = Builder.GetInsertBlock(); 1963 Instruction *SplitPos = EntryBB->getTerminator(); 1964 if (!isa_and_nonnull<BranchInst>(SplitPos)) 1965 SplitPos = new UnreachableInst(Builder.getContext(), EntryBB); 1966 BasicBlock *ExitBB = EntryBB->splitBasicBlock(SplitPos, "omp_region.end"); 1967 BasicBlock *FiniBB = 1968 EntryBB->splitBasicBlock(EntryBB->getTerminator(), "omp_region.finalize"); 1969 1970 Builder.SetInsertPoint(EntryBB->getTerminator()); 1971 emitCommonDirectiveEntry(OMPD, EntryCall, ExitBB, Conditional); 1972 1973 // generate body 1974 BodyGenCB(/* AllocaIP */ InsertPointTy(), 1975 /* CodeGenIP */ Builder.saveIP(), *FiniBB); 1976 1977 // If we didn't emit a branch to FiniBB during body generation, it means 1978 // FiniBB is unreachable (e.g. while(1);). stop generating all the 1979 // unreachable blocks, and remove anything we are not going to use. 1980 auto SkipEmittingRegion = FiniBB->hasNPredecessors(0); 1981 if (SkipEmittingRegion) { 1982 FiniBB->eraseFromParent(); 1983 ExitCall->eraseFromParent(); 1984 // Discard finalization if we have it. 1985 if (HasFinalize) { 1986 assert(!FinalizationStack.empty() && 1987 "Unexpected finalization stack state!"); 1988 FinalizationStack.pop_back(); 1989 } 1990 } else { 1991 // emit exit call and do any needed finalization. 1992 auto FinIP = InsertPointTy(FiniBB, FiniBB->getFirstInsertionPt()); 1993 assert(FiniBB->getTerminator()->getNumSuccessors() == 1 && 1994 FiniBB->getTerminator()->getSuccessor(0) == ExitBB && 1995 "Unexpected control flow graph state!!"); 1996 emitCommonDirectiveExit(OMPD, FinIP, ExitCall, HasFinalize); 1997 assert(FiniBB->getUniquePredecessor()->getUniqueSuccessor() == FiniBB && 1998 "Unexpected Control Flow State!"); 1999 MergeBlockIntoPredecessor(FiniBB); 2000 } 2001 2002 // If we are skipping the region of a non conditional, remove the exit 2003 // block, and clear the builder's insertion point. 2004 assert(SplitPos->getParent() == ExitBB && 2005 "Unexpected Insertion point location!"); 2006 if (!Conditional && SkipEmittingRegion) { 2007 ExitBB->eraseFromParent(); 2008 Builder.ClearInsertionPoint(); 2009 } else { 2010 auto merged = MergeBlockIntoPredecessor(ExitBB); 2011 BasicBlock *ExitPredBB = SplitPos->getParent(); 2012 auto InsertBB = merged ? ExitPredBB : ExitBB; 2013 if (!isa_and_nonnull<BranchInst>(SplitPos)) 2014 SplitPos->eraseFromParent(); 2015 Builder.SetInsertPoint(InsertBB); 2016 } 2017 2018 return Builder.saveIP(); 2019 } 2020 2021 OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitCommonDirectiveEntry( 2022 Directive OMPD, Value *EntryCall, BasicBlock *ExitBB, bool Conditional) { 2023 // if nothing to do, Return current insertion point. 2024 if (!Conditional || !EntryCall) 2025 return Builder.saveIP(); 2026 2027 BasicBlock *EntryBB = Builder.GetInsertBlock(); 2028 Value *CallBool = Builder.CreateIsNotNull(EntryCall); 2029 auto *ThenBB = BasicBlock::Create(M.getContext(), "omp_region.body"); 2030 auto *UI = new UnreachableInst(Builder.getContext(), ThenBB); 2031 2032 // Emit thenBB and set the Builder's insertion point there for 2033 // body generation next. Place the block after the current block. 2034 Function *CurFn = EntryBB->getParent(); 2035 CurFn->getBasicBlockList().insertAfter(EntryBB->getIterator(), ThenBB); 2036 2037 // Move Entry branch to end of ThenBB, and replace with conditional 2038 // branch (If-stmt) 2039 Instruction *EntryBBTI = EntryBB->getTerminator(); 2040 Builder.CreateCondBr(CallBool, ThenBB, ExitBB); 2041 EntryBBTI->removeFromParent(); 2042 Builder.SetInsertPoint(UI); 2043 Builder.Insert(EntryBBTI); 2044 UI->eraseFromParent(); 2045 Builder.SetInsertPoint(ThenBB->getTerminator()); 2046 2047 // return an insertion point to ExitBB. 2048 return IRBuilder<>::InsertPoint(ExitBB, ExitBB->getFirstInsertionPt()); 2049 } 2050 2051 OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitCommonDirectiveExit( 2052 omp::Directive OMPD, InsertPointTy FinIP, Instruction *ExitCall, 2053 bool HasFinalize) { 2054 2055 Builder.restoreIP(FinIP); 2056 2057 // If there is finalization to do, emit it before the exit call 2058 if (HasFinalize) { 2059 assert(!FinalizationStack.empty() && 2060 "Unexpected finalization stack state!"); 2061 2062 FinalizationInfo Fi = FinalizationStack.pop_back_val(); 2063 assert(Fi.DK == OMPD && "Unexpected Directive for Finalization call!"); 2064 2065 Fi.FiniCB(FinIP); 2066 2067 BasicBlock *FiniBB = FinIP.getBlock(); 2068 Instruction *FiniBBTI = FiniBB->getTerminator(); 2069 2070 // set Builder IP for call creation 2071 Builder.SetInsertPoint(FiniBBTI); 2072 } 2073 2074 if (!ExitCall) 2075 return Builder.saveIP(); 2076 2077 // place the Exitcall as last instruction before Finalization block terminator 2078 ExitCall->removeFromParent(); 2079 Builder.Insert(ExitCall); 2080 2081 return IRBuilder<>::InsertPoint(ExitCall->getParent(), 2082 ExitCall->getIterator()); 2083 } 2084 2085 OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createCopyinClauseBlocks( 2086 InsertPointTy IP, Value *MasterAddr, Value *PrivateAddr, 2087 llvm::IntegerType *IntPtrTy, bool BranchtoEnd) { 2088 if (!IP.isSet()) 2089 return IP; 2090 2091 IRBuilder<>::InsertPointGuard IPG(Builder); 2092 2093 // creates the following CFG structure 2094 // OMP_Entry : (MasterAddr != PrivateAddr)? 2095 // F T 2096 // | \ 2097 // | copin.not.master 2098 // | / 2099 // v / 2100 // copyin.not.master.end 2101 // | 2102 // v 2103 // OMP.Entry.Next 2104 2105 BasicBlock *OMP_Entry = IP.getBlock(); 2106 Function *CurFn = OMP_Entry->getParent(); 2107 BasicBlock *CopyBegin = 2108 BasicBlock::Create(M.getContext(), "copyin.not.master", CurFn); 2109 BasicBlock *CopyEnd = nullptr; 2110 2111 // If entry block is terminated, split to preserve the branch to following 2112 // basic block (i.e. OMP.Entry.Next), otherwise, leave everything as is. 2113 if (isa_and_nonnull<BranchInst>(OMP_Entry->getTerminator())) { 2114 CopyEnd = OMP_Entry->splitBasicBlock(OMP_Entry->getTerminator(), 2115 "copyin.not.master.end"); 2116 OMP_Entry->getTerminator()->eraseFromParent(); 2117 } else { 2118 CopyEnd = 2119 BasicBlock::Create(M.getContext(), "copyin.not.master.end", CurFn); 2120 } 2121 2122 Builder.SetInsertPoint(OMP_Entry); 2123 Value *MasterPtr = Builder.CreatePtrToInt(MasterAddr, IntPtrTy); 2124 Value *PrivatePtr = Builder.CreatePtrToInt(PrivateAddr, IntPtrTy); 2125 Value *cmp = Builder.CreateICmpNE(MasterPtr, PrivatePtr); 2126 Builder.CreateCondBr(cmp, CopyBegin, CopyEnd); 2127 2128 Builder.SetInsertPoint(CopyBegin); 2129 if (BranchtoEnd) 2130 Builder.SetInsertPoint(Builder.CreateBr(CopyEnd)); 2131 2132 return Builder.saveIP(); 2133 } 2134 2135 CallInst *OpenMPIRBuilder::createOMPAlloc(const LocationDescription &Loc, 2136 Value *Size, Value *Allocator, 2137 std::string Name) { 2138 IRBuilder<>::InsertPointGuard IPG(Builder); 2139 Builder.restoreIP(Loc.IP); 2140 2141 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); 2142 Value *Ident = getOrCreateIdent(SrcLocStr); 2143 Value *ThreadId = getOrCreateThreadID(Ident); 2144 Value *Args[] = {ThreadId, Size, Allocator}; 2145 2146 Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_alloc); 2147 2148 return Builder.CreateCall(Fn, Args, Name); 2149 } 2150 2151 CallInst *OpenMPIRBuilder::createOMPFree(const LocationDescription &Loc, 2152 Value *Addr, Value *Allocator, 2153 std::string Name) { 2154 IRBuilder<>::InsertPointGuard IPG(Builder); 2155 Builder.restoreIP(Loc.IP); 2156 2157 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); 2158 Value *Ident = getOrCreateIdent(SrcLocStr); 2159 Value *ThreadId = getOrCreateThreadID(Ident); 2160 Value *Args[] = {ThreadId, Addr, Allocator}; 2161 Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_free); 2162 return Builder.CreateCall(Fn, Args, Name); 2163 } 2164 2165 CallInst *OpenMPIRBuilder::createCachedThreadPrivate( 2166 const LocationDescription &Loc, llvm::Value *Pointer, 2167 llvm::ConstantInt *Size, const llvm::Twine &Name) { 2168 IRBuilder<>::InsertPointGuard IPG(Builder); 2169 Builder.restoreIP(Loc.IP); 2170 2171 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); 2172 Value *Ident = getOrCreateIdent(SrcLocStr); 2173 Value *ThreadId = getOrCreateThreadID(Ident); 2174 Constant *ThreadPrivateCache = 2175 getOrCreateOMPInternalVariable(Int8PtrPtr, Name); 2176 llvm::Value *Args[] = {Ident, ThreadId, Pointer, Size, ThreadPrivateCache}; 2177 2178 Function *Fn = 2179 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_threadprivate_cached); 2180 2181 return Builder.CreateCall(Fn, Args); 2182 } 2183 2184 std::string OpenMPIRBuilder::getNameWithSeparators(ArrayRef<StringRef> Parts, 2185 StringRef FirstSeparator, 2186 StringRef Separator) { 2187 SmallString<128> Buffer; 2188 llvm::raw_svector_ostream OS(Buffer); 2189 StringRef Sep = FirstSeparator; 2190 for (StringRef Part : Parts) { 2191 OS << Sep << Part; 2192 Sep = Separator; 2193 } 2194 return OS.str().str(); 2195 } 2196 2197 Constant *OpenMPIRBuilder::getOrCreateOMPInternalVariable( 2198 llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) { 2199 // TODO: Replace the twine arg with stringref to get rid of the conversion 2200 // logic. However This is taken from current implementation in clang as is. 2201 // Since this method is used in many places exclusively for OMP internal use 2202 // we will keep it as is for temporarily until we move all users to the 2203 // builder and then, if possible, fix it everywhere in one go. 2204 SmallString<256> Buffer; 2205 llvm::raw_svector_ostream Out(Buffer); 2206 Out << Name; 2207 StringRef RuntimeName = Out.str(); 2208 auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first; 2209 if (Elem.second) { 2210 assert(Elem.second->getType()->getPointerElementType() == Ty && 2211 "OMP internal variable has different type than requested"); 2212 } else { 2213 // TODO: investigate the appropriate linkage type used for the global 2214 // variable for possibly changing that to internal or private, or maybe 2215 // create different versions of the function for different OMP internal 2216 // variables. 2217 Elem.second = new llvm::GlobalVariable( 2218 M, Ty, /*IsConstant*/ false, llvm::GlobalValue::CommonLinkage, 2219 llvm::Constant::getNullValue(Ty), Elem.first(), 2220 /*InsertBefore=*/nullptr, llvm::GlobalValue::NotThreadLocal, 2221 AddressSpace); 2222 } 2223 2224 return Elem.second; 2225 } 2226 2227 Value *OpenMPIRBuilder::getOMPCriticalRegionLock(StringRef CriticalName) { 2228 std::string Prefix = Twine("gomp_critical_user_", CriticalName).str(); 2229 std::string Name = getNameWithSeparators({Prefix, "var"}, ".", "."); 2230 return getOrCreateOMPInternalVariable(KmpCriticalNameTy, Name); 2231 } 2232 2233 GlobalVariable * 2234 OpenMPIRBuilder::createOffloadMaptypes(SmallVectorImpl<uint64_t> &Mappings, 2235 std::string VarName) { 2236 llvm::Constant *MaptypesArrayInit = 2237 llvm::ConstantDataArray::get(M.getContext(), Mappings); 2238 auto *MaptypesArrayGlobal = new llvm::GlobalVariable( 2239 M, MaptypesArrayInit->getType(), 2240 /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, MaptypesArrayInit, 2241 VarName); 2242 MaptypesArrayGlobal->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 2243 return MaptypesArrayGlobal; 2244 } 2245 2246 bool OpenMPIRBuilder::checkAndEmitFlushAfterAtomic( 2247 const LocationDescription &Loc, llvm::AtomicOrdering AO, AtomicKind AK) { 2248 assert(!(AO == AtomicOrdering::NotAtomic || 2249 AO == llvm::AtomicOrdering::Unordered) && 2250 "Unexpected Atomic Ordering."); 2251 2252 bool Flush = false; 2253 llvm::AtomicOrdering FlushAO = AtomicOrdering::Monotonic; 2254 2255 switch (AK) { 2256 case Read: 2257 if (AO == AtomicOrdering::Acquire || AO == AtomicOrdering::AcquireRelease || 2258 AO == AtomicOrdering::SequentiallyConsistent) { 2259 FlushAO = AtomicOrdering::Acquire; 2260 Flush = true; 2261 } 2262 break; 2263 case Write: 2264 case Update: 2265 if (AO == AtomicOrdering::Release || AO == AtomicOrdering::AcquireRelease || 2266 AO == AtomicOrdering::SequentiallyConsistent) { 2267 FlushAO = AtomicOrdering::Release; 2268 Flush = true; 2269 } 2270 break; 2271 case Capture: 2272 switch (AO) { 2273 case AtomicOrdering::Acquire: 2274 FlushAO = AtomicOrdering::Acquire; 2275 Flush = true; 2276 break; 2277 case AtomicOrdering::Release: 2278 FlushAO = AtomicOrdering::Release; 2279 Flush = true; 2280 break; 2281 case AtomicOrdering::AcquireRelease: 2282 case AtomicOrdering::SequentiallyConsistent: 2283 FlushAO = AtomicOrdering::AcquireRelease; 2284 Flush = true; 2285 break; 2286 default: 2287 // do nothing - leave silently. 2288 break; 2289 } 2290 } 2291 2292 if (Flush) { 2293 // Currently Flush RT call still doesn't take memory_ordering, so for when 2294 // that happens, this tries to do the resolution of which atomic ordering 2295 // to use with but issue the flush call 2296 // TODO: pass `FlushAO` after memory ordering support is added 2297 (void)FlushAO; 2298 emitFlush(Loc); 2299 } 2300 2301 // for AO == AtomicOrdering::Monotonic and all other case combinations 2302 // do nothing 2303 return Flush; 2304 } 2305 2306 OpenMPIRBuilder::InsertPointTy 2307 OpenMPIRBuilder::createAtomicRead(const LocationDescription &Loc, 2308 AtomicOpValue &X, AtomicOpValue &V, 2309 AtomicOrdering AO) { 2310 if (!updateToLocation(Loc)) 2311 return Loc.IP; 2312 2313 Type *XTy = X.Var->getType(); 2314 assert(XTy->isPointerTy() && "OMP Atomic expects a pointer to target memory"); 2315 Type *XElemTy = XTy->getPointerElementType(); 2316 assert((XElemTy->isFloatingPointTy() || XElemTy->isIntegerTy() || 2317 XElemTy->isPointerTy()) && 2318 "OMP atomic read expected a scalar type"); 2319 2320 Value *XRead = nullptr; 2321 2322 if (XElemTy->isIntegerTy()) { 2323 LoadInst *XLD = 2324 Builder.CreateLoad(XElemTy, X.Var, X.IsVolatile, "omp.atomic.read"); 2325 XLD->setAtomic(AO); 2326 XRead = cast<Value>(XLD); 2327 } else { 2328 // We need to bitcast and perform atomic op as integer 2329 unsigned Addrspace = cast<PointerType>(XTy)->getAddressSpace(); 2330 IntegerType *IntCastTy = 2331 IntegerType::get(M.getContext(), XElemTy->getScalarSizeInBits()); 2332 Value *XBCast = Builder.CreateBitCast( 2333 X.Var, IntCastTy->getPointerTo(Addrspace), "atomic.src.int.cast"); 2334 LoadInst *XLoad = 2335 Builder.CreateLoad(IntCastTy, XBCast, X.IsVolatile, "omp.atomic.load"); 2336 XLoad->setAtomic(AO); 2337 if (XElemTy->isFloatingPointTy()) { 2338 XRead = Builder.CreateBitCast(XLoad, XElemTy, "atomic.flt.cast"); 2339 } else { 2340 XRead = Builder.CreateIntToPtr(XLoad, XElemTy, "atomic.ptr.cast"); 2341 } 2342 } 2343 checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Read); 2344 Builder.CreateStore(XRead, V.Var, V.IsVolatile); 2345 return Builder.saveIP(); 2346 } 2347 2348 OpenMPIRBuilder::InsertPointTy 2349 OpenMPIRBuilder::createAtomicWrite(const LocationDescription &Loc, 2350 AtomicOpValue &X, Value *Expr, 2351 AtomicOrdering AO) { 2352 if (!updateToLocation(Loc)) 2353 return Loc.IP; 2354 2355 Type *XTy = X.Var->getType(); 2356 assert(XTy->isPointerTy() && "OMP Atomic expects a pointer to target memory"); 2357 Type *XElemTy = XTy->getPointerElementType(); 2358 assert((XElemTy->isFloatingPointTy() || XElemTy->isIntegerTy() || 2359 XElemTy->isPointerTy()) && 2360 "OMP atomic write expected a scalar type"); 2361 2362 if (XElemTy->isIntegerTy()) { 2363 StoreInst *XSt = Builder.CreateStore(Expr, X.Var, X.IsVolatile); 2364 XSt->setAtomic(AO); 2365 } else { 2366 // We need to bitcast and perform atomic op as integers 2367 unsigned Addrspace = cast<PointerType>(XTy)->getAddressSpace(); 2368 IntegerType *IntCastTy = 2369 IntegerType::get(M.getContext(), XElemTy->getScalarSizeInBits()); 2370 Value *XBCast = Builder.CreateBitCast( 2371 X.Var, IntCastTy->getPointerTo(Addrspace), "atomic.dst.int.cast"); 2372 Value *ExprCast = 2373 Builder.CreateBitCast(Expr, IntCastTy, "atomic.src.int.cast"); 2374 Builder.GetInsertBlock()->getParent()->dump(); 2375 StoreInst *XSt = Builder.CreateStore(ExprCast, XBCast, X.IsVolatile); 2376 Builder.GetInsertBlock()->dump(); 2377 XSt->setAtomic(AO); 2378 } 2379 2380 checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Write); 2381 return Builder.saveIP(); 2382 } 2383 2384 OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createAtomicUpdate( 2385 const LocationDescription &Loc, Instruction *AllocIP, AtomicOpValue &X, 2386 Value *Expr, AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp, 2387 AtomicUpdateCallbackTy &UpdateOp, bool IsXLHSInRHSPart) { 2388 if (!updateToLocation(Loc)) 2389 return Loc.IP; 2390 2391 Type *XTy = X.Var->getType(); 2392 assert(XTy->isPointerTy() && "OMP Atomic expects a pointer to target memory"); 2393 Type *XElemTy = XTy->getPointerElementType(); 2394 assert((XElemTy->isFloatingPointTy() || XElemTy->isIntegerTy() || 2395 XElemTy->isPointerTy()) && 2396 "OMP atomic update expected a scalar type"); 2397 assert((RMWOp != AtomicRMWInst::Max) && (RMWOp != AtomicRMWInst::Min) && 2398 (RMWOp != AtomicRMWInst::UMax) && (RMWOp != AtomicRMWInst::UMin) && 2399 "OpenMP atomic does not support LT or GT operations"); 2400 2401 emitAtomicUpdate(AllocIP, X.Var, Expr, AO, RMWOp, UpdateOp, X.IsVolatile, 2402 IsXLHSInRHSPart); 2403 checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Update); 2404 return Builder.saveIP(); 2405 } 2406 2407 Value *OpenMPIRBuilder::emitRMWOpAsInstruction(Value *Src1, Value *Src2, 2408 AtomicRMWInst::BinOp RMWOp) { 2409 switch (RMWOp) { 2410 case AtomicRMWInst::Add: 2411 return Builder.CreateAdd(Src1, Src2); 2412 case AtomicRMWInst::Sub: 2413 return Builder.CreateSub(Src1, Src2); 2414 case AtomicRMWInst::And: 2415 return Builder.CreateAnd(Src1, Src2); 2416 case AtomicRMWInst::Nand: 2417 return Builder.CreateNeg(Builder.CreateAnd(Src1, Src2)); 2418 case AtomicRMWInst::Or: 2419 return Builder.CreateOr(Src1, Src2); 2420 case AtomicRMWInst::Xor: 2421 return Builder.CreateXor(Src1, Src2); 2422 case AtomicRMWInst::Xchg: 2423 case AtomicRMWInst::FAdd: 2424 case AtomicRMWInst::FSub: 2425 case AtomicRMWInst::BAD_BINOP: 2426 case AtomicRMWInst::Max: 2427 case AtomicRMWInst::Min: 2428 case AtomicRMWInst::UMax: 2429 case AtomicRMWInst::UMin: 2430 llvm_unreachable("Unsupported atomic update operation"); 2431 } 2432 llvm_unreachable("Unsupported atomic update operation"); 2433 } 2434 2435 std::pair<Value *, Value *> 2436 OpenMPIRBuilder::emitAtomicUpdate(Instruction *AllocIP, Value *X, Value *Expr, 2437 AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp, 2438 AtomicUpdateCallbackTy &UpdateOp, 2439 bool VolatileX, bool IsXLHSInRHSPart) { 2440 Type *XElemTy = X->getType()->getPointerElementType(); 2441 2442 bool DoCmpExch = 2443 ((RMWOp == AtomicRMWInst::BAD_BINOP) || (RMWOp == AtomicRMWInst::FAdd)) || 2444 (RMWOp == AtomicRMWInst::FSub) || 2445 (RMWOp == AtomicRMWInst::Sub && !IsXLHSInRHSPart); 2446 2447 std::pair<Value *, Value *> Res; 2448 if (XElemTy->isIntegerTy() && !DoCmpExch) { 2449 Res.first = Builder.CreateAtomicRMW(RMWOp, X, Expr, llvm::MaybeAlign(), AO); 2450 // not needed except in case of postfix captures. Generate anyway for 2451 // consistency with the else part. Will be removed with any DCE pass. 2452 Res.second = emitRMWOpAsInstruction(Res.first, Expr, RMWOp); 2453 } else { 2454 unsigned Addrspace = cast<PointerType>(X->getType())->getAddressSpace(); 2455 IntegerType *IntCastTy = 2456 IntegerType::get(M.getContext(), XElemTy->getScalarSizeInBits()); 2457 Value *XBCast = 2458 Builder.CreateBitCast(X, IntCastTy->getPointerTo(Addrspace)); 2459 LoadInst *OldVal = 2460 Builder.CreateLoad(IntCastTy, XBCast, X->getName() + ".atomic.load"); 2461 OldVal->setAtomic(AO); 2462 // CurBB 2463 // | /---\ 2464 // ContBB | 2465 // | \---/ 2466 // ExitBB 2467 BasicBlock *CurBB = Builder.GetInsertBlock(); 2468 Instruction *CurBBTI = CurBB->getTerminator(); 2469 CurBBTI = CurBBTI ? CurBBTI : Builder.CreateUnreachable(); 2470 BasicBlock *ExitBB = 2471 CurBB->splitBasicBlock(CurBBTI, X->getName() + ".atomic.exit"); 2472 BasicBlock *ContBB = CurBB->splitBasicBlock(CurBB->getTerminator(), 2473 X->getName() + ".atomic.cont"); 2474 ContBB->getTerminator()->eraseFromParent(); 2475 Builder.SetInsertPoint(ContBB); 2476 llvm::PHINode *PHI = Builder.CreatePHI(OldVal->getType(), 2); 2477 PHI->addIncoming(OldVal, CurBB); 2478 AllocaInst *NewAtomicAddr = Builder.CreateAlloca(XElemTy); 2479 NewAtomicAddr->setName(X->getName() + "x.new.val"); 2480 NewAtomicAddr->moveBefore(AllocIP); 2481 IntegerType *NewAtomicCastTy = 2482 IntegerType::get(M.getContext(), XElemTy->getScalarSizeInBits()); 2483 bool IsIntTy = XElemTy->isIntegerTy(); 2484 Value *NewAtomicIntAddr = 2485 (IsIntTy) 2486 ? NewAtomicAddr 2487 : Builder.CreateBitCast(NewAtomicAddr, 2488 NewAtomicCastTy->getPointerTo(Addrspace)); 2489 Value *OldExprVal = PHI; 2490 if (!IsIntTy) { 2491 if (XElemTy->isFloatingPointTy()) { 2492 OldExprVal = Builder.CreateBitCast(PHI, XElemTy, 2493 X->getName() + ".atomic.fltCast"); 2494 } else { 2495 OldExprVal = Builder.CreateIntToPtr(PHI, XElemTy, 2496 X->getName() + ".atomic.ptrCast"); 2497 } 2498 } 2499 2500 Value *Upd = UpdateOp(OldExprVal, Builder); 2501 Builder.CreateStore(Upd, NewAtomicAddr); 2502 LoadInst *DesiredVal = Builder.CreateLoad(XElemTy, NewAtomicIntAddr); 2503 Value *XAddr = 2504 (IsIntTy) 2505 ? X 2506 : Builder.CreateBitCast(X, IntCastTy->getPointerTo(Addrspace)); 2507 AtomicOrdering Failure = 2508 llvm::AtomicCmpXchgInst::getStrongestFailureOrdering(AO); 2509 AtomicCmpXchgInst *Result = Builder.CreateAtomicCmpXchg( 2510 XAddr, OldExprVal, DesiredVal, llvm::MaybeAlign(), AO, Failure); 2511 Result->setVolatile(VolatileX); 2512 Value *PreviousVal = Builder.CreateExtractValue(Result, /*Idxs=*/0); 2513 Value *SuccessFailureVal = Builder.CreateExtractValue(Result, /*Idxs=*/1); 2514 PHI->addIncoming(PreviousVal, Builder.GetInsertBlock()); 2515 Builder.CreateCondBr(SuccessFailureVal, ExitBB, ContBB); 2516 2517 Res.first = OldExprVal; 2518 Res.second = Upd; 2519 2520 // set Insertion point in exit block 2521 if (UnreachableInst *ExitTI = 2522 dyn_cast<UnreachableInst>(ExitBB->getTerminator())) { 2523 CurBBTI->eraseFromParent(); 2524 Builder.SetInsertPoint(ExitBB); 2525 } else { 2526 Builder.SetInsertPoint(ExitTI); 2527 } 2528 } 2529 2530 return Res; 2531 } 2532 2533 OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createAtomicCapture( 2534 const LocationDescription &Loc, Instruction *AllocIP, AtomicOpValue &X, 2535 AtomicOpValue &V, Value *Expr, AtomicOrdering AO, 2536 AtomicRMWInst::BinOp RMWOp, AtomicUpdateCallbackTy &UpdateOp, 2537 bool UpdateExpr, bool IsPostfixUpdate, bool IsXLHSInRHSPart) { 2538 if (!updateToLocation(Loc)) 2539 return Loc.IP; 2540 2541 Type *XTy = X.Var->getType(); 2542 assert(XTy->isPointerTy() && "OMP Atomic expects a pointer to target memory"); 2543 Type *XElemTy = XTy->getPointerElementType(); 2544 assert((XElemTy->isFloatingPointTy() || XElemTy->isIntegerTy() || 2545 XElemTy->isPointerTy()) && 2546 "OMP atomic capture expected a scalar type"); 2547 assert((RMWOp != AtomicRMWInst::Max) && (RMWOp != AtomicRMWInst::Min) && 2548 "OpenMP atomic does not support LT or GT operations"); 2549 2550 // If UpdateExpr is 'x' updated with some `expr` not based on 'x', 2551 // 'x' is simply atomically rewritten with 'expr'. 2552 AtomicRMWInst::BinOp AtomicOp = (UpdateExpr ? RMWOp : AtomicRMWInst::Xchg); 2553 std::pair<Value *, Value *> Result = 2554 emitAtomicUpdate(AllocIP, X.Var, Expr, AO, AtomicOp, UpdateOp, 2555 X.IsVolatile, IsXLHSInRHSPart); 2556 2557 Value *CapturedVal = (IsPostfixUpdate ? Result.first : Result.second); 2558 Builder.CreateStore(CapturedVal, V.Var, V.IsVolatile); 2559 2560 checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Capture); 2561 return Builder.saveIP(); 2562 } 2563 2564 GlobalVariable * 2565 OpenMPIRBuilder::createOffloadMapnames(SmallVectorImpl<llvm::Constant *> &Names, 2566 std::string VarName) { 2567 llvm::Constant *MapNamesArrayInit = llvm::ConstantArray::get( 2568 llvm::ArrayType::get( 2569 llvm::Type::getInt8Ty(M.getContext())->getPointerTo(), Names.size()), 2570 Names); 2571 auto *MapNamesArrayGlobal = new llvm::GlobalVariable( 2572 M, MapNamesArrayInit->getType(), 2573 /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, MapNamesArrayInit, 2574 VarName); 2575 return MapNamesArrayGlobal; 2576 } 2577 2578 // Create all simple and struct types exposed by the runtime and remember 2579 // the llvm::PointerTypes of them for easy access later. 2580 void OpenMPIRBuilder::initializeTypes(Module &M) { 2581 LLVMContext &Ctx = M.getContext(); 2582 StructType *T; 2583 #define OMP_TYPE(VarName, InitValue) VarName = InitValue; 2584 #define OMP_ARRAY_TYPE(VarName, ElemTy, ArraySize) \ 2585 VarName##Ty = ArrayType::get(ElemTy, ArraySize); \ 2586 VarName##PtrTy = PointerType::getUnqual(VarName##Ty); 2587 #define OMP_FUNCTION_TYPE(VarName, IsVarArg, ReturnType, ...) \ 2588 VarName = FunctionType::get(ReturnType, {__VA_ARGS__}, IsVarArg); \ 2589 VarName##Ptr = PointerType::getUnqual(VarName); 2590 #define OMP_STRUCT_TYPE(VarName, StructName, ...) \ 2591 T = StructType::getTypeByName(Ctx, StructName); \ 2592 if (!T) \ 2593 T = StructType::create(Ctx, {__VA_ARGS__}, StructName); \ 2594 VarName = T; \ 2595 VarName##Ptr = PointerType::getUnqual(T); 2596 #include "llvm/Frontend/OpenMP/OMPKinds.def" 2597 } 2598 2599 void OpenMPIRBuilder::OutlineInfo::collectBlocks( 2600 SmallPtrSetImpl<BasicBlock *> &BlockSet, 2601 SmallVectorImpl<BasicBlock *> &BlockVector) { 2602 SmallVector<BasicBlock *, 32> Worklist; 2603 BlockSet.insert(EntryBB); 2604 BlockSet.insert(ExitBB); 2605 2606 Worklist.push_back(EntryBB); 2607 while (!Worklist.empty()) { 2608 BasicBlock *BB = Worklist.pop_back_val(); 2609 BlockVector.push_back(BB); 2610 for (BasicBlock *SuccBB : successors(BB)) 2611 if (BlockSet.insert(SuccBB).second) 2612 Worklist.push_back(SuccBB); 2613 } 2614 } 2615 2616 void CanonicalLoopInfo::collectControlBlocks( 2617 SmallVectorImpl<BasicBlock *> &BBs) { 2618 // We only count those BBs as control block for which we do not need to 2619 // reverse the CFG, i.e. not the loop body which can contain arbitrary control 2620 // flow. For consistency, this also means we do not add the Body block, which 2621 // is just the entry to the body code. 2622 BBs.reserve(BBs.size() + 6); 2623 BBs.append({Preheader, Header, Cond, Latch, Exit, After}); 2624 } 2625 2626 void CanonicalLoopInfo::assertOK() const { 2627 #ifndef NDEBUG 2628 if (!IsValid) 2629 return; 2630 2631 // Verify standard control-flow we use for OpenMP loops. 2632 assert(Preheader); 2633 assert(isa<BranchInst>(Preheader->getTerminator()) && 2634 "Preheader must terminate with unconditional branch"); 2635 assert(Preheader->getSingleSuccessor() == Header && 2636 "Preheader must jump to header"); 2637 2638 assert(Header); 2639 assert(isa<BranchInst>(Header->getTerminator()) && 2640 "Header must terminate with unconditional branch"); 2641 assert(Header->getSingleSuccessor() == Cond && 2642 "Header must jump to exiting block"); 2643 2644 assert(Cond); 2645 assert(Cond->getSinglePredecessor() == Header && 2646 "Exiting block only reachable from header"); 2647 2648 assert(isa<BranchInst>(Cond->getTerminator()) && 2649 "Exiting block must terminate with conditional branch"); 2650 assert(size(successors(Cond)) == 2 && 2651 "Exiting block must have two successors"); 2652 assert(cast<BranchInst>(Cond->getTerminator())->getSuccessor(0) == Body && 2653 "Exiting block's first successor jump to the body"); 2654 assert(cast<BranchInst>(Cond->getTerminator())->getSuccessor(1) == Exit && 2655 "Exiting block's second successor must exit the loop"); 2656 2657 assert(Body); 2658 assert(Body->getSinglePredecessor() == Cond && 2659 "Body only reachable from exiting block"); 2660 assert(!isa<PHINode>(Body->front())); 2661 2662 assert(Latch); 2663 assert(isa<BranchInst>(Latch->getTerminator()) && 2664 "Latch must terminate with unconditional branch"); 2665 assert(Latch->getSingleSuccessor() == Header && "Latch must jump to header"); 2666 // TODO: To support simple redirecting of the end of the body code that has 2667 // multiple; introduce another auxiliary basic block like preheader and after. 2668 assert(Latch->getSinglePredecessor() != nullptr); 2669 assert(!isa<PHINode>(Latch->front())); 2670 2671 assert(Exit); 2672 assert(isa<BranchInst>(Exit->getTerminator()) && 2673 "Exit block must terminate with unconditional branch"); 2674 assert(Exit->getSingleSuccessor() == After && 2675 "Exit block must jump to after block"); 2676 2677 assert(After); 2678 assert(After->getSinglePredecessor() == Exit && 2679 "After block only reachable from exit block"); 2680 assert(After->empty() || !isa<PHINode>(After->front())); 2681 2682 Instruction *IndVar = getIndVar(); 2683 assert(IndVar && "Canonical induction variable not found?"); 2684 assert(isa<IntegerType>(IndVar->getType()) && 2685 "Induction variable must be an integer"); 2686 assert(cast<PHINode>(IndVar)->getParent() == Header && 2687 "Induction variable must be a PHI in the loop header"); 2688 assert(cast<PHINode>(IndVar)->getIncomingBlock(0) == Preheader); 2689 assert( 2690 cast<ConstantInt>(cast<PHINode>(IndVar)->getIncomingValue(0))->isZero()); 2691 assert(cast<PHINode>(IndVar)->getIncomingBlock(1) == Latch); 2692 2693 auto *NextIndVar = cast<PHINode>(IndVar)->getIncomingValue(1); 2694 assert(cast<Instruction>(NextIndVar)->getParent() == Latch); 2695 assert(cast<BinaryOperator>(NextIndVar)->getOpcode() == BinaryOperator::Add); 2696 assert(cast<BinaryOperator>(NextIndVar)->getOperand(0) == IndVar); 2697 assert(cast<ConstantInt>(cast<BinaryOperator>(NextIndVar)->getOperand(1)) 2698 ->isOne()); 2699 2700 Value *TripCount = getTripCount(); 2701 assert(TripCount && "Loop trip count not found?"); 2702 assert(IndVar->getType() == TripCount->getType() && 2703 "Trip count and induction variable must have the same type"); 2704 2705 auto *CmpI = cast<CmpInst>(&Cond->front()); 2706 assert(CmpI->getPredicate() == CmpInst::ICMP_ULT && 2707 "Exit condition must be a signed less-than comparison"); 2708 assert(CmpI->getOperand(0) == IndVar && 2709 "Exit condition must compare the induction variable"); 2710 assert(CmpI->getOperand(1) == TripCount && 2711 "Exit condition must compare with the trip count"); 2712 #endif 2713 } 2714