1 //===-- AMDGPU.h - MachineFunction passes hw codegen --------------*- C++ -*-=// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 /// \file 8 //===----------------------------------------------------------------------===// 9 10 #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPU_H 11 #define LLVM_LIB_TARGET_AMDGPU_AMDGPU_H 12 13 #include "llvm/IR/PassManager.h" 14 #include "llvm/Support/CodeGen.h" 15 16 namespace llvm { 17 18 class FunctionPass; 19 class GCNTargetMachine; 20 class ImmutablePass; 21 class MachineFunctionPass; 22 class ModulePass; 23 class Pass; 24 class Target; 25 class TargetMachine; 26 class TargetOptions; 27 class PassRegistry; 28 class Module; 29 30 // GlobalISel passes 31 void initializeAMDGPUPreLegalizerCombinerPass(PassRegistry &); 32 FunctionPass *createAMDGPUPreLegalizeCombiner(bool IsOptNone); 33 void initializeAMDGPUPostLegalizerCombinerPass(PassRegistry &); 34 FunctionPass *createAMDGPUPostLegalizeCombiner(bool IsOptNone); 35 FunctionPass *createAMDGPURegBankCombiner(bool IsOptNone); 36 void initializeAMDGPURegBankCombinerPass(PassRegistry &); 37 38 // R600 Passes 39 FunctionPass *createR600VectorRegMerger(); 40 FunctionPass *createR600ExpandSpecialInstrsPass(); 41 FunctionPass *createR600EmitClauseMarkers(); 42 FunctionPass *createR600ClauseMergePass(); 43 FunctionPass *createR600Packetizer(); 44 FunctionPass *createR600ControlFlowFinalizer(); 45 FunctionPass *createAMDGPUCFGStructurizerPass(); 46 FunctionPass *createR600ISelDag(TargetMachine *TM, CodeGenOpt::Level OptLevel); 47 48 // SI Passes 49 FunctionPass *createGCNDPPCombinePass(); 50 FunctionPass *createSIAnnotateControlFlowPass(); 51 FunctionPass *createSIFoldOperandsPass(); 52 FunctionPass *createSIPeepholeSDWAPass(); 53 FunctionPass *createSILowerI1CopiesPass(); 54 FunctionPass *createSIShrinkInstructionsPass(); 55 FunctionPass *createSILoadStoreOptimizerPass(); 56 FunctionPass *createSIWholeQuadModePass(); 57 FunctionPass *createSIFixControlFlowLiveIntervalsPass(); 58 FunctionPass *createSIOptimizeExecMaskingPreRAPass(); 59 FunctionPass *createSIFixSGPRCopiesPass(); 60 FunctionPass *createSIMemoryLegalizerPass(); 61 FunctionPass *createSIInsertWaitcntsPass(); 62 FunctionPass *createSIPreAllocateWWMRegsPass(); 63 FunctionPass *createSIFormMemoryClausesPass(); 64 65 FunctionPass *createSIPostRABundlerPass(); 66 FunctionPass *createAMDGPUSimplifyLibCallsPass(const TargetMachine *); 67 FunctionPass *createAMDGPUUseNativeCallsPass(); 68 FunctionPass *createAMDGPUCodeGenPreparePass(); 69 FunctionPass *createAMDGPULateCodeGenPreparePass(); 70 FunctionPass *createAMDGPUMachineCFGStructurizerPass(); 71 FunctionPass *createAMDGPUPropagateAttributesEarlyPass(const TargetMachine *); 72 ModulePass *createAMDGPUPropagateAttributesLatePass(const TargetMachine *); 73 FunctionPass *createAMDGPURewriteOutArgumentsPass(); 74 ModulePass *createAMDGPULowerModuleLDSPass(); 75 FunctionPass *createSIModeRegisterPass(); 76 77 struct AMDGPUSimplifyLibCallsPass : PassInfoMixin<AMDGPUSimplifyLibCallsPass> { 78 AMDGPUSimplifyLibCallsPass(TargetMachine &TM) : TM(TM) {} 79 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); 80 81 private: 82 TargetMachine &TM; 83 }; 84 85 struct AMDGPUUseNativeCallsPass : PassInfoMixin<AMDGPUUseNativeCallsPass> { 86 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); 87 }; 88 89 void initializeAMDGPUDAGToDAGISelPass(PassRegistry&); 90 91 void initializeAMDGPUMachineCFGStructurizerPass(PassRegistry&); 92 extern char &AMDGPUMachineCFGStructurizerID; 93 94 void initializeAMDGPUAlwaysInlinePass(PassRegistry&); 95 96 Pass *createAMDGPUAnnotateKernelFeaturesPass(); 97 void initializeAMDGPUAnnotateKernelFeaturesPass(PassRegistry &); 98 extern char &AMDGPUAnnotateKernelFeaturesID; 99 100 FunctionPass *createAMDGPUAtomicOptimizerPass(); 101 void initializeAMDGPUAtomicOptimizerPass(PassRegistry &); 102 extern char &AMDGPUAtomicOptimizerID; 103 104 ModulePass *createAMDGPULowerIntrinsicsPass(); 105 void initializeAMDGPULowerIntrinsicsPass(PassRegistry &); 106 extern char &AMDGPULowerIntrinsicsID; 107 108 ModulePass *createAMDGPUFixFunctionBitcastsPass(); 109 void initializeAMDGPUFixFunctionBitcastsPass(PassRegistry &); 110 extern char &AMDGPUFixFunctionBitcastsID; 111 112 FunctionPass *createAMDGPULowerKernelArgumentsPass(); 113 void initializeAMDGPULowerKernelArgumentsPass(PassRegistry &); 114 extern char &AMDGPULowerKernelArgumentsID; 115 116 ModulePass *createAMDGPULowerKernelAttributesPass(); 117 void initializeAMDGPULowerKernelAttributesPass(PassRegistry &); 118 extern char &AMDGPULowerKernelAttributesID; 119 120 struct AMDGPULowerKernelAttributesPass 121 : PassInfoMixin<AMDGPULowerKernelAttributesPass> { 122 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); 123 }; 124 125 void initializeAMDGPUPropagateAttributesEarlyPass(PassRegistry &); 126 extern char &AMDGPUPropagateAttributesEarlyID; 127 128 struct AMDGPUPropagateAttributesEarlyPass 129 : PassInfoMixin<AMDGPUPropagateAttributesEarlyPass> { 130 AMDGPUPropagateAttributesEarlyPass(TargetMachine &TM) : TM(TM) {} 131 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); 132 133 private: 134 TargetMachine &TM; 135 }; 136 137 void initializeAMDGPUPropagateAttributesLatePass(PassRegistry &); 138 extern char &AMDGPUPropagateAttributesLateID; 139 140 struct AMDGPUPropagateAttributesLatePass 141 : PassInfoMixin<AMDGPUPropagateAttributesLatePass> { 142 AMDGPUPropagateAttributesLatePass(TargetMachine &TM) : TM(TM) {} 143 PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); 144 145 private: 146 TargetMachine &TM; 147 }; 148 149 void initializeAMDGPULowerModuleLDSPass(PassRegistry &); 150 extern char &AMDGPULowerModuleLDSID; 151 152 struct AMDGPULowerModuleLDSPass : PassInfoMixin<AMDGPULowerModuleLDSPass> { 153 PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); 154 }; 155 156 void initializeAMDGPURewriteOutArgumentsPass(PassRegistry &); 157 extern char &AMDGPURewriteOutArgumentsID; 158 159 void initializeGCNDPPCombinePass(PassRegistry &); 160 extern char &GCNDPPCombineID; 161 162 void initializeR600ClauseMergePassPass(PassRegistry &); 163 extern char &R600ClauseMergePassID; 164 165 void initializeR600ControlFlowFinalizerPass(PassRegistry &); 166 extern char &R600ControlFlowFinalizerID; 167 168 void initializeR600ExpandSpecialInstrsPassPass(PassRegistry &); 169 extern char &R600ExpandSpecialInstrsPassID; 170 171 void initializeR600VectorRegMergerPass(PassRegistry &); 172 extern char &R600VectorRegMergerID; 173 174 void initializeR600PacketizerPass(PassRegistry &); 175 extern char &R600PacketizerID; 176 177 void initializeSIFoldOperandsPass(PassRegistry &); 178 extern char &SIFoldOperandsID; 179 180 void initializeSIPeepholeSDWAPass(PassRegistry &); 181 extern char &SIPeepholeSDWAID; 182 183 void initializeSIShrinkInstructionsPass(PassRegistry&); 184 extern char &SIShrinkInstructionsID; 185 186 void initializeSIFixSGPRCopiesPass(PassRegistry &); 187 extern char &SIFixSGPRCopiesID; 188 189 void initializeSIFixVGPRCopiesPass(PassRegistry &); 190 extern char &SIFixVGPRCopiesID; 191 192 void initializeSILowerI1CopiesPass(PassRegistry &); 193 extern char &SILowerI1CopiesID; 194 195 void initializeSILowerSGPRSpillsPass(PassRegistry &); 196 extern char &SILowerSGPRSpillsID; 197 198 void initializeSILoadStoreOptimizerPass(PassRegistry &); 199 extern char &SILoadStoreOptimizerID; 200 201 void initializeSIWholeQuadModePass(PassRegistry &); 202 extern char &SIWholeQuadModeID; 203 204 void initializeSILowerControlFlowPass(PassRegistry &); 205 extern char &SILowerControlFlowID; 206 207 void initializeSIPreEmitPeepholePass(PassRegistry &); 208 extern char &SIPreEmitPeepholeID; 209 210 void initializeSILateBranchLoweringPass(PassRegistry &); 211 extern char &SILateBranchLoweringPassID; 212 213 void initializeSIOptimizeExecMaskingPass(PassRegistry &); 214 extern char &SIOptimizeExecMaskingID; 215 216 void initializeSIPreAllocateWWMRegsPass(PassRegistry &); 217 extern char &SIPreAllocateWWMRegsID; 218 219 void initializeAMDGPUSimplifyLibCallsPass(PassRegistry &); 220 extern char &AMDGPUSimplifyLibCallsID; 221 222 void initializeAMDGPUUseNativeCallsPass(PassRegistry &); 223 extern char &AMDGPUUseNativeCallsID; 224 225 void initializeAMDGPUPerfHintAnalysisPass(PassRegistry &); 226 extern char &AMDGPUPerfHintAnalysisID; 227 228 // Passes common to R600 and SI 229 FunctionPass *createAMDGPUPromoteAlloca(); 230 void initializeAMDGPUPromoteAllocaPass(PassRegistry&); 231 extern char &AMDGPUPromoteAllocaID; 232 233 FunctionPass *createAMDGPUPromoteAllocaToVector(); 234 void initializeAMDGPUPromoteAllocaToVectorPass(PassRegistry&); 235 extern char &AMDGPUPromoteAllocaToVectorID; 236 237 struct AMDGPUPromoteAllocaPass : PassInfoMixin<AMDGPUPromoteAllocaPass> { 238 AMDGPUPromoteAllocaPass(TargetMachine &TM) : TM(TM) {} 239 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); 240 241 private: 242 TargetMachine &TM; 243 }; 244 245 struct AMDGPUPromoteAllocaToVectorPass 246 : PassInfoMixin<AMDGPUPromoteAllocaToVectorPass> { 247 AMDGPUPromoteAllocaToVectorPass(TargetMachine &TM) : TM(TM) {} 248 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); 249 250 private: 251 TargetMachine &TM; 252 }; 253 254 Pass *createAMDGPUStructurizeCFGPass(); 255 FunctionPass *createAMDGPUISelDag( 256 TargetMachine *TM = nullptr, 257 CodeGenOpt::Level OptLevel = CodeGenOpt::Default); 258 ModulePass *createAMDGPUAlwaysInlinePass(bool GlobalOpt = true); 259 260 struct AMDGPUAlwaysInlinePass : PassInfoMixin<AMDGPUAlwaysInlinePass> { 261 AMDGPUAlwaysInlinePass(bool GlobalOpt = true) : GlobalOpt(GlobalOpt) {} 262 PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); 263 264 private: 265 bool GlobalOpt; 266 }; 267 268 ModulePass *createR600OpenCLImageTypeLoweringPass(); 269 FunctionPass *createAMDGPUAnnotateUniformValues(); 270 271 ModulePass *createAMDGPUPrintfRuntimeBinding(); 272 void initializeAMDGPUPrintfRuntimeBindingPass(PassRegistry&); 273 extern char &AMDGPUPrintfRuntimeBindingID; 274 275 struct AMDGPUPrintfRuntimeBindingPass 276 : PassInfoMixin<AMDGPUPrintfRuntimeBindingPass> { 277 PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); 278 }; 279 280 ModulePass* createAMDGPUUnifyMetadataPass(); 281 void initializeAMDGPUUnifyMetadataPass(PassRegistry&); 282 extern char &AMDGPUUnifyMetadataID; 283 284 struct AMDGPUUnifyMetadataPass : PassInfoMixin<AMDGPUUnifyMetadataPass> { 285 PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); 286 }; 287 288 void initializeSIOptimizeExecMaskingPreRAPass(PassRegistry&); 289 extern char &SIOptimizeExecMaskingPreRAID; 290 291 void initializeAMDGPUAnnotateUniformValuesPass(PassRegistry&); 292 extern char &AMDGPUAnnotateUniformValuesPassID; 293 294 void initializeAMDGPUCodeGenPreparePass(PassRegistry&); 295 extern char &AMDGPUCodeGenPrepareID; 296 297 void initializeAMDGPULateCodeGenPreparePass(PassRegistry &); 298 extern char &AMDGPULateCodeGenPrepareID; 299 300 void initializeSIAnnotateControlFlowPass(PassRegistry&); 301 extern char &SIAnnotateControlFlowPassID; 302 303 void initializeSIMemoryLegalizerPass(PassRegistry&); 304 extern char &SIMemoryLegalizerID; 305 306 void initializeSIModeRegisterPass(PassRegistry&); 307 extern char &SIModeRegisterID; 308 309 void initializeSIInsertHardClausesPass(PassRegistry &); 310 extern char &SIInsertHardClausesID; 311 312 void initializeSIInsertWaitcntsPass(PassRegistry&); 313 extern char &SIInsertWaitcntsID; 314 315 void initializeSIFormMemoryClausesPass(PassRegistry&); 316 extern char &SIFormMemoryClausesID; 317 318 void initializeSIPostRABundlerPass(PassRegistry&); 319 extern char &SIPostRABundlerID; 320 321 void initializeAMDGPUUnifyDivergentExitNodesPass(PassRegistry&); 322 extern char &AMDGPUUnifyDivergentExitNodesID; 323 324 ImmutablePass *createAMDGPUAAWrapperPass(); 325 void initializeAMDGPUAAWrapperPassPass(PassRegistry&); 326 ImmutablePass *createAMDGPUExternalAAWrapperPass(); 327 void initializeAMDGPUExternalAAWrapperPass(PassRegistry&); 328 329 void initializeAMDGPUArgumentUsageInfoPass(PassRegistry &); 330 331 ModulePass *createAMDGPUOpenCLEnqueuedBlockLoweringPass(); 332 void initializeAMDGPUOpenCLEnqueuedBlockLoweringPass(PassRegistry &); 333 extern char &AMDGPUOpenCLEnqueuedBlockLoweringID; 334 335 void initializeGCNNSAReassignPass(PassRegistry &); 336 extern char &GCNNSAReassignID; 337 338 namespace AMDGPU { 339 enum TargetIndex { 340 TI_CONSTDATA_START, 341 TI_SCRATCH_RSRC_DWORD0, 342 TI_SCRATCH_RSRC_DWORD1, 343 TI_SCRATCH_RSRC_DWORD2, 344 TI_SCRATCH_RSRC_DWORD3 345 }; 346 } 347 348 /// OpenCL uses address spaces to differentiate between 349 /// various memory regions on the hardware. On the CPU 350 /// all of the address spaces point to the same memory, 351 /// however on the GPU, each address space points to 352 /// a separate piece of memory that is unique from other 353 /// memory locations. 354 namespace AMDGPUAS { 355 enum : unsigned { 356 // The maximum value for flat, generic, local, private, constant and region. 357 MAX_AMDGPU_ADDRESS = 7, 358 359 FLAT_ADDRESS = 0, ///< Address space for flat memory. 360 GLOBAL_ADDRESS = 1, ///< Address space for global memory (RAT0, VTX0). 361 REGION_ADDRESS = 2, ///< Address space for region memory. (GDS) 362 363 CONSTANT_ADDRESS = 4, ///< Address space for constant memory (VTX2). 364 LOCAL_ADDRESS = 3, ///< Address space for local memory. 365 PRIVATE_ADDRESS = 5, ///< Address space for private memory. 366 367 CONSTANT_ADDRESS_32BIT = 6, ///< Address space for 32-bit constant memory. 368 369 BUFFER_FAT_POINTER = 7, ///< Address space for 160-bit buffer fat pointers. 370 371 /// Address space for direct addressible parameter memory (CONST0). 372 PARAM_D_ADDRESS = 6, 373 /// Address space for indirect addressible parameter memory (VTX1). 374 PARAM_I_ADDRESS = 7, 375 376 // Do not re-order the CONSTANT_BUFFER_* enums. Several places depend on 377 // this order to be able to dynamically index a constant buffer, for 378 // example: 379 // 380 // ConstantBufferAS = CONSTANT_BUFFER_0 + CBIdx 381 382 CONSTANT_BUFFER_0 = 8, 383 CONSTANT_BUFFER_1 = 9, 384 CONSTANT_BUFFER_2 = 10, 385 CONSTANT_BUFFER_3 = 11, 386 CONSTANT_BUFFER_4 = 12, 387 CONSTANT_BUFFER_5 = 13, 388 CONSTANT_BUFFER_6 = 14, 389 CONSTANT_BUFFER_7 = 15, 390 CONSTANT_BUFFER_8 = 16, 391 CONSTANT_BUFFER_9 = 17, 392 CONSTANT_BUFFER_10 = 18, 393 CONSTANT_BUFFER_11 = 19, 394 CONSTANT_BUFFER_12 = 20, 395 CONSTANT_BUFFER_13 = 21, 396 CONSTANT_BUFFER_14 = 22, 397 CONSTANT_BUFFER_15 = 23, 398 399 // Some places use this if the address space can't be determined. 400 UNKNOWN_ADDRESS_SPACE = ~0u, 401 }; 402 } 403 404 namespace AMDGPU { 405 406 // FIXME: Missing constant_32bit 407 inline bool isFlatGlobalAddrSpace(unsigned AS) { 408 return AS == AMDGPUAS::GLOBAL_ADDRESS || 409 AS == AMDGPUAS::FLAT_ADDRESS || 410 AS == AMDGPUAS::CONSTANT_ADDRESS || 411 AS > AMDGPUAS::MAX_AMDGPU_ADDRESS; 412 } 413 } 414 415 } // End namespace llvm 416 417 #endif 418