Home | History | Annotate | Line # | Download | only in AMDGPU
      1 //===-- AMDGPU.h - MachineFunction passes hw codegen --------------*- C++ -*-=//
      2 //
      3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
      4 // See https://llvm.org/LICENSE.txt for license information.
      5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
      6 //
      7 /// \file
      8 //===----------------------------------------------------------------------===//
      9 
     10 #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPU_H
     11 #define LLVM_LIB_TARGET_AMDGPU_AMDGPU_H
     12 
     13 #include "llvm/IR/PassManager.h"
     14 #include "llvm/Support/CodeGen.h"
     15 
     16 namespace llvm {
     17 
// Forward declarations. The declarations in this header only use these types
// through pointers and references, so the complete definitions are not
// required here. Kept in alphabetical order.
class FunctionPass;
class GCNTargetMachine;
class ImmutablePass;
class MachineFunctionPass;
class Module;
class ModulePass;
class Pass;
class PassRegistry;
class Target;
class TargetMachine;
class TargetOptions;
     29 
     30 // GlobalISel passes
     31 void initializeAMDGPUPreLegalizerCombinerPass(PassRegistry &);
     32 FunctionPass *createAMDGPUPreLegalizeCombiner(bool IsOptNone);
     33 void initializeAMDGPUPostLegalizerCombinerPass(PassRegistry &);
     34 FunctionPass *createAMDGPUPostLegalizeCombiner(bool IsOptNone);
     35 FunctionPass *createAMDGPURegBankCombiner(bool IsOptNone);
     36 void initializeAMDGPURegBankCombinerPass(PassRegistry &);
     37 
     38 // R600 Passes
     39 FunctionPass *createR600VectorRegMerger();
     40 FunctionPass *createR600ExpandSpecialInstrsPass();
     41 FunctionPass *createR600EmitClauseMarkers();
     42 FunctionPass *createR600ClauseMergePass();
     43 FunctionPass *createR600Packetizer();
     44 FunctionPass *createR600ControlFlowFinalizer();
     45 FunctionPass *createAMDGPUCFGStructurizerPass();
     46 FunctionPass *createR600ISelDag(TargetMachine *TM, CodeGenOpt::Level OptLevel);
     47 
     48 // SI Passes
     49 FunctionPass *createGCNDPPCombinePass();
     50 FunctionPass *createSIAnnotateControlFlowPass();
     51 FunctionPass *createSIFoldOperandsPass();
     52 FunctionPass *createSIPeepholeSDWAPass();
     53 FunctionPass *createSILowerI1CopiesPass();
     54 FunctionPass *createSIShrinkInstructionsPass();
     55 FunctionPass *createSILoadStoreOptimizerPass();
     56 FunctionPass *createSIWholeQuadModePass();
     57 FunctionPass *createSIFixControlFlowLiveIntervalsPass();
     58 FunctionPass *createSIOptimizeExecMaskingPreRAPass();
     59 FunctionPass *createSIFixSGPRCopiesPass();
     60 FunctionPass *createSIMemoryLegalizerPass();
     61 FunctionPass *createSIInsertWaitcntsPass();
     62 FunctionPass *createSIPreAllocateWWMRegsPass();
     63 FunctionPass *createSIFormMemoryClausesPass();
     64 
     65 FunctionPass *createSIPostRABundlerPass();
     66 FunctionPass *createAMDGPUSimplifyLibCallsPass(const TargetMachine *);
     67 FunctionPass *createAMDGPUUseNativeCallsPass();
     68 FunctionPass *createAMDGPUCodeGenPreparePass();
     69 FunctionPass *createAMDGPULateCodeGenPreparePass();
     70 FunctionPass *createAMDGPUMachineCFGStructurizerPass();
     71 FunctionPass *createAMDGPUPropagateAttributesEarlyPass(const TargetMachine *);
     72 ModulePass *createAMDGPUPropagateAttributesLatePass(const TargetMachine *);
     73 FunctionPass *createAMDGPURewriteOutArgumentsPass();
     74 ModulePass *createAMDGPULowerModuleLDSPass();
     75 FunctionPass *createSIModeRegisterPass();
     76 
     77 struct AMDGPUSimplifyLibCallsPass : PassInfoMixin<AMDGPUSimplifyLibCallsPass> {
     78   AMDGPUSimplifyLibCallsPass(TargetMachine &TM) : TM(TM) {}
     79   PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
     80 
     81 private:
     82   TargetMachine &TM;
     83 };
     84 
     85 struct AMDGPUUseNativeCallsPass : PassInfoMixin<AMDGPUUseNativeCallsPass> {
     86   PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
     87 };
     88 
     89 void initializeAMDGPUDAGToDAGISelPass(PassRegistry&);
     90 
     91 void initializeAMDGPUMachineCFGStructurizerPass(PassRegistry&);
     92 extern char &AMDGPUMachineCFGStructurizerID;
     93 
     94 void initializeAMDGPUAlwaysInlinePass(PassRegistry&);
     95 
     96 Pass *createAMDGPUAnnotateKernelFeaturesPass();
     97 void initializeAMDGPUAnnotateKernelFeaturesPass(PassRegistry &);
     98 extern char &AMDGPUAnnotateKernelFeaturesID;
     99 
    100 FunctionPass *createAMDGPUAtomicOptimizerPass();
    101 void initializeAMDGPUAtomicOptimizerPass(PassRegistry &);
    102 extern char &AMDGPUAtomicOptimizerID;
    103 
    104 ModulePass *createAMDGPULowerIntrinsicsPass();
    105 void initializeAMDGPULowerIntrinsicsPass(PassRegistry &);
    106 extern char &AMDGPULowerIntrinsicsID;
    107 
    108 ModulePass *createAMDGPUFixFunctionBitcastsPass();
    109 void initializeAMDGPUFixFunctionBitcastsPass(PassRegistry &);
    110 extern char &AMDGPUFixFunctionBitcastsID;
    111 
    112 FunctionPass *createAMDGPULowerKernelArgumentsPass();
    113 void initializeAMDGPULowerKernelArgumentsPass(PassRegistry &);
    114 extern char &AMDGPULowerKernelArgumentsID;
    115 
    116 ModulePass *createAMDGPULowerKernelAttributesPass();
    117 void initializeAMDGPULowerKernelAttributesPass(PassRegistry &);
    118 extern char &AMDGPULowerKernelAttributesID;
    119 
    120 struct AMDGPULowerKernelAttributesPass
    121     : PassInfoMixin<AMDGPULowerKernelAttributesPass> {
    122   PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
    123 };
    124 
    125 void initializeAMDGPUPropagateAttributesEarlyPass(PassRegistry &);
    126 extern char &AMDGPUPropagateAttributesEarlyID;
    127 
    128 struct AMDGPUPropagateAttributesEarlyPass
    129     : PassInfoMixin<AMDGPUPropagateAttributesEarlyPass> {
    130   AMDGPUPropagateAttributesEarlyPass(TargetMachine &TM) : TM(TM) {}
    131   PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
    132 
    133 private:
    134   TargetMachine &TM;
    135 };
    136 
    137 void initializeAMDGPUPropagateAttributesLatePass(PassRegistry &);
    138 extern char &AMDGPUPropagateAttributesLateID;
    139 
    140 struct AMDGPUPropagateAttributesLatePass
    141     : PassInfoMixin<AMDGPUPropagateAttributesLatePass> {
    142   AMDGPUPropagateAttributesLatePass(TargetMachine &TM) : TM(TM) {}
    143   PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
    144 
    145 private:
    146   TargetMachine &TM;
    147 };
    148 
    149 void initializeAMDGPULowerModuleLDSPass(PassRegistry &);
    150 extern char &AMDGPULowerModuleLDSID;
    151 
    152 struct AMDGPULowerModuleLDSPass : PassInfoMixin<AMDGPULowerModuleLDSPass> {
    153   PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
    154 };
    155 
    156 void initializeAMDGPURewriteOutArgumentsPass(PassRegistry &);
    157 extern char &AMDGPURewriteOutArgumentsID;
    158 
    159 void initializeGCNDPPCombinePass(PassRegistry &);
    160 extern char &GCNDPPCombineID;
    161 
    162 void initializeR600ClauseMergePassPass(PassRegistry &);
    163 extern char &R600ClauseMergePassID;
    164 
    165 void initializeR600ControlFlowFinalizerPass(PassRegistry &);
    166 extern char &R600ControlFlowFinalizerID;
    167 
    168 void initializeR600ExpandSpecialInstrsPassPass(PassRegistry &);
    169 extern char &R600ExpandSpecialInstrsPassID;
    170 
    171 void initializeR600VectorRegMergerPass(PassRegistry &);
    172 extern char &R600VectorRegMergerID;
    173 
    174 void initializeR600PacketizerPass(PassRegistry &);
    175 extern char &R600PacketizerID;
    176 
    177 void initializeSIFoldOperandsPass(PassRegistry &);
    178 extern char &SIFoldOperandsID;
    179 
    180 void initializeSIPeepholeSDWAPass(PassRegistry &);
    181 extern char &SIPeepholeSDWAID;
    182 
    183 void initializeSIShrinkInstructionsPass(PassRegistry&);
    184 extern char &SIShrinkInstructionsID;
    185 
    186 void initializeSIFixSGPRCopiesPass(PassRegistry &);
    187 extern char &SIFixSGPRCopiesID;
    188 
    189 void initializeSIFixVGPRCopiesPass(PassRegistry &);
    190 extern char &SIFixVGPRCopiesID;
    191 
    192 void initializeSILowerI1CopiesPass(PassRegistry &);
    193 extern char &SILowerI1CopiesID;
    194 
    195 void initializeSILowerSGPRSpillsPass(PassRegistry &);
    196 extern char &SILowerSGPRSpillsID;
    197 
    198 void initializeSILoadStoreOptimizerPass(PassRegistry &);
    199 extern char &SILoadStoreOptimizerID;
    200 
    201 void initializeSIWholeQuadModePass(PassRegistry &);
    202 extern char &SIWholeQuadModeID;
    203 
    204 void initializeSILowerControlFlowPass(PassRegistry &);
    205 extern char &SILowerControlFlowID;
    206 
    207 void initializeSIPreEmitPeepholePass(PassRegistry &);
    208 extern char &SIPreEmitPeepholeID;
    209 
    210 void initializeSILateBranchLoweringPass(PassRegistry &);
    211 extern char &SILateBranchLoweringPassID;
    212 
    213 void initializeSIOptimizeExecMaskingPass(PassRegistry &);
    214 extern char &SIOptimizeExecMaskingID;
    215 
    216 void initializeSIPreAllocateWWMRegsPass(PassRegistry &);
    217 extern char &SIPreAllocateWWMRegsID;
    218 
    219 void initializeAMDGPUSimplifyLibCallsPass(PassRegistry &);
    220 extern char &AMDGPUSimplifyLibCallsID;
    221 
    222 void initializeAMDGPUUseNativeCallsPass(PassRegistry &);
    223 extern char &AMDGPUUseNativeCallsID;
    224 
    225 void initializeAMDGPUPerfHintAnalysisPass(PassRegistry &);
    226 extern char &AMDGPUPerfHintAnalysisID;
    227 
    228 // Passes common to R600 and SI
    229 FunctionPass *createAMDGPUPromoteAlloca();
    230 void initializeAMDGPUPromoteAllocaPass(PassRegistry&);
    231 extern char &AMDGPUPromoteAllocaID;
    232 
    233 FunctionPass *createAMDGPUPromoteAllocaToVector();
    234 void initializeAMDGPUPromoteAllocaToVectorPass(PassRegistry&);
    235 extern char &AMDGPUPromoteAllocaToVectorID;
    236 
    237 struct AMDGPUPromoteAllocaPass : PassInfoMixin<AMDGPUPromoteAllocaPass> {
    238   AMDGPUPromoteAllocaPass(TargetMachine &TM) : TM(TM) {}
    239   PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
    240 
    241 private:
    242   TargetMachine &TM;
    243 };
    244 
    245 struct AMDGPUPromoteAllocaToVectorPass
    246     : PassInfoMixin<AMDGPUPromoteAllocaToVectorPass> {
    247   AMDGPUPromoteAllocaToVectorPass(TargetMachine &TM) : TM(TM) {}
    248   PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
    249 
    250 private:
    251   TargetMachine &TM;
    252 };
    253 
    254 Pass *createAMDGPUStructurizeCFGPass();
    255 FunctionPass *createAMDGPUISelDag(
    256   TargetMachine *TM = nullptr,
    257   CodeGenOpt::Level OptLevel = CodeGenOpt::Default);
    258 ModulePass *createAMDGPUAlwaysInlinePass(bool GlobalOpt = true);
    259 
    260 struct AMDGPUAlwaysInlinePass : PassInfoMixin<AMDGPUAlwaysInlinePass> {
    261   AMDGPUAlwaysInlinePass(bool GlobalOpt = true) : GlobalOpt(GlobalOpt) {}
    262   PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
    263 
    264 private:
    265   bool GlobalOpt;
    266 };
    267 
    268 ModulePass *createR600OpenCLImageTypeLoweringPass();
    269 FunctionPass *createAMDGPUAnnotateUniformValues();
    270 
    271 ModulePass *createAMDGPUPrintfRuntimeBinding();
    272 void initializeAMDGPUPrintfRuntimeBindingPass(PassRegistry&);
    273 extern char &AMDGPUPrintfRuntimeBindingID;
    274 
    275 struct AMDGPUPrintfRuntimeBindingPass
    276     : PassInfoMixin<AMDGPUPrintfRuntimeBindingPass> {
    277   PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
    278 };
    279 
    280 ModulePass* createAMDGPUUnifyMetadataPass();
    281 void initializeAMDGPUUnifyMetadataPass(PassRegistry&);
    282 extern char &AMDGPUUnifyMetadataID;
    283 
    284 struct AMDGPUUnifyMetadataPass : PassInfoMixin<AMDGPUUnifyMetadataPass> {
    285   PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
    286 };
    287 
    288 void initializeSIOptimizeExecMaskingPreRAPass(PassRegistry&);
    289 extern char &SIOptimizeExecMaskingPreRAID;
    290 
    291 void initializeAMDGPUAnnotateUniformValuesPass(PassRegistry&);
    292 extern char &AMDGPUAnnotateUniformValuesPassID;
    293 
    294 void initializeAMDGPUCodeGenPreparePass(PassRegistry&);
    295 extern char &AMDGPUCodeGenPrepareID;
    296 
    297 void initializeAMDGPULateCodeGenPreparePass(PassRegistry &);
    298 extern char &AMDGPULateCodeGenPrepareID;
    299 
    300 void initializeSIAnnotateControlFlowPass(PassRegistry&);
    301 extern char &SIAnnotateControlFlowPassID;
    302 
    303 void initializeSIMemoryLegalizerPass(PassRegistry&);
    304 extern char &SIMemoryLegalizerID;
    305 
    306 void initializeSIModeRegisterPass(PassRegistry&);
    307 extern char &SIModeRegisterID;
    308 
    309 void initializeSIInsertHardClausesPass(PassRegistry &);
    310 extern char &SIInsertHardClausesID;
    311 
    312 void initializeSIInsertWaitcntsPass(PassRegistry&);
    313 extern char &SIInsertWaitcntsID;
    314 
    315 void initializeSIFormMemoryClausesPass(PassRegistry&);
    316 extern char &SIFormMemoryClausesID;
    317 
    318 void initializeSIPostRABundlerPass(PassRegistry&);
    319 extern char &SIPostRABundlerID;
    320 
    321 void initializeAMDGPUUnifyDivergentExitNodesPass(PassRegistry&);
    322 extern char &AMDGPUUnifyDivergentExitNodesID;
    323 
    324 ImmutablePass *createAMDGPUAAWrapperPass();
    325 void initializeAMDGPUAAWrapperPassPass(PassRegistry&);
    326 ImmutablePass *createAMDGPUExternalAAWrapperPass();
    327 void initializeAMDGPUExternalAAWrapperPass(PassRegistry&);
    328 
    329 void initializeAMDGPUArgumentUsageInfoPass(PassRegistry &);
    330 
    331 ModulePass *createAMDGPUOpenCLEnqueuedBlockLoweringPass();
    332 void initializeAMDGPUOpenCLEnqueuedBlockLoweringPass(PassRegistry &);
    333 extern char &AMDGPUOpenCLEnqueuedBlockLoweringID;
    334 
    335 void initializeGCNNSAReassignPass(PassRegistry &);
    336 extern char &GCNNSAReassignID;
    337 
namespace AMDGPU {
/// Target index values. The enumerators are numbered consecutively from
/// zero; the values are spelled out here so the numbering is visible at a
/// glance.
enum TargetIndex {
  TI_CONSTDATA_START = 0,
  TI_SCRATCH_RSRC_DWORD0 = 1,
  TI_SCRATCH_RSRC_DWORD1 = 2,
  TI_SCRATCH_RSRC_DWORD2 = 3,
  TI_SCRATCH_RSRC_DWORD3 = 4
};
} // namespace AMDGPU
    347 
/// OpenCL distinguishes the hardware's memory regions by address space. On
/// the CPU every address space refers to the same memory; on the GPU,
/// however, each address space refers to its own piece of memory that is
/// distinct from the other memory locations.
namespace AMDGPUAS {
  enum : unsigned {
    FLAT_ADDRESS = 0,     ///< Address space for flat memory.
    GLOBAL_ADDRESS = 1,   ///< Address space for global memory (RAT0, VTX0).
    REGION_ADDRESS = 2,   ///< Address space for region memory. (GDS)
    LOCAL_ADDRESS = 3,    ///< Address space for local memory.
    CONSTANT_ADDRESS = 4, ///< Address space for constant memory (VTX2).
    PRIVATE_ADDRESS = 5,  ///< Address space for private memory.

    CONSTANT_ADDRESS_32BIT = 6, ///< Address space for 32-bit constant memory.

    BUFFER_FAT_POINTER = 7, ///< Address space for 160-bit buffer fat pointers.

    // The maximum value for flat, generic, local, private, constant and region.
    MAX_AMDGPU_ADDRESS = 7,

    /// Address space for direct addressable parameter memory (CONST0).
    /// Has the same numeric value as CONSTANT_ADDRESS_32BIT.
    PARAM_D_ADDRESS = 6,
    /// Address space for indirect addressable parameter memory (VTX1).
    /// Has the same numeric value as BUFFER_FAT_POINTER.
    PARAM_I_ADDRESS = 7,

    // Do not re-order the CONSTANT_BUFFER_* enums.  Several places depend on
    // this order to be able to dynamically index a constant buffer, for
    // example:
    //
    // ConstantBufferAS = CONSTANT_BUFFER_0 + CBIdx
    //
    // Each value is therefore written as an offset from CONSTANT_BUFFER_0.

    CONSTANT_BUFFER_0 = 8,
    CONSTANT_BUFFER_1 = CONSTANT_BUFFER_0 + 1,
    CONSTANT_BUFFER_2 = CONSTANT_BUFFER_0 + 2,
    CONSTANT_BUFFER_3 = CONSTANT_BUFFER_0 + 3,
    CONSTANT_BUFFER_4 = CONSTANT_BUFFER_0 + 4,
    CONSTANT_BUFFER_5 = CONSTANT_BUFFER_0 + 5,
    CONSTANT_BUFFER_6 = CONSTANT_BUFFER_0 + 6,
    CONSTANT_BUFFER_7 = CONSTANT_BUFFER_0 + 7,
    CONSTANT_BUFFER_8 = CONSTANT_BUFFER_0 + 8,
    CONSTANT_BUFFER_9 = CONSTANT_BUFFER_0 + 9,
    CONSTANT_BUFFER_10 = CONSTANT_BUFFER_0 + 10,
    CONSTANT_BUFFER_11 = CONSTANT_BUFFER_0 + 11,
    CONSTANT_BUFFER_12 = CONSTANT_BUFFER_0 + 12,
    CONSTANT_BUFFER_13 = CONSTANT_BUFFER_0 + 13,
    CONSTANT_BUFFER_14 = CONSTANT_BUFFER_0 + 14,
    CONSTANT_BUFFER_15 = CONSTANT_BUFFER_0 + 15,

    // Some places use this if the address space can't be determined.
    UNKNOWN_ADDRESS_SPACE = ~0u,
  };
} // namespace AMDGPUAS
    403 
    404 namespace AMDGPU {
    405 
    406 // FIXME: Missing constant_32bit
    407 inline bool isFlatGlobalAddrSpace(unsigned AS) {
    408   return AS == AMDGPUAS::GLOBAL_ADDRESS ||
    409          AS == AMDGPUAS::FLAT_ADDRESS ||
    410          AS == AMDGPUAS::CONSTANT_ADDRESS ||
    411          AS > AMDGPUAS::MAX_AMDGPU_ADDRESS;
    412 }
    413 }
    414 
    415 } // End namespace llvm
    416 
    417 #endif
    418