Home | History | Annotate | Line # | Download | only in AMDGPU
      1 //=====-- AMDGPUSubtarget.h - Define Subtarget for AMDGPU -------*- C++ -*-===//
      2 //
      3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
      4 // See https://llvm.org/LICENSE.txt for license information.
      5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
      6 //
      7 //==-----------------------------------------------------------------------===//
      8 //
      9 /// \file
     10 /// Base class for AMDGPU specific classes of TargetSubtarget.
     11 //
     12 //===----------------------------------------------------------------------===//
     13 
     14 #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H
     15 #define LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H
     16 
     17 #include "llvm/ADT/Triple.h"
     18 #include "llvm/IR/CallingConv.h"
     19 #include "llvm/Support/Alignment.h"
     20 
     21 namespace llvm {
     22 
// Forward declarations only: this header uses the class types below solely
// through references and pointers, so their full definitions are not needed
// here.
class Function;
class Instruction;
class MachineFunction;
class TargetMachine;

// Opaque declaration of the DWARF flavour enum. The fixed underlying type
// makes it a complete type, so it can be returned by value without the
// enumerator list being visible.
enum AMDGPUDwarfFlavour : unsigned;
     28 
     29 class AMDGPUSubtarget {
     30 public:
     31   enum Generation {
     32     INVALID = 0,
     33     R600 = 1,
     34     R700 = 2,
     35     EVERGREEN = 3,
     36     NORTHERN_ISLANDS = 4,
     37     SOUTHERN_ISLANDS = 5,
     38     SEA_ISLANDS = 6,
     39     VOLCANIC_ISLANDS = 7,
     40     GFX9 = 8,
     41     GFX10 = 9
     42   };
     43 
     44 private:
     45   Triple TargetTriple;
     46 
     47 protected:
     48   bool GCN3Encoding;
     49   bool Has16BitInsts;
     50   bool HasMadMixInsts;
     51   bool HasMadMacF32Insts;
     52   bool HasDsSrc2Insts;
     53   bool HasSDWA;
     54   bool HasVOP3PInsts;
     55   bool HasMulI24;
     56   bool HasMulU24;
     57   bool HasInv2PiInlineImm;
     58   bool HasFminFmaxLegacy;
     59   bool EnablePromoteAlloca;
     60   bool HasTrigReducedRange;
     61   unsigned MaxWavesPerEU;
     62   unsigned LocalMemorySize;
     63   char WavefrontSizeLog2;
     64 
     65 public:
     66   AMDGPUSubtarget(const Triple &TT);
     67 
     68   static const AMDGPUSubtarget &get(const MachineFunction &MF);
     69   static const AMDGPUSubtarget &get(const TargetMachine &TM,
     70                                     const Function &F);
     71 
     72   /// \returns Default range flat work group size for a calling convention.
     73   std::pair<unsigned, unsigned> getDefaultFlatWorkGroupSize(CallingConv::ID CC) const;
     74 
     75   /// \returns Subtarget's default pair of minimum/maximum flat work group sizes
     76   /// for function \p F, or minimum/maximum flat work group sizes explicitly
     77   /// requested using "amdgpu-flat-work-group-size" attribute attached to
     78   /// function \p F.
     79   ///
     80   /// \returns Subtarget's default values if explicitly requested values cannot
     81   /// be converted to integer, or violate subtarget's specifications.
     82   std::pair<unsigned, unsigned> getFlatWorkGroupSizes(const Function &F) const;
     83 
     84   /// \returns Subtarget's default pair of minimum/maximum number of waves per
     85   /// execution unit for function \p F, or minimum/maximum number of waves per
     86   /// execution unit explicitly requested using "amdgpu-waves-per-eu" attribute
     87   /// attached to function \p F.
     88   ///
     89   /// \returns Subtarget's default values if explicitly requested values cannot
     90   /// be converted to integer, violate subtarget's specifications, or are not
     91   /// compatible with minimum/maximum number of waves limited by flat work group
     92   /// size, register usage, and/or lds usage.
     93   std::pair<unsigned, unsigned> getWavesPerEU(const Function &F) const;
     94 
     95   /// Return the amount of LDS that can be used that will not restrict the
     96   /// occupancy lower than WaveCount.
     97   unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount,
     98                                            const Function &) const;
     99 
    100   /// Inverse of getMaxLocalMemWithWaveCount. Return the maximum wavecount if
    101   /// the given LDS memory size is the only constraint.
    102   unsigned getOccupancyWithLocalMemSize(uint32_t Bytes, const Function &) const;
    103 
    104   unsigned getOccupancyWithLocalMemSize(const MachineFunction &MF) const;
    105 
    106   bool isAmdHsaOS() const {
    107     return TargetTriple.getOS() == Triple::AMDHSA;
    108   }
    109 
    110   bool isAmdPalOS() const {
    111     return TargetTriple.getOS() == Triple::AMDPAL;
    112   }
    113 
    114   bool isMesa3DOS() const {
    115     return TargetTriple.getOS() == Triple::Mesa3D;
    116   }
    117 
    118   bool isMesaKernel(const Function &F) const;
    119 
    120   bool isAmdHsaOrMesa(const Function &F) const {
    121     return isAmdHsaOS() || isMesaKernel(F);
    122   }
    123 
    124   bool isGCN() const {
    125     return TargetTriple.getArch() == Triple::amdgcn;
    126   }
    127 
    128   bool isGCN3Encoding() const {
    129     return GCN3Encoding;
    130   }
    131 
    132   bool has16BitInsts() const {
    133     return Has16BitInsts;
    134   }
    135 
    136   bool hasMadMixInsts() const {
    137     return HasMadMixInsts;
    138   }
    139 
    140   bool hasMadMacF32Insts() const {
    141     return HasMadMacF32Insts || !isGCN();
    142   }
    143 
    144   bool hasDsSrc2Insts() const {
    145     return HasDsSrc2Insts;
    146   }
    147 
    148   bool hasSDWA() const {
    149     return HasSDWA;
    150   }
    151 
    152   bool hasVOP3PInsts() const {
    153     return HasVOP3PInsts;
    154   }
    155 
    156   bool hasMulI24() const {
    157     return HasMulI24;
    158   }
    159 
    160   bool hasMulU24() const {
    161     return HasMulU24;
    162   }
    163 
    164   bool hasInv2PiInlineImm() const {
    165     return HasInv2PiInlineImm;
    166   }
    167 
    168   bool hasFminFmaxLegacy() const {
    169     return HasFminFmaxLegacy;
    170   }
    171 
    172   bool hasTrigReducedRange() const {
    173     return HasTrigReducedRange;
    174   }
    175 
    176   bool isPromoteAllocaEnabled() const {
    177     return EnablePromoteAlloca;
    178   }
    179 
    180   unsigned getWavefrontSize() const {
    181     return 1 << WavefrontSizeLog2;
    182   }
    183 
    184   unsigned getWavefrontSizeLog2() const {
    185     return WavefrontSizeLog2;
    186   }
    187 
    188   unsigned getLocalMemorySize() const {
    189     return LocalMemorySize;
    190   }
    191 
    192   Align getAlignmentForImplicitArgPtr() const {
    193     return isAmdHsaOS() ? Align(8) : Align(4);
    194   }
    195 
    196   /// Returns the offset in bytes from the start of the input buffer
    197   ///        of the first explicit kernel argument.
    198   unsigned getExplicitKernelArgOffset(const Function &F) const {
    199     return isAmdHsaOrMesa(F) ? 0 : 36;
    200   }
    201 
    202   /// \returns Maximum number of work groups per compute unit supported by the
    203   /// subtarget and limited by given \p FlatWorkGroupSize.
    204   virtual unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const = 0;
    205 
    206   /// \returns Minimum flat work group size supported by the subtarget.
    207   virtual unsigned getMinFlatWorkGroupSize() const = 0;
    208 
    209   /// \returns Maximum flat work group size supported by the subtarget.
    210   virtual unsigned getMaxFlatWorkGroupSize() const = 0;
    211 
    212   /// \returns Number of waves per execution unit required to support the given
    213   /// \p FlatWorkGroupSize.
    214   virtual unsigned
    215   getWavesPerEUForWorkGroup(unsigned FlatWorkGroupSize) const = 0;
    216 
    217   /// \returns Minimum number of waves per execution unit supported by the
    218   /// subtarget.
    219   virtual unsigned getMinWavesPerEU() const = 0;
    220 
    221   /// \returns Maximum number of waves per execution unit supported by the
    222   /// subtarget without any kind of limitation.
    223   unsigned getMaxWavesPerEU() const { return MaxWavesPerEU; }
    224 
    225   /// Return the maximum workitem ID value in the function, for the given (0, 1,
    226   /// 2) dimension.
    227   unsigned getMaxWorkitemID(const Function &Kernel, unsigned Dimension) const;
    228 
    229   /// Creates value range metadata on an workitemid.* intrinsic call or load.
    230   bool makeLIDRangeMetadata(Instruction *I) const;
    231 
    232   /// \returns Number of bytes of arguments that are passed to a shader or
    233   /// kernel in addition to the explicit ones declared for the function.
    234   unsigned getImplicitArgNumBytes(const Function &F) const;
    235   uint64_t getExplicitKernArgSize(const Function &F, Align &MaxAlign) const;
    236   unsigned getKernArgSegmentSize(const Function &F, Align &MaxAlign) const;
    237 
    238   /// \returns Corresponsing DWARF register number mapping flavour for the
    239   /// \p WavefrontSize.
    240   AMDGPUDwarfFlavour getAMDGPUDwarfFlavour() const;
    241 
    242   virtual ~AMDGPUSubtarget() {}
    243 };
    244 
    245 } // end namespace llvm
    246 
    247 #endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H
    248