Home | History | Annotate | Line # | Download | only in Utils
      1 //===- AMDGPUBaseInfo.h - Top level definitions for AMDGPU ------*- C++ -*-===//
      2 //
      3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
      4 // See https://llvm.org/LICENSE.txt for license information.
      5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
      6 //
      7 //===----------------------------------------------------------------------===//
      8 
      9 #ifndef LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
     10 #define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
     11 
     12 #include "SIDefines.h"
     13 #include "llvm/IR/CallingConv.h"
     14 #include "llvm/Support/Alignment.h"
     15 
     16 struct amd_kernel_code_t;
     17 
     18 namespace llvm {
     19 
     20 struct Align;
     21 class Argument;
     22 class Function;
     23 class GCNSubtarget;
     24 class GlobalValue;
     25 class MCRegisterClass;
     26 class MCRegisterInfo;
     27 class MCSubtargetInfo;
     28 class StringRef;
     29 class Triple;
     30 
     31 namespace amdhsa {
     32 struct kernel_descriptor_t;
     33 }
     34 
     35 namespace AMDGPU {
     36 
     37 struct IsaVersion;
     38 
     39 /// \returns HSA OS ABI Version identification.
     40 Optional<uint8_t> getHsaAbiVersion(const MCSubtargetInfo *STI);
     41 /// \returns True if HSA OS ABI Version identification is 2,
     42 /// false otherwise.
     43 bool isHsaAbiVersion2(const MCSubtargetInfo *STI);
     44 /// \returns True if HSA OS ABI Version identification is 3,
     45 /// false otherwise.
     46 bool isHsaAbiVersion3(const MCSubtargetInfo *STI);
     47 /// \returns True if HSA OS ABI Version identification is 4,
     48 /// false otherwise.
     49 bool isHsaAbiVersion4(const MCSubtargetInfo *STI);
     50 /// \returns True if HSA OS ABI Version identification is 3 or 4,
     51 /// false otherwise.
     52 bool isHsaAbiVersion3Or4(const MCSubtargetInfo *STI);
     53 
        /// Description of one GCN buffer format. Both overloads of
        /// getGcnBufferFormatInfo() return a pointer to one of these; one overload
        /// takes a Format value, the other a (BitsPerComp, NumComponents,
        /// NumFormat) triple.
        // NOTE(review): instances are presumably rows of a generated searchable
        // table, so the field order should not be changed -- confirm.
     54 struct GcnBufferFormatInfo {
     55   unsigned Format;
     56   unsigned BitsPerComp;
     57   unsigned NumComponents;
     58   unsigned NumFormat;
     59   unsigned DataFormat;
     60 };
     61 
     62 #define GET_MIMGBaseOpcode_DECL
     63 #define GET_MIMGDim_DECL
     64 #define GET_MIMGEncoding_DECL
     65 #define GET_MIMGLZMapping_DECL
     66 #define GET_MIMGMIPMapping_DECL
     67 #include "AMDGPUGenSearchableTables.inc"
     68 
     69 namespace IsaInfo {
     70 
/// SGPR-count constants.
enum {
  TRAP_NUM_SGPRS = 16,

  // 96 is the value the closed Vulkan driver sets: it limits the wave count
  // to 8, but causes less SGPR spilling than a setting of 80 does.
  FIXED_NUM_SGPRS_FOR_INIT_BUG = 96
};
     77 
/// State of a single target-ID feature; the enumerators keep their implicit
/// values 0..3 in this order.
enum class TargetIDSetting { Unsupported, Any, Off, On };
     84 
     85 class AMDGPUTargetID {
     86 private:
     87   const MCSubtargetInfo &STI;
     88   TargetIDSetting XnackSetting;
     89   TargetIDSetting SramEccSetting;
     90 
     91 public:
     92   explicit AMDGPUTargetID(const MCSubtargetInfo &STI);
     93   ~AMDGPUTargetID() = default;
     94 
     95   /// \return True if the current xnack setting is not "Unsupported".
     96   bool isXnackSupported() const {
     97     return XnackSetting != TargetIDSetting::Unsupported;
     98   }
     99 
    100   /// \returns True if the current xnack setting is "On" or "Any".
    101   bool isXnackOnOrAny() const {
    102     return XnackSetting == TargetIDSetting::On ||
    103         XnackSetting == TargetIDSetting::Any;
    104   }
    105 
    106   /// \returns True if current xnack setting is "On" or "Off",
    107   /// false otherwise.
    108   bool isXnackOnOrOff() const {
    109     return getXnackSetting() == TargetIDSetting::On ||
    110         getXnackSetting() == TargetIDSetting::Off;
    111   }
    112 
    113   /// \returns The current xnack TargetIDSetting, possible options are
    114   /// "Unsupported", "Any", "Off", and "On".
    115   TargetIDSetting getXnackSetting() const {
    116     return XnackSetting;
    117   }
    118 
    119   /// Sets xnack setting to \p NewXnackSetting.
    120   void setXnackSetting(TargetIDSetting NewXnackSetting) {
    121     XnackSetting = NewXnackSetting;
    122   }
    123 
    124   /// \return True if the current sramecc setting is not "Unsupported".
    125   bool isSramEccSupported() const {
    126     return SramEccSetting != TargetIDSetting::Unsupported;
    127   }
    128 
    129   /// \returns True if the current sramecc setting is "On" or "Any".
    130   bool isSramEccOnOrAny() const {
    131   return SramEccSetting == TargetIDSetting::On ||
    132       SramEccSetting == TargetIDSetting::Any;
    133   }
    134 
    135   /// \returns True if current sramecc setting is "On" or "Off",
    136   /// false otherwise.
    137   bool isSramEccOnOrOff() const {
    138     return getSramEccSetting() == TargetIDSetting::On ||
    139         getSramEccSetting() == TargetIDSetting::Off;
    140   }
    141 
    142   /// \returns The current sramecc TargetIDSetting, possible options are
    143   /// "Unsupported", "Any", "Off", and "On".
    144   TargetIDSetting getSramEccSetting() const {
    145     return SramEccSetting;
    146   }
    147 
    148   /// Sets sramecc setting to \p NewSramEccSetting.
    149   void setSramEccSetting(TargetIDSetting NewSramEccSetting) {
    150     SramEccSetting = NewSramEccSetting;
    151   }
    152 
    153   void setTargetIDFromFeaturesString(StringRef FS);
    154   void setTargetIDFromTargetIDStream(StringRef TargetID);
    155 
    156   /// \returns String representation of an object.
    157   std::string toString() const;
    158 };
    159 
    160 /// \returns Wavefront size for given subtarget \p STI.
    161 unsigned getWavefrontSize(const MCSubtargetInfo *STI);
    162 
    163 /// \returns Local memory size in bytes for given subtarget \p STI.
    164 unsigned getLocalMemorySize(const MCSubtargetInfo *STI);
    165 
    166 /// \returns Number of execution units per compute unit for given subtarget \p
    167 /// STI.
    168 unsigned getEUsPerCU(const MCSubtargetInfo *STI);
    169 
    170 /// \returns Maximum number of work groups per compute unit for given subtarget
    171 /// \p STI and limited by given \p FlatWorkGroupSize.
    172 unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI,
    173                                unsigned FlatWorkGroupSize);
    174 
    175 /// \returns Minimum number of waves per execution unit for given subtarget \p
    176 /// STI.
    177 unsigned getMinWavesPerEU(const MCSubtargetInfo *STI);
    178 
    179 /// \returns Maximum number of waves per execution unit for given subtarget \p
    180 /// STI without any kind of limitation.
    181 unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI);
    182 
    183 /// \returns Number of waves per execution unit required to support the given \p
    184 /// FlatWorkGroupSize.
    185 unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo *STI,
    186                                    unsigned FlatWorkGroupSize);
    187 
    188 /// \returns Minimum flat work group size for given subtarget \p STI.
    189 unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI);
    190 
    191 /// \returns Maximum flat work group size for given subtarget \p STI.
    192 unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI);
    193 
    194 /// \returns Number of waves per work group for given subtarget \p STI and
    195 /// \p FlatWorkGroupSize.
    196 unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI,
    197                               unsigned FlatWorkGroupSize);
    198 
    199 /// \returns SGPR allocation granularity for given subtarget \p STI.
    200 unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI);
    201 
    202 /// \returns SGPR encoding granularity for given subtarget \p STI.
    203 unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI);
    204 
    205 /// \returns Total number of SGPRs for given subtarget \p STI.
    206 unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI);
    207 
    208 /// \returns Addressable number of SGPRs for given subtarget \p STI.
    209 unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI);
    210 
    211 /// \returns Minimum number of SGPRs that meets the given number of waves per
    212 /// execution unit requirement for given subtarget \p STI.
    213 unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);
    214 
    215 /// \returns Maximum number of SGPRs that meets the given number of waves per
    216 /// execution unit requirement for given subtarget \p STI.
    217 unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
    218                         bool Addressable);
    219 
    220 /// \returns Number of extra SGPRs implicitly required by given subtarget \p
    221 /// STI when the given special registers are used.
    222 unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
    223                           bool FlatScrUsed, bool XNACKUsed);
    224 
    225 /// \returns Number of extra SGPRs implicitly required by given subtarget \p
    226 /// STI when the given special registers are used. XNACK is inferred from
    227 /// \p STI.
    228 unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
    229                           bool FlatScrUsed);
    230 
    231 /// \returns Number of SGPR blocks needed for given subtarget \p STI when
    232 /// \p NumSGPRs are used. \p NumSGPRs should already include any special
    233 /// register counts.
    234 unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs);
    235 
    236 /// \returns VGPR allocation granularity for given subtarget \p STI.
    237 ///
    238 /// For subtargets which support it, \p EnableWavefrontSize32 should match
    239 /// the ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
    240 unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI,
    241                              Optional<bool> EnableWavefrontSize32 = None);
    242 
    243 /// \returns VGPR encoding granularity for given subtarget \p STI.
    244 ///
    245 /// For subtargets which support it, \p EnableWavefrontSize32 should match
    246 /// the ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
    247 unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI,
    248                                 Optional<bool> EnableWavefrontSize32 = None);
    249 
    250 /// \returns Total number of VGPRs for given subtarget \p STI.
    251 unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI);
    252 
    253 /// \returns Addressable number of VGPRs for given subtarget \p STI.
    254 unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI);
    255 
    256 /// \returns Minimum number of VGPRs that meets given number of waves per
    257 /// execution unit requirement for given subtarget \p STI.
    258 unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);
    259 
    260 /// \returns Maximum number of VGPRs that meets given number of waves per
    261 /// execution unit requirement for given subtarget \p STI.
    262 unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);
    263 
    264 /// \returns Number of VGPR blocks needed for given subtarget \p STI when
    265 /// \p NumVGPRs are used.
    266 ///
    267 /// For subtargets which support it, \p EnableWavefrontSize32 should match the
    268 /// ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
    269 unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs,
    270                           Optional<bool> EnableWavefrontSize32 = None);
    271 
    272 } // end namespace IsaInfo
    273 
    274 LLVM_READONLY
    275 int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx);
    276 
    277 LLVM_READONLY
    278 int getSOPPWithRelaxation(uint16_t Opcode);
    279 
        /// Properties recorded per MIMG base opcode. getMIMGBaseOpcodeInfo()
        /// returns a pointer to one of these for a given base opcode value.
        // NOTE(review): the individual flags are not documented in this header;
        // their meaning is presumably defined by the TableGen records that
        // populate the table -- confirm there before relying on any of them.
    280 struct MIMGBaseOpcodeInfo {
    281   MIMGBaseOpcode BaseOpcode;
    282   bool Store;
    283   bool Atomic;
    284   bool AtomicX2;
    285   bool Sampler;
    286   bool Gather4;
    287
    288   uint8_t NumExtraArgs;
    289   bool Gradients;
    290   bool G16;
    291   bool Coordinates;
    292   bool LodOrClampOrMip;
    293   bool HasD16;
    294   bool MSAA;
    295 };
    296 
    297 LLVM_READONLY
    298 const MIMGBaseOpcodeInfo *getMIMGBaseOpcodeInfo(unsigned BaseOpcode);
    299 
        /// Properties recorded per MIMG dimension. Pointers to these are returned
        /// by getMIMGDimInfo() (takes a dim enum value), getMIMGDimInfoByEncoding()
        /// (takes a uint8_t encoding) and getMIMGDimInfoByAsmSuffix() (takes a
        /// StringRef suffix).
        // NOTE(review): Encoding and AsmSuffix are presumably the keys used by the
        // two "By..." lookups -- confirm against their definitions.
    300 struct MIMGDimInfo {
    301   MIMGDim Dim;
    302   uint8_t NumCoords;
    303   uint8_t NumGradients;
    304   bool MSAA;
    305   bool DA;
    306   uint8_t Encoding;
    307   const char *AsmSuffix;
    308 };
    309 
    310 LLVM_READONLY
    311 const MIMGDimInfo *getMIMGDimInfo(unsigned DimEnum);
    312 
    313 LLVM_READONLY
    314 const MIMGDimInfo *getMIMGDimInfoByEncoding(uint8_t DimEnc);
    315 
    316 LLVM_READONLY
    317 const MIMGDimInfo *getMIMGDimInfoByAsmSuffix(StringRef AsmSuffix);
    318 
        /// Pairs two MIMG base opcodes, L and LZ. getMIMGLZMappingInfo() takes an
        /// `unsigned L` and returns a pointer to one of these.
        // NOTE(review): presumably L is the lookup key and LZ the mapped-to
        // opcode -- confirm.
    319 struct MIMGLZMappingInfo {
    320   MIMGBaseOpcode L;
    321   MIMGBaseOpcode LZ;
    322 };
    323
        /// Pairs two MIMG base opcodes, MIP and NONMIP. getMIMGMIPMappingInfo()
        /// takes an `unsigned MIP` and returns a pointer to one of these.
    324 struct MIMGMIPMappingInfo {
    325   MIMGBaseOpcode MIP;
    326   MIMGBaseOpcode NONMIP;
    327 };
    328
        /// Pairs two MIMG base opcodes, G and G16. getMIMGG16MappingInfo() takes
        /// an `unsigned G` and returns a pointer to one of these.
    329 struct MIMGG16MappingInfo {
    330   MIMGBaseOpcode G;
    331   MIMGBaseOpcode G16;
    332 };
    333 
    334 LLVM_READONLY
    335 const MIMGLZMappingInfo *getMIMGLZMappingInfo(unsigned L);
    336 
    337 LLVM_READONLY
    338 const MIMGMIPMappingInfo *getMIMGMIPMappingInfo(unsigned MIP);
    339 
    340 LLVM_READONLY
    341 const MIMGG16MappingInfo *getMIMGG16MappingInfo(unsigned G);
    342 
    343 LLVM_READONLY
    344 int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
    345                   unsigned VDataDwords, unsigned VAddrDwords);
    346 
    347 LLVM_READONLY
    348 int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels);
    349 
    350 LLVM_READONLY
    351 unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode,
    352                            const MIMGDimInfo *Dim, bool IsA16,
    353                            bool IsG16Supported);
    354 
        /// Properties recorded per MIMG opcode. getMIMGInfo() takes an opcode and
        /// returns a pointer to one of these. The last four fields have the same
        /// names as the parameters of getMIMGOpcode().
        // NOTE(review): presumably getMIMGOpcode() is the reverse lookup from
        // those four values to Opcode -- confirm against its definition.
    355 struct MIMGInfo {
    356   uint16_t Opcode;
    357   uint16_t BaseOpcode;
    358   uint8_t MIMGEncoding;
    359   uint8_t VDataDwords;
    360   uint8_t VAddrDwords;
    361 };
    362 
    363 LLVM_READONLY
    364 const MIMGInfo *getMIMGInfo(unsigned Opc);
    365 
    366 LLVM_READONLY
    367 int getMTBUFBaseOpcode(unsigned Opc);
    368 
    369 LLVM_READONLY
    370 int getMTBUFOpcode(unsigned BaseOpc, unsigned Elements);
    371 
    372 LLVM_READONLY
    373 int getMTBUFElements(unsigned Opc);
    374 
    375 LLVM_READONLY
    376 bool getMTBUFHasVAddr(unsigned Opc);
    377 
    378 LLVM_READONLY
    379 bool getMTBUFHasSrsrc(unsigned Opc);
    380 
    381 LLVM_READONLY
    382 bool getMTBUFHasSoffset(unsigned Opc);
    383 
    384 LLVM_READONLY
    385 int getMUBUFBaseOpcode(unsigned Opc);
    386 
    387 LLVM_READONLY
    388 int getMUBUFOpcode(unsigned BaseOpc, unsigned Elements);
    389 
    390 LLVM_READONLY
    391 int getMUBUFElements(unsigned Opc);
    392 
    393 LLVM_READONLY
    394 bool getMUBUFHasVAddr(unsigned Opc);
    395 
    396 LLVM_READONLY
    397 bool getMUBUFHasSrsrc(unsigned Opc);
    398 
    399 LLVM_READONLY
    400 bool getMUBUFHasSoffset(unsigned Opc);
    401 
    402 LLVM_READONLY
    403 bool getMUBUFIsBufferInv(unsigned Opc);
    404 
    405 LLVM_READONLY
    406 bool getSMEMIsBuffer(unsigned Opc);
    407 
    408 LLVM_READONLY
    409 bool getVOP1IsSingle(unsigned Opc);
    410 
    411 LLVM_READONLY
    412 bool getVOP2IsSingle(unsigned Opc);
    413 
    414 LLVM_READONLY
    415 bool getVOP3IsSingle(unsigned Opc);
    416 
    417 LLVM_READONLY
    418 const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t BitsPerComp,
    419                                                   uint8_t NumComponents,
    420                                                   uint8_t NumFormat,
    421                                                   const MCSubtargetInfo &STI);
    422 LLVM_READONLY
    423 const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t Format,
    424                                                   const MCSubtargetInfo &STI);
    425 
    426 LLVM_READONLY
    427 int getMCOpcode(uint16_t Opcode, unsigned Gen);
    428 
    429 void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
    430                                const MCSubtargetInfo *STI);
    431 
    432 amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor(
    433     const MCSubtargetInfo *STI);
    434 
    435 bool isGroupSegment(const GlobalValue *GV);
    436 bool isGlobalSegment(const GlobalValue *GV);
    437 bool isReadOnlySegment(const GlobalValue *GV);
    438 
    439 /// \returns True if constants should be emitted to .text section for given
    440 /// target triple \p TT, false otherwise.
    441 bool shouldEmitConstantsToTextSection(const Triple &TT);
    442 
    443 /// \returns Integer value requested using \p F's \p Name attribute.
    444 ///
    445 /// \returns \p Default if attribute is not present.
    446 ///
    447 /// \returns \p Default and emits error if requested value cannot be converted
    448 /// to integer.
    449 int getIntegerAttribute(const Function &F, StringRef Name, int Default);
    450 
    451 /// \returns A pair of integer values requested using \p F's \p Name attribute
    452 /// in "first[,second]" format ("second" is optional unless \p OnlyFirstRequired
    453 /// is false).
    454 ///
    455 /// \returns \p Default if attribute is not present.
    456 ///
    457 /// \returns \p Default and emits error if one of the requested values cannot be
    458 /// converted to integer, or \p OnlyFirstRequired is false and "second" value is
    459 /// not present.
    460 std::pair<int, int> getIntegerPairAttribute(const Function &F,
    461                                             StringRef Name,
    462                                             std::pair<int, int> Default,
    463                                             bool OnlyFirstRequired = false);
    464 
/// Holds the counter values an s_waitcnt instruction should wait for.
///
/// A large value in a counter (every counter defaults to the maximum unsigned
/// value, ~0u) can be used to express a "don't care" wait for that counter.
struct Waitcnt {
  unsigned VmCnt = ~0u;
  unsigned ExpCnt = ~0u;
  unsigned LgkmCnt = ~0u;
  unsigned VsCnt = ~0u;

  /// Leaves every counter at its ~0u default.
  Waitcnt() {}

  /// Sets all four counters explicitly.
  Waitcnt(unsigned VmCnt, unsigned ExpCnt, unsigned LgkmCnt, unsigned VsCnt)
      : VmCnt(VmCnt), ExpCnt(ExpCnt), LgkmCnt(LgkmCnt), VsCnt(VsCnt) {}

  /// \returns A Waitcnt with VmCnt, ExpCnt and LgkmCnt set to 0 and VsCnt left
  /// at ~0u.
  static Waitcnt allZeroExceptVsCnt() { return Waitcnt(0, 0, 0, ~0u); }

  /// \returns A Waitcnt with VmCnt, ExpCnt and LgkmCnt set to 0. VsCnt is 0
  /// as well when \p HasVscnt is true, and ~0u otherwise.
  static Waitcnt allZero(bool HasVscnt) {
    Waitcnt Result = allZeroExceptVsCnt();
    if (HasVscnt)
      Result.VsCnt = 0;
    return Result;
  }

  /// \returns True if VsCnt differs from ~0u.
  bool hasWaitVsCnt() const { return !(VsCnt == ~0u); }

  /// \returns True if any of VmCnt, ExpCnt or LgkmCnt differs from ~0u.
  bool hasWaitExceptVsCnt() const {
    return !(VmCnt == ~0u && ExpCnt == ~0u && LgkmCnt == ~0u);
  }

  /// \returns True if any of the four counters differs from ~0u.
  bool hasWait() const { return hasWaitExceptVsCnt() || hasWaitVsCnt(); }

  /// \returns True if every counter of this object is less than or equal to
  /// the corresponding counter of \p Other.
  bool dominates(const Waitcnt &Other) const {
    if (VmCnt > Other.VmCnt)
      return false;
    if (ExpCnt > Other.ExpCnt)
      return false;
    if (LgkmCnt > Other.LgkmCnt)
      return false;
    return VsCnt <= Other.VsCnt;
  }

  /// \returns A Waitcnt holding, per counter, the smaller of this object's
  /// value and \p Other's value.
  Waitcnt combined(const Waitcnt &Other) const {
    const unsigned Vm = std::min(VmCnt, Other.VmCnt);
    const unsigned Exp = std::min(ExpCnt, Other.ExpCnt);
    const unsigned Lgkm = std::min(LgkmCnt, Other.LgkmCnt);
    const unsigned Vs = std::min(VsCnt, Other.VsCnt);
    return Waitcnt(Vm, Exp, Lgkm, Vs);
  }
};
    507 
    508 /// \returns Vmcnt bit mask for given isa \p Version.
    509 unsigned getVmcntBitMask(const IsaVersion &Version);
    510 
    511 /// \returns Expcnt bit mask for given isa \p Version.
    512 unsigned getExpcntBitMask(const IsaVersion &Version);
    513 
    514 /// \returns Lgkmcnt bit mask for given isa \p Version.
    515 unsigned getLgkmcntBitMask(const IsaVersion &Version);
    516 
    517 /// \returns Waitcnt bit mask for given isa \p Version.
    518 unsigned getWaitcntBitMask(const IsaVersion &Version);
    519 
    520 /// \returns Decoded Vmcnt from given \p Waitcnt for given isa \p Version.
    521 unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt);
    522 
    523 /// \returns Decoded Expcnt from given \p Waitcnt for given isa \p Version.
    524 unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt);
    525 
    526 /// \returns Decoded Lgkmcnt from given \p Waitcnt for given isa \p Version.
    527 unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt);
    528 
    529 /// Decodes Vmcnt, Expcnt and Lgkmcnt from given \p Waitcnt for given isa
    530 /// \p Version, and writes decoded values into \p Vmcnt, \p Expcnt and
    531 /// \p Lgkmcnt respectively.
    532 ///
    533 /// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are decoded as follows:
    534 ///     \p Vmcnt = \p Waitcnt[3:0]                      (pre-gfx9 only)
    535 ///     \p Vmcnt = \p Waitcnt[3:0] | \p Waitcnt[15:14]  (gfx9+ only)
    536 ///     \p Expcnt = \p Waitcnt[6:4]
    537 ///     \p Lgkmcnt = \p Waitcnt[11:8]                   (pre-gfx10 only)
    538 ///     \p Lgkmcnt = \p Waitcnt[13:8]                   (gfx10+ only)
    539 void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt,
    540                    unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt);
    541 
    542 Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded);
    543 
    544 /// \returns \p Waitcnt with encoded \p Vmcnt for given isa \p Version.
    545 unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt,
    546                      unsigned Vmcnt);
    547 
    548 /// \returns \p Waitcnt with encoded \p Expcnt for given isa \p Version.
    549 unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt,
    550                       unsigned Expcnt);
    551 
    552 /// \returns \p Waitcnt with encoded \p Lgkmcnt for given isa \p Version.
    553 unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
    554                        unsigned Lgkmcnt);
    555 
    556 /// Encodes \p Vmcnt, \p Expcnt and \p Lgkmcnt into Waitcnt for given isa
    557 /// \p Version.
    558 ///
    559 /// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are encoded as follows:
    560 ///     Waitcnt[3:0]   = \p Vmcnt       (pre-gfx9 only)
    561 ///     Waitcnt[3:0]   = \p Vmcnt[3:0]  (gfx9+ only)
    562 ///     Waitcnt[6:4]   = \p Expcnt
    563 ///     Waitcnt[11:8]  = \p Lgkmcnt     (pre-gfx10 only)
    564 ///     Waitcnt[13:8]  = \p Lgkmcnt     (gfx10+ only)
    565 ///     Waitcnt[15:14] = \p Vmcnt[5:4]  (gfx9+ only)
    566 ///
    567 /// \returns Waitcnt with encoded \p Vmcnt, \p Expcnt and \p Lgkmcnt for given
    568 /// isa \p Version.
    569 unsigned encodeWaitcnt(const IsaVersion &Version,
    570                        unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt);
    571 
    572 unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded);
    573 
    574 namespace Hwreg {
    575 
    576 LLVM_READONLY
    577 int64_t getHwregId(const StringRef Name);
    578 
    579 LLVM_READNONE
    580 bool isValidHwreg(int64_t Id, const MCSubtargetInfo &STI);
    581 
    582 LLVM_READNONE
    583 bool isValidHwreg(int64_t Id);
    584 
    585 LLVM_READNONE
    586 bool isValidHwregOffset(int64_t Offset);
    587 
    588 LLVM_READNONE
    589 bool isValidHwregWidth(int64_t Width);
    590 
    591 LLVM_READNONE
    592 uint64_t encodeHwreg(uint64_t Id, uint64_t Offset, uint64_t Width);
    593 
    594 LLVM_READNONE
    595 StringRef getHwreg(unsigned Id, const MCSubtargetInfo &STI);
    596 
    597 void decodeHwreg(unsigned Val, unsigned &Id, unsigned &Offset, unsigned &Width);
    598 
    599 } // namespace Hwreg
    600 
    601 namespace Exp {
    602 
    603 bool getTgtName(unsigned Id, StringRef &Name, int &Index);
    604 
    605 LLVM_READONLY
    606 unsigned getTgtId(const StringRef Name);
    607 
    608 LLVM_READNONE
    609 bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI);
    610 
    611 } // namespace Exp
    612 
    613 namespace MTBUFFormat {
    614 
    615 LLVM_READNONE
    616 int64_t encodeDfmtNfmt(unsigned Dfmt, unsigned Nfmt);
    617 
    618 void decodeDfmtNfmt(unsigned Format, unsigned &Dfmt, unsigned &Nfmt);
    619 
    620 int64_t getDfmt(const StringRef Name);
    621 
    622 StringRef getDfmtName(unsigned Id);
    623 
    624 int64_t getNfmt(const StringRef Name, const MCSubtargetInfo &STI);
    625 
    626 StringRef getNfmtName(unsigned Id, const MCSubtargetInfo &STI);
    627 
    628 bool isValidDfmtNfmt(unsigned Val, const MCSubtargetInfo &STI);
    629 
    630 bool isValidNfmt(unsigned Val, const MCSubtargetInfo &STI);
    631 
    632 int64_t getUnifiedFormat(const StringRef Name);
    633 
    634 StringRef getUnifiedFormatName(unsigned Id);
    635 
    636 bool isValidUnifiedFormat(unsigned Val);
    637 
    638 int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt);
    639 
    640 bool isValidFormatEncoding(unsigned Val, const MCSubtargetInfo &STI);
    641 
    642 unsigned getDefaultFormatEncoding(const MCSubtargetInfo &STI);
    643 
    644 } // namespace MTBUFFormat
    645 
    646 namespace SendMsg {
    647 
    648 LLVM_READONLY
    649 int64_t getMsgId(const StringRef Name);
    650 
    651 LLVM_READONLY
    652 int64_t getMsgOpId(int64_t MsgId, const StringRef Name);
    653 
    654 LLVM_READNONE
    655 StringRef getMsgName(int64_t MsgId);
    656 
    657 LLVM_READNONE
    658 StringRef getMsgOpName(int64_t MsgId, int64_t OpId);
    659 
    660 LLVM_READNONE
    661 bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI, bool Strict = true);
    662 
    663 LLVM_READNONE
    664 bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI,
    665                   bool Strict = true);
    666 
    667 LLVM_READNONE
    668 bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId,
    669                       const MCSubtargetInfo &STI, bool Strict = true);
    670 
    671 LLVM_READNONE
    672 bool msgRequiresOp(int64_t MsgId);
    673 
    674 LLVM_READNONE
    675 bool msgSupportsStream(int64_t MsgId, int64_t OpId);
    676 
    677 void decodeMsg(unsigned Val,
    678                uint16_t &MsgId,
    679                uint16_t &OpId,
    680                uint16_t &StreamId);
    681 
    682 LLVM_READNONE
    683 uint64_t encodeMsg(uint64_t MsgId,
    684                    uint64_t OpId,
    685                    uint64_t StreamId);
    686 
    687 } // namespace SendMsg
    688 
    689 
    690 unsigned getInitialPSInputAddr(const Function &F);
    691 
    692 LLVM_READNONE
    693 bool isShader(CallingConv::ID CC);
    694 
    695 LLVM_READNONE
    696 bool isGraphics(CallingConv::ID CC);
    697 
    698 LLVM_READNONE
    699 bool isCompute(CallingConv::ID CC);
    700 
    701 LLVM_READNONE
    702 bool isEntryFunctionCC(CallingConv::ID CC);
    703 
    704 // These functions are considered entrypoints into the current module, i.e. they
    705 // are allowed to be called from outside the current module. This is different
    706 // from isEntryFunctionCC, which is only true for functions that are entered by
    707 // the hardware. Module entry points include all entry functions but also
    708 // include functions that can be called from other functions inside or outside
    709 // the current module. Module entry functions are allowed to allocate LDS.
    710 LLVM_READNONE
    711 bool isModuleEntryFunctionCC(CallingConv::ID CC);
    712 
    713 // FIXME: Remove this when calling conventions cleaned up
    714 LLVM_READNONE
    715 inline bool isKernel(CallingConv::ID CC) {
    716   switch (CC) {
    717   case CallingConv::AMDGPU_KERNEL:
    718   case CallingConv::SPIR_KERNEL:
    719     return true;
    720   default:
    721     return false;
    722   }
    723 }
    724 
    725 bool hasXNACK(const MCSubtargetInfo &STI);
    726 bool hasSRAMECC(const MCSubtargetInfo &STI);
    727 bool hasMIMG_R128(const MCSubtargetInfo &STI);
    728 bool hasGFX10A16(const MCSubtargetInfo &STI);
    729 bool hasG16(const MCSubtargetInfo &STI);
    730 bool hasPackedD16(const MCSubtargetInfo &STI);
    731 
    732 bool isSI(const MCSubtargetInfo &STI);
    733 bool isCI(const MCSubtargetInfo &STI);
    734 bool isVI(const MCSubtargetInfo &STI);
    735 bool isGFX9(const MCSubtargetInfo &STI);
    736 bool isGFX9Plus(const MCSubtargetInfo &STI);
    737 bool isGFX10(const MCSubtargetInfo &STI);
    738 bool isGFX10Plus(const MCSubtargetInfo &STI);
    739 bool isGCN3Encoding(const MCSubtargetInfo &STI);
    740 bool isGFX10_BEncoding(const MCSubtargetInfo &STI);
    741 bool hasGFX10_3Insts(const MCSubtargetInfo &STI);
    742 bool isGFX90A(const MCSubtargetInfo &STI);
    743 bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI);
    744 
    745 /// Is \p Reg a scalar register?
    746 bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI);
    747 
    748 /// Do registers \p Reg0 and \p Reg1 intersect?
    749 bool isRegIntersect(unsigned Reg0, unsigned Reg1, const MCRegisterInfo* TRI);
    750 
    751 /// If \p Reg is a pseudo reg, return the correct hardware register given
    752 /// \p STI otherwise return \p Reg.
    753 unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI);
    754 
    755 /// Convert hardware register \p Reg to a pseudo register
    756 LLVM_READNONE
    757 unsigned mc2PseudoReg(unsigned Reg);
    758 
    759 /// Can this operand also contain immediate values?
    760 bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo);
    761 
    762 /// Is this a floating-point operand?
    763 bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo);
    764 
    765 /// Does this operand support only inlinable literals?
    766 bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo);
    767 
    768 /// Get the size in bits of a register from the register class with ID \p RCID.
    769 unsigned getRegBitWidth(unsigned RCID);
    770 
    771 /// Get the size in bits of a register from the register class \p RC.
    772 unsigned getRegBitWidth(const MCRegisterClass &RC);
    773 
    774 /// Get size of register operand
    775 unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc,
    776                            unsigned OpNo);
    777 
    778 LLVM_READNONE
    779 inline unsigned getOperandSize(const MCOperandInfo &OpInfo) {
    780   switch (OpInfo.OperandType) {
    781   case AMDGPU::OPERAND_REG_IMM_INT32:
    782   case AMDGPU::OPERAND_REG_IMM_FP32:
    783   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
    784   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
    785   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
    786   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
    787   case AMDGPU::OPERAND_REG_IMM_V2INT32:
    788   case AMDGPU::OPERAND_REG_IMM_V2FP32:
    789   case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
    790   case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
    791     return 4;
    792 
    793   case AMDGPU::OPERAND_REG_IMM_INT64:
    794   case AMDGPU::OPERAND_REG_IMM_FP64:
    795   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
    796   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
    797   case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
    798     return 8;
    799 
    800   case AMDGPU::OPERAND_REG_IMM_INT16:
    801   case AMDGPU::OPERAND_REG_IMM_FP16:
    802   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
    803   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
    804   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
    805   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
    806   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
    807   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
    808   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
    809   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
    810   case AMDGPU::OPERAND_REG_IMM_V2INT16:
    811   case AMDGPU::OPERAND_REG_IMM_V2FP16:
    812     return 2;
    813 
    814   default:
    815     llvm_unreachable("unhandled operand type");
    816   }
    817 }
    818 
    819 LLVM_READNONE
    820 inline unsigned getOperandSize(const MCInstrDesc &Desc, unsigned OpNo) {
    821   return getOperandSize(Desc.OpInfo[OpNo]);
    822 }
    823 
    824 /// Is this literal inlinable, and not one of the values intended for floating
    825 /// point values.
    826 LLVM_READNONE
    827 inline bool isInlinableIntLiteral(int64_t Literal) {
    828   return Literal >= -16 && Literal <= 64;
    829 }
    830 
    831 /// Is this literal inlinable
    832 LLVM_READNONE
    833 bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi);
    834 
    835 LLVM_READNONE
    836 bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi);
    837 
    838 LLVM_READNONE
    839 bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi);
    840 
    841 LLVM_READNONE
    842 bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi);
    843 
    844 LLVM_READNONE
    845 bool isInlinableIntLiteralV216(int32_t Literal);
    846 
    847 LLVM_READNONE
    848 bool isFoldableLiteralV216(int32_t Literal, bool HasInv2Pi);
    849 
    850 bool isArgPassedInSGPR(const Argument *Arg);
    851 
    852 LLVM_READONLY
    853 bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST,
    854                                       int64_t EncodedOffset);
    855 
    856 LLVM_READONLY
    857 bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST,
    858                                     int64_t EncodedOffset,
    859                                     bool IsBuffer);
    860 
    861 /// Convert \p ByteOffset to dwords if the subtarget uses dword SMRD immediate
    862 /// offsets.
    863 uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST, uint64_t ByteOffset);
    864 
    865 /// \returns The encoding that will be used for \p ByteOffset in the
    866 /// SMRD offset field, or None if it won't fit. On GFX9 and GFX10
    867 /// S_LOAD instructions have a signed offset, on other subtargets it is
    868 /// unsigned. S_BUFFER has an unsigned offset for all subtargets.
    869 Optional<int64_t> getSMRDEncodedOffset(const MCSubtargetInfo &ST,
    870                                        int64_t ByteOffset, bool IsBuffer);
    871 
    872 /// \return The encoding that can be used for a 32-bit literal offset in an SMRD
    873 /// instruction. This is only useful on CI.
    874 Optional<int64_t> getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST,
    875                                                 int64_t ByteOffset);
    876 
    877 /// For FLAT segment the offset must be positive;
    878 /// MSB is ignored and forced to zero.
    879 ///
    880 /// \return The number of bits available for the offset field in flat
    881 /// instructions.
    882 unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST, bool Signed);
    883 
    884 /// \returns true if this offset is small enough to fit in the SMRD
    885 /// offset field.  \p ByteOffset should be the offset in bytes and
    886 /// not the encoded offset.
    887 bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset);
    888 
    889 bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset,
    890                       const GCNSubtarget *Subtarget,
    891                       Align Alignment = Align(4));
    892 
    893 LLVM_READNONE
    894 inline bool isLegal64BitDPPControl(unsigned DC) {
    895   return DC >= DPP::ROW_NEWBCAST_FIRST && DC <= DPP::ROW_NEWBCAST_LAST;
    896 }
    897 
    898 /// \returns true if the intrinsic is divergent
    899 bool isIntrinsicSourceOfDivergence(unsigned IntrID);
    900 
    901 // Track defaults for fields in the MODE registser.
    902 struct SIModeRegisterDefaults {
    903   /// Floating point opcodes that support exception flag gathering quiet and
    904   /// propagate signaling NaN inputs per IEEE 754-2008. Min_dx10 and max_dx10
    905   /// become IEEE 754- 2008 compliant due to signaling NaN propagation and
    906   /// quieting.
    907   bool IEEE : 1;
    908 
    909   /// Used by the vector ALU to force DX10-style treatment of NaNs: when set,
    910   /// clamp NaN to zero; otherwise, pass NaN through.
    911   bool DX10Clamp : 1;
    912 
    913   /// If this is set, neither input or output denormals are flushed for most f32
    914   /// instructions.
    915   bool FP32InputDenormals : 1;
    916   bool FP32OutputDenormals : 1;
    917 
    918   /// If this is set, neither input or output denormals are flushed for both f64
    919   /// and f16/v2f16 instructions.
    920   bool FP64FP16InputDenormals : 1;
    921   bool FP64FP16OutputDenormals : 1;
    922 
    923   SIModeRegisterDefaults() :
    924     IEEE(true),
    925     DX10Clamp(true),
    926     FP32InputDenormals(true),
    927     FP32OutputDenormals(true),
    928     FP64FP16InputDenormals(true),
    929     FP64FP16OutputDenormals(true) {}
    930 
    931   SIModeRegisterDefaults(const Function &F);
    932 
    933   static SIModeRegisterDefaults getDefaultForCallingConv(CallingConv::ID CC) {
    934     SIModeRegisterDefaults Mode;
    935     Mode.IEEE = !AMDGPU::isShader(CC);
    936     return Mode;
    937   }
    938 
    939   bool operator ==(const SIModeRegisterDefaults Other) const {
    940     return IEEE == Other.IEEE && DX10Clamp == Other.DX10Clamp &&
    941            FP32InputDenormals == Other.FP32InputDenormals &&
    942            FP32OutputDenormals == Other.FP32OutputDenormals &&
    943            FP64FP16InputDenormals == Other.FP64FP16InputDenormals &&
    944            FP64FP16OutputDenormals == Other.FP64FP16OutputDenormals;
    945   }
    946 
    947   bool allFP32Denormals() const {
    948     return FP32InputDenormals && FP32OutputDenormals;
    949   }
    950 
    951   bool allFP64FP16Denormals() const {
    952     return FP64FP16InputDenormals && FP64FP16OutputDenormals;
    953   }
    954 
    955   /// Get the encoding value for the FP_DENORM bits of the mode register for the
    956   /// FP32 denormal mode.
    957   uint32_t fpDenormModeSPValue() const {
    958     if (FP32InputDenormals && FP32OutputDenormals)
    959       return FP_DENORM_FLUSH_NONE;
    960     if (FP32InputDenormals)
    961       return FP_DENORM_FLUSH_OUT;
    962     if (FP32OutputDenormals)
    963       return FP_DENORM_FLUSH_IN;
    964     return FP_DENORM_FLUSH_IN_FLUSH_OUT;
    965   }
    966 
    967   /// Get the encoding value for the FP_DENORM bits of the mode register for the
    968   /// FP64/FP16 denormal mode.
    969   uint32_t fpDenormModeDPValue() const {
    970     if (FP64FP16InputDenormals && FP64FP16OutputDenormals)
    971       return FP_DENORM_FLUSH_NONE;
    972     if (FP64FP16InputDenormals)
    973       return FP_DENORM_FLUSH_OUT;
    974     if (FP64FP16OutputDenormals)
    975       return FP_DENORM_FLUSH_IN;
    976     return FP_DENORM_FLUSH_IN_FLUSH_OUT;
    977   }
    978 
    979   /// Returns true if a flag is compatible if it's enabled in the callee, but
    980   /// disabled in the caller.
    981   static bool oneWayCompatible(bool CallerMode, bool CalleeMode) {
    982     return CallerMode == CalleeMode || (!CallerMode && CalleeMode);
    983   }
    984 
    985   // FIXME: Inlining should be OK for dx10-clamp, since the caller's mode should
    986   // be able to override.
    987   bool isInlineCompatible(SIModeRegisterDefaults CalleeMode) const {
    988     if (DX10Clamp != CalleeMode.DX10Clamp)
    989       return false;
    990     if (IEEE != CalleeMode.IEEE)
    991       return false;
    992 
    993     // Allow inlining denormals enabled into denormals flushed functions.
    994     return oneWayCompatible(FP64FP16InputDenormals, CalleeMode.FP64FP16InputDenormals) &&
    995            oneWayCompatible(FP64FP16OutputDenormals, CalleeMode.FP64FP16OutputDenormals) &&
    996            oneWayCompatible(FP32InputDenormals, CalleeMode.FP32InputDenormals) &&
    997            oneWayCompatible(FP32OutputDenormals, CalleeMode.FP32OutputDenormals);
    998   }
    999 };
   1000 
   1001 } // end namespace AMDGPU
   1002 
   1003 raw_ostream &operator<<(raw_ostream &OS,
   1004                         const AMDGPU::IsaInfo::TargetIDSetting S);
   1005 
   1006 } // end namespace llvm
   1007 
   1008 #endif // LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
   1009