Home | History | Annotate | Line # | Download | only in AMDGPU
      1 //=====-- GCNSubtarget.h - Define GCN Subtarget for AMDGPU ------*- C++ -*-===//
      2 //
      3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
      4 // See https://llvm.org/LICENSE.txt for license information.
      5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
      6 //
      7 //==-----------------------------------------------------------------------===//
      8 //
      9 /// \file
     10 /// AMD GCN specific subclass of TargetSubtarget.
     11 //
     12 //===----------------------------------------------------------------------===//
     13 
     14 #ifndef LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H
     15 #define LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H
     16 
     17 #include "AMDGPUCallLowering.h"
     18 #include "AMDGPUSubtarget.h"
     19 #include "SIFrameLowering.h"
     20 #include "SIISelLowering.h"
     21 #include "SIInstrInfo.h"
     22 #include "llvm/CodeGen/SelectionDAGTargetInfo.h"
     23 
     24 namespace llvm {
     25 
     26 class MCInst;
     27 class MCInstrInfo;
     28 
     29 } // namespace llvm
     30 
     31 #define GET_SUBTARGETINFO_HEADER
     32 #include "AMDGPUGenSubtargetInfo.inc"
     33 
     34 namespace llvm {
     35 
     36 class GCNTargetMachine;
     37 
     38 class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
     39                            public AMDGPUSubtarget {
     40 
     41   using AMDGPUSubtarget::getMaxWavesPerEU;
     42 
     43 public:
     44   // Following 2 enums are documented at:
     45   //   - https://llvm.org/docs/AMDGPUUsage.html#trap-handler-abi
     46   enum class TrapHandlerAbi {
     47     NONE   = 0x00,
     48     AMDHSA = 0x01,
     49   };
     50 
     51   enum class TrapID {
     52     LLVMAMDHSATrap      = 0x02,
     53     LLVMAMDHSADebugTrap = 0x03,
     54   };
     55 
     56 private:
     57   /// GlobalISel related APIs.
     58   std::unique_ptr<AMDGPUCallLowering> CallLoweringInfo;
     59   std::unique_ptr<InlineAsmLowering> InlineAsmLoweringInfo;
     60   std::unique_ptr<InstructionSelector> InstSelector;
     61   std::unique_ptr<LegalizerInfo> Legalizer;
     62   std::unique_ptr<RegisterBankInfo> RegBankInfo;
     63 
     64 protected:
     65   // Basic subtarget description.
     66   Triple TargetTriple;
     67   AMDGPU::IsaInfo::AMDGPUTargetID TargetID;
     68   unsigned Gen;
     69   InstrItineraryData InstrItins;
     70   int LDSBankCount;
     71   unsigned MaxPrivateElementSize;
     72 
     73   // Possibly statically set by tablegen, but may want to be overridden.
     74   bool FastFMAF32;
     75   bool FastDenormalF32;
     76   bool HalfRate64Ops;
     77   bool FullRate64Ops;
     78 
     79   // Dynamically set bits that enable features.
     80   bool FlatForGlobal;
     81   bool AutoWaitcntBeforeBarrier;
     82   bool UnalignedScratchAccess;
     83   bool UnalignedAccessMode;
     84   bool HasApertureRegs;
     85   bool SupportsXNACK;
     86 
     87   // This should not be used directly. 'TargetID' tracks the dynamic settings
     88   // for XNACK.
     89   bool EnableXNACK;
     90 
     91   bool EnableTgSplit;
     92   bool EnableCuMode;
     93   bool TrapHandler;
     94 
     95   // Used as options.
     96   bool EnableLoadStoreOpt;
     97   bool EnableUnsafeDSOffsetFolding;
     98   bool EnableSIScheduler;
     99   bool EnableDS128;
    100   bool EnablePRTStrictNull;
    101   bool DumpCode;
    102 
     103   // Subtarget properties statically set by tablegen
    104   bool FP64;
    105   bool FMA;
    106   bool MIMG_R128;
    107   bool IsGCN;
    108   bool CIInsts;
    109   bool GFX8Insts;
    110   bool GFX9Insts;
    111   bool GFX90AInsts;
    112   bool GFX10Insts;
    113   bool GFX10_3Insts;
    114   bool GFX7GFX8GFX9Insts;
    115   bool SGPRInitBug;
    116   bool NegativeScratchOffsetBug;
    117   bool NegativeUnalignedScratchOffsetBug;
    118   bool HasSMemRealTime;
    119   bool HasIntClamp;
    120   bool HasFmaMixInsts;
    121   bool HasMovrel;
    122   bool HasVGPRIndexMode;
    123   bool HasScalarStores;
    124   bool HasScalarAtomics;
    125   bool HasSDWAOmod;
    126   bool HasSDWAScalar;
    127   bool HasSDWASdst;
    128   bool HasSDWAMac;
    129   bool HasSDWAOutModsVOPC;
    130   bool HasDPP;
    131   bool HasDPP8;
    132   bool Has64BitDPP;
    133   bool HasPackedFP32Ops;
    134   bool HasExtendedImageInsts;
    135   bool HasR128A16;
    136   bool HasGFX10A16;
    137   bool HasG16;
    138   bool HasNSAEncoding;
    139   bool GFX10_BEncoding;
    140   bool HasDLInsts;
    141   bool HasDot1Insts;
    142   bool HasDot2Insts;
    143   bool HasDot3Insts;
    144   bool HasDot4Insts;
    145   bool HasDot5Insts;
    146   bool HasDot6Insts;
    147   bool HasDot7Insts;
    148   bool HasMAIInsts;
    149   bool HasPkFmacF16Inst;
    150   bool HasAtomicFaddInsts;
    151   bool SupportsSRAMECC;
    152 
    153   // This should not be used directly. 'TargetID' tracks the dynamic settings
    154   // for SRAMECC.
    155   bool EnableSRAMECC;
    156 
    157   bool HasNoSdstCMPX;
    158   bool HasVscnt;
    159   bool HasGetWaveIdInst;
    160   bool HasSMemTimeInst;
    161   bool HasShaderCyclesRegister;
    162   bool HasRegisterBanking;
    163   bool HasVOP3Literal;
    164   bool HasNoDataDepHazard;
    165   bool FlatAddressSpace;
    166   bool FlatInstOffsets;
    167   bool FlatGlobalInsts;
    168   bool FlatScratchInsts;
    169   bool ScalarFlatScratchInsts;
    170   bool HasArchitectedFlatScratch;
    171   bool AddNoCarryInsts;
    172   bool HasUnpackedD16VMem;
    173   bool R600ALUInst;
    174   bool CaymanISA;
    175   bool CFALUBug;
    176   bool LDSMisalignedBug;
    177   bool HasMFMAInlineLiteralBug;
    178   bool HasVertexCache;
    179   short TexVTXClauseSize;
    180   bool UnalignedBufferAccess;
    181   bool UnalignedDSAccess;
    182   bool HasPackedTID;
    183   bool ScalarizeGlobal;
    184 
    185   bool HasVcmpxPermlaneHazard;
    186   bool HasVMEMtoScalarWriteHazard;
    187   bool HasSMEMtoVectorWriteHazard;
    188   bool HasInstFwdPrefetchBug;
    189   bool HasVcmpxExecWARHazard;
    190   bool HasLdsBranchVmemWARHazard;
    191   bool HasNSAtoVMEMBug;
    192   bool HasNSAClauseBug;
    193   bool HasOffset3fBug;
    194   bool HasFlatSegmentOffsetBug;
    195   bool HasImageStoreD16Bug;
    196   bool HasImageGather4D16Bug;
    197 
    198   // Dummy feature to use for assembler in tablegen.
    199   bool FeatureDisable;
    200 
    201   SelectionDAGTargetInfo TSInfo;
    202 private:
    203   SIInstrInfo InstrInfo;
    204   SITargetLowering TLInfo;
    205   SIFrameLowering FrameLowering;
    206 
    207 public:
    208   // See COMPUTE_TMPRING_SIZE.WAVESIZE, 13-bit field in units of 256-dword.
    209   static const unsigned MaxWaveScratchSize = (256 * 4) * ((1 << 13) - 1);
    210 
    211   GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
    212                const GCNTargetMachine &TM);
    213   ~GCNSubtarget() override;
    214 
    215   GCNSubtarget &initializeSubtargetDependencies(const Triple &TT,
    216                                                    StringRef GPU, StringRef FS);
    217 
    218   const SIInstrInfo *getInstrInfo() const override {
    219     return &InstrInfo;
    220   }
    221 
    222   const SIFrameLowering *getFrameLowering() const override {
    223     return &FrameLowering;
    224   }
    225 
    226   const SITargetLowering *getTargetLowering() const override {
    227     return &TLInfo;
    228   }
    229 
    230   const SIRegisterInfo *getRegisterInfo() const override {
    231     return &InstrInfo.getRegisterInfo();
    232   }
    233 
    234   const CallLowering *getCallLowering() const override {
    235     return CallLoweringInfo.get();
    236   }
    237 
    238   const InlineAsmLowering *getInlineAsmLowering() const override {
    239     return InlineAsmLoweringInfo.get();
    240   }
    241 
    242   InstructionSelector *getInstructionSelector() const override {
    243     return InstSelector.get();
    244   }
    245 
    246   const LegalizerInfo *getLegalizerInfo() const override {
    247     return Legalizer.get();
    248   }
    249 
    250   const RegisterBankInfo *getRegBankInfo() const override {
    251     return RegBankInfo.get();
    252   }
    253 
    254   const AMDGPU::IsaInfo::AMDGPUTargetID &getTargetID() const {
    255     return TargetID;
    256   }
    257 
    258   // Nothing implemented, just prevent crashes on use.
    259   const SelectionDAGTargetInfo *getSelectionDAGInfo() const override {
    260     return &TSInfo;
    261   }
    262 
    263   const InstrItineraryData *getInstrItineraryData() const override {
    264     return &InstrItins;
    265   }
    266 
    267   void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS);
    268 
    269   Generation getGeneration() const {
    270     return (Generation)Gen;
    271   }
    272 
    273   /// Return the number of high bits known to be zero fror a frame index.
    274   unsigned getKnownHighZeroBitsForFrameIndex() const {
    275     return countLeadingZeros(MaxWaveScratchSize) + getWavefrontSizeLog2();
    276   }
    277 
    278   int getLDSBankCount() const {
    279     return LDSBankCount;
    280   }
    281 
    282   unsigned getMaxPrivateElementSize(bool ForBufferRSrc = false) const {
    283     return (ForBufferRSrc || !enableFlatScratch()) ? MaxPrivateElementSize : 16;
    284   }
    285 
    286   unsigned getConstantBusLimit(unsigned Opcode) const;
    287 
    288   bool hasIntClamp() const {
    289     return HasIntClamp;
    290   }
    291 
    292   bool hasFP64() const {
    293     return FP64;
    294   }
    295 
    296   bool hasMIMG_R128() const {
    297     return MIMG_R128;
    298   }
    299 
    300   bool hasHWFP64() const {
    301     return FP64;
    302   }
    303 
    304   bool hasFastFMAF32() const {
    305     return FastFMAF32;
    306   }
    307 
    308   bool hasHalfRate64Ops() const {
    309     return HalfRate64Ops;
    310   }
    311 
    312   bool hasFullRate64Ops() const {
    313     return FullRate64Ops;
    314   }
    315 
    316   bool hasAddr64() const {
    317     return (getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS);
    318   }
    319 
    320   bool hasFlat() const {
    321     return (getGeneration() > AMDGPUSubtarget::SOUTHERN_ISLANDS);
    322   }
    323 
    324   // Return true if the target only has the reverse operand versions of VALU
    325   // shift instructions (e.g. v_lshrrev_b32, and no v_lshr_b32).
    326   bool hasOnlyRevVALUShifts() const {
    327     return getGeneration() >= VOLCANIC_ISLANDS;
    328   }
    329 
    330   bool hasFractBug() const {
    331     return getGeneration() == SOUTHERN_ISLANDS;
    332   }
    333 
    334   bool hasBFE() const {
    335     return true;
    336   }
    337 
    338   bool hasBFI() const {
    339     return true;
    340   }
    341 
    342   bool hasBFM() const {
    343     return hasBFE();
    344   }
    345 
    346   bool hasBCNT(unsigned Size) const {
    347     return true;
    348   }
    349 
    350   bool hasFFBL() const {
    351     return true;
    352   }
    353 
    354   bool hasFFBH() const {
    355     return true;
    356   }
    357 
    358   bool hasMed3_16() const {
    359     return getGeneration() >= AMDGPUSubtarget::GFX9;
    360   }
    361 
    362   bool hasMin3Max3_16() const {
    363     return getGeneration() >= AMDGPUSubtarget::GFX9;
    364   }
    365 
    366   bool hasFmaMixInsts() const {
    367     return HasFmaMixInsts;
    368   }
    369 
    370   bool hasCARRY() const {
    371     return true;
    372   }
    373 
    374   bool hasFMA() const {
    375     return FMA;
    376   }
    377 
    378   bool hasSwap() const {
    379     return GFX9Insts;
    380   }
    381 
    382   bool hasScalarPackInsts() const {
    383     return GFX9Insts;
    384   }
    385 
    386   bool hasScalarMulHiInsts() const {
    387     return GFX9Insts;
    388   }
    389 
    390   TrapHandlerAbi getTrapHandlerAbi() const {
    391     return isAmdHsaOS() ? TrapHandlerAbi::AMDHSA : TrapHandlerAbi::NONE;
    392   }
    393 
    394   bool supportsGetDoorbellID() const {
    395     // The S_GETREG DOORBELL_ID is supported by all GFX9 onward targets.
    396     return getGeneration() >= GFX9;
    397   }
    398 
    399   /// True if the offset field of DS instructions works as expected. On SI, the
    400   /// offset uses a 16-bit adder and does not always wrap properly.
    401   bool hasUsableDSOffset() const {
    402     return getGeneration() >= SEA_ISLANDS;
    403   }
    404 
    405   bool unsafeDSOffsetFoldingEnabled() const {
    406     return EnableUnsafeDSOffsetFolding;
    407   }
    408 
    409   /// Condition output from div_scale is usable.
    410   bool hasUsableDivScaleConditionOutput() const {
    411     return getGeneration() != SOUTHERN_ISLANDS;
    412   }
    413 
    414   /// Extra wait hazard is needed in some cases before
    415   /// s_cbranch_vccnz/s_cbranch_vccz.
    416   bool hasReadVCCZBug() const {
    417     return getGeneration() <= SEA_ISLANDS;
    418   }
    419 
    420   /// Writes to VCC_LO/VCC_HI update the VCCZ flag.
    421   bool partialVCCWritesUpdateVCCZ() const {
    422     return getGeneration() >= GFX10;
    423   }
    424 
    425   /// A read of an SGPR by SMRD instruction requires 4 wait states when the SGPR
    426   /// was written by a VALU instruction.
    427   bool hasSMRDReadVALUDefHazard() const {
    428     return getGeneration() == SOUTHERN_ISLANDS;
    429   }
    430 
    431   /// A read of an SGPR by a VMEM instruction requires 5 wait states when the
    432   /// SGPR was written by a VALU Instruction.
    433   bool hasVMEMReadSGPRVALUDefHazard() const {
    434     return getGeneration() >= VOLCANIC_ISLANDS;
    435   }
    436 
    437   bool hasRFEHazards() const {
    438     return getGeneration() >= VOLCANIC_ISLANDS;
    439   }
    440 
    441   /// Number of hazard wait states for s_setreg_b32/s_setreg_imm32_b32.
    442   unsigned getSetRegWaitStates() const {
    443     return getGeneration() <= SEA_ISLANDS ? 1 : 2;
    444   }
    445 
    446   bool dumpCode() const {
    447     return DumpCode;
    448   }
    449 
    450   /// Return the amount of LDS that can be used that will not restrict the
    451   /// occupancy lower than WaveCount.
    452   unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount,
    453                                            const Function &) const;
    454 
    455   bool supportsMinMaxDenormModes() const {
    456     return getGeneration() >= AMDGPUSubtarget::GFX9;
    457   }
    458 
    459   /// \returns If target supports S_DENORM_MODE.
    460   bool hasDenormModeInst() const {
    461     return getGeneration() >= AMDGPUSubtarget::GFX10;
    462   }
    463 
    464   bool useFlatForGlobal() const {
    465     return FlatForGlobal;
    466   }
    467 
    468   /// \returns If target supports ds_read/write_b128 and user enables generation
    469   /// of ds_read/write_b128.
    470   bool useDS128() const {
    471     return CIInsts && EnableDS128;
    472   }
    473 
    474   /// \return If target supports ds_read/write_b96/128.
    475   bool hasDS96AndDS128() const {
    476     return CIInsts;
    477   }
    478 
    479   /// Have v_trunc_f64, v_ceil_f64, v_rndne_f64
    480   bool haveRoundOpsF64() const {
    481     return CIInsts;
    482   }
    483 
    484   /// \returns If MUBUF instructions always perform range checking, even for
    485   /// buffer resources used for private memory access.
    486   bool privateMemoryResourceIsRangeChecked() const {
    487     return getGeneration() < AMDGPUSubtarget::GFX9;
    488   }
    489 
    490   /// \returns If target requires PRT Struct NULL support (zero result registers
    491   /// for sparse texture support).
    492   bool usePRTStrictNull() const {
    493     return EnablePRTStrictNull;
    494   }
    495 
    496   bool hasAutoWaitcntBeforeBarrier() const {
    497     return AutoWaitcntBeforeBarrier;
    498   }
    499 
    500   bool hasUnalignedBufferAccess() const {
    501     return UnalignedBufferAccess;
    502   }
    503 
    504   bool hasUnalignedBufferAccessEnabled() const {
    505     return UnalignedBufferAccess && UnalignedAccessMode;
    506   }
    507 
    508   bool hasUnalignedDSAccess() const {
    509     return UnalignedDSAccess;
    510   }
    511 
    512   bool hasUnalignedDSAccessEnabled() const {
    513     return UnalignedDSAccess && UnalignedAccessMode;
    514   }
    515 
    516   bool hasUnalignedScratchAccess() const {
    517     return UnalignedScratchAccess;
    518   }
    519 
    520   bool hasUnalignedAccessMode() const {
    521     return UnalignedAccessMode;
    522   }
    523 
    524   bool hasApertureRegs() const {
    525     return HasApertureRegs;
    526   }
    527 
    528   bool isTrapHandlerEnabled() const {
    529     return TrapHandler;
    530   }
    531 
    532   bool isXNACKEnabled() const {
    533     return TargetID.isXnackOnOrAny();
    534   }
    535 
    536   bool isTgSplitEnabled() const {
    537     return EnableTgSplit;
    538   }
    539 
    540   bool isCuModeEnabled() const {
    541     return EnableCuMode;
    542   }
    543 
    544   bool hasFlatAddressSpace() const {
    545     return FlatAddressSpace;
    546   }
    547 
    548   bool hasFlatScrRegister() const {
    549     return hasFlatAddressSpace();
    550   }
    551 
    552   bool hasFlatInstOffsets() const {
    553     return FlatInstOffsets;
    554   }
    555 
    556   bool hasFlatGlobalInsts() const {
    557     return FlatGlobalInsts;
    558   }
    559 
    560   bool hasFlatScratchInsts() const {
    561     return FlatScratchInsts;
    562   }
    563 
    564   // Check if target supports ST addressing mode with FLAT scratch instructions.
    565   // The ST addressing mode means no registers are used, either VGPR or SGPR,
    566   // but only immediate offset is swizzled and added to the FLAT scratch base.
    567   bool hasFlatScratchSTMode() const {
    568     return hasFlatScratchInsts() && hasGFX10_3Insts();
    569   }
    570 
    571   bool hasScalarFlatScratchInsts() const {
    572     return ScalarFlatScratchInsts;
    573   }
    574 
    575   bool hasGlobalAddTidInsts() const {
    576     return GFX10_BEncoding;
    577   }
    578 
    579   bool hasAtomicCSub() const {
    580     return GFX10_BEncoding;
    581   }
    582 
    583   bool hasMultiDwordFlatScratchAddressing() const {
    584     return getGeneration() >= GFX9;
    585   }
    586 
    587   bool hasFlatSegmentOffsetBug() const {
    588     return HasFlatSegmentOffsetBug;
    589   }
    590 
    591   bool hasFlatLgkmVMemCountInOrder() const {
    592     return getGeneration() > GFX9;
    593   }
    594 
    595   bool hasD16LoadStore() const {
    596     return getGeneration() >= GFX9;
    597   }
    598 
    599   bool d16PreservesUnusedBits() const {
    600     return hasD16LoadStore() && !TargetID.isSramEccOnOrAny();
    601   }
    602 
    603   bool hasD16Images() const {
    604     return getGeneration() >= VOLCANIC_ISLANDS;
    605   }
    606 
    607   /// Return if most LDS instructions have an m0 use that require m0 to be
    608   /// iniitalized.
    609   bool ldsRequiresM0Init() const {
    610     return getGeneration() < GFX9;
    611   }
    612 
    613   // True if the hardware rewinds and replays GWS operations if a wave is
    614   // preempted.
    615   //
    616   // If this is false, a GWS operation requires testing if a nack set the
    617   // MEM_VIOL bit, and repeating if so.
    618   bool hasGWSAutoReplay() const {
    619     return getGeneration() >= GFX9;
    620   }
    621 
    622   /// \returns if target has ds_gws_sema_release_all instruction.
    623   bool hasGWSSemaReleaseAll() const {
    624     return CIInsts;
    625   }
    626 
    627   /// \returns true if the target has integer add/sub instructions that do not
    628   /// produce a carry-out. This includes v_add_[iu]32, v_sub_[iu]32,
    629   /// v_add_[iu]16, and v_sub_[iu]16, all of which support the clamp modifier
    630   /// for saturation.
    631   bool hasAddNoCarry() const {
    632     return AddNoCarryInsts;
    633   }
    634 
    635   bool hasUnpackedD16VMem() const {
    636     return HasUnpackedD16VMem;
    637   }
    638 
    639   // Covers VS/PS/CS graphics shaders
    640   bool isMesaGfxShader(const Function &F) const {
    641     return isMesa3DOS() && AMDGPU::isShader(F.getCallingConv());
    642   }
    643 
    644   bool hasMad64_32() const {
    645     return getGeneration() >= SEA_ISLANDS;
    646   }
    647 
    648   bool hasSDWAOmod() const {
    649     return HasSDWAOmod;
    650   }
    651 
    652   bool hasSDWAScalar() const {
    653     return HasSDWAScalar;
    654   }
    655 
    656   bool hasSDWASdst() const {
    657     return HasSDWASdst;
    658   }
    659 
    660   bool hasSDWAMac() const {
    661     return HasSDWAMac;
    662   }
    663 
    664   bool hasSDWAOutModsVOPC() const {
    665     return HasSDWAOutModsVOPC;
    666   }
    667 
    668   bool hasDLInsts() const {
    669     return HasDLInsts;
    670   }
    671 
    672   bool hasDot1Insts() const {
    673     return HasDot1Insts;
    674   }
    675 
    676   bool hasDot2Insts() const {
    677     return HasDot2Insts;
    678   }
    679 
    680   bool hasDot3Insts() const {
    681     return HasDot3Insts;
    682   }
    683 
    684   bool hasDot4Insts() const {
    685     return HasDot4Insts;
    686   }
    687 
    688   bool hasDot5Insts() const {
    689     return HasDot5Insts;
    690   }
    691 
    692   bool hasDot6Insts() const {
    693     return HasDot6Insts;
    694   }
    695 
    696   bool hasDot7Insts() const {
    697     return HasDot7Insts;
    698   }
    699 
    700   bool hasMAIInsts() const {
    701     return HasMAIInsts;
    702   }
    703 
    704   bool hasPkFmacF16Inst() const {
    705     return HasPkFmacF16Inst;
    706   }
    707 
    708   bool hasAtomicFaddInsts() const {
    709     return HasAtomicFaddInsts;
    710   }
    711 
    712   bool hasNoSdstCMPX() const {
    713     return HasNoSdstCMPX;
    714   }
    715 
    716   bool hasVscnt() const {
    717     return HasVscnt;
    718   }
    719 
    720   bool hasGetWaveIdInst() const {
    721     return HasGetWaveIdInst;
    722   }
    723 
    724   bool hasSMemTimeInst() const {
    725     return HasSMemTimeInst;
    726   }
    727 
    728   bool hasShaderCyclesRegister() const {
    729     return HasShaderCyclesRegister;
    730   }
    731 
    732   bool hasRegisterBanking() const {
    733     return HasRegisterBanking;
    734   }
    735 
    736   bool hasVOP3Literal() const {
    737     return HasVOP3Literal;
    738   }
    739 
    740   bool hasNoDataDepHazard() const {
    741     return HasNoDataDepHazard;
    742   }
    743 
    744   bool vmemWriteNeedsExpWaitcnt() const {
    745     return getGeneration() < SEA_ISLANDS;
    746   }
    747 
    748   // Scratch is allocated in 256 dword per wave blocks for the entire
    749   // wavefront. When viewed from the perspecive of an arbitrary workitem, this
    750   // is 4-byte aligned.
    751   //
    752   // Only 4-byte alignment is really needed to access anything. Transformations
    753   // on the pointer value itself may rely on the alignment / known low bits of
    754   // the pointer. Set this to something above the minimum to avoid needing
    755   // dynamic realignment in common cases.
    756   Align getStackAlignment() const { return Align(16); }
    757 
    758   bool enableMachineScheduler() const override {
    759     return true;
    760   }
    761 
    762   bool useAA() const override;
    763 
    764   bool enableSubRegLiveness() const override {
    765     return true;
    766   }
    767 
    768   void setScalarizeGlobalBehavior(bool b) { ScalarizeGlobal = b; }
    769   bool getScalarizeGlobalBehavior() const { return ScalarizeGlobal; }
    770 
    771   // static wrappers
    772   static bool hasHalfRate64Ops(const TargetSubtargetInfo &STI);
    773 
    774   // XXX - Why is this here if it isn't in the default pass set?
    775   bool enableEarlyIfConversion() const override {
    776     return true;
    777   }
    778 
    779   bool enableFlatScratch() const;
    780 
    781   void overrideSchedPolicy(MachineSchedPolicy &Policy,
    782                            unsigned NumRegionInstrs) const override;
    783 
    784   unsigned getMaxNumUserSGPRs() const {
    785     return 16;
    786   }
    787 
    788   bool hasSMemRealTime() const {
    789     return HasSMemRealTime;
    790   }
    791 
    792   bool hasMovrel() const {
    793     return HasMovrel;
    794   }
    795 
    796   bool hasVGPRIndexMode() const {
    797     return HasVGPRIndexMode;
    798   }
    799 
    800   bool useVGPRIndexMode() const;
    801 
    802   bool hasScalarCompareEq64() const {
    803     return getGeneration() >= VOLCANIC_ISLANDS;
    804   }
    805 
    806   bool hasScalarStores() const {
    807     return HasScalarStores;
    808   }
    809 
    810   bool hasScalarAtomics() const {
    811     return HasScalarAtomics;
    812   }
    813 
    814   bool hasLDSFPAtomics() const {
    815     return GFX8Insts;
    816   }
    817 
    818   /// \returns true if the subtarget has the v_permlanex16_b32 instruction.
    819   bool hasPermLaneX16() const { return getGeneration() >= GFX10; }
    820 
    821   bool hasDPP() const {
    822     return HasDPP;
    823   }
    824 
    825   bool hasDPPBroadcasts() const {
    826     return HasDPP && getGeneration() < GFX10;
    827   }
    828 
    829   bool hasDPPWavefrontShifts() const {
    830     return HasDPP && getGeneration() < GFX10;
    831   }
    832 
    833   bool hasDPP8() const {
    834     return HasDPP8;
    835   }
    836 
    837   bool has64BitDPP() const {
    838     return Has64BitDPP;
    839   }
    840 
    841   bool hasPackedFP32Ops() const {
    842     return HasPackedFP32Ops;
    843   }
    844 
    845   bool hasFmaakFmamkF32Insts() const {
    846     return getGeneration() >= GFX10;
    847   }
    848 
    849   bool hasExtendedImageInsts() const {
    850     return HasExtendedImageInsts;
    851   }
    852 
    853   bool hasR128A16() const {
    854     return HasR128A16;
    855   }
    856 
    857   bool hasGFX10A16() const {
    858     return HasGFX10A16;
    859   }
    860 
    861   bool hasA16() const { return hasR128A16() || hasGFX10A16(); }
    862 
    863   bool hasG16() const { return HasG16; }
    864 
    865   bool hasOffset3fBug() const {
    866     return HasOffset3fBug;
    867   }
    868 
    869   bool hasImageStoreD16Bug() const { return HasImageStoreD16Bug; }
    870 
    871   bool hasImageGather4D16Bug() const { return HasImageGather4D16Bug; }
    872 
    873   bool hasNSAEncoding() const { return HasNSAEncoding; }
    874 
    875   bool hasGFX10_BEncoding() const {
    876     return GFX10_BEncoding;
    877   }
    878 
    879   bool hasGFX10_3Insts() const {
    880     return GFX10_3Insts;
    881   }
    882 
    883   bool hasMadF16() const;
    884 
    885   bool enableSIScheduler() const {
    886     return EnableSIScheduler;
    887   }
    888 
    889   bool loadStoreOptEnabled() const {
    890     return EnableLoadStoreOpt;
    891   }
    892 
    893   bool hasSGPRInitBug() const {
    894     return SGPRInitBug;
    895   }
    896 
    897   bool hasNegativeScratchOffsetBug() const { return NegativeScratchOffsetBug; }
    898 
    899   bool hasNegativeUnalignedScratchOffsetBug() const {
    900     return NegativeUnalignedScratchOffsetBug;
    901   }
    902 
    903   bool hasMFMAInlineLiteralBug() const {
    904     return HasMFMAInlineLiteralBug;
    905   }
    906 
    907   bool has12DWordStoreHazard() const {
    908     return getGeneration() != AMDGPUSubtarget::SOUTHERN_ISLANDS;
    909   }
    910 
    911   // \returns true if the subtarget supports DWORDX3 load/store instructions.
    912   bool hasDwordx3LoadStores() const {
    913     return CIInsts;
    914   }
    915 
    916   bool hasReadM0MovRelInterpHazard() const {
    917     return getGeneration() == AMDGPUSubtarget::GFX9;
    918   }
    919 
    920   bool hasReadM0SendMsgHazard() const {
    921     return getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS &&
    922            getGeneration() <= AMDGPUSubtarget::GFX9;
    923   }
    924 
    925   bool hasVcmpxPermlaneHazard() const {
    926     return HasVcmpxPermlaneHazard;
    927   }
    928 
    929   bool hasVMEMtoScalarWriteHazard() const {
    930     return HasVMEMtoScalarWriteHazard;
    931   }
    932 
    933   bool hasSMEMtoVectorWriteHazard() const {
    934     return HasSMEMtoVectorWriteHazard;
    935   }
    936 
    937   bool hasLDSMisalignedBug() const {
    938     return LDSMisalignedBug && !EnableCuMode;
    939   }
    940 
    941   bool hasInstFwdPrefetchBug() const {
    942     return HasInstFwdPrefetchBug;
    943   }
    944 
    945   bool hasVcmpxExecWARHazard() const {
    946     return HasVcmpxExecWARHazard;
    947   }
    948 
    949   bool hasLdsBranchVmemWARHazard() const {
    950     return HasLdsBranchVmemWARHazard;
    951   }
    952 
    953   bool hasNSAtoVMEMBug() const {
    954     return HasNSAtoVMEMBug;
    955   }
    956 
    957   bool hasNSAClauseBug() const { return HasNSAClauseBug; }
    958 
    959   bool hasHardClauses() const { return getGeneration() >= GFX10; }
    960 
    961   bool hasGFX90AInsts() const { return GFX90AInsts; }
    962 
    963   /// Return if operations acting on VGPR tuples require even alignment.
    964   bool needsAlignedVGPRs() const { return GFX90AInsts; }
    965 
    966   bool hasPackedTID() const { return HasPackedTID; }
    967 
    968   /// Return the maximum number of waves per SIMD for kernels using \p SGPRs
    969   /// SGPRs
    970   unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const;
    971 
    972   /// Return the maximum number of waves per SIMD for kernels using \p VGPRs
    973   /// VGPRs
    974   unsigned getOccupancyWithNumVGPRs(unsigned VGPRs) const;
    975 
    976   /// Return occupancy for the given function. Used LDS and a number of
    977   /// registers if provided.
    978   /// Note, occupancy can be affected by the scratch allocation as well, but
    979   /// we do not have enough information to compute it.
    980   unsigned computeOccupancy(const Function &F, unsigned LDSSize = 0,
    981                             unsigned NumSGPRs = 0, unsigned NumVGPRs = 0) const;
    982 
    983   /// \returns true if the flat_scratch register should be initialized with the
    984   /// pointer to the wave's scratch memory rather than a size and offset.
    985   bool flatScratchIsPointer() const {
    986     return getGeneration() >= AMDGPUSubtarget::GFX9;
    987   }
    988 
    989   /// \returns true if the flat_scratch register is initialized by the HW.
    990   /// In this case it is readonly.
    991   bool flatScratchIsArchitected() const { return HasArchitectedFlatScratch; }
    992 
    993   /// \returns true if the machine has merged shaders in which s0-s7 are
    994   /// reserved by the hardware and user SGPRs start at s8
    995   bool hasMergedShaders() const {
    996     return getGeneration() >= GFX9;
    997   }
    998 
    999   /// \returns SGPR allocation granularity supported by the subtarget.
          /// Forwards to AMDGPU::IsaInfo::getSGPRAllocGranule(), passing this
          /// subtarget.
   1000   unsigned getSGPRAllocGranule() const {
   1001     return AMDGPU::IsaInfo::getSGPRAllocGranule(this);
   1002   }
   1003 
   1004   /// \returns SGPR encoding granularity supported by the subtarget.
          /// Forwards to AMDGPU::IsaInfo::getSGPREncodingGranule(), passing this
          /// subtarget.
   1005   unsigned getSGPREncodingGranule() const {
   1006     return AMDGPU::IsaInfo::getSGPREncodingGranule(this);
   1007   }
   1008 
   1009   /// \returns Total number of SGPRs supported by the subtarget.
          /// Forwards to AMDGPU::IsaInfo::getTotalNumSGPRs(), passing this subtarget.
   1010   unsigned getTotalNumSGPRs() const {
   1011     return AMDGPU::IsaInfo::getTotalNumSGPRs(this);
   1012   }
   1013 
   1014   /// \returns Addressable number of SGPRs supported by the subtarget.
          /// Forwards to AMDGPU::IsaInfo::getAddressableNumSGPRs(), passing this
          /// subtarget.
   1015   unsigned getAddressableNumSGPRs() const {
   1016     return AMDGPU::IsaInfo::getAddressableNumSGPRs(this);
   1017   }
   1018 
   1019   /// \returns Minimum number of SGPRs that meets the given number of waves per
   1020   /// execution unit requirement supported by the subtarget.
          /// \p WavesPerEU is handed unchanged to AMDGPU::IsaInfo::getMinNumSGPRs()
          /// together with this subtarget.
   1021   unsigned getMinNumSGPRs(unsigned WavesPerEU) const {
   1022     return AMDGPU::IsaInfo::getMinNumSGPRs(this, WavesPerEU);
   1023   }
   1024 
   1025   /// \returns Maximum number of SGPRs that meets the given number of waves per
   1026   /// execution unit requirement supported by the subtarget.
          /// Both \p WavesPerEU and \p Addressable are handed unchanged to
          /// AMDGPU::IsaInfo::getMaxNumSGPRs() together with this subtarget.
          /// NOTE(review): \p Addressable presumably limits the result to
          /// addressable SGPRs; confirm against the IsaInfo helper.
   1027   unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const {
   1028     return AMDGPU::IsaInfo::getMaxNumSGPRs(this, WavesPerEU, Addressable);
   1029   }
   1030 
   1031   /// \returns Reserved number of SGPRs for given function \p MF.
          /// Only declared here.
   1032   unsigned getReservedNumSGPRs(const MachineFunction &MF) const;
   1033 
   1034   /// \returns Maximum number of SGPRs that meets number of waves per execution
   1035   /// unit requirement for function \p MF, or number of SGPRs explicitly
   1036   /// requested using "amdgpu-num-sgpr" attribute attached to function \p MF.
   1037   ///
   1038   /// Falls back to the value that meets the number of waves per execution unit
   1039   /// requirement if the explicitly requested value cannot be converted to an
   1040   /// integer, violates the subtarget's specifications, or does not meet the
   1041   /// number of waves per execution unit requirement.
   1042   unsigned getMaxNumSGPRs(const MachineFunction &MF) const;
   1043 
   1044   /// \returns VGPR allocation granularity supported by the subtarget.
          /// Forwards to AMDGPU::IsaInfo::getVGPRAllocGranule(), passing this
          /// subtarget.
   1045   unsigned getVGPRAllocGranule() const {
   1046     return AMDGPU::IsaInfo::getVGPRAllocGranule(this);
   1047   }
   1048 
   1049   /// \returns VGPR encoding granularity supported by the subtarget.
          /// Forwards to AMDGPU::IsaInfo::getVGPREncodingGranule(), passing this
          /// subtarget.
   1050   unsigned getVGPREncodingGranule() const {
   1051     return AMDGPU::IsaInfo::getVGPREncodingGranule(this);
   1052   }
   1053 
   1054   /// \returns Total number of VGPRs supported by the subtarget.
          /// Forwards to AMDGPU::IsaInfo::getTotalNumVGPRs(), passing this subtarget.
   1055   unsigned getTotalNumVGPRs() const {
   1056     return AMDGPU::IsaInfo::getTotalNumVGPRs(this);
   1057   }
   1058 
   1059   /// \returns Addressable number of VGPRs supported by the subtarget.
          /// Forwards to AMDGPU::IsaInfo::getAddressableNumVGPRs(), passing this
          /// subtarget.
   1060   unsigned getAddressableNumVGPRs() const {
   1061     return AMDGPU::IsaInfo::getAddressableNumVGPRs(this);
   1062   }
   1063 
   1064   /// \returns Minimum number of VGPRs that meets given number of waves per
   1065   /// execution unit requirement supported by the subtarget.
          /// \p WavesPerEU is handed unchanged to AMDGPU::IsaInfo::getMinNumVGPRs()
          /// together with this subtarget.
   1066   unsigned getMinNumVGPRs(unsigned WavesPerEU) const {
   1067     return AMDGPU::IsaInfo::getMinNumVGPRs(this, WavesPerEU);
   1068   }
   1069 
   1070   /// \returns Maximum number of VGPRs that meets given number of waves per
   1071   /// execution unit requirement supported by the subtarget.
          /// \p WavesPerEU is handed unchanged to AMDGPU::IsaInfo::getMaxNumVGPRs()
          /// together with this subtarget.
   1072   unsigned getMaxNumVGPRs(unsigned WavesPerEU) const {
   1073     return AMDGPU::IsaInfo::getMaxNumVGPRs(this, WavesPerEU);
   1074   }
   1075 
   1076   /// \returns Maximum number of VGPRs that meets number of waves per execution
   1077   /// unit requirement for function \p MF, or number of VGPRs explicitly
   1078   /// requested using "amdgpu-num-vgpr" attribute attached to function \p MF.
   1079   ///
   1080   /// Falls back to the value that meets the number of waves per execution unit
   1081   /// requirement if the explicitly requested value cannot be converted to an
   1082   /// integer, violates the subtarget's specifications, or does not meet the
   1083   /// number of waves per execution unit requirement.
   1084   unsigned getMaxNumVGPRs(const MachineFunction &MF) const;
   1085 
          /// Override that receives the vector \p Mutations of ScheduleDAGMutation
          /// objects by non-const reference. Only declared here.
          /// NOTE(review): presumably appends this subtarget's post-RA scheduling
          /// mutations to \p Mutations; confirm against the definition.
   1086   void getPostRAMutations(
   1087       std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations)
   1088       const override;
   1089 
          /// \returns true if getWavefrontSize() reports a wavefront size of 32.
   1090   bool isWave32() const {
   1091     return getWavefrontSize() == 32;
   1092   }
   1093 
          /// \returns true if getWavefrontSize() reports a wavefront size of 64.
   1094   bool isWave64() const {
   1095     return getWavefrontSize() == 64;
   1096   }
   1097 
          /// Convenience accessor.
          /// \returns the register class reported by getRegisterInfo()->getBoolRC().
   1098   const TargetRegisterClass *getBoolRC() const {
   1099     return getRegisterInfo()->getBoolRC();
   1100   }
   1101 
   1102   /// \returns Maximum number of work groups per compute unit supported by the
   1103   /// subtarget and limited by given \p FlatWorkGroupSize.
          /// Override that forwards to AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(),
          /// passing this subtarget and \p FlatWorkGroupSize.
   1104   unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override {
   1105     return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(this, FlatWorkGroupSize);
   1106   }
   1107 
   1108   /// \returns Minimum flat work group size supported by the subtarget.
          /// Override that forwards to AMDGPU::IsaInfo::getMinFlatWorkGroupSize(),
          /// passing this subtarget.
   1109   unsigned getMinFlatWorkGroupSize() const override {
   1110     return AMDGPU::IsaInfo::getMinFlatWorkGroupSize(this);
   1111   }
   1112 
   1113   /// \returns Maximum flat work group size supported by the subtarget.
          /// Override that forwards to AMDGPU::IsaInfo::getMaxFlatWorkGroupSize(),
          /// passing this subtarget.
   1114   unsigned getMaxFlatWorkGroupSize() const override {
   1115     return AMDGPU::IsaInfo::getMaxFlatWorkGroupSize(this);
   1116   }
   1117 
   1118   /// \returns Number of waves per execution unit required to support the given
   1119   /// \p FlatWorkGroupSize.
          /// Override that forwards to AMDGPU::IsaInfo::getWavesPerEUForWorkGroup(),
          /// passing this subtarget and \p FlatWorkGroupSize.
   1120   unsigned
   1121   getWavesPerEUForWorkGroup(unsigned FlatWorkGroupSize) const override {
   1122     return AMDGPU::IsaInfo::getWavesPerEUForWorkGroup(this, FlatWorkGroupSize);
   1123   }
   1124 
   1125   /// \returns Minimum number of waves per execution unit supported by the
   1126   /// subtarget.
          /// Override that forwards to AMDGPU::IsaInfo::getMinWavesPerEU(), passing
          /// this subtarget.
   1127   unsigned getMinWavesPerEU() const override {
   1128     return AMDGPU::IsaInfo::getMinWavesPerEU(this);
   1129   }
   1130 
          /// Scheduling-dependency override. \p Dep is taken by non-const reference,
          /// so the implementation is able to modify it; \p Def / \p DefOpIdx and
          /// \p Use / \p UseOpIdx identify the two ends of the dependency.
          /// NOTE(review): only declared here; what is changed on \p Dep cannot be
          /// seen from this header, so confirm against the definition.
   1131   void adjustSchedDependency(SUnit *Def, int DefOpIdx, SUnit *Use, int UseOpIdx,
   1132                              SDep &Dep) const override;
   1133 };
   1134 
   1135 } // end namespace llvm
   1136 
   1137 #endif // LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H
   1138