Home | History | Annotate | Line # | Download | only in AMDGPU
      1 //==- SIMachineFunctionInfo.h - SIMachineFunctionInfo interface --*- C++ -*-==//
      2 //
      3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
      4 // See https://llvm.org/LICENSE.txt for license information.
      5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
      6 //
      7 //===----------------------------------------------------------------------===//
      8 //
      9 /// \file
     10 //
     11 //===----------------------------------------------------------------------===//
     12 
     13 #ifndef LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
     14 #define LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
     15 
     16 #include "AMDGPUArgumentUsageInfo.h"
     17 #include "AMDGPUMachineFunction.h"
     18 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
     19 #include "SIInstrInfo.h"
     20 #include "llvm/ADT/MapVector.h"
     21 #include "llvm/CodeGen/MIRYamlMapping.h"
     22 #include "llvm/CodeGen/PseudoSourceValue.h"
     23 #include "llvm/Support/raw_ostream.h"
     24 
     25 namespace llvm {
     26 
     27 class MachineFrameInfo;
     28 class MachineFunction;
     29 class TargetRegisterClass;
     30 class SIMachineFunctionInfo;
     31 class SIRegisterInfo;
     32 
     33 class AMDGPUPseudoSourceValue : public PseudoSourceValue {
     34 public:
     35   enum AMDGPUPSVKind : unsigned {
     36     PSVBuffer = PseudoSourceValue::TargetCustom,
     37     PSVImage,
     38     GWSResource
     39   };
     40 
     41 protected:
     42   AMDGPUPseudoSourceValue(unsigned Kind, const TargetInstrInfo &TII)
     43       : PseudoSourceValue(Kind, TII) {}
     44 
     45 public:
     46   bool isConstant(const MachineFrameInfo *) const override {
     47     // This should probably be true for most images, but we will start by being
     48     // conservative.
     49     return false;
     50   }
     51 
     52   bool isAliased(const MachineFrameInfo *) const override {
     53     return true;
     54   }
     55 
     56   bool mayAlias(const MachineFrameInfo *) const override {
     57     return true;
     58   }
     59 };
     60 
     61 class AMDGPUBufferPseudoSourceValue final : public AMDGPUPseudoSourceValue {
     62 public:
     63   explicit AMDGPUBufferPseudoSourceValue(const TargetInstrInfo &TII)
     64       : AMDGPUPseudoSourceValue(PSVBuffer, TII) {}
     65 
     66   static bool classof(const PseudoSourceValue *V) {
     67     return V->kind() == PSVBuffer;
     68   }
     69 
     70   void printCustom(raw_ostream &OS) const override { OS << "BufferResource"; }
     71 };
     72 
     73 class AMDGPUImagePseudoSourceValue final : public AMDGPUPseudoSourceValue {
     74 public:
     75   // TODO: Is the img rsrc useful?
     76   explicit AMDGPUImagePseudoSourceValue(const TargetInstrInfo &TII)
     77       : AMDGPUPseudoSourceValue(PSVImage, TII) {}
     78 
     79   static bool classof(const PseudoSourceValue *V) {
     80     return V->kind() == PSVImage;
     81   }
     82 
     83   void printCustom(raw_ostream &OS) const override { OS << "ImageResource"; }
     84 };
     85 
     86 class AMDGPUGWSResourcePseudoSourceValue final : public AMDGPUPseudoSourceValue {
     87 public:
     88   explicit AMDGPUGWSResourcePseudoSourceValue(const TargetInstrInfo &TII)
     89       : AMDGPUPseudoSourceValue(GWSResource, TII) {}
     90 
     91   static bool classof(const PseudoSourceValue *V) {
     92     return V->kind() == GWSResource;
     93   }
     94 
     95   // These are inaccessible memory from IR.
     96   bool isAliased(const MachineFrameInfo *) const override {
     97     return false;
     98   }
     99 
    100   // These are inaccessible memory from IR.
    101   bool mayAlias(const MachineFrameInfo *) const override {
    102     return false;
    103   }
    104 
    105   void printCustom(raw_ostream &OS) const override {
    106     OS << "GWSResource";
    107   }
    108 };
    109 
    110 namespace yaml {
    111 
    112 struct SIArgument {
    113   bool IsRegister;
    114   union {
    115     StringValue RegisterName;
    116     unsigned StackOffset;
    117   };
    118   Optional<unsigned> Mask;
    119 
    120   // Default constructor, which creates a stack argument.
    121   SIArgument() : IsRegister(false), StackOffset(0) {}
    122   SIArgument(const SIArgument &Other) {
    123     IsRegister = Other.IsRegister;
    124     if (IsRegister) {
    125       ::new ((void *)std::addressof(RegisterName))
    126           StringValue(Other.RegisterName);
    127     } else
    128       StackOffset = Other.StackOffset;
    129     Mask = Other.Mask;
    130   }
    131   SIArgument &operator=(const SIArgument &Other) {
    132     IsRegister = Other.IsRegister;
    133     if (IsRegister) {
    134       ::new ((void *)std::addressof(RegisterName))
    135           StringValue(Other.RegisterName);
    136     } else
    137       StackOffset = Other.StackOffset;
    138     Mask = Other.Mask;
    139     return *this;
    140   }
    141   ~SIArgument() {
    142     if (IsRegister)
    143       RegisterName.~StringValue();
    144   }
    145 
    146   // Helper to create a register or stack argument.
    147   static inline SIArgument createArgument(bool IsReg) {
    148     if (IsReg)
    149       return SIArgument(IsReg);
    150     return SIArgument();
    151   }
    152 
    153 private:
    154   // Construct a register argument.
    155   SIArgument(bool) : IsRegister(true), RegisterName() {}
    156 };
    157 
    158 template <> struct MappingTraits<SIArgument> {
    159   static void mapping(IO &YamlIO, SIArgument &A) {
    160     if (YamlIO.outputting()) {
    161       if (A.IsRegister)
    162         YamlIO.mapRequired("reg", A.RegisterName);
    163       else
    164         YamlIO.mapRequired("offset", A.StackOffset);
    165     } else {
    166       auto Keys = YamlIO.keys();
    167       if (is_contained(Keys, "reg")) {
    168         A = SIArgument::createArgument(true);
    169         YamlIO.mapRequired("reg", A.RegisterName);
    170       } else if (is_contained(Keys, "offset"))
    171         YamlIO.mapRequired("offset", A.StackOffset);
    172       else
    173         YamlIO.setError("missing required key 'reg' or 'offset'");
    174     }
    175     YamlIO.mapOptional("mask", A.Mask);
    176   }
    177   static const bool flow = true;
    178 };
    179 
    180 struct SIArgumentInfo {
    181   Optional<SIArgument> PrivateSegmentBuffer;
    182   Optional<SIArgument> DispatchPtr;
    183   Optional<SIArgument> QueuePtr;
    184   Optional<SIArgument> KernargSegmentPtr;
    185   Optional<SIArgument> DispatchID;
    186   Optional<SIArgument> FlatScratchInit;
    187   Optional<SIArgument> PrivateSegmentSize;
    188 
    189   Optional<SIArgument> WorkGroupIDX;
    190   Optional<SIArgument> WorkGroupIDY;
    191   Optional<SIArgument> WorkGroupIDZ;
    192   Optional<SIArgument> WorkGroupInfo;
    193   Optional<SIArgument> PrivateSegmentWaveByteOffset;
    194 
    195   Optional<SIArgument> ImplicitArgPtr;
    196   Optional<SIArgument> ImplicitBufferPtr;
    197 
    198   Optional<SIArgument> WorkItemIDX;
    199   Optional<SIArgument> WorkItemIDY;
    200   Optional<SIArgument> WorkItemIDZ;
    201 };
    202 
    203 template <> struct MappingTraits<SIArgumentInfo> {
    204   static void mapping(IO &YamlIO, SIArgumentInfo &AI) {
    205     YamlIO.mapOptional("privateSegmentBuffer", AI.PrivateSegmentBuffer);
    206     YamlIO.mapOptional("dispatchPtr", AI.DispatchPtr);
    207     YamlIO.mapOptional("queuePtr", AI.QueuePtr);
    208     YamlIO.mapOptional("kernargSegmentPtr", AI.KernargSegmentPtr);
    209     YamlIO.mapOptional("dispatchID", AI.DispatchID);
    210     YamlIO.mapOptional("flatScratchInit", AI.FlatScratchInit);
    211     YamlIO.mapOptional("privateSegmentSize", AI.PrivateSegmentSize);
    212 
    213     YamlIO.mapOptional("workGroupIDX", AI.WorkGroupIDX);
    214     YamlIO.mapOptional("workGroupIDY", AI.WorkGroupIDY);
    215     YamlIO.mapOptional("workGroupIDZ", AI.WorkGroupIDZ);
    216     YamlIO.mapOptional("workGroupInfo", AI.WorkGroupInfo);
    217     YamlIO.mapOptional("privateSegmentWaveByteOffset",
    218                        AI.PrivateSegmentWaveByteOffset);
    219 
    220     YamlIO.mapOptional("implicitArgPtr", AI.ImplicitArgPtr);
    221     YamlIO.mapOptional("implicitBufferPtr", AI.ImplicitBufferPtr);
    222 
    223     YamlIO.mapOptional("workItemIDX", AI.WorkItemIDX);
    224     YamlIO.mapOptional("workItemIDY", AI.WorkItemIDY);
    225     YamlIO.mapOptional("workItemIDZ", AI.WorkItemIDZ);
    226   }
    227 };
    228 
    229 // Default to default mode for default calling convention.
    230 struct SIMode {
    231   bool IEEE = true;
    232   bool DX10Clamp = true;
    233   bool FP32InputDenormals = true;
    234   bool FP32OutputDenormals = true;
    235   bool FP64FP16InputDenormals = true;
    236   bool FP64FP16OutputDenormals = true;
    237 
    238   SIMode() = default;
    239 
    240   SIMode(const AMDGPU::SIModeRegisterDefaults &Mode) {
    241     IEEE = Mode.IEEE;
    242     DX10Clamp = Mode.DX10Clamp;
    243     FP32InputDenormals = Mode.FP32InputDenormals;
    244     FP32OutputDenormals = Mode.FP32OutputDenormals;
    245     FP64FP16InputDenormals = Mode.FP64FP16InputDenormals;
    246     FP64FP16OutputDenormals = Mode.FP64FP16OutputDenormals;
    247   }
    248 
    249   bool operator ==(const SIMode Other) const {
    250     return IEEE == Other.IEEE &&
    251            DX10Clamp == Other.DX10Clamp &&
    252            FP32InputDenormals == Other.FP32InputDenormals &&
    253            FP32OutputDenormals == Other.FP32OutputDenormals &&
    254            FP64FP16InputDenormals == Other.FP64FP16InputDenormals &&
    255            FP64FP16OutputDenormals == Other.FP64FP16OutputDenormals;
    256   }
    257 };
    258 
    259 template <> struct MappingTraits<SIMode> {
    260   static void mapping(IO &YamlIO, SIMode &Mode) {
    261     YamlIO.mapOptional("ieee", Mode.IEEE, true);
    262     YamlIO.mapOptional("dx10-clamp", Mode.DX10Clamp, true);
    263     YamlIO.mapOptional("fp32-input-denormals", Mode.FP32InputDenormals, true);
    264     YamlIO.mapOptional("fp32-output-denormals", Mode.FP32OutputDenormals, true);
    265     YamlIO.mapOptional("fp64-fp16-input-denormals", Mode.FP64FP16InputDenormals, true);
    266     YamlIO.mapOptional("fp64-fp16-output-denormals", Mode.FP64FP16OutputDenormals, true);
    267   }
    268 };
    269 
    270 struct SIMachineFunctionInfo final : public yaml::MachineFunctionInfo {
    271   uint64_t ExplicitKernArgSize = 0;
    272   unsigned MaxKernArgAlign = 0;
    273   unsigned LDSSize = 0;
    274   Align DynLDSAlign;
    275   bool IsEntryFunction = false;
    276   bool NoSignedZerosFPMath = false;
    277   bool MemoryBound = false;
    278   bool WaveLimiter = false;
    279   bool HasSpilledSGPRs = false;
    280   bool HasSpilledVGPRs = false;
    281   uint32_t HighBitsOf32BitAddress = 0;
    282 
    283   // TODO: 10 may be a better default since it's the maximum.
    284   unsigned Occupancy = 0;
    285 
    286   StringValue ScratchRSrcReg = "$private_rsrc_reg";
    287   StringValue FrameOffsetReg = "$fp_reg";
    288   StringValue StackPtrOffsetReg = "$sp_reg";
    289 
    290   Optional<SIArgumentInfo> ArgInfo;
    291   SIMode Mode;
    292   Optional<FrameIndex> ScavengeFI;
    293 
    294   SIMachineFunctionInfo() = default;
    295   SIMachineFunctionInfo(const llvm::SIMachineFunctionInfo &,
    296                         const TargetRegisterInfo &TRI,
    297                         const llvm::MachineFunction &MF);
    298 
    299   void mappingImpl(yaml::IO &YamlIO) override;
    300   ~SIMachineFunctionInfo() = default;
    301 };
    302 
    303 template <> struct MappingTraits<SIMachineFunctionInfo> {
    304   static void mapping(IO &YamlIO, SIMachineFunctionInfo &MFI) {
    305     YamlIO.mapOptional("explicitKernArgSize", MFI.ExplicitKernArgSize,
    306                        UINT64_C(0));
    307     YamlIO.mapOptional("maxKernArgAlign", MFI.MaxKernArgAlign, 0u);
    308     YamlIO.mapOptional("ldsSize", MFI.LDSSize, 0u);
    309     YamlIO.mapOptional("dynLDSAlign", MFI.DynLDSAlign, Align());
    310     YamlIO.mapOptional("isEntryFunction", MFI.IsEntryFunction, false);
    311     YamlIO.mapOptional("noSignedZerosFPMath", MFI.NoSignedZerosFPMath, false);
    312     YamlIO.mapOptional("memoryBound", MFI.MemoryBound, false);
    313     YamlIO.mapOptional("waveLimiter", MFI.WaveLimiter, false);
    314     YamlIO.mapOptional("hasSpilledSGPRs", MFI.HasSpilledSGPRs, false);
    315     YamlIO.mapOptional("hasSpilledVGPRs", MFI.HasSpilledVGPRs, false);
    316     YamlIO.mapOptional("scratchRSrcReg", MFI.ScratchRSrcReg,
    317                        StringValue("$private_rsrc_reg"));
    318     YamlIO.mapOptional("frameOffsetReg", MFI.FrameOffsetReg,
    319                        StringValue("$fp_reg"));
    320     YamlIO.mapOptional("stackPtrOffsetReg", MFI.StackPtrOffsetReg,
    321                        StringValue("$sp_reg"));
    322     YamlIO.mapOptional("argumentInfo", MFI.ArgInfo);
    323     YamlIO.mapOptional("mode", MFI.Mode, SIMode());
    324     YamlIO.mapOptional("highBitsOf32BitAddress",
    325                        MFI.HighBitsOf32BitAddress, 0u);
    326     YamlIO.mapOptional("occupancy", MFI.Occupancy, 0);
    327     YamlIO.mapOptional("scavengeFI", MFI.ScavengeFI);
    328   }
    329 };
    330 
    331 } // end namespace yaml
    332 
    333 /// This class keeps track of the SPI_SP_INPUT_ADDR config register, which
    334 /// tells the hardware which interpolation parameters to load.
    335 class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
    336   friend class GCNTargetMachine;
    337 
    338   Register TIDReg = AMDGPU::NoRegister;
    339 
    340   // Registers that may be reserved for spilling purposes. These may be the same
    341   // as the input registers.
    342   Register ScratchRSrcReg = AMDGPU::PRIVATE_RSRC_REG;
    343 
  // This is the unswizzled offset from the current dispatch's scratch wave
  // base to the beginning of the current function's frame.
    346   Register FrameOffsetReg = AMDGPU::FP_REG;
    347 
    348   // This is an ABI register used in the non-entry calling convention to
    349   // communicate the unswizzled offset from the current dispatch's scratch wave
    350   // base to the beginning of the new function's frame.
    351   Register StackPtrOffsetReg = AMDGPU::SP_REG;
    352 
    353   AMDGPUFunctionArgInfo ArgInfo;
    354 
    355   // Graphics info.
    356   unsigned PSInputAddr = 0;
    357   unsigned PSInputEnable = 0;
    358 
    359   /// Number of bytes of arguments this function has on the stack. If the callee
    360   /// is expected to restore the argument stack this should be a multiple of 16,
    361   /// all usable during a tail call.
    362   ///
    363   /// The alternative would forbid tail call optimisation in some cases: if we
    364   /// want to transfer control from a function with 8-bytes of stack-argument
    365   /// space to a function with 16-bytes then misalignment of this value would
    366   /// make a stack adjustment necessary, which could not be undone by the
    367   /// callee.
    368   unsigned BytesInStackArgArea = 0;
    369 
    370   bool ReturnsVoid = true;
    371 
    372   // A pair of default/requested minimum/maximum flat work group sizes.
    373   // Minimum - first, maximum - second.
    374   std::pair<unsigned, unsigned> FlatWorkGroupSizes = {0, 0};
    375 
    376   // A pair of default/requested minimum/maximum number of waves per execution
    377   // unit. Minimum - first, maximum - second.
    378   std::pair<unsigned, unsigned> WavesPerEU = {0, 0};
    379 
    380   std::unique_ptr<const AMDGPUBufferPseudoSourceValue> BufferPSV;
    381   std::unique_ptr<const AMDGPUImagePseudoSourceValue> ImagePSV;
    382   std::unique_ptr<const AMDGPUGWSResourcePseudoSourceValue> GWSResourcePSV;
    383 
    384 private:
    385   unsigned LDSWaveSpillSize = 0;
    386   unsigned NumUserSGPRs = 0;
    387   unsigned NumSystemSGPRs = 0;
    388 
    389   bool HasSpilledSGPRs = false;
    390   bool HasSpilledVGPRs = false;
    391   bool HasNonSpillStackObjects = false;
    392   bool IsStackRealigned = false;
    393 
    394   unsigned NumSpilledSGPRs = 0;
    395   unsigned NumSpilledVGPRs = 0;
    396 
    397   // Feature bits required for inputs passed in user SGPRs.
    398   bool PrivateSegmentBuffer : 1;
    399   bool DispatchPtr : 1;
    400   bool QueuePtr : 1;
    401   bool KernargSegmentPtr : 1;
    402   bool DispatchID : 1;
    403   bool FlatScratchInit : 1;
    404 
    405   // Feature bits required for inputs passed in system SGPRs.
    406   bool WorkGroupIDX : 1; // Always initialized.
    407   bool WorkGroupIDY : 1;
    408   bool WorkGroupIDZ : 1;
    409   bool WorkGroupInfo : 1;
    410   bool PrivateSegmentWaveByteOffset : 1;
    411 
    412   bool WorkItemIDX : 1; // Always initialized.
    413   bool WorkItemIDY : 1;
    414   bool WorkItemIDZ : 1;
    415 
    416   // Private memory buffer
    417   // Compute directly in sgpr[0:1]
    418   // Other shaders indirect 64-bits at sgpr[0:1]
    419   bool ImplicitBufferPtr : 1;
    420 
    421   // Pointer to where the ABI inserts special kernel arguments separate from the
    422   // user arguments. This is an offset from the KernargSegmentPtr.
    423   bool ImplicitArgPtr : 1;
    424 
    425   // The hard-wired high half of the address of the global information table
    426   // for AMDPAL OS type. 0xffffffff represents no hard-wired high half, since
    427   // current hardware only allows a 16 bit value.
    428   unsigned GITPtrHigh;
    429 
    430   unsigned HighBitsOf32BitAddress;
    431   unsigned GDSSize;
    432 
    433   // Current recorded maximum possible occupancy.
    434   unsigned Occupancy;
    435 
    436   MCPhysReg getNextUserSGPR() const;
    437 
    438   MCPhysReg getNextSystemSGPR() const;
    439 
    440 public:
    441   struct SpilledReg {
    442     Register VGPR;
    443     int Lane = -1;
    444 
    445     SpilledReg() = default;
    446     SpilledReg(Register R, int L) : VGPR (R), Lane (L) {}
    447 
    448     bool hasLane() { return Lane != -1;}
    449     bool hasReg() { return VGPR != 0;}
    450   };
    451 
    452   struct SGPRSpillVGPR {
    453     // VGPR used for SGPR spills
    454     Register VGPR;
    455 
    456     // If the VGPR is is used for SGPR spills in a non-entrypoint function, the
    457     // stack slot used to save/restore it in the prolog/epilog.
    458     Optional<int> FI;
    459 
    460     SGPRSpillVGPR(Register V, Optional<int> F) : VGPR(V), FI(F) {}
    461   };
    462 
    463   struct VGPRSpillToAGPR {
    464     SmallVector<MCPhysReg, 32> Lanes;
    465     bool FullyAllocated = false;
    466   };
    467 
    468   // Map WWM VGPR to a stack slot that is used to save/restore it in the
    469   // prolog/epilog.
    470   MapVector<Register, Optional<int>> WWMReservedRegs;
    471 
    472 private:
    473   // Track VGPR + wave index for each subregister of the SGPR spilled to
    474   // frameindex key.
    475   DenseMap<int, std::vector<SpilledReg>> SGPRToVGPRSpills;
    476   unsigned NumVGPRSpillLanes = 0;
    477   SmallVector<SGPRSpillVGPR, 2> SpillVGPRs;
    478 
    479   DenseMap<int, VGPRSpillToAGPR> VGPRToAGPRSpills;
    480 
    481   // AGPRs used for VGPR spills.
    482   SmallVector<MCPhysReg, 32> SpillAGPR;
    483 
    484   // VGPRs used for AGPR spills.
    485   SmallVector<MCPhysReg, 32> SpillVGPR;
    486 
    487   // Emergency stack slot. Sometimes, we create this before finalizing the stack
    488   // frame, so save it here and add it to the RegScavenger later.
    489   Optional<int> ScavengeFI;
    490 
    491 public: // FIXME
    492   /// If this is set, an SGPR used for save/restore of the register used for the
    493   /// frame pointer.
    494   Register SGPRForFPSaveRestoreCopy;
    495   Optional<int> FramePointerSaveIndex;
    496 
    497   /// If this is set, an SGPR used for save/restore of the register used for the
    498   /// base pointer.
    499   Register SGPRForBPSaveRestoreCopy;
    500   Optional<int> BasePointerSaveIndex;
    501 
    502   Register VGPRReservedForSGPRSpill;
    503   bool isCalleeSavedReg(const MCPhysReg *CSRegs, MCPhysReg Reg);
    504 
    505 public:
    506   SIMachineFunctionInfo(const MachineFunction &MF);
    507 
    508   bool initializeBaseYamlFields(const yaml::SIMachineFunctionInfo &YamlMFI,
    509                                 const MachineFunction &MF,
    510                                 PerFunctionMIParsingState &PFS,
    511                                 SMDiagnostic &Error, SMRange &SourceRange);
    512 
    513   void reserveWWMRegister(Register Reg, Optional<int> FI) {
    514     WWMReservedRegs.insert(std::make_pair(Reg, FI));
    515   }
    516 
    517   ArrayRef<SpilledReg> getSGPRToVGPRSpills(int FrameIndex) const {
    518     auto I = SGPRToVGPRSpills.find(FrameIndex);
    519     return (I == SGPRToVGPRSpills.end()) ?
    520       ArrayRef<SpilledReg>() : makeArrayRef(I->second);
    521   }
    522 
    523   ArrayRef<SGPRSpillVGPR> getSGPRSpillVGPRs() const { return SpillVGPRs; }
    524 
    525   void setSGPRSpillVGPRs(Register NewVGPR, Optional<int> newFI, int Index) {
    526     SpillVGPRs[Index].VGPR = NewVGPR;
    527     SpillVGPRs[Index].FI = newFI;
    528     VGPRReservedForSGPRSpill = NewVGPR;
    529   }
    530 
    531   bool removeVGPRForSGPRSpill(Register ReservedVGPR, MachineFunction &MF);
    532 
    533   ArrayRef<MCPhysReg> getAGPRSpillVGPRs() const {
    534     return SpillAGPR;
    535   }
    536 
    537   ArrayRef<MCPhysReg> getVGPRSpillAGPRs() const {
    538     return SpillVGPR;
    539   }
    540 
    541   MCPhysReg getVGPRToAGPRSpill(int FrameIndex, unsigned Lane) const {
    542     auto I = VGPRToAGPRSpills.find(FrameIndex);
    543     return (I == VGPRToAGPRSpills.end()) ? (MCPhysReg)AMDGPU::NoRegister
    544                                          : I->second.Lanes[Lane];
    545   }
    546 
    547   bool haveFreeLanesForSGPRSpill(const MachineFunction &MF,
    548                                  unsigned NumLane) const;
    549   bool allocateSGPRSpillToVGPR(MachineFunction &MF, int FI);
    550   bool reserveVGPRforSGPRSpills(MachineFunction &MF);
    551   bool allocateVGPRSpillToAGPR(MachineFunction &MF, int FI, bool isAGPRtoVGPR);
    552   void removeDeadFrameIndices(MachineFrameInfo &MFI);
    553 
    554   int getScavengeFI(MachineFrameInfo &MFI, const SIRegisterInfo &TRI);
    555   Optional<int> getOptionalScavengeFI() const { return ScavengeFI; }
    556 
    557   bool hasCalculatedTID() const { return TIDReg != 0; };
    558   Register getTIDReg() const { return TIDReg; };
    559   void setTIDReg(Register Reg) { TIDReg = Reg; }
    560 
    561   unsigned getBytesInStackArgArea() const {
    562     return BytesInStackArgArea;
    563   }
    564 
    565   void setBytesInStackArgArea(unsigned Bytes) {
    566     BytesInStackArgArea = Bytes;
    567   }
    568 
    569   // Add user SGPRs.
    570   Register addPrivateSegmentBuffer(const SIRegisterInfo &TRI);
    571   Register addDispatchPtr(const SIRegisterInfo &TRI);
    572   Register addQueuePtr(const SIRegisterInfo &TRI);
    573   Register addKernargSegmentPtr(const SIRegisterInfo &TRI);
    574   Register addDispatchID(const SIRegisterInfo &TRI);
    575   Register addFlatScratchInit(const SIRegisterInfo &TRI);
    576   Register addImplicitBufferPtr(const SIRegisterInfo &TRI);
    577 
    578   // Add system SGPRs.
    579   Register addWorkGroupIDX() {
    580     ArgInfo.WorkGroupIDX = ArgDescriptor::createRegister(getNextSystemSGPR());
    581     NumSystemSGPRs += 1;
    582     return ArgInfo.WorkGroupIDX.getRegister();
    583   }
    584 
    585   Register addWorkGroupIDY() {
    586     ArgInfo.WorkGroupIDY = ArgDescriptor::createRegister(getNextSystemSGPR());
    587     NumSystemSGPRs += 1;
    588     return ArgInfo.WorkGroupIDY.getRegister();
    589   }
    590 
    591   Register addWorkGroupIDZ() {
    592     ArgInfo.WorkGroupIDZ = ArgDescriptor::createRegister(getNextSystemSGPR());
    593     NumSystemSGPRs += 1;
    594     return ArgInfo.WorkGroupIDZ.getRegister();
    595   }
    596 
    597   Register addWorkGroupInfo() {
    598     ArgInfo.WorkGroupInfo = ArgDescriptor::createRegister(getNextSystemSGPR());
    599     NumSystemSGPRs += 1;
    600     return ArgInfo.WorkGroupInfo.getRegister();
    601   }
    602 
    603   // Add special VGPR inputs
    604   void setWorkItemIDX(ArgDescriptor Arg) {
    605     ArgInfo.WorkItemIDX = Arg;
    606   }
    607 
    608   void setWorkItemIDY(ArgDescriptor Arg) {
    609     ArgInfo.WorkItemIDY = Arg;
    610   }
    611 
    612   void setWorkItemIDZ(ArgDescriptor Arg) {
    613     ArgInfo.WorkItemIDZ = Arg;
    614   }
    615 
    616   Register addPrivateSegmentWaveByteOffset() {
    617     ArgInfo.PrivateSegmentWaveByteOffset
    618       = ArgDescriptor::createRegister(getNextSystemSGPR());
    619     NumSystemSGPRs += 1;
    620     return ArgInfo.PrivateSegmentWaveByteOffset.getRegister();
    621   }
    622 
    623   void setPrivateSegmentWaveByteOffset(Register Reg) {
    624     ArgInfo.PrivateSegmentWaveByteOffset = ArgDescriptor::createRegister(Reg);
    625   }
    626 
    627   bool hasPrivateSegmentBuffer() const {
    628     return PrivateSegmentBuffer;
    629   }
    630 
    631   bool hasDispatchPtr() const {
    632     return DispatchPtr;
    633   }
    634 
    635   bool hasQueuePtr() const {
    636     return QueuePtr;
    637   }
    638 
    639   bool hasKernargSegmentPtr() const {
    640     return KernargSegmentPtr;
    641   }
    642 
    643   bool hasDispatchID() const {
    644     return DispatchID;
    645   }
    646 
    647   bool hasFlatScratchInit() const {
    648     return FlatScratchInit;
    649   }
    650 
    651   bool hasWorkGroupIDX() const {
    652     return WorkGroupIDX;
    653   }
    654 
    655   bool hasWorkGroupIDY() const {
    656     return WorkGroupIDY;
    657   }
    658 
    659   bool hasWorkGroupIDZ() const {
    660     return WorkGroupIDZ;
    661   }
    662 
    663   bool hasWorkGroupInfo() const {
    664     return WorkGroupInfo;
    665   }
    666 
    667   bool hasPrivateSegmentWaveByteOffset() const {
    668     return PrivateSegmentWaveByteOffset;
    669   }
    670 
    671   bool hasWorkItemIDX() const {
    672     return WorkItemIDX;
    673   }
    674 
    675   bool hasWorkItemIDY() const {
    676     return WorkItemIDY;
    677   }
    678 
    679   bool hasWorkItemIDZ() const {
    680     return WorkItemIDZ;
    681   }
    682 
    683   bool hasImplicitArgPtr() const {
    684     return ImplicitArgPtr;
    685   }
    686 
    687   bool hasImplicitBufferPtr() const {
    688     return ImplicitBufferPtr;
    689   }
    690 
    691   AMDGPUFunctionArgInfo &getArgInfo() {
    692     return ArgInfo;
    693   }
    694 
    695   const AMDGPUFunctionArgInfo &getArgInfo() const {
    696     return ArgInfo;
    697   }
    698 
    699   std::tuple<const ArgDescriptor *, const TargetRegisterClass *, LLT>
    700   getPreloadedValue(AMDGPUFunctionArgInfo::PreloadedValue Value) const {
    701     return ArgInfo.getPreloadedValue(Value);
    702   }
    703 
    704   MCRegister getPreloadedReg(AMDGPUFunctionArgInfo::PreloadedValue Value) const {
    705     auto Arg = std::get<0>(ArgInfo.getPreloadedValue(Value));
    706     return Arg ? Arg->getRegister() : MCRegister();
    707   }
    708 
    709   unsigned getGITPtrHigh() const {
    710     return GITPtrHigh;
    711   }
    712 
    713   Register getGITPtrLoReg(const MachineFunction &MF) const;
    714 
    715   uint32_t get32BitAddressHighBits() const {
    716     return HighBitsOf32BitAddress;
    717   }
    718 
    719   unsigned getGDSSize() const {
    720     return GDSSize;
    721   }
    722 
    723   unsigned getNumUserSGPRs() const {
    724     return NumUserSGPRs;
    725   }
    726 
    727   unsigned getNumPreloadedSGPRs() const {
    728     return NumUserSGPRs + NumSystemSGPRs;
    729   }
    730 
    731   Register getPrivateSegmentWaveByteOffsetSystemSGPR() const {
    732     return ArgInfo.PrivateSegmentWaveByteOffset.getRegister();
    733   }
    734 
    735   /// Returns the physical register reserved for use as the resource
    736   /// descriptor for scratch accesses.
    737   Register getScratchRSrcReg() const {
    738     return ScratchRSrcReg;
    739   }
    740 
    741   void setScratchRSrcReg(Register Reg) {
    742     assert(Reg != 0 && "Should never be unset");
    743     ScratchRSrcReg = Reg;
    744   }
    745 
    746   Register getFrameOffsetReg() const {
    747     return FrameOffsetReg;
    748   }
    749 
    750   void setFrameOffsetReg(Register Reg) {
    751     assert(Reg != 0 && "Should never be unset");
    752     FrameOffsetReg = Reg;
    753   }
    754 
    755   void setStackPtrOffsetReg(Register Reg) {
    756     assert(Reg != 0 && "Should never be unset");
    757     StackPtrOffsetReg = Reg;
    758   }
    759 
    760   // Note the unset value for this is AMDGPU::SP_REG rather than
    761   // NoRegister. This is mostly a workaround for MIR tests where state that
    762   // can't be directly computed from the function is not preserved in serialized
    763   // MIR.
    764   Register getStackPtrOffsetReg() const {
    765     return StackPtrOffsetReg;
    766   }
    767 
    768   Register getQueuePtrUserSGPR() const {
    769     return ArgInfo.QueuePtr.getRegister();
    770   }
    771 
    772   Register getImplicitBufferPtrUserSGPR() const {
    773     return ArgInfo.ImplicitBufferPtr.getRegister();
    774   }
    775 
    776   bool hasSpilledSGPRs() const {
    777     return HasSpilledSGPRs;
    778   }
    779 
    780   void setHasSpilledSGPRs(bool Spill = true) {
    781     HasSpilledSGPRs = Spill;
    782   }
    783 
    784   bool hasSpilledVGPRs() const {
    785     return HasSpilledVGPRs;
    786   }
    787 
    788   void setHasSpilledVGPRs(bool Spill = true) {
    789     HasSpilledVGPRs = Spill;
    790   }
    791 
    792   bool hasNonSpillStackObjects() const {
    793     return HasNonSpillStackObjects;
    794   }
    795 
    796   void setHasNonSpillStackObjects(bool StackObject = true) {
    797     HasNonSpillStackObjects = StackObject;
    798   }
    799 
    800   bool isStackRealigned() const {
    801     return IsStackRealigned;
    802   }
    803 
    804   void setIsStackRealigned(bool Realigned = true) {
    805     IsStackRealigned = Realigned;
    806   }
    807 
    808   unsigned getNumSpilledSGPRs() const {
    809     return NumSpilledSGPRs;
    810   }
    811 
    812   unsigned getNumSpilledVGPRs() const {
    813     return NumSpilledVGPRs;
    814   }
    815 
    816   void addToSpilledSGPRs(unsigned num) {
    817     NumSpilledSGPRs += num;
    818   }
    819 
    820   void addToSpilledVGPRs(unsigned num) {
    821     NumSpilledVGPRs += num;
    822   }
    823 
    824   unsigned getPSInputAddr() const {
    825     return PSInputAddr;
    826   }
    827 
    828   unsigned getPSInputEnable() const {
    829     return PSInputEnable;
    830   }
    831 
    832   bool isPSInputAllocated(unsigned Index) const {
    833     return PSInputAddr & (1 << Index);
    834   }
    835 
    836   void markPSInputAllocated(unsigned Index) {
    837     PSInputAddr |= 1 << Index;
    838   }
    839 
    840   void markPSInputEnabled(unsigned Index) {
    841     PSInputEnable |= 1 << Index;
    842   }
    843 
    844   bool returnsVoid() const {
    845     return ReturnsVoid;
    846   }
    847 
    848   void setIfReturnsVoid(bool Value) {
    849     ReturnsVoid = Value;
    850   }
    851 
    852   /// \returns A pair of default/requested minimum/maximum flat work group sizes
    853   /// for this function.
    854   std::pair<unsigned, unsigned> getFlatWorkGroupSizes() const {
    855     return FlatWorkGroupSizes;
    856   }
    857 
    858   /// \returns Default/requested minimum flat work group size for this function.
    859   unsigned getMinFlatWorkGroupSize() const {
    860     return FlatWorkGroupSizes.first;
    861   }
    862 
    863   /// \returns Default/requested maximum flat work group size for this function.
    864   unsigned getMaxFlatWorkGroupSize() const {
    865     return FlatWorkGroupSizes.second;
    866   }
    867 
    868   /// \returns A pair of default/requested minimum/maximum number of waves per
    869   /// execution unit.
    870   std::pair<unsigned, unsigned> getWavesPerEU() const {
    871     return WavesPerEU;
    872   }
    873 
    874   /// \returns Default/requested minimum number of waves per execution unit.
    875   unsigned getMinWavesPerEU() const {
    876     return WavesPerEU.first;
    877   }
    878 
    879   /// \returns Default/requested maximum number of waves per execution unit.
    880   unsigned getMaxWavesPerEU() const {
    881     return WavesPerEU.second;
    882   }
    883 
    884   /// \returns SGPR used for \p Dim's work group ID.
    885   Register getWorkGroupIDSGPR(unsigned Dim) const {
    886     switch (Dim) {
    887     case 0:
    888       assert(hasWorkGroupIDX());
    889       return ArgInfo.WorkGroupIDX.getRegister();
    890     case 1:
    891       assert(hasWorkGroupIDY());
    892       return ArgInfo.WorkGroupIDY.getRegister();
    893     case 2:
    894       assert(hasWorkGroupIDZ());
    895       return ArgInfo.WorkGroupIDZ.getRegister();
    896     }
    897     llvm_unreachable("unexpected dimension");
    898   }
    899 
    900   unsigned getLDSWaveSpillSize() const {
    901     return LDSWaveSpillSize;
    902   }
    903 
    904   const AMDGPUBufferPseudoSourceValue *getBufferPSV(const SIInstrInfo &TII) {
    905     if (!BufferPSV)
    906       BufferPSV = std::make_unique<AMDGPUBufferPseudoSourceValue>(TII);
    907 
    908     return BufferPSV.get();
    909   }
    910 
    911   const AMDGPUImagePseudoSourceValue *getImagePSV(const SIInstrInfo &TII) {
    912     if (!ImagePSV)
    913       ImagePSV = std::make_unique<AMDGPUImagePseudoSourceValue>(TII);
    914 
    915     return ImagePSV.get();
    916   }
    917 
    918   const AMDGPUGWSResourcePseudoSourceValue *getGWSPSV(const SIInstrInfo &TII) {
    919     if (!GWSResourcePSV) {
    920       GWSResourcePSV =
    921           std::make_unique<AMDGPUGWSResourcePseudoSourceValue>(TII);
    922     }
    923 
    924     return GWSResourcePSV.get();
    925   }
    926 
    927   unsigned getOccupancy() const {
    928     return Occupancy;
    929   }
    930 
    931   unsigned getMinAllowedOccupancy() const {
    932     if (!isMemoryBound() && !needsWaveLimiter())
    933       return Occupancy;
    934     return (Occupancy < 4) ? Occupancy : 4;
    935   }
    936 
    937   void limitOccupancy(const MachineFunction &MF);
    938 
    939   void limitOccupancy(unsigned Limit) {
    940     if (Occupancy > Limit)
    941       Occupancy = Limit;
    942   }
    943 
    944   void increaseOccupancy(const MachineFunction &MF, unsigned Limit) {
    945     if (Occupancy < Limit)
    946       Occupancy = Limit;
    947     limitOccupancy(MF);
    948   }
    949 };
    950 
    951 } // end namespace llvm
    952 
    953 #endif // LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
    954