Home | History | Annotate | Line # | Download | only in AMDGPU
      1 //===- SIMachineFunctionInfo.cpp - SI Machine Function Info ---------------===//
      2 //
      3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
      4 // See https://llvm.org/LICENSE.txt for license information.
      5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
      6 //
      7 //===----------------------------------------------------------------------===//
      8 
      9 #include "SIMachineFunctionInfo.h"
     10 #include "AMDGPUTargetMachine.h"
     11 #include "llvm/CodeGen/MIRParser/MIParser.h"
     12 
     13 #define MAX_LANES 64
     14 
     15 using namespace llvm;
     16 
     17 SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
     18   : AMDGPUMachineFunction(MF),
     19     PrivateSegmentBuffer(false),
     20     DispatchPtr(false),
     21     QueuePtr(false),
     22     KernargSegmentPtr(false),
     23     DispatchID(false),
     24     FlatScratchInit(false),
     25     WorkGroupIDX(false),
     26     WorkGroupIDY(false),
     27     WorkGroupIDZ(false),
     28     WorkGroupInfo(false),
     29     PrivateSegmentWaveByteOffset(false),
     30     WorkItemIDX(false),
     31     WorkItemIDY(false),
     32     WorkItemIDZ(false),
     33     ImplicitBufferPtr(false),
     34     ImplicitArgPtr(false),
     35     GITPtrHigh(0xffffffff),
     36     HighBitsOf32BitAddress(0),
     37     GDSSize(0) {
     38   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
     39   const Function &F = MF.getFunction();
     40   FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(F);
     41   WavesPerEU = ST.getWavesPerEU(F);
     42 
     43   Occupancy = ST.computeOccupancy(F, getLDSSize());
     44   CallingConv::ID CC = F.getCallingConv();
     45 
     46   // FIXME: Should have analysis or something rather than attribute to detect
     47   // calls.
     48   const bool HasCalls = F.hasFnAttribute("amdgpu-calls");
     49 
     50   // Enable all kernel inputs if we have the fixed ABI. Don't bother if we don't
     51   // have any calls.
     52   const bool UseFixedABI = AMDGPUTargetMachine::EnableFixedFunctionABI &&
     53                            CC != CallingConv::AMDGPU_Gfx &&
     54                            (!isEntryFunction() || HasCalls);
     55 
     56   if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL) {
     57     if (!F.arg_empty())
     58       KernargSegmentPtr = true;
     59     WorkGroupIDX = true;
     60     WorkItemIDX = true;
     61   } else if (CC == CallingConv::AMDGPU_PS) {
     62     PSInputAddr = AMDGPU::getInitialPSInputAddr(F);
     63   }
     64 
     65   if (!isEntryFunction()) {
     66     if (UseFixedABI)
     67       ArgInfo = AMDGPUArgumentUsageInfo::FixedABIFunctionInfo;
     68 
     69     // TODO: Pick a high register, and shift down, similar to a kernel.
     70     FrameOffsetReg = AMDGPU::SGPR33;
     71     StackPtrOffsetReg = AMDGPU::SGPR32;
     72 
     73     if (!ST.enableFlatScratch()) {
     74       // Non-entry functions have no special inputs for now, other registers
     75       // required for scratch access.
     76       ScratchRSrcReg = AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3;
     77 
     78       ArgInfo.PrivateSegmentBuffer =
     79         ArgDescriptor::createRegister(ScratchRSrcReg);
     80     }
     81 
     82     if (F.hasFnAttribute("amdgpu-implicitarg-ptr"))
     83       ImplicitArgPtr = true;
     84   } else {
     85     if (F.hasFnAttribute("amdgpu-implicitarg-ptr")) {
     86       KernargSegmentPtr = true;
     87       MaxKernArgAlign = std::max(ST.getAlignmentForImplicitArgPtr(),
     88                                  MaxKernArgAlign);
     89     }
     90   }
     91 
     92   if (UseFixedABI) {
     93     WorkGroupIDX = true;
     94     WorkGroupIDY = true;
     95     WorkGroupIDZ = true;
     96     WorkItemIDX = true;
     97     WorkItemIDY = true;
     98     WorkItemIDZ = true;
     99     ImplicitArgPtr = true;
    100   } else {
    101     if (F.hasFnAttribute("amdgpu-work-group-id-x"))
    102       WorkGroupIDX = true;
    103 
    104     if (F.hasFnAttribute("amdgpu-work-group-id-y"))
    105       WorkGroupIDY = true;
    106 
    107     if (F.hasFnAttribute("amdgpu-work-group-id-z"))
    108       WorkGroupIDZ = true;
    109 
    110     if (F.hasFnAttribute("amdgpu-work-item-id-x"))
    111       WorkItemIDX = true;
    112 
    113     if (F.hasFnAttribute("amdgpu-work-item-id-y"))
    114       WorkItemIDY = true;
    115 
    116     if (F.hasFnAttribute("amdgpu-work-item-id-z"))
    117       WorkItemIDZ = true;
    118   }
    119 
    120   bool HasStackObjects = F.hasFnAttribute("amdgpu-stack-objects");
    121   if (isEntryFunction()) {
    122     // X, XY, and XYZ are the only supported combinations, so make sure Y is
    123     // enabled if Z is.
    124     if (WorkItemIDZ)
    125       WorkItemIDY = true;
    126 
    127     if (!ST.flatScratchIsArchitected()) {
    128       PrivateSegmentWaveByteOffset = true;
    129 
    130       // HS and GS always have the scratch wave offset in SGPR5 on GFX9.
    131       if (ST.getGeneration() >= AMDGPUSubtarget::GFX9 &&
    132           (CC == CallingConv::AMDGPU_HS || CC == CallingConv::AMDGPU_GS))
    133         ArgInfo.PrivateSegmentWaveByteOffset =
    134             ArgDescriptor::createRegister(AMDGPU::SGPR5);
    135     }
    136   }
    137 
    138   bool isAmdHsaOrMesa = ST.isAmdHsaOrMesa(F);
    139   if (isAmdHsaOrMesa) {
    140     if (!ST.enableFlatScratch())
    141       PrivateSegmentBuffer = true;
    142 
    143     if (UseFixedABI) {
    144       DispatchPtr = true;
    145       QueuePtr = true;
    146 
    147       // FIXME: We don't need this?
    148       DispatchID = true;
    149     } else {
    150       if (F.hasFnAttribute("amdgpu-dispatch-ptr"))
    151         DispatchPtr = true;
    152 
    153       if (F.hasFnAttribute("amdgpu-queue-ptr"))
    154         QueuePtr = true;
    155 
    156       if (F.hasFnAttribute("amdgpu-dispatch-id"))
    157         DispatchID = true;
    158     }
    159   } else if (ST.isMesaGfxShader(F)) {
    160     ImplicitBufferPtr = true;
    161   }
    162 
    163   if (UseFixedABI || F.hasFnAttribute("amdgpu-kernarg-segment-ptr"))
    164     KernargSegmentPtr = true;
    165 
    166   if (ST.hasFlatAddressSpace() && isEntryFunction() &&
    167       (isAmdHsaOrMesa || ST.enableFlatScratch()) &&
    168       !ST.flatScratchIsArchitected()) {
    169     // TODO: This could be refined a lot. The attribute is a poor way of
    170     // detecting calls or stack objects that may require it before argument
    171     // lowering.
    172     if (HasCalls || HasStackObjects || ST.enableFlatScratch())
    173       FlatScratchInit = true;
    174   }
    175 
    176   Attribute A = F.getFnAttribute("amdgpu-git-ptr-high");
    177   StringRef S = A.getValueAsString();
    178   if (!S.empty())
    179     S.consumeInteger(0, GITPtrHigh);
    180 
    181   A = F.getFnAttribute("amdgpu-32bit-address-high-bits");
    182   S = A.getValueAsString();
    183   if (!S.empty())
    184     S.consumeInteger(0, HighBitsOf32BitAddress);
    185 
    186   S = F.getFnAttribute("amdgpu-gds-size").getValueAsString();
    187   if (!S.empty())
    188     S.consumeInteger(0, GDSSize);
    189 }
    190 
    191 void SIMachineFunctionInfo::limitOccupancy(const MachineFunction &MF) {
    192   limitOccupancy(getMaxWavesPerEU());
    193   const GCNSubtarget& ST = MF.getSubtarget<GCNSubtarget>();
    194   limitOccupancy(ST.getOccupancyWithLocalMemSize(getLDSSize(),
    195                  MF.getFunction()));
    196 }
    197 
    198 Register SIMachineFunctionInfo::addPrivateSegmentBuffer(
    199   const SIRegisterInfo &TRI) {
    200   ArgInfo.PrivateSegmentBuffer =
    201     ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    202     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SGPR_128RegClass));
    203   NumUserSGPRs += 4;
    204   return ArgInfo.PrivateSegmentBuffer.getRegister();
    205 }
    206 
    207 Register SIMachineFunctionInfo::addDispatchPtr(const SIRegisterInfo &TRI) {
    208   ArgInfo.DispatchPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    209     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
    210   NumUserSGPRs += 2;
    211   return ArgInfo.DispatchPtr.getRegister();
    212 }
    213 
    214 Register SIMachineFunctionInfo::addQueuePtr(const SIRegisterInfo &TRI) {
    215   ArgInfo.QueuePtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    216     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
    217   NumUserSGPRs += 2;
    218   return ArgInfo.QueuePtr.getRegister();
    219 }
    220 
    221 Register SIMachineFunctionInfo::addKernargSegmentPtr(const SIRegisterInfo &TRI) {
    222   ArgInfo.KernargSegmentPtr
    223     = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    224     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
    225   NumUserSGPRs += 2;
    226   return ArgInfo.KernargSegmentPtr.getRegister();
    227 }
    228 
    229 Register SIMachineFunctionInfo::addDispatchID(const SIRegisterInfo &TRI) {
    230   ArgInfo.DispatchID = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    231     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
    232   NumUserSGPRs += 2;
    233   return ArgInfo.DispatchID.getRegister();
    234 }
    235 
    236 Register SIMachineFunctionInfo::addFlatScratchInit(const SIRegisterInfo &TRI) {
    237   ArgInfo.FlatScratchInit = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    238     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
    239   NumUserSGPRs += 2;
    240   return ArgInfo.FlatScratchInit.getRegister();
    241 }
    242 
    243 Register SIMachineFunctionInfo::addImplicitBufferPtr(const SIRegisterInfo &TRI) {
    244   ArgInfo.ImplicitBufferPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    245     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
    246   NumUserSGPRs += 2;
    247   return ArgInfo.ImplicitBufferPtr.getRegister();
    248 }
    249 
    250 bool SIMachineFunctionInfo::isCalleeSavedReg(const MCPhysReg *CSRegs,
    251                                              MCPhysReg Reg) {
    252   for (unsigned I = 0; CSRegs[I]; ++I) {
    253     if (CSRegs[I] == Reg)
    254       return true;
    255   }
    256 
    257   return false;
    258 }
    259 
    260 /// \p returns true if \p NumLanes slots are available in VGPRs already used for
    261 /// SGPR spilling.
    262 //
    263 // FIXME: This only works after processFunctionBeforeFrameFinalized
    264 bool SIMachineFunctionInfo::haveFreeLanesForSGPRSpill(const MachineFunction &MF,
    265                                                       unsigned NumNeed) const {
    266   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
    267   unsigned WaveSize = ST.getWavefrontSize();
    268   return NumVGPRSpillLanes + NumNeed <= WaveSize * SpillVGPRs.size();
    269 }
    270 
    271 /// Reserve a slice of a VGPR to support spilling for FrameIndex \p FI.
    272 bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF,
    273                                                     int FI) {
    274   std::vector<SpilledReg> &SpillLanes = SGPRToVGPRSpills[FI];
    275 
    276   // This has already been allocated.
    277   if (!SpillLanes.empty())
    278     return true;
    279 
    280   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
    281   const SIRegisterInfo *TRI = ST.getRegisterInfo();
    282   MachineFrameInfo &FrameInfo = MF.getFrameInfo();
    283   MachineRegisterInfo &MRI = MF.getRegInfo();
    284   unsigned WaveSize = ST.getWavefrontSize();
    285   SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
    286 
    287   unsigned Size = FrameInfo.getObjectSize(FI);
    288   unsigned NumLanes = Size / 4;
    289 
    290   if (NumLanes > WaveSize)
    291     return false;
    292 
    293   assert(Size >= 4 && "invalid sgpr spill size");
    294   assert(TRI->spillSGPRToVGPR() && "not spilling SGPRs to VGPRs");
    295 
    296   // Make sure to handle the case where a wide SGPR spill may span between two
    297   // VGPRs.
    298   for (unsigned I = 0; I < NumLanes; ++I, ++NumVGPRSpillLanes) {
    299     Register LaneVGPR;
    300     unsigned VGPRIndex = (NumVGPRSpillLanes % WaveSize);
    301 
    302     // Reserve a VGPR (when NumVGPRSpillLanes = 0, WaveSize, 2*WaveSize, ..) and
    303     // when one of the two conditions is true:
    304     // 1. One reserved VGPR being tracked by VGPRReservedForSGPRSpill is not yet
    305     // reserved.
    306     // 2. All spill lanes of reserved VGPR(s) are full and another spill lane is
    307     // required.
    308     if (FuncInfo->VGPRReservedForSGPRSpill && NumVGPRSpillLanes < WaveSize) {
    309       assert(FuncInfo->VGPRReservedForSGPRSpill == SpillVGPRs.back().VGPR);
    310       LaneVGPR = FuncInfo->VGPRReservedForSGPRSpill;
    311     } else if (VGPRIndex == 0) {
    312       LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
    313       if (LaneVGPR == AMDGPU::NoRegister) {
    314         // We have no VGPRs left for spilling SGPRs. Reset because we will not
    315         // partially spill the SGPR to VGPRs.
    316         SGPRToVGPRSpills.erase(FI);
    317         NumVGPRSpillLanes -= I;
    318         return false;
    319       }
    320 
    321       Optional<int> SpillFI;
    322       // We need to preserve inactive lanes, so always save, even caller-save
    323       // registers.
    324       if (!isEntryFunction()) {
    325         SpillFI = FrameInfo.CreateSpillStackObject(4, Align(4));
    326       }
    327 
    328       SpillVGPRs.push_back(SGPRSpillVGPR(LaneVGPR, SpillFI));
    329 
    330       // Add this register as live-in to all blocks to avoid machine verifer
    331       // complaining about use of an undefined physical register.
    332       for (MachineBasicBlock &BB : MF)
    333         BB.addLiveIn(LaneVGPR);
    334     } else {
    335       LaneVGPR = SpillVGPRs.back().VGPR;
    336     }
    337 
    338     SpillLanes.push_back(SpilledReg(LaneVGPR, VGPRIndex));
    339   }
    340 
    341   return true;
    342 }
    343 
    344 /// Reserve a VGPR for spilling of SGPRs
    345 bool SIMachineFunctionInfo::reserveVGPRforSGPRSpills(MachineFunction &MF) {
    346   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
    347   const SIRegisterInfo *TRI = ST.getRegisterInfo();
    348   SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
    349 
    350   Register LaneVGPR = TRI->findUnusedRegister(
    351       MF.getRegInfo(), &AMDGPU::VGPR_32RegClass, MF, true);
    352   if (LaneVGPR == Register())
    353     return false;
    354   SpillVGPRs.push_back(SGPRSpillVGPR(LaneVGPR, None));
    355   FuncInfo->VGPRReservedForSGPRSpill = LaneVGPR;
    356   return true;
    357 }
    358 
    359 /// Reserve AGPRs or VGPRs to support spilling for FrameIndex \p FI.
    360 /// Either AGPR is spilled to VGPR to vice versa.
    361 /// Returns true if a \p FI can be eliminated completely.
    362 bool SIMachineFunctionInfo::allocateVGPRSpillToAGPR(MachineFunction &MF,
    363                                                     int FI,
    364                                                     bool isAGPRtoVGPR) {
    365   MachineRegisterInfo &MRI = MF.getRegInfo();
    366   MachineFrameInfo &FrameInfo = MF.getFrameInfo();
    367   const GCNSubtarget &ST =  MF.getSubtarget<GCNSubtarget>();
    368 
    369   assert(ST.hasMAIInsts() && FrameInfo.isSpillSlotObjectIndex(FI));
    370 
    371   auto &Spill = VGPRToAGPRSpills[FI];
    372 
    373   // This has already been allocated.
    374   if (!Spill.Lanes.empty())
    375     return Spill.FullyAllocated;
    376 
    377   unsigned Size = FrameInfo.getObjectSize(FI);
    378   unsigned NumLanes = Size / 4;
    379   Spill.Lanes.resize(NumLanes, AMDGPU::NoRegister);
    380 
    381   const TargetRegisterClass &RC =
    382       isAGPRtoVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::AGPR_32RegClass;
    383   auto Regs = RC.getRegisters();
    384 
    385   auto &SpillRegs = isAGPRtoVGPR ? SpillAGPR : SpillVGPR;
    386   const SIRegisterInfo *TRI = ST.getRegisterInfo();
    387   Spill.FullyAllocated = true;
    388 
    389   // FIXME: Move allocation logic out of MachineFunctionInfo and initialize
    390   // once.
    391   BitVector OtherUsedRegs;
    392   OtherUsedRegs.resize(TRI->getNumRegs());
    393 
    394   const uint32_t *CSRMask =
    395       TRI->getCallPreservedMask(MF, MF.getFunction().getCallingConv());
    396   if (CSRMask)
    397     OtherUsedRegs.setBitsInMask(CSRMask);
    398 
    399   // TODO: Should include register tuples, but doesn't matter with current
    400   // usage.
    401   for (MCPhysReg Reg : SpillAGPR)
    402     OtherUsedRegs.set(Reg);
    403   for (MCPhysReg Reg : SpillVGPR)
    404     OtherUsedRegs.set(Reg);
    405 
    406   SmallVectorImpl<MCPhysReg>::const_iterator NextSpillReg = Regs.begin();
    407   for (unsigned I = 0; I < NumLanes; ++I) {
    408     NextSpillReg = std::find_if(
    409         NextSpillReg, Regs.end(), [&MRI, &OtherUsedRegs](MCPhysReg Reg) {
    410           return MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg) &&
    411                  !OtherUsedRegs[Reg];
    412         });
    413 
    414     if (NextSpillReg == Regs.end()) { // Registers exhausted
    415       Spill.FullyAllocated = false;
    416       break;
    417     }
    418 
    419     OtherUsedRegs.set(*NextSpillReg);
    420     SpillRegs.push_back(*NextSpillReg);
    421     Spill.Lanes[I] = *NextSpillReg++;
    422   }
    423 
    424   return Spill.FullyAllocated;
    425 }
    426 
    427 void SIMachineFunctionInfo::removeDeadFrameIndices(MachineFrameInfo &MFI) {
    428   // The FP & BP spills haven't been inserted yet, so keep them around.
    429   for (auto &R : SGPRToVGPRSpills) {
    430     if (R.first != FramePointerSaveIndex && R.first != BasePointerSaveIndex)
    431       MFI.RemoveStackObject(R.first);
    432   }
    433 
    434   // All other SPGRs must be allocated on the default stack, so reset the stack
    435   // ID.
    436   for (int i = MFI.getObjectIndexBegin(), e = MFI.getObjectIndexEnd(); i != e;
    437        ++i)
    438     if (i != FramePointerSaveIndex && i != BasePointerSaveIndex)
    439       MFI.setStackID(i, TargetStackID::Default);
    440 
    441   for (auto &R : VGPRToAGPRSpills) {
    442     if (R.second.FullyAllocated)
    443       MFI.RemoveStackObject(R.first);
    444   }
    445 }
    446 
    447 int SIMachineFunctionInfo::getScavengeFI(MachineFrameInfo &MFI,
    448                                          const SIRegisterInfo &TRI) {
    449   if (ScavengeFI)
    450     return *ScavengeFI;
    451   if (isEntryFunction()) {
    452     ScavengeFI = MFI.CreateFixedObject(
    453         TRI.getSpillSize(AMDGPU::SGPR_32RegClass), 0, false);
    454   } else {
    455     ScavengeFI = MFI.CreateStackObject(
    456         TRI.getSpillSize(AMDGPU::SGPR_32RegClass),
    457         TRI.getSpillAlign(AMDGPU::SGPR_32RegClass), false);
    458   }
    459   return *ScavengeFI;
    460 }
    461 
    462 MCPhysReg SIMachineFunctionInfo::getNextUserSGPR() const {
    463   assert(NumSystemSGPRs == 0 && "System SGPRs must be added after user SGPRs");
    464   return AMDGPU::SGPR0 + NumUserSGPRs;
    465 }
    466 
    467 MCPhysReg SIMachineFunctionInfo::getNextSystemSGPR() const {
    468   return AMDGPU::SGPR0 + NumUserSGPRs + NumSystemSGPRs;
    469 }
    470 
    471 Register
    472 SIMachineFunctionInfo::getGITPtrLoReg(const MachineFunction &MF) const {
    473   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
    474   if (!ST.isAmdPalOS())
    475     return Register();
    476   Register GitPtrLo = AMDGPU::SGPR0; // Low GIT address passed in
    477   if (ST.hasMergedShaders()) {
    478     switch (MF.getFunction().getCallingConv()) {
    479     case CallingConv::AMDGPU_HS:
    480     case CallingConv::AMDGPU_GS:
    481       // Low GIT address is passed in s8 rather than s0 for an LS+HS or
    482       // ES+GS merged shader on gfx9+.
    483       GitPtrLo = AMDGPU::SGPR8;
    484       return GitPtrLo;
    485     default:
    486       return GitPtrLo;
    487     }
    488   }
    489   return GitPtrLo;
    490 }
    491 
    492 static yaml::StringValue regToString(Register Reg,
    493                                      const TargetRegisterInfo &TRI) {
    494   yaml::StringValue Dest;
    495   {
    496     raw_string_ostream OS(Dest.Value);
    497     OS << printReg(Reg, &TRI);
    498   }
    499   return Dest;
    500 }
    501 
    502 static Optional<yaml::SIArgumentInfo>
    503 convertArgumentInfo(const AMDGPUFunctionArgInfo &ArgInfo,
    504                     const TargetRegisterInfo &TRI) {
    505   yaml::SIArgumentInfo AI;
    506 
    507   auto convertArg = [&](Optional<yaml::SIArgument> &A,
    508                         const ArgDescriptor &Arg) {
    509     if (!Arg)
    510       return false;
    511 
    512     // Create a register or stack argument.
    513     yaml::SIArgument SA = yaml::SIArgument::createArgument(Arg.isRegister());
    514     if (Arg.isRegister()) {
    515       raw_string_ostream OS(SA.RegisterName.Value);
    516       OS << printReg(Arg.getRegister(), &TRI);
    517     } else
    518       SA.StackOffset = Arg.getStackOffset();
    519     // Check and update the optional mask.
    520     if (Arg.isMasked())
    521       SA.Mask = Arg.getMask();
    522 
    523     A = SA;
    524     return true;
    525   };
    526 
    527   bool Any = false;
    528   Any |= convertArg(AI.PrivateSegmentBuffer, ArgInfo.PrivateSegmentBuffer);
    529   Any |= convertArg(AI.DispatchPtr, ArgInfo.DispatchPtr);
    530   Any |= convertArg(AI.QueuePtr, ArgInfo.QueuePtr);
    531   Any |= convertArg(AI.KernargSegmentPtr, ArgInfo.KernargSegmentPtr);
    532   Any |= convertArg(AI.DispatchID, ArgInfo.DispatchID);
    533   Any |= convertArg(AI.FlatScratchInit, ArgInfo.FlatScratchInit);
    534   Any |= convertArg(AI.PrivateSegmentSize, ArgInfo.PrivateSegmentSize);
    535   Any |= convertArg(AI.WorkGroupIDX, ArgInfo.WorkGroupIDX);
    536   Any |= convertArg(AI.WorkGroupIDY, ArgInfo.WorkGroupIDY);
    537   Any |= convertArg(AI.WorkGroupIDZ, ArgInfo.WorkGroupIDZ);
    538   Any |= convertArg(AI.WorkGroupInfo, ArgInfo.WorkGroupInfo);
    539   Any |= convertArg(AI.PrivateSegmentWaveByteOffset,
    540                     ArgInfo.PrivateSegmentWaveByteOffset);
    541   Any |= convertArg(AI.ImplicitArgPtr, ArgInfo.ImplicitArgPtr);
    542   Any |= convertArg(AI.ImplicitBufferPtr, ArgInfo.ImplicitBufferPtr);
    543   Any |= convertArg(AI.WorkItemIDX, ArgInfo.WorkItemIDX);
    544   Any |= convertArg(AI.WorkItemIDY, ArgInfo.WorkItemIDY);
    545   Any |= convertArg(AI.WorkItemIDZ, ArgInfo.WorkItemIDZ);
    546 
    547   if (Any)
    548     return AI;
    549 
    550   return None;
    551 }
    552 
    553 yaml::SIMachineFunctionInfo::SIMachineFunctionInfo(
    554     const llvm::SIMachineFunctionInfo &MFI, const TargetRegisterInfo &TRI,
    555     const llvm::MachineFunction &MF)
    556     : ExplicitKernArgSize(MFI.getExplicitKernArgSize()),
    557       MaxKernArgAlign(MFI.getMaxKernArgAlign()), LDSSize(MFI.getLDSSize()),
    558       DynLDSAlign(MFI.getDynLDSAlign()), IsEntryFunction(MFI.isEntryFunction()),
    559       NoSignedZerosFPMath(MFI.hasNoSignedZerosFPMath()),
    560       MemoryBound(MFI.isMemoryBound()), WaveLimiter(MFI.needsWaveLimiter()),
    561       HasSpilledSGPRs(MFI.hasSpilledSGPRs()),
    562       HasSpilledVGPRs(MFI.hasSpilledVGPRs()),
    563       HighBitsOf32BitAddress(MFI.get32BitAddressHighBits()),
    564       Occupancy(MFI.getOccupancy()),
    565       ScratchRSrcReg(regToString(MFI.getScratchRSrcReg(), TRI)),
    566       FrameOffsetReg(regToString(MFI.getFrameOffsetReg(), TRI)),
    567       StackPtrOffsetReg(regToString(MFI.getStackPtrOffsetReg(), TRI)),
    568       ArgInfo(convertArgumentInfo(MFI.getArgInfo(), TRI)), Mode(MFI.getMode()) {
    569   auto SFI = MFI.getOptionalScavengeFI();
    570   if (SFI)
    571     ScavengeFI = yaml::FrameIndex(*SFI, MF.getFrameInfo());
    572 }
    573 
    574 void yaml::SIMachineFunctionInfo::mappingImpl(yaml::IO &YamlIO) {
    575   MappingTraits<SIMachineFunctionInfo>::mapping(YamlIO, *this);
    576 }
    577 
    578 bool SIMachineFunctionInfo::initializeBaseYamlFields(
    579     const yaml::SIMachineFunctionInfo &YamlMFI, const MachineFunction &MF,
    580     PerFunctionMIParsingState &PFS, SMDiagnostic &Error, SMRange &SourceRange) {
    581   ExplicitKernArgSize = YamlMFI.ExplicitKernArgSize;
    582   MaxKernArgAlign = assumeAligned(YamlMFI.MaxKernArgAlign);
    583   LDSSize = YamlMFI.LDSSize;
    584   DynLDSAlign = YamlMFI.DynLDSAlign;
    585   HighBitsOf32BitAddress = YamlMFI.HighBitsOf32BitAddress;
    586   Occupancy = YamlMFI.Occupancy;
    587   IsEntryFunction = YamlMFI.IsEntryFunction;
    588   NoSignedZerosFPMath = YamlMFI.NoSignedZerosFPMath;
    589   MemoryBound = YamlMFI.MemoryBound;
    590   WaveLimiter = YamlMFI.WaveLimiter;
    591   HasSpilledSGPRs = YamlMFI.HasSpilledSGPRs;
    592   HasSpilledVGPRs = YamlMFI.HasSpilledVGPRs;
    593 
    594   if (YamlMFI.ScavengeFI) {
    595     auto FIOrErr = YamlMFI.ScavengeFI->getFI(MF.getFrameInfo());
    596     if (!FIOrErr) {
    597       // Create a diagnostic for a the frame index.
    598       const MemoryBuffer &Buffer =
    599           *PFS.SM->getMemoryBuffer(PFS.SM->getMainFileID());
    600 
    601       Error = SMDiagnostic(*PFS.SM, SMLoc(), Buffer.getBufferIdentifier(), 1, 1,
    602                            SourceMgr::DK_Error, toString(FIOrErr.takeError()),
    603                            "", None, None);
    604       SourceRange = YamlMFI.ScavengeFI->SourceRange;
    605       return true;
    606     }
    607     ScavengeFI = *FIOrErr;
    608   } else {
    609     ScavengeFI = None;
    610   }
    611   return false;
    612 }
    613 
    614 // Remove VGPR which was reserved for SGPR spills if there are no spilled SGPRs
    615 bool SIMachineFunctionInfo::removeVGPRForSGPRSpill(Register ReservedVGPR,
    616                                                    MachineFunction &MF) {
    617   for (auto *i = SpillVGPRs.begin(); i < SpillVGPRs.end(); i++) {
    618     if (i->VGPR == ReservedVGPR) {
    619       SpillVGPRs.erase(i);
    620 
    621       for (MachineBasicBlock &MBB : MF) {
    622         MBB.removeLiveIn(ReservedVGPR);
    623         MBB.sortUniqueLiveIns();
    624       }
    625       this->VGPRReservedForSGPRSpill = AMDGPU::NoRegister;
    626       return true;
    627     }
    628   }
    629   return false;
    630 }
    631