Home | History | Annotate | Line # | Download | only in AMDGPU
      1 //===- SIPreAllocateWWMRegs.cpp - WWM Register Pre-allocation -------------===//
      2 //
      3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
      4 // See https://llvm.org/LICENSE.txt for license information.
      5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
      6 //
      7 //===----------------------------------------------------------------------===//
      8 //
      9 /// \file
     10 /// Pass to pre-allocate WWM registers
     11 //
     12 //===----------------------------------------------------------------------===//
     13 
     14 #include "AMDGPU.h"
     15 #include "GCNSubtarget.h"
     16 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
     17 #include "SIMachineFunctionInfo.h"
     18 #include "llvm/ADT/PostOrderIterator.h"
     19 #include "llvm/CodeGen/LiveIntervals.h"
     20 #include "llvm/CodeGen/LiveRegMatrix.h"
     21 #include "llvm/CodeGen/MachineFunctionPass.h"
     22 #include "llvm/InitializePasses.h"
     23 
     24 using namespace llvm;
     25 
     26 #define DEBUG_TYPE "si-pre-allocate-wwm-regs"
     27 
     28 namespace {
     29 
     30 class SIPreAllocateWWMRegs : public MachineFunctionPass {
     31 private:
     32   const SIInstrInfo *TII;
     33   const SIRegisterInfo *TRI;
     34   MachineRegisterInfo *MRI;
     35   LiveIntervals *LIS;
     36   LiveRegMatrix *Matrix;
     37   VirtRegMap *VRM;
     38   RegisterClassInfo RegClassInfo;
     39 
     40   std::vector<unsigned> RegsToRewrite;
     41 #ifndef NDEBUG
     42   void printWWMInfo(const MachineInstr &MI);
     43 #endif
     44 
     45 public:
     46   static char ID;
     47 
     48   SIPreAllocateWWMRegs() : MachineFunctionPass(ID) {
     49     initializeSIPreAllocateWWMRegsPass(*PassRegistry::getPassRegistry());
     50   }
     51 
     52   bool runOnMachineFunction(MachineFunction &MF) override;
     53 
     54   void getAnalysisUsage(AnalysisUsage &AU) const override {
     55     AU.addRequired<LiveIntervals>();
     56     AU.addPreserved<LiveIntervals>();
     57     AU.addRequired<VirtRegMap>();
     58     AU.addRequired<LiveRegMatrix>();
     59     AU.addPreserved<SlotIndexes>();
     60     AU.setPreservesCFG();
     61     MachineFunctionPass::getAnalysisUsage(AU);
     62   }
     63 
     64 private:
     65   bool processDef(MachineOperand &MO);
     66   void rewriteRegs(MachineFunction &MF);
     67 };
     68 
     69 } // End anonymous namespace.
     70 
     71 INITIALIZE_PASS_BEGIN(SIPreAllocateWWMRegs, DEBUG_TYPE,
     72                 "SI Pre-allocate WWM Registers", false, false)
     73 INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
     74 INITIALIZE_PASS_DEPENDENCY(VirtRegMap)
     75 INITIALIZE_PASS_DEPENDENCY(LiveRegMatrix)
     76 INITIALIZE_PASS_END(SIPreAllocateWWMRegs, DEBUG_TYPE,
     77                 "SI Pre-allocate WWM Registers", false, false)
     78 
     79 char SIPreAllocateWWMRegs::ID = 0;
     80 
     81 char &llvm::SIPreAllocateWWMRegsID = SIPreAllocateWWMRegs::ID;
     82 
     83 FunctionPass *llvm::createSIPreAllocateWWMRegsPass() {
     84   return new SIPreAllocateWWMRegs();
     85 }
     86 
     87 bool SIPreAllocateWWMRegs::processDef(MachineOperand &MO) {
     88   if (!MO.isReg())
     89     return false;
     90 
     91   Register Reg = MO.getReg();
     92   if (Reg.isPhysical())
     93     return false;
     94 
     95   if (!TRI->isVGPR(*MRI, Reg))
     96     return false;
     97 
     98   if (VRM->hasPhys(Reg))
     99     return false;
    100 
    101   LiveInterval &LI = LIS->getInterval(Reg);
    102 
    103   for (MCRegister PhysReg : RegClassInfo.getOrder(MRI->getRegClass(Reg))) {
    104     if (!MRI->isPhysRegUsed(PhysReg) &&
    105         Matrix->checkInterference(LI, PhysReg) == LiveRegMatrix::IK_Free) {
    106       Matrix->assign(LI, PhysReg);
    107       assert(PhysReg != 0);
    108       RegsToRewrite.push_back(Reg);
    109       return true;
    110     }
    111   }
    112 
    113   llvm_unreachable("physreg not found for WWM expression");
    114   return false;
    115 }
    116 
    117 void SIPreAllocateWWMRegs::rewriteRegs(MachineFunction &MF) {
    118   for (MachineBasicBlock &MBB : MF) {
    119     for (MachineInstr &MI : MBB) {
    120       for (MachineOperand &MO : MI.operands()) {
    121         if (!MO.isReg())
    122           continue;
    123 
    124         const Register VirtReg = MO.getReg();
    125         if (VirtReg.isPhysical())
    126           continue;
    127 
    128         if (!VRM->hasPhys(VirtReg))
    129           continue;
    130 
    131         Register PhysReg = VRM->getPhys(VirtReg);
    132         const unsigned SubReg = MO.getSubReg();
    133         if (SubReg != 0) {
    134           PhysReg = TRI->getSubReg(PhysReg, SubReg);
    135           MO.setSubReg(0);
    136         }
    137 
    138         MO.setReg(PhysReg);
    139         MO.setIsRenamable(false);
    140       }
    141     }
    142   }
    143 
    144   SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
    145   MachineFrameInfo &FrameInfo = MF.getFrameInfo();
    146 
    147   for (unsigned Reg : RegsToRewrite) {
    148     LIS->removeInterval(Reg);
    149 
    150     const Register PhysReg = VRM->getPhys(Reg);
    151     assert(PhysReg != 0);
    152 
    153     // Check if PhysReg is already reserved
    154     if (!MFI->WWMReservedRegs.count(PhysReg)) {
    155       Optional<int> FI;
    156       if (!MFI->isEntryFunction()) {
    157         // Create a stack object for a possible spill in the function prologue.
    158         // Note: Non-CSR VGPR also need this as we may overwrite inactive lanes.
    159         const TargetRegisterClass *RC = TRI->getPhysRegClass(PhysReg);
    160         FI = FrameInfo.CreateSpillStackObject(TRI->getSpillSize(*RC),
    161                                               TRI->getSpillAlign(*RC));
    162       }
    163       MFI->reserveWWMRegister(PhysReg, FI);
    164     }
    165   }
    166 
    167   RegsToRewrite.clear();
    168 
    169   // Update the set of reserved registers to include WWM ones.
    170   MRI->freezeReservedRegs(MF);
    171 }
    172 
    173 #ifndef NDEBUG
    174 LLVM_DUMP_METHOD void
    175 SIPreAllocateWWMRegs::printWWMInfo(const MachineInstr &MI) {
    176 
    177   unsigned Opc = MI.getOpcode();
    178 
    179   if (Opc == AMDGPU::ENTER_STRICT_WWM || Opc == AMDGPU::ENTER_STRICT_WQM) {
    180     dbgs() << "Entering ";
    181   } else {
    182     assert(Opc == AMDGPU::EXIT_STRICT_WWM || Opc == AMDGPU::EXIT_STRICT_WQM);
    183     dbgs() << "Exiting ";
    184   }
    185 
    186   if (Opc == AMDGPU::ENTER_STRICT_WWM || Opc == AMDGPU::EXIT_STRICT_WWM) {
    187     dbgs() << "Strict WWM ";
    188   } else {
    189     assert(Opc == AMDGPU::ENTER_STRICT_WQM || Opc == AMDGPU::EXIT_STRICT_WQM);
    190     dbgs() << "Strict WQM ";
    191   }
    192 
    193   dbgs() << "region: " << MI;
    194 }
    195 
    196 #endif
    197 
    198 bool SIPreAllocateWWMRegs::runOnMachineFunction(MachineFunction &MF) {
    199   LLVM_DEBUG(dbgs() << "SIPreAllocateWWMRegs: function " << MF.getName() << "\n");
    200 
    201   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
    202 
    203   TII = ST.getInstrInfo();
    204   TRI = &TII->getRegisterInfo();
    205   MRI = &MF.getRegInfo();
    206 
    207   LIS = &getAnalysis<LiveIntervals>();
    208   Matrix = &getAnalysis<LiveRegMatrix>();
    209   VRM = &getAnalysis<VirtRegMap>();
    210 
    211   RegClassInfo.runOnMachineFunction(MF);
    212 
    213   bool RegsAssigned = false;
    214 
    215   // We use a reverse post-order traversal of the control-flow graph to
    216   // guarantee that we visit definitions in dominance order. Since WWM
    217   // expressions are guaranteed to never involve phi nodes, and we can only
    218   // escape WWM through the special WWM instruction, this means that this is a
    219   // perfect elimination order, so we can never do any better.
    220   ReversePostOrderTraversal<MachineFunction*> RPOT(&MF);
    221 
    222   for (MachineBasicBlock *MBB : RPOT) {
    223     bool InWWM = false;
    224     for (MachineInstr &MI : *MBB) {
    225       if (MI.getOpcode() == AMDGPU::V_SET_INACTIVE_B32 ||
    226           MI.getOpcode() == AMDGPU::V_SET_INACTIVE_B64)
    227         RegsAssigned |= processDef(MI.getOperand(0));
    228 
    229       if (MI.getOpcode() == AMDGPU::ENTER_STRICT_WWM ||
    230           MI.getOpcode() == AMDGPU::ENTER_STRICT_WQM) {
    231         LLVM_DEBUG(printWWMInfo(MI));
    232         InWWM = true;
    233         continue;
    234       }
    235 
    236       if (MI.getOpcode() == AMDGPU::EXIT_STRICT_WWM ||
    237           MI.getOpcode() == AMDGPU::EXIT_STRICT_WQM) {
    238         LLVM_DEBUG(printWWMInfo(MI));
    239         InWWM = false;
    240       }
    241 
    242       if (!InWWM)
    243         continue;
    244 
    245       LLVM_DEBUG(dbgs() << "Processing " << MI);
    246 
    247       for (MachineOperand &DefOpnd : MI.defs()) {
    248         RegsAssigned |= processDef(DefOpnd);
    249       }
    250     }
    251   }
    252 
    253   if (!RegsAssigned)
    254     return false;
    255 
    256   rewriteRegs(MF);
    257   return true;
    258 }
    259