Home | History | Annotate | Line # | Download | only in NVPTX
      1 //===-- NVPTXPeephole.cpp - NVPTX Peephole Optimizations ------------------===//
      2 //
      3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
      4 // See https://llvm.org/LICENSE.txt for license information.
      5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
      6 //
      7 //===----------------------------------------------------------------------===//
      8 //
      9 // In NVPTX, NVPTXFrameLowering emits the following instructions at the
     10 // beginning of a MachineFunction.
     11 //
     12 //   mov %SPL, %depot
     13 //   cvta.local %SP, %SPL
     14 //
     15 // Because Frame Index is a generic address and alloca can only return generic
     16 // pointer, without this pass the instructions producing alloca'ed address will
     17 // be based on %SP. NVPTXLowerAlloca tends to help replace store and load on
     18 // this address with their .local versions, but this may introduce a lot of
     19 // cvta.to.local instructions. Performance can be improved if we avoid casting
     20 // address back and forth and directly calculate local address based on %SPL.
     21 // This peephole pass optimizes these cases.
     22 //
     23 // For example, it will transform the following pattern
     24 //    %0 = LEA_ADDRi64 %VRFrame, 4
     25 //    %1 = cvta_to_local_yes_64 %0
     26 //
     27 // into
     28 //    %1 = LEA_ADDRi64 %VRFrameLocal, 4
     29 //
     30 // %VRFrameLocal is the virtual register name of %SPL
     31 //
     32 //===----------------------------------------------------------------------===//
     33 
     34 #include "NVPTX.h"
     35 #include "llvm/CodeGen/MachineFunctionPass.h"
     36 #include "llvm/CodeGen/MachineInstrBuilder.h"
     37 #include "llvm/CodeGen/MachineRegisterInfo.h"
     38 #include "llvm/CodeGen/TargetInstrInfo.h"
     39 #include "llvm/CodeGen/TargetRegisterInfo.h"
     40 
     41 using namespace llvm;
     42 
     43 #define DEBUG_TYPE "nvptx-peephole"
     44 
     45 namespace llvm {
     46 void initializeNVPTXPeepholePass(PassRegistry &);
     47 }
     48 
     49 namespace {
     50 struct NVPTXPeephole : public MachineFunctionPass {
     51  public:
     52   static char ID;
     53   NVPTXPeephole() : MachineFunctionPass(ID) {
     54     initializeNVPTXPeepholePass(*PassRegistry::getPassRegistry());
     55   }
     56 
     57   bool runOnMachineFunction(MachineFunction &MF) override;
     58 
     59   StringRef getPassName() const override {
     60     return "NVPTX optimize redundant cvta.to.local instruction";
     61   }
     62 
     63   void getAnalysisUsage(AnalysisUsage &AU) const override {
     64     MachineFunctionPass::getAnalysisUsage(AU);
     65   }
     66 };
     67 }
     68 
     69 char NVPTXPeephole::ID = 0;
     70 
     71 INITIALIZE_PASS(NVPTXPeephole, "nvptx-peephole", "NVPTX Peephole", false, false)
     72 
     73 static bool isCVTAToLocalCombinationCandidate(MachineInstr &Root) {
     74   auto &MBB = *Root.getParent();
     75   auto &MF = *MBB.getParent();
     76   // Check current instruction is cvta.to.local
     77   if (Root.getOpcode() != NVPTX::cvta_to_local_yes_64 &&
     78       Root.getOpcode() != NVPTX::cvta_to_local_yes)
     79     return false;
     80 
     81   auto &Op = Root.getOperand(1);
     82   const auto &MRI = MF.getRegInfo();
     83   MachineInstr *GenericAddrDef = nullptr;
     84   if (Op.isReg() && Register::isVirtualRegister(Op.getReg())) {
     85     GenericAddrDef = MRI.getUniqueVRegDef(Op.getReg());
     86   }
     87 
     88   // Check the register operand is uniquely defined by LEA_ADDRi instruction
     89   if (!GenericAddrDef || GenericAddrDef->getParent() != &MBB ||
     90       (GenericAddrDef->getOpcode() != NVPTX::LEA_ADDRi64 &&
     91        GenericAddrDef->getOpcode() != NVPTX::LEA_ADDRi)) {
     92     return false;
     93   }
     94 
     95   // Check the LEA_ADDRi operand is Frame index
     96   auto &BaseAddrOp = GenericAddrDef->getOperand(1);
     97   if (BaseAddrOp.isReg() && BaseAddrOp.getReg() == NVPTX::VRFrame) {
     98     return true;
     99   }
    100 
    101   return false;
    102 }
    103 
    104 static void CombineCVTAToLocal(MachineInstr &Root) {
    105   auto &MBB = *Root.getParent();
    106   auto &MF = *MBB.getParent();
    107   const auto &MRI = MF.getRegInfo();
    108   const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
    109   auto &Prev = *MRI.getUniqueVRegDef(Root.getOperand(1).getReg());
    110 
    111   MachineInstrBuilder MIB =
    112       BuildMI(MF, Root.getDebugLoc(), TII->get(Prev.getOpcode()),
    113               Root.getOperand(0).getReg())
    114           .addReg(NVPTX::VRFrameLocal)
    115           .add(Prev.getOperand(2));
    116 
    117   MBB.insert((MachineBasicBlock::iterator)&Root, MIB);
    118 
    119   // Check if MRI has only one non dbg use, which is Root
    120   if (MRI.hasOneNonDBGUse(Prev.getOperand(0).getReg())) {
    121     Prev.eraseFromParentAndMarkDBGValuesForRemoval();
    122   }
    123   Root.eraseFromParentAndMarkDBGValuesForRemoval();
    124 }
    125 
    126 bool NVPTXPeephole::runOnMachineFunction(MachineFunction &MF) {
    127   if (skipFunction(MF.getFunction()))
    128     return false;
    129 
    130   bool Changed = false;
    131   // Loop over all of the basic blocks.
    132   for (auto &MBB : MF) {
    133     // Traverse the basic block.
    134     auto BlockIter = MBB.begin();
    135 
    136     while (BlockIter != MBB.end()) {
    137       auto &MI = *BlockIter++;
    138       if (isCVTAToLocalCombinationCandidate(MI)) {
    139         CombineCVTAToLocal(MI);
    140         Changed = true;
    141       }
    142     }  // Instruction
    143   }    // Basic Block
    144 
    145   // Remove unnecessary %VRFrame = cvta.local %VRFrameLocal
    146   const auto &MRI = MF.getRegInfo();
    147   if (MRI.use_empty(NVPTX::VRFrame)) {
    148     if (auto MI = MRI.getUniqueVRegDef(NVPTX::VRFrame)) {
    149       MI->eraseFromParentAndMarkDBGValuesForRemoval();
    150     }
    151   }
    152 
    153   return Changed;
    154 }
    155 
    156 MachineFunctionPass *llvm::createNVPTXPeephole() { return new NVPTXPeephole(); }
    157