Home | History | Annotate | Line # | Download | only in AMDGPU
      1 //===- SIInsertHardClauses.cpp - Insert Hard Clauses ----------------------===//
      2 //
      3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
      4 // See https://llvm.org/LICENSE.txt for license information.
      5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
      6 //
      7 //===----------------------------------------------------------------------===//
      8 //
      9 /// \file
     10 /// Insert s_clause instructions to form hard clauses.
     11 ///
     12 /// Clausing load instructions can give cache coherency benefits. Before gfx10,
     13 /// the hardware automatically detected "soft clauses", which were sequences of
     14 /// memory instructions of the same type. In gfx10 this detection was removed,
     15 /// and the s_clause instruction was introduced to explicitly mark "hard
     16 /// clauses".
     17 ///
     18 /// It's the scheduler's job to form the clauses by putting similar memory
     19 /// instructions next to each other. Our job is just to insert an s_clause
     20 /// instruction to mark the start of each clause.
     21 ///
     22 /// Note that hard clauses are very similar to, but logically distinct from, the
     23 /// groups of instructions that have to be restartable when XNACK is enabled.
     24 /// The rules are slightly different in each case. For example an s_nop
     25 /// instruction breaks a restartable group, but can appear in the middle of a
     26 /// hard clause. (Before gfx10 there wasn't a distinction, and both were called
     27 /// "soft clauses" or just "clauses".)
     28 ///
     29 /// The SIFormMemoryClauses pass and GCNHazardRecognizer deal with restartable
     30 /// groups, not hard clauses.
     31 //
     32 //===----------------------------------------------------------------------===//
     33 
     34 #include "AMDGPU.h"
     35 #include "GCNSubtarget.h"
     36 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
     37 #include "llvm/ADT/SmallVector.h"
     38 
     39 using namespace llvm;
     40 
     41 #define DEBUG_TYPE "si-insert-hard-clauses"
     42 
     43 namespace {
     44 
/// How an instruction relates to hard clause formation. The "real" types come
/// first so that a single comparison against LAST_REAL_HARDCLAUSE_TYPE tells
/// whether an instruction can be a member of a clause.
enum HardClauseType {
  /// Memory accesses through texture, buffer, global or scratch instructions.
  HARDCLAUSE_VMEM = 0,
  /// Flat memory instructions that are neither global nor scratch.
  HARDCLAUSE_FLAT,
  /// Instructions accessing LDS.
  HARDCLAUSE_LDS,
  /// Scalar memory instructions.
  HARDCLAUSE_SMEM,
  /// VALU instructions.
  HARDCLAUSE_VALU,
  /// Alias for the highest-valued "real" clause type.
  LAST_REAL_HARDCLAUSE_TYPE = HARDCLAUSE_VALU,

  /// Internal instructions. Apart from s_waitcnt, these may sit in the middle
  /// of a hard clause.
  HARDCLAUSE_INTERNAL,
  /// Everything that must not appear in a hard clause: SALU, export, branch,
  /// message, GDS, s_waitcnt and any instruction not covered above.
  HARDCLAUSE_ILLEGAL,
};
     65 
     66 class SIInsertHardClauses : public MachineFunctionPass {
     67 public:
     68   static char ID;
     69   const GCNSubtarget *ST = nullptr;
     70 
     71   SIInsertHardClauses() : MachineFunctionPass(ID) {}
     72 
     73   void getAnalysisUsage(AnalysisUsage &AU) const override {
     74     AU.setPreservesCFG();
     75     MachineFunctionPass::getAnalysisUsage(AU);
     76   }
     77 
     78   HardClauseType getHardClauseType(const MachineInstr &MI) {
     79 
     80     // On current architectures we only get a benefit from clausing loads.
     81     if (MI.mayLoad()) {
     82       if (SIInstrInfo::isVMEM(MI) || SIInstrInfo::isSegmentSpecificFLAT(MI)) {
     83         if (ST->hasNSAClauseBug()) {
     84           const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode());
     85           if (Info && Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA)
     86             return HARDCLAUSE_ILLEGAL;
     87         }
     88         return HARDCLAUSE_VMEM;
     89       }
     90       if (SIInstrInfo::isFLAT(MI))
     91         return HARDCLAUSE_FLAT;
     92       // TODO: LDS
     93       if (SIInstrInfo::isSMRD(MI))
     94         return HARDCLAUSE_SMEM;
     95     }
     96 
     97     // Don't form VALU clauses. It's not clear what benefit they give, if any.
     98 
     99     // In practice s_nop is the only internal instruction we're likely to see.
    100     // It's safe to treat the rest as illegal.
    101     if (MI.getOpcode() == AMDGPU::S_NOP)
    102       return HARDCLAUSE_INTERNAL;
    103     return HARDCLAUSE_ILLEGAL;
    104   }
    105 
    106   // Track information about a clause as we discover it.
    107   struct ClauseInfo {
    108     // The type of all (non-internal) instructions in the clause.
    109     HardClauseType Type = HARDCLAUSE_ILLEGAL;
    110     // The first (necessarily non-internal) instruction in the clause.
    111     MachineInstr *First = nullptr;
    112     // The last non-internal instruction in the clause.
    113     MachineInstr *Last = nullptr;
    114     // The length of the clause including any internal instructions in the
    115     // middle or after the end of the clause.
    116     unsigned Length = 0;
    117     // The base operands of *Last.
    118     SmallVector<const MachineOperand *, 4> BaseOps;
    119   };
    120 
    121   bool emitClause(const ClauseInfo &CI, const SIInstrInfo *SII) {
    122     // Get the size of the clause excluding any internal instructions at the
    123     // end.
    124     unsigned Size =
    125         std::distance(CI.First->getIterator(), CI.Last->getIterator()) + 1;
    126     if (Size < 2)
    127       return false;
    128     assert(Size <= 64 && "Hard clause is too long!");
    129 
    130     auto &MBB = *CI.First->getParent();
    131     auto ClauseMI =
    132         BuildMI(MBB, *CI.First, DebugLoc(), SII->get(AMDGPU::S_CLAUSE))
    133             .addImm(Size - 1);
    134     finalizeBundle(MBB, ClauseMI->getIterator(),
    135                    std::next(CI.Last->getIterator()));
    136     return true;
    137   }
    138 
    139   bool runOnMachineFunction(MachineFunction &MF) override {
    140     if (skipFunction(MF.getFunction()))
    141       return false;
    142 
    143     ST = &MF.getSubtarget<GCNSubtarget>();
    144     if (!ST->hasHardClauses())
    145       return false;
    146 
    147     const SIInstrInfo *SII = ST->getInstrInfo();
    148     const TargetRegisterInfo *TRI = ST->getRegisterInfo();
    149 
    150     bool Changed = false;
    151     for (auto &MBB : MF) {
    152       ClauseInfo CI;
    153       for (auto &MI : MBB) {
    154         HardClauseType Type = getHardClauseType(MI);
    155 
    156         int64_t Dummy1;
    157         bool Dummy2;
    158         unsigned Dummy3;
    159         SmallVector<const MachineOperand *, 4> BaseOps;
    160         if (Type <= LAST_REAL_HARDCLAUSE_TYPE) {
    161           if (!SII->getMemOperandsWithOffsetWidth(MI, BaseOps, Dummy1, Dummy2,
    162                                                   Dummy3, TRI)) {
    163             // We failed to get the base operands, so we'll never clause this
    164             // instruction with any other, so pretend it's illegal.
    165             Type = HARDCLAUSE_ILLEGAL;
    166           }
    167         }
    168 
    169         if (CI.Length == 64 ||
    170             (CI.Length && Type != HARDCLAUSE_INTERNAL &&
    171              (Type != CI.Type ||
    172               // Note that we lie to shouldClusterMemOps about the size of the
    173               // cluster. When shouldClusterMemOps is called from the machine
    174               // scheduler it limits the size of the cluster to avoid increasing
    175               // register pressure too much, but this pass runs after register
    176               // allocation so there is no need for that kind of limit.
    177               !SII->shouldClusterMemOps(CI.BaseOps, BaseOps, 2, 2)))) {
    178           // Finish the current clause.
    179           Changed |= emitClause(CI, SII);
    180           CI = ClauseInfo();
    181         }
    182 
    183         if (CI.Length) {
    184           // Extend the current clause.
    185           ++CI.Length;
    186           if (Type != HARDCLAUSE_INTERNAL) {
    187             CI.Last = &MI;
    188             CI.BaseOps = std::move(BaseOps);
    189           }
    190         } else if (Type <= LAST_REAL_HARDCLAUSE_TYPE) {
    191           // Start a new clause.
    192           CI = ClauseInfo{Type, &MI, &MI, 1, std::move(BaseOps)};
    193         }
    194       }
    195 
    196       // Finish the last clause in the basic block if any.
    197       if (CI.Length)
    198         Changed |= emitClause(CI, SII);
    199     }
    200 
    201     return Changed;
    202   }
    203 };
    204 
    205 } // namespace
    206 
// Out-of-class definition of the pass's static ID member.
char SIInsertHardClauses::ID = 0;

// Expose the pass ID as llvm::SIInsertHardClausesID so that code outside this
// file can refer to the pass without seeing the class definition.
char &llvm::SIInsertHardClausesID = SIInsertHardClauses::ID;

// Register the pass under the DEBUG_TYPE name with a human-readable
// description.
// NOTE(review): the two trailing flags are presumably the macro's "CFG only"
// and "is analysis" parameters - confirm against PassSupport.h.
INITIALIZE_PASS(SIInsertHardClauses, DEBUG_TYPE, "SI Insert Hard Clauses",
                false, false)
    213