Home | History | Annotate | Line # | Download | only in X86
      1 //===------- X86InsertPrefetch.cpp - Insert cache prefetch hints ----------===//
      2 //
      3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
      4 // See https://llvm.org/LICENSE.txt for license information.
      5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
      6 //
      7 //===----------------------------------------------------------------------===//
      8 //
      9 // This pass applies cache prefetch instructions based on a profile. The pass
     10 // assumes DiscriminateMemOps ran immediately before, to ensure debug info
     11 // matches the one used at profile generation time. The profile is encoded in
     12 // afdo format (text or binary). It contains prefetch hints recommendations.
     13 // Each recommendation is made in terms of debug info locations, a type (i.e.
     14 // nta, t{0|1|2}) and a delta. The debug info identifies an instruction with a
     15 // memory operand (see X86DiscriminateMemOps). The prefetch will be made for
     16 // a location at that memory operand + the delta specified in the
     17 // recommendation.
     18 //
     19 //===----------------------------------------------------------------------===//
     20 
     21 #include "X86.h"
     22 #include "X86InstrBuilder.h"
     23 #include "X86InstrInfo.h"
     24 #include "X86MachineFunctionInfo.h"
     25 #include "X86Subtarget.h"
     26 #include "llvm/CodeGen/MachineModuleInfo.h"
     27 #include "llvm/IR/DebugInfoMetadata.h"
     28 #include "llvm/ProfileData/SampleProf.h"
     29 #include "llvm/ProfileData/SampleProfReader.h"
     30 #include "llvm/Transforms/IPO/SampleProfile.h"
     31 using namespace llvm;
     32 using namespace sampleprof;
     33 
// Hidden command-line option holding the path of the profile that carries the
// prefetch hints. createX86InsertPrefetchPass() passes its value to the pass;
// when it is empty, X86InsertPrefetch::doInitialization() creates no profile
// reader and runOnMachineFunction() leaves every function untouched.
static cl::opt<std::string>
    PrefetchHintsFile("prefetch-hints-file",
                      cl::desc("Path to the prefetch hints profile. See also "
                               "-x86-discriminate-memops"),
                      cl::Hidden);
     39 namespace {
     40 
     41 class X86InsertPrefetch : public MachineFunctionPass {
     42   void getAnalysisUsage(AnalysisUsage &AU) const override;
     43   bool doInitialization(Module &) override;
     44 
     45   bool runOnMachineFunction(MachineFunction &MF) override;
     46   struct PrefetchInfo {
     47     unsigned InstructionID;
     48     int64_t Delta;
     49   };
     50   typedef SmallVectorImpl<PrefetchInfo> Prefetches;
     51   bool findPrefetchInfo(const FunctionSamples *Samples, const MachineInstr &MI,
     52                         Prefetches &prefetches) const;
     53 
     54 public:
     55   static char ID;
     56   X86InsertPrefetch(const std::string &PrefetchHintsFilename);
     57   StringRef getPassName() const override {
     58     return "X86 Insert Cache Prefetches";
     59   }
     60 
     61 private:
     62   std::string Filename;
     63   std::unique_ptr<SampleProfileReader> Reader;
     64 };
     65 
// Prefetch hints are carried in the profile's call-target map for a location.
// This file walks its entries as (name, value) pairs: the name encodes the
// hint and the value is reinterpreted as the signed delta.
using PrefetchHints = SampleRecord::CallTargetMap;
     67 
     68 // Return any prefetching hints for the specified MachineInstruction. The hints
     69 // are returned as pairs (name, delta).
     70 ErrorOr<PrefetchHints> getPrefetchHints(const FunctionSamples *TopSamples,
     71                                         const MachineInstr &MI) {
     72   if (const auto &Loc = MI.getDebugLoc())
     73     if (const auto *Samples = TopSamples->findFunctionSamples(Loc))
     74       return Samples->findCallTargetMapAt(FunctionSamples::getOffset(Loc),
     75                                           Loc->getBaseDiscriminator());
     76   return std::error_code();
     77 }
     78 
     79 // The prefetch instruction can't take memory operands involving vector
     80 // registers.
     81 bool IsMemOpCompatibleWithPrefetch(const MachineInstr &MI, int Op) {
     82   Register BaseReg = MI.getOperand(Op + X86::AddrBaseReg).getReg();
     83   Register IndexReg = MI.getOperand(Op + X86::AddrIndexReg).getReg();
     84   return (BaseReg == 0 ||
     85           X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg) ||
     86           X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg)) &&
     87          (IndexReg == 0 ||
     88           X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg) ||
     89           X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg));
     90 }
     91 
     92 } // end anonymous namespace
     93 
     94 //===----------------------------------------------------------------------===//
     95 //            Implementation
     96 //===----------------------------------------------------------------------===//
     97 
// Pass identifier; the X86InsertPrefetch constructor hands it to the
// MachineFunctionPass base class.
char X86InsertPrefetch::ID = 0;
     99 
/// Construct the pass and remember \p PrefetchHintsFilename as the profile
/// path. Nothing is opened here; the profile is loaded by doInitialization().
X86InsertPrefetch::X86InsertPrefetch(const std::string &PrefetchHintsFilename)
    : MachineFunctionPass(ID), Filename(PrefetchHintsFilename) {}
    102 
    103 /// Return true if the provided MachineInstruction has cache prefetch hints. In
    104 /// that case, the prefetch hints are stored, in order, in the Prefetches
    105 /// vector.
    106 bool X86InsertPrefetch::findPrefetchInfo(const FunctionSamples *TopSamples,
    107                                          const MachineInstr &MI,
    108                                          Prefetches &Prefetches) const {
    109   assert(Prefetches.empty() &&
    110          "Expected caller passed empty PrefetchInfo vector.");
    111   static constexpr std::pair<StringLiteral, unsigned> HintTypes[] = {
    112       {"_nta_", X86::PREFETCHNTA},
    113       {"_t0_", X86::PREFETCHT0},
    114       {"_t1_", X86::PREFETCHT1},
    115       {"_t2_", X86::PREFETCHT2},
    116   };
    117   static const char *SerializedPrefetchPrefix = "__prefetch";
    118 
    119   const ErrorOr<PrefetchHints> T = getPrefetchHints(TopSamples, MI);
    120   if (!T)
    121     return false;
    122   int16_t max_index = -1;
    123   // Convert serialized prefetch hints into PrefetchInfo objects, and populate
    124   // the Prefetches vector.
    125   for (const auto &S_V : *T) {
    126     StringRef Name = S_V.getKey();
    127     if (Name.consume_front(SerializedPrefetchPrefix)) {
    128       int64_t D = static_cast<int64_t>(S_V.second);
    129       unsigned IID = 0;
    130       for (const auto &HintType : HintTypes) {
    131         if (Name.startswith(HintType.first)) {
    132           Name = Name.drop_front(HintType.first.size());
    133           IID = HintType.second;
    134           break;
    135         }
    136       }
    137       if (IID == 0)
    138         return false;
    139       uint8_t index = 0;
    140       Name.consumeInteger(10, index);
    141 
    142       if (index >= Prefetches.size())
    143         Prefetches.resize(index + 1);
    144       Prefetches[index] = {IID, D};
    145       max_index = std::max(max_index, static_cast<int16_t>(index));
    146     }
    147   }
    148   assert(max_index + 1 >= 0 &&
    149          "Possible overflow: max_index + 1 should be positive.");
    150   assert(static_cast<size_t>(max_index + 1) == Prefetches.size() &&
    151          "The number of prefetch hints received should match the number of "
    152          "PrefetchInfo objects returned");
    153   return !Prefetches.empty();
    154 }
    155 
    156 bool X86InsertPrefetch::doInitialization(Module &M) {
    157   if (Filename.empty())
    158     return false;
    159 
    160   LLVMContext &Ctx = M.getContext();
    161   ErrorOr<std::unique_ptr<SampleProfileReader>> ReaderOrErr =
    162       SampleProfileReader::create(Filename, Ctx);
    163   if (std::error_code EC = ReaderOrErr.getError()) {
    164     std::string Msg = "Could not open profile: " + EC.message();
    165     Ctx.diagnose(DiagnosticInfoSampleProfile(Filename, Msg,
    166                                              DiagnosticSeverity::DS_Warning));
    167     return false;
    168   }
    169   Reader = std::move(ReaderOrErr.get());
    170   Reader->read();
    171   return true;
    172 }
    173 
/// Report this pass's analysis usage: mark all analyses as preserved, then let
/// MachineFunctionPass add its own entries to \p AU.
// NOTE(review): runOnMachineFunction() inserts new instructions; confirm that
// declaring every analysis preserved is intended.
void X86InsertPrefetch::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.setPreservesAll();
  MachineFunctionPass::getAnalysisUsage(AU);
}
    178 
/// Insert the prefetches that the loaded profile recommends for \p MF.
///
/// For every instruction with a memory operand that a prefetch can address,
/// the hints recorded for that instruction are looked up and one prefetch
/// instruction per hint is inserted immediately before it. Each prefetch
/// reuses the instruction's address operands, with the hint's delta added to
/// the displacement.
///
/// \returns true if at least one prefetch was inserted; false otherwise,
/// including when no profile was loaded or it has no samples for \p MF.
bool X86InsertPrefetch::runOnMachineFunction(MachineFunction &MF) {
  // No profile was loaded by doInitialization(): nothing to do.
  if (!Reader)
    return false;
  const FunctionSamples *Samples = Reader->getSamplesFor(MF.getFunction());
  if (!Samples)
    return false;

  bool Changed = false;

  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
  // Scratch vector reused (and cleared) for each candidate instruction.
  SmallVector<PrefetchInfo, 4> Prefetches;
  for (auto &MBB : MF) {
    for (auto MI = MBB.instr_begin(); MI != MBB.instr_end();) {
      // Step the iterator past the instruction before anything is inserted.
      // Prefetches go in front of Current, so the walk never visits them.
      auto Current = MI;
      ++MI;

      // A negative result means the instruction has no memory operand.
      int Offset = X86II::getMemoryOperandNo(Current->getDesc().TSFlags);
      if (Offset < 0)
        continue;
      // Index of the first operand of the memory reference within Current.
      unsigned Bias = X86II::getOperandBias(Current->getDesc());
      int MemOpOffset = Offset + Bias;
      // FIXME(mtrofin): ORE message when the recommendation cannot be taken.
      if (!IsMemOpCompatibleWithPrefetch(*Current, MemOpOffset))
        continue;
      Prefetches.clear();
      if (!findPrefetchInfo(Samples, *Current, Prefetches))
        continue;
      assert(!Prefetches.empty() &&
             "The Prefetches vector should contain at least a value if "
             "findPrefetchInfo returned true.");
      for (auto &PrefInfo : Prefetches) {
        unsigned PFetchInstrID = PrefInfo.InstructionID;
        int64_t Delta = PrefInfo.Delta;
        const MCInstrDesc &Desc = TII->get(PFetchInstrID);
        // The prefetch inherits the debug location of the hinted instruction.
        MachineInstr *PFetch =
            MF.CreateMachineInstr(Desc, Current->getDebugLoc(), true);
        MachineInstrBuilder MIB(MF, PFetch);

        static_assert(X86::AddrBaseReg == 0 && X86::AddrScaleAmt == 1 &&
                          X86::AddrIndexReg == 2 && X86::AddrDisp == 3 &&
                          X86::AddrSegmentReg == 4,
                      "Unexpected change in X86 operand offset order.");

        // This assumes X86::AddrBaseReg = 0, {...}ScaleAmt = 1, etc., so the
        // operands below are appended in exactly that order.
        // FIXME(mtrofin): consider adding a:
        //     MachineInstrBuilder::set(unsigned offset, op).
        // NOTE(review): getImm() is called on the displacement operand, which
        // assumes it is an immediate; confirm other displacement kinds cannot
        // reach this point.
        MIB.addReg(Current->getOperand(MemOpOffset + X86::AddrBaseReg).getReg())
            .addImm(
                Current->getOperand(MemOpOffset + X86::AddrScaleAmt).getImm())
            .addReg(
                Current->getOperand(MemOpOffset + X86::AddrIndexReg).getReg())
            .addImm(Current->getOperand(MemOpOffset + X86::AddrDisp).getImm() +
                    Delta)
            .addReg(Current->getOperand(MemOpOffset + X86::AddrSegmentReg)
                        .getReg());

        // Derive the prefetch's memory operand from Current's first one, with
        // its offset shifted by Delta. Any further memory operands of Current
        // are not consulted.
        if (!Current->memoperands_empty()) {
          MachineMemOperand *CurrentOp = *(Current->memoperands_begin());
          MIB.addMemOperand(MF.getMachineMemOperand(
              CurrentOp, CurrentOp->getOffset() + Delta, CurrentOp->getSize()));
        }

        // Insert before Current. This is because Current may clobber some of
        // the registers used to describe the input memory operand.
        MBB.insert(Current, PFetch);
        Changed = true;
      }
    }
  }
  return Changed;
}
    250 
/// Factory for the pass: returns a newly allocated X86InsertPrefetch that is
/// configured with the current value of the -prefetch-hints-file option.
// NOTE(review): the returned pointer is a raw `new`; presumably the caller
// (the pass manager) takes ownership — confirm.
FunctionPass *llvm::createX86InsertPrefetchPass() {
  return new X86InsertPrefetch(PrefetchHintsFile);
}
    254