Home | History | Annotate | Line # | Download | only in PowerPC
      1 //===-- PPCHazardRecognizers.cpp - PowerPC Hazard Recognizer Impls --------===//
      2 //
      3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
      4 // See https://llvm.org/LICENSE.txt for license information.
      5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
      6 //
      7 //===----------------------------------------------------------------------===//
      8 //
      9 // This file implements hazard recognizers for scheduling on PowerPC processors.
     10 //
     11 //===----------------------------------------------------------------------===//
     12 
     13 #include "PPCHazardRecognizers.h"
     14 #include "PPCInstrInfo.h"
     15 #include "PPCSubtarget.h"
     16 #include "llvm/CodeGen/ScheduleDAG.h"
     17 #include "llvm/Support/Debug.h"
     18 #include "llvm/Support/ErrorHandling.h"
     19 #include "llvm/Support/raw_ostream.h"
     20 using namespace llvm;
     21 
     22 #define DEBUG_TYPE "pre-RA-sched"
     23 
     24 bool PPCDispatchGroupSBHazardRecognizer::isLoadAfterStore(SUnit *SU) {
     25   // FIXME: Move this.
     26   if (isBCTRAfterSet(SU))
     27     return true;
     28 
     29   const MCInstrDesc *MCID = DAG->getInstrDesc(SU);
     30   if (!MCID)
     31     return false;
     32 
     33   if (!MCID->mayLoad())
     34     return false;
     35 
     36   // SU is a load; for any predecessors in this dispatch group, that are stores,
     37   // and with which we have an ordering dependency, return true.
     38   for (unsigned i = 0, ie = (unsigned) SU->Preds.size(); i != ie; ++i) {
     39     const MCInstrDesc *PredMCID = DAG->getInstrDesc(SU->Preds[i].getSUnit());
     40     if (!PredMCID || !PredMCID->mayStore())
     41       continue;
     42 
     43     if (!SU->Preds[i].isNormalMemory() && !SU->Preds[i].isBarrier())
     44       continue;
     45 
     46     for (unsigned j = 0, je = CurGroup.size(); j != je; ++j)
     47       if (SU->Preds[i].getSUnit() == CurGroup[j])
     48         return true;
     49   }
     50 
     51   return false;
     52 }
     53 
     54 bool PPCDispatchGroupSBHazardRecognizer::isBCTRAfterSet(SUnit *SU) {
     55   const MCInstrDesc *MCID = DAG->getInstrDesc(SU);
     56   if (!MCID)
     57     return false;
     58 
     59   if (!MCID->isBranch())
     60     return false;
     61 
     62   // SU is a branch; for any predecessors in this dispatch group, with which we
     63   // have a data dependence and set the counter register, return true.
     64   for (unsigned i = 0, ie = (unsigned) SU->Preds.size(); i != ie; ++i) {
     65     const MCInstrDesc *PredMCID = DAG->getInstrDesc(SU->Preds[i].getSUnit());
     66     if (!PredMCID || PredMCID->getSchedClass() != PPC::Sched::IIC_SprMTSPR)
     67       continue;
     68 
     69     if (SU->Preds[i].isCtrl())
     70       continue;
     71 
     72     for (unsigned j = 0, je = CurGroup.size(); j != je; ++j)
     73       if (SU->Preds[i].getSUnit() == CurGroup[j])
     74         return true;
     75   }
     76 
     77   return false;
     78 }
     79 
// FIXME: Remove this forward declaration once it is no longer needed here.
// It is used by mustComeFirst() to detect record-form opcodes.
namespace llvm {
namespace PPC {
extern int getNonRecordFormOpcode(uint16_t);
} // namespace PPC
} // namespace llvm
     82 
     83 // FIXME: A lot of code in PPCDispatchGroupSBHazardRecognizer is P7 specific.
     84 
     85 bool PPCDispatchGroupSBHazardRecognizer::mustComeFirst(const MCInstrDesc *MCID,
     86                                                        unsigned &NSlots) {
     87   // FIXME: Indirectly, this information is contained in the itinerary, and
     88   // we should derive it from there instead of separately specifying it
     89   // here.
     90   unsigned IIC = MCID->getSchedClass();
     91   switch (IIC) {
     92   default:
     93     NSlots = 1;
     94     break;
     95   case PPC::Sched::IIC_IntDivW:
     96   case PPC::Sched::IIC_IntDivD:
     97   case PPC::Sched::IIC_LdStLoadUpd:
     98   case PPC::Sched::IIC_LdStLDU:
     99   case PPC::Sched::IIC_LdStLFDU:
    100   case PPC::Sched::IIC_LdStLFDUX:
    101   case PPC::Sched::IIC_LdStLHA:
    102   case PPC::Sched::IIC_LdStLHAU:
    103   case PPC::Sched::IIC_LdStLWA:
    104   case PPC::Sched::IIC_LdStSTU:
    105   case PPC::Sched::IIC_LdStSTFDU:
    106     NSlots = 2;
    107     break;
    108   case PPC::Sched::IIC_LdStLoadUpdX:
    109   case PPC::Sched::IIC_LdStLDUX:
    110   case PPC::Sched::IIC_LdStLHAUX:
    111   case PPC::Sched::IIC_LdStLWARX:
    112   case PPC::Sched::IIC_LdStLDARX:
    113   case PPC::Sched::IIC_LdStSTUX:
    114   case PPC::Sched::IIC_LdStSTDCX:
    115   case PPC::Sched::IIC_LdStSTWCX:
    116   case PPC::Sched::IIC_BrMCRX: // mtcr
    117   // FIXME: Add sync/isync (here and in the itinerary).
    118     NSlots = 4;
    119     break;
    120   }
    121 
    122   // FIXME: record-form instructions need a different itinerary class.
    123   if (NSlots == 1 && PPC::getNonRecordFormOpcode(MCID->getOpcode()) != -1)
    124     NSlots = 2;
    125 
    126   switch (IIC) {
    127   default:
    128     // All multi-slot instructions must come first.
    129     return NSlots > 1;
    130   case PPC::Sched::IIC_BrCR: // cr logicals
    131   case PPC::Sched::IIC_SprMFCR:
    132   case PPC::Sched::IIC_SprMFCRF:
    133   case PPC::Sched::IIC_SprMTSPR:
    134     return true;
    135   }
    136 }
    137 
    138 ScheduleHazardRecognizer::HazardType
    139 PPCDispatchGroupSBHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
    140   if (Stalls == 0 && isLoadAfterStore(SU))
    141     return NoopHazard;
    142 
    143   return ScoreboardHazardRecognizer::getHazardType(SU, Stalls);
    144 }
    145 
    146 bool PPCDispatchGroupSBHazardRecognizer::ShouldPreferAnother(SUnit *SU) {
    147   const MCInstrDesc *MCID = DAG->getInstrDesc(SU);
    148   unsigned NSlots;
    149   if (MCID && mustComeFirst(MCID, NSlots) && CurSlots)
    150     return true;
    151 
    152   return ScoreboardHazardRecognizer::ShouldPreferAnother(SU);
    153 }
    154 
    155 unsigned PPCDispatchGroupSBHazardRecognizer::PreEmitNoops(SUnit *SU) {
    156   // We only need to fill out a maximum of 5 slots here: The 6th slot could
    157   // only be a second branch, and otherwise the next instruction will start a
    158   // new group.
    159   if (isLoadAfterStore(SU) && CurSlots < 6) {
    160     unsigned Directive =
    161         DAG->MF.getSubtarget<PPCSubtarget>().getCPUDirective();
    162     // If we're using a special group-terminating nop, then we need only one.
    163     // FIXME: the same for P9 as previous gen until POWER9 scheduling is ready
    164     if (Directive == PPC::DIR_PWR6 || Directive == PPC::DIR_PWR7 ||
    165         Directive == PPC::DIR_PWR8 || Directive == PPC::DIR_PWR9)
    166       return 1;
    167 
    168     return 5 - CurSlots;
    169   }
    170 
    171   return ScoreboardHazardRecognizer::PreEmitNoops(SU);
    172 }
    173 
    174 void PPCDispatchGroupSBHazardRecognizer::EmitInstruction(SUnit *SU) {
    175   const MCInstrDesc *MCID = DAG->getInstrDesc(SU);
    176   if (MCID) {
    177     if (CurSlots == 5 || (MCID->isBranch() && CurBranches == 1)) {
    178       CurGroup.clear();
    179       CurSlots = CurBranches = 0;
    180     } else {
    181       LLVM_DEBUG(dbgs() << "**** Adding to dispatch group: ");
    182       LLVM_DEBUG(DAG->dumpNode(*SU));
    183 
    184       unsigned NSlots;
    185       bool MustBeFirst = mustComeFirst(MCID, NSlots);
    186 
    187       // If this instruction must come first, but does not, then it starts a
    188       // new group.
    189       if (MustBeFirst && CurSlots) {
    190         CurSlots = CurBranches = 0;
    191         CurGroup.clear();
    192       }
    193 
    194       CurSlots += NSlots;
    195       CurGroup.push_back(SU);
    196 
    197       if (MCID->isBranch())
    198         ++CurBranches;
    199     }
    200   }
    201 
    202   return ScoreboardHazardRecognizer::EmitInstruction(SU);
    203 }
    204 
    205 void PPCDispatchGroupSBHazardRecognizer::AdvanceCycle() {
    206   return ScoreboardHazardRecognizer::AdvanceCycle();
    207 }
    208 
/// Bottom-up scheduling is not supported by this recognizer, so this must
/// never be called; reaching it is reported via llvm_unreachable.
void PPCDispatchGroupSBHazardRecognizer::RecedeCycle() {
  llvm_unreachable("Bottom-up scheduling not supported");
}
    212 
    213 void PPCDispatchGroupSBHazardRecognizer::Reset() {
    214   CurGroup.clear();
    215   CurSlots = CurBranches = 0;
    216   return ScoreboardHazardRecognizer::Reset();
    217 }
    218 
    219 void PPCDispatchGroupSBHazardRecognizer::EmitNoop() {
    220   unsigned Directive =
    221       DAG->MF.getSubtarget<PPCSubtarget>().getCPUDirective();
    222   // If the group has now filled all of its slots, or if we're using a special
    223   // group-terminating nop, the group is complete.
    224   // FIXME: the same for P9 as previous gen until POWER9 scheduling is ready
    225   if (Directive == PPC::DIR_PWR6 || Directive == PPC::DIR_PWR7 ||
    226       Directive == PPC::DIR_PWR8 || Directive == PPC::DIR_PWR9 ||
    227       CurSlots == 6) {
    228     CurGroup.clear();
    229     CurSlots = CurBranches = 0;
    230   } else {
    231     CurGroup.push_back(nullptr);
    232     ++CurSlots;
    233   }
    234 }
    235 
    236 //===----------------------------------------------------------------------===//
    237 // PowerPC 970 Hazard Recognizer
    238 //
    239 // This models the dispatch group formation of the PPC970 processor.  Dispatch
    240 // groups are bundles of up to five instructions that can contain various mixes
    241 // of instructions.  The PPC970 can dispatch a peak of 4 non-branch and one
    242 // branch instruction per-cycle.
    243 //
    244 // There are a number of restrictions to dispatch group formation: some
    245 // instructions can only be issued in the first slot of a dispatch group, & some
    246 // instructions fill an entire dispatch group.  Additionally, only branches can
    247 // issue in the 5th (last) slot.
    248 //
    249 // Finally, there are a number of "structural" hazards on the PPC970.  These
    250 // conditions cause large performance penalties due to misprediction, recovery,
    251 // and replay logic that has to happen.  These cases include setting a CTR and
    252 // branching through it in the same dispatch group, and storing to an address,
    253 // then loading from the same address within a dispatch group.  To avoid these
    254 // conditions, we insert no-op instructions when appropriate.
    255 //
    256 // FIXME: This is missing some significant cases:
    257 //   1. Modeling of microcoded instructions.
    258 //   2. Handling of serialized operations.
    259 //   3. Handling of the esoteric cases in "Resource-based Instruction Grouping".
    260 //
    261 
/// Binds the recognizer to \p DAG and initializes the dispatch-group state by
/// starting an empty group.
PPCHazardRecognizer970::PPCHazardRecognizer970(const ScheduleDAG &DAG)
    : DAG(DAG) {
  // EndDispatchGroup() zeroes NumIssued/NumStores and clears HasCTRSet.
  EndDispatchGroup();
}
    266 
    267 void PPCHazardRecognizer970::EndDispatchGroup() {
    268   LLVM_DEBUG(errs() << "=== Start of dispatch group\n");
    269   NumIssued = 0;
    270 
    271   // Structural hazard info.
    272   HasCTRSet = false;
    273   NumStores = 0;
    274 }
    275 
    276 
    277 PPCII::PPC970_Unit
    278 PPCHazardRecognizer970::GetInstrType(unsigned Opcode,
    279                                      bool &isFirst, bool &isSingle,
    280                                      bool &isCracked,
    281                                      bool &isLoad, bool &isStore) {
    282   const MCInstrDesc &MCID = DAG.TII->get(Opcode);
    283 
    284   isLoad  = MCID.mayLoad();
    285   isStore = MCID.mayStore();
    286 
    287   uint64_t TSFlags = MCID.TSFlags;
    288 
    289   isFirst   = TSFlags & PPCII::PPC970_First;
    290   isSingle  = TSFlags & PPCII::PPC970_Single;
    291   isCracked = TSFlags & PPCII::PPC970_Cracked;
    292   return (PPCII::PPC970_Unit)(TSFlags & PPCII::PPC970_Mask);
    293 }
    294 
    295 /// isLoadOfStoredAddress - If we have a load from the previously stored pointer
    296 /// as indicated by StorePtr1/StorePtr2/StoreSize, return true.
    297 bool PPCHazardRecognizer970::
    298 isLoadOfStoredAddress(uint64_t LoadSize, int64_t LoadOffset,
    299   const Value *LoadValue) const {
    300   for (unsigned i = 0, e = NumStores; i != e; ++i) {
    301     // Handle exact and commuted addresses.
    302     if (LoadValue == StoreValue[i] && LoadOffset == StoreOffset[i])
    303       return true;
    304 
    305     // Okay, we don't have an exact match, if this is an indexed offset, see if
    306     // we have overlap (which happens during fp->int conversion for example).
    307     if (StoreValue[i] == LoadValue) {
    308       // Okay the base pointers match, so we have [c1+r] vs [c2+r].  Check
    309       // to see if the load and store actually overlap.
    310       if (StoreOffset[i] < LoadOffset) {
    311         if (int64_t(StoreOffset[i]+StoreSize[i]) > LoadOffset) return true;
    312       } else {
    313         if (int64_t(LoadOffset+LoadSize) > StoreOffset[i]) return true;
    314       }
    315     }
    316   }
    317   return false;
    318 }
    319 
    320 /// getHazardType - We return hazard for any non-branch instruction that would
    321 /// terminate the dispatch group.  We turn NoopHazard for any
    322 /// instructions that wouldn't terminate the dispatch group that would cause a
    323 /// pipeline flush.
    324 ScheduleHazardRecognizer::HazardType PPCHazardRecognizer970::
    325 getHazardType(SUnit *SU, int Stalls) {
    326   assert(Stalls == 0 && "PPC hazards don't support scoreboard lookahead");
    327 
    328   MachineInstr *MI = SU->getInstr();
    329 
    330   if (MI->isDebugInstr())
    331     return NoHazard;
    332 
    333   unsigned Opcode = MI->getOpcode();
    334   bool isFirst, isSingle, isCracked, isLoad, isStore;
    335   PPCII::PPC970_Unit InstrType =
    336     GetInstrType(Opcode, isFirst, isSingle, isCracked,
    337                  isLoad, isStore);
    338   if (InstrType == PPCII::PPC970_Pseudo) return NoHazard;
    339 
    340   // We can only issue a PPC970_First/PPC970_Single instruction (such as
    341   // crand/mtspr/etc) if this is the first cycle of the dispatch group.
    342   if (NumIssued != 0 && (isFirst || isSingle))
    343     return Hazard;
    344 
    345   // If this instruction is cracked into two ops by the decoder, we know that
    346   // it is not a branch and that it cannot issue if 3 other instructions are
    347   // already in the dispatch group.
    348   if (isCracked && NumIssued > 2)
    349     return Hazard;
    350 
    351   switch (InstrType) {
    352   default: llvm_unreachable("Unknown instruction type!");
    353   case PPCII::PPC970_FXU:
    354   case PPCII::PPC970_LSU:
    355   case PPCII::PPC970_FPU:
    356   case PPCII::PPC970_VALU:
    357   case PPCII::PPC970_VPERM:
    358     // We can only issue a branch as the last instruction in a group.
    359     if (NumIssued == 4) return Hazard;
    360     break;
    361   case PPCII::PPC970_CRU:
    362     // We can only issue a CR instruction in the first two slots.
    363     if (NumIssued >= 2) return Hazard;
    364     break;
    365   case PPCII::PPC970_BRU:
    366     break;
    367   }
    368 
    369   // Do not allow MTCTR and BCTRL to be in the same dispatch group.
    370   if (HasCTRSet && Opcode == PPC::BCTRL)
    371     return NoopHazard;
    372 
    373   // If this is a load following a store, make sure it's not to the same or
    374   // overlapping address.
    375   if (isLoad && NumStores && !MI->memoperands_empty()) {
    376     MachineMemOperand *MO = *MI->memoperands_begin();
    377     if (isLoadOfStoredAddress(MO->getSize(),
    378                               MO->getOffset(), MO->getValue()))
    379       return NoopHazard;
    380   }
    381 
    382   return NoHazard;
    383 }
    384 
    385 void PPCHazardRecognizer970::EmitInstruction(SUnit *SU) {
    386   MachineInstr *MI = SU->getInstr();
    387 
    388   if (MI->isDebugInstr())
    389     return;
    390 
    391   unsigned Opcode = MI->getOpcode();
    392   bool isFirst, isSingle, isCracked, isLoad, isStore;
    393   PPCII::PPC970_Unit InstrType =
    394     GetInstrType(Opcode, isFirst, isSingle, isCracked,
    395                  isLoad, isStore);
    396   if (InstrType == PPCII::PPC970_Pseudo) return;
    397 
    398   // Update structural hazard information.
    399   if (Opcode == PPC::MTCTR || Opcode == PPC::MTCTR8) HasCTRSet = true;
    400 
    401   // Track the address stored to.
    402   if (isStore && NumStores < 4 && !MI->memoperands_empty()) {
    403     MachineMemOperand *MO = *MI->memoperands_begin();
    404     StoreSize[NumStores] = MO->getSize();
    405     StoreOffset[NumStores] = MO->getOffset();
    406     StoreValue[NumStores] = MO->getValue();
    407     ++NumStores;
    408   }
    409 
    410   if (InstrType == PPCII::PPC970_BRU || isSingle)
    411     NumIssued = 4;  // Terminate a d-group.
    412   ++NumIssued;
    413 
    414   // If this instruction is cracked into two ops by the decoder, remember that
    415   // we issued two pieces.
    416   if (isCracked)
    417     ++NumIssued;
    418 
    419   if (NumIssued == 5)
    420     EndDispatchGroup();
    421 }
    422 
    423 void PPCHazardRecognizer970::AdvanceCycle() {
    424   assert(NumIssued < 5 && "Illegal dispatch group!");
    425   ++NumIssued;
    426   if (NumIssued == 5)
    427     EndDispatchGroup();
    428 }
    429 
/// Resets the recognizer by discarding the current dispatch group state
/// (issued count, CTR-set flag and recorded stores).
void PPCHazardRecognizer970::Reset() {
  EndDispatchGroup();
}
    433 
    434