Home | History | Annotate | Line # | Download | only in Hexagon
      1 //===- HexagonPacketizer.cpp - VLIW packetizer ----------------------------===//
      2 //
      3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
      4 // See https://llvm.org/LICENSE.txt for license information.
      5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
      6 //
      7 //===----------------------------------------------------------------------===//
      8 //
      9 // This implements a simple VLIW packetizer using DFA. The packetizer works on
     10 // machine basic blocks. For each instruction I in BB, the packetizer consults
     11 // the DFA to see if machine resources are available to execute I. If so, the
     12 // packetizer checks if I depends on any instruction J in the current packet.
     13 // If no dependency is found, I is added to current packet and machine resource
     14 // is marked as taken. If any dependency is found, a target API call is made to
     15 // prune the dependence.
     16 //
     17 //===----------------------------------------------------------------------===//
     18 
     19 #include "HexagonVLIWPacketizer.h"
     20 #include "Hexagon.h"
     21 #include "HexagonInstrInfo.h"
     22 #include "HexagonRegisterInfo.h"
     23 #include "HexagonSubtarget.h"
     24 #include "llvm/ADT/BitVector.h"
     25 #include "llvm/ADT/DenseSet.h"
     26 #include "llvm/ADT/STLExtras.h"
     27 #include "llvm/ADT/StringExtras.h"
     28 #include "llvm/Analysis/AliasAnalysis.h"
     29 #include "llvm/CodeGen/MachineBasicBlock.h"
     30 #include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
     31 #include "llvm/CodeGen/MachineDominators.h"
     32 #include "llvm/CodeGen/MachineFrameInfo.h"
     33 #include "llvm/CodeGen/MachineFunction.h"
     34 #include "llvm/CodeGen/MachineFunctionPass.h"
     35 #include "llvm/CodeGen/MachineInstr.h"
     36 #include "llvm/CodeGen/MachineInstrBundle.h"
     37 #include "llvm/CodeGen/MachineLoopInfo.h"
     38 #include "llvm/CodeGen/MachineOperand.h"
     39 #include "llvm/CodeGen/ScheduleDAG.h"
     40 #include "llvm/CodeGen/TargetRegisterInfo.h"
     41 #include "llvm/CodeGen/TargetSubtargetInfo.h"
     42 #include "llvm/IR/DebugLoc.h"
     43 #include "llvm/InitializePasses.h"
     44 #include "llvm/MC/MCInstrDesc.h"
     45 #include "llvm/Pass.h"
     46 #include "llvm/Support/CommandLine.h"
     47 #include "llvm/Support/Debug.h"
     48 #include "llvm/Support/ErrorHandling.h"
     49 #include "llvm/Support/raw_ostream.h"
     50 #include <cassert>
     51 #include <cstdint>
     52 #include <iterator>
     53 
     54 using namespace llvm;
     55 
     56 #define DEBUG_TYPE "packets"
     57 
     58 static cl::opt<bool> DisablePacketizer("disable-packetizer", cl::Hidden,
     59   cl::ZeroOrMore, cl::init(false),
     60   cl::desc("Disable Hexagon packetizer pass"));
     61 
     62 static cl::opt<bool> Slot1Store("slot1-store-slot0-load", cl::Hidden,
     63                                 cl::ZeroOrMore, cl::init(true),
     64                                 cl::desc("Allow slot1 store and slot0 load"));
     65 
     66 static cl::opt<bool> PacketizeVolatiles("hexagon-packetize-volatiles",
     67   cl::ZeroOrMore, cl::Hidden, cl::init(true),
     68   cl::desc("Allow non-solo packetization of volatile memory references"));
     69 
     70 static cl::opt<bool> EnableGenAllInsnClass("enable-gen-insn", cl::init(false),
     71   cl::Hidden, cl::ZeroOrMore, cl::desc("Generate all instruction with TC"));
     72 
     73 static cl::opt<bool> DisableVecDblNVStores("disable-vecdbl-nv-stores",
     74   cl::init(false), cl::Hidden, cl::ZeroOrMore,
     75   cl::desc("Disable vector double new-value-stores"));
     76 
     77 extern cl::opt<bool> ScheduleInlineAsm;
     78 
     79 namespace llvm {
     80 
     81 FunctionPass *createHexagonPacketizer(bool Minimal);
     82 void initializeHexagonPacketizerPass(PassRegistry&);
     83 
     84 } // end namespace llvm
     85 
     86 namespace {
     87 
     88   class HexagonPacketizer : public MachineFunctionPass {
     89   public:
     90     static char ID;
     91 
     92     HexagonPacketizer(bool Min = false)
     93       : MachineFunctionPass(ID), Minimal(Min) {}
     94 
     95     void getAnalysisUsage(AnalysisUsage &AU) const override {
     96       AU.setPreservesCFG();
     97       AU.addRequired<AAResultsWrapperPass>();
     98       AU.addRequired<MachineBranchProbabilityInfo>();
     99       AU.addRequired<MachineDominatorTree>();
    100       AU.addRequired<MachineLoopInfo>();
    101       AU.addPreserved<MachineDominatorTree>();
    102       AU.addPreserved<MachineLoopInfo>();
    103       MachineFunctionPass::getAnalysisUsage(AU);
    104     }
    105 
    106     StringRef getPassName() const override { return "Hexagon Packetizer"; }
    107     bool runOnMachineFunction(MachineFunction &Fn) override;
    108 
    109     MachineFunctionProperties getRequiredProperties() const override {
    110       return MachineFunctionProperties().set(
    111           MachineFunctionProperties::Property::NoVRegs);
    112     }
    113 
    114   private:
    115     const HexagonInstrInfo *HII = nullptr;
    116     const HexagonRegisterInfo *HRI = nullptr;
    117     const bool Minimal = false;
    118   };
    119 
    120 } // end anonymous namespace
    121 
    122 char HexagonPacketizer::ID = 0;
    123 
    124 INITIALIZE_PASS_BEGIN(HexagonPacketizer, "hexagon-packetizer",
    125                       "Hexagon Packetizer", false, false)
    126 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
    127 INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
    128 INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
    129 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
    130 INITIALIZE_PASS_END(HexagonPacketizer, "hexagon-packetizer",
    131                     "Hexagon Packetizer", false, false)
    132 
    133 HexagonPacketizerList::HexagonPacketizerList(MachineFunction &MF,
    134       MachineLoopInfo &MLI, AAResults *AA,
    135       const MachineBranchProbabilityInfo *MBPI, bool Minimal)
    136     : VLIWPacketizerList(MF, MLI, AA), MBPI(MBPI), MLI(&MLI),
    137       Minimal(Minimal) {
    138   HII = MF.getSubtarget<HexagonSubtarget>().getInstrInfo();
    139   HRI = MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
    140 
    141   addMutation(std::make_unique<HexagonSubtarget::UsrOverflowMutation>());
    142   addMutation(std::make_unique<HexagonSubtarget::HVXMemLatencyMutation>());
    143   addMutation(std::make_unique<HexagonSubtarget::BankConflictMutation>());
    144 }
    145 
    146 // Check if FirstI modifies a register that SecondI reads.
    147 static bool hasWriteToReadDep(const MachineInstr &FirstI,
    148                               const MachineInstr &SecondI,
    149                               const TargetRegisterInfo *TRI) {
    150   for (auto &MO : FirstI.operands()) {
    151     if (!MO.isReg() || !MO.isDef())
    152       continue;
    153     Register R = MO.getReg();
    154     if (SecondI.readsRegister(R, TRI))
    155       return true;
    156   }
    157   return false;
    158 }
    159 
    160 
    161 static MachineBasicBlock::iterator moveInstrOut(MachineInstr &MI,
    162       MachineBasicBlock::iterator BundleIt, bool Before) {
    163   MachineBasicBlock::instr_iterator InsertPt;
    164   if (Before)
    165     InsertPt = BundleIt.getInstrIterator();
    166   else
    167     InsertPt = std::next(BundleIt).getInstrIterator();
    168 
    169   MachineBasicBlock &B = *MI.getParent();
    170   // The instruction should at least be bundled with the preceding instruction
    171   // (there will always be one, i.e. BUNDLE, if nothing else).
    172   assert(MI.isBundledWithPred());
    173   if (MI.isBundledWithSucc()) {
    174     MI.clearFlag(MachineInstr::BundledSucc);
    175     MI.clearFlag(MachineInstr::BundledPred);
    176   } else {
    177     // If it's not bundled with the successor (i.e. it is the last one
    178     // in the bundle), then we can simply unbundle it from the predecessor,
    179     // which will take care of updating the predecessor's flag.
    180     MI.unbundleFromPred();
    181   }
    182   B.splice(InsertPt, &B, MI.getIterator());
    183 
    184   // Get the size of the bundle without asserting.
    185   MachineBasicBlock::const_instr_iterator I = BundleIt.getInstrIterator();
    186   MachineBasicBlock::const_instr_iterator E = B.instr_end();
    187   unsigned Size = 0;
    188   for (++I; I != E && I->isBundledWithPred(); ++I)
    189     ++Size;
    190 
    191   // If there are still two or more instructions, then there is nothing
    192   // else to be done.
    193   if (Size > 1)
    194     return BundleIt;
    195 
    196   // Otherwise, extract the single instruction out and delete the bundle.
    197   MachineBasicBlock::iterator NextIt = std::next(BundleIt);
    198   MachineInstr &SingleI = *BundleIt->getNextNode();
    199   SingleI.unbundleFromPred();
    200   assert(!SingleI.isBundledWithSucc());
    201   BundleIt->eraseFromParent();
    202   return NextIt;
    203 }
    204 
    205 bool HexagonPacketizer::runOnMachineFunction(MachineFunction &MF) {
    206   auto &HST = MF.getSubtarget<HexagonSubtarget>();
    207   HII = HST.getInstrInfo();
    208   HRI = HST.getRegisterInfo();
    209   auto &MLI = getAnalysis<MachineLoopInfo>();
    210   auto *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
    211   auto *MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
    212 
    213   if (EnableGenAllInsnClass)
    214     HII->genAllInsnTimingClasses(MF);
    215 
    216   // Instantiate the packetizer.
    217   bool MinOnly = Minimal || DisablePacketizer || !HST.usePackets() ||
    218                  skipFunction(MF.getFunction());
    219   HexagonPacketizerList Packetizer(MF, MLI, AA, MBPI, MinOnly);
    220 
    221   // DFA state table should not be empty.
    222   assert(Packetizer.getResourceTracker() && "Empty DFA table!");
    223 
    224   // Loop over all basic blocks and remove KILL pseudo-instructions
    225   // These instructions confuse the dependence analysis. Consider:
    226   // D0 = ...   (Insn 0)
    227   // R0 = KILL R0, D0 (Insn 1)
    228   // R0 = ... (Insn 2)
    229   // Here, Insn 1 will result in the dependence graph not emitting an output
    230   // dependence between Insn 0 and Insn 2. This can lead to incorrect
    231   // packetization
    232   for (MachineBasicBlock &MB : MF) {
    233     auto End = MB.end();
    234     auto MI = MB.begin();
    235     while (MI != End) {
    236       auto NextI = std::next(MI);
    237       if (MI->isKill()) {
    238         MB.erase(MI);
    239         End = MB.end();
    240       }
    241       MI = NextI;
    242     }
    243   }
    244 
    245   // TinyCore with Duplexes: Translate to big-instructions.
    246   if (HST.isTinyCoreWithDuplex())
    247     HII->translateInstrsForDup(MF, true);
    248 
    249   // Loop over all of the basic blocks.
    250   for (auto &MB : MF) {
    251     auto Begin = MB.begin(), End = MB.end();
    252     while (Begin != End) {
    253       // Find the first non-boundary starting from the end of the last
    254       // scheduling region.
    255       MachineBasicBlock::iterator RB = Begin;
    256       while (RB != End && HII->isSchedulingBoundary(*RB, &MB, MF))
    257         ++RB;
    258       // Find the first boundary starting from the beginning of the new
    259       // region.
    260       MachineBasicBlock::iterator RE = RB;
    261       while (RE != End && !HII->isSchedulingBoundary(*RE, &MB, MF))
    262         ++RE;
    263       // Add the scheduling boundary if it's not block end.
    264       if (RE != End)
    265         ++RE;
    266       // If RB == End, then RE == End.
    267       if (RB != End)
    268         Packetizer.PacketizeMIs(&MB, RB, RE);
    269 
    270       Begin = RE;
    271     }
    272   }
    273 
    274   // TinyCore with Duplexes: Translate to tiny-instructions.
    275   if (HST.isTinyCoreWithDuplex())
    276     HII->translateInstrsForDup(MF, false);
    277 
    278   Packetizer.unpacketizeSoloInstrs(MF);
    279   return true;
    280 }
    281 
    282 // Reserve resources for a constant extender. Trigger an assertion if the
    283 // reservation fails.
    284 void HexagonPacketizerList::reserveResourcesForConstExt() {
    285   if (!tryAllocateResourcesForConstExt(true))
    286     llvm_unreachable("Resources not available");
    287 }
    288 
    289 bool HexagonPacketizerList::canReserveResourcesForConstExt() {
    290   return tryAllocateResourcesForConstExt(false);
    291 }
    292 
    293 // Allocate resources (i.e. 4 bytes) for constant extender. If succeeded,
    294 // return true, otherwise, return false.
    295 bool HexagonPacketizerList::tryAllocateResourcesForConstExt(bool Reserve) {
    296   auto *ExtMI = MF.CreateMachineInstr(HII->get(Hexagon::A4_ext), DebugLoc());
    297   bool Avail = ResourceTracker->canReserveResources(*ExtMI);
    298   if (Reserve && Avail)
    299     ResourceTracker->reserveResources(*ExtMI);
    300   MF.DeleteMachineInstr(ExtMI);
    301   return Avail;
    302 }
    303 
    304 bool HexagonPacketizerList::isCallDependent(const MachineInstr &MI,
    305       SDep::Kind DepType, unsigned DepReg) {
    306   // Check for LR dependence.
    307   if (DepReg == HRI->getRARegister())
    308     return true;
    309 
    310   if (HII->isDeallocRet(MI))
    311     if (DepReg == HRI->getFrameRegister() || DepReg == HRI->getStackRegister())
    312       return true;
    313 
    314   // Call-like instructions can be packetized with preceding instructions
    315   // that define registers implicitly used or modified by the call. Explicit
    316   // uses are still prohibited, as in the case of indirect calls:
    317   //   r0 = ...
    318   //   J2_jumpr r0
    319   if (DepType == SDep::Data) {
    320     for (const MachineOperand &MO : MI.operands())
    321       if (MO.isReg() && MO.getReg() == DepReg && !MO.isImplicit())
    322         return true;
    323   }
    324 
    325   return false;
    326 }
    327 
    328 static bool isRegDependence(const SDep::Kind DepType) {
    329   return DepType == SDep::Data || DepType == SDep::Anti ||
    330          DepType == SDep::Output;
    331 }
    332 
    333 static bool isDirectJump(const MachineInstr &MI) {
    334   return MI.getOpcode() == Hexagon::J2_jump;
    335 }
    336 
    337 static bool isSchedBarrier(const MachineInstr &MI) {
    338   switch (MI.getOpcode()) {
    339   case Hexagon::Y2_barrier:
    340     return true;
    341   }
    342   return false;
    343 }
    344 
    345 static bool isControlFlow(const MachineInstr &MI) {
    346   return MI.getDesc().isTerminator() || MI.getDesc().isCall();
    347 }
    348 
    349 /// Returns true if the instruction modifies a callee-saved register.
    350 static bool doesModifyCalleeSavedReg(const MachineInstr &MI,
    351                                      const TargetRegisterInfo *TRI) {
    352   const MachineFunction &MF = *MI.getParent()->getParent();
    353   for (auto *CSR = TRI->getCalleeSavedRegs(&MF); CSR && *CSR; ++CSR)
    354     if (MI.modifiesRegister(*CSR, TRI))
    355       return true;
    356   return false;
    357 }
    358 
    359 // Returns true if an instruction can be promoted to .new predicate or
    360 // new-value store.
    361 bool HexagonPacketizerList::isNewifiable(const MachineInstr &MI,
    362       const TargetRegisterClass *NewRC) {
    363   // Vector stores can be predicated, and can be new-value stores, but
    364   // they cannot be predicated on a .new predicate value.
    365   if (NewRC == &Hexagon::PredRegsRegClass) {
    366     if (HII->isHVXVec(MI) && MI.mayStore())
    367       return false;
    368     return HII->isPredicated(MI) && HII->getDotNewPredOp(MI, nullptr) > 0;
    369   }
    370   // If the class is not PredRegs, it could only apply to new-value stores.
    371   return HII->mayBeNewStore(MI);
    372 }
    373 
    374 // Promote an instructiont to its .cur form.
    375 // At this time, we have already made a call to canPromoteToDotCur and made
    376 // sure that it can *indeed* be promoted.
    377 bool HexagonPacketizerList::promoteToDotCur(MachineInstr &MI,
    378       SDep::Kind DepType, MachineBasicBlock::iterator &MII,
    379       const TargetRegisterClass* RC) {
    380   assert(DepType == SDep::Data);
    381   int CurOpcode = HII->getDotCurOp(MI);
    382   MI.setDesc(HII->get(CurOpcode));
    383   return true;
    384 }
    385 
    386 void HexagonPacketizerList::cleanUpDotCur() {
    387   MachineInstr *MI = nullptr;
    388   for (auto BI : CurrentPacketMIs) {
    389     LLVM_DEBUG(dbgs() << "Cleanup packet has "; BI->dump(););
    390     if (HII->isDotCurInst(*BI)) {
    391       MI = BI;
    392       continue;
    393     }
    394     if (MI) {
    395       for (auto &MO : BI->operands())
    396         if (MO.isReg() && MO.getReg() == MI->getOperand(0).getReg())
    397           return;
    398     }
    399   }
    400   if (!MI)
    401     return;
    402   // We did not find a use of the CUR, so de-cur it.
    403   MI->setDesc(HII->get(HII->getNonDotCurOp(*MI)));
    404   LLVM_DEBUG(dbgs() << "Demoted CUR "; MI->dump(););
    405 }
    406 
    407 // Check to see if an instruction can be dot cur.
    408 bool HexagonPacketizerList::canPromoteToDotCur(const MachineInstr &MI,
    409       const SUnit *PacketSU, unsigned DepReg, MachineBasicBlock::iterator &MII,
    410       const TargetRegisterClass *RC) {
    411   if (!HII->isHVXVec(MI))
    412     return false;
    413   if (!HII->isHVXVec(*MII))
    414     return false;
    415 
    416   // Already a dot new instruction.
    417   if (HII->isDotCurInst(MI) && !HII->mayBeCurLoad(MI))
    418     return false;
    419 
    420   if (!HII->mayBeCurLoad(MI))
    421     return false;
    422 
    423   // The "cur value" cannot come from inline asm.
    424   if (PacketSU->getInstr()->isInlineAsm())
    425     return false;
    426 
    427   // Make sure candidate instruction uses cur.
    428   LLVM_DEBUG(dbgs() << "Can we DOT Cur Vector MI\n"; MI.dump();
    429              dbgs() << "in packet\n";);
    430   MachineInstr &MJ = *MII;
    431   LLVM_DEBUG({
    432     dbgs() << "Checking CUR against ";
    433     MJ.dump();
    434   });
    435   Register DestReg = MI.getOperand(0).getReg();
    436   bool FoundMatch = false;
    437   for (auto &MO : MJ.operands())
    438     if (MO.isReg() && MO.getReg() == DestReg)
    439       FoundMatch = true;
    440   if (!FoundMatch)
    441     return false;
    442 
    443   // Check for existing uses of a vector register within the packet which
    444   // would be affected by converting a vector load into .cur formt.
    445   for (auto BI : CurrentPacketMIs) {
    446     LLVM_DEBUG(dbgs() << "packet has "; BI->dump(););
    447     if (BI->readsRegister(DepReg, MF.getSubtarget().getRegisterInfo()))
    448       return false;
    449   }
    450 
    451   LLVM_DEBUG(dbgs() << "Can Dot CUR MI\n"; MI.dump(););
    452   // We can convert the opcode into a .cur.
    453   return true;
    454 }
    455 
    456 // Promote an instruction to its .new form. At this time, we have already
    457 // made a call to canPromoteToDotNew and made sure that it can *indeed* be
    458 // promoted.
    459 bool HexagonPacketizerList::promoteToDotNew(MachineInstr &MI,
    460       SDep::Kind DepType, MachineBasicBlock::iterator &MII,
    461       const TargetRegisterClass* RC) {
    462   assert(DepType == SDep::Data);
    463   int NewOpcode;
    464   if (RC == &Hexagon::PredRegsRegClass)
    465     NewOpcode = HII->getDotNewPredOp(MI, MBPI);
    466   else
    467     NewOpcode = HII->getDotNewOp(MI);
    468   MI.setDesc(HII->get(NewOpcode));
    469   return true;
    470 }
    471 
    472 bool HexagonPacketizerList::demoteToDotOld(MachineInstr &MI) {
    473   int NewOpcode = HII->getDotOldOp(MI);
    474   MI.setDesc(HII->get(NewOpcode));
    475   return true;
    476 }
    477 
    478 bool HexagonPacketizerList::useCallersSP(MachineInstr &MI) {
    479   unsigned Opc = MI.getOpcode();
    480   switch (Opc) {
    481     case Hexagon::S2_storerd_io:
    482     case Hexagon::S2_storeri_io:
    483     case Hexagon::S2_storerh_io:
    484     case Hexagon::S2_storerb_io:
    485       break;
    486     default:
    487       llvm_unreachable("Unexpected instruction");
    488   }
    489   unsigned FrameSize = MF.getFrameInfo().getStackSize();
    490   MachineOperand &Off = MI.getOperand(1);
    491   int64_t NewOff = Off.getImm() - (FrameSize + HEXAGON_LRFP_SIZE);
    492   if (HII->isValidOffset(Opc, NewOff, HRI)) {
    493     Off.setImm(NewOff);
    494     return true;
    495   }
    496   return false;
    497 }
    498 
    499 void HexagonPacketizerList::useCalleesSP(MachineInstr &MI) {
    500   unsigned Opc = MI.getOpcode();
    501   switch (Opc) {
    502     case Hexagon::S2_storerd_io:
    503     case Hexagon::S2_storeri_io:
    504     case Hexagon::S2_storerh_io:
    505     case Hexagon::S2_storerb_io:
    506       break;
    507     default:
    508       llvm_unreachable("Unexpected instruction");
    509   }
    510   unsigned FrameSize = MF.getFrameInfo().getStackSize();
    511   MachineOperand &Off = MI.getOperand(1);
    512   Off.setImm(Off.getImm() + FrameSize + HEXAGON_LRFP_SIZE);
    513 }
    514 
    515 /// Return true if we can update the offset in MI so that MI and MJ
    516 /// can be packetized together.
    517 bool HexagonPacketizerList::updateOffset(SUnit *SUI, SUnit *SUJ) {
    518   assert(SUI->getInstr() && SUJ->getInstr());
    519   MachineInstr &MI = *SUI->getInstr();
    520   MachineInstr &MJ = *SUJ->getInstr();
    521 
    522   unsigned BPI, OPI;
    523   if (!HII->getBaseAndOffsetPosition(MI, BPI, OPI))
    524     return false;
    525   unsigned BPJ, OPJ;
    526   if (!HII->getBaseAndOffsetPosition(MJ, BPJ, OPJ))
    527     return false;
    528   Register Reg = MI.getOperand(BPI).getReg();
    529   if (Reg != MJ.getOperand(BPJ).getReg())
    530     return false;
    531   // Make sure that the dependences do not restrict adding MI to the packet.
    532   // That is, ignore anti dependences, and make sure the only data dependence
    533   // involves the specific register.
    534   for (const auto &PI : SUI->Preds)
    535     if (PI.getKind() != SDep::Anti &&
    536         (PI.getKind() != SDep::Data || PI.getReg() != Reg))
    537       return false;
    538   int Incr;
    539   if (!HII->getIncrementValue(MJ, Incr))
    540     return false;
    541 
    542   int64_t Offset = MI.getOperand(OPI).getImm();
    543   if (!HII->isValidOffset(MI.getOpcode(), Offset+Incr, HRI))
    544     return false;
    545 
    546   MI.getOperand(OPI).setImm(Offset + Incr);
    547   ChangedOffset = Offset;
    548   return true;
    549 }
    550 
    551 /// Undo the changed offset. This is needed if the instruction cannot be
    552 /// added to the current packet due to a different instruction.
    553 void HexagonPacketizerList::undoChangedOffset(MachineInstr &MI) {
    554   unsigned BP, OP;
    555   if (!HII->getBaseAndOffsetPosition(MI, BP, OP))
    556     llvm_unreachable("Unable to find base and offset operands.");
    557   MI.getOperand(OP).setImm(ChangedOffset);
    558 }
    559 
    560 enum PredicateKind {
    561   PK_False,
    562   PK_True,
    563   PK_Unknown
    564 };
    565 
    566 /// Returns true if an instruction is predicated on p0 and false if it's
    567 /// predicated on !p0.
    568 static PredicateKind getPredicateSense(const MachineInstr &MI,
    569                                        const HexagonInstrInfo *HII) {
    570   if (!HII->isPredicated(MI))
    571     return PK_Unknown;
    572   if (HII->isPredicatedTrue(MI))
    573     return PK_True;
    574   return PK_False;
    575 }
    576 
    577 static const MachineOperand &getPostIncrementOperand(const MachineInstr &MI,
    578       const HexagonInstrInfo *HII) {
    579   assert(HII->isPostIncrement(MI) && "Not a post increment operation.");
    580 #ifndef NDEBUG
    581   // Post Increment means duplicates. Use dense map to find duplicates in the
    582   // list. Caution: Densemap initializes with the minimum of 64 buckets,
    583   // whereas there are at most 5 operands in the post increment.
    584   DenseSet<unsigned> DefRegsSet;
    585   for (auto &MO : MI.operands())
    586     if (MO.isReg() && MO.isDef())
    587       DefRegsSet.insert(MO.getReg());
    588 
    589   for (auto &MO : MI.operands())
    590     if (MO.isReg() && MO.isUse() && DefRegsSet.count(MO.getReg()))
    591       return MO;
    592 #else
    593   if (MI.mayLoad()) {
    594     const MachineOperand &Op1 = MI.getOperand(1);
    595     // The 2nd operand is always the post increment operand in load.
    596     assert(Op1.isReg() && "Post increment operand has be to a register.");
    597     return Op1;
    598   }
    599   if (MI.getDesc().mayStore()) {
    600     const MachineOperand &Op0 = MI.getOperand(0);
    601     // The 1st operand is always the post increment operand in store.
    602     assert(Op0.isReg() && "Post increment operand has be to a register.");
    603     return Op0;
    604   }
    605 #endif
    606   // we should never come here.
    607   llvm_unreachable("mayLoad or mayStore not set for Post Increment operation");
    608 }
    609 
    610 // Get the value being stored.
    611 static const MachineOperand& getStoreValueOperand(const MachineInstr &MI) {
    612   // value being stored is always the last operand.
    613   return MI.getOperand(MI.getNumOperands()-1);
    614 }
    615 
    616 static bool isLoadAbsSet(const MachineInstr &MI) {
    617   unsigned Opc = MI.getOpcode();
    618   switch (Opc) {
    619     case Hexagon::L4_loadrd_ap:
    620     case Hexagon::L4_loadrb_ap:
    621     case Hexagon::L4_loadrh_ap:
    622     case Hexagon::L4_loadrub_ap:
    623     case Hexagon::L4_loadruh_ap:
    624     case Hexagon::L4_loadri_ap:
    625       return true;
    626   }
    627   return false;
    628 }
    629 
    630 static const MachineOperand &getAbsSetOperand(const MachineInstr &MI) {
    631   assert(isLoadAbsSet(MI));
    632   return MI.getOperand(1);
    633 }
    634 
    635 // Can be new value store?
    636 // Following restrictions are to be respected in convert a store into
    637 // a new value store.
    638 // 1. If an instruction uses auto-increment, its address register cannot
    639 //    be a new-value register. Arch Spec 5.4.2.1
    640 // 2. If an instruction uses absolute-set addressing mode, its address
    641 //    register cannot be a new-value register. Arch Spec 5.4.2.1.
    642 // 3. If an instruction produces a 64-bit result, its registers cannot be used
    643 //    as new-value registers. Arch Spec 5.4.2.2.
    644 // 4. If the instruction that sets the new-value register is conditional, then
    645 //    the instruction that uses the new-value register must also be conditional,
    646 //    and both must always have their predicates evaluate identically.
    647 //    Arch Spec 5.4.2.3.
    648 // 5. There is an implied restriction that a packet cannot have another store,
    649 //    if there is a new value store in the packet. Corollary: if there is
    650 //    already a store in a packet, there can not be a new value store.
    651 //    Arch Spec: 3.4.4.2
    652 bool HexagonPacketizerList::canPromoteToNewValueStore(const MachineInstr &MI,
    653       const MachineInstr &PacketMI, unsigned DepReg) {
    654   // Make sure we are looking at the store, that can be promoted.
    655   if (!HII->mayBeNewStore(MI))
    656     return false;
    657 
    658   // Make sure there is dependency and can be new value'd.
    659   const MachineOperand &Val = getStoreValueOperand(MI);
    660   if (Val.isReg() && Val.getReg() != DepReg)
    661     return false;
    662 
    663   const MCInstrDesc& MCID = PacketMI.getDesc();
    664 
    665   // First operand is always the result.
    666   const TargetRegisterClass *PacketRC = HII->getRegClass(MCID, 0, HRI, MF);
    667   // Double regs can not feed into new value store: PRM section: 5.4.2.2.
    668   if (PacketRC == &Hexagon::DoubleRegsRegClass)
    669     return false;
    670 
    671   // New-value stores are of class NV (slot 0), dual stores require class ST
    672   // in slot 0 (PRM 5.5).
    673   for (auto I : CurrentPacketMIs) {
    674     SUnit *PacketSU = MIToSUnit.find(I)->second;
    675     if (PacketSU->getInstr()->mayStore())
    676       return false;
    677   }
    678 
    679   // Make sure it's NOT the post increment register that we are going to
    680   // new value.
    681   if (HII->isPostIncrement(MI) &&
    682       getPostIncrementOperand(MI, HII).getReg() == DepReg) {
    683     return false;
    684   }
    685 
    686   if (HII->isPostIncrement(PacketMI) && PacketMI.mayLoad() &&
    687       getPostIncrementOperand(PacketMI, HII).getReg() == DepReg) {
    688     // If source is post_inc, or absolute-set addressing, it can not feed
    689     // into new value store
    690     //   r3 = memw(r2++#4)
    691     //   memw(r30 + #-1404) = r2.new -> can not be new value store
    692     // arch spec section: 5.4.2.1.
    693     return false;
    694   }
    695 
    696   if (isLoadAbsSet(PacketMI) && getAbsSetOperand(PacketMI).getReg() == DepReg)
    697     return false;
    698 
    699   // If the source that feeds the store is predicated, new value store must
    700   // also be predicated.
    701   if (HII->isPredicated(PacketMI)) {
    702     if (!HII->isPredicated(MI))
    703       return false;
    704 
    705     // Check to make sure that they both will have their predicates
    706     // evaluate identically.
    707     unsigned predRegNumSrc = 0;
    708     unsigned predRegNumDst = 0;
    709     const TargetRegisterClass* predRegClass = nullptr;
    710 
    711     // Get predicate register used in the source instruction.
    712     for (auto &MO : PacketMI.operands()) {
    713       if (!MO.isReg())
    714         continue;
    715       predRegNumSrc = MO.getReg();
    716       predRegClass = HRI->getMinimalPhysRegClass(predRegNumSrc);
    717       if (predRegClass == &Hexagon::PredRegsRegClass)
    718         break;
    719     }
    720     assert((predRegClass == &Hexagon::PredRegsRegClass) &&
    721         "predicate register not found in a predicated PacketMI instruction");
    722 
    723     // Get predicate register used in new-value store instruction.
    724     for (auto &MO : MI.operands()) {
    725       if (!MO.isReg())
    726         continue;
    727       predRegNumDst = MO.getReg();
    728       predRegClass = HRI->getMinimalPhysRegClass(predRegNumDst);
    729       if (predRegClass == &Hexagon::PredRegsRegClass)
    730         break;
    731     }
    732     assert((predRegClass == &Hexagon::PredRegsRegClass) &&
    733            "predicate register not found in a predicated MI instruction");
    734 
    735     // New-value register producer and user (store) need to satisfy these
    736     // constraints:
    737     // 1) Both instructions should be predicated on the same register.
    738     // 2) If producer of the new-value register is .new predicated then store
    739     // should also be .new predicated and if producer is not .new predicated
    740     // then store should not be .new predicated.
    741     // 3) Both new-value register producer and user should have same predicate
    742     // sense, i.e, either both should be negated or both should be non-negated.
    743     if (predRegNumDst != predRegNumSrc ||
    744         HII->isDotNewInst(PacketMI) != HII->isDotNewInst(MI) ||
    745         getPredicateSense(MI, HII) != getPredicateSense(PacketMI, HII))
    746       return false;
    747   }
    748 
    749   // Make sure that other than the new-value register no other store instruction
    750   // register has been modified in the same packet. Predicate registers can be
    751   // modified by they should not be modified between the producer and the store
    752   // instruction as it will make them both conditional on different values.
    753   // We already know this to be true for all the instructions before and
    754   // including PacketMI. Howerver, we need to perform the check for the
    755   // remaining instructions in the packet.
    756 
    757   unsigned StartCheck = 0;
    758 
    759   for (auto I : CurrentPacketMIs) {
    760     SUnit *TempSU = MIToSUnit.find(I)->second;
    761     MachineInstr &TempMI = *TempSU->getInstr();
    762 
    763     // Following condition is true for all the instructions until PacketMI is
    764     // reached (StartCheck is set to 0 before the for loop).
    765     // StartCheck flag is 1 for all the instructions after PacketMI.
    766     if (&TempMI != &PacketMI && !StartCheck) // Start processing only after
    767       continue;                              // encountering PacketMI.
    768 
    769     StartCheck = 1;
    770     if (&TempMI == &PacketMI) // We don't want to check PacketMI for dependence.
    771       continue;
    772 
    773     for (auto &MO : MI.operands())
    774       if (MO.isReg() && TempSU->getInstr()->modifiesRegister(MO.getReg(), HRI))
    775         return false;
    776   }
    777 
    778   // Make sure that for non-POST_INC stores:
    779   // 1. The only use of reg is DepReg and no other registers.
    780   //    This handles base+index registers.
    781   //    The following store can not be dot new.
    782   //    Eg.   r0 = add(r0, #3)
    783   //          memw(r1+r0<<#2) = r0
    784   if (!HII->isPostIncrement(MI)) {
    785     for (unsigned opNum = 0; opNum < MI.getNumOperands()-1; opNum++) {
    786       const MachineOperand &MO = MI.getOperand(opNum);
    787       if (MO.isReg() && MO.getReg() == DepReg)
    788         return false;
    789     }
    790   }
    791 
    792   // If data definition is because of implicit definition of the register,
    793   // do not newify the store. Eg.
    794   // %r9 = ZXTH %r12, implicit %d6, implicit-def %r12
    795   // S2_storerh_io %r8, 2, killed %r12; mem:ST2[%scevgep343]
    796   for (auto &MO : PacketMI.operands()) {
    797     if (MO.isRegMask() && MO.clobbersPhysReg(DepReg))
    798       return false;
    799     if (!MO.isReg() || !MO.isDef() || !MO.isImplicit())
    800       continue;
    801     Register R = MO.getReg();
    802     if (R == DepReg || HRI->isSuperRegister(DepReg, R))
    803       return false;
    804   }
    805 
    806   // Handle imp-use of super reg case. There is a target independent side
    807   // change that should prevent this situation but I am handling it for
    808   // just-in-case. For example, we cannot newify R2 in the following case:
    809   // %r3 = A2_tfrsi 0;
    810   // S2_storeri_io killed %r0, 0, killed %r2, implicit killed %d1;
    811   for (auto &MO : MI.operands()) {
    812     if (MO.isReg() && MO.isUse() && MO.isImplicit() && MO.getReg() == DepReg)
    813       return false;
    814   }
    815 
    816   // Can be dot new store.
    817   return true;
    818 }
    819 
    820 // Can this MI to promoted to either new value store or new value jump.
    821 bool HexagonPacketizerList::canPromoteToNewValue(const MachineInstr &MI,
    822       const SUnit *PacketSU, unsigned DepReg,
    823       MachineBasicBlock::iterator &MII) {
    824   if (!HII->mayBeNewStore(MI))
    825     return false;
    826 
    827   // Check to see the store can be new value'ed.
    828   MachineInstr &PacketMI = *PacketSU->getInstr();
    829   if (canPromoteToNewValueStore(MI, PacketMI, DepReg))
    830     return true;
    831 
    832   // Check to see the compare/jump can be new value'ed.
    833   // This is done as a pass on its own. Don't need to check it here.
    834   return false;
    835 }
    836 
    837 static bool isImplicitDependency(const MachineInstr &I, bool CheckDef,
    838       unsigned DepReg) {
    839   for (auto &MO : I.operands()) {
    840     if (CheckDef && MO.isRegMask() && MO.clobbersPhysReg(DepReg))
    841       return true;
    842     if (!MO.isReg() || MO.getReg() != DepReg || !MO.isImplicit())
    843       continue;
    844     if (CheckDef == MO.isDef())
    845       return true;
    846   }
    847   return false;
    848 }
    849 
    850 // Check to see if an instruction can be dot new.
    851 bool HexagonPacketizerList::canPromoteToDotNew(const MachineInstr &MI,
    852       const SUnit *PacketSU, unsigned DepReg, MachineBasicBlock::iterator &MII,
    853       const TargetRegisterClass* RC) {
    854   // Already a dot new instruction.
    855   if (HII->isDotNewInst(MI) && !HII->mayBeNewStore(MI))
    856     return false;
    857 
    858   if (!isNewifiable(MI, RC))
    859     return false;
    860 
    861   const MachineInstr &PI = *PacketSU->getInstr();
    862 
    863   // The "new value" cannot come from inline asm.
    864   if (PI.isInlineAsm())
    865     return false;
    866 
    867   // IMPLICIT_DEFs won't materialize as real instructions, so .new makes no
    868   // sense.
    869   if (PI.isImplicitDef())
    870     return false;
    871 
    872   // If dependency is trough an implicitly defined register, we should not
    873   // newify the use.
    874   if (isImplicitDependency(PI, true, DepReg) ||
    875       isImplicitDependency(MI, false, DepReg))
    876     return false;
    877 
    878   const MCInstrDesc& MCID = PI.getDesc();
    879   const TargetRegisterClass *VecRC = HII->getRegClass(MCID, 0, HRI, MF);
    880   if (DisableVecDblNVStores && VecRC == &Hexagon::HvxWRRegClass)
    881     return false;
    882 
    883   // predicate .new
    884   if (RC == &Hexagon::PredRegsRegClass)
    885     return HII->predCanBeUsedAsDotNew(PI, DepReg);
    886 
    887   if (RC != &Hexagon::PredRegsRegClass && !HII->mayBeNewStore(MI))
    888     return false;
    889 
    890   // Create a dot new machine instruction to see if resources can be
    891   // allocated. If not, bail out now.
    892   int NewOpcode = HII->getDotNewOp(MI);
    893   const MCInstrDesc &D = HII->get(NewOpcode);
    894   MachineInstr *NewMI = MF.CreateMachineInstr(D, DebugLoc());
    895   bool ResourcesAvailable = ResourceTracker->canReserveResources(*NewMI);
    896   MF.DeleteMachineInstr(NewMI);
    897   if (!ResourcesAvailable)
    898     return false;
    899 
    900   // New Value Store only. New Value Jump generated as a separate pass.
    901   if (!canPromoteToNewValue(MI, PacketSU, DepReg, MII))
    902     return false;
    903 
    904   return true;
    905 }
    906 
    907 // Go through the packet instructions and search for an anti dependency between
    908 // them and DepReg from MI. Consider this case:
    909 // Trying to add
    910 // a) %r1 = TFRI_cdNotPt %p3, 2
    911 // to this packet:
    912 // {
    913 //   b) %p0 = C2_or killed %p3, killed %p0
    914 //   c) %p3 = C2_tfrrp %r23
    915 //   d) %r1 = C2_cmovenewit %p3, 4
    916 //  }
    917 // The P3 from a) and d) will be complements after
    918 // a)'s P3 is converted to .new form
    919 // Anti-dep between c) and b) is irrelevant for this case
    920 bool HexagonPacketizerList::restrictingDepExistInPacket(MachineInstr &MI,
    921                                                         unsigned DepReg) {
    922   SUnit *PacketSUDep = MIToSUnit.find(&MI)->second;
    923 
    924   for (auto I : CurrentPacketMIs) {
    925     // We only care for dependencies to predicated instructions
    926     if (!HII->isPredicated(*I))
    927       continue;
    928 
    929     // Scheduling Unit for current insn in the packet
    930     SUnit *PacketSU = MIToSUnit.find(I)->second;
    931 
    932     // Look at dependencies between current members of the packet and
    933     // predicate defining instruction MI. Make sure that dependency is
    934     // on the exact register we care about.
    935     if (PacketSU->isSucc(PacketSUDep)) {
    936       for (unsigned i = 0; i < PacketSU->Succs.size(); ++i) {
    937         auto &Dep = PacketSU->Succs[i];
    938         if (Dep.getSUnit() == PacketSUDep && Dep.getKind() == SDep::Anti &&
    939             Dep.getReg() == DepReg)
    940           return true;
    941       }
    942     }
    943   }
    944 
    945   return false;
    946 }
    947 
    948 /// Gets the predicate register of a predicated instruction.
    949 static unsigned getPredicatedRegister(MachineInstr &MI,
    950                                       const HexagonInstrInfo *QII) {
    951   /// We use the following rule: The first predicate register that is a use is
    952   /// the predicate register of a predicated instruction.
    953   assert(QII->isPredicated(MI) && "Must be predicated instruction");
    954 
    955   for (auto &Op : MI.operands()) {
    956     if (Op.isReg() && Op.getReg() && Op.isUse() &&
    957         Hexagon::PredRegsRegClass.contains(Op.getReg()))
    958       return Op.getReg();
    959   }
    960 
    961   llvm_unreachable("Unknown instruction operand layout");
    962   return 0;
    963 }
    964 
    965 // Given two predicated instructions, this function detects whether
    966 // the predicates are complements.
    967 bool HexagonPacketizerList::arePredicatesComplements(MachineInstr &MI1,
    968                                                      MachineInstr &MI2) {
    969   // If we don't know the predicate sense of the instructions bail out early, we
    970   // need it later.
    971   if (getPredicateSense(MI1, HII) == PK_Unknown ||
    972       getPredicateSense(MI2, HII) == PK_Unknown)
    973     return false;
    974 
    975   // Scheduling unit for candidate.
    976   SUnit *SU = MIToSUnit[&MI1];
    977 
    978   // One corner case deals with the following scenario:
    979   // Trying to add
    980   // a) %r24 = A2_tfrt %p0, %r25
    981   // to this packet:
    982   // {
    983   //   b) %r25 = A2_tfrf %p0, %r24
    984   //   c) %p0 = C2_cmpeqi %r26, 1
    985   // }
    986   //
    987   // On general check a) and b) are complements, but presence of c) will
    988   // convert a) to .new form, and then it is not a complement.
    989   // We attempt to detect it by analyzing existing dependencies in the packet.
    990 
    991   // Analyze relationships between all existing members of the packet.
    992   // Look for Anti dependecy on the same predicate reg as used in the
    993   // candidate.
    994   for (auto I : CurrentPacketMIs) {
    995     // Scheduling Unit for current insn in the packet.
    996     SUnit *PacketSU = MIToSUnit.find(I)->second;
    997 
    998     // If this instruction in the packet is succeeded by the candidate...
    999     if (PacketSU->isSucc(SU)) {
   1000       for (unsigned i = 0; i < PacketSU->Succs.size(); ++i) {
   1001         auto Dep = PacketSU->Succs[i];
   1002         // The corner case exist when there is true data dependency between
   1003         // candidate and one of current packet members, this dep is on
   1004         // predicate reg, and there already exist anti dep on the same pred in
   1005         // the packet.
   1006         if (Dep.getSUnit() == SU && Dep.getKind() == SDep::Data &&
   1007             Hexagon::PredRegsRegClass.contains(Dep.getReg())) {
   1008           // Here I know that I is predicate setting instruction with true
   1009           // data dep to candidate on the register we care about - c) in the
   1010           // above example. Now I need to see if there is an anti dependency
   1011           // from c) to any other instruction in the same packet on the pred
   1012           // reg of interest.
   1013           if (restrictingDepExistInPacket(*I, Dep.getReg()))
   1014             return false;
   1015         }
   1016       }
   1017     }
   1018   }
   1019 
   1020   // If the above case does not apply, check regular complement condition.
   1021   // Check that the predicate register is the same and that the predicate
   1022   // sense is different We also need to differentiate .old vs. .new: !p0
   1023   // is not complementary to p0.new.
   1024   unsigned PReg1 = getPredicatedRegister(MI1, HII);
   1025   unsigned PReg2 = getPredicatedRegister(MI2, HII);
   1026   return PReg1 == PReg2 &&
   1027          Hexagon::PredRegsRegClass.contains(PReg1) &&
   1028          Hexagon::PredRegsRegClass.contains(PReg2) &&
   1029          getPredicateSense(MI1, HII) != getPredicateSense(MI2, HII) &&
   1030          HII->isDotNewInst(MI1) == HII->isDotNewInst(MI2);
   1031 }
   1032 
   1033 // Initialize packetizer flags.
   1034 void HexagonPacketizerList::initPacketizerState() {
   1035   Dependence = false;
   1036   PromotedToDotNew = false;
   1037   GlueToNewValueJump = false;
   1038   GlueAllocframeStore = false;
   1039   FoundSequentialDependence = false;
   1040   ChangedOffset = INT64_MAX;
   1041 }
   1042 
   1043 // Ignore bundling of pseudo instructions.
   1044 bool HexagonPacketizerList::ignorePseudoInstruction(const MachineInstr &MI,
   1045                                                     const MachineBasicBlock *) {
   1046   if (MI.isDebugInstr())
   1047     return true;
   1048 
   1049   if (MI.isCFIInstruction())
   1050     return false;
   1051 
   1052   // We must print out inline assembly.
   1053   if (MI.isInlineAsm())
   1054     return false;
   1055 
   1056   if (MI.isImplicitDef())
   1057     return false;
   1058 
   1059   // We check if MI has any functional units mapped to it. If it doesn't,
   1060   // we ignore the instruction.
   1061   const MCInstrDesc& TID = MI.getDesc();
   1062   auto *IS = ResourceTracker->getInstrItins()->beginStage(TID.getSchedClass());
   1063   return !IS->getUnits();
   1064 }
   1065 
   1066 bool HexagonPacketizerList::isSoloInstruction(const MachineInstr &MI) {
   1067   // Ensure any bundles created by gather packetize remain separate.
   1068   if (MI.isBundle())
   1069     return true;
   1070 
   1071   if (MI.isEHLabel() || MI.isCFIInstruction())
   1072     return true;
   1073 
   1074   // Consider inline asm to not be a solo instruction by default.
   1075   // Inline asm will be put in a packet temporarily, but then it will be
   1076   // removed, and placed outside of the packet (before or after, depending
   1077   // on dependencies).  This is to reduce the impact of inline asm as a
   1078   // "packet splitting" instruction.
   1079   if (MI.isInlineAsm() && !ScheduleInlineAsm)
   1080     return true;
   1081 
   1082   if (isSchedBarrier(MI))
   1083     return true;
   1084 
   1085   if (HII->isSolo(MI))
   1086     return true;
   1087 
   1088   if (MI.getOpcode() == Hexagon::A2_nop)
   1089     return true;
   1090 
   1091   return false;
   1092 }
   1093 
   1094 // Quick check if instructions MI and MJ cannot coexist in the same packet.
   1095 // Limit the tests to be "one-way", e.g.  "if MI->isBranch and MJ->isInlineAsm",
   1096 // but not the symmetric case: "if MJ->isBranch and MI->isInlineAsm".
   1097 // For full test call this function twice:
   1098 //   cannotCoexistAsymm(MI, MJ) || cannotCoexistAsymm(MJ, MI)
   1099 // Doing the test only one way saves the amount of code in this function,
   1100 // since every test would need to be repeated with the MI and MJ reversed.
   1101 static bool cannotCoexistAsymm(const MachineInstr &MI, const MachineInstr &MJ,
   1102       const HexagonInstrInfo &HII) {
   1103   const MachineFunction *MF = MI.getParent()->getParent();
   1104   if (MF->getSubtarget<HexagonSubtarget>().hasV60OpsOnly() &&
   1105       HII.isHVXMemWithAIndirect(MI, MJ))
   1106     return true;
   1107 
   1108   // An inline asm cannot be together with a branch, because we may not be
   1109   // able to remove the asm out after packetizing (i.e. if the asm must be
   1110   // moved past the bundle).  Similarly, two asms cannot be together to avoid
   1111   // complications when determining their relative order outside of a bundle.
   1112   if (MI.isInlineAsm())
   1113     return MJ.isInlineAsm() || MJ.isBranch() || MJ.isBarrier() ||
   1114            MJ.isCall() || MJ.isTerminator();
   1115 
   1116   // New-value stores cannot coexist with any other stores.
   1117   if (HII.isNewValueStore(MI) && MJ.mayStore())
   1118     return true;
   1119 
   1120   switch (MI.getOpcode()) {
   1121   case Hexagon::S2_storew_locked:
   1122   case Hexagon::S4_stored_locked:
   1123   case Hexagon::L2_loadw_locked:
   1124   case Hexagon::L4_loadd_locked:
   1125   case Hexagon::Y2_dccleana:
   1126   case Hexagon::Y2_dccleaninva:
   1127   case Hexagon::Y2_dcinva:
   1128   case Hexagon::Y2_dczeroa:
   1129   case Hexagon::Y4_l2fetch:
   1130   case Hexagon::Y5_l2fetch: {
   1131     // These instructions can only be grouped with ALU32 or non-floating-point
   1132     // XTYPE instructions.  Since there is no convenient way of identifying fp
   1133     // XTYPE instructions, only allow grouping with ALU32 for now.
   1134     unsigned TJ = HII.getType(MJ);
   1135     if (TJ != HexagonII::TypeALU32_2op &&
   1136         TJ != HexagonII::TypeALU32_3op &&
   1137         TJ != HexagonII::TypeALU32_ADDI)
   1138       return true;
   1139     break;
   1140   }
   1141   default:
   1142     break;
   1143   }
   1144 
   1145   // "False" really means that the quick check failed to determine if
   1146   // I and J cannot coexist.
   1147   return false;
   1148 }
   1149 
   1150 // Full, symmetric check.
   1151 bool HexagonPacketizerList::cannotCoexist(const MachineInstr &MI,
   1152       const MachineInstr &MJ) {
   1153   return cannotCoexistAsymm(MI, MJ, *HII) || cannotCoexistAsymm(MJ, MI, *HII);
   1154 }
   1155 
   1156 void HexagonPacketizerList::unpacketizeSoloInstrs(MachineFunction &MF) {
   1157   for (auto &B : MF) {
   1158     MachineBasicBlock::iterator BundleIt;
   1159     MachineBasicBlock::instr_iterator NextI;
   1160     for (auto I = B.instr_begin(), E = B.instr_end(); I != E; I = NextI) {
   1161       NextI = std::next(I);
   1162       MachineInstr &MI = *I;
   1163       if (MI.isBundle())
   1164         BundleIt = I;
   1165       if (!MI.isInsideBundle())
   1166         continue;
   1167 
   1168       // Decide on where to insert the instruction that we are pulling out.
   1169       // Debug instructions always go before the bundle, but the placement of
   1170       // INLINE_ASM depends on potential dependencies.  By default, try to
   1171       // put it before the bundle, but if the asm writes to a register that
   1172       // other instructions in the bundle read, then we need to place it
   1173       // after the bundle (to preserve the bundle semantics).
   1174       bool InsertBeforeBundle;
   1175       if (MI.isInlineAsm())
   1176         InsertBeforeBundle = !hasWriteToReadDep(MI, *BundleIt, HRI);
   1177       else if (MI.isDebugValue())
   1178         InsertBeforeBundle = true;
   1179       else
   1180         continue;
   1181 
   1182       BundleIt = moveInstrOut(MI, BundleIt, InsertBeforeBundle);
   1183     }
   1184   }
   1185 }
   1186 
   1187 // Check if a given instruction is of class "system".
   1188 static bool isSystemInstr(const MachineInstr &MI) {
   1189   unsigned Opc = MI.getOpcode();
   1190   switch (Opc) {
   1191     case Hexagon::Y2_barrier:
   1192     case Hexagon::Y2_dcfetchbo:
   1193     case Hexagon::Y4_l2fetch:
   1194     case Hexagon::Y5_l2fetch:
   1195       return true;
   1196   }
   1197   return false;
   1198 }
   1199 
   1200 bool HexagonPacketizerList::hasDeadDependence(const MachineInstr &I,
   1201                                               const MachineInstr &J) {
   1202   // The dependence graph may not include edges between dead definitions,
   1203   // so without extra checks, we could end up packetizing two instruction
   1204   // defining the same (dead) register.
   1205   if (I.isCall() || J.isCall())
   1206     return false;
   1207   if (HII->isPredicated(I) || HII->isPredicated(J))
   1208     return false;
   1209 
   1210   BitVector DeadDefs(Hexagon::NUM_TARGET_REGS);
   1211   for (auto &MO : I.operands()) {
   1212     if (!MO.isReg() || !MO.isDef() || !MO.isDead())
   1213       continue;
   1214     DeadDefs[MO.getReg()] = true;
   1215   }
   1216 
   1217   for (auto &MO : J.operands()) {
   1218     if (!MO.isReg() || !MO.isDef() || !MO.isDead())
   1219       continue;
   1220     Register R = MO.getReg();
   1221     if (R != Hexagon::USR_OVF && DeadDefs[R])
   1222       return true;
   1223   }
   1224   return false;
   1225 }
   1226 
   1227 bool HexagonPacketizerList::hasControlDependence(const MachineInstr &I,
   1228                                                  const MachineInstr &J) {
   1229   // A save callee-save register function call can only be in a packet
   1230   // with instructions that don't write to the callee-save registers.
   1231   if ((HII->isSaveCalleeSavedRegsCall(I) &&
   1232        doesModifyCalleeSavedReg(J, HRI)) ||
   1233       (HII->isSaveCalleeSavedRegsCall(J) &&
   1234        doesModifyCalleeSavedReg(I, HRI)))
   1235     return true;
   1236 
   1237   // Two control flow instructions cannot go in the same packet.
   1238   if (isControlFlow(I) && isControlFlow(J))
   1239     return true;
   1240 
   1241   // \ref-manual (7.3.4) A loop setup packet in loopN or spNloop0 cannot
   1242   // contain a speculative indirect jump,
   1243   // a new-value compare jump or a dealloc_return.
   1244   auto isBadForLoopN = [this] (const MachineInstr &MI) -> bool {
   1245     if (MI.isCall() || HII->isDeallocRet(MI) || HII->isNewValueJump(MI))
   1246       return true;
   1247     if (HII->isPredicated(MI) && HII->isPredicatedNew(MI) && HII->isJumpR(MI))
   1248       return true;
   1249     return false;
   1250   };
   1251 
   1252   if (HII->isLoopN(I) && isBadForLoopN(J))
   1253     return true;
   1254   if (HII->isLoopN(J) && isBadForLoopN(I))
   1255     return true;
   1256 
   1257   // dealloc_return cannot appear in the same packet as a conditional or
   1258   // unconditional jump.
   1259   return HII->isDeallocRet(I) &&
   1260          (J.isBranch() || J.isCall() || J.isBarrier());
   1261 }
   1262 
   1263 bool HexagonPacketizerList::hasRegMaskDependence(const MachineInstr &I,
   1264                                                  const MachineInstr &J) {
   1265   // Adding I to a packet that has J.
   1266 
   1267   // Regmasks are not reflected in the scheduling dependency graph, so
   1268   // we need to check them manually. This code assumes that regmasks only
   1269   // occur on calls, and the problematic case is when we add an instruction
   1270   // defining a register R to a packet that has a call that clobbers R via
   1271   // a regmask. Those cannot be packetized together, because the call will
   1272   // be executed last. That's also a reson why it is ok to add a call
   1273   // clobbering R to a packet that defines R.
   1274 
   1275   // Look for regmasks in J.
   1276   for (const MachineOperand &OpJ : J.operands()) {
   1277     if (!OpJ.isRegMask())
   1278       continue;
   1279     assert((J.isCall() || HII->isTailCall(J)) && "Regmask on a non-call");
   1280     for (const MachineOperand &OpI : I.operands()) {
   1281       if (OpI.isReg()) {
   1282         if (OpJ.clobbersPhysReg(OpI.getReg()))
   1283           return true;
   1284       } else if (OpI.isRegMask()) {
   1285         // Both are regmasks. Assume that they intersect.
   1286         return true;
   1287       }
   1288     }
   1289   }
   1290   return false;
   1291 }
   1292 
   1293 bool HexagonPacketizerList::hasDualStoreDependence(const MachineInstr &I,
   1294                                                    const MachineInstr &J) {
   1295   bool SysI = isSystemInstr(I), SysJ = isSystemInstr(J);
   1296   bool StoreI = I.mayStore(), StoreJ = J.mayStore();
   1297   if ((SysI && StoreJ) || (SysJ && StoreI))
   1298     return true;
   1299 
   1300   if (StoreI && StoreJ) {
   1301     if (HII->isNewValueInst(J) || HII->isMemOp(J) || HII->isMemOp(I))
   1302       return true;
   1303   } else {
   1304     // A memop cannot be in the same packet with another memop or a store.
   1305     // Two stores can be together, but here I and J cannot both be stores.
   1306     bool MopStI = HII->isMemOp(I) || StoreI;
   1307     bool MopStJ = HII->isMemOp(J) || StoreJ;
   1308     if (MopStI && MopStJ)
   1309       return true;
   1310   }
   1311 
   1312   return (StoreJ && HII->isDeallocRet(I)) || (StoreI && HII->isDeallocRet(J));
   1313 }
   1314 
   1315 // SUI is the current instruction that is out side of the current packet.
   1316 // SUJ is the current instruction inside the current packet against which that
   1317 // SUI will be packetized.
   1318 bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) {
   1319   assert(SUI->getInstr() && SUJ->getInstr());
   1320   MachineInstr &I = *SUI->getInstr();
   1321   MachineInstr &J = *SUJ->getInstr();
   1322 
   1323   // Clear IgnoreDepMIs when Packet starts.
   1324   if (CurrentPacketMIs.size() == 1)
   1325     IgnoreDepMIs.clear();
   1326 
   1327   MachineBasicBlock::iterator II = I.getIterator();
   1328 
   1329   // Solo instructions cannot go in the packet.
   1330   assert(!isSoloInstruction(I) && "Unexpected solo instr!");
   1331 
   1332   if (cannotCoexist(I, J))
   1333     return false;
   1334 
   1335   Dependence = hasDeadDependence(I, J) || hasControlDependence(I, J);
   1336   if (Dependence)
   1337     return false;
   1338 
   1339   // Regmasks are not accounted for in the scheduling graph, so we need
   1340   // to explicitly check for dependencies caused by them. They should only
   1341   // appear on calls, so it's not too pessimistic to reject all regmask
   1342   // dependencies.
   1343   Dependence = hasRegMaskDependence(I, J);
   1344   if (Dependence)
   1345     return false;
   1346 
   1347   // Dual-store does not allow second store, if the first store is not
   1348   // in SLOT0. New value store, new value jump, dealloc_return and memop
   1349   // always take SLOT0. Arch spec 3.4.4.2.
   1350   Dependence = hasDualStoreDependence(I, J);
   1351   if (Dependence)
   1352     return false;
   1353 
   1354   // If an instruction feeds new value jump, glue it.
   1355   MachineBasicBlock::iterator NextMII = I.getIterator();
   1356   ++NextMII;
   1357   if (NextMII != I.getParent()->end() && HII->isNewValueJump(*NextMII)) {
   1358     MachineInstr &NextMI = *NextMII;
   1359 
   1360     bool secondRegMatch = false;
   1361     const MachineOperand &NOp0 = NextMI.getOperand(0);
   1362     const MachineOperand &NOp1 = NextMI.getOperand(1);
   1363 
   1364     if (NOp1.isReg() && I.getOperand(0).getReg() == NOp1.getReg())
   1365       secondRegMatch = true;
   1366 
   1367     for (MachineInstr *PI : CurrentPacketMIs) {
   1368       // NVJ can not be part of the dual jump - Arch Spec: section 7.8.
   1369       if (PI->isCall()) {
   1370         Dependence = true;
   1371         break;
   1372       }
   1373       // Validate:
   1374       // 1. Packet does not have a store in it.
   1375       // 2. If the first operand of the nvj is newified, and the second
   1376       //    operand is also a reg, it (second reg) is not defined in
   1377       //    the same packet.
   1378       // 3. If the second operand of the nvj is newified, (which means
   1379       //    first operand is also a reg), first reg is not defined in
   1380       //    the same packet.
   1381       if (PI->getOpcode() == Hexagon::S2_allocframe || PI->mayStore() ||
   1382           HII->isLoopN(*PI)) {
   1383         Dependence = true;
   1384         break;
   1385       }
   1386       // Check #2/#3.
   1387       const MachineOperand &OpR = secondRegMatch ? NOp0 : NOp1;
   1388       if (OpR.isReg() && PI->modifiesRegister(OpR.getReg(), HRI)) {
   1389         Dependence = true;
   1390         break;
   1391       }
   1392     }
   1393 
   1394     GlueToNewValueJump = true;
   1395     if (Dependence)
   1396       return false;
   1397   }
   1398 
   1399   // There no dependency between a prolog instruction and its successor.
   1400   if (!SUJ->isSucc(SUI))
   1401     return true;
   1402 
   1403   for (unsigned i = 0; i < SUJ->Succs.size(); ++i) {
   1404     if (FoundSequentialDependence)
   1405       break;
   1406 
   1407     if (SUJ->Succs[i].getSUnit() != SUI)
   1408       continue;
   1409 
   1410     SDep::Kind DepType = SUJ->Succs[i].getKind();
   1411     // For direct calls:
   1412     // Ignore register dependences for call instructions for packetization
   1413     // purposes except for those due to r31 and predicate registers.
   1414     //
   1415     // For indirect calls:
   1416     // Same as direct calls + check for true dependences to the register
   1417     // used in the indirect call.
   1418     //
   1419     // We completely ignore Order dependences for call instructions.
   1420     //
   1421     // For returns:
   1422     // Ignore register dependences for return instructions like jumpr,
   1423     // dealloc return unless we have dependencies on the explicit uses
   1424     // of the registers used by jumpr (like r31) or dealloc return
   1425     // (like r29 or r30).
   1426     unsigned DepReg = 0;
   1427     const TargetRegisterClass *RC = nullptr;
   1428     if (DepType == SDep::Data) {
   1429       DepReg = SUJ->Succs[i].getReg();
   1430       RC = HRI->getMinimalPhysRegClass(DepReg);
   1431     }
   1432 
   1433     if (I.isCall() || HII->isJumpR(I) || I.isReturn() || HII->isTailCall(I)) {
   1434       if (!isRegDependence(DepType))
   1435         continue;
   1436       if (!isCallDependent(I, DepType, SUJ->Succs[i].getReg()))
   1437         continue;
   1438     }
   1439 
   1440     if (DepType == SDep::Data) {
   1441       if (canPromoteToDotCur(J, SUJ, DepReg, II, RC))
   1442         if (promoteToDotCur(J, DepType, II, RC))
   1443           continue;
   1444     }
   1445 
   1446     // Data dpendence ok if we have load.cur.
   1447     if (DepType == SDep::Data && HII->isDotCurInst(J)) {
   1448       if (HII->isHVXVec(I))
   1449         continue;
   1450     }
   1451 
   1452     // For instructions that can be promoted to dot-new, try to promote.
   1453     if (DepType == SDep::Data) {
   1454       if (canPromoteToDotNew(I, SUJ, DepReg, II, RC)) {
   1455         if (promoteToDotNew(I, DepType, II, RC)) {
   1456           PromotedToDotNew = true;
   1457           if (cannotCoexist(I, J))
   1458             FoundSequentialDependence = true;
   1459           continue;
   1460         }
   1461       }
   1462       if (HII->isNewValueJump(I))
   1463         continue;
   1464     }
   1465 
   1466     // For predicated instructions, if the predicates are complements then
   1467     // there can be no dependence.
   1468     if (HII->isPredicated(I) && HII->isPredicated(J) &&
   1469         arePredicatesComplements(I, J)) {
   1470       // Not always safe to do this translation.
   1471       // DAG Builder attempts to reduce dependence edges using transitive
   1472       // nature of dependencies. Here is an example:
   1473       //
   1474       // r0 = tfr_pt ... (1)
   1475       // r0 = tfr_pf ... (2)
   1476       // r0 = tfr_pt ... (3)
   1477       //
   1478       // There will be an output dependence between (1)->(2) and (2)->(3).
   1479       // However, there is no dependence edge between (1)->(3). This results
   1480       // in all 3 instructions going in the same packet. We ignore dependce
   1481       // only once to avoid this situation.
   1482       auto Itr = find(IgnoreDepMIs, &J);
   1483       if (Itr != IgnoreDepMIs.end()) {
   1484         Dependence = true;
   1485         return false;
   1486       }
   1487       IgnoreDepMIs.push_back(&I);
   1488       continue;
   1489     }
   1490 
   1491     // Ignore Order dependences between unconditional direct branches
   1492     // and non-control-flow instructions.
   1493     if (isDirectJump(I) && !J.isBranch() && !J.isCall() &&
   1494         DepType == SDep::Order)
   1495       continue;
   1496 
   1497     // Ignore all dependences for jumps except for true and output
   1498     // dependences.
   1499     if (I.isConditionalBranch() && DepType != SDep::Data &&
   1500         DepType != SDep::Output)
   1501       continue;
   1502 
   1503     if (DepType == SDep::Output) {
   1504       FoundSequentialDependence = true;
   1505       break;
   1506     }
   1507 
   1508     // For Order dependences:
   1509     // 1. Volatile loads/stores can be packetized together, unless other
   1510     //    rules prevent is.
   1511     // 2. Store followed by a load is not allowed.
   1512     // 3. Store followed by a store is valid.
   1513     // 4. Load followed by any memory operation is allowed.
   1514     if (DepType == SDep::Order) {
   1515       if (!PacketizeVolatiles) {
   1516         bool OrdRefs = I.hasOrderedMemoryRef() || J.hasOrderedMemoryRef();
   1517         if (OrdRefs) {
   1518           FoundSequentialDependence = true;
   1519           break;
   1520         }
   1521       }
   1522       // J is first, I is second.
   1523       bool LoadJ = J.mayLoad(), StoreJ = J.mayStore();
   1524       bool LoadI = I.mayLoad(), StoreI = I.mayStore();
   1525       bool NVStoreJ = HII->isNewValueStore(J);
   1526       bool NVStoreI = HII->isNewValueStore(I);
   1527       bool IsVecJ = HII->isHVXVec(J);
   1528       bool IsVecI = HII->isHVXVec(I);
   1529 
   1530       if (Slot1Store && MF.getSubtarget<HexagonSubtarget>().hasV65Ops() &&
   1531           ((LoadJ && StoreI && !NVStoreI) ||
   1532            (StoreJ && LoadI && !NVStoreJ)) &&
   1533           (J.getOpcode() != Hexagon::S2_allocframe &&
   1534            I.getOpcode() != Hexagon::S2_allocframe) &&
   1535           (J.getOpcode() != Hexagon::L2_deallocframe &&
   1536            I.getOpcode() != Hexagon::L2_deallocframe) &&
   1537           (!HII->isMemOp(J) && !HII->isMemOp(I)) && (!IsVecJ && !IsVecI))
   1538         setmemShufDisabled(true);
   1539       else
   1540         if (StoreJ && LoadI && alias(J, I)) {
   1541           FoundSequentialDependence = true;
   1542           break;
   1543         }
   1544 
   1545       if (!StoreJ)
   1546         if (!LoadJ || (!LoadI && !StoreI)) {
   1547           // If J is neither load nor store, assume a dependency.
   1548           // If J is a load, but I is neither, also assume a dependency.
   1549           FoundSequentialDependence = true;
   1550           break;
   1551         }
   1552       // Store followed by store: not OK on V2.
   1553       // Store followed by load: not OK on all.
   1554       // Load followed by store: OK on all.
   1555       // Load followed by load: OK on all.
   1556       continue;
   1557     }
   1558 
   1559     // Special case for ALLOCFRAME: even though there is dependency
   1560     // between ALLOCFRAME and subsequent store, allow it to be packetized
   1561     // in a same packet. This implies that the store is using the caller's
   1562     // SP. Hence, offset needs to be updated accordingly.
   1563     if (DepType == SDep::Data && J.getOpcode() == Hexagon::S2_allocframe) {
   1564       unsigned Opc = I.getOpcode();
   1565       switch (Opc) {
   1566         case Hexagon::S2_storerd_io:
   1567         case Hexagon::S2_storeri_io:
   1568         case Hexagon::S2_storerh_io:
   1569         case Hexagon::S2_storerb_io:
   1570           if (I.getOperand(0).getReg() == HRI->getStackRegister()) {
   1571             // Since this store is to be glued with allocframe in the same
   1572             // packet, it will use SP of the previous stack frame, i.e.
   1573             // caller's SP. Therefore, we need to recalculate offset
   1574             // according to this change.
   1575             GlueAllocframeStore = useCallersSP(I);
   1576             if (GlueAllocframeStore)
   1577               continue;
   1578           }
   1579           break;
   1580         default:
   1581           break;
   1582       }
   1583     }
   1584 
   1585     // There are certain anti-dependencies that cannot be ignored.
   1586     // Specifically:
   1587     //   J2_call ... implicit-def %r0   ; SUJ
   1588     //   R0 = ...                   ; SUI
   1589     // Those cannot be packetized together, since the call will observe
   1590     // the effect of the assignment to R0.
   1591     if ((DepType == SDep::Anti || DepType == SDep::Output) && J.isCall()) {
   1592       // Check if I defines any volatile register. We should also check
   1593       // registers that the call may read, but these happen to be a
   1594       // subset of the volatile register set.
   1595       for (const MachineOperand &Op : I.operands()) {
   1596         if (Op.isReg() && Op.isDef()) {
   1597           Register R = Op.getReg();
   1598           if (!J.readsRegister(R, HRI) && !J.modifiesRegister(R, HRI))
   1599             continue;
   1600         } else if (!Op.isRegMask()) {
   1601           // If I has a regmask assume dependency.
   1602           continue;
   1603         }
   1604         FoundSequentialDependence = true;
   1605         break;
   1606       }
   1607     }
   1608 
   1609     // Skip over remaining anti-dependences. Two instructions that are
   1610     // anti-dependent can share a packet, since in most such cases all
   1611     // operands are read before any modifications take place.
   1612     // The exceptions are branch and call instructions, since they are
   1613     // executed after all other instructions have completed (at least
   1614     // conceptually).
   1615     if (DepType != SDep::Anti) {
   1616       FoundSequentialDependence = true;
   1617       break;
   1618     }
   1619   }
   1620 
   1621   if (FoundSequentialDependence) {
   1622     Dependence = true;
   1623     return false;
   1624   }
   1625 
   1626   return true;
   1627 }
   1628 
   1629 bool HexagonPacketizerList::isLegalToPruneDependencies(SUnit *SUI, SUnit *SUJ) {
   1630   assert(SUI->getInstr() && SUJ->getInstr());
   1631   MachineInstr &I = *SUI->getInstr();
   1632   MachineInstr &J = *SUJ->getInstr();
   1633 
   1634   bool Coexist = !cannotCoexist(I, J);
   1635 
   1636   if (Coexist && !Dependence)
   1637     return true;
   1638 
   1639   // Check if the instruction was promoted to a dot-new. If so, demote it
   1640   // back into a dot-old.
   1641   if (PromotedToDotNew)
   1642     demoteToDotOld(I);
   1643 
   1644   cleanUpDotCur();
   1645   // Check if the instruction (must be a store) was glued with an allocframe
   1646   // instruction. If so, restore its offset to its original value, i.e. use
   1647   // current SP instead of caller's SP.
   1648   if (GlueAllocframeStore) {
   1649     useCalleesSP(I);
   1650     GlueAllocframeStore = false;
   1651   }
   1652 
   1653   if (ChangedOffset != INT64_MAX)
   1654     undoChangedOffset(I);
   1655 
   1656   if (GlueToNewValueJump) {
   1657     // Putting I and J together would prevent the new-value jump from being
   1658     // packetized with the producer. In that case I and J must be separated.
   1659     GlueToNewValueJump = false;
   1660     return false;
   1661   }
   1662 
   1663   if (!Coexist)
   1664     return false;
   1665 
   1666   if (ChangedOffset == INT64_MAX && updateOffset(SUI, SUJ)) {
   1667     FoundSequentialDependence = false;
   1668     Dependence = false;
   1669     return true;
   1670   }
   1671 
   1672   return false;
   1673 }
   1674 
   1675 
   1676 bool HexagonPacketizerList::foundLSInPacket() {
   1677   bool FoundLoad = false;
   1678   bool FoundStore = false;
   1679 
   1680   for (auto MJ : CurrentPacketMIs) {
   1681     unsigned Opc = MJ->getOpcode();
   1682     if (Opc == Hexagon::S2_allocframe || Opc == Hexagon::L2_deallocframe)
   1683       continue;
   1684     if (HII->isMemOp(*MJ))
   1685       continue;
   1686     if (MJ->mayLoad())
   1687       FoundLoad = true;
   1688     if (MJ->mayStore() && !HII->isNewValueStore(*MJ))
   1689       FoundStore = true;
   1690   }
   1691   return FoundLoad && FoundStore;
   1692 }
   1693 
   1694 
   1695 MachineBasicBlock::iterator
   1696 HexagonPacketizerList::addToPacket(MachineInstr &MI) {
   1697   MachineBasicBlock::iterator MII = MI.getIterator();
   1698   MachineBasicBlock *MBB = MI.getParent();
   1699 
   1700   if (CurrentPacketMIs.empty())
   1701     PacketStalls = false;
   1702   PacketStalls |= producesStall(MI);
   1703 
   1704   if (MI.isImplicitDef()) {
   1705     // Add to the packet to allow subsequent instructions to be checked
   1706     // properly.
   1707     CurrentPacketMIs.push_back(&MI);
   1708     return MII;
   1709   }
   1710   assert(ResourceTracker->canReserveResources(MI));
   1711 
   1712   bool ExtMI = HII->isExtended(MI) || HII->isConstExtended(MI);
   1713   bool Good = true;
   1714 
   1715   if (GlueToNewValueJump) {
   1716     MachineInstr &NvjMI = *++MII;
   1717     // We need to put both instructions in the same packet: MI and NvjMI.
   1718     // Either of them can require a constant extender. Try to add both to
   1719     // the current packet, and if that fails, end the packet and start a
   1720     // new one.
   1721     ResourceTracker->reserveResources(MI);
   1722     if (ExtMI)
   1723       Good = tryAllocateResourcesForConstExt(true);
   1724 
   1725     bool ExtNvjMI = HII->isExtended(NvjMI) || HII->isConstExtended(NvjMI);
   1726     if (Good) {
   1727       if (ResourceTracker->canReserveResources(NvjMI))
   1728         ResourceTracker->reserveResources(NvjMI);
   1729       else
   1730         Good = false;
   1731     }
   1732     if (Good && ExtNvjMI)
   1733       Good = tryAllocateResourcesForConstExt(true);
   1734 
   1735     if (!Good) {
   1736       endPacket(MBB, MI);
   1737       assert(ResourceTracker->canReserveResources(MI));
   1738       ResourceTracker->reserveResources(MI);
   1739       if (ExtMI) {
   1740         assert(canReserveResourcesForConstExt());
   1741         tryAllocateResourcesForConstExt(true);
   1742       }
   1743       assert(ResourceTracker->canReserveResources(NvjMI));
   1744       ResourceTracker->reserveResources(NvjMI);
   1745       if (ExtNvjMI) {
   1746         assert(canReserveResourcesForConstExt());
   1747         reserveResourcesForConstExt();
   1748       }
   1749     }
   1750     CurrentPacketMIs.push_back(&MI);
   1751     CurrentPacketMIs.push_back(&NvjMI);
   1752     return MII;
   1753   }
   1754 
   1755   ResourceTracker->reserveResources(MI);
   1756   if (ExtMI && !tryAllocateResourcesForConstExt(true)) {
   1757     endPacket(MBB, MI);
   1758     if (PromotedToDotNew)
   1759       demoteToDotOld(MI);
   1760     if (GlueAllocframeStore) {
   1761       useCalleesSP(MI);
   1762       GlueAllocframeStore = false;
   1763     }
   1764     ResourceTracker->reserveResources(MI);
   1765     reserveResourcesForConstExt();
   1766   }
   1767 
   1768   CurrentPacketMIs.push_back(&MI);
   1769   return MII;
   1770 }
   1771 
   1772 void HexagonPacketizerList::endPacket(MachineBasicBlock *MBB,
   1773                                       MachineBasicBlock::iterator EndMI) {
   1774   // Replace VLIWPacketizerList::endPacket(MBB, EndMI).
   1775   LLVM_DEBUG({
   1776     if (!CurrentPacketMIs.empty()) {
   1777       dbgs() << "Finalizing packet:\n";
   1778       unsigned Idx = 0;
   1779       for (MachineInstr *MI : CurrentPacketMIs) {
   1780         unsigned R = ResourceTracker->getUsedResources(Idx++);
   1781         dbgs() << " * [res:0x" << utohexstr(R) << "] " << *MI;
   1782       }
   1783     }
   1784   });
   1785 
   1786   bool memShufDisabled = getmemShufDisabled();
   1787   if (memShufDisabled && !foundLSInPacket()) {
   1788     setmemShufDisabled(false);
   1789     LLVM_DEBUG(dbgs() << "  Not added to NoShufPacket\n");
   1790   }
   1791   memShufDisabled = getmemShufDisabled();
   1792 
   1793   OldPacketMIs.clear();
   1794   for (MachineInstr *MI : CurrentPacketMIs) {
   1795     MachineBasicBlock::instr_iterator NextMI = std::next(MI->getIterator());
   1796     for (auto &I : make_range(HII->expandVGatherPseudo(*MI), NextMI))
   1797       OldPacketMIs.push_back(&I);
   1798   }
   1799   CurrentPacketMIs.clear();
   1800 
   1801   if (OldPacketMIs.size() > 1) {
   1802     MachineBasicBlock::instr_iterator FirstMI(OldPacketMIs.front());
   1803     MachineBasicBlock::instr_iterator LastMI(EndMI.getInstrIterator());
   1804     finalizeBundle(*MBB, FirstMI, LastMI);
   1805     auto BundleMII = std::prev(FirstMI);
   1806     if (memShufDisabled)
   1807       HII->setBundleNoShuf(BundleMII);
   1808 
   1809     setmemShufDisabled(false);
   1810   }
   1811 
   1812   PacketHasDuplex = false;
   1813   PacketHasSLOT0OnlyInsn = false;
   1814   ResourceTracker->clearResources();
   1815   LLVM_DEBUG(dbgs() << "End packet\n");
   1816 }
   1817 
   1818 bool HexagonPacketizerList::shouldAddToPacket(const MachineInstr &MI) {
   1819   if (Minimal)
   1820     return false;
   1821 
   1822   // Constrainst for not packetizing this MI with existing instructions in a
   1823   // packet.
   1824   //	MI is a store instruction.
   1825   //	CurrentPacketMIs has a SLOT0 only instruction with constraint
   1826   //    A_RESTRICT_NOSLOT1_STORE/isRestrictNoSlot1Store.
   1827   if (MI.mayStore() && isPureSlot0InsnWithNoSlot1Store(MI))
   1828     return false;
   1829 
   1830   if (producesStall(MI))
   1831     return false;
   1832 
   1833   // If TinyCore with Duplexes is enabled, check if this MI can form a Duplex
   1834   // with any other instruction in the existing packet.
   1835   auto &HST = MI.getParent()->getParent()->getSubtarget<HexagonSubtarget>();
   1836   // Constraint 1: Only one duplex allowed per packet.
   1837   // Constraint 2: Consider duplex checks only if there is atleast one
   1838   // instruction in a packet.
   1839   // Constraint 3: If one of the existing instructions in the packet has a
   1840   // SLOT0 only instruction that can not be duplexed, do not attempt to form
   1841   // duplexes. (TODO: This will invalidate the L4_return* instructions to form a
   1842   // duplex)
   1843   if (HST.isTinyCoreWithDuplex() && CurrentPacketMIs.size() > 0 &&
   1844       !PacketHasDuplex) {
   1845     // Check for SLOT0 only non-duplexable instruction in packet.
   1846     for (auto &MJ : CurrentPacketMIs)
   1847       PacketHasSLOT0OnlyInsn |= HII->isPureSlot0(*MJ);
   1848     // Get the Big Core Opcode (dup_*).
   1849     int Opcode = HII->getDuplexOpcode(MI, false);
   1850     if (Opcode >= 0) {
   1851       // We now have an instruction that can be duplexed.
   1852       for (auto &MJ : CurrentPacketMIs) {
   1853         if (HII->isDuplexPair(MI, *MJ) && !PacketHasSLOT0OnlyInsn) {
   1854           PacketHasDuplex = true;
   1855           return true;
   1856         }
   1857       }
   1858       // If it can not be duplexed, check if there is a valid transition in DFA
   1859       // with the original opcode.
   1860       MachineInstr &MIRef = const_cast<MachineInstr &>(MI);
   1861       MIRef.setDesc(HII->get(Opcode));
   1862       return ResourceTracker->canReserveResources(MIRef);
   1863     }
   1864   }
   1865 
   1866   return true;
   1867 }
   1868 
   1869 bool HexagonPacketizerList::isPureSlot0InsnWithNoSlot1Store(
   1870     const MachineInstr &MI) {
   1871   bool noSlot1Store = false;
   1872   bool isSlot0Only = false;
   1873   for (auto J : CurrentPacketMIs) {
   1874     noSlot1Store |= HII->isRestrictNoSlot1Store(*J);
   1875     isSlot0Only |= HII->isPureSlot0(*J);
   1876   }
   1877 
   1878   return (noSlot1Store && isSlot0Only);
   1879 }
   1880 
   1881 // V60 forward scheduling.
   1882 bool HexagonPacketizerList::producesStall(const MachineInstr &I) {
   1883   // If the packet already stalls, then ignore the stall from a subsequent
   1884   // instruction in the same packet.
   1885   if (PacketStalls)
   1886     return false;
   1887 
   1888   // Check whether the previous packet is in a different loop. If this is the
   1889   // case, there is little point in trying to avoid a stall because that would
   1890   // favor the rare case (loop entry) over the common case (loop iteration).
   1891   //
   1892   // TODO: We should really be able to check all the incoming edges if this is
   1893   // the first packet in a basic block, so we can avoid stalls from the loop
   1894   // backedge.
   1895   if (!OldPacketMIs.empty()) {
   1896     auto *OldBB = OldPacketMIs.front()->getParent();
   1897     auto *ThisBB = I.getParent();
   1898     if (MLI->getLoopFor(OldBB) != MLI->getLoopFor(ThisBB))
   1899       return false;
   1900   }
   1901 
   1902   SUnit *SUI = MIToSUnit[const_cast<MachineInstr *>(&I)];
   1903 
   1904   // If the latency is 0 and there is a data dependence between this
   1905   // instruction and any instruction in the current packet, we disregard any
   1906   // potential stalls due to the instructions in the previous packet. Most of
   1907   // the instruction pairs that can go together in the same packet have 0
   1908   // latency between them. The exceptions are
   1909   // 1. NewValueJumps as they're generated much later and the latencies can't
   1910   // be changed at that point.
   1911   // 2. .cur instructions, if its consumer has a 0 latency successor (such as
   1912   // .new). In this case, the latency between .cur and the consumer stays
   1913   // non-zero even though we can have  both .cur and .new in the same packet.
   1914   // Changing the latency to 0 is not an option as it causes software pipeliner
   1915   // to not pipeline in some cases.
   1916 
   1917   // For Example:
   1918   // {
   1919   //   I1:  v6.cur = vmem(r0++#1)
   1920   //   I2:  v7 = valign(v6,v4,r2)
   1921   //   I3:  vmem(r5++#1) = v7.new
   1922   // }
   1923   // Here I2 and I3 has 0 cycle latency, but I1 and I2 has 2.
   1924 
   1925   for (auto J : CurrentPacketMIs) {
   1926     SUnit *SUJ = MIToSUnit[J];
   1927     for (auto &Pred : SUI->Preds)
   1928       if (Pred.getSUnit() == SUJ)
   1929         if ((Pred.getLatency() == 0 && Pred.isAssignedRegDep()) ||
   1930             HII->isNewValueJump(I) || HII->isToBeScheduledASAP(*J, I))
   1931           return false;
   1932   }
   1933 
   1934   // Check if the latency is greater than one between this instruction and any
   1935   // instruction in the previous packet.
   1936   for (auto J : OldPacketMIs) {
   1937     SUnit *SUJ = MIToSUnit[J];
   1938     for (auto &Pred : SUI->Preds)
   1939       if (Pred.getSUnit() == SUJ && Pred.getLatency() > 1)
   1940         return true;
   1941   }
   1942 
   1943   return false;
   1944 }
   1945 
   1946 //===----------------------------------------------------------------------===//
   1947 //                         Public Constructor Functions
   1948 //===----------------------------------------------------------------------===//
   1949 
   1950 FunctionPass *llvm::createHexagonPacketizer(bool Minimal) {
   1951   return new HexagonPacketizer(Minimal);
   1952 }
   1953