Home | History | Annotate | Line # | Download | only in AMDGPU
      1 //===---------------------------- GCNILPSched.cpp - -----------------------===//
      2 //
      3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
      4 // See https://llvm.org/LICENSE.txt for license information.
      5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
      6 //
      7 //===----------------------------------------------------------------------===//
      8 //
      9 /// \file
     10 //
     11 //===----------------------------------------------------------------------===//
     12 
     13 #include "llvm/CodeGen/ScheduleDAG.h"
     14 
     15 using namespace llvm;
     16 
     17 #define DEBUG_TYPE "machine-scheduler"
     18 
     19 namespace {
     20 
     21 class GCNILPScheduler {
     22   struct Candidate : ilist_node<Candidate> {
     23     SUnit *SU;
     24 
     25     Candidate(SUnit *SU_)
     26       : SU(SU_) {}
     27   };
     28 
     29   SpecificBumpPtrAllocator<Candidate> Alloc;
     30   typedef simple_ilist<Candidate> Queue;
     31   Queue PendingQueue;
     32   Queue AvailQueue;
     33   unsigned CurQueueId = 0;
     34 
     35   std::vector<unsigned> SUNumbers;
     36 
     37   /// CurCycle - The current scheduler state corresponds to this cycle.
     38   unsigned CurCycle = 0;
     39 
     40   unsigned getNodePriority(const SUnit *SU) const;
     41 
     42   const SUnit *pickBest(const SUnit *left, const SUnit *right);
     43   Candidate* pickCandidate();
     44 
     45   void releasePending();
     46   void advanceToCycle(unsigned NextCycle);
     47   void releasePredecessors(const SUnit* SU);
     48 
     49 public:
     50   std::vector<const SUnit*> schedule(ArrayRef<const SUnit*> TopRoots,
     51                                      const ScheduleDAG &DAG);
     52 };
     53 } // namespace
     54 
     55 /// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number.
     56 /// Smaller number is the higher priority.
     57 static unsigned
     58 CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) {
     59   unsigned &SethiUllmanNumber = SUNumbers[SU->NodeNum];
     60   if (SethiUllmanNumber != 0)
     61     return SethiUllmanNumber;
     62 
     63   unsigned Extra = 0;
     64   for (const SDep &Pred : SU->Preds) {
     65     if (Pred.isCtrl()) continue;  // ignore chain preds
     66     SUnit *PredSU = Pred.getSUnit();
     67     unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU, SUNumbers);
     68     if (PredSethiUllman > SethiUllmanNumber) {
     69       SethiUllmanNumber = PredSethiUllman;
     70       Extra = 0;
     71     }
     72     else if (PredSethiUllman == SethiUllmanNumber)
     73       ++Extra;
     74   }
     75 
     76   SethiUllmanNumber += Extra;
     77 
     78   if (SethiUllmanNumber == 0)
     79     SethiUllmanNumber = 1;
     80 
     81   return SethiUllmanNumber;
     82 }
     83 
     84 // Lower priority means schedule further down. For bottom-up scheduling, lower
     85 // priority SUs are scheduled before higher priority SUs.
     86 unsigned GCNILPScheduler::getNodePriority(const SUnit *SU) const {
     87   assert(SU->NodeNum < SUNumbers.size());
     88   if (SU->NumSuccs == 0 && SU->NumPreds != 0)
     89     // If SU does not have a register use, i.e. it doesn't produce a value
     90     // that would be consumed (e.g. store), then it terminates a chain of
     91     // computation.  Give it a large SethiUllman number so it will be
     92     // scheduled right before its predecessors that it doesn't lengthen
     93     // their live ranges.
     94     return 0xffff;
     95 
     96   if (SU->NumPreds == 0 && SU->NumSuccs != 0)
     97     // If SU does not have a register def, schedule it close to its uses
     98     // because it does not lengthen any live ranges.
     99     return 0;
    100 
    101   return SUNumbers[SU->NodeNum];
    102 }
    103 
    104 /// closestSucc - Returns the scheduled cycle of the successor which is
    105 /// closest to the current cycle.
    106 static unsigned closestSucc(const SUnit *SU) {
    107   unsigned MaxHeight = 0;
    108   for (const SDep &Succ : SU->Succs) {
    109     if (Succ.isCtrl()) continue;  // ignore chain succs
    110     unsigned Height = Succ.getSUnit()->getHeight();
    111     // If there are bunch of CopyToRegs stacked up, they should be considered
    112     // to be at the same position.
    113     if (Height > MaxHeight)
    114       MaxHeight = Height;
    115   }
    116   return MaxHeight;
    117 }
    118 
    119 /// calcMaxScratches - Returns an cost estimate of the worse case requirement
    120 /// for scratch registers, i.e. number of data dependencies.
    121 static unsigned calcMaxScratches(const SUnit *SU) {
    122   unsigned Scratches = 0;
    123   for (const SDep &Pred : SU->Preds) {
    124     if (Pred.isCtrl()) continue;  // ignore chain preds
    125     Scratches++;
    126   }
    127   return Scratches;
    128 }
    129 
    130 // Return -1 if left has higher priority, 1 if right has higher priority.
    131 // Return 0 if latency-based priority is equivalent.
    132 static int BUCompareLatency(const SUnit *left, const SUnit *right) {
    133   // Scheduling an instruction that uses a VReg whose postincrement has not yet
    134   // been scheduled will induce a copy. Model this as an extra cycle of latency.
    135   int LHeight = (int)left->getHeight();
    136   int RHeight = (int)right->getHeight();
    137 
    138   // If either node is scheduling for latency, sort them by height/depth
    139   // and latency.
    140 
    141   // If neither instruction stalls (!LStall && !RStall) and HazardRecognizer
    142   // is enabled, grouping instructions by cycle, then its height is already
    143   // covered so only its depth matters. We also reach this point if both stall
    144   // but have the same height.
    145   if (LHeight != RHeight)
    146     return LHeight > RHeight ? 1 : -1;
    147 
    148   int LDepth = left->getDepth();
    149   int RDepth = right->getDepth();
    150   if (LDepth != RDepth) {
    151     LLVM_DEBUG(dbgs() << "  Comparing latency of SU (" << left->NodeNum
    152                       << ") depth " << LDepth << " vs SU (" << right->NodeNum
    153                       << ") depth " << RDepth << "\n");
    154     return LDepth < RDepth ? 1 : -1;
    155   }
    156   if (left->Latency != right->Latency)
    157     return left->Latency > right->Latency ? 1 : -1;
    158 
    159   return 0;
    160 }
    161 
    162 const SUnit *GCNILPScheduler::pickBest(const SUnit *left, const SUnit *right)
    163 {
    164   // TODO: add register pressure lowering checks
    165 
    166   bool const DisableSchedCriticalPath = false;
    167   int MaxReorderWindow = 6;
    168   if (!DisableSchedCriticalPath) {
    169     int spread = (int)left->getDepth() - (int)right->getDepth();
    170     if (std::abs(spread) > MaxReorderWindow) {
    171       LLVM_DEBUG(dbgs() << "Depth of SU(" << left->NodeNum << "): "
    172                         << left->getDepth() << " != SU(" << right->NodeNum
    173                         << "): " << right->getDepth() << "\n");
    174       return left->getDepth() < right->getDepth() ? right : left;
    175     }
    176   }
    177 
    178   bool const DisableSchedHeight = false;
    179   if (!DisableSchedHeight && left->getHeight() != right->getHeight()) {
    180     int spread = (int)left->getHeight() - (int)right->getHeight();
    181     if (std::abs(spread) > MaxReorderWindow)
    182       return left->getHeight() > right->getHeight() ? right : left;
    183   }
    184 
    185   // Prioritize by Sethi-Ulmann number and push CopyToReg nodes down.
    186   unsigned LPriority = getNodePriority(left);
    187   unsigned RPriority = getNodePriority(right);
    188 
    189   if (LPriority != RPriority)
    190     return LPriority > RPriority ? right : left;
    191 
    192   // Try schedule def + use closer when Sethi-Ullman numbers are the same.
    193   // e.g.
    194   // t1 = op t2, c1
    195   // t3 = op t4, c2
    196   //
    197   // and the following instructions are both ready.
    198   // t2 = op c3
    199   // t4 = op c4
    200   //
    201   // Then schedule t2 = op first.
    202   // i.e.
    203   // t4 = op c4
    204   // t2 = op c3
    205   // t1 = op t2, c1
    206   // t3 = op t4, c2
    207   //
    208   // This creates more short live intervals.
    209   unsigned LDist = closestSucc(left);
    210   unsigned RDist = closestSucc(right);
    211   if (LDist != RDist)
    212     return LDist < RDist ? right : left;
    213 
    214   // How many registers becomes live when the node is scheduled.
    215   unsigned LScratch = calcMaxScratches(left);
    216   unsigned RScratch = calcMaxScratches(right);
    217   if (LScratch != RScratch)
    218     return LScratch > RScratch ? right : left;
    219 
    220   bool const DisableSchedCycles = false;
    221   if (!DisableSchedCycles) {
    222     int result = BUCompareLatency(left, right);
    223     if (result != 0)
    224       return result > 0 ? right : left;
    225     return left;
    226   }
    227   else {
    228     if (left->getHeight() != right->getHeight())
    229       return (left->getHeight() > right->getHeight()) ? right : left;
    230 
    231     if (left->getDepth() != right->getDepth())
    232       return (left->getDepth() < right->getDepth()) ? right : left;
    233   }
    234 
    235   assert(left->NodeQueueId && right->NodeQueueId &&
    236         "NodeQueueId cannot be zero");
    237   return (left->NodeQueueId > right->NodeQueueId) ? right : left;
    238 }
    239 
    240 GCNILPScheduler::Candidate* GCNILPScheduler::pickCandidate() {
    241   if (AvailQueue.empty())
    242     return nullptr;
    243   auto Best = AvailQueue.begin();
    244   for (auto I = std::next(AvailQueue.begin()), E = AvailQueue.end(); I != E; ++I) {
    245     auto NewBestSU = pickBest(Best->SU, I->SU);
    246     if (NewBestSU != Best->SU) {
    247       assert(NewBestSU == I->SU);
    248       Best = I;
    249     }
    250   }
    251   return &*Best;
    252 }
    253 
    254 void GCNILPScheduler::releasePending() {
    255   // Check to see if any of the pending instructions are ready to issue.  If
    256   // so, add them to the available queue.
    257   for(auto I = PendingQueue.begin(), E = PendingQueue.end(); I != E;) {
    258     auto &C = *I++;
    259     if (C.SU->getHeight() <= CurCycle) {
    260       PendingQueue.remove(C);
    261       AvailQueue.push_back(C);
    262       C.SU->NodeQueueId = CurQueueId++;
    263     }
    264   }
    265 }
    266 
    267 /// Move the scheduler state forward by the specified number of Cycles.
    268 void GCNILPScheduler::advanceToCycle(unsigned NextCycle) {
    269   if (NextCycle <= CurCycle)
    270     return;
    271   CurCycle = NextCycle;
    272   releasePending();
    273 }
    274 
    275 void GCNILPScheduler::releasePredecessors(const SUnit* SU) {
    276   for (const auto &PredEdge : SU->Preds) {
    277     auto PredSU = PredEdge.getSUnit();
    278     if (PredEdge.isWeak())
    279       continue;
    280     assert(PredSU->isBoundaryNode() || PredSU->NumSuccsLeft > 0);
    281 
    282     PredSU->setHeightToAtLeast(SU->getHeight() + PredEdge.getLatency());
    283 
    284     if (!PredSU->isBoundaryNode() && --PredSU->NumSuccsLeft == 0)
    285       PendingQueue.push_front(*new (Alloc.Allocate()) Candidate(PredSU));
    286   }
    287 }
    288 
    289 std::vector<const SUnit*>
    290 GCNILPScheduler::schedule(ArrayRef<const SUnit*> BotRoots,
    291                           const ScheduleDAG &DAG) {
    292   auto &SUnits = const_cast<ScheduleDAG&>(DAG).SUnits;
    293 
    294   std::vector<SUnit> SUSavedCopy;
    295   SUSavedCopy.resize(SUnits.size());
    296 
    297   // we cannot save only those fields we touch: some of them are private
    298   // so save units verbatim: this assumes SUnit should have value semantics
    299   for (const SUnit &SU : SUnits)
    300     SUSavedCopy[SU.NodeNum] = SU;
    301 
    302   SUNumbers.assign(SUnits.size(), 0);
    303   for (const SUnit &SU : SUnits)
    304     CalcNodeSethiUllmanNumber(&SU, SUNumbers);
    305 
    306   for (auto SU : BotRoots) {
    307     AvailQueue.push_back(
    308       *new (Alloc.Allocate()) Candidate(const_cast<SUnit*>(SU)));
    309   }
    310   releasePredecessors(&DAG.ExitSU);
    311 
    312   std::vector<const SUnit*> Schedule;
    313   Schedule.reserve(SUnits.size());
    314   while (true) {
    315     if (AvailQueue.empty() && !PendingQueue.empty()) {
    316       auto EarliestSU = std::min_element(
    317         PendingQueue.begin(), PendingQueue.end(),
    318         [=](const Candidate& C1, const Candidate& C2) {
    319         return C1.SU->getHeight() < C2.SU->getHeight();
    320       })->SU;
    321       advanceToCycle(std::max(CurCycle + 1, EarliestSU->getHeight()));
    322     }
    323     if (AvailQueue.empty())
    324       break;
    325 
    326     LLVM_DEBUG(dbgs() << "\n=== Picking candidate\n"
    327                          "Ready queue:";
    328                for (auto &C
    329                     : AvailQueue) dbgs()
    330                << ' ' << C.SU->NodeNum;
    331                dbgs() << '\n';);
    332 
    333     auto C = pickCandidate();
    334     assert(C);
    335     AvailQueue.remove(*C);
    336     auto SU = C->SU;
    337     LLVM_DEBUG(dbgs() << "Selected "; DAG.dumpNode(*SU));
    338 
    339     advanceToCycle(SU->getHeight());
    340 
    341     releasePredecessors(SU);
    342     Schedule.push_back(SU);
    343     SU->isScheduled = true;
    344   }
    345   assert(SUnits.size() == Schedule.size());
    346 
    347   std::reverse(Schedule.begin(), Schedule.end());
    348 
    349   // restore units
    350   for (auto &SU : SUnits)
    351     SU = SUSavedCopy[SU.NodeNum];
    352 
    353   return Schedule;
    354 }
    355 
    356 namespace llvm {
    357 std::vector<const SUnit*> makeGCNILPScheduler(ArrayRef<const SUnit*> BotRoots,
    358                                               const ScheduleDAG &DAG) {
    359   GCNILPScheduler S;
    360   return S.schedule(BotRoots, DAG);
    361 }
    362 }
    363