//===-- llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp -----------------------===//
      1 //===-- GCNSchedStrategy.cpp - GCN Scheduler Strategy ---------------------===//
      2 //
      3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
      4 // See https://llvm.org/LICENSE.txt for license information.
      5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
      6 //
      7 //===----------------------------------------------------------------------===//
      8 //
      9 /// \file
     10 /// This contains a MachineSchedStrategy implementation for maximizing wave
     11 /// occupancy on GCN hardware.
     12 //===----------------------------------------------------------------------===//
     13 
     14 #include "GCNSchedStrategy.h"
     15 #include "SIMachineFunctionInfo.h"
     16 
     17 #define DEBUG_TYPE "machine-scheduler"
     18 
     19 using namespace llvm;
     20 
     21 GCNMaxOccupancySchedStrategy::GCNMaxOccupancySchedStrategy(
     22     const MachineSchedContext *C) :
     23     GenericScheduler(C), TargetOccupancy(0), HasClusteredNodes(false),
     24     HasExcessPressure(false), MF(nullptr) { }
     25 
     26 void GCNMaxOccupancySchedStrategy::initialize(ScheduleDAGMI *DAG) {
     27   GenericScheduler::initialize(DAG);
     28 
     29   const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo*>(TRI);
     30 
     31   MF = &DAG->MF;
     32 
     33   const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
     34 
     35   // FIXME: This is also necessary, because some passes that run after
     36   // scheduling and before regalloc increase register pressure.
     37   const int ErrorMargin = 3;
     38 
     39   SGPRExcessLimit = Context->RegClassInfo
     40     ->getNumAllocatableRegs(&AMDGPU::SGPR_32RegClass) - ErrorMargin;
     41   VGPRExcessLimit = Context->RegClassInfo
     42     ->getNumAllocatableRegs(&AMDGPU::VGPR_32RegClass) - ErrorMargin;
     43   if (TargetOccupancy) {
     44     SGPRCriticalLimit = ST.getMaxNumSGPRs(TargetOccupancy, true);
     45     VGPRCriticalLimit = ST.getMaxNumVGPRs(TargetOccupancy);
     46   } else {
     47     SGPRCriticalLimit = SRI->getRegPressureSetLimit(DAG->MF,
     48         AMDGPU::RegisterPressureSets::SReg_32);
     49     VGPRCriticalLimit = SRI->getRegPressureSetLimit(DAG->MF,
     50         AMDGPU::RegisterPressureSets::VGPR_32);
     51   }
     52 
     53   SGPRCriticalLimit -= ErrorMargin;
     54   VGPRCriticalLimit -= ErrorMargin;
     55 }
     56 
/// Initialize candidate \p Cand for scheduling unit \p SU and record in
/// Cand.RPDelta how scheduling \p SU would move SGPR/VGPR pressure relative
/// to the excess and critical limits computed in initialize().
///
/// \param AtTop        true if \p SU would be scheduled at the top boundary.
/// \param RPTracker    tracker positioned at the current boundary.
/// \param SGPRPressure SReg_32 pressure at the current position (before SU).
/// \param VGPRPressure VGPR_32 pressure at the current position (before SU).
void GCNMaxOccupancySchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU,
                                     bool AtTop, const RegPressureTracker &RPTracker,
                                     const SIRegisterInfo *SRI,
                                     unsigned SGPRPressure,
                                     unsigned VGPRPressure) {

  Cand.SU = SU;
  Cand.AtTop = AtTop;

  // getDownwardPressure() and getUpwardPressure() make temporary changes to
  // the tracker, so we need to pass those function a non-const copy.
  RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);

  // Pressure/MaxPressure are reused as scratch buffers across calls; they hold
  // the pressure after hypothetically scheduling SU.
  Pressure.clear();
  MaxPressure.clear();

  if (AtTop)
    TempTracker.getDownwardPressure(SU->getInstr(), Pressure, MaxPressure);
  else {
    // FIXME: I think for bottom up scheduling, the register pressure is cached
    // and can be retrieved by DAG->getPressureDif(SU).
    TempTracker.getUpwardPressure(SU->getInstr(), Pressure, MaxPressure);
  }

  unsigned NewSGPRPressure = Pressure[AMDGPU::RegisterPressureSets::SReg_32];
  unsigned NewVGPRPressure = Pressure[AMDGPU::RegisterPressureSets::VGPR_32];

  // If two instructions increase the pressure of different register sets
  // by the same amount, the generic scheduler will prefer to schedule the
  // instruction that increases the set with the least amount of registers,
  // which in our case would be SGPRs.  This is rarely what we want, so
  // when we report excess/critical register pressure, we do it either
  // only for VGPRs or only for SGPRs.

  // FIXME: Better heuristics to determine whether to prefer SGPRs or VGPRs.
  const unsigned MaxVGPRPressureInc = 16;
  bool ShouldTrackVGPRs = VGPRPressure + MaxVGPRPressureInc >= VGPRExcessLimit;
  bool ShouldTrackSGPRs = !ShouldTrackVGPRs && SGPRPressure >= SGPRExcessLimit;


  // FIXME: We have to enter REG-EXCESS before we reach the actual threshold
  // to increase the likelihood we don't go over the limits.  We should improve
  // the analysis to look through dependencies to find the path with the least
  // register pressure.

  // We only need to update the RPDelta for instructions that increase register
  // pressure. Instructions that decrease or keep reg pressure the same will be
  // marked as RegExcess in tryCandidate() when they are compared with
  // instructions that increase the register pressure.
  if (ShouldTrackVGPRs && NewVGPRPressure >= VGPRExcessLimit) {
    HasExcessPressure = true;
    Cand.RPDelta.Excess = PressureChange(AMDGPU::RegisterPressureSets::VGPR_32);
    Cand.RPDelta.Excess.setUnitInc(NewVGPRPressure - VGPRExcessLimit);
  }

  if (ShouldTrackSGPRs && NewSGPRPressure >= SGPRExcessLimit) {
    HasExcessPressure = true;
    Cand.RPDelta.Excess = PressureChange(AMDGPU::RegisterPressureSets::SReg_32);
    Cand.RPDelta.Excess.setUnitInc(NewSGPRPressure - SGPRExcessLimit);
  }

  // Register pressure is considered 'CRITICAL' if it is approaching a value
  // that would reduce the wave occupancy for the execution unit.  When
  // register pressure is 'CRITICAL', increasing SGPR and VGPR pressure both
  // has the same cost, so we don't need to prefer one over the other.

  // Signed deltas: negative means still below the critical limit.
  int SGPRDelta = NewSGPRPressure - SGPRCriticalLimit;
  int VGPRDelta = NewVGPRPressure - VGPRCriticalLimit;

  if (SGPRDelta >= 0 || VGPRDelta >= 0) {
    HasExcessPressure = true;
    // Report whichever register set is further over its critical limit.
    if (SGPRDelta > VGPRDelta) {
      Cand.RPDelta.CriticalMax =
        PressureChange(AMDGPU::RegisterPressureSets::SReg_32);
      Cand.RPDelta.CriticalMax.setUnitInc(SGPRDelta);
    } else {
      Cand.RPDelta.CriticalMax =
        PressureChange(AMDGPU::RegisterPressureSets::VGPR_32);
      Cand.RPDelta.CriticalMax.setUnitInc(VGPRDelta);
    }
  }
}
    139 
    140 // This function is mostly cut and pasted from
    141 // GenericScheduler::pickNodeFromQueue()
    142 void GCNMaxOccupancySchedStrategy::pickNodeFromQueue(SchedBoundary &Zone,
    143                                          const CandPolicy &ZonePolicy,
    144                                          const RegPressureTracker &RPTracker,
    145                                          SchedCandidate &Cand) {
    146   const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo*>(TRI);
    147   ArrayRef<unsigned> Pressure = RPTracker.getRegSetPressureAtPos();
    148   unsigned SGPRPressure = Pressure[AMDGPU::RegisterPressureSets::SReg_32];
    149   unsigned VGPRPressure = Pressure[AMDGPU::RegisterPressureSets::VGPR_32];
    150   ReadyQueue &Q = Zone.Available;
    151   for (SUnit *SU : Q) {
    152 
    153     SchedCandidate TryCand(ZonePolicy);
    154     initCandidate(TryCand, SU, Zone.isTop(), RPTracker, SRI,
    155                   SGPRPressure, VGPRPressure);
    156     // Pass SchedBoundary only when comparing nodes from the same boundary.
    157     SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr;
    158     GenericScheduler::tryCandidate(Cand, TryCand, ZoneArg);
    159     if (TryCand.Reason != NoCand) {
    160       // Initialize resource delta if needed in case future heuristics query it.
    161       if (TryCand.ResDelta == SchedResourceDelta())
    162         TryCand.initResourceDelta(Zone.DAG, SchedModel);
    163       Cand.setBest(TryCand);
    164       LLVM_DEBUG(traceCandidate(Cand));
    165     }
    166   }
    167 }
    168 
// This function is mostly cut and pasted from
// GenericScheduler::pickNodeBidirectional()
//
// Picks the better of the best bottom-up and best top-down candidates.
// BotCand/TopCand are cached across calls and only recomputed when stale
// (invalid, already scheduled, or computed under a different policy).
SUnit *GCNMaxOccupancySchedStrategy::pickNodeBidirectional(bool &IsTopNode) {
  // Schedule as far as possible in the direction of no choice. This is most
  // efficient, but also provides the best heuristics for CriticalPSets.
  if (SUnit *SU = Bot.pickOnlyChoice()) {
    IsTopNode = false;
    return SU;
  }
  if (SUnit *SU = Top.pickOnlyChoice()) {
    IsTopNode = true;
    return SU;
  }
  // Set the bottom-up policy based on the state of the current bottom zone and
  // the instructions outside the zone, including the top zone.
  CandPolicy BotPolicy;
  setPolicy(BotPolicy, /*IsPostRA=*/false, Bot, &Top);
  // Set the top-down policy based on the state of the current top zone and
  // the instructions outside the zone, including the bottom zone.
  CandPolicy TopPolicy;
  setPolicy(TopPolicy, /*IsPostRA=*/false, Top, &Bot);

  // See if BotCand is still valid (because we previously scheduled from Top).
  LLVM_DEBUG(dbgs() << "Picking from Bot:\n");
  if (!BotCand.isValid() || BotCand.SU->isScheduled ||
      BotCand.Policy != BotPolicy) {
    BotCand.reset(CandPolicy());
    pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand);
    assert(BotCand.Reason != NoCand && "failed to find the first candidate");
  } else {
    LLVM_DEBUG(traceCandidate(BotCand));
#ifndef NDEBUG
    if (VerifyScheduling) {
      // Re-pick from scratch and check that the cached candidate matches what
      // a fresh evaluation would produce.
      SchedCandidate TCand;
      TCand.reset(CandPolicy());
      pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), TCand);
      assert(TCand.SU == BotCand.SU &&
             "Last pick result should correspond to re-picking right now");
    }
#endif
  }

  // Check if the top Q has a better candidate.
  LLVM_DEBUG(dbgs() << "Picking from Top:\n");
  if (!TopCand.isValid() || TopCand.SU->isScheduled ||
      TopCand.Policy != TopPolicy) {
    TopCand.reset(CandPolicy());
    pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand);
    assert(TopCand.Reason != NoCand && "failed to find the first candidate");
  } else {
    LLVM_DEBUG(traceCandidate(TopCand));
#ifndef NDEBUG
    if (VerifyScheduling) {
      SchedCandidate TCand;
      TCand.reset(CandPolicy());
      pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TCand);
      assert(TCand.SU == TopCand.SU &&
           "Last pick result should correspond to re-picking right now");
    }
#endif
  }

  // Pick best from BotCand and TopCand.
  LLVM_DEBUG(dbgs() << "Top Cand: "; traceCandidate(TopCand);
             dbgs() << "Bot Cand: "; traceCandidate(BotCand););
  // Start from the bottom candidate and let tryCandidate() decide whether the
  // top candidate wins; TopCand.Reason is cleared first so a win is visible.
  SchedCandidate Cand = BotCand;
  TopCand.Reason = NoCand;
  GenericScheduler::tryCandidate(Cand, TopCand, nullptr);
  if (TopCand.Reason != NoCand) {
    Cand.setBest(TopCand);
  }
  LLVM_DEBUG(dbgs() << "Picking: "; traceCandidate(Cand););

  IsTopNode = Cand.AtTop;
  return Cand.SU;
}
    245 
    246 // This function is mostly cut and pasted from
    247 // GenericScheduler::pickNode()
    248 SUnit *GCNMaxOccupancySchedStrategy::pickNode(bool &IsTopNode) {
    249   if (DAG->top() == DAG->bottom()) {
    250     assert(Top.Available.empty() && Top.Pending.empty() &&
    251            Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
    252     return nullptr;
    253   }
    254   SUnit *SU;
    255   do {
    256     if (RegionPolicy.OnlyTopDown) {
    257       SU = Top.pickOnlyChoice();
    258       if (!SU) {
    259         CandPolicy NoPolicy;
    260         TopCand.reset(NoPolicy);
    261         pickNodeFromQueue(Top, NoPolicy, DAG->getTopRPTracker(), TopCand);
    262         assert(TopCand.Reason != NoCand && "failed to find a candidate");
    263         SU = TopCand.SU;
    264       }
    265       IsTopNode = true;
    266     } else if (RegionPolicy.OnlyBottomUp) {
    267       SU = Bot.pickOnlyChoice();
    268       if (!SU) {
    269         CandPolicy NoPolicy;
    270         BotCand.reset(NoPolicy);
    271         pickNodeFromQueue(Bot, NoPolicy, DAG->getBotRPTracker(), BotCand);
    272         assert(BotCand.Reason != NoCand && "failed to find a candidate");
    273         SU = BotCand.SU;
    274       }
    275       IsTopNode = false;
    276     } else {
    277       SU = pickNodeBidirectional(IsTopNode);
    278     }
    279   } while (SU->isScheduled);
    280 
    281   if (SU->isTopReady())
    282     Top.removeReady(SU);
    283   if (SU->isBottomReady())
    284     Bot.removeReady(SU);
    285 
    286   if (!HasClusteredNodes && SU->getInstr()->mayLoadOrStore()) {
    287     for (SDep &Dep : SU->Preds) {
    288       if (Dep.isCluster()) {
    289         HasClusteredNodes = true;
    290         break;
    291       }
    292     }
    293   }
    294 
    295   LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
    296                     << *SU->getInstr());
    297   return SU;
    298 }
    299 
    300 GCNScheduleDAGMILive::GCNScheduleDAGMILive(MachineSchedContext *C,
    301                         std::unique_ptr<MachineSchedStrategy> S) :
    302   ScheduleDAGMILive(C, std::move(S)),
    303   ST(MF.getSubtarget<GCNSubtarget>()),
    304   MFI(*MF.getInfo<SIMachineFunctionInfo>()),
    305   StartingOccupancy(MFI.getOccupancy()),
    306   MinOccupancy(StartingOccupancy), Stage(Collect), RegionIdx(0) {
    307 
    308   LLVM_DEBUG(dbgs() << "Starting occupancy is " << StartingOccupancy << ".\n");
    309 }
    310 
    311 void GCNScheduleDAGMILive::schedule() {
    312   if (Stage == Collect) {
    313     // Just record regions at the first pass.
    314     Regions.push_back(std::make_pair(RegionBegin, RegionEnd));
    315     return;
    316   }
    317 
    318   std::vector<MachineInstr*> Unsched;
    319   Unsched.reserve(NumRegionInstrs);
    320   for (auto &I : *this) {
    321     Unsched.push_back(&I);
    322   }
    323 
    324   GCNRegPressure PressureBefore;
    325   if (LIS) {
    326     PressureBefore = Pressure[RegionIdx];
    327 
    328     LLVM_DEBUG(dbgs() << "Pressure before scheduling:\nRegion live-ins:";
    329                GCNRPTracker::printLiveRegs(dbgs(), LiveIns[RegionIdx], MRI);
    330                dbgs() << "Region live-in pressure:  ";
    331                llvm::getRegPressure(MRI, LiveIns[RegionIdx]).print(dbgs());
    332                dbgs() << "Region register pressure: ";
    333                PressureBefore.print(dbgs()));
    334   }
    335 
    336   GCNMaxOccupancySchedStrategy &S = (GCNMaxOccupancySchedStrategy&)*SchedImpl;
    337   // Set HasClusteredNodes to true for late stages where we have already
    338   // collected it. That way pickNode() will not scan SDep's when not needed.
    339   S.HasClusteredNodes = Stage > InitialSchedule;
    340   S.HasExcessPressure = false;
    341   ScheduleDAGMILive::schedule();
    342   Regions[RegionIdx] = std::make_pair(RegionBegin, RegionEnd);
    343   RescheduleRegions[RegionIdx] = false;
    344   if (Stage == InitialSchedule && S.HasClusteredNodes)
    345     RegionsWithClusters[RegionIdx] = true;
    346   if (S.HasExcessPressure)
    347     RegionsWithHighRP[RegionIdx] = true;
    348 
    349   if (!LIS)
    350     return;
    351 
    352   // Check the results of scheduling.
    353   auto PressureAfter = getRealRegPressure();
    354 
    355   LLVM_DEBUG(dbgs() << "Pressure after scheduling: ";
    356              PressureAfter.print(dbgs()));
    357 
    358   if (PressureAfter.getSGPRNum() <= S.SGPRCriticalLimit &&
    359       PressureAfter.getVGPRNum(ST.hasGFX90AInsts()) <= S.VGPRCriticalLimit) {
    360     Pressure[RegionIdx] = PressureAfter;
    361     LLVM_DEBUG(dbgs() << "Pressure in desired limits, done.\n");
    362     return;
    363   }
    364   unsigned Occ = MFI.getOccupancy();
    365   unsigned WavesAfter = std::min(Occ, PressureAfter.getOccupancy(ST));
    366   unsigned WavesBefore = std::min(Occ, PressureBefore.getOccupancy(ST));
    367   LLVM_DEBUG(dbgs() << "Occupancy before scheduling: " << WavesBefore
    368                     << ", after " << WavesAfter << ".\n");
    369 
    370   // We could not keep current target occupancy because of the just scheduled
    371   // region. Record new occupancy for next scheduling cycle.
    372   unsigned NewOccupancy = std::max(WavesAfter, WavesBefore);
    373   // Allow memory bound functions to drop to 4 waves if not limited by an
    374   // attribute.
    375   if (WavesAfter < WavesBefore && WavesAfter < MinOccupancy &&
    376       WavesAfter >= MFI.getMinAllowedOccupancy()) {
    377     LLVM_DEBUG(dbgs() << "Function is memory bound, allow occupancy drop up to "
    378                       << MFI.getMinAllowedOccupancy() << " waves\n");
    379     NewOccupancy = WavesAfter;
    380   }
    381   if (NewOccupancy < MinOccupancy) {
    382     MinOccupancy = NewOccupancy;
    383     MFI.limitOccupancy(MinOccupancy);
    384     LLVM_DEBUG(dbgs() << "Occupancy lowered for the function to "
    385                       << MinOccupancy << ".\n");
    386   }
    387 
    388   unsigned MaxVGPRs = ST.getMaxNumVGPRs(MF);
    389   unsigned MaxSGPRs = ST.getMaxNumSGPRs(MF);
    390   if (PressureAfter.getVGPRNum(false) > MaxVGPRs ||
    391       PressureAfter.getAGPRNum() > MaxVGPRs ||
    392       PressureAfter.getSGPRNum() > MaxSGPRs) {
    393     RescheduleRegions[RegionIdx] = true;
    394     RegionsWithHighRP[RegionIdx] = true;
    395   }
    396 
    397   if (WavesAfter >= MinOccupancy) {
    398     if (Stage == UnclusteredReschedule &&
    399         !PressureAfter.less(ST, PressureBefore)) {
    400       LLVM_DEBUG(dbgs() << "Unclustered reschedule did not help.\n");
    401     } else if (WavesAfter > MFI.getMinWavesPerEU() ||
    402         PressureAfter.less(ST, PressureBefore) ||
    403         !RescheduleRegions[RegionIdx]) {
    404       Pressure[RegionIdx] = PressureAfter;
    405       if (!RegionsWithClusters[RegionIdx] &&
    406           (Stage + 1) == UnclusteredReschedule)
    407         RescheduleRegions[RegionIdx] = false;
    408       return;
    409     } else {
    410       LLVM_DEBUG(dbgs() << "New pressure will result in more spilling.\n");
    411     }
    412   }
    413 
    414   LLVM_DEBUG(dbgs() << "Attempting to revert scheduling.\n");
    415   RescheduleRegions[RegionIdx] = RegionsWithClusters[RegionIdx] ||
    416                                  (Stage + 1) != UnclusteredReschedule;
    417   RegionEnd = RegionBegin;
    418   for (MachineInstr *MI : Unsched) {
    419     if (MI->isDebugInstr())
    420       continue;
    421 
    422     if (MI->getIterator() != RegionEnd) {
    423       BB->remove(MI);
    424       BB->insert(RegionEnd, MI);
    425       if (!MI->isDebugInstr())
    426         LIS->handleMove(*MI, true);
    427     }
    428     // Reset read-undef flags and update them later.
    429     for (auto &Op : MI->operands())
    430       if (Op.isReg() && Op.isDef())
    431         Op.setIsUndef(false);
    432     RegisterOperands RegOpers;
    433     RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks, false);
    434     if (!MI->isDebugInstr()) {
    435       if (ShouldTrackLaneMasks) {
    436         // Adjust liveness and add missing dead+read-undef flags.
    437         SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
    438         RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);
    439       } else {
    440         // Adjust for missing dead-def flags.
    441         RegOpers.detectDeadDefs(*MI, *LIS);
    442       }
    443     }
    444     RegionEnd = MI->getIterator();
    445     ++RegionEnd;
    446     LLVM_DEBUG(dbgs() << "Scheduling " << *MI);
    447   }
    448   RegionBegin = Unsched.front()->getIterator();
    449   Regions[RegionIdx] = std::make_pair(RegionBegin, RegionEnd);
    450 
    451   placeDebugValues();
    452 }
    453 
    454 GCNRegPressure GCNScheduleDAGMILive::getRealRegPressure() const {
    455   GCNDownwardRPTracker RPTracker(*LIS);
    456   RPTracker.advance(begin(), end(), &LiveIns[RegionIdx]);
    457   return RPTracker.moveMaxPressure();
    458 }
    459 
/// Compute live-in sets and maximal register pressure for every scheduling
/// region in \p MBB with a single downward walk of the block, optionally
/// caching the block's live-outs as the live-ins of a sole layout-successor.
void GCNScheduleDAGMILive::computeBlockPressure(const MachineBasicBlock *MBB) {
  GCNDownwardRPTracker RPTracker(*LIS);

  // If the block has the only successor then live-ins of that successor are
  // live-outs of the current block. We can reuse calculated live set if the
  // successor will be sent to scheduling past current block.
  const MachineBasicBlock *OnlySucc = nullptr;
  if (MBB->succ_size() == 1 && !(*MBB->succ_begin())->empty()) {
    SlotIndexes *Ind = LIS->getSlotIndexes();
    // Only cache when the successor is laid out after this block, i.e. it
    // will be processed later.
    if (Ind->getMBBStartIdx(MBB) < Ind->getMBBStartIdx(*MBB->succ_begin()))
      OnlySucc = *MBB->succ_begin();
  }

  // Scheduler sends regions from the end of the block upwards.
  // Regions of one block are contiguous in the vector starting at RegionIdx,
  // with higher indices earlier in the block; find the last such index.
  size_t CurRegion = RegionIdx;
  for (size_t E = Regions.size(); CurRegion != E; ++CurRegion)
    if (Regions[CurRegion].first->getParent() != MBB)
      break;
  --CurRegion;

  // Seed the tracker either with live-ins cached by a predecessor block or
  // with the precomputed live set before the first region's first non-debug
  // instruction.
  auto I = MBB->begin();
  auto LiveInIt = MBBLiveIns.find(MBB);
  if (LiveInIt != MBBLiveIns.end()) {
    auto LiveIn = std::move(LiveInIt->second);
    RPTracker.reset(*MBB->begin(), &LiveIn);
    MBBLiveIns.erase(LiveInIt);
  } else {
    auto &Rgn = Regions[CurRegion];
    I = Rgn.first;
    auto *NonDbgMI = &*skipDebugInstructionsForward(Rgn.first, Rgn.second);
    auto LRS = BBLiveInMap.lookup(NonDbgMI);
#ifdef EXPENSIVE_CHECKS
    assert(isEqual(getLiveRegsBefore(*NonDbgMI, *LIS), LRS));
#endif
    RPTracker.reset(*I, &LRS);
  }

  // Walk down the block, snapshotting live regs at each region start and
  // moving the accumulated max pressure out at each region end.
  for ( ; ; ) {
    I = RPTracker.getNext();

    if (Regions[CurRegion].first == I) {
      LiveIns[CurRegion] = RPTracker.getLiveRegs();
      RPTracker.clearMaxPressure();
    }

    if (Regions[CurRegion].second == I) {
      Pressure[CurRegion] = RPTracker.moveMaxPressure();
      // Regions are visited from highest index down to RegionIdx.
      if (CurRegion-- == RegionIdx)
        break;
    }
    // Two-phase advance so region boundaries can be observed between
    // instructions.
    RPTracker.advanceToNext();
    RPTracker.advanceBeforeNext();
  }

  if (OnlySucc) {
    // Finish walking to the block end, then hand the resulting live set to
    // the successor via MBBLiveIns.
    if (I != MBB->end()) {
      RPTracker.advanceToNext();
      RPTracker.advance(MBB->end());
    }
    RPTracker.reset(*OnlySucc->begin(), &RPTracker.getLiveRegs());
    RPTracker.advanceBeforeNext();
    MBBLiveIns[OnlySucc] = RPTracker.moveLiveRegs();
  }
}
    524 
/// Build a map from region-starting instructions (first non-debug instruction
/// of a region) to the live register set before them, computed in one pass
/// with getLiveRegMap(). Used by computeBlockPressure() to seed trackers.
DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet>
GCNScheduleDAGMILive::getBBLiveInMap() const {
  assert(!Regions.empty());
  std::vector<MachineInstr *> BBStarters;
  BBStarters.reserve(Regions.size());
  // Walk regions in reverse storage order, skipping consecutive regions that
  // share a parent block so only one starter per block is collected.
  auto I = Regions.rbegin(), E = Regions.rend();
  auto *BB = I->first->getParent();
  do {
    auto *MI = &*skipDebugInstructionsForward(I->first, I->second);
    BBStarters.push_back(MI);
    do {
      ++I;
    } while (I != E && I->first->getParent() == BB);
    // NOTE(review): BB is never re-assigned after the first block, so for
    // every later block this pushes one starter per region rather than one
    // per block — confirm whether that is intended (the extra map entries
    // look harmless but cost work in getLiveRegMap()).
  } while (I != E);
  return getLiveRegMap(BBStarters, false /*After*/, *LIS);
}
    541 
/// Drive the multi-stage scheduling loop over all recorded regions: the
/// InitialSchedule stage, then (when needed) UnclusteredReschedule with the
/// clustering mutations swapped out, then ClusteredLowOccupancyReschedule
/// with the lowest recorded occupancy as the new target.
void GCNScheduleDAGMILive::finalizeSchedule() {
  GCNMaxOccupancySchedStrategy &S = (GCNMaxOccupancySchedStrategy&)*SchedImpl;
  LLVM_DEBUG(dbgs() << "All regions recorded, starting actual scheduling.\n");

  // Per-region bookkeeping, indexed in parallel with Regions.
  LiveIns.resize(Regions.size());
  Pressure.resize(Regions.size());
  RescheduleRegions.resize(Regions.size());
  RegionsWithClusters.resize(Regions.size());
  RegionsWithHighRP.resize(Regions.size());
  RescheduleRegions.set();
  RegionsWithClusters.reset();
  RegionsWithHighRP.reset();

  if (!Regions.empty())
    BBLiveInMap = getBBLiveInMap();

  // Holds the clustering mutations while they are disabled during the
  // unclustered stage.
  std::vector<std::unique_ptr<ScheduleDAGMutation>> SavedMutations;

  do {
    Stage++;
    RegionIdx = 0;
    MachineBasicBlock *MBB = nullptr;

    if (Stage > InitialSchedule) {
      if (!LIS)
        break;

      // Retry function scheduling if we found resulting occupancy and it is
      // lower than used for first pass scheduling. This will give more freedom
      // to schedule low register pressure blocks.
      // Code is partially copied from MachineSchedulerBase::scheduleRegions().

      if (Stage == UnclusteredReschedule) {
        if (RescheduleRegions.none())
          continue;
        LLVM_DEBUG(dbgs() <<
          "Retrying function scheduling without clustering.\n");
      }

      if (Stage == ClusteredLowOccupancyReschedule) {
        // Nothing to gain if the first pass already achieved the minimum.
        if (StartingOccupancy <= MinOccupancy)
          break;

        LLVM_DEBUG(
            dbgs()
            << "Retrying function scheduling with lowest recorded occupancy "
            << MinOccupancy << ".\n");

        S.setTargetOccupancy(MinOccupancy);
      }
    }

    // Swap the clustering mutations out for the unclustered stage; they are
    // swapped back in at the bottom of the loop.
    if (Stage == UnclusteredReschedule)
      SavedMutations.swap(Mutations);

    for (auto Region : Regions) {
      // Skip regions that this stage has no reason to revisit.
      if ((Stage == UnclusteredReschedule && !RescheduleRegions[RegionIdx]) ||
          (Stage == ClusteredLowOccupancyReschedule &&
           !RegionsWithClusters[RegionIdx] && !RegionsWithHighRP[RegionIdx])) {

        ++RegionIdx;
        continue;
      }

      RegionBegin = Region.first;
      RegionEnd = Region.second;

      if (RegionBegin->getParent() != MBB) {
        if (MBB) finishBlock();
        MBB = RegionBegin->getParent();
        startBlock(MBB);
        // Pressure only needs computing once; later stages reuse the cached
        // LiveIns/Pressure entries.
        if (Stage == InitialSchedule)
          computeBlockPressure(MBB);
      }

      unsigned NumRegionInstrs = std::distance(begin(), end());
      enterRegion(MBB, begin(), end(), NumRegionInstrs);

      // Skip empty scheduling regions (0 or 1 schedulable instructions).
      if (begin() == end() || begin() == std::prev(end())) {
        exitRegion();
        continue;
      }

      LLVM_DEBUG(dbgs() << "********** MI Scheduling **********\n");
      LLVM_DEBUG(dbgs() << MF.getName() << ":" << printMBBReference(*MBB) << " "
                        << MBB->getName() << "\n  From: " << *begin()
                        << "    To: ";
                 if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;
                 else dbgs() << "End";
                 dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n');

      schedule();

      exitRegion();
      ++RegionIdx;
    }
    finishBlock();

    if (Stage == UnclusteredReschedule)
      SavedMutations.swap(Mutations);
  } while (Stage != LastStage);
}
    645