Home | History | Annotate | Line # | Download | only in CodeGen
      1 //===- ScheduleDAGInstrs.h - MachineInstr Scheduling ------------*- C++ -*-===//
      2 //
      3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
      4 // See https://llvm.org/LICENSE.txt for license information.
      5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
      6 //
      7 //===----------------------------------------------------------------------===//
      8 //
      9 /// \file Implements the ScheduleDAGInstrs class, which implements scheduling
     10 /// for a MachineInstr-based dependency graph.
     11 //
     12 //===----------------------------------------------------------------------===//
     13 
     14 #ifndef LLVM_CODEGEN_SCHEDULEDAGINSTRS_H
     15 #define LLVM_CODEGEN_SCHEDULEDAGINSTRS_H
     16 
     17 #include "llvm/ADT/DenseMap.h"
     18 #include "llvm/ADT/PointerIntPair.h"
     19 #include "llvm/ADT/STLExtras.h"
     20 #include "llvm/ADT/SmallVector.h"
     21 #include "llvm/ADT/SparseMultiSet.h"
     22 #include "llvm/ADT/SparseSet.h"
     23 #include "llvm/CodeGen/LivePhysRegs.h"
     24 #include "llvm/CodeGen/MachineBasicBlock.h"
     25 #include "llvm/CodeGen/ScheduleDAG.h"
     26 #include "llvm/CodeGen/TargetRegisterInfo.h"
     27 #include "llvm/CodeGen/TargetSchedule.h"
     28 #include "llvm/MC/LaneBitmask.h"
     29 #include <cassert>
     30 #include <cstdint>
     31 #include <list>
     32 #include <utility>
     33 #include <vector>
     34 
     35 namespace llvm {
     36 
     37   class AAResults;
     38   class LiveIntervals;
     39   class MachineFrameInfo;
     40   class MachineFunction;
     41   class MachineInstr;
     42   class MachineLoopInfo;
     43   class MachineOperand;
     44   struct MCSchedClassDesc;
     45   class PressureDiffs;
     46   class PseudoSourceValue;
     47   class RegPressureTracker;
     48   class UndefValue;
     49   class Value;
     50 
     51   /// An individual mapping from virtual register number to SUnit.
     52   struct VReg2SUnit {
     53     unsigned VirtReg;
     54     LaneBitmask LaneMask;
     55     SUnit *SU;
     56 
     57     VReg2SUnit(unsigned VReg, LaneBitmask LaneMask, SUnit *SU)
     58       : VirtReg(VReg), LaneMask(LaneMask), SU(SU) {}
     59 
     60     unsigned getSparseSetIndex() const {
     61       return Register::virtReg2Index(VirtReg);
     62     }
     63   };
     64 
     65   /// Mapping from virtual register to SUnit including an operand index.
     66   struct VReg2SUnitOperIdx : public VReg2SUnit {
     67     unsigned OperandIndex;
     68 
     69     VReg2SUnitOperIdx(unsigned VReg, LaneBitmask LaneMask,
     70                       unsigned OperandIndex, SUnit *SU)
     71       : VReg2SUnit(VReg, LaneMask, SU), OperandIndex(OperandIndex) {}
     72   };
     73 
     74   /// Record a physical register access.
     75   /// For non-data-dependent uses, OpIdx == -1.
     76   struct PhysRegSUOper {
     77     SUnit *SU;
     78     int OpIdx;
     79     unsigned Reg;
     80 
     81     PhysRegSUOper(SUnit *su, int op, unsigned R): SU(su), OpIdx(op), Reg(R) {}
     82 
     83     unsigned getSparseSetIndex() const { return Reg; }
     84   };
     85 
     86   /// Use a SparseMultiSet to track physical registers. Storage is only
     87   /// allocated once for the pass. It can be cleared in constant time and reused
     88   /// without any frees.
     89   using Reg2SUnitsMap =
     90       SparseMultiSet<PhysRegSUOper, identity<unsigned>, uint16_t>;
     91 
     92   /// Use SparseSet as a SparseMap by relying on the fact that it never
     93   /// compares ValueT's, only unsigned keys. This allows the set to be cleared
     94   /// between scheduling regions in constant time as long as ValueT does not
     95   /// require a destructor.
     96   using VReg2SUnitMap = SparseSet<VReg2SUnit, VirtReg2IndexFunctor>;
     97 
     98   /// Track local uses of virtual registers. These uses are gathered by the DAG
     99   /// builder and may be consulted by the scheduler to avoid iterating an entire
    100   /// vreg use list.
    101   using VReg2SUnitMultiMap = SparseMultiSet<VReg2SUnit, VirtReg2IndexFunctor>;
    102 
    103   using VReg2SUnitOperIdxMultiMap =
    104       SparseMultiSet<VReg2SUnitOperIdx, VirtReg2IndexFunctor>;
    105 
    106   using ValueType = PointerUnion<const Value *, const PseudoSourceValue *>;
    107 
    108   struct UnderlyingObject : PointerIntPair<ValueType, 1, bool> {
    109     UnderlyingObject(ValueType V, bool MayAlias)
    110         : PointerIntPair<ValueType, 1, bool>(V, MayAlias) {}
    111 
    112     ValueType getValue() const { return getPointer(); }
    113     bool mayAlias() const { return getInt(); }
    114   };
    115 
    116   using UnderlyingObjectsVector = SmallVector<UnderlyingObject, 4>;
    117 
    118   /// A ScheduleDAG for scheduling lists of MachineInstr.
    119   class ScheduleDAGInstrs : public ScheduleDAG {
    120   protected:
    121     const MachineLoopInfo *MLI;
    122     const MachineFrameInfo &MFI;
    123 
    124     /// TargetSchedModel provides an interface to the machine model.
    125     TargetSchedModel SchedModel;
    126 
    127     /// True if the DAG builder should remove kill flags (in preparation for
    128     /// rescheduling).
    129     bool RemoveKillFlags;
    130 
    131     /// The standard DAG builder does not normally include terminators as DAG
    132     /// nodes because it does not create the necessary dependencies to prevent
    133     /// reordering. A specialized scheduler can override
    134     /// TargetInstrInfo::isSchedulingBoundary then enable this flag to indicate
    135     /// it has taken responsibility for scheduling the terminator correctly.
    136     bool CanHandleTerminators = false;
    137 
    138     /// Whether lane masks should get tracked.
    139     bool TrackLaneMasks = false;
    140 
    141     // State specific to the current scheduling region.
    142     // ------------------------------------------------
    143 
    144     /// The block in which to insert instructions
    145     MachineBasicBlock *BB;
    146 
    147     /// The beginning of the range to be scheduled.
    148     MachineBasicBlock::iterator RegionBegin;
    149 
    150     /// The end of the range to be scheduled.
    151     MachineBasicBlock::iterator RegionEnd;
    152 
    153     /// Instructions in this region (distance(RegionBegin, RegionEnd)).
    154     unsigned NumRegionInstrs;
    155 
    156     /// After calling BuildSchedGraph, each machine instruction in the current
    157     /// scheduling region is mapped to an SUnit.
    158     DenseMap<MachineInstr*, SUnit*> MISUnitMap;
    159 
    160     // State internal to DAG building.
    161     // -------------------------------
    162 
    163     /// Defs, Uses - Remember where defs and uses of each register are as we
    164     /// iterate upward through the instructions. This is allocated here instead
    165     /// of inside BuildSchedGraph to avoid the need for it to be initialized and
    166     /// destructed for each block.
    167     Reg2SUnitsMap Defs;
    168     Reg2SUnitsMap Uses;
    169 
    170     /// Tracks the last instruction(s) in this region defining each virtual
    171     /// register. There may be multiple current definitions for a register with
    172     /// disjunct lanemasks.
    173     VReg2SUnitMultiMap CurrentVRegDefs;
    174     /// Tracks the last instructions in this region using each virtual register.
    175     VReg2SUnitOperIdxMultiMap CurrentVRegUses;
    176 
    177     AAResults *AAForDep = nullptr;
    178 
    179     /// Remember a generic side-effecting instruction as we proceed.
    180     /// No other SU ever gets scheduled around it (except in the special
    181     /// case of a huge region that gets reduced).
    182     SUnit *BarrierChain = nullptr;
    183 
    184   public:
    185     /// A list of SUnits, used in Value2SUsMap, during DAG construction.
    186     /// Note: to gain speed it might be worth investigating an optimized
    187     /// implementation of this data structure, such as a singly linked list
    188     /// with a memory pool (SmallVector was tried but slow and SparseSet is not
    189     /// applicable).
    190     using SUList = std::list<SUnit *>;
    191 
    192   protected:
    193     /// A map from ValueType to SUList, used during DAG construction, as
    194     /// a means of remembering which SUs depend on which memory locations.
    195     class Value2SUsMap;
    196 
    197     /// Reduces maps in FIFO order, by N SUs. This is better than turning
    198     /// every Nth memory SU into BarrierChain in buildSchedGraph(), since
    199     /// it avoids unnecessary edges between seen SUs above the new BarrierChain,
    200     /// and those below it.
    201     void reduceHugeMemNodeMaps(Value2SUsMap &stores,
    202                                Value2SUsMap &loads, unsigned N);
    203 
    204     /// Adds a chain edge between SUa and SUb, but only if both
    205     /// AAResults and Target fail to deny the dependency.
    206     void addChainDependency(SUnit *SUa, SUnit *SUb,
    207                             unsigned Latency = 0);
    208 
    209     /// Adds dependencies as needed from all SUs in list to SU.
    210     void addChainDependencies(SUnit *SU, SUList &SUs, unsigned Latency) {
    211       for (SUnit *Entry : SUs)
    212         addChainDependency(SU, Entry, Latency);
    213     }
    214 
    215     /// Adds dependencies as needed from all SUs in map, to SU.
    216     void addChainDependencies(SUnit *SU, Value2SUsMap &Val2SUsMap);
    217 
    218     /// Adds dependencies as needed to SU, from all SUs mapped to V.
    219     void addChainDependencies(SUnit *SU, Value2SUsMap &Val2SUsMap,
    220                               ValueType V);
    221 
    222     /// Adds barrier chain edges from all SUs in map, and then clear the map.
    223     /// This is equivalent to insertBarrierChain(), but optimized for the common
    224     /// case where the new BarrierChain (a global memory object) has a higher
    225     /// NodeNum than all SUs in map. It is assumed BarrierChain has been set
    226     /// before calling this.
    227     void addBarrierChain(Value2SUsMap &map);
    228 
    229     /// Inserts a barrier chain in a huge region, far below current SU.
    230     /// Adds barrier chain edges from all SUs in map with higher NodeNums than
    231     /// this new BarrierChain, and remove them from map. It is assumed
    232     /// BarrierChain has been set before calling this.
    233     void insertBarrierChain(Value2SUsMap &map);
    234 
    235     /// For an unanalyzable memory access, this Value is used in maps.
    236     UndefValue *UnknownValue;
    237 
    238 
    239     /// Topo - A topological ordering for SUnits which permits fast IsReachable
    240     /// and similar queries.
    241     ScheduleDAGTopologicalSort Topo;
    242 
    243     using DbgValueVector =
    244         std::vector<std::pair<MachineInstr *, MachineInstr *>>;
    245     /// Remember instruction that precedes DBG_VALUE.
    246     /// These are generated by buildSchedGraph but persist so they can be
    247     /// referenced when emitting the final schedule.
    248     DbgValueVector DbgValues;
    249     MachineInstr *FirstDbgValue = nullptr;
    250 
    251     /// Set of live physical registers for updating kill flags.
    252     LivePhysRegs LiveRegs;
    253 
    254   public:
    255     explicit ScheduleDAGInstrs(MachineFunction &mf,
    256                                const MachineLoopInfo *mli,
    257                                bool RemoveKillFlags = false);
    258 
    259     ~ScheduleDAGInstrs() override = default;
    260 
    261     /// Gets the machine model for instruction scheduling.
    262     const TargetSchedModel *getSchedModel() const { return &SchedModel; }
    263 
    264     /// Resolves and cache a resolved scheduling class for an SUnit.
    265     const MCSchedClassDesc *getSchedClass(SUnit *SU) const {
    266       if (!SU->SchedClass && SchedModel.hasInstrSchedModel())
    267         SU->SchedClass = SchedModel.resolveSchedClass(SU->getInstr());
    268       return SU->SchedClass;
    269     }
    270 
    271     /// IsReachable - Checks if SU is reachable from TargetSU.
    272     bool IsReachable(SUnit *SU, SUnit *TargetSU) {
    273       return Topo.IsReachable(SU, TargetSU);
    274     }
    275 
    276     /// Returns an iterator to the top of the current scheduling region.
    277     MachineBasicBlock::iterator begin() const { return RegionBegin; }
    278 
    279     /// Returns an iterator to the bottom of the current scheduling region.
    280     MachineBasicBlock::iterator end() const { return RegionEnd; }
    281 
    282     /// Creates a new SUnit and return a ptr to it.
    283     SUnit *newSUnit(MachineInstr *MI);
    284 
    285     /// Returns an existing SUnit for this MI, or nullptr.
    286     SUnit *getSUnit(MachineInstr *MI) const;
    287 
    288     /// If this method returns true, handling of the scheduling regions
    289     /// themselves (in case of a scheduling boundary in MBB) will be done
    290     /// beginning with the topmost region of MBB.
    291     virtual bool doMBBSchedRegionsTopDown() const { return false; }
    292 
    293     /// Prepares to perform scheduling in the given block.
    294     virtual void startBlock(MachineBasicBlock *BB);
    295 
    296     /// Cleans up after scheduling in the given block.
    297     virtual void finishBlock();
    298 
    299     /// Initialize the DAG and common scheduler state for a new
    300     /// scheduling region. This does not actually create the DAG, only clears
    301     /// it. The scheduling driver may call BuildSchedGraph multiple times per
    302     /// scheduling region.
    303     virtual void enterRegion(MachineBasicBlock *bb,
    304                              MachineBasicBlock::iterator begin,
    305                              MachineBasicBlock::iterator end,
    306                              unsigned regioninstrs);
    307 
    308     /// Called when the scheduler has finished scheduling the current region.
    309     virtual void exitRegion();
    310 
    311     /// Builds SUnits for the current region.
    312     /// If \p RPTracker is non-null, compute register pressure as a side effect.
    313     /// The DAG builder is an efficient place to do it because it already visits
    314     /// operands.
    315     void buildSchedGraph(AAResults *AA,
    316                          RegPressureTracker *RPTracker = nullptr,
    317                          PressureDiffs *PDiffs = nullptr,
    318                          LiveIntervals *LIS = nullptr,
    319                          bool TrackLaneMasks = false);
    320 
    321     /// Adds dependencies from instructions in the current list of
    322     /// instructions being scheduled to scheduling barrier. We want to make sure
    323     /// instructions which define registers that are either used by the
    324     /// terminator or are live-out are properly scheduled. This is especially
    325     /// important when the definition latency of the return value(s) are too
    326     /// high to be hidden by the branch or when the liveout registers used by
    327     /// instructions in the fallthrough block.
    328     void addSchedBarrierDeps();
    329 
    330     /// Orders nodes according to selected style.
    331     ///
    332     /// Typically, a scheduling algorithm will implement schedule() without
    333     /// overriding enterRegion() or exitRegion().
    334     virtual void schedule() = 0;
    335 
    336     /// Allow targets to perform final scheduling actions at the level of the
    337     /// whole MachineFunction. By default does nothing.
    338     virtual void finalizeSchedule() {}
    339 
    340     void dumpNode(const SUnit &SU) const override;
    341     void dump() const override;
    342 
    343     /// Returns a label for a DAG node that points to an instruction.
    344     std::string getGraphNodeLabel(const SUnit *SU) const override;
    345 
    346     /// Returns a label for the region of code covered by the DAG.
    347     std::string getDAGName() const override;
    348 
    349     /// Fixes register kill flags that scheduling has made invalid.
    350     void fixupKills(MachineBasicBlock &MBB);
    351 
    352     /// True if an edge can be added from PredSU to SuccSU without creating
    353     /// a cycle.
    354     bool canAddEdge(SUnit *SuccSU, SUnit *PredSU);
    355 
    356     /// Add a DAG edge to the given SU with the given predecessor
    357     /// dependence data.
    358     ///
    359     /// \returns true if the edge may be added without creating a cycle OR if an
    360     /// equivalent edge already existed (false indicates failure).
    361     bool addEdge(SUnit *SuccSU, const SDep &PredDep);
    362 
    363   protected:
    364     void initSUnits();
    365     void addPhysRegDataDeps(SUnit *SU, unsigned OperIdx);
    366     void addPhysRegDeps(SUnit *SU, unsigned OperIdx);
    367     void addVRegDefDeps(SUnit *SU, unsigned OperIdx);
    368     void addVRegUseDeps(SUnit *SU, unsigned OperIdx);
    369 
    370     /// Returns a mask for which lanes get read/written by the given (register)
    371     /// machine operand.
    372     LaneBitmask getLaneMaskForMO(const MachineOperand &MO) const;
    373 
    374     /// Returns true if the def register in \p MO has no uses.
    375     bool deadDefHasNoUse(const MachineOperand &MO);
    376   };
    377 
    378   /// Creates a new SUnit and return a ptr to it.
    379   inline SUnit *ScheduleDAGInstrs::newSUnit(MachineInstr *MI) {
    380 #ifndef NDEBUG
    381     const SUnit *Addr = SUnits.empty() ? nullptr : &SUnits[0];
    382 #endif
    383     SUnits.emplace_back(MI, (unsigned)SUnits.size());
    384     assert((Addr == nullptr || Addr == &SUnits[0]) &&
    385            "SUnits std::vector reallocated on the fly!");
    386     return &SUnits.back();
    387   }
    388 
    389   /// Returns an existing SUnit for this MI, or nullptr.
    390   inline SUnit *ScheduleDAGInstrs::getSUnit(MachineInstr *MI) const {
    391     return MISUnitMap.lookup(MI);
    392   }
    393 
    394 } // end namespace llvm
    395 
    396 #endif // LLVM_CODEGEN_SCHEDULEDAGINSTRS_H
    397