Home | History | Annotate | Line # | Download | only in CodeGen
      1 //===- RegisterCoalescer.cpp - Generic Register Coalescing Interface ------===//
      2 //
      3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
      4 // See https://llvm.org/LICENSE.txt for license information.
      5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
      6 //
      7 //===----------------------------------------------------------------------===//
      8 //
      9 // This file implements the generic RegisterCoalescer interface which
     10 // is used as the common interface used by all clients and
     11 // implementations of register coalescing.
     12 //
     13 //===----------------------------------------------------------------------===//
     14 
     15 #include "RegisterCoalescer.h"
     16 #include "llvm/ADT/ArrayRef.h"
     17 #include "llvm/ADT/BitVector.h"
     18 #include "llvm/ADT/DenseSet.h"
     19 #include "llvm/ADT/STLExtras.h"
     20 #include "llvm/ADT/SmallPtrSet.h"
     21 #include "llvm/ADT/SmallVector.h"
     22 #include "llvm/ADT/Statistic.h"
     23 #include "llvm/Analysis/AliasAnalysis.h"
     24 #include "llvm/CodeGen/LiveInterval.h"
     25 #include "llvm/CodeGen/LiveIntervals.h"
     26 #include "llvm/CodeGen/LiveRangeEdit.h"
     27 #include "llvm/CodeGen/MachineBasicBlock.h"
     28 #include "llvm/CodeGen/MachineFunction.h"
     29 #include "llvm/CodeGen/MachineFunctionPass.h"
     30 #include "llvm/CodeGen/MachineInstr.h"
     31 #include "llvm/CodeGen/MachineInstrBuilder.h"
     32 #include "llvm/CodeGen/MachineLoopInfo.h"
     33 #include "llvm/CodeGen/MachineOperand.h"
     34 #include "llvm/CodeGen/MachineRegisterInfo.h"
     35 #include "llvm/CodeGen/Passes.h"
     36 #include "llvm/CodeGen/RegisterClassInfo.h"
     37 #include "llvm/CodeGen/SlotIndexes.h"
     38 #include "llvm/CodeGen/TargetInstrInfo.h"
     39 #include "llvm/CodeGen/TargetOpcodes.h"
     40 #include "llvm/CodeGen/TargetRegisterInfo.h"
     41 #include "llvm/CodeGen/TargetSubtargetInfo.h"
     42 #include "llvm/IR/DebugLoc.h"
     43 #include "llvm/InitializePasses.h"
     44 #include "llvm/MC/LaneBitmask.h"
     45 #include "llvm/MC/MCInstrDesc.h"
     46 #include "llvm/MC/MCRegisterInfo.h"
     47 #include "llvm/Pass.h"
     48 #include "llvm/Support/CommandLine.h"
     49 #include "llvm/Support/Compiler.h"
     50 #include "llvm/Support/Debug.h"
     51 #include "llvm/Support/ErrorHandling.h"
     52 #include "llvm/Support/raw_ostream.h"
     53 #include <algorithm>
     54 #include <cassert>
     55 #include <iterator>
     56 #include <limits>
     57 #include <tuple>
     58 #include <utility>
     59 #include <vector>
     60 
     61 using namespace llvm;
     62 
     63 #define DEBUG_TYPE "regalloc"
     64 
     65 STATISTIC(numJoins    , "Number of interval joins performed");
     66 STATISTIC(numCrossRCs , "Number of cross class joins performed");
     67 STATISTIC(numCommutes , "Number of instruction commuting performed");
     68 STATISTIC(numExtends  , "Number of copies extended");
     69 STATISTIC(NumReMats   , "Number of instructions re-materialized");
     70 STATISTIC(NumInflated , "Number of register classes inflated");
     71 STATISTIC(NumLaneConflicts, "Number of dead lane conflicts tested");
     72 STATISTIC(NumLaneResolves,  "Number of dead lane conflicts resolved");
     73 STATISTIC(NumShrinkToUses,  "Number of shrinkToUses called");
     74 
     75 static cl::opt<bool> EnableJoining("join-liveintervals",
     76                                    cl::desc("Coalesce copies (default=true)"),
     77                                    cl::init(true), cl::Hidden);
     78 
     79 static cl::opt<bool> UseTerminalRule("terminal-rule",
     80                                      cl::desc("Apply the terminal rule"),
     81                                      cl::init(false), cl::Hidden);
     82 
     83 /// Temporary flag to test critical edge unsplitting.
     84 static cl::opt<bool>
     85 EnableJoinSplits("join-splitedges",
     86   cl::desc("Coalesce copies on split edges (default=subtarget)"), cl::Hidden);
     87 
     88 /// Temporary flag to test global copy optimization.
     89 static cl::opt<cl::boolOrDefault>
     90 EnableGlobalCopies("join-globalcopies",
     91   cl::desc("Coalesce copies that span blocks (default=subtarget)"),
     92   cl::init(cl::BOU_UNSET), cl::Hidden);
     93 
     94 static cl::opt<bool>
     95 VerifyCoalescing("verify-coalescing",
     96          cl::desc("Verify machine instrs before and after register coalescing"),
     97          cl::Hidden);
     98 
     99 static cl::opt<unsigned> LateRematUpdateThreshold(
    100     "late-remat-update-threshold", cl::Hidden,
    101     cl::desc("During rematerialization for a copy, if the def instruction has "
    102              "many other copy uses to be rematerialized, delay the multiple "
    103              "separate live interval update work and do them all at once after "
    104              "all those rematerialization are done. It will save a lot of "
    105              "repeated work. "),
    106     cl::init(100));
    107 
    108 static cl::opt<unsigned> LargeIntervalSizeThreshold(
    109     "large-interval-size-threshold", cl::Hidden,
    110     cl::desc("If the valnos size of an interval is larger than the threshold, "
    111              "it is regarded as a large interval. "),
    112     cl::init(100));
    113 
    114 static cl::opt<unsigned> LargeIntervalFreqThreshold(
    115     "large-interval-freq-threshold", cl::Hidden,
    116     cl::desc("For a large interval, if it is coalesed with other live "
    117              "intervals many times more than the threshold, stop its "
    118              "coalescing to control the compile time. "),
    119     cl::init(100));
    120 
    121 namespace {
    122 
    123   class JoinVals;
    124 
    125   class RegisterCoalescer : public MachineFunctionPass,
    126                             private LiveRangeEdit::Delegate {
    127     MachineFunction* MF = nullptr;
    128     MachineRegisterInfo* MRI = nullptr;
    129     const TargetRegisterInfo* TRI = nullptr;
    130     const TargetInstrInfo* TII = nullptr;
    131     LiveIntervals *LIS = nullptr;
    132     const MachineLoopInfo* Loops = nullptr;
    133     AliasAnalysis *AA = nullptr;
    134     RegisterClassInfo RegClassInfo;
    135 
    136     /// Debug variable location tracking -- for each VReg, maintain an
    137     /// ordered-by-slot-index set of DBG_VALUEs, to help quick
    138     /// identification of whether coalescing may change location validity.
    139     using DbgValueLoc = std::pair<SlotIndex, MachineInstr*>;
    140     DenseMap<Register, std::vector<DbgValueLoc>> DbgVRegToValues;
    141 
    142     /// VRegs may be repeatedly coalesced, and have many DBG_VALUEs attached.
    143     /// To avoid repeatedly merging sets of DbgValueLocs, instead record
    144     /// which vregs have been coalesced, and where to. This map is from
    145     /// vreg => {set of vregs merged in}.
    146     DenseMap<Register, SmallVector<Register, 4>> DbgMergedVRegNums;
    147 
    148     /// A LaneMask to remember on which subregister live ranges we need to call
    149     /// shrinkToUses() later.
    150     LaneBitmask ShrinkMask;
    151 
    152     /// True if the main range of the currently coalesced intervals should be
    153     /// checked for smaller live intervals.
    154     bool ShrinkMainRange = false;
    155 
    156     /// True if the coalescer should aggressively coalesce global copies
    157     /// in favor of keeping local copies.
    158     bool JoinGlobalCopies = false;
    159 
    160     /// True if the coalescer should aggressively coalesce fall-thru
    161     /// blocks exclusively containing copies.
    162     bool JoinSplitEdges = false;
    163 
    164     /// Copy instructions yet to be coalesced.
    165     SmallVector<MachineInstr*, 8> WorkList;
    166     SmallVector<MachineInstr*, 8> LocalWorkList;
    167 
    168     /// Set of instruction pointers that have been erased, and
    169     /// that may be present in WorkList.
    170     SmallPtrSet<MachineInstr*, 8> ErasedInstrs;
    171 
    172     /// Dead instructions that are about to be deleted.
    173     SmallVector<MachineInstr*, 8> DeadDefs;
    174 
    175     /// Virtual registers to be considered for register class inflation.
    176     SmallVector<Register, 8> InflateRegs;
    177 
    178     /// The collection of live intervals which should have been updated
    179     /// immediately after rematerialiation but delayed until
    180     /// lateLiveIntervalUpdate is called.
    181     DenseSet<Register> ToBeUpdated;
    182 
    183     /// Record how many times the large live interval with many valnos
    184     /// has been tried to join with other live interval.
    185     DenseMap<Register, unsigned long> LargeLIVisitCounter;
    186 
    187     /// Recursively eliminate dead defs in DeadDefs.
    188     void eliminateDeadDefs();
    189 
    190     /// LiveRangeEdit callback for eliminateDeadDefs().
    191     void LRE_WillEraseInstruction(MachineInstr *MI) override;
    192 
    193     /// Coalesce the LocalWorkList.
    194     void coalesceLocals();
    195 
    196     /// Join compatible live intervals
    197     void joinAllIntervals();
    198 
    199     /// Coalesce copies in the specified MBB, putting
    200     /// copies that cannot yet be coalesced into WorkList.
    201     void copyCoalesceInMBB(MachineBasicBlock *MBB);
    202 
    203     /// Tries to coalesce all copies in CurrList. Returns true if any progress
    204     /// was made.
    205     bool copyCoalesceWorkList(MutableArrayRef<MachineInstr*> CurrList);
    206 
    207     /// If one def has many copy like uses, and those copy uses are all
    208     /// rematerialized, the live interval update needed for those
    209     /// rematerializations will be delayed and done all at once instead
    210     /// of being done multiple times. This is to save compile cost because
    211     /// live interval update is costly.
    212     void lateLiveIntervalUpdate();
    213 
    214     /// Check if the incoming value defined by a COPY at \p SLRQ in the subrange
    215     /// has no value defined in the predecessors. If the incoming value is the
    216     /// same as defined by the copy itself, the value is considered undefined.
    217     bool copyValueUndefInPredecessors(LiveRange &S,
    218                                       const MachineBasicBlock *MBB,
    219                                       LiveQueryResult SLRQ);
    220 
    221     /// Set necessary undef flags on subregister uses after pruning out undef
    222     /// lane segments from the subrange.
    223     void setUndefOnPrunedSubRegUses(LiveInterval &LI, Register Reg,
    224                                     LaneBitmask PrunedLanes);
    225 
    226     /// Attempt to join intervals corresponding to SrcReg/DstReg, which are the
    227     /// src/dst of the copy instruction CopyMI.  This returns true if the copy
    228     /// was successfully coalesced away. If it is not currently possible to
    229     /// coalesce this interval, but it may be possible if other things get
    230     /// coalesced, then it returns true by reference in 'Again'.
    231     bool joinCopy(MachineInstr *CopyMI, bool &Again);
    232 
    233     /// Attempt to join these two intervals.  On failure, this
    234     /// returns false.  The output "SrcInt" will not have been modified, so we
    235     /// can use this information below to update aliases.
    236     bool joinIntervals(CoalescerPair &CP);
    237 
    238     /// Attempt joining two virtual registers. Return true on success.
    239     bool joinVirtRegs(CoalescerPair &CP);
    240 
    241     /// If a live interval has many valnos and is coalesced with other
    242     /// live intervals many times, we regard such live interval as having
    243     /// high compile time cost.
    244     bool isHighCostLiveInterval(LiveInterval &LI);
    245 
    246     /// Attempt joining with a reserved physreg.
    247     bool joinReservedPhysReg(CoalescerPair &CP);
    248 
    249     /// Add the LiveRange @p ToMerge as a subregister liverange of @p LI.
    250     /// Subranges in @p LI which only partially interfere with the desired
    251     /// LaneMask are split as necessary. @p LaneMask are the lanes that
    252     /// @p ToMerge will occupy in the coalescer register. @p LI has its subrange
    253     /// lanemasks already adjusted to the coalesced register.
    254     void mergeSubRangeInto(LiveInterval &LI, const LiveRange &ToMerge,
    255                            LaneBitmask LaneMask, CoalescerPair &CP,
    256                            unsigned DstIdx);
    257 
    258     /// Join the liveranges of two subregisters. Joins @p RRange into
    259     /// @p LRange, @p RRange may be invalid afterwards.
    260     void joinSubRegRanges(LiveRange &LRange, LiveRange &RRange,
    261                           LaneBitmask LaneMask, const CoalescerPair &CP);
    262 
    263     /// We found a non-trivially-coalescable copy. If the source value number is
    264     /// defined by a copy from the destination reg see if we can merge these two
    265     /// destination reg valno# into a single value number, eliminating a copy.
    266     /// This returns true if an interval was modified.
    267     bool adjustCopiesBackFrom(const CoalescerPair &CP, MachineInstr *CopyMI);
    268 
    269     /// Return true if there are definitions of IntB
    270     /// other than BValNo val# that can reach uses of AValno val# of IntA.
    271     bool hasOtherReachingDefs(LiveInterval &IntA, LiveInterval &IntB,
    272                               VNInfo *AValNo, VNInfo *BValNo);
    273 
    274     /// We found a non-trivially-coalescable copy.
    275     /// If the source value number is defined by a commutable instruction and
    276     /// its other operand is coalesced to the copy dest register, see if we
    277     /// can transform the copy into a noop by commuting the definition.
    278     /// This returns a pair of two flags:
    279     /// - the first element is true if an interval was modified,
    280     /// - the second element is true if the destination interval needs
    281     ///   to be shrunk after deleting the copy.
    282     std::pair<bool,bool> removeCopyByCommutingDef(const CoalescerPair &CP,
    283                                                   MachineInstr *CopyMI);
    284 
    285     /// We found a copy which can be moved to its less frequent predecessor.
    286     bool removePartialRedundancy(const CoalescerPair &CP, MachineInstr &CopyMI);
    287 
    288     /// If the source of a copy is defined by a
    289     /// trivial computation, replace the copy by rematerialize the definition.
    290     bool reMaterializeTrivialDef(const CoalescerPair &CP, MachineInstr *CopyMI,
    291                                  bool &IsDefCopy);
    292 
    293     /// Return true if a copy involving a physreg should be joined.
    294     bool canJoinPhys(const CoalescerPair &CP);
    295 
    296     /// Replace all defs and uses of SrcReg to DstReg and update the subregister
    297     /// number if it is not zero. If DstReg is a physical register and the
    298     /// existing subregister number of the def / use being updated is not zero,
    299     /// make sure to set it to the correct physical subregister.
    300     void updateRegDefsUses(Register SrcReg, Register DstReg, unsigned SubIdx);
    301 
    302     /// If the given machine operand reads only undefined lanes add an undef
    303     /// flag.
    304     /// This can happen when undef uses were previously concealed by a copy
    305     /// which we coalesced. Example:
    306     ///    %0:sub0<def,read-undef> = ...
    307     ///    %1 = COPY %0           <-- Coalescing COPY reveals undef
    308     ///       = use %1:sub1       <-- hidden undef use
    309     void addUndefFlag(const LiveInterval &Int, SlotIndex UseIdx,
    310                       MachineOperand &MO, unsigned SubRegIdx);
    311 
    312     /// Handle copies of undef values. If the undef value is an incoming
    313     /// PHI value, it will convert @p CopyMI to an IMPLICIT_DEF.
    314     /// Returns nullptr if @p CopyMI was not in any way eliminable. Otherwise,
    315     /// it returns @p CopyMI (which could be an IMPLICIT_DEF at this point).
    316     MachineInstr *eliminateUndefCopy(MachineInstr *CopyMI);
    317 
    318     /// Check whether or not we should apply the terminal rule on the
    319     /// destination (Dst) of \p Copy.
    320     /// When the terminal rule applies, Copy is not profitable to
    321     /// coalesce.
    322     /// Dst is terminal if it has exactly one affinity (Dst, Src) and
    323     /// at least one interference (Dst, Dst2). If Dst is terminal, the
    324     /// terminal rule consists in checking that at least one of
    325     /// interfering node, say Dst2, has an affinity of equal or greater
    326     /// weight with Src.
    327     /// In that case, Dst2 and Dst will not be able to be both coalesced
    328     /// with Src. Since Dst2 exposes more coalescing opportunities than
    329     /// Dst, we can drop \p Copy.
    330     bool applyTerminalRule(const MachineInstr &Copy) const;
    331 
    332     /// Wrapper method for \see LiveIntervals::shrinkToUses.
    333     /// This method does the proper fixing of the live-ranges when the afore
    334     /// mentioned method returns true.
    335     void shrinkToUses(LiveInterval *LI,
    336                       SmallVectorImpl<MachineInstr * > *Dead = nullptr) {
    337       NumShrinkToUses++;
    338       if (LIS->shrinkToUses(LI, Dead)) {
    339         /// Check whether or not \p LI is composed by multiple connected
    340         /// components and if that is the case, fix that.
    341         SmallVector<LiveInterval*, 8> SplitLIs;
    342         LIS->splitSeparateComponents(*LI, SplitLIs);
    343       }
    344     }
    345 
    346     /// Wrapper Method to do all the necessary work when an Instruction is
    347     /// deleted.
    348     /// Optimizations should use this to make sure that deleted instructions
    349     /// are always accounted for.
    350     void deleteInstr(MachineInstr* MI) {
    351       ErasedInstrs.insert(MI);
    352       LIS->RemoveMachineInstrFromMaps(*MI);
    353       MI->eraseFromParent();
    354     }
    355 
    356     /// Walk over function and initialize the DbgVRegToValues map.
    357     void buildVRegToDbgValueMap(MachineFunction &MF);
    358 
    359     /// Test whether, after merging, any DBG_VALUEs would refer to a
    360     /// different value number than before merging, and whether this can
    361     /// be resolved. If not, mark the DBG_VALUE as being undef.
    362     void checkMergingChangesDbgValues(CoalescerPair &CP, LiveRange &LHS,
    363                                       JoinVals &LHSVals, LiveRange &RHS,
    364                                       JoinVals &RHSVals);
    365 
    366     void checkMergingChangesDbgValuesImpl(Register Reg, LiveRange &OtherRange,
    367                                           LiveRange &RegRange, JoinVals &Vals2);
    368 
    369   public:
    370     static char ID; ///< Class identification, replacement for typeinfo
    371 
    372     RegisterCoalescer() : MachineFunctionPass(ID) {
    373       initializeRegisterCoalescerPass(*PassRegistry::getPassRegistry());
    374     }
    375 
    376     void getAnalysisUsage(AnalysisUsage &AU) const override;
    377 
    378     void releaseMemory() override;
    379 
    380     /// This is the pass entry point.
    381     bool runOnMachineFunction(MachineFunction&) override;
    382 
    383     /// Implement the dump method.
    384     void print(raw_ostream &O, const Module* = nullptr) const override;
    385   };
    386 
    387 } // end anonymous namespace
    388 
    389 char RegisterCoalescer::ID = 0;
    390 
    391 char &llvm::RegisterCoalescerID = RegisterCoalescer::ID;
    392 
    393 INITIALIZE_PASS_BEGIN(RegisterCoalescer, "simple-register-coalescing",
    394                       "Simple Register Coalescing", false, false)
    395 INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
    396 INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
    397 INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
    398 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
    399 INITIALIZE_PASS_END(RegisterCoalescer, "simple-register-coalescing",
    400                     "Simple Register Coalescing", false, false)
    401 
    402 LLVM_NODISCARD static bool isMoveInstr(const TargetRegisterInfo &tri,
    403                                        const MachineInstr *MI, Register &Src,
    404                                        Register &Dst, unsigned &SrcSub,
    405                                        unsigned &DstSub) {
    406   if (MI->isCopy()) {
    407     Dst = MI->getOperand(0).getReg();
    408     DstSub = MI->getOperand(0).getSubReg();
    409     Src = MI->getOperand(1).getReg();
    410     SrcSub = MI->getOperand(1).getSubReg();
    411   } else if (MI->isSubregToReg()) {
    412     Dst = MI->getOperand(0).getReg();
    413     DstSub = tri.composeSubRegIndices(MI->getOperand(0).getSubReg(),
    414                                       MI->getOperand(3).getImm());
    415     Src = MI->getOperand(2).getReg();
    416     SrcSub = MI->getOperand(2).getSubReg();
    417   } else
    418     return false;
    419   return true;
    420 }
    421 
    422 /// Return true if this block should be vacated by the coalescer to eliminate
    423 /// branches. The important cases to handle in the coalescer are critical edges
    424 /// split during phi elimination which contain only copies. Simple blocks that
    425 /// contain non-branches should also be vacated, but this can be handled by an
    426 /// earlier pass similar to early if-conversion.
    427 static bool isSplitEdge(const MachineBasicBlock *MBB) {
    428   if (MBB->pred_size() != 1 || MBB->succ_size() != 1)
    429     return false;
    430 
    431   for (const auto &MI : *MBB) {
    432     if (!MI.isCopyLike() && !MI.isUnconditionalBranch())
    433       return false;
    434   }
    435   return true;
    436 }
    437 
    438 bool CoalescerPair::setRegisters(const MachineInstr *MI) {
    439   SrcReg = DstReg = Register();
    440   SrcIdx = DstIdx = 0;
    441   NewRC = nullptr;
    442   Flipped = CrossClass = false;
    443 
    444   Register Src, Dst;
    445   unsigned SrcSub = 0, DstSub = 0;
    446   if (!isMoveInstr(TRI, MI, Src, Dst, SrcSub, DstSub))
    447     return false;
    448   Partial = SrcSub || DstSub;
    449 
    450   // If one register is a physreg, it must be Dst.
    451   if (Register::isPhysicalRegister(Src)) {
    452     if (Register::isPhysicalRegister(Dst))
    453       return false;
    454     std::swap(Src, Dst);
    455     std::swap(SrcSub, DstSub);
    456     Flipped = true;
    457   }
    458 
    459   const MachineRegisterInfo &MRI = MI->getMF()->getRegInfo();
    460 
    461   if (Register::isPhysicalRegister(Dst)) {
    462     // Eliminate DstSub on a physreg.
    463     if (DstSub) {
    464       Dst = TRI.getSubReg(Dst, DstSub);
    465       if (!Dst) return false;
    466       DstSub = 0;
    467     }
    468 
    469     // Eliminate SrcSub by picking a corresponding Dst superregister.
    470     if (SrcSub) {
    471       Dst = TRI.getMatchingSuperReg(Dst, SrcSub, MRI.getRegClass(Src));
    472       if (!Dst) return false;
    473     } else if (!MRI.getRegClass(Src)->contains(Dst)) {
    474       return false;
    475     }
    476   } else {
    477     // Both registers are virtual.
    478     const TargetRegisterClass *SrcRC = MRI.getRegClass(Src);
    479     const TargetRegisterClass *DstRC = MRI.getRegClass(Dst);
    480 
    481     // Both registers have subreg indices.
    482     if (SrcSub && DstSub) {
    483       // Copies between different sub-registers are never coalescable.
    484       if (Src == Dst && SrcSub != DstSub)
    485         return false;
    486 
    487       NewRC = TRI.getCommonSuperRegClass(SrcRC, SrcSub, DstRC, DstSub,
    488                                          SrcIdx, DstIdx);
    489       if (!NewRC)
    490         return false;
    491     } else if (DstSub) {
    492       // SrcReg will be merged with a sub-register of DstReg.
    493       SrcIdx = DstSub;
    494       NewRC = TRI.getMatchingSuperRegClass(DstRC, SrcRC, DstSub);
    495     } else if (SrcSub) {
    496       // DstReg will be merged with a sub-register of SrcReg.
    497       DstIdx = SrcSub;
    498       NewRC = TRI.getMatchingSuperRegClass(SrcRC, DstRC, SrcSub);
    499     } else {
    500       // This is a straight copy without sub-registers.
    501       NewRC = TRI.getCommonSubClass(DstRC, SrcRC);
    502     }
    503 
    504     // The combined constraint may be impossible to satisfy.
    505     if (!NewRC)
    506       return false;
    507 
    508     // Prefer SrcReg to be a sub-register of DstReg.
    509     // FIXME: Coalescer should support subregs symmetrically.
    510     if (DstIdx && !SrcIdx) {
    511       std::swap(Src, Dst);
    512       std::swap(SrcIdx, DstIdx);
    513       Flipped = !Flipped;
    514     }
    515 
    516     CrossClass = NewRC != DstRC || NewRC != SrcRC;
    517   }
    518   // Check our invariants
    519   assert(Register::isVirtualRegister(Src) && "Src must be virtual");
    520   assert(!(Register::isPhysicalRegister(Dst) && DstSub) &&
    521          "Cannot have a physical SubIdx");
    522   SrcReg = Src;
    523   DstReg = Dst;
    524   return true;
    525 }
    526 
    527 bool CoalescerPair::flip() {
    528   if (Register::isPhysicalRegister(DstReg))
    529     return false;
    530   std::swap(SrcReg, DstReg);
    531   std::swap(SrcIdx, DstIdx);
    532   Flipped = !Flipped;
    533   return true;
    534 }
    535 
    536 bool CoalescerPair::isCoalescable(const MachineInstr *MI) const {
    537   if (!MI)
    538     return false;
    539   Register Src, Dst;
    540   unsigned SrcSub = 0, DstSub = 0;
    541   if (!isMoveInstr(TRI, MI, Src, Dst, SrcSub, DstSub))
    542     return false;
    543 
    544   // Find the virtual register that is SrcReg.
    545   if (Dst == SrcReg) {
    546     std::swap(Src, Dst);
    547     std::swap(SrcSub, DstSub);
    548   } else if (Src != SrcReg) {
    549     return false;
    550   }
    551 
    552   // Now check that Dst matches DstReg.
    553   if (DstReg.isPhysical()) {
    554     if (!Dst.isPhysical())
    555       return false;
    556     assert(!DstIdx && !SrcIdx && "Inconsistent CoalescerPair state.");
    557     // DstSub could be set for a physreg from INSERT_SUBREG.
    558     if (DstSub)
    559       Dst = TRI.getSubReg(Dst, DstSub);
    560     // Full copy of Src.
    561     if (!SrcSub)
    562       return DstReg == Dst;
    563     // This is a partial register copy. Check that the parts match.
    564     return Register(TRI.getSubReg(DstReg, SrcSub)) == Dst;
    565   } else {
    566     // DstReg is virtual.
    567     if (DstReg != Dst)
    568       return false;
    569     // Registers match, do the subregisters line up?
    570     return TRI.composeSubRegIndices(SrcIdx, SrcSub) ==
    571            TRI.composeSubRegIndices(DstIdx, DstSub);
    572   }
    573 }
    574 
    575 void RegisterCoalescer::getAnalysisUsage(AnalysisUsage &AU) const {
    576   AU.setPreservesCFG();
    577   AU.addRequired<AAResultsWrapperPass>();
    578   AU.addRequired<LiveIntervals>();
    579   AU.addPreserved<LiveIntervals>();
    580   AU.addPreserved<SlotIndexes>();
    581   AU.addRequired<MachineLoopInfo>();
    582   AU.addPreserved<MachineLoopInfo>();
    583   AU.addPreservedID(MachineDominatorsID);
    584   MachineFunctionPass::getAnalysisUsage(AU);
    585 }
    586 
    587 void RegisterCoalescer::eliminateDeadDefs() {
    588   SmallVector<Register, 8> NewRegs;
    589   LiveRangeEdit(nullptr, NewRegs, *MF, *LIS,
    590                 nullptr, this).eliminateDeadDefs(DeadDefs);
    591 }
    592 
    593 void RegisterCoalescer::LRE_WillEraseInstruction(MachineInstr *MI) {
    594   // MI may be in WorkList. Make sure we don't visit it.
    595   ErasedInstrs.insert(MI);
    596 }
    597 
    598 bool RegisterCoalescer::adjustCopiesBackFrom(const CoalescerPair &CP,
    599                                              MachineInstr *CopyMI) {
    600   assert(!CP.isPartial() && "This doesn't work for partial copies.");
    601   assert(!CP.isPhys() && "This doesn't work for physreg copies.");
    602 
    603   LiveInterval &IntA =
    604     LIS->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg());
    605   LiveInterval &IntB =
    606     LIS->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg());
    607   SlotIndex CopyIdx = LIS->getInstructionIndex(*CopyMI).getRegSlot();
    608 
    609   // We have a non-trivially-coalescable copy with IntA being the source and
    610   // IntB being the dest, thus this defines a value number in IntB.  If the
    611   // source value number (in IntA) is defined by a copy from B, see if we can
    612   // merge these two pieces of B into a single value number, eliminating a copy.
    613   // For example:
    614   //
    615   //  A3 = B0
    616   //    ...
    617   //  B1 = A3      <- this copy
    618   //
    619   // In this case, B0 can be extended to where the B1 copy lives, allowing the
    620   // B1 value number to be replaced with B0 (which simplifies the B
    621   // liveinterval).
    622 
    623   // BValNo is a value number in B that is defined by a copy from A.  'B1' in
    624   // the example above.
    625   LiveInterval::iterator BS = IntB.FindSegmentContaining(CopyIdx);
    626   if (BS == IntB.end()) return false;
    627   VNInfo *BValNo = BS->valno;
    628 
    629   // Get the location that B is defined at.  Two options: either this value has
    630   // an unknown definition point or it is defined at CopyIdx.  If unknown, we
    631   // can't process it.
    632   if (BValNo->def != CopyIdx) return false;
    633 
    634   // AValNo is the value number in A that defines the copy, A3 in the example.
    635   SlotIndex CopyUseIdx = CopyIdx.getRegSlot(true);
    636   LiveInterval::iterator AS = IntA.FindSegmentContaining(CopyUseIdx);
    637   // The live segment might not exist after fun with physreg coalescing.
    638   if (AS == IntA.end()) return false;
    639   VNInfo *AValNo = AS->valno;
    640 
    641   // If AValNo is defined as a copy from IntB, we can potentially process this.
    642   // Get the instruction that defines this value number.
    643   MachineInstr *ACopyMI = LIS->getInstructionFromIndex(AValNo->def);
    644   // Don't allow any partial copies, even if isCoalescable() allows them.
    645   if (!CP.isCoalescable(ACopyMI) || !ACopyMI->isFullCopy())
    646     return false;
    647 
    648   // Get the Segment in IntB that this value number starts with.
    649   LiveInterval::iterator ValS =
    650     IntB.FindSegmentContaining(AValNo->def.getPrevSlot());
    651   if (ValS == IntB.end())
    652     return false;
    653 
    654   // Make sure that the end of the live segment is inside the same block as
    655   // CopyMI.
    656   MachineInstr *ValSEndInst =
    657     LIS->getInstructionFromIndex(ValS->end.getPrevSlot());
    658   if (!ValSEndInst || ValSEndInst->getParent() != CopyMI->getParent())
    659     return false;
    660 
    661   // Okay, we now know that ValS ends in the same block that the CopyMI
    662   // live-range starts.  If there are no intervening live segments between them
    663   // in IntB, we can merge them.
    664   if (ValS+1 != BS) return false;
    665 
    666   LLVM_DEBUG(dbgs() << "Extending: " << printReg(IntB.reg(), TRI));
    667 
    668   SlotIndex FillerStart = ValS->end, FillerEnd = BS->start;
    669   // We are about to delete CopyMI, so need to remove it as the 'instruction
    670   // that defines this value #'. Update the valnum with the new defining
    671   // instruction #.
    672   BValNo->def = FillerStart;
    673 
    674   // Okay, we can merge them.  We need to insert a new liverange:
    675   // [ValS.end, BS.begin) of either value number, then we merge the
    676   // two value numbers.
    677   IntB.addSegment(LiveInterval::Segment(FillerStart, FillerEnd, BValNo));
    678 
    679   // Okay, merge "B1" into the same value number as "B0".
    680   if (BValNo != ValS->valno)
    681     IntB.MergeValueNumberInto(BValNo, ValS->valno);
    682 
    683   // Do the same for the subregister segments.
    684   for (LiveInterval::SubRange &S : IntB.subranges()) {
    685     // Check for SubRange Segments of the form [1234r,1234d:0) which can be
    686     // removed to prevent creating bogus SubRange Segments.
    687     LiveInterval::iterator SS = S.FindSegmentContaining(CopyIdx);
    688     if (SS != S.end() && SlotIndex::isSameInstr(SS->start, SS->end)) {
    689       S.removeSegment(*SS, true);
    690       continue;
    691     }
    692     // The subrange may have ended before FillerStart. If so, extend it.
    693     if (!S.getVNInfoAt(FillerStart)) {
    694       SlotIndex BBStart =
    695           LIS->getMBBStartIdx(LIS->getMBBFromIndex(FillerStart));
    696       S.extendInBlock(BBStart, FillerStart);
    697     }
    698     VNInfo *SubBValNo = S.getVNInfoAt(CopyIdx);
    699     S.addSegment(LiveInterval::Segment(FillerStart, FillerEnd, SubBValNo));
    700     VNInfo *SubValSNo = S.getVNInfoAt(AValNo->def.getPrevSlot());
    701     if (SubBValNo != SubValSNo)
    702       S.MergeValueNumberInto(SubBValNo, SubValSNo);
    703   }
    704 
    705   LLVM_DEBUG(dbgs() << "   result = " << IntB << '\n');
    706 
    707   // If the source instruction was killing the source register before the
    708   // merge, unset the isKill marker given the live range has been extended.
    709   int UIdx = ValSEndInst->findRegisterUseOperandIdx(IntB.reg(), true);
    710   if (UIdx != -1) {
    711     ValSEndInst->getOperand(UIdx).setIsKill(false);
    712   }
    713 
    714   // Rewrite the copy.
    715   CopyMI->substituteRegister(IntA.reg(), IntB.reg(), 0, *TRI);
    716   // If the copy instruction was killing the destination register or any
    717   // subrange before the merge trim the live range.
    718   bool RecomputeLiveRange = AS->end == CopyIdx;
    719   if (!RecomputeLiveRange) {
    720     for (LiveInterval::SubRange &S : IntA.subranges()) {
    721       LiveInterval::iterator SS = S.FindSegmentContaining(CopyUseIdx);
    722       if (SS != S.end() && SS->end == CopyIdx) {
    723         RecomputeLiveRange = true;
    724         break;
    725       }
    726     }
    727   }
    728   if (RecomputeLiveRange)
    729     shrinkToUses(&IntA);
    730 
    731   ++numExtends;
    732   return true;
    733 }
    734 
    735 bool RegisterCoalescer::hasOtherReachingDefs(LiveInterval &IntA,
    736                                              LiveInterval &IntB,
    737                                              VNInfo *AValNo,
    738                                              VNInfo *BValNo) {
    739   // If AValNo has PHI kills, conservatively assume that IntB defs can reach
    740   // the PHI values.
    741   if (LIS->hasPHIKill(IntA, AValNo))
    742     return true;
    743 
    744   for (LiveRange::Segment &ASeg : IntA.segments) {
    745     if (ASeg.valno != AValNo) continue;
    746     LiveInterval::iterator BI = llvm::upper_bound(IntB, ASeg.start);
    747     if (BI != IntB.begin())
    748       --BI;
    749     for (; BI != IntB.end() && ASeg.end >= BI->start; ++BI) {
    750       if (BI->valno == BValNo)
    751         continue;
    752       if (BI->start <= ASeg.start && BI->end > ASeg.start)
    753         return true;
    754       if (BI->start > ASeg.start && BI->start < ASeg.end)
    755         return true;
    756     }
    757   }
    758   return false;
    759 }
    760 
    761 /// Copy segments with value number @p SrcValNo from liverange @p Src to live
    762 /// range @Dst and use value number @p DstValNo there.
    763 static std::pair<bool,bool>
    764 addSegmentsWithValNo(LiveRange &Dst, VNInfo *DstValNo, const LiveRange &Src,
    765                      const VNInfo *SrcValNo) {
    766   bool Changed = false;
    767   bool MergedWithDead = false;
    768   for (const LiveRange::Segment &S : Src.segments) {
    769     if (S.valno != SrcValNo)
    770       continue;
    771     // This is adding a segment from Src that ends in a copy that is about
    772     // to be removed. This segment is going to be merged with a pre-existing
    773     // segment in Dst. This works, except in cases when the corresponding
    774     // segment in Dst is dead. For example: adding [192r,208r:1) from Src
    775     // to [208r,208d:1) in Dst would create [192r,208d:1) in Dst.
    776     // Recognized such cases, so that the segments can be shrunk.
    777     LiveRange::Segment Added = LiveRange::Segment(S.start, S.end, DstValNo);
    778     LiveRange::Segment &Merged = *Dst.addSegment(Added);
    779     if (Merged.end.isDead())
    780       MergedWithDead = true;
    781     Changed = true;
    782   }
    783   return std::make_pair(Changed, MergedWithDead);
    784 }
    785 
    786 std::pair<bool,bool>
    787 RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
    788                                             MachineInstr *CopyMI) {
    789   assert(!CP.isPhys());
    790 
    791   LiveInterval &IntA =
    792       LIS->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg());
    793   LiveInterval &IntB =
    794       LIS->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg());
    795 
    796   // We found a non-trivially-coalescable copy with IntA being the source and
    797   // IntB being the dest, thus this defines a value number in IntB.  If the
    798   // source value number (in IntA) is defined by a commutable instruction and
    799   // its other operand is coalesced to the copy dest register, see if we can
    800   // transform the copy into a noop by commuting the definition. For example,
    801   //
    802   //  A3 = op A2 killed B0
    803   //    ...
    804   //  B1 = A3      <- this copy
    805   //    ...
    806   //     = op A3   <- more uses
    807   //
    808   // ==>
    809   //
    810   //  B2 = op B0 killed A2
    811   //    ...
    812   //  B1 = B2      <- now an identity copy
    813   //    ...
    814   //     = op B2   <- more uses
    815 
    816   // BValNo is a value number in B that is defined by a copy from A. 'B1' in
    817   // the example above.
    818   SlotIndex CopyIdx = LIS->getInstructionIndex(*CopyMI).getRegSlot();
    819   VNInfo *BValNo = IntB.getVNInfoAt(CopyIdx);
    820   assert(BValNo != nullptr && BValNo->def == CopyIdx);
    821 
    822   // AValNo is the value number in A that defines the copy, A3 in the example.
    823   VNInfo *AValNo = IntA.getVNInfoAt(CopyIdx.getRegSlot(true));
    824   assert(AValNo && !AValNo->isUnused() && "COPY source not live");
    825   if (AValNo->isPHIDef())
    826     return { false, false };
    827   MachineInstr *DefMI = LIS->getInstructionFromIndex(AValNo->def);
    828   if (!DefMI)
    829     return { false, false };
    830   if (!DefMI->isCommutable())
    831     return { false, false };
    832   // If DefMI is a two-address instruction then commuting it will change the
    833   // destination register.
    834   int DefIdx = DefMI->findRegisterDefOperandIdx(IntA.reg());
    835   assert(DefIdx != -1);
    836   unsigned UseOpIdx;
    837   if (!DefMI->isRegTiedToUseOperand(DefIdx, &UseOpIdx))
    838     return { false, false };
    839 
    840   // FIXME: The code below tries to commute 'UseOpIdx' operand with some other
    841   // commutable operand which is expressed by 'CommuteAnyOperandIndex'value
    842   // passed to the method. That _other_ operand is chosen by
    843   // the findCommutedOpIndices() method.
    844   //
    845   // That is obviously an area for improvement in case of instructions having
    846   // more than 2 operands. For example, if some instruction has 3 commutable
    847   // operands then all possible variants (i.e. op#1<->op#2, op#1<->op#3,
    848   // op#2<->op#3) of commute transformation should be considered/tried here.
    849   unsigned NewDstIdx = TargetInstrInfo::CommuteAnyOperandIndex;
    850   if (!TII->findCommutedOpIndices(*DefMI, UseOpIdx, NewDstIdx))
    851     return { false, false };
    852 
    853   MachineOperand &NewDstMO = DefMI->getOperand(NewDstIdx);
    854   Register NewReg = NewDstMO.getReg();
    855   if (NewReg != IntB.reg() || !IntB.Query(AValNo->def).isKill())
    856     return { false, false };
    857 
    858   // Make sure there are no other definitions of IntB that would reach the
    859   // uses which the new definition can reach.
    860   if (hasOtherReachingDefs(IntA, IntB, AValNo, BValNo))
    861     return { false, false };
    862 
    863   // If some of the uses of IntA.reg is already coalesced away, return false.
    864   // It's not possible to determine whether it's safe to perform the coalescing.
    865   for (MachineOperand &MO : MRI->use_nodbg_operands(IntA.reg())) {
    866     MachineInstr *UseMI = MO.getParent();
    867     unsigned OpNo = &MO - &UseMI->getOperand(0);
    868     SlotIndex UseIdx = LIS->getInstructionIndex(*UseMI);
    869     LiveInterval::iterator US = IntA.FindSegmentContaining(UseIdx);
    870     if (US == IntA.end() || US->valno != AValNo)
    871       continue;
    872     // If this use is tied to a def, we can't rewrite the register.
    873     if (UseMI->isRegTiedToDefOperand(OpNo))
    874       return { false, false };
    875   }
    876 
    877   LLVM_DEBUG(dbgs() << "\tremoveCopyByCommutingDef: " << AValNo->def << '\t'
    878                     << *DefMI);
    879 
    880   // At this point we have decided that it is legal to do this
    881   // transformation.  Start by commuting the instruction.
    882   MachineBasicBlock *MBB = DefMI->getParent();
    883   MachineInstr *NewMI =
    884       TII->commuteInstruction(*DefMI, false, UseOpIdx, NewDstIdx);
    885   if (!NewMI)
    886     return { false, false };
    887   if (Register::isVirtualRegister(IntA.reg()) &&
    888       Register::isVirtualRegister(IntB.reg()) &&
    889       !MRI->constrainRegClass(IntB.reg(), MRI->getRegClass(IntA.reg())))
    890     return { false, false };
    891   if (NewMI != DefMI) {
    892     LIS->ReplaceMachineInstrInMaps(*DefMI, *NewMI);
    893     MachineBasicBlock::iterator Pos = DefMI;
    894     MBB->insert(Pos, NewMI);
    895     MBB->erase(DefMI);
    896   }
    897 
    898   // If ALR and BLR overlaps and end of BLR extends beyond end of ALR, e.g.
    899   // A = or A, B
    900   // ...
    901   // B = A
    902   // ...
    903   // C = killed A
    904   // ...
    905   //   = B
    906 
    907   // Update uses of IntA of the specific Val# with IntB.
    908   for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(IntA.reg()),
    909                                          UE = MRI->use_end();
    910        UI != UE;
    911        /* ++UI is below because of possible MI removal */) {
    912     MachineOperand &UseMO = *UI;
    913     ++UI;
    914     if (UseMO.isUndef())
    915       continue;
    916     MachineInstr *UseMI = UseMO.getParent();
    917     if (UseMI->isDebugValue()) {
    918       // FIXME These don't have an instruction index.  Not clear we have enough
    919       // info to decide whether to do this replacement or not.  For now do it.
    920       UseMO.setReg(NewReg);
    921       continue;
    922     }
    923     SlotIndex UseIdx = LIS->getInstructionIndex(*UseMI).getRegSlot(true);
    924     LiveInterval::iterator US = IntA.FindSegmentContaining(UseIdx);
    925     assert(US != IntA.end() && "Use must be live");
    926     if (US->valno != AValNo)
    927       continue;
    928     // Kill flags are no longer accurate. They are recomputed after RA.
    929     UseMO.setIsKill(false);
    930     if (Register::isPhysicalRegister(NewReg))
    931       UseMO.substPhysReg(NewReg, *TRI);
    932     else
    933       UseMO.setReg(NewReg);
    934     if (UseMI == CopyMI)
    935       continue;
    936     if (!UseMI->isCopy())
    937       continue;
    938     if (UseMI->getOperand(0).getReg() != IntB.reg() ||
    939         UseMI->getOperand(0).getSubReg())
    940       continue;
    941 
    942     // This copy will become a noop. If it's defining a new val#, merge it into
    943     // BValNo.
    944     SlotIndex DefIdx = UseIdx.getRegSlot();
    945     VNInfo *DVNI = IntB.getVNInfoAt(DefIdx);
    946     if (!DVNI)
    947       continue;
    948     LLVM_DEBUG(dbgs() << "\t\tnoop: " << DefIdx << '\t' << *UseMI);
    949     assert(DVNI->def == DefIdx);
    950     BValNo = IntB.MergeValueNumberInto(DVNI, BValNo);
    951     for (LiveInterval::SubRange &S : IntB.subranges()) {
    952       VNInfo *SubDVNI = S.getVNInfoAt(DefIdx);
    953       if (!SubDVNI)
    954         continue;
    955       VNInfo *SubBValNo = S.getVNInfoAt(CopyIdx);
    956       assert(SubBValNo->def == CopyIdx);
    957       S.MergeValueNumberInto(SubDVNI, SubBValNo);
    958     }
    959 
    960     deleteInstr(UseMI);
    961   }
    962 
    963   // Extend BValNo by merging in IntA live segments of AValNo. Val# definition
    964   // is updated.
    965   bool ShrinkB = false;
    966   BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator();
    967   if (IntA.hasSubRanges() || IntB.hasSubRanges()) {
    968     if (!IntA.hasSubRanges()) {
    969       LaneBitmask Mask = MRI->getMaxLaneMaskForVReg(IntA.reg());
    970       IntA.createSubRangeFrom(Allocator, Mask, IntA);
    971     } else if (!IntB.hasSubRanges()) {
    972       LaneBitmask Mask = MRI->getMaxLaneMaskForVReg(IntB.reg());
    973       IntB.createSubRangeFrom(Allocator, Mask, IntB);
    974     }
    975     SlotIndex AIdx = CopyIdx.getRegSlot(true);
    976     LaneBitmask MaskA;
    977     const SlotIndexes &Indexes = *LIS->getSlotIndexes();
    978     for (LiveInterval::SubRange &SA : IntA.subranges()) {
    979       VNInfo *ASubValNo = SA.getVNInfoAt(AIdx);
    980       // Even if we are dealing with a full copy, some lanes can
    981       // still be undefined.
    982       // E.g.,
    983       // undef A.subLow = ...
    984       // B = COPY A <== A.subHigh is undefined here and does
    985       //                not have a value number.
    986       if (!ASubValNo)
    987         continue;
    988       MaskA |= SA.LaneMask;
    989 
    990       IntB.refineSubRanges(
    991           Allocator, SA.LaneMask,
    992           [&Allocator, &SA, CopyIdx, ASubValNo,
    993            &ShrinkB](LiveInterval::SubRange &SR) {
    994             VNInfo *BSubValNo = SR.empty() ? SR.getNextValue(CopyIdx, Allocator)
    995                                            : SR.getVNInfoAt(CopyIdx);
    996             assert(BSubValNo != nullptr);
    997             auto P = addSegmentsWithValNo(SR, BSubValNo, SA, ASubValNo);
    998             ShrinkB |= P.second;
    999             if (P.first)
   1000               BSubValNo->def = ASubValNo->def;
   1001           },
   1002           Indexes, *TRI);
   1003     }
   1004     // Go over all subranges of IntB that have not been covered by IntA,
   1005     // and delete the segments starting at CopyIdx. This can happen if
   1006     // IntA has undef lanes that are defined in IntB.
   1007     for (LiveInterval::SubRange &SB : IntB.subranges()) {
   1008       if ((SB.LaneMask & MaskA).any())
   1009         continue;
   1010       if (LiveRange::Segment *S = SB.getSegmentContaining(CopyIdx))
   1011         if (S->start.getBaseIndex() == CopyIdx.getBaseIndex())
   1012           SB.removeSegment(*S, true);
   1013     }
   1014   }
   1015 
   1016   BValNo->def = AValNo->def;
   1017   auto P = addSegmentsWithValNo(IntB, BValNo, IntA, AValNo);
   1018   ShrinkB |= P.second;
   1019   LLVM_DEBUG(dbgs() << "\t\textended: " << IntB << '\n');
   1020 
   1021   LIS->removeVRegDefAt(IntA, AValNo->def);
   1022 
   1023   LLVM_DEBUG(dbgs() << "\t\ttrimmed:  " << IntA << '\n');
   1024   ++numCommutes;
   1025   return { true, ShrinkB };
   1026 }
   1027 
   1028 /// For copy B = A in BB2, if A is defined by A = B in BB0 which is a
   1029 /// predecessor of BB2, and if B is not redefined on the way from A = B
   1030 /// in BB0 to B = A in BB2, B = A in BB2 is partially redundant if the
   1031 /// execution goes through the path from BB0 to BB2. We may move B = A
   1032 /// to the predecessor without such reversed copy.
   1033 /// So we will transform the program from:
   1034 ///   BB0:
   1035 ///      A = B;    BB1:
   1036 ///       ...         ...
   1037 ///     /     \      /
   1038 ///             BB2:
   1039 ///               ...
   1040 ///               B = A;
   1041 ///
   1042 /// to:
   1043 ///
   1044 ///   BB0:         BB1:
   1045 ///      A = B;        ...
   1046 ///       ...          B = A;
   1047 ///     /     \       /
   1048 ///             BB2:
   1049 ///               ...
   1050 ///
   1051 /// A special case is when BB0 and BB2 are the same BB which is the only
   1052 /// BB in a loop:
   1053 ///   BB1:
   1054 ///        ...
   1055 ///   BB0/BB2:  ----
   1056 ///        B = A;   |
   1057 ///        ...      |
   1058 ///        A = B;   |
   1059 ///          |-------
   1060 ///          |
   1061 /// We may hoist B = A from BB0/BB2 to BB1.
   1062 ///
   1063 /// The major preconditions for correctness to remove such partial
   1064 /// redundancy include:
   1065 /// 1. A in B = A in BB2 is defined by a PHI in BB2, and one operand of
   1066 ///    the PHI is defined by the reversed copy A = B in BB0.
   1067 /// 2. No B is referenced from the start of BB2 to B = A.
   1068 /// 3. No B is defined from A = B to the end of BB0.
   1069 /// 4. BB1 has only one successor.
   1070 ///
   1071 /// 2 and 4 implicitly ensure B is not live at the end of BB1.
   1072 /// 4 guarantees BB2 is hotter than BB1, so we can only move a copy to a
   1073 /// colder place, which not only prevent endless loop, but also make sure
   1074 /// the movement of copy is beneficial.
   1075 bool RegisterCoalescer::removePartialRedundancy(const CoalescerPair &CP,
   1076                                                 MachineInstr &CopyMI) {
   1077   assert(!CP.isPhys());
   1078   if (!CopyMI.isFullCopy())
   1079     return false;
   1080 
   1081   MachineBasicBlock &MBB = *CopyMI.getParent();
   1082   // If this block is the target of an invoke/inlineasm_br, moving the copy into
   1083   // the predecessor is tricker, and we don't handle it.
   1084   if (MBB.isEHPad() || MBB.isInlineAsmBrIndirectTarget())
   1085     return false;
   1086 
   1087   if (MBB.pred_size() != 2)
   1088     return false;
   1089 
   1090   LiveInterval &IntA =
   1091       LIS->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg());
   1092   LiveInterval &IntB =
   1093       LIS->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg());
   1094 
   1095   // A is defined by PHI at the entry of MBB.
   1096   SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getRegSlot(true);
   1097   VNInfo *AValNo = IntA.getVNInfoAt(CopyIdx);
   1098   assert(AValNo && !AValNo->isUnused() && "COPY source not live");
   1099   if (!AValNo->isPHIDef())
   1100     return false;
   1101 
   1102   // No B is referenced before CopyMI in MBB.
   1103   if (IntB.overlaps(LIS->getMBBStartIdx(&MBB), CopyIdx))
   1104     return false;
   1105 
   1106   // MBB has two predecessors: one contains A = B so no copy will be inserted
   1107   // for it. The other one will have a copy moved from MBB.
   1108   bool FoundReverseCopy = false;
   1109   MachineBasicBlock *CopyLeftBB = nullptr;
   1110   for (MachineBasicBlock *Pred : MBB.predecessors()) {
   1111     VNInfo *PVal = IntA.getVNInfoBefore(LIS->getMBBEndIdx(Pred));
   1112     MachineInstr *DefMI = LIS->getInstructionFromIndex(PVal->def);
   1113     if (!DefMI || !DefMI->isFullCopy()) {
   1114       CopyLeftBB = Pred;
   1115       continue;
   1116     }
   1117     // Check DefMI is a reverse copy and it is in BB Pred.
   1118     if (DefMI->getOperand(0).getReg() != IntA.reg() ||
   1119         DefMI->getOperand(1).getReg() != IntB.reg() ||
   1120         DefMI->getParent() != Pred) {
   1121       CopyLeftBB = Pred;
   1122       continue;
   1123     }
   1124     // If there is any other def of B after DefMI and before the end of Pred,
   1125     // we need to keep the copy of B = A at the end of Pred if we remove
   1126     // B = A from MBB.
   1127     bool ValB_Changed = false;
   1128     for (auto VNI : IntB.valnos) {
   1129       if (VNI->isUnused())
   1130         continue;
   1131       if (PVal->def < VNI->def && VNI->def < LIS->getMBBEndIdx(Pred)) {
   1132         ValB_Changed = true;
   1133         break;
   1134       }
   1135     }
   1136     if (ValB_Changed) {
   1137       CopyLeftBB = Pred;
   1138       continue;
   1139     }
   1140     FoundReverseCopy = true;
   1141   }
   1142 
   1143   // If no reverse copy is found in predecessors, nothing to do.
   1144   if (!FoundReverseCopy)
   1145     return false;
   1146 
   1147   // If CopyLeftBB is nullptr, it means every predecessor of MBB contains
   1148   // reverse copy, CopyMI can be removed trivially if only IntA/IntB is updated.
   1149   // If CopyLeftBB is not nullptr, move CopyMI from MBB to CopyLeftBB and
   1150   // update IntA/IntB.
   1151   //
   1152   // If CopyLeftBB is not nullptr, ensure CopyLeftBB has a single succ so
   1153   // MBB is hotter than CopyLeftBB.
   1154   if (CopyLeftBB && CopyLeftBB->succ_size() > 1)
   1155     return false;
   1156 
   1157   // Now (almost sure it's) ok to move copy.
   1158   if (CopyLeftBB) {
   1159     // Position in CopyLeftBB where we should insert new copy.
   1160     auto InsPos = CopyLeftBB->getFirstTerminator();
   1161 
   1162     // Make sure that B isn't referenced in the terminators (if any) at the end
   1163     // of the predecessor since we're about to insert a new definition of B
   1164     // before them.
   1165     if (InsPos != CopyLeftBB->end()) {
   1166       SlotIndex InsPosIdx = LIS->getInstructionIndex(*InsPos).getRegSlot(true);
   1167       if (IntB.overlaps(InsPosIdx, LIS->getMBBEndIdx(CopyLeftBB)))
   1168         return false;
   1169     }
   1170 
   1171     LLVM_DEBUG(dbgs() << "\tremovePartialRedundancy: Move the copy to "
   1172                       << printMBBReference(*CopyLeftBB) << '\t' << CopyMI);
   1173 
   1174     // Insert new copy to CopyLeftBB.
   1175     MachineInstr *NewCopyMI = BuildMI(*CopyLeftBB, InsPos, CopyMI.getDebugLoc(),
   1176                                       TII->get(TargetOpcode::COPY), IntB.reg())
   1177                                   .addReg(IntA.reg());
   1178     SlotIndex NewCopyIdx =
   1179         LIS->InsertMachineInstrInMaps(*NewCopyMI).getRegSlot();
   1180     IntB.createDeadDef(NewCopyIdx, LIS->getVNInfoAllocator());
   1181     for (LiveInterval::SubRange &SR : IntB.subranges())
   1182       SR.createDeadDef(NewCopyIdx, LIS->getVNInfoAllocator());
   1183 
   1184     // If the newly created Instruction has an address of an instruction that was
   1185     // deleted before (object recycled by the allocator) it needs to be removed from
   1186     // the deleted list.
   1187     ErasedInstrs.erase(NewCopyMI);
   1188   } else {
   1189     LLVM_DEBUG(dbgs() << "\tremovePartialRedundancy: Remove the copy from "
   1190                       << printMBBReference(MBB) << '\t' << CopyMI);
   1191   }
   1192 
   1193   // Remove CopyMI.
   1194   // Note: This is fine to remove the copy before updating the live-ranges.
   1195   // While updating the live-ranges, we only look at slot indices and
   1196   // never go back to the instruction.
   1197   // Mark instructions as deleted.
   1198   deleteInstr(&CopyMI);
   1199 
   1200   // Update the liveness.
   1201   SmallVector<SlotIndex, 8> EndPoints;
   1202   VNInfo *BValNo = IntB.Query(CopyIdx).valueOutOrDead();
   1203   LIS->pruneValue(*static_cast<LiveRange *>(&IntB), CopyIdx.getRegSlot(),
   1204                   &EndPoints);
   1205   BValNo->markUnused();
   1206   // Extend IntB to the EndPoints of its original live interval.
   1207   LIS->extendToIndices(IntB, EndPoints);
   1208 
   1209   // Now, do the same for its subranges.
   1210   for (LiveInterval::SubRange &SR : IntB.subranges()) {
   1211     EndPoints.clear();
   1212     VNInfo *BValNo = SR.Query(CopyIdx).valueOutOrDead();
   1213     assert(BValNo && "All sublanes should be live");
   1214     LIS->pruneValue(SR, CopyIdx.getRegSlot(), &EndPoints);
   1215     BValNo->markUnused();
   1216     // We can have a situation where the result of the original copy is live,
   1217     // but is immediately dead in this subrange, e.g. [336r,336d:0). That makes
   1218     // the copy appear as an endpoint from pruneValue(), but we don't want it
   1219     // to because the copy has been removed.  We can go ahead and remove that
   1220     // endpoint; there is no other situation here that there could be a use at
   1221     // the same place as we know that the copy is a full copy.
   1222     for (unsigned I = 0; I != EndPoints.size(); ) {
   1223       if (SlotIndex::isSameInstr(EndPoints[I], CopyIdx)) {
   1224         EndPoints[I] = EndPoints.back();
   1225         EndPoints.pop_back();
   1226         continue;
   1227       }
   1228       ++I;
   1229     }
   1230     SmallVector<SlotIndex, 8> Undefs;
   1231     IntB.computeSubRangeUndefs(Undefs, SR.LaneMask, *MRI,
   1232                                *LIS->getSlotIndexes());
   1233     LIS->extendToIndices(SR, EndPoints, Undefs);
   1234   }
   1235   // If any dead defs were extended, truncate them.
   1236   shrinkToUses(&IntB);
   1237 
   1238   // Finally, update the live-range of IntA.
   1239   shrinkToUses(&IntA);
   1240   return true;
   1241 }
   1242 
   1243 /// Returns true if @p MI defines the full vreg @p Reg, as opposed to just
   1244 /// defining a subregister.
   1245 static bool definesFullReg(const MachineInstr &MI, Register Reg) {
   1246   assert(!Reg.isPhysical() && "This code cannot handle physreg aliasing");
   1247 
   1248   for (const MachineOperand &Op : MI.operands()) {
   1249     if (!Op.isReg() || !Op.isDef() || Op.getReg() != Reg)
   1250       continue;
   1251     // Return true if we define the full register or don't care about the value
   1252     // inside other subregisters.
   1253     if (Op.getSubReg() == 0 || Op.isUndef())
   1254       return true;
   1255   }
   1256   return false;
   1257 }
   1258 
   1259 bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
   1260                                                 MachineInstr *CopyMI,
   1261                                                 bool &IsDefCopy) {
   1262   IsDefCopy = false;
   1263   Register SrcReg = CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg();
   1264   unsigned SrcIdx = CP.isFlipped() ? CP.getDstIdx() : CP.getSrcIdx();
   1265   Register DstReg = CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg();
   1266   unsigned DstIdx = CP.isFlipped() ? CP.getSrcIdx() : CP.getDstIdx();
   1267   if (Register::isPhysicalRegister(SrcReg))
   1268     return false;
   1269 
   1270   LiveInterval &SrcInt = LIS->getInterval(SrcReg);
   1271   SlotIndex CopyIdx = LIS->getInstructionIndex(*CopyMI);
   1272   VNInfo *ValNo = SrcInt.Query(CopyIdx).valueIn();
   1273   if (!ValNo)
   1274     return false;
   1275   if (ValNo->isPHIDef() || ValNo->isUnused())
   1276     return false;
   1277   MachineInstr *DefMI = LIS->getInstructionFromIndex(ValNo->def);
   1278   if (!DefMI)
   1279     return false;
   1280   if (DefMI->isCopyLike()) {
   1281     IsDefCopy = true;
   1282     return false;
   1283   }
   1284   if (!TII->isAsCheapAsAMove(*DefMI))
   1285     return false;
   1286   if (!TII->isTriviallyReMaterializable(*DefMI, AA))
   1287     return false;
   1288   if (!definesFullReg(*DefMI, SrcReg))
   1289     return false;
   1290   bool SawStore = false;
   1291   if (!DefMI->isSafeToMove(AA, SawStore))
   1292     return false;
   1293   const MCInstrDesc &MCID = DefMI->getDesc();
   1294   if (MCID.getNumDefs() != 1)
   1295     return false;
   1296   // Only support subregister destinations when the def is read-undef.
   1297   MachineOperand &DstOperand = CopyMI->getOperand(0);
   1298   Register CopyDstReg = DstOperand.getReg();
   1299   if (DstOperand.getSubReg() && !DstOperand.isUndef())
   1300     return false;
   1301 
   1302   // If both SrcIdx and DstIdx are set, correct rematerialization would widen
   1303   // the register substantially (beyond both source and dest size). This is bad
   1304   // for performance since it can cascade through a function, introducing many
   1305   // extra spills and fills (e.g. ARM can easily end up copying QQQQPR registers
   1306   // around after a few subreg copies).
   1307   if (SrcIdx && DstIdx)
   1308     return false;
   1309 
   1310   const TargetRegisterClass *DefRC = TII->getRegClass(MCID, 0, TRI, *MF);
   1311   if (!DefMI->isImplicitDef()) {
   1312     if (DstReg.isPhysical()) {
   1313       Register NewDstReg = DstReg;
   1314 
   1315       unsigned NewDstIdx = TRI->composeSubRegIndices(CP.getSrcIdx(),
   1316                                               DefMI->getOperand(0).getSubReg());
   1317       if (NewDstIdx)
   1318         NewDstReg = TRI->getSubReg(DstReg, NewDstIdx);
   1319 
   1320       // Finally, make sure that the physical subregister that will be
   1321       // constructed later is permitted for the instruction.
   1322       if (!DefRC->contains(NewDstReg))
   1323         return false;
   1324     } else {
   1325       // Theoretically, some stack frame reference could exist. Just make sure
   1326       // it hasn't actually happened.
   1327       assert(Register::isVirtualRegister(DstReg) &&
   1328              "Only expect to deal with virtual or physical registers");
   1329     }
   1330   }
   1331 
   1332   DebugLoc DL = CopyMI->getDebugLoc();
   1333   MachineBasicBlock *MBB = CopyMI->getParent();
   1334   MachineBasicBlock::iterator MII =
   1335     std::next(MachineBasicBlock::iterator(CopyMI));
   1336   TII->reMaterialize(*MBB, MII, DstReg, SrcIdx, *DefMI, *TRI);
   1337   MachineInstr &NewMI = *std::prev(MII);
   1338   NewMI.setDebugLoc(DL);
   1339 
   1340   // In a situation like the following:
   1341   //     %0:subreg = instr              ; DefMI, subreg = DstIdx
   1342   //     %1        = copy %0:subreg ; CopyMI, SrcIdx = 0
   1343   // instead of widening %1 to the register class of %0 simply do:
   1344   //     %1 = instr
   1345   const TargetRegisterClass *NewRC = CP.getNewRC();
   1346   if (DstIdx != 0) {
   1347     MachineOperand &DefMO = NewMI.getOperand(0);
   1348     if (DefMO.getSubReg() == DstIdx) {
   1349       assert(SrcIdx == 0 && CP.isFlipped()
   1350              && "Shouldn't have SrcIdx+DstIdx at this point");
   1351       const TargetRegisterClass *DstRC = MRI->getRegClass(DstReg);
   1352       const TargetRegisterClass *CommonRC =
   1353         TRI->getCommonSubClass(DefRC, DstRC);
   1354       if (CommonRC != nullptr) {
   1355         NewRC = CommonRC;
   1356         DstIdx = 0;
   1357         DefMO.setSubReg(0);
   1358         DefMO.setIsUndef(false); // Only subregs can have def+undef.
   1359       }
   1360     }
   1361   }
   1362 
   1363   // CopyMI may have implicit operands, save them so that we can transfer them
   1364   // over to the newly materialized instruction after CopyMI is removed.
   1365   SmallVector<MachineOperand, 4> ImplicitOps;
   1366   ImplicitOps.reserve(CopyMI->getNumOperands() -
   1367                       CopyMI->getDesc().getNumOperands());
   1368   for (unsigned I = CopyMI->getDesc().getNumOperands(),
   1369                 E = CopyMI->getNumOperands();
   1370        I != E; ++I) {
   1371     MachineOperand &MO = CopyMI->getOperand(I);
   1372     if (MO.isReg()) {
   1373       assert(MO.isImplicit() && "No explicit operands after implicit operands.");
   1374       // Discard VReg implicit defs.
   1375       if (Register::isPhysicalRegister(MO.getReg()))
   1376         ImplicitOps.push_back(MO);
   1377     }
   1378   }
   1379 
   1380   LIS->ReplaceMachineInstrInMaps(*CopyMI, NewMI);
   1381   CopyMI->eraseFromParent();
   1382   ErasedInstrs.insert(CopyMI);
   1383 
   1384   // NewMI may have dead implicit defs (E.g. EFLAGS for MOV<bits>r0 on X86).
   1385   // We need to remember these so we can add intervals once we insert
   1386   // NewMI into SlotIndexes.
   1387   SmallVector<MCRegister, 4> NewMIImplDefs;
   1388   for (unsigned i = NewMI.getDesc().getNumOperands(),
   1389                 e = NewMI.getNumOperands();
   1390        i != e; ++i) {
   1391     MachineOperand &MO = NewMI.getOperand(i);
   1392     if (MO.isReg() && MO.isDef()) {
   1393       assert(MO.isImplicit() && MO.isDead() &&
   1394              Register::isPhysicalRegister(MO.getReg()));
   1395       NewMIImplDefs.push_back(MO.getReg().asMCReg());
   1396     }
   1397   }
   1398 
   1399   if (DstReg.isVirtual()) {
   1400     unsigned NewIdx = NewMI.getOperand(0).getSubReg();
   1401 
   1402     if (DefRC != nullptr) {
   1403       if (NewIdx)
   1404         NewRC = TRI->getMatchingSuperRegClass(NewRC, DefRC, NewIdx);
   1405       else
   1406         NewRC = TRI->getCommonSubClass(NewRC, DefRC);
   1407       assert(NewRC && "subreg chosen for remat incompatible with instruction");
   1408     }
   1409     // Remap subranges to new lanemask and change register class.
   1410     LiveInterval &DstInt = LIS->getInterval(DstReg);
   1411     for (LiveInterval::SubRange &SR : DstInt.subranges()) {
   1412       SR.LaneMask = TRI->composeSubRegIndexLaneMask(DstIdx, SR.LaneMask);
   1413     }
   1414     MRI->setRegClass(DstReg, NewRC);
   1415 
   1416     // Update machine operands and add flags.
   1417     updateRegDefsUses(DstReg, DstReg, DstIdx);
   1418     NewMI.getOperand(0).setSubReg(NewIdx);
   1419     // updateRegDefUses can add an "undef" flag to the definition, since
   1420     // it will replace DstReg with DstReg.DstIdx. If NewIdx is 0, make
   1421     // sure that "undef" is not set.
   1422     if (NewIdx == 0)
   1423       NewMI.getOperand(0).setIsUndef(false);
   1424     // Add dead subregister definitions if we are defining the whole register
   1425     // but only part of it is live.
   1426     // This could happen if the rematerialization instruction is rematerializing
   1427     // more than actually is used in the register.
   1428     // An example would be:
   1429     // %1 = LOAD CONSTANTS 5, 8 ; Loading both 5 and 8 in different subregs
   1430     // ; Copying only part of the register here, but the rest is undef.
   1431     // %2:sub_16bit<def, read-undef> = COPY %1:sub_16bit
   1432     // ==>
   1433     // ; Materialize all the constants but only using one
   1434     // %2 = LOAD_CONSTANTS 5, 8
   1435     //
   1436     // at this point for the part that wasn't defined before we could have
   1437     // subranges missing the definition.
   1438     if (NewIdx == 0 && DstInt.hasSubRanges()) {
   1439       SlotIndex CurrIdx = LIS->getInstructionIndex(NewMI);
   1440       SlotIndex DefIndex =
   1441           CurrIdx.getRegSlot(NewMI.getOperand(0).isEarlyClobber());
   1442       LaneBitmask MaxMask = MRI->getMaxLaneMaskForVReg(DstReg);
   1443       VNInfo::Allocator& Alloc = LIS->getVNInfoAllocator();
   1444       for (LiveInterval::SubRange &SR : DstInt.subranges()) {
   1445         if (!SR.liveAt(DefIndex))
   1446           SR.createDeadDef(DefIndex, Alloc);
   1447         MaxMask &= ~SR.LaneMask;
   1448       }
   1449       if (MaxMask.any()) {
   1450         LiveInterval::SubRange *SR = DstInt.createSubRange(Alloc, MaxMask);
   1451         SR->createDeadDef(DefIndex, Alloc);
   1452       }
   1453     }
   1454 
   1455     // Make sure that the subrange for resultant undef is removed
   1456     // For example:
   1457     //   %1:sub1<def,read-undef> = LOAD CONSTANT 1
   1458     //   %2 = COPY %1
   1459     // ==>
   1460     //   %2:sub1<def, read-undef> = LOAD CONSTANT 1
   1461     //     ; Correct but need to remove the subrange for %2:sub0
   1462     //     ; as it is now undef
   1463     if (NewIdx != 0 && DstInt.hasSubRanges()) {
   1464       // The affected subregister segments can be removed.
   1465       SlotIndex CurrIdx = LIS->getInstructionIndex(NewMI);
   1466       LaneBitmask DstMask = TRI->getSubRegIndexLaneMask(NewIdx);
   1467       bool UpdatedSubRanges = false;
   1468       SlotIndex DefIndex =
   1469           CurrIdx.getRegSlot(NewMI.getOperand(0).isEarlyClobber());
   1470       VNInfo::Allocator &Alloc = LIS->getVNInfoAllocator();
   1471       for (LiveInterval::SubRange &SR : DstInt.subranges()) {
   1472         if ((SR.LaneMask & DstMask).none()) {
   1473           LLVM_DEBUG(dbgs()
   1474                      << "Removing undefined SubRange "
   1475                      << PrintLaneMask(SR.LaneMask) << " : " << SR << "\n");
   1476           // VNI is in ValNo - remove any segments in this SubRange that have this ValNo
   1477           if (VNInfo *RmValNo = SR.getVNInfoAt(CurrIdx.getRegSlot())) {
   1478             SR.removeValNo(RmValNo);
   1479             UpdatedSubRanges = true;
   1480           }
   1481         } else {
   1482           // We know that this lane is defined by this instruction,
   1483           // but at this point it may be empty because it is not used by
   1484           // anything. This happens when updateRegDefUses adds the missing
   1485           // lanes. Assign that lane a dead def so that the interferences
   1486           // are properly modeled.
   1487           if (SR.empty())
   1488             SR.createDeadDef(DefIndex, Alloc);
   1489         }
   1490       }
   1491       if (UpdatedSubRanges)
   1492         DstInt.removeEmptySubRanges();
   1493     }
   1494   } else if (NewMI.getOperand(0).getReg() != CopyDstReg) {
   1495     // The New instruction may be defining a sub-register of what's actually
   1496     // been asked for. If so it must implicitly define the whole thing.
   1497     assert(Register::isPhysicalRegister(DstReg) &&
   1498            "Only expect virtual or physical registers in remat");
   1499     NewMI.getOperand(0).setIsDead(true);
   1500     NewMI.addOperand(MachineOperand::CreateReg(
   1501         CopyDstReg, true /*IsDef*/, true /*IsImp*/, false /*IsKill*/));
   1502     // Record small dead def live-ranges for all the subregisters
   1503     // of the destination register.
   1504     // Otherwise, variables that live through may miss some
   1505     // interferences, thus creating invalid allocation.
   1506     // E.g., i386 code:
   1507     // %1 = somedef ; %1 GR8
   1508     // %2 = remat ; %2 GR32
   1509     // CL = COPY %2.sub_8bit
   1510     // = somedef %1 ; %1 GR8
   1511     // =>
   1512     // %1 = somedef ; %1 GR8
   1513     // dead ECX = remat ; implicit-def CL
   1514     // = somedef %1 ; %1 GR8
   1515     // %1 will see the interferences with CL but not with CH since
   1516     // no live-ranges would have been created for ECX.
   1517     // Fix that!
   1518     SlotIndex NewMIIdx = LIS->getInstructionIndex(NewMI);
   1519     for (MCRegUnitIterator Units(NewMI.getOperand(0).getReg(), TRI);
   1520          Units.isValid(); ++Units)
   1521       if (LiveRange *LR = LIS->getCachedRegUnit(*Units))
   1522         LR->createDeadDef(NewMIIdx.getRegSlot(), LIS->getVNInfoAllocator());
   1523   }
   1524 
   1525   if (NewMI.getOperand(0).getSubReg())
   1526     NewMI.getOperand(0).setIsUndef();
   1527 
   1528   // Transfer over implicit operands to the rematerialized instruction.
   1529   for (MachineOperand &MO : ImplicitOps)
   1530     NewMI.addOperand(MO);
   1531 
   1532   SlotIndex NewMIIdx = LIS->getInstructionIndex(NewMI);
   1533   for (unsigned i = 0, e = NewMIImplDefs.size(); i != e; ++i) {
   1534     MCRegister Reg = NewMIImplDefs[i];
   1535     for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units)
   1536       if (LiveRange *LR = LIS->getCachedRegUnit(*Units))
   1537         LR->createDeadDef(NewMIIdx.getRegSlot(), LIS->getVNInfoAllocator());
   1538   }
   1539 
   1540   LLVM_DEBUG(dbgs() << "Remat: " << NewMI);
   1541   ++NumReMats;
   1542 
   1543   // If the virtual SrcReg is completely eliminated, update all DBG_VALUEs
   1544   // to describe DstReg instead.
   1545   if (MRI->use_nodbg_empty(SrcReg)) {
   1546     for (MachineOperand &UseMO : MRI->use_operands(SrcReg)) {
   1547       MachineInstr *UseMI = UseMO.getParent();
   1548       if (UseMI->isDebugValue()) {
   1549         if (Register::isPhysicalRegister(DstReg))
   1550           UseMO.substPhysReg(DstReg, *TRI);
   1551         else
   1552           UseMO.setReg(DstReg);
   1553         // Move the debug value directly after the def of the rematerialized
   1554         // value in DstReg.
   1555         MBB->splice(std::next(NewMI.getIterator()), UseMI->getParent(), UseMI);
   1556         LLVM_DEBUG(dbgs() << "\t\tupdated: " << *UseMI);
   1557       }
   1558     }
   1559   }
   1560 
   1561   if (ToBeUpdated.count(SrcReg))
   1562     return true;
   1563 
   1564   unsigned NumCopyUses = 0;
   1565   for (MachineOperand &UseMO : MRI->use_nodbg_operands(SrcReg)) {
   1566     if (UseMO.getParent()->isCopyLike())
   1567       NumCopyUses++;
   1568   }
   1569   if (NumCopyUses < LateRematUpdateThreshold) {
   1570     // The source interval can become smaller because we removed a use.
   1571     shrinkToUses(&SrcInt, &DeadDefs);
   1572     if (!DeadDefs.empty())
   1573       eliminateDeadDefs();
   1574   } else {
   1575     ToBeUpdated.insert(SrcReg);
   1576   }
   1577   return true;
   1578 }
   1579 
   1580 MachineInstr *RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI) {
   1581   // ProcessImplicitDefs may leave some copies of <undef> values, it only
   1582   // removes local variables. When we have a copy like:
   1583   //
   1584   //   %1 = COPY undef %2
   1585   //
   1586   // We delete the copy and remove the corresponding value number from %1.
   1587   // Any uses of that value number are marked as <undef>.
   1588 
   1589   // Note that we do not query CoalescerPair here but redo isMoveInstr as the
   1590   // CoalescerPair may have a new register class with adjusted subreg indices
   1591   // at this point.
   1592   Register SrcReg, DstReg;
   1593   unsigned SrcSubIdx = 0, DstSubIdx = 0;
   1594   if(!isMoveInstr(*TRI, CopyMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx))
   1595     return nullptr;
   1596 
   1597   SlotIndex Idx = LIS->getInstructionIndex(*CopyMI);
   1598   const LiveInterval &SrcLI = LIS->getInterval(SrcReg);
   1599   // CopyMI is undef iff SrcReg is not live before the instruction.
   1600   if (SrcSubIdx != 0 && SrcLI.hasSubRanges()) {
   1601     LaneBitmask SrcMask = TRI->getSubRegIndexLaneMask(SrcSubIdx);
   1602     for (const LiveInterval::SubRange &SR : SrcLI.subranges()) {
   1603       if ((SR.LaneMask & SrcMask).none())
   1604         continue;
   1605       if (SR.liveAt(Idx))
   1606         return nullptr;
   1607     }
   1608   } else if (SrcLI.liveAt(Idx))
   1609     return nullptr;
   1610 
   1611   // If the undef copy defines a live-out value (i.e. an input to a PHI def),
   1612   // then replace it with an IMPLICIT_DEF.
   1613   LiveInterval &DstLI = LIS->getInterval(DstReg);
   1614   SlotIndex RegIndex = Idx.getRegSlot();
   1615   LiveRange::Segment *Seg = DstLI.getSegmentContaining(RegIndex);
   1616   assert(Seg != nullptr && "No segment for defining instruction");
   1617   if (VNInfo *V = DstLI.getVNInfoAt(Seg->end)) {
   1618     if (V->isPHIDef()) {
   1619       CopyMI->setDesc(TII->get(TargetOpcode::IMPLICIT_DEF));
   1620       for (unsigned i = CopyMI->getNumOperands(); i != 0; --i) {
   1621         MachineOperand &MO = CopyMI->getOperand(i-1);
   1622         if (MO.isReg() && MO.isUse())
   1623           CopyMI->RemoveOperand(i-1);
   1624       }
   1625       LLVM_DEBUG(dbgs() << "\tReplaced copy of <undef> value with an "
   1626                            "implicit def\n");
   1627       return CopyMI;
   1628     }
   1629   }
   1630 
   1631   // Remove any DstReg segments starting at the instruction.
   1632   LLVM_DEBUG(dbgs() << "\tEliminating copy of <undef> value\n");
   1633 
   1634   // Remove value or merge with previous one in case of a subregister def.
   1635   if (VNInfo *PrevVNI = DstLI.getVNInfoAt(Idx)) {
   1636     VNInfo *VNI = DstLI.getVNInfoAt(RegIndex);
   1637     DstLI.MergeValueNumberInto(VNI, PrevVNI);
   1638 
   1639     // The affected subregister segments can be removed.
   1640     LaneBitmask DstMask = TRI->getSubRegIndexLaneMask(DstSubIdx);
   1641     for (LiveInterval::SubRange &SR : DstLI.subranges()) {
   1642       if ((SR.LaneMask & DstMask).none())
   1643         continue;
   1644 
   1645       VNInfo *SVNI = SR.getVNInfoAt(RegIndex);
   1646       assert(SVNI != nullptr && SlotIndex::isSameInstr(SVNI->def, RegIndex));
   1647       SR.removeValNo(SVNI);
   1648     }
   1649     DstLI.removeEmptySubRanges();
   1650   } else
   1651     LIS->removeVRegDefAt(DstLI, RegIndex);
   1652 
   1653   // Mark uses as undef.
   1654   for (MachineOperand &MO : MRI->reg_nodbg_operands(DstReg)) {
   1655     if (MO.isDef() /*|| MO.isUndef()*/)
   1656       continue;
   1657     const MachineInstr &MI = *MO.getParent();
   1658     SlotIndex UseIdx = LIS->getInstructionIndex(MI);
   1659     LaneBitmask UseMask = TRI->getSubRegIndexLaneMask(MO.getSubReg());
   1660     bool isLive;
   1661     if (!UseMask.all() && DstLI.hasSubRanges()) {
   1662       isLive = false;
   1663       for (const LiveInterval::SubRange &SR : DstLI.subranges()) {
   1664         if ((SR.LaneMask & UseMask).none())
   1665           continue;
   1666         if (SR.liveAt(UseIdx)) {
   1667           isLive = true;
   1668           break;
   1669         }
   1670       }
   1671     } else
   1672       isLive = DstLI.liveAt(UseIdx);
   1673     if (isLive)
   1674       continue;
   1675     MO.setIsUndef(true);
   1676     LLVM_DEBUG(dbgs() << "\tnew undef: " << UseIdx << '\t' << MI);
   1677   }
   1678 
   1679   // A def of a subregister may be a use of the other subregisters, so
   1680   // deleting a def of a subregister may also remove uses. Since CopyMI
   1681   // is still part of the function (but about to be erased), mark all
   1682   // defs of DstReg in it as <undef>, so that shrinkToUses would
   1683   // ignore them.
   1684   for (MachineOperand &MO : CopyMI->operands())
   1685     if (MO.isReg() && MO.isDef() && MO.getReg() == DstReg)
   1686       MO.setIsUndef(true);
   1687   LIS->shrinkToUses(&DstLI);
   1688 
   1689   return CopyMI;
   1690 }
   1691 
   1692 void RegisterCoalescer::addUndefFlag(const LiveInterval &Int, SlotIndex UseIdx,
   1693                                      MachineOperand &MO, unsigned SubRegIdx) {
   1694   LaneBitmask Mask = TRI->getSubRegIndexLaneMask(SubRegIdx);
   1695   if (MO.isDef())
   1696     Mask = ~Mask;
   1697   bool IsUndef = true;
   1698   for (const LiveInterval::SubRange &S : Int.subranges()) {
   1699     if ((S.LaneMask & Mask).none())
   1700       continue;
   1701     if (S.liveAt(UseIdx)) {
   1702       IsUndef = false;
   1703       break;
   1704     }
   1705   }
   1706   if (IsUndef) {
   1707     MO.setIsUndef(true);
   1708     // We found out some subregister use is actually reading an undefined
   1709     // value. In some cases the whole vreg has become undefined at this
   1710     // point so we have to potentially shrink the main range if the
   1711     // use was ending a live segment there.
   1712     LiveQueryResult Q = Int.Query(UseIdx);
   1713     if (Q.valueOut() == nullptr)
   1714       ShrinkMainRange = true;
   1715   }
   1716 }
   1717 
   1718 void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg,
   1719                                           unsigned SubIdx) {
   1720   bool DstIsPhys = Register::isPhysicalRegister(DstReg);
   1721   LiveInterval *DstInt = DstIsPhys ? nullptr : &LIS->getInterval(DstReg);
   1722 
   1723   if (DstInt && DstInt->hasSubRanges() && DstReg != SrcReg) {
   1724     for (MachineOperand &MO : MRI->reg_operands(DstReg)) {
   1725       unsigned SubReg = MO.getSubReg();
   1726       if (SubReg == 0 || MO.isUndef())
   1727         continue;
   1728       MachineInstr &MI = *MO.getParent();
   1729       if (MI.isDebugValue())
   1730         continue;
   1731       SlotIndex UseIdx = LIS->getInstructionIndex(MI).getRegSlot(true);
   1732       addUndefFlag(*DstInt, UseIdx, MO, SubReg);
   1733     }
   1734   }
   1735 
   1736   SmallPtrSet<MachineInstr*, 8> Visited;
   1737   for (MachineRegisterInfo::reg_instr_iterator
   1738        I = MRI->reg_instr_begin(SrcReg), E = MRI->reg_instr_end();
   1739        I != E; ) {
   1740     MachineInstr *UseMI = &*(I++);
   1741 
   1742     // Each instruction can only be rewritten once because sub-register
   1743     // composition is not always idempotent. When SrcReg != DstReg, rewriting
   1744     // the UseMI operands removes them from the SrcReg use-def chain, but when
   1745     // SrcReg is DstReg we could encounter UseMI twice if it has multiple
   1746     // operands mentioning the virtual register.
   1747     if (SrcReg == DstReg && !Visited.insert(UseMI).second)
   1748       continue;
   1749 
   1750     SmallVector<unsigned,8> Ops;
   1751     bool Reads, Writes;
   1752     std::tie(Reads, Writes) = UseMI->readsWritesVirtualRegister(SrcReg, &Ops);
   1753 
   1754     // If SrcReg wasn't read, it may still be the case that DstReg is live-in
   1755     // because SrcReg is a sub-register.
   1756     if (DstInt && !Reads && SubIdx && !UseMI->isDebugValue())
   1757       Reads = DstInt->liveAt(LIS->getInstructionIndex(*UseMI));
   1758 
   1759     // Replace SrcReg with DstReg in all UseMI operands.
   1760     for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
   1761       MachineOperand &MO = UseMI->getOperand(Ops[i]);
   1762 
   1763       // Adjust <undef> flags in case of sub-register joins. We don't want to
   1764       // turn a full def into a read-modify-write sub-register def and vice
   1765       // versa.
   1766       if (SubIdx && MO.isDef())
   1767         MO.setIsUndef(!Reads);
   1768 
   1769       // A subreg use of a partially undef (super) register may be a complete
   1770       // undef use now and then has to be marked that way.
   1771       if (MO.isUse() && !DstIsPhys) {
   1772         unsigned SubUseIdx = TRI->composeSubRegIndices(SubIdx, MO.getSubReg());
   1773         if (SubUseIdx != 0 && MRI->shouldTrackSubRegLiveness(DstReg)) {
   1774           if (!DstInt->hasSubRanges()) {
   1775             BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator();
   1776             LaneBitmask FullMask = MRI->getMaxLaneMaskForVReg(DstInt->reg());
   1777             LaneBitmask UsedLanes = TRI->getSubRegIndexLaneMask(SubIdx);
   1778             LaneBitmask UnusedLanes = FullMask & ~UsedLanes;
   1779             DstInt->createSubRangeFrom(Allocator, UsedLanes, *DstInt);
   1780             // The unused lanes are just empty live-ranges at this point.
   1781             // It is the caller responsibility to set the proper
   1782             // dead segments if there is an actual dead def of the
   1783             // unused lanes. This may happen with rematerialization.
   1784             DstInt->createSubRange(Allocator, UnusedLanes);
   1785           }
   1786           SlotIndex MIIdx = UseMI->isDebugValue()
   1787             ? LIS->getSlotIndexes()->getIndexBefore(*UseMI)
   1788             : LIS->getInstructionIndex(*UseMI);
   1789           SlotIndex UseIdx = MIIdx.getRegSlot(true);
   1790           addUndefFlag(*DstInt, UseIdx, MO, SubUseIdx);
   1791         }
   1792       }
   1793 
   1794       if (DstIsPhys)
   1795         MO.substPhysReg(DstReg, *TRI);
   1796       else
   1797         MO.substVirtReg(DstReg, SubIdx, *TRI);
   1798     }
   1799 
   1800     LLVM_DEBUG({
   1801       dbgs() << "\t\tupdated: ";
   1802       if (!UseMI->isDebugValue())
   1803         dbgs() << LIS->getInstructionIndex(*UseMI) << "\t";
   1804       dbgs() << *UseMI;
   1805     });
   1806   }
   1807 }
   1808 
   1809 bool RegisterCoalescer::canJoinPhys(const CoalescerPair &CP) {
   1810   // Always join simple intervals that are defined by a single copy from a
   1811   // reserved register. This doesn't increase register pressure, so it is
   1812   // always beneficial.
   1813   if (!MRI->isReserved(CP.getDstReg())) {
   1814     LLVM_DEBUG(dbgs() << "\tCan only merge into reserved registers.\n");
   1815     return false;
   1816   }
   1817 
   1818   LiveInterval &JoinVInt = LIS->getInterval(CP.getSrcReg());
   1819   if (JoinVInt.containsOneValue())
   1820     return true;
   1821 
   1822   LLVM_DEBUG(
   1823       dbgs() << "\tCannot join complex intervals into reserved register.\n");
   1824   return false;
   1825 }
   1826 
   1827 bool RegisterCoalescer::copyValueUndefInPredecessors(
   1828     LiveRange &S, const MachineBasicBlock *MBB, LiveQueryResult SLRQ) {
   1829   for (const MachineBasicBlock *Pred : MBB->predecessors()) {
   1830     SlotIndex PredEnd = LIS->getMBBEndIdx(Pred);
   1831     if (VNInfo *V = S.getVNInfoAt(PredEnd.getPrevSlot())) {
   1832       // If this is a self loop, we may be reading the same value.
   1833       if (V->id != SLRQ.valueOutOrDead()->id)
   1834         return false;
   1835     }
   1836   }
   1837 
   1838   return true;
   1839 }
   1840 
   1841 void RegisterCoalescer::setUndefOnPrunedSubRegUses(LiveInterval &LI,
   1842                                                    Register Reg,
   1843                                                    LaneBitmask PrunedLanes) {
   1844   // If we had other instructions in the segment reading the undef sublane
   1845   // value, we need to mark them with undef.
   1846   for (MachineOperand &MO : MRI->use_nodbg_operands(Reg)) {
   1847     unsigned SubRegIdx = MO.getSubReg();
   1848     if (SubRegIdx == 0 || MO.isUndef())
   1849       continue;
   1850 
   1851     LaneBitmask SubRegMask = TRI->getSubRegIndexLaneMask(SubRegIdx);
   1852     SlotIndex Pos = LIS->getInstructionIndex(*MO.getParent());
   1853     for (LiveInterval::SubRange &S : LI.subranges()) {
   1854       if (!S.liveAt(Pos) && (PrunedLanes & SubRegMask).any()) {
   1855         MO.setIsUndef();
   1856         break;
   1857       }
   1858     }
   1859   }
   1860 
   1861   LI.removeEmptySubRanges();
   1862 
   1863   // A def of a subregister may be a use of other register lanes. Replacing
   1864   // such a def with a def of a different register will eliminate the use,
   1865   // and may cause the recorded live range to be larger than the actual
   1866   // liveness in the program IR.
   1867   LIS->shrinkToUses(&LI);
   1868 }
   1869 
   1870 bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {
   1871   Again = false;
   1872   LLVM_DEBUG(dbgs() << LIS->getInstructionIndex(*CopyMI) << '\t' << *CopyMI);
   1873 
   1874   CoalescerPair CP(*TRI);
   1875   if (!CP.setRegisters(CopyMI)) {
   1876     LLVM_DEBUG(dbgs() << "\tNot coalescable.\n");
   1877     return false;
   1878   }
   1879 
   1880   if (CP.getNewRC()) {
   1881     auto SrcRC = MRI->getRegClass(CP.getSrcReg());
   1882     auto DstRC = MRI->getRegClass(CP.getDstReg());
   1883     unsigned SrcIdx = CP.getSrcIdx();
   1884     unsigned DstIdx = CP.getDstIdx();
   1885     if (CP.isFlipped()) {
   1886       std::swap(SrcIdx, DstIdx);
   1887       std::swap(SrcRC, DstRC);
   1888     }
   1889     if (!TRI->shouldCoalesce(CopyMI, SrcRC, SrcIdx, DstRC, DstIdx,
   1890                              CP.getNewRC(), *LIS)) {
   1891       LLVM_DEBUG(dbgs() << "\tSubtarget bailed on coalescing.\n");
   1892       return false;
   1893     }
   1894   }
   1895 
   1896   // Dead code elimination. This really should be handled by MachineDCE, but
   1897   // sometimes dead copies slip through, and we can't generate invalid live
   1898   // ranges.
   1899   if (!CP.isPhys() && CopyMI->allDefsAreDead()) {
   1900     LLVM_DEBUG(dbgs() << "\tCopy is dead.\n");
   1901     DeadDefs.push_back(CopyMI);
   1902     eliminateDeadDefs();
   1903     return true;
   1904   }
   1905 
   1906   // Eliminate undefs.
   1907   if (!CP.isPhys()) {
   1908     // If this is an IMPLICIT_DEF, leave it alone, but don't try to coalesce.
   1909     if (MachineInstr *UndefMI = eliminateUndefCopy(CopyMI)) {
   1910       if (UndefMI->isImplicitDef())
   1911         return false;
   1912       deleteInstr(CopyMI);
   1913       return false;  // Not coalescable.
   1914     }
   1915   }
   1916 
   1917   // Coalesced copies are normally removed immediately, but transformations
   1918   // like removeCopyByCommutingDef() can inadvertently create identity copies.
   1919   // When that happens, just join the values and remove the copy.
   1920   if (CP.getSrcReg() == CP.getDstReg()) {
   1921     LiveInterval &LI = LIS->getInterval(CP.getSrcReg());
   1922     LLVM_DEBUG(dbgs() << "\tCopy already coalesced: " << LI << '\n');
   1923     const SlotIndex CopyIdx = LIS->getInstructionIndex(*CopyMI);
   1924     LiveQueryResult LRQ = LI.Query(CopyIdx);
   1925     if (VNInfo *DefVNI = LRQ.valueDefined()) {
   1926       VNInfo *ReadVNI = LRQ.valueIn();
   1927       assert(ReadVNI && "No value before copy and no <undef> flag.");
   1928       assert(ReadVNI != DefVNI && "Cannot read and define the same value.");
   1929 
   1930       // Track incoming undef lanes we need to eliminate from the subrange.
   1931       LaneBitmask PrunedLanes;
   1932       MachineBasicBlock *MBB = CopyMI->getParent();
   1933 
   1934       // Process subregister liveranges.
   1935       for (LiveInterval::SubRange &S : LI.subranges()) {
   1936         LiveQueryResult SLRQ = S.Query(CopyIdx);
   1937         if (VNInfo *SDefVNI = SLRQ.valueDefined()) {
   1938           if (VNInfo *SReadVNI = SLRQ.valueIn())
   1939             SDefVNI = S.MergeValueNumberInto(SDefVNI, SReadVNI);
   1940 
   1941           // If this copy introduced an undef subrange from an incoming value,
   1942           // we need to eliminate the undef live in values from the subrange.
   1943           if (copyValueUndefInPredecessors(S, MBB, SLRQ)) {
   1944             LLVM_DEBUG(dbgs() << "Incoming sublane value is undef at copy\n");
   1945             PrunedLanes |= S.LaneMask;
   1946             S.removeValNo(SDefVNI);
   1947           }
   1948         }
   1949       }
   1950 
   1951       LI.MergeValueNumberInto(DefVNI, ReadVNI);
   1952       if (PrunedLanes.any()) {
   1953         LLVM_DEBUG(dbgs() << "Pruning undef incoming lanes: "
   1954                           << PrunedLanes << '\n');
   1955         setUndefOnPrunedSubRegUses(LI, CP.getSrcReg(), PrunedLanes);
   1956       }
   1957 
   1958       LLVM_DEBUG(dbgs() << "\tMerged values:          " << LI << '\n');
   1959     }
   1960     deleteInstr(CopyMI);
   1961     return true;
   1962   }
   1963 
   1964   // Enforce policies.
   1965   if (CP.isPhys()) {
   1966     LLVM_DEBUG(dbgs() << "\tConsidering merging "
   1967                       << printReg(CP.getSrcReg(), TRI) << " with "
   1968                       << printReg(CP.getDstReg(), TRI, CP.getSrcIdx()) << '\n');
   1969     if (!canJoinPhys(CP)) {
   1970       // Before giving up coalescing, if definition of source is defined by
   1971       // trivial computation, try rematerializing it.
   1972       bool IsDefCopy = false;
   1973       if (reMaterializeTrivialDef(CP, CopyMI, IsDefCopy))
   1974         return true;
   1975       if (IsDefCopy)
   1976         Again = true;  // May be possible to coalesce later.
   1977       return false;
   1978     }
   1979   } else {
   1980     // When possible, let DstReg be the larger interval.
   1981     if (!CP.isPartial() && LIS->getInterval(CP.getSrcReg()).size() >
   1982                            LIS->getInterval(CP.getDstReg()).size())
   1983       CP.flip();
   1984 
   1985     LLVM_DEBUG({
   1986       dbgs() << "\tConsidering merging to "
   1987              << TRI->getRegClassName(CP.getNewRC()) << " with ";
   1988       if (CP.getDstIdx() && CP.getSrcIdx())
   1989         dbgs() << printReg(CP.getDstReg()) << " in "
   1990                << TRI->getSubRegIndexName(CP.getDstIdx()) << " and "
   1991                << printReg(CP.getSrcReg()) << " in "
   1992                << TRI->getSubRegIndexName(CP.getSrcIdx()) << '\n';
   1993       else
   1994         dbgs() << printReg(CP.getSrcReg(), TRI) << " in "
   1995                << printReg(CP.getDstReg(), TRI, CP.getSrcIdx()) << '\n';
   1996     });
   1997   }
   1998 
   1999   ShrinkMask = LaneBitmask::getNone();
   2000   ShrinkMainRange = false;
   2001 
   2002   // Okay, attempt to join these two intervals.  On failure, this returns false.
   2003   // Otherwise, if one of the intervals being joined is a physreg, this method
   2004   // always canonicalizes DstInt to be it.  The output "SrcInt" will not have
   2005   // been modified, so we can use this information below to update aliases.
   2006   if (!joinIntervals(CP)) {
   2007     // Coalescing failed.
   2008 
   2009     // If definition of source is defined by trivial computation, try
   2010     // rematerializing it.
   2011     bool IsDefCopy = false;
   2012     if (reMaterializeTrivialDef(CP, CopyMI, IsDefCopy))
   2013       return true;
   2014 
   2015     // If we can eliminate the copy without merging the live segments, do so
   2016     // now.
   2017     if (!CP.isPartial() && !CP.isPhys()) {
   2018       bool Changed = adjustCopiesBackFrom(CP, CopyMI);
   2019       bool Shrink = false;
   2020       if (!Changed)
   2021         std::tie(Changed, Shrink) = removeCopyByCommutingDef(CP, CopyMI);
   2022       if (Changed) {
   2023         deleteInstr(CopyMI);
   2024         if (Shrink) {
   2025           Register DstReg = CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg();
   2026           LiveInterval &DstLI = LIS->getInterval(DstReg);
   2027           shrinkToUses(&DstLI);
   2028           LLVM_DEBUG(dbgs() << "\t\tshrunk:   " << DstLI << '\n');
   2029         }
   2030         LLVM_DEBUG(dbgs() << "\tTrivial!\n");
   2031         return true;
   2032       }
   2033     }
   2034 
   2035     // Try and see if we can partially eliminate the copy by moving the copy to
   2036     // its predecessor.
   2037     if (!CP.isPartial() && !CP.isPhys())
   2038       if (removePartialRedundancy(CP, *CopyMI))
   2039         return true;
   2040 
   2041     // Otherwise, we are unable to join the intervals.
   2042     LLVM_DEBUG(dbgs() << "\tInterference!\n");
   2043     Again = true;  // May be possible to coalesce later.
   2044     return false;
   2045   }
   2046 
   2047   // Coalescing to a virtual register that is of a sub-register class of the
   2048   // other. Make sure the resulting register is set to the right register class.
   2049   if (CP.isCrossClass()) {
   2050     ++numCrossRCs;
   2051     MRI->setRegClass(CP.getDstReg(), CP.getNewRC());
   2052   }
   2053 
   2054   // Removing sub-register copies can ease the register class constraints.
   2055   // Make sure we attempt to inflate the register class of DstReg.
   2056   if (!CP.isPhys() && RegClassInfo.isProperSubClass(CP.getNewRC()))
   2057     InflateRegs.push_back(CP.getDstReg());
   2058 
   2059   // CopyMI has been erased by joinIntervals at this point. Remove it from
   2060   // ErasedInstrs since copyCoalesceWorkList() won't add a successful join back
   2061   // to the work list. This keeps ErasedInstrs from growing needlessly.
   2062   ErasedInstrs.erase(CopyMI);
   2063 
   2064   // Rewrite all SrcReg operands to DstReg.
   2065   // Also update DstReg operands to include DstIdx if it is set.
   2066   if (CP.getDstIdx())
   2067     updateRegDefsUses(CP.getDstReg(), CP.getDstReg(), CP.getDstIdx());
   2068   updateRegDefsUses(CP.getSrcReg(), CP.getDstReg(), CP.getSrcIdx());
   2069 
   2070   // Shrink subregister ranges if necessary.
   2071   if (ShrinkMask.any()) {
   2072     LiveInterval &LI = LIS->getInterval(CP.getDstReg());
   2073     for (LiveInterval::SubRange &S : LI.subranges()) {
   2074       if ((S.LaneMask & ShrinkMask).none())
   2075         continue;
   2076       LLVM_DEBUG(dbgs() << "Shrink LaneUses (Lane " << PrintLaneMask(S.LaneMask)
   2077                         << ")\n");
   2078       LIS->shrinkToUses(S, LI.reg());
   2079     }
   2080     LI.removeEmptySubRanges();
   2081   }
   2082 
   2083   // CP.getSrcReg()'s live interval has been merged into CP.getDstReg's live
   2084   // interval. Since CP.getSrcReg() is in ToBeUpdated set and its live interval
   2085   // is not up-to-date, need to update the merged live interval here.
   2086   if (ToBeUpdated.count(CP.getSrcReg()))
   2087     ShrinkMainRange = true;
   2088 
   2089   if (ShrinkMainRange) {
   2090     LiveInterval &LI = LIS->getInterval(CP.getDstReg());
   2091     shrinkToUses(&LI);
   2092   }
   2093 
   2094   // SrcReg is guaranteed to be the register whose live interval that is
   2095   // being merged.
   2096   LIS->removeInterval(CP.getSrcReg());
   2097 
   2098   // Update regalloc hint.
   2099   TRI->updateRegAllocHint(CP.getSrcReg(), CP.getDstReg(), *MF);
   2100 
   2101   LLVM_DEBUG({
   2102     dbgs() << "\tSuccess: " << printReg(CP.getSrcReg(), TRI, CP.getSrcIdx())
   2103            << " -> " << printReg(CP.getDstReg(), TRI, CP.getDstIdx()) << '\n';
   2104     dbgs() << "\tResult = ";
   2105     if (CP.isPhys())
   2106       dbgs() << printReg(CP.getDstReg(), TRI);
   2107     else
   2108       dbgs() << LIS->getInterval(CP.getDstReg());
   2109     dbgs() << '\n';
   2110   });
   2111 
   2112   ++numJoins;
   2113   return true;
   2114 }
   2115 
   2116 bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) {
   2117   Register DstReg = CP.getDstReg();
   2118   Register SrcReg = CP.getSrcReg();
   2119   assert(CP.isPhys() && "Must be a physreg copy");
   2120   assert(MRI->isReserved(DstReg) && "Not a reserved register");
   2121   LiveInterval &RHS = LIS->getInterval(SrcReg);
   2122   LLVM_DEBUG(dbgs() << "\t\tRHS = " << RHS << '\n');
   2123 
   2124   assert(RHS.containsOneValue() && "Invalid join with reserved register");
   2125 
   2126   // Optimization for reserved registers like ESP. We can only merge with a
   2127   // reserved physreg if RHS has a single value that is a copy of DstReg.
   2128   // The live range of the reserved register will look like a set of dead defs
   2129   // - we don't properly track the live range of reserved registers.
   2130 
   2131   // Deny any overlapping intervals.  This depends on all the reserved
   2132   // register live ranges to look like dead defs.
   2133   if (!MRI->isConstantPhysReg(DstReg)) {
   2134     for (MCRegUnitIterator UI(DstReg, TRI); UI.isValid(); ++UI) {
   2135       // Abort if not all the regunits are reserved.
   2136       for (MCRegUnitRootIterator RI(*UI, TRI); RI.isValid(); ++RI) {
   2137         if (!MRI->isReserved(*RI))
   2138           return false;
   2139       }
   2140       if (RHS.overlaps(LIS->getRegUnit(*UI))) {
   2141         LLVM_DEBUG(dbgs() << "\t\tInterference: " << printRegUnit(*UI, TRI)
   2142                           << '\n');
   2143         return false;
   2144       }
   2145     }
   2146 
   2147     // We must also check for overlaps with regmask clobbers.
   2148     BitVector RegMaskUsable;
   2149     if (LIS->checkRegMaskInterference(RHS, RegMaskUsable) &&
   2150         !RegMaskUsable.test(DstReg)) {
   2151       LLVM_DEBUG(dbgs() << "\t\tRegMask interference\n");
   2152       return false;
   2153     }
   2154   }
   2155 
   2156   // Skip any value computations, we are not adding new values to the
   2157   // reserved register.  Also skip merging the live ranges, the reserved
   2158   // register live range doesn't need to be accurate as long as all the
   2159   // defs are there.
   2160 
   2161   // Delete the identity copy.
   2162   MachineInstr *CopyMI;
   2163   if (CP.isFlipped()) {
   2164     // Physreg is copied into vreg
   2165     //   %y = COPY %physreg_x
   2166     //   ...  //< no other def of %physreg_x here
   2167     //   use %y
   2168     // =>
   2169     //   ...
   2170     //   use %physreg_x
   2171     CopyMI = MRI->getVRegDef(SrcReg);
   2172   } else {
   2173     // VReg is copied into physreg:
   2174     //   %y = def
   2175     //   ... //< no other def or use of %physreg_x here
   2176     //   %physreg_x = COPY %y
   2177     // =>
   2178     //   %physreg_x = def
   2179     //   ...
   2180     if (!MRI->hasOneNonDBGUse(SrcReg)) {
   2181       LLVM_DEBUG(dbgs() << "\t\tMultiple vreg uses!\n");
   2182       return false;
   2183     }
   2184 
   2185     if (!LIS->intervalIsInOneMBB(RHS)) {
   2186       LLVM_DEBUG(dbgs() << "\t\tComplex control flow!\n");
   2187       return false;
   2188     }
   2189 
   2190     MachineInstr &DestMI = *MRI->getVRegDef(SrcReg);
   2191     CopyMI = &*MRI->use_instr_nodbg_begin(SrcReg);
   2192     SlotIndex CopyRegIdx = LIS->getInstructionIndex(*CopyMI).getRegSlot();
   2193     SlotIndex DestRegIdx = LIS->getInstructionIndex(DestMI).getRegSlot();
   2194 
   2195     if (!MRI->isConstantPhysReg(DstReg)) {
   2196       // We checked above that there are no interfering defs of the physical
   2197       // register. However, for this case, where we intend to move up the def of
   2198       // the physical register, we also need to check for interfering uses.
   2199       SlotIndexes *Indexes = LIS->getSlotIndexes();
   2200       for (SlotIndex SI = Indexes->getNextNonNullIndex(DestRegIdx);
   2201            SI != CopyRegIdx; SI = Indexes->getNextNonNullIndex(SI)) {
   2202         MachineInstr *MI = LIS->getInstructionFromIndex(SI);
   2203         if (MI->readsRegister(DstReg, TRI)) {
   2204           LLVM_DEBUG(dbgs() << "\t\tInterference (read): " << *MI);
   2205           return false;
   2206         }
   2207       }
   2208     }
   2209 
   2210     // We're going to remove the copy which defines a physical reserved
   2211     // register, so remove its valno, etc.
   2212     LLVM_DEBUG(dbgs() << "\t\tRemoving phys reg def of "
   2213                       << printReg(DstReg, TRI) << " at " << CopyRegIdx << "\n");
   2214 
   2215     LIS->removePhysRegDefAt(DstReg.asMCReg(), CopyRegIdx);
   2216     // Create a new dead def at the new def location.
   2217     for (MCRegUnitIterator UI(DstReg, TRI); UI.isValid(); ++UI) {
   2218       LiveRange &LR = LIS->getRegUnit(*UI);
   2219       LR.createDeadDef(DestRegIdx, LIS->getVNInfoAllocator());
   2220     }
   2221   }
   2222 
   2223   deleteInstr(CopyMI);
   2224 
   2225   // We don't track kills for reserved registers.
   2226   MRI->clearKillFlags(CP.getSrcReg());
   2227 
   2228   return true;
   2229 }
   2230 
   2231 //===----------------------------------------------------------------------===//
   2232 //                 Interference checking and interval joining
   2233 //===----------------------------------------------------------------------===//
   2234 //
   2235 // In the easiest case, the two live ranges being joined are disjoint, and
   2236 // there is no interference to consider. It is quite common, though, to have
   2237 // overlapping live ranges, and we need to check if the interference can be
   2238 // resolved.
   2239 //
   2240 // The live range of a single SSA value forms a sub-tree of the dominator tree.
   2241 // This means that two SSA values overlap if and only if the def of one value
   2242 // is contained in the live range of the other value. As a special case, the
   2243 // overlapping values can be defined at the same index.
   2244 //
   2245 // The interference from an overlapping def can be resolved in these cases:
   2246 //
   2247 // 1. Coalescable copies. The value is defined by a copy that would become an
   2248 //    identity copy after joining SrcReg and DstReg. The copy instruction will
   2249 //    be removed, and the value will be merged with the source value.
   2250 //
   2251 //    There can be several copies back and forth, causing many values to be
   2252 //    merged into one. We compute a list of ultimate values in the joined live
   2253 //    range as well as a mappings from the old value numbers.
   2254 //
   2255 // 2. IMPLICIT_DEF. This instruction is only inserted to ensure all PHI
   2256 //    predecessors have a live out value. It doesn't cause real interference,
   2257 //    and can be merged into the value it overlaps. Like a coalescable copy, it
   2258 //    can be erased after joining.
   2259 //
   2260 // 3. Copy of external value. The overlapping def may be a copy of a value that
   2261 //    is already in the other register. This is like a coalescable copy, but
   2262 //    the live range of the source register must be trimmed after erasing the
   2263 //    copy instruction:
   2264 //
   2265 //      %src = COPY %ext
   2266 //      %dst = COPY %ext  <-- Remove this COPY, trim the live range of %ext.
   2267 //
   2268 // 4. Clobbering undefined lanes. Vector registers are sometimes built by
   2269 //    defining one lane at a time:
   2270 //
   2271 //      %dst:ssub0<def,read-undef> = FOO
   2272 //      %src = BAR
   2273 //      %dst:ssub1 = COPY %src
   2274 //
   2275 //    The live range of %src overlaps the %dst value defined by FOO, but
   2276 //    merging %src into %dst:ssub1 is only going to clobber the ssub1 lane
   2277 //    which was undef anyway.
   2278 //
   2279 //    The value mapping is more complicated in this case. The final live range
   2280 //    will have different value numbers for both FOO and BAR, but there is no
   2281 //    simple mapping from old to new values. It may even be necessary to add
   2282 //    new PHI values.
   2283 //
   2284 // 5. Clobbering dead lanes. A def may clobber a lane of a vector register that
   2285 //    is live, but never read. This can happen because we don't compute
   2286 //    individual live ranges per lane.
   2287 //
   2288 //      %dst = FOO
   2289 //      %src = BAR
   2290 //      %dst:ssub1 = COPY %src
   2291 //
   2292 //    This kind of interference is only resolved locally. If the clobbered
   2293 //    lane value escapes the block, the join is aborted.
   2294 
   2295 namespace {
   2296 
   2297 /// Track information about values in a single virtual register about to be
   2298 /// joined. Objects of this class are always created in pairs - one for each
   2299 /// side of the CoalescerPair (or one for each lane of a side of the coalescer
   2300 /// pair)
   2301 class JoinVals {
   2302   /// Live range we work on.
   2303   LiveRange &LR;
   2304 
   2305   /// (Main) register we work on.
   2306   const Register Reg;
   2307 
   2308   /// Reg (and therefore the values in this liverange) will end up as
   2309   /// subregister SubIdx in the coalesced register. Either CP.DstIdx or
   2310   /// CP.SrcIdx.
   2311   const unsigned SubIdx;
   2312 
   2313   /// The LaneMask that this liverange will occupy the coalesced register. May
   2314   /// be smaller than the lanemask produced by SubIdx when merging subranges.
   2315   const LaneBitmask LaneMask;
   2316 
   2317   /// This is true when joining sub register ranges, false when joining main
   2318   /// ranges.
   2319   const bool SubRangeJoin;
   2320 
   2321   /// Whether the current LiveInterval tracks subregister liveness.
   2322   const bool TrackSubRegLiveness;
   2323 
   2324   /// Values that will be present in the final live range.
   2325   SmallVectorImpl<VNInfo*> &NewVNInfo;
   2326 
   2327   const CoalescerPair &CP;
   2328   LiveIntervals *LIS;
   2329   SlotIndexes *Indexes;
   2330   const TargetRegisterInfo *TRI;
   2331 
   2332   /// Value number assignments. Maps value numbers in LI to entries in
   2333   /// NewVNInfo. This is suitable for passing to LiveInterval::join().
   2334   SmallVector<int, 8> Assignments;
   2335 
   2336   public:
   2337   /// Conflict resolution for overlapping values.
   2338   enum ConflictResolution {
   2339     /// No overlap, simply keep this value.
   2340     CR_Keep,
   2341 
   2342     /// Merge this value into OtherVNI and erase the defining instruction.
   2343     /// Used for IMPLICIT_DEF, coalescable copies, and copies from external
   2344     /// values.
   2345     CR_Erase,
   2346 
   2347     /// Merge this value into OtherVNI but keep the defining instruction.
   2348     /// This is for the special case where OtherVNI is defined by the same
   2349     /// instruction.
   2350     CR_Merge,
   2351 
   2352     /// Keep this value, and have it replace OtherVNI where possible. This
   2353     /// complicates value mapping since OtherVNI maps to two different values
   2354     /// before and after this def.
   2355     /// Used when clobbering undefined or dead lanes.
   2356     CR_Replace,
   2357 
   2358     /// Unresolved conflict. Visit later when all values have been mapped.
   2359     CR_Unresolved,
   2360 
   2361     /// Unresolvable conflict. Abort the join.
   2362     CR_Impossible
   2363   };
   2364 
   2365   private:
   2366   /// Per-value info for LI. The lane bit masks are all relative to the final
   2367   /// joined register, so they can be compared directly between SrcReg and
   2368   /// DstReg.
   2369   struct Val {
   2370     ConflictResolution Resolution = CR_Keep;
   2371 
   2372     /// Lanes written by this def, 0 for unanalyzed values.
   2373     LaneBitmask WriteLanes;
   2374 
   2375     /// Lanes with defined values in this register. Other lanes are undef and
   2376     /// safe to clobber.
   2377     LaneBitmask ValidLanes;
   2378 
   2379     /// Value in LI being redefined by this def.
   2380     VNInfo *RedefVNI = nullptr;
   2381 
   2382     /// Value in the other live range that overlaps this def, if any.
   2383     VNInfo *OtherVNI = nullptr;
   2384 
   2385     /// Is this value an IMPLICIT_DEF that can be erased?
   2386     ///
   2387     /// IMPLICIT_DEF values should only exist at the end of a basic block that
   2388     /// is a predecessor to a phi-value. These IMPLICIT_DEF instructions can be
   2389     /// safely erased if they are overlapping a live value in the other live
   2390     /// interval.
   2391     ///
   2392     /// Weird control flow graphs and incomplete PHI handling in
   2393     /// ProcessImplicitDefs can very rarely create IMPLICIT_DEF values with
   2394     /// longer live ranges. Such IMPLICIT_DEF values should be treated like
   2395     /// normal values.
   2396     bool ErasableImplicitDef = false;
   2397 
   2398     /// True when the live range of this value will be pruned because of an
   2399     /// overlapping CR_Replace value in the other live range.
   2400     bool Pruned = false;
   2401 
   2402     /// True once Pruned above has been computed.
   2403     bool PrunedComputed = false;
   2404 
   2405     /// True if this value is determined to be identical to OtherVNI
   2406     /// (in valuesIdentical). This is used with CR_Erase where the erased
   2407     /// copy is redundant, i.e. the source value is already the same as
   2408     /// the destination. In such cases the subranges need to be updated
   2409     /// properly. See comment at pruneSubRegValues for more info.
   2410     bool Identical = false;
   2411 
   2412     Val() = default;
   2413 
   2414     bool isAnalyzed() const { return WriteLanes.any(); }
   2415   };
   2416 
   2417   /// One entry per value number in LI.
   2418   SmallVector<Val, 8> Vals;
   2419 
   2420   /// Compute the bitmask of lanes actually written by DefMI.
   2421   /// Set Redef if there are any partial register definitions that depend on the
   2422   /// previous value of the register.
   2423   LaneBitmask computeWriteLanes(const MachineInstr *DefMI, bool &Redef) const;
   2424 
   2425   /// Find the ultimate value that VNI was copied from.
   2426   std::pair<const VNInfo *, Register> followCopyChain(const VNInfo *VNI) const;
   2427 
   2428   bool valuesIdentical(VNInfo *Value0, VNInfo *Value1, const JoinVals &Other) const;
   2429 
   2430   /// Analyze ValNo in this live range, and set all fields of Vals[ValNo].
   2431   /// Return a conflict resolution when possible, but leave the hard cases as
   2432   /// CR_Unresolved.
   2433   /// Recursively calls computeAssignment() on this and Other, guaranteeing that
   2434   /// both OtherVNI and RedefVNI have been analyzed and mapped before returning.
   2435   /// The recursion always goes upwards in the dominator tree, making loops
   2436   /// impossible.
   2437   ConflictResolution analyzeValue(unsigned ValNo, JoinVals &Other);
   2438 
   2439   /// Compute the value assignment for ValNo in RI.
   2440   /// This may be called recursively by analyzeValue(), but never for a ValNo on
   2441   /// the stack.
   2442   void computeAssignment(unsigned ValNo, JoinVals &Other);
   2443 
   2444   /// Assuming ValNo is going to clobber some valid lanes in Other.LR, compute
   2445   /// the extent of the tainted lanes in the block.
   2446   ///
   2447   /// Multiple values in Other.LR can be affected since partial redefinitions
   2448   /// can preserve previously tainted lanes.
   2449   ///
   2450   ///   1 %dst = VLOAD           <-- Define all lanes in %dst
   2451   ///   2 %src = FOO             <-- ValNo to be joined with %dst:ssub0
   2452   ///   3 %dst:ssub1 = BAR       <-- Partial redef doesn't clear taint in ssub0
   2453   ///   4 %dst:ssub0 = COPY %src <-- Conflict resolved, ssub0 wasn't read
   2454   ///
   2455   /// For each ValNo in Other that is affected, add an (EndIndex, TaintedLanes)
   2456   /// entry to TaintedVals.
   2457   ///
   2458   /// Returns false if the tainted lanes extend beyond the basic block.
   2459   bool
   2460   taintExtent(unsigned ValNo, LaneBitmask TaintedLanes, JoinVals &Other,
   2461               SmallVectorImpl<std::pair<SlotIndex, LaneBitmask>> &TaintExtent);
   2462 
   2463   /// Return true if MI uses any of the given Lanes from Reg.
   2464   /// This does not include partial redefinitions of Reg.
   2465   bool usesLanes(const MachineInstr &MI, Register, unsigned, LaneBitmask) const;
   2466 
   2467   /// Determine if ValNo is a copy of a value number in LR or Other.LR that will
   2468   /// be pruned:
   2469   ///
   2470   ///   %dst = COPY %src
   2471   ///   %src = COPY %dst  <-- This value to be pruned.
   2472   ///   %dst = COPY %src  <-- This value is a copy of a pruned value.
   2473   bool isPrunedValue(unsigned ValNo, JoinVals &Other);
   2474 
   2475 public:
   2476   JoinVals(LiveRange &LR, Register Reg, unsigned SubIdx, LaneBitmask LaneMask,
   2477            SmallVectorImpl<VNInfo *> &newVNInfo, const CoalescerPair &cp,
   2478            LiveIntervals *lis, const TargetRegisterInfo *TRI, bool SubRangeJoin,
   2479            bool TrackSubRegLiveness)
   2480       : LR(LR), Reg(Reg), SubIdx(SubIdx), LaneMask(LaneMask),
   2481         SubRangeJoin(SubRangeJoin), TrackSubRegLiveness(TrackSubRegLiveness),
   2482         NewVNInfo(newVNInfo), CP(cp), LIS(lis), Indexes(LIS->getSlotIndexes()),
   2483         TRI(TRI), Assignments(LR.getNumValNums(), -1),
   2484         Vals(LR.getNumValNums()) {}
   2485 
   2486   /// Analyze defs in LR and compute a value mapping in NewVNInfo.
   2487   /// Returns false if any conflicts were impossible to resolve.
   2488   bool mapValues(JoinVals &Other);
   2489 
   2490   /// Try to resolve conflicts that require all values to be mapped.
   2491   /// Returns false if any conflicts were impossible to resolve.
   2492   bool resolveConflicts(JoinVals &Other);
   2493 
   2494   /// Prune the live range of values in Other.LR where they would conflict with
   2495   /// CR_Replace values in LR. Collect end points for restoring the live range
   2496   /// after joining.
   2497   void pruneValues(JoinVals &Other, SmallVectorImpl<SlotIndex> &EndPoints,
   2498                    bool changeInstrs);
   2499 
   2500   /// Removes subranges starting at copies that get removed. This sometimes
   2501   /// happens when undefined subranges are copied around. These ranges contain
   2502   /// no useful information and can be removed.
   2503   void pruneSubRegValues(LiveInterval &LI, LaneBitmask &ShrinkMask);
   2504 
   2505   /// Pruning values in subranges can lead to removing segments in these
   2506   /// subranges started by IMPLICIT_DEFs. The corresponding segments in
   2507   /// the main range also need to be removed. This function will mark
   2508   /// the corresponding values in the main range as pruned, so that
   2509   /// eraseInstrs can do the final cleanup.
   2510   /// The parameter @p LI must be the interval whose main range is the
   2511   /// live range LR.
   2512   void pruneMainSegments(LiveInterval &LI, bool &ShrinkMainRange);
   2513 
   2514   /// Erase any machine instructions that have been coalesced away.
   2515   /// Add erased instructions to ErasedInstrs.
   2516   /// Add foreign virtual registers to ShrinkRegs if their live range ended at
   2517   /// the erased instrs.
   2518   void eraseInstrs(SmallPtrSetImpl<MachineInstr*> &ErasedInstrs,
   2519                    SmallVectorImpl<Register> &ShrinkRegs,
   2520                    LiveInterval *LI = nullptr);
   2521 
   2522   /// Remove liverange defs at places where implicit defs will be removed.
   2523   void removeImplicitDefs();
   2524 
   2525   /// Get the value assignments suitable for passing to LiveInterval::join.
   2526   const int *getAssignments() const { return Assignments.data(); }
   2527 
   2528   /// Get the conflict resolution for a value number.
   2529   ConflictResolution getResolution(unsigned Num) const {
   2530     return Vals[Num].Resolution;
   2531   }
   2532 };
   2533 
   2534 } // end anonymous namespace
   2535 
   2536 LaneBitmask JoinVals::computeWriteLanes(const MachineInstr *DefMI, bool &Redef)
   2537   const {
   2538   LaneBitmask L;
   2539   for (const MachineOperand &MO : DefMI->operands()) {
   2540     if (!MO.isReg() || MO.getReg() != Reg || !MO.isDef())
   2541       continue;
   2542     L |= TRI->getSubRegIndexLaneMask(
   2543            TRI->composeSubRegIndices(SubIdx, MO.getSubReg()));
   2544     if (MO.readsReg())
   2545       Redef = true;
   2546   }
   2547   return L;
   2548 }
   2549 
   2550 std::pair<const VNInfo *, Register>
   2551 JoinVals::followCopyChain(const VNInfo *VNI) const {
   2552   Register TrackReg = Reg;
   2553 
   2554   while (!VNI->isPHIDef()) {
   2555     SlotIndex Def = VNI->def;
   2556     MachineInstr *MI = Indexes->getInstructionFromIndex(Def);
   2557     assert(MI && "No defining instruction");
   2558     if (!MI->isFullCopy())
   2559       return std::make_pair(VNI, TrackReg);
   2560     Register SrcReg = MI->getOperand(1).getReg();
   2561     if (!SrcReg.isVirtual())
   2562       return std::make_pair(VNI, TrackReg);
   2563 
   2564     const LiveInterval &LI = LIS->getInterval(SrcReg);
   2565     const VNInfo *ValueIn;
   2566     // No subrange involved.
   2567     if (!SubRangeJoin || !LI.hasSubRanges()) {
   2568       LiveQueryResult LRQ = LI.Query(Def);
   2569       ValueIn = LRQ.valueIn();
   2570     } else {
   2571       // Query subranges. Ensure that all matching ones take us to the same def
   2572       // (allowing some of them to be undef).
   2573       ValueIn = nullptr;
   2574       for (const LiveInterval::SubRange &S : LI.subranges()) {
   2575         // Transform lanemask to a mask in the joined live interval.
   2576         LaneBitmask SMask = TRI->composeSubRegIndexLaneMask(SubIdx, S.LaneMask);
   2577         if ((SMask & LaneMask).none())
   2578           continue;
   2579         LiveQueryResult LRQ = S.Query(Def);
   2580         if (!ValueIn) {
   2581           ValueIn = LRQ.valueIn();
   2582           continue;
   2583         }
   2584         if (LRQ.valueIn() && ValueIn != LRQ.valueIn())
   2585           return std::make_pair(VNI, TrackReg);
   2586       }
   2587     }
   2588     if (ValueIn == nullptr) {
   2589       // Reaching an undefined value is legitimate, for example:
   2590       //
   2591       // 1   undef %0.sub1 = ...  ;; %0.sub0 == undef
   2592       // 2   %1 = COPY %0         ;; %1 is defined here.
   2593       // 3   %0 = COPY %1         ;; Now %0.sub0 has a definition,
   2594       //                          ;; but it's equivalent to "undef".
   2595       return std::make_pair(nullptr, SrcReg);
   2596     }
   2597     VNI = ValueIn;
   2598     TrackReg = SrcReg;
   2599   }
   2600   return std::make_pair(VNI, TrackReg);
   2601 }
   2602 
   2603 bool JoinVals::valuesIdentical(VNInfo *Value0, VNInfo *Value1,
   2604                                const JoinVals &Other) const {
   2605   const VNInfo *Orig0;
   2606   Register Reg0;
   2607   std::tie(Orig0, Reg0) = followCopyChain(Value0);
   2608   if (Orig0 == Value1 && Reg0 == Other.Reg)
   2609     return true;
   2610 
   2611   const VNInfo *Orig1;
   2612   Register Reg1;
   2613   std::tie(Orig1, Reg1) = Other.followCopyChain(Value1);
   2614   // If both values are undefined, and the source registers are the same
   2615   // register, the values are identical. Filter out cases where only one
   2616   // value is defined.
   2617   if (Orig0 == nullptr || Orig1 == nullptr)
   2618     return Orig0 == Orig1 && Reg0 == Reg1;
   2619 
   2620   // The values are equal if they are defined at the same place and use the
   2621   // same register. Note that we cannot compare VNInfos directly as some of
   2622   // them might be from a copy created in mergeSubRangeInto()  while the other
   2623   // is from the original LiveInterval.
   2624   return Orig0->def == Orig1->def && Reg0 == Reg1;
   2625 }
   2626 
   2627 JoinVals::ConflictResolution
   2628 JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) {
   2629   Val &V = Vals[ValNo];
   2630   assert(!V.isAnalyzed() && "Value has already been analyzed!");
   2631   VNInfo *VNI = LR.getValNumInfo(ValNo);
   2632   if (VNI->isUnused()) {
   2633     V.WriteLanes = LaneBitmask::getAll();
   2634     return CR_Keep;
   2635   }
   2636 
   2637   // Get the instruction defining this value, compute the lanes written.
   2638   const MachineInstr *DefMI = nullptr;
   2639   if (VNI->isPHIDef()) {
   2640     // Conservatively assume that all lanes in a PHI are valid.
   2641     LaneBitmask Lanes = SubRangeJoin ? LaneBitmask::getLane(0)
   2642                                      : TRI->getSubRegIndexLaneMask(SubIdx);
   2643     V.ValidLanes = V.WriteLanes = Lanes;
   2644   } else {
   2645     DefMI = Indexes->getInstructionFromIndex(VNI->def);
   2646     assert(DefMI != nullptr);
   2647     if (SubRangeJoin) {
   2648       // We don't care about the lanes when joining subregister ranges.
   2649       V.WriteLanes = V.ValidLanes = LaneBitmask::getLane(0);
   2650       if (DefMI->isImplicitDef()) {
   2651         V.ValidLanes = LaneBitmask::getNone();
   2652         V.ErasableImplicitDef = true;
   2653       }
   2654     } else {
   2655       bool Redef = false;
   2656       V.ValidLanes = V.WriteLanes = computeWriteLanes(DefMI, Redef);
   2657 
   2658       // If this is a read-modify-write instruction, there may be more valid
   2659       // lanes than the ones written by this instruction.
   2660       // This only covers partial redef operands. DefMI may have normal use
   2661       // operands reading the register. They don't contribute valid lanes.
   2662       //
   2663       // This adds ssub1 to the set of valid lanes in %src:
   2664       //
   2665       //   %src:ssub1 = FOO
   2666       //
   2667       // This leaves only ssub1 valid, making any other lanes undef:
   2668       //
   2669       //   %src:ssub1<def,read-undef> = FOO %src:ssub2
   2670       //
   2671       // The <read-undef> flag on the def operand means that old lane values are
   2672       // not important.
   2673       if (Redef) {
   2674         V.RedefVNI = LR.Query(VNI->def).valueIn();
   2675         assert((TrackSubRegLiveness || V.RedefVNI) &&
   2676                "Instruction is reading nonexistent value");
   2677         if (V.RedefVNI != nullptr) {
   2678           computeAssignment(V.RedefVNI->id, Other);
   2679           V.ValidLanes |= Vals[V.RedefVNI->id].ValidLanes;
   2680         }
   2681       }
   2682 
   2683       // An IMPLICIT_DEF writes undef values.
   2684       if (DefMI->isImplicitDef()) {
   2685         // We normally expect IMPLICIT_DEF values to be live only until the end
   2686         // of their block. If the value is really live longer and gets pruned in
   2687         // another block, this flag is cleared again.
   2688         //
   2689         // Clearing the valid lanes is deferred until it is sure this can be
   2690         // erased.
   2691         V.ErasableImplicitDef = true;
   2692       }
   2693     }
   2694   }
   2695 
   2696   // Find the value in Other that overlaps VNI->def, if any.
   2697   LiveQueryResult OtherLRQ = Other.LR.Query(VNI->def);
   2698 
   2699   // It is possible that both values are defined by the same instruction, or
   2700   // the values are PHIs defined in the same block. When that happens, the two
   2701   // values should be merged into one, but not into any preceding value.
   2702   // The first value defined or visited gets CR_Keep, the other gets CR_Merge.
   2703   if (VNInfo *OtherVNI = OtherLRQ.valueDefined()) {
   2704     assert(SlotIndex::isSameInstr(VNI->def, OtherVNI->def) && "Broken LRQ");
   2705 
   2706     // One value stays, the other is merged. Keep the earlier one, or the first
   2707     // one we see.
   2708     if (OtherVNI->def < VNI->def)
   2709       Other.computeAssignment(OtherVNI->id, *this);
   2710     else if (VNI->def < OtherVNI->def && OtherLRQ.valueIn()) {
   2711       // This is an early-clobber def overlapping a live-in value in the other
   2712       // register. Not mergeable.
   2713       V.OtherVNI = OtherLRQ.valueIn();
   2714       return CR_Impossible;
   2715     }
   2716     V.OtherVNI = OtherVNI;
   2717     Val &OtherV = Other.Vals[OtherVNI->id];
   2718     // Keep this value, check for conflicts when analyzing OtherVNI.
   2719     if (!OtherV.isAnalyzed())
   2720       return CR_Keep;
   2721     // Both sides have been analyzed now.
   2722     // Allow overlapping PHI values. Any real interference would show up in a
   2723     // predecessor, the PHI itself can't introduce any conflicts.
   2724     if (VNI->isPHIDef())
   2725       return CR_Merge;
   2726     if ((V.ValidLanes & OtherV.ValidLanes).any())
   2727       // Overlapping lanes can't be resolved.
   2728       return CR_Impossible;
   2729     else
   2730       return CR_Merge;
   2731   }
   2732 
   2733   // No simultaneous def. Is Other live at the def?
   2734   V.OtherVNI = OtherLRQ.valueIn();
   2735   if (!V.OtherVNI)
   2736     // No overlap, no conflict.
   2737     return CR_Keep;
   2738 
   2739   assert(!SlotIndex::isSameInstr(VNI->def, V.OtherVNI->def) && "Broken LRQ");
   2740 
   2741   // We have overlapping values, or possibly a kill of Other.
   2742   // Recursively compute assignments up the dominator tree.
   2743   Other.computeAssignment(V.OtherVNI->id, *this);
   2744   Val &OtherV = Other.Vals[V.OtherVNI->id];
   2745 
   2746   if (OtherV.ErasableImplicitDef) {
   2747     // Check if OtherV is an IMPLICIT_DEF that extends beyond its basic block.
   2748     // This shouldn't normally happen, but ProcessImplicitDefs can leave such
   2749     // IMPLICIT_DEF instructions behind, and there is nothing wrong with it
   2750     // technically.
   2751     //
   2752     // When it happens, treat that IMPLICIT_DEF as a normal value, and don't try
   2753     // to erase the IMPLICIT_DEF instruction.
   2754     if (DefMI &&
   2755         DefMI->getParent() != Indexes->getMBBFromIndex(V.OtherVNI->def)) {
   2756       LLVM_DEBUG(dbgs() << "IMPLICIT_DEF defined at " << V.OtherVNI->def
   2757                  << " extends into "
   2758                  << printMBBReference(*DefMI->getParent())
   2759                  << ", keeping it.\n");
   2760       OtherV.ErasableImplicitDef = false;
   2761     } else {
   2762       // We deferred clearing these lanes in case we needed to save them
   2763       OtherV.ValidLanes &= ~OtherV.WriteLanes;
   2764     }
   2765   }
   2766 
   2767   // Allow overlapping PHI values. Any real interference would show up in a
   2768   // predecessor, the PHI itself can't introduce any conflicts.
   2769   if (VNI->isPHIDef())
   2770     return CR_Replace;
   2771 
   2772   // Check for simple erasable conflicts.
   2773   if (DefMI->isImplicitDef())
   2774     return CR_Erase;
   2775 
   2776   // Include the non-conflict where DefMI is a coalescable copy that kills
   2777   // OtherVNI. We still want the copy erased and value numbers merged.
   2778   if (CP.isCoalescable(DefMI)) {
   2779     // Some of the lanes copied from OtherVNI may be undef, making them undef
   2780     // here too.
   2781     V.ValidLanes &= ~V.WriteLanes | OtherV.ValidLanes;
   2782     return CR_Erase;
   2783   }
   2784 
   2785   // This may not be a real conflict if DefMI simply kills Other and defines
   2786   // VNI.
   2787   if (OtherLRQ.isKill() && OtherLRQ.endPoint() <= VNI->def)
   2788     return CR_Keep;
   2789 
   2790   // Handle the case where VNI and OtherVNI can be proven to be identical:
   2791   //
   2792   //   %other = COPY %ext
   2793   //   %this  = COPY %ext <-- Erase this copy
   2794   //
   2795   if (DefMI->isFullCopy() && !CP.isPartial() &&
   2796       valuesIdentical(VNI, V.OtherVNI, Other)) {
   2797     V.Identical = true;
   2798     return CR_Erase;
   2799   }
   2800 
   2801   // The remaining checks apply to the lanes, which aren't tracked here.  This
   2802   // was already decided to be OK via the following CR_Replace condition.
   2803   // CR_Replace.
   2804   if (SubRangeJoin)
   2805     return CR_Replace;
   2806 
   2807   // If the lanes written by this instruction were all undef in OtherVNI, it is
   2808   // still safe to join the live ranges. This can't be done with a simple value
   2809   // mapping, though - OtherVNI will map to multiple values:
   2810   //
   2811   //   1 %dst:ssub0 = FOO                <-- OtherVNI
   2812   //   2 %src = BAR                      <-- VNI
   2813   //   3 %dst:ssub1 = COPY killed %src    <-- Eliminate this copy.
   2814   //   4 BAZ killed %dst
   2815   //   5 QUUX killed %src
   2816   //
   2817   // Here OtherVNI will map to itself in [1;2), but to VNI in [2;5). CR_Replace
   2818   // handles this complex value mapping.
   2819   if ((V.WriteLanes & OtherV.ValidLanes).none())
   2820     return CR_Replace;
   2821 
   2822   // If the other live range is killed by DefMI and the live ranges are still
   2823   // overlapping, it must be because we're looking at an early clobber def:
   2824   //
   2825   //   %dst<def,early-clobber> = ASM killed %src
   2826   //
   2827   // In this case, it is illegal to merge the two live ranges since the early
   2828   // clobber def would clobber %src before it was read.
   2829   if (OtherLRQ.isKill()) {
   2830     // This case where the def doesn't overlap the kill is handled above.
   2831     assert(VNI->def.isEarlyClobber() &&
   2832            "Only early clobber defs can overlap a kill");
   2833     return CR_Impossible;
   2834   }
   2835 
   2836   // VNI is clobbering live lanes in OtherVNI, but there is still the
   2837   // possibility that no instructions actually read the clobbered lanes.
   2838   // If we're clobbering all the lanes in OtherVNI, at least one must be read.
   2839   // Otherwise Other.RI wouldn't be live here.
   2840   if ((TRI->getSubRegIndexLaneMask(Other.SubIdx) & ~V.WriteLanes).none())
   2841     return CR_Impossible;
   2842 
   2843   // We need to verify that no instructions are reading the clobbered lanes. To
   2844   // save compile time, we'll only check that locally. Don't allow the tainted
   2845   // value to escape the basic block.
   2846   MachineBasicBlock *MBB = Indexes->getMBBFromIndex(VNI->def);
   2847   if (OtherLRQ.endPoint() >= Indexes->getMBBEndIdx(MBB))
   2848     return CR_Impossible;
   2849 
   2850   // There are still some things that could go wrong besides clobbered lanes
   2851   // being read, for example OtherVNI may be only partially redefined in MBB,
   2852   // and some clobbered lanes could escape the block. Save this analysis for
   2853   // resolveConflicts() when all values have been mapped. We need to know
   2854   // RedefVNI and WriteLanes for any later defs in MBB, and we can't compute
   2855   // that now - the recursive analyzeValue() calls must go upwards in the
   2856   // dominator tree.
   2857   return CR_Unresolved;
   2858 }
   2859 
   2860 void JoinVals::computeAssignment(unsigned ValNo, JoinVals &Other) {
   2861   Val &V = Vals[ValNo];
   2862   if (V.isAnalyzed()) {
   2863     // Recursion should always move up the dominator tree, so ValNo is not
   2864     // supposed to reappear before it has been assigned.
   2865     assert(Assignments[ValNo] != -1 && "Bad recursion?");
   2866     return;
   2867   }
   2868   switch ((V.Resolution = analyzeValue(ValNo, Other))) {
   2869   case CR_Erase:
   2870   case CR_Merge:
   2871     // Merge this ValNo into OtherVNI.
   2872     assert(V.OtherVNI && "OtherVNI not assigned, can't merge.");
   2873     assert(Other.Vals[V.OtherVNI->id].isAnalyzed() && "Missing recursion");
   2874     Assignments[ValNo] = Other.Assignments[V.OtherVNI->id];
   2875     LLVM_DEBUG(dbgs() << "\t\tmerge " << printReg(Reg) << ':' << ValNo << '@'
   2876                       << LR.getValNumInfo(ValNo)->def << " into "
   2877                       << printReg(Other.Reg) << ':' << V.OtherVNI->id << '@'
   2878                       << V.OtherVNI->def << " --> @"
   2879                       << NewVNInfo[Assignments[ValNo]]->def << '\n');
   2880     break;
   2881   case CR_Replace:
   2882   case CR_Unresolved: {
   2883     // The other value is going to be pruned if this join is successful.
   2884     assert(V.OtherVNI && "OtherVNI not assigned, can't prune");
   2885     Val &OtherV = Other.Vals[V.OtherVNI->id];
   2886     // We cannot erase an IMPLICIT_DEF if we don't have valid values for all
   2887     // its lanes.
   2888     if (OtherV.ErasableImplicitDef &&
   2889         TrackSubRegLiveness &&
   2890         (OtherV.WriteLanes & ~V.ValidLanes).any()) {
   2891       LLVM_DEBUG(dbgs() << "Cannot erase implicit_def with missing values\n");
   2892 
   2893       OtherV.ErasableImplicitDef = false;
   2894       // The valid lanes written by the implicit_def were speculatively cleared
   2895       // before, so make this more conservative. It may be better to track this,
   2896       // I haven't found a testcase where it matters.
   2897       OtherV.ValidLanes = LaneBitmask::getAll();
   2898     }
   2899 
   2900     OtherV.Pruned = true;
   2901     LLVM_FALLTHROUGH;
   2902   }
   2903   default:
   2904     // This value number needs to go in the final joined live range.
   2905     Assignments[ValNo] = NewVNInfo.size();
   2906     NewVNInfo.push_back(LR.getValNumInfo(ValNo));
   2907     break;
   2908   }
   2909 }
   2910 
   2911 bool JoinVals::mapValues(JoinVals &Other) {
   2912   for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) {
   2913     computeAssignment(i, Other);
   2914     if (Vals[i].Resolution == CR_Impossible) {
   2915       LLVM_DEBUG(dbgs() << "\t\tinterference at " << printReg(Reg) << ':' << i
   2916                         << '@' << LR.getValNumInfo(i)->def << '\n');
   2917       return false;
   2918     }
   2919   }
   2920   return true;
   2921 }
   2922 
   2923 bool JoinVals::
   2924 taintExtent(unsigned ValNo, LaneBitmask TaintedLanes, JoinVals &Other,
   2925             SmallVectorImpl<std::pair<SlotIndex, LaneBitmask>> &TaintExtent) {
   2926   VNInfo *VNI = LR.getValNumInfo(ValNo);
   2927   MachineBasicBlock *MBB = Indexes->getMBBFromIndex(VNI->def);
   2928   SlotIndex MBBEnd = Indexes->getMBBEndIdx(MBB);
   2929 
   2930   // Scan Other.LR from VNI.def to MBBEnd.
   2931   LiveInterval::iterator OtherI = Other.LR.find(VNI->def);
   2932   assert(OtherI != Other.LR.end() && "No conflict?");
   2933   do {
   2934     // OtherI is pointing to a tainted value. Abort the join if the tainted
   2935     // lanes escape the block.
   2936     SlotIndex End = OtherI->end;
   2937     if (End >= MBBEnd) {
   2938       LLVM_DEBUG(dbgs() << "\t\ttaints global " << printReg(Other.Reg) << ':'
   2939                         << OtherI->valno->id << '@' << OtherI->start << '\n');
   2940       return false;
   2941     }
   2942     LLVM_DEBUG(dbgs() << "\t\ttaints local " << printReg(Other.Reg) << ':'
   2943                       << OtherI->valno->id << '@' << OtherI->start << " to "
   2944                       << End << '\n');
   2945     // A dead def is not a problem.
   2946     if (End.isDead())
   2947       break;
   2948     TaintExtent.push_back(std::make_pair(End, TaintedLanes));
   2949 
   2950     // Check for another def in the MBB.
   2951     if (++OtherI == Other.LR.end() || OtherI->start >= MBBEnd)
   2952       break;
   2953 
   2954     // Lanes written by the new def are no longer tainted.
   2955     const Val &OV = Other.Vals[OtherI->valno->id];
   2956     TaintedLanes &= ~OV.WriteLanes;
   2957     if (!OV.RedefVNI)
   2958       break;
   2959   } while (TaintedLanes.any());
   2960   return true;
   2961 }
   2962 
   2963 bool JoinVals::usesLanes(const MachineInstr &MI, Register Reg, unsigned SubIdx,
   2964                          LaneBitmask Lanes) const {
   2965   if (MI.isDebugOrPseudoInstr())
   2966     return false;
   2967   for (const MachineOperand &MO : MI.operands()) {
   2968     if (!MO.isReg() || MO.isDef() || MO.getReg() != Reg)
   2969       continue;
   2970     if (!MO.readsReg())
   2971       continue;
   2972     unsigned S = TRI->composeSubRegIndices(SubIdx, MO.getSubReg());
   2973     if ((Lanes & TRI->getSubRegIndexLaneMask(S)).any())
   2974       return true;
   2975   }
   2976   return false;
   2977 }
   2978 
   2979 bool JoinVals::resolveConflicts(JoinVals &Other) {
   2980   for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) {
   2981     Val &V = Vals[i];
   2982     assert(V.Resolution != CR_Impossible && "Unresolvable conflict");
   2983     if (V.Resolution != CR_Unresolved)
   2984       continue;
   2985     LLVM_DEBUG(dbgs() << "\t\tconflict at " << printReg(Reg) << ':' << i << '@'
   2986                       << LR.getValNumInfo(i)->def
   2987                       << ' ' << PrintLaneMask(LaneMask) << '\n');
   2988     if (SubRangeJoin)
   2989       return false;
   2990 
   2991     ++NumLaneConflicts;
   2992     assert(V.OtherVNI && "Inconsistent conflict resolution.");
   2993     VNInfo *VNI = LR.getValNumInfo(i);
   2994     const Val &OtherV = Other.Vals[V.OtherVNI->id];
   2995 
   2996     // VNI is known to clobber some lanes in OtherVNI. If we go ahead with the
   2997     // join, those lanes will be tainted with a wrong value. Get the extent of
   2998     // the tainted lanes.
   2999     LaneBitmask TaintedLanes = V.WriteLanes & OtherV.ValidLanes;
   3000     SmallVector<std::pair<SlotIndex, LaneBitmask>, 8> TaintExtent;
   3001     if (!taintExtent(i, TaintedLanes, Other, TaintExtent))
   3002       // Tainted lanes would extend beyond the basic block.
   3003       return false;
   3004 
   3005     assert(!TaintExtent.empty() && "There should be at least one conflict.");
   3006 
   3007     // Now look at the instructions from VNI->def to TaintExtent (inclusive).
   3008     MachineBasicBlock *MBB = Indexes->getMBBFromIndex(VNI->def);
   3009     MachineBasicBlock::iterator MI = MBB->begin();
   3010     if (!VNI->isPHIDef()) {
   3011       MI = Indexes->getInstructionFromIndex(VNI->def);
   3012       // No need to check the instruction defining VNI for reads.
   3013       ++MI;
   3014     }
   3015     assert(!SlotIndex::isSameInstr(VNI->def, TaintExtent.front().first) &&
   3016            "Interference ends on VNI->def. Should have been handled earlier");
   3017     MachineInstr *LastMI =
   3018       Indexes->getInstructionFromIndex(TaintExtent.front().first);
   3019     assert(LastMI && "Range must end at a proper instruction");
   3020     unsigned TaintNum = 0;
   3021     while (true) {
   3022       assert(MI != MBB->end() && "Bad LastMI");
   3023       if (usesLanes(*MI, Other.Reg, Other.SubIdx, TaintedLanes)) {
   3024         LLVM_DEBUG(dbgs() << "\t\ttainted lanes used by: " << *MI);
   3025         return false;
   3026       }
   3027       // LastMI is the last instruction to use the current value.
   3028       if (&*MI == LastMI) {
   3029         if (++TaintNum == TaintExtent.size())
   3030           break;
   3031         LastMI = Indexes->getInstructionFromIndex(TaintExtent[TaintNum].first);
   3032         assert(LastMI && "Range must end at a proper instruction");
   3033         TaintedLanes = TaintExtent[TaintNum].second;
   3034       }
   3035       ++MI;
   3036     }
   3037 
   3038     // The tainted lanes are unused.
   3039     V.Resolution = CR_Replace;
   3040     ++NumLaneResolves;
   3041   }
   3042   return true;
   3043 }
   3044 
   3045 bool JoinVals::isPrunedValue(unsigned ValNo, JoinVals &Other) {
   3046   Val &V = Vals[ValNo];
   3047   if (V.Pruned || V.PrunedComputed)
   3048     return V.Pruned;
   3049 
   3050   if (V.Resolution != CR_Erase && V.Resolution != CR_Merge)
   3051     return V.Pruned;
   3052 
   3053   // Follow copies up the dominator tree and check if any intermediate value
   3054   // has been pruned.
   3055   V.PrunedComputed = true;
   3056   V.Pruned = Other.isPrunedValue(V.OtherVNI->id, *this);
   3057   return V.Pruned;
   3058 }
   3059 
   3060 void JoinVals::pruneValues(JoinVals &Other,
   3061                            SmallVectorImpl<SlotIndex> &EndPoints,
   3062                            bool changeInstrs) {
   3063   for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) {
   3064     SlotIndex Def = LR.getValNumInfo(i)->def;
   3065     switch (Vals[i].Resolution) {
   3066     case CR_Keep:
   3067       break;
   3068     case CR_Replace: {
   3069       // This value takes precedence over the value in Other.LR.
   3070       LIS->pruneValue(Other.LR, Def, &EndPoints);
   3071       // Check if we're replacing an IMPLICIT_DEF value. The IMPLICIT_DEF
   3072       // instructions are only inserted to provide a live-out value for PHI
   3073       // predecessors, so the instruction should simply go away once its value
   3074       // has been replaced.
   3075       Val &OtherV = Other.Vals[Vals[i].OtherVNI->id];
   3076       bool EraseImpDef = OtherV.ErasableImplicitDef &&
   3077                          OtherV.Resolution == CR_Keep;
   3078       if (!Def.isBlock()) {
   3079         if (changeInstrs) {
   3080           // Remove <def,read-undef> flags. This def is now a partial redef.
   3081           // Also remove dead flags since the joined live range will
   3082           // continue past this instruction.
   3083           for (MachineOperand &MO :
   3084                Indexes->getInstructionFromIndex(Def)->operands()) {
   3085             if (MO.isReg() && MO.isDef() && MO.getReg() == Reg) {
   3086               if (MO.getSubReg() != 0 && MO.isUndef() && !EraseImpDef)
   3087                 MO.setIsUndef(false);
   3088               MO.setIsDead(false);
   3089             }
   3090           }
   3091         }
   3092         // This value will reach instructions below, but we need to make sure
   3093         // the live range also reaches the instruction at Def.
   3094         if (!EraseImpDef)
   3095           EndPoints.push_back(Def);
   3096       }
   3097       LLVM_DEBUG(dbgs() << "\t\tpruned " << printReg(Other.Reg) << " at " << Def
   3098                         << ": " << Other.LR << '\n');
   3099       break;
   3100     }
   3101     case CR_Erase:
   3102     case CR_Merge:
   3103       if (isPrunedValue(i, Other)) {
   3104         // This value is ultimately a copy of a pruned value in LR or Other.LR.
   3105         // We can no longer trust the value mapping computed by
   3106         // computeAssignment(), the value that was originally copied could have
   3107         // been replaced.
   3108         LIS->pruneValue(LR, Def, &EndPoints);
   3109         LLVM_DEBUG(dbgs() << "\t\tpruned all of " << printReg(Reg) << " at "
   3110                           << Def << ": " << LR << '\n');
   3111       }
   3112       break;
   3113     case CR_Unresolved:
   3114     case CR_Impossible:
   3115       llvm_unreachable("Unresolved conflicts");
   3116     }
   3117   }
   3118 }
   3119 
   3120 // Check if the segment consists of a copied live-through value (i.e. the copy
   3121 // in the block only extended the liveness, of an undef value which we may need
   3122 // to handle).
   3123 static bool isLiveThrough(const LiveQueryResult Q) {
   3124   return Q.valueIn() && Q.valueIn()->isPHIDef() && Q.valueIn() == Q.valueOut();
   3125 }
   3126 
   3127 /// Consider the following situation when coalescing the copy between
   3128 /// %31 and %45 at 800. (The vertical lines represent live range segments.)
   3129 ///
   3130 ///                              Main range         Subrange 0004 (sub2)
   3131 ///                              %31    %45           %31    %45
   3132 ///  544    %45 = COPY %28               +                    +
   3133 ///                                      | v1                 | v1
   3134 ///  560B bb.1:                          +                    +
   3135 ///  624        = %45.sub2               | v2                 | v2
   3136 ///  800    %31 = COPY %45        +      +             +      +
   3137 ///                               | v0                 | v0
   3138 ///  816    %31.sub1 = ...        +                    |
   3139 ///  880    %30 = COPY %31        | v1                 +
   3140 ///  928    %45 = COPY %30        |      +                    +
   3141 ///                               |      | v0                 | v0  <--+
   3142 ///  992B   ; backedge -> bb.1    |      +                    +        |
   3143 /// 1040        = %31.sub0        +                                    |
   3144 ///                                                 This value must remain
   3145 ///                                                 live-out!
   3146 ///
   3147 /// Assuming that %31 is coalesced into %45, the copy at 928 becomes
   3148 /// redundant, since it copies the value from %45 back into it. The
   3149 /// conflict resolution for the main range determines that %45.v0 is
   3150 /// to be erased, which is ok since %31.v1 is identical to it.
   3151 /// The problem happens with the subrange for sub2: it has to be live
   3152 /// on exit from the block, but since 928 was actually a point of
   3153 /// definition of %45.sub2, %45.sub2 was not live immediately prior
   3154 /// to that definition. As a result, when 928 was erased, the value v0
   3155 /// for %45.sub2 was pruned in pruneSubRegValues. Consequently, an
   3156 /// IMPLICIT_DEF was inserted as a "backedge" definition for %45.sub2,
   3157 /// providing an incorrect value to the use at 624.
   3158 ///
   3159 /// Since the main-range values %31.v1 and %45.v0 were proved to be
   3160 /// identical, the corresponding values in subranges must also be the
   3161 /// same. A redundant copy is removed because it's not needed, and not
   3162 /// because it copied an undefined value, so any liveness that originated
   3163 /// from that copy cannot disappear. When pruning a value that started
   3164 /// at the removed copy, the corresponding identical value must be
   3165 /// extended to replace it.
   3166 void JoinVals::pruneSubRegValues(LiveInterval &LI, LaneBitmask &ShrinkMask) {
   3167   // Look for values being erased.
   3168   bool DidPrune = false;
   3169   for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) {
   3170     Val &V = Vals[i];
   3171     // We should trigger in all cases in which eraseInstrs() does something.
   3172     // match what eraseInstrs() is doing, print a message so
   3173     if (V.Resolution != CR_Erase &&
   3174         (V.Resolution != CR_Keep || !V.ErasableImplicitDef || !V.Pruned))
   3175       continue;
   3176 
   3177     // Check subranges at the point where the copy will be removed.
   3178     SlotIndex Def = LR.getValNumInfo(i)->def;
   3179     SlotIndex OtherDef;
   3180     if (V.Identical)
   3181       OtherDef = V.OtherVNI->def;
   3182 
   3183     // Print message so mismatches with eraseInstrs() can be diagnosed.
   3184     LLVM_DEBUG(dbgs() << "\t\tExpecting instruction removal at " << Def
   3185                       << '\n');
   3186     for (LiveInterval::SubRange &S : LI.subranges()) {
   3187       LiveQueryResult Q = S.Query(Def);
   3188 
   3189       // If a subrange starts at the copy then an undefined value has been
   3190       // copied and we must remove that subrange value as well.
   3191       VNInfo *ValueOut = Q.valueOutOrDead();
   3192       if (ValueOut != nullptr && (Q.valueIn() == nullptr ||
   3193                                   (V.Identical && V.Resolution == CR_Erase &&
   3194                                    ValueOut->def == Def))) {
   3195         LLVM_DEBUG(dbgs() << "\t\tPrune sublane " << PrintLaneMask(S.LaneMask)
   3196                           << " at " << Def << "\n");
   3197         SmallVector<SlotIndex,8> EndPoints;
   3198         LIS->pruneValue(S, Def, &EndPoints);
   3199         DidPrune = true;
   3200         // Mark value number as unused.
   3201         ValueOut->markUnused();
   3202 
   3203         if (V.Identical && S.Query(OtherDef).valueOutOrDead()) {
   3204           // If V is identical to V.OtherVNI (and S was live at OtherDef),
   3205           // then we can't simply prune V from S. V needs to be replaced
   3206           // with V.OtherVNI.
   3207           LIS->extendToIndices(S, EndPoints);
   3208         }
   3209 
   3210         // We may need to eliminate the subrange if the copy introduced a live
   3211         // out undef value.
   3212         if (ValueOut->isPHIDef())
   3213           ShrinkMask |= S.LaneMask;
   3214         continue;
   3215       }
   3216 
   3217       // If a subrange ends at the copy, then a value was copied but only
   3218       // partially used later. Shrink the subregister range appropriately.
   3219       //
   3220       // Ultimately this calls shrinkToUses, so assuming ShrinkMask is
   3221       // conservatively correct.
   3222       if ((Q.valueIn() != nullptr && Q.valueOut() == nullptr) ||
   3223           (V.Resolution == CR_Erase && isLiveThrough(Q))) {
   3224         LLVM_DEBUG(dbgs() << "\t\tDead uses at sublane "
   3225                           << PrintLaneMask(S.LaneMask) << " at " << Def
   3226                           << "\n");
   3227         ShrinkMask |= S.LaneMask;
   3228       }
   3229     }
   3230   }
   3231   if (DidPrune)
   3232     LI.removeEmptySubRanges();
   3233 }
   3234 
   3235 /// Check if any of the subranges of @p LI contain a definition at @p Def.
   3236 static bool isDefInSubRange(LiveInterval &LI, SlotIndex Def) {
   3237   for (LiveInterval::SubRange &SR : LI.subranges()) {
   3238     if (VNInfo *VNI = SR.Query(Def).valueOutOrDead())
   3239       if (VNI->def == Def)
   3240         return true;
   3241   }
   3242   return false;
   3243 }
   3244 
   3245 void JoinVals::pruneMainSegments(LiveInterval &LI, bool &ShrinkMainRange) {
   3246   assert(&static_cast<LiveRange&>(LI) == &LR);
   3247 
   3248   for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) {
   3249     if (Vals[i].Resolution != CR_Keep)
   3250       continue;
   3251     VNInfo *VNI = LR.getValNumInfo(i);
   3252     if (VNI->isUnused() || VNI->isPHIDef() || isDefInSubRange(LI, VNI->def))
   3253       continue;
   3254     Vals[i].Pruned = true;
   3255     ShrinkMainRange = true;
   3256   }
   3257 }
   3258 
   3259 void JoinVals::removeImplicitDefs() {
   3260   for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) {
   3261     Val &V = Vals[i];
   3262     if (V.Resolution != CR_Keep || !V.ErasableImplicitDef || !V.Pruned)
   3263       continue;
   3264 
   3265     VNInfo *VNI = LR.getValNumInfo(i);
   3266     VNI->markUnused();
   3267     LR.removeValNo(VNI);
   3268   }
   3269 }
   3270 
   3271 void JoinVals::eraseInstrs(SmallPtrSetImpl<MachineInstr*> &ErasedInstrs,
   3272                            SmallVectorImpl<Register> &ShrinkRegs,
   3273                            LiveInterval *LI) {
   3274   for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) {
   3275     // Get the def location before markUnused() below invalidates it.
   3276     VNInfo *VNI = LR.getValNumInfo(i);
   3277     SlotIndex Def = VNI->def;
   3278     switch (Vals[i].Resolution) {
   3279     case CR_Keep: {
   3280       // If an IMPLICIT_DEF value is pruned, it doesn't serve a purpose any
   3281       // longer. The IMPLICIT_DEF instructions are only inserted by
   3282       // PHIElimination to guarantee that all PHI predecessors have a value.
   3283       if (!Vals[i].ErasableImplicitDef || !Vals[i].Pruned)
   3284         break;
   3285       // Remove value number i from LR.
   3286       // For intervals with subranges, removing a segment from the main range
   3287       // may require extending the previous segment: for each definition of
   3288       // a subregister, there will be a corresponding def in the main range.
   3289       // That def may fall in the middle of a segment from another subrange.
   3290       // In such cases, removing this def from the main range must be
   3291       // complemented by extending the main range to account for the liveness
   3292       // of the other subrange.
   3293       // The new end point of the main range segment to be extended.
   3294       SlotIndex NewEnd;
   3295       if (LI != nullptr) {
   3296         LiveRange::iterator I = LR.FindSegmentContaining(Def);
   3297         assert(I != LR.end());
   3298         // Do not extend beyond the end of the segment being removed.
   3299         // The segment may have been pruned in preparation for joining
   3300         // live ranges.
   3301         NewEnd = I->end;
   3302       }
   3303 
   3304       LR.removeValNo(VNI);
   3305       // Note that this VNInfo is reused and still referenced in NewVNInfo,
   3306       // make it appear like an unused value number.
   3307       VNI->markUnused();
   3308 
   3309       if (LI != nullptr && LI->hasSubRanges()) {
   3310         assert(static_cast<LiveRange*>(LI) == &LR);
   3311         // Determine the end point based on the subrange information:
   3312         // minimum of (earliest def of next segment,
   3313         //             latest end point of containing segment)
   3314         SlotIndex ED, LE;
   3315         for (LiveInterval::SubRange &SR : LI->subranges()) {
   3316           LiveRange::iterator I = SR.find(Def);
   3317           if (I == SR.end())
   3318             continue;
   3319           if (I->start > Def)
   3320             ED = ED.isValid() ? std::min(ED, I->start) : I->start;
   3321           else
   3322             LE = LE.isValid() ? std::max(LE, I->end) : I->end;
   3323         }
   3324         if (LE.isValid())
   3325           NewEnd = std::min(NewEnd, LE);
   3326         if (ED.isValid())
   3327           NewEnd = std::min(NewEnd, ED);
   3328 
   3329         // We only want to do the extension if there was a subrange that
   3330         // was live across Def.
   3331         if (LE.isValid()) {
   3332           LiveRange::iterator S = LR.find(Def);
   3333           if (S != LR.begin())
   3334             std::prev(S)->end = NewEnd;
   3335         }
   3336       }
   3337       LLVM_DEBUG({
   3338         dbgs() << "\t\tremoved " << i << '@' << Def << ": " << LR << '\n';
   3339         if (LI != nullptr)
   3340           dbgs() << "\t\t  LHS = " << *LI << '\n';
   3341       });
   3342       LLVM_FALLTHROUGH;
   3343     }
   3344 
   3345     case CR_Erase: {
   3346       MachineInstr *MI = Indexes->getInstructionFromIndex(Def);
   3347       assert(MI && "No instruction to erase");
   3348       if (MI->isCopy()) {
   3349         Register Reg = MI->getOperand(1).getReg();
   3350         if (Register::isVirtualRegister(Reg) && Reg != CP.getSrcReg() &&
   3351             Reg != CP.getDstReg())
   3352           ShrinkRegs.push_back(Reg);
   3353       }
   3354       ErasedInstrs.insert(MI);
   3355       LLVM_DEBUG(dbgs() << "\t\terased:\t" << Def << '\t' << *MI);
   3356       LIS->RemoveMachineInstrFromMaps(*MI);
   3357       MI->eraseFromParent();
   3358       break;
   3359     }
   3360     default:
   3361       break;
   3362     }
   3363   }
   3364 }
   3365 
   3366 void RegisterCoalescer::joinSubRegRanges(LiveRange &LRange, LiveRange &RRange,
   3367                                          LaneBitmask LaneMask,
   3368                                          const CoalescerPair &CP) {
   3369   SmallVector<VNInfo*, 16> NewVNInfo;
   3370   JoinVals RHSVals(RRange, CP.getSrcReg(), CP.getSrcIdx(), LaneMask,
   3371                    NewVNInfo, CP, LIS, TRI, true, true);
   3372   JoinVals LHSVals(LRange, CP.getDstReg(), CP.getDstIdx(), LaneMask,
   3373                    NewVNInfo, CP, LIS, TRI, true, true);
   3374 
   3375   // Compute NewVNInfo and resolve conflicts (see also joinVirtRegs())
   3376   // We should be able to resolve all conflicts here as we could successfully do
   3377   // it on the mainrange already. There is however a problem when multiple
   3378   // ranges get mapped to the "overflow" lane mask bit which creates unexpected
   3379   // interferences.
   3380   if (!LHSVals.mapValues(RHSVals) || !RHSVals.mapValues(LHSVals)) {
   3381     // We already determined that it is legal to merge the intervals, so this
   3382     // should never fail.
   3383     llvm_unreachable("*** Couldn't join subrange!\n");
   3384   }
   3385   if (!LHSVals.resolveConflicts(RHSVals) ||
   3386       !RHSVals.resolveConflicts(LHSVals)) {
   3387     // We already determined that it is legal to merge the intervals, so this
   3388     // should never fail.
   3389     llvm_unreachable("*** Couldn't join subrange!\n");
   3390   }
   3391 
   3392   // The merging algorithm in LiveInterval::join() can't handle conflicting
   3393   // value mappings, so we need to remove any live ranges that overlap a
   3394   // CR_Replace resolution. Collect a set of end points that can be used to
   3395   // restore the live range after joining.
   3396   SmallVector<SlotIndex, 8> EndPoints;
   3397   LHSVals.pruneValues(RHSVals, EndPoints, false);
   3398   RHSVals.pruneValues(LHSVals, EndPoints, false);
   3399 
   3400   LHSVals.removeImplicitDefs();
   3401   RHSVals.removeImplicitDefs();
   3402 
   3403   LRange.verify();
   3404   RRange.verify();
   3405 
   3406   // Join RRange into LHS.
   3407   LRange.join(RRange, LHSVals.getAssignments(), RHSVals.getAssignments(),
   3408               NewVNInfo);
   3409 
   3410   LLVM_DEBUG(dbgs() << "\t\tjoined lanes: " << PrintLaneMask(LaneMask)
   3411                     << ' ' << LRange << "\n");
   3412   if (EndPoints.empty())
   3413     return;
   3414 
   3415   // Recompute the parts of the live range we had to remove because of
   3416   // CR_Replace conflicts.
   3417   LLVM_DEBUG({
   3418     dbgs() << "\t\trestoring liveness to " << EndPoints.size() << " points: ";
   3419     for (unsigned i = 0, n = EndPoints.size(); i != n; ++i) {
   3420       dbgs() << EndPoints[i];
   3421       if (i != n-1)
   3422         dbgs() << ',';
   3423     }
   3424     dbgs() << ":  " << LRange << '\n';
   3425   });
   3426   LIS->extendToIndices(LRange, EndPoints);
   3427 }
   3428 
   3429 void RegisterCoalescer::mergeSubRangeInto(LiveInterval &LI,
   3430                                           const LiveRange &ToMerge,
   3431                                           LaneBitmask LaneMask,
   3432                                           CoalescerPair &CP,
   3433                                           unsigned ComposeSubRegIdx) {
   3434   BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator();
   3435   LI.refineSubRanges(
   3436       Allocator, LaneMask,
   3437       [this, &Allocator, &ToMerge, &CP](LiveInterval::SubRange &SR) {
   3438         if (SR.empty()) {
   3439           SR.assign(ToMerge, Allocator);
   3440         } else {
   3441           // joinSubRegRange() destroys the merged range, so we need a copy.
   3442           LiveRange RangeCopy(ToMerge, Allocator);
   3443           joinSubRegRanges(SR, RangeCopy, SR.LaneMask, CP);
   3444         }
   3445       },
   3446       *LIS->getSlotIndexes(), *TRI, ComposeSubRegIdx);
   3447 }
   3448 
   3449 bool RegisterCoalescer::isHighCostLiveInterval(LiveInterval &LI) {
   3450   if (LI.valnos.size() < LargeIntervalSizeThreshold)
   3451     return false;
   3452   auto &Counter = LargeLIVisitCounter[LI.reg()];
   3453   if (Counter < LargeIntervalFreqThreshold) {
   3454     Counter++;
   3455     return false;
   3456   }
   3457   return true;
   3458 }
   3459 
   3460 bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) {
   3461   SmallVector<VNInfo*, 16> NewVNInfo;
   3462   LiveInterval &RHS = LIS->getInterval(CP.getSrcReg());
   3463   LiveInterval &LHS = LIS->getInterval(CP.getDstReg());
   3464   bool TrackSubRegLiveness = MRI->shouldTrackSubRegLiveness(*CP.getNewRC());
   3465   JoinVals RHSVals(RHS, CP.getSrcReg(), CP.getSrcIdx(), LaneBitmask::getNone(),
   3466                    NewVNInfo, CP, LIS, TRI, false, TrackSubRegLiveness);
   3467   JoinVals LHSVals(LHS, CP.getDstReg(), CP.getDstIdx(), LaneBitmask::getNone(),
   3468                    NewVNInfo, CP, LIS, TRI, false, TrackSubRegLiveness);
   3469 
   3470   LLVM_DEBUG(dbgs() << "\t\tRHS = " << RHS << "\n\t\tLHS = " << LHS << '\n');
   3471 
   3472   if (isHighCostLiveInterval(LHS) || isHighCostLiveInterval(RHS))
   3473     return false;
   3474 
   3475   // First compute NewVNInfo and the simple value mappings.
   3476   // Detect impossible conflicts early.
   3477   if (!LHSVals.mapValues(RHSVals) || !RHSVals.mapValues(LHSVals))
   3478     return false;
   3479 
   3480   // Some conflicts can only be resolved after all values have been mapped.
   3481   if (!LHSVals.resolveConflicts(RHSVals) || !RHSVals.resolveConflicts(LHSVals))
   3482     return false;
   3483 
   3484   // All clear, the live ranges can be merged.
   3485   if (RHS.hasSubRanges() || LHS.hasSubRanges()) {
   3486     BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator();
   3487 
   3488     // Transform lanemasks from the LHS to masks in the coalesced register and
   3489     // create initial subranges if necessary.
   3490     unsigned DstIdx = CP.getDstIdx();
   3491     if (!LHS.hasSubRanges()) {
   3492       LaneBitmask Mask = DstIdx == 0 ? CP.getNewRC()->getLaneMask()
   3493                                      : TRI->getSubRegIndexLaneMask(DstIdx);
   3494       // LHS must support subregs or we wouldn't be in this codepath.
   3495       assert(Mask.any());
   3496       LHS.createSubRangeFrom(Allocator, Mask, LHS);
   3497     } else if (DstIdx != 0) {
   3498       // Transform LHS lanemasks to new register class if necessary.
   3499       for (LiveInterval::SubRange &R : LHS.subranges()) {
   3500         LaneBitmask Mask = TRI->composeSubRegIndexLaneMask(DstIdx, R.LaneMask);
   3501         R.LaneMask = Mask;
   3502       }
   3503     }
   3504     LLVM_DEBUG(dbgs() << "\t\tLHST = " << printReg(CP.getDstReg()) << ' ' << LHS
   3505                       << '\n');
   3506 
   3507     // Determine lanemasks of RHS in the coalesced register and merge subranges.
   3508     unsigned SrcIdx = CP.getSrcIdx();
   3509     if (!RHS.hasSubRanges()) {
   3510       LaneBitmask Mask = SrcIdx == 0 ? CP.getNewRC()->getLaneMask()
   3511                                      : TRI->getSubRegIndexLaneMask(SrcIdx);
   3512       mergeSubRangeInto(LHS, RHS, Mask, CP, DstIdx);
   3513     } else {
   3514       // Pair up subranges and merge.
   3515       for (LiveInterval::SubRange &R : RHS.subranges()) {
   3516         LaneBitmask Mask = TRI->composeSubRegIndexLaneMask(SrcIdx, R.LaneMask);
   3517         mergeSubRangeInto(LHS, R, Mask, CP, DstIdx);
   3518       }
   3519     }
   3520     LLVM_DEBUG(dbgs() << "\tJoined SubRanges " << LHS << "\n");
   3521 
   3522     // Pruning implicit defs from subranges may result in the main range
   3523     // having stale segments.
   3524     LHSVals.pruneMainSegments(LHS, ShrinkMainRange);
   3525 
   3526     LHSVals.pruneSubRegValues(LHS, ShrinkMask);
   3527     RHSVals.pruneSubRegValues(LHS, ShrinkMask);
   3528   }
   3529 
   3530   // The merging algorithm in LiveInterval::join() can't handle conflicting
   3531   // value mappings, so we need to remove any live ranges that overlap a
   3532   // CR_Replace resolution. Collect a set of end points that can be used to
   3533   // restore the live range after joining.
   3534   SmallVector<SlotIndex, 8> EndPoints;
   3535   LHSVals.pruneValues(RHSVals, EndPoints, true);
   3536   RHSVals.pruneValues(LHSVals, EndPoints, true);
   3537 
   3538   // Erase COPY and IMPLICIT_DEF instructions. This may cause some external
   3539   // registers to require trimming.
   3540   SmallVector<Register, 8> ShrinkRegs;
   3541   LHSVals.eraseInstrs(ErasedInstrs, ShrinkRegs, &LHS);
   3542   RHSVals.eraseInstrs(ErasedInstrs, ShrinkRegs);
   3543   while (!ShrinkRegs.empty())
   3544     shrinkToUses(&LIS->getInterval(ShrinkRegs.pop_back_val()));
   3545 
   3546   // Scan and mark undef any DBG_VALUEs that would refer to a different value.
   3547   checkMergingChangesDbgValues(CP, LHS, LHSVals, RHS, RHSVals);
   3548 
   3549   // Join RHS into LHS.
   3550   LHS.join(RHS, LHSVals.getAssignments(), RHSVals.getAssignments(), NewVNInfo);
   3551 
   3552   // Kill flags are going to be wrong if the live ranges were overlapping.
   3553   // Eventually, we should simply clear all kill flags when computing live
   3554   // ranges. They are reinserted after register allocation.
   3555   MRI->clearKillFlags(LHS.reg());
   3556   MRI->clearKillFlags(RHS.reg());
   3557 
   3558   if (!EndPoints.empty()) {
   3559     // Recompute the parts of the live range we had to remove because of
   3560     // CR_Replace conflicts.
   3561     LLVM_DEBUG({
   3562       dbgs() << "\t\trestoring liveness to " << EndPoints.size() << " points: ";
   3563       for (unsigned i = 0, n = EndPoints.size(); i != n; ++i) {
   3564         dbgs() << EndPoints[i];
   3565         if (i != n-1)
   3566           dbgs() << ',';
   3567       }
   3568       dbgs() << ":  " << LHS << '\n';
   3569     });
   3570     LIS->extendToIndices((LiveRange&)LHS, EndPoints);
   3571   }
   3572 
   3573   return true;
   3574 }
   3575 
   3576 bool RegisterCoalescer::joinIntervals(CoalescerPair &CP) {
   3577   return CP.isPhys() ? joinReservedPhysReg(CP) : joinVirtRegs(CP);
   3578 }
   3579 
   3580 void RegisterCoalescer::buildVRegToDbgValueMap(MachineFunction &MF)
   3581 {
   3582   const SlotIndexes &Slots = *LIS->getSlotIndexes();
   3583   SmallVector<MachineInstr *, 8> ToInsert;
   3584 
   3585   // After collecting a block of DBG_VALUEs into ToInsert, enter them into the
   3586   // vreg => DbgValueLoc map.
   3587   auto CloseNewDVRange = [this, &ToInsert](SlotIndex Slot) {
   3588     for (auto *X : ToInsert) {
   3589       for (auto Op : X->debug_operands()) {
   3590         if (Op.isReg() && Op.getReg().isVirtual())
   3591           DbgVRegToValues[Op.getReg()].push_back({Slot, X});
   3592       }
   3593     }
   3594 
   3595     ToInsert.clear();
   3596   };
   3597 
   3598   // Iterate over all instructions, collecting them into the ToInsert vector.
   3599   // Once a non-debug instruction is found, record the slot index of the
   3600   // collected DBG_VALUEs.
   3601   for (auto &MBB : MF) {
   3602     SlotIndex CurrentSlot = Slots.getMBBStartIdx(&MBB);
   3603 
   3604     for (auto &MI : MBB) {
   3605       if (MI.isDebugValue()) {
   3606         if (any_of(MI.debug_operands(), [](const MachineOperand &MO) {
   3607               return MO.isReg() && MO.getReg().isVirtual();
   3608             }))
   3609           ToInsert.push_back(&MI);
   3610       } else if (!MI.isDebugOrPseudoInstr()) {
   3611         CurrentSlot = Slots.getInstructionIndex(MI);
   3612         CloseNewDVRange(CurrentSlot);
   3613       }
   3614     }
   3615 
   3616     // Close range of DBG_VALUEs at the end of blocks.
   3617     CloseNewDVRange(Slots.getMBBEndIdx(&MBB));
   3618   }
   3619 
   3620   // Sort all DBG_VALUEs we've seen by slot number.
   3621   for (auto &Pair : DbgVRegToValues)
   3622     llvm::sort(Pair.second);
   3623 }
   3624 
   3625 void RegisterCoalescer::checkMergingChangesDbgValues(CoalescerPair &CP,
   3626                                                      LiveRange &LHS,
   3627                                                      JoinVals &LHSVals,
   3628                                                      LiveRange &RHS,
   3629                                                      JoinVals &RHSVals) {
   3630   auto ScanForDstReg = [&](Register Reg) {
   3631     checkMergingChangesDbgValuesImpl(Reg, RHS, LHS, LHSVals);
   3632   };
   3633 
   3634   auto ScanForSrcReg = [&](Register Reg) {
   3635     checkMergingChangesDbgValuesImpl(Reg, LHS, RHS, RHSVals);
   3636   };
   3637 
   3638   // Scan for potentially unsound DBG_VALUEs: examine first the register number
   3639   // Reg, and then any other vregs that may have been merged into  it.
   3640   auto PerformScan = [this](Register Reg, std::function<void(Register)> Func) {
   3641     Func(Reg);
   3642     if (DbgMergedVRegNums.count(Reg))
   3643       for (Register X : DbgMergedVRegNums[Reg])
   3644         Func(X);
   3645   };
   3646 
   3647   // Scan for unsound updates of both the source and destination register.
   3648   PerformScan(CP.getSrcReg(), ScanForSrcReg);
   3649   PerformScan(CP.getDstReg(), ScanForDstReg);
   3650 }
   3651 
   3652 void RegisterCoalescer::checkMergingChangesDbgValuesImpl(Register Reg,
   3653                                                          LiveRange &OtherLR,
   3654                                                          LiveRange &RegLR,
   3655                                                          JoinVals &RegVals) {
   3656   // Are there any DBG_VALUEs to examine?
   3657   auto VRegMapIt = DbgVRegToValues.find(Reg);
   3658   if (VRegMapIt == DbgVRegToValues.end())
   3659     return;
   3660 
   3661   auto &DbgValueSet = VRegMapIt->second;
   3662   auto DbgValueSetIt = DbgValueSet.begin();
   3663   auto SegmentIt = OtherLR.begin();
   3664 
   3665   bool LastUndefResult = false;
   3666   SlotIndex LastUndefIdx;
   3667 
   3668   // If the "Other" register is live at a slot Idx, test whether Reg can
   3669   // safely be merged with it, or should be marked undef.
   3670   auto ShouldUndef = [&RegVals, &RegLR, &LastUndefResult,
   3671                       &LastUndefIdx](SlotIndex Idx) -> bool {
   3672     // Our worst-case performance typically happens with asan, causing very
   3673     // many DBG_VALUEs of the same location. Cache a copy of the most recent
   3674     // result for this edge-case.
   3675     if (LastUndefIdx == Idx)
   3676       return LastUndefResult;
   3677 
   3678     // If the other range was live, and Reg's was not, the register coalescer
   3679     // will not have tried to resolve any conflicts. We don't know whether
   3680     // the DBG_VALUE will refer to the same value number, so it must be made
   3681     // undef.
   3682     auto OtherIt = RegLR.find(Idx);
   3683     if (OtherIt == RegLR.end())
   3684       return true;
   3685 
   3686     // Both the registers were live: examine the conflict resolution record for
   3687     // the value number Reg refers to. CR_Keep meant that this value number
   3688     // "won" and the merged register definitely refers to that value. CR_Erase
   3689     // means the value number was a redundant copy of the other value, which
   3690     // was coalesced and Reg deleted. It's safe to refer to the other register
   3691     // (which will be the source of the copy).
   3692     auto Resolution = RegVals.getResolution(OtherIt->valno->id);
   3693     LastUndefResult = Resolution != JoinVals::CR_Keep &&
   3694                       Resolution != JoinVals::CR_Erase;
   3695     LastUndefIdx = Idx;
   3696     return LastUndefResult;
   3697   };
   3698 
   3699   // Iterate over both the live-range of the "Other" register, and the set of
   3700   // DBG_VALUEs for Reg at the same time. Advance whichever one has the lowest
   3701   // slot index. This relies on the DbgValueSet being ordered.
   3702   while (DbgValueSetIt != DbgValueSet.end() && SegmentIt != OtherLR.end()) {
   3703     if (DbgValueSetIt->first < SegmentIt->end) {
   3704       // "Other" is live and there is a DBG_VALUE of Reg: test if we should
   3705       // set it undef.
   3706       if (DbgValueSetIt->first >= SegmentIt->start) {
   3707         bool HasReg = DbgValueSetIt->second->hasDebugOperandForReg(Reg);
   3708         bool ShouldUndefReg = ShouldUndef(DbgValueSetIt->first);
   3709         if (HasReg && ShouldUndefReg) {
   3710           // Mark undef, erase record of this DBG_VALUE to avoid revisiting.
   3711           DbgValueSetIt->second->setDebugValueUndef();
   3712           continue;
   3713         }
   3714       }
   3715       ++DbgValueSetIt;
   3716     } else {
   3717       ++SegmentIt;
   3718     }
   3719   }
   3720 }
   3721 
   3722 namespace {
   3723 
   3724 /// Information concerning MBB coalescing priority.
   3725 struct MBBPriorityInfo {
   3726   MachineBasicBlock *MBB;
   3727   unsigned Depth;
   3728   bool IsSplit;
   3729 
   3730   MBBPriorityInfo(MachineBasicBlock *mbb, unsigned depth, bool issplit)
   3731     : MBB(mbb), Depth(depth), IsSplit(issplit) {}
   3732 };
   3733 
   3734 } // end anonymous namespace
   3735 
   3736 /// C-style comparator that sorts first based on the loop depth of the basic
   3737 /// block (the unsigned), and then on the MBB number.
   3738 ///
   3739 /// EnableGlobalCopies assumes that the primary sort key is loop depth.
   3740 static int compareMBBPriority(const MBBPriorityInfo *LHS,
   3741                               const MBBPriorityInfo *RHS) {
   3742   // Deeper loops first
   3743   if (LHS->Depth != RHS->Depth)
   3744     return LHS->Depth > RHS->Depth ? -1 : 1;
   3745 
   3746   // Try to unsplit critical edges next.
   3747   if (LHS->IsSplit != RHS->IsSplit)
   3748     return LHS->IsSplit ? -1 : 1;
   3749 
   3750   // Prefer blocks that are more connected in the CFG. This takes care of
   3751   // the most difficult copies first while intervals are short.
   3752   unsigned cl = LHS->MBB->pred_size() + LHS->MBB->succ_size();
   3753   unsigned cr = RHS->MBB->pred_size() + RHS->MBB->succ_size();
   3754   if (cl != cr)
   3755     return cl > cr ? -1 : 1;
   3756 
   3757   // As a last resort, sort by block number.
   3758   return LHS->MBB->getNumber() < RHS->MBB->getNumber() ? -1 : 1;
   3759 }
   3760 
   3761 /// \returns true if the given copy uses or defines a local live range.
   3762 static bool isLocalCopy(MachineInstr *Copy, const LiveIntervals *LIS) {
   3763   if (!Copy->isCopy())
   3764     return false;
   3765 
   3766   if (Copy->getOperand(1).isUndef())
   3767     return false;
   3768 
   3769   Register SrcReg = Copy->getOperand(1).getReg();
   3770   Register DstReg = Copy->getOperand(0).getReg();
   3771   if (Register::isPhysicalRegister(SrcReg) ||
   3772       Register::isPhysicalRegister(DstReg))
   3773     return false;
   3774 
   3775   return LIS->intervalIsInOneMBB(LIS->getInterval(SrcReg))
   3776     || LIS->intervalIsInOneMBB(LIS->getInterval(DstReg));
   3777 }
   3778 
   3779 void RegisterCoalescer::lateLiveIntervalUpdate() {
   3780   for (Register reg : ToBeUpdated) {
   3781     if (!LIS->hasInterval(reg))
   3782       continue;
   3783     LiveInterval &LI = LIS->getInterval(reg);
   3784     shrinkToUses(&LI, &DeadDefs);
   3785     if (!DeadDefs.empty())
   3786       eliminateDeadDefs();
   3787   }
   3788   ToBeUpdated.clear();
   3789 }
   3790 
   3791 bool RegisterCoalescer::
   3792 copyCoalesceWorkList(MutableArrayRef<MachineInstr*> CurrList) {
   3793   bool Progress = false;
   3794   for (unsigned i = 0, e = CurrList.size(); i != e; ++i) {
   3795     if (!CurrList[i])
   3796       continue;
   3797     // Skip instruction pointers that have already been erased, for example by
   3798     // dead code elimination.
   3799     if (ErasedInstrs.count(CurrList[i])) {
   3800       CurrList[i] = nullptr;
   3801       continue;
   3802     }
   3803     bool Again = false;
   3804     bool Success = joinCopy(CurrList[i], Again);
   3805     Progress |= Success;
   3806     if (Success || !Again)
   3807       CurrList[i] = nullptr;
   3808   }
   3809   return Progress;
   3810 }
   3811 
   3812 /// Check if DstReg is a terminal node.
   3813 /// I.e., it does not have any affinity other than \p Copy.
   3814 static bool isTerminalReg(Register DstReg, const MachineInstr &Copy,
   3815                           const MachineRegisterInfo *MRI) {
   3816   assert(Copy.isCopyLike());
   3817   // Check if the destination of this copy as any other affinity.
   3818   for (const MachineInstr &MI : MRI->reg_nodbg_instructions(DstReg))
   3819     if (&MI != &Copy && MI.isCopyLike())
   3820       return false;
   3821   return true;
   3822 }
   3823 
   3824 bool RegisterCoalescer::applyTerminalRule(const MachineInstr &Copy) const {
   3825   assert(Copy.isCopyLike());
   3826   if (!UseTerminalRule)
   3827     return false;
   3828   Register SrcReg, DstReg;
   3829   unsigned SrcSubReg = 0, DstSubReg = 0;
   3830   if (!isMoveInstr(*TRI, &Copy, SrcReg, DstReg, SrcSubReg, DstSubReg))
   3831     return false;
   3832   // Check if the destination of this copy has any other affinity.
   3833   if (DstReg.isPhysical() ||
   3834       // If SrcReg is a physical register, the copy won't be coalesced.
   3835       // Ignoring it may have other side effect (like missing
   3836       // rematerialization). So keep it.
   3837       SrcReg.isPhysical() || !isTerminalReg(DstReg, Copy, MRI))
   3838     return false;
   3839 
   3840   // DstReg is a terminal node. Check if it interferes with any other
   3841   // copy involving SrcReg.
   3842   const MachineBasicBlock *OrigBB = Copy.getParent();
   3843   const LiveInterval &DstLI = LIS->getInterval(DstReg);
   3844   for (const MachineInstr &MI : MRI->reg_nodbg_instructions(SrcReg)) {
   3845     // Technically we should check if the weight of the new copy is
   3846     // interesting compared to the other one and update the weight
   3847     // of the copies accordingly. However, this would only work if
   3848     // we would gather all the copies first then coalesce, whereas
   3849     // right now we interleave both actions.
   3850     // For now, just consider the copies that are in the same block.
   3851     if (&MI == &Copy || !MI.isCopyLike() || MI.getParent() != OrigBB)
   3852       continue;
   3853     Register OtherSrcReg, OtherReg;
   3854     unsigned OtherSrcSubReg = 0, OtherSubReg = 0;
   3855     if (!isMoveInstr(*TRI, &Copy, OtherSrcReg, OtherReg, OtherSrcSubReg,
   3856                 OtherSubReg))
   3857       return false;
   3858     if (OtherReg == SrcReg)
   3859       OtherReg = OtherSrcReg;
   3860     // Check if OtherReg is a non-terminal.
   3861     if (Register::isPhysicalRegister(OtherReg) ||
   3862         isTerminalReg(OtherReg, MI, MRI))
   3863       continue;
   3864     // Check that OtherReg interfere with DstReg.
   3865     if (LIS->getInterval(OtherReg).overlaps(DstLI)) {
   3866       LLVM_DEBUG(dbgs() << "Apply terminal rule for: " << printReg(DstReg)
   3867                         << '\n');
   3868       return true;
   3869     }
   3870   }
   3871   return false;
   3872 }
   3873 
   3874 void
   3875 RegisterCoalescer::copyCoalesceInMBB(MachineBasicBlock *MBB) {
   3876   LLVM_DEBUG(dbgs() << MBB->getName() << ":\n");
   3877 
   3878   // Collect all copy-like instructions in MBB. Don't start coalescing anything
   3879   // yet, it might invalidate the iterator.
   3880   const unsigned PrevSize = WorkList.size();
   3881   if (JoinGlobalCopies) {
   3882     SmallVector<MachineInstr*, 2> LocalTerminals;
   3883     SmallVector<MachineInstr*, 2> GlobalTerminals;
   3884     // Coalesce copies bottom-up to coalesce local defs before local uses. They
   3885     // are not inherently easier to resolve, but slightly preferable until we
   3886     // have local live range splitting. In particular this is required by
   3887     // cmp+jmp macro fusion.
   3888     for (MachineInstr &MI : *MBB) {
   3889       if (!MI.isCopyLike())
   3890         continue;
   3891       bool ApplyTerminalRule = applyTerminalRule(MI);
   3892       if (isLocalCopy(&MI, LIS)) {
   3893         if (ApplyTerminalRule)
   3894           LocalTerminals.push_back(&MI);
   3895         else
   3896           LocalWorkList.push_back(&MI);
   3897       } else {
   3898         if (ApplyTerminalRule)
   3899           GlobalTerminals.push_back(&MI);
   3900         else
   3901           WorkList.push_back(&MI);
   3902       }
   3903     }
   3904     // Append the copies evicted by the terminal rule at the end of the list.
   3905     LocalWorkList.append(LocalTerminals.begin(), LocalTerminals.end());
   3906     WorkList.append(GlobalTerminals.begin(), GlobalTerminals.end());
   3907   }
   3908   else {
   3909     SmallVector<MachineInstr*, 2> Terminals;
   3910     for (MachineInstr &MII : *MBB)
   3911       if (MII.isCopyLike()) {
   3912         if (applyTerminalRule(MII))
   3913           Terminals.push_back(&MII);
   3914         else
   3915           WorkList.push_back(&MII);
   3916       }
   3917     // Append the copies evicted by the terminal rule at the end of the list.
   3918     WorkList.append(Terminals.begin(), Terminals.end());
   3919   }
   3920   // Try coalescing the collected copies immediately, and remove the nulls.
   3921   // This prevents the WorkList from getting too large since most copies are
   3922   // joinable on the first attempt.
   3923   MutableArrayRef<MachineInstr*>
   3924     CurrList(WorkList.begin() + PrevSize, WorkList.end());
   3925   if (copyCoalesceWorkList(CurrList))
   3926     WorkList.erase(std::remove(WorkList.begin() + PrevSize, WorkList.end(),
   3927                                nullptr), WorkList.end());
   3928 }
   3929 
   3930 void RegisterCoalescer::coalesceLocals() {
   3931   copyCoalesceWorkList(LocalWorkList);
   3932   for (unsigned j = 0, je = LocalWorkList.size(); j != je; ++j) {
   3933     if (LocalWorkList[j])
   3934       WorkList.push_back(LocalWorkList[j]);
   3935   }
   3936   LocalWorkList.clear();
   3937 }
   3938 
   3939 void RegisterCoalescer::joinAllIntervals() {
   3940   LLVM_DEBUG(dbgs() << "********** JOINING INTERVALS ***********\n");
   3941   assert(WorkList.empty() && LocalWorkList.empty() && "Old data still around.");
   3942 
   3943   std::vector<MBBPriorityInfo> MBBs;
   3944   MBBs.reserve(MF->size());
   3945   for (MachineBasicBlock &MBB : *MF) {
   3946     MBBs.push_back(MBBPriorityInfo(&MBB, Loops->getLoopDepth(&MBB),
   3947                                    JoinSplitEdges && isSplitEdge(&MBB)));
   3948   }
   3949   array_pod_sort(MBBs.begin(), MBBs.end(), compareMBBPriority);
   3950 
   3951   // Coalesce intervals in MBB priority order.
   3952   unsigned CurrDepth = std::numeric_limits<unsigned>::max();
   3953   for (unsigned i = 0, e = MBBs.size(); i != e; ++i) {
   3954     // Try coalescing the collected local copies for deeper loops.
   3955     if (JoinGlobalCopies && MBBs[i].Depth < CurrDepth) {
   3956       coalesceLocals();
   3957       CurrDepth = MBBs[i].Depth;
   3958     }
   3959     copyCoalesceInMBB(MBBs[i].MBB);
   3960   }
   3961   lateLiveIntervalUpdate();
   3962   coalesceLocals();
   3963 
   3964   // Joining intervals can allow other intervals to be joined.  Iteratively join
   3965   // until we make no progress.
   3966   while (copyCoalesceWorkList(WorkList))
   3967     /* empty */ ;
   3968   lateLiveIntervalUpdate();
   3969 }
   3970 
   3971 void RegisterCoalescer::releaseMemory() {
   3972   ErasedInstrs.clear();
   3973   WorkList.clear();
   3974   DeadDefs.clear();
   3975   InflateRegs.clear();
   3976   LargeLIVisitCounter.clear();
   3977 }
   3978 
   3979 bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) {
   3980   LLVM_DEBUG(dbgs() << "********** SIMPLE REGISTER COALESCING **********\n"
   3981                     << "********** Function: " << fn.getName() << '\n');
   3982 
   3983   // Variables changed between a setjmp and a longjump can have undefined value
   3984   // after the longjmp. This behaviour can be observed if such a variable is
   3985   // spilled, so longjmp won't restore the value in the spill slot.
   3986   // RegisterCoalescer should not run in functions with a setjmp to avoid
   3987   // merging such undefined variables with predictable ones.
   3988   //
   3989   // TODO: Could specifically disable coalescing registers live across setjmp
   3990   // calls
   3991   if (fn.exposesReturnsTwice()) {
   3992     LLVM_DEBUG(
   3993         dbgs() << "* Skipped as it exposes funcions that returns twice.\n");
   3994     return false;
   3995   }
   3996 
   3997   MF = &fn;
   3998   MRI = &fn.getRegInfo();
   3999   const TargetSubtargetInfo &STI = fn.getSubtarget();
   4000   TRI = STI.getRegisterInfo();
   4001   TII = STI.getInstrInfo();
   4002   LIS = &getAnalysis<LiveIntervals>();
   4003   AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
   4004   Loops = &getAnalysis<MachineLoopInfo>();
   4005   if (EnableGlobalCopies == cl::BOU_UNSET)
   4006     JoinGlobalCopies = STI.enableJoinGlobalCopies();
   4007   else
   4008     JoinGlobalCopies = (EnableGlobalCopies == cl::BOU_TRUE);
   4009 
   4010   // The MachineScheduler does not currently require JoinSplitEdges. This will
   4011   // either be enabled unconditionally or replaced by a more general live range
   4012   // splitting optimization.
   4013   JoinSplitEdges = EnableJoinSplits;
   4014 
   4015   if (VerifyCoalescing)
   4016     MF->verify(this, "Before register coalescing");
   4017 
   4018   DbgVRegToValues.clear();
   4019   DbgMergedVRegNums.clear();
   4020   buildVRegToDbgValueMap(fn);
   4021 
   4022   RegClassInfo.runOnMachineFunction(fn);
   4023 
   4024   // Join (coalesce) intervals if requested.
   4025   if (EnableJoining)
   4026     joinAllIntervals();
   4027 
   4028   // After deleting a lot of copies, register classes may be less constrained.
   4029   // Removing sub-register operands may allow GR32_ABCD -> GR32 and DPR_VFP2 ->
   4030   // DPR inflation.
   4031   array_pod_sort(InflateRegs.begin(), InflateRegs.end());
   4032   InflateRegs.erase(std::unique(InflateRegs.begin(), InflateRegs.end()),
   4033                     InflateRegs.end());
   4034   LLVM_DEBUG(dbgs() << "Trying to inflate " << InflateRegs.size()
   4035                     << " regs.\n");
   4036   for (unsigned i = 0, e = InflateRegs.size(); i != e; ++i) {
   4037     Register Reg = InflateRegs[i];
   4038     if (MRI->reg_nodbg_empty(Reg))
   4039       continue;
   4040     if (MRI->recomputeRegClass(Reg)) {
   4041       LLVM_DEBUG(dbgs() << printReg(Reg) << " inflated to "
   4042                         << TRI->getRegClassName(MRI->getRegClass(Reg)) << '\n');
   4043       ++NumInflated;
   4044 
   4045       LiveInterval &LI = LIS->getInterval(Reg);
   4046       if (LI.hasSubRanges()) {
   4047         // If the inflated register class does not support subregisters anymore
   4048         // remove the subranges.
   4049         if (!MRI->shouldTrackSubRegLiveness(Reg)) {
   4050           LI.clearSubRanges();
   4051         } else {
   4052 #ifndef NDEBUG
   4053           LaneBitmask MaxMask = MRI->getMaxLaneMaskForVReg(Reg);
   4054           // If subranges are still supported, then the same subregs
   4055           // should still be supported.
   4056           for (LiveInterval::SubRange &S : LI.subranges()) {
   4057             assert((S.LaneMask & ~MaxMask).none());
   4058           }
   4059 #endif
   4060         }
   4061       }
   4062     }
   4063   }
   4064 
   4065   LLVM_DEBUG(dump());
   4066   if (VerifyCoalescing)
   4067     MF->verify(this, "After register coalescing");
   4068   return true;
   4069 }
   4070 
   4071 void RegisterCoalescer::print(raw_ostream &O, const Module* m) const {
   4072    LIS->print(O, m);
   4073 }
   4074