Home | History | Annotate | Line # | Download | only in Scalar
      1 //===- SpeculateAroundPHIs.cpp --------------------------------------------===//
      2 //
      3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
      4 // See https://llvm.org/LICENSE.txt for license information.
      5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
      6 //
      7 //===----------------------------------------------------------------------===//
      8 
      9 #include "llvm/Transforms/Scalar/SpeculateAroundPHIs.h"
     10 #include "llvm/ADT/PostOrderIterator.h"
     11 #include "llvm/ADT/Sequence.h"
     12 #include "llvm/ADT/SetVector.h"
     13 #include "llvm/ADT/Statistic.h"
     14 #include "llvm/Analysis/TargetTransformInfo.h"
     15 #include "llvm/Analysis/ValueTracking.h"
     16 #include "llvm/IR/BasicBlock.h"
     17 #include "llvm/IR/IRBuilder.h"
     18 #include "llvm/IR/Instructions.h"
     19 #include "llvm/IR/IntrinsicInst.h"
     20 #include "llvm/Support/Debug.h"
     21 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
     22 
     23 using namespace llvm;
     24 
     25 #define DEBUG_TYPE "spec-phis"
     26 
     27 STATISTIC(NumPHIsSpeculated, "Number of PHI nodes we speculated around");
     28 STATISTIC(NumEdgesSplit,
     29           "Number of critical edges which were split for speculation");
     30 STATISTIC(NumSpeculatedInstructions,
     31           "Number of instructions we speculated around the PHI nodes");
     32 STATISTIC(NumNewRedundantInstructions,
     33           "Number of new, redundant instructions inserted");
     34 
     35 /// Check whether speculating the users of a PHI node around the PHI
     36 /// will be safe.
     37 ///
     38 /// This checks both that all of the users are safe and also that all of their
     39 /// operands are either recursively safe or already available along an incoming
     40 /// edge to the PHI.
     41 ///
     42 /// This routine caches both all the safe nodes explored in `PotentialSpecSet`
     43 /// and the chain of nodes that definitively reach any unsafe node in
     44 /// `UnsafeSet`. By preserving these between repeated calls to this routine for
     45 /// PHIs in the same basic block, the exploration here can be reused. However,
     46 /// these caches must no be reused for PHIs in a different basic block as they
     47 /// reflect what is available along incoming edges.
     48 static bool
     49 isSafeToSpeculatePHIUsers(PHINode &PN, DominatorTree &DT,
     50                           SmallPtrSetImpl<Instruction *> &PotentialSpecSet,
     51                           SmallPtrSetImpl<Instruction *> &UnsafeSet) {
     52   auto *PhiBB = PN.getParent();
     53   SmallPtrSet<Instruction *, 4> Visited;
     54   SmallVector<std::pair<Instruction *, User::value_op_iterator>, 16> DFSStack;
     55 
     56   // Walk each user of the PHI node.
     57   for (Use &U : PN.uses()) {
     58     auto *UI = cast<Instruction>(U.getUser());
     59 
     60     // Ensure the use post-dominates the PHI node. This ensures that, in the
     61     // absence of unwinding, the use will actually be reached.
     62     // FIXME: We use a blunt hammer of requiring them to be in the same basic
     63     // block. We should consider using actual post-dominance here in the
     64     // future.
     65     if (UI->getParent() != PhiBB) {
     66       LLVM_DEBUG(dbgs() << "  Unsafe: use in a different BB: " << *UI << "\n");
     67       return false;
     68     }
     69 
     70     if (const auto *CS = dyn_cast<CallBase>(UI)) {
     71       if (CS->isConvergent() || CS->cannotDuplicate()) {
     72         LLVM_DEBUG(dbgs() << "  Unsafe: convergent "
     73                    "callsite cannot de duplicated: " << *UI << '\n');
     74         return false;
     75       }
     76     }
     77 
     78     // FIXME: This check is much too conservative. We're not going to move these
     79     // instructions onto new dynamic paths through the program unless there is
     80     // a call instruction between the use and the PHI node. And memory isn't
     81     // changing unless there is a store in that same sequence. We should
     82     // probably change this to do at least a limited scan of the intervening
     83     // instructions and allow handling stores in easily proven safe cases.
     84     if (mayBeMemoryDependent(*UI)) {
     85       LLVM_DEBUG(dbgs() << "  Unsafe: can't speculate use: " << *UI << "\n");
     86       return false;
     87     }
     88 
     89     // Now do a depth-first search of everything these users depend on to make
     90     // sure they are transitively safe. This is a depth-first search, but we
     91     // check nodes in preorder to minimize the amount of checking.
     92     Visited.insert(UI);
     93     DFSStack.push_back({UI, UI->value_op_begin()});
     94     do {
     95       User::value_op_iterator OpIt;
     96       std::tie(UI, OpIt) = DFSStack.pop_back_val();
     97 
     98       while (OpIt != UI->value_op_end()) {
     99         auto *OpI = dyn_cast<Instruction>(*OpIt);
    100         // Increment to the next operand for whenever we continue.
    101         ++OpIt;
    102         // No need to visit non-instructions, which can't form dependencies.
    103         if (!OpI)
    104           continue;
    105 
    106         // Now do the main pre-order checks that this operand is a viable
    107         // dependency of something we want to speculate.
    108 
    109         // First do a few checks for instructions that won't require
    110         // speculation at all because they are trivially available on the
    111         // incoming edge (either through dominance or through an incoming value
    112         // to a PHI).
    113         //
    114         // The cases in the current block will be trivially dominated by the
    115         // edge.
    116         auto *ParentBB = OpI->getParent();
    117         if (ParentBB == PhiBB) {
    118           if (isa<PHINode>(OpI)) {
    119             // We can trivially map through phi nodes in the same block.
    120             continue;
    121           }
    122         } else if (DT.dominates(ParentBB, PhiBB)) {
    123           // Instructions from dominating blocks are already available.
    124           continue;
    125         }
    126 
    127         // Once we know that we're considering speculating the operand, check
    128         // if we've already explored this subgraph and found it to be safe.
    129         if (PotentialSpecSet.count(OpI))
    130           continue;
    131 
    132         // If we've already explored this subgraph and found it unsafe, bail.
    133         // If when we directly test whether this is safe it fails, bail.
    134         if (UnsafeSet.count(OpI) || ParentBB != PhiBB ||
    135             mayBeMemoryDependent(*OpI)) {
    136           LLVM_DEBUG(dbgs() << "  Unsafe: can't speculate transitive use: "
    137                             << *OpI << "\n");
    138           // Record the stack of instructions which reach this node as unsafe
    139           // so we prune subsequent searches.
    140           UnsafeSet.insert(OpI);
    141           for (auto &StackPair : DFSStack) {
    142             Instruction *I = StackPair.first;
    143             UnsafeSet.insert(I);
    144           }
    145           return false;
    146         }
    147 
    148         // Skip any operands we're already recursively checking.
    149         if (!Visited.insert(OpI).second)
    150           continue;
    151 
    152         // Push onto the stack and descend. We can directly continue this
    153         // loop when ascending.
    154         DFSStack.push_back({UI, OpIt});
    155         UI = OpI;
    156         OpIt = OpI->value_op_begin();
    157       }
    158 
    159       // This node and all its operands are safe. Go ahead and cache that for
    160       // reuse later.
    161       PotentialSpecSet.insert(UI);
    162 
    163       // Continue with the next node on the stack.
    164     } while (!DFSStack.empty());
    165   }
    166 
    167 #ifndef NDEBUG
    168   // Every visited operand should have been marked as safe for speculation at
    169   // this point. Verify this and return success.
    170   for (auto *I : Visited)
    171     assert(PotentialSpecSet.count(I) &&
    172            "Failed to mark a visited instruction as safe!");
    173 #endif
    174   return true;
    175 }
    176 
    177 /// Check whether, in isolation, a given PHI node is both safe and profitable
    178 /// to speculate users around.
    179 ///
    180 /// This handles checking whether there are any constant operands to a PHI
    181 /// which could represent a useful speculation candidate, whether the users of
    182 /// the PHI are safe to speculate including all their transitive dependencies,
    183 /// and whether after speculation there will be some cost savings (profit) to
    184 /// folding the operands into the users of the PHI node. Returns true if both
    185 /// safe and profitable with relevant cost savings updated in the map and with
    186 /// an update to the `PotentialSpecSet`. Returns false if either safety or
    187 /// profitability are absent. Some new entries may be made to the
    188 /// `PotentialSpecSet` even when this routine returns false, but they remain
    189 /// conservatively correct.
    190 ///
    191 /// The profitability check here is a local one, but it checks this in an
    192 /// interesting way. Beyond checking that the total cost of materializing the
    193 /// constants will be less than the cost of folding them into their users, it
    194 /// also checks that no one incoming constant will have a higher cost when
    195 /// folded into its users rather than materialized. This higher cost could
    196 /// result in a dynamic *path* that is more expensive even when the total cost
    197 /// is lower. Currently, all of the interesting cases where this optimization
    198 /// should fire are ones where it is a no-loss operation in this sense. If we
    199 /// ever want to be more aggressive here, we would need to balance the
    200 /// different incoming edges' cost by looking at their respective
    201 /// probabilities.
    202 static bool isSafeAndProfitableToSpeculateAroundPHI(
    203     PHINode &PN, SmallDenseMap<PHINode *, InstructionCost, 16> &CostSavingsMap,
    204     SmallPtrSetImpl<Instruction *> &PotentialSpecSet,
    205     SmallPtrSetImpl<Instruction *> &UnsafeSet, DominatorTree &DT,
    206     TargetTransformInfo &TTI) {
    207   // First see whether there is any cost savings to speculating around this
    208   // PHI, and build up a map of the constant inputs to how many times they
    209   // occur.
    210   bool NonFreeMat = false;
    211   struct CostsAndCount {
    212     InstructionCost MatCost = TargetTransformInfo::TCC_Free;
    213     InstructionCost FoldedCost = TargetTransformInfo::TCC_Free;
    214     int Count = 0;
    215   };
    216   SmallDenseMap<ConstantInt *, CostsAndCount, 16> CostsAndCounts;
    217   SmallPtrSet<BasicBlock *, 16> IncomingConstantBlocks;
    218   for (int i : llvm::seq<int>(0, PN.getNumIncomingValues())) {
    219     auto *IncomingC = dyn_cast<ConstantInt>(PN.getIncomingValue(i));
    220     if (!IncomingC)
    221       continue;
    222 
    223     // Only visit each incoming edge with a constant input once.
    224     if (!IncomingConstantBlocks.insert(PN.getIncomingBlock(i)).second)
    225       continue;
    226 
    227     auto InsertResult = CostsAndCounts.insert({IncomingC, {}});
    228     // Count how many edges share a given incoming costant.
    229     ++InsertResult.first->second.Count;
    230     // Only compute the cost the first time we see a particular constant.
    231     if (!InsertResult.second)
    232       continue;
    233 
    234     InstructionCost &MatCost = InsertResult.first->second.MatCost;
    235     MatCost = TTI.getIntImmCost(IncomingC->getValue(), IncomingC->getType(),
    236                                 TargetTransformInfo::TCK_SizeAndLatency);
    237     NonFreeMat |= MatCost != TTI.TCC_Free;
    238   }
    239   if (!NonFreeMat) {
    240     LLVM_DEBUG(dbgs() << "    Free: " << PN << "\n");
    241     // No profit in free materialization.
    242     return false;
    243   }
    244 
    245   // Now check that the uses of this PHI can actually be speculated,
    246   // otherwise we'll still have to materialize the PHI value.
    247   if (!isSafeToSpeculatePHIUsers(PN, DT, PotentialSpecSet, UnsafeSet)) {
    248     LLVM_DEBUG(dbgs() << "    Unsafe PHI: " << PN << "\n");
    249     return false;
    250   }
    251 
    252   // Compute how much (if any) savings are available by speculating around this
    253   // PHI.
    254   for (Use &U : PN.uses()) {
    255     auto *UserI = cast<Instruction>(U.getUser());
    256     // Now check whether there is any savings to folding the incoming constants
    257     // into this use.
    258     unsigned Idx = U.getOperandNo();
    259 
    260     // If we have a binary operator that is commutative, an actual constant
    261     // operand would end up on the RHS, so pretend the use of the PHI is on the
    262     // RHS.
    263     //
    264     // Technically, this is a bit weird if *both* operands are PHIs we're
    265     // speculating. But if that is the case, giving an "optimistic" cost isn't
    266     // a bad thing because after speculation it will constant fold. And
    267     // moreover, such cases should likely have been constant folded already by
    268     // some other pass, so we shouldn't worry about "modeling" them terribly
    269     // accurately here. Similarly, if the other operand is a constant, it still
    270     // seems fine to be "optimistic" in our cost modeling, because when the
    271     // incoming operand from the PHI node is also a constant, we will end up
    272     // constant folding.
    273     if (UserI->isBinaryOp() && UserI->isCommutative() && Idx != 1)
    274       // Assume we will commute the constant to the RHS to be canonical.
    275       Idx = 1;
    276 
    277     // Get the intrinsic ID if this user is an intrinsic.
    278     Intrinsic::ID IID = Intrinsic::not_intrinsic;
    279     if (auto *UserII = dyn_cast<IntrinsicInst>(UserI))
    280       IID = UserII->getIntrinsicID();
    281 
    282     for (auto &IncomingConstantAndCostsAndCount : CostsAndCounts) {
    283       ConstantInt *IncomingC = IncomingConstantAndCostsAndCount.first;
    284       InstructionCost MatCost = IncomingConstantAndCostsAndCount.second.MatCost;
    285       InstructionCost &FoldedCost =
    286           IncomingConstantAndCostsAndCount.second.FoldedCost;
    287       if (IID)
    288         FoldedCost +=
    289           TTI.getIntImmCostIntrin(IID, Idx, IncomingC->getValue(),
    290                                   IncomingC->getType(),
    291                                   TargetTransformInfo::TCK_SizeAndLatency);
    292       else
    293         FoldedCost +=
    294             TTI.getIntImmCostInst(UserI->getOpcode(), Idx,
    295                                   IncomingC->getValue(), IncomingC->getType(),
    296                                   TargetTransformInfo::TCK_SizeAndLatency);
    297 
    298       // If we accumulate more folded cost for this incoming constant than
    299       // materialized cost, then we'll regress any edge with this constant so
    300       // just bail. We're only interested in cases where folding the incoming
    301       // constants is at least break-even on all paths.
    302       if (FoldedCost > MatCost) {
    303         LLVM_DEBUG(dbgs() << "  Not profitable to fold imm: " << *IncomingC
    304                           << "\n"
    305                              "    Materializing cost:    "
    306                           << MatCost
    307                           << "\n"
    308                              "    Accumulated folded cost: "
    309                           << FoldedCost << "\n");
    310         return false;
    311       }
    312     }
    313   }
    314 
    315   // Compute the total cost savings afforded by this PHI node.
    316   InstructionCost TotalMatCost = TTI.TCC_Free, TotalFoldedCost = TTI.TCC_Free;
    317   for (auto IncomingConstantAndCostsAndCount : CostsAndCounts) {
    318     InstructionCost MatCost = IncomingConstantAndCostsAndCount.second.MatCost;
    319     InstructionCost FoldedCost =
    320         IncomingConstantAndCostsAndCount.second.FoldedCost;
    321     int Count = IncomingConstantAndCostsAndCount.second.Count;
    322 
    323     TotalMatCost += MatCost * Count;
    324     TotalFoldedCost += FoldedCost * Count;
    325   }
    326   assert(TotalMatCost.isValid() && "Constants must be  materializable");
    327   assert(TotalFoldedCost <= TotalMatCost && "If each constant's folded cost is "
    328                                             "less that its materialized cost, "
    329                                             "the sum must be as well.");
    330   LLVM_DEBUG(dbgs() << "    Cost savings " << (TotalMatCost - TotalFoldedCost)
    331                     << ": " << PN << "\n");
    332   CostSavingsMap[&PN] = TotalMatCost - TotalFoldedCost;
    333   return true;
    334 }
    335 
    336 /// Simple helper to walk all the users of a list of phis depth first, and call
    337 /// a visit function on each one in post-order.
    338 ///
    339 /// All of the PHIs should be in the same basic block, and this is primarily
    340 /// used to make a single depth-first walk across their collective users
    341 /// without revisiting any subgraphs. Callers should provide a fast, idempotent
    342 /// callable to test whether a node has been visited and the more important
    343 /// callable to actually visit a particular node.
    344 ///
    345 /// Depth-first and postorder here refer to the *operand* graph -- we start
    346 /// from a collection of users of PHI nodes and walk "up" the operands
    347 /// depth-first.
    348 template <typename IsVisitedT, typename VisitT>
    349 static void visitPHIUsersAndDepsInPostOrder(ArrayRef<PHINode *> PNs,
    350                                             IsVisitedT IsVisited,
    351                                             VisitT Visit) {
    352   SmallVector<std::pair<Instruction *, User::value_op_iterator>, 16> DFSStack;
    353   for (auto *PN : PNs)
    354     for (Use &U : PN->uses()) {
    355       auto *UI = cast<Instruction>(U.getUser());
    356       if (IsVisited(UI))
    357         // Already visited this user, continue across the roots.
    358         continue;
    359 
    360       // Otherwise, walk the operand graph depth-first and visit each
    361       // dependency in postorder.
    362       DFSStack.push_back({UI, UI->value_op_begin()});
    363       do {
    364         User::value_op_iterator OpIt;
    365         std::tie(UI, OpIt) = DFSStack.pop_back_val();
    366         while (OpIt != UI->value_op_end()) {
    367           auto *OpI = dyn_cast<Instruction>(*OpIt);
    368           // Increment to the next operand for whenever we continue.
    369           ++OpIt;
    370           // No need to visit non-instructions, which can't form dependencies,
    371           // or instructions outside of our potential dependency set that we
    372           // were given. Finally, if we've already visited the node, continue
    373           // to the next.
    374           if (!OpI || IsVisited(OpI))
    375             continue;
    376 
    377           // Push onto the stack and descend. We can directly continue this
    378           // loop when ascending.
    379           DFSStack.push_back({UI, OpIt});
    380           UI = OpI;
    381           OpIt = OpI->value_op_begin();
    382         }
    383 
    384         // Finished visiting children, visit this node.
    385         assert(!IsVisited(UI) && "Should not have already visited a node!");
    386         Visit(UI);
    387       } while (!DFSStack.empty());
    388     }
    389 }
    390 
    391 /// Find profitable PHIs to speculate.
    392 ///
    393 /// For a PHI node to be profitable, we need the cost of speculating its users
    394 /// (and their dependencies) to not exceed the savings of folding the PHI's
    395 /// constant operands into the speculated users.
    396 ///
    397 /// Computing this is surprisingly challenging. Because users of two different
    398 /// PHI nodes can depend on each other or on common other instructions, it may
    399 /// be profitable to speculate two PHI nodes together even though neither one
    400 /// in isolation is profitable. The straightforward way to find all the
    401 /// profitable PHIs would be to check each combination of PHIs' cost, but this
    402 /// is exponential in complexity.
    403 ///
    404 /// Even if we assume that we only care about cases where we can consider each
    405 /// PHI node in isolation (rather than considering cases where none are
    406 /// profitable in isolation but some subset are profitable as a set), we still
    407 /// have a challenge. The obvious way to find all individually profitable PHIs
    408 /// is to iterate until reaching a fixed point, but this will be quadratic in
    409 /// complexity. =/
    410 ///
    411 /// This code currently uses a linear-to-compute order for a greedy approach.
    412 /// It won't find cases where a set of PHIs must be considered together, but it
    413 /// handles most cases of order dependence without quadratic iteration. The
    414 /// specific order used is the post-order across the operand DAG. When the last
    415 /// user of a PHI is visited in this postorder walk, we check it for
    416 /// profitability.
    417 ///
    418 /// There is an orthogonal extra complexity to all of this: computing the cost
    419 /// itself can easily become a linear computation making everything again (at
    420 /// best) quadratic. Using a postorder over the operand graph makes it
    421 /// particularly easy to avoid this through dynamic programming. As we do the
    422 /// postorder walk, we build the transitive cost of that subgraph. It is also
    423 /// straightforward to then update these costs when we mark a PHI for
    424 /// speculation so that subsequent PHIs don't re-pay the cost of already
    425 /// speculated instructions.
    426 static SmallVector<PHINode *, 16> findProfitablePHIs(
    427     ArrayRef<PHINode *> PNs,
    428     const SmallDenseMap<PHINode *, InstructionCost, 16> &CostSavingsMap,
    429     const SmallPtrSetImpl<Instruction *> &PotentialSpecSet, int NumPreds,
    430     DominatorTree &DT, TargetTransformInfo &TTI) {
    431   SmallVector<PHINode *, 16> SpecPNs;
    432 
    433   // First, establish a reverse mapping from immediate users of the PHI nodes
    434   // to the nodes themselves, and count how many users each PHI node has in
    435   // a way we can update while processing them.
    436   SmallDenseMap<Instruction *, TinyPtrVector<PHINode *>, 16> UserToPNMap;
    437   SmallDenseMap<PHINode *, int, 16> PNUserCountMap;
    438   SmallPtrSet<Instruction *, 16> UserSet;
    439   for (auto *PN : PNs) {
    440     assert(UserSet.empty() && "Must start with an empty user set!");
    441     for (Use &U : PN->uses())
    442       UserSet.insert(cast<Instruction>(U.getUser()));
    443     PNUserCountMap[PN] = UserSet.size();
    444     for (auto *UI : UserSet)
    445       UserToPNMap.insert({UI, {}}).first->second.push_back(PN);
    446     UserSet.clear();
    447   }
    448 
    449   // Now do a DFS across the operand graph of the users, computing cost as we
    450   // go and when all costs for a given PHI are known, checking that PHI for
    451   // profitability.
    452   SmallDenseMap<Instruction *, InstructionCost, 16> SpecCostMap;
    453   visitPHIUsersAndDepsInPostOrder(
    454       PNs,
    455       /*IsVisited*/
    456       [&](Instruction *I) {
    457         // We consider anything that isn't potentially speculated to be
    458         // "visited" as it is already handled. Similarly, anything that *is*
    459         // potentially speculated but for which we have an entry in our cost
    460         // map, we're done.
    461         return !PotentialSpecSet.count(I) || SpecCostMap.count(I);
    462       },
    463       /*Visit*/
    464       [&](Instruction *I) {
    465         // We've fully visited the operands, so sum their cost with this node
    466         // and update the cost map.
    467         InstructionCost Cost = TTI.TCC_Free;
    468         for (Value *OpV : I->operand_values())
    469           if (auto *OpI = dyn_cast<Instruction>(OpV)) {
    470             auto CostMapIt = SpecCostMap.find(OpI);
    471             if (CostMapIt != SpecCostMap.end())
    472               Cost += CostMapIt->second;
    473           }
    474         Cost += TTI.getUserCost(I, TargetTransformInfo::TCK_SizeAndLatency);
    475         bool Inserted = SpecCostMap.insert({I, Cost}).second;
    476         (void)Inserted;
    477         assert(Inserted && "Must not re-insert a cost during the DFS!");
    478 
    479         // Now check if this node had a corresponding PHI node using it. If so,
    480         // we need to decrement the outstanding user count for it.
    481         auto UserPNsIt = UserToPNMap.find(I);
    482         if (UserPNsIt == UserToPNMap.end())
    483           return;
    484         auto &UserPNs = UserPNsIt->second;
    485         auto UserPNsSplitIt = std::stable_partition(
    486             UserPNs.begin(), UserPNs.end(), [&](PHINode *UserPN) {
    487               int &PNUserCount = PNUserCountMap.find(UserPN)->second;
    488               assert(
    489                   PNUserCount > 0 &&
    490                   "Should never re-visit a PN after its user count hits zero!");
    491               --PNUserCount;
    492               return PNUserCount != 0;
    493             });
    494 
    495         // FIXME: Rather than one at a time, we should sum the savings as the
    496         // cost will be completely shared.
    497         SmallVector<Instruction *, 16> SpecWorklist;
    498         for (auto *PN : llvm::make_range(UserPNsSplitIt, UserPNs.end())) {
    499           InstructionCost SpecCost = TTI.TCC_Free;
    500           for (Use &U : PN->uses())
    501             SpecCost +=
    502                 SpecCostMap.find(cast<Instruction>(U.getUser()))->second;
    503           SpecCost *= (NumPreds - 1);
    504           // When the user count of a PHI node hits zero, we should check its
    505           // profitability. If profitable, we should mark it for speculation
    506           // and zero out the cost of everything it depends on.
    507           InstructionCost CostSavings = CostSavingsMap.find(PN)->second;
    508           if (SpecCost > CostSavings) {
    509             LLVM_DEBUG(dbgs() << "  Not profitable, speculation cost: " << *PN
    510                               << "\n"
    511                                  "    Cost savings:     "
    512                               << CostSavings
    513                               << "\n"
    514                                  "    Speculation cost: "
    515                               << SpecCost << "\n");
    516             continue;
    517           }
    518 
    519           // We're going to speculate this user-associated PHI. Copy it out and
    520           // add its users to the worklist to update their cost.
    521           SpecPNs.push_back(PN);
    522           for (Use &U : PN->uses()) {
    523             auto *UI = cast<Instruction>(U.getUser());
    524             auto CostMapIt = SpecCostMap.find(UI);
    525             if (CostMapIt->second == 0)
    526               continue;
    527             // Zero out this cost entry to avoid duplicates.
    528             CostMapIt->second = 0;
    529             SpecWorklist.push_back(UI);
    530           }
    531         }
    532 
    533         // Now walk all the operands of the users in the worklist transitively
    534         // to zero out all the memoized costs.
    535         while (!SpecWorklist.empty()) {
    536           Instruction *SpecI = SpecWorklist.pop_back_val();
    537           assert(SpecCostMap.find(SpecI)->second == 0 &&
    538                  "Didn't zero out a cost!");
    539 
    540           // Walk the operands recursively to zero out their cost as well.
    541           for (auto *OpV : SpecI->operand_values()) {
    542             auto *OpI = dyn_cast<Instruction>(OpV);
    543             if (!OpI)
    544               continue;
    545             auto CostMapIt = SpecCostMap.find(OpI);
    546             if (CostMapIt == SpecCostMap.end() || CostMapIt->second == 0)
    547               continue;
    548             CostMapIt->second = 0;
    549             SpecWorklist.push_back(OpI);
    550           }
    551         }
    552       });
    553 
    554   return SpecPNs;
    555 }
    556 
    557 /// Speculate users around a set of PHI nodes.
    558 ///
    559 /// This routine does the actual speculation around a set of PHI nodes where we
    560 /// have determined this to be both safe and profitable.
    561 ///
    562 /// This routine handles any spliting of critical edges necessary to create
    563 /// a safe block to speculate into as well as cloning the instructions and
    564 /// rewriting all uses.
    565 static void speculatePHIs(ArrayRef<PHINode *> SpecPNs,
    566                           SmallPtrSetImpl<Instruction *> &PotentialSpecSet,
    567                           SmallSetVector<BasicBlock *, 16> &PredSet,
    568                           DominatorTree &DT) {
    569   LLVM_DEBUG(dbgs() << "  Speculating around " << SpecPNs.size() << " PHIs!\n");
    570   NumPHIsSpeculated += SpecPNs.size();
    571 
    572   // Split any critical edges so that we have a block to hoist into.
    573   auto *ParentBB = SpecPNs[0]->getParent();
    574   SmallVector<BasicBlock *, 16> SpecPreds;
    575   SpecPreds.reserve(PredSet.size());
    576   for (auto *PredBB : PredSet) {
    577     auto *NewPredBB = SplitCriticalEdge(
    578         PredBB, ParentBB,
    579         CriticalEdgeSplittingOptions(&DT).setMergeIdenticalEdges());
    580     if (NewPredBB) {
    581       ++NumEdgesSplit;
    582       LLVM_DEBUG(dbgs() << "  Split critical edge from: " << PredBB->getName()
    583                         << "\n");
    584       SpecPreds.push_back(NewPredBB);
    585     } else {
    586       assert(PredBB->getSingleSuccessor() == ParentBB &&
    587              "We need a non-critical predecessor to speculate into.");
    588       assert(!isa<InvokeInst>(PredBB->getTerminator()) &&
    589              "Cannot have a non-critical invoke!");
    590 
    591       // Already non-critical, use existing pred.
    592       SpecPreds.push_back(PredBB);
    593     }
    594   }
    595 
    596   SmallPtrSet<Instruction *, 16> SpecSet;
    597   SmallVector<Instruction *, 16> SpecList;
    598   visitPHIUsersAndDepsInPostOrder(SpecPNs,
    599                                   /*IsVisited*/
    600                                   [&](Instruction *I) {
    601                                     // This is visited if we don't need to
    602                                     // speculate it or we already have
    603                                     // speculated it.
    604                                     return !PotentialSpecSet.count(I) ||
    605                                            SpecSet.count(I);
    606                                   },
    607                                   /*Visit*/
    608                                   [&](Instruction *I) {
    609                                     // All operands scheduled, schedule this
    610                                     // node.
    611                                     SpecSet.insert(I);
    612                                     SpecList.push_back(I);
    613                                   });
    614 
    615   int NumSpecInsts = SpecList.size() * SpecPreds.size();
    616   int NumRedundantInsts = NumSpecInsts - SpecList.size();
    617   LLVM_DEBUG(dbgs() << "  Inserting " << NumSpecInsts
    618                     << " speculated instructions, " << NumRedundantInsts
    619                     << " redundancies\n");
    620   NumSpeculatedInstructions += NumSpecInsts;
    621   NumNewRedundantInstructions += NumRedundantInsts;
    622 
    623   // Each predecessor is numbered by its index in `SpecPreds`, so for each
    624   // instruction we speculate, the speculated instruction is stored in that
    625   // index of the vector associated with the original instruction. We also
    626   // store the incoming values for each predecessor from any PHIs used.
    627   SmallDenseMap<Instruction *, SmallVector<Value *, 2>, 16> SpeculatedValueMap;
    628 
    629   // Inject the synthetic mappings to rewrite PHIs to the appropriate incoming
    630   // value. This handles both the PHIs we are speculating around and any other
    631   // PHIs that happen to be used.
    632   for (auto *OrigI : SpecList)
    633     for (auto *OpV : OrigI->operand_values()) {
    634       auto *OpPN = dyn_cast<PHINode>(OpV);
    635       if (!OpPN || OpPN->getParent() != ParentBB)
    636         continue;
    637 
    638       auto InsertResult = SpeculatedValueMap.insert({OpPN, {}});
    639       if (!InsertResult.second)
    640         continue;
    641 
    642       auto &SpeculatedVals = InsertResult.first->second;
    643 
    644       // Populating our structure for mapping is particularly annoying because
    645       // finding an incoming value for a particular predecessor block in a PHI
    646       // node is a linear time operation! To avoid quadratic behavior, we build
    647       // a map for this PHI node's incoming values and then translate it into
    648       // the more compact representation used below.
    649       SmallDenseMap<BasicBlock *, Value *, 16> IncomingValueMap;
    650       for (int i : llvm::seq<int>(0, OpPN->getNumIncomingValues()))
    651         IncomingValueMap[OpPN->getIncomingBlock(i)] = OpPN->getIncomingValue(i);
    652 
    653       for (auto *PredBB : SpecPreds)
    654         SpeculatedVals.push_back(IncomingValueMap.find(PredBB)->second);
    655     }
    656 
    657   // Speculate into each predecessor.
    658   for (int PredIdx : llvm::seq<int>(0, SpecPreds.size())) {
    659     auto *PredBB = SpecPreds[PredIdx];
    660     assert(PredBB->getSingleSuccessor() == ParentBB &&
    661            "We need a non-critical predecessor to speculate into.");
    662 
    663     for (auto *OrigI : SpecList) {
    664       auto *NewI = OrigI->clone();
    665       NewI->setName(Twine(OrigI->getName()) + "." + Twine(PredIdx));
    666       NewI->insertBefore(PredBB->getTerminator());
    667 
    668       // Rewrite all the operands to the previously speculated instructions.
    669       // Because we're walking in-order, the defs must precede the uses and we
    670       // should already have these mappings.
    671       for (Use &U : NewI->operands()) {
    672         auto *OpI = dyn_cast<Instruction>(U.get());
    673         if (!OpI)
    674           continue;
    675         auto MapIt = SpeculatedValueMap.find(OpI);
    676         if (MapIt == SpeculatedValueMap.end())
    677           continue;
    678         const auto &SpeculatedVals = MapIt->second;
    679         assert(SpeculatedVals[PredIdx] &&
    680                "Must have a speculated value for this predecessor!");
    681         assert(SpeculatedVals[PredIdx]->getType() == OpI->getType() &&
    682                "Speculated value has the wrong type!");
    683 
    684         // Rewrite the use to this predecessor's speculated instruction.
    685         U.set(SpeculatedVals[PredIdx]);
    686       }
    687 
    688       // Commute instructions which now have a constant in the LHS but not the
    689       // RHS.
    690       if (NewI->isBinaryOp() && NewI->isCommutative() &&
    691           isa<Constant>(NewI->getOperand(0)) &&
    692           !isa<Constant>(NewI->getOperand(1)))
    693         NewI->getOperandUse(0).swap(NewI->getOperandUse(1));
    694 
    695       SpeculatedValueMap[OrigI].push_back(NewI);
    696       assert(SpeculatedValueMap[OrigI][PredIdx] == NewI &&
    697              "Mismatched speculated instruction index!");
    698     }
    699   }
    700 
    701   // Walk the speculated instruction list and if they have uses, insert a PHI
    702   // for them from the speculated versions, and replace the uses with the PHI.
    703   // Then erase the instructions as they have been fully speculated. The walk
    704   // needs to be in reverse so that we don't think there are users when we'll
    705   // actually eventually remove them later.
    706   IRBuilder<> IRB(SpecPNs[0]);
    707   for (auto *OrigI : llvm::reverse(SpecList)) {
    708     // Check if we need a PHI for any remaining users and if so, insert it.
    709     if (!OrigI->use_empty()) {
    710       auto *SpecIPN = IRB.CreatePHI(OrigI->getType(), SpecPreds.size(),
    711                                     Twine(OrigI->getName()) + ".phi");
    712       // Add the incoming values we speculated.
    713       auto &SpeculatedVals = SpeculatedValueMap.find(OrigI)->second;
    714       for (int PredIdx : llvm::seq<int>(0, SpecPreds.size()))
    715         SpecIPN->addIncoming(SpeculatedVals[PredIdx], SpecPreds[PredIdx]);
    716 
    717       // And replace the uses with the PHI node.
    718       OrigI->replaceAllUsesWith(SpecIPN);
    719     }
    720 
    721     // It is important to immediately erase this so that it stops using other
    722     // instructions. This avoids inserting needless PHIs of them.
    723     OrigI->eraseFromParent();
    724   }
    725 
    726   // All of the uses of the speculated phi nodes should be removed at this
    727   // point, so erase them.
    728   for (auto *SpecPN : SpecPNs) {
    729     assert(SpecPN->use_empty() && "All users should have been speculated!");
    730     SpecPN->eraseFromParent();
    731   }
    732 }
    733 
    734 /// Try to speculate around a series of PHIs from a single basic block.
    735 ///
    736 /// This routine checks whether any of these PHIs are profitable to speculate
    737 /// users around. If safe and profitable, it does the speculation. It returns
    738 /// true when at least some speculation occurs.
    739 static bool tryToSpeculatePHIs(SmallVectorImpl<PHINode *> &PNs,
    740                                DominatorTree &DT, TargetTransformInfo &TTI) {
    741   LLVM_DEBUG(dbgs() << "Evaluating phi nodes for speculation:\n");
    742 
    743   // Savings in cost from speculating around a PHI node.
    744   SmallDenseMap<PHINode *, InstructionCost, 16> CostSavingsMap;
    745 
    746   // Remember the set of instructions that are candidates for speculation so
    747   // that we can quickly walk things within that space. This prunes out
    748   // instructions already available along edges, etc.
    749   SmallPtrSet<Instruction *, 16> PotentialSpecSet;
    750 
    751   // Remember the set of instructions that are (transitively) unsafe to
    752   // speculate into the incoming edges of this basic block. This avoids
    753   // recomputing them for each PHI node we check. This set is specific to this
    754   // block though as things are pruned out of it based on what is available
    755   // along incoming edges.
    756   SmallPtrSet<Instruction *, 16> UnsafeSet;
    757 
    758   // For each PHI node in this block, check whether there are immediate folding
    759   // opportunities from speculation, and whether that speculation will be
    760   // valid. This determise the set of safe PHIs to speculate.
    761   llvm::erase_if(PNs, [&](PHINode *PN) {
    762     return !isSafeAndProfitableToSpeculateAroundPHI(
    763         *PN, CostSavingsMap, PotentialSpecSet, UnsafeSet, DT, TTI);
    764   });
    765   // If no PHIs were profitable, skip.
    766   if (PNs.empty()) {
    767     LLVM_DEBUG(dbgs() << "  No safe and profitable PHIs found!\n");
    768     return false;
    769   }
    770 
    771   // We need to know how much speculation will cost which is determined by how
    772   // many incoming edges will need a copy of each speculated instruction.
    773   SmallSetVector<BasicBlock *, 16> PredSet;
    774   for (auto *PredBB : PNs[0]->blocks()) {
    775     if (!PredSet.insert(PredBB))
    776       continue;
    777 
    778     // We cannot speculate when a predecessor is an indirect branch.
    779     // FIXME: We also can't reliably create a non-critical edge block for
    780     // speculation if the predecessor is an invoke. This doesn't seem
    781     // fundamental and we should probably be splitting critical edges
    782     // differently.
    783     const auto *TermInst = PredBB->getTerminator();
    784     if (isa<IndirectBrInst>(TermInst) ||
    785         isa<InvokeInst>(TermInst) ||
    786         isa<CallBrInst>(TermInst)) {
    787       LLVM_DEBUG(dbgs() << "  Invalid: predecessor terminator: "
    788                         << PredBB->getName() << "\n");
    789       return false;
    790     }
    791   }
    792   if (PredSet.size() < 2) {
    793     LLVM_DEBUG(dbgs() << "  Unimportant: phi with only one predecessor\n");
    794     return false;
    795   }
    796 
    797   SmallVector<PHINode *, 16> SpecPNs = findProfitablePHIs(
    798       PNs, CostSavingsMap, PotentialSpecSet, PredSet.size(), DT, TTI);
    799   if (SpecPNs.empty())
    800     // Nothing to do.
    801     return false;
    802 
    803   speculatePHIs(SpecPNs, PotentialSpecSet, PredSet, DT);
    804   return true;
    805 }
    806 
    807 PreservedAnalyses SpeculateAroundPHIsPass::run(Function &F,
    808                                                FunctionAnalysisManager &AM) {
    809   auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
    810   auto &TTI = AM.getResult<TargetIRAnalysis>(F);
    811 
    812   bool Changed = false;
    813   for (auto *BB : ReversePostOrderTraversal<Function *>(&F)) {
    814     SmallVector<PHINode *, 16> PNs;
    815     auto BBI = BB->begin();
    816     while (auto *PN = dyn_cast<PHINode>(&*BBI)) {
    817       PNs.push_back(PN);
    818       ++BBI;
    819     }
    820 
    821     if (PNs.empty())
    822       continue;
    823 
    824     Changed |= tryToSpeculatePHIs(PNs, DT, TTI);
    825   }
    826 
    827   if (!Changed)
    828     return PreservedAnalyses::all();
    829 
    830   PreservedAnalyses PA;
    831   return PA;
    832 }
    833