Home | History | Annotate | Line # | Download | only in Utils
      1 //===-- LCSSA.cpp - Convert loops into loop-closed SSA form ---------------===//
      2 //
      3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
      4 // See https://llvm.org/LICENSE.txt for license information.
      5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
      6 //
      7 //===----------------------------------------------------------------------===//
      8 //
      9 // This pass transforms loops by placing phi nodes at the end of the loops for
     10 // all values that are live across the loop boundary.  For example, it turns
     11 // the left into the right code:
     12 //
     13 // for (...)                for (...)
     14 //   if (c)                   if (c)
     15 //     X1 = ...                 X1 = ...
     16 //   else                     else
     17 //     X2 = ...                 X2 = ...
     18 //   X3 = phi(X1, X2)         X3 = phi(X1, X2)
     19 // ... = X3 + 4             X4 = phi(X3)
     20 //                          ... = X4 + 4
     21 //
     22 // This is still valid LLVM; the extra phi nodes are purely redundant, and will
     23 // be trivially eliminated by InstCombine.  The major benefit of this
     24 // transformation is that it makes many other loop optimizations, such as
     25 // LoopUnswitching, simpler.
     26 //
     27 //===----------------------------------------------------------------------===//
     28 
     29 #include "llvm/Transforms/Utils/LCSSA.h"
     30 #include "llvm/ADT/STLExtras.h"
     31 #include "llvm/ADT/Statistic.h"
     32 #include "llvm/Analysis/AliasAnalysis.h"
     33 #include "llvm/Analysis/BasicAliasAnalysis.h"
     34 #include "llvm/Analysis/BranchProbabilityInfo.h"
     35 #include "llvm/Analysis/GlobalsModRef.h"
     36 #include "llvm/Analysis/LoopPass.h"
     37 #include "llvm/Analysis/MemorySSA.h"
     38 #include "llvm/Analysis/ScalarEvolution.h"
     39 #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
     40 #include "llvm/IR/Constants.h"
     41 #include "llvm/IR/DebugInfo.h"
     42 #include "llvm/IR/Dominators.h"
     43 #include "llvm/IR/Function.h"
     44 #include "llvm/IR/IRBuilder.h"
     45 #include "llvm/IR/Instructions.h"
     46 #include "llvm/IR/IntrinsicInst.h"
     47 #include "llvm/IR/PredIteratorCache.h"
     48 #include "llvm/InitializePasses.h"
     49 #include "llvm/Pass.h"
     50 #include "llvm/Support/CommandLine.h"
     51 #include "llvm/Transforms/Utils.h"
     52 #include "llvm/Transforms/Utils/LoopUtils.h"
     53 #include "llvm/Transforms/Utils/SSAUpdater.h"
     54 using namespace llvm;
     55 
     56 #define DEBUG_TYPE "lcssa"
     57 
     58 STATISTIC(NumLCSSA, "Number of live out of a loop variables");
     59 
     60 #ifdef EXPENSIVE_CHECKS
     61 static bool VerifyLoopLCSSA = true;
     62 #else
     63 static bool VerifyLoopLCSSA = false;
     64 #endif
     65 static cl::opt<bool, true>
     66     VerifyLoopLCSSAFlag("verify-loop-lcssa", cl::location(VerifyLoopLCSSA),
     67                         cl::Hidden,
     68                         cl::desc("Verify loop lcssa form (time consuming)"));
     69 
     70 /// Return true if the specified block is in the list.
     71 static bool isExitBlock(BasicBlock *BB,
     72                         const SmallVectorImpl<BasicBlock *> &ExitBlocks) {
     73   return is_contained(ExitBlocks, BB);
     74 }
     75 
     76 /// For every instruction from the worklist, check to see if it has any uses
     77 /// that are outside the current loop.  If so, insert LCSSA PHI nodes and
     78 /// rewrite the uses.
     79 bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist,
     80                                     const DominatorTree &DT, const LoopInfo &LI,
     81                                     ScalarEvolution *SE, IRBuilderBase &Builder,
     82                                     SmallVectorImpl<PHINode *> *PHIsToRemove) {
     83   SmallVector<Use *, 16> UsesToRewrite;
     84   SmallSetVector<PHINode *, 16> LocalPHIsToRemove;
     85   PredIteratorCache PredCache;
     86   bool Changed = false;
     87 
     88   IRBuilderBase::InsertPointGuard InsertPtGuard(Builder);
     89 
     90   // Cache the Loop ExitBlocks across this loop.  We expect to get a lot of
     91   // instructions within the same loops, computing the exit blocks is
     92   // expensive, and we're not mutating the loop structure.
     93   SmallDenseMap<Loop*, SmallVector<BasicBlock *,1>> LoopExitBlocks;
     94 
     95   while (!Worklist.empty()) {
     96     UsesToRewrite.clear();
     97 
     98     Instruction *I = Worklist.pop_back_val();
     99     assert(!I->getType()->isTokenTy() && "Tokens shouldn't be in the worklist");
    100     BasicBlock *InstBB = I->getParent();
    101     Loop *L = LI.getLoopFor(InstBB);
    102     assert(L && "Instruction belongs to a BB that's not part of a loop");
    103     if (!LoopExitBlocks.count(L))
    104       L->getExitBlocks(LoopExitBlocks[L]);
    105     assert(LoopExitBlocks.count(L));
    106     const SmallVectorImpl<BasicBlock *> &ExitBlocks = LoopExitBlocks[L];
    107 
    108     if (ExitBlocks.empty())
    109       continue;
    110 
    111     for (Use &U : I->uses()) {
    112       Instruction *User = cast<Instruction>(U.getUser());
    113       BasicBlock *UserBB = User->getParent();
    114 
    115       // For practical purposes, we consider that the use in a PHI
    116       // occurs in the respective predecessor block. For more info,
    117       // see the `phi` doc in LangRef and the LCSSA doc.
    118       if (auto *PN = dyn_cast<PHINode>(User))
    119         UserBB = PN->getIncomingBlock(U);
    120 
    121       if (InstBB != UserBB && !L->contains(UserBB))
    122         UsesToRewrite.push_back(&U);
    123     }
    124 
    125     // If there are no uses outside the loop, exit with no change.
    126     if (UsesToRewrite.empty())
    127       continue;
    128 
    129     ++NumLCSSA; // We are applying the transformation
    130 
    131     // Invoke instructions are special in that their result value is not
    132     // available along their unwind edge. The code below tests to see whether
    133     // DomBB dominates the value, so adjust DomBB to the normal destination
    134     // block, which is effectively where the value is first usable.
    135     BasicBlock *DomBB = InstBB;
    136     if (auto *Inv = dyn_cast<InvokeInst>(I))
    137       DomBB = Inv->getNormalDest();
    138 
    139     const DomTreeNode *DomNode = DT.getNode(DomBB);
    140 
    141     SmallVector<PHINode *, 16> AddedPHIs;
    142     SmallVector<PHINode *, 8> PostProcessPHIs;
    143 
    144     SmallVector<PHINode *, 4> InsertedPHIs;
    145     SSAUpdater SSAUpdate(&InsertedPHIs);
    146     SSAUpdate.Initialize(I->getType(), I->getName());
    147 
    148     // Force re-computation of I, as some users now need to use the new PHI
    149     // node.
    150     if (SE)
    151       SE->forgetValue(I);
    152 
    153     // Insert the LCSSA phi's into all of the exit blocks dominated by the
    154     // value, and add them to the Phi's map.
    155     for (BasicBlock *ExitBB : ExitBlocks) {
    156       if (!DT.dominates(DomNode, DT.getNode(ExitBB)))
    157         continue;
    158 
    159       // If we already inserted something for this BB, don't reprocess it.
    160       if (SSAUpdate.HasValueForBlock(ExitBB))
    161         continue;
    162       Builder.SetInsertPoint(&ExitBB->front());
    163       PHINode *PN = Builder.CreatePHI(I->getType(), PredCache.size(ExitBB),
    164                                       I->getName() + ".lcssa");
    165       // Get the debug location from the original instruction.
    166       PN->setDebugLoc(I->getDebugLoc());
    167 
    168       // Add inputs from inside the loop for this PHI. This is valid
    169       // because `I` dominates `ExitBB` (checked above).  This implies
    170       // that every incoming block/edge is dominated by `I` as well,
    171       // i.e. we can add uses of `I` to those incoming edges/append to the incoming
    172       // blocks without violating the SSA dominance property.
    173       for (BasicBlock *Pred : PredCache.get(ExitBB)) {
    174         PN->addIncoming(I, Pred);
    175 
    176         // If the exit block has a predecessor not within the loop, arrange for
    177         // the incoming value use corresponding to that predecessor to be
    178         // rewritten in terms of a different LCSSA PHI.
    179         if (!L->contains(Pred))
    180           UsesToRewrite.push_back(
    181               &PN->getOperandUse(PN->getOperandNumForIncomingValue(
    182                   PN->getNumIncomingValues() - 1)));
    183       }
    184 
    185       AddedPHIs.push_back(PN);
    186 
    187       // Remember that this phi makes the value alive in this block.
    188       SSAUpdate.AddAvailableValue(ExitBB, PN);
    189 
    190       // LoopSimplify might fail to simplify some loops (e.g. when indirect
    191       // branches are involved). In such situations, it might happen that an
    192       // exit for Loop L1 is the header of a disjoint Loop L2. Thus, when we
    193       // create PHIs in such an exit block, we are also inserting PHIs into L2's
    194       // header. This could break LCSSA form for L2 because these inserted PHIs
    195       // can also have uses outside of L2. Remember all PHIs in such situation
    196       // as to revisit than later on. FIXME: Remove this if indirectbr support
    197       // into LoopSimplify gets improved.
    198       if (auto *OtherLoop = LI.getLoopFor(ExitBB))
    199         if (!L->contains(OtherLoop))
    200           PostProcessPHIs.push_back(PN);
    201     }
    202 
    203     // Rewrite all uses outside the loop in terms of the new PHIs we just
    204     // inserted.
    205     for (Use *UseToRewrite : UsesToRewrite) {
    206       Instruction *User = cast<Instruction>(UseToRewrite->getUser());
    207       BasicBlock *UserBB = User->getParent();
    208 
    209       // For practical purposes, we consider that the use in a PHI
    210       // occurs in the respective predecessor block. For more info,
    211       // see the `phi` doc in LangRef and the LCSSA doc.
    212       if (auto *PN = dyn_cast<PHINode>(User))
    213         UserBB = PN->getIncomingBlock(*UseToRewrite);
    214 
    215       // If this use is in an exit block, rewrite to use the newly inserted PHI.
    216       // This is required for correctness because SSAUpdate doesn't handle uses
    217       // in the same block.  It assumes the PHI we inserted is at the end of the
    218       // block.
    219       if (isa<PHINode>(UserBB->begin()) && isExitBlock(UserBB, ExitBlocks)) {
    220         UseToRewrite->set(&UserBB->front());
    221         continue;
    222       }
    223 
    224       // If we added a single PHI, it must dominate all uses and we can directly
    225       // rename it.
    226       if (AddedPHIs.size() == 1) {
    227         UseToRewrite->set(AddedPHIs[0]);
    228         continue;
    229       }
    230 
    231       // Otherwise, do full PHI insertion.
    232       SSAUpdate.RewriteUse(*UseToRewrite);
    233     }
    234 
    235     SmallVector<DbgValueInst *, 4> DbgValues;
    236     llvm::findDbgValues(DbgValues, I);
    237 
    238     // Update pre-existing debug value uses that reside outside the loop.
    239     for (auto DVI : DbgValues) {
    240       BasicBlock *UserBB = DVI->getParent();
    241       if (InstBB == UserBB || L->contains(UserBB))
    242         continue;
    243       // We currently only handle debug values residing in blocks that were
    244       // traversed while rewriting the uses. If we inserted just a single PHI,
    245       // we will handle all relevant debug values.
    246       Value *V = AddedPHIs.size() == 1 ? AddedPHIs[0]
    247                                        : SSAUpdate.FindValueForBlock(UserBB);
    248       if (V)
    249         DVI->replaceVariableLocationOp(I, V);
    250     }
    251 
    252     // SSAUpdater might have inserted phi-nodes inside other loops. We'll need
    253     // to post-process them to keep LCSSA form.
    254     for (PHINode *InsertedPN : InsertedPHIs) {
    255       if (auto *OtherLoop = LI.getLoopFor(InsertedPN->getParent()))
    256         if (!L->contains(OtherLoop))
    257           PostProcessPHIs.push_back(InsertedPN);
    258     }
    259 
    260     // Post process PHI instructions that were inserted into another disjoint
    261     // loop and update their exits properly.
    262     for (auto *PostProcessPN : PostProcessPHIs)
    263       if (!PostProcessPN->use_empty())
    264         Worklist.push_back(PostProcessPN);
    265 
    266     // Keep track of PHI nodes that we want to remove because they did not have
    267     // any uses rewritten.
    268     for (PHINode *PN : AddedPHIs)
    269       if (PN->use_empty())
    270         LocalPHIsToRemove.insert(PN);
    271 
    272     Changed = true;
    273   }
    274 
    275   // Remove PHI nodes that did not have any uses rewritten or add them to
    276   // PHIsToRemove, so the caller can remove them after some additional cleanup.
    277   // We need to redo the use_empty() check here, because even if the PHI node
    278   // wasn't used when added to LocalPHIsToRemove, later added PHI nodes can be
    279   // using it.  This cleanup is not guaranteed to handle trees/cycles of PHI
    280   // nodes that only are used by each other. Such situations has only been
    281   // noticed when the input IR contains unreachable code, and leaving some extra
    282   // redundant PHI nodes in such situations is considered a minor problem.
    283   if (PHIsToRemove) {
    284     PHIsToRemove->append(LocalPHIsToRemove.begin(), LocalPHIsToRemove.end());
    285   } else {
    286     for (PHINode *PN : LocalPHIsToRemove)
    287       if (PN->use_empty())
    288         PN->eraseFromParent();
    289   }
    290   return Changed;
    291 }
    292 
    293 // Compute the set of BasicBlocks in the loop `L` dominating at least one exit.
    294 static void computeBlocksDominatingExits(
    295     Loop &L, const DominatorTree &DT, SmallVector<BasicBlock *, 8> &ExitBlocks,
    296     SmallSetVector<BasicBlock *, 8> &BlocksDominatingExits) {
    297   // We start from the exit blocks, as every block trivially dominates itself
    298   // (not strictly).
    299   SmallVector<BasicBlock *, 8> BBWorklist(ExitBlocks);
    300 
    301   while (!BBWorklist.empty()) {
    302     BasicBlock *BB = BBWorklist.pop_back_val();
    303 
    304     // Check if this is a loop header. If this is the case, we're done.
    305     if (L.getHeader() == BB)
    306       continue;
    307 
    308     // Otherwise, add its immediate predecessor in the dominator tree to the
    309     // worklist, unless we visited it already.
    310     BasicBlock *IDomBB = DT.getNode(BB)->getIDom()->getBlock();
    311 
    312     // Exit blocks can have an immediate dominator not beloinging to the
    313     // loop. For an exit block to be immediately dominated by another block
    314     // outside the loop, it implies not all paths from that dominator, to the
    315     // exit block, go through the loop.
    316     // Example:
    317     //
    318     // |---- A
    319     // |     |
    320     // |     B<--
    321     // |     |  |
    322     // |---> C --
    323     //       |
    324     //       D
    325     //
    326     // C is the exit block of the loop and it's immediately dominated by A,
    327     // which doesn't belong to the loop.
    328     if (!L.contains(IDomBB))
    329       continue;
    330 
    331     if (BlocksDominatingExits.insert(IDomBB))
    332       BBWorklist.push_back(IDomBB);
    333   }
    334 }
    335 
    336 bool llvm::formLCSSA(Loop &L, const DominatorTree &DT, const LoopInfo *LI,
    337                      ScalarEvolution *SE) {
    338   bool Changed = false;
    339 
    340 #ifdef EXPENSIVE_CHECKS
    341   // Verify all sub-loops are in LCSSA form already.
    342   for (Loop *SubLoop: L)
    343     assert(SubLoop->isRecursivelyLCSSAForm(DT, *LI) && "Subloop not in LCSSA!");
    344 #endif
    345 
    346   SmallVector<BasicBlock *, 8> ExitBlocks;
    347   L.getExitBlocks(ExitBlocks);
    348   if (ExitBlocks.empty())
    349     return false;
    350 
    351   SmallSetVector<BasicBlock *, 8> BlocksDominatingExits;
    352 
    353   // We want to avoid use-scanning leveraging dominance informations.
    354   // If a block doesn't dominate any of the loop exits, the none of the values
    355   // defined in the loop can be used outside.
    356   // We compute the set of blocks fullfilling the conditions in advance
    357   // walking the dominator tree upwards until we hit a loop header.
    358   computeBlocksDominatingExits(L, DT, ExitBlocks, BlocksDominatingExits);
    359 
    360   SmallVector<Instruction *, 8> Worklist;
    361 
    362   // Look at all the instructions in the loop, checking to see if they have uses
    363   // outside the loop.  If so, put them into the worklist to rewrite those uses.
    364   for (BasicBlock *BB : BlocksDominatingExits) {
    365     // Skip blocks that are part of any sub-loops, they must be in LCSSA
    366     // already.
    367     if (LI->getLoopFor(BB) != &L)
    368       continue;
    369     for (Instruction &I : *BB) {
    370       // Reject two common cases fast: instructions with no uses (like stores)
    371       // and instructions with one use that is in the same block as this.
    372       if (I.use_empty() ||
    373           (I.hasOneUse() && I.user_back()->getParent() == BB &&
    374            !isa<PHINode>(I.user_back())))
    375         continue;
    376 
    377       // Tokens cannot be used in PHI nodes, so we skip over them.
    378       // We can run into tokens which are live out of a loop with catchswitch
    379       // instructions in Windows EH if the catchswitch has one catchpad which
    380       // is inside the loop and another which is not.
    381       if (I.getType()->isTokenTy())
    382         continue;
    383 
    384       Worklist.push_back(&I);
    385     }
    386   }
    387 
    388   IRBuilder<> Builder(L.getHeader()->getContext());
    389   Changed = formLCSSAForInstructions(Worklist, DT, *LI, SE, Builder);
    390 
    391   // If we modified the code, remove any caches about the loop from SCEV to
    392   // avoid dangling entries.
    393   // FIXME: This is a big hammer, can we clear the cache more selectively?
    394   if (SE && Changed)
    395     SE->forgetLoop(&L);
    396 
    397   assert(L.isLCSSAForm(DT));
    398 
    399   return Changed;
    400 }
    401 
    402 /// Process a loop nest depth first.
    403 bool llvm::formLCSSARecursively(Loop &L, const DominatorTree &DT,
    404                                 const LoopInfo *LI, ScalarEvolution *SE) {
    405   bool Changed = false;
    406 
    407   // Recurse depth-first through inner loops.
    408   for (Loop *SubLoop : L.getSubLoops())
    409     Changed |= formLCSSARecursively(*SubLoop, DT, LI, SE);
    410 
    411   Changed |= formLCSSA(L, DT, LI, SE);
    412   return Changed;
    413 }
    414 
    415 /// Process all loops in the function, inner-most out.
    416 static bool formLCSSAOnAllLoops(const LoopInfo *LI, const DominatorTree &DT,
    417                                 ScalarEvolution *SE) {
    418   bool Changed = false;
    419   for (auto &L : *LI)
    420     Changed |= formLCSSARecursively(*L, DT, LI, SE);
    421   return Changed;
    422 }
    423 
    424 namespace {
    425 struct LCSSAWrapperPass : public FunctionPass {
    426   static char ID; // Pass identification, replacement for typeid
    427   LCSSAWrapperPass() : FunctionPass(ID) {
    428     initializeLCSSAWrapperPassPass(*PassRegistry::getPassRegistry());
    429   }
    430 
    431   // Cached analysis information for the current function.
    432   DominatorTree *DT;
    433   LoopInfo *LI;
    434   ScalarEvolution *SE;
    435 
    436   bool runOnFunction(Function &F) override;
    437   void verifyAnalysis() const override {
    438     // This check is very expensive. On the loop intensive compiles it may cause
    439     // up to 10x slowdown. Currently it's disabled by default. LPPassManager
    440     // always does limited form of the LCSSA verification. Similar reasoning
    441     // was used for the LoopInfo verifier.
    442     if (VerifyLoopLCSSA) {
    443       assert(all_of(*LI,
    444                     [&](Loop *L) {
    445                       return L->isRecursivelyLCSSAForm(*DT, *LI);
    446                     }) &&
    447              "LCSSA form is broken!");
    448     }
    449   };
    450 
    451   /// This transformation requires natural loop information & requires that
    452   /// loop preheaders be inserted into the CFG.  It maintains both of these,
    453   /// as well as the CFG.  It also requires dominator information.
    454   void getAnalysisUsage(AnalysisUsage &AU) const override {
    455     AU.setPreservesCFG();
    456 
    457     AU.addRequired<DominatorTreeWrapperPass>();
    458     AU.addRequired<LoopInfoWrapperPass>();
    459     AU.addPreservedID(LoopSimplifyID);
    460     AU.addPreserved<AAResultsWrapperPass>();
    461     AU.addPreserved<BasicAAWrapperPass>();
    462     AU.addPreserved<GlobalsAAWrapperPass>();
    463     AU.addPreserved<ScalarEvolutionWrapperPass>();
    464     AU.addPreserved<SCEVAAWrapperPass>();
    465     AU.addPreserved<BranchProbabilityInfoWrapperPass>();
    466     AU.addPreserved<MemorySSAWrapperPass>();
    467 
    468     // This is needed to perform LCSSA verification inside LPPassManager
    469     AU.addRequired<LCSSAVerificationPass>();
    470     AU.addPreserved<LCSSAVerificationPass>();
    471   }
    472 };
    473 }
    474 
    475 char LCSSAWrapperPass::ID = 0;
    476 INITIALIZE_PASS_BEGIN(LCSSAWrapperPass, "lcssa", "Loop-Closed SSA Form Pass",
    477                       false, false)
    478 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
    479 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
    480 INITIALIZE_PASS_DEPENDENCY(LCSSAVerificationPass)
    481 INITIALIZE_PASS_END(LCSSAWrapperPass, "lcssa", "Loop-Closed SSA Form Pass",
    482                     false, false)
    483 
    484 Pass *llvm::createLCSSAPass() { return new LCSSAWrapperPass(); }
    485 char &llvm::LCSSAID = LCSSAWrapperPass::ID;
    486 
    487 /// Transform \p F into loop-closed SSA form.
    488 bool LCSSAWrapperPass::runOnFunction(Function &F) {
    489   LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
    490   DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
    491   auto *SEWP = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>();
    492   SE = SEWP ? &SEWP->getSE() : nullptr;
    493 
    494   return formLCSSAOnAllLoops(LI, *DT, SE);
    495 }
    496 
    497 PreservedAnalyses LCSSAPass::run(Function &F, FunctionAnalysisManager &AM) {
    498   auto &LI = AM.getResult<LoopAnalysis>(F);
    499   auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
    500   auto *SE = AM.getCachedResult<ScalarEvolutionAnalysis>(F);
    501   if (!formLCSSAOnAllLoops(&LI, DT, SE))
    502     return PreservedAnalyses::all();
    503 
    504   PreservedAnalyses PA;
    505   PA.preserveSet<CFGAnalyses>();
    506   PA.preserve<ScalarEvolutionAnalysis>();
    507   // BPI maps terminators to probabilities, since we don't modify the CFG, no
    508   // updates are needed to preserve it.
    509   PA.preserve<BranchProbabilityAnalysis>();
    510   PA.preserve<MemorySSAAnalysis>();
    511   return PA;
    512 }
    513