1 //===- LoopUnswitch.cpp - Hoist loop-invariant conditionals in loop -------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This pass transforms loops that contain branches on loop-invariant conditions 10 // to multiple loops. For example, it turns the left into the right code: 11 // 12 // for (...) if (lic) 13 // A for (...) 14 // if (lic) A; B; C 15 // B else 16 // C for (...) 17 // A; C 18 // 19 // This can increase the size of the code exponentially (doubling it every time 20 // a loop is unswitched) so we only unswitch if the resultant code will be 21 // smaller than a threshold. 22 // 23 // This pass expects LICM to be run before it to hoist invariant conditions out 24 // of the loop, to make the unswitching opportunity obvious. 25 // 26 //===----------------------------------------------------------------------===// 27 28 #include "llvm/ADT/DenseMap.h" 29 #include "llvm/ADT/STLExtras.h" 30 #include "llvm/ADT/SmallPtrSet.h" 31 #include "llvm/ADT/SmallVector.h" 32 #include "llvm/ADT/Statistic.h" 33 #include "llvm/Analysis/AssumptionCache.h" 34 #include "llvm/Analysis/CodeMetrics.h" 35 #include "llvm/Analysis/InstructionSimplify.h" 36 #include "llvm/Analysis/LazyBlockFrequencyInfo.h" 37 #include "llvm/Analysis/LegacyDivergenceAnalysis.h" 38 #include "llvm/Analysis/LoopInfo.h" 39 #include "llvm/Analysis/LoopIterator.h" 40 #include "llvm/Analysis/LoopPass.h" 41 #include "llvm/Analysis/MemorySSA.h" 42 #include "llvm/Analysis/MemorySSAUpdater.h" 43 #include "llvm/Analysis/MustExecute.h" 44 #include "llvm/Analysis/ScalarEvolution.h" 45 #include "llvm/Analysis/TargetTransformInfo.h" 46 #include "llvm/IR/Attributes.h" 47 #include "llvm/IR/BasicBlock.h" 48 #include "llvm/IR/Constant.h" 49 #include "llvm/IR/Constants.h" 50 #include "llvm/IR/DerivedTypes.h" 51 #include "llvm/IR/Dominators.h" 52 #include "llvm/IR/Function.h" 53 #include "llvm/IR/IRBuilder.h" 54 #include "llvm/IR/InstrTypes.h" 55 #include "llvm/IR/Instruction.h" 56 #include "llvm/IR/Instructions.h" 57 #include "llvm/IR/IntrinsicInst.h" 58 #include "llvm/IR/Intrinsics.h" 59 #include "llvm/IR/Module.h" 60 #include "llvm/IR/Type.h" 61 #include "llvm/IR/User.h" 62 #include "llvm/IR/Value.h" 63 #include "llvm/IR/ValueHandle.h" 64 #include "llvm/InitializePasses.h" 65 #include "llvm/Pass.h" 66 #include "llvm/Support/Casting.h" 67 #include "llvm/Support/CommandLine.h" 68 #include "llvm/Support/Debug.h" 69 #include "llvm/Support/raw_ostream.h" 70 #include "llvm/Transforms/Scalar.h" 71 #include "llvm/Transforms/Scalar/LoopPassManager.h" 72 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 73 #include "llvm/Transforms/Utils/Cloning.h" 74 #include "llvm/Transforms/Utils/Local.h" 75 #include "llvm/Transforms/Utils/LoopUtils.h" 76 #include "llvm/Transforms/Utils/ValueMapper.h" 77 #include <algorithm> 78 #include <cassert> 79 #include <map> 80 #include <set> 81 #include <tuple> 82 #include <utility> 83 #include <vector> 84 85 using namespace llvm; 86 87 #define DEBUG_TYPE "loop-unswitch" 88 89 STATISTIC(NumBranches, "Number of branches unswitched"); 90 STATISTIC(NumSwitches, "Number of switches unswitched"); 91 STATISTIC(NumGuards, "Number of guards unswitched"); 92 STATISTIC(NumSelects , "Number of selects unswitched"); 93 STATISTIC(NumTrivial , "Number of unswitches that are trivial"); 94 STATISTIC(NumSimplify, "Number of simplifications of unswitched code"); 95 STATISTIC(TotalInsts, "Total number of instructions analyzed"); 96 97 // The specific value of 100 here was chosen based only on intuition and a 98 // few specific examples. 99 static cl::opt<unsigned> 100 Threshold("loop-unswitch-threshold", cl::desc("Max loop size to unswitch"), 101 cl::init(100), cl::Hidden); 102 103 static cl::opt<unsigned> 104 MSSAThreshold("loop-unswitch-memoryssa-threshold", 105 cl::desc("Max number of memory uses to explore during " 106 "partial unswitching analysis"), 107 cl::init(100), cl::Hidden); 108 109 namespace { 110 111 class LUAnalysisCache { 112 using UnswitchedValsMap = 113 DenseMap<const SwitchInst *, SmallPtrSet<const Value *, 8>>; 114 using UnswitchedValsIt = UnswitchedValsMap::iterator; 115 116 struct LoopProperties { 117 unsigned CanBeUnswitchedCount; 118 unsigned WasUnswitchedCount; 119 unsigned SizeEstimation; 120 UnswitchedValsMap UnswitchedVals; 121 }; 122 123 // Here we use std::map instead of DenseMap, since we need to keep valid 124 // LoopProperties pointer for current loop for better performance. 125 using LoopPropsMap = std::map<const Loop *, LoopProperties>; 126 using LoopPropsMapIt = LoopPropsMap::iterator; 127 128 LoopPropsMap LoopsProperties; 129 UnswitchedValsMap *CurLoopInstructions = nullptr; 130 LoopProperties *CurrentLoopProperties = nullptr; 131 132 // A loop unswitching with an estimated cost above this threshold 133 // is not performed. MaxSize is turned into unswitching quota for 134 // the current loop, and reduced correspondingly, though note that 135 // the quota is returned by releaseMemory() when the loop has been 136 // processed, so that MaxSize will return to its previous 137 // value. So in most cases MaxSize will equal the Threshold flag 138 // when a new loop is processed. An exception to that is that 139 // MaxSize will have a smaller value while processing nested loops 140 // that were introduced due to loop unswitching of an outer loop. 141 // 142 // FIXME: The way that MaxSize works is subtle and depends on the 143 // pass manager processing loops and calling releaseMemory() in a 144 // specific order. It would be good to find a more straightforward 145 // way of doing what MaxSize does. 146 unsigned MaxSize; 147 148 public: 149 LUAnalysisCache() : MaxSize(Threshold) {} 150 151 // Analyze loop. Check its size, calculate is it possible to unswitch 152 // it. Returns true if we can unswitch this loop. 153 bool countLoop(const Loop *L, const TargetTransformInfo &TTI, 154 AssumptionCache *AC); 155 156 // Clean all data related to given loop. 157 void forgetLoop(const Loop *L); 158 159 // Mark case value as unswitched. 160 // Since SI instruction can be partly unswitched, in order to avoid 161 // extra unswitching in cloned loops keep track all unswitched values. 162 void setUnswitched(const SwitchInst *SI, const Value *V); 163 164 // Check was this case value unswitched before or not. 165 bool isUnswitched(const SwitchInst *SI, const Value *V); 166 167 // Returns true if another unswitching could be done within the cost 168 // threshold. 169 bool costAllowsUnswitching(); 170 171 // Clone all loop-unswitch related loop properties. 172 // Redistribute unswitching quotas. 173 // Note, that new loop data is stored inside the VMap. 174 void cloneData(const Loop *NewLoop, const Loop *OldLoop, 175 const ValueToValueMapTy &VMap); 176 }; 177 178 class LoopUnswitch : public LoopPass { 179 LoopInfo *LI; // Loop information 180 LPPassManager *LPM; 181 AssumptionCache *AC; 182 183 // Used to check if second loop needs processing after 184 // rewriteLoopBodyWithConditionConstant rewrites first loop. 185 std::vector<Loop*> LoopProcessWorklist; 186 187 LUAnalysisCache BranchesInfo; 188 189 bool OptimizeForSize; 190 bool RedoLoop = false; 191 192 Loop *CurrentLoop = nullptr; 193 DominatorTree *DT = nullptr; 194 MemorySSA *MSSA = nullptr; 195 AAResults *AA = nullptr; 196 std::unique_ptr<MemorySSAUpdater> MSSAU; 197 BasicBlock *LoopHeader = nullptr; 198 BasicBlock *LoopPreheader = nullptr; 199 200 bool SanitizeMemory; 201 SimpleLoopSafetyInfo SafetyInfo; 202 203 // LoopBlocks contains all of the basic blocks of the loop, including the 204 // preheader of the loop, the body of the loop, and the exit blocks of the 205 // loop, in that order. 206 std::vector<BasicBlock*> LoopBlocks; 207 // NewBlocks contained cloned copy of basic blocks from LoopBlocks. 208 std::vector<BasicBlock*> NewBlocks; 209 210 bool HasBranchDivergence; 211 212 public: 213 static char ID; // Pass ID, replacement for typeid 214 215 explicit LoopUnswitch(bool Os = false, bool HasBranchDivergence = false) 216 : LoopPass(ID), OptimizeForSize(Os), 217 HasBranchDivergence(HasBranchDivergence) { 218 initializeLoopUnswitchPass(*PassRegistry::getPassRegistry()); 219 } 220 221 bool runOnLoop(Loop *L, LPPassManager &LPM) override; 222 bool processCurrentLoop(); 223 bool isUnreachableDueToPreviousUnswitching(BasicBlock *); 224 225 /// This transformation requires natural loop information & requires that 226 /// loop preheaders be inserted into the CFG. 227 /// 228 void getAnalysisUsage(AnalysisUsage &AU) const override { 229 // Lazy BFI and BPI are marked as preserved here so Loop Unswitching 230 // can remain part of the same loop pass as LICM 231 AU.addPreserved<LazyBlockFrequencyInfoPass>(); 232 AU.addPreserved<LazyBranchProbabilityInfoPass>(); 233 AU.addRequired<AssumptionCacheTracker>(); 234 AU.addRequired<TargetTransformInfoWrapperPass>(); 235 if (EnableMSSALoopDependency) { 236 AU.addRequired<MemorySSAWrapperPass>(); 237 AU.addPreserved<MemorySSAWrapperPass>(); 238 } 239 if (HasBranchDivergence) 240 AU.addRequired<LegacyDivergenceAnalysis>(); 241 getLoopAnalysisUsage(AU); 242 } 243 244 private: 245 void releaseMemory() override { BranchesInfo.forgetLoop(CurrentLoop); } 246 247 void initLoopData() { 248 LoopHeader = CurrentLoop->getHeader(); 249 LoopPreheader = CurrentLoop->getLoopPreheader(); 250 } 251 252 /// Split all of the edges from inside the loop to their exit blocks. 253 /// Update the appropriate Phi nodes as we do so. 254 void splitExitEdges(Loop *L, 255 const SmallVectorImpl<BasicBlock *> &ExitBlocks); 256 257 bool tryTrivialLoopUnswitch(bool &Changed); 258 259 bool unswitchIfProfitable(Value *LoopCond, Constant *Val, 260 Instruction *TI = nullptr, 261 ArrayRef<Instruction *> ToDuplicate = {}); 262 void unswitchTrivialCondition(Loop *L, Value *Cond, Constant *Val, 263 BasicBlock *ExitBlock, Instruction *TI); 264 void unswitchNontrivialCondition(Value *LIC, Constant *OnVal, Loop *L, 265 Instruction *TI, 266 ArrayRef<Instruction *> ToDuplicate = {}); 267 268 void rewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC, 269 Constant *Val, bool IsEqual); 270 271 void 272 emitPreheaderBranchOnCondition(Value *LIC, Constant *Val, 273 BasicBlock *TrueDest, BasicBlock *FalseDest, 274 BranchInst *OldBranch, Instruction *TI, 275 ArrayRef<Instruction *> ToDuplicate = {}); 276 277 void simplifyCode(std::vector<Instruction *> &Worklist, Loop *L); 278 279 /// Given that the Invariant is not equal to Val. Simplify instructions 280 /// in the loop. 281 Value *simplifyInstructionWithNotEqual(Instruction *Inst, Value *Invariant, 282 Constant *Val); 283 }; 284 285 } // end anonymous namespace 286 287 // Analyze loop. Check its size, calculate is it possible to unswitch 288 // it. Returns true if we can unswitch this loop. 289 bool LUAnalysisCache::countLoop(const Loop *L, const TargetTransformInfo &TTI, 290 AssumptionCache *AC) { 291 LoopPropsMapIt PropsIt; 292 bool Inserted; 293 std::tie(PropsIt, Inserted) = 294 LoopsProperties.insert(std::make_pair(L, LoopProperties())); 295 296 LoopProperties &Props = PropsIt->second; 297 298 if (Inserted) { 299 // New loop. 300 301 // Limit the number of instructions to avoid causing significant code 302 // expansion, and the number of basic blocks, to avoid loops with 303 // large numbers of branches which cause loop unswitching to go crazy. 304 // This is a very ad-hoc heuristic. 305 306 SmallPtrSet<const Value *, 32> EphValues; 307 CodeMetrics::collectEphemeralValues(L, AC, EphValues); 308 309 // FIXME: This is overly conservative because it does not take into 310 // consideration code simplification opportunities and code that can 311 // be shared by the resultant unswitched loops. 312 CodeMetrics Metrics; 313 for (BasicBlock *BB : L->blocks()) 314 Metrics.analyzeBasicBlock(BB, TTI, EphValues); 315 316 Props.SizeEstimation = Metrics.NumInsts; 317 Props.CanBeUnswitchedCount = MaxSize / (Props.SizeEstimation); 318 Props.WasUnswitchedCount = 0; 319 MaxSize -= Props.SizeEstimation * Props.CanBeUnswitchedCount; 320 321 if (Metrics.notDuplicatable) { 322 LLVM_DEBUG(dbgs() << "NOT unswitching loop %" << L->getHeader()->getName() 323 << ", contents cannot be " 324 << "duplicated!\n"); 325 return false; 326 } 327 } 328 329 // Be careful. This links are good only before new loop addition. 330 CurrentLoopProperties = &Props; 331 CurLoopInstructions = &Props.UnswitchedVals; 332 333 return true; 334 } 335 336 // Clean all data related to given loop. 337 void LUAnalysisCache::forgetLoop(const Loop *L) { 338 LoopPropsMapIt LIt = LoopsProperties.find(L); 339 340 if (LIt != LoopsProperties.end()) { 341 LoopProperties &Props = LIt->second; 342 MaxSize += (Props.CanBeUnswitchedCount + Props.WasUnswitchedCount) * 343 Props.SizeEstimation; 344 LoopsProperties.erase(LIt); 345 } 346 347 CurrentLoopProperties = nullptr; 348 CurLoopInstructions = nullptr; 349 } 350 351 // Mark case value as unswitched. 352 // Since SI instruction can be partly unswitched, in order to avoid 353 // extra unswitching in cloned loops keep track all unswitched values. 354 void LUAnalysisCache::setUnswitched(const SwitchInst *SI, const Value *V) { 355 (*CurLoopInstructions)[SI].insert(V); 356 } 357 358 // Check was this case value unswitched before or not. 359 bool LUAnalysisCache::isUnswitched(const SwitchInst *SI, const Value *V) { 360 return (*CurLoopInstructions)[SI].count(V); 361 } 362 363 bool LUAnalysisCache::costAllowsUnswitching() { 364 return CurrentLoopProperties->CanBeUnswitchedCount > 0; 365 } 366 367 // Clone all loop-unswitch related loop properties. 368 // Redistribute unswitching quotas. 369 // Note, that new loop data is stored inside the VMap. 370 void LUAnalysisCache::cloneData(const Loop *NewLoop, const Loop *OldLoop, 371 const ValueToValueMapTy &VMap) { 372 LoopProperties &NewLoopProps = LoopsProperties[NewLoop]; 373 LoopProperties &OldLoopProps = *CurrentLoopProperties; 374 UnswitchedValsMap &Insts = OldLoopProps.UnswitchedVals; 375 376 // Reallocate "can-be-unswitched quota" 377 378 --OldLoopProps.CanBeUnswitchedCount; 379 ++OldLoopProps.WasUnswitchedCount; 380 NewLoopProps.WasUnswitchedCount = 0; 381 unsigned Quota = OldLoopProps.CanBeUnswitchedCount; 382 NewLoopProps.CanBeUnswitchedCount = Quota / 2; 383 OldLoopProps.CanBeUnswitchedCount = Quota - Quota / 2; 384 385 NewLoopProps.SizeEstimation = OldLoopProps.SizeEstimation; 386 387 // Clone unswitched values info: 388 // for new loop switches we clone info about values that was 389 // already unswitched and has redundant successors. 390 for (const auto &I : Insts) { 391 const SwitchInst *OldInst = I.first; 392 Value *NewI = VMap.lookup(OldInst); 393 const SwitchInst *NewInst = cast_or_null<SwitchInst>(NewI); 394 assert(NewInst && "All instructions that are in SrcBB must be in VMap."); 395 396 NewLoopProps.UnswitchedVals[NewInst] = OldLoopProps.UnswitchedVals[OldInst]; 397 } 398 } 399 400 char LoopUnswitch::ID = 0; 401 402 INITIALIZE_PASS_BEGIN(LoopUnswitch, "loop-unswitch", "Unswitch loops", 403 false, false) 404 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) 405 INITIALIZE_PASS_DEPENDENCY(LoopPass) 406 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) 407 INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis) 408 INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass) 409 INITIALIZE_PASS_END(LoopUnswitch, "loop-unswitch", "Unswitch loops", 410 false, false) 411 412 Pass *llvm::createLoopUnswitchPass(bool Os, bool HasBranchDivergence) { 413 return new LoopUnswitch(Os, HasBranchDivergence); 414 } 415 416 /// Operator chain lattice. 417 enum OperatorChain { 418 OC_OpChainNone, ///< There is no operator. 419 OC_OpChainOr, ///< There are only ORs. 420 OC_OpChainAnd, ///< There are only ANDs. 421 OC_OpChainMixed ///< There are ANDs and ORs. 422 }; 423 424 /// Cond is a condition that occurs in L. If it is invariant in the loop, or has 425 /// an invariant piece, return the invariant. Otherwise, return null. 426 // 427 /// NOTE: findLIVLoopCondition will not return a partial LIV by walking up a 428 /// mixed operator chain, as we can not reliably find a value which will 429 /// simplify the operator chain. If the chain is AND-only or OR-only, we can use 430 /// 0 or ~0 to simplify the chain. 431 /// 432 /// NOTE: In case a partial LIV and a mixed operator chain, we may be able to 433 /// simplify the condition itself to a loop variant condition, but at the 434 /// cost of creating an entirely new loop. 435 static Value *findLIVLoopCondition(Value *Cond, Loop *L, bool &Changed, 436 OperatorChain &ParentChain, 437 DenseMap<Value *, Value *> &Cache, 438 MemorySSAUpdater *MSSAU) { 439 auto CacheIt = Cache.find(Cond); 440 if (CacheIt != Cache.end()) 441 return CacheIt->second; 442 443 // We started analyze new instruction, increment scanned instructions counter. 444 ++TotalInsts; 445 446 // We can never unswitch on vector conditions. 447 if (Cond->getType()->isVectorTy()) 448 return nullptr; 449 450 // Constants should be folded, not unswitched on! 451 if (isa<Constant>(Cond)) return nullptr; 452 453 // TODO: Handle: br (VARIANT|INVARIANT). 454 455 // Hoist simple values out. 456 if (L->makeLoopInvariant(Cond, Changed, nullptr, MSSAU)) { 457 Cache[Cond] = Cond; 458 return Cond; 459 } 460 461 // Walk up the operator chain to find partial invariant conditions. 462 if (BinaryOperator *BO = dyn_cast<BinaryOperator>(Cond)) 463 if (BO->getOpcode() == Instruction::And || 464 BO->getOpcode() == Instruction::Or) { 465 // Given the previous operator, compute the current operator chain status. 466 OperatorChain NewChain; 467 switch (ParentChain) { 468 case OC_OpChainNone: 469 NewChain = BO->getOpcode() == Instruction::And ? OC_OpChainAnd : 470 OC_OpChainOr; 471 break; 472 case OC_OpChainOr: 473 NewChain = BO->getOpcode() == Instruction::Or ? OC_OpChainOr : 474 OC_OpChainMixed; 475 break; 476 case OC_OpChainAnd: 477 NewChain = BO->getOpcode() == Instruction::And ? OC_OpChainAnd : 478 OC_OpChainMixed; 479 break; 480 case OC_OpChainMixed: 481 NewChain = OC_OpChainMixed; 482 break; 483 } 484 485 // If we reach a Mixed state, we do not want to keep walking up as we can not 486 // reliably find a value that will simplify the chain. With this check, we 487 // will return null on the first sight of mixed chain and the caller will 488 // either backtrack to find partial LIV in other operand or return null. 489 if (NewChain != OC_OpChainMixed) { 490 // Update the current operator chain type before we search up the chain. 491 ParentChain = NewChain; 492 // If either the left or right side is invariant, we can unswitch on this, 493 // which will cause the branch to go away in one loop and the condition to 494 // simplify in the other one. 495 if (Value *LHS = findLIVLoopCondition(BO->getOperand(0), L, Changed, 496 ParentChain, Cache, MSSAU)) { 497 Cache[Cond] = LHS; 498 return LHS; 499 } 500 // We did not manage to find a partial LIV in operand(0). Backtrack and try 501 // operand(1). 502 ParentChain = NewChain; 503 if (Value *RHS = findLIVLoopCondition(BO->getOperand(1), L, Changed, 504 ParentChain, Cache, MSSAU)) { 505 Cache[Cond] = RHS; 506 return RHS; 507 } 508 } 509 } 510 511 Cache[Cond] = nullptr; 512 return nullptr; 513 } 514 515 /// Cond is a condition that occurs in L. If it is invariant in the loop, or has 516 /// an invariant piece, return the invariant along with the operator chain type. 517 /// Otherwise, return null. 518 static std::pair<Value *, OperatorChain> 519 findLIVLoopCondition(Value *Cond, Loop *L, bool &Changed, 520 MemorySSAUpdater *MSSAU) { 521 DenseMap<Value *, Value *> Cache; 522 OperatorChain OpChain = OC_OpChainNone; 523 Value *FCond = findLIVLoopCondition(Cond, L, Changed, OpChain, Cache, MSSAU); 524 525 // In case we do find a LIV, it can not be obtained by walking up a mixed 526 // operator chain. 527 assert((!FCond || OpChain != OC_OpChainMixed) && 528 "Do not expect a partial LIV with mixed operator chain"); 529 return {FCond, OpChain}; 530 } 531 532 bool LoopUnswitch::runOnLoop(Loop *L, LPPassManager &LPMRef) { 533 if (skipLoop(L)) 534 return false; 535 536 AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache( 537 *L->getHeader()->getParent()); 538 LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); 539 LPM = &LPMRef; 540 DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); 541 AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); 542 if (EnableMSSALoopDependency) { 543 MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA(); 544 MSSAU = std::make_unique<MemorySSAUpdater>(MSSA); 545 assert(DT && "Cannot update MemorySSA without a valid DomTree."); 546 } 547 CurrentLoop = L; 548 Function *F = CurrentLoop->getHeader()->getParent(); 549 550 SanitizeMemory = F->hasFnAttribute(Attribute::SanitizeMemory); 551 if (SanitizeMemory) 552 SafetyInfo.computeLoopSafetyInfo(L); 553 554 if (MSSA && VerifyMemorySSA) 555 MSSA->verifyMemorySSA(); 556 557 bool Changed = false; 558 do { 559 assert(CurrentLoop->isLCSSAForm(*DT)); 560 if (MSSA && VerifyMemorySSA) 561 MSSA->verifyMemorySSA(); 562 RedoLoop = false; 563 Changed |= processCurrentLoop(); 564 } while (RedoLoop); 565 566 if (MSSA && VerifyMemorySSA) 567 MSSA->verifyMemorySSA(); 568 569 return Changed; 570 } 571 572 // Return true if the BasicBlock BB is unreachable from the loop header. 573 // Return false, otherwise. 574 bool LoopUnswitch::isUnreachableDueToPreviousUnswitching(BasicBlock *BB) { 575 auto *Node = DT->getNode(BB)->getIDom(); 576 BasicBlock *DomBB = Node->getBlock(); 577 while (CurrentLoop->contains(DomBB)) { 578 BranchInst *BInst = dyn_cast<BranchInst>(DomBB->getTerminator()); 579 580 Node = DT->getNode(DomBB)->getIDom(); 581 DomBB = Node->getBlock(); 582 583 if (!BInst || !BInst->isConditional()) 584 continue; 585 586 Value *Cond = BInst->getCondition(); 587 if (!isa<ConstantInt>(Cond)) 588 continue; 589 590 BasicBlock *UnreachableSucc = 591 Cond == ConstantInt::getTrue(Cond->getContext()) 592 ? BInst->getSuccessor(1) 593 : BInst->getSuccessor(0); 594 595 if (DT->dominates(UnreachableSucc, BB)) 596 return true; 597 } 598 return false; 599 } 600 601 /// FIXME: Remove this workaround when freeze related patches are done. 602 /// LoopUnswitch and Equality propagation in GVN have discrepancy about 603 /// whether branch on undef/poison has undefine behavior. Here it is to 604 /// rule out some common cases that we found such discrepancy already 605 /// causing problems. Detail could be found in PR31652. Note if the 606 /// func returns true, it is unsafe. But if it is false, it doesn't mean 607 /// it is necessarily safe. 608 static bool equalityPropUnSafe(Value &LoopCond) { 609 ICmpInst *CI = dyn_cast<ICmpInst>(&LoopCond); 610 if (!CI || !CI->isEquality()) 611 return false; 612 613 Value *LHS = CI->getOperand(0); 614 Value *RHS = CI->getOperand(1); 615 if (isa<UndefValue>(LHS) || isa<UndefValue>(RHS)) 616 return true; 617 618 auto HasUndefInPHI = [](PHINode &PN) { 619 for (Value *Opd : PN.incoming_values()) { 620 if (isa<UndefValue>(Opd)) 621 return true; 622 } 623 return false; 624 }; 625 PHINode *LPHI = dyn_cast<PHINode>(LHS); 626 PHINode *RPHI = dyn_cast<PHINode>(RHS); 627 if ((LPHI && HasUndefInPHI(*LPHI)) || (RPHI && HasUndefInPHI(*RPHI))) 628 return true; 629 630 auto HasUndefInSelect = [](SelectInst &SI) { 631 if (isa<UndefValue>(SI.getTrueValue()) || 632 isa<UndefValue>(SI.getFalseValue())) 633 return true; 634 return false; 635 }; 636 SelectInst *LSI = dyn_cast<SelectInst>(LHS); 637 SelectInst *RSI = dyn_cast<SelectInst>(RHS); 638 if ((LSI && HasUndefInSelect(*LSI)) || (RSI && HasUndefInSelect(*RSI))) 639 return true; 640 return false; 641 } 642 643 /// Do actual work and unswitch loop if possible and profitable. 644 bool LoopUnswitch::processCurrentLoop() { 645 bool Changed = false; 646 647 initLoopData(); 648 649 // If LoopSimplify was unable to form a preheader, don't do any unswitching. 650 if (!LoopPreheader) 651 return false; 652 653 // Loops with indirectbr cannot be cloned. 654 if (!CurrentLoop->isSafeToClone()) 655 return false; 656 657 // Without dedicated exits, splitting the exit edge may fail. 658 if (!CurrentLoop->hasDedicatedExits()) 659 return false; 660 661 LLVMContext &Context = LoopHeader->getContext(); 662 663 // Analyze loop cost, and stop unswitching if loop content can not be duplicated. 664 if (!BranchesInfo.countLoop( 665 CurrentLoop, 666 getAnalysis<TargetTransformInfoWrapperPass>().getTTI( 667 *CurrentLoop->getHeader()->getParent()), 668 AC)) 669 return false; 670 671 // Try trivial unswitch first before loop over other basic blocks in the loop. 672 if (tryTrivialLoopUnswitch(Changed)) { 673 return true; 674 } 675 676 // Do not do non-trivial unswitch while optimizing for size. 677 // FIXME: Use Function::hasOptSize(). 678 if (OptimizeForSize || 679 LoopHeader->getParent()->hasFnAttribute(Attribute::OptimizeForSize)) 680 return Changed; 681 682 // Run through the instructions in the loop, keeping track of three things: 683 // 684 // - That we do not unswitch loops containing convergent operations, as we 685 // might be making them control dependent on the unswitch value when they 686 // were not before. 687 // FIXME: This could be refined to only bail if the convergent operation is 688 // not already control-dependent on the unswitch value. 689 // 690 // - That basic blocks in the loop contain invokes whose predecessor edges we 691 // cannot split. 692 // 693 // - The set of guard intrinsics encountered (these are non terminator 694 // instructions that are also profitable to be unswitched). 695 696 SmallVector<IntrinsicInst *, 4> Guards; 697 698 for (const auto BB : CurrentLoop->blocks()) { 699 for (auto &I : *BB) { 700 auto *CB = dyn_cast<CallBase>(&I); 701 if (!CB) 702 continue; 703 if (CB->isConvergent()) 704 return Changed; 705 if (auto *II = dyn_cast<InvokeInst>(&I)) 706 if (!II->getUnwindDest()->canSplitPredecessors()) 707 return Changed; 708 if (auto *II = dyn_cast<IntrinsicInst>(&I)) 709 if (II->getIntrinsicID() == Intrinsic::experimental_guard) 710 Guards.push_back(II); 711 } 712 } 713 714 for (IntrinsicInst *Guard : Guards) { 715 Value *LoopCond = findLIVLoopCondition(Guard->getOperand(0), CurrentLoop, 716 Changed, MSSAU.get()) 717 .first; 718 if (LoopCond && 719 unswitchIfProfitable(LoopCond, ConstantInt::getTrue(Context))) { 720 // NB! Unswitching (if successful) could have erased some of the 721 // instructions in Guards leaving dangling pointers there. This is fine 722 // because we're returning now, and won't look at Guards again. 723 ++NumGuards; 724 return true; 725 } 726 } 727 728 // Loop over all of the basic blocks in the loop. If we find an interior 729 // block that is branching on a loop-invariant condition, we can unswitch this 730 // loop. 731 for (Loop::block_iterator I = CurrentLoop->block_begin(), 732 E = CurrentLoop->block_end(); 733 I != E; ++I) { 734 Instruction *TI = (*I)->getTerminator(); 735 736 // Unswitching on a potentially uninitialized predicate is not 737 // MSan-friendly. Limit this to the cases when the original predicate is 738 // guaranteed to execute, to avoid creating a use-of-uninitialized-value 739 // in the code that did not have one. 740 // This is a workaround for the discrepancy between LLVM IR and MSan 741 // semantics. See PR28054 for more details. 742 if (SanitizeMemory && 743 !SafetyInfo.isGuaranteedToExecute(*TI, DT, CurrentLoop)) 744 continue; 745 746 if (BranchInst *BI = dyn_cast<BranchInst>(TI)) { 747 // Some branches may be rendered unreachable because of previous 748 // unswitching. 749 // Unswitch only those branches that are reachable. 750 if (isUnreachableDueToPreviousUnswitching(*I)) 751 continue; 752 753 // If this isn't branching on an invariant condition, we can't unswitch 754 // it. 755 if (BI->isConditional()) { 756 // See if this, or some part of it, is loop invariant. If so, we can 757 // unswitch on it if we desire. 758 Value *LoopCond = findLIVLoopCondition(BI->getCondition(), CurrentLoop, 759 Changed, MSSAU.get()) 760 .first; 761 if (LoopCond && !equalityPropUnSafe(*LoopCond) && 762 unswitchIfProfitable(LoopCond, ConstantInt::getTrue(Context), TI)) { 763 ++NumBranches; 764 return true; 765 } 766 } 767 } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) { 768 Value *SC = SI->getCondition(); 769 Value *LoopCond; 770 OperatorChain OpChain; 771 std::tie(LoopCond, OpChain) = 772 findLIVLoopCondition(SC, CurrentLoop, Changed, MSSAU.get()); 773 774 unsigned NumCases = SI->getNumCases(); 775 if (LoopCond && NumCases) { 776 // Find a value to unswitch on: 777 // FIXME: this should chose the most expensive case! 778 // FIXME: scan for a case with a non-critical edge? 779 Constant *UnswitchVal = nullptr; 780 // Find a case value such that at least one case value is unswitched 781 // out. 782 if (OpChain == OC_OpChainAnd) { 783 // If the chain only has ANDs and the switch has a case value of 0. 784 // Dropping in a 0 to the chain will unswitch out the 0-casevalue. 785 auto *AllZero = cast<ConstantInt>(Constant::getNullValue(SC->getType())); 786 if (BranchesInfo.isUnswitched(SI, AllZero)) 787 continue; 788 // We are unswitching 0 out. 789 UnswitchVal = AllZero; 790 } else if (OpChain == OC_OpChainOr) { 791 // If the chain only has ORs and the switch has a case value of ~0. 792 // Dropping in a ~0 to the chain will unswitch out the ~0-casevalue. 793 auto *AllOne = cast<ConstantInt>(Constant::getAllOnesValue(SC->getType())); 794 if (BranchesInfo.isUnswitched(SI, AllOne)) 795 continue; 796 // We are unswitching ~0 out. 797 UnswitchVal = AllOne; 798 } else { 799 assert(OpChain == OC_OpChainNone && 800 "Expect to unswitch on trivial chain"); 801 // Do not process same value again and again. 802 // At this point we have some cases already unswitched and 803 // some not yet unswitched. Let's find the first not yet unswitched one. 804 for (auto Case : SI->cases()) { 805 Constant *UnswitchValCandidate = Case.getCaseValue(); 806 if (!BranchesInfo.isUnswitched(SI, UnswitchValCandidate)) { 807 UnswitchVal = UnswitchValCandidate; 808 break; 809 } 810 } 811 } 812 813 if (!UnswitchVal) 814 continue; 815 816 if (unswitchIfProfitable(LoopCond, UnswitchVal)) { 817 ++NumSwitches; 818 // In case of a full LIV, UnswitchVal is the value we unswitched out. 819 // In case of a partial LIV, we only unswitch when its an AND-chain 820 // or OR-chain. In both cases switch input value simplifies to 821 // UnswitchVal. 822 BranchesInfo.setUnswitched(SI, UnswitchVal); 823 return true; 824 } 825 } 826 } 827 828 // Scan the instructions to check for unswitchable values. 829 for (BasicBlock::iterator BBI = (*I)->begin(), E = (*I)->end(); 830 BBI != E; ++BBI) 831 if (SelectInst *SI = dyn_cast<SelectInst>(BBI)) { 832 Value *LoopCond = findLIVLoopCondition(SI->getCondition(), CurrentLoop, 833 Changed, MSSAU.get()) 834 .first; 835 if (LoopCond && 836 unswitchIfProfitable(LoopCond, ConstantInt::getTrue(Context))) { 837 ++NumSelects; 838 return true; 839 } 840 } 841 } 842 843 // Check if there is a header condition that is invariant along the patch from 844 // either the true or false successors to the header. This allows unswitching 845 // conditions depending on memory accesses, if there's a path not clobbering 846 // the memory locations. Check if this transform has been disabled using 847 // metadata, to avoid unswitching the same loop multiple times. 848 if (MSSA && 849 !findOptionMDForLoop(CurrentLoop, "llvm.loop.unswitch.partial.disable")) { 850 if (auto Info = 851 hasPartialIVCondition(*CurrentLoop, MSSAThreshold, *MSSA, *AA)) { 852 assert(!Info->InstToDuplicate.empty() && 853 "need at least a partially invariant condition"); 854 LLVM_DEBUG(dbgs() << "loop-unswitch: Found partially invariant condition " 855 << *Info->InstToDuplicate[0] << "\n"); 856 857 Instruction *TI = CurrentLoop->getHeader()->getTerminator(); 858 Value *LoopCond = Info->InstToDuplicate[0]; 859 860 // If the partially unswitched path is a no-op and has a single exit 861 // block, we do not need to do full unswitching. Instead, we can directly 862 // branch to the exit. 863 // TODO: Instead of duplicating the checks, we could also just directly 864 // branch to the exit from the conditional branch in the loop. 865 if (Info->PathIsNoop) { 866 if (HasBranchDivergence && 867 getAnalysis<LegacyDivergenceAnalysis>().isDivergent(LoopCond)) { 868 LLVM_DEBUG(dbgs() << "NOT unswitching loop %" 869 << CurrentLoop->getHeader()->getName() 870 << " at non-trivial condition '" 871 << *Info->KnownValue << "' == " << *LoopCond << "\n" 872 << ". Condition is divergent.\n"); 873 return false; 874 } 875 876 ++NumBranches; 877 878 BasicBlock *TrueDest = LoopHeader; 879 BasicBlock *FalseDest = Info->ExitForPath; 880 if (Info->KnownValue->isOneValue()) 881 std::swap(TrueDest, FalseDest); 882 883 auto *OldBr = 884 cast<BranchInst>(CurrentLoop->getLoopPreheader()->getTerminator()); 885 emitPreheaderBranchOnCondition(LoopCond, Info->KnownValue, TrueDest, 886 FalseDest, OldBr, TI, 887 Info->InstToDuplicate); 888 delete OldBr; 889 RedoLoop = false; 890 return true; 891 } 892 893 // Otherwise, the path is not a no-op. Run regular unswitching. 894 if (unswitchIfProfitable(LoopCond, Info->KnownValue, 895 CurrentLoop->getHeader()->getTerminator(), 896 Info->InstToDuplicate)) { 897 ++NumBranches; 898 RedoLoop = false; 899 return true; 900 } 901 } 902 } 903 904 return Changed; 905 } 906 907 /// Check to see if all paths from BB exit the loop with no side effects 908 /// (including infinite loops). 909 /// 910 /// If true, we return true and set ExitBB to the block we 911 /// exit through. 912 /// 913 static bool isTrivialLoopExitBlockHelper(Loop *L, BasicBlock *BB, 914 BasicBlock *&ExitBB, 915 std::set<BasicBlock*> &Visited) { 916 if (!Visited.insert(BB).second) { 917 // Already visited. Without more analysis, this could indicate an infinite 918 // loop. 919 return false; 920 } 921 if (!L->contains(BB)) { 922 // Otherwise, this is a loop exit, this is fine so long as this is the 923 // first exit. 924 if (ExitBB) return false; 925 ExitBB = BB; 926 return true; 927 } 928 929 // Otherwise, this is an unvisited intra-loop node. Check all successors. 930 for (BasicBlock *Succ : successors(BB)) { 931 // Check to see if the successor is a trivial loop exit. 932 if (!isTrivialLoopExitBlockHelper(L, Succ, ExitBB, Visited)) 933 return false; 934 } 935 936 // Okay, everything after this looks good, check to make sure that this block 937 // doesn't include any side effects. 938 for (Instruction &I : *BB) 939 if (I.mayHaveSideEffects()) 940 return false; 941 942 return true; 943 } 944 945 /// Return true if the specified block unconditionally leads to an exit from 946 /// the specified loop, and has no side-effects in the process. If so, return 947 /// the block that is exited to, otherwise return null. 948 static BasicBlock *isTrivialLoopExitBlock(Loop *L, BasicBlock *BB) { 949 std::set<BasicBlock*> Visited; 950 Visited.insert(L->getHeader()); // Branches to header make infinite loops. 951 BasicBlock *ExitBB = nullptr; 952 if (isTrivialLoopExitBlockHelper(L, BB, ExitBB, Visited)) 953 return ExitBB; 954 return nullptr; 955 } 956 957 /// We have found that we can unswitch CurrentLoop when LoopCond == Val to 958 /// simplify the loop. If we decide that this is profitable, 959 /// unswitch the loop, reprocess the pieces, then return true. 960 bool LoopUnswitch::unswitchIfProfitable(Value *LoopCond, Constant *Val, 961 Instruction *TI, 962 ArrayRef<Instruction *> ToDuplicate) { 963 // Check to see if it would be profitable to unswitch current loop. 964 if (!BranchesInfo.costAllowsUnswitching()) { 965 LLVM_DEBUG(dbgs() << "NOT unswitching loop %" 966 << CurrentLoop->getHeader()->getName() 967 << " at non-trivial condition '" << *Val 968 << "' == " << *LoopCond << "\n" 969 << ". Cost too high.\n"); 970 return false; 971 } 972 if (HasBranchDivergence && 973 getAnalysis<LegacyDivergenceAnalysis>().isDivergent(LoopCond)) { 974 LLVM_DEBUG(dbgs() << "NOT unswitching loop %" 975 << CurrentLoop->getHeader()->getName() 976 << " at non-trivial condition '" << *Val 977 << "' == " << *LoopCond << "\n" 978 << ". Condition is divergent.\n"); 979 return false; 980 } 981 982 unswitchNontrivialCondition(LoopCond, Val, CurrentLoop, TI, ToDuplicate); 983 return true; 984 } 985 986 /// Emit a conditional branch on two values if LIC == Val, branch to TrueDst, 987 /// otherwise branch to FalseDest. Insert the code immediately before OldBranch 988 /// and remove (but not erase!) it from the function. 989 void LoopUnswitch::emitPreheaderBranchOnCondition( 990 Value *LIC, Constant *Val, BasicBlock *TrueDest, BasicBlock *FalseDest, 991 BranchInst *OldBranch, Instruction *TI, 992 ArrayRef<Instruction *> ToDuplicate) { 993 assert(OldBranch->isUnconditional() && "Preheader is not split correctly"); 994 assert(TrueDest != FalseDest && "Branch targets should be different"); 995 996 // Insert a conditional branch on LIC to the two preheaders. The original 997 // code is the true version and the new code is the false version. 998 Value *BranchVal = LIC; 999 bool Swapped = false; 1000 1001 if (!ToDuplicate.empty()) { 1002 ValueToValueMapTy Old2New; 1003 for (Instruction *I : reverse(ToDuplicate)) { 1004 auto *New = I->clone(); 1005 New->insertBefore(OldBranch); 1006 RemapInstruction(New, Old2New, 1007 RF_NoModuleLevelChanges | RF_IgnoreMissingLocals); 1008 Old2New[I] = New; 1009 1010 if (MSSAU) { 1011 MemorySSA *MSSA = MSSAU->getMemorySSA(); 1012 auto *MemA = dyn_cast_or_null<MemoryUse>(MSSA->getMemoryAccess(I)); 1013 if (!MemA) 1014 continue; 1015 1016 Loop *L = LI->getLoopFor(I->getParent()); 1017 auto *DefiningAccess = MemA->getDefiningAccess(); 1018 // Get the first defining access before the loop. 1019 while (L->contains(DefiningAccess->getBlock())) { 1020 // If the defining access is a MemoryPhi, get the incoming 1021 // value for the pre-header as defining access. 1022 if (auto *MemPhi = dyn_cast<MemoryPhi>(DefiningAccess)) { 1023 DefiningAccess = 1024 MemPhi->getIncomingValueForBlock(L->getLoopPreheader()); 1025 } else { 1026 DefiningAccess = 1027 cast<MemoryDef>(DefiningAccess)->getDefiningAccess(); 1028 } 1029 } 1030 MSSAU->createMemoryAccessInBB(New, DefiningAccess, New->getParent(), 1031 MemorySSA::BeforeTerminator); 1032 } 1033 } 1034 BranchVal = Old2New[ToDuplicate[0]]; 1035 } else { 1036 1037 if (!isa<ConstantInt>(Val) || 1038 Val->getType() != Type::getInt1Ty(LIC->getContext())) 1039 BranchVal = new ICmpInst(OldBranch, ICmpInst::ICMP_EQ, LIC, Val); 1040 else if (Val != ConstantInt::getTrue(Val->getContext())) { 1041 // We want to enter the new loop when the condition is true. 1042 std::swap(TrueDest, FalseDest); 1043 Swapped = true; 1044 } 1045 } 1046 1047 // Old branch will be removed, so save its parent and successor to update the 1048 // DomTree. 1049 auto *OldBranchSucc = OldBranch->getSuccessor(0); 1050 auto *OldBranchParent = OldBranch->getParent(); 1051 1052 // Insert the new branch. 1053 BranchInst *BI = 1054 IRBuilder<>(OldBranch).CreateCondBr(BranchVal, TrueDest, FalseDest, TI); 1055 if (Swapped) 1056 BI->swapProfMetadata(); 1057 1058 // Remove the old branch so there is only one branch at the end. This is 1059 // needed to perform DomTree's internal DFS walk on the function's CFG. 1060 OldBranch->removeFromParent(); 1061 1062 // Inform the DT about the new branch. 1063 if (DT) { 1064 // First, add both successors. 1065 SmallVector<DominatorTree::UpdateType, 3> Updates; 1066 if (TrueDest != OldBranchSucc) 1067 Updates.push_back({DominatorTree::Insert, OldBranchParent, TrueDest}); 1068 if (FalseDest != OldBranchSucc) 1069 Updates.push_back({DominatorTree::Insert, OldBranchParent, FalseDest}); 1070 // If both of the new successors are different from the old one, inform the 1071 // DT that the edge was deleted. 1072 if (OldBranchSucc != TrueDest && OldBranchSucc != FalseDest) { 1073 Updates.push_back({DominatorTree::Delete, OldBranchParent, OldBranchSucc}); 1074 } 1075 1076 if (MSSAU) 1077 MSSAU->applyUpdates(Updates, *DT, /*UpdateDT=*/true); 1078 else 1079 DT->applyUpdates(Updates); 1080 } 1081 1082 // If either edge is critical, split it. This helps preserve LoopSimplify 1083 // form for enclosing loops. 1084 auto Options = 1085 CriticalEdgeSplittingOptions(DT, LI, MSSAU.get()).setPreserveLCSSA(); 1086 SplitCriticalEdge(BI, 0, Options); 1087 SplitCriticalEdge(BI, 1, Options); 1088 } 1089 1090 /// Given a loop that has a trivial unswitchable condition in it (a cond branch 1091 /// from its header block to its latch block, where the path through the loop 1092 /// that doesn't execute its body has no side-effects), unswitch it. This 1093 /// doesn't involve any code duplication, just moving the conditional branch 1094 /// outside of the loop and updating loop info. 1095 void LoopUnswitch::unswitchTrivialCondition(Loop *L, Value *Cond, Constant *Val, 1096 BasicBlock *ExitBlock, 1097 Instruction *TI) { 1098 LLVM_DEBUG(dbgs() << "loop-unswitch: Trivial-Unswitch loop %" 1099 << LoopHeader->getName() << " [" << L->getBlocks().size() 1100 << " blocks] in Function " 1101 << L->getHeader()->getParent()->getName() 1102 << " on cond: " << *Val << " == " << *Cond << "\n"); 1103 // We are going to make essential changes to CFG. This may invalidate cached 1104 // information for L or one of its parent loops in SCEV. 1105 if (auto *SEWP = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>()) 1106 SEWP->getSE().forgetTopmostLoop(L); 1107 1108 // First step, split the preheader, so that we know that there is a safe place 1109 // to insert the conditional branch. We will change LoopPreheader to have a 1110 // conditional branch on Cond. 1111 BasicBlock *NewPH = SplitEdge(LoopPreheader, LoopHeader, DT, LI, MSSAU.get()); 1112 1113 // Now that we have a place to insert the conditional branch, create a place 1114 // to branch to: this is the exit block out of the loop that we should 1115 // short-circuit to. 1116 1117 // Split this block now, so that the loop maintains its exit block, and so 1118 // that the jump from the preheader can execute the contents of the exit block 1119 // without actually branching to it (the exit block should be dominated by the 1120 // loop header, not the preheader). 1121 assert(!L->contains(ExitBlock) && "Exit block is in the loop?"); 1122 BasicBlock *NewExit = 1123 SplitBlock(ExitBlock, &ExitBlock->front(), DT, LI, MSSAU.get()); 1124 1125 // Okay, now we have a position to branch from and a position to branch to, 1126 // insert the new conditional branch. 1127 auto *OldBranch = dyn_cast<BranchInst>(LoopPreheader->getTerminator()); 1128 assert(OldBranch && "Failed to split the preheader"); 1129 emitPreheaderBranchOnCondition(Cond, Val, NewExit, NewPH, OldBranch, TI); 1130 1131 // emitPreheaderBranchOnCondition removed the OldBranch from the function. 1132 // Delete it, as it is no longer needed. 1133 delete OldBranch; 1134 1135 // We need to reprocess this loop, it could be unswitched again. 1136 RedoLoop = true; 1137 1138 // Now that we know that the loop is never entered when this condition is a 1139 // particular value, rewrite the loop with this info. We know that this will 1140 // at least eliminate the old branch. 1141 rewriteLoopBodyWithConditionConstant(L, Cond, Val, /*IsEqual=*/false); 1142 1143 ++NumTrivial; 1144 } 1145 1146 /// Check if the first non-constant condition starting from the loop header is 1147 /// a trivial unswitch condition: that is, a condition controls whether or not 1148 /// the loop does anything at all. If it is a trivial condition, unswitching 1149 /// produces no code duplications (equivalently, it produces a simpler loop and 1150 /// a new empty loop, which gets deleted). Therefore always unswitch trivial 1151 /// condition. 1152 bool LoopUnswitch::tryTrivialLoopUnswitch(bool &Changed) { 1153 BasicBlock *CurrentBB = CurrentLoop->getHeader(); 1154 Instruction *CurrentTerm = CurrentBB->getTerminator(); 1155 LLVMContext &Context = CurrentBB->getContext(); 1156 1157 // If loop header has only one reachable successor (currently via an 1158 // unconditional branch or constant foldable conditional branch, but 1159 // should also consider adding constant foldable switch instruction in 1160 // future), we should keep looking for trivial condition candidates in 1161 // the successor as well. An alternative is to constant fold conditions 1162 // and merge successors into loop header (then we only need to check header's 1163 // terminator). The reason for not doing this in LoopUnswitch pass is that 1164 // it could potentially break LoopPassManager's invariants. Folding dead 1165 // branches could either eliminate the current loop or make other loops 1166 // unreachable. LCSSA form might also not be preserved after deleting 1167 // branches. The following code keeps traversing loop header's successors 1168 // until it finds the trivial condition candidate (condition that is not a 1169 // constant). Since unswitching generates branches with constant conditions, 1170 // this scenario could be very common in practice. 1171 SmallPtrSet<BasicBlock*, 8> Visited; 1172 1173 while (true) { 1174 // If we exit loop or reach a previous visited block, then 1175 // we can not reach any trivial condition candidates (unfoldable 1176 // branch instructions or switch instructions) and no unswitch 1177 // can happen. Exit and return false. 1178 if (!CurrentLoop->contains(CurrentBB) || !Visited.insert(CurrentBB).second) 1179 return false; 1180 1181 // Check if this loop will execute any side-effecting instructions (e.g. 1182 // stores, calls, volatile loads) in the part of the loop that the code 1183 // *would* execute. Check the header first. 1184 for (Instruction &I : *CurrentBB) 1185 if (I.mayHaveSideEffects()) 1186 return false; 1187 1188 if (BranchInst *BI = dyn_cast<BranchInst>(CurrentTerm)) { 1189 if (BI->isUnconditional()) { 1190 CurrentBB = BI->getSuccessor(0); 1191 } else if (BI->getCondition() == ConstantInt::getTrue(Context)) { 1192 CurrentBB = BI->getSuccessor(0); 1193 } else if (BI->getCondition() == ConstantInt::getFalse(Context)) { 1194 CurrentBB = BI->getSuccessor(1); 1195 } else { 1196 // Found a trivial condition candidate: non-foldable conditional branch. 1197 break; 1198 } 1199 } else if (SwitchInst *SI = dyn_cast<SwitchInst>(CurrentTerm)) { 1200 // At this point, any constant-foldable instructions should have probably 1201 // been folded. 1202 ConstantInt *Cond = dyn_cast<ConstantInt>(SI->getCondition()); 1203 if (!Cond) 1204 break; 1205 // Find the target block we are definitely going to. 1206 CurrentBB = SI->findCaseValue(Cond)->getCaseSuccessor(); 1207 } else { 1208 // We do not understand these terminator instructions. 1209 break; 1210 } 1211 1212 CurrentTerm = CurrentBB->getTerminator(); 1213 } 1214 1215 // CondVal is the condition that controls the trivial condition. 1216 // LoopExitBB is the BasicBlock that loop exits when meets trivial condition. 1217 Constant *CondVal = nullptr; 1218 BasicBlock *LoopExitBB = nullptr; 1219 1220 if (BranchInst *BI = dyn_cast<BranchInst>(CurrentTerm)) { 1221 // If this isn't branching on an invariant condition, we can't unswitch it. 1222 if (!BI->isConditional()) 1223 return false; 1224 1225 Value *LoopCond = findLIVLoopCondition(BI->getCondition(), CurrentLoop, 1226 Changed, MSSAU.get()) 1227 .first; 1228 1229 // Unswitch only if the trivial condition itself is an LIV (not 1230 // partial LIV which could occur in and/or) 1231 if (!LoopCond || LoopCond != BI->getCondition()) 1232 return false; 1233 1234 // Check to see if a successor of the branch is guaranteed to 1235 // exit through a unique exit block without having any 1236 // side-effects. If so, determine the value of Cond that causes 1237 // it to do this. 1238 if ((LoopExitBB = 1239 isTrivialLoopExitBlock(CurrentLoop, BI->getSuccessor(0)))) { 1240 CondVal = ConstantInt::getTrue(Context); 1241 } else if ((LoopExitBB = 1242 isTrivialLoopExitBlock(CurrentLoop, BI->getSuccessor(1)))) { 1243 CondVal = ConstantInt::getFalse(Context); 1244 } 1245 1246 // If we didn't find a single unique LoopExit block, or if the loop exit 1247 // block contains phi nodes, this isn't trivial. 1248 if (!LoopExitBB || isa<PHINode>(LoopExitBB->begin())) 1249 return false; // Can't handle this. 1250 1251 if (equalityPropUnSafe(*LoopCond)) 1252 return false; 1253 1254 unswitchTrivialCondition(CurrentLoop, LoopCond, CondVal, LoopExitBB, 1255 CurrentTerm); 1256 ++NumBranches; 1257 return true; 1258 } else if (SwitchInst *SI = dyn_cast<SwitchInst>(CurrentTerm)) { 1259 // If this isn't switching on an invariant condition, we can't unswitch it. 1260 Value *LoopCond = findLIVLoopCondition(SI->getCondition(), CurrentLoop, 1261 Changed, MSSAU.get()) 1262 .first; 1263 1264 // Unswitch only if the trivial condition itself is an LIV (not 1265 // partial LIV which could occur in and/or) 1266 if (!LoopCond || LoopCond != SI->getCondition()) 1267 return false; 1268 1269 // Check to see if a successor of the switch is guaranteed to go to the 1270 // latch block or exit through a one exit block without having any 1271 // side-effects. If so, determine the value of Cond that causes it to do 1272 // this. 1273 // Note that we can't trivially unswitch on the default case or 1274 // on already unswitched cases. 1275 for (auto Case : SI->cases()) { 1276 BasicBlock *LoopExitCandidate; 1277 if ((LoopExitCandidate = 1278 isTrivialLoopExitBlock(CurrentLoop, Case.getCaseSuccessor()))) { 1279 // Okay, we found a trivial case, remember the value that is trivial. 1280 ConstantInt *CaseVal = Case.getCaseValue(); 1281 1282 // Check that it was not unswitched before, since already unswitched 1283 // trivial vals are looks trivial too. 1284 if (BranchesInfo.isUnswitched(SI, CaseVal)) 1285 continue; 1286 LoopExitBB = LoopExitCandidate; 1287 CondVal = CaseVal; 1288 break; 1289 } 1290 } 1291 1292 // If we didn't find a single unique LoopExit block, or if the loop exit 1293 // block contains phi nodes, this isn't trivial. 1294 if (!LoopExitBB || isa<PHINode>(LoopExitBB->begin())) 1295 return false; // Can't handle this. 1296 1297 unswitchTrivialCondition(CurrentLoop, LoopCond, CondVal, LoopExitBB, 1298 nullptr); 1299 1300 // We are only unswitching full LIV. 1301 BranchesInfo.setUnswitched(SI, CondVal); 1302 ++NumSwitches; 1303 return true; 1304 } 1305 return false; 1306 } 1307 1308 /// Split all of the edges from inside the loop to their exit blocks. 1309 /// Update the appropriate Phi nodes as we do so. 1310 void LoopUnswitch::splitExitEdges( 1311 Loop *L, const SmallVectorImpl<BasicBlock *> &ExitBlocks) { 1312 1313 for (unsigned I = 0, E = ExitBlocks.size(); I != E; ++I) { 1314 BasicBlock *ExitBlock = ExitBlocks[I]; 1315 SmallVector<BasicBlock *, 4> Preds(pred_begin(ExitBlock), 1316 pred_end(ExitBlock)); 1317 1318 // Although SplitBlockPredecessors doesn't preserve loop-simplify in 1319 // general, if we call it on all predecessors of all exits then it does. 1320 SplitBlockPredecessors(ExitBlock, Preds, ".us-lcssa", DT, LI, MSSAU.get(), 1321 /*PreserveLCSSA*/ true); 1322 } 1323 } 1324 1325 /// We determined that the loop is profitable to unswitch when LIC equal Val. 1326 /// Split it into loop versions and test the condition outside of either loop. 1327 /// Return the loops created as Out1/Out2. 1328 void LoopUnswitch::unswitchNontrivialCondition( 1329 Value *LIC, Constant *Val, Loop *L, Instruction *TI, 1330 ArrayRef<Instruction *> ToDuplicate) { 1331 Function *F = LoopHeader->getParent(); 1332 LLVM_DEBUG(dbgs() << "loop-unswitch: Unswitching loop %" 1333 << LoopHeader->getName() << " [" << L->getBlocks().size() 1334 << " blocks] in Function " << F->getName() << " when '" 1335 << *Val << "' == " << *LIC << "\n"); 1336 1337 // We are going to make essential changes to CFG. This may invalidate cached 1338 // information for L or one of its parent loops in SCEV. 1339 if (auto *SEWP = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>()) 1340 SEWP->getSE().forgetTopmostLoop(L); 1341 1342 LoopBlocks.clear(); 1343 NewBlocks.clear(); 1344 1345 if (MSSAU && VerifyMemorySSA) 1346 MSSA->verifyMemorySSA(); 1347 1348 // First step, split the preheader and exit blocks, and add these blocks to 1349 // the LoopBlocks list. 1350 BasicBlock *NewPreheader = 1351 SplitEdge(LoopPreheader, LoopHeader, DT, LI, MSSAU.get()); 1352 LoopBlocks.push_back(NewPreheader); 1353 1354 // We want the loop to come after the preheader, but before the exit blocks. 1355 llvm::append_range(LoopBlocks, L->blocks()); 1356 1357 SmallVector<BasicBlock*, 8> ExitBlocks; 1358 L->getUniqueExitBlocks(ExitBlocks); 1359 1360 // Split all of the edges from inside the loop to their exit blocks. Update 1361 // the appropriate Phi nodes as we do so. 1362 splitExitEdges(L, ExitBlocks); 1363 1364 // The exit blocks may have been changed due to edge splitting, recompute. 1365 ExitBlocks.clear(); 1366 L->getUniqueExitBlocks(ExitBlocks); 1367 1368 // Add exit blocks to the loop blocks. 1369 llvm::append_range(LoopBlocks, ExitBlocks); 1370 1371 // Next step, clone all of the basic blocks that make up the loop (including 1372 // the loop preheader and exit blocks), keeping track of the mapping between 1373 // the instructions and blocks. 1374 NewBlocks.reserve(LoopBlocks.size()); 1375 ValueToValueMapTy VMap; 1376 for (unsigned I = 0, E = LoopBlocks.size(); I != E; ++I) { 1377 BasicBlock *NewBB = CloneBasicBlock(LoopBlocks[I], VMap, ".us", F); 1378 1379 NewBlocks.push_back(NewBB); 1380 VMap[LoopBlocks[I]] = NewBB; // Keep the BB mapping. 1381 } 1382 1383 // Splice the newly inserted blocks into the function right before the 1384 // original preheader. 1385 F->getBasicBlockList().splice(NewPreheader->getIterator(), 1386 F->getBasicBlockList(), 1387 NewBlocks[0]->getIterator(), F->end()); 1388 1389 // Now we create the new Loop object for the versioned loop. 1390 Loop *NewLoop = cloneLoop(L, L->getParentLoop(), VMap, LI, LPM); 1391 1392 // Recalculate unswitching quota, inherit simplified switches info for NewBB, 1393 // Probably clone more loop-unswitch related loop properties. 1394 BranchesInfo.cloneData(NewLoop, L, VMap); 1395 1396 Loop *ParentLoop = L->getParentLoop(); 1397 if (ParentLoop) { 1398 // Make sure to add the cloned preheader and exit blocks to the parent loop 1399 // as well. 1400 ParentLoop->addBasicBlockToLoop(NewBlocks[0], *LI); 1401 } 1402 1403 for (unsigned EBI = 0, EBE = ExitBlocks.size(); EBI != EBE; ++EBI) { 1404 BasicBlock *NewExit = cast<BasicBlock>(VMap[ExitBlocks[EBI]]); 1405 // The new exit block should be in the same loop as the old one. 1406 if (Loop *ExitBBLoop = LI->getLoopFor(ExitBlocks[EBI])) 1407 ExitBBLoop->addBasicBlockToLoop(NewExit, *LI); 1408 1409 assert(NewExit->getTerminator()->getNumSuccessors() == 1 && 1410 "Exit block should have been split to have one successor!"); 1411 BasicBlock *ExitSucc = NewExit->getTerminator()->getSuccessor(0); 1412 1413 // If the successor of the exit block had PHI nodes, add an entry for 1414 // NewExit. 1415 for (PHINode &PN : ExitSucc->phis()) { 1416 Value *V = PN.getIncomingValueForBlock(ExitBlocks[EBI]); 1417 ValueToValueMapTy::iterator It = VMap.find(V); 1418 if (It != VMap.end()) V = It->second; 1419 PN.addIncoming(V, NewExit); 1420 } 1421 1422 if (LandingPadInst *LPad = NewExit->getLandingPadInst()) { 1423 PHINode *PN = PHINode::Create(LPad->getType(), 0, "", 1424 &*ExitSucc->getFirstInsertionPt()); 1425 1426 for (BasicBlock *BB : predecessors(ExitSucc)) { 1427 LandingPadInst *LPI = BB->getLandingPadInst(); 1428 LPI->replaceAllUsesWith(PN); 1429 PN->addIncoming(LPI, BB); 1430 } 1431 } 1432 } 1433 1434 // Rewrite the code to refer to itself. 1435 for (unsigned NBI = 0, NBE = NewBlocks.size(); NBI != NBE; ++NBI) { 1436 for (Instruction &I : *NewBlocks[NBI]) { 1437 RemapInstruction(&I, VMap, 1438 RF_NoModuleLevelChanges | RF_IgnoreMissingLocals); 1439 if (auto *II = dyn_cast<AssumeInst>(&I)) 1440 AC->registerAssumption(II); 1441 } 1442 } 1443 1444 // Rewrite the original preheader to select between versions of the loop. 1445 BranchInst *OldBR = cast<BranchInst>(LoopPreheader->getTerminator()); 1446 assert(OldBR->isUnconditional() && OldBR->getSuccessor(0) == LoopBlocks[0] && 1447 "Preheader splitting did not work correctly!"); 1448 1449 if (MSSAU) { 1450 // Update MemorySSA after cloning, and before splitting to unreachables, 1451 // since that invalidates the 1:1 mapping of clones in VMap. 1452 LoopBlocksRPO LBRPO(L); 1453 LBRPO.perform(LI); 1454 MSSAU->updateForClonedLoop(LBRPO, ExitBlocks, VMap); 1455 } 1456 1457 // Emit the new branch that selects between the two versions of this loop. 1458 emitPreheaderBranchOnCondition(LIC, Val, NewBlocks[0], LoopBlocks[0], OldBR, 1459 TI, ToDuplicate); 1460 if (MSSAU) { 1461 // Update MemoryPhis in Exit blocks. 1462 MSSAU->updateExitBlocksForClonedLoop(ExitBlocks, VMap, *DT); 1463 if (VerifyMemorySSA) 1464 MSSA->verifyMemorySSA(); 1465 } 1466 1467 // The OldBr was replaced by a new one and removed (but not erased) by 1468 // emitPreheaderBranchOnCondition. It is no longer needed, so delete it. 1469 delete OldBR; 1470 1471 LoopProcessWorklist.push_back(NewLoop); 1472 RedoLoop = true; 1473 1474 // Keep a WeakTrackingVH holding onto LIC. If the first call to 1475 // RewriteLoopBody 1476 // deletes the instruction (for example by simplifying a PHI that feeds into 1477 // the condition that we're unswitching on), we don't rewrite the second 1478 // iteration. 1479 WeakTrackingVH LICHandle(LIC); 1480 1481 if (ToDuplicate.empty()) { 1482 // Now we rewrite the original code to know that the condition is true and 1483 // the new code to know that the condition is false. 1484 rewriteLoopBodyWithConditionConstant(L, LIC, Val, /*IsEqual=*/false); 1485 1486 // It's possible that simplifying one loop could cause the other to be 1487 // changed to another value or a constant. If its a constant, don't 1488 // simplify it. 1489 if (!LoopProcessWorklist.empty() && LoopProcessWorklist.back() == NewLoop && 1490 LICHandle && !isa<Constant>(LICHandle)) 1491 rewriteLoopBodyWithConditionConstant(NewLoop, LICHandle, Val, 1492 /*IsEqual=*/true); 1493 } else { 1494 // Partial unswitching. Update the condition in the right loop with the 1495 // constant. 1496 auto *CC = cast<ConstantInt>(Val); 1497 if (CC->isOneValue()) { 1498 rewriteLoopBodyWithConditionConstant(NewLoop, VMap[LIC], Val, 1499 /*IsEqual=*/true); 1500 } else 1501 rewriteLoopBodyWithConditionConstant(L, LIC, Val, /*IsEqual=*/true); 1502 1503 // Mark the new loop as partially unswitched, to avoid unswitching on the 1504 // same condition again. 1505 auto &Context = NewLoop->getHeader()->getContext(); 1506 MDNode *DisableUnswitchMD = MDNode::get( 1507 Context, MDString::get(Context, "llvm.loop.unswitch.partial.disable")); 1508 MDNode *NewLoopID = makePostTransformationMetadata( 1509 Context, L->getLoopID(), {"llvm.loop.unswitch.partial"}, 1510 {DisableUnswitchMD}); 1511 NewLoop->setLoopID(NewLoopID); 1512 } 1513 1514 if (MSSA && VerifyMemorySSA) 1515 MSSA->verifyMemorySSA(); 1516 } 1517 1518 /// Remove all instances of I from the worklist vector specified. 1519 static void removeFromWorklist(Instruction *I, 1520 std::vector<Instruction *> &Worklist) { 1521 llvm::erase_value(Worklist, I); 1522 } 1523 1524 /// When we find that I really equals V, remove I from the 1525 /// program, replacing all uses with V and update the worklist. 1526 static void replaceUsesOfWith(Instruction *I, Value *V, 1527 std::vector<Instruction *> &Worklist, Loop *L, 1528 LPPassManager *LPM, MemorySSAUpdater *MSSAU) { 1529 LLVM_DEBUG(dbgs() << "Replace with '" << *V << "': " << *I << "\n"); 1530 1531 // Add uses to the worklist, which may be dead now. 1532 for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) 1533 if (Instruction *Use = dyn_cast<Instruction>(I->getOperand(i))) 1534 Worklist.push_back(Use); 1535 1536 // Add users to the worklist which may be simplified now. 1537 for (User *U : I->users()) 1538 Worklist.push_back(cast<Instruction>(U)); 1539 removeFromWorklist(I, Worklist); 1540 I->replaceAllUsesWith(V); 1541 if (!I->mayHaveSideEffects()) { 1542 if (MSSAU) 1543 MSSAU->removeMemoryAccess(I); 1544 I->eraseFromParent(); 1545 } 1546 ++NumSimplify; 1547 } 1548 1549 /// We know either that the value LIC has the value specified by Val in the 1550 /// specified loop, or we know it does NOT have that value. 1551 /// Rewrite any uses of LIC or of properties correlated to it. 1552 void LoopUnswitch::rewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC, 1553 Constant *Val, 1554 bool IsEqual) { 1555 assert(!isa<Constant>(LIC) && "Why are we unswitching on a constant?"); 1556 1557 // FIXME: Support correlated properties, like: 1558 // for (...) 1559 // if (li1 < li2) 1560 // ... 1561 // if (li1 > li2) 1562 // ... 1563 1564 // FOLD boolean conditions (X|LIC), (X&LIC). Fold conditional branches, 1565 // selects, switches. 1566 std::vector<Instruction*> Worklist; 1567 LLVMContext &Context = Val->getContext(); 1568 1569 // If we know that LIC == Val, or that LIC == NotVal, just replace uses of LIC 1570 // in the loop with the appropriate one directly. 1571 if (IsEqual || (isa<ConstantInt>(Val) && 1572 Val->getType()->isIntegerTy(1))) { 1573 Value *Replacement; 1574 if (IsEqual) 1575 Replacement = Val; 1576 else 1577 Replacement = ConstantInt::get(Type::getInt1Ty(Val->getContext()), 1578 !cast<ConstantInt>(Val)->getZExtValue()); 1579 1580 for (User *U : LIC->users()) { 1581 Instruction *UI = dyn_cast<Instruction>(U); 1582 if (!UI || !L->contains(UI)) 1583 continue; 1584 Worklist.push_back(UI); 1585 } 1586 1587 for (Instruction *UI : Worklist) 1588 UI->replaceUsesOfWith(LIC, Replacement); 1589 1590 simplifyCode(Worklist, L); 1591 return; 1592 } 1593 1594 // Otherwise, we don't know the precise value of LIC, but we do know that it 1595 // is certainly NOT "Val". As such, simplify any uses in the loop that we 1596 // can. This case occurs when we unswitch switch statements. 1597 for (User *U : LIC->users()) { 1598 Instruction *UI = dyn_cast<Instruction>(U); 1599 if (!UI || !L->contains(UI)) 1600 continue; 1601 1602 // At this point, we know LIC is definitely not Val. Try to use some simple 1603 // logic to simplify the user w.r.t. to the context. 1604 if (Value *Replacement = simplifyInstructionWithNotEqual(UI, LIC, Val)) { 1605 if (LI->replacementPreservesLCSSAForm(UI, Replacement)) { 1606 // This in-loop instruction has been simplified w.r.t. its context, 1607 // i.e. LIC != Val, make sure we propagate its replacement value to 1608 // all its users. 1609 // 1610 // We can not yet delete UI, the LIC user, yet, because that would invalidate 1611 // the LIC->users() iterator !. However, we can make this instruction 1612 // dead by replacing all its users and push it onto the worklist so that 1613 // it can be properly deleted and its operands simplified. 1614 UI->replaceAllUsesWith(Replacement); 1615 } 1616 } 1617 1618 // This is a LIC user, push it into the worklist so that simplifyCode can 1619 // attempt to simplify it. 1620 Worklist.push_back(UI); 1621 1622 // If we know that LIC is not Val, use this info to simplify code. 1623 SwitchInst *SI = dyn_cast<SwitchInst>(UI); 1624 if (!SI || !isa<ConstantInt>(Val)) continue; 1625 1626 // NOTE: if a case value for the switch is unswitched out, we record it 1627 // after the unswitch finishes. We can not record it here as the switch 1628 // is not a direct user of the partial LIV. 1629 SwitchInst::CaseHandle DeadCase = 1630 *SI->findCaseValue(cast<ConstantInt>(Val)); 1631 // Default case is live for multiple values. 1632 if (DeadCase == *SI->case_default()) 1633 continue; 1634 1635 // Found a dead case value. Don't remove PHI nodes in the 1636 // successor if they become single-entry, those PHI nodes may 1637 // be in the Users list. 1638 1639 BasicBlock *Switch = SI->getParent(); 1640 BasicBlock *SISucc = DeadCase.getCaseSuccessor(); 1641 BasicBlock *Latch = L->getLoopLatch(); 1642 1643 if (!SI->findCaseDest(SISucc)) continue; // Edge is critical. 1644 // If the DeadCase successor dominates the loop latch, then the 1645 // transformation isn't safe since it will delete the sole predecessor edge 1646 // to the latch. 1647 if (Latch && DT->dominates(SISucc, Latch)) 1648 continue; 1649 1650 // FIXME: This is a hack. We need to keep the successor around 1651 // and hooked up so as to preserve the loop structure, because 1652 // trying to update it is complicated. So instead we preserve the 1653 // loop structure and put the block on a dead code path. 1654 SplitEdge(Switch, SISucc, DT, LI, MSSAU.get()); 1655 // Compute the successors instead of relying on the return value 1656 // of SplitEdge, since it may have split the switch successor 1657 // after PHI nodes. 1658 BasicBlock *NewSISucc = DeadCase.getCaseSuccessor(); 1659 BasicBlock *OldSISucc = *succ_begin(NewSISucc); 1660 // Create an "unreachable" destination. 1661 BasicBlock *Abort = BasicBlock::Create(Context, "us-unreachable", 1662 Switch->getParent(), 1663 OldSISucc); 1664 new UnreachableInst(Context, Abort); 1665 // Force the new case destination to branch to the "unreachable" 1666 // block while maintaining a (dead) CFG edge to the old block. 1667 NewSISucc->getTerminator()->eraseFromParent(); 1668 BranchInst::Create(Abort, OldSISucc, 1669 ConstantInt::getTrue(Context), NewSISucc); 1670 // Release the PHI operands for this edge. 1671 for (PHINode &PN : NewSISucc->phis()) 1672 PN.setIncomingValueForBlock(Switch, UndefValue::get(PN.getType())); 1673 // Tell the domtree about the new block. We don't fully update the 1674 // domtree here -- instead we force it to do a full recomputation 1675 // after the pass is complete -- but we do need to inform it of 1676 // new blocks. 1677 DT->addNewBlock(Abort, NewSISucc); 1678 } 1679 1680 simplifyCode(Worklist, L); 1681 } 1682 1683 /// Now that we have simplified some instructions in the loop, walk over it and 1684 /// constant prop, dce, and fold control flow where possible. Note that this is 1685 /// effectively a very simple loop-structure-aware optimizer. During processing 1686 /// of this loop, L could very well be deleted, so it must not be used. 1687 /// 1688 /// FIXME: When the loop optimizer is more mature, separate this out to a new 1689 /// pass. 1690 /// 1691 void LoopUnswitch::simplifyCode(std::vector<Instruction *> &Worklist, Loop *L) { 1692 const DataLayout &DL = L->getHeader()->getModule()->getDataLayout(); 1693 while (!Worklist.empty()) { 1694 Instruction *I = Worklist.back(); 1695 Worklist.pop_back(); 1696 1697 // Simple DCE. 1698 if (isInstructionTriviallyDead(I)) { 1699 LLVM_DEBUG(dbgs() << "Remove dead instruction '" << *I << "\n"); 1700 1701 // Add uses to the worklist, which may be dead now. 1702 for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) 1703 if (Instruction *Use = dyn_cast<Instruction>(I->getOperand(i))) 1704 Worklist.push_back(Use); 1705 removeFromWorklist(I, Worklist); 1706 if (MSSAU) 1707 MSSAU->removeMemoryAccess(I); 1708 I->eraseFromParent(); 1709 ++NumSimplify; 1710 continue; 1711 } 1712 1713 // See if instruction simplification can hack this up. This is common for 1714 // things like "select false, X, Y" after unswitching made the condition be 1715 // 'false'. TODO: update the domtree properly so we can pass it here. 1716 if (Value *V = SimplifyInstruction(I, DL)) 1717 if (LI->replacementPreservesLCSSAForm(I, V)) { 1718 replaceUsesOfWith(I, V, Worklist, L, LPM, MSSAU.get()); 1719 continue; 1720 } 1721 1722 // Special case hacks that appear commonly in unswitched code. 1723 if (BranchInst *BI = dyn_cast<BranchInst>(I)) { 1724 if (BI->isUnconditional()) { 1725 // If BI's parent is the only pred of the successor, fold the two blocks 1726 // together. 1727 BasicBlock *Pred = BI->getParent(); 1728 (void)Pred; 1729 BasicBlock *Succ = BI->getSuccessor(0); 1730 BasicBlock *SinglePred = Succ->getSinglePredecessor(); 1731 if (!SinglePred) continue; // Nothing to do. 1732 assert(SinglePred == Pred && "CFG broken"); 1733 1734 // Make the LPM and Worklist updates specific to LoopUnswitch. 1735 removeFromWorklist(BI, Worklist); 1736 auto SuccIt = Succ->begin(); 1737 while (PHINode *PN = dyn_cast<PHINode>(SuccIt++)) { 1738 for (unsigned It = 0, E = PN->getNumOperands(); It != E; ++It) 1739 if (Instruction *Use = dyn_cast<Instruction>(PN->getOperand(It))) 1740 Worklist.push_back(Use); 1741 for (User *U : PN->users()) 1742 Worklist.push_back(cast<Instruction>(U)); 1743 removeFromWorklist(PN, Worklist); 1744 ++NumSimplify; 1745 } 1746 // Merge the block and make the remaining analyses updates. 1747 DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager); 1748 MergeBlockIntoPredecessor(Succ, &DTU, LI, MSSAU.get()); 1749 ++NumSimplify; 1750 continue; 1751 } 1752 1753 continue; 1754 } 1755 } 1756 } 1757 1758 /// Simple simplifications we can do given the information that Cond is 1759 /// definitely not equal to Val. 1760 Value *LoopUnswitch::simplifyInstructionWithNotEqual(Instruction *Inst, 1761 Value *Invariant, 1762 Constant *Val) { 1763 // icmp eq cond, val -> false 1764 ICmpInst *CI = dyn_cast<ICmpInst>(Inst); 1765 if (CI && CI->isEquality()) { 1766 Value *Op0 = CI->getOperand(0); 1767 Value *Op1 = CI->getOperand(1); 1768 if ((Op0 == Invariant && Op1 == Val) || (Op0 == Val && Op1 == Invariant)) { 1769 LLVMContext &Ctx = Inst->getContext(); 1770 if (CI->getPredicate() == CmpInst::ICMP_EQ) 1771 return ConstantInt::getFalse(Ctx); 1772 else 1773 return ConstantInt::getTrue(Ctx); 1774 } 1775 } 1776 1777 // FIXME: there may be other opportunities, e.g. comparison with floating 1778 // point, or Invariant - Val != 0, etc. 1779 return nullptr; 1780 } 1781