Home | History | Annotate | Line # | Download | only in AMDGPU
      1 //===- R600ControlFlowFinalizer.cpp - Finalize Control Flow Inst ----------===//
      2 //
      3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
      4 // See https://llvm.org/LICENSE.txt for license information.
      5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
      6 //
      7 //===----------------------------------------------------------------------===//
      8 //
      9 /// \file
     10 /// This pass turns all control flow pseudo instructions into native ones,
     11 /// computing their addresses on the fly; it also sets STACK_SIZE info.
     12 //
     13 //===----------------------------------------------------------------------===//
     14 
     15 #include "AMDGPU.h"
     16 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
     17 #include "R600MachineFunctionInfo.h"
     18 #include "R600Subtarget.h"
     19 #include <set>
     20 
     21 using namespace llvm;
     22 
     23 #define DEBUG_TYPE "r600cf"
     24 
     25 namespace {
     26 
     27 struct CFStack {
     28   enum StackItem {
     29     ENTRY = 0,
     30     SUB_ENTRY = 1,
     31     FIRST_NON_WQM_PUSH = 2,
     32     FIRST_NON_WQM_PUSH_W_FULL_ENTRY = 3
     33   };
     34 
     35   const R600Subtarget *ST;
     36   std::vector<StackItem> BranchStack;
     37   std::vector<StackItem> LoopStack;
     38   unsigned MaxStackSize;
     39   unsigned CurrentEntries = 0;
     40   unsigned CurrentSubEntries = 0;
     41 
     42   CFStack(const R600Subtarget *st, CallingConv::ID cc) : ST(st),
     43       // We need to reserve a stack entry for CALL_FS in vertex shaders.
     44       MaxStackSize(cc == CallingConv::AMDGPU_VS ? 1 : 0) {}
     45 
     46   unsigned getLoopDepth();
     47   bool branchStackContains(CFStack::StackItem);
     48   bool requiresWorkAroundForInst(unsigned Opcode);
     49   unsigned getSubEntrySize(CFStack::StackItem Item);
     50   void updateMaxStackSize();
     51   void pushBranch(unsigned Opcode, bool isWQM = false);
     52   void pushLoop();
     53   void popBranch();
     54   void popLoop();
     55 };
     56 
     57 unsigned CFStack::getLoopDepth() {
     58   return LoopStack.size();
     59 }
     60 
     61 bool CFStack::branchStackContains(CFStack::StackItem Item) {
     62   return llvm::is_contained(BranchStack, Item);
     63 }
     64 
     65 bool CFStack::requiresWorkAroundForInst(unsigned Opcode) {
     66   if (Opcode == R600::CF_ALU_PUSH_BEFORE && ST->hasCaymanISA() &&
     67       getLoopDepth() > 1)
     68     return true;
     69 
     70   if (!ST->hasCFAluBug())
     71     return false;
     72 
     73   switch(Opcode) {
     74   default: return false;
     75   case R600::CF_ALU_PUSH_BEFORE:
     76   case R600::CF_ALU_ELSE_AFTER:
     77   case R600::CF_ALU_BREAK:
     78   case R600::CF_ALU_CONTINUE:
     79     if (CurrentSubEntries == 0)
     80       return false;
     81     if (ST->getWavefrontSize() == 64) {
     82       // We are being conservative here.  We only require this work-around if
     83       // CurrentSubEntries > 3 &&
     84       // (CurrentSubEntries % 4 == 3 || CurrentSubEntries % 4 == 0)
     85       //
     86       // We have to be conservative, because we don't know for certain that
     87       // our stack allocation algorithm for Evergreen/NI is correct.  Applying this
     88       // work-around when CurrentSubEntries > 3 allows us to over-allocate stack
     89       // resources without any problems.
     90       return CurrentSubEntries > 3;
     91     } else {
     92       assert(ST->getWavefrontSize() == 32);
     93       // We are being conservative here.  We only require the work-around if
     94       // CurrentSubEntries > 7 &&
     95       // (CurrentSubEntries % 8 == 7 || CurrentSubEntries % 8 == 0)
     96       // See the comment on the wavefront size == 64 case for why we are
     97       // being conservative.
     98       return CurrentSubEntries > 7;
     99     }
    100   }
    101 }
    102 
    103 unsigned CFStack::getSubEntrySize(CFStack::StackItem Item) {
    104   switch(Item) {
    105   default:
    106     return 0;
    107   case CFStack::FIRST_NON_WQM_PUSH:
    108   assert(!ST->hasCaymanISA());
    109   if (ST->getGeneration() <= AMDGPUSubtarget::R700) {
    110     // +1 For the push operation.
    111     // +2 Extra space required.
    112     return 3;
    113   } else {
    114     // Some documentation says that this is not necessary on Evergreen,
    115     // but experimentation has show that we need to allocate 1 extra
    116     // sub-entry for the first non-WQM push.
    117     // +1 For the push operation.
    118     // +1 Extra space required.
    119     return 2;
    120   }
    121   case CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY:
    122     assert(ST->getGeneration() >= AMDGPUSubtarget::EVERGREEN);
    123     // +1 For the push operation.
    124     // +1 Extra space required.
    125     return 2;
    126   case CFStack::SUB_ENTRY:
    127     return 1;
    128   }
    129 }
    130 
    131 void CFStack::updateMaxStackSize() {
    132   unsigned CurrentStackSize = CurrentEntries + divideCeil(CurrentSubEntries, 4);
    133   MaxStackSize = std::max(CurrentStackSize, MaxStackSize);
    134 }
    135 
    136 void CFStack::pushBranch(unsigned Opcode, bool isWQM) {
    137   CFStack::StackItem Item = CFStack::ENTRY;
    138   switch(Opcode) {
    139   case R600::CF_PUSH_EG:
    140   case R600::CF_ALU_PUSH_BEFORE:
    141     if (!isWQM) {
    142       if (!ST->hasCaymanISA() &&
    143           !branchStackContains(CFStack::FIRST_NON_WQM_PUSH))
    144         Item = CFStack::FIRST_NON_WQM_PUSH;  // May not be required on Evergreen/NI
    145                                              // See comment in
    146                                              // CFStack::getSubEntrySize()
    147       else if (CurrentEntries > 0 &&
    148                ST->getGeneration() > AMDGPUSubtarget::EVERGREEN &&
    149                !ST->hasCaymanISA() &&
    150                !branchStackContains(CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY))
    151         Item = CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY;
    152       else
    153         Item = CFStack::SUB_ENTRY;
    154     } else
    155       Item = CFStack::ENTRY;
    156     break;
    157   }
    158   BranchStack.push_back(Item);
    159   if (Item == CFStack::ENTRY)
    160     CurrentEntries++;
    161   else
    162     CurrentSubEntries += getSubEntrySize(Item);
    163   updateMaxStackSize();
    164 }
    165 
    166 void CFStack::pushLoop() {
    167   LoopStack.push_back(CFStack::ENTRY);
    168   CurrentEntries++;
    169   updateMaxStackSize();
    170 }
    171 
    172 void CFStack::popBranch() {
    173   CFStack::StackItem Top = BranchStack.back();
    174   if (Top == CFStack::ENTRY)
    175     CurrentEntries--;
    176   else
    177     CurrentSubEntries-= getSubEntrySize(Top);
    178   BranchStack.pop_back();
    179 }
    180 
    181 void CFStack::popLoop() {
    182   CurrentEntries--;
    183   LoopStack.pop_back();
    184 }
    185 
    186 class R600ControlFlowFinalizer : public MachineFunctionPass {
    187 private:
    188   using ClauseFile = std::pair<MachineInstr *, std::vector<MachineInstr *>>;
    189 
    190   enum ControlFlowInstruction {
    191     CF_TC,
    192     CF_VC,
    193     CF_CALL_FS,
    194     CF_WHILE_LOOP,
    195     CF_END_LOOP,
    196     CF_LOOP_BREAK,
    197     CF_LOOP_CONTINUE,
    198     CF_JUMP,
    199     CF_ELSE,
    200     CF_POP,
    201     CF_END
    202   };
    203 
    204   const R600InstrInfo *TII = nullptr;
    205   const R600RegisterInfo *TRI = nullptr;
    206   unsigned MaxFetchInst;
    207   const R600Subtarget *ST = nullptr;
    208 
    209   bool IsTrivialInst(MachineInstr &MI) const {
    210     switch (MI.getOpcode()) {
    211     case R600::KILL:
    212     case R600::RETURN:
    213       return true;
    214     default:
    215       return false;
    216     }
    217   }
    218 
    219   const MCInstrDesc &getHWInstrDesc(ControlFlowInstruction CFI) const {
    220     unsigned Opcode = 0;
    221     bool isEg = (ST->getGeneration() >= AMDGPUSubtarget::EVERGREEN);
    222     switch (CFI) {
    223     case CF_TC:
    224       Opcode = isEg ? R600::CF_TC_EG : R600::CF_TC_R600;
    225       break;
    226     case CF_VC:
    227       Opcode = isEg ? R600::CF_VC_EG : R600::CF_VC_R600;
    228       break;
    229     case CF_CALL_FS:
    230       Opcode = isEg ? R600::CF_CALL_FS_EG : R600::CF_CALL_FS_R600;
    231       break;
    232     case CF_WHILE_LOOP:
    233       Opcode = isEg ? R600::WHILE_LOOP_EG : R600::WHILE_LOOP_R600;
    234       break;
    235     case CF_END_LOOP:
    236       Opcode = isEg ? R600::END_LOOP_EG : R600::END_LOOP_R600;
    237       break;
    238     case CF_LOOP_BREAK:
    239       Opcode = isEg ? R600::LOOP_BREAK_EG : R600::LOOP_BREAK_R600;
    240       break;
    241     case CF_LOOP_CONTINUE:
    242       Opcode = isEg ? R600::CF_CONTINUE_EG : R600::CF_CONTINUE_R600;
    243       break;
    244     case CF_JUMP:
    245       Opcode = isEg ? R600::CF_JUMP_EG : R600::CF_JUMP_R600;
    246       break;
    247     case CF_ELSE:
    248       Opcode = isEg ? R600::CF_ELSE_EG : R600::CF_ELSE_R600;
    249       break;
    250     case CF_POP:
    251       Opcode = isEg ? R600::POP_EG : R600::POP_R600;
    252       break;
    253     case CF_END:
    254       if (ST->hasCaymanISA()) {
    255         Opcode = R600::CF_END_CM;
    256         break;
    257       }
    258       Opcode = isEg ? R600::CF_END_EG : R600::CF_END_R600;
    259       break;
    260     }
    261     assert (Opcode && "No opcode selected");
    262     return TII->get(Opcode);
    263   }
    264 
    265   bool isCompatibleWithClause(const MachineInstr &MI,
    266                               std::set<unsigned> &DstRegs) const {
    267     unsigned DstMI, SrcMI;
    268     for (MachineInstr::const_mop_iterator I = MI.operands_begin(),
    269                                           E = MI.operands_end();
    270          I != E; ++I) {
    271       const MachineOperand &MO = *I;
    272       if (!MO.isReg())
    273         continue;
    274       if (MO.isDef()) {
    275         Register Reg = MO.getReg();
    276         if (R600::R600_Reg128RegClass.contains(Reg))
    277           DstMI = Reg;
    278         else
    279           DstMI = TRI->getMatchingSuperReg(Reg,
    280               R600RegisterInfo::getSubRegFromChannel(TRI->getHWRegChan(Reg)),
    281               &R600::R600_Reg128RegClass);
    282       }
    283       if (MO.isUse()) {
    284         Register Reg = MO.getReg();
    285         if (R600::R600_Reg128RegClass.contains(Reg))
    286           SrcMI = Reg;
    287         else
    288           SrcMI = TRI->getMatchingSuperReg(Reg,
    289               R600RegisterInfo::getSubRegFromChannel(TRI->getHWRegChan(Reg)),
    290               &R600::R600_Reg128RegClass);
    291       }
    292     }
    293     if ((DstRegs.find(SrcMI) == DstRegs.end())) {
    294       DstRegs.insert(DstMI);
    295       return true;
    296     } else
    297       return false;
    298   }
    299 
    300   ClauseFile
    301   MakeFetchClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator &I)
    302       const {
    303     MachineBasicBlock::iterator ClauseHead = I;
    304     std::vector<MachineInstr *> ClauseContent;
    305     unsigned AluInstCount = 0;
    306     bool IsTex = TII->usesTextureCache(*ClauseHead);
    307     std::set<unsigned> DstRegs;
    308     for (MachineBasicBlock::iterator E = MBB.end(); I != E; ++I) {
    309       if (IsTrivialInst(*I))
    310         continue;
    311       if (AluInstCount >= MaxFetchInst)
    312         break;
    313       if ((IsTex && !TII->usesTextureCache(*I)) ||
    314           (!IsTex && !TII->usesVertexCache(*I)))
    315         break;
    316       if (!isCompatibleWithClause(*I, DstRegs))
    317         break;
    318       AluInstCount ++;
    319       ClauseContent.push_back(&*I);
    320     }
    321     MachineInstr *MIb = BuildMI(MBB, ClauseHead, MBB.findDebugLoc(ClauseHead),
    322         getHWInstrDesc(IsTex?CF_TC:CF_VC))
    323         .addImm(0) // ADDR
    324         .addImm(AluInstCount - 1); // COUNT
    325     return ClauseFile(MIb, std::move(ClauseContent));
    326   }
    327 
    328   void getLiteral(MachineInstr &MI, std::vector<MachineOperand *> &Lits) const {
    329     static const unsigned LiteralRegs[] = {
    330       R600::ALU_LITERAL_X,
    331       R600::ALU_LITERAL_Y,
    332       R600::ALU_LITERAL_Z,
    333       R600::ALU_LITERAL_W
    334     };
    335     const SmallVector<std::pair<MachineOperand *, int64_t>, 3> Srcs =
    336         TII->getSrcs(MI);
    337     for (const auto &Src:Srcs) {
    338       if (Src.first->getReg() != R600::ALU_LITERAL_X)
    339         continue;
    340       int64_t Imm = Src.second;
    341       std::vector<MachineOperand *>::iterator It =
    342           llvm::find_if(Lits, [&](MachineOperand *val) {
    343             return val->isImm() && (val->getImm() == Imm);
    344           });
    345 
    346       // Get corresponding Operand
    347       MachineOperand &Operand = MI.getOperand(
    348           TII->getOperandIdx(MI.getOpcode(), R600::OpName::literal));
    349 
    350       if (It != Lits.end()) {
    351         // Reuse existing literal reg
    352         unsigned Index = It - Lits.begin();
    353         Src.first->setReg(LiteralRegs[Index]);
    354       } else {
    355         // Allocate new literal reg
    356         assert(Lits.size() < 4 && "Too many literals in Instruction Group");
    357         Src.first->setReg(LiteralRegs[Lits.size()]);
    358         Lits.push_back(&Operand);
    359       }
    360     }
    361   }
    362 
    363   MachineBasicBlock::iterator insertLiterals(
    364       MachineBasicBlock::iterator InsertPos,
    365       const std::vector<unsigned> &Literals) const {
    366     MachineBasicBlock *MBB = InsertPos->getParent();
    367     for (unsigned i = 0, e = Literals.size(); i < e; i+=2) {
    368       unsigned LiteralPair0 = Literals[i];
    369       unsigned LiteralPair1 = (i + 1 < e)?Literals[i + 1]:0;
    370       InsertPos = BuildMI(MBB, InsertPos->getDebugLoc(),
    371           TII->get(R600::LITERALS))
    372           .addImm(LiteralPair0)
    373           .addImm(LiteralPair1);
    374     }
    375     return InsertPos;
    376   }
    377 
    378   ClauseFile
    379   MakeALUClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator &I)
    380       const {
    381     MachineInstr &ClauseHead = *I;
    382     std::vector<MachineInstr *> ClauseContent;
    383     I++;
    384     for (MachineBasicBlock::instr_iterator E = MBB.instr_end(); I != E;) {
    385       if (IsTrivialInst(*I)) {
    386         ++I;
    387         continue;
    388       }
    389       if (!I->isBundle() && !TII->isALUInstr(I->getOpcode()))
    390         break;
    391       std::vector<MachineOperand *>Literals;
    392       if (I->isBundle()) {
    393         MachineInstr &DeleteMI = *I;
    394         MachineBasicBlock::instr_iterator BI = I.getInstrIterator();
    395         while (++BI != E && BI->isBundledWithPred()) {
    396           BI->unbundleFromPred();
    397           for (MachineOperand &MO : BI->operands()) {
    398             if (MO.isReg() && MO.isInternalRead())
    399               MO.setIsInternalRead(false);
    400           }
    401           getLiteral(*BI, Literals);
    402           ClauseContent.push_back(&*BI);
    403         }
    404         I = BI;
    405         DeleteMI.eraseFromParent();
    406       } else {
    407         getLiteral(*I, Literals);
    408         ClauseContent.push_back(&*I);
    409         I++;
    410       }
    411       for (unsigned i = 0, e = Literals.size(); i < e; i += 2) {
    412         MachineInstrBuilder MILit = BuildMI(MBB, I, I->getDebugLoc(),
    413             TII->get(R600::LITERALS));
    414         if (Literals[i]->isImm()) {
    415             MILit.addImm(Literals[i]->getImm());
    416         } else {
    417             MILit.addGlobalAddress(Literals[i]->getGlobal(),
    418                                    Literals[i]->getOffset());
    419         }
    420         if (i + 1 < e) {
    421           if (Literals[i + 1]->isImm()) {
    422             MILit.addImm(Literals[i + 1]->getImm());
    423           } else {
    424             MILit.addGlobalAddress(Literals[i + 1]->getGlobal(),
    425                                    Literals[i + 1]->getOffset());
    426           }
    427         } else
    428           MILit.addImm(0);
    429         ClauseContent.push_back(MILit);
    430       }
    431     }
    432     assert(ClauseContent.size() < 128 && "ALU clause is too big");
    433     ClauseHead.getOperand(7).setImm(ClauseContent.size() - 1);
    434     return ClauseFile(&ClauseHead, std::move(ClauseContent));
    435   }
    436 
    437   void EmitFetchClause(MachineBasicBlock::iterator InsertPos,
    438                        const DebugLoc &DL, ClauseFile &Clause,
    439                        unsigned &CfCount) {
    440     CounterPropagateAddr(*Clause.first, CfCount);
    441     MachineBasicBlock *BB = Clause.first->getParent();
    442     BuildMI(BB, DL, TII->get(R600::FETCH_CLAUSE)).addImm(CfCount);
    443     for (unsigned i = 0, e = Clause.second.size(); i < e; ++i) {
    444       BB->splice(InsertPos, BB, Clause.second[i]);
    445     }
    446     CfCount += 2 * Clause.second.size();
    447   }
    448 
    449   void EmitALUClause(MachineBasicBlock::iterator InsertPos, const DebugLoc &DL,
    450                      ClauseFile &Clause, unsigned &CfCount) {
    451     Clause.first->getOperand(0).setImm(0);
    452     CounterPropagateAddr(*Clause.first, CfCount);
    453     MachineBasicBlock *BB = Clause.first->getParent();
    454     BuildMI(BB, DL, TII->get(R600::ALU_CLAUSE)).addImm(CfCount);
    455     for (unsigned i = 0, e = Clause.second.size(); i < e; ++i) {
    456       BB->splice(InsertPos, BB, Clause.second[i]);
    457     }
    458     CfCount += Clause.second.size();
    459   }
    460 
    461   void CounterPropagateAddr(MachineInstr &MI, unsigned Addr) const {
    462     MI.getOperand(0).setImm(Addr + MI.getOperand(0).getImm());
    463   }
    464   void CounterPropagateAddr(const std::set<MachineInstr *> &MIs,
    465                             unsigned Addr) const {
    466     for (MachineInstr *MI : MIs) {
    467       CounterPropagateAddr(*MI, Addr);
    468     }
    469   }
    470 
    471 public:
    472   static char ID;
    473 
    474   R600ControlFlowFinalizer() : MachineFunctionPass(ID) {}
    475 
    476   bool runOnMachineFunction(MachineFunction &MF) override {
    477     ST = &MF.getSubtarget<R600Subtarget>();
    478     MaxFetchInst = ST->getTexVTXClauseSize();
    479     TII = ST->getInstrInfo();
    480     TRI = ST->getRegisterInfo();
    481 
    482     R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
    483 
    484     CFStack CFStack(ST, MF.getFunction().getCallingConv());
    485     for (MachineFunction::iterator MB = MF.begin(), ME = MF.end(); MB != ME;
    486         ++MB) {
    487       MachineBasicBlock &MBB = *MB;
    488       unsigned CfCount = 0;
    489       std::vector<std::pair<unsigned, std::set<MachineInstr *>>> LoopStack;
    490       std::vector<MachineInstr * > IfThenElseStack;
    491       if (MF.getFunction().getCallingConv() == CallingConv::AMDGPU_VS) {
    492         BuildMI(MBB, MBB.begin(), MBB.findDebugLoc(MBB.begin()),
    493             getHWInstrDesc(CF_CALL_FS));
    494         CfCount++;
    495       }
    496       std::vector<ClauseFile> FetchClauses, AluClauses;
    497       std::vector<MachineInstr *> LastAlu(1);
    498       std::vector<MachineInstr *> ToPopAfter;
    499 
    500       for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
    501           I != E;) {
    502         if (TII->usesTextureCache(*I) || TII->usesVertexCache(*I)) {
    503           LLVM_DEBUG(dbgs() << CfCount << ":"; I->dump(););
    504           FetchClauses.push_back(MakeFetchClause(MBB, I));
    505           CfCount++;
    506           LastAlu.back() = nullptr;
    507           continue;
    508         }
    509 
    510         MachineBasicBlock::iterator MI = I;
    511         if (MI->getOpcode() != R600::ENDIF)
    512           LastAlu.back() = nullptr;
    513         if (MI->getOpcode() == R600::CF_ALU)
    514           LastAlu.back() = &*MI;
    515         I++;
    516         bool RequiresWorkAround =
    517             CFStack.requiresWorkAroundForInst(MI->getOpcode());
    518         switch (MI->getOpcode()) {
    519         case R600::CF_ALU_PUSH_BEFORE:
    520           if (RequiresWorkAround) {
    521             LLVM_DEBUG(dbgs()
    522                        << "Applying bug work-around for ALU_PUSH_BEFORE\n");
    523             BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(R600::CF_PUSH_EG))
    524                 .addImm(CfCount + 1)
    525                 .addImm(1);
    526             MI->setDesc(TII->get(R600::CF_ALU));
    527             CfCount++;
    528             CFStack.pushBranch(R600::CF_PUSH_EG);
    529           } else
    530             CFStack.pushBranch(R600::CF_ALU_PUSH_BEFORE);
    531           LLVM_FALLTHROUGH;
    532         case R600::CF_ALU:
    533           I = MI;
    534           AluClauses.push_back(MakeALUClause(MBB, I));
    535           LLVM_DEBUG(dbgs() << CfCount << ":"; MI->dump(););
    536           CfCount++;
    537           break;
    538         case R600::WHILELOOP: {
    539           CFStack.pushLoop();
    540           MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
    541               getHWInstrDesc(CF_WHILE_LOOP))
    542               .addImm(1);
    543           std::pair<unsigned, std::set<MachineInstr *>> Pair(CfCount,
    544               std::set<MachineInstr *>());
    545           Pair.second.insert(MIb);
    546           LoopStack.push_back(std::move(Pair));
    547           MI->eraseFromParent();
    548           CfCount++;
    549           break;
    550         }
    551         case R600::ENDLOOP: {
    552           CFStack.popLoop();
    553           std::pair<unsigned, std::set<MachineInstr *>> Pair =
    554               std::move(LoopStack.back());
    555           LoopStack.pop_back();
    556           CounterPropagateAddr(Pair.second, CfCount);
    557           BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_END_LOOP))
    558               .addImm(Pair.first + 1);
    559           MI->eraseFromParent();
    560           CfCount++;
    561           break;
    562         }
    563         case R600::IF_PREDICATE_SET: {
    564           LastAlu.push_back(nullptr);
    565           MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
    566               getHWInstrDesc(CF_JUMP))
    567               .addImm(0)
    568               .addImm(0);
    569           IfThenElseStack.push_back(MIb);
    570           LLVM_DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
    571           MI->eraseFromParent();
    572           CfCount++;
    573           break;
    574         }
    575         case R600::ELSE: {
    576           MachineInstr * JumpInst = IfThenElseStack.back();
    577           IfThenElseStack.pop_back();
    578           CounterPropagateAddr(*JumpInst, CfCount);
    579           MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
    580               getHWInstrDesc(CF_ELSE))
    581               .addImm(0)
    582               .addImm(0);
    583           LLVM_DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
    584           IfThenElseStack.push_back(MIb);
    585           MI->eraseFromParent();
    586           CfCount++;
    587           break;
    588         }
    589         case R600::ENDIF: {
    590           CFStack.popBranch();
    591           if (LastAlu.back()) {
    592             ToPopAfter.push_back(LastAlu.back());
    593           } else {
    594             MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
    595                 getHWInstrDesc(CF_POP))
    596                 .addImm(CfCount + 1)
    597                 .addImm(1);
    598             (void)MIb;
    599             LLVM_DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
    600             CfCount++;
    601           }
    602 
    603           MachineInstr *IfOrElseInst = IfThenElseStack.back();
    604           IfThenElseStack.pop_back();
    605           CounterPropagateAddr(*IfOrElseInst, CfCount);
    606           IfOrElseInst->getOperand(1).setImm(1);
    607           LastAlu.pop_back();
    608           MI->eraseFromParent();
    609           break;
    610         }
    611         case R600::BREAK: {
    612           CfCount ++;
    613           MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
    614               getHWInstrDesc(CF_LOOP_BREAK))
    615               .addImm(0);
    616           LoopStack.back().second.insert(MIb);
    617           MI->eraseFromParent();
    618           break;
    619         }
    620         case R600::CONTINUE: {
    621           MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
    622               getHWInstrDesc(CF_LOOP_CONTINUE))
    623               .addImm(0);
    624           LoopStack.back().second.insert(MIb);
    625           MI->eraseFromParent();
    626           CfCount++;
    627           break;
    628         }
    629         case R600::RETURN: {
    630           DebugLoc DL = MBB.findDebugLoc(MI);
    631           BuildMI(MBB, MI, DL, getHWInstrDesc(CF_END));
    632           CfCount++;
    633           if (CfCount % 2) {
    634             BuildMI(MBB, I, DL, TII->get(R600::PAD));
    635             CfCount++;
    636           }
    637           MI->eraseFromParent();
    638           for (unsigned i = 0, e = FetchClauses.size(); i < e; i++)
    639             EmitFetchClause(I, DL, FetchClauses[i], CfCount);
    640           for (unsigned i = 0, e = AluClauses.size(); i < e; i++)
    641             EmitALUClause(I, DL, AluClauses[i], CfCount);
    642           break;
    643         }
    644         default:
    645           if (TII->isExport(MI->getOpcode())) {
    646             LLVM_DEBUG(dbgs() << CfCount << ":"; MI->dump(););
    647             CfCount++;
    648           }
    649           break;
    650         }
    651       }
    652       for (unsigned i = 0, e = ToPopAfter.size(); i < e; ++i) {
    653         MachineInstr *Alu = ToPopAfter[i];
    654         BuildMI(MBB, Alu, MBB.findDebugLoc((MachineBasicBlock::iterator)Alu),
    655             TII->get(R600::CF_ALU_POP_AFTER))
    656             .addImm(Alu->getOperand(0).getImm())
    657             .addImm(Alu->getOperand(1).getImm())
    658             .addImm(Alu->getOperand(2).getImm())
    659             .addImm(Alu->getOperand(3).getImm())
    660             .addImm(Alu->getOperand(4).getImm())
    661             .addImm(Alu->getOperand(5).getImm())
    662             .addImm(Alu->getOperand(6).getImm())
    663             .addImm(Alu->getOperand(7).getImm())
    664             .addImm(Alu->getOperand(8).getImm());
    665         Alu->eraseFromParent();
    666       }
    667       MFI->CFStackSize = CFStack.MaxStackSize;
    668     }
    669 
    670     return false;
    671   }
    672 
    673   StringRef getPassName() const override {
    674     return "R600 Control Flow Finalizer Pass";
    675   }
    676 };
    677 
    678 } // end anonymous namespace
    679 
// Registers the pass. It declares no pass dependencies, so the single-macro
// form is used.
INITIALIZE_PASS(R600ControlFlowFinalizer, DEBUG_TYPE,
                "R600 Control Flow Finalizer", false, false)
    684 
    685 char R600ControlFlowFinalizer::ID = 0;
    686 
    687 char &llvm::R600ControlFlowFinalizerID = R600ControlFlowFinalizer::ID;
    688 
    689 FunctionPass *llvm::createR600ControlFlowFinalizer() {
    690   return new R600ControlFlowFinalizer();
    691 }
    692