//===- R600ControlFlowFinalizer.cpp - Finalize Control Flow Inst ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This pass turns all control flow pseudo instructions into native ones,
/// computing their addresses on the fly; it also sets STACK_SIZE info.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "R600Defines.h"
#include "R600InstrInfo.h"
#include "R600MachineFunctionInfo.h"
#include "R600RegisterInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
31*0b57cec5SDimitry Andric #include "llvm/IR/CallingConv.h" 32*0b57cec5SDimitry Andric #include "llvm/IR/DebugLoc.h" 33*0b57cec5SDimitry Andric #include "llvm/IR/Function.h" 34*0b57cec5SDimitry Andric #include "llvm/Pass.h" 35*0b57cec5SDimitry Andric #include "llvm/Support/Compiler.h" 36*0b57cec5SDimitry Andric #include "llvm/Support/Debug.h" 37*0b57cec5SDimitry Andric #include "llvm/Support/MathExtras.h" 38*0b57cec5SDimitry Andric #include "llvm/Support/raw_ostream.h" 39*0b57cec5SDimitry Andric #include <algorithm> 40*0b57cec5SDimitry Andric #include <cassert> 41*0b57cec5SDimitry Andric #include <cstdint> 42*0b57cec5SDimitry Andric #include <set> 43*0b57cec5SDimitry Andric #include <utility> 44*0b57cec5SDimitry Andric #include <vector> 45*0b57cec5SDimitry Andric 46*0b57cec5SDimitry Andric using namespace llvm; 47*0b57cec5SDimitry Andric 48*0b57cec5SDimitry Andric #define DEBUG_TYPE "r600cf" 49*0b57cec5SDimitry Andric 50*0b57cec5SDimitry Andric namespace { 51*0b57cec5SDimitry Andric 52*0b57cec5SDimitry Andric struct CFStack { 53*0b57cec5SDimitry Andric enum StackItem { 54*0b57cec5SDimitry Andric ENTRY = 0, 55*0b57cec5SDimitry Andric SUB_ENTRY = 1, 56*0b57cec5SDimitry Andric FIRST_NON_WQM_PUSH = 2, 57*0b57cec5SDimitry Andric FIRST_NON_WQM_PUSH_W_FULL_ENTRY = 3 58*0b57cec5SDimitry Andric }; 59*0b57cec5SDimitry Andric 60*0b57cec5SDimitry Andric const R600Subtarget *ST; 61*0b57cec5SDimitry Andric std::vector<StackItem> BranchStack; 62*0b57cec5SDimitry Andric std::vector<StackItem> LoopStack; 63*0b57cec5SDimitry Andric unsigned MaxStackSize; 64*0b57cec5SDimitry Andric unsigned CurrentEntries = 0; 65*0b57cec5SDimitry Andric unsigned CurrentSubEntries = 0; 66*0b57cec5SDimitry Andric 67*0b57cec5SDimitry Andric CFStack(const R600Subtarget *st, CallingConv::ID cc) : ST(st), 68*0b57cec5SDimitry Andric // We need to reserve a stack entry for CALL_FS in vertex shaders. 69*0b57cec5SDimitry Andric MaxStackSize(cc == CallingConv::AMDGPU_VS ? 
1 : 0) {} 70*0b57cec5SDimitry Andric 71*0b57cec5SDimitry Andric unsigned getLoopDepth(); 72*0b57cec5SDimitry Andric bool branchStackContains(CFStack::StackItem); 73*0b57cec5SDimitry Andric bool requiresWorkAroundForInst(unsigned Opcode); 74*0b57cec5SDimitry Andric unsigned getSubEntrySize(CFStack::StackItem Item); 75*0b57cec5SDimitry Andric void updateMaxStackSize(); 76*0b57cec5SDimitry Andric void pushBranch(unsigned Opcode, bool isWQM = false); 77*0b57cec5SDimitry Andric void pushLoop(); 78*0b57cec5SDimitry Andric void popBranch(); 79*0b57cec5SDimitry Andric void popLoop(); 80*0b57cec5SDimitry Andric }; 81*0b57cec5SDimitry Andric 82*0b57cec5SDimitry Andric unsigned CFStack::getLoopDepth() { 83*0b57cec5SDimitry Andric return LoopStack.size(); 84*0b57cec5SDimitry Andric } 85*0b57cec5SDimitry Andric 86*0b57cec5SDimitry Andric bool CFStack::branchStackContains(CFStack::StackItem Item) { 87*0b57cec5SDimitry Andric for (std::vector<CFStack::StackItem>::const_iterator I = BranchStack.begin(), 88*0b57cec5SDimitry Andric E = BranchStack.end(); I != E; ++I) { 89*0b57cec5SDimitry Andric if (*I == Item) 90*0b57cec5SDimitry Andric return true; 91*0b57cec5SDimitry Andric } 92*0b57cec5SDimitry Andric return false; 93*0b57cec5SDimitry Andric } 94*0b57cec5SDimitry Andric 95*0b57cec5SDimitry Andric bool CFStack::requiresWorkAroundForInst(unsigned Opcode) { 96*0b57cec5SDimitry Andric if (Opcode == R600::CF_ALU_PUSH_BEFORE && ST->hasCaymanISA() && 97*0b57cec5SDimitry Andric getLoopDepth() > 1) 98*0b57cec5SDimitry Andric return true; 99*0b57cec5SDimitry Andric 100*0b57cec5SDimitry Andric if (!ST->hasCFAluBug()) 101*0b57cec5SDimitry Andric return false; 102*0b57cec5SDimitry Andric 103*0b57cec5SDimitry Andric switch(Opcode) { 104*0b57cec5SDimitry Andric default: return false; 105*0b57cec5SDimitry Andric case R600::CF_ALU_PUSH_BEFORE: 106*0b57cec5SDimitry Andric case R600::CF_ALU_ELSE_AFTER: 107*0b57cec5SDimitry Andric case R600::CF_ALU_BREAK: 108*0b57cec5SDimitry Andric case 
R600::CF_ALU_CONTINUE: 109*0b57cec5SDimitry Andric if (CurrentSubEntries == 0) 110*0b57cec5SDimitry Andric return false; 111*0b57cec5SDimitry Andric if (ST->getWavefrontSize() == 64) { 112*0b57cec5SDimitry Andric // We are being conservative here. We only require this work-around if 113*0b57cec5SDimitry Andric // CurrentSubEntries > 3 && 114*0b57cec5SDimitry Andric // (CurrentSubEntries % 4 == 3 || CurrentSubEntries % 4 == 0) 115*0b57cec5SDimitry Andric // 116*0b57cec5SDimitry Andric // We have to be conservative, because we don't know for certain that 117*0b57cec5SDimitry Andric // our stack allocation algorithm for Evergreen/NI is correct. Applying this 118*0b57cec5SDimitry Andric // work-around when CurrentSubEntries > 3 allows us to over-allocate stack 119*0b57cec5SDimitry Andric // resources without any problems. 120*0b57cec5SDimitry Andric return CurrentSubEntries > 3; 121*0b57cec5SDimitry Andric } else { 122*0b57cec5SDimitry Andric assert(ST->getWavefrontSize() == 32); 123*0b57cec5SDimitry Andric // We are being conservative here. We only require the work-around if 124*0b57cec5SDimitry Andric // CurrentSubEntries > 7 && 125*0b57cec5SDimitry Andric // (CurrentSubEntries % 8 == 7 || CurrentSubEntries % 8 == 0) 126*0b57cec5SDimitry Andric // See the comment on the wavefront size == 64 case for why we are 127*0b57cec5SDimitry Andric // being conservative. 
128*0b57cec5SDimitry Andric return CurrentSubEntries > 7; 129*0b57cec5SDimitry Andric } 130*0b57cec5SDimitry Andric } 131*0b57cec5SDimitry Andric } 132*0b57cec5SDimitry Andric 133*0b57cec5SDimitry Andric unsigned CFStack::getSubEntrySize(CFStack::StackItem Item) { 134*0b57cec5SDimitry Andric switch(Item) { 135*0b57cec5SDimitry Andric default: 136*0b57cec5SDimitry Andric return 0; 137*0b57cec5SDimitry Andric case CFStack::FIRST_NON_WQM_PUSH: 138*0b57cec5SDimitry Andric assert(!ST->hasCaymanISA()); 139*0b57cec5SDimitry Andric if (ST->getGeneration() <= AMDGPUSubtarget::R700) { 140*0b57cec5SDimitry Andric // +1 For the push operation. 141*0b57cec5SDimitry Andric // +2 Extra space required. 142*0b57cec5SDimitry Andric return 3; 143*0b57cec5SDimitry Andric } else { 144*0b57cec5SDimitry Andric // Some documentation says that this is not necessary on Evergreen, 145*0b57cec5SDimitry Andric // but experimentation has show that we need to allocate 1 extra 146*0b57cec5SDimitry Andric // sub-entry for the first non-WQM push. 147*0b57cec5SDimitry Andric // +1 For the push operation. 148*0b57cec5SDimitry Andric // +1 Extra space required. 149*0b57cec5SDimitry Andric return 2; 150*0b57cec5SDimitry Andric } 151*0b57cec5SDimitry Andric case CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY: 152*0b57cec5SDimitry Andric assert(ST->getGeneration() >= AMDGPUSubtarget::EVERGREEN); 153*0b57cec5SDimitry Andric // +1 For the push operation. 154*0b57cec5SDimitry Andric // +1 Extra space required. 
155*0b57cec5SDimitry Andric return 2; 156*0b57cec5SDimitry Andric case CFStack::SUB_ENTRY: 157*0b57cec5SDimitry Andric return 1; 158*0b57cec5SDimitry Andric } 159*0b57cec5SDimitry Andric } 160*0b57cec5SDimitry Andric 161*0b57cec5SDimitry Andric void CFStack::updateMaxStackSize() { 162*0b57cec5SDimitry Andric unsigned CurrentStackSize = 163*0b57cec5SDimitry Andric CurrentEntries + (alignTo(CurrentSubEntries, 4) / 4); 164*0b57cec5SDimitry Andric MaxStackSize = std::max(CurrentStackSize, MaxStackSize); 165*0b57cec5SDimitry Andric } 166*0b57cec5SDimitry Andric 167*0b57cec5SDimitry Andric void CFStack::pushBranch(unsigned Opcode, bool isWQM) { 168*0b57cec5SDimitry Andric CFStack::StackItem Item = CFStack::ENTRY; 169*0b57cec5SDimitry Andric switch(Opcode) { 170*0b57cec5SDimitry Andric case R600::CF_PUSH_EG: 171*0b57cec5SDimitry Andric case R600::CF_ALU_PUSH_BEFORE: 172*0b57cec5SDimitry Andric if (!isWQM) { 173*0b57cec5SDimitry Andric if (!ST->hasCaymanISA() && 174*0b57cec5SDimitry Andric !branchStackContains(CFStack::FIRST_NON_WQM_PUSH)) 175*0b57cec5SDimitry Andric Item = CFStack::FIRST_NON_WQM_PUSH; // May not be required on Evergreen/NI 176*0b57cec5SDimitry Andric // See comment in 177*0b57cec5SDimitry Andric // CFStack::getSubEntrySize() 178*0b57cec5SDimitry Andric else if (CurrentEntries > 0 && 179*0b57cec5SDimitry Andric ST->getGeneration() > AMDGPUSubtarget::EVERGREEN && 180*0b57cec5SDimitry Andric !ST->hasCaymanISA() && 181*0b57cec5SDimitry Andric !branchStackContains(CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY)) 182*0b57cec5SDimitry Andric Item = CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY; 183*0b57cec5SDimitry Andric else 184*0b57cec5SDimitry Andric Item = CFStack::SUB_ENTRY; 185*0b57cec5SDimitry Andric } else 186*0b57cec5SDimitry Andric Item = CFStack::ENTRY; 187*0b57cec5SDimitry Andric break; 188*0b57cec5SDimitry Andric } 189*0b57cec5SDimitry Andric BranchStack.push_back(Item); 190*0b57cec5SDimitry Andric if (Item == CFStack::ENTRY) 191*0b57cec5SDimitry Andric 
CurrentEntries++; 192*0b57cec5SDimitry Andric else 193*0b57cec5SDimitry Andric CurrentSubEntries += getSubEntrySize(Item); 194*0b57cec5SDimitry Andric updateMaxStackSize(); 195*0b57cec5SDimitry Andric } 196*0b57cec5SDimitry Andric 197*0b57cec5SDimitry Andric void CFStack::pushLoop() { 198*0b57cec5SDimitry Andric LoopStack.push_back(CFStack::ENTRY); 199*0b57cec5SDimitry Andric CurrentEntries++; 200*0b57cec5SDimitry Andric updateMaxStackSize(); 201*0b57cec5SDimitry Andric } 202*0b57cec5SDimitry Andric 203*0b57cec5SDimitry Andric void CFStack::popBranch() { 204*0b57cec5SDimitry Andric CFStack::StackItem Top = BranchStack.back(); 205*0b57cec5SDimitry Andric if (Top == CFStack::ENTRY) 206*0b57cec5SDimitry Andric CurrentEntries--; 207*0b57cec5SDimitry Andric else 208*0b57cec5SDimitry Andric CurrentSubEntries-= getSubEntrySize(Top); 209*0b57cec5SDimitry Andric BranchStack.pop_back(); 210*0b57cec5SDimitry Andric } 211*0b57cec5SDimitry Andric 212*0b57cec5SDimitry Andric void CFStack::popLoop() { 213*0b57cec5SDimitry Andric CurrentEntries--; 214*0b57cec5SDimitry Andric LoopStack.pop_back(); 215*0b57cec5SDimitry Andric } 216*0b57cec5SDimitry Andric 217*0b57cec5SDimitry Andric class R600ControlFlowFinalizer : public MachineFunctionPass { 218*0b57cec5SDimitry Andric private: 219*0b57cec5SDimitry Andric using ClauseFile = std::pair<MachineInstr *, std::vector<MachineInstr *>>; 220*0b57cec5SDimitry Andric 221*0b57cec5SDimitry Andric enum ControlFlowInstruction { 222*0b57cec5SDimitry Andric CF_TC, 223*0b57cec5SDimitry Andric CF_VC, 224*0b57cec5SDimitry Andric CF_CALL_FS, 225*0b57cec5SDimitry Andric CF_WHILE_LOOP, 226*0b57cec5SDimitry Andric CF_END_LOOP, 227*0b57cec5SDimitry Andric CF_LOOP_BREAK, 228*0b57cec5SDimitry Andric CF_LOOP_CONTINUE, 229*0b57cec5SDimitry Andric CF_JUMP, 230*0b57cec5SDimitry Andric CF_ELSE, 231*0b57cec5SDimitry Andric CF_POP, 232*0b57cec5SDimitry Andric CF_END 233*0b57cec5SDimitry Andric }; 234*0b57cec5SDimitry Andric 235*0b57cec5SDimitry Andric const 
R600InstrInfo *TII = nullptr; 236*0b57cec5SDimitry Andric const R600RegisterInfo *TRI = nullptr; 237*0b57cec5SDimitry Andric unsigned MaxFetchInst; 238*0b57cec5SDimitry Andric const R600Subtarget *ST = nullptr; 239*0b57cec5SDimitry Andric 240*0b57cec5SDimitry Andric bool IsTrivialInst(MachineInstr &MI) const { 241*0b57cec5SDimitry Andric switch (MI.getOpcode()) { 242*0b57cec5SDimitry Andric case R600::KILL: 243*0b57cec5SDimitry Andric case R600::RETURN: 244*0b57cec5SDimitry Andric return true; 245*0b57cec5SDimitry Andric default: 246*0b57cec5SDimitry Andric return false; 247*0b57cec5SDimitry Andric } 248*0b57cec5SDimitry Andric } 249*0b57cec5SDimitry Andric 250*0b57cec5SDimitry Andric const MCInstrDesc &getHWInstrDesc(ControlFlowInstruction CFI) const { 251*0b57cec5SDimitry Andric unsigned Opcode = 0; 252*0b57cec5SDimitry Andric bool isEg = (ST->getGeneration() >= AMDGPUSubtarget::EVERGREEN); 253*0b57cec5SDimitry Andric switch (CFI) { 254*0b57cec5SDimitry Andric case CF_TC: 255*0b57cec5SDimitry Andric Opcode = isEg ? R600::CF_TC_EG : R600::CF_TC_R600; 256*0b57cec5SDimitry Andric break; 257*0b57cec5SDimitry Andric case CF_VC: 258*0b57cec5SDimitry Andric Opcode = isEg ? R600::CF_VC_EG : R600::CF_VC_R600; 259*0b57cec5SDimitry Andric break; 260*0b57cec5SDimitry Andric case CF_CALL_FS: 261*0b57cec5SDimitry Andric Opcode = isEg ? R600::CF_CALL_FS_EG : R600::CF_CALL_FS_R600; 262*0b57cec5SDimitry Andric break; 263*0b57cec5SDimitry Andric case CF_WHILE_LOOP: 264*0b57cec5SDimitry Andric Opcode = isEg ? R600::WHILE_LOOP_EG : R600::WHILE_LOOP_R600; 265*0b57cec5SDimitry Andric break; 266*0b57cec5SDimitry Andric case CF_END_LOOP: 267*0b57cec5SDimitry Andric Opcode = isEg ? R600::END_LOOP_EG : R600::END_LOOP_R600; 268*0b57cec5SDimitry Andric break; 269*0b57cec5SDimitry Andric case CF_LOOP_BREAK: 270*0b57cec5SDimitry Andric Opcode = isEg ? 
R600::LOOP_BREAK_EG : R600::LOOP_BREAK_R600; 271*0b57cec5SDimitry Andric break; 272*0b57cec5SDimitry Andric case CF_LOOP_CONTINUE: 273*0b57cec5SDimitry Andric Opcode = isEg ? R600::CF_CONTINUE_EG : R600::CF_CONTINUE_R600; 274*0b57cec5SDimitry Andric break; 275*0b57cec5SDimitry Andric case CF_JUMP: 276*0b57cec5SDimitry Andric Opcode = isEg ? R600::CF_JUMP_EG : R600::CF_JUMP_R600; 277*0b57cec5SDimitry Andric break; 278*0b57cec5SDimitry Andric case CF_ELSE: 279*0b57cec5SDimitry Andric Opcode = isEg ? R600::CF_ELSE_EG : R600::CF_ELSE_R600; 280*0b57cec5SDimitry Andric break; 281*0b57cec5SDimitry Andric case CF_POP: 282*0b57cec5SDimitry Andric Opcode = isEg ? R600::POP_EG : R600::POP_R600; 283*0b57cec5SDimitry Andric break; 284*0b57cec5SDimitry Andric case CF_END: 285*0b57cec5SDimitry Andric if (ST->hasCaymanISA()) { 286*0b57cec5SDimitry Andric Opcode = R600::CF_END_CM; 287*0b57cec5SDimitry Andric break; 288*0b57cec5SDimitry Andric } 289*0b57cec5SDimitry Andric Opcode = isEg ? R600::CF_END_EG : R600::CF_END_R600; 290*0b57cec5SDimitry Andric break; 291*0b57cec5SDimitry Andric } 292*0b57cec5SDimitry Andric assert (Opcode && "No opcode selected"); 293*0b57cec5SDimitry Andric return TII->get(Opcode); 294*0b57cec5SDimitry Andric } 295*0b57cec5SDimitry Andric 296*0b57cec5SDimitry Andric bool isCompatibleWithClause(const MachineInstr &MI, 297*0b57cec5SDimitry Andric std::set<unsigned> &DstRegs) const { 298*0b57cec5SDimitry Andric unsigned DstMI, SrcMI; 299*0b57cec5SDimitry Andric for (MachineInstr::const_mop_iterator I = MI.operands_begin(), 300*0b57cec5SDimitry Andric E = MI.operands_end(); 301*0b57cec5SDimitry Andric I != E; ++I) { 302*0b57cec5SDimitry Andric const MachineOperand &MO = *I; 303*0b57cec5SDimitry Andric if (!MO.isReg()) 304*0b57cec5SDimitry Andric continue; 305*0b57cec5SDimitry Andric if (MO.isDef()) { 306*0b57cec5SDimitry Andric unsigned Reg = MO.getReg(); 307*0b57cec5SDimitry Andric if (R600::R600_Reg128RegClass.contains(Reg)) 308*0b57cec5SDimitry Andric DstMI 
= Reg; 309*0b57cec5SDimitry Andric else 310*0b57cec5SDimitry Andric DstMI = TRI->getMatchingSuperReg(Reg, 311*0b57cec5SDimitry Andric AMDGPURegisterInfo::getSubRegFromChannel(TRI->getHWRegChan(Reg)), 312*0b57cec5SDimitry Andric &R600::R600_Reg128RegClass); 313*0b57cec5SDimitry Andric } 314*0b57cec5SDimitry Andric if (MO.isUse()) { 315*0b57cec5SDimitry Andric unsigned Reg = MO.getReg(); 316*0b57cec5SDimitry Andric if (R600::R600_Reg128RegClass.contains(Reg)) 317*0b57cec5SDimitry Andric SrcMI = Reg; 318*0b57cec5SDimitry Andric else 319*0b57cec5SDimitry Andric SrcMI = TRI->getMatchingSuperReg(Reg, 320*0b57cec5SDimitry Andric AMDGPURegisterInfo::getSubRegFromChannel(TRI->getHWRegChan(Reg)), 321*0b57cec5SDimitry Andric &R600::R600_Reg128RegClass); 322*0b57cec5SDimitry Andric } 323*0b57cec5SDimitry Andric } 324*0b57cec5SDimitry Andric if ((DstRegs.find(SrcMI) == DstRegs.end())) { 325*0b57cec5SDimitry Andric DstRegs.insert(DstMI); 326*0b57cec5SDimitry Andric return true; 327*0b57cec5SDimitry Andric } else 328*0b57cec5SDimitry Andric return false; 329*0b57cec5SDimitry Andric } 330*0b57cec5SDimitry Andric 331*0b57cec5SDimitry Andric ClauseFile 332*0b57cec5SDimitry Andric MakeFetchClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator &I) 333*0b57cec5SDimitry Andric const { 334*0b57cec5SDimitry Andric MachineBasicBlock::iterator ClauseHead = I; 335*0b57cec5SDimitry Andric std::vector<MachineInstr *> ClauseContent; 336*0b57cec5SDimitry Andric unsigned AluInstCount = 0; 337*0b57cec5SDimitry Andric bool IsTex = TII->usesTextureCache(*ClauseHead); 338*0b57cec5SDimitry Andric std::set<unsigned> DstRegs; 339*0b57cec5SDimitry Andric for (MachineBasicBlock::iterator E = MBB.end(); I != E; ++I) { 340*0b57cec5SDimitry Andric if (IsTrivialInst(*I)) 341*0b57cec5SDimitry Andric continue; 342*0b57cec5SDimitry Andric if (AluInstCount >= MaxFetchInst) 343*0b57cec5SDimitry Andric break; 344*0b57cec5SDimitry Andric if ((IsTex && !TII->usesTextureCache(*I)) || 345*0b57cec5SDimitry Andric 
(!IsTex && !TII->usesVertexCache(*I))) 346*0b57cec5SDimitry Andric break; 347*0b57cec5SDimitry Andric if (!isCompatibleWithClause(*I, DstRegs)) 348*0b57cec5SDimitry Andric break; 349*0b57cec5SDimitry Andric AluInstCount ++; 350*0b57cec5SDimitry Andric ClauseContent.push_back(&*I); 351*0b57cec5SDimitry Andric } 352*0b57cec5SDimitry Andric MachineInstr *MIb = BuildMI(MBB, ClauseHead, MBB.findDebugLoc(ClauseHead), 353*0b57cec5SDimitry Andric getHWInstrDesc(IsTex?CF_TC:CF_VC)) 354*0b57cec5SDimitry Andric .addImm(0) // ADDR 355*0b57cec5SDimitry Andric .addImm(AluInstCount - 1); // COUNT 356*0b57cec5SDimitry Andric return ClauseFile(MIb, std::move(ClauseContent)); 357*0b57cec5SDimitry Andric } 358*0b57cec5SDimitry Andric 359*0b57cec5SDimitry Andric void getLiteral(MachineInstr &MI, std::vector<MachineOperand *> &Lits) const { 360*0b57cec5SDimitry Andric static const unsigned LiteralRegs[] = { 361*0b57cec5SDimitry Andric R600::ALU_LITERAL_X, 362*0b57cec5SDimitry Andric R600::ALU_LITERAL_Y, 363*0b57cec5SDimitry Andric R600::ALU_LITERAL_Z, 364*0b57cec5SDimitry Andric R600::ALU_LITERAL_W 365*0b57cec5SDimitry Andric }; 366*0b57cec5SDimitry Andric const SmallVector<std::pair<MachineOperand *, int64_t>, 3> Srcs = 367*0b57cec5SDimitry Andric TII->getSrcs(MI); 368*0b57cec5SDimitry Andric for (const auto &Src:Srcs) { 369*0b57cec5SDimitry Andric if (Src.first->getReg() != R600::ALU_LITERAL_X) 370*0b57cec5SDimitry Andric continue; 371*0b57cec5SDimitry Andric int64_t Imm = Src.second; 372*0b57cec5SDimitry Andric std::vector<MachineOperand *>::iterator It = 373*0b57cec5SDimitry Andric llvm::find_if(Lits, [&](MachineOperand *val) { 374*0b57cec5SDimitry Andric return val->isImm() && (val->getImm() == Imm); 375*0b57cec5SDimitry Andric }); 376*0b57cec5SDimitry Andric 377*0b57cec5SDimitry Andric // Get corresponding Operand 378*0b57cec5SDimitry Andric MachineOperand &Operand = MI.getOperand( 379*0b57cec5SDimitry Andric TII->getOperandIdx(MI.getOpcode(), R600::OpName::literal)); 
380*0b57cec5SDimitry Andric 381*0b57cec5SDimitry Andric if (It != Lits.end()) { 382*0b57cec5SDimitry Andric // Reuse existing literal reg 383*0b57cec5SDimitry Andric unsigned Index = It - Lits.begin(); 384*0b57cec5SDimitry Andric Src.first->setReg(LiteralRegs[Index]); 385*0b57cec5SDimitry Andric } else { 386*0b57cec5SDimitry Andric // Allocate new literal reg 387*0b57cec5SDimitry Andric assert(Lits.size() < 4 && "Too many literals in Instruction Group"); 388*0b57cec5SDimitry Andric Src.first->setReg(LiteralRegs[Lits.size()]); 389*0b57cec5SDimitry Andric Lits.push_back(&Operand); 390*0b57cec5SDimitry Andric } 391*0b57cec5SDimitry Andric } 392*0b57cec5SDimitry Andric } 393*0b57cec5SDimitry Andric 394*0b57cec5SDimitry Andric MachineBasicBlock::iterator insertLiterals( 395*0b57cec5SDimitry Andric MachineBasicBlock::iterator InsertPos, 396*0b57cec5SDimitry Andric const std::vector<unsigned> &Literals) const { 397*0b57cec5SDimitry Andric MachineBasicBlock *MBB = InsertPos->getParent(); 398*0b57cec5SDimitry Andric for (unsigned i = 0, e = Literals.size(); i < e; i+=2) { 399*0b57cec5SDimitry Andric unsigned LiteralPair0 = Literals[i]; 400*0b57cec5SDimitry Andric unsigned LiteralPair1 = (i + 1 < e)?Literals[i + 1]:0; 401*0b57cec5SDimitry Andric InsertPos = BuildMI(MBB, InsertPos->getDebugLoc(), 402*0b57cec5SDimitry Andric TII->get(R600::LITERALS)) 403*0b57cec5SDimitry Andric .addImm(LiteralPair0) 404*0b57cec5SDimitry Andric .addImm(LiteralPair1); 405*0b57cec5SDimitry Andric } 406*0b57cec5SDimitry Andric return InsertPos; 407*0b57cec5SDimitry Andric } 408*0b57cec5SDimitry Andric 409*0b57cec5SDimitry Andric ClauseFile 410*0b57cec5SDimitry Andric MakeALUClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator &I) 411*0b57cec5SDimitry Andric const { 412*0b57cec5SDimitry Andric MachineInstr &ClauseHead = *I; 413*0b57cec5SDimitry Andric std::vector<MachineInstr *> ClauseContent; 414*0b57cec5SDimitry Andric I++; 415*0b57cec5SDimitry Andric for (MachineBasicBlock::instr_iterator 
E = MBB.instr_end(); I != E;) { 416*0b57cec5SDimitry Andric if (IsTrivialInst(*I)) { 417*0b57cec5SDimitry Andric ++I; 418*0b57cec5SDimitry Andric continue; 419*0b57cec5SDimitry Andric } 420*0b57cec5SDimitry Andric if (!I->isBundle() && !TII->isALUInstr(I->getOpcode())) 421*0b57cec5SDimitry Andric break; 422*0b57cec5SDimitry Andric std::vector<MachineOperand *>Literals; 423*0b57cec5SDimitry Andric if (I->isBundle()) { 424*0b57cec5SDimitry Andric MachineInstr &DeleteMI = *I; 425*0b57cec5SDimitry Andric MachineBasicBlock::instr_iterator BI = I.getInstrIterator(); 426*0b57cec5SDimitry Andric while (++BI != E && BI->isBundledWithPred()) { 427*0b57cec5SDimitry Andric BI->unbundleFromPred(); 428*0b57cec5SDimitry Andric for (MachineOperand &MO : BI->operands()) { 429*0b57cec5SDimitry Andric if (MO.isReg() && MO.isInternalRead()) 430*0b57cec5SDimitry Andric MO.setIsInternalRead(false); 431*0b57cec5SDimitry Andric } 432*0b57cec5SDimitry Andric getLiteral(*BI, Literals); 433*0b57cec5SDimitry Andric ClauseContent.push_back(&*BI); 434*0b57cec5SDimitry Andric } 435*0b57cec5SDimitry Andric I = BI; 436*0b57cec5SDimitry Andric DeleteMI.eraseFromParent(); 437*0b57cec5SDimitry Andric } else { 438*0b57cec5SDimitry Andric getLiteral(*I, Literals); 439*0b57cec5SDimitry Andric ClauseContent.push_back(&*I); 440*0b57cec5SDimitry Andric I++; 441*0b57cec5SDimitry Andric } 442*0b57cec5SDimitry Andric for (unsigned i = 0, e = Literals.size(); i < e; i += 2) { 443*0b57cec5SDimitry Andric MachineInstrBuilder MILit = BuildMI(MBB, I, I->getDebugLoc(), 444*0b57cec5SDimitry Andric TII->get(R600::LITERALS)); 445*0b57cec5SDimitry Andric if (Literals[i]->isImm()) { 446*0b57cec5SDimitry Andric MILit.addImm(Literals[i]->getImm()); 447*0b57cec5SDimitry Andric } else { 448*0b57cec5SDimitry Andric MILit.addGlobalAddress(Literals[i]->getGlobal(), 449*0b57cec5SDimitry Andric Literals[i]->getOffset()); 450*0b57cec5SDimitry Andric } 451*0b57cec5SDimitry Andric if (i + 1 < e) { 452*0b57cec5SDimitry Andric if 
(Literals[i + 1]->isImm()) { 453*0b57cec5SDimitry Andric MILit.addImm(Literals[i + 1]->getImm()); 454*0b57cec5SDimitry Andric } else { 455*0b57cec5SDimitry Andric MILit.addGlobalAddress(Literals[i + 1]->getGlobal(), 456*0b57cec5SDimitry Andric Literals[i + 1]->getOffset()); 457*0b57cec5SDimitry Andric } 458*0b57cec5SDimitry Andric } else 459*0b57cec5SDimitry Andric MILit.addImm(0); 460*0b57cec5SDimitry Andric ClauseContent.push_back(MILit); 461*0b57cec5SDimitry Andric } 462*0b57cec5SDimitry Andric } 463*0b57cec5SDimitry Andric assert(ClauseContent.size() < 128 && "ALU clause is too big"); 464*0b57cec5SDimitry Andric ClauseHead.getOperand(7).setImm(ClauseContent.size() - 1); 465*0b57cec5SDimitry Andric return ClauseFile(&ClauseHead, std::move(ClauseContent)); 466*0b57cec5SDimitry Andric } 467*0b57cec5SDimitry Andric 468*0b57cec5SDimitry Andric void EmitFetchClause(MachineBasicBlock::iterator InsertPos, 469*0b57cec5SDimitry Andric const DebugLoc &DL, ClauseFile &Clause, 470*0b57cec5SDimitry Andric unsigned &CfCount) { 471*0b57cec5SDimitry Andric CounterPropagateAddr(*Clause.first, CfCount); 472*0b57cec5SDimitry Andric MachineBasicBlock *BB = Clause.first->getParent(); 473*0b57cec5SDimitry Andric BuildMI(BB, DL, TII->get(R600::FETCH_CLAUSE)).addImm(CfCount); 474*0b57cec5SDimitry Andric for (unsigned i = 0, e = Clause.second.size(); i < e; ++i) { 475*0b57cec5SDimitry Andric BB->splice(InsertPos, BB, Clause.second[i]); 476*0b57cec5SDimitry Andric } 477*0b57cec5SDimitry Andric CfCount += 2 * Clause.second.size(); 478*0b57cec5SDimitry Andric } 479*0b57cec5SDimitry Andric 480*0b57cec5SDimitry Andric void EmitALUClause(MachineBasicBlock::iterator InsertPos, const DebugLoc &DL, 481*0b57cec5SDimitry Andric ClauseFile &Clause, unsigned &CfCount) { 482*0b57cec5SDimitry Andric Clause.first->getOperand(0).setImm(0); 483*0b57cec5SDimitry Andric CounterPropagateAddr(*Clause.first, CfCount); 484*0b57cec5SDimitry Andric MachineBasicBlock *BB = Clause.first->getParent(); 
485*0b57cec5SDimitry Andric BuildMI(BB, DL, TII->get(R600::ALU_CLAUSE)).addImm(CfCount); 486*0b57cec5SDimitry Andric for (unsigned i = 0, e = Clause.second.size(); i < e; ++i) { 487*0b57cec5SDimitry Andric BB->splice(InsertPos, BB, Clause.second[i]); 488*0b57cec5SDimitry Andric } 489*0b57cec5SDimitry Andric CfCount += Clause.second.size(); 490*0b57cec5SDimitry Andric } 491*0b57cec5SDimitry Andric 492*0b57cec5SDimitry Andric void CounterPropagateAddr(MachineInstr &MI, unsigned Addr) const { 493*0b57cec5SDimitry Andric MI.getOperand(0).setImm(Addr + MI.getOperand(0).getImm()); 494*0b57cec5SDimitry Andric } 495*0b57cec5SDimitry Andric void CounterPropagateAddr(const std::set<MachineInstr *> &MIs, 496*0b57cec5SDimitry Andric unsigned Addr) const { 497*0b57cec5SDimitry Andric for (MachineInstr *MI : MIs) { 498*0b57cec5SDimitry Andric CounterPropagateAddr(*MI, Addr); 499*0b57cec5SDimitry Andric } 500*0b57cec5SDimitry Andric } 501*0b57cec5SDimitry Andric 502*0b57cec5SDimitry Andric public: 503*0b57cec5SDimitry Andric static char ID; 504*0b57cec5SDimitry Andric 505*0b57cec5SDimitry Andric R600ControlFlowFinalizer() : MachineFunctionPass(ID) {} 506*0b57cec5SDimitry Andric 507*0b57cec5SDimitry Andric bool runOnMachineFunction(MachineFunction &MF) override { 508*0b57cec5SDimitry Andric ST = &MF.getSubtarget<R600Subtarget>(); 509*0b57cec5SDimitry Andric MaxFetchInst = ST->getTexVTXClauseSize(); 510*0b57cec5SDimitry Andric TII = ST->getInstrInfo(); 511*0b57cec5SDimitry Andric TRI = ST->getRegisterInfo(); 512*0b57cec5SDimitry Andric 513*0b57cec5SDimitry Andric R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>(); 514*0b57cec5SDimitry Andric 515*0b57cec5SDimitry Andric CFStack CFStack(ST, MF.getFunction().getCallingConv()); 516*0b57cec5SDimitry Andric for (MachineFunction::iterator MB = MF.begin(), ME = MF.end(); MB != ME; 517*0b57cec5SDimitry Andric ++MB) { 518*0b57cec5SDimitry Andric MachineBasicBlock &MBB = *MB; 519*0b57cec5SDimitry Andric unsigned CfCount = 0; 
520*0b57cec5SDimitry Andric std::vector<std::pair<unsigned, std::set<MachineInstr *>>> LoopStack; 521*0b57cec5SDimitry Andric std::vector<MachineInstr * > IfThenElseStack; 522*0b57cec5SDimitry Andric if (MF.getFunction().getCallingConv() == CallingConv::AMDGPU_VS) { 523*0b57cec5SDimitry Andric BuildMI(MBB, MBB.begin(), MBB.findDebugLoc(MBB.begin()), 524*0b57cec5SDimitry Andric getHWInstrDesc(CF_CALL_FS)); 525*0b57cec5SDimitry Andric CfCount++; 526*0b57cec5SDimitry Andric } 527*0b57cec5SDimitry Andric std::vector<ClauseFile> FetchClauses, AluClauses; 528*0b57cec5SDimitry Andric std::vector<MachineInstr *> LastAlu(1); 529*0b57cec5SDimitry Andric std::vector<MachineInstr *> ToPopAfter; 530*0b57cec5SDimitry Andric 531*0b57cec5SDimitry Andric for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); 532*0b57cec5SDimitry Andric I != E;) { 533*0b57cec5SDimitry Andric if (TII->usesTextureCache(*I) || TII->usesVertexCache(*I)) { 534*0b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << CfCount << ":"; I->dump();); 535*0b57cec5SDimitry Andric FetchClauses.push_back(MakeFetchClause(MBB, I)); 536*0b57cec5SDimitry Andric CfCount++; 537*0b57cec5SDimitry Andric LastAlu.back() = nullptr; 538*0b57cec5SDimitry Andric continue; 539*0b57cec5SDimitry Andric } 540*0b57cec5SDimitry Andric 541*0b57cec5SDimitry Andric MachineBasicBlock::iterator MI = I; 542*0b57cec5SDimitry Andric if (MI->getOpcode() != R600::ENDIF) 543*0b57cec5SDimitry Andric LastAlu.back() = nullptr; 544*0b57cec5SDimitry Andric if (MI->getOpcode() == R600::CF_ALU) 545*0b57cec5SDimitry Andric LastAlu.back() = &*MI; 546*0b57cec5SDimitry Andric I++; 547*0b57cec5SDimitry Andric bool RequiresWorkAround = 548*0b57cec5SDimitry Andric CFStack.requiresWorkAroundForInst(MI->getOpcode()); 549*0b57cec5SDimitry Andric switch (MI->getOpcode()) { 550*0b57cec5SDimitry Andric case R600::CF_ALU_PUSH_BEFORE: 551*0b57cec5SDimitry Andric if (RequiresWorkAround) { 552*0b57cec5SDimitry Andric LLVM_DEBUG(dbgs() 553*0b57cec5SDimitry Andric << 
"Applying bug work-around for ALU_PUSH_BEFORE\n"); 554*0b57cec5SDimitry Andric BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(R600::CF_PUSH_EG)) 555*0b57cec5SDimitry Andric .addImm(CfCount + 1) 556*0b57cec5SDimitry Andric .addImm(1); 557*0b57cec5SDimitry Andric MI->setDesc(TII->get(R600::CF_ALU)); 558*0b57cec5SDimitry Andric CfCount++; 559*0b57cec5SDimitry Andric CFStack.pushBranch(R600::CF_PUSH_EG); 560*0b57cec5SDimitry Andric } else 561*0b57cec5SDimitry Andric CFStack.pushBranch(R600::CF_ALU_PUSH_BEFORE); 562*0b57cec5SDimitry Andric LLVM_FALLTHROUGH; 563*0b57cec5SDimitry Andric case R600::CF_ALU: 564*0b57cec5SDimitry Andric I = MI; 565*0b57cec5SDimitry Andric AluClauses.push_back(MakeALUClause(MBB, I)); 566*0b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << CfCount << ":"; MI->dump();); 567*0b57cec5SDimitry Andric CfCount++; 568*0b57cec5SDimitry Andric break; 569*0b57cec5SDimitry Andric case R600::WHILELOOP: { 570*0b57cec5SDimitry Andric CFStack.pushLoop(); 571*0b57cec5SDimitry Andric MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI), 572*0b57cec5SDimitry Andric getHWInstrDesc(CF_WHILE_LOOP)) 573*0b57cec5SDimitry Andric .addImm(1); 574*0b57cec5SDimitry Andric std::pair<unsigned, std::set<MachineInstr *>> Pair(CfCount, 575*0b57cec5SDimitry Andric std::set<MachineInstr *>()); 576*0b57cec5SDimitry Andric Pair.second.insert(MIb); 577*0b57cec5SDimitry Andric LoopStack.push_back(std::move(Pair)); 578*0b57cec5SDimitry Andric MI->eraseFromParent(); 579*0b57cec5SDimitry Andric CfCount++; 580*0b57cec5SDimitry Andric break; 581*0b57cec5SDimitry Andric } 582*0b57cec5SDimitry Andric case R600::ENDLOOP: { 583*0b57cec5SDimitry Andric CFStack.popLoop(); 584*0b57cec5SDimitry Andric std::pair<unsigned, std::set<MachineInstr *>> Pair = 585*0b57cec5SDimitry Andric std::move(LoopStack.back()); 586*0b57cec5SDimitry Andric LoopStack.pop_back(); 587*0b57cec5SDimitry Andric CounterPropagateAddr(Pair.second, CfCount); 588*0b57cec5SDimitry Andric BuildMI(MBB, MI, MBB.findDebugLoc(MI), 
getHWInstrDesc(CF_END_LOOP)) 589*0b57cec5SDimitry Andric .addImm(Pair.first + 1); 590*0b57cec5SDimitry Andric MI->eraseFromParent(); 591*0b57cec5SDimitry Andric CfCount++; 592*0b57cec5SDimitry Andric break; 593*0b57cec5SDimitry Andric } 594*0b57cec5SDimitry Andric case R600::IF_PREDICATE_SET: { 595*0b57cec5SDimitry Andric LastAlu.push_back(nullptr); 596*0b57cec5SDimitry Andric MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI), 597*0b57cec5SDimitry Andric getHWInstrDesc(CF_JUMP)) 598*0b57cec5SDimitry Andric .addImm(0) 599*0b57cec5SDimitry Andric .addImm(0); 600*0b57cec5SDimitry Andric IfThenElseStack.push_back(MIb); 601*0b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << CfCount << ":"; MIb->dump();); 602*0b57cec5SDimitry Andric MI->eraseFromParent(); 603*0b57cec5SDimitry Andric CfCount++; 604*0b57cec5SDimitry Andric break; 605*0b57cec5SDimitry Andric } 606*0b57cec5SDimitry Andric case R600::ELSE: { 607*0b57cec5SDimitry Andric MachineInstr * JumpInst = IfThenElseStack.back(); 608*0b57cec5SDimitry Andric IfThenElseStack.pop_back(); 609*0b57cec5SDimitry Andric CounterPropagateAddr(*JumpInst, CfCount); 610*0b57cec5SDimitry Andric MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI), 611*0b57cec5SDimitry Andric getHWInstrDesc(CF_ELSE)) 612*0b57cec5SDimitry Andric .addImm(0) 613*0b57cec5SDimitry Andric .addImm(0); 614*0b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << CfCount << ":"; MIb->dump();); 615*0b57cec5SDimitry Andric IfThenElseStack.push_back(MIb); 616*0b57cec5SDimitry Andric MI->eraseFromParent(); 617*0b57cec5SDimitry Andric CfCount++; 618*0b57cec5SDimitry Andric break; 619*0b57cec5SDimitry Andric } 620*0b57cec5SDimitry Andric case R600::ENDIF: { 621*0b57cec5SDimitry Andric CFStack.popBranch(); 622*0b57cec5SDimitry Andric if (LastAlu.back()) { 623*0b57cec5SDimitry Andric ToPopAfter.push_back(LastAlu.back()); 624*0b57cec5SDimitry Andric } else { 625*0b57cec5SDimitry Andric MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI), 626*0b57cec5SDimitry Andric 
getHWInstrDesc(CF_POP)) 627*0b57cec5SDimitry Andric .addImm(CfCount + 1) 628*0b57cec5SDimitry Andric .addImm(1); 629*0b57cec5SDimitry Andric (void)MIb; 630*0b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << CfCount << ":"; MIb->dump();); 631*0b57cec5SDimitry Andric CfCount++; 632*0b57cec5SDimitry Andric } 633*0b57cec5SDimitry Andric 634*0b57cec5SDimitry Andric MachineInstr *IfOrElseInst = IfThenElseStack.back(); 635*0b57cec5SDimitry Andric IfThenElseStack.pop_back(); 636*0b57cec5SDimitry Andric CounterPropagateAddr(*IfOrElseInst, CfCount); 637*0b57cec5SDimitry Andric IfOrElseInst->getOperand(1).setImm(1); 638*0b57cec5SDimitry Andric LastAlu.pop_back(); 639*0b57cec5SDimitry Andric MI->eraseFromParent(); 640*0b57cec5SDimitry Andric break; 641*0b57cec5SDimitry Andric } 642*0b57cec5SDimitry Andric case R600::BREAK: { 643*0b57cec5SDimitry Andric CfCount ++; 644*0b57cec5SDimitry Andric MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI), 645*0b57cec5SDimitry Andric getHWInstrDesc(CF_LOOP_BREAK)) 646*0b57cec5SDimitry Andric .addImm(0); 647*0b57cec5SDimitry Andric LoopStack.back().second.insert(MIb); 648*0b57cec5SDimitry Andric MI->eraseFromParent(); 649*0b57cec5SDimitry Andric break; 650*0b57cec5SDimitry Andric } 651*0b57cec5SDimitry Andric case R600::CONTINUE: { 652*0b57cec5SDimitry Andric MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI), 653*0b57cec5SDimitry Andric getHWInstrDesc(CF_LOOP_CONTINUE)) 654*0b57cec5SDimitry Andric .addImm(0); 655*0b57cec5SDimitry Andric LoopStack.back().second.insert(MIb); 656*0b57cec5SDimitry Andric MI->eraseFromParent(); 657*0b57cec5SDimitry Andric CfCount++; 658*0b57cec5SDimitry Andric break; 659*0b57cec5SDimitry Andric } 660*0b57cec5SDimitry Andric case R600::RETURN: { 661*0b57cec5SDimitry Andric DebugLoc DL = MBB.findDebugLoc(MI); 662*0b57cec5SDimitry Andric BuildMI(MBB, MI, DL, getHWInstrDesc(CF_END)); 663*0b57cec5SDimitry Andric CfCount++; 664*0b57cec5SDimitry Andric if (CfCount % 2) { 665*0b57cec5SDimitry Andric BuildMI(MBB, 
I, DL, TII->get(R600::PAD)); 666*0b57cec5SDimitry Andric CfCount++; 667*0b57cec5SDimitry Andric } 668*0b57cec5SDimitry Andric MI->eraseFromParent(); 669*0b57cec5SDimitry Andric for (unsigned i = 0, e = FetchClauses.size(); i < e; i++) 670*0b57cec5SDimitry Andric EmitFetchClause(I, DL, FetchClauses[i], CfCount); 671*0b57cec5SDimitry Andric for (unsigned i = 0, e = AluClauses.size(); i < e; i++) 672*0b57cec5SDimitry Andric EmitALUClause(I, DL, AluClauses[i], CfCount); 673*0b57cec5SDimitry Andric break; 674*0b57cec5SDimitry Andric } 675*0b57cec5SDimitry Andric default: 676*0b57cec5SDimitry Andric if (TII->isExport(MI->getOpcode())) { 677*0b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << CfCount << ":"; MI->dump();); 678*0b57cec5SDimitry Andric CfCount++; 679*0b57cec5SDimitry Andric } 680*0b57cec5SDimitry Andric break; 681*0b57cec5SDimitry Andric } 682*0b57cec5SDimitry Andric } 683*0b57cec5SDimitry Andric for (unsigned i = 0, e = ToPopAfter.size(); i < e; ++i) { 684*0b57cec5SDimitry Andric MachineInstr *Alu = ToPopAfter[i]; 685*0b57cec5SDimitry Andric BuildMI(MBB, Alu, MBB.findDebugLoc((MachineBasicBlock::iterator)Alu), 686*0b57cec5SDimitry Andric TII->get(R600::CF_ALU_POP_AFTER)) 687*0b57cec5SDimitry Andric .addImm(Alu->getOperand(0).getImm()) 688*0b57cec5SDimitry Andric .addImm(Alu->getOperand(1).getImm()) 689*0b57cec5SDimitry Andric .addImm(Alu->getOperand(2).getImm()) 690*0b57cec5SDimitry Andric .addImm(Alu->getOperand(3).getImm()) 691*0b57cec5SDimitry Andric .addImm(Alu->getOperand(4).getImm()) 692*0b57cec5SDimitry Andric .addImm(Alu->getOperand(5).getImm()) 693*0b57cec5SDimitry Andric .addImm(Alu->getOperand(6).getImm()) 694*0b57cec5SDimitry Andric .addImm(Alu->getOperand(7).getImm()) 695*0b57cec5SDimitry Andric .addImm(Alu->getOperand(8).getImm()); 696*0b57cec5SDimitry Andric Alu->eraseFromParent(); 697*0b57cec5SDimitry Andric } 698*0b57cec5SDimitry Andric MFI->CFStackSize = CFStack.MaxStackSize; 699*0b57cec5SDimitry Andric } 700*0b57cec5SDimitry Andric 
701*0b57cec5SDimitry Andric return false; 702*0b57cec5SDimitry Andric } 703*0b57cec5SDimitry Andric 704*0b57cec5SDimitry Andric /* Overrides the base-class hook and reports this pass's fixed, human-readable name. */ StringRef getPassName() const override { 705*0b57cec5SDimitry Andric return "R600 Control Flow Finalizer Pass"; 706*0b57cec5SDimitry Andric } 707*0b57cec5SDimitry Andric }; 708*0b57cec5SDimitry Andric 709*0b57cec5SDimitry Andric } // end anonymous namespace 710*0b57cec5SDimitry Andric 711*0b57cec5SDimitry Andric /* Pass registration: a BEGIN/END macro pair with nothing listed between them, keyed by DEBUG_TYPE ("r600cf") and described as "R600 Control Flow Finalizer". NOTE(review): the two `false` arguments are presumably the CFG-only and is-analysis flags - confirm against the macro definition. */ INITIALIZE_PASS_BEGIN(R600ControlFlowFinalizer, DEBUG_TYPE, 712*0b57cec5SDimitry Andric "R600 Control Flow Finalizer", false, false) 713*0b57cec5SDimitry Andric INITIALIZE_PASS_END(R600ControlFlowFinalizer, DEBUG_TYPE, 714*0b57cec5SDimitry Andric "R600 Control Flow Finalizer", false, false) 715*0b57cec5SDimitry Andric 716*0b57cec5SDimitry Andric /* Out-of-class definition of the pass's static ID member, initialized to 0. NOTE(review): presumably only its address is used to identify the pass - confirm. */ char R600ControlFlowFinalizer::ID = 0; 717*0b57cec5SDimitry Andric 718*0b57cec5SDimitry Andric /* Reference in namespace llvm bound to the ID above, so code outside this file's anonymous namespace can name the pass's ID. */ char &llvm::R600ControlFlowFinalizerID = R600ControlFlowFinalizer::ID; 719*0b57cec5SDimitry Andric 720*0b57cec5SDimitry Andric /* Factory: heap-allocates a fresh R600ControlFlowFinalizer and returns it as a FunctionPass pointer. NOTE(review): the caller presumably takes ownership of the returned object - confirm at call sites. */ FunctionPass *llvm::createR600ControlFlowFinalizer() { 721*0b57cec5SDimitry Andric return new R600ControlFlowFinalizer(); 722*0b57cec5SDimitry Andric } 723