1 //===----- R600Packetizer.cpp - VLIW packetizer ---------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 /// \file 10 /// This pass implements instructions packetization for R600. It unsets isLast 11 /// bit of instructions inside a bundle and substitutes src register with 12 /// PreviousVector when applicable. 13 // 14 //===----------------------------------------------------------------------===// 15 16 #include "AMDGPU.h" 17 #include "AMDGPUSubtarget.h" 18 #include "R600InstrInfo.h" 19 #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 20 #include "llvm/CodeGen/DFAPacketizer.h" 21 #include "llvm/CodeGen/MachineDominators.h" 22 #include "llvm/CodeGen/MachineFunctionPass.h" 23 #include "llvm/CodeGen/MachineLoopInfo.h" 24 #include "llvm/CodeGen/Passes.h" 25 #include "llvm/CodeGen/ScheduleDAG.h" 26 #include "llvm/Support/Debug.h" 27 #include "llvm/Support/raw_ostream.h" 28 29 using namespace llvm; 30 31 #define DEBUG_TYPE "packets" 32 33 namespace { 34 35 class R600Packetizer : public MachineFunctionPass { 36 37 public: 38 static char ID; 39 R600Packetizer() : MachineFunctionPass(ID) {} 40 41 void getAnalysisUsage(AnalysisUsage &AU) const override { 42 AU.setPreservesCFG(); 43 AU.addRequired<MachineDominatorTree>(); 44 AU.addPreserved<MachineDominatorTree>(); 45 AU.addRequired<MachineLoopInfo>(); 46 AU.addPreserved<MachineLoopInfo>(); 47 MachineFunctionPass::getAnalysisUsage(AU); 48 } 49 50 StringRef getPassName() const override { return "R600 Packetizer"; } 51 52 bool runOnMachineFunction(MachineFunction &Fn) override; 53 }; 54 55 class R600PacketizerList : public VLIWPacketizerList { 56 private: 57 const R600InstrInfo *TII; 58 const R600RegisterInfo &TRI; 59 bool VLIW5; 60 bool ConsideredInstUsesAlreadyWrittenVectorElement; 61 62 unsigned getSlot(const MachineInstr &MI) const { 63 return TRI.getHWRegChan(MI.getOperand(0).getReg()); 64 } 65 66 /// \returns register to PV chan mapping for bundle/single instructions that 67 /// immediately precedes I. 68 DenseMap<unsigned, unsigned> getPreviousVector(MachineBasicBlock::iterator I) 69 const { 70 DenseMap<unsigned, unsigned> Result; 71 I--; 72 if (!TII->isALUInstr(I->getOpcode()) && !I->isBundle()) 73 return Result; 74 MachineBasicBlock::instr_iterator BI = I.getInstrIterator(); 75 if (I->isBundle()) 76 BI++; 77 int LastDstChan = -1; 78 do { 79 bool isTrans = false; 80 int BISlot = getSlot(*BI); 81 if (LastDstChan >= BISlot) 82 isTrans = true; 83 LastDstChan = BISlot; 84 if (TII->isPredicated(*BI)) 85 continue; 86 int OperandIdx = TII->getOperandIdx(BI->getOpcode(), R600::OpName::write); 87 if (OperandIdx > -1 && BI->getOperand(OperandIdx).getImm() == 0) 88 continue; 89 int DstIdx = TII->getOperandIdx(BI->getOpcode(), R600::OpName::dst); 90 if (DstIdx == -1) { 91 continue; 92 } 93 Register Dst = BI->getOperand(DstIdx).getReg(); 94 if (isTrans || TII->isTransOnly(*BI)) { 95 Result[Dst] = R600::PS; 96 continue; 97 } 98 if (BI->getOpcode() == R600::DOT4_r600 || 99 BI->getOpcode() == R600::DOT4_eg) { 100 Result[Dst] = R600::PV_X; 101 continue; 102 } 103 if (Dst == R600::OQAP) { 104 continue; 105 } 106 unsigned PVReg = 0; 107 switch (TRI.getHWRegChan(Dst)) { 108 case 0: 109 PVReg = R600::PV_X; 110 break; 111 case 1: 112 PVReg = R600::PV_Y; 113 break; 114 case 2: 115 PVReg = R600::PV_Z; 116 break; 117 case 3: 118 PVReg = R600::PV_W; 119 break; 120 default: 121 llvm_unreachable("Invalid Chan"); 122 } 123 Result[Dst] = PVReg; 124 } while ((++BI)->isBundledWithPred()); 125 return Result; 126 } 127 128 void substitutePV(MachineInstr &MI, const DenseMap<unsigned, unsigned> &PVs) 129 const { 130 unsigned Ops[] = { 131 R600::OpName::src0, 132 R600::OpName::src1, 133 R600::OpName::src2 134 }; 135 for (unsigned i = 0; i < 3; i++) { 136 int OperandIdx = TII->getOperandIdx(MI.getOpcode(), Ops[i]); 137 if (OperandIdx < 0) 138 continue; 139 Register Src = MI.getOperand(OperandIdx).getReg(); 140 const DenseMap<unsigned, unsigned>::const_iterator It = PVs.find(Src); 141 if (It != PVs.end()) 142 MI.getOperand(OperandIdx).setReg(It->second); 143 } 144 } 145 public: 146 // Ctor. 147 R600PacketizerList(MachineFunction &MF, const R600Subtarget &ST, 148 MachineLoopInfo &MLI) 149 : VLIWPacketizerList(MF, MLI, nullptr), 150 TII(ST.getInstrInfo()), 151 TRI(TII->getRegisterInfo()) { 152 VLIW5 = !ST.hasCaymanISA(); 153 } 154 155 // initPacketizerState - initialize some internal flags. 156 void initPacketizerState() override { 157 ConsideredInstUsesAlreadyWrittenVectorElement = false; 158 } 159 160 // ignorePseudoInstruction - Ignore bundling of pseudo instructions. 161 bool ignorePseudoInstruction(const MachineInstr &MI, 162 const MachineBasicBlock *MBB) override { 163 return false; 164 } 165 166 // isSoloInstruction - return true if instruction MI can not be packetized 167 // with any other instruction, which means that MI itself is a packet. 168 bool isSoloInstruction(const MachineInstr &MI) override { 169 if (TII->isVector(MI)) 170 return true; 171 if (!TII->isALUInstr(MI.getOpcode())) 172 return true; 173 if (MI.getOpcode() == R600::GROUP_BARRIER) 174 return true; 175 // XXX: This can be removed once the packetizer properly handles all the 176 // LDS instruction group restrictions. 177 return TII->isLDSInstr(MI.getOpcode()); 178 } 179 180 // isLegalToPacketizeTogether - Is it legal to packetize SUI and SUJ 181 // together. 182 bool isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) override { 183 MachineInstr *MII = SUI->getInstr(), *MIJ = SUJ->getInstr(); 184 if (getSlot(*MII) == getSlot(*MIJ)) 185 ConsideredInstUsesAlreadyWrittenVectorElement = true; 186 // Does MII and MIJ share the same pred_sel ? 187 int OpI = TII->getOperandIdx(MII->getOpcode(), R600::OpName::pred_sel), 188 OpJ = TII->getOperandIdx(MIJ->getOpcode(), R600::OpName::pred_sel); 189 Register PredI = (OpI > -1)?MII->getOperand(OpI).getReg() : Register(), 190 PredJ = (OpJ > -1)?MIJ->getOperand(OpJ).getReg() : Register(); 191 if (PredI != PredJ) 192 return false; 193 if (SUJ->isSucc(SUI)) { 194 for (unsigned i = 0, e = SUJ->Succs.size(); i < e; ++i) { 195 const SDep &Dep = SUJ->Succs[i]; 196 if (Dep.getSUnit() != SUI) 197 continue; 198 if (Dep.getKind() == SDep::Anti) 199 continue; 200 if (Dep.getKind() == SDep::Output) 201 if (MII->getOperand(0).getReg() != MIJ->getOperand(0).getReg()) 202 continue; 203 return false; 204 } 205 } 206 207 bool ARDef = 208 TII->definesAddressRegister(*MII) || TII->definesAddressRegister(*MIJ); 209 bool ARUse = 210 TII->usesAddressRegister(*MII) || TII->usesAddressRegister(*MIJ); 211 212 return !ARDef || !ARUse; 213 } 214 215 // isLegalToPruneDependencies - Is it legal to prune dependece between SUI 216 // and SUJ. 217 bool isLegalToPruneDependencies(SUnit *SUI, SUnit *SUJ) override { 218 return false; 219 } 220 221 void setIsLastBit(MachineInstr *MI, unsigned Bit) const { 222 unsigned LastOp = TII->getOperandIdx(MI->getOpcode(), R600::OpName::last); 223 MI->getOperand(LastOp).setImm(Bit); 224 } 225 226 bool isBundlableWithCurrentPMI(MachineInstr &MI, 227 const DenseMap<unsigned, unsigned> &PV, 228 std::vector<R600InstrInfo::BankSwizzle> &BS, 229 bool &isTransSlot) { 230 isTransSlot = TII->isTransOnly(MI); 231 assert (!isTransSlot || VLIW5); 232 233 // Is the dst reg sequence legal ? 234 if (!isTransSlot && !CurrentPacketMIs.empty()) { 235 if (getSlot(MI) <= getSlot(*CurrentPacketMIs.back())) { 236 if (ConsideredInstUsesAlreadyWrittenVectorElement && 237 !TII->isVectorOnly(MI) && VLIW5) { 238 isTransSlot = true; 239 LLVM_DEBUG({ 240 dbgs() << "Considering as Trans Inst :"; 241 MI.dump(); 242 }); 243 } 244 else 245 return false; 246 } 247 } 248 249 // Are the Constants limitations met ? 250 CurrentPacketMIs.push_back(&MI); 251 if (!TII->fitsConstReadLimitations(CurrentPacketMIs)) { 252 LLVM_DEBUG({ 253 dbgs() << "Couldn't pack :\n"; 254 MI.dump(); 255 dbgs() << "with the following packets :\n"; 256 for (unsigned i = 0, e = CurrentPacketMIs.size() - 1; i < e; i++) { 257 CurrentPacketMIs[i]->dump(); 258 dbgs() << "\n"; 259 } 260 dbgs() << "because of Consts read limitations\n"; 261 }); 262 CurrentPacketMIs.pop_back(); 263 return false; 264 } 265 266 // Is there a BankSwizzle set that meet Read Port limitations ? 267 if (!TII->fitsReadPortLimitations(CurrentPacketMIs, 268 PV, BS, isTransSlot)) { 269 LLVM_DEBUG({ 270 dbgs() << "Couldn't pack :\n"; 271 MI.dump(); 272 dbgs() << "with the following packets :\n"; 273 for (unsigned i = 0, e = CurrentPacketMIs.size() - 1; i < e; i++) { 274 CurrentPacketMIs[i]->dump(); 275 dbgs() << "\n"; 276 } 277 dbgs() << "because of Read port limitations\n"; 278 }); 279 CurrentPacketMIs.pop_back(); 280 return false; 281 } 282 283 // We cannot read LDS source registers from the Trans slot. 284 if (isTransSlot && TII->readsLDSSrcReg(MI)) 285 return false; 286 287 CurrentPacketMIs.pop_back(); 288 return true; 289 } 290 291 MachineBasicBlock::iterator addToPacket(MachineInstr &MI) override { 292 MachineBasicBlock::iterator FirstInBundle = 293 CurrentPacketMIs.empty() ? &MI : CurrentPacketMIs.front(); 294 const DenseMap<unsigned, unsigned> &PV = 295 getPreviousVector(FirstInBundle); 296 std::vector<R600InstrInfo::BankSwizzle> BS; 297 bool isTransSlot; 298 299 if (isBundlableWithCurrentPMI(MI, PV, BS, isTransSlot)) { 300 for (unsigned i = 0, e = CurrentPacketMIs.size(); i < e; i++) { 301 MachineInstr *MI = CurrentPacketMIs[i]; 302 unsigned Op = TII->getOperandIdx(MI->getOpcode(), 303 R600::OpName::bank_swizzle); 304 MI->getOperand(Op).setImm(BS[i]); 305 } 306 unsigned Op = 307 TII->getOperandIdx(MI.getOpcode(), R600::OpName::bank_swizzle); 308 MI.getOperand(Op).setImm(BS.back()); 309 if (!CurrentPacketMIs.empty()) 310 setIsLastBit(CurrentPacketMIs.back(), 0); 311 substitutePV(MI, PV); 312 MachineBasicBlock::iterator It = VLIWPacketizerList::addToPacket(MI); 313 if (isTransSlot) { 314 endPacket(std::next(It)->getParent(), std::next(It)); 315 } 316 return It; 317 } 318 endPacket(MI.getParent(), MI); 319 if (TII->isTransOnly(MI)) 320 return MI; 321 return VLIWPacketizerList::addToPacket(MI); 322 } 323 }; 324 325 bool R600Packetizer::runOnMachineFunction(MachineFunction &Fn) { 326 const R600Subtarget &ST = Fn.getSubtarget<R600Subtarget>(); 327 const R600InstrInfo *TII = ST.getInstrInfo(); 328 329 MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>(); 330 331 // Instantiate the packetizer. 332 R600PacketizerList Packetizer(Fn, ST, MLI); 333 334 // DFA state table should not be empty. 335 assert(Packetizer.getResourceTracker() && "Empty DFA table!"); 336 assert(Packetizer.getResourceTracker()->getInstrItins()); 337 338 if (Packetizer.getResourceTracker()->getInstrItins()->isEmpty()) 339 return false; 340 341 // 342 // Loop over all basic blocks and remove KILL pseudo-instructions 343 // These instructions confuse the dependence analysis. Consider: 344 // D0 = ... (Insn 0) 345 // R0 = KILL R0, D0 (Insn 1) 346 // R0 = ... (Insn 2) 347 // Here, Insn 1 will result in the dependence graph not emitting an output 348 // dependence between Insn 0 and Insn 2. This can lead to incorrect 349 // packetization 350 // 351 for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end(); 352 MBB != MBBe; ++MBB) { 353 MachineBasicBlock::iterator End = MBB->end(); 354 MachineBasicBlock::iterator MI = MBB->begin(); 355 while (MI != End) { 356 if (MI->isKill() || MI->getOpcode() == R600::IMPLICIT_DEF || 357 (MI->getOpcode() == R600::CF_ALU && !MI->getOperand(8).getImm())) { 358 MachineBasicBlock::iterator DeleteMI = MI; 359 ++MI; 360 MBB->erase(DeleteMI); 361 End = MBB->end(); 362 continue; 363 } 364 ++MI; 365 } 366 } 367 368 // Loop over all of the basic blocks. 369 for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end(); 370 MBB != MBBe; ++MBB) { 371 // Find scheduling regions and schedule / packetize each region. 372 unsigned RemainingCount = MBB->size(); 373 for(MachineBasicBlock::iterator RegionEnd = MBB->end(); 374 RegionEnd != MBB->begin();) { 375 // The next region starts above the previous region. Look backward in the 376 // instruction stream until we find the nearest boundary. 377 MachineBasicBlock::iterator I = RegionEnd; 378 for(;I != MBB->begin(); --I, --RemainingCount) { 379 if (TII->isSchedulingBoundary(*std::prev(I), &*MBB, Fn)) 380 break; 381 } 382 I = MBB->begin(); 383 384 // Skip empty scheduling regions. 385 if (I == RegionEnd) { 386 RegionEnd = std::prev(RegionEnd); 387 --RemainingCount; 388 continue; 389 } 390 // Skip regions with one instruction. 391 if (I == std::prev(RegionEnd)) { 392 RegionEnd = std::prev(RegionEnd); 393 continue; 394 } 395 396 Packetizer.PacketizeMIs(&*MBB, &*I, RegionEnd); 397 RegionEnd = I; 398 } 399 } 400 401 return true; 402 403 } 404 405 } // end anonymous namespace 406 407 INITIALIZE_PASS_BEGIN(R600Packetizer, DEBUG_TYPE, 408 "R600 Packetizer", false, false) 409 INITIALIZE_PASS_END(R600Packetizer, DEBUG_TYPE, 410 "R600 Packetizer", false, false) 411 412 char R600Packetizer::ID = 0; 413 414 char &llvm::R600PacketizerID = R600Packetizer::ID; 415 416 llvm::FunctionPass *llvm::createR600Packetizer() { 417 return new R600Packetizer(); 418 } 419