1 //===----- R600Packetizer.cpp - VLIW packetizer ---------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 /// \file 10 /// This pass implements instructions packetization for R600. It unsets isLast 11 /// bit of instructions inside a bundle and substitutes src register with 12 /// PreviousVector when applicable. 13 // 14 //===----------------------------------------------------------------------===// 15 16 #include "AMDGPU.h" 17 #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 18 #include "R600Subtarget.h" 19 #include "llvm/CodeGen/DFAPacketizer.h" 20 #include "llvm/CodeGen/MachineDominators.h" 21 #include "llvm/CodeGen/MachineLoopInfo.h" 22 #include "llvm/CodeGen/ScheduleDAG.h" 23 24 using namespace llvm; 25 26 #define DEBUG_TYPE "packets" 27 28 namespace { 29 30 class R600Packetizer : public MachineFunctionPass { 31 32 public: 33 static char ID; 34 R600Packetizer() : MachineFunctionPass(ID) {} 35 36 void getAnalysisUsage(AnalysisUsage &AU) const override { 37 AU.setPreservesCFG(); 38 AU.addRequired<MachineDominatorTree>(); 39 AU.addPreserved<MachineDominatorTree>(); 40 AU.addRequired<MachineLoopInfo>(); 41 AU.addPreserved<MachineLoopInfo>(); 42 MachineFunctionPass::getAnalysisUsage(AU); 43 } 44 45 StringRef getPassName() const override { return "R600 Packetizer"; } 46 47 bool runOnMachineFunction(MachineFunction &Fn) override; 48 }; 49 50 class R600PacketizerList : public VLIWPacketizerList { 51 private: 52 const R600InstrInfo *TII; 53 const R600RegisterInfo &TRI; 54 bool VLIW5; 55 bool ConsideredInstUsesAlreadyWrittenVectorElement; 56 57 unsigned getSlot(const MachineInstr &MI) const { 58 return TRI.getHWRegChan(MI.getOperand(0).getReg()); 59 } 60 61 /// \returns register to PV chan mapping for bundle/single instructions that 62 /// immediately precedes I. 63 DenseMap<unsigned, unsigned> getPreviousVector(MachineBasicBlock::iterator I) 64 const { 65 DenseMap<unsigned, unsigned> Result; 66 I--; 67 if (!TII->isALUInstr(I->getOpcode()) && !I->isBundle()) 68 return Result; 69 MachineBasicBlock::instr_iterator BI = I.getInstrIterator(); 70 if (I->isBundle()) 71 BI++; 72 int LastDstChan = -1; 73 do { 74 bool isTrans = false; 75 int BISlot = getSlot(*BI); 76 if (LastDstChan >= BISlot) 77 isTrans = true; 78 LastDstChan = BISlot; 79 if (TII->isPredicated(*BI)) 80 continue; 81 int OperandIdx = TII->getOperandIdx(BI->getOpcode(), R600::OpName::write); 82 if (OperandIdx > -1 && BI->getOperand(OperandIdx).getImm() == 0) 83 continue; 84 int DstIdx = TII->getOperandIdx(BI->getOpcode(), R600::OpName::dst); 85 if (DstIdx == -1) { 86 continue; 87 } 88 Register Dst = BI->getOperand(DstIdx).getReg(); 89 if (isTrans || TII->isTransOnly(*BI)) { 90 Result[Dst] = R600::PS; 91 continue; 92 } 93 if (BI->getOpcode() == R600::DOT4_r600 || 94 BI->getOpcode() == R600::DOT4_eg) { 95 Result[Dst] = R600::PV_X; 96 continue; 97 } 98 if (Dst == R600::OQAP) { 99 continue; 100 } 101 unsigned PVReg = 0; 102 switch (TRI.getHWRegChan(Dst)) { 103 case 0: 104 PVReg = R600::PV_X; 105 break; 106 case 1: 107 PVReg = R600::PV_Y; 108 break; 109 case 2: 110 PVReg = R600::PV_Z; 111 break; 112 case 3: 113 PVReg = R600::PV_W; 114 break; 115 default: 116 llvm_unreachable("Invalid Chan"); 117 } 118 Result[Dst] = PVReg; 119 } while ((++BI)->isBundledWithPred()); 120 return Result; 121 } 122 123 void substitutePV(MachineInstr &MI, const DenseMap<unsigned, unsigned> &PVs) 124 const { 125 unsigned Ops[] = { 126 R600::OpName::src0, 127 R600::OpName::src1, 128 R600::OpName::src2 129 }; 130 for (unsigned i = 0; i < 3; i++) { 131 int OperandIdx = TII->getOperandIdx(MI.getOpcode(), Ops[i]); 132 if (OperandIdx < 0) 133 continue; 134 Register Src = MI.getOperand(OperandIdx).getReg(); 135 const DenseMap<unsigned, unsigned>::const_iterator It = PVs.find(Src); 136 if (It != PVs.end()) 137 MI.getOperand(OperandIdx).setReg(It->second); 138 } 139 } 140 public: 141 // Ctor. 142 R600PacketizerList(MachineFunction &MF, const R600Subtarget &ST, 143 MachineLoopInfo &MLI) 144 : VLIWPacketizerList(MF, MLI, nullptr), 145 TII(ST.getInstrInfo()), 146 TRI(TII->getRegisterInfo()) { 147 VLIW5 = !ST.hasCaymanISA(); 148 } 149 150 // initPacketizerState - initialize some internal flags. 151 void initPacketizerState() override { 152 ConsideredInstUsesAlreadyWrittenVectorElement = false; 153 } 154 155 // ignorePseudoInstruction - Ignore bundling of pseudo instructions. 156 bool ignorePseudoInstruction(const MachineInstr &MI, 157 const MachineBasicBlock *MBB) override { 158 return false; 159 } 160 161 // isSoloInstruction - return true if instruction MI can not be packetized 162 // with any other instruction, which means that MI itself is a packet. 163 bool isSoloInstruction(const MachineInstr &MI) override { 164 if (TII->isVector(MI)) 165 return true; 166 if (!TII->isALUInstr(MI.getOpcode())) 167 return true; 168 if (MI.getOpcode() == R600::GROUP_BARRIER) 169 return true; 170 // XXX: This can be removed once the packetizer properly handles all the 171 // LDS instruction group restrictions. 172 return TII->isLDSInstr(MI.getOpcode()); 173 } 174 175 // isLegalToPacketizeTogether - Is it legal to packetize SUI and SUJ 176 // together. 177 bool isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) override { 178 MachineInstr *MII = SUI->getInstr(), *MIJ = SUJ->getInstr(); 179 if (getSlot(*MII) == getSlot(*MIJ)) 180 ConsideredInstUsesAlreadyWrittenVectorElement = true; 181 // Does MII and MIJ share the same pred_sel ? 182 int OpI = TII->getOperandIdx(MII->getOpcode(), R600::OpName::pred_sel), 183 OpJ = TII->getOperandIdx(MIJ->getOpcode(), R600::OpName::pred_sel); 184 Register PredI = (OpI > -1)?MII->getOperand(OpI).getReg() : Register(), 185 PredJ = (OpJ > -1)?MIJ->getOperand(OpJ).getReg() : Register(); 186 if (PredI != PredJ) 187 return false; 188 if (SUJ->isSucc(SUI)) { 189 for (unsigned i = 0, e = SUJ->Succs.size(); i < e; ++i) { 190 const SDep &Dep = SUJ->Succs[i]; 191 if (Dep.getSUnit() != SUI) 192 continue; 193 if (Dep.getKind() == SDep::Anti) 194 continue; 195 if (Dep.getKind() == SDep::Output) 196 if (MII->getOperand(0).getReg() != MIJ->getOperand(0).getReg()) 197 continue; 198 return false; 199 } 200 } 201 202 bool ARDef = 203 TII->definesAddressRegister(*MII) || TII->definesAddressRegister(*MIJ); 204 bool ARUse = 205 TII->usesAddressRegister(*MII) || TII->usesAddressRegister(*MIJ); 206 207 return !ARDef || !ARUse; 208 } 209 210 // isLegalToPruneDependencies - Is it legal to prune dependece between SUI 211 // and SUJ. 212 bool isLegalToPruneDependencies(SUnit *SUI, SUnit *SUJ) override { 213 return false; 214 } 215 216 void setIsLastBit(MachineInstr *MI, unsigned Bit) const { 217 unsigned LastOp = TII->getOperandIdx(MI->getOpcode(), R600::OpName::last); 218 MI->getOperand(LastOp).setImm(Bit); 219 } 220 221 bool isBundlableWithCurrentPMI(MachineInstr &MI, 222 const DenseMap<unsigned, unsigned> &PV, 223 std::vector<R600InstrInfo::BankSwizzle> &BS, 224 bool &isTransSlot) { 225 isTransSlot = TII->isTransOnly(MI); 226 assert (!isTransSlot || VLIW5); 227 228 // Is the dst reg sequence legal ? 229 if (!isTransSlot && !CurrentPacketMIs.empty()) { 230 if (getSlot(MI) <= getSlot(*CurrentPacketMIs.back())) { 231 if (ConsideredInstUsesAlreadyWrittenVectorElement && 232 !TII->isVectorOnly(MI) && VLIW5) { 233 isTransSlot = true; 234 LLVM_DEBUG({ 235 dbgs() << "Considering as Trans Inst :"; 236 MI.dump(); 237 }); 238 } 239 else 240 return false; 241 } 242 } 243 244 // Are the Constants limitations met ? 245 CurrentPacketMIs.push_back(&MI); 246 if (!TII->fitsConstReadLimitations(CurrentPacketMIs)) { 247 LLVM_DEBUG({ 248 dbgs() << "Couldn't pack :\n"; 249 MI.dump(); 250 dbgs() << "with the following packets :\n"; 251 for (unsigned i = 0, e = CurrentPacketMIs.size() - 1; i < e; i++) { 252 CurrentPacketMIs[i]->dump(); 253 dbgs() << "\n"; 254 } 255 dbgs() << "because of Consts read limitations\n"; 256 }); 257 CurrentPacketMIs.pop_back(); 258 return false; 259 } 260 261 // Is there a BankSwizzle set that meet Read Port limitations ? 262 if (!TII->fitsReadPortLimitations(CurrentPacketMIs, 263 PV, BS, isTransSlot)) { 264 LLVM_DEBUG({ 265 dbgs() << "Couldn't pack :\n"; 266 MI.dump(); 267 dbgs() << "with the following packets :\n"; 268 for (unsigned i = 0, e = CurrentPacketMIs.size() - 1; i < e; i++) { 269 CurrentPacketMIs[i]->dump(); 270 dbgs() << "\n"; 271 } 272 dbgs() << "because of Read port limitations\n"; 273 }); 274 CurrentPacketMIs.pop_back(); 275 return false; 276 } 277 278 // We cannot read LDS source registers from the Trans slot. 279 if (isTransSlot && TII->readsLDSSrcReg(MI)) 280 return false; 281 282 CurrentPacketMIs.pop_back(); 283 return true; 284 } 285 286 MachineBasicBlock::iterator addToPacket(MachineInstr &MI) override { 287 MachineBasicBlock::iterator FirstInBundle = 288 CurrentPacketMIs.empty() ? &MI : CurrentPacketMIs.front(); 289 const DenseMap<unsigned, unsigned> &PV = 290 getPreviousVector(FirstInBundle); 291 std::vector<R600InstrInfo::BankSwizzle> BS; 292 bool isTransSlot; 293 294 if (isBundlableWithCurrentPMI(MI, PV, BS, isTransSlot)) { 295 for (unsigned i = 0, e = CurrentPacketMIs.size(); i < e; i++) { 296 MachineInstr *MI = CurrentPacketMIs[i]; 297 unsigned Op = TII->getOperandIdx(MI->getOpcode(), 298 R600::OpName::bank_swizzle); 299 MI->getOperand(Op).setImm(BS[i]); 300 } 301 unsigned Op = 302 TII->getOperandIdx(MI.getOpcode(), R600::OpName::bank_swizzle); 303 MI.getOperand(Op).setImm(BS.back()); 304 if (!CurrentPacketMIs.empty()) 305 setIsLastBit(CurrentPacketMIs.back(), 0); 306 substitutePV(MI, PV); 307 MachineBasicBlock::iterator It = VLIWPacketizerList::addToPacket(MI); 308 if (isTransSlot) { 309 endPacket(std::next(It)->getParent(), std::next(It)); 310 } 311 return It; 312 } 313 endPacket(MI.getParent(), MI); 314 if (TII->isTransOnly(MI)) 315 return MI; 316 return VLIWPacketizerList::addToPacket(MI); 317 } 318 }; 319 320 bool R600Packetizer::runOnMachineFunction(MachineFunction &Fn) { 321 const R600Subtarget &ST = Fn.getSubtarget<R600Subtarget>(); 322 const R600InstrInfo *TII = ST.getInstrInfo(); 323 324 MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>(); 325 326 // Instantiate the packetizer. 327 R600PacketizerList Packetizer(Fn, ST, MLI); 328 329 // DFA state table should not be empty. 330 assert(Packetizer.getResourceTracker() && "Empty DFA table!"); 331 assert(Packetizer.getResourceTracker()->getInstrItins()); 332 333 if (Packetizer.getResourceTracker()->getInstrItins()->isEmpty()) 334 return false; 335 336 // 337 // Loop over all basic blocks and remove KILL pseudo-instructions 338 // These instructions confuse the dependence analysis. Consider: 339 // D0 = ... (Insn 0) 340 // R0 = KILL R0, D0 (Insn 1) 341 // R0 = ... (Insn 2) 342 // Here, Insn 1 will result in the dependence graph not emitting an output 343 // dependence between Insn 0 and Insn 2. This can lead to incorrect 344 // packetization 345 // 346 for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end(); 347 MBB != MBBe; ++MBB) { 348 MachineBasicBlock::iterator End = MBB->end(); 349 MachineBasicBlock::iterator MI = MBB->begin(); 350 while (MI != End) { 351 if (MI->isKill() || MI->getOpcode() == R600::IMPLICIT_DEF || 352 (MI->getOpcode() == R600::CF_ALU && !MI->getOperand(8).getImm())) { 353 MachineBasicBlock::iterator DeleteMI = MI; 354 ++MI; 355 MBB->erase(DeleteMI); 356 End = MBB->end(); 357 continue; 358 } 359 ++MI; 360 } 361 } 362 363 // Loop over all of the basic blocks. 364 for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end(); 365 MBB != MBBe; ++MBB) { 366 // Find scheduling regions and schedule / packetize each region. 367 unsigned RemainingCount = MBB->size(); 368 for(MachineBasicBlock::iterator RegionEnd = MBB->end(); 369 RegionEnd != MBB->begin();) { 370 // The next region starts above the previous region. Look backward in the 371 // instruction stream until we find the nearest boundary. 372 MachineBasicBlock::iterator I = RegionEnd; 373 for(;I != MBB->begin(); --I, --RemainingCount) { 374 if (TII->isSchedulingBoundary(*std::prev(I), &*MBB, Fn)) 375 break; 376 } 377 I = MBB->begin(); 378 379 // Skip empty scheduling regions. 380 if (I == RegionEnd) { 381 RegionEnd = std::prev(RegionEnd); 382 --RemainingCount; 383 continue; 384 } 385 // Skip regions with one instruction. 386 if (I == std::prev(RegionEnd)) { 387 RegionEnd = std::prev(RegionEnd); 388 continue; 389 } 390 391 Packetizer.PacketizeMIs(&*MBB, &*I, RegionEnd); 392 RegionEnd = I; 393 } 394 } 395 396 return true; 397 398 } 399 400 } // end anonymous namespace 401 402 INITIALIZE_PASS_BEGIN(R600Packetizer, DEBUG_TYPE, 403 "R600 Packetizer", false, false) 404 INITIALIZE_PASS_END(R600Packetizer, DEBUG_TYPE, 405 "R600 Packetizer", false, false) 406 407 char R600Packetizer::ID = 0; 408 409 char &llvm::R600PacketizerID = R600Packetizer::ID; 410 411 llvm::FunctionPass *llvm::createR600Packetizer() { 412 return new R600Packetizer(); 413 } 414