1 //===----- R600Packetizer.cpp - VLIW packetizer ---------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 /// \file 10 /// This pass implements instructions packetization for R600. It unsets isLast 11 /// bit of instructions inside a bundle and substitutes src register with 12 /// PreviousVector when applicable. 13 // 14 //===----------------------------------------------------------------------===// 15 16 #include "MCTargetDesc/R600MCTargetDesc.h" 17 #include "R600.h" 18 #include "R600Subtarget.h" 19 #include "llvm/CodeGen/DFAPacketizer.h" 20 #include "llvm/CodeGen/MachineDominators.h" 21 #include "llvm/CodeGen/MachineLoopInfo.h" 22 #include "llvm/CodeGen/ScheduleDAG.h" 23 24 using namespace llvm; 25 26 #define DEBUG_TYPE "packets" 27 28 namespace { 29 30 class R600Packetizer : public MachineFunctionPass { 31 32 public: 33 static char ID; 34 R600Packetizer() : MachineFunctionPass(ID) {} 35 36 void getAnalysisUsage(AnalysisUsage &AU) const override { 37 AU.setPreservesCFG(); 38 AU.addRequired<MachineDominatorTreeWrapperPass>(); 39 AU.addPreserved<MachineDominatorTreeWrapperPass>(); 40 AU.addRequired<MachineLoopInfoWrapperPass>(); 41 AU.addPreserved<MachineLoopInfoWrapperPass>(); 42 MachineFunctionPass::getAnalysisUsage(AU); 43 } 44 45 StringRef getPassName() const override { return "R600 Packetizer"; } 46 47 bool runOnMachineFunction(MachineFunction &Fn) override; 48 }; 49 50 class R600PacketizerList : public VLIWPacketizerList { 51 private: 52 const R600InstrInfo *TII; 53 const R600RegisterInfo &TRI; 54 bool VLIW5; 55 bool ConsideredInstUsesAlreadyWrittenVectorElement; 56 57 unsigned getSlot(const MachineInstr &MI) const { 58 return TRI.getHWRegChan(MI.getOperand(0).getReg()); 59 } 60 61 /// \returns register to PV chan mapping for bundle/single instructions that 62 /// immediately precedes I. 63 DenseMap<unsigned, unsigned> getPreviousVector(MachineBasicBlock::iterator I) 64 const { 65 DenseMap<unsigned, unsigned> Result; 66 I--; 67 if (!TII->isALUInstr(I->getOpcode()) && !I->isBundle()) 68 return Result; 69 MachineBasicBlock::instr_iterator BI = I.getInstrIterator(); 70 if (I->isBundle()) 71 BI++; 72 int LastDstChan = -1; 73 do { 74 bool isTrans = false; 75 int BISlot = getSlot(*BI); 76 if (LastDstChan >= BISlot) 77 isTrans = true; 78 LastDstChan = BISlot; 79 if (TII->isPredicated(*BI)) 80 continue; 81 int OperandIdx = TII->getOperandIdx(BI->getOpcode(), R600::OpName::write); 82 if (OperandIdx > -1 && BI->getOperand(OperandIdx).getImm() == 0) 83 continue; 84 int DstIdx = TII->getOperandIdx(BI->getOpcode(), R600::OpName::dst); 85 if (DstIdx == -1) { 86 continue; 87 } 88 Register Dst = BI->getOperand(DstIdx).getReg(); 89 if (isTrans || TII->isTransOnly(*BI)) { 90 Result[Dst] = R600::PS; 91 continue; 92 } 93 if (BI->getOpcode() == R600::DOT4_r600 || 94 BI->getOpcode() == R600::DOT4_eg) { 95 Result[Dst] = R600::PV_X; 96 continue; 97 } 98 if (Dst == R600::OQAP) { 99 continue; 100 } 101 unsigned PVReg = 0; 102 switch (TRI.getHWRegChan(Dst)) { 103 case 0: 104 PVReg = R600::PV_X; 105 break; 106 case 1: 107 PVReg = R600::PV_Y; 108 break; 109 case 2: 110 PVReg = R600::PV_Z; 111 break; 112 case 3: 113 PVReg = R600::PV_W; 114 break; 115 default: 116 llvm_unreachable("Invalid Chan"); 117 } 118 Result[Dst] = PVReg; 119 } while ((++BI)->isBundledWithPred()); 120 return Result; 121 } 122 123 void substitutePV(MachineInstr &MI, const DenseMap<unsigned, unsigned> &PVs) 124 const { 125 unsigned Ops[] = { 126 R600::OpName::src0, 127 R600::OpName::src1, 128 R600::OpName::src2 129 }; 130 for (unsigned Op : Ops) { 131 int OperandIdx = TII->getOperandIdx(MI.getOpcode(), Op); 132 if (OperandIdx < 0) 133 continue; 134 Register Src = MI.getOperand(OperandIdx).getReg(); 135 const DenseMap<unsigned, unsigned>::const_iterator It = PVs.find(Src); 136 if (It != PVs.end()) 137 MI.getOperand(OperandIdx).setReg(It->second); 138 } 139 } 140 public: 141 // Ctor. 142 R600PacketizerList(MachineFunction &MF, const R600Subtarget &ST, 143 MachineLoopInfo &MLI) 144 : VLIWPacketizerList(MF, MLI, nullptr), 145 TII(ST.getInstrInfo()), 146 TRI(TII->getRegisterInfo()) { 147 VLIW5 = !ST.hasCaymanISA(); 148 } 149 150 // initPacketizerState - initialize some internal flags. 151 void initPacketizerState() override { 152 ConsideredInstUsesAlreadyWrittenVectorElement = false; 153 } 154 155 // ignorePseudoInstruction - Ignore bundling of pseudo instructions. 156 bool ignorePseudoInstruction(const MachineInstr &MI, 157 const MachineBasicBlock *MBB) override { 158 return false; 159 } 160 161 // isSoloInstruction - return true if instruction MI can not be packetized 162 // with any other instruction, which means that MI itself is a packet. 163 bool isSoloInstruction(const MachineInstr &MI) override { 164 if (TII->isVector(MI)) 165 return true; 166 if (!TII->isALUInstr(MI.getOpcode())) 167 return true; 168 if (MI.getOpcode() == R600::GROUP_BARRIER) 169 return true; 170 // XXX: This can be removed once the packetizer properly handles all the 171 // LDS instruction group restrictions. 172 return TII->isLDSInstr(MI.getOpcode()); 173 } 174 175 // isLegalToPacketizeTogether - Is it legal to packetize SUI and SUJ 176 // together. 177 bool isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) override { 178 MachineInstr *MII = SUI->getInstr(), *MIJ = SUJ->getInstr(); 179 if (getSlot(*MII) == getSlot(*MIJ)) 180 ConsideredInstUsesAlreadyWrittenVectorElement = true; 181 // Does MII and MIJ share the same pred_sel ? 182 int OpI = TII->getOperandIdx(MII->getOpcode(), R600::OpName::pred_sel), 183 OpJ = TII->getOperandIdx(MIJ->getOpcode(), R600::OpName::pred_sel); 184 Register PredI = (OpI > -1)?MII->getOperand(OpI).getReg() : Register(), 185 PredJ = (OpJ > -1)?MIJ->getOperand(OpJ).getReg() : Register(); 186 if (PredI != PredJ) 187 return false; 188 if (SUJ->isSucc(SUI)) { 189 for (const SDep &Dep : SUJ->Succs) { 190 if (Dep.getSUnit() != SUI) 191 continue; 192 if (Dep.getKind() == SDep::Anti) 193 continue; 194 if (Dep.getKind() == SDep::Output) 195 if (MII->getOperand(0).getReg() != MIJ->getOperand(0).getReg()) 196 continue; 197 return false; 198 } 199 } 200 201 bool ARDef = 202 TII->definesAddressRegister(*MII) || TII->definesAddressRegister(*MIJ); 203 bool ARUse = 204 TII->usesAddressRegister(*MII) || TII->usesAddressRegister(*MIJ); 205 206 return !ARDef || !ARUse; 207 } 208 209 // isLegalToPruneDependencies - Is it legal to prune dependency between SUI 210 // and SUJ. 211 bool isLegalToPruneDependencies(SUnit *SUI, SUnit *SUJ) override { 212 return false; 213 } 214 215 void setIsLastBit(MachineInstr *MI, unsigned Bit) const { 216 unsigned LastOp = TII->getOperandIdx(MI->getOpcode(), R600::OpName::last); 217 MI->getOperand(LastOp).setImm(Bit); 218 } 219 220 bool isBundlableWithCurrentPMI(MachineInstr &MI, 221 const DenseMap<unsigned, unsigned> &PV, 222 std::vector<R600InstrInfo::BankSwizzle> &BS, 223 bool &isTransSlot) { 224 isTransSlot = TII->isTransOnly(MI); 225 assert (!isTransSlot || VLIW5); 226 227 // Is the dst reg sequence legal ? 228 if (!isTransSlot && !CurrentPacketMIs.empty()) { 229 if (getSlot(MI) <= getSlot(*CurrentPacketMIs.back())) { 230 if (ConsideredInstUsesAlreadyWrittenVectorElement && 231 !TII->isVectorOnly(MI) && VLIW5) { 232 isTransSlot = true; 233 LLVM_DEBUG({ 234 dbgs() << "Considering as Trans Inst :"; 235 MI.dump(); 236 }); 237 } 238 else 239 return false; 240 } 241 } 242 243 // Are the Constants limitations met ? 244 CurrentPacketMIs.push_back(&MI); 245 if (!TII->fitsConstReadLimitations(CurrentPacketMIs)) { 246 LLVM_DEBUG({ 247 dbgs() << "Couldn't pack :\n"; 248 MI.dump(); 249 dbgs() << "with the following packets :\n"; 250 for (unsigned i = 0, e = CurrentPacketMIs.size() - 1; i < e; i++) { 251 CurrentPacketMIs[i]->dump(); 252 dbgs() << "\n"; 253 } 254 dbgs() << "because of Consts read limitations\n"; 255 }); 256 CurrentPacketMIs.pop_back(); 257 return false; 258 } 259 260 // Is there a BankSwizzle set that meet Read Port limitations ? 261 if (!TII->fitsReadPortLimitations(CurrentPacketMIs, 262 PV, BS, isTransSlot)) { 263 LLVM_DEBUG({ 264 dbgs() << "Couldn't pack :\n"; 265 MI.dump(); 266 dbgs() << "with the following packets :\n"; 267 for (unsigned i = 0, e = CurrentPacketMIs.size() - 1; i < e; i++) { 268 CurrentPacketMIs[i]->dump(); 269 dbgs() << "\n"; 270 } 271 dbgs() << "because of Read port limitations\n"; 272 }); 273 CurrentPacketMIs.pop_back(); 274 return false; 275 } 276 277 // We cannot read LDS source registers from the Trans slot. 278 if (isTransSlot && TII->readsLDSSrcReg(MI)) 279 return false; 280 281 CurrentPacketMIs.pop_back(); 282 return true; 283 } 284 285 MachineBasicBlock::iterator addToPacket(MachineInstr &MI) override { 286 MachineBasicBlock::iterator FirstInBundle = 287 CurrentPacketMIs.empty() ? &MI : CurrentPacketMIs.front(); 288 const DenseMap<unsigned, unsigned> &PV = 289 getPreviousVector(FirstInBundle); 290 std::vector<R600InstrInfo::BankSwizzle> BS; 291 bool isTransSlot; 292 293 if (isBundlableWithCurrentPMI(MI, PV, BS, isTransSlot)) { 294 for (unsigned i = 0, e = CurrentPacketMIs.size(); i < e; i++) { 295 MachineInstr *MI = CurrentPacketMIs[i]; 296 unsigned Op = TII->getOperandIdx(MI->getOpcode(), 297 R600::OpName::bank_swizzle); 298 MI->getOperand(Op).setImm(BS[i]); 299 } 300 unsigned Op = 301 TII->getOperandIdx(MI.getOpcode(), R600::OpName::bank_swizzle); 302 MI.getOperand(Op).setImm(BS.back()); 303 if (!CurrentPacketMIs.empty()) 304 setIsLastBit(CurrentPacketMIs.back(), 0); 305 substitutePV(MI, PV); 306 MachineBasicBlock::iterator It = VLIWPacketizerList::addToPacket(MI); 307 if (isTransSlot) { 308 endPacket(std::next(It)->getParent(), std::next(It)); 309 } 310 return It; 311 } 312 endPacket(MI.getParent(), MI); 313 if (TII->isTransOnly(MI)) 314 return MI; 315 return VLIWPacketizerList::addToPacket(MI); 316 } 317 }; 318 319 bool R600Packetizer::runOnMachineFunction(MachineFunction &Fn) { 320 const R600Subtarget &ST = Fn.getSubtarget<R600Subtarget>(); 321 const R600InstrInfo *TII = ST.getInstrInfo(); 322 323 MachineLoopInfo &MLI = getAnalysis<MachineLoopInfoWrapperPass>().getLI(); 324 325 // Instantiate the packetizer. 326 R600PacketizerList Packetizer(Fn, ST, MLI); 327 328 // DFA state table should not be empty. 329 assert(Packetizer.getResourceTracker() && "Empty DFA table!"); 330 assert(Packetizer.getResourceTracker()->getInstrItins()); 331 332 if (Packetizer.getResourceTracker()->getInstrItins()->isEmpty()) 333 return false; 334 335 // 336 // Loop over all basic blocks and remove KILL pseudo-instructions 337 // These instructions confuse the dependence analysis. Consider: 338 // D0 = ... (Insn 0) 339 // R0 = KILL R0, D0 (Insn 1) 340 // R0 = ... (Insn 2) 341 // Here, Insn 1 will result in the dependence graph not emitting an output 342 // dependence between Insn 0 and Insn 2. This can lead to incorrect 343 // packetization 344 // 345 for (MachineBasicBlock &MBB : Fn) { 346 for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) { 347 if (MI.isKill() || MI.getOpcode() == R600::IMPLICIT_DEF || 348 (MI.getOpcode() == R600::CF_ALU && !MI.getOperand(8).getImm())) 349 MBB.erase(MI); 350 } 351 } 352 353 // Loop over all of the basic blocks. 354 for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end(); 355 MBB != MBBe; ++MBB) { 356 // Find scheduling regions and schedule / packetize each region. 357 unsigned RemainingCount = MBB->size(); 358 for(MachineBasicBlock::iterator RegionEnd = MBB->end(); 359 RegionEnd != MBB->begin();) { 360 // The next region starts above the previous region. Look backward in the 361 // instruction stream until we find the nearest boundary. 362 MachineBasicBlock::iterator I = RegionEnd; 363 for(;I != MBB->begin(); --I, --RemainingCount) { 364 if (TII->isSchedulingBoundary(*std::prev(I), &*MBB, Fn)) 365 break; 366 } 367 I = MBB->begin(); 368 369 // Skip empty scheduling regions. 370 if (I == RegionEnd) { 371 RegionEnd = std::prev(RegionEnd); 372 --RemainingCount; 373 continue; 374 } 375 // Skip regions with one instruction. 376 if (I == std::prev(RegionEnd)) { 377 RegionEnd = std::prev(RegionEnd); 378 continue; 379 } 380 381 Packetizer.PacketizeMIs(&*MBB, &*I, RegionEnd); 382 RegionEnd = I; 383 } 384 } 385 386 return true; 387 388 } 389 390 } // end anonymous namespace 391 392 INITIALIZE_PASS_BEGIN(R600Packetizer, DEBUG_TYPE, 393 "R600 Packetizer", false, false) 394 INITIALIZE_PASS_END(R600Packetizer, DEBUG_TYPE, 395 "R600 Packetizer", false, false) 396 397 char R600Packetizer::ID = 0; 398 399 char &llvm::R600PacketizerID = R600Packetizer::ID; 400 401 llvm::FunctionPass *llvm::createR600Packetizer() { 402 return new R600Packetizer(); 403 } 404