1 //===----- R600Packetizer.cpp - VLIW packetizer ---------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 /// \file 10 /// This pass implements instructions packetization for R600. It unsets isLast 11 /// bit of instructions inside a bundle and substitutes src register with 12 /// PreviousVector when applicable. 13 // 14 //===----------------------------------------------------------------------===// 15 16 #include "MCTargetDesc/R600MCTargetDesc.h" 17 #include "R600.h" 18 #include "R600Subtarget.h" 19 #include "llvm/CodeGen/DFAPacketizer.h" 20 #include "llvm/CodeGen/MachineDominators.h" 21 #include "llvm/CodeGen/MachineLoopInfo.h" 22 #include "llvm/CodeGen/ScheduleDAG.h" 23 24 using namespace llvm; 25 26 #define DEBUG_TYPE "packets" 27 28 namespace { 29 30 class R600Packetizer : public MachineFunctionPass { 31 32 public: 33 static char ID; 34 R600Packetizer() : MachineFunctionPass(ID) {} 35 36 void getAnalysisUsage(AnalysisUsage &AU) const override { 37 AU.setPreservesCFG(); 38 AU.addRequired<MachineDominatorTreeWrapperPass>(); 39 AU.addPreserved<MachineDominatorTreeWrapperPass>(); 40 AU.addRequired<MachineLoopInfoWrapperPass>(); 41 AU.addPreserved<MachineLoopInfoWrapperPass>(); 42 MachineFunctionPass::getAnalysisUsage(AU); 43 } 44 45 StringRef getPassName() const override { return "R600 Packetizer"; } 46 47 bool runOnMachineFunction(MachineFunction &Fn) override; 48 }; 49 50 class R600PacketizerList : public VLIWPacketizerList { 51 private: 52 const R600InstrInfo *TII; 53 const R600RegisterInfo &TRI; 54 bool VLIW5; 55 bool ConsideredInstUsesAlreadyWrittenVectorElement; 56 57 unsigned getSlot(const MachineInstr &MI) const { 58 return TRI.getHWRegChan(MI.getOperand(0).getReg()); 59 } 60 61 /// \returns register to PV chan mapping for bundle/single instructions that 62 /// immediately precedes I. 63 DenseMap<unsigned, unsigned> getPreviousVector(MachineBasicBlock::iterator I) 64 const { 65 DenseMap<unsigned, unsigned> Result; 66 I--; 67 if (!TII->isALUInstr(I->getOpcode()) && !I->isBundle()) 68 return Result; 69 MachineBasicBlock::instr_iterator BI = I.getInstrIterator(); 70 if (I->isBundle()) 71 BI++; 72 int LastDstChan = -1; 73 do { 74 bool isTrans = false; 75 int BISlot = getSlot(*BI); 76 if (LastDstChan >= BISlot) 77 isTrans = true; 78 LastDstChan = BISlot; 79 if (TII->isPredicated(*BI)) 80 continue; 81 int OperandIdx = TII->getOperandIdx(BI->getOpcode(), R600::OpName::write); 82 if (OperandIdx > -1 && BI->getOperand(OperandIdx).getImm() == 0) 83 continue; 84 int DstIdx = TII->getOperandIdx(BI->getOpcode(), R600::OpName::dst); 85 if (DstIdx == -1) { 86 continue; 87 } 88 Register Dst = BI->getOperand(DstIdx).getReg(); 89 if (isTrans || TII->isTransOnly(*BI)) { 90 Result[Dst] = R600::PS; 91 continue; 92 } 93 if (BI->getOpcode() == R600::DOT4_r600 || 94 BI->getOpcode() == R600::DOT4_eg) { 95 Result[Dst] = R600::PV_X; 96 continue; 97 } 98 if (Dst == R600::OQAP) { 99 continue; 100 } 101 unsigned PVReg = 0; 102 switch (TRI.getHWRegChan(Dst)) { 103 case 0: 104 PVReg = R600::PV_X; 105 break; 106 case 1: 107 PVReg = R600::PV_Y; 108 break; 109 case 2: 110 PVReg = R600::PV_Z; 111 break; 112 case 3: 113 PVReg = R600::PV_W; 114 break; 115 default: 116 llvm_unreachable("Invalid Chan"); 117 } 118 Result[Dst] = PVReg; 119 } while ((++BI)->isBundledWithPred()); 120 return Result; 121 } 122 123 void substitutePV(MachineInstr &MI, const DenseMap<unsigned, unsigned> &PVs) 124 const { 125 const R600::OpName Ops[] = {R600::OpName::src0, R600::OpName::src1, 126 R600::OpName::src2}; 127 for (R600::OpName Op : Ops) { 128 int OperandIdx = TII->getOperandIdx(MI.getOpcode(), Op); 129 if (OperandIdx < 0) 130 continue; 131 Register Src = MI.getOperand(OperandIdx).getReg(); 132 const DenseMap<unsigned, unsigned>::const_iterator It = PVs.find(Src); 133 if (It != PVs.end()) 134 MI.getOperand(OperandIdx).setReg(It->second); 135 } 136 } 137 public: 138 // Ctor. 139 R600PacketizerList(MachineFunction &MF, const R600Subtarget &ST, 140 MachineLoopInfo &MLI) 141 : VLIWPacketizerList(MF, MLI, nullptr), 142 TII(ST.getInstrInfo()), 143 TRI(TII->getRegisterInfo()) { 144 VLIW5 = !ST.hasCaymanISA(); 145 } 146 147 // initPacketizerState - initialize some internal flags. 148 void initPacketizerState() override { 149 ConsideredInstUsesAlreadyWrittenVectorElement = false; 150 } 151 152 // ignorePseudoInstruction - Ignore bundling of pseudo instructions. 153 bool ignorePseudoInstruction(const MachineInstr &MI, 154 const MachineBasicBlock *MBB) override { 155 return false; 156 } 157 158 // isSoloInstruction - return true if instruction MI can not be packetized 159 // with any other instruction, which means that MI itself is a packet. 160 bool isSoloInstruction(const MachineInstr &MI) override { 161 if (TII->isVector(MI)) 162 return true; 163 if (!TII->isALUInstr(MI.getOpcode())) 164 return true; 165 if (MI.getOpcode() == R600::GROUP_BARRIER) 166 return true; 167 // XXX: This can be removed once the packetizer properly handles all the 168 // LDS instruction group restrictions. 169 return TII->isLDSInstr(MI.getOpcode()); 170 } 171 172 // isLegalToPacketizeTogether - Is it legal to packetize SUI and SUJ 173 // together. 174 bool isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) override { 175 MachineInstr *MII = SUI->getInstr(), *MIJ = SUJ->getInstr(); 176 if (getSlot(*MII) == getSlot(*MIJ)) 177 ConsideredInstUsesAlreadyWrittenVectorElement = true; 178 // Does MII and MIJ share the same pred_sel ? 179 int OpI = TII->getOperandIdx(MII->getOpcode(), R600::OpName::pred_sel), 180 OpJ = TII->getOperandIdx(MIJ->getOpcode(), R600::OpName::pred_sel); 181 Register PredI = (OpI > -1)?MII->getOperand(OpI).getReg() : Register(), 182 PredJ = (OpJ > -1)?MIJ->getOperand(OpJ).getReg() : Register(); 183 if (PredI != PredJ) 184 return false; 185 if (SUJ->isSucc(SUI)) { 186 for (const SDep &Dep : SUJ->Succs) { 187 if (Dep.getSUnit() != SUI) 188 continue; 189 if (Dep.getKind() == SDep::Anti) 190 continue; 191 if (Dep.getKind() == SDep::Output) 192 if (MII->getOperand(0).getReg() != MIJ->getOperand(0).getReg()) 193 continue; 194 return false; 195 } 196 } 197 198 bool ARDef = 199 TII->definesAddressRegister(*MII) || TII->definesAddressRegister(*MIJ); 200 bool ARUse = 201 TII->usesAddressRegister(*MII) || TII->usesAddressRegister(*MIJ); 202 203 return !ARDef || !ARUse; 204 } 205 206 // isLegalToPruneDependencies - Is it legal to prune dependency between SUI 207 // and SUJ. 208 bool isLegalToPruneDependencies(SUnit *SUI, SUnit *SUJ) override { 209 return false; 210 } 211 212 void setIsLastBit(MachineInstr *MI, unsigned Bit) const { 213 unsigned LastOp = TII->getOperandIdx(MI->getOpcode(), R600::OpName::last); 214 MI->getOperand(LastOp).setImm(Bit); 215 } 216 217 bool isBundlableWithCurrentPMI(MachineInstr &MI, 218 const DenseMap<unsigned, unsigned> &PV, 219 std::vector<R600InstrInfo::BankSwizzle> &BS, 220 bool &isTransSlot) { 221 isTransSlot = TII->isTransOnly(MI); 222 assert (!isTransSlot || VLIW5); 223 224 // Is the dst reg sequence legal ? 225 if (!isTransSlot && !CurrentPacketMIs.empty()) { 226 if (getSlot(MI) <= getSlot(*CurrentPacketMIs.back())) { 227 if (ConsideredInstUsesAlreadyWrittenVectorElement && 228 !TII->isVectorOnly(MI) && VLIW5) { 229 isTransSlot = true; 230 LLVM_DEBUG({ 231 dbgs() << "Considering as Trans Inst :"; 232 MI.dump(); 233 }); 234 } 235 else 236 return false; 237 } 238 } 239 240 // Are the Constants limitations met ? 241 CurrentPacketMIs.push_back(&MI); 242 if (!TII->fitsConstReadLimitations(CurrentPacketMIs)) { 243 LLVM_DEBUG({ 244 dbgs() << "Couldn't pack :\n"; 245 MI.dump(); 246 dbgs() << "with the following packets :\n"; 247 for (unsigned i = 0, e = CurrentPacketMIs.size() - 1; i < e; i++) { 248 CurrentPacketMIs[i]->dump(); 249 dbgs() << "\n"; 250 } 251 dbgs() << "because of Consts read limitations\n"; 252 }); 253 CurrentPacketMIs.pop_back(); 254 return false; 255 } 256 257 // Is there a BankSwizzle set that meet Read Port limitations ? 258 if (!TII->fitsReadPortLimitations(CurrentPacketMIs, 259 PV, BS, isTransSlot)) { 260 LLVM_DEBUG({ 261 dbgs() << "Couldn't pack :\n"; 262 MI.dump(); 263 dbgs() << "with the following packets :\n"; 264 for (unsigned i = 0, e = CurrentPacketMIs.size() - 1; i < e; i++) { 265 CurrentPacketMIs[i]->dump(); 266 dbgs() << "\n"; 267 } 268 dbgs() << "because of Read port limitations\n"; 269 }); 270 CurrentPacketMIs.pop_back(); 271 return false; 272 } 273 274 // We cannot read LDS source registers from the Trans slot. 275 if (isTransSlot && TII->readsLDSSrcReg(MI)) 276 return false; 277 278 CurrentPacketMIs.pop_back(); 279 return true; 280 } 281 282 MachineBasicBlock::iterator addToPacket(MachineInstr &MI) override { 283 MachineBasicBlock::iterator FirstInBundle = 284 CurrentPacketMIs.empty() ? &MI : CurrentPacketMIs.front(); 285 const DenseMap<unsigned, unsigned> &PV = 286 getPreviousVector(FirstInBundle); 287 std::vector<R600InstrInfo::BankSwizzle> BS; 288 bool isTransSlot; 289 290 if (isBundlableWithCurrentPMI(MI, PV, BS, isTransSlot)) { 291 for (unsigned i = 0, e = CurrentPacketMIs.size(); i < e; i++) { 292 MachineInstr *MI = CurrentPacketMIs[i]; 293 unsigned Op = TII->getOperandIdx(MI->getOpcode(), 294 R600::OpName::bank_swizzle); 295 MI->getOperand(Op).setImm(BS[i]); 296 } 297 unsigned Op = 298 TII->getOperandIdx(MI.getOpcode(), R600::OpName::bank_swizzle); 299 MI.getOperand(Op).setImm(BS.back()); 300 if (!CurrentPacketMIs.empty()) 301 setIsLastBit(CurrentPacketMIs.back(), 0); 302 substitutePV(MI, PV); 303 MachineBasicBlock::iterator It = VLIWPacketizerList::addToPacket(MI); 304 if (isTransSlot) { 305 endPacket(std::next(It)->getParent(), std::next(It)); 306 } 307 return It; 308 } 309 endPacket(MI.getParent(), MI); 310 if (TII->isTransOnly(MI)) 311 return MI; 312 return VLIWPacketizerList::addToPacket(MI); 313 } 314 }; 315 316 bool R600Packetizer::runOnMachineFunction(MachineFunction &Fn) { 317 const R600Subtarget &ST = Fn.getSubtarget<R600Subtarget>(); 318 const R600InstrInfo *TII = ST.getInstrInfo(); 319 320 MachineLoopInfo &MLI = getAnalysis<MachineLoopInfoWrapperPass>().getLI(); 321 322 // Instantiate the packetizer. 323 R600PacketizerList Packetizer(Fn, ST, MLI); 324 325 // DFA state table should not be empty. 326 assert(Packetizer.getResourceTracker() && "Empty DFA table!"); 327 assert(Packetizer.getResourceTracker()->getInstrItins()); 328 329 if (Packetizer.getResourceTracker()->getInstrItins()->isEmpty()) 330 return false; 331 332 // 333 // Loop over all basic blocks and remove KILL pseudo-instructions 334 // These instructions confuse the dependence analysis. Consider: 335 // D0 = ... (Insn 0) 336 // R0 = KILL R0, D0 (Insn 1) 337 // R0 = ... (Insn 2) 338 // Here, Insn 1 will result in the dependence graph not emitting an output 339 // dependence between Insn 0 and Insn 2. This can lead to incorrect 340 // packetization 341 // 342 for (MachineBasicBlock &MBB : Fn) { 343 for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) { 344 if (MI.isKill() || MI.getOpcode() == R600::IMPLICIT_DEF || 345 (MI.getOpcode() == R600::CF_ALU && !MI.getOperand(8).getImm())) 346 MBB.erase(MI); 347 } 348 } 349 350 // Loop over all of the basic blocks. 351 for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end(); 352 MBB != MBBe; ++MBB) { 353 // Find scheduling regions and schedule / packetize each region. 354 unsigned RemainingCount = MBB->size(); 355 for(MachineBasicBlock::iterator RegionEnd = MBB->end(); 356 RegionEnd != MBB->begin();) { 357 // The next region starts above the previous region. Look backward in the 358 // instruction stream until we find the nearest boundary. 359 MachineBasicBlock::iterator I = RegionEnd; 360 for(;I != MBB->begin(); --I, --RemainingCount) { 361 if (TII->isSchedulingBoundary(*std::prev(I), &*MBB, Fn)) 362 break; 363 } 364 I = MBB->begin(); 365 366 // Skip empty scheduling regions. 367 if (I == RegionEnd) { 368 RegionEnd = std::prev(RegionEnd); 369 --RemainingCount; 370 continue; 371 } 372 // Skip regions with one instruction. 373 if (I == std::prev(RegionEnd)) { 374 RegionEnd = std::prev(RegionEnd); 375 continue; 376 } 377 378 Packetizer.PacketizeMIs(&*MBB, &*I, RegionEnd); 379 RegionEnd = I; 380 } 381 } 382 383 return true; 384 385 } 386 387 } // end anonymous namespace 388 389 INITIALIZE_PASS_BEGIN(R600Packetizer, DEBUG_TYPE, 390 "R600 Packetizer", false, false) 391 INITIALIZE_PASS_END(R600Packetizer, DEBUG_TYPE, 392 "R600 Packetizer", false, false) 393 394 char R600Packetizer::ID = 0; 395 396 char &llvm::R600PacketizerID = R600Packetizer::ID; 397 398 llvm::FunctionPass *llvm::createR600Packetizer() { 399 return new R600Packetizer(); 400 } 401