1 //===-- R600EmitClauseMarkers.cpp - Emit CF_ALU ---------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 /// \file 10 /// Add CF_ALU. R600 Alu instructions are grouped in clause which can hold 11 /// 128 Alu instructions ; these instructions can access up to 4 prefetched 12 /// 4 lines of 16 registers from constant buffers. Such ALU clauses are 13 /// initiated by CF_ALU instructions. 14 //===----------------------------------------------------------------------===// 15 16 #include "AMDGPU.h" 17 #include "AMDGPUSubtarget.h" 18 #include "R600Defines.h" 19 #include "R600InstrInfo.h" 20 #include "R600RegisterInfo.h" 21 #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 22 #include "llvm/ADT/SmallVector.h" 23 #include "llvm/ADT/StringRef.h" 24 #include "llvm/CodeGen/MachineBasicBlock.h" 25 #include "llvm/CodeGen/MachineFunction.h" 26 #include "llvm/CodeGen/MachineFunctionPass.h" 27 #include "llvm/CodeGen/MachineInstr.h" 28 #include "llvm/CodeGen/MachineInstrBuilder.h" 29 #include "llvm/CodeGen/MachineOperand.h" 30 #include "llvm/Pass.h" 31 #include "llvm/Support/ErrorHandling.h" 32 #include <cassert> 33 #include <cstdint> 34 #include <utility> 35 #include <vector> 36 37 using namespace llvm; 38 39 namespace llvm { 40 41 void initializeR600EmitClauseMarkersPass(PassRegistry&); 42 43 } // end namespace llvm 44 45 namespace { 46 47 class R600EmitClauseMarkers : public MachineFunctionPass { 48 private: 49 const R600InstrInfo *TII = nullptr; 50 int Address = 0; 51 52 unsigned OccupiedDwords(MachineInstr &MI) const { 53 switch (MI.getOpcode()) { 54 case R600::INTERP_PAIR_XY: 55 case R600::INTERP_PAIR_ZW: 56 case R600::INTERP_VEC_LOAD: 57 case R600::DOT_4: 58 return 4; 59 case R600::KILL: 60 return 0; 61 default: 62 break; 63 } 64 65 // These will be expanded to two ALU instructions in the 66 // ExpandSpecialInstructions pass. 67 if (TII->isLDSRetInstr(MI.getOpcode())) 68 return 2; 69 70 if (TII->isVector(MI) || TII->isCubeOp(MI.getOpcode()) || 71 TII->isReductionOp(MI.getOpcode())) 72 return 4; 73 74 unsigned NumLiteral = 0; 75 for (MachineInstr::mop_iterator It = MI.operands_begin(), 76 E = MI.operands_end(); 77 It != E; ++It) { 78 MachineOperand &MO = *It; 79 if (MO.isReg() && MO.getReg() == R600::ALU_LITERAL_X) 80 ++NumLiteral; 81 } 82 return 1 + NumLiteral; 83 } 84 85 bool isALU(const MachineInstr &MI) const { 86 if (TII->isALUInstr(MI.getOpcode())) 87 return true; 88 if (TII->isVector(MI) || TII->isCubeOp(MI.getOpcode())) 89 return true; 90 switch (MI.getOpcode()) { 91 case R600::PRED_X: 92 case R600::INTERP_PAIR_XY: 93 case R600::INTERP_PAIR_ZW: 94 case R600::INTERP_VEC_LOAD: 95 case R600::COPY: 96 case R600::DOT_4: 97 return true; 98 default: 99 return false; 100 } 101 } 102 103 bool IsTrivialInst(MachineInstr &MI) const { 104 switch (MI.getOpcode()) { 105 case R600::KILL: 106 case R600::RETURN: 107 case R600::IMPLICIT_DEF: 108 return true; 109 default: 110 return false; 111 } 112 } 113 114 std::pair<unsigned, unsigned> getAccessedBankLine(unsigned Sel) const { 115 // Sel is (512 + (kc_bank << 12) + ConstIndex) << 2 116 // (See also R600ISelLowering.cpp) 117 // ConstIndex value is in [0, 4095]; 118 return std::pair<unsigned, unsigned>( 119 ((Sel >> 2) - 512) >> 12, // KC_BANK 120 // Line Number of ConstIndex 121 // A line contains 16 constant registers however KCX bank can lock 122 // two line at the same time ; thus we want to get an even line number. 123 // Line number can be retrieved with (>>4), using (>>5) <<1 generates 124 // an even number. 125 ((((Sel >> 2) - 512) & 4095) >> 5) << 1); 126 } 127 128 bool 129 SubstituteKCacheBank(MachineInstr &MI, 130 std::vector<std::pair<unsigned, unsigned>> &CachedConsts, 131 bool UpdateInstr = true) const { 132 std::vector<std::pair<unsigned, unsigned>> UsedKCache; 133 134 if (!TII->isALUInstr(MI.getOpcode()) && MI.getOpcode() != R600::DOT_4) 135 return true; 136 137 const SmallVectorImpl<std::pair<MachineOperand *, int64_t>> &Consts = 138 TII->getSrcs(MI); 139 assert( 140 (TII->isALUInstr(MI.getOpcode()) || MI.getOpcode() == R600::DOT_4) && 141 "Can't assign Const"); 142 for (unsigned i = 0, n = Consts.size(); i < n; ++i) { 143 if (Consts[i].first->getReg() != R600::ALU_CONST) 144 continue; 145 unsigned Sel = Consts[i].second; 146 unsigned Chan = Sel & 3, Index = ((Sel >> 2) - 512) & 31; 147 unsigned KCacheIndex = Index * 4 + Chan; 148 const std::pair<unsigned, unsigned> &BankLine = getAccessedBankLine(Sel); 149 if (CachedConsts.empty()) { 150 CachedConsts.push_back(BankLine); 151 UsedKCache.push_back(std::pair<unsigned, unsigned>(0, KCacheIndex)); 152 continue; 153 } 154 if (CachedConsts[0] == BankLine) { 155 UsedKCache.push_back(std::pair<unsigned, unsigned>(0, KCacheIndex)); 156 continue; 157 } 158 if (CachedConsts.size() == 1) { 159 CachedConsts.push_back(BankLine); 160 UsedKCache.push_back(std::pair<unsigned, unsigned>(1, KCacheIndex)); 161 continue; 162 } 163 if (CachedConsts[1] == BankLine) { 164 UsedKCache.push_back(std::pair<unsigned, unsigned>(1, KCacheIndex)); 165 continue; 166 } 167 return false; 168 } 169 170 if (!UpdateInstr) 171 return true; 172 173 for (unsigned i = 0, j = 0, n = Consts.size(); i < n; ++i) { 174 if (Consts[i].first->getReg() != R600::ALU_CONST) 175 continue; 176 switch(UsedKCache[j].first) { 177 case 0: 178 Consts[i].first->setReg( 179 R600::R600_KC0RegClass.getRegister(UsedKCache[j].second)); 180 break; 181 case 1: 182 Consts[i].first->setReg( 183 R600::R600_KC1RegClass.getRegister(UsedKCache[j].second)); 184 break; 185 default: 186 llvm_unreachable("Wrong Cache Line"); 187 } 188 j++; 189 } 190 return true; 191 } 192 193 bool canClauseLocalKillFitInClause( 194 unsigned AluInstCount, 195 std::vector<std::pair<unsigned, unsigned>> KCacheBanks, 196 MachineBasicBlock::iterator Def, 197 MachineBasicBlock::iterator BBEnd) { 198 const R600RegisterInfo &TRI = TII->getRegisterInfo(); 199 //TODO: change this to defs? 200 for (MachineInstr::const_mop_iterator 201 MOI = Def->operands_begin(), 202 MOE = Def->operands_end(); MOI != MOE; ++MOI) { 203 if (!MOI->isReg() || !MOI->isDef() || 204 TRI.isPhysRegLiveAcrossClauses(MOI->getReg())) 205 continue; 206 207 // Def defines a clause local register, so check that its use will fit 208 // in the clause. 209 unsigned LastUseCount = 0; 210 for (MachineBasicBlock::iterator UseI = Def; UseI != BBEnd; ++UseI) { 211 AluInstCount += OccupiedDwords(*UseI); 212 // Make sure we won't need to end the clause due to KCache limitations. 213 if (!SubstituteKCacheBank(*UseI, KCacheBanks, false)) 214 return false; 215 216 // We have reached the maximum instruction limit before finding the 217 // use that kills this register, so we cannot use this def in the 218 // current clause. 219 if (AluInstCount >= TII->getMaxAlusPerClause()) 220 return false; 221 222 // TODO: Is this true? kill flag appears to work OK below 223 // Register kill flags have been cleared by the time we get to this 224 // pass, but it is safe to assume that all uses of this register 225 // occur in the same basic block as its definition, because 226 // it is illegal for the scheduler to schedule them in 227 // different blocks. 228 if (UseI->readsRegister(MOI->getReg(), &TRI)) 229 LastUseCount = AluInstCount; 230 231 // Exit early if the current use kills the register 232 if (UseI != Def && UseI->killsRegister(MOI->getReg(), &TRI)) 233 break; 234 } 235 if (LastUseCount) 236 return LastUseCount <= TII->getMaxAlusPerClause(); 237 llvm_unreachable("Clause local register live at end of clause."); 238 } 239 return true; 240 } 241 242 MachineBasicBlock::iterator 243 MakeALUClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator I) { 244 MachineBasicBlock::iterator ClauseHead = I; 245 std::vector<std::pair<unsigned, unsigned>> KCacheBanks; 246 bool PushBeforeModifier = false; 247 unsigned AluInstCount = 0; 248 for (MachineBasicBlock::iterator E = MBB.end(); I != E; ++I) { 249 if (IsTrivialInst(*I)) 250 continue; 251 if (!isALU(*I)) 252 break; 253 if (AluInstCount > TII->getMaxAlusPerClause()) 254 break; 255 if (I->getOpcode() == R600::PRED_X) { 256 // We put PRED_X in its own clause to ensure that ifcvt won't create 257 // clauses with more than 128 insts. 258 // IfCvt is indeed checking that "then" and "else" branches of an if 259 // statement have less than ~60 insts thus converted clauses can't be 260 // bigger than ~121 insts (predicate setter needs to be in the same 261 // clause as predicated alus). 262 if (AluInstCount > 0) 263 break; 264 if (TII->getFlagOp(*I).getImm() & MO_FLAG_PUSH) 265 PushBeforeModifier = true; 266 AluInstCount ++; 267 continue; 268 } 269 // XXX: GROUP_BARRIER instructions cannot be in the same ALU clause as: 270 // 271 // * KILL or INTERP instructions 272 // * Any instruction that sets UPDATE_EXEC_MASK or UPDATE_PRED bits 273 // * Uses waterfalling (i.e. INDEX_MODE = AR.X) 274 // 275 // XXX: These checks have not been implemented yet. 276 if (TII->mustBeLastInClause(I->getOpcode())) { 277 I++; 278 break; 279 } 280 281 // If this instruction defines a clause local register, make sure 282 // its use can fit in this clause. 283 if (!canClauseLocalKillFitInClause(AluInstCount, KCacheBanks, I, E)) 284 break; 285 286 if (!SubstituteKCacheBank(*I, KCacheBanks)) 287 break; 288 AluInstCount += OccupiedDwords(*I); 289 } 290 unsigned Opcode = PushBeforeModifier ? 291 R600::CF_ALU_PUSH_BEFORE : R600::CF_ALU; 292 BuildMI(MBB, ClauseHead, MBB.findDebugLoc(ClauseHead), TII->get(Opcode)) 293 // We don't use the ADDR field until R600ControlFlowFinalizer pass, where 294 // it is safe to assume it is 0. However if we always put 0 here, the ifcvt 295 // pass may assume that identical ALU clause starter at the beginning of a 296 // true and false branch can be factorized which is not the case. 297 .addImm(Address++) // ADDR 298 .addImm(KCacheBanks.empty()?0:KCacheBanks[0].first) // KB0 299 .addImm((KCacheBanks.size() < 2)?0:KCacheBanks[1].first) // KB1 300 .addImm(KCacheBanks.empty()?0:2) // KM0 301 .addImm((KCacheBanks.size() < 2)?0:2) // KM1 302 .addImm(KCacheBanks.empty()?0:KCacheBanks[0].second) // KLINE0 303 .addImm((KCacheBanks.size() < 2)?0:KCacheBanks[1].second) // KLINE1 304 .addImm(AluInstCount) // COUNT 305 .addImm(1); // Enabled 306 return I; 307 } 308 309 public: 310 static char ID; 311 312 R600EmitClauseMarkers() : MachineFunctionPass(ID) { 313 initializeR600EmitClauseMarkersPass(*PassRegistry::getPassRegistry()); 314 } 315 316 bool runOnMachineFunction(MachineFunction &MF) override { 317 const R600Subtarget &ST = MF.getSubtarget<R600Subtarget>(); 318 TII = ST.getInstrInfo(); 319 320 for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end(); 321 BB != BB_E; ++BB) { 322 MachineBasicBlock &MBB = *BB; 323 MachineBasicBlock::iterator I = MBB.begin(); 324 if (I != MBB.end() && I->getOpcode() == R600::CF_ALU) 325 continue; // BB was already parsed 326 for (MachineBasicBlock::iterator E = MBB.end(); I != E;) { 327 if (isALU(*I)) { 328 auto next = MakeALUClause(MBB, I); 329 assert(next != I); 330 I = next; 331 } else 332 ++I; 333 } 334 } 335 return false; 336 } 337 338 StringRef getPassName() const override { 339 return "R600 Emit Clause Markers Pass"; 340 } 341 }; 342 343 char R600EmitClauseMarkers::ID = 0; 344 345 } // end anonymous namespace 346 347 INITIALIZE_PASS_BEGIN(R600EmitClauseMarkers, "emitclausemarkers", 348 "R600 Emit Clause Markters", false, false) 349 INITIALIZE_PASS_END(R600EmitClauseMarkers, "emitclausemarkers", 350 "R600 Emit Clause Markters", false, false) 351 352 FunctionPass *llvm::createR600EmitClauseMarkers() { 353 return new R600EmitClauseMarkers(); 354 } 355