1 //===-- R600EmitClauseMarkers.cpp - Emit CF_ALU ---------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 /// \file 10 /// Add CF_ALU. R600 Alu instructions are grouped in clause which can hold 11 /// 128 Alu instructions ; these instructions can access up to 4 prefetched 12 /// 4 lines of 16 registers from constant buffers. Such ALU clauses are 13 /// initiated by CF_ALU instructions. 14 //===----------------------------------------------------------------------===// 15 16 #include "AMDGPU.h" 17 #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 18 #include "R600Defines.h" 19 #include "R600Subtarget.h" 20 21 using namespace llvm; 22 23 namespace llvm { 24 25 void initializeR600EmitClauseMarkersPass(PassRegistry&); 26 27 } // end namespace llvm 28 29 namespace { 30 31 class R600EmitClauseMarkers : public MachineFunctionPass { 32 private: 33 const R600InstrInfo *TII = nullptr; 34 int Address = 0; 35 36 unsigned OccupiedDwords(MachineInstr &MI) const { 37 switch (MI.getOpcode()) { 38 case R600::INTERP_PAIR_XY: 39 case R600::INTERP_PAIR_ZW: 40 case R600::INTERP_VEC_LOAD: 41 case R600::DOT_4: 42 return 4; 43 case R600::KILL: 44 return 0; 45 default: 46 break; 47 } 48 49 // These will be expanded to two ALU instructions in the 50 // ExpandSpecialInstructions pass. 51 if (TII->isLDSRetInstr(MI.getOpcode())) 52 return 2; 53 54 if (TII->isVector(MI) || TII->isCubeOp(MI.getOpcode()) || 55 TII->isReductionOp(MI.getOpcode())) 56 return 4; 57 58 unsigned NumLiteral = 0; 59 for (MachineInstr::mop_iterator It = MI.operands_begin(), 60 E = MI.operands_end(); 61 It != E; ++It) { 62 MachineOperand &MO = *It; 63 if (MO.isReg() && MO.getReg() == R600::ALU_LITERAL_X) 64 ++NumLiteral; 65 } 66 return 1 + NumLiteral; 67 } 68 69 bool isALU(const MachineInstr &MI) const { 70 if (TII->isALUInstr(MI.getOpcode())) 71 return true; 72 if (TII->isVector(MI) || TII->isCubeOp(MI.getOpcode())) 73 return true; 74 switch (MI.getOpcode()) { 75 case R600::PRED_X: 76 case R600::INTERP_PAIR_XY: 77 case R600::INTERP_PAIR_ZW: 78 case R600::INTERP_VEC_LOAD: 79 case R600::COPY: 80 case R600::DOT_4: 81 return true; 82 default: 83 return false; 84 } 85 } 86 87 bool IsTrivialInst(MachineInstr &MI) const { 88 switch (MI.getOpcode()) { 89 case R600::KILL: 90 case R600::RETURN: 91 case R600::IMPLICIT_DEF: 92 return true; 93 default: 94 return false; 95 } 96 } 97 98 std::pair<unsigned, unsigned> getAccessedBankLine(unsigned Sel) const { 99 // Sel is (512 + (kc_bank << 12) + ConstIndex) << 2 100 // (See also R600ISelLowering.cpp) 101 // ConstIndex value is in [0, 4095]; 102 return std::pair<unsigned, unsigned>( 103 ((Sel >> 2) - 512) >> 12, // KC_BANK 104 // Line Number of ConstIndex 105 // A line contains 16 constant registers however KCX bank can lock 106 // two line at the same time ; thus we want to get an even line number. 107 // Line number can be retrieved with (>>4), using (>>5) <<1 generates 108 // an even number. 109 ((((Sel >> 2) - 512) & 4095) >> 5) << 1); 110 } 111 112 bool 113 SubstituteKCacheBank(MachineInstr &MI, 114 std::vector<std::pair<unsigned, unsigned>> &CachedConsts, 115 bool UpdateInstr = true) const { 116 std::vector<std::pair<unsigned, unsigned>> UsedKCache; 117 118 if (!TII->isALUInstr(MI.getOpcode()) && MI.getOpcode() != R600::DOT_4) 119 return true; 120 121 const SmallVectorImpl<std::pair<MachineOperand *, int64_t>> &Consts = 122 TII->getSrcs(MI); 123 assert( 124 (TII->isALUInstr(MI.getOpcode()) || MI.getOpcode() == R600::DOT_4) && 125 "Can't assign Const"); 126 for (unsigned i = 0, n = Consts.size(); i < n; ++i) { 127 if (Consts[i].first->getReg() != R600::ALU_CONST) 128 continue; 129 unsigned Sel = Consts[i].second; 130 unsigned Chan = Sel & 3, Index = ((Sel >> 2) - 512) & 31; 131 unsigned KCacheIndex = Index * 4 + Chan; 132 const std::pair<unsigned, unsigned> &BankLine = getAccessedBankLine(Sel); 133 if (CachedConsts.empty()) { 134 CachedConsts.push_back(BankLine); 135 UsedKCache.push_back(std::pair<unsigned, unsigned>(0, KCacheIndex)); 136 continue; 137 } 138 if (CachedConsts[0] == BankLine) { 139 UsedKCache.push_back(std::pair<unsigned, unsigned>(0, KCacheIndex)); 140 continue; 141 } 142 if (CachedConsts.size() == 1) { 143 CachedConsts.push_back(BankLine); 144 UsedKCache.push_back(std::pair<unsigned, unsigned>(1, KCacheIndex)); 145 continue; 146 } 147 if (CachedConsts[1] == BankLine) { 148 UsedKCache.push_back(std::pair<unsigned, unsigned>(1, KCacheIndex)); 149 continue; 150 } 151 return false; 152 } 153 154 if (!UpdateInstr) 155 return true; 156 157 for (unsigned i = 0, j = 0, n = Consts.size(); i < n; ++i) { 158 if (Consts[i].first->getReg() != R600::ALU_CONST) 159 continue; 160 switch(UsedKCache[j].first) { 161 case 0: 162 Consts[i].first->setReg( 163 R600::R600_KC0RegClass.getRegister(UsedKCache[j].second)); 164 break; 165 case 1: 166 Consts[i].first->setReg( 167 R600::R600_KC1RegClass.getRegister(UsedKCache[j].second)); 168 break; 169 default: 170 llvm_unreachable("Wrong Cache Line"); 171 } 172 j++; 173 } 174 return true; 175 } 176 177 bool canClauseLocalKillFitInClause( 178 unsigned AluInstCount, 179 std::vector<std::pair<unsigned, unsigned>> KCacheBanks, 180 MachineBasicBlock::iterator Def, 181 MachineBasicBlock::iterator BBEnd) { 182 const R600RegisterInfo &TRI = TII->getRegisterInfo(); 183 //TODO: change this to defs? 184 for (MachineInstr::const_mop_iterator 185 MOI = Def->operands_begin(), 186 MOE = Def->operands_end(); MOI != MOE; ++MOI) { 187 if (!MOI->isReg() || !MOI->isDef() || 188 TRI.isPhysRegLiveAcrossClauses(MOI->getReg())) 189 continue; 190 191 // Def defines a clause local register, so check that its use will fit 192 // in the clause. 193 unsigned LastUseCount = 0; 194 for (MachineBasicBlock::iterator UseI = Def; UseI != BBEnd; ++UseI) { 195 AluInstCount += OccupiedDwords(*UseI); 196 // Make sure we won't need to end the clause due to KCache limitations. 197 if (!SubstituteKCacheBank(*UseI, KCacheBanks, false)) 198 return false; 199 200 // We have reached the maximum instruction limit before finding the 201 // use that kills this register, so we cannot use this def in the 202 // current clause. 203 if (AluInstCount >= TII->getMaxAlusPerClause()) 204 return false; 205 206 // TODO: Is this true? kill flag appears to work OK below 207 // Register kill flags have been cleared by the time we get to this 208 // pass, but it is safe to assume that all uses of this register 209 // occur in the same basic block as its definition, because 210 // it is illegal for the scheduler to schedule them in 211 // different blocks. 212 if (UseI->readsRegister(MOI->getReg(), &TRI)) 213 LastUseCount = AluInstCount; 214 215 // Exit early if the current use kills the register 216 if (UseI != Def && UseI->killsRegister(MOI->getReg(), &TRI)) 217 break; 218 } 219 if (LastUseCount) 220 return LastUseCount <= TII->getMaxAlusPerClause(); 221 llvm_unreachable("Clause local register live at end of clause."); 222 } 223 return true; 224 } 225 226 MachineBasicBlock::iterator 227 MakeALUClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator I) { 228 MachineBasicBlock::iterator ClauseHead = I; 229 std::vector<std::pair<unsigned, unsigned>> KCacheBanks; 230 bool PushBeforeModifier = false; 231 unsigned AluInstCount = 0; 232 for (MachineBasicBlock::iterator E = MBB.end(); I != E; ++I) { 233 if (IsTrivialInst(*I)) 234 continue; 235 if (!isALU(*I)) 236 break; 237 if (AluInstCount > TII->getMaxAlusPerClause()) 238 break; 239 if (I->getOpcode() == R600::PRED_X) { 240 // We put PRED_X in its own clause to ensure that ifcvt won't create 241 // clauses with more than 128 insts. 242 // IfCvt is indeed checking that "then" and "else" branches of an if 243 // statement have less than ~60 insts thus converted clauses can't be 244 // bigger than ~121 insts (predicate setter needs to be in the same 245 // clause as predicated alus). 246 if (AluInstCount > 0) 247 break; 248 if (TII->getFlagOp(*I).getImm() & MO_FLAG_PUSH) 249 PushBeforeModifier = true; 250 AluInstCount ++; 251 continue; 252 } 253 // XXX: GROUP_BARRIER instructions cannot be in the same ALU clause as: 254 // 255 // * KILL or INTERP instructions 256 // * Any instruction that sets UPDATE_EXEC_MASK or UPDATE_PRED bits 257 // * Uses waterfalling (i.e. INDEX_MODE = AR.X) 258 // 259 // XXX: These checks have not been implemented yet. 260 if (TII->mustBeLastInClause(I->getOpcode())) { 261 I++; 262 break; 263 } 264 265 // If this instruction defines a clause local register, make sure 266 // its use can fit in this clause. 267 if (!canClauseLocalKillFitInClause(AluInstCount, KCacheBanks, I, E)) 268 break; 269 270 if (!SubstituteKCacheBank(*I, KCacheBanks)) 271 break; 272 AluInstCount += OccupiedDwords(*I); 273 } 274 unsigned Opcode = PushBeforeModifier ? 275 R600::CF_ALU_PUSH_BEFORE : R600::CF_ALU; 276 BuildMI(MBB, ClauseHead, MBB.findDebugLoc(ClauseHead), TII->get(Opcode)) 277 // We don't use the ADDR field until R600ControlFlowFinalizer pass, where 278 // it is safe to assume it is 0. However if we always put 0 here, the ifcvt 279 // pass may assume that identical ALU clause starter at the beginning of a 280 // true and false branch can be factorized which is not the case. 281 .addImm(Address++) // ADDR 282 .addImm(KCacheBanks.empty()?0:KCacheBanks[0].first) // KB0 283 .addImm((KCacheBanks.size() < 2)?0:KCacheBanks[1].first) // KB1 284 .addImm(KCacheBanks.empty()?0:2) // KM0 285 .addImm((KCacheBanks.size() < 2)?0:2) // KM1 286 .addImm(KCacheBanks.empty()?0:KCacheBanks[0].second) // KLINE0 287 .addImm((KCacheBanks.size() < 2)?0:KCacheBanks[1].second) // KLINE1 288 .addImm(AluInstCount) // COUNT 289 .addImm(1); // Enabled 290 return I; 291 } 292 293 public: 294 static char ID; 295 296 R600EmitClauseMarkers() : MachineFunctionPass(ID) { 297 initializeR600EmitClauseMarkersPass(*PassRegistry::getPassRegistry()); 298 } 299 300 bool runOnMachineFunction(MachineFunction &MF) override { 301 const R600Subtarget &ST = MF.getSubtarget<R600Subtarget>(); 302 TII = ST.getInstrInfo(); 303 304 for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end(); 305 BB != BB_E; ++BB) { 306 MachineBasicBlock &MBB = *BB; 307 MachineBasicBlock::iterator I = MBB.begin(); 308 if (I != MBB.end() && I->getOpcode() == R600::CF_ALU) 309 continue; // BB was already parsed 310 for (MachineBasicBlock::iterator E = MBB.end(); I != E;) { 311 if (isALU(*I)) { 312 auto next = MakeALUClause(MBB, I); 313 assert(next != I); 314 I = next; 315 } else 316 ++I; 317 } 318 } 319 return false; 320 } 321 322 StringRef getPassName() const override { 323 return "R600 Emit Clause Markers Pass"; 324 } 325 }; 326 327 char R600EmitClauseMarkers::ID = 0; 328 329 } // end anonymous namespace 330 331 INITIALIZE_PASS_BEGIN(R600EmitClauseMarkers, "emitclausemarkers", 332 "R600 Emit Clause Markters", false, false) 333 INITIALIZE_PASS_END(R600EmitClauseMarkers, "emitclausemarkers", 334 "R600 Emit Clause Markters", false, false) 335 336 FunctionPass *llvm::createR600EmitClauseMarkers() { 337 return new R600EmitClauseMarkers(); 338 } 339