1 //===-- R600EmitClauseMarkers.cpp - Emit CF_ALU ---------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 /// \file 10 /// Add CF_ALU. R600 Alu instructions are grouped in clause which can hold 11 /// 128 Alu instructions ; these instructions can access up to 4 prefetched 12 /// 4 lines of 16 registers from constant buffers. Such ALU clauses are 13 /// initiated by CF_ALU instructions. 14 //===----------------------------------------------------------------------===// 15 16 #include "MCTargetDesc/R600MCTargetDesc.h" 17 #include "R600.h" 18 #include "R600Defines.h" 19 #include "R600Subtarget.h" 20 #include "llvm/CodeGen/MachineFunctionPass.h" 21 22 using namespace llvm; 23 24 namespace llvm { 25 26 void initializeR600EmitClauseMarkersPass(PassRegistry&); 27 28 } // end namespace llvm 29 30 namespace { 31 32 class R600EmitClauseMarkers : public MachineFunctionPass { 33 private: 34 const R600InstrInfo *TII = nullptr; 35 int Address = 0; 36 37 unsigned OccupiedDwords(MachineInstr &MI) const { 38 switch (MI.getOpcode()) { 39 case R600::INTERP_PAIR_XY: 40 case R600::INTERP_PAIR_ZW: 41 case R600::INTERP_VEC_LOAD: 42 case R600::DOT_4: 43 return 4; 44 case R600::KILL: 45 return 0; 46 default: 47 break; 48 } 49 50 // These will be expanded to two ALU instructions in the 51 // ExpandSpecialInstructions pass. 52 if (TII->isLDSRetInstr(MI.getOpcode())) 53 return 2; 54 55 if (TII->isVector(MI) || TII->isCubeOp(MI.getOpcode()) || 56 TII->isReductionOp(MI.getOpcode())) 57 return 4; 58 59 unsigned NumLiteral = 0; 60 for (MachineInstr::mop_iterator It = MI.operands_begin(), 61 E = MI.operands_end(); 62 It != E; ++It) { 63 MachineOperand &MO = *It; 64 if (MO.isReg() && MO.getReg() == R600::ALU_LITERAL_X) 65 ++NumLiteral; 66 } 67 return 1 + NumLiteral; 68 } 69 70 bool isALU(const MachineInstr &MI) const { 71 if (TII->isALUInstr(MI.getOpcode())) 72 return true; 73 if (TII->isVector(MI) || TII->isCubeOp(MI.getOpcode())) 74 return true; 75 switch (MI.getOpcode()) { 76 case R600::PRED_X: 77 case R600::INTERP_PAIR_XY: 78 case R600::INTERP_PAIR_ZW: 79 case R600::INTERP_VEC_LOAD: 80 case R600::COPY: 81 case R600::DOT_4: 82 return true; 83 default: 84 return false; 85 } 86 } 87 88 bool IsTrivialInst(MachineInstr &MI) const { 89 switch (MI.getOpcode()) { 90 case R600::KILL: 91 case R600::RETURN: 92 case R600::IMPLICIT_DEF: 93 return true; 94 default: 95 return false; 96 } 97 } 98 99 std::pair<unsigned, unsigned> getAccessedBankLine(unsigned Sel) const { 100 // Sel is (512 + (kc_bank << 12) + ConstIndex) << 2 101 // (See also R600ISelLowering.cpp) 102 // ConstIndex value is in [0, 4095]; 103 return std::pair<unsigned, unsigned>( 104 ((Sel >> 2) - 512) >> 12, // KC_BANK 105 // Line Number of ConstIndex 106 // A line contains 16 constant registers however KCX bank can lock 107 // two line at the same time ; thus we want to get an even line number. 108 // Line number can be retrieved with (>>4), using (>>5) <<1 generates 109 // an even number. 110 ((((Sel >> 2) - 512) & 4095) >> 5) << 1); 111 } 112 113 bool 114 SubstituteKCacheBank(MachineInstr &MI, 115 std::vector<std::pair<unsigned, unsigned>> &CachedConsts, 116 bool UpdateInstr = true) const { 117 std::vector<std::pair<unsigned, unsigned>> UsedKCache; 118 119 if (!TII->isALUInstr(MI.getOpcode()) && MI.getOpcode() != R600::DOT_4) 120 return true; 121 122 const SmallVectorImpl<std::pair<MachineOperand *, int64_t>> &Consts = 123 TII->getSrcs(MI); 124 assert( 125 (TII->isALUInstr(MI.getOpcode()) || MI.getOpcode() == R600::DOT_4) && 126 "Can't assign Const"); 127 for (auto &[Op, Sel] : Consts) { 128 if (Op->getReg() != R600::ALU_CONST) 129 continue; 130 unsigned Chan = Sel & 3, Index = ((Sel >> 2) - 512) & 31; 131 unsigned KCacheIndex = Index * 4 + Chan; 132 const std::pair<unsigned, unsigned> &BankLine = getAccessedBankLine(Sel); 133 if (CachedConsts.empty()) { 134 CachedConsts.push_back(BankLine); 135 UsedKCache.emplace_back(0, KCacheIndex); 136 continue; 137 } 138 if (CachedConsts[0] == BankLine) { 139 UsedKCache.emplace_back(0, KCacheIndex); 140 continue; 141 } 142 if (CachedConsts.size() == 1) { 143 CachedConsts.push_back(BankLine); 144 UsedKCache.emplace_back(1, KCacheIndex); 145 continue; 146 } 147 if (CachedConsts[1] == BankLine) { 148 UsedKCache.emplace_back(1, KCacheIndex); 149 continue; 150 } 151 return false; 152 } 153 154 if (!UpdateInstr) 155 return true; 156 157 unsigned j = 0; 158 for (auto &[Op, Sel] : Consts) { 159 if (Op->getReg() != R600::ALU_CONST) 160 continue; 161 switch (UsedKCache[j].first) { 162 case 0: 163 Op->setReg(R600::R600_KC0RegClass.getRegister(UsedKCache[j].second)); 164 break; 165 case 1: 166 Op->setReg(R600::R600_KC1RegClass.getRegister(UsedKCache[j].second)); 167 break; 168 default: 169 llvm_unreachable("Wrong Cache Line"); 170 } 171 j++; 172 } 173 return true; 174 } 175 176 bool canClauseLocalKillFitInClause( 177 unsigned AluInstCount, 178 std::vector<std::pair<unsigned, unsigned>> KCacheBanks, 179 MachineBasicBlock::iterator Def, 180 MachineBasicBlock::iterator BBEnd) { 181 const R600RegisterInfo &TRI = TII->getRegisterInfo(); 182 //TODO: change this to defs? 183 for (MachineInstr::const_mop_iterator 184 MOI = Def->operands_begin(), 185 MOE = Def->operands_end(); MOI != MOE; ++MOI) { 186 if (!MOI->isReg() || !MOI->isDef() || 187 TRI.isPhysRegLiveAcrossClauses(MOI->getReg())) 188 continue; 189 190 // Def defines a clause local register, so check that its use will fit 191 // in the clause. 192 unsigned LastUseCount = 0; 193 for (MachineBasicBlock::iterator UseI = Def; UseI != BBEnd; ++UseI) { 194 AluInstCount += OccupiedDwords(*UseI); 195 // Make sure we won't need to end the clause due to KCache limitations. 196 if (!SubstituteKCacheBank(*UseI, KCacheBanks, false)) 197 return false; 198 199 // We have reached the maximum instruction limit before finding the 200 // use that kills this register, so we cannot use this def in the 201 // current clause. 202 if (AluInstCount >= TII->getMaxAlusPerClause()) 203 return false; 204 205 // TODO: Is this true? kill flag appears to work OK below 206 // Register kill flags have been cleared by the time we get to this 207 // pass, but it is safe to assume that all uses of this register 208 // occur in the same basic block as its definition, because 209 // it is illegal for the scheduler to schedule them in 210 // different blocks. 211 if (UseI->readsRegister(MOI->getReg(), &TRI)) 212 LastUseCount = AluInstCount; 213 214 // Exit early if the current use kills the register 215 if (UseI != Def && UseI->killsRegister(MOI->getReg(), &TRI)) 216 break; 217 } 218 if (LastUseCount) 219 return LastUseCount <= TII->getMaxAlusPerClause(); 220 llvm_unreachable("Clause local register live at end of clause."); 221 } 222 return true; 223 } 224 225 MachineBasicBlock::iterator 226 MakeALUClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator I) { 227 MachineBasicBlock::iterator ClauseHead = I; 228 std::vector<std::pair<unsigned, unsigned>> KCacheBanks; 229 bool PushBeforeModifier = false; 230 unsigned AluInstCount = 0; 231 for (MachineBasicBlock::iterator E = MBB.end(); I != E; ++I) { 232 if (IsTrivialInst(*I)) 233 continue; 234 if (!isALU(*I)) 235 break; 236 if (AluInstCount > TII->getMaxAlusPerClause()) 237 break; 238 if (I->getOpcode() == R600::PRED_X) { 239 // We put PRED_X in its own clause to ensure that ifcvt won't create 240 // clauses with more than 128 insts. 241 // IfCvt is indeed checking that "then" and "else" branches of an if 242 // statement have less than ~60 insts thus converted clauses can't be 243 // bigger than ~121 insts (predicate setter needs to be in the same 244 // clause as predicated alus). 245 if (AluInstCount > 0) 246 break; 247 if (TII->getFlagOp(*I).getImm() & MO_FLAG_PUSH) 248 PushBeforeModifier = true; 249 AluInstCount ++; 250 continue; 251 } 252 // XXX: GROUP_BARRIER instructions cannot be in the same ALU clause as: 253 // 254 // * KILL or INTERP instructions 255 // * Any instruction that sets UPDATE_EXEC_MASK or UPDATE_PRED bits 256 // * Uses waterfalling (i.e. INDEX_MODE = AR.X) 257 // 258 // XXX: These checks have not been implemented yet. 259 if (TII->mustBeLastInClause(I->getOpcode())) { 260 I++; 261 break; 262 } 263 264 // If this instruction defines a clause local register, make sure 265 // its use can fit in this clause. 266 if (!canClauseLocalKillFitInClause(AluInstCount, KCacheBanks, I, E)) 267 break; 268 269 if (!SubstituteKCacheBank(*I, KCacheBanks)) 270 break; 271 AluInstCount += OccupiedDwords(*I); 272 } 273 unsigned Opcode = PushBeforeModifier ? 274 R600::CF_ALU_PUSH_BEFORE : R600::CF_ALU; 275 BuildMI(MBB, ClauseHead, MBB.findDebugLoc(ClauseHead), TII->get(Opcode)) 276 // We don't use the ADDR field until R600ControlFlowFinalizer pass, where 277 // it is safe to assume it is 0. However if we always put 0 here, the ifcvt 278 // pass may assume that identical ALU clause starter at the beginning of a 279 // true and false branch can be factorized which is not the case. 280 .addImm(Address++) // ADDR 281 .addImm(KCacheBanks.empty()?0:KCacheBanks[0].first) // KB0 282 .addImm((KCacheBanks.size() < 2)?0:KCacheBanks[1].first) // KB1 283 .addImm(KCacheBanks.empty()?0:2) // KM0 284 .addImm((KCacheBanks.size() < 2)?0:2) // KM1 285 .addImm(KCacheBanks.empty()?0:KCacheBanks[0].second) // KLINE0 286 .addImm((KCacheBanks.size() < 2)?0:KCacheBanks[1].second) // KLINE1 287 .addImm(AluInstCount) // COUNT 288 .addImm(1); // Enabled 289 return I; 290 } 291 292 public: 293 static char ID; 294 295 R600EmitClauseMarkers() : MachineFunctionPass(ID) { 296 initializeR600EmitClauseMarkersPass(*PassRegistry::getPassRegistry()); 297 } 298 299 bool runOnMachineFunction(MachineFunction &MF) override { 300 const R600Subtarget &ST = MF.getSubtarget<R600Subtarget>(); 301 TII = ST.getInstrInfo(); 302 303 for (MachineBasicBlock &MBB : MF) { 304 MachineBasicBlock::iterator I = MBB.begin(); 305 if (I != MBB.end() && I->getOpcode() == R600::CF_ALU) 306 continue; // BB was already parsed 307 for (MachineBasicBlock::iterator E = MBB.end(); I != E;) { 308 if (isALU(*I)) { 309 auto next = MakeALUClause(MBB, I); 310 assert(next != I); 311 I = next; 312 } else 313 ++I; 314 } 315 } 316 return false; 317 } 318 319 StringRef getPassName() const override { 320 return "R600 Emit Clause Markers Pass"; 321 } 322 }; 323 324 char R600EmitClauseMarkers::ID = 0; 325 326 } // end anonymous namespace 327 328 INITIALIZE_PASS_BEGIN(R600EmitClauseMarkers, "emitclausemarkers", 329 "R600 Emit Clause Markers", false, false) 330 INITIALIZE_PASS_END(R600EmitClauseMarkers, "emitclausemarkers", 331 "R600 Emit Clause Markers", false, false) 332 333 FunctionPass *llvm::createR600EmitClauseMarkers() { 334 return new R600EmitClauseMarkers(); 335 } 336