//===-- R600EmitClauseMarkers.cpp - Emit CF_ALU ---------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// Add CF_ALU. R600 ALU instructions are grouped in clauses which can hold
/// 128 ALU instructions; these instructions can access up to 4 prefetched
/// lines of 16 registers from constant buffers. Such ALU clauses are
/// initiated by CF_ALU instructions.
//===----------------------------------------------------------------------===//

#include "MCTargetDesc/R600MCTargetDesc.h"
#include "R600.h"
#include "R600Defines.h"
#include "R600Subtarget.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include <cassert>
#include <utility>
#include <vector>

using namespace llvm;

240b57cec5SDimitry Andric namespace llvm {
250b57cec5SDimitry Andric
260b57cec5SDimitry Andric void initializeR600EmitClauseMarkersPass(PassRegistry&);
270b57cec5SDimitry Andric
280b57cec5SDimitry Andric } // end namespace llvm
290b57cec5SDimitry Andric
300b57cec5SDimitry Andric namespace {
310b57cec5SDimitry Andric
320b57cec5SDimitry Andric class R600EmitClauseMarkers : public MachineFunctionPass {
330b57cec5SDimitry Andric private:
340b57cec5SDimitry Andric const R600InstrInfo *TII = nullptr;
350b57cec5SDimitry Andric int Address = 0;
360b57cec5SDimitry Andric
OccupiedDwords(MachineInstr & MI) const370b57cec5SDimitry Andric unsigned OccupiedDwords(MachineInstr &MI) const {
380b57cec5SDimitry Andric switch (MI.getOpcode()) {
390b57cec5SDimitry Andric case R600::INTERP_PAIR_XY:
400b57cec5SDimitry Andric case R600::INTERP_PAIR_ZW:
410b57cec5SDimitry Andric case R600::INTERP_VEC_LOAD:
420b57cec5SDimitry Andric case R600::DOT_4:
430b57cec5SDimitry Andric return 4;
440b57cec5SDimitry Andric case R600::KILL:
450b57cec5SDimitry Andric return 0;
460b57cec5SDimitry Andric default:
470b57cec5SDimitry Andric break;
480b57cec5SDimitry Andric }
490b57cec5SDimitry Andric
500b57cec5SDimitry Andric // These will be expanded to two ALU instructions in the
510b57cec5SDimitry Andric // ExpandSpecialInstructions pass.
520b57cec5SDimitry Andric if (TII->isLDSRetInstr(MI.getOpcode()))
530b57cec5SDimitry Andric return 2;
540b57cec5SDimitry Andric
550b57cec5SDimitry Andric if (TII->isVector(MI) || TII->isCubeOp(MI.getOpcode()) ||
560b57cec5SDimitry Andric TII->isReductionOp(MI.getOpcode()))
570b57cec5SDimitry Andric return 4;
580b57cec5SDimitry Andric
590b57cec5SDimitry Andric unsigned NumLiteral = 0;
600b57cec5SDimitry Andric for (MachineInstr::mop_iterator It = MI.operands_begin(),
610b57cec5SDimitry Andric E = MI.operands_end();
620b57cec5SDimitry Andric It != E; ++It) {
630b57cec5SDimitry Andric MachineOperand &MO = *It;
640b57cec5SDimitry Andric if (MO.isReg() && MO.getReg() == R600::ALU_LITERAL_X)
650b57cec5SDimitry Andric ++NumLiteral;
660b57cec5SDimitry Andric }
670b57cec5SDimitry Andric return 1 + NumLiteral;
680b57cec5SDimitry Andric }
690b57cec5SDimitry Andric
isALU(const MachineInstr & MI) const700b57cec5SDimitry Andric bool isALU(const MachineInstr &MI) const {
710b57cec5SDimitry Andric if (TII->isALUInstr(MI.getOpcode()))
720b57cec5SDimitry Andric return true;
730b57cec5SDimitry Andric if (TII->isVector(MI) || TII->isCubeOp(MI.getOpcode()))
740b57cec5SDimitry Andric return true;
750b57cec5SDimitry Andric switch (MI.getOpcode()) {
760b57cec5SDimitry Andric case R600::PRED_X:
770b57cec5SDimitry Andric case R600::INTERP_PAIR_XY:
780b57cec5SDimitry Andric case R600::INTERP_PAIR_ZW:
790b57cec5SDimitry Andric case R600::INTERP_VEC_LOAD:
800b57cec5SDimitry Andric case R600::COPY:
810b57cec5SDimitry Andric case R600::DOT_4:
820b57cec5SDimitry Andric return true;
830b57cec5SDimitry Andric default:
840b57cec5SDimitry Andric return false;
850b57cec5SDimitry Andric }
860b57cec5SDimitry Andric }
870b57cec5SDimitry Andric
IsTrivialInst(MachineInstr & MI) const880b57cec5SDimitry Andric bool IsTrivialInst(MachineInstr &MI) const {
890b57cec5SDimitry Andric switch (MI.getOpcode()) {
900b57cec5SDimitry Andric case R600::KILL:
910b57cec5SDimitry Andric case R600::RETURN:
920b57cec5SDimitry Andric case R600::IMPLICIT_DEF:
930b57cec5SDimitry Andric return true;
940b57cec5SDimitry Andric default:
950b57cec5SDimitry Andric return false;
960b57cec5SDimitry Andric }
970b57cec5SDimitry Andric }
980b57cec5SDimitry Andric
getAccessedBankLine(unsigned Sel) const990b57cec5SDimitry Andric std::pair<unsigned, unsigned> getAccessedBankLine(unsigned Sel) const {
1000b57cec5SDimitry Andric // Sel is (512 + (kc_bank << 12) + ConstIndex) << 2
1010b57cec5SDimitry Andric // (See also R600ISelLowering.cpp)
1020b57cec5SDimitry Andric // ConstIndex value is in [0, 4095];
1030b57cec5SDimitry Andric return std::pair<unsigned, unsigned>(
1040b57cec5SDimitry Andric ((Sel >> 2) - 512) >> 12, // KC_BANK
1050b57cec5SDimitry Andric // Line Number of ConstIndex
1060b57cec5SDimitry Andric // A line contains 16 constant registers however KCX bank can lock
1070b57cec5SDimitry Andric // two line at the same time ; thus we want to get an even line number.
1080b57cec5SDimitry Andric // Line number can be retrieved with (>>4), using (>>5) <<1 generates
1090b57cec5SDimitry Andric // an even number.
1100b57cec5SDimitry Andric ((((Sel >> 2) - 512) & 4095) >> 5) << 1);
1110b57cec5SDimitry Andric }
1120b57cec5SDimitry Andric
1130b57cec5SDimitry Andric bool
SubstituteKCacheBank(MachineInstr & MI,std::vector<std::pair<unsigned,unsigned>> & CachedConsts,bool UpdateInstr=true) const1140b57cec5SDimitry Andric SubstituteKCacheBank(MachineInstr &MI,
1150b57cec5SDimitry Andric std::vector<std::pair<unsigned, unsigned>> &CachedConsts,
1160b57cec5SDimitry Andric bool UpdateInstr = true) const {
1170b57cec5SDimitry Andric std::vector<std::pair<unsigned, unsigned>> UsedKCache;
1180b57cec5SDimitry Andric
1190b57cec5SDimitry Andric if (!TII->isALUInstr(MI.getOpcode()) && MI.getOpcode() != R600::DOT_4)
1200b57cec5SDimitry Andric return true;
1210b57cec5SDimitry Andric
1220b57cec5SDimitry Andric const SmallVectorImpl<std::pair<MachineOperand *, int64_t>> &Consts =
1230b57cec5SDimitry Andric TII->getSrcs(MI);
1240b57cec5SDimitry Andric assert(
1250b57cec5SDimitry Andric (TII->isALUInstr(MI.getOpcode()) || MI.getOpcode() == R600::DOT_4) &&
1260b57cec5SDimitry Andric "Can't assign Const");
127*0fca6ea1SDimitry Andric for (auto &[Op, Sel] : Consts) {
128*0fca6ea1SDimitry Andric if (Op->getReg() != R600::ALU_CONST)
1290b57cec5SDimitry Andric continue;
1300b57cec5SDimitry Andric unsigned Chan = Sel & 3, Index = ((Sel >> 2) - 512) & 31;
1310b57cec5SDimitry Andric unsigned KCacheIndex = Index * 4 + Chan;
1320b57cec5SDimitry Andric const std::pair<unsigned, unsigned> &BankLine = getAccessedBankLine(Sel);
1330b57cec5SDimitry Andric if (CachedConsts.empty()) {
1340b57cec5SDimitry Andric CachedConsts.push_back(BankLine);
135*0fca6ea1SDimitry Andric UsedKCache.emplace_back(0, KCacheIndex);
1360b57cec5SDimitry Andric continue;
1370b57cec5SDimitry Andric }
1380b57cec5SDimitry Andric if (CachedConsts[0] == BankLine) {
139*0fca6ea1SDimitry Andric UsedKCache.emplace_back(0, KCacheIndex);
1400b57cec5SDimitry Andric continue;
1410b57cec5SDimitry Andric }
1420b57cec5SDimitry Andric if (CachedConsts.size() == 1) {
1430b57cec5SDimitry Andric CachedConsts.push_back(BankLine);
144*0fca6ea1SDimitry Andric UsedKCache.emplace_back(1, KCacheIndex);
1450b57cec5SDimitry Andric continue;
1460b57cec5SDimitry Andric }
1470b57cec5SDimitry Andric if (CachedConsts[1] == BankLine) {
148*0fca6ea1SDimitry Andric UsedKCache.emplace_back(1, KCacheIndex);
1490b57cec5SDimitry Andric continue;
1500b57cec5SDimitry Andric }
1510b57cec5SDimitry Andric return false;
1520b57cec5SDimitry Andric }
1530b57cec5SDimitry Andric
1540b57cec5SDimitry Andric if (!UpdateInstr)
1550b57cec5SDimitry Andric return true;
1560b57cec5SDimitry Andric
157*0fca6ea1SDimitry Andric unsigned j = 0;
158*0fca6ea1SDimitry Andric for (auto &[Op, Sel] : Consts) {
159*0fca6ea1SDimitry Andric if (Op->getReg() != R600::ALU_CONST)
1600b57cec5SDimitry Andric continue;
1610b57cec5SDimitry Andric switch (UsedKCache[j].first) {
1620b57cec5SDimitry Andric case 0:
163*0fca6ea1SDimitry Andric Op->setReg(R600::R600_KC0RegClass.getRegister(UsedKCache[j].second));
1640b57cec5SDimitry Andric break;
1650b57cec5SDimitry Andric case 1:
166*0fca6ea1SDimitry Andric Op->setReg(R600::R600_KC1RegClass.getRegister(UsedKCache[j].second));
1670b57cec5SDimitry Andric break;
1680b57cec5SDimitry Andric default:
1690b57cec5SDimitry Andric llvm_unreachable("Wrong Cache Line");
1700b57cec5SDimitry Andric }
1710b57cec5SDimitry Andric j++;
1720b57cec5SDimitry Andric }
1730b57cec5SDimitry Andric return true;
1740b57cec5SDimitry Andric }
1750b57cec5SDimitry Andric
canClauseLocalKillFitInClause(unsigned AluInstCount,std::vector<std::pair<unsigned,unsigned>> KCacheBanks,MachineBasicBlock::iterator Def,MachineBasicBlock::iterator BBEnd)1760b57cec5SDimitry Andric bool canClauseLocalKillFitInClause(
1770b57cec5SDimitry Andric unsigned AluInstCount,
1780b57cec5SDimitry Andric std::vector<std::pair<unsigned, unsigned>> KCacheBanks,
1790b57cec5SDimitry Andric MachineBasicBlock::iterator Def,
1800b57cec5SDimitry Andric MachineBasicBlock::iterator BBEnd) {
1810b57cec5SDimitry Andric const R600RegisterInfo &TRI = TII->getRegisterInfo();
1820b57cec5SDimitry Andric //TODO: change this to defs?
1830b57cec5SDimitry Andric for (MachineInstr::const_mop_iterator
1840b57cec5SDimitry Andric MOI = Def->operands_begin(),
1850b57cec5SDimitry Andric MOE = Def->operands_end(); MOI != MOE; ++MOI) {
1860b57cec5SDimitry Andric if (!MOI->isReg() || !MOI->isDef() ||
1870b57cec5SDimitry Andric TRI.isPhysRegLiveAcrossClauses(MOI->getReg()))
1880b57cec5SDimitry Andric continue;
1890b57cec5SDimitry Andric
1900b57cec5SDimitry Andric // Def defines a clause local register, so check that its use will fit
1910b57cec5SDimitry Andric // in the clause.
1920b57cec5SDimitry Andric unsigned LastUseCount = 0;
1930b57cec5SDimitry Andric for (MachineBasicBlock::iterator UseI = Def; UseI != BBEnd; ++UseI) {
1940b57cec5SDimitry Andric AluInstCount += OccupiedDwords(*UseI);
1950b57cec5SDimitry Andric // Make sure we won't need to end the clause due to KCache limitations.
1960b57cec5SDimitry Andric if (!SubstituteKCacheBank(*UseI, KCacheBanks, false))
1970b57cec5SDimitry Andric return false;
1980b57cec5SDimitry Andric
1990b57cec5SDimitry Andric // We have reached the maximum instruction limit before finding the
2000b57cec5SDimitry Andric // use that kills this register, so we cannot use this def in the
2010b57cec5SDimitry Andric // current clause.
2020b57cec5SDimitry Andric if (AluInstCount >= TII->getMaxAlusPerClause())
2030b57cec5SDimitry Andric return false;
2040b57cec5SDimitry Andric
2050b57cec5SDimitry Andric // TODO: Is this true? kill flag appears to work OK below
2060b57cec5SDimitry Andric // Register kill flags have been cleared by the time we get to this
2070b57cec5SDimitry Andric // pass, but it is safe to assume that all uses of this register
2080b57cec5SDimitry Andric // occur in the same basic block as its definition, because
2090b57cec5SDimitry Andric // it is illegal for the scheduler to schedule them in
2100b57cec5SDimitry Andric // different blocks.
2110b57cec5SDimitry Andric if (UseI->readsRegister(MOI->getReg(), &TRI))
2120b57cec5SDimitry Andric LastUseCount = AluInstCount;
2130b57cec5SDimitry Andric
2140b57cec5SDimitry Andric // Exit early if the current use kills the register
2150b57cec5SDimitry Andric if (UseI != Def && UseI->killsRegister(MOI->getReg(), &TRI))
2160b57cec5SDimitry Andric break;
2170b57cec5SDimitry Andric }
2180b57cec5SDimitry Andric if (LastUseCount)
2190b57cec5SDimitry Andric return LastUseCount <= TII->getMaxAlusPerClause();
2200b57cec5SDimitry Andric llvm_unreachable("Clause local register live at end of clause.");
2210b57cec5SDimitry Andric }
2220b57cec5SDimitry Andric return true;
2230b57cec5SDimitry Andric }
2240b57cec5SDimitry Andric
2250b57cec5SDimitry Andric MachineBasicBlock::iterator
MakeALUClause(MachineBasicBlock & MBB,MachineBasicBlock::iterator I)2260b57cec5SDimitry Andric MakeALUClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator I) {
2270b57cec5SDimitry Andric MachineBasicBlock::iterator ClauseHead = I;
2280b57cec5SDimitry Andric std::vector<std::pair<unsigned, unsigned>> KCacheBanks;
2290b57cec5SDimitry Andric bool PushBeforeModifier = false;
2300b57cec5SDimitry Andric unsigned AluInstCount = 0;
2310b57cec5SDimitry Andric for (MachineBasicBlock::iterator E = MBB.end(); I != E; ++I) {
2320b57cec5SDimitry Andric if (IsTrivialInst(*I))
2330b57cec5SDimitry Andric continue;
2340b57cec5SDimitry Andric if (!isALU(*I))
2350b57cec5SDimitry Andric break;
2360b57cec5SDimitry Andric if (AluInstCount > TII->getMaxAlusPerClause())
2370b57cec5SDimitry Andric break;
2380b57cec5SDimitry Andric if (I->getOpcode() == R600::PRED_X) {
2390b57cec5SDimitry Andric // We put PRED_X in its own clause to ensure that ifcvt won't create
2400b57cec5SDimitry Andric // clauses with more than 128 insts.
2410b57cec5SDimitry Andric // IfCvt is indeed checking that "then" and "else" branches of an if
2420b57cec5SDimitry Andric // statement have less than ~60 insts thus converted clauses can't be
2430b57cec5SDimitry Andric // bigger than ~121 insts (predicate setter needs to be in the same
2440b57cec5SDimitry Andric // clause as predicated alus).
2450b57cec5SDimitry Andric if (AluInstCount > 0)
2460b57cec5SDimitry Andric break;
2470b57cec5SDimitry Andric if (TII->getFlagOp(*I).getImm() & MO_FLAG_PUSH)
2480b57cec5SDimitry Andric PushBeforeModifier = true;
2490b57cec5SDimitry Andric AluInstCount ++;
2500b57cec5SDimitry Andric continue;
2510b57cec5SDimitry Andric }
2520b57cec5SDimitry Andric // XXX: GROUP_BARRIER instructions cannot be in the same ALU clause as:
2530b57cec5SDimitry Andric //
2540b57cec5SDimitry Andric // * KILL or INTERP instructions
2550b57cec5SDimitry Andric // * Any instruction that sets UPDATE_EXEC_MASK or UPDATE_PRED bits
2560b57cec5SDimitry Andric // * Uses waterfalling (i.e. INDEX_MODE = AR.X)
2570b57cec5SDimitry Andric //
2580b57cec5SDimitry Andric // XXX: These checks have not been implemented yet.
2590b57cec5SDimitry Andric if (TII->mustBeLastInClause(I->getOpcode())) {
2600b57cec5SDimitry Andric I++;
2610b57cec5SDimitry Andric break;
2620b57cec5SDimitry Andric }
2630b57cec5SDimitry Andric
2640b57cec5SDimitry Andric // If this instruction defines a clause local register, make sure
2650b57cec5SDimitry Andric // its use can fit in this clause.
2660b57cec5SDimitry Andric if (!canClauseLocalKillFitInClause(AluInstCount, KCacheBanks, I, E))
2670b57cec5SDimitry Andric break;
2680b57cec5SDimitry Andric
2690b57cec5SDimitry Andric if (!SubstituteKCacheBank(*I, KCacheBanks))
2700b57cec5SDimitry Andric break;
2710b57cec5SDimitry Andric AluInstCount += OccupiedDwords(*I);
2720b57cec5SDimitry Andric }
2730b57cec5SDimitry Andric unsigned Opcode = PushBeforeModifier ?
2740b57cec5SDimitry Andric R600::CF_ALU_PUSH_BEFORE : R600::CF_ALU;
2750b57cec5SDimitry Andric BuildMI(MBB, ClauseHead, MBB.findDebugLoc(ClauseHead), TII->get(Opcode))
2760b57cec5SDimitry Andric // We don't use the ADDR field until R600ControlFlowFinalizer pass, where
2770b57cec5SDimitry Andric // it is safe to assume it is 0. However if we always put 0 here, the ifcvt
2780b57cec5SDimitry Andric // pass may assume that identical ALU clause starter at the beginning of a
2790b57cec5SDimitry Andric // true and false branch can be factorized which is not the case.
2800b57cec5SDimitry Andric .addImm(Address++) // ADDR
2810b57cec5SDimitry Andric .addImm(KCacheBanks.empty()?0:KCacheBanks[0].first) // KB0
2820b57cec5SDimitry Andric .addImm((KCacheBanks.size() < 2)?0:KCacheBanks[1].first) // KB1
2830b57cec5SDimitry Andric .addImm(KCacheBanks.empty()?0:2) // KM0
2840b57cec5SDimitry Andric .addImm((KCacheBanks.size() < 2)?0:2) // KM1
2850b57cec5SDimitry Andric .addImm(KCacheBanks.empty()?0:KCacheBanks[0].second) // KLINE0
2860b57cec5SDimitry Andric .addImm((KCacheBanks.size() < 2)?0:KCacheBanks[1].second) // KLINE1
2870b57cec5SDimitry Andric .addImm(AluInstCount) // COUNT
2880b57cec5SDimitry Andric .addImm(1); // Enabled
2890b57cec5SDimitry Andric return I;
2900b57cec5SDimitry Andric }
2910b57cec5SDimitry Andric
2920b57cec5SDimitry Andric public:
2930b57cec5SDimitry Andric static char ID;
2940b57cec5SDimitry Andric
R600EmitClauseMarkers()2950b57cec5SDimitry Andric R600EmitClauseMarkers() : MachineFunctionPass(ID) {
2960b57cec5SDimitry Andric initializeR600EmitClauseMarkersPass(*PassRegistry::getPassRegistry());
2970b57cec5SDimitry Andric }
2980b57cec5SDimitry Andric
runOnMachineFunction(MachineFunction & MF)2990b57cec5SDimitry Andric bool runOnMachineFunction(MachineFunction &MF) override {
3000b57cec5SDimitry Andric const R600Subtarget &ST = MF.getSubtarget<R600Subtarget>();
3010b57cec5SDimitry Andric TII = ST.getInstrInfo();
3020b57cec5SDimitry Andric
3034824e7fdSDimitry Andric for (MachineBasicBlock &MBB : MF) {
3040b57cec5SDimitry Andric MachineBasicBlock::iterator I = MBB.begin();
3050b57cec5SDimitry Andric if (I != MBB.end() && I->getOpcode() == R600::CF_ALU)
3060b57cec5SDimitry Andric continue; // BB was already parsed
3070b57cec5SDimitry Andric for (MachineBasicBlock::iterator E = MBB.end(); I != E;) {
3080b57cec5SDimitry Andric if (isALU(*I)) {
3090b57cec5SDimitry Andric auto next = MakeALUClause(MBB, I);
3100b57cec5SDimitry Andric assert(next != I);
3110b57cec5SDimitry Andric I = next;
3120b57cec5SDimitry Andric } else
3130b57cec5SDimitry Andric ++I;
3140b57cec5SDimitry Andric }
3150b57cec5SDimitry Andric }
3160b57cec5SDimitry Andric return false;
3170b57cec5SDimitry Andric }
3180b57cec5SDimitry Andric
getPassName() const3190b57cec5SDimitry Andric StringRef getPassName() const override {
3200b57cec5SDimitry Andric return "R600 Emit Clause Markers Pass";
3210b57cec5SDimitry Andric }
3220b57cec5SDimitry Andric };
3230b57cec5SDimitry Andric
3240b57cec5SDimitry Andric char R600EmitClauseMarkers::ID = 0;
3250b57cec5SDimitry Andric
3260b57cec5SDimitry Andric } // end anonymous namespace
3270b57cec5SDimitry Andric
3280b57cec5SDimitry Andric INITIALIZE_PASS_BEGIN(R600EmitClauseMarkers, "emitclausemarkers",
32981ad6265SDimitry Andric "R600 Emit Clause Markers", false, false)
3300b57cec5SDimitry Andric INITIALIZE_PASS_END(R600EmitClauseMarkers, "emitclausemarkers",
33181ad6265SDimitry Andric "R600 Emit Clause Markers", false, false)
3320b57cec5SDimitry Andric
createR600EmitClauseMarkers()3330b57cec5SDimitry Andric FunctionPass *llvm::createR600EmitClauseMarkers() {
3340b57cec5SDimitry Andric return new R600EmitClauseMarkers();
3350b57cec5SDimitry Andric }
336