1 //===- R600ExpandSpecialInstrs.cpp - Expand special instructions ----------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 /// \file 10 /// Vector, Reduction, and Cube instructions need to fill the entire instruction 11 /// group to work correctly. This pass expands these individual instructions 12 /// into several instructions that will completely fill the instruction group. 13 // 14 //===----------------------------------------------------------------------===// 15 16 #include "MCTargetDesc/R600MCTargetDesc.h" 17 #include "R600.h" 18 #include "R600Defines.h" 19 #include "R600Subtarget.h" 20 #include "llvm/CodeGen/MachineFunction.h" 21 #include "llvm/CodeGen/MachineFunctionPass.h" 22 23 using namespace llvm; 24 25 #define DEBUG_TYPE "r600-expand-special-instrs" 26 27 namespace { 28 29 class R600ExpandSpecialInstrsPass : public MachineFunctionPass { 30 private: 31 const R600InstrInfo *TII = nullptr; 32 33 void SetFlagInNewMI(MachineInstr *NewMI, const MachineInstr *OldMI, 34 R600::OpName Op); 35 36 public: 37 static char ID; 38 39 R600ExpandSpecialInstrsPass() : MachineFunctionPass(ID) {} 40 41 bool runOnMachineFunction(MachineFunction &MF) override; 42 43 StringRef getPassName() const override { 44 return "R600 Expand special instructions pass"; 45 } 46 }; 47 48 } // end anonymous namespace 49 50 INITIALIZE_PASS_BEGIN(R600ExpandSpecialInstrsPass, DEBUG_TYPE, 51 "R600 Expand Special Instrs", false, false) 52 INITIALIZE_PASS_END(R600ExpandSpecialInstrsPass, DEBUG_TYPE, 53 "R600ExpandSpecialInstrs", false, false) 54 55 char R600ExpandSpecialInstrsPass::ID = 0; 56 57 char &llvm::R600ExpandSpecialInstrsPassID = R600ExpandSpecialInstrsPass::ID; 58 59 FunctionPass *llvm::createR600ExpandSpecialInstrsPass() { 60 return new R600ExpandSpecialInstrsPass(); 61 } 62 63 void R600ExpandSpecialInstrsPass::SetFlagInNewMI(MachineInstr *NewMI, 64 const MachineInstr *OldMI, 65 R600::OpName Op) { 66 int OpIdx = TII->getOperandIdx(*OldMI, Op); 67 if (OpIdx > -1) { 68 uint64_t Val = OldMI->getOperand(OpIdx).getImm(); 69 TII->setImmOperand(*NewMI, Op, Val); 70 } 71 } 72 73 bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) { 74 const R600Subtarget &ST = MF.getSubtarget<R600Subtarget>(); 75 TII = ST.getInstrInfo(); 76 77 const R600RegisterInfo &TRI = TII->getRegisterInfo(); 78 79 for (MachineBasicBlock &MBB : MF) { 80 MachineBasicBlock::iterator I = MBB.begin(); 81 while (I != MBB.end()) { 82 MachineInstr &MI = *I; 83 I = std::next(I); 84 85 // Expand LDS_*_RET instructions 86 if (TII->isLDSRetInstr(MI.getOpcode())) { 87 int DstIdx = TII->getOperandIdx(MI.getOpcode(), R600::OpName::dst); 88 assert(DstIdx != -1); 89 MachineOperand &DstOp = MI.getOperand(DstIdx); 90 MachineInstr *Mov = TII->buildMovInstr(&MBB, I, 91 DstOp.getReg(), R600::OQAP); 92 DstOp.setReg(R600::OQAP); 93 int LDSPredSelIdx = TII->getOperandIdx(MI.getOpcode(), 94 R600::OpName::pred_sel); 95 int MovPredSelIdx = TII->getOperandIdx(Mov->getOpcode(), 96 R600::OpName::pred_sel); 97 // Copy the pred_sel bit 98 Mov->getOperand(MovPredSelIdx).setReg( 99 MI.getOperand(LDSPredSelIdx).getReg()); 100 } 101 102 switch (MI.getOpcode()) { 103 default: break; 104 // Expand PRED_X to one of the PRED_SET instructions. 105 case R600::PRED_X: { 106 uint64_t Flags = MI.getOperand(3).getImm(); 107 // The native opcode used by PRED_X is stored as an immediate in the 108 // third operand. 109 MachineInstr *PredSet = TII->buildDefaultInstruction(MBB, I, 110 MI.getOperand(2).getImm(), // opcode 111 MI.getOperand(0).getReg(), // dst 112 MI.getOperand(1).getReg(), // src0 113 R600::ZERO); // src1 114 TII->addFlag(*PredSet, 0, MO_FLAG_MASK); 115 if (Flags & MO_FLAG_PUSH) { 116 TII->setImmOperand(*PredSet, R600::OpName::update_exec_mask, 1); 117 } else { 118 TII->setImmOperand(*PredSet, R600::OpName::update_pred, 1); 119 } 120 MI.eraseFromParent(); 121 continue; 122 } 123 case R600::DOT_4: { 124 125 const R600RegisterInfo &TRI = TII->getRegisterInfo(); 126 127 Register DstReg = MI.getOperand(0).getReg(); 128 unsigned DstBase = TRI.getEncodingValue(DstReg) & HW_REG_MASK; 129 130 for (unsigned Chan = 0; Chan < 4; ++Chan) { 131 bool Mask = (Chan != TRI.getHWRegChan(DstReg)); 132 unsigned SubDstReg = 133 R600::R600_TReg32RegClass.getRegister((DstBase * 4) + Chan); 134 MachineInstr *BMI = 135 TII->buildSlotOfVectorInstruction(MBB, &MI, Chan, SubDstReg); 136 if (Chan > 0) { 137 BMI->bundleWithPred(); 138 } 139 if (Mask) { 140 TII->addFlag(*BMI, 0, MO_FLAG_MASK); 141 } 142 if (Chan != 3) 143 TII->addFlag(*BMI, 0, MO_FLAG_NOT_LAST); 144 unsigned Opcode = BMI->getOpcode(); 145 // While not strictly necessary from hw point of view, we force 146 // all src operands of a dot4 inst to belong to the same slot. 147 Register Src0 = 148 BMI->getOperand(TII->getOperandIdx(Opcode, R600::OpName::src0)) 149 .getReg(); 150 Register Src1 = 151 BMI->getOperand(TII->getOperandIdx(Opcode, R600::OpName::src1)) 152 .getReg(); 153 (void) Src0; 154 (void) Src1; 155 if ((TRI.getEncodingValue(Src0) & 0xff) < 127 && 156 (TRI.getEncodingValue(Src1) & 0xff) < 127) 157 assert(TRI.getHWRegChan(Src0) == TRI.getHWRegChan(Src1)); 158 } 159 MI.eraseFromParent(); 160 continue; 161 } 162 } 163 164 bool IsReduction = TII->isReductionOp(MI.getOpcode()); 165 bool IsVector = TII->isVector(MI); 166 bool IsCube = TII->isCubeOp(MI.getOpcode()); 167 if (!IsReduction && !IsVector && !IsCube) { 168 continue; 169 } 170 171 // Expand the instruction 172 // 173 // Reduction instructions: 174 // T0_X = DP4 T1_XYZW, T2_XYZW 175 // becomes: 176 // TO_X = DP4 T1_X, T2_X 177 // TO_Y (write masked) = DP4 T1_Y, T2_Y 178 // TO_Z (write masked) = DP4 T1_Z, T2_Z 179 // TO_W (write masked) = DP4 T1_W, T2_W 180 // 181 // Vector instructions: 182 // T0_X = MULLO_INT T1_X, T2_X 183 // becomes: 184 // T0_X = MULLO_INT T1_X, T2_X 185 // T0_Y (write masked) = MULLO_INT T1_X, T2_X 186 // T0_Z (write masked) = MULLO_INT T1_X, T2_X 187 // T0_W (write masked) = MULLO_INT T1_X, T2_X 188 // 189 // Cube instructions: 190 // T0_XYZW = CUBE T1_XYZW 191 // becomes: 192 // TO_X = CUBE T1_Z, T1_Y 193 // T0_Y = CUBE T1_Z, T1_X 194 // T0_Z = CUBE T1_X, T1_Z 195 // T0_W = CUBE T1_Y, T1_Z 196 for (unsigned Chan = 0; Chan < 4; Chan++) { 197 Register DstReg = 198 MI.getOperand(TII->getOperandIdx(MI, R600::OpName::dst)).getReg(); 199 Register Src0 = 200 MI.getOperand(TII->getOperandIdx(MI, R600::OpName::src0)).getReg(); 201 unsigned Src1 = 0; 202 203 // Determine the correct source registers 204 if (!IsCube) { 205 int Src1Idx = TII->getOperandIdx(MI, R600::OpName::src1); 206 if (Src1Idx != -1) { 207 Src1 = MI.getOperand(Src1Idx).getReg(); 208 } 209 } 210 if (IsReduction) { 211 unsigned SubRegIndex = R600RegisterInfo::getSubRegFromChannel(Chan); 212 Src0 = TRI.getSubReg(Src0, SubRegIndex); 213 Src1 = TRI.getSubReg(Src1, SubRegIndex); 214 } else if (IsCube) { 215 static const int CubeSrcSwz[] = {2, 2, 0, 1}; 216 unsigned SubRegIndex0 = R600RegisterInfo::getSubRegFromChannel(CubeSrcSwz[Chan]); 217 unsigned SubRegIndex1 = R600RegisterInfo::getSubRegFromChannel(CubeSrcSwz[3 - Chan]); 218 Src1 = TRI.getSubReg(Src0, SubRegIndex1); 219 Src0 = TRI.getSubReg(Src0, SubRegIndex0); 220 } 221 222 // Determine the correct destination registers; 223 bool Mask = false; 224 bool NotLast = true; 225 if (IsCube) { 226 unsigned SubRegIndex = R600RegisterInfo::getSubRegFromChannel(Chan); 227 DstReg = TRI.getSubReg(DstReg, SubRegIndex); 228 } else { 229 // Mask the write if the original instruction does not write to 230 // the current Channel. 231 Mask = (Chan != TRI.getHWRegChan(DstReg)); 232 unsigned DstBase = TRI.getEncodingValue(DstReg) & HW_REG_MASK; 233 DstReg = R600::R600_TReg32RegClass.getRegister((DstBase * 4) + Chan); 234 } 235 236 // Set the IsLast bit 237 NotLast = (Chan != 3 ); 238 239 // Add the new instruction 240 unsigned Opcode = MI.getOpcode(); 241 switch (Opcode) { 242 case R600::CUBE_r600_pseudo: 243 Opcode = R600::CUBE_r600_real; 244 break; 245 case R600::CUBE_eg_pseudo: 246 Opcode = R600::CUBE_eg_real; 247 break; 248 default: 249 break; 250 } 251 252 MachineInstr *NewMI = 253 TII->buildDefaultInstruction(MBB, I, Opcode, DstReg, Src0, Src1); 254 255 if (Chan != 0) 256 NewMI->bundleWithPred(); 257 if (Mask) { 258 TII->addFlag(*NewMI, 0, MO_FLAG_MASK); 259 } 260 if (NotLast) { 261 TII->addFlag(*NewMI, 0, MO_FLAG_NOT_LAST); 262 } 263 SetFlagInNewMI(NewMI, &MI, R600::OpName::clamp); 264 SetFlagInNewMI(NewMI, &MI, R600::OpName::literal); 265 SetFlagInNewMI(NewMI, &MI, R600::OpName::src0_abs); 266 SetFlagInNewMI(NewMI, &MI, R600::OpName::src1_abs); 267 SetFlagInNewMI(NewMI, &MI, R600::OpName::src0_neg); 268 SetFlagInNewMI(NewMI, &MI, R600::OpName::src1_neg); 269 } 270 MI.eraseFromParent(); 271 } 272 } 273 return false; 274 } 275