//===- AMDGPUMCInstLower.cpp - Lower AMDGPU MachineInstr to an MCInst -----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// Code to lower AMDGPU MachineInstrs to their corresponding MCInst.
//
//===----------------------------------------------------------------------===//
//

#include "AMDGPUMCInstLower.h"
#include "AMDGPU.h"
#include "AMDGPUAsmPrinter.h"
#include "AMDGPUMachineFunction.h"
#include "MCTargetDesc/AMDGPUInstPrinter.h"
#include "MCTargetDesc/AMDGPUMCExpr.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCObjectStreamer.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
#include <algorithm>

using namespace llvm;

#include "AMDGPUGenMCPseudoLowering.inc"

AMDGPUMCInstLower::AMDGPUMCInstLower(MCContext &ctx,
                                     const TargetSubtargetInfo &st,
                                     const AsmPrinter &ap)
    : Ctx(ctx), ST(st), AP(ap) {}

// Map a machine operand's target flags to the corresponding relocation
// specifier for the MCExpr.
static AMDGPUMCExpr::Specifier getSpecifier(unsigned MOFlags) {
  switch (MOFlags) {
  default:
    return AMDGPUMCExpr::S_None;
  case SIInstrInfo::MO_GOTPCREL:
    return AMDGPUMCExpr::S_GOTPCREL;
  case SIInstrInfo::MO_GOTPCREL32_LO:
    return AMDGPUMCExpr::S_GOTPCREL32_LO;
  case SIInstrInfo::MO_GOTPCREL32_HI:
    return AMDGPUMCExpr::S_GOTPCREL32_HI;
  case SIInstrInfo::MO_REL32_LO:
    return AMDGPUMCExpr::S_REL32_LO;
  case SIInstrInfo::MO_REL32_HI:
    return AMDGPUMCExpr::S_REL32_HI;
  case SIInstrInfo::MO_ABS32_LO:
    return AMDGPUMCExpr::S_ABS32_LO;
  case SIInstrInfo::MO_ABS32_HI:
    return AMDGPUMCExpr::S_ABS32_HI;
  }
}
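
// Lower a single MachineOperand to an equivalent MCOperand. Returns true on
// success; returns false for operands, such as register masks, that have no
// MC-level representation.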
bool AMDGPUMCInstLower::lowerOperand(const MachineOperand &MO,
                                     MCOperand &MCOp) const {
  switch (MO.getType()) {
  default:
    break;
  case MachineOperand::MO_Immediate:
    MCOp = MCOperand::createImm(MO.getImm());
    return true;
  case MachineOperand::MO_Register:
    MCOp = MCOperand::createReg(AMDGPU::getMCReg(MO.getReg(), ST));
    return true;
  case MachineOperand::MO_MachineBasicBlock:
    MCOp = MCOperand::createExpr(
        MCSymbolRefExpr::create(MO.getMBB()->getSymbol(), Ctx));
    return true;
  case MachineOperand::MO_GlobalAddress: {
    const GlobalValue *GV = MO.getGlobal();
    SmallString<128> SymbolName;
    AP.getNameWithPrefix(SymbolName, GV);
    MCSymbol *Sym = Ctx.getOrCreateSymbol(SymbolName);
    const MCExpr *Expr =
        MCSymbolRefExpr::create(Sym, getSpecifier(MO.getTargetFlags()), Ctx);
    int64_t Offset = MO.getOffset();
    if (Offset != 0) {
      Expr = MCBinaryExpr::createAdd(Expr,
                                     MCConstantExpr::create(Offset, Ctx), Ctx);
    }
    MCOp = MCOperand::createExpr(Expr);
    return true;
  }
  case MachineOperand::MO_ExternalSymbol: {
    MCSymbol *Sym = Ctx.getOrCreateSymbol(StringRef(MO.getSymbolName()));
    const MCSymbolRefExpr *Expr = MCSymbolRefExpr::create(Sym, Ctx);
    MCOp = MCOperand::createExpr(Expr);
    return true;
  }
  case MachineOperand::MO_RegisterMask:
    // Regmasks are like implicit defs.
    return false;
  case MachineOperand::MO_MCSymbol:
    if (MO.getTargetFlags() == SIInstrInfo::MO_FAR_BRANCH_OFFSET) {
      MCSymbol *Sym = MO.getMCSymbol();
      MCOp = MCOperand::createExpr(Sym->getVariableValue());
      return true;
    }
    break;
  }
  llvm_unreachable("unknown operand type");
}

// Lower a true16 D16 pseudo instruction to a d16_lo/d16_hi MCInst based on
// the dst/data operand's .l/.h subregister selection.
void AMDGPUMCInstLower::lowerT16D16Helper(const MachineInstr *MI,
                                          MCInst &OutMI) const {
  unsigned Opcode = MI->getOpcode();
  const auto *TII = static_cast<const SIInstrInfo *>(ST.getInstrInfo());
  const SIRegisterInfo &TRI = TII->getRegisterInfo();
  const auto *Info = AMDGPU::getT16D16Helper(Opcode);

  llvm::AMDGPU::OpName OpName;
  if (TII->isDS(Opcode)) {
    if (MI->mayLoad())
      OpName = llvm::AMDGPU::OpName::vdst;
    else if (MI->mayStore())
      OpName = llvm::AMDGPU::OpName::data0;
    else
      llvm_unreachable("LDS load or store expected");
  } else {
    OpName = AMDGPU::hasNamedOperand(Opcode, llvm::AMDGPU::OpName::vdata)
                 ? llvm::AMDGPU::OpName::vdata
                 : llvm::AMDGPU::OpName::vdst;
  }

  // Select the dst/data operand.
  int VDstOrVDataIdx = AMDGPU::getNamedOperandIdx(Opcode, OpName);
  const MachineOperand &MIVDstOrVData = MI->getOperand(VDstOrVDataIdx);

  // Select the hi/lo MCInst.
  bool IsHi = AMDGPU::isHi16Reg(MIVDstOrVData.getReg(), TRI);
  Opcode = IsHi ? Info->HiOp : Info->LoOp;

  int MCOpcode = TII->pseudoToMCOpcode(Opcode);
  assert(MCOpcode != -1 &&
         "Pseudo instruction doesn't have a target-specific version");
  OutMI.setOpcode(MCOpcode);

  // Lower the operands, rewriting the 16-bit dst/data register to its
  // containing 32-bit register.
  for (int I = 0, E = MI->getNumExplicitOperands(); I < E; I++) {
    const MachineOperand &MO = MI->getOperand(I);
    MCOperand MCOp;
    if (I == VDstOrVDataIdx)
      MCOp = MCOperand::createReg(TRI.get32BitRegister(MIVDstOrVData.getReg()));
    else
      lowerOperand(MO, MCOp);
    OutMI.addOperand(MCOp);
  }

  if (AMDGPU::hasNamedOperand(MCOpcode, AMDGPU::OpName::vdst_in)) {
    MCOperand MCOp;
    lowerOperand(MIVDstOrVData, MCOp);
    OutMI.addOperand(MCOp);
  }
}
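
// Lower a MachineInstr to an MCInst. A few pseudos need special expansion
// here before the generic pseudoToMCOpcode() mapping applies.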
void AMDGPUMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const {
  unsigned Opcode = MI->getOpcode();
  const auto *TII = static_cast<const SIInstrInfo *>(ST.getInstrInfo());

  // FIXME: Should be able to handle this with lowerPseudoInstExpansion. We
  // need to select it to the subtarget-specific version, and there's no way
  // to do that with a single pseudo source operation.
  if (Opcode == AMDGPU::S_SETPC_B64_return)
    Opcode = AMDGPU::S_SETPC_B64;
  else if (Opcode == AMDGPU::SI_CALL) {
    // SI_CALL is just S_SWAPPC_B64 with an additional operand to track the
    // called function (which we need to remove here).
    OutMI.setOpcode(TII->pseudoToMCOpcode(AMDGPU::S_SWAPPC_B64));
    MCOperand Dest, Src;
    lowerOperand(MI->getOperand(0), Dest);
    lowerOperand(MI->getOperand(1), Src);
    OutMI.addOperand(Dest);
    OutMI.addOperand(Src);
    return;
  } else if (Opcode == AMDGPU::SI_TCRETURN ||
             Opcode == AMDGPU::SI_TCRETURN_GFX) {
    // TODO: How to use branch immediate and avoid register+add?
    Opcode = AMDGPU::S_SETPC_B64;
  } else if (AMDGPU::getT16D16Helper(Opcode)) {
    lowerT16D16Helper(MI, OutMI);
    return;
  }

  int MCOpcode = TII->pseudoToMCOpcode(Opcode);
  if (MCOpcode == -1) {
    LLVMContext &C = MI->getParent()->getParent()->getFunction().getContext();
    C.emitError("AMDGPUMCInstLower::lower - Pseudo instruction doesn't have "
                "a target-specific version: " + Twine(MI->getOpcode()));
  }

  OutMI.setOpcode(MCOpcode);

  for (const MachineOperand &MO : MI->explicit_operands()) {
    MCOperand MCOp;
    lowerOperand(MO, MCOp);
    OutMI.addOperand(MCOp);
  }

  // If the MC opcode takes a trailing 'fi' operand that the pseudo did not
  // carry, supply a default value of 0.
  int FIIdx = AMDGPU::getNamedOperandIdx(MCOpcode, AMDGPU::OpName::fi);
  if (FIIdx >= (int)OutMI.getNumOperands())
    OutMI.addOperand(MCOperand::createImm(0));
}

bool AMDGPUAsmPrinter::lowerOperand(const MachineOperand &MO,
                                    MCOperand &MCOp) const {
  const GCNSubtarget &STI = MF->getSubtarget<GCNSubtarget>();
  AMDGPUMCInstLower MCInstLowering(OutContext, STI, *this);
  return MCInstLowering.lowerOperand(MO, MCOp);
}

const MCExpr *AMDGPUAsmPrinter::lowerConstant(const Constant *CV,
                                              const Constant *BaseCV,
                                              uint64_t Offset) {
  // Intercept LDS variables with known addresses.
  if (const GlobalVariable *GV = dyn_cast<const GlobalVariable>(CV)) {
    if (std::optional<uint32_t> Address =
            AMDGPUMachineFunction::getLDSAbsoluteAddress(*GV)) {
      auto *IntTy = Type::getInt32Ty(CV->getContext());
      return AsmPrinter::lowerConstant(ConstantInt::get(IntTy, *Address),
                                       BaseCV, Offset);
    }
  }

  if (const MCExpr *E = lowerAddrSpaceCast(TM, CV, OutContext))
    return E;
  return AsmPrinter::lowerConstant(CV, BaseCV, Offset);
}
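
// Print an assembler comment listing the VGPRs that a block load/store
// instruction will actually transfer, as determined by the mask recorded in
// the machine function info.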
static void emitVGPRBlockComment(const MachineInstr *MI, const SIInstrInfo *TII,
                                 const TargetRegisterInfo *TRI,
                                 const SIMachineFunctionInfo *MFI,
                                 MCStreamer &OS) {
  // The instruction will only transfer a subset of the registers in the block,
  // based on the mask that is stored in m0. We could search for the
  // instruction that sets m0, but most of the time we'll already have the mask
  // stored in the machine function info. Try to use that. This assumes that we
  // only use block loads/stores for CSR spills.
  Register RegBlock =
      TII->getNamedOperand(*MI, MI->mayLoad() ? AMDGPU::OpName::vdst
                                              : AMDGPU::OpName::vdata)
          ->getReg();
  Register FirstRegInBlock = TRI->getSubReg(RegBlock, AMDGPU::sub0);
  uint32_t Mask = MFI->getMaskForVGPRBlockOps(RegBlock);

  if (!Mask)
    return; // Nothing to report.

  SmallString<512> TransferredRegs;
  for (unsigned I = 0; I < sizeof(Mask) * 8; ++I) {
    if (Mask & (1 << I)) {
      (llvm::Twine(" ") + TRI->getRegAsmName(FirstRegInBlock + I))
          .toVector(TransferredRegs);
    }
  }

  OS.emitRawComment(" transferring at most " + TransferredRegs);
}

void AMDGPUAsmPrinter::emitInstruction(const MachineInstr *MI) {
  // FIXME: Enable feature predicate checks once all the tests pass.
  // AMDGPU_MC::verifyInstructionPredicates(MI->getOpcode(),
  //                                        getSubtargetInfo().getFeatureBits());

  if (MCInst OutInst; lowerPseudoInstExpansion(MI, OutInst)) {
    EmitToStreamer(*OutStreamer, OutInst);
    return;
  }

  const GCNSubtarget &STI = MF->getSubtarget<GCNSubtarget>();
  AMDGPUMCInstLower MCInstLowering(OutContext, STI, *this);

  StringRef Err;
  if (!STI.getInstrInfo()->verifyInstruction(*MI, Err)) {
    LLVMContext &C = MI->getParent()->getParent()->getFunction().getContext();
    C.emitError("Illegal instruction detected: " + Err);
    MI->print(errs());
  }

  if (MI->isBundle()) {
    const MachineBasicBlock *MBB = MI->getParent();
    MachineBasicBlock::const_instr_iterator I = ++MI->getIterator();
    while (I != MBB->instr_end() && I->isInsideBundle()) {
      emitInstruction(&*I);
      ++I;
    }
  } else {
    // We don't want these pseudo instructions encoded. They are
    // placeholder terminator instructions and should only be printed as
    // comments.
    if (MI->getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG) {
      if (isVerbose())
        OutStreamer->emitRawComment(" return to shader part epilog");
      return;
    }

    if (MI->getOpcode() == AMDGPU::WAVE_BARRIER) {
      if (isVerbose())
        OutStreamer->emitRawComment(" wave barrier");
      return;
    }

    if (MI->getOpcode() == AMDGPU::SCHED_BARRIER) {
      if (isVerbose()) {
        std::string HexString;
        raw_string_ostream HexStream(HexString);
        HexStream << format_hex(MI->getOperand(0).getImm(), 10, true);
        OutStreamer->emitRawComment(" sched_barrier mask(" + HexString + ")");
      }
      return;
    }

    if (MI->getOpcode() == AMDGPU::SCHED_GROUP_BARRIER) {
      if (isVerbose()) {
        std::string HexString;
        raw_string_ostream HexStream(HexString);
        HexStream << format_hex(MI->getOperand(0).getImm(), 10, true);
        OutStreamer->emitRawComment(
            " sched_group_barrier mask(" + HexString + ") size(" +
            Twine(MI->getOperand(1).getImm()) + ") SyncID(" +
            Twine(MI->getOperand(2).getImm()) + ")");
      }
      return;
    }

    if (MI->getOpcode() == AMDGPU::IGLP_OPT) {
      if (isVerbose()) {
        std::string HexString;
        raw_string_ostream HexStream(HexString);
        HexStream << format_hex(MI->getOperand(0).getImm(), 10, true);
        OutStreamer->emitRawComment(" iglp_opt mask(" + HexString + ")");
      }
      return;
    }

    if (MI->getOpcode() == AMDGPU::SI_MASKED_UNREACHABLE) {
      if (isVerbose())
        OutStreamer->emitRawComment(" divergent unreachable");
      return;
    }

    if (MI->isMetaInstruction()) {
      if (isVerbose())
        OutStreamer->emitRawComment(" meta instruction");
      return;
    }

    if (isVerbose())
      if (STI.getInstrInfo()->isBlockLoadStore(MI->getOpcode()))
        emitVGPRBlockComment(MI, STI.getInstrInfo(), STI.getRegisterInfo(),
                             MF->getInfo<SIMachineFunctionInfo>(),
                             *OutStreamer);

    MCInst TmpInst;
    MCInstLowering.lower(MI, TmpInst);
    EmitToStreamer(*OutStreamer, TmpInst);

#ifdef EXPENSIVE_CHECKS
    // Check getInstSizeInBytes on explicitly specified CPUs (it cannot
    // work correctly for the generic CPU).
    //
    // The isPseudo check really shouldn't be here, but unfortunately there are
    // some negative lit tests that depend on being able to continue through
    // here even when pseudo instructions haven't been lowered.
    //
    // We also overestimate branch sizes with the offset bug.
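    //
    // Encode the lowered MCInst with a scratch emitter and compare the byte
    // count against what getInstSizeInBytes reports for the original MI.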
    if (!MI->isPseudo() && STI.isCPUStringValid(STI.getCPU()) &&
        (!STI.hasOffset3fBug() || !MI->isBranch())) {
      SmallVector<MCFixup, 4> Fixups;
      SmallVector<char, 16> CodeBytes;

      std::unique_ptr<MCCodeEmitter> InstEmitter(createAMDGPUMCCodeEmitter(
          *STI.getInstrInfo(), OutContext));
      InstEmitter->encodeInstruction(TmpInst, CodeBytes, Fixups, STI);

      assert(CodeBytes.size() == STI.getInstrInfo()->getInstSizeInBytes(*MI));
    }
#endif

    if (DumpCodeInstEmitter) {
      // Disassemble instruction/operands to text.
      DisasmLines.resize(DisasmLines.size() + 1);
      std::string &DisasmLine = DisasmLines.back();
      raw_string_ostream DisasmStream(DisasmLine);

      AMDGPUInstPrinter InstPrinter(*TM.getMCAsmInfo(), *STI.getInstrInfo(),
                                    *STI.getRegisterInfo());
      InstPrinter.printInst(&TmpInst, 0, StringRef(), STI, DisasmStream);

      // Disassemble instruction/operands to hex representation.
      SmallVector<MCFixup, 4> Fixups;
      SmallVector<char, 16> CodeBytes;

      DumpCodeInstEmitter->encodeInstruction(
          TmpInst, CodeBytes, Fixups, MF->getSubtarget<MCSubtargetInfo>());
      HexLines.resize(HexLines.size() + 1);
      std::string &HexLine = HexLines.back();
      raw_string_ostream HexStream(HexLine);

      // Print the encoding as little-endian 32-bit words.
      for (size_t i = 0; i < CodeBytes.size(); i += 4) {
        unsigned int CodeDWord =
            support::endian::read32le(CodeBytes.data() + i);
        HexStream << format("%s%08X", (i > 0 ? " " : ""), CodeDWord);
      }

      DisasmLineMaxLen = std::max(DisasmLineMaxLen, DisasmLine.size());
    }
  }
}