1 //===- AMDGPUDisassembler.cpp - Disassembler for AMDGPU ISA ---------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 //===----------------------------------------------------------------------===//
10 //
11 /// \file
12 ///
/// This file contains the definition of the AMDGPU ISA disassembler.
14 //
15 //===----------------------------------------------------------------------===//
16
17 // ToDo: What to do with instruction suffixes (v_mov_b32 vs v_mov_b32_e32)?
18
19 #include "Disassembler/AMDGPUDisassembler.h"
20 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
21 #include "SIDefines.h"
22 #include "SIRegisterInfo.h"
23 #include "TargetInfo/AMDGPUTargetInfo.h"
24 #include "Utils/AMDGPUAsmUtils.h"
25 #include "Utils/AMDGPUBaseInfo.h"
26 #include "llvm-c/DisassemblerTypes.h"
27 #include "llvm/BinaryFormat/ELF.h"
28 #include "llvm/MC/MCAsmInfo.h"
29 #include "llvm/MC/MCContext.h"
30 #include "llvm/MC/MCDecoderOps.h"
31 #include "llvm/MC/MCExpr.h"
32 #include "llvm/MC/MCInstrDesc.h"
33 #include "llvm/MC/MCRegisterInfo.h"
34 #include "llvm/MC/MCSubtargetInfo.h"
35 #include "llvm/MC/TargetRegistry.h"
36 #include "llvm/Support/AMDHSAKernelDescriptor.h"
37 #include "llvm/Support/Compiler.h"
38
39 using namespace llvm;
40
41 #define DEBUG_TYPE "amdgpu-disassembler"
42
43 #define SGPR_MAX \
44 (isGFX10Plus() ? AMDGPU::EncValues::SGPR_MAX_GFX10 \
45 : AMDGPU::EncValues::SGPR_MAX_SI)
46
47 using DecodeStatus = llvm::MCDisassembler::DecodeStatus;
48
49 static int64_t getInlineImmValF16(unsigned Imm);
50 static int64_t getInlineImmValBF16(unsigned Imm);
51 static int64_t getInlineImmVal32(unsigned Imm);
52 static int64_t getInlineImmVal64(unsigned Imm);
53
// Constructs a disassembler for the given subtarget. Caches the register
// info, asm info, and maximum instruction length, and pre-registers the
// microcode-version symbol constants used when symbolizing version
// immediates. Rejects pre-GCN3 (pre-VI) subtargets up front.
AMDGPUDisassembler::AMDGPUDisassembler(const MCSubtargetInfo &STI,
                                       MCContext &Ctx, MCInstrInfo const *MCII)
    : MCDisassembler(STI, Ctx), MCII(MCII), MRI(*Ctx.getRegisterInfo()),
      MAI(*Ctx.getAsmInfo()), TargetMaxInstBytes(MAI.getMaxInstLength(&STI)),
      CodeObjectVersion(AMDGPU::getDefaultAMDHSACodeObjectVersion()) {
  // ToDo: AMDGPUDisassembler supports only VI ISA.
  if (!STI.hasFeature(AMDGPU::FeatureGCN3Encoding) && !isGFX10Plus())
    reportFatalUsageError("disassembly not yet supported for subtarget");

  // One constant symbol per known GFX microcode version code.
  for (auto [Symbol, Code] : AMDGPU::UCVersion::getGFXVersions())
    createConstantSymbolExpr(Symbol, Code);

  UCVersionW64Expr = createConstantSymbolExpr("UC_VERSION_W64_BIT", 0x2000);
  UCVersionW32Expr = createConstantSymbolExpr("UC_VERSION_W32_BIT", 0x4000);
  UCVersionMDPExpr = createConstantSymbolExpr("UC_VERSION_MDP_BIT", 0x8000);
}
70
// Records the AMDHSA code object version derived from the ELF ABI version so
// later decoding uses the matching layout conventions.
void AMDGPUDisassembler::setABIVersion(unsigned Version) {
  CodeObjectVersion = AMDGPU::getAMDHSACodeObjectVersion(Version);
}
74
75 inline static MCDisassembler::DecodeStatus
addOperand(MCInst & Inst,const MCOperand & Opnd)76 addOperand(MCInst &Inst, const MCOperand& Opnd) {
77 Inst.addOperand(Opnd);
78 return Opnd.isValid() ?
79 MCDisassembler::Success :
80 MCDisassembler::Fail;
81 }
82
insertNamedMCOperand(MCInst & MI,const MCOperand & Op,AMDGPU::OpName Name)83 static int insertNamedMCOperand(MCInst &MI, const MCOperand &Op,
84 AMDGPU::OpName Name) {
85 int OpIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), Name);
86 if (OpIdx != -1) {
87 auto *I = MI.begin();
88 std::advance(I, OpIdx);
89 MI.insert(I, Op);
90 }
91 return OpIdx;
92 }
93
decodeSOPPBrTarget(MCInst & Inst,unsigned Imm,uint64_t Addr,const MCDisassembler * Decoder)94 static DecodeStatus decodeSOPPBrTarget(MCInst &Inst, unsigned Imm,
95 uint64_t Addr,
96 const MCDisassembler *Decoder) {
97 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
98
99 // Our branches take a simm16.
100 int64_t Offset = SignExtend64<16>(Imm) * 4 + 4 + Addr;
101
102 if (DAsm->tryAddingSymbolicOperand(Inst, Offset, Addr, true, 2, 2, 0))
103 return MCDisassembler::Success;
104 return addOperand(Inst, MCOperand::createImm(Imm));
105 }
106
decodeSMEMOffset(MCInst & Inst,unsigned Imm,uint64_t Addr,const MCDisassembler * Decoder)107 static DecodeStatus decodeSMEMOffset(MCInst &Inst, unsigned Imm, uint64_t Addr,
108 const MCDisassembler *Decoder) {
109 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
110 int64_t Offset;
111 if (DAsm->isGFX12Plus()) { // GFX12 supports 24-bit signed offsets.
112 Offset = SignExtend64<24>(Imm);
113 } else if (DAsm->isVI()) { // VI supports 20-bit unsigned offsets.
114 Offset = Imm & 0xFFFFF;
115 } else { // GFX9+ supports 21-bit signed offsets.
116 Offset = SignExtend64<21>(Imm);
117 }
118 return addOperand(Inst, MCOperand::createImm(Offset));
119 }
120
// Decodes a boolean (wave-mask style) register source operand by forwarding
// to the disassembler's decodeBoolReg.
static DecodeStatus decodeBoolReg(MCInst &Inst, unsigned Val, uint64_t Addr,
                                  const MCDisassembler *Decoder) {
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeBoolReg(Val));
}
126
// Decodes a split-barrier operand by forwarding to the disassembler's
// decodeSplitBarrier.
static DecodeStatus decodeSplitBarrier(MCInst &Inst, unsigned Val,
                                       uint64_t Addr,
                                       const MCDisassembler *Decoder) {
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeSplitBarrier(Val));
}
133
// Decodes the DPP8 FI (fetch-invalid) field by forwarding to the
// disassembler's decodeDpp8FI.
static DecodeStatus decodeDpp8FI(MCInst &Inst, unsigned Val, uint64_t Addr,
                                 const MCDisassembler *Decoder) {
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeDpp8FI(Val));
}
139
// Defines a static decoder that forwards the raw field value to the named
// AMDGPUDisassembler member decoder.
#define DECODE_OPERAND(StaticDecoderName, DecoderName)                         \
  static DecodeStatus StaticDecoderName(MCInst &Inst, unsigned Imm,            \
                                        uint64_t /*Addr*/,                     \
                                        const MCDisassembler *Decoder) {       \
    auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);              \
    return addOperand(Inst, DAsm->DecoderName(Imm));                           \
  }

// Decoder for registers, decode directly using RegClassID. Imm(8-bit) is
// number of register. Used by VGPR only and AGPR only operands.
#define DECODE_OPERAND_REG_8(RegClass)                                         \
  static DecodeStatus Decode##RegClass##RegisterClass(                         \
      MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,                           \
      const MCDisassembler *Decoder) {                                         \
    assert(Imm < (1 << 8) && "8-bit encoding");                                \
    auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);              \
    return addOperand(                                                         \
        Inst, DAsm->createRegOperand(AMDGPU::RegClass##RegClassID, Imm));      \
  }

// Defines a decoder named \p Name that routes an \p EncSize-bit field
// through decodeSrcOp with the given operand width and encoded value.
#define DECODE_SrcOp(Name, EncSize, OpWidth, EncImm)                           \
  static DecodeStatus Name(MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,      \
                           const MCDisassembler *Decoder) {                    \
    assert(Imm < (1 << EncSize) && #EncSize "-bit encoding");                  \
    auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);              \
    return addOperand(Inst, DAsm->decodeSrcOp(OpWidth, EncImm));               \
  }
167
decodeSrcOp(MCInst & Inst,unsigned EncSize,unsigned OpWidth,unsigned Imm,unsigned EncImm,const MCDisassembler * Decoder)168 static DecodeStatus decodeSrcOp(MCInst &Inst, unsigned EncSize,
169 unsigned OpWidth, unsigned Imm, unsigned EncImm,
170 const MCDisassembler *Decoder) {
171 assert(Imm < (1U << EncSize) && "Operand doesn't fit encoding!");
172 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
173 return addOperand(Inst, DAsm->decodeSrcOp(OpWidth, EncImm));
174 }
175
// Decoder for registers. Imm(7-bit) is number of register, uses decodeSrcOp to
// get register class. Used by SGPR only operands.
#define DECODE_OPERAND_SREG_7(RegClass, OpWidth)                               \
  DECODE_SrcOp(Decode##RegClass##RegisterClass, 7, OpWidth, Imm)

// Same as above but with an 8-bit register-number field.
#define DECODE_OPERAND_SREG_8(RegClass, OpWidth)                               \
  DECODE_SrcOp(Decode##RegClass##RegisterClass, 8, OpWidth, Imm)
183
// Decoder for registers. Imm(10-bit): Imm{7-0} is number of register,
// Imm{9} is acc(agpr or vgpr) Imm{8} should be 0 (see VOP3Pe_SMFMAC).
// Set Imm{8} to 1 (IS_VGPR) to decode using 'enum10' from decodeSrcOp.
// Used by AV_ register classes (AGPR or VGPR only register operands).
template <unsigned OpWidth>
static DecodeStatus decodeAV10(MCInst &Inst, unsigned Imm, uint64_t /* Addr */,
                               const MCDisassembler *Decoder) {
  // Forcing IS_VGPR (Imm{8}) selects the register-file ('enum10') decode path.
  return decodeSrcOp(Inst, 10, OpWidth, Imm, Imm | AMDGPU::EncValues::IS_VGPR,
                     Decoder);
}
194
// Decoder for Src(9-bit encoding) registers only.
// The encoded value is passed through to decodeSrcOp unmodified.
template <unsigned OpWidth>
static DecodeStatus decodeSrcReg9(MCInst &Inst, unsigned Imm,
                                  uint64_t /* Addr */,
                                  const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm, Decoder);
}
202
// Decoder for Src(9-bit encoding) AGPR, register number encoded in 9bits, set
// Imm{9} to 1 (set acc) and decode using 'enum10' from decodeSrcOp, registers
// only.
template <unsigned OpWidth>
static DecodeStatus decodeSrcA9(MCInst &Inst, unsigned Imm, uint64_t /* Addr */,
                                const MCDisassembler *Decoder) {
  // OR-ing in 512 sets the acc bit (Imm{9}) to force AGPR decoding.
  return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm | 512, Decoder);
}
211
// Decoder for 'enum10' from decodeSrcOp, Imm{0-8} is 9-bit Src encoding
// Imm{9} is acc, registers only.
template <unsigned OpWidth>
static DecodeStatus decodeSrcAV10(MCInst &Inst, unsigned Imm,
                                  uint64_t /* Addr */,
                                  const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 10, OpWidth, Imm, Imm, Decoder);
}
220
// Decoder for RegisterOperands using 9-bit Src encoding. Operand can be
// register from RegClass or immediate. Registers that don't belong to RegClass
// will be decoded and InstPrinter will report warning. Immediate will be
// decoded into constant matching the OperandType (important for floating point
// types).
template <unsigned OpWidth>
static DecodeStatus decodeSrcRegOrImm9(MCInst &Inst, unsigned Imm,
                                       uint64_t /* Addr */,
                                       const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm, Decoder);
}
232
// Decoder for Src(9-bit encoding) AGPR or immediate. Set Imm{9} to 1 (set acc)
// and decode using 'enum10' from decodeSrcOp.
template <unsigned OpWidth>
static DecodeStatus decodeSrcRegOrImmA9(MCInst &Inst, unsigned Imm,
                                        uint64_t /* Addr */,
                                        const MCDisassembler *Decoder) {
  // OR-ing in 512 sets the acc bit (Imm{9}) so registers decode as AGPRs.
  return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm | 512, Decoder);
}
241
// Default decoders generated by tablegen: 'Decode<RegClass>RegisterClass'
// when RegisterClass is used as an operand. Most often used for destination
// operands.

// VGPR classes: plain 8-bit register-number encoding.
DECODE_OPERAND_REG_8(VGPR_32)
DECODE_OPERAND_REG_8(VGPR_32_Lo128)
DECODE_OPERAND_REG_8(VReg_64)
DECODE_OPERAND_REG_8(VReg_96)
DECODE_OPERAND_REG_8(VReg_128)
DECODE_OPERAND_REG_8(VReg_192)
DECODE_OPERAND_REG_8(VReg_256)
DECODE_OPERAND_REG_8(VReg_288)
DECODE_OPERAND_REG_8(VReg_320)
DECODE_OPERAND_REG_8(VReg_352)
DECODE_OPERAND_REG_8(VReg_384)
DECODE_OPERAND_REG_8(VReg_512)
DECODE_OPERAND_REG_8(VReg_1024)

// SGPR classes: 7-bit encoding routed through decodeSrcOp.
DECODE_OPERAND_SREG_7(SReg_32, 32)
DECODE_OPERAND_SREG_7(SReg_32_XM0, 32)
DECODE_OPERAND_SREG_7(SReg_32_XEXEC, 32)
DECODE_OPERAND_SREG_7(SReg_32_XM0_XEXEC, 32)
DECODE_OPERAND_SREG_7(SReg_32_XEXEC_HI, 32)
DECODE_OPERAND_SREG_7(SReg_64_XEXEC, 64)
DECODE_OPERAND_SREG_7(SReg_64_XEXEC_XNULL, 64)
DECODE_OPERAND_SREG_7(SReg_96, 96)
DECODE_OPERAND_SREG_7(SReg_128, 128)
DECODE_OPERAND_SREG_7(SReg_128_XNULL, 128)
DECODE_OPERAND_SREG_7(SReg_256, 256)
DECODE_OPERAND_SREG_7(SReg_256_XNULL, 256)
DECODE_OPERAND_SREG_7(SReg_512, 512)

// SReg_64 uses the wider 8-bit encoding.
DECODE_OPERAND_SREG_8(SReg_64, 64)

// AGPR classes: plain 8-bit register-number encoding.
DECODE_OPERAND_REG_8(AGPR_32)
DECODE_OPERAND_REG_8(AReg_64)
DECODE_OPERAND_REG_8(AReg_128)
DECODE_OPERAND_REG_8(AReg_256)
DECODE_OPERAND_REG_8(AReg_512)
DECODE_OPERAND_REG_8(AReg_1024)
282
283 static DecodeStatus DecodeVGPR_16RegisterClass(MCInst &Inst, unsigned Imm,
284 uint64_t /*Addr*/,
285 const MCDisassembler *Decoder) {
286 assert(isUInt<10>(Imm) && "10-bit encoding expected");
287 assert((Imm & (1 << 8)) == 0 && "Imm{8} should not be used");
288
289 bool IsHi = Imm & (1 << 9);
290 unsigned RegIdx = Imm & 0xff;
291 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
292 return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
293 }
294
295 static DecodeStatus
DecodeVGPR_16_Lo128RegisterClass(MCInst & Inst,unsigned Imm,uint64_t,const MCDisassembler * Decoder)296 DecodeVGPR_16_Lo128RegisterClass(MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,
297 const MCDisassembler *Decoder) {
298 assert(isUInt<8>(Imm) && "8-bit encoding expected");
299
300 bool IsHi = Imm & (1 << 7);
301 unsigned RegIdx = Imm & 0x7f;
302 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
303 return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
304 }
305
306 template <unsigned OpWidth>
decodeOperand_VSrcT16_Lo128(MCInst & Inst,unsigned Imm,uint64_t,const MCDisassembler * Decoder)307 static DecodeStatus decodeOperand_VSrcT16_Lo128(MCInst &Inst, unsigned Imm,
308 uint64_t /*Addr*/,
309 const MCDisassembler *Decoder) {
310 assert(isUInt<9>(Imm) && "9-bit encoding expected");
311
312 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
313 if (Imm & AMDGPU::EncValues::IS_VGPR) {
314 bool IsHi = Imm & (1 << 7);
315 unsigned RegIdx = Imm & 0x7f;
316 return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
317 }
318 return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(OpWidth, Imm & 0xFF));
319 }
320
321 template <unsigned OpWidth>
decodeOperand_VSrcT16(MCInst & Inst,unsigned Imm,uint64_t,const MCDisassembler * Decoder)322 static DecodeStatus decodeOperand_VSrcT16(MCInst &Inst, unsigned Imm,
323 uint64_t /*Addr*/,
324 const MCDisassembler *Decoder) {
325 assert(isUInt<10>(Imm) && "10-bit encoding expected");
326
327 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
328 if (Imm & AMDGPU::EncValues::IS_VGPR) {
329 bool IsHi = Imm & (1 << 9);
330 unsigned RegIdx = Imm & 0xff;
331 return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
332 }
333 return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(OpWidth, Imm & 0xFF));
334 }
335
decodeOperand_VGPR_16(MCInst & Inst,unsigned Imm,uint64_t,const MCDisassembler * Decoder)336 static DecodeStatus decodeOperand_VGPR_16(MCInst &Inst, unsigned Imm,
337 uint64_t /*Addr*/,
338 const MCDisassembler *Decoder) {
339 assert(isUInt<10>(Imm) && "10-bit encoding expected");
340 assert(Imm & AMDGPU::EncValues::IS_VGPR && "VGPR expected");
341
342 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
343
344 bool IsHi = Imm & (1 << 9);
345 unsigned RegIdx = Imm & 0xff;
346 return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
347 }
348
// Decodes a mandatory 32-bit literal constant operand (KImm) via the
// disassembler's literal handling.
static DecodeStatus decodeOperand_KImmFP(MCInst &Inst, unsigned Imm,
                                         uint64_t Addr,
                                         const MCDisassembler *Decoder) {
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeMandatoryLiteralConstant(Imm));
}
355
// Decodes a mandatory 64-bit literal constant operand (KImm) via the
// disassembler's 64-bit literal handling.
static DecodeStatus decodeOperand_KImmFP64(MCInst &Inst, uint64_t Imm,
                                           uint64_t Addr,
                                           const MCDisassembler *Decoder) {
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeMandatoryLiteral64Constant(Imm));
}
362
// Decodes the vdstY operand of VOPD instructions; decodeVOPDDstYOp also
// inspects the partially-built \p Inst to compute the final register.
// NOTE(review): takes `const void *` where sibling decoders take
// `const MCDisassembler *` — presumably to match the generated caller's
// signature; confirm before unifying.
static DecodeStatus decodeOperandVOPDDstY(MCInst &Inst, unsigned Val,
                                          uint64_t Addr, const void *Decoder) {
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeVOPDDstYOp(Inst, Val));
}
368
IsAGPROperand(const MCInst & Inst,int OpIdx,const MCRegisterInfo * MRI)369 static bool IsAGPROperand(const MCInst &Inst, int OpIdx,
370 const MCRegisterInfo *MRI) {
371 if (OpIdx < 0)
372 return false;
373
374 const MCOperand &Op = Inst.getOperand(OpIdx);
375 if (!Op.isReg())
376 return false;
377
378 MCRegister Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
379 auto Reg = Sub ? Sub : Op.getReg();
380 return Reg >= AMDGPU::AGPR0 && Reg <= AMDGPU::AGPR255;
381 }
382
// Decodes an AGPR-or-VGPR load/store data operand. On non-GFX90A targets the
// acc bit is simply stripped. On GFX90A the acc bit is not encoded in this
// field: it is inferred from the already-decoded tied operand (vdst for
// atomics, data0 for DS data1), since tied operands share a register class.
static DecodeStatus decodeAVLdSt(MCInst &Inst, unsigned Imm, unsigned Opw,
                                 const MCDisassembler *Decoder) {
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  if (!DAsm->isGFX90A()) {
    Imm &= 511;
  } else {
    // If atomic has both vdata and vdst their register classes are tied.
    // The bit is decoded along with the vdst, first operand. We need to
    // change register class to AGPR if vdst was AGPR.
    // If a DS instruction has both data0 and data1 their register classes
    // are also tied.
    unsigned Opc = Inst.getOpcode();
    uint64_t TSFlags = DAsm->getMCII()->get(Opc).TSFlags;
    AMDGPU::OpName DataName = (TSFlags & SIInstrFlags::DS)
                                  ? AMDGPU::OpName::data0
                                  : AMDGPU::OpName::vdata;
    const MCRegisterInfo *MRI = DAsm->getContext().getRegisterInfo();
    int DataIdx = AMDGPU::getNamedOperandIdx(Opc, DataName);
    // Operands decode in order, so NumOperands == DataIdx means the operand
    // currently being decoded is the data operand itself.
    if ((int)Inst.getNumOperands() == DataIdx) {
      int DstIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
      if (IsAGPROperand(Inst, DstIdx, MRI))
        Imm |= 512;
    }

    if (TSFlags & SIInstrFlags::DS) {
      // Currently decoding data1: mirror data0's AGPR-ness (classes tied).
      int Data2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data1);
      if ((int)Inst.getNumOperands() == Data2Idx &&
          IsAGPROperand(Inst, DataIdx, MRI))
        Imm |= 512;
    }
  }
  // Imm | 256 sets IS_VGPR so decodeSrcOp uses the register-file path.
  return addOperand(Inst, DAsm->decodeSrcOp(Opw, Imm | 256));
}
416
// Tablegen entry point: forwards to the non-template decodeAVLdSt with the
// operand width baked in as a template argument.
template <unsigned Opw>
static DecodeStatus decodeAVLdSt(MCInst &Inst, unsigned Imm,
                                 uint64_t /* Addr */,
                                 const MCDisassembler *Decoder) {
  return decodeAVLdSt(Inst, Imm, Opw, Decoder);
}
423
decodeOperand_VSrc_f64(MCInst & Inst,unsigned Imm,uint64_t Addr,const MCDisassembler * Decoder)424 static DecodeStatus decodeOperand_VSrc_f64(MCInst &Inst, unsigned Imm,
425 uint64_t Addr,
426 const MCDisassembler *Decoder) {
427 assert(Imm < (1 << 9) && "9-bit encoding");
428 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
429 return addOperand(Inst, DAsm->decodeSrcOp(64, Imm));
430 }
431
// Defines a decoder named decodeSDWA<DecName> that forwards to the matching
// AMDGPUDisassembler member.
#define DECODE_SDWA(DecName) \
DECODE_OPERAND(decodeSDWA##DecName, decodeSDWA##DecName)

DECODE_SDWA(Src32)
DECODE_SDWA(Src16)
DECODE_SDWA(VopcDst)
438
// Decodes a microcode-version immediate by forwarding to the disassembler's
// decodeVersionImm (which may symbolize it using the UC_VERSION_* symbols).
static DecodeStatus decodeVersionImm(MCInst &Inst, unsigned Imm,
                                     uint64_t /* Addr */,
                                     const MCDisassembler *Decoder) {
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeVersionImm(Imm));
}
445
446 #include "AMDGPUGenDisassemblerTables.inc"
447
448 //===----------------------------------------------------------------------===//
449 //
450 //===----------------------------------------------------------------------===//
451
// Attempts to decode \p Inst against a single decoder table. Decoding goes
// into a temporary MCInst, comments into a local buffer, and the byte stream
// is saved up front, so a failed attempt leaves \p MI, \p Comments, and
// Bytes exactly as they were.
template <typename InsnType>
DecodeStatus AMDGPUDisassembler::tryDecodeInst(const uint8_t *Table, MCInst &MI,
                                               InsnType Inst, uint64_t Address,
                                               raw_ostream &Comments) const {
  assert(MI.getOpcode() == 0);
  assert(MI.getNumOperands() == 0);
  MCInst TmpInst;
  HasLiteral = false;
  const auto SavedBytes = Bytes; // Literal decoding may consume Bytes.

  // Buffer comments locally so they are only emitted on success.
  SmallString<64> LocalComments;
  raw_svector_ostream LocalCommentStream(LocalComments);
  CommentStream = &LocalCommentStream;

  DecodeStatus Res =
      decodeInstruction(Table, TmpInst, Inst, Address, this, STI);

  CommentStream = nullptr;

  if (Res != MCDisassembler::Fail) {
    MI = TmpInst;
    Comments << LocalComments;
    return MCDisassembler::Success;
  }
  Bytes = SavedBytes; // Undo any bytes consumed by the failed attempt.
  return MCDisassembler::Fail;
}
479
480 template <typename InsnType>
481 DecodeStatus
tryDecodeInst(const uint8_t * Table1,const uint8_t * Table2,MCInst & MI,InsnType Inst,uint64_t Address,raw_ostream & Comments) const482 AMDGPUDisassembler::tryDecodeInst(const uint8_t *Table1, const uint8_t *Table2,
483 MCInst &MI, InsnType Inst, uint64_t Address,
484 raw_ostream &Comments) const {
485 for (const uint8_t *T : {Table1, Table2}) {
486 if (DecodeStatus Res = tryDecodeInst(T, MI, Inst, Address, Comments))
487 return Res;
488 }
489 return MCDisassembler::Fail;
490 }
491
eatBytes(ArrayRef<uint8_t> & Bytes)492 template <typename T> static inline T eatBytes(ArrayRef<uint8_t>& Bytes) {
493 assert(Bytes.size() >= sizeof(T));
494 const auto Res =
495 support::endian::read<T, llvm::endianness::little>(Bytes.data());
496 Bytes = Bytes.slice(sizeof(T));
497 return Res;
498 }
499
eat12Bytes(ArrayRef<uint8_t> & Bytes)500 static inline DecoderUInt128 eat12Bytes(ArrayRef<uint8_t> &Bytes) {
501 assert(Bytes.size() >= 12);
502 uint64_t Lo =
503 support::endian::read<uint64_t, llvm::endianness::little>(Bytes.data());
504 Bytes = Bytes.slice(8);
505 uint64_t Hi =
506 support::endian::read<uint32_t, llvm::endianness::little>(Bytes.data());
507 Bytes = Bytes.slice(4);
508 return DecoderUInt128(Lo, Hi);
509 }
510
eat16Bytes(ArrayRef<uint8_t> & Bytes)511 static inline DecoderUInt128 eat16Bytes(ArrayRef<uint8_t> &Bytes) {
512 assert(Bytes.size() >= 16);
513 uint64_t Lo =
514 support::endian::read<uint64_t, llvm::endianness::little>(Bytes.data());
515 Bytes = Bytes.slice(8);
516 uint64_t Hi =
517 support::endian::read<uint64_t, llvm::endianness::little>(Bytes.data());
518 Bytes = Bytes.slice(8);
519 return DecoderUInt128(Lo, Hi);
520 }
521
// Rewrites raw immediate source operands of \p MI into their decoded values:
// inline integer constants, inline floating-point constants (materialized at
// the width implied by the operand type), or the trailing literal constant.
void AMDGPUDisassembler::decodeImmOperands(MCInst &MI,
                                           const MCInstrInfo &MCII) const {
  const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
  for (auto [OpNo, OpDesc] : enumerate(Desc.operands())) {
    // Skip operands not (yet) present in the partially-built instruction.
    if (OpNo >= MI.getNumOperands())
      continue;

    // TODO: Fix V_DUAL_FMAMK_F32_X_FMAAK_F32_gfx12 vsrc operands,
    // defined to take VGPR_32, but in reality allowing inline constants.
    bool IsSrc = AMDGPU::OPERAND_SRC_FIRST <= OpDesc.OperandType &&
                 OpDesc.OperandType <= AMDGPU::OPERAND_SRC_LAST;
    if (!IsSrc && OpDesc.OperandType != MCOI::OPERAND_REGISTER)
      continue;

    MCOperand &Op = MI.getOperand(OpNo);
    if (!Op.isImm())
      continue;
    int64_t Imm = Op.getImm();
    if (AMDGPU::EncValues::INLINE_INTEGER_C_MIN <= Imm &&
        Imm <= AMDGPU::EncValues::INLINE_INTEGER_C_MAX) {
      Op = decodeIntImmed(Imm);
      continue;
    }

    if (Imm == AMDGPU::EncValues::LITERAL_CONST) {
      // The literal follows the instruction; 64-bit FP operands need the
      // wide literal path.
      Op = decodeLiteralConstant(OpDesc.OperandType ==
                                 AMDGPU::OPERAND_REG_IMM_FP64);
      continue;
    }

    if (AMDGPU::EncValues::INLINE_FLOATING_C_MIN <= Imm &&
        Imm <= AMDGPU::EncValues::INLINE_FLOATING_C_MAX) {
      // Pick the bit pattern for the inline FP constant from the operand's
      // declared type; 32-bit is the default for anything unlisted.
      switch (OpDesc.OperandType) {
      case AMDGPU::OPERAND_REG_IMM_BF16:
      case AMDGPU::OPERAND_REG_IMM_V2BF16:
      case AMDGPU::OPERAND_REG_INLINE_C_BF16:
      case AMDGPU::OPERAND_REG_INLINE_C_V2BF16:
        Imm = getInlineImmValBF16(Imm);
        break;
      case AMDGPU::OPERAND_REG_IMM_FP16:
      case AMDGPU::OPERAND_REG_IMM_INT16:
      case AMDGPU::OPERAND_REG_IMM_V2FP16:
      case AMDGPU::OPERAND_REG_INLINE_C_FP16:
      case AMDGPU::OPERAND_REG_INLINE_C_INT16:
      case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
        Imm = getInlineImmValF16(Imm);
        break;
      case AMDGPU::OPERAND_REG_IMM_FP64:
      case AMDGPU::OPERAND_REG_IMM_INT64:
      case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
      case AMDGPU::OPERAND_REG_INLINE_C_FP64:
      case AMDGPU::OPERAND_REG_INLINE_C_INT64:
        Imm = getInlineImmVal64(Imm);
        break;
      default:
        Imm = getInlineImmVal32(Imm);
      }
      Op.setImm(Imm);
    }
  }
}
583
getInstruction(MCInst & MI,uint64_t & Size,ArrayRef<uint8_t> Bytes_,uint64_t Address,raw_ostream & CS) const584 DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
585 ArrayRef<uint8_t> Bytes_,
586 uint64_t Address,
587 raw_ostream &CS) const {
588 unsigned MaxInstBytesNum = std::min((size_t)TargetMaxInstBytes, Bytes_.size());
589 Bytes = Bytes_.slice(0, MaxInstBytesNum);
590
591 // In case the opcode is not recognized we'll assume a Size of 4 bytes (unless
592 // there are fewer bytes left). This will be overridden on success.
593 Size = std::min((size_t)4, Bytes_.size());
594
595 do {
596 // ToDo: better to switch encoding length using some bit predicate
597 // but it is unknown yet, so try all we can
598
599 // Try to decode DPP and SDWA first to solve conflict with VOP1 and VOP2
600 // encodings
601 if (isGFX11Plus() && Bytes.size() >= 12 ) {
602 DecoderUInt128 DecW = eat12Bytes(Bytes);
603
604 if (isGFX11() &&
605 tryDecodeInst(DecoderTableGFX1196, DecoderTableGFX11_FAKE1696, MI,
606 DecW, Address, CS))
607 break;
608
609 if (isGFX1250() &&
610 tryDecodeInst(DecoderTableGFX125096, DecoderTableGFX1250_FAKE1696, MI,
611 DecW, Address, CS))
612 break;
613
614 if (isGFX12() &&
615 tryDecodeInst(DecoderTableGFX1296, DecoderTableGFX12_FAKE1696, MI,
616 DecW, Address, CS))
617 break;
618
619 if (isGFX12() &&
620 tryDecodeInst(DecoderTableGFX12W6496, MI, DecW, Address, CS))
621 break;
622
623 if (STI.hasFeature(AMDGPU::Feature64BitLiterals)) {
624 // Return 8 bytes for a potential literal.
625 Bytes = Bytes_.slice(4, MaxInstBytesNum - 4);
626
627 if (isGFX1250() &&
628 tryDecodeInst(DecoderTableGFX125096, MI, DecW, Address, CS))
629 break;
630 }
631
632 // Reinitialize Bytes
633 Bytes = Bytes_.slice(0, MaxInstBytesNum);
634
635 } else if (Bytes.size() >= 16 &&
636 STI.hasFeature(AMDGPU::FeatureGFX950Insts)) {
637 DecoderUInt128 DecW = eat16Bytes(Bytes);
638 if (tryDecodeInst(DecoderTableGFX940128, MI, DecW, Address, CS))
639 break;
640
641 // Reinitialize Bytes
642 Bytes = Bytes_.slice(0, MaxInstBytesNum);
643 }
644
645 if (Bytes.size() >= 8) {
646 const uint64_t QW = eatBytes<uint64_t>(Bytes);
647
648 if (STI.hasFeature(AMDGPU::FeatureGFX10_BEncoding) &&
649 tryDecodeInst(DecoderTableGFX10_B64, MI, QW, Address, CS))
650 break;
651
652 if (STI.hasFeature(AMDGPU::FeatureUnpackedD16VMem) &&
653 tryDecodeInst(DecoderTableGFX80_UNPACKED64, MI, QW, Address, CS))
654 break;
655
656 if (STI.hasFeature(AMDGPU::FeatureGFX950Insts) &&
657 tryDecodeInst(DecoderTableGFX95064, MI, QW, Address, CS))
658 break;
659
660 // Some GFX9 subtargets repurposed the v_mad_mix_f32, v_mad_mixlo_f16 and
661 // v_mad_mixhi_f16 for FMA variants. Try to decode using this special
662 // table first so we print the correct name.
663 if (STI.hasFeature(AMDGPU::FeatureFmaMixInsts) &&
664 tryDecodeInst(DecoderTableGFX9_DL64, MI, QW, Address, CS))
665 break;
666
667 if (STI.hasFeature(AMDGPU::FeatureGFX940Insts) &&
668 tryDecodeInst(DecoderTableGFX94064, MI, QW, Address, CS))
669 break;
670
671 if (STI.hasFeature(AMDGPU::FeatureGFX90AInsts) &&
672 tryDecodeInst(DecoderTableGFX90A64, MI, QW, Address, CS))
673 break;
674
675 if ((isVI() || isGFX9()) &&
676 tryDecodeInst(DecoderTableGFX864, MI, QW, Address, CS))
677 break;
678
679 if (isGFX9() && tryDecodeInst(DecoderTableGFX964, MI, QW, Address, CS))
680 break;
681
682 if (isGFX10() && tryDecodeInst(DecoderTableGFX1064, MI, QW, Address, CS))
683 break;
684
685 if (isGFX1250() &&
686 tryDecodeInst(DecoderTableGFX125064, DecoderTableGFX1250_FAKE1664, MI,
687 QW, Address, CS))
688 break;
689
690 if (isGFX12() &&
691 tryDecodeInst(DecoderTableGFX1264, DecoderTableGFX12_FAKE1664, MI, QW,
692 Address, CS))
693 break;
694
695 if (isGFX11() &&
696 tryDecodeInst(DecoderTableGFX1164, DecoderTableGFX11_FAKE1664, MI, QW,
697 Address, CS))
698 break;
699
700 if (isGFX11() &&
701 tryDecodeInst(DecoderTableGFX11W6464, MI, QW, Address, CS))
702 break;
703
704 if (isGFX12() &&
705 tryDecodeInst(DecoderTableGFX12W6464, MI, QW, Address, CS))
706 break;
707
708 // Reinitialize Bytes
709 Bytes = Bytes_.slice(0, MaxInstBytesNum);
710 }
711
712 // Try decode 32-bit instruction
713 if (Bytes.size() >= 4) {
714 const uint32_t DW = eatBytes<uint32_t>(Bytes);
715
716 if ((isVI() || isGFX9()) &&
717 tryDecodeInst(DecoderTableGFX832, MI, DW, Address, CS))
718 break;
719
720 if (tryDecodeInst(DecoderTableAMDGPU32, MI, DW, Address, CS))
721 break;
722
723 if (isGFX9() && tryDecodeInst(DecoderTableGFX932, MI, DW, Address, CS))
724 break;
725
726 if (STI.hasFeature(AMDGPU::FeatureGFX950Insts) &&
727 tryDecodeInst(DecoderTableGFX95032, MI, DW, Address, CS))
728 break;
729
730 if (STI.hasFeature(AMDGPU::FeatureGFX90AInsts) &&
731 tryDecodeInst(DecoderTableGFX90A32, MI, DW, Address, CS))
732 break;
733
734 if (STI.hasFeature(AMDGPU::FeatureGFX10_BEncoding) &&
735 tryDecodeInst(DecoderTableGFX10_B32, MI, DW, Address, CS))
736 break;
737
738 if (isGFX10() && tryDecodeInst(DecoderTableGFX1032, MI, DW, Address, CS))
739 break;
740
741 if (isGFX11() &&
742 tryDecodeInst(DecoderTableGFX1132, DecoderTableGFX11_FAKE1632, MI, DW,
743 Address, CS))
744 break;
745
746 if (isGFX1250() &&
747 tryDecodeInst(DecoderTableGFX125032, DecoderTableGFX1250_FAKE1632, MI,
748 DW, Address, CS))
749 break;
750
751 if (isGFX12() &&
752 tryDecodeInst(DecoderTableGFX1232, DecoderTableGFX12_FAKE1632, MI, DW,
753 Address, CS))
754 break;
755 }
756
757 return MCDisassembler::Fail;
758 } while (false);
759
760 DecodeStatus Status = MCDisassembler::Success;
761
762 decodeImmOperands(MI, *MCII);
763
764 if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::DPP) {
765 if (isMacDPP(MI))
766 convertMacDPPInst(MI);
767
768 if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3P)
769 convertVOP3PDPPInst(MI);
770 else if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOPC)
771 convertVOPCDPPInst(MI); // Special VOP3 case
772 else if (AMDGPU::isVOPC64DPP(MI.getOpcode()))
773 convertVOPC64DPPInst(MI); // Special VOP3 case
774 else if (AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dpp8) !=
775 -1)
776 convertDPP8Inst(MI);
777 else if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3)
778 convertVOP3DPPInst(MI); // Regular VOP3 case
779 }
780
781 convertTrue16OpSel(MI);
782
783 if (AMDGPU::isMAC(MI.getOpcode())) {
784 // Insert dummy unused src2_modifiers.
785 insertNamedMCOperand(MI, MCOperand::createImm(0),
786 AMDGPU::OpName::src2_modifiers);
787 }
788
789 if (MI.getOpcode() == AMDGPU::V_CVT_SR_BF8_F32_e64_dpp ||
790 MI.getOpcode() == AMDGPU::V_CVT_SR_FP8_F32_e64_dpp) {
791 // Insert dummy unused src2_modifiers.
792 insertNamedMCOperand(MI, MCOperand::createImm(0),
793 AMDGPU::OpName::src2_modifiers);
794 }
795
796 if ((MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::DS) &&
797 !AMDGPU::hasGDS(STI)) {
798 insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::gds);
799 }
800
801 if (MCII->get(MI.getOpcode()).TSFlags &
802 (SIInstrFlags::MUBUF | SIInstrFlags::FLAT | SIInstrFlags::SMRD)) {
803 int CPolPos = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
804 AMDGPU::OpName::cpol);
805 if (CPolPos != -1) {
806 unsigned CPol =
807 (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::IsAtomicRet) ?
808 AMDGPU::CPol::GLC : 0;
809 if (MI.getNumOperands() <= (unsigned)CPolPos) {
810 insertNamedMCOperand(MI, MCOperand::createImm(CPol),
811 AMDGPU::OpName::cpol);
812 } else if (CPol) {
813 MI.getOperand(CPolPos).setImm(MI.getOperand(CPolPos).getImm() | CPol);
814 }
815 }
816 }
817
818 if ((MCII->get(MI.getOpcode()).TSFlags &
819 (SIInstrFlags::MTBUF | SIInstrFlags::MUBUF)) &&
820 (STI.hasFeature(AMDGPU::FeatureGFX90AInsts))) {
821 // GFX90A lost TFE, its place is occupied by ACC.
822 int TFEOpIdx =
823 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::tfe);
824 if (TFEOpIdx != -1) {
825 auto *TFEIter = MI.begin();
826 std::advance(TFEIter, TFEOpIdx);
827 MI.insert(TFEIter, MCOperand::createImm(0));
828 }
829 }
830
831 if (MCII->get(MI.getOpcode()).TSFlags &
832 (SIInstrFlags::MTBUF | SIInstrFlags::MUBUF)) {
833 int SWZOpIdx =
834 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::swz);
835 if (SWZOpIdx != -1) {
836 auto *SWZIter = MI.begin();
837 std::advance(SWZIter, SWZOpIdx);
838 MI.insert(SWZIter, MCOperand::createImm(0));
839 }
840 }
841
842 if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::MIMG) {
843 int VAddr0Idx =
844 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);
845 int RsrcIdx =
846 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::srsrc);
847 unsigned NSAArgs = RsrcIdx - VAddr0Idx - 1;
848 if (VAddr0Idx >= 0 && NSAArgs > 0) {
849 unsigned NSAWords = (NSAArgs + 3) / 4;
850 if (Bytes.size() < 4 * NSAWords)
851 return MCDisassembler::Fail;
852 for (unsigned i = 0; i < NSAArgs; ++i) {
853 const unsigned VAddrIdx = VAddr0Idx + 1 + i;
854 auto VAddrRCID =
855 MCII->get(MI.getOpcode()).operands()[VAddrIdx].RegClass;
856 MI.insert(MI.begin() + VAddrIdx, createRegOperand(VAddrRCID, Bytes[i]));
857 }
858 Bytes = Bytes.slice(4 * NSAWords);
859 }
860
861 convertMIMGInst(MI);
862 }
863
864 if (MCII->get(MI.getOpcode()).TSFlags &
865 (SIInstrFlags::VIMAGE | SIInstrFlags::VSAMPLE))
866 convertMIMGInst(MI);
867
868 if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::EXP)
869 convertEXPInst(MI);
870
871 if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VINTERP)
872 convertVINTERPInst(MI);
873
874 if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::SDWA)
875 convertSDWAInst(MI);
876
877 if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::IsMAI)
878 convertMAIInst(MI);
879
880 int VDstIn_Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
881 AMDGPU::OpName::vdst_in);
882 if (VDstIn_Idx != -1) {
883 int Tied = MCII->get(MI.getOpcode()).getOperandConstraint(VDstIn_Idx,
884 MCOI::OperandConstraint::TIED_TO);
885 if (Tied != -1 && (MI.getNumOperands() <= (unsigned)VDstIn_Idx ||
886 !MI.getOperand(VDstIn_Idx).isReg() ||
887 MI.getOperand(VDstIn_Idx).getReg() != MI.getOperand(Tied).getReg())) {
888 if (MI.getNumOperands() > (unsigned)VDstIn_Idx)
889 MI.erase(&MI.getOperand(VDstIn_Idx));
890 insertNamedMCOperand(MI,
891 MCOperand::createReg(MI.getOperand(Tied).getReg()),
892 AMDGPU::OpName::vdst_in);
893 }
894 }
895
896 bool IsSOPK = MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::SOPK;
897 if (AMDGPU::hasNamedOperand(MI.getOpcode(), AMDGPU::OpName::imm) && !IsSOPK)
898 convertFMAanyK(MI);
899
900 // Some VOPC instructions, e.g., v_cmpx_f_f64, use VOP3 encoding and
901 // have EXEC as implicit destination. Issue a warning if encoding for
902 // vdst is not EXEC.
903 if ((MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3) &&
904 MCII->get(MI.getOpcode()).hasImplicitDefOfPhysReg(AMDGPU::EXEC)) {
905 auto ExecEncoding = MRI.getEncodingValue(AMDGPU::EXEC_LO);
906 if (Bytes_[0] != ExecEncoding)
907 Status = MCDisassembler::SoftFail;
908 }
909
910 Size = MaxInstBytesNum - Bytes.size();
911 return Status;
912 }
913
convertEXPInst(MCInst & MI) const914 void AMDGPUDisassembler::convertEXPInst(MCInst &MI) const {
915 if (STI.hasFeature(AMDGPU::FeatureGFX11Insts)) {
916 // The MCInst still has these fields even though they are no longer encoded
917 // in the GFX11 instruction.
918 insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::vm);
919 insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::compr);
920 }
921 }
922
convertVINTERPInst(MCInst & MI) const923 void AMDGPUDisassembler::convertVINTERPInst(MCInst &MI) const {
924 convertTrue16OpSel(MI);
925 if (MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_t16_gfx11 ||
926 MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_fake16_gfx11 ||
927 MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_t16_gfx12 ||
928 MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_fake16_gfx12 ||
929 MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_t16_gfx11 ||
930 MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_fake16_gfx11 ||
931 MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_t16_gfx12 ||
932 MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_fake16_gfx12 ||
933 MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_t16_gfx11 ||
934 MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_fake16_gfx11 ||
935 MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_t16_gfx12 ||
936 MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_fake16_gfx12 ||
937 MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_t16_gfx11 ||
938 MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_fake16_gfx11 ||
939 MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_t16_gfx12 ||
940 MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_fake16_gfx12) {
941 // The MCInst has this field that is not directly encoded in the
942 // instruction.
943 insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::op_sel);
944 }
945 }
946
convertSDWAInst(MCInst & MI) const947 void AMDGPUDisassembler::convertSDWAInst(MCInst &MI) const {
948 if (STI.hasFeature(AMDGPU::FeatureGFX9) ||
949 STI.hasFeature(AMDGPU::FeatureGFX10)) {
950 if (AMDGPU::hasNamedOperand(MI.getOpcode(), AMDGPU::OpName::sdst))
951 // VOPC - insert clamp
952 insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::clamp);
953 } else if (STI.hasFeature(AMDGPU::FeatureVolcanicIslands)) {
954 int SDst = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::sdst);
955 if (SDst != -1) {
956 // VOPC - insert VCC register as sdst
957 insertNamedMCOperand(MI, createRegOperand(AMDGPU::VCC),
958 AMDGPU::OpName::sdst);
959 } else {
960 // VOP1/2 - insert omod if present in instruction
961 insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::omod);
962 }
963 }
964 }
965
966 /// Adjust the register values used by V_MFMA_F8F6F4_f8_f8 instructions to the
967 /// appropriate subregister for the used format width.
adjustMFMA_F8F6F4OpRegClass(const MCRegisterInfo & MRI,MCOperand & MO,uint8_t NumRegs)968 static void adjustMFMA_F8F6F4OpRegClass(const MCRegisterInfo &MRI,
969 MCOperand &MO, uint8_t NumRegs) {
970 switch (NumRegs) {
971 case 4:
972 return MO.setReg(MRI.getSubReg(MO.getReg(), AMDGPU::sub0_sub1_sub2_sub3));
973 case 6:
974 return MO.setReg(
975 MRI.getSubReg(MO.getReg(), AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5));
976 case 8:
977 // No-op in cases where one operand is still f8/bf8.
978 return;
979 default:
980 llvm_unreachable("Unexpected size for mfma f8f6f4 operand");
981 }
982 }
983
984 /// f8f6f4 instructions have different pseudos depending on the used formats. In
985 /// the disassembler table, we only have the variants with the largest register
986 /// classes which assume using an fp8/bf8 format for both operands. The actual
987 /// register class depends on the format in blgp and cbsz operands. Adjust the
988 /// register classes depending on the used format.
convertMAIInst(MCInst & MI) const989 void AMDGPUDisassembler::convertMAIInst(MCInst &MI) const {
990 int BlgpIdx =
991 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::blgp);
992 if (BlgpIdx == -1)
993 return;
994
995 int CbszIdx =
996 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::cbsz);
997
998 unsigned CBSZ = MI.getOperand(CbszIdx).getImm();
999 unsigned BLGP = MI.getOperand(BlgpIdx).getImm();
1000
1001 const AMDGPU::MFMA_F8F6F4_Info *AdjustedRegClassOpcode =
1002 AMDGPU::getMFMA_F8F6F4_WithFormatArgs(CBSZ, BLGP, MI.getOpcode());
1003 if (!AdjustedRegClassOpcode ||
1004 AdjustedRegClassOpcode->Opcode == MI.getOpcode())
1005 return;
1006
1007 MI.setOpcode(AdjustedRegClassOpcode->Opcode);
1008 int Src0Idx =
1009 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0);
1010 int Src1Idx =
1011 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src1);
1012 adjustMFMA_F8F6F4OpRegClass(MRI, MI.getOperand(Src0Idx),
1013 AdjustedRegClassOpcode->NumRegsSrcA);
1014 adjustMFMA_F8F6F4OpRegClass(MRI, MI.getOperand(Src1Idx),
1015 AdjustedRegClassOpcode->NumRegsSrcB);
1016 }
1017
// VOP3/VOP3P modifier bits reconstructed from the src_modifiers operands;
// one bit per source operand (bit 3 of OpSel is the dst op_sel bit).
struct VOPModifiers {
  unsigned OpSel = 0;   // op_sel bits
  unsigned OpSelHi = 0; // op_sel_hi bits (collected for VOP3P only)
  unsigned NegLo = 0;   // neg bits (collected for VOP3P only)
  unsigned NegHi = 0;   // neg_hi bits (collected for VOP3P only)
};
1024
1025 // Reconstruct values of VOP3/VOP3P operands such as op_sel.
1026 // Note that these values do not affect disassembler output,
1027 // so this is only necessary for consistency with src_modifiers.
collectVOPModifiers(const MCInst & MI,bool IsVOP3P=false)1028 static VOPModifiers collectVOPModifiers(const MCInst &MI,
1029 bool IsVOP3P = false) {
1030 VOPModifiers Modifiers;
1031 unsigned Opc = MI.getOpcode();
1032 const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
1033 AMDGPU::OpName::src1_modifiers,
1034 AMDGPU::OpName::src2_modifiers};
1035 for (int J = 0; J < 3; ++J) {
1036 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
1037 if (OpIdx == -1)
1038 continue;
1039
1040 unsigned Val = MI.getOperand(OpIdx).getImm();
1041
1042 Modifiers.OpSel |= !!(Val & SISrcMods::OP_SEL_0) << J;
1043 if (IsVOP3P) {
1044 Modifiers.OpSelHi |= !!(Val & SISrcMods::OP_SEL_1) << J;
1045 Modifiers.NegLo |= !!(Val & SISrcMods::NEG) << J;
1046 Modifiers.NegHi |= !!(Val & SISrcMods::NEG_HI) << J;
1047 } else if (J == 0) {
1048 Modifiers.OpSel |= !!(Val & SISrcMods::DST_OP_SEL) << 3;
1049 }
1050 }
1051
1052 return Modifiers;
1053 }
1054
1055 // Instructions decode the op_sel/suffix bits into the src_modifier
1056 // operands. Copy those bits into the src operands for true16 VGPRs.
void AMDGPUDisassembler::convertTrue16OpSel(MCInst &MI) const {
  const unsigned Opc = MI.getOpcode();
  // Only operands in the 16-bit VGPR class are rewritten.
  const MCRegisterClass &ConversionRC =
      MRI.getRegClass(AMDGPU::VGPR_16RegClassID);
  // Each register operand paired with the modifier operand that carries its
  // op_sel bit; vdst's bit lives in src0_modifiers as DST_OP_SEL.
  constexpr std::array<std::tuple<AMDGPU::OpName, AMDGPU::OpName, unsigned>, 4>
      OpAndOpMods = {{{AMDGPU::OpName::src0, AMDGPU::OpName::src0_modifiers,
                       SISrcMods::OP_SEL_0},
                      {AMDGPU::OpName::src1, AMDGPU::OpName::src1_modifiers,
                       SISrcMods::OP_SEL_0},
                      {AMDGPU::OpName::src2, AMDGPU::OpName::src2_modifiers,
                       SISrcMods::OP_SEL_0},
                      {AMDGPU::OpName::vdst, AMDGPU::OpName::src0_modifiers,
                       SISrcMods::DST_OP_SEL}}};
  for (const auto &[OpName, OpModsName, OpSelMask] : OpAndOpMods) {
    int OpIdx = AMDGPU::getNamedOperandIdx(Opc, OpName);
    int OpModsIdx = AMDGPU::getNamedOperandIdx(Opc, OpModsName);
    // Skip operands this instruction does not have.
    if (OpIdx == -1 || OpModsIdx == -1)
      continue;
    MCOperand &Op = MI.getOperand(OpIdx);
    if (!Op.isReg())
      continue;
    if (!ConversionRC.contains(Op.getReg()))
      continue;
    unsigned OpEnc = MRI.getEncodingValue(Op.getReg());
    const MCOperand &OpMods = MI.getOperand(OpModsIdx);
    unsigned ModVal = OpMods.getImm();
    if (ModVal & OpSelMask) { // isHi
      // VGPR_16 registers come in lo/hi pairs; the odd index within the
      // class selects the high half of the 32-bit VGPR.
      unsigned RegIdx = OpEnc & AMDGPU::HWEncoding::REG_IDX_MASK;
      Op.setReg(ConversionRC.getRegister(RegIdx * 2 + 1));
    }
  }
}
1089
1090 // MAC opcodes have special old and src2 operands.
1091 // src2 is tied to dst, while old is not tied (but assumed to be).
isMacDPP(MCInst & MI) const1092 bool AMDGPUDisassembler::isMacDPP(MCInst &MI) const {
1093 constexpr int DST_IDX = 0;
1094 auto Opcode = MI.getOpcode();
1095 const auto &Desc = MCII->get(Opcode);
1096 auto OldIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::old);
1097
1098 if (OldIdx != -1 && Desc.getOperandConstraint(
1099 OldIdx, MCOI::OperandConstraint::TIED_TO) == -1) {
1100 assert(AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::src2));
1101 assert(Desc.getOperandConstraint(
1102 AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2),
1103 MCOI::OperandConstraint::TIED_TO) == DST_IDX);
1104 (void)DST_IDX;
1105 return true;
1106 }
1107
1108 return false;
1109 }
1110
1111 // Create dummy old operand and insert dummy unused src2_modifiers
// Create dummy old operand and insert dummy unused src2_modifiers
void AMDGPUDisassembler::convertMacDPPInst(MCInst &MI) const {
  // Both inserted operands must fit within the descriptor's operand count.
  assert(MI.getNumOperands() + 1 < MCII->get(MI.getOpcode()).getNumOperands());
  insertNamedMCOperand(MI, MCOperand::createReg(0), AMDGPU::OpName::old);
  insertNamedMCOperand(MI, MCOperand::createImm(0),
                       AMDGPU::OpName::src2_modifiers);
}
1118
convertDPP8Inst(MCInst & MI) const1119 void AMDGPUDisassembler::convertDPP8Inst(MCInst &MI) const {
1120 unsigned Opc = MI.getOpcode();
1121
1122 int VDstInIdx =
1123 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdst_in);
1124 if (VDstInIdx != -1)
1125 insertNamedMCOperand(MI, MI.getOperand(0), AMDGPU::OpName::vdst_in);
1126
1127 unsigned DescNumOps = MCII->get(Opc).getNumOperands();
1128 if (MI.getNumOperands() < DescNumOps &&
1129 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
1130 convertTrue16OpSel(MI);
1131 auto Mods = collectVOPModifiers(MI);
1132 insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSel),
1133 AMDGPU::OpName::op_sel);
1134 } else {
1135 // Insert dummy unused src modifiers.
1136 if (MI.getNumOperands() < DescNumOps &&
1137 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src0_modifiers))
1138 insertNamedMCOperand(MI, MCOperand::createImm(0),
1139 AMDGPU::OpName::src0_modifiers);
1140
1141 if (MI.getNumOperands() < DescNumOps &&
1142 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src1_modifiers))
1143 insertNamedMCOperand(MI, MCOperand::createImm(0),
1144 AMDGPU::OpName::src1_modifiers);
1145 }
1146 }
1147
convertVOP3DPPInst(MCInst & MI) const1148 void AMDGPUDisassembler::convertVOP3DPPInst(MCInst &MI) const {
1149 convertTrue16OpSel(MI);
1150
1151 int VDstInIdx =
1152 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdst_in);
1153 if (VDstInIdx != -1)
1154 insertNamedMCOperand(MI, MI.getOperand(0), AMDGPU::OpName::vdst_in);
1155
1156 unsigned Opc = MI.getOpcode();
1157 unsigned DescNumOps = MCII->get(Opc).getNumOperands();
1158 if (MI.getNumOperands() < DescNumOps &&
1159 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
1160 auto Mods = collectVOPModifiers(MI);
1161 insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSel),
1162 AMDGPU::OpName::op_sel);
1163 }
1164 }
1165
1166 // Note that before gfx10, the MIMG encoding provided no information about
1167 // VADDR size. Consequently, decoded instructions always show address as if it
1168 // has 1 dword, which could be not really so.
void AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {
  auto TSFlags = MCII->get(MI.getOpcode()).TSFlags;

  // vdst is only present on atomic forms (it receives the returned data).
  int VDstIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                           AMDGPU::OpName::vdst);

  int VDataIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                            AMDGPU::OpName::vdata);
  int VAddr0Idx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);
  // MIMG names the resource operand "srsrc"; VIMAGE/VSAMPLE use "rsrc".
  AMDGPU::OpName RsrcOpName = (TSFlags & SIInstrFlags::MIMG)
                                  ? AMDGPU::OpName::srsrc
                                  : AMDGPU::OpName::rsrc;
  int RsrcIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), RsrcOpName);
  int DMaskIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                            AMDGPU::OpName::dmask);

  int TFEIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                          AMDGPU::OpName::tfe);
  int D16Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                          AMDGPU::OpName::d16);

  const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode());
  const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
      AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);

  assert(VDataIdx != -1);
  if (BaseOpcode->BVH) {
    // Add A16 operand for intersect_ray instructions
    addOperand(MI, MCOperand::createImm(BaseOpcode->A16));
    return;
  }

  bool IsAtomic = (VDstIdx != -1);
  bool IsGather4 = TSFlags & SIInstrFlags::Gather4;
  bool IsVSample = TSFlags & SIInstrFlags::VSAMPLE;
  bool IsNSA = false;
  bool IsPartialNSA = false;
  unsigned AddrSize = Info->VAddrDwords;

  if (isGFX10Plus()) {
    // On GFX10+ the real address size can be derived from dim/a16, so the
    // opcode can later be corrected to match the actual operand counts.
    unsigned DimIdx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dim);
    int A16Idx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::a16);
    const AMDGPU::MIMGDimInfo *Dim =
        AMDGPU::getMIMGDimInfoByEncoding(MI.getOperand(DimIdx).getImm());
    const bool IsA16 = (A16Idx != -1 && MI.getOperand(A16Idx).getImm());

    AddrSize =
        AMDGPU::getAddrSizeMIMGOp(BaseOpcode, Dim, IsA16, AMDGPU::hasG16(STI));

    // VSAMPLE insts that do not use vaddr3 behave the same as NSA forms.
    // VIMAGE insts other than BVH never use vaddr4.
    IsNSA = Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA ||
            Info->MIMGEncoding == AMDGPU::MIMGEncGfx11NSA ||
            Info->MIMGEncoding == AMDGPU::MIMGEncGfx12;
    if (!IsNSA) {
      // Non-NSA addresses above 12 dwords round up to a 16-dword class.
      if (!IsVSample && AddrSize > 12)
        AddrSize = 16;
    } else {
      if (AddrSize > Info->VAddrDwords) {
        if (!STI.hasFeature(AMDGPU::FeaturePartialNSAEncoding)) {
          // The NSA encoding does not contain enough operands for the
          // combination of base opcode / dimension. Should this be an error?
          return;
        }
        IsPartialNSA = true;
      }
    }
  }

  unsigned DMask = MI.getOperand(DMaskIdx).getImm() & 0xf;
  // Gather4 always returns 4 channels; otherwise one dword per dmask bit.
  unsigned DstSize = IsGather4 ? 4 : std::max(llvm::popcount(DMask), 1);

  bool D16 = D16Idx >= 0 && MI.getOperand(D16Idx).getImm();
  if (D16 && AMDGPU::hasPackedD16(STI)) {
    // Packed D16 halves the number of data dwords (rounding up).
    DstSize = (DstSize + 1) / 2;
  }

  // TFE appends one extra dword for the status value.
  if (TFEIdx != -1 && MI.getOperand(TFEIdx).getImm())
    DstSize += 1;

  if (DstSize == Info->VDataDwords && AddrSize == Info->VAddrDwords)
    return;

  int NewOpcode =
      AMDGPU::getMIMGOpcode(Info->BaseOpcode, Info->MIMGEncoding, DstSize, AddrSize);
  if (NewOpcode == -1)
    return;

  // Widen the register to the correct number of enabled channels.
  MCRegister NewVdata;
  if (DstSize != Info->VDataDwords) {
    auto DataRCID = MCII->get(NewOpcode).operands()[VDataIdx].RegClass;

    // Get first subregister of VData
    MCRegister Vdata0 = MI.getOperand(VDataIdx).getReg();
    MCRegister VdataSub0 = MRI.getSubReg(Vdata0, AMDGPU::sub0);
    Vdata0 = (VdataSub0 != 0)? VdataSub0 : Vdata0;

    NewVdata = MRI.getMatchingSuperReg(Vdata0, AMDGPU::sub0,
                                       &MRI.getRegClass(DataRCID));
    if (!NewVdata) {
      // It's possible to encode this such that the low register + enabled
      // components exceeds the register count.
      return;
    }
  }

  // If not using NSA on GFX10+, widen vaddr0 address register to correct size.
  // If using partial NSA on GFX11+ widen last address register.
  int VAddrSAIdx = IsPartialNSA ? (RsrcIdx - 1) : VAddr0Idx;
  MCRegister NewVAddrSA;
  if (STI.hasFeature(AMDGPU::FeatureNSAEncoding) && (!IsNSA || IsPartialNSA) &&
      AddrSize != Info->VAddrDwords) {
    MCRegister VAddrSA = MI.getOperand(VAddrSAIdx).getReg();
    MCRegister VAddrSubSA = MRI.getSubReg(VAddrSA, AMDGPU::sub0);
    VAddrSA = VAddrSubSA ? VAddrSubSA : VAddrSA;

    auto AddrRCID = MCII->get(NewOpcode).operands()[VAddrSAIdx].RegClass;
    NewVAddrSA = MRI.getMatchingSuperReg(VAddrSA, AMDGPU::sub0,
                                         &MRI.getRegClass(AddrRCID));
    if (!NewVAddrSA)
      return;
  }

  MI.setOpcode(NewOpcode);

  if (NewVdata != AMDGPU::NoRegister) {
    MI.getOperand(VDataIdx) = MCOperand::createReg(NewVdata);

    if (IsAtomic) {
      // Atomic operations have an additional operand (a copy of data)
      MI.getOperand(VDstIdx) = MCOperand::createReg(NewVdata);
    }
  }

  if (NewVAddrSA) {
    MI.getOperand(VAddrSAIdx) = MCOperand::createReg(NewVAddrSA);
  } else if (IsNSA) {
    // Drop the now-unused trailing NSA address operands.
    assert(AddrSize <= Info->VAddrDwords);
    MI.erase(MI.begin() + VAddr0Idx + AddrSize,
             MI.begin() + VAddr0Idx + Info->VAddrDwords);
  }
}
1315
1316 // Opsel and neg bits are used in src_modifiers and standalone operands. Autogen
1317 // decoder only adds to src_modifiers, so manually add the bits to the other
1318 // operands.
convertVOP3PDPPInst(MCInst & MI) const1319 void AMDGPUDisassembler::convertVOP3PDPPInst(MCInst &MI) const {
1320 unsigned Opc = MI.getOpcode();
1321 unsigned DescNumOps = MCII->get(Opc).getNumOperands();
1322 auto Mods = collectVOPModifiers(MI, true);
1323
1324 if (MI.getNumOperands() < DescNumOps &&
1325 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in))
1326 insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::vdst_in);
1327
1328 if (MI.getNumOperands() < DescNumOps &&
1329 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel))
1330 insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSel),
1331 AMDGPU::OpName::op_sel);
1332 if (MI.getNumOperands() < DescNumOps &&
1333 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel_hi))
1334 insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSelHi),
1335 AMDGPU::OpName::op_sel_hi);
1336 if (MI.getNumOperands() < DescNumOps &&
1337 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::neg_lo))
1338 insertNamedMCOperand(MI, MCOperand::createImm(Mods.NegLo),
1339 AMDGPU::OpName::neg_lo);
1340 if (MI.getNumOperands() < DescNumOps &&
1341 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::neg_hi))
1342 insertNamedMCOperand(MI, MCOperand::createImm(Mods.NegHi),
1343 AMDGPU::OpName::neg_hi);
1344 }
1345
1346 // Create dummy old operand and insert optional operands
convertVOPCDPPInst(MCInst & MI) const1347 void AMDGPUDisassembler::convertVOPCDPPInst(MCInst &MI) const {
1348 unsigned Opc = MI.getOpcode();
1349 unsigned DescNumOps = MCII->get(Opc).getNumOperands();
1350
1351 if (MI.getNumOperands() < DescNumOps &&
1352 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::old))
1353 insertNamedMCOperand(MI, MCOperand::createReg(0), AMDGPU::OpName::old);
1354
1355 if (MI.getNumOperands() < DescNumOps &&
1356 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src0_modifiers))
1357 insertNamedMCOperand(MI, MCOperand::createImm(0),
1358 AMDGPU::OpName::src0_modifiers);
1359
1360 if (MI.getNumOperands() < DescNumOps &&
1361 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src1_modifiers))
1362 insertNamedMCOperand(MI, MCOperand::createImm(0),
1363 AMDGPU::OpName::src1_modifiers);
1364 }
1365
convertVOPC64DPPInst(MCInst & MI) const1366 void AMDGPUDisassembler::convertVOPC64DPPInst(MCInst &MI) const {
1367 unsigned Opc = MI.getOpcode();
1368 unsigned DescNumOps = MCII->get(Opc).getNumOperands();
1369
1370 convertTrue16OpSel(MI);
1371
1372 if (MI.getNumOperands() < DescNumOps &&
1373 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
1374 VOPModifiers Mods = collectVOPModifiers(MI);
1375 insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSel),
1376 AMDGPU::OpName::op_sel);
1377 }
1378 }
1379
// Re-insert the previously decoded literal as the named immX operand.
void AMDGPUDisassembler::convertFMAanyK(MCInst &MI) const {
  assert(HasLiteral && "Should have decoded a literal");
  insertNamedMCOperand(MI, MCOperand::createImm(Literal), AMDGPU::OpName::immX);
}
1384
getRegClassName(unsigned RegClassID) const1385 const char* AMDGPUDisassembler::getRegClassName(unsigned RegClassID) const {
1386 return getContext().getRegisterInfo()->
1387 getRegClassName(&AMDGPUMCRegisterClasses[RegClassID]);
1388 }
1389
// Report a decoding error for value V and return an invalid operand.
inline
MCOperand AMDGPUDisassembler::errOperand(unsigned V,
                                         const Twine& ErrMsg) const {
  // Surface the diagnostic through the disassembly comment stream.
  *CommentStream << "Error: " + ErrMsg;

  // ToDo: add support for error operands to MCInst.h
  // return MCOperand::createError(V);
  return MCOperand();
}
1399
// Wrap RegId as a register operand, mapping it through getMCReg for the
// current subtarget.
inline
MCOperand AMDGPUDisassembler::createRegOperand(unsigned int RegId) const {
  return MCOperand::createReg(AMDGPU::getMCReg(RegId, STI));
}
1404
1405 inline
createRegOperand(unsigned RegClassID,unsigned Val) const1406 MCOperand AMDGPUDisassembler::createRegOperand(unsigned RegClassID,
1407 unsigned Val) const {
1408 const auto& RegCl = AMDGPUMCRegisterClasses[RegClassID];
1409 if (Val >= RegCl.getNumRegs())
1410 return errOperand(Val, Twine(getRegClassName(RegClassID)) +
1411 ": unknown register " + Twine(Val));
1412 return createRegOperand(RegCl.getRegister(Val));
1413 }
1414
// Build a scalar-register operand from a raw encoding. Wider classes encode
// the first 32-bit register, so the value is shifted right by the class's
// alignment (log2) to obtain the index within the class.
inline
MCOperand AMDGPUDisassembler::createSRegOperand(unsigned SRegClassID,
                                                unsigned Val) const {
  // ToDo: SI/CI have 104 SGPRs, VI - 102
  // Valery: here we accepting as much as we can, let assembler sort it out
  int shift = 0;
  switch (SRegClassID) {
  // 32-bit classes: encoding is the register index directly.
  case AMDGPU::SGPR_32RegClassID:
  case AMDGPU::TTMP_32RegClassID:
    break;
  // 64-bit classes are 2-register aligned.
  case AMDGPU::SGPR_64RegClassID:
  case AMDGPU::TTMP_64RegClassID:
    shift = 1;
    break;
  // All wider classes below are 4-register aligned (deliberate fallthrough).
  case AMDGPU::SGPR_96RegClassID:
  case AMDGPU::TTMP_96RegClassID:
  case AMDGPU::SGPR_128RegClassID:
  case AMDGPU::TTMP_128RegClassID:
  // ToDo: unclear if s[100:104] is available on VI. Can we use VCC as SGPR in
  // this bundle?
  case AMDGPU::SGPR_256RegClassID:
  case AMDGPU::TTMP_256RegClassID:
    // ToDo: unclear if s[96:104] is available on VI. Can we use VCC as SGPR in
    // this bundle?
  case AMDGPU::SGPR_288RegClassID:
  case AMDGPU::TTMP_288RegClassID:
  case AMDGPU::SGPR_320RegClassID:
  case AMDGPU::TTMP_320RegClassID:
  case AMDGPU::SGPR_352RegClassID:
  case AMDGPU::TTMP_352RegClassID:
  case AMDGPU::SGPR_384RegClassID:
  case AMDGPU::TTMP_384RegClassID:
  case AMDGPU::SGPR_512RegClassID:
  case AMDGPU::TTMP_512RegClassID:
    shift = 2;
    break;
  // ToDo: unclear if s[88:104] is available on VI. Can we use VCC as SGPR in
  // this bundle?
  default:
    llvm_unreachable("unhandled register class");
  }

  // Misaligned encodings are accepted but flagged in the comment stream.
  if (Val % (1 << shift)) {
    *CommentStream << "Warning: " << getRegClassName(SRegClassID)
                   << ": scalar reg isn't aligned " << Val;
  }

  return createRegOperand(SRegClassID, Val >> shift);
}
1464
createVGPR16Operand(unsigned RegIdx,bool IsHi) const1465 MCOperand AMDGPUDisassembler::createVGPR16Operand(unsigned RegIdx,
1466 bool IsHi) const {
1467 unsigned RegIdxInVGPR16 = RegIdx * 2 + (IsHi ? 1 : 0);
1468 return createRegOperand(AMDGPU::VGPR_16RegClassID, RegIdxInVGPR16);
1469 }
1470
1471 // Decode Literals for insts which always have a literal in the encoding
1472 MCOperand
decodeMandatoryLiteralConstant(unsigned Val) const1473 AMDGPUDisassembler::decodeMandatoryLiteralConstant(unsigned Val) const {
1474 if (HasLiteral) {
1475 assert(
1476 AMDGPU::hasVOPD(STI) &&
1477 "Should only decode multiple kimm with VOPD, check VSrc operand types");
1478 if (Literal != Val)
1479 return errOperand(Val, "More than one unique literal is illegal");
1480 }
1481 HasLiteral = true;
1482 Literal = Val;
1483 return MCOperand::createImm(Literal);
1484 }
1485
1486 MCOperand
decodeMandatoryLiteral64Constant(uint64_t Val) const1487 AMDGPUDisassembler::decodeMandatoryLiteral64Constant(uint64_t Val) const {
1488 if (HasLiteral) {
1489 if (Literal64 != Val)
1490 return errOperand(Val, "More than one unique literal is illegal");
1491 }
1492 HasLiteral = true;
1493 Literal = Literal64 = Val;
1494 return MCOperand::createImm(Literal64);
1495 }
1496
decodeLiteralConstant(bool ExtendFP64) const1497 MCOperand AMDGPUDisassembler::decodeLiteralConstant(bool ExtendFP64) const {
1498 // For now all literal constants are supposed to be unsigned integer
1499 // ToDo: deal with signed/unsigned 64-bit integer constants
1500 // ToDo: deal with float/double constants
1501 if (!HasLiteral) {
1502 if (Bytes.size() < 4) {
1503 return errOperand(0, "cannot read literal, inst bytes left " +
1504 Twine(Bytes.size()));
1505 }
1506 HasLiteral = true;
1507 Literal = Literal64 = eatBytes<uint32_t>(Bytes);
1508 if (ExtendFP64)
1509 Literal64 <<= 32;
1510 }
1511 return MCOperand::createImm(ExtendFP64 ? Literal64 : Literal);
1512 }
1513
decodeLiteral64Constant() const1514 MCOperand AMDGPUDisassembler::decodeLiteral64Constant() const {
1515 assert(STI.hasFeature(AMDGPU::Feature64BitLiterals));
1516
1517 if (!HasLiteral) {
1518 if (Bytes.size() < 8) {
1519 return errOperand(0, "cannot read literal64, inst bytes left " +
1520 Twine(Bytes.size()));
1521 }
1522 HasLiteral = true;
1523 Literal64 = eatBytes<uint64_t>(Bytes);
1524 }
1525 return MCOperand::createImm(Literal64);
1526 }
1527
decodeIntImmed(unsigned Imm)1528 MCOperand AMDGPUDisassembler::decodeIntImmed(unsigned Imm) {
1529 using namespace AMDGPU::EncValues;
1530
1531 assert(Imm >= INLINE_INTEGER_C_MIN && Imm <= INLINE_INTEGER_C_MAX);
1532 return MCOperand::createImm((Imm <= INLINE_INTEGER_C_POSITIVE_MAX) ?
1533 (static_cast<int64_t>(Imm) - INLINE_INTEGER_C_MIN) :
1534 (INLINE_INTEGER_C_POSITIVE_MAX - static_cast<int64_t>(Imm)));
1535 // Cast prevents negative overflow.
1536 }
1537
getInlineImmVal32(unsigned Imm)1538 static int64_t getInlineImmVal32(unsigned Imm) {
1539 switch (Imm) {
1540 case 240:
1541 return llvm::bit_cast<uint32_t>(0.5f);
1542 case 241:
1543 return llvm::bit_cast<uint32_t>(-0.5f);
1544 case 242:
1545 return llvm::bit_cast<uint32_t>(1.0f);
1546 case 243:
1547 return llvm::bit_cast<uint32_t>(-1.0f);
1548 case 244:
1549 return llvm::bit_cast<uint32_t>(2.0f);
1550 case 245:
1551 return llvm::bit_cast<uint32_t>(-2.0f);
1552 case 246:
1553 return llvm::bit_cast<uint32_t>(4.0f);
1554 case 247:
1555 return llvm::bit_cast<uint32_t>(-4.0f);
1556 case 248: // 1 / (2 * PI)
1557 return 0x3e22f983;
1558 default:
1559 llvm_unreachable("invalid fp inline imm");
1560 }
1561 }
1562
getInlineImmVal64(unsigned Imm)1563 static int64_t getInlineImmVal64(unsigned Imm) {
1564 switch (Imm) {
1565 case 240:
1566 return llvm::bit_cast<uint64_t>(0.5);
1567 case 241:
1568 return llvm::bit_cast<uint64_t>(-0.5);
1569 case 242:
1570 return llvm::bit_cast<uint64_t>(1.0);
1571 case 243:
1572 return llvm::bit_cast<uint64_t>(-1.0);
1573 case 244:
1574 return llvm::bit_cast<uint64_t>(2.0);
1575 case 245:
1576 return llvm::bit_cast<uint64_t>(-2.0);
1577 case 246:
1578 return llvm::bit_cast<uint64_t>(4.0);
1579 case 247:
1580 return llvm::bit_cast<uint64_t>(-4.0);
1581 case 248: // 1 / (2 * PI)
1582 return 0x3fc45f306dc9c882;
1583 default:
1584 llvm_unreachable("invalid fp inline imm");
1585 }
1586 }
1587
getInlineImmValF16(unsigned Imm)1588 static int64_t getInlineImmValF16(unsigned Imm) {
1589 switch (Imm) {
1590 case 240:
1591 return 0x3800;
1592 case 241:
1593 return 0xB800;
1594 case 242:
1595 return 0x3C00;
1596 case 243:
1597 return 0xBC00;
1598 case 244:
1599 return 0x4000;
1600 case 245:
1601 return 0xC000;
1602 case 246:
1603 return 0x4400;
1604 case 247:
1605 return 0xC400;
1606 case 248: // 1 / (2 * PI)
1607 return 0x3118;
1608 default:
1609 llvm_unreachable("invalid fp inline imm");
1610 }
1611 }
1612
getInlineImmValBF16(unsigned Imm)1613 static int64_t getInlineImmValBF16(unsigned Imm) {
1614 switch (Imm) {
1615 case 240:
1616 return 0x3F00;
1617 case 241:
1618 return 0xBF00;
1619 case 242:
1620 return 0x3F80;
1621 case 243:
1622 return 0xBF80;
1623 case 244:
1624 return 0x4000;
1625 case 245:
1626 return 0xC000;
1627 case 246:
1628 return 0x4080;
1629 case 247:
1630 return 0xC080;
1631 case 248: // 1 / (2 * PI)
1632 return 0x3E22;
1633 default:
1634 llvm_unreachable("invalid fp inline imm");
1635 }
1636 }
1637
getVgprClassId(unsigned Width) const1638 unsigned AMDGPUDisassembler::getVgprClassId(unsigned Width) const {
1639 using namespace AMDGPU;
1640
1641 switch (Width) {
1642 case 16:
1643 case 32:
1644 return VGPR_32RegClassID;
1645 case 64:
1646 return VReg_64RegClassID;
1647 case 96:
1648 return VReg_96RegClassID;
1649 case 128:
1650 return VReg_128RegClassID;
1651 case 160:
1652 return VReg_160RegClassID;
1653 case 192:
1654 return VReg_192RegClassID;
1655 case 256:
1656 return VReg_256RegClassID;
1657 case 288:
1658 return VReg_288RegClassID;
1659 case 320:
1660 return VReg_320RegClassID;
1661 case 352:
1662 return VReg_352RegClassID;
1663 case 384:
1664 return VReg_384RegClassID;
1665 case 512:
1666 return VReg_512RegClassID;
1667 case 1024:
1668 return VReg_1024RegClassID;
1669 }
1670 llvm_unreachable("Invalid register width!");
1671 }
1672
getAgprClassId(unsigned Width) const1673 unsigned AMDGPUDisassembler::getAgprClassId(unsigned Width) const {
1674 using namespace AMDGPU;
1675
1676 switch (Width) {
1677 case 16:
1678 case 32:
1679 return AGPR_32RegClassID;
1680 case 64:
1681 return AReg_64RegClassID;
1682 case 96:
1683 return AReg_96RegClassID;
1684 case 128:
1685 return AReg_128RegClassID;
1686 case 160:
1687 return AReg_160RegClassID;
1688 case 256:
1689 return AReg_256RegClassID;
1690 case 288:
1691 return AReg_288RegClassID;
1692 case 320:
1693 return AReg_320RegClassID;
1694 case 352:
1695 return AReg_352RegClassID;
1696 case 384:
1697 return AReg_384RegClassID;
1698 case 512:
1699 return AReg_512RegClassID;
1700 case 1024:
1701 return AReg_1024RegClassID;
1702 }
1703 llvm_unreachable("Invalid register width!");
1704 }
1705
getSgprClassId(unsigned Width) const1706 unsigned AMDGPUDisassembler::getSgprClassId(unsigned Width) const {
1707 using namespace AMDGPU;
1708
1709 switch (Width) {
1710 case 16:
1711 case 32:
1712 return SGPR_32RegClassID;
1713 case 64:
1714 return SGPR_64RegClassID;
1715 case 96:
1716 return SGPR_96RegClassID;
1717 case 128:
1718 return SGPR_128RegClassID;
1719 case 160:
1720 return SGPR_160RegClassID;
1721 case 256:
1722 return SGPR_256RegClassID;
1723 case 288:
1724 return SGPR_288RegClassID;
1725 case 320:
1726 return SGPR_320RegClassID;
1727 case 352:
1728 return SGPR_352RegClassID;
1729 case 384:
1730 return SGPR_384RegClassID;
1731 case 512:
1732 return SGPR_512RegClassID;
1733 }
1734 llvm_unreachable("Invalid register width!");
1735 }
1736
getTtmpClassId(unsigned Width) const1737 unsigned AMDGPUDisassembler::getTtmpClassId(unsigned Width) const {
1738 using namespace AMDGPU;
1739
1740 switch (Width) {
1741 case 16:
1742 case 32:
1743 return TTMP_32RegClassID;
1744 case 64:
1745 return TTMP_64RegClassID;
1746 case 128:
1747 return TTMP_128RegClassID;
1748 case 256:
1749 return TTMP_256RegClassID;
1750 case 288:
1751 return TTMP_288RegClassID;
1752 case 320:
1753 return TTMP_320RegClassID;
1754 case 352:
1755 return TTMP_352RegClassID;
1756 case 384:
1757 return TTMP_384RegClassID;
1758 case 512:
1759 return TTMP_512RegClassID;
1760 }
1761 llvm_unreachable("Invalid register width!");
1762 }
1763
getTTmpIdx(unsigned Val) const1764 int AMDGPUDisassembler::getTTmpIdx(unsigned Val) const {
1765 using namespace AMDGPU::EncValues;
1766
1767 unsigned TTmpMin = isGFX9Plus() ? TTMP_GFX9PLUS_MIN : TTMP_VI_MIN;
1768 unsigned TTmpMax = isGFX9Plus() ? TTMP_GFX9PLUS_MAX : TTMP_VI_MAX;
1769
1770 return (TTmpMin <= Val && Val <= TTmpMax)? Val - TTmpMin : -1;
1771 }
1772
decodeSrcOp(unsigned Width,unsigned Val) const1773 MCOperand AMDGPUDisassembler::decodeSrcOp(unsigned Width, unsigned Val) const {
1774 using namespace AMDGPU::EncValues;
1775
1776 assert(Val < 1024); // enum10
1777
1778 bool IsAGPR = Val & 512;
1779 Val &= 511;
1780
1781 if (VGPR_MIN <= Val && Val <= VGPR_MAX) {
1782 return createRegOperand(IsAGPR ? getAgprClassId(Width)
1783 : getVgprClassId(Width), Val - VGPR_MIN);
1784 }
1785 return decodeNonVGPRSrcOp(Width, Val & 0xFF);
1786 }
1787
/// Decode an 8-bit non-VGPR source operand encoding: SGPRs, trap-temporaries,
/// inline constants, literal markers, and special hardware registers.
MCOperand AMDGPUDisassembler::decodeNonVGPRSrcOp(unsigned Width,
                                                 unsigned Val) const {
  // Cases when Val{8} is 1 (vgpr, agpr or true 16 vgpr) should have been
  // decoded earlier.
  assert(Val < (1 << 8) && "9-bit Src encoding when Val{8} is 0");
  using namespace AMDGPU::EncValues;

  // Plain SGPR range.
  if (Val <= SGPR_MAX) {
    // "SGPR_MIN <= Val" is always true and causes compilation warning.
    static_assert(SGPR_MIN == 0);
    return createSRegOperand(getSgprClassId(Width), Val - SGPR_MIN);
  }

  // Trap-temporary SGPR range (subtarget dependent).
  int TTmpIdx = getTTmpIdx(Val);
  if (TTmpIdx >= 0) {
    return createSRegOperand(getTtmpClassId(Width), TTmpIdx);
  }

  // Inline integer/float constants and the 32-bit literal marker are kept as
  // raw immediates; downstream consumers interpret the encoding.
  if ((INLINE_INTEGER_C_MIN <= Val && Val <= INLINE_INTEGER_C_MAX) ||
      (INLINE_FLOATING_C_MIN <= Val && Val <= INLINE_FLOATING_C_MAX) ||
      Val == LITERAL_CONST)
    return MCOperand::createImm(Val);

  // 64-bit literal marker, only meaningful when the subtarget supports
  // 64-bit literals.
  if (Val == LITERAL64_CONST && STI.hasFeature(AMDGPU::Feature64BitLiterals)) {
    return decodeLiteral64Constant();
  }

  // Anything left is a special hardware register, decoded per operand width.
  switch (Width) {
  case 32:
  case 16:
    return decodeSpecialReg32(Val);
  case 64:
    return decodeSpecialReg64(Val);
  case 96:
  case 128:
  case 256:
  case 512:
    return decodeSpecialReg96Plus(Val);
  default:
    llvm_unreachable("unexpected immediate type");
  }
}
1830
1831 // Bit 0 of DstY isn't stored in the instruction, because it's always the
1832 // opposite of bit 0 of DstX.
decodeVOPDDstYOp(MCInst & Inst,unsigned Val) const1833 MCOperand AMDGPUDisassembler::decodeVOPDDstYOp(MCInst &Inst,
1834 unsigned Val) const {
1835 int VDstXInd =
1836 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::vdstX);
1837 assert(VDstXInd != -1);
1838 assert(Inst.getOperand(VDstXInd).isReg());
1839 unsigned XDstReg = MRI.getEncodingValue(Inst.getOperand(VDstXInd).getReg());
1840 Val |= ~XDstReg & 1;
1841 return createRegOperand(getVgprClassId(32), Val);
1842 }
1843
/// Decode a special (non-SGPR, non-VGPR) 32-bit register encoding. Values
/// are the hardware source-operand encodings; unknown values yield an error
/// operand. Note encodings 124/125 swapped meaning (M0 vs NULL) on GFX11.
MCOperand AMDGPUDisassembler::decodeSpecialReg32(unsigned Val) const {
  using namespace AMDGPU;

  switch (Val) {
  // clang-format off
  case 102: return createRegOperand(FLAT_SCR_LO);
  case 103: return createRegOperand(FLAT_SCR_HI);
  case 104: return createRegOperand(XNACK_MASK_LO);
  case 105: return createRegOperand(XNACK_MASK_HI);
  case 106: return createRegOperand(VCC_LO);
  case 107: return createRegOperand(VCC_HI);
  case 108: return createRegOperand(TBA_LO);
  case 109: return createRegOperand(TBA_HI);
  case 110: return createRegOperand(TMA_LO);
  case 111: return createRegOperand(TMA_HI);
  case 124:
    return isGFX11Plus() ? createRegOperand(SGPR_NULL) : createRegOperand(M0);
  case 125:
    return isGFX11Plus() ? createRegOperand(M0) : createRegOperand(SGPR_NULL);
  case 126: return createRegOperand(EXEC_LO);
  case 127: return createRegOperand(EXEC_HI);
  case 235: return createRegOperand(SRC_SHARED_BASE_LO);
  case 236: return createRegOperand(SRC_SHARED_LIMIT_LO);
  case 237: return createRegOperand(SRC_PRIVATE_BASE_LO);
  case 238: return createRegOperand(SRC_PRIVATE_LIMIT_LO);
  case 239: return createRegOperand(SRC_POPS_EXITING_WAVE_ID);
  case 251: return createRegOperand(SRC_VCCZ);
  case 252: return createRegOperand(SRC_EXECZ);
  case 253: return createRegOperand(SRC_SCC);
  case 254: return createRegOperand(LDS_DIRECT);
  default: break;
  // clang-format on
  }
  return errOperand(Val, "unknown operand encoding " + Twine(Val));
}
1879
/// Decode a special 64-bit register encoding (register pairs). Encodings
/// 124/125 map to SGPR_NULL on GFX11+/pre-GFX11 respectively; anything
/// unrecognized yields an error operand.
MCOperand AMDGPUDisassembler::decodeSpecialReg64(unsigned Val) const {
  using namespace AMDGPU;

  switch (Val) {
  case 102: return createRegOperand(FLAT_SCR);
  case 104: return createRegOperand(XNACK_MASK);
  case 106: return createRegOperand(VCC);
  case 108: return createRegOperand(TBA);
  case 110: return createRegOperand(TMA);
  case 124:
    if (isGFX11Plus())
      return createRegOperand(SGPR_NULL);
    break;
  case 125:
    if (!isGFX11Plus())
      return createRegOperand(SGPR_NULL);
    break;
  case 126: return createRegOperand(EXEC);
  case 235: return createRegOperand(SRC_SHARED_BASE);
  case 236: return createRegOperand(SRC_SHARED_LIMIT);
  case 237: return createRegOperand(SRC_PRIVATE_BASE);
  case 238: return createRegOperand(SRC_PRIVATE_LIMIT);
  case 239: return createRegOperand(SRC_POPS_EXITING_WAVE_ID);
  case 251: return createRegOperand(SRC_VCCZ);
  case 252: return createRegOperand(SRC_EXECZ);
  case 253: return createRegOperand(SRC_SCC);
  default: break;
  }
  return errOperand(Val, "unknown operand encoding " + Twine(Val));
}
1910
/// Decode a special register encoding for operands 96 bits or wider. Only
/// SGPR_NULL is valid at these widths (encoding 124 on GFX11+, 125 before);
/// everything else is an error operand.
MCOperand AMDGPUDisassembler::decodeSpecialReg96Plus(unsigned Val) const {
  using namespace AMDGPU;

  switch (Val) {
  case 124:
    if (isGFX11Plus())
      return createRegOperand(SGPR_NULL);
    break;
  case 125:
    if (!isGFX11Plus())
      return createRegOperand(SGPR_NULL);
    break;
  default:
    break;
  }
  return errOperand(Val, "unknown operand encoding " + Twine(Val));
}
1928
/// Decode an SDWA source operand. On GFX9/GFX10 the SDWA src encoding packs
/// VGPRs, SGPRs and TTMPs into disjoint ranges; values past those ranges are
/// re-biased and treated like an ordinary src encoding (inline constant or
/// special register). On VI the field is always a VGPR number.
MCOperand AMDGPUDisassembler::decodeSDWASrc(unsigned Width,
                                            const unsigned Val) const {
  using namespace AMDGPU::SDWA;
  using namespace AMDGPU::EncValues;

  if (STI.hasFeature(AMDGPU::FeatureGFX9) ||
      STI.hasFeature(AMDGPU::FeatureGFX10)) {
    // XXX: cast to int is needed to avoid stupid warning:
    // compare with unsigned is always true
    if (int(SDWA9EncValues::SRC_VGPR_MIN) <= int(Val) &&
        Val <= SDWA9EncValues::SRC_VGPR_MAX) {
      return createRegOperand(getVgprClassId(Width),
                              Val - SDWA9EncValues::SRC_VGPR_MIN);
    }
    // SGPR range; its upper bound grew on GFX10.
    if (SDWA9EncValues::SRC_SGPR_MIN <= Val &&
        Val <= (isGFX10Plus() ? SDWA9EncValues::SRC_SGPR_MAX_GFX10
                              : SDWA9EncValues::SRC_SGPR_MAX_SI)) {
      return createSRegOperand(getSgprClassId(Width),
                               Val - SDWA9EncValues::SRC_SGPR_MIN);
    }
    if (SDWA9EncValues::SRC_TTMP_MIN <= Val &&
        Val <= SDWA9EncValues::SRC_TTMP_MAX) {
      return createSRegOperand(getTtmpClassId(Width),
                               Val - SDWA9EncValues::SRC_TTMP_MIN);
    }

    // Remaining values are the regular src encoding offset by SRC_SGPR_MIN:
    // undo the bias, then decode as inline constant or special register.
    const unsigned SVal = Val - SDWA9EncValues::SRC_SGPR_MIN;

    if ((INLINE_INTEGER_C_MIN <= SVal && SVal <= INLINE_INTEGER_C_MAX) ||
        (INLINE_FLOATING_C_MIN <= SVal && SVal <= INLINE_FLOATING_C_MAX))
      return MCOperand::createImm(SVal);

    return decodeSpecialReg32(SVal);
  }
  if (STI.hasFeature(AMDGPU::FeatureVolcanicIslands))
    return createRegOperand(getVgprClassId(Width), Val);
  llvm_unreachable("unsupported target");
}
1967
/// Decode a 16-bit-wide SDWA source operand.
MCOperand AMDGPUDisassembler::decodeSDWASrc16(unsigned Val) const {
  return decodeSDWASrc(16, Val);
}
1971
/// Decode a 32-bit-wide SDWA source operand.
MCOperand AMDGPUDisassembler::decodeSDWASrc32(unsigned Val) const {
  return decodeSDWASrc(32, Val);
}
1975
/// Decode the SDWA VOPC destination. When the VCC-mask bit is clear the
/// destination is implicitly VCC (VCC_LO on wave32); otherwise the low bits
/// select an SGPR/TTMP/special register sized to the wavefront lanemask.
MCOperand AMDGPUDisassembler::decodeSDWAVopcDst(unsigned Val) const {
  using namespace AMDGPU::SDWA;

  assert((STI.hasFeature(AMDGPU::FeatureGFX9) ||
          STI.hasFeature(AMDGPU::FeatureGFX10)) &&
         "SDWAVopcDst should be present only on GFX9+");

  bool IsWave32 = STI.hasFeature(AMDGPU::FeatureWavefrontSize32);

  if (Val & SDWA9EncValues::VOPC_DST_VCC_MASK) {
    // Explicit destination: strip the VCC flag, keep the register number.
    Val &= SDWA9EncValues::VOPC_DST_SGPR_MASK;

    int TTmpIdx = getTTmpIdx(Val);
    if (TTmpIdx >= 0) {
      auto TTmpClsId = getTtmpClassId(IsWave32 ? 32 : 64);
      return createSRegOperand(TTmpClsId, TTmpIdx);
    }
    // Values above the SGPR range are special registers.
    if (Val > SGPR_MAX) {
      return IsWave32 ? decodeSpecialReg32(Val) : decodeSpecialReg64(Val);
    }
    return createSRegOperand(getSgprClassId(IsWave32 ? 32 : 64), Val);
  }
  return createRegOperand(IsWave32 ? AMDGPU::VCC_LO : AMDGPU::VCC);
}
2000
decodeBoolReg(unsigned Val) const2001 MCOperand AMDGPUDisassembler::decodeBoolReg(unsigned Val) const {
2002 return STI.hasFeature(AMDGPU::FeatureWavefrontSize32) ? decodeSrcOp(32, Val)
2003 : decodeSrcOp(64, Val);
2004 }
2005
/// Decode a split-barrier operand; it uses the regular 32-bit src encoding.
MCOperand AMDGPUDisassembler::decodeSplitBarrier(unsigned Val) const {
  return decodeSrcOp(32, Val);
}
2009
decodeDpp8FI(unsigned Val) const2010 MCOperand AMDGPUDisassembler::decodeDpp8FI(unsigned Val) const {
2011 if (Val != AMDGPU::DPP::DPP8_FI_0 && Val != AMDGPU::DPP::DPP8_FI_1)
2012 return MCOperand();
2013 return MCOperand::createImm(Val);
2014 }
2015
/// Decode an s_version-style immediate into a symbolic expression. The low
/// 8 bits are the microcode version code; bits 13-15 are the W64/W32/MDP
/// flags. Known version codes become symbol references; unknown codes or any
/// set reserved bit fall back to a plain immediate/constant.
MCOperand AMDGPUDisassembler::decodeVersionImm(unsigned Imm) const {
  using VersionField = AMDGPU::EncodingField<7, 0>;
  using W64Bit = AMDGPU::EncodingBit<13>;
  using W32Bit = AMDGPU::EncodingBit<14>;
  using MDPBit = AMDGPU::EncodingBit<15>;
  using Encoding = AMDGPU::EncodingFields<VersionField, W64Bit, W32Bit, MDPBit>;

  auto [Version, W64, W32, MDP] = Encoding::decode(Imm);

  // Decode into a plain immediate if any unused bits are raised.
  if (Encoding::encode(Version, W64, W32, MDP) != Imm)
    return MCOperand::createImm(Imm);

  // Look the version code up in the table of known GFX versions.
  const auto &Versions = AMDGPU::UCVersion::getGFXVersions();
  const auto *I = find_if(
      Versions, [Version = Version](const AMDGPU::UCVersion::GFXVersion &V) {
        return V.Code == Version;
      });
  MCContext &Ctx = getContext();
  const MCExpr *E;
  if (I == Versions.end())
    E = MCConstantExpr::create(Version, Ctx);
  else
    E = MCSymbolRefExpr::create(Ctx.getOrCreateSymbol(I->Symbol), Ctx);

  // OR in the symbolic flag constants for each raised flag bit.
  if (W64)
    E = MCBinaryExpr::createOr(E, UCVersionW64Expr, Ctx);
  if (W32)
    E = MCBinaryExpr::createOr(E, UCVersionW32Expr, Ctx);
  if (MDP)
    E = MCBinaryExpr::createOr(E, UCVersionMDPExpr, Ctx);

  return MCOperand::createExpr(E);
}
2050
// Subtarget generation predicates used throughout decoding. Each simply
// queries the subtarget feature bits, either directly or via the
// AMDGPUBaseInfo helpers.

bool AMDGPUDisassembler::isVI() const {
  return STI.hasFeature(AMDGPU::FeatureVolcanicIslands);
}

bool AMDGPUDisassembler::isGFX9() const { return AMDGPU::isGFX9(STI); }

bool AMDGPUDisassembler::isGFX90A() const {
  return STI.hasFeature(AMDGPU::FeatureGFX90AInsts);
}

bool AMDGPUDisassembler::isGFX9Plus() const { return AMDGPU::isGFX9Plus(STI); }

bool AMDGPUDisassembler::isGFX10() const { return AMDGPU::isGFX10(STI); }

bool AMDGPUDisassembler::isGFX10Plus() const {
  return AMDGPU::isGFX10Plus(STI);
}

bool AMDGPUDisassembler::isGFX11() const {
  return STI.hasFeature(AMDGPU::FeatureGFX11);
}

bool AMDGPUDisassembler::isGFX11Plus() const {
  return AMDGPU::isGFX11Plus(STI);
}

bool AMDGPUDisassembler::isGFX12() const {
  return STI.hasFeature(AMDGPU::FeatureGFX12);
}

bool AMDGPUDisassembler::isGFX12Plus() const {
  return AMDGPU::isGFX12Plus(STI);
}

bool AMDGPUDisassembler::isGFX1250() const { return AMDGPU::isGFX1250(STI); }

// True when scratch is managed by architected flat-scratch registers rather
// than user SGPRs.
bool AMDGPUDisassembler::hasArchitectedFlatScratch() const {
  return STI.hasFeature(AMDGPU::FeatureArchitectedFlatScratch);
}

// True when the subtarget supports preloading kernel arguments into SGPRs.
bool AMDGPUDisassembler::hasKernargPreload() const {
  return AMDGPU::hasKernargPreload(STI);
}
2094
2095 //===----------------------------------------------------------------------===//
2096 // AMDGPU specific symbol handling
2097 //===----------------------------------------------------------------------===//
2098
2099 /// Print a string describing the reserved bit range specified by Mask with
2100 /// offset BaseBytes for use in error comments. Mask is a single continuous
2101 /// range of 1s surrounded by zeros. The format here is meant to align with the
2102 /// tables that describe these bits in llvm.org/docs/AMDGPUUsage.html.
getBitRangeFromMask(uint32_t Mask,unsigned BaseBytes)2103 static SmallString<32> getBitRangeFromMask(uint32_t Mask, unsigned BaseBytes) {
2104 SmallString<32> Result;
2105 raw_svector_ostream S(Result);
2106
2107 int TrailingZeros = llvm::countr_zero(Mask);
2108 int PopCount = llvm::popcount(Mask);
2109
2110 if (PopCount == 1) {
2111 S << "bit (" << (TrailingZeros + BaseBytes * CHAR_BIT) << ')';
2112 } else {
2113 S << "bits in range ("
2114 << (TrailingZeros + PopCount - 1 + BaseBytes * CHAR_BIT) << ':'
2115 << (TrailingZeros + BaseBytes * CHAR_BIT) << ')';
2116 }
2117
2118 return Result;
2119 }
2120
// Helpers for decoding kernel-descriptor words. All of these capture names
// declared by their users: FourByteBuffer (the current 32-bit word), KdStream,
// Indent, and MAI.

// Extract a named bitfield from FourByteBuffer.
#define GET_FIELD(MASK) (AMDHSA_BITS_GET(FourByteBuffer, MASK))
// Emit "<DIRECTIVE> <field value>" as an assembler directive line.
#define PRINT_DIRECTIVE(DIRECTIVE, MASK)                                       \
  do {                                                                         \
    KdStream << Indent << DIRECTIVE " " << GET_FIELD(MASK) << '\n';            \
  } while (0)
// Emit the same information, but as a comment, for fields that have no
// corresponding assembler directive.
#define PRINT_PSEUDO_DIRECTIVE_COMMENT(DIRECTIVE, MASK)                        \
  do {                                                                         \
    KdStream << Indent << MAI.getCommentString() << ' ' << DIRECTIVE " "       \
             << GET_FIELD(MASK) << '\n';                                       \
  } while (0)

// Fail decoding with a descriptive error if any reserved bit in MASK is set.
// Note: expands to an early `return` from the enclosing function.
#define CHECK_RESERVED_BITS_IMPL(MASK, DESC, MSG)                              \
  do {                                                                         \
    if (FourByteBuffer & (MASK)) {                                             \
      return createStringError(std::errc::invalid_argument,                    \
                               "kernel descriptor " DESC                       \
                               " reserved %s set" MSG,                         \
                               getBitRangeFromMask((MASK), 0).c_str());        \
    }                                                                          \
  } while (0)

#define CHECK_RESERVED_BITS(MASK) CHECK_RESERVED_BITS_IMPL(MASK, #MASK, "")
#define CHECK_RESERVED_BITS_MSG(MASK, MSG)                                     \
  CHECK_RESERVED_BITS_IMPL(MASK, #MASK, ", " MSG)
#define CHECK_RESERVED_BITS_DESC(MASK, DESC)                                   \
  CHECK_RESERVED_BITS_IMPL(MASK, DESC, "")
#define CHECK_RESERVED_BITS_DESC_MSG(MASK, DESC, MSG)                          \
  CHECK_RESERVED_BITS_IMPL(MASK, DESC, ", " MSG)
2149
/// Decode the COMPUTE_PGM_RSRC1 descriptor word into .amdhsa_* assembler
/// directives written to KdStream, validating that reserved bits are zero.
/// Returns an error for any reserved bit that is set.
// NOLINTNEXTLINE(readability-identifier-naming)
Expected<bool> AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC1(
    uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
  using namespace amdhsa;
  StringRef Indent = "\t";

  // We cannot accurately backward compute #VGPRs used from
  // GRANULATED_WORKITEM_VGPR_COUNT. But we are concerned with getting the same
  // value of GRANULATED_WORKITEM_VGPR_COUNT in the reassembled binary. So we
  // simply calculate the inverse of what the assembler does.

  uint32_t GranulatedWorkitemVGPRCount =
      GET_FIELD(COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT);

  uint32_t NextFreeVGPR =
      (GranulatedWorkitemVGPRCount + 1) *
      AMDGPU::IsaInfo::getVGPREncodingGranule(&STI, EnableWavefrontSize32);

  KdStream << Indent << ".amdhsa_next_free_vgpr " << NextFreeVGPR << '\n';

  // We cannot backward compute values used to calculate
  // GRANULATED_WAVEFRONT_SGPR_COUNT. Hence the original values for following
  // directives can't be computed:
  // .amdhsa_reserve_vcc
  // .amdhsa_reserve_flat_scratch
  // .amdhsa_reserve_xnack_mask
  // They take their respective default values if not specified in the assembly.
  //
  // GRANULATED_WAVEFRONT_SGPR_COUNT
  //    = f(NEXT_FREE_SGPR + VCC + FLAT_SCRATCH + XNACK_MASK)
  //
  // We compute the inverse as though all directives apart from NEXT_FREE_SGPR
  // are set to 0. So while disassembling we consider that:
  //
  // GRANULATED_WAVEFRONT_SGPR_COUNT
  //    = f(NEXT_FREE_SGPR + 0 + 0 + 0)
  //
  // The disassembler cannot recover the original values of those 3 directives.

  uint32_t GranulatedWavefrontSGPRCount =
      GET_FIELD(COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT);

  if (isGFX10Plus())
    CHECK_RESERVED_BITS_MSG(COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
                            "must be zero on gfx10+");

  uint32_t NextFreeSGPR = (GranulatedWavefrontSGPRCount + 1) *
                          AMDGPU::IsaInfo::getSGPREncodingGranule(&STI);

  KdStream << Indent << ".amdhsa_reserve_vcc " << 0 << '\n';
  if (!hasArchitectedFlatScratch())
    KdStream << Indent << ".amdhsa_reserve_flat_scratch " << 0 << '\n';
  KdStream << Indent << ".amdhsa_reserve_xnack_mask " << 0 << '\n';
  KdStream << Indent << ".amdhsa_next_free_sgpr " << NextFreeSGPR << "\n";

  CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_PRIORITY);

  PRINT_DIRECTIVE(".amdhsa_float_round_mode_32",
                  COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32);
  PRINT_DIRECTIVE(".amdhsa_float_round_mode_16_64",
                  COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64);
  PRINT_DIRECTIVE(".amdhsa_float_denorm_mode_32",
                  COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32);
  PRINT_DIRECTIVE(".amdhsa_float_denorm_mode_16_64",
                  COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64);

  CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_PRIV);

  // DX10_CLAMP and IEEE_MODE were removed from RSRC1 on gfx12.
  if (!isGFX12Plus())
    PRINT_DIRECTIVE(".amdhsa_dx10_clamp",
                    COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP);

  CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_DEBUG_MODE);

  if (!isGFX12Plus())
    PRINT_DIRECTIVE(".amdhsa_ieee_mode",
                    COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE);

  CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_BULKY);
  CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_CDBG_USER);

  if (isGFX9Plus())
    PRINT_DIRECTIVE(".amdhsa_fp16_overflow", COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL);

  if (!isGFX9Plus())
    CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC1_GFX6_GFX8_RESERVED0,
                                 "COMPUTE_PGM_RSRC1", "must be zero pre-gfx9");

  CHECK_RESERVED_BITS_DESC(COMPUTE_PGM_RSRC1_RESERVED1, "COMPUTE_PGM_RSRC1");

  if (!isGFX10Plus())
    CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC1_GFX6_GFX9_RESERVED2,
                                 "COMPUTE_PGM_RSRC1", "must be zero pre-gfx10");

  if (isGFX10Plus()) {
    PRINT_DIRECTIVE(".amdhsa_workgroup_processor_mode",
                    COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE);
    PRINT_DIRECTIVE(".amdhsa_memory_ordered", COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED);
    PRINT_DIRECTIVE(".amdhsa_forward_progress", COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS);
  }

  if (isGFX12Plus())
    PRINT_DIRECTIVE(".amdhsa_round_robin_scheduling",
                    COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN);

  return true;
}
2257
/// Decode the COMPUTE_PGM_RSRC2 descriptor word (system SGPR enables and
/// exception enables) into .amdhsa_* directives, validating reserved bits.
// NOLINTNEXTLINE(readability-identifier-naming)
Expected<bool> AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC2(
    uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
  using namespace amdhsa;
  StringRef Indent = "\t";
  // The same bit means "enable private segment" with architected
  // flat-scratch, and "wavefront offset SGPR" without it.
  if (hasArchitectedFlatScratch())
    PRINT_DIRECTIVE(".amdhsa_enable_private_segment",
                    COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT);
  else
    PRINT_DIRECTIVE(".amdhsa_system_sgpr_private_segment_wavefront_offset",
                    COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT);
  PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_x",
                  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X);
  PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_y",
                  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y);
  PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_z",
                  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z);
  PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_info",
                  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO);
  PRINT_DIRECTIVE(".amdhsa_system_vgpr_workitem_id",
                  COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID);

  CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_ADDRESS_WATCH);
  CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_MEMORY);
  CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC2_GRANULATED_LDS_SIZE);

  PRINT_DIRECTIVE(
      ".amdhsa_exception_fp_ieee_invalid_op",
      COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION);
  PRINT_DIRECTIVE(".amdhsa_exception_fp_denorm_src",
                  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE);
  PRINT_DIRECTIVE(
      ".amdhsa_exception_fp_ieee_div_zero",
      COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO);
  PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_overflow",
                  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW);
  PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_underflow",
                  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW);
  PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_inexact",
                  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT);
  PRINT_DIRECTIVE(".amdhsa_exception_int_div_zero",
                  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO);

  CHECK_RESERVED_BITS_DESC(COMPUTE_PGM_RSRC2_RESERVED0, "COMPUTE_PGM_RSRC2");

  return true;
}
2305
/// Decode the COMPUTE_PGM_RSRC3 descriptor word. Its layout is entirely
/// generation dependent (gfx90a vs gfx10/gfx11/gfx12); pre-gfx9 it must be
/// all zero. Reserved bits produce an error.
// NOLINTNEXTLINE(readability-identifier-naming)
Expected<bool> AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC3(
    uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
  using namespace amdhsa;
  StringRef Indent = "\t";
  if (isGFX90A()) {
    // ACCUM_OFFSET is stored granulated: field value = offset/4 - 1.
    KdStream << Indent << ".amdhsa_accum_offset "
             << (GET_FIELD(COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET) + 1) * 4
             << '\n';

    PRINT_DIRECTIVE(".amdhsa_tg_split", COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT);

    CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX90A_RESERVED0,
                                 "COMPUTE_PGM_RSRC3", "must be zero on gfx90a");
    CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX90A_RESERVED1,
                                 "COMPUTE_PGM_RSRC3", "must be zero on gfx90a");
  } else if (isGFX10Plus()) {
    // Bits [0-3].
    if (!isGFX12Plus()) {
      if (!EnableWavefrontSize32 || !*EnableWavefrontSize32) {
        PRINT_DIRECTIVE(".amdhsa_shared_vgpr_count",
                        COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT);
      } else {
        PRINT_PSEUDO_DIRECTIVE_COMMENT(
            "SHARED_VGPR_COUNT",
            COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT);
      }
    } else {
      CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX12_PLUS_RESERVED0,
                                   "COMPUTE_PGM_RSRC3",
                                   "must be zero on gfx12+");
    }

    // Bits [4-11].
    if (isGFX11()) {
      PRINT_DIRECTIVE(".amdhsa_inst_pref_size",
                      COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE);
      PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_START",
                                     COMPUTE_PGM_RSRC3_GFX11_TRAP_ON_START);
      PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_END",
                                     COMPUTE_PGM_RSRC3_GFX11_TRAP_ON_END);
    } else if (isGFX12Plus()) {
      PRINT_DIRECTIVE(".amdhsa_inst_pref_size",
                      COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE);
    } else {
      CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_RESERVED1,
                                   "COMPUTE_PGM_RSRC3",
                                   "must be zero on gfx10");
    }

    // Bits [12].
    CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_PLUS_RESERVED2,
                                 "COMPUTE_PGM_RSRC3", "must be zero on gfx10+");

    // Bits [13].
    if (isGFX12Plus()) {
      PRINT_PSEUDO_DIRECTIVE_COMMENT("GLG_EN",
                                     COMPUTE_PGM_RSRC3_GFX12_PLUS_GLG_EN);
    } else {
      CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_GFX11_RESERVED3,
                                   "COMPUTE_PGM_RSRC3",
                                   "must be zero on gfx10 or gfx11");
    }

    // Bits [14-30].
    CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_PLUS_RESERVED4,
                                 "COMPUTE_PGM_RSRC3", "must be zero on gfx10+");

    // Bits [31].
    if (isGFX11Plus()) {
      PRINT_PSEUDO_DIRECTIVE_COMMENT("IMAGE_OP",
                                     COMPUTE_PGM_RSRC3_GFX11_PLUS_IMAGE_OP);
    } else {
      CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_RESERVED5,
                                   "COMPUTE_PGM_RSRC3",
                                   "must be zero on gfx10");
    }
  } else if (FourByteBuffer) {
    return createStringError(
        std::errc::invalid_argument,
        "kernel descriptor COMPUTE_PGM_RSRC3 must be all zero before gfx9");
  }
  return true;
}
2390 #undef PRINT_PSEUDO_DIRECTIVE_COMMENT
2391 #undef PRINT_DIRECTIVE
2392 #undef GET_FIELD
2393 #undef CHECK_RESERVED_BITS_IMPL
2394 #undef CHECK_RESERVED_BITS
2395 #undef CHECK_RESERVED_BITS_MSG
2396 #undef CHECK_RESERVED_BITS_DESC
2397 #undef CHECK_RESERVED_BITS_DESC_MSG
2398
2399 /// Create an error object to return from onSymbolStart for reserved kernel
2400 /// descriptor bits being set.
createReservedKDBitsError(uint32_t Mask,unsigned BaseBytes,const char * Msg="")2401 static Error createReservedKDBitsError(uint32_t Mask, unsigned BaseBytes,
2402 const char *Msg = "") {
2403 return createStringError(
2404 std::errc::invalid_argument, "kernel descriptor reserved %s set%s%s",
2405 getBitRangeFromMask(Mask, BaseBytes).c_str(), *Msg ? ", " : "", Msg);
2406 }
2407
2408 /// Create an error object to return from onSymbolStart for reserved kernel
2409 /// descriptor bytes being set.
createReservedKDBytesError(unsigned BaseInBytes,unsigned WidthInBytes)2410 static Error createReservedKDBytesError(unsigned BaseInBytes,
2411 unsigned WidthInBytes) {
2412 // Create an error comment in the same format as the "Kernel Descriptor"
2413 // table here: https://llvm.org/docs/AMDGPUUsage.html#kernel-descriptor .
2414 return createStringError(
2415 std::errc::invalid_argument,
2416 "kernel descriptor reserved bits in range (%u:%u) set",
2417 (BaseInBytes + WidthInBytes) * CHAR_BIT - 1, BaseInBytes * CHAR_BIT);
2418 }
2419
decodeKernelDescriptorDirective(DataExtractor::Cursor & Cursor,ArrayRef<uint8_t> Bytes,raw_string_ostream & KdStream) const2420 Expected<bool> AMDGPUDisassembler::decodeKernelDescriptorDirective(
2421 DataExtractor::Cursor &Cursor, ArrayRef<uint8_t> Bytes,
2422 raw_string_ostream &KdStream) const {
2423 #define PRINT_DIRECTIVE(DIRECTIVE, MASK) \
2424 do { \
2425 KdStream << Indent << DIRECTIVE " " \
2426 << ((TwoByteBuffer & MASK) >> (MASK##_SHIFT)) << '\n'; \
2427 } while (0)
2428
2429 uint16_t TwoByteBuffer = 0;
2430 uint32_t FourByteBuffer = 0;
2431
2432 StringRef ReservedBytes;
2433 StringRef Indent = "\t";
2434
2435 assert(Bytes.size() == 64);
2436 DataExtractor DE(Bytes, /*IsLittleEndian=*/true, /*AddressSize=*/8);
2437
2438 switch (Cursor.tell()) {
2439 case amdhsa::GROUP_SEGMENT_FIXED_SIZE_OFFSET:
2440 FourByteBuffer = DE.getU32(Cursor);
2441 KdStream << Indent << ".amdhsa_group_segment_fixed_size " << FourByteBuffer
2442 << '\n';
2443 return true;
2444
2445 case amdhsa::PRIVATE_SEGMENT_FIXED_SIZE_OFFSET:
2446 FourByteBuffer = DE.getU32(Cursor);
2447 KdStream << Indent << ".amdhsa_private_segment_fixed_size "
2448 << FourByteBuffer << '\n';
2449 return true;
2450
2451 case amdhsa::KERNARG_SIZE_OFFSET:
2452 FourByteBuffer = DE.getU32(Cursor);
2453 KdStream << Indent << ".amdhsa_kernarg_size "
2454 << FourByteBuffer << '\n';
2455 return true;
2456
2457 case amdhsa::RESERVED0_OFFSET:
2458 // 4 reserved bytes, must be 0.
2459 ReservedBytes = DE.getBytes(Cursor, 4);
2460 for (int I = 0; I < 4; ++I) {
2461 if (ReservedBytes[I] != 0)
2462 return createReservedKDBytesError(amdhsa::RESERVED0_OFFSET, 4);
2463 }
2464 return true;
2465
2466 case amdhsa::KERNEL_CODE_ENTRY_BYTE_OFFSET_OFFSET:
2467 // KERNEL_CODE_ENTRY_BYTE_OFFSET
2468 // So far no directive controls this for Code Object V3, so simply skip for
2469 // disassembly.
2470 DE.skip(Cursor, 8);
2471 return true;
2472
2473 case amdhsa::RESERVED1_OFFSET:
2474 // 20 reserved bytes, must be 0.
2475 ReservedBytes = DE.getBytes(Cursor, 20);
2476 for (int I = 0; I < 20; ++I) {
2477 if (ReservedBytes[I] != 0)
2478 return createReservedKDBytesError(amdhsa::RESERVED1_OFFSET, 20);
2479 }
2480 return true;
2481
2482 case amdhsa::COMPUTE_PGM_RSRC3_OFFSET:
2483 FourByteBuffer = DE.getU32(Cursor);
2484 return decodeCOMPUTE_PGM_RSRC3(FourByteBuffer, KdStream);
2485
2486 case amdhsa::COMPUTE_PGM_RSRC1_OFFSET:
2487 FourByteBuffer = DE.getU32(Cursor);
2488 return decodeCOMPUTE_PGM_RSRC1(FourByteBuffer, KdStream);
2489
2490 case amdhsa::COMPUTE_PGM_RSRC2_OFFSET:
2491 FourByteBuffer = DE.getU32(Cursor);
2492 return decodeCOMPUTE_PGM_RSRC2(FourByteBuffer, KdStream);
2493
2494 case amdhsa::KERNEL_CODE_PROPERTIES_OFFSET:
2495 using namespace amdhsa;
2496 TwoByteBuffer = DE.getU16(Cursor);
2497
2498 if (!hasArchitectedFlatScratch())
2499 PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_buffer",
2500 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER);
2501 PRINT_DIRECTIVE(".amdhsa_user_sgpr_dispatch_ptr",
2502 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR);
2503 PRINT_DIRECTIVE(".amdhsa_user_sgpr_queue_ptr",
2504 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR);
2505 PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_segment_ptr",
2506 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR);
2507 PRINT_DIRECTIVE(".amdhsa_user_sgpr_dispatch_id",
2508 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID);
2509 if (!hasArchitectedFlatScratch())
2510 PRINT_DIRECTIVE(".amdhsa_user_sgpr_flat_scratch_init",
2511 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT);
2512 PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_size",
2513 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE);
2514
2515 if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED0)
2516 return createReservedKDBitsError(KERNEL_CODE_PROPERTY_RESERVED0,
2517 amdhsa::KERNEL_CODE_PROPERTIES_OFFSET);
2518
2519 // Reserved for GFX9
2520 if (isGFX9() &&
2521 (TwoByteBuffer & KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32)) {
2522 return createReservedKDBitsError(
2523 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
2524 amdhsa::KERNEL_CODE_PROPERTIES_OFFSET, "must be zero on gfx9");
2525 }
2526 if (isGFX10Plus()) {
2527 PRINT_DIRECTIVE(".amdhsa_wavefront_size32",
2528 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);
2529 }
2530
2531 if (CodeObjectVersion >= AMDGPU::AMDHSA_COV5)
2532 PRINT_DIRECTIVE(".amdhsa_uses_dynamic_stack",
2533 KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK);
2534
2535 if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED1) {
2536 return createReservedKDBitsError(KERNEL_CODE_PROPERTY_RESERVED1,
2537 amdhsa::KERNEL_CODE_PROPERTIES_OFFSET);
2538 }
2539
2540 return true;
2541
2542 case amdhsa::KERNARG_PRELOAD_OFFSET:
2543 using namespace amdhsa;
2544 TwoByteBuffer = DE.getU16(Cursor);
2545 if (TwoByteBuffer & KERNARG_PRELOAD_SPEC_LENGTH) {
2546 PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_preload_length",
2547 KERNARG_PRELOAD_SPEC_LENGTH);
2548 }
2549
2550 if (TwoByteBuffer & KERNARG_PRELOAD_SPEC_OFFSET) {
2551 PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_preload_offset",
2552 KERNARG_PRELOAD_SPEC_OFFSET);
2553 }
2554 return true;
2555
2556 case amdhsa::RESERVED3_OFFSET:
2557 // 4 bytes from here are reserved, must be 0.
2558 ReservedBytes = DE.getBytes(Cursor, 4);
2559 for (int I = 0; I < 4; ++I) {
2560 if (ReservedBytes[I] != 0)
2561 return createReservedKDBytesError(amdhsa::RESERVED3_OFFSET, 4);
2562 }
2563 return true;
2564
2565 default:
2566 llvm_unreachable("Unhandled index. Case statements cover everything.");
2567 return true;
2568 }
2569 #undef PRINT_DIRECTIVE
2570 }
2571
/// Disassemble a complete 64-byte HSA kernel descriptor into a stream of
/// .amdhsa_* assembler directives, printed to stdout.
///
/// \param KdName    symbol name of the descriptor with the ".kd" suffix
///                  already stripped by the caller.
/// \param Bytes     raw descriptor bytes; must be exactly 64 bytes.
/// \param KdAddress address of the descriptor; must be 64-byte aligned.
/// \returns true on success, or an Error describing the malformed field.
Expected<bool> AMDGPUDisassembler::decodeKernelDescriptor(
    StringRef KdName, ArrayRef<uint8_t> Bytes, uint64_t KdAddress) const {

  // CP microcode requires the kernel descriptor to be 64 aligned.
  if (Bytes.size() != 64 || KdAddress % 64 != 0)
    return createStringError(std::errc::invalid_argument,
                             "kernel descriptor must be 64-byte aligned");

  // FIXME: We can't actually decode "in order" as is done below, as e.g. GFX10
  // requires us to know the setting of .amdhsa_wavefront_size32 in order to
  // accurately produce .amdhsa_next_free_vgpr, and they appear in the wrong
  // order. Workaround this by first looking up .amdhsa_wavefront_size32 here
  // when required.
  if (isGFX10Plus()) {
    uint16_t KernelCodeProperties =
        support::endian::read16(&Bytes[amdhsa::KERNEL_CODE_PROPERTIES_OFFSET],
                                llvm::endianness::little);
    EnableWavefrontSize32 =
        AMDHSA_BITS_GET(KernelCodeProperties,
                        amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);
  }

  // Accumulate the directives into a string so nothing is emitted if a later
  // field turns out to be malformed.
  std::string Kd;
  raw_string_ostream KdStream(Kd);
  KdStream << ".amdhsa_kernel " << KdName << '\n';

  // Walk the descriptor field by field; each directive decoder advances the
  // cursor past the bytes it consumed.
  DataExtractor::Cursor C(0);
  while (C && C.tell() < Bytes.size()) {
    Expected<bool> Res = decodeKernelDescriptorDirective(C, Bytes, KdStream);

    // Extraction failures are reported through Res; a live cursor error here
    // would be a programming bug, which cantFail asserts on.
    cantFail(C.takeError());

    if (!Res)
      return Res;
  }
  KdStream << ".end_amdhsa_kernel\n";
  outs() << KdStream.str();
  return true;
}
2611
onSymbolStart(SymbolInfoTy & Symbol,uint64_t & Size,ArrayRef<uint8_t> Bytes,uint64_t Address) const2612 Expected<bool> AMDGPUDisassembler::onSymbolStart(SymbolInfoTy &Symbol,
2613 uint64_t &Size,
2614 ArrayRef<uint8_t> Bytes,
2615 uint64_t Address) const {
2616 // Right now only kernel descriptor needs to be handled.
2617 // We ignore all other symbols for target specific handling.
2618 // TODO:
2619 // Fix the spurious symbol issue for AMDGPU kernels. Exists for both Code
2620 // Object V2 and V3 when symbols are marked protected.
2621
2622 // amd_kernel_code_t for Code Object V2.
2623 if (Symbol.Type == ELF::STT_AMDGPU_HSA_KERNEL) {
2624 Size = 256;
2625 return createStringError(std::errc::invalid_argument,
2626 "code object v2 is not supported");
2627 }
2628
2629 // Code Object V3 kernel descriptors.
2630 StringRef Name = Symbol.Name;
2631 if (Symbol.Type == ELF::STT_OBJECT && Name.ends_with(StringRef(".kd"))) {
2632 Size = 64; // Size = 64 regardless of success or failure.
2633 return decodeKernelDescriptor(Name.drop_back(3), Bytes, Address);
2634 }
2635
2636 return false;
2637 }
2638
createConstantSymbolExpr(StringRef Id,int64_t Val)2639 const MCExpr *AMDGPUDisassembler::createConstantSymbolExpr(StringRef Id,
2640 int64_t Val) {
2641 MCContext &Ctx = getContext();
2642 MCSymbol *Sym = Ctx.getOrCreateSymbol(Id);
2643 // Note: only set value to Val on a new symbol in case an dissassembler
2644 // has already been initialized in this context.
2645 if (!Sym->isVariable()) {
2646 Sym->setVariableValue(MCConstantExpr::create(Val, Ctx));
2647 } else {
2648 int64_t Res = ~Val;
2649 bool Valid = Sym->getVariableValue()->evaluateAsAbsolute(Res);
2650 if (!Valid || Res != Val)
2651 Ctx.reportWarning(SMLoc(), "unsupported redefinition of " + Id);
2652 }
2653 return MCSymbolRefExpr::create(Sym, Ctx);
2654 }
2655
2656 //===----------------------------------------------------------------------===//
2657 // AMDGPUSymbolizer
2658 //===----------------------------------------------------------------------===//
2659
2660 // Try to find symbol name for specified label
tryAddingSymbolicOperand(MCInst & Inst,raw_ostream &,int64_t Value,uint64_t,bool IsBranch,uint64_t,uint64_t,uint64_t)2661 bool AMDGPUSymbolizer::tryAddingSymbolicOperand(
2662 MCInst &Inst, raw_ostream & /*cStream*/, int64_t Value,
2663 uint64_t /*Address*/, bool IsBranch, uint64_t /*Offset*/,
2664 uint64_t /*OpSize*/, uint64_t /*InstSize*/) {
2665
2666 if (!IsBranch) {
2667 return false;
2668 }
2669
2670 auto *Symbols = static_cast<SectionSymbolsTy *>(DisInfo);
2671 if (!Symbols)
2672 return false;
2673
2674 auto Result = llvm::find_if(*Symbols, [Value](const SymbolInfoTy &Val) {
2675 return Val.Addr == static_cast<uint64_t>(Value) &&
2676 Val.Type == ELF::STT_NOTYPE;
2677 });
2678 if (Result != Symbols->end()) {
2679 auto *Sym = Ctx.getOrCreateSymbol(Result->Name);
2680 const auto *Add = MCSymbolRefExpr::create(Sym, Ctx);
2681 Inst.addOperand(MCOperand::createExpr(Add));
2682 return true;
2683 }
2684 // Add to list of referenced addresses, so caller can synthesize a label.
2685 ReferencedAddresses.push_back(static_cast<uint64_t>(Value));
2686 return false;
2687 }
2688
// PC-relative load comments are not implemented for AMDGPU; this hook is not
// expected to be invoked, and aborts (llvm_unreachable) if it is.
void AMDGPUSymbolizer::tryAddingPcLoadReferenceComment(raw_ostream &cStream,
                                                       int64_t Value,
                                                       uint64_t Address) {
  llvm_unreachable("unimplemented");
}
2694
2695 //===----------------------------------------------------------------------===//
2696 // Initialization
2697 //===----------------------------------------------------------------------===//
2698
// Factory registered with the TargetRegistry. Only the MC context, the
// relocation info, and the opaque disassembly-info pointer are consumed; the
// remaining callback parameters are unused by the AMDGPU symbolizer.
static MCSymbolizer *createAMDGPUSymbolizer(const Triple &/*TT*/,
                                            LLVMOpInfoCallback /*GetOpInfo*/,
                                            LLVMSymbolLookupCallback /*SymbolLookUp*/,
                                            void *DisInfo,
                                            MCContext *Ctx,
                                            std::unique_ptr<MCRelocationInfo> &&RelInfo) {
  return new AMDGPUSymbolizer(*Ctx, std::move(RelInfo), DisInfo);
}
2707
// Factory registered with the TargetRegistry for the GCN target; builds a
// disassembler over a freshly created MCInstrInfo for target \p T.
static MCDisassembler *createAMDGPUDisassembler(const Target &T,
                                                const MCSubtargetInfo &STI,
                                                MCContext &Ctx) {
  return new AMDGPUDisassembler(STI, Ctx, T.createMCInstrInfo());
}
2713
2714 extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void
LLVMInitializeAMDGPUDisassembler()2715 LLVMInitializeAMDGPUDisassembler() {
2716 TargetRegistry::RegisterMCDisassembler(getTheGCNTarget(),
2717 createAMDGPUDisassembler);
2718 TargetRegistry::RegisterMCSymbolizer(getTheGCNTarget(),
2719 createAMDGPUSymbolizer);
2720 }
2721