//===- AMDGPUDisassembler.cpp - Disassembler for AMDGPU ISA ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
//===----------------------------------------------------------------------===//
//
/// \file
///
/// This file contains definition for AMDGPU ISA disassembler
//
//===----------------------------------------------------------------------===//

// ToDo: What to do with instruction suffixes (v_mov_b32 vs v_mov_b32_e32)?

#include "Disassembler/AMDGPUDisassembler.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIDefines.h"
#include "SIRegisterInfo.h"
#include "TargetInfo/AMDGPUTargetInfo.h"
#include "Utils/AMDGPUAsmUtils.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm-c/DisassemblerTypes.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDecoderOps.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/Compiler.h"

using namespace llvm;

#define DEBUG_TYPE "amdgpu-disassembler"

// Largest legal SGPR encoding value for the current subtarget: GFX10+ uses a
// wider SGPR encoding range than SI-era targets.
#define SGPR_MAX                                                               \
  (isGFX10Plus() ? AMDGPU::EncValues::SGPR_MAX_GFX10                           \
                 : AMDGPU::EncValues::SGPR_MAX_SI)

using DecodeStatus = llvm::MCDisassembler::DecodeStatus;

// Forward declarations: map an inline-constant encoding to the immediate
// value for each operand format (defined later in this file).
static int64_t getInlineImmValF16(unsigned Imm);
static int64_t getInlineImmValBF16(unsigned Imm);
static int64_t getInlineImmVal32(unsigned Imm);
static int64_t getInlineImmVal64(unsigned Imm);

AMDGPUDisassembler::AMDGPUDisassembler(const MCSubtargetInfo &STI,
                                       MCContext &Ctx, MCInstrInfo const *MCII)
    : MCDisassembler(STI, Ctx), MCII(MCII), MRI(*Ctx.getRegisterInfo()),
      MAI(*Ctx.getAsmInfo()), TargetMaxInstBytes(MAI.getMaxInstLength(&STI)),
      CodeObjectVersion(AMDGPU::getDefaultAMDHSACodeObjectVersion()) {
  // ToDo: AMDGPUDisassembler supports only VI ISA.
  // Only GCN3-encoding (VI+) and GFX10+ subtargets are supported; anything
  // older is rejected up front.
  if (!STI.hasFeature(AMDGPU::FeatureGCN3Encoding) && !isGFX10Plus())
    reportFatalUsageError("disassembly not yet supported for subtarget");

  // Pre-create constant symbol expressions for the known microcode (UC)
  // versions so operands can later be rendered symbolically.
  for (auto [Symbol, Code] : AMDGPU::UCVersion::getGFXVersions())
    createConstantSymbolExpr(Symbol, Code);

  UCVersionW64Expr = createConstantSymbolExpr("UC_VERSION_W64_BIT", 0x2000);
  UCVersionW32Expr = createConstantSymbolExpr("UC_VERSION_W32_BIT", 0x4000);
  UCVersionMDPExpr = createConstantSymbolExpr("UC_VERSION_MDP_BIT", 0x8000);
}

// Record the AMDHSA code-object ABI version for this disassembly session.
void AMDGPUDisassembler::setABIVersion(unsigned Version) {
  CodeObjectVersion = AMDGPU::getAMDHSACodeObjectVersion(Version);
}

// Append Opnd to Inst. Decoding of the operand is considered successful only
// when the operand is valid; an invalid operand fails the whole instruction.
inline static MCDisassembler::DecodeStatus
addOperand(MCInst &Inst, const MCOperand& Opnd) {
  Inst.addOperand(Opnd);
  return Opnd.isValid() ?
    MCDisassembler::Success :
    MCDisassembler::Fail;
}

// Insert Op at the position of the operand named Name for MI's opcode,
// shifting subsequent operands right. Returns the index used, or -1 (leaving
// MI unchanged) when the opcode has no operand with that name.
static int insertNamedMCOperand(MCInst &MI, const MCOperand &Op,
                                AMDGPU::OpName Name) {
  int OpIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), Name);
  if (OpIdx != -1) {
    auto *I = MI.begin();
    std::advance(I, OpIdx);
    MI.insert(I, Op);
  }
  return OpIdx;
}

// Decode a SOPP branch target: the 16-bit immediate is a signed offset in
// 4-byte words relative to the instruction following this one.
static DecodeStatus decodeSOPPBrTarget(MCInst &Inst, unsigned Imm,
                                       uint64_t Addr,
                                       const MCDisassembler *Decoder) {
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);

  // Our branches take a simm16.
  int64_t Offset = SignExtend64<16>(Imm) * 4 + 4 + Addr;

  // Prefer a symbolic operand (label) when one can be resolved; otherwise
  // fall back to the raw immediate.
  if (DAsm->tryAddingSymbolicOperand(Inst, Offset, Addr, true, 2, 2, 0))
    return MCDisassembler::Success;
  return addOperand(Inst, MCOperand::createImm(Imm));
}

// Decode an SMEM offset; width and signedness of the field vary by subtarget
// generation.
static DecodeStatus decodeSMEMOffset(MCInst &Inst, unsigned Imm, uint64_t Addr,
                                     const MCDisassembler *Decoder) {
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  int64_t Offset;
  if (DAsm->isGFX12Plus()) { // GFX12 supports 24-bit signed offsets.
    Offset = SignExtend64<24>(Imm);
  } else if (DAsm->isVI()) { // VI supports 20-bit unsigned offsets.
    Offset = Imm & 0xFFFFF;
  } else { // GFX9+ supports 21-bit signed offsets.
    Offset = SignExtend64<21>(Imm);
  }
  return addOperand(Inst, MCOperand::createImm(Offset));
}

// Decode a boolean (condition/mask) register operand.
static DecodeStatus decodeBoolReg(MCInst &Inst, unsigned Val, uint64_t Addr,
                                  const MCDisassembler *Decoder) {
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeBoolReg(Val));
}

// Decode a split-barrier operand.
static DecodeStatus decodeSplitBarrier(MCInst &Inst, unsigned Val,
                                       uint64_t Addr,
                                       const MCDisassembler *Decoder) {
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeSplitBarrier(Val));
}

// Decode the DPP8 fetch-invalid (FI) field.
static DecodeStatus decodeDpp8FI(MCInst &Inst, unsigned Val, uint64_t Addr,
                                 const MCDisassembler *Decoder) {
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeDpp8FI(Val));
}

// Boilerplate generator: a static decoder hook that forwards the raw
// immediate to the named AMDGPUDisassembler member decode method.
#define DECODE_OPERAND(StaticDecoderName, DecoderName)                         \
  static DecodeStatus StaticDecoderName(MCInst &Inst, unsigned Imm,            \
                                        uint64_t /*Addr*/,                     \
                                        const MCDisassembler *Decoder) {       \
    auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);              \
    return addOperand(Inst, DAsm->DecoderName(Imm));                           \
  }

// Decoder for registers, decode directly using RegClassID. Imm(8-bit) is
// number of register. Used by VGPR only and AGPR only operands.
// Generates 'Decode<RegClass>RegisterClass' for a register class whose
// encoding is a plain 8-bit register number (VGPR-only / AGPR-only operands).
#define DECODE_OPERAND_REG_8(RegClass)                                         \
  static DecodeStatus Decode##RegClass##RegisterClass(                         \
      MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,                           \
      const MCDisassembler *Decoder) {                                         \
    assert(Imm < (1 << 8) && "8-bit encoding");                                \
    auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);              \
    return addOperand(                                                         \
        Inst, DAsm->createRegOperand(AMDGPU::RegClass##RegClassID, Imm));      \
  }

// Generates a decoder that validates an EncSize-bit raw encoding and routes
// EncImm through AMDGPUDisassembler::decodeSrcOp for an OpWidth-bit operand.
#define DECODE_SrcOp(Name, EncSize, OpWidth, EncImm)                           \
  static DecodeStatus Name(MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,      \
                           const MCDisassembler *Decoder) {                    \
    assert(Imm < (1 << EncSize) && #EncSize "-bit encoding");                  \
    auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);              \
    return addOperand(Inst, DAsm->decodeSrcOp(OpWidth, EncImm));               \
  }

// Function form of DECODE_SrcOp; shared by the template decoders below that
// need to adjust the encoding (e.g. force the VGPR/AGPR bits) before decoding.
static DecodeStatus decodeSrcOp(MCInst &Inst, unsigned EncSize,
                                unsigned OpWidth, unsigned Imm, unsigned EncImm,
                                const MCDisassembler *Decoder) {
  assert(Imm < (1U << EncSize) && "Operand doesn't fit encoding!");
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeSrcOp(OpWidth, EncImm));
}

// Decoder for registers. Imm(7-bit) is number of register, uses decodeSrcOp to
// get register class. Used by SGPR only operands.
#define DECODE_OPERAND_SREG_7(RegClass, OpWidth)                               \
  DECODE_SrcOp(Decode##RegClass##RegisterClass, 7, OpWidth, Imm)

// Same as DECODE_OPERAND_SREG_7 but for an 8-bit register-number encoding.
#define DECODE_OPERAND_SREG_8(RegClass, OpWidth)                               \
  DECODE_SrcOp(Decode##RegClass##RegisterClass, 8, OpWidth, Imm)

// Decoder for registers. Imm(10-bit): Imm{7-0} is number of register,
// Imm{9} is acc(agpr or vgpr) Imm{8} should be 0 (see VOP3Pe_SMFMAC).
// Set Imm{8} to 1 (IS_VGPR) to decode using 'enum10' from decodeSrcOp.
// Used by AV_ register classes (AGPR or VGPR only register operands).
template <unsigned OpWidth>
static DecodeStatus decodeAV10(MCInst &Inst, unsigned Imm, uint64_t /* Addr */,
                               const MCDisassembler *Decoder) {
  // Force the IS_VGPR bit so decodeSrcOp interprets the value as an
  // 'enum10' AGPR/VGPR encoding (see comment above).
  return decodeSrcOp(Inst, 10, OpWidth, Imm, Imm | AMDGPU::EncValues::IS_VGPR,
                     Decoder);
}

// Decoder for Src(9-bit encoding) registers only.
template <unsigned OpWidth>
static DecodeStatus decodeSrcReg9(MCInst &Inst, unsigned Imm,
                                  uint64_t /* Addr */,
                                  const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm, Decoder);
}

// Decoder for Src(9-bit encoding) AGPR, register number encoded in 9bits, set
// Imm{9} to 1 (set acc) and decode using 'enum10' from decodeSrcOp, registers
// only.
template <unsigned OpWidth>
static DecodeStatus decodeSrcA9(MCInst &Inst, unsigned Imm, uint64_t /* Addr */,
                                const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm | 512, Decoder);
}

// Decoder for 'enum10' from decodeSrcOp, Imm{0-8} is 9-bit Src encoding
// Imm{9} is acc, registers only.
template <unsigned OpWidth>
static DecodeStatus decodeSrcAV10(MCInst &Inst, unsigned Imm,
                                  uint64_t /* Addr */,
                                  const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 10, OpWidth, Imm, Imm, Decoder);
}

// Decoder for RegisterOperands using 9-bit Src encoding. Operand can be
// register from RegClass or immediate. Registers that don't belong to RegClass
// will be decoded and InstPrinter will report warning. Immediate will be
// decoded into constant matching the OperandType (important for floating point
// types).
template <unsigned OpWidth>
static DecodeStatus decodeSrcRegOrImm9(MCInst &Inst, unsigned Imm,
                                       uint64_t /* Addr */,
                                       const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm, Decoder);
}

// Decoder for Src(9-bit encoding) AGPR or immediate. Set Imm{9} to 1 (set acc)
// and decode using 'enum10' from decodeSrcOp.
template <unsigned OpWidth>
static DecodeStatus decodeSrcRegOrImmA9(MCInst &Inst, unsigned Imm,
                                        uint64_t /* Addr */,
                                        const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm | 512, Decoder);
}

// Default decoders generated by tablegen: 'Decode<RegClass>RegisterClass'
// when RegisterClass is used as an operand. Most often used for destination
// operands.

DECODE_OPERAND_REG_8(VGPR_32)
DECODE_OPERAND_REG_8(VGPR_32_Lo128)
DECODE_OPERAND_REG_8(VReg_64)
DECODE_OPERAND_REG_8(VReg_96)
DECODE_OPERAND_REG_8(VReg_128)
DECODE_OPERAND_REG_8(VReg_192)
DECODE_OPERAND_REG_8(VReg_256)
DECODE_OPERAND_REG_8(VReg_288)
DECODE_OPERAND_REG_8(VReg_320)
DECODE_OPERAND_REG_8(VReg_352)
DECODE_OPERAND_REG_8(VReg_384)
DECODE_OPERAND_REG_8(VReg_512)
DECODE_OPERAND_REG_8(VReg_1024)

DECODE_OPERAND_SREG_7(SReg_32, 32)
DECODE_OPERAND_SREG_7(SReg_32_XM0, 32)
DECODE_OPERAND_SREG_7(SReg_32_XEXEC, 32)
DECODE_OPERAND_SREG_7(SReg_32_XM0_XEXEC, 32)
DECODE_OPERAND_SREG_7(SReg_32_XEXEC_HI, 32)
DECODE_OPERAND_SREG_7(SReg_64_XEXEC, 64)
DECODE_OPERAND_SREG_7(SReg_64_XEXEC_XNULL, 64)
DECODE_OPERAND_SREG_7(SReg_96, 96)
DECODE_OPERAND_SREG_7(SReg_128, 128)
DECODE_OPERAND_SREG_7(SReg_128_XNULL, 128)
DECODE_OPERAND_SREG_7(SReg_256, 256)
DECODE_OPERAND_SREG_7(SReg_256_XNULL, 256)
DECODE_OPERAND_SREG_7(SReg_512, 512)

DECODE_OPERAND_SREG_8(SReg_64, 64)

DECODE_OPERAND_REG_8(AGPR_32)
DECODE_OPERAND_REG_8(AReg_64)
DECODE_OPERAND_REG_8(AReg_128)
DECODE_OPERAND_REG_8(AReg_256)
DECODE_OPERAND_REG_8(AReg_512)
DECODE_OPERAND_REG_8(AReg_1024)

// True16 VGPR operand: Imm{9} selects the hi/lo 16-bit half of the VGPR and
// Imm{7-0} is the register index. Imm{8} must be clear in this encoding.
static DecodeStatus DecodeVGPR_16RegisterClass(MCInst &Inst, unsigned Imm,
                                               uint64_t /*Addr*/,
                                               const MCDisassembler *Decoder) {
  assert(isUInt<10>(Imm) && "10-bit encoding expected");
  assert((Imm & (1 << 8)) == 0 && "Imm{8} should not be used");

  bool IsHi = Imm & (1 << 9);
  unsigned RegIdx = Imm & 0xff;
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
}

// True16 VGPR operand restricted to the low 128 VGPRs: Imm{7} is the hi/lo
// half selector, Imm{6-0} the register index.
static DecodeStatus
DecodeVGPR_16_Lo128RegisterClass(MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,
                                 const MCDisassembler *Decoder) {
  assert(isUInt<8>(Imm) && "8-bit encoding expected");

  bool IsHi = Imm & (1 << 7);
  unsigned RegIdx = Imm & 0x7f;
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
}

// True16 VSrc operand (low 128 VGPRs): VGPRs are decoded as 16-bit halves;
// anything else (SGPR / inline constant / literal) goes through the
// non-VGPR source decoder.
template <unsigned OpWidth>
static DecodeStatus decodeOperand_VSrcT16_Lo128(MCInst &Inst, unsigned Imm,
                                                uint64_t /*Addr*/,
                                                const MCDisassembler *Decoder) {
  assert(isUInt<9>(Imm) && "9-bit encoding expected");

  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  if (Imm & AMDGPU::EncValues::IS_VGPR) {
    bool IsHi = Imm & (1 << 7);
    unsigned RegIdx = Imm & 0x7f;
    return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
  }
  return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(OpWidth, Imm & 0xFF));
}

// True16 VSrc operand over the full VGPR file (10-bit encoding, Imm{9} is the
// hi/lo half selector for VGPRs).
template <unsigned OpWidth>
static DecodeStatus decodeOperand_VSrcT16(MCInst &Inst, unsigned Imm,
                                          uint64_t /*Addr*/,
                                          const MCDisassembler *Decoder) {
  assert(isUInt<10>(Imm) && "10-bit encoding expected");

  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  if (Imm & AMDGPU::EncValues::IS_VGPR) {
    bool IsHi = Imm & (1 << 9);
    unsigned RegIdx = Imm & 0xff;
    return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
  }
  return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(OpWidth, Imm & 0xFF));
}

// True16 VGPR-only operand: the encoding must already carry the IS_VGPR bit.
static DecodeStatus decodeOperand_VGPR_16(MCInst &Inst, unsigned Imm,
                                          uint64_t /*Addr*/,
                                          const MCDisassembler *Decoder) {
  assert(isUInt<10>(Imm) && "10-bit encoding expected");
  assert(Imm & AMDGPU::EncValues::IS_VGPR && "VGPR expected");

  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);

  bool IsHi = Imm & (1 << 9);
  unsigned RegIdx = Imm & 0xff;
  return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
}

// Mandatory 32-bit FP literal (KImm) operand.
static DecodeStatus decodeOperand_KImmFP(MCInst &Inst, unsigned Imm,
                                         uint64_t Addr,
                                         const MCDisassembler *Decoder) {
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeMandatoryLiteralConstant(Imm));
}

// Mandatory 64-bit FP literal (KImm) operand.
static DecodeStatus decodeOperand_KImmFP64(MCInst &Inst, uint64_t Imm,
                                           uint64_t Addr,
                                           const MCDisassembler *Decoder) {
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeMandatoryLiteral64Constant(Imm));
}

// VOPD dstY operand; decoding consults the partially built Inst.
static DecodeStatus decodeOperandVOPDDstY(MCInst &Inst, unsigned Val,
                                          uint64_t Addr, const void *Decoder) {
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeVOPDDstYOp(Inst, Val));
}

// Returns true if operand OpIdx of Inst is a register in the AGPR file
// (checked via its sub0 subregister for tuple registers).
static bool IsAGPROperand(const MCInst &Inst, int OpIdx,
                          const MCRegisterInfo *MRI) {
  if (OpIdx < 0)
    return false;

  const MCOperand &Op = Inst.getOperand(OpIdx);
  if (!Op.isReg())
    return false;

  MCRegister Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
  auto Reg = Sub ? Sub : Op.getReg();
  return Reg >= AMDGPU::AGPR0 && Reg <= AMDGPU::AGPR255;
}

// Decode an AGPR-or-VGPR load/store data operand. On non-GFX90A targets only
// the 9-bit register number is meaningful; on GFX90A the acc bit may have to
// be inferred from an already-decoded tied operand (continued below).
static DecodeStatus decodeAVLdSt(MCInst &Inst, unsigned Imm, unsigned Opw,
                                 const MCDisassembler *Decoder) {
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  if (!DAsm->isGFX90A()) {
    Imm &= 511;
  } else {
    // If atomic has both vdata and vdst their register classes are tied.
    // The bit is decoded along with the vdst, first operand.
    // We need to change register class to AGPR if vdst was AGPR.
    // If a DS instruction has both data0 and data1 their register classes
    // are also tied.
    unsigned Opc = Inst.getOpcode();
    uint64_t TSFlags = DAsm->getMCII()->get(Opc).TSFlags;
    // DS instructions name their tied data operand 'data0'; buffer/flat memory
    // instructions use 'vdata'.
    AMDGPU::OpName DataName = (TSFlags & SIInstrFlags::DS)
                                  ? AMDGPU::OpName::data0
                                  : AMDGPU::OpName::vdata;
    const MCRegisterInfo *MRI = DAsm->getContext().getRegisterInfo();
    int DataIdx = AMDGPU::getNamedOperandIdx(Opc, DataName);
    // We are currently decoding the data operand itself (it would be appended
    // at DataIdx); mirror the acc bit from the already-decoded vdst.
    if ((int)Inst.getNumOperands() == DataIdx) {
      int DstIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
      if (IsAGPROperand(Inst, DstIdx, MRI))
        Imm |= 512;
    }

    // Likewise for DS data1, tied to data0.
    if (TSFlags & SIInstrFlags::DS) {
      int Data2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data1);
      if ((int)Inst.getNumOperands() == Data2Idx &&
          IsAGPROperand(Inst, DataIdx, MRI))
        Imm |= 512;
    }
  }
  return addOperand(Inst, DAsm->decodeSrcOp(Opw, Imm | 256));
}

// Tablegen-facing wrapper: bind the operand width as a template argument.
template <unsigned Opw>
static DecodeStatus decodeAVLdSt(MCInst &Inst, unsigned Imm,
                                 uint64_t /* Addr */,
                                 const MCDisassembler *Decoder) {
  return decodeAVLdSt(Inst, Imm, Opw, Decoder);
}

// 64-bit VSrc operand with a 9-bit source encoding.
static DecodeStatus decodeOperand_VSrc_f64(MCInst &Inst, unsigned Imm,
                                           uint64_t Addr,
                                           const MCDisassembler *Decoder) {
  assert(Imm < (1 << 9) && "9-bit encoding");
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeSrcOp(64, Imm));
}

// SDWA operand decoders simply forward to the same-named member functions.
#define DECODE_SDWA(DecName) \
  DECODE_OPERAND(decodeSDWA##DecName, decodeSDWA##DecName)

DECODE_SDWA(Src32)
DECODE_SDWA(Src16)
DECODE_SDWA(VopcDst)

// Decode an s_version-style immediate into a (possibly symbolic) operand.
static DecodeStatus decodeVersionImm(MCInst &Inst, unsigned Imm,
                                     uint64_t /* Addr */,
                                     const MCDisassembler *Decoder) {
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeVersionImm(Imm));
}

// Tablegen-generated decoder tables; must come after all decoder hooks above
// since the generated code references them by name.
#include "AMDGPUGenDisassemblerTables.inc"

//===----------------------------------------------------------------------===//
//
//===----------------------------------------------------------------------===//

// Attempt to decode Inst against one decoder table. Decodes into a temporary
// MCInst and a local comment buffer so that, on failure, MI, the comment
// stream, and the byte cursor (Bytes) are all left untouched for the next
// table to try.
template <typename InsnType>
DecodeStatus AMDGPUDisassembler::tryDecodeInst(const uint8_t *Table, MCInst &MI,
                                               InsnType Inst, uint64_t Address,
                                               raw_ostream &Comments) const {
  assert(MI.getOpcode() == 0);
  assert(MI.getNumOperands() == 0);
  MCInst TmpInst;
  HasLiteral = false;
  const auto SavedBytes = Bytes;

  SmallString<64> LocalComments;
  raw_svector_ostream LocalCommentStream(LocalComments);
  CommentStream = &LocalCommentStream;

  DecodeStatus Res =
      decodeInstruction(Table, TmpInst, Inst, Address, this, STI);

  CommentStream = nullptr;

  if (Res != MCDisassembler::Fail) {
    MI = TmpInst;
    Comments << LocalComments;
    return MCDisassembler::Success;
  }
  // Restore the byte cursor a literal-operand decode may have advanced.
  Bytes = SavedBytes;
  return MCDisassembler::Fail;
}

// Convenience overload: try two decoder tables in order, returning on the
// first success.
template <typename InsnType>
DecodeStatus
AMDGPUDisassembler::tryDecodeInst(const uint8_t *Table1, const uint8_t *Table2,
                                  MCInst &MI, InsnType Inst, uint64_t Address,
                                  raw_ostream &Comments) const {
  for (const uint8_t *T : {Table1, Table2}) {
    if (DecodeStatus Res = tryDecodeInst(T, MI, Inst, Address, Comments))
      return Res;
  }
  return MCDisassembler::Fail;
}

// Consume sizeof(T) bytes from the front of Bytes, read little-endian.
template <typename T> static inline T eatBytes(ArrayRef<uint8_t>& Bytes) {
  assert(Bytes.size() >= sizeof(T));
  const auto Res =
      support::endian::read<T, llvm::endianness::little>(Bytes.data());
  Bytes = Bytes.slice(sizeof(T));
  return Res;
}

// Consume a 96-bit instruction word (12 bytes, little-endian): 64 low bits
// followed by 32 high bits.
static inline DecoderUInt128 eat12Bytes(ArrayRef<uint8_t> &Bytes) {
  assert(Bytes.size() >= 12);
  uint64_t Lo =
      support::endian::read<uint64_t, llvm::endianness::little>(Bytes.data());
  Bytes = Bytes.slice(8);
  uint64_t Hi =
      support::endian::read<uint32_t, llvm::endianness::little>(Bytes.data());
  Bytes = Bytes.slice(4);
  return DecoderUInt128(Lo, Hi);
}

// Consume a 128-bit instruction word (16 bytes, little-endian).
static inline DecoderUInt128 eat16Bytes(ArrayRef<uint8_t> &Bytes) {
  assert(Bytes.size() >= 16);
  uint64_t Lo =
      support::endian::read<uint64_t, llvm::endianness::little>(Bytes.data());
  Bytes = Bytes.slice(8);
  uint64_t Hi =
      support::endian::read<uint64_t, llvm::endianness::little>(Bytes.data());
  Bytes = Bytes.slice(8);
  return DecoderUInt128(Lo, Hi);
}

// Post-process raw immediate source operands of MI: rewrite inline-integer
// encodings, literal-constant markers, and inline-float encodings into the
// actual immediate values dictated by each operand's declared OperandType.
void AMDGPUDisassembler::decodeImmOperands(MCInst &MI,
                                           const MCInstrInfo &MCII) const {
  const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
  for (auto [OpNo, OpDesc] : enumerate(Desc.operands())) {
    if (OpNo >= MI.getNumOperands())
      continue;

    // TODO: Fix V_DUAL_FMAMK_F32_X_FMAAK_F32_gfx12 vsrc operands,
    // defined to take VGPR_32, but in reality allowing inline constants.
    bool IsSrc = AMDGPU::OPERAND_SRC_FIRST <= OpDesc.OperandType &&
                 OpDesc.OperandType <= AMDGPU::OPERAND_SRC_LAST;
    if (!IsSrc && OpDesc.OperandType != MCOI::OPERAND_REGISTER)
      continue;

    MCOperand &Op = MI.getOperand(OpNo);
    if (!Op.isImm())
      continue;
    int64_t Imm = Op.getImm();
    if (AMDGPU::EncValues::INLINE_INTEGER_C_MIN <= Imm &&
        Imm <= AMDGPU::EncValues::INLINE_INTEGER_C_MAX) {
      Op = decodeIntImmed(Imm);
      continue;
    }

    if (Imm == AMDGPU::EncValues::LITERAL_CONST) {
      Op = decodeLiteralConstant(OpDesc.OperandType ==
                                 AMDGPU::OPERAND_REG_IMM_FP64);
      continue;
    }

    if (AMDGPU::EncValues::INLINE_FLOATING_C_MIN <= Imm &&
        Imm <= AMDGPU::EncValues::INLINE_FLOATING_C_MAX) {
      // Select the inline-float translation table matching the operand width.
      switch (OpDesc.OperandType) {
      case AMDGPU::OPERAND_REG_IMM_BF16:
      case AMDGPU::OPERAND_REG_IMM_V2BF16:
      case AMDGPU::OPERAND_REG_INLINE_C_BF16:
      case AMDGPU::OPERAND_REG_INLINE_C_V2BF16:
        Imm = getInlineImmValBF16(Imm);
        break;
      case AMDGPU::OPERAND_REG_IMM_FP16:
      case AMDGPU::OPERAND_REG_IMM_INT16:
      case AMDGPU::OPERAND_REG_IMM_V2FP16:
      case AMDGPU::OPERAND_REG_INLINE_C_FP16:
      case AMDGPU::OPERAND_REG_INLINE_C_INT16:
      case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
        Imm = getInlineImmValF16(Imm);
        break;
      case AMDGPU::OPERAND_REG_IMM_FP64:
      case AMDGPU::OPERAND_REG_IMM_INT64:
      case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
      case AMDGPU::OPERAND_REG_INLINE_C_FP64:
      case AMDGPU::OPERAND_REG_INLINE_C_INT64:
        Imm = getInlineImmVal64(Imm);
        break;
      default:
        Imm = getInlineImmVal32(Imm);
      }
      Op.setImm(Imm);
    }
  }
}

// Decode a single instruction from Bytes_. Tries encodings from widest to
// narrowest (96/128-bit, then 64-bit, then 32-bit words), consulting the
// subtarget-specific decoder tables in a fixed priority order, then applies
// the per-format operand fixups needed for printing. On success, Size is the
// number of bytes consumed.
DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
                                                ArrayRef<uint8_t> Bytes_,
                                                uint64_t Address,
                                                raw_ostream &CS) const {
  unsigned MaxInstBytesNum = std::min((size_t)TargetMaxInstBytes, Bytes_.size());
  Bytes = Bytes_.slice(0, MaxInstBytesNum);

  // In case the opcode is not recognized we'll assume a Size of 4 bytes (unless
  // there are fewer bytes left). This will be overridden on success.
  Size = std::min((size_t)4, Bytes_.size());

  do {
    // ToDo: better to switch encoding length using some bit predicate
    // but it is unknown yet, so try all we can

    // Try to decode DPP and SDWA first to solve conflict with VOP1 and VOP2
    // encodings
    if (isGFX11Plus() && Bytes.size() >= 12 ) {
      DecoderUInt128 DecW = eat12Bytes(Bytes);

      if (isGFX11() &&
          tryDecodeInst(DecoderTableGFX1196, DecoderTableGFX11_FAKE1696, MI,
                        DecW, Address, CS))
        break;

      if (isGFX1250() &&
          tryDecodeInst(DecoderTableGFX125096, DecoderTableGFX1250_FAKE1696, MI,
                        DecW, Address, CS))
        break;

      if (isGFX12() &&
          tryDecodeInst(DecoderTableGFX1296, DecoderTableGFX12_FAKE1696, MI,
                        DecW, Address, CS))
        break;

      if (isGFX12() &&
          tryDecodeInst(DecoderTableGFX12W6496, MI, DecW, Address, CS))
        break;

      if (STI.hasFeature(AMDGPU::Feature64BitLiterals)) {
        // Return 8 bytes for a potential literal.
        Bytes = Bytes_.slice(4, MaxInstBytesNum - 4);

        if (isGFX1250() &&
            tryDecodeInst(DecoderTableGFX125096, MI, DecW, Address, CS))
          break;
      }

      // Reinitialize Bytes
      Bytes = Bytes_.slice(0, MaxInstBytesNum);

    } else if (Bytes.size() >= 16 &&
               STI.hasFeature(AMDGPU::FeatureGFX950Insts)) {
      DecoderUInt128 DecW = eat16Bytes(Bytes);
      if (tryDecodeInst(DecoderTableGFX940128, MI, DecW, Address, CS))
        break;

      // Reinitialize Bytes
      Bytes = Bytes_.slice(0, MaxInstBytesNum);
    }

    // Try 64-bit encodings.
    if (Bytes.size() >= 8) {
      const uint64_t QW = eatBytes<uint64_t>(Bytes);

      if (STI.hasFeature(AMDGPU::FeatureGFX10_BEncoding) &&
          tryDecodeInst(DecoderTableGFX10_B64, MI, QW, Address, CS))
        break;

      if (STI.hasFeature(AMDGPU::FeatureUnpackedD16VMem) &&
          tryDecodeInst(DecoderTableGFX80_UNPACKED64, MI, QW, Address, CS))
        break;

      if (STI.hasFeature(AMDGPU::FeatureGFX950Insts) &&
          tryDecodeInst(DecoderTableGFX95064, MI, QW, Address, CS))
        break;

      // Some GFX9 subtargets repurposed the v_mad_mix_f32, v_mad_mixlo_f16 and
      // v_mad_mixhi_f16 for FMA variants. Try to decode using this special
      // table first so we print the correct name.
      if (STI.hasFeature(AMDGPU::FeatureFmaMixInsts) &&
          tryDecodeInst(DecoderTableGFX9_DL64, MI, QW, Address, CS))
        break;

      if (STI.hasFeature(AMDGPU::FeatureGFX940Insts) &&
          tryDecodeInst(DecoderTableGFX94064, MI, QW, Address, CS))
        break;

      if (STI.hasFeature(AMDGPU::FeatureGFX90AInsts) &&
          tryDecodeInst(DecoderTableGFX90A64, MI, QW, Address, CS))
        break;

      if ((isVI() || isGFX9()) &&
          tryDecodeInst(DecoderTableGFX864, MI, QW, Address, CS))
        break;

      if (isGFX9() && tryDecodeInst(DecoderTableGFX964, MI, QW, Address, CS))
        break;

      if (isGFX10() && tryDecodeInst(DecoderTableGFX1064, MI, QW, Address, CS))
        break;

      if (isGFX1250() &&
          tryDecodeInst(DecoderTableGFX125064, DecoderTableGFX1250_FAKE1664, MI,
                        QW, Address, CS))
        break;

      if (isGFX12() &&
          tryDecodeInst(DecoderTableGFX1264, DecoderTableGFX12_FAKE1664, MI, QW,
                        Address, CS))
        break;

      if (isGFX11() &&
          tryDecodeInst(DecoderTableGFX1164, DecoderTableGFX11_FAKE1664, MI, QW,
                        Address, CS))
        break;

      if (isGFX11() &&
          tryDecodeInst(DecoderTableGFX11W6464, MI, QW, Address, CS))
        break;

      if (isGFX12() &&
          tryDecodeInst(DecoderTableGFX12W6464, MI, QW, Address, CS))
        break;

      // Reinitialize Bytes
      Bytes = Bytes_.slice(0, MaxInstBytesNum);
    }

    // Try decode 32-bit instruction
    if (Bytes.size() >= 4) {
      const uint32_t DW = eatBytes<uint32_t>(Bytes);

      if ((isVI() || isGFX9()) &&
          tryDecodeInst(DecoderTableGFX832, MI, DW, Address, CS))
        break;

      if (tryDecodeInst(DecoderTableAMDGPU32, MI, DW, Address, CS))
        break;

      if (isGFX9() && tryDecodeInst(DecoderTableGFX932, MI, DW, Address, CS))
        break;

      if (STI.hasFeature(AMDGPU::FeatureGFX950Insts) &&
          tryDecodeInst(DecoderTableGFX95032, MI, DW, Address, CS))
        break;

      if (STI.hasFeature(AMDGPU::FeatureGFX90AInsts) &&
          tryDecodeInst(DecoderTableGFX90A32, MI, DW, Address, CS))
        break;

      if (STI.hasFeature(AMDGPU::FeatureGFX10_BEncoding) &&
          tryDecodeInst(DecoderTableGFX10_B32, MI, DW, Address, CS))
        break;

      if (isGFX10() && tryDecodeInst(DecoderTableGFX1032, MI, DW, Address, CS))
        break;

      if (isGFX11() &&
          tryDecodeInst(DecoderTableGFX1132, DecoderTableGFX11_FAKE1632, MI, DW,
                        Address, CS))
        break;

      if (isGFX1250() &&
          tryDecodeInst(DecoderTableGFX125032, DecoderTableGFX1250_FAKE1632, MI,
                        DW, Address, CS))
        break;

      if (isGFX12() &&
          tryDecodeInst(DecoderTableGFX1232, DecoderTableGFX12_FAKE1632, MI, DW,
                        Address, CS))
        break;
    }

    // No table matched at any width.
    return MCDisassembler::Fail;
  } while (false);

  DecodeStatus Status = MCDisassembler::Success;

  decodeImmOperands(MI, *MCII);

  // DPP instructions need conversion to the operand layout the printer
  // expects; the exact conversion depends on the instruction family.
  if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::DPP) {
    if (isMacDPP(MI))
      convertMacDPPInst(MI);

    if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3P)
      convertVOP3PDPPInst(MI);
    else if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOPC)
      convertVOPCDPPInst(MI); // Special VOP3 case
    else if (AMDGPU::isVOPC64DPP(MI.getOpcode()))
      convertVOPC64DPPInst(MI); // Special VOP3 case
    else if (AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dpp8) !=
             -1)
      convertDPP8Inst(MI);
    else if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3)
      convertVOP3DPPInst(MI); // Regular VOP3 case
  }

  convertTrue16OpSel(MI);

  if (AMDGPU::isMAC(MI.getOpcode())) {
    // Insert dummy unused src2_modifiers.
    insertNamedMCOperand(MI, MCOperand::createImm(0),
                         AMDGPU::OpName::src2_modifiers);
  }

  if (MI.getOpcode() == AMDGPU::V_CVT_SR_BF8_F32_e64_dpp ||
      MI.getOpcode() == AMDGPU::V_CVT_SR_FP8_F32_e64_dpp) {
    // Insert dummy unused src2_modifiers.
    insertNamedMCOperand(MI, MCOperand::createImm(0),
                         AMDGPU::OpName::src2_modifiers);
  }

  // Targets without GDS have no encoded gds bit; supply a zero operand so the
  // MCInst still matches the instruction description.
  if ((MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::DS) &&
      !AMDGPU::hasGDS(STI)) {
    insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::gds);
  }

  // Returning atomics implicitly set GLC; fold that into the cpol operand,
  // creating it if the decoder did not produce one.
  if (MCII->get(MI.getOpcode()).TSFlags &
      (SIInstrFlags::MUBUF | SIInstrFlags::FLAT | SIInstrFlags::SMRD)) {
    int CPolPos = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                             AMDGPU::OpName::cpol);
    if (CPolPos != -1) {
      unsigned CPol =
          (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::IsAtomicRet) ?
              AMDGPU::CPol::GLC : 0;
      if (MI.getNumOperands() <= (unsigned)CPolPos) {
        insertNamedMCOperand(MI, MCOperand::createImm(CPol),
                             AMDGPU::OpName::cpol);
      } else if (CPol) {
        MI.getOperand(CPolPos).setImm(MI.getOperand(CPolPos).getImm() | CPol);
      }
    }
  }

  if ((MCII->get(MI.getOpcode()).TSFlags &
       (SIInstrFlags::MTBUF | SIInstrFlags::MUBUF)) &&
      (STI.hasFeature(AMDGPU::FeatureGFX90AInsts))) {
    // GFX90A lost TFE, its place is occupied by ACC.
    int TFEOpIdx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::tfe);
    if (TFEOpIdx != -1) {
      auto *TFEIter = MI.begin();
      std::advance(TFEIter, TFEOpIdx);
      MI.insert(TFEIter, MCOperand::createImm(0));
    }
  }

  // Buffer instructions carry an swz operand in the MCInst that is not part
  // of the decoded bits; insert a zero placeholder.
  if (MCII->get(MI.getOpcode()).TSFlags &
      (SIInstrFlags::MTBUF | SIInstrFlags::MUBUF)) {
    int SWZOpIdx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::swz);
    if (SWZOpIdx != -1) {
      auto *SWZIter = MI.begin();
      std::advance(SWZIter, SWZOpIdx);
      MI.insert(SWZIter, MCOperand::createImm(0));
    }
  }

  if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::MIMG) {
    // NSA (non-sequential address) MIMG forms encode extra VGPR addresses in
    // trailing dwords; consume them and materialize the vaddr operands.
    int VAddr0Idx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);
    int RsrcIdx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::srsrc);
    unsigned NSAArgs = RsrcIdx - VAddr0Idx - 1;
    if (VAddr0Idx >= 0 && NSAArgs > 0) {
      unsigned NSAWords = (NSAArgs + 3) / 4;
      if (Bytes.size() < 4 * NSAWords)
        return MCDisassembler::Fail;
      for (unsigned i = 0; i < NSAArgs; ++i) {
        const unsigned VAddrIdx = VAddr0Idx + 1 + i;
        auto VAddrRCID =
            MCII->get(MI.getOpcode()).operands()[VAddrIdx].RegClass;
        MI.insert(MI.begin() + VAddrIdx, createRegOperand(VAddrRCID, Bytes[i]));
      }
      Bytes = Bytes.slice(4 * NSAWords);
    }

    convertMIMGInst(MI);
  }

  if (MCII->get(MI.getOpcode()).TSFlags &
      (SIInstrFlags::VIMAGE | SIInstrFlags::VSAMPLE))
    convertMIMGInst(MI);

  if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::EXP)
    convertEXPInst(MI);

  if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VINTERP)
    convertVINTERPInst(MI);

  if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::SDWA)
    convertSDWAInst(MI);

  if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::IsMAI)
    convertMAIInst(MI);

  // Enforce the TIED_TO constraint on vdst_in: rebuild it as a copy of its
  // tied operand's register if it is missing or disagrees.
  int VDstIn_Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                              AMDGPU::OpName::vdst_in);
  if (VDstIn_Idx != -1) {
    int Tied = MCII->get(MI.getOpcode()).getOperandConstraint(VDstIn_Idx,
                             MCOI::OperandConstraint::TIED_TO);
    if (Tied != -1 && (MI.getNumOperands() <= (unsigned)VDstIn_Idx ||
         !MI.getOperand(VDstIn_Idx).isReg() ||
         MI.getOperand(VDstIn_Idx).getReg() != MI.getOperand(Tied).getReg())) {
      if (MI.getNumOperands() > (unsigned)VDstIn_Idx)
        MI.erase(&MI.getOperand(VDstIn_Idx));
      insertNamedMCOperand(MI,
                           MCOperand::createReg(MI.getOperand(Tied).getReg()),
                           AMDGPU::OpName::vdst_in);
    }
  }

  bool IsSOPK = MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::SOPK;
  if (AMDGPU::hasNamedOperand(MI.getOpcode(), AMDGPU::OpName::imm) && !IsSOPK)
    convertFMAanyK(MI);

  // Some VOPC instructions, e.g., v_cmpx_f_f64, use VOP3 encoding and
  // have EXEC as implicit destination. Issue a warning if encoding for
  // vdst is not EXEC.
  if ((MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3) &&
      MCII->get(MI.getOpcode()).hasImplicitDefOfPhysReg(AMDGPU::EXEC)) {
    auto ExecEncoding = MRI.getEncodingValue(AMDGPU::EXEC_LO);
    if (Bytes_[0] != ExecEncoding)
      Status = MCDisassembler::SoftFail;
  }

  // Consumed size = bytes eaten from the initial slice.
  Size = MaxInstBytesNum - Bytes.size();
  return Status;
}

void AMDGPUDisassembler::convertEXPInst(MCInst &MI) const {
  if (STI.hasFeature(AMDGPU::FeatureGFX11Insts)) {
    // The MCInst still has these fields even though they are no longer encoded
    // in the GFX11 instruction.
918 insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::vm); 919 insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::compr); 920 } 921 } 922 923 void AMDGPUDisassembler::convertVINTERPInst(MCInst &MI) const { 924 convertTrue16OpSel(MI); 925 if (MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_t16_gfx11 || 926 MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_fake16_gfx11 || 927 MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_t16_gfx12 || 928 MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_fake16_gfx12 || 929 MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_t16_gfx11 || 930 MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_fake16_gfx11 || 931 MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_t16_gfx12 || 932 MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_fake16_gfx12 || 933 MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_t16_gfx11 || 934 MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_fake16_gfx11 || 935 MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_t16_gfx12 || 936 MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_fake16_gfx12 || 937 MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_t16_gfx11 || 938 MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_fake16_gfx11 || 939 MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_t16_gfx12 || 940 MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_fake16_gfx12) { 941 // The MCInst has this field that is not directly encoded in the 942 // instruction. 
943 insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::op_sel); 944 } 945 } 946 947 void AMDGPUDisassembler::convertSDWAInst(MCInst &MI) const { 948 if (STI.hasFeature(AMDGPU::FeatureGFX9) || 949 STI.hasFeature(AMDGPU::FeatureGFX10)) { 950 if (AMDGPU::hasNamedOperand(MI.getOpcode(), AMDGPU::OpName::sdst)) 951 // VOPC - insert clamp 952 insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::clamp); 953 } else if (STI.hasFeature(AMDGPU::FeatureVolcanicIslands)) { 954 int SDst = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::sdst); 955 if (SDst != -1) { 956 // VOPC - insert VCC register as sdst 957 insertNamedMCOperand(MI, createRegOperand(AMDGPU::VCC), 958 AMDGPU::OpName::sdst); 959 } else { 960 // VOP1/2 - insert omod if present in instruction 961 insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::omod); 962 } 963 } 964 } 965 966 /// Adjust the register values used by V_MFMA_F8F6F4_f8_f8 instructions to the 967 /// appropriate subregister for the used format width. 968 static void adjustMFMA_F8F6F4OpRegClass(const MCRegisterInfo &MRI, 969 MCOperand &MO, uint8_t NumRegs) { 970 switch (NumRegs) { 971 case 4: 972 return MO.setReg(MRI.getSubReg(MO.getReg(), AMDGPU::sub0_sub1_sub2_sub3)); 973 case 6: 974 return MO.setReg( 975 MRI.getSubReg(MO.getReg(), AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5)); 976 case 8: 977 // No-op in cases where one operand is still f8/bf8. 978 return; 979 default: 980 llvm_unreachable("Unexpected size for mfma f8f6f4 operand"); 981 } 982 } 983 984 /// f8f6f4 instructions have different pseudos depending on the used formats. In 985 /// the disassembler table, we only have the variants with the largest register 986 /// classes which assume using an fp8/bf8 format for both operands. The actual 987 /// register class depends on the format in blgp and cbsz operands. Adjust the 988 /// register classes depending on the used format. 
989 void AMDGPUDisassembler::convertMAIInst(MCInst &MI) const { 990 int BlgpIdx = 991 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::blgp); 992 if (BlgpIdx == -1) 993 return; 994 995 int CbszIdx = 996 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::cbsz); 997 998 unsigned CBSZ = MI.getOperand(CbszIdx).getImm(); 999 unsigned BLGP = MI.getOperand(BlgpIdx).getImm(); 1000 1001 const AMDGPU::MFMA_F8F6F4_Info *AdjustedRegClassOpcode = 1002 AMDGPU::getMFMA_F8F6F4_WithFormatArgs(CBSZ, BLGP, MI.getOpcode()); 1003 if (!AdjustedRegClassOpcode || 1004 AdjustedRegClassOpcode->Opcode == MI.getOpcode()) 1005 return; 1006 1007 MI.setOpcode(AdjustedRegClassOpcode->Opcode); 1008 int Src0Idx = 1009 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0); 1010 int Src1Idx = 1011 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src1); 1012 adjustMFMA_F8F6F4OpRegClass(MRI, MI.getOperand(Src0Idx), 1013 AdjustedRegClassOpcode->NumRegsSrcA); 1014 adjustMFMA_F8F6F4OpRegClass(MRI, MI.getOperand(Src1Idx), 1015 AdjustedRegClassOpcode->NumRegsSrcB); 1016 } 1017 1018 struct VOPModifiers { 1019 unsigned OpSel = 0; 1020 unsigned OpSelHi = 0; 1021 unsigned NegLo = 0; 1022 unsigned NegHi = 0; 1023 }; 1024 1025 // Reconstruct values of VOP3/VOP3P operands such as op_sel. 1026 // Note that these values do not affect disassembler output, 1027 // so this is only necessary for consistency with src_modifiers. 
1028 static VOPModifiers collectVOPModifiers(const MCInst &MI, 1029 bool IsVOP3P = false) { 1030 VOPModifiers Modifiers; 1031 unsigned Opc = MI.getOpcode(); 1032 const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers, 1033 AMDGPU::OpName::src1_modifiers, 1034 AMDGPU::OpName::src2_modifiers}; 1035 for (int J = 0; J < 3; ++J) { 1036 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); 1037 if (OpIdx == -1) 1038 continue; 1039 1040 unsigned Val = MI.getOperand(OpIdx).getImm(); 1041 1042 Modifiers.OpSel |= !!(Val & SISrcMods::OP_SEL_0) << J; 1043 if (IsVOP3P) { 1044 Modifiers.OpSelHi |= !!(Val & SISrcMods::OP_SEL_1) << J; 1045 Modifiers.NegLo |= !!(Val & SISrcMods::NEG) << J; 1046 Modifiers.NegHi |= !!(Val & SISrcMods::NEG_HI) << J; 1047 } else if (J == 0) { 1048 Modifiers.OpSel |= !!(Val & SISrcMods::DST_OP_SEL) << 3; 1049 } 1050 } 1051 1052 return Modifiers; 1053 } 1054 1055 // Instructions decode the op_sel/suffix bits into the src_modifier 1056 // operands. Copy those bits into the src operands for true16 VGPRs. 
void AMDGPUDisassembler::convertTrue16OpSel(MCInst &MI) const {
  const unsigned Opc = MI.getOpcode();
  const MCRegisterClass &ConversionRC =
      MRI.getRegClass(AMDGPU::VGPR_16RegClassID);
  // Each entry: (register operand, modifiers operand carrying its op_sel bit,
  // mask selecting that bit). Note vdst's bit lives in src0_modifiers.
  constexpr std::array<std::tuple<AMDGPU::OpName, AMDGPU::OpName, unsigned>, 4>
      OpAndOpMods = {{{AMDGPU::OpName::src0, AMDGPU::OpName::src0_modifiers,
                       SISrcMods::OP_SEL_0},
                      {AMDGPU::OpName::src1, AMDGPU::OpName::src1_modifiers,
                       SISrcMods::OP_SEL_0},
                      {AMDGPU::OpName::src2, AMDGPU::OpName::src2_modifiers,
                       SISrcMods::OP_SEL_0},
                      {AMDGPU::OpName::vdst, AMDGPU::OpName::src0_modifiers,
                       SISrcMods::DST_OP_SEL}}};
  for (const auto &[OpName, OpModsName, OpSelMask] : OpAndOpMods) {
    int OpIdx = AMDGPU::getNamedOperandIdx(Opc, OpName);
    int OpModsIdx = AMDGPU::getNamedOperandIdx(Opc, OpModsName);
    if (OpIdx == -1 || OpModsIdx == -1)
      continue;
    MCOperand &Op = MI.getOperand(OpIdx);
    if (!Op.isReg())
      continue;
    // Only 16-bit (true16) VGPR operands carry lo/hi half information.
    if (!ConversionRC.contains(Op.getReg()))
      continue;
    unsigned OpEnc = MRI.getEncodingValue(Op.getReg());
    const MCOperand &OpMods = MI.getOperand(OpModsIdx);
    unsigned ModVal = OpMods.getImm();
    if (ModVal & OpSelMask) { // isHi
      // Re-point the operand at the hi 16-bit half of the same VGPR
      // (registers in VGPR_16 alternate lo/hi, hence RegIdx * 2 + 1).
      unsigned RegIdx = OpEnc & AMDGPU::HWEncoding::REG_IDX_MASK;
      Op.setReg(ConversionRC.getRegister(RegIdx * 2 + 1));
    }
  }
}

// MAC opcodes have special old and src2 operands.
// src2 is tied to dst, while old is not tied (but assumed to be).
bool AMDGPUDisassembler::isMacDPP(MCInst &MI) const {
  constexpr int DST_IDX = 0;
  auto Opcode = MI.getOpcode();
  const auto &Desc = MCII->get(Opcode);
  auto OldIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::old);

  // A MAC DPP is recognized by having an 'old' operand that is NOT tied to
  // anything, while its src2 is tied to the destination.
  if (OldIdx != -1 && Desc.getOperandConstraint(
                          OldIdx, MCOI::OperandConstraint::TIED_TO) == -1) {
    assert(AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::src2));
    assert(Desc.getOperandConstraint(
               AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2),
               MCOI::OperandConstraint::TIED_TO) == DST_IDX);
    (void)DST_IDX;
    return true;
  }

  return false;
}

// Create dummy old operand and insert dummy unused src2_modifiers
void AMDGPUDisassembler::convertMacDPPInst(MCInst &MI) const {
  // Both inserted operands must still fit within the descriptor's operand
  // count.
  assert(MI.getNumOperands() + 1 < MCII->get(MI.getOpcode()).getNumOperands());
  insertNamedMCOperand(MI, MCOperand::createReg(0), AMDGPU::OpName::old);
  insertNamedMCOperand(MI, MCOperand::createImm(0),
                       AMDGPU::OpName::src2_modifiers);
}

// Add operands that a DPP8 instruction expects but that are not directly
// encoded: vdst_in (a copy of the decoded dst), and either a reconstructed
// op_sel or dummy src modifiers.
void AMDGPUDisassembler::convertDPP8Inst(MCInst &MI) const {
  unsigned Opc = MI.getOpcode();

  int VDstInIdx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdst_in);
  if (VDstInIdx != -1)
    insertNamedMCOperand(MI, MI.getOperand(0), AMDGPU::OpName::vdst_in);

  unsigned DescNumOps = MCII->get(Opc).getNumOperands();
  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
    convertTrue16OpSel(MI);
    auto Mods = collectVOPModifiers(MI);
    insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSel),
                         AMDGPU::OpName::op_sel);
  } else {
    // Insert dummy unused src modifiers.
    if (MI.getNumOperands() < DescNumOps &&
        AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src0_modifiers))
      insertNamedMCOperand(MI, MCOperand::createImm(0),
                           AMDGPU::OpName::src0_modifiers);

    if (MI.getNumOperands() < DescNumOps &&
        AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src1_modifiers))
      insertNamedMCOperand(MI, MCOperand::createImm(0),
                           AMDGPU::OpName::src1_modifiers);
  }
}

// VOP3 DPP: fix up true16 register halves, copy dst into vdst_in, and
// reconstruct op_sel from the decoded src_modifiers.
void AMDGPUDisassembler::convertVOP3DPPInst(MCInst &MI) const {
  convertTrue16OpSel(MI);

  int VDstInIdx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdst_in);
  if (VDstInIdx != -1)
    insertNamedMCOperand(MI, MI.getOperand(0), AMDGPU::OpName::vdst_in);

  unsigned Opc = MI.getOpcode();
  unsigned DescNumOps = MCII->get(Opc).getNumOperands();
  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
    auto Mods = collectVOPModifiers(MI);
    insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSel),
                         AMDGPU::OpName::op_sel);
  }
}

// Note that before gfx10, the MIMG encoding provided no information about
// VADDR size. Consequently, decoded instructions always show address as if it
// has 1 dword, which could be not really so.
void AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {
  auto TSFlags = MCII->get(MI.getOpcode()).TSFlags;

  int VDstIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                           AMDGPU::OpName::vdst);

  int VDataIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                            AMDGPU::OpName::vdata);
  int VAddr0Idx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);
  // MIMG uses 'srsrc'; VIMAGE/VSAMPLE use 'rsrc'.
  AMDGPU::OpName RsrcOpName = (TSFlags & SIInstrFlags::MIMG)
                                  ? AMDGPU::OpName::srsrc
                                  : AMDGPU::OpName::rsrc;
  int RsrcIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), RsrcOpName);
  int DMaskIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                            AMDGPU::OpName::dmask);

  int TFEIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                          AMDGPU::OpName::tfe);
  int D16Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                          AMDGPU::OpName::d16);

  const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode());
  const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
      AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);

  assert(VDataIdx != -1);
  if (BaseOpcode->BVH) {
    // Add A16 operand for intersect_ray instructions
    addOperand(MI, MCOperand::createImm(BaseOpcode->A16));
    return;
  }

  bool IsAtomic = (VDstIdx != -1);
  bool IsGather4 = TSFlags & SIInstrFlags::Gather4;
  bool IsVSample = TSFlags & SIInstrFlags::VSAMPLE;
  bool IsNSA = false;
  bool IsPartialNSA = false;
  unsigned AddrSize = Info->VAddrDwords;

  if (isGFX10Plus()) {
    // On gfx10+ the real address size can be computed from dim/a16.
    unsigned DimIdx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dim);
    int A16Idx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::a16);
    const AMDGPU::MIMGDimInfo *Dim =
        AMDGPU::getMIMGDimInfoByEncoding(MI.getOperand(DimIdx).getImm());
    const bool IsA16 = (A16Idx != -1 && MI.getOperand(A16Idx).getImm());

    AddrSize =
        AMDGPU::getAddrSizeMIMGOp(BaseOpcode, Dim, IsA16, AMDGPU::hasG16(STI));

    // VSAMPLE insts that do not use vaddr3 behave the same as NSA forms.
    // VIMAGE insts other than BVH never use vaddr4.
    IsNSA = Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA ||
            Info->MIMGEncoding == AMDGPU::MIMGEncGfx11NSA ||
            Info->MIMGEncoding == AMDGPU::MIMGEncGfx12;
    if (!IsNSA) {
      if (!IsVSample && AddrSize > 12)
        AddrSize = 16;
    } else {
      if (AddrSize > Info->VAddrDwords) {
        if (!STI.hasFeature(AMDGPU::FeaturePartialNSAEncoding)) {
          // The NSA encoding does not contain enough operands for the
          // combination of base opcode / dimension. Should this be an error?
          return;
        }
        IsPartialNSA = true;
      }
    }
  }

  // Number of result dwords: one per enabled dmask channel (gather4 is always
  // 4), halved for packed D16, plus one for the TFE status dword.
  unsigned DMask = MI.getOperand(DMaskIdx).getImm() & 0xf;
  unsigned DstSize = IsGather4 ? 4 : std::max(llvm::popcount(DMask), 1);

  bool D16 = D16Idx >= 0 && MI.getOperand(D16Idx).getImm();
  if (D16 && AMDGPU::hasPackedD16(STI)) {
    DstSize = (DstSize + 1) / 2;
  }

  if (TFEIdx != -1 && MI.getOperand(TFEIdx).getImm())
    DstSize += 1;

  if (DstSize == Info->VDataDwords && AddrSize == Info->VAddrDwords)
    return;

  int NewOpcode =
      AMDGPU::getMIMGOpcode(Info->BaseOpcode, Info->MIMGEncoding, DstSize, AddrSize);
  if (NewOpcode == -1)
    return;

  // Widen the register to the correct number of enabled channels.
  MCRegister NewVdata;
  if (DstSize != Info->VDataDwords) {
    auto DataRCID = MCII->get(NewOpcode).operands()[VDataIdx].RegClass;

    // Get first subregister of VData
    MCRegister Vdata0 = MI.getOperand(VDataIdx).getReg();
    MCRegister VdataSub0 = MRI.getSubReg(Vdata0, AMDGPU::sub0);
    Vdata0 = (VdataSub0 != 0)? VdataSub0 : Vdata0;

    NewVdata = MRI.getMatchingSuperReg(Vdata0, AMDGPU::sub0,
                                       &MRI.getRegClass(DataRCID));
    if (!NewVdata) {
      // It's possible to encode this such that the low register + enabled
      // components exceeds the register count.
      return;
    }
  }

  // If not using NSA on GFX10+, widen vaddr0 address register to correct size.
  // If using partial NSA on GFX11+ widen last address register.
  int VAddrSAIdx = IsPartialNSA ? (RsrcIdx - 1) : VAddr0Idx;
  MCRegister NewVAddrSA;
  if (STI.hasFeature(AMDGPU::FeatureNSAEncoding) && (!IsNSA || IsPartialNSA) &&
      AddrSize != Info->VAddrDwords) {
    MCRegister VAddrSA = MI.getOperand(VAddrSAIdx).getReg();
    MCRegister VAddrSubSA = MRI.getSubReg(VAddrSA, AMDGPU::sub0);
    VAddrSA = VAddrSubSA ? VAddrSubSA : VAddrSA;

    auto AddrRCID = MCII->get(NewOpcode).operands()[VAddrSAIdx].RegClass;
    NewVAddrSA = MRI.getMatchingSuperReg(VAddrSA, AMDGPU::sub0,
                                         &MRI.getRegClass(AddrRCID));
    if (!NewVAddrSA)
      return;
  }

  MI.setOpcode(NewOpcode);

  if (NewVdata != AMDGPU::NoRegister) {
    MI.getOperand(VDataIdx) = MCOperand::createReg(NewVdata);

    if (IsAtomic) {
      // Atomic operations have an additional operand (a copy of data)
      MI.getOperand(VDstIdx) = MCOperand::createReg(NewVdata);
    }
  }

  if (NewVAddrSA) {
    MI.getOperand(VAddrSAIdx) = MCOperand::createReg(NewVAddrSA);
  } else if (IsNSA) {
    // Full NSA with a smaller real address size: drop the now-unused trailing
    // address operands.
    assert(AddrSize <= Info->VAddrDwords);
    MI.erase(MI.begin() + VAddr0Idx + AddrSize,
             MI.begin() + VAddr0Idx + Info->VAddrDwords);
  }
}

// Opsel and neg bits are used in src_modifiers and standalone operands. Autogen
// decoder only adds to src_modifiers, so manually add the bits to the other
// operands.
void AMDGPUDisassembler::convertVOP3PDPPInst(MCInst &MI) const {
  unsigned Opc = MI.getOpcode();
  unsigned DescNumOps = MCII->get(Opc).getNumOperands();
  auto Mods = collectVOPModifiers(MI, true);

  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in))
    insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::vdst_in);

  // Insert each standalone operand only if the descriptor has it and it was
  // not already decoded.
  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel))
    insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSel),
                         AMDGPU::OpName::op_sel);
  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel_hi))
    insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSelHi),
                         AMDGPU::OpName::op_sel_hi);
  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::neg_lo))
    insertNamedMCOperand(MI, MCOperand::createImm(Mods.NegLo),
                         AMDGPU::OpName::neg_lo);
  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::neg_hi))
    insertNamedMCOperand(MI, MCOperand::createImm(Mods.NegHi),
                         AMDGPU::OpName::neg_hi);
}

// Create dummy old operand and insert optional operands
void AMDGPUDisassembler::convertVOPCDPPInst(MCInst &MI) const {
  unsigned Opc = MI.getOpcode();
  unsigned DescNumOps = MCII->get(Opc).getNumOperands();

  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::old))
    insertNamedMCOperand(MI, MCOperand::createReg(0), AMDGPU::OpName::old);

  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src0_modifiers))
    insertNamedMCOperand(MI, MCOperand::createImm(0),
                         AMDGPU::OpName::src0_modifiers);

  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src1_modifiers))
    insertNamedMCOperand(MI, MCOperand::createImm(0),
                         AMDGPU::OpName::src1_modifiers);
}

// 64-bit VOPC DPP: fix up true16 halves and reconstruct the op_sel operand
// from the decoded src_modifiers.
void AMDGPUDisassembler::convertVOPC64DPPInst(MCInst &MI) const {
  unsigned Opc = MI.getOpcode();
  unsigned DescNumOps = MCII->get(Opc).getNumOperands();

  convertTrue16OpSel(MI);

  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
    VOPModifiers Mods = collectVOPModifiers(MI);
    insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSel),
                         AMDGPU::OpName::op_sel);
  }
}

// Re-emit the already-decoded literal as the named immX operand.
void AMDGPUDisassembler::convertFMAanyK(MCInst &MI) const {
  assert(HasLiteral && "Should have decoded a literal");
  insertNamedMCOperand(MI, MCOperand::createImm(Literal), AMDGPU::OpName::immX);
}

// Human-readable register class name, used in diagnostic messages below.
const char* AMDGPUDisassembler::getRegClassName(unsigned RegClassID) const {
  return getContext().getRegisterInfo()->
      getRegClassName(&AMDGPUMCRegisterClasses[RegClassID]);
}

// Report a decoding error on the comment stream and return an invalid
// operand.
inline
MCOperand AMDGPUDisassembler::errOperand(unsigned V,
                                         const Twine& ErrMsg) const {
  *CommentStream << "Error: " + ErrMsg;

  // ToDo: add support for error operands to MCInst.h
  // return MCOperand::createError(V);
  return MCOperand();
}

// Wrap a register in an MCOperand, mapping it to the subtarget-specific MC
// register.
inline
MCOperand AMDGPUDisassembler::createRegOperand(unsigned int RegId) const {
  return MCOperand::createReg(AMDGPU::getMCReg(RegId, STI));
}

// Create a register operand from a class and an index into that class,
// reporting an error operand for out-of-range indices.
inline
MCOperand AMDGPUDisassembler::createRegOperand(unsigned RegClassID,
                                               unsigned Val) const {
  const auto& RegCl = AMDGPUMCRegisterClasses[RegClassID];
  if (Val >= RegCl.getNumRegs())
    return errOperand(Val, Twine(getRegClassName(RegClassID)) +
                           ": unknown register " + Twine(Val));
  return createRegOperand(RegCl.getRegister(Val));
}

// Create a scalar register operand. Val is in 32-bit register units; it is
// converted to an index into the (possibly wider) class by the shift below.
inline
MCOperand AMDGPUDisassembler::createSRegOperand(unsigned SRegClassID,
                                                unsigned Val) const {
  // ToDo: SI/CI have 104 SGPRs, VI - 102
  // Valery: here we accept as much as we can; let the assembler sort it out
  int shift = 0;
  switch (SRegClassID) {
  case AMDGPU::SGPR_32RegClassID:
  case AMDGPU::TTMP_32RegClassID:
    break;
  case AMDGPU::SGPR_64RegClassID:
  case AMDGPU::TTMP_64RegClassID:
    shift = 1;
    break;
  case AMDGPU::SGPR_96RegClassID:
  case AMDGPU::TTMP_96RegClassID:
  case AMDGPU::SGPR_128RegClassID:
  case AMDGPU::TTMP_128RegClassID:
  // ToDo: unclear if s[100:104] is available on VI. Can we use VCC as SGPR in
  // this bundle?
  case AMDGPU::SGPR_256RegClassID:
  case AMDGPU::TTMP_256RegClassID:
  // ToDo: unclear if s[96:104] is available on VI. Can we use VCC as SGPR in
  // this bundle?
  case AMDGPU::SGPR_288RegClassID:
  case AMDGPU::TTMP_288RegClassID:
  case AMDGPU::SGPR_320RegClassID:
  case AMDGPU::TTMP_320RegClassID:
  case AMDGPU::SGPR_352RegClassID:
  case AMDGPU::TTMP_352RegClassID:
  case AMDGPU::SGPR_384RegClassID:
  case AMDGPU::TTMP_384RegClassID:
  case AMDGPU::SGPR_512RegClassID:
  case AMDGPU::TTMP_512RegClassID:
    shift = 2;
    break;
  // ToDo: unclear if s[88:104] is available on VI. Can we use VCC as SGPR in
  // this bundle?
  default:
    llvm_unreachable("unhandled register class");
  }

  if (Val % (1 << shift)) {
    *CommentStream << "Warning: " << getRegClassName(SRegClassID)
                   << ": scalar reg isn't aligned " << Val;
  }

  return createRegOperand(SRegClassID, Val >> shift);
}

// 16-bit VGPR operand: RegIdx selects the 32-bit VGPR, IsHi selects its high
// half (VGPR_16 registers alternate lo/hi).
MCOperand AMDGPUDisassembler::createVGPR16Operand(unsigned RegIdx,
                                                  bool IsHi) const {
  unsigned RegIdxInVGPR16 = RegIdx * 2 + (IsHi ? 1 : 0);
  return createRegOperand(AMDGPU::VGPR_16RegClassID, RegIdxInVGPR16);
}

// Decode Literals for insts which always have a literal in the encoding
MCOperand
AMDGPUDisassembler::decodeMandatoryLiteralConstant(unsigned Val) const {
  if (HasLiteral) {
    assert(
        AMDGPU::hasVOPD(STI) &&
        "Should only decode multiple kimm with VOPD, check VSrc operand types");
    if (Literal != Val)
      return errOperand(Val, "More than one unique literal is illegal");
  }
  HasLiteral = true;
  Literal = Val;
  return MCOperand::createImm(Literal);
}

// 64-bit variant of decodeMandatoryLiteralConstant.
MCOperand
AMDGPUDisassembler::decodeMandatoryLiteral64Constant(uint64_t Val) const {
  if (HasLiteral) {
    if (Literal64 != Val)
      return errOperand(Val, "More than one unique literal is illegal");
  }
  HasLiteral = true;
  Literal = Literal64 = Val;
  return MCOperand::createImm(Literal64);
}

// Consume the 32-bit literal dword that trails the instruction (at most once
// per instruction). With ExtendFP64 the value is placed in the high half of
// the 64-bit literal.
MCOperand AMDGPUDisassembler::decodeLiteralConstant(bool ExtendFP64) const {
  // For now all literal constants are supposed to be unsigned integer
  // ToDo: deal with signed/unsigned 64-bit integer constants
  // ToDo: deal with float/double constants
  if (!HasLiteral) {
    if (Bytes.size() < 4) {
      return errOperand(0, "cannot read literal, inst bytes left " +
                           Twine(Bytes.size()));
    }
    HasLiteral = true;
    Literal = Literal64 = eatBytes<uint32_t>(Bytes);
    if (ExtendFP64)
      Literal64 <<= 32;
  }
  return MCOperand::createImm(ExtendFP64 ? Literal64 : Literal);
}

// Consume a full 64-bit literal (subtargets with Feature64BitLiterals only).
MCOperand AMDGPUDisassembler::decodeLiteral64Constant() const {
  assert(STI.hasFeature(AMDGPU::Feature64BitLiterals));

  if (!HasLiteral) {
    if (Bytes.size() < 8) {
      return errOperand(0, "cannot read literal64, inst bytes left " +
                           Twine(Bytes.size()));
    }
    HasLiteral = true;
    Literal64 = eatBytes<uint64_t>(Bytes);
  }
  return MCOperand::createImm(Literal64);
}

// Map an inline-integer encoding to its signed immediate value: encodings up
// to POSITIVE_MAX are non-negative, the remainder are negative.
MCOperand AMDGPUDisassembler::decodeIntImmed(unsigned Imm) {
  using namespace AMDGPU::EncValues;

  assert(Imm >= INLINE_INTEGER_C_MIN && Imm <= INLINE_INTEGER_C_MAX);
  return MCOperand::createImm((Imm <= INLINE_INTEGER_C_POSITIVE_MAX) ?
    (static_cast<int64_t>(Imm) - INLINE_INTEGER_C_MIN) :
    (INLINE_INTEGER_C_POSITIVE_MAX - static_cast<int64_t>(Imm)));
      // Cast prevents negative overflow.
}

// Bit patterns of the hardware inline FP constants for 32-bit operands.
static int64_t getInlineImmVal32(unsigned Imm) {
  switch (Imm) {
  case 240:
    return llvm::bit_cast<uint32_t>(0.5f);
  case 241:
    return llvm::bit_cast<uint32_t>(-0.5f);
  case 242:
    return llvm::bit_cast<uint32_t>(1.0f);
  case 243:
    return llvm::bit_cast<uint32_t>(-1.0f);
  case 244:
    return llvm::bit_cast<uint32_t>(2.0f);
  case 245:
    return llvm::bit_cast<uint32_t>(-2.0f);
  case 246:
    return llvm::bit_cast<uint32_t>(4.0f);
  case 247:
    return llvm::bit_cast<uint32_t>(-4.0f);
  case 248: // 1 / (2 * PI)
    return 0x3e22f983;
  default:
    llvm_unreachable("invalid fp inline imm");
  }
}

// Bit patterns of the hardware inline FP constants for 64-bit operands.
static int64_t getInlineImmVal64(unsigned Imm) {
  switch (Imm) {
  case 240:
    return llvm::bit_cast<uint64_t>(0.5);
  case 241:
    return llvm::bit_cast<uint64_t>(-0.5);
  case 242:
    return llvm::bit_cast<uint64_t>(1.0);
  case 243:
    return llvm::bit_cast<uint64_t>(-1.0);
  case 244:
    return llvm::bit_cast<uint64_t>(2.0);
  case 245:
    return llvm::bit_cast<uint64_t>(-2.0);
  case 246:
    return llvm::bit_cast<uint64_t>(4.0);
  case 247:
    return llvm::bit_cast<uint64_t>(-4.0);
  case 248: // 1 / (2 * PI)
    return 0x3fc45f306dc9c882;
  default:
    llvm_unreachable("invalid fp inline imm");
  }
}

// Bit patterns of the hardware inline FP constants for f16 operands.
static int64_t getInlineImmValF16(unsigned Imm) {
  switch (Imm) {
  case 240: // 0.5
    return 0x3800;
  case 241: // -0.5
    return 0xB800;
  case 242: // 1.0
    return 0x3C00;
  case 243: // -1.0
    return 0xBC00;
  case 244: // 2.0
    return 0x4000;
  case 245: // -2.0
    return 0xC000;
  case 246: // 4.0
    return 0x4400;
  case 247: // -4.0
    return 0xC400;
  case 248: // 1 / (2 * PI)
    return 0x3118;
  default:
    llvm_unreachable("invalid fp inline imm");
  }
}

// Bit patterns of the hardware inline FP constants for bf16 operands.
static int64_t getInlineImmValBF16(unsigned Imm) {
  switch (Imm) {
  case 240: // 0.5
    return 0x3F00;
  case 241: // -0.5
    return 0xBF00;
  case 242: // 1.0
    return 0x3F80;
  case 243: // -1.0
    return 0xBF80;
  case 244: // 2.0
    return 0x4000;
  case 245: // -2.0
    return 0xC000;
  case 246: // 4.0
    return 0x4080;
  case 247: // -4.0
    return 0xC080;
  case 248: // 1 / (2 * PI)
    return 0x3E22;
  default:
    llvm_unreachable("invalid fp inline imm");
  }
}

// Map an operand width in bits to the corresponding VGPR register class.
// 16-bit operands share the 32-bit class here.
unsigned AMDGPUDisassembler::getVgprClassId(unsigned Width) const {
  using namespace AMDGPU;

  switch (Width) {
  case 16:
  case 32:
    return VGPR_32RegClassID;
  case 64:
    return VReg_64RegClassID;
  case 96:
    return VReg_96RegClassID;
  case 128:
    return VReg_128RegClassID;
  case 160:
    return VReg_160RegClassID;
  case 192:
    return VReg_192RegClassID;
  case 256:
    return VReg_256RegClassID;
  case 288:
    return VReg_288RegClassID;
  case 320:
    return VReg_320RegClassID;
  case 352:
    return VReg_352RegClassID;
  case 384:
    return VReg_384RegClassID;
  case 512:
    return VReg_512RegClassID;
  case 1024:
    return VReg_1024RegClassID;
  }
  llvm_unreachable("Invalid register width!");
}

// Map an operand width in bits to the corresponding AGPR register class.
unsigned AMDGPUDisassembler::getAgprClassId(unsigned Width) const {
  using namespace AMDGPU;

  switch (Width) {
  case 16:
  case 32:
    return AGPR_32RegClassID;
  case 64:
    return AReg_64RegClassID;
  case 96:
    return AReg_96RegClassID;
  case 128:
    return AReg_128RegClassID;
  case 160:
    return AReg_160RegClassID;
  case 256:
    return AReg_256RegClassID;
  case 288:
    return AReg_288RegClassID;
  case 320:
    return AReg_320RegClassID;
  case 352:
    return AReg_352RegClassID;
  case 384:
    return AReg_384RegClassID;
  case 512:
    return AReg_512RegClassID;
  case 1024:
    return AReg_1024RegClassID;
  }
  llvm_unreachable("Invalid register width!");
}

// Map an operand width in bits to the corresponding SGPR register class.
unsigned AMDGPUDisassembler::getSgprClassId(unsigned Width) const {
  using namespace AMDGPU;

  switch (Width) {
  case 16:
  case 32:
    return SGPR_32RegClassID;
  case 64:
    return SGPR_64RegClassID;
  case 96:
    return SGPR_96RegClassID;
  case 128:
    return SGPR_128RegClassID;
  case 160:
    return SGPR_160RegClassID;
  case 256:
    return SGPR_256RegClassID;
  case 288:
    return SGPR_288RegClassID;
  case 320:
    return SGPR_320RegClassID;
  case 352:
    return SGPR_352RegClassID;
  case 384:
    return SGPR_384RegClassID;
  case 512:
    return SGPR_512RegClassID;
  }
  llvm_unreachable("Invalid register width!");
}

// Map an operand width in bits to the corresponding TTMP register class.
unsigned AMDGPUDisassembler::getTtmpClassId(unsigned Width) const {
  using namespace AMDGPU;

  switch (Width) {
  case 16:
  case 32:
    return TTMP_32RegClassID;
  case 64:
    return TTMP_64RegClassID;
  case 128:
    return TTMP_128RegClassID;
  case 256:
    return TTMP_256RegClassID;
  case 288:
    return TTMP_288RegClassID;
  case 320:
    return TTMP_320RegClassID;
  case 352:
    return TTMP_352RegClassID;
  case 384:
    return TTMP_384RegClassID;
  case 512:
    return TTMP_512RegClassID;
  }
  llvm_unreachable("Invalid register width!");
}

// Return the trap-temp register index for Val, or -1 if Val is not a ttmp
// encoding. The ttmp range differs between VI and GFX9+.
int AMDGPUDisassembler::getTTmpIdx(unsigned Val) const {
  using namespace AMDGPU::EncValues;

  unsigned TTmpMin = isGFX9Plus() ? TTMP_GFX9PLUS_MIN : TTMP_VI_MIN;
  unsigned TTmpMax = isGFX9Plus() ? TTMP_GFX9PLUS_MAX : TTMP_VI_MAX;

  return (TTmpMin <= Val && Val <= TTmpMax)? Val - TTmpMin : -1;
}

// Decode a 10-bit (enum10) source operand encoding. Bit 9 selects AGPR vs
// VGPR; everything outside the VGPR range is handled by decodeNonVGPRSrcOp.
MCOperand AMDGPUDisassembler::decodeSrcOp(unsigned Width, unsigned Val) const {
  using namespace AMDGPU::EncValues;

  assert(Val < 1024); // enum10

  bool IsAGPR = Val & 512;
  Val &= 511;

  if (VGPR_MIN <= Val && Val <= VGPR_MAX) {
    return createRegOperand(IsAGPR ? getAgprClassId(Width)
                                   : getVgprClassId(Width), Val - VGPR_MIN);
  }
  return decodeNonVGPRSrcOp(Width, Val & 0xFF);
}

// Decode the non-VGPR part of a source encoding: SGPRs, ttmps, inline
// constants, literals, and special registers.
MCOperand AMDGPUDisassembler::decodeNonVGPRSrcOp(unsigned Width,
                                                 unsigned Val) const {
  // Cases when Val{8} is 1 (vgpr, agpr or true 16 vgpr) should have been
  // decoded earlier.
  assert(Val < (1 << 8) && "9-bit Src encoding when Val{8} is 0");
  using namespace AMDGPU::EncValues;

  if (Val <= SGPR_MAX) {
    // "SGPR_MIN <= Val" is always true and causes compilation warning.
    static_assert(SGPR_MIN == 0);
    return createSRegOperand(getSgprClassId(Width), Val - SGPR_MIN);
  }

  int TTmpIdx = getTTmpIdx(Val);
  if (TTmpIdx >= 0) {
    return createSRegOperand(getTtmpClassId(Width), TTmpIdx);
  }

  // Inline constants and the literal marker are passed through as raw
  // immediates here.
  if ((INLINE_INTEGER_C_MIN <= Val && Val <= INLINE_INTEGER_C_MAX) ||
      (INLINE_FLOATING_C_MIN <= Val && Val <= INLINE_FLOATING_C_MAX) ||
      Val == LITERAL_CONST)
    return MCOperand::createImm(Val);

  if (Val == LITERAL64_CONST && STI.hasFeature(AMDGPU::Feature64BitLiterals)) {
    return decodeLiteral64Constant();
  }

  switch (Width) {
  case 32:
  case 16:
    return decodeSpecialReg32(Val);
  case 64:
    return decodeSpecialReg64(Val);
  case 96:
  case 128:
  case 256:
  case 512:
    return decodeSpecialReg96Plus(Val);
  default:
    llvm_unreachable("unexpected immediate type");
  }
}

// Bit 0 of DstY isn't stored in the instruction, because it's always the
// opposite of bit 0 of DstX.
MCOperand AMDGPUDisassembler::decodeVOPDDstYOp(MCInst &Inst,
                                               unsigned Val) const {
  // Locate the already-decoded DstX operand of this VOPD instruction; the
  // hardware derives bit 0 of DstY from it, so it is not encoded.
  int VDstXInd =
      AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::vdstX);
  assert(VDstXInd != -1);
  assert(Inst.getOperand(VDstXInd).isReg());
  unsigned XDstReg = MRI.getEncodingValue(Inst.getOperand(VDstXInd).getReg());
  // Reconstruct bit 0 of DstY as the complement of bit 0 of DstX.
  Val |= ~XDstReg & 1;
  return createRegOperand(getVgprClassId(32), Val);
}

/// Decode a 32-bit special-register source encoding (the operand values that
/// are neither plain SGPRs/VGPRs nor inline constants) into the corresponding
/// register operand. Unrecognized encodings produce an error operand.
MCOperand AMDGPUDisassembler::decodeSpecialReg32(unsigned Val) const {
  using namespace AMDGPU;

  switch (Val) {
  // clang-format off
  case 102: return createRegOperand(FLAT_SCR_LO);
  case 103: return createRegOperand(FLAT_SCR_HI);
  case 104: return createRegOperand(XNACK_MASK_LO);
  case 105: return createRegOperand(XNACK_MASK_HI);
  case 106: return createRegOperand(VCC_LO);
  case 107: return createRegOperand(VCC_HI);
  case 108: return createRegOperand(TBA_LO);
  case 109: return createRegOperand(TBA_HI);
  case 110: return createRegOperand(TMA_LO);
  case 111: return createRegOperand(TMA_HI);
  // Encodings 124 and 125 swap meaning (M0 vs. SGPR_NULL) on GFX11+.
  case 124:
    return isGFX11Plus() ? createRegOperand(SGPR_NULL) : createRegOperand(M0);
  case 125:
    return isGFX11Plus() ? createRegOperand(M0) : createRegOperand(SGPR_NULL);
  case 126: return createRegOperand(EXEC_LO);
  case 127: return createRegOperand(EXEC_HI);
  case 235: return createRegOperand(SRC_SHARED_BASE_LO);
  case 236: return createRegOperand(SRC_SHARED_LIMIT_LO);
  case 237: return createRegOperand(SRC_PRIVATE_BASE_LO);
  case 238: return createRegOperand(SRC_PRIVATE_LIMIT_LO);
  case 239: return createRegOperand(SRC_POPS_EXITING_WAVE_ID);
  case 251: return createRegOperand(SRC_VCCZ);
  case 252: return createRegOperand(SRC_EXECZ);
  case 253: return createRegOperand(SRC_SCC);
  case 254: return createRegOperand(LDS_DIRECT);
  default: break;
  // clang-format on
  }
  return errOperand(Val, "unknown operand encoding " + Twine(Val));
}

/// Decode a 64-bit special-register source encoding into the corresponding
/// register-pair operand. Unrecognized encodings produce an error operand.
MCOperand AMDGPUDisassembler::decodeSpecialReg64(unsigned Val) const {
  using namespace AMDGPU;

  switch (Val) {
  case 102: return createRegOperand(FLAT_SCR);
  case 104: return createRegOperand(XNACK_MASK);
  case 106: return createRegOperand(VCC);
  case 108: return createRegOperand(TBA);
  case 110: return createRegOperand(TMA);
  // SGPR_NULL moved from encoding 125 to 124 on GFX11+.
  case 124:
    if (isGFX11Plus())
      return createRegOperand(SGPR_NULL);
    break;
  case 125:
    if (!isGFX11Plus())
      return createRegOperand(SGPR_NULL);
    break;
  case 126: return createRegOperand(EXEC);
  case 235: return createRegOperand(SRC_SHARED_BASE);
  case 236: return createRegOperand(SRC_SHARED_LIMIT);
  case 237: return createRegOperand(SRC_PRIVATE_BASE);
  case 238: return createRegOperand(SRC_PRIVATE_LIMIT);
  case 239: return createRegOperand(SRC_POPS_EXITING_WAVE_ID);
  case 251: return createRegOperand(SRC_VCCZ);
  case 252: return createRegOperand(SRC_EXECZ);
  case 253: return createRegOperand(SRC_SCC);
  default: break;
  }
  return errOperand(Val, "unknown operand encoding " + Twine(Val));
}

/// Decode a special-register encoding for operands of width 96 bits or more.
/// Only SGPR_NULL is representable at these widths; anything else is an
/// error operand.
MCOperand AMDGPUDisassembler::decodeSpecialReg96Plus(unsigned Val) const {
  using namespace AMDGPU;

  switch (Val) {
  // SGPR_NULL moved from encoding 125 to 124 on GFX11+.
  case 124:
    if (isGFX11Plus())
      return createRegOperand(SGPR_NULL);
    break;
  case 125:
    if (!isGFX11Plus())
      return createRegOperand(SGPR_NULL);
    break;
  default:
    break;
  }
  return errOperand(Val, "unknown operand encoding " + Twine(Val));
}

/// Decode an SDWA source operand of the given bit \p Width.
/// On GFX9/GFX10 the SDWA source encoding covers VGPRs, SGPRs, TTMPs, inline
/// constants and special registers; on VI it can only name a VGPR.
MCOperand AMDGPUDisassembler::decodeSDWASrc(unsigned Width,
                                            const unsigned Val) const {
  using namespace AMDGPU::SDWA;
  using namespace AMDGPU::EncValues;

  if (STI.hasFeature(AMDGPU::FeatureGFX9) ||
      STI.hasFeature(AMDGPU::FeatureGFX10)) {
    // The int casts avoid a tautological "comparison with unsigned is always
    // true" warning when SRC_VGPR_MIN is zero.
    if (int(SDWA9EncValues::SRC_VGPR_MIN) <= int(Val) &&
        Val <= SDWA9EncValues::SRC_VGPR_MAX) {
      return createRegOperand(getVgprClassId(Width),
                              Val - SDWA9EncValues::SRC_VGPR_MIN);
    }
    // The SGPR range ends higher on GFX10 than on earlier subtargets.
    if (SDWA9EncValues::SRC_SGPR_MIN <= Val &&
        Val <= (isGFX10Plus() ? SDWA9EncValues::SRC_SGPR_MAX_GFX10
                              : SDWA9EncValues::SRC_SGPR_MAX_SI)) {
      return createSRegOperand(getSgprClassId(Width),
                               Val - SDWA9EncValues::SRC_SGPR_MIN);
    }
    if (SDWA9EncValues::SRC_TTMP_MIN <= Val &&
        Val <= SDWA9EncValues::SRC_TTMP_MAX) {
      return createSRegOperand(getTtmpClassId(Width),
                               Val - SDWA9EncValues::SRC_TTMP_MIN);
    }

    // Remaining encodings are rebased to the SGPR origin and interpreted as
    // inline constants or 32-bit special registers.
    const unsigned SVal = Val - SDWA9EncValues::SRC_SGPR_MIN;

    if ((INLINE_INTEGER_C_MIN <= SVal && SVal <= INLINE_INTEGER_C_MAX) ||
        (INLINE_FLOATING_C_MIN <= SVal && SVal <= INLINE_FLOATING_C_MAX))
      return MCOperand::createImm(SVal);

    return decodeSpecialReg32(SVal);
  }
  if (STI.hasFeature(AMDGPU::FeatureVolcanicIslands))
    return createRegOperand(getVgprClassId(Width), Val);
  llvm_unreachable("unsupported target");
}

// 16-bit-wide SDWA source operand.
MCOperand AMDGPUDisassembler::decodeSDWASrc16(unsigned Val) const {
  return decodeSDWASrc(16, Val);
}

// 32-bit-wide SDWA source operand.
MCOperand AMDGPUDisassembler::decodeSDWASrc32(unsigned Val) const {
  return decodeSDWASrc(32, Val);
}

/// Decode the SDWA VOPC destination field: either the implicit VCC (when the
/// VCC mask bit is clear) or an explicit SGPR/TTMP/special-register pair
/// sized by the wavefront width.
MCOperand AMDGPUDisassembler::decodeSDWAVopcDst(unsigned Val) const {
  using namespace AMDGPU::SDWA;

  assert((STI.hasFeature(AMDGPU::FeatureGFX9) ||
          STI.hasFeature(AMDGPU::FeatureGFX10)) &&
         "SDWAVopcDst should be present only on GFX9+");

  bool IsWave32 = STI.hasFeature(AMDGPU::FeatureWavefrontSize32);

  if (Val & SDWA9EncValues::VOPC_DST_VCC_MASK) {
    Val &= SDWA9EncValues::VOPC_DST_SGPR_MASK;

    int TTmpIdx = getTTmpIdx(Val);
    if (TTmpIdx >= 0) {
      auto TTmpClsId = getTtmpClassId(IsWave32 ? 32 : 64);
      return createSRegOperand(TTmpClsId, TTmpIdx);
    }
    // Values past the SGPR range denote special registers.
    if (Val > SGPR_MAX) {
      return IsWave32 ? decodeSpecialReg32(Val) : decodeSpecialReg64(Val);
    }
    return createSRegOperand(getSgprClassId(IsWave32 ? 32 : 64), Val);
  }
  return createRegOperand(IsWave32 ? AMDGPU::VCC_LO : AMDGPU::VCC);
}

// A "bool" register is one lane-mask bit per lane: 32-bit wide in wave32
// mode, 64-bit wide otherwise.
MCOperand AMDGPUDisassembler::decodeBoolReg(unsigned Val) const {
  return STI.hasFeature(AMDGPU::FeatureWavefrontSize32) ? decodeSrcOp(32, Val)
                                                        : decodeSrcOp(64, Val);
}

// Split-barrier operands decode as ordinary 32-bit sources.
MCOperand AMDGPUDisassembler::decodeSplitBarrier(unsigned Val) const {
  return decodeSrcOp(32, Val);
}

// DPP8 fetch-invalid field: only the two defined FI values are accepted;
// anything else yields an invalid operand.
MCOperand AMDGPUDisassembler::decodeDpp8FI(unsigned Val) const {
  if (Val != AMDGPU::DPP::DPP8_FI_0 && Val != AMDGPU::DPP::DPP8_FI_1)
    return MCOperand();
  return MCOperand::createImm(Val);
}

/// Decode a microcode-version immediate into a symbolic expression built from
/// the UC_VERSION_* symbols registered in the constructor, falling back to a
/// plain immediate when the encoding is not fully recognized.
MCOperand AMDGPUDisassembler::decodeVersionImm(unsigned Imm) const {
  using VersionField = AMDGPU::EncodingField<7, 0>;
  using W64Bit = AMDGPU::EncodingBit<13>;
  using W32Bit = AMDGPU::EncodingBit<14>;
  using MDPBit = AMDGPU::EncodingBit<15>;
  using Encoding = AMDGPU::EncodingFields<VersionField, W64Bit, W32Bit, MDPBit>;

  auto [Version, W64, W32, MDP] = Encoding::decode(Imm);

  // Decode into a plain immediate if any unused bits are raised.
  if (Encoding::encode(Version, W64, W32, MDP) != Imm)
    return MCOperand::createImm(Imm);

  // Map the version code back to its named symbol when one exists.
  const auto &Versions = AMDGPU::UCVersion::getGFXVersions();
  const auto *I = find_if(
      Versions, [Version = Version](const AMDGPU::UCVersion::GFXVersion &V) {
        return V.Code == Version;
      });
  MCContext &Ctx = getContext();
  const MCExpr *E;
  if (I == Versions.end())
    E = MCConstantExpr::create(Version, Ctx);
  else
    E = MCSymbolRefExpr::create(Ctx.getOrCreateSymbol(I->Symbol), Ctx);

  // OR in the symbolic flag bits that were set in the immediate.
  if (W64)
    E = MCBinaryExpr::createOr(E, UCVersionW64Expr, Ctx);
  if (W32)
    E = MCBinaryExpr::createOr(E, UCVersionW32Expr, Ctx);
  if (MDP)
    E = MCBinaryExpr::createOr(E, UCVersionMDPExpr, Ctx);

  return MCOperand::createExpr(E);
}

// Subtarget-generation predicates, used throughout decoding to select
// per-generation encodings.

bool AMDGPUDisassembler::isVI() const {
  return STI.hasFeature(AMDGPU::FeatureVolcanicIslands);
}

bool AMDGPUDisassembler::isGFX9() const { return AMDGPU::isGFX9(STI); }

bool AMDGPUDisassembler::isGFX90A() const {
  return STI.hasFeature(AMDGPU::FeatureGFX90AInsts);
}

bool AMDGPUDisassembler::isGFX9Plus() const { return AMDGPU::isGFX9Plus(STI); }

bool AMDGPUDisassembler::isGFX10() const { return AMDGPU::isGFX10(STI); }

bool AMDGPUDisassembler::isGFX10Plus() const {
  return AMDGPU::isGFX10Plus(STI);
}

bool AMDGPUDisassembler::isGFX11() const {
  return STI.hasFeature(AMDGPU::FeatureGFX11);
}

bool AMDGPUDisassembler::isGFX11Plus() const {
  return AMDGPU::isGFX11Plus(STI);
}

bool AMDGPUDisassembler::isGFX12() const {
  return STI.hasFeature(AMDGPU::FeatureGFX12);
}

bool AMDGPUDisassembler::isGFX12Plus() const {
  return AMDGPU::isGFX12Plus(STI);
}

bool AMDGPUDisassembler::isGFX1250() const { return AMDGPU::isGFX1250(STI); }

bool AMDGPUDisassembler::hasArchitectedFlatScratch() const {
  return STI.hasFeature(AMDGPU::FeatureArchitectedFlatScratch);
}

bool AMDGPUDisassembler::hasKernargPreload() const {
  return AMDGPU::hasKernargPreload(STI);
}

//===----------------------------------------------------------------------===//
// AMDGPU specific symbol handling
//===----------------------------------------------------------------------===//

/// Print a string describing the reserved bit range specified by Mask with
/// offset BaseBytes for use in error comments. Mask is a single continuous
/// range of 1s surrounded by zeros. The format here is meant to align with the
/// tables that describe these bits in llvm.org/docs/AMDGPUUsage.html.
static SmallString<32> getBitRangeFromMask(uint32_t Mask, unsigned BaseBytes) {
  SmallString<32> Result;
  raw_svector_ostream S(Result);

  int TrailingZeros = llvm::countr_zero(Mask);
  int PopCount = llvm::popcount(Mask);

  if (PopCount == 1) {
    S << "bit (" << (TrailingZeros + BaseBytes * CHAR_BIT) << ')';
  } else {
    S << "bits in range ("
      << (TrailingZeros + PopCount - 1 + BaseBytes * CHAR_BIT) << ':'
      << (TrailingZeros + BaseBytes * CHAR_BIT) << ')';
  }

  return Result;
}

// Helper macros for the decodeCOMPUTE_PGM_RSRC* routines below. They expect
// `FourByteBuffer`, `KdStream` and `Indent` to be in scope at the use site.
#define GET_FIELD(MASK) (AMDHSA_BITS_GET(FourByteBuffer, MASK))
#define PRINT_DIRECTIVE(DIRECTIVE, MASK)                                       \
  do {                                                                         \
    KdStream << Indent << DIRECTIVE " " << GET_FIELD(MASK) << '\n';            \
  } while (0)
#define PRINT_PSEUDO_DIRECTIVE_COMMENT(DIRECTIVE, MASK)                        \
  do {                                                                         \
    KdStream << Indent << MAI.getCommentString() << ' ' << DIRECTIVE " "       \
             << GET_FIELD(MASK) << '\n';                                       \
  } while (0)

// Fail the decode with a descriptive error if any bit under MASK is set.
#define CHECK_RESERVED_BITS_IMPL(MASK, DESC, MSG)                              \
  do {                                                                         \
    if (FourByteBuffer & (MASK)) {                                             \
      return createStringError(std::errc::invalid_argument,                    \
                               "kernel descriptor " DESC                       \
                               " reserved %s set" MSG,                         \
                               getBitRangeFromMask((MASK), 0).c_str());        \
    }                                                                          \
  } while (0)

#define CHECK_RESERVED_BITS(MASK) CHECK_RESERVED_BITS_IMPL(MASK, #MASK, "")
#define CHECK_RESERVED_BITS_MSG(MASK, MSG)                                     \
  CHECK_RESERVED_BITS_IMPL(MASK, #MASK, ", " MSG)
#define CHECK_RESERVED_BITS_DESC(MASK, DESC)                                   \
  CHECK_RESERVED_BITS_IMPL(MASK, DESC, "")
#define CHECK_RESERVED_BITS_DESC_MSG(MASK, DESC, MSG)                          \
  CHECK_RESERVED_BITS_IMPL(MASK, DESC, ", " MSG)

// Decode the COMPUTE_PGM_RSRC1 word of a kernel descriptor, emitting the
// equivalent .amdhsa_* directives into KdStream. Returns an error for any
// reserved bit that is set.
// NOLINTNEXTLINE(readability-identifier-naming)
Expected<bool> AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC1(
    uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
  using namespace amdhsa;
  StringRef Indent = "\t";

  // We cannot accurately backward compute #VGPRs used from
  // GRANULATED_WORKITEM_VGPR_COUNT. But we are concerned with getting the same
  // value of GRANULATED_WORKITEM_VGPR_COUNT in the reassembled binary. So we
  // simply calculate the inverse of what the assembler does.

  uint32_t GranulatedWorkitemVGPRCount =
      GET_FIELD(COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT);

  uint32_t NextFreeVGPR =
      (GranulatedWorkitemVGPRCount + 1) *
      AMDGPU::IsaInfo::getVGPREncodingGranule(&STI, EnableWavefrontSize32);

  KdStream << Indent << ".amdhsa_next_free_vgpr " << NextFreeVGPR << '\n';

  // We cannot backward compute values used to calculate
  // GRANULATED_WAVEFRONT_SGPR_COUNT. Hence the original values for following
  // directives can't be computed:
  // .amdhsa_reserve_vcc
  // .amdhsa_reserve_flat_scratch
  // .amdhsa_reserve_xnack_mask
  // They take their respective default values if not specified in the assembly.
  //
  // GRANULATED_WAVEFRONT_SGPR_COUNT
  //    = f(NEXT_FREE_SGPR + VCC + FLAT_SCRATCH + XNACK_MASK)
  //
  // We compute the inverse as though all directives apart from NEXT_FREE_SGPR
  // are set to 0. So while disassembling we consider that:
  //
  // GRANULATED_WAVEFRONT_SGPR_COUNT
  //    = f(NEXT_FREE_SGPR + 0 + 0 + 0)
  //
  // The disassembler cannot recover the original values of those 3 directives.

  uint32_t GranulatedWavefrontSGPRCount =
      GET_FIELD(COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT);

  if (isGFX10Plus())
    CHECK_RESERVED_BITS_MSG(COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
                            "must be zero on gfx10+");

  uint32_t NextFreeSGPR = (GranulatedWavefrontSGPRCount + 1) *
                          AMDGPU::IsaInfo::getSGPREncodingGranule(&STI);

  // Emit explicit zeros for the unrecoverable reservation directives (see the
  // comment above) so reassembly reproduces the same granulated count.
  KdStream << Indent << ".amdhsa_reserve_vcc " << 0 << '\n';
  if (!hasArchitectedFlatScratch())
    KdStream << Indent << ".amdhsa_reserve_flat_scratch " << 0 << '\n';
  KdStream << Indent << ".amdhsa_reserve_xnack_mask " << 0 << '\n';
  KdStream << Indent << ".amdhsa_next_free_sgpr " << NextFreeSGPR << "\n";

  CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_PRIORITY);

  PRINT_DIRECTIVE(".amdhsa_float_round_mode_32",
                  COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32);
  PRINT_DIRECTIVE(".amdhsa_float_round_mode_16_64",
                  COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64);
  PRINT_DIRECTIVE(".amdhsa_float_denorm_mode_32",
                  COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32);
  PRINT_DIRECTIVE(".amdhsa_float_denorm_mode_16_64",
                  COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64);

  CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_PRIV);

  // DX10_CLAMP and IEEE_MODE were removed from RSRC1 on gfx12.
  if (!isGFX12Plus())
    PRINT_DIRECTIVE(".amdhsa_dx10_clamp",
                    COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP);

  CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_DEBUG_MODE);

  if (!isGFX12Plus())
    PRINT_DIRECTIVE(".amdhsa_ieee_mode",
                    COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE);

  CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_BULKY);
  CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_CDBG_USER);

  if (isGFX9Plus())
    PRINT_DIRECTIVE(".amdhsa_fp16_overflow", COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL);

  if (!isGFX9Plus())
    CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC1_GFX6_GFX8_RESERVED0,
                                 "COMPUTE_PGM_RSRC1", "must be zero pre-gfx9");

  CHECK_RESERVED_BITS_DESC(COMPUTE_PGM_RSRC1_RESERVED1, "COMPUTE_PGM_RSRC1");

  if (!isGFX10Plus())
    CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC1_GFX6_GFX9_RESERVED2,
                                 "COMPUTE_PGM_RSRC1", "must be zero pre-gfx10");

  if (isGFX10Plus()) {
    PRINT_DIRECTIVE(".amdhsa_workgroup_processor_mode",
                    COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE);
    PRINT_DIRECTIVE(".amdhsa_memory_ordered", COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED);
    PRINT_DIRECTIVE(".amdhsa_forward_progress", COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS);
  }

  if (isGFX12Plus())
    PRINT_DIRECTIVE(".amdhsa_round_robin_scheduling",
                    COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN);

  return true;
}

// Decode the COMPUTE_PGM_RSRC2 word of a kernel descriptor, emitting the
// equivalent .amdhsa_* directives into KdStream.
// NOLINTNEXTLINE(readability-identifier-naming)
Expected<bool> AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC2(
    uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
  using namespace amdhsa;
  StringRef Indent = "\t";
  // The same bit is spelled differently depending on whether flat scratch is
  // architected on this subtarget.
  if (hasArchitectedFlatScratch())
    PRINT_DIRECTIVE(".amdhsa_enable_private_segment",
                    COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT);
  else
    PRINT_DIRECTIVE(".amdhsa_system_sgpr_private_segment_wavefront_offset",
                    COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT);
  PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_x",
                  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X);
  PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_y",
                  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y);
  PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_z",
                  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z);
  PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_info",
                  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO);
  PRINT_DIRECTIVE(".amdhsa_system_vgpr_workitem_id",
                  COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID);

  CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_ADDRESS_WATCH);
  CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_MEMORY);
  CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC2_GRANULATED_LDS_SIZE);

  PRINT_DIRECTIVE(
      ".amdhsa_exception_fp_ieee_invalid_op",
      COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION);
  PRINT_DIRECTIVE(".amdhsa_exception_fp_denorm_src",
                  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE);
  PRINT_DIRECTIVE(
      ".amdhsa_exception_fp_ieee_div_zero",
      COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO);
  PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_overflow",
                  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW);
  PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_underflow",
                  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW);
  PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_inexact",
                  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT);
  PRINT_DIRECTIVE(".amdhsa_exception_int_div_zero",
                  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO);

  CHECK_RESERVED_BITS_DESC(COMPUTE_PGM_RSRC2_RESERVED0, "COMPUTE_PGM_RSRC2");

  return true;
}

// Decode the COMPUTE_PGM_RSRC3 word of a kernel descriptor. Field layout
// varies per generation (gfx90a, gfx10, gfx11, gfx12+); pre-gfx9 requires the
// whole word to be zero.
// NOLINTNEXTLINE(readability-identifier-naming)
Expected<bool> AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC3(
    uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
  using namespace amdhsa;
  StringRef Indent = "\t";
  if (isGFX90A()) {
    // ACCUM_OFFSET is stored granulated; invert the assembler's encoding.
    KdStream << Indent << ".amdhsa_accum_offset "
             << (GET_FIELD(COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET) + 1) * 4
             << '\n';

    PRINT_DIRECTIVE(".amdhsa_tg_split", COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT);

    CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX90A_RESERVED0,
                                 "COMPUTE_PGM_RSRC3", "must be zero on gfx90a");
    CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX90A_RESERVED1,
                                 "COMPUTE_PGM_RSRC3", "must be zero on gfx90a");
  } else if (isGFX10Plus()) {
    // Bits [0-3].
    if (!isGFX12Plus()) {
      // Shared VGPRs only exist in wave64 mode; in wave32 the field is
      // surfaced as a comment rather than a directive.
      if (!EnableWavefrontSize32 || !*EnableWavefrontSize32) {
        PRINT_DIRECTIVE(".amdhsa_shared_vgpr_count",
                        COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT);
      } else {
        PRINT_PSEUDO_DIRECTIVE_COMMENT(
            "SHARED_VGPR_COUNT",
            COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT);
      }
    } else {
      CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX12_PLUS_RESERVED0,
                                   "COMPUTE_PGM_RSRC3",
                                   "must be zero on gfx12+");
    }

    // Bits [4-11].
    if (isGFX11()) {
      PRINT_DIRECTIVE(".amdhsa_inst_pref_size",
                      COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE);
      PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_START",
                                     COMPUTE_PGM_RSRC3_GFX11_TRAP_ON_START);
      PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_END",
                                     COMPUTE_PGM_RSRC3_GFX11_TRAP_ON_END);
    } else if (isGFX12Plus()) {
      PRINT_DIRECTIVE(".amdhsa_inst_pref_size",
                      COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE);
    } else {
      CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_RESERVED1,
                                   "COMPUTE_PGM_RSRC3",
                                   "must be zero on gfx10");
    }

    // Bits [12].
    CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_PLUS_RESERVED2,
                                 "COMPUTE_PGM_RSRC3", "must be zero on gfx10+");

    // Bits [13].
    if (isGFX12Plus()) {
      PRINT_PSEUDO_DIRECTIVE_COMMENT("GLG_EN",
                                     COMPUTE_PGM_RSRC3_GFX12_PLUS_GLG_EN);
    } else {
      CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_GFX11_RESERVED3,
                                   "COMPUTE_PGM_RSRC3",
                                   "must be zero on gfx10 or gfx11");
    }

    // Bits [14-30].
    CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_PLUS_RESERVED4,
                                 "COMPUTE_PGM_RSRC3", "must be zero on gfx10+");

    // Bits [31].
    if (isGFX11Plus()) {
      PRINT_PSEUDO_DIRECTIVE_COMMENT("IMAGE_OP",
                                     COMPUTE_PGM_RSRC3_GFX11_PLUS_IMAGE_OP);
    } else {
      CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_RESERVED5,
                                   "COMPUTE_PGM_RSRC3",
                                   "must be zero on gfx10");
    }
  } else if (FourByteBuffer) {
    return createStringError(
        std::errc::invalid_argument,
        "kernel descriptor COMPUTE_PGM_RSRC3 must be all zero before gfx9");
  }
  return true;
}
#undef PRINT_PSEUDO_DIRECTIVE_COMMENT
#undef PRINT_DIRECTIVE
#undef GET_FIELD
#undef CHECK_RESERVED_BITS_IMPL
#undef CHECK_RESERVED_BITS
#undef CHECK_RESERVED_BITS_MSG
#undef CHECK_RESERVED_BITS_DESC
#undef CHECK_RESERVED_BITS_DESC_MSG

/// Create an error object to return from onSymbolStart for reserved kernel
/// descriptor bits being set.
static Error createReservedKDBitsError(uint32_t Mask, unsigned BaseBytes,
                                       const char *Msg = "") {
  return createStringError(
      std::errc::invalid_argument, "kernel descriptor reserved %s set%s%s",
      getBitRangeFromMask(Mask, BaseBytes).c_str(), *Msg ? ", " : "", Msg);
}

/// Create an error object to return from onSymbolStart for reserved kernel
/// descriptor bytes being set.
static Error createReservedKDBytesError(unsigned BaseInBytes,
                                        unsigned WidthInBytes) {
  // Create an error comment in the same format as the "Kernel Descriptor"
  // table here: https://llvm.org/docs/AMDGPUUsage.html#kernel-descriptor .
  return createStringError(
      std::errc::invalid_argument,
      "kernel descriptor reserved bits in range (%u:%u) set",
      (BaseInBytes + WidthInBytes) * CHAR_BIT - 1, BaseInBytes * CHAR_BIT);
}

/// Decode one field of a 64-byte kernel descriptor, dispatched on the current
/// cursor offset, and append the corresponding .amdhsa_* directive(s) to
/// KdStream. Advances \p Cursor past the field. Returns an error when a
/// reserved bit or byte is set.
Expected<bool> AMDGPUDisassembler::decodeKernelDescriptorDirective(
    DataExtractor::Cursor &Cursor, ArrayRef<uint8_t> Bytes,
    raw_string_ostream &KdStream) const {
// Local variant of PRINT_DIRECTIVE reading from the 16-bit buffer.
#define PRINT_DIRECTIVE(DIRECTIVE, MASK)                                       \
  do {                                                                         \
    KdStream << Indent << DIRECTIVE " "                                        \
             << ((TwoByteBuffer & MASK) >> (MASK##_SHIFT)) << '\n';            \
  } while (0)

  uint16_t TwoByteBuffer = 0;
  uint32_t FourByteBuffer = 0;

  StringRef ReservedBytes;
  StringRef Indent = "\t";

  assert(Bytes.size() == 64);
  DataExtractor DE(Bytes, /*IsLittleEndian=*/true, /*AddressSize=*/8);

  switch (Cursor.tell()) {
  case amdhsa::GROUP_SEGMENT_FIXED_SIZE_OFFSET:
    FourByteBuffer = DE.getU32(Cursor);
    KdStream << Indent << ".amdhsa_group_segment_fixed_size " << FourByteBuffer
             << '\n';
    return true;

  case amdhsa::PRIVATE_SEGMENT_FIXED_SIZE_OFFSET:
    FourByteBuffer = DE.getU32(Cursor);
    KdStream << Indent << ".amdhsa_private_segment_fixed_size "
             << FourByteBuffer << '\n';
    return true;

  case amdhsa::KERNARG_SIZE_OFFSET:
    FourByteBuffer = DE.getU32(Cursor);
    KdStream << Indent << ".amdhsa_kernarg_size "
             << FourByteBuffer << '\n';
    return true;

  case amdhsa::RESERVED0_OFFSET:
    // 4 reserved bytes, must be 0.
    ReservedBytes = DE.getBytes(Cursor, 4);
    for (int I = 0; I < 4; ++I) {
      if (ReservedBytes[I] != 0)
        return createReservedKDBytesError(amdhsa::RESERVED0_OFFSET, 4);
    }
    return true;

  case amdhsa::KERNEL_CODE_ENTRY_BYTE_OFFSET_OFFSET:
    // KERNEL_CODE_ENTRY_BYTE_OFFSET
    // So far no directive controls this for Code Object V3, so simply skip for
    // disassembly.
    DE.skip(Cursor, 8);
    return true;

  case amdhsa::RESERVED1_OFFSET:
    // 20 reserved bytes, must be 0.
    ReservedBytes = DE.getBytes(Cursor, 20);
    for (int I = 0; I < 20; ++I) {
      if (ReservedBytes[I] != 0)
        return createReservedKDBytesError(amdhsa::RESERVED1_OFFSET, 20);
    }
    return true;

  case amdhsa::COMPUTE_PGM_RSRC3_OFFSET:
    FourByteBuffer = DE.getU32(Cursor);
    return decodeCOMPUTE_PGM_RSRC3(FourByteBuffer, KdStream);

  case amdhsa::COMPUTE_PGM_RSRC1_OFFSET:
    FourByteBuffer = DE.getU32(Cursor);
    return decodeCOMPUTE_PGM_RSRC1(FourByteBuffer, KdStream);

  case amdhsa::COMPUTE_PGM_RSRC2_OFFSET:
    FourByteBuffer = DE.getU32(Cursor);
    return decodeCOMPUTE_PGM_RSRC2(FourByteBuffer, KdStream);

  case amdhsa::KERNEL_CODE_PROPERTIES_OFFSET:
    using namespace amdhsa;
    TwoByteBuffer = DE.getU16(Cursor);

    if (!hasArchitectedFlatScratch())
      PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_buffer",
                      KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER);
    PRINT_DIRECTIVE(".amdhsa_user_sgpr_dispatch_ptr",
                    KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR);
    PRINT_DIRECTIVE(".amdhsa_user_sgpr_queue_ptr",
                    KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR);
    PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_segment_ptr",
                    KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR);
    PRINT_DIRECTIVE(".amdhsa_user_sgpr_dispatch_id",
                    KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID);
    if (!hasArchitectedFlatScratch())
      PRINT_DIRECTIVE(".amdhsa_user_sgpr_flat_scratch_init",
                      KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT);
    PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_size",
                    KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE);

    if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED0)
      return createReservedKDBitsError(KERNEL_CODE_PROPERTY_RESERVED0,
                                       amdhsa::KERNEL_CODE_PROPERTIES_OFFSET);

    // Reserved for GFX9
    if (isGFX9() &&
        (TwoByteBuffer & KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32)) {
      return createReservedKDBitsError(
          KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
          amdhsa::KERNEL_CODE_PROPERTIES_OFFSET, "must be zero on gfx9");
    }
    if (isGFX10Plus()) {
      PRINT_DIRECTIVE(".amdhsa_wavefront_size32",
                      KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);
    }

    if (CodeObjectVersion >= AMDGPU::AMDHSA_COV5)
      PRINT_DIRECTIVE(".amdhsa_uses_dynamic_stack",
                      KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK);

    if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED1) {
      return createReservedKDBitsError(KERNEL_CODE_PROPERTY_RESERVED1,
                                       amdhsa::KERNEL_CODE_PROPERTIES_OFFSET);
    }

    return true;

  case amdhsa::KERNARG_PRELOAD_OFFSET:
    using namespace amdhsa;
    TwoByteBuffer = DE.getU16(Cursor);
    if (TwoByteBuffer & KERNARG_PRELOAD_SPEC_LENGTH) {
      PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_preload_length",
                      KERNARG_PRELOAD_SPEC_LENGTH);
    }

    if (TwoByteBuffer & KERNARG_PRELOAD_SPEC_OFFSET) {
      PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_preload_offset",
                      KERNARG_PRELOAD_SPEC_OFFSET);
    }
    return true;

  case amdhsa::RESERVED3_OFFSET:
    // 4 bytes from here are reserved, must be 0.
    ReservedBytes = DE.getBytes(Cursor, 4);
    for (int I = 0; I < 4; ++I) {
      if (ReservedBytes[I] != 0)
        return createReservedKDBytesError(amdhsa::RESERVED3_OFFSET, 4);
    }
    return true;

  default:
    llvm_unreachable("Unhandled index. Case statements cover everything.");
    // NOTE(review): unreachable in practice; presumably kept to satisfy
    // compilers that warn about a missing return — confirm before removing.
    return true;
  }
#undef PRINT_DIRECTIVE
}

/// Disassemble a full 64-byte Code Object V3+ kernel descriptor named
/// \p KdName located at \p KdAddress, printing the reconstructed
/// .amdhsa_kernel directive block to stdout.
Expected<bool> AMDGPUDisassembler::decodeKernelDescriptor(
    StringRef KdName, ArrayRef<uint8_t> Bytes, uint64_t KdAddress) const {

  // CP microcode requires the kernel descriptor to be 64 aligned.
  if (Bytes.size() != 64 || KdAddress % 64 != 0)
    return createStringError(std::errc::invalid_argument,
                             "kernel descriptor must be 64-byte aligned");

  // FIXME: We can't actually decode "in order" as is done below, as e.g. GFX10
  // requires us to know the setting of .amdhsa_wavefront_size32 in order to
  // accurately produce .amdhsa_next_free_vgpr, and they appear in the wrong
  // order. Workaround this by first looking up .amdhsa_wavefront_size32 here
  // when required.
  if (isGFX10Plus()) {
    uint16_t KernelCodeProperties =
        support::endian::read16(&Bytes[amdhsa::KERNEL_CODE_PROPERTIES_OFFSET],
                                llvm::endianness::little);
    EnableWavefrontSize32 =
        AMDHSA_BITS_GET(KernelCodeProperties,
                        amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);
  }

  std::string Kd;
  raw_string_ostream KdStream(Kd);
  KdStream << ".amdhsa_kernel " << KdName << '\n';

  // Walk the descriptor field by field; each directive decode advances C.
  DataExtractor::Cursor C(0);
  while (C && C.tell() < Bytes.size()) {
    Expected<bool> Res = decodeKernelDescriptorDirective(C, Bytes, KdStream);

    cantFail(C.takeError());

    if (!Res)
      return Res;
  }
  KdStream << ".end_amdhsa_kernel\n";
  outs() << KdStream.str();
  return true;
}

Expected<bool> AMDGPUDisassembler::onSymbolStart(SymbolInfoTy &Symbol,
                                                 uint64_t &Size,
                                                 ArrayRef<uint8_t> Bytes,
                                                 uint64_t Address) const {
  // Right now only kernel descriptor needs to be handled.
  // We ignore all other symbols for target specific handling.
  // TODO:
  // Fix the spurious symbol issue for AMDGPU kernels. Exists for both Code
  // Object V2 and V3 when symbols are marked protected.

  // amd_kernel_code_t for Code Object V2.
  if (Symbol.Type == ELF::STT_AMDGPU_HSA_KERNEL) {
    Size = 256;
    return createStringError(std::errc::invalid_argument,
                             "code object v2 is not supported");
  }

  // Code Object V3 kernel descriptors.
  StringRef Name = Symbol.Name;
  if (Symbol.Type == ELF::STT_OBJECT && Name.ends_with(StringRef(".kd"))) {
    Size = 64; // Size = 64 regardless of success or failure.
    return decodeKernelDescriptor(Name.drop_back(3), Bytes, Address);
  }

  return false;
}

/// Get (or create) a context symbol named \p Id bound to the constant
/// \p Val, and return a reference expression to it.
const MCExpr *AMDGPUDisassembler::createConstantSymbolExpr(StringRef Id,
                                                           int64_t Val) {
  MCContext &Ctx = getContext();
  MCSymbol *Sym = Ctx.getOrCreateSymbol(Id);
  // Note: only set value to Val on a new symbol in case a disassembler
  // has already been initialized in this context.
  if (!Sym->isVariable()) {
    Sym->setVariableValue(MCConstantExpr::create(Val, Ctx));
  } else {
    // Symbol already exists: warn unless it evaluates to the same constant.
    int64_t Res = ~Val;
    bool Valid = Sym->getVariableValue()->evaluateAsAbsolute(Res);
    if (!Valid || Res != Val)
      Ctx.reportWarning(SMLoc(), "unsupported redefinition of " + Id);
  }
  return MCSymbolRefExpr::create(Sym, Ctx);
}

//===----------------------------------------------------------------------===//
// AMDGPUSymbolizer
//===----------------------------------------------------------------------===//

// Try to find symbol name for specified label
bool AMDGPUSymbolizer::tryAddingSymbolicOperand(
    MCInst &Inst, raw_ostream & /*cStream*/, int64_t Value,
    uint64_t /*Address*/, bool IsBranch, uint64_t /*Offset*/,
    uint64_t /*OpSize*/, uint64_t /*InstSize*/) {

  // Only branch targets are symbolized.
  if (!IsBranch) {
    return false;
  }

  // DisInfo carries the section's symbol table (see createAMDGPUSymbolizer).
  auto *Symbols = static_cast<SectionSymbolsTy *>(DisInfo);
  if (!Symbols)
    return false;

  auto Result = llvm::find_if(*Symbols, [Value](const SymbolInfoTy &Val) {
    return Val.Addr == static_cast<uint64_t>(Value) &&
           Val.Type == ELF::STT_NOTYPE;
  });
  if (Result != Symbols->end()) {
    auto *Sym = Ctx.getOrCreateSymbol(Result->Name);
    const auto *Add = MCSymbolRefExpr::create(Sym, Ctx);
    Inst.addOperand(MCOperand::createExpr(Add));
    return true;
  }
  // Add to list of referenced addresses, so caller can synthesize a label.
  ReferencedAddresses.push_back(static_cast<uint64_t>(Value));
  return false;
}

void AMDGPUSymbolizer::tryAddingPcLoadReferenceComment(raw_ostream &cStream,
                                                       int64_t Value,
                                                       uint64_t Address) {
  llvm_unreachable("unimplemented");
}

//===----------------------------------------------------------------------===//
// Initialization
//===----------------------------------------------------------------------===//

// Factory registered with TargetRegistry below.
static MCSymbolizer *createAMDGPUSymbolizer(const Triple &/*TT*/,
                              LLVMOpInfoCallback /*GetOpInfo*/,
                              LLVMSymbolLookupCallback /*SymbolLookUp*/,
                              void *DisInfo,
                              MCContext *Ctx,
                              std::unique_ptr<MCRelocationInfo> &&RelInfo) {
  return new AMDGPUSymbolizer(*Ctx, std::move(RelInfo), DisInfo);
}

// Factory registered with TargetRegistry below.
static MCDisassembler *createAMDGPUDisassembler(const Target &T,
                                                const MCSubtargetInfo &STI,
                                                MCContext &Ctx) {
  return new AMDGPUDisassembler(STI, Ctx, T.createMCInstrInfo());
}

// Register the GCN disassembler and symbolizer with the target registry.
extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void
LLVMInitializeAMDGPUDisassembler() {
  TargetRegistry::RegisterMCDisassembler(getTheGCNTarget(),
                                         createAMDGPUDisassembler);
  TargetRegistry::RegisterMCSymbolizer(getTheGCNTarget(),
                                       createAMDGPUSymbolizer);
}