1 //===-- SIDefines.h - SI Helper Macros ----------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 /// \file 8 //===----------------------------------------------------------------------===// 9 10 #ifndef LLVM_LIB_TARGET_AMDGPU_SIDEFINES_H 11 #define LLVM_LIB_TARGET_AMDGPU_SIDEFINES_H 12 13 #include "llvm/MC/MCInstrDesc.h" 14 15 namespace llvm { 16 17 // This needs to be kept in sync with the field bits in SIRegisterClass. 18 enum SIRCFlags : uint8_t { 19 RegTupleAlignUnitsWidth = 2, 20 HasVGPRBit = RegTupleAlignUnitsWidth, 21 HasAGPRBit, 22 HasSGPRbit, 23 24 HasVGPR = 1 << HasVGPRBit, 25 HasAGPR = 1 << HasAGPRBit, 26 HasSGPR = 1 << HasSGPRbit, 27 28 RegTupleAlignUnitsMask = (1 << RegTupleAlignUnitsWidth) - 1, 29 RegKindMask = (HasVGPR | HasAGPR | HasSGPR) 30 }; // enum SIRCFlagsr 31 32 namespace SIEncodingFamily { 33 // This must be kept in sync with the SIEncodingFamily class in SIInstrInfo.td 34 // and the columns of the getMCOpcodeGen table. 35 enum { 36 SI = 0, 37 VI = 1, 38 SDWA = 2, 39 SDWA9 = 3, 40 GFX80 = 4, 41 GFX9 = 5, 42 GFX10 = 6, 43 SDWA10 = 7, 44 GFX90A = 8, 45 GFX940 = 9, 46 GFX11 = 10, 47 }; 48 } 49 50 namespace SIInstrFlags { 51 // This needs to be kept in sync with the field bits in InstSI. 52 enum : uint64_t { 53 // Low bits - basic encoding information. 54 SALU = 1 << 0, 55 VALU = 1 << 1, 56 57 // SALU instruction formats. 58 SOP1 = 1 << 2, 59 SOP2 = 1 << 3, 60 SOPC = 1 << 4, 61 SOPK = 1 << 5, 62 SOPP = 1 << 6, 63 64 // VALU instruction formats. 65 VOP1 = 1 << 7, 66 VOP2 = 1 << 8, 67 VOPC = 1 << 9, 68 69 // TODO: Should this be spilt into VOP3 a and b? 70 VOP3 = 1 << 10, 71 VOP3P = 1 << 12, 72 73 VINTRP = 1 << 13, 74 SDWA = 1 << 14, 75 DPP = 1 << 15, 76 TRANS = 1 << 16, 77 78 // Memory instruction formats. 79 MUBUF = 1 << 17, 80 MTBUF = 1 << 18, 81 SMRD = 1 << 19, 82 MIMG = 1 << 20, 83 EXP = 1 << 21, 84 FLAT = 1 << 22, 85 DS = 1 << 23, 86 87 // Pseudo instruction formats. 88 VGPRSpill = 1 << 24, 89 SGPRSpill = 1 << 25, 90 91 // LDSDIR instruction format. 92 LDSDIR = 1 << 26, 93 94 // VINTERP instruction format. 95 VINTERP = 1 << 27, 96 97 // High bits - other information. 98 VM_CNT = UINT64_C(1) << 32, 99 EXP_CNT = UINT64_C(1) << 33, 100 LGKM_CNT = UINT64_C(1) << 34, 101 102 WQM = UINT64_C(1) << 35, 103 DisableWQM = UINT64_C(1) << 36, 104 Gather4 = UINT64_C(1) << 37, 105 SOPK_ZEXT = UINT64_C(1) << 38, 106 SCALAR_STORE = UINT64_C(1) << 39, 107 FIXED_SIZE = UINT64_C(1) << 40, 108 VOPAsmPrefer32Bit = UINT64_C(1) << 41, 109 VOP3_OPSEL = UINT64_C(1) << 42, 110 maybeAtomic = UINT64_C(1) << 43, 111 renamedInGFX9 = UINT64_C(1) << 44, 112 113 // Is a clamp on FP type. 114 FPClamp = UINT64_C(1) << 45, 115 116 // Is an integer clamp 117 IntClamp = UINT64_C(1) << 46, 118 119 // Clamps lo component of register. 120 ClampLo = UINT64_C(1) << 47, 121 122 // Clamps hi component of register. 123 // ClampLo and ClampHi set for packed clamp. 124 ClampHi = UINT64_C(1) << 48, 125 126 // Is a packed VOP3P instruction. 127 IsPacked = UINT64_C(1) << 49, 128 129 // Is a D16 buffer instruction. 130 D16Buf = UINT64_C(1) << 50, 131 132 // FLAT instruction accesses FLAT_GLBL segment. 133 FlatGlobal = UINT64_C(1) << 51, 134 135 // Uses floating point double precision rounding mode 136 FPDPRounding = UINT64_C(1) << 52, 137 138 // Instruction is FP atomic. 139 FPAtomic = UINT64_C(1) << 53, 140 141 // Is a MFMA instruction. 142 IsMAI = UINT64_C(1) << 54, 143 144 // Is a DOT instruction. 145 IsDOT = UINT64_C(1) << 55, 146 147 // FLAT instruction accesses FLAT_SCRATCH segment. 148 FlatScratch = UINT64_C(1) << 56, 149 150 // Atomic without return. 151 IsAtomicNoRet = UINT64_C(1) << 57, 152 153 // Atomic with return. 154 IsAtomicRet = UINT64_C(1) << 58, 155 156 // Is a WMMA instruction. 157 IsWMMA = UINT64_C(1) << 59, 158 159 // Whether tied sources will be read. 160 TiedSourceNotRead = UINT64_C(1) << 60, 161 162 // Is never uniform. 163 IsNeverUniform = UINT64_C(1) << 61, 164 }; 165 166 // v_cmp_class_* etc. use a 10-bit mask for what operation is checked. 167 // The result is true if any of these tests are true. 168 enum ClassFlags : unsigned { 169 S_NAN = 1 << 0, // Signaling NaN 170 Q_NAN = 1 << 1, // Quiet NaN 171 N_INFINITY = 1 << 2, // Negative infinity 172 N_NORMAL = 1 << 3, // Negative normal 173 N_SUBNORMAL = 1 << 4, // Negative subnormal 174 N_ZERO = 1 << 5, // Negative zero 175 P_ZERO = 1 << 6, // Positive zero 176 P_SUBNORMAL = 1 << 7, // Positive subnormal 177 P_NORMAL = 1 << 8, // Positive normal 178 P_INFINITY = 1 << 9 // Positive infinity 179 }; 180 } 181 182 namespace AMDGPU { 183 enum OperandType : unsigned { 184 /// Operands with register or 32-bit immediate 185 OPERAND_REG_IMM_INT32 = MCOI::OPERAND_FIRST_TARGET, 186 OPERAND_REG_IMM_INT64, 187 OPERAND_REG_IMM_INT16, 188 OPERAND_REG_IMM_FP32, 189 OPERAND_REG_IMM_FP64, 190 OPERAND_REG_IMM_FP16, 191 OPERAND_REG_IMM_FP16_DEFERRED, 192 OPERAND_REG_IMM_FP32_DEFERRED, 193 OPERAND_REG_IMM_V2FP16, 194 OPERAND_REG_IMM_V2INT16, 195 OPERAND_REG_IMM_V2INT32, 196 OPERAND_REG_IMM_V2FP32, 197 198 /// Operands with register or inline constant 199 OPERAND_REG_INLINE_C_INT16, 200 OPERAND_REG_INLINE_C_INT32, 201 OPERAND_REG_INLINE_C_INT64, 202 OPERAND_REG_INLINE_C_FP16, 203 OPERAND_REG_INLINE_C_FP32, 204 OPERAND_REG_INLINE_C_FP64, 205 OPERAND_REG_INLINE_C_V2INT16, 206 OPERAND_REG_INLINE_C_V2FP16, 207 OPERAND_REG_INLINE_C_V2INT32, 208 OPERAND_REG_INLINE_C_V2FP32, 209 210 /// Operand with 32-bit immediate that uses the constant bus. 211 OPERAND_KIMM32, 212 OPERAND_KIMM16, 213 214 /// Operands with an AccVGPR register or inline constant 215 OPERAND_REG_INLINE_AC_INT16, 216 OPERAND_REG_INLINE_AC_INT32, 217 OPERAND_REG_INLINE_AC_FP16, 218 OPERAND_REG_INLINE_AC_FP32, 219 OPERAND_REG_INLINE_AC_FP64, 220 OPERAND_REG_INLINE_AC_V2INT16, 221 OPERAND_REG_INLINE_AC_V2FP16, 222 OPERAND_REG_INLINE_AC_V2INT32, 223 OPERAND_REG_INLINE_AC_V2FP32, 224 225 // Operand for source modifiers for VOP instructions 226 OPERAND_INPUT_MODS, 227 228 // Operand for SDWA instructions 229 OPERAND_SDWA_VOPC_DST, 230 231 OPERAND_REG_IMM_FIRST = OPERAND_REG_IMM_INT32, 232 OPERAND_REG_IMM_LAST = OPERAND_REG_IMM_V2FP32, 233 234 OPERAND_REG_INLINE_C_FIRST = OPERAND_REG_INLINE_C_INT16, 235 OPERAND_REG_INLINE_C_LAST = OPERAND_REG_INLINE_AC_V2FP32, 236 237 OPERAND_REG_INLINE_AC_FIRST = OPERAND_REG_INLINE_AC_INT16, 238 OPERAND_REG_INLINE_AC_LAST = OPERAND_REG_INLINE_AC_V2FP32, 239 240 OPERAND_SRC_FIRST = OPERAND_REG_IMM_INT32, 241 OPERAND_SRC_LAST = OPERAND_REG_INLINE_C_LAST, 242 243 OPERAND_KIMM_FIRST = OPERAND_KIMM32, 244 OPERAND_KIMM_LAST = OPERAND_KIMM16 245 246 }; 247 } 248 249 // Input operand modifiers bit-masks 250 // NEG and SEXT share same bit-mask because they can't be set simultaneously. 251 namespace SISrcMods { 252 enum : unsigned { 253 NONE = 0, 254 NEG = 1 << 0, // Floating-point negate modifier 255 ABS = 1 << 1, // Floating-point absolute modifier 256 SEXT = 1 << 0, // Integer sign-extend modifier 257 NEG_HI = ABS, // Floating-point negate high packed component modifier. 258 OP_SEL_0 = 1 << 2, 259 OP_SEL_1 = 1 << 3, 260 DST_OP_SEL = 1 << 3 // VOP3 dst op_sel (share mask with OP_SEL_1) 261 }; 262 } 263 264 namespace SIOutMods { 265 enum : unsigned { 266 NONE = 0, 267 MUL2 = 1, 268 MUL4 = 2, 269 DIV2 = 3 270 }; 271 } 272 273 namespace AMDGPU { 274 namespace VGPRIndexMode { 275 276 enum Id : unsigned { // id of symbolic names 277 ID_SRC0 = 0, 278 ID_SRC1, 279 ID_SRC2, 280 ID_DST, 281 282 ID_MIN = ID_SRC0, 283 ID_MAX = ID_DST 284 }; 285 286 enum EncBits : unsigned { 287 OFF = 0, 288 SRC0_ENABLE = 1 << ID_SRC0, 289 SRC1_ENABLE = 1 << ID_SRC1, 290 SRC2_ENABLE = 1 << ID_SRC2, 291 DST_ENABLE = 1 << ID_DST, 292 ENABLE_MASK = SRC0_ENABLE | SRC1_ENABLE | SRC2_ENABLE | DST_ENABLE, 293 UNDEF = 0xFFFF 294 }; 295 296 } // namespace VGPRIndexMode 297 } // namespace AMDGPU 298 299 namespace AMDGPUAsmVariants { 300 enum : unsigned { 301 DEFAULT = 0, 302 VOP3 = 1, 303 SDWA = 2, 304 SDWA9 = 3, 305 DPP = 4, 306 VOP3_DPP = 5 307 }; 308 } // namespace AMDGPUAsmVariants 309 310 namespace AMDGPU { 311 namespace EncValues { // Encoding values of enum9/8/7 operands 312 313 enum : unsigned { 314 SGPR_MIN = 0, 315 SGPR_MAX_SI = 101, 316 SGPR_MAX_GFX10 = 105, 317 TTMP_VI_MIN = 112, 318 TTMP_VI_MAX = 123, 319 TTMP_GFX9PLUS_MIN = 108, 320 TTMP_GFX9PLUS_MAX = 123, 321 INLINE_INTEGER_C_MIN = 128, 322 INLINE_INTEGER_C_POSITIVE_MAX = 192, // 64 323 INLINE_INTEGER_C_MAX = 208, 324 INLINE_FLOATING_C_MIN = 240, 325 INLINE_FLOATING_C_MAX = 248, 326 LITERAL_CONST = 255, 327 VGPR_MIN = 256, 328 VGPR_MAX = 511, 329 IS_VGPR = 256 // Indicates VGPR or AGPR 330 }; 331 332 } // namespace EncValues 333 } // namespace AMDGPU 334 335 namespace AMDGPU { 336 namespace CPol { 337 338 enum CPol { 339 GLC = 1, 340 SLC = 2, 341 DLC = 4, 342 SCC = 16, 343 SC0 = GLC, 344 SC1 = SCC, 345 NT = SLC, 346 ALL = GLC | SLC | DLC | SCC 347 }; 348 349 } // namespace CPol 350 351 namespace SendMsg { // Encoding of SIMM16 used in s_sendmsg* insns. 352 353 enum Id { // Message ID, width(4) [3:0]. 354 ID_INTERRUPT = 1, 355 356 ID_GS_PreGFX11 = 2, // replaced in GFX11 357 ID_GS_DONE_PreGFX11 = 3, // replaced in GFX11 358 359 ID_HS_TESSFACTOR_GFX11Plus = 2, // reused in GFX11 360 ID_DEALLOC_VGPRS_GFX11Plus = 3, // reused in GFX11 361 362 ID_SAVEWAVE = 4, // added in GFX8, removed in GFX11 363 ID_STALL_WAVE_GEN = 5, // added in GFX9 364 ID_HALT_WAVES = 6, // added in GFX9 365 ID_ORDERED_PS_DONE = 7, // added in GFX9, removed in GFX11 366 ID_EARLY_PRIM_DEALLOC = 8, // added in GFX9, removed in GFX10 367 ID_GS_ALLOC_REQ = 9, // added in GFX9 368 ID_GET_DOORBELL = 10, // added in GFX9, removed in GFX11 369 ID_GET_DDID = 11, // added in GFX10, removed in GFX11 370 ID_SYSMSG = 15, 371 372 ID_RTN_GET_DOORBELL = 128, 373 ID_RTN_GET_DDID = 129, 374 ID_RTN_GET_TMA = 130, 375 ID_RTN_GET_REALTIME = 131, 376 ID_RTN_SAVE_WAVE = 132, 377 ID_RTN_GET_TBA = 133, 378 379 ID_MASK_PreGFX11_ = 0xF, 380 ID_MASK_GFX11Plus_ = 0xFF 381 }; 382 383 enum Op { // Both GS and SYS operation IDs. 384 OP_UNKNOWN_ = -1, 385 OP_SHIFT_ = 4, 386 OP_NONE_ = 0, 387 // Bits used for operation encoding 388 OP_WIDTH_ = 3, 389 OP_MASK_ = (((1 << OP_WIDTH_) - 1) << OP_SHIFT_), 390 // GS operations are encoded in bits 5:4 391 OP_GS_NOP = 0, 392 OP_GS_CUT = 1, 393 OP_GS_EMIT = 2, 394 OP_GS_EMIT_CUT = 3, 395 OP_GS_LAST_, 396 OP_GS_FIRST_ = OP_GS_NOP, 397 // SYS operations are encoded in bits 6:4 398 OP_SYS_ECC_ERR_INTERRUPT = 1, 399 OP_SYS_REG_RD = 2, 400 OP_SYS_HOST_TRAP_ACK = 3, 401 OP_SYS_TTRACE_PC = 4, 402 OP_SYS_LAST_, 403 OP_SYS_FIRST_ = OP_SYS_ECC_ERR_INTERRUPT, 404 }; 405 406 enum StreamId : unsigned { // Stream ID, (2) [9:8]. 407 STREAM_ID_NONE_ = 0, 408 STREAM_ID_DEFAULT_ = 0, 409 STREAM_ID_LAST_ = 4, 410 STREAM_ID_FIRST_ = STREAM_ID_DEFAULT_, 411 STREAM_ID_SHIFT_ = 8, 412 STREAM_ID_WIDTH_= 2, 413 STREAM_ID_MASK_ = (((1 << STREAM_ID_WIDTH_) - 1) << STREAM_ID_SHIFT_) 414 }; 415 416 } // namespace SendMsg 417 418 namespace Hwreg { // Encoding of SIMM16 used in s_setreg/getreg* insns. 419 420 enum Id { // HwRegCode, (6) [5:0] 421 ID_MODE = 1, 422 ID_STATUS = 2, 423 ID_TRAPSTS = 3, 424 ID_HW_ID = 4, 425 ID_GPR_ALLOC = 5, 426 ID_LDS_ALLOC = 6, 427 ID_IB_STS = 7, 428 ID_MEM_BASES = 15, 429 ID_TBA_LO = 16, 430 ID_TBA_HI = 17, 431 ID_TMA_LO = 18, 432 ID_TMA_HI = 19, 433 ID_FLAT_SCR_LO = 20, 434 ID_FLAT_SCR_HI = 21, 435 ID_XNACK_MASK = 22, 436 ID_HW_ID1 = 23, 437 ID_HW_ID2 = 24, 438 ID_POPS_PACKER = 25, 439 ID_PERF_SNAPSHOT_DATA = 27, 440 ID_SHADER_CYCLES = 29, 441 442 // Register numbers reused in GFX11+ 443 ID_PERF_SNAPSHOT_PC_LO = 18, 444 ID_PERF_SNAPSHOT_PC_HI = 19, 445 446 // GFX940 specific registers 447 ID_XCC_ID = 20, 448 ID_SQ_PERF_SNAPSHOT_DATA = 21, 449 ID_SQ_PERF_SNAPSHOT_DATA1 = 22, 450 ID_SQ_PERF_SNAPSHOT_PC_LO = 23, 451 ID_SQ_PERF_SNAPSHOT_PC_HI = 24, 452 453 ID_SHIFT_ = 0, 454 ID_WIDTH_ = 6, 455 ID_MASK_ = (((1 << ID_WIDTH_) - 1) << ID_SHIFT_) 456 }; 457 458 enum Offset : unsigned { // Offset, (5) [10:6] 459 OFFSET_DEFAULT_ = 0, 460 OFFSET_SHIFT_ = 6, 461 OFFSET_WIDTH_ = 5, 462 OFFSET_MASK_ = (((1 << OFFSET_WIDTH_) - 1) << OFFSET_SHIFT_), 463 464 OFFSET_MEM_VIOL = 8, 465 }; 466 467 enum WidthMinusOne : unsigned { // WidthMinusOne, (5) [15:11] 468 WIDTH_M1_DEFAULT_ = 31, 469 WIDTH_M1_SHIFT_ = 11, 470 WIDTH_M1_WIDTH_ = 5, 471 WIDTH_M1_MASK_ = (((1 << WIDTH_M1_WIDTH_) - 1) << WIDTH_M1_SHIFT_), 472 }; 473 474 // Some values from WidthMinusOne mapped into Width domain. 475 enum Width : unsigned { 476 WIDTH_DEFAULT_ = WIDTH_M1_DEFAULT_ + 1, 477 }; 478 479 enum ModeRegisterMasks : uint32_t { 480 FP_ROUND_MASK = 0xf << 0, // Bits 0..3 481 FP_DENORM_MASK = 0xf << 4, // Bits 4..7 482 DX10_CLAMP_MASK = 1 << 8, 483 IEEE_MODE_MASK = 1 << 9, 484 LOD_CLAMP_MASK = 1 << 10, 485 DEBUG_MASK = 1 << 11, 486 487 // EXCP_EN fields. 488 EXCP_EN_INVALID_MASK = 1 << 12, 489 EXCP_EN_INPUT_DENORMAL_MASK = 1 << 13, 490 EXCP_EN_FLOAT_DIV0_MASK = 1 << 14, 491 EXCP_EN_OVERFLOW_MASK = 1 << 15, 492 EXCP_EN_UNDERFLOW_MASK = 1 << 16, 493 EXCP_EN_INEXACT_MASK = 1 << 17, 494 EXCP_EN_INT_DIV0_MASK = 1 << 18, 495 496 GPR_IDX_EN_MASK = 1 << 27, 497 VSKIP_MASK = 1 << 28, 498 CSP_MASK = 0x7u << 29 // Bits 29..31 499 }; 500 501 } // namespace Hwreg 502 503 namespace MTBUFFormat { 504 505 enum DataFormat : int64_t { 506 DFMT_INVALID = 0, 507 DFMT_8, 508 DFMT_16, 509 DFMT_8_8, 510 DFMT_32, 511 DFMT_16_16, 512 DFMT_10_11_11, 513 DFMT_11_11_10, 514 DFMT_10_10_10_2, 515 DFMT_2_10_10_10, 516 DFMT_8_8_8_8, 517 DFMT_32_32, 518 DFMT_16_16_16_16, 519 DFMT_32_32_32, 520 DFMT_32_32_32_32, 521 DFMT_RESERVED_15, 522 523 DFMT_MIN = DFMT_INVALID, 524 DFMT_MAX = DFMT_RESERVED_15, 525 526 DFMT_UNDEF = -1, 527 DFMT_DEFAULT = DFMT_8, 528 529 DFMT_SHIFT = 0, 530 DFMT_MASK = 0xF 531 }; 532 533 enum NumFormat : int64_t { 534 NFMT_UNORM = 0, 535 NFMT_SNORM, 536 NFMT_USCALED, 537 NFMT_SSCALED, 538 NFMT_UINT, 539 NFMT_SINT, 540 NFMT_RESERVED_6, // VI and GFX9 541 NFMT_SNORM_OGL = NFMT_RESERVED_6, // SI and CI only 542 NFMT_FLOAT, 543 544 NFMT_MIN = NFMT_UNORM, 545 NFMT_MAX = NFMT_FLOAT, 546 547 NFMT_UNDEF = -1, 548 NFMT_DEFAULT = NFMT_UNORM, 549 550 NFMT_SHIFT = 4, 551 NFMT_MASK = 7 552 }; 553 554 enum MergedFormat : int64_t { 555 DFMT_NFMT_UNDEF = -1, 556 DFMT_NFMT_DEFAULT = ((DFMT_DEFAULT & DFMT_MASK) << DFMT_SHIFT) | 557 ((NFMT_DEFAULT & NFMT_MASK) << NFMT_SHIFT), 558 559 560 DFMT_NFMT_MASK = (DFMT_MASK << DFMT_SHIFT) | (NFMT_MASK << NFMT_SHIFT), 561 562 DFMT_NFMT_MAX = DFMT_NFMT_MASK 563 }; 564 565 enum UnifiedFormatCommon : int64_t { 566 UFMT_MAX = 127, 567 UFMT_UNDEF = -1, 568 UFMT_DEFAULT = 1 569 }; 570 571 } // namespace MTBUFFormat 572 573 namespace UfmtGFX10 { 574 enum UnifiedFormat : int64_t { 575 UFMT_INVALID = 0, 576 577 UFMT_8_UNORM, 578 UFMT_8_SNORM, 579 UFMT_8_USCALED, 580 UFMT_8_SSCALED, 581 UFMT_8_UINT, 582 UFMT_8_SINT, 583 584 UFMT_16_UNORM, 585 UFMT_16_SNORM, 586 UFMT_16_USCALED, 587 UFMT_16_SSCALED, 588 UFMT_16_UINT, 589 UFMT_16_SINT, 590 UFMT_16_FLOAT, 591 592 UFMT_8_8_UNORM, 593 UFMT_8_8_SNORM, 594 UFMT_8_8_USCALED, 595 UFMT_8_8_SSCALED, 596 UFMT_8_8_UINT, 597 UFMT_8_8_SINT, 598 599 UFMT_32_UINT, 600 UFMT_32_SINT, 601 UFMT_32_FLOAT, 602 603 UFMT_16_16_UNORM, 604 UFMT_16_16_SNORM, 605 UFMT_16_16_USCALED, 606 UFMT_16_16_SSCALED, 607 UFMT_16_16_UINT, 608 UFMT_16_16_SINT, 609 UFMT_16_16_FLOAT, 610 611 UFMT_10_11_11_UNORM, 612 UFMT_10_11_11_SNORM, 613 UFMT_10_11_11_USCALED, 614 UFMT_10_11_11_SSCALED, 615 UFMT_10_11_11_UINT, 616 UFMT_10_11_11_SINT, 617 UFMT_10_11_11_FLOAT, 618 619 UFMT_11_11_10_UNORM, 620 UFMT_11_11_10_SNORM, 621 UFMT_11_11_10_USCALED, 622 UFMT_11_11_10_SSCALED, 623 UFMT_11_11_10_UINT, 624 UFMT_11_11_10_SINT, 625 UFMT_11_11_10_FLOAT, 626 627 UFMT_10_10_10_2_UNORM, 628 UFMT_10_10_10_2_SNORM, 629 UFMT_10_10_10_2_USCALED, 630 UFMT_10_10_10_2_SSCALED, 631 UFMT_10_10_10_2_UINT, 632 UFMT_10_10_10_2_SINT, 633 634 UFMT_2_10_10_10_UNORM, 635 UFMT_2_10_10_10_SNORM, 636 UFMT_2_10_10_10_USCALED, 637 UFMT_2_10_10_10_SSCALED, 638 UFMT_2_10_10_10_UINT, 639 UFMT_2_10_10_10_SINT, 640 641 UFMT_8_8_8_8_UNORM, 642 UFMT_8_8_8_8_SNORM, 643 UFMT_8_8_8_8_USCALED, 644 UFMT_8_8_8_8_SSCALED, 645 UFMT_8_8_8_8_UINT, 646 UFMT_8_8_8_8_SINT, 647 648 UFMT_32_32_UINT, 649 UFMT_32_32_SINT, 650 UFMT_32_32_FLOAT, 651 652 UFMT_16_16_16_16_UNORM, 653 UFMT_16_16_16_16_SNORM, 654 UFMT_16_16_16_16_USCALED, 655 UFMT_16_16_16_16_SSCALED, 656 UFMT_16_16_16_16_UINT, 657 UFMT_16_16_16_16_SINT, 658 UFMT_16_16_16_16_FLOAT, 659 660 UFMT_32_32_32_UINT, 661 UFMT_32_32_32_SINT, 662 UFMT_32_32_32_FLOAT, 663 UFMT_32_32_32_32_UINT, 664 UFMT_32_32_32_32_SINT, 665 UFMT_32_32_32_32_FLOAT, 666 667 UFMT_FIRST = UFMT_INVALID, 668 UFMT_LAST = UFMT_32_32_32_32_FLOAT, 669 }; 670 671 } // namespace UfmtGFX10 672 673 namespace UfmtGFX11 { 674 enum UnifiedFormat : int64_t { 675 UFMT_INVALID = 0, 676 677 UFMT_8_UNORM, 678 UFMT_8_SNORM, 679 UFMT_8_USCALED, 680 UFMT_8_SSCALED, 681 UFMT_8_UINT, 682 UFMT_8_SINT, 683 684 UFMT_16_UNORM, 685 UFMT_16_SNORM, 686 UFMT_16_USCALED, 687 UFMT_16_SSCALED, 688 UFMT_16_UINT, 689 UFMT_16_SINT, 690 UFMT_16_FLOAT, 691 692 UFMT_8_8_UNORM, 693 UFMT_8_8_SNORM, 694 UFMT_8_8_USCALED, 695 UFMT_8_8_SSCALED, 696 UFMT_8_8_UINT, 697 UFMT_8_8_SINT, 698 699 UFMT_32_UINT, 700 UFMT_32_SINT, 701 UFMT_32_FLOAT, 702 703 UFMT_16_16_UNORM, 704 UFMT_16_16_SNORM, 705 UFMT_16_16_USCALED, 706 UFMT_16_16_SSCALED, 707 UFMT_16_16_UINT, 708 UFMT_16_16_SINT, 709 UFMT_16_16_FLOAT, 710 711 UFMT_10_11_11_FLOAT, 712 713 UFMT_11_11_10_FLOAT, 714 715 UFMT_10_10_10_2_UNORM, 716 UFMT_10_10_10_2_SNORM, 717 UFMT_10_10_10_2_UINT, 718 UFMT_10_10_10_2_SINT, 719 720 UFMT_2_10_10_10_UNORM, 721 UFMT_2_10_10_10_SNORM, 722 UFMT_2_10_10_10_USCALED, 723 UFMT_2_10_10_10_SSCALED, 724 UFMT_2_10_10_10_UINT, 725 UFMT_2_10_10_10_SINT, 726 727 UFMT_8_8_8_8_UNORM, 728 UFMT_8_8_8_8_SNORM, 729 UFMT_8_8_8_8_USCALED, 730 UFMT_8_8_8_8_SSCALED, 731 UFMT_8_8_8_8_UINT, 732 UFMT_8_8_8_8_SINT, 733 734 UFMT_32_32_UINT, 735 UFMT_32_32_SINT, 736 UFMT_32_32_FLOAT, 737 738 UFMT_16_16_16_16_UNORM, 739 UFMT_16_16_16_16_SNORM, 740 UFMT_16_16_16_16_USCALED, 741 UFMT_16_16_16_16_SSCALED, 742 UFMT_16_16_16_16_UINT, 743 UFMT_16_16_16_16_SINT, 744 UFMT_16_16_16_16_FLOAT, 745 746 UFMT_32_32_32_UINT, 747 UFMT_32_32_32_SINT, 748 UFMT_32_32_32_FLOAT, 749 UFMT_32_32_32_32_UINT, 750 UFMT_32_32_32_32_SINT, 751 UFMT_32_32_32_32_FLOAT, 752 753 UFMT_FIRST = UFMT_INVALID, 754 UFMT_LAST = UFMT_32_32_32_32_FLOAT, 755 }; 756 757 } // namespace UfmtGFX11 758 759 namespace Swizzle { // Encoding of swizzle macro used in ds_swizzle_b32. 760 761 enum Id : unsigned { // id of symbolic names 762 ID_QUAD_PERM = 0, 763 ID_BITMASK_PERM, 764 ID_SWAP, 765 ID_REVERSE, 766 ID_BROADCAST 767 }; 768 769 enum EncBits : unsigned { 770 771 // swizzle mode encodings 772 773 QUAD_PERM_ENC = 0x8000, 774 QUAD_PERM_ENC_MASK = 0xFF00, 775 776 BITMASK_PERM_ENC = 0x0000, 777 BITMASK_PERM_ENC_MASK = 0x8000, 778 779 // QUAD_PERM encodings 780 781 LANE_MASK = 0x3, 782 LANE_MAX = LANE_MASK, 783 LANE_SHIFT = 2, 784 LANE_NUM = 4, 785 786 // BITMASK_PERM encodings 787 788 BITMASK_MASK = 0x1F, 789 BITMASK_MAX = BITMASK_MASK, 790 BITMASK_WIDTH = 5, 791 792 BITMASK_AND_SHIFT = 0, 793 BITMASK_OR_SHIFT = 5, 794 BITMASK_XOR_SHIFT = 10 795 }; 796 797 } // namespace Swizzle 798 799 namespace SDWA { 800 801 enum SdwaSel : unsigned { 802 BYTE_0 = 0, 803 BYTE_1 = 1, 804 BYTE_2 = 2, 805 BYTE_3 = 3, 806 WORD_0 = 4, 807 WORD_1 = 5, 808 DWORD = 6, 809 }; 810 811 enum DstUnused : unsigned { 812 UNUSED_PAD = 0, 813 UNUSED_SEXT = 1, 814 UNUSED_PRESERVE = 2, 815 }; 816 817 enum SDWA9EncValues : unsigned { 818 SRC_SGPR_MASK = 0x100, 819 SRC_VGPR_MASK = 0xFF, 820 VOPC_DST_VCC_MASK = 0x80, 821 VOPC_DST_SGPR_MASK = 0x7F, 822 823 SRC_VGPR_MIN = 0, 824 SRC_VGPR_MAX = 255, 825 SRC_SGPR_MIN = 256, 826 SRC_SGPR_MAX_SI = 357, 827 SRC_SGPR_MAX_GFX10 = 361, 828 SRC_TTMP_MIN = 364, 829 SRC_TTMP_MAX = 379, 830 }; 831 832 } // namespace SDWA 833 834 namespace DPP { 835 836 // clang-format off 837 enum DppCtrl : unsigned { 838 QUAD_PERM_FIRST = 0, 839 QUAD_PERM_ID = 0xE4, // identity permutation 840 QUAD_PERM_LAST = 0xFF, 841 DPP_UNUSED1 = 0x100, 842 ROW_SHL0 = 0x100, 843 ROW_SHL_FIRST = 0x101, 844 ROW_SHL_LAST = 0x10F, 845 DPP_UNUSED2 = 0x110, 846 ROW_SHR0 = 0x110, 847 ROW_SHR_FIRST = 0x111, 848 ROW_SHR_LAST = 0x11F, 849 DPP_UNUSED3 = 0x120, 850 ROW_ROR0 = 0x120, 851 ROW_ROR_FIRST = 0x121, 852 ROW_ROR_LAST = 0x12F, 853 WAVE_SHL1 = 0x130, 854 DPP_UNUSED4_FIRST = 0x131, 855 DPP_UNUSED4_LAST = 0x133, 856 WAVE_ROL1 = 0x134, 857 DPP_UNUSED5_FIRST = 0x135, 858 DPP_UNUSED5_LAST = 0x137, 859 WAVE_SHR1 = 0x138, 860 DPP_UNUSED6_FIRST = 0x139, 861 DPP_UNUSED6_LAST = 0x13B, 862 WAVE_ROR1 = 0x13C, 863 DPP_UNUSED7_FIRST = 0x13D, 864 DPP_UNUSED7_LAST = 0x13F, 865 ROW_MIRROR = 0x140, 866 ROW_HALF_MIRROR = 0x141, 867 BCAST15 = 0x142, 868 BCAST31 = 0x143, 869 DPP_UNUSED8_FIRST = 0x144, 870 DPP_UNUSED8_LAST = 0x14F, 871 ROW_NEWBCAST_FIRST= 0x150, 872 ROW_NEWBCAST_LAST = 0x15F, 873 ROW_SHARE0 = 0x150, 874 ROW_SHARE_FIRST = 0x150, 875 ROW_SHARE_LAST = 0x15F, 876 ROW_XMASK0 = 0x160, 877 ROW_XMASK_FIRST = 0x160, 878 ROW_XMASK_LAST = 0x16F, 879 DPP_LAST = ROW_XMASK_LAST 880 }; 881 // clang-format on 882 883 enum DppFiMode { 884 DPP_FI_0 = 0, 885 DPP_FI_1 = 1, 886 DPP8_FI_0 = 0xE9, 887 DPP8_FI_1 = 0xEA, 888 }; 889 890 } // namespace DPP 891 892 namespace Exp { 893 894 enum Target : unsigned { 895 ET_MRT0 = 0, 896 ET_MRT7 = 7, 897 ET_MRTZ = 8, 898 ET_NULL = 9, // Pre-GFX11 899 ET_POS0 = 12, 900 ET_POS3 = 15, 901 ET_POS4 = 16, // GFX10+ 902 ET_POS_LAST = ET_POS4, // Highest pos used on any subtarget 903 ET_PRIM = 20, // GFX10+ 904 ET_DUAL_SRC_BLEND0 = 21, // GFX11+ 905 ET_DUAL_SRC_BLEND1 = 22, // GFX11+ 906 ET_PARAM0 = 32, // Pre-GFX11 907 ET_PARAM31 = 63, // Pre-GFX11 908 909 ET_NULL_MAX_IDX = 0, 910 ET_MRTZ_MAX_IDX = 0, 911 ET_PRIM_MAX_IDX = 0, 912 ET_MRT_MAX_IDX = 7, 913 ET_POS_MAX_IDX = 4, 914 ET_DUAL_SRC_BLEND_MAX_IDX = 1, 915 ET_PARAM_MAX_IDX = 31, 916 917 ET_INVALID = 255, 918 }; 919 920 } // namespace Exp 921 922 namespace VOP3PEncoding { 923 924 enum OpSel : uint64_t { 925 OP_SEL_HI_0 = UINT64_C(1) << 59, 926 OP_SEL_HI_1 = UINT64_C(1) << 60, 927 OP_SEL_HI_2 = UINT64_C(1) << 14, 928 }; 929 930 } // namespace VOP3PEncoding 931 932 namespace ImplicitArg { 933 // Implicit kernel argument offset for code object version 5. 934 enum Offset_COV5 : unsigned { 935 HOSTCALL_PTR_OFFSET = 80, 936 MULTIGRID_SYNC_ARG_OFFSET = 88, 937 HEAP_PTR_OFFSET = 96, 938 939 DEFAULT_QUEUE_OFFSET = 104, 940 COMPLETION_ACTION_OFFSET = 112, 941 942 PRIVATE_BASE_OFFSET = 192, 943 SHARED_BASE_OFFSET = 196, 944 QUEUE_PTR_OFFSET = 200, 945 }; 946 947 } // namespace ImplicitArg 948 949 namespace VirtRegFlag { 950 // Virtual register flags used for various target specific handlings during 951 // codegen. 952 enum Register_Flag : uint8_t { 953 // Register operand in a whole-wave mode operation. 954 WWM_REG = 1 << 0, 955 }; 956 957 } // namespace VirtRegFlag 958 959 } // namespace AMDGPU 960 961 #define R_00B028_SPI_SHADER_PGM_RSRC1_PS 0x00B028 962 #define S_00B028_VGPRS(x) (((x) & 0x3F) << 0) 963 #define S_00B028_SGPRS(x) (((x) & 0x0F) << 6) 964 #define S_00B028_MEM_ORDERED(x) (((x) & 0x1) << 25) 965 #define G_00B028_MEM_ORDERED(x) (((x) >> 25) & 0x1) 966 #define C_00B028_MEM_ORDERED 0xFDFFFFFF 967 968 #define R_00B02C_SPI_SHADER_PGM_RSRC2_PS 0x00B02C 969 #define S_00B02C_EXTRA_LDS_SIZE(x) (((x) & 0xFF) << 8) 970 #define R_00B128_SPI_SHADER_PGM_RSRC1_VS 0x00B128 971 #define S_00B128_MEM_ORDERED(x) (((x) & 0x1) << 27) 972 #define G_00B128_MEM_ORDERED(x) (((x) >> 27) & 0x1) 973 #define C_00B128_MEM_ORDERED 0xF7FFFFFF 974 975 #define R_00B228_SPI_SHADER_PGM_RSRC1_GS 0x00B228 976 #define S_00B228_WGP_MODE(x) (((x) & 0x1) << 27) 977 #define G_00B228_WGP_MODE(x) (((x) >> 27) & 0x1) 978 #define C_00B228_WGP_MODE 0xF7FFFFFF 979 #define S_00B228_MEM_ORDERED(x) (((x) & 0x1) << 25) 980 #define G_00B228_MEM_ORDERED(x) (((x) >> 25) & 0x1) 981 #define C_00B228_MEM_ORDERED 0xFDFFFFFF 982 983 #define R_00B328_SPI_SHADER_PGM_RSRC1_ES 0x00B328 984 #define R_00B428_SPI_SHADER_PGM_RSRC1_HS 0x00B428 985 #define S_00B428_WGP_MODE(x) (((x) & 0x1) << 26) 986 #define G_00B428_WGP_MODE(x) (((x) >> 26) & 0x1) 987 #define C_00B428_WGP_MODE 0xFBFFFFFF 988 #define S_00B428_MEM_ORDERED(x) (((x) & 0x1) << 24) 989 #define G_00B428_MEM_ORDERED(x) (((x) >> 24) & 0x1) 990 #define C_00B428_MEM_ORDERED 0xFEFFFFFF 991 992 #define R_00B528_SPI_SHADER_PGM_RSRC1_LS 0x00B528 993 994 #define R_00B84C_COMPUTE_PGM_RSRC2 0x00B84C 995 #define S_00B84C_SCRATCH_EN(x) (((x) & 0x1) << 0) 996 #define G_00B84C_SCRATCH_EN(x) (((x) >> 0) & 0x1) 997 #define C_00B84C_SCRATCH_EN 0xFFFFFFFE 998 #define S_00B84C_USER_SGPR(x) (((x) & 0x1F) << 1) 999 #define G_00B84C_USER_SGPR(x) (((x) >> 1) & 0x1F) 1000 #define C_00B84C_USER_SGPR 0xFFFFFFC1 1001 #define S_00B84C_TRAP_HANDLER(x) (((x) & 0x1) << 6) 1002 #define G_00B84C_TRAP_HANDLER(x) (((x) >> 6) & 0x1) 1003 #define C_00B84C_TRAP_HANDLER 0xFFFFFFBF 1004 #define S_00B84C_TGID_X_EN(x) (((x) & 0x1) << 7) 1005 #define G_00B84C_TGID_X_EN(x) (((x) >> 7) & 0x1) 1006 #define C_00B84C_TGID_X_EN 0xFFFFFF7F 1007 #define S_00B84C_TGID_Y_EN(x) (((x) & 0x1) << 8) 1008 #define G_00B84C_TGID_Y_EN(x) (((x) >> 8) & 0x1) 1009 #define C_00B84C_TGID_Y_EN 0xFFFFFEFF 1010 #define S_00B84C_TGID_Z_EN(x) (((x) & 0x1) << 9) 1011 #define G_00B84C_TGID_Z_EN(x) (((x) >> 9) & 0x1) 1012 #define C_00B84C_TGID_Z_EN 0xFFFFFDFF 1013 #define S_00B84C_TG_SIZE_EN(x) (((x) & 0x1) << 10) 1014 #define G_00B84C_TG_SIZE_EN(x) (((x) >> 10) & 0x1) 1015 #define C_00B84C_TG_SIZE_EN 0xFFFFFBFF 1016 #define S_00B84C_TIDIG_COMP_CNT(x) (((x) & 0x03) << 11) 1017 #define G_00B84C_TIDIG_COMP_CNT(x) (((x) >> 11) & 0x03) 1018 #define C_00B84C_TIDIG_COMP_CNT 0xFFFFE7FF 1019 /* CIK */ 1020 #define S_00B84C_EXCP_EN_MSB(x) (((x) & 0x03) << 13) 1021 #define G_00B84C_EXCP_EN_MSB(x) (((x) >> 13) & 0x03) 1022 #define C_00B84C_EXCP_EN_MSB 0xFFFF9FFF 1023 /* */ 1024 #define S_00B84C_LDS_SIZE(x) (((x) & 0x1FF) << 15) 1025 #define G_00B84C_LDS_SIZE(x) (((x) >> 15) & 0x1FF) 1026 #define C_00B84C_LDS_SIZE 0xFF007FFF 1027 #define S_00B84C_EXCP_EN(x) (((x) & 0x7F) << 24) 1028 #define G_00B84C_EXCP_EN(x) (((x) >> 24) & 0x7F) 1029 #define C_00B84C_EXCP_EN 1030 1031 #define R_0286CC_SPI_PS_INPUT_ENA 0x0286CC 1032 #define R_0286D0_SPI_PS_INPUT_ADDR 0x0286D0 1033 1034 #define R_00B848_COMPUTE_PGM_RSRC1 0x00B848 1035 #define S_00B848_VGPRS(x) (((x) & 0x3F) << 0) 1036 #define G_00B848_VGPRS(x) (((x) >> 0) & 0x3F) 1037 #define C_00B848_VGPRS 0xFFFFFFC0 1038 #define S_00B848_SGPRS(x) (((x) & 0x0F) << 6) 1039 #define G_00B848_SGPRS(x) (((x) >> 6) & 0x0F) 1040 #define C_00B848_SGPRS 0xFFFFFC3F 1041 #define S_00B848_PRIORITY(x) (((x) & 0x03) << 10) 1042 #define G_00B848_PRIORITY(x) (((x) >> 10) & 0x03) 1043 #define C_00B848_PRIORITY 0xFFFFF3FF 1044 #define S_00B848_FLOAT_MODE(x) (((x) & 0xFF) << 12) 1045 #define G_00B848_FLOAT_MODE(x) (((x) >> 12) & 0xFF) 1046 #define C_00B848_FLOAT_MODE 0xFFF00FFF 1047 #define S_00B848_PRIV(x) (((x) & 0x1) << 20) 1048 #define G_00B848_PRIV(x) (((x) >> 20) & 0x1) 1049 #define C_00B848_PRIV 0xFFEFFFFF 1050 #define S_00B848_DX10_CLAMP(x) (((x) & 0x1) << 21) 1051 #define G_00B848_DX10_CLAMP(x) (((x) >> 21) & 0x1) 1052 #define C_00B848_DX10_CLAMP 0xFFDFFFFF 1053 #define S_00B848_DEBUG_MODE(x) (((x) & 0x1) << 22) 1054 #define G_00B848_DEBUG_MODE(x) (((x) >> 22) & 0x1) 1055 #define C_00B848_DEBUG_MODE 0xFFBFFFFF 1056 #define S_00B848_IEEE_MODE(x) (((x) & 0x1) << 23) 1057 #define G_00B848_IEEE_MODE(x) (((x) >> 23) & 0x1) 1058 #define C_00B848_IEEE_MODE 0xFF7FFFFF 1059 #define S_00B848_WGP_MODE(x) (((x) & 0x1) << 29) 1060 #define G_00B848_WGP_MODE(x) (((x) >> 29) & 0x1) 1061 #define C_00B848_WGP_MODE 0xDFFFFFFF 1062 #define S_00B848_MEM_ORDERED(x) (((x) & 0x1) << 30) 1063 #define G_00B848_MEM_ORDERED(x) (((x) >> 30) & 0x1) 1064 #define C_00B848_MEM_ORDERED 0xBFFFFFFF 1065 #define S_00B848_FWD_PROGRESS(x) (((x) & 0x1) << 31) 1066 #define G_00B848_FWD_PROGRESS(x) (((x) >> 31) & 0x1) 1067 #define C_00B848_FWD_PROGRESS 0x7FFFFFFF 1068 1069 1070 // Helpers for setting FLOAT_MODE 1071 #define FP_ROUND_ROUND_TO_NEAREST 0 1072 #define FP_ROUND_ROUND_TO_INF 1 1073 #define FP_ROUND_ROUND_TO_NEGINF 2 1074 #define FP_ROUND_ROUND_TO_ZERO 3 1075 1076 // Bits 3:0 control rounding mode. 1:0 control single precision, 3:2 double 1077 // precision. 1078 #define FP_ROUND_MODE_SP(x) ((x) & 0x3) 1079 #define FP_ROUND_MODE_DP(x) (((x) & 0x3) << 2) 1080 1081 #define FP_DENORM_FLUSH_IN_FLUSH_OUT 0 1082 #define FP_DENORM_FLUSH_OUT 1 1083 #define FP_DENORM_FLUSH_IN 2 1084 #define FP_DENORM_FLUSH_NONE 3 1085 1086 1087 // Bits 7:4 control denormal handling. 5:4 control single precision, 6:7 double 1088 // precision. 1089 #define FP_DENORM_MODE_SP(x) (((x) & 0x3) << 4) 1090 #define FP_DENORM_MODE_DP(x) (((x) & 0x3) << 6) 1091 1092 #define R_00B860_COMPUTE_TMPRING_SIZE 0x00B860 1093 #define S_00B860_WAVESIZE_PreGFX11(x) (((x) & 0x1FFF) << 12) 1094 #define S_00B860_WAVESIZE_GFX11Plus(x) (((x) & 0x7FFF) << 12) 1095 1096 #define R_0286E8_SPI_TMPRING_SIZE 0x0286E8 1097 #define S_0286E8_WAVESIZE_PreGFX11(x) (((x) & 0x1FFF) << 12) 1098 #define S_0286E8_WAVESIZE_GFX11Plus(x) (((x) & 0x7FFF) << 12) 1099 1100 #define R_028B54_VGT_SHADER_STAGES_EN 0x028B54 1101 #define S_028B54_HS_W32_EN(x) (((x) & 0x1) << 21) 1102 #define S_028B54_GS_W32_EN(x) (((x) & 0x1) << 22) 1103 #define S_028B54_VS_W32_EN(x) (((x) & 0x1) << 23) 1104 #define R_0286D8_SPI_PS_IN_CONTROL 0x0286D8 1105 #define S_0286D8_PS_W32_EN(x) (((x) & 0x1) << 15) 1106 #define R_00B800_COMPUTE_DISPATCH_INITIATOR 0x00B800 1107 #define S_00B800_CS_W32_EN(x) (((x) & 0x1) << 15) 1108 1109 #define R_SPILLED_SGPRS 0x4 1110 #define R_SPILLED_VGPRS 0x8 1111 } // End namespace llvm 1112 1113 #endif 1114