1 //===- AMDGPUBaseInfo.cpp - AMDGPU Base encoding information --------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "AMDGPUBaseInfo.h" 10 #include "AMDGPU.h" 11 #include "AMDGPUAsmUtils.h" 12 #include "AMDKernelCodeT.h" 13 #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 14 #include "llvm/BinaryFormat/ELF.h" 15 #include "llvm/IR/Attributes.h" 16 #include "llvm/IR/Constants.h" 17 #include "llvm/IR/Function.h" 18 #include "llvm/IR/GlobalValue.h" 19 #include "llvm/IR/IntrinsicsAMDGPU.h" 20 #include "llvm/IR/IntrinsicsR600.h" 21 #include "llvm/IR/LLVMContext.h" 22 #include "llvm/MC/MCInstrInfo.h" 23 #include "llvm/MC/MCRegisterInfo.h" 24 #include "llvm/MC/MCSubtargetInfo.h" 25 #include "llvm/Support/AMDHSAKernelDescriptor.h" 26 #include "llvm/Support/CommandLine.h" 27 #include "llvm/TargetParser/TargetParser.h" 28 #include <optional> 29 30 #define GET_INSTRINFO_NAMED_OPS 31 #define GET_INSTRMAP_INFO 32 #include "AMDGPUGenInstrInfo.inc" 33 34 static llvm::cl::opt<unsigned> DefaultAMDHSACodeObjectVersion( 35 "amdhsa-code-object-version", llvm::cl::Hidden, 36 llvm::cl::init(llvm::AMDGPU::AMDHSA_COV5), 37 llvm::cl::desc("Set default AMDHSA Code Object Version (module flag " 38 "or asm directive still take priority if present)")); 39 40 namespace { 41 42 /// \returns Bit mask for given bit \p Shift and bit \p Width. 43 unsigned getBitMask(unsigned Shift, unsigned Width) { 44 return ((1 << Width) - 1) << Shift; 45 } 46 47 /// Packs \p Src into \p Dst for given bit \p Shift and bit \p Width. 48 /// 49 /// \returns Packed \p Dst. 50 unsigned packBits(unsigned Src, unsigned Dst, unsigned Shift, unsigned Width) { 51 unsigned Mask = getBitMask(Shift, Width); 52 return ((Src << Shift) & Mask) | (Dst & ~Mask); 53 } 54 55 /// Unpacks bits from \p Src for given bit \p Shift and bit \p Width. 56 /// 57 /// \returns Unpacked bits. 58 unsigned unpackBits(unsigned Src, unsigned Shift, unsigned Width) { 59 return (Src & getBitMask(Shift, Width)) >> Shift; 60 } 61 62 /// \returns Vmcnt bit shift (lower bits). 63 unsigned getVmcntBitShiftLo(unsigned VersionMajor) { 64 return VersionMajor >= 11 ? 10 : 0; 65 } 66 67 /// \returns Vmcnt bit width (lower bits). 68 unsigned getVmcntBitWidthLo(unsigned VersionMajor) { 69 return VersionMajor >= 11 ? 6 : 4; 70 } 71 72 /// \returns Expcnt bit shift. 73 unsigned getExpcntBitShift(unsigned VersionMajor) { 74 return VersionMajor >= 11 ? 0 : 4; 75 } 76 77 /// \returns Expcnt bit width. 78 unsigned getExpcntBitWidth(unsigned VersionMajor) { return 3; } 79 80 /// \returns Lgkmcnt bit shift. 81 unsigned getLgkmcntBitShift(unsigned VersionMajor) { 82 return VersionMajor >= 11 ? 4 : 8; 83 } 84 85 /// \returns Lgkmcnt bit width. 86 unsigned getLgkmcntBitWidth(unsigned VersionMajor) { 87 return VersionMajor >= 10 ? 6 : 4; 88 } 89 90 /// \returns Vmcnt bit shift (higher bits). 91 unsigned getVmcntBitShiftHi(unsigned VersionMajor) { return 14; } 92 93 /// \returns Vmcnt bit width (higher bits). 94 unsigned getVmcntBitWidthHi(unsigned VersionMajor) { 95 return (VersionMajor == 9 || VersionMajor == 10) ? 2 : 0; 96 } 97 98 /// \returns Loadcnt bit width 99 unsigned getLoadcntBitWidth(unsigned VersionMajor) { 100 return VersionMajor >= 12 ? 6 : 0; 101 } 102 103 /// \returns Samplecnt bit width. 104 unsigned getSamplecntBitWidth(unsigned VersionMajor) { 105 return VersionMajor >= 12 ? 6 : 0; 106 } 107 108 /// \returns Bvhcnt bit width. 109 unsigned getBvhcntBitWidth(unsigned VersionMajor) { 110 return VersionMajor >= 12 ? 3 : 0; 111 } 112 113 /// \returns Dscnt bit width. 114 unsigned getDscntBitWidth(unsigned VersionMajor) { 115 return VersionMajor >= 12 ? 6 : 0; 116 } 117 118 /// \returns Dscnt bit shift in combined S_WAIT instructions. 119 unsigned getDscntBitShift(unsigned VersionMajor) { return 0; } 120 121 /// \returns Storecnt or Vscnt bit width, depending on VersionMajor. 122 unsigned getStorecntBitWidth(unsigned VersionMajor) { 123 return VersionMajor >= 10 ? 6 : 0; 124 } 125 126 /// \returns Kmcnt bit width. 127 unsigned getKmcntBitWidth(unsigned VersionMajor) { 128 return VersionMajor >= 12 ? 5 : 0; 129 } 130 131 /// \returns shift for Loadcnt/Storecnt in combined S_WAIT instructions. 132 unsigned getLoadcntStorecntBitShift(unsigned VersionMajor) { 133 return VersionMajor >= 12 ? 8 : 0; 134 } 135 136 /// \returns VmVsrc bit width 137 inline unsigned getVmVsrcBitWidth() { return 3; } 138 139 /// \returns VmVsrc bit shift 140 inline unsigned getVmVsrcBitShift() { return 2; } 141 142 /// \returns VaVdst bit width 143 inline unsigned getVaVdstBitWidth() { return 4; } 144 145 /// \returns VaVdst bit shift 146 inline unsigned getVaVdstBitShift() { return 12; } 147 148 /// \returns SaSdst bit width 149 inline unsigned getSaSdstBitWidth() { return 1; } 150 151 /// \returns SaSdst bit shift 152 inline unsigned getSaSdstBitShift() { return 0; } 153 154 } // end namespace anonymous 155 156 namespace llvm { 157 158 namespace AMDGPU { 159 160 /// \returns True if \p STI is AMDHSA. 161 bool isHsaAbi(const MCSubtargetInfo &STI) { 162 return STI.getTargetTriple().getOS() == Triple::AMDHSA; 163 } 164 165 unsigned getAMDHSACodeObjectVersion(const Module &M) { 166 if (auto Ver = mdconst::extract_or_null<ConstantInt>( 167 M.getModuleFlag("amdgpu_code_object_version"))) { 168 return (unsigned)Ver->getZExtValue() / 100; 169 } 170 171 return getDefaultAMDHSACodeObjectVersion(); 172 } 173 174 unsigned getDefaultAMDHSACodeObjectVersion() { 175 return DefaultAMDHSACodeObjectVersion; 176 } 177 178 uint8_t getELFABIVersion(const Triple &T, unsigned CodeObjectVersion) { 179 if (T.getOS() != Triple::AMDHSA) 180 return 0; 181 182 switch (CodeObjectVersion) { 183 case 4: 184 return ELF::ELFABIVERSION_AMDGPU_HSA_V4; 185 case 5: 186 return ELF::ELFABIVERSION_AMDGPU_HSA_V5; 187 default: 188 report_fatal_error("Unsupported AMDHSA Code Object Version " + 189 Twine(CodeObjectVersion)); 190 } 191 } 192 193 unsigned getMultigridSyncArgImplicitArgPosition(unsigned CodeObjectVersion) { 194 switch (CodeObjectVersion) { 195 case AMDHSA_COV4: 196 return 48; 197 case AMDHSA_COV5: 198 default: 199 return AMDGPU::ImplicitArg::MULTIGRID_SYNC_ARG_OFFSET; 200 } 201 } 202 203 204 // FIXME: All such magic numbers about the ABI should be in a 205 // central TD file. 206 unsigned getHostcallImplicitArgPosition(unsigned CodeObjectVersion) { 207 switch (CodeObjectVersion) { 208 case AMDHSA_COV4: 209 return 24; 210 case AMDHSA_COV5: 211 default: 212 return AMDGPU::ImplicitArg::HOSTCALL_PTR_OFFSET; 213 } 214 } 215 216 unsigned getDefaultQueueImplicitArgPosition(unsigned CodeObjectVersion) { 217 switch (CodeObjectVersion) { 218 case AMDHSA_COV4: 219 return 32; 220 case AMDHSA_COV5: 221 default: 222 return AMDGPU::ImplicitArg::DEFAULT_QUEUE_OFFSET; 223 } 224 } 225 226 unsigned getCompletionActionImplicitArgPosition(unsigned CodeObjectVersion) { 227 switch (CodeObjectVersion) { 228 case AMDHSA_COV4: 229 return 40; 230 case AMDHSA_COV5: 231 default: 232 return AMDGPU::ImplicitArg::COMPLETION_ACTION_OFFSET; 233 } 234 } 235 236 #define GET_MIMGBaseOpcodesTable_IMPL 237 #define GET_MIMGDimInfoTable_IMPL 238 #define GET_MIMGInfoTable_IMPL 239 #define GET_MIMGLZMappingTable_IMPL 240 #define GET_MIMGMIPMappingTable_IMPL 241 #define GET_MIMGBiasMappingTable_IMPL 242 #define GET_MIMGOffsetMappingTable_IMPL 243 #define GET_MIMGG16MappingTable_IMPL 244 #define GET_MAIInstInfoTable_IMPL 245 #include "AMDGPUGenSearchableTables.inc" 246 247 int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding, 248 unsigned VDataDwords, unsigned VAddrDwords) { 249 const MIMGInfo *Info = getMIMGOpcodeHelper(BaseOpcode, MIMGEncoding, 250 VDataDwords, VAddrDwords); 251 return Info ? Info->Opcode : -1; 252 } 253 254 const MIMGBaseOpcodeInfo *getMIMGBaseOpcode(unsigned Opc) { 255 const MIMGInfo *Info = getMIMGInfo(Opc); 256 return Info ? getMIMGBaseOpcodeInfo(Info->BaseOpcode) : nullptr; 257 } 258 259 int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels) { 260 const MIMGInfo *OrigInfo = getMIMGInfo(Opc); 261 const MIMGInfo *NewInfo = 262 getMIMGOpcodeHelper(OrigInfo->BaseOpcode, OrigInfo->MIMGEncoding, 263 NewChannels, OrigInfo->VAddrDwords); 264 return NewInfo ? NewInfo->Opcode : -1; 265 } 266 267 unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode, 268 const MIMGDimInfo *Dim, bool IsA16, 269 bool IsG16Supported) { 270 unsigned AddrWords = BaseOpcode->NumExtraArgs; 271 unsigned AddrComponents = (BaseOpcode->Coordinates ? Dim->NumCoords : 0) + 272 (BaseOpcode->LodOrClampOrMip ? 1 : 0); 273 if (IsA16) 274 AddrWords += divideCeil(AddrComponents, 2); 275 else 276 AddrWords += AddrComponents; 277 278 // Note: For subtargets that support A16 but not G16, enabling A16 also 279 // enables 16 bit gradients. 280 // For subtargets that support A16 (operand) and G16 (done with a different 281 // instruction encoding), they are independent. 282 283 if (BaseOpcode->Gradients) { 284 if ((IsA16 && !IsG16Supported) || BaseOpcode->G16) 285 // There are two gradients per coordinate, we pack them separately. 286 // For the 3d case, 287 // we get (dy/du, dx/du) (-, dz/du) (dy/dv, dx/dv) (-, dz/dv) 288 AddrWords += alignTo<2>(Dim->NumGradients / 2); 289 else 290 AddrWords += Dim->NumGradients; 291 } 292 return AddrWords; 293 } 294 295 struct MUBUFInfo { 296 uint16_t Opcode; 297 uint16_t BaseOpcode; 298 uint8_t elements; 299 bool has_vaddr; 300 bool has_srsrc; 301 bool has_soffset; 302 bool IsBufferInv; 303 }; 304 305 struct MTBUFInfo { 306 uint16_t Opcode; 307 uint16_t BaseOpcode; 308 uint8_t elements; 309 bool has_vaddr; 310 bool has_srsrc; 311 bool has_soffset; 312 }; 313 314 struct SMInfo { 315 uint16_t Opcode; 316 bool IsBuffer; 317 }; 318 319 struct VOPInfo { 320 uint16_t Opcode; 321 bool IsSingle; 322 }; 323 324 struct VOPC64DPPInfo { 325 uint16_t Opcode; 326 }; 327 328 struct VOPDComponentInfo { 329 uint16_t BaseVOP; 330 uint16_t VOPDOp; 331 bool CanBeVOPDX; 332 }; 333 334 struct VOPDInfo { 335 uint16_t Opcode; 336 uint16_t OpX; 337 uint16_t OpY; 338 uint16_t Subtarget; 339 }; 340 341 struct VOPTrue16Info { 342 uint16_t Opcode; 343 bool IsTrue16; 344 }; 345 346 #define GET_MTBUFInfoTable_DECL 347 #define GET_MTBUFInfoTable_IMPL 348 #define GET_MUBUFInfoTable_DECL 349 #define GET_MUBUFInfoTable_IMPL 350 #define GET_SMInfoTable_DECL 351 #define GET_SMInfoTable_IMPL 352 #define GET_VOP1InfoTable_DECL 353 #define GET_VOP1InfoTable_IMPL 354 #define GET_VOP2InfoTable_DECL 355 #define GET_VOP2InfoTable_IMPL 356 #define GET_VOP3InfoTable_DECL 357 #define GET_VOP3InfoTable_IMPL 358 #define GET_VOPC64DPPTable_DECL 359 #define GET_VOPC64DPPTable_IMPL 360 #define GET_VOPC64DPP8Table_DECL 361 #define GET_VOPC64DPP8Table_IMPL 362 #define GET_VOPDComponentTable_DECL 363 #define GET_VOPDComponentTable_IMPL 364 #define GET_VOPDPairs_DECL 365 #define GET_VOPDPairs_IMPL 366 #define GET_VOPTrue16Table_DECL 367 #define GET_VOPTrue16Table_IMPL 368 #define GET_WMMAOpcode2AddrMappingTable_DECL 369 #define GET_WMMAOpcode2AddrMappingTable_IMPL 370 #define GET_WMMAOpcode3AddrMappingTable_DECL 371 #define GET_WMMAOpcode3AddrMappingTable_IMPL 372 #include "AMDGPUGenSearchableTables.inc" 373 374 int getMTBUFBaseOpcode(unsigned Opc) { 375 const MTBUFInfo *Info = getMTBUFInfoFromOpcode(Opc); 376 return Info ? Info->BaseOpcode : -1; 377 } 378 379 int getMTBUFOpcode(unsigned BaseOpc, unsigned Elements) { 380 const MTBUFInfo *Info = getMTBUFInfoFromBaseOpcodeAndElements(BaseOpc, Elements); 381 return Info ? Info->Opcode : -1; 382 } 383 384 int getMTBUFElements(unsigned Opc) { 385 const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc); 386 return Info ? Info->elements : 0; 387 } 388 389 bool getMTBUFHasVAddr(unsigned Opc) { 390 const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc); 391 return Info ? Info->has_vaddr : false; 392 } 393 394 bool getMTBUFHasSrsrc(unsigned Opc) { 395 const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc); 396 return Info ? Info->has_srsrc : false; 397 } 398 399 bool getMTBUFHasSoffset(unsigned Opc) { 400 const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc); 401 return Info ? Info->has_soffset : false; 402 } 403 404 int getMUBUFBaseOpcode(unsigned Opc) { 405 const MUBUFInfo *Info = getMUBUFInfoFromOpcode(Opc); 406 return Info ? Info->BaseOpcode : -1; 407 } 408 409 int getMUBUFOpcode(unsigned BaseOpc, unsigned Elements) { 410 const MUBUFInfo *Info = getMUBUFInfoFromBaseOpcodeAndElements(BaseOpc, Elements); 411 return Info ? Info->Opcode : -1; 412 } 413 414 int getMUBUFElements(unsigned Opc) { 415 const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc); 416 return Info ? Info->elements : 0; 417 } 418 419 bool getMUBUFHasVAddr(unsigned Opc) { 420 const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc); 421 return Info ? Info->has_vaddr : false; 422 } 423 424 bool getMUBUFHasSrsrc(unsigned Opc) { 425 const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc); 426 return Info ? Info->has_srsrc : false; 427 } 428 429 bool getMUBUFHasSoffset(unsigned Opc) { 430 const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc); 431 return Info ? Info->has_soffset : false; 432 } 433 434 bool getMUBUFIsBufferInv(unsigned Opc) { 435 const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc); 436 return Info ? Info->IsBufferInv : false; 437 } 438 439 bool getSMEMIsBuffer(unsigned Opc) { 440 const SMInfo *Info = getSMEMOpcodeHelper(Opc); 441 return Info ? Info->IsBuffer : false; 442 } 443 444 bool getVOP1IsSingle(unsigned Opc) { 445 const VOPInfo *Info = getVOP1OpcodeHelper(Opc); 446 return Info ? Info->IsSingle : false; 447 } 448 449 bool getVOP2IsSingle(unsigned Opc) { 450 const VOPInfo *Info = getVOP2OpcodeHelper(Opc); 451 return Info ? Info->IsSingle : false; 452 } 453 454 bool getVOP3IsSingle(unsigned Opc) { 455 const VOPInfo *Info = getVOP3OpcodeHelper(Opc); 456 return Info ? Info->IsSingle : false; 457 } 458 459 bool isVOPC64DPP(unsigned Opc) { 460 return isVOPC64DPPOpcodeHelper(Opc) || isVOPC64DPP8OpcodeHelper(Opc); 461 } 462 463 bool getMAIIsDGEMM(unsigned Opc) { 464 const MAIInstInfo *Info = getMAIInstInfoHelper(Opc); 465 return Info ? Info->is_dgemm : false; 466 } 467 468 bool getMAIIsGFX940XDL(unsigned Opc) { 469 const MAIInstInfo *Info = getMAIInstInfoHelper(Opc); 470 return Info ? Info->is_gfx940_xdl : false; 471 } 472 473 unsigned getVOPDEncodingFamily(const MCSubtargetInfo &ST) { 474 if (ST.hasFeature(AMDGPU::FeatureGFX12Insts)) 475 return SIEncodingFamily::GFX12; 476 if (ST.hasFeature(AMDGPU::FeatureGFX11Insts)) 477 return SIEncodingFamily::GFX11; 478 llvm_unreachable("Subtarget generation does not support VOPD!"); 479 } 480 481 CanBeVOPD getCanBeVOPD(unsigned Opc) { 482 const VOPDComponentInfo *Info = getVOPDComponentHelper(Opc); 483 if (Info) 484 return {Info->CanBeVOPDX, true}; 485 else 486 return {false, false}; 487 } 488 489 unsigned getVOPDOpcode(unsigned Opc) { 490 const VOPDComponentInfo *Info = getVOPDComponentHelper(Opc); 491 return Info ? Info->VOPDOp : ~0u; 492 } 493 494 bool isVOPD(unsigned Opc) { 495 return AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src0X); 496 } 497 498 bool isMAC(unsigned Opc) { 499 return Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 || 500 Opc == AMDGPU::V_MAC_F32_e64_gfx10 || 501 Opc == AMDGPU::V_MAC_F32_e64_vi || 502 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 || 503 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 || 504 Opc == AMDGPU::V_MAC_F16_e64_vi || 505 Opc == AMDGPU::V_FMAC_F64_e64_gfx90a || 506 Opc == AMDGPU::V_FMAC_F32_e64_gfx10 || 507 Opc == AMDGPU::V_FMAC_F32_e64_gfx11 || 508 Opc == AMDGPU::V_FMAC_F32_e64_gfx12 || 509 Opc == AMDGPU::V_FMAC_F32_e64_vi || 510 Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 || 511 Opc == AMDGPU::V_FMAC_DX9_ZERO_F32_e64_gfx11 || 512 Opc == AMDGPU::V_FMAC_F16_e64_gfx10 || 513 Opc == AMDGPU::V_FMAC_F16_t16_e64_gfx11 || 514 Opc == AMDGPU::V_FMAC_F16_t16_e64_gfx12 || 515 Opc == AMDGPU::V_DOT2C_F32_F16_e64_vi || 516 Opc == AMDGPU::V_DOT2C_I32_I16_e64_vi || 517 Opc == AMDGPU::V_DOT4C_I32_I8_e64_vi || 518 Opc == AMDGPU::V_DOT8C_I32_I4_e64_vi; 519 } 520 521 bool isPermlane16(unsigned Opc) { 522 return Opc == AMDGPU::V_PERMLANE16_B32_gfx10 || 523 Opc == AMDGPU::V_PERMLANEX16_B32_gfx10 || 524 Opc == AMDGPU::V_PERMLANE16_B32_e64_gfx11 || 525 Opc == AMDGPU::V_PERMLANEX16_B32_e64_gfx11 || 526 Opc == AMDGPU::V_PERMLANE16_B32_e64_gfx12 || 527 Opc == AMDGPU::V_PERMLANEX16_B32_e64_gfx12 || 528 Opc == AMDGPU::V_PERMLANE16_VAR_B32_e64_gfx12 || 529 Opc == AMDGPU::V_PERMLANEX16_VAR_B32_e64_gfx12; 530 } 531 532 bool isCvt_F32_Fp8_Bf8_e64(unsigned Opc) { 533 return Opc == AMDGPU::V_CVT_F32_BF8_e64_gfx12 || 534 Opc == AMDGPU::V_CVT_F32_FP8_e64_gfx12 || 535 Opc == AMDGPU::V_CVT_F32_BF8_e64_dpp_gfx12 || 536 Opc == AMDGPU::V_CVT_F32_FP8_e64_dpp_gfx12 || 537 Opc == AMDGPU::V_CVT_F32_BF8_e64_dpp8_gfx12 || 538 Opc == AMDGPU::V_CVT_F32_FP8_e64_dpp8_gfx12 || 539 Opc == AMDGPU::V_CVT_PK_F32_BF8_e64_gfx12 || 540 Opc == AMDGPU::V_CVT_PK_F32_FP8_e64_gfx12; 541 } 542 543 bool isGenericAtomic(unsigned Opc) { 544 return Opc == AMDGPU::G_AMDGPU_ATOMIC_FMIN || 545 Opc == AMDGPU::G_AMDGPU_ATOMIC_FMAX || 546 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SWAP || 547 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_ADD || 548 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SUB || 549 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMIN || 550 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMIN || 551 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMAX || 552 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMAX || 553 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_AND || 554 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_OR || 555 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_XOR || 556 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_INC || 557 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_DEC || 558 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FADD || 559 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMIN || 560 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMAX || 561 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_CMPSWAP || 562 Opc == AMDGPU::G_AMDGPU_ATOMIC_CMPXCHG; 563 } 564 565 bool isTrue16Inst(unsigned Opc) { 566 const VOPTrue16Info *Info = getTrue16OpcodeHelper(Opc); 567 return Info ? Info->IsTrue16 : false; 568 } 569 570 unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc) { 571 const WMMAOpcodeMappingInfo *Info = getWMMAMappingInfoFrom2AddrOpcode(Opc); 572 return Info ? Info->Opcode3Addr : ~0u; 573 } 574 575 unsigned mapWMMA3AddrTo2AddrOpcode(unsigned Opc) { 576 const WMMAOpcodeMappingInfo *Info = getWMMAMappingInfoFrom3AddrOpcode(Opc); 577 return Info ? Info->Opcode2Addr : ~0u; 578 } 579 580 // Wrapper for Tablegen'd function. enum Subtarget is not defined in any 581 // header files, so we need to wrap it in a function that takes unsigned 582 // instead. 583 int getMCOpcode(uint16_t Opcode, unsigned Gen) { 584 return getMCOpcodeGen(Opcode, static_cast<Subtarget>(Gen)); 585 } 586 587 int getVOPDFull(unsigned OpX, unsigned OpY, unsigned EncodingFamily) { 588 const VOPDInfo *Info = 589 getVOPDInfoFromComponentOpcodes(OpX, OpY, EncodingFamily); 590 return Info ? Info->Opcode : -1; 591 } 592 593 std::pair<unsigned, unsigned> getVOPDComponents(unsigned VOPDOpcode) { 594 const VOPDInfo *Info = getVOPDOpcodeHelper(VOPDOpcode); 595 assert(Info); 596 auto OpX = getVOPDBaseFromComponent(Info->OpX); 597 auto OpY = getVOPDBaseFromComponent(Info->OpY); 598 assert(OpX && OpY); 599 return {OpX->BaseVOP, OpY->BaseVOP}; 600 } 601 602 namespace VOPD { 603 604 ComponentProps::ComponentProps(const MCInstrDesc &OpDesc) { 605 assert(OpDesc.getNumDefs() == Component::DST_NUM); 606 607 assert(OpDesc.getOperandConstraint(Component::SRC0, MCOI::TIED_TO) == -1); 608 assert(OpDesc.getOperandConstraint(Component::SRC1, MCOI::TIED_TO) == -1); 609 auto TiedIdx = OpDesc.getOperandConstraint(Component::SRC2, MCOI::TIED_TO); 610 assert(TiedIdx == -1 || TiedIdx == Component::DST); 611 HasSrc2Acc = TiedIdx != -1; 612 613 SrcOperandsNum = OpDesc.getNumOperands() - OpDesc.getNumDefs(); 614 assert(SrcOperandsNum <= Component::MAX_SRC_NUM); 615 616 auto OperandsNum = OpDesc.getNumOperands(); 617 unsigned CompOprIdx; 618 for (CompOprIdx = Component::SRC1; CompOprIdx < OperandsNum; ++CompOprIdx) { 619 if (OpDesc.operands()[CompOprIdx].OperandType == AMDGPU::OPERAND_KIMM32) { 620 MandatoryLiteralIdx = CompOprIdx; 621 break; 622 } 623 } 624 } 625 626 unsigned ComponentInfo::getIndexInParsedOperands(unsigned CompOprIdx) const { 627 assert(CompOprIdx < Component::MAX_OPR_NUM); 628 629 if (CompOprIdx == Component::DST) 630 return getIndexOfDstInParsedOperands(); 631 632 auto CompSrcIdx = CompOprIdx - Component::DST_NUM; 633 if (CompSrcIdx < getCompParsedSrcOperandsNum()) 634 return getIndexOfSrcInParsedOperands(CompSrcIdx); 635 636 // The specified operand does not exist. 637 return 0; 638 } 639 640 std::optional<unsigned> InstInfo::getInvalidCompOperandIndex( 641 std::function<unsigned(unsigned, unsigned)> GetRegIdx, bool SkipSrc) const { 642 643 auto OpXRegs = getRegIndices(ComponentIndex::X, GetRegIdx); 644 auto OpYRegs = getRegIndices(ComponentIndex::Y, GetRegIdx); 645 646 const unsigned CompOprNum = 647 SkipSrc ? Component::DST_NUM : Component::MAX_OPR_NUM; 648 unsigned CompOprIdx; 649 for (CompOprIdx = 0; CompOprIdx < CompOprNum; ++CompOprIdx) { 650 unsigned BanksMasks = VOPD_VGPR_BANK_MASKS[CompOprIdx]; 651 if (OpXRegs[CompOprIdx] && OpYRegs[CompOprIdx] && 652 ((OpXRegs[CompOprIdx] & BanksMasks) == 653 (OpYRegs[CompOprIdx] & BanksMasks))) 654 return CompOprIdx; 655 } 656 657 return {}; 658 } 659 660 // Return an array of VGPR registers [DST,SRC0,SRC1,SRC2] used 661 // by the specified component. If an operand is unused 662 // or is not a VGPR, the corresponding value is 0. 663 // 664 // GetRegIdx(Component, MCOperandIdx) must return a VGPR register index 665 // for the specified component and MC operand. The callback must return 0 666 // if the operand is not a register or not a VGPR. 667 InstInfo::RegIndices InstInfo::getRegIndices( 668 unsigned CompIdx, 669 std::function<unsigned(unsigned, unsigned)> GetRegIdx) const { 670 assert(CompIdx < COMPONENTS_NUM); 671 672 const auto &Comp = CompInfo[CompIdx]; 673 InstInfo::RegIndices RegIndices; 674 675 RegIndices[DST] = GetRegIdx(CompIdx, Comp.getIndexOfDstInMCOperands()); 676 677 for (unsigned CompOprIdx : {SRC0, SRC1, SRC2}) { 678 unsigned CompSrcIdx = CompOprIdx - DST_NUM; 679 RegIndices[CompOprIdx] = 680 Comp.hasRegSrcOperand(CompSrcIdx) 681 ? GetRegIdx(CompIdx, Comp.getIndexOfSrcInMCOperands(CompSrcIdx)) 682 : 0; 683 } 684 return RegIndices; 685 } 686 687 } // namespace VOPD 688 689 VOPD::InstInfo getVOPDInstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY) { 690 return VOPD::InstInfo(OpX, OpY); 691 } 692 693 VOPD::InstInfo getVOPDInstInfo(unsigned VOPDOpcode, 694 const MCInstrInfo *InstrInfo) { 695 auto [OpX, OpY] = getVOPDComponents(VOPDOpcode); 696 const auto &OpXDesc = InstrInfo->get(OpX); 697 const auto &OpYDesc = InstrInfo->get(OpY); 698 VOPD::ComponentInfo OpXInfo(OpXDesc, VOPD::ComponentKind::COMPONENT_X); 699 VOPD::ComponentInfo OpYInfo(OpYDesc, OpXInfo); 700 return VOPD::InstInfo(OpXInfo, OpYInfo); 701 } 702 703 namespace IsaInfo { 704 705 AMDGPUTargetID::AMDGPUTargetID(const MCSubtargetInfo &STI) 706 : STI(STI), XnackSetting(TargetIDSetting::Any), 707 SramEccSetting(TargetIDSetting::Any) { 708 if (!STI.getFeatureBits().test(FeatureSupportsXNACK)) 709 XnackSetting = TargetIDSetting::Unsupported; 710 if (!STI.getFeatureBits().test(FeatureSupportsSRAMECC)) 711 SramEccSetting = TargetIDSetting::Unsupported; 712 } 713 714 void AMDGPUTargetID::setTargetIDFromFeaturesString(StringRef FS) { 715 // Check if xnack or sramecc is explicitly enabled or disabled. In the 716 // absence of the target features we assume we must generate code that can run 717 // in any environment. 718 SubtargetFeatures Features(FS); 719 std::optional<bool> XnackRequested; 720 std::optional<bool> SramEccRequested; 721 722 for (const std::string &Feature : Features.getFeatures()) { 723 if (Feature == "+xnack") 724 XnackRequested = true; 725 else if (Feature == "-xnack") 726 XnackRequested = false; 727 else if (Feature == "+sramecc") 728 SramEccRequested = true; 729 else if (Feature == "-sramecc") 730 SramEccRequested = false; 731 } 732 733 bool XnackSupported = isXnackSupported(); 734 bool SramEccSupported = isSramEccSupported(); 735 736 if (XnackRequested) { 737 if (XnackSupported) { 738 XnackSetting = 739 *XnackRequested ? TargetIDSetting::On : TargetIDSetting::Off; 740 } else { 741 // If a specific xnack setting was requested and this GPU does not support 742 // xnack emit a warning. Setting will remain set to "Unsupported". 743 if (*XnackRequested) { 744 errs() << "warning: xnack 'On' was requested for a processor that does " 745 "not support it!\n"; 746 } else { 747 errs() << "warning: xnack 'Off' was requested for a processor that " 748 "does not support it!\n"; 749 } 750 } 751 } 752 753 if (SramEccRequested) { 754 if (SramEccSupported) { 755 SramEccSetting = 756 *SramEccRequested ? TargetIDSetting::On : TargetIDSetting::Off; 757 } else { 758 // If a specific sramecc setting was requested and this GPU does not 759 // support sramecc emit a warning. Setting will remain set to 760 // "Unsupported". 761 if (*SramEccRequested) { 762 errs() << "warning: sramecc 'On' was requested for a processor that " 763 "does not support it!\n"; 764 } else { 765 errs() << "warning: sramecc 'Off' was requested for a processor that " 766 "does not support it!\n"; 767 } 768 } 769 } 770 } 771 772 static TargetIDSetting 773 getTargetIDSettingFromFeatureString(StringRef FeatureString) { 774 if (FeatureString.ends_with("-")) 775 return TargetIDSetting::Off; 776 if (FeatureString.ends_with("+")) 777 return TargetIDSetting::On; 778 779 llvm_unreachable("Malformed feature string"); 780 } 781 782 void AMDGPUTargetID::setTargetIDFromTargetIDStream(StringRef TargetID) { 783 SmallVector<StringRef, 3> TargetIDSplit; 784 TargetID.split(TargetIDSplit, ':'); 785 786 for (const auto &FeatureString : TargetIDSplit) { 787 if (FeatureString.starts_with("xnack")) 788 XnackSetting = getTargetIDSettingFromFeatureString(FeatureString); 789 if (FeatureString.starts_with("sramecc")) 790 SramEccSetting = getTargetIDSettingFromFeatureString(FeatureString); 791 } 792 } 793 794 std::string AMDGPUTargetID::toString() const { 795 std::string StringRep; 796 raw_string_ostream StreamRep(StringRep); 797 798 auto TargetTriple = STI.getTargetTriple(); 799 auto Version = getIsaVersion(STI.getCPU()); 800 801 StreamRep << TargetTriple.getArchName() << '-' 802 << TargetTriple.getVendorName() << '-' 803 << TargetTriple.getOSName() << '-' 804 << TargetTriple.getEnvironmentName() << '-'; 805 806 std::string Processor; 807 // TODO: Following else statement is present here because we used various 808 // alias names for GPUs up until GFX9 (e.g. 'fiji' is same as 'gfx803'). 809 // Remove once all aliases are removed from GCNProcessors.td. 810 if (Version.Major >= 9) 811 Processor = STI.getCPU().str(); 812 else 813 Processor = (Twine("gfx") + Twine(Version.Major) + Twine(Version.Minor) + 814 Twine(Version.Stepping)) 815 .str(); 816 817 std::string Features; 818 if (STI.getTargetTriple().getOS() == Triple::AMDHSA) { 819 // sramecc. 820 if (getSramEccSetting() == TargetIDSetting::Off) 821 Features += ":sramecc-"; 822 else if (getSramEccSetting() == TargetIDSetting::On) 823 Features += ":sramecc+"; 824 // xnack. 825 if (getXnackSetting() == TargetIDSetting::Off) 826 Features += ":xnack-"; 827 else if (getXnackSetting() == TargetIDSetting::On) 828 Features += ":xnack+"; 829 } 830 831 StreamRep << Processor << Features; 832 833 StreamRep.flush(); 834 return StringRep; 835 } 836 837 unsigned getWavefrontSize(const MCSubtargetInfo *STI) { 838 if (STI->getFeatureBits().test(FeatureWavefrontSize16)) 839 return 16; 840 if (STI->getFeatureBits().test(FeatureWavefrontSize32)) 841 return 32; 842 843 return 64; 844 } 845 846 unsigned getLocalMemorySize(const MCSubtargetInfo *STI) { 847 unsigned BytesPerCU = 0; 848 if (STI->getFeatureBits().test(FeatureLocalMemorySize32768)) 849 BytesPerCU = 32768; 850 if (STI->getFeatureBits().test(FeatureLocalMemorySize65536)) 851 BytesPerCU = 65536; 852 853 // "Per CU" really means "per whatever functional block the waves of a 854 // workgroup must share". So the effective local memory size is doubled in 855 // WGP mode on gfx10. 856 if (isGFX10Plus(*STI) && !STI->getFeatureBits().test(FeatureCuMode)) 857 BytesPerCU *= 2; 858 859 return BytesPerCU; 860 } 861 862 unsigned getAddressableLocalMemorySize(const MCSubtargetInfo *STI) { 863 if (STI->getFeatureBits().test(FeatureLocalMemorySize32768)) 864 return 32768; 865 if (STI->getFeatureBits().test(FeatureLocalMemorySize65536)) 866 return 65536; 867 return 0; 868 } 869 870 unsigned getEUsPerCU(const MCSubtargetInfo *STI) { 871 // "Per CU" really means "per whatever functional block the waves of a 872 // workgroup must share". For gfx10 in CU mode this is the CU, which contains 873 // two SIMDs. 874 if (isGFX10Plus(*STI) && STI->getFeatureBits().test(FeatureCuMode)) 875 return 2; 876 // Pre-gfx10 a CU contains four SIMDs. For gfx10 in WGP mode the WGP contains 877 // two CUs, so a total of four SIMDs. 878 return 4; 879 } 880 881 unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI, 882 unsigned FlatWorkGroupSize) { 883 assert(FlatWorkGroupSize != 0); 884 if (STI->getTargetTriple().getArch() != Triple::amdgcn) 885 return 8; 886 unsigned MaxWaves = getMaxWavesPerEU(STI) * getEUsPerCU(STI); 887 unsigned N = getWavesPerWorkGroup(STI, FlatWorkGroupSize); 888 if (N == 1) { 889 // Single-wave workgroups don't consume barrier resources. 890 return MaxWaves; 891 } 892 893 unsigned MaxBarriers = 16; 894 if (isGFX10Plus(*STI) && !STI->getFeatureBits().test(FeatureCuMode)) 895 MaxBarriers = 32; 896 897 return std::min(MaxWaves / N, MaxBarriers); 898 } 899 900 unsigned getMinWavesPerEU(const MCSubtargetInfo *STI) { 901 return 1; 902 } 903 904 unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI) { 905 // FIXME: Need to take scratch memory into account. 906 if (isGFX90A(*STI)) 907 return 8; 908 if (!isGFX10Plus(*STI)) 909 return 10; 910 return hasGFX10_3Insts(*STI) ? 16 : 20; 911 } 912 913 unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo *STI, 914 unsigned FlatWorkGroupSize) { 915 return divideCeil(getWavesPerWorkGroup(STI, FlatWorkGroupSize), 916 getEUsPerCU(STI)); 917 } 918 919 unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI) { 920 return 1; 921 } 922 923 unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI) { 924 // Some subtargets allow encoding 2048, but this isn't tested or supported. 925 return 1024; 926 } 927 928 unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI, 929 unsigned FlatWorkGroupSize) { 930 return divideCeil(FlatWorkGroupSize, getWavefrontSize(STI)); 931 } 932 933 unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI) { 934 IsaVersion Version = getIsaVersion(STI->getCPU()); 935 if (Version.Major >= 10) 936 return getAddressableNumSGPRs(STI); 937 if (Version.Major >= 8) 938 return 16; 939 return 8; 940 } 941 942 unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI) { 943 return 8; 944 } 945 946 unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI) { 947 IsaVersion Version = getIsaVersion(STI->getCPU()); 948 if (Version.Major >= 8) 949 return 800; 950 return 512; 951 } 952 953 unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI) { 954 if (STI->getFeatureBits().test(FeatureSGPRInitBug)) 955 return FIXED_NUM_SGPRS_FOR_INIT_BUG; 956 957 IsaVersion Version = getIsaVersion(STI->getCPU()); 958 if (Version.Major >= 10) 959 return 106; 960 if (Version.Major >= 8) 961 return 102; 962 return 104; 963 } 964 965 unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) { 966 assert(WavesPerEU != 0); 967 968 IsaVersion Version = getIsaVersion(STI->getCPU()); 969 if (Version.Major >= 10) 970 return 0; 971 972 if (WavesPerEU >= getMaxWavesPerEU(STI)) 973 return 0; 974 975 unsigned MinNumSGPRs = getTotalNumSGPRs(STI) / (WavesPerEU + 1); 976 if (STI->getFeatureBits().test(FeatureTrapHandler)) 977 MinNumSGPRs -= std::min(MinNumSGPRs, (unsigned)TRAP_NUM_SGPRS); 978 MinNumSGPRs = alignDown(MinNumSGPRs, getSGPRAllocGranule(STI)) + 1; 979 return std::min(MinNumSGPRs, getAddressableNumSGPRs(STI)); 980 } 981 982 unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU, 983 bool Addressable) { 984 assert(WavesPerEU != 0); 985 986 unsigned AddressableNumSGPRs = getAddressableNumSGPRs(STI); 987 IsaVersion Version = getIsaVersion(STI->getCPU()); 988 if (Version.Major >= 10) 989 return Addressable ? AddressableNumSGPRs : 108; 990 if (Version.Major >= 8 && !Addressable) 991 AddressableNumSGPRs = 112; 992 unsigned MaxNumSGPRs = getTotalNumSGPRs(STI) / WavesPerEU; 993 if (STI->getFeatureBits().test(FeatureTrapHandler)) 994 MaxNumSGPRs -= std::min(MaxNumSGPRs, (unsigned)TRAP_NUM_SGPRS); 995 MaxNumSGPRs = alignDown(MaxNumSGPRs, getSGPRAllocGranule(STI)); 996 return std::min(MaxNumSGPRs, AddressableNumSGPRs); 997 } 998 999 unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed, 1000 bool FlatScrUsed, bool XNACKUsed) { 1001 unsigned ExtraSGPRs = 0; 1002 if (VCCUsed) 1003 ExtraSGPRs = 2; 1004 1005 IsaVersion Version = getIsaVersion(STI->getCPU()); 1006 if (Version.Major >= 10) 1007 return ExtraSGPRs; 1008 1009 if (Version.Major < 8) { 1010 if (FlatScrUsed) 1011 ExtraSGPRs = 4; 1012 } else { 1013 if (XNACKUsed) 1014 ExtraSGPRs = 4; 1015 1016 if (FlatScrUsed || 1017 STI->getFeatureBits().test(AMDGPU::FeatureArchitectedFlatScratch)) 1018 ExtraSGPRs = 6; 1019 } 1020 1021 return ExtraSGPRs; 1022 } 1023 1024 unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed, 1025 bool FlatScrUsed) { 1026 return getNumExtraSGPRs(STI, VCCUsed, FlatScrUsed, 1027 STI->getFeatureBits().test(AMDGPU::FeatureXNACK)); 1028 } 1029 1030 unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs) { 1031 NumSGPRs = alignTo(std::max(1u, NumSGPRs), getSGPREncodingGranule(STI)); 1032 // SGPRBlocks is actual number of SGPR blocks minus 1. 1033 return NumSGPRs / getSGPREncodingGranule(STI) - 1; 1034 } 1035 1036 unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI, 1037 std::optional<bool> EnableWavefrontSize32) { 1038 if (STI->getFeatureBits().test(FeatureGFX90AInsts)) 1039 return 8; 1040 1041 bool IsWave32 = EnableWavefrontSize32 ? 1042 *EnableWavefrontSize32 : 1043 STI->getFeatureBits().test(FeatureWavefrontSize32); 1044 1045 if (STI->getFeatureBits().test(FeatureGFX11FullVGPRs)) 1046 return IsWave32 ? 24 : 12; 1047 1048 if (hasGFX10_3Insts(*STI)) 1049 return IsWave32 ? 16 : 8; 1050 1051 return IsWave32 ? 8 : 4; 1052 } 1053 1054 unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI, 1055 std::optional<bool> EnableWavefrontSize32) { 1056 if (STI->getFeatureBits().test(FeatureGFX90AInsts)) 1057 return 8; 1058 1059 bool IsWave32 = EnableWavefrontSize32 ? 1060 *EnableWavefrontSize32 : 1061 STI->getFeatureBits().test(FeatureWavefrontSize32); 1062 1063 return IsWave32 ? 8 : 4; 1064 } 1065 1066 unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI) { 1067 if (STI->getFeatureBits().test(FeatureGFX90AInsts)) 1068 return 512; 1069 if (!isGFX10Plus(*STI)) 1070 return 256; 1071 bool IsWave32 = STI->getFeatureBits().test(FeatureWavefrontSize32); 1072 if (STI->getFeatureBits().test(FeatureGFX11FullVGPRs)) 1073 return IsWave32 ? 1536 : 768; 1074 return IsWave32 ? 1024 : 512; 1075 } 1076 1077 unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI) { 1078 if (STI->getFeatureBits().test(FeatureGFX90AInsts)) 1079 return 512; 1080 return 256; 1081 } 1082 1083 unsigned getNumWavesPerEUWithNumVGPRs(const MCSubtargetInfo *STI, 1084 unsigned NumVGPRs) { 1085 unsigned MaxWaves = getMaxWavesPerEU(STI); 1086 unsigned Granule = getVGPRAllocGranule(STI); 1087 if (NumVGPRs < Granule) 1088 return MaxWaves; 1089 unsigned RoundedRegs = alignTo(NumVGPRs, Granule); 1090 return std::min(std::max(getTotalNumVGPRs(STI) / RoundedRegs, 1u), MaxWaves); 1091 } 1092 1093 unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) { 1094 assert(WavesPerEU != 0); 1095 1096 unsigned MaxWavesPerEU = getMaxWavesPerEU(STI); 1097 if (WavesPerEU >= MaxWavesPerEU) 1098 return 0; 1099 1100 unsigned TotNumVGPRs = getTotalNumVGPRs(STI); 1101 unsigned AddrsableNumVGPRs = getAddressableNumVGPRs(STI); 1102 unsigned Granule = getVGPRAllocGranule(STI); 1103 unsigned MaxNumVGPRs = alignDown(TotNumVGPRs / WavesPerEU, Granule); 1104 1105 if (MaxNumVGPRs == alignDown(TotNumVGPRs / MaxWavesPerEU, Granule)) 1106 return 0; 1107 1108 unsigned MinWavesPerEU = getNumWavesPerEUWithNumVGPRs(STI, AddrsableNumVGPRs); 1109 if (WavesPerEU < MinWavesPerEU) 1110 return getMinNumVGPRs(STI, MinWavesPerEU); 1111 1112 unsigned MaxNumVGPRsNext = alignDown(TotNumVGPRs / (WavesPerEU + 1), Granule); 1113 unsigned MinNumVGPRs = 1 + std::min(MaxNumVGPRs - Granule, MaxNumVGPRsNext); 1114 return std::min(MinNumVGPRs, AddrsableNumVGPRs); 1115 } 1116 1117 unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) { 1118 assert(WavesPerEU != 0); 1119 1120 unsigned MaxNumVGPRs = alignDown(getTotalNumVGPRs(STI) / WavesPerEU, 1121 getVGPRAllocGranule(STI)); 1122 unsigned AddressableNumVGPRs = getAddressableNumVGPRs(STI); 1123 return std::min(MaxNumVGPRs, AddressableNumVGPRs); 1124 } 1125 1126 unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs, 1127 std::optional<bool> EnableWavefrontSize32) { 1128 NumVGPRs = alignTo(std::max(1u, NumVGPRs), 1129 getVGPREncodingGranule(STI, EnableWavefrontSize32)); 1130 // VGPRBlocks is actual number of VGPR blocks minus 1. 1131 return NumVGPRs / getVGPREncodingGranule(STI, EnableWavefrontSize32) - 1; 1132 } 1133 1134 } // end namespace IsaInfo 1135 1136 void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header, 1137 const MCSubtargetInfo *STI) { 1138 IsaVersion Version = getIsaVersion(STI->getCPU()); 1139 1140 memset(&Header, 0, sizeof(Header)); 1141 1142 Header.amd_kernel_code_version_major = 1; 1143 Header.amd_kernel_code_version_minor = 2; 1144 Header.amd_machine_kind = 1; // AMD_MACHINE_KIND_AMDGPU 1145 Header.amd_machine_version_major = Version.Major; 1146 Header.amd_machine_version_minor = Version.Minor; 1147 Header.amd_machine_version_stepping = Version.Stepping; 1148 Header.kernel_code_entry_byte_offset = sizeof(Header); 1149 Header.wavefront_size = 6; 1150 1151 // If the code object does not support indirect functions, then the value must 1152 // be 0xffffffff. 1153 Header.call_convention = -1; 1154 1155 // These alignment values are specified in powers of two, so alignment = 1156 // 2^n. The minimum alignment is 2^4 = 16. 1157 Header.kernarg_segment_alignment = 4; 1158 Header.group_segment_alignment = 4; 1159 Header.private_segment_alignment = 4; 1160 1161 if (Version.Major >= 10) { 1162 if (STI->getFeatureBits().test(FeatureWavefrontSize32)) { 1163 Header.wavefront_size = 5; 1164 Header.code_properties |= AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32; 1165 } 1166 Header.compute_pgm_resource_registers |= 1167 S_00B848_WGP_MODE(STI->getFeatureBits().test(FeatureCuMode) ? 0 : 1) | 1168 S_00B848_MEM_ORDERED(1); 1169 } 1170 } 1171 1172 amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor( 1173 const MCSubtargetInfo *STI) { 1174 IsaVersion Version = getIsaVersion(STI->getCPU()); 1175 1176 amdhsa::kernel_descriptor_t KD; 1177 memset(&KD, 0, sizeof(KD)); 1178 1179 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 1180 amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, 1181 amdhsa::FLOAT_DENORM_MODE_FLUSH_NONE); 1182 if (Version.Major >= 12) { 1183 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 1184 amdhsa::COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN, 0); 1185 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 1186 amdhsa::COMPUTE_PGM_RSRC1_GFX12_PLUS_DISABLE_PERF, 0); 1187 } else { 1188 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 1189 amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP, 1); 1190 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 1191 amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE, 1); 1192 } 1193 AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, 1194 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, 1); 1195 if (Version.Major >= 10) { 1196 AMDHSA_BITS_SET(KD.kernel_code_properties, 1197 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, 1198 STI->getFeatureBits().test(FeatureWavefrontSize32) ? 1 : 0); 1199 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 1200 amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE, 1201 STI->getFeatureBits().test(FeatureCuMode) ? 0 : 1); 1202 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 1203 amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED, 1); 1204 } 1205 if (AMDGPU::isGFX90A(*STI)) { 1206 AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, 1207 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, 1208 STI->getFeatureBits().test(FeatureTgSplit) ? 1 : 0); 1209 } 1210 return KD; 1211 } 1212 1213 bool isGroupSegment(const GlobalValue *GV) { 1214 return GV->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS; 1215 } 1216 1217 bool isGlobalSegment(const GlobalValue *GV) { 1218 return GV->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS; 1219 } 1220 1221 bool isReadOnlySegment(const GlobalValue *GV) { 1222 unsigned AS = GV->getAddressSpace(); 1223 return AS == AMDGPUAS::CONSTANT_ADDRESS || 1224 AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT; 1225 } 1226 1227 bool shouldEmitConstantsToTextSection(const Triple &TT) { 1228 return TT.getArch() == Triple::r600; 1229 } 1230 1231 std::pair<unsigned, unsigned> 1232 getIntegerPairAttribute(const Function &F, StringRef Name, 1233 std::pair<unsigned, unsigned> Default, 1234 bool OnlyFirstRequired) { 1235 Attribute A = F.getFnAttribute(Name); 1236 if (!A.isStringAttribute()) 1237 return Default; 1238 1239 LLVMContext &Ctx = F.getContext(); 1240 std::pair<unsigned, unsigned> Ints = Default; 1241 std::pair<StringRef, StringRef> Strs = A.getValueAsString().split(','); 1242 if (Strs.first.trim().getAsInteger(0, Ints.first)) { 1243 Ctx.emitError("can't parse first integer attribute " + Name); 1244 return Default; 1245 } 1246 if (Strs.second.trim().getAsInteger(0, Ints.second)) { 1247 if (!OnlyFirstRequired || !Strs.second.trim().empty()) { 1248 Ctx.emitError("can't parse second integer attribute " + Name); 1249 return Default; 1250 } 1251 } 1252 1253 return Ints; 1254 } 1255 1256 unsigned getVmcntBitMask(const IsaVersion &Version) { 1257 return (1 << (getVmcntBitWidthLo(Version.Major) + 1258 getVmcntBitWidthHi(Version.Major))) - 1259 1; 1260 } 1261 1262 unsigned getLoadcntBitMask(const IsaVersion &Version) { 1263 return (1 << getLoadcntBitWidth(Version.Major)) - 1; 1264 } 1265 1266 unsigned getSamplecntBitMask(const IsaVersion &Version) { 1267 return (1 << getSamplecntBitWidth(Version.Major)) - 1; 1268 } 1269 1270 unsigned getBvhcntBitMask(const IsaVersion &Version) { 1271 return (1 << getBvhcntBitWidth(Version.Major)) - 1; 1272 } 1273 1274 unsigned getExpcntBitMask(const IsaVersion &Version) { 1275 return (1 << getExpcntBitWidth(Version.Major)) - 1; 1276 } 1277 1278 unsigned getLgkmcntBitMask(const IsaVersion &Version) { 1279 return (1 << getLgkmcntBitWidth(Version.Major)) - 1; 1280 } 1281 1282 unsigned getDscntBitMask(const IsaVersion &Version) { 1283 return (1 << getDscntBitWidth(Version.Major)) - 1; 1284 } 1285 1286 unsigned getKmcntBitMask(const IsaVersion &Version) { 1287 return (1 << getKmcntBitWidth(Version.Major)) - 1; 1288 } 1289 1290 unsigned getStorecntBitMask(const IsaVersion &Version) { 1291 return (1 << getStorecntBitWidth(Version.Major)) - 1; 1292 } 1293 1294 unsigned getWaitcntBitMask(const IsaVersion &Version) { 1295 unsigned VmcntLo = getBitMask(getVmcntBitShiftLo(Version.Major), 1296 getVmcntBitWidthLo(Version.Major)); 1297 unsigned Expcnt = getBitMask(getExpcntBitShift(Version.Major), 1298 getExpcntBitWidth(Version.Major)); 1299 unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(Version.Major), 1300 getLgkmcntBitWidth(Version.Major)); 1301 unsigned VmcntHi = getBitMask(getVmcntBitShiftHi(Version.Major), 1302 getVmcntBitWidthHi(Version.Major)); 1303 return VmcntLo | Expcnt | Lgkmcnt | VmcntHi; 1304 } 1305 1306 unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt) { 1307 unsigned VmcntLo = unpackBits(Waitcnt, getVmcntBitShiftLo(Version.Major), 1308 getVmcntBitWidthLo(Version.Major)); 1309 unsigned VmcntHi = unpackBits(Waitcnt, getVmcntBitShiftHi(Version.Major), 1310 getVmcntBitWidthHi(Version.Major)); 1311 return VmcntLo | VmcntHi << getVmcntBitWidthLo(Version.Major); 1312 } 1313 1314 unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt) { 1315 return unpackBits(Waitcnt, getExpcntBitShift(Version.Major), 1316 getExpcntBitWidth(Version.Major)); 1317 } 1318 1319 unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt) { 1320 return unpackBits(Waitcnt, getLgkmcntBitShift(Version.Major), 1321 getLgkmcntBitWidth(Version.Major)); 1322 } 1323 1324 void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt, 1325 unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt) { 1326 Vmcnt = decodeVmcnt(Version, Waitcnt); 1327 Expcnt = decodeExpcnt(Version, Waitcnt); 1328 Lgkmcnt = decodeLgkmcnt(Version, Waitcnt); 1329 } 1330 1331 Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded) { 1332 Waitcnt Decoded; 1333 Decoded.LoadCnt = decodeVmcnt(Version, Encoded); 1334 Decoded.ExpCnt = decodeExpcnt(Version, Encoded); 1335 Decoded.DsCnt = decodeLgkmcnt(Version, Encoded); 1336 return Decoded; 1337 } 1338 1339 unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt, 1340 unsigned Vmcnt) { 1341 Waitcnt = packBits(Vmcnt, Waitcnt, getVmcntBitShiftLo(Version.Major), 1342 getVmcntBitWidthLo(Version.Major)); 1343 return packBits(Vmcnt >> getVmcntBitWidthLo(Version.Major), Waitcnt, 1344 getVmcntBitShiftHi(Version.Major), 1345 getVmcntBitWidthHi(Version.Major)); 1346 } 1347 1348 unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt, 1349 unsigned Expcnt) { 1350 return packBits(Expcnt, Waitcnt, getExpcntBitShift(Version.Major), 1351 getExpcntBitWidth(Version.Major)); 1352 } 1353 1354 unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt, 1355 unsigned Lgkmcnt) { 1356 return packBits(Lgkmcnt, Waitcnt, getLgkmcntBitShift(Version.Major), 1357 getLgkmcntBitWidth(Version.Major)); 1358 } 1359 1360 unsigned encodeWaitcnt(const IsaVersion &Version, 1361 unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt) { 1362 unsigned Waitcnt = getWaitcntBitMask(Version); 1363 Waitcnt = encodeVmcnt(Version, Waitcnt, Vmcnt); 1364 Waitcnt = encodeExpcnt(Version, Waitcnt, Expcnt); 1365 Waitcnt = encodeLgkmcnt(Version, Waitcnt, Lgkmcnt); 1366 return Waitcnt; 1367 } 1368 1369 unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded) { 1370 return encodeWaitcnt(Version, Decoded.LoadCnt, Decoded.ExpCnt, Decoded.DsCnt); 1371 } 1372 1373 static unsigned getCombinedCountBitMask(const IsaVersion &Version, 1374 bool IsStore) { 1375 unsigned Dscnt = getBitMask(getDscntBitShift(Version.Major), 1376 getDscntBitWidth(Version.Major)); 1377 if (IsStore) { 1378 unsigned Storecnt = getBitMask(getLoadcntStorecntBitShift(Version.Major), 1379 getStorecntBitWidth(Version.Major)); 1380 return Dscnt | Storecnt; 1381 } else { 1382 unsigned Loadcnt = getBitMask(getLoadcntStorecntBitShift(Version.Major), 1383 getLoadcntBitWidth(Version.Major)); 1384 return Dscnt | Loadcnt; 1385 } 1386 } 1387 1388 Waitcnt decodeLoadcntDscnt(const IsaVersion &Version, unsigned LoadcntDscnt) { 1389 Waitcnt Decoded; 1390 Decoded.LoadCnt = 1391 unpackBits(LoadcntDscnt, getLoadcntStorecntBitShift(Version.Major), 1392 getLoadcntBitWidth(Version.Major)); 1393 Decoded.DsCnt = unpackBits(LoadcntDscnt, getDscntBitShift(Version.Major), 1394 getDscntBitWidth(Version.Major)); 1395 return Decoded; 1396 } 1397 1398 Waitcnt decodeStorecntDscnt(const IsaVersion &Version, unsigned StorecntDscnt) { 1399 Waitcnt Decoded; 1400 Decoded.StoreCnt = 1401 unpackBits(StorecntDscnt, getLoadcntStorecntBitShift(Version.Major), 1402 getStorecntBitWidth(Version.Major)); 1403 Decoded.DsCnt = unpackBits(StorecntDscnt, getDscntBitShift(Version.Major), 1404 getDscntBitWidth(Version.Major)); 1405 return Decoded; 1406 } 1407 1408 static unsigned encodeLoadcnt(const IsaVersion &Version, unsigned Waitcnt, 1409 unsigned Loadcnt) { 1410 return packBits(Loadcnt, Waitcnt, getLoadcntStorecntBitShift(Version.Major), 1411 getLoadcntBitWidth(Version.Major)); 1412 } 1413 1414 static unsigned encodeStorecnt(const IsaVersion &Version, unsigned Waitcnt, 1415 unsigned Storecnt) { 1416 return packBits(Storecnt, Waitcnt, getLoadcntStorecntBitShift(Version.Major), 1417 getStorecntBitWidth(Version.Major)); 1418 } 1419 1420 static unsigned encodeDscnt(const IsaVersion &Version, unsigned Waitcnt, 1421 unsigned Dscnt) { 1422 return packBits(Dscnt, Waitcnt, getDscntBitShift(Version.Major), 1423 getDscntBitWidth(Version.Major)); 1424 } 1425 1426 static unsigned encodeLoadcntDscnt(const IsaVersion &Version, unsigned Loadcnt, 1427 unsigned Dscnt) { 1428 unsigned Waitcnt = getCombinedCountBitMask(Version, false); 1429 Waitcnt = encodeLoadcnt(Version, Waitcnt, Loadcnt); 1430 Waitcnt = encodeDscnt(Version, Waitcnt, Dscnt); 1431 return Waitcnt; 1432 } 1433 1434 unsigned encodeLoadcntDscnt(const IsaVersion &Version, const Waitcnt &Decoded) { 1435 return encodeLoadcntDscnt(Version, Decoded.LoadCnt, Decoded.DsCnt); 1436 } 1437 1438 static unsigned encodeStorecntDscnt(const IsaVersion &Version, 1439 unsigned Storecnt, unsigned Dscnt) { 1440 unsigned Waitcnt = getCombinedCountBitMask(Version, true); 1441 Waitcnt = encodeStorecnt(Version, Waitcnt, Storecnt); 1442 Waitcnt = encodeDscnt(Version, Waitcnt, Dscnt); 1443 return Waitcnt; 1444 } 1445 1446 unsigned encodeStorecntDscnt(const IsaVersion &Version, 1447 const Waitcnt &Decoded) { 1448 return encodeStorecntDscnt(Version, Decoded.StoreCnt, Decoded.DsCnt); 1449 } 1450 1451 //===----------------------------------------------------------------------===// 1452 // Custom Operands. 1453 // 1454 // A table of custom operands shall describe "primary" operand names 1455 // first followed by aliases if any. It is not required but recommended 1456 // to arrange operands so that operand encoding match operand position 1457 // in the table. This will make disassembly a bit more efficient. 1458 // Unused slots in the table shall have an empty name. 1459 // 1460 //===----------------------------------------------------------------------===// 1461 1462 template <class T> 1463 static bool isValidOpr(int Idx, const CustomOperand<T> OpInfo[], int OpInfoSize, 1464 T Context) { 1465 return 0 <= Idx && Idx < OpInfoSize && !OpInfo[Idx].Name.empty() && 1466 (!OpInfo[Idx].Cond || OpInfo[Idx].Cond(Context)); 1467 } 1468 1469 template <class T> 1470 static int getOprIdx(std::function<bool(const CustomOperand<T> &)> Test, 1471 const CustomOperand<T> OpInfo[], int OpInfoSize, 1472 T Context) { 1473 int InvalidIdx = OPR_ID_UNKNOWN; 1474 for (int Idx = 0; Idx < OpInfoSize; ++Idx) { 1475 if (Test(OpInfo[Idx])) { 1476 if (!OpInfo[Idx].Cond || OpInfo[Idx].Cond(Context)) 1477 return Idx; 1478 InvalidIdx = OPR_ID_UNSUPPORTED; 1479 } 1480 } 1481 return InvalidIdx; 1482 } 1483 1484 template <class T> 1485 static int getOprIdx(const StringRef Name, const CustomOperand<T> OpInfo[], 1486 int OpInfoSize, T Context) { 1487 auto Test = [=](const CustomOperand<T> &Op) { return Op.Name == Name; }; 1488 return getOprIdx<T>(Test, OpInfo, OpInfoSize, Context); 1489 } 1490 1491 template <class T> 1492 static int getOprIdx(int Id, const CustomOperand<T> OpInfo[], int OpInfoSize, 1493 T Context, bool QuickCheck = true) { 1494 auto Test = [=](const CustomOperand<T> &Op) { 1495 return Op.Encoding == Id && !Op.Name.empty(); 1496 }; 1497 // This is an optimization that should work in most cases. 1498 // As a side effect, it may cause selection of an alias 1499 // instead of a primary operand name in case of sparse tables. 1500 if (QuickCheck && isValidOpr<T>(Id, OpInfo, OpInfoSize, Context) && 1501 OpInfo[Id].Encoding == Id) { 1502 return Id; 1503 } 1504 return getOprIdx<T>(Test, OpInfo, OpInfoSize, Context); 1505 } 1506 1507 //===----------------------------------------------------------------------===// 1508 // Custom Operand Values 1509 //===----------------------------------------------------------------------===// 1510 1511 static unsigned getDefaultCustomOperandEncoding(const CustomOperandVal *Opr, 1512 int Size, 1513 const MCSubtargetInfo &STI) { 1514 unsigned Enc = 0; 1515 for (int Idx = 0; Idx < Size; ++Idx) { 1516 const auto &Op = Opr[Idx]; 1517 if (Op.isSupported(STI)) 1518 Enc |= Op.encode(Op.Default); 1519 } 1520 return Enc; 1521 } 1522 1523 static bool isSymbolicCustomOperandEncoding(const CustomOperandVal *Opr, 1524 int Size, unsigned Code, 1525 bool &HasNonDefaultVal, 1526 const MCSubtargetInfo &STI) { 1527 unsigned UsedOprMask = 0; 1528 HasNonDefaultVal = false; 1529 for (int Idx = 0; Idx < Size; ++Idx) { 1530 const auto &Op = Opr[Idx]; 1531 if (!Op.isSupported(STI)) 1532 continue; 1533 UsedOprMask |= Op.getMask(); 1534 unsigned Val = Op.decode(Code); 1535 if (!Op.isValid(Val)) 1536 return false; 1537 HasNonDefaultVal |= (Val != Op.Default); 1538 } 1539 return (Code & ~UsedOprMask) == 0; 1540 } 1541 1542 static bool decodeCustomOperand(const CustomOperandVal *Opr, int Size, 1543 unsigned Code, int &Idx, StringRef &Name, 1544 unsigned &Val, bool &IsDefault, 1545 const MCSubtargetInfo &STI) { 1546 while (Idx < Size) { 1547 const auto &Op = Opr[Idx++]; 1548 if (Op.isSupported(STI)) { 1549 Name = Op.Name; 1550 Val = Op.decode(Code); 1551 IsDefault = (Val == Op.Default); 1552 return true; 1553 } 1554 } 1555 1556 return false; 1557 } 1558 1559 static int encodeCustomOperandVal(const CustomOperandVal &Op, 1560 int64_t InputVal) { 1561 if (InputVal < 0 || InputVal > Op.Max) 1562 return OPR_VAL_INVALID; 1563 return Op.encode(InputVal); 1564 } 1565 1566 static int encodeCustomOperand(const CustomOperandVal *Opr, int Size, 1567 const StringRef Name, int64_t InputVal, 1568 unsigned &UsedOprMask, 1569 const MCSubtargetInfo &STI) { 1570 int InvalidId = OPR_ID_UNKNOWN; 1571 for (int Idx = 0; Idx < Size; ++Idx) { 1572 const auto &Op = Opr[Idx]; 1573 if (Op.Name == Name) { 1574 if (!Op.isSupported(STI)) { 1575 InvalidId = OPR_ID_UNSUPPORTED; 1576 continue; 1577 } 1578 auto OprMask = Op.getMask(); 1579 if (OprMask & UsedOprMask) 1580 return OPR_ID_DUPLICATE; 1581 UsedOprMask |= OprMask; 1582 return encodeCustomOperandVal(Op, InputVal); 1583 } 1584 } 1585 return InvalidId; 1586 } 1587 1588 //===----------------------------------------------------------------------===// 1589 // DepCtr 1590 //===----------------------------------------------------------------------===// 1591 1592 namespace DepCtr { 1593 1594 int getDefaultDepCtrEncoding(const MCSubtargetInfo &STI) { 1595 static int Default = -1; 1596 if (Default == -1) 1597 Default = getDefaultCustomOperandEncoding(DepCtrInfo, DEP_CTR_SIZE, STI); 1598 return Default; 1599 } 1600 1601 bool isSymbolicDepCtrEncoding(unsigned Code, bool &HasNonDefaultVal, 1602 const MCSubtargetInfo &STI) { 1603 return isSymbolicCustomOperandEncoding(DepCtrInfo, DEP_CTR_SIZE, Code, 1604 HasNonDefaultVal, STI); 1605 } 1606 1607 bool decodeDepCtr(unsigned Code, int &Id, StringRef &Name, unsigned &Val, 1608 bool &IsDefault, const MCSubtargetInfo &STI) { 1609 return decodeCustomOperand(DepCtrInfo, DEP_CTR_SIZE, Code, Id, Name, Val, 1610 IsDefault, STI); 1611 } 1612 1613 int encodeDepCtr(const StringRef Name, int64_t Val, unsigned &UsedOprMask, 1614 const MCSubtargetInfo &STI) { 1615 return encodeCustomOperand(DepCtrInfo, DEP_CTR_SIZE, Name, Val, UsedOprMask, 1616 STI); 1617 } 1618 1619 unsigned decodeFieldVmVsrc(unsigned Encoded) { 1620 return unpackBits(Encoded, getVmVsrcBitShift(), getVmVsrcBitWidth()); 1621 } 1622 1623 unsigned decodeFieldVaVdst(unsigned Encoded) { 1624 return unpackBits(Encoded, getVaVdstBitShift(), getVaVdstBitWidth()); 1625 } 1626 1627 unsigned decodeFieldSaSdst(unsigned Encoded) { 1628 return unpackBits(Encoded, getSaSdstBitShift(), getSaSdstBitWidth()); 1629 } 1630 1631 unsigned encodeFieldVmVsrc(unsigned Encoded, unsigned VmVsrc) { 1632 return packBits(VmVsrc, Encoded, getVmVsrcBitShift(), getVmVsrcBitWidth()); 1633 } 1634 1635 unsigned encodeFieldVmVsrc(unsigned VmVsrc) { 1636 return encodeFieldVmVsrc(0xffff, VmVsrc); 1637 } 1638 1639 unsigned encodeFieldVaVdst(unsigned Encoded, unsigned VaVdst) { 1640 return packBits(VaVdst, Encoded, getVaVdstBitShift(), getVaVdstBitWidth()); 1641 } 1642 1643 unsigned encodeFieldVaVdst(unsigned VaVdst) { 1644 return encodeFieldVaVdst(0xffff, VaVdst); 1645 } 1646 1647 unsigned encodeFieldSaSdst(unsigned Encoded, unsigned SaSdst) { 1648 return packBits(SaSdst, Encoded, getSaSdstBitShift(), getSaSdstBitWidth()); 1649 } 1650 1651 unsigned encodeFieldSaSdst(unsigned SaSdst) { 1652 return encodeFieldSaSdst(0xffff, SaSdst); 1653 } 1654 1655 } // namespace DepCtr 1656 1657 //===----------------------------------------------------------------------===// 1658 // hwreg 1659 //===----------------------------------------------------------------------===// 1660 1661 namespace Hwreg { 1662 1663 int64_t getHwregId(const StringRef Name, const MCSubtargetInfo &STI) { 1664 int Idx = getOprIdx<const MCSubtargetInfo &>(Name, Opr, OPR_SIZE, STI); 1665 return (Idx < 0) ? Idx : Opr[Idx].Encoding; 1666 } 1667 1668 bool isValidHwreg(int64_t Id) { 1669 return 0 <= Id && isUInt<ID_WIDTH_>(Id); 1670 } 1671 1672 bool isValidHwregOffset(int64_t Offset) { 1673 return 0 <= Offset && isUInt<OFFSET_WIDTH_>(Offset); 1674 } 1675 1676 bool isValidHwregWidth(int64_t Width) { 1677 return 0 <= (Width - 1) && isUInt<WIDTH_M1_WIDTH_>(Width - 1); 1678 } 1679 1680 uint64_t encodeHwreg(uint64_t Id, uint64_t Offset, uint64_t Width) { 1681 return (Id << ID_SHIFT_) | 1682 (Offset << OFFSET_SHIFT_) | 1683 ((Width - 1) << WIDTH_M1_SHIFT_); 1684 } 1685 1686 StringRef getHwreg(unsigned Id, const MCSubtargetInfo &STI) { 1687 int Idx = getOprIdx<const MCSubtargetInfo &>(Id, Opr, OPR_SIZE, STI); 1688 return (Idx < 0) ? "" : Opr[Idx].Name; 1689 } 1690 1691 void decodeHwreg(unsigned Val, unsigned &Id, unsigned &Offset, unsigned &Width) { 1692 Id = (Val & ID_MASK_) >> ID_SHIFT_; 1693 Offset = (Val & OFFSET_MASK_) >> OFFSET_SHIFT_; 1694 Width = ((Val & WIDTH_M1_MASK_) >> WIDTH_M1_SHIFT_) + 1; 1695 } 1696 1697 } // namespace Hwreg 1698 1699 //===----------------------------------------------------------------------===// 1700 // exp tgt 1701 //===----------------------------------------------------------------------===// 1702 1703 namespace Exp { 1704 1705 struct ExpTgt { 1706 StringLiteral Name; 1707 unsigned Tgt; 1708 unsigned MaxIndex; 1709 }; 1710 1711 static constexpr ExpTgt ExpTgtInfo[] = { 1712 {{"null"}, ET_NULL, ET_NULL_MAX_IDX}, 1713 {{"mrtz"}, ET_MRTZ, ET_MRTZ_MAX_IDX}, 1714 {{"prim"}, ET_PRIM, ET_PRIM_MAX_IDX}, 1715 {{"mrt"}, ET_MRT0, ET_MRT_MAX_IDX}, 1716 {{"pos"}, ET_POS0, ET_POS_MAX_IDX}, 1717 {{"dual_src_blend"}, ET_DUAL_SRC_BLEND0, ET_DUAL_SRC_BLEND_MAX_IDX}, 1718 {{"param"}, ET_PARAM0, ET_PARAM_MAX_IDX}, 1719 }; 1720 1721 bool getTgtName(unsigned Id, StringRef &Name, int &Index) { 1722 for (const ExpTgt &Val : ExpTgtInfo) { 1723 if (Val.Tgt <= Id && Id <= Val.Tgt + Val.MaxIndex) { 1724 Index = (Val.MaxIndex == 0) ? -1 : (Id - Val.Tgt); 1725 Name = Val.Name; 1726 return true; 1727 } 1728 } 1729 return false; 1730 } 1731 1732 unsigned getTgtId(const StringRef Name) { 1733 1734 for (const ExpTgt &Val : ExpTgtInfo) { 1735 if (Val.MaxIndex == 0 && Name == Val.Name) 1736 return Val.Tgt; 1737 1738 if (Val.MaxIndex > 0 && Name.starts_with(Val.Name)) { 1739 StringRef Suffix = Name.drop_front(Val.Name.size()); 1740 1741 unsigned Id; 1742 if (Suffix.getAsInteger(10, Id) || Id > Val.MaxIndex) 1743 return ET_INVALID; 1744 1745 // Disable leading zeroes 1746 if (Suffix.size() > 1 && Suffix[0] == '0') 1747 return ET_INVALID; 1748 1749 return Val.Tgt + Id; 1750 } 1751 } 1752 return ET_INVALID; 1753 } 1754 1755 bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI) { 1756 switch (Id) { 1757 case ET_NULL: 1758 return !isGFX11Plus(STI); 1759 case ET_POS4: 1760 case ET_PRIM: 1761 return isGFX10Plus(STI); 1762 case ET_DUAL_SRC_BLEND0: 1763 case ET_DUAL_SRC_BLEND1: 1764 return isGFX11Plus(STI); 1765 default: 1766 if (Id >= ET_PARAM0 && Id <= ET_PARAM31) 1767 return !isGFX11Plus(STI); 1768 return true; 1769 } 1770 } 1771 1772 } // namespace Exp 1773 1774 //===----------------------------------------------------------------------===// 1775 // MTBUF Format 1776 //===----------------------------------------------------------------------===// 1777 1778 namespace MTBUFFormat { 1779 1780 int64_t getDfmt(const StringRef Name) { 1781 for (int Id = DFMT_MIN; Id <= DFMT_MAX; ++Id) { 1782 if (Name == DfmtSymbolic[Id]) 1783 return Id; 1784 } 1785 return DFMT_UNDEF; 1786 } 1787 1788 StringRef getDfmtName(unsigned Id) { 1789 assert(Id <= DFMT_MAX); 1790 return DfmtSymbolic[Id]; 1791 } 1792 1793 static StringLiteral const *getNfmtLookupTable(const MCSubtargetInfo &STI) { 1794 if (isSI(STI) || isCI(STI)) 1795 return NfmtSymbolicSICI; 1796 if (isVI(STI) || isGFX9(STI)) 1797 return NfmtSymbolicVI; 1798 return NfmtSymbolicGFX10; 1799 } 1800 1801 int64_t getNfmt(const StringRef Name, const MCSubtargetInfo &STI) { 1802 auto lookupTable = getNfmtLookupTable(STI); 1803 for (int Id = NFMT_MIN; Id <= NFMT_MAX; ++Id) { 1804 if (Name == lookupTable[Id]) 1805 return Id; 1806 } 1807 return NFMT_UNDEF; 1808 } 1809 1810 StringRef getNfmtName(unsigned Id, const MCSubtargetInfo &STI) { 1811 assert(Id <= NFMT_MAX); 1812 return getNfmtLookupTable(STI)[Id]; 1813 } 1814 1815 bool isValidDfmtNfmt(unsigned Id, const MCSubtargetInfo &STI) { 1816 unsigned Dfmt; 1817 unsigned Nfmt; 1818 decodeDfmtNfmt(Id, Dfmt, Nfmt); 1819 return isValidNfmt(Nfmt, STI); 1820 } 1821 1822 bool isValidNfmt(unsigned Id, const MCSubtargetInfo &STI) { 1823 return !getNfmtName(Id, STI).empty(); 1824 } 1825 1826 int64_t encodeDfmtNfmt(unsigned Dfmt, unsigned Nfmt) { 1827 return (Dfmt << DFMT_SHIFT) | (Nfmt << NFMT_SHIFT); 1828 } 1829 1830 void decodeDfmtNfmt(unsigned Format, unsigned &Dfmt, unsigned &Nfmt) { 1831 Dfmt = (Format >> DFMT_SHIFT) & DFMT_MASK; 1832 Nfmt = (Format >> NFMT_SHIFT) & NFMT_MASK; 1833 } 1834 1835 int64_t getUnifiedFormat(const StringRef Name, const MCSubtargetInfo &STI) { 1836 if (isGFX11Plus(STI)) { 1837 for (int Id = UfmtGFX11::UFMT_FIRST; Id <= UfmtGFX11::UFMT_LAST; ++Id) { 1838 if (Name == UfmtSymbolicGFX11[Id]) 1839 return Id; 1840 } 1841 } else { 1842 for (int Id = UfmtGFX10::UFMT_FIRST; Id <= UfmtGFX10::UFMT_LAST; ++Id) { 1843 if (Name == UfmtSymbolicGFX10[Id]) 1844 return Id; 1845 } 1846 } 1847 return UFMT_UNDEF; 1848 } 1849 1850 StringRef getUnifiedFormatName(unsigned Id, const MCSubtargetInfo &STI) { 1851 if(isValidUnifiedFormat(Id, STI)) 1852 return isGFX10(STI) ? UfmtSymbolicGFX10[Id] : UfmtSymbolicGFX11[Id]; 1853 return ""; 1854 } 1855 1856 bool isValidUnifiedFormat(unsigned Id, const MCSubtargetInfo &STI) { 1857 return isGFX10(STI) ? Id <= UfmtGFX10::UFMT_LAST : Id <= UfmtGFX11::UFMT_LAST; 1858 } 1859 1860 int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt, 1861 const MCSubtargetInfo &STI) { 1862 int64_t Fmt = encodeDfmtNfmt(Dfmt, Nfmt); 1863 if (isGFX11Plus(STI)) { 1864 for (int Id = UfmtGFX11::UFMT_FIRST; Id <= UfmtGFX11::UFMT_LAST; ++Id) { 1865 if (Fmt == DfmtNfmt2UFmtGFX11[Id]) 1866 return Id; 1867 } 1868 } else { 1869 for (int Id = UfmtGFX10::UFMT_FIRST; Id <= UfmtGFX10::UFMT_LAST; ++Id) { 1870 if (Fmt == DfmtNfmt2UFmtGFX10[Id]) 1871 return Id; 1872 } 1873 } 1874 return UFMT_UNDEF; 1875 } 1876 1877 bool isValidFormatEncoding(unsigned Val, const MCSubtargetInfo &STI) { 1878 return isGFX10Plus(STI) ? (Val <= UFMT_MAX) : (Val <= DFMT_NFMT_MAX); 1879 } 1880 1881 unsigned getDefaultFormatEncoding(const MCSubtargetInfo &STI) { 1882 if (isGFX10Plus(STI)) 1883 return UFMT_DEFAULT; 1884 return DFMT_NFMT_DEFAULT; 1885 } 1886 1887 } // namespace MTBUFFormat 1888 1889 //===----------------------------------------------------------------------===// 1890 // SendMsg 1891 //===----------------------------------------------------------------------===// 1892 1893 namespace SendMsg { 1894 1895 static uint64_t getMsgIdMask(const MCSubtargetInfo &STI) { 1896 return isGFX11Plus(STI) ? ID_MASK_GFX11Plus_ : ID_MASK_PreGFX11_; 1897 } 1898 1899 int64_t getMsgId(const StringRef Name, const MCSubtargetInfo &STI) { 1900 int Idx = getOprIdx<const MCSubtargetInfo &>(Name, Msg, MSG_SIZE, STI); 1901 return (Idx < 0) ? Idx : Msg[Idx].Encoding; 1902 } 1903 1904 bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI) { 1905 return (MsgId & ~(getMsgIdMask(STI))) == 0; 1906 } 1907 1908 StringRef getMsgName(int64_t MsgId, const MCSubtargetInfo &STI) { 1909 int Idx = getOprIdx<const MCSubtargetInfo &>(MsgId, Msg, MSG_SIZE, STI); 1910 return (Idx < 0) ? "" : Msg[Idx].Name; 1911 } 1912 1913 int64_t getMsgOpId(int64_t MsgId, const StringRef Name) { 1914 const char* const *S = (MsgId == ID_SYSMSG) ? OpSysSymbolic : OpGsSymbolic; 1915 const int F = (MsgId == ID_SYSMSG) ? OP_SYS_FIRST_ : OP_GS_FIRST_; 1916 const int L = (MsgId == ID_SYSMSG) ? OP_SYS_LAST_ : OP_GS_LAST_; 1917 for (int i = F; i < L; ++i) { 1918 if (Name == S[i]) { 1919 return i; 1920 } 1921 } 1922 return OP_UNKNOWN_; 1923 } 1924 1925 bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI, 1926 bool Strict) { 1927 assert(isValidMsgId(MsgId, STI)); 1928 1929 if (!Strict) 1930 return 0 <= OpId && isUInt<OP_WIDTH_>(OpId); 1931 1932 if (MsgId == ID_SYSMSG) 1933 return OP_SYS_FIRST_ <= OpId && OpId < OP_SYS_LAST_; 1934 if (!isGFX11Plus(STI)) { 1935 switch (MsgId) { 1936 case ID_GS_PreGFX11: 1937 return (OP_GS_FIRST_ <= OpId && OpId < OP_GS_LAST_) && OpId != OP_GS_NOP; 1938 case ID_GS_DONE_PreGFX11: 1939 return OP_GS_FIRST_ <= OpId && OpId < OP_GS_LAST_; 1940 } 1941 } 1942 return OpId == OP_NONE_; 1943 } 1944 1945 StringRef getMsgOpName(int64_t MsgId, int64_t OpId, 1946 const MCSubtargetInfo &STI) { 1947 assert(msgRequiresOp(MsgId, STI)); 1948 return (MsgId == ID_SYSMSG)? OpSysSymbolic[OpId] : OpGsSymbolic[OpId]; 1949 } 1950 1951 bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId, 1952 const MCSubtargetInfo &STI, bool Strict) { 1953 assert(isValidMsgOp(MsgId, OpId, STI, Strict)); 1954 1955 if (!Strict) 1956 return 0 <= StreamId && isUInt<STREAM_ID_WIDTH_>(StreamId); 1957 1958 if (!isGFX11Plus(STI)) { 1959 switch (MsgId) { 1960 case ID_GS_PreGFX11: 1961 return STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_; 1962 case ID_GS_DONE_PreGFX11: 1963 return (OpId == OP_GS_NOP) ? 1964 (StreamId == STREAM_ID_NONE_) : 1965 (STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_); 1966 } 1967 } 1968 return StreamId == STREAM_ID_NONE_; 1969 } 1970 1971 bool msgRequiresOp(int64_t MsgId, const MCSubtargetInfo &STI) { 1972 return MsgId == ID_SYSMSG || 1973 (!isGFX11Plus(STI) && 1974 (MsgId == ID_GS_PreGFX11 || MsgId == ID_GS_DONE_PreGFX11)); 1975 } 1976 1977 bool msgSupportsStream(int64_t MsgId, int64_t OpId, 1978 const MCSubtargetInfo &STI) { 1979 return !isGFX11Plus(STI) && 1980 (MsgId == ID_GS_PreGFX11 || MsgId == ID_GS_DONE_PreGFX11) && 1981 OpId != OP_GS_NOP; 1982 } 1983 1984 void decodeMsg(unsigned Val, uint16_t &MsgId, uint16_t &OpId, 1985 uint16_t &StreamId, const MCSubtargetInfo &STI) { 1986 MsgId = Val & getMsgIdMask(STI); 1987 if (isGFX11Plus(STI)) { 1988 OpId = 0; 1989 StreamId = 0; 1990 } else { 1991 OpId = (Val & OP_MASK_) >> OP_SHIFT_; 1992 StreamId = (Val & STREAM_ID_MASK_) >> STREAM_ID_SHIFT_; 1993 } 1994 } 1995 1996 uint64_t encodeMsg(uint64_t MsgId, 1997 uint64_t OpId, 1998 uint64_t StreamId) { 1999 return MsgId | (OpId << OP_SHIFT_) | (StreamId << STREAM_ID_SHIFT_); 2000 } 2001 2002 } // namespace SendMsg 2003 2004 //===----------------------------------------------------------------------===// 2005 // 2006 //===----------------------------------------------------------------------===// 2007 2008 unsigned getInitialPSInputAddr(const Function &F) { 2009 return F.getFnAttributeAsParsedInteger("InitialPSInputAddr", 0); 2010 } 2011 2012 bool getHasColorExport(const Function &F) { 2013 // As a safe default always respond as if PS has color exports. 2014 return F.getFnAttributeAsParsedInteger( 2015 "amdgpu-color-export", 2016 F.getCallingConv() == CallingConv::AMDGPU_PS ? 1 : 0) != 0; 2017 } 2018 2019 bool getHasDepthExport(const Function &F) { 2020 return F.getFnAttributeAsParsedInteger("amdgpu-depth-export", 0) != 0; 2021 } 2022 2023 bool isShader(CallingConv::ID cc) { 2024 switch(cc) { 2025 case CallingConv::AMDGPU_VS: 2026 case CallingConv::AMDGPU_LS: 2027 case CallingConv::AMDGPU_HS: 2028 case CallingConv::AMDGPU_ES: 2029 case CallingConv::AMDGPU_GS: 2030 case CallingConv::AMDGPU_PS: 2031 case CallingConv::AMDGPU_CS_Chain: 2032 case CallingConv::AMDGPU_CS_ChainPreserve: 2033 case CallingConv::AMDGPU_CS: 2034 return true; 2035 default: 2036 return false; 2037 } 2038 } 2039 2040 bool isGraphics(CallingConv::ID cc) { 2041 return isShader(cc) || cc == CallingConv::AMDGPU_Gfx; 2042 } 2043 2044 bool isCompute(CallingConv::ID cc) { 2045 return !isGraphics(cc) || cc == CallingConv::AMDGPU_CS; 2046 } 2047 2048 bool isEntryFunctionCC(CallingConv::ID CC) { 2049 switch (CC) { 2050 case CallingConv::AMDGPU_KERNEL: 2051 case CallingConv::SPIR_KERNEL: 2052 case CallingConv::AMDGPU_VS: 2053 case CallingConv::AMDGPU_GS: 2054 case CallingConv::AMDGPU_PS: 2055 case CallingConv::AMDGPU_CS: 2056 case CallingConv::AMDGPU_ES: 2057 case CallingConv::AMDGPU_HS: 2058 case CallingConv::AMDGPU_LS: 2059 return true; 2060 default: 2061 return false; 2062 } 2063 } 2064 2065 bool isModuleEntryFunctionCC(CallingConv::ID CC) { 2066 switch (CC) { 2067 case CallingConv::AMDGPU_Gfx: 2068 return true; 2069 default: 2070 return isEntryFunctionCC(CC) || isChainCC(CC); 2071 } 2072 } 2073 2074 bool isChainCC(CallingConv::ID CC) { 2075 switch (CC) { 2076 case CallingConv::AMDGPU_CS_Chain: 2077 case CallingConv::AMDGPU_CS_ChainPreserve: 2078 return true; 2079 default: 2080 return false; 2081 } 2082 } 2083 2084 bool isKernelCC(const Function *Func) { 2085 return AMDGPU::isModuleEntryFunctionCC(Func->getCallingConv()); 2086 } 2087 2088 bool hasXNACK(const MCSubtargetInfo &STI) { 2089 return STI.hasFeature(AMDGPU::FeatureXNACK); 2090 } 2091 2092 bool hasSRAMECC(const MCSubtargetInfo &STI) { 2093 return STI.hasFeature(AMDGPU::FeatureSRAMECC); 2094 } 2095 2096 bool hasMIMG_R128(const MCSubtargetInfo &STI) { 2097 return STI.hasFeature(AMDGPU::FeatureMIMG_R128) && !STI.hasFeature(AMDGPU::FeatureR128A16); 2098 } 2099 2100 bool hasA16(const MCSubtargetInfo &STI) { 2101 return STI.hasFeature(AMDGPU::FeatureA16); 2102 } 2103 2104 bool hasG16(const MCSubtargetInfo &STI) { 2105 return STI.hasFeature(AMDGPU::FeatureG16); 2106 } 2107 2108 bool hasPackedD16(const MCSubtargetInfo &STI) { 2109 return !STI.hasFeature(AMDGPU::FeatureUnpackedD16VMem) && !isCI(STI) && 2110 !isSI(STI); 2111 } 2112 2113 bool hasGDS(const MCSubtargetInfo &STI) { 2114 return STI.hasFeature(AMDGPU::FeatureGDS); 2115 } 2116 2117 unsigned getNSAMaxSize(const MCSubtargetInfo &STI, bool HasSampler) { 2118 auto Version = getIsaVersion(STI.getCPU()); 2119 if (Version.Major == 10) 2120 return Version.Minor >= 3 ? 13 : 5; 2121 if (Version.Major == 11) 2122 return 5; 2123 if (Version.Major >= 12) 2124 return HasSampler ? 4 : 5; 2125 return 0; 2126 } 2127 2128 unsigned getMaxNumUserSGPRs(const MCSubtargetInfo &STI) { return 16; } 2129 2130 bool isSI(const MCSubtargetInfo &STI) { 2131 return STI.hasFeature(AMDGPU::FeatureSouthernIslands); 2132 } 2133 2134 bool isCI(const MCSubtargetInfo &STI) { 2135 return STI.hasFeature(AMDGPU::FeatureSeaIslands); 2136 } 2137 2138 bool isVI(const MCSubtargetInfo &STI) { 2139 return STI.hasFeature(AMDGPU::FeatureVolcanicIslands); 2140 } 2141 2142 bool isGFX9(const MCSubtargetInfo &STI) { 2143 return STI.hasFeature(AMDGPU::FeatureGFX9); 2144 } 2145 2146 bool isGFX9_GFX10(const MCSubtargetInfo &STI) { 2147 return isGFX9(STI) || isGFX10(STI); 2148 } 2149 2150 bool isGFX9_GFX10_GFX11(const MCSubtargetInfo &STI) { 2151 return isGFX9(STI) || isGFX10(STI) || isGFX11(STI); 2152 } 2153 2154 bool isGFX8_GFX9_GFX10(const MCSubtargetInfo &STI) { 2155 return isVI(STI) || isGFX9(STI) || isGFX10(STI); 2156 } 2157 2158 bool isGFX8Plus(const MCSubtargetInfo &STI) { 2159 return isVI(STI) || isGFX9Plus(STI); 2160 } 2161 2162 bool isGFX9Plus(const MCSubtargetInfo &STI) { 2163 return isGFX9(STI) || isGFX10Plus(STI); 2164 } 2165 2166 bool isGFX10(const MCSubtargetInfo &STI) { 2167 return STI.hasFeature(AMDGPU::FeatureGFX10); 2168 } 2169 2170 bool isGFX10_GFX11(const MCSubtargetInfo &STI) { 2171 return isGFX10(STI) || isGFX11(STI); 2172 } 2173 2174 bool isGFX10Plus(const MCSubtargetInfo &STI) { 2175 return isGFX10(STI) || isGFX11Plus(STI); 2176 } 2177 2178 bool isGFX11(const MCSubtargetInfo &STI) { 2179 return STI.hasFeature(AMDGPU::FeatureGFX11); 2180 } 2181 2182 bool isGFX11Plus(const MCSubtargetInfo &STI) { 2183 return isGFX11(STI) || isGFX12Plus(STI); 2184 } 2185 2186 bool isGFX12(const MCSubtargetInfo &STI) { 2187 return STI.getFeatureBits()[AMDGPU::FeatureGFX12]; 2188 } 2189 2190 bool isGFX12Plus(const MCSubtargetInfo &STI) { return isGFX12(STI); } 2191 2192 bool isNotGFX12Plus(const MCSubtargetInfo &STI) { return !isGFX12Plus(STI); } 2193 2194 bool isNotGFX11Plus(const MCSubtargetInfo &STI) { 2195 return !isGFX11Plus(STI); 2196 } 2197 2198 bool isNotGFX10Plus(const MCSubtargetInfo &STI) { 2199 return isSI(STI) || isCI(STI) || isVI(STI) || isGFX9(STI); 2200 } 2201 2202 bool isGFX10Before1030(const MCSubtargetInfo &STI) { 2203 return isGFX10(STI) && !AMDGPU::isGFX10_BEncoding(STI); 2204 } 2205 2206 bool isGCN3Encoding(const MCSubtargetInfo &STI) { 2207 return STI.hasFeature(AMDGPU::FeatureGCN3Encoding); 2208 } 2209 2210 bool isGFX10_AEncoding(const MCSubtargetInfo &STI) { 2211 return STI.hasFeature(AMDGPU::FeatureGFX10_AEncoding); 2212 } 2213 2214 bool isGFX10_BEncoding(const MCSubtargetInfo &STI) { 2215 return STI.hasFeature(AMDGPU::FeatureGFX10_BEncoding); 2216 } 2217 2218 bool hasGFX10_3Insts(const MCSubtargetInfo &STI) { 2219 return STI.hasFeature(AMDGPU::FeatureGFX10_3Insts); 2220 } 2221 2222 bool isGFX10_3_GFX11(const MCSubtargetInfo &STI) { 2223 return isGFX10_BEncoding(STI) && !isGFX12Plus(STI); 2224 } 2225 2226 bool isGFX90A(const MCSubtargetInfo &STI) { 2227 return STI.hasFeature(AMDGPU::FeatureGFX90AInsts); 2228 } 2229 2230 bool isGFX940(const MCSubtargetInfo &STI) { 2231 return STI.hasFeature(AMDGPU::FeatureGFX940Insts); 2232 } 2233 2234 bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI) { 2235 return STI.hasFeature(AMDGPU::FeatureArchitectedFlatScratch); 2236 } 2237 2238 bool hasMAIInsts(const MCSubtargetInfo &STI) { 2239 return STI.hasFeature(AMDGPU::FeatureMAIInsts); 2240 } 2241 2242 bool hasVOPD(const MCSubtargetInfo &STI) { 2243 return STI.hasFeature(AMDGPU::FeatureVOPD); 2244 } 2245 2246 bool hasDPPSrc1SGPR(const MCSubtargetInfo &STI) { 2247 return STI.hasFeature(AMDGPU::FeatureDPPSrc1SGPR); 2248 } 2249 2250 unsigned hasKernargPreload(const MCSubtargetInfo &STI) { 2251 return STI.hasFeature(AMDGPU::FeatureKernargPreload); 2252 } 2253 2254 int32_t getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR, 2255 int32_t ArgNumVGPR) { 2256 if (has90AInsts && ArgNumAGPR) 2257 return alignTo(ArgNumVGPR, 4) + ArgNumAGPR; 2258 return std::max(ArgNumVGPR, ArgNumAGPR); 2259 } 2260 2261 bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI) { 2262 const MCRegisterClass SGPRClass = TRI->getRegClass(AMDGPU::SReg_32RegClassID); 2263 const unsigned FirstSubReg = TRI->getSubReg(Reg, AMDGPU::sub0); 2264 return SGPRClass.contains(FirstSubReg != 0 ? FirstSubReg : Reg) || 2265 Reg == AMDGPU::SCC; 2266 } 2267 2268 bool isHi(unsigned Reg, const MCRegisterInfo &MRI) { 2269 return MRI.getEncodingValue(Reg) & AMDGPU::HWEncoding::IS_HI; 2270 } 2271 2272 #define MAP_REG2REG \ 2273 using namespace AMDGPU; \ 2274 switch(Reg) { \ 2275 default: return Reg; \ 2276 CASE_CI_VI(FLAT_SCR) \ 2277 CASE_CI_VI(FLAT_SCR_LO) \ 2278 CASE_CI_VI(FLAT_SCR_HI) \ 2279 CASE_VI_GFX9PLUS(TTMP0) \ 2280 CASE_VI_GFX9PLUS(TTMP1) \ 2281 CASE_VI_GFX9PLUS(TTMP2) \ 2282 CASE_VI_GFX9PLUS(TTMP3) \ 2283 CASE_VI_GFX9PLUS(TTMP4) \ 2284 CASE_VI_GFX9PLUS(TTMP5) \ 2285 CASE_VI_GFX9PLUS(TTMP6) \ 2286 CASE_VI_GFX9PLUS(TTMP7) \ 2287 CASE_VI_GFX9PLUS(TTMP8) \ 2288 CASE_VI_GFX9PLUS(TTMP9) \ 2289 CASE_VI_GFX9PLUS(TTMP10) \ 2290 CASE_VI_GFX9PLUS(TTMP11) \ 2291 CASE_VI_GFX9PLUS(TTMP12) \ 2292 CASE_VI_GFX9PLUS(TTMP13) \ 2293 CASE_VI_GFX9PLUS(TTMP14) \ 2294 CASE_VI_GFX9PLUS(TTMP15) \ 2295 CASE_VI_GFX9PLUS(TTMP0_TTMP1) \ 2296 CASE_VI_GFX9PLUS(TTMP2_TTMP3) \ 2297 CASE_VI_GFX9PLUS(TTMP4_TTMP5) \ 2298 CASE_VI_GFX9PLUS(TTMP6_TTMP7) \ 2299 CASE_VI_GFX9PLUS(TTMP8_TTMP9) \ 2300 CASE_VI_GFX9PLUS(TTMP10_TTMP11) \ 2301 CASE_VI_GFX9PLUS(TTMP12_TTMP13) \ 2302 CASE_VI_GFX9PLUS(TTMP14_TTMP15) \ 2303 CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3) \ 2304 CASE_VI_GFX9PLUS(TTMP4_TTMP5_TTMP6_TTMP7) \ 2305 CASE_VI_GFX9PLUS(TTMP8_TTMP9_TTMP10_TTMP11) \ 2306 CASE_VI_GFX9PLUS(TTMP12_TTMP13_TTMP14_TTMP15) \ 2307 CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7) \ 2308 CASE_VI_GFX9PLUS(TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11) \ 2309 CASE_VI_GFX9PLUS(TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \ 2310 CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \ 2311 CASE_GFXPRE11_GFX11PLUS(M0) \ 2312 CASE_GFXPRE11_GFX11PLUS(SGPR_NULL) \ 2313 CASE_GFXPRE11_GFX11PLUS_TO(SGPR_NULL64, SGPR_NULL) \ 2314 } 2315 2316 #define CASE_CI_VI(node) \ 2317 assert(!isSI(STI)); \ 2318 case node: return isCI(STI) ? node##_ci : node##_vi; 2319 2320 #define CASE_VI_GFX9PLUS(node) \ 2321 case node: return isGFX9Plus(STI) ? node##_gfx9plus : node##_vi; 2322 2323 #define CASE_GFXPRE11_GFX11PLUS(node) \ 2324 case node: return isGFX11Plus(STI) ? node##_gfx11plus : node##_gfxpre11; 2325 2326 #define CASE_GFXPRE11_GFX11PLUS_TO(node, result) \ 2327 case node: return isGFX11Plus(STI) ? result##_gfx11plus : result##_gfxpre11; 2328 2329 unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI) { 2330 if (STI.getTargetTriple().getArch() == Triple::r600) 2331 return Reg; 2332 MAP_REG2REG 2333 } 2334 2335 #undef CASE_CI_VI 2336 #undef CASE_VI_GFX9PLUS 2337 #undef CASE_GFXPRE11_GFX11PLUS 2338 #undef CASE_GFXPRE11_GFX11PLUS_TO 2339 2340 #define CASE_CI_VI(node) case node##_ci: case node##_vi: return node; 2341 #define CASE_VI_GFX9PLUS(node) case node##_vi: case node##_gfx9plus: return node; 2342 #define CASE_GFXPRE11_GFX11PLUS(node) case node##_gfx11plus: case node##_gfxpre11: return node; 2343 #define CASE_GFXPRE11_GFX11PLUS_TO(node, result) 2344 2345 unsigned mc2PseudoReg(unsigned Reg) { 2346 MAP_REG2REG 2347 } 2348 2349 bool isInlineValue(unsigned Reg) { 2350 switch (Reg) { 2351 case AMDGPU::SRC_SHARED_BASE_LO: 2352 case AMDGPU::SRC_SHARED_BASE: 2353 case AMDGPU::SRC_SHARED_LIMIT_LO: 2354 case AMDGPU::SRC_SHARED_LIMIT: 2355 case AMDGPU::SRC_PRIVATE_BASE_LO: 2356 case AMDGPU::SRC_PRIVATE_BASE: 2357 case AMDGPU::SRC_PRIVATE_LIMIT_LO: 2358 case AMDGPU::SRC_PRIVATE_LIMIT: 2359 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 2360 return true; 2361 case AMDGPU::SRC_VCCZ: 2362 case AMDGPU::SRC_EXECZ: 2363 case AMDGPU::SRC_SCC: 2364 return true; 2365 case AMDGPU::SGPR_NULL: 2366 return true; 2367 default: 2368 return false; 2369 } 2370 } 2371 2372 #undef CASE_CI_VI 2373 #undef CASE_VI_GFX9PLUS 2374 #undef CASE_GFXPRE11_GFX11PLUS 2375 #undef CASE_GFXPRE11_GFX11PLUS_TO 2376 #undef MAP_REG2REG 2377 2378 bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo) { 2379 assert(OpNo < Desc.NumOperands); 2380 unsigned OpType = Desc.operands()[OpNo].OperandType; 2381 return OpType >= AMDGPU::OPERAND_SRC_FIRST && 2382 OpType <= AMDGPU::OPERAND_SRC_LAST; 2383 } 2384 2385 bool isKImmOperand(const MCInstrDesc &Desc, unsigned OpNo) { 2386 assert(OpNo < Desc.NumOperands); 2387 unsigned OpType = Desc.operands()[OpNo].OperandType; 2388 return OpType >= AMDGPU::OPERAND_KIMM_FIRST && 2389 OpType <= AMDGPU::OPERAND_KIMM_LAST; 2390 } 2391 2392 bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo) { 2393 assert(OpNo < Desc.NumOperands); 2394 unsigned OpType = Desc.operands()[OpNo].OperandType; 2395 switch (OpType) { 2396 case AMDGPU::OPERAND_REG_IMM_FP32: 2397 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 2398 case AMDGPU::OPERAND_REG_IMM_FP64: 2399 case AMDGPU::OPERAND_REG_IMM_FP16: 2400 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: 2401 case AMDGPU::OPERAND_REG_IMM_V2FP16: 2402 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 2403 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 2404 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 2405 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 2406 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 2407 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 2408 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 2409 case AMDGPU::OPERAND_REG_IMM_V2FP32: 2410 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 2411 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 2412 return true; 2413 default: 2414 return false; 2415 } 2416 } 2417 2418 bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo) { 2419 assert(OpNo < Desc.NumOperands); 2420 unsigned OpType = Desc.operands()[OpNo].OperandType; 2421 return (OpType >= AMDGPU::OPERAND_REG_INLINE_C_FIRST && 2422 OpType <= AMDGPU::OPERAND_REG_INLINE_C_LAST) || 2423 (OpType >= AMDGPU::OPERAND_REG_INLINE_AC_FIRST && 2424 OpType <= AMDGPU::OPERAND_REG_INLINE_AC_LAST); 2425 } 2426 2427 // Avoid using MCRegisterClass::getSize, since that function will go away 2428 // (move from MC* level to Target* level). Return size in bits. 2429 unsigned getRegBitWidth(unsigned RCID) { 2430 switch (RCID) { 2431 case AMDGPU::SGPR_LO16RegClassID: 2432 case AMDGPU::AGPR_LO16RegClassID: 2433 return 16; 2434 case AMDGPU::SGPR_32RegClassID: 2435 case AMDGPU::VGPR_32RegClassID: 2436 case AMDGPU::VRegOrLds_32RegClassID: 2437 case AMDGPU::AGPR_32RegClassID: 2438 case AMDGPU::VS_32RegClassID: 2439 case AMDGPU::AV_32RegClassID: 2440 case AMDGPU::SReg_32RegClassID: 2441 case AMDGPU::SReg_32_XM0RegClassID: 2442 case AMDGPU::SRegOrLds_32RegClassID: 2443 return 32; 2444 case AMDGPU::SGPR_64RegClassID: 2445 case AMDGPU::VS_64RegClassID: 2446 case AMDGPU::SReg_64RegClassID: 2447 case AMDGPU::VReg_64RegClassID: 2448 case AMDGPU::AReg_64RegClassID: 2449 case AMDGPU::SReg_64_XEXECRegClassID: 2450 case AMDGPU::VReg_64_Align2RegClassID: 2451 case AMDGPU::AReg_64_Align2RegClassID: 2452 case AMDGPU::AV_64RegClassID: 2453 case AMDGPU::AV_64_Align2RegClassID: 2454 return 64; 2455 case AMDGPU::SGPR_96RegClassID: 2456 case AMDGPU::SReg_96RegClassID: 2457 case AMDGPU::VReg_96RegClassID: 2458 case AMDGPU::AReg_96RegClassID: 2459 case AMDGPU::VReg_96_Align2RegClassID: 2460 case AMDGPU::AReg_96_Align2RegClassID: 2461 case AMDGPU::AV_96RegClassID: 2462 case AMDGPU::AV_96_Align2RegClassID: 2463 return 96; 2464 case AMDGPU::SGPR_128RegClassID: 2465 case AMDGPU::SReg_128RegClassID: 2466 case AMDGPU::VReg_128RegClassID: 2467 case AMDGPU::AReg_128RegClassID: 2468 case AMDGPU::VReg_128_Align2RegClassID: 2469 case AMDGPU::AReg_128_Align2RegClassID: 2470 case AMDGPU::AV_128RegClassID: 2471 case AMDGPU::AV_128_Align2RegClassID: 2472 return 128; 2473 case AMDGPU::SGPR_160RegClassID: 2474 case AMDGPU::SReg_160RegClassID: 2475 case AMDGPU::VReg_160RegClassID: 2476 case AMDGPU::AReg_160RegClassID: 2477 case AMDGPU::VReg_160_Align2RegClassID: 2478 case AMDGPU::AReg_160_Align2RegClassID: 2479 case AMDGPU::AV_160RegClassID: 2480 case AMDGPU::AV_160_Align2RegClassID: 2481 return 160; 2482 case AMDGPU::SGPR_192RegClassID: 2483 case AMDGPU::SReg_192RegClassID: 2484 case AMDGPU::VReg_192RegClassID: 2485 case AMDGPU::AReg_192RegClassID: 2486 case AMDGPU::VReg_192_Align2RegClassID: 2487 case AMDGPU::AReg_192_Align2RegClassID: 2488 case AMDGPU::AV_192RegClassID: 2489 case AMDGPU::AV_192_Align2RegClassID: 2490 return 192; 2491 case AMDGPU::SGPR_224RegClassID: 2492 case AMDGPU::SReg_224RegClassID: 2493 case AMDGPU::VReg_224RegClassID: 2494 case AMDGPU::AReg_224RegClassID: 2495 case AMDGPU::VReg_224_Align2RegClassID: 2496 case AMDGPU::AReg_224_Align2RegClassID: 2497 case AMDGPU::AV_224RegClassID: 2498 case AMDGPU::AV_224_Align2RegClassID: 2499 return 224; 2500 case AMDGPU::SGPR_256RegClassID: 2501 case AMDGPU::SReg_256RegClassID: 2502 case AMDGPU::VReg_256RegClassID: 2503 case AMDGPU::AReg_256RegClassID: 2504 case AMDGPU::VReg_256_Align2RegClassID: 2505 case AMDGPU::AReg_256_Align2RegClassID: 2506 case AMDGPU::AV_256RegClassID: 2507 case AMDGPU::AV_256_Align2RegClassID: 2508 return 256; 2509 case AMDGPU::SGPR_288RegClassID: 2510 case AMDGPU::SReg_288RegClassID: 2511 case AMDGPU::VReg_288RegClassID: 2512 case AMDGPU::AReg_288RegClassID: 2513 case AMDGPU::VReg_288_Align2RegClassID: 2514 case AMDGPU::AReg_288_Align2RegClassID: 2515 case AMDGPU::AV_288RegClassID: 2516 case AMDGPU::AV_288_Align2RegClassID: 2517 return 288; 2518 case AMDGPU::SGPR_320RegClassID: 2519 case AMDGPU::SReg_320RegClassID: 2520 case AMDGPU::VReg_320RegClassID: 2521 case AMDGPU::AReg_320RegClassID: 2522 case AMDGPU::VReg_320_Align2RegClassID: 2523 case AMDGPU::AReg_320_Align2RegClassID: 2524 case AMDGPU::AV_320RegClassID: 2525 case AMDGPU::AV_320_Align2RegClassID: 2526 return 320; 2527 case AMDGPU::SGPR_352RegClassID: 2528 case AMDGPU::SReg_352RegClassID: 2529 case AMDGPU::VReg_352RegClassID: 2530 case AMDGPU::AReg_352RegClassID: 2531 case AMDGPU::VReg_352_Align2RegClassID: 2532 case AMDGPU::AReg_352_Align2RegClassID: 2533 case AMDGPU::AV_352RegClassID: 2534 case AMDGPU::AV_352_Align2RegClassID: 2535 return 352; 2536 case AMDGPU::SGPR_384RegClassID: 2537 case AMDGPU::SReg_384RegClassID: 2538 case AMDGPU::VReg_384RegClassID: 2539 case AMDGPU::AReg_384RegClassID: 2540 case AMDGPU::VReg_384_Align2RegClassID: 2541 case AMDGPU::AReg_384_Align2RegClassID: 2542 case AMDGPU::AV_384RegClassID: 2543 case AMDGPU::AV_384_Align2RegClassID: 2544 return 384; 2545 case AMDGPU::SGPR_512RegClassID: 2546 case AMDGPU::SReg_512RegClassID: 2547 case AMDGPU::VReg_512RegClassID: 2548 case AMDGPU::AReg_512RegClassID: 2549 case AMDGPU::VReg_512_Align2RegClassID: 2550 case AMDGPU::AReg_512_Align2RegClassID: 2551 case AMDGPU::AV_512RegClassID: 2552 case AMDGPU::AV_512_Align2RegClassID: 2553 return 512; 2554 case AMDGPU::SGPR_1024RegClassID: 2555 case AMDGPU::SReg_1024RegClassID: 2556 case AMDGPU::VReg_1024RegClassID: 2557 case AMDGPU::AReg_1024RegClassID: 2558 case AMDGPU::VReg_1024_Align2RegClassID: 2559 case AMDGPU::AReg_1024_Align2RegClassID: 2560 case AMDGPU::AV_1024RegClassID: 2561 case AMDGPU::AV_1024_Align2RegClassID: 2562 return 1024; 2563 default: 2564 llvm_unreachable("Unexpected register class"); 2565 } 2566 } 2567 2568 unsigned getRegBitWidth(const MCRegisterClass &RC) { 2569 return getRegBitWidth(RC.getID()); 2570 } 2571 2572 unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc, 2573 unsigned OpNo) { 2574 assert(OpNo < Desc.NumOperands); 2575 unsigned RCID = Desc.operands()[OpNo].RegClass; 2576 return getRegBitWidth(RCID) / 8; 2577 } 2578 2579 bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi) { 2580 if (isInlinableIntLiteral(Literal)) 2581 return true; 2582 2583 uint64_t Val = static_cast<uint64_t>(Literal); 2584 return (Val == llvm::bit_cast<uint64_t>(0.0)) || 2585 (Val == llvm::bit_cast<uint64_t>(1.0)) || 2586 (Val == llvm::bit_cast<uint64_t>(-1.0)) || 2587 (Val == llvm::bit_cast<uint64_t>(0.5)) || 2588 (Val == llvm::bit_cast<uint64_t>(-0.5)) || 2589 (Val == llvm::bit_cast<uint64_t>(2.0)) || 2590 (Val == llvm::bit_cast<uint64_t>(-2.0)) || 2591 (Val == llvm::bit_cast<uint64_t>(4.0)) || 2592 (Val == llvm::bit_cast<uint64_t>(-4.0)) || 2593 (Val == 0x3fc45f306dc9c882 && HasInv2Pi); 2594 } 2595 2596 bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi) { 2597 if (isInlinableIntLiteral(Literal)) 2598 return true; 2599 2600 // The actual type of the operand does not seem to matter as long 2601 // as the bits match one of the inline immediate values. For example: 2602 // 2603 // -nan has the hexadecimal encoding of 0xfffffffe which is -2 in decimal, 2604 // so it is a legal inline immediate. 2605 // 2606 // 1065353216 has the hexadecimal encoding 0x3f800000 which is 1.0f in 2607 // floating-point, so it is a legal inline immediate. 2608 2609 uint32_t Val = static_cast<uint32_t>(Literal); 2610 return (Val == llvm::bit_cast<uint32_t>(0.0f)) || 2611 (Val == llvm::bit_cast<uint32_t>(1.0f)) || 2612 (Val == llvm::bit_cast<uint32_t>(-1.0f)) || 2613 (Val == llvm::bit_cast<uint32_t>(0.5f)) || 2614 (Val == llvm::bit_cast<uint32_t>(-0.5f)) || 2615 (Val == llvm::bit_cast<uint32_t>(2.0f)) || 2616 (Val == llvm::bit_cast<uint32_t>(-2.0f)) || 2617 (Val == llvm::bit_cast<uint32_t>(4.0f)) || 2618 (Val == llvm::bit_cast<uint32_t>(-4.0f)) || 2619 (Val == 0x3e22f983 && HasInv2Pi); 2620 } 2621 2622 bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi) { 2623 if (!HasInv2Pi) 2624 return false; 2625 2626 if (isInlinableIntLiteral(Literal)) 2627 return true; 2628 2629 uint16_t Val = static_cast<uint16_t>(Literal); 2630 return Val == 0x3C00 || // 1.0 2631 Val == 0xBC00 || // -1.0 2632 Val == 0x3800 || // 0.5 2633 Val == 0xB800 || // -0.5 2634 Val == 0x4000 || // 2.0 2635 Val == 0xC000 || // -2.0 2636 Val == 0x4400 || // 4.0 2637 Val == 0xC400 || // -4.0 2638 Val == 0x3118; // 1/2pi 2639 } 2640 2641 std::optional<unsigned> getInlineEncodingV216(bool IsFloat, uint32_t Literal) { 2642 // Unfortunately, the Instruction Set Architecture Reference Guide is 2643 // misleading about how the inline operands work for (packed) 16-bit 2644 // instructions. In a nutshell, the actual HW behavior is: 2645 // 2646 // - integer encodings (-16 .. 64) are always produced as sign-extended 2647 // 32-bit values 2648 // - float encodings are produced as: 2649 // - for F16 instructions: corresponding half-precision float values in 2650 // the LSBs, 0 in the MSBs 2651 // - for UI16 instructions: corresponding single-precision float value 2652 int32_t Signed = static_cast<int32_t>(Literal); 2653 if (Signed >= 0 && Signed <= 64) 2654 return 128 + Signed; 2655 2656 if (Signed >= -16 && Signed <= -1) 2657 return 192 + std::abs(Signed); 2658 2659 if (IsFloat) { 2660 // clang-format off 2661 switch (Literal) { 2662 case 0x3800: return 240; // 0.5 2663 case 0xB800: return 241; // -0.5 2664 case 0x3C00: return 242; // 1.0 2665 case 0xBC00: return 243; // -1.0 2666 case 0x4000: return 244; // 2.0 2667 case 0xC000: return 245; // -2.0 2668 case 0x4400: return 246; // 4.0 2669 case 0xC400: return 247; // -4.0 2670 case 0x3118: return 248; // 1.0 / (2.0 * pi) 2671 default: break; 2672 } 2673 // clang-format on 2674 } else { 2675 // clang-format off 2676 switch (Literal) { 2677 case 0x3F000000: return 240; // 0.5 2678 case 0xBF000000: return 241; // -0.5 2679 case 0x3F800000: return 242; // 1.0 2680 case 0xBF800000: return 243; // -1.0 2681 case 0x40000000: return 244; // 2.0 2682 case 0xC0000000: return 245; // -2.0 2683 case 0x40800000: return 246; // 4.0 2684 case 0xC0800000: return 247; // -4.0 2685 case 0x3E22F983: return 248; // 1.0 / (2.0 * pi) 2686 default: break; 2687 } 2688 // clang-format on 2689 } 2690 2691 return {}; 2692 } 2693 2694 // Encoding of the literal as an inline constant for a V_PK_*_IU16 instruction 2695 // or nullopt. 2696 std::optional<unsigned> getInlineEncodingV2I16(uint32_t Literal) { 2697 return getInlineEncodingV216(false, Literal); 2698 } 2699 2700 // Encoding of the literal as an inline constant for a V_PK_*_F16 instruction 2701 // or nullopt. 2702 std::optional<unsigned> getInlineEncodingV2F16(uint32_t Literal) { 2703 return getInlineEncodingV216(true, Literal); 2704 } 2705 2706 // Whether the given literal can be inlined for a V_PK_* instruction. 2707 bool isInlinableLiteralV216(uint32_t Literal, uint8_t OpType) { 2708 switch (OpType) { 2709 case AMDGPU::OPERAND_REG_IMM_V2INT16: 2710 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 2711 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 2712 return getInlineEncodingV216(false, Literal).has_value(); 2713 case AMDGPU::OPERAND_REG_IMM_V2FP16: 2714 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 2715 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 2716 return getInlineEncodingV216(true, Literal).has_value(); 2717 default: 2718 llvm_unreachable("bad packed operand type"); 2719 } 2720 } 2721 2722 // Whether the given literal can be inlined for a V_PK_*_IU16 instruction. 2723 bool isInlinableLiteralV2I16(uint32_t Literal) { 2724 return getInlineEncodingV2I16(Literal).has_value(); 2725 } 2726 2727 // Whether the given literal can be inlined for a V_PK_*_F16 instruction. 2728 bool isInlinableLiteralV2F16(uint32_t Literal) { 2729 return getInlineEncodingV2F16(Literal).has_value(); 2730 } 2731 2732 bool isValid32BitLiteral(uint64_t Val, bool IsFP64) { 2733 if (IsFP64) 2734 return !(Val & 0xffffffffu); 2735 2736 return isUInt<32>(Val) || isInt<32>(Val); 2737 } 2738 2739 bool isArgPassedInSGPR(const Argument *A) { 2740 const Function *F = A->getParent(); 2741 2742 // Arguments to compute shaders are never a source of divergence. 2743 CallingConv::ID CC = F->getCallingConv(); 2744 switch (CC) { 2745 case CallingConv::AMDGPU_KERNEL: 2746 case CallingConv::SPIR_KERNEL: 2747 return true; 2748 case CallingConv::AMDGPU_VS: 2749 case CallingConv::AMDGPU_LS: 2750 case CallingConv::AMDGPU_HS: 2751 case CallingConv::AMDGPU_ES: 2752 case CallingConv::AMDGPU_GS: 2753 case CallingConv::AMDGPU_PS: 2754 case CallingConv::AMDGPU_CS: 2755 case CallingConv::AMDGPU_Gfx: 2756 case CallingConv::AMDGPU_CS_Chain: 2757 case CallingConv::AMDGPU_CS_ChainPreserve: 2758 // For non-compute shaders, SGPR inputs are marked with either inreg or 2759 // byval. Everything else is in VGPRs. 2760 return A->hasAttribute(Attribute::InReg) || 2761 A->hasAttribute(Attribute::ByVal); 2762 default: 2763 // TODO: treat i1 as divergent? 2764 return A->hasAttribute(Attribute::InReg); 2765 } 2766 } 2767 2768 bool isArgPassedInSGPR(const CallBase *CB, unsigned ArgNo) { 2769 // Arguments to compute shaders are never a source of divergence. 2770 CallingConv::ID CC = CB->getCallingConv(); 2771 switch (CC) { 2772 case CallingConv::AMDGPU_KERNEL: 2773 case CallingConv::SPIR_KERNEL: 2774 return true; 2775 case CallingConv::AMDGPU_VS: 2776 case CallingConv::AMDGPU_LS: 2777 case CallingConv::AMDGPU_HS: 2778 case CallingConv::AMDGPU_ES: 2779 case CallingConv::AMDGPU_GS: 2780 case CallingConv::AMDGPU_PS: 2781 case CallingConv::AMDGPU_CS: 2782 case CallingConv::AMDGPU_Gfx: 2783 case CallingConv::AMDGPU_CS_Chain: 2784 case CallingConv::AMDGPU_CS_ChainPreserve: 2785 // For non-compute shaders, SGPR inputs are marked with either inreg or 2786 // byval. Everything else is in VGPRs. 2787 return CB->paramHasAttr(ArgNo, Attribute::InReg) || 2788 CB->paramHasAttr(ArgNo, Attribute::ByVal); 2789 default: 2790 return CB->paramHasAttr(ArgNo, Attribute::InReg); 2791 } 2792 } 2793 2794 static bool hasSMEMByteOffset(const MCSubtargetInfo &ST) { 2795 return isGCN3Encoding(ST) || isGFX10Plus(ST); 2796 } 2797 2798 static bool hasSMRDSignedImmOffset(const MCSubtargetInfo &ST) { 2799 return isGFX9Plus(ST); 2800 } 2801 2802 bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST, 2803 int64_t EncodedOffset) { 2804 if (isGFX12Plus(ST)) 2805 return isUInt<23>(EncodedOffset); 2806 2807 return hasSMEMByteOffset(ST) ? isUInt<20>(EncodedOffset) 2808 : isUInt<8>(EncodedOffset); 2809 } 2810 2811 bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST, 2812 int64_t EncodedOffset, 2813 bool IsBuffer) { 2814 if (isGFX12Plus(ST)) 2815 return isInt<24>(EncodedOffset); 2816 2817 return !IsBuffer && 2818 hasSMRDSignedImmOffset(ST) && 2819 isInt<21>(EncodedOffset); 2820 } 2821 2822 static bool isDwordAligned(uint64_t ByteOffset) { 2823 return (ByteOffset & 3) == 0; 2824 } 2825 2826 uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST, 2827 uint64_t ByteOffset) { 2828 if (hasSMEMByteOffset(ST)) 2829 return ByteOffset; 2830 2831 assert(isDwordAligned(ByteOffset)); 2832 return ByteOffset >> 2; 2833 } 2834 2835 std::optional<int64_t> getSMRDEncodedOffset(const MCSubtargetInfo &ST, 2836 int64_t ByteOffset, bool IsBuffer) { 2837 if (isGFX12Plus(ST)) // 24 bit signed offsets 2838 return isInt<24>(ByteOffset) ? std::optional<int64_t>(ByteOffset) 2839 : std::nullopt; 2840 2841 // The signed version is always a byte offset. 2842 if (!IsBuffer && hasSMRDSignedImmOffset(ST)) { 2843 assert(hasSMEMByteOffset(ST)); 2844 return isInt<20>(ByteOffset) ? std::optional<int64_t>(ByteOffset) 2845 : std::nullopt; 2846 } 2847 2848 if (!isDwordAligned(ByteOffset) && !hasSMEMByteOffset(ST)) 2849 return std::nullopt; 2850 2851 int64_t EncodedOffset = convertSMRDOffsetUnits(ST, ByteOffset); 2852 return isLegalSMRDEncodedUnsignedOffset(ST, EncodedOffset) 2853 ? std::optional<int64_t>(EncodedOffset) 2854 : std::nullopt; 2855 } 2856 2857 std::optional<int64_t> getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST, 2858 int64_t ByteOffset) { 2859 if (!isCI(ST) || !isDwordAligned(ByteOffset)) 2860 return std::nullopt; 2861 2862 int64_t EncodedOffset = convertSMRDOffsetUnits(ST, ByteOffset); 2863 return isUInt<32>(EncodedOffset) ? std::optional<int64_t>(EncodedOffset) 2864 : std::nullopt; 2865 } 2866 2867 unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST) { 2868 if (AMDGPU::isGFX10(ST)) 2869 return 12; 2870 2871 if (AMDGPU::isGFX12(ST)) 2872 return 24; 2873 return 13; 2874 } 2875 2876 namespace { 2877 2878 struct SourceOfDivergence { 2879 unsigned Intr; 2880 }; 2881 const SourceOfDivergence *lookupSourceOfDivergence(unsigned Intr); 2882 2883 struct AlwaysUniform { 2884 unsigned Intr; 2885 }; 2886 const AlwaysUniform *lookupAlwaysUniform(unsigned Intr); 2887 2888 #define GET_SourcesOfDivergence_IMPL 2889 #define GET_UniformIntrinsics_IMPL 2890 #define GET_Gfx9BufferFormat_IMPL 2891 #define GET_Gfx10BufferFormat_IMPL 2892 #define GET_Gfx11PlusBufferFormat_IMPL 2893 #include "AMDGPUGenSearchableTables.inc" 2894 2895 } // end anonymous namespace 2896 2897 bool isIntrinsicSourceOfDivergence(unsigned IntrID) { 2898 return lookupSourceOfDivergence(IntrID); 2899 } 2900 2901 bool isIntrinsicAlwaysUniform(unsigned IntrID) { 2902 return lookupAlwaysUniform(IntrID); 2903 } 2904 2905 const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t BitsPerComp, 2906 uint8_t NumComponents, 2907 uint8_t NumFormat, 2908 const MCSubtargetInfo &STI) { 2909 return isGFX11Plus(STI) 2910 ? getGfx11PlusBufferFormatInfo(BitsPerComp, NumComponents, 2911 NumFormat) 2912 : isGFX10(STI) ? getGfx10BufferFormatInfo(BitsPerComp, 2913 NumComponents, NumFormat) 2914 : getGfx9BufferFormatInfo(BitsPerComp, 2915 NumComponents, NumFormat); 2916 } 2917 2918 const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t Format, 2919 const MCSubtargetInfo &STI) { 2920 return isGFX11Plus(STI) ? getGfx11PlusBufferFormatInfo(Format) 2921 : isGFX10(STI) ? getGfx10BufferFormatInfo(Format) 2922 : getGfx9BufferFormatInfo(Format); 2923 } 2924 2925 bool hasAny64BitVGPROperands(const MCInstrDesc &OpDesc) { 2926 for (auto OpName : { OpName::vdst, OpName::src0, OpName::src1, 2927 OpName::src2 }) { 2928 int Idx = getNamedOperandIdx(OpDesc.getOpcode(), OpName); 2929 if (Idx == -1) 2930 continue; 2931 2932 if (OpDesc.operands()[Idx].RegClass == AMDGPU::VReg_64RegClassID || 2933 OpDesc.operands()[Idx].RegClass == AMDGPU::VReg_64_Align2RegClassID) 2934 return true; 2935 } 2936 2937 return false; 2938 } 2939 2940 bool isDPALU_DPP(const MCInstrDesc &OpDesc) { 2941 return hasAny64BitVGPROperands(OpDesc); 2942 } 2943 2944 } // namespace AMDGPU 2945 2946 raw_ostream &operator<<(raw_ostream &OS, 2947 const AMDGPU::IsaInfo::TargetIDSetting S) { 2948 switch (S) { 2949 case (AMDGPU::IsaInfo::TargetIDSetting::Unsupported): 2950 OS << "Unsupported"; 2951 break; 2952 case (AMDGPU::IsaInfo::TargetIDSetting::Any): 2953 OS << "Any"; 2954 break; 2955 case (AMDGPU::IsaInfo::TargetIDSetting::Off): 2956 OS << "Off"; 2957 break; 2958 case (AMDGPU::IsaInfo::TargetIDSetting::On): 2959 OS << "On"; 2960 break; 2961 } 2962 return OS; 2963 } 2964 2965 } // namespace llvm 2966