//===- AMDGPUBaseInfo.cpp - AMDGPU Base encoding information --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDGPUBaseInfo.h"
#include "AMDGPU.h"
#include "AMDGPUAsmUtils.h"
#include "AMDKernelCodeT.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsR600.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/TargetParser/TargetParser.h"
#include <optional>

#define GET_INSTRINFO_NAMED_OPS
#define GET_INSTRMAP_INFO
#include "AMDGPUGenInstrInfo.inc"

static llvm::cl::opt<unsigned>
    AmdhsaCodeObjectVersion("amdhsa-code-object-version", llvm::cl::Hidden,
                            llvm::cl::desc("AMDHSA Code Object Version"),
                            llvm::cl::init(4));

namespace {

/// \returns Bit mask for given bit \p Shift and bit \p Width.
unsigned getBitMask(unsigned Shift, unsigned Width) {
  return ((1 << Width) - 1) << Shift;
}

/// Packs \p Src into \p Dst for given bit \p Shift and bit \p Width.
///
/// \returns Packed \p Dst.
unsigned packBits(unsigned Src, unsigned Dst, unsigned Shift, unsigned Width) {
  unsigned Mask = getBitMask(Shift, Width);
  return ((Src << Shift) & Mask) | (Dst & ~Mask);
}

/// Unpacks bits from \p Src for given bit \p Shift and bit \p Width.
///
/// \returns Unpacked bits.
unsigned unpackBits(unsigned Src, unsigned Shift, unsigned Width) {
  return (Src & getBitMask(Shift, Width)) >> Shift;
}

/// \returns Vmcnt bit shift (lower bits).
unsigned getVmcntBitShiftLo(unsigned VersionMajor) {
  return VersionMajor >= 11 ? 10 : 0;
}

/// \returns Vmcnt bit width (lower bits).
unsigned getVmcntBitWidthLo(unsigned VersionMajor) {
  return VersionMajor >= 11 ? 6 : 4;
}

/// \returns Expcnt bit shift.
unsigned getExpcntBitShift(unsigned VersionMajor) {
  return VersionMajor >= 11 ? 0 : 4;
}

/// \returns Expcnt bit width.
unsigned getExpcntBitWidth(unsigned VersionMajor) { return 3; }

/// \returns Lgkmcnt bit shift.
unsigned getLgkmcntBitShift(unsigned VersionMajor) {
  return VersionMajor >= 11 ? 4 : 8;
}

/// \returns Lgkmcnt bit width.
unsigned getLgkmcntBitWidth(unsigned VersionMajor) {
  return VersionMajor >= 10 ? 6 : 4;
}

/// \returns Vmcnt bit shift (higher bits).
unsigned getVmcntBitShiftHi(unsigned VersionMajor) { return 14; }

/// \returns Vmcnt bit width (higher bits).
unsigned getVmcntBitWidthHi(unsigned VersionMajor) {
  return (VersionMajor == 9 || VersionMajor == 10) ? 2 : 0;
}

/// \returns VmVsrc bit width
inline unsigned getVmVsrcBitWidth() { return 3; }

/// \returns VmVsrc bit shift
inline unsigned getVmVsrcBitShift() { return 2; }

/// \returns VaVdst bit width
inline unsigned getVaVdstBitWidth() { return 4; }

/// \returns VaVdst bit shift
inline unsigned getVaVdstBitShift() { return 12; }

/// \returns SaSdst bit width
inline unsigned getSaSdstBitWidth() { return 1; }

/// \returns SaSdst bit shift
inline unsigned getSaSdstBitShift() { return 0; }

} // end namespace anonymous

namespace llvm {

namespace AMDGPU {

/// \returns True if \p STI is AMDHSA.
bool isHsaAbi(const MCSubtargetInfo &STI) {
  return STI.getTargetTriple().getOS() == Triple::AMDHSA;
}

std::optional<uint8_t> getHsaAbiVersion(const MCSubtargetInfo *STI) {
  if (STI && STI->getTargetTriple().getOS() != Triple::AMDHSA)
    return std::nullopt;

  switch (AmdhsaCodeObjectVersion) {
  case 4:
    return ELF::ELFABIVERSION_AMDGPU_HSA_V4;
  case 5:
    return ELF::ELFABIVERSION_AMDGPU_HSA_V5;
  default:
    report_fatal_error(Twine("Unsupported AMDHSA Code Object Version ") +
                       Twine(AmdhsaCodeObjectVersion));
  }
}

bool isHsaAbiVersion4(const MCSubtargetInfo *STI) {
  if (std::optional<uint8_t> HsaAbiVer = getHsaAbiVersion(STI))
    return *HsaAbiVer == ELF::ELFABIVERSION_AMDGPU_HSA_V4;
  return false;
}

bool isHsaAbiVersion5(const MCSubtargetInfo *STI) {
  if (std::optional<uint8_t> HsaAbiVer = getHsaAbiVersion(STI))
    return *HsaAbiVer == ELF::ELFABIVERSION_AMDGPU_HSA_V5;
  return false;
}

unsigned getAmdhsaCodeObjectVersion() {
  return AmdhsaCodeObjectVersion;
}

unsigned getCodeObjectVersion(const Module &M) {
  if (auto Ver = mdconst::extract_or_null<ConstantInt>(
          M.getModuleFlag("amdgpu_code_object_version"))) {
    return (unsigned)Ver->getZExtValue() / 100;
  }

  // Default code object version.
  return AMDHSA_COV4;
}

unsigned getMultigridSyncArgImplicitArgPosition(unsigned CodeObjectVersion) {
  switch (CodeObjectVersion) {
  case AMDHSA_COV4:
    return 48;
  case AMDHSA_COV5:
  default:
    return AMDGPU::ImplicitArg::MULTIGRID_SYNC_ARG_OFFSET;
  }
}

// FIXME: All such magic numbers about the ABI should be in a
// central TD file.
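// Illustrative usage sketch (not part of the build; the caller shown is
// hypothetical): the *ImplicitArgPosition helpers return the offset, in
// bytes, of the corresponding implicit kernel argument. With the hard-coded
// code object v4 values in these functions:
//   unsigned COV = getCodeObjectVersion(M);       // e.g. AMDHSA_COV4
//   getHostcallImplicitArgPosition(COV);          // 24
//   getDefaultQueueImplicitArgPosition(COV);      // 32
//   getCompletionActionImplicitArgPosition(COV);  // 40
//   getMultigridSyncArgImplicitArgPosition(COV);  // 48
// For v5 and later the offsets come from the AMDGPU::ImplicitArg enum
// instead.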
180 unsigned getHostcallImplicitArgPosition(unsigned CodeObjectVersion) { 181 switch (CodeObjectVersion) { 182 case AMDHSA_COV4: 183 return 24; 184 case AMDHSA_COV5: 185 default: 186 return AMDGPU::ImplicitArg::HOSTCALL_PTR_OFFSET; 187 } 188 } 189 190 unsigned getDefaultQueueImplicitArgPosition(unsigned CodeObjectVersion) { 191 switch (CodeObjectVersion) { 192 case AMDHSA_COV4: 193 return 32; 194 case AMDHSA_COV5: 195 default: 196 return AMDGPU::ImplicitArg::DEFAULT_QUEUE_OFFSET; 197 } 198 } 199 200 unsigned getCompletionActionImplicitArgPosition(unsigned CodeObjectVersion) { 201 switch (CodeObjectVersion) { 202 case AMDHSA_COV4: 203 return 40; 204 case AMDHSA_COV5: 205 default: 206 return AMDGPU::ImplicitArg::COMPLETION_ACTION_OFFSET; 207 } 208 } 209 210 #define GET_MIMGBaseOpcodesTable_IMPL 211 #define GET_MIMGDimInfoTable_IMPL 212 #define GET_MIMGInfoTable_IMPL 213 #define GET_MIMGLZMappingTable_IMPL 214 #define GET_MIMGMIPMappingTable_IMPL 215 #define GET_MIMGBiasMappingTable_IMPL 216 #define GET_MIMGOffsetMappingTable_IMPL 217 #define GET_MIMGG16MappingTable_IMPL 218 #define GET_MAIInstInfoTable_IMPL 219 #include "AMDGPUGenSearchableTables.inc" 220 221 int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding, 222 unsigned VDataDwords, unsigned VAddrDwords) { 223 const MIMGInfo *Info = getMIMGOpcodeHelper(BaseOpcode, MIMGEncoding, 224 VDataDwords, VAddrDwords); 225 return Info ? Info->Opcode : -1; 226 } 227 228 const MIMGBaseOpcodeInfo *getMIMGBaseOpcode(unsigned Opc) { 229 const MIMGInfo *Info = getMIMGInfo(Opc); 230 return Info ? getMIMGBaseOpcodeInfo(Info->BaseOpcode) : nullptr; 231 } 232 233 int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels) { 234 const MIMGInfo *OrigInfo = getMIMGInfo(Opc); 235 const MIMGInfo *NewInfo = 236 getMIMGOpcodeHelper(OrigInfo->BaseOpcode, OrigInfo->MIMGEncoding, 237 NewChannels, OrigInfo->VAddrDwords); 238 return NewInfo ? NewInfo->Opcode : -1; 239 } 240 241 unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode, 242 const MIMGDimInfo *Dim, bool IsA16, 243 bool IsG16Supported) { 244 unsigned AddrWords = BaseOpcode->NumExtraArgs; 245 unsigned AddrComponents = (BaseOpcode->Coordinates ? Dim->NumCoords : 0) + 246 (BaseOpcode->LodOrClampOrMip ? 1 : 0); 247 if (IsA16) 248 AddrWords += divideCeil(AddrComponents, 2); 249 else 250 AddrWords += AddrComponents; 251 252 // Note: For subtargets that support A16 but not G16, enabling A16 also 253 // enables 16 bit gradients. 254 // For subtargets that support A16 (operand) and G16 (done with a different 255 // instruction encoding), they are independent. 256 257 if (BaseOpcode->Gradients) { 258 if ((IsA16 && !IsG16Supported) || BaseOpcode->G16) 259 // There are two gradients per coordinate, we pack them separately. 
260 // For the 3d case, 261 // we get (dy/du, dx/du) (-, dz/du) (dy/dv, dx/dv) (-, dz/dv) 262 AddrWords += alignTo<2>(Dim->NumGradients / 2); 263 else 264 AddrWords += Dim->NumGradients; 265 } 266 return AddrWords; 267 } 268 269 struct MUBUFInfo { 270 uint16_t Opcode; 271 uint16_t BaseOpcode; 272 uint8_t elements; 273 bool has_vaddr; 274 bool has_srsrc; 275 bool has_soffset; 276 bool IsBufferInv; 277 }; 278 279 struct MTBUFInfo { 280 uint16_t Opcode; 281 uint16_t BaseOpcode; 282 uint8_t elements; 283 bool has_vaddr; 284 bool has_srsrc; 285 bool has_soffset; 286 }; 287 288 struct SMInfo { 289 uint16_t Opcode; 290 bool IsBuffer; 291 }; 292 293 struct VOPInfo { 294 uint16_t Opcode; 295 bool IsSingle; 296 }; 297 298 struct VOPC64DPPInfo { 299 uint16_t Opcode; 300 }; 301 302 struct VOPDComponentInfo { 303 uint16_t BaseVOP; 304 uint16_t VOPDOp; 305 bool CanBeVOPDX; 306 }; 307 308 struct VOPDInfo { 309 uint16_t Opcode; 310 uint16_t OpX; 311 uint16_t OpY; 312 uint16_t Subtarget; 313 }; 314 315 struct VOPTrue16Info { 316 uint16_t Opcode; 317 bool IsTrue16; 318 }; 319 320 #define GET_MTBUFInfoTable_DECL 321 #define GET_MTBUFInfoTable_IMPL 322 #define GET_MUBUFInfoTable_DECL 323 #define GET_MUBUFInfoTable_IMPL 324 #define GET_SMInfoTable_DECL 325 #define GET_SMInfoTable_IMPL 326 #define GET_VOP1InfoTable_DECL 327 #define GET_VOP1InfoTable_IMPL 328 #define GET_VOP2InfoTable_DECL 329 #define GET_VOP2InfoTable_IMPL 330 #define GET_VOP3InfoTable_DECL 331 #define GET_VOP3InfoTable_IMPL 332 #define GET_VOPC64DPPTable_DECL 333 #define GET_VOPC64DPPTable_IMPL 334 #define GET_VOPC64DPP8Table_DECL 335 #define GET_VOPC64DPP8Table_IMPL 336 #define GET_VOPDComponentTable_DECL 337 #define GET_VOPDComponentTable_IMPL 338 #define GET_VOPDPairs_DECL 339 #define GET_VOPDPairs_IMPL 340 #define GET_VOPTrue16Table_DECL 341 #define GET_VOPTrue16Table_IMPL 342 #define GET_WMMAOpcode2AddrMappingTable_DECL 343 #define GET_WMMAOpcode2AddrMappingTable_IMPL 344 #define GET_WMMAOpcode3AddrMappingTable_DECL 345 #define GET_WMMAOpcode3AddrMappingTable_IMPL 346 #include "AMDGPUGenSearchableTables.inc" 347 348 int getMTBUFBaseOpcode(unsigned Opc) { 349 const MTBUFInfo *Info = getMTBUFInfoFromOpcode(Opc); 350 return Info ? Info->BaseOpcode : -1; 351 } 352 353 int getMTBUFOpcode(unsigned BaseOpc, unsigned Elements) { 354 const MTBUFInfo *Info = getMTBUFInfoFromBaseOpcodeAndElements(BaseOpc, Elements); 355 return Info ? Info->Opcode : -1; 356 } 357 358 int getMTBUFElements(unsigned Opc) { 359 const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc); 360 return Info ? Info->elements : 0; 361 } 362 363 bool getMTBUFHasVAddr(unsigned Opc) { 364 const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc); 365 return Info ? Info->has_vaddr : false; 366 } 367 368 bool getMTBUFHasSrsrc(unsigned Opc) { 369 const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc); 370 return Info ? Info->has_srsrc : false; 371 } 372 373 bool getMTBUFHasSoffset(unsigned Opc) { 374 const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc); 375 return Info ? Info->has_soffset : false; 376 } 377 378 int getMUBUFBaseOpcode(unsigned Opc) { 379 const MUBUFInfo *Info = getMUBUFInfoFromOpcode(Opc); 380 return Info ? Info->BaseOpcode : -1; 381 } 382 383 int getMUBUFOpcode(unsigned BaseOpc, unsigned Elements) { 384 const MUBUFInfo *Info = getMUBUFInfoFromBaseOpcodeAndElements(BaseOpc, Elements); 385 return Info ? Info->Opcode : -1; 386 } 387 388 int getMUBUFElements(unsigned Opc) { 389 const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc); 390 return Info ? 
Info->elements : 0; 391 } 392 393 bool getMUBUFHasVAddr(unsigned Opc) { 394 const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc); 395 return Info ? Info->has_vaddr : false; 396 } 397 398 bool getMUBUFHasSrsrc(unsigned Opc) { 399 const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc); 400 return Info ? Info->has_srsrc : false; 401 } 402 403 bool getMUBUFHasSoffset(unsigned Opc) { 404 const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc); 405 return Info ? Info->has_soffset : false; 406 } 407 408 bool getMUBUFIsBufferInv(unsigned Opc) { 409 const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc); 410 return Info ? Info->IsBufferInv : false; 411 } 412 413 bool getSMEMIsBuffer(unsigned Opc) { 414 const SMInfo *Info = getSMEMOpcodeHelper(Opc); 415 return Info ? Info->IsBuffer : false; 416 } 417 418 bool getVOP1IsSingle(unsigned Opc) { 419 const VOPInfo *Info = getVOP1OpcodeHelper(Opc); 420 return Info ? Info->IsSingle : false; 421 } 422 423 bool getVOP2IsSingle(unsigned Opc) { 424 const VOPInfo *Info = getVOP2OpcodeHelper(Opc); 425 return Info ? Info->IsSingle : false; 426 } 427 428 bool getVOP3IsSingle(unsigned Opc) { 429 const VOPInfo *Info = getVOP3OpcodeHelper(Opc); 430 return Info ? Info->IsSingle : false; 431 } 432 433 bool isVOPC64DPP(unsigned Opc) { 434 return isVOPC64DPPOpcodeHelper(Opc) || isVOPC64DPP8OpcodeHelper(Opc); 435 } 436 437 bool getMAIIsDGEMM(unsigned Opc) { 438 const MAIInstInfo *Info = getMAIInstInfoHelper(Opc); 439 return Info ? Info->is_dgemm : false; 440 } 441 442 bool getMAIIsGFX940XDL(unsigned Opc) { 443 const MAIInstInfo *Info = getMAIInstInfoHelper(Opc); 444 return Info ? Info->is_gfx940_xdl : false; 445 } 446 447 unsigned getVOPDEncodingFamily(const MCSubtargetInfo &ST) { 448 if (ST.hasFeature(AMDGPU::FeatureGFX12Insts)) 449 return SIEncodingFamily::GFX12; 450 if (ST.hasFeature(AMDGPU::FeatureGFX11Insts)) 451 return SIEncodingFamily::GFX11; 452 llvm_unreachable("Subtarget generation does not support VOPD!"); 453 } 454 455 CanBeVOPD getCanBeVOPD(unsigned Opc) { 456 const VOPDComponentInfo *Info = getVOPDComponentHelper(Opc); 457 if (Info) 458 return {Info->CanBeVOPDX, true}; 459 else 460 return {false, false}; 461 } 462 463 unsigned getVOPDOpcode(unsigned Opc) { 464 const VOPDComponentInfo *Info = getVOPDComponentHelper(Opc); 465 return Info ? 
Info->VOPDOp : ~0u; 466 } 467 468 bool isVOPD(unsigned Opc) { 469 return AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src0X); 470 } 471 472 bool isMAC(unsigned Opc) { 473 return Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 || 474 Opc == AMDGPU::V_MAC_F32_e64_gfx10 || 475 Opc == AMDGPU::V_MAC_F32_e64_vi || 476 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 || 477 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 || 478 Opc == AMDGPU::V_MAC_F16_e64_vi || 479 Opc == AMDGPU::V_FMAC_F64_e64_gfx90a || 480 Opc == AMDGPU::V_FMAC_F32_e64_gfx10 || 481 Opc == AMDGPU::V_FMAC_F32_e64_gfx11 || 482 Opc == AMDGPU::V_FMAC_F32_e64_gfx12 || 483 Opc == AMDGPU::V_FMAC_F32_e64_vi || 484 Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 || 485 Opc == AMDGPU::V_FMAC_DX9_ZERO_F32_e64_gfx11 || 486 Opc == AMDGPU::V_FMAC_F16_e64_gfx10 || 487 Opc == AMDGPU::V_FMAC_F16_t16_e64_gfx11 || 488 Opc == AMDGPU::V_FMAC_F16_t16_e64_gfx12 || 489 Opc == AMDGPU::V_DOT2C_F32_F16_e64_vi || 490 Opc == AMDGPU::V_DOT2C_I32_I16_e64_vi || 491 Opc == AMDGPU::V_DOT4C_I32_I8_e64_vi || 492 Opc == AMDGPU::V_DOT8C_I32_I4_e64_vi; 493 } 494 495 bool isPermlane16(unsigned Opc) { 496 return Opc == AMDGPU::V_PERMLANE16_B32_gfx10 || 497 Opc == AMDGPU::V_PERMLANEX16_B32_gfx10 || 498 Opc == AMDGPU::V_PERMLANE16_B32_e64_gfx11 || 499 Opc == AMDGPU::V_PERMLANEX16_B32_e64_gfx11 || 500 Opc == AMDGPU::V_PERMLANE16_B32_e64_gfx12 || 501 Opc == AMDGPU::V_PERMLANEX16_B32_e64_gfx12 || 502 Opc == AMDGPU::V_PERMLANE16_VAR_B32_e64_gfx12 || 503 Opc == AMDGPU::V_PERMLANEX16_VAR_B32_e64_gfx12; 504 } 505 506 bool isGenericAtomic(unsigned Opc) { 507 return Opc == AMDGPU::G_AMDGPU_ATOMIC_FMIN || 508 Opc == AMDGPU::G_AMDGPU_ATOMIC_FMAX || 509 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SWAP || 510 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_ADD || 511 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SUB || 512 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMIN || 513 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMIN || 514 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMAX || 515 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMAX || 516 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_AND || 517 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_OR || 518 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_XOR || 519 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_INC || 520 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_DEC || 521 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FADD || 522 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMIN || 523 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMAX || 524 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_CMPSWAP || 525 Opc == AMDGPU::G_AMDGPU_ATOMIC_CMPXCHG; 526 } 527 528 bool isTrue16Inst(unsigned Opc) { 529 const VOPTrue16Info *Info = getTrue16OpcodeHelper(Opc); 530 return Info ? Info->IsTrue16 : false; 531 } 532 533 unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc) { 534 const WMMAOpcodeMappingInfo *Info = getWMMAMappingInfoFrom2AddrOpcode(Opc); 535 return Info ? Info->Opcode3Addr : ~0u; 536 } 537 538 unsigned mapWMMA3AddrTo2AddrOpcode(unsigned Opc) { 539 const WMMAOpcodeMappingInfo *Info = getWMMAMappingInfoFrom3AddrOpcode(Opc); 540 return Info ? Info->Opcode2Addr : ~0u; 541 } 542 543 // Wrapper for Tablegen'd function. enum Subtarget is not defined in any 544 // header files, so we need to wrap it in a function that takes unsigned 545 // instead. 546 int getMCOpcode(uint16_t Opcode, unsigned Gen) { 547 return getMCOpcodeGen(Opcode, static_cast<Subtarget>(Gen)); 548 } 549 550 int getVOPDFull(unsigned OpX, unsigned OpY, unsigned EncodingFamily) { 551 const VOPDInfo *Info = 552 getVOPDInfoFromComponentOpcodes(OpX, OpY, EncodingFamily); 553 return Info ? 
Info->Opcode : -1; 554 } 555 556 std::pair<unsigned, unsigned> getVOPDComponents(unsigned VOPDOpcode) { 557 const VOPDInfo *Info = getVOPDOpcodeHelper(VOPDOpcode); 558 assert(Info); 559 auto OpX = getVOPDBaseFromComponent(Info->OpX); 560 auto OpY = getVOPDBaseFromComponent(Info->OpY); 561 assert(OpX && OpY); 562 return {OpX->BaseVOP, OpY->BaseVOP}; 563 } 564 565 namespace VOPD { 566 567 ComponentProps::ComponentProps(const MCInstrDesc &OpDesc) { 568 assert(OpDesc.getNumDefs() == Component::DST_NUM); 569 570 assert(OpDesc.getOperandConstraint(Component::SRC0, MCOI::TIED_TO) == -1); 571 assert(OpDesc.getOperandConstraint(Component::SRC1, MCOI::TIED_TO) == -1); 572 auto TiedIdx = OpDesc.getOperandConstraint(Component::SRC2, MCOI::TIED_TO); 573 assert(TiedIdx == -1 || TiedIdx == Component::DST); 574 HasSrc2Acc = TiedIdx != -1; 575 576 SrcOperandsNum = OpDesc.getNumOperands() - OpDesc.getNumDefs(); 577 assert(SrcOperandsNum <= Component::MAX_SRC_NUM); 578 579 auto OperandsNum = OpDesc.getNumOperands(); 580 unsigned CompOprIdx; 581 for (CompOprIdx = Component::SRC1; CompOprIdx < OperandsNum; ++CompOprIdx) { 582 if (OpDesc.operands()[CompOprIdx].OperandType == AMDGPU::OPERAND_KIMM32) { 583 MandatoryLiteralIdx = CompOprIdx; 584 break; 585 } 586 } 587 } 588 589 unsigned ComponentInfo::getIndexInParsedOperands(unsigned CompOprIdx) const { 590 assert(CompOprIdx < Component::MAX_OPR_NUM); 591 592 if (CompOprIdx == Component::DST) 593 return getIndexOfDstInParsedOperands(); 594 595 auto CompSrcIdx = CompOprIdx - Component::DST_NUM; 596 if (CompSrcIdx < getCompParsedSrcOperandsNum()) 597 return getIndexOfSrcInParsedOperands(CompSrcIdx); 598 599 // The specified operand does not exist. 600 return 0; 601 } 602 603 std::optional<unsigned> InstInfo::getInvalidCompOperandIndex( 604 std::function<unsigned(unsigned, unsigned)> GetRegIdx, bool SkipSrc) const { 605 606 auto OpXRegs = getRegIndices(ComponentIndex::X, GetRegIdx); 607 auto OpYRegs = getRegIndices(ComponentIndex::Y, GetRegIdx); 608 609 const unsigned CompOprNum = 610 SkipSrc ? Component::DST_NUM : Component::MAX_OPR_NUM; 611 unsigned CompOprIdx; 612 for (CompOprIdx = 0; CompOprIdx < CompOprNum; ++CompOprIdx) { 613 unsigned BanksMasks = VOPD_VGPR_BANK_MASKS[CompOprIdx]; 614 if (OpXRegs[CompOprIdx] && OpYRegs[CompOprIdx] && 615 ((OpXRegs[CompOprIdx] & BanksMasks) == 616 (OpYRegs[CompOprIdx] & BanksMasks))) 617 return CompOprIdx; 618 } 619 620 return {}; 621 } 622 623 // Return an array of VGPR registers [DST,SRC0,SRC1,SRC2] used 624 // by the specified component. If an operand is unused 625 // or is not a VGPR, the corresponding value is 0. 626 // 627 // GetRegIdx(Component, MCOperandIdx) must return a VGPR register index 628 // for the specified component and MC operand. The callback must return 0 629 // if the operand is not a register or not a VGPR. 630 InstInfo::RegIndices InstInfo::getRegIndices( 631 unsigned CompIdx, 632 std::function<unsigned(unsigned, unsigned)> GetRegIdx) const { 633 assert(CompIdx < COMPONENTS_NUM); 634 635 const auto &Comp = CompInfo[CompIdx]; 636 InstInfo::RegIndices RegIndices; 637 638 RegIndices[DST] = GetRegIdx(CompIdx, Comp.getIndexOfDstInMCOperands()); 639 640 for (unsigned CompOprIdx : {SRC0, SRC1, SRC2}) { 641 unsigned CompSrcIdx = CompOprIdx - DST_NUM; 642 RegIndices[CompOprIdx] = 643 Comp.hasRegSrcOperand(CompSrcIdx) 644 ? 
GetRegIdx(CompIdx, Comp.getIndexOfSrcInMCOperands(CompSrcIdx)) 645 : 0; 646 } 647 return RegIndices; 648 } 649 650 } // namespace VOPD 651 652 VOPD::InstInfo getVOPDInstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY) { 653 return VOPD::InstInfo(OpX, OpY); 654 } 655 656 VOPD::InstInfo getVOPDInstInfo(unsigned VOPDOpcode, 657 const MCInstrInfo *InstrInfo) { 658 auto [OpX, OpY] = getVOPDComponents(VOPDOpcode); 659 const auto &OpXDesc = InstrInfo->get(OpX); 660 const auto &OpYDesc = InstrInfo->get(OpY); 661 VOPD::ComponentInfo OpXInfo(OpXDesc, VOPD::ComponentKind::COMPONENT_X); 662 VOPD::ComponentInfo OpYInfo(OpYDesc, OpXInfo); 663 return VOPD::InstInfo(OpXInfo, OpYInfo); 664 } 665 666 namespace IsaInfo { 667 668 AMDGPUTargetID::AMDGPUTargetID(const MCSubtargetInfo &STI) 669 : STI(STI), XnackSetting(TargetIDSetting::Any), 670 SramEccSetting(TargetIDSetting::Any), CodeObjectVersion(0) { 671 if (!STI.getFeatureBits().test(FeatureSupportsXNACK)) 672 XnackSetting = TargetIDSetting::Unsupported; 673 if (!STI.getFeatureBits().test(FeatureSupportsSRAMECC)) 674 SramEccSetting = TargetIDSetting::Unsupported; 675 } 676 677 void AMDGPUTargetID::setTargetIDFromFeaturesString(StringRef FS) { 678 // Check if xnack or sramecc is explicitly enabled or disabled. In the 679 // absence of the target features we assume we must generate code that can run 680 // in any environment. 681 SubtargetFeatures Features(FS); 682 std::optional<bool> XnackRequested; 683 std::optional<bool> SramEccRequested; 684 685 for (const std::string &Feature : Features.getFeatures()) { 686 if (Feature == "+xnack") 687 XnackRequested = true; 688 else if (Feature == "-xnack") 689 XnackRequested = false; 690 else if (Feature == "+sramecc") 691 SramEccRequested = true; 692 else if (Feature == "-sramecc") 693 SramEccRequested = false; 694 } 695 696 bool XnackSupported = isXnackSupported(); 697 bool SramEccSupported = isSramEccSupported(); 698 699 if (XnackRequested) { 700 if (XnackSupported) { 701 XnackSetting = 702 *XnackRequested ? TargetIDSetting::On : TargetIDSetting::Off; 703 } else { 704 // If a specific xnack setting was requested and this GPU does not support 705 // xnack emit a warning. Setting will remain set to "Unsupported". 706 if (*XnackRequested) { 707 errs() << "warning: xnack 'On' was requested for a processor that does " 708 "not support it!\n"; 709 } else { 710 errs() << "warning: xnack 'Off' was requested for a processor that " 711 "does not support it!\n"; 712 } 713 } 714 } 715 716 if (SramEccRequested) { 717 if (SramEccSupported) { 718 SramEccSetting = 719 *SramEccRequested ? TargetIDSetting::On : TargetIDSetting::Off; 720 } else { 721 // If a specific sramecc setting was requested and this GPU does not 722 // support sramecc emit a warning. Setting will remain set to 723 // "Unsupported". 
724 if (*SramEccRequested) { 725 errs() << "warning: sramecc 'On' was requested for a processor that " 726 "does not support it!\n"; 727 } else { 728 errs() << "warning: sramecc 'Off' was requested for a processor that " 729 "does not support it!\n"; 730 } 731 } 732 } 733 } 734 735 static TargetIDSetting 736 getTargetIDSettingFromFeatureString(StringRef FeatureString) { 737 if (FeatureString.ends_with("-")) 738 return TargetIDSetting::Off; 739 if (FeatureString.ends_with("+")) 740 return TargetIDSetting::On; 741 742 llvm_unreachable("Malformed feature string"); 743 } 744 745 void AMDGPUTargetID::setTargetIDFromTargetIDStream(StringRef TargetID) { 746 SmallVector<StringRef, 3> TargetIDSplit; 747 TargetID.split(TargetIDSplit, ':'); 748 749 for (const auto &FeatureString : TargetIDSplit) { 750 if (FeatureString.starts_with("xnack")) 751 XnackSetting = getTargetIDSettingFromFeatureString(FeatureString); 752 if (FeatureString.starts_with("sramecc")) 753 SramEccSetting = getTargetIDSettingFromFeatureString(FeatureString); 754 } 755 } 756 757 std::string AMDGPUTargetID::toString() const { 758 std::string StringRep; 759 raw_string_ostream StreamRep(StringRep); 760 761 auto TargetTriple = STI.getTargetTriple(); 762 auto Version = getIsaVersion(STI.getCPU()); 763 764 StreamRep << TargetTriple.getArchName() << '-' 765 << TargetTriple.getVendorName() << '-' 766 << TargetTriple.getOSName() << '-' 767 << TargetTriple.getEnvironmentName() << '-'; 768 769 std::string Processor; 770 // TODO: Following else statement is present here because we used various 771 // alias names for GPUs up until GFX9 (e.g. 'fiji' is same as 'gfx803'). 772 // Remove once all aliases are removed from GCNProcessors.td. 773 if (Version.Major >= 9) 774 Processor = STI.getCPU().str(); 775 else 776 Processor = (Twine("gfx") + Twine(Version.Major) + Twine(Version.Minor) + 777 Twine(Version.Stepping)) 778 .str(); 779 780 std::string Features; 781 if (STI.getTargetTriple().getOS() == Triple::AMDHSA) { 782 switch (CodeObjectVersion) { 783 case AMDGPU::AMDHSA_COV4: 784 case AMDGPU::AMDHSA_COV5: 785 // sramecc. 786 if (getSramEccSetting() == TargetIDSetting::Off) 787 Features += ":sramecc-"; 788 else if (getSramEccSetting() == TargetIDSetting::On) 789 Features += ":sramecc+"; 790 // xnack. 791 if (getXnackSetting() == TargetIDSetting::Off) 792 Features += ":xnack-"; 793 else if (getXnackSetting() == TargetIDSetting::On) 794 Features += ":xnack+"; 795 break; 796 default: 797 break; 798 } 799 } 800 801 StreamRep << Processor << Features; 802 803 StreamRep.flush(); 804 return StringRep; 805 } 806 807 unsigned getWavefrontSize(const MCSubtargetInfo *STI) { 808 if (STI->getFeatureBits().test(FeatureWavefrontSize16)) 809 return 16; 810 if (STI->getFeatureBits().test(FeatureWavefrontSize32)) 811 return 32; 812 813 return 64; 814 } 815 816 unsigned getLocalMemorySize(const MCSubtargetInfo *STI) { 817 unsigned BytesPerCU = 0; 818 if (STI->getFeatureBits().test(FeatureLocalMemorySize32768)) 819 BytesPerCU = 32768; 820 if (STI->getFeatureBits().test(FeatureLocalMemorySize65536)) 821 BytesPerCU = 65536; 822 823 // "Per CU" really means "per whatever functional block the waves of a 824 // workgroup must share". So the effective local memory size is doubled in 825 // WGP mode on gfx10. 
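  // Worked example (follows directly from the code below): a gfx10 target
  // with FeatureLocalMemorySize65536 in WGP mode (FeatureCuMode not set)
  // reports 2 * 65536 = 131072 bytes here, while the same target in CU mode
  // reports 65536.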
826 if (isGFX10Plus(*STI) && !STI->getFeatureBits().test(FeatureCuMode)) 827 BytesPerCU *= 2; 828 829 return BytesPerCU; 830 } 831 832 unsigned getAddressableLocalMemorySize(const MCSubtargetInfo *STI) { 833 if (STI->getFeatureBits().test(FeatureLocalMemorySize32768)) 834 return 32768; 835 if (STI->getFeatureBits().test(FeatureLocalMemorySize65536)) 836 return 65536; 837 return 0; 838 } 839 840 unsigned getEUsPerCU(const MCSubtargetInfo *STI) { 841 // "Per CU" really means "per whatever functional block the waves of a 842 // workgroup must share". For gfx10 in CU mode this is the CU, which contains 843 // two SIMDs. 844 if (isGFX10Plus(*STI) && STI->getFeatureBits().test(FeatureCuMode)) 845 return 2; 846 // Pre-gfx10 a CU contains four SIMDs. For gfx10 in WGP mode the WGP contains 847 // two CUs, so a total of four SIMDs. 848 return 4; 849 } 850 851 unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI, 852 unsigned FlatWorkGroupSize) { 853 assert(FlatWorkGroupSize != 0); 854 if (STI->getTargetTriple().getArch() != Triple::amdgcn) 855 return 8; 856 unsigned MaxWaves = getMaxWavesPerEU(STI) * getEUsPerCU(STI); 857 unsigned N = getWavesPerWorkGroup(STI, FlatWorkGroupSize); 858 if (N == 1) { 859 // Single-wave workgroups don't consume barrier resources. 860 return MaxWaves; 861 } 862 863 unsigned MaxBarriers = 16; 864 if (isGFX10Plus(*STI) && !STI->getFeatureBits().test(FeatureCuMode)) 865 MaxBarriers = 32; 866 867 return std::min(MaxWaves / N, MaxBarriers); 868 } 869 870 unsigned getMinWavesPerEU(const MCSubtargetInfo *STI) { 871 return 1; 872 } 873 874 unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI) { 875 // FIXME: Need to take scratch memory into account. 876 if (isGFX90A(*STI)) 877 return 8; 878 if (!isGFX10Plus(*STI)) 879 return 10; 880 return hasGFX10_3Insts(*STI) ? 16 : 20; 881 } 882 883 unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo *STI, 884 unsigned FlatWorkGroupSize) { 885 return divideCeil(getWavesPerWorkGroup(STI, FlatWorkGroupSize), 886 getEUsPerCU(STI)); 887 } 888 889 unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI) { 890 return 1; 891 } 892 893 unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI) { 894 // Some subtargets allow encoding 2048, but this isn't tested or supported. 
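  // With this 1024 cap and a wave64 subtarget, a maximal workgroup occupies
  // divideCeil(1024, 64) = 16 waves (see getWavesPerWorkGroup below).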
895 return 1024; 896 } 897 898 unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI, 899 unsigned FlatWorkGroupSize) { 900 return divideCeil(FlatWorkGroupSize, getWavefrontSize(STI)); 901 } 902 903 unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI) { 904 IsaVersion Version = getIsaVersion(STI->getCPU()); 905 if (Version.Major >= 10) 906 return getAddressableNumSGPRs(STI); 907 if (Version.Major >= 8) 908 return 16; 909 return 8; 910 } 911 912 unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI) { 913 return 8; 914 } 915 916 unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI) { 917 IsaVersion Version = getIsaVersion(STI->getCPU()); 918 if (Version.Major >= 8) 919 return 800; 920 return 512; 921 } 922 923 unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI) { 924 if (STI->getFeatureBits().test(FeatureSGPRInitBug)) 925 return FIXED_NUM_SGPRS_FOR_INIT_BUG; 926 927 IsaVersion Version = getIsaVersion(STI->getCPU()); 928 if (Version.Major >= 10) 929 return 106; 930 if (Version.Major >= 8) 931 return 102; 932 return 104; 933 } 934 935 unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) { 936 assert(WavesPerEU != 0); 937 938 IsaVersion Version = getIsaVersion(STI->getCPU()); 939 if (Version.Major >= 10) 940 return 0; 941 942 if (WavesPerEU >= getMaxWavesPerEU(STI)) 943 return 0; 944 945 unsigned MinNumSGPRs = getTotalNumSGPRs(STI) / (WavesPerEU + 1); 946 if (STI->getFeatureBits().test(FeatureTrapHandler)) 947 MinNumSGPRs -= std::min(MinNumSGPRs, (unsigned)TRAP_NUM_SGPRS); 948 MinNumSGPRs = alignDown(MinNumSGPRs, getSGPRAllocGranule(STI)) + 1; 949 return std::min(MinNumSGPRs, getAddressableNumSGPRs(STI)); 950 } 951 952 unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU, 953 bool Addressable) { 954 assert(WavesPerEU != 0); 955 956 unsigned AddressableNumSGPRs = getAddressableNumSGPRs(STI); 957 IsaVersion Version = getIsaVersion(STI->getCPU()); 958 if (Version.Major >= 10) 959 return Addressable ? AddressableNumSGPRs : 108; 960 if (Version.Major >= 8 && !Addressable) 961 AddressableNumSGPRs = 112; 962 unsigned MaxNumSGPRs = getTotalNumSGPRs(STI) / WavesPerEU; 963 if (STI->getFeatureBits().test(FeatureTrapHandler)) 964 MaxNumSGPRs -= std::min(MaxNumSGPRs, (unsigned)TRAP_NUM_SGPRS); 965 MaxNumSGPRs = alignDown(MaxNumSGPRs, getSGPRAllocGranule(STI)); 966 return std::min(MaxNumSGPRs, AddressableNumSGPRs); 967 } 968 969 unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed, 970 bool FlatScrUsed, bool XNACKUsed) { 971 unsigned ExtraSGPRs = 0; 972 if (VCCUsed) 973 ExtraSGPRs = 2; 974 975 IsaVersion Version = getIsaVersion(STI->getCPU()); 976 if (Version.Major >= 10) 977 return ExtraSGPRs; 978 979 if (Version.Major < 8) { 980 if (FlatScrUsed) 981 ExtraSGPRs = 4; 982 } else { 983 if (XNACKUsed) 984 ExtraSGPRs = 4; 985 986 if (FlatScrUsed || 987 STI->getFeatureBits().test(AMDGPU::FeatureArchitectedFlatScratch)) 988 ExtraSGPRs = 6; 989 } 990 991 return ExtraSGPRs; 992 } 993 994 unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed, 995 bool FlatScrUsed) { 996 return getNumExtraSGPRs(STI, VCCUsed, FlatScrUsed, 997 STI->getFeatureBits().test(AMDGPU::FeatureXNACK)); 998 } 999 1000 unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs) { 1001 NumSGPRs = alignTo(std::max(1u, NumSGPRs), getSGPREncodingGranule(STI)); 1002 // SGPRBlocks is actual number of SGPR blocks minus 1. 
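  // For example, with the 8-SGPR encoding granule, NumSGPRs = 20 is first
  // aligned up to 24 and encoded as 24 / 8 - 1 = 2.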
1003 return NumSGPRs / getSGPREncodingGranule(STI) - 1; 1004 } 1005 1006 unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI, 1007 std::optional<bool> EnableWavefrontSize32) { 1008 if (STI->getFeatureBits().test(FeatureGFX90AInsts)) 1009 return 8; 1010 1011 bool IsWave32 = EnableWavefrontSize32 ? 1012 *EnableWavefrontSize32 : 1013 STI->getFeatureBits().test(FeatureWavefrontSize32); 1014 1015 if (STI->getFeatureBits().test(FeatureGFX11FullVGPRs)) 1016 return IsWave32 ? 24 : 12; 1017 1018 if (hasGFX10_3Insts(*STI)) 1019 return IsWave32 ? 16 : 8; 1020 1021 return IsWave32 ? 8 : 4; 1022 } 1023 1024 unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI, 1025 std::optional<bool> EnableWavefrontSize32) { 1026 if (STI->getFeatureBits().test(FeatureGFX90AInsts)) 1027 return 8; 1028 1029 bool IsWave32 = EnableWavefrontSize32 ? 1030 *EnableWavefrontSize32 : 1031 STI->getFeatureBits().test(FeatureWavefrontSize32); 1032 1033 return IsWave32 ? 8 : 4; 1034 } 1035 1036 unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI) { 1037 if (STI->getFeatureBits().test(FeatureGFX90AInsts)) 1038 return 512; 1039 if (!isGFX10Plus(*STI)) 1040 return 256; 1041 bool IsWave32 = STI->getFeatureBits().test(FeatureWavefrontSize32); 1042 if (STI->getFeatureBits().test(FeatureGFX11FullVGPRs)) 1043 return IsWave32 ? 1536 : 768; 1044 return IsWave32 ? 1024 : 512; 1045 } 1046 1047 unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI) { 1048 if (STI->getFeatureBits().test(FeatureGFX90AInsts)) 1049 return 512; 1050 return 256; 1051 } 1052 1053 unsigned getNumWavesPerEUWithNumVGPRs(const MCSubtargetInfo *STI, 1054 unsigned NumVGPRs) { 1055 unsigned MaxWaves = getMaxWavesPerEU(STI); 1056 unsigned Granule = getVGPRAllocGranule(STI); 1057 if (NumVGPRs < Granule) 1058 return MaxWaves; 1059 unsigned RoundedRegs = alignTo(NumVGPRs, Granule); 1060 return std::min(std::max(getTotalNumVGPRs(STI) / RoundedRegs, 1u), MaxWaves); 1061 } 1062 1063 unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) { 1064 assert(WavesPerEU != 0); 1065 1066 unsigned MaxWavesPerEU = getMaxWavesPerEU(STI); 1067 if (WavesPerEU >= MaxWavesPerEU) 1068 return 0; 1069 1070 unsigned TotNumVGPRs = getTotalNumVGPRs(STI); 1071 unsigned AddrsableNumVGPRs = getAddressableNumVGPRs(STI); 1072 unsigned Granule = getVGPRAllocGranule(STI); 1073 unsigned MaxNumVGPRs = alignDown(TotNumVGPRs / WavesPerEU, Granule); 1074 1075 if (MaxNumVGPRs == alignDown(TotNumVGPRs / MaxWavesPerEU, Granule)) 1076 return 0; 1077 1078 unsigned MinWavesPerEU = getNumWavesPerEUWithNumVGPRs(STI, AddrsableNumVGPRs); 1079 if (WavesPerEU < MinWavesPerEU) 1080 return getMinNumVGPRs(STI, MinWavesPerEU); 1081 1082 unsigned MaxNumVGPRsNext = alignDown(TotNumVGPRs / (WavesPerEU + 1), Granule); 1083 unsigned MinNumVGPRs = 1 + std::min(MaxNumVGPRs - Granule, MaxNumVGPRsNext); 1084 return std::min(MinNumVGPRs, AddrsableNumVGPRs); 1085 } 1086 1087 unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) { 1088 assert(WavesPerEU != 0); 1089 1090 unsigned MaxNumVGPRs = alignDown(getTotalNumVGPRs(STI) / WavesPerEU, 1091 getVGPRAllocGranule(STI)); 1092 unsigned AddressableNumVGPRs = getAddressableNumVGPRs(STI); 1093 return std::min(MaxNumVGPRs, AddressableNumVGPRs); 1094 } 1095 1096 unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs, 1097 std::optional<bool> EnableWavefrontSize32) { 1098 NumVGPRs = alignTo(std::max(1u, NumVGPRs), 1099 getVGPREncodingGranule(STI, EnableWavefrontSize32)); 1100 // VGPRBlocks is actual number of VGPR blocks minus 1. 
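  // For example, with a wave64 encoding granule of 4, NumVGPRs = 17 is first
  // aligned up to 20 and encoded as 20 / 4 - 1 = 4.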
1101 return NumVGPRs / getVGPREncodingGranule(STI, EnableWavefrontSize32) - 1; 1102 } 1103 1104 } // end namespace IsaInfo 1105 1106 void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header, 1107 const MCSubtargetInfo *STI) { 1108 IsaVersion Version = getIsaVersion(STI->getCPU()); 1109 1110 memset(&Header, 0, sizeof(Header)); 1111 1112 Header.amd_kernel_code_version_major = 1; 1113 Header.amd_kernel_code_version_minor = 2; 1114 Header.amd_machine_kind = 1; // AMD_MACHINE_KIND_AMDGPU 1115 Header.amd_machine_version_major = Version.Major; 1116 Header.amd_machine_version_minor = Version.Minor; 1117 Header.amd_machine_version_stepping = Version.Stepping; 1118 Header.kernel_code_entry_byte_offset = sizeof(Header); 1119 Header.wavefront_size = 6; 1120 1121 // If the code object does not support indirect functions, then the value must 1122 // be 0xffffffff. 1123 Header.call_convention = -1; 1124 1125 // These alignment values are specified in powers of two, so alignment = 1126 // 2^n. The minimum alignment is 2^4 = 16. 1127 Header.kernarg_segment_alignment = 4; 1128 Header.group_segment_alignment = 4; 1129 Header.private_segment_alignment = 4; 1130 1131 if (Version.Major >= 10) { 1132 if (STI->getFeatureBits().test(FeatureWavefrontSize32)) { 1133 Header.wavefront_size = 5; 1134 Header.code_properties |= AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32; 1135 } 1136 Header.compute_pgm_resource_registers |= 1137 S_00B848_WGP_MODE(STI->getFeatureBits().test(FeatureCuMode) ? 0 : 1) | 1138 S_00B848_MEM_ORDERED(1); 1139 } 1140 } 1141 1142 amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor( 1143 const MCSubtargetInfo *STI) { 1144 IsaVersion Version = getIsaVersion(STI->getCPU()); 1145 1146 amdhsa::kernel_descriptor_t KD; 1147 memset(&KD, 0, sizeof(KD)); 1148 1149 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 1150 amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, 1151 amdhsa::FLOAT_DENORM_MODE_FLUSH_NONE); 1152 if (Version.Major >= 12) { 1153 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 1154 amdhsa::COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN, 0); 1155 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 1156 amdhsa::COMPUTE_PGM_RSRC1_GFX12_PLUS_DISABLE_PERF, 0); 1157 } else { 1158 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 1159 amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP, 1); 1160 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 1161 amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE, 1); 1162 } 1163 AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, 1164 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, 1); 1165 if (Version.Major >= 10) { 1166 AMDHSA_BITS_SET(KD.kernel_code_properties, 1167 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, 1168 STI->getFeatureBits().test(FeatureWavefrontSize32) ? 1 : 0); 1169 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 1170 amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE, 1171 STI->getFeatureBits().test(FeatureCuMode) ? 0 : 1); 1172 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 1173 amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED, 1); 1174 } 1175 if (AMDGPU::isGFX90A(*STI)) { 1176 AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, 1177 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, 1178 STI->getFeatureBits().test(FeatureTgSplit) ? 
1 : 0); 1179 } 1180 return KD; 1181 } 1182 1183 bool isGroupSegment(const GlobalValue *GV) { 1184 return GV->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS; 1185 } 1186 1187 bool isGlobalSegment(const GlobalValue *GV) { 1188 return GV->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS; 1189 } 1190 1191 bool isReadOnlySegment(const GlobalValue *GV) { 1192 unsigned AS = GV->getAddressSpace(); 1193 return AS == AMDGPUAS::CONSTANT_ADDRESS || 1194 AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT; 1195 } 1196 1197 bool shouldEmitConstantsToTextSection(const Triple &TT) { 1198 return TT.getArch() == Triple::r600; 1199 } 1200 1201 std::pair<unsigned, unsigned> 1202 getIntegerPairAttribute(const Function &F, StringRef Name, 1203 std::pair<unsigned, unsigned> Default, 1204 bool OnlyFirstRequired) { 1205 Attribute A = F.getFnAttribute(Name); 1206 if (!A.isStringAttribute()) 1207 return Default; 1208 1209 LLVMContext &Ctx = F.getContext(); 1210 std::pair<unsigned, unsigned> Ints = Default; 1211 std::pair<StringRef, StringRef> Strs = A.getValueAsString().split(','); 1212 if (Strs.first.trim().getAsInteger(0, Ints.first)) { 1213 Ctx.emitError("can't parse first integer attribute " + Name); 1214 return Default; 1215 } 1216 if (Strs.second.trim().getAsInteger(0, Ints.second)) { 1217 if (!OnlyFirstRequired || !Strs.second.trim().empty()) { 1218 Ctx.emitError("can't parse second integer attribute " + Name); 1219 return Default; 1220 } 1221 } 1222 1223 return Ints; 1224 } 1225 1226 unsigned getVmcntBitMask(const IsaVersion &Version) { 1227 return (1 << (getVmcntBitWidthLo(Version.Major) + 1228 getVmcntBitWidthHi(Version.Major))) - 1229 1; 1230 } 1231 1232 unsigned getExpcntBitMask(const IsaVersion &Version) { 1233 return (1 << getExpcntBitWidth(Version.Major)) - 1; 1234 } 1235 1236 unsigned getLgkmcntBitMask(const IsaVersion &Version) { 1237 return (1 << getLgkmcntBitWidth(Version.Major)) - 1; 1238 } 1239 1240 unsigned getWaitcntBitMask(const IsaVersion &Version) { 1241 unsigned VmcntLo = getBitMask(getVmcntBitShiftLo(Version.Major), 1242 getVmcntBitWidthLo(Version.Major)); 1243 unsigned Expcnt = getBitMask(getExpcntBitShift(Version.Major), 1244 getExpcntBitWidth(Version.Major)); 1245 unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(Version.Major), 1246 getLgkmcntBitWidth(Version.Major)); 1247 unsigned VmcntHi = getBitMask(getVmcntBitShiftHi(Version.Major), 1248 getVmcntBitWidthHi(Version.Major)); 1249 return VmcntLo | Expcnt | Lgkmcnt | VmcntHi; 1250 } 1251 1252 unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt) { 1253 unsigned VmcntLo = unpackBits(Waitcnt, getVmcntBitShiftLo(Version.Major), 1254 getVmcntBitWidthLo(Version.Major)); 1255 unsigned VmcntHi = unpackBits(Waitcnt, getVmcntBitShiftHi(Version.Major), 1256 getVmcntBitWidthHi(Version.Major)); 1257 return VmcntLo | VmcntHi << getVmcntBitWidthLo(Version.Major); 1258 } 1259 1260 unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt) { 1261 return unpackBits(Waitcnt, getExpcntBitShift(Version.Major), 1262 getExpcntBitWidth(Version.Major)); 1263 } 1264 1265 unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt) { 1266 return unpackBits(Waitcnt, getLgkmcntBitShift(Version.Major), 1267 getLgkmcntBitWidth(Version.Major)); 1268 } 1269 1270 void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt, 1271 unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt) { 1272 Vmcnt = decodeVmcnt(Version, Waitcnt); 1273 Expcnt = decodeExpcnt(Version, Waitcnt); 1274 Lgkmcnt = decodeLgkmcnt(Version, Waitcnt); 1275 } 1276 1277 Waitcnt 
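// A sketch of the field layout these helpers assume, derived from the
// get*BitShift/Width helpers at the top of this file: on GFX9/GFX10 vmcnt is
// split across bits [3:0] (lo) and [15:14] (hi), expcnt sits at [6:4] and
// lgkmcnt at [11:8] ([13:8] on GFX10); on GFX11+ expcnt is at [2:0], lgkmcnt
// at [9:4] and vmcnt at [15:10]. decodeVmcnt above recombines the lo and hi
// vmcnt parts by shifting the hi bits left by the lo field width.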
decodeWaitcnt(const IsaVersion &Version, unsigned Encoded) { 1278 Waitcnt Decoded; 1279 Decoded.VmCnt = decodeVmcnt(Version, Encoded); 1280 Decoded.ExpCnt = decodeExpcnt(Version, Encoded); 1281 Decoded.LgkmCnt = decodeLgkmcnt(Version, Encoded); 1282 return Decoded; 1283 } 1284 1285 unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt, 1286 unsigned Vmcnt) { 1287 Waitcnt = packBits(Vmcnt, Waitcnt, getVmcntBitShiftLo(Version.Major), 1288 getVmcntBitWidthLo(Version.Major)); 1289 return packBits(Vmcnt >> getVmcntBitWidthLo(Version.Major), Waitcnt, 1290 getVmcntBitShiftHi(Version.Major), 1291 getVmcntBitWidthHi(Version.Major)); 1292 } 1293 1294 unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt, 1295 unsigned Expcnt) { 1296 return packBits(Expcnt, Waitcnt, getExpcntBitShift(Version.Major), 1297 getExpcntBitWidth(Version.Major)); 1298 } 1299 1300 unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt, 1301 unsigned Lgkmcnt) { 1302 return packBits(Lgkmcnt, Waitcnt, getLgkmcntBitShift(Version.Major), 1303 getLgkmcntBitWidth(Version.Major)); 1304 } 1305 1306 unsigned encodeWaitcnt(const IsaVersion &Version, 1307 unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt) { 1308 unsigned Waitcnt = getWaitcntBitMask(Version); 1309 Waitcnt = encodeVmcnt(Version, Waitcnt, Vmcnt); 1310 Waitcnt = encodeExpcnt(Version, Waitcnt, Expcnt); 1311 Waitcnt = encodeLgkmcnt(Version, Waitcnt, Lgkmcnt); 1312 return Waitcnt; 1313 } 1314 1315 unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded) { 1316 return encodeWaitcnt(Version, Decoded.VmCnt, Decoded.ExpCnt, Decoded.LgkmCnt); 1317 } 1318 1319 //===----------------------------------------------------------------------===// 1320 // Custom Operands. 1321 // 1322 // A table of custom operands shall describe "primary" operand names 1323 // first followed by aliases if any. It is not required but recommended 1324 // to arrange operands so that operand encoding match operand position 1325 // in the table. This will make disassembly a bit more efficient. 1326 // Unused slots in the table shall have an empty name. 1327 // 1328 //===----------------------------------------------------------------------===// 1329 1330 template <class T> 1331 static bool isValidOpr(int Idx, const CustomOperand<T> OpInfo[], int OpInfoSize, 1332 T Context) { 1333 return 0 <= Idx && Idx < OpInfoSize && !OpInfo[Idx].Name.empty() && 1334 (!OpInfo[Idx].Cond || OpInfo[Idx].Cond(Context)); 1335 } 1336 1337 template <class T> 1338 static int getOprIdx(std::function<bool(const CustomOperand<T> &)> Test, 1339 const CustomOperand<T> OpInfo[], int OpInfoSize, 1340 T Context) { 1341 int InvalidIdx = OPR_ID_UNKNOWN; 1342 for (int Idx = 0; Idx < OpInfoSize; ++Idx) { 1343 if (Test(OpInfo[Idx])) { 1344 if (!OpInfo[Idx].Cond || OpInfo[Idx].Cond(Context)) 1345 return Idx; 1346 InvalidIdx = OPR_ID_UNSUPPORTED; 1347 } 1348 } 1349 return InvalidIdx; 1350 } 1351 1352 template <class T> 1353 static int getOprIdx(const StringRef Name, const CustomOperand<T> OpInfo[], 1354 int OpInfoSize, T Context) { 1355 auto Test = [=](const CustomOperand<T> &Op) { return Op.Name == Name; }; 1356 return getOprIdx<T>(Test, OpInfo, OpInfoSize, Context); 1357 } 1358 1359 template <class T> 1360 static int getOprIdx(int Id, const CustomOperand<T> OpInfo[], int OpInfoSize, 1361 T Context, bool QuickCheck = true) { 1362 auto Test = [=](const CustomOperand<T> &Op) { 1363 return Op.Encoding == Id && !Op.Name.empty(); 1364 }; 1365 // This is an optimization that should work in most cases. 
1366 // As a side effect, it may cause selection of an alias 1367 // instead of a primary operand name in case of sparse tables. 1368 if (QuickCheck && isValidOpr<T>(Id, OpInfo, OpInfoSize, Context) && 1369 OpInfo[Id].Encoding == Id) { 1370 return Id; 1371 } 1372 return getOprIdx<T>(Test, OpInfo, OpInfoSize, Context); 1373 } 1374 1375 //===----------------------------------------------------------------------===// 1376 // Custom Operand Values 1377 //===----------------------------------------------------------------------===// 1378 1379 static unsigned getDefaultCustomOperandEncoding(const CustomOperandVal *Opr, 1380 int Size, 1381 const MCSubtargetInfo &STI) { 1382 unsigned Enc = 0; 1383 for (int Idx = 0; Idx < Size; ++Idx) { 1384 const auto &Op = Opr[Idx]; 1385 if (Op.isSupported(STI)) 1386 Enc |= Op.encode(Op.Default); 1387 } 1388 return Enc; 1389 } 1390 1391 static bool isSymbolicCustomOperandEncoding(const CustomOperandVal *Opr, 1392 int Size, unsigned Code, 1393 bool &HasNonDefaultVal, 1394 const MCSubtargetInfo &STI) { 1395 unsigned UsedOprMask = 0; 1396 HasNonDefaultVal = false; 1397 for (int Idx = 0; Idx < Size; ++Idx) { 1398 const auto &Op = Opr[Idx]; 1399 if (!Op.isSupported(STI)) 1400 continue; 1401 UsedOprMask |= Op.getMask(); 1402 unsigned Val = Op.decode(Code); 1403 if (!Op.isValid(Val)) 1404 return false; 1405 HasNonDefaultVal |= (Val != Op.Default); 1406 } 1407 return (Code & ~UsedOprMask) == 0; 1408 } 1409 1410 static bool decodeCustomOperand(const CustomOperandVal *Opr, int Size, 1411 unsigned Code, int &Idx, StringRef &Name, 1412 unsigned &Val, bool &IsDefault, 1413 const MCSubtargetInfo &STI) { 1414 while (Idx < Size) { 1415 const auto &Op = Opr[Idx++]; 1416 if (Op.isSupported(STI)) { 1417 Name = Op.Name; 1418 Val = Op.decode(Code); 1419 IsDefault = (Val == Op.Default); 1420 return true; 1421 } 1422 } 1423 1424 return false; 1425 } 1426 1427 static int encodeCustomOperandVal(const CustomOperandVal &Op, 1428 int64_t InputVal) { 1429 if (InputVal < 0 || InputVal > Op.Max) 1430 return OPR_VAL_INVALID; 1431 return Op.encode(InputVal); 1432 } 1433 1434 static int encodeCustomOperand(const CustomOperandVal *Opr, int Size, 1435 const StringRef Name, int64_t InputVal, 1436 unsigned &UsedOprMask, 1437 const MCSubtargetInfo &STI) { 1438 int InvalidId = OPR_ID_UNKNOWN; 1439 for (int Idx = 0; Idx < Size; ++Idx) { 1440 const auto &Op = Opr[Idx]; 1441 if (Op.Name == Name) { 1442 if (!Op.isSupported(STI)) { 1443 InvalidId = OPR_ID_UNSUPPORTED; 1444 continue; 1445 } 1446 auto OprMask = Op.getMask(); 1447 if (OprMask & UsedOprMask) 1448 return OPR_ID_DUPLICATE; 1449 UsedOprMask |= OprMask; 1450 return encodeCustomOperandVal(Op, InputVal); 1451 } 1452 } 1453 return InvalidId; 1454 } 1455 1456 //===----------------------------------------------------------------------===// 1457 // DepCtr 1458 //===----------------------------------------------------------------------===// 1459 1460 namespace DepCtr { 1461 1462 int getDefaultDepCtrEncoding(const MCSubtargetInfo &STI) { 1463 static int Default = -1; 1464 if (Default == -1) 1465 Default = getDefaultCustomOperandEncoding(DepCtrInfo, DEP_CTR_SIZE, STI); 1466 return Default; 1467 } 1468 1469 bool isSymbolicDepCtrEncoding(unsigned Code, bool &HasNonDefaultVal, 1470 const MCSubtargetInfo &STI) { 1471 return isSymbolicCustomOperandEncoding(DepCtrInfo, DEP_CTR_SIZE, Code, 1472 HasNonDefaultVal, STI); 1473 } 1474 1475 bool decodeDepCtr(unsigned Code, int &Id, StringRef &Name, unsigned &Val, 1476 bool &IsDefault, const MCSubtargetInfo &STI) { 1477 
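  // For reference, a sketch based on the DepCtr bit width/shift helpers at
  // the top of this file: in the raw encoding sa_sdst occupies bit 0,
  // vm_vsrc bits [4:2] and va_vdst bits [15:12]. The encodeField* helpers
  // below start from the all-ones value 0xffff, so e.g. encodeFieldVmVsrc(0)
  // yields 0xffe3, i.e. only the vm_vsrc field cleared.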
return decodeCustomOperand(DepCtrInfo, DEP_CTR_SIZE, Code, Id, Name, Val, 1478 IsDefault, STI); 1479 } 1480 1481 int encodeDepCtr(const StringRef Name, int64_t Val, unsigned &UsedOprMask, 1482 const MCSubtargetInfo &STI) { 1483 return encodeCustomOperand(DepCtrInfo, DEP_CTR_SIZE, Name, Val, UsedOprMask, 1484 STI); 1485 } 1486 1487 unsigned decodeFieldVmVsrc(unsigned Encoded) { 1488 return unpackBits(Encoded, getVmVsrcBitShift(), getVmVsrcBitWidth()); 1489 } 1490 1491 unsigned decodeFieldVaVdst(unsigned Encoded) { 1492 return unpackBits(Encoded, getVaVdstBitShift(), getVaVdstBitWidth()); 1493 } 1494 1495 unsigned decodeFieldSaSdst(unsigned Encoded) { 1496 return unpackBits(Encoded, getSaSdstBitShift(), getSaSdstBitWidth()); 1497 } 1498 1499 unsigned encodeFieldVmVsrc(unsigned Encoded, unsigned VmVsrc) { 1500 return packBits(VmVsrc, Encoded, getVmVsrcBitShift(), getVmVsrcBitWidth()); 1501 } 1502 1503 unsigned encodeFieldVmVsrc(unsigned VmVsrc) { 1504 return encodeFieldVmVsrc(0xffff, VmVsrc); 1505 } 1506 1507 unsigned encodeFieldVaVdst(unsigned Encoded, unsigned VaVdst) { 1508 return packBits(VaVdst, Encoded, getVaVdstBitShift(), getVaVdstBitWidth()); 1509 } 1510 1511 unsigned encodeFieldVaVdst(unsigned VaVdst) { 1512 return encodeFieldVaVdst(0xffff, VaVdst); 1513 } 1514 1515 unsigned encodeFieldSaSdst(unsigned Encoded, unsigned SaSdst) { 1516 return packBits(SaSdst, Encoded, getSaSdstBitShift(), getSaSdstBitWidth()); 1517 } 1518 1519 unsigned encodeFieldSaSdst(unsigned SaSdst) { 1520 return encodeFieldSaSdst(0xffff, SaSdst); 1521 } 1522 1523 } // namespace DepCtr 1524 1525 //===----------------------------------------------------------------------===// 1526 // hwreg 1527 //===----------------------------------------------------------------------===// 1528 1529 namespace Hwreg { 1530 1531 int64_t getHwregId(const StringRef Name, const MCSubtargetInfo &STI) { 1532 int Idx = getOprIdx<const MCSubtargetInfo &>(Name, Opr, OPR_SIZE, STI); 1533 return (Idx < 0) ? Idx : Opr[Idx].Encoding; 1534 } 1535 1536 bool isValidHwreg(int64_t Id) { 1537 return 0 <= Id && isUInt<ID_WIDTH_>(Id); 1538 } 1539 1540 bool isValidHwregOffset(int64_t Offset) { 1541 return 0 <= Offset && isUInt<OFFSET_WIDTH_>(Offset); 1542 } 1543 1544 bool isValidHwregWidth(int64_t Width) { 1545 return 0 <= (Width - 1) && isUInt<WIDTH_M1_WIDTH_>(Width - 1); 1546 } 1547 1548 uint64_t encodeHwreg(uint64_t Id, uint64_t Offset, uint64_t Width) { 1549 return (Id << ID_SHIFT_) | 1550 (Offset << OFFSET_SHIFT_) | 1551 ((Width - 1) << WIDTH_M1_SHIFT_); 1552 } 1553 1554 StringRef getHwreg(unsigned Id, const MCSubtargetInfo &STI) { 1555 int Idx = getOprIdx<const MCSubtargetInfo &>(Id, Opr, OPR_SIZE, STI); 1556 return (Idx < 0) ? 
"" : Opr[Idx].Name; 1557 } 1558 1559 void decodeHwreg(unsigned Val, unsigned &Id, unsigned &Offset, unsigned &Width) { 1560 Id = (Val & ID_MASK_) >> ID_SHIFT_; 1561 Offset = (Val & OFFSET_MASK_) >> OFFSET_SHIFT_; 1562 Width = ((Val & WIDTH_M1_MASK_) >> WIDTH_M1_SHIFT_) + 1; 1563 } 1564 1565 } // namespace Hwreg 1566 1567 //===----------------------------------------------------------------------===// 1568 // exp tgt 1569 //===----------------------------------------------------------------------===// 1570 1571 namespace Exp { 1572 1573 struct ExpTgt { 1574 StringLiteral Name; 1575 unsigned Tgt; 1576 unsigned MaxIndex; 1577 }; 1578 1579 static constexpr ExpTgt ExpTgtInfo[] = { 1580 {{"null"}, ET_NULL, ET_NULL_MAX_IDX}, 1581 {{"mrtz"}, ET_MRTZ, ET_MRTZ_MAX_IDX}, 1582 {{"prim"}, ET_PRIM, ET_PRIM_MAX_IDX}, 1583 {{"mrt"}, ET_MRT0, ET_MRT_MAX_IDX}, 1584 {{"pos"}, ET_POS0, ET_POS_MAX_IDX}, 1585 {{"dual_src_blend"}, ET_DUAL_SRC_BLEND0, ET_DUAL_SRC_BLEND_MAX_IDX}, 1586 {{"param"}, ET_PARAM0, ET_PARAM_MAX_IDX}, 1587 }; 1588 1589 bool getTgtName(unsigned Id, StringRef &Name, int &Index) { 1590 for (const ExpTgt &Val : ExpTgtInfo) { 1591 if (Val.Tgt <= Id && Id <= Val.Tgt + Val.MaxIndex) { 1592 Index = (Val.MaxIndex == 0) ? -1 : (Id - Val.Tgt); 1593 Name = Val.Name; 1594 return true; 1595 } 1596 } 1597 return false; 1598 } 1599 1600 unsigned getTgtId(const StringRef Name) { 1601 1602 for (const ExpTgt &Val : ExpTgtInfo) { 1603 if (Val.MaxIndex == 0 && Name == Val.Name) 1604 return Val.Tgt; 1605 1606 if (Val.MaxIndex > 0 && Name.starts_with(Val.Name)) { 1607 StringRef Suffix = Name.drop_front(Val.Name.size()); 1608 1609 unsigned Id; 1610 if (Suffix.getAsInteger(10, Id) || Id > Val.MaxIndex) 1611 return ET_INVALID; 1612 1613 // Disable leading zeroes 1614 if (Suffix.size() > 1 && Suffix[0] == '0') 1615 return ET_INVALID; 1616 1617 return Val.Tgt + Id; 1618 } 1619 } 1620 return ET_INVALID; 1621 } 1622 1623 bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI) { 1624 switch (Id) { 1625 case ET_NULL: 1626 return !isGFX11Plus(STI); 1627 case ET_POS4: 1628 case ET_PRIM: 1629 return isGFX10Plus(STI); 1630 case ET_DUAL_SRC_BLEND0: 1631 case ET_DUAL_SRC_BLEND1: 1632 return isGFX11Plus(STI); 1633 default: 1634 if (Id >= ET_PARAM0 && Id <= ET_PARAM31) 1635 return !isGFX11Plus(STI); 1636 return true; 1637 } 1638 } 1639 1640 } // namespace Exp 1641 1642 //===----------------------------------------------------------------------===// 1643 // MTBUF Format 1644 //===----------------------------------------------------------------------===// 1645 1646 namespace MTBUFFormat { 1647 1648 int64_t getDfmt(const StringRef Name) { 1649 for (int Id = DFMT_MIN; Id <= DFMT_MAX; ++Id) { 1650 if (Name == DfmtSymbolic[Id]) 1651 return Id; 1652 } 1653 return DFMT_UNDEF; 1654 } 1655 1656 StringRef getDfmtName(unsigned Id) { 1657 assert(Id <= DFMT_MAX); 1658 return DfmtSymbolic[Id]; 1659 } 1660 1661 static StringLiteral const *getNfmtLookupTable(const MCSubtargetInfo &STI) { 1662 if (isSI(STI) || isCI(STI)) 1663 return NfmtSymbolicSICI; 1664 if (isVI(STI) || isGFX9(STI)) 1665 return NfmtSymbolicVI; 1666 return NfmtSymbolicGFX10; 1667 } 1668 1669 int64_t getNfmt(const StringRef Name, const MCSubtargetInfo &STI) { 1670 auto lookupTable = getNfmtLookupTable(STI); 1671 for (int Id = NFMT_MIN; Id <= NFMT_MAX; ++Id) { 1672 if (Name == lookupTable[Id]) 1673 return Id; 1674 } 1675 return NFMT_UNDEF; 1676 } 1677 1678 StringRef getNfmtName(unsigned Id, const MCSubtargetInfo &STI) { 1679 assert(Id <= NFMT_MAX); 1680 return 
getNfmtLookupTable(STI)[Id]; 1681 } 1682 1683 bool isValidDfmtNfmt(unsigned Id, const MCSubtargetInfo &STI) { 1684 unsigned Dfmt; 1685 unsigned Nfmt; 1686 decodeDfmtNfmt(Id, Dfmt, Nfmt); 1687 return isValidNfmt(Nfmt, STI); 1688 } 1689 1690 bool isValidNfmt(unsigned Id, const MCSubtargetInfo &STI) { 1691 return !getNfmtName(Id, STI).empty(); 1692 } 1693 1694 int64_t encodeDfmtNfmt(unsigned Dfmt, unsigned Nfmt) { 1695 return (Dfmt << DFMT_SHIFT) | (Nfmt << NFMT_SHIFT); 1696 } 1697 1698 void decodeDfmtNfmt(unsigned Format, unsigned &Dfmt, unsigned &Nfmt) { 1699 Dfmt = (Format >> DFMT_SHIFT) & DFMT_MASK; 1700 Nfmt = (Format >> NFMT_SHIFT) & NFMT_MASK; 1701 } 1702 1703 int64_t getUnifiedFormat(const StringRef Name, const MCSubtargetInfo &STI) { 1704 if (isGFX11Plus(STI)) { 1705 for (int Id = UfmtGFX11::UFMT_FIRST; Id <= UfmtGFX11::UFMT_LAST; ++Id) { 1706 if (Name == UfmtSymbolicGFX11[Id]) 1707 return Id; 1708 } 1709 } else { 1710 for (int Id = UfmtGFX10::UFMT_FIRST; Id <= UfmtGFX10::UFMT_LAST; ++Id) { 1711 if (Name == UfmtSymbolicGFX10[Id]) 1712 return Id; 1713 } 1714 } 1715 return UFMT_UNDEF; 1716 } 1717 1718 StringRef getUnifiedFormatName(unsigned Id, const MCSubtargetInfo &STI) { 1719 if(isValidUnifiedFormat(Id, STI)) 1720 return isGFX10(STI) ? UfmtSymbolicGFX10[Id] : UfmtSymbolicGFX11[Id]; 1721 return ""; 1722 } 1723 1724 bool isValidUnifiedFormat(unsigned Id, const MCSubtargetInfo &STI) { 1725 return isGFX10(STI) ? Id <= UfmtGFX10::UFMT_LAST : Id <= UfmtGFX11::UFMT_LAST; 1726 } 1727 1728 int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt, 1729 const MCSubtargetInfo &STI) { 1730 int64_t Fmt = encodeDfmtNfmt(Dfmt, Nfmt); 1731 if (isGFX11Plus(STI)) { 1732 for (int Id = UfmtGFX11::UFMT_FIRST; Id <= UfmtGFX11::UFMT_LAST; ++Id) { 1733 if (Fmt == DfmtNfmt2UFmtGFX11[Id]) 1734 return Id; 1735 } 1736 } else { 1737 for (int Id = UfmtGFX10::UFMT_FIRST; Id <= UfmtGFX10::UFMT_LAST; ++Id) { 1738 if (Fmt == DfmtNfmt2UFmtGFX10[Id]) 1739 return Id; 1740 } 1741 } 1742 return UFMT_UNDEF; 1743 } 1744 1745 bool isValidFormatEncoding(unsigned Val, const MCSubtargetInfo &STI) { 1746 return isGFX10Plus(STI) ? (Val <= UFMT_MAX) : (Val <= DFMT_NFMT_MAX); 1747 } 1748 1749 unsigned getDefaultFormatEncoding(const MCSubtargetInfo &STI) { 1750 if (isGFX10Plus(STI)) 1751 return UFMT_DEFAULT; 1752 return DFMT_NFMT_DEFAULT; 1753 } 1754 1755 } // namespace MTBUFFormat 1756 1757 //===----------------------------------------------------------------------===// 1758 // SendMsg 1759 //===----------------------------------------------------------------------===// 1760 1761 namespace SendMsg { 1762 1763 static uint64_t getMsgIdMask(const MCSubtargetInfo &STI) { 1764 return isGFX11Plus(STI) ? ID_MASK_GFX11Plus_ : ID_MASK_PreGFX11_; 1765 } 1766 1767 int64_t getMsgId(const StringRef Name, const MCSubtargetInfo &STI) { 1768 int Idx = getOprIdx<const MCSubtargetInfo &>(Name, Msg, MSG_SIZE, STI); 1769 return (Idx < 0) ? Idx : Msg[Idx].Encoding; 1770 } 1771 1772 bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI) { 1773 return (MsgId & ~(getMsgIdMask(STI))) == 0; 1774 } 1775 1776 StringRef getMsgName(int64_t MsgId, const MCSubtargetInfo &STI) { 1777 int Idx = getOprIdx<const MCSubtargetInfo &>(MsgId, Msg, MSG_SIZE, STI); 1778 return (Idx < 0) ? "" : Msg[Idx].Name; 1779 } 1780 1781 int64_t getMsgOpId(int64_t MsgId, const StringRef Name) { 1782 const char* const *S = (MsgId == ID_SYSMSG) ? OpSysSymbolic : OpGsSymbolic; 1783 const int F = (MsgId == ID_SYSMSG) ? 
OP_SYS_FIRST_ : OP_GS_FIRST_; 1784 const int L = (MsgId == ID_SYSMSG) ? OP_SYS_LAST_ : OP_GS_LAST_; 1785 for (int i = F; i < L; ++i) { 1786 if (Name == S[i]) { 1787 return i; 1788 } 1789 } 1790 return OP_UNKNOWN_; 1791 } 1792 1793 bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI, 1794 bool Strict) { 1795 assert(isValidMsgId(MsgId, STI)); 1796 1797 if (!Strict) 1798 return 0 <= OpId && isUInt<OP_WIDTH_>(OpId); 1799 1800 if (MsgId == ID_SYSMSG) 1801 return OP_SYS_FIRST_ <= OpId && OpId < OP_SYS_LAST_; 1802 if (!isGFX11Plus(STI)) { 1803 switch (MsgId) { 1804 case ID_GS_PreGFX11: 1805 return (OP_GS_FIRST_ <= OpId && OpId < OP_GS_LAST_) && OpId != OP_GS_NOP; 1806 case ID_GS_DONE_PreGFX11: 1807 return OP_GS_FIRST_ <= OpId && OpId < OP_GS_LAST_; 1808 } 1809 } 1810 return OpId == OP_NONE_; 1811 } 1812 1813 StringRef getMsgOpName(int64_t MsgId, int64_t OpId, 1814 const MCSubtargetInfo &STI) { 1815 assert(msgRequiresOp(MsgId, STI)); 1816 return (MsgId == ID_SYSMSG)? OpSysSymbolic[OpId] : OpGsSymbolic[OpId]; 1817 } 1818 1819 bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId, 1820 const MCSubtargetInfo &STI, bool Strict) { 1821 assert(isValidMsgOp(MsgId, OpId, STI, Strict)); 1822 1823 if (!Strict) 1824 return 0 <= StreamId && isUInt<STREAM_ID_WIDTH_>(StreamId); 1825 1826 if (!isGFX11Plus(STI)) { 1827 switch (MsgId) { 1828 case ID_GS_PreGFX11: 1829 return STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_; 1830 case ID_GS_DONE_PreGFX11: 1831 return (OpId == OP_GS_NOP) ? 1832 (StreamId == STREAM_ID_NONE_) : 1833 (STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_); 1834 } 1835 } 1836 return StreamId == STREAM_ID_NONE_; 1837 } 1838 1839 bool msgRequiresOp(int64_t MsgId, const MCSubtargetInfo &STI) { 1840 return MsgId == ID_SYSMSG || 1841 (!isGFX11Plus(STI) && 1842 (MsgId == ID_GS_PreGFX11 || MsgId == ID_GS_DONE_PreGFX11)); 1843 } 1844 1845 bool msgSupportsStream(int64_t MsgId, int64_t OpId, 1846 const MCSubtargetInfo &STI) { 1847 return !isGFX11Plus(STI) && 1848 (MsgId == ID_GS_PreGFX11 || MsgId == ID_GS_DONE_PreGFX11) && 1849 OpId != OP_GS_NOP; 1850 } 1851 1852 void decodeMsg(unsigned Val, uint16_t &MsgId, uint16_t &OpId, 1853 uint16_t &StreamId, const MCSubtargetInfo &STI) { 1854 MsgId = Val & getMsgIdMask(STI); 1855 if (isGFX11Plus(STI)) { 1856 OpId = 0; 1857 StreamId = 0; 1858 } else { 1859 OpId = (Val & OP_MASK_) >> OP_SHIFT_; 1860 StreamId = (Val & STREAM_ID_MASK_) >> STREAM_ID_SHIFT_; 1861 } 1862 } 1863 1864 uint64_t encodeMsg(uint64_t MsgId, 1865 uint64_t OpId, 1866 uint64_t StreamId) { 1867 return MsgId | (OpId << OP_SHIFT_) | (StreamId << STREAM_ID_SHIFT_); 1868 } 1869 1870 } // namespace SendMsg 1871 1872 //===----------------------------------------------------------------------===// 1873 // 1874 //===----------------------------------------------------------------------===// 1875 1876 unsigned getInitialPSInputAddr(const Function &F) { 1877 return F.getFnAttributeAsParsedInteger("InitialPSInputAddr", 0); 1878 } 1879 1880 bool getHasColorExport(const Function &F) { 1881 // As a safe default always respond as if PS has color exports. 1882 return F.getFnAttributeAsParsedInteger( 1883 "amdgpu-color-export", 1884 F.getCallingConv() == CallingConv::AMDGPU_PS ? 
1 : 0) != 0; 1885 } 1886 1887 bool getHasDepthExport(const Function &F) { 1888 return F.getFnAttributeAsParsedInteger("amdgpu-depth-export", 0) != 0; 1889 } 1890 1891 bool isShader(CallingConv::ID cc) { 1892 switch(cc) { 1893 case CallingConv::AMDGPU_VS: 1894 case CallingConv::AMDGPU_LS: 1895 case CallingConv::AMDGPU_HS: 1896 case CallingConv::AMDGPU_ES: 1897 case CallingConv::AMDGPU_GS: 1898 case CallingConv::AMDGPU_PS: 1899 case CallingConv::AMDGPU_CS_Chain: 1900 case CallingConv::AMDGPU_CS_ChainPreserve: 1901 case CallingConv::AMDGPU_CS: 1902 return true; 1903 default: 1904 return false; 1905 } 1906 } 1907 1908 bool isGraphics(CallingConv::ID cc) { 1909 return isShader(cc) || cc == CallingConv::AMDGPU_Gfx; 1910 } 1911 1912 bool isCompute(CallingConv::ID cc) { 1913 return !isGraphics(cc) || cc == CallingConv::AMDGPU_CS; 1914 } 1915 1916 bool isEntryFunctionCC(CallingConv::ID CC) { 1917 switch (CC) { 1918 case CallingConv::AMDGPU_KERNEL: 1919 case CallingConv::SPIR_KERNEL: 1920 case CallingConv::AMDGPU_VS: 1921 case CallingConv::AMDGPU_GS: 1922 case CallingConv::AMDGPU_PS: 1923 case CallingConv::AMDGPU_CS: 1924 case CallingConv::AMDGPU_ES: 1925 case CallingConv::AMDGPU_HS: 1926 case CallingConv::AMDGPU_LS: 1927 return true; 1928 default: 1929 return false; 1930 } 1931 } 1932 1933 bool isModuleEntryFunctionCC(CallingConv::ID CC) { 1934 switch (CC) { 1935 case CallingConv::AMDGPU_Gfx: 1936 return true; 1937 default: 1938 return isEntryFunctionCC(CC) || isChainCC(CC); 1939 } 1940 } 1941 1942 bool isChainCC(CallingConv::ID CC) { 1943 switch (CC) { 1944 case CallingConv::AMDGPU_CS_Chain: 1945 case CallingConv::AMDGPU_CS_ChainPreserve: 1946 return true; 1947 default: 1948 return false; 1949 } 1950 } 1951 1952 bool isKernelCC(const Function *Func) { 1953 return AMDGPU::isModuleEntryFunctionCC(Func->getCallingConv()); 1954 } 1955 1956 bool hasXNACK(const MCSubtargetInfo &STI) { 1957 return STI.hasFeature(AMDGPU::FeatureXNACK); 1958 } 1959 1960 bool hasSRAMECC(const MCSubtargetInfo &STI) { 1961 return STI.hasFeature(AMDGPU::FeatureSRAMECC); 1962 } 1963 1964 bool hasMIMG_R128(const MCSubtargetInfo &STI) { 1965 return STI.hasFeature(AMDGPU::FeatureMIMG_R128) && !STI.hasFeature(AMDGPU::FeatureR128A16); 1966 } 1967 1968 bool hasA16(const MCSubtargetInfo &STI) { 1969 return STI.hasFeature(AMDGPU::FeatureA16); 1970 } 1971 1972 bool hasG16(const MCSubtargetInfo &STI) { 1973 return STI.hasFeature(AMDGPU::FeatureG16); 1974 } 1975 1976 bool hasPackedD16(const MCSubtargetInfo &STI) { 1977 return !STI.hasFeature(AMDGPU::FeatureUnpackedD16VMem) && !isCI(STI) && 1978 !isSI(STI); 1979 } 1980 1981 bool hasGDS(const MCSubtargetInfo &STI) { 1982 return STI.hasFeature(AMDGPU::FeatureGDS); 1983 } 1984 1985 unsigned getNSAMaxSize(const MCSubtargetInfo &STI, bool HasSampler) { 1986 auto Version = getIsaVersion(STI.getCPU()); 1987 if (Version.Major == 10) 1988 return Version.Minor >= 3 ? 13 : 5; 1989 if (Version.Major == 11) 1990 return 5; 1991 if (Version.Major >= 12) 1992 return HasSampler ? 
4 : 5; 1993 return 0; 1994 } 1995 1996 unsigned getMaxNumUserSGPRs(const MCSubtargetInfo &STI) { return 16; } 1997 1998 bool isSI(const MCSubtargetInfo &STI) { 1999 return STI.hasFeature(AMDGPU::FeatureSouthernIslands); 2000 } 2001 2002 bool isCI(const MCSubtargetInfo &STI) { 2003 return STI.hasFeature(AMDGPU::FeatureSeaIslands); 2004 } 2005 2006 bool isVI(const MCSubtargetInfo &STI) { 2007 return STI.hasFeature(AMDGPU::FeatureVolcanicIslands); 2008 } 2009 2010 bool isGFX9(const MCSubtargetInfo &STI) { 2011 return STI.hasFeature(AMDGPU::FeatureGFX9); 2012 } 2013 2014 bool isGFX9_GFX10(const MCSubtargetInfo &STI) { 2015 return isGFX9(STI) || isGFX10(STI); 2016 } 2017 2018 bool isGFX9_GFX10_GFX11(const MCSubtargetInfo &STI) { 2019 return isGFX9(STI) || isGFX10(STI) || isGFX11(STI); 2020 } 2021 2022 bool isGFX8_GFX9_GFX10(const MCSubtargetInfo &STI) { 2023 return isVI(STI) || isGFX9(STI) || isGFX10(STI); 2024 } 2025 2026 bool isGFX8Plus(const MCSubtargetInfo &STI) { 2027 return isVI(STI) || isGFX9Plus(STI); 2028 } 2029 2030 bool isGFX9Plus(const MCSubtargetInfo &STI) { 2031 return isGFX9(STI) || isGFX10Plus(STI); 2032 } 2033 2034 bool isGFX10(const MCSubtargetInfo &STI) { 2035 return STI.hasFeature(AMDGPU::FeatureGFX10); 2036 } 2037 2038 bool isGFX10_GFX11(const MCSubtargetInfo &STI) { 2039 return isGFX10(STI) || isGFX11(STI); 2040 } 2041 2042 bool isGFX10Plus(const MCSubtargetInfo &STI) { 2043 return isGFX10(STI) || isGFX11Plus(STI); 2044 } 2045 2046 bool isGFX11(const MCSubtargetInfo &STI) { 2047 return STI.hasFeature(AMDGPU::FeatureGFX11); 2048 } 2049 2050 bool isGFX11Plus(const MCSubtargetInfo &STI) { 2051 return isGFX11(STI) || isGFX12Plus(STI); 2052 } 2053 2054 bool isGFX12(const MCSubtargetInfo &STI) { 2055 return STI.getFeatureBits()[AMDGPU::FeatureGFX12]; 2056 } 2057 2058 bool isGFX12Plus(const MCSubtargetInfo &STI) { return isGFX12(STI); } 2059 2060 bool isNotGFX12Plus(const MCSubtargetInfo &STI) { return !isGFX12Plus(STI); } 2061 2062 bool isNotGFX11Plus(const MCSubtargetInfo &STI) { 2063 return !isGFX11Plus(STI); 2064 } 2065 2066 bool isNotGFX10Plus(const MCSubtargetInfo &STI) { 2067 return isSI(STI) || isCI(STI) || isVI(STI) || isGFX9(STI); 2068 } 2069 2070 bool isGFX10Before1030(const MCSubtargetInfo &STI) { 2071 return isGFX10(STI) && !AMDGPU::isGFX10_BEncoding(STI); 2072 } 2073 2074 bool isGCN3Encoding(const MCSubtargetInfo &STI) { 2075 return STI.hasFeature(AMDGPU::FeatureGCN3Encoding); 2076 } 2077 2078 bool isGFX10_AEncoding(const MCSubtargetInfo &STI) { 2079 return STI.hasFeature(AMDGPU::FeatureGFX10_AEncoding); 2080 } 2081 2082 bool isGFX10_BEncoding(const MCSubtargetInfo &STI) { 2083 return STI.hasFeature(AMDGPU::FeatureGFX10_BEncoding); 2084 } 2085 2086 bool hasGFX10_3Insts(const MCSubtargetInfo &STI) { 2087 return STI.hasFeature(AMDGPU::FeatureGFX10_3Insts); 2088 } 2089 2090 bool isGFX10_3_GFX11(const MCSubtargetInfo &STI) { 2091 return isGFX10_BEncoding(STI) && !isGFX12Plus(STI); 2092 } 2093 2094 bool isGFX90A(const MCSubtargetInfo &STI) { 2095 return STI.hasFeature(AMDGPU::FeatureGFX90AInsts); 2096 } 2097 2098 bool isGFX940(const MCSubtargetInfo &STI) { 2099 return STI.hasFeature(AMDGPU::FeatureGFX940Insts); 2100 } 2101 2102 bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI) { 2103 return STI.hasFeature(AMDGPU::FeatureArchitectedFlatScratch); 2104 } 2105 2106 bool hasMAIInsts(const MCSubtargetInfo &STI) { 2107 return STI.hasFeature(AMDGPU::FeatureMAIInsts); 2108 } 2109 2110 bool hasVOPD(const MCSubtargetInfo &STI) { 2111 return 
STI.hasFeature(AMDGPU::FeatureVOPD); 2112 } 2113 2114 bool hasDPPSrc1SGPR(const MCSubtargetInfo &STI) { 2115 return STI.hasFeature(AMDGPU::FeatureDPPSrc1SGPR); 2116 } 2117 2118 unsigned hasKernargPreload(const MCSubtargetInfo &STI) { 2119 return STI.hasFeature(AMDGPU::FeatureKernargPreload); 2120 } 2121 2122 int32_t getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR, 2123 int32_t ArgNumVGPR) { 2124 if (has90AInsts && ArgNumAGPR) 2125 return alignTo(ArgNumVGPR, 4) + ArgNumAGPR; 2126 return std::max(ArgNumVGPR, ArgNumAGPR); 2127 } 2128 2129 bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI) { 2130 const MCRegisterClass SGPRClass = TRI->getRegClass(AMDGPU::SReg_32RegClassID); 2131 const unsigned FirstSubReg = TRI->getSubReg(Reg, AMDGPU::sub0); 2132 return SGPRClass.contains(FirstSubReg != 0 ? FirstSubReg : Reg) || 2133 Reg == AMDGPU::SCC; 2134 } 2135 2136 bool isHi(unsigned Reg, const MCRegisterInfo &MRI) { 2137 return MRI.getEncodingValue(Reg) & AMDGPU::HWEncoding::IS_HI; 2138 } 2139 2140 #define MAP_REG2REG \ 2141 using namespace AMDGPU; \ 2142 switch(Reg) { \ 2143 default: return Reg; \ 2144 CASE_CI_VI(FLAT_SCR) \ 2145 CASE_CI_VI(FLAT_SCR_LO) \ 2146 CASE_CI_VI(FLAT_SCR_HI) \ 2147 CASE_VI_GFX9PLUS(TTMP0) \ 2148 CASE_VI_GFX9PLUS(TTMP1) \ 2149 CASE_VI_GFX9PLUS(TTMP2) \ 2150 CASE_VI_GFX9PLUS(TTMP3) \ 2151 CASE_VI_GFX9PLUS(TTMP4) \ 2152 CASE_VI_GFX9PLUS(TTMP5) \ 2153 CASE_VI_GFX9PLUS(TTMP6) \ 2154 CASE_VI_GFX9PLUS(TTMP7) \ 2155 CASE_VI_GFX9PLUS(TTMP8) \ 2156 CASE_VI_GFX9PLUS(TTMP9) \ 2157 CASE_VI_GFX9PLUS(TTMP10) \ 2158 CASE_VI_GFX9PLUS(TTMP11) \ 2159 CASE_VI_GFX9PLUS(TTMP12) \ 2160 CASE_VI_GFX9PLUS(TTMP13) \ 2161 CASE_VI_GFX9PLUS(TTMP14) \ 2162 CASE_VI_GFX9PLUS(TTMP15) \ 2163 CASE_VI_GFX9PLUS(TTMP0_TTMP1) \ 2164 CASE_VI_GFX9PLUS(TTMP2_TTMP3) \ 2165 CASE_VI_GFX9PLUS(TTMP4_TTMP5) \ 2166 CASE_VI_GFX9PLUS(TTMP6_TTMP7) \ 2167 CASE_VI_GFX9PLUS(TTMP8_TTMP9) \ 2168 CASE_VI_GFX9PLUS(TTMP10_TTMP11) \ 2169 CASE_VI_GFX9PLUS(TTMP12_TTMP13) \ 2170 CASE_VI_GFX9PLUS(TTMP14_TTMP15) \ 2171 CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3) \ 2172 CASE_VI_GFX9PLUS(TTMP4_TTMP5_TTMP6_TTMP7) \ 2173 CASE_VI_GFX9PLUS(TTMP8_TTMP9_TTMP10_TTMP11) \ 2174 CASE_VI_GFX9PLUS(TTMP12_TTMP13_TTMP14_TTMP15) \ 2175 CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7) \ 2176 CASE_VI_GFX9PLUS(TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11) \ 2177 CASE_VI_GFX9PLUS(TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \ 2178 CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \ 2179 CASE_GFXPRE11_GFX11PLUS(M0) \ 2180 CASE_GFXPRE11_GFX11PLUS(SGPR_NULL) \ 2181 CASE_GFXPRE11_GFX11PLUS_TO(SGPR_NULL64, SGPR_NULL) \ 2182 } 2183 2184 #define CASE_CI_VI(node) \ 2185 assert(!isSI(STI)); \ 2186 case node: return isCI(STI) ? node##_ci : node##_vi; 2187 2188 #define CASE_VI_GFX9PLUS(node) \ 2189 case node: return isGFX9Plus(STI) ? node##_gfx9plus : node##_vi; 2190 2191 #define CASE_GFXPRE11_GFX11PLUS(node) \ 2192 case node: return isGFX11Plus(STI) ? node##_gfx11plus : node##_gfxpre11; 2193 2194 #define CASE_GFXPRE11_GFX11PLUS_TO(node, result) \ 2195 case node: return isGFX11Plus(STI) ? 
result##_gfx11plus : result##_gfxpre11; 2196 2197 unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI) { 2198 if (STI.getTargetTriple().getArch() == Triple::r600) 2199 return Reg; 2200 MAP_REG2REG 2201 } 2202 2203 #undef CASE_CI_VI 2204 #undef CASE_VI_GFX9PLUS 2205 #undef CASE_GFXPRE11_GFX11PLUS 2206 #undef CASE_GFXPRE11_GFX11PLUS_TO 2207 2208 #define CASE_CI_VI(node) case node##_ci: case node##_vi: return node; 2209 #define CASE_VI_GFX9PLUS(node) case node##_vi: case node##_gfx9plus: return node; 2210 #define CASE_GFXPRE11_GFX11PLUS(node) case node##_gfx11plus: case node##_gfxpre11: return node; 2211 #define CASE_GFXPRE11_GFX11PLUS_TO(node, result) 2212 2213 unsigned mc2PseudoReg(unsigned Reg) { 2214 MAP_REG2REG 2215 } 2216 2217 bool isInlineValue(unsigned Reg) { 2218 switch (Reg) { 2219 case AMDGPU::SRC_SHARED_BASE_LO: 2220 case AMDGPU::SRC_SHARED_BASE: 2221 case AMDGPU::SRC_SHARED_LIMIT_LO: 2222 case AMDGPU::SRC_SHARED_LIMIT: 2223 case AMDGPU::SRC_PRIVATE_BASE_LO: 2224 case AMDGPU::SRC_PRIVATE_BASE: 2225 case AMDGPU::SRC_PRIVATE_LIMIT_LO: 2226 case AMDGPU::SRC_PRIVATE_LIMIT: 2227 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 2228 return true; 2229 case AMDGPU::SRC_VCCZ: 2230 case AMDGPU::SRC_EXECZ: 2231 case AMDGPU::SRC_SCC: 2232 return true; 2233 case AMDGPU::SGPR_NULL: 2234 return true; 2235 default: 2236 return false; 2237 } 2238 } 2239 2240 #undef CASE_CI_VI 2241 #undef CASE_VI_GFX9PLUS 2242 #undef CASE_GFXPRE11_GFX11PLUS 2243 #undef CASE_GFXPRE11_GFX11PLUS_TO 2244 #undef MAP_REG2REG 2245 2246 bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo) { 2247 assert(OpNo < Desc.NumOperands); 2248 unsigned OpType = Desc.operands()[OpNo].OperandType; 2249 return OpType >= AMDGPU::OPERAND_SRC_FIRST && 2250 OpType <= AMDGPU::OPERAND_SRC_LAST; 2251 } 2252 2253 bool isKImmOperand(const MCInstrDesc &Desc, unsigned OpNo) { 2254 assert(OpNo < Desc.NumOperands); 2255 unsigned OpType = Desc.operands()[OpNo].OperandType; 2256 return OpType >= AMDGPU::OPERAND_KIMM_FIRST && 2257 OpType <= AMDGPU::OPERAND_KIMM_LAST; 2258 } 2259 2260 bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo) { 2261 assert(OpNo < Desc.NumOperands); 2262 unsigned OpType = Desc.operands()[OpNo].OperandType; 2263 switch (OpType) { 2264 case AMDGPU::OPERAND_REG_IMM_FP32: 2265 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 2266 case AMDGPU::OPERAND_REG_IMM_FP64: 2267 case AMDGPU::OPERAND_REG_IMM_FP16: 2268 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: 2269 case AMDGPU::OPERAND_REG_IMM_V2FP16: 2270 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 2271 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 2272 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 2273 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 2274 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 2275 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 2276 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 2277 case AMDGPU::OPERAND_REG_IMM_V2FP32: 2278 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 2279 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 2280 return true; 2281 default: 2282 return false; 2283 } 2284 } 2285 2286 bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo) { 2287 assert(OpNo < Desc.NumOperands); 2288 unsigned OpType = Desc.operands()[OpNo].OperandType; 2289 return (OpType >= AMDGPU::OPERAND_REG_INLINE_C_FIRST && 2290 OpType <= AMDGPU::OPERAND_REG_INLINE_C_LAST) || 2291 (OpType >= AMDGPU::OPERAND_REG_INLINE_AC_FIRST && 2292 OpType <= AMDGPU::OPERAND_REG_INLINE_AC_LAST); 2293 } 2294 2295 // Avoid using MCRegisterClass::getSize, since that function will go away 2296 // (move from MC* level to Target* 
level). Return size in bits. 2297 unsigned getRegBitWidth(unsigned RCID) { 2298 switch (RCID) { 2299 case AMDGPU::SGPR_LO16RegClassID: 2300 case AMDGPU::AGPR_LO16RegClassID: 2301 return 16; 2302 case AMDGPU::SGPR_32RegClassID: 2303 case AMDGPU::VGPR_32RegClassID: 2304 case AMDGPU::VRegOrLds_32RegClassID: 2305 case AMDGPU::AGPR_32RegClassID: 2306 case AMDGPU::VS_32RegClassID: 2307 case AMDGPU::AV_32RegClassID: 2308 case AMDGPU::SReg_32RegClassID: 2309 case AMDGPU::SReg_32_XM0RegClassID: 2310 case AMDGPU::SRegOrLds_32RegClassID: 2311 return 32; 2312 case AMDGPU::SGPR_64RegClassID: 2313 case AMDGPU::VS_64RegClassID: 2314 case AMDGPU::SReg_64RegClassID: 2315 case AMDGPU::VReg_64RegClassID: 2316 case AMDGPU::AReg_64RegClassID: 2317 case AMDGPU::SReg_64_XEXECRegClassID: 2318 case AMDGPU::VReg_64_Align2RegClassID: 2319 case AMDGPU::AReg_64_Align2RegClassID: 2320 case AMDGPU::AV_64RegClassID: 2321 case AMDGPU::AV_64_Align2RegClassID: 2322 return 64; 2323 case AMDGPU::SGPR_96RegClassID: 2324 case AMDGPU::SReg_96RegClassID: 2325 case AMDGPU::VReg_96RegClassID: 2326 case AMDGPU::AReg_96RegClassID: 2327 case AMDGPU::VReg_96_Align2RegClassID: 2328 case AMDGPU::AReg_96_Align2RegClassID: 2329 case AMDGPU::AV_96RegClassID: 2330 case AMDGPU::AV_96_Align2RegClassID: 2331 return 96; 2332 case AMDGPU::SGPR_128RegClassID: 2333 case AMDGPU::SReg_128RegClassID: 2334 case AMDGPU::VReg_128RegClassID: 2335 case AMDGPU::AReg_128RegClassID: 2336 case AMDGPU::VReg_128_Align2RegClassID: 2337 case AMDGPU::AReg_128_Align2RegClassID: 2338 case AMDGPU::AV_128RegClassID: 2339 case AMDGPU::AV_128_Align2RegClassID: 2340 return 128; 2341 case AMDGPU::SGPR_160RegClassID: 2342 case AMDGPU::SReg_160RegClassID: 2343 case AMDGPU::VReg_160RegClassID: 2344 case AMDGPU::AReg_160RegClassID: 2345 case AMDGPU::VReg_160_Align2RegClassID: 2346 case AMDGPU::AReg_160_Align2RegClassID: 2347 case AMDGPU::AV_160RegClassID: 2348 case AMDGPU::AV_160_Align2RegClassID: 2349 return 160; 2350 case AMDGPU::SGPR_192RegClassID: 2351 case AMDGPU::SReg_192RegClassID: 2352 case AMDGPU::VReg_192RegClassID: 2353 case AMDGPU::AReg_192RegClassID: 2354 case AMDGPU::VReg_192_Align2RegClassID: 2355 case AMDGPU::AReg_192_Align2RegClassID: 2356 case AMDGPU::AV_192RegClassID: 2357 case AMDGPU::AV_192_Align2RegClassID: 2358 return 192; 2359 case AMDGPU::SGPR_224RegClassID: 2360 case AMDGPU::SReg_224RegClassID: 2361 case AMDGPU::VReg_224RegClassID: 2362 case AMDGPU::AReg_224RegClassID: 2363 case AMDGPU::VReg_224_Align2RegClassID: 2364 case AMDGPU::AReg_224_Align2RegClassID: 2365 case AMDGPU::AV_224RegClassID: 2366 case AMDGPU::AV_224_Align2RegClassID: 2367 return 224; 2368 case AMDGPU::SGPR_256RegClassID: 2369 case AMDGPU::SReg_256RegClassID: 2370 case AMDGPU::VReg_256RegClassID: 2371 case AMDGPU::AReg_256RegClassID: 2372 case AMDGPU::VReg_256_Align2RegClassID: 2373 case AMDGPU::AReg_256_Align2RegClassID: 2374 case AMDGPU::AV_256RegClassID: 2375 case AMDGPU::AV_256_Align2RegClassID: 2376 return 256; 2377 case AMDGPU::SGPR_288RegClassID: 2378 case AMDGPU::SReg_288RegClassID: 2379 case AMDGPU::VReg_288RegClassID: 2380 case AMDGPU::AReg_288RegClassID: 2381 case AMDGPU::VReg_288_Align2RegClassID: 2382 case AMDGPU::AReg_288_Align2RegClassID: 2383 case AMDGPU::AV_288RegClassID: 2384 case AMDGPU::AV_288_Align2RegClassID: 2385 return 288; 2386 case AMDGPU::SGPR_320RegClassID: 2387 case AMDGPU::SReg_320RegClassID: 2388 case AMDGPU::VReg_320RegClassID: 2389 case AMDGPU::AReg_320RegClassID: 2390 case AMDGPU::VReg_320_Align2RegClassID: 2391 case AMDGPU::AReg_320_Align2RegClassID: 
2392 case AMDGPU::AV_320RegClassID: 2393 case AMDGPU::AV_320_Align2RegClassID: 2394 return 320; 2395 case AMDGPU::SGPR_352RegClassID: 2396 case AMDGPU::SReg_352RegClassID: 2397 case AMDGPU::VReg_352RegClassID: 2398 case AMDGPU::AReg_352RegClassID: 2399 case AMDGPU::VReg_352_Align2RegClassID: 2400 case AMDGPU::AReg_352_Align2RegClassID: 2401 case AMDGPU::AV_352RegClassID: 2402 case AMDGPU::AV_352_Align2RegClassID: 2403 return 352; 2404 case AMDGPU::SGPR_384RegClassID: 2405 case AMDGPU::SReg_384RegClassID: 2406 case AMDGPU::VReg_384RegClassID: 2407 case AMDGPU::AReg_384RegClassID: 2408 case AMDGPU::VReg_384_Align2RegClassID: 2409 case AMDGPU::AReg_384_Align2RegClassID: 2410 case AMDGPU::AV_384RegClassID: 2411 case AMDGPU::AV_384_Align2RegClassID: 2412 return 384; 2413 case AMDGPU::SGPR_512RegClassID: 2414 case AMDGPU::SReg_512RegClassID: 2415 case AMDGPU::VReg_512RegClassID: 2416 case AMDGPU::AReg_512RegClassID: 2417 case AMDGPU::VReg_512_Align2RegClassID: 2418 case AMDGPU::AReg_512_Align2RegClassID: 2419 case AMDGPU::AV_512RegClassID: 2420 case AMDGPU::AV_512_Align2RegClassID: 2421 return 512; 2422 case AMDGPU::SGPR_1024RegClassID: 2423 case AMDGPU::SReg_1024RegClassID: 2424 case AMDGPU::VReg_1024RegClassID: 2425 case AMDGPU::AReg_1024RegClassID: 2426 case AMDGPU::VReg_1024_Align2RegClassID: 2427 case AMDGPU::AReg_1024_Align2RegClassID: 2428 case AMDGPU::AV_1024RegClassID: 2429 case AMDGPU::AV_1024_Align2RegClassID: 2430 return 1024; 2431 default: 2432 llvm_unreachable("Unexpected register class"); 2433 } 2434 } 2435 2436 unsigned getRegBitWidth(const MCRegisterClass &RC) { 2437 return getRegBitWidth(RC.getID()); 2438 } 2439 2440 unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc, 2441 unsigned OpNo) { 2442 assert(OpNo < Desc.NumOperands); 2443 unsigned RCID = Desc.operands()[OpNo].RegClass; 2444 return getRegBitWidth(RCID) / 8; 2445 } 2446 2447 bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi) { 2448 if (isInlinableIntLiteral(Literal)) 2449 return true; 2450 2451 uint64_t Val = static_cast<uint64_t>(Literal); 2452 return (Val == llvm::bit_cast<uint64_t>(0.0)) || 2453 (Val == llvm::bit_cast<uint64_t>(1.0)) || 2454 (Val == llvm::bit_cast<uint64_t>(-1.0)) || 2455 (Val == llvm::bit_cast<uint64_t>(0.5)) || 2456 (Val == llvm::bit_cast<uint64_t>(-0.5)) || 2457 (Val == llvm::bit_cast<uint64_t>(2.0)) || 2458 (Val == llvm::bit_cast<uint64_t>(-2.0)) || 2459 (Val == llvm::bit_cast<uint64_t>(4.0)) || 2460 (Val == llvm::bit_cast<uint64_t>(-4.0)) || 2461 (Val == 0x3fc45f306dc9c882 && HasInv2Pi); 2462 } 2463 2464 bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi) { 2465 if (isInlinableIntLiteral(Literal)) 2466 return true; 2467 2468 // The actual type of the operand does not seem to matter as long 2469 // as the bits match one of the inline immediate values. For example: 2470 // 2471 // -nan has the hexadecimal encoding of 0xfffffffe which is -2 in decimal, 2472 // so it is a legal inline immediate. 2473 // 2474 // 1065353216 has the hexadecimal encoding 0x3f800000 which is 1.0f in 2475 // floating-point, so it is a legal inline immediate. 
2476 2477 uint32_t Val = static_cast<uint32_t>(Literal); 2478 return (Val == llvm::bit_cast<uint32_t>(0.0f)) || 2479 (Val == llvm::bit_cast<uint32_t>(1.0f)) || 2480 (Val == llvm::bit_cast<uint32_t>(-1.0f)) || 2481 (Val == llvm::bit_cast<uint32_t>(0.5f)) || 2482 (Val == llvm::bit_cast<uint32_t>(-0.5f)) || 2483 (Val == llvm::bit_cast<uint32_t>(2.0f)) || 2484 (Val == llvm::bit_cast<uint32_t>(-2.0f)) || 2485 (Val == llvm::bit_cast<uint32_t>(4.0f)) || 2486 (Val == llvm::bit_cast<uint32_t>(-4.0f)) || 2487 (Val == 0x3e22f983 && HasInv2Pi); 2488 } 2489 2490 bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi) { 2491 if (!HasInv2Pi) 2492 return false; 2493 2494 if (isInlinableIntLiteral(Literal)) 2495 return true; 2496 2497 uint16_t Val = static_cast<uint16_t>(Literal); 2498 return Val == 0x3C00 || // 1.0 2499 Val == 0xBC00 || // -1.0 2500 Val == 0x3800 || // 0.5 2501 Val == 0xB800 || // -0.5 2502 Val == 0x4000 || // 2.0 2503 Val == 0xC000 || // -2.0 2504 Val == 0x4400 || // 4.0 2505 Val == 0xC400 || // -4.0 2506 Val == 0x3118; // 1/2pi 2507 } 2508 2509 std::optional<unsigned> getInlineEncodingV216(bool IsFloat, uint32_t Literal) { 2510 // Unfortunately, the Instruction Set Architecture Reference Guide is 2511 // misleading about how the inline operands work for (packed) 16-bit 2512 // instructions. In a nutshell, the actual HW behavior is: 2513 // 2514 // - integer encodings (-16 .. 64) are always produced as sign-extended 2515 // 32-bit values 2516 // - float encodings are produced as: 2517 // - for F16 instructions: corresponding half-precision float values in 2518 // the LSBs, 0 in the MSBs 2519 // - for UI16 instructions: corresponding single-precision float value 2520 int32_t Signed = static_cast<int32_t>(Literal); 2521 if (Signed >= 0 && Signed <= 64) 2522 return 128 + Signed; 2523 2524 if (Signed >= -16 && Signed <= -1) 2525 return 192 + std::abs(Signed); 2526 2527 if (IsFloat) { 2528 // clang-format off 2529 switch (Literal) { 2530 case 0x3800: return 240; // 0.5 2531 case 0xB800: return 241; // -0.5 2532 case 0x3C00: return 242; // 1.0 2533 case 0xBC00: return 243; // -1.0 2534 case 0x4000: return 244; // 2.0 2535 case 0xC000: return 245; // -2.0 2536 case 0x4400: return 246; // 4.0 2537 case 0xC400: return 247; // -4.0 2538 case 0x3118: return 248; // 1.0 / (2.0 * pi) 2539 default: break; 2540 } 2541 // clang-format on 2542 } else { 2543 // clang-format off 2544 switch (Literal) { 2545 case 0x3F000000: return 240; // 0.5 2546 case 0xBF000000: return 241; // -0.5 2547 case 0x3F800000: return 242; // 1.0 2548 case 0xBF800000: return 243; // -1.0 2549 case 0x40000000: return 244; // 2.0 2550 case 0xC0000000: return 245; // -2.0 2551 case 0x40800000: return 246; // 4.0 2552 case 0xC0800000: return 247; // -4.0 2553 case 0x3E22F983: return 248; // 1.0 / (2.0 * pi) 2554 default: break; 2555 } 2556 // clang-format on 2557 } 2558 2559 return {}; 2560 } 2561 2562 // Encoding of the literal as an inline constant for a V_PK_*_IU16 instruction 2563 // or nullopt. 2564 std::optional<unsigned> getInlineEncodingV2I16(uint32_t Literal) { 2565 return getInlineEncodingV216(false, Literal); 2566 } 2567 2568 // Encoding of the literal as an inline constant for a V_PK_*_F16 instruction 2569 // or nullopt. 2570 std::optional<unsigned> getInlineEncodingV2F16(uint32_t Literal) { 2571 return getInlineEncodingV216(true, Literal); 2572 } 2573 2574 // Whether the given literal can be inlined for a V_PK_* instruction. 
2575 bool isInlinableLiteralV216(uint32_t Literal, uint8_t OpType) { 2576 switch (OpType) { 2577 case AMDGPU::OPERAND_REG_IMM_V2INT16: 2578 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 2579 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 2580 return getInlineEncodingV216(false, Literal).has_value(); 2581 case AMDGPU::OPERAND_REG_IMM_V2FP16: 2582 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 2583 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 2584 return getInlineEncodingV216(true, Literal).has_value(); 2585 default: 2586 llvm_unreachable("bad packed operand type"); 2587 } 2588 } 2589 2590 // Whether the given literal can be inlined for a V_PK_*_IU16 instruction. 2591 bool isInlinableLiteralV2I16(uint32_t Literal) { 2592 return getInlineEncodingV2I16(Literal).has_value(); 2593 } 2594 2595 // Whether the given literal can be inlined for a V_PK_*_F16 instruction. 2596 bool isInlinableLiteralV2F16(uint32_t Literal) { 2597 return getInlineEncodingV2F16(Literal).has_value(); 2598 } 2599 2600 bool isValid32BitLiteral(uint64_t Val, bool IsFP64) { 2601 if (IsFP64) 2602 return !(Val & 0xffffffffu); 2603 2604 return isUInt<32>(Val) || isInt<32>(Val); 2605 } 2606 2607 bool isArgPassedInSGPR(const Argument *A) { 2608 const Function *F = A->getParent(); 2609 2610 // Arguments to compute shaders are never a source of divergence. 2611 CallingConv::ID CC = F->getCallingConv(); 2612 switch (CC) { 2613 case CallingConv::AMDGPU_KERNEL: 2614 case CallingConv::SPIR_KERNEL: 2615 return true; 2616 case CallingConv::AMDGPU_VS: 2617 case CallingConv::AMDGPU_LS: 2618 case CallingConv::AMDGPU_HS: 2619 case CallingConv::AMDGPU_ES: 2620 case CallingConv::AMDGPU_GS: 2621 case CallingConv::AMDGPU_PS: 2622 case CallingConv::AMDGPU_CS: 2623 case CallingConv::AMDGPU_Gfx: 2624 case CallingConv::AMDGPU_CS_Chain: 2625 case CallingConv::AMDGPU_CS_ChainPreserve: 2626 // For non-compute shaders, SGPR inputs are marked with either inreg or 2627 // byval. Everything else is in VGPRs. 2628 return A->hasAttribute(Attribute::InReg) || 2629 A->hasAttribute(Attribute::ByVal); 2630 default: 2631 // TODO: treat i1 as divergent? 2632 return A->hasAttribute(Attribute::InReg); 2633 } 2634 } 2635 2636 bool isArgPassedInSGPR(const CallBase *CB, unsigned ArgNo) { 2637 // Arguments to compute shaders are never a source of divergence. 2638 CallingConv::ID CC = CB->getCallingConv(); 2639 switch (CC) { 2640 case CallingConv::AMDGPU_KERNEL: 2641 case CallingConv::SPIR_KERNEL: 2642 return true; 2643 case CallingConv::AMDGPU_VS: 2644 case CallingConv::AMDGPU_LS: 2645 case CallingConv::AMDGPU_HS: 2646 case CallingConv::AMDGPU_ES: 2647 case CallingConv::AMDGPU_GS: 2648 case CallingConv::AMDGPU_PS: 2649 case CallingConv::AMDGPU_CS: 2650 case CallingConv::AMDGPU_Gfx: 2651 case CallingConv::AMDGPU_CS_Chain: 2652 case CallingConv::AMDGPU_CS_ChainPreserve: 2653 // For non-compute shaders, SGPR inputs are marked with either inreg or 2654 // byval. Everything else is in VGPRs. 
2655 return CB->paramHasAttr(ArgNo, Attribute::InReg) || 2656 CB->paramHasAttr(ArgNo, Attribute::ByVal); 2657 default: 2658 return CB->paramHasAttr(ArgNo, Attribute::InReg); 2659 } 2660 } 2661 2662 static bool hasSMEMByteOffset(const MCSubtargetInfo &ST) { 2663 return isGCN3Encoding(ST) || isGFX10Plus(ST); 2664 } 2665 2666 static bool hasSMRDSignedImmOffset(const MCSubtargetInfo &ST) { 2667 return isGFX9Plus(ST); 2668 } 2669 2670 bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST, 2671 int64_t EncodedOffset) { 2672 if (isGFX12Plus(ST)) 2673 return isUInt<23>(EncodedOffset); 2674 2675 return hasSMEMByteOffset(ST) ? isUInt<20>(EncodedOffset) 2676 : isUInt<8>(EncodedOffset); 2677 } 2678 2679 bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST, 2680 int64_t EncodedOffset, 2681 bool IsBuffer) { 2682 if (isGFX12Plus(ST)) 2683 return isInt<24>(EncodedOffset); 2684 2685 return !IsBuffer && 2686 hasSMRDSignedImmOffset(ST) && 2687 isInt<21>(EncodedOffset); 2688 } 2689 2690 static bool isDwordAligned(uint64_t ByteOffset) { 2691 return (ByteOffset & 3) == 0; 2692 } 2693 2694 uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST, 2695 uint64_t ByteOffset) { 2696 if (hasSMEMByteOffset(ST)) 2697 return ByteOffset; 2698 2699 assert(isDwordAligned(ByteOffset)); 2700 return ByteOffset >> 2; 2701 } 2702 2703 std::optional<int64_t> getSMRDEncodedOffset(const MCSubtargetInfo &ST, 2704 int64_t ByteOffset, bool IsBuffer) { 2705 if (isGFX12Plus(ST)) // 24 bit signed offsets 2706 return isInt<24>(ByteOffset) ? std::optional<int64_t>(ByteOffset) 2707 : std::nullopt; 2708 2709 // The signed version is always a byte offset. 2710 if (!IsBuffer && hasSMRDSignedImmOffset(ST)) { 2711 assert(hasSMEMByteOffset(ST)); 2712 return isInt<20>(ByteOffset) ? std::optional<int64_t>(ByteOffset) 2713 : std::nullopt; 2714 } 2715 2716 if (!isDwordAligned(ByteOffset) && !hasSMEMByteOffset(ST)) 2717 return std::nullopt; 2718 2719 int64_t EncodedOffset = convertSMRDOffsetUnits(ST, ByteOffset); 2720 return isLegalSMRDEncodedUnsignedOffset(ST, EncodedOffset) 2721 ? std::optional<int64_t>(EncodedOffset) 2722 : std::nullopt; 2723 } 2724 2725 std::optional<int64_t> getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST, 2726 int64_t ByteOffset) { 2727 if (!isCI(ST) || !isDwordAligned(ByteOffset)) 2728 return std::nullopt; 2729 2730 int64_t EncodedOffset = convertSMRDOffsetUnits(ST, ByteOffset); 2731 return isUInt<32>(EncodedOffset) ? 
std::optional<int64_t>(EncodedOffset) 2732 : std::nullopt; 2733 } 2734 2735 unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST) { 2736 if (AMDGPU::isGFX10(ST)) 2737 return 12; 2738 2739 if (AMDGPU::isGFX12(ST)) 2740 return 24; 2741 return 13; 2742 } 2743 2744 namespace { 2745 2746 struct SourceOfDivergence { 2747 unsigned Intr; 2748 }; 2749 const SourceOfDivergence *lookupSourceOfDivergence(unsigned Intr); 2750 2751 struct AlwaysUniform { 2752 unsigned Intr; 2753 }; 2754 const AlwaysUniform *lookupAlwaysUniform(unsigned Intr); 2755 2756 #define GET_SourcesOfDivergence_IMPL 2757 #define GET_UniformIntrinsics_IMPL 2758 #define GET_Gfx9BufferFormat_IMPL 2759 #define GET_Gfx10BufferFormat_IMPL 2760 #define GET_Gfx11PlusBufferFormat_IMPL 2761 #include "AMDGPUGenSearchableTables.inc" 2762 2763 } // end anonymous namespace 2764 2765 bool isIntrinsicSourceOfDivergence(unsigned IntrID) { 2766 return lookupSourceOfDivergence(IntrID); 2767 } 2768 2769 bool isIntrinsicAlwaysUniform(unsigned IntrID) { 2770 return lookupAlwaysUniform(IntrID); 2771 } 2772 2773 const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t BitsPerComp, 2774 uint8_t NumComponents, 2775 uint8_t NumFormat, 2776 const MCSubtargetInfo &STI) { 2777 return isGFX11Plus(STI) 2778 ? getGfx11PlusBufferFormatInfo(BitsPerComp, NumComponents, 2779 NumFormat) 2780 : isGFX10(STI) ? getGfx10BufferFormatInfo(BitsPerComp, 2781 NumComponents, NumFormat) 2782 : getGfx9BufferFormatInfo(BitsPerComp, 2783 NumComponents, NumFormat); 2784 } 2785 2786 const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t Format, 2787 const MCSubtargetInfo &STI) { 2788 return isGFX11Plus(STI) ? getGfx11PlusBufferFormatInfo(Format) 2789 : isGFX10(STI) ? getGfx10BufferFormatInfo(Format) 2790 : getGfx9BufferFormatInfo(Format); 2791 } 2792 2793 bool hasAny64BitVGPROperands(const MCInstrDesc &OpDesc) { 2794 for (auto OpName : { OpName::vdst, OpName::src0, OpName::src1, 2795 OpName::src2 }) { 2796 int Idx = getNamedOperandIdx(OpDesc.getOpcode(), OpName); 2797 if (Idx == -1) 2798 continue; 2799 2800 if (OpDesc.operands()[Idx].RegClass == AMDGPU::VReg_64RegClassID || 2801 OpDesc.operands()[Idx].RegClass == AMDGPU::VReg_64_Align2RegClassID) 2802 return true; 2803 } 2804 2805 return false; 2806 } 2807 2808 bool isDPALU_DPP(const MCInstrDesc &OpDesc) { 2809 return hasAny64BitVGPROperands(OpDesc); 2810 } 2811 2812 } // namespace AMDGPU 2813 2814 raw_ostream &operator<<(raw_ostream &OS, 2815 const AMDGPU::IsaInfo::TargetIDSetting S) { 2816 switch (S) { 2817 case (AMDGPU::IsaInfo::TargetIDSetting::Unsupported): 2818 OS << "Unsupported"; 2819 break; 2820 case (AMDGPU::IsaInfo::TargetIDSetting::Any): 2821 OS << "Any"; 2822 break; 2823 case (AMDGPU::IsaInfo::TargetIDSetting::Off): 2824 OS << "Off"; 2825 break; 2826 case (AMDGPU::IsaInfo::TargetIDSetting::On): 2827 OS << "On"; 2828 break; 2829 } 2830 return OS; 2831 } 2832 2833 } // namespace llvm 2834
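// Illustrative usage sketch (assumes an already-initialized MCSubtargetInfo
// named STI; the values follow directly from the helpers above, e.g.
// getNumFlatOffsetBits returns 12 on GFX10, 24 on GFX12, and 13 otherwise):
//
//   using namespace llvm::AMDGPU;
//   unsigned FlatOffsetBits = getNumFlatOffsetBits(STI);
//   bool CanInlineHalf = isInlinableLiteral16(0x3C00, /*HasInv2Pi=*/true);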