//===- AMDGPUBaseInfo.cpp - AMDGPU Base encoding information --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDGPUBaseInfo.h"
#include "AMDGPU.h"
#include "AMDGPUAsmUtils.h"
#include "AMDKernelCodeT.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "Utils/AMDKernelCodeTUtils.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsR600.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/TargetParser/TargetParser.h"
#include <optional>

#define GET_INSTRINFO_NAMED_OPS
#define GET_INSTRMAP_INFO
#include "AMDGPUGenInstrInfo.inc"

static llvm::cl::opt<unsigned> DefaultAMDHSACodeObjectVersion(
    "amdhsa-code-object-version", llvm::cl::Hidden,
    llvm::cl::init(llvm::AMDGPU::AMDHSA_COV5),
    llvm::cl::desc("Set default AMDHSA Code Object Version (module flag "
                   "or asm directive still take priority if present)"));

namespace {

/// \returns Bit mask for given bit \p Shift and bit \p Width.
unsigned getBitMask(unsigned Shift, unsigned Width) {
  return ((1 << Width) - 1) << Shift;
}

/// Packs \p Src into \p Dst for given bit \p Shift and bit \p Width.
///
/// \returns Packed \p Dst.
unsigned packBits(unsigned Src, unsigned Dst, unsigned Shift, unsigned Width) {
  unsigned Mask = getBitMask(Shift, Width);
  return ((Src << Shift) & Mask) | (Dst & ~Mask);
}

/// Unpacks bits from \p Src for given bit \p Shift and bit \p Width.
///
/// \returns Unpacked bits.
unsigned unpackBits(unsigned Src, unsigned Shift, unsigned Width) {
  return (Src & getBitMask(Shift, Width)) >> Shift;
}
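
// Example (illustrative, not from the upstream sources): packing a 4-bit
// field at bit 8 and reading it back with the helpers above.
//   unsigned Enc = packBits(/*Src=*/0x5, /*Dst=*/0, /*Shift=*/8, /*Width=*/4);
//   // Enc == 0x500
//   unsigned Val = unpackBits(Enc, /*Shift=*/8, /*Width=*/4); // Val == 0x5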

/// \returns Vmcnt bit shift (lower bits).
unsigned getVmcntBitShiftLo(unsigned VersionMajor) {
  return VersionMajor >= 11 ? 10 : 0;
}

/// \returns Vmcnt bit width (lower bits).
unsigned getVmcntBitWidthLo(unsigned VersionMajor) {
  return VersionMajor >= 11 ? 6 : 4;
}

/// \returns Expcnt bit shift.
unsigned getExpcntBitShift(unsigned VersionMajor) {
  return VersionMajor >= 11 ? 0 : 4;
}

/// \returns Expcnt bit width.
unsigned getExpcntBitWidth(unsigned VersionMajor) { return 3; }

/// \returns Lgkmcnt bit shift.
unsigned getLgkmcntBitShift(unsigned VersionMajor) {
  return VersionMajor >= 11 ? 4 : 8;
}

/// \returns Lgkmcnt bit width.
unsigned getLgkmcntBitWidth(unsigned VersionMajor) {
  return VersionMajor >= 10 ? 6 : 4;
}

/// \returns Vmcnt bit shift (higher bits).
unsigned getVmcntBitShiftHi(unsigned VersionMajor) { return 14; }

/// \returns Vmcnt bit width (higher bits).
unsigned getVmcntBitWidthHi(unsigned VersionMajor) {
  return (VersionMajor == 9 || VersionMajor == 10) ? 2 : 0;
}

/// \returns Loadcnt bit width.
unsigned getLoadcntBitWidth(unsigned VersionMajor) {
  return VersionMajor >= 12 ? 6 : 0;
}

/// \returns Samplecnt bit width.
unsigned getSamplecntBitWidth(unsigned VersionMajor) {
  return VersionMajor >= 12 ? 6 : 0;
}

/// \returns Bvhcnt bit width.
unsigned getBvhcntBitWidth(unsigned VersionMajor) {
  return VersionMajor >= 12 ? 3 : 0;
}

/// \returns Dscnt bit width.
unsigned getDscntBitWidth(unsigned VersionMajor) {
  return VersionMajor >= 12 ? 6 : 0;
}

/// \returns Dscnt bit shift in combined S_WAIT instructions.
unsigned getDscntBitShift(unsigned VersionMajor) { return 0; }

/// \returns Storecnt or Vscnt bit width, depending on VersionMajor.
unsigned getStorecntBitWidth(unsigned VersionMajor) {
  return VersionMajor >= 10 ? 6 : 0;
}

/// \returns Kmcnt bit width.
unsigned getKmcntBitWidth(unsigned VersionMajor) {
  return VersionMajor >= 12 ? 5 : 0;
}

/// \returns shift for Loadcnt/Storecnt in combined S_WAIT instructions.
unsigned getLoadcntStorecntBitShift(unsigned VersionMajor) {
  return VersionMajor >= 12 ? 8 : 0;
}

/// \returns VmVsrc bit width.
inline unsigned getVmVsrcBitWidth() { return 3; }

/// \returns VmVsrc bit shift.
inline unsigned getVmVsrcBitShift() { return 2; }

/// \returns VaVdst bit width.
inline unsigned getVaVdstBitWidth() { return 4; }

/// \returns VaVdst bit shift.
inline unsigned getVaVdstBitShift() { return 12; }

/// \returns SaSdst bit width.
inline unsigned getSaSdstBitWidth() { return 1; }

/// \returns SaSdst bit shift.
inline unsigned getSaSdstBitShift() { return 0; }

} // end anonymous namespace

namespace llvm {

namespace AMDGPU {

/// \returns true if the target supports signed immediate offset for SMRD
/// instructions.
bool hasSMRDSignedImmOffset(const MCSubtargetInfo &ST) {
  return isGFX9Plus(ST);
}

/// \returns True if \p STI is AMDHSA.
bool isHsaAbi(const MCSubtargetInfo &STI) {
  return STI.getTargetTriple().getOS() == Triple::AMDHSA;
}

unsigned getAMDHSACodeObjectVersion(const Module &M) {
  if (auto Ver = mdconst::extract_or_null<ConstantInt>(
          M.getModuleFlag("amdhsa_code_object_version"))) {
    return (unsigned)Ver->getZExtValue() / 100;
  }

  return getDefaultAMDHSACodeObjectVersion();
}

unsigned getDefaultAMDHSACodeObjectVersion() {
  return DefaultAMDHSACodeObjectVersion;
}

unsigned getAMDHSACodeObjectVersion(unsigned ABIVersion) {
  switch (ABIVersion) {
  case ELF::ELFABIVERSION_AMDGPU_HSA_V4:
    return 4;
  case ELF::ELFABIVERSION_AMDGPU_HSA_V5:
    return 5;
  case ELF::ELFABIVERSION_AMDGPU_HSA_V6:
    return 6;
  default:
    return getDefaultAMDHSACodeObjectVersion();
  }
}

uint8_t getELFABIVersion(const Triple &T, unsigned CodeObjectVersion) {
  if (T.getOS() != Triple::AMDHSA)
    return 0;

  switch (CodeObjectVersion) {
  case 4:
    return ELF::ELFABIVERSION_AMDGPU_HSA_V4;
  case 5:
    return ELF::ELFABIVERSION_AMDGPU_HSA_V5;
  case 6:
    return ELF::ELFABIVERSION_AMDGPU_HSA_V6;
  default:
    report_fatal_error("Unsupported AMDHSA Code Object Version " +
                       Twine(CodeObjectVersion));
  }
}

unsigned getMultigridSyncArgImplicitArgPosition(unsigned CodeObjectVersion) {
  switch (CodeObjectVersion) {
  case AMDHSA_COV4:
    return 48;
  case AMDHSA_COV5:
  case AMDHSA_COV6:
  default:
    return AMDGPU::ImplicitArg::MULTIGRID_SYNC_ARG_OFFSET;
  }
}

// FIXME: All such magic numbers about the ABI should be in a
// central TD file.
unsigned getHostcallImplicitArgPosition(unsigned CodeObjectVersion) {
  switch (CodeObjectVersion) {
  case AMDHSA_COV4:
    return 24;
  case AMDHSA_COV5:
  case AMDHSA_COV6:
  default:
    return AMDGPU::ImplicitArg::HOSTCALL_PTR_OFFSET;
  }
}

unsigned getDefaultQueueImplicitArgPosition(unsigned CodeObjectVersion) {
  switch (CodeObjectVersion) {
  case AMDHSA_COV4:
    return 32;
  case AMDHSA_COV5:
  case AMDHSA_COV6:
  default:
    return AMDGPU::ImplicitArg::DEFAULT_QUEUE_OFFSET;
  }
}

unsigned getCompletionActionImplicitArgPosition(unsigned CodeObjectVersion) {
  switch (CodeObjectVersion) {
  case AMDHSA_COV4:
    return 40;
  case AMDHSA_COV5:
  case AMDHSA_COV6:
  default:
    return AMDGPU::ImplicitArg::COMPLETION_ACTION_OFFSET;
  }
}

#define GET_MIMGBaseOpcodesTable_IMPL
#define GET_MIMGDimInfoTable_IMPL
#define GET_MIMGInfoTable_IMPL
#define GET_MIMGLZMappingTable_IMPL
#define GET_MIMGMIPMappingTable_IMPL
#define GET_MIMGBiasMappingTable_IMPL
#define GET_MIMGOffsetMappingTable_IMPL
#define GET_MIMGG16MappingTable_IMPL
#define GET_MAIInstInfoTable_IMPL
#include "AMDGPUGenSearchableTables.inc"

int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
                  unsigned VDataDwords, unsigned VAddrDwords) {
  const MIMGInfo *Info = getMIMGOpcodeHelper(BaseOpcode, MIMGEncoding,
                                             VDataDwords, VAddrDwords);
  return Info ? Info->Opcode : -1;
}

const MIMGBaseOpcodeInfo *getMIMGBaseOpcode(unsigned Opc) {
  const MIMGInfo *Info = getMIMGInfo(Opc);
  return Info ? getMIMGBaseOpcodeInfo(Info->BaseOpcode) : nullptr;
}

int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels) {
  const MIMGInfo *OrigInfo = getMIMGInfo(Opc);
  const MIMGInfo *NewInfo =
      getMIMGOpcodeHelper(OrigInfo->BaseOpcode, OrigInfo->MIMGEncoding,
                          NewChannels, OrigInfo->VAddrDwords);
  return NewInfo ? NewInfo->Opcode : -1;
}

unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode,
                           const MIMGDimInfo *Dim, bool IsA16,
                           bool IsG16Supported) {
  unsigned AddrWords = BaseOpcode->NumExtraArgs;
  unsigned AddrComponents = (BaseOpcode->Coordinates ? Dim->NumCoords : 0) +
                            (BaseOpcode->LodOrClampOrMip ? 1 : 0);
  if (IsA16)
    AddrWords += divideCeil(AddrComponents, 2);
  else
    AddrWords += AddrComponents;

  // Note: For subtargets that support A16 but not G16, enabling A16 also
  // enables 16 bit gradients.
  // For subtargets that support A16 (operand) and G16 (done with a different
  // instruction encoding), they are independent.

  if (BaseOpcode->Gradients) {
    if ((IsA16 && !IsG16Supported) || BaseOpcode->G16)
      // There are two gradients per coordinate, we pack them separately.
      // For the 3d case,
      // we get (dy/du, dx/du) (-, dz/du) (dy/dv, dx/dv) (-, dz/dv)
      AddrWords += alignTo<2>(Dim->NumGradients / 2);
    else
      AddrWords += Dim->NumGradients;
  }
  return AddrWords;
}
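
// Example (illustrative): for a 2D sample (NumCoords = 2) with an LOD/clamp
// argument and no extra args, AddrComponents is 3; packed A16 addressing
// therefore needs divideCeil(3, 2) = 2 address dwords instead of 3.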

struct MUBUFInfo {
  uint16_t Opcode;
  uint16_t BaseOpcode;
  uint8_t elements;
  bool has_vaddr;
  bool has_srsrc;
  bool has_soffset;
  bool IsBufferInv;
  bool tfe;
};

struct MTBUFInfo {
  uint16_t Opcode;
  uint16_t BaseOpcode;
  uint8_t elements;
  bool has_vaddr;
  bool has_srsrc;
  bool has_soffset;
};

struct SMInfo {
  uint16_t Opcode;
  bool IsBuffer;
};

struct VOPInfo {
  uint16_t Opcode;
  bool IsSingle;
};

struct VOPC64DPPInfo {
  uint16_t Opcode;
};

struct VOPCDPPAsmOnlyInfo {
  uint16_t Opcode;
};

struct VOP3CDPPAsmOnlyInfo {
  uint16_t Opcode;
};

struct VOPDComponentInfo {
  uint16_t BaseVOP;
  uint16_t VOPDOp;
  bool CanBeVOPDX;
};

struct VOPDInfo {
  uint16_t Opcode;
  uint16_t OpX;
  uint16_t OpY;
  uint16_t Subtarget;
};

struct VOPTrue16Info {
  uint16_t Opcode;
  bool IsTrue16;
};

struct SingleUseExceptionInfo {
  uint16_t Opcode;
  bool IsInvalidSingleUseConsumer;
  bool IsInvalidSingleUseProducer;
};

#define GET_MTBUFInfoTable_DECL
#define GET_MTBUFInfoTable_IMPL
#define GET_MUBUFInfoTable_DECL
#define GET_MUBUFInfoTable_IMPL
#define GET_SingleUseExceptionTable_DECL
#define GET_SingleUseExceptionTable_IMPL
#define GET_SMInfoTable_DECL
#define GET_SMInfoTable_IMPL
#define GET_VOP1InfoTable_DECL
#define GET_VOP1InfoTable_IMPL
#define GET_VOP2InfoTable_DECL
#define GET_VOP2InfoTable_IMPL
#define GET_VOP3InfoTable_DECL
#define GET_VOP3InfoTable_IMPL
#define GET_VOPC64DPPTable_DECL
#define GET_VOPC64DPPTable_IMPL
#define GET_VOPC64DPP8Table_DECL
#define GET_VOPC64DPP8Table_IMPL
#define GET_VOPCAsmOnlyInfoTable_DECL
#define GET_VOPCAsmOnlyInfoTable_IMPL
#define GET_VOP3CAsmOnlyInfoTable_DECL
#define GET_VOP3CAsmOnlyInfoTable_IMPL
#define GET_VOPDComponentTable_DECL
#define GET_VOPDComponentTable_IMPL
#define GET_VOPDPairs_DECL
#define GET_VOPDPairs_IMPL
#define GET_VOPTrue16Table_DECL
#define GET_VOPTrue16Table_IMPL
#define GET_WMMAOpcode2AddrMappingTable_DECL
#define GET_WMMAOpcode2AddrMappingTable_IMPL
#define GET_WMMAOpcode3AddrMappingTable_DECL
#define GET_WMMAOpcode3AddrMappingTable_IMPL
#include "AMDGPUGenSearchableTables.inc"

int getMTBUFBaseOpcode(unsigned Opc) {
  const MTBUFInfo *Info = getMTBUFInfoFromOpcode(Opc);
  return Info ? Info->BaseOpcode : -1;
}

int getMTBUFOpcode(unsigned BaseOpc, unsigned Elements) {
  const MTBUFInfo *Info = getMTBUFInfoFromBaseOpcodeAndElements(BaseOpc, Elements);
  return Info ? Info->Opcode : -1;
}

int getMTBUFElements(unsigned Opc) {
  const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
  return Info ? Info->elements : 0;
}

bool getMTBUFHasVAddr(unsigned Opc) {
  const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
  return Info ? Info->has_vaddr : false;
}

bool getMTBUFHasSrsrc(unsigned Opc) {
  const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
  return Info ? Info->has_srsrc : false;
}

bool getMTBUFHasSoffset(unsigned Opc) {
  const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
  return Info ? Info->has_soffset : false;
}

int getMUBUFBaseOpcode(unsigned Opc) {
  const MUBUFInfo *Info = getMUBUFInfoFromOpcode(Opc);
  return Info ? Info->BaseOpcode : -1;
}

int getMUBUFOpcode(unsigned BaseOpc, unsigned Elements) {
  const MUBUFInfo *Info = getMUBUFInfoFromBaseOpcodeAndElements(BaseOpc, Elements);
  return Info ? Info->Opcode : -1;
}

int getMUBUFElements(unsigned Opc) {
  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
  return Info ? Info->elements : 0;
}

bool getMUBUFHasVAddr(unsigned Opc) {
  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
  return Info ? Info->has_vaddr : false;
}

bool getMUBUFHasSrsrc(unsigned Opc) {
  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
  return Info ? Info->has_srsrc : false;
}

bool getMUBUFHasSoffset(unsigned Opc) {
  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
  return Info ? Info->has_soffset : false;
}

bool getMUBUFIsBufferInv(unsigned Opc) {
  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
  return Info ? Info->IsBufferInv : false;
}

bool getMUBUFTfe(unsigned Opc) {
  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
  return Info ? Info->tfe : false;
}

bool getSMEMIsBuffer(unsigned Opc) {
  const SMInfo *Info = getSMEMOpcodeHelper(Opc);
  return Info ? Info->IsBuffer : false;
}

bool getVOP1IsSingle(unsigned Opc) {
  const VOPInfo *Info = getVOP1OpcodeHelper(Opc);
  return Info ? Info->IsSingle : true;
}

bool getVOP2IsSingle(unsigned Opc) {
  const VOPInfo *Info = getVOP2OpcodeHelper(Opc);
  return Info ? Info->IsSingle : true;
}

bool getVOP3IsSingle(unsigned Opc) {
  const VOPInfo *Info = getVOP3OpcodeHelper(Opc);
  return Info ? Info->IsSingle : true;
}

bool isVOPC64DPP(unsigned Opc) {
  return isVOPC64DPPOpcodeHelper(Opc) || isVOPC64DPP8OpcodeHelper(Opc);
}

bool isVOPCAsmOnly(unsigned Opc) { return isVOPCAsmOnlyOpcodeHelper(Opc); }

bool getMAIIsDGEMM(unsigned Opc) {
  const MAIInstInfo *Info = getMAIInstInfoHelper(Opc);
  return Info ? Info->is_dgemm : false;
}

bool getMAIIsGFX940XDL(unsigned Opc) {
  const MAIInstInfo *Info = getMAIInstInfoHelper(Opc);
  return Info ? Info->is_gfx940_xdl : false;
}

unsigned getVOPDEncodingFamily(const MCSubtargetInfo &ST) {
  if (ST.hasFeature(AMDGPU::FeatureGFX12Insts))
    return SIEncodingFamily::GFX12;
  if (ST.hasFeature(AMDGPU::FeatureGFX11Insts))
    return SIEncodingFamily::GFX11;
  llvm_unreachable("Subtarget generation does not support VOPD!");
}

CanBeVOPD getCanBeVOPD(unsigned Opc) {
  const VOPDComponentInfo *Info = getVOPDComponentHelper(Opc);
  if (Info)
    return {Info->CanBeVOPDX, true};
  return {false, false};
}

unsigned getVOPDOpcode(unsigned Opc) {
  const VOPDComponentInfo *Info = getVOPDComponentHelper(Opc);
  return Info ? Info->VOPDOp : ~0u;
}

bool isVOPD(unsigned Opc) {
  return AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src0X);
}

bool isMAC(unsigned Opc) {
  return Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
         Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
         Opc == AMDGPU::V_MAC_F32_e64_vi ||
         Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 ||
         Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 ||
         Opc == AMDGPU::V_MAC_F16_e64_vi ||
         Opc == AMDGPU::V_FMAC_F64_e64_gfx90a ||
         Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
         Opc == AMDGPU::V_FMAC_F32_e64_gfx11 ||
         Opc == AMDGPU::V_FMAC_F32_e64_gfx12 ||
         Opc == AMDGPU::V_FMAC_F32_e64_vi ||
         Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
         Opc == AMDGPU::V_FMAC_DX9_ZERO_F32_e64_gfx11 ||
         Opc == AMDGPU::V_FMAC_F16_e64_gfx10 ||
         Opc == AMDGPU::V_FMAC_F16_t16_e64_gfx11 ||
         Opc == AMDGPU::V_FMAC_F16_t16_e64_gfx12 ||
         Opc == AMDGPU::V_DOT2C_F32_F16_e64_vi ||
         Opc == AMDGPU::V_DOT2C_I32_I16_e64_vi ||
         Opc == AMDGPU::V_DOT4C_I32_I8_e64_vi ||
         Opc == AMDGPU::V_DOT8C_I32_I4_e64_vi;
}

bool isPermlane16(unsigned Opc) {
  return Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
         Opc == AMDGPU::V_PERMLANEX16_B32_gfx10 ||
         Opc == AMDGPU::V_PERMLANE16_B32_e64_gfx11 ||
         Opc == AMDGPU::V_PERMLANEX16_B32_e64_gfx11 ||
         Opc == AMDGPU::V_PERMLANE16_B32_e64_gfx12 ||
         Opc == AMDGPU::V_PERMLANEX16_B32_e64_gfx12 ||
         Opc == AMDGPU::V_PERMLANE16_VAR_B32_e64_gfx12 ||
         Opc == AMDGPU::V_PERMLANEX16_VAR_B32_e64_gfx12;
}

bool isCvt_F32_Fp8_Bf8_e64(unsigned Opc) {
  return Opc == AMDGPU::V_CVT_F32_BF8_e64_gfx12 ||
         Opc == AMDGPU::V_CVT_F32_FP8_e64_gfx12 ||
         Opc == AMDGPU::V_CVT_F32_BF8_e64_dpp_gfx12 ||
         Opc == AMDGPU::V_CVT_F32_FP8_e64_dpp_gfx12 ||
         Opc == AMDGPU::V_CVT_F32_BF8_e64_dpp8_gfx12 ||
         Opc == AMDGPU::V_CVT_F32_FP8_e64_dpp8_gfx12 ||
         Opc == AMDGPU::V_CVT_PK_F32_BF8_e64_gfx12 ||
         Opc == AMDGPU::V_CVT_PK_F32_FP8_e64_gfx12;
}

bool isGenericAtomic(unsigned Opc) {
  return Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SWAP ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_ADD ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SUB ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMIN ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMIN ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMAX ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMAX ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_AND ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_OR ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_XOR ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_INC ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_DEC ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FADD ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMIN ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMAX ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_CMPSWAP ||
         Opc == AMDGPU::G_AMDGPU_ATOMIC_CMPXCHG;
}

bool isTrue16Inst(unsigned Opc) {
  const VOPTrue16Info *Info = getTrue16OpcodeHelper(Opc);
  return Info ? Info->IsTrue16 : false;
}

bool isInvalidSingleUseConsumerInst(unsigned Opc) {
  const SingleUseExceptionInfo *Info = getSingleUseExceptionHelper(Opc);
  return Info && Info->IsInvalidSingleUseConsumer;
}

bool isInvalidSingleUseProducerInst(unsigned Opc) {
  const SingleUseExceptionInfo *Info = getSingleUseExceptionHelper(Opc);
  return Info && Info->IsInvalidSingleUseProducer;
}

unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc) {
  const WMMAOpcodeMappingInfo *Info = getWMMAMappingInfoFrom2AddrOpcode(Opc);
  return Info ? Info->Opcode3Addr : ~0u;
}

unsigned mapWMMA3AddrTo2AddrOpcode(unsigned Opc) {
  const WMMAOpcodeMappingInfo *Info = getWMMAMappingInfoFrom3AddrOpcode(Opc);
  return Info ? Info->Opcode2Addr : ~0u;
}

// Wrapper for Tablegen'd function. enum Subtarget is not defined in any
// header files, so we need to wrap it in a function that takes unsigned
// instead.
int getMCOpcode(uint16_t Opcode, unsigned Gen) {
  return getMCOpcodeGen(Opcode, static_cast<Subtarget>(Gen));
}

int getVOPDFull(unsigned OpX, unsigned OpY, unsigned EncodingFamily) {
  const VOPDInfo *Info =
      getVOPDInfoFromComponentOpcodes(OpX, OpY, EncodingFamily);
  return Info ? Info->Opcode : -1;
}

std::pair<unsigned, unsigned> getVOPDComponents(unsigned VOPDOpcode) {
  const VOPDInfo *Info = getVOPDOpcodeHelper(VOPDOpcode);
  assert(Info);
  auto OpX = getVOPDBaseFromComponent(Info->OpX);
  auto OpY = getVOPDBaseFromComponent(Info->OpY);
  assert(OpX && OpY);
  return {OpX->BaseVOP, OpY->BaseVOP};
}
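
// Illustrative sketch (assuming the VOPD tables are self-consistent): the two
// base opcodes returned above can be mapped back to a full VOPD opcode, e.g.
//   auto [BaseX, BaseY] = getVOPDComponents(VOPDOpc);
//   int Full = getVOPDFull(getVOPDOpcode(BaseX), getVOPDOpcode(BaseY),
//                          getVOPDEncodingFamily(STI));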

namespace VOPD {

ComponentProps::ComponentProps(const MCInstrDesc &OpDesc) {
  assert(OpDesc.getNumDefs() == Component::DST_NUM);

  assert(OpDesc.getOperandConstraint(Component::SRC0, MCOI::TIED_TO) == -1);
  assert(OpDesc.getOperandConstraint(Component::SRC1, MCOI::TIED_TO) == -1);
  auto TiedIdx = OpDesc.getOperandConstraint(Component::SRC2, MCOI::TIED_TO);
  assert(TiedIdx == -1 || TiedIdx == Component::DST);
  HasSrc2Acc = TiedIdx != -1;

  SrcOperandsNum = OpDesc.getNumOperands() - OpDesc.getNumDefs();
  assert(SrcOperandsNum <= Component::MAX_SRC_NUM);

  auto OperandsNum = OpDesc.getNumOperands();
  unsigned CompOprIdx;
  for (CompOprIdx = Component::SRC1; CompOprIdx < OperandsNum; ++CompOprIdx) {
    if (OpDesc.operands()[CompOprIdx].OperandType == AMDGPU::OPERAND_KIMM32) {
      MandatoryLiteralIdx = CompOprIdx;
      break;
    }
  }
}

unsigned ComponentInfo::getIndexInParsedOperands(unsigned CompOprIdx) const {
  assert(CompOprIdx < Component::MAX_OPR_NUM);

  if (CompOprIdx == Component::DST)
    return getIndexOfDstInParsedOperands();

  auto CompSrcIdx = CompOprIdx - Component::DST_NUM;
  if (CompSrcIdx < getCompParsedSrcOperandsNum())
    return getIndexOfSrcInParsedOperands(CompSrcIdx);

  // The specified operand does not exist.
  return 0;
}

std::optional<unsigned> InstInfo::getInvalidCompOperandIndex(
    std::function<unsigned(unsigned, unsigned)> GetRegIdx, bool SkipSrc) const {

  auto OpXRegs = getRegIndices(ComponentIndex::X, GetRegIdx);
  auto OpYRegs = getRegIndices(ComponentIndex::Y, GetRegIdx);

  const unsigned CompOprNum =
      SkipSrc ? Component::DST_NUM : Component::MAX_OPR_NUM;
  unsigned CompOprIdx;
  for (CompOprIdx = 0; CompOprIdx < CompOprNum; ++CompOprIdx) {
    unsigned BanksMasks = VOPD_VGPR_BANK_MASKS[CompOprIdx];
    if (OpXRegs[CompOprIdx] && OpYRegs[CompOprIdx] &&
        ((OpXRegs[CompOprIdx] & BanksMasks) ==
         (OpYRegs[CompOprIdx] & BanksMasks)))
      return CompOprIdx;
  }

  return {};
}

// Return an array of VGPR registers [DST,SRC0,SRC1,SRC2] used
// by the specified component. If an operand is unused
// or is not a VGPR, the corresponding value is 0.
//
// GetRegIdx(Component, MCOperandIdx) must return a VGPR register index
// for the specified component and MC operand. The callback must return 0
// if the operand is not a register or not a VGPR.
InstInfo::RegIndices InstInfo::getRegIndices(
    unsigned CompIdx,
    std::function<unsigned(unsigned, unsigned)> GetRegIdx) const {
  assert(CompIdx < COMPONENTS_NUM);

  const auto &Comp = CompInfo[CompIdx];
  InstInfo::RegIndices RegIndices;

  RegIndices[DST] = GetRegIdx(CompIdx, Comp.getIndexOfDstInMCOperands());

  for (unsigned CompOprIdx : {SRC0, SRC1, SRC2}) {
    unsigned CompSrcIdx = CompOprIdx - DST_NUM;
    RegIndices[CompOprIdx] =
        Comp.hasRegSrcOperand(CompSrcIdx)
            ? GetRegIdx(CompIdx, Comp.getIndexOfSrcInMCOperands(CompSrcIdx))
            : 0;
  }
  return RegIndices;
}

} // namespace VOPD

VOPD::InstInfo getVOPDInstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY) {
  return VOPD::InstInfo(OpX, OpY);
}

VOPD::InstInfo getVOPDInstInfo(unsigned VOPDOpcode,
                               const MCInstrInfo *InstrInfo) {
  auto [OpX, OpY] = getVOPDComponents(VOPDOpcode);
  const auto &OpXDesc = InstrInfo->get(OpX);
  const auto &OpYDesc = InstrInfo->get(OpY);
  VOPD::ComponentInfo OpXInfo(OpXDesc, VOPD::ComponentKind::COMPONENT_X);
  VOPD::ComponentInfo OpYInfo(OpYDesc, OpXInfo);
  return VOPD::InstInfo(OpXInfo, OpYInfo);
}

namespace IsaInfo {

AMDGPUTargetID::AMDGPUTargetID(const MCSubtargetInfo &STI)
    : STI(STI), XnackSetting(TargetIDSetting::Any),
      SramEccSetting(TargetIDSetting::Any) {
  if (!STI.getFeatureBits().test(FeatureSupportsXNACK))
    XnackSetting = TargetIDSetting::Unsupported;
  if (!STI.getFeatureBits().test(FeatureSupportsSRAMECC))
    SramEccSetting = TargetIDSetting::Unsupported;
}

void AMDGPUTargetID::setTargetIDFromFeaturesString(StringRef FS) {
  // Check if xnack or sramecc is explicitly enabled or disabled. In the
  // absence of the target features we assume we must generate code that can
  // run in any environment.
  SubtargetFeatures Features(FS);
  std::optional<bool> XnackRequested;
  std::optional<bool> SramEccRequested;

  for (const std::string &Feature : Features.getFeatures()) {
    if (Feature == "+xnack")
      XnackRequested = true;
    else if (Feature == "-xnack")
      XnackRequested = false;
    else if (Feature == "+sramecc")
      SramEccRequested = true;
    else if (Feature == "-sramecc")
      SramEccRequested = false;
  }

  bool XnackSupported = isXnackSupported();
  bool SramEccSupported = isSramEccSupported();

  if (XnackRequested) {
    if (XnackSupported) {
      XnackSetting =
          *XnackRequested ? TargetIDSetting::On : TargetIDSetting::Off;
    } else {
      // If a specific xnack setting was requested and this GPU does not
      // support xnack, emit a warning. Setting will remain set to
      // "Unsupported".
      if (*XnackRequested) {
        errs() << "warning: xnack 'On' was requested for a processor that does "
                  "not support it!\n";
      } else {
        errs() << "warning: xnack 'Off' was requested for a processor that "
                  "does not support it!\n";
      }
    }
  }

  if (SramEccRequested) {
    if (SramEccSupported) {
      SramEccSetting =
          *SramEccRequested ? TargetIDSetting::On : TargetIDSetting::Off;
    } else {
      // If a specific sramecc setting was requested and this GPU does not
      // support sramecc, emit a warning. Setting will remain set to
      // "Unsupported".
      if (*SramEccRequested) {
        errs() << "warning: sramecc 'On' was requested for a processor that "
                  "does not support it!\n";
      } else {
        errs() << "warning: sramecc 'Off' was requested for a processor that "
                  "does not support it!\n";
      }
    }
  }
}

static TargetIDSetting
getTargetIDSettingFromFeatureString(StringRef FeatureString) {
  if (FeatureString.ends_with("-"))
    return TargetIDSetting::Off;
  if (FeatureString.ends_with("+"))
    return TargetIDSetting::On;

  llvm_unreachable("Malformed feature string");
}

void AMDGPUTargetID::setTargetIDFromTargetIDStream(StringRef TargetID) {
  SmallVector<StringRef, 3> TargetIDSplit;
  TargetID.split(TargetIDSplit, ':');

  for (const auto &FeatureString : TargetIDSplit) {
    if (FeatureString.starts_with("xnack"))
      XnackSetting = getTargetIDSettingFromFeatureString(FeatureString);
    if (FeatureString.starts_with("sramecc"))
      SramEccSetting = getTargetIDSettingFromFeatureString(FeatureString);
  }
}
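
// Example (illustrative): a target-id stream such as "gfx90a:sramecc+:xnack-"
// splits on ':' into {"gfx90a", "sramecc+", "xnack-"}; the trailing '+' or '-'
// selects TargetIDSetting::On or TargetIDSetting::Off for that feature.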

std::string AMDGPUTargetID::toString() const {
  std::string StringRep;
  raw_string_ostream StreamRep(StringRep);

  auto TargetTriple = STI.getTargetTriple();
  auto Version = getIsaVersion(STI.getCPU());

  StreamRep << TargetTriple.getArchName() << '-'
            << TargetTriple.getVendorName() << '-'
            << TargetTriple.getOSName() << '-'
            << TargetTriple.getEnvironmentName() << '-';

  std::string Processor;
  // TODO: Following else statement is present here because we used various
  // alias names for GPUs up until GFX9 (e.g. 'fiji' is same as 'gfx803').
  // Remove once all aliases are removed from GCNProcessors.td.
  if (Version.Major >= 9)
    Processor = STI.getCPU().str();
  else
    Processor = (Twine("gfx") + Twine(Version.Major) + Twine(Version.Minor) +
                 Twine(Version.Stepping))
                    .str();

  std::string Features;
  if (STI.getTargetTriple().getOS() == Triple::AMDHSA) {
    // sramecc.
    if (getSramEccSetting() == TargetIDSetting::Off)
      Features += ":sramecc-";
    else if (getSramEccSetting() == TargetIDSetting::On)
      Features += ":sramecc+";
    // xnack.
    if (getXnackSetting() == TargetIDSetting::Off)
      Features += ":xnack-";
    else if (getXnackSetting() == TargetIDSetting::On)
      Features += ":xnack+";
  }

  StreamRep << Processor << Features;

  StreamRep.flush();
  return StringRep;
}

unsigned getWavefrontSize(const MCSubtargetInfo *STI) {
  if (STI->getFeatureBits().test(FeatureWavefrontSize16))
    return 16;
  if (STI->getFeatureBits().test(FeatureWavefrontSize32))
    return 32;

  return 64;
}

unsigned getLocalMemorySize(const MCSubtargetInfo *STI) {
  unsigned BytesPerCU = 0;
  if (STI->getFeatureBits().test(FeatureLocalMemorySize32768))
    BytesPerCU = 32768;
  if (STI->getFeatureBits().test(FeatureLocalMemorySize65536))
    BytesPerCU = 65536;

  // "Per CU" really means "per whatever functional block the waves of a
  // workgroup must share". So the effective local memory size is doubled in
  // WGP mode on gfx10.
  if (isGFX10Plus(*STI) && !STI->getFeatureBits().test(FeatureCuMode))
    BytesPerCU *= 2;

  return BytesPerCU;
}

unsigned getAddressableLocalMemorySize(const MCSubtargetInfo *STI) {
  if (STI->getFeatureBits().test(FeatureLocalMemorySize32768))
    return 32768;
  if (STI->getFeatureBits().test(FeatureLocalMemorySize65536))
    return 65536;
  return 0;
}

unsigned getEUsPerCU(const MCSubtargetInfo *STI) {
  // "Per CU" really means "per whatever functional block the waves of a
  // workgroup must share". For gfx10 in CU mode this is the CU, which contains
  // two SIMDs.
  if (isGFX10Plus(*STI) && STI->getFeatureBits().test(FeatureCuMode))
    return 2;
  // Pre-gfx10 a CU contains four SIMDs. For gfx10 in WGP mode the WGP contains
  // two CUs, so a total of four SIMDs.
  return 4;
}

unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI,
                               unsigned FlatWorkGroupSize) {
  assert(FlatWorkGroupSize != 0);
  if (STI->getTargetTriple().getArch() != Triple::amdgcn)
    return 8;
  unsigned MaxWaves = getMaxWavesPerEU(STI) * getEUsPerCU(STI);
  unsigned N = getWavesPerWorkGroup(STI, FlatWorkGroupSize);
  if (N == 1) {
    // Single-wave workgroups don't consume barrier resources.
    return MaxWaves;
  }

  unsigned MaxBarriers = 16;
  if (isGFX10Plus(*STI) && !STI->getFeatureBits().test(FeatureCuMode))
    MaxBarriers = 32;

  return std::min(MaxWaves / N, MaxBarriers);
}

unsigned getMinWavesPerEU(const MCSubtargetInfo *STI) {
  return 1;
}

unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI) {
  // FIXME: Need to take scratch memory into account.
  if (isGFX90A(*STI))
    return 8;
  if (!isGFX10Plus(*STI))
    return 10;
  return hasGFX10_3Insts(*STI) ? 16 : 20;
}

unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo *STI,
                                   unsigned FlatWorkGroupSize) {
  return divideCeil(getWavesPerWorkGroup(STI, FlatWorkGroupSize),
                    getEUsPerCU(STI));
}

unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI) {
  return 1;
}

unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI) {
  // Some subtargets allow encoding 2048, but this isn't tested or supported.
  return 1024;
}

unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI,
                              unsigned FlatWorkGroupSize) {
  return divideCeil(FlatWorkGroupSize, getWavefrontSize(STI));
}

unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI) {
  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major >= 10)
    return getAddressableNumSGPRs(STI);
  if (Version.Major >= 8)
    return 16;
  return 8;
}

unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI) {
  return 8;
}

unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI) {
  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major >= 8)
    return 800;
  return 512;
}

unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI) {
  if (STI->getFeatureBits().test(FeatureSGPRInitBug))
    return FIXED_NUM_SGPRS_FOR_INIT_BUG;

  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major >= 10)
    return 106;
  if (Version.Major >= 8)
    return 102;
  return 104;
}

unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
  assert(WavesPerEU != 0);

  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major >= 10)
    return 0;

  if (WavesPerEU >= getMaxWavesPerEU(STI))
    return 0;

  unsigned MinNumSGPRs = getTotalNumSGPRs(STI) / (WavesPerEU + 1);
  if (STI->getFeatureBits().test(FeatureTrapHandler))
    MinNumSGPRs -= std::min(MinNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
  MinNumSGPRs = alignDown(MinNumSGPRs, getSGPRAllocGranule(STI)) + 1;
  return std::min(MinNumSGPRs, getAddressableNumSGPRs(STI));
}

unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
                        bool Addressable) {
  assert(WavesPerEU != 0);

  unsigned AddressableNumSGPRs = getAddressableNumSGPRs(STI);
  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major >= 10)
    return Addressable ? AddressableNumSGPRs : 108;
  if (Version.Major >= 8 && !Addressable)
    AddressableNumSGPRs = 112;
  unsigned MaxNumSGPRs = getTotalNumSGPRs(STI) / WavesPerEU;
  if (STI->getFeatureBits().test(FeatureTrapHandler))
    MaxNumSGPRs -= std::min(MaxNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
  MaxNumSGPRs = alignDown(MaxNumSGPRs, getSGPRAllocGranule(STI));
  return std::min(MaxNumSGPRs, AddressableNumSGPRs);
}

unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
                          bool FlatScrUsed, bool XNACKUsed) {
  unsigned ExtraSGPRs = 0;
  if (VCCUsed)
    ExtraSGPRs = 2;

  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major >= 10)
    return ExtraSGPRs;

  if (Version.Major < 8) {
    if (FlatScrUsed)
      ExtraSGPRs = 4;
  } else {
    if (XNACKUsed)
      ExtraSGPRs = 4;

    if (FlatScrUsed ||
        STI->getFeatureBits().test(AMDGPU::FeatureArchitectedFlatScratch))
      ExtraSGPRs = 6;
  }

  return ExtraSGPRs;
}

unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
                          bool FlatScrUsed) {
  return getNumExtraSGPRs(STI, VCCUsed, FlatScrUsed,
                          STI->getFeatureBits().test(AMDGPU::FeatureXNACK));
}

static unsigned getGranulatedNumRegisterBlocks(unsigned NumRegs,
                                               unsigned Granule) {
  return divideCeil(std::max(1u, NumRegs), Granule);
}

unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs) {
  // SGPRBlocks is actual number of SGPR blocks minus 1.
  return getGranulatedNumRegisterBlocks(NumSGPRs, getSGPREncodingGranule(STI)) -
         1;
}

unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI,
                             std::optional<bool> EnableWavefrontSize32) {
  if (STI->getFeatureBits().test(FeatureGFX90AInsts))
    return 8;

  bool IsWave32 = EnableWavefrontSize32 ?
                      *EnableWavefrontSize32 :
                      STI->getFeatureBits().test(FeatureWavefrontSize32);

  if (STI->getFeatureBits().test(Feature1_5xVGPRs))
    return IsWave32 ? 24 : 12;

  if (hasGFX10_3Insts(*STI))
    return IsWave32 ? 16 : 8;

  return IsWave32 ? 8 : 4;
}

unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI,
                                std::optional<bool> EnableWavefrontSize32) {
  if (STI->getFeatureBits().test(FeatureGFX90AInsts))
    return 8;

  bool IsWave32 = EnableWavefrontSize32 ?
                      *EnableWavefrontSize32 :
                      STI->getFeatureBits().test(FeatureWavefrontSize32);

  return IsWave32 ? 8 : 4;
}

unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI) {
  if (STI->getFeatureBits().test(FeatureGFX90AInsts))
    return 512;
  if (!isGFX10Plus(*STI))
    return 256;
  bool IsWave32 = STI->getFeatureBits().test(FeatureWavefrontSize32);
  if (STI->getFeatureBits().test(Feature1_5xVGPRs))
    return IsWave32 ? 1536 : 768;
  return IsWave32 ? 1024 : 512;
}

unsigned getAddressableNumArchVGPRs(const MCSubtargetInfo *STI) { return 256; }

unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI) {
  if (STI->getFeatureBits().test(FeatureGFX90AInsts))
    return 512;
  return getAddressableNumArchVGPRs(STI);
}

unsigned getNumWavesPerEUWithNumVGPRs(const MCSubtargetInfo *STI,
                                      unsigned NumVGPRs) {
  return getNumWavesPerEUWithNumVGPRs(NumVGPRs, getVGPRAllocGranule(STI),
                                      getMaxWavesPerEU(STI),
                                      getTotalNumVGPRs(STI));
}

unsigned getNumWavesPerEUWithNumVGPRs(unsigned NumVGPRs, unsigned Granule,
                                      unsigned MaxWaves,
                                      unsigned TotalNumVGPRs) {
  if (NumVGPRs < Granule)
    return MaxWaves;
  unsigned RoundedRegs = alignTo(NumVGPRs, Granule);
  return std::min(std::max(TotalNumVGPRs / RoundedRegs, 1u), MaxWaves);
}
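
// Worked example (illustrative, gfx90a-like values): with Granule = 8,
// TotalNumVGPRs = 512 and MaxWaves = 8, a request for 100 VGPRs rounds up to
// 104, giving std::min(std::max(512 / 104, 1u), 8u) = 4 waves per EU.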

unsigned getOccupancyWithNumSGPRs(unsigned SGPRs, unsigned MaxWaves,
                                  AMDGPUSubtarget::Generation Gen) {
  if (Gen >= AMDGPUSubtarget::GFX10)
    return MaxWaves;

  if (Gen >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
    if (SGPRs <= 80)
      return 10;
    if (SGPRs <= 88)
      return 9;
    if (SGPRs <= 100)
      return 8;
    return 7;
  }
  if (SGPRs <= 48)
    return 10;
  if (SGPRs <= 56)
    return 9;
  if (SGPRs <= 64)
    return 8;
  if (SGPRs <= 72)
    return 7;
  if (SGPRs <= 80)
    return 6;
  return 5;
}

unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
  assert(WavesPerEU != 0);

  unsigned MaxWavesPerEU = getMaxWavesPerEU(STI);
  if (WavesPerEU >= MaxWavesPerEU)
    return 0;

  unsigned TotNumVGPRs = getTotalNumVGPRs(STI);
  unsigned AddrsableNumVGPRs = getAddressableNumVGPRs(STI);
  unsigned Granule = getVGPRAllocGranule(STI);
  unsigned MaxNumVGPRs = alignDown(TotNumVGPRs / WavesPerEU, Granule);

  if (MaxNumVGPRs == alignDown(TotNumVGPRs / MaxWavesPerEU, Granule))
    return 0;

  unsigned MinWavesPerEU = getNumWavesPerEUWithNumVGPRs(STI, AddrsableNumVGPRs);
  if (WavesPerEU < MinWavesPerEU)
    return getMinNumVGPRs(STI, MinWavesPerEU);

  unsigned MaxNumVGPRsNext = alignDown(TotNumVGPRs / (WavesPerEU + 1), Granule);
  unsigned MinNumVGPRs = 1 + std::min(MaxNumVGPRs - Granule, MaxNumVGPRsNext);
  return std::min(MinNumVGPRs, AddrsableNumVGPRs);
}

unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
  assert(WavesPerEU != 0);

  unsigned MaxNumVGPRs = alignDown(getTotalNumVGPRs(STI) / WavesPerEU,
                                   getVGPRAllocGranule(STI));
  unsigned AddressableNumVGPRs = getAddressableNumVGPRs(STI);
  return std::min(MaxNumVGPRs, AddressableNumVGPRs);
}

unsigned getEncodedNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs,
                                 std::optional<bool> EnableWavefrontSize32) {
  return getGranulatedNumRegisterBlocks(
             NumVGPRs, getVGPREncodingGranule(STI, EnableWavefrontSize32)) -
         1;
}

unsigned getAllocatedNumVGPRBlocks(const MCSubtargetInfo *STI,
                                   unsigned NumVGPRs,
                                   std::optional<bool> EnableWavefrontSize32) {
  return getGranulatedNumRegisterBlocks(
      NumVGPRs, getVGPRAllocGranule(STI, EnableWavefrontSize32));
}
} // end namespace IsaInfo

void initDefaultAMDKernelCodeT(AMDGPUMCKernelCodeT &KernelCode,
                               const MCSubtargetInfo *STI) {
  IsaVersion Version = getIsaVersion(STI->getCPU());

  KernelCode.amd_kernel_code_version_major = 1;
  KernelCode.amd_kernel_code_version_minor = 2;
  KernelCode.amd_machine_kind = 1; // AMD_MACHINE_KIND_AMDGPU
  KernelCode.amd_machine_version_major = Version.Major;
  KernelCode.amd_machine_version_minor = Version.Minor;
  KernelCode.amd_machine_version_stepping = Version.Stepping;
  KernelCode.kernel_code_entry_byte_offset = sizeof(amd_kernel_code_t);
  if (STI->getFeatureBits().test(FeatureWavefrontSize32)) {
    KernelCode.wavefront_size = 5;
    KernelCode.code_properties |= AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32;
  } else {
    KernelCode.wavefront_size = 6;
  }

  // If the code object does not support indirect functions, then the value
  // must be 0xffffffff.
  KernelCode.call_convention = -1;

  // These alignment values are specified in powers of two, so alignment =
  // 2^n. The minimum alignment is 2^4 = 16.
  KernelCode.kernarg_segment_alignment = 4;
  KernelCode.group_segment_alignment = 4;
  KernelCode.private_segment_alignment = 4;

  if (Version.Major >= 10) {
    KernelCode.compute_pgm_resource_registers |=
        S_00B848_WGP_MODE(STI->getFeatureBits().test(FeatureCuMode) ? 0 : 1) |
        S_00B848_MEM_ORDERED(1);
  }
}

bool isGroupSegment(const GlobalValue *GV) {
  return GV->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
}

bool isGlobalSegment(const GlobalValue *GV) {
  return GV->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
}

bool isReadOnlySegment(const GlobalValue *GV) {
  unsigned AS = GV->getAddressSpace();
  return AS == AMDGPUAS::CONSTANT_ADDRESS ||
         AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT;
}

bool shouldEmitConstantsToTextSection(const Triple &TT) {
  return TT.getArch() == Triple::r600;
}

std::pair<unsigned, unsigned>
getIntegerPairAttribute(const Function &F, StringRef Name,
                        std::pair<unsigned, unsigned> Default,
                        bool OnlyFirstRequired) {
  Attribute A = F.getFnAttribute(Name);
  if (!A.isStringAttribute())
    return Default;

  LLVMContext &Ctx = F.getContext();
  std::pair<unsigned, unsigned> Ints = Default;
  std::pair<StringRef, StringRef> Strs = A.getValueAsString().split(',');
  if (Strs.first.trim().getAsInteger(0, Ints.first)) {
    Ctx.emitError("can't parse first integer attribute " + Name);
    return Default;
  }
  if (Strs.second.trim().getAsInteger(0, Ints.second)) {
    if (!OnlyFirstRequired || !Strs.second.trim().empty()) {
      Ctx.emitError("can't parse second integer attribute " + Name);
      return Default;
    }
  }

  return Ints;
}

SmallVector<unsigned> getIntegerVecAttribute(const Function &F, StringRef Name,
                                             unsigned Size) {
  assert(Size > 2);
  SmallVector<unsigned> Default(Size, 0);

  Attribute A = F.getFnAttribute(Name);
  if (!A.isStringAttribute())
    return Default;

  SmallVector<unsigned> Vals(Size, 0);

  LLVMContext &Ctx = F.getContext();

  StringRef S = A.getValueAsString();
  unsigned i = 0;
  for (; !S.empty() && i < Size; i++) {
    std::pair<StringRef, StringRef> Strs = S.split(',');
    unsigned IntVal;
    if (Strs.first.trim().getAsInteger(0, IntVal)) {
      Ctx.emitError("can't parse integer attribute " + Strs.first + " in " +
                    Name);
      return Default;
    }
    Vals[i] = IntVal;
    S = Strs.second;
  }

  if (!S.empty() || i < Size) {
    Ctx.emitError("attribute " + Name +
                  " has incorrect number of integers; expected " +
                  llvm::utostr(Size));
    return Default;
  }
  return Vals;
}

unsigned getVmcntBitMask(const IsaVersion &Version) {
  return (1 << (getVmcntBitWidthLo(Version.Major) +
                getVmcntBitWidthHi(Version.Major))) -
         1;
}

unsigned getLoadcntBitMask(const IsaVersion &Version) {
  return (1 << getLoadcntBitWidth(Version.Major)) - 1;
}

unsigned getSamplecntBitMask(const IsaVersion &Version) {
  return (1 << getSamplecntBitWidth(Version.Major)) - 1;
}

unsigned getBvhcntBitMask(const IsaVersion &Version) {
  return (1 << getBvhcntBitWidth(Version.Major)) - 1;
}

unsigned getExpcntBitMask(const IsaVersion &Version) {
  return (1 << getExpcntBitWidth(Version.Major)) - 1;
}

unsigned getLgkmcntBitMask(const IsaVersion &Version) {
  return (1 << getLgkmcntBitWidth(Version.Major)) - 1;
}

unsigned getDscntBitMask(const IsaVersion &Version) {
  return (1 << getDscntBitWidth(Version.Major)) - 1;
}

unsigned getKmcntBitMask(const IsaVersion &Version) {
  return (1 << getKmcntBitWidth(Version.Major)) - 1;
}

unsigned getStorecntBitMask(const IsaVersion &Version) {
  return (1 << getStorecntBitWidth(Version.Major)) - 1;
}

unsigned getWaitcntBitMask(const IsaVersion &Version) {
  unsigned VmcntLo = getBitMask(getVmcntBitShiftLo(Version.Major),
                                getVmcntBitWidthLo(Version.Major));
  unsigned Expcnt = getBitMask(getExpcntBitShift(Version.Major),
                               getExpcntBitWidth(Version.Major));
  unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(Version.Major),
                                getLgkmcntBitWidth(Version.Major));
  unsigned VmcntHi = getBitMask(getVmcntBitShiftHi(Version.Major),
                                getVmcntBitWidthHi(Version.Major));
  return VmcntLo | Expcnt | Lgkmcnt | VmcntHi;
}

unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt) {
  unsigned VmcntLo = unpackBits(Waitcnt, getVmcntBitShiftLo(Version.Major),
                                getVmcntBitWidthLo(Version.Major));
  unsigned VmcntHi = unpackBits(Waitcnt, getVmcntBitShiftHi(Version.Major),
                                getVmcntBitWidthHi(Version.Major));
  return VmcntLo | VmcntHi << getVmcntBitWidthLo(Version.Major);
}

unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt) {
  return unpackBits(Waitcnt, getExpcntBitShift(Version.Major),
                    getExpcntBitWidth(Version.Major));
}

unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt) {
  return unpackBits(Waitcnt, getLgkmcntBitShift(Version.Major),
                    getLgkmcntBitWidth(Version.Major));
}

void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt,
                   unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt) {
  Vmcnt = decodeVmcnt(Version, Waitcnt);
  Expcnt = decodeExpcnt(Version, Waitcnt);
  Lgkmcnt = decodeLgkmcnt(Version, Waitcnt);
}

Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded) {
  Waitcnt Decoded;
  Decoded.LoadCnt = decodeVmcnt(Version, Encoded);
  Decoded.ExpCnt = decodeExpcnt(Version, Encoded);
  Decoded.DsCnt = decodeLgkmcnt(Version, Encoded);
  return Decoded;
}

unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt,
                     unsigned Vmcnt) {
  Waitcnt = packBits(Vmcnt, Waitcnt, getVmcntBitShiftLo(Version.Major),
                     getVmcntBitWidthLo(Version.Major));
  return packBits(Vmcnt >> getVmcntBitWidthLo(Version.Major), Waitcnt,
                  getVmcntBitShiftHi(Version.Major),
                  getVmcntBitWidthHi(Version.Major));
}

unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt,
                      unsigned Expcnt) {
  return packBits(Expcnt, Waitcnt, getExpcntBitShift(Version.Major),
                  getExpcntBitWidth(Version.Major));
}

unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
                       unsigned Lgkmcnt) {
  return packBits(Lgkmcnt, Waitcnt, getLgkmcntBitShift(Version.Major),
                  getLgkmcntBitWidth(Version.Major));
}

unsigned encodeWaitcnt(const IsaVersion &Version,
                       unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt) {
  unsigned Waitcnt = getWaitcntBitMask(Version);
  Waitcnt = encodeVmcnt(Version, Waitcnt, Vmcnt);
  Waitcnt = encodeExpcnt(Version, Waitcnt, Expcnt);
  Waitcnt = encodeLgkmcnt(Version, Waitcnt, Lgkmcnt);
  return Waitcnt;
}

unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded) {
  return encodeWaitcnt(Version, Decoded.LoadCnt, Decoded.ExpCnt, Decoded.DsCnt);
}
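
// Example (illustrative, gfx9 layout: vmcnt in bits 3:0 and 15:14, expcnt in
// bits 6:4, lgkmcnt in bits 11:8): encodeWaitcnt(V, /*Vmcnt=*/0, /*Expcnt=*/7,
// /*Lgkmcnt=*/0) returns 0x70, and decodeWaitcnt() applied to that value
// yields the same three counts back.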

static unsigned getCombinedCountBitMask(const IsaVersion &Version,
                                        bool IsStore) {
  unsigned Dscnt = getBitMask(getDscntBitShift(Version.Major),
                              getDscntBitWidth(Version.Major));
  if (IsStore) {
    unsigned Storecnt = getBitMask(getLoadcntStorecntBitShift(Version.Major),
                                   getStorecntBitWidth(Version.Major));
    return Dscnt | Storecnt;
  }
  unsigned Loadcnt = getBitMask(getLoadcntStorecntBitShift(Version.Major),
                                getLoadcntBitWidth(Version.Major));
  return Dscnt | Loadcnt;
}

Waitcnt decodeLoadcntDscnt(const IsaVersion &Version, unsigned LoadcntDscnt) {
  Waitcnt Decoded;
  Decoded.LoadCnt =
      unpackBits(LoadcntDscnt, getLoadcntStorecntBitShift(Version.Major),
                 getLoadcntBitWidth(Version.Major));
  Decoded.DsCnt = unpackBits(LoadcntDscnt, getDscntBitShift(Version.Major),
                             getDscntBitWidth(Version.Major));
  return Decoded;
}

Waitcnt decodeStorecntDscnt(const IsaVersion &Version, unsigned StorecntDscnt) {
  Waitcnt Decoded;
  Decoded.StoreCnt =
      unpackBits(StorecntDscnt, getLoadcntStorecntBitShift(Version.Major),
                 getStorecntBitWidth(Version.Major));
  Decoded.DsCnt = unpackBits(StorecntDscnt, getDscntBitShift(Version.Major),
                             getDscntBitWidth(Version.Major));
  return Decoded;
}

static unsigned encodeLoadcnt(const IsaVersion &Version, unsigned Waitcnt,
                              unsigned Loadcnt) {
  return packBits(Loadcnt, Waitcnt, getLoadcntStorecntBitShift(Version.Major),
                  getLoadcntBitWidth(Version.Major));
}

static unsigned encodeStorecnt(const IsaVersion &Version, unsigned Waitcnt,
                               unsigned Storecnt) {
  return packBits(Storecnt, Waitcnt, getLoadcntStorecntBitShift(Version.Major),
                  getStorecntBitWidth(Version.Major));
}

static unsigned encodeDscnt(const IsaVersion &Version, unsigned Waitcnt,
                            unsigned Dscnt) {
  return packBits(Dscnt, Waitcnt, getDscntBitShift(Version.Major),
                  getDscntBitWidth(Version.Major));
}

static unsigned encodeLoadcntDscnt(const IsaVersion &Version, unsigned Loadcnt,
                                   unsigned Dscnt) {
  unsigned Waitcnt = getCombinedCountBitMask(Version, false);
  Waitcnt = encodeLoadcnt(Version, Waitcnt, Loadcnt);
  Waitcnt = encodeDscnt(Version, Waitcnt, Dscnt);
  return Waitcnt;
}

unsigned encodeLoadcntDscnt(const IsaVersion &Version, const Waitcnt &Decoded) {
  return encodeLoadcntDscnt(Version, Decoded.LoadCnt, Decoded.DsCnt);
}

static unsigned encodeStorecntDscnt(const IsaVersion &Version,
                                    unsigned Storecnt, unsigned Dscnt) {
  unsigned Waitcnt = getCombinedCountBitMask(Version, true);
  Waitcnt = encodeStorecnt(Version, Waitcnt, Storecnt);
  Waitcnt = encodeDscnt(Version, Waitcnt, Dscnt);
  return Waitcnt;
}

unsigned encodeStorecntDscnt(const IsaVersion &Version,
                             const Waitcnt &Decoded) {
  return encodeStorecntDscnt(Version, Decoded.StoreCnt, Decoded.DsCnt);
}

//===----------------------------------------------------------------------===//
// Custom Operand Values
//===----------------------------------------------------------------------===//

static unsigned getDefaultCustomOperandEncoding(const CustomOperandVal *Opr,
                                                int Size,
                                                const MCSubtargetInfo &STI) {
  unsigned Enc = 0;
  for (int Idx = 0; Idx < Size; ++Idx) {
    const auto &Op = Opr[Idx];
    if (Op.isSupported(STI))
      Enc |= Op.encode(Op.Default);
  }
  return Enc;
}

static bool isSymbolicCustomOperandEncoding(const CustomOperandVal *Opr,
                                            int Size, unsigned Code,
                                            bool &HasNonDefaultVal,
                                            const MCSubtargetInfo &STI) {
  unsigned UsedOprMask = 0;
  HasNonDefaultVal = false;
  for (int Idx = 0; Idx < Size; ++Idx) {
    const auto &Op = Opr[Idx];
    if (!Op.isSupported(STI))
      continue;
    UsedOprMask |= Op.getMask();
    unsigned Val = Op.decode(Code);
    if (!Op.isValid(Val))
      return false;
    HasNonDefaultVal |= (Val != Op.Default);
  }
  return (Code & ~UsedOprMask) == 0;
}

static bool decodeCustomOperand(const CustomOperandVal *Opr, int Size,
                                unsigned Code, int &Idx, StringRef &Name,
                                unsigned &Val, bool &IsDefault,
                                const MCSubtargetInfo &STI) {
  while (Idx < Size) {
    const auto &Op = Opr[Idx++];
    if (Op.isSupported(STI)) {
      Name = Op.Name;
      Val = Op.decode(Code);
      IsDefault = (Val == Op.Default);
      return true;
    }
  }

  return false;
}

static int encodeCustomOperandVal(const CustomOperandVal &Op,
                                  int64_t InputVal) {
  if (InputVal < 0 || InputVal > Op.Max)
    return OPR_VAL_INVALID;
  return Op.encode(InputVal);
}

static int encodeCustomOperand(const CustomOperandVal *Opr, int Size,
                               const StringRef Name, int64_t InputVal,
                               unsigned &UsedOprMask,
                               const MCSubtargetInfo &STI) {
  int InvalidId = OPR_ID_UNKNOWN;
  for (int Idx = 0; Idx < Size; ++Idx) {
    const auto &Op = Opr[Idx];
    if (Op.Name == Name) {
      if (!Op.isSupported(STI)) {
        InvalidId = OPR_ID_UNSUPPORTED;
        continue;
      }
      auto OprMask = Op.getMask();
      if (OprMask & UsedOprMask)
        return OPR_ID_DUPLICATE;
      UsedOprMask |= OprMask;
      return encodeCustomOperandVal(Op, InputVal);
    }
  }
  return InvalidId;
}

//===----------------------------------------------------------------------===//
// DepCtr
//===----------------------------------------------------------------------===//

namespace DepCtr {

int getDefaultDepCtrEncoding(const MCSubtargetInfo &STI) {
  static int Default = -1;
  if (Default == -1)
    Default = getDefaultCustomOperandEncoding(DepCtrInfo, DEP_CTR_SIZE, STI);
  return Default;
}

bool isSymbolicDepCtrEncoding(unsigned Code, bool &HasNonDefaultVal,
                              const MCSubtargetInfo &STI) {
  return isSymbolicCustomOperandEncoding(DepCtrInfo, DEP_CTR_SIZE, Code,
                                         HasNonDefaultVal, STI);
}

bool decodeDepCtr(unsigned Code, int &Id, StringRef &Name, unsigned &Val,
                  bool &IsDefault, const MCSubtargetInfo &STI) {
  return decodeCustomOperand(DepCtrInfo, DEP_CTR_SIZE, Code, Id, Name, Val,
                             IsDefault, STI);
}

int encodeDepCtr(const StringRef Name, int64_t Val, unsigned &UsedOprMask,
                 const MCSubtargetInfo &STI) {
  return encodeCustomOperand(DepCtrInfo, DEP_CTR_SIZE, Name, Val, UsedOprMask,
                             STI);
}

unsigned decodeFieldVmVsrc(unsigned Encoded) {
  return unpackBits(Encoded, getVmVsrcBitShift(), getVmVsrcBitWidth());
}

unsigned decodeFieldVaVdst(unsigned Encoded) {
  return unpackBits(Encoded, getVaVdstBitShift(), getVaVdstBitWidth());
}

unsigned decodeFieldSaSdst(unsigned Encoded) {
  return unpackBits(Encoded, getSaSdstBitShift(), getSaSdstBitWidth());
}

unsigned encodeFieldVmVsrc(unsigned Encoded, unsigned VmVsrc) {
  return packBits(VmVsrc, Encoded, getVmVsrcBitShift(), getVmVsrcBitWidth());
}

unsigned encodeFieldVmVsrc(unsigned VmVsrc) {
  return encodeFieldVmVsrc(0xffff, VmVsrc);
}

unsigned encodeFieldVaVdst(unsigned Encoded, unsigned VaVdst) {
  return packBits(VaVdst, Encoded, getVaVdstBitShift(), getVaVdstBitWidth());
}

unsigned encodeFieldVaVdst(unsigned VaVdst) {
  return encodeFieldVaVdst(0xffff, VaVdst);
}

unsigned encodeFieldSaSdst(unsigned Encoded, unsigned SaSdst) {
  return packBits(SaSdst, Encoded, getSaSdstBitShift(), getSaSdstBitWidth());
}

unsigned encodeFieldSaSdst(unsigned SaSdst) {
  return encodeFieldSaSdst(0xffff, SaSdst);
}
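
// Example (illustrative): the single-argument encoders start from 0xffff, i.e.
// "no wait" for every field, and overwrite just one field, so
// encodeFieldVmVsrc(0) == packBits(0, 0xffff, /*Shift=*/2, /*Width=*/3), which
// is 0xffe3: only vm_vsrc is forced to zero.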
-1 : (Id - Val.Tgt); 1726 Name = Val.Name; 1727 return true; 1728 } 1729 } 1730 return false; 1731 } 1732 1733 unsigned getTgtId(const StringRef Name) { 1734 1735 for (const ExpTgt &Val : ExpTgtInfo) { 1736 if (Val.MaxIndex == 0 && Name == Val.Name) 1737 return Val.Tgt; 1738 1739 if (Val.MaxIndex > 0 && Name.starts_with(Val.Name)) { 1740 StringRef Suffix = Name.drop_front(Val.Name.size()); 1741 1742 unsigned Id; 1743 if (Suffix.getAsInteger(10, Id) || Id > Val.MaxIndex) 1744 return ET_INVALID; 1745 1746 // Disable leading zeroes 1747 if (Suffix.size() > 1 && Suffix[0] == '0') 1748 return ET_INVALID; 1749 1750 return Val.Tgt + Id; 1751 } 1752 } 1753 return ET_INVALID; 1754 } 1755 1756 bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI) { 1757 switch (Id) { 1758 case ET_NULL: 1759 return !isGFX11Plus(STI); 1760 case ET_POS4: 1761 case ET_PRIM: 1762 return isGFX10Plus(STI); 1763 case ET_DUAL_SRC_BLEND0: 1764 case ET_DUAL_SRC_BLEND1: 1765 return isGFX11Plus(STI); 1766 default: 1767 if (Id >= ET_PARAM0 && Id <= ET_PARAM31) 1768 return !isGFX11Plus(STI); 1769 return true; 1770 } 1771 } 1772 1773 } // namespace Exp 1774 1775 //===----------------------------------------------------------------------===// 1776 // MTBUF Format 1777 //===----------------------------------------------------------------------===// 1778 1779 namespace MTBUFFormat { 1780 1781 int64_t getDfmt(const StringRef Name) { 1782 for (int Id = DFMT_MIN; Id <= DFMT_MAX; ++Id) { 1783 if (Name == DfmtSymbolic[Id]) 1784 return Id; 1785 } 1786 return DFMT_UNDEF; 1787 } 1788 1789 StringRef getDfmtName(unsigned Id) { 1790 assert(Id <= DFMT_MAX); 1791 return DfmtSymbolic[Id]; 1792 } 1793 1794 static StringLiteral const *getNfmtLookupTable(const MCSubtargetInfo &STI) { 1795 if (isSI(STI) || isCI(STI)) 1796 return NfmtSymbolicSICI; 1797 if (isVI(STI) || isGFX9(STI)) 1798 return NfmtSymbolicVI; 1799 return NfmtSymbolicGFX10; 1800 } 1801 1802 int64_t getNfmt(const StringRef Name, const MCSubtargetInfo &STI) { 1803 auto lookupTable = getNfmtLookupTable(STI); 1804 for (int Id = NFMT_MIN; Id <= NFMT_MAX; ++Id) { 1805 if (Name == lookupTable[Id]) 1806 return Id; 1807 } 1808 return NFMT_UNDEF; 1809 } 1810 1811 StringRef getNfmtName(unsigned Id, const MCSubtargetInfo &STI) { 1812 assert(Id <= NFMT_MAX); 1813 return getNfmtLookupTable(STI)[Id]; 1814 } 1815 1816 bool isValidDfmtNfmt(unsigned Id, const MCSubtargetInfo &STI) { 1817 unsigned Dfmt; 1818 unsigned Nfmt; 1819 decodeDfmtNfmt(Id, Dfmt, Nfmt); 1820 return isValidNfmt(Nfmt, STI); 1821 } 1822 1823 bool isValidNfmt(unsigned Id, const MCSubtargetInfo &STI) { 1824 return !getNfmtName(Id, STI).empty(); 1825 } 1826 1827 int64_t encodeDfmtNfmt(unsigned Dfmt, unsigned Nfmt) { 1828 return (Dfmt << DFMT_SHIFT) | (Nfmt << NFMT_SHIFT); 1829 } 1830 1831 void decodeDfmtNfmt(unsigned Format, unsigned &Dfmt, unsigned &Nfmt) { 1832 Dfmt = (Format >> DFMT_SHIFT) & DFMT_MASK; 1833 Nfmt = (Format >> NFMT_SHIFT) & NFMT_MASK; 1834 } 1835 1836 int64_t getUnifiedFormat(const StringRef Name, const MCSubtargetInfo &STI) { 1837 if (isGFX11Plus(STI)) { 1838 for (int Id = UfmtGFX11::UFMT_FIRST; Id <= UfmtGFX11::UFMT_LAST; ++Id) { 1839 if (Name == UfmtSymbolicGFX11[Id]) 1840 return Id; 1841 } 1842 } else { 1843 for (int Id = UfmtGFX10::UFMT_FIRST; Id <= UfmtGFX10::UFMT_LAST; ++Id) { 1844 if (Name == UfmtSymbolicGFX10[Id]) 1845 return Id; 1846 } 1847 } 1848 return UFMT_UNDEF; 1849 } 1850 1851 StringRef getUnifiedFormatName(unsigned Id, const MCSubtargetInfo &STI) { 1852 if(isValidUnifiedFormat(Id, STI)) 1853 
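// GFX10 and GFX11+ use different unified-format tables, so the symbolic
// name has to be taken from the table matching the subtarget generation.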
return isGFX10(STI) ? UfmtSymbolicGFX10[Id] : UfmtSymbolicGFX11[Id]; 1854 return ""; 1855 } 1856 1857 bool isValidUnifiedFormat(unsigned Id, const MCSubtargetInfo &STI) { 1858 return isGFX10(STI) ? Id <= UfmtGFX10::UFMT_LAST : Id <= UfmtGFX11::UFMT_LAST; 1859 } 1860 1861 int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt, 1862 const MCSubtargetInfo &STI) { 1863 int64_t Fmt = encodeDfmtNfmt(Dfmt, Nfmt); 1864 if (isGFX11Plus(STI)) { 1865 for (int Id = UfmtGFX11::UFMT_FIRST; Id <= UfmtGFX11::UFMT_LAST; ++Id) { 1866 if (Fmt == DfmtNfmt2UFmtGFX11[Id]) 1867 return Id; 1868 } 1869 } else { 1870 for (int Id = UfmtGFX10::UFMT_FIRST; Id <= UfmtGFX10::UFMT_LAST; ++Id) { 1871 if (Fmt == DfmtNfmt2UFmtGFX10[Id]) 1872 return Id; 1873 } 1874 } 1875 return UFMT_UNDEF; 1876 } 1877 1878 bool isValidFormatEncoding(unsigned Val, const MCSubtargetInfo &STI) { 1879 return isGFX10Plus(STI) ? (Val <= UFMT_MAX) : (Val <= DFMT_NFMT_MAX); 1880 } 1881 1882 unsigned getDefaultFormatEncoding(const MCSubtargetInfo &STI) { 1883 if (isGFX10Plus(STI)) 1884 return UFMT_DEFAULT; 1885 return DFMT_NFMT_DEFAULT; 1886 } 1887 1888 } // namespace MTBUFFormat 1889 1890 //===----------------------------------------------------------------------===// 1891 // SendMsg 1892 //===----------------------------------------------------------------------===// 1893 1894 namespace SendMsg { 1895 1896 static uint64_t getMsgIdMask(const MCSubtargetInfo &STI) { 1897 return isGFX11Plus(STI) ? ID_MASK_GFX11Plus_ : ID_MASK_PreGFX11_; 1898 } 1899 1900 bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI) { 1901 return (MsgId & ~(getMsgIdMask(STI))) == 0; 1902 } 1903 1904 bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI, 1905 bool Strict) { 1906 assert(isValidMsgId(MsgId, STI)); 1907 1908 if (!Strict) 1909 return 0 <= OpId && isUInt<OP_WIDTH_>(OpId); 1910 1911 if (msgRequiresOp(MsgId, STI)) { 1912 if (MsgId == ID_GS_PreGFX11 && OpId == OP_GS_NOP) 1913 return false; 1914 1915 return !getMsgOpName(MsgId, OpId, STI).empty(); 1916 } 1917 1918 return OpId == OP_NONE_; 1919 } 1920 1921 bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId, 1922 const MCSubtargetInfo &STI, bool Strict) { 1923 assert(isValidMsgOp(MsgId, OpId, STI, Strict)); 1924 1925 if (!Strict) 1926 return 0 <= StreamId && isUInt<STREAM_ID_WIDTH_>(StreamId); 1927 1928 if (!isGFX11Plus(STI)) { 1929 switch (MsgId) { 1930 case ID_GS_PreGFX11: 1931 return STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_; 1932 case ID_GS_DONE_PreGFX11: 1933 return (OpId == OP_GS_NOP) ? 
1934 (StreamId == STREAM_ID_NONE_) : 1935 (STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_); 1936 } 1937 } 1938 return StreamId == STREAM_ID_NONE_; 1939 } 1940 1941 bool msgRequiresOp(int64_t MsgId, const MCSubtargetInfo &STI) { 1942 return MsgId == ID_SYSMSG || 1943 (!isGFX11Plus(STI) && 1944 (MsgId == ID_GS_PreGFX11 || MsgId == ID_GS_DONE_PreGFX11)); 1945 } 1946 1947 bool msgSupportsStream(int64_t MsgId, int64_t OpId, 1948 const MCSubtargetInfo &STI) { 1949 return !isGFX11Plus(STI) && 1950 (MsgId == ID_GS_PreGFX11 || MsgId == ID_GS_DONE_PreGFX11) && 1951 OpId != OP_GS_NOP; 1952 } 1953 1954 void decodeMsg(unsigned Val, uint16_t &MsgId, uint16_t &OpId, 1955 uint16_t &StreamId, const MCSubtargetInfo &STI) { 1956 MsgId = Val & getMsgIdMask(STI); 1957 if (isGFX11Plus(STI)) { 1958 OpId = 0; 1959 StreamId = 0; 1960 } else { 1961 OpId = (Val & OP_MASK_) >> OP_SHIFT_; 1962 StreamId = (Val & STREAM_ID_MASK_) >> STREAM_ID_SHIFT_; 1963 } 1964 } 1965 1966 uint64_t encodeMsg(uint64_t MsgId, 1967 uint64_t OpId, 1968 uint64_t StreamId) { 1969 return MsgId | (OpId << OP_SHIFT_) | (StreamId << STREAM_ID_SHIFT_); 1970 } 1971 1972 } // namespace SendMsg 1973 1974 //===----------------------------------------------------------------------===// 1975 // 1976 //===----------------------------------------------------------------------===// 1977 1978 unsigned getInitialPSInputAddr(const Function &F) { 1979 return F.getFnAttributeAsParsedInteger("InitialPSInputAddr", 0); 1980 } 1981 1982 bool getHasColorExport(const Function &F) { 1983 // As a safe default always respond as if PS has color exports. 1984 return F.getFnAttributeAsParsedInteger( 1985 "amdgpu-color-export", 1986 F.getCallingConv() == CallingConv::AMDGPU_PS ? 1 : 0) != 0; 1987 } 1988 1989 bool getHasDepthExport(const Function &F) { 1990 return F.getFnAttributeAsParsedInteger("amdgpu-depth-export", 0) != 0; 1991 } 1992 1993 bool isShader(CallingConv::ID cc) { 1994 switch(cc) { 1995 case CallingConv::AMDGPU_VS: 1996 case CallingConv::AMDGPU_LS: 1997 case CallingConv::AMDGPU_HS: 1998 case CallingConv::AMDGPU_ES: 1999 case CallingConv::AMDGPU_GS: 2000 case CallingConv::AMDGPU_PS: 2001 case CallingConv::AMDGPU_CS_Chain: 2002 case CallingConv::AMDGPU_CS_ChainPreserve: 2003 case CallingConv::AMDGPU_CS: 2004 return true; 2005 default: 2006 return false; 2007 } 2008 } 2009 2010 bool isGraphics(CallingConv::ID cc) { 2011 return isShader(cc) || cc == CallingConv::AMDGPU_Gfx; 2012 } 2013 2014 bool isCompute(CallingConv::ID cc) { 2015 return !isGraphics(cc) || cc == CallingConv::AMDGPU_CS; 2016 } 2017 2018 bool isEntryFunctionCC(CallingConv::ID CC) { 2019 switch (CC) { 2020 case CallingConv::AMDGPU_KERNEL: 2021 case CallingConv::SPIR_KERNEL: 2022 case CallingConv::AMDGPU_VS: 2023 case CallingConv::AMDGPU_GS: 2024 case CallingConv::AMDGPU_PS: 2025 case CallingConv::AMDGPU_CS: 2026 case CallingConv::AMDGPU_ES: 2027 case CallingConv::AMDGPU_HS: 2028 case CallingConv::AMDGPU_LS: 2029 return true; 2030 default: 2031 return false; 2032 } 2033 } 2034 2035 bool isModuleEntryFunctionCC(CallingConv::ID CC) { 2036 switch (CC) { 2037 case CallingConv::AMDGPU_Gfx: 2038 return true; 2039 default: 2040 return isEntryFunctionCC(CC) || isChainCC(CC); 2041 } 2042 } 2043 2044 bool isChainCC(CallingConv::ID CC) { 2045 switch (CC) { 2046 case CallingConv::AMDGPU_CS_Chain: 2047 case CallingConv::AMDGPU_CS_ChainPreserve: 2048 return true; 2049 default: 2050 return false; 2051 } 2052 } 2053 2054 bool isKernelCC(const Function *Func) { 2055 return 
AMDGPU::isModuleEntryFunctionCC(Func->getCallingConv()); 2056 } 2057 2058 bool hasXNACK(const MCSubtargetInfo &STI) { 2059 return STI.hasFeature(AMDGPU::FeatureXNACK); 2060 } 2061 2062 bool hasSRAMECC(const MCSubtargetInfo &STI) { 2063 return STI.hasFeature(AMDGPU::FeatureSRAMECC); 2064 } 2065 2066 bool hasMIMG_R128(const MCSubtargetInfo &STI) { 2067 return STI.hasFeature(AMDGPU::FeatureMIMG_R128) && !STI.hasFeature(AMDGPU::FeatureR128A16); 2068 } 2069 2070 bool hasA16(const MCSubtargetInfo &STI) { 2071 return STI.hasFeature(AMDGPU::FeatureA16); 2072 } 2073 2074 bool hasG16(const MCSubtargetInfo &STI) { 2075 return STI.hasFeature(AMDGPU::FeatureG16); 2076 } 2077 2078 bool hasPackedD16(const MCSubtargetInfo &STI) { 2079 return !STI.hasFeature(AMDGPU::FeatureUnpackedD16VMem) && !isCI(STI) && 2080 !isSI(STI); 2081 } 2082 2083 bool hasGDS(const MCSubtargetInfo &STI) { 2084 return STI.hasFeature(AMDGPU::FeatureGDS); 2085 } 2086 2087 unsigned getNSAMaxSize(const MCSubtargetInfo &STI, bool HasSampler) { 2088 auto Version = getIsaVersion(STI.getCPU()); 2089 if (Version.Major == 10) 2090 return Version.Minor >= 3 ? 13 : 5; 2091 if (Version.Major == 11) 2092 return 5; 2093 if (Version.Major >= 12) 2094 return HasSampler ? 4 : 5; 2095 return 0; 2096 } 2097 2098 unsigned getMaxNumUserSGPRs(const MCSubtargetInfo &STI) { return 16; } 2099 2100 bool isSI(const MCSubtargetInfo &STI) { 2101 return STI.hasFeature(AMDGPU::FeatureSouthernIslands); 2102 } 2103 2104 bool isCI(const MCSubtargetInfo &STI) { 2105 return STI.hasFeature(AMDGPU::FeatureSeaIslands); 2106 } 2107 2108 bool isVI(const MCSubtargetInfo &STI) { 2109 return STI.hasFeature(AMDGPU::FeatureVolcanicIslands); 2110 } 2111 2112 bool isGFX9(const MCSubtargetInfo &STI) { 2113 return STI.hasFeature(AMDGPU::FeatureGFX9); 2114 } 2115 2116 bool isGFX9_GFX10(const MCSubtargetInfo &STI) { 2117 return isGFX9(STI) || isGFX10(STI); 2118 } 2119 2120 bool isGFX9_GFX10_GFX11(const MCSubtargetInfo &STI) { 2121 return isGFX9(STI) || isGFX10(STI) || isGFX11(STI); 2122 } 2123 2124 bool isGFX8_GFX9_GFX10(const MCSubtargetInfo &STI) { 2125 return isVI(STI) || isGFX9(STI) || isGFX10(STI); 2126 } 2127 2128 bool isGFX8Plus(const MCSubtargetInfo &STI) { 2129 return isVI(STI) || isGFX9Plus(STI); 2130 } 2131 2132 bool isGFX9Plus(const MCSubtargetInfo &STI) { 2133 return isGFX9(STI) || isGFX10Plus(STI); 2134 } 2135 2136 bool isNotGFX9Plus(const MCSubtargetInfo &STI) { return !isGFX9Plus(STI); } 2137 2138 bool isGFX10(const MCSubtargetInfo &STI) { 2139 return STI.hasFeature(AMDGPU::FeatureGFX10); 2140 } 2141 2142 bool isGFX10_GFX11(const MCSubtargetInfo &STI) { 2143 return isGFX10(STI) || isGFX11(STI); 2144 } 2145 2146 bool isGFX10Plus(const MCSubtargetInfo &STI) { 2147 return isGFX10(STI) || isGFX11Plus(STI); 2148 } 2149 2150 bool isGFX11(const MCSubtargetInfo &STI) { 2151 return STI.hasFeature(AMDGPU::FeatureGFX11); 2152 } 2153 2154 bool isGFX11Plus(const MCSubtargetInfo &STI) { 2155 return isGFX11(STI) || isGFX12Plus(STI); 2156 } 2157 2158 bool isGFX12(const MCSubtargetInfo &STI) { 2159 return STI.getFeatureBits()[AMDGPU::FeatureGFX12]; 2160 } 2161 2162 bool isGFX12Plus(const MCSubtargetInfo &STI) { return isGFX12(STI); } 2163 2164 bool isNotGFX12Plus(const MCSubtargetInfo &STI) { return !isGFX12Plus(STI); } 2165 2166 bool isNotGFX11Plus(const MCSubtargetInfo &STI) { 2167 return !isGFX11Plus(STI); 2168 } 2169 2170 bool isNotGFX10Plus(const MCSubtargetInfo &STI) { 2171 return isSI(STI) || isCI(STI) || isVI(STI) || isGFX9(STI); 2172 } 2173 2174 bool isGFX10Before1030(const 
MCSubtargetInfo &STI) { 2175 return isGFX10(STI) && !AMDGPU::isGFX10_BEncoding(STI); 2176 } 2177 2178 bool isGCN3Encoding(const MCSubtargetInfo &STI) { 2179 return STI.hasFeature(AMDGPU::FeatureGCN3Encoding); 2180 } 2181 2182 bool isGFX10_AEncoding(const MCSubtargetInfo &STI) { 2183 return STI.hasFeature(AMDGPU::FeatureGFX10_AEncoding); 2184 } 2185 2186 bool isGFX10_BEncoding(const MCSubtargetInfo &STI) { 2187 return STI.hasFeature(AMDGPU::FeatureGFX10_BEncoding); 2188 } 2189 2190 bool hasGFX10_3Insts(const MCSubtargetInfo &STI) { 2191 return STI.hasFeature(AMDGPU::FeatureGFX10_3Insts); 2192 } 2193 2194 bool isGFX10_3_GFX11(const MCSubtargetInfo &STI) { 2195 return isGFX10_BEncoding(STI) && !isGFX12Plus(STI); 2196 } 2197 2198 bool isGFX90A(const MCSubtargetInfo &STI) { 2199 return STI.hasFeature(AMDGPU::FeatureGFX90AInsts); 2200 } 2201 2202 bool isGFX940(const MCSubtargetInfo &STI) { 2203 return STI.hasFeature(AMDGPU::FeatureGFX940Insts); 2204 } 2205 2206 bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI) { 2207 return STI.hasFeature(AMDGPU::FeatureArchitectedFlatScratch); 2208 } 2209 2210 bool hasMAIInsts(const MCSubtargetInfo &STI) { 2211 return STI.hasFeature(AMDGPU::FeatureMAIInsts); 2212 } 2213 2214 bool hasVOPD(const MCSubtargetInfo &STI) { 2215 return STI.hasFeature(AMDGPU::FeatureVOPD); 2216 } 2217 2218 bool hasDPPSrc1SGPR(const MCSubtargetInfo &STI) { 2219 return STI.hasFeature(AMDGPU::FeatureDPPSrc1SGPR); 2220 } 2221 2222 unsigned hasKernargPreload(const MCSubtargetInfo &STI) { 2223 return STI.hasFeature(AMDGPU::FeatureKernargPreload); 2224 } 2225 2226 int32_t getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR, 2227 int32_t ArgNumVGPR) { 2228 if (has90AInsts && ArgNumAGPR) 2229 return alignTo(ArgNumVGPR, 4) + ArgNumAGPR; 2230 return std::max(ArgNumVGPR, ArgNumAGPR); 2231 } 2232 2233 bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI) { 2234 const MCRegisterClass SGPRClass = TRI->getRegClass(AMDGPU::SReg_32RegClassID); 2235 const unsigned FirstSubReg = TRI->getSubReg(Reg, AMDGPU::sub0); 2236 return SGPRClass.contains(FirstSubReg != 0 ? 
FirstSubReg : Reg) || 2237 Reg == AMDGPU::SCC; 2238 } 2239 2240 bool isHi(unsigned Reg, const MCRegisterInfo &MRI) { 2241 return MRI.getEncodingValue(Reg) & AMDGPU::HWEncoding::IS_HI; 2242 } 2243 2244 #define MAP_REG2REG \ 2245 using namespace AMDGPU; \ 2246 switch(Reg) { \ 2247 default: return Reg; \ 2248 CASE_CI_VI(FLAT_SCR) \ 2249 CASE_CI_VI(FLAT_SCR_LO) \ 2250 CASE_CI_VI(FLAT_SCR_HI) \ 2251 CASE_VI_GFX9PLUS(TTMP0) \ 2252 CASE_VI_GFX9PLUS(TTMP1) \ 2253 CASE_VI_GFX9PLUS(TTMP2) \ 2254 CASE_VI_GFX9PLUS(TTMP3) \ 2255 CASE_VI_GFX9PLUS(TTMP4) \ 2256 CASE_VI_GFX9PLUS(TTMP5) \ 2257 CASE_VI_GFX9PLUS(TTMP6) \ 2258 CASE_VI_GFX9PLUS(TTMP7) \ 2259 CASE_VI_GFX9PLUS(TTMP8) \ 2260 CASE_VI_GFX9PLUS(TTMP9) \ 2261 CASE_VI_GFX9PLUS(TTMP10) \ 2262 CASE_VI_GFX9PLUS(TTMP11) \ 2263 CASE_VI_GFX9PLUS(TTMP12) \ 2264 CASE_VI_GFX9PLUS(TTMP13) \ 2265 CASE_VI_GFX9PLUS(TTMP14) \ 2266 CASE_VI_GFX9PLUS(TTMP15) \ 2267 CASE_VI_GFX9PLUS(TTMP0_TTMP1) \ 2268 CASE_VI_GFX9PLUS(TTMP2_TTMP3) \ 2269 CASE_VI_GFX9PLUS(TTMP4_TTMP5) \ 2270 CASE_VI_GFX9PLUS(TTMP6_TTMP7) \ 2271 CASE_VI_GFX9PLUS(TTMP8_TTMP9) \ 2272 CASE_VI_GFX9PLUS(TTMP10_TTMP11) \ 2273 CASE_VI_GFX9PLUS(TTMP12_TTMP13) \ 2274 CASE_VI_GFX9PLUS(TTMP14_TTMP15) \ 2275 CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3) \ 2276 CASE_VI_GFX9PLUS(TTMP4_TTMP5_TTMP6_TTMP7) \ 2277 CASE_VI_GFX9PLUS(TTMP8_TTMP9_TTMP10_TTMP11) \ 2278 CASE_VI_GFX9PLUS(TTMP12_TTMP13_TTMP14_TTMP15) \ 2279 CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7) \ 2280 CASE_VI_GFX9PLUS(TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11) \ 2281 CASE_VI_GFX9PLUS(TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \ 2282 CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \ 2283 CASE_GFXPRE11_GFX11PLUS(M0) \ 2284 CASE_GFXPRE11_GFX11PLUS(SGPR_NULL) \ 2285 CASE_GFXPRE11_GFX11PLUS_TO(SGPR_NULL64, SGPR_NULL) \ 2286 } 2287 2288 #define CASE_CI_VI(node) \ 2289 assert(!isSI(STI)); \ 2290 case node: return isCI(STI) ? node##_ci : node##_vi; 2291 2292 #define CASE_VI_GFX9PLUS(node) \ 2293 case node: return isGFX9Plus(STI) ? node##_gfx9plus : node##_vi; 2294 2295 #define CASE_GFXPRE11_GFX11PLUS(node) \ 2296 case node: return isGFX11Plus(STI) ? node##_gfx11plus : node##_gfxpre11; 2297 2298 #define CASE_GFXPRE11_GFX11PLUS_TO(node, result) \ 2299 case node: return isGFX11Plus(STI) ? 
result##_gfx11plus : result##_gfxpre11; 2300 2301 unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI) { 2302 if (STI.getTargetTriple().getArch() == Triple::r600) 2303 return Reg; 2304 MAP_REG2REG 2305 } 2306 2307 #undef CASE_CI_VI 2308 #undef CASE_VI_GFX9PLUS 2309 #undef CASE_GFXPRE11_GFX11PLUS 2310 #undef CASE_GFXPRE11_GFX11PLUS_TO 2311 2312 #define CASE_CI_VI(node) case node##_ci: case node##_vi: return node; 2313 #define CASE_VI_GFX9PLUS(node) case node##_vi: case node##_gfx9plus: return node; 2314 #define CASE_GFXPRE11_GFX11PLUS(node) case node##_gfx11plus: case node##_gfxpre11: return node; 2315 #define CASE_GFXPRE11_GFX11PLUS_TO(node, result) 2316 2317 unsigned mc2PseudoReg(unsigned Reg) { 2318 MAP_REG2REG 2319 } 2320 2321 bool isInlineValue(unsigned Reg) { 2322 switch (Reg) { 2323 case AMDGPU::SRC_SHARED_BASE_LO: 2324 case AMDGPU::SRC_SHARED_BASE: 2325 case AMDGPU::SRC_SHARED_LIMIT_LO: 2326 case AMDGPU::SRC_SHARED_LIMIT: 2327 case AMDGPU::SRC_PRIVATE_BASE_LO: 2328 case AMDGPU::SRC_PRIVATE_BASE: 2329 case AMDGPU::SRC_PRIVATE_LIMIT_LO: 2330 case AMDGPU::SRC_PRIVATE_LIMIT: 2331 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 2332 return true; 2333 case AMDGPU::SRC_VCCZ: 2334 case AMDGPU::SRC_EXECZ: 2335 case AMDGPU::SRC_SCC: 2336 return true; 2337 case AMDGPU::SGPR_NULL: 2338 return true; 2339 default: 2340 return false; 2341 } 2342 } 2343 2344 #undef CASE_CI_VI 2345 #undef CASE_VI_GFX9PLUS 2346 #undef CASE_GFXPRE11_GFX11PLUS 2347 #undef CASE_GFXPRE11_GFX11PLUS_TO 2348 #undef MAP_REG2REG 2349 2350 bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo) { 2351 assert(OpNo < Desc.NumOperands); 2352 unsigned OpType = Desc.operands()[OpNo].OperandType; 2353 return OpType >= AMDGPU::OPERAND_SRC_FIRST && 2354 OpType <= AMDGPU::OPERAND_SRC_LAST; 2355 } 2356 2357 bool isKImmOperand(const MCInstrDesc &Desc, unsigned OpNo) { 2358 assert(OpNo < Desc.NumOperands); 2359 unsigned OpType = Desc.operands()[OpNo].OperandType; 2360 return OpType >= AMDGPU::OPERAND_KIMM_FIRST && 2361 OpType <= AMDGPU::OPERAND_KIMM_LAST; 2362 } 2363 2364 bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo) { 2365 assert(OpNo < Desc.NumOperands); 2366 unsigned OpType = Desc.operands()[OpNo].OperandType; 2367 switch (OpType) { 2368 case AMDGPU::OPERAND_REG_IMM_FP32: 2369 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 2370 case AMDGPU::OPERAND_REG_IMM_FP64: 2371 case AMDGPU::OPERAND_REG_IMM_FP16: 2372 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: 2373 case AMDGPU::OPERAND_REG_IMM_V2FP16: 2374 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 2375 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 2376 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 2377 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 2378 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 2379 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 2380 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 2381 case AMDGPU::OPERAND_REG_IMM_V2FP32: 2382 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 2383 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 2384 return true; 2385 default: 2386 return false; 2387 } 2388 } 2389 2390 bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo) { 2391 assert(OpNo < Desc.NumOperands); 2392 unsigned OpType = Desc.operands()[OpNo].OperandType; 2393 return (OpType >= AMDGPU::OPERAND_REG_INLINE_C_FIRST && 2394 OpType <= AMDGPU::OPERAND_REG_INLINE_C_LAST) || 2395 (OpType >= AMDGPU::OPERAND_REG_INLINE_AC_FIRST && 2396 OpType <= AMDGPU::OPERAND_REG_INLINE_AC_LAST); 2397 } 2398 2399 // Avoid using MCRegisterClass::getSize, since that function will go away 2400 // (move from MC* level to Target* 
level). Return size in bits. 2401 unsigned getRegBitWidth(unsigned RCID) { 2402 switch (RCID) { 2403 case AMDGPU::SGPR_LO16RegClassID: 2404 case AMDGPU::AGPR_LO16RegClassID: 2405 return 16; 2406 case AMDGPU::SGPR_32RegClassID: 2407 case AMDGPU::VGPR_32RegClassID: 2408 case AMDGPU::VRegOrLds_32RegClassID: 2409 case AMDGPU::AGPR_32RegClassID: 2410 case AMDGPU::VS_32RegClassID: 2411 case AMDGPU::AV_32RegClassID: 2412 case AMDGPU::SReg_32RegClassID: 2413 case AMDGPU::SReg_32_XM0RegClassID: 2414 case AMDGPU::SRegOrLds_32RegClassID: 2415 return 32; 2416 case AMDGPU::SGPR_64RegClassID: 2417 case AMDGPU::VS_64RegClassID: 2418 case AMDGPU::SReg_64RegClassID: 2419 case AMDGPU::VReg_64RegClassID: 2420 case AMDGPU::AReg_64RegClassID: 2421 case AMDGPU::SReg_64_XEXECRegClassID: 2422 case AMDGPU::VReg_64_Align2RegClassID: 2423 case AMDGPU::AReg_64_Align2RegClassID: 2424 case AMDGPU::AV_64RegClassID: 2425 case AMDGPU::AV_64_Align2RegClassID: 2426 return 64; 2427 case AMDGPU::SGPR_96RegClassID: 2428 case AMDGPU::SReg_96RegClassID: 2429 case AMDGPU::VReg_96RegClassID: 2430 case AMDGPU::AReg_96RegClassID: 2431 case AMDGPU::VReg_96_Align2RegClassID: 2432 case AMDGPU::AReg_96_Align2RegClassID: 2433 case AMDGPU::AV_96RegClassID: 2434 case AMDGPU::AV_96_Align2RegClassID: 2435 return 96; 2436 case AMDGPU::SGPR_128RegClassID: 2437 case AMDGPU::SReg_128RegClassID: 2438 case AMDGPU::VReg_128RegClassID: 2439 case AMDGPU::AReg_128RegClassID: 2440 case AMDGPU::VReg_128_Align2RegClassID: 2441 case AMDGPU::AReg_128_Align2RegClassID: 2442 case AMDGPU::AV_128RegClassID: 2443 case AMDGPU::AV_128_Align2RegClassID: 2444 return 128; 2445 case AMDGPU::SGPR_160RegClassID: 2446 case AMDGPU::SReg_160RegClassID: 2447 case AMDGPU::VReg_160RegClassID: 2448 case AMDGPU::AReg_160RegClassID: 2449 case AMDGPU::VReg_160_Align2RegClassID: 2450 case AMDGPU::AReg_160_Align2RegClassID: 2451 case AMDGPU::AV_160RegClassID: 2452 case AMDGPU::AV_160_Align2RegClassID: 2453 return 160; 2454 case AMDGPU::SGPR_192RegClassID: 2455 case AMDGPU::SReg_192RegClassID: 2456 case AMDGPU::VReg_192RegClassID: 2457 case AMDGPU::AReg_192RegClassID: 2458 case AMDGPU::VReg_192_Align2RegClassID: 2459 case AMDGPU::AReg_192_Align2RegClassID: 2460 case AMDGPU::AV_192RegClassID: 2461 case AMDGPU::AV_192_Align2RegClassID: 2462 return 192; 2463 case AMDGPU::SGPR_224RegClassID: 2464 case AMDGPU::SReg_224RegClassID: 2465 case AMDGPU::VReg_224RegClassID: 2466 case AMDGPU::AReg_224RegClassID: 2467 case AMDGPU::VReg_224_Align2RegClassID: 2468 case AMDGPU::AReg_224_Align2RegClassID: 2469 case AMDGPU::AV_224RegClassID: 2470 case AMDGPU::AV_224_Align2RegClassID: 2471 return 224; 2472 case AMDGPU::SGPR_256RegClassID: 2473 case AMDGPU::SReg_256RegClassID: 2474 case AMDGPU::VReg_256RegClassID: 2475 case AMDGPU::AReg_256RegClassID: 2476 case AMDGPU::VReg_256_Align2RegClassID: 2477 case AMDGPU::AReg_256_Align2RegClassID: 2478 case AMDGPU::AV_256RegClassID: 2479 case AMDGPU::AV_256_Align2RegClassID: 2480 return 256; 2481 case AMDGPU::SGPR_288RegClassID: 2482 case AMDGPU::SReg_288RegClassID: 2483 case AMDGPU::VReg_288RegClassID: 2484 case AMDGPU::AReg_288RegClassID: 2485 case AMDGPU::VReg_288_Align2RegClassID: 2486 case AMDGPU::AReg_288_Align2RegClassID: 2487 case AMDGPU::AV_288RegClassID: 2488 case AMDGPU::AV_288_Align2RegClassID: 2489 return 288; 2490 case AMDGPU::SGPR_320RegClassID: 2491 case AMDGPU::SReg_320RegClassID: 2492 case AMDGPU::VReg_320RegClassID: 2493 case AMDGPU::AReg_320RegClassID: 2494 case AMDGPU::VReg_320_Align2RegClassID: 2495 case AMDGPU::AReg_320_Align2RegClassID: 
2496 case AMDGPU::AV_320RegClassID: 2497 case AMDGPU::AV_320_Align2RegClassID: 2498 return 320; 2499 case AMDGPU::SGPR_352RegClassID: 2500 case AMDGPU::SReg_352RegClassID: 2501 case AMDGPU::VReg_352RegClassID: 2502 case AMDGPU::AReg_352RegClassID: 2503 case AMDGPU::VReg_352_Align2RegClassID: 2504 case AMDGPU::AReg_352_Align2RegClassID: 2505 case AMDGPU::AV_352RegClassID: 2506 case AMDGPU::AV_352_Align2RegClassID: 2507 return 352; 2508 case AMDGPU::SGPR_384RegClassID: 2509 case AMDGPU::SReg_384RegClassID: 2510 case AMDGPU::VReg_384RegClassID: 2511 case AMDGPU::AReg_384RegClassID: 2512 case AMDGPU::VReg_384_Align2RegClassID: 2513 case AMDGPU::AReg_384_Align2RegClassID: 2514 case AMDGPU::AV_384RegClassID: 2515 case AMDGPU::AV_384_Align2RegClassID: 2516 return 384; 2517 case AMDGPU::SGPR_512RegClassID: 2518 case AMDGPU::SReg_512RegClassID: 2519 case AMDGPU::VReg_512RegClassID: 2520 case AMDGPU::AReg_512RegClassID: 2521 case AMDGPU::VReg_512_Align2RegClassID: 2522 case AMDGPU::AReg_512_Align2RegClassID: 2523 case AMDGPU::AV_512RegClassID: 2524 case AMDGPU::AV_512_Align2RegClassID: 2525 return 512; 2526 case AMDGPU::SGPR_1024RegClassID: 2527 case AMDGPU::SReg_1024RegClassID: 2528 case AMDGPU::VReg_1024RegClassID: 2529 case AMDGPU::AReg_1024RegClassID: 2530 case AMDGPU::VReg_1024_Align2RegClassID: 2531 case AMDGPU::AReg_1024_Align2RegClassID: 2532 case AMDGPU::AV_1024RegClassID: 2533 case AMDGPU::AV_1024_Align2RegClassID: 2534 return 1024; 2535 default: 2536 llvm_unreachable("Unexpected register class"); 2537 } 2538 } 2539 2540 unsigned getRegBitWidth(const MCRegisterClass &RC) { 2541 return getRegBitWidth(RC.getID()); 2542 } 2543 2544 unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc, 2545 unsigned OpNo) { 2546 assert(OpNo < Desc.NumOperands); 2547 unsigned RCID = Desc.operands()[OpNo].RegClass; 2548 return getRegBitWidth(RCID) / 8; 2549 } 2550 2551 bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi) { 2552 if (isInlinableIntLiteral(Literal)) 2553 return true; 2554 2555 uint64_t Val = static_cast<uint64_t>(Literal); 2556 return (Val == llvm::bit_cast<uint64_t>(0.0)) || 2557 (Val == llvm::bit_cast<uint64_t>(1.0)) || 2558 (Val == llvm::bit_cast<uint64_t>(-1.0)) || 2559 (Val == llvm::bit_cast<uint64_t>(0.5)) || 2560 (Val == llvm::bit_cast<uint64_t>(-0.5)) || 2561 (Val == llvm::bit_cast<uint64_t>(2.0)) || 2562 (Val == llvm::bit_cast<uint64_t>(-2.0)) || 2563 (Val == llvm::bit_cast<uint64_t>(4.0)) || 2564 (Val == llvm::bit_cast<uint64_t>(-4.0)) || 2565 (Val == 0x3fc45f306dc9c882 && HasInv2Pi); 2566 } 2567 2568 bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi) { 2569 if (isInlinableIntLiteral(Literal)) 2570 return true; 2571 2572 // The actual type of the operand does not seem to matter as long 2573 // as the bits match one of the inline immediate values. For example: 2574 // 2575 // -nan has the hexadecimal encoding of 0xfffffffe which is -2 in decimal, 2576 // so it is a legal inline immediate. 2577 // 2578 // 1065353216 has the hexadecimal encoding 0x3f800000 which is 1.0f in 2579 // floating-point, so it is a legal inline immediate. 
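//
// As a further worked example (matching the checks below): 0x3e22f983 is the
// single-precision encoding of 1/(2*pi); it is only treated as inlinable when
// HasInv2Pi is set, because the inv_2pi inline constant is not available on
// all subtargets.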
2580 2581 uint32_t Val = static_cast<uint32_t>(Literal); 2582 return (Val == llvm::bit_cast<uint32_t>(0.0f)) || 2583 (Val == llvm::bit_cast<uint32_t>(1.0f)) || 2584 (Val == llvm::bit_cast<uint32_t>(-1.0f)) || 2585 (Val == llvm::bit_cast<uint32_t>(0.5f)) || 2586 (Val == llvm::bit_cast<uint32_t>(-0.5f)) || 2587 (Val == llvm::bit_cast<uint32_t>(2.0f)) || 2588 (Val == llvm::bit_cast<uint32_t>(-2.0f)) || 2589 (Val == llvm::bit_cast<uint32_t>(4.0f)) || 2590 (Val == llvm::bit_cast<uint32_t>(-4.0f)) || 2591 (Val == 0x3e22f983 && HasInv2Pi); 2592 } 2593 2594 bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi) { 2595 if (!HasInv2Pi) 2596 return false; 2597 if (isInlinableIntLiteral(Literal)) 2598 return true; 2599 uint16_t Val = static_cast<uint16_t>(Literal); 2600 return Val == 0x3F00 || // 0.5 2601 Val == 0xBF00 || // -0.5 2602 Val == 0x3F80 || // 1.0 2603 Val == 0xBF80 || // -1.0 2604 Val == 0x4000 || // 2.0 2605 Val == 0xC000 || // -2.0 2606 Val == 0x4080 || // 4.0 2607 Val == 0xC080 || // -4.0 2608 Val == 0x3E22; // 1.0 / (2.0 * pi) 2609 } 2610 2611 bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi) { 2612 return isInlinableLiteral32(Literal, HasInv2Pi); 2613 } 2614 2615 bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi) { 2616 if (!HasInv2Pi) 2617 return false; 2618 if (isInlinableIntLiteral(Literal)) 2619 return true; 2620 uint16_t Val = static_cast<uint16_t>(Literal); 2621 return Val == 0x3C00 || // 1.0 2622 Val == 0xBC00 || // -1.0 2623 Val == 0x3800 || // 0.5 2624 Val == 0xB800 || // -0.5 2625 Val == 0x4000 || // 2.0 2626 Val == 0xC000 || // -2.0 2627 Val == 0x4400 || // 4.0 2628 Val == 0xC400 || // -4.0 2629 Val == 0x3118; // 1/2pi 2630 } 2631 2632 std::optional<unsigned> getInlineEncodingV216(bool IsFloat, uint32_t Literal) { 2633 // Unfortunately, the Instruction Set Architecture Reference Guide is 2634 // misleading about how the inline operands work for (packed) 16-bit 2635 // instructions. In a nutshell, the actual HW behavior is: 2636 // 2637 // - integer encodings (-16 .. 
64) are always produced as sign-extended 2638 // 32-bit values 2639 // - float encodings are produced as: 2640 // - for F16 instructions: corresponding half-precision float values in 2641 // the LSBs, 0 in the MSBs 2642 // - for UI16 instructions: corresponding single-precision float value 2643 int32_t Signed = static_cast<int32_t>(Literal); 2644 if (Signed >= 0 && Signed <= 64) 2645 return 128 + Signed; 2646 2647 if (Signed >= -16 && Signed <= -1) 2648 return 192 + std::abs(Signed); 2649 2650 if (IsFloat) { 2651 // clang-format off 2652 switch (Literal) { 2653 case 0x3800: return 240; // 0.5 2654 case 0xB800: return 241; // -0.5 2655 case 0x3C00: return 242; // 1.0 2656 case 0xBC00: return 243; // -1.0 2657 case 0x4000: return 244; // 2.0 2658 case 0xC000: return 245; // -2.0 2659 case 0x4400: return 246; // 4.0 2660 case 0xC400: return 247; // -4.0 2661 case 0x3118: return 248; // 1.0 / (2.0 * pi) 2662 default: break; 2663 } 2664 // clang-format on 2665 } else { 2666 // clang-format off 2667 switch (Literal) { 2668 case 0x3F000000: return 240; // 0.5 2669 case 0xBF000000: return 241; // -0.5 2670 case 0x3F800000: return 242; // 1.0 2671 case 0xBF800000: return 243; // -1.0 2672 case 0x40000000: return 244; // 2.0 2673 case 0xC0000000: return 245; // -2.0 2674 case 0x40800000: return 246; // 4.0 2675 case 0xC0800000: return 247; // -4.0 2676 case 0x3E22F983: return 248; // 1.0 / (2.0 * pi) 2677 default: break; 2678 } 2679 // clang-format on 2680 } 2681 2682 return {}; 2683 } 2684 2685 // Encoding of the literal as an inline constant for a V_PK_*_IU16 instruction 2686 // or nullopt. 2687 std::optional<unsigned> getInlineEncodingV2I16(uint32_t Literal) { 2688 return getInlineEncodingV216(false, Literal); 2689 } 2690 2691 // Encoding of the literal as an inline constant for a V_PK_*_BF16 instruction 2692 // or nullopt. 2693 std::optional<unsigned> getInlineEncodingV2BF16(uint32_t Literal) { 2694 int32_t Signed = static_cast<int32_t>(Literal); 2695 if (Signed >= 0 && Signed <= 64) 2696 return 128 + Signed; 2697 2698 if (Signed >= -16 && Signed <= -1) 2699 return 192 + std::abs(Signed); 2700 2701 // clang-format off 2702 switch (Literal) { 2703 case 0x3F00: return 240; // 0.5 2704 case 0xBF00: return 241; // -0.5 2705 case 0x3F80: return 242; // 1.0 2706 case 0xBF80: return 243; // -1.0 2707 case 0x4000: return 244; // 2.0 2708 case 0xC000: return 245; // -2.0 2709 case 0x4080: return 246; // 4.0 2710 case 0xC080: return 247; // -4.0 2711 case 0x3E22: return 248; // 1.0 / (2.0 * pi) 2712 default: break; 2713 } 2714 // clang-format on 2715 2716 return std::nullopt; 2717 } 2718 2719 // Encoding of the literal as an inline constant for a V_PK_*_F16 instruction 2720 // or nullopt. 2721 std::optional<unsigned> getInlineEncodingV2F16(uint32_t Literal) { 2722 return getInlineEncodingV216(true, Literal); 2723 } 2724 2725 // Whether the given literal can be inlined for a V_PK_* instruction. 
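//
// A small, illustrative usage sketch for the helpers above: for a packed-FP16
// operand, the 32-bit literal 0x3C00 holds +1.0 in the low half and +0.0 in
// the high half, which is exactly the value inline constant 242 produces, so
//
//   if (std::optional<unsigned> Enc = getInlineEncodingV2F16(0x3C00))
//     ; // *Enc == 242; the literal can be encoded as an inline constant
//
// and isInlinableLiteralV216(0x3C00, OPERAND_REG_IMM_V2FP16) returns true.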
2726 bool isInlinableLiteralV216(uint32_t Literal, uint8_t OpType) { 2727 switch (OpType) { 2728 case AMDGPU::OPERAND_REG_IMM_V2INT16: 2729 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 2730 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 2731 return getInlineEncodingV216(false, Literal).has_value(); 2732 case AMDGPU::OPERAND_REG_IMM_V2FP16: 2733 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 2734 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 2735 return getInlineEncodingV216(true, Literal).has_value(); 2736 case AMDGPU::OPERAND_REG_IMM_V2BF16: 2737 case AMDGPU::OPERAND_REG_INLINE_C_V2BF16: 2738 case AMDGPU::OPERAND_REG_INLINE_AC_V2BF16: 2739 return isInlinableLiteralV2BF16(Literal); 2740 default: 2741 llvm_unreachable("bad packed operand type"); 2742 } 2743 } 2744 2745 // Whether the given literal can be inlined for a V_PK_*_IU16 instruction. 2746 bool isInlinableLiteralV2I16(uint32_t Literal) { 2747 return getInlineEncodingV2I16(Literal).has_value(); 2748 } 2749 2750 // Whether the given literal can be inlined for a V_PK_*_BF16 instruction. 2751 bool isInlinableLiteralV2BF16(uint32_t Literal) { 2752 return getInlineEncodingV2BF16(Literal).has_value(); 2753 } 2754 2755 // Whether the given literal can be inlined for a V_PK_*_F16 instruction. 2756 bool isInlinableLiteralV2F16(uint32_t Literal) { 2757 return getInlineEncodingV2F16(Literal).has_value(); 2758 } 2759 2760 bool isValid32BitLiteral(uint64_t Val, bool IsFP64) { 2761 if (IsFP64) 2762 return !(Val & 0xffffffffu); 2763 2764 return isUInt<32>(Val) || isInt<32>(Val); 2765 } 2766 2767 bool isArgPassedInSGPR(const Argument *A) { 2768 const Function *F = A->getParent(); 2769 2770 // Arguments to compute shaders are never a source of divergence. 2771 CallingConv::ID CC = F->getCallingConv(); 2772 switch (CC) { 2773 case CallingConv::AMDGPU_KERNEL: 2774 case CallingConv::SPIR_KERNEL: 2775 return true; 2776 case CallingConv::AMDGPU_VS: 2777 case CallingConv::AMDGPU_LS: 2778 case CallingConv::AMDGPU_HS: 2779 case CallingConv::AMDGPU_ES: 2780 case CallingConv::AMDGPU_GS: 2781 case CallingConv::AMDGPU_PS: 2782 case CallingConv::AMDGPU_CS: 2783 case CallingConv::AMDGPU_Gfx: 2784 case CallingConv::AMDGPU_CS_Chain: 2785 case CallingConv::AMDGPU_CS_ChainPreserve: 2786 // For non-compute shaders, SGPR inputs are marked with either inreg or 2787 // byval. Everything else is in VGPRs. 2788 return A->hasAttribute(Attribute::InReg) || 2789 A->hasAttribute(Attribute::ByVal); 2790 default: 2791 // TODO: treat i1 as divergent? 2792 return A->hasAttribute(Attribute::InReg); 2793 } 2794 } 2795 2796 bool isArgPassedInSGPR(const CallBase *CB, unsigned ArgNo) { 2797 // Arguments to compute shaders are never a source of divergence. 2798 CallingConv::ID CC = CB->getCallingConv(); 2799 switch (CC) { 2800 case CallingConv::AMDGPU_KERNEL: 2801 case CallingConv::SPIR_KERNEL: 2802 return true; 2803 case CallingConv::AMDGPU_VS: 2804 case CallingConv::AMDGPU_LS: 2805 case CallingConv::AMDGPU_HS: 2806 case CallingConv::AMDGPU_ES: 2807 case CallingConv::AMDGPU_GS: 2808 case CallingConv::AMDGPU_PS: 2809 case CallingConv::AMDGPU_CS: 2810 case CallingConv::AMDGPU_Gfx: 2811 case CallingConv::AMDGPU_CS_Chain: 2812 case CallingConv::AMDGPU_CS_ChainPreserve: 2813 // For non-compute shaders, SGPR inputs are marked with either inreg or 2814 // byval. Everything else is in VGPRs. 
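// For example, an argument marked 'inreg' at an amdgpu_gfx call site is
// expected to be passed in an SGPR, while an unannotated argument is assumed
// to live in VGPRs and may therefore be divergent.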
2815 return CB->paramHasAttr(ArgNo, Attribute::InReg) || 2816 CB->paramHasAttr(ArgNo, Attribute::ByVal); 2817 default: 2818 return CB->paramHasAttr(ArgNo, Attribute::InReg); 2819 } 2820 } 2821 2822 static bool hasSMEMByteOffset(const MCSubtargetInfo &ST) { 2823 return isGCN3Encoding(ST) || isGFX10Plus(ST); 2824 } 2825 2826 bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST, 2827 int64_t EncodedOffset) { 2828 if (isGFX12Plus(ST)) 2829 return isUInt<23>(EncodedOffset); 2830 2831 return hasSMEMByteOffset(ST) ? isUInt<20>(EncodedOffset) 2832 : isUInt<8>(EncodedOffset); 2833 } 2834 2835 bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST, 2836 int64_t EncodedOffset, 2837 bool IsBuffer) { 2838 if (isGFX12Plus(ST)) 2839 return isInt<24>(EncodedOffset); 2840 2841 return !IsBuffer && 2842 hasSMRDSignedImmOffset(ST) && 2843 isInt<21>(EncodedOffset); 2844 } 2845 2846 static bool isDwordAligned(uint64_t ByteOffset) { 2847 return (ByteOffset & 3) == 0; 2848 } 2849 2850 uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST, 2851 uint64_t ByteOffset) { 2852 if (hasSMEMByteOffset(ST)) 2853 return ByteOffset; 2854 2855 assert(isDwordAligned(ByteOffset)); 2856 return ByteOffset >> 2; 2857 } 2858 2859 std::optional<int64_t> getSMRDEncodedOffset(const MCSubtargetInfo &ST, 2860 int64_t ByteOffset, bool IsBuffer, 2861 bool HasSOffset) { 2862 // For unbuffered smem loads, it is illegal for the Immediate Offset to be 2863 // negative if the resulting (Offset + (M0 or SOffset or zero) is negative. 2864 // Handle case where SOffset is not present. 2865 if (!IsBuffer && !HasSOffset && ByteOffset < 0 && hasSMRDSignedImmOffset(ST)) 2866 return std::nullopt; 2867 2868 if (isGFX12Plus(ST)) // 24 bit signed offsets 2869 return isInt<24>(ByteOffset) ? std::optional<int64_t>(ByteOffset) 2870 : std::nullopt; 2871 2872 // The signed version is always a byte offset. 2873 if (!IsBuffer && hasSMRDSignedImmOffset(ST)) { 2874 assert(hasSMEMByteOffset(ST)); 2875 return isInt<20>(ByteOffset) ? std::optional<int64_t>(ByteOffset) 2876 : std::nullopt; 2877 } 2878 2879 if (!isDwordAligned(ByteOffset) && !hasSMEMByteOffset(ST)) 2880 return std::nullopt; 2881 2882 int64_t EncodedOffset = convertSMRDOffsetUnits(ST, ByteOffset); 2883 return isLegalSMRDEncodedUnsignedOffset(ST, EncodedOffset) 2884 ? std::optional<int64_t>(EncodedOffset) 2885 : std::nullopt; 2886 } 2887 2888 std::optional<int64_t> getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST, 2889 int64_t ByteOffset) { 2890 if (!isCI(ST) || !isDwordAligned(ByteOffset)) 2891 return std::nullopt; 2892 2893 int64_t EncodedOffset = convertSMRDOffsetUnits(ST, ByteOffset); 2894 return isUInt<32>(EncodedOffset) ? 
std::optional<int64_t>(EncodedOffset) 2895 : std::nullopt; 2896 } 2897 2898 unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST) { 2899 if (AMDGPU::isGFX10(ST)) 2900 return 12; 2901 2902 if (AMDGPU::isGFX12(ST)) 2903 return 24; 2904 return 13; 2905 } 2906 2907 namespace { 2908 2909 struct SourceOfDivergence { 2910 unsigned Intr; 2911 }; 2912 const SourceOfDivergence *lookupSourceOfDivergence(unsigned Intr); 2913 2914 struct AlwaysUniform { 2915 unsigned Intr; 2916 }; 2917 const AlwaysUniform *lookupAlwaysUniform(unsigned Intr); 2918 2919 #define GET_SourcesOfDivergence_IMPL 2920 #define GET_UniformIntrinsics_IMPL 2921 #define GET_Gfx9BufferFormat_IMPL 2922 #define GET_Gfx10BufferFormat_IMPL 2923 #define GET_Gfx11PlusBufferFormat_IMPL 2924 #include "AMDGPUGenSearchableTables.inc" 2925 2926 } // end anonymous namespace 2927 2928 bool isIntrinsicSourceOfDivergence(unsigned IntrID) { 2929 return lookupSourceOfDivergence(IntrID); 2930 } 2931 2932 bool isIntrinsicAlwaysUniform(unsigned IntrID) { 2933 return lookupAlwaysUniform(IntrID); 2934 } 2935 2936 const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t BitsPerComp, 2937 uint8_t NumComponents, 2938 uint8_t NumFormat, 2939 const MCSubtargetInfo &STI) { 2940 return isGFX11Plus(STI) 2941 ? getGfx11PlusBufferFormatInfo(BitsPerComp, NumComponents, 2942 NumFormat) 2943 : isGFX10(STI) ? getGfx10BufferFormatInfo(BitsPerComp, 2944 NumComponents, NumFormat) 2945 : getGfx9BufferFormatInfo(BitsPerComp, 2946 NumComponents, NumFormat); 2947 } 2948 2949 const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t Format, 2950 const MCSubtargetInfo &STI) { 2951 return isGFX11Plus(STI) ? getGfx11PlusBufferFormatInfo(Format) 2952 : isGFX10(STI) ? getGfx10BufferFormatInfo(Format) 2953 : getGfx9BufferFormatInfo(Format); 2954 } 2955 2956 bool hasAny64BitVGPROperands(const MCInstrDesc &OpDesc) { 2957 for (auto OpName : { OpName::vdst, OpName::src0, OpName::src1, 2958 OpName::src2 }) { 2959 int Idx = getNamedOperandIdx(OpDesc.getOpcode(), OpName); 2960 if (Idx == -1) 2961 continue; 2962 2963 if (OpDesc.operands()[Idx].RegClass == AMDGPU::VReg_64RegClassID || 2964 OpDesc.operands()[Idx].RegClass == AMDGPU::VReg_64_Align2RegClassID) 2965 return true; 2966 } 2967 2968 return false; 2969 } 2970 2971 bool isDPALU_DPP(const MCInstrDesc &OpDesc) { 2972 return hasAny64BitVGPROperands(OpDesc); 2973 } 2974 2975 unsigned getLdsDwGranularity(const MCSubtargetInfo &ST) { 2976 // Currently this is 128 for all subtargets 2977 return 128; 2978 } 2979 2980 } // namespace AMDGPU 2981 2982 raw_ostream &operator<<(raw_ostream &OS, 2983 const AMDGPU::IsaInfo::TargetIDSetting S) { 2984 switch (S) { 2985 case (AMDGPU::IsaInfo::TargetIDSetting::Unsupported): 2986 OS << "Unsupported"; 2987 break; 2988 case (AMDGPU::IsaInfo::TargetIDSetting::Any): 2989 OS << "Any"; 2990 break; 2991 case (AMDGPU::IsaInfo::TargetIDSetting::Off): 2992 OS << "Off"; 2993 break; 2994 case (AMDGPU::IsaInfo::TargetIDSetting::On): 2995 OS << "On"; 2996 break; 2997 } 2998 return OS; 2999 } 3000 3001 } // namespace llvm 3002