//===- AMDGPUBaseInfo.cpp - AMDGPU Base encoding information --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDGPUBaseInfo.h"
#include "AMDGPU.h"
#include "AMDGPUAsmUtils.h"
#include "AMDKernelCodeT.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsR600.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/TargetParser.h"
#include <optional>

#define GET_INSTRINFO_NAMED_OPS
#define GET_INSTRMAP_INFO
#include "AMDGPUGenInstrInfo.inc"

static llvm::cl::opt<unsigned>
    AmdhsaCodeObjectVersion("amdhsa-code-object-version", llvm::cl::Hidden,
                            llvm::cl::desc("AMDHSA Code Object Version"),
                            llvm::cl::init(4));

namespace {

/// \returns Bit mask for given bit \p Shift and bit \p Width.
unsigned getBitMask(unsigned Shift, unsigned Width) {
  return ((1 << Width) - 1) << Shift;
}

/// Packs \p Src into \p Dst for given bit \p Shift and bit \p Width.
///
/// \returns Packed \p Dst.
unsigned packBits(unsigned Src, unsigned Dst, unsigned Shift, unsigned Width) {
  unsigned Mask = getBitMask(Shift, Width);
  return ((Src << Shift) & Mask) | (Dst & ~Mask);
}

/// Unpacks bits from \p Src for given bit \p Shift and bit \p Width.
///
/// \returns Unpacked bits.
unsigned unpackBits(unsigned Src, unsigned Shift, unsigned Width) {
  return (Src & getBitMask(Shift, Width)) >> Shift;
}
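
// Illustrative example: with Shift = 4 and Width = 3, getBitMask() returns
// 0b1110000, so packBits(0b101, 0, 4, 3) == 0b1010000 and
// unpackBits(0b1010000, 4, 3) == 0b101. The waitcnt helpers below use these
// to place the individual counters inside the packed waitcnt immediate.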

/// \returns Vmcnt bit shift (lower bits).
unsigned getVmcntBitShiftLo(unsigned VersionMajor) {
  return VersionMajor >= 11 ? 10 : 0;
}

/// \returns Vmcnt bit width (lower bits).
unsigned getVmcntBitWidthLo(unsigned VersionMajor) {
  return VersionMajor >= 11 ? 6 : 4;
}

/// \returns Expcnt bit shift.
unsigned getExpcntBitShift(unsigned VersionMajor) {
  return VersionMajor >= 11 ? 0 : 4;
}

/// \returns Expcnt bit width.
unsigned getExpcntBitWidth(unsigned VersionMajor) { return 3; }

/// \returns Lgkmcnt bit shift.
unsigned getLgkmcntBitShift(unsigned VersionMajor) {
  return VersionMajor >= 11 ? 4 : 8;
}

/// \returns Lgkmcnt bit width.
unsigned getLgkmcntBitWidth(unsigned VersionMajor) {
  return VersionMajor >= 10 ? 6 : 4;
}

/// \returns Vmcnt bit shift (higher bits).
unsigned getVmcntBitShiftHi(unsigned VersionMajor) { return 14; }

/// \returns Vmcnt bit width (higher bits).
unsigned getVmcntBitWidthHi(unsigned VersionMajor) {
  return (VersionMajor == 9 || VersionMajor == 10) ? 2 : 0;
}

} // end namespace anonymous

namespace llvm {

namespace AMDGPU {

std::optional<uint8_t> getHsaAbiVersion(const MCSubtargetInfo *STI) {
  if (STI && STI->getTargetTriple().getOS() != Triple::AMDHSA)
    return std::nullopt;

  switch (AmdhsaCodeObjectVersion) {
  case 2:
    return ELF::ELFABIVERSION_AMDGPU_HSA_V2;
  case 3:
    return ELF::ELFABIVERSION_AMDGPU_HSA_V3;
  case 4:
    return ELF::ELFABIVERSION_AMDGPU_HSA_V4;
  case 5:
    return ELF::ELFABIVERSION_AMDGPU_HSA_V5;
  default:
    report_fatal_error(Twine("Unsupported AMDHSA Code Object Version ") +
                       Twine(AmdhsaCodeObjectVersion));
  }
}

bool isHsaAbiVersion2(const MCSubtargetInfo *STI) {
  if (std::optional<uint8_t> HsaAbiVer = getHsaAbiVersion(STI))
    return *HsaAbiVer == ELF::ELFABIVERSION_AMDGPU_HSA_V2;
  return false;
}

bool isHsaAbiVersion3(const MCSubtargetInfo *STI) {
  if (std::optional<uint8_t> HsaAbiVer = getHsaAbiVersion(STI))
    return *HsaAbiVer == ELF::ELFABIVERSION_AMDGPU_HSA_V3;
  return false;
}

bool isHsaAbiVersion4(const MCSubtargetInfo *STI) {
  if (std::optional<uint8_t> HsaAbiVer = getHsaAbiVersion(STI))
    return *HsaAbiVer == ELF::ELFABIVERSION_AMDGPU_HSA_V4;
  return false;
}

bool isHsaAbiVersion5(const MCSubtargetInfo *STI) {
  if (std::optional<uint8_t> HsaAbiVer = getHsaAbiVersion(STI))
    return *HsaAbiVer == ELF::ELFABIVERSION_AMDGPU_HSA_V5;
  return false;
}

bool isHsaAbiVersion3AndAbove(const MCSubtargetInfo *STI) {
  return isHsaAbiVersion3(STI) || isHsaAbiVersion4(STI) ||
         isHsaAbiVersion5(STI);
}

unsigned getAmdhsaCodeObjectVersion() {
  return AmdhsaCodeObjectVersion;
}

unsigned getMultigridSyncArgImplicitArgPosition() {
  switch (AmdhsaCodeObjectVersion) {
  case 2:
  case 3:
  case 4:
    return 48;
  case 5:
    return AMDGPU::ImplicitArg::MULTIGRID_SYNC_ARG_OFFSET;
  default:
    llvm_unreachable("Unexpected code object version");
    return 0;
  }
}


// FIXME: All such magic numbers about the ABI should be in a
// central TD file.
unsigned getHostcallImplicitArgPosition() {
  switch (AmdhsaCodeObjectVersion) {
  case 2:
  case 3:
  case 4:
    return 24;
  case 5:
    return AMDGPU::ImplicitArg::HOSTCALL_PTR_OFFSET;
  default:
    llvm_unreachable("Unexpected code object version");
    return 0;
  }
}

unsigned getDefaultQueueImplicitArgPosition() {
  switch (AmdhsaCodeObjectVersion) {
  case 2:
  case 3:
  case 4:
    return 32;
  case 5:
  default:
    return AMDGPU::ImplicitArg::DEFAULT_QUEUE_OFFSET;
  }
}

unsigned getCompletionActionImplicitArgPosition() {
  switch (AmdhsaCodeObjectVersion) {
  case 2:
  case 3:
  case 4:
    return 40;
  case 5:
  default:
    return AMDGPU::ImplicitArg::COMPLETION_ACTION_OFFSET;
  }
}

#define GET_MIMGBaseOpcodesTable_IMPL
#define GET_MIMGDimInfoTable_IMPL
#define GET_MIMGInfoTable_IMPL
#define GET_MIMGLZMappingTable_IMPL
#define GET_MIMGMIPMappingTable_IMPL
#define GET_MIMGBiasMappingTable_IMPL
#define GET_MIMGOffsetMappingTable_IMPL
#define GET_MIMGG16MappingTable_IMPL
#define GET_MAIInstInfoTable_IMPL
#include "AMDGPUGenSearchableTables.inc"

int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
                  unsigned VDataDwords, unsigned VAddrDwords) {
  const MIMGInfo *Info = getMIMGOpcodeHelper(BaseOpcode, MIMGEncoding,
                                             VDataDwords, VAddrDwords);
  return Info ? Info->Opcode : -1;
}

const MIMGBaseOpcodeInfo *getMIMGBaseOpcode(unsigned Opc) {
  const MIMGInfo *Info = getMIMGInfo(Opc);
  return Info ? getMIMGBaseOpcodeInfo(Info->BaseOpcode) : nullptr;
}

int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels) {
  const MIMGInfo *OrigInfo = getMIMGInfo(Opc);
  const MIMGInfo *NewInfo =
      getMIMGOpcodeHelper(OrigInfo->BaseOpcode, OrigInfo->MIMGEncoding,
                          NewChannels, OrigInfo->VAddrDwords);
  return NewInfo ? NewInfo->Opcode : -1;
}

unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode,
                           const MIMGDimInfo *Dim, bool IsA16,
                           bool IsG16Supported) {
  unsigned AddrWords = BaseOpcode->NumExtraArgs;
  unsigned AddrComponents = (BaseOpcode->Coordinates ? Dim->NumCoords : 0) +
                            (BaseOpcode->LodOrClampOrMip ? 1 : 0);
  if (IsA16)
    AddrWords += divideCeil(AddrComponents, 2);
  else
    AddrWords += AddrComponents;

  // Note: For subtargets that support A16 but not G16, enabling A16 also
  // enables 16 bit gradients.
  // For subtargets that support A16 (operand) and G16 (done with a different
  // instruction encoding), they are independent.

  if (BaseOpcode->Gradients) {
    if ((IsA16 && !IsG16Supported) || BaseOpcode->G16)
      // There are two gradients per coordinate, we pack them separately.
      // For the 3d case,
      // we get (dy/du, dx/du) (-, dz/du) (dy/dv, dx/dv) (-, dz/dv)
      AddrWords += alignTo<2>(Dim->NumGradients / 2);
    else
      AddrWords += Dim->NumGradients;
  }
  return AddrWords;
}
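
// Illustrative example: for a 3D gradient sample (Dim->NumGradients == 6),
// packed 16-bit gradients take alignTo<2>(6 / 2) == 4 address dwords as
// described in the comment above, whereas unpacked 32-bit gradients take
// all 6 dwords.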

struct MUBUFInfo {
  uint16_t Opcode;
  uint16_t BaseOpcode;
  uint8_t elements;
  bool has_vaddr;
  bool has_srsrc;
  bool has_soffset;
  bool IsBufferInv;
};

struct MTBUFInfo {
  uint16_t Opcode;
  uint16_t BaseOpcode;
  uint8_t elements;
  bool has_vaddr;
  bool has_srsrc;
  bool has_soffset;
};

struct SMInfo {
  uint16_t Opcode;
  bool IsBuffer;
};

struct VOPInfo {
  uint16_t Opcode;
  bool IsSingle;
};

struct VOPC64DPPInfo {
  uint16_t Opcode;
};

struct VOPDComponentInfo {
  uint16_t BaseVOP;
  uint16_t VOPDOp;
  bool CanBeVOPDX;
};

struct VOPDInfo {
  uint16_t Opcode;
  uint16_t OpX;
  uint16_t OpY;
};

struct VOPTrue16Info {
  uint16_t Opcode;
  bool IsTrue16;
};

#define GET_MTBUFInfoTable_DECL
#define GET_MTBUFInfoTable_IMPL
#define GET_MUBUFInfoTable_DECL
#define GET_MUBUFInfoTable_IMPL
#define GET_SMInfoTable_DECL
#define GET_SMInfoTable_IMPL
#define GET_VOP1InfoTable_DECL
#define GET_VOP1InfoTable_IMPL
#define GET_VOP2InfoTable_DECL
#define GET_VOP2InfoTable_IMPL
#define GET_VOP3InfoTable_DECL
#define GET_VOP3InfoTable_IMPL
#define GET_VOPC64DPPTable_DECL
#define GET_VOPC64DPPTable_IMPL
#define GET_VOPC64DPP8Table_DECL
#define GET_VOPC64DPP8Table_IMPL
#define GET_VOPDComponentTable_DECL
#define GET_VOPDComponentTable_IMPL
#define GET_VOPDPairs_DECL
#define GET_VOPDPairs_IMPL
#define GET_VOPTrue16Table_DECL
#define GET_VOPTrue16Table_IMPL
#define GET_WMMAOpcode2AddrMappingTable_DECL
#define GET_WMMAOpcode2AddrMappingTable_IMPL
#define GET_WMMAOpcode3AddrMappingTable_DECL
#define GET_WMMAOpcode3AddrMappingTable_IMPL
#include "AMDGPUGenSearchableTables.inc"

int getMTBUFBaseOpcode(unsigned Opc) {
  const MTBUFInfo *Info = getMTBUFInfoFromOpcode(Opc);
  return Info ? Info->BaseOpcode : -1;
}

int getMTBUFOpcode(unsigned BaseOpc, unsigned Elements) {
  const MTBUFInfo *Info = getMTBUFInfoFromBaseOpcodeAndElements(BaseOpc, Elements);
  return Info ? Info->Opcode : -1;
}

int getMTBUFElements(unsigned Opc) {
  const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
  return Info ? Info->elements : 0;
}

bool getMTBUFHasVAddr(unsigned Opc) {
  const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
  return Info ? Info->has_vaddr : false;
}

bool getMTBUFHasSrsrc(unsigned Opc) {
  const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
  return Info ? Info->has_srsrc : false;
}

bool getMTBUFHasSoffset(unsigned Opc) {
  const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
  return Info ? Info->has_soffset : false;
}

int getMUBUFBaseOpcode(unsigned Opc) {
  const MUBUFInfo *Info = getMUBUFInfoFromOpcode(Opc);
  return Info ? Info->BaseOpcode : -1;
}

int getMUBUFOpcode(unsigned BaseOpc, unsigned Elements) {
  const MUBUFInfo *Info = getMUBUFInfoFromBaseOpcodeAndElements(BaseOpc, Elements);
  return Info ? Info->Opcode : -1;
}

int getMUBUFElements(unsigned Opc) {
  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
  return Info ? Info->elements : 0;
}

bool getMUBUFHasVAddr(unsigned Opc) {
  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
  return Info ? Info->has_vaddr : false;
}

bool getMUBUFHasSrsrc(unsigned Opc) {
  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
  return Info ? Info->has_srsrc : false;
}

bool getMUBUFHasSoffset(unsigned Opc) {
  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
  return Info ? Info->has_soffset : false;
}

bool getMUBUFIsBufferInv(unsigned Opc) {
  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
  return Info ? Info->IsBufferInv : false;
}

bool getSMEMIsBuffer(unsigned Opc) {
  const SMInfo *Info = getSMEMOpcodeHelper(Opc);
  return Info ? Info->IsBuffer : false;
}

bool getVOP1IsSingle(unsigned Opc) {
  const VOPInfo *Info = getVOP1OpcodeHelper(Opc);
  return Info ? Info->IsSingle : false;
}

bool getVOP2IsSingle(unsigned Opc) {
  const VOPInfo *Info = getVOP2OpcodeHelper(Opc);
  return Info ? Info->IsSingle : false;
}

bool getVOP3IsSingle(unsigned Opc) {
  const VOPInfo *Info = getVOP3OpcodeHelper(Opc);
  return Info ? Info->IsSingle : false;
}

bool isVOPC64DPP(unsigned Opc) {
  return isVOPC64DPPOpcodeHelper(Opc) || isVOPC64DPP8OpcodeHelper(Opc);
}

bool getMAIIsDGEMM(unsigned Opc) {
  const MAIInstInfo *Info = getMAIInstInfoHelper(Opc);
  return Info ? Info->is_dgemm : false;
}

bool getMAIIsGFX940XDL(unsigned Opc) {
  const MAIInstInfo *Info = getMAIInstInfoHelper(Opc);
  return Info ? Info->is_gfx940_xdl : false;
}

CanBeVOPD getCanBeVOPD(unsigned Opc) {
  const VOPDComponentInfo *Info = getVOPDComponentHelper(Opc);
  if (Info)
    return {Info->CanBeVOPDX, true};
  else
    return {false, false};
}

unsigned getVOPDOpcode(unsigned Opc) {
  const VOPDComponentInfo *Info = getVOPDComponentHelper(Opc);
  return Info ? Info->VOPDOp : ~0u;
}

bool isVOPD(unsigned Opc) {
  return AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src0X);
}

bool isMAC(unsigned Opc) {
  return Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
         Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
         Opc == AMDGPU::V_MAC_F32_e64_vi ||
         Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 ||
         Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 ||
         Opc == AMDGPU::V_MAC_F16_e64_vi ||
         Opc == AMDGPU::V_FMAC_F64_e64_gfx90a ||
         Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
         Opc == AMDGPU::V_FMAC_F32_e64_gfx11 ||
         Opc == AMDGPU::V_FMAC_F32_e64_vi ||
         Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
         Opc == AMDGPU::V_FMAC_DX9_ZERO_F32_e64_gfx11 ||
         Opc == AMDGPU::V_FMAC_F16_e64_gfx10 ||
         Opc == AMDGPU::V_FMAC_F16_t16_e64_gfx11 ||
         Opc == AMDGPU::V_DOT2C_F32_F16_e64_vi ||
         Opc == AMDGPU::V_DOT2C_I32_I16_e64_vi ||
         Opc == AMDGPU::V_DOT4C_I32_I8_e64_vi ||
         Opc == AMDGPU::V_DOT8C_I32_I4_e64_vi;
}

bool isPermlane16(unsigned Opc) {
  return Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
         Opc == AMDGPU::V_PERMLANEX16_B32_gfx10 ||
         Opc == AMDGPU::V_PERMLANE16_B32_e64_gfx11 ||
         Opc == AMDGPU::V_PERMLANEX16_B32_e64_gfx11;
}

bool isTrue16Inst(unsigned Opc) {
  const VOPTrue16Info *Info = getTrue16OpcodeHelper(Opc);
  return Info ? Info->IsTrue16 : false;
}

unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc) {
  const WMMAOpcodeMappingInfo *Info = getWMMAMappingInfoFrom2AddrOpcode(Opc);
  return Info ? Info->Opcode3Addr : ~0u;
}

unsigned mapWMMA3AddrTo2AddrOpcode(unsigned Opc) {
  const WMMAOpcodeMappingInfo *Info = getWMMAMappingInfoFrom3AddrOpcode(Opc);
  return Info ? Info->Opcode2Addr : ~0u;
}

// Wrapper for Tablegen'd function. enum Subtarget is not defined in any
// header files, so we need to wrap it in a function that takes unsigned
// instead.
int getMCOpcode(uint16_t Opcode, unsigned Gen) {
  return getMCOpcodeGen(Opcode, static_cast<Subtarget>(Gen));
}

int getVOPDFull(unsigned OpX, unsigned OpY) {
  const VOPDInfo *Info = getVOPDInfoFromComponentOpcodes(OpX, OpY);
  return Info ? Info->Opcode : -1;
}

std::pair<unsigned, unsigned> getVOPDComponents(unsigned VOPDOpcode) {
  const VOPDInfo *Info = getVOPDOpcodeHelper(VOPDOpcode);
  assert(Info);
  auto OpX = getVOPDBaseFromComponent(Info->OpX);
  auto OpY = getVOPDBaseFromComponent(Info->OpY);
  assert(OpX && OpY);
  return {OpX->BaseVOP, OpY->BaseVOP};
}

namespace VOPD {

ComponentProps::ComponentProps(const MCInstrDesc &OpDesc) {
  assert(OpDesc.getNumDefs() == Component::DST_NUM);

  assert(OpDesc.getOperandConstraint(Component::SRC0, MCOI::TIED_TO) == -1);
  assert(OpDesc.getOperandConstraint(Component::SRC1, MCOI::TIED_TO) == -1);
  auto TiedIdx = OpDesc.getOperandConstraint(Component::SRC2, MCOI::TIED_TO);
  assert(TiedIdx == -1 || TiedIdx == Component::DST);
  HasSrc2Acc = TiedIdx != -1;

  SrcOperandsNum = OpDesc.getNumOperands() - OpDesc.getNumDefs();
  assert(SrcOperandsNum <= Component::MAX_SRC_NUM);

  auto OperandsNum = OpDesc.getNumOperands();
  unsigned CompOprIdx;
  for (CompOprIdx = Component::SRC1; CompOprIdx < OperandsNum; ++CompOprIdx) {
    if (OpDesc.operands()[CompOprIdx].OperandType == AMDGPU::OPERAND_KIMM32) {
      MandatoryLiteralIdx = CompOprIdx;
      break;
    }
  }
}

unsigned ComponentInfo::getIndexInParsedOperands(unsigned CompOprIdx) const {
  assert(CompOprIdx < Component::MAX_OPR_NUM);

  if (CompOprIdx == Component::DST)
    return getIndexOfDstInParsedOperands();

  auto CompSrcIdx = CompOprIdx - Component::DST_NUM;
  if (CompSrcIdx < getCompParsedSrcOperandsNum())
    return getIndexOfSrcInParsedOperands(CompSrcIdx);

  // The specified operand does not exist.
  return 0;
}

std::optional<unsigned> InstInfo::getInvalidCompOperandIndex(
    std::function<unsigned(unsigned, unsigned)> GetRegIdx) const {

  auto OpXRegs = getRegIndices(ComponentIndex::X, GetRegIdx);
  auto OpYRegs = getRegIndices(ComponentIndex::Y, GetRegIdx);

  unsigned CompOprIdx;
  for (CompOprIdx = 0; CompOprIdx < Component::MAX_OPR_NUM; ++CompOprIdx) {
    unsigned BanksNum = BANKS_NUM[CompOprIdx];
    if (OpXRegs[CompOprIdx] && OpYRegs[CompOprIdx] &&
        (OpXRegs[CompOprIdx] % BanksNum == OpYRegs[CompOprIdx] % BanksNum))
      return CompOprIdx;
  }

  return {};
}

// Return an array of VGPR registers [DST,SRC0,SRC1,SRC2] used
// by the specified component. If an operand is unused
// or is not a VGPR, the corresponding value is 0.
//
// GetRegIdx(Component, MCOperandIdx) must return a VGPR register index
// for the specified component and MC operand. The callback must return 0
// if the operand is not a register or not a VGPR.
InstInfo::RegIndices InstInfo::getRegIndices(
    unsigned CompIdx,
    std::function<unsigned(unsigned, unsigned)> GetRegIdx) const {
  assert(CompIdx < COMPONENTS_NUM);

  const auto &Comp = CompInfo[CompIdx];
  InstInfo::RegIndices RegIndices;

  RegIndices[DST] = GetRegIdx(CompIdx, Comp.getIndexOfDstInMCOperands());

  for (unsigned CompOprIdx : {SRC0, SRC1, SRC2}) {
    unsigned CompSrcIdx = CompOprIdx - DST_NUM;
    RegIndices[CompOprIdx] =
        Comp.hasRegSrcOperand(CompSrcIdx)
            ? GetRegIdx(CompIdx, Comp.getIndexOfSrcInMCOperands(CompSrcIdx))
            : 0;
  }
  return RegIndices;
}

} // namespace VOPD

VOPD::InstInfo getVOPDInstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY) {
  return VOPD::InstInfo(OpX, OpY);
}

VOPD::InstInfo getVOPDInstInfo(unsigned VOPDOpcode,
                               const MCInstrInfo *InstrInfo) {
  auto [OpX, OpY] = getVOPDComponents(VOPDOpcode);
  const auto &OpXDesc = InstrInfo->get(OpX);
  const auto &OpYDesc = InstrInfo->get(OpY);
  VOPD::ComponentInfo OpXInfo(OpXDesc, VOPD::ComponentKind::COMPONENT_X);
  VOPD::ComponentInfo OpYInfo(OpYDesc, OpXInfo);
  return VOPD::InstInfo(OpXInfo, OpYInfo);
}

namespace IsaInfo {

AMDGPUTargetID::AMDGPUTargetID(const MCSubtargetInfo &STI)
    : STI(STI), XnackSetting(TargetIDSetting::Any),
      SramEccSetting(TargetIDSetting::Any) {
  if (!STI.getFeatureBits().test(FeatureSupportsXNACK))
    XnackSetting = TargetIDSetting::Unsupported;
  if (!STI.getFeatureBits().test(FeatureSupportsSRAMECC))
    SramEccSetting = TargetIDSetting::Unsupported;
}

void AMDGPUTargetID::setTargetIDFromFeaturesString(StringRef FS) {
  // Check if xnack or sramecc is explicitly enabled or disabled. In the
  // absence of the target features we assume we must generate code that can run
  // in any environment.
  SubtargetFeatures Features(FS);
  std::optional<bool> XnackRequested;
  std::optional<bool> SramEccRequested;

  for (const std::string &Feature : Features.getFeatures()) {
    if (Feature == "+xnack")
      XnackRequested = true;
    else if (Feature == "-xnack")
      XnackRequested = false;
    else if (Feature == "+sramecc")
      SramEccRequested = true;
    else if (Feature == "-sramecc")
      SramEccRequested = false;
  }

  bool XnackSupported = isXnackSupported();
  bool SramEccSupported = isSramEccSupported();

  if (XnackRequested) {
    if (XnackSupported) {
      XnackSetting =
          *XnackRequested ? TargetIDSetting::On : TargetIDSetting::Off;
    } else {
      // If a specific xnack setting was requested and this GPU does not support
      // xnack emit a warning. Setting will remain set to "Unsupported".
      if (*XnackRequested) {
        errs() << "warning: xnack 'On' was requested for a processor that does "
                  "not support it!\n";
      } else {
        errs() << "warning: xnack 'Off' was requested for a processor that "
                  "does not support it!\n";
      }
    }
  }

  if (SramEccRequested) {
    if (SramEccSupported) {
      SramEccSetting =
          *SramEccRequested ? TargetIDSetting::On : TargetIDSetting::Off;
    } else {
      // If a specific sramecc setting was requested and this GPU does not
      // support sramecc emit a warning. Setting will remain set to
      // "Unsupported".
      if (*SramEccRequested) {
        errs() << "warning: sramecc 'On' was requested for a processor that "
                  "does not support it!\n";
      } else {
        errs() << "warning: sramecc 'Off' was requested for a processor that "
                  "does not support it!\n";
      }
    }
  }
}

static TargetIDSetting
getTargetIDSettingFromFeatureString(StringRef FeatureString) {
  if (FeatureString.endswith("-"))
    return TargetIDSetting::Off;
  if (FeatureString.endswith("+"))
    return TargetIDSetting::On;

  llvm_unreachable("Malformed feature string");
}

void AMDGPUTargetID::setTargetIDFromTargetIDStream(StringRef TargetID) {
  SmallVector<StringRef, 3> TargetIDSplit;
  TargetID.split(TargetIDSplit, ':');

  for (const auto &FeatureString : TargetIDSplit) {
    if (FeatureString.startswith("xnack"))
      XnackSetting = getTargetIDSettingFromFeatureString(FeatureString);
    if (FeatureString.startswith("sramecc"))
      SramEccSetting = getTargetIDSettingFromFeatureString(FeatureString);
  }
}

std::string AMDGPUTargetID::toString() const {
  std::string StringRep;
  raw_string_ostream StreamRep(StringRep);

  auto TargetTriple = STI.getTargetTriple();
  auto Version = getIsaVersion(STI.getCPU());

  StreamRep << TargetTriple.getArchName() << '-'
            << TargetTriple.getVendorName() << '-'
            << TargetTriple.getOSName() << '-'
            << TargetTriple.getEnvironmentName() << '-';

  std::string Processor;
  // TODO: Following else statement is present here because we used various
  // alias names for GPUs up until GFX9 (e.g. 'fiji' is same as 'gfx803').
  // Remove once all aliases are removed from GCNProcessors.td.
  if (Version.Major >= 9)
    Processor = STI.getCPU().str();
  else
    Processor = (Twine("gfx") + Twine(Version.Major) + Twine(Version.Minor) +
                 Twine(Version.Stepping))
                    .str();

  std::string Features;
  if (std::optional<uint8_t> HsaAbiVersion = getHsaAbiVersion(&STI)) {
    switch (*HsaAbiVersion) {
    case ELF::ELFABIVERSION_AMDGPU_HSA_V2:
      // Code object V2 only supported specific processors and had fixed
      // settings for the XNACK.
      if (Processor == "gfx600") {
      } else if (Processor == "gfx601") {
      } else if (Processor == "gfx602") {
      } else if (Processor == "gfx700") {
      } else if (Processor == "gfx701") {
      } else if (Processor == "gfx702") {
      } else if (Processor == "gfx703") {
      } else if (Processor == "gfx704") {
      } else if (Processor == "gfx705") {
      } else if (Processor == "gfx801") {
        if (!isXnackOnOrAny())
          report_fatal_error(
              "AMD GPU code object V2 does not support processor " +
              Twine(Processor) + " without XNACK");
      } else if (Processor == "gfx802") {
      } else if (Processor == "gfx803") {
      } else if (Processor == "gfx805") {
      } else if (Processor == "gfx810") {
        if (!isXnackOnOrAny())
          report_fatal_error(
              "AMD GPU code object V2 does not support processor " +
              Twine(Processor) + " without XNACK");
      } else if (Processor == "gfx900") {
        if (isXnackOnOrAny())
          Processor = "gfx901";
      } else if (Processor == "gfx902") {
        if (isXnackOnOrAny())
          Processor = "gfx903";
      } else if (Processor == "gfx904") {
        if (isXnackOnOrAny())
          Processor = "gfx905";
      } else if (Processor == "gfx906") {
        if (isXnackOnOrAny())
          Processor = "gfx907";
      } else if (Processor == "gfx90c") {
        if (isXnackOnOrAny())
          report_fatal_error(
              "AMD GPU code object V2 does not support processor " +
              Twine(Processor) + " with XNACK being ON or ANY");
      } else {
        report_fatal_error(
            "AMD GPU code object V2 does not support processor " +
            Twine(Processor));
      }
      break;
    case ELF::ELFABIVERSION_AMDGPU_HSA_V3:
      // xnack.
      if (isXnackOnOrAny())
        Features += "+xnack";
      // In code object v2 and v3, "sramecc" feature was spelled with a
      // hyphen ("sram-ecc").
      if (isSramEccOnOrAny())
        Features += "+sram-ecc";
      break;
    case ELF::ELFABIVERSION_AMDGPU_HSA_V4:
    case ELF::ELFABIVERSION_AMDGPU_HSA_V5:
      // sramecc.
      if (getSramEccSetting() == TargetIDSetting::Off)
        Features += ":sramecc-";
      else if (getSramEccSetting() == TargetIDSetting::On)
        Features += ":sramecc+";
      // xnack.
      if (getXnackSetting() == TargetIDSetting::Off)
        Features += ":xnack-";
      else if (getXnackSetting() == TargetIDSetting::On)
        Features += ":xnack+";
      break;
    default:
      break;
    }
  }

  StreamRep << Processor << Features;

  StreamRep.flush();
  return StringRep;
}

unsigned getWavefrontSize(const MCSubtargetInfo *STI) {
  if (STI->getFeatureBits().test(FeatureWavefrontSize16))
    return 16;
  if (STI->getFeatureBits().test(FeatureWavefrontSize32))
    return 32;

  return 64;
}

unsigned getLocalMemorySize(const MCSubtargetInfo *STI) {
  unsigned BytesPerCU = 0;
  if (STI->getFeatureBits().test(FeatureLocalMemorySize32768))
    BytesPerCU = 32768;
  if (STI->getFeatureBits().test(FeatureLocalMemorySize65536))
    BytesPerCU = 65536;

  // "Per CU" really means "per whatever functional block the waves of a
  // workgroup must share". So the effective local memory size is doubled in
  // WGP mode on gfx10.
  if (isGFX10Plus(*STI) && !STI->getFeatureBits().test(FeatureCuMode))
    BytesPerCU *= 2;

  return BytesPerCU;
}
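
// Illustrative example: a gfx10+ target with 64 KiB of LDS per CU reports
// 2 * 65536 = 131072 bytes here in WGP (non-CU) mode, because the functional
// block shared by a workgroup is then the whole WGP rather than a single CU.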

unsigned getAddressableLocalMemorySize(const MCSubtargetInfo *STI) {
  if (STI->getFeatureBits().test(FeatureLocalMemorySize32768))
    return 32768;
  if (STI->getFeatureBits().test(FeatureLocalMemorySize65536))
    return 65536;
  return 0;
}

unsigned getEUsPerCU(const MCSubtargetInfo *STI) {
  // "Per CU" really means "per whatever functional block the waves of a
  // workgroup must share". For gfx10 in CU mode this is the CU, which contains
  // two SIMDs.
  if (isGFX10Plus(*STI) && STI->getFeatureBits().test(FeatureCuMode))
    return 2;
  // Pre-gfx10 a CU contains four SIMDs. For gfx10 in WGP mode the WGP contains
  // two CUs, so a total of four SIMDs.
  return 4;
}

unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI,
                               unsigned FlatWorkGroupSize) {
  assert(FlatWorkGroupSize != 0);
  if (STI->getTargetTriple().getArch() != Triple::amdgcn)
    return 8;
  unsigned MaxWaves = getMaxWavesPerEU(STI) * getEUsPerCU(STI);
  unsigned N = getWavesPerWorkGroup(STI, FlatWorkGroupSize);
  if (N == 1) {
    // Single-wave workgroups don't consume barrier resources.
    return MaxWaves;
  }

  unsigned MaxBarriers = 16;
  if (isGFX10Plus(*STI) && !STI->getFeatureBits().test(FeatureCuMode))
    MaxBarriers = 32;

  return std::min(MaxWaves / N, MaxBarriers);
}

unsigned getMinWavesPerEU(const MCSubtargetInfo *STI) {
  return 1;
}

unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI) {
  // FIXME: Need to take scratch memory into account.
  if (isGFX90A(*STI))
    return 8;
  if (!isGFX10Plus(*STI))
    return 10;
  return hasGFX10_3Insts(*STI) ? 16 : 20;
}

unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo *STI,
                                   unsigned FlatWorkGroupSize) {
  return divideCeil(getWavesPerWorkGroup(STI, FlatWorkGroupSize),
                    getEUsPerCU(STI));
}

unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI) {
  return 1;
}

unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI) {
  // Some subtargets allow encoding 2048, but this isn't tested or supported.
  return 1024;
}

unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI,
                              unsigned FlatWorkGroupSize) {
  return divideCeil(FlatWorkGroupSize, getWavefrontSize(STI));
}

unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI) {
  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major >= 10)
    return getAddressableNumSGPRs(STI);
  if (Version.Major >= 8)
    return 16;
  return 8;
}

unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI) {
  return 8;
}

unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI) {
  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major >= 8)
    return 800;
  return 512;
}

unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI) {
  if (STI->getFeatureBits().test(FeatureSGPRInitBug))
    return FIXED_NUM_SGPRS_FOR_INIT_BUG;

  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major >= 10)
    return 106;
  if (Version.Major >= 8)
    return 102;
  return 104;
}

unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
  assert(WavesPerEU != 0);

  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major >= 10)
    return 0;

  if (WavesPerEU >= getMaxWavesPerEU(STI))
    return 0;

  unsigned MinNumSGPRs = getTotalNumSGPRs(STI) / (WavesPerEU + 1);
  if (STI->getFeatureBits().test(FeatureTrapHandler))
    MinNumSGPRs -= std::min(MinNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
  MinNumSGPRs = alignDown(MinNumSGPRs, getSGPRAllocGranule(STI)) + 1;
  return std::min(MinNumSGPRs, getAddressableNumSGPRs(STI));
}

unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
                        bool Addressable) {
  assert(WavesPerEU != 0);

  unsigned AddressableNumSGPRs = getAddressableNumSGPRs(STI);
  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major >= 10)
    return Addressable ? AddressableNumSGPRs : 108;
  if (Version.Major >= 8 && !Addressable)
    AddressableNumSGPRs = 112;
  unsigned MaxNumSGPRs = getTotalNumSGPRs(STI) / WavesPerEU;
  if (STI->getFeatureBits().test(FeatureTrapHandler))
    MaxNumSGPRs -= std::min(MaxNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
  MaxNumSGPRs = alignDown(MaxNumSGPRs, getSGPRAllocGranule(STI));
  return std::min(MaxNumSGPRs, AddressableNumSGPRs);
}

unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
                          bool FlatScrUsed, bool XNACKUsed) {
  unsigned ExtraSGPRs = 0;
  if (VCCUsed)
    ExtraSGPRs = 2;

  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major >= 10)
    return ExtraSGPRs;

  if (Version.Major < 8) {
    if (FlatScrUsed)
      ExtraSGPRs = 4;
  } else {
    if (XNACKUsed)
      ExtraSGPRs = 4;

    if (FlatScrUsed ||
        STI->getFeatureBits().test(AMDGPU::FeatureArchitectedFlatScratch))
      ExtraSGPRs = 6;
  }

  return ExtraSGPRs;
}

unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
                          bool FlatScrUsed) {
  return getNumExtraSGPRs(STI, VCCUsed, FlatScrUsed,
                          STI->getFeatureBits().test(AMDGPU::FeatureXNACK));
}

unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs) {
  NumSGPRs = alignTo(std::max(1u, NumSGPRs), getSGPREncodingGranule(STI));
  // SGPRBlocks is actual number of SGPR blocks minus 1.
  return NumSGPRs / getSGPREncodingGranule(STI) - 1;
}
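
// Illustrative example: with the encoding granule of 8, a kernel using
// 36 SGPRs is first rounded up to 40 and then encoded as 40 / 8 - 1 == 4
// SGPR blocks.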

unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI,
                             std::optional<bool> EnableWavefrontSize32) {
  if (STI->getFeatureBits().test(FeatureGFX90AInsts))
    return 8;

  bool IsWave32 = EnableWavefrontSize32 ?
      *EnableWavefrontSize32 :
      STI->getFeatureBits().test(FeatureWavefrontSize32);

  if (STI->getFeatureBits().test(FeatureGFX11FullVGPRs))
    return IsWave32 ? 24 : 12;

  if (hasGFX10_3Insts(*STI))
    return IsWave32 ? 16 : 8;

  return IsWave32 ? 8 : 4;
}

unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI,
                                std::optional<bool> EnableWavefrontSize32) {
  if (STI->getFeatureBits().test(FeatureGFX90AInsts))
    return 8;

  bool IsWave32 = EnableWavefrontSize32 ?
      *EnableWavefrontSize32 :
      STI->getFeatureBits().test(FeatureWavefrontSize32);

  return IsWave32 ? 8 : 4;
}

unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI) {
  if (STI->getFeatureBits().test(FeatureGFX90AInsts))
    return 512;
  if (!isGFX10Plus(*STI))
    return 256;
  bool IsWave32 = STI->getFeatureBits().test(FeatureWavefrontSize32);
  if (STI->getFeatureBits().test(FeatureGFX11FullVGPRs))
    return IsWave32 ? 1536 : 768;
  return IsWave32 ? 1024 : 512;
}

unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI) {
  if (STI->getFeatureBits().test(FeatureGFX90AInsts))
    return 512;
  return 256;
}

unsigned getNumWavesPerEUWithNumVGPRs(const MCSubtargetInfo *STI,
                                      unsigned NumVGPRs) {
  unsigned MaxWaves = getMaxWavesPerEU(STI);
  unsigned Granule = getVGPRAllocGranule(STI);
  if (NumVGPRs < Granule)
    return MaxWaves;
  unsigned RoundedRegs = alignTo(NumVGPRs, Granule);
  return std::min(std::max(getTotalNumVGPRs(STI) / RoundedRegs, 1u), MaxWaves);
}
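
// Illustrative example: on a gfx10.3 wave64 configuration, where
// getTotalNumVGPRs() == 512 and the allocation granule is 8, a kernel using
// 84 VGPRs rounds up to 88 and limits the EU to 512 / 88 = 5 waves, well
// below the 16-wave cap.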

unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
  assert(WavesPerEU != 0);

  unsigned MaxWavesPerEU = getMaxWavesPerEU(STI);
  if (WavesPerEU >= MaxWavesPerEU)
    return 0;

  unsigned TotNumVGPRs = getTotalNumVGPRs(STI);
  unsigned AddrsableNumVGPRs = getAddressableNumVGPRs(STI);
  unsigned Granule = getVGPRAllocGranule(STI);
  unsigned MaxNumVGPRs = alignDown(TotNumVGPRs / WavesPerEU, Granule);

  if (MaxNumVGPRs == alignDown(TotNumVGPRs / MaxWavesPerEU, Granule))
    return 0;

  unsigned MinWavesPerEU = getNumWavesPerEUWithNumVGPRs(STI, AddrsableNumVGPRs);
  if (WavesPerEU < MinWavesPerEU)
    return getMinNumVGPRs(STI, MinWavesPerEU);

  unsigned MaxNumVGPRsNext = alignDown(TotNumVGPRs / (WavesPerEU + 1), Granule);
  unsigned MinNumVGPRs = 1 + std::min(MaxNumVGPRs - Granule, MaxNumVGPRsNext);
  return std::min(MinNumVGPRs, AddrsableNumVGPRs);
}

unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
  assert(WavesPerEU != 0);

  unsigned MaxNumVGPRs = alignDown(getTotalNumVGPRs(STI) / WavesPerEU,
                                   getVGPRAllocGranule(STI));
  unsigned AddressableNumVGPRs = getAddressableNumVGPRs(STI);
  return std::min(MaxNumVGPRs, AddressableNumVGPRs);
}

unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs,
                          std::optional<bool> EnableWavefrontSize32) {
  NumVGPRs = alignTo(std::max(1u, NumVGPRs),
                     getVGPREncodingGranule(STI, EnableWavefrontSize32));
  // VGPRBlocks is actual number of VGPR blocks minus 1.
  return NumVGPRs / getVGPREncodingGranule(STI, EnableWavefrontSize32) - 1;
}

} // end namespace IsaInfo

void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
                               const MCSubtargetInfo *STI) {
  IsaVersion Version = getIsaVersion(STI->getCPU());

  memset(&Header, 0, sizeof(Header));

  Header.amd_kernel_code_version_major = 1;
  Header.amd_kernel_code_version_minor = 2;
  Header.amd_machine_kind = 1; // AMD_MACHINE_KIND_AMDGPU
  Header.amd_machine_version_major = Version.Major;
  Header.amd_machine_version_minor = Version.Minor;
  Header.amd_machine_version_stepping = Version.Stepping;
  Header.kernel_code_entry_byte_offset = sizeof(Header);
  Header.wavefront_size = 6;

  // If the code object does not support indirect functions, then the value must
  // be 0xffffffff.
  Header.call_convention = -1;

  // These alignment values are specified in powers of two, so alignment =
  // 2^n. The minimum alignment is 2^4 = 16.
  Header.kernarg_segment_alignment = 4;
  Header.group_segment_alignment = 4;
  Header.private_segment_alignment = 4;

  if (Version.Major >= 10) {
    if (STI->getFeatureBits().test(FeatureWavefrontSize32)) {
      Header.wavefront_size = 5;
      Header.code_properties |= AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32;
    }
    Header.compute_pgm_resource_registers |=
        S_00B848_WGP_MODE(STI->getFeatureBits().test(FeatureCuMode) ? 0 : 1) |
        S_00B848_MEM_ORDERED(1);
  }
}

amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor(
    const MCSubtargetInfo *STI) {
  IsaVersion Version = getIsaVersion(STI->getCPU());

  amdhsa::kernel_descriptor_t KD;
  memset(&KD, 0, sizeof(KD));

  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                  amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64,
                  amdhsa::FLOAT_DENORM_MODE_FLUSH_NONE);
  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                  amdhsa::COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, 1);
  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                  amdhsa::COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 1);
  AMDHSA_BITS_SET(KD.compute_pgm_rsrc2,
                  amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, 1);
  if (Version.Major >= 10) {
    AMDHSA_BITS_SET(KD.kernel_code_properties,
                    amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
                    STI->getFeatureBits().test(FeatureWavefrontSize32) ? 1 : 0);
    AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                    amdhsa::COMPUTE_PGM_RSRC1_WGP_MODE,
                    STI->getFeatureBits().test(FeatureCuMode) ? 0 : 1);
    AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                    amdhsa::COMPUTE_PGM_RSRC1_MEM_ORDERED, 1);
  }
  if (AMDGPU::isGFX90A(*STI)) {
    AMDHSA_BITS_SET(KD.compute_pgm_rsrc3,
                    amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT,
                    STI->getFeatureBits().test(FeatureTgSplit) ? 1 : 0);
  }
  return KD;
}

bool isGroupSegment(const GlobalValue *GV) {
  return GV->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
}

bool isGlobalSegment(const GlobalValue *GV) {
  return GV->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
}

bool isReadOnlySegment(const GlobalValue *GV) {
  unsigned AS = GV->getAddressSpace();
  return AS == AMDGPUAS::CONSTANT_ADDRESS ||
         AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT;
}

bool shouldEmitConstantsToTextSection(const Triple &TT) {
  return TT.getArch() == Triple::r600;
}

std::pair<int, int> getIntegerPairAttribute(const Function &F,
                                            StringRef Name,
                                            std::pair<int, int> Default,
                                            bool OnlyFirstRequired) {
  Attribute A = F.getFnAttribute(Name);
  if (!A.isStringAttribute())
    return Default;

  LLVMContext &Ctx = F.getContext();
  std::pair<int, int> Ints = Default;
  std::pair<StringRef, StringRef> Strs = A.getValueAsString().split(',');
  if (Strs.first.trim().getAsInteger(0, Ints.first)) {
    Ctx.emitError("can't parse first integer attribute " + Name);
    return Default;
  }
  if (Strs.second.trim().getAsInteger(0, Ints.second)) {
    if (!OnlyFirstRequired || !Strs.second.trim().empty()) {
      Ctx.emitError("can't parse second integer attribute " + Name);
      return Default;
    }
  }

  return Ints;
}

unsigned getVmcntBitMask(const IsaVersion &Version) {
  return (1 << (getVmcntBitWidthLo(Version.Major) +
                getVmcntBitWidthHi(Version.Major))) -
         1;
}

unsigned getExpcntBitMask(const IsaVersion &Version) {
  return (1 << getExpcntBitWidth(Version.Major)) - 1;
}

unsigned getLgkmcntBitMask(const IsaVersion &Version) {
  return (1 << getLgkmcntBitWidth(Version.Major)) - 1;
}

unsigned getWaitcntBitMask(const IsaVersion &Version) {
  unsigned VmcntLo = getBitMask(getVmcntBitShiftLo(Version.Major),
                                getVmcntBitWidthLo(Version.Major));
  unsigned Expcnt = getBitMask(getExpcntBitShift(Version.Major),
                               getExpcntBitWidth(Version.Major));
  unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(Version.Major),
                                getLgkmcntBitWidth(Version.Major));
  unsigned VmcntHi = getBitMask(getVmcntBitShiftHi(Version.Major),
                                getVmcntBitWidthHi(Version.Major));
  return VmcntLo | Expcnt | Lgkmcnt | VmcntHi;
}

unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt) {
  unsigned VmcntLo = unpackBits(Waitcnt, getVmcntBitShiftLo(Version.Major),
                                getVmcntBitWidthLo(Version.Major));
  unsigned VmcntHi = unpackBits(Waitcnt, getVmcntBitShiftHi(Version.Major),
                                getVmcntBitWidthHi(Version.Major));
  return VmcntLo | VmcntHi << getVmcntBitWidthLo(Version.Major);
}

unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt) {
  return unpackBits(Waitcnt, getExpcntBitShift(Version.Major),
                    getExpcntBitWidth(Version.Major));
}

unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt) {
  return unpackBits(Waitcnt, getLgkmcntBitShift(Version.Major),
                    getLgkmcntBitWidth(Version.Major));
}
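
// Illustrative example, based on the shift/width helpers at the top of this
// file: a gfx9 waitcnt of vmcnt(1), expcnt(2), lgkmcnt(3) encodes as
// 1 | (2 << 4) | (3 << 8) == 0x321, and the decode/encode helpers here and
// below recover or produce exactly that packed immediate.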

void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt,
                   unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt) {
  Vmcnt = decodeVmcnt(Version, Waitcnt);
  Expcnt = decodeExpcnt(Version, Waitcnt);
  Lgkmcnt = decodeLgkmcnt(Version, Waitcnt);
}

Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded) {
  Waitcnt Decoded;
  Decoded.VmCnt = decodeVmcnt(Version, Encoded);
  Decoded.ExpCnt = decodeExpcnt(Version, Encoded);
  Decoded.LgkmCnt = decodeLgkmcnt(Version, Encoded);
  return Decoded;
}

unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt,
                     unsigned Vmcnt) {
  Waitcnt = packBits(Vmcnt, Waitcnt, getVmcntBitShiftLo(Version.Major),
                     getVmcntBitWidthLo(Version.Major));
  return packBits(Vmcnt >> getVmcntBitWidthLo(Version.Major), Waitcnt,
                  getVmcntBitShiftHi(Version.Major),
                  getVmcntBitWidthHi(Version.Major));
}

unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt,
                      unsigned Expcnt) {
  return packBits(Expcnt, Waitcnt, getExpcntBitShift(Version.Major),
                  getExpcntBitWidth(Version.Major));
}

unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
                       unsigned Lgkmcnt) {
  return packBits(Lgkmcnt, Waitcnt, getLgkmcntBitShift(Version.Major),
                  getLgkmcntBitWidth(Version.Major));
}

unsigned encodeWaitcnt(const IsaVersion &Version,
                       unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt) {
  unsigned Waitcnt = getWaitcntBitMask(Version);
  Waitcnt = encodeVmcnt(Version, Waitcnt, Vmcnt);
  Waitcnt = encodeExpcnt(Version, Waitcnt, Expcnt);
  Waitcnt = encodeLgkmcnt(Version, Waitcnt, Lgkmcnt);
  return Waitcnt;
}

unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded) {
  return encodeWaitcnt(Version, Decoded.VmCnt, Decoded.ExpCnt, Decoded.LgkmCnt);
}

//===----------------------------------------------------------------------===//
// Custom Operands.
//
// A table of custom operands shall describe "primary" operand names
// first followed by aliases if any. It is not required but recommended
// to arrange operands so that operand encoding match operand position
// in the table. This will make disassembly a bit more efficient.
// Unused slots in the table shall have an empty name.
//
//===----------------------------------------------------------------------===//

template <class T>
static bool isValidOpr(int Idx, const CustomOperand<T> OpInfo[], int OpInfoSize,
                       T Context) {
  return 0 <= Idx && Idx < OpInfoSize && !OpInfo[Idx].Name.empty() &&
         (!OpInfo[Idx].Cond || OpInfo[Idx].Cond(Context));
}

template <class T>
static int getOprIdx(std::function<bool(const CustomOperand<T> &)> Test,
                     const CustomOperand<T> OpInfo[], int OpInfoSize,
                     T Context) {
  int InvalidIdx = OPR_ID_UNKNOWN;
  for (int Idx = 0; Idx < OpInfoSize; ++Idx) {
    if (Test(OpInfo[Idx])) {
      if (!OpInfo[Idx].Cond || OpInfo[Idx].Cond(Context))
        return Idx;
      InvalidIdx = OPR_ID_UNSUPPORTED;
    }
  }
  return InvalidIdx;
}

template <class T>
static int getOprIdx(const StringRef Name, const CustomOperand<T> OpInfo[],
                     int OpInfoSize, T Context) {
  auto Test = [=](const CustomOperand<T> &Op) { return Op.Name == Name; };
  return getOprIdx<T>(Test, OpInfo, OpInfoSize, Context);
}

template <class T>
static int getOprIdx(int Id, const CustomOperand<T> OpInfo[], int OpInfoSize,
                     T Context, bool QuickCheck = true) {
  auto Test = [=](const CustomOperand<T> &Op) {
    return Op.Encoding == Id && !Op.Name.empty();
  };
  // This is an optimization that should work in most cases.
  // As a side effect, it may cause selection of an alias
  // instead of a primary operand name in case of sparse tables.
  if (QuickCheck && isValidOpr<T>(Id, OpInfo, OpInfoSize, Context) &&
      OpInfo[Id].Encoding == Id) {
    return Id;
  }
  return getOprIdx<T>(Test, OpInfo, OpInfoSize, Context);
}

//===----------------------------------------------------------------------===//
// Custom Operand Values
//===----------------------------------------------------------------------===//

static unsigned getDefaultCustomOperandEncoding(const CustomOperandVal *Opr,
                                                int Size,
                                                const MCSubtargetInfo &STI) {
  unsigned Enc = 0;
  for (int Idx = 0; Idx < Size; ++Idx) {
    const auto &Op = Opr[Idx];
    if (Op.isSupported(STI))
      Enc |= Op.encode(Op.Default);
  }
  return Enc;
}

static bool isSymbolicCustomOperandEncoding(const CustomOperandVal *Opr,
                                            int Size, unsigned Code,
                                            bool &HasNonDefaultVal,
                                            const MCSubtargetInfo &STI) {
  unsigned UsedOprMask = 0;
  HasNonDefaultVal = false;
  for (int Idx = 0; Idx < Size; ++Idx) {
    const auto &Op = Opr[Idx];
    if (!Op.isSupported(STI))
      continue;
    UsedOprMask |= Op.getMask();
    unsigned Val = Op.decode(Code);
    if (!Op.isValid(Val))
      return false;
    HasNonDefaultVal |= (Val != Op.Default);
  }
  return (Code & ~UsedOprMask) == 0;
}

static bool decodeCustomOperand(const CustomOperandVal *Opr, int Size,
                                unsigned Code, int &Idx, StringRef &Name,
                                unsigned &Val, bool &IsDefault,
                                const MCSubtargetInfo &STI) {
  while (Idx < Size) {
    const auto &Op = Opr[Idx++];
    if (Op.isSupported(STI)) {
      Name = Op.Name;
      Val = Op.decode(Code);
      IsDefault = (Val == Op.Default);
      return true;
    }
  }

  return false;
}

static int encodeCustomOperandVal(const CustomOperandVal &Op,
                                  int64_t InputVal) {
  if (InputVal < 0 || InputVal > Op.Max)
    return OPR_VAL_INVALID;
  return Op.encode(InputVal);
}

static int encodeCustomOperand(const CustomOperandVal *Opr, int Size,
                               const StringRef Name, int64_t InputVal,
                               unsigned &UsedOprMask,
                               const MCSubtargetInfo &STI) {
  int InvalidId = OPR_ID_UNKNOWN;
  for (int Idx = 0; Idx < Size; ++Idx) {
    const auto &Op = Opr[Idx];
    if (Op.Name == Name) {
      if (!Op.isSupported(STI)) {
        InvalidId = OPR_ID_UNSUPPORTED;
        continue;
      }
      auto OprMask = Op.getMask();
      if (OprMask & UsedOprMask)
        return OPR_ID_DUPLICATE;
      UsedOprMask |= OprMask;
      return encodeCustomOperandVal(Op, InputVal);
    }
  }
  return InvalidId;
}

//===----------------------------------------------------------------------===//
// DepCtr
//===----------------------------------------------------------------------===//

namespace DepCtr {

int getDefaultDepCtrEncoding(const MCSubtargetInfo &STI) {
  static int Default = -1;
  if (Default == -1)
    Default = getDefaultCustomOperandEncoding(DepCtrInfo, DEP_CTR_SIZE, STI);
  return Default;
}

bool isSymbolicDepCtrEncoding(unsigned Code, bool &HasNonDefaultVal,
                              const MCSubtargetInfo &STI) {
  return isSymbolicCustomOperandEncoding(DepCtrInfo, DEP_CTR_SIZE, Code,
                                         HasNonDefaultVal, STI);
}

bool decodeDepCtr(unsigned Code, int &Id, StringRef &Name, unsigned &Val,
                  bool &IsDefault, const MCSubtargetInfo &STI) {
  return decodeCustomOperand(DepCtrInfo, DEP_CTR_SIZE, Code, Id, Name, Val,
                             IsDefault, STI);
}

int encodeDepCtr(const StringRef Name, int64_t Val, unsigned &UsedOprMask,
                 const MCSubtargetInfo &STI) {
  return encodeCustomOperand(DepCtrInfo, DEP_CTR_SIZE, Name, Val, UsedOprMask,
                             STI);
}

} // namespace DepCtr

//===----------------------------------------------------------------------===//
// hwreg
//===----------------------------------------------------------------------===//

namespace Hwreg {

int64_t getHwregId(const StringRef Name, const MCSubtargetInfo &STI) {
  int Idx = getOprIdx<const MCSubtargetInfo &>(Name, Opr, OPR_SIZE, STI);
  return (Idx < 0) ? Idx : Opr[Idx].Encoding;
}

bool isValidHwreg(int64_t Id) {
  return 0 <= Id && isUInt<ID_WIDTH_>(Id);
}

bool isValidHwregOffset(int64_t Offset) {
  return 0 <= Offset && isUInt<OFFSET_WIDTH_>(Offset);
}

bool isValidHwregWidth(int64_t Width) {
  return 0 <= (Width - 1) && isUInt<WIDTH_M1_WIDTH_>(Width - 1);
}

uint64_t encodeHwreg(uint64_t Id, uint64_t Offset, uint64_t Width) {
  return (Id << ID_SHIFT_) |
         (Offset << OFFSET_SHIFT_) |
         ((Width - 1) << WIDTH_M1_SHIFT_);
}

StringRef getHwreg(unsigned Id, const MCSubtargetInfo &STI) {
  int Idx = getOprIdx<const MCSubtargetInfo &>(Id, Opr, OPR_SIZE, STI);
  return (Idx < 0) ? "" : Opr[Idx].Name;
}

void decodeHwreg(unsigned Val, unsigned &Id, unsigned &Offset, unsigned &Width) {
  Id = (Val & ID_MASK_) >> ID_SHIFT_;
  Offset = (Val & OFFSET_MASK_) >> OFFSET_SHIFT_;
  Width = ((Val & WIDTH_M1_MASK_) >> WIDTH_M1_SHIFT_) + 1;
}

} // namespace Hwreg

//===----------------------------------------------------------------------===//
// exp tgt
//===----------------------------------------------------------------------===//

namespace Exp {

struct ExpTgt {
  StringLiteral Name;
  unsigned Tgt;
  unsigned MaxIndex;
};

static constexpr ExpTgt ExpTgtInfo[] = {
    {{"null"}, ET_NULL, ET_NULL_MAX_IDX},
    {{"mrtz"}, ET_MRTZ, ET_MRTZ_MAX_IDX},
    {{"prim"}, ET_PRIM, ET_PRIM_MAX_IDX},
    {{"mrt"}, ET_MRT0, ET_MRT_MAX_IDX},
    {{"pos"}, ET_POS0, ET_POS_MAX_IDX},
    {{"dual_src_blend"}, ET_DUAL_SRC_BLEND0, ET_DUAL_SRC_BLEND_MAX_IDX},
    {{"param"}, ET_PARAM0, ET_PARAM_MAX_IDX},
};

bool getTgtName(unsigned Id, StringRef &Name, int &Index) {
  for (const ExpTgt &Val : ExpTgtInfo) {
    if (Val.Tgt <= Id && Id <= Val.Tgt + Val.MaxIndex) {
      Index = (Val.MaxIndex == 0) ? -1 : (Id - Val.Tgt);
      Name = Val.Name;
      return true;
    }
  }
  return false;
}

unsigned getTgtId(const StringRef Name) {

  for (const ExpTgt &Val : ExpTgtInfo) {
    if (Val.MaxIndex == 0 && Name == Val.Name)
      return Val.Tgt;

    if (Val.MaxIndex > 0 && Name.startswith(Val.Name)) {
      StringRef Suffix = Name.drop_front(Val.Name.size());

      unsigned Id;
      if (Suffix.getAsInteger(10, Id) || Id > Val.MaxIndex)
        return ET_INVALID;

      // Disable leading zeroes
      if (Suffix.size() > 1 && Suffix[0] == '0')
        return ET_INVALID;

      return Val.Tgt + Id;
    }
  }
  return ET_INVALID;
}
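
// Illustrative example: getTgtId("pos2") returns ET_POS0 + 2, while
// getTgtId("mrt05") returns ET_INVALID because of the leading zero;
// getTgtName() maps the numeric target back to the ("pos", 2) form.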

bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI) {
  switch (Id) {
  case ET_NULL:
    return !isGFX11Plus(STI);
  case ET_POS4:
  case ET_PRIM:
    return isGFX10Plus(STI);
  case ET_DUAL_SRC_BLEND0:
  case ET_DUAL_SRC_BLEND1:
    return isGFX11Plus(STI);
  default:
    if (Id >= ET_PARAM0 && Id <= ET_PARAM31)
      return !isGFX11Plus(STI);
    return true;
  }
}

} // namespace Exp

//===----------------------------------------------------------------------===//
// MTBUF Format
//===----------------------------------------------------------------------===//

namespace MTBUFFormat {

int64_t getDfmt(const StringRef Name) {
  for (int Id = DFMT_MIN; Id <= DFMT_MAX; ++Id) {
    if (Name == DfmtSymbolic[Id])
      return Id;
  }
  return DFMT_UNDEF;
}

StringRef getDfmtName(unsigned Id) {
  assert(Id <= DFMT_MAX);
  return DfmtSymbolic[Id];
}

static StringLiteral const *getNfmtLookupTable(const MCSubtargetInfo &STI) {
  if (isSI(STI) || isCI(STI))
    return NfmtSymbolicSICI;
  if (isVI(STI) || isGFX9(STI))
    return NfmtSymbolicVI;
  return NfmtSymbolicGFX10;
}

int64_t getNfmt(const StringRef Name, const MCSubtargetInfo &STI) {
  auto lookupTable = getNfmtLookupTable(STI);
  for (int Id = NFMT_MIN; Id <= NFMT_MAX; ++Id) {
    if (Name == lookupTable[Id])
      return Id;
  }
  return NFMT_UNDEF;
}

StringRef getNfmtName(unsigned Id, const MCSubtargetInfo &STI) {
  assert(Id <= NFMT_MAX);
  return getNfmtLookupTable(STI)[Id];
}

bool isValidDfmtNfmt(unsigned Id, const MCSubtargetInfo &STI) {
  unsigned Dfmt;
  unsigned Nfmt;
  decodeDfmtNfmt(Id, Dfmt, Nfmt);
  return isValidNfmt(Nfmt, STI);
}

bool isValidNfmt(unsigned Id, const MCSubtargetInfo &STI) {
  return !getNfmtName(Id, STI).empty();
}

int64_t encodeDfmtNfmt(unsigned Dfmt, unsigned Nfmt) {
  return (Dfmt << DFMT_SHIFT) | (Nfmt << NFMT_SHIFT);
}

void decodeDfmtNfmt(unsigned Format, unsigned &Dfmt, unsigned &Nfmt) {
  Dfmt = (Format >> DFMT_SHIFT) & DFMT_MASK;
  Nfmt = (Format >> NFMT_SHIFT) & NFMT_MASK;
}

int64_t getUnifiedFormat(const StringRef Name, const MCSubtargetInfo &STI) {
  if (isGFX11Plus(STI)) {
    for (int Id = UfmtGFX11::UFMT_FIRST; Id <= UfmtGFX11::UFMT_LAST; ++Id) {
      if (Name == UfmtSymbolicGFX11[Id])
        return Id;
    }
  } else {
    for (int Id = UfmtGFX10::UFMT_FIRST; Id <= UfmtGFX10::UFMT_LAST; ++Id) {
      if (Name == UfmtSymbolicGFX10[Id])
        return Id;
    }
  }
  return UFMT_UNDEF;
}

StringRef getUnifiedFormatName(unsigned Id, const MCSubtargetInfo &STI) {
  if (isValidUnifiedFormat(Id, STI))
    return isGFX10(STI) ? UfmtSymbolicGFX10[Id] : UfmtSymbolicGFX11[Id];
  return "";
}

bool isValidUnifiedFormat(unsigned Id, const MCSubtargetInfo &STI) {
  return isGFX10(STI) ? Id <= UfmtGFX10::UFMT_LAST : Id <= UfmtGFX11::UFMT_LAST;
}

int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt,
                             const MCSubtargetInfo &STI) {
  int64_t Fmt = encodeDfmtNfmt(Dfmt, Nfmt);
  if (isGFX11Plus(STI)) {
    for (int Id = UfmtGFX11::UFMT_FIRST; Id <= UfmtGFX11::UFMT_LAST; ++Id) {
      if (Fmt == DfmtNfmt2UFmtGFX11[Id])
        return Id;
    }
  } else {
    for (int Id = UfmtGFX10::UFMT_FIRST; Id <= UfmtGFX10::UFMT_LAST; ++Id) {
      if (Fmt == DfmtNfmt2UFmtGFX10[Id])
        return Id;
    }
  }
  return UFMT_UNDEF;
}

bool isValidFormatEncoding(unsigned Val, const MCSubtargetInfo &STI) {
  return isGFX10Plus(STI) ? (Val <= UFMT_MAX) : (Val <= DFMT_NFMT_MAX);
}

unsigned getDefaultFormatEncoding(const MCSubtargetInfo &STI) {
  if (isGFX10Plus(STI))
    return UFMT_DEFAULT;
  return DFMT_NFMT_DEFAULT;
}

} // namespace MTBUFFormat

//===----------------------------------------------------------------------===//
// SendMsg
//===----------------------------------------------------------------------===//

namespace SendMsg {

static uint64_t getMsgIdMask(const MCSubtargetInfo &STI) {
  return isGFX11Plus(STI) ? ID_MASK_GFX11Plus_ : ID_MASK_PreGFX11_;
}

int64_t getMsgId(const StringRef Name, const MCSubtargetInfo &STI) {
  int Idx = getOprIdx<const MCSubtargetInfo &>(Name, Msg, MSG_SIZE, STI);
  return (Idx < 0) ? Idx : Msg[Idx].Encoding;
}

bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI) {
  return (MsgId & ~(getMsgIdMask(STI))) == 0;
}

StringRef getMsgName(int64_t MsgId, const MCSubtargetInfo &STI) {
  int Idx = getOprIdx<const MCSubtargetInfo &>(MsgId, Msg, MSG_SIZE, STI);
  return (Idx < 0) ? "" : Msg[Idx].Name;
}

int64_t getMsgOpId(int64_t MsgId, const StringRef Name) {
  const char* const *S = (MsgId == ID_SYSMSG) ? OpSysSymbolic : OpGsSymbolic;
  const int F = (MsgId == ID_SYSMSG) ? OP_SYS_FIRST_ : OP_GS_FIRST_;
  const int L = (MsgId == ID_SYSMSG) ? OP_SYS_LAST_ : OP_GS_LAST_;
  for (int i = F; i < L; ++i) {
    if (Name == S[i]) {
      return i;
    }
  }
  return OP_UNKNOWN_;
}

bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI,
                  bool Strict) {
  assert(isValidMsgId(MsgId, STI));

  if (!Strict)
    return 0 <= OpId && isUInt<OP_WIDTH_>(OpId);

  if (MsgId == ID_SYSMSG)
    return OP_SYS_FIRST_ <= OpId && OpId < OP_SYS_LAST_;
  if (!isGFX11Plus(STI)) {
    switch (MsgId) {
    case ID_GS_PreGFX11:
      return (OP_GS_FIRST_ <= OpId && OpId < OP_GS_LAST_) && OpId != OP_GS_NOP;
    case ID_GS_DONE_PreGFX11:
      return OP_GS_FIRST_ <= OpId && OpId < OP_GS_LAST_;
    }
  }
  return OpId == OP_NONE_;
}

StringRef getMsgOpName(int64_t MsgId, int64_t OpId,
                       const MCSubtargetInfo &STI) {
  assert(msgRequiresOp(MsgId, STI));

StringRef getMsgOpName(int64_t MsgId, int64_t OpId,
                       const MCSubtargetInfo &STI) {
  assert(msgRequiresOp(MsgId, STI));
  return (MsgId == ID_SYSMSG) ? OpSysSymbolic[OpId] : OpGsSymbolic[OpId];
}

bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId,
                      const MCSubtargetInfo &STI, bool Strict) {
  assert(isValidMsgOp(MsgId, OpId, STI, Strict));

  if (!Strict)
    return 0 <= StreamId && isUInt<STREAM_ID_WIDTH_>(StreamId);

  if (!isGFX11Plus(STI)) {
    switch (MsgId) {
    case ID_GS_PreGFX11:
      return STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_;
    case ID_GS_DONE_PreGFX11:
      return (OpId == OP_GS_NOP) ?
          (StreamId == STREAM_ID_NONE_) :
          (STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_);
    }
  }
  return StreamId == STREAM_ID_NONE_;
}

bool msgRequiresOp(int64_t MsgId, const MCSubtargetInfo &STI) {
  return MsgId == ID_SYSMSG ||
      (!isGFX11Plus(STI) &&
       (MsgId == ID_GS_PreGFX11 || MsgId == ID_GS_DONE_PreGFX11));
}

bool msgSupportsStream(int64_t MsgId, int64_t OpId,
                       const MCSubtargetInfo &STI) {
  return !isGFX11Plus(STI) &&
      (MsgId == ID_GS_PreGFX11 || MsgId == ID_GS_DONE_PreGFX11) &&
      OpId != OP_GS_NOP;
}

void decodeMsg(unsigned Val, uint16_t &MsgId, uint16_t &OpId,
               uint16_t &StreamId, const MCSubtargetInfo &STI) {
  MsgId = Val & getMsgIdMask(STI);
  if (isGFX11Plus(STI)) {
    OpId = 0;
    StreamId = 0;
  } else {
    OpId = (Val & OP_MASK_) >> OP_SHIFT_;
    StreamId = (Val & STREAM_ID_MASK_) >> STREAM_ID_SHIFT_;
  }
}

uint64_t encodeMsg(uint64_t MsgId,
                   uint64_t OpId,
                   uint64_t StreamId) {
  return MsgId | (OpId << OP_SHIFT_) | (StreamId << STREAM_ID_SHIFT_);
}
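
// Illustrative example (not part of the original file): encodeMsg composes the
// s_sendmsg immediate from its three fields and decodeMsg recovers them on
// targets where the op and stream fields exist. Assuming STI describes a
// pre-GFX11 subtarget and OP_GS_EMIT is the GS emit operation:
//
//   uint16_t MsgId = 0, OpId = 0, StreamId = 0;
//   uint64_t Imm = encodeMsg(ID_GS_PreGFX11, OP_GS_EMIT, /*StreamId=*/1);
//   decodeMsg(Imm, MsgId, OpId, StreamId, STI);
//   // MsgId == ID_GS_PreGFX11, OpId == OP_GS_EMIT, StreamId == 1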

} // namespace SendMsg

//===----------------------------------------------------------------------===//
//
//===----------------------------------------------------------------------===//

unsigned getInitialPSInputAddr(const Function &F) {
  return F.getFnAttributeAsParsedInteger("InitialPSInputAddr", 0);
}

bool getHasColorExport(const Function &F) {
  // As a safe default always respond as if PS has color exports.
  return F.getFnAttributeAsParsedInteger(
             "amdgpu-color-export",
             F.getCallingConv() == CallingConv::AMDGPU_PS ? 1 : 0) != 0;
}

bool getHasDepthExport(const Function &F) {
  return F.getFnAttributeAsParsedInteger("amdgpu-depth-export", 0) != 0;
}

bool isShader(CallingConv::ID cc) {
  switch(cc) {
  case CallingConv::AMDGPU_VS:
  case CallingConv::AMDGPU_LS:
  case CallingConv::AMDGPU_HS:
  case CallingConv::AMDGPU_ES:
  case CallingConv::AMDGPU_GS:
  case CallingConv::AMDGPU_PS:
  case CallingConv::AMDGPU_CS:
    return true;
  default:
    return false;
  }
}

bool isGraphics(CallingConv::ID cc) {
  return isShader(cc) || cc == CallingConv::AMDGPU_Gfx;
}

bool isCompute(CallingConv::ID cc) {
  return !isGraphics(cc) || cc == CallingConv::AMDGPU_CS;
}

bool isEntryFunctionCC(CallingConv::ID CC) {
  switch (CC) {
  case CallingConv::AMDGPU_KERNEL:
  case CallingConv::SPIR_KERNEL:
  case CallingConv::AMDGPU_VS:
  case CallingConv::AMDGPU_GS:
  case CallingConv::AMDGPU_PS:
  case CallingConv::AMDGPU_CS:
  case CallingConv::AMDGPU_ES:
  case CallingConv::AMDGPU_HS:
  case CallingConv::AMDGPU_LS:
    return true;
  default:
    return false;
  }
}

bool isModuleEntryFunctionCC(CallingConv::ID CC) {
  switch (CC) {
  case CallingConv::AMDGPU_Gfx:
    return true;
  default:
    return isEntryFunctionCC(CC);
  }
}

bool isKernelCC(const Function *Func) {
  return AMDGPU::isModuleEntryFunctionCC(Func->getCallingConv());
}

bool hasXNACK(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureXNACK];
}

bool hasSRAMECC(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureSRAMECC];
}

bool hasMIMG_R128(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureMIMG_R128] &&
         !STI.getFeatureBits()[AMDGPU::FeatureR128A16];
}

bool hasA16(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureA16];
}

bool hasG16(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureG16];
}

bool hasPackedD16(const MCSubtargetInfo &STI) {
  return !STI.getFeatureBits()[AMDGPU::FeatureUnpackedD16VMem] && !isCI(STI) &&
         !isSI(STI);
}

bool isSI(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureSouthernIslands];
}

bool isCI(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureSeaIslands];
}

bool isVI(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands];
}

bool isGFX9(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureGFX9];
}

bool isGFX9_GFX10(const MCSubtargetInfo &STI) {
  return isGFX9(STI) || isGFX10(STI);
}

bool isGFX8_GFX9_GFX10(const MCSubtargetInfo &STI) {
  return isVI(STI) || isGFX9(STI) || isGFX10(STI);
}

bool isGFX8Plus(const MCSubtargetInfo &STI) {
  return isVI(STI) || isGFX9Plus(STI);
}

bool isGFX9Plus(const MCSubtargetInfo &STI) {
  return isGFX9(STI) || isGFX10Plus(STI);
}

bool isGFX10(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureGFX10];
}

bool isGFX10Plus(const MCSubtargetInfo &STI) {
  return isGFX10(STI) || isGFX11Plus(STI);
}
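
// Illustrative example (not part of the original file): the *Plus predicates
// chain, so a subtarget satisfies the *Plus check of its own generation and of
// every earlier one. Assuming STI describes a GFX10 subtarget:
//
//   isGFX10(STI);     // true
//   isGFX10Plus(STI); // true
//   isGFX9Plus(STI);  // true
//   isGFX11Plus(STI); // false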

bool isGFX11(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureGFX11];
}

bool isGFX11Plus(const MCSubtargetInfo &STI) {
  return isGFX11(STI);
}

bool isNotGFX11Plus(const MCSubtargetInfo &STI) {
  return !isGFX11Plus(STI);
}

bool isNotGFX10Plus(const MCSubtargetInfo &STI) {
  return isSI(STI) || isCI(STI) || isVI(STI) || isGFX9(STI);
}

bool isGFX10Before1030(const MCSubtargetInfo &STI) {
  return isGFX10(STI) && !AMDGPU::isGFX10_BEncoding(STI);
}

bool isGCN3Encoding(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureGCN3Encoding];
}

bool isGFX10_AEncoding(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureGFX10_AEncoding];
}

bool isGFX10_BEncoding(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureGFX10_BEncoding];
}

bool hasGFX10_3Insts(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureGFX10_3Insts];
}

bool isGFX90A(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureGFX90AInsts];
}

bool isGFX940(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureGFX940Insts];
}

bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
}

bool hasMAIInsts(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureMAIInsts];
}

bool hasVOPD(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureVOPD];
}

int32_t getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR,
                         int32_t ArgNumVGPR) {
  if (has90AInsts && ArgNumAGPR)
    return alignTo(ArgNumVGPR, 4) + ArgNumAGPR;
  return std::max(ArgNumVGPR, ArgNumAGPR);
}
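
// Illustrative example (not part of the original file): on gfx90a-style
// targets the AGPRs are allocated after the VGPRs, with the VGPR count first
// rounded up to a multiple of 4; otherwise only the larger of the two counts
// is needed:
//
//   getTotalNumVGPRs(/*has90AInsts=*/true,  /*ArgNumAGPR=*/6, /*ArgNumVGPR=*/10); // alignTo(10, 4) + 6 == 18
//   getTotalNumVGPRs(/*has90AInsts=*/false, /*ArgNumAGPR=*/6, /*ArgNumVGPR=*/10); // max(10, 6) == 10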

bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI) {
  const MCRegisterClass SGPRClass = TRI->getRegClass(AMDGPU::SReg_32RegClassID);
  const unsigned FirstSubReg = TRI->getSubReg(Reg, AMDGPU::sub0);
  return SGPRClass.contains(FirstSubReg != 0 ? FirstSubReg : Reg) ||
         Reg == AMDGPU::SCC;
}

#define MAP_REG2REG \
  using namespace AMDGPU; \
  switch(Reg) { \
  default: return Reg; \
  CASE_CI_VI(FLAT_SCR) \
  CASE_CI_VI(FLAT_SCR_LO) \
  CASE_CI_VI(FLAT_SCR_HI) \
  CASE_VI_GFX9PLUS(TTMP0) \
  CASE_VI_GFX9PLUS(TTMP1) \
  CASE_VI_GFX9PLUS(TTMP2) \
  CASE_VI_GFX9PLUS(TTMP3) \
  CASE_VI_GFX9PLUS(TTMP4) \
  CASE_VI_GFX9PLUS(TTMP5) \
  CASE_VI_GFX9PLUS(TTMP6) \
  CASE_VI_GFX9PLUS(TTMP7) \
  CASE_VI_GFX9PLUS(TTMP8) \
  CASE_VI_GFX9PLUS(TTMP9) \
  CASE_VI_GFX9PLUS(TTMP10) \
  CASE_VI_GFX9PLUS(TTMP11) \
  CASE_VI_GFX9PLUS(TTMP12) \
  CASE_VI_GFX9PLUS(TTMP13) \
  CASE_VI_GFX9PLUS(TTMP14) \
  CASE_VI_GFX9PLUS(TTMP15) \
  CASE_VI_GFX9PLUS(TTMP0_TTMP1) \
  CASE_VI_GFX9PLUS(TTMP2_TTMP3) \
  CASE_VI_GFX9PLUS(TTMP4_TTMP5) \
  CASE_VI_GFX9PLUS(TTMP6_TTMP7) \
  CASE_VI_GFX9PLUS(TTMP8_TTMP9) \
  CASE_VI_GFX9PLUS(TTMP10_TTMP11) \
  CASE_VI_GFX9PLUS(TTMP12_TTMP13) \
  CASE_VI_GFX9PLUS(TTMP14_TTMP15) \
  CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3) \
  CASE_VI_GFX9PLUS(TTMP4_TTMP5_TTMP6_TTMP7) \
  CASE_VI_GFX9PLUS(TTMP8_TTMP9_TTMP10_TTMP11) \
  CASE_VI_GFX9PLUS(TTMP12_TTMP13_TTMP14_TTMP15) \
  CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7) \
  CASE_VI_GFX9PLUS(TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11) \
  CASE_VI_GFX9PLUS(TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
  CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
  CASE_GFXPRE11_GFX11PLUS(M0) \
  CASE_GFXPRE11_GFX11PLUS(SGPR_NULL) \
  CASE_GFXPRE11_GFX11PLUS_TO(SGPR_NULL64, SGPR_NULL) \
  }

#define CASE_CI_VI(node) \
  assert(!isSI(STI)); \
  case node: return isCI(STI) ? node##_ci : node##_vi;

#define CASE_VI_GFX9PLUS(node) \
  case node: return isGFX9Plus(STI) ? node##_gfx9plus : node##_vi;

#define CASE_GFXPRE11_GFX11PLUS(node) \
  case node: return isGFX11Plus(STI) ? node##_gfx11plus : node##_gfxpre11;

#define CASE_GFXPRE11_GFX11PLUS_TO(node, result) \
  case node: return isGFX11Plus(STI) ? result##_gfx11plus : result##_gfxpre11;
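
// Illustrative example (not part of the original file): MAP_REG2REG is
// expanded twice below with different CASE_* definitions, once to map generic
// pseudo registers to their subtarget-specific MC encodings (getMCReg) and
// once for the reverse direction (mc2PseudoReg):
//
//   unsigned Enc = getMCReg(AMDGPU::FLAT_SCR, STI); // FLAT_SCR_ci on CI, otherwise FLAT_SCR_vi
//   assert(mc2PseudoReg(Enc) == AMDGPU::FLAT_SCR);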

unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI) {
  if (STI.getTargetTriple().getArch() == Triple::r600)
    return Reg;
  MAP_REG2REG
}

#undef CASE_CI_VI
#undef CASE_VI_GFX9PLUS
#undef CASE_GFXPRE11_GFX11PLUS
#undef CASE_GFXPRE11_GFX11PLUS_TO

#define CASE_CI_VI(node) case node##_ci: case node##_vi: return node;
#define CASE_VI_GFX9PLUS(node) case node##_vi: case node##_gfx9plus: return node;
#define CASE_GFXPRE11_GFX11PLUS(node) case node##_gfx11plus: case node##_gfxpre11: return node;
#define CASE_GFXPRE11_GFX11PLUS_TO(node, result)

unsigned mc2PseudoReg(unsigned Reg) {
  MAP_REG2REG
}

bool isInlineValue(unsigned Reg) {
  switch (Reg) {
  case AMDGPU::SRC_SHARED_BASE_LO:
  case AMDGPU::SRC_SHARED_BASE:
  case AMDGPU::SRC_SHARED_LIMIT_LO:
  case AMDGPU::SRC_SHARED_LIMIT:
  case AMDGPU::SRC_PRIVATE_BASE_LO:
  case AMDGPU::SRC_PRIVATE_BASE:
  case AMDGPU::SRC_PRIVATE_LIMIT_LO:
  case AMDGPU::SRC_PRIVATE_LIMIT:
  case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
    return true;
  case AMDGPU::SRC_VCCZ:
  case AMDGPU::SRC_EXECZ:
  case AMDGPU::SRC_SCC:
    return true;
  case AMDGPU::SGPR_NULL:
    return true;
  default:
    return false;
  }
}

#undef CASE_CI_VI
#undef CASE_VI_GFX9PLUS
#undef CASE_GFXPRE11_GFX11PLUS
#undef CASE_GFXPRE11_GFX11PLUS_TO
#undef MAP_REG2REG

bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned OpType = Desc.operands()[OpNo].OperandType;
  return OpType >= AMDGPU::OPERAND_SRC_FIRST &&
         OpType <= AMDGPU::OPERAND_SRC_LAST;
}

bool isKImmOperand(const MCInstrDesc &Desc, unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned OpType = Desc.operands()[OpNo].OperandType;
  return OpType >= AMDGPU::OPERAND_KIMM_FIRST &&
         OpType <= AMDGPU::OPERAND_KIMM_LAST;
}

bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned OpType = Desc.operands()[OpNo].OperandType;
  switch (OpType) {
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
  case AMDGPU::OPERAND_REG_IMM_V2FP16:
  case AMDGPU::OPERAND_REG_IMM_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
  case AMDGPU::OPERAND_REG_IMM_V2FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
    return true;
  default:
    return false;
  }
}

bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned OpType = Desc.operands()[OpNo].OperandType;
  return OpType >= AMDGPU::OPERAND_REG_INLINE_C_FIRST &&
         OpType <= AMDGPU::OPERAND_REG_INLINE_C_LAST;
}
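
// Illustrative example (not part of the original file): these predicates look
// only at the OperandType recorded in the MCInstrDesc, so they can be queried
// for any operand index of any instruction; Desc and OpNo below are
// placeholders:
//
//   if (isSISrcOperand(Desc, OpNo) && !isSISrcInlinableOperand(Desc, OpNo))
//     ; // a source operand that can also accept a literal constant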

// Avoid using MCRegisterClass::getSize, since that function will go away
// (move from MC* level to Target* level). Return size in bits.
unsigned getRegBitWidth(unsigned RCID) {
  switch (RCID) {
  case AMDGPU::VGPR_LO16RegClassID:
  case AMDGPU::VGPR_HI16RegClassID:
  case AMDGPU::SGPR_LO16RegClassID:
  case AMDGPU::AGPR_LO16RegClassID:
    return 16;
  case AMDGPU::SGPR_32RegClassID:
  case AMDGPU::VGPR_32RegClassID:
  case AMDGPU::VRegOrLds_32RegClassID:
  case AMDGPU::AGPR_32RegClassID:
  case AMDGPU::VS_32RegClassID:
  case AMDGPU::AV_32RegClassID:
  case AMDGPU::SReg_32RegClassID:
  case AMDGPU::SReg_32_XM0RegClassID:
  case AMDGPU::SRegOrLds_32RegClassID:
    return 32;
  case AMDGPU::SGPR_64RegClassID:
  case AMDGPU::VS_64RegClassID:
  case AMDGPU::SReg_64RegClassID:
  case AMDGPU::VReg_64RegClassID:
  case AMDGPU::AReg_64RegClassID:
  case AMDGPU::SReg_64_XEXECRegClassID:
  case AMDGPU::VReg_64_Align2RegClassID:
  case AMDGPU::AReg_64_Align2RegClassID:
  case AMDGPU::AV_64RegClassID:
  case AMDGPU::AV_64_Align2RegClassID:
    return 64;
  case AMDGPU::SGPR_96RegClassID:
  case AMDGPU::SReg_96RegClassID:
  case AMDGPU::VReg_96RegClassID:
  case AMDGPU::AReg_96RegClassID:
  case AMDGPU::VReg_96_Align2RegClassID:
  case AMDGPU::AReg_96_Align2RegClassID:
  case AMDGPU::AV_96RegClassID:
  case AMDGPU::AV_96_Align2RegClassID:
    return 96;
  case AMDGPU::SGPR_128RegClassID:
  case AMDGPU::SReg_128RegClassID:
  case AMDGPU::VReg_128RegClassID:
  case AMDGPU::AReg_128RegClassID:
  case AMDGPU::VReg_128_Align2RegClassID:
  case AMDGPU::AReg_128_Align2RegClassID:
  case AMDGPU::AV_128RegClassID:
  case AMDGPU::AV_128_Align2RegClassID:
    return 128;
  case AMDGPU::SGPR_160RegClassID:
  case AMDGPU::SReg_160RegClassID:
  case AMDGPU::VReg_160RegClassID:
  case AMDGPU::AReg_160RegClassID:
  case AMDGPU::VReg_160_Align2RegClassID:
  case AMDGPU::AReg_160_Align2RegClassID:
  case AMDGPU::AV_160RegClassID:
  case AMDGPU::AV_160_Align2RegClassID:
    return 160;
  case AMDGPU::SGPR_192RegClassID:
  case AMDGPU::SReg_192RegClassID:
  case AMDGPU::VReg_192RegClassID:
  case AMDGPU::AReg_192RegClassID:
  case AMDGPU::VReg_192_Align2RegClassID:
  case AMDGPU::AReg_192_Align2RegClassID:
  case AMDGPU::AV_192RegClassID:
  case AMDGPU::AV_192_Align2RegClassID:
    return 192;
  case AMDGPU::SGPR_224RegClassID:
  case AMDGPU::SReg_224RegClassID:
  case AMDGPU::VReg_224RegClassID:
  case AMDGPU::AReg_224RegClassID:
  case AMDGPU::VReg_224_Align2RegClassID:
  case AMDGPU::AReg_224_Align2RegClassID:
  case AMDGPU::AV_224RegClassID:
  case AMDGPU::AV_224_Align2RegClassID:
    return 224;
  case AMDGPU::SGPR_256RegClassID:
  case AMDGPU::SReg_256RegClassID:
  case AMDGPU::VReg_256RegClassID:
  case AMDGPU::AReg_256RegClassID:
  case AMDGPU::VReg_256_Align2RegClassID:
  case AMDGPU::AReg_256_Align2RegClassID:
  case AMDGPU::AV_256RegClassID:
  case AMDGPU::AV_256_Align2RegClassID:
    return 256;
  case AMDGPU::SGPR_288RegClassID:
  case AMDGPU::SReg_288RegClassID:
  case AMDGPU::VReg_288RegClassID:
  case AMDGPU::AReg_288RegClassID:
  case AMDGPU::VReg_288_Align2RegClassID:
  case AMDGPU::AReg_288_Align2RegClassID:
  case AMDGPU::AV_288RegClassID:
  case AMDGPU::AV_288_Align2RegClassID:
    return 288;
  case AMDGPU::SGPR_320RegClassID:
  case AMDGPU::SReg_320RegClassID:
  case AMDGPU::VReg_320RegClassID:
  case AMDGPU::AReg_320RegClassID:
  case AMDGPU::VReg_320_Align2RegClassID:
  case AMDGPU::AReg_320_Align2RegClassID:
  case AMDGPU::AV_320RegClassID:
  case AMDGPU::AV_320_Align2RegClassID:
    return 320;
  case AMDGPU::SGPR_352RegClassID:
  case AMDGPU::SReg_352RegClassID:
  case AMDGPU::VReg_352RegClassID:
  case AMDGPU::AReg_352RegClassID:
  case AMDGPU::VReg_352_Align2RegClassID:
  case AMDGPU::AReg_352_Align2RegClassID:
  case AMDGPU::AV_352RegClassID:
  case AMDGPU::AV_352_Align2RegClassID:
    return 352;
  case AMDGPU::SGPR_384RegClassID:
  case AMDGPU::SReg_384RegClassID:
  case AMDGPU::VReg_384RegClassID:
  case AMDGPU::AReg_384RegClassID:
  case AMDGPU::VReg_384_Align2RegClassID:
  case AMDGPU::AReg_384_Align2RegClassID:
  case AMDGPU::AV_384RegClassID:
  case AMDGPU::AV_384_Align2RegClassID:
    return 384;
  case AMDGPU::SGPR_512RegClassID:
  case AMDGPU::SReg_512RegClassID:
  case AMDGPU::VReg_512RegClassID:
  case AMDGPU::AReg_512RegClassID:
  case AMDGPU::VReg_512_Align2RegClassID:
  case AMDGPU::AReg_512_Align2RegClassID:
  case AMDGPU::AV_512RegClassID:
  case AMDGPU::AV_512_Align2RegClassID:
    return 512;
  case AMDGPU::SGPR_1024RegClassID:
  case AMDGPU::SReg_1024RegClassID:
  case AMDGPU::VReg_1024RegClassID:
  case AMDGPU::AReg_1024RegClassID:
  case AMDGPU::VReg_1024_Align2RegClassID:
  case AMDGPU::AReg_1024_Align2RegClassID:
  case AMDGPU::AV_1024RegClassID:
  case AMDGPU::AV_1024_Align2RegClassID:
    return 1024;
  default:
    llvm_unreachable("Unexpected register class");
  }
}

unsigned getRegBitWidth(const MCRegisterClass &RC) {
  return getRegBitWidth(RC.getID());
}

unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc,
                           unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned RCID = Desc.operands()[OpNo].RegClass;
  return getRegBitWidth(MRI->getRegClass(RCID)) / 8;
}

bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi) {
  if (isInlinableIntLiteral(Literal))
    return true;

  uint64_t Val = static_cast<uint64_t>(Literal);
  return (Val == DoubleToBits(0.0)) ||
         (Val == DoubleToBits(1.0)) ||
         (Val == DoubleToBits(-1.0)) ||
         (Val == DoubleToBits(0.5)) ||
         (Val == DoubleToBits(-0.5)) ||
         (Val == DoubleToBits(2.0)) ||
         (Val == DoubleToBits(-2.0)) ||
         (Val == DoubleToBits(4.0)) ||
         (Val == DoubleToBits(-4.0)) ||
         (Val == 0x3fc45f306dc9c882 && HasInv2Pi);
}
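
// Illustrative example (not part of the original file): besides the small
// integers accepted by isInlinableIntLiteral, only the bit patterns listed
// above qualify as 64-bit inline constants; any other double needs a literal:
//
//   isInlinableLiteral64(DoubleToBits(0.5), /*HasInv2Pi=*/true); // true
//   isInlinableLiteral64(DoubleToBits(0.3), /*HasInv2Pi=*/true); // false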

bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi) {
  if (isInlinableIntLiteral(Literal))
    return true;

  // The actual type of the operand does not seem to matter as long
  // as the bits match one of the inline immediate values. For example:
  //
  // -nan has the hexadecimal encoding of 0xfffffffe which is -2 in decimal,
  // so it is a legal inline immediate.
  //
  // 1065353216 has the hexadecimal encoding 0x3f800000 which is 1.0f in
  // floating-point, so it is a legal inline immediate.

  uint32_t Val = static_cast<uint32_t>(Literal);
  return (Val == FloatToBits(0.0f)) ||
         (Val == FloatToBits(1.0f)) ||
         (Val == FloatToBits(-1.0f)) ||
         (Val == FloatToBits(0.5f)) ||
         (Val == FloatToBits(-0.5f)) ||
         (Val == FloatToBits(2.0f)) ||
         (Val == FloatToBits(-2.0f)) ||
         (Val == FloatToBits(4.0f)) ||
         (Val == FloatToBits(-4.0f)) ||
         (Val == 0x3e22f983 && HasInv2Pi);
}

bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi) {
  if (!HasInv2Pi)
    return false;

  if (isInlinableIntLiteral(Literal))
    return true;

  uint16_t Val = static_cast<uint16_t>(Literal);
  return Val == 0x3C00 || // 1.0
         Val == 0xBC00 || // -1.0
         Val == 0x3800 || // 0.5
         Val == 0xB800 || // -0.5
         Val == 0x4000 || // 2.0
         Val == 0xC000 || // -2.0
         Val == 0x4400 || // 4.0
         Val == 0xC400 || // -4.0
         Val == 0x3118;   // 1/2pi
}

bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi) {
  assert(HasInv2Pi);

  if (isInt<16>(Literal) || isUInt<16>(Literal)) {
    int16_t Trunc = static_cast<int16_t>(Literal);
    return AMDGPU::isInlinableLiteral16(Trunc, HasInv2Pi);
  }
  if (!(Literal & 0xffff))
    return AMDGPU::isInlinableLiteral16(Literal >> 16, HasInv2Pi);

  int16_t Lo16 = static_cast<int16_t>(Literal);
  int16_t Hi16 = static_cast<int16_t>(Literal >> 16);
  return Lo16 == Hi16 && isInlinableLiteral16(Lo16, HasInv2Pi);
}

bool isInlinableIntLiteralV216(int32_t Literal) {
  int16_t Lo16 = static_cast<int16_t>(Literal);
  if (isInt<16>(Literal) || isUInt<16>(Literal))
    return isInlinableIntLiteral(Lo16);

  int16_t Hi16 = static_cast<int16_t>(Literal >> 16);
  if (!(Literal & 0xffff))
    return isInlinableIntLiteral(Hi16);
  return Lo16 == Hi16 && isInlinableIntLiteral(Lo16);
}

bool isFoldableLiteralV216(int32_t Literal, bool HasInv2Pi) {
  assert(HasInv2Pi);

  int16_t Lo16 = static_cast<int16_t>(Literal);
  if (isInt<16>(Literal) || isUInt<16>(Literal))
    return true;

  int16_t Hi16 = static_cast<int16_t>(Literal >> 16);
  if (!(Literal & 0xffff))
    return true;
  return Lo16 == Hi16;
}
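
// Illustrative example (not part of the original file): a packed 16-bit pair
// is inlinable when the 32-bit value reduces to a single 16-bit half that is
// itself an inlinable 16-bit constant, or when both halves carry the same
// inlinable 16-bit constant:
//
//   isInlinableLiteralV216(0x3C003C00, /*HasInv2Pi=*/true); // true:  both halves are 1.0h
//   isInlinableLiteralV216(0x3C004000, /*HasInv2Pi=*/true); // false: halves differ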

bool isArgPassedInSGPR(const Argument *A) {
  const Function *F = A->getParent();

  // Arguments to compute shaders are never a source of divergence.
  CallingConv::ID CC = F->getCallingConv();
  switch (CC) {
  case CallingConv::AMDGPU_KERNEL:
  case CallingConv::SPIR_KERNEL:
    return true;
  case CallingConv::AMDGPU_VS:
  case CallingConv::AMDGPU_LS:
  case CallingConv::AMDGPU_HS:
  case CallingConv::AMDGPU_ES:
  case CallingConv::AMDGPU_GS:
  case CallingConv::AMDGPU_PS:
  case CallingConv::AMDGPU_CS:
  case CallingConv::AMDGPU_Gfx:
    // For non-compute shaders, SGPR inputs are marked with either inreg or byval.
    // Everything else is in VGPRs.
    return F->getAttributes().hasParamAttr(A->getArgNo(), Attribute::InReg) ||
           F->getAttributes().hasParamAttr(A->getArgNo(), Attribute::ByVal);
  default:
    // TODO: Should calls support inreg for SGPR inputs?
    return false;
  }
}

static bool hasSMEMByteOffset(const MCSubtargetInfo &ST) {
  return isGCN3Encoding(ST) || isGFX10Plus(ST);
}

static bool hasSMRDSignedImmOffset(const MCSubtargetInfo &ST) {
  return isGFX9Plus(ST);
}

bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST,
                                      int64_t EncodedOffset) {
  return hasSMEMByteOffset(ST) ? isUInt<20>(EncodedOffset)
                               : isUInt<8>(EncodedOffset);
}

bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST,
                                    int64_t EncodedOffset,
                                    bool IsBuffer) {
  return !IsBuffer &&
         hasSMRDSignedImmOffset(ST) &&
         isInt<21>(EncodedOffset);
}

static bool isDwordAligned(uint64_t ByteOffset) {
  return (ByteOffset & 3) == 0;
}

uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST,
                                uint64_t ByteOffset) {
  if (hasSMEMByteOffset(ST))
    return ByteOffset;

  assert(isDwordAligned(ByteOffset));
  return ByteOffset >> 2;
}

std::optional<int64_t> getSMRDEncodedOffset(const MCSubtargetInfo &ST,
                                            int64_t ByteOffset, bool IsBuffer) {
  // The signed version is always a byte offset.
  if (!IsBuffer && hasSMRDSignedImmOffset(ST)) {
    assert(hasSMEMByteOffset(ST));
    return isInt<20>(ByteOffset) ? std::optional<int64_t>(ByteOffset)
                                 : std::nullopt;
  }

  if (!isDwordAligned(ByteOffset) && !hasSMEMByteOffset(ST))
    return std::nullopt;

  int64_t EncodedOffset = convertSMRDOffsetUnits(ST, ByteOffset);
  return isLegalSMRDEncodedUnsignedOffset(ST, EncodedOffset)
             ? std::optional<int64_t>(EncodedOffset)
             : std::nullopt;
}

std::optional<int64_t> getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST,
                                                     int64_t ByteOffset) {
  if (!isCI(ST) || !isDwordAligned(ByteOffset))
    return std::nullopt;

  int64_t EncodedOffset = convertSMRDOffsetUnits(ST, ByteOffset);
  return isUInt<32>(EncodedOffset) ? std::optional<int64_t>(EncodedOffset)
                                   : std::nullopt;
}

unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST) {
  // Address offset is 12-bit signed for GFX10, 13-bit for GFX9 and GFX11+.
  if (AMDGPU::isGFX10(ST))
    return 12;

  return 13;
}
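
// Illustrative example (not part of the original file): pre-GCN3 subtargets
// encode the SMRD immediate as a dword offset with only 8 bits available,
// while GCN3 and GFX10+ use a byte offset, so the legal range depends on the
// subtarget. Assuming STI describes an SI subtarget:
//
//   getSMRDEncodedOffset(STI, /*ByteOffset=*/256, /*IsBuffer=*/false);  // 64 (dword units)
//   getSMRDEncodedOffset(STI, /*ByteOffset=*/4096, /*IsBuffer=*/false); // std::nullopt: 1024 exceeds 8 bits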

// Given Imm, split it into the values to put into the SOffset and ImmOffset
// fields in an MUBUF instruction. Return false if it is not possible (due to a
// hardware bug needing a workaround).
//
// The required alignment ensures that individual address components remain
// aligned if they are aligned to begin with. It also ensures that additional
// offsets within the given alignment can be added to the resulting ImmOffset.
bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset,
                      const GCNSubtarget *Subtarget, Align Alignment) {
  const uint32_t MaxImm = alignDown(4095, Alignment.value());
  uint32_t Overflow = 0;

  if (Imm > MaxImm) {
    if (Imm <= MaxImm + 64) {
      // Use an SOffset inline constant for 4..64
      Overflow = Imm - MaxImm;
      Imm = MaxImm;
    } else {
      // Try to keep the same value in SOffset for adjacent loads, so that
      // the corresponding register contents can be re-used.
      //
      // Load values with all low-bits (except for alignment bits) set into
      // SOffset, so that a larger range of values can be covered using
      // s_movk_i32.
      //
      // Atomic operations fail to work correctly when individual address
      // components are unaligned, even if their sum is aligned.
      uint32_t High = (Imm + Alignment.value()) & ~4095;
      uint32_t Low = (Imm + Alignment.value()) & 4095;
      Imm = Low;
      Overflow = High - Alignment.value();
    }
  }

  // There is a hardware bug in SI and CI which prevents address clamping in
  // MUBUF instructions from working correctly with SOffsets. The immediate
  // offset is unaffected.
  if (Overflow > 0 &&
      Subtarget->getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS)
    return false;

  ImmOffset = Imm;
  SOffset = Overflow;
  return true;
}

SIModeRegisterDefaults::SIModeRegisterDefaults(const Function &F) {
  *this = getDefaultForCallingConv(F.getCallingConv());

  StringRef IEEEAttr = F.getFnAttribute("amdgpu-ieee").getValueAsString();
  if (!IEEEAttr.empty())
    IEEE = IEEEAttr == "true";

  StringRef DX10ClampAttr
    = F.getFnAttribute("amdgpu-dx10-clamp").getValueAsString();
  if (!DX10ClampAttr.empty())
    DX10Clamp = DX10ClampAttr == "true";

  StringRef DenormF32Attr =
      F.getFnAttribute("denormal-fp-math-f32").getValueAsString();
  if (!DenormF32Attr.empty())
    FP32Denormals = parseDenormalFPAttribute(DenormF32Attr);

  StringRef DenormAttr =
      F.getFnAttribute("denormal-fp-math").getValueAsString();
  if (!DenormAttr.empty()) {
    DenormalMode DenormMode = parseDenormalFPAttribute(DenormAttr);
    if (DenormF32Attr.empty())
      FP32Denormals = DenormMode;
    FP64FP16Denormals = DenormMode;
  }
}

namespace {

struct SourceOfDivergence {
  unsigned Intr;
};
const SourceOfDivergence *lookupSourceOfDivergence(unsigned Intr);

#define GET_SourcesOfDivergence_IMPL
#define GET_Gfx9BufferFormat_IMPL
#define GET_Gfx10BufferFormat_IMPL
#define GET_Gfx11PlusBufferFormat_IMPL
#include "AMDGPUGenSearchableTables.inc"

} // end anonymous namespace

bool isIntrinsicSourceOfDivergence(unsigned IntrID) {
  return lookupSourceOfDivergence(IntrID);
}

const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t BitsPerComp,
                                                  uint8_t NumComponents,
                                                  uint8_t NumFormat,
                                                  const MCSubtargetInfo &STI) {
  return isGFX11Plus(STI)
             ? getGfx11PlusBufferFormatInfo(BitsPerComp, NumComponents,
                                            NumFormat)
             : isGFX10(STI) ? getGfx10BufferFormatInfo(BitsPerComp,
                                                       NumComponents, NumFormat)
                            : getGfx9BufferFormatInfo(BitsPerComp,
                                                      NumComponents, NumFormat);
}

const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t Format,
                                                  const MCSubtargetInfo &STI) {
  return isGFX11Plus(STI) ? getGfx11PlusBufferFormatInfo(Format)
         : isGFX10(STI)   ? getGfx10BufferFormatInfo(Format)
                          : getGfx9BufferFormatInfo(Format);
}

} // namespace AMDGPU

raw_ostream &operator<<(raw_ostream &OS,
                        const AMDGPU::IsaInfo::TargetIDSetting S) {
  switch (S) {
  case (AMDGPU::IsaInfo::TargetIDSetting::Unsupported):
    OS << "Unsupported";
    break;
  case (AMDGPU::IsaInfo::TargetIDSetting::Any):
    OS << "Any";
    break;
  case (AMDGPU::IsaInfo::TargetIDSetting::Off):
    OS << "Off";
    break;
  case (AMDGPU::IsaInfo::TargetIDSetting::On):
    OS << "On";
    break;
  }
  return OS;
}

} // namespace llvm