//===- AMDGPUBaseInfo.cpp - AMDGPU Base encoding information -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDGPUBaseInfo.h"
#include "AMDGPU.h"
#include "AMDGPUAsmUtils.h"
#include "AMDKernelCodeT.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsR600.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/TargetParser.h"

#define GET_INSTRINFO_NAMED_OPS
#define GET_INSTRMAP_INFO
#include "AMDGPUGenInstrInfo.inc"

static llvm::cl::opt<unsigned>
    AmdhsaCodeObjectVersion("amdhsa-code-object-version", llvm::cl::Hidden,
                            llvm::cl::desc("AMDHSA Code Object Version"),
                            llvm::cl::init(4));

// TODO-GFX11: Remove this when full 16-bit codegen is implemented.
static llvm::cl::opt<bool>
    LimitTo128VGPRs("amdgpu-limit-to-128-vgprs", llvm::cl::Hidden,
                    llvm::cl::desc("Never use more than 128 VGPRs"));

namespace {

/// \returns Bit mask for given bit \p Shift and bit \p Width.
unsigned getBitMask(unsigned Shift, unsigned Width) {
  return ((1 << Width) - 1) << Shift;
}

/// Packs \p Src into \p Dst for given bit \p Shift and bit \p Width.
///
/// \returns Packed \p Dst.
unsigned packBits(unsigned Src, unsigned Dst, unsigned Shift, unsigned Width) {
  unsigned Mask = getBitMask(Shift, Width);
  return ((Src << Shift) & Mask) | (Dst & ~Mask);
}

/// Unpacks bits from \p Src for given bit \p Shift and bit \p Width.
///
/// \returns Unpacked bits.
unsigned unpackBits(unsigned Src, unsigned Shift, unsigned Width) {
  return (Src & getBitMask(Shift, Width)) >> Shift;
}

/// \returns Vmcnt bit shift (lower bits).
unsigned getVmcntBitShiftLo(unsigned VersionMajor) {
  return VersionMajor >= 11 ? 10 : 0;
}

/// \returns Vmcnt bit width (lower bits).
unsigned getVmcntBitWidthLo(unsigned VersionMajor) {
  return VersionMajor >= 11 ? 6 : 4;
}

/// \returns Expcnt bit shift.
unsigned getExpcntBitShift(unsigned VersionMajor) {
  return VersionMajor >= 11 ? 0 : 4;
}

/// \returns Expcnt bit width.
unsigned getExpcntBitWidth(unsigned VersionMajor) { return 3; }

/// \returns Lgkmcnt bit shift.
unsigned getLgkmcntBitShift(unsigned VersionMajor) {
  return VersionMajor >= 11 ? 4 : 8;
}

/// \returns Lgkmcnt bit width.
unsigned getLgkmcntBitWidth(unsigned VersionMajor) {
  return VersionMajor >= 10 ? 6 : 4;
}

/// \returns Vmcnt bit shift (higher bits).
unsigned getVmcntBitShiftHi(unsigned VersionMajor) { return 14; }

/// \returns Vmcnt bit width (higher bits).
unsigned getVmcntBitWidthHi(unsigned VersionMajor) {
  return (VersionMajor == 9 || VersionMajor == 10) ? 2 : 0;
}

} // end namespace anonymous

namespace llvm {

namespace AMDGPU {

Optional<uint8_t> getHsaAbiVersion(const MCSubtargetInfo *STI) {
  if (STI && STI->getTargetTriple().getOS() != Triple::AMDHSA)
    return None;

  switch (AmdhsaCodeObjectVersion) {
  case 2:
    return ELF::ELFABIVERSION_AMDGPU_HSA_V2;
  case 3:
    return ELF::ELFABIVERSION_AMDGPU_HSA_V3;
  case 4:
    return ELF::ELFABIVERSION_AMDGPU_HSA_V4;
  case 5:
    return ELF::ELFABIVERSION_AMDGPU_HSA_V5;
  default:
    report_fatal_error(Twine("Unsupported AMDHSA Code Object Version ") +
                       Twine(AmdhsaCodeObjectVersion));
  }
}

bool isHsaAbiVersion2(const MCSubtargetInfo *STI) {
  if (Optional<uint8_t> HsaAbiVer = getHsaAbiVersion(STI))
    return *HsaAbiVer == ELF::ELFABIVERSION_AMDGPU_HSA_V2;
  return false;
}

bool isHsaAbiVersion3(const MCSubtargetInfo *STI) {
  if (Optional<uint8_t> HsaAbiVer = getHsaAbiVersion(STI))
    return *HsaAbiVer == ELF::ELFABIVERSION_AMDGPU_HSA_V3;
  return false;
}

bool isHsaAbiVersion4(const MCSubtargetInfo *STI) {
  if (Optional<uint8_t> HsaAbiVer = getHsaAbiVersion(STI))
    return *HsaAbiVer == ELF::ELFABIVERSION_AMDGPU_HSA_V4;
  return false;
}

bool isHsaAbiVersion5(const MCSubtargetInfo *STI) {
  if (Optional<uint8_t> HsaAbiVer = getHsaAbiVersion(STI))
    return *HsaAbiVer == ELF::ELFABIVERSION_AMDGPU_HSA_V5;
  return false;
}

bool isHsaAbiVersion3AndAbove(const MCSubtargetInfo *STI) {
  return isHsaAbiVersion3(STI) || isHsaAbiVersion4(STI) ||
         isHsaAbiVersion5(STI);
}

unsigned getAmdhsaCodeObjectVersion() {
  return AmdhsaCodeObjectVersion;
}

unsigned getMultigridSyncArgImplicitArgPosition() {
  switch (AmdhsaCodeObjectVersion) {
  case 2:
  case 3:
  case 4:
    return 48;
  case 5:
    return AMDGPU::ImplicitArg::MULTIGRID_SYNC_ARG_OFFSET;
  default:
    llvm_unreachable("Unexpected code object version");
    return 0;
  }
}


// FIXME: All such magic numbers about the ABI should be in a
// central TD file.
unsigned getHostcallImplicitArgPosition() {
  switch (AmdhsaCodeObjectVersion) {
  case 2:
  case 3:
  case 4:
    return 24;
  case 5:
    return AMDGPU::ImplicitArg::HOSTCALL_PTR_OFFSET;
  default:
    llvm_unreachable("Unexpected code object version");
    return 0;
  }
}

#define GET_MIMGBaseOpcodesTable_IMPL
#define GET_MIMGDimInfoTable_IMPL
#define GET_MIMGInfoTable_IMPL
#define GET_MIMGLZMappingTable_IMPL
#define GET_MIMGMIPMappingTable_IMPL
#define GET_MIMGBiasMappingTable_IMPL
#define GET_MIMGOffsetMappingTable_IMPL
#define GET_MIMGG16MappingTable_IMPL
#define GET_MAIInstInfoTable_IMPL
#include "AMDGPUGenSearchableTables.inc"

int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
                  unsigned VDataDwords, unsigned VAddrDwords) {
  const MIMGInfo *Info = getMIMGOpcodeHelper(BaseOpcode, MIMGEncoding,
                                             VDataDwords, VAddrDwords);
  return Info ? Info->Opcode : -1;
}

const MIMGBaseOpcodeInfo *getMIMGBaseOpcode(unsigned Opc) {
  const MIMGInfo *Info = getMIMGInfo(Opc);
  return Info ? getMIMGBaseOpcodeInfo(Info->BaseOpcode) : nullptr;
}

int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels) {
  const MIMGInfo *OrigInfo = getMIMGInfo(Opc);
  const MIMGInfo *NewInfo =
      getMIMGOpcodeHelper(OrigInfo->BaseOpcode, OrigInfo->MIMGEncoding,
                          NewChannels, OrigInfo->VAddrDwords);
  return NewInfo ? NewInfo->Opcode : -1;
}

unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode,
                           const MIMGDimInfo *Dim, bool IsA16,
                           bool IsG16Supported) {
  unsigned AddrWords = BaseOpcode->NumExtraArgs;
  unsigned AddrComponents = (BaseOpcode->Coordinates ? Dim->NumCoords : 0) +
                            (BaseOpcode->LodOrClampOrMip ? 1 : 0);
  if (IsA16)
    AddrWords += divideCeil(AddrComponents, 2);
  else
    AddrWords += AddrComponents;

  // Note: For subtargets that support A16 but not G16, enabling A16 also
  // enables 16 bit gradients.
  // For subtargets that support A16 (operand) and G16 (done with a different
  // instruction encoding), they are independent.

  if (BaseOpcode->Gradients) {
    if ((IsA16 && !IsG16Supported) || BaseOpcode->G16)
      // There are two gradients per coordinate, we pack them separately.
      // For the 3d case,
      // we get (dy/du, dx/du) (-, dz/du) (dy/dv, dx/dv) (-, dz/dv)
      AddrWords += alignTo<2>(Dim->NumGradients / 2);
    else
      AddrWords += Dim->NumGradients;
  }
  return AddrWords;
}

struct MUBUFInfo {
  uint16_t Opcode;
  uint16_t BaseOpcode;
  uint8_t elements;
  bool has_vaddr;
  bool has_srsrc;
  bool has_soffset;
  bool IsBufferInv;
};

struct MTBUFInfo {
  uint16_t Opcode;
  uint16_t BaseOpcode;
  uint8_t elements;
  bool has_vaddr;
  bool has_srsrc;
  bool has_soffset;
};

struct SMInfo {
  uint16_t Opcode;
  bool IsBuffer;
};

struct VOPInfo {
  uint16_t Opcode;
  bool IsSingle;
};

struct VOPC64DPPInfo {
  uint16_t Opcode;
};

struct VOPDComponentInfo {
  uint16_t BaseVOP;
  uint16_t VOPDOp;
  bool CanBeVOPDX;
};

struct VOPDInfo {
  uint16_t Opcode;
  uint16_t OpX;
  uint16_t OpY;
};

#define GET_MTBUFInfoTable_DECL
#define GET_MTBUFInfoTable_IMPL
#define GET_MUBUFInfoTable_DECL
#define GET_MUBUFInfoTable_IMPL
#define GET_SMInfoTable_DECL
#define GET_SMInfoTable_IMPL
#define GET_VOP1InfoTable_DECL
#define GET_VOP1InfoTable_IMPL
#define GET_VOP2InfoTable_DECL
#define GET_VOP2InfoTable_IMPL
#define GET_VOP3InfoTable_DECL
#define GET_VOP3InfoTable_IMPL
#define GET_VOPC64DPPTable_DECL
#define GET_VOPC64DPPTable_IMPL
#define GET_VOPC64DPP8Table_DECL
#define GET_VOPC64DPP8Table_IMPL
#define GET_VOPDComponentTable_DECL
#define GET_VOPDComponentTable_IMPL
#define GET_VOPDPairs_DECL
#define GET_VOPDPairs_IMPL
#define GET_WMMAOpcode2AddrMappingTable_DECL
#define GET_WMMAOpcode2AddrMappingTable_IMPL
#define GET_WMMAOpcode3AddrMappingTable_DECL
#define GET_WMMAOpcode3AddrMappingTable_IMPL
#include "AMDGPUGenSearchableTables.inc"

int getMTBUFBaseOpcode(unsigned Opc) {
  const MTBUFInfo *Info = getMTBUFInfoFromOpcode(Opc);
  return Info ? Info->BaseOpcode : -1;
}

int getMTBUFOpcode(unsigned BaseOpc, unsigned Elements) {
  const MTBUFInfo *Info = getMTBUFInfoFromBaseOpcodeAndElements(BaseOpc, Elements);
  return Info ? Info->Opcode : -1;
}

int getMTBUFElements(unsigned Opc) {
  const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
  return Info ? Info->elements : 0;
}

bool getMTBUFHasVAddr(unsigned Opc) {
  const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
  return Info ? Info->has_vaddr : false;
}

bool getMTBUFHasSrsrc(unsigned Opc) {
  const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
  return Info ? Info->has_srsrc : false;
}

bool getMTBUFHasSoffset(unsigned Opc) {
  const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
  return Info ? Info->has_soffset : false;
}

int getMUBUFBaseOpcode(unsigned Opc) {
  const MUBUFInfo *Info = getMUBUFInfoFromOpcode(Opc);
  return Info ? Info->BaseOpcode : -1;
}

int getMUBUFOpcode(unsigned BaseOpc, unsigned Elements) {
  const MUBUFInfo *Info = getMUBUFInfoFromBaseOpcodeAndElements(BaseOpc, Elements);
  return Info ? Info->Opcode : -1;
}

int getMUBUFElements(unsigned Opc) {
  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
  return Info ? Info->elements : 0;
}

bool getMUBUFHasVAddr(unsigned Opc) {
  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
  return Info ? Info->has_vaddr : false;
}

bool getMUBUFHasSrsrc(unsigned Opc) {
  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
  return Info ? Info->has_srsrc : false;
}

bool getMUBUFHasSoffset(unsigned Opc) {
  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
  return Info ? Info->has_soffset : false;
}

bool getMUBUFIsBufferInv(unsigned Opc) {
  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
  return Info ? Info->IsBufferInv : false;
}

bool getSMEMIsBuffer(unsigned Opc) {
  const SMInfo *Info = getSMEMOpcodeHelper(Opc);
  return Info ? Info->IsBuffer : false;
}

bool getVOP1IsSingle(unsigned Opc) {
  const VOPInfo *Info = getVOP1OpcodeHelper(Opc);
  return Info ? Info->IsSingle : false;
}

bool getVOP2IsSingle(unsigned Opc) {
  const VOPInfo *Info = getVOP2OpcodeHelper(Opc);
  return Info ? Info->IsSingle : false;
}

bool getVOP3IsSingle(unsigned Opc) {
  const VOPInfo *Info = getVOP3OpcodeHelper(Opc);
  return Info ? Info->IsSingle : false;
}

bool isVOPC64DPP(unsigned Opc) {
  return isVOPC64DPPOpcodeHelper(Opc) || isVOPC64DPP8OpcodeHelper(Opc);
}

bool getMAIIsDGEMM(unsigned Opc) {
  const MAIInstInfo *Info = getMAIInstInfoHelper(Opc);
  return Info ? Info->is_dgemm : false;
}

bool getMAIIsGFX940XDL(unsigned Opc) {
  const MAIInstInfo *Info = getMAIInstInfoHelper(Opc);
  return Info ? Info->is_gfx940_xdl : false;
}

CanBeVOPD getCanBeVOPD(unsigned Opc) {
  const VOPDComponentInfo *Info = getVOPDComponentHelper(Opc);
  if (Info)
    return {Info->CanBeVOPDX, true};
  else
    return {false, false};
}

unsigned getVOPDOpcode(unsigned Opc) {
  const VOPDComponentInfo *Info = getVOPDComponentHelper(Opc);
  return Info ? Info->VOPDOp : ~0u;
}

unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc) {
  const WMMAOpcodeMappingInfo *Info = getWMMAMappingInfoFrom2AddrOpcode(Opc);
  return Info ? Info->Opcode3Addr : ~0u;
}

unsigned mapWMMA3AddrTo2AddrOpcode(unsigned Opc) {
  const WMMAOpcodeMappingInfo *Info = getWMMAMappingInfoFrom3AddrOpcode(Opc);
  return Info ? Info->Opcode2Addr : ~0u;
}

// Wrapper for Tablegen'd function.  enum Subtarget is not defined in any
// header files, so we need to wrap it in a function that takes unsigned
// instead.
int getMCOpcode(uint16_t Opcode, unsigned Gen) {
  return getMCOpcodeGen(Opcode, static_cast<Subtarget>(Gen));
}

int getVOPDFull(unsigned OpX, unsigned OpY) {
  const VOPDInfo *Info = getVOPDInfoFromComponentOpcodes(OpX, OpY);
  return Info ? Info->Opcode : -1;
}

namespace IsaInfo {

AMDGPUTargetID::AMDGPUTargetID(const MCSubtargetInfo &STI)
    : STI(STI), XnackSetting(TargetIDSetting::Any),
      SramEccSetting(TargetIDSetting::Any) {
  if (!STI.getFeatureBits().test(FeatureSupportsXNACK))
    XnackSetting = TargetIDSetting::Unsupported;
  if (!STI.getFeatureBits().test(FeatureSupportsSRAMECC))
    SramEccSetting = TargetIDSetting::Unsupported;
}

void AMDGPUTargetID::setTargetIDFromFeaturesString(StringRef FS) {
  // Check if xnack or sramecc is explicitly enabled or disabled.  In the
  // absence of the target features we assume we must generate code that can run
  // in any environment.
  SubtargetFeatures Features(FS);
  Optional<bool> XnackRequested;
  Optional<bool> SramEccRequested;

  for (const std::string &Feature : Features.getFeatures()) {
    if (Feature == "+xnack")
      XnackRequested = true;
    else if (Feature == "-xnack")
      XnackRequested = false;
    else if (Feature == "+sramecc")
      SramEccRequested = true;
    else if (Feature == "-sramecc")
      SramEccRequested = false;
  }

  bool XnackSupported = isXnackSupported();
  bool SramEccSupported = isSramEccSupported();

  if (XnackRequested) {
    if (XnackSupported) {
      XnackSetting =
          *XnackRequested ? TargetIDSetting::On : TargetIDSetting::Off;
    } else {
      // If a specific xnack setting was requested and this GPU does not support
      // xnack emit a warning. Setting will remain set to "Unsupported".
      if (*XnackRequested) {
        errs() << "warning: xnack 'On' was requested for a processor that does "
                  "not support it!\n";
      } else {
        errs() << "warning: xnack 'Off' was requested for a processor that "
                  "does not support it!\n";
      }
    }
  }

  if (SramEccRequested) {
    if (SramEccSupported) {
      SramEccSetting =
          *SramEccRequested ? TargetIDSetting::On : TargetIDSetting::Off;
    } else {
      // If a specific sramecc setting was requested and this GPU does not
      // support sramecc emit a warning. Setting will remain set to
      // "Unsupported".
      if (*SramEccRequested) {
        errs() << "warning: sramecc 'On' was requested for a processor that "
                  "does not support it!\n";
      } else {
        errs() << "warning: sramecc 'Off' was requested for a processor that "
                  "does not support it!\n";
      }
    }
  }
}
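
// The two helpers below parse target-ID components such as "xnack+" or
// "sramecc-". Illustrative sketch (not taken from a test):
//   AMDGPUTargetID TID(STI);
//   TID.setTargetIDFromTargetIDStream("gfx90a:sramecc+:xnack-");
//   // TID.getSramEccSetting() == TargetIDSetting::On
//   // TID.getXnackSetting() == TargetIDSetting::Off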

static TargetIDSetting
getTargetIDSettingFromFeatureString(StringRef FeatureString) {
  if (FeatureString.endswith("-"))
    return TargetIDSetting::Off;
  if (FeatureString.endswith("+"))
    return TargetIDSetting::On;

  llvm_unreachable("Malformed feature string");
}

void AMDGPUTargetID::setTargetIDFromTargetIDStream(StringRef TargetID) {
  SmallVector<StringRef, 3> TargetIDSplit;
  TargetID.split(TargetIDSplit, ':');

  for (const auto &FeatureString : TargetIDSplit) {
    if (FeatureString.startswith("xnack"))
      XnackSetting = getTargetIDSettingFromFeatureString(FeatureString);
    if (FeatureString.startswith("sramecc"))
      SramEccSetting = getTargetIDSettingFromFeatureString(FeatureString);
  }
}

std::string AMDGPUTargetID::toString() const {
  std::string StringRep;
  raw_string_ostream StreamRep(StringRep);

  auto TargetTriple = STI.getTargetTriple();
  auto Version = getIsaVersion(STI.getCPU());

  StreamRep << TargetTriple.getArchName() << '-'
            << TargetTriple.getVendorName() << '-'
            << TargetTriple.getOSName() << '-'
            << TargetTriple.getEnvironmentName() << '-';

  std::string Processor;
  // TODO: Following else statement is present here because we used various
  // alias names for GPUs up until GFX9 (e.g. 'fiji' is same as 'gfx803').
  // Remove once all aliases are removed from GCNProcessors.td.
  if (Version.Major >= 9)
    Processor = STI.getCPU().str();
  else
    Processor = (Twine("gfx") + Twine(Version.Major) + Twine(Version.Minor) +
                 Twine(Version.Stepping))
                    .str();

  std::string Features;
  if (Optional<uint8_t> HsaAbiVersion = getHsaAbiVersion(&STI)) {
    switch (*HsaAbiVersion) {
    case ELF::ELFABIVERSION_AMDGPU_HSA_V2:
      // Code object V2 only supported specific processors and had fixed
      // settings for the XNACK.
      if (Processor == "gfx600") {
      } else if (Processor == "gfx601") {
      } else if (Processor == "gfx602") {
      } else if (Processor == "gfx700") {
      } else if (Processor == "gfx701") {
      } else if (Processor == "gfx702") {
      } else if (Processor == "gfx703") {
      } else if (Processor == "gfx704") {
      } else if (Processor == "gfx705") {
      } else if (Processor == "gfx801") {
        if (!isXnackOnOrAny())
          report_fatal_error(
              "AMD GPU code object V2 does not support processor " +
              Twine(Processor) + " without XNACK");
      } else if (Processor == "gfx802") {
      } else if (Processor == "gfx803") {
      } else if (Processor == "gfx805") {
      } else if (Processor == "gfx810") {
        if (!isXnackOnOrAny())
          report_fatal_error(
              "AMD GPU code object V2 does not support processor " +
              Twine(Processor) + " without XNACK");
      } else if (Processor == "gfx900") {
        if (isXnackOnOrAny())
          Processor = "gfx901";
      } else if (Processor == "gfx902") {
        if (isXnackOnOrAny())
          Processor = "gfx903";
      } else if (Processor == "gfx904") {
        if (isXnackOnOrAny())
          Processor = "gfx905";
      } else if (Processor == "gfx906") {
        if (isXnackOnOrAny())
          Processor = "gfx907";
      } else if (Processor == "gfx90c") {
        if (isXnackOnOrAny())
          report_fatal_error(
              "AMD GPU code object V2 does not support processor " +
              Twine(Processor) + " with XNACK being ON or ANY");
      } else {
        report_fatal_error(
            "AMD GPU code object V2 does not support processor " +
            Twine(Processor));
      }
      break;
    case ELF::ELFABIVERSION_AMDGPU_HSA_V3:
      // xnack.
      if (isXnackOnOrAny())
        Features += "+xnack";
      // In code object v2 and v3, "sramecc" feature was spelled with a
      // hyphen ("sram-ecc").
      if (isSramEccOnOrAny())
        Features += "+sram-ecc";
      break;
    case ELF::ELFABIVERSION_AMDGPU_HSA_V4:
    case ELF::ELFABIVERSION_AMDGPU_HSA_V5:
      // sramecc.
      if (getSramEccSetting() == TargetIDSetting::Off)
        Features += ":sramecc-";
      else if (getSramEccSetting() == TargetIDSetting::On)
        Features += ":sramecc+";
      // xnack.
      if (getXnackSetting() == TargetIDSetting::Off)
        Features += ":xnack-";
      else if (getXnackSetting() == TargetIDSetting::On)
        Features += ":xnack+";
      break;
    default:
      break;
    }
  }

  StreamRep << Processor << Features;

  StreamRep.flush();
  return StringRep;
}

unsigned getWavefrontSize(const MCSubtargetInfo *STI) {
  if (STI->getFeatureBits().test(FeatureWavefrontSize16))
    return 16;
  if (STI->getFeatureBits().test(FeatureWavefrontSize32))
    return 32;

  return 64;
}

unsigned getLocalMemorySize(const MCSubtargetInfo *STI) {
  if (STI->getFeatureBits().test(FeatureLocalMemorySize32768))
    return 32768;
  if (STI->getFeatureBits().test(FeatureLocalMemorySize65536))
    return 65536;

  return 0;
}

unsigned getEUsPerCU(const MCSubtargetInfo *STI) {
  // "Per CU" really means "per whatever functional block the waves of a
  // workgroup must share". For gfx10 in CU mode this is the CU, which contains
  // two SIMDs.
  if (isGFX10Plus(*STI) && STI->getFeatureBits().test(FeatureCuMode))
    return 2;
  // Pre-gfx10 a CU contains four SIMDs. For gfx10 in WGP mode the WGP contains
  // two CUs, so a total of four SIMDs.
  return 4;
}

unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI,
                               unsigned FlatWorkGroupSize) {
  assert(FlatWorkGroupSize != 0);
  if (STI->getTargetTriple().getArch() != Triple::amdgcn)
    return 8;
  unsigned N = getWavesPerWorkGroup(STI, FlatWorkGroupSize);
  if (N == 1)
    return 40;
  N = 40 / N;
  return std::min(N, 16u);
}

unsigned getMinWavesPerEU(const MCSubtargetInfo *STI) {
  return 1;
}

unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI) {
  // FIXME: Need to take scratch memory into account.
  if (isGFX90A(*STI))
    return 8;
  if (!isGFX10Plus(*STI))
    return 10;
  return hasGFX10_3Insts(*STI) ? 16 : 20;
}

unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo *STI,
                                   unsigned FlatWorkGroupSize) {
  return divideCeil(getWavesPerWorkGroup(STI, FlatWorkGroupSize),
                    getEUsPerCU(STI));
}

unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI) {
  return 1;
}

unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI) {
  // Some subtargets allow encoding 2048, but this isn't tested or supported.
  return 1024;
}

unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI,
                              unsigned FlatWorkGroupSize) {
  return divideCeil(FlatWorkGroupSize, getWavefrontSize(STI));
}

unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI) {
  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major >= 10)
    return getAddressableNumSGPRs(STI);
  if (Version.Major >= 8)
    return 16;
  return 8;
}

unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI) {
  return 8;
}

unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI) {
  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major >= 8)
    return 800;
  return 512;
}

unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI) {
  if (STI->getFeatureBits().test(FeatureSGPRInitBug))
    return FIXED_NUM_SGPRS_FOR_INIT_BUG;

  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major >= 10)
    return 106;
  if (Version.Major >= 8)
    return 102;
  return 104;
}

unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
  assert(WavesPerEU != 0);

  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major >= 10)
    return 0;

  if (WavesPerEU >= getMaxWavesPerEU(STI))
    return 0;

  unsigned MinNumSGPRs = getTotalNumSGPRs(STI) / (WavesPerEU + 1);
  if (STI->getFeatureBits().test(FeatureTrapHandler))
    MinNumSGPRs -= std::min(MinNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
  MinNumSGPRs = alignDown(MinNumSGPRs, getSGPRAllocGranule(STI)) + 1;
  return std::min(MinNumSGPRs, getAddressableNumSGPRs(STI));
}

unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
                        bool Addressable) {
  assert(WavesPerEU != 0);

  unsigned AddressableNumSGPRs = getAddressableNumSGPRs(STI);
  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major >= 10)
    return Addressable ? AddressableNumSGPRs : 108;
  if (Version.Major >= 8 && !Addressable)
    AddressableNumSGPRs = 112;
  unsigned MaxNumSGPRs = getTotalNumSGPRs(STI) / WavesPerEU;
  if (STI->getFeatureBits().test(FeatureTrapHandler))
    MaxNumSGPRs -= std::min(MaxNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
  MaxNumSGPRs = alignDown(MaxNumSGPRs, getSGPRAllocGranule(STI));
  return std::min(MaxNumSGPRs, AddressableNumSGPRs);
}
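
// Illustrative arithmetic based on the helpers above: on a pre-gfx8 target
// with no trap handler SGPRs reserved, getTotalNumSGPRs is 512 and the
// allocation granule is 8, so getMaxNumSGPRs(STI, /*WavesPerEU=*/10, true)
// yields min(alignDown(512 / 10, 8), 104) = 48.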

unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
                          bool FlatScrUsed, bool XNACKUsed) {
  unsigned ExtraSGPRs = 0;
  if (VCCUsed)
    ExtraSGPRs = 2;

  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major >= 10)
    return ExtraSGPRs;

  if (Version.Major < 8) {
    if (FlatScrUsed)
      ExtraSGPRs = 4;
  } else {
    if (XNACKUsed)
      ExtraSGPRs = 4;

    if (FlatScrUsed ||
        STI->getFeatureBits().test(AMDGPU::FeatureArchitectedFlatScratch))
      ExtraSGPRs = 6;
  }

  return ExtraSGPRs;
}

unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
                          bool FlatScrUsed) {
  return getNumExtraSGPRs(STI, VCCUsed, FlatScrUsed,
                          STI->getFeatureBits().test(AMDGPU::FeatureXNACK));
}

unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs) {
  NumSGPRs = alignTo(std::max(1u, NumSGPRs), getSGPREncodingGranule(STI));
  // SGPRBlocks is actual number of SGPR blocks minus 1.
  return NumSGPRs / getSGPREncodingGranule(STI) - 1;
}

unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI,
                             Optional<bool> EnableWavefrontSize32) {
  if (STI->getFeatureBits().test(FeatureGFX90AInsts))
    return 8;

  bool IsWave32 = EnableWavefrontSize32 ?
      *EnableWavefrontSize32 :
      STI->getFeatureBits().test(FeatureWavefrontSize32);

  if (hasGFX10_3Insts(*STI))
    return IsWave32 ? 16 : 8;

  return IsWave32 ? 8 : 4;
}

unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI,
                                Optional<bool> EnableWavefrontSize32) {
  if (STI->getFeatureBits().test(FeatureGFX90AInsts))
    return 8;

  bool IsWave32 = EnableWavefrontSize32 ?
      *EnableWavefrontSize32 :
      STI->getFeatureBits().test(FeatureWavefrontSize32);

  return IsWave32 ? 8 : 4;
}

unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI) {
  if (STI->getFeatureBits().test(FeatureGFX90AInsts))
    return 512;
  if (!isGFX10Plus(*STI))
    return 256;
  return STI->getFeatureBits().test(FeatureWavefrontSize32) ? 1024 : 512;
}

unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI) {
  if (LimitTo128VGPRs.getNumOccurrences() ? LimitTo128VGPRs
                                          : isGFX11Plus(*STI)) {
    // GFX11 changes the encoding of 16-bit operands in VOP1/2/C instructions
    // such that values 128..255 no longer mean v128..v255, they mean
    // v0.hi..v127.hi instead. Until the compiler understands this, it is not
    // safe to use v128..v255.
    // TODO-GFX11: Remove this when full 16-bit codegen is implemented.
    return 128;
  }
  if (STI->getFeatureBits().test(FeatureGFX90AInsts))
    return 512;
  return 256;
}

unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
  assert(WavesPerEU != 0);

  if (WavesPerEU >= getMaxWavesPerEU(STI))
    return 0;
  unsigned MinNumVGPRs =
      alignDown(getTotalNumVGPRs(STI) / (WavesPerEU + 1),
                getVGPRAllocGranule(STI)) + 1;
  return std::min(MinNumVGPRs, getAddressableNumVGPRs(STI));
}

unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
  assert(WavesPerEU != 0);

  unsigned MaxNumVGPRs = alignDown(getTotalNumVGPRs(STI) / WavesPerEU,
                                   getVGPRAllocGranule(STI));
  unsigned AddressableNumVGPRs = getAddressableNumVGPRs(STI);
  return std::min(MaxNumVGPRs, AddressableNumVGPRs);
}

unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs,
                          Optional<bool> EnableWavefrontSize32) {
  NumVGPRs = alignTo(std::max(1u, NumVGPRs),
                     getVGPREncodingGranule(STI, EnableWavefrontSize32));
  // VGPRBlocks is actual number of VGPR blocks minus 1.
  return NumVGPRs / getVGPREncodingGranule(STI, EnableWavefrontSize32) - 1;
}

} // end namespace IsaInfo

void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
                               const MCSubtargetInfo *STI) {
  IsaVersion Version = getIsaVersion(STI->getCPU());

  memset(&Header, 0, sizeof(Header));

  Header.amd_kernel_code_version_major = 1;
  Header.amd_kernel_code_version_minor = 2;
  Header.amd_machine_kind = 1; // AMD_MACHINE_KIND_AMDGPU
  Header.amd_machine_version_major = Version.Major;
  Header.amd_machine_version_minor = Version.Minor;
  Header.amd_machine_version_stepping = Version.Stepping;
  Header.kernel_code_entry_byte_offset = sizeof(Header);
  Header.wavefront_size = 6;

  // If the code object does not support indirect functions, then the value must
  // be 0xffffffff.
  Header.call_convention = -1;

  // These alignment values are specified in powers of two, so alignment =
  // 2^n.  The minimum alignment is 2^4 = 16.
  Header.kernarg_segment_alignment = 4;
  Header.group_segment_alignment = 4;
  Header.private_segment_alignment = 4;

  if (Version.Major >= 10) {
    if (STI->getFeatureBits().test(FeatureWavefrontSize32)) {
      Header.wavefront_size = 5;
      Header.code_properties |= AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32;
    }
    Header.compute_pgm_resource_registers |=
      S_00B848_WGP_MODE(STI->getFeatureBits().test(FeatureCuMode) ? 0 : 1) |
      S_00B848_MEM_ORDERED(1);
  }
}

amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor(
    const MCSubtargetInfo *STI) {
  IsaVersion Version = getIsaVersion(STI->getCPU());

  amdhsa::kernel_descriptor_t KD;
  memset(&KD, 0, sizeof(KD));

  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                  amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64,
                  amdhsa::FLOAT_DENORM_MODE_FLUSH_NONE);
  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                  amdhsa::COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, 1);
  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                  amdhsa::COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 1);
  AMDHSA_BITS_SET(KD.compute_pgm_rsrc2,
                  amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, 1);
  if (Version.Major >= 10) {
    AMDHSA_BITS_SET(KD.kernel_code_properties,
                    amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
                    STI->getFeatureBits().test(FeatureWavefrontSize32) ? 1 : 0);
    AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                    amdhsa::COMPUTE_PGM_RSRC1_WGP_MODE,
                    STI->getFeatureBits().test(FeatureCuMode) ? 0 : 1);
    AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                    amdhsa::COMPUTE_PGM_RSRC1_MEM_ORDERED, 1);
  }
  if (AMDGPU::isGFX90A(*STI)) {
    AMDHSA_BITS_SET(KD.compute_pgm_rsrc3,
                    amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT,
                    STI->getFeatureBits().test(FeatureTgSplit) ? 1 : 0);
  }
  return KD;
}

bool isGroupSegment(const GlobalValue *GV) {
  return GV->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
}

bool isGlobalSegment(const GlobalValue *GV) {
  return GV->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
}

bool isReadOnlySegment(const GlobalValue *GV) {
  unsigned AS = GV->getAddressSpace();
  return AS == AMDGPUAS::CONSTANT_ADDRESS ||
         AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT;
}

bool shouldEmitConstantsToTextSection(const Triple &TT) {
  return TT.getArch() == Triple::r600;
}

int getIntegerAttribute(const Function &F, StringRef Name, int Default) {
  Attribute A = F.getFnAttribute(Name);
  int Result = Default;

  if (A.isStringAttribute()) {
    StringRef Str = A.getValueAsString();
    if (Str.getAsInteger(0, Result)) {
      LLVMContext &Ctx = F.getContext();
      Ctx.emitError("can't parse integer attribute " + Name);
    }
  }

  return Result;
}

std::pair<int, int> getIntegerPairAttribute(const Function &F,
                                            StringRef Name,
                                            std::pair<int, int> Default,
                                            bool OnlyFirstRequired) {
  Attribute A = F.getFnAttribute(Name);
  if (!A.isStringAttribute())
    return Default;

  LLVMContext &Ctx = F.getContext();
  std::pair<int, int> Ints = Default;
  std::pair<StringRef, StringRef> Strs = A.getValueAsString().split(',');
  if (Strs.first.trim().getAsInteger(0, Ints.first)) {
    Ctx.emitError("can't parse first integer attribute " + Name);
    return Default;
  }
  if (Strs.second.trim().getAsInteger(0, Ints.second)) {
    if (!OnlyFirstRequired || !Strs.second.trim().empty()) {
      Ctx.emitError("can't parse second integer attribute " + Name);
      return Default;
    }
  }

  return Ints;
}

unsigned getVmcntBitMask(const IsaVersion &Version) {
  return (1 << (getVmcntBitWidthLo(Version.Major) +
                getVmcntBitWidthHi(Version.Major))) -
         1;
}

unsigned getExpcntBitMask(const IsaVersion &Version) {
  return (1 << getExpcntBitWidth(Version.Major)) - 1;
}

unsigned getLgkmcntBitMask(const IsaVersion &Version) {
  return (1 << getLgkmcntBitWidth(Version.Major)) - 1;
}

unsigned getWaitcntBitMask(const IsaVersion &Version) {
  unsigned VmcntLo = getBitMask(getVmcntBitShiftLo(Version.Major),
                                getVmcntBitWidthLo(Version.Major));
  unsigned Expcnt = getBitMask(getExpcntBitShift(Version.Major),
                               getExpcntBitWidth(Version.Major));
  unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(Version.Major),
                                getLgkmcntBitWidth(Version.Major));
  unsigned VmcntHi = getBitMask(getVmcntBitShiftHi(Version.Major),
                                getVmcntBitWidthHi(Version.Major));
  return VmcntLo | Expcnt | Lgkmcnt | VmcntHi;
}

unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt) {
  unsigned VmcntLo = unpackBits(Waitcnt, getVmcntBitShiftLo(Version.Major),
                                getVmcntBitWidthLo(Version.Major));
  unsigned VmcntHi = unpackBits(Waitcnt, getVmcntBitShiftHi(Version.Major),
                                getVmcntBitWidthHi(Version.Major));
  return VmcntLo | VmcntHi << getVmcntBitWidthLo(Version.Major);
}
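
// Round-trip sketch for the waitcnt helpers in this file (illustrative):
//   unsigned Enc = encodeWaitcnt(Version, Vmcnt, Expcnt, Lgkmcnt);
//   Waitcnt W = decodeWaitcnt(Version, Enc);
//   // W.VmCnt == Vmcnt, W.ExpCnt == Expcnt and W.LgkmCnt == Lgkmcnt, provided
//   // each value fits in the corresponding bit mask for this ISA version.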

unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt) {
  return unpackBits(Waitcnt, getExpcntBitShift(Version.Major),
                    getExpcntBitWidth(Version.Major));
}

unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt) {
  return unpackBits(Waitcnt, getLgkmcntBitShift(Version.Major),
                    getLgkmcntBitWidth(Version.Major));
}

void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt,
                   unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt) {
  Vmcnt = decodeVmcnt(Version, Waitcnt);
  Expcnt = decodeExpcnt(Version, Waitcnt);
  Lgkmcnt = decodeLgkmcnt(Version, Waitcnt);
}

Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded) {
  Waitcnt Decoded;
  Decoded.VmCnt = decodeVmcnt(Version, Encoded);
  Decoded.ExpCnt = decodeExpcnt(Version, Encoded);
  Decoded.LgkmCnt = decodeLgkmcnt(Version, Encoded);
  return Decoded;
}

unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt,
                     unsigned Vmcnt) {
  Waitcnt = packBits(Vmcnt, Waitcnt, getVmcntBitShiftLo(Version.Major),
                     getVmcntBitWidthLo(Version.Major));
  return packBits(Vmcnt >> getVmcntBitWidthLo(Version.Major), Waitcnt,
                  getVmcntBitShiftHi(Version.Major),
                  getVmcntBitWidthHi(Version.Major));
}

unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt,
                      unsigned Expcnt) {
  return packBits(Expcnt, Waitcnt, getExpcntBitShift(Version.Major),
                  getExpcntBitWidth(Version.Major));
}

unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
                       unsigned Lgkmcnt) {
  return packBits(Lgkmcnt, Waitcnt, getLgkmcntBitShift(Version.Major),
                  getLgkmcntBitWidth(Version.Major));
}

unsigned encodeWaitcnt(const IsaVersion &Version,
                       unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt) {
  unsigned Waitcnt = getWaitcntBitMask(Version);
  Waitcnt = encodeVmcnt(Version, Waitcnt, Vmcnt);
  Waitcnt = encodeExpcnt(Version, Waitcnt, Expcnt);
  Waitcnt = encodeLgkmcnt(Version, Waitcnt, Lgkmcnt);
  return Waitcnt;
}

unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded) {
  return encodeWaitcnt(Version, Decoded.VmCnt, Decoded.ExpCnt, Decoded.LgkmCnt);
}

//===----------------------------------------------------------------------===//
// Custom Operands.
//
// A table of custom operands shall describe "primary" operand names
// first followed by aliases if any. It is not required but recommended
// to arrange operands so that operand encoding matches operand position
// in the table. This will make disassembly a bit more efficient.
// Unused slots in the table shall have an empty name.
//
//===----------------------------------------------------------------------===//

template <class T>
static bool isValidOpr(int Idx, const CustomOperand<T> OpInfo[], int OpInfoSize,
                       T Context) {
  return 0 <= Idx && Idx < OpInfoSize && !OpInfo[Idx].Name.empty() &&
         (!OpInfo[Idx].Cond || OpInfo[Idx].Cond(Context));
}

template <class T>
static int getOprIdx(std::function<bool(const CustomOperand<T> &)> Test,
                     const CustomOperand<T> OpInfo[], int OpInfoSize,
                     T Context) {
  int InvalidIdx = OPR_ID_UNKNOWN;
  for (int Idx = 0; Idx < OpInfoSize; ++Idx) {
    if (Test(OpInfo[Idx])) {
      if (!OpInfo[Idx].Cond || OpInfo[Idx].Cond(Context))
        return Idx;
      InvalidIdx = OPR_ID_UNSUPPORTED;
    }
  }
  return InvalidIdx;
}

template <class T>
static int getOprIdx(const StringRef Name, const CustomOperand<T> OpInfo[],
                     int OpInfoSize, T Context) {
  auto Test = [=](const CustomOperand<T> &Op) { return Op.Name == Name; };
  return getOprIdx<T>(Test, OpInfo, OpInfoSize, Context);
}

template <class T>
static int getOprIdx(int Id, const CustomOperand<T> OpInfo[], int OpInfoSize,
                     T Context, bool QuickCheck = true) {
  auto Test = [=](const CustomOperand<T> &Op) {
    return Op.Encoding == Id && !Op.Name.empty();
  };
  // This is an optimization that should work in most cases.
  // As a side effect, it may cause selection of an alias
  // instead of a primary operand name in case of sparse tables.
  if (QuickCheck && isValidOpr<T>(Id, OpInfo, OpInfoSize, Context) &&
      OpInfo[Id].Encoding == Id) {
    return Id;
  }
  return getOprIdx<T>(Test, OpInfo, OpInfoSize, Context);
}

//===----------------------------------------------------------------------===//
// Custom Operand Values
//===----------------------------------------------------------------------===//

static unsigned getDefaultCustomOperandEncoding(const CustomOperandVal *Opr,
                                                int Size,
                                                const MCSubtargetInfo &STI) {
  unsigned Enc = 0;
  for (int Idx = 0; Idx < Size; ++Idx) {
    const auto &Op = Opr[Idx];
    if (Op.isSupported(STI))
      Enc |= Op.encode(Op.Default);
  }
  return Enc;
}

static bool isSymbolicCustomOperandEncoding(const CustomOperandVal *Opr,
                                            int Size, unsigned Code,
                                            bool &HasNonDefaultVal,
                                            const MCSubtargetInfo &STI) {
  unsigned UsedOprMask = 0;
  HasNonDefaultVal = false;
  for (int Idx = 0; Idx < Size; ++Idx) {
    const auto &Op = Opr[Idx];
    if (!Op.isSupported(STI))
      continue;
    UsedOprMask |= Op.getMask();
    unsigned Val = Op.decode(Code);
    if (!Op.isValid(Val))
      return false;
    HasNonDefaultVal |= (Val != Op.Default);
  }
  return (Code & ~UsedOprMask) == 0;
}

static bool decodeCustomOperand(const CustomOperandVal *Opr, int Size,
                                unsigned Code, int &Idx, StringRef &Name,
                                unsigned &Val, bool &IsDefault,
                                const MCSubtargetInfo &STI) {
  while (Idx < Size) {
    const auto &Op = Opr[Idx++];
    if (Op.isSupported(STI)) {
      Name = Op.Name;
      Val = Op.decode(Code);
      IsDefault = (Val == Op.Default);
      return true;
    }
  }

  return false;
}

static int encodeCustomOperandVal(const CustomOperandVal &Op,
                                  int64_t InputVal) {
  if (InputVal < 0 || InputVal > Op.Max)
    return OPR_VAL_INVALID;
  return Op.encode(InputVal);
}

static int encodeCustomOperand(const CustomOperandVal *Opr, int Size,
                               const StringRef Name, int64_t InputVal,
                               unsigned &UsedOprMask,
                               const MCSubtargetInfo &STI) {
  int InvalidId = OPR_ID_UNKNOWN;
  for (int Idx = 0; Idx < Size; ++Idx) {
    const auto &Op = Opr[Idx];
    if (Op.Name == Name) {
      if (!Op.isSupported(STI)) {
        InvalidId = OPR_ID_UNSUPPORTED;
        continue;
      }
      auto OprMask = Op.getMask();
      if (OprMask & UsedOprMask)
        return OPR_ID_DUPLICATE;
      UsedOprMask |= OprMask;
      return encodeCustomOperandVal(Op, InputVal);
    }
  }
  return InvalidId;
}

//===----------------------------------------------------------------------===//
// DepCtr
//===----------------------------------------------------------------------===//

namespace DepCtr {

int getDefaultDepCtrEncoding(const MCSubtargetInfo &STI) {
  static int Default = -1;
  if (Default == -1)
    Default = getDefaultCustomOperandEncoding(DepCtrInfo, DEP_CTR_SIZE, STI);
  return Default;
}

bool isSymbolicDepCtrEncoding(unsigned Code, bool &HasNonDefaultVal,
                              const MCSubtargetInfo &STI) {
  return isSymbolicCustomOperandEncoding(DepCtrInfo, DEP_CTR_SIZE, Code,
                                         HasNonDefaultVal, STI);
}

bool decodeDepCtr(unsigned Code, int &Id, StringRef &Name, unsigned &Val,
                  bool &IsDefault, const MCSubtargetInfo &STI) {
  return decodeCustomOperand(DepCtrInfo, DEP_CTR_SIZE, Code, Id, Name, Val,
                             IsDefault, STI);
}

int encodeDepCtr(const StringRef Name, int64_t Val, unsigned &UsedOprMask,
                 const MCSubtargetInfo &STI) {
  return encodeCustomOperand(DepCtrInfo, DEP_CTR_SIZE, Name, Val, UsedOprMask,
                             STI);
}

} // namespace DepCtr

//===----------------------------------------------------------------------===//
// hwreg
//===----------------------------------------------------------------------===//

namespace Hwreg {

int64_t getHwregId(const StringRef Name, const MCSubtargetInfo &STI) {
  int Idx = getOprIdx<const MCSubtargetInfo &>(Name, Opr, OPR_SIZE, STI);
  return (Idx < 0) ? Idx : Opr[Idx].Encoding;
}

bool isValidHwreg(int64_t Id) {
  return 0 <= Id && isUInt<ID_WIDTH_>(Id);
}

bool isValidHwregOffset(int64_t Offset) {
  return 0 <= Offset && isUInt<OFFSET_WIDTH_>(Offset);
}

bool isValidHwregWidth(int64_t Width) {
  return 0 <= (Width - 1) && isUInt<WIDTH_M1_WIDTH_>(Width - 1);
}

uint64_t encodeHwreg(uint64_t Id, uint64_t Offset, uint64_t Width) {
  return (Id << ID_SHIFT_) |
         (Offset << OFFSET_SHIFT_) |
         ((Width - 1) << WIDTH_M1_SHIFT_);
}

StringRef getHwreg(unsigned Id, const MCSubtargetInfo &STI) {
  int Idx = getOprIdx<const MCSubtargetInfo &>(Id, Opr, OPR_SIZE, STI);
  return (Idx < 0) ? "" : Opr[Idx].Name;
}

void decodeHwreg(unsigned Val, unsigned &Id, unsigned &Offset, unsigned &Width) {
  Id = (Val & ID_MASK_) >> ID_SHIFT_;
  Offset = (Val & OFFSET_MASK_) >> OFFSET_SHIFT_;
  Width = ((Val & WIDTH_M1_MASK_) >> WIDTH_M1_SHIFT_) + 1;
}

} // namespace Hwreg

//===----------------------------------------------------------------------===//
// exp tgt
//===----------------------------------------------------------------------===//

namespace Exp {

struct ExpTgt {
  StringLiteral Name;
  unsigned Tgt;
  unsigned MaxIndex;
};

static constexpr ExpTgt ExpTgtInfo[] = {
  {{"null"}, ET_NULL, ET_NULL_MAX_IDX},
  {{"mrtz"}, ET_MRTZ, ET_MRTZ_MAX_IDX},
  {{"prim"}, ET_PRIM, ET_PRIM_MAX_IDX},
  {{"mrt"}, ET_MRT0, ET_MRT_MAX_IDX},
  {{"pos"}, ET_POS0, ET_POS_MAX_IDX},
  {{"dual_src_blend"}, ET_DUAL_SRC_BLEND0, ET_DUAL_SRC_BLEND_MAX_IDX},
  {{"param"}, ET_PARAM0, ET_PARAM_MAX_IDX},
};

bool getTgtName(unsigned Id, StringRef &Name, int &Index) {
  for (const ExpTgt &Val : ExpTgtInfo) {
    if (Val.Tgt <= Id && Id <= Val.Tgt + Val.MaxIndex) {
      Index = (Val.MaxIndex == 0) ? -1 : (Id - Val.Tgt);
      Name = Val.Name;
      return true;
    }
  }
  return false;
}

unsigned getTgtId(const StringRef Name) {

  for (const ExpTgt &Val : ExpTgtInfo) {
    if (Val.MaxIndex == 0 && Name == Val.Name)
      return Val.Tgt;

    if (Val.MaxIndex > 0 && Name.startswith(Val.Name)) {
      StringRef Suffix = Name.drop_front(Val.Name.size());

      unsigned Id;
      if (Suffix.getAsInteger(10, Id) || Id > Val.MaxIndex)
        return ET_INVALID;

      // Disable leading zeroes
      if (Suffix.size() > 1 && Suffix[0] == '0')
        return ET_INVALID;

      return Val.Tgt + Id;
    }
  }
  return ET_INVALID;
}

bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI) {
  switch (Id) {
  case ET_NULL:
    return !isGFX11Plus(STI);
  case ET_POS4:
  case ET_PRIM:
    return isGFX10Plus(STI);
  case ET_DUAL_SRC_BLEND0:
  case ET_DUAL_SRC_BLEND1:
    return isGFX11Plus(STI);
  default:
    if (Id >= ET_PARAM0 && Id <= ET_PARAM31)
      return !isGFX11Plus(STI);
    return true;
  }
}

} // namespace Exp

//===----------------------------------------------------------------------===//
// MTBUF Format
//===----------------------------------------------------------------------===//

namespace MTBUFFormat {

int64_t getDfmt(const StringRef Name) {
  for (int Id = DFMT_MIN; Id <= DFMT_MAX; ++Id) {
    if (Name == DfmtSymbolic[Id])
      return Id;
  }
  return DFMT_UNDEF;
}

StringRef getDfmtName(unsigned Id) {
  assert(Id <= DFMT_MAX);
  return DfmtSymbolic[Id];
}

static StringLiteral const *getNfmtLookupTable(const MCSubtargetInfo &STI) {
  if (isSI(STI) || isCI(STI))
    return NfmtSymbolicSICI;
  if (isVI(STI) || isGFX9(STI))
    return NfmtSymbolicVI;
  return NfmtSymbolicGFX10;
}

int64_t getNfmt(const StringRef Name, const MCSubtargetInfo &STI) {
  auto lookupTable = getNfmtLookupTable(STI);
  for (int Id = NFMT_MIN; Id <= NFMT_MAX; ++Id) {
    if (Name == lookupTable[Id])
      return Id;
  }
  return NFMT_UNDEF;
}

StringRef getNfmtName(unsigned Id, const MCSubtargetInfo &STI) {
  assert(Id <= NFMT_MAX);
  return getNfmtLookupTable(STI)[Id];
}

bool isValidDfmtNfmt(unsigned Id, const MCSubtargetInfo &STI) {
  unsigned Dfmt;
  unsigned Nfmt;
  decodeDfmtNfmt(Id, Dfmt, Nfmt);
  return isValidNfmt(Nfmt, STI);
}

bool isValidNfmt(unsigned Id, const MCSubtargetInfo &STI) {
  return !getNfmtName(Id, STI).empty();
}

int64_t encodeDfmtNfmt(unsigned Dfmt, unsigned Nfmt) {
  return (Dfmt << DFMT_SHIFT) | (Nfmt << NFMT_SHIFT);
}

void decodeDfmtNfmt(unsigned Format, unsigned &Dfmt, unsigned &Nfmt) {
  Dfmt = (Format >> DFMT_SHIFT) & DFMT_MASK;
  Nfmt = (Format >> NFMT_SHIFT) & NFMT_MASK;
}

int64_t getUnifiedFormat(const StringRef Name, const MCSubtargetInfo &STI) {
  if (isGFX11Plus(STI)) {
    for (int Id = UfmtGFX11::UFMT_FIRST; Id <= UfmtGFX11::UFMT_LAST; ++Id) {
      if (Name == UfmtSymbolicGFX11[Id])
        return Id;
    }
  } else {
    for (int Id = UfmtGFX10::UFMT_FIRST; Id <= UfmtGFX10::UFMT_LAST; ++Id) {
      if (Name == UfmtSymbolicGFX10[Id])
        return Id;
    }
  }
  return UFMT_UNDEF;
}

StringRef getUnifiedFormatName(unsigned Id, const MCSubtargetInfo &STI) {
  if (isValidUnifiedFormat(Id, STI))
    return isGFX10(STI) ? UfmtSymbolicGFX10[Id] : UfmtSymbolicGFX11[Id];
  return "";
}

bool isValidUnifiedFormat(unsigned Id, const MCSubtargetInfo &STI) {
  return isGFX10(STI) ? Id <= UfmtGFX10::UFMT_LAST : Id <= UfmtGFX11::UFMT_LAST;
}

int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt,
                             const MCSubtargetInfo &STI) {
  int64_t Fmt = encodeDfmtNfmt(Dfmt, Nfmt);
  if (isGFX11Plus(STI)) {
    for (int Id = UfmtGFX11::UFMT_FIRST; Id <= UfmtGFX11::UFMT_LAST; ++Id) {
      if (Fmt == DfmtNfmt2UFmtGFX11[Id])
        return Id;
    }
  } else {
    for (int Id = UfmtGFX10::UFMT_FIRST; Id <= UfmtGFX10::UFMT_LAST; ++Id) {
      if (Fmt == DfmtNfmt2UFmtGFX10[Id])
        return Id;
    }
  }
  return UFMT_UNDEF;
}

bool isValidFormatEncoding(unsigned Val, const MCSubtargetInfo &STI) {
  return isGFX10Plus(STI) ? (Val <= UFMT_MAX) : (Val <= DFMT_NFMT_MAX);
}

unsigned getDefaultFormatEncoding(const MCSubtargetInfo &STI) {
  if (isGFX10Plus(STI))
    return UFMT_DEFAULT;
  return DFMT_NFMT_DEFAULT;
}

} // namespace MTBUFFormat

//===----------------------------------------------------------------------===//
// SendMsg
//===----------------------------------------------------------------------===//

namespace SendMsg {

static uint64_t getMsgIdMask(const MCSubtargetInfo &STI) {
  return isGFX11Plus(STI) ? ID_MASK_GFX11Plus_ : ID_MASK_PreGFX11_;
}

int64_t getMsgId(const StringRef Name, const MCSubtargetInfo &STI) {
  int Idx = getOprIdx<const MCSubtargetInfo &>(Name, Msg, MSG_SIZE, STI);
  return (Idx < 0) ? Idx : Msg[Idx].Encoding;
}

bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI) {
  return (MsgId & ~(getMsgIdMask(STI))) == 0;
}

StringRef getMsgName(int64_t MsgId, const MCSubtargetInfo &STI) {
  int Idx = getOprIdx<const MCSubtargetInfo &>(MsgId, Msg, MSG_SIZE, STI);
  return (Idx < 0) ? "" : Msg[Idx].Name;
}

int64_t getMsgOpId(int64_t MsgId, const StringRef Name) {
  const char* const *S = (MsgId == ID_SYSMSG) ? OpSysSymbolic : OpGsSymbolic;
  const int F = (MsgId == ID_SYSMSG) ? OP_SYS_FIRST_ : OP_GS_FIRST_;
  const int L = (MsgId == ID_SYSMSG) ? OP_SYS_LAST_ : OP_GS_LAST_;
  for (int i = F; i < L; ++i) {
    if (Name == S[i]) {
      return i;
    }
  }
  return OP_UNKNOWN_;
}
OP_SYS_FIRST_ : OP_GS_FIRST_; 1557 const int L = (MsgId == ID_SYSMSG) ? OP_SYS_LAST_ : OP_GS_LAST_; 1558 for (int i = F; i < L; ++i) { 1559 if (Name == S[i]) { 1560 return i; 1561 } 1562 } 1563 return OP_UNKNOWN_; 1564 } 1565 1566 bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI, 1567 bool Strict) { 1568 assert(isValidMsgId(MsgId, STI)); 1569 1570 if (!Strict) 1571 return 0 <= OpId && isUInt<OP_WIDTH_>(OpId); 1572 1573 if (MsgId == ID_SYSMSG) 1574 return OP_SYS_FIRST_ <= OpId && OpId < OP_SYS_LAST_; 1575 if (!isGFX11Plus(STI)) { 1576 switch (MsgId) { 1577 case ID_GS_PreGFX11: 1578 return (OP_GS_FIRST_ <= OpId && OpId < OP_GS_LAST_) && OpId != OP_GS_NOP; 1579 case ID_GS_DONE_PreGFX11: 1580 return OP_GS_FIRST_ <= OpId && OpId < OP_GS_LAST_; 1581 } 1582 } 1583 return OpId == OP_NONE_; 1584 } 1585 1586 StringRef getMsgOpName(int64_t MsgId, int64_t OpId, 1587 const MCSubtargetInfo &STI) { 1588 assert(msgRequiresOp(MsgId, STI)); 1589 return (MsgId == ID_SYSMSG)? OpSysSymbolic[OpId] : OpGsSymbolic[OpId]; 1590 } 1591 1592 bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId, 1593 const MCSubtargetInfo &STI, bool Strict) { 1594 assert(isValidMsgOp(MsgId, OpId, STI, Strict)); 1595 1596 if (!Strict) 1597 return 0 <= StreamId && isUInt<STREAM_ID_WIDTH_>(StreamId); 1598 1599 if (!isGFX11Plus(STI)) { 1600 switch (MsgId) { 1601 case ID_GS_PreGFX11: 1602 return STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_; 1603 case ID_GS_DONE_PreGFX11: 1604 return (OpId == OP_GS_NOP) ? 1605 (StreamId == STREAM_ID_NONE_) : 1606 (STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_); 1607 } 1608 } 1609 return StreamId == STREAM_ID_NONE_; 1610 } 1611 1612 bool msgRequiresOp(int64_t MsgId, const MCSubtargetInfo &STI) { 1613 return MsgId == ID_SYSMSG || 1614 (!isGFX11Plus(STI) && 1615 (MsgId == ID_GS_PreGFX11 || MsgId == ID_GS_DONE_PreGFX11)); 1616 } 1617 1618 bool msgSupportsStream(int64_t MsgId, int64_t OpId, 1619 const MCSubtargetInfo &STI) { 1620 return !isGFX11Plus(STI) && 1621 (MsgId == ID_GS_PreGFX11 || MsgId == ID_GS_DONE_PreGFX11) && 1622 OpId != OP_GS_NOP; 1623 } 1624 1625 void decodeMsg(unsigned Val, uint16_t &MsgId, uint16_t &OpId, 1626 uint16_t &StreamId, const MCSubtargetInfo &STI) { 1627 MsgId = Val & getMsgIdMask(STI); 1628 if (isGFX11Plus(STI)) { 1629 OpId = 0; 1630 StreamId = 0; 1631 } else { 1632 OpId = (Val & OP_MASK_) >> OP_SHIFT_; 1633 StreamId = (Val & STREAM_ID_MASK_) >> STREAM_ID_SHIFT_; 1634 } 1635 } 1636 1637 uint64_t encodeMsg(uint64_t MsgId, 1638 uint64_t OpId, 1639 uint64_t StreamId) { 1640 return MsgId | (OpId << OP_SHIFT_) | (StreamId << STREAM_ID_SHIFT_); 1641 } 1642 1643 } // namespace SendMsg 1644 1645 //===----------------------------------------------------------------------===// 1646 // 1647 //===----------------------------------------------------------------------===// 1648 1649 unsigned getInitialPSInputAddr(const Function &F) { 1650 return getIntegerAttribute(F, "InitialPSInputAddr", 0); 1651 } 1652 1653 bool getHasColorExport(const Function &F) { 1654 // As a safe default always respond as if PS has color exports. 1655 return getIntegerAttribute( 1656 F, "amdgpu-color-export", 1657 F.getCallingConv() == CallingConv::AMDGPU_PS ? 
1 : 0) != 0; 1658 } 1659 1660 bool getHasDepthExport(const Function &F) { 1661 return getIntegerAttribute(F, "amdgpu-depth-export", 0) != 0; 1662 } 1663 1664 bool isShader(CallingConv::ID cc) { 1665 switch(cc) { 1666 case CallingConv::AMDGPU_VS: 1667 case CallingConv::AMDGPU_LS: 1668 case CallingConv::AMDGPU_HS: 1669 case CallingConv::AMDGPU_ES: 1670 case CallingConv::AMDGPU_GS: 1671 case CallingConv::AMDGPU_PS: 1672 case CallingConv::AMDGPU_CS: 1673 return true; 1674 default: 1675 return false; 1676 } 1677 } 1678 1679 bool isGraphics(CallingConv::ID cc) { 1680 return isShader(cc) || cc == CallingConv::AMDGPU_Gfx; 1681 } 1682 1683 bool isCompute(CallingConv::ID cc) { 1684 return !isGraphics(cc) || cc == CallingConv::AMDGPU_CS; 1685 } 1686 1687 bool isEntryFunctionCC(CallingConv::ID CC) { 1688 switch (CC) { 1689 case CallingConv::AMDGPU_KERNEL: 1690 case CallingConv::SPIR_KERNEL: 1691 case CallingConv::AMDGPU_VS: 1692 case CallingConv::AMDGPU_GS: 1693 case CallingConv::AMDGPU_PS: 1694 case CallingConv::AMDGPU_CS: 1695 case CallingConv::AMDGPU_ES: 1696 case CallingConv::AMDGPU_HS: 1697 case CallingConv::AMDGPU_LS: 1698 return true; 1699 default: 1700 return false; 1701 } 1702 } 1703 1704 bool isModuleEntryFunctionCC(CallingConv::ID CC) { 1705 switch (CC) { 1706 case CallingConv::AMDGPU_Gfx: 1707 return true; 1708 default: 1709 return isEntryFunctionCC(CC); 1710 } 1711 } 1712 1713 bool isKernelCC(const Function *Func) { 1714 return AMDGPU::isModuleEntryFunctionCC(Func->getCallingConv()); 1715 } 1716 1717 bool hasXNACK(const MCSubtargetInfo &STI) { 1718 return STI.getFeatureBits()[AMDGPU::FeatureXNACK]; 1719 } 1720 1721 bool hasSRAMECC(const MCSubtargetInfo &STI) { 1722 return STI.getFeatureBits()[AMDGPU::FeatureSRAMECC]; 1723 } 1724 1725 bool hasMIMG_R128(const MCSubtargetInfo &STI) { 1726 return STI.getFeatureBits()[AMDGPU::FeatureMIMG_R128] && !STI.getFeatureBits()[AMDGPU::FeatureR128A16]; 1727 } 1728 1729 bool hasGFX10A16(const MCSubtargetInfo &STI) { 1730 return STI.getFeatureBits()[AMDGPU::FeatureGFX10A16]; 1731 } 1732 1733 bool hasG16(const MCSubtargetInfo &STI) { 1734 return STI.getFeatureBits()[AMDGPU::FeatureG16]; 1735 } 1736 1737 bool hasPackedD16(const MCSubtargetInfo &STI) { 1738 return !STI.getFeatureBits()[AMDGPU::FeatureUnpackedD16VMem] && !isCI(STI) && 1739 !isSI(STI); 1740 } 1741 1742 bool isSI(const MCSubtargetInfo &STI) { 1743 return STI.getFeatureBits()[AMDGPU::FeatureSouthernIslands]; 1744 } 1745 1746 bool isCI(const MCSubtargetInfo &STI) { 1747 return STI.getFeatureBits()[AMDGPU::FeatureSeaIslands]; 1748 } 1749 1750 bool isVI(const MCSubtargetInfo &STI) { 1751 return STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands]; 1752 } 1753 1754 bool isGFX9(const MCSubtargetInfo &STI) { 1755 return STI.getFeatureBits()[AMDGPU::FeatureGFX9]; 1756 } 1757 1758 bool isGFX9_GFX10(const MCSubtargetInfo &STI) { 1759 return isGFX9(STI) || isGFX10(STI); 1760 } 1761 1762 bool isGFX8_GFX9_GFX10(const MCSubtargetInfo &STI) { 1763 return isVI(STI) || isGFX9(STI) || isGFX10(STI); 1764 } 1765 1766 bool isGFX8Plus(const MCSubtargetInfo &STI) { 1767 return isVI(STI) || isGFX9Plus(STI); 1768 } 1769 1770 bool isGFX9Plus(const MCSubtargetInfo &STI) { 1771 return isGFX9(STI) || isGFX10Plus(STI); 1772 } 1773 1774 bool isGFX10(const MCSubtargetInfo &STI) { 1775 return STI.getFeatureBits()[AMDGPU::FeatureGFX10]; 1776 } 1777 1778 bool isGFX10Plus(const MCSubtargetInfo &STI) { 1779 return isGFX10(STI) || isGFX11Plus(STI); 1780 } 1781 1782 bool isGFX11(const MCSubtargetInfo &STI) { 1783 return 
bool hasXNACK(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureXNACK];
}

bool hasSRAMECC(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureSRAMECC];
}

bool hasMIMG_R128(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureMIMG_R128] &&
         !STI.getFeatureBits()[AMDGPU::FeatureR128A16];
}

bool hasGFX10A16(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureGFX10A16];
}

bool hasG16(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureG16];
}

bool hasPackedD16(const MCSubtargetInfo &STI) {
  return !STI.getFeatureBits()[AMDGPU::FeatureUnpackedD16VMem] && !isCI(STI) &&
         !isSI(STI);
}

bool isSI(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureSouthernIslands];
}

bool isCI(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureSeaIslands];
}

bool isVI(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands];
}

bool isGFX9(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureGFX9];
}

bool isGFX9_GFX10(const MCSubtargetInfo &STI) {
  return isGFX9(STI) || isGFX10(STI);
}

bool isGFX8_GFX9_GFX10(const MCSubtargetInfo &STI) {
  return isVI(STI) || isGFX9(STI) || isGFX10(STI);
}

bool isGFX8Plus(const MCSubtargetInfo &STI) {
  return isVI(STI) || isGFX9Plus(STI);
}

bool isGFX9Plus(const MCSubtargetInfo &STI) {
  return isGFX9(STI) || isGFX10Plus(STI);
}

bool isGFX10(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureGFX10];
}

bool isGFX10Plus(const MCSubtargetInfo &STI) {
  return isGFX10(STI) || isGFX11Plus(STI);
}

bool isGFX11(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureGFX11];
}

bool isGFX11Plus(const MCSubtargetInfo &STI) {
  return isGFX11(STI);
}

bool isNotGFX11Plus(const MCSubtargetInfo &STI) {
  return !isGFX11Plus(STI);
}

bool isNotGFX10Plus(const MCSubtargetInfo &STI) {
  return isSI(STI) || isCI(STI) || isVI(STI) || isGFX9(STI);
}

bool isGFX10Before1030(const MCSubtargetInfo &STI) {
  return isGFX10(STI) && !AMDGPU::isGFX10_BEncoding(STI);
}

bool isGCN3Encoding(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureGCN3Encoding];
}

bool isGFX10_AEncoding(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureGFX10_AEncoding];
}

bool isGFX10_BEncoding(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureGFX10_BEncoding];
}

bool hasGFX10_3Insts(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureGFX10_3Insts];
}

bool isGFX90A(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureGFX90AInsts];
}

bool isGFX940(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureGFX940Insts];
}

bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
}

bool hasMAIInsts(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureMAIInsts];
}

bool hasVOPD(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureVOPD];
}

int32_t getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR,
                         int32_t ArgNumVGPR) {
  if (has90AInsts && ArgNumAGPR)
    return alignTo(ArgNumVGPR, 4) + ArgNumAGPR;
  return std::max(ArgNumVGPR, ArgNumAGPR);
}
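// Worked example for getTotalNumVGPRs(): when gfx90a instructions are
// available and AGPRs are in use, AGPRs are counted on top of the VGPR count
// rounded up to a multiple of 4, so 6 VGPRs plus 5 AGPRs cost
// alignTo(6, 4) + 5 = 13 registers; otherwise the two counts are reported
// separately and the total is just max(6, 5) = 6.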
bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI) {
  const MCRegisterClass SGPRClass = TRI->getRegClass(AMDGPU::SReg_32RegClassID);
  const unsigned FirstSubReg = TRI->getSubReg(Reg, AMDGPU::sub0);
  return SGPRClass.contains(FirstSubReg != 0 ? FirstSubReg : Reg) ||
         Reg == AMDGPU::SCC;
}

#define MAP_REG2REG \
  using namespace AMDGPU; \
  switch(Reg) { \
  default: return Reg; \
  CASE_CI_VI(FLAT_SCR) \
  CASE_CI_VI(FLAT_SCR_LO) \
  CASE_CI_VI(FLAT_SCR_HI) \
  CASE_VI_GFX9PLUS(TTMP0) \
  CASE_VI_GFX9PLUS(TTMP1) \
  CASE_VI_GFX9PLUS(TTMP2) \
  CASE_VI_GFX9PLUS(TTMP3) \
  CASE_VI_GFX9PLUS(TTMP4) \
  CASE_VI_GFX9PLUS(TTMP5) \
  CASE_VI_GFX9PLUS(TTMP6) \
  CASE_VI_GFX9PLUS(TTMP7) \
  CASE_VI_GFX9PLUS(TTMP8) \
  CASE_VI_GFX9PLUS(TTMP9) \
  CASE_VI_GFX9PLUS(TTMP10) \
  CASE_VI_GFX9PLUS(TTMP11) \
  CASE_VI_GFX9PLUS(TTMP12) \
  CASE_VI_GFX9PLUS(TTMP13) \
  CASE_VI_GFX9PLUS(TTMP14) \
  CASE_VI_GFX9PLUS(TTMP15) \
  CASE_VI_GFX9PLUS(TTMP0_TTMP1) \
  CASE_VI_GFX9PLUS(TTMP2_TTMP3) \
  CASE_VI_GFX9PLUS(TTMP4_TTMP5) \
  CASE_VI_GFX9PLUS(TTMP6_TTMP7) \
  CASE_VI_GFX9PLUS(TTMP8_TTMP9) \
  CASE_VI_GFX9PLUS(TTMP10_TTMP11) \
  CASE_VI_GFX9PLUS(TTMP12_TTMP13) \
  CASE_VI_GFX9PLUS(TTMP14_TTMP15) \
  CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3) \
  CASE_VI_GFX9PLUS(TTMP4_TTMP5_TTMP6_TTMP7) \
  CASE_VI_GFX9PLUS(TTMP8_TTMP9_TTMP10_TTMP11) \
  CASE_VI_GFX9PLUS(TTMP12_TTMP13_TTMP14_TTMP15) \
  CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7) \
  CASE_VI_GFX9PLUS(TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11) \
  CASE_VI_GFX9PLUS(TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
  CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
  CASE_GFXPRE11_GFX11PLUS(M0) \
  CASE_GFXPRE11_GFX11PLUS(SGPR_NULL) \
  CASE_GFXPRE11_GFX11PLUS_TO(SGPR_NULL64, SGPR_NULL) \
  }

#define CASE_CI_VI(node) \
  assert(!isSI(STI)); \
  case node: return isCI(STI) ? node##_ci : node##_vi;

#define CASE_VI_GFX9PLUS(node) \
  case node: return isGFX9Plus(STI) ? node##_gfx9plus : node##_vi;

#define CASE_GFXPRE11_GFX11PLUS(node) \
  case node: return isGFX11Plus(STI) ? node##_gfx11plus : node##_gfxpre11;

#define CASE_GFXPRE11_GFX11PLUS_TO(node, result) \
  case node: return isGFX11Plus(STI) ? result##_gfx11plus : result##_gfxpre11;
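// A minimal example of what the mapping expands to: for Reg == TTMP0,
// getMCReg() below returns TTMP0_gfx9plus on GFX9+ and TTMP0_vi on VI, while
// mc2PseudoReg() maps either encoding-specific register back to TTMP0.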
unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI) {
  if (STI.getTargetTriple().getArch() == Triple::r600)
    return Reg;
  MAP_REG2REG
}

#undef CASE_CI_VI
#undef CASE_VI_GFX9PLUS
#undef CASE_GFXPRE11_GFX11PLUS
#undef CASE_GFXPRE11_GFX11PLUS_TO

#define CASE_CI_VI(node) case node##_ci: case node##_vi: return node;
#define CASE_VI_GFX9PLUS(node) case node##_vi: case node##_gfx9plus: return node;
#define CASE_GFXPRE11_GFX11PLUS(node) case node##_gfx11plus: case node##_gfxpre11: return node;
#define CASE_GFXPRE11_GFX11PLUS_TO(node, result)

unsigned mc2PseudoReg(unsigned Reg) {
  MAP_REG2REG
}

#undef CASE_CI_VI
#undef CASE_VI_GFX9PLUS
#undef CASE_GFXPRE11_GFX11PLUS
#undef CASE_GFXPRE11_GFX11PLUS_TO
#undef MAP_REG2REG

bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned OpType = Desc.OpInfo[OpNo].OperandType;
  return OpType >= AMDGPU::OPERAND_SRC_FIRST &&
         OpType <= AMDGPU::OPERAND_SRC_LAST;
}

bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned OpType = Desc.OpInfo[OpNo].OperandType;
  switch (OpType) {
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
  case AMDGPU::OPERAND_REG_IMM_V2FP16:
  case AMDGPU::OPERAND_REG_IMM_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
  case AMDGPU::OPERAND_REG_IMM_V2FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
    return true;
  default:
    return false;
  }
}

bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned OpType = Desc.OpInfo[OpNo].OperandType;
  return OpType >= AMDGPU::OPERAND_REG_INLINE_C_FIRST &&
         OpType <= AMDGPU::OPERAND_REG_INLINE_C_LAST;
}

// Avoid using MCRegisterClass::getSize, since that function will go away
// (move from MC* level to Target* level). Return size in bits.
unsigned getRegBitWidth(unsigned RCID) {
  switch (RCID) {
  case AMDGPU::VGPR_LO16RegClassID:
  case AMDGPU::VGPR_HI16RegClassID:
  case AMDGPU::SGPR_LO16RegClassID:
  case AMDGPU::AGPR_LO16RegClassID:
    return 16;
  case AMDGPU::SGPR_32RegClassID:
  case AMDGPU::VGPR_32RegClassID:
  case AMDGPU::VRegOrLds_32RegClassID:
  case AMDGPU::AGPR_32RegClassID:
  case AMDGPU::VS_32RegClassID:
  case AMDGPU::AV_32RegClassID:
  case AMDGPU::SReg_32RegClassID:
  case AMDGPU::SReg_32_XM0RegClassID:
  case AMDGPU::SRegOrLds_32RegClassID:
    return 32;
  case AMDGPU::SGPR_64RegClassID:
  case AMDGPU::VS_64RegClassID:
  case AMDGPU::SReg_64RegClassID:
  case AMDGPU::VReg_64RegClassID:
  case AMDGPU::AReg_64RegClassID:
  case AMDGPU::SReg_64_XEXECRegClassID:
  case AMDGPU::VReg_64_Align2RegClassID:
  case AMDGPU::AReg_64_Align2RegClassID:
  case AMDGPU::AV_64RegClassID:
  case AMDGPU::AV_64_Align2RegClassID:
    return 64;
  case AMDGPU::SGPR_96RegClassID:
  case AMDGPU::SReg_96RegClassID:
  case AMDGPU::VReg_96RegClassID:
  case AMDGPU::AReg_96RegClassID:
  case AMDGPU::VReg_96_Align2RegClassID:
  case AMDGPU::AReg_96_Align2RegClassID:
  case AMDGPU::AV_96RegClassID:
  case AMDGPU::AV_96_Align2RegClassID:
    return 96;
  case AMDGPU::SGPR_128RegClassID:
  case AMDGPU::SReg_128RegClassID:
  case AMDGPU::VReg_128RegClassID:
  case AMDGPU::AReg_128RegClassID:
  case AMDGPU::VReg_128_Align2RegClassID:
  case AMDGPU::AReg_128_Align2RegClassID:
  case AMDGPU::AV_128RegClassID:
  case AMDGPU::AV_128_Align2RegClassID:
    return 128;
  case AMDGPU::SGPR_160RegClassID:
  case AMDGPU::SReg_160RegClassID:
  case AMDGPU::VReg_160RegClassID:
  case AMDGPU::AReg_160RegClassID:
  case AMDGPU::VReg_160_Align2RegClassID:
  case AMDGPU::AReg_160_Align2RegClassID:
  case AMDGPU::AV_160RegClassID:
  case AMDGPU::AV_160_Align2RegClassID:
    return 160;
  case AMDGPU::SGPR_192RegClassID:
  case AMDGPU::SReg_192RegClassID:
  case AMDGPU::VReg_192RegClassID:
  case AMDGPU::AReg_192RegClassID:
  case AMDGPU::VReg_192_Align2RegClassID:
  case AMDGPU::AReg_192_Align2RegClassID:
  case AMDGPU::AV_192RegClassID:
  case AMDGPU::AV_192_Align2RegClassID:
    return 192;
  case AMDGPU::SGPR_224RegClassID:
  case AMDGPU::SReg_224RegClassID:
  case AMDGPU::VReg_224RegClassID:
  case AMDGPU::AReg_224RegClassID:
  case AMDGPU::VReg_224_Align2RegClassID:
  case AMDGPU::AReg_224_Align2RegClassID:
  case AMDGPU::AV_224RegClassID:
  case AMDGPU::AV_224_Align2RegClassID:
    return 224;
  case AMDGPU::SGPR_256RegClassID:
  case AMDGPU::SReg_256RegClassID:
  case AMDGPU::VReg_256RegClassID:
  case AMDGPU::AReg_256RegClassID:
  case AMDGPU::VReg_256_Align2RegClassID:
  case AMDGPU::AReg_256_Align2RegClassID:
  case AMDGPU::AV_256RegClassID:
  case AMDGPU::AV_256_Align2RegClassID:
    return 256;
  case AMDGPU::SGPR_512RegClassID:
  case AMDGPU::SReg_512RegClassID:
  case AMDGPU::VReg_512RegClassID:
  case AMDGPU::AReg_512RegClassID:
  case AMDGPU::VReg_512_Align2RegClassID:
  case AMDGPU::AReg_512_Align2RegClassID:
  case AMDGPU::AV_512RegClassID:
  case AMDGPU::AV_512_Align2RegClassID:
    return 512;
  case AMDGPU::SGPR_1024RegClassID:
  case AMDGPU::SReg_1024RegClassID:
  case AMDGPU::VReg_1024RegClassID:
  case AMDGPU::AReg_1024RegClassID:
  case AMDGPU::VReg_1024_Align2RegClassID:
  case AMDGPU::AReg_1024_Align2RegClassID:
  case AMDGPU::AV_1024RegClassID:
  case AMDGPU::AV_1024_Align2RegClassID:
    return 1024;
  default:
    llvm_unreachable("Unexpected register class");
  }
}

unsigned getRegBitWidth(const MCRegisterClass &RC) {
  return getRegBitWidth(RC.getID());
}

unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc,
                           unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned RCID = Desc.OpInfo[OpNo].RegClass;
  return getRegBitWidth(MRI->getRegClass(RCID)) / 8;
}

bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi) {
  if (isInlinableIntLiteral(Literal))
    return true;

  uint64_t Val = static_cast<uint64_t>(Literal);
  return (Val == DoubleToBits(0.0)) ||
         (Val == DoubleToBits(1.0)) ||
         (Val == DoubleToBits(-1.0)) ||
         (Val == DoubleToBits(0.5)) ||
         (Val == DoubleToBits(-0.5)) ||
         (Val == DoubleToBits(2.0)) ||
         (Val == DoubleToBits(-2.0)) ||
         (Val == DoubleToBits(4.0)) ||
         (Val == DoubleToBits(-4.0)) ||
         (Val == 0x3fc45f306dc9c882 && HasInv2Pi);
}

bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi) {
  if (isInlinableIntLiteral(Literal))
    return true;

  // The actual type of the operand does not seem to matter as long
  // as the bits match one of the inline immediate values. For example:
  //
  // -nan has the hexadecimal encoding of 0xfffffffe which is -2 in decimal,
  // so it is a legal inline immediate.
  //
  // 1065353216 has the hexadecimal encoding 0x3f800000 which is 1.0f in
  // floating-point, so it is a legal inline immediate.

  uint32_t Val = static_cast<uint32_t>(Literal);
  return (Val == FloatToBits(0.0f)) ||
         (Val == FloatToBits(1.0f)) ||
         (Val == FloatToBits(-1.0f)) ||
         (Val == FloatToBits(0.5f)) ||
         (Val == FloatToBits(-0.5f)) ||
         (Val == FloatToBits(2.0f)) ||
         (Val == FloatToBits(-2.0f)) ||
         (Val == FloatToBits(4.0f)) ||
         (Val == FloatToBits(-4.0f)) ||
         (Val == 0x3e22f983 && HasInv2Pi);
}

bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi) {
  if (!HasInv2Pi)
    return false;

  if (isInlinableIntLiteral(Literal))
    return true;

  uint16_t Val = static_cast<uint16_t>(Literal);
  return Val == 0x3C00 || // 1.0
         Val == 0xBC00 || // -1.0
         Val == 0x3800 || // 0.5
         Val == 0xB800 || // -0.5
         Val == 0x4000 || // 2.0
         Val == 0xC000 || // -2.0
         Val == 0x4400 || // 4.0
         Val == 0xC400 || // -4.0
         Val == 0x3118;   // 1/2pi
}

bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi) {
  assert(HasInv2Pi);

  if (isInt<16>(Literal) || isUInt<16>(Literal)) {
    int16_t Trunc = static_cast<int16_t>(Literal);
    return AMDGPU::isInlinableLiteral16(Trunc, HasInv2Pi);
  }
  if (!(Literal & 0xffff))
    return AMDGPU::isInlinableLiteral16(Literal >> 16, HasInv2Pi);

  int16_t Lo16 = static_cast<int16_t>(Literal);
  int16_t Hi16 = static_cast<int16_t>(Literal >> 16);
  return Lo16 == Hi16 && isInlinableLiteral16(Lo16, HasInv2Pi);
}
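// Illustrative packed case for isInlinableLiteralV216() above: the literal
// 0x3C003C00 (both halves 1.0 as fp16) is inlinable because the halves are
// equal and 0x3C00 is itself an inline fp16 constant, whereas 0x3C004000 is
// not, since its two halves (2.0 and 1.0) differ.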
bool isInlinableIntLiteralV216(int32_t Literal) {
  int16_t Lo16 = static_cast<int16_t>(Literal);
  if (isInt<16>(Literal) || isUInt<16>(Literal))
    return isInlinableIntLiteral(Lo16);

  int16_t Hi16 = static_cast<int16_t>(Literal >> 16);
  if (!(Literal & 0xffff))
    return isInlinableIntLiteral(Hi16);
  return Lo16 == Hi16 && isInlinableIntLiteral(Lo16);
}

bool isFoldableLiteralV216(int32_t Literal, bool HasInv2Pi) {
  assert(HasInv2Pi);

  int16_t Lo16 = static_cast<int16_t>(Literal);
  if (isInt<16>(Literal) || isUInt<16>(Literal))
    return true;

  int16_t Hi16 = static_cast<int16_t>(Literal >> 16);
  if (!(Literal & 0xffff))
    return true;
  return Lo16 == Hi16;
}

bool isArgPassedInSGPR(const Argument *A) {
  const Function *F = A->getParent();

  // Arguments to compute shaders are never a source of divergence.
  CallingConv::ID CC = F->getCallingConv();
  switch (CC) {
  case CallingConv::AMDGPU_KERNEL:
  case CallingConv::SPIR_KERNEL:
    return true;
  case CallingConv::AMDGPU_VS:
  case CallingConv::AMDGPU_LS:
  case CallingConv::AMDGPU_HS:
  case CallingConv::AMDGPU_ES:
  case CallingConv::AMDGPU_GS:
  case CallingConv::AMDGPU_PS:
  case CallingConv::AMDGPU_CS:
  case CallingConv::AMDGPU_Gfx:
    // For non-compute shaders, SGPR inputs are marked with either inreg or
    // byval. Everything else is in VGPRs.
    return F->getAttributes().hasParamAttr(A->getArgNo(), Attribute::InReg) ||
           F->getAttributes().hasParamAttr(A->getArgNo(), Attribute::ByVal);
  default:
    // TODO: Should calls support inreg for SGPR inputs?
    return false;
  }
}

static bool hasSMEMByteOffset(const MCSubtargetInfo &ST) {
  return isGCN3Encoding(ST) || isGFX10Plus(ST);
}

static bool hasSMRDSignedImmOffset(const MCSubtargetInfo &ST) {
  return isGFX9Plus(ST);
}

bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST,
                                      int64_t EncodedOffset) {
  return hasSMEMByteOffset(ST) ? isUInt<20>(EncodedOffset)
                               : isUInt<8>(EncodedOffset);
}

bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST,
                                    int64_t EncodedOffset,
                                    bool IsBuffer) {
  return !IsBuffer &&
         hasSMRDSignedImmOffset(ST) &&
         isInt<21>(EncodedOffset);
}

static bool isDwordAligned(uint64_t ByteOffset) {
  return (ByteOffset & 3) == 0;
}

uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST,
                                uint64_t ByteOffset) {
  if (hasSMEMByteOffset(ST))
    return ByteOffset;

  assert(isDwordAligned(ByteOffset));
  return ByteOffset >> 2;
}

Optional<int64_t> getSMRDEncodedOffset(const MCSubtargetInfo &ST,
                                       int64_t ByteOffset, bool IsBuffer) {
  // The signed version is always a byte offset.
  if (!IsBuffer && hasSMRDSignedImmOffset(ST)) {
    assert(hasSMEMByteOffset(ST));
    return isInt<20>(ByteOffset) ? Optional<int64_t>(ByteOffset) : None;
  }

  if (!isDwordAligned(ByteOffset) && !hasSMEMByteOffset(ST))
    return None;

  int64_t EncodedOffset = convertSMRDOffsetUnits(ST, ByteOffset);
  return isLegalSMRDEncodedUnsignedOffset(ST, EncodedOffset)
             ? Optional<int64_t>(EncodedOffset)
             : None;
}
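// Worked example for getSMRDEncodedOffset(): on a pre-GCN3 target (no SMEM
// byte offsets), ByteOffset = 16 must be dword-aligned and is encoded as the
// dword offset 4, which fits the unsigned 8-bit field; on GFX9+ a non-buffer
// offset stays in bytes and is accepted when isInt<20>(ByteOffset) holds.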
Optional<int64_t> getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST,
                                                int64_t ByteOffset) {
  if (!isCI(ST) || !isDwordAligned(ByteOffset))
    return None;

  int64_t EncodedOffset = convertSMRDOffsetUnits(ST, ByteOffset);
  return isUInt<32>(EncodedOffset) ? Optional<int64_t>(EncodedOffset) : None;
}

unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST, bool Signed) {
  // Address offset is 12-bit signed for GFX10, 13-bit for GFX9 and GFX11+.
  if (AMDGPU::isGFX10(ST))
    return Signed ? 12 : 11;

  return Signed ? 13 : 12;
}

// Given Imm, split it into the values to put into the SOffset and ImmOffset
// fields in an MUBUF instruction. Return false if it is not possible (due to a
// hardware bug needing a workaround).
//
// The required alignment ensures that individual address components remain
// aligned if they are aligned to begin with. It also ensures that additional
// offsets within the given alignment can be added to the resulting ImmOffset.
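//
// For example (illustrative numbers, Alignment = 4, on a target not hit by
// the SI/CI SOffset bug below): Imm = 4100 splits into ImmOffset = 4092 with
// SOffset = 8 via the inline-constant path, while Imm = 5000 splits into
// ImmOffset = 908 with SOffset = 4092.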
bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset,
                      const GCNSubtarget *Subtarget, Align Alignment) {
  const uint32_t MaxImm = alignDown(4095, Alignment.value());
  uint32_t Overflow = 0;

  if (Imm > MaxImm) {
    if (Imm <= MaxImm + 64) {
      // Use an SOffset inline constant for 4..64
      Overflow = Imm - MaxImm;
      Imm = MaxImm;
    } else {
      // Try to keep the same value in SOffset for adjacent loads, so that
      // the corresponding register contents can be re-used.
      //
      // Load values with all low-bits (except for alignment bits) set into
      // SOffset, so that a larger range of values can be covered using
      // s_movk_i32.
      //
      // Atomic operations fail to work correctly when individual address
      // components are unaligned, even if their sum is aligned.
      uint32_t High = (Imm + Alignment.value()) & ~4095;
      uint32_t Low = (Imm + Alignment.value()) & 4095;
      Imm = Low;
      Overflow = High - Alignment.value();
    }
  }

  // There is a hardware bug in SI and CI which prevents address clamping in
  // MUBUF instructions from working correctly with SOffsets. The immediate
  // offset is unaffected.
  if (Overflow > 0 &&
      Subtarget->getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS)
    return false;

  ImmOffset = Imm;
  SOffset = Overflow;
  return true;
}

SIModeRegisterDefaults::SIModeRegisterDefaults(const Function &F) {
  *this = getDefaultForCallingConv(F.getCallingConv());

  StringRef IEEEAttr = F.getFnAttribute("amdgpu-ieee").getValueAsString();
  if (!IEEEAttr.empty())
    IEEE = IEEEAttr == "true";

  StringRef DX10ClampAttr
    = F.getFnAttribute("amdgpu-dx10-clamp").getValueAsString();
  if (!DX10ClampAttr.empty())
    DX10Clamp = DX10ClampAttr == "true";

  StringRef DenormF32Attr =
      F.getFnAttribute("denormal-fp-math-f32").getValueAsString();
  if (!DenormF32Attr.empty()) {
    DenormalMode DenormMode = parseDenormalFPAttribute(DenormF32Attr);
    FP32InputDenormals = DenormMode.Input == DenormalMode::IEEE;
    FP32OutputDenormals = DenormMode.Output == DenormalMode::IEEE;
  }

  StringRef DenormAttr =
      F.getFnAttribute("denormal-fp-math").getValueAsString();
  if (!DenormAttr.empty()) {
    DenormalMode DenormMode = parseDenormalFPAttribute(DenormAttr);

    if (DenormF32Attr.empty()) {
      FP32InputDenormals = DenormMode.Input == DenormalMode::IEEE;
      FP32OutputDenormals = DenormMode.Output == DenormalMode::IEEE;
    }

    FP64FP16InputDenormals = DenormMode.Input == DenormalMode::IEEE;
    FP64FP16OutputDenormals = DenormMode.Output == DenormalMode::IEEE;
  }
}

namespace {

struct SourceOfDivergence {
  unsigned Intr;
};
const SourceOfDivergence *lookupSourceOfDivergence(unsigned Intr);

#define GET_SourcesOfDivergence_IMPL
#define GET_Gfx9BufferFormat_IMPL
#define GET_Gfx10BufferFormat_IMPL
#define GET_Gfx11PlusBufferFormat_IMPL
#include "AMDGPUGenSearchableTables.inc"

} // end anonymous namespace

bool isIntrinsicSourceOfDivergence(unsigned IntrID) {
  return lookupSourceOfDivergence(IntrID);
}

const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t BitsPerComp,
                                                  uint8_t NumComponents,
                                                  uint8_t NumFormat,
                                                  const MCSubtargetInfo &STI) {
  return isGFX11Plus(STI)
             ? getGfx11PlusBufferFormatInfo(BitsPerComp, NumComponents,
                                            NumFormat)
             : isGFX10(STI) ? getGfx10BufferFormatInfo(BitsPerComp,
                                                       NumComponents, NumFormat)
                            : getGfx9BufferFormatInfo(BitsPerComp,
                                                      NumComponents, NumFormat);
}

const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t Format,
                                                  const MCSubtargetInfo &STI) {
  return isGFX11Plus(STI) ? getGfx11PlusBufferFormatInfo(Format)
         : isGFX10(STI)   ? getGfx10BufferFormatInfo(Format)
                          : getGfx9BufferFormatInfo(Format);
}

} // namespace AMDGPU

raw_ostream &operator<<(raw_ostream &OS,
                        const AMDGPU::IsaInfo::TargetIDSetting S) {
  switch (S) {
  case (AMDGPU::IsaInfo::TargetIDSetting::Unsupported):
    OS << "Unsupported";
    break;
  case (AMDGPU::IsaInfo::TargetIDSetting::Any):
    OS << "Any";
    break;
  case (AMDGPU::IsaInfo::TargetIDSetting::Off):
    OS << "Off";
    break;
  case (AMDGPU::IsaInfo::TargetIDSetting::On):
    OS << "On";
    break;
  }
  return OS;
}

} // namespace llvm