//===- AMDGPUBaseInfo.cpp - AMDGPU Base encoding information --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDGPUBaseInfo.h"
#include "AMDGPU.h"
#include "AMDGPUAsmUtils.h"
#include "AMDKernelCodeT.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsR600.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/TargetParser.h"

#define GET_INSTRINFO_NAMED_OPS
#define GET_INSTRMAP_INFO
#include "AMDGPUGenInstrInfo.inc"

static llvm::cl::opt<unsigned> AmdhsaCodeObjectVersion(
    "amdhsa-code-object-version", llvm::cl::Hidden,
    llvm::cl::desc("AMDHSA Code Object Version"), llvm::cl::init(4),
    llvm::cl::ZeroOrMore);

namespace {

/// \returns Bit mask for given bit \p Shift and bit \p Width.
unsigned getBitMask(unsigned Shift, unsigned Width) {
  return ((1 << Width) - 1) << Shift;
}

/// Packs \p Src into \p Dst for given bit \p Shift and bit \p Width.
///
/// \returns Packed \p Dst.
unsigned packBits(unsigned Src, unsigned Dst, unsigned Shift, unsigned Width) {
  Dst &= ~(1 << Shift) & ~getBitMask(Shift, Width);
  Dst |= (Src << Shift) & getBitMask(Shift, Width);
  return Dst;
}

/// Unpacks bits from \p Src for given bit \p Shift and bit \p Width.
///
/// \returns Unpacked bits.
unsigned unpackBits(unsigned Src, unsigned Shift, unsigned Width) {
  return (Src & getBitMask(Shift, Width)) >> Shift;
}

/// \returns Vmcnt bit shift (lower bits).
unsigned getVmcntBitShiftLo() { return 0; }

/// \returns Vmcnt bit width (lower bits).
unsigned getVmcntBitWidthLo() { return 4; }

/// \returns Expcnt bit shift.
unsigned getExpcntBitShift() { return 4; }

/// \returns Expcnt bit width.
unsigned getExpcntBitWidth() { return 3; }

/// \returns Lgkmcnt bit shift.
unsigned getLgkmcntBitShift() { return 8; }

/// \returns Lgkmcnt bit width.
unsigned getLgkmcntBitWidth(unsigned VersionMajor) {
  return (VersionMajor >= 10) ? 6 : 4;
}

/// \returns Vmcnt bit shift (higher bits).
unsigned getVmcntBitShiftHi() { return 14; }

/// \returns Vmcnt bit width (higher bits).
unsigned getVmcntBitWidthHi() { return 2; }
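// Summary of the s_waitcnt bit layout implied by the helpers above:
//   [3:0]   vmcnt (low bits)
//   [6:4]   expcnt
//   [11:8]  lgkmcnt (width 6, i.e. [13:8], on gfx10+)
//   [15:14] vmcnt (high bits, gfx9+)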

} // end namespace anonymous

namespace llvm {

namespace AMDGPU {

Optional<uint8_t> getHsaAbiVersion(const MCSubtargetInfo *STI) {
  if (STI && STI->getTargetTriple().getOS() != Triple::AMDHSA)
    return None;

  switch (AmdhsaCodeObjectVersion) {
  case 2:
    return ELF::ELFABIVERSION_AMDGPU_HSA_V2;
  case 3:
    return ELF::ELFABIVERSION_AMDGPU_HSA_V3;
  case 4:
    return ELF::ELFABIVERSION_AMDGPU_HSA_V4;
  case 5:
    return ELF::ELFABIVERSION_AMDGPU_HSA_V5;
  default:
    report_fatal_error(Twine("Unsupported AMDHSA Code Object Version ") +
                       Twine(AmdhsaCodeObjectVersion));
  }
}

bool isHsaAbiVersion2(const MCSubtargetInfo *STI) {
  if (Optional<uint8_t> HsaAbiVer = getHsaAbiVersion(STI))
    return *HsaAbiVer == ELF::ELFABIVERSION_AMDGPU_HSA_V2;
  return false;
}

bool isHsaAbiVersion3(const MCSubtargetInfo *STI) {
  if (Optional<uint8_t> HsaAbiVer = getHsaAbiVersion(STI))
    return *HsaAbiVer == ELF::ELFABIVERSION_AMDGPU_HSA_V3;
  return false;
}

bool isHsaAbiVersion4(const MCSubtargetInfo *STI) {
  if (Optional<uint8_t> HsaAbiVer = getHsaAbiVersion(STI))
    return *HsaAbiVer == ELF::ELFABIVERSION_AMDGPU_HSA_V4;
  return false;
}

bool isHsaAbiVersion5(const MCSubtargetInfo *STI) {
  if (Optional<uint8_t> HsaAbiVer = getHsaAbiVersion(STI))
    return *HsaAbiVer == ELF::ELFABIVERSION_AMDGPU_HSA_V5;
  return false;
}

bool isHsaAbiVersion3AndAbove(const MCSubtargetInfo *STI) {
  return isHsaAbiVersion3(STI) || isHsaAbiVersion4(STI) ||
         isHsaAbiVersion5(STI);
}

#define GET_MIMGBaseOpcodesTable_IMPL
#define GET_MIMGDimInfoTable_IMPL
#define GET_MIMGInfoTable_IMPL
#define GET_MIMGLZMappingTable_IMPL
#define GET_MIMGMIPMappingTable_IMPL
#define GET_MIMGBiasMappingTable_IMPL
#define GET_MIMGOffsetMappingTable_IMPL
#define GET_MIMGG16MappingTable_IMPL
#include "AMDGPUGenSearchableTables.inc"

int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
                  unsigned VDataDwords, unsigned VAddrDwords) {
  const MIMGInfo *Info = getMIMGOpcodeHelper(BaseOpcode, MIMGEncoding,
                                             VDataDwords, VAddrDwords);
  return Info ? Info->Opcode : -1;
}

const MIMGBaseOpcodeInfo *getMIMGBaseOpcode(unsigned Opc) {
  const MIMGInfo *Info = getMIMGInfo(Opc);
  return Info ? getMIMGBaseOpcodeInfo(Info->BaseOpcode) : nullptr;
}

int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels) {
  const MIMGInfo *OrigInfo = getMIMGInfo(Opc);
  const MIMGInfo *NewInfo =
      getMIMGOpcodeHelper(OrigInfo->BaseOpcode, OrigInfo->MIMGEncoding,
                          NewChannels, OrigInfo->VAddrDwords);
  return NewInfo ? NewInfo->Opcode : -1;
}

unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode,
                           const MIMGDimInfo *Dim, bool IsA16,
                           bool IsG16Supported) {
  unsigned AddrWords = BaseOpcode->NumExtraArgs;
  unsigned AddrComponents = (BaseOpcode->Coordinates ? Dim->NumCoords : 0) +
                            (BaseOpcode->LodOrClampOrMip ? 1 : 0);
  if (IsA16)
    AddrWords += divideCeil(AddrComponents, 2);
  else
    AddrWords += AddrComponents;

  // Note: For subtargets that support A16 but not G16, enabling A16 also
  // enables 16 bit gradients.
  // For subtargets that support A16 (operand) and G16 (done with a different
  // instruction encoding), they are independent.
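  // For example, a 2D sample with no extra arguments and no LOD/clamp/mip has
  // AddrComponents == 2, which takes 2 address words, or 1 when packed as A16.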

  if (BaseOpcode->Gradients) {
    if ((IsA16 && !IsG16Supported) || BaseOpcode->G16)
      // There are two gradients per coordinate; we pack them separately.
      // For the 3d case,
      // we get (dy/du, dx/du) (-, dz/du) (dy/dv, dx/dv) (-, dz/dv)
      AddrWords += alignTo<2>(Dim->NumGradients / 2);
    else
      AddrWords += Dim->NumGradients;
  }
  return AddrWords;
}

struct MUBUFInfo {
  uint16_t Opcode;
  uint16_t BaseOpcode;
  uint8_t elements;
  bool has_vaddr;
  bool has_srsrc;
  bool has_soffset;
  bool IsBufferInv;
};

struct MTBUFInfo {
  uint16_t Opcode;
  uint16_t BaseOpcode;
  uint8_t elements;
  bool has_vaddr;
  bool has_srsrc;
  bool has_soffset;
};

struct SMInfo {
  uint16_t Opcode;
  bool IsBuffer;
};

struct VOPInfo {
  uint16_t Opcode;
  bool IsSingle;
};

#define GET_MTBUFInfoTable_DECL
#define GET_MTBUFInfoTable_IMPL
#define GET_MUBUFInfoTable_DECL
#define GET_MUBUFInfoTable_IMPL
#define GET_SMInfoTable_DECL
#define GET_SMInfoTable_IMPL
#define GET_VOP1InfoTable_DECL
#define GET_VOP1InfoTable_IMPL
#define GET_VOP2InfoTable_DECL
#define GET_VOP2InfoTable_IMPL
#define GET_VOP3InfoTable_DECL
#define GET_VOP3InfoTable_IMPL
#include "AMDGPUGenSearchableTables.inc"

int getMTBUFBaseOpcode(unsigned Opc) {
  const MTBUFInfo *Info = getMTBUFInfoFromOpcode(Opc);
  return Info ? Info->BaseOpcode : -1;
}

int getMTBUFOpcode(unsigned BaseOpc, unsigned Elements) {
  const MTBUFInfo *Info = getMTBUFInfoFromBaseOpcodeAndElements(BaseOpc, Elements);
  return Info ? Info->Opcode : -1;
}

int getMTBUFElements(unsigned Opc) {
  const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
  return Info ? Info->elements : 0;
}

bool getMTBUFHasVAddr(unsigned Opc) {
  const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
  return Info ? Info->has_vaddr : false;
}

bool getMTBUFHasSrsrc(unsigned Opc) {
  const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
  return Info ? Info->has_srsrc : false;
}

bool getMTBUFHasSoffset(unsigned Opc) {
  const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
  return Info ? Info->has_soffset : false;
}

int getMUBUFBaseOpcode(unsigned Opc) {
  const MUBUFInfo *Info = getMUBUFInfoFromOpcode(Opc);
  return Info ? Info->BaseOpcode : -1;
}

int getMUBUFOpcode(unsigned BaseOpc, unsigned Elements) {
  const MUBUFInfo *Info = getMUBUFInfoFromBaseOpcodeAndElements(BaseOpc, Elements);
  return Info ? Info->Opcode : -1;
}

int getMUBUFElements(unsigned Opc) {
  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
  return Info ? Info->elements : 0;
}

bool getMUBUFHasVAddr(unsigned Opc) {
  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
  return Info ? Info->has_vaddr : false;
}

bool getMUBUFHasSrsrc(unsigned Opc) {
  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
  return Info ? Info->has_srsrc : false;
}

bool getMUBUFHasSoffset(unsigned Opc) {
  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
  return Info ? Info->has_soffset : false;
}

bool getMUBUFIsBufferInv(unsigned Opc) {
  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
  return Info ? Info->IsBufferInv : false;
}

bool getSMEMIsBuffer(unsigned Opc) {
  const SMInfo *Info = getSMEMOpcodeHelper(Opc);
  return Info ? Info->IsBuffer : false;
}

bool getVOP1IsSingle(unsigned Opc) {
  const VOPInfo *Info = getVOP1OpcodeHelper(Opc);
  return Info ? Info->IsSingle : false;
}

bool getVOP2IsSingle(unsigned Opc) {
  const VOPInfo *Info = getVOP2OpcodeHelper(Opc);
  return Info ? Info->IsSingle : false;
}

bool getVOP3IsSingle(unsigned Opc) {
  const VOPInfo *Info = getVOP3OpcodeHelper(Opc);
  return Info ? Info->IsSingle : false;
}

// Wrapper for Tablegen'd function. enum Subtarget is not defined in any
// header files, so we need to wrap it in a function that takes unsigned
// instead.
int getMCOpcode(uint16_t Opcode, unsigned Gen) {
  return getMCOpcodeGen(Opcode, static_cast<Subtarget>(Gen));
}

namespace IsaInfo {

AMDGPUTargetID::AMDGPUTargetID(const MCSubtargetInfo &STI)
    : STI(STI), XnackSetting(TargetIDSetting::Any),
      SramEccSetting(TargetIDSetting::Any) {
  if (!STI.getFeatureBits().test(FeatureSupportsXNACK))
    XnackSetting = TargetIDSetting::Unsupported;
  if (!STI.getFeatureBits().test(FeatureSupportsSRAMECC))
    SramEccSetting = TargetIDSetting::Unsupported;
}

void AMDGPUTargetID::setTargetIDFromFeaturesString(StringRef FS) {
  // Check if xnack or sramecc is explicitly enabled or disabled. In the
  // absence of the target features we assume we must generate code that can
  // run in any environment.
  SubtargetFeatures Features(FS);
  Optional<bool> XnackRequested;
  Optional<bool> SramEccRequested;

  for (const std::string &Feature : Features.getFeatures()) {
    if (Feature == "+xnack")
      XnackRequested = true;
    else if (Feature == "-xnack")
      XnackRequested = false;
    else if (Feature == "+sramecc")
      SramEccRequested = true;
    else if (Feature == "-sramecc")
      SramEccRequested = false;
  }

  bool XnackSupported = isXnackSupported();
  bool SramEccSupported = isSramEccSupported();

  if (XnackRequested) {
    if (XnackSupported) {
      XnackSetting =
          *XnackRequested ? TargetIDSetting::On : TargetIDSetting::Off;
    } else {
      // If a specific xnack setting was requested and this GPU does not
      // support xnack, emit a warning. Setting will remain set to
      // "Unsupported".
      if (*XnackRequested) {
        errs() << "warning: xnack 'On' was requested for a processor that does "
                  "not support it!\n";
      } else {
        errs() << "warning: xnack 'Off' was requested for a processor that "
                  "does not support it!\n";
      }
    }
  }

  if (SramEccRequested) {
    if (SramEccSupported) {
      SramEccSetting =
          *SramEccRequested ? TargetIDSetting::On : TargetIDSetting::Off;
    } else {
      // If a specific sramecc setting was requested and this GPU does not
      // support sramecc, emit a warning. Setting will remain set to
      // "Unsupported".
      if (*SramEccRequested) {
        errs() << "warning: sramecc 'On' was requested for a processor that "
                  "does not support it!\n";
      } else {
        errs() << "warning: sramecc 'Off' was requested for a processor that "
                  "does not support it!\n";
      }
    }
  }
}

static TargetIDSetting
getTargetIDSettingFromFeatureString(StringRef FeatureString) {
  if (FeatureString.endswith("-"))
    return TargetIDSetting::Off;
  if (FeatureString.endswith("+"))
    return TargetIDSetting::On;

  llvm_unreachable("Malformed feature string");
}

void AMDGPUTargetID::setTargetIDFromTargetIDStream(StringRef TargetID) {
  SmallVector<StringRef, 3> TargetIDSplit;
  TargetID.split(TargetIDSplit, ':');

  for (const auto &FeatureString : TargetIDSplit) {
    if (FeatureString.startswith("xnack"))
      XnackSetting = getTargetIDSettingFromFeatureString(FeatureString);
    if (FeatureString.startswith("sramecc"))
      SramEccSetting = getTargetIDSettingFromFeatureString(FeatureString);
  }
}

std::string AMDGPUTargetID::toString() const {
  std::string StringRep;
  raw_string_ostream StreamRep(StringRep);

  auto TargetTriple = STI.getTargetTriple();
  auto Version = getIsaVersion(STI.getCPU());

  StreamRep << TargetTriple.getArchName() << '-'
            << TargetTriple.getVendorName() << '-'
            << TargetTriple.getOSName() << '-'
            << TargetTriple.getEnvironmentName() << '-';

  std::string Processor;
  // TODO: The following else branch is present because we used various alias
  // names for GPUs up until GFX9 (e.g. 'fiji' is the same as 'gfx803').
  // Remove once all aliases are removed from GCNProcessors.td.
  if (Version.Major >= 9)
    Processor = STI.getCPU().str();
  else
    Processor = (Twine("gfx") + Twine(Version.Major) + Twine(Version.Minor) +
                 Twine(Version.Stepping))
                    .str();

  std::string Features;
  if (Optional<uint8_t> HsaAbiVersion = getHsaAbiVersion(&STI)) {
    switch (*HsaAbiVersion) {
    case ELF::ELFABIVERSION_AMDGPU_HSA_V2:
      // Code object V2 only supported specific processors and had fixed
      // settings for XNACK.
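      // For example, "gfx900" with XNACK on (or ANY) is renamed to "gfx901"
      // below, while "gfx801" and "gfx810" require XNACK and are rejected
      // without it.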
      if (Processor == "gfx600") {
      } else if (Processor == "gfx601") {
      } else if (Processor == "gfx602") {
      } else if (Processor == "gfx700") {
      } else if (Processor == "gfx701") {
      } else if (Processor == "gfx702") {
      } else if (Processor == "gfx703") {
      } else if (Processor == "gfx704") {
      } else if (Processor == "gfx705") {
      } else if (Processor == "gfx801") {
        if (!isXnackOnOrAny())
          report_fatal_error(
              "AMD GPU code object V2 does not support processor " +
              Twine(Processor) + " without XNACK");
      } else if (Processor == "gfx802") {
      } else if (Processor == "gfx803") {
      } else if (Processor == "gfx805") {
      } else if (Processor == "gfx810") {
        if (!isXnackOnOrAny())
          report_fatal_error(
              "AMD GPU code object V2 does not support processor " +
              Twine(Processor) + " without XNACK");
      } else if (Processor == "gfx900") {
        if (isXnackOnOrAny())
          Processor = "gfx901";
      } else if (Processor == "gfx902") {
        if (isXnackOnOrAny())
          Processor = "gfx903";
      } else if (Processor == "gfx904") {
        if (isXnackOnOrAny())
          Processor = "gfx905";
      } else if (Processor == "gfx906") {
        if (isXnackOnOrAny())
          Processor = "gfx907";
      } else if (Processor == "gfx90c") {
        if (isXnackOnOrAny())
          report_fatal_error(
              "AMD GPU code object V2 does not support processor " +
              Twine(Processor) + " with XNACK being ON or ANY");
      } else {
        report_fatal_error(
            "AMD GPU code object V2 does not support processor " +
            Twine(Processor));
      }
      break;
    case ELF::ELFABIVERSION_AMDGPU_HSA_V3:
      // xnack.
      if (isXnackOnOrAny())
        Features += "+xnack";
      // In code object v2 and v3, the "sramecc" feature was spelled with a
      // hyphen ("sram-ecc").
      if (isSramEccOnOrAny())
        Features += "+sram-ecc";
      break;
    case ELF::ELFABIVERSION_AMDGPU_HSA_V4:
    case ELF::ELFABIVERSION_AMDGPU_HSA_V5:
      // sramecc.
      if (getSramEccSetting() == TargetIDSetting::Off)
        Features += ":sramecc-";
      else if (getSramEccSetting() == TargetIDSetting::On)
        Features += ":sramecc+";
      // xnack.
      if (getXnackSetting() == TargetIDSetting::Off)
        Features += ":xnack-";
      else if (getXnackSetting() == TargetIDSetting::On)
        Features += ":xnack+";
      break;
    default:
      break;
    }
  }

  StreamRep << Processor << Features;

  StreamRep.flush();
  return StringRep;
}

unsigned getWavefrontSize(const MCSubtargetInfo *STI) {
  if (STI->getFeatureBits().test(FeatureWavefrontSize16))
    return 16;
  if (STI->getFeatureBits().test(FeatureWavefrontSize32))
    return 32;

  return 64;
}

unsigned getLocalMemorySize(const MCSubtargetInfo *STI) {
  if (STI->getFeatureBits().test(FeatureLocalMemorySize32768))
    return 32768;
  if (STI->getFeatureBits().test(FeatureLocalMemorySize65536))
    return 65536;

  return 0;
}

unsigned getEUsPerCU(const MCSubtargetInfo *STI) {
  // "Per CU" really means "per whatever functional block the waves of a
  // workgroup must share". For gfx10 in CU mode this is the CU, which contains
  // two SIMDs.
  if (isGFX10Plus(*STI) && STI->getFeatureBits().test(FeatureCuMode))
    return 2;
  // Pre-gfx10, a CU contains four SIMDs. For gfx10 in WGP mode the WGP
  // contains two CUs, so a total of four SIMDs.
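  // e.g. a pre-gfx10 CU has 4 SIMDs, and a gfx10 WGP has 2 CUs x 2 SIMDs, so
  // both report four SIMDs here.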
  return 4;
}

unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI,
                               unsigned FlatWorkGroupSize) {
  assert(FlatWorkGroupSize != 0);
  if (STI->getTargetTriple().getArch() != Triple::amdgcn)
    return 8;
  unsigned N = getWavesPerWorkGroup(STI, FlatWorkGroupSize);
  if (N == 1)
    return 40;
  N = 40 / N;
  return std::min(N, 16u);
}

unsigned getMinWavesPerEU(const MCSubtargetInfo *STI) {
  return 1;
}

unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI) {
  // FIXME: Need to take scratch memory into account.
  if (isGFX90A(*STI))
    return 8;
  if (!isGFX10Plus(*STI))
    return 10;
  return hasGFX10_3Insts(*STI) ? 16 : 20;
}

unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo *STI,
                                   unsigned FlatWorkGroupSize) {
  return divideCeil(getWavesPerWorkGroup(STI, FlatWorkGroupSize),
                    getEUsPerCU(STI));
}

unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI) {
  return 1;
}

unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI) {
  // Some subtargets allow encoding 2048, but this isn't tested or supported.
  return 1024;
}

unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI,
                              unsigned FlatWorkGroupSize) {
  return divideCeil(FlatWorkGroupSize, getWavefrontSize(STI));
}

unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI) {
  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major >= 10)
    return getAddressableNumSGPRs(STI);
  if (Version.Major >= 8)
    return 16;
  return 8;
}

unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI) {
  return 8;
}

unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI) {
  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major >= 8)
    return 800;
  return 512;
}

unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI) {
  if (STI->getFeatureBits().test(FeatureSGPRInitBug))
    return FIXED_NUM_SGPRS_FOR_INIT_BUG;

  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major >= 10)
    return 106;
  if (Version.Major >= 8)
    return 102;
  return 104;
}

unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
  assert(WavesPerEU != 0);

  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major >= 10)
    return 0;

  if (WavesPerEU >= getMaxWavesPerEU(STI))
    return 0;

  unsigned MinNumSGPRs = getTotalNumSGPRs(STI) / (WavesPerEU + 1);
  if (STI->getFeatureBits().test(FeatureTrapHandler))
    MinNumSGPRs -= std::min(MinNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
  MinNumSGPRs = alignDown(MinNumSGPRs, getSGPRAllocGranule(STI)) + 1;
  return std::min(MinNumSGPRs, getAddressableNumSGPRs(STI));
}

unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
                        bool Addressable) {
  assert(WavesPerEU != 0);

  unsigned AddressableNumSGPRs = getAddressableNumSGPRs(STI);
  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major >= 10)
    return Addressable ? AddressableNumSGPRs : 108;
  if (Version.Major >= 8 && !Addressable)
    AddressableNumSGPRs = 112;
  unsigned MaxNumSGPRs = getTotalNumSGPRs(STI) / WavesPerEU;
  if (STI->getFeatureBits().test(FeatureTrapHandler))
    MaxNumSGPRs -= std::min(MaxNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
  MaxNumSGPRs = alignDown(MaxNumSGPRs, getSGPRAllocGranule(STI));
  return std::min(MaxNumSGPRs, AddressableNumSGPRs);
}

unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
                          bool FlatScrUsed, bool XNACKUsed) {
  unsigned ExtraSGPRs = 0;
  if (VCCUsed)
    ExtraSGPRs = 2;

  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major >= 10)
    return ExtraSGPRs;

  if (Version.Major < 8) {
    if (FlatScrUsed)
      ExtraSGPRs = 4;
  } else {
    if (XNACKUsed)
      ExtraSGPRs = 4;

    if (FlatScrUsed ||
        STI->getFeatureBits().test(AMDGPU::FeatureArchitectedFlatScratch))
      ExtraSGPRs = 6;
  }

  return ExtraSGPRs;
}

unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
                          bool FlatScrUsed) {
  return getNumExtraSGPRs(STI, VCCUsed, FlatScrUsed,
                          STI->getFeatureBits().test(AMDGPU::FeatureXNACK));
}

unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs) {
  NumSGPRs = alignTo(std::max(1u, NumSGPRs), getSGPREncodingGranule(STI));
  // SGPRBlocks is the actual number of SGPR blocks minus 1.
  return NumSGPRs / getSGPREncodingGranule(STI) - 1;
}

unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI,
                             Optional<bool> EnableWavefrontSize32) {
  if (STI->getFeatureBits().test(FeatureGFX90AInsts))
    return 8;

  bool IsWave32 = EnableWavefrontSize32 ?
      *EnableWavefrontSize32 :
      STI->getFeatureBits().test(FeatureWavefrontSize32);

  if (hasGFX10_3Insts(*STI))
    return IsWave32 ? 16 : 8;

  return IsWave32 ? 8 : 4;
}

unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI,
                                Optional<bool> EnableWavefrontSize32) {
  if (STI->getFeatureBits().test(FeatureGFX90AInsts))
    return 8;

  bool IsWave32 = EnableWavefrontSize32 ?
      *EnableWavefrontSize32 :
      STI->getFeatureBits().test(FeatureWavefrontSize32);

  return IsWave32 ? 8 : 4;
}

unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI) {
  if (STI->getFeatureBits().test(FeatureGFX90AInsts))
    return 512;
  if (!isGFX10Plus(*STI))
    return 256;
  return STI->getFeatureBits().test(FeatureWavefrontSize32) ? 1024 : 512;
}
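// Worked example for the helpers below (pre-gfx10, wave64): a total of 256
// VGPRs with an allocation granule of 4 gives
// getMaxNumVGPRs(STI, /*WavesPerEU=*/5) == alignDown(256 / 5, 4) == 48.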

unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI) {
  if (STI->getFeatureBits().test(FeatureGFX90AInsts))
    return 512;
  return 256;
}

unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
  assert(WavesPerEU != 0);

  if (WavesPerEU >= getMaxWavesPerEU(STI))
    return 0;
  unsigned MinNumVGPRs =
      alignDown(getTotalNumVGPRs(STI) / (WavesPerEU + 1),
                getVGPRAllocGranule(STI)) + 1;
  return std::min(MinNumVGPRs, getAddressableNumVGPRs(STI));
}

unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
  assert(WavesPerEU != 0);

  unsigned MaxNumVGPRs = alignDown(getTotalNumVGPRs(STI) / WavesPerEU,
                                   getVGPRAllocGranule(STI));
  unsigned AddressableNumVGPRs = getAddressableNumVGPRs(STI);
  return std::min(MaxNumVGPRs, AddressableNumVGPRs);
}

unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs,
                          Optional<bool> EnableWavefrontSize32) {
  NumVGPRs = alignTo(std::max(1u, NumVGPRs),
                     getVGPREncodingGranule(STI, EnableWavefrontSize32));
  // VGPRBlocks is the actual number of VGPR blocks minus 1.
  return NumVGPRs / getVGPREncodingGranule(STI, EnableWavefrontSize32) - 1;
}

} // end namespace IsaInfo

void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
                               const MCSubtargetInfo *STI) {
  IsaVersion Version = getIsaVersion(STI->getCPU());

  memset(&Header, 0, sizeof(Header));

  Header.amd_kernel_code_version_major = 1;
  Header.amd_kernel_code_version_minor = 2;
  Header.amd_machine_kind = 1; // AMD_MACHINE_KIND_AMDGPU
  Header.amd_machine_version_major = Version.Major;
  Header.amd_machine_version_minor = Version.Minor;
  Header.amd_machine_version_stepping = Version.Stepping;
  Header.kernel_code_entry_byte_offset = sizeof(Header);
  Header.wavefront_size = 6; // log2; 6 means wave64 (set to 5 below for wave32).

  // If the code object does not support indirect functions, then the value
  // must be 0xffffffff.
  Header.call_convention = -1;

  // These alignment values are specified in powers of two, so alignment =
  // 2^n. The minimum alignment is 2^4 = 16.
  Header.kernarg_segment_alignment = 4;
  Header.group_segment_alignment = 4;
  Header.private_segment_alignment = 4;

  if (Version.Major >= 10) {
    if (STI->getFeatureBits().test(FeatureWavefrontSize32)) {
      Header.wavefront_size = 5;
      Header.code_properties |= AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32;
    }
    Header.compute_pgm_resource_registers |=
        S_00B848_WGP_MODE(STI->getFeatureBits().test(FeatureCuMode) ? 0 : 1) |
        S_00B848_MEM_ORDERED(1);
  }
}

amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor(
    const MCSubtargetInfo *STI) {
  IsaVersion Version = getIsaVersion(STI->getCPU());

  amdhsa::kernel_descriptor_t KD;
  memset(&KD, 0, sizeof(KD));

  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                  amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64,
                  amdhsa::FLOAT_DENORM_MODE_FLUSH_NONE);
  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                  amdhsa::COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, 1);
  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                  amdhsa::COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 1);
  AMDHSA_BITS_SET(KD.compute_pgm_rsrc2,
                  amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, 1);
  if (Version.Major >= 10) {
    AMDHSA_BITS_SET(KD.kernel_code_properties,
                    amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
                    STI->getFeatureBits().test(FeatureWavefrontSize32) ? 1 : 0);
    AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                    amdhsa::COMPUTE_PGM_RSRC1_WGP_MODE,
                    STI->getFeatureBits().test(FeatureCuMode) ? 0 : 1);
    AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                    amdhsa::COMPUTE_PGM_RSRC1_MEM_ORDERED, 1);
  }
  if (AMDGPU::isGFX90A(*STI)) {
    AMDHSA_BITS_SET(KD.compute_pgm_rsrc3,
                    amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT,
                    STI->getFeatureBits().test(FeatureTgSplit) ? 1 : 0);
  }
  return KD;
}

bool isGroupSegment(const GlobalValue *GV) {
  return GV->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
}

bool isGlobalSegment(const GlobalValue *GV) {
  return GV->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
}

bool isReadOnlySegment(const GlobalValue *GV) {
  unsigned AS = GV->getAddressSpace();
  return AS == AMDGPUAS::CONSTANT_ADDRESS ||
         AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT;
}

bool shouldEmitConstantsToTextSection(const Triple &TT) {
  return TT.getArch() == Triple::r600;
}

int getIntegerAttribute(const Function &F, StringRef Name, int Default) {
  Attribute A = F.getFnAttribute(Name);
  int Result = Default;

  if (A.isStringAttribute()) {
    StringRef Str = A.getValueAsString();
    if (Str.getAsInteger(0, Result)) {
      LLVMContext &Ctx = F.getContext();
      Ctx.emitError("can't parse integer attribute " + Name);
    }
  }

  return Result;
}

std::pair<int, int> getIntegerPairAttribute(const Function &F,
                                            StringRef Name,
                                            std::pair<int, int> Default,
                                            bool OnlyFirstRequired) {
  Attribute A = F.getFnAttribute(Name);
  if (!A.isStringAttribute())
    return Default;

  LLVMContext &Ctx = F.getContext();
  std::pair<int, int> Ints = Default;
  std::pair<StringRef, StringRef> Strs = A.getValueAsString().split(',');
  if (Strs.first.trim().getAsInteger(0, Ints.first)) {
    Ctx.emitError("can't parse first integer attribute " + Name);
    return Default;
  }
  if (Strs.second.trim().getAsInteger(0, Ints.second)) {
    if (!OnlyFirstRequired || !Strs.second.trim().empty()) {
      Ctx.emitError("can't parse second integer attribute " + Name);
      return Default;
    }
  }

  return Ints;
}

unsigned getVmcntBitMask(const IsaVersion &Version) {
  unsigned VmcntLo = (1 << getVmcntBitWidthLo()) - 1;
  if (Version.Major < 9)
    return VmcntLo;

  unsigned VmcntHi = ((1 << getVmcntBitWidthHi()) - 1) << getVmcntBitWidthLo();
  return VmcntLo | VmcntHi;
}

unsigned getExpcntBitMask(const IsaVersion &Version) {
  return (1 << getExpcntBitWidth()) - 1;
}

unsigned getLgkmcntBitMask(const IsaVersion &Version) {
  return (1 << getLgkmcntBitWidth(Version.Major)) - 1;
}

unsigned getWaitcntBitMask(const IsaVersion &Version) {
  unsigned VmcntLo = getBitMask(getVmcntBitShiftLo(), getVmcntBitWidthLo());
  unsigned Expcnt = getBitMask(getExpcntBitShift(), getExpcntBitWidth());
  unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(),
                                getLgkmcntBitWidth(Version.Major));
  unsigned Waitcnt = VmcntLo | Expcnt | Lgkmcnt;
  if (Version.Major < 9)
    return Waitcnt;

  unsigned VmcntHi = getBitMask(getVmcntBitShiftHi(), getVmcntBitWidthHi());
  return Waitcnt | VmcntHi;
}

unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt) {
  unsigned VmcntLo =
      unpackBits(Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo());
  if (Version.Major < 9)
    return VmcntLo;

  unsigned VmcntHi =
      unpackBits(Waitcnt, getVmcntBitShiftHi(), getVmcntBitWidthHi());
  VmcntHi <<= getVmcntBitWidthLo();
  return VmcntLo | VmcntHi;
}

unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt) {
  return unpackBits(Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
}

unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt) {
  return unpackBits(Waitcnt, getLgkmcntBitShift(),
                    getLgkmcntBitWidth(Version.Major));
}

void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt,
                   unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt) {
  Vmcnt = decodeVmcnt(Version, Waitcnt);
  Expcnt = decodeExpcnt(Version, Waitcnt);
  Lgkmcnt = decodeLgkmcnt(Version, Waitcnt);
}

Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded) {
  Waitcnt Decoded;
  Decoded.VmCnt = decodeVmcnt(Version, Encoded);
  Decoded.ExpCnt = decodeExpcnt(Version, Encoded);
  Decoded.LgkmCnt = decodeLgkmcnt(Version, Encoded);
  return Decoded;
}

unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt,
                     unsigned Vmcnt) {
  Waitcnt =
      packBits(Vmcnt, Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo());
  if (Version.Major < 9)
    return Waitcnt;

  Vmcnt >>= getVmcntBitWidthLo();
  return packBits(Vmcnt, Waitcnt, getVmcntBitShiftHi(), getVmcntBitWidthHi());
}

unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt,
                      unsigned Expcnt) {
  return packBits(Expcnt, Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
}

unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
                       unsigned Lgkmcnt) {
  return packBits(Lgkmcnt, Waitcnt, getLgkmcntBitShift(),
                  getLgkmcntBitWidth(Version.Major));
}

unsigned encodeWaitcnt(const IsaVersion &Version,
                       unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt) {
  unsigned Waitcnt = getWaitcntBitMask(Version);
  Waitcnt = encodeVmcnt(Version, Waitcnt, Vmcnt);
  Waitcnt = encodeExpcnt(Version, Waitcnt, Expcnt);
  Waitcnt = encodeLgkmcnt(Version, Waitcnt, Lgkmcnt);
  return Waitcnt;
}

unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded) {
  return encodeWaitcnt(Version, Decoded.VmCnt, Decoded.ExpCnt, Decoded.LgkmCnt);
}

//===----------------------------------------------------------------------===//
// hwreg
//===----------------------------------------------------------------------===//

namespace Hwreg {

int64_t getHwregId(const StringRef Name) {
  for (int Id = ID_SYMBOLIC_FIRST_; Id < ID_SYMBOLIC_LAST_; ++Id) {
    if (IdSymbolic[Id] && Name == IdSymbolic[Id])
      return Id;
  }
  return ID_UNKNOWN_;
}

static unsigned getLastSymbolicHwreg(const MCSubtargetInfo &STI) {
  if (isSI(STI) || isCI(STI) || isVI(STI))
    return ID_SYMBOLIC_FIRST_GFX9_;
  else if (isGFX9(STI))
    return ID_SYMBOLIC_FIRST_GFX10_;
  else if (isGFX10(STI) && !isGFX10_BEncoding(STI))
    return ID_SYMBOLIC_FIRST_GFX1030_;
  else
    return ID_SYMBOLIC_LAST_;
}

bool isValidHwreg(int64_t Id, const MCSubtargetInfo &STI) {
  switch (Id) {
  case ID_HW_ID:
    return isSI(STI) || isCI(STI) || isVI(STI) || isGFX9(STI);
  case ID_HW_ID1:
  case ID_HW_ID2:
    return isGFX10Plus(STI);
  case ID_XNACK_MASK:
    return isGFX10(STI) && !AMDGPU::isGFX10_BEncoding(STI);
  default:
    return ID_SYMBOLIC_FIRST_ <= Id && Id < getLastSymbolicHwreg(STI) &&
           IdSymbolic[Id];
  }
}

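// A hwreg operand packs three fields: the register id, a bit offset, and
// (width - 1); see encodeHwreg() / decodeHwreg() below.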
bool isValidHwreg(int64_t Id) {
  return 0 <= Id && isUInt<ID_WIDTH_>(Id);
}

bool isValidHwregOffset(int64_t Offset) {
  return 0 <= Offset && isUInt<OFFSET_WIDTH_>(Offset);
}

bool isValidHwregWidth(int64_t Width) {
  return 0 <= (Width - 1) && isUInt<WIDTH_M1_WIDTH_>(Width - 1);
}

uint64_t encodeHwreg(uint64_t Id, uint64_t Offset, uint64_t Width) {
  return (Id << ID_SHIFT_) |
         (Offset << OFFSET_SHIFT_) |
         ((Width - 1) << WIDTH_M1_SHIFT_);
}

StringRef getHwreg(unsigned Id, const MCSubtargetInfo &STI) {
  return isValidHwreg(Id, STI) ? IdSymbolic[Id] : "";
}

void decodeHwreg(unsigned Val, unsigned &Id, unsigned &Offset, unsigned &Width) {
  Id = (Val & ID_MASK_) >> ID_SHIFT_;
  Offset = (Val & OFFSET_MASK_) >> OFFSET_SHIFT_;
  Width = ((Val & WIDTH_M1_MASK_) >> WIDTH_M1_SHIFT_) + 1;
}

} // namespace Hwreg

//===----------------------------------------------------------------------===//
// exp tgt
//===----------------------------------------------------------------------===//

namespace Exp {

struct ExpTgt {
  StringLiteral Name;
  unsigned Tgt;
  unsigned MaxIndex;
};

static constexpr ExpTgt ExpTgtInfo[] = {
    {{"null"},  ET_NULL,   ET_NULL_MAX_IDX},
    {{"mrtz"},  ET_MRTZ,   ET_MRTZ_MAX_IDX},
    {{"prim"},  ET_PRIM,   ET_PRIM_MAX_IDX},
    {{"mrt"},   ET_MRT0,   ET_MRT_MAX_IDX},
    {{"pos"},   ET_POS0,   ET_POS_MAX_IDX},
    {{"param"}, ET_PARAM0, ET_PARAM_MAX_IDX},
};

bool getTgtName(unsigned Id, StringRef &Name, int &Index) {
  for (const ExpTgt &Val : ExpTgtInfo) {
    if (Val.Tgt <= Id && Id <= Val.Tgt + Val.MaxIndex) {
      Index = (Val.MaxIndex == 0) ? -1 : (Id - Val.Tgt);
      Name = Val.Name;
      return true;
    }
  }
  return false;
}

unsigned getTgtId(const StringRef Name) {

  for (const ExpTgt &Val : ExpTgtInfo) {
    if (Val.MaxIndex == 0 && Name == Val.Name)
      return Val.Tgt;

    if (Val.MaxIndex > 0 && Name.startswith(Val.Name)) {
      StringRef Suffix = Name.drop_front(Val.Name.size());

      unsigned Id;
      if (Suffix.getAsInteger(10, Id) || Id > Val.MaxIndex)
        return ET_INVALID;

      // Disallow leading zeroes.
      if (Suffix.size() > 1 && Suffix[0] == '0')
        return ET_INVALID;

      return Val.Tgt + Id;
    }
  }
  return ET_INVALID;
}

bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI) {
  return (Id != ET_POS4 && Id != ET_PRIM) || isGFX10Plus(STI);
}

} // namespace Exp

//===----------------------------------------------------------------------===//
// MTBUF Format
//===----------------------------------------------------------------------===//

namespace MTBUFFormat {

int64_t getDfmt(const StringRef Name) {
  for (int Id = DFMT_MIN; Id <= DFMT_MAX; ++Id) {
    if (Name == DfmtSymbolic[Id])
      return Id;
  }
  return DFMT_UNDEF;
}

StringRef getDfmtName(unsigned Id) {
  assert(Id <= DFMT_MAX);
  return DfmtSymbolic[Id];
}

static StringLiteral const *getNfmtLookupTable(const MCSubtargetInfo &STI) {
  if (isSI(STI) || isCI(STI))
    return NfmtSymbolicSICI;
  if (isVI(STI) || isGFX9(STI))
    return NfmtSymbolicVI;
  return NfmtSymbolicGFX10;
}

int64_t getNfmt(const StringRef Name, const MCSubtargetInfo &STI) {
  auto lookupTable = getNfmtLookupTable(STI);
  for (int Id = NFMT_MIN; Id <= NFMT_MAX; ++Id) {
    if (Name == lookupTable[Id])
      return Id;
  }
  return NFMT_UNDEF;
}

StringRef getNfmtName(unsigned Id, const MCSubtargetInfo &STI) {
  assert(Id <= NFMT_MAX);
  return getNfmtLookupTable(STI)[Id];
}

bool isValidDfmtNfmt(unsigned Id, const MCSubtargetInfo &STI) {
  unsigned Dfmt;
  unsigned Nfmt;
  decodeDfmtNfmt(Id, Dfmt, Nfmt);
  return isValidNfmt(Nfmt, STI);
}

bool isValidNfmt(unsigned Id, const MCSubtargetInfo &STI) {
  return !getNfmtName(Id, STI).empty();
}

int64_t encodeDfmtNfmt(unsigned Dfmt, unsigned Nfmt) {
  return (Dfmt << DFMT_SHIFT) | (Nfmt << NFMT_SHIFT);
}

void decodeDfmtNfmt(unsigned Format, unsigned &Dfmt, unsigned &Nfmt) {
  Dfmt = (Format >> DFMT_SHIFT) & DFMT_MASK;
  Nfmt = (Format >> NFMT_SHIFT) & NFMT_MASK;
}

int64_t getUnifiedFormat(const StringRef Name) {
  for (int Id = UFMT_FIRST; Id <= UFMT_LAST; ++Id) {
    if (Name == UfmtSymbolic[Id])
      return Id;
  }
  return UFMT_UNDEF;
}

StringRef getUnifiedFormatName(unsigned Id) {
  return isValidUnifiedFormat(Id) ? UfmtSymbolic[Id] : "";
}

bool isValidUnifiedFormat(unsigned Id) {
  return Id <= UFMT_LAST;
}

int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt) {
  int64_t Fmt = encodeDfmtNfmt(Dfmt, Nfmt);
  for (int Id = UFMT_FIRST; Id <= UFMT_LAST; ++Id) {
    if (Fmt == DfmtNfmt2UFmt[Id])
      return Id;
  }
  return UFMT_UNDEF;
}

bool isValidFormatEncoding(unsigned Val, const MCSubtargetInfo &STI) {
  return isGFX10Plus(STI) ? (Val <= UFMT_MAX) : (Val <= DFMT_NFMT_MAX);
}

unsigned getDefaultFormatEncoding(const MCSubtargetInfo &STI) {
  if (isGFX10Plus(STI))
    return UFMT_DEFAULT;
  return DFMT_NFMT_DEFAULT;
}

} // namespace MTBUFFormat

//===----------------------------------------------------------------------===//
// SendMsg
//===----------------------------------------------------------------------===//

namespace SendMsg {

int64_t getMsgId(const StringRef Name) {
  for (int i = ID_GAPS_FIRST_; i < ID_GAPS_LAST_; ++i) {
    if (IdSymbolic[i] && Name == IdSymbolic[i])
      return i;
  }
  return ID_UNKNOWN_;
}

bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI, bool Strict) {
  if (Strict) {
    switch (MsgId) {
    case ID_SAVEWAVE:
      return isVI(STI) || isGFX9Plus(STI);
    case ID_STALL_WAVE_GEN:
    case ID_HALT_WAVES:
    case ID_ORDERED_PS_DONE:
    case ID_GS_ALLOC_REQ:
    case ID_GET_DOORBELL:
      return isGFX9Plus(STI);
    case ID_EARLY_PRIM_DEALLOC:
      return isGFX9(STI);
    case ID_GET_DDID:
      return isGFX10Plus(STI);
    default:
      return 0 <= MsgId && MsgId < ID_GAPS_LAST_ && IdSymbolic[MsgId];
    }
  } else {
    return 0 <= MsgId && isUInt<ID_WIDTH_>(MsgId);
  }
}

StringRef getMsgName(int64_t MsgId) {
  assert(0 <= MsgId && MsgId < ID_GAPS_LAST_);
  return IdSymbolic[MsgId];
}

int64_t getMsgOpId(int64_t MsgId, const StringRef Name) {
  const char* const *S = (MsgId == ID_SYSMSG) ? OpSysSymbolic : OpGsSymbolic;
  const int F = (MsgId == ID_SYSMSG) ? OP_SYS_FIRST_ : OP_GS_FIRST_;
  const int L = (MsgId == ID_SYSMSG) ? OP_SYS_LAST_ : OP_GS_LAST_;
  for (int i = F; i < L; ++i) {
    if (Name == S[i]) {
      return i;
    }
  }
  return OP_UNKNOWN_;
}

bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI,
                  bool Strict) {
  assert(isValidMsgId(MsgId, STI, Strict));

  if (!Strict)
    return 0 <= OpId && isUInt<OP_WIDTH_>(OpId);

  switch (MsgId) {
  case ID_GS:
    return (OP_GS_FIRST_ <= OpId && OpId < OP_GS_LAST_) && OpId != OP_GS_NOP;
  case ID_GS_DONE:
    return OP_GS_FIRST_ <= OpId && OpId < OP_GS_LAST_;
  case ID_SYSMSG:
    return OP_SYS_FIRST_ <= OpId && OpId < OP_SYS_LAST_;
  default:
    return OpId == OP_NONE_;
  }
}

StringRef getMsgOpName(int64_t MsgId, int64_t OpId) {
  assert(msgRequiresOp(MsgId));
  return (MsgId == ID_SYSMSG) ? OpSysSymbolic[OpId] : OpGsSymbolic[OpId];
}

bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId,
                      const MCSubtargetInfo &STI, bool Strict) {
  assert(isValidMsgOp(MsgId, OpId, STI, Strict));

  if (!Strict)
    return 0 <= StreamId && isUInt<STREAM_ID_WIDTH_>(StreamId);

  switch (MsgId) {
  case ID_GS:
    return STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_;
  case ID_GS_DONE:
    return (OpId == OP_GS_NOP) ?
        (StreamId == STREAM_ID_NONE_) :
        (STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_);
  default:
    return StreamId == STREAM_ID_NONE_;
  }
}

bool msgRequiresOp(int64_t MsgId) {
  return MsgId == ID_GS || MsgId == ID_GS_DONE || MsgId == ID_SYSMSG;
}

bool msgSupportsStream(int64_t MsgId, int64_t OpId) {
  return (MsgId == ID_GS || MsgId == ID_GS_DONE) && OpId != OP_GS_NOP;
}

void decodeMsg(unsigned Val,
               uint16_t &MsgId,
               uint16_t &OpId,
               uint16_t &StreamId) {
  MsgId = Val & ID_MASK_;
  OpId = (Val & OP_MASK_) >> OP_SHIFT_;
  StreamId = (Val & STREAM_ID_MASK_) >> STREAM_ID_SHIFT_;
}

uint64_t encodeMsg(uint64_t MsgId,
                   uint64_t OpId,
                   uint64_t StreamId) {
  return (MsgId << ID_SHIFT_) |
         (OpId << OP_SHIFT_) |
         (StreamId << STREAM_ID_SHIFT_);
}

} // namespace SendMsg

//===----------------------------------------------------------------------===//
//
//===----------------------------------------------------------------------===//

unsigned getInitialPSInputAddr(const Function &F) {
  return getIntegerAttribute(F, "InitialPSInputAddr", 0);
}

bool getHasColorExport(const Function &F) {
  // As a safe default always respond as if PS has color exports.
  return getIntegerAttribute(
             F, "amdgpu-color-export",
             F.getCallingConv() == CallingConv::AMDGPU_PS ? 1 : 0) != 0;
}

bool getHasDepthExport(const Function &F) {
  return getIntegerAttribute(F, "amdgpu-depth-export", 0) != 0;
}

bool isShader(CallingConv::ID cc) {
  switch (cc) {
  case CallingConv::AMDGPU_VS:
  case CallingConv::AMDGPU_LS:
  case CallingConv::AMDGPU_HS:
  case CallingConv::AMDGPU_ES:
  case CallingConv::AMDGPU_GS:
  case CallingConv::AMDGPU_PS:
  case CallingConv::AMDGPU_CS:
    return true;
  default:
    return false;
  }
}

bool isGraphics(CallingConv::ID cc) {
  return isShader(cc) || cc == CallingConv::AMDGPU_Gfx;
}

bool isCompute(CallingConv::ID cc) {
  return !isGraphics(cc) || cc == CallingConv::AMDGPU_CS;
}

bool isEntryFunctionCC(CallingConv::ID CC) {
  switch (CC) {
  case CallingConv::AMDGPU_KERNEL:
  case CallingConv::SPIR_KERNEL:
  case CallingConv::AMDGPU_VS:
  case CallingConv::AMDGPU_GS:
  case CallingConv::AMDGPU_PS:
  case CallingConv::AMDGPU_CS:
  case CallingConv::AMDGPU_ES:
  case CallingConv::AMDGPU_HS:
  case CallingConv::AMDGPU_LS:
    return true;
  default:
    return false;
  }
}

bool isModuleEntryFunctionCC(CallingConv::ID CC) {
  switch (CC) {
  case CallingConv::AMDGPU_Gfx:
    return true;
  default:
    return isEntryFunctionCC(CC);
  }
}

bool hasXNACK(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureXNACK];
}

bool hasSRAMECC(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureSRAMECC];
}

bool hasMIMG_R128(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureMIMG_R128] &&
         !STI.getFeatureBits()[AMDGPU::FeatureR128A16];
}

bool hasGFX10A16(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureGFX10A16];
}

bool hasG16(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureG16];
}

bool hasPackedD16(const MCSubtargetInfo &STI) {
  return !STI.getFeatureBits()[AMDGPU::FeatureUnpackedD16VMem];
}

bool isSI(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureSouthernIslands];
}

bool isCI(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureSeaIslands];
}

bool isVI(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands];
}

bool isGFX9(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureGFX9];
}

bool isGFX9Plus(const MCSubtargetInfo &STI) {
  return isGFX9(STI) || isGFX10Plus(STI);
}

bool isGFX10(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureGFX10];
}

bool isGFX10Plus(const MCSubtargetInfo &STI) { return isGFX10(STI); }

bool isGCN3Encoding(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureGCN3Encoding];
}

bool isGFX10_AEncoding(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureGFX10_AEncoding];
}

bool isGFX10_BEncoding(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureGFX10_BEncoding];
}

bool hasGFX10_3Insts(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureGFX10_3Insts];
}

bool isGFX90A(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureGFX90AInsts];
}

bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
}

bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI) {
  const MCRegisterClass SGPRClass = TRI->getRegClass(AMDGPU::SReg_32RegClassID);
  const unsigned FirstSubReg = TRI->getSubReg(Reg, AMDGPU::sub0);
  return SGPRClass.contains(FirstSubReg != 0 ? FirstSubReg : Reg) ||
         Reg == AMDGPU::SCC;
}

bool isRegIntersect(unsigned Reg0, unsigned Reg1, const MCRegisterInfo* TRI) {
  for (MCRegAliasIterator R(Reg0, TRI, true); R.isValid(); ++R) {
    if (*R == Reg1) return true;
  }
  return false;
}

#define MAP_REG2REG \
  using namespace AMDGPU; \
  switch(Reg) { \
  default: return Reg; \
  CASE_CI_VI(FLAT_SCR) \
  CASE_CI_VI(FLAT_SCR_LO) \
  CASE_CI_VI(FLAT_SCR_HI) \
  CASE_VI_GFX9PLUS(TTMP0) \
  CASE_VI_GFX9PLUS(TTMP1) \
  CASE_VI_GFX9PLUS(TTMP2) \
  CASE_VI_GFX9PLUS(TTMP3) \
  CASE_VI_GFX9PLUS(TTMP4) \
  CASE_VI_GFX9PLUS(TTMP5) \
  CASE_VI_GFX9PLUS(TTMP6) \
  CASE_VI_GFX9PLUS(TTMP7) \
  CASE_VI_GFX9PLUS(TTMP8) \
  CASE_VI_GFX9PLUS(TTMP9) \
  CASE_VI_GFX9PLUS(TTMP10) \
  CASE_VI_GFX9PLUS(TTMP11) \
  CASE_VI_GFX9PLUS(TTMP12) \
  CASE_VI_GFX9PLUS(TTMP13) \
  CASE_VI_GFX9PLUS(TTMP14) \
  CASE_VI_GFX9PLUS(TTMP15) \
  CASE_VI_GFX9PLUS(TTMP0_TTMP1) \
  CASE_VI_GFX9PLUS(TTMP2_TTMP3) \
  CASE_VI_GFX9PLUS(TTMP4_TTMP5) \
  CASE_VI_GFX9PLUS(TTMP6_TTMP7) \
  CASE_VI_GFX9PLUS(TTMP8_TTMP9) \
  CASE_VI_GFX9PLUS(TTMP10_TTMP11) \
  CASE_VI_GFX9PLUS(TTMP12_TTMP13) \
  CASE_VI_GFX9PLUS(TTMP14_TTMP15) \
  CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3) \
  CASE_VI_GFX9PLUS(TTMP4_TTMP5_TTMP6_TTMP7) \
  CASE_VI_GFX9PLUS(TTMP8_TTMP9_TTMP10_TTMP11) \
  CASE_VI_GFX9PLUS(TTMP12_TTMP13_TTMP14_TTMP15) \
  CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7) \
  CASE_VI_GFX9PLUS(TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11) \
  CASE_VI_GFX9PLUS(TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
  CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
  }

#define CASE_CI_VI(node) \
  assert(!isSI(STI)); \
  case node: return isCI(STI) ? node##_ci : node##_vi;

#define CASE_VI_GFX9PLUS(node) \
  case node: return isGFX9Plus(STI) ? node##_gfx9plus : node##_vi;

unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI) {
  if (STI.getTargetTriple().getArch() == Triple::r600)
    return Reg;
  MAP_REG2REG
}

#undef CASE_CI_VI
#undef CASE_VI_GFX9PLUS

#define CASE_CI_VI(node) case node##_ci: case node##_vi: return node;
#define CASE_VI_GFX9PLUS(node) case node##_vi: case node##_gfx9plus: return node;

unsigned mc2PseudoReg(unsigned Reg) {
  MAP_REG2REG
}

#undef CASE_CI_VI
#undef CASE_VI_GFX9PLUS
#undef MAP_REG2REG

bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned OpType = Desc.OpInfo[OpNo].OperandType;
  return OpType >= AMDGPU::OPERAND_SRC_FIRST &&
         OpType <= AMDGPU::OPERAND_SRC_LAST;
}

bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned OpType = Desc.OpInfo[OpNo].OperandType;
  switch (OpType) {
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
  case AMDGPU::OPERAND_REG_IMM_V2FP16:
  case AMDGPU::OPERAND_REG_IMM_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
  case AMDGPU::OPERAND_REG_IMM_V2FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
    return true;
  default:
    return false;
  }
}

bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned OpType = Desc.OpInfo[OpNo].OperandType;
  return OpType >= AMDGPU::OPERAND_REG_INLINE_C_FIRST &&
         OpType <= AMDGPU::OPERAND_REG_INLINE_C_LAST;
}

// Avoid using MCRegisterClass::getSize, since that function will go away
// (move from MC* level to Target* level). Return size in bits.
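// For example, every 32-bit class (SGPR_32, VGPR_32, AGPR_32, ...) maps to 32
// here, and getRegOperandSize() below divides the result by 8 to get bytes.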
unsigned getRegBitWidth(unsigned RCID) {
  switch (RCID) {
  case AMDGPU::VGPR_LO16RegClassID:
  case AMDGPU::VGPR_HI16RegClassID:
  case AMDGPU::SGPR_LO16RegClassID:
  case AMDGPU::AGPR_LO16RegClassID:
    return 16;
  case AMDGPU::SGPR_32RegClassID:
  case AMDGPU::VGPR_32RegClassID:
  case AMDGPU::VRegOrLds_32RegClassID:
  case AMDGPU::AGPR_32RegClassID:
  case AMDGPU::VS_32RegClassID:
  case AMDGPU::AV_32RegClassID:
  case AMDGPU::SReg_32RegClassID:
  case AMDGPU::SReg_32_XM0RegClassID:
  case AMDGPU::SRegOrLds_32RegClassID:
    return 32;
  case AMDGPU::SGPR_64RegClassID:
  case AMDGPU::VS_64RegClassID:
  case AMDGPU::SReg_64RegClassID:
  case AMDGPU::VReg_64RegClassID:
  case AMDGPU::AReg_64RegClassID:
  case AMDGPU::SReg_64_XEXECRegClassID:
  case AMDGPU::VReg_64_Align2RegClassID:
  case AMDGPU::AReg_64_Align2RegClassID:
  case AMDGPU::AV_64RegClassID:
  case AMDGPU::AV_64_Align2RegClassID:
    return 64;
  case AMDGPU::SGPR_96RegClassID:
  case AMDGPU::SReg_96RegClassID:
  case AMDGPU::VReg_96RegClassID:
  case AMDGPU::AReg_96RegClassID:
  case AMDGPU::VReg_96_Align2RegClassID:
  case AMDGPU::AReg_96_Align2RegClassID:
  case AMDGPU::AV_96RegClassID:
  case AMDGPU::AV_96_Align2RegClassID:
    return 96;
  case AMDGPU::SGPR_128RegClassID:
  case AMDGPU::SReg_128RegClassID:
  case AMDGPU::VReg_128RegClassID:
  case AMDGPU::AReg_128RegClassID:
  case AMDGPU::VReg_128_Align2RegClassID:
  case AMDGPU::AReg_128_Align2RegClassID:
  case AMDGPU::AV_128RegClassID:
  case AMDGPU::AV_128_Align2RegClassID:
    return 128;
  case AMDGPU::SGPR_160RegClassID:
  case AMDGPU::SReg_160RegClassID:
  case AMDGPU::VReg_160RegClassID:
  case AMDGPU::AReg_160RegClassID:
  case AMDGPU::VReg_160_Align2RegClassID:
  case AMDGPU::AReg_160_Align2RegClassID:
  case AMDGPU::AV_160RegClassID:
  case AMDGPU::AV_160_Align2RegClassID:
    return 160;
  case AMDGPU::SGPR_192RegClassID:
  case AMDGPU::SReg_192RegClassID:
  case AMDGPU::VReg_192RegClassID:
  case AMDGPU::AReg_192RegClassID:
  case AMDGPU::VReg_192_Align2RegClassID:
  case AMDGPU::AReg_192_Align2RegClassID:
  case AMDGPU::AV_192RegClassID:
  case AMDGPU::AV_192_Align2RegClassID:
    return 192;
  case AMDGPU::SGPR_224RegClassID:
  case AMDGPU::SReg_224RegClassID:
  case AMDGPU::VReg_224RegClassID:
  case AMDGPU::AReg_224RegClassID:
  case AMDGPU::VReg_224_Align2RegClassID:
  case AMDGPU::AReg_224_Align2RegClassID:
  case AMDGPU::AV_224RegClassID:
  case AMDGPU::AV_224_Align2RegClassID:
    return 224;
  case AMDGPU::SGPR_256RegClassID:
  case AMDGPU::SReg_256RegClassID:
  case AMDGPU::VReg_256RegClassID:
  case AMDGPU::AReg_256RegClassID:
  case AMDGPU::VReg_256_Align2RegClassID:
  case AMDGPU::AReg_256_Align2RegClassID:
  case AMDGPU::AV_256RegClassID:
  case AMDGPU::AV_256_Align2RegClassID:
    return 256;
  case AMDGPU::SGPR_512RegClassID:
  case AMDGPU::SReg_512RegClassID:
  case AMDGPU::VReg_512RegClassID:
  case AMDGPU::AReg_512RegClassID:
  case AMDGPU::VReg_512_Align2RegClassID:
  case AMDGPU::AReg_512_Align2RegClassID:
  case AMDGPU::AV_512RegClassID:
  case AMDGPU::AV_512_Align2RegClassID:
    return 512;
  case AMDGPU::SGPR_1024RegClassID:
  case AMDGPU::SReg_1024RegClassID:
  case AMDGPU::VReg_1024RegClassID:
  case AMDGPU::AReg_1024RegClassID:
  case AMDGPU::VReg_1024_Align2RegClassID:
  case AMDGPU::AReg_1024_Align2RegClassID:
  case AMDGPU::AV_1024RegClassID:
  case AMDGPU::AV_1024_Align2RegClassID:
    return 1024;
  default:
    llvm_unreachable("Unexpected register class");
  }
}

unsigned getRegBitWidth(const MCRegisterClass &RC) {
  return getRegBitWidth(RC.getID());
}

unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc,
                           unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned RCID = Desc.OpInfo[OpNo].RegClass;
  return getRegBitWidth(MRI->getRegClass(RCID)) / 8;
}

bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi) {
  if (isInlinableIntLiteral(Literal))
    return true;

  uint64_t Val = static_cast<uint64_t>(Literal);
  return (Val == DoubleToBits(0.0)) ||
         (Val == DoubleToBits(1.0)) ||
         (Val == DoubleToBits(-1.0)) ||
         (Val == DoubleToBits(0.5)) ||
         (Val == DoubleToBits(-0.5)) ||
         (Val == DoubleToBits(2.0)) ||
         (Val == DoubleToBits(-2.0)) ||
         (Val == DoubleToBits(4.0)) ||
         (Val == DoubleToBits(-4.0)) ||
         (Val == 0x3fc45f306dc9c882 && HasInv2Pi);
}

bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi) {
  if (isInlinableIntLiteral(Literal))
    return true;

  // The actual type of the operand does not seem to matter as long
  // as the bits match one of the inline immediate values. For example:
  //
  // -nan has the hexadecimal encoding of 0xfffffffe which is -2 in decimal,
  // so it is a legal inline immediate.
  //
  // 1065353216 has the hexadecimal encoding 0x3f800000 which is 1.0f in
  // floating-point, so it is a legal inline immediate.

  uint32_t Val = static_cast<uint32_t>(Literal);
  return (Val == FloatToBits(0.0f)) ||
         (Val == FloatToBits(1.0f)) ||
         (Val == FloatToBits(-1.0f)) ||
         (Val == FloatToBits(0.5f)) ||
         (Val == FloatToBits(-0.5f)) ||
         (Val == FloatToBits(2.0f)) ||
         (Val == FloatToBits(-2.0f)) ||
         (Val == FloatToBits(4.0f)) ||
         (Val == FloatToBits(-4.0f)) ||
         (Val == 0x3e22f983 && HasInv2Pi);
}

bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi) {
  if (!HasInv2Pi)
    return false;

  if (isInlinableIntLiteral(Literal))
    return true;

  uint16_t Val = static_cast<uint16_t>(Literal);
  return Val == 0x3C00 || // 1.0
         Val == 0xBC00 || // -1.0
         Val == 0x3800 || // 0.5
         Val == 0xB800 || // -0.5
         Val == 0x4000 || // 2.0
         Val == 0xC000 || // -2.0
         Val == 0x4400 || // 4.0
         Val == 0xC400 || // -4.0
         Val == 0x3118;   // 1/2pi
}

bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi) {
  assert(HasInv2Pi);

  if (isInt<16>(Literal) || isUInt<16>(Literal)) {
    int16_t Trunc = static_cast<int16_t>(Literal);
    return AMDGPU::isInlinableLiteral16(Trunc, HasInv2Pi);
  }
  if (!(Literal & 0xffff))
    return AMDGPU::isInlinableLiteral16(Literal >> 16, HasInv2Pi);

  int16_t Lo16 = static_cast<int16_t>(Literal);
  int16_t Hi16 = static_cast<int16_t>(Literal >> 16);
  return Lo16 == Hi16 && isInlinableLiteral16(Lo16, HasInv2Pi);
}

bool isInlinableIntLiteralV216(int32_t Literal) {
  int16_t Lo16 = static_cast<int16_t>(Literal);
  if (isInt<16>(Literal) || isUInt<16>(Literal))
    return isInlinableIntLiteral(Lo16);

  int16_t Hi16 = static_cast<int16_t>(Literal >> 16);
  if (!(Literal & 0xffff))
    return isInlinableIntLiteral(Hi16);
  return Lo16 == Hi16 && isInlinableIntLiteral(Lo16);
}
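// For example, 0x40004000 packs 2.0 into both f16 halves; since Lo16 == Hi16
// and 0x4000 is an inline constant, isInlinableLiteralV216 above accepts it.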
bool isFoldableLiteralV216(int32_t Literal, bool HasInv2Pi) {
  assert(HasInv2Pi);

  int16_t Lo16 = static_cast<int16_t>(Literal);
  if (isInt<16>(Literal) || isUInt<16>(Literal))
    return true;

  int16_t Hi16 = static_cast<int16_t>(Literal >> 16);
  if (!(Literal & 0xffff))
    return true;
  return Lo16 == Hi16;
}

bool isArgPassedInSGPR(const Argument *A) {
  const Function *F = A->getParent();

  // Arguments to compute shaders are never a source of divergence.
  CallingConv::ID CC = F->getCallingConv();
  switch (CC) {
  case CallingConv::AMDGPU_KERNEL:
  case CallingConv::SPIR_KERNEL:
    return true;
  case CallingConv::AMDGPU_VS:
  case CallingConv::AMDGPU_LS:
  case CallingConv::AMDGPU_HS:
  case CallingConv::AMDGPU_ES:
  case CallingConv::AMDGPU_GS:
  case CallingConv::AMDGPU_PS:
  case CallingConv::AMDGPU_CS:
  case CallingConv::AMDGPU_Gfx:
    // For non-compute shaders, SGPR inputs are marked with either inreg or
    // byval. Everything else is in VGPRs.
    return F->getAttributes().hasParamAttr(A->getArgNo(), Attribute::InReg) ||
           F->getAttributes().hasParamAttr(A->getArgNo(), Attribute::ByVal);
  default:
    // TODO: Should calls support inreg for SGPR inputs?
    return false;
  }
}

static bool hasSMEMByteOffset(const MCSubtargetInfo &ST) {
  return isGCN3Encoding(ST) || isGFX10Plus(ST);
}

static bool hasSMRDSignedImmOffset(const MCSubtargetInfo &ST) {
  return isGFX9Plus(ST);
}

bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST,
                                      int64_t EncodedOffset) {
  return hasSMEMByteOffset(ST) ? isUInt<20>(EncodedOffset)
                               : isUInt<8>(EncodedOffset);
}

bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST,
                                    int64_t EncodedOffset,
                                    bool IsBuffer) {
  return !IsBuffer &&
         hasSMRDSignedImmOffset(ST) &&
         isInt<21>(EncodedOffset);
}

static bool isDwordAligned(uint64_t ByteOffset) {
  return (ByteOffset & 3) == 0;
}

uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST,
                                uint64_t ByteOffset) {
  if (hasSMEMByteOffset(ST))
    return ByteOffset;

  assert(isDwordAligned(ByteOffset));
  return ByteOffset >> 2;
}

Optional<int64_t> getSMRDEncodedOffset(const MCSubtargetInfo &ST,
                                       int64_t ByteOffset, bool IsBuffer) {
  // The signed version is always a byte offset.
  if (!IsBuffer && hasSMRDSignedImmOffset(ST)) {
    assert(hasSMEMByteOffset(ST));
    return isInt<20>(ByteOffset) ? Optional<int64_t>(ByteOffset) : None;
  }

  if (!isDwordAligned(ByteOffset) && !hasSMEMByteOffset(ST))
    return None;

  int64_t EncodedOffset = convertSMRDOffsetUnits(ST, ByteOffset);
  return isLegalSMRDEncodedUnsignedOffset(ST, EncodedOffset)
             ? Optional<int64_t>(EncodedOffset)
             : None;
}

Optional<int64_t> getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST,
                                                int64_t ByteOffset) {
  if (!isCI(ST) || !isDwordAligned(ByteOffset))
    return None;

  int64_t EncodedOffset = convertSMRDOffsetUnits(ST, ByteOffset);
  return isUInt<32>(EncodedOffset) ? Optional<int64_t>(EncodedOffset) : None;
}
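
// Illustrative sketch (assumptions, not exercised by this file; STI stands for
// some MCSubtargetInfo of interest): how the SMRD helpers above encode a byte
// offset of 512. Targets without byte offsets encode dword units, targets with
// them keep the byte value.
//
//   if (Optional<int64_t> Enc =
//           AMDGPU::getSMRDEncodedOffset(STI, /*ByteOffset=*/512,
//                                        /*IsBuffer=*/false)) {
//     // *Enc is 128 on SI (dword units, unsigned 8-bit field) and 512 on
//     // GCN3/GFX9+ (byte units); GFX9+ additionally treats it as signed.
//   }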
unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST, bool Signed) {
  // Address offset is 12-bit signed for GFX10, 13-bit for GFX9; unsigned
  // offsets get one bit less.
  if (AMDGPU::isGFX10(ST))
    return Signed ? 12 : 11;

  return Signed ? 13 : 12;
}

// Given Imm, split it into the values to put into the SOffset and ImmOffset
// fields in an MUBUF instruction. Return false if it is not possible (due to a
// hardware bug needing a workaround).
//
// The required alignment ensures that individual address components remain
// aligned if they are aligned to begin with. It also ensures that additional
// offsets within the given alignment can be added to the resulting ImmOffset.
bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset,
                      const GCNSubtarget *Subtarget, Align Alignment) {
  const uint32_t MaxImm = alignDown(4095, Alignment.value());
  uint32_t Overflow = 0;

  if (Imm > MaxImm) {
    if (Imm <= MaxImm + 64) {
      // Use an SOffset inline constant for 4..64.
      Overflow = Imm - MaxImm;
      Imm = MaxImm;
    } else {
      // Try to keep the same value in SOffset for adjacent loads, so that
      // the corresponding register contents can be re-used.
      //
      // Load values with all low-bits (except for alignment bits) set into
      // SOffset, so that a larger range of values can be covered using
      // s_movk_i32.
      //
      // Atomic operations fail to work correctly when individual address
      // components are unaligned, even if their sum is aligned.
      uint32_t High = (Imm + Alignment.value()) & ~4095;
      uint32_t Low = (Imm + Alignment.value()) & 4095;
      Imm = Low;
      Overflow = High - Alignment.value();
    }
  }

  // There is a hardware bug in SI and CI which prevents address clamping in
  // MUBUF instructions from working correctly with SOffsets. The immediate
  // offset is unaffected.
  if (Overflow > 0 &&
      Subtarget->getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS)
    return false;

  ImmOffset = Imm;
  SOffset = Overflow;
  return true;
}
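
// Illustrative sketch (assumed values, hypothetical Subtarget pointer):
// splitting a 5000-byte offset for a dword-aligned MUBUF access. With
// Align(4), MaxImm is alignDown(4095, 4) == 4092 and 5000 > 4092 + 64, so the
// "keep low bits in ImmOffset" path is taken.
//
//   uint32_t SOffset = 0, ImmOffset = 0;
//   if (AMDGPU::splitMUBUFOffset(/*Imm=*/5000, SOffset, ImmOffset, Subtarget,
//                                Align(4))) {
//     // ImmOffset == (5000 + 4) & 4095 == 908 and
//     // SOffset  == ((5000 + 4) & ~4095) - 4 == 4092,
//     // so ImmOffset + SOffset == 5000.
//   }
//
// On SI/CI the call instead returns false whenever SOffset would be non-zero,
// because of the address-clamping hardware bug described above.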
SIModeRegisterDefaults::SIModeRegisterDefaults(const Function &F) {
  *this = getDefaultForCallingConv(F.getCallingConv());

  StringRef IEEEAttr = F.getFnAttribute("amdgpu-ieee").getValueAsString();
  if (!IEEEAttr.empty())
    IEEE = IEEEAttr == "true";

  StringRef DX10ClampAttr
    = F.getFnAttribute("amdgpu-dx10-clamp").getValueAsString();
  if (!DX10ClampAttr.empty())
    DX10Clamp = DX10ClampAttr == "true";

  StringRef DenormF32Attr
    = F.getFnAttribute("denormal-fp-math-f32").getValueAsString();
  if (!DenormF32Attr.empty()) {
    DenormalMode DenormMode = parseDenormalFPAttribute(DenormF32Attr);
    FP32InputDenormals = DenormMode.Input == DenormalMode::IEEE;
    FP32OutputDenormals = DenormMode.Output == DenormalMode::IEEE;
  }

  StringRef DenormAttr
    = F.getFnAttribute("denormal-fp-math").getValueAsString();
  if (!DenormAttr.empty()) {
    DenormalMode DenormMode = parseDenormalFPAttribute(DenormAttr);

    if (DenormF32Attr.empty()) {
      FP32InputDenormals = DenormMode.Input == DenormalMode::IEEE;
      FP32OutputDenormals = DenormMode.Output == DenormalMode::IEEE;
    }

    FP64FP16InputDenormals = DenormMode.Input == DenormalMode::IEEE;
    FP64FP16OutputDenormals = DenormMode.Output == DenormalMode::IEEE;
  }
}

namespace {

struct SourceOfDivergence {
  unsigned Intr;
};
const SourceOfDivergence *lookupSourceOfDivergence(unsigned Intr);

#define GET_SourcesOfDivergence_IMPL
#define GET_Gfx9BufferFormat_IMPL
#define GET_Gfx10PlusBufferFormat_IMPL
#include "AMDGPUGenSearchableTables.inc"

} // end anonymous namespace

bool isIntrinsicSourceOfDivergence(unsigned IntrID) {
  return lookupSourceOfDivergence(IntrID);
}

const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t BitsPerComp,
                                                  uint8_t NumComponents,
                                                  uint8_t NumFormat,
                                                  const MCSubtargetInfo &STI) {
  return isGFX10Plus(STI)
             ? getGfx10PlusBufferFormatInfo(BitsPerComp, NumComponents,
                                            NumFormat)
             : getGfx9BufferFormatInfo(BitsPerComp, NumComponents, NumFormat);
}

const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t Format,
                                                  const MCSubtargetInfo &STI) {
  return isGFX10Plus(STI) ? getGfx10PlusBufferFormatInfo(Format)
                          : getGfx9BufferFormatInfo(Format);
}

} // namespace AMDGPU

raw_ostream &operator<<(raw_ostream &OS,
                        const AMDGPU::IsaInfo::TargetIDSetting S) {
  switch (S) {
  case (AMDGPU::IsaInfo::TargetIDSetting::Unsupported):
    OS << "Unsupported";
    break;
  case (AMDGPU::IsaInfo::TargetIDSetting::Any):
    OS << "Any";
    break;
  case (AMDGPU::IsaInfo::TargetIDSetting::Off):
    OS << "Off";
    break;
  case (AMDGPU::IsaInfo::TargetIDSetting::On):
    OS << "On";
    break;
  }
  return OS;
}

} // namespace llvm
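
// Illustrative sketch (hypothetical IR, not part of this file): the attribute
// parsing in SIModeRegisterDefaults above would map a function such as
//
//   define void @f() #0 { ... }
//   attributes #0 = { "amdgpu-ieee"="false"
//                     "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
//
// to IEEE == false and FP32InputDenormals == FP32OutputDenormals == false,
// while FP64FP16InputDenormals/FP64FP16OutputDenormals keep the defaults for
// the function's calling convention.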