1 //=====-- GCNSubtarget.h - Define GCN Subtarget for AMDGPU ------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //==-----------------------------------------------------------------------===// 8 // 9 /// \file 10 /// AMD GCN specific subclass of TargetSubtarget. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #ifndef LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H 15 #define LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H 16 17 #include "AMDGPUCallLowering.h" 18 #include "AMDGPUSubtarget.h" 19 #include "SIFrameLowering.h" 20 #include "SIISelLowering.h" 21 #include "SIInstrInfo.h" 22 #include "llvm/CodeGen/SelectionDAGTargetInfo.h" 23 24 namespace llvm { 25 26 class MCInst; 27 class MCInstrInfo; 28 29 } // namespace llvm 30 31 #define GET_SUBTARGETINFO_HEADER 32 #include "AMDGPUGenSubtargetInfo.inc" 33 34 namespace llvm { 35 36 class GCNTargetMachine; 37 38 class GCNSubtarget final : public AMDGPUGenSubtargetInfo, 39 public AMDGPUSubtarget { 40 41 using AMDGPUSubtarget::getMaxWavesPerEU; 42 43 public: 44 enum TrapHandlerAbi { 45 TrapHandlerAbiNone = 0, 46 TrapHandlerAbiHsa = 1 47 }; 48 49 enum TrapID { 50 TrapIDHardwareReserved = 0, 51 TrapIDHSADebugTrap = 1, 52 TrapIDLLVMTrap = 2, 53 TrapIDLLVMDebugTrap = 3, 54 TrapIDDebugBreakpoint = 7, 55 TrapIDDebugReserved8 = 8, 56 TrapIDDebugReservedFE = 0xfe, 57 TrapIDDebugReservedFF = 0xff 58 }; 59 60 enum TrapRegValues { 61 LLVMTrapHandlerRegValue = 1 62 }; 63 64 private: 65 /// GlobalISel related APIs. 66 std::unique_ptr<AMDGPUCallLowering> CallLoweringInfo; 67 std::unique_ptr<InlineAsmLowering> InlineAsmLoweringInfo; 68 std::unique_ptr<InstructionSelector> InstSelector; 69 std::unique_ptr<LegalizerInfo> Legalizer; 70 std::unique_ptr<RegisterBankInfo> RegBankInfo; 71 72 protected: 73 // Basic subtarget description. 74 Triple TargetTriple; 75 AMDGPU::IsaInfo::AMDGPUTargetID TargetID; 76 unsigned Gen; 77 InstrItineraryData InstrItins; 78 int LDSBankCount; 79 unsigned MaxPrivateElementSize; 80 81 // Possibly statically set by tablegen, but may want to be overridden. 82 bool FastFMAF32; 83 bool FastDenormalF32; 84 bool HalfRate64Ops; 85 86 // Dynamically set bits that enable features. 87 bool FlatForGlobal; 88 bool AutoWaitcntBeforeBarrier; 89 bool UnalignedScratchAccess; 90 bool UnalignedAccessMode; 91 bool HasApertureRegs; 92 bool SupportsXNACK; 93 94 // This should not be used directly. 'TargetID' tracks the dynamic settings 95 // for XNACK. 96 bool EnableXNACK; 97 98 bool EnableCuMode; 99 bool TrapHandler; 100 101 // Used as options. 102 bool EnableLoadStoreOpt; 103 bool EnableUnsafeDSOffsetFolding; 104 bool EnableSIScheduler; 105 bool EnableDS128; 106 bool EnablePRTStrictNull; 107 bool DumpCode; 108 109 // Subtarget statically properties set by tablegen 110 bool FP64; 111 bool FMA; 112 bool MIMG_R128; 113 bool GCN3Encoding; 114 bool CIInsts; 115 bool GFX8Insts; 116 bool GFX9Insts; 117 bool GFX10Insts; 118 bool GFX10_3Insts; 119 bool GFX7GFX8GFX9Insts; 120 bool SGPRInitBug; 121 bool HasSMemRealTime; 122 bool HasIntClamp; 123 bool HasFmaMixInsts; 124 bool HasMovrel; 125 bool HasVGPRIndexMode; 126 bool HasScalarStores; 127 bool HasScalarAtomics; 128 bool HasSDWAOmod; 129 bool HasSDWAScalar; 130 bool HasSDWASdst; 131 bool HasSDWAMac; 132 bool HasSDWAOutModsVOPC; 133 bool HasDPP; 134 bool HasDPP8; 135 bool HasR128A16; 136 bool HasGFX10A16; 137 bool HasG16; 138 bool HasNSAEncoding; 139 bool GFX10_BEncoding; 140 bool HasDLInsts; 141 bool HasDot1Insts; 142 bool HasDot2Insts; 143 bool HasDot3Insts; 144 bool HasDot4Insts; 145 bool HasDot5Insts; 146 bool HasDot6Insts; 147 bool HasMAIInsts; 148 bool HasPkFmacF16Inst; 149 bool HasAtomicFaddInsts; 150 bool SupportsSRAMECC; 151 152 // This should not be used directly. 'TargetID' tracks the dynamic settings 153 // for SRAMECC. 154 bool EnableSRAMECC; 155 156 bool HasNoSdstCMPX; 157 bool HasVscnt; 158 bool HasGetWaveIdInst; 159 bool HasSMemTimeInst; 160 bool HasRegisterBanking; 161 bool HasVOP3Literal; 162 bool HasNoDataDepHazard; 163 bool FlatAddressSpace; 164 bool FlatInstOffsets; 165 bool FlatGlobalInsts; 166 bool FlatScratchInsts; 167 bool ScalarFlatScratchInsts; 168 bool AddNoCarryInsts; 169 bool HasUnpackedD16VMem; 170 bool LDSMisalignedBug; 171 bool HasMFMAInlineLiteralBug; 172 bool UnalignedBufferAccess; 173 bool UnalignedDSAccess; 174 bool ScalarizeGlobal; 175 176 bool HasVcmpxPermlaneHazard; 177 bool HasVMEMtoScalarWriteHazard; 178 bool HasSMEMtoVectorWriteHazard; 179 bool HasInstFwdPrefetchBug; 180 bool HasVcmpxExecWARHazard; 181 bool HasLdsBranchVmemWARHazard; 182 bool HasNSAtoVMEMBug; 183 bool HasOffset3fBug; 184 bool HasFlatSegmentOffsetBug; 185 bool HasImageStoreD16Bug; 186 bool HasImageGather4D16Bug; 187 188 // Dummy feature to use for assembler in tablegen. 189 bool FeatureDisable; 190 191 SelectionDAGTargetInfo TSInfo; 192 private: 193 SIInstrInfo InstrInfo; 194 SITargetLowering TLInfo; 195 SIFrameLowering FrameLowering; 196 197 public: 198 // See COMPUTE_TMPRING_SIZE.WAVESIZE, 13-bit field in units of 256-dword. 199 static const unsigned MaxWaveScratchSize = (256 * 4) * ((1 << 13) - 1); 200 201 GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS, 202 const GCNTargetMachine &TM); 203 ~GCNSubtarget() override; 204 205 GCNSubtarget &initializeSubtargetDependencies(const Triple &TT, 206 StringRef GPU, StringRef FS); 207 208 const SIInstrInfo *getInstrInfo() const override { 209 return &InstrInfo; 210 } 211 212 const SIFrameLowering *getFrameLowering() const override { 213 return &FrameLowering; 214 } 215 216 const SITargetLowering *getTargetLowering() const override { 217 return &TLInfo; 218 } 219 220 const SIRegisterInfo *getRegisterInfo() const override { 221 return &InstrInfo.getRegisterInfo(); 222 } 223 224 const CallLowering *getCallLowering() const override { 225 return CallLoweringInfo.get(); 226 } 227 228 const InlineAsmLowering *getInlineAsmLowering() const override { 229 return InlineAsmLoweringInfo.get(); 230 } 231 232 InstructionSelector *getInstructionSelector() const override { 233 return InstSelector.get(); 234 } 235 236 const LegalizerInfo *getLegalizerInfo() const override { 237 return Legalizer.get(); 238 } 239 240 const RegisterBankInfo *getRegBankInfo() const override { 241 return RegBankInfo.get(); 242 } 243 244 // Nothing implemented, just prevent crashes on use. 245 const SelectionDAGTargetInfo *getSelectionDAGInfo() const override { 246 return &TSInfo; 247 } 248 249 const InstrItineraryData *getInstrItineraryData() const override { 250 return &InstrItins; 251 } 252 253 void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS); 254 255 Generation getGeneration() const { 256 return (Generation)Gen; 257 } 258 259 /// Return the number of high bits known to be zero fror a frame index. 260 unsigned getKnownHighZeroBitsForFrameIndex() const { 261 return countLeadingZeros(MaxWaveScratchSize) + getWavefrontSizeLog2(); 262 } 263 264 int getLDSBankCount() const { 265 return LDSBankCount; 266 } 267 268 unsigned getMaxPrivateElementSize(bool ForBufferRSrc = false) const { 269 return (ForBufferRSrc || !enableFlatScratch()) ? MaxPrivateElementSize : 16; 270 } 271 272 unsigned getConstantBusLimit(unsigned Opcode) const; 273 274 bool hasIntClamp() const { 275 return HasIntClamp; 276 } 277 278 bool hasFP64() const { 279 return FP64; 280 } 281 282 bool hasMIMG_R128() const { 283 return MIMG_R128; 284 } 285 286 bool hasHWFP64() const { 287 return FP64; 288 } 289 290 bool hasFastFMAF32() const { 291 return FastFMAF32; 292 } 293 294 bool hasHalfRate64Ops() const { 295 return HalfRate64Ops; 296 } 297 298 bool hasAddr64() const { 299 return (getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS); 300 } 301 302 bool hasFlat() const { 303 return (getGeneration() > AMDGPUSubtarget::SOUTHERN_ISLANDS); 304 } 305 306 // Return true if the target only has the reverse operand versions of VALU 307 // shift instructions (e.g. v_lshrrev_b32, and no v_lshr_b32). 308 bool hasOnlyRevVALUShifts() const { 309 return getGeneration() >= VOLCANIC_ISLANDS; 310 } 311 312 bool hasFractBug() const { 313 return getGeneration() == SOUTHERN_ISLANDS; 314 } 315 316 bool hasBFE() const { 317 return true; 318 } 319 320 bool hasBFI() const { 321 return true; 322 } 323 324 bool hasBFM() const { 325 return hasBFE(); 326 } 327 328 bool hasBCNT(unsigned Size) const { 329 return true; 330 } 331 332 bool hasFFBL() const { 333 return true; 334 } 335 336 bool hasFFBH() const { 337 return true; 338 } 339 340 bool hasMed3_16() const { 341 return getGeneration() >= AMDGPUSubtarget::GFX9; 342 } 343 344 bool hasMin3Max3_16() const { 345 return getGeneration() >= AMDGPUSubtarget::GFX9; 346 } 347 348 bool hasFmaMixInsts() const { 349 return HasFmaMixInsts; 350 } 351 352 bool hasCARRY() const { 353 return true; 354 } 355 356 bool hasFMA() const { 357 return FMA; 358 } 359 360 bool hasSwap() const { 361 return GFX9Insts; 362 } 363 364 bool hasScalarPackInsts() const { 365 return GFX9Insts; 366 } 367 368 bool hasScalarMulHiInsts() const { 369 return GFX9Insts; 370 } 371 372 TrapHandlerAbi getTrapHandlerAbi() const { 373 return isAmdHsaOS() ? TrapHandlerAbiHsa : TrapHandlerAbiNone; 374 } 375 376 /// True if the offset field of DS instructions works as expected. On SI, the 377 /// offset uses a 16-bit adder and does not always wrap properly. 378 bool hasUsableDSOffset() const { 379 return getGeneration() >= SEA_ISLANDS; 380 } 381 382 bool unsafeDSOffsetFoldingEnabled() const { 383 return EnableUnsafeDSOffsetFolding; 384 } 385 386 /// Condition output from div_scale is usable. 387 bool hasUsableDivScaleConditionOutput() const { 388 return getGeneration() != SOUTHERN_ISLANDS; 389 } 390 391 /// Extra wait hazard is needed in some cases before 392 /// s_cbranch_vccnz/s_cbranch_vccz. 393 bool hasReadVCCZBug() const { 394 return getGeneration() <= SEA_ISLANDS; 395 } 396 397 /// Writes to VCC_LO/VCC_HI update the VCCZ flag. 398 bool partialVCCWritesUpdateVCCZ() const { 399 return getGeneration() >= GFX10; 400 } 401 402 /// A read of an SGPR by SMRD instruction requires 4 wait states when the SGPR 403 /// was written by a VALU instruction. 404 bool hasSMRDReadVALUDefHazard() const { 405 return getGeneration() == SOUTHERN_ISLANDS; 406 } 407 408 /// A read of an SGPR by a VMEM instruction requires 5 wait states when the 409 /// SGPR was written by a VALU Instruction. 410 bool hasVMEMReadSGPRVALUDefHazard() const { 411 return getGeneration() >= VOLCANIC_ISLANDS; 412 } 413 414 bool hasRFEHazards() const { 415 return getGeneration() >= VOLCANIC_ISLANDS; 416 } 417 418 /// Number of hazard wait states for s_setreg_b32/s_setreg_imm32_b32. 419 unsigned getSetRegWaitStates() const { 420 return getGeneration() <= SEA_ISLANDS ? 1 : 2; 421 } 422 423 bool dumpCode() const { 424 return DumpCode; 425 } 426 427 /// Return the amount of LDS that can be used that will not restrict the 428 /// occupancy lower than WaveCount. 429 unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount, 430 const Function &) const; 431 432 bool supportsMinMaxDenormModes() const { 433 return getGeneration() >= AMDGPUSubtarget::GFX9; 434 } 435 436 /// \returns If target supports S_DENORM_MODE. 437 bool hasDenormModeInst() const { 438 return getGeneration() >= AMDGPUSubtarget::GFX10; 439 } 440 441 bool useFlatForGlobal() const { 442 return FlatForGlobal; 443 } 444 445 /// \returns If target supports ds_read/write_b128 and user enables generation 446 /// of ds_read/write_b128. 447 bool useDS128() const { 448 return CIInsts && EnableDS128; 449 } 450 451 /// \return If target supports ds_read/write_b96/128. 452 bool hasDS96AndDS128() const { 453 return CIInsts; 454 } 455 456 /// Have v_trunc_f64, v_ceil_f64, v_rndne_f64 457 bool haveRoundOpsF64() const { 458 return CIInsts; 459 } 460 461 /// \returns If MUBUF instructions always perform range checking, even for 462 /// buffer resources used for private memory access. 463 bool privateMemoryResourceIsRangeChecked() const { 464 return getGeneration() < AMDGPUSubtarget::GFX9; 465 } 466 467 /// \returns If target requires PRT Struct NULL support (zero result registers 468 /// for sparse texture support). 469 bool usePRTStrictNull() const { 470 return EnablePRTStrictNull; 471 } 472 473 bool hasAutoWaitcntBeforeBarrier() const { 474 return AutoWaitcntBeforeBarrier; 475 } 476 477 bool hasUnalignedBufferAccess() const { 478 return UnalignedBufferAccess; 479 } 480 481 bool hasUnalignedBufferAccessEnabled() const { 482 return UnalignedBufferAccess && UnalignedAccessMode; 483 } 484 485 bool hasUnalignedDSAccess() const { 486 return UnalignedDSAccess; 487 } 488 489 bool hasUnalignedDSAccessEnabled() const { 490 return UnalignedDSAccess && UnalignedAccessMode; 491 } 492 493 bool hasUnalignedScratchAccess() const { 494 return UnalignedScratchAccess; 495 } 496 497 bool hasUnalignedAccessMode() const { 498 return UnalignedAccessMode; 499 } 500 501 bool hasApertureRegs() const { 502 return HasApertureRegs; 503 } 504 505 bool isTrapHandlerEnabled() const { 506 return TrapHandler; 507 } 508 509 bool isXNACKEnabled() const { 510 return TargetID.isXnackOnOrAny(); 511 } 512 513 bool isCuModeEnabled() const { 514 return EnableCuMode; 515 } 516 517 bool hasFlatAddressSpace() const { 518 return FlatAddressSpace; 519 } 520 521 bool hasFlatScrRegister() const { 522 return hasFlatAddressSpace(); 523 } 524 525 bool hasFlatInstOffsets() const { 526 return FlatInstOffsets; 527 } 528 529 bool hasFlatGlobalInsts() const { 530 return FlatGlobalInsts; 531 } 532 533 bool hasFlatScratchInsts() const { 534 return FlatScratchInsts; 535 } 536 537 // Check if target supports ST addressing mode with FLAT scratch instructions. 538 // The ST addressing mode means no registers are used, either VGPR or SGPR, 539 // but only immediate offset is swizzled and added to the FLAT scratch base. 540 bool hasFlatScratchSTMode() const { 541 return hasFlatScratchInsts() && hasGFX10_3Insts(); 542 } 543 544 bool hasScalarFlatScratchInsts() const { 545 return ScalarFlatScratchInsts; 546 } 547 548 bool hasGlobalAddTidInsts() const { 549 return GFX10_BEncoding; 550 } 551 552 bool hasAtomicCSub() const { 553 return GFX10_BEncoding; 554 } 555 556 bool hasMultiDwordFlatScratchAddressing() const { 557 return getGeneration() >= GFX9; 558 } 559 560 bool hasFlatSegmentOffsetBug() const { 561 return HasFlatSegmentOffsetBug; 562 } 563 564 bool hasFlatLgkmVMemCountInOrder() const { 565 return getGeneration() > GFX9; 566 } 567 568 bool hasD16LoadStore() const { 569 return getGeneration() >= GFX9; 570 } 571 572 bool d16PreservesUnusedBits() const { 573 return hasD16LoadStore() && !TargetID.isSramEccOnOrAny(); 574 } 575 576 bool hasD16Images() const { 577 return getGeneration() >= VOLCANIC_ISLANDS; 578 } 579 580 /// Return if most LDS instructions have an m0 use that require m0 to be 581 /// iniitalized. 582 bool ldsRequiresM0Init() const { 583 return getGeneration() < GFX9; 584 } 585 586 // True if the hardware rewinds and replays GWS operations if a wave is 587 // preempted. 588 // 589 // If this is false, a GWS operation requires testing if a nack set the 590 // MEM_VIOL bit, and repeating if so. 591 bool hasGWSAutoReplay() const { 592 return getGeneration() >= GFX9; 593 } 594 595 /// \returns if target has ds_gws_sema_release_all instruction. 596 bool hasGWSSemaReleaseAll() const { 597 return CIInsts; 598 } 599 600 /// \returns true if the target has integer add/sub instructions that do not 601 /// produce a carry-out. This includes v_add_[iu]32, v_sub_[iu]32, 602 /// v_add_[iu]16, and v_sub_[iu]16, all of which support the clamp modifier 603 /// for saturation. 604 bool hasAddNoCarry() const { 605 return AddNoCarryInsts; 606 } 607 608 bool hasUnpackedD16VMem() const { 609 return HasUnpackedD16VMem; 610 } 611 612 // Covers VS/PS/CS graphics shaders 613 bool isMesaGfxShader(const Function &F) const { 614 return isMesa3DOS() && AMDGPU::isShader(F.getCallingConv()); 615 } 616 617 bool hasMad64_32() const { 618 return getGeneration() >= SEA_ISLANDS; 619 } 620 621 bool hasSDWAOmod() const { 622 return HasSDWAOmod; 623 } 624 625 bool hasSDWAScalar() const { 626 return HasSDWAScalar; 627 } 628 629 bool hasSDWASdst() const { 630 return HasSDWASdst; 631 } 632 633 bool hasSDWAMac() const { 634 return HasSDWAMac; 635 } 636 637 bool hasSDWAOutModsVOPC() const { 638 return HasSDWAOutModsVOPC; 639 } 640 641 bool hasDLInsts() const { 642 return HasDLInsts; 643 } 644 645 bool hasDot1Insts() const { 646 return HasDot1Insts; 647 } 648 649 bool hasDot2Insts() const { 650 return HasDot2Insts; 651 } 652 653 bool hasDot3Insts() const { 654 return HasDot3Insts; 655 } 656 657 bool hasDot4Insts() const { 658 return HasDot4Insts; 659 } 660 661 bool hasDot5Insts() const { 662 return HasDot5Insts; 663 } 664 665 bool hasDot6Insts() const { 666 return HasDot6Insts; 667 } 668 669 bool hasMAIInsts() const { 670 return HasMAIInsts; 671 } 672 673 bool hasPkFmacF16Inst() const { 674 return HasPkFmacF16Inst; 675 } 676 677 bool hasAtomicFaddInsts() const { 678 return HasAtomicFaddInsts; 679 } 680 681 bool hasNoSdstCMPX() const { 682 return HasNoSdstCMPX; 683 } 684 685 bool hasVscnt() const { 686 return HasVscnt; 687 } 688 689 bool hasGetWaveIdInst() const { 690 return HasGetWaveIdInst; 691 } 692 693 bool hasSMemTimeInst() const { 694 return HasSMemTimeInst; 695 } 696 697 bool hasRegisterBanking() const { 698 return HasRegisterBanking; 699 } 700 701 bool hasVOP3Literal() const { 702 return HasVOP3Literal; 703 } 704 705 bool hasNoDataDepHazard() const { 706 return HasNoDataDepHazard; 707 } 708 709 bool vmemWriteNeedsExpWaitcnt() const { 710 return getGeneration() < SEA_ISLANDS; 711 } 712 713 // Scratch is allocated in 256 dword per wave blocks for the entire 714 // wavefront. When viewed from the perspecive of an arbitrary workitem, this 715 // is 4-byte aligned. 716 // 717 // Only 4-byte alignment is really needed to access anything. Transformations 718 // on the pointer value itself may rely on the alignment / known low bits of 719 // the pointer. Set this to something above the minimum to avoid needing 720 // dynamic realignment in common cases. 721 Align getStackAlignment() const { return Align(16); } 722 723 bool enableMachineScheduler() const override { 724 return true; 725 } 726 727 bool useAA() const override; 728 729 bool enableSubRegLiveness() const override { 730 return true; 731 } 732 733 void setScalarizeGlobalBehavior(bool b) { ScalarizeGlobal = b; } 734 bool getScalarizeGlobalBehavior() const { return ScalarizeGlobal; } 735 736 // static wrappers 737 static bool hasHalfRate64Ops(const TargetSubtargetInfo &STI); 738 739 // XXX - Why is this here if it isn't in the default pass set? 740 bool enableEarlyIfConversion() const override { 741 return true; 742 } 743 744 bool enableFlatScratch() const; 745 746 void overrideSchedPolicy(MachineSchedPolicy &Policy, 747 unsigned NumRegionInstrs) const override; 748 749 unsigned getMaxNumUserSGPRs() const { 750 return 16; 751 } 752 753 bool hasSMemRealTime() const { 754 return HasSMemRealTime; 755 } 756 757 bool hasMovrel() const { 758 return HasMovrel; 759 } 760 761 bool hasVGPRIndexMode() const { 762 return HasVGPRIndexMode; 763 } 764 765 bool useVGPRIndexMode() const; 766 767 bool hasScalarCompareEq64() const { 768 return getGeneration() >= VOLCANIC_ISLANDS; 769 } 770 771 bool hasScalarStores() const { 772 return HasScalarStores; 773 } 774 775 bool hasScalarAtomics() const { 776 return HasScalarAtomics; 777 } 778 779 bool hasLDSFPAtomics() const { 780 return GFX8Insts; 781 } 782 783 bool hasDPP() const { 784 return HasDPP; 785 } 786 787 bool hasDPPBroadcasts() const { 788 return HasDPP && getGeneration() < GFX10; 789 } 790 791 bool hasDPPWavefrontShifts() const { 792 return HasDPP && getGeneration() < GFX10; 793 } 794 795 bool hasDPP8() const { 796 return HasDPP8; 797 } 798 799 bool hasR128A16() const { 800 return HasR128A16; 801 } 802 803 bool hasGFX10A16() const { 804 return HasGFX10A16; 805 } 806 807 bool hasA16() const { return hasR128A16() || hasGFX10A16(); } 808 809 bool hasG16() const { return HasG16; } 810 811 bool hasOffset3fBug() const { 812 return HasOffset3fBug; 813 } 814 815 bool hasImageStoreD16Bug() const { return HasImageStoreD16Bug; } 816 817 bool hasImageGather4D16Bug() const { return HasImageGather4D16Bug; } 818 819 bool hasNSAEncoding() const { return HasNSAEncoding; } 820 821 bool hasGFX10_BEncoding() const { 822 return GFX10_BEncoding; 823 } 824 825 bool hasGFX10_3Insts() const { 826 return GFX10_3Insts; 827 } 828 829 bool hasMadF16() const; 830 831 bool enableSIScheduler() const { 832 return EnableSIScheduler; 833 } 834 835 bool loadStoreOptEnabled() const { 836 return EnableLoadStoreOpt; 837 } 838 839 bool hasSGPRInitBug() const { 840 return SGPRInitBug; 841 } 842 843 bool hasMFMAInlineLiteralBug() const { 844 return HasMFMAInlineLiteralBug; 845 } 846 847 bool has12DWordStoreHazard() const { 848 return getGeneration() != AMDGPUSubtarget::SOUTHERN_ISLANDS; 849 } 850 851 // \returns true if the subtarget supports DWORDX3 load/store instructions. 852 bool hasDwordx3LoadStores() const { 853 return CIInsts; 854 } 855 856 bool hasReadM0MovRelInterpHazard() const { 857 return getGeneration() == AMDGPUSubtarget::GFX9; 858 } 859 860 bool hasReadM0SendMsgHazard() const { 861 return getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS && 862 getGeneration() <= AMDGPUSubtarget::GFX9; 863 } 864 865 bool hasVcmpxPermlaneHazard() const { 866 return HasVcmpxPermlaneHazard; 867 } 868 869 bool hasVMEMtoScalarWriteHazard() const { 870 return HasVMEMtoScalarWriteHazard; 871 } 872 873 bool hasSMEMtoVectorWriteHazard() const { 874 return HasSMEMtoVectorWriteHazard; 875 } 876 877 bool hasLDSMisalignedBug() const { 878 return LDSMisalignedBug && !EnableCuMode; 879 } 880 881 bool hasInstFwdPrefetchBug() const { 882 return HasInstFwdPrefetchBug; 883 } 884 885 bool hasVcmpxExecWARHazard() const { 886 return HasVcmpxExecWARHazard; 887 } 888 889 bool hasLdsBranchVmemWARHazard() const { 890 return HasLdsBranchVmemWARHazard; 891 } 892 893 bool hasNSAtoVMEMBug() const { 894 return HasNSAtoVMEMBug; 895 } 896 897 bool hasHardClauses() const { return getGeneration() >= GFX10; } 898 899 /// Return the maximum number of waves per SIMD for kernels using \p SGPRs 900 /// SGPRs 901 unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const; 902 903 /// Return the maximum number of waves per SIMD for kernels using \p VGPRs 904 /// VGPRs 905 unsigned getOccupancyWithNumVGPRs(unsigned VGPRs) const; 906 907 /// Return occupancy for the given function. Used LDS and a number of 908 /// registers if provided. 909 /// Note, occupancy can be affected by the scratch allocation as well, but 910 /// we do not have enough information to compute it. 911 unsigned computeOccupancy(const Function &F, unsigned LDSSize = 0, 912 unsigned NumSGPRs = 0, unsigned NumVGPRs = 0) const; 913 914 /// \returns true if the flat_scratch register should be initialized with the 915 /// pointer to the wave's scratch memory rather than a size and offset. 916 bool flatScratchIsPointer() const { 917 return getGeneration() >= AMDGPUSubtarget::GFX9; 918 } 919 920 /// \returns true if the machine has merged shaders in which s0-s7 are 921 /// reserved by the hardware and user SGPRs start at s8 922 bool hasMergedShaders() const { 923 return getGeneration() >= GFX9; 924 } 925 926 /// \returns SGPR allocation granularity supported by the subtarget. 927 unsigned getSGPRAllocGranule() const { 928 return AMDGPU::IsaInfo::getSGPRAllocGranule(this); 929 } 930 931 /// \returns SGPR encoding granularity supported by the subtarget. 932 unsigned getSGPREncodingGranule() const { 933 return AMDGPU::IsaInfo::getSGPREncodingGranule(this); 934 } 935 936 /// \returns Total number of SGPRs supported by the subtarget. 937 unsigned getTotalNumSGPRs() const { 938 return AMDGPU::IsaInfo::getTotalNumSGPRs(this); 939 } 940 941 /// \returns Addressable number of SGPRs supported by the subtarget. 942 unsigned getAddressableNumSGPRs() const { 943 return AMDGPU::IsaInfo::getAddressableNumSGPRs(this); 944 } 945 946 /// \returns Minimum number of SGPRs that meets the given number of waves per 947 /// execution unit requirement supported by the subtarget. 948 unsigned getMinNumSGPRs(unsigned WavesPerEU) const { 949 return AMDGPU::IsaInfo::getMinNumSGPRs(this, WavesPerEU); 950 } 951 952 /// \returns Maximum number of SGPRs that meets the given number of waves per 953 /// execution unit requirement supported by the subtarget. 954 unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const { 955 return AMDGPU::IsaInfo::getMaxNumSGPRs(this, WavesPerEU, Addressable); 956 } 957 958 /// \returns Reserved number of SGPRs for given function \p MF. 959 unsigned getReservedNumSGPRs(const MachineFunction &MF) const; 960 961 /// \returns Maximum number of SGPRs that meets number of waves per execution 962 /// unit requirement for function \p MF, or number of SGPRs explicitly 963 /// requested using "amdgpu-num-sgpr" attribute attached to function \p MF. 964 /// 965 /// \returns Value that meets number of waves per execution unit requirement 966 /// if explicitly requested value cannot be converted to integer, violates 967 /// subtarget's specifications, or does not meet number of waves per execution 968 /// unit requirement. 969 unsigned getMaxNumSGPRs(const MachineFunction &MF) const; 970 971 /// \returns VGPR allocation granularity supported by the subtarget. 972 unsigned getVGPRAllocGranule() const { 973 return AMDGPU::IsaInfo::getVGPRAllocGranule(this); 974 } 975 976 /// \returns VGPR encoding granularity supported by the subtarget. 977 unsigned getVGPREncodingGranule() const { 978 return AMDGPU::IsaInfo::getVGPREncodingGranule(this); 979 } 980 981 /// \returns Total number of VGPRs supported by the subtarget. 982 unsigned getTotalNumVGPRs() const { 983 return AMDGPU::IsaInfo::getTotalNumVGPRs(this); 984 } 985 986 /// \returns Addressable number of VGPRs supported by the subtarget. 987 unsigned getAddressableNumVGPRs() const { 988 return AMDGPU::IsaInfo::getAddressableNumVGPRs(this); 989 } 990 991 /// \returns Minimum number of VGPRs that meets given number of waves per 992 /// execution unit requirement supported by the subtarget. 993 unsigned getMinNumVGPRs(unsigned WavesPerEU) const { 994 return AMDGPU::IsaInfo::getMinNumVGPRs(this, WavesPerEU); 995 } 996 997 /// \returns Maximum number of VGPRs that meets given number of waves per 998 /// execution unit requirement supported by the subtarget. 999 unsigned getMaxNumVGPRs(unsigned WavesPerEU) const { 1000 return AMDGPU::IsaInfo::getMaxNumVGPRs(this, WavesPerEU); 1001 } 1002 1003 /// \returns Maximum number of VGPRs that meets number of waves per execution 1004 /// unit requirement for function \p MF, or number of VGPRs explicitly 1005 /// requested using "amdgpu-num-vgpr" attribute attached to function \p MF. 1006 /// 1007 /// \returns Value that meets number of waves per execution unit requirement 1008 /// if explicitly requested value cannot be converted to integer, violates 1009 /// subtarget's specifications, or does not meet number of waves per execution 1010 /// unit requirement. 1011 unsigned getMaxNumVGPRs(const MachineFunction &MF) const; 1012 1013 void getPostRAMutations( 1014 std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations) 1015 const override; 1016 1017 bool isWave32() const { 1018 return getWavefrontSize() == 32; 1019 } 1020 1021 bool isWave64() const { 1022 return getWavefrontSize() == 64; 1023 } 1024 1025 const TargetRegisterClass *getBoolRC() const { 1026 return getRegisterInfo()->getBoolRC(); 1027 } 1028 1029 /// \returns Maximum number of work groups per compute unit supported by the 1030 /// subtarget and limited by given \p FlatWorkGroupSize. 1031 unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override { 1032 return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(this, FlatWorkGroupSize); 1033 } 1034 1035 /// \returns Minimum flat work group size supported by the subtarget. 1036 unsigned getMinFlatWorkGroupSize() const override { 1037 return AMDGPU::IsaInfo::getMinFlatWorkGroupSize(this); 1038 } 1039 1040 /// \returns Maximum flat work group size supported by the subtarget. 1041 unsigned getMaxFlatWorkGroupSize() const override { 1042 return AMDGPU::IsaInfo::getMaxFlatWorkGroupSize(this); 1043 } 1044 1045 /// \returns Number of waves per execution unit required to support the given 1046 /// \p FlatWorkGroupSize. 1047 unsigned 1048 getWavesPerEUForWorkGroup(unsigned FlatWorkGroupSize) const override { 1049 return AMDGPU::IsaInfo::getWavesPerEUForWorkGroup(this, FlatWorkGroupSize); 1050 } 1051 1052 /// \returns Minimum number of waves per execution unit supported by the 1053 /// subtarget. 1054 unsigned getMinWavesPerEU() const override { 1055 return AMDGPU::IsaInfo::getMinWavesPerEU(this); 1056 } 1057 1058 void adjustSchedDependency(SUnit *Def, int DefOpIdx, SUnit *Use, int UseOpIdx, 1059 SDep &Dep) const override; 1060 }; 1061 1062 } // end namespace llvm 1063 1064 #endif // LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H 1065