//===-- AMDGPU.td - AMDGPU Tablegen files --------*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===------------------------------------------------------------===//

include "llvm/TableGen/SearchableTable.td"
include "llvm/Target/Target.td"
include "AMDGPUFeatures.td"
include "AMDGPUPredicateControl.td"

// Pointer value types p0..p6. The first argument is the integer type used for
// the pointer, the second is the index that also appears in the record name.
def p0 : PtrValueType<i64, 0>;
def p1 : PtrValueType<i64, 1>;
def p2 : PtrValueType<i32, 2>;
def p3 : PtrValueType<i32, 3>;
def p4 : PtrValueType<i64, 4>;
def p5 : PtrValueType<i32, 5>;
def p6 : PtrValueType<i32, 6>;

//===------------------------------------------------------------===//
// Subtarget Features (device properties)
//===------------------------------------------------------------===//

// Every record in this section passes four strings to SubtargetFeature, in
// this order: the feature string, the name of the subtarget field it sets, the
// value assigned to that field, and a human-readable description.

def FeatureFastFMAF32 : SubtargetFeature<"fast-fmaf", "FastFMAF32", "true",
  "Assuming f32 fma is at least as fast as mul + add">;

def FeatureFastDenormalF32
  : SubtargetFeature<"fast-denormal-f32", "FastDenormalF32", "true",
      "Enabling denormals does not cause f32 instructions to run at f64 rates">;

def FeatureMIMG_R128 : SubtargetFeature<"mimg-r128", "MIMG_R128", "true",
  "Support 128-bit texture resources">;

def HalfRate64Ops : SubtargetFeature<"half-rate-64-ops", "HalfRate64Ops", "true",
  "Most fp64 instructions are half rate instead of quarter">;

def FullRate64Ops : SubtargetFeature<"full-rate-64-ops", "FullRate64Ops", "true",
  "Most fp64 instructions are full rate">;

def FeatureFlatAddressSpace
  : SubtargetFeature<"flat-address-space", "FlatAddressSpace", "true",
      "Support flat address space">;

def FeatureFlatInstOffsets
  : SubtargetFeature<"flat-inst-offsets", "FlatInstOffsets", "true",
      "Flat instructions have immediate offset addressing mode">;

def FeatureFlatGlobalInsts
  : SubtargetFeature<"flat-global-insts", "FlatGlobalInsts", "true",
      "Have global_* flat memory instructions">;

def FeatureFlatScratchInsts
  : SubtargetFeature<"flat-scratch-insts", "FlatScratchInsts", "true",
      "Have scratch_* flat memory instructions">;

def FeatureScalarFlatScratchInsts
  : SubtargetFeature<"scalar-flat-scratch-insts", "ScalarFlatScratchInsts",
      "true",
      "Have s_scratch_* flat memory instructions">;

def FeatureEnableFlatScratch
  : SubtargetFeature<"enable-flat-scratch", "EnableFlatScratch", "true",
      "Use scratch_* flat memory instructions to access scratch">;

def FeatureAddNoCarryInsts
  : SubtargetFeature<"add-no-carry-insts", "AddNoCarryInsts", "true",
      "Have VALU add/sub instructions without carry out">;

def FeatureUnalignedBufferAccess
  : SubtargetFeature<"unaligned-buffer-access", "UnalignedBufferAccess", "true",
      "Hardware supports unaligned global loads and stores">;

def FeatureTrapHandler : SubtargetFeature<"trap-handler", "TrapHandler", "true",
  "Trap handler support">;

def FeatureUnalignedScratchAccess
  : SubtargetFeature<"unaligned-scratch-access", "UnalignedScratchAccess",
      "true",
      "Support unaligned scratch loads and stores">;

def FeatureUnalignedDSAccess
  : SubtargetFeature<"unaligned-ds-access", "UnalignedDSAccess", "true",
      "Hardware supports unaligned local and region loads and stores">;

def FeatureApertureRegs
  : SubtargetFeature<"aperture-regs", "HasApertureRegs", "true",
      "Has Memory Aperture Base and Size Registers">;

def FeatureMadMixInsts
  : SubtargetFeature<"mad-mix-insts", "HasMadMixInsts", "true",
      "Has v_mad_mix_f32, v_mad_mixlo_f16, v_mad_mixhi_f16 instructions">;

def FeatureFmaMixInsts
  : SubtargetFeature<"fma-mix-insts", "HasFmaMixInsts", "true",
      "Has v_fma_mix_f32, v_fma_mixlo_f16, v_fma_mixhi_f16 instructions">;

def FeatureSupportsXNACK
  : SubtargetFeature<"xnack-support", "SupportsXNACK", "true",
      "Hardware supports XNACK">;

// XNACK is disabled if SH_MEM_CONFIG.ADDRESS_MODE = GPUVM on chips that support
// XNACK. The current default kernel driver setting is:
// - graphics ring: XNACK disabled
// - compute ring: XNACK enabled
//
// If XNACK is enabled, the VMEM latency can be worse.
// If XNACK is disabled, the 2 SGPRs can be used for general purposes.
def FeatureXNACK : SubtargetFeature<"xnack", "EnableXNACK", "true",
  "Enable XNACK support">;

def FeatureTgSplit : SubtargetFeature<"tgsplit", "EnableTgSplit", "true",
  "Enable threadgroup split execution">;

def FeatureCuMode : SubtargetFeature<"cumode", "EnableCuMode", "true",
  "Enable CU wavefront execution mode">;

def FeaturePreciseMemory : SubtargetFeature<"precise-memory",
  "EnablePreciseMemory",
  "true",
  "Enable precise memory mode"
>;

def FeatureSGPRInitBug : SubtargetFeature<"sgpr-init-bug", "SGPRInitBug", "true",
  "VI SGPR initialization bug requiring a fixed SGPR allocation size">;

def FeatureUserSGPRInit16Bug
  : SubtargetFeature<"user-sgpr-init16-bug", "UserSGPRInit16Bug", "true",
      "Bug requiring at least 16 user+system SGPRs to be enabled">;

def FeatureLdsMisalignedBug
  : SubtargetFeature<"lds-misaligned-bug", "LDSMisalignedBug", "true",
      "Some GFX10 bug with multi-dword LDS and flat access that is not naturally aligned in WGP mode">;

def FeatureMFMAInlineLiteralBug
  : SubtargetFeature<"mfma-inline-literal-bug", "HasMFMAInlineLiteralBug",
      "true",
      "MFMA cannot use inline literal as SrcC">;

// NOTE(review): the description string below is still a placeholder.
def FeatureVcmpxPermlaneHazard
  : SubtargetFeature<"vcmpx-permlane-hazard", "HasVcmpxPermlaneHazard", "true",
      "TODO: describe me">;

def FeatureVMEMtoScalarWriteHazard
  : SubtargetFeature<"vmem-to-scalar-write-hazard",
      "HasVMEMtoScalarWriteHazard", "true",
      "VMEM instruction followed by scalar writing to EXEC mask, M0 or SGPR leads to incorrect execution.">;

def FeatureSMEMtoVectorWriteHazard
  : SubtargetFeature<"smem-to-vector-write-hazard",
      "HasSMEMtoVectorWriteHazard", "true",
      "s_load_dword followed by v_cmp page faults">;

def FeatureInstFwdPrefetchBug
  : SubtargetFeature<"inst-fwd-prefetch-bug", "HasInstFwdPrefetchBug", "true",
      "S_INST_PREFETCH instruction causes shader to hang">;

def FeatureVcmpxExecWARHazard
  : SubtargetFeature<"vcmpx-exec-war-hazard", "HasVcmpxExecWARHazard", "true",
      "V_CMPX WAR hazard on EXEC (V_CMPX issue ONLY)">;

def FeatureLdsBranchVmemWARHazard
  : SubtargetFeature<"lds-branch-vmem-war-hazard", "HasLdsBranchVmemWARHazard",
      "true",
      "Switching between LDS and VMEM-tex not waiting VM_VSRC=0">;

// Parameterized feature: instantiating it with <N> produces the feature string
// "max-hard-clause-length-N", which assigns N to the MaxHardClauseLength field.
class FeatureMaxHardClauseLength<int size>
  : SubtargetFeature<"max-hard-clause-length-"#size,
      "MaxHardClauseLength",
      !cast<string>(size),
      "Maximum number of instructions in an explicit S_CLAUSE is "#size>;

/// Work around a hardware bug on some chips that can be triggered
/// under certain circumstances when clauses are longer than 32 operations.
def FeatureMaxHardClauseLength32 : FeatureMaxHardClauseLength<32>;
/// While the S_CLAUSE instruction permits encoding clause lengths up to 64,
/// hardware documentation for gfx10+ indicates that 63 is the maximum
/// permitted clause length.
def FeatureMaxHardClauseLength63 : FeatureMaxHardClauseLength<63>;

// Hardware bug workarounds. Each record sets the named boolean subtarget field
// to "true" when its feature string is enabled.

def FeatureNSAtoVMEMBug
  : SubtargetFeature<"nsa-to-vmem-bug", "HasNSAtoVMEMBug", "true",
      "MIMG-NSA followed by VMEM fail if EXEC_LO or EXEC_HI equals zero">;

def FeatureNSAClauseBug
  : SubtargetFeature<"nsa-clause-bug", "HasNSAClauseBug", "true",
      "MIMG-NSA in a hard clause has unpredictable results on GFX10.1">;

def FeatureFlatSegmentOffsetBug
  : SubtargetFeature<"flat-segment-offset-bug", "HasFlatSegmentOffsetBug",
      "true",
      "GFX10 bug where inst_offset is ignored when flat instructions access global memory">;

def FeatureNegativeScratchOffsetBug
  : SubtargetFeature<"negative-scratch-offset-bug", "NegativeScratchOffsetBug",
      "true",
      "Negative immediate offsets in scratch instructions with an SGPR offset page fault on GFX9">;

def FeatureNegativeUnalignedScratchOffsetBug
  : SubtargetFeature<"negative-unaligned-scratch-offset-bug",
      "NegativeUnalignedScratchOffsetBug", "true",
      "Scratch instructions with a VGPR offset and a negative immediate offset that is not a multiple of 4 read wrong memory on GFX10">;

def FeatureOffset3fBug
  : SubtargetFeature<"offset-3f-bug", "HasOffset3fBug", "true",
      "Branch offset of 3f hardware bug">;

def FeatureImageStoreD16Bug
  : SubtargetFeature<"image-store-d16-bug", "HasImageStoreD16Bug", "true",
      "Image Store D16 hardware bug">;

def FeatureImageGather4D16Bug
  : SubtargetFeature<"image-gather4-d16-bug", "HasImageGather4D16Bug", "true",
      "Image Gather4 D16 hardware bug">;

def FeatureMADIntraFwdBug
  : SubtargetFeature<"mad-intra-fwd-bug", "HasMADIntraFwdBug", "true",
      "MAD_U64/I64 intra instruction forwarding bug">;

def FeatureMSAALoadDstSelBug
  : SubtargetFeature<"msaa-load-dst-sel-bug", "HasMSAALoadDstSelBug", "true",
      "MSAA loads not honoring dst_sel bug">;

def FeaturePrivEnabledTrap2NopBug
  : SubtargetFeature<"priv-enabled-trap2-nop-bug", "HasPrivEnabledTrap2NopBug",
      "true",
      "Hardware that runs with PRIV=1 interpreting 's_trap 2' as a nop bug">;

// Parameterized feature: instantiating it with <N> produces the feature string
// "ldsbankcountN", which assigns N to the LDSBankCount field.
class SubtargetFeatureLDSBankCount<int Value>
  : SubtargetFeature<"ldsbankcount"#Value,
      "LDSBankCount",
      !cast<string>(Value),
      "The number of LDS banks per compute unit.">;

def FeatureLDSBankCount16 : SubtargetFeatureLDSBankCount<16>;
def FeatureLDSBankCount32 : SubtargetFeatureLDSBankCount<32>;

def FeatureGCN3Encoding
  : SubtargetFeature<"gcn3-encoding", "GCN3Encoding", "true",
      "Encoding format for VI">;

def FeatureCIInsts : SubtargetFeature<"ci-insts", "CIInsts", "true",
  "Additional instructions for CI+">;

def FeatureGFX8Insts : SubtargetFeature<"gfx8-insts", "GFX8Insts", "true",
  "Additional instructions for GFX8+">;

def FeatureGFX9Insts : SubtargetFeature<"gfx9-insts", "GFX9Insts", "true",
  "Additional instructions for GFX9+">;

def FeatureGFX90AInsts : SubtargetFeature<"gfx90a-insts", "GFX90AInsts", "true",
  "Additional instructions for GFX90A+"
  // [HasAtomicFMinFMaxF64GlobalInsts, HasAtomicFMinFMaxF64FlatInsts] // TODO
>;

def FeatureGFX940Insts : SubtargetFeature<"gfx940-insts", "GFX940Insts", "true",
  "Additional instructions for GFX940+">;

def FeatureGFX10Insts : SubtargetFeature<"gfx10-insts", "GFX10Insts", "true",
  "Additional instructions for GFX10+">;

def FeatureGFX11Insts : SubtargetFeature<"gfx11-insts", "GFX11Insts", "true",
  "Additional instructions for GFX11+">;

def FeatureGFX12Insts : SubtargetFeature<"gfx12-insts", "GFX12Insts", "true",
  "Additional instructions for GFX12+">;

def FeatureGFX10_3Insts
  : SubtargetFeature<"gfx10-3-insts", "GFX10_3Insts", "true",
      "Additional instructions for GFX10.3">;

def FeatureGFX7GFX8GFX9Insts
  : SubtargetFeature<"gfx7-gfx8-gfx9-insts", "GFX7GFX8GFX9Insts", "true",
      "Instructions shared in GFX7, GFX8, GFX9">;

def FeatureSMemRealTime
  : SubtargetFeature<"s-memrealtime", "HasSMemRealTime", "true",
      "Has s_memrealtime instruction">;

def FeatureInv2PiInlineImm
  : SubtargetFeature<"inv-2pi-inline-imm", "HasInv2PiInlineImm", "true",
      "Has 1 / (2 * pi) as inline immediate">;

def Feature16BitInsts
  : SubtargetFeature<"16-bit-insts", "Has16BitInsts", "true",
      "Has i16/f16 instructions">;

def FeatureTrue16BitInsts
  : SubtargetFeature<"true16", "HasTrue16BitInsts", "true",
      "True 16-bit operand instructions">;

def FeatureRealTrue16Insts
  : SubtargetFeature<"real-true16", "EnableRealTrue16Insts", "true",
      "Use true 16-bit registers">;

def FeatureVOP3P : SubtargetFeature<"vop3p", "HasVOP3PInsts", "true",
  "Has VOP3P packed instructions">;

def FeatureMovrel : SubtargetFeature<"movrel", "HasMovrel", "true",
  "Has v_movrel*_b32 instructions">;

def FeatureVGPRIndexMode
  : SubtargetFeature<"vgpr-index-mode", "HasVGPRIndexMode", "true",
      "Has VGPR mode register indexing">;

def FeatureScalarDwordx3Loads
  : SubtargetFeature<"scalar-dwordx3-loads", "HasScalarDwordx3Loads", "true",
      "Has 96-bit scalar load instructions">;

def FeatureScalarStores
  : SubtargetFeature<"scalar-stores", "HasScalarStores", "true",
      "Has store scalar memory instructions">;

def FeatureScalarAtomics
  : SubtargetFeature<"scalar-atomics", "HasScalarAtomics", "true",
      "Has atomic scalar memory instructions">;

def FeatureSDWA : SubtargetFeature<"sdwa", "HasSDWA", "true",
  "Support SDWA (Sub-DWORD Addressing) extension">;

def FeatureSDWAOmod : SubtargetFeature<"sdwa-omod", "HasSDWAOmod", "true",
  "Support OMod with SDWA (Sub-DWORD Addressing) extension">;

def FeatureSDWAScalar
  : SubtargetFeature<"sdwa-scalar", "HasSDWAScalar", "true",
      "Support scalar register with SDWA (Sub-DWORD Addressing) extension">;

def FeatureSDWASdst : SubtargetFeature<"sdwa-sdst", "HasSDWASdst", "true",
  "Support scalar dst for VOPC with SDWA (Sub-DWORD Addressing) extension">;

// NOTE(review): the feature string is spelled "sdwa-mav" although the record
// and field are named "...SDWAMac". It is kept unchanged here because the
// string is what users pass on the command line.
def FeatureSDWAMac : SubtargetFeature<"sdwa-mav", "HasSDWAMac", "true",
  "Support v_mac_f32/f16 with SDWA (Sub-DWORD Addressing) extension">;

def FeatureSDWAOutModsVOPC
  : SubtargetFeature<"sdwa-out-mods-vopc", "HasSDWAOutModsVOPC", "true",
      "Support clamp for VOPC with SDWA (Sub-DWORD Addressing) extension">;

def FeatureDPP : SubtargetFeature<"dpp", "HasDPP", "true",
  "Support DPP (Data Parallel Primitives) extension">;

// DPP8 allows arbitrary cross-lane swizzling within groups of 8 lanes.
def FeatureDPP8 : SubtargetFeature<"dpp8", "HasDPP8", "true",
  "Support DPP8 (Data Parallel Primitives) extension">;

def FeatureDPALU_DPP : SubtargetFeature<"dpp-64bit", "HasDPALU_DPP", "true",
  "Support DPP (Data Parallel Primitives) extension in DP ALU">;

def FeatureDPPSrc1SGPR
  : SubtargetFeature<"dpp-src1-sgpr", "HasDPPSrc1SGPR", "true",
      "Support SGPR for Src1 of DPP instructions">;

def FeaturePackedFP32Ops
  : SubtargetFeature<"packed-fp32-ops", "HasPackedFP32Ops", "true",
      "Support packed fp32 instructions">;

def FeatureR128A16 : SubtargetFeature<"r128-a16", "HasR128A16", "true",
  "Support gfx9-style A16 for 16-bit coordinates/gradients/lod/clamp/mip image operands, where a16 is aliased with r128">;

def FeatureA16 : SubtargetFeature<"a16", "HasA16", "true",
  "Support A16 for 16-bit coordinates/gradients/lod/clamp/mip image operands">;

def FeatureG16 : SubtargetFeature<"g16", "HasG16", "true",
  "Support G16 for 16-bit gradient image operands">;

def FeatureNSAEncoding
  : SubtargetFeature<"nsa-encoding", "HasNSAEncoding", "true",
      "Support NSA encoding for image instructions">;

def FeaturePartialNSAEncoding
  : SubtargetFeature<"partial-nsa-encoding", "HasPartialNSAEncoding", "true",
      "Support partial NSA encoding for image instructions">;

def FeatureImageInsts
  : SubtargetFeature<"image-insts", "HasImageInsts", "true",
      "Support image instructions">;

def FeatureExtendedImageInsts
  : SubtargetFeature<"extended-image-insts", "HasExtendedImageInsts", "true",
      "Support mips != 0, lod != 0, gather4, and get_lod">;

def FeatureGFX10_AEncoding
  : SubtargetFeature<"gfx10_a-encoding", "GFX10_AEncoding", "true",
      "Has BVH ray tracing instructions">;

def FeatureGFX10_BEncoding
  : SubtargetFeature<"gfx10_b-encoding", "GFX10_BEncoding", "true",
      "Encoding format GFX10_B">;

def FeatureIntClamp
  : SubtargetFeature<"int-clamp-insts", "HasIntClamp", "true",
      "Support clamp for integer destination">;

def FeatureUnpackedD16VMem
  : SubtargetFeature<"unpacked-d16-vmem", "HasUnpackedD16VMem", "true",
      "Has unpacked d16 vmem instructions">;

def FeatureDLInsts : SubtargetFeature<"dl-insts", "HasDLInsts", "true",
  "Has v_fmac_f32 and v_xnor_b32 instructions">;

def FeatureFmacF64Inst
  : SubtargetFeature<"fmacf64-inst", "HasFmacF64Inst", "true",
      "Has v_fmac_f64 instruction">;

// Dot-product instruction groups. The description of each record lists the
// instructions that the group covers.

def FeatureDot1Insts : SubtargetFeature<"dot1-insts", "HasDot1Insts", "true",
  "Has v_dot4_i32_i8 and v_dot8_i32_i4 instructions">;

def FeatureDot2Insts : SubtargetFeature<"dot2-insts", "HasDot2Insts", "true",
  "Has v_dot2_i32_i16, v_dot2_u32_u16 instructions">;

def FeatureDot3Insts : SubtargetFeature<"dot3-insts", "HasDot3Insts", "true",
  "Has v_dot8c_i32_i4 instruction">;

def FeatureDot4Insts : SubtargetFeature<"dot4-insts", "HasDot4Insts", "true",
  "Has v_dot2c_i32_i16 instruction">;

def FeatureDot5Insts : SubtargetFeature<"dot5-insts", "HasDot5Insts", "true",
  "Has v_dot2c_f32_f16 instruction">;

def FeatureDot6Insts : SubtargetFeature<"dot6-insts", "HasDot6Insts", "true",
  "Has v_dot4c_i32_i8 instruction">;

def FeatureDot7Insts : SubtargetFeature<"dot7-insts", "HasDot7Insts", "true",
  "Has v_dot4_u32_u8, v_dot8_u32_u4 instructions">;

def FeatureDot8Insts : SubtargetFeature<"dot8-insts", "HasDot8Insts", "true",
  "Has v_dot4_i32_iu8, v_dot8_i32_iu4 instructions">;

def FeatureDot9Insts : SubtargetFeature<"dot9-insts", "HasDot9Insts", "true",
  "Has v_dot2_f16_f16, v_dot2_bf16_bf16, v_dot2_f32_bf16 instructions">;

def FeatureDot10Insts : SubtargetFeature<"dot10-insts", "HasDot10Insts", "true",
  "Has v_dot2_f32_f16 instruction">;

def FeatureDot11Insts : SubtargetFeature<"dot11-insts", "HasDot11Insts", "true",
  "Has v_dot4_f32_fp8_fp8, v_dot4_f32_fp8_bf8, v_dot4_f32_bf8_fp8, v_dot4_f32_bf8_bf8 instructions">;

def FeatureMAIInsts : SubtargetFeature<"mai-insts", "HasMAIInsts", "true",
  "Has mAI instructions">;

def FeatureFP8Insts : SubtargetFeature<"fp8-insts", "HasFP8Insts", "true",
  "Has fp8 and bf8 instructions">;

def FeatureFP8ConversionInsts
  : SubtargetFeature<"fp8-conversion-insts", "HasFP8ConversionInsts", "true",
      "Has fp8 and bf8 conversion instructions">;

def FeaturePkFmacF16Inst
  : SubtargetFeature<"pk-fmac-f16-inst", "HasPkFmacF16Inst", "true",
      "Has v_pk_fmac_f16 instruction">;

// Atomic instruction features. Records that pass a trailing list argument
// name FeatureFlatGlobalInsts in it.

def FeatureAtomicDsPkAdd16Insts
  : SubtargetFeature<"atomic-ds-pk-add-16-insts", "HasAtomicDsPkAdd16Insts",
      "true",
      "Has ds_pk_add_bf16, ds_pk_add_f16, ds_pk_add_rtn_bf16, "
      "ds_pk_add_rtn_f16 instructions">;

def FeatureAtomicFlatPkAdd16Insts
  : SubtargetFeature<"atomic-flat-pk-add-16-insts", "HasAtomicFlatPkAdd16Insts",
      "true",
      "Has flat_atomic_pk_add_f16 and flat_atomic_pk_add_bf16 instructions">;

def FeatureAtomicFaddRtnInsts
  : SubtargetFeature<"atomic-fadd-rtn-insts", "HasAtomicFaddRtnInsts", "true",
      "Has buffer_atomic_add_f32 and global_atomic_add_f32 instructions that "
      "return original value",
      [FeatureFlatGlobalInsts]>;

def FeatureAtomicFMinFMaxF32GlobalInsts
  : SubtargetFeature<"atomic-fmin-fmax-global-f32",
      "HasAtomicFMinFMaxF32GlobalInsts", "true",
      "Has global/buffer instructions for atomicrmw fmin/fmax for float">;

// The f64 variant describes double, matching FeatureAtomicFMinFMaxF64FlatInsts.
def FeatureAtomicFMinFMaxF64GlobalInsts
  : SubtargetFeature<"atomic-fmin-fmax-global-f64",
      "HasAtomicFMinFMaxF64GlobalInsts", "true",
      "Has global/buffer instructions for atomicrmw fmin/fmax for double">;

def FeatureAtomicFMinFMaxF32FlatInsts
  : SubtargetFeature<"atomic-fmin-fmax-flat-f32",
      "HasAtomicFMinFMaxF32FlatInsts", "true",
      "Has flat memory instructions for atomicrmw fmin/fmax for float">;

def FeatureAtomicFMinFMaxF64FlatInsts
  : SubtargetFeature<"atomic-fmin-fmax-flat-f64",
      "HasAtomicFMinFMaxF64FlatInsts", "true",
      "Has flat memory instructions for atomicrmw fmin/fmax for double">;

def FeatureAtomicFaddNoRtnInsts
  : SubtargetFeature<"atomic-fadd-no-rtn-insts", "HasAtomicFaddNoRtnInsts",
      "true",
      "Has buffer_atomic_add_f32 and global_atomic_add_f32 instructions that "
      "don't return original value",
      [FeatureFlatGlobalInsts]>;

def FeatureAtomicBufferGlobalPkAddF16NoRtnInsts
  : SubtargetFeature<"atomic-buffer-global-pk-add-f16-no-rtn-insts",
      "HasAtomicBufferGlobalPkAddF16NoRtnInsts", "true",
      "Has buffer_atomic_pk_add_f16 and global_atomic_pk_add_f16 instructions that "
      "don't return original value",
      [FeatureFlatGlobalInsts]>;

def FeatureAtomicBufferGlobalPkAddF16Insts
  : SubtargetFeature<"atomic-buffer-global-pk-add-f16-insts",
      "HasAtomicBufferGlobalPkAddF16Insts", "true",
      "Has buffer_atomic_pk_add_f16 and global_atomic_pk_add_f16 instructions that "
      "can return original value",
      [FeatureFlatGlobalInsts]>;

def FeatureAtomicGlobalPkAddBF16Inst
  : SubtargetFeature<"atomic-global-pk-add-bf16-inst",
      "HasAtomicGlobalPkAddBF16Inst", "true",
      "Has global_atomic_pk_add_bf16 instruction",
      [FeatureFlatGlobalInsts]>;

def FeatureAtomicBufferPkAddBF16Inst
  : SubtargetFeature<"atomic-buffer-pk-add-bf16-inst",
      "HasAtomicBufferPkAddBF16Inst", "true",
      "Has buffer_atomic_pk_add_bf16 instruction">;

def FeatureAtomicCSubNoRtnInsts
  : SubtargetFeature<"atomic-csub-no-rtn-insts", "HasAtomicCSubNoRtnInsts",
      "true",
      "Has buffer_atomic_csub and global_atomic_csub instructions that don't "
      "return original value">;

def FeatureFlatAtomicFaddF32Inst : SubtargetFeature<"flat-atomic-fadd-f32-inst",
  "HasFlatAtomicFaddF32Inst",
  "true",
  "Has flat_atomic_add_f32 instruction"
>;

def FeatureFlatBufferGlobalAtomicFaddF64Inst
  : SubtargetFeature<"flat-buffer-global-fadd-f64-inst",
      "HasFlatBufferGlobalAtomicFaddF64Inst", "true",
      "Has flat, buffer, and global instructions for f64 atomic fadd">;

def FeatureMemoryAtomicFAddF32DenormalSupport
  : SubtargetFeature<"memory-atomic-fadd-f32-denormal-support",
      "HasMemoryAtomicFaddF32DenormalSupport", "true",
      "global/flat/buffer atomic fadd for float supports denormal handling">;

def FeatureAgentScopeFineGrainedRemoteMemoryAtomics
  : SubtargetFeature<"agent-scope-fine-grained-remote-memory-atomics",
      "HasAgentScopeFineGrainedRemoteMemoryAtomics", "true",
      "Agent (device) scoped atomic operations, excluding those directly "
      "supported by PCIe (i.e. integer atomic add, exchange, and "
      "compare-and-swap), are functional for allocations in host or peer "
      "device memory.">;

def FeatureDefaultComponentZero
  : SubtargetFeature<"default-component-zero", "HasDefaultComponentZero",
      "true",
      "BUFFER/IMAGE store instructions set unspecified components to zero (before GFX12)">;

def FeatureDefaultComponentBroadcast
  : SubtargetFeature<"default-component-broadcast",
      "HasDefaultComponentBroadcast", "true",
      "BUFFER/IMAGE store instructions set unspecified components to x component (GFX12)">;

def FeatureSupportsSRAMECC
  : SubtargetFeature<"sramecc-support", "SupportsSRAMECC", "true",
      "Hardware supports SRAMECC">;

def FeatureSRAMECC : SubtargetFeature<"sramecc", "EnableSRAMECC", "true",
  "Enable SRAMECC">;

def FeatureNoSdstCMPX
  : SubtargetFeature<"no-sdst-cmpx", "HasNoSdstCMPX", "true",
      "V_CMPX does not write VCC/SGPR in addition to EXEC">;

def FeatureVscnt : SubtargetFeature<"vscnt", "HasVscnt", "true",
  "Has separate store vscnt counter">;

def FeatureGetWaveIdInst
  : SubtargetFeature<"get-wave-id-inst", "HasGetWaveIdInst", "true",
      "Has s_get_waveid_in_workgroup instruction">;

def FeatureSMemTimeInst
  : SubtargetFeature<"s-memtime-inst", "HasSMemTimeInst", "true",
      "Has s_memtime instruction">;

def FeatureShaderCyclesRegister
  : SubtargetFeature<"shader-cycles-register", "HasShaderCyclesRegister",
      "true",
      "Has SHADER_CYCLES hardware register">;

def FeatureShaderCyclesHiLoRegisters
  : SubtargetFeature<"shader-cycles-hi-lo-registers",
      "HasShaderCyclesHiLoRegisters", "true",
      "Has SHADER_CYCLES_HI/LO hardware registers">;

def FeatureMadMacF32Insts
  : SubtargetFeature<"mad-mac-f32-insts", "HasMadMacF32Insts", "true",
      "Has v_mad_f32/v_mac_f32/v_madak_f32/v_madmk_f32 instructions">;

def FeatureDsSrc2Insts
  : SubtargetFeature<"ds-src2-insts", "HasDsSrc2Insts", "true",
      "Has ds_*_src2 instructions">;

def FeatureVOP3Literal
  : SubtargetFeature<"vop3-literal", "HasVOP3Literal", "true",
      "Can use one literal in VOP3">;

def FeatureNoDataDepHazard
  : SubtargetFeature<"no-data-dep-hazard", "HasNoDataDepHazard", "true",
      "Does not need SW waitstates">;

// Allocate 1536 VGPRs for wave32 and 768 VGPRs for wave64
// with allocation granularity 24 for wave32 and 12 for wave64
def Feature1_5xVGPRs
  : SubtargetFeature<"allocate1_5xvgprs", "Has1_5xVGPRs", "true",
      "Has 50% more physical VGPRs and 50% larger allocation granule">;

def FeatureVOPD : SubtargetFeature<"vopd", "HasVOPDInsts", "true",
  "Has VOPD dual issue wave32 instructions">;

def FeatureVALUTransUseHazard
  : SubtargetFeature<"valu-trans-use-hazard", "HasVALUTransUseHazard", "true",
      "Hazard when TRANS instructions are closely followed by a use of the result">;

def FeatureForceStoreSC0SC1
  : SubtargetFeature<"force-store-sc0-sc1", "HasForceStoreSC0SC1", "true",
      "Has SC0 and SC1 on stores">;

def FeatureSALUFloatInsts
  : SubtargetFeature<"salu-float", "HasSALUFloatInsts", "true",
      "Has SALU floating point instructions">;

def FeatureVGPRSingleUseHintInsts
  : SubtargetFeature<"vgpr-singleuse-hint", "HasVGPRSingleUseHintInsts", "true",
      "Has single-use VGPR hint instructions">;

def FeaturePseudoScalarTrans
  : SubtargetFeature<"pseudo-scalar-trans", "HasPseudoScalarTrans", "true",
      "Has Pseudo Scalar Transcendental instructions">;

def FeatureHasRestrictedSOffset
  : SubtargetFeature<"restricted-soffset", "HasRestrictedSOffset", "true",
      "Has restricted SOffset (immediate not supported).">;

def FeatureRequiredExportPriority
  : SubtargetFeature<"required-export-priority", "HasRequiredExportPriority",
      "true",
      "Export priority must be explicitly manipulated on GFX11.5">;

def FeatureVmemWriteVgprInOrder
  : SubtargetFeature<"vmem-write-vgpr-in-order", "HasVmemWriteVgprInOrder",
      "true",
      "VMEM instructions of the same type write VGPR results in order">;

//===------------------------------------------------------------===//
// Subtarget Features (options and debugging)
//===------------------------------------------------------------===//

// Parameterized feature: instantiating it with <N> produces the feature string
// "max-private-element-size-N", which assigns N to the MaxPrivateElementSize
// field.
class FeatureMaxPrivateElementSize<int size>
  : SubtargetFeature<"max-private-element-size-"#size,
      "MaxPrivateElementSize",
      !cast<string>(size),
      "Maximum private access size may be "#size>;

def FeatureMaxPrivateElementSize4 : FeatureMaxPrivateElementSize<4>;
def FeatureMaxPrivateElementSize8 : FeatureMaxPrivateElementSize<8>;
def FeatureMaxPrivateElementSize16 : FeatureMaxPrivateElementSize<16>;

// Two spellings of the same switch: both records set the DumpCode field.
def FeatureDumpCode : SubtargetFeature<"DumpCode", "DumpCode", "true",
  "Dump MachineInstrs in the CodeEmitter">;

def FeatureDumpCodeLower : SubtargetFeature<"dumpcode", "DumpCode", "true",
  "Dump MachineInstrs in the CodeEmitter">;

// XXX - This should probably be removed once enabled by default
def FeatureEnableLoadStoreOpt
  : SubtargetFeature<"load-store-opt", "EnableLoadStoreOpt", "true",
      "Enable SI load/store optimizer pass">;

// Performance debugging feature. Allow using DS instruction immediate
// offsets even if the base pointer can't be proven to be base. On SI,
// base pointer values that won't give the same result as a 16-bit add
// are not safe to fold, but this will override the conservative test
// for the base pointer.
def FeatureEnableUnsafeDSOffsetFolding
  : SubtargetFeature<"unsafe-ds-offset-folding", "EnableUnsafeDSOffsetFolding",
      "true",
      "Force using DS instruction immediate offsets on SI">;

def FeatureEnableSIScheduler
  : SubtargetFeature<"si-scheduler", "EnableSIScheduler", "true",
      "Enable SI Machine Scheduler">;

def FeatureEnableDS128
  : SubtargetFeature<"enable-ds128", "EnableDS128", "true",
      "Use ds_{read|write}_b128">;

// Sparse texture support requires that all result registers are zeroed when
// PRTStrictNull is set to true. This feature is turned on for all architectures
// but is enabled as a feature in case there are situations where PRTStrictNull
// is disabled by the driver.
def FeatureEnablePRTStrictNull
  : SubtargetFeature<"enable-prt-strict-null", "EnablePRTStrictNull", "true",
      "Enable zeroing of result registers for sparse texture fetches">;

// Unless +-flat-for-global is specified, turn on FlatForGlobal for
// all OS-es on VI and newer hardware to avoid assertion failures due
// to missing ADDR64 variants of MUBUF instructions.
// FIXME: moveToVALU should be able to handle converting addr64 MUBUF
// instructions.

def FeatureFlatForGlobal
  : SubtargetFeature<"flat-for-global", "FlatForGlobal", "true",
      "Force to generate flat instruction for global">;

def FeatureAutoWaitcntBeforeBarrier
  : SubtargetFeature<"auto-waitcnt-before-barrier", "AutoWaitcntBeforeBarrier",
      "true",
      "Hardware automatically inserts waitcnt before barrier">;

def FeatureBackOffBarrier
  : SubtargetFeature<"back-off-barrier", "BackOffBarrier", "true",
      "Hardware supports backing off s_barrier if an exception occurs">;

def FeatureTrigReducedRange
  : SubtargetFeature<"trig-reduced-range", "HasTrigReducedRange", "true",
      "Requires use of fract on arguments to trig instructions">;

def FeatureKernargPreload
  : SubtargetFeature<"kernarg-preload", "KernargPreload", "true",
      "Hardware supports preloading of kernel arguments in user SGPRs.">;

// Alignment enforcement is controlled by a configuration register:
// SH_MEM_CONFIG.alignment_mode
def FeatureUnalignedAccessMode
  : SubtargetFeature<"unaligned-access-mode", "UnalignedAccessMode", "true",
      "Enable unaligned global, local and region loads and stores if the hardware"
      " supports it">;

def FeaturePackedTID : SubtargetFeature<"packed-tid", "HasPackedTID", "true",
  "Workitem IDs are packed into v0 at kernel launch">;

def FeatureArchitectedFlatScratch
  : SubtargetFeature<"architected-flat-scratch", "HasArchitectedFlatScratch",
      "true",
      "Flat Scratch register is a readonly SPI initialized architected register">;

def FeatureArchitectedSGPRs
  : SubtargetFeature<"architected-sgprs", "HasArchitectedSGPRs", "true",
      "Enable the architected SGPRs">;

def FeatureGDS : SubtargetFeature<"gds", "HasGDS", "true",
  "Has Global Data Share">;

def FeatureGWS : SubtargetFeature<"gws", "HasGWS", "true",
  "Has Global Wave Sync">;

def FeatureRequiresCOV6
  : SubtargetFeature<"requires-cov6", "RequiresCOV6", "true",
      "Target Requires Code Object V6">;

// Dummy feature used to disable assembler instructions.
def FeatureDisable : SubtargetFeature<"", "FeatureDisable", "true",
  "Dummy feature to disable assembler instructions">;

//===----------------------------------------------------------------------===//

// Forwards Value, FeatureName and Implies to SubtargetFeatureGeneration and
// fixes its third argument to "GCNSubtarget".
class GCNSubtargetFeatureGeneration<string Value,
                                    string FeatureName,
                                    list<SubtargetFeature> Implies>
  : SubtargetFeatureGeneration<Value, FeatureName, "GCNSubtarget", Implies>;

def FeatureSouthernIslands
  : GCNSubtargetFeatureGeneration<"SOUTHERN_ISLANDS", "southern-islands", [
      FeatureFP64,
      FeatureLocalMemorySize32768,
      FeatureMIMG_R128,
      FeatureWavefrontSize64,
      FeatureSMemTimeInst,
      FeatureMadMacF32Insts,
      FeatureDsSrc2Insts,
      FeatureLDSBankCount32,
      FeatureMovrel,
      FeatureTrigReducedRange,
      FeatureExtendedImageInsts,
      FeatureImageInsts,
      FeatureGDS,
      FeatureGWS,
      FeatureDefaultComponentZero,
      FeatureAtomicFMinFMaxF32GlobalInsts,
      FeatureAtomicFMinFMaxF64GlobalInsts,
      FeatureVmemWriteVgprInOrder
    ]>;

def FeatureSeaIslands
  : GCNSubtargetFeatureGeneration<"SEA_ISLANDS", "sea-islands", [
      FeatureFP64,
      FeatureLocalMemorySize65536,
      FeatureMIMG_R128,
      FeatureWavefrontSize64,
      FeatureFlatAddressSpace,
      FeatureCIInsts,
      FeatureMovrel,
      FeatureTrigReducedRange,
      FeatureGFX7GFX8GFX9Insts,
      FeatureSMemTimeInst,
      FeatureMadMacF32Insts,
      FeatureDsSrc2Insts,
      FeatureExtendedImageInsts,
      FeatureUnalignedBufferAccess,
      FeatureImageInsts,
      FeatureGDS,
      FeatureGWS,
      FeatureDefaultComponentZero,
      FeatureAtomicFMinFMaxF32GlobalInsts,
      FeatureAtomicFMinFMaxF64GlobalInsts,
      FeatureAtomicFMinFMaxF32FlatInsts,
      FeatureAtomicFMinFMaxF64FlatInsts,
      FeatureVmemWriteVgprInOrder
    ]>;
// "volcanic-islands" generation and the features it implies.
def FeatureVolcanicIslands : GCNSubtargetFeatureGeneration<
  "VOLCANIC_ISLANDS",
  "volcanic-islands",
  [FeatureFP64, FeatureLocalMemorySize65536, FeatureMIMG_R128,
   FeatureWavefrontSize64, FeatureFlatAddressSpace,
   FeatureGCN3Encoding, FeatureCIInsts, Feature16BitInsts,
   FeatureSMemRealTime, FeatureVGPRIndexMode, FeatureMovrel,
   FeatureScalarStores, FeatureInv2PiInlineImm,
   FeatureSDWA, FeatureSDWAOutModsVOPC, FeatureSDWAMac, FeatureDPP,
   FeatureIntClamp, FeatureTrigReducedRange, FeatureGFX8Insts,
   FeatureGFX7GFX8GFX9Insts, FeatureSMemTimeInst, FeatureMadMacF32Insts,
   FeatureDsSrc2Insts, FeatureExtendedImageInsts, FeatureFastDenormalF32,
   FeatureUnalignedBufferAccess, FeatureImageInsts, FeatureGDS, FeatureGWS,
   FeatureDefaultComponentZero, FeatureVmemWriteVgprInOrder
  ]
>;

// "gfx9" generation and the features it implies.
def FeatureGFX9 : GCNSubtargetFeatureGeneration<
  "GFX9",
  "gfx9",
  [FeatureFP64, FeatureLocalMemorySize65536,
   FeatureWavefrontSize64, FeatureFlatAddressSpace,
   FeatureGCN3Encoding, FeatureCIInsts, Feature16BitInsts,
   FeatureSMemRealTime, FeatureScalarStores, FeatureInv2PiInlineImm,
   FeatureApertureRegs, FeatureGFX9Insts, FeatureVOP3P, FeatureVGPRIndexMode,
   FeatureFastFMAF32, FeatureDPP, FeatureIntClamp,
   FeatureSDWA, FeatureSDWAOmod, FeatureSDWAScalar, FeatureSDWASdst,
   FeatureFlatInstOffsets, FeatureFlatGlobalInsts, FeatureFlatScratchInsts,
   FeatureAddNoCarryInsts, FeatureGFX8Insts, FeatureGFX7GFX8GFX9Insts,
   FeatureScalarFlatScratchInsts, FeatureScalarAtomics, FeatureR128A16,
   FeatureA16, FeatureSMemTimeInst, FeatureFastDenormalF32, FeatureSupportsXNACK,
   FeatureUnalignedBufferAccess, FeatureUnalignedDSAccess,
   FeatureNegativeScratchOffsetBug, FeatureGWS, FeatureDefaultComponentZero,
   FeatureVmemWriteVgprInOrder
  ]
>;

// "gfx10" generation and the features it implies. Unlike the generations
// defined before it, this list contains neither FeatureWavefrontSize64 nor
// FeatureGCN3Encoding.
def FeatureGFX10 : GCNSubtargetFeatureGeneration<
  "GFX10",
  "gfx10",
  [FeatureFP64, FeatureLocalMemorySize65536, FeatureMIMG_R128,
   FeatureFlatAddressSpace,
   FeatureCIInsts, Feature16BitInsts,
   FeatureSMemRealTime, FeatureInv2PiInlineImm,
   FeatureApertureRegs, FeatureGFX9Insts, FeatureGFX10Insts, FeatureVOP3P,
   FeatureMovrel, FeatureFastFMAF32, FeatureDPP, FeatureIntClamp,
   FeatureSDWA, FeatureSDWAOmod, FeatureSDWAScalar, FeatureSDWASdst,
   FeatureFlatInstOffsets, FeatureFlatGlobalInsts, FeatureFlatScratchInsts,
   FeatureAddNoCarryInsts, FeatureFmaMixInsts, FeatureGFX8Insts,
   FeatureNoSdstCMPX, FeatureVscnt,
   FeatureVOP3Literal, FeatureDPP8, FeatureExtendedImageInsts,
   FeatureNoDataDepHazard, FeaturePkFmacF16Inst,
   FeatureA16, FeatureSMemTimeInst, FeatureFastDenormalF32, FeatureG16,
   FeatureUnalignedBufferAccess, FeatureUnalignedDSAccess, FeatureImageInsts,
   FeatureGDS, FeatureGWS, FeatureDefaultComponentZero,
   FeatureMaxHardClauseLength63,
   FeatureAtomicFMinFMaxF32GlobalInsts, FeatureAtomicFMinFMaxF64GlobalInsts,
   FeatureAtomicFMinFMaxF32FlatInsts, FeatureAtomicFMinFMaxF64FlatInsts,
   FeatureVmemWriteVgprInOrder
  ]
>;

// "gfx11" generation and the features it implies.
def FeatureGFX11 : GCNSubtargetFeatureGeneration<
  "GFX11",
  "gfx11",
  [FeatureFP64, FeatureLocalMemorySize65536, FeatureMIMG_R128,
   FeatureFlatAddressSpace, Feature16BitInsts,
   FeatureInv2PiInlineImm, FeatureApertureRegs,
   FeatureCIInsts, FeatureGFX8Insts, FeatureGFX9Insts, FeatureGFX10Insts,
   FeatureGFX10_AEncoding, FeatureGFX10_BEncoding, FeatureGFX10_3Insts,
   FeatureGFX11Insts, FeatureVOP3P, FeatureVOPD, FeatureTrue16BitInsts,
   FeatureMovrel, FeatureFastFMAF32, FeatureDPP, FeatureIntClamp,
   FeatureFlatInstOffsets, FeatureFlatGlobalInsts, FeatureFlatScratchInsts,
   FeatureAddNoCarryInsts, FeatureFmaMixInsts,
   FeatureNoSdstCMPX, FeatureVscnt,
   FeatureVOP3Literal, FeatureDPP8, FeatureExtendedImageInsts,
   FeatureNoDataDepHazard, FeaturePkFmacF16Inst,
   FeatureA16, FeatureFastDenormalF32, FeatureG16,
   FeatureUnalignedBufferAccess, FeatureUnalignedDSAccess, FeatureGDS,
   FeatureGWS, FeatureDefaultComponentZero,
   FeatureMaxHardClauseLength32,
   FeatureAtomicFMinFMaxF32GlobalInsts, FeatureAtomicFMinFMaxF32FlatInsts,
   FeatureVmemWriteVgprInOrder
  ]
>;

// "gfx12" generation and the features it implies. Compared with the gfx11
// list this one adds FeatureGFX12Insts, FeatureDefaultComponentBroadcast and
// FeatureAgentScopeFineGrainedRemoteMemoryAtomics, and omits
// FeatureExtendedImageInsts, FeatureGDS, FeatureGWS,
// FeatureDefaultComponentZero and FeatureVmemWriteVgprInOrder.
def FeatureGFX12 : GCNSubtargetFeatureGeneration<
  "GFX12",
  "gfx12",
  [FeatureFP64, FeatureLocalMemorySize65536, FeatureMIMG_R128,
   FeatureFlatAddressSpace, Feature16BitInsts,
   FeatureInv2PiInlineImm, FeatureApertureRegs,
   FeatureCIInsts, FeatureGFX8Insts, FeatureGFX9Insts, FeatureGFX10Insts,
   FeatureGFX10_AEncoding, FeatureGFX10_BEncoding, FeatureGFX10_3Insts,
   FeatureGFX11Insts, FeatureGFX12Insts, FeatureVOP3P, FeatureVOPD,
   FeatureMovrel, FeatureFastFMAF32, FeatureDPP, FeatureIntClamp,
   FeatureFlatInstOffsets, FeatureFlatGlobalInsts, FeatureFlatScratchInsts,
   FeatureAddNoCarryInsts, FeatureFmaMixInsts,
   FeatureNoSdstCMPX, FeatureVscnt,
   FeatureVOP3Literal, FeatureDPP8,
   FeatureNoDataDepHazard, FeaturePkFmacF16Inst,
   FeatureA16, FeatureFastDenormalF32, FeatureG16,
   FeatureUnalignedBufferAccess, FeatureUnalignedDSAccess,
   FeatureTrue16BitInsts, FeatureDefaultComponentBroadcast,
   FeatureMaxHardClauseLength32,
   FeatureAtomicFMinFMaxF32GlobalInsts, FeatureAtomicFMinFMaxF32FlatInsts,
   FeatureAgentScopeFineGrainedRemoteMemoryAtomics
  ]
>;

//===----------------------------------------------------------------------===//

// Named list of subtarget features. The list is exposed through the
// Features field so that one set can be built from another with
// !listconcat(Other.Features, [...]).
class FeatureSet<list<SubtargetFeature> Features_> {
  list<SubtargetFeature> Features = Features_;
}

// Feature sets built on FeatureSouthernIslands.
def FeatureISAVersion6_0_0 : FeatureSet<
  [FeatureSouthernIslands,
   FeatureFastFMAF32,
   HalfRate64Ops,
   FeatureLDSBankCount32]>;

def FeatureISAVersion6_0_1 : FeatureSet<
  [FeatureSouthernIslands, FeatureLDSBankCount32]>;

def FeatureISAVersion6_0_2 : FeatureSet<
  [FeatureSouthernIslands, FeatureLDSBankCount32]>;
// Feature sets built on FeatureSeaIslands.
def FeatureISAVersion7_0_0 : FeatureSet<
  [FeatureSeaIslands, FeatureLDSBankCount32]>;

def FeatureISAVersion7_0_1 : FeatureSet<
  [FeatureSeaIslands,
   HalfRate64Ops,
   FeatureLDSBankCount32,
   FeatureFastFMAF32]>;

def FeatureISAVersion7_0_2 : FeatureSet<
  [FeatureSeaIslands, FeatureLDSBankCount16, FeatureFastFMAF32]>;

def FeatureISAVersion7_0_3 : FeatureSet<
  [FeatureSeaIslands, FeatureLDSBankCount16]>;

def FeatureISAVersion7_0_4 : FeatureSet<
  [FeatureSeaIslands, FeatureLDSBankCount32]>;

def FeatureISAVersion7_0_5 : FeatureSet<
  [FeatureSeaIslands, FeatureLDSBankCount16]>;

// Feature sets built on FeatureVolcanicIslands. The 8_0_x sets extend
// FeatureISAVersion8_0_Common; 8_1_0 is spelled out on its own.
def FeatureISAVersion8_0_Common : FeatureSet<
  [FeatureVolcanicIslands, FeatureLDSBankCount32, FeatureUnpackedD16VMem]>;

def FeatureISAVersion8_0_1 : FeatureSet<
  !listconcat(FeatureISAVersion8_0_Common.Features,
              [FeatureFastFMAF32,
               HalfRate64Ops,
               FeatureSupportsXNACK])>;

def FeatureISAVersion8_0_2 : FeatureSet<
  !listconcat(FeatureISAVersion8_0_Common.Features,
              [FeatureSGPRInitBug])>;

def FeatureISAVersion8_0_3 : FeatureSet<
  !listconcat(FeatureISAVersion8_0_Common.Features,
              [])>;

def FeatureISAVersion8_0_5 : FeatureSet<
  !listconcat(FeatureISAVersion8_0_Common.Features,
              [FeatureSGPRInitBug])>;

def FeatureISAVersion8_1_0 : FeatureSet<
  [FeatureVolcanicIslands,
   FeatureLDSBankCount16,
   FeatureSupportsXNACK,
   FeatureImageStoreD16Bug,
   FeatureImageGather4D16Bug]>;

// Feature sets built on FeatureGFX9. FeatureISAVersion9_0_Common is the
// shared base; the Consumer_Common and MI_Common sets each extend it.
def FeatureISAVersion9_0_Common : FeatureSet<
  [FeatureGFX9,
   FeatureLDSBankCount32,
   FeatureImageInsts,
   FeatureMadMacF32Insts]>;

def FeatureISAVersion9_0_Consumer_Common : FeatureSet<
  !listconcat(FeatureISAVersion9_0_Common.Features,
              [FeatureImageGather4D16Bug,
               FeatureDsSrc2Insts,
               FeatureExtendedImageInsts,
               FeatureGDS])>;

def FeatureISAVersion9_Generic : FeatureSet<
  !listconcat(FeatureISAVersion9_0_Consumer_Common.Features,
              [FeatureRequiresCOV6])>;

def FeatureISAVersion9_0_MI_Common : FeatureSet<
  !listconcat(FeatureISAVersion9_0_Common.Features,
              [FeatureFmaMixInsts,
               FeatureDLInsts,
               FeatureDot1Insts,
               FeatureDot2Insts,
               FeatureDot3Insts,
               FeatureDot4Insts,
               FeatureDot5Insts,
               FeatureDot6Insts,
               FeatureDot7Insts,
               FeatureDot10Insts,
               FeatureMAIInsts,
               FeaturePkFmacF16Inst,
               FeatureAtomicFaddNoRtnInsts,
               FeatureSupportsSRAMECC])>;

def FeatureISAVersion9_0_0 : FeatureSet<
  !listconcat(FeatureISAVersion9_0_Consumer_Common.Features,
              [FeatureMadMixInsts])>;

def FeatureISAVersion9_0_2 : FeatureSet<
  !listconcat(FeatureISAVersion9_0_Consumer_Common.Features,
              [FeatureMadMixInsts])>;

def FeatureISAVersion9_0_4 : FeatureSet<
  !listconcat(FeatureISAVersion9_0_Consumer_Common.Features,
              [FeatureFmaMixInsts])>;

def FeatureISAVersion9_0_6 : FeatureSet<
  !listconcat(FeatureISAVersion9_0_Consumer_Common.Features,
              [HalfRate64Ops,
               FeatureFmaMixInsts,
               FeatureDLInsts,
               FeatureDot1Insts,
               FeatureDot2Insts,
               FeatureDot7Insts,
               FeatureDot10Insts,
               FeatureSupportsSRAMECC])>;

def FeatureISAVersion9_0_8 : FeatureSet<
  !listconcat(FeatureISAVersion9_0_MI_Common.Features,
              [FeatureGDS,
               HalfRate64Ops,
               FeatureDsSrc2Insts,
               FeatureExtendedImageInsts,
               FeatureAtomicBufferGlobalPkAddF16NoRtnInsts,
               FeatureMFMAInlineLiteralBug,
               FeatureImageGather4D16Bug])>;

// FeatureImageInsts is also part of FeatureISAVersion9_0_Common, so it
// appears twice in the resulting list.
def FeatureISAVersion9_0_9 : FeatureSet<
  !listconcat(FeatureISAVersion9_0_Consumer_Common.Features,
              [FeatureMadMixInsts,
               FeatureImageInsts])>;

def FeatureISAVersion9_0_A : FeatureSet<
  !listconcat(FeatureISAVersion9_0_MI_Common.Features,
              [FeatureGFX90AInsts,
               FeatureFmacF64Inst,
               FeatureDPALU_DPP,
               FeaturePackedFP32Ops,
               FeatureAtomicFaddRtnInsts,
               FeatureAtomicBufferGlobalPkAddF16Insts,
               FeaturePackedTID,
               FullRate64Ops,
               FeatureBackOffBarrier,
               FeatureKernargPreload,
               FeatureAtomicFMinFMaxF64GlobalInsts,
               FeatureAtomicFMinFMaxF64FlatInsts,
               FeatureFlatBufferGlobalAtomicFaddF64Inst
              ])>;

def FeatureISAVersion9_0_C : FeatureSet<
  !listconcat(FeatureISAVersion9_0_Consumer_Common.Features,
              [FeatureMadMixInsts])>;

// Base for the 9_4_x sets. It starts from FeatureGFX9 directly rather than
// from FeatureISAVersion9_0_Common.
def FeatureISAVersion9_4_Common : FeatureSet<
  [FeatureGFX9,
   FeatureGFX90AInsts,
   FeatureGFX940Insts,
   FeatureFmaMixInsts,
   FeatureLDSBankCount32,
   FeatureDLInsts,
   FeatureFmacF64Inst,
   FeatureDot1Insts,
   FeatureDot2Insts,
   FeatureDot3Insts,
   FeatureDot4Insts,
   FeatureDot5Insts,
   FeatureDot6Insts,
   FeatureDot7Insts,
   FeatureDot10Insts,
   FeatureAtomicDsPkAdd16Insts,
   FeatureAtomicFlatPkAdd16Insts,
   FeatureDPALU_DPP,
   FeaturePackedFP32Ops,
   FeatureMAIInsts,
   FeatureFP8Insts,
   FeatureFP8ConversionInsts,
   FeaturePkFmacF16Inst,
   FeatureAtomicFaddRtnInsts,
   FeatureAtomicFaddNoRtnInsts,
   FeatureAtomicBufferGlobalPkAddF16Insts,
   FeatureAtomicGlobalPkAddBF16Inst,
   FeatureFlatAtomicFaddF32Inst,
   FeatureSupportsSRAMECC,
   FeaturePackedTID,
   FeatureArchitectedFlatScratch,
   FullRate64Ops,
   FeatureBackOffBarrier,
   FeatureKernargPreload,
   FeatureAtomicFMinFMaxF64GlobalInsts,
   FeatureAtomicFMinFMaxF64FlatInsts,
   FeatureAgentScopeFineGrainedRemoteMemoryAtomics,
   FeatureMemoryAtomicFAddF32DenormalSupport,
   FeatureFlatBufferGlobalAtomicFaddF64Inst
  ]>;

def FeatureISAVersion9_4_0 : FeatureSet<
  !listconcat(FeatureISAVersion9_4_Common.Features,
              [FeatureForceStoreSC0SC1])>;

def FeatureISAVersion9_4_1 : FeatureSet<
  !listconcat(FeatureISAVersion9_4_Common.Features,
              [FeatureForceStoreSC0SC1])>;
// Adds nothing on top of FeatureISAVersion9_4_Common.
def FeatureISAVersion9_4_2 : FeatureSet<
  !listconcat(FeatureISAVersion9_4_Common.Features,
              [])>;

// Feature sets built on FeatureGFX10. FeatureISAVersion10_Common is the
// shared base for both the 10_1 and 10_3 sets.
def FeatureISAVersion10_Common : FeatureSet<
  [FeatureGFX10,
   FeatureLDSBankCount32,
   FeatureDLInsts,
   FeatureNSAEncoding,
   FeatureBackOffBarrier]>;

def FeatureISAVersion10_1_Common : FeatureSet<
  !listconcat(FeatureISAVersion10_Common.Features,
              [FeatureScalarStores,
               FeatureScalarAtomics,
               FeatureScalarFlatScratchInsts,
               FeatureGetWaveIdInst,
               FeatureMadMacF32Insts,
               FeatureDsSrc2Insts,
               FeatureLdsMisalignedBug,
               FeatureSupportsXNACK,
               // gfx101x bugs
               FeatureVcmpxPermlaneHazard,
               FeatureVMEMtoScalarWriteHazard,
               FeatureSMEMtoVectorWriteHazard,
               FeatureInstFwdPrefetchBug,
               FeatureVcmpxExecWARHazard,
               FeatureLdsBranchVmemWARHazard,
               FeatureNSAtoVMEMBug,
               FeatureNSAClauseBug,
               FeatureOffset3fBug,
               FeatureFlatSegmentOffsetBug,
               FeatureNegativeUnalignedScratchOffsetBug])>;

def FeatureISAVersion10_1_Generic : FeatureSet<
  !listconcat(FeatureISAVersion10_1_Common.Features,
              [FeatureRequiresCOV6])>;

def FeatureISAVersion10_1_0 : FeatureSet<
  !listconcat(FeatureISAVersion10_1_Common.Features,
              [])>;

def FeatureISAVersion10_1_1 : FeatureSet<
  !listconcat(FeatureISAVersion10_1_Common.Features,
              [FeatureDot1Insts,
               FeatureDot2Insts,
               FeatureDot5Insts,
               FeatureDot6Insts,
               FeatureDot7Insts,
               FeatureDot10Insts])>;

def FeatureISAVersion10_1_2 : FeatureSet<
  !listconcat(FeatureISAVersion10_1_Common.Features,
              [FeatureDot1Insts,
               FeatureDot2Insts,
               FeatureDot5Insts,
               FeatureDot6Insts,
               FeatureDot7Insts,
               FeatureDot10Insts])>;

def FeatureISAVersion10_1_3 : FeatureSet<
  !listconcat(FeatureISAVersion10_1_Common.Features,
              [FeatureGFX10_AEncoding])>;

def FeatureISAVersion10_3_0 : FeatureSet<
  !listconcat(FeatureISAVersion10_Common.Features,
              [FeatureGFX10_AEncoding,
               FeatureGFX10_BEncoding,
               FeatureGFX10_3Insts,
               FeatureDot1Insts,
               FeatureDot2Insts,
               FeatureDot5Insts,
               FeatureDot6Insts,
               FeatureDot7Insts,
               FeatureDot10Insts,
               FeatureShaderCyclesRegister])>;

def FeatureISAVersion10_3_Generic: FeatureSet<
  !listconcat(FeatureISAVersion10_3_0.Features,
              [FeatureRequiresCOV6])>;

// Shared base for the feature sets built on FeatureGFX11.
def FeatureISAVersion11_Common : FeatureSet<
  [FeatureGFX11,
   FeatureLDSBankCount32,
   FeatureDLInsts,
   FeatureDot5Insts,
   FeatureDot7Insts,
   FeatureDot8Insts,
   FeatureDot9Insts,
   FeatureDot10Insts,
   FeatureNSAEncoding,
   FeaturePartialNSAEncoding,
   FeatureShaderCyclesRegister,
   FeatureArchitectedFlatScratch,
   FeatureAtomicFaddRtnInsts,
   FeatureAtomicFaddNoRtnInsts,
   FeatureFlatAtomicFaddF32Inst,
   FeatureImageInsts,
   FeaturePackedTID,
   FeatureVcmpxPermlaneHazard,
   FeatureMemoryAtomicFAddF32DenormalSupport]>;

// There are a few workarounds that need to be
// added to all targets. This pessimizes codegen
// a bit on the generic GFX11 target.
def FeatureISAVersion11_Generic: FeatureSet<
  !listconcat(FeatureISAVersion11_Common.Features,
              [FeatureMSAALoadDstSelBug,
               FeatureVALUTransUseHazard,
               FeatureUserSGPRInit16Bug,
               FeatureMADIntraFwdBug,
               FeaturePrivEnabledTrap2NopBug,
               FeatureRequiresCOV6,
               FeatureRequiredExportPriority])>;

// Base for the 11_0_x sets: FeatureISAVersion11_Common plus four bug and
// hazard features.
def FeatureISAVersion11_0_Common : FeatureSet<
  !listconcat(FeatureISAVersion11_Common.Features,
              [FeatureMSAALoadDstSelBug,
               FeatureVALUTransUseHazard,
               FeatureMADIntraFwdBug,
               FeaturePrivEnabledTrap2NopBug])>;

def FeatureISAVersion11_0_0 : FeatureSet<
  !listconcat(FeatureISAVersion11_0_Common.Features,
              [Feature1_5xVGPRs,
               FeatureUserSGPRInit16Bug])>;

def FeatureISAVersion11_0_1 : FeatureSet<
  !listconcat(FeatureISAVersion11_0_Common.Features,
              [Feature1_5xVGPRs])>;

def FeatureISAVersion11_0_2 : FeatureSet<
  !listconcat(FeatureISAVersion11_0_Common.Features,
              [FeatureUserSGPRInit16Bug])>;

def FeatureISAVersion11_0_3 : FeatureSet<
  !listconcat(FeatureISAVersion11_0_Common.Features,
              [])>;

// The 11_5_x sets extend FeatureISAVersion11_Common directly, not
// FeatureISAVersion11_0_Common. 11_5_0 and 11_5_2 list the same features;
// 11_5_1 additionally has Feature1_5xVGPRs.
def FeatureISAVersion11_5_0 : FeatureSet<
  !listconcat(FeatureISAVersion11_Common.Features,
              [FeatureSALUFloatInsts,
               FeatureDPPSrc1SGPR,
               FeatureVGPRSingleUseHintInsts,
               FeatureRequiredExportPriority])>;

def FeatureISAVersion11_5_1 : FeatureSet<
  !listconcat(FeatureISAVersion11_Common.Features,
              [FeatureSALUFloatInsts,
               FeatureDPPSrc1SGPR,
               FeatureVGPRSingleUseHintInsts,
               Feature1_5xVGPRs,
               FeatureRequiredExportPriority])>;

def FeatureISAVersion11_5_2 : FeatureSet<
  !listconcat(FeatureISAVersion11_Common.Features,
              [FeatureSALUFloatInsts,
               FeatureDPPSrc1SGPR,
               FeatureVGPRSingleUseHintInsts,
               FeatureRequiredExportPriority])>;

// Feature set built on FeatureGFX12.
def FeatureISAVersion12 : FeatureSet<
  [FeatureGFX12,
   FeatureLDSBankCount32,
   FeatureDLInsts,
   FeatureDot7Insts,
   FeatureDot8Insts,
   FeatureDot9Insts,
   FeatureDot10Insts,
   FeatureDot11Insts,
   FeatureNSAEncoding,
   FeaturePartialNSAEncoding,
   FeatureShaderCyclesHiLoRegisters,
   FeatureArchitectedFlatScratch,
   FeatureArchitectedSGPRs,
   FeatureAtomicFaddRtnInsts,
   FeatureAtomicFaddNoRtnInsts,
   FeatureAtomicDsPkAdd16Insts,
   FeatureAtomicFlatPkAdd16Insts,
   FeatureAtomicBufferGlobalPkAddF16Insts,
   FeatureAtomicGlobalPkAddBF16Inst,
   FeatureAtomicBufferPkAddBF16Inst,
   FeatureFlatAtomicFaddF32Inst,
   FeatureImageInsts,
   FeatureExtendedImageInsts,
   FeatureFP8ConversionInsts,
   FeaturePackedTID,
   FeatureVcmpxPermlaneHazard,
   FeatureSALUFloatInsts,
   FeaturePseudoScalarTrans,
   FeatureHasRestrictedSOffset,
   FeatureVGPRSingleUseHintInsts,
   FeatureScalarDwordx3Loads,
   FeatureDPPSrc1SGPR,
   FeatureMaxHardClauseLength32,
   Feature1_5xVGPRs,
   FeatureMemoryAtomicFAddF32DenormalSupport
  ]>;

def FeatureISAVersion12_Generic: FeatureSet<
  !listconcat(FeatureISAVersion12.Features,
              [FeatureRequiresCOV6])>;

//===----------------------------------------------------------------------===//

// Instruction-info record referenced by the AMDGPU target definition.
def AMDGPUInstrInfo : InstrInfo {
  let guessInstructionProperties = 1;
}

def AMDGPUAsmParser : AsmParser {
  // Some of the R600 registers have the same name, so this crashes.
  // For example T0_XYZW and T0_XY both have the asm name T0.
  let ShouldEmitMatchRegisterName = 0;

  // Call the custom operand parser for all operands.
  let OperandParserMethod = "parseCustomOperand";
  let CallCustomParserForAllOperands = true;
}

def AMDGPUAsmWriter : AsmWriter {
  int PassSubtarget = 1;
}

// Name/ID pairs for the assembler variants. Default through VOP3_DPP are
// each consumed by one of the AsmParserVariant defs that follow; no
// AsmParserVariant for Disable is defined in this part of the file.
def AMDGPUAsmVariants {
  string Default = "Default";
  int Default_ID = 0;

  string VOP3 = "VOP3";
  int VOP3_ID = 1;

  string SDWA = "SDWA";
  int SDWA_ID = 2;

  string SDWA9 = "SDWA9";
  int SDWA9_ID = 3;

  string DPP = "DPP";
  int DPP_ID = 4;

  string VOP3_DPP = "VOP3_DPP";
  int VOP3_DPP_ID = 5;

  string Disable = "Disable";
  int Disable_ID = 6;
}

// One AsmParserVariant per variant; Variant and Name are both taken from
// AMDGPUAsmVariants.
def DefaultAMDGPUAsmParserVariant : AsmParserVariant {
  let Variant = AMDGPUAsmVariants.Default_ID;
  let Name = AMDGPUAsmVariants.Default;
}

def VOP3AsmParserVariant : AsmParserVariant {
  let Variant = AMDGPUAsmVariants.VOP3_ID;
  let Name = AMDGPUAsmVariants.VOP3;
}

def SDWAAsmParserVariant : AsmParserVariant {
  let Variant = AMDGPUAsmVariants.SDWA_ID;
  let Name = AMDGPUAsmVariants.SDWA;
}

def SDWA9AsmParserVariant : AsmParserVariant {
  let Variant = AMDGPUAsmVariants.SDWA9_ID;
  let Name = AMDGPUAsmVariants.SDWA9;
}

def DPPAsmParserVariant : AsmParserVariant {
  let Variant = AMDGPUAsmVariants.DPP_ID;
  let Name = AMDGPUAsmVariants.DPP;
}

def VOP3_DPPAsmParserVariant : AsmParserVariant {
  let Variant = AMDGPUAsmVariants.VOP3_DPP_ID;
  let Name = AMDGPUAsmVariants.VOP3_DPP;
}

// Top-level target record tying together the instruction info, the
// assembly parser with its variants, and the assembly writer.
def AMDGPU : Target {
  // Pull in Instruction Info:
  let InstructionSet = AMDGPUInstrInfo;
  let AssemblyParsers = [AMDGPUAsmParser];
  let AssemblyParserVariants = [DefaultAMDGPUAsmParserVariant,
                                VOP3AsmParserVariant,
                                SDWAAsmParserVariant,
                                SDWA9AsmParserVariant,
                                DPPAsmParserVariant,
                                VOP3_DPPAsmParserVariant];
  let AssemblyWriters = [AMDGPUAsmWriter];
  let AllowRegisterRenaming = 1;
}

// Dummy Instruction itineraries for
// pseudo instructions
def ALU_NULL : FuncUnit;
def NullALU : InstrItinClass;

//===----------------------------------------------------------------------===//
// Predicate helper class
//===----------------------------------------------------------------------===//

// Most defs below pair a C++ condition string (Predicate) with a
// subtarget-feature expression (AssemblerPredicate); a few carry only the
// C++ condition. Adjacent string literals are kept split exactly as
// written, including leading spaces inside the literals.

//--- Generation-based predicates ------------------------------------------===//

def isGFX6 :
  Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS">,
  AssemblerPredicate<(all_of FeatureSouthernIslands)>;

def isGFX6GFX7 :
  Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS ||"
            "Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS">,
  AssemblerPredicate<(all_of (not FeatureGCN3Encoding), (not FeatureGFX10Insts))>;

def isGFX6GFX7GFX10 :
  Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS ||"
            "Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS ||"
            "Subtarget->getGeneration() == AMDGPUSubtarget::GFX10">,
  AssemblerPredicate<(all_of (not FeatureGCN3Encoding), (not FeatureGFX11Insts))>;

def isGFX6GFX7GFX10Plus :
  Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS ||"
            "Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS ||"
            "Subtarget->getGeneration() >= AMDGPUSubtarget::GFX10">,
  AssemblerPredicate<(all_of (not FeatureGCN3Encoding))>;

def isGFX7Only :
  Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS">,
  AssemblerPredicate<(all_of (not FeatureGCN3Encoding), FeatureCIInsts, (not FeatureGFX10Insts))>;

def isGFX7GFX10 :
  Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS ||"
            "Subtarget->getGeneration() == AMDGPUSubtarget::GFX10">,
  AssemblerPredicate<(all_of (not FeatureGCN3Encoding), FeatureCIInsts, (not FeatureGFX11Insts))>;

// NOTE(review): the C++ condition stops at GFX11, but the assembler
// predicate has no (not FeatureGFX12Insts) term, unlike isGFX10GFX11 and
// isGFX8GFX9GFX10GFX11. FeatureGFX12 implies FeatureCIInsts and does not
// imply FeatureGCN3Encoding, so the assembler side appears to hold on GFX12
// as well - confirm whether that is intended.
def isGFX7GFX10GFX11 :
  Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS ||"
            "Subtarget->getGeneration() == AMDGPUSubtarget::GFX10 ||"
            "Subtarget->getGeneration() == AMDGPUSubtarget::GFX11">,
  AssemblerPredicate<(all_of (not FeatureGCN3Encoding), FeatureCIInsts)>;

def isGFX7GFX8GFX9 :
  Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS ||"
            "Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS ||"
            "Subtarget->getGeneration() == AMDGPUSubtarget::GFX9">,
  AssemblerPredicate<(all_of FeatureGFX7GFX8GFX9Insts)>;

def isGFX6GFX7GFX8GFX9 :
  Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS ||"
            "Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS ||"
            "Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS ||"
            "Subtarget->getGeneration() == AMDGPUSubtarget::GFX9">,
  AssemblerPredicate<(all_of (not FeatureGFX10Insts))>;

def isGFX6GFX7GFX8GFX9NotGFX90A :
  Predicate<"!Subtarget->hasGFX90AInsts() &&"
            "(Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS ||"
            " Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS ||"
            " Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS ||"
            " Subtarget->getGeneration() == AMDGPUSubtarget::GFX9)">,
  AssemblerPredicate<(all_of (not FeatureGFX10Insts), (not FeatureGFX90AInsts))>;

def isGFX6GFX7GFX8GFX9GFX10 :
  Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS ||"
            "Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS ||"
            "Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS ||"
            "Subtarget->getGeneration() == AMDGPUSubtarget::GFX9 ||"
            "Subtarget->getGeneration() == AMDGPUSubtarget::GFX10">,
  AssemblerPredicate<(all_of (not FeatureGFX11Insts))>;

def isNotGFX12Plus :
  Predicate<"Subtarget->getGeneration() <= AMDGPUSubtarget::GFX11">,
  AssemblerPredicate<(all_of (not FeatureGFX12Insts))>;

def isGFX7GFX8GFX9GFX10 :
  Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS ||"
            "Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS ||"
            "Subtarget->getGeneration() == AMDGPUSubtarget::GFX9 ||"
            "Subtarget->getGeneration() == AMDGPUSubtarget::GFX10">,
  AssemblerPredicate<(all_of FeatureCIInsts, (not FeatureGFX11Insts))>;

def isGFX8GFX9GFX10GFX11 :
  Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS ||"
            "Subtarget->getGeneration() == AMDGPUSubtarget::GFX9 ||"
            "Subtarget->getGeneration() == AMDGPUSubtarget::GFX10 ||"
            "Subtarget->getGeneration() == AMDGPUSubtarget::GFX11">,
  AssemblerPredicate<(all_of FeatureGFX8Insts, (not FeatureGFX12Insts))>;

def isGFX7Plus :
  Predicate<"Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS">,
  AssemblerPredicate<(all_of FeatureCIInsts)>;

def isGFX8Plus :
  Predicate<"Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS">,
  AssemblerPredicate<(all_of FeatureGFX8Insts)>;

def isGFX8Only :
  Predicate<"Subtarget->getGeneration() =="
            "AMDGPUSubtarget::VOLCANIC_ISLANDS">,
  AssemblerPredicate<(all_of FeatureVolcanicIslands)>;

def isGFX9Plus :
  Predicate<"Subtarget->getGeneration() >= AMDGPUSubtarget::GFX9">,
  AssemblerPredicate<(all_of FeatureGFX9Insts)>;

// C++ condition only; no AssemblerPredicate is attached.
def isNotGFX9Plus :
  Predicate<"Subtarget->getGeneration() < AMDGPUSubtarget::GFX9">;

def isGFX9Only :
  Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::GFX9">,
  AssemblerPredicate<(all_of FeatureGCN3Encoding, FeatureGFX9Insts)>;

def isGCN3ExcludingGFX90A :
  Predicate<"Subtarget->isGCN3Encoding() && !Subtarget->hasGFX90AInsts()">,
  AssemblerPredicate<(all_of FeatureGCN3Encoding, (not FeatureGFX90AInsts))>;

def isGFX90APlus :
  Predicate<"Subtarget->hasGFX90AInsts()">,
  AssemblerPredicate<(all_of FeatureGFX90AInsts)>;

def isNotGFX90APlus :
  Predicate<"!Subtarget->hasGFX90AInsts()">,
  AssemblerPredicate<(all_of (not FeatureGFX90AInsts))>;

def isGFX8GFX9NotGFX90A :
  Predicate<"!Subtarget->hasGFX90AInsts() &&"
            "(Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS ||"
            " Subtarget->getGeneration() == AMDGPUSubtarget::GFX9)">,
  AssemblerPredicate<(all_of FeatureGFX8Insts, FeatureGCN3Encoding, (not FeatureGFX90AInsts))>;

def isGFX90AOnly :
  Predicate<"Subtarget->hasGFX90AInsts() && !Subtarget->hasGFX940Insts()">,
  AssemblerPredicate<(all_of FeatureGFX90AInsts, (not FeatureGFX940Insts))>;

def isGFX908orGFX90A :
  Predicate<"Subtarget->hasMAIInsts() && !Subtarget->hasGFX940Insts()">,
  AssemblerPredicate<(all_of FeatureMAIInsts, (not FeatureGFX940Insts))>;

def isGFX940Plus :
  Predicate<"Subtarget->hasGFX940Insts()">,
  AssemblerPredicate<(all_of FeatureGFX940Insts)>;

def isGFX8GFX9NotGFX940 :
  Predicate<"!Subtarget->hasGFX940Insts() &&"
            "(Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS ||"
            " Subtarget->getGeneration() == AMDGPUSubtarget::GFX9)">,
  AssemblerPredicate<(all_of FeatureGFX8Insts, FeatureGCN3Encoding, (not FeatureGFX940Insts))>;

def isGFX8GFX9 :
  Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS ||"
            "Subtarget->getGeneration() == AMDGPUSubtarget::GFX9">,
  AssemblerPredicate<(all_of FeatureGFX8Insts, FeatureGCN3Encoding)>;

def isGFX10Only :
  Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::GFX10">,
  AssemblerPredicate<(all_of FeatureGFX10Insts, (not FeatureGFX11Insts))>;

def isGFX10Plus :
  Predicate<"Subtarget->getGeneration() >= AMDGPUSubtarget::GFX10">,
  AssemblerPredicate<(all_of FeatureGFX10Insts)>;

def isGFX10GFX11 :
  Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::GFX10 ||"
            "Subtarget->getGeneration() == AMDGPUSubtarget::GFX11">,
  AssemblerPredicate<(all_of FeatureGFX10Insts, (not FeatureGFX12Insts))>;

def isGFX10Before1030 :
  Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::GFX10 &&"
            "!Subtarget->hasGFX10_3Insts()">,
  AssemblerPredicate<(all_of FeatureGFX10Insts, (not FeatureGFX10_3Insts))>;

def isGFX9GFX10 :
  Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::GFX9 ||"
            "Subtarget->getGeneration() == AMDGPUSubtarget::GFX10">,
  AssemblerPredicate<(all_of FeatureGFX9Insts, (not FeatureGFX11Insts))>;

def isGFX8GFX9GFX10 :
  Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS ||"
            "Subtarget->getGeneration() == AMDGPUSubtarget::GFX9 ||"
            "Subtarget->getGeneration() == AMDGPUSubtarget::GFX10">,
  AssemblerPredicate<(all_of FeatureGFX8Insts, (not FeatureGFX11Insts))>;

def isGFX11Only :
  Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::GFX11">,
  AssemblerPredicate<(all_of FeatureGFX11Insts, (not FeatureGFX12Insts))>;

def isGFX11Plus :
  Predicate<"Subtarget->getGeneration() >= AMDGPUSubtarget::GFX11">,
  AssemblerPredicate<(all_of FeatureGFX11Insts)>;

// isGFX12Only and isGFX12Plus differ only in the C++ condition (== vs >=);
// their assembler predicates are identical.
def isGFX12Only :
  Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::GFX12">,
  AssemblerPredicate<(all_of FeatureGFX12Insts)>;

def isGFX12Plus :
  Predicate<"Subtarget->getGeneration() >= AMDGPUSubtarget::GFX12">,
  AssemblerPredicate<(all_of FeatureGFX12Insts)>;

//--- Capability-based predicates ------------------------------------------===//

def HasFlatAddressSpace :
  Predicate<"Subtarget->hasFlatAddressSpace()">,
  AssemblerPredicate<(all_of FeatureFlatAddressSpace)>;

def HasFlatBufferGlobalAtomicFaddF64Inst :
  Predicate<"Subtarget->hasFlatBufferGlobalAtomicFaddF64Inst()">,
  AssemblerPredicate<(any_of FeatureFlatBufferGlobalAtomicFaddF64Inst)>;

def HasAtomicFMinFMaxF32GlobalInsts :
  Predicate<"Subtarget->hasAtomicFMinFMaxF32GlobalInsts()">,
  AssemblerPredicate<(any_of FeatureAtomicFMinFMaxF32GlobalInsts)>;

def HasAtomicFMinFMaxF64GlobalInsts :
  Predicate<"Subtarget->hasAtomicFMinFMaxF64GlobalInsts()">,
  AssemblerPredicate<(any_of FeatureAtomicFMinFMaxF64GlobalInsts)>;

def HasAtomicFMinFMaxF32FlatInsts :
  Predicate<"Subtarget->hasAtomicFMinFMaxF32FlatInsts()">,
  AssemblerPredicate<(any_of FeatureAtomicFMinFMaxF32FlatInsts)>;

def HasAtomicFMinFMaxF64FlatInsts :
  Predicate<"Subtarget->hasAtomicFMinFMaxF64FlatInsts()">,
  AssemblerPredicate<(any_of FeatureAtomicFMinFMaxF64FlatInsts)>;

def HasLdsAtomicAddF64 :
  Predicate<"Subtarget->hasLdsAtomicAddF64()">,
  AssemblerPredicate<(any_of FeatureGFX90AInsts)>;

def HasFlatGlobalInsts :
  Predicate<"Subtarget->hasFlatGlobalInsts()">,
  AssemblerPredicate<(all_of FeatureFlatGlobalInsts)>;

def HasFlatScratchInsts :
  Predicate<"Subtarget->hasFlatScratchInsts()">,
  AssemblerPredicate<(all_of FeatureFlatScratchInsts)>;

def HasScalarFlatScratchInsts :
  Predicate<"Subtarget->hasScalarFlatScratchInsts()">,
  AssemblerPredicate<(all_of FeatureScalarFlatScratchInsts)>;

def HasD16LoadStore :
  Predicate<"Subtarget->hasD16LoadStore()">,
  AssemblerPredicate<(all_of FeatureGFX9Insts)>;

def HasFlatScratchSTMode :
  Predicate<"Subtarget->hasFlatScratchSTMode()">,
  AssemblerPredicate<(any_of FeatureGFX10_3Insts, FeatureGFX940Insts)>;

def HasFlatScratchSVSMode :
  Predicate<"Subtarget->hasFlatScratchSVSMode()">,
  AssemblerPredicate<(any_of FeatureGFX940Insts, FeatureGFX11Insts)>;

def HasGFX10_AEncoding :
  Predicate<"Subtarget->hasGFX10_AEncoding()">,
  AssemblerPredicate<(all_of FeatureGFX10_AEncoding)>;

def HasGFX10_BEncoding :
  Predicate<"Subtarget->hasGFX10_BEncoding()">,
  AssemblerPredicate<(all_of FeatureGFX10_BEncoding)>;

// Packed D16 VMem is expressed as the negation of the unpacked feature.
def HasUnpackedD16VMem :
  Predicate<"Subtarget->hasUnpackedD16VMem()">,
  AssemblerPredicate<(all_of FeatureUnpackedD16VMem)>;

def HasPackedD16VMem :
  Predicate<"!Subtarget->hasUnpackedD16VMem()">,
  AssemblerPredicate<(all_of (not FeatureUnpackedD16VMem))>;

def HasRestrictedSOffset :
  Predicate<"Subtarget->hasRestrictedSOffset()">,
  AssemblerPredicate<(all_of FeatureHasRestrictedSOffset)>;

def HasUnrestrictedSOffset :
  Predicate<"!Subtarget->hasRestrictedSOffset()">,
  AssemblerPredicate<(all_of (not FeatureHasRestrictedSOffset))>;

def D16PreservesUnusedBits :
  Predicate<"Subtarget->d16PreservesUnusedBits()">,
  AssemblerPredicate<(all_of FeatureGFX9Insts, (not FeatureSRAMECC))>;

// C++ conditions only; no AssemblerPredicate is attached to this pair.
def LDSRequiresM0Init : Predicate<"Subtarget->ldsRequiresM0Init()">;
def NotLDSRequiresM0Init : Predicate<"!Subtarget->ldsRequiresM0Init()">;

def HasExportInsts :
  Predicate<"Subtarget->hasExportInsts()">,
  AssemblerPredicate<(all_of (not FeatureGFX90AInsts))>;

def HasVINTERPEncoding :
  Predicate<"Subtarget->hasVINTERPEncoding()">,
  AssemblerPredicate<(all_of FeatureGFX11Insts)>;

def HasDSAddTid :
  Predicate<"Subtarget->getGeneration() >= AMDGPUSubtarget::GFX9">,
  AssemblerPredicate<(all_of FeatureGFX9Insts)>;

def HasLDSFPAtomicAddF32 :
  Predicate<"Subtarget->hasLDSFPAtomicAddF32()">,
  AssemblerPredicate<(all_of FeatureGFX8Insts)>;

def HasAddNoCarryInsts :
  Predicate<"Subtarget->hasAddNoCarry()">,
  AssemblerPredicate<(all_of FeatureAddNoCarryInsts)>;

// C++ conditions only; no AssemblerPredicate is attached to these two.
def NotHasAddNoCarryInsts : Predicate<"!Subtarget->hasAddNoCarry()">;

def HasXNACKEnabled : Predicate<"Subtarget->isXNACKEnabled()">;

def Has16BitInsts :
  Predicate<"Subtarget->has16BitInsts()">,
  AssemblerPredicate<(all_of Feature16BitInsts)>;

def HasTrue16BitInsts :
  Predicate<"Subtarget->hasTrue16BitInsts()">,
  AssemblerPredicate<(all_of FeatureTrue16BitInsts)>;

// Uses True16PredicateClass rather than Predicate, unlike HasTrue16BitInsts.
def NotHasTrue16BitInsts :
  True16PredicateClass<"!Subtarget->hasTrue16BitInsts()">,
  AssemblerPredicate<(all_of (not FeatureTrue16BitInsts))>;
// Control use of True16 instructions. The real True16 instructions are
// True16 instructions as they are defined in the ISA. Fake True16
// instructions have the same encoding as real ones but syntactically
// only allow 32-bit registers in operands and use low halves thereof.
def UseRealTrue16Insts
    : True16PredicateClass<"Subtarget->useRealTrue16Insts()">,
      AssemblerPredicate<(all_of FeatureTrue16BitInsts, FeatureRealTrue16Insts)>;
def UseFakeTrue16Insts
    : True16PredicateClass<"Subtarget->hasTrue16BitInsts() && "
                           "!Subtarget->useRealTrue16Insts()">,
      AssemblerPredicate<(all_of FeatureTrue16BitInsts)>;
  // FIXME When we default to RealTrue16 instead of Fake, change the
  // AssemblerPredicate of UseFakeTrue16Insts as follows.
  // AssemblerPredicate<(all_of FeatureTrue16BitInsts, (not FeatureRealTrue16Insts))>;

def HasVOP3PInsts
    : Predicate<"Subtarget->hasVOP3PInsts()">,
      AssemblerPredicate<(all_of FeatureVOP3P)>;

// C++-only predicates (no AssemblerPredicate attached).
def NotHasMed3_16 : Predicate<"!Subtarget->hasMed3_16()">;
def HasMed3_16 : Predicate<"Subtarget->hasMed3_16()">;

def HasMinMaxDenormModes : Predicate<"Subtarget->supportsMinMaxDenormModes()">;
def NotHasMinMaxDenormModes : Predicate<"!Subtarget->supportsMinMaxDenormModes()">;

def HasFminFmaxLegacy : Predicate<"Subtarget->hasFminFmaxLegacy()">;

// The three SDWA defs share one C++ condition and differ only in the
// assembler feature expression.
def HasSDWA
    : Predicate<"Subtarget->hasSDWA()">,
      AssemblerPredicate<(all_of FeatureSDWA, FeatureVolcanicIslands)>;

def HasSDWA9
    : Predicate<"Subtarget->hasSDWA()">,
      AssemblerPredicate<(all_of FeatureGCN3Encoding, FeatureGFX9Insts, FeatureSDWA)>;

def HasSDWA10
    : Predicate<"Subtarget->hasSDWA()">,
      AssemblerPredicate<(all_of (not FeatureGCN3Encoding), FeatureGFX10Insts,
                          FeatureSDWA)>;

def HasDPP
    : Predicate<"Subtarget->hasDPP()">,
      AssemblerPredicate<(all_of FeatureGCN3Encoding, FeatureDPP)>;

def HasDPP8
    : Predicate<"Subtarget->hasDPP8()">,
      AssemblerPredicate<(all_of (not FeatureGCN3Encoding), FeatureGFX10Insts,
                          FeatureDPP8)>;

def HasDPALU_DPP
    : Predicate<"Subtarget->hasDPALU_DPP()">,
      AssemblerPredicate<(all_of FeatureDPALU_DPP)>;

def HasPackedFP32Ops
    : Predicate<"Subtarget->hasPackedFP32Ops()">,
      AssemblerPredicate<(all_of FeaturePackedFP32Ops)>;

def HasPkMovB32
    : Predicate<"Subtarget->hasPkMovB32()">,
      AssemblerPredicate<(all_of FeatureGFX90AInsts)>;

def HasFmaakFmamkF32Insts
    : Predicate<"Subtarget->hasFmaakFmamkF32Insts()">,
      AssemblerPredicate<(any_of FeatureGFX10Insts, FeatureGFX940Insts)>;

def HasImageInsts
    : Predicate<"Subtarget->hasImageInsts()">,
      AssemblerPredicate<(all_of FeatureImageInsts)>;

def HasExtendedImageInsts
    : Predicate<"Subtarget->hasExtendedImageInsts()">,
      AssemblerPredicate<(all_of FeatureExtendedImageInsts)>;

def HasR128A16
    : Predicate<"Subtarget->hasR128A16()">,
      AssemblerPredicate<(all_of FeatureR128A16)>;

def HasA16
    : Predicate<"Subtarget->hasA16()">,
      AssemblerPredicate<(all_of FeatureA16)>;

def HasG16
    : Predicate<"Subtarget->hasG16()">,
      AssemblerPredicate<(all_of FeatureG16)>;

// Same C++ condition as HasDPP; the assembler expression instead requires
// FeatureGFX10Insts without FeatureGCN3Encoding.
def HasDPP16
    : Predicate<"Subtarget->hasDPP()">,
      AssemblerPredicate<(all_of (not FeatureGCN3Encoding), FeatureGFX10Insts,
                          FeatureDPP)>;

def HasIntClamp
    : Predicate<"Subtarget->hasIntClamp()">,
      AssemblerPredicate<(all_of FeatureIntClamp)>;

def HasMadMixInsts
    : Predicate<"Subtarget->hasMadMixInsts()">,
      AssemblerPredicate<(all_of FeatureMadMixInsts)>;

def HasScalarStores
    : Predicate<"Subtarget->hasScalarStores()">,
      AssemblerPredicate<(all_of FeatureScalarStores)>;

def HasScalarAtomics
    : Predicate<"Subtarget->hasScalarAtomics()">,
      AssemblerPredicate<(all_of FeatureScalarAtomics)>;

// Complementary pair on FeatureNoSdstCMPX.
def HasNoSdstCMPX
    : Predicate<"Subtarget->hasNoSdstCMPX()">,
      AssemblerPredicate<(all_of FeatureNoSdstCMPX)>;

def HasSdstCMPX
    : Predicate<"!Subtarget->hasNoSdstCMPX()">,
      AssemblerPredicate<(all_of (not FeatureNoSdstCMPX))>;

// C++-only predicates comparing getLDSBankCount() with a fixed value.
def has16BankLDS : Predicate<"Subtarget->getLDSBankCount() == 16">;
def has32BankLDS : Predicate<"Subtarget->getLDSBankCount() == 32">;
def HasVGPRIndexMode
    : Predicate<"Subtarget->hasVGPRIndexMode()">,
      AssemblerPredicate<(all_of FeatureVGPRIndexMode)>;
def HasMovrel
    : Predicate<"Subtarget->hasMovrel()">,
      AssemblerPredicate<(all_of FeatureMovrel)>;

def HasFmaMixInsts
    : Predicate<"Subtarget->hasFmaMixInsts()">,
      AssemblerPredicate<(all_of FeatureFmaMixInsts)>;

def HasDLInsts
    : Predicate<"Subtarget->hasDLInsts()">,
      AssemblerPredicate<(all_of FeatureDLInsts)>;

def HasFmacF64Inst
    : Predicate<"Subtarget->hasFmacF64Inst()">,
      AssemblerPredicate<(all_of FeatureFmacF64Inst)>;

// Dot-product instruction groups: one predicate per FeatureDotNInsts.
def HasDot1Insts
    : Predicate<"Subtarget->hasDot1Insts()">,
      AssemblerPredicate<(all_of FeatureDot1Insts)>;

def HasDot2Insts
    : Predicate<"Subtarget->hasDot2Insts()">,
      AssemblerPredicate<(all_of FeatureDot2Insts)>;

def HasDot3Insts
    : Predicate<"Subtarget->hasDot3Insts()">,
      AssemblerPredicate<(all_of FeatureDot3Insts)>;

def HasDot4Insts
    : Predicate<"Subtarget->hasDot4Insts()">,
      AssemblerPredicate<(all_of FeatureDot4Insts)>;

def HasDot5Insts
    : Predicate<"Subtarget->hasDot5Insts()">,
      AssemblerPredicate<(all_of FeatureDot5Insts)>;

def HasDot6Insts
    : Predicate<"Subtarget->hasDot6Insts()">,
      AssemblerPredicate<(all_of FeatureDot6Insts)>;

def HasDot7Insts
    : Predicate<"Subtarget->hasDot7Insts()">,
      AssemblerPredicate<(all_of FeatureDot7Insts)>;

def HasDot8Insts
    : Predicate<"Subtarget->hasDot8Insts()">,
      AssemblerPredicate<(all_of FeatureDot8Insts)>;

def HasDot9Insts
    : Predicate<"Subtarget->hasDot9Insts()">,
      AssemblerPredicate<(all_of FeatureDot9Insts)>;

def HasDot10Insts
    : Predicate<"Subtarget->hasDot10Insts()">,
      AssemblerPredicate<(all_of FeatureDot10Insts)>;

def HasDot11Insts
    : Predicate<"Subtarget->hasDot11Insts()">,
      AssemblerPredicate<(all_of FeatureDot11Insts)>;

def HasGetWaveIdInst
    : Predicate<"Subtarget->hasGetWaveIdInst()">,
      AssemblerPredicate<(all_of FeatureGetWaveIdInst)>;

def HasMAIInsts
    : Predicate<"Subtarget->hasMAIInsts()">,
      AssemblerPredicate<(all_of FeatureMAIInsts)>;

def HasSMemRealTime
    : Predicate<"Subtarget->hasSMemRealTime()">,
      AssemblerPredicate<(all_of FeatureSMemRealTime)>;

def HasSMemTimeInst
    : Predicate<"Subtarget->hasSMemTimeInst()">,
      AssemblerPredicate<(all_of FeatureSMemTimeInst)>;

def HasShaderCyclesRegister
    : Predicate<"Subtarget->hasShaderCyclesRegister()">,
      AssemblerPredicate<(all_of FeatureShaderCyclesRegister)>;

// C++-only predicate (no AssemblerPredicate attached).
def HasShaderCyclesHiLoRegisters : Predicate<"Subtarget->hasShaderCyclesHiLoRegisters()">;

def HasFP8Insts
    : Predicate<"Subtarget->hasFP8Insts()">,
      AssemblerPredicate<(all_of FeatureFP8Insts)>;

def HasFP8ConversionInsts
    : Predicate<"Subtarget->hasFP8ConversionInsts()">,
      AssemblerPredicate<(all_of FeatureFP8ConversionInsts)>;

def HasPkFmacF16Inst
    : Predicate<"Subtarget->hasPkFmacF16Inst()">,
      AssemblerPredicate<(all_of FeaturePkFmacF16Inst)>;

def HasMadMacF32Insts
    : Predicate<"Subtarget->hasMadMacF32Insts()">,
      AssemblerPredicate<(all_of FeatureMadMacF32Insts)>;

// Both sides of this def are keyed on the GFX10.3 instruction set.
def HasFmaLegacy32
    : Predicate<"Subtarget->hasGFX10_3Insts()">,
      AssemblerPredicate<(any_of FeatureGFX10_3Insts)>;

def HasAtomicDsPkAdd16Insts
    : Predicate<"Subtarget->hasAtomicDsPkAdd16Insts()">,
      AssemblerPredicate<(any_of FeatureAtomicDsPkAdd16Insts)>;

def HasAtomicFlatPkAdd16Insts
    : Predicate<"Subtarget->hasAtomicFlatPkAdd16Insts()">,
      AssemblerPredicate<(any_of FeatureAtomicFlatPkAdd16Insts)>;

def HasAtomicFaddRtnInsts
    : Predicate<"Subtarget->hasAtomicFaddRtnInsts()">,
      AssemblerPredicate<(all_of FeatureAtomicFaddRtnInsts)>;
def HasAtomicFaddNoRtnInsts
    : Predicate<"Subtarget->hasAtomicFaddNoRtnInsts()">,
      AssemblerPredicate<(all_of FeatureAtomicFaddNoRtnInsts)>;
// The no-return form is also satisfied by the plain (returning) feature, on
// both the C++ side (||) and the assembler side (any_of).
def HasAtomicBufferGlobalPkAddF16NoRtnInsts
    : Predicate<"Subtarget->hasAtomicBufferGlobalPkAddF16NoRtnInsts() || Subtarget->hasAtomicBufferGlobalPkAddF16Insts()">,
      AssemblerPredicate<(any_of FeatureAtomicBufferGlobalPkAddF16NoRtnInsts,
                          FeatureAtomicBufferGlobalPkAddF16Insts)>;
def HasAtomicBufferGlobalPkAddF16Insts
    : Predicate<"Subtarget->hasAtomicBufferGlobalPkAddF16Insts()">,
      AssemblerPredicate<(all_of FeatureAtomicBufferGlobalPkAddF16Insts)>;
def HasAtomicGlobalPkAddBF16Inst
    : Predicate<"Subtarget->hasAtomicGlobalPkAddBF16Inst()">,
      AssemblerPredicate<(all_of FeatureAtomicGlobalPkAddBF16Inst)>;
def HasAtomicBufferPkAddBF16Inst
    : Predicate<"Subtarget->hasAtomicBufferPkAddBF16Inst()">,
      AssemblerPredicate<(all_of FeatureAtomicBufferPkAddBF16Inst)>;
def HasFlatAtomicFaddF32Inst
    : Predicate<"Subtarget->hasFlatAtomicFaddF32Inst()">,
      AssemblerPredicate<(all_of FeatureFlatAtomicFaddF32Inst)>;

def HasDefaultComponentZero
    : Predicate<"Subtarget->hasDefaultComponentZero()">,
      AssemblerPredicate<(all_of FeatureDefaultComponentZero)>;
def HasDefaultComponentBroadcast
    : Predicate<"Subtarget->hasDefaultComponentBroadcast()">,
      AssemblerPredicate<(all_of FeatureDefaultComponentBroadcast)>;

// The C++ condition is deliberately positive so that it agrees with the def
// name and with the (all_of FeatureDsSrc2Insts) assembler expression. A
// negated condition here would hold exactly on the subtargets that lack
// these instructions.
def HasDsSrc2Insts
    : Predicate<"Subtarget->hasDsSrc2Insts()">,
      AssemblerPredicate<(all_of FeatureDsSrc2Insts)>;

// Unlike the defs around it, this condition string does not go through
// `Subtarget`; it is the bare identifier EnableLateStructurizeCFG.
def EnableLateCFGStructurize : Predicate<"EnableLateStructurizeCFG">;

// Complementary C++-only pair on enableFlatScratch().
def EnableFlatScratch : Predicate<"Subtarget->enableFlatScratch()">;

def DisableFlatScratch : Predicate<"!Subtarget->enableFlatScratch()">;

def HasUnalignedAccessMode
    : Predicate<"Subtarget->hasUnalignedAccessMode()">,
      AssemblerPredicate<(all_of FeatureUnalignedAccessMode)>;

// Complementary C++-only pair on hasMADIntraFwdBug().
def HasMADIntraFwdBug : Predicate<"Subtarget->hasMADIntraFwdBug()">;

def HasNotMADIntraFwdBug : Predicate<"!Subtarget->hasMADIntraFwdBug()">;

def HasSALUFloatInsts
    : Predicate<"Subtarget->hasSALUFloatInsts()">,
      AssemblerPredicate<(all_of FeatureSALUFloatInsts)>;

def HasVGPRSingleUseHintInsts
    : Predicate<"Subtarget->hasVGPRSingleUseHintInsts()">,
      AssemblerPredicate<(all_of FeatureVGPRSingleUseHintInsts)>;

def HasPseudoScalarTrans
    : Predicate<"Subtarget->hasPseudoScalarTrans()">,
      AssemblerPredicate<(all_of FeaturePseudoScalarTrans)>;

// The remaining predicates are C++-only (no AssemblerPredicate attached).
def HasGDS : Predicate<"Subtarget->hasGDS()">;

def HasGWS : Predicate<"Subtarget->hasGWS()">;

def HasCvtFP8VOP1Bug : Predicate<"Subtarget->hasCvtFP8VOP1Bug()">;
def HasNoCvtFP8VOP1Bug : Predicate<"!Subtarget->hasCvtFP8VOP1Bug()">;

def HasAtomicCSubNoRtnInsts : Predicate<"Subtarget->hasAtomicCSubNoRtnInsts()">;

def HasScalarDwordx3Loads : Predicate<"Subtarget->hasScalarDwordx3Loads()">;

// Include AMDGPU TD files
include "SISchedule.td"
include "GCNProcessors.td"
include "AMDGPUInstrInfo.td"
include "SIRegisterInfo.td"
include "AMDGPURegisterBanks.td"
include "AMDGPUInstructions.td"
include "SIInstrInfo.td"
include "AMDGPUCallingConv.td"
include "AMDGPUSearchableTables.td"