//===-- AMDGPU.td - AMDGPU Tablegen files --------*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===------------------------------------------------------------===//

include "llvm/TableGen/SearchableTable.td"
include "llvm/Target/Target.td"
include "AMDGPUFeatures.td"

// Pointer value types p0-p6: i64-based for 0, 1 and 4; i32-based for 2, 3, 5
// and 6.
def p0 : PtrValueType<i64, 0>;
def p1 : PtrValueType<i64, 1>;
def p2 : PtrValueType<i32, 2>;
def p3 : PtrValueType<i32, 3>;
def p4 : PtrValueType<i64, 4>;
def p5 : PtrValueType<i32, 5>;
def p6 : PtrValueType<i32, 6>;

//===------------------------------------------------------------===//
// Subtarget Features (device properties)
//===------------------------------------------------------------===//

// Every record below passes, in order: the feature string, the subtarget field
// it sets, the value assigned to that field, and a help description.

def FeatureFastFMAF32 : SubtargetFeature<
  "fast-fmaf", "FastFMAF32", "true",
  "Assuming f32 fma is at least as fast as mul + add">;

def FeatureFastDenormalF32 : SubtargetFeature<
  "fast-denormal-f32", "FastDenormalF32", "true",
  "Enabling denormals does not cause f32 instructions to run at f64 rates">;

def FeatureMIMG_R128 : SubtargetFeature<
  "mimg-r128", "MIMG_R128", "true",
  "Support 128-bit texture resources">;

def HalfRate64Ops : SubtargetFeature<
  "half-rate-64-ops", "HalfRate64Ops", "true",
  "Most fp64 instructions are half rate instead of quarter">;

def FullRate64Ops : SubtargetFeature<
  "full-rate-64-ops", "FullRate64Ops", "true",
  "Most fp64 instructions are full rate">;

// --- Flat / global / scratch memory instructions ---

def FeatureFlatAddressSpace : SubtargetFeature<
  "flat-address-space", "FlatAddressSpace", "true",
  "Support flat address space">;

def FeatureFlatInstOffsets : SubtargetFeature<
  "flat-inst-offsets", "FlatInstOffsets", "true",
  "Flat instructions have immediate offset addressing mode">;

def FeatureFlatGlobalInsts : SubtargetFeature<
  "flat-global-insts", "FlatGlobalInsts", "true",
  "Have global_* flat memory instructions">;

def FeatureFlatScratchInsts : SubtargetFeature<
  "flat-scratch-insts", "FlatScratchInsts", "true",
  "Have scratch_* flat memory instructions">;

def FeatureScalarFlatScratchInsts : SubtargetFeature<
  "scalar-flat-scratch-insts", "ScalarFlatScratchInsts", "true",
  "Have s_scratch_* flat memory instructions">;

def FeatureEnableFlatScratch : SubtargetFeature<
  "enable-flat-scratch", "EnableFlatScratch", "true",
  "Use scratch_* flat memory instructions to access scratch">;

def FeatureAddNoCarryInsts : SubtargetFeature<
  "add-no-carry-insts", "AddNoCarryInsts", "true",
  "Have VALU add/sub instructions without carry out">;

def FeatureUnalignedBufferAccess : SubtargetFeature<
  "unaligned-buffer-access", "UnalignedBufferAccess", "true",
  "Hardware supports unaligned global loads and stores">;

def FeatureTrapHandler : SubtargetFeature<
  "trap-handler", "TrapHandler", "true",
  "Trap handler support">;

def FeatureUnalignedScratchAccess : SubtargetFeature<
  "unaligned-scratch-access", "UnalignedScratchAccess", "true",
  "Support unaligned scratch loads and stores">;

def FeatureUnalignedDSAccess : SubtargetFeature<
  "unaligned-ds-access", "UnalignedDSAccess", "true",
  "Hardware supports unaligned local and region loads and stores">;

def FeatureApertureRegs : SubtargetFeature<
  "aperture-regs", "HasApertureRegs", "true",
  "Has Memory Aperture Base and Size Registers">;

def FeatureMadMixInsts : SubtargetFeature<
  "mad-mix-insts", "HasMadMixInsts", "true",
  "Has v_mad_mix_f32, v_mad_mixlo_f16, v_mad_mixhi_f16 instructions">;

def FeatureFmaMixInsts : SubtargetFeature<
  "fma-mix-insts", "HasFmaMixInsts", "true",
  "Has v_fma_mix_f32, v_fma_mixlo_f16, v_fma_mixhi_f16 instructions">;

def FeatureSupportsXNACK : SubtargetFeature<
  "xnack-support", "SupportsXNACK", "true",
  "Hardware supports XNACK">;

// On chips that support XNACK, it is disabled when
// SH_MEM_CONFIG.ADDRESS_MODE = GPUVM. The current default kernel driver
// setting is:
//   - graphics ring: XNACK disabled
//   - compute ring:  XNACK enabled
//
// With XNACK enabled, the VMEM latency can be worse.
// With XNACK disabled, the 2 SGPRs can be used for general purposes.
def FeatureXNACK : SubtargetFeature<
  "xnack", "EnableXNACK", "true",
  "Enable XNACK support">;

def FeatureTgSplit : SubtargetFeature<
  "tgsplit", "EnableTgSplit", "true",
  "Enable threadgroup split execution">;

def FeatureCuMode : SubtargetFeature<
  "cumode", "EnableCuMode", "true",
  "Enable CU wavefront execution mode">;

// --- Hardware bugs and hazards ---

def FeatureSGPRInitBug : SubtargetFeature<
  "sgpr-init-bug", "SGPRInitBug", "true",
  "VI SGPR initialization bug requiring a fixed SGPR allocation size">;

def FeatureUserSGPRInit16Bug : SubtargetFeature<
  "user-sgpr-init16-bug", "UserSGPRInit16Bug", "true",
  "Bug requiring at least 16 user+system SGPRs to be enabled">;

def FeatureLdsMisalignedBug : SubtargetFeature<
  "lds-misaligned-bug", "LDSMisalignedBug", "true",
  "Some GFX10 bug with multi-dword LDS and flat access that is not naturally aligned in WGP mode">;

def FeatureMFMAInlineLiteralBug : SubtargetFeature<
  "mfma-inline-literal-bug", "HasMFMAInlineLiteralBug", "true",
  "MFMA cannot use inline literal as SrcC">;

// NOTE(review): the help text for this hazard is still a placeholder.
def FeatureVcmpxPermlaneHazard : SubtargetFeature<
  "vcmpx-permlane-hazard", "HasVcmpxPermlaneHazard", "true",
  "TODO: describe me">;

def FeatureVMEMtoScalarWriteHazard : SubtargetFeature<
  "vmem-to-scalar-write-hazard", "HasVMEMtoScalarWriteHazard", "true",
  "VMEM instruction followed by scalar writing to EXEC mask, M0 or SGPR leads to incorrect execution.">;

def FeatureSMEMtoVectorWriteHazard : SubtargetFeature<
  "smem-to-vector-write-hazard", "HasSMEMtoVectorWriteHazard", "true",
  "s_load_dword followed by v_cmp page faults">;

def FeatureInstFwdPrefetchBug : SubtargetFeature<
  "inst-fwd-prefetch-bug", "HasInstFwdPrefetchBug", "true",
  "S_INST_PREFETCH instruction causes shader to hang">;

def FeatureVcmpxExecWARHazard : SubtargetFeature<
  "vcmpx-exec-war-hazard", "HasVcmpxExecWARHazard", "true",
  "V_CMPX WAR hazard on EXEC (V_CMPX issue ONLY)">;

def FeatureLdsBranchVmemWARHazard : SubtargetFeature<
  "lds-branch-vmem-war-hazard", "HasLdsBranchVmemWARHazard", "true",
  "Switching between LDS and VMEM-tex not waiting VM_VSRC=0">;

def FeatureNSAtoVMEMBug : SubtargetFeature<
  "nsa-to-vmem-bug", "HasNSAtoVMEMBug", "true",
  "MIMG-NSA followed by VMEM fail if EXEC_LO or EXEC_HI equals zero">;

def FeatureNSAClauseBug : SubtargetFeature<
  "nsa-clause-bug", "HasNSAClauseBug", "true",
  "MIMG-NSA in a hard clause has unpredictable results on GFX10.1">;

def FeatureFlatSegmentOffsetBug : SubtargetFeature<
  "flat-segment-offset-bug", "HasFlatSegmentOffsetBug", "true",
  "GFX10 bug where inst_offset is ignored when flat instructions access global memory">;

def FeatureNegativeScratchOffsetBug : SubtargetFeature<
  "negative-scratch-offset-bug", "NegativeScratchOffsetBug", "true",
  "Negative immediate offsets in scratch instructions with an SGPR offset page fault on GFX9">;

def FeatureNegativeUnalignedScratchOffsetBug : SubtargetFeature<
  "negative-unaligned-scratch-offset-bug", "NegativeUnalignedScratchOffsetBug",
  "true",
  "Scratch instructions with a VGPR offset and a negative immediate offset that is not a multiple of 4 read wrong memory on GFX10">;

def FeatureOffset3fBug : SubtargetFeature<
  "offset-3f-bug", "HasOffset3fBug", "true",
  "Branch offset of 3f hardware bug">;

def FeatureImageStoreD16Bug : SubtargetFeature<
  "image-store-d16-bug", "HasImageStoreD16Bug", "true",
  "Image Store D16 hardware bug">;

def FeatureImageGather4D16Bug : SubtargetFeature<
  "image-gather4-d16-bug", "HasImageGather4D16Bug", "true",
  "Image Gather4 D16 hardware bug">;

def FeatureMADIntraFwdBug : SubtargetFeature<
  "mad-intra-fwd-bug", "HasMADIntraFwdBug", "true",
  "MAD_U64/I64 intra instruction forwarding bug">;

def FeatureMSAALoadDstSelBug : SubtargetFeature<
  "msaa-load-dst-sel-bug", "HasMSAALoadDstSelBug", "true",
  "MSAA loads not honoring dst_sel bug">;

// Feature "ldsbankcount<Value>" assigns the stringified Value to LDSBankCount.
class SubtargetFeatureLDSBankCount<int Value> : SubtargetFeature<
  "ldsbankcount"#Value, "LDSBankCount", !cast<string>(Value),
  "The number of LDS banks per compute unit.">;

def FeatureLDSBankCount16 : SubtargetFeatureLDSBankCount<16>;
def FeatureLDSBankCount32 : SubtargetFeatureLDSBankCount<32>;

// --- Encodings and per-generation instruction sets ---

def FeatureGCN3Encoding : SubtargetFeature<
  "gcn3-encoding", "GCN3Encoding", "true",
  "Encoding format for VI">;

def FeatureCIInsts : SubtargetFeature<
  "ci-insts", "CIInsts", "true",
  "Additional instructions for CI+">;

def FeatureGFX8Insts : SubtargetFeature<
  "gfx8-insts", "GFX8Insts", "true",
  "Additional instructions for GFX8+">;

def FeatureGFX9Insts : SubtargetFeature<
  "gfx9-insts", "GFX9Insts", "true",
  "Additional instructions for GFX9+">;

def FeatureGFX90AInsts : SubtargetFeature<
  "gfx90a-insts", "GFX90AInsts", "true",
  "Additional instructions for GFX90A+">;

def FeatureGFX940Insts : SubtargetFeature<
  "gfx940-insts", "GFX940Insts", "true",
  "Additional instructions for GFX940+">;

def FeatureGFX10Insts : SubtargetFeature<
  "gfx10-insts", "GFX10Insts", "true",
  "Additional instructions for GFX10+">;

def FeatureGFX11Insts : SubtargetFeature<
  "gfx11-insts", "GFX11Insts", "true",
  "Additional instructions for GFX11+">;

def FeatureGFX12Insts : SubtargetFeature<
  "gfx12-insts", "GFX12Insts", "true",
  "Additional instructions for GFX12+">;

def FeatureGFX10_3Insts : SubtargetFeature<
  "gfx10-3-insts", "GFX10_3Insts", "true",
  "Additional instructions for GFX10.3">;

def FeatureGFX7GFX8GFX9Insts : SubtargetFeature<
  "gfx7-gfx8-gfx9-insts", "GFX7GFX8GFX9Insts", "true",
  "Instructions shared in GFX7, GFX8, GFX9">;

def FeatureSMemRealTime : SubtargetFeature<
  "s-memrealtime", "HasSMemRealTime", "true",
  "Has s_memrealtime instruction">;

def FeatureInv2PiInlineImm : SubtargetFeature<
  "inv-2pi-inline-imm", "HasInv2PiInlineImm", "true",
  "Has 1 / (2 * pi) as inline immediate">;

def Feature16BitInsts : SubtargetFeature<
  "16-bit-insts", "Has16BitInsts", "true",
  "Has i16/f16 instructions">;

def FeatureTrue16BitInsts : SubtargetFeature<
  "true16", "HasTrue16BitInsts", "true",
  "True 16-bit operand instructions">;

def FeatureRealTrue16Insts : SubtargetFeature<
  "real-true16", "EnableRealTrue16Insts", "true",
  "Use true 16-bit registers">;

def FeatureVOP3P : SubtargetFeature<
  "vop3p", "HasVOP3PInsts", "true",
  "Has VOP3P packed instructions">;

def FeatureMovrel : SubtargetFeature<
  "movrel", "HasMovrel", "true",
  "Has v_movrel*_b32 instructions">;

def FeatureVGPRIndexMode : SubtargetFeature<
  "vgpr-index-mode", "HasVGPRIndexMode", "true",
  "Has VGPR mode register indexing">;

def FeatureScalarDwordx3Loads : SubtargetFeature<
  "scalar-dwordx3-loads", "HasScalarDwordx3Loads", "true",
  "Has 96-bit scalar load instructions">;

def FeatureScalarStores : SubtargetFeature<
  "scalar-stores", "HasScalarStores", "true",
  "Has store scalar memory instructions">;

def FeatureScalarAtomics : SubtargetFeature<
  "scalar-atomics", "HasScalarAtomics", "true",
  "Has atomic scalar memory instructions">;

// --- SDWA and DPP ---

def FeatureSDWA : SubtargetFeature<
  "sdwa", "HasSDWA", "true",
  "Support SDWA (Sub-DWORD Addressing) extension">;

def FeatureSDWAOmod : SubtargetFeature<
  "sdwa-omod", "HasSDWAOmod", "true",
  "Support OMod with SDWA (Sub-DWORD Addressing) extension">;

def FeatureSDWAScalar : SubtargetFeature<
  "sdwa-scalar", "HasSDWAScalar", "true",
  "Support scalar register with SDWA (Sub-DWORD Addressing) extension">;

def FeatureSDWASdst : SubtargetFeature<
  "sdwa-sdst", "HasSDWASdst", "true",
  "Support scalar dst for VOPC with SDWA (Sub-DWORD Addressing) extension">;

// NOTE(review): the feature string is spelled "sdwa-mav" (not "-mac"); it is
// kept as-is because it is the user-visible feature name.
def FeatureSDWAMac : SubtargetFeature<
  "sdwa-mav", "HasSDWAMac", "true",
  "Support v_mac_f32/f16 with SDWA (Sub-DWORD Addressing) extension">;

def FeatureSDWAOutModsVOPC : SubtargetFeature<
  "sdwa-out-mods-vopc", "HasSDWAOutModsVOPC", "true",
  "Support clamp for VOPC with SDWA (Sub-DWORD Addressing) extension">;

def FeatureDPP : SubtargetFeature<
  "dpp", "HasDPP", "true",
  "Support DPP (Data Parallel Primitives) extension">;

// DPP8 allows arbitrary cross-lane swizzling within groups of 8 lanes.
def FeatureDPP8 : SubtargetFeature<
  "dpp8", "HasDPP8", "true",
  "Support DPP8 (Data Parallel Primitives) extension">;

def FeatureDPALU_DPP : SubtargetFeature<
  "dpp-64bit", "HasDPALU_DPP", "true",
  "Support DPP (Data Parallel Primitives) extension in DP ALU">;

def FeatureDPPSrc1SGPR : SubtargetFeature<
  "dpp-src1-sgpr", "HasDPPSrc1SGPR", "true",
  "Support SGPR for Src1 of DPP instructions">;

def FeaturePackedFP32Ops : SubtargetFeature<
  "packed-fp32-ops", "HasPackedFP32Ops", "true",
  "Support packed fp32 instructions">;

// --- Image instructions and operand encodings ---

def FeatureR128A16 : SubtargetFeature<
  "r128-a16", "HasR128A16", "true",
  "Support gfx9-style A16 for 16-bit coordinates/gradients/lod/clamp/mip image operands, where a16 is aliased with r128">;

def FeatureA16 : SubtargetFeature<
  "a16", "HasA16", "true",
  "Support A16 for 16-bit coordinates/gradients/lod/clamp/mip image operands">;

def FeatureG16 : SubtargetFeature<
  "g16", "HasG16", "true",
  "Support G16 for 16-bit gradient image operands">;

def FeatureNSAEncoding : SubtargetFeature<
  "nsa-encoding", "HasNSAEncoding", "true",
  "Support NSA encoding for image instructions">;

def FeaturePartialNSAEncoding : SubtargetFeature<
  "partial-nsa-encoding", "HasPartialNSAEncoding", "true",
  "Support partial NSA encoding for image instructions">;

def FeatureImageInsts : SubtargetFeature<
  "image-insts", "HasImageInsts", "true",
  "Support image instructions">;

def FeatureExtendedImageInsts : SubtargetFeature<
  "extended-image-insts", "HasExtendedImageInsts", "true",
  "Support mips != 0, lod != 0, gather4, and get_lod">;

def FeatureGFX10_AEncoding : SubtargetFeature<
  "gfx10_a-encoding", "GFX10_AEncoding", "true",
  "Has BVH ray tracing instructions">;

def FeatureGFX10_BEncoding : SubtargetFeature<
  "gfx10_b-encoding", "GFX10_BEncoding", "true",
  "Encoding format GFX10_B">;

def FeatureIntClamp : SubtargetFeature<
  "int-clamp-insts", "HasIntClamp", "true",
  "Support clamp for integer destination">;

def FeatureUnpackedD16VMem : SubtargetFeature<
  "unpacked-d16-vmem", "HasUnpackedD16VMem", "true",
  "Has unpacked d16 vmem instructions">;

// --- DL, dot-product, MAI and FP8 instructions ---

def FeatureDLInsts : SubtargetFeature<
  "dl-insts", "HasDLInsts", "true",
  "Has v_fmac_f32 and v_xnor_b32 instructions">;

def FeatureFmacF64Inst : SubtargetFeature<
  "fmacf64-inst", "HasFmacF64Inst", "true",
  "Has v_fmac_f64 instruction">;

def FeatureDot1Insts : SubtargetFeature<
  "dot1-insts", "HasDot1Insts", "true",
  "Has v_dot4_i32_i8 and v_dot8_i32_i4 instructions">;

def FeatureDot2Insts : SubtargetFeature<
  "dot2-insts", "HasDot2Insts", "true",
  "Has v_dot2_i32_i16, v_dot2_u32_u16 instructions">;

def FeatureDot3Insts : SubtargetFeature<
  "dot3-insts", "HasDot3Insts", "true",
  "Has v_dot8c_i32_i4 instruction">;

def FeatureDot4Insts : SubtargetFeature<
  "dot4-insts", "HasDot4Insts", "true",
  "Has v_dot2c_i32_i16 instruction">;

def FeatureDot5Insts : SubtargetFeature<
  "dot5-insts", "HasDot5Insts", "true",
  "Has v_dot2c_f32_f16 instruction">;

def FeatureDot6Insts : SubtargetFeature<
  "dot6-insts", "HasDot6Insts", "true",
  "Has v_dot4c_i32_i8 instruction">;

def FeatureDot7Insts : SubtargetFeature<
  "dot7-insts", "HasDot7Insts", "true",
  "Has v_dot4_u32_u8, v_dot8_u32_u4 instructions">;

def FeatureDot8Insts : SubtargetFeature<
  "dot8-insts", "HasDot8Insts", "true",
  "Has v_dot4_i32_iu8, v_dot8_i32_iu4 instructions">;

def FeatureDot9Insts : SubtargetFeature<
  "dot9-insts", "HasDot9Insts", "true",
  "Has v_dot2_f16_f16, v_dot2_bf16_bf16, v_dot2_f32_bf16 instructions">;

def FeatureDot10Insts : SubtargetFeature<
  "dot10-insts", "HasDot10Insts", "true",
  "Has v_dot2_f32_f16 instruction">;

def FeatureMAIInsts : SubtargetFeature<
  "mai-insts", "HasMAIInsts", "true",
  "Has mAI instructions">;

def FeatureFP8Insts : SubtargetFeature<
  "fp8-insts", "HasFP8Insts", "true",
  "Has fp8 and bf8 instructions">;

def FeatureFP8ConversionInsts : SubtargetFeature<
  "fp8-conversion-insts", "HasFP8ConversionInsts", "true",
  "Has fp8 and bf8 conversion instructions">;

def FeaturePkFmacF16Inst : SubtargetFeature<
  "pk-fmac-f16-inst", "HasPkFmacF16Inst", "true",
  "Has v_pk_fmac_f16 instruction">;

// --- Atomic instructions ---
// Records that pass a fifth argument list [FeatureFlatGlobalInsts] as the
// implied-features list.

def FeatureAtomicDsPkAdd16Insts : SubtargetFeature<
  "atomic-ds-pk-add-16-insts", "HasAtomicDsPkAdd16Insts", "true",
  "Has ds_pk_add_bf16, ds_pk_add_f16, ds_pk_add_rtn_bf16, "
  "ds_pk_add_rtn_f16 instructions">;

def FeatureAtomicFlatPkAdd16Insts : SubtargetFeature<
  "atomic-flat-pk-add-16-insts", "HasAtomicFlatPkAdd16Insts", "true",
  "Has flat_atomic_pk_add_f16 and flat_atomic_pk_add_bf16 instructions">;

def FeatureAtomicFaddRtnInsts : SubtargetFeature<
  "atomic-fadd-rtn-insts", "HasAtomicFaddRtnInsts", "true",
  "Has buffer_atomic_add_f32 and global_atomic_add_f32 instructions that "
  "return original value",
  [FeatureFlatGlobalInsts]>;

def FeatureAtomicFaddNoRtnInsts : SubtargetFeature<
  "atomic-fadd-no-rtn-insts", "HasAtomicFaddNoRtnInsts", "true",
  "Has buffer_atomic_add_f32 and global_atomic_add_f32 instructions that "
  "don't return original value",
  [FeatureFlatGlobalInsts]>;

def FeatureAtomicBufferGlobalPkAddF16NoRtnInsts : SubtargetFeature<
  "atomic-buffer-global-pk-add-f16-no-rtn-insts",
  "HasAtomicBufferGlobalPkAddF16NoRtnInsts", "true",
  "Has buffer_atomic_pk_add_f16 and global_atomic_pk_add_f16 instructions that "
  "don't return original value",
  [FeatureFlatGlobalInsts]>;

def FeatureAtomicBufferGlobalPkAddF16Insts : SubtargetFeature<
  "atomic-buffer-global-pk-add-f16-insts",
  "HasAtomicBufferGlobalPkAddF16Insts", "true",
  "Has buffer_atomic_pk_add_f16 and global_atomic_pk_add_f16 instructions that "
  "can return original value",
  [FeatureFlatGlobalInsts]>;

def FeatureAtomicGlobalPkAddBF16Inst : SubtargetFeature<
  "atomic-global-pk-add-bf16-inst", "HasAtomicGlobalPkAddBF16Inst", "true",
  "Has global_atomic_pk_add_bf16 instruction",
  [FeatureFlatGlobalInsts]>;

def FeatureAtomicCSubNoRtnInsts : SubtargetFeature<
  "atomic-csub-no-rtn-insts", "HasAtomicCSubNoRtnInsts", "true",
  "Has buffer_atomic_csub and global_atomic_csub instructions that don't "
  "return original value">;

def FeatureFlatAtomicFaddF32Inst : SubtargetFeature<
  "flat-atomic-fadd-f32-inst", "HasFlatAtomicFaddF32Inst", "true",
  "Has flat_atomic_add_f32 instruction">;

// --- Miscellaneous device properties ---

def FeatureDefaultComponentZero : SubtargetFeature<
  "default-component-zero", "HasDefaultComponentZero", "true",
  "BUFFER/IMAGE store instructions set unspecified components to zero (before GFX12)">;

def FeatureDefaultComponentBroadcast : SubtargetFeature<
  "default-component-broadcast", "HasDefaultComponentBroadcast", "true",
  "BUFFER/IMAGE store instructions set unspecified components to x component (GFX12)">;

def FeatureSupportsSRAMECC : SubtargetFeature<
  "sramecc-support", "SupportsSRAMECC", "true",
  "Hardware supports SRAMECC">;

def FeatureSRAMECC : SubtargetFeature<
  "sramecc", "EnableSRAMECC", "true",
  "Enable SRAMECC">;

def FeatureNoSdstCMPX : SubtargetFeature<
  "no-sdst-cmpx", "HasNoSdstCMPX", "true",
  "V_CMPX does not write VCC/SGPR in addition to EXEC">;

def FeatureVscnt : SubtargetFeature<
  "vscnt", "HasVscnt", "true",
  "Has separate store vscnt counter">;

def FeatureGetWaveIdInst : SubtargetFeature<
  "get-wave-id-inst", "HasGetWaveIdInst", "true",
  "Has s_get_waveid_in_workgroup instruction">;

def FeatureSMemTimeInst : SubtargetFeature<
  "s-memtime-inst", "HasSMemTimeInst", "true",
  "Has s_memtime instruction">;

def FeatureShaderCyclesRegister : SubtargetFeature<
  "shader-cycles-register", "HasShaderCyclesRegister", "true",
  "Has SHADER_CYCLES hardware register">;

def FeatureShaderCyclesHiLoRegisters : SubtargetFeature<
  "shader-cycles-hi-lo-registers", "HasShaderCyclesHiLoRegisters", "true",
  "Has SHADER_CYCLES_HI/LO hardware registers">;

def FeatureMadMacF32Insts : SubtargetFeature<
  "mad-mac-f32-insts", "HasMadMacF32Insts", "true",
  "Has v_mad_f32/v_mac_f32/v_madak_f32/v_madmk_f32 instructions">;

def FeatureDsSrc2Insts : SubtargetFeature<
  "ds-src2-insts", "HasDsSrc2Insts", "true",
  "Has ds_*_src2 instructions">;

def FeatureVOP3Literal : SubtargetFeature<
  "vop3-literal", "HasVOP3Literal", "true",
  "Can use one literal in VOP3">;

def FeatureNoDataDepHazard : SubtargetFeature<
  "no-data-dep-hazard", "HasNoDataDepHazard", "true",
  "Does not need SW waitstates">;

def FeatureGFX11FullVGPRs : SubtargetFeature<
  "gfx11-full-vgprs", "HasGFX11FullVGPRs", "true",
  "GFX11 with 50% more physical VGPRs and 50% larger allocation granule than GFX10">;

def FeatureVOPD : SubtargetFeature<
  "vopd", "HasVOPDInsts", "true",
  "Has VOPD dual issue wave32 instructions">;

def FeatureVALUTransUseHazard : SubtargetFeature<
  "valu-trans-use-hazard", "HasVALUTransUseHazard", "true",
  "Hazard when TRANS instructions are closely followed by a use of the result">;

def FeatureForceStoreSC0SC1 : SubtargetFeature<
  "force-store-sc0-sc1", "HasForceStoreSC0SC1", "true",
  "Has SC0 and SC1 on stores">;

def FeatureSALUFloatInsts : SubtargetFeature<
  "salu-float", "HasSALUFloatInsts", "true",
  "Has SALU floating point instructions">;

def FeatureVGPRSingleUseHintInsts : SubtargetFeature<
  "vgpr-singleuse-hint", "HasVGPRSingleUseHintInsts", "true",
  "Has single-use VGPR hint instructions">;

def FeaturePseudoScalarTrans : SubtargetFeature<
  "pseudo-scalar-trans", "HasPseudoScalarTrans", "true",
  "Has Pseudo Scalar Transcendental instructions">;

def FeatureHasRestrictedSOffset : SubtargetFeature<
  "restricted-soffset", "HasRestrictedSOffset", "true",
  "Has restricted SOffset (immediate not supported).">;

//===------------------------------------------------------------===//
// Subtarget Features (options and debugging)
//===------------------------------------------------------------===//

// Feature "max-private-element-size-<size>" assigns the stringified size to
// MaxPrivateElementSize.
class FeatureMaxPrivateElementSize<int size> : SubtargetFeature<
  "max-private-element-size-"#size, "MaxPrivateElementSize",
  !cast<string>(size),
  "Maximum private access size may be "#size>;

def FeatureMaxPrivateElementSize4 : FeatureMaxPrivateElementSize<4>;
def FeatureMaxPrivateElementSize8 : FeatureMaxPrivateElementSize<8>;
def FeatureMaxPrivateElementSize16 : FeatureMaxPrivateElementSize<16>;

// Two spellings ("DumpCode" and "dumpcode") that both set the DumpCode field.
def FeatureDumpCode : SubtargetFeature<
  "DumpCode", "DumpCode", "true",
  "Dump MachineInstrs in the CodeEmitter">;

def FeatureDumpCodeLower : SubtargetFeature<
  "dumpcode", "DumpCode", "true",
  "Dump MachineInstrs in the CodeEmitter">;

// XXX - This should probably be removed once enabled by default
def FeatureEnableLoadStoreOpt : SubtargetFeature<
  "load-store-opt", "EnableLoadStoreOpt", "true",
  "Enable SI load/store optimizer pass">;

// Performance debugging feature. Permits DS instruction immediate offsets
// even when the base pointer cannot be proven safe to use as a base. On SI,
// base pointer values that won't give the same result as a 16-bit add are
// not safe to fold, but this overrides the conservative test for the base
// pointer.
def FeatureEnableUnsafeDSOffsetFolding : SubtargetFeature<
  "unsafe-ds-offset-folding", "EnableUnsafeDSOffsetFolding", "true",
  "Force using DS instruction immediate offsets on SI">;

def FeatureEnableSIScheduler : SubtargetFeature<
  "si-scheduler", "EnableSIScheduler", "true",
  "Enable SI Machine Scheduler">;

def FeatureEnableDS128 : SubtargetFeature<
  "enable-ds128", "EnableDS128", "true",
  "Use ds_{read|write}_b128">;

// Sparse texture support requires all result registers to be zeroed when
// PRTStrictNull is set to true. It is turned on for all architectures, but is
// exposed as a feature in case there are situations where PRTStrictNull is
// disabled by the driver.
def FeatureEnablePRTStrictNull : SubtargetFeature<
  "enable-prt-strict-null", "EnablePRTStrictNull", "true",
  "Enable zeroing of result registers for sparse texture fetches">;

// Unless +-flat-for-global is specified, turn on FlatForGlobal for
// all OS-es on VI and newer hardware to avoid assertion failures due
// to missing ADDR64 variants of MUBUF instructions.
// FIXME: moveToVALU should be able to handle converting addr64 MUBUF
// instructions.

def FeatureFlatForGlobal : SubtargetFeature<
  "flat-for-global", "FlatForGlobal", "true",
  "Force to generate flat instruction for global">;

def FeatureAutoWaitcntBeforeBarrier : SubtargetFeature<
  "auto-waitcnt-before-barrier", "AutoWaitcntBeforeBarrier", "true",
  "Hardware automatically inserts waitcnt before barrier">;

def FeatureBackOffBarrier : SubtargetFeature<
  "back-off-barrier", "BackOffBarrier", "true",
  "Hardware supports backing off s_barrier if an exception occurs">;

def FeatureTrigReducedRange : SubtargetFeature<
  "trig-reduced-range", "HasTrigReducedRange", "true",
  "Requires use of fract on arguments to trig instructions">;

def FeatureKernargPreload : SubtargetFeature<
  "kernarg-preload", "KernargPreload", "true",
  "Hardware supports preloading of kernel arguments in user SGPRs.">;

// Alignment enforcement is governed by a configuration register:
// SH_MEM_CONFIG.alignment_mode
def FeatureUnalignedAccessMode : SubtargetFeature<
  "unaligned-access-mode", "UnalignedAccessMode", "true",
  "Enable unaligned global, local and region loads and stores if the hardware"
  " supports it">;

def FeaturePackedTID : SubtargetFeature<
  "packed-tid", "HasPackedTID", "true",
  "Workitem IDs are packed into v0 at kernel launch">;

def FeatureArchitectedFlatScratch : SubtargetFeature<
  "architected-flat-scratch", "HasArchitectedFlatScratch", "true",
  "Flat Scratch register is a readonly SPI initialized architected register">;

def FeatureArchitectedSGPRs : SubtargetFeature<
  "architected-sgprs", "HasArchitectedSGPRs", "true",
  "Enable the architected SGPRs">;

def FeatureGDS : SubtargetFeature<
  "gds", "HasGDS", "true",
  "Has Global Data Share">;

def FeatureGWS : SubtargetFeature<
  "gws", "HasGWS", "true",
  "Has Global Wave Sync">;

// Placeholder feature (empty feature string) used to disable assembler
// instructions.
def FeatureDisable : SubtargetFeature<
  "", "FeatureDisable", "true",
  "Dummy feature to disable assembler instructions">;

//===----------------------------------------------------------------------===//

// A SubtargetFeatureGeneration whose third argument is fixed to
// "GCNSubtarget"; Value, FeatureName and Implies are forwarded unchanged.
class GCNSubtargetFeatureGeneration<string Value,
                                    string FeatureName,
                                    list<SubtargetFeature> Implies>
  : SubtargetFeatureGeneration<Value, FeatureName, "GCNSubtarget", Implies>;

// Each generation record below lists the features it implies.

def FeatureSouthernIslands : GCNSubtargetFeatureGeneration<
  "SOUTHERN_ISLANDS", "southern-islands",
  [
    FeatureFP64, FeatureLocalMemorySize32768, FeatureMIMG_R128,
    FeatureWavefrontSize64, FeatureSMemTimeInst, FeatureMadMacF32Insts,
    FeatureDsSrc2Insts, FeatureLDSBankCount32, FeatureMovrel,
    FeatureTrigReducedRange, FeatureExtendedImageInsts, FeatureImageInsts,
    FeatureGDS, FeatureGWS, FeatureDefaultComponentZero
  ]>;

def FeatureSeaIslands : GCNSubtargetFeatureGeneration<
  "SEA_ISLANDS", "sea-islands",
  [
    FeatureFP64, FeatureLocalMemorySize65536, FeatureMIMG_R128,
    FeatureWavefrontSize64, FeatureFlatAddressSpace, FeatureCIInsts,
    FeatureMovrel, FeatureTrigReducedRange, FeatureGFX7GFX8GFX9Insts,
    FeatureSMemTimeInst, FeatureMadMacF32Insts, FeatureDsSrc2Insts,
    FeatureExtendedImageInsts, FeatureUnalignedBufferAccess,
    FeatureImageInsts, FeatureGDS, FeatureGWS, FeatureDefaultComponentZero
  ]>;

def FeatureVolcanicIslands : GCNSubtargetFeatureGeneration<
  "VOLCANIC_ISLANDS", "volcanic-islands",
  [
    FeatureFP64, FeatureLocalMemorySize65536, FeatureMIMG_R128,
    FeatureWavefrontSize64, FeatureFlatAddressSpace, FeatureGCN3Encoding,
    FeatureCIInsts, Feature16BitInsts, FeatureSMemRealTime,
    FeatureVGPRIndexMode, FeatureMovrel, FeatureScalarStores,
    FeatureInv2PiInlineImm, FeatureSDWA, FeatureSDWAOutModsVOPC,
    FeatureSDWAMac, FeatureDPP, FeatureIntClamp, FeatureTrigReducedRange,
    FeatureGFX8Insts, FeatureGFX7GFX8GFX9Insts, FeatureSMemTimeInst,
    FeatureMadMacF32Insts, FeatureDsSrc2Insts, FeatureExtendedImageInsts,
    FeatureFastDenormalF32, FeatureUnalignedBufferAccess, FeatureImageInsts,
    FeatureGDS, FeatureGWS, FeatureDefaultComponentZero
  ]>;

def FeatureGFX9 : GCNSubtargetFeatureGeneration<
  "GFX9", "gfx9",
  [
    FeatureFP64, FeatureLocalMemorySize65536, FeatureWavefrontSize64,
    FeatureFlatAddressSpace, FeatureGCN3Encoding, FeatureCIInsts,
    Feature16BitInsts, FeatureSMemRealTime, FeatureScalarStores,
    FeatureInv2PiInlineImm, FeatureApertureRegs, FeatureGFX9Insts,
    FeatureVOP3P, FeatureVGPRIndexMode, FeatureFastFMAF32, FeatureDPP,
    FeatureIntClamp, FeatureSDWA, FeatureSDWAOmod, FeatureSDWAScalar,
    FeatureSDWASdst, FeatureFlatInstOffsets, FeatureFlatGlobalInsts,
    FeatureFlatScratchInsts, FeatureAddNoCarryInsts, FeatureGFX8Insts,
    FeatureGFX7GFX8GFX9Insts, FeatureScalarFlatScratchInsts,
    FeatureScalarAtomics, FeatureR128A16, FeatureA16, FeatureSMemTimeInst,
    FeatureFastDenormalF32, FeatureSupportsXNACK,
    FeatureUnalignedBufferAccess, FeatureUnalignedDSAccess,
    FeatureNegativeScratchOffsetBug, FeatureGWS, FeatureDefaultComponentZero
  ]>;

def FeatureGFX10 : GCNSubtargetFeatureGeneration<
  "GFX10", "gfx10",
  [
    FeatureFP64, FeatureLocalMemorySize65536, FeatureMIMG_R128,
    FeatureFlatAddressSpace, FeatureCIInsts, Feature16BitInsts,
    FeatureSMemRealTime, FeatureInv2PiInlineImm, FeatureApertureRegs,
    FeatureGFX9Insts, FeatureGFX10Insts, FeatureVOP3P, FeatureMovrel,
    FeatureFastFMAF32, FeatureDPP, FeatureIntClamp, FeatureSDWA,
    FeatureSDWAOmod, FeatureSDWAScalar, FeatureSDWASdst,
    FeatureFlatInstOffsets, FeatureFlatGlobalInsts, FeatureFlatScratchInsts,
    FeatureAddNoCarryInsts, FeatureFmaMixInsts, FeatureGFX8Insts,
    FeatureNoSdstCMPX, FeatureVscnt, FeatureVOP3Literal, FeatureDPP8,
    FeatureExtendedImageInsts, FeatureNoDataDepHazard, FeaturePkFmacF16Inst,
    FeatureA16, FeatureSMemTimeInst, FeatureFastDenormalF32, FeatureG16,
    FeatureUnalignedBufferAccess, FeatureUnalignedDSAccess, FeatureImageInsts,
    FeatureGDS, FeatureGWS, FeatureDefaultComponentZero
  ]>;

def FeatureGFX11 : GCNSubtargetFeatureGeneration<
  "GFX11", "gfx11",
  [
    FeatureFP64, FeatureLocalMemorySize65536, FeatureMIMG_R128,
    FeatureFlatAddressSpace, Feature16BitInsts, FeatureInv2PiInlineImm,
    FeatureApertureRegs, FeatureCIInsts, FeatureGFX8Insts, FeatureGFX9Insts,
    FeatureGFX10Insts, FeatureGFX10_AEncoding, FeatureGFX10_BEncoding,
    FeatureGFX10_3Insts, FeatureGFX11Insts, FeatureVOP3P, FeatureVOPD,
    FeatureTrue16BitInsts, FeatureMovrel, FeatureFastFMAF32, FeatureDPP,
    FeatureIntClamp, FeatureFlatInstOffsets, FeatureFlatGlobalInsts,
    FeatureFlatScratchInsts, FeatureAddNoCarryInsts, FeatureFmaMixInsts,
    FeatureNoSdstCMPX, FeatureVscnt, FeatureVOP3Literal, FeatureDPP8,
    FeatureExtendedImageInsts, FeatureNoDataDepHazard, FeaturePkFmacF16Inst,
    FeatureA16, FeatureFastDenormalF32, FeatureG16,
    FeatureUnalignedBufferAccess, FeatureUnalignedDSAccess, FeatureGDS,
    FeatureGWS, FeatureDefaultComponentZero
  ]>;

def FeatureGFX12 : GCNSubtargetFeatureGeneration<
  "GFX12", "gfx12",
  [
    FeatureFP64, FeatureLocalMemorySize65536, FeatureMIMG_R128,
    FeatureFlatAddressSpace, Feature16BitInsts, FeatureInv2PiInlineImm,
    FeatureApertureRegs, FeatureCIInsts, FeatureGFX8Insts, FeatureGFX9Insts,
    FeatureGFX10Insts, FeatureGFX10_AEncoding, FeatureGFX10_BEncoding,
    FeatureGFX10_3Insts, FeatureGFX11Insts, FeatureGFX12Insts, FeatureVOP3P,
    FeatureVOPD, FeatureMovrel, FeatureFastFMAF32, FeatureDPP,
    FeatureIntClamp, FeatureFlatInstOffsets, FeatureFlatGlobalInsts,
    FeatureFlatScratchInsts, FeatureAddNoCarryInsts, FeatureFmaMixInsts,
    FeatureNoSdstCMPX, FeatureVscnt, FeatureVOP3Literal, FeatureDPP8,
    FeatureNoDataDepHazard, FeaturePkFmacF16Inst, FeatureA16,
    FeatureFastDenormalF32, FeatureG16, FeatureUnalignedBufferAccess,
    FeatureUnalignedDSAccess, FeatureTrue16BitInsts,
    FeatureDefaultComponentBroadcast
  ]>;

//===----------------------------------------------------------------------===//

// Holds a list of subtarget features in its Features field so that other
// records can read it back (see the !listconcat use below).
class FeatureSet<list<SubtargetFeature> Features_> {
  list<SubtargetFeature> Features = Features_;
}

def FeatureISAVersion6_0_0 : FeatureSet<
  [FeatureSouthernIslands, FeatureFastFMAF32, HalfRate64Ops,
   FeatureLDSBankCount32]>;

def FeatureISAVersion6_0_1 : FeatureSet<
  [FeatureSouthernIslands, FeatureLDSBankCount32]>;

def FeatureISAVersion6_0_2 : FeatureSet<
  [FeatureSouthernIslands, FeatureLDSBankCount32]>;

def FeatureISAVersion7_0_0 : FeatureSet<
  [FeatureSeaIslands, FeatureLDSBankCount32]>;

def FeatureISAVersion7_0_1 : FeatureSet<
  [FeatureSeaIslands, HalfRate64Ops, FeatureLDSBankCount32,
   FeatureFastFMAF32]>;

def FeatureISAVersion7_0_2 : FeatureSet<
  [FeatureSeaIslands, FeatureLDSBankCount16, FeatureFastFMAF32]>;

def FeatureISAVersion7_0_3 : FeatureSet<
  [FeatureSeaIslands, FeatureLDSBankCount16]>;

def FeatureISAVersion7_0_4 : FeatureSet<
  [FeatureSeaIslands, FeatureLDSBankCount32]>;

def FeatureISAVersion7_0_5 : FeatureSet<
  [FeatureSeaIslands, FeatureLDSBankCount16]>;

// Shared base list for the 8.0.x ISA versions.
def FeatureISAVersion8_0_Common : FeatureSet<
  [FeatureVolcanicIslands, FeatureLDSBankCount32, FeatureUnpackedD16VMem]>;

def FeatureISAVersion8_0_1 : FeatureSet<
  !listconcat(FeatureISAVersion8_0_Common.Features,
              [FeatureFastFMAF32, HalfRate64Ops, FeatureSupportsXNACK])>;

def FeatureISAVersion8_0_2 : FeatureSet<
!listconcat(FeatureISAVersion8_0_Common.Features, 1192 [FeatureSGPRInitBug])>; 1193 1194def FeatureISAVersion8_0_3 : FeatureSet< 1195 !listconcat(FeatureISAVersion8_0_Common.Features, 1196 [])>; 1197 1198def FeatureISAVersion8_0_5 : FeatureSet< 1199 !listconcat(FeatureISAVersion8_0_Common.Features, 1200 [FeatureSGPRInitBug])>; 1201 1202def FeatureISAVersion8_1_0 : FeatureSet< 1203 [FeatureVolcanicIslands, 1204 FeatureLDSBankCount16, 1205 FeatureSupportsXNACK, 1206 FeatureImageStoreD16Bug, 1207 FeatureImageGather4D16Bug]>; 1208 1209def FeatureISAVersion9_0_Common : FeatureSet< 1210 [FeatureGFX9, 1211 FeatureLDSBankCount32, 1212 FeatureImageInsts, 1213 FeatureMadMacF32Insts]>; 1214 1215def FeatureISAVersion9_0_MI_Common : FeatureSet< 1216 !listconcat(FeatureISAVersion9_0_Common.Features, 1217 [FeatureFmaMixInsts, 1218 FeatureDLInsts, 1219 FeatureDot1Insts, 1220 FeatureDot2Insts, 1221 FeatureDot3Insts, 1222 FeatureDot4Insts, 1223 FeatureDot5Insts, 1224 FeatureDot6Insts, 1225 FeatureDot7Insts, 1226 FeatureDot10Insts, 1227 FeatureMAIInsts, 1228 FeaturePkFmacF16Inst, 1229 FeatureAtomicFaddNoRtnInsts, 1230 FeatureSupportsSRAMECC])>; 1231 1232def FeatureISAVersion9_0_0 : FeatureSet< 1233 !listconcat(FeatureISAVersion9_0_Common.Features, 1234 [FeatureGDS, 1235 FeatureMadMixInsts, 1236 FeatureDsSrc2Insts, 1237 FeatureExtendedImageInsts, 1238 FeatureImageGather4D16Bug])>; 1239 1240def FeatureISAVersion9_0_2 : FeatureSet< 1241 !listconcat(FeatureISAVersion9_0_Common.Features, 1242 [FeatureGDS, 1243 FeatureMadMixInsts, 1244 FeatureDsSrc2Insts, 1245 FeatureExtendedImageInsts, 1246 FeatureImageGather4D16Bug])>; 1247 1248def FeatureISAVersion9_0_4 : FeatureSet< 1249 !listconcat(FeatureISAVersion9_0_Common.Features, 1250 [FeatureGDS, 1251 FeatureDsSrc2Insts, 1252 FeatureExtendedImageInsts, 1253 FeatureFmaMixInsts, 1254 FeatureImageGather4D16Bug])>; 1255 1256def FeatureISAVersion9_0_6 : FeatureSet< 1257 !listconcat(FeatureISAVersion9_0_Common.Features, 1258 [FeatureGDS, 1259 
HalfRate64Ops, 1260 FeatureFmaMixInsts, 1261 FeatureDsSrc2Insts, 1262 FeatureExtendedImageInsts, 1263 FeatureDLInsts, 1264 FeatureDot1Insts, 1265 FeatureDot2Insts, 1266 FeatureDot7Insts, 1267 FeatureDot10Insts, 1268 FeatureSupportsSRAMECC, 1269 FeatureImageGather4D16Bug])>; 1270 1271def FeatureISAVersion9_0_8 : FeatureSet< 1272 !listconcat(FeatureISAVersion9_0_MI_Common.Features, 1273 [FeatureGDS, 1274 HalfRate64Ops, 1275 FeatureDsSrc2Insts, 1276 FeatureExtendedImageInsts, 1277 FeatureAtomicBufferGlobalPkAddF16NoRtnInsts, 1278 FeatureMFMAInlineLiteralBug, 1279 FeatureImageGather4D16Bug])>; 1280 1281def FeatureISAVersion9_0_9 : FeatureSet< 1282 !listconcat(FeatureISAVersion9_0_Common.Features, 1283 [FeatureGDS, 1284 FeatureMadMixInsts, 1285 FeatureDsSrc2Insts, 1286 FeatureExtendedImageInsts, 1287 FeatureImageInsts, 1288 FeatureImageGather4D16Bug])>; 1289 1290def FeatureISAVersion9_0_A : FeatureSet< 1291 !listconcat(FeatureISAVersion9_0_MI_Common.Features, 1292 [FeatureGFX90AInsts, 1293 FeatureFmacF64Inst, 1294 FeatureDPALU_DPP, 1295 FeaturePackedFP32Ops, 1296 FeatureAtomicFaddRtnInsts, 1297 FeatureAtomicBufferGlobalPkAddF16Insts, 1298 FeaturePackedTID, 1299 FullRate64Ops, 1300 FeatureBackOffBarrier, 1301 FeatureKernargPreload])>; 1302 1303def FeatureISAVersion9_0_C : FeatureSet< 1304 !listconcat(FeatureISAVersion9_0_Common.Features, 1305 [FeatureGDS, 1306 FeatureMadMixInsts, 1307 FeatureDsSrc2Insts, 1308 FeatureExtendedImageInsts, 1309 FeatureImageGather4D16Bug])>; 1310 1311def FeatureISAVersion9_4_Common : FeatureSet< 1312 [FeatureGFX9, 1313 FeatureGFX90AInsts, 1314 FeatureGFX940Insts, 1315 FeatureFmaMixInsts, 1316 FeatureLDSBankCount32, 1317 FeatureDLInsts, 1318 FeatureFmacF64Inst, 1319 FeatureDot1Insts, 1320 FeatureDot2Insts, 1321 FeatureDot3Insts, 1322 FeatureDot4Insts, 1323 FeatureDot5Insts, 1324 FeatureDot6Insts, 1325 FeatureDot7Insts, 1326 FeatureDot10Insts, 1327 FeatureAtomicDsPkAdd16Insts, 1328 FeatureAtomicFlatPkAdd16Insts, 1329 FeatureDPALU_DPP, 1330 
FeaturePackedFP32Ops, 1331 FeatureMAIInsts, 1332 FeatureFP8Insts, 1333 FeatureFP8ConversionInsts, 1334 FeaturePkFmacF16Inst, 1335 FeatureAtomicFaddRtnInsts, 1336 FeatureAtomicFaddNoRtnInsts, 1337 FeatureAtomicBufferGlobalPkAddF16Insts, 1338 FeatureAtomicGlobalPkAddBF16Inst, 1339 FeatureFlatAtomicFaddF32Inst, 1340 FeatureSupportsSRAMECC, 1341 FeaturePackedTID, 1342 FeatureArchitectedFlatScratch, 1343 FullRate64Ops, 1344 FeatureBackOffBarrier, 1345 FeatureKernargPreload]>; 1346 1347def FeatureISAVersion9_4_0 : FeatureSet< 1348 !listconcat(FeatureISAVersion9_4_Common.Features, 1349 [FeatureForceStoreSC0SC1])>; 1350 1351def FeatureISAVersion9_4_1 : FeatureSet< 1352 !listconcat(FeatureISAVersion9_4_Common.Features, 1353 [FeatureForceStoreSC0SC1])>; 1354 1355def FeatureISAVersion9_4_2 : FeatureSet< 1356 !listconcat(FeatureISAVersion9_4_Common.Features, 1357 [])>; 1358 1359def FeatureISAVersion10_Common : FeatureSet< 1360 [FeatureGFX10, 1361 FeatureLDSBankCount32, 1362 FeatureDLInsts, 1363 FeatureNSAEncoding, 1364 FeatureWavefrontSize32, 1365 FeatureBackOffBarrier]>; 1366 1367def FeatureISAVersion10_1_Common : FeatureSet< 1368 !listconcat(FeatureISAVersion10_Common.Features, 1369 [FeatureScalarStores, 1370 FeatureScalarAtomics, 1371 FeatureScalarFlatScratchInsts, 1372 FeatureGetWaveIdInst, 1373 FeatureMadMacF32Insts, 1374 FeatureDsSrc2Insts, 1375 FeatureLdsMisalignedBug, 1376 FeatureSupportsXNACK, 1377 // gfx101x bugs 1378 FeatureVcmpxPermlaneHazard, 1379 FeatureVMEMtoScalarWriteHazard, 1380 FeatureSMEMtoVectorWriteHazard, 1381 FeatureInstFwdPrefetchBug, 1382 FeatureVcmpxExecWARHazard, 1383 FeatureLdsBranchVmemWARHazard, 1384 FeatureNSAtoVMEMBug, 1385 FeatureNSAClauseBug, 1386 FeatureOffset3fBug, 1387 FeatureFlatSegmentOffsetBug, 1388 FeatureNegativeUnalignedScratchOffsetBug])>; 1389 1390def FeatureISAVersion10_1_0 : FeatureSet< 1391 !listconcat(FeatureISAVersion10_1_Common.Features, 1392 [])>; 1393 1394def FeatureISAVersion10_1_1 : FeatureSet< 1395 
!listconcat(FeatureISAVersion10_1_Common.Features, 1396 [FeatureDot1Insts, 1397 FeatureDot2Insts, 1398 FeatureDot5Insts, 1399 FeatureDot6Insts, 1400 FeatureDot7Insts, 1401 FeatureDot10Insts])>; 1402 1403def FeatureISAVersion10_1_2 : FeatureSet< 1404 !listconcat(FeatureISAVersion10_1_Common.Features, 1405 [FeatureDot1Insts, 1406 FeatureDot2Insts, 1407 FeatureDot5Insts, 1408 FeatureDot6Insts, 1409 FeatureDot7Insts, 1410 FeatureDot10Insts])>; 1411 1412def FeatureISAVersion10_1_3 : FeatureSet< 1413 !listconcat(FeatureISAVersion10_1_Common.Features, 1414 [FeatureGFX10_AEncoding])>; 1415 1416def FeatureISAVersion10_3_0 : FeatureSet< 1417 !listconcat(FeatureISAVersion10_Common.Features, 1418 [FeatureGFX10_AEncoding, 1419 FeatureGFX10_BEncoding, 1420 FeatureGFX10_3Insts, 1421 FeatureDot1Insts, 1422 FeatureDot2Insts, 1423 FeatureDot5Insts, 1424 FeatureDot6Insts, 1425 FeatureDot7Insts, 1426 FeatureDot10Insts, 1427 FeatureShaderCyclesRegister])>; 1428 1429def FeatureISAVersion11_Common : FeatureSet< 1430 [FeatureGFX11, 1431 FeatureLDSBankCount32, 1432 FeatureDLInsts, 1433 FeatureDot5Insts, 1434 FeatureDot7Insts, 1435 FeatureDot8Insts, 1436 FeatureDot9Insts, 1437 FeatureDot10Insts, 1438 FeatureNSAEncoding, 1439 FeaturePartialNSAEncoding, 1440 FeatureWavefrontSize32, 1441 FeatureShaderCyclesRegister, 1442 FeatureArchitectedFlatScratch, 1443 FeatureAtomicFaddRtnInsts, 1444 FeatureAtomicFaddNoRtnInsts, 1445 FeatureFlatAtomicFaddF32Inst, 1446 FeatureImageInsts, 1447 FeaturePackedTID, 1448 FeatureVcmpxPermlaneHazard, 1449 FeatureMADIntraFwdBug]>; 1450 1451def FeatureISAVersion11_0_Common : FeatureSet< 1452 !listconcat(FeatureISAVersion11_Common.Features, 1453 [FeatureMSAALoadDstSelBug, 1454 FeatureVALUTransUseHazard])>; 1455 1456def FeatureISAVersion11_0_0 : FeatureSet< 1457 !listconcat(FeatureISAVersion11_0_Common.Features, 1458 [FeatureGFX11FullVGPRs, 1459 FeatureUserSGPRInit16Bug])>; 1460 1461def FeatureISAVersion11_0_1 : FeatureSet< 1462 
!listconcat(FeatureISAVersion11_0_Common.Features, 1463 [FeatureGFX11FullVGPRs])>; 1464 1465def FeatureISAVersion11_0_2 : FeatureSet< 1466 !listconcat(FeatureISAVersion11_0_Common.Features, 1467 [FeatureUserSGPRInit16Bug])>; 1468 1469def FeatureISAVersion11_0_3 : FeatureSet< 1470 !listconcat(FeatureISAVersion11_0_Common.Features, 1471 [])>; 1472 1473def FeatureISAVersion11_5_0 : FeatureSet< 1474 !listconcat(FeatureISAVersion11_Common.Features, 1475 [FeatureSALUFloatInsts, 1476 FeatureDPPSrc1SGPR, 1477 FeatureVGPRSingleUseHintInsts])>; 1478 1479def FeatureISAVersion11_5_1 : FeatureSet< 1480 !listconcat(FeatureISAVersion11_Common.Features, 1481 [FeatureSALUFloatInsts, 1482 FeatureDPPSrc1SGPR, 1483 FeatureVGPRSingleUseHintInsts, 1484 FeatureGFX11FullVGPRs])>; 1485 1486def FeatureISAVersion12 : FeatureSet< 1487 [FeatureGFX12, 1488 FeatureLDSBankCount32, 1489 FeatureDLInsts, 1490 FeatureDot7Insts, 1491 FeatureDot8Insts, 1492 FeatureDot9Insts, 1493 FeatureDot10Insts, 1494 FeatureNSAEncoding, 1495 FeaturePartialNSAEncoding, 1496 FeatureWavefrontSize32, 1497 FeatureShaderCyclesHiLoRegisters, 1498 FeatureArchitectedFlatScratch, 1499 FeatureArchitectedSGPRs, 1500 FeatureAtomicFaddRtnInsts, 1501 FeatureAtomicFaddNoRtnInsts, 1502 FeatureAtomicDsPkAdd16Insts, 1503 FeatureAtomicFlatPkAdd16Insts, 1504 FeatureAtomicBufferGlobalPkAddF16Insts, 1505 FeatureAtomicGlobalPkAddBF16Inst, 1506 FeatureFlatAtomicFaddF32Inst, 1507 FeatureImageInsts, 1508 FeatureExtendedImageInsts, 1509 FeatureFP8ConversionInsts, 1510 FeaturePackedTID, 1511 FeatureVcmpxPermlaneHazard, 1512 FeatureSALUFloatInsts, 1513 FeaturePseudoScalarTrans, 1514 FeatureHasRestrictedSOffset, 1515 FeatureVGPRSingleUseHintInsts, 1516 FeatureScalarDwordx3Loads, 1517 FeatureDPPSrc1SGPR]>; 1518 1519//===----------------------------------------------------------------------===// 1520 1521def AMDGPUInstrInfo : InstrInfo { 1522 let guessInstructionProperties = 1; 1523} 1524 1525def AMDGPUAsmParser : AsmParser { 1526 // Some of the 
R600 registers have the same name, so this crashes. 1527 // For example T0_XYZW and T0_XY both have the asm name T0. 1528 let ShouldEmitMatchRegisterName = 0; 1529 1530 // Call the custom operand parser for all operands. 1531 let OperandParserMethod = "parseCustomOperand"; 1532 let CallCustomParserForAllOperands = true; 1533} 1534 1535def AMDGPUAsmWriter : AsmWriter { 1536 int PassSubtarget = 1; 1537} 1538 1539def AMDGPUAsmVariants { 1540 string Default = "Default"; 1541 int Default_ID = 0; 1542 string VOP3 = "VOP3"; 1543 int VOP3_ID = 1; 1544 string SDWA = "SDWA"; 1545 int SDWA_ID = 2; 1546 string SDWA9 = "SDWA9"; 1547 int SDWA9_ID = 3; 1548 string DPP = "DPP"; 1549 int DPP_ID = 4; 1550 string VOP3_DPP = "VOP3_DPP"; 1551 int VOP3_DPP_ID = 5; 1552 string Disable = "Disable"; 1553 int Disable_ID = 6; 1554} 1555 1556def DefaultAMDGPUAsmParserVariant : AsmParserVariant { 1557 let Variant = AMDGPUAsmVariants.Default_ID; 1558 let Name = AMDGPUAsmVariants.Default; 1559} 1560 1561def VOP3AsmParserVariant : AsmParserVariant { 1562 let Variant = AMDGPUAsmVariants.VOP3_ID; 1563 let Name = AMDGPUAsmVariants.VOP3; 1564} 1565 1566def SDWAAsmParserVariant : AsmParserVariant { 1567 let Variant = AMDGPUAsmVariants.SDWA_ID; 1568 let Name = AMDGPUAsmVariants.SDWA; 1569} 1570 1571def SDWA9AsmParserVariant : AsmParserVariant { 1572 let Variant = AMDGPUAsmVariants.SDWA9_ID; 1573 let Name = AMDGPUAsmVariants.SDWA9; 1574} 1575 1576def DPPAsmParserVariant : AsmParserVariant { 1577 let Variant = AMDGPUAsmVariants.DPP_ID; 1578 let Name = AMDGPUAsmVariants.DPP; 1579} 1580 1581def VOP3_DPPAsmParserVariant : AsmParserVariant { 1582 let Variant = AMDGPUAsmVariants.VOP3_DPP_ID; 1583 let Name = AMDGPUAsmVariants.VOP3_DPP; 1584} 1585 1586def AMDGPU : Target { 1587 // Pull in Instruction Info: 1588 let InstructionSet = AMDGPUInstrInfo; 1589 let AssemblyParsers = [AMDGPUAsmParser]; 1590 let AssemblyParserVariants = [DefaultAMDGPUAsmParserVariant, 1591 VOP3AsmParserVariant, 1592 SDWAAsmParserVariant, 
1593 SDWA9AsmParserVariant, 1594 DPPAsmParserVariant, 1595 VOP3_DPPAsmParserVariant]; 1596 let AssemblyWriters = [AMDGPUAsmWriter]; 1597 let AllowRegisterRenaming = 1; 1598} 1599 1600// Dummy Instruction itineraries for pseudo instructions 1601def ALU_NULL : FuncUnit; 1602def NullALU : InstrItinClass; 1603 1604//===----------------------------------------------------------------------===// 1605// Predicate helper class 1606//===----------------------------------------------------------------------===// 1607 1608def isGFX6 : 1609 Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS">, 1610 AssemblerPredicate<(all_of FeatureSouthernIslands)>; 1611 1612def isGFX6GFX7 : 1613 Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS ||" 1614 "Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS">, 1615 AssemblerPredicate<(all_of (not FeatureGCN3Encoding), (not FeatureGFX10Insts))>; 1616 1617def isGFX6GFX7GFX10 : 1618 Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS ||" 1619 "Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS ||" 1620 "Subtarget->getGeneration() == AMDGPUSubtarget::GFX10">, 1621 AssemblerPredicate<(all_of (not FeatureGCN3Encoding), (not FeatureGFX11Insts))>; 1622 1623def isGFX6GFX7GFX10Plus : 1624 Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS ||" 1625 "Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS ||" 1626 "Subtarget->getGeneration() >= AMDGPUSubtarget::GFX10">, 1627 AssemblerPredicate<(all_of (not FeatureGCN3Encoding))>; 1628 1629def isGFX7Only : 1630 Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS">, 1631 AssemblerPredicate<(all_of (not FeatureGCN3Encoding), FeatureCIInsts, (not FeatureGFX10Insts))>; 1632 1633def isGFX7GFX10 : 1634 Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS ||" 1635 "Subtarget->getGeneration() == AMDGPUSubtarget::GFX10">, 1636 AssemblerPredicate<(all_of (not 
FeatureGCN3Encoding), FeatureCIInsts, (not FeatureGFX11Insts))>; 1637 1638def isGFX7GFX10GFX11 : 1639 Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS ||" 1640 "Subtarget->getGeneration() == AMDGPUSubtarget::GFX10 ||" 1641 "Subtarget->getGeneration() == AMDGPUSubtarget::GFX11">, 1642 AssemblerPredicate<(all_of (not FeatureGCN3Encoding), FeatureCIInsts)>; 1643 1644def isGFX7GFX8GFX9 : 1645 Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS ||" 1646 "Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS ||" 1647 "Subtarget->getGeneration() == AMDGPUSubtarget::GFX9">, 1648 AssemblerPredicate<(all_of FeatureGFX7GFX8GFX9Insts)>; 1649 1650def isGFX6GFX7GFX8GFX9 : 1651 Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS ||" 1652 "Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS ||" 1653 "Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS ||" 1654 "Subtarget->getGeneration() == AMDGPUSubtarget::GFX9">, 1655 AssemblerPredicate<(all_of (not FeatureGFX10Insts))>; 1656 1657def isGFX6GFX7GFX8GFX9NotGFX90A : 1658 Predicate<"!Subtarget->hasGFX90AInsts() &&" 1659 "(Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS ||" 1660 " Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS ||" 1661 " Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS ||" 1662 " Subtarget->getGeneration() == AMDGPUSubtarget::GFX9)">, 1663 AssemblerPredicate<(all_of (not FeatureGFX10Insts), (not FeatureGFX90AInsts))>; 1664 1665def isGFX6GFX7GFX8GFX9GFX10 : 1666 Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS ||" 1667 "Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS ||" 1668 "Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS ||" 1669 "Subtarget->getGeneration() == AMDGPUSubtarget::GFX9 ||" 1670 "Subtarget->getGeneration() == AMDGPUSubtarget::GFX10">, 1671 AssemblerPredicate<(all_of (not FeatureGFX11Insts))>; 1672 1673def isNotGFX12Plus 
: 1674 Predicate<"Subtarget->getGeneration() <= AMDGPUSubtarget::GFX11">, 1675 AssemblerPredicate<(all_of (not FeatureGFX12Insts))>; 1676 1677def isGFX7GFX8GFX9GFX10 : 1678 Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS ||" 1679 "Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS ||" 1680 "Subtarget->getGeneration() == AMDGPUSubtarget::GFX9 ||" 1681 "Subtarget->getGeneration() == AMDGPUSubtarget::GFX10">, 1682 AssemblerPredicate<(all_of FeatureCIInsts, (not FeatureGFX11Insts))>; 1683 1684def isGFX8GFX9GFX10GFX11 : 1685 Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS ||" 1686 "Subtarget->getGeneration() == AMDGPUSubtarget::GFX9 ||" 1687 "Subtarget->getGeneration() == AMDGPUSubtarget::GFX10 ||" 1688 "Subtarget->getGeneration() == AMDGPUSubtarget::GFX11">, 1689 AssemblerPredicate<(all_of FeatureGFX8Insts, (not FeatureGFX12Insts))>; 1690 1691def isGFX7Plus : 1692 Predicate<"Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS">, 1693 AssemblerPredicate<(all_of FeatureCIInsts)>; 1694 1695def isGFX8Plus : 1696 Predicate<"Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS">, 1697 AssemblerPredicate<(all_of FeatureGFX8Insts)>; 1698 1699def isGFX8Only : Predicate<"Subtarget->getGeneration() ==" 1700 "AMDGPUSubtarget::VOLCANIC_ISLANDS">, 1701 AssemblerPredicate <(all_of FeatureVolcanicIslands)>; 1702 1703def isGFX9Plus : 1704 Predicate<"Subtarget->getGeneration() >= AMDGPUSubtarget::GFX9">, 1705 AssemblerPredicate<(all_of FeatureGFX9Insts)>; 1706 1707def isNotGFX9Plus : 1708 Predicate<"Subtarget->getGeneration() < AMDGPUSubtarget::GFX9">; 1709 1710def isGFX9Only : Predicate < 1711 "Subtarget->getGeneration() == AMDGPUSubtarget::GFX9">, 1712 AssemblerPredicate<(all_of FeatureGCN3Encoding, FeatureGFX9Insts)>; 1713 1714def isGCN3ExcludingGFX90A : 1715 Predicate<"Subtarget->isGCN3Encoding() && !Subtarget->hasGFX90AInsts()">, 1716 AssemblerPredicate<(all_of FeatureGCN3Encoding, (not 
FeatureGFX90AInsts))>; 1717 1718def isGFX90APlus : 1719 Predicate<"Subtarget->hasGFX90AInsts()">, 1720 AssemblerPredicate<(all_of FeatureGFX90AInsts)>; 1721 1722def isNotGFX90APlus : 1723 Predicate<"!Subtarget->hasGFX90AInsts()">, 1724 AssemblerPredicate<(all_of (not FeatureGFX90AInsts))>; 1725 1726def isGFX8GFX9NotGFX90A : 1727 Predicate<"!Subtarget->hasGFX90AInsts() &&" 1728 "(Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS ||" 1729 " Subtarget->getGeneration() == AMDGPUSubtarget::GFX9)">, 1730 AssemblerPredicate<(all_of FeatureGFX8Insts, FeatureGCN3Encoding, (not FeatureGFX90AInsts))>; 1731 1732def isGFX90AOnly : 1733 Predicate<"Subtarget->hasGFX90AInsts() && !Subtarget->hasGFX940Insts()">, 1734 AssemblerPredicate<(all_of FeatureGFX90AInsts, (not FeatureGFX940Insts))>; 1735 1736def isGFX908orGFX90A : 1737 Predicate<"Subtarget->hasMAIInsts() && !Subtarget->hasGFX940Insts()">, 1738 AssemblerPredicate<(all_of FeatureMAIInsts, (not FeatureGFX940Insts))>; 1739 1740def isGFX940Plus : 1741 Predicate<"Subtarget->hasGFX940Insts()">, 1742 AssemblerPredicate<(all_of FeatureGFX940Insts)>; 1743 1744def isGFX8GFX9NotGFX940 : 1745 Predicate<"!Subtarget->hasGFX940Insts() &&" 1746 "(Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS ||" 1747 " Subtarget->getGeneration() == AMDGPUSubtarget::GFX9)">, 1748 AssemblerPredicate<(all_of FeatureGFX8Insts, FeatureGCN3Encoding, (not FeatureGFX940Insts))>; 1749 1750def isGFX8GFX9 : 1751 Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS ||" 1752 "Subtarget->getGeneration() == AMDGPUSubtarget::GFX9">, 1753 AssemblerPredicate<(all_of FeatureGFX8Insts, FeatureGCN3Encoding)>; 1754 1755def isGFX10Only : 1756 Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::GFX10">, 1757 AssemblerPredicate<(all_of FeatureGFX10Insts, (not FeatureGFX11Insts))>; 1758 1759def isGFX10Plus : 1760 Predicate<"Subtarget->getGeneration() >= AMDGPUSubtarget::GFX10">, 1761 AssemblerPredicate<(all_of 
FeatureGFX10Insts)>; 1762 1763def isGFX10GFX11 : 1764 Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::GFX10 ||" 1765 "Subtarget->getGeneration() == AMDGPUSubtarget::GFX11">, 1766 AssemblerPredicate<(all_of FeatureGFX10Insts, (not FeatureGFX12Insts))>; 1767 1768def isGFX10Before1030 : 1769 Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::GFX10 &&" 1770 "!Subtarget->hasGFX10_3Insts()">, 1771 AssemblerPredicate<(all_of FeatureGFX10Insts,(not FeatureGFX10_3Insts))>; 1772 1773def isGFX9GFX10 : 1774 Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::GFX9 ||" 1775 "Subtarget->getGeneration() == AMDGPUSubtarget::GFX10">, 1776 AssemblerPredicate<(all_of FeatureGFX9Insts, (not FeatureGFX11Insts))>; 1777 1778def isGFX8GFX9GFX10 : 1779 Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS ||" 1780 "Subtarget->getGeneration() == AMDGPUSubtarget::GFX9 ||" 1781 "Subtarget->getGeneration() == AMDGPUSubtarget::GFX10">, 1782 AssemblerPredicate<(all_of FeatureGFX8Insts, (not FeatureGFX11Insts))>; 1783 1784def isGFX11Only : 1785 Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::GFX11">, 1786 AssemblerPredicate<(all_of FeatureGFX11Insts, (not FeatureGFX12Insts))>; 1787 1788def isGFX11Plus : 1789 Predicate<"Subtarget->getGeneration() >= AMDGPUSubtarget::GFX11">, 1790 AssemblerPredicate<(all_of FeatureGFX11Insts)>; 1791 1792def isGFX12Only : 1793 Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::GFX12">, 1794 AssemblerPredicate<(all_of FeatureGFX12Insts)>; 1795 1796def isGFX12Plus : 1797 Predicate<"Subtarget->getGeneration() >= AMDGPUSubtarget::GFX12">, 1798 AssemblerPredicate<(all_of FeatureGFX12Insts)>; 1799 1800def HasFlatAddressSpace : Predicate<"Subtarget->hasFlatAddressSpace()">, 1801 AssemblerPredicate<(all_of FeatureFlatAddressSpace)>; 1802 1803def HasFlatGlobalInsts : Predicate<"Subtarget->hasFlatGlobalInsts()">, 1804 AssemblerPredicate<(all_of FeatureFlatGlobalInsts)>; 1805def HasFlatScratchInsts : 
Predicate<"Subtarget->hasFlatScratchInsts()">, 1806 AssemblerPredicate<(all_of FeatureFlatScratchInsts)>; 1807def HasScalarFlatScratchInsts : Predicate<"Subtarget->hasScalarFlatScratchInsts()">, 1808 AssemblerPredicate<(all_of FeatureScalarFlatScratchInsts)>; 1809def HasD16LoadStore : Predicate<"Subtarget->hasD16LoadStore()">, 1810 AssemblerPredicate<(all_of FeatureGFX9Insts)>; 1811 1812def HasFlatScratchSTMode : Predicate<"Subtarget->hasFlatScratchSTMode()">, 1813 AssemblerPredicate<(any_of FeatureGFX10_3Insts, FeatureGFX940Insts)>; 1814def HasFlatScratchSVSMode : Predicate<"Subtarget->hasFlatScratchSVSMode()">, 1815 AssemblerPredicate<(any_of FeatureGFX940Insts, FeatureGFX11Insts)>; 1816 1817def HasGFX10_AEncoding : Predicate<"Subtarget->hasGFX10_AEncoding()">, 1818 AssemblerPredicate<(all_of FeatureGFX10_AEncoding)>; 1819 1820def HasGFX10_BEncoding : Predicate<"Subtarget->hasGFX10_BEncoding()">, 1821 AssemblerPredicate<(all_of FeatureGFX10_BEncoding)>; 1822 1823def HasUnpackedD16VMem : Predicate<"Subtarget->hasUnpackedD16VMem()">, 1824 AssemblerPredicate<(all_of FeatureUnpackedD16VMem)>; 1825def HasPackedD16VMem : Predicate<"!Subtarget->hasUnpackedD16VMem()">, 1826 AssemblerPredicate<(all_of (not FeatureUnpackedD16VMem))>; 1827 1828def HasRestrictedSOffset : Predicate<"Subtarget->hasRestrictedSOffset()">, 1829 AssemblerPredicate<(all_of FeatureHasRestrictedSOffset)>; 1830def HasUnrestrictedSOffset : Predicate<"!Subtarget->hasRestrictedSOffset()">, 1831 AssemblerPredicate<(all_of (not FeatureHasRestrictedSOffset))>; 1832 1833def D16PreservesUnusedBits : 1834 Predicate<"Subtarget->d16PreservesUnusedBits()">, 1835 AssemblerPredicate<(all_of FeatureGFX9Insts, (not FeatureSRAMECC))>; 1836 1837def LDSRequiresM0Init : Predicate<"Subtarget->ldsRequiresM0Init()">; 1838def NotLDSRequiresM0Init : Predicate<"!Subtarget->ldsRequiresM0Init()">; 1839 1840def HasDSAddTid : Predicate<"Subtarget->getGeneration() >= AMDGPUSubtarget::GFX9">, 1841 AssemblerPredicate<(all_of 
FeatureGFX9Insts)>; 1842 1843def HasLDSFPAtomicAdd : Predicate<"Subtarget->hasLDSFPAtomicAdd()">, 1844 AssemblerPredicate<(all_of FeatureGFX8Insts)>; 1845 1846def HasAddNoCarryInsts : Predicate<"Subtarget->hasAddNoCarry()">, 1847 AssemblerPredicate<(all_of FeatureAddNoCarryInsts)>; 1848 1849def NotHasAddNoCarryInsts : Predicate<"!Subtarget->hasAddNoCarry()">; 1850 1851def Has16BitInsts : Predicate<"Subtarget->has16BitInsts()">, 1852 AssemblerPredicate<(all_of Feature16BitInsts)>; 1853 1854def HasTrue16BitInsts : Predicate<"Subtarget->hasTrue16BitInsts()">, 1855 AssemblerPredicate<(all_of FeatureTrue16BitInsts)>; 1856def NotHasTrue16BitInsts : Predicate<"!Subtarget->hasTrue16BitInsts()">; 1857 1858// Control use of True16 instructions. The real True16 instructions are 1859// True16 instructions as they are defined in the ISA. Fake True16 1860// instructions have the same encoding as real ones but syntactically 1861// only allow 32-bit registers in operands and use low halves thereof. 1862def UseRealTrue16Insts : Predicate<"Subtarget->useRealTrue16Insts()">, 1863 AssemblerPredicate<(all_of FeatureTrue16BitInsts, FeatureRealTrue16Insts)>; 1864def UseFakeTrue16Insts : Predicate<"Subtarget->hasTrue16BitInsts() && " 1865 "!Subtarget->useRealTrue16Insts()">; 1866 1867def HasVOP3PInsts : Predicate<"Subtarget->hasVOP3PInsts()">, 1868 AssemblerPredicate<(all_of FeatureVOP3P)>; 1869 1870def NotHasMed3_16 : Predicate<"!Subtarget->hasMed3_16()">; 1871 1872def HasMinMaxDenormModes : Predicate<"Subtarget->supportsMinMaxDenormModes()">; 1873def NotHasMinMaxDenormModes : Predicate<"!Subtarget->supportsMinMaxDenormModes()">; 1874 1875def HasFminFmaxLegacy : Predicate<"Subtarget->hasFminFmaxLegacy()">; 1876 1877def HasSDWA : Predicate<"Subtarget->hasSDWA()">, 1878 AssemblerPredicate<(all_of FeatureSDWA, FeatureVolcanicIslands)>; 1879 1880def HasSDWA9 : 1881 Predicate<"Subtarget->hasSDWA()">, 1882 AssemblerPredicate<(all_of FeatureGCN3Encoding, FeatureGFX9Insts,FeatureSDWA)>; 1883 
// Feature predicates (continued). As above, most records pair a C++
// condition string (Predicate) with an AssemblerPredicate over subtarget
// features; a few carry only the C++ condition.

def HasSDWA10 :
  Predicate<"Subtarget->hasSDWA()">,
  AssemblerPredicate<(all_of (not FeatureGCN3Encoding), FeatureGFX10Insts, FeatureSDWA)>;

def HasDPP : Predicate<"Subtarget->hasDPP()">,
  AssemblerPredicate<(all_of FeatureGCN3Encoding, FeatureDPP)>;

def HasDPP8 : Predicate<"Subtarget->hasDPP8()">,
  AssemblerPredicate<(all_of (not FeatureGCN3Encoding), FeatureGFX10Insts, FeatureDPP8)>;

def HasDPALU_DPP : Predicate<"Subtarget->hasDPALU_DPP()">,
  AssemblerPredicate<(all_of FeatureDPALU_DPP)>;

def HasPackedFP32Ops : Predicate<"Subtarget->hasPackedFP32Ops()">,
  AssemblerPredicate<(all_of FeaturePackedFP32Ops)>;

def HasPkMovB32 : Predicate<"Subtarget->hasPkMovB32()">,
  AssemblerPredicate<(all_of FeatureGFX90AInsts)>;

def HasFmaakFmamkF32Insts :
  Predicate<"Subtarget->hasFmaakFmamkF32Insts()">,
  AssemblerPredicate<(any_of FeatureGFX10Insts, FeatureGFX940Insts)>;

def HasImageInsts : Predicate<"Subtarget->hasImageInsts()">,
  AssemblerPredicate<(all_of FeatureImageInsts)>;

def HasExtendedImageInsts : Predicate<"Subtarget->hasExtendedImageInsts()">,
  AssemblerPredicate<(all_of FeatureExtendedImageInsts)>;

def HasR128A16 : Predicate<"Subtarget->hasR128A16()">,
  AssemblerPredicate<(all_of FeatureR128A16)>;

def HasA16 : Predicate<"Subtarget->hasA16()">,
  AssemblerPredicate<(all_of FeatureA16)>;

def HasG16 : Predicate<"Subtarget->hasG16()">,
  AssemblerPredicate<(all_of FeatureG16)>;

def HasDPP16 : Predicate<"Subtarget->hasDPP()">,
  AssemblerPredicate<(all_of (not FeatureGCN3Encoding), FeatureGFX10Insts, FeatureDPP)>;

def HasIntClamp : Predicate<"Subtarget->hasIntClamp()">,
  AssemblerPredicate<(all_of FeatureIntClamp)>;

def HasMadMixInsts : Predicate<"Subtarget->hasMadMixInsts()">,
  AssemblerPredicate<(all_of FeatureMadMixInsts)>;

def HasScalarStores : Predicate<"Subtarget->hasScalarStores()">,
  AssemblerPredicate<(all_of FeatureScalarStores)>;

def HasScalarAtomics : Predicate<"Subtarget->hasScalarAtomics()">,
  AssemblerPredicate<(all_of FeatureScalarAtomics)>;

def HasNoSdstCMPX : Predicate<"Subtarget->hasNoSdstCMPX()">,
  AssemblerPredicate<(all_of FeatureNoSdstCMPX)>;

def HasSdstCMPX : Predicate<"!Subtarget->hasNoSdstCMPX()">,
  AssemblerPredicate<(all_of (not FeatureNoSdstCMPX))>;

def has16BankLDS : Predicate<"Subtarget->getLDSBankCount() == 16">;
def has32BankLDS : Predicate<"Subtarget->getLDSBankCount() == 32">;
def HasVGPRIndexMode : Predicate<"Subtarget->hasVGPRIndexMode()">,
                      AssemblerPredicate<(all_of FeatureVGPRIndexMode)>;
def HasMovrel : Predicate<"Subtarget->hasMovrel()">,
                AssemblerPredicate<(all_of FeatureMovrel)>;

def HasFmaMixInsts : Predicate<"Subtarget->hasFmaMixInsts()">,
  AssemblerPredicate<(all_of FeatureFmaMixInsts)>;

def HasDLInsts : Predicate<"Subtarget->hasDLInsts()">,
  AssemblerPredicate<(all_of FeatureDLInsts)>;

def HasFmacF64Inst : Predicate<"Subtarget->hasFmacF64Inst()">,
  AssemblerPredicate<(all_of FeatureFmacF64Inst)>;

def HasDot1Insts : Predicate<"Subtarget->hasDot1Insts()">,
  AssemblerPredicate<(all_of FeatureDot1Insts)>;

def HasDot2Insts : Predicate<"Subtarget->hasDot2Insts()">,
  AssemblerPredicate<(all_of FeatureDot2Insts)>;

def HasDot3Insts : Predicate<"Subtarget->hasDot3Insts()">,
  AssemblerPredicate<(all_of FeatureDot3Insts)>;

def HasDot4Insts : Predicate<"Subtarget->hasDot4Insts()">,
  AssemblerPredicate<(all_of FeatureDot4Insts)>;

def HasDot5Insts : Predicate<"Subtarget->hasDot5Insts()">,
  AssemblerPredicate<(all_of FeatureDot5Insts)>;

def HasDot6Insts : Predicate<"Subtarget->hasDot6Insts()">,
  AssemblerPredicate<(all_of FeatureDot6Insts)>;

def HasDot7Insts : Predicate<"Subtarget->hasDot7Insts()">,
  AssemblerPredicate<(all_of FeatureDot7Insts)>;

def HasDot8Insts : Predicate<"Subtarget->hasDot8Insts()">,
  AssemblerPredicate<(all_of FeatureDot8Insts)>;

def HasDot9Insts : Predicate<"Subtarget->hasDot9Insts()">,
  AssemblerPredicate<(all_of FeatureDot9Insts)>;

def HasDot10Insts : Predicate<"Subtarget->hasDot10Insts()">,
  AssemblerPredicate<(all_of FeatureDot10Insts)>;

def HasGetWaveIdInst : Predicate<"Subtarget->hasGetWaveIdInst()">,
  AssemblerPredicate<(all_of FeatureGetWaveIdInst)>;

def HasMAIInsts : Predicate<"Subtarget->hasMAIInsts()">,
  AssemblerPredicate<(all_of FeatureMAIInsts)>;

def HasSMemRealTime : Predicate<"Subtarget->hasSMemRealTime()">,
  AssemblerPredicate<(all_of FeatureSMemRealTime)>;

def HasSMemTimeInst : Predicate<"Subtarget->hasSMemTimeInst()">,
  AssemblerPredicate<(all_of FeatureSMemTimeInst)>;

def HasShaderCyclesRegister : Predicate<"Subtarget->hasShaderCyclesRegister()">,
  AssemblerPredicate<(all_of FeatureShaderCyclesRegister)>;

def HasShaderCyclesHiLoRegisters : Predicate<"Subtarget->hasShaderCyclesHiLoRegisters()">;

def HasFP8Insts : Predicate<"Subtarget->hasFP8Insts()">,
  AssemblerPredicate<(all_of FeatureFP8Insts)>;

def HasFP8ConversionInsts : Predicate<"Subtarget->hasFP8ConversionInsts()">,
  AssemblerPredicate<(all_of FeatureFP8ConversionInsts)>;

def HasPkFmacF16Inst : Predicate<"Subtarget->hasPkFmacF16Inst()">,
  AssemblerPredicate<(all_of FeaturePkFmacF16Inst)>;

def HasMadMacF32Insts : Predicate<"Subtarget->hasMadMacF32Insts()">,
  AssemblerPredicate<(all_of FeatureMadMacF32Insts)>;

def HasFmaLegacy32 : Predicate<"Subtarget->hasGFX10_3Insts()">,
  AssemblerPredicate<(any_of FeatureGFX10_3Insts)>;

def HasAtomicDsPkAdd16Insts : Predicate<"Subtarget->hasAtomicDsPkAdd16Insts()">,
  AssemblerPredicate<(any_of FeatureAtomicDsPkAdd16Insts)>;

def HasAtomicFlatPkAdd16Insts : Predicate<"Subtarget->hasAtomicFlatPkAdd16Insts()">,
  AssemblerPredicate<(any_of FeatureAtomicFlatPkAdd16Insts)>;

def HasAtomicFaddRtnInsts : Predicate<"Subtarget->hasAtomicFaddRtnInsts()">,
  AssemblerPredicate<(all_of FeatureAtomicFaddRtnInsts)>;
def HasAtomicFaddNoRtnInsts : Predicate<"Subtarget->hasAtomicFaddNoRtnInsts()">,
  AssemblerPredicate<(all_of FeatureAtomicFaddNoRtnInsts)>;
// The no-return form is accepted when either the NoRtn-only feature or the
// full PkAddF16 feature is present (both sides of this record say so).
def HasAtomicBufferGlobalPkAddF16NoRtnInsts
  : Predicate<"Subtarget->hasAtomicBufferGlobalPkAddF16NoRtnInsts() || Subtarget->hasAtomicBufferGlobalPkAddF16Insts()">,
  AssemblerPredicate<(any_of FeatureAtomicBufferGlobalPkAddF16NoRtnInsts, FeatureAtomicBufferGlobalPkAddF16Insts)>;
def HasAtomicBufferGlobalPkAddF16Insts
  : Predicate<"Subtarget->hasAtomicBufferGlobalPkAddF16Insts()">,
  AssemblerPredicate<(all_of FeatureAtomicBufferGlobalPkAddF16Insts)>;
def HasAtomicGlobalPkAddBF16Inst
  : Predicate<"Subtarget->hasAtomicGlobalPkAddBF16Inst()">,
    AssemblerPredicate<(all_of FeatureAtomicGlobalPkAddBF16Inst)>;
def HasFlatAtomicFaddF32Inst
  : Predicate<"Subtarget->hasFlatAtomicFaddF32Inst()">,
  AssemblerPredicate<(all_of FeatureFlatAtomicFaddF32Inst)>;

def HasDefaultComponentZero
  : Predicate<"Subtarget->hasDefaultComponentZero()">,
  AssemblerPredicate<(all_of FeatureDefaultComponentZero)>;
def HasDefaultComponentBroadcast
  : Predicate<"Subtarget->hasDefaultComponentBroadcast()">,
  AssemblerPredicate<(all_of FeatureDefaultComponentBroadcast)>;

// The C++ condition was previously negated ("!Subtarget->hasDsSrc2Insts()"),
// which contradicted the AssemblerPredicate on the same record. Both sides
// now require the feature to be present.
def HasDsSrc2Insts : Predicate<"Subtarget->hasDsSrc2Insts()">,
  AssemblerPredicate<(all_of FeatureDsSrc2Insts)>;

def EnableLateCFGStructurize : Predicate<
  "EnableLateStructurizeCFG">;

def EnableFlatScratch : Predicate<"Subtarget->enableFlatScratch()">;

def DisableFlatScratch : Predicate<"!Subtarget->enableFlatScratch()">;

def
HasUnalignedAccessMode : Predicate<"Subtarget->hasUnalignedAccessMode()">, 2062 AssemblerPredicate<(all_of FeatureUnalignedAccessMode)>; 2063 2064def HasMADIntraFwdBug : Predicate<"Subtarget->hasMADIntraFwdBug()">; 2065 2066def HasNotMADIntraFwdBug : Predicate<"!Subtarget->hasMADIntraFwdBug()">; 2067 2068def HasSALUFloatInsts : Predicate<"Subtarget->hasSALUFloatInsts()">, 2069 AssemblerPredicate<(all_of FeatureSALUFloatInsts)>; 2070 2071def HasVGPRSingleUseHintInsts : Predicate<"Subtarget->hasVGPRSingleUseHintInsts()">, 2072 AssemblerPredicate<(all_of FeatureVGPRSingleUseHintInsts)>; 2073 2074def HasPseudoScalarTrans : Predicate<"Subtarget->hasPseudoScalarTrans()">, 2075 AssemblerPredicate<(all_of FeaturePseudoScalarTrans)>; 2076 2077def HasGDS : Predicate<"Subtarget->hasGDS()">; 2078 2079def HasGWS : Predicate<"Subtarget->hasGWS()">; 2080 2081def HasCvtFP8VOP1Bug : Predicate<"Subtarget->hasCvtFP8VOP1Bug()">; 2082def HasNoCvtFP8VOP1Bug : Predicate<"!Subtarget->hasCvtFP8VOP1Bug()">; 2083 2084def HasAtomicCSubNoRtnInsts : Predicate<"Subtarget->hasAtomicCSubNoRtnInsts()">; 2085 2086def HasScalarDwordx3Loads : Predicate<"Subtarget->hasScalarDwordx3Loads()">; 2087 2088// Include AMDGPU TD files 2089include "SISchedule.td" 2090include "GCNProcessors.td" 2091include "AMDGPUInstrInfo.td" 2092include "SIRegisterInfo.td" 2093include "AMDGPURegisterBanks.td" 2094include "AMDGPUInstructions.td" 2095include "SIInstrInfo.td" 2096include "AMDGPUCallingConv.td" 2097include "AMDGPUSearchableTables.td" 2098