//===-- AMDGPU.td - AMDGPU Tablegen files --------*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===------------------------------------------------------------===//

include "llvm/TableGen/SearchableTable.td"
include "llvm/Target/Target.td"
include "AMDGPUFeatures.td"
include "AMDGPUPredicateControl.td"

// Pointer value types, one per numbered address space (0-6), each with the
// integer type used to represent it.
def p0 : PtrValueType<i64, 0>;
def p1 : PtrValueType<i64, 1>;
def p2 : PtrValueType<i32, 2>;
def p3 : PtrValueType<i32, 3>;
def p4 : PtrValueType<i64, 4>;
def p5 : PtrValueType<i32, 5>;
def p6 : PtrValueType<i32, 6>;

//===------------------------------------------------------------===//
// Subtarget Features (device properties)
//===------------------------------------------------------------===//

// Every SubtargetFeature in this file is written with its arguments in the
// same order: feature string, subtarget attribute, value assigned to that
// attribute, description text and, where present, a list of other features.
// NOTE(review): see SubtargetFeature in llvm/Target/Target.td for the
// authoritative meaning of each argument.

def FeatureFastFMAF32 : SubtargetFeature<"fast-fmaf", "FastFMAF32", "true",
  "Assuming f32 fma is at least as fast as mul + add">;

def FeatureFastDenormalF32
  : SubtargetFeature<"fast-denormal-f32", "FastDenormalF32", "true",
      "Enabling denormals does not cause f32 instructions to run at f64 rates">;

def FeatureMIMG_R128 : SubtargetFeature<"mimg-r128", "MIMG_R128", "true",
  "Support 128-bit texture resources">;

def HalfRate64Ops : SubtargetFeature<"half-rate-64-ops", "HalfRate64Ops", "true",
  "Most fp64 instructions are half rate instead of quarter">;

def FullRate64Ops : SubtargetFeature<"full-rate-64-ops", "FullRate64Ops", "true",
  "Most fp64 instructions are full rate">;

def FeatureFlatAddressSpace
  : SubtargetFeature<"flat-address-space", "FlatAddressSpace", "true",
      "Support flat address space">;

def FeatureFlatInstOffsets
  : SubtargetFeature<"flat-inst-offsets", "FlatInstOffsets", "true",
      "Flat instructions have immediate offset addressing mode">;

def FeatureFlatGlobalInsts
  : SubtargetFeature<"flat-global-insts", "FlatGlobalInsts", "true",
      "Have global_* flat memory instructions">;

def FeatureFlatScratchInsts
  : SubtargetFeature<"flat-scratch-insts", "FlatScratchInsts", "true",
      "Have scratch_* flat memory instructions">;

def FeatureScalarFlatScratchInsts
  : SubtargetFeature<"scalar-flat-scratch-insts", "ScalarFlatScratchInsts", "true",
      "Have s_scratch_* flat memory instructions">;

def FeatureEnableFlatScratch
  : SubtargetFeature<"enable-flat-scratch", "EnableFlatScratch", "true",
      "Use scratch_* flat memory instructions to access scratch">;

def FeatureAddNoCarryInsts
  : SubtargetFeature<"add-no-carry-insts", "AddNoCarryInsts", "true",
      "Have VALU add/sub instructions without carry out">;

def FeatureUnalignedBufferAccess
  : SubtargetFeature<"unaligned-buffer-access", "UnalignedBufferAccess", "true",
      "Hardware supports unaligned global loads and stores">;

def FeatureTrapHandler : SubtargetFeature<"trap-handler", "TrapHandler", "true",
  "Trap handler support">;

def FeatureUnalignedScratchAccess
  : SubtargetFeature<"unaligned-scratch-access", "UnalignedScratchAccess", "true",
      "Support unaligned scratch loads and stores">;

def FeatureUnalignedDSAccess
  : SubtargetFeature<"unaligned-ds-access", "UnalignedDSAccess", "true",
      "Hardware supports unaligned local and region loads and stores">;

def FeatureRelaxedBufferOOBMode
  : SubtargetFeature<"relaxed-buffer-oob-mode", "RelaxedBufferOOBMode", "true",
      "Disable strict out-of-bounds buffer guarantees. An OOB access may potentially cause an adjacent access to be treated as if it were also OOB">;

def FeatureApertureRegs
  : SubtargetFeature<"aperture-regs", "HasApertureRegs", "true",
      "Has Memory Aperture Base and Size Registers">;

def FeatureMadMixInsts
  : SubtargetFeature<"mad-mix-insts", "HasMadMixInsts", "true",
      "Has v_mad_mix_f32, v_mad_mixlo_f16, v_mad_mixhi_f16 instructions">;

def FeatureFmaMixInsts
  : SubtargetFeature<"fma-mix-insts", "HasFmaMixInsts", "true",
      "Has v_fma_mix_f32, v_fma_mixlo_f16, v_fma_mixhi_f16 instructions">;

def FeatureIEEEMinimumMaximumInsts
  : SubtargetFeature<"ieee-minimum-maximum-insts", "HasIEEEMinimumMaximumInsts", "true",
      "Has v_minimum/maximum_f16/f32/f64, v_minimummaximum/maximumminimum_f16/f32 and v_pk_minimum/maximum_f16 instructions">;

def FeatureMinimum3Maximum3F32
  : SubtargetFeature<"minimum3-maximum3-f32", "HasMinimum3Maximum3F32", "true",
      "Has v_minimum3_f32 and v_maximum3_f32 instructions">;

def FeatureMinimum3Maximum3F16
  : SubtargetFeature<"minimum3-maximum3-f16", "HasMinimum3Maximum3F16", "true",
      "Has v_minimum3_f16 and v_maximum3_f16 instructions">;

def FeatureMinimum3Maximum3PKF16
  : SubtargetFeature<"minimum3-maximum3-pkf16", "HasMinimum3Maximum3PKF16", "true",
      "Has v_pk_minimum3_f16 and v_pk_maximum3_f16 instructions">;

def FeatureSupportsXNACK
  : SubtargetFeature<"xnack-support", "SupportsXNACK", "true",
      "Hardware supports XNACK">;

// XNACK is disabled if SH_MEM_CONFIG.ADDRESS_MODE = GPUVM on chips that support
// XNACK. The current default kernel driver setting is:
// - graphics ring: XNACK disabled
// - compute ring: XNACK enabled
//
// If XNACK is enabled, the VMEM latency can be worse.
// If XNACK is disabled, the 2 SGPRs can be used for general purposes.
def FeatureXNACK : SubtargetFeature<"xnack", "EnableXNACK", "true",
  "Enable XNACK support">;

def FeatureTgSplit : SubtargetFeature<"tgsplit", "EnableTgSplit", "true",
  "Enable threadgroup split execution">;

def FeatureCuMode : SubtargetFeature<"cumode", "EnableCuMode", "true",
  "Enable CU wavefront execution mode">;

def FeaturePreciseMemory
  : SubtargetFeature<"precise-memory", "EnablePreciseMemory", "true",
      "Enable precise memory mode">;

def FeatureSGPRInitBug : SubtargetFeature<"sgpr-init-bug", "SGPRInitBug", "true",
  "VI SGPR initialization bug requiring a fixed SGPR allocation size">;

def FeatureUserSGPRInit16Bug
  : SubtargetFeature<"user-sgpr-init16-bug", "UserSGPRInit16Bug", "true",
      "Bug requiring at least 16 user+system SGPRs to be enabled">;

def FeatureLdsMisalignedBug
  : SubtargetFeature<"lds-misaligned-bug", "LDSMisalignedBug", "true",
      "Some GFX10 bug with multi-dword LDS and flat access that is not naturally aligned in WGP mode">;

def FeatureMFMAInlineLiteralBug
  : SubtargetFeature<"mfma-inline-literal-bug", "HasMFMAInlineLiteralBug", "true",
      "MFMA cannot use inline literal as SrcC">;

// NOTE(review): the description below is still a placeholder.
def FeatureVcmpxPermlaneHazard
  : SubtargetFeature<"vcmpx-permlane-hazard", "HasVcmpxPermlaneHazard", "true",
      "TODO: describe me">;

def FeatureVMEMtoScalarWriteHazard
  : SubtargetFeature<"vmem-to-scalar-write-hazard", "HasVMEMtoScalarWriteHazard", "true",
      "VMEM instruction followed by scalar writing to EXEC mask, M0 or SGPR leads to incorrect execution.">;

def FeatureSMEMtoVectorWriteHazard
  : SubtargetFeature<"smem-to-vector-write-hazard", "HasSMEMtoVectorWriteHazard", "true",
      "s_load_dword followed by v_cmp page faults">;

def FeatureInstFwdPrefetchBug
  : SubtargetFeature<"inst-fwd-prefetch-bug", "HasInstFwdPrefetchBug", "true",
      "S_INST_PREFETCH instruction causes shader to hang">;

def FeatureSafeSmemPrefetch
  : SubtargetFeature<"safe-smem-prefetch", "HasSafeSmemPrefetch", "true",
      "SMEM prefetches do not fail on illegal address">;

def FeatureVcmpxExecWARHazard
  : SubtargetFeature<"vcmpx-exec-war-hazard", "HasVcmpxExecWARHazard", "true",
      "V_CMPX WAR hazard on EXEC (V_CMPX issue ONLY)">;

def FeatureLdsBranchVmemWARHazard
  : SubtargetFeature<"lds-branch-vmem-war-hazard", "HasLdsBranchVmemWARHazard", "true",
      "Switching between LDS and VMEM-tex not waiting VM_VSRC=0">;

// Parameterized feature: `size` is appended to the feature string and to the
// description, and its string form is the value assigned to
// MaxHardClauseLength.
class FeatureMaxHardClauseLength<int size>
  : SubtargetFeature<"max-hard-clause-length-"#size,
                     "MaxHardClauseLength",
                     !cast<string>(size),
                     "Maximum number of instructions in an explicit S_CLAUSE is "#size>;

/// Work around a hardware bug on some chips that can be triggered
/// under certain circumstances when clauses are longer than 32 operations.
def FeatureMaxHardClauseLength32 : FeatureMaxHardClauseLength<32>;
/// While the S_CLAUSE instruction permits encoding clause lengths up to 64,
/// hardware documentation for gfx10+ indicates that 63 is the maximum
/// permitted clause length.
def FeatureMaxHardClauseLength63 : FeatureMaxHardClauseLength<63>;

def FeatureNSAtoVMEMBug
  : SubtargetFeature<"nsa-to-vmem-bug", "HasNSAtoVMEMBug", "true",
      "MIMG-NSA followed by VMEM fail if EXEC_LO or EXEC_HI equals zero">;

def FeatureNSAClauseBug
  : SubtargetFeature<"nsa-clause-bug", "HasNSAClauseBug", "true",
      "MIMG-NSA in a hard clause has unpredictable results on GFX10.1">;

def FeatureFlatSegmentOffsetBug
  : SubtargetFeature<"flat-segment-offset-bug", "HasFlatSegmentOffsetBug", "true",
      "GFX10 bug where inst_offset is ignored when flat instructions access global memory">;

def FeatureNegativeScratchOffsetBug
  : SubtargetFeature<"negative-scratch-offset-bug", "NegativeScratchOffsetBug", "true",
      "Negative immediate offsets in scratch instructions with an SGPR offset page fault on GFX9">;

def FeatureNegativeUnalignedScratchOffsetBug
  : SubtargetFeature<"negative-unaligned-scratch-offset-bug",
                     "NegativeUnalignedScratchOffsetBug", "true",
      "Scratch instructions with a VGPR offset and a negative immediate offset that is not a multiple of 4 read wrong memory on GFX10">;

def FeatureOffset3fBug : SubtargetFeature<"offset-3f-bug", "HasOffset3fBug", "true",
  "Branch offset of 3f hardware bug">;

def FeatureImageStoreD16Bug
  : SubtargetFeature<"image-store-d16-bug", "HasImageStoreD16Bug", "true",
      "Image Store D16 hardware bug">;

def FeatureImageGather4D16Bug
  : SubtargetFeature<"image-gather4-d16-bug", "HasImageGather4D16Bug", "true",
      "Image Gather4 D16 hardware bug">;

def FeatureMADIntraFwdBug
  : SubtargetFeature<"mad-intra-fwd-bug", "HasMADIntraFwdBug", "true",
      "MAD_U64/I64 intra instruction forwarding bug">;

def FeatureMSAALoadDstSelBug
  : SubtargetFeature<"msaa-load-dst-sel-bug", "HasMSAALoadDstSelBug", "true",
      "MSAA loads not honoring dst_sel bug">;

def FeaturePrivEnabledTrap2NopBug
  : SubtargetFeature<"priv-enabled-trap2-nop-bug", "HasPrivEnabledTrap2NopBug", "true",
      "Hardware that runs with PRIV=1 interpreting 's_trap 2' as a nop bug">;

// Parameterized feature: `Value` is appended to the feature string and its
// string form is the value assigned to LDSBankCount.
class SubtargetFeatureLDSBankCount<int Value>
  : SubtargetFeature<"ldsbankcount"#Value,
                     "LDSBankCount",
                     !cast<string>(Value),
                     "The number of LDS banks per compute unit.">;

def FeatureLDSBankCount16 : SubtargetFeatureLDSBankCount<16>;
def FeatureLDSBankCount32 : SubtargetFeatureLDSBankCount<32>;

def FeatureGCN3Encoding : SubtargetFeature<"gcn3-encoding", "GCN3Encoding", "true",
  "Encoding format for VI">;

def FeatureCIInsts : SubtargetFeature<"ci-insts", "CIInsts", "true",
  "Additional instructions for CI+">;

def FeatureGFX8Insts : SubtargetFeature<"gfx8-insts", "GFX8Insts", "true",
  "Additional instructions for GFX8+">;

def FeatureGFX9Insts : SubtargetFeature<"gfx9-insts", "GFX9Insts", "true",
  "Additional instructions for GFX9+">;

def FeatureGFX90AInsts : SubtargetFeature<"gfx90a-insts", "GFX90AInsts", "true",
  "Additional instructions for GFX90A+"
  // [HasAtomicFMinFMaxF64GlobalInsts, HasAtomicFMinFMaxF64FlatInsts] // TODO
>;

def FeatureGFX940Insts : SubtargetFeature<"gfx940-insts", "GFX940Insts", "true",
  "Additional instructions for GFX940+">;

def FeaturePermlane16Swap
  : SubtargetFeature<"permlane16-swap", "HasPermlane16Swap", "true",
      "Has v_permlane16_swap_b32 instructions">;

def FeaturePermlane32Swap
  : SubtargetFeature<"permlane32-swap", "HasPermlane32Swap", "true",
      "Has v_permlane32_swap_b32 instructions">;

def FeatureFP8ConversionScaleInsts
  : SubtargetFeature<"fp8-cvt-scale-insts", "HasFP8ConversionScaleInsts", "true",
      "Has fp8 conversion scale instructions">;

def FeatureBF8ConversionScaleInsts
  : SubtargetFeature<"bf8-cvt-scale-insts", "HasBF8ConversionScaleInsts", "true",
      "Has bf8 conversion scale instructions">;

def FeatureFP4ConversionScaleInsts
  : SubtargetFeature<"fp4-cvt-scale-insts", "HasFP4ConversionScaleInsts", "true",
      "Has fp4 conversion scale instructions">;

def FeatureFP6BF6ConversionScaleInsts
  : SubtargetFeature<"fp6bf6-cvt-scale-insts", "HasFP6BF6ConversionScaleInsts", "true",
      "Has fp6 and bf6 conversion scale instructions">;

def FeatureF16BF16ToFP6BF6ConversionScaleInsts
  : SubtargetFeature<"f16bf16-to-fp6bf6-cvt-scale-insts",
                     "HasF16BF16ToFP6BF6ConversionScaleInsts", "true",
      "Has f16bf16 to fp6bf6 conversion scale instructions">;

// This is the "cvt-sr" feature, not one of the "cvt-scale" features above, so
// the description must not call these scale instructions.
// NOTE(review): "SR" is taken to mean stochastic rounding - confirm.
def FeatureF32ToF16BF16ConversionSRInsts
  : SubtargetFeature<"f32-to-f16bf16-cvt-sr-insts",
                     "HasF32ToF16BF16ConversionSRInsts", "true",
      "Has f32 to f16bf16 stochastic rounding conversion instructions">;

def FeatureAshrPkInsts : SubtargetFeature<"ashr-pk-insts", "HasAshrPkInsts", "true",
  "Has Arithmetic Shift Pack instructions">;

def FeatureCvtPkF16F32Inst
  : SubtargetFeature<"cvt-pk-f16-f32-inst", "HasCvtPkF16F32Inst", "true",
      "Has cvt_pk_f16_f32 instruction">;

// The trailing list names the other features passed along with this one.
def FeatureGFX950Insts : SubtargetFeature<"gfx950-insts", "GFX950Insts", "true",
  "Additional instructions for GFX950+",
  [FeaturePermlane16Swap,
   FeaturePermlane32Swap,
   FeatureAshrPkInsts,
   FeatureFP8ConversionScaleInsts,
   FeatureBF8ConversionScaleInsts,
   FeatureFP4ConversionScaleInsts,
   FeatureFP6BF6ConversionScaleInsts,
   FeatureF16BF16ToFP6BF6ConversionScaleInsts,
   FeatureF32ToF16BF16ConversionSRInsts,
   FeatureCvtPkF16F32Inst,
   FeatureMinimum3Maximum3F32,
   FeatureMinimum3Maximum3PKF16]>;

def FeatureGFX10Insts : SubtargetFeature<"gfx10-insts", "GFX10Insts", "true",
  "Additional instructions for GFX10+">;

def FeatureGFX11Insts : SubtargetFeature<"gfx11-insts", "GFX11Insts", "true",
  "Additional instructions for GFX11+">;

def FeatureGFX12Insts : SubtargetFeature<"gfx12-insts", "GFX12Insts", "true",
  "Additional instructions for GFX12+">;

def FeatureGFX1250Insts : SubtargetFeature<"gfx1250-insts", "GFX1250Insts", "true",
  "Additional instructions for GFX1250+">;

def FeatureGFX10_3Insts : SubtargetFeature<"gfx10-3-insts", "GFX10_3Insts", "true",
  "Additional instructions for GFX10.3">;

def FeatureGFX7GFX8GFX9Insts
  : SubtargetFeature<"gfx7-gfx8-gfx9-insts", "GFX7GFX8GFX9Insts", "true",
      "Instructions shared in GFX7, GFX8, GFX9">;

def FeatureSMemRealTime : SubtargetFeature<"s-memrealtime", "HasSMemRealTime", "true",
  "Has s_memrealtime instruction">;

def FeatureInv2PiInlineImm
  : SubtargetFeature<"inv-2pi-inline-imm", "HasInv2PiInlineImm", "true",
      "Has 1 / (2 * pi) as inline immediate">;

def Feature16BitInsts : SubtargetFeature<"16-bit-insts", "Has16BitInsts", "true",
  "Has i16/f16 instructions">;

def FeatureTrue16BitInsts : SubtargetFeature<"true16", "HasTrue16BitInsts", "true",
  "True 16-bit operand instructions">;

def FeatureRealTrue16Insts
  : SubtargetFeature<"real-true16", "EnableRealTrue16Insts", "true",
      "Use true 16-bit registers">;

def FeatureBF16TransInsts
  : SubtargetFeature<"bf16-trans-insts", "HasBF16TransInsts", "true",
      "Has bf16 transcendental instructions">;

def FeatureBF16ConversionInsts
  : SubtargetFeature<"bf16-cvt-insts", "HasBF16ConversionInsts", "true",
      "Has bf16 conversion instructions">;

def FeatureVOP3P : SubtargetFeature<"vop3p", "HasVOP3PInsts", "true",
  "Has VOP3P packed instructions">;

def FeatureMovrel : SubtargetFeature<"movrel", "HasMovrel", "true",
  "Has v_movrel*_b32 instructions">;

def FeatureVGPRIndexMode
  : SubtargetFeature<"vgpr-index-mode", "HasVGPRIndexMode", "true",
      "Has VGPR mode register indexing">;

def FeatureScalarDwordx3Loads
  : SubtargetFeature<"scalar-dwordx3-loads", "HasScalarDwordx3Loads", "true",
      "Has 96-bit scalar load instructions">;

def FeatureScalarStores
  : SubtargetFeature<"scalar-stores", "HasScalarStores", "true",
      "Has store scalar memory instructions">;

def FeatureScalarAtomics
  : SubtargetFeature<"scalar-atomics", "HasScalarAtomics", "true",
      "Has atomic scalar memory instructions">;

def FeatureSDWA : SubtargetFeature<"sdwa", "HasSDWA", "true",
  "Support SDWA (Sub-DWORD Addressing) extension">;

def FeatureSDWAOmod : SubtargetFeature<"sdwa-omod", "HasSDWAOmod", "true",
  "Support OMod with SDWA (Sub-DWORD Addressing) extension">;

def FeatureSDWAScalar : SubtargetFeature<"sdwa-scalar", "HasSDWAScalar", "true",
  "Support scalar register with SDWA (Sub-DWORD Addressing) extension">;

def FeatureSDWASdst : SubtargetFeature<"sdwa-sdst", "HasSDWASdst", "true",
  "Support scalar dst for VOPC with SDWA (Sub-DWORD Addressing) extension">;

// NOTE(review): the feature string is spelled "sdwa-mav" although the def and
// the attribute say "Mac". It is externally visible, so it is kept as-is.
def FeatureSDWAMac : SubtargetFeature<"sdwa-mav", "HasSDWAMac", "true",
  "Support v_mac_f32/f16 with SDWA (Sub-DWORD Addressing) extension">;

def FeatureSDWAOutModsVOPC
  : SubtargetFeature<"sdwa-out-mods-vopc", "HasSDWAOutModsVOPC", "true",
      "Support clamp for VOPC with SDWA (Sub-DWORD Addressing) extension">;

def FeatureDPP : SubtargetFeature<"dpp", "HasDPP", "true",
  "Support DPP (Data Parallel Primitives) extension">;

// DPP8 allows arbitrary cross-lane swizzling within groups of 8 lanes.
def FeatureDPP8 : SubtargetFeature<"dpp8", "HasDPP8", "true",
  "Support DPP8 (Data Parallel Primitives) extension">;

def FeatureDPALU_DPP : SubtargetFeature<"dpp-64bit", "HasDPALU_DPP", "true",
  "Support DPP (Data Parallel Primitives) extension in DP ALU">;

def FeatureDPPSrc1SGPR
  : SubtargetFeature<"dpp-src1-sgpr", "HasDPPSrc1SGPR", "true",
      "Support SGPR for Src1 of DPP instructions">;

def FeaturePackedFP32Ops
  : SubtargetFeature<"packed-fp32-ops", "HasPackedFP32Ops", "true",
      "Support packed fp32 instructions">;

def FeatureR128A16 : SubtargetFeature<"r128-a16", "HasR128A16", "true",
  "Support gfx9-style A16 for 16-bit coordinates/gradients/lod/clamp/mip image operands, where a16 is aliased with r128">;

def FeatureA16 : SubtargetFeature<"a16", "HasA16", "true",
  "Support A16 for 16-bit coordinates/gradients/lod/clamp/mip image operands">;

def FeatureG16 : SubtargetFeature<"g16", "HasG16", "true",
  "Support G16 for 16-bit gradient image operands">;

def FeatureNSAEncoding : SubtargetFeature<"nsa-encoding", "HasNSAEncoding", "true",
  "Support NSA encoding for image instructions">;

def FeaturePartialNSAEncoding
  : SubtargetFeature<"partial-nsa-encoding", "HasPartialNSAEncoding", "true",
      "Support partial NSA encoding for image instructions">;

def FeatureImageInsts : SubtargetFeature<"image-insts", "HasImageInsts", "true",
  "Support image instructions">;

def FeatureExtendedImageInsts
  : SubtargetFeature<"extended-image-insts", "HasExtendedImageInsts", "true",
      "Support mips != 0, lod != 0, gather4, and get_lod">;

def FeatureGFX10_AEncoding
  : SubtargetFeature<"gfx10_a-encoding", "GFX10_AEncoding", "true",
      "Has BVH ray tracing instructions">;

def FeatureGFX10_BEncoding
  : SubtargetFeature<"gfx10_b-encoding", "GFX10_BEncoding", "true",
      "Encoding format GFX10_B">;

def FeatureIntClamp : SubtargetFeature<"int-clamp-insts", "HasIntClamp", "true",
  "Support clamp for integer destination">;

def FeatureUnpackedD16VMem
  : SubtargetFeature<"unpacked-d16-vmem", "HasUnpackedD16VMem", "true",
      "Has unpacked d16 vmem instructions">;

def FeatureDLInsts : SubtargetFeature<"dl-insts", "HasDLInsts", "true",
  "Has v_fmac_f32 and v_xnor_b32 instructions">;

def FeatureFmacF64Inst : SubtargetFeature<"fmacf64-inst", "HasFmacF64Inst", "true",
  "Has v_fmac_f64 instruction">;

def FeatureDot1Insts : SubtargetFeature<"dot1-insts", "HasDot1Insts", "true",
  "Has v_dot4_i32_i8 and v_dot8_i32_i4 instructions">;

def FeatureDot2Insts : SubtargetFeature<"dot2-insts", "HasDot2Insts", "true",
  "Has v_dot2_i32_i16, v_dot2_u32_u16 instructions">;

def FeatureDot3Insts : SubtargetFeature<"dot3-insts", "HasDot3Insts", "true",
  "Has v_dot8c_i32_i4 instruction">;

def FeatureDot4Insts : SubtargetFeature<"dot4-insts", "HasDot4Insts", "true",
  "Has v_dot2c_i32_i16 instruction">;

def FeatureDot5Insts : SubtargetFeature<"dot5-insts", "HasDot5Insts", "true",
  "Has v_dot2c_f32_f16 instruction">;

def FeatureDot6Insts : SubtargetFeature<"dot6-insts", "HasDot6Insts", "true",
  "Has v_dot4c_i32_i8 instruction">;

def FeatureDot7Insts : SubtargetFeature<"dot7-insts", "HasDot7Insts", "true",
  "Has v_dot4_u32_u8, v_dot8_u32_u4 instructions">;

def FeatureDot8Insts : SubtargetFeature<"dot8-insts", "HasDot8Insts", "true",
  "Has v_dot4_i32_iu8, v_dot8_i32_iu4 instructions">;

def FeatureDot9Insts : SubtargetFeature<"dot9-insts", "HasDot9Insts", "true",
  "Has v_dot2_f16_f16, v_dot2_bf16_bf16 instructions">;

def FeatureDot10Insts : SubtargetFeature<"dot10-insts", "HasDot10Insts", "true",
  "Has v_dot2_f32_f16 instruction">;

def FeatureDot11Insts : SubtargetFeature<"dot11-insts", "HasDot11Insts", "true",
  "Has v_dot4_f32_fp8_fp8, v_dot4_f32_fp8_bf8, v_dot4_f32_bf8_fp8, v_dot4_f32_bf8_bf8 instructions">;

def FeatureDot12Insts : SubtargetFeature<"dot12-insts", "HasDot12Insts", "true",
  "Has v_dot2_f32_bf16 instructions">;

def FeatureDot13Insts : SubtargetFeature<"dot13-insts", "HasDot13Insts", "true",
  "Has v_dot2c_f32_bf16 instructions">;


def FeatureMAIInsts : SubtargetFeature<"mai-insts", "HasMAIInsts", "true",
  "Has mAI instructions">;

def FeatureFP8Insts : SubtargetFeature<"fp8-insts", "HasFP8Insts", "true",
  "Has fp8 and bf8 instructions">;

def FeatureFP8ConversionInsts
  : SubtargetFeature<"fp8-conversion-insts", "HasFP8ConversionInsts", "true",
      "Has fp8 and bf8 conversion instructions">;

def FeatureFP8E5M3Insts
  : SubtargetFeature<"fp8e5m3-insts", "HasFP8E5M3Insts", "true",
      "Has fp8 e5m3 format support">;

def FeatureCvtFP8VOP1Bug
  : SubtargetFeature<"cvt-fp8-vop1-bug", "HasCvtFP8Vop1Bug", "true",
      "FP8/BF8 VOP1 form of conversion to F32 is unreliable",
      [FeatureFP8ConversionInsts]>;

def FeaturePkFmacF16Inst
  : SubtargetFeature<"pk-fmac-f16-inst", "HasPkFmacF16Inst", "true",
      "Has v_pk_fmac_f16 instruction">;

// Several descriptions below are written as adjacent string literals; they
// form a single description string.
def FeatureAtomicDsPkAdd16Insts
  : SubtargetFeature<"atomic-ds-pk-add-16-insts", "HasAtomicDsPkAdd16Insts", "true",
      "Has ds_pk_add_bf16, ds_pk_add_f16, ds_pk_add_rtn_bf16, "
      "ds_pk_add_rtn_f16 instructions">;

def FeatureAtomicFlatPkAdd16Insts
  : SubtargetFeature<"atomic-flat-pk-add-16-insts", "HasAtomicFlatPkAdd16Insts", "true",
      "Has flat_atomic_pk_add_f16 and flat_atomic_pk_add_bf16 instructions">;

def FeatureAtomicFaddRtnInsts
  : SubtargetFeature<"atomic-fadd-rtn-insts", "HasAtomicFaddRtnInsts", "true",
      "Has buffer_atomic_add_f32 and global_atomic_add_f32 instructions that "
      "return original value",
      [FeatureFlatGlobalInsts]>;

def FeatureAtomicFMinFMaxF32GlobalInsts
  : SubtargetFeature<"atomic-fmin-fmax-global-f32", "HasAtomicFMinFMaxF32GlobalInsts", "true",
      "Has global/buffer instructions for atomicrmw fmin/fmax for float">;

// This is the f64 variant, so the description says "double", matching
// FeatureAtomicFMinFMaxF64FlatInsts below.
def FeatureAtomicFMinFMaxF64GlobalInsts
  : SubtargetFeature<"atomic-fmin-fmax-global-f64", "HasAtomicFMinFMaxF64GlobalInsts", "true",
      "Has global/buffer instructions for atomicrmw fmin/fmax for double">;

def FeatureAtomicFMinFMaxF32FlatInsts
  : SubtargetFeature<"atomic-fmin-fmax-flat-f32", "HasAtomicFMinFMaxF32FlatInsts", "true",
      "Has flat memory instructions for atomicrmw fmin/fmax for float">;

def FeatureAtomicFMinFMaxF64FlatInsts
  : SubtargetFeature<"atomic-fmin-fmax-flat-f64", "HasAtomicFMinFMaxF64FlatInsts", "true",
      "Has flat memory instructions for atomicrmw fmin/fmax for double">;

def FeatureAtomicFaddNoRtnInsts
  : SubtargetFeature<"atomic-fadd-no-rtn-insts", "HasAtomicFaddNoRtnInsts", "true",
      "Has buffer_atomic_add_f32 and global_atomic_add_f32 instructions that "
      "don't return original value",
      [FeatureFlatGlobalInsts]>;

def FeatureAtomicBufferGlobalPkAddF16NoRtnInsts
  : SubtargetFeature<"atomic-buffer-global-pk-add-f16-no-rtn-insts",
                     "HasAtomicBufferGlobalPkAddF16NoRtnInsts", "true",
      "Has buffer_atomic_pk_add_f16 and global_atomic_pk_add_f16 instructions that "
      "don't return original value",
      [FeatureFlatGlobalInsts]>;

def FeatureAtomicBufferGlobalPkAddF16Insts
  : SubtargetFeature<"atomic-buffer-global-pk-add-f16-insts",
                     "HasAtomicBufferGlobalPkAddF16Insts", "true",
      "Has buffer_atomic_pk_add_f16 and global_atomic_pk_add_f16 instructions that "
      "can return original value",
      [FeatureFlatGlobalInsts]>;

def FeatureAtomicGlobalPkAddBF16Inst
  : SubtargetFeature<"atomic-global-pk-add-bf16-inst", "HasAtomicGlobalPkAddBF16Inst", "true",
      "Has global_atomic_pk_add_bf16 instruction",
      [FeatureFlatGlobalInsts]>;

def FeatureAtomicBufferPkAddBF16Inst
  : SubtargetFeature<"atomic-buffer-pk-add-bf16-inst", "HasAtomicBufferPkAddBF16Inst", "true",
      "Has buffer_atomic_pk_add_bf16 instruction">;

def FeatureAtomicCSubNoRtnInsts
  : SubtargetFeature<"atomic-csub-no-rtn-insts", "HasAtomicCSubNoRtnInsts", "true",
      "Has buffer_atomic_csub and global_atomic_csub instructions that don't "
      "return original value">;

def FeatureFlatAtomicFaddF32Inst
  : SubtargetFeature<"flat-atomic-fadd-f32-inst", "HasFlatAtomicFaddF32Inst", "true",
      "Has flat_atomic_add_f32 instruction">;

def FeatureFlatBufferGlobalAtomicFaddF64Inst
  : SubtargetFeature<"flat-buffer-global-fadd-f64-inst",
                     "HasFlatBufferGlobalAtomicFaddF64Inst", "true",
      "Has flat, buffer, and global instructions for f64 atomic fadd">;

def FeatureMemoryAtomicFAddF32DenormalSupport
  : SubtargetFeature<"memory-atomic-fadd-f32-denormal-support",
                     "HasMemoryAtomicFaddF32DenormalSupport", "true",
      "global/flat/buffer atomic fadd for float supports denormal handling">;

def FeatureAgentScopeFineGrainedRemoteMemoryAtomics
  : SubtargetFeature<"agent-scope-fine-grained-remote-memory-atomics",
                     "HasAgentScopeFineGrainedRemoteMemoryAtomics", "true",
      "Agent (device) scoped atomic operations, excluding those directly "
      "supported by PCIe (i.e. integer atomic add, exchange, and "
      "compare-and-swap), are functional for allocations in host or peer "
      "device memory.">;

def FeatureDefaultComponentZero
  : SubtargetFeature<"default-component-zero", "HasDefaultComponentZero", "true",
      "BUFFER/IMAGE store instructions set unspecified components to zero (before GFX12)">;

def FeatureDefaultComponentBroadcast
  : SubtargetFeature<"default-component-broadcast", "HasDefaultComponentBroadcast", "true",
      "BUFFER/IMAGE store instructions set unspecified components to x component (GFX12)">;

def FeatureSupportsSRAMECC
  : SubtargetFeature<"sramecc-support", "SupportsSRAMECC", "true",
      "Hardware supports SRAMECC">;

def FeatureSRAMECC : SubtargetFeature<"sramecc", "EnableSRAMECC", "true",
  "Enable SRAMECC">;

def FeatureNoSdstCMPX : SubtargetFeature<"no-sdst-cmpx", "HasNoSdstCMPX", "true",
  "V_CMPX does not write VCC/SGPR in addition to EXEC">;

def FeatureVscnt : SubtargetFeature<"vscnt", "HasVscnt", "true",
  "Has separate store vscnt counter">;

def FeatureGetWaveIdInst
  : SubtargetFeature<"get-wave-id-inst", "HasGetWaveIdInst", "true",
      "Has s_get_waveid_in_workgroup instruction">;

def FeatureSMemTimeInst
  : SubtargetFeature<"s-memtime-inst", "HasSMemTimeInst", "true",
      "Has s_memtime instruction">;

def FeatureShaderCyclesRegister
  : SubtargetFeature<"shader-cycles-register", "HasShaderCyclesRegister", "true",
      "Has SHADER_CYCLES hardware register">;

def FeatureShaderCyclesHiLoRegisters
  : SubtargetFeature<"shader-cycles-hi-lo-registers", "HasShaderCyclesHiLoRegisters", "true",
      "Has SHADER_CYCLES_HI/LO hardware registers">;

def FeatureMadMacF32Insts
  : SubtargetFeature<"mad-mac-f32-insts", "HasMadMacF32Insts", "true",
      "Has v_mad_f32/v_mac_f32/v_madak_f32/v_madmk_f32 instructions">;

def FeatureDsSrc2Insts : SubtargetFeature<"ds-src2-insts", "HasDsSrc2Insts", "true",
  "Has ds_*_src2 instructions">;

def FeatureVOP3Literal : SubtargetFeature<"vop3-literal", "HasVOP3Literal", "true",
  "Can use one literal in VOP3">;

def FeatureNoDataDepHazard
  : SubtargetFeature<"no-data-dep-hazard", "HasNoDataDepHazard", "true",
      "Does not need SW waitstates">;

// Allocate 1536 VGPRs for wave32 and 768 VGPRs for wave64
// with allocation granularity 24 for wave32 and 12 for wave64
def Feature1_5xVGPRs : SubtargetFeature<"allocate1_5xvgprs", "Has1_5xVGPRs", "true",
  "Has 50% more physical VGPRs and 50% larger allocation granule">;


def FeatureVOPD : SubtargetFeature<"vopd", "HasVOPDInsts", "true",
  "Has VOPD dual issue wave32 instructions">;

def FeatureVALUTransUseHazard
  : SubtargetFeature<"valu-trans-use-hazard", "HasVALUTransUseHazard", "true",
      "Hazard when TRANS instructions are closely followed by a use of the result">;

def FeatureSALUFloatInsts
  : SubtargetFeature<"salu-float", "HasSALUFloatInsts", "true",
      "Has SALU floating point instructions">;

def FeaturePseudoScalarTrans
  : SubtargetFeature<"pseudo-scalar-trans", "HasPseudoScalarTrans", "true",
      "Has Pseudo Scalar Transcendental instructions">;

def FeatureHasRestrictedSOffset
  : SubtargetFeature<"restricted-soffset", "HasRestrictedSOffset", "true",
      "Has restricted SOffset (immediate not supported).">;

def FeatureRequiredExportPriority
  : SubtargetFeature<"required-export-priority", "HasRequiredExportPriority", "true",
      "Export priority must be explicitly manipulated on GFX11.5">;

def FeatureVmemWriteVgprInOrder
  : SubtargetFeature<"vmem-write-vgpr-in-order", "HasVmemWriteVgprInOrder", "true",
      "VMEM instructions of the same type write VGPR results in order">;

def FeatureBitOp3Insts : SubtargetFeature<"bitop3-insts", "HasBitOp3Insts", "true",
  "Has v_bitop3_b32/v_bitop3_b16 instructions">;

def FeatureTransposeLoadF4F6Insts
  : SubtargetFeature<"transpose-load-f4f6-insts", "HasTransposeLoadF4F6Insts", "true",
      "Has ds_load_tr4/tr6 and global_load_tr4/tr6 instructions">;

def FeaturePrngInst : SubtargetFeature<"prng-inst", "HasPrngInst", "true",
  "Has v_prng_b32 instruction">;

def FeatureBVHDualAndBVH8Insts
  : SubtargetFeature<"bvh-dual-bvh-8-insts", "HasBVHDualAndBVH8Insts", "true",
      "Has image_bvh_dual_intersect_ray and image_bvh8_intersect_ray instructions">;

def FeaturePointSampleAccel
  : SubtargetFeature<"point-sample-accel", "HasPointSampleAccel", "true",
      "Has point sample acceleration feature">;

def Feature64BitLiterals
  : SubtargetFeature<"64-bit-literals", "Has64BitLiterals", "true",
      "Can use 64-bit literals with single DWORD instructions">;

def FeatureWaitXcnt : SubtargetFeature<"wait-xcnt", "HasWaitXcnt", "true",
  "Has s_wait_xcnt instruction">;

def FeatureSetPrioIncWgInst
  : SubtargetFeature<"setprio-inc-wg-inst", "HasSetPrioIncWgInst", "true",
      "Has s_setprio_inc_wg instruction.">;

//===------------------------------------------------------------===//
// Subtarget Features (options and debugging)
//===------------------------------------------------------------===//

// Parameterized feature: `size` is appended to the feature string and to the
// description, and its string form is the value assigned to
// MaxPrivateElementSize.
class FeatureMaxPrivateElementSize<int size>
  : SubtargetFeature<"max-private-element-size-"#size,
                     "MaxPrivateElementSize",
                     !cast<string>(size),
                     "Maximum private access size may be "#size>;

def FeatureMaxPrivateElementSize4 : FeatureMaxPrivateElementSize<4>;
def FeatureMaxPrivateElementSize8 : FeatureMaxPrivateElementSize<8>;
def FeatureMaxPrivateElementSize16 : FeatureMaxPrivateElementSize<16>;

// "DumpCode" and "dumpcode" are two spellings that set the same DumpCode
// attribute.
def FeatureDumpCode : SubtargetFeature<"DumpCode", "DumpCode", "true",
  "Dump MachineInstrs in the CodeEmitter">;

def FeatureDumpCodeLower : SubtargetFeature<"dumpcode", "DumpCode", "true",
  "Dump MachineInstrs in the CodeEmitter">;

// XXX - This should probably be removed once enabled by default
def FeatureEnableLoadStoreOpt
  : SubtargetFeature<"load-store-opt", "EnableLoadStoreOpt", "true",
      "Enable SI load/store optimizer pass">;

// Performance debugging feature. Allow using DS instruction immediate
// offsets even if the base pointer can't be proven to be base. On SI,
// base pointer values that won't give the same result as a 16-bit add
// are not safe to fold, but this will override the conservative test
// for the base pointer.
def FeatureEnableUnsafeDSOffsetFolding
    : SubtargetFeature<"unsafe-ds-offset-folding",
                       "EnableUnsafeDSOffsetFolding", "true",
                       "Force using DS instruction immediate offsets on SI">;

def FeatureEnableSIScheduler
    : SubtargetFeature<"si-scheduler", "EnableSIScheduler", "true",
                       "Enable SI Machine Scheduler">;

def FeatureEnableDS128
    : SubtargetFeature<"enable-ds128", "EnableDS128", "true",
                       "Use ds_{read|write}_b128">;

// Sparse texture support requires that all result registers are zeroed when
// PRTStrictNull is set to true. This feature is turned on for all architectures
// but is enabled as a feature in case there are situations where PRTStrictNull
// is disabled by the driver.
def FeatureEnablePRTStrictNull
    : SubtargetFeature<"enable-prt-strict-null", "EnablePRTStrictNull", "true",
                       "Enable zeroing of result registers for sparse texture fetches">;

// Unless +-flat-for-global is specified, turn on FlatForGlobal for
// all OS-es on VI and newer hardware to avoid assertion failures due
// to missing ADDR64 variants of MUBUF instructions.
// FIXME: moveToVALU should be able to handle converting addr64 MUBUF
// instructions.
def FeatureFlatForGlobal
    : SubtargetFeature<"flat-for-global", "FlatForGlobal", "true",
                       "Force to generate flat instruction for global">;

def FeatureAutoWaitcntBeforeBarrier
    : SubtargetFeature<"auto-waitcnt-before-barrier",
                       "AutoWaitcntBeforeBarrier", "true",
                       "Hardware automatically inserts waitcnt before barrier">;

def FeatureBackOffBarrier
    : SubtargetFeature<"back-off-barrier", "BackOffBarrier", "true",
                       "Hardware supports backing off s_barrier if an exception occurs">;

def FeatureTrigReducedRange
    : SubtargetFeature<"trig-reduced-range", "HasTrigReducedRange", "true",
                       "Requires use of fract on arguments to trig instructions">;

def FeatureKernargPreload
    : SubtargetFeature<"kernarg-preload", "KernargPreload", "true",
                       "Hardware supports preloading of kernel arguments in user SGPRs.">;

// Alignment enforcement is controlled by a configuration register:
// SH_MEM_CONFIG.alignment_mode
def FeatureUnalignedAccessMode
    : SubtargetFeature<"unaligned-access-mode", "UnalignedAccessMode", "true",
                       "Enable unaligned global, local and region loads and stores if the hardware"
                       " supports it">;

def FeaturePackedTID
    : SubtargetFeature<"packed-tid", "HasPackedTID", "true",
                       "Workitem IDs are packed into v0 at kernel launch">;

def FeatureArchitectedFlatScratch
    : SubtargetFeature<"architected-flat-scratch", "HasArchitectedFlatScratch",
                       "true",
                       "Flat Scratch register is a readonly SPI initialized architected register">;

def FeatureArchitectedSGPRs
    : SubtargetFeature<"architected-sgprs", "HasArchitectedSGPRs", "true",
                       "Enable the architected SGPRs">;

def FeatureGDS
    : SubtargetFeature<"gds", "HasGDS", "true",
                       "Has Global Data Share">;

def FeatureGWS
    : SubtargetFeature<"gws", "HasGWS", "true",
                       "Has Global Wave Sync">;

def FeatureRequiresCOV6
    : SubtargetFeature<"requires-cov6", "RequiresCOV6", "true",
                       "Target Requires Code Object V6">;

def FeatureXF32Insts
    : SubtargetFeature<"xf32-insts", "HasXF32Insts", "true",
                       "Has instructions that support xf32 format, such as "
                       "v_mfma_f32_16x16x8_xf32 and v_mfma_f32_32x32x4_xf32">;

// FIXME: Remove after all users are migrated to attribute.
def FeatureDynamicVGPR
    : SubtargetFeature<"dynamic-vgpr", "DynamicVGPR", "true",
                       "Enable dynamic VGPR mode">;

// FIXME: Remove after all users are migrated to attribute.
def FeatureDynamicVGPRBlockSize32
    : SubtargetFeature<"dynamic-vgpr-block-size-32", "DynamicVGPRBlockSize32",
                       "true",
                       "Use a block size of 32 for dynamic VGPR allocation (default is 16)">;

// Enable the use of SCRATCH_STORE/LOAD_BLOCK instructions for saving and
// restoring the callee-saved registers.
def FeatureUseBlockVGPROpsForCSR
    : SubtargetFeature<"block-vgpr-csr", "UseBlockVGPROpsForCSR", "true",
                       "Use block load/store for VGPR callee saved registers">;

def FeatureLshlAddU64Inst
    : SubtargetFeature<"lshl-add-u64-inst", "HasLshlAddU64Inst", "true",
                       "Has v_lshl_add_u64 instruction">;

// The description's parenthesised list of instruction forms is closed after
// global_load_lds; the trailing sentence about scratch_load_lds stands outside
// the parentheses.
def FeatureMemToLDSLoad
    : SubtargetFeature<"vmem-to-lds-load-insts", "HasVMemToLDSLoad", "true",
                       "The platform has memory to lds instructions (global_load w/lds bit set, buffer_load w/lds bit set or global_load_lds). This does not include scratch_load_lds.">;

def FeatureLdsBarrierArriveAtomic
    : SubtargetFeature<"lds-barrier-arrive-atomic", "HasLdsBarrierArriveAtomic",
                       "true",
                       "Has LDS barrier-arrive atomic instructions">;

// Dummy feature used to disable assembler instructions.
def FeatureDisable
    : SubtargetFeature<"", "FeatureDisable", "true",
                       "Dummy feature to disable assembler instructions">;

//===----------------------------------------------------------------------===//

// Generation feature tied to GCNSubtarget: Value, FeatureName and Implies are
// forwarded unchanged, with "GCNSubtarget" supplied as the third argument.
class GCNSubtargetFeatureGeneration<string Value, string FeatureName,
                                    list<SubtargetFeature> Implies>
    : SubtargetFeatureGeneration<Value, FeatureName, "GCNSubtarget", Implies>;

// One record per hardware generation; the list holds the features that the
// generation feature implies.

def FeatureSouthernIslands
    : GCNSubtargetFeatureGeneration<"SOUTHERN_ISLANDS", "southern-islands", [
        FeatureFP64,
        FeatureAddressableLocalMemorySize32768,
        FeatureMIMG_R128,
        FeatureWavefrontSize64,
        FeatureSMemTimeInst,
        FeatureMadMacF32Insts,
        FeatureDsSrc2Insts,
        FeatureLDSBankCount32,
        FeatureMovrel,
        FeatureTrigReducedRange,
        FeatureExtendedImageInsts,
        FeatureImageInsts,
        FeatureGDS,
        FeatureGWS,
        FeatureDefaultComponentZero,
        FeatureAtomicFMinFMaxF32GlobalInsts,
        FeatureAtomicFMinFMaxF64GlobalInsts,
        FeatureVmemWriteVgprInOrder
      ]>;

def FeatureSeaIslands
    : GCNSubtargetFeatureGeneration<"SEA_ISLANDS", "sea-islands", [
        FeatureFP64,
        FeatureAddressableLocalMemorySize65536,
        FeatureMIMG_R128,
        FeatureWavefrontSize64,
        FeatureFlatAddressSpace,
        FeatureCIInsts,
        FeatureMovrel,
        FeatureTrigReducedRange,
        FeatureGFX7GFX8GFX9Insts,
        FeatureSMemTimeInst,
        FeatureMadMacF32Insts,
        FeatureDsSrc2Insts,
        FeatureExtendedImageInsts,
        FeatureUnalignedBufferAccess,
        FeatureImageInsts,
        FeatureGDS,
        FeatureGWS,
        FeatureDefaultComponentZero,
        FeatureAtomicFMinFMaxF32GlobalInsts,
        FeatureAtomicFMinFMaxF64GlobalInsts,
        FeatureAtomicFMinFMaxF32FlatInsts,
        FeatureAtomicFMinFMaxF64FlatInsts,
        FeatureVmemWriteVgprInOrder
      ]>;

def FeatureVolcanicIslands
    : GCNSubtargetFeatureGeneration<"VOLCANIC_ISLANDS", "volcanic-islands", [
        FeatureFP64,
        FeatureAddressableLocalMemorySize65536,
        FeatureMIMG_R128,
        FeatureWavefrontSize64,
        FeatureFlatAddressSpace,
        FeatureGCN3Encoding,
        FeatureCIInsts,
        Feature16BitInsts,
        FeatureSMemRealTime,
        FeatureVGPRIndexMode,
        FeatureMovrel,
        FeatureScalarStores,
        FeatureInv2PiInlineImm,
        FeatureSDWA,
        FeatureSDWAOutModsVOPC,
        FeatureSDWAMac,
        FeatureDPP,
        FeatureIntClamp,
        FeatureTrigReducedRange,
        FeatureGFX8Insts,
        FeatureGFX7GFX8GFX9Insts,
        FeatureSMemTimeInst,
        FeatureMadMacF32Insts,
        FeatureDsSrc2Insts,
        FeatureExtendedImageInsts,
        FeatureFastDenormalF32,
        FeatureUnalignedBufferAccess,
        FeatureImageInsts,
        FeatureGDS,
        FeatureGWS,
        FeatureDefaultComponentZero,
        FeatureVmemWriteVgprInOrder
      ]>;

def FeatureGFX9
    : GCNSubtargetFeatureGeneration<"GFX9", "gfx9", [
        FeatureFP64,
        FeatureWavefrontSize64,
        FeatureFlatAddressSpace,
        FeatureGCN3Encoding,
        FeatureCIInsts,
        Feature16BitInsts,
        FeatureSMemRealTime,
        FeatureScalarStores,
        FeatureInv2PiInlineImm,
        FeatureApertureRegs,
        FeatureGFX9Insts,
        FeatureVOP3P,
        FeatureVGPRIndexMode,
        FeatureFastFMAF32,
        FeatureDPP,
        FeatureIntClamp,
        FeatureSDWA,
        FeatureSDWAOmod,
        FeatureSDWAScalar,
        FeatureSDWASdst,
        FeatureFlatInstOffsets,
        FeatureFlatGlobalInsts,
        FeatureFlatScratchInsts,
        FeatureAddNoCarryInsts,
        FeatureGFX8Insts,
        FeatureGFX7GFX8GFX9Insts,
        FeatureScalarFlatScratchInsts,
        FeatureScalarAtomics,
        FeatureR128A16,
        FeatureA16,
        FeatureSMemTimeInst,
        FeatureFastDenormalF32,
        FeatureSupportsXNACK,
        FeatureUnalignedBufferAccess,
        FeatureUnalignedScratchAccess,
        FeatureUnalignedDSAccess,
        FeatureNegativeScratchOffsetBug,
        FeatureGWS,
        FeatureDefaultComponentZero,
        FeatureVmemWriteVgprInOrder,
        FeatureMemToLDSLoad
      ]>;

def FeatureGFX10
    : GCNSubtargetFeatureGeneration<"GFX10", "gfx10", [
        FeatureFP64,
        FeatureAddressableLocalMemorySize65536,
        FeatureMIMG_R128,
        FeatureFlatAddressSpace,
        FeatureCIInsts,
        Feature16BitInsts,
        FeatureSMemRealTime,
        FeatureInv2PiInlineImm,
        FeatureApertureRegs,
        FeatureGFX9Insts,
        FeatureGFX10Insts,
        FeatureVOP3P,
        FeatureMovrel,
        FeatureFastFMAF32,
        FeatureDPP,
        FeatureIntClamp,
        FeatureSDWA,
        FeatureSDWAOmod,
        FeatureSDWAScalar,
        FeatureSDWASdst,
        FeatureFlatInstOffsets,
        FeatureFlatGlobalInsts,
        FeatureFlatScratchInsts,
        FeatureAddNoCarryInsts,
        FeatureFmaMixInsts,
        FeatureGFX8Insts,
        FeatureNoSdstCMPX,
        FeatureVscnt,
        FeatureVOP3Literal,
        FeatureDPP8,
        FeatureExtendedImageInsts,
        FeatureNoDataDepHazard,
        FeaturePkFmacF16Inst,
        FeatureA16,
        FeatureSMemTimeInst,
        FeatureFastDenormalF32,
        FeatureG16,
        FeatureUnalignedBufferAccess,
        FeatureUnalignedScratchAccess,
        FeatureUnalignedDSAccess,
        FeatureImageInsts,
        FeatureGDS,
        FeatureGWS,
        FeatureDefaultComponentZero,
        FeatureMaxHardClauseLength63,
        FeatureAtomicFMinFMaxF32GlobalInsts,
        FeatureAtomicFMinFMaxF64GlobalInsts,
        FeatureAtomicFMinFMaxF32FlatInsts,
        FeatureAtomicFMinFMaxF64FlatInsts,
        FeatureVmemWriteVgprInOrder,
        FeatureMemToLDSLoad
      ]>;

def FeatureGFX11
    : GCNSubtargetFeatureGeneration<"GFX11", "gfx11", [
        FeatureFP64,
        FeatureAddressableLocalMemorySize65536,
        FeatureMIMG_R128,
        FeatureFlatAddressSpace,
        Feature16BitInsts,
        FeatureInv2PiInlineImm,
        FeatureApertureRegs,
        FeatureCIInsts,
        FeatureGFX8Insts,
        FeatureGFX9Insts,
        FeatureGFX10Insts,
        FeatureGFX10_AEncoding,
        FeatureGFX10_BEncoding,
        FeatureGFX10_3Insts,
        FeatureGFX11Insts,
        FeatureVOP3P,
        FeatureVOPD,
        FeatureTrue16BitInsts,
        FeatureMovrel,
        FeatureFastFMAF32,
        FeatureDPP,
        FeatureIntClamp,
        FeatureFlatInstOffsets,
        FeatureFlatGlobalInsts,
        FeatureFlatScratchInsts,
        FeatureAddNoCarryInsts,
        FeatureFmaMixInsts,
        FeatureNoSdstCMPX,
        FeatureVscnt,
        FeatureVOP3Literal,
        FeatureDPP8,
        FeatureExtendedImageInsts,
        FeatureNoDataDepHazard,
        FeaturePkFmacF16Inst,
        FeatureA16,
        FeatureFastDenormalF32,
        FeatureG16,
        FeatureUnalignedBufferAccess,
        FeatureUnalignedScratchAccess,
        FeatureUnalignedDSAccess,
        FeatureGDS,
        FeatureGWS,
        FeatureDefaultComponentZero,
        FeatureMaxHardClauseLength32,
        FeatureAtomicFMinFMaxF32GlobalInsts,
        FeatureAtomicFMinFMaxF32FlatInsts,
        FeatureVmemWriteVgprInOrder
      ]>;

def FeatureGFX12
    : GCNSubtargetFeatureGeneration<"GFX12", "gfx12", [
        FeatureFP64,
        FeatureAddressableLocalMemorySize65536,
        FeatureMIMG_R128,
        FeatureFlatAddressSpace,
        Feature16BitInsts,
        FeatureInv2PiInlineImm,
        FeatureApertureRegs,
        FeatureCIInsts,
        FeatureGFX8Insts,
        FeatureGFX9Insts,
        FeatureGFX10Insts,
        FeatureGFX10_AEncoding,
        FeatureGFX10_BEncoding,
        FeatureGFX10_3Insts,
        FeatureGFX11Insts,
        FeatureGFX12Insts,
        FeatureVOP3P,
        FeatureVOPD,
        FeatureMovrel,
        FeatureFastFMAF32,
        FeatureDPP,
        FeatureIntClamp,
        FeatureFlatInstOffsets,
        FeatureFlatGlobalInsts,
        FeatureFlatScratchInsts,
        FeatureAddNoCarryInsts,
        FeatureFmaMixInsts,
        FeatureNoSdstCMPX,
        FeatureVscnt,
        FeatureVOP3Literal,
        FeatureDPP8,
        FeatureNoDataDepHazard,
        FeaturePkFmacF16Inst,
        FeatureA16,
        FeatureFastDenormalF32,
        FeatureG16,
        FeatureUnalignedBufferAccess,
        FeatureUnalignedScratchAccess,
        FeatureUnalignedDSAccess,
        FeatureTrue16BitInsts,
        FeatureDefaultComponentBroadcast,
        FeatureMaxHardClauseLength32,
        FeatureAtomicFMinFMaxF32GlobalInsts,
        FeatureAtomicFMinFMaxF32FlatInsts,
        FeatureIEEEMinimumMaximumInsts,
        FeatureMinimum3Maximum3F32,
        FeatureMinimum3Maximum3F16,
        FeatureAgentScopeFineGrainedRemoteMemoryAtomics
      ]>;

//===----------------------------------------------------------------------===//

// Named list of features. The *_Common sets are extended by the per-version
// sets below through !listconcat on their Features field.
class FeatureSet<list<SubtargetFeature> Features_> {
  list<SubtargetFeature> Features = Features_;
}

def FeatureISAVersion6_0_0
    : FeatureSet<[
        FeatureSouthernIslands,
        FeatureFastFMAF32,
        HalfRate64Ops,
        FeatureLDSBankCount32
      ]>;

def FeatureISAVersion6_0_1
    : FeatureSet<[FeatureSouthernIslands, FeatureLDSBankCount32]>;

def FeatureISAVersion6_0_2
    : FeatureSet<[FeatureSouthernIslands, FeatureLDSBankCount32]>;

def FeatureISAVersion7_0_0
    : FeatureSet<[FeatureSeaIslands, FeatureLDSBankCount32]>;

def FeatureISAVersion7_0_1
    : FeatureSet<[
        FeatureSeaIslands,
        HalfRate64Ops,
        FeatureLDSBankCount32,
        FeatureFastFMAF32
      ]>;

def FeatureISAVersion7_0_2
    : FeatureSet<[
        FeatureSeaIslands,
        FeatureLDSBankCount16,
        FeatureFastFMAF32
      ]>;

def FeatureISAVersion7_0_3
    : FeatureSet<[FeatureSeaIslands, FeatureLDSBankCount16]>;

def FeatureISAVersion7_0_4
    : FeatureSet<[FeatureSeaIslands, FeatureLDSBankCount32]>;

def FeatureISAVersion7_0_5
    : FeatureSet<[FeatureSeaIslands, FeatureLDSBankCount16]>;

def FeatureISAVersion8_0_Common
    : FeatureSet<[
        FeatureVolcanicIslands,
        FeatureLDSBankCount32,
        FeatureUnpackedD16VMem
      ]>;

def FeatureISAVersion8_0_1
    : FeatureSet<!listconcat(FeatureISAVersion8_0_Common.Features, [
        FeatureFastFMAF32,
        HalfRate64Ops,
        FeatureSupportsXNACK
      ])>;

def FeatureISAVersion8_0_2
    : FeatureSet<!listconcat(FeatureISAVersion8_0_Common.Features,
                             [FeatureSGPRInitBug])>;

def FeatureISAVersion8_0_3
    : FeatureSet<!listconcat(FeatureISAVersion8_0_Common.Features,
                             [])>;

def FeatureISAVersion8_0_5
    : FeatureSet<!listconcat(FeatureISAVersion8_0_Common.Features,
                             [FeatureSGPRInitBug])>;

def FeatureISAVersion8_1_0
    : FeatureSet<[
        FeatureVolcanicIslands,
        FeatureLDSBankCount16,
        FeatureSupportsXNACK,
        FeatureImageStoreD16Bug,
        FeatureImageGather4D16Bug
      ]>;

def FeatureISAVersion9_0_Common
    : FeatureSet<[
        FeatureGFX9,
        FeatureAddressableLocalMemorySize65536,
        FeatureLDSBankCount32,
        FeatureImageInsts,
        FeatureMadMacF32Insts
      ]>;

def FeatureISAVersion9_0_Consumer_Common
    : FeatureSet<!listconcat(FeatureISAVersion9_0_Common.Features, [
        FeatureImageGather4D16Bug,
        FeatureDsSrc2Insts,
        FeatureExtendedImageInsts,
        FeatureGDS
      ])>;

def FeatureISAVersion9_Generic
    : FeatureSet<!listconcat(FeatureISAVersion9_0_Consumer_Common.Features,
                             [FeatureRequiresCOV6])>;

def FeatureISAVersion9_0_MI_Common
    : FeatureSet<!listconcat(FeatureISAVersion9_0_Common.Features, [
        FeatureFmaMixInsts,
        FeatureDLInsts,
        FeatureDot1Insts,
        FeatureDot2Insts,
        FeatureDot3Insts,
        FeatureDot4Insts,
        FeatureDot5Insts,
        FeatureDot6Insts,
        FeatureDot7Insts,
        FeatureDot10Insts,
        FeatureMAIInsts,
        FeaturePkFmacF16Inst,
        FeatureAtomicFaddNoRtnInsts,
        FeatureSupportsSRAMECC
      ])>;

def FeatureISAVersion9_0_0
    : FeatureSet<!listconcat(FeatureISAVersion9_0_Consumer_Common.Features,
                             [FeatureMadMixInsts])>;

def FeatureISAVersion9_0_2
    : FeatureSet<!listconcat(FeatureISAVersion9_0_Consumer_Common.Features,
                             [FeatureMadMixInsts])>;

def FeatureISAVersion9_0_4
    : FeatureSet<!listconcat(FeatureISAVersion9_0_Consumer_Common.Features,
                             [FeatureFmaMixInsts])>;

def FeatureISAVersion9_0_6
    : FeatureSet<!listconcat(FeatureISAVersion9_0_Consumer_Common.Features, [
        HalfRate64Ops,
        FeatureFmaMixInsts,
        FeatureDLInsts,
        FeatureDot1Insts,
        FeatureDot2Insts,
        FeatureDot7Insts,
        FeatureDot10Insts,
        FeatureSupportsSRAMECC
      ])>;

def FeatureISAVersion9_0_8
    : FeatureSet<!listconcat(FeatureISAVersion9_0_MI_Common.Features, [
        FeatureGDS,
        HalfRate64Ops,
        FeatureDsSrc2Insts,
        FeatureExtendedImageInsts,
        FeatureAtomicBufferGlobalPkAddF16NoRtnInsts,
        FeatureMFMAInlineLiteralBug,
        FeatureImageGather4D16Bug
      ])>;

def FeatureISAVersion9_0_9
    : FeatureSet<!listconcat(FeatureISAVersion9_0_Consumer_Common.Features,
                             [FeatureMadMixInsts])>;

def FeatureISAVersion9_0_A
    : FeatureSet<!listconcat(FeatureISAVersion9_0_MI_Common.Features, [
        FeatureGFX90AInsts,
        FeatureFmacF64Inst,
        FeatureDPALU_DPP,
        FeaturePackedFP32Ops,
        FeatureAtomicFaddRtnInsts,
        FeatureAtomicBufferGlobalPkAddF16Insts,
        FeaturePackedTID,
        FullRate64Ops,
        FeatureBackOffBarrier,
        FeatureKernargPreload,
        FeatureAtomicFMinFMaxF64GlobalInsts,
        FeatureAtomicFMinFMaxF64FlatInsts,
        FeatureFlatBufferGlobalAtomicFaddF64Inst
      ])>;

def FeatureISAVersion9_0_C
    : FeatureSet<!listconcat(FeatureISAVersion9_0_Consumer_Common.Features,
                             [FeatureMadMixInsts])>;

def FeatureISAVersion9_4_Common
    : FeatureSet<[
        FeatureGFX9,
        FeatureGFX90AInsts,
        FeatureGFX940Insts,
        FeatureFmaMixInsts,
        FeatureLDSBankCount32,
        FeatureDLInsts,
        FeatureFmacF64Inst,
        FeatureDot1Insts,
        FeatureDot2Insts,
        FeatureDot3Insts,
        FeatureDot4Insts,
        FeatureDot5Insts,
        FeatureDot6Insts,
        FeatureDot7Insts,
        FeatureDot10Insts,
        FeatureAtomicDsPkAdd16Insts,
        FeatureAtomicFlatPkAdd16Insts,
        FeatureDPALU_DPP,
        FeaturePackedFP32Ops,
        FeatureMAIInsts,
        FeaturePkFmacF16Inst,
        FeatureAtomicFaddRtnInsts,
        FeatureAtomicFaddNoRtnInsts,
        FeatureAtomicBufferGlobalPkAddF16Insts,
        FeatureAtomicGlobalPkAddBF16Inst,
        FeatureFlatAtomicFaddF32Inst,
        FeatureSupportsSRAMECC,
        FeaturePackedTID,
        FeatureArchitectedFlatScratch,
        FullRate64Ops,
        FeatureBackOffBarrier,
        FeatureKernargPreload,
        FeatureAtomicFMinFMaxF64GlobalInsts,
        FeatureAtomicFMinFMaxF64FlatInsts,
        FeatureAgentScopeFineGrainedRemoteMemoryAtomics,
        FeatureMemoryAtomicFAddF32DenormalSupport,
        FeatureFlatBufferGlobalAtomicFaddF64Inst,
        FeatureLshlAddU64Inst,
      ]>;

def FeatureISAVersion9_5_Common
    : FeatureSet<!listconcat(FeatureISAVersion9_4_Common.Features, [
        FeatureAddressableLocalMemorySize163840,
        FeatureFP8Insts,
        FeatureFP8ConversionInsts,
        FeatureGFX950Insts,
        FeaturePrngInst,
        FeatureBF16ConversionInsts,
        FeatureBitOp3Insts,
        FeatureFP8ConversionScaleInsts,
        FeatureBF8ConversionScaleInsts,
        FeatureFP4ConversionScaleInsts,
        FeatureFP6BF6ConversionScaleInsts,
        FeatureDot12Insts,
        FeatureDot13Insts,
        FeatureAtomicBufferPkAddBF16Inst
      ])>;

def FeatureISAVersion9_4_2
    : FeatureSet<!listconcat(FeatureISAVersion9_4_Common.Features, [
        FeatureAddressableLocalMemorySize65536,
        FeatureFP8Insts,
        FeatureFP8ConversionInsts,
        FeatureCvtFP8VOP1Bug,
        FeatureXF32Insts
      ])>;

def FeatureISAVersion9_4_Generic
    : FeatureSet<!listconcat(FeatureISAVersion9_4_Common.Features, [
        FeatureAddressableLocalMemorySize65536,
        FeatureRequiresCOV6
      ])>;

def FeatureISAVersion9_5_0
    : FeatureSet<FeatureISAVersion9_5_Common.Features>;

def FeatureISAVersion10_Common
    : FeatureSet<[
        FeatureGFX10,
        FeatureLDSBankCount32,
        FeatureDLInsts,
        FeatureNSAEncoding,
        FeatureBackOffBarrier
      ]>;

def FeatureISAVersion10_1_Common
    : FeatureSet<!listconcat(FeatureISAVersion10_Common.Features, [
        FeatureScalarStores,
        FeatureScalarAtomics,
        FeatureScalarFlatScratchInsts,
        FeatureGetWaveIdInst,
        FeatureMadMacF32Insts,
        FeatureDsSrc2Insts,
        FeatureLdsMisalignedBug,
        FeatureSupportsXNACK,
        // gfx101x bugs
        FeatureVcmpxPermlaneHazard,
        FeatureVMEMtoScalarWriteHazard,
        FeatureSMEMtoVectorWriteHazard,
        FeatureInstFwdPrefetchBug,
        FeatureVcmpxExecWARHazard,
        FeatureLdsBranchVmemWARHazard,
        FeatureNSAtoVMEMBug,
        FeatureNSAClauseBug,
        FeatureOffset3fBug,
        FeatureFlatSegmentOffsetBug,
        FeatureNegativeUnalignedScratchOffsetBug
      ])>;

def FeatureISAVersion10_1_Generic
    : FeatureSet<!listconcat(FeatureISAVersion10_1_Common.Features,
                             [FeatureRequiresCOV6])>;

def FeatureISAVersion10_1_0
    : FeatureSet<!listconcat(FeatureISAVersion10_1_Common.Features,
                             [])>;

def FeatureISAVersion10_1_1
    : FeatureSet<!listconcat(FeatureISAVersion10_1_Common.Features, [
        FeatureDot1Insts,
        FeatureDot2Insts,
        FeatureDot5Insts,
        FeatureDot6Insts,
        FeatureDot7Insts,
        FeatureDot10Insts
      ])>;

def FeatureISAVersion10_1_2
    : FeatureSet<!listconcat(FeatureISAVersion10_1_Common.Features, [
        FeatureDot1Insts,
        FeatureDot2Insts,
        FeatureDot5Insts,
        FeatureDot6Insts,
        FeatureDot7Insts,
        FeatureDot10Insts
      ])>;

def FeatureISAVersion10_1_3
    : FeatureSet<!listconcat(FeatureISAVersion10_1_Common.Features,
                             [FeatureGFX10_AEncoding])>;

def FeatureISAVersion10_3_0
    : FeatureSet<!listconcat(FeatureISAVersion10_Common.Features, [
        FeatureGFX10_AEncoding,
        FeatureGFX10_BEncoding,
        FeatureGFX10_3Insts,
        FeatureDot1Insts,
        FeatureDot2Insts,
        FeatureDot5Insts,
        FeatureDot6Insts,
        FeatureDot7Insts,
        FeatureDot10Insts,
        FeatureShaderCyclesRegister
      ])>;

def FeatureISAVersion10_3_Generic
    : FeatureSet<!listconcat(FeatureISAVersion10_3_0.Features,
                             [FeatureRequiresCOV6])>;

def FeatureISAVersion11_Common
    : FeatureSet<[
        FeatureGFX11,
        FeatureLDSBankCount32,
        FeatureDLInsts,
        FeatureDot5Insts,
        FeatureDot7Insts,
        FeatureDot8Insts,
        FeatureDot9Insts,
        FeatureDot10Insts,
        FeatureDot12Insts,
        FeatureNSAEncoding,
        FeaturePartialNSAEncoding,
        FeatureShaderCyclesRegister,
        FeatureArchitectedFlatScratch,
        FeatureAtomicFaddRtnInsts,
        FeatureAtomicFaddNoRtnInsts,
        FeatureFlatAtomicFaddF32Inst,
        FeatureImageInsts,
        FeaturePackedTID,
        FeatureVcmpxPermlaneHazard,
        FeatureMemoryAtomicFAddF32DenormalSupport
      ]>;

// There are few workarounds that need to be
// added to all targets. This pessimizes codegen
// a bit on the generic GFX11 target.
1844def FeatureISAVersion11_Generic: FeatureSet< 1845 !listconcat(FeatureISAVersion11_Common.Features, 1846 [FeatureMSAALoadDstSelBug, 1847 FeatureVALUTransUseHazard, 1848 FeatureUserSGPRInit16Bug, 1849 FeatureMADIntraFwdBug, 1850 FeaturePrivEnabledTrap2NopBug, 1851 FeatureRequiresCOV6, 1852 FeatureRequiredExportPriority])>; 1853 1854def FeatureISAVersion11_0_Common : FeatureSet< 1855 !listconcat(FeatureISAVersion11_Common.Features, 1856 [FeatureMSAALoadDstSelBug, 1857 FeatureVALUTransUseHazard, 1858 FeatureMADIntraFwdBug, 1859 FeaturePrivEnabledTrap2NopBug, 1860 FeatureRealTrue16Insts])>; 1861 1862def FeatureISAVersion11_0_0 : FeatureSet< 1863 !listconcat(FeatureISAVersion11_0_Common.Features, 1864 [Feature1_5xVGPRs, 1865 FeatureUserSGPRInit16Bug])>; 1866 1867def FeatureISAVersion11_0_1 : FeatureSet< 1868 !listconcat(FeatureISAVersion11_0_Common.Features, 1869 [Feature1_5xVGPRs])>; 1870 1871def FeatureISAVersion11_0_2 : FeatureSet< 1872 !listconcat(FeatureISAVersion11_0_Common.Features, 1873 [FeatureUserSGPRInit16Bug])>; 1874 1875def FeatureISAVersion11_0_3 : FeatureSet< 1876 !listconcat(FeatureISAVersion11_0_Common.Features, 1877 [])>; 1878 1879def FeatureISAVersion11_5_Common : FeatureSet< 1880 !listconcat(FeatureISAVersion11_Common.Features, 1881 [FeatureSALUFloatInsts, 1882 FeatureDPPSrc1SGPR, 1883 FeatureRequiredExportPriority])>; 1884 1885def FeatureISAVersion11_5_0 : FeatureSet< 1886 !listconcat(FeatureISAVersion11_5_Common.Features, 1887 [FeaturePointSampleAccel])>; 1888 1889def FeatureISAVersion11_5_1 : FeatureSet< 1890 !listconcat(FeatureISAVersion11_5_Common.Features, 1891 [Feature1_5xVGPRs, 1892 FeaturePointSampleAccel])>; 1893 1894def FeatureISAVersion11_5_2 : FeatureSet< 1895 !listconcat(FeatureISAVersion11_5_Common.Features, 1896 [FeaturePointSampleAccel])>; 1897 1898def FeatureISAVersion11_5_3 : FeatureSet< 1899 !listconcat(FeatureISAVersion11_5_Common.Features, 1900 [])>; 1901 1902def FeatureISAVersion12 : FeatureSet< 1903 [FeatureGFX12, 1904 
FeatureLDSBankCount32, 1905 FeatureDLInsts, 1906 FeatureDot7Insts, 1907 FeatureDot8Insts, 1908 FeatureDot9Insts, 1909 FeatureDot10Insts, 1910 FeatureDot11Insts, 1911 FeatureDot12Insts, 1912 FeatureNSAEncoding, 1913 FeaturePartialNSAEncoding, 1914 FeatureShaderCyclesHiLoRegisters, 1915 FeatureArchitectedFlatScratch, 1916 FeatureArchitectedSGPRs, 1917 FeatureAtomicFaddRtnInsts, 1918 FeatureAtomicFaddNoRtnInsts, 1919 FeatureAtomicDsPkAdd16Insts, 1920 FeatureAtomicFlatPkAdd16Insts, 1921 FeatureAtomicBufferGlobalPkAddF16Insts, 1922 FeatureAtomicGlobalPkAddBF16Inst, 1923 FeatureAtomicBufferPkAddBF16Inst, 1924 FeatureFlatAtomicFaddF32Inst, 1925 FeatureImageInsts, 1926 FeatureExtendedImageInsts, 1927 FeatureFP8ConversionInsts, 1928 FeatureIEEEMinimumMaximumInsts, 1929 FeaturePackedTID, 1930 FeatureVcmpxPermlaneHazard, 1931 FeatureSALUFloatInsts, 1932 FeaturePseudoScalarTrans, 1933 FeatureHasRestrictedSOffset, 1934 FeatureScalarDwordx3Loads, 1935 FeatureDPPSrc1SGPR, 1936 FeatureMaxHardClauseLength32, 1937 Feature1_5xVGPRs, 1938 FeatureMemoryAtomicFAddF32DenormalSupport, 1939 FeatureBVHDualAndBVH8Insts 1940 ]>; 1941 1942def FeatureISAVersion12_50 : FeatureSet< 1943 [FeatureGFX12, 1944 FeatureGFX1250Insts, 1945 FeatureCuMode, 1946 Feature64BitLiterals, 1947 FeatureLDSBankCount32, 1948 FeatureDLInsts, 1949 FeatureFmacF64Inst, 1950 FeaturePackedFP32Ops, 1951 FeatureDot7Insts, 1952 FeatureDot8Insts, 1953 FeatureWavefrontSize32, 1954 FeatureShaderCyclesHiLoRegisters, 1955 FeatureArchitectedFlatScratch, 1956 FeatureArchitectedSGPRs, 1957 FeatureAtomicFaddRtnInsts, 1958 FeatureAtomicFaddNoRtnInsts, 1959 FeatureAtomicDsPkAdd16Insts, 1960 FeatureAtomicFlatPkAdd16Insts, 1961 FeatureAtomicBufferGlobalPkAddF16Insts, 1962 FeatureAtomicGlobalPkAddBF16Inst, 1963 FeatureAtomicBufferPkAddBF16Inst, 1964 FeatureFlatAtomicFaddF32Inst, 1965 FeatureFP8ConversionInsts, 1966 FeatureFP8E5M3Insts, 1967 FeaturePackedTID, 1968 FeatureVcmpxPermlaneHazard, 1969 FeatureSALUFloatInsts, 1970 
FeaturePseudoScalarTrans, 1971 FeatureHasRestrictedSOffset, 1972 FeatureScalarDwordx3Loads, 1973 FeatureDPPSrc1SGPR, 1974 FeatureBitOp3Insts, 1975 FeatureTransposeLoadF4F6Insts, 1976 FeatureBF16TransInsts, 1977 FeatureBF16ConversionInsts, 1978 FeatureCvtPkF16F32Inst, 1979 FeatureMinimum3Maximum3PKF16, 1980 FeaturePrngInst, 1981 FeaturePermlane16Swap, 1982 FeatureAshrPkInsts, 1983 FeatureSupportsSRAMECC, 1984 FeatureMaxHardClauseLength63, 1985 FeatureWaitXcnt, 1986 FeatureAtomicFMinFMaxF64GlobalInsts, 1987 FeatureAtomicFMinFMaxF64FlatInsts, 1988 FeatureFlatBufferGlobalAtomicFaddF64Inst, 1989 FeatureMemoryAtomicFAddF32DenormalSupport, 1990 FeatureKernargPreload, 1991 FeatureLshlAddU64Inst, 1992 FeatureLdsBarrierArriveAtomic, 1993 FeatureSetPrioIncWgInst, 1994]>; 1995 1996def FeatureISAVersion12_Generic: FeatureSet< 1997 !listconcat(FeatureISAVersion12.Features, 1998 [FeatureRequiresCOV6])>; 1999 2000//===----------------------------------------------------------------------===// 2001 2002def AMDGPUInstrInfo : InstrInfo { 2003 let guessInstructionProperties = 1; 2004} 2005 2006def AMDGPUAsmParser : AsmParser { 2007 // Some of the R600 registers have the same name, so this crashes. 2008 // For example T0_XYZW and T0_XY both have the asm name T0. 2009 let ShouldEmitMatchRegisterName = 0; 2010 2011 // Call the custom operand parser for all operands. 
2012 let OperandParserMethod = "parseCustomOperand"; 2013 let CallCustomParserForAllOperands = true; 2014} 2015 2016def AMDGPUAsmWriter : AsmWriter { 2017 int PassSubtarget = 1; 2018} 2019 2020def AMDGPUAsmVariants { 2021 string Default = "Default"; 2022 int Default_ID = 0; 2023 string VOP3 = "VOP3"; 2024 int VOP3_ID = 1; 2025 string SDWA = "SDWA"; 2026 int SDWA_ID = 2; 2027 string SDWA9 = "SDWA9"; 2028 int SDWA9_ID = 3; 2029 string DPP = "DPP"; 2030 int DPP_ID = 4; 2031 string VOP3_DPP = "VOP3_DPP"; 2032 int VOP3_DPP_ID = 5; 2033 string Disable = "Disable"; 2034 int Disable_ID = 6; 2035} 2036 2037def DefaultAMDGPUAsmParserVariant : AsmParserVariant { 2038 let Variant = AMDGPUAsmVariants.Default_ID; 2039 let Name = AMDGPUAsmVariants.Default; 2040} 2041 2042def VOP3AsmParserVariant : AsmParserVariant { 2043 let Variant = AMDGPUAsmVariants.VOP3_ID; 2044 let Name = AMDGPUAsmVariants.VOP3; 2045} 2046 2047def SDWAAsmParserVariant : AsmParserVariant { 2048 let Variant = AMDGPUAsmVariants.SDWA_ID; 2049 let Name = AMDGPUAsmVariants.SDWA; 2050} 2051 2052def SDWA9AsmParserVariant : AsmParserVariant { 2053 let Variant = AMDGPUAsmVariants.SDWA9_ID; 2054 let Name = AMDGPUAsmVariants.SDWA9; 2055} 2056 2057def DPPAsmParserVariant : AsmParserVariant { 2058 let Variant = AMDGPUAsmVariants.DPP_ID; 2059 let Name = AMDGPUAsmVariants.DPP; 2060} 2061 2062def VOP3_DPPAsmParserVariant : AsmParserVariant { 2063 let Variant = AMDGPUAsmVariants.VOP3_DPP_ID; 2064 let Name = AMDGPUAsmVariants.VOP3_DPP; 2065} 2066 2067def AMDGPU : Target { 2068 // Pull in Instruction Info: 2069 let InstructionSet = AMDGPUInstrInfo; 2070 let AssemblyParsers = [AMDGPUAsmParser]; 2071 let AssemblyParserVariants = [DefaultAMDGPUAsmParserVariant, 2072 VOP3AsmParserVariant, 2073 SDWAAsmParserVariant, 2074 SDWA9AsmParserVariant, 2075 DPPAsmParserVariant, 2076 VOP3_DPPAsmParserVariant]; 2077 let AssemblyWriters = [AMDGPUAsmWriter]; 2078 let AllowRegisterRenaming = 1; 2079} 2080 2081// Dummy Instruction itineraries for 
// pseudo instructions
def ALU_NULL : FuncUnit;
def NullALU : InstrItinClass;

//===----------------------------------------------------------------------===//
// Predicate helper class
//
// Each record pairs a C++ condition string (Predicate / True16PredicateClass)
// with an AssemblerPredicate feature expression, except where a record has no
// assembler expression at all. Adjacent string literals inside one Predicate<>
// are fragments of a single condition.
//===----------------------------------------------------------------------===//

// --- Predicates keyed on Subtarget->getGeneration() -------------------------

def isGFX6
  : Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS">,
    AssemblerPredicate<(all_of FeatureSouthernIslands)>;

def isGFX6GFX7
  : Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS ||"
              "Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS">,
    AssemblerPredicate<(all_of (not FeatureGCN3Encoding), (not FeatureGFX10Insts))>;

def isGFX6GFX7GFX10
  : Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS ||"
              "Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS ||"
              "Subtarget->getGeneration() == AMDGPUSubtarget::GFX10">,
    AssemblerPredicate<(all_of (not FeatureGCN3Encoding), (not FeatureGFX11Insts))>;

def isGFX6GFX7GFX10Plus
  : Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS ||"
              "Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS ||"
              "Subtarget->getGeneration() >= AMDGPUSubtarget::GFX10">,
    AssemblerPredicate<(all_of (not FeatureGCN3Encoding))>;

def isGFX7Only
  : Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS">,
    AssemblerPredicate<(all_of (not FeatureGCN3Encoding), FeatureCIInsts, (not FeatureGFX10Insts))>;

def isGFX7GFX10
  : Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS ||"
              "Subtarget->getGeneration() == AMDGPUSubtarget::GFX10">,
    AssemblerPredicate<(all_of (not FeatureGCN3Encoding), FeatureCIInsts, (not FeatureGFX11Insts))>;

def isGFX7GFX10GFX11
  : Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS ||"
              "Subtarget->getGeneration() == AMDGPUSubtarget::GFX10 ||"
              "Subtarget->getGeneration() == AMDGPUSubtarget::GFX11">,
    AssemblerPredicate<(all_of (not FeatureGCN3Encoding), FeatureCIInsts)>;

def isGFX7GFX8GFX9
  : Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS ||"
              "Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS ||"
              "Subtarget->getGeneration() == AMDGPUSubtarget::GFX9">,
    AssemblerPredicate<(all_of FeatureGFX7GFX8GFX9Insts)>;

def isGFX6GFX7GFX8GFX9
  : Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS ||"
              "Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS ||"
              "Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS ||"
              "Subtarget->getGeneration() == AMDGPUSubtarget::GFX9">,
    AssemblerPredicate<(all_of (not FeatureGFX10Insts))>;

def isGFX6GFX7GFX8GFX9NotGFX90A
  : Predicate<"!Subtarget->hasGFX90AInsts() &&"
              "(Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS ||"
              " Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS ||"
              " Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS ||"
              " Subtarget->getGeneration() == AMDGPUSubtarget::GFX9)">,
    AssemblerPredicate<(all_of (not FeatureGFX10Insts), (not FeatureGFX90AInsts))>;

def isGFX6GFX7GFX8GFX9GFX10
  : Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS ||"
              "Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS ||"
              "Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS ||"
              "Subtarget->getGeneration() == AMDGPUSubtarget::GFX9 ||"
              "Subtarget->getGeneration() == AMDGPUSubtarget::GFX10">,
    AssemblerPredicate<(all_of (not FeatureGFX11Insts))>;

def isNotGFX12Plus
  : Predicate<"Subtarget->getGeneration() <= AMDGPUSubtarget::GFX11">,
    AssemblerPredicate<(all_of (not FeatureGFX12Insts))>;

def isGFX7GFX8GFX9GFX10
  : Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS ||"
              "Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS ||"
              "Subtarget->getGeneration() == AMDGPUSubtarget::GFX9 ||"
              "Subtarget->getGeneration() == AMDGPUSubtarget::GFX10">,
    AssemblerPredicate<(all_of FeatureCIInsts, (not FeatureGFX11Insts))>;

def isGFX8GFX9GFX10GFX11
  : Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS ||"
              "Subtarget->getGeneration() == AMDGPUSubtarget::GFX9 ||"
              "Subtarget->getGeneration() == AMDGPUSubtarget::GFX10 ||"
              "Subtarget->getGeneration() == AMDGPUSubtarget::GFX11">,
    AssemblerPredicate<(all_of FeatureGFX8Insts, (not FeatureGFX12Insts))>;

def isGFX7Plus
  : Predicate<"Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS">,
    AssemblerPredicate<(all_of FeatureCIInsts)>;

def isGFX8Plus
  : Predicate<"Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS">,
    AssemblerPredicate<(all_of FeatureGFX8Insts)>;

def isGFX8Only
  : Predicate<"Subtarget->getGeneration() =="
              "AMDGPUSubtarget::VOLCANIC_ISLANDS">,
    AssemblerPredicate<(all_of FeatureVolcanicIslands)>;

def isGFX9Plus
  : Predicate<"Subtarget->getGeneration() >= AMDGPUSubtarget::GFX9">,
    AssemblerPredicate<(all_of FeatureGFX9Insts)>;

// No AssemblerPredicate is attached to this record.
def isNotGFX9Plus
  : Predicate<"Subtarget->getGeneration() < AMDGPUSubtarget::GFX9">;

def isGFX9Only
  : Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::GFX9">,
    AssemblerPredicate<(all_of FeatureGCN3Encoding, FeatureGFX9Insts)>;

// --- GFX90A / GFX940 / GFX950 related predicates ----------------------------

def isGCN3ExcludingGFX90A
  : Predicate<"Subtarget->isGCN3Encoding() && !Subtarget->hasGFX90AInsts()">,
    AssemblerPredicate<(all_of FeatureGCN3Encoding, (not FeatureGFX90AInsts))>;

def isGFX90APlus
  : Predicate<"Subtarget->hasGFX90AInsts()">,
    AssemblerPredicate<(all_of FeatureGFX90AInsts)>;

def isNotGFX90APlus
  : Predicate<"!Subtarget->hasGFX90AInsts()">,
    AssemblerPredicate<(all_of (not FeatureGFX90AInsts))>;

def isGFX8GFX9NotGFX90A
  : Predicate<"!Subtarget->hasGFX90AInsts() &&"
              "(Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS ||"
              " Subtarget->getGeneration() == AMDGPUSubtarget::GFX9)">,
    AssemblerPredicate<(all_of FeatureGFX8Insts, FeatureGCN3Encoding, (not FeatureGFX90AInsts))>;

def isGFX90AOnly
  : Predicate<"Subtarget->hasGFX90AInsts() && !Subtarget->hasGFX940Insts()">,
    AssemblerPredicate<(all_of FeatureGFX90AInsts, (not FeatureGFX940Insts))>;

def isGFX908orGFX90A
  : Predicate<"Subtarget->hasMAIInsts() && !Subtarget->hasGFX940Insts()">,
    AssemblerPredicate<(all_of FeatureMAIInsts, (not FeatureGFX940Insts))>;

def isGFX940Plus
  : Predicate<"Subtarget->hasGFX940Insts()">,
    AssemblerPredicate<(all_of FeatureGFX940Insts)>;

def isNotGFX940Plus
  : Predicate<"!Subtarget->hasGFX940Insts()">,
    AssemblerPredicate<(all_of (not FeatureGFX940Insts))>;

def HasGFX950Insts
  : Predicate<"Subtarget->hasGFX950Insts()">,
    AssemblerPredicate<(all_of FeatureGFX950Insts)>;

def HasPermlane16Swap
  : Predicate<"Subtarget->hasPermlane16Swap()">,
    AssemblerPredicate<(all_of FeaturePermlane16Swap)>;

def HasPermlane32Swap
  : Predicate<"Subtarget->hasPermlane32Swap()">,
    AssemblerPredicate<(all_of FeaturePermlane32Swap)>;

def isGFX8GFX9NotGFX940
  : Predicate<"!Subtarget->hasGFX940Insts() &&"
              "(Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS ||"
              " Subtarget->getGeneration() == AMDGPUSubtarget::GFX9)">,
    AssemblerPredicate<(all_of FeatureGFX8Insts, FeatureGCN3Encoding, (not FeatureGFX940Insts))>;

def isGFX8GFX9
  : Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS ||"
              "Subtarget->getGeneration() == AMDGPUSubtarget::GFX9">,
    AssemblerPredicate<(all_of FeatureGFX8Insts, FeatureGCN3Encoding)>;

// --- GFX10 and later --------------------------------------------------------

def isGFX10Only
  : Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::GFX10">,
    AssemblerPredicate<(all_of FeatureGFX10Insts, (not FeatureGFX11Insts))>;

def isGFX10Plus
  : Predicate<"Subtarget->getGeneration() >= AMDGPUSubtarget::GFX10">,
    AssemblerPredicate<(all_of FeatureGFX10Insts)>;

def isGFX10GFX11
  : Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::GFX10 ||"
              "Subtarget->getGeneration() == AMDGPUSubtarget::GFX11">,
    AssemblerPredicate<(all_of FeatureGFX10Insts, (not FeatureGFX12Insts))>;

def isGFX10Before1030
  : Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::GFX10 &&"
              "!Subtarget->hasGFX10_3Insts()">,
    AssemblerPredicate<(all_of FeatureGFX10Insts, (not FeatureGFX10_3Insts))>;

def isGFX9GFX10
  : Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::GFX9 ||"
              "Subtarget->getGeneration() == AMDGPUSubtarget::GFX10">,
    AssemblerPredicate<(all_of FeatureGFX9Insts, (not FeatureGFX11Insts))>;

def isGFX9GFX10GFX11
  : Predicate<"Subtarget->getGeneration() >= AMDGPUSubtarget::GFX9 &&"
              "Subtarget->getGeneration() < AMDGPUSubtarget::GFX12">,
    AssemblerPredicate<(all_of FeatureGFX9Insts, (not FeatureGFX12Insts))>;

def isGFX8GFX9GFX10
  : Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS ||"
              "Subtarget->getGeneration() == AMDGPUSubtarget::GFX9 ||"
              "Subtarget->getGeneration() == AMDGPUSubtarget::GFX10">,
    AssemblerPredicate<(all_of FeatureGFX8Insts, (not FeatureGFX11Insts))>;

def isGFX11Only
  : Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::GFX11">,
    AssemblerPredicate<(all_of FeatureGFX11Insts, (not FeatureGFX12Insts))>;

def isGFX11Plus
  : Predicate<"Subtarget->getGeneration() >= AMDGPUSubtarget::GFX11">,
    AssemblerPredicate<(all_of FeatureGFX11Insts)>;

// isGFX12Only and isGFX12Plus differ in the C++ condition (== vs >=) but carry
// the same assembler expression.
def isGFX12Only
  : Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::GFX12">,
    AssemblerPredicate<(all_of FeatureGFX12Insts)>;

def isGFX12Not12_50
  : Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::GFX12 && !Subtarget->hasGFX1250Insts()">,
    AssemblerPredicate<(all_of FeatureGFX12Insts, (not FeatureGFX1250Insts))>;

def isGFX12Plus
  : Predicate<"Subtarget->getGeneration() >= AMDGPUSubtarget::GFX12">,
    AssemblerPredicate<(all_of FeatureGFX12Insts)>;

def isGFX12PlusNot12_50
  : Predicate<"Subtarget->getGeneration() >= AMDGPUSubtarget::GFX12 && !Subtarget->hasGFX1250Insts()">,
    AssemblerPredicate<(all_of FeatureGFX12Insts, (not FeatureGFX1250Insts))>;

// isGFX125xOnly and isGFX1250Plus currently have identical conditions.
def isGFX125xOnly
  : Predicate<"Subtarget->hasGFX1250Insts()">,
    AssemblerPredicate<(all_of FeatureGFX1250Insts)>;

def isGFX1250Plus
  : Predicate<"Subtarget->hasGFX1250Insts()">,
    AssemblerPredicate<(all_of FeatureGFX1250Insts)>;

def isNotGFX1250Plus
  : Predicate<"!Subtarget->hasGFX1250Insts()">,
    AssemblerPredicate<(all_of (not FeatureGFX1250Insts))>;

def isGFX940orGFX1250
  : Predicate<"Subtarget->hasGFX940Insts() || Subtarget->hasGFX1250Insts()">,
    AssemblerPredicate<(any_of FeatureGFX940Insts, FeatureGFX1250Insts)>;

// --- Predicates keyed on individual subtarget accessors ---------------------

def HasIEEEMinimumMaximumInsts
  : Predicate<"Subtarget->hasIEEEMinimumMaximumInsts()">,
    AssemblerPredicate<(all_of FeatureIEEEMinimumMaximumInsts)>;

def HasMinimum3Maximum3F32
  : Predicate<"Subtarget->hasMinimum3Maximum3F32()">,
    AssemblerPredicate<(all_of FeatureMinimum3Maximum3F32)>;

def HasMinimum3Maximum3F16
  : Predicate<"Subtarget->hasMinimum3Maximum3F16()">,
    AssemblerPredicate<(all_of FeatureMinimum3Maximum3F16)>;

def HasMinimum3Maximum3PKF16
  : Predicate<"Subtarget->hasMinimum3Maximum3PKF16()">,
    AssemblerPredicate<(all_of FeatureMinimum3Maximum3PKF16)>;

def HasFlatAddressSpace
  : Predicate<"Subtarget->hasFlatAddressSpace()">,
    AssemblerPredicate<(all_of FeatureFlatAddressSpace)>;

def HasFlatBufferGlobalAtomicFaddF64Inst
  : Predicate<"Subtarget->hasFlatBufferGlobalAtomicFaddF64Inst()">,
    AssemblerPredicate<(any_of FeatureFlatBufferGlobalAtomicFaddF64Inst)>;

def HasAtomicFMinFMaxF32GlobalInsts
  : Predicate<"Subtarget->hasAtomicFMinFMaxF32GlobalInsts()">,
    AssemblerPredicate<(any_of FeatureAtomicFMinFMaxF32GlobalInsts)>;

def HasAtomicFMinFMaxF64GlobalInsts
  : Predicate<"Subtarget->hasAtomicFMinFMaxF64GlobalInsts()">,
    AssemblerPredicate<(any_of FeatureAtomicFMinFMaxF64GlobalInsts)>;

def HasAtomicFMinFMaxF32FlatInsts
  : Predicate<"Subtarget->hasAtomicFMinFMaxF32FlatInsts()">,
    AssemblerPredicate<(any_of FeatureAtomicFMinFMaxF32FlatInsts)>;

def HasAtomicFMinFMaxF64FlatInsts
  : Predicate<"Subtarget->hasAtomicFMinFMaxF64FlatInsts()">,
    AssemblerPredicate<(any_of FeatureAtomicFMinFMaxF64FlatInsts)>;

def HasLdsAtomicAddF64
  : Predicate<"Subtarget->hasLdsAtomicAddF64()">,
    AssemblerPredicate<(any_of FeatureGFX90AInsts)>;

def HasFlatGlobalInsts
  : Predicate<"Subtarget->hasFlatGlobalInsts()">,
    AssemblerPredicate<(all_of FeatureFlatGlobalInsts)>;

def HasFlatScratchInsts
  : Predicate<"Subtarget->hasFlatScratchInsts()">,
    AssemblerPredicate<(all_of FeatureFlatScratchInsts)>;

def HasScalarFlatScratchInsts
  : Predicate<"Subtarget->hasScalarFlatScratchInsts()">,
    AssemblerPredicate<(all_of FeatureScalarFlatScratchInsts)>;

def HasD16LoadStore
  : Predicate<"Subtarget->hasD16LoadStore()">,
    AssemblerPredicate<(all_of FeatureGFX9Insts)>;

def HasFlatScratchSTMode
  : Predicate<"Subtarget->hasFlatScratchSTMode()">,
    AssemblerPredicate<(any_of FeatureGFX10_3Insts, FeatureGFX940Insts)>;

def HasFlatScratchSVSMode
  : Predicate<"Subtarget->hasFlatScratchSVSMode()">,
    AssemblerPredicate<(any_of FeatureGFX940Insts, FeatureGFX11Insts)>;

def HasGFX10_AEncoding
  : Predicate<"Subtarget->hasGFX10_AEncoding()">,
    AssemblerPredicate<(all_of FeatureGFX10_AEncoding)>;

def HasGFX10_BEncoding
  : Predicate<"Subtarget->hasGFX10_BEncoding()">,
    AssemblerPredicate<(all_of FeatureGFX10_BEncoding)>;

// HasPackedD16VMem is the negation of HasUnpackedD16VMem.
def HasUnpackedD16VMem
  : Predicate<"Subtarget->hasUnpackedD16VMem()">,
    AssemblerPredicate<(all_of FeatureUnpackedD16VMem)>;

def HasPackedD16VMem
  : Predicate<"!Subtarget->hasUnpackedD16VMem()">,
    AssemblerPredicate<(all_of (not FeatureUnpackedD16VMem))>;

// HasUnrestrictedSOffset is the negation of HasRestrictedSOffset.
def HasRestrictedSOffset
  : Predicate<"Subtarget->hasRestrictedSOffset()">,
    AssemblerPredicate<(all_of FeatureHasRestrictedSOffset)>;

def HasUnrestrictedSOffset
  : Predicate<"!Subtarget->hasRestrictedSOffset()">,
    AssemblerPredicate<(all_of (not FeatureHasRestrictedSOffset))>;

def D16PreservesUnusedBits
  : Predicate<"Subtarget->d16PreservesUnusedBits()">,
    AssemblerPredicate<(all_of FeatureGFX9Insts, (not FeatureSRAMECC))>;

// These two records have no AssemblerPredicate.
def LDSRequiresM0Init
  : Predicate<"Subtarget->ldsRequiresM0Init()">;

def NotLDSRequiresM0Init
  : Predicate<"!Subtarget->ldsRequiresM0Init()">;

def HasMTBUFInsts
  : Predicate<"Subtarget->hasMTBUFInsts()">,
    AssemblerPredicate<(all_of (not FeatureGFX1250Insts))>;

def HasFormattedMUBUFInsts
  : Predicate<"Subtarget->hasFormattedMUBUFInsts()">,
    AssemblerPredicate<(all_of (not FeatureGFX1250Insts))>;

def HasExportInsts
  : Predicate<"Subtarget->hasExportInsts()">,
    AssemblerPredicate<(all_of (not FeatureGFX90AInsts), (not FeatureGFX1250Insts))>;

def HasVINTERPEncoding
  : Predicate<"Subtarget->hasVINTERPEncoding()">,
    AssemblerPredicate<(all_of FeatureGFX11Insts, (not FeatureGFX1250Insts))>;

def HasDSAddTid
  : Predicate<"Subtarget->getGeneration() >= AMDGPUSubtarget::GFX9">,
    AssemblerPredicate<(all_of FeatureGFX9Insts)>;

def HasLDSFPAtomicAddF32
  : Predicate<"Subtarget->hasLDSFPAtomicAddF32()">,
    AssemblerPredicate<(all_of FeatureGFX8Insts)>;

def HasAddNoCarryInsts
  : Predicate<"Subtarget->hasAddNoCarry()">,
    AssemblerPredicate<(all_of FeatureAddNoCarryInsts)>;

// No AssemblerPredicate on the next two records.
def NotHasAddNoCarryInsts
  : Predicate<"!Subtarget->hasAddNoCarry()">;

def HasXNACKEnabled
  : Predicate<"Subtarget->isXNACKEnabled()">;

def Has16BitInsts
  : Predicate<"Subtarget->has16BitInsts()">,
    AssemblerPredicate<(all_of Feature16BitInsts)>;

// --- True16 -----------------------------------------------------------------

def HasTrue16BitInsts
  : Predicate<"Subtarget->hasTrue16BitInsts()">,
    AssemblerPredicate<(all_of FeatureTrue16BitInsts)>;

def NotHasTrue16BitInsts
  : True16PredicateClass<"!Subtarget->hasTrue16BitInsts()">,
    AssemblerPredicate<(all_of (not FeatureTrue16BitInsts))>;

// Control use of True16 instructions. The real True16 instructions are
// True16 instructions as they are defined in the ISA. Fake True16
// instructions have the same encoding as real ones but syntactically
// only allow 32-bit registers in operands and use low halves thereof.
def UseRealTrue16Insts
  : True16PredicateClass<"Subtarget->useRealTrue16Insts()">,
    AssemblerPredicate<(all_of FeatureTrue16BitInsts, FeatureRealTrue16Insts)>;

def UseFakeTrue16Insts
  : True16PredicateClass<"Subtarget->hasTrue16BitInsts() && "
                         "!Subtarget->useRealTrue16Insts()">,
    AssemblerPredicate<(all_of FeatureTrue16BitInsts)>;
// FIXME When we default to RealTrue16 instead of Fake, change the line as follows.
// AssemblerPredicate<(all_of FeatureTrue16BitInsts, (not FeatureRealTrue16Insts))>;

// Each record below pairs a C++ condition string (Predicate) with an
// AssemblerPredicate feature expression. Records written as a bare
// Predicate<> have no assembler expression attached.

def HasBF16TransInsts : Predicate<"Subtarget->hasBF16TransInsts()">,
  AssemblerPredicate<(all_of FeatureBF16TransInsts)>;

def HasBF16ConversionInsts : Predicate<"Subtarget->hasBF16ConversionInsts()">,
  AssemblerPredicate<(all_of FeatureBF16ConversionInsts)>;

def HasVOP3PInsts : Predicate<"Subtarget->hasVOP3PInsts()">,
  AssemblerPredicate<(all_of FeatureVOP3P)>;

def NotHasMed3_16 : Predicate<"!Subtarget->hasMed3_16()">;
def HasMed3_16 : Predicate<"Subtarget->hasMed3_16()">;

def HasMinMaxDenormModes : Predicate<"Subtarget->supportsMinMaxDenormModes()">;
def NotHasMinMaxDenormModes : Predicate<"!Subtarget->supportsMinMaxDenormModes()">;

def HasFminFmaxLegacy : Predicate<"Subtarget->hasFminFmaxLegacy()">;

// SDWA predicates. HasSDWA8/9/10 share the C++ condition of HasSDWA and are
// told apart only by their assembler feature expressions.
def HasSDWA : Predicate<"Subtarget->hasSDWA()">;

def HasSDWA8 : Predicate<"Subtarget->hasSDWA()">,
  AssemblerPredicate<(all_of (not FeatureGFX9Insts), FeatureSDWA)>;

def HasSDWA9 :
  Predicate<"Subtarget->hasSDWA()">,
  AssemblerPredicate<(all_of FeatureGCN3Encoding, FeatureGFX9Insts, FeatureSDWA)>;

def HasSDWA10 :
  Predicate<"Subtarget->hasSDWA()">,
  AssemblerPredicate<(all_of (not FeatureGCN3Encoding), FeatureGFX10Insts, FeatureSDWA)>;

// DPP predicates. HasDPP16 (further down) reuses the hasDPP() condition with
// a different assembler feature expression.
def HasDPP : Predicate<"Subtarget->hasDPP()">,
  AssemblerPredicate<(all_of FeatureGCN3Encoding, FeatureDPP)>;

def HasDPP8 : Predicate<"Subtarget->hasDPP8()">,
  AssemblerPredicate<(all_of (not FeatureGCN3Encoding), FeatureGFX10Insts, FeatureDPP8)>;

def HasDPALU_DPP : Predicate<"Subtarget->hasDPALU_DPP()">,
  AssemblerPredicate<(all_of FeatureDPALU_DPP)>;

def HasPackedFP32Ops : Predicate<"Subtarget->hasPackedFP32Ops()">,
  AssemblerPredicate<(all_of FeaturePackedFP32Ops)>;

def HasPkMovB32 : Predicate<"Subtarget->hasPkMovB32()">,
  AssemblerPredicate<(all_of FeatureGFX90AInsts)>;

def HasFmaakFmamkF32Insts :
  Predicate<"Subtarget->hasFmaakFmamkF32Insts()">,
  AssemblerPredicate<(any_of FeatureGFX10Insts, FeatureGFX940Insts)>;

def HasFmaakFmamkF64Insts :
  Predicate<"Subtarget->hasFmaakFmamkF64Insts()">,
  AssemblerPredicate<(any_of FeatureGFX1250Insts)>;

def HasImageInsts : Predicate<"Subtarget->hasImageInsts()">,
  AssemblerPredicate<(all_of FeatureImageInsts)>;

def HasExtendedImageInsts : Predicate<"Subtarget->hasExtendedImageInsts()">,
  AssemblerPredicate<(all_of FeatureExtendedImageInsts)>;

def HasR128A16 : Predicate<"Subtarget->hasR128A16()">,
  AssemblerPredicate<(all_of FeatureR128A16)>;

def HasA16 : Predicate<"Subtarget->hasA16()">,
  AssemblerPredicate<(all_of FeatureA16)>;

def HasG16 : Predicate<"Subtarget->hasG16()">,
  AssemblerPredicate<(all_of FeatureG16)>;

def HasDPP16 : Predicate<"Subtarget->hasDPP()">,
  AssemblerPredicate<(all_of (not FeatureGCN3Encoding), FeatureGFX10Insts, FeatureDPP)>;

def HasIntClamp : Predicate<"Subtarget->hasIntClamp()">,
  AssemblerPredicate<(all_of FeatureIntClamp)>;

def HasMadMixInsts : Predicate<"Subtarget->hasMadMixInsts()">,
  AssemblerPredicate<(all_of FeatureMadMixInsts)>;

def HasScalarStores : Predicate<"Subtarget->hasScalarStores()">,
  AssemblerPredicate<(all_of FeatureScalarStores)>;

def HasScalarAtomics : Predicate<"Subtarget->hasScalarAtomics()">,
  AssemblerPredicate<(all_of FeatureScalarAtomics)>;

// HasSdstCMPX is the negation of HasNoSdstCMPX.
def HasNoSdstCMPX : Predicate<"Subtarget->hasNoSdstCMPX()">,
  AssemblerPredicate<(all_of FeatureNoSdstCMPX)>;

def HasSdstCMPX : Predicate<"!Subtarget->hasNoSdstCMPX()">,
  AssemblerPredicate<(all_of (not FeatureNoSdstCMPX))>;

def has16BankLDS : Predicate<"Subtarget->getLDSBankCount() == 16">;
def has32BankLDS : Predicate<"Subtarget->getLDSBankCount() == 32">;
def HasVGPRIndexMode : Predicate<"Subtarget->hasVGPRIndexMode()">,
  AssemblerPredicate<(all_of FeatureVGPRIndexMode)>;
def HasMovrel : Predicate<"Subtarget->hasMovrel()">,
  AssemblerPredicate<(all_of FeatureMovrel)>;

def HasFmaMixInsts : Predicate<"Subtarget->hasFmaMixInsts()">,
  AssemblerPredicate<(all_of FeatureFmaMixInsts)>;

def HasDLInsts : Predicate<"Subtarget->hasDLInsts()">,
  AssemblerPredicate<(all_of FeatureDLInsts)>;

def HasFmacF64Inst : Predicate<"Subtarget->hasFmacF64Inst()">,
  AssemblerPredicate<(all_of FeatureFmacF64Inst)>;

// HasDot<N>Insts family, N = 1..13.
def HasDot1Insts : Predicate<"Subtarget->hasDot1Insts()">,
  AssemblerPredicate<(all_of FeatureDot1Insts)>;

def HasDot2Insts : Predicate<"Subtarget->hasDot2Insts()">,
  AssemblerPredicate<(all_of FeatureDot2Insts)>;

def HasDot3Insts : Predicate<"Subtarget->hasDot3Insts()">,
  AssemblerPredicate<(all_of FeatureDot3Insts)>;

def HasDot4Insts : Predicate<"Subtarget->hasDot4Insts()">,
  AssemblerPredicate<(all_of FeatureDot4Insts)>;

def HasDot5Insts : Predicate<"Subtarget->hasDot5Insts()">,
  AssemblerPredicate<(all_of FeatureDot5Insts)>;

def HasDot6Insts : Predicate<"Subtarget->hasDot6Insts()">,
  AssemblerPredicate<(all_of FeatureDot6Insts)>;

def HasDot7Insts : Predicate<"Subtarget->hasDot7Insts()">,
  AssemblerPredicate<(all_of FeatureDot7Insts)>;

def HasDot8Insts : Predicate<"Subtarget->hasDot8Insts()">,
  AssemblerPredicate<(all_of FeatureDot8Insts)>;

def HasDot9Insts : Predicate<"Subtarget->hasDot9Insts()">,
  AssemblerPredicate<(all_of FeatureDot9Insts)>;

def HasDot10Insts : Predicate<"Subtarget->hasDot10Insts()">,
  AssemblerPredicate<(all_of FeatureDot10Insts)>;

def HasDot11Insts : Predicate<"Subtarget->hasDot11Insts()">,
  AssemblerPredicate<(all_of FeatureDot11Insts)>;

def HasDot12Insts : Predicate<"Subtarget->hasDot12Insts()">,
  AssemblerPredicate<(all_of FeatureDot12Insts)>;

def HasDot13Insts : Predicate<"Subtarget->hasDot13Insts()">,
  AssemblerPredicate<(all_of FeatureDot13Insts)>;

def HasGetWaveIdInst : Predicate<"Subtarget->hasGetWaveIdInst()">,
  AssemblerPredicate<(all_of FeatureGetWaveIdInst)>;

def HasMAIInsts : Predicate<"Subtarget->hasMAIInsts()">,
  AssemblerPredicate<(all_of FeatureMAIInsts)>;

def HasSMemRealTime : Predicate<"Subtarget->hasSMemRealTime()">,
  AssemblerPredicate<(all_of FeatureSMemRealTime)>;

def HasSMemTimeInst : Predicate<"Subtarget->hasSMemTimeInst()">,
  AssemblerPredicate<(all_of FeatureSMemTimeInst)>;

def HasShaderCyclesRegister : Predicate<"Subtarget->hasShaderCyclesRegister()">,
  AssemblerPredicate<(all_of FeatureShaderCyclesRegister)>;

def HasShaderCyclesHiLoRegisters : Predicate<"Subtarget->hasShaderCyclesHiLoRegisters()">;

def HasFP8Insts : Predicate<"Subtarget->hasFP8Insts()">,
  AssemblerPredicate<(all_of FeatureFP8Insts)>;

def HasFP8ConversionInsts : Predicate<"Subtarget->hasFP8ConversionInsts()">,
  AssemblerPredicate<(all_of FeatureFP8ConversionInsts)>;

def HasFP8E5M3Insts : Predicate<"Subtarget->hasFP8E5M3Insts()">,
  AssemblerPredicate<(all_of FeatureFP8E5M3Insts)>;

def NotHasFP8E5M3Insts : Predicate<"!Subtarget->hasFP8E5M3Insts()">,
  AssemblerPredicate<(all_of (not FeatureFP8E5M3Insts))>;

def HasPkFmacF16Inst : Predicate<"Subtarget->hasPkFmacF16Inst()">,
  AssemblerPredicate<(all_of FeaturePkFmacF16Inst)>;

def HasMadMacF32Insts : Predicate<"Subtarget->hasMadMacF32Insts()">,
  AssemblerPredicate<(all_of FeatureMadMacF32Insts)>;

def HasFmaLegacy32 : Predicate<"Subtarget->hasGFX10_3Insts()">,
  AssemblerPredicate<(any_of FeatureGFX10_3Insts)>;

def HasAtomicDsPkAdd16Insts : Predicate<"Subtarget->hasAtomicDsPkAdd16Insts()">,
  AssemblerPredicate<(any_of FeatureAtomicDsPkAdd16Insts)>;

def HasAtomicFlatPkAdd16Insts : Predicate<"Subtarget->hasAtomicFlatPkAdd16Insts()">,
  AssemblerPredicate<(any_of FeatureAtomicFlatPkAdd16Insts)>;

def HasAtomicFaddRtnInsts : Predicate<"Subtarget->hasAtomicFaddRtnInsts()">,
  AssemblerPredicate<(all_of FeatureAtomicFaddRtnInsts)>;
def HasAtomicFaddNoRtnInsts : Predicate<"Subtarget->hasAtomicFaddNoRtnInsts()">,
  AssemblerPredicate<(all_of FeatureAtomicFaddNoRtnInsts)>;
// Satisfied by either the NoRtn feature or the full PkAddF16 feature.
def HasAtomicBufferGlobalPkAddF16NoRtnInsts
  : Predicate<"Subtarget->hasAtomicBufferGlobalPkAddF16NoRtnInsts() || Subtarget->hasAtomicBufferGlobalPkAddF16Insts()">,
    AssemblerPredicate<(any_of FeatureAtomicBufferGlobalPkAddF16NoRtnInsts, FeatureAtomicBufferGlobalPkAddF16Insts)>;
def HasAtomicBufferGlobalPkAddF16Insts
  : Predicate<"Subtarget->hasAtomicBufferGlobalPkAddF16Insts()">,
    AssemblerPredicate<(all_of FeatureAtomicBufferGlobalPkAddF16Insts)>;
def HasAtomicGlobalPkAddBF16Inst
  : Predicate<"Subtarget->hasAtomicGlobalPkAddBF16Inst()">,
    AssemblerPredicate<(all_of FeatureAtomicGlobalPkAddBF16Inst)>;
def HasAtomicBufferPkAddBF16Inst
  : Predicate<"Subtarget->hasAtomicBufferPkAddBF16Inst()">,
    AssemblerPredicate<(all_of FeatureAtomicBufferPkAddBF16Inst)>;
def HasFlatAtomicFaddF32Inst
  : Predicate<"Subtarget->hasFlatAtomicFaddF32Inst()">,
    AssemblerPredicate<(all_of FeatureFlatAtomicFaddF32Inst)>;

def HasDefaultComponentZero
  : Predicate<"Subtarget->hasDefaultComponentZero()">,
    AssemblerPredicate<(all_of FeatureDefaultComponentZero)>;
def HasDefaultComponentBroadcast
  : Predicate<"Subtarget->hasDefaultComponentBroadcast()">,
    AssemblerPredicate<(all_of FeatureDefaultComponentBroadcast)>;

// The condition string and the feature expression are both in positive form,
// like every other Has* record here. The condition previously carried a
// stray '!' that contradicted the assembler expression.
def HasDsSrc2Insts : Predicate<"Subtarget->hasDsSrc2Insts()">,
  AssemblerPredicate<(all_of FeatureDsSrc2Insts)>;

def EnableFlatScratch : Predicate<"Subtarget->enableFlatScratch()">;

def DisableFlatScratch : Predicate<"!Subtarget->enableFlatScratch()">;

def HasUnalignedAccessMode : Predicate<"Subtarget->hasUnalignedAccessMode()">,
  AssemblerPredicate<(all_of FeatureUnalignedAccessMode)>;

def HasMADIntraFwdBug : Predicate<"Subtarget->hasMADIntraFwdBug()">;

def HasNotMADIntraFwdBug : Predicate<"!Subtarget->hasMADIntraFwdBug()">;

def HasSALUFloatInsts : Predicate<"Subtarget->hasSALUFloatInsts()">,
  AssemblerPredicate<(all_of FeatureSALUFloatInsts)>;

def NotHasSALUFloatInsts : Predicate<"!Subtarget->hasSALUFloatInsts()">,
  AssemblerPredicate<(all_of (not FeatureSALUFloatInsts))>;

def HasPseudoScalarTrans : Predicate<"Subtarget->hasPseudoScalarTrans()">,
  AssemblerPredicate<(all_of FeaturePseudoScalarTrans)>;

def HasBitOp3Insts : Predicate<"Subtarget->hasBitOp3Insts()">,
  AssemblerPredicate<(all_of FeatureBitOp3Insts)>;

def HasTransposeLoadF4F6Insts : Predicate<"Subtarget->hasTransposeLoadF4F6Insts()">,
  AssemblerPredicate<(all_of FeatureTransposeLoadF4F6Insts)>;

def HasPrngInst : Predicate<"Subtarget->hasPrngInst()">,
  AssemblerPredicate<(all_of FeaturePrngInst)>;

def HasBVHDualAndBVH8Insts : Predicate<"Subtarget->hasBVHDualAndBVH8Insts()">,
  AssemblerPredicate<(all_of FeatureBVHDualAndBVH8Insts)>;

def Has64BitLiterals : Predicate<"Subtarget->has64BitLiterals()">,
  AssemblerPredicate<(all_of Feature64BitLiterals)>;

def HasWaitXcnt : Predicate<"Subtarget->hasWaitXcnt()">,
  AssemblerPredicate<(all_of FeatureWaitXcnt)>;

def HasFP8ConversionScaleInsts : Predicate<"Subtarget->hasFP8ConversionScaleInsts()">,
  AssemblerPredicate<(all_of FeatureFP8ConversionScaleInsts)>;

def HasBF8ConversionScaleInsts : Predicate<"Subtarget->hasBF8ConversionScaleInsts()">,
  AssemblerPredicate<(all_of FeatureBF8ConversionScaleInsts)>;

def HasFP4ConversionScaleInsts : Predicate<"Subtarget->hasFP4ConversionScaleInsts()">,
  AssemblerPredicate<(all_of FeatureFP4ConversionScaleInsts)>;

def HasFP6BF6ConversionScaleInsts : Predicate<"Subtarget->hasFP6BF6ConversionScaleInsts()">,
  AssemblerPredicate<(all_of FeatureFP6BF6ConversionScaleInsts)>;

def HasF16BF16ToFP6BF6ConversionScaleInsts : Predicate<"Subtarget->hasF16BF16ToFP6BF6ConversionScaleInsts()">,
  AssemblerPredicate<(all_of FeatureF16BF16ToFP6BF6ConversionScaleInsts)>;

def HasCvtPkF16F32Inst : Predicate<"Subtarget->hasCvtPkF16F32Inst()">,
  AssemblerPredicate<(all_of FeatureCvtPkF16F32Inst)>;

def HasF32ToF16BF16ConversionSRInsts : Predicate<"Subtarget->hasF32ToF16BF16ConversionSRInsts()">,
  AssemblerPredicate<(all_of FeatureF32ToF16BF16ConversionSRInsts)>;

def HasGDS : Predicate<"Subtarget->hasGDS()">;

def HasGWS : Predicate<"Subtarget->hasGWS()">;

def HasCvtFP8VOP1Bug : Predicate<"Subtarget->hasCvtFP8VOP1Bug()">;
def HasNoCvtFP8VOP1Bug : Predicate<"!Subtarget->hasCvtFP8VOP1Bug()">;

def HasAtomicCSubNoRtnInsts : Predicate<"Subtarget->hasAtomicCSubNoRtnInsts()">;

def HasScalarDwordx3Loads : Predicate<"Subtarget->hasScalarDwordx3Loads()">;

def HasXF32Insts : Predicate<"Subtarget->hasXF32Insts()">,
  AssemblerPredicate<(all_of FeatureXF32Insts)>;

def HasAshrPkInsts : Predicate<"Subtarget->hasAshrPkInsts()">,
  AssemblerPredicate<(all_of FeatureAshrPkInsts)>;

def HasLshlAddU64Inst : Predicate<"Subtarget->hasLshlAddU64Inst()">,
  AssemblerPredicate<(all_of FeatureLshlAddU64Inst)>;

def HasLdsBarrierArriveAtomic : Predicate<"Subtarget->hasLdsBarrierArriveAtomic()">,
  AssemblerPredicate<(all_of FeatureLdsBarrierArriveAtomic)>;

def HasSetPrioIncWgInst : Predicate<"Subtarget->hasSetPrioIncWgInst()">,
  AssemblerPredicate<(all_of FeatureSetPrioIncWgInst)>;

// Include AMDGPU TD files
include "SISchedule.td"
2757include "GCNProcessors.td" 2758include "AMDGPUInstrInfo.td" 2759include "SIRegisterInfo.td" 2760include "AMDGPURegisterBanks.td" 2761include "AMDGPUInstructions.td" 2762include "SIInstrInfo.td" 2763include "AMDGPUCallingConv.td" 2764include "AMDGPUSearchableTables.td" 2765