1//===-- AMDGPU.td - AMDGPU Tablegen files --------*- tablegen -*-===// 2// 3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4// See https://llvm.org/LICENSE.txt for license information. 5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6// 7//===------------------------------------------------------------===// 8 9include "llvm/TableGen/SearchableTable.td" 10include "llvm/Target/Target.td" 11include "AMDGPUFeatures.td" 12 13def p0 : PtrValueType<i64, 0>; 14def p1 : PtrValueType<i64, 1>; 15def p2 : PtrValueType<i32, 2>; 16def p3 : PtrValueType<i32, 3>; 17def p4 : PtrValueType<i64, 4>; 18def p5 : PtrValueType<i32, 5>; 19def p6 : PtrValueType<i32, 6>; 20 21class BoolToList<bit Value> { 22 list<int> ret = !if(Value, [1]<int>, []<int>); 23} 24 25//===------------------------------------------------------------===// 26// Subtarget Features (device properties) 27//===------------------------------------------------------------===// 28 29def FeatureFastFMAF32 : SubtargetFeature<"fast-fmaf", 30 "FastFMAF32", 31 "true", 32 "Assuming f32 fma is at least as fast as mul + add" 33>; 34 35def FeatureFastDenormalF32 : SubtargetFeature<"fast-denormal-f32", 36 "FastDenormalF32", 37 "true", 38 "Enabling denormals does not cause f32 instructions to run at f64 rates" 39>; 40 41def FeatureMIMG_R128 : SubtargetFeature<"mimg-r128", 42 "MIMG_R128", 43 "true", 44 "Support 128-bit texture resources" 45>; 46 47def HalfRate64Ops : SubtargetFeature<"half-rate-64-ops", 48 "HalfRate64Ops", 49 "true", 50 "Most fp64 instructions are half rate instead of quarter" 51>; 52 53def FullRate64Ops : SubtargetFeature<"full-rate-64-ops", 54 "FullRate64Ops", 55 "true", 56 "Most fp64 instructions are full rate" 57>; 58 59def FeatureFlatAddressSpace : SubtargetFeature<"flat-address-space", 60 "FlatAddressSpace", 61 "true", 62 "Support flat address space" 63>; 64 65def FeatureFlatInstOffsets : SubtargetFeature<"flat-inst-offsets", 66 "FlatInstOffsets", 67 "true", 68 "Flat instructions have immediate offset addressing mode" 69>; 70 71def FeatureFlatGlobalInsts : SubtargetFeature<"flat-global-insts", 72 "FlatGlobalInsts", 73 "true", 74 "Have global_* flat memory instructions" 75>; 76 77def FeatureFlatScratchInsts : SubtargetFeature<"flat-scratch-insts", 78 "FlatScratchInsts", 79 "true", 80 "Have scratch_* flat memory instructions" 81>; 82 83def FeatureScalarFlatScratchInsts : SubtargetFeature<"scalar-flat-scratch-insts", 84 "ScalarFlatScratchInsts", 85 "true", 86 "Have s_scratch_* flat memory instructions" 87>; 88 89def FeatureAddNoCarryInsts : SubtargetFeature<"add-no-carry-insts", 90 "AddNoCarryInsts", 91 "true", 92 "Have VALU add/sub instructions without carry out" 93>; 94 95def FeatureUnalignedBufferAccess : SubtargetFeature<"unaligned-buffer-access", 96 "UnalignedBufferAccess", 97 "true", 98 "Hardware supports unaligned global loads and stores" 99>; 100 101def FeatureTrapHandler: SubtargetFeature<"trap-handler", 102 "TrapHandler", 103 "true", 104 "Trap handler support" 105>; 106 107def FeatureUnalignedScratchAccess : SubtargetFeature<"unaligned-scratch-access", 108 "UnalignedScratchAccess", 109 "true", 110 "Support unaligned scratch loads and stores" 111>; 112 113def FeatureUnalignedDSAccess : SubtargetFeature<"unaligned-ds-access", 114 "UnalignedDSAccess", 115 "true", 116 "Hardware supports unaligned local and region loads and stores" 117>; 118 119def FeatureApertureRegs : SubtargetFeature<"aperture-regs", 120 "HasApertureRegs", 121 "true", 122 "Has Memory Aperture Base and Size Registers" 123>; 124 125def FeatureMadMixInsts : SubtargetFeature<"mad-mix-insts", 126 "HasMadMixInsts", 127 "true", 128 "Has v_mad_mix_f32, v_mad_mixlo_f16, v_mad_mixhi_f16 instructions" 129>; 130 131def FeatureFmaMixInsts : SubtargetFeature<"fma-mix-insts", 132 "HasFmaMixInsts", 133 "true", 134 "Has v_fma_mix_f32, v_fma_mixlo_f16, v_fma_mixhi_f16 instructions" 135>; 136 137def FeatureSupportsXNACK : SubtargetFeature<"xnack-support", 138 "SupportsXNACK", 139 "true", 140 "Hardware supports XNACK" 141>; 142 143// XNACK is disabled if SH_MEM_CONFIG.ADDRESS_MODE = GPUVM on chips that support 144// XNACK. The current default kernel driver setting is: 145// - graphics ring: XNACK disabled 146// - compute ring: XNACK enabled 147// 148// If XNACK is enabled, the VMEM latency can be worse. 149// If XNACK is disabled, the 2 SGPRs can be used for general purposes. 150def FeatureXNACK : SubtargetFeature<"xnack", 151 "EnableXNACK", 152 "true", 153 "Enable XNACK support" 154>; 155 156def FeatureTgSplit : SubtargetFeature<"tgsplit", 157 "EnableTgSplit", 158 "true", 159 "Enable threadgroup split execution" 160>; 161 162def FeatureCuMode : SubtargetFeature<"cumode", 163 "EnableCuMode", 164 "true", 165 "Enable CU wavefront execution mode" 166>; 167 168def FeatureSGPRInitBug : SubtargetFeature<"sgpr-init-bug", 169 "SGPRInitBug", 170 "true", 171 "VI SGPR initialization bug requiring a fixed SGPR allocation size" 172>; 173 174def FeatureLdsMisalignedBug : SubtargetFeature<"lds-misaligned-bug", 175 "LDSMisalignedBug", 176 "true", 177 "Some GFX10 bug with multi-dword LDS and flat access that is not naturally aligned in WGP mode" 178>; 179 180def FeatureMFMAInlineLiteralBug : SubtargetFeature<"mfma-inline-literal-bug", 181 "HasMFMAInlineLiteralBug", 182 "true", 183 "MFMA cannot use inline literal as SrcC" 184>; 185 186def FeatureVcmpxPermlaneHazard : SubtargetFeature<"vcmpx-permlane-hazard", 187 "HasVcmpxPermlaneHazard", 188 "true", 189 "TODO: describe me" 190>; 191 192def FeatureVMEMtoScalarWriteHazard : SubtargetFeature<"vmem-to-scalar-write-hazard", 193 "HasVMEMtoScalarWriteHazard", 194 "true", 195 "VMEM instruction followed by scalar writing to EXEC mask, M0 or SGPR leads to incorrect execution." 196>; 197 198def FeatureSMEMtoVectorWriteHazard : SubtargetFeature<"smem-to-vector-write-hazard", 199 "HasSMEMtoVectorWriteHazard", 200 "true", 201 "s_load_dword followed by v_cmp page faults" 202>; 203 204def FeatureInstFwdPrefetchBug : SubtargetFeature<"inst-fwd-prefetch-bug", 205 "HasInstFwdPrefetchBug", 206 "true", 207 "S_INST_PREFETCH instruction causes shader to hang" 208>; 209 210def FeatureVcmpxExecWARHazard : SubtargetFeature<"vcmpx-exec-war-hazard", 211 "HasVcmpxExecWARHazard", 212 "true", 213 "V_CMPX WAR hazard on EXEC (V_CMPX issue ONLY)" 214>; 215 216def FeatureLdsBranchVmemWARHazard : SubtargetFeature<"lds-branch-vmem-war-hazard", 217 "HasLdsBranchVmemWARHazard", 218 "true", 219 "Switching between LDS and VMEM-tex not waiting VM_VSRC=0" 220>; 221 222def FeatureNSAtoVMEMBug : SubtargetFeature<"nsa-to-vmem-bug", 223 "HasNSAtoVMEMBug", 224 "true", 225 "MIMG-NSA followed by VMEM fail if EXEC_LO or EXEC_HI equals zero" 226>; 227 228def FeatureNSAClauseBug : SubtargetFeature<"nsa-clause-bug", 229 "HasNSAClauseBug", 230 "true", 231 "MIMG-NSA in a hard clause has unpredictable results on GFX10.1" 232>; 233 234def FeatureFlatSegmentOffsetBug : SubtargetFeature<"flat-segment-offset-bug", 235 "HasFlatSegmentOffsetBug", 236 "true", 237 "GFX10 bug where inst_offset is ignored when flat instructions access global memory" 238>; 239 240def FeatureNegativeScratchOffsetBug : SubtargetFeature<"negative-scratch-offset-bug", 241 "NegativeScratchOffsetBug", 242 "true", 243 "Negative immediate offsets in scratch instructions with an SGPR offset page fault on GFX9" 244>; 245 246def FeatureNegativeUnalignedScratchOffsetBug : SubtargetFeature<"negative-unaligned-scratch-offset-bug", 247 "NegativeUnalignedScratchOffsetBug", 248 "true", 249 "Scratch instructions with a VGPR offset and a negative immediate offset that is not a multiple of 4 read wrong memory on GFX10" 250>; 251 252def FeatureOffset3fBug : SubtargetFeature<"offset-3f-bug", 253 "HasOffset3fBug", 254 "true", 255 "Branch offset of 3f hardware bug" 256>; 257 258def FeatureImageStoreD16Bug : SubtargetFeature<"image-store-d16-bug", 259 "HasImageStoreD16Bug", 260 "true", 261 "Image Store D16 hardware bug" 262>; 263 264def FeatureImageGather4D16Bug : SubtargetFeature<"image-gather4-d16-bug", 265 "HasImageGather4D16Bug", 266 "true", 267 "Image Gather4 D16 hardware bug" 268>; 269 270class SubtargetFeatureLDSBankCount <int Value> : SubtargetFeature < 271 "ldsbankcount"#Value, 272 "LDSBankCount", 273 !cast<string>(Value), 274 "The number of LDS banks per compute unit." 275>; 276 277def FeatureLDSBankCount16 : SubtargetFeatureLDSBankCount<16>; 278def FeatureLDSBankCount32 : SubtargetFeatureLDSBankCount<32>; 279 280def FeatureGCN3Encoding : SubtargetFeature<"gcn3-encoding", 281 "GCN3Encoding", 282 "true", 283 "Encoding format for VI" 284>; 285 286def FeatureCIInsts : SubtargetFeature<"ci-insts", 287 "CIInsts", 288 "true", 289 "Additional instructions for CI+" 290>; 291 292def FeatureGFX8Insts : SubtargetFeature<"gfx8-insts", 293 "GFX8Insts", 294 "true", 295 "Additional instructions for GFX8+" 296>; 297 298def FeatureGFX9Insts : SubtargetFeature<"gfx9-insts", 299 "GFX9Insts", 300 "true", 301 "Additional instructions for GFX9+" 302>; 303 304def FeatureGFX90AInsts : SubtargetFeature<"gfx90a-insts", 305 "GFX90AInsts", 306 "true", 307 "Additional instructions for GFX90A+" 308>; 309 310def FeatureGFX10Insts : SubtargetFeature<"gfx10-insts", 311 "GFX10Insts", 312 "true", 313 "Additional instructions for GFX10+" 314>; 315 316def FeatureGFX10_3Insts : SubtargetFeature<"gfx10-3-insts", 317 "GFX10_3Insts", 318 "true", 319 "Additional instructions for GFX10.3" 320>; 321 322def FeatureGFX7GFX8GFX9Insts : SubtargetFeature<"gfx7-gfx8-gfx9-insts", 323 "GFX7GFX8GFX9Insts", 324 "true", 325 "Instructions shared in GFX7, GFX8, GFX9" 326>; 327 328def FeatureSMemRealTime : SubtargetFeature<"s-memrealtime", 329 "HasSMemRealTime", 330 "true", 331 "Has s_memrealtime instruction" 332>; 333 334def FeatureInv2PiInlineImm : SubtargetFeature<"inv-2pi-inline-imm", 335 "HasInv2PiInlineImm", 336 "true", 337 "Has 1 / (2 * pi) as inline immediate" 338>; 339 340def Feature16BitInsts : SubtargetFeature<"16-bit-insts", 341 "Has16BitInsts", 342 "true", 343 "Has i16/f16 instructions" 344>; 345 346def FeatureVOP3P : SubtargetFeature<"vop3p", 347 "HasVOP3PInsts", 348 "true", 349 "Has VOP3P packed instructions" 350>; 351 352def FeatureMovrel : SubtargetFeature<"movrel", 353 "HasMovrel", 354 "true", 355 "Has v_movrel*_b32 instructions" 356>; 357 358def FeatureVGPRIndexMode : SubtargetFeature<"vgpr-index-mode", 359 "HasVGPRIndexMode", 360 "true", 361 "Has VGPR mode register indexing" 362>; 363 364def FeatureScalarStores : SubtargetFeature<"scalar-stores", 365 "HasScalarStores", 366 "true", 367 "Has store scalar memory instructions" 368>; 369 370def FeatureScalarAtomics : SubtargetFeature<"scalar-atomics", 371 "HasScalarAtomics", 372 "true", 373 "Has atomic scalar memory instructions" 374>; 375 376def FeatureSDWA : SubtargetFeature<"sdwa", 377 "HasSDWA", 378 "true", 379 "Support SDWA (Sub-DWORD Addressing) extension" 380>; 381 382def FeatureSDWAOmod : SubtargetFeature<"sdwa-omod", 383 "HasSDWAOmod", 384 "true", 385 "Support OMod with SDWA (Sub-DWORD Addressing) extension" 386>; 387 388def FeatureSDWAScalar : SubtargetFeature<"sdwa-scalar", 389 "HasSDWAScalar", 390 "true", 391 "Support scalar register with SDWA (Sub-DWORD Addressing) extension" 392>; 393 394def FeatureSDWASdst : SubtargetFeature<"sdwa-sdst", 395 "HasSDWASdst", 396 "true", 397 "Support scalar dst for VOPC with SDWA (Sub-DWORD Addressing) extension" 398>; 399 400def FeatureSDWAMac : SubtargetFeature<"sdwa-mav", 401 "HasSDWAMac", 402 "true", 403 "Support v_mac_f32/f16 with SDWA (Sub-DWORD Addressing) extension" 404>; 405 406def FeatureSDWAOutModsVOPC : SubtargetFeature<"sdwa-out-mods-vopc", 407 "HasSDWAOutModsVOPC", 408 "true", 409 "Support clamp for VOPC with SDWA (Sub-DWORD Addressing) extension" 410>; 411 412def FeatureDPP : SubtargetFeature<"dpp", 413 "HasDPP", 414 "true", 415 "Support DPP (Data Parallel Primitives) extension" 416>; 417 418// DPP8 allows arbitrary cross-lane swizzling within groups of 8 lanes. 419def FeatureDPP8 : SubtargetFeature<"dpp8", 420 "HasDPP8", 421 "true", 422 "Support DPP8 (Data Parallel Primitives) extension" 423>; 424 425def Feature64BitDPP : SubtargetFeature<"dpp-64bit", 426 "Has64BitDPP", 427 "true", 428 "Support DPP (Data Parallel Primitives) extension" 429>; 430 431def FeaturePackedFP32Ops : SubtargetFeature<"packed-fp32-ops", 432 "HasPackedFP32Ops", 433 "true", 434 "Support packed fp32 instructions" 435>; 436 437def FeatureR128A16 : SubtargetFeature<"r128-a16", 438 "HasR128A16", 439 "true", 440 "Support gfx9-style A16 for 16-bit coordinates/gradients/lod/clamp/mip image operands, where a16 is aliased with r128" 441>; 442 443def FeatureGFX10A16 : SubtargetFeature<"a16", 444 "HasGFX10A16", 445 "true", 446 "Support gfx10-style A16 for 16-bit coordinates/gradients/lod/clamp/mip image operands" 447>; 448 449def FeatureG16 : SubtargetFeature<"g16", 450 "HasG16", 451 "true", 452 "Support G16 for 16-bit gradient image operands" 453>; 454 455def FeatureNSAEncoding : SubtargetFeature<"nsa-encoding", 456 "HasNSAEncoding", 457 "true", 458 "Support NSA encoding for image instructions" 459>; 460 461def FeatureExtendedImageInsts : SubtargetFeature<"extended-image-insts", 462 "HasExtendedImageInsts", 463 "true", 464 "Support mips != 0, lod != 0, gather4, and get_lod" 465>; 466 467def FeatureGFX10_AEncoding : SubtargetFeature<"gfx10_a-encoding", 468 "GFX10_AEncoding", 469 "true", 470 "Has BVH ray tracing instructions" 471>; 472 473def FeatureGFX10_BEncoding : SubtargetFeature<"gfx10_b-encoding", 474 "GFX10_BEncoding", 475 "true", 476 "Encoding format GFX10_B" 477>; 478 479def FeatureIntClamp : SubtargetFeature<"int-clamp-insts", 480 "HasIntClamp", 481 "true", 482 "Support clamp for integer destination" 483>; 484 485def FeatureUnpackedD16VMem : SubtargetFeature<"unpacked-d16-vmem", 486 "HasUnpackedD16VMem", 487 "true", 488 "Has unpacked d16 vmem instructions" 489>; 490 491def FeatureDLInsts : SubtargetFeature<"dl-insts", 492 "HasDLInsts", 493 "true", 494 "Has v_fmac_f32 and v_xnor_b32 instructions" 495>; 496 497def FeatureDot1Insts : SubtargetFeature<"dot1-insts", 498 "HasDot1Insts", 499 "true", 500 "Has v_dot4_i32_i8 and v_dot8_i32_i4 instructions" 501>; 502 503def FeatureDot2Insts : SubtargetFeature<"dot2-insts", 504 "HasDot2Insts", 505 "true", 506 "Has v_dot2_i32_i16, v_dot2_u32_u16 instructions" 507>; 508 509def FeatureDot3Insts : SubtargetFeature<"dot3-insts", 510 "HasDot3Insts", 511 "true", 512 "Has v_dot8c_i32_i4 instruction" 513>; 514 515def FeatureDot4Insts : SubtargetFeature<"dot4-insts", 516 "HasDot4Insts", 517 "true", 518 "Has v_dot2c_i32_i16 instruction" 519>; 520 521def FeatureDot5Insts : SubtargetFeature<"dot5-insts", 522 "HasDot5Insts", 523 "true", 524 "Has v_dot2c_f32_f16 instruction" 525>; 526 527def FeatureDot6Insts : SubtargetFeature<"dot6-insts", 528 "HasDot6Insts", 529 "true", 530 "Has v_dot4c_i32_i8 instruction" 531>; 532 533def FeatureDot7Insts : SubtargetFeature<"dot7-insts", 534 "HasDot7Insts", 535 "true", 536 "Has v_dot2_f32_f16, v_dot4_u32_u8, v_dot8_u32_u4 instructions" 537>; 538 539def FeatureMAIInsts : SubtargetFeature<"mai-insts", 540 "HasMAIInsts", 541 "true", 542 "Has mAI instructions" 543>; 544 545def FeaturePkFmacF16Inst : SubtargetFeature<"pk-fmac-f16-inst", 546 "HasPkFmacF16Inst", 547 "true", 548 "Has v_pk_fmac_f16 instruction" 549>; 550 551def FeatureAtomicFaddInsts : SubtargetFeature<"atomic-fadd-insts", 552 "HasAtomicFaddInsts", 553 "true", 554 "Has buffer_atomic_add_f32, buffer_atomic_pk_add_f16, global_atomic_add_f32, " 555 "global_atomic_pk_add_f16 instructions", 556 [FeatureFlatGlobalInsts] 557>; 558 559def FeatureSupportsSRAMECC : SubtargetFeature<"sramecc-support", 560 "SupportsSRAMECC", 561 "true", 562 "Hardware supports SRAMECC" 563>; 564 565def FeatureSRAMECC : SubtargetFeature<"sramecc", 566 "EnableSRAMECC", 567 "true", 568 "Enable SRAMECC" 569>; 570 571def FeatureNoSdstCMPX : SubtargetFeature<"no-sdst-cmpx", 572 "HasNoSdstCMPX", 573 "true", 574 "V_CMPX does not write VCC/SGPR in addition to EXEC" 575>; 576 577def FeatureVscnt : SubtargetFeature<"vscnt", 578 "HasVscnt", 579 "true", 580 "Has separate store vscnt counter" 581>; 582 583def FeatureGetWaveIdInst : SubtargetFeature<"get-wave-id-inst", 584 "HasGetWaveIdInst", 585 "true", 586 "Has s_get_waveid_in_workgroup instruction" 587>; 588 589def FeatureSMemTimeInst : SubtargetFeature<"s-memtime-inst", 590 "HasSMemTimeInst", 591 "true", 592 "Has s_memtime instruction" 593>; 594 595def FeatureShaderCyclesRegister : SubtargetFeature<"shader-cycles-register", 596 "HasShaderCyclesRegister", 597 "true", 598 "Has SHADER_CYCLES hardware register" 599>; 600 601def FeatureMadMacF32Insts : SubtargetFeature<"mad-mac-f32-insts", 602 "HasMadMacF32Insts", 603 "true", 604 "Has v_mad_f32/v_mac_f32/v_madak_f32/v_madmk_f32 instructions" 605>; 606 607def FeatureDsSrc2Insts : SubtargetFeature<"ds-src2-insts", 608 "HasDsSrc2Insts", 609 "true", 610 "Has ds_*_src2 instructions" 611>; 612 613def FeatureRegisterBanking : SubtargetFeature<"register-banking", 614 "HasRegisterBanking", 615 "true", 616 "Has register banking" 617>; 618 619def FeatureVOP3Literal : SubtargetFeature<"vop3-literal", 620 "HasVOP3Literal", 621 "true", 622 "Can use one literal in VOP3" 623>; 624 625def FeatureNoDataDepHazard : SubtargetFeature<"no-data-dep-hazard", 626 "HasNoDataDepHazard", 627 "true", 628 "Does not need SW waitstates" 629>; 630 631class SubtargetFeatureNSAMaxSize <int Value> : SubtargetFeature < 632 "nsa-max-size-"#Value, 633 "NSAMaxSize", 634 !cast<string>(Value), 635 "The maximum non-sequential address size in VGPRs." 636>; 637 638def FeatureNSAMaxSize5 : SubtargetFeatureNSAMaxSize<5>; 639def FeatureNSAMaxSize13 : SubtargetFeatureNSAMaxSize<13>; 640 641//===------------------------------------------------------------===// 642// Subtarget Features (options and debugging) 643//===------------------------------------------------------------===// 644 645class FeatureMaxPrivateElementSize<int size> : SubtargetFeature< 646 "max-private-element-size-"#size, 647 "MaxPrivateElementSize", 648 !cast<string>(size), 649 "Maximum private access size may be "#size 650>; 651 652def FeatureMaxPrivateElementSize4 : FeatureMaxPrivateElementSize<4>; 653def FeatureMaxPrivateElementSize8 : FeatureMaxPrivateElementSize<8>; 654def FeatureMaxPrivateElementSize16 : FeatureMaxPrivateElementSize<16>; 655 656def FeatureDumpCode : SubtargetFeature <"DumpCode", 657 "DumpCode", 658 "true", 659 "Dump MachineInstrs in the CodeEmitter" 660>; 661 662def FeatureDumpCodeLower : SubtargetFeature <"dumpcode", 663 "DumpCode", 664 "true", 665 "Dump MachineInstrs in the CodeEmitter" 666>; 667 668// XXX - This should probably be removed once enabled by default 669def FeatureEnableLoadStoreOpt : SubtargetFeature <"load-store-opt", 670 "EnableLoadStoreOpt", 671 "true", 672 "Enable SI load/store optimizer pass" 673>; 674 675// Performance debugging feature. Allow using DS instruction immediate 676// offsets even if the base pointer can't be proven to be base. On SI, 677// base pointer values that won't give the same result as a 16-bit add 678// are not safe to fold, but this will override the conservative test 679// for the base pointer. 680def FeatureEnableUnsafeDSOffsetFolding : SubtargetFeature < 681 "unsafe-ds-offset-folding", 682 "EnableUnsafeDSOffsetFolding", 683 "true", 684 "Force using DS instruction immediate offsets on SI" 685>; 686 687def FeatureEnableSIScheduler : SubtargetFeature<"si-scheduler", 688 "EnableSIScheduler", 689 "true", 690 "Enable SI Machine Scheduler" 691>; 692 693def FeatureEnableDS128 : SubtargetFeature<"enable-ds128", 694 "EnableDS128", 695 "true", 696 "Use ds_{read|write}_b128" 697>; 698 699// Sparse texture support requires that all result registers are zeroed when 700// PRTStrictNull is set to true. This feature is turned on for all architectures 701// but is enabled as a feature in case there are situations where PRTStrictNull 702// is disabled by the driver. 703def FeatureEnablePRTStrictNull : SubtargetFeature<"enable-prt-strict-null", 704 "EnablePRTStrictNull", 705 "true", 706 "Enable zeroing of result registers for sparse texture fetches" 707>; 708 709// Unless +-flat-for-global is specified, turn on FlatForGlobal for 710// all OS-es on VI and newer hardware to avoid assertion failures due 711// to missing ADDR64 variants of MUBUF instructions. 712// FIXME: moveToVALU should be able to handle converting addr64 MUBUF 713// instructions. 714 715def FeatureFlatForGlobal : SubtargetFeature<"flat-for-global", 716 "FlatForGlobal", 717 "true", 718 "Force to generate flat instruction for global" 719>; 720 721def FeatureAutoWaitcntBeforeBarrier : SubtargetFeature < 722 "auto-waitcnt-before-barrier", 723 "AutoWaitcntBeforeBarrier", 724 "true", 725 "Hardware automatically inserts waitcnt before barrier" 726>; 727 728def FeatureTrigReducedRange : SubtargetFeature<"trig-reduced-range", 729 "HasTrigReducedRange", 730 "true", 731 "Requires use of fract on arguments to trig instructions" 732>; 733 734// Alignment enforcement is controlled by a configuration register: 735// SH_MEM_CONFIG.alignment_mode 736def FeatureUnalignedAccessMode : SubtargetFeature<"unaligned-access-mode", 737 "UnalignedAccessMode", 738 "true", 739 "Enable unaligned global, local and region loads and stores if the hardware" 740 " supports it" 741>; 742 743def FeaturePackedTID : SubtargetFeature<"packed-tid", 744 "HasPackedTID", 745 "true", 746 "Workitem IDs are packed into v0 at kernel launch" 747>; 748 749def FeatureArchitectedFlatScratch : SubtargetFeature<"architected-flat-scratch", 750 "HasArchitectedFlatScratch", 751 "true", 752 "Flat Scratch register is a readonly SPI initialized architected register" 753>; 754 755// Dummy feature used to disable assembler instructions. 756def FeatureDisable : SubtargetFeature<"", 757 "FeatureDisable","true", 758 "Dummy feature to disable assembler instructions" 759>; 760 761class GCNSubtargetFeatureGeneration <string Value, 762 string FeatureName, 763 list<SubtargetFeature> Implies> : 764 SubtargetFeatureGeneration <Value, FeatureName, "GCNSubtarget", Implies>; 765 766def FeatureSouthernIslands : GCNSubtargetFeatureGeneration<"SOUTHERN_ISLANDS", 767 "southern-islands", 768 [FeatureFP64, FeatureLocalMemorySize32768, FeatureMIMG_R128, 769 FeatureWavefrontSize64, FeatureSMemTimeInst, FeatureMadMacF32Insts, 770 FeatureDsSrc2Insts, FeatureLDSBankCount32, FeatureMovrel, 771 FeatureTrigReducedRange, FeatureExtendedImageInsts 772 ] 773>; 774 775def FeatureSeaIslands : GCNSubtargetFeatureGeneration<"SEA_ISLANDS", 776 "sea-islands", 777 [FeatureFP64, FeatureLocalMemorySize65536, FeatureMIMG_R128, 778 FeatureWavefrontSize64, FeatureFlatAddressSpace, 779 FeatureCIInsts, FeatureMovrel, FeatureTrigReducedRange, 780 FeatureGFX7GFX8GFX9Insts, FeatureSMemTimeInst, FeatureMadMacF32Insts, 781 FeatureDsSrc2Insts, FeatureExtendedImageInsts, FeatureUnalignedBufferAccess 782 ] 783>; 784 785def FeatureVolcanicIslands : GCNSubtargetFeatureGeneration<"VOLCANIC_ISLANDS", 786 "volcanic-islands", 787 [FeatureFP64, FeatureLocalMemorySize65536, FeatureMIMG_R128, 788 FeatureWavefrontSize64, FeatureFlatAddressSpace, 789 FeatureGCN3Encoding, FeatureCIInsts, Feature16BitInsts, 790 FeatureSMemRealTime, FeatureVGPRIndexMode, FeatureMovrel, 791 FeatureScalarStores, FeatureInv2PiInlineImm, 792 FeatureSDWA, FeatureSDWAOutModsVOPC, FeatureSDWAMac, FeatureDPP, 793 FeatureIntClamp, FeatureTrigReducedRange, FeatureGFX8Insts, 794 FeatureGFX7GFX8GFX9Insts, FeatureSMemTimeInst, FeatureMadMacF32Insts, 795 FeatureDsSrc2Insts, FeatureExtendedImageInsts, FeatureFastDenormalF32, 796 FeatureUnalignedBufferAccess 797 ] 798>; 799 800def FeatureGFX9 : GCNSubtargetFeatureGeneration<"GFX9", 801 "gfx9", 802 [FeatureFP64, FeatureLocalMemorySize65536, 803 FeatureWavefrontSize64, FeatureFlatAddressSpace, 804 FeatureGCN3Encoding, FeatureCIInsts, Feature16BitInsts, 805 FeatureSMemRealTime, FeatureScalarStores, FeatureInv2PiInlineImm, 806 FeatureApertureRegs, FeatureGFX9Insts, FeatureVOP3P, FeatureVGPRIndexMode, 807 FeatureFastFMAF32, FeatureDPP, FeatureIntClamp, 808 FeatureSDWA, FeatureSDWAOmod, FeatureSDWAScalar, FeatureSDWASdst, 809 FeatureFlatInstOffsets, FeatureFlatGlobalInsts, FeatureFlatScratchInsts, 810 FeatureAddNoCarryInsts, FeatureGFX8Insts, FeatureGFX7GFX8GFX9Insts, 811 FeatureScalarFlatScratchInsts, FeatureScalarAtomics, FeatureR128A16, 812 FeatureSMemTimeInst, FeatureFastDenormalF32, FeatureSupportsXNACK, 813 FeatureUnalignedBufferAccess, FeatureUnalignedDSAccess, 814 FeatureNegativeScratchOffsetBug 815 ] 816>; 817 818def FeatureGFX10 : GCNSubtargetFeatureGeneration<"GFX10", 819 "gfx10", 820 [FeatureFP64, FeatureLocalMemorySize65536, FeatureMIMG_R128, 821 FeatureFlatAddressSpace, 822 FeatureCIInsts, Feature16BitInsts, 823 FeatureSMemRealTime, FeatureInv2PiInlineImm, 824 FeatureApertureRegs, FeatureGFX9Insts, FeatureGFX10Insts, FeatureVOP3P, 825 FeatureMovrel, FeatureFastFMAF32, FeatureDPP, FeatureIntClamp, 826 FeatureSDWA, FeatureSDWAOmod, FeatureSDWAScalar, FeatureSDWASdst, 827 FeatureFlatInstOffsets, FeatureFlatGlobalInsts, FeatureFlatScratchInsts, 828 FeatureAddNoCarryInsts, FeatureFmaMixInsts, FeatureGFX8Insts, 829 FeatureNoSdstCMPX, FeatureVscnt, FeatureRegisterBanking, 830 FeatureVOP3Literal, FeatureDPP8, FeatureExtendedImageInsts, 831 FeatureNoDataDepHazard, FeaturePkFmacF16Inst, 832 FeatureGFX10A16, FeatureSMemTimeInst, FeatureFastDenormalF32, FeatureG16, 833 FeatureUnalignedBufferAccess, FeatureUnalignedDSAccess 834 ] 835>; 836 837class FeatureSet<list<SubtargetFeature> Features_> { 838 list<SubtargetFeature> Features = Features_; 839} 840 841def FeatureISAVersion6_0_0 : FeatureSet<[FeatureSouthernIslands, 842 FeatureFastFMAF32, 843 HalfRate64Ops, 844 FeatureLDSBankCount32]>; 845 846def FeatureISAVersion6_0_1 : FeatureSet< 847 [FeatureSouthernIslands, 848 FeatureLDSBankCount32]>; 849 850def FeatureISAVersion6_0_2 : FeatureSet< 851 [FeatureSouthernIslands, 852 FeatureLDSBankCount32]>; 853 854def FeatureISAVersion7_0_0 : FeatureSet< 855 [FeatureSeaIslands, 856 FeatureLDSBankCount32]>; 857 858def FeatureISAVersion7_0_1 : FeatureSet< 859 [FeatureSeaIslands, 860 HalfRate64Ops, 861 FeatureLDSBankCount32, 862 FeatureFastFMAF32]>; 863 864def FeatureISAVersion7_0_2 : FeatureSet< 865 [FeatureSeaIslands, 866 FeatureLDSBankCount16, 867 FeatureFastFMAF32]>; 868 869def FeatureISAVersion7_0_3 : FeatureSet< 870 [FeatureSeaIslands, 871 FeatureLDSBankCount16]>; 872 873def FeatureISAVersion7_0_4 : FeatureSet< 874 [FeatureSeaIslands, 875 FeatureLDSBankCount32]>; 876 877def FeatureISAVersion7_0_5 : FeatureSet< 878 [FeatureSeaIslands, 879 FeatureLDSBankCount16]>; 880 881def FeatureISAVersion8_0_1 : FeatureSet< 882 [FeatureVolcanicIslands, 883 FeatureFastFMAF32, 884 HalfRate64Ops, 885 FeatureLDSBankCount32, 886 FeatureSupportsXNACK, 887 FeatureUnpackedD16VMem]>; 888 889def FeatureISAVersion8_0_2 : FeatureSet< 890 [FeatureVolcanicIslands, 891 FeatureLDSBankCount32, 892 FeatureSGPRInitBug, 893 FeatureUnpackedD16VMem]>; 894 895def FeatureISAVersion8_0_3 : FeatureSet< 896 [FeatureVolcanicIslands, 897 FeatureLDSBankCount32, 898 FeatureUnpackedD16VMem]>; 899 900def FeatureISAVersion8_0_5 : FeatureSet< 901 [FeatureVolcanicIslands, 902 FeatureLDSBankCount32, 903 FeatureSGPRInitBug, 904 FeatureUnpackedD16VMem]>; 905 906def FeatureISAVersion8_1_0 : FeatureSet< 907 [FeatureVolcanicIslands, 908 FeatureLDSBankCount16, 909 FeatureSupportsXNACK, 910 FeatureImageStoreD16Bug, 911 FeatureImageGather4D16Bug]>; 912 913def FeatureISAVersion9_0_0 : FeatureSet< 914 [FeatureGFX9, 915 FeatureMadMixInsts, 916 FeatureLDSBankCount32, 917 FeatureDsSrc2Insts, 918 FeatureExtendedImageInsts, 919 FeatureMadMacF32Insts, 920 FeatureImageGather4D16Bug]>; 921 922def FeatureISAVersion9_0_2 : FeatureSet< 923 [FeatureGFX9, 924 FeatureMadMixInsts, 925 FeatureLDSBankCount32, 926 FeatureDsSrc2Insts, 927 FeatureExtendedImageInsts, 928 FeatureMadMacF32Insts, 929 FeatureImageGather4D16Bug]>; 930 931def FeatureISAVersion9_0_4 : FeatureSet< 932 [FeatureGFX9, 933 FeatureLDSBankCount32, 934 FeatureDsSrc2Insts, 935 FeatureExtendedImageInsts, 936 FeatureMadMacF32Insts, 937 FeatureFmaMixInsts, 938 FeatureImageGather4D16Bug]>; 939 940def FeatureISAVersion9_0_6 : FeatureSet< 941 [FeatureGFX9, 942 HalfRate64Ops, 943 FeatureFmaMixInsts, 944 FeatureLDSBankCount32, 945 FeatureDsSrc2Insts, 946 FeatureExtendedImageInsts, 947 FeatureMadMacF32Insts, 948 FeatureDLInsts, 949 FeatureDot1Insts, 950 FeatureDot2Insts, 951 FeatureDot7Insts, 952 FeatureSupportsSRAMECC, 953 FeatureImageGather4D16Bug]>; 954 955def FeatureISAVersion9_0_8 : FeatureSet< 956 [FeatureGFX9, 957 HalfRate64Ops, 958 FeatureFmaMixInsts, 959 FeatureLDSBankCount32, 960 FeatureDsSrc2Insts, 961 FeatureExtendedImageInsts, 962 FeatureMadMacF32Insts, 963 FeatureDLInsts, 964 FeatureDot1Insts, 965 FeatureDot2Insts, 966 FeatureDot3Insts, 967 FeatureDot4Insts, 968 FeatureDot5Insts, 969 FeatureDot6Insts, 970 FeatureDot7Insts, 971 FeatureMAIInsts, 972 FeaturePkFmacF16Inst, 973 FeatureAtomicFaddInsts, 974 FeatureSupportsSRAMECC, 975 FeatureMFMAInlineLiteralBug, 976 FeatureImageGather4D16Bug]>; 977 978def FeatureISAVersion9_0_9 : FeatureSet< 979 [FeatureGFX9, 980 FeatureMadMixInsts, 981 FeatureLDSBankCount32, 982 FeatureDsSrc2Insts, 983 FeatureExtendedImageInsts, 984 FeatureMadMacF32Insts, 985 FeatureImageGather4D16Bug]>; 986 987def FeatureISAVersion9_0_A : FeatureSet< 988 [FeatureGFX9, 989 FeatureGFX90AInsts, 990 FeatureFmaMixInsts, 991 FeatureLDSBankCount32, 992 FeatureDLInsts, 993 FeatureDot1Insts, 994 FeatureDot2Insts, 995 FeatureDot3Insts, 996 FeatureDot4Insts, 997 FeatureDot5Insts, 998 FeatureDot6Insts, 999 FeatureDot7Insts, 1000 Feature64BitDPP, 1001 FeaturePackedFP32Ops, 1002 FeatureMAIInsts, 1003 FeaturePkFmacF16Inst, 1004 FeatureAtomicFaddInsts, 1005 FeatureMadMacF32Insts, 1006 FeatureSupportsSRAMECC, 1007 FeaturePackedTID, 1008 FullRate64Ops]>; 1009 1010def FeatureISAVersion9_0_C : FeatureSet< 1011 [FeatureGFX9, 1012 FeatureMadMixInsts, 1013 FeatureLDSBankCount32, 1014 FeatureDsSrc2Insts, 1015 FeatureExtendedImageInsts, 1016 FeatureMadMacF32Insts, 1017 FeatureImageGather4D16Bug]>; 1018 1019// TODO: Organize more features into groups. 1020def FeatureGroup { 1021 // Bugs present on gfx10.1. 1022 list<SubtargetFeature> GFX10_1_Bugs = [ 1023 FeatureVcmpxPermlaneHazard, 1024 FeatureVMEMtoScalarWriteHazard, 1025 FeatureSMEMtoVectorWriteHazard, 1026 FeatureInstFwdPrefetchBug, 1027 FeatureVcmpxExecWARHazard, 1028 FeatureLdsBranchVmemWARHazard, 1029 FeatureNSAtoVMEMBug, 1030 FeatureNSAClauseBug, 1031 FeatureOffset3fBug, 1032 FeatureFlatSegmentOffsetBug, 1033 FeatureNegativeUnalignedScratchOffsetBug 1034 ]; 1035} 1036 1037def FeatureISAVersion10_1_0 : FeatureSet< 1038 !listconcat(FeatureGroup.GFX10_1_Bugs, 1039 [FeatureGFX10, 1040 FeatureLDSBankCount32, 1041 FeatureDLInsts, 1042 FeatureNSAEncoding, 1043 FeatureNSAMaxSize5, 1044 FeatureWavefrontSize32, 1045 FeatureScalarStores, 1046 FeatureScalarAtomics, 1047 FeatureScalarFlatScratchInsts, 1048 FeatureGetWaveIdInst, 1049 FeatureMadMacF32Insts, 1050 FeatureDsSrc2Insts, 1051 FeatureLdsMisalignedBug, 1052 FeatureSupportsXNACK])>; 1053 1054def FeatureISAVersion10_1_1 : FeatureSet< 1055 !listconcat(FeatureGroup.GFX10_1_Bugs, 1056 [FeatureGFX10, 1057 FeatureLDSBankCount32, 1058 FeatureDLInsts, 1059 FeatureDot1Insts, 1060 FeatureDot2Insts, 1061 FeatureDot5Insts, 1062 FeatureDot6Insts, 1063 FeatureDot7Insts, 1064 FeatureNSAEncoding, 1065 FeatureNSAMaxSize5, 1066 FeatureWavefrontSize32, 1067 FeatureScalarStores, 1068 FeatureScalarAtomics, 1069 FeatureScalarFlatScratchInsts, 1070 FeatureGetWaveIdInst, 1071 FeatureMadMacF32Insts, 1072 FeatureDsSrc2Insts, 1073 FeatureLdsMisalignedBug, 1074 FeatureSupportsXNACK])>; 1075 1076def FeatureISAVersion10_1_2 : FeatureSet< 1077 !listconcat(FeatureGroup.GFX10_1_Bugs, 1078 [FeatureGFX10, 1079 FeatureLDSBankCount32, 1080 FeatureDLInsts, 1081 FeatureDot1Insts, 1082 FeatureDot2Insts, 1083 FeatureDot5Insts, 1084 FeatureDot6Insts, 1085 FeatureDot7Insts, 1086 FeatureNSAEncoding, 1087 FeatureNSAMaxSize5, 1088 FeatureWavefrontSize32, 1089 FeatureScalarStores, 1090 FeatureScalarAtomics, 1091 FeatureScalarFlatScratchInsts, 1092 FeatureGetWaveIdInst, 1093 FeatureMadMacF32Insts, 1094 FeatureDsSrc2Insts, 1095 FeatureLdsMisalignedBug, 1096 FeatureSupportsXNACK])>; 1097 1098def FeatureISAVersion10_1_3 : FeatureSet< 1099 !listconcat(FeatureGroup.GFX10_1_Bugs, 1100 [FeatureGFX10, 1101 FeatureGFX10_AEncoding, 1102 FeatureLDSBankCount32, 1103 FeatureDLInsts, 1104 FeatureNSAEncoding, 1105 FeatureNSAMaxSize5, 1106 FeatureWavefrontSize32, 1107 FeatureScalarStores, 1108 FeatureScalarAtomics, 1109 FeatureScalarFlatScratchInsts, 1110 FeatureGetWaveIdInst, 1111 FeatureMadMacF32Insts, 1112 FeatureDsSrc2Insts, 1113 FeatureLdsMisalignedBug, 1114 FeatureSupportsXNACK])>; 1115 1116def FeatureISAVersion10_3_0 : FeatureSet< 1117 [FeatureGFX10, 1118 FeatureGFX10_AEncoding, 1119 FeatureGFX10_BEncoding, 1120 FeatureGFX10_3Insts, 1121 FeatureLDSBankCount32, 1122 FeatureDLInsts, 1123 FeatureDot1Insts, 1124 FeatureDot2Insts, 1125 FeatureDot5Insts, 1126 FeatureDot6Insts, 1127 FeatureDot7Insts, 1128 FeatureNSAEncoding, 1129 FeatureNSAMaxSize13, 1130 FeatureWavefrontSize32, 1131 FeatureShaderCyclesRegister]>; 1132 1133//===----------------------------------------------------------------------===// 1134 1135def AMDGPUInstrInfo : InstrInfo { 1136 let guessInstructionProperties = 1; 1137 let noNamedPositionallyEncodedOperands = 1; 1138} 1139 1140def AMDGPUAsmParser : AsmParser { 1141 // Some of the R600 registers have the same name, so this crashes. 1142 // For example T0_XYZW and T0_XY both have the asm name T0. 1143 let ShouldEmitMatchRegisterName = 0; 1144} 1145 1146def AMDGPUAsmWriter : AsmWriter { 1147 int PassSubtarget = 1; 1148} 1149 1150def AMDGPUAsmVariants { 1151 string Default = "Default"; 1152 int Default_ID = 0; 1153 string VOP3 = "VOP3"; 1154 int VOP3_ID = 1; 1155 string SDWA = "SDWA"; 1156 int SDWA_ID = 2; 1157 string SDWA9 = "SDWA9"; 1158 int SDWA9_ID = 3; 1159 string DPP = "DPP"; 1160 int DPP_ID = 4; 1161 string Disable = "Disable"; 1162 int Disable_ID = 5; 1163} 1164 1165def DefaultAMDGPUAsmParserVariant : AsmParserVariant { 1166 let Variant = AMDGPUAsmVariants.Default_ID; 1167 let Name = AMDGPUAsmVariants.Default; 1168} 1169 1170def VOP3AsmParserVariant : AsmParserVariant { 1171 let Variant = AMDGPUAsmVariants.VOP3_ID; 1172 let Name = AMDGPUAsmVariants.VOP3; 1173} 1174 1175def SDWAAsmParserVariant : AsmParserVariant { 1176 let Variant = AMDGPUAsmVariants.SDWA_ID; 1177 let Name = AMDGPUAsmVariants.SDWA; 1178} 1179 1180def SDWA9AsmParserVariant : AsmParserVariant { 1181 let Variant = AMDGPUAsmVariants.SDWA9_ID; 1182 let Name = AMDGPUAsmVariants.SDWA9; 1183} 1184 1185 1186def DPPAsmParserVariant : AsmParserVariant { 1187 let Variant = AMDGPUAsmVariants.DPP_ID; 1188 let Name = AMDGPUAsmVariants.DPP; 1189} 1190 1191def AMDGPU : Target { 1192 // Pull in Instruction Info: 1193 let InstructionSet = AMDGPUInstrInfo; 1194 let AssemblyParsers = [AMDGPUAsmParser]; 1195 let AssemblyParserVariants = [DefaultAMDGPUAsmParserVariant, 1196 VOP3AsmParserVariant, 1197 SDWAAsmParserVariant, 1198 SDWA9AsmParserVariant, 1199 DPPAsmParserVariant]; 1200 let AssemblyWriters = [AMDGPUAsmWriter]; 1201 let AllowRegisterRenaming = 1; 1202} 1203 1204// Dummy Instruction itineraries for pseudo instructions 1205def ALU_NULL : FuncUnit; 1206def NullALU : InstrItinClass; 1207 1208//===----------------------------------------------------------------------===// 1209// Predicate helper class 1210//===----------------------------------------------------------------------===// 1211 1212def isGFX6 : 1213 Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS">, 1214 AssemblerPredicate<(all_of FeatureSouthernIslands)>; 1215 1216def isGFX6GFX7 : 1217 Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS ||" 1218 "Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS">, 1219 AssemblerPredicate<(all_of (not FeatureGCN3Encoding), (not FeatureGFX10Insts))>; 1220 1221def isGFX6GFX7GFX10 : 1222 Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS ||" 1223 "Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS ||" 1224 "Subtarget->getGeneration() == AMDGPUSubtarget::GFX10">, 1225 AssemblerPredicate<(all_of (not FeatureGCN3Encoding))>; 1226 1227def isGFX7Only : 1228 Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS">, 1229 AssemblerPredicate<(all_of (not FeatureGCN3Encoding), FeatureCIInsts, (not FeatureGFX10Insts))>; 1230 1231def isGFX7GFX10 : 1232 Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS ||" 1233 "Subtarget->getGeneration() == AMDGPUSubtarget::GFX10">, 1234 AssemblerPredicate<(all_of (not FeatureGCN3Encoding), FeatureCIInsts)>; 1235 1236def isGFX7GFX8GFX9 : 1237 Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS ||" 1238 "Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS ||" 1239 "Subtarget->getGeneration() == AMDGPUSubtarget::GFX9">, 1240 AssemblerPredicate<(all_of FeatureGFX7GFX8GFX9Insts)>; 1241 1242def isGFX6GFX7GFX8GFX9 : 1243 Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS ||" 1244 "Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS ||" 1245 "Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS ||" 1246 "Subtarget->getGeneration() == AMDGPUSubtarget::GFX9">, 1247 AssemblerPredicate<(all_of (not FeatureGFX10Insts))>; 1248 1249def isGFX6GFX7GFX8GFX9NotGFX90A : 1250 Predicate<"!Subtarget->hasGFX90AInsts() &&" 1251 "(Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS ||" 1252 " Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS ||" 1253 " Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS ||" 1254 " Subtarget->getGeneration() == AMDGPUSubtarget::GFX9)">, 1255 AssemblerPredicate<(all_of (not FeatureGFX10Insts), (not FeatureGFX90AInsts))>; 1256 1257def isGFX7Plus : 1258 Predicate<"Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS">, 1259 AssemblerPredicate<(all_of FeatureCIInsts)>; 1260 1261def isGFX8Plus : 1262 Predicate<"Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS">, 1263 AssemblerPredicate<(all_of FeatureGFX8Insts)>; 1264 1265def isGFX8Only : Predicate<"Subtarget->getGeneration() ==" 1266 "AMDGPUSubtarget::VOLCANIC_ISLANDS">, 1267 AssemblerPredicate <(all_of FeatureVolcanicIslands)>; 1268 1269def isGFX9Plus : 1270 Predicate<"Subtarget->getGeneration() >= AMDGPUSubtarget::GFX9">, 1271 AssemblerPredicate<(all_of FeatureGFX9Insts)>; 1272 1273def isGFX9Only : Predicate < 1274 "Subtarget->getGeneration() == AMDGPUSubtarget::GFX9">, 1275 AssemblerPredicate<(all_of FeatureGCN3Encoding, FeatureGFX9Insts)>; 1276 1277def isGCN3ExcludingGFX90A : 1278 Predicate<"Subtarget->isGCN3Encoding() && !Subtarget->hasGFX90AInsts()">, 1279 AssemblerPredicate<(all_of FeatureGCN3Encoding, (not FeatureGFX90AInsts))>; 1280 1281def isGFX90APlus : 1282 Predicate<"Subtarget->hasGFX90AInsts()">, 1283 AssemblerPredicate<(all_of FeatureGFX90AInsts)>; 1284 1285def isNotGFX90APlus : 1286 Predicate<"!Subtarget->hasGFX90AInsts()">, 1287 AssemblerPredicate<(all_of (not FeatureGFX90AInsts))>; 1288 1289def isGFX8GFX9NotGFX90A : 1290 Predicate<"!Subtarget->hasGFX90AInsts() &&" 1291 "(Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS ||" 1292 " Subtarget->getGeneration() == AMDGPUSubtarget::GFX9)">, 1293 AssemblerPredicate<(all_of FeatureGFX8Insts, FeatureGCN3Encoding, (not FeatureGFX90AInsts))>; 1294 1295def isGFX90AOnly : 1296 Predicate<"Subtarget->hasGFX90AInsts()">, 1297 AssemblerPredicate<(all_of FeatureGFX90AInsts)>; 1298 1299def isGFX908orGFX90A : 1300 Predicate<"Subtarget->hasMAIInsts()">, 1301 AssemblerPredicate<(all_of FeatureMAIInsts)>; 1302 1303def isGFX8GFX9 : 1304 Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS ||" 1305 "Subtarget->getGeneration() == AMDGPUSubtarget::GFX9">, 1306 AssemblerPredicate<(all_of FeatureGFX8Insts, FeatureGCN3Encoding)>; 1307 1308def isGFX10Plus : 1309 Predicate<"Subtarget->getGeneration() >= AMDGPUSubtarget::GFX10">, 1310 AssemblerPredicate<(all_of FeatureGFX10Insts)>; 1311 1312def isGFX10Before1030 : 1313 Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::GFX10 &&" 1314 "!Subtarget->hasGFX10_3Insts()">, 1315 AssemblerPredicate<(all_of FeatureGFX10Insts,(not FeatureGFX10_3Insts))>; 1316 1317def HasFlatAddressSpace : Predicate<"Subtarget->hasFlatAddressSpace()">, 1318 AssemblerPredicate<(all_of FeatureFlatAddressSpace)>; 1319 1320def HasFlatGlobalInsts : Predicate<"Subtarget->hasFlatGlobalInsts()">, 1321 AssemblerPredicate<(all_of FeatureFlatGlobalInsts)>; 1322def HasFlatScratchInsts : Predicate<"Subtarget->hasFlatScratchInsts()">, 1323 AssemblerPredicate<(all_of FeatureFlatScratchInsts)>; 1324def HasScalarFlatScratchInsts : Predicate<"Subtarget->hasScalarFlatScratchInsts()">, 1325 AssemblerPredicate<(all_of FeatureScalarFlatScratchInsts)>; 1326def HasD16LoadStore : Predicate<"Subtarget->hasD16LoadStore()">, 1327 AssemblerPredicate<(all_of FeatureGFX9Insts)>; 1328 1329def HasFlatScratchSTMode : Predicate<"Subtarget->hasFlatScratchSTMode()">, 1330 AssemblerPredicate<(any_of FeatureGFX10_3Insts)>; 1331 1332def HasGFX10_AEncoding : Predicate<"Subtarget->hasGFX10_AEncoding()">, 1333 AssemblerPredicate<(all_of FeatureGFX10_AEncoding)>; 1334 1335def HasGFX10_BEncoding : Predicate<"Subtarget->hasGFX10_BEncoding()">, 1336 AssemblerPredicate<(all_of FeatureGFX10_BEncoding)>; 1337 1338def HasUnpackedD16VMem : Predicate<"Subtarget->hasUnpackedD16VMem()">, 1339 AssemblerPredicate<(all_of FeatureUnpackedD16VMem)>; 1340def HasPackedD16VMem : Predicate<"!Subtarget->hasUnpackedD16VMem()">, 1341 AssemblerPredicate<(all_of (not FeatureUnpackedD16VMem))>; 1342 1343def D16PreservesUnusedBits : 1344 Predicate<"Subtarget->d16PreservesUnusedBits()">, 1345 AssemblerPredicate<(all_of FeatureGFX9Insts, (not FeatureSRAMECC))>; 1346 1347def LDSRequiresM0Init : Predicate<"Subtarget->ldsRequiresM0Init()">; 1348def NotLDSRequiresM0Init : Predicate<"!Subtarget->ldsRequiresM0Init()">; 1349 1350def HasDSAddTid : Predicate<"Subtarget->getGeneration() >= AMDGPUSubtarget::GFX9">, 1351 AssemblerPredicate<(all_of FeatureGFX9Insts)>; 1352 1353def HasLDSFPAtomicAdd : Predicate<"Subtarget->hasLDSFPAtomicAdd()">, 1354 AssemblerPredicate<(all_of FeatureGFX8Insts)>; 1355 1356def HasAddNoCarryInsts : Predicate<"Subtarget->hasAddNoCarry()">, 1357 AssemblerPredicate<(all_of FeatureAddNoCarryInsts)>; 1358 1359def NotHasAddNoCarryInsts : Predicate<"!Subtarget->hasAddNoCarry()">; 1360 1361def Has16BitInsts : Predicate<"Subtarget->has16BitInsts()">, 1362 AssemblerPredicate<(all_of Feature16BitInsts)>; 1363def HasVOP3PInsts : Predicate<"Subtarget->hasVOP3PInsts()">, 1364 AssemblerPredicate<(all_of FeatureVOP3P)>; 1365 1366def HasMinMaxDenormModes : Predicate<"Subtarget->supportsMinMaxDenormModes()">; 1367def NotHasMinMaxDenormModes : Predicate<"!Subtarget->supportsMinMaxDenormModes()">; 1368 1369def HasSDWA : Predicate<"Subtarget->hasSDWA()">, 1370 AssemblerPredicate<(all_of FeatureSDWA, FeatureVolcanicIslands)>; 1371 1372def HasSDWA9 : 1373 Predicate<"Subtarget->hasSDWA()">, 1374 AssemblerPredicate<(all_of FeatureGCN3Encoding, FeatureGFX9Insts,FeatureSDWA)>; 1375 1376def HasSDWA10 : 1377 Predicate<"Subtarget->hasSDWA()">, 1378 AssemblerPredicate<(all_of (not FeatureGCN3Encoding), FeatureGFX10Insts, FeatureSDWA)>; 1379 1380def HasDPP : Predicate<"Subtarget->hasDPP()">, 1381 AssemblerPredicate<(all_of FeatureGCN3Encoding, FeatureDPP)>; 1382 1383def HasDPP8 : Predicate<"Subtarget->hasDPP8()">, 1384 AssemblerPredicate<(all_of (not FeatureGCN3Encoding), FeatureGFX10Insts, FeatureDPP8)>; 1385 1386def Has64BitDPP : Predicate<"Subtarget->has64BitDPP()">, 1387 AssemblerPredicate<(all_of Feature64BitDPP)>; 1388 1389def HasPackedFP32Ops : Predicate<"Subtarget->hasPackedFP32Ops()">, 1390 AssemblerPredicate<(all_of FeaturePackedFP32Ops)>; 1391 1392def HasFmaakFmamkF32Insts : 1393 Predicate<"Subtarget->hasFmaakFmamkF32Insts()">, 1394 AssemblerPredicate<(any_of FeatureGFX10Insts)>; 1395 1396def HasExtendedImageInsts : Predicate<"Subtarget->hasExtendedImageInsts()">, 1397 AssemblerPredicate<(all_of FeatureExtendedImageInsts)>; 1398 1399def HasR128A16 : Predicate<"Subtarget->hasR128A16()">, 1400 AssemblerPredicate<(all_of FeatureR128A16)>; 1401 1402def HasGFX10A16 : Predicate<"Subtarget->hasGFX10A16()">, 1403 AssemblerPredicate<(all_of FeatureGFX10A16)>; 1404 1405def HasG16 : Predicate<"Subtarget->hasG16()">, 1406 AssemblerPredicate<(all_of FeatureG16)>; 1407 1408def HasDPP16 : Predicate<"Subtarget->hasDPP()">, 1409 AssemblerPredicate<(all_of (not FeatureGCN3Encoding), FeatureGFX10Insts, FeatureDPP)>; 1410 1411def HasIntClamp : Predicate<"Subtarget->hasIntClamp()">, 1412 AssemblerPredicate<(all_of FeatureIntClamp)>; 1413 1414def HasMadMixInsts : Predicate<"Subtarget->hasMadMixInsts()">, 1415 AssemblerPredicate<(all_of FeatureMadMixInsts)>; 1416 1417def HasScalarStores : Predicate<"Subtarget->hasScalarStores()">, 1418 AssemblerPredicate<(all_of FeatureScalarStores)>; 1419 1420def HasScalarAtomics : Predicate<"Subtarget->hasScalarAtomics()">, 1421 AssemblerPredicate<(all_of FeatureScalarAtomics)>; 1422 1423def HasNoSdstCMPX : Predicate<"Subtarget->hasNoSdstCMPX()">, 1424 AssemblerPredicate<(all_of FeatureNoSdstCMPX)>; 1425 1426def HasSdstCMPX : Predicate<"!Subtarget->hasNoSdstCMPX()">, 1427 AssemblerPredicate<(all_of (not FeatureNoSdstCMPX))>; 1428 1429def has16BankLDS : Predicate<"Subtarget->getLDSBankCount() == 16">; 1430def has32BankLDS : Predicate<"Subtarget->getLDSBankCount() == 32">; 1431def HasVGPRIndexMode : Predicate<"Subtarget->hasVGPRIndexMode()">, 1432 AssemblerPredicate<(all_of FeatureVGPRIndexMode)>; 1433def HasMovrel : Predicate<"Subtarget->hasMovrel()">, 1434 AssemblerPredicate<(all_of FeatureMovrel)>; 1435 1436def HasFmaMixInsts : Predicate<"Subtarget->hasFmaMixInsts()">, 1437 AssemblerPredicate<(all_of FeatureFmaMixInsts)>; 1438 1439def HasDLInsts : Predicate<"Subtarget->hasDLInsts()">, 1440 AssemblerPredicate<(all_of FeatureDLInsts)>; 1441 1442def HasDot1Insts : Predicate<"Subtarget->hasDot1Insts()">, 1443 AssemblerPredicate<(all_of FeatureDot1Insts)>; 1444 1445def HasDot2Insts : Predicate<"Subtarget->hasDot2Insts()">, 1446 AssemblerPredicate<(all_of FeatureDot2Insts)>; 1447 1448def HasDot3Insts : Predicate<"Subtarget->hasDot3Insts()">, 1449 AssemblerPredicate<(all_of FeatureDot3Insts)>; 1450 1451def HasDot4Insts : Predicate<"Subtarget->hasDot4Insts()">, 1452 AssemblerPredicate<(all_of FeatureDot4Insts)>; 1453 1454def HasDot5Insts : Predicate<"Subtarget->hasDot5Insts()">, 1455 AssemblerPredicate<(all_of FeatureDot5Insts)>; 1456 1457def HasDot6Insts : Predicate<"Subtarget->hasDot6Insts()">, 1458 AssemblerPredicate<(all_of FeatureDot6Insts)>; 1459 1460def HasDot7Insts : Predicate<"Subtarget->hasDot7Insts()">, 1461 AssemblerPredicate<(all_of FeatureDot7Insts)>; 1462 1463def HasGetWaveIdInst : Predicate<"Subtarget->hasGetWaveIdInst()">, 1464 AssemblerPredicate<(all_of FeatureGetWaveIdInst)>; 1465 1466def HasMAIInsts : Predicate<"Subtarget->hasMAIInsts()">, 1467 AssemblerPredicate<(all_of FeatureMAIInsts)>; 1468 1469def HasSMemRealTime : Predicate<"Subtarget->hasSMemRealTime()">, 1470 AssemblerPredicate<(all_of FeatureSMemRealTime)>; 1471 1472def HasSMemTimeInst : Predicate<"Subtarget->hasSMemTimeInst()">, 1473 AssemblerPredicate<(all_of FeatureSMemTimeInst)>; 1474 1475def HasShaderCyclesRegister : Predicate<"Subtarget->hasShaderCyclesRegister()">, 1476 AssemblerPredicate<(all_of FeatureShaderCyclesRegister)>; 1477 1478def HasPkFmacF16Inst : Predicate<"Subtarget->hasPkFmacF16Inst()">, 1479 AssemblerPredicate<(all_of FeaturePkFmacF16Inst)>; 1480 1481def HasMadMacF32Insts : Predicate<"Subtarget->hasMadMacF32Insts()">, 1482 AssemblerPredicate<(all_of FeatureMadMacF32Insts)>; 1483 1484def HasFmaLegacy32 : Predicate<"Subtarget->hasGFX10_3Insts()">, 1485 AssemblerPredicate<(any_of FeatureGFX10_3Insts)>; 1486 1487def HasAtomicFaddInsts : Predicate<"Subtarget->hasAtomicFaddInsts()">, 1488 AssemblerPredicate<(all_of FeatureAtomicFaddInsts)>; 1489 1490def HasDsSrc2Insts : Predicate<"!Subtarget->hasDsSrc2Insts()">, 1491 AssemblerPredicate<(all_of FeatureDsSrc2Insts)>; 1492 1493def EnableLateCFGStructurize : Predicate< 1494 "EnableLateStructurizeCFG">; 1495 1496def EnableFlatScratch : Predicate<"Subtarget->enableFlatScratch()">; 1497 1498def DisableFlatScratch : Predicate<"!Subtarget->enableFlatScratch()">; 1499 1500def HasUnalignedAccessMode : Predicate<"Subtarget->hasUnalignedAccessMode()">, 1501 AssemblerPredicate<(all_of FeatureUnalignedAccessMode)>; 1502 1503// Include AMDGPU TD files 1504include "SISchedule.td" 1505include "GCNProcessors.td" 1506include "AMDGPUInstrInfo.td" 1507include "SIRegisterInfo.td" 1508include "AMDGPURegisterBanks.td" 1509include "AMDGPUInstructions.td" 1510include "SIInstrInfo.td" 1511include "AMDGPUCallingConv.td" 1512include "AMDGPUSearchableTables.td" 1513