//===-- AMDGPU.td - AMDGPU Tablegen files --------*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===------------------------------------------------------------===//

include "llvm/TableGen/SearchableTable.td"
include "llvm/Target/Target.td"
include "AMDGPUFeatures.td"

// Turns a bit into a list<int>: [1] when Value is set, the empty list otherwise.
class BoolToList<bit Value> {
  list<int> ret = !if(Value, [1]<int>, []<int>);
}

//===------------------------------------------------------------===//
// Subtarget Features (device properties)
//===------------------------------------------------------------===//

// Every record below passes, in order: the feature name string, the subtarget
// field it sets, the value stored in that field, and a help description.

// --- Floating point throughput --------------------------------------------

def FeatureFastFMAF32 : SubtargetFeature<
  "fast-fmaf", "FastFMAF32", "true",
  "Assuming f32 fma is at least as fast as mul + add">;

def FeatureMIMG_R128 : SubtargetFeature<
  "mimg-r128", "MIMG_R128", "true",
  "Support 128-bit texture resources">;

def HalfRate64Ops : SubtargetFeature<
  "half-rate-64-ops", "HalfRate64Ops", "true",
  "Most fp64 instructions are half rate instead of quarter">;

// --- Flat address space and flat-family memory instructions ----------------

def FeatureFlatAddressSpace : SubtargetFeature<
  "flat-address-space", "FlatAddressSpace", "true",
  "Support flat address space">;

def FeatureFlatInstOffsets : SubtargetFeature<
  "flat-inst-offsets", "FlatInstOffsets", "true",
  "Flat instructions have immediate offset addressing mode">;

def FeatureFlatGlobalInsts : SubtargetFeature<
  "flat-global-insts", "FlatGlobalInsts", "true",
  "Have global_* flat memory instructions">;

def FeatureFlatScratchInsts : SubtargetFeature<
  "flat-scratch-insts", "FlatScratchInsts", "true",
  "Have scratch_* flat memory instructions">;

def FeatureScalarFlatScratchInsts : SubtargetFeature<
  "scalar-flat-scratch-insts", "ScalarFlatScratchInsts", "true",
  "Have s_scratch_* flat memory instructions">;

// --- Miscellaneous instruction and memory capabilities ---------------------

def FeatureAddNoCarryInsts : SubtargetFeature<
  "add-no-carry-insts", "AddNoCarryInsts", "true",
  "Have VALU add/sub instructions without carry out">;

def FeatureUnalignedBufferAccess : SubtargetFeature<
  "unaligned-buffer-access", "UnalignedBufferAccess", "true",
  "Support unaligned global loads and stores">;

def FeatureTrapHandler : SubtargetFeature<
  "trap-handler", "TrapHandler", "true",
  "Trap handler support">;

def FeatureUnalignedScratchAccess : SubtargetFeature<
  "unaligned-scratch-access", "UnalignedScratchAccess", "true",
  "Support unaligned scratch loads and stores">;

def FeatureApertureRegs : SubtargetFeature<
  "aperture-regs", "HasApertureRegs", "true",
  "Has Memory Aperture Base and Size Registers">;

def FeatureMadMixInsts : SubtargetFeature<
  "mad-mix-insts", "HasMadMixInsts", "true",
  "Has v_mad_mix_f32, v_mad_mixlo_f16, v_mad_mixhi_f16 instructions">;

def FeatureFmaMixInsts : SubtargetFeature<
  "fma-mix-insts", "HasFmaMixInsts", "true",
  "Has v_fma_mix_f32, v_fma_mixlo_f16, v_fma_mixhi_f16 instructions">;

def FeatureDoesNotSupportXNACK : SubtargetFeature<
  "no-xnack-support", "DoesNotSupportXNACK", "true",
  "Hardware does not support XNACK">;

// XNACK is disabled if SH_MEM_CONFIG.ADDRESS_MODE = GPUVM on chips that support
// XNACK. The current default kernel driver setting is:
// - graphics ring: XNACK disabled
// - compute ring: XNACK enabled
//
// If XNACK is enabled, the VMEM latency can be worse.
// If XNACK is disabled, the 2 SGPRs can be used for general purposes.
def FeatureXNACK : SubtargetFeature<
  "xnack", "EnableXNACK", "true",
  "Enable XNACK support">;

def FeatureCuMode : SubtargetFeature<
  "cumode", "EnableCuMode", "true",
  "Enable CU wavefront execution mode">;

// --- Hardware bugs and hazards ----------------------------------------------

def FeatureSGPRInitBug : SubtargetFeature<
  "sgpr-init-bug", "SGPRInitBug", "true",
  "VI SGPR initialization bug requiring a fixed SGPR allocation size">;

def FeatureLdsMisalignedBug : SubtargetFeature<
  "lds-misaligned-bug", "LDSMisalignedBug", "true",
  "Some GFX10 bug with misaligned multi-dword LDS access in WGP mode">;

def FeatureVcmpxPermlaneHazard : SubtargetFeature<
  "vcmpx-permlane-hazard", "HasVcmpxPermlaneHazard", "true",
  "TODO: describe me">;

def FeatureVMEMtoScalarWriteHazard : SubtargetFeature<
  "vmem-to-scalar-write-hazard", "HasVMEMtoScalarWriteHazard", "true",
  "VMEM instruction followed by scalar writing to EXEC mask, M0 or SGPR leads to incorrect execution.">;

def FeatureSMEMtoVectorWriteHazard : SubtargetFeature<
  "smem-to-vector-write-hazard", "HasSMEMtoVectorWriteHazard", "true",
  "s_load_dword followed by v_cmp page faults">;

def FeatureInstFwdPrefetchBug : SubtargetFeature<
  "inst-fwd-prefetch-bug", "HasInstFwdPrefetchBug", "true",
  "S_INST_PREFETCH instruction causes shader to hang">;

def FeatureVcmpxExecWARHazard : SubtargetFeature<
  "vcmpx-exec-war-hazard", "HasVcmpxExecWARHazard", "true",
  "V_CMPX WAR hazard on EXEC (V_CMPX issue ONLY)">;

def FeatureLdsBranchVmemWARHazard : SubtargetFeature<
  "lds-branch-vmem-war-hazard", "HasLdsBranchVmemWARHazard", "true",
  "Switching between LDS and VMEM-tex not waiting VM_VSRC=0">;

def FeatureNSAtoVMEMBug : SubtargetFeature<
  "nsa-to-vmem-bug", "HasNSAtoVMEMBug", "true",
  "MIMG-NSA followed by VMEM fail if EXEC_LO or EXEC_HI equals zero">;

def FeatureFlatSegmentOffsetBug : SubtargetFeature<
  "flat-segment-offset-bug", "HasFlatSegmentOffsetBug", "true",
  "GFX10 bug, inst_offset ignored in flat segment">;

def FeatureOffset3fBug : SubtargetFeature<
  "offset-3f-bug", "HasOffset3fBug", "true",
  "Branch offset of 3f hardware bug">;

// --- LDS bank count -----------------------------------------------------------

// Builds the "ldsbankcount<Value>" feature, which stores Value (as a string)
// into the LDSBankCount subtarget field.
class SubtargetFeatureLDSBankCount <int Value> : SubtargetFeature<
  "ldsbankcount"#Value, "LDSBankCount", !cast<string>(Value),
  "The number of LDS banks per compute unit.">;

def FeatureLDSBankCount16 : SubtargetFeatureLDSBankCount<16>;
def FeatureLDSBankCount32 : SubtargetFeatureLDSBankCount<32>;

// --- Encoding and per-generation instruction sets ---------------------------

def FeatureGCN3Encoding : SubtargetFeature<
  "gcn3-encoding", "GCN3Encoding", "true",
  "Encoding format for VI">;

def FeatureCIInsts : SubtargetFeature<
  "ci-insts", "CIInsts", "true",
  "Additional instructions for CI+">;

def FeatureGFX8Insts : SubtargetFeature<
  "gfx8-insts", "GFX8Insts", "true",
  "Additional instructions for GFX8+">;

def FeatureGFX9Insts : SubtargetFeature<
  "gfx9-insts", "GFX9Insts", "true",
  "Additional instructions for GFX9+">;

def FeatureGFX10Insts : SubtargetFeature<
  "gfx10-insts", "GFX10Insts", "true",
  "Additional instructions for GFX10+">;

def FeatureGFX7GFX8GFX9Insts : SubtargetFeature<
  "gfx7-gfx8-gfx9-insts", "GFX7GFX8GFX9Insts", "true",
  "Instructions shared in GFX7, GFX8, GFX9">;

// --- Individual instruction capabilities ------------------------------------

def FeatureSMemRealTime : SubtargetFeature<
  "s-memrealtime", "HasSMemRealTime", "true",
  "Has s_memrealtime instruction">;

def FeatureInv2PiInlineImm : SubtargetFeature<
  "inv-2pi-inline-imm", "HasInv2PiInlineImm", "true",
  "Has 1 / (2 * pi) as inline immediate">;

def Feature16BitInsts : SubtargetFeature<
  "16-bit-insts", "Has16BitInsts", "true",
  "Has i16/f16 instructions">;

def FeatureVOP3P : SubtargetFeature<
  "vop3p", "HasVOP3PInsts", "true",
  "Has VOP3P packed instructions">;

def FeatureMovrel : SubtargetFeature<
  "movrel", "HasMovrel", "true",
  "Has v_movrel*_b32 instructions">;

def FeatureVGPRIndexMode : SubtargetFeature<
  "vgpr-index-mode", "HasVGPRIndexMode", "true",
  "Has VGPR mode register indexing">;

def FeatureScalarStores : SubtargetFeature<
  "scalar-stores", "HasScalarStores", "true",
  "Has store scalar memory instructions">;

def FeatureScalarAtomics : SubtargetFeature<
  "scalar-atomics", "HasScalarAtomics", "true",
  "Has atomic scalar memory instructions">;

// --- SDWA (Sub-DWORD Addressing) ---------------------------------------------

def FeatureSDWA : SubtargetFeature<
  "sdwa", "HasSDWA", "true",
  "Support SDWA (Sub-DWORD Addressing) extension">;

def FeatureSDWAOmod : SubtargetFeature<
  "sdwa-omod", "HasSDWAOmod", "true",
  "Support OMod with SDWA (Sub-DWORD Addressing) extension">;

def FeatureSDWAScalar : SubtargetFeature<
  "sdwa-scalar", "HasSDWAScalar", "true",
  "Support scalar register with SDWA (Sub-DWORD Addressing) extension">;

def FeatureSDWASdst : SubtargetFeature<
  "sdwa-sdst", "HasSDWASdst", "true",
  "Support scalar dst for VOPC with SDWA (Sub-DWORD Addressing) extension">;

// NOTE(review): the feature name reads "sdwa-mav" while the record and the
// subtarget field are spelled ...Mac. It is kept verbatim here because it is
// the user-visible feature name; confirm before renaming.
def FeatureSDWAMac : SubtargetFeature<
  "sdwa-mav", "HasSDWAMac", "true",
  "Support v_mac_f32/f16 with SDWA (Sub-DWORD Addressing) extension">;

def FeatureSDWAOutModsVOPC : SubtargetFeature<
  "sdwa-out-mods-vopc", "HasSDWAOutModsVOPC", "true",
  "Support clamp for VOPC with SDWA (Sub-DWORD Addressing) extension">;

// --- DPP (Data Parallel Primitives) ------------------------------------------

def FeatureDPP : SubtargetFeature<
  "dpp", "HasDPP", "true",
  "Support DPP (Data Parallel Primitives) extension">;

// DPP8 allows arbitrary cross-lane swizzling within groups of 8 lanes.
def FeatureDPP8 : SubtargetFeature<
  "dpp8", "HasDPP8", "true",
  "Support DPP8 (Data Parallel Primitives) extension">;

// --- Image / VMEM capabilities ------------------------------------------------

def FeatureR128A16 : SubtargetFeature<
  "r128-a16", "HasR128A16", "true",
  "Support 16 bit coordindates/gradients/lod/clamp/mip types on gfx9">;

def FeatureNSAEncoding : SubtargetFeature<
  "nsa-encoding", "HasNSAEncoding", "true",
  "Support NSA encoding for image instructions">;

def FeatureIntClamp : SubtargetFeature<
  "int-clamp-insts", "HasIntClamp", "true",
  "Support clamp for integer destination">;

def FeatureUnpackedD16VMem : SubtargetFeature<
  "unpacked-d16-vmem", "HasUnpackedD16VMem", "true",
  "Has unpacked d16 vmem instructions">;

// --- DL / dot-product / MAI instruction groups -------------------------------

def FeatureDLInsts : SubtargetFeature<
  "dl-insts", "HasDLInsts", "true",
  "Has v_fmac_f32 and v_xnor_b32 instructions">;

def FeatureDot1Insts : SubtargetFeature<
  "dot1-insts", "HasDot1Insts", "true",
  "Has v_dot4_i32_i8 and v_dot8_i32_i4 instructions">;

def FeatureDot2Insts : SubtargetFeature<
  "dot2-insts", "HasDot2Insts", "true",
  "Has v_dot2_f32_f16, v_dot2_i32_i16, v_dot2_u32_u16, v_dot4_u32_u8, v_dot8_u32_u4 instructions">;

def FeatureDot3Insts : SubtargetFeature<
  "dot3-insts", "HasDot3Insts", "true",
  "Has v_dot8c_i32_i4 instruction">;

def FeatureDot4Insts : SubtargetFeature<
  "dot4-insts", "HasDot4Insts", "true",
  "Has v_dot2c_i32_i16 instruction">;

def FeatureDot5Insts : SubtargetFeature<
  "dot5-insts", "HasDot5Insts", "true",
  "Has v_dot2c_f32_f16 instruction">;

def FeatureDot6Insts : SubtargetFeature<
  "dot6-insts", "HasDot6Insts", "true",
  "Has v_dot4c_i32_i8 instruction">;

def FeatureMAIInsts : SubtargetFeature<
  "mai-insts", "HasMAIInsts", "true",
  "Has mAI instructions">;

def FeaturePkFmacF16Inst : SubtargetFeature<
  "pk-fmac-f16-inst", "HasPkFmacF16Inst", "true",
  "Has v_pk_fmac_f16 instruction">;

// The description is written as two adjacent string literals.
def FeatureAtomicFaddInsts : SubtargetFeature<
  "atomic-fadd-insts", "HasAtomicFaddInsts", "true",
  "Has buffer_atomic_add_f32, buffer_atomic_pk_add_f16, global_atomic_add_f32, "
  "global_atomic_pk_add_f16 instructions">;

// --- SRAM ECC -------------------------------------------------------------------

def FeatureDoesNotSupportSRAMECC : SubtargetFeature<
  "no-sram-ecc-support", "DoesNotSupportSRAMECC", "true",
  "Hardware does not support SRAM ECC">;

def FeatureSRAMECC : SubtargetFeature<
  "sram-ecc", "EnableSRAMECC", "true",
  "Enable SRAM ECC">;

// --- Remaining device properties ---------------------------------------------

def FeatureNoSdstCMPX : SubtargetFeature<
  "no-sdst-cmpx", "HasNoSdstCMPX", "true",
  "V_CMPX does not write VCC/SGPR in addition to EXEC">;

def FeatureVscnt : SubtargetFeature<
  "vscnt", "HasVscnt", "true",
  "Has separate store vscnt counter">;

def FeatureRegisterBanking : SubtargetFeature<
  "register-banking", "HasRegisterBanking", "true",
  "Has register banking">;

def FeatureVOP3Literal : SubtargetFeature<
  "vop3-literal", "HasVOP3Literal", "true",
  "Can use one literal in VOP3">;

def FeatureNoDataDepHazard : SubtargetFeature<
  "no-data-dep-hazard", "HasNoDataDepHazard", "true",
  "Does not need SW waitstates">;

//===------------------------------------------------------------===//
// Subtarget Features (options and debugging)
//===------------------------------------------------------------===//

// Denormal handling for fp64 and fp16 is controlled by the same
// config register when fp16 supported.
// TODO: Do we need a separate f16 setting when not legal?
// All three denormal features below set the same FP64FP16Denormals field;
// they differ only in their name and in the features they imply.
def FeatureFP64FP16Denormals : SubtargetFeature<
  "fp64-fp16-denormals", "FP64FP16Denormals", "true",
  "Enable double and half precision denormal handling",
  [FeatureFP64]>;

def FeatureFP64Denormals : SubtargetFeature<
  "fp64-denormals", "FP64FP16Denormals", "true",
  "Enable double and half precision denormal handling",
  [FeatureFP64, FeatureFP64FP16Denormals]>;

def FeatureFP16Denormals : SubtargetFeature<
  "fp16-denormals", "FP64FP16Denormals", "true",
  "Enable half precision denormal handling",
  [FeatureFP64FP16Denormals]>;

def FeatureFPExceptions : SubtargetFeature<
  "fp-exceptions", "FPExceptions", "true",
  "Enable floating point exceptions">;

// Builds the "max-private-element-size-<size>" feature, which stores size
// (as a string) into the MaxPrivateElementSize subtarget field.
class FeatureMaxPrivateElementSize<int size> : SubtargetFeature<
  "max-private-element-size-"#size, "MaxPrivateElementSize",
  !cast<string>(size),
  "Maximum private access size may be "#size>;

def FeatureMaxPrivateElementSize4 : FeatureMaxPrivateElementSize<4>;
def FeatureMaxPrivateElementSize8 : FeatureMaxPrivateElementSize<8>;
def FeatureMaxPrivateElementSize16 : FeatureMaxPrivateElementSize<16>;

// Two spellings ("DumpCode" and "dumpcode") of the same DumpCode switch.
def FeatureDumpCode : SubtargetFeature<
  "DumpCode", "DumpCode", "true",
  "Dump MachineInstrs in the CodeEmitter">;

def FeatureDumpCodeLower : SubtargetFeature<
  "dumpcode", "DumpCode", "true",
  "Dump MachineInstrs in the CodeEmitter">;

// XXX - This should probably be removed once enabled by default
def FeatureEnableLoadStoreOpt : SubtargetFeature<
  "load-store-opt", "EnableLoadStoreOpt", "true",
  "Enable SI load/store optimizer pass">;

// Performance debugging feature. Allow using DS instruction immediate
// offsets even if the base pointer can't be proven to be base. On SI,
// base pointer values that won't give the same result as a 16-bit add
// are not safe to fold, but this will override the conservative test
// for the base pointer.
def FeatureEnableUnsafeDSOffsetFolding : SubtargetFeature<
  "unsafe-ds-offset-folding", "EnableUnsafeDSOffsetFolding", "true",
  "Force using DS instruction immediate offsets on SI">;

def FeatureEnableSIScheduler : SubtargetFeature<
  "si-scheduler", "EnableSIScheduler", "true",
  "Enable SI Machine Scheduler">;

def FeatureEnableDS128 : SubtargetFeature<
  "enable-ds128", "EnableDS128", "true",
  "Use ds_{read|write}_b128">;

// Sparse texture support requires that all result registers are zeroed when
// PRTStrictNull is set to true. This feature is turned on for all architectures
// but is enabled as a feature in case there are situations where PRTStrictNull
// is disabled by the driver.
def FeatureEnablePRTStrictNull : SubtargetFeature<
  "enable-prt-strict-null", "EnablePRTStrictNull", "true",
  "Enable zeroing of result registers for sparse texture fetches">;

// Unless +-flat-for-global is specified, turn on FlatForGlobal for
// all OS-es on VI and newer hardware to avoid assertion failures due
// to missing ADDR64 variants of MUBUF instructions.
// FIXME: moveToVALU should be able to handle converting addr64 MUBUF
// instructions.
def FeatureFlatForGlobal : SubtargetFeature<
  "flat-for-global", "FlatForGlobal", "true",
  "Force to generate flat instruction for global">;

def FeatureAutoWaitcntBeforeBarrier : SubtargetFeature<
  "auto-waitcnt-before-barrier", "AutoWaitcntBeforeBarrier", "true",
  "Hardware automatically inserts waitcnt before barrier">;

def FeatureCodeObjectV3 : SubtargetFeature<
  "code-object-v3", "CodeObjectV3", "true",
  "Generate code object version 3">;

def FeatureTrigReducedRange : SubtargetFeature<
  "trig-reduced-range", "HasTrigReducedRange", "true",
  "Requires use of fract on arguments to trig instructions">;

// Dummy feature used to disable assembler instructions.
def FeatureDisable : SubtargetFeature<
  "", "FeatureDisable", "true",
  "Dummy feature to disable assembler instructions">;

//===----------------------------------------------------------------------===//
// Generations: each one is a feature that implies the listed features.
//===----------------------------------------------------------------------===//

// SubtargetFeatureGeneration specialised with the "GCNSubtarget" string.
class GCNSubtargetFeatureGeneration <string Value,
                                     string FeatureName,
                                     list<SubtargetFeature> Implies> :
  SubtargetFeatureGeneration <Value, FeatureName, "GCNSubtarget", Implies>;

def FeatureSouthernIslands : GCNSubtargetFeatureGeneration<"SOUTHERN_ISLANDS",
  "southern-islands",
  [FeatureFP64, FeatureLocalMemorySize32768, FeatureMIMG_R128,
   FeatureWavefrontSize64,
   FeatureLDSBankCount32, FeatureMovrel, FeatureTrigReducedRange,
   FeatureDoesNotSupportSRAMECC, FeatureDoesNotSupportXNACK]
>;

def FeatureSeaIslands : GCNSubtargetFeatureGeneration<"SEA_ISLANDS",
  "sea-islands",
  [FeatureFP64, FeatureLocalMemorySize65536, FeatureMIMG_R128,
   FeatureWavefrontSize64, FeatureFlatAddressSpace,
   FeatureCIInsts, FeatureMovrel, FeatureTrigReducedRange,
   FeatureGFX7GFX8GFX9Insts, FeatureDoesNotSupportSRAMECC]
>;

def FeatureVolcanicIslands : GCNSubtargetFeatureGeneration<"VOLCANIC_ISLANDS",
  "volcanic-islands",
  [FeatureFP64, FeatureLocalMemorySize65536, FeatureMIMG_R128,
   FeatureWavefrontSize64, FeatureFlatAddressSpace,
   FeatureGCN3Encoding, FeatureCIInsts, Feature16BitInsts,
   FeatureSMemRealTime, FeatureVGPRIndexMode, FeatureMovrel,
   FeatureScalarStores, FeatureInv2PiInlineImm,
   FeatureSDWA, FeatureSDWAOutModsVOPC, FeatureSDWAMac, FeatureDPP,
   FeatureIntClamp, FeatureTrigReducedRange, FeatureDoesNotSupportSRAMECC,
   FeatureGFX8Insts, FeatureGFX7GFX8GFX9Insts
  ]
>;

def FeatureGFX9 : GCNSubtargetFeatureGeneration<"GFX9",
  "gfx9",
  [FeatureFP64, FeatureLocalMemorySize65536,
   FeatureWavefrontSize64, FeatureFlatAddressSpace,
   FeatureGCN3Encoding, FeatureCIInsts, Feature16BitInsts,
   FeatureSMemRealTime, FeatureScalarStores, FeatureInv2PiInlineImm,
   FeatureApertureRegs, FeatureGFX9Insts, FeatureVOP3P, FeatureVGPRIndexMode,
   FeatureFastFMAF32, FeatureDPP, FeatureIntClamp,
   FeatureSDWA, FeatureSDWAOmod, FeatureSDWAScalar, FeatureSDWASdst,
   FeatureFlatInstOffsets, FeatureFlatGlobalInsts, FeatureFlatScratchInsts,
   FeatureAddNoCarryInsts, FeatureGFX8Insts, FeatureGFX7GFX8GFX9Insts,
   FeatureScalarFlatScratchInsts, FeatureScalarAtomics, FeatureR128A16
  ]
>;

def FeatureGFX10 : GCNSubtargetFeatureGeneration<"GFX10",
  "gfx10",
  [FeatureFP64, FeatureLocalMemorySize65536, FeatureMIMG_R128,
   FeatureFlatAddressSpace,
   FeatureCIInsts, Feature16BitInsts,
   FeatureSMemRealTime, FeatureInv2PiInlineImm,
   FeatureApertureRegs, FeatureGFX9Insts, FeatureGFX10Insts, FeatureVOP3P,
   FeatureMovrel, FeatureFastFMAF32, FeatureDPP, FeatureIntClamp,
   FeatureSDWA, FeatureSDWAOmod, FeatureSDWAScalar, FeatureSDWASdst,
   FeatureFlatInstOffsets, FeatureFlatGlobalInsts, FeatureFlatScratchInsts,
   FeatureAddNoCarryInsts, FeatureFmaMixInsts, FeatureGFX8Insts,
   FeatureNoSdstCMPX, FeatureVscnt, FeatureRegisterBanking,
   FeatureVOP3Literal, FeatureDPP8,
   FeatureNoDataDepHazard, FeaturePkFmacF16Inst, FeatureDoesNotSupportSRAMECC
  ]
>;

//===----------------------------------------------------------------------===//
// Per-ISA-version feature sets.
//===----------------------------------------------------------------------===//

// Plain holder for a list of features; adds nothing beyond the Features field.
class FeatureSet<list<SubtargetFeature> Features_> {
  list<SubtargetFeature> Features = Features_;
}

def FeatureISAVersion6_0_0 : FeatureSet<
  [FeatureSouthernIslands,
   FeatureFastFMAF32,
   HalfRate64Ops,
   FeatureLDSBankCount32,
   FeatureDoesNotSupportXNACK,
   FeatureCodeObjectV3]>;

def FeatureISAVersion6_0_1 : FeatureSet<
  [FeatureSouthernIslands,
   FeatureLDSBankCount32,
   FeatureDoesNotSupportXNACK,
   FeatureCodeObjectV3]>;

def FeatureISAVersion7_0_0 : FeatureSet<
  [FeatureSeaIslands,
   FeatureLDSBankCount32,
   FeatureDoesNotSupportXNACK,
   FeatureCodeObjectV3]>;

def FeatureISAVersion7_0_1 : FeatureSet<
  [FeatureSeaIslands,
   HalfRate64Ops,
   FeatureLDSBankCount32,
   FeatureFastFMAF32,
   FeatureDoesNotSupportXNACK,
   FeatureCodeObjectV3]>;

def FeatureISAVersion7_0_2 : FeatureSet<
  [FeatureSeaIslands,
   FeatureLDSBankCount16,
   FeatureFastFMAF32,
   FeatureDoesNotSupportXNACK,
   FeatureCodeObjectV3]>;

def FeatureISAVersion7_0_3 : FeatureSet<
  [FeatureSeaIslands,
   FeatureLDSBankCount16,
   FeatureDoesNotSupportXNACK,
   FeatureCodeObjectV3]>;

def FeatureISAVersion7_0_4 : FeatureSet<
  [FeatureSeaIslands,
   FeatureLDSBankCount32,
   FeatureDoesNotSupportXNACK,
   FeatureCodeObjectV3]>;

def FeatureISAVersion8_0_1 : FeatureSet<
  [FeatureVolcanicIslands,
   FeatureFastFMAF32,
   HalfRate64Ops,
   FeatureLDSBankCount32,
   FeatureXNACK,
   FeatureUnpackedD16VMem,
   FeatureCodeObjectV3]>;

def FeatureISAVersion8_0_2 : FeatureSet<
  [FeatureVolcanicIslands,
   FeatureLDSBankCount32,
   FeatureSGPRInitBug,
   FeatureUnpackedD16VMem,
   FeatureDoesNotSupportXNACK,
   FeatureCodeObjectV3]>;

def FeatureISAVersion8_0_3 : FeatureSet<
  [FeatureVolcanicIslands,
   FeatureLDSBankCount32,
   FeatureUnpackedD16VMem,
   FeatureDoesNotSupportXNACK,
   FeatureCodeObjectV3]>;

def FeatureISAVersion8_1_0 : FeatureSet<
  [FeatureVolcanicIslands,
   FeatureLDSBankCount16,
   FeatureXNACK,
   FeatureCodeObjectV3]>;

def FeatureISAVersion9_0_0 : FeatureSet<
  [FeatureGFX9,
   FeatureMadMixInsts,
   FeatureLDSBankCount32,
   FeatureCodeObjectV3,
   FeatureDoesNotSupportXNACK,
   FeatureDoesNotSupportSRAMECC]>;

def FeatureISAVersion9_0_2 : FeatureSet<
  [FeatureGFX9,
   FeatureMadMixInsts,
   FeatureLDSBankCount32,
   FeatureXNACK,
   FeatureDoesNotSupportSRAMECC,
   FeatureCodeObjectV3]>;

def FeatureISAVersion9_0_4 : FeatureSet<
  [FeatureGFX9,
   FeatureLDSBankCount32,
   FeatureFmaMixInsts,
   FeatureDoesNotSupportXNACK,
   FeatureDoesNotSupportSRAMECC,
   FeatureCodeObjectV3]>;

def FeatureISAVersion9_0_6 : FeatureSet<
  [FeatureGFX9,
   HalfRate64Ops,
   FeatureFmaMixInsts,
   FeatureLDSBankCount32,
   FeatureDLInsts,
   FeatureDot1Insts,
   FeatureDot2Insts,
   FeatureDoesNotSupportXNACK,
   FeatureCodeObjectV3]>;

def FeatureISAVersion9_0_8 : FeatureSet<
  [FeatureGFX9,
   HalfRate64Ops,
   FeatureFmaMixInsts,
   FeatureLDSBankCount32,
   FeatureDLInsts,
   FeatureDot1Insts,
   FeatureDot2Insts,
   FeatureDot3Insts,
   FeatureDot4Insts,
   FeatureDot5Insts,
   FeatureDot6Insts,
   FeatureMAIInsts,
   FeaturePkFmacF16Inst,
   FeatureAtomicFaddInsts,
   FeatureSRAMECC,
   FeatureCodeObjectV3]>;

def FeatureISAVersion9_0_9 : FeatureSet<
  [FeatureGFX9,
   FeatureMadMixInsts,
   FeatureLDSBankCount32,
   FeatureXNACK,
   FeatureCodeObjectV3]>;

// TODO: Organize more features into groups.
def FeatureGroup {
  // Bugs present on gfx10.1.
  list<SubtargetFeature> GFX10_1_Bugs = [
    FeatureVcmpxPermlaneHazard,
    FeatureVMEMtoScalarWriteHazard,
    FeatureSMEMtoVectorWriteHazard,
    FeatureInstFwdPrefetchBug,
    FeatureVcmpxExecWARHazard,
    FeatureLdsBranchVmemWARHazard,
    FeatureNSAtoVMEMBug,
    FeatureOffset3fBug,
    FeatureFlatSegmentOffsetBug
  ];
}

// The gfx10.1 sets prepend the shared bug list to their own features.
def FeatureISAVersion10_1_0 : FeatureSet<
  !listconcat(FeatureGroup.GFX10_1_Bugs,
    [FeatureGFX10,
     FeatureLDSBankCount32,
     FeatureDLInsts,
     FeatureNSAEncoding,
     FeatureWavefrontSize32,
     FeatureScalarStores,
     FeatureScalarAtomics,
     FeatureScalarFlatScratchInsts,
     FeatureLdsMisalignedBug,
     FeatureDoesNotSupportXNACK,
     FeatureCodeObjectV3])>;

// NOTE(review): unlike 10_1_0 and 10_1_2, this set does not list
// FeatureLdsMisalignedBug -- confirm that the omission is intentional.
def FeatureISAVersion10_1_1 : FeatureSet<
  !listconcat(FeatureGroup.GFX10_1_Bugs,
    [FeatureGFX10,
     FeatureLDSBankCount32,
     FeatureDLInsts,
     FeatureDot1Insts,
     FeatureDot2Insts,
     FeatureDot5Insts,
     FeatureDot6Insts,
     FeatureNSAEncoding,
     FeatureWavefrontSize32,
     FeatureScalarStores,
     FeatureScalarAtomics,
     FeatureScalarFlatScratchInsts,
     FeatureDoesNotSupportXNACK,
     FeatureCodeObjectV3])>;

def FeatureISAVersion10_1_2 : FeatureSet<
  !listconcat(FeatureGroup.GFX10_1_Bugs,
    [FeatureGFX10,
     FeatureLDSBankCount32,
     FeatureDLInsts,
     FeatureDot1Insts,
     FeatureDot2Insts,
     FeatureDot5Insts,
     FeatureDot6Insts,
     FeatureNSAEncoding,
     FeatureWavefrontSize32,
     FeatureScalarStores,
     FeatureScalarAtomics,
     FeatureScalarFlatScratchInsts,
     FeatureLdsMisalignedBug,
     FeatureDoesNotSupportXNACK,
     FeatureCodeObjectV3])>;

//===----------------------------------------------------------------------===//

def AMDGPUInstrInfo : InstrInfo {
  let guessInstructionProperties = 1;
  let noNamedPositionallyEncodedOperands = 1;
}

def AMDGPUAsmParser : AsmParser {
  // Some of the R600 registers have the same name, so this crashes.
  // For example T0_XYZW and T0_XY both have the asm name T0.
  let ShouldEmitMatchRegisterName = 0;
}

def AMDGPUAsmWriter : AsmWriter {
  int PassSubtarget = 1;
}

// Name / numeric-ID pairs for the assembler variants referenced below.
def AMDGPUAsmVariants {
  string Default = "Default";
  int Default_ID = 0;
  string VOP3 = "VOP3";
  int VOP3_ID = 1;
  string SDWA = "SDWA";
  int SDWA_ID = 2;
  string SDWA9 = "SDWA9";
  int SDWA9_ID = 3;
  string DPP = "DPP";
  int DPP_ID = 4;
  string Disable = "Disable";
  int Disable_ID = 5;
}

def DefaultAMDGPUAsmParserVariant : AsmParserVariant {
  let Variant = AMDGPUAsmVariants.Default_ID;
  let Name = AMDGPUAsmVariants.Default;
}

def VOP3AsmParserVariant : AsmParserVariant {
  let Variant = AMDGPUAsmVariants.VOP3_ID;
  let Name = AMDGPUAsmVariants.VOP3;
}

def SDWAAsmParserVariant : AsmParserVariant {
  let Variant = AMDGPUAsmVariants.SDWA_ID;
  let Name = AMDGPUAsmVariants.SDWA;
}

def SDWA9AsmParserVariant : AsmParserVariant {
  let Variant = AMDGPUAsmVariants.SDWA9_ID;
  let Name = AMDGPUAsmVariants.SDWA9;
}

def DPPAsmParserVariant : AsmParserVariant {
  let Variant = AMDGPUAsmVariants.DPP_ID;
  let Name = AMDGPUAsmVariants.DPP;
}

// Note: no parser variant is registered for AMDGPUAsmVariants.Disable.
def AMDGPU : Target {
  // Pull in Instruction Info:
  let InstructionSet = AMDGPUInstrInfo;
  let AssemblyParsers = [AMDGPUAsmParser];
  let AssemblyParserVariants = [DefaultAMDGPUAsmParserVariant,
                                VOP3AsmParserVariant,
                                SDWAAsmParserVariant,
                                SDWA9AsmParserVariant,
                                DPPAsmParserVariant];
  let AssemblyWriters = [AMDGPUAsmWriter];
  let AllowRegisterRenaming = 1;
}

// Dummy Instruction itineraries for pseudo instructions
def ALU_NULL : FuncUnit;
def NullALU : InstrItinClass;

//===----------------------------------------------------------------------===//
// Predicate helper class
//===----------------------------------------------------------------------===//

// Most records pair a codegen Predicate (a C++ condition string evaluated
// against Subtarget) with an AssemblerPredicate (a comma-separated feature
// list, '!' meaning the feature must be absent). Adjacent string literals in
// a Predicate are pasted together as written.

def isGFX6 :
  Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS">,
  AssemblerPredicate<"FeatureSouthernIslands">;

def isGFX6GFX7 :
  Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS ||"
            "Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS">,
  AssemblerPredicate<"!FeatureGCN3Encoding,!FeatureGFX10Insts">;

def isGFX6GFX7GFX10 :
  Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS ||"
            "Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS ||"
            "Subtarget->getGeneration() == AMDGPUSubtarget::GFX10">,
  AssemblerPredicate<"!FeatureGCN3Encoding">;

def isGFX7Only :
  Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS">,
  AssemblerPredicate<"!FeatureGCN3Encoding,FeatureCIInsts,!FeatureGFX10Insts">;

def isGFX7GFX10 :
  Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS ||"
            "Subtarget->getGeneration() == AMDGPUSubtarget::GFX10">,
  AssemblerPredicate<"!FeatureGCN3Encoding,FeatureCIInsts">;

def isGFX7GFX8GFX9 :
  Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS ||"
            "Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS ||"
            "Subtarget->getGeneration() == AMDGPUSubtarget::GFX9">,
  AssemblerPredicate<"FeatureGFX7GFX8GFX9Insts">;

def isGFX6GFX7GFX8GFX9 :
  Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS ||"
            "Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS ||"
            "Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS ||"
            "Subtarget->getGeneration() == AMDGPUSubtarget::GFX9">,
  AssemblerPredicate<"!FeatureGFX10Insts">;

def isGFX7Plus :
  Predicate<"Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS">,
  AssemblerPredicate<"FeatureCIInsts">;

def isGFX8Plus :
  Predicate<"Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS">,
  AssemblerPredicate<"FeatureGFX8Insts">;

def isGFX8Only : Predicate<"Subtarget->getGeneration() =="
                           "AMDGPUSubtarget::VOLCANIC_ISLANDS">,
  AssemblerPredicate <"FeatureVolcanicIslands">;

def isGFX9Plus :
  Predicate<"Subtarget->getGeneration() >= AMDGPUSubtarget::GFX9">,
  AssemblerPredicate<"FeatureGFX9Insts">;

def isGFX9Only : Predicate <
  "Subtarget->getGeneration() == AMDGPUSubtarget::GFX9">,
  AssemblerPredicate<"FeatureGCN3Encoding,FeatureGFX9Insts">;

def isGFX8GFX9 :
  Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS ||"
            "Subtarget->getGeneration() == AMDGPUSubtarget::GFX9">,
  AssemblerPredicate<"FeatureGFX8Insts,FeatureGCN3Encoding">;

def isGFX10Plus :
  Predicate<"Subtarget->getGeneration() >= AMDGPUSubtarget::GFX10">,
  AssemblerPredicate<"FeatureGFX10Insts">;

def HasFlatAddressSpace : Predicate<"Subtarget->hasFlatAddressSpace()">,
  AssemblerPredicate<"FeatureFlatAddressSpace">;

def HasFlatGlobalInsts : Predicate<"Subtarget->hasFlatGlobalInsts()">,
  AssemblerPredicate<"FeatureFlatGlobalInsts">;
def HasFlatScratchInsts : Predicate<"Subtarget->hasFlatScratchInsts()">,
  AssemblerPredicate<"FeatureFlatScratchInsts">;
def HasScalarFlatScratchInsts : Predicate<"Subtarget->hasScalarFlatScratchInsts()">,
  AssemblerPredicate<"FeatureScalarFlatScratchInsts">;
def HasD16LoadStore : Predicate<"Subtarget->hasD16LoadStore()">,
  AssemblerPredicate<"FeatureGFX9Insts">;

def HasUnpackedD16VMem : Predicate<"Subtarget->hasUnpackedD16VMem()">,
  AssemblerPredicate<"FeatureUnpackedD16VMem">;
def HasPackedD16VMem : Predicate<"!Subtarget->hasUnpackedD16VMem()">,
  AssemblerPredicate<"!FeatureUnpackedD16VMem">;

def D16PreservesUnusedBits :
  Predicate<"Subtarget->d16PreservesUnusedBits()">,
  AssemblerPredicate<"FeatureGFX9Insts,!FeatureSRAMECC">;

// Codegen-only predicates (no assembler counterpart).
def LDSRequiresM0Init : Predicate<"Subtarget->ldsRequiresM0Init()">;
def NotLDSRequiresM0Init : Predicate<"!Subtarget->ldsRequiresM0Init()">;

def HasDSAddTid : Predicate<"Subtarget->getGeneration() >= AMDGPUSubtarget::GFX9">,
  AssemblerPredicate<"FeatureGFX9Insts">;

def HasAddNoCarryInsts : Predicate<"Subtarget->hasAddNoCarry()">,
  AssemblerPredicate<"FeatureAddNoCarryInsts">;

def NotHasAddNoCarryInsts : Predicate<"!Subtarget->hasAddNoCarry()">;

def Has16BitInsts : Predicate<"Subtarget->has16BitInsts()">,
  AssemblerPredicate<"Feature16BitInsts">;
def HasVOP3PInsts : Predicate<"Subtarget->hasVOP3PInsts()">,
  AssemblerPredicate<"FeatureVOP3P">;

// The three SDWA predicates share one codegen condition and differ only in
// the assembler feature list.
def HasSDWA : Predicate<"Subtarget->hasSDWA()">,
  AssemblerPredicate<"FeatureSDWA,FeatureVolcanicIslands">;

def HasSDWA9 :
  Predicate<"Subtarget->hasSDWA()">,
  AssemblerPredicate<"FeatureGCN3Encoding,FeatureGFX9Insts,FeatureSDWA">;

def HasSDWA10 :
  Predicate<"Subtarget->hasSDWA()">,
  AssemblerPredicate<"!FeatureGCN3Encoding,FeatureGFX10Insts,FeatureSDWA">;

def HasDPP : Predicate<"Subtarget->hasDPP()">,
  AssemblerPredicate<"FeatureGCN3Encoding,FeatureDPP">;

def HasDPP8 : Predicate<"Subtarget->hasDPP8()">,
  AssemblerPredicate<"!FeatureGCN3Encoding,FeatureGFX10Insts,FeatureDPP8">;

def HasR128A16 : Predicate<"Subtarget->hasR128A16()">,
  AssemblerPredicate<"FeatureR128A16">;

def HasDPP16 : Predicate<"Subtarget->hasDPP()">,
  AssemblerPredicate<"!FeatureGCN3Encoding,FeatureGFX10Insts,FeatureDPP">;

def HasIntClamp : Predicate<"Subtarget->hasIntClamp()">,
  AssemblerPredicate<"FeatureIntClamp">;

def HasMadMixInsts : Predicate<"Subtarget->hasMadMixInsts()">,
  AssemblerPredicate<"FeatureMadMixInsts">;

def HasScalarStores : Predicate<"Subtarget->hasScalarStores()">,
  AssemblerPredicate<"FeatureScalarStores">;

def HasScalarAtomics : Predicate<"Subtarget->hasScalarAtomics()">,
  AssemblerPredicate<"FeatureScalarAtomics">;

def HasNoSdstCMPX : Predicate<"Subtarget->hasNoSdstCMPX()">,
  AssemblerPredicate<"FeatureNoSdstCMPX">;

def HasSdstCMPX : Predicate<"!Subtarget->hasNoSdstCMPX()">,
  AssemblerPredicate<"!FeatureNoSdstCMPX">;

def has16BankLDS : Predicate<"Subtarget->getLDSBankCount() == 16">;
def has32BankLDS : Predicate<"Subtarget->getLDSBankCount() == 32">;
def HasVGPRIndexMode : Predicate<"Subtarget->hasVGPRIndexMode()">,
  AssemblerPredicate<"FeatureVGPRIndexMode">;
def HasMovrel : Predicate<"Subtarget->hasMovrel()">,
  AssemblerPredicate<"FeatureMovrel">;

def HasFmaMixInsts : Predicate<"Subtarget->hasFmaMixInsts()">,
  AssemblerPredicate<"FeatureFmaMixInsts">;

def HasDLInsts : Predicate<"Subtarget->hasDLInsts()">,
  AssemblerPredicate<"FeatureDLInsts">;

def HasDot1Insts : Predicate<"Subtarget->hasDot1Insts()">,
  AssemblerPredicate<"FeatureDot1Insts">;

def HasDot2Insts : Predicate<"Subtarget->hasDot2Insts()">,
  AssemblerPredicate<"FeatureDot2Insts">;

def HasDot3Insts : Predicate<"Subtarget->hasDot3Insts()">,
  AssemblerPredicate<"FeatureDot3Insts">;

def HasDot4Insts : Predicate<"Subtarget->hasDot4Insts()">,
  AssemblerPredicate<"FeatureDot4Insts">;

def HasDot5Insts : Predicate<"Subtarget->hasDot5Insts()">,
  AssemblerPredicate<"FeatureDot5Insts">;

def HasDot6Insts : Predicate<"Subtarget->hasDot6Insts()">,
  AssemblerPredicate<"FeatureDot6Insts">;

def HasMAIInsts : Predicate<"Subtarget->hasMAIInsts()">,
  AssemblerPredicate<"FeatureMAIInsts">;

def HasPkFmacF16Inst : Predicate<"Subtarget->hasPkFmacF16Inst()">,
  AssemblerPredicate<"FeaturePkFmacF16Inst">;

def HasAtomicFaddInsts : Predicate<"Subtarget->hasAtomicFaddInsts()">,
  AssemblerPredicate<"FeatureAtomicFaddInsts">;

// The codegen condition was previously negated ("!Subtarget->..."), which
// contradicted both the record name and the AssemblerPredicate next to it.
// Both sides now hold exactly when the subtarget has the offset-3f bug,
// matching every other Has* predicate in this file.
def HasOffset3fBug : Predicate<"Subtarget->hasOffset3fBug()">,
  AssemblerPredicate<"FeatureOffset3fBug">;

def EnableLateCFGStructurize : Predicate<
  "EnableLateStructurizeCFG">;

// Include AMDGPU TD files
include "SISchedule.td"
include "GCNProcessors.td"
include "AMDGPUInstrInfo.td"
include "AMDGPURegisterInfo.td"
include "AMDGPURegisterBanks.td"
include "AMDGPUInstructions.td"
include "SIInstrInfo.td"
include "AMDGPUCallingConv.td"
include "AMDGPUSearchableTables.td"