xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPU.td (revision f2530c80db7b29b95368fce956b3a778f096b368)
1//===-- AMDGPU.td - AMDGPU Tablegen files --------*- tablegen -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===------------------------------------------------------------===//
8
9include "llvm/TableGen/SearchableTable.td"
10include "llvm/Target/Target.td"
11include "AMDGPUFeatures.td"
12
// Turns a bit into a list<int>: `ret` is [1] when Value is set and the empty
// list otherwise.
class BoolToList<bit Value> {
  list<int> ret = !if(Value,
                      [1]<int>,
                      []<int>);
}
16
17//===------------------------------------------------------------===//
18// Subtarget Features (device properties)
19//===------------------------------------------------------------===//
20
// f32 fma is assumed to be at least as fast as a separate mul + add.
def FeatureFastFMAF32
  : SubtargetFeature<"fast-fmaf", "FastFMAF32", "true",
                     "Assuming f32 fma is at least as fast as mul + add">;

// 128-bit texture resources are supported.
def FeatureMIMG_R128
  : SubtargetFeature<"mimg-r128", "MIMG_R128", "true",
                     "Support 128-bit texture resources">;

// Most fp64 instructions run at half rate rather than quarter rate.
def HalfRate64Ops
  : SubtargetFeature<"half-rate-64-ops", "HalfRate64Ops", "true",
                     "Most fp64 instructions are half rate instead of quarter">;
38
// Flat address space is available.
def FeatureFlatAddressSpace
  : SubtargetFeature<"flat-address-space", "FlatAddressSpace", "true",
                     "Support flat address space">;

// Flat instructions accept an immediate offset.
def FeatureFlatInstOffsets
  : SubtargetFeature<"flat-inst-offsets", "FlatInstOffsets", "true",
                     "Flat instructions have immediate offset addressing mode">;

// global_* flat memory instructions exist.
def FeatureFlatGlobalInsts
  : SubtargetFeature<"flat-global-insts", "FlatGlobalInsts", "true",
                     "Have global_* flat memory instructions">;

// scratch_* flat memory instructions exist.
def FeatureFlatScratchInsts
  : SubtargetFeature<"flat-scratch-insts", "FlatScratchInsts", "true",
                     "Have scratch_* flat memory instructions">;

// s_scratch_* flat memory instructions exist.
def FeatureScalarFlatScratchInsts
  : SubtargetFeature<"scalar-flat-scratch-insts", "ScalarFlatScratchInsts",
                     "true",
                     "Have s_scratch_* flat memory instructions">;
68
// VALU add/sub variants that produce no carry out.
def FeatureAddNoCarryInsts
  : SubtargetFeature<"add-no-carry-insts", "AddNoCarryInsts", "true",
                     "Have VALU add/sub instructions without carry out">;

// Unaligned global loads and stores are supported.
def FeatureUnalignedBufferAccess
  : SubtargetFeature<"unaligned-buffer-access", "UnalignedBufferAccess",
                     "true",
                     "Support unaligned global loads and stores">;

// Trap handler support.
def FeatureTrapHandler
  : SubtargetFeature<"trap-handler", "TrapHandler", "true",
                     "Trap handler support">;

// Unaligned scratch loads and stores are supported.
def FeatureUnalignedScratchAccess
  : SubtargetFeature<"unaligned-scratch-access", "UnalignedScratchAccess",
                     "true",
                     "Support unaligned scratch loads and stores">;

// Memory aperture base and size registers are present.
def FeatureApertureRegs
  : SubtargetFeature<"aperture-regs", "HasApertureRegs", "true",
                     "Has Memory Aperture Base and Size Registers">;
98
// v_mad_mix* family of instructions.
def FeatureMadMixInsts
  : SubtargetFeature<"mad-mix-insts", "HasMadMixInsts", "true",
      "Has v_mad_mix_f32, v_mad_mixlo_f16, v_mad_mixhi_f16 instructions">;

// v_fma_mix* family of instructions.
def FeatureFmaMixInsts
  : SubtargetFeature<"fma-mix-insts", "HasFmaMixInsts", "true",
      "Has v_fma_mix_f32, v_fma_mixlo_f16, v_fma_mixhi_f16 instructions">;
110
// The hardware has no XNACK support at all.
def FeatureDoesNotSupportXNACK
  : SubtargetFeature<"no-xnack-support", "DoesNotSupportXNACK", "true",
                     "Hardware does not support XNACK">;

// On chips that support XNACK, it is disabled when
// SH_MEM_CONFIG.ADDRESS_MODE = GPUVM. The current default kernel driver
// setting is:
// - graphics ring: XNACK disabled
// - compute ring: XNACK enabled
//
// With XNACK enabled, VMEM latency can be worse.
// With XNACK disabled, the 2 SGPRs can be used for general purposes.
def FeatureXNACK
  : SubtargetFeature<"xnack", "EnableXNACK", "true",
                     "Enable XNACK support">;

// CU wavefront execution mode.
def FeatureCuMode
  : SubtargetFeature<"cumode", "EnableCuMode", "true",
                     "Enable CU wavefront execution mode">;
135
// VI SGPR initialization bug; requires a fixed SGPR allocation size.
def FeatureSGPRInitBug
  : SubtargetFeature<"sgpr-init-bug", "SGPRInitBug", "true",
      "VI SGPR initialization bug requiring a fixed SGPR allocation size">;

// GFX10 bug affecting misaligned multi-dword LDS access in WGP mode.
def FeatureLdsMisalignedBug
  : SubtargetFeature<"lds-misaligned-bug", "LDSMisalignedBug", "true",
      "Some GFX10 bug with misaligned multi-dword LDS access in WGP mode">;
147
// NOTE(review): the help text for this hazard is still a placeholder.
def FeatureVcmpxPermlaneHazard
  : SubtargetFeature<"vcmpx-permlane-hazard", "HasVcmpxPermlaneHazard", "true",
                     "TODO: describe me">;

// A VMEM instruction followed by a scalar write to EXEC, M0 or an SGPR
// executes incorrectly.
def FeatureVMEMtoScalarWriteHazard
  : SubtargetFeature<"vmem-to-scalar-write-hazard",
      "HasVMEMtoScalarWriteHazard", "true",
      "VMEM instruction followed by scalar writing to EXEC mask, M0 or SGPR leads to incorrect execution.">;

// s_load_dword followed by v_cmp page faults.
def FeatureSMEMtoVectorWriteHazard
  : SubtargetFeature<"smem-to-vector-write-hazard",
      "HasSMEMtoVectorWriteHazard", "true",
      "s_load_dword followed by v_cmp page faults">;

// S_INST_PREFETCH hangs the shader.
def FeatureInstFwdPrefetchBug
  : SubtargetFeature<"inst-fwd-prefetch-bug", "HasInstFwdPrefetchBug", "true",
      "S_INST_PREFETCH instruction causes shader to hang">;

// Write-after-read hazard on EXEC specific to V_CMPX.
def FeatureVcmpxExecWARHazard
  : SubtargetFeature<"vcmpx-exec-war-hazard", "HasVcmpxExecWARHazard", "true",
      "V_CMPX WAR hazard on EXEC (V_CMPX issue ONLY)">;

// Switching between LDS and VMEM-tex does not wait for VM_VSRC=0.
def FeatureLdsBranchVmemWARHazard
  : SubtargetFeature<"lds-branch-vmem-war-hazard",
      "HasLdsBranchVmemWARHazard", "true",
      "Switching between LDS and VMEM-tex not waiting VM_VSRC=0">;

// MIMG-NSA followed by VMEM fails when EXEC_LO or EXEC_HI is zero.
def FeatureNSAtoVMEMBug
  : SubtargetFeature<"nsa-to-vmem-bug", "HasNSAtoVMEMBug", "true",
      "MIMG-NSA followed by VMEM fail if EXEC_LO or EXEC_HI equals zero">;

// GFX10 bug: inst_offset is ignored in the flat segment.
def FeatureFlatSegmentOffsetBug
  : SubtargetFeature<"flat-segment-offset-bug", "HasFlatSegmentOffsetBug",
      "true",
      "GFX10 bug, inst_offset ignored in flat segment">;

// Hardware bug with a branch offset of 3f.
def FeatureOffset3fBug
  : SubtargetFeature<"offset-3f-bug", "HasOffset3fBug", "true",
      "Branch offset of 3f hardware bug">;
201
// Feature template for the LDS bank count. The feature string is
// "ldsbankcount" followed by Value, and Value (as a string) is what gets
// assigned to LDSBankCount.
class SubtargetFeatureLDSBankCount<int Value>
  : SubtargetFeature<"ldsbankcount"#Value, "LDSBankCount",
                     !cast<string>(Value),
                     "The number of LDS banks per compute unit.">;

def FeatureLDSBankCount16 : SubtargetFeatureLDSBankCount<16>;
def FeatureLDSBankCount32 : SubtargetFeatureLDSBankCount<32>;
211
// Instruction encoding format used by VI.
def FeatureGCN3Encoding
  : SubtargetFeature<"gcn3-encoding", "GCN3Encoding", "true",
                     "Encoding format for VI">;

// Instructions added in CI and later.
def FeatureCIInsts
  : SubtargetFeature<"ci-insts", "CIInsts", "true",
                     "Additional instructions for CI+">;

// Instructions added in GFX8 and later.
def FeatureGFX8Insts
  : SubtargetFeature<"gfx8-insts", "GFX8Insts", "true",
                     "Additional instructions for GFX8+">;

// Instructions added in GFX9 and later.
def FeatureGFX9Insts
  : SubtargetFeature<"gfx9-insts", "GFX9Insts", "true",
                     "Additional instructions for GFX9+">;

// Instructions added in GFX10 and later.
def FeatureGFX10Insts
  : SubtargetFeature<"gfx10-insts", "GFX10Insts", "true",
                     "Additional instructions for GFX10+">;

// Instructions common to GFX7, GFX8 and GFX9.
def FeatureGFX7GFX8GFX9Insts
  : SubtargetFeature<"gfx7-gfx8-gfx9-insts", "GFX7GFX8GFX9Insts", "true",
                     "Instructions shared in GFX7, GFX8, GFX9">;
247
// s_memrealtime instruction.
def FeatureSMemRealTime
  : SubtargetFeature<"s-memrealtime", "HasSMemRealTime", "true",
                     "Has s_memrealtime instruction">;

// 1 / (2 * pi) is available as an inline immediate.
def FeatureInv2PiInlineImm
  : SubtargetFeature<"inv-2pi-inline-imm", "HasInv2PiInlineImm", "true",
                     "Has 1 / (2 * pi) as inline immediate">;

// i16/f16 instructions.
def Feature16BitInsts
  : SubtargetFeature<"16-bit-insts", "Has16BitInsts", "true",
                     "Has i16/f16 instructions">;

// VOP3P packed instructions.
def FeatureVOP3P
  : SubtargetFeature<"vop3p", "HasVOP3PInsts", "true",
                     "Has VOP3P packed instructions">;

// v_movrel*_b32 instructions.
def FeatureMovrel
  : SubtargetFeature<"movrel", "HasMovrel", "true",
                     "Has v_movrel*_b32 instructions">;

// VGPR mode register indexing.
def FeatureVGPRIndexMode
  : SubtargetFeature<"vgpr-index-mode", "HasVGPRIndexMode", "true",
                     "Has VGPR mode register indexing">;

// Scalar memory store instructions.
def FeatureScalarStores
  : SubtargetFeature<"scalar-stores", "HasScalarStores", "true",
                     "Has store scalar memory instructions">;

// Scalar memory atomic instructions.
def FeatureScalarAtomics
  : SubtargetFeature<"scalar-atomics", "HasScalarAtomics", "true",
                     "Has atomic scalar memory instructions">;
295
// SDWA (Sub-DWORD Addressing) extension.
def FeatureSDWA
  : SubtargetFeature<"sdwa", "HasSDWA", "true",
      "Support SDWA (Sub-DWORD Addressing) extension">;

// OMod may be combined with SDWA.
def FeatureSDWAOmod
  : SubtargetFeature<"sdwa-omod", "HasSDWAOmod", "true",
      "Support OMod with SDWA (Sub-DWORD Addressing) extension">;

// Scalar registers may be used with SDWA.
def FeatureSDWAScalar
  : SubtargetFeature<"sdwa-scalar", "HasSDWAScalar", "true",
      "Support scalar register with SDWA (Sub-DWORD Addressing) extension">;

// VOPC with SDWA may write a scalar destination.
def FeatureSDWASdst
  : SubtargetFeature<"sdwa-sdst", "HasSDWASdst", "true",
      "Support scalar dst for VOPC with SDWA (Sub-DWORD Addressing) extension">;

// v_mac_f32/f16 may be used with SDWA.
// NOTE(review): the feature string is spelled "sdwa-mav" although the def and
// the field say "Mac". It looks like a typo for "sdwa-mac", but the string is
// user-visible, so it is kept as is -- confirm before renaming.
def FeatureSDWAMac
  : SubtargetFeature<"sdwa-mav", "HasSDWAMac", "true",
      "Support v_mac_f32/f16 with SDWA (Sub-DWORD Addressing) extension">;

// Clamp is supported for VOPC with SDWA.
def FeatureSDWAOutModsVOPC
  : SubtargetFeature<"sdwa-out-mods-vopc", "HasSDWAOutModsVOPC", "true",
      "Support clamp for VOPC with SDWA (Sub-DWORD Addressing) extension">;
331
// DPP (Data Parallel Primitives) extension.
def FeatureDPP
  : SubtargetFeature<"dpp", "HasDPP", "true",
                     "Support DPP (Data Parallel Primitives) extension">;

// DPP8 allows arbitrary cross-lane swizzling within groups of 8 lanes.
def FeatureDPP8
  : SubtargetFeature<"dpp8", "HasDPP8", "true",
                     "Support DPP8 (Data Parallel Primitives) extension">;
344
// 16-bit coordinates/gradients/lod/clamp/mip types on gfx9.
// The help text used to misspell "coordinates" as "coordindates"; only the
// description changed -- the feature string and field name are untouched.
def FeatureR128A16 : SubtargetFeature<"r128-a16",
  "HasR128A16",
  "true",
  "Support 16 bit coordinates/gradients/lod/clamp/mip types on gfx9"
>;
350
// NSA encoding for image instructions.
def FeatureNSAEncoding
  : SubtargetFeature<"nsa-encoding", "HasNSAEncoding", "true",
                     "Support NSA encoding for image instructions">;

// Clamp on integer destinations.
def FeatureIntClamp
  : SubtargetFeature<"int-clamp-insts", "HasIntClamp", "true",
                     "Support clamp for integer destination">;

// Unpacked d16 vmem instructions.
def FeatureUnpackedD16VMem
  : SubtargetFeature<"unpacked-d16-vmem", "HasUnpackedD16VMem", "true",
                     "Has unpacked d16 vmem instructions">;
368
// v_fmac_f32 and v_xnor_b32.
def FeatureDLInsts
  : SubtargetFeature<"dl-insts", "HasDLInsts", "true",
      "Has v_fmac_f32 and v_xnor_b32 instructions">;

// v_dot4_i32_i8 and v_dot8_i32_i4.
def FeatureDot1Insts
  : SubtargetFeature<"dot1-insts", "HasDot1Insts", "true",
      "Has v_dot4_i32_i8 and v_dot8_i32_i4 instructions">;

// v_dot2_f32_f16, v_dot2_i32_i16, v_dot2_u32_u16, v_dot4_u32_u8,
// v_dot8_u32_u4.
def FeatureDot2Insts
  : SubtargetFeature<"dot2-insts", "HasDot2Insts", "true",
      "Has v_dot2_f32_f16, v_dot2_i32_i16, v_dot2_u32_u16, v_dot4_u32_u8, v_dot8_u32_u4 instructions">;

// v_dot8c_i32_i4.
def FeatureDot3Insts
  : SubtargetFeature<"dot3-insts", "HasDot3Insts", "true",
      "Has v_dot8c_i32_i4 instruction">;

// v_dot2c_i32_i16.
def FeatureDot4Insts
  : SubtargetFeature<"dot4-insts", "HasDot4Insts", "true",
      "Has v_dot2c_i32_i16 instruction">;

// v_dot2c_f32_f16.
def FeatureDot5Insts
  : SubtargetFeature<"dot5-insts", "HasDot5Insts", "true",
      "Has v_dot2c_f32_f16 instruction">;

// v_dot4c_i32_i8.
def FeatureDot6Insts
  : SubtargetFeature<"dot6-insts", "HasDot6Insts", "true",
      "Has v_dot4c_i32_i8 instruction">;
410
// mAI instructions.
def FeatureMAIInsts
  : SubtargetFeature<"mai-insts", "HasMAIInsts", "true",
                     "Has mAI instructions">;

// v_pk_fmac_f16.
def FeaturePkFmacF16Inst
  : SubtargetFeature<"pk-fmac-f16-inst", "HasPkFmacF16Inst", "true",
                     "Has v_pk_fmac_f16 instruction">;

// Floating-point atomic add instructions on buffer and global memory. The
// description is written as two adjacent string literals.
def FeatureAtomicFaddInsts
  : SubtargetFeature<"atomic-fadd-insts", "HasAtomicFaddInsts", "true",
      "Has buffer_atomic_add_f32, buffer_atomic_pk_add_f16, global_atomic_add_f32, "
      "global_atomic_pk_add_f16 instructions">;
429
// The hardware has no SRAM ECC support.
def FeatureDoesNotSupportSRAMECC
  : SubtargetFeature<"no-sram-ecc-support", "DoesNotSupportSRAMECC", "true",
                     "Hardware does not support SRAM ECC">;

// SRAM ECC is enabled.
def FeatureSRAMECC
  : SubtargetFeature<"sram-ecc", "EnableSRAMECC", "true",
                     "Enable SRAM ECC">;

// V_CMPX writes EXEC only, not VCC/SGPR as well.
def FeatureNoSdstCMPX
  : SubtargetFeature<"no-sdst-cmpx", "HasNoSdstCMPX", "true",
                     "V_CMPX does not write VCC/SGPR in addition to EXEC">;

// Separate vscnt counter for stores.
def FeatureVscnt
  : SubtargetFeature<"vscnt", "HasVscnt", "true",
                     "Has separate store vscnt counter">;

// Register banking.
def FeatureRegisterBanking
  : SubtargetFeature<"register-banking", "HasRegisterBanking", "true",
                     "Has register banking">;

// VOP3 may carry one literal.
def FeatureVOP3Literal
  : SubtargetFeature<"vop3-literal", "HasVOP3Literal", "true",
                     "Can use one literal in VOP3">;

// No software wait states are needed.
def FeatureNoDataDepHazard
  : SubtargetFeature<"no-data-dep-hazard", "HasNoDataDepHazard", "true",
                     "Does not need SW waitstates">;
471
472//===------------------------------------------------------------===//
473// Subtarget Features (options and debugging)
474//===------------------------------------------------------------===//
475
// When fp16 is supported, a single config register controls denormal handling
// for both fp64 and fp16.
// TODO: Do we need a separate f16 setting when not legal?
def FeatureFP64FP16Denormals
  : SubtargetFeature<"fp64-fp16-denormals", "FP64FP16Denormals", "true",
                     "Enable double and half precision denormal handling",
                     [FeatureFP64]>;

// Sets the same FP64FP16Denormals field and implies the combined feature.
def FeatureFP64Denormals
  : SubtargetFeature<"fp64-denormals", "FP64FP16Denormals", "true",
                     "Enable double and half precision denormal handling",
                     [FeatureFP64, FeatureFP64FP16Denormals]>;

// Sets the same FP64FP16Denormals field and implies the combined feature.
def FeatureFP16Denormals
  : SubtargetFeature<"fp16-denormals", "FP64FP16Denormals", "true",
                     "Enable half precision denormal handling",
                     [FeatureFP64FP16Denormals]>;

// Floating point exceptions.
def FeatureFPExceptions
  : SubtargetFeature<"fp-exceptions", "FPExceptions", "true",
                     "Enable floating point exceptions">;
505
// Feature template for the maximum private access size. `size` is appended to
// both the feature string and the description, and its string form is assigned
// to MaxPrivateElementSize.
class FeatureMaxPrivateElementSize<int size>
  : SubtargetFeature<"max-private-element-size-"#size,
                     "MaxPrivateElementSize",
                     !cast<string>(size),
                     "Maximum private access size may be "#size>;

def FeatureMaxPrivateElementSize4  : FeatureMaxPrivateElementSize<4>;
def FeatureMaxPrivateElementSize8  : FeatureMaxPrivateElementSize<8>;
def FeatureMaxPrivateElementSize16 : FeatureMaxPrivateElementSize<16>;
516
// Two spellings ("DumpCode" and "dumpcode") that set the same DumpCode field.
def FeatureDumpCode
  : SubtargetFeature<"DumpCode", "DumpCode", "true",
                     "Dump MachineInstrs in the CodeEmitter">;

def FeatureDumpCodeLower
  : SubtargetFeature<"dumpcode", "DumpCode", "true",
                     "Dump MachineInstrs in the CodeEmitter">;
528
// XXX - This should probably be removed once enabled by default
def FeatureEnableLoadStoreOpt
  : SubtargetFeature<"load-store-opt", "EnableLoadStoreOpt", "true",
                     "Enable SI load/store optimizer pass">;

// Performance debugging feature. Allows DS instruction immediate offsets to be
// used even if the base pointer can't be proven to be base. On SI, base
// pointer values that won't give the same result as a 16-bit add are not safe
// to fold, but this overrides the conservative test for the base pointer.
def FeatureEnableUnsafeDSOffsetFolding
  : SubtargetFeature<"unsafe-ds-offset-folding",
                     "EnableUnsafeDSOffsetFolding", "true",
                     "Force using DS instruction immediate offsets on SI">;

// SI machine scheduler.
def FeatureEnableSIScheduler
  : SubtargetFeature<"si-scheduler", "EnableSIScheduler", "true",
                     "Enable SI Machine Scheduler">;

// Use of ds_{read|write}_b128.
def FeatureEnableDS128
  : SubtargetFeature<"enable-ds128", "EnableDS128", "true",
                     "Use ds_{read|write}_b128">;
559
// Sparse texture support requires all result registers to be zeroed when
// PRTStrictNull is set to true. The feature is turned on for all
// architectures, but is exposed as a feature in case the driver disables
// PRTStrictNull in some situations.
def FeatureEnablePRTStrictNull
  : SubtargetFeature<"enable-prt-strict-null", "EnablePRTStrictNull", "true",
      "Enable zeroing of result registers for sparse texture fetches">;

// Unless +-flat-for-global is specified, FlatForGlobal is turned on for all
// OS-es on VI and newer hardware to avoid assertion failures due to missing
// ADDR64 variants of MUBUF instructions.
// FIXME: moveToVALU should be able to handle converting addr64 MUBUF
// instructions.

def FeatureFlatForGlobal
  : SubtargetFeature<"flat-for-global", "FlatForGlobal", "true",
                     "Force to generate flat instruction for global">;
581
// Hardware inserts the waitcnt before a barrier on its own.
def FeatureAutoWaitcntBeforeBarrier
  : SubtargetFeature<"auto-waitcnt-before-barrier",
                     "AutoWaitcntBeforeBarrier", "true",
                     "Hardware automatically inserts waitcnt before barrier">;

// Code object version 3 output.
def FeatureCodeObjectV3
  : SubtargetFeature<"code-object-v3", "CodeObjectV3", "true",
                     "Generate code object version 3">;

// Arguments to trig instructions need fract applied first.
def FeatureTrigReducedRange
  : SubtargetFeature<"trig-reduced-range", "HasTrigReducedRange", "true",
      "Requires use of fract on arguments to trig instructions">;

// Dummy feature used to disable assembler instructions. Its feature string is
// empty.
def FeatureDisable
  : SubtargetFeature<"", "FeatureDisable", "true",
                     "Dummy feature to disable assembler instructions">;
607
// Convenience wrapper around SubtargetFeatureGeneration that fixes its third
// argument to "GCNSubtarget" and forwards the rest unchanged.
class GCNSubtargetFeatureGeneration<string Value, string FeatureName,
                                    list<SubtargetFeature> Implies>
  : SubtargetFeatureGeneration<Value, FeatureName, "GCNSubtarget", Implies>;
612
// Generation feature for SOUTHERN_ISLANDS and the features it implies.
def FeatureSouthernIslands
  : GCNSubtargetFeatureGeneration<"SOUTHERN_ISLANDS", "southern-islands", [
      FeatureFP64, FeatureLocalMemorySize32768, FeatureMIMG_R128,
      FeatureWavefrontSize64, FeatureLDSBankCount32, FeatureMovrel,
      FeatureTrigReducedRange, FeatureDoesNotSupportSRAMECC,
      FeatureDoesNotSupportXNACK
    ]>;

// Generation feature for SEA_ISLANDS and the features it implies.
def FeatureSeaIslands
  : GCNSubtargetFeatureGeneration<"SEA_ISLANDS", "sea-islands", [
      FeatureFP64, FeatureLocalMemorySize65536, FeatureMIMG_R128,
      FeatureWavefrontSize64, FeatureFlatAddressSpace, FeatureCIInsts,
      FeatureMovrel, FeatureTrigReducedRange, FeatureGFX7GFX8GFX9Insts,
      FeatureDoesNotSupportSRAMECC
    ]>;

// Generation feature for VOLCANIC_ISLANDS and the features it implies.
def FeatureVolcanicIslands
  : GCNSubtargetFeatureGeneration<"VOLCANIC_ISLANDS", "volcanic-islands", [
      FeatureFP64, FeatureLocalMemorySize65536, FeatureMIMG_R128,
      FeatureWavefrontSize64, FeatureFlatAddressSpace, FeatureGCN3Encoding,
      FeatureCIInsts, Feature16BitInsts, FeatureSMemRealTime,
      FeatureVGPRIndexMode, FeatureMovrel, FeatureScalarStores,
      FeatureInv2PiInlineImm, FeatureSDWA, FeatureSDWAOutModsVOPC,
      FeatureSDWAMac, FeatureDPP, FeatureIntClamp, FeatureTrigReducedRange,
      FeatureDoesNotSupportSRAMECC, FeatureGFX8Insts, FeatureGFX7GFX8GFX9Insts
    ]>;
641
// Generation feature for GFX9 and the features it implies.
def FeatureGFX9
  : GCNSubtargetFeatureGeneration<"GFX9", "gfx9", [
      FeatureFP64, FeatureLocalMemorySize65536, FeatureWavefrontSize64,
      FeatureFlatAddressSpace, FeatureGCN3Encoding, FeatureCIInsts,
      Feature16BitInsts, FeatureSMemRealTime, FeatureScalarStores,
      FeatureInv2PiInlineImm, FeatureApertureRegs, FeatureGFX9Insts,
      FeatureVOP3P, FeatureVGPRIndexMode, FeatureFastFMAF32, FeatureDPP,
      FeatureIntClamp, FeatureSDWA, FeatureSDWAOmod, FeatureSDWAScalar,
      FeatureSDWASdst, FeatureFlatInstOffsets, FeatureFlatGlobalInsts,
      FeatureFlatScratchInsts, FeatureAddNoCarryInsts, FeatureGFX8Insts,
      FeatureGFX7GFX8GFX9Insts, FeatureScalarFlatScratchInsts,
      FeatureScalarAtomics, FeatureR128A16
    ]>;

// Generation feature for GFX10 and the features it implies. No wavefront-size
// feature appears in this list.
def FeatureGFX10
  : GCNSubtargetFeatureGeneration<"GFX10", "gfx10", [
      FeatureFP64, FeatureLocalMemorySize65536, FeatureMIMG_R128,
      FeatureFlatAddressSpace, FeatureCIInsts, Feature16BitInsts,
      FeatureSMemRealTime, FeatureInv2PiInlineImm, FeatureApertureRegs,
      FeatureGFX9Insts, FeatureGFX10Insts, FeatureVOP3P, FeatureMovrel,
      FeatureFastFMAF32, FeatureDPP, FeatureIntClamp, FeatureSDWA,
      FeatureSDWAOmod, FeatureSDWAScalar, FeatureSDWASdst,
      FeatureFlatInstOffsets, FeatureFlatGlobalInsts, FeatureFlatScratchInsts,
      FeatureAddNoCarryInsts, FeatureFmaMixInsts, FeatureGFX8Insts,
      FeatureNoSdstCMPX, FeatureVscnt, FeatureRegisterBanking,
      FeatureVOP3Literal, FeatureDPP8, FeatureNoDataDepHazard,
      FeaturePkFmacF16Inst, FeatureDoesNotSupportSRAMECC
    ]>;
673
// Named bundle of subtarget features; the list is stored in `Features`.
class FeatureSet<list<SubtargetFeature> Features_> {
  list<SubtargetFeature> Features =
    Features_;
}
677
// Feature sets for the 6.0.x ISA versions (built on FeatureSouthernIslands).
def FeatureISAVersion6_0_0 : FeatureSet<[
  FeatureSouthernIslands, FeatureFastFMAF32, HalfRate64Ops,
  FeatureLDSBankCount32, FeatureDoesNotSupportXNACK, FeatureCodeObjectV3
]>;

def FeatureISAVersion6_0_1 : FeatureSet<[
  FeatureSouthernIslands, FeatureLDSBankCount32, FeatureDoesNotSupportXNACK,
  FeatureCodeObjectV3
]>;

// Feature sets for the 7.0.x ISA versions (built on FeatureSeaIslands).
def FeatureISAVersion7_0_0 : FeatureSet<[
  FeatureSeaIslands, FeatureLDSBankCount32, FeatureDoesNotSupportXNACK,
  FeatureCodeObjectV3
]>;

def FeatureISAVersion7_0_1 : FeatureSet<[
  FeatureSeaIslands, HalfRate64Ops, FeatureLDSBankCount32, FeatureFastFMAF32,
  FeatureDoesNotSupportXNACK, FeatureCodeObjectV3
]>;

def FeatureISAVersion7_0_2 : FeatureSet<[
  FeatureSeaIslands, FeatureLDSBankCount16, FeatureFastFMAF32,
  FeatureDoesNotSupportXNACK, FeatureCodeObjectV3
]>;

def FeatureISAVersion7_0_3 : FeatureSet<[
  FeatureSeaIslands, FeatureLDSBankCount16, FeatureDoesNotSupportXNACK,
  FeatureCodeObjectV3
]>;

def FeatureISAVersion7_0_4 : FeatureSet<[
  FeatureSeaIslands, FeatureLDSBankCount32, FeatureDoesNotSupportXNACK,
  FeatureCodeObjectV3
]>;
723
// Feature sets for the 8.x ISA versions (built on FeatureVolcanicIslands).
def FeatureISAVersion8_0_1 : FeatureSet<[
  FeatureVolcanicIslands, FeatureFastFMAF32, HalfRate64Ops,
  FeatureLDSBankCount32, FeatureXNACK, FeatureUnpackedD16VMem,
  FeatureCodeObjectV3
]>;

def FeatureISAVersion8_0_2 : FeatureSet<[
  FeatureVolcanicIslands, FeatureLDSBankCount32, FeatureSGPRInitBug,
  FeatureUnpackedD16VMem, FeatureDoesNotSupportXNACK, FeatureCodeObjectV3
]>;

def FeatureISAVersion8_0_3 : FeatureSet<[
  FeatureVolcanicIslands, FeatureLDSBankCount32, FeatureUnpackedD16VMem,
  FeatureDoesNotSupportXNACK, FeatureCodeObjectV3
]>;

def FeatureISAVersion8_1_0 : FeatureSet<[
  FeatureVolcanicIslands, FeatureLDSBankCount16, FeatureXNACK,
  FeatureCodeObjectV3
]>;
753
// Feature sets for the 9.0.x ISA versions (built on FeatureGFX9).
def FeatureISAVersion9_0_0 : FeatureSet<[
  FeatureGFX9, FeatureMadMixInsts, FeatureLDSBankCount32, FeatureCodeObjectV3,
  FeatureDoesNotSupportXNACK, FeatureDoesNotSupportSRAMECC
]>;

def FeatureISAVersion9_0_2 : FeatureSet<[
  FeatureGFX9, FeatureMadMixInsts, FeatureLDSBankCount32, FeatureXNACK,
  FeatureDoesNotSupportSRAMECC, FeatureCodeObjectV3
]>;

def FeatureISAVersion9_0_4 : FeatureSet<[
  FeatureGFX9, FeatureLDSBankCount32, FeatureFmaMixInsts,
  FeatureDoesNotSupportXNACK, FeatureDoesNotSupportSRAMECC,
  FeatureCodeObjectV3
]>;

def FeatureISAVersion9_0_6 : FeatureSet<[
  FeatureGFX9, HalfRate64Ops, FeatureFmaMixInsts, FeatureLDSBankCount32,
  FeatureDLInsts, FeatureDot1Insts, FeatureDot2Insts,
  FeatureDoesNotSupportXNACK, FeatureCodeObjectV3
]>;

def FeatureISAVersion9_0_8 : FeatureSet<[
  FeatureGFX9, HalfRate64Ops, FeatureFmaMixInsts, FeatureLDSBankCount32,
  FeatureDLInsts, FeatureDot1Insts, FeatureDot2Insts, FeatureDot3Insts,
  FeatureDot4Insts, FeatureDot5Insts, FeatureDot6Insts, FeatureMAIInsts,
  FeaturePkFmacF16Inst, FeatureAtomicFaddInsts, FeatureSRAMECC,
  FeatureCodeObjectV3
]>;

def FeatureISAVersion9_0_9 : FeatureSet<[
  FeatureGFX9, FeatureMadMixInsts, FeatureLDSBankCount32, FeatureXNACK,
  FeatureCodeObjectV3
]>;
813
// Holder record for reusable feature lists.
// TODO: Organize more features into groups.
def FeatureGroup {
  // Bugs present on gfx10.1.
  list<SubtargetFeature> GFX10_1_Bugs = [
    FeatureVcmpxPermlaneHazard, FeatureVMEMtoScalarWriteHazard,
    FeatureSMEMtoVectorWriteHazard, FeatureInstFwdPrefetchBug,
    FeatureVcmpxExecWARHazard, FeatureLdsBranchVmemWARHazard,
    FeatureNSAtoVMEMBug, FeatureOffset3fBug, FeatureFlatSegmentOffsetBug
  ];
}
829
// Feature sets for the 10.1.x ISA versions. Each one prepends the shared
// FeatureGroup.GFX10_1_Bugs list to its own features.
def FeatureISAVersion10_1_0 : FeatureSet<
  !listconcat(FeatureGroup.GFX10_1_Bugs, [
    FeatureGFX10, FeatureLDSBankCount32, FeatureDLInsts, FeatureNSAEncoding,
    FeatureWavefrontSize32, FeatureScalarStores, FeatureScalarAtomics,
    FeatureScalarFlatScratchInsts, FeatureLdsMisalignedBug,
    FeatureDoesNotSupportXNACK, FeatureCodeObjectV3
  ])>;

// Unlike 10.1.0 and 10.1.2, this set does not list FeatureLdsMisalignedBug.
def FeatureISAVersion10_1_1 : FeatureSet<
  !listconcat(FeatureGroup.GFX10_1_Bugs, [
    FeatureGFX10, FeatureLDSBankCount32, FeatureDLInsts, FeatureDot1Insts,
    FeatureDot2Insts, FeatureDot5Insts, FeatureDot6Insts, FeatureNSAEncoding,
    FeatureWavefrontSize32, FeatureScalarStores, FeatureScalarAtomics,
    FeatureScalarFlatScratchInsts, FeatureDoesNotSupportXNACK,
    FeatureCodeObjectV3
  ])>;

def FeatureISAVersion10_1_2 : FeatureSet<
  !listconcat(FeatureGroup.GFX10_1_Bugs, [
    FeatureGFX10, FeatureLDSBankCount32, FeatureDLInsts, FeatureDot1Insts,
    FeatureDot2Insts, FeatureDot5Insts, FeatureDot6Insts, FeatureNSAEncoding,
    FeatureWavefrontSize32, FeatureScalarStores, FeatureScalarAtomics,
    FeatureScalarFlatScratchInsts, FeatureLdsMisalignedBug,
    FeatureDoesNotSupportXNACK, FeatureCodeObjectV3
  ])>;
878
879//===----------------------------------------------------------------------===//
880
// Instruction-info record for the target; both flags below are set to 1.
def AMDGPUInstrInfo : InstrInfo {
  let guessInstructionProperties         = 1;
  let noNamedPositionallyEncodedOperands = 1;
}

// Assembly parser record.
def AMDGPUAsmParser : AsmParser {
  // Emitting MatchRegisterName is turned off because some of the R600
  // registers share an asm name, which makes it crash. For example T0_XYZW
  // and T0_XY both have the asm name T0.
  let ShouldEmitMatchRegisterName = 0;
}

// Assembly writer record with PassSubtarget set to 1.
def AMDGPUAsmWriter : AsmWriter {
  int PassSubtarget = 1;
}
895
// Name / numeric-ID pairs for the assembler variants. Each variant has a
// string field and a matching `<name>_ID` int field.
def AMDGPUAsmVariants {
  string Default = "Default";   int Default_ID = 0;
  string VOP3    = "VOP3";      int VOP3_ID    = 1;
  string SDWA    = "SDWA";      int SDWA_ID    = 2;
  string SDWA9   = "SDWA9";     int SDWA9_ID   = 3;
  string DPP     = "DPP";       int DPP_ID     = 4;
  string Disable = "Disable";   int Disable_ID = 5;
}
910
// One AsmParserVariant per assembler variant, each taking its ID and name
// from AMDGPUAsmVariants.
def DefaultAMDGPUAsmParserVariant : AsmParserVariant {
  let Name    = AMDGPUAsmVariants.Default;
  let Variant = AMDGPUAsmVariants.Default_ID;
}

def VOP3AsmParserVariant : AsmParserVariant {
  let Name    = AMDGPUAsmVariants.VOP3;
  let Variant = AMDGPUAsmVariants.VOP3_ID;
}

def SDWAAsmParserVariant : AsmParserVariant {
  let Name    = AMDGPUAsmVariants.SDWA;
  let Variant = AMDGPUAsmVariants.SDWA_ID;
}

def SDWA9AsmParserVariant : AsmParserVariant {
  let Name    = AMDGPUAsmVariants.SDWA9;
  let Variant = AMDGPUAsmVariants.SDWA9_ID;
}

def DPPAsmParserVariant : AsmParserVariant {
  let Name    = AMDGPUAsmVariants.DPP;
  let Variant = AMDGPUAsmVariants.DPP_ID;
}
936
// Top-level target record: wires up the instruction info, the assembly parser
// with its five variants, and the assembly writer.
def AMDGPU : Target {
  // Pull in Instruction Info:
  let InstructionSet = AMDGPUInstrInfo;

  let AssemblyParsers = [AMDGPUAsmParser];
  let AssemblyParserVariants = [
    DefaultAMDGPUAsmParserVariant,
    VOP3AsmParserVariant,
    SDWAAsmParserVariant,
    SDWA9AsmParserVariant,
    DPPAsmParserVariant
  ];

  let AssemblyWriters = [AMDGPUAsmWriter];
  let AllowRegisterRenaming = 1;
}
949
// Placeholder itinerary records for pseudo instructions: a dummy functional
// unit and a dummy itinerary class.
def ALU_NULL : FuncUnit;
def NullALU  : InstrItinClass;
953
954//===----------------------------------------------------------------------===//
955// Predicate helper class
956//===----------------------------------------------------------------------===//
957
// Generation predicates. Each record pairs a Predicate holding a C++
// expression over Subtarget->getGeneration() with an AssemblerPredicate
// holding a comma-separated list of feature names ('!' marks a negated one).

def isGFX6
  : Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS">,
    AssemblerPredicate<"FeatureSouthernIslands">;

def isGFX6GFX7
  : Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS ||"
              "Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS">,
    AssemblerPredicate<"!FeatureGCN3Encoding,!FeatureGFX10Insts">;

def isGFX6GFX7GFX10
  : Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS ||"
              "Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS ||"
              "Subtarget->getGeneration() == AMDGPUSubtarget::GFX10">,
    AssemblerPredicate<"!FeatureGCN3Encoding">;

def isGFX7Only
  : Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS">,
    AssemblerPredicate<"!FeatureGCN3Encoding,FeatureCIInsts,!FeatureGFX10Insts">;

def isGFX7GFX10
  : Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS ||"
              "Subtarget->getGeneration() == AMDGPUSubtarget::GFX10">,
    AssemblerPredicate<"!FeatureGCN3Encoding,FeatureCIInsts">;

def isGFX7GFX8GFX9
  : Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS ||"
              "Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS ||"
              "Subtarget->getGeneration() == AMDGPUSubtarget::GFX9">,
    AssemblerPredicate<"FeatureGFX7GFX8GFX9Insts">;

def isGFX6GFX7GFX8GFX9
  : Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS ||"
              "Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS ||"
              "Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS ||"
              "Subtarget->getGeneration() == AMDGPUSubtarget::GFX9">,
    AssemblerPredicate<"!FeatureGFX10Insts">;
994
// "Plus" predicates compare the generation with >=; "Only" predicates compare
// with ==. Each is paired with an assembler feature check.

def isGFX7Plus
  : Predicate<"Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS">,
    AssemblerPredicate<"FeatureCIInsts">;

def isGFX8Plus
  : Predicate<"Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS">,
    AssemblerPredicate<"FeatureGFX8Insts">;

def isGFX8Only
  : Predicate<"Subtarget->getGeneration() =="
              "AMDGPUSubtarget::VOLCANIC_ISLANDS">,
    AssemblerPredicate<"FeatureVolcanicIslands">;

def isGFX9Plus
  : Predicate<"Subtarget->getGeneration() >= AMDGPUSubtarget::GFX9">,
    AssemblerPredicate<"FeatureGFX9Insts">;

def isGFX9Only
  : Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::GFX9">,
    AssemblerPredicate<"FeatureGCN3Encoding,FeatureGFX9Insts">;

def isGFX8GFX9
  : Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS ||"
              "Subtarget->getGeneration() == AMDGPUSubtarget::GFX9">,
    AssemblerPredicate<"FeatureGFX8Insts,FeatureGCN3Encoding">;

def isGFX10Plus
  : Predicate<"Subtarget->getGeneration() >= AMDGPUSubtarget::GFX10">,
    AssemblerPredicate<"FeatureGFX10Insts">;
1023
// Predicate condition: Subtarget->hasFlatAddressSpace().
// Assembler side requires FeatureFlatAddressSpace.
1024def HasFlatAddressSpace : Predicate<"Subtarget->hasFlatAddressSpace()">,
1025  AssemblerPredicate<"FeatureFlatAddressSpace">;
1026
// Predicate condition: Subtarget->hasFlatGlobalInsts().
// Assembler side requires FeatureFlatGlobalInsts.
1027def HasFlatGlobalInsts : Predicate<"Subtarget->hasFlatGlobalInsts()">,
1028  AssemblerPredicate<"FeatureFlatGlobalInsts">;
// Predicate condition: Subtarget->hasFlatScratchInsts().
// Assembler side requires FeatureFlatScratchInsts.
1029def HasFlatScratchInsts : Predicate<"Subtarget->hasFlatScratchInsts()">,
1030  AssemblerPredicate<"FeatureFlatScratchInsts">;
// Predicate condition: Subtarget->hasScalarFlatScratchInsts().
// Assembler side requires FeatureScalarFlatScratchInsts.
1031def HasScalarFlatScratchInsts : Predicate<"Subtarget->hasScalarFlatScratchInsts()">,
1032  AssemblerPredicate<"FeatureScalarFlatScratchInsts">;
// Predicate condition: Subtarget->hasD16LoadStore().
// Note the assembler side has no dedicated D16 feature; it tests
// FeatureGFX9Insts instead.
1033def HasD16LoadStore : Predicate<"Subtarget->hasD16LoadStore()">,
1034  AssemblerPredicate<"FeatureGFX9Insts">;
1035
// Complementary pair keyed on the same query/feature:
//  - HasUnpackedD16VMem: Subtarget->hasUnpackedD16VMem() is true and
//    FeatureUnpackedD16VMem is set.
//  - HasPackedD16VMem: the negation of both of the above.
1036def HasUnpackedD16VMem : Predicate<"Subtarget->hasUnpackedD16VMem()">,
1037  AssemblerPredicate<"FeatureUnpackedD16VMem">;
1038def HasPackedD16VMem : Predicate<"!Subtarget->hasUnpackedD16VMem()">,
1039  AssemblerPredicate<"!FeatureUnpackedD16VMem">;
1040
// Predicate condition: Subtarget->d16PreservesUnusedBits().
// Assembler side requires FeatureGFX9Insts to be set and FeatureSRAMECC to be
// clear (the '!' prefix negates a feature in the comma-separated list).
1041def D16PreservesUnusedBits :
1042  Predicate<"Subtarget->d16PreservesUnusedBits()">,
1043  AssemblerPredicate<"FeatureGFX9Insts,!FeatureSRAMECC">;
1044
// Complementary pair on Subtarget->ldsRequiresM0Init(): the first holds when
// it returns true, the second when it returns false. Neither carries an
// AssemblerPredicate.
1045def LDSRequiresM0Init : Predicate<"Subtarget->ldsRequiresM0Init()">;
1046def NotLDSRequiresM0Init : Predicate<"!Subtarget->ldsRequiresM0Init()">;
1047
// Predicate condition: subtarget generation is GFX9 or any later enumerator.
// There is no dedicated subtarget query here; the assembler side likewise
// tests FeatureGFX9Insts.
1048def HasDSAddTid : Predicate<"Subtarget->getGeneration() >= AMDGPUSubtarget::GFX9">,
1049  AssemblerPredicate<"FeatureGFX9Insts">;
1050
// Complementary pair on Subtarget->hasAddNoCarry():
//  - HasAddNoCarryInsts: query is true; assembler requires
//    FeatureAddNoCarryInsts.
//  - NotHasAddNoCarryInsts: query is false; it has no AssemblerPredicate.
1051def HasAddNoCarryInsts : Predicate<"Subtarget->hasAddNoCarry()">,
1052  AssemblerPredicate<"FeatureAddNoCarryInsts">;
1053
1054def NotHasAddNoCarryInsts : Predicate<"!Subtarget->hasAddNoCarry()">;
1055
// Predicate condition: Subtarget->has16BitInsts().
// Assembler side requires Feature16BitInsts.
1056def Has16BitInsts : Predicate<"Subtarget->has16BitInsts()">,
1057  AssemblerPredicate<"Feature16BitInsts">;
// Predicate condition: Subtarget->hasVOP3PInsts().
// Assembler side requires FeatureVOP3P.
1058def HasVOP3PInsts : Predicate<"Subtarget->hasVOP3PInsts()">,
1059  AssemblerPredicate<"FeatureVOP3P">;
1060
// Three SDWA predicates. All share the same Predicate condition,
// Subtarget->hasSDWA(); they differ only in the assembler feature set:
//  - HasSDWA:   FeatureSDWA and FeatureVolcanicIslands.
//  - HasSDWA9:  FeatureGCN3Encoding, FeatureGFX9Insts and FeatureSDWA.
//  - HasSDWA10: FeatureGFX10Insts and FeatureSDWA, with FeatureGCN3Encoding
//               clear.
1061def HasSDWA : Predicate<"Subtarget->hasSDWA()">,
1062  AssemblerPredicate<"FeatureSDWA,FeatureVolcanicIslands">;
1063
1064def HasSDWA9 :
1065  Predicate<"Subtarget->hasSDWA()">,
1066  AssemblerPredicate<"FeatureGCN3Encoding,FeatureGFX9Insts,FeatureSDWA">;
1067
1068def HasSDWA10 :
1069  Predicate<"Subtarget->hasSDWA()">,
1070  AssemblerPredicate<"!FeatureGCN3Encoding,FeatureGFX10Insts,FeatureSDWA">;
1071
// Predicate condition: Subtarget->hasDPP().
// Assembler side requires FeatureGCN3Encoding and FeatureDPP.
1072def HasDPP : Predicate<"Subtarget->hasDPP()">,
1073  AssemblerPredicate<"FeatureGCN3Encoding,FeatureDPP">;
1074
// Predicate condition: Subtarget->hasDPP8().
// Assembler side requires FeatureGFX10Insts and FeatureDPP8, with
// FeatureGCN3Encoding clear.
1075def HasDPP8 : Predicate<"Subtarget->hasDPP8()">,
1076  AssemblerPredicate<"!FeatureGCN3Encoding,FeatureGFX10Insts,FeatureDPP8">;
1077
// Predicate condition: Subtarget->hasR128A16().
// Assembler side requires FeatureR128A16.
1078def HasR128A16 : Predicate<"Subtarget->hasR128A16()">,
1079  AssemblerPredicate<"FeatureR128A16">;
1080
// Predicate condition: Subtarget->hasDPP() (the same query HasDPP uses).
// Assembler side requires FeatureGFX10Insts and FeatureDPP, with
// FeatureGCN3Encoding clear.
1081def HasDPP16 : Predicate<"Subtarget->hasDPP()">,
1082  AssemblerPredicate<"!FeatureGCN3Encoding,FeatureGFX10Insts,FeatureDPP">;
1083
// Predicate condition: Subtarget->hasIntClamp().
// Assembler side requires FeatureIntClamp.
1084def HasIntClamp : Predicate<"Subtarget->hasIntClamp()">,
1085  AssemblerPredicate<"FeatureIntClamp">;
1086
// Predicate condition: Subtarget->hasMadMixInsts().
// Assembler side requires FeatureMadMixInsts.
1087def HasMadMixInsts : Predicate<"Subtarget->hasMadMixInsts()">,
1088  AssemblerPredicate<"FeatureMadMixInsts">;
1089
// Predicate condition: Subtarget->hasScalarStores().
// Assembler side requires FeatureScalarStores.
1090def HasScalarStores : Predicate<"Subtarget->hasScalarStores()">,
1091  AssemblerPredicate<"FeatureScalarStores">;
1092
// Predicate condition: Subtarget->hasScalarAtomics().
// Assembler side requires FeatureScalarAtomics.
1093def HasScalarAtomics : Predicate<"Subtarget->hasScalarAtomics()">,
1094  AssemblerPredicate<"FeatureScalarAtomics">;
1095
// Complementary pair keyed on Subtarget->hasNoSdstCMPX() / FeatureNoSdstCMPX:
//  - HasNoSdstCMPX: query is true and the feature is set.
//  - HasSdstCMPX:   query is false and the feature is clear.
1096def HasNoSdstCMPX : Predicate<"Subtarget->hasNoSdstCMPX()">,
1097  AssemblerPredicate<"FeatureNoSdstCMPX">;
1098
1099def HasSdstCMPX : Predicate<"!Subtarget->hasNoSdstCMPX()">,
1100  AssemblerPredicate<"!FeatureNoSdstCMPX">;
1101
// Predicates on the value of Subtarget->getLDSBankCount(): exactly 16 and
// exactly 32 respectively. Neither carries an AssemblerPredicate.
1102def has16BankLDS : Predicate<"Subtarget->getLDSBankCount() == 16">;
1103def has32BankLDS : Predicate<"Subtarget->getLDSBankCount() == 32">;
// Predicate condition: Subtarget->hasVGPRIndexMode().
// Assembler side requires FeatureVGPRIndexMode.
1104def HasVGPRIndexMode : Predicate<"Subtarget->hasVGPRIndexMode()">,
1105                      AssemblerPredicate<"FeatureVGPRIndexMode">;
// Predicate condition: Subtarget->hasMovrel().
// Assembler side requires FeatureMovrel.
1106def HasMovrel : Predicate<"Subtarget->hasMovrel()">,
1107                AssemblerPredicate<"FeatureMovrel">;
1108
// Predicate condition: Subtarget->hasFmaMixInsts().
// Assembler side requires FeatureFmaMixInsts.
1109def HasFmaMixInsts : Predicate<"Subtarget->hasFmaMixInsts()">,
1110  AssemblerPredicate<"FeatureFmaMixInsts">;
1111
// Predicate condition: Subtarget->hasDLInsts().
// Assembler side requires FeatureDLInsts.
1112def HasDLInsts : Predicate<"Subtarget->hasDLInsts()">,
1113  AssemblerPredicate<"FeatureDLInsts">;
1114
// Family of six parallel predicates, HasDot1Insts through HasDot6Insts.
// For each N in 1..6 the Predicate condition is Subtarget->hasDotNInsts() and
// the assembler side requires the matching FeatureDotNInsts.
1115def HasDot1Insts : Predicate<"Subtarget->hasDot1Insts()">,
1116  AssemblerPredicate<"FeatureDot1Insts">;
1117
1118def HasDot2Insts : Predicate<"Subtarget->hasDot2Insts()">,
1119  AssemblerPredicate<"FeatureDot2Insts">;
1120
1121def HasDot3Insts : Predicate<"Subtarget->hasDot3Insts()">,
1122  AssemblerPredicate<"FeatureDot3Insts">;
1123
1124def HasDot4Insts : Predicate<"Subtarget->hasDot4Insts()">,
1125  AssemblerPredicate<"FeatureDot4Insts">;
1126
1127def HasDot5Insts : Predicate<"Subtarget->hasDot5Insts()">,
1128  AssemblerPredicate<"FeatureDot5Insts">;
1129
1130def HasDot6Insts : Predicate<"Subtarget->hasDot6Insts()">,
1131  AssemblerPredicate<"FeatureDot6Insts">;
1132
// Predicate condition: Subtarget->hasMAIInsts().
// Assembler side requires FeatureMAIInsts.
1133def HasMAIInsts : Predicate<"Subtarget->hasMAIInsts()">,
1134  AssemblerPredicate<"FeatureMAIInsts">;
1135
// Predicate condition: Subtarget->hasPkFmacF16Inst().
// Assembler side requires FeaturePkFmacF16Inst.
1136def HasPkFmacF16Inst : Predicate<"Subtarget->hasPkFmacF16Inst()">,
1137  AssemblerPredicate<"FeaturePkFmacF16Inst">;
1138
// Predicate condition: Subtarget->hasAtomicFaddInsts().
// Assembler side requires FeatureAtomicFaddInsts.
1139def HasAtomicFaddInsts : Predicate<"Subtarget->hasAtomicFaddInsts()">,
1140  AssemblerPredicate<"FeatureAtomicFaddInsts">;
1141
// Holds when the subtarget reports the bug: Subtarget->hasOffset3fBug() is
// true, and on the assembler side FeatureOffset3fBug is set.
// The Predicate condition is deliberately positive so that it agrees with the
// def's name and with its AssemblerPredicate; a negated condition here would
// make the two sides select opposite sets of subtargets.
1142def HasOffset3fBug : Predicate<"Subtarget->hasOffset3fBug()">,
1143  AssemblerPredicate<"FeatureOffset3fBug">;
1144
// Predicate whose condition is the bare C++ expression
// EnableLateStructurizeCFG (not a Subtarget query). It has no
// AssemblerPredicate.
// NOTE(review): the def is spelled "LateCFGStructurize" while the expression
// is "LateStructurizeCFG"; the expression must match a name declared in the
// C++ sources - confirm before renaming either.
1145def EnableLateCFGStructurize : Predicate<
1146  "EnableLateStructurizeCFG">;
1147
1148// Include AMDGPU TD files
1149include "SISchedule.td"
1150include "GCNProcessors.td"
1151include "AMDGPUInstrInfo.td"
1152include "AMDGPURegisterInfo.td"
1153include "AMDGPURegisterBanks.td"
1154include "AMDGPUInstructions.td"
1155include "SIInstrInfo.td"
1156include "AMDGPUCallingConv.td"
1157include "AMDGPUSearchableTables.td"
1158