xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td (revision 7fdf597e96a02165cfe22ff357b857d5fa15ed8a)
//===-- AMDGPUCallingConv.td - Calling Conventions for Radeon GPUs --------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This describes the calling conventions for the AMD Radeon GPUs.
10//
11//===----------------------------------------------------------------------===//
12
// CCIfNotInReg - Run action A only when the argument is NOT flagged 'inreg'.
// This is the logical inverse of CCIfInReg.
class CCIfNotInReg<CCAction A> : CCIf<"!ArgFlags.isInReg()", A>;
// CCIfExtend - Run action A only when the argument carries a sign-extension
// or zero-extension flag.
class CCIfExtend<CCAction A> :
  CCIf<"ArgFlags.isSExt() || ArgFlags.isZExt()", A>;
17
// Calling convention for SI: argument assignment (CC_SI_Gfx).
//
// Reserved register numbers (they line up with the gaps around the
// SGPR4-SGPR29 list used below):
//   0-3    stack buffer descriptor
//   30-31  return address
//   32     stack pointer
//   33     frame pointer
//   34     base pointer
def CC_SI_Gfx : CallingConv<[
  // 'inreg' arguments: SGPR4-SGPR29 (the !range end bound is exclusive).
  CCIfInReg<CCIfType<[f32, i32, f16, i16, v2i16, v2f16, bf16, v2bf16],
    CCAssignToReg<!foreach(i, !range(4, 30), !cast<Register>("SGPR"#i))>>>,

  // Arguments without 'inreg': VGPR0-VGPR31.
  CCIfNotInReg<CCIfType<[f32, i32, f16, i16, v2i16, v2f16, bf16, v2bf16],
    CCAssignToReg<!foreach(i, !range(0, 32), !cast<Register>("VGPR"#i))>>>,

  // Whatever was not placed in a register above is assigned to the stack
  // via CCAssignToStack<4, 4>.
  CCIfType<[i32, f32, v2i16, v2f16, i16, f16, i1, bf16, v2bf16],
    CCAssignToStack<4, 4>>
]>;
41
// Return-value assignment RetCC_SI_Gfx.
def RetCC_SI_Gfx : CallingConv<[
  // i1 is always promoted to i32; the second rule promotes only when the
  // value carries a sext/zext flag.
  // NOTE(review): the i1 entry in the second rule looks redundant after the
  // unconditional promotion on the line before it - confirm.
  CCIfType<[i1], CCPromoteToType<i32>>,
  CCIfType<[i1, i16], CCIfExtend<CCPromoteToType<i32>>>,

  // Values without 'inreg': VGPR0-VGPR135. This def has no rule for values
  // that are flagged 'inreg'.
  CCIfNotInReg<CCIfType<[f32, i32, f16, i16, v2i16, v2f16, bf16, v2bf16],
    CCAssignToReg<!foreach(i, !range(0, 136), !cast<Register>("VGPR"#i))>>>
]>;
66
// Argument assignment CC_SI_SHADER: 'inreg' arguments go to SGPRs, all
// other arguments go to VGPRs. No stack fallback is defined here.
def CC_SI_SHADER : CallingConv<[
  CCIfType<[i1], CCPromoteToType<i32>>,

  // 'inreg' arguments: SGPR0-SGPR43.
  CCIfInReg<CCIfType<[f32, i32, f16, i16, v2i16, v2f16, bf16, v2bf16],
    CCAssignToReg<!foreach(i, !range(0, 44), !cast<Register>("SGPR"#i))>>>,

  // Arguments without 'inreg': VGPR0-VGPR135.
  // 32*4 + 4 is the minimum for a fetch shader consumer with 32 inputs.
  CCIfNotInReg<CCIfType<[f32, i32, f16, i16, v2i16, v2f16, bf16, v2bf16],
    CCAssignToReg<!foreach(i, !range(0, 136), !cast<Register>("VGPR"#i))>>>
]>;
101
// Return-value assignment RetCC_SI_Shader. The register file is chosen by
// value type here, not by the 'inreg' flag.
def RetCC_SI_Shader : CallingConv<[
  CCIfType<[i1, i16], CCIfExtend<CCPromoteToType<i32>>>,

  // i32, i16 and v2i16 results: SGPR0-SGPR43.
  CCIfType<[i32, i16, v2i16],
    CCAssignToReg<!foreach(i, !range(0, 44), !cast<Register>("SGPR"#i))>>,

  // f32, f16, v2f16, bf16 and v2bf16 results: VGPR0-VGPR135.
  // 32*4 + 4 is the minimum for a fetch shader with 32 outputs.
  CCIfType<[f32, f16, v2f16, bf16, v2bf16],
    CCAssignToReg<!foreach(i, !range(0, 136), !cast<Register>("VGPR"#i))>>
]>;
134
// Callee-saved VGPRs: fourteen runs of eight registers, starting at VGPR40
// and repeating every 16 registers up to VGPR248-VGPR255.
// The CSRs & scratch-registers are interleaved at a split boundary of 8.
def CSR_AMDGPU_VGPRs : CalleeSavedRegs<(add
  (sequence "VGPR%u",  40,  47),
  (sequence "VGPR%u",  56,  63),
  (sequence "VGPR%u",  72,  79),
  (sequence "VGPR%u",  88,  95),
  (sequence "VGPR%u", 104, 111),
  (sequence "VGPR%u", 120, 127),
  (sequence "VGPR%u", 136, 143),
  (sequence "VGPR%u", 152, 159),
  (sequence "VGPR%u", 168, 175),
  (sequence "VGPR%u", 184, 191),
  (sequence "VGPR%u", 200, 207),
  (sequence "VGPR%u", 216, 223),
  (sequence "VGPR%u", 232, 239),
  (sequence "VGPR%u", 248, 255)
)>;
152
// Callee-saved AGPRs: AGPR32-AGPR255.
def CSR_AMDGPU_AGPRs : CalleeSavedRegs<(sequence "AGPR%u", 32, 255)>;
156
// Callee-saved SGPRs: SGPR30-SGPR105.
def CSR_AMDGPU_SGPRs : CalleeSavedRegs<(sequence "SGPR%u", 30, 105)>;
160
// Callee-saved SGPRs for the SI_Gfx variant: SGPR4-SGPR31 followed by
// SGPR64-SGPR105.
def CSR_AMDGPU_SI_Gfx_SGPRs : CalleeSavedRegs<(add
  (sequence "SGPR%u", 4, 31),
  (sequence "SGPR%u", 64, 105)
)>;
164
// CSR_AMDGPU_VGPRs followed by CSR_AMDGPU_SGPRs.
def CSR_AMDGPU : CalleeSavedRegs<(add CSR_AMDGPU_VGPRs, CSR_AMDGPU_SGPRs)>;
168
// CSR_AMDGPU extended with the callee-saved AGPRs.
def CSR_AMDGPU_GFX90AInsts : CalleeSavedRegs<(add
  CSR_AMDGPU,
  CSR_AMDGPU_AGPRs
)>;
172
// CSR_AMDGPU_VGPRs followed by the SI_Gfx callee-saved SGPRs.
def CSR_AMDGPU_SI_Gfx : CalleeSavedRegs<(add
  CSR_AMDGPU_VGPRs,
  CSR_AMDGPU_SI_Gfx_SGPRs
)>;
176
// CSR_AMDGPU_SI_Gfx extended with the callee-saved AGPRs.
def CSR_AMDGPU_SI_Gfx_GFX90AInsts : CalleeSavedRegs<(add
  CSR_AMDGPU_SI_Gfx,
  CSR_AMDGPU_AGPRs
)>;
180
// Callee-saved set for CS_ChainPreserve: VGPR8-VGPR255, with no SGPRs or
// AGPRs.
def CSR_AMDGPU_CS_ChainPreserve : CalleeSavedRegs<(sequence "VGPR%u", 8, 255)>;
184
// Empty callee-saved set.
def CSR_AMDGPU_NoRegs : CalleeSavedRegs<
  (add)
>;
186
// Calling convention for leaf functions: argument assignment.
// CC_AMDGPU delegates to this def when the calling convention is
// CallingConv::C.
def CC_AMDGPU_Func : CallingConv<[
  // byval arguments are handled by CCPassByVal<4, 4>.
  CCIfByVal<CCPassByVal<4, 4>>,

  // i1 is always promoted to i32; i8/i16 only when flagged sext/zext.
  CCIfType<[i1], CCPromoteToType<i32>>,
  CCIfType<[i8, i16], CCIfExtend<CCPromoteToType<i32>>>,

  // 'inreg' arguments: SGPR0-SGPR29.
  CCIfInReg<CCIfType<[f32, i32, f16, i16, v2i16, v2f16, bf16, v2bf16],
    CCAssignToReg<!foreach(i, !range(30), !cast<Register>("SGPR"#i))>>>,

  // VGPR0-VGPR31. This rule is not wrapped in CCIfNotInReg, so an 'inreg'
  // argument that falls through the SGPR rule above can be placed here too.
  CCIfType<[i32, f32, i16, f16, v2i16, v2f16, i1, bf16, v2bf16],
    CCAssignToReg<!foreach(i, !range(0, 32), !cast<Register>("VGPR"#i))>>,

  // Everything left over is assigned to the stack via CCAssignToStack<4, 4>.
  CCIfType<[i32, f32, v2i16, v2f16, i16, f16, i1, bf16, v2bf16],
    CCAssignToStack<4, 4>>
]>;
204
// Calling convention for leaf functions: return-value assignment.
def RetCC_AMDGPU_Func : CallingConv<[
  // i1 is always promoted to i32; the second rule promotes only when the
  // value carries a sext/zext flag.
  CCIfType<[i1], CCPromoteToType<i32>>,
  CCIfType<[i1, i16], CCIfExtend<CCPromoteToType<i32>>>,

  // Results are returned in VGPR0-VGPR31.
  CCIfType<[i32, f32, i16, f16, v2i16, v2f16, bf16, v2bf16],
    CCAssignToReg<!foreach(i, !range(0, 32), !cast<Register>("VGPR"#i))>>
]>;
215
// Top-level dispatcher. Both rules require the GCN subtarget generation to
// be at least SOUTHERN_ISLANDS:
//   1. delegate to CC_SI_SHADER;
//   2. when the calling convention is CallingConv::C, delegate to
//      CC_AMDGPU_Func.
// NOTE(review): rule 1's predicate is a superset of rule 2's, so rule 2 can
// only matter for values CC_SI_SHADER does not handle - confirm that this
// ordering is intended.
def CC_AMDGPU : CallingConv<[
  CCIf<
    "static_cast<const GCNSubtarget&>"
    "(State.getMachineFunction().getSubtarget()).getGeneration() >= "
    "AMDGPUSubtarget::SOUTHERN_ISLANDS",
    CCDelegateTo<CC_SI_SHADER>
  >,
  CCIf<
    "static_cast<const GCNSubtarget&>"
    "(State.getMachineFunction().getSubtarget()).getGeneration() >= "
    "AMDGPUSubtarget::SOUTHERN_ISLANDS && State.getCallingConv() == CallingConv::C",
    CCDelegateTo<CC_AMDGPU_Func>
  >
]>;
226
// Argument assignment CC_AMDGPU_CS_CHAIN: 'inreg' arguments go to SGPRs,
// all other arguments go to VGPRs starting at VGPR8.
//
// NOTE(review): the !range end bounds are exclusive, so the lists stop at
// SGPR104 and VGPR254, whereas AMDGPU_AllAllocatableSRegs reaches SGPR105
// and CSR_AMDGPU_CS_ChainPreserve reaches VGPR255. Confirm that leaving out
// the last register of each file is intended.
def CC_AMDGPU_CS_CHAIN : CallingConv<[
  // 'inreg' arguments: SGPR0-SGPR104.
  CCIfInReg<CCIfType<[f32, i32, f16, i16, v2i16, v2f16, bf16, v2bf16],
    CCAssignToReg<!foreach(n, !range(0, 105), !cast<Register>("SGPR"#n))>>>,

  // Arguments without 'inreg': VGPR8-VGPR254.
  CCIfNotInReg<CCIfType<[f32, i32, f16, i16, v2i16, v2f16, bf16, v2bf16],
    CCAssignToReg<!foreach(n, !range(8, 255), !cast<Register>("VGPR"#n))>>>
]>;
236
// RegMask - Marker class for a def that exists only so that a RegMask can be
// obtained from it: its SaveList is ignored and the def is not used as part
// of any calling convention.
class RegMask<dag mask>
  : CalleeSavedRegs<mask>;
241
// Register mask covering VGPR0-VGPR255.
def AMDGPU_AllVGPRs : RegMask<(sequence "VGPR%u", 0, 255)>;
245
// Register mask covering AGPR0-AGPR255.
def AMDGPU_AllAGPRs : RegMask<(sequence "AGPR%u", 0, 255)>;
249
// Register mask covering AMDGPU_AllVGPRs followed by AMDGPU_AllAGPRs.
def AMDGPU_AllVectorRegs : RegMask<(add AMDGPU_AllVGPRs, AMDGPU_AllAGPRs)>;
253
// Register mask covering SGPR0-SGPR105 plus VCC_LO and VCC_HI.
def AMDGPU_AllAllocatableSRegs : RegMask<(add
  (sequence "SGPR%u", 0, 105),
  VCC_LO,
  VCC_HI
)>;
257