xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td (revision e1e636193db45630c7881246d25902e57c43d24e)
//===--- AMDGPUCallingConv.td - Calling Conventions for Radeon GPUs -------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This describes the calling conventions for the AMD Radeon GPUs.
10//
11//===----------------------------------------------------------------------===//
12
// Negated form of CCIfInReg: action A is applied only when the argument's
// flags do not report it as inreg.
class CCIfNotInReg<CCAction A>
  : CCIf<"!ArgFlags.isInReg()", A>;
// Applies action A only when the argument's flags request sign extension or
// zero extension.
class CCIfExtend<CCAction A> :
  CCIf<"ArgFlags.isSExt() || ArgFlags.isZExt()", A>;
17
// Calling convention for SI.
//
// Rules are tried in order: inreg values of the listed types take SGPRs,
// non-inreg values take VGPRs, and whatever is still unassigned gets a stack
// slot.
def CC_SI_Gfx : CallingConv<[
  // SGPRs that the inreg list below skips:
  //   0-3   are reserved for the stack buffer descriptor
  //   30-31 are reserved for the return address
  //   32    is reserved for the stack pointer
  //   33    is reserved for the frame pointer
  //   34    is reserved for the base pointer
  CCIfInReg<
    CCIfType<[f32, i32, f16, i16, v2i16, v2f16, bf16, v2bf16],
      CCAssignToReg<
        !foreach(i, !range(4, 30), !cast<Register>("SGPR"#i))  // SGPR4-29
      >>>,

  CCIfNotInReg<
    CCIfType<[f32, i32, f16, i16, v2i16, v2f16, bf16, v2bf16],
      CCAssignToReg<
        !foreach(i, !range(0, 32), !cast<Register>("VGPR"#i))  // VGPR0-31
      >>>,

  // Stack fallback: slot size 4, alignment 4.
  CCIfType<[i32, f32, v2i16, v2f16, i16, f16, i1, bf16, v2bf16],
    CCAssignToStack<4, 4>>
]>;
41
// Return-value assignment paired (by name) with CC_SI_Gfx.
def RetCC_SI_Gfx : CallingConv<[
  // i1 is always widened to i32; i16 (and i1, already handled above) is
  // widened only when a sign/zero extension is requested.
  CCIfType<[i1], CCPromoteToType<i32>>,
  CCIfType<[i1, i16], CCIfExtend<CCPromoteToType<i32>>>,

  // Non-inreg values of the listed types are assigned to VGPRs.
  CCIfNotInReg<
    CCIfType<[f32, i32, f16, i16, v2i16, v2f16, bf16, v2bf16],
      CCAssignToReg<
        !foreach(i, !range(0, 136), !cast<Register>("VGPR"#i))  // VGPR0-135
      >>>
]>;
66
// Shader argument assignment: inreg values of the listed types go to SGPRs,
// all other values of those types go to VGPRs. No stack rule is present.
def CC_SI_SHADER : CallingConv<[
  CCIfInReg<
    CCIfType<[f32, i32, f16, i16, v2i16, v2f16, bf16, v2bf16],
      CCAssignToReg<
        !foreach(i, !range(0, 44), !cast<Register>("SGPR"#i))  // SGPR0-43
      >>>,

  // 32*4 + 4 is the minimum for a fetch shader consumer with 32 inputs.
  CCIfNotInReg<
    CCIfType<[f32, i32, f16, i16, v2i16, v2f16, bf16, v2bf16],
      CCAssignToReg<
        !foreach(i, !range(0, 136), !cast<Register>("VGPR"#i))  // VGPR0-135
      >>>
]>;
99
// Shader return-value assignment. Unlike the argument convention, the
// register file is chosen by value type rather than by the inreg flag:
// integer types go to SGPRs, floating-point types go to VGPRs.
def RetCC_SI_Shader : CallingConv<[
  // i1/i16 are widened to i32 only when an extension is requested.
  CCIfType<[i1, i16], CCIfExtend<CCPromoteToType<i32>>>,

  CCIfType<[i32, i16, v2i16],
    CCAssignToReg<
      !foreach(i, !range(0, 44), !cast<Register>("SGPR"#i))  // SGPR0-43
    >>,

  // 32*4 + 4 is the minimum for a fetch shader with 32 outputs.
  CCIfType<[f32, f16, v2f16, bf16, v2bf16],
    CCAssignToReg<
      !foreach(i, !range(0, 136), !cast<Register>("VGPR"#i))  // VGPR0-135
    >>
]>;
132
// Callee-saved VGPRs.
//
// The CSRs & scratch-registers are interleaved at a split boundary of 8:
// VGPR0-39 are not listed, and from VGPR40 upward every other group of eight
// registers is listed, ending with VGPR248-255.
def CSR_AMDGPU_VGPRs : CalleeSavedRegs<
  (add
    (sequence "VGPR%u",  40,  47),
    (sequence "VGPR%u",  56,  63),
    (sequence "VGPR%u",  72,  79),
    (sequence "VGPR%u",  88,  95),
    (sequence "VGPR%u", 104, 111),
    (sequence "VGPR%u", 120, 127),
    (sequence "VGPR%u", 136, 143),
    (sequence "VGPR%u", 152, 159),
    (sequence "VGPR%u", 168, 175),
    (sequence "VGPR%u", 184, 191),
    (sequence "VGPR%u", 200, 207),
    (sequence "VGPR%u", 216, 223),
    (sequence "VGPR%u", 232, 239),
    (sequence "VGPR%u", 248, 255)
  )
>;
150
// Callee-saved AGPRs: AGPR32 through AGPR255.
def CSR_AMDGPU_AGPRs : CalleeSavedRegs<(sequence "AGPR%u", 32, 255)>;
154
// Callee-saved SGPRs: SGPR30 through SGPR105.
def CSR_AMDGPU_SGPRs : CalleeSavedRegs<(sequence "SGPR%u", 30, 105)>;
158
// Callee-saved SGPRs for the SI_Gfx variant: SGPR4-31 plus SGPR64-105.
def CSR_AMDGPU_SI_Gfx_SGPRs : CalleeSavedRegs<
  (add
    (sequence "SGPR%u", 4, 31),
    (sequence "SGPR%u", 64, 105)
  )
>;
162
// Union of the callee-saved VGPR set and the callee-saved SGPR set.
def CSR_AMDGPU : CalleeSavedRegs<(add CSR_AMDGPU_VGPRs, CSR_AMDGPU_SGPRs)>;
166
// CSR_AMDGPU extended with the callee-saved AGPR set.
def CSR_AMDGPU_GFX90AInsts : CalleeSavedRegs<(add CSR_AMDGPU, CSR_AMDGPU_AGPRs)>;
170
// Union of the callee-saved VGPR set and the SI_Gfx callee-saved SGPR set.
def CSR_AMDGPU_SI_Gfx : CalleeSavedRegs<
  (add
    CSR_AMDGPU_VGPRs,
    CSR_AMDGPU_SI_Gfx_SGPRs
  )
>;
174
// CSR_AMDGPU_SI_Gfx extended with the callee-saved AGPR set.
def CSR_AMDGPU_SI_Gfx_GFX90AInsts : CalleeSavedRegs<
  (add
    CSR_AMDGPU_SI_Gfx,
    CSR_AMDGPU_AGPRs
  )
>;
178
// Callee-saved set for CS_ChainPreserve: VGPR8 through VGPR255.
def CSR_AMDGPU_CS_ChainPreserve : CalleeSavedRegs<(sequence "VGPR%u", 8, 255)>;
182
// Empty callee-saved set: the register list contains nothing.
def CSR_AMDGPU_NoRegs : CalleeSavedRegs<
  (add)
>;
184
// Calling convention for leaf functions.
//
// Rules are tried in order; the first one that assigns a location wins.
def CC_AMDGPU_Func : CallingConv<[
  // byval arguments are passed by value (size 4, alignment 4).
  CCIfByVal<CCPassByVal<4, 4>>,

  // i1 is always widened to i32; i8/i16 are widened only when a sign/zero
  // extension is requested.
  CCIfType<[i1], CCPromoteToType<i32>>,
  CCIfType<[i8, i16], CCIfExtend<CCPromoteToType<i32>>>,

  // inreg values of the listed types try the SGPR list first.
  CCIfInReg<
    CCIfType<[f32, i32, f16, i16, v2i16, v2f16, bf16, v2bf16],
      CCAssignToReg<
        !foreach(i, !range(0, 30), !cast<Register>("SGPR"#i))  // SGPR0-29
      >>>,

  // This rule is not guarded by CCIfNotInReg, so it also applies to inreg
  // values that did not receive an SGPR above.
  CCIfType<[i32, f32, i16, f16, v2i16, v2f16, i1, bf16, v2bf16],
    CCAssignToReg<
      !foreach(i, !range(0, 32), !cast<Register>("VGPR"#i))  // VGPR0-31
    >>,

  // Stack fallback: slot size 4, alignment 4.
  CCIfType<[i32, f32, v2i16, v2f16, i16, f16, i1, bf16, v2bf16],
    CCAssignToStack<4, 4>>
]>;
202
// Calling convention for leaf functions (return-value assignment, per the
// RetCC_ naming).
def RetCC_AMDGPU_Func : CallingConv<[
  // i1 is always widened to i32; i16 (and i1, already handled above) is
  // widened only when a sign/zero extension is requested.
  CCIfType<[i1], CCPromoteToType<i32>>,
  CCIfType<[i1, i16], CCIfExtend<CCPromoteToType<i32>>>,

  // Values of the listed types are assigned to VGPRs; there is no stack rule.
  CCIfType<[i32, f32, i16, f16, v2i16, v2f16, bf16, v2bf16],
    CCAssignToReg<
      !foreach(i, !range(0, 32), !cast<Register>("VGPR"#i))  // VGPR0-31
    >>
]>;
213
// Dispatching convention: picks a sub-convention from the subtarget
// generation and the active calling convention.
def CC_AMDGPU : CallingConv<[
  // Generation >= SOUTHERN_ISLANDS: try the shader convention first.
  CCIf<
    "static_cast<const GCNSubtarget&>"
    "(State.getMachineFunction().getSubtarget()).getGeneration() >= "
    "AMDGPUSubtarget::SOUTHERN_ISLANDS",
    CCDelegateTo<CC_SI_SHADER>
  >,

  // Same generation test plus CallingConv::C. Because the rule above has a
  // weaker predicate and comes first, this one is only consulted for values
  // that CC_SI_SHADER left unassigned.
  CCIf<
    "static_cast<const GCNSubtarget&>"
    "(State.getMachineFunction().getSubtarget()).getGeneration() >= "
    "AMDGPUSubtarget::SOUTHERN_ISLANDS && State.getCallingConv() == CallingConv::C",
    CCDelegateTo<CC_AMDGPU_Func>
  >
]>;
224
// Argument assignment for CS_CHAIN: inreg values of the listed types go to
// SGPRs, all other values of those types go to VGPRs starting at VGPR8.
//
// NOTE(review): !range excludes its upper bound, so these lists stop at
// SGPR104 and VGPR254, whereas AMDGPU_AllAllocatableSRegs reaches SGPR105 and
// CSR_AMDGPU_CS_ChainPreserve reaches VGPR255. Confirm that leaving out
// SGPR105 and VGPR255 is intended.
def CC_AMDGPU_CS_CHAIN : CallingConv<[
  CCIfInReg<
    CCIfType<[f32, i32, f16, i16, v2i16, v2f16, bf16, v2bf16],
      CCAssignToReg<
        !foreach(i, !range(0, 105), !cast<Register>("SGPR"#i))  // SGPR0-104
      >>>,

  CCIfNotInReg<
    CCIfType<[f32, i32, f16, i16, v2i16, v2f16, bf16, v2bf16],
      CCAssignToReg<
        !foreach(i, !range(8, 255), !cast<Register>("VGPR"#i))  // VGPR8-254
      >>>
]>;
234
// Marker class for defs that exist only to produce a RegMask: the SaveList
// is ignored and the def takes part in no calling convention.
class RegMask<dag mask>
  : CalleeSavedRegs<mask>;
239
// Register mask covering VGPR0 through VGPR255.
def AMDGPU_AllVGPRs : RegMask<(sequence "VGPR%u", 0, 255)>;
243
// Register mask covering AGPR0 through AGPR255.
def AMDGPU_AllAGPRs : RegMask<(sequence "AGPR%u", 0, 255)>;
247
// Register mask combining the all-VGPR and all-AGPR masks.
def AMDGPU_AllVectorRegs : RegMask<(add AMDGPU_AllVGPRs, AMDGPU_AllAGPRs)>;
251
// Register mask covering SGPR0 through SGPR105 together with VCC_LO and
// VCC_HI.
def AMDGPU_AllAllocatableSRegs : RegMask<
  (add
    (sequence "SGPR%u", 0, 105),
    VCC_LO,
    VCC_HI
  )
>;
255