//===---- AMDCallingConv.td - Calling Conventions for Radeon GPUs ---------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This describes the calling conventions for the AMD Radeon GPUs.
//
// Register lists below are built with
//   !foreach(i, !range(lo, hi), !cast<Register>("PREFIX"#i))
// Note that !range is half-open: it yields lo .. hi-1. Each use carries a
// trailing comment spelling out the inclusive register span it produces.
//
//===----------------------------------------------------------------------===//

// Inversion of CCIfInReg
class CCIfNotInReg<CCAction A> : CCIf<"!ArgFlags.isInReg()", A>;

// Applies A only to arguments flagged sign- or zero-extended.
class CCIfExtend<CCAction A>
  : CCIf<"ArgFlags.isSExt() || ArgFlags.isZExt()", A>;

// Calling convention for SI
def CC_SI_Gfx : CallingConv<[
  // 0-3 are reserved for the stack buffer descriptor
  // 30-31 are reserved for the return address
  // 32 is reserved for the stack pointer
  // 33 is reserved for the frame pointer
  // 34 is reserved for the base pointer
  CCIfInReg<CCIfType<[f32, i32, f16, i16, v2i16, v2f16, bf16, v2bf16] , CCAssignToReg<
    !foreach(i, !range(4, 30), !cast<Register>("SGPR"#i))  // SGPR4-29
  >>>,

  CCIfNotInReg<CCIfType<[f32, i32, f16, i16, v2i16, v2f16, bf16, v2bf16] , CCAssignToReg<
    !foreach(i, !range(0, 32), !cast<Register>("VGPR"#i))  // VGPR0-31
  >>>,

  // Not guarded by inreg-ness: anything the register rules above did not
  // place falls through to 4-byte, 4-byte-aligned stack slots.
  CCIfType<[i32, f32, v2i16, v2f16, i16, f16, i1, bf16, v2bf16], CCAssignToStack<4, 4>>
]>;

// Return values for CC_SI_Gfx. Only a non-inreg (VGPR) rule is present.
def RetCC_SI_Gfx : CallingConv<[
  CCIfType<[i1], CCPromoteToType<i32>>,
  CCIfType<[i1, i16], CCIfExtend<CCPromoteToType<i32>>>,

  CCIfNotInReg<CCIfType<[f32, i32, f16, i16, v2i16, v2f16, bf16, v2bf16] , CCAssignToReg<
    !foreach(i, !range(0, 136), !cast<Register>("VGPR"#i))  // VGPR0-135
  >>>,
]>;

// Shader arguments: inreg values go to SGPRs, all others to VGPRs. There is
// no stack fallback rule in this convention.
def CC_SI_SHADER : CallingConv<[

  CCIfInReg<CCIfType<[f32, i32, f16, i16, v2i16, v2f16, bf16, v2bf16] , CCAssignToReg<
    !foreach(i, !range(0, 44), !cast<Register>("SGPR"#i))  // SGPR0-43
  >>>,

  // 32*4 + 4 is the minimum for a fetch shader consumer with 32 inputs.
  CCIfNotInReg<CCIfType<[f32, i32, f16, i16, v2i16, v2f16, bf16, v2bf16] , CCAssignToReg<
    !foreach(i, !range(0, 136), !cast<Register>("VGPR"#i))  // VGPR0-135
  >>>
]>;

// Shader return values. Unlike CC_SI_SHADER, the register file is selected by
// value type rather than by the inreg flag: integer types use SGPRs,
// floating-point types use VGPRs.
def RetCC_SI_Shader : CallingConv<[
  CCIfType<[i1, i16], CCIfExtend<CCPromoteToType<i32>>>,
  CCIfType<[i32, i16, v2i16] , CCAssignToReg<
    !foreach(i, !range(0, 44), !cast<Register>("SGPR"#i))  // SGPR0-43
  >>,

  // 32*4 + 4 is the minimum for a fetch shader with 32 outputs.
  CCIfType<[f32, f16, v2f16, bf16, v2bf16] , CCAssignToReg<
    !foreach(i, !range(0, 136), !cast<Register>("VGPR"#i))  // VGPR0-135
  >>
]>;

def CSR_AMDGPU_VGPRs : CalleeSavedRegs<
  // The CSRs & scratch-registers are interleaved at a split boundary of 8.
  // Each sequence below is one 8-register callee-saved group; the 8 registers
  // between consecutive groups are not listed here.
  (add (sequence "VGPR%u", 40, 47),
    (sequence "VGPR%u", 56, 63),
    (sequence "VGPR%u", 72, 79),
    (sequence "VGPR%u", 88, 95),
    (sequence "VGPR%u", 104, 111),
    (sequence "VGPR%u", 120, 127),
    (sequence "VGPR%u", 136, 143),
    (sequence "VGPR%u", 152, 159),
    (sequence "VGPR%u", 168, 175),
    (sequence "VGPR%u", 184, 191),
    (sequence "VGPR%u", 200, 207),
    (sequence "VGPR%u", 216, 223),
    (sequence "VGPR%u", 232, 239),
    (sequence "VGPR%u", 248, 255))
>;

// AGPR32-255.
def CSR_AMDGPU_AGPRs : CalleeSavedRegs<
  (sequence "AGPR%u", 32, 255)
>;

// SGPR30-105.
def CSR_AMDGPU_SGPRs : CalleeSavedRegs<
  (sequence "SGPR%u", 30, 105)
>;

// SGPR4-31 and SGPR64-105.
def CSR_AMDGPU_SI_Gfx_SGPRs : CalleeSavedRegs<
  (add (sequence "SGPR%u", 4, 31), (sequence "SGPR%u", 64, 105))
>;

// Default set: callee-saved VGPRs plus callee-saved SGPRs.
def CSR_AMDGPU : CalleeSavedRegs<
  (add CSR_AMDGPU_VGPRs, CSR_AMDGPU_SGPRs)
>;

// CSR_AMDGPU extended with the callee-saved AGPRs.
def CSR_AMDGPU_GFX90AInsts : CalleeSavedRegs<
  (add CSR_AMDGPU, CSR_AMDGPU_AGPRs)
>;

// Same VGPRs as CSR_AMDGPU, but with the SI_Gfx SGPR set.
def CSR_AMDGPU_SI_Gfx : CalleeSavedRegs<
  (add CSR_AMDGPU_VGPRs, CSR_AMDGPU_SI_Gfx_SGPRs)
>;

// CSR_AMDGPU_SI_Gfx extended with the callee-saved AGPRs.
def CSR_AMDGPU_SI_Gfx_GFX90AInsts : CalleeSavedRegs<
  (add CSR_AMDGPU_SI_Gfx, CSR_AMDGPU_AGPRs)
>;

// VGPR8-255.
def CSR_AMDGPU_CS_ChainPreserve : CalleeSavedRegs<
  (sequence "VGPR%u", 8, 255)
>;

// Empty callee-saved set.
def CSR_AMDGPU_NoRegs : CalleeSavedRegs<(add)>;

// Calling convention for leaf functions
def CC_AMDGPU_Func : CallingConv<[
  CCIfByVal<CCPassByVal<4, 4>>,
  CCIfType<[i1], CCPromoteToType<i32>>,
  CCIfType<[i8, i16], CCIfExtend<CCPromoteToType<i32>>>,

  CCIfInReg<CCIfType<[f32, i32, f16, i16, v2i16, v2f16, bf16, v2bf16] , CCAssignToReg<
    !foreach(i, !range(0, 30), !cast<Register>("SGPR"#i))  // SGPR0-29
  >>>,

  // No CCIfNotInReg guard here, so an inreg argument that was not placed in
  // an SGPR above is also eligible for these VGPRs.
  CCIfType<[i32, f32, i16, f16, v2i16, v2f16, i1, bf16, v2bf16], CCAssignToReg<
    !foreach(i, !range(0, 32), !cast<Register>("VGPR"#i))  // VGPR0-31
  >>,
  CCIfType<[i32, f32, v2i16, v2f16, i16, f16, i1, bf16, v2bf16], CCAssignToStack<4, 4>>
]>;

// Return value convention for leaf functions
def RetCC_AMDGPU_Func : CallingConv<[
  CCIfType<[i1], CCPromoteToType<i32>>,
  CCIfType<[i1, i16], CCIfExtend<CCPromoteToType<i32>>>,
  CCIfType<[i32, f32, i16, f16, v2i16, v2f16, bf16, v2bf16], CCAssignToReg<
    !foreach(i, !range(0, 32), !cast<Register>("VGPR"#i))  // VGPR0-31
  >>,
]>;

// Top-level dispatcher. Rules are tried in order. The second condition is the
// first one with an added CallingConv::C check, so it can only take effect
// for arguments that the CC_SI_SHADER delegation did not handle.
def CC_AMDGPU : CallingConv<[
  CCIf<"static_cast<const GCNSubtarget&>"
       "(State.getMachineFunction().getSubtarget()).getGeneration() >= "
       "AMDGPUSubtarget::SOUTHERN_ISLANDS",
       CCDelegateTo<CC_SI_SHADER>>,
  CCIf<"static_cast<const GCNSubtarget&>"
       "(State.getMachineFunction().getSubtarget()).getGeneration() >= "
       "AMDGPUSubtarget::SOUTHERN_ISLANDS && State.getCallingConv() == CallingConv::C",
       CCDelegateTo<CC_AMDGPU_Func>>
]>;

def CC_AMDGPU_CS_CHAIN : CallingConv<[
  // NOTE(review): !range(105) stops at SGPR104, while other definitions in
  // this file use SGPR ranges that end at 105 inclusive - confirm intended.
  CCIfInReg<CCIfType<[f32, i32, f16, i16, v2i16, v2f16, bf16, v2bf16] , CCAssignToReg<
    !foreach(i, !range(105), !cast<Register>("SGPR"#i))  // SGPR0-104
  >>>,

  // NOTE(review): !range(8, 255) stops at VGPR254, whereas
  // CSR_AMDGPU_CS_ChainPreserve lists VGPR8-255 - confirm VGPR255 is
  // deliberately not assignable here.
  CCIfNotInReg<CCIfType<[f32, i32, f16, i16, v2i16, v2f16, bf16, v2bf16] , CCAssignToReg<
    !foreach(i, !range(8, 255), !cast<Register>("VGPR"#i))  // VGPR8-254
  >>>
]>;

// Trivial class to denote when a def is used only to get a RegMask, i.e.
// SaveList is ignored and the def is not used as part of any calling
// convention.
class RegMask<dag mask> : CalleeSavedRegs<mask>;

// VGPR0-255.
def AMDGPU_AllVGPRs : RegMask<(sequence "VGPR%u", 0, 255)>;

// AGPR0-255.
def AMDGPU_AllAGPRs : RegMask<(sequence "AGPR%u", 0, 255)>;

// Union of the two masks above: VGPR0-255 followed by AGPR0-255.
def AMDGPU_AllVectorRegs : RegMask<(add AMDGPU_AllVGPRs, AMDGPU_AllAGPRs)>;

// SGPR0-105 together with both halves of VCC.
def AMDGPU_AllAllocatableSRegs : RegMask<
  (add (sequence "SGPR%u", 0, 105), VCC_LO, VCC_HI)
>;