//===---- AMDCallingConv.td - Calling Conventions for Radeon GPUs ---------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This describes the calling conventions for the AMD Radeon GPUs.
//
//===----------------------------------------------------------------------===//

// Inversion of CCIfInReg: action A applies only when the argument is not
// flagged inreg.
class CCIfNotInReg<CCAction A> : CCIf<"!ArgFlags.isInReg()", A>;

// Action A applies only when the argument carries a sign-extension or
// zero-extension flag.
class CCIfExtend<CCAction A>
  : CCIf<"ArgFlags.isSExt() || ArgFlags.isZExt()", A>;

// Calling convention for SI
def CC_SI_Gfx : CallingConv<[
  // 0-3 are reserved for the stack buffer descriptor
  // 30-31 are reserved for the return address
  // 32 is reserved for the stack pointer
  // 33 is reserved for the frame pointer
  // 34 is reserved for the base pointer

  // inreg arguments: SGPR4-29.
  CCIfInReg<CCIfType<[f32, i32, f16, i16, v2i16, v2f16, bf16, v2bf16],
    CCAssignToReg<!foreach(i, !range(4, 30), !cast<Register>("SGPR"#i))>>>,

  // Non-inreg arguments: VGPR0-31.
  CCIfNotInReg<CCIfType<[f32, i32, f16, i16, v2i16, v2f16, bf16, v2bf16],
    CCAssignToReg<!foreach(i, !range(0, 32), !cast<Register>("VGPR"#i))>>>,

  // Anything not placed in a register above goes to a 4-byte, 4-byte-aligned
  // stack slot.
  CCIfType<[i32, f32, v2i16, v2f16, i16, f16, i1, bf16, v2bf16],
    CCAssignToStack<4, 4>>
]>;

// Return-value rules matching CC_SI_Gfx by name. Only values that are not
// flagged inreg are given a register here.
def RetCC_SI_Gfx : CallingConv<[
  CCIfType<[i1], CCPromoteToType<i32>>,
  CCIfType<[i1, i16], CCIfExtend<CCPromoteToType<i32>>>,

  // VGPR0-135.
  CCIfNotInReg<CCIfType<[f32, i32, f16, i16, v2i16, v2f16, bf16, v2bf16],
    CCAssignToReg<!foreach(i, !range(0, 136), !cast<Register>("VGPR"#i))>>>
]>;

// Argument rules for shaders: inreg arguments are assigned SGPRs, all other
// arguments are assigned VGPRs. There is no stack fallback rule.
def CC_SI_SHADER : CallingConv<[

  CCIfType<[i1], CCPromoteToType<i32>>,

  // inreg arguments: SGPR0-43.
  CCIfInReg<CCIfType<[f32, i32, f16, i16, v2i16, v2f16, bf16, v2bf16],
    CCAssignToReg<!foreach(i, !range(0, 44), !cast<Register>("SGPR"#i))>>>,

  // 32*4 + 4 is the minimum for a fetch shader consumer with 32 inputs.
  // Non-inreg arguments: VGPR0-135.
  CCIfNotInReg<CCIfType<[f32, i32, f16, i16, v2i16, v2f16, bf16, v2bf16],
    CCAssignToReg<!foreach(i, !range(0, 136), !cast<Register>("VGPR"#i))>>>
]>;

// Return-value rules for shaders. The register file is selected by value
// type rather than by the inreg flag: integer types go to SGPRs and
// floating-point types go to VGPRs.
def RetCC_SI_Shader : CallingConv<[
  CCIfType<[i1, i16], CCIfExtend<CCPromoteToType<i32>>>,

  // SGPR0-43.
  CCIfType<[i32, i16, v2i16],
    CCAssignToReg<!foreach(i, !range(0, 44), !cast<Register>("SGPR"#i))>>,

  // 32*4 + 4 is the minimum for a fetch shader with 32 outputs.
  // VGPR0-135.
  CCIfType<[f32, f16, v2f16, bf16, v2bf16],
    CCAssignToReg<!foreach(i, !range(0, 136), !cast<Register>("VGPR"#i))>>
]>;

def CSR_AMDGPU_VGPRs : CalleeSavedRegs<
  // The CSRs & scratch-registers are interleaved at a split boundary of 8.
  // Each entry below is a run of 8 registers; consecutive runs start 16 apart.
  (add (sequence "VGPR%u", 40, 47),
       (sequence "VGPR%u", 56, 63),
       (sequence "VGPR%u", 72, 79),
       (sequence "VGPR%u", 88, 95),
       (sequence "VGPR%u", 104, 111),
       (sequence "VGPR%u", 120, 127),
       (sequence "VGPR%u", 136, 143),
       (sequence "VGPR%u", 152, 159),
       (sequence "VGPR%u", 168, 175),
       (sequence "VGPR%u", 184, 191),
       (sequence "VGPR%u", 200, 207),
       (sequence "VGPR%u", 216, 223),
       (sequence "VGPR%u", 232, 239),
       (sequence "VGPR%u", 248, 255))
>;

// AGPR32 through AGPR255.
def CSR_AMDGPU_AGPRs : CalleeSavedRegs<(sequence "AGPR%u", 32, 255)>;

// SGPR30 through SGPR105.
def CSR_AMDGPU_SGPRs : CalleeSavedRegs<(sequence "SGPR%u", 30, 105)>;

// SGPR4 through SGPR31 and SGPR64 through SGPR105.
def CSR_AMDGPU_SI_Gfx_SGPRs : CalleeSavedRegs<
  (add (sequence "SGPR%u", 4, 31), (sequence "SGPR%u", 64, 105))
>;

// Interleaved VGPR set plus SGPR30-105.
def CSR_AMDGPU : CalleeSavedRegs<
  (add CSR_AMDGPU_VGPRs, CSR_AMDGPU_SGPRs)
>;

// CSR_AMDGPU extended with AGPR32-255.
def CSR_AMDGPU_GFX90AInsts : CalleeSavedRegs<
  (add CSR_AMDGPU, CSR_AMDGPU_AGPRs)
>;

// Interleaved VGPR set plus the SI_Gfx SGPR set.
def CSR_AMDGPU_SI_Gfx : CalleeSavedRegs<
  (add CSR_AMDGPU_VGPRs, CSR_AMDGPU_SI_Gfx_SGPRs)
>;

// CSR_AMDGPU_SI_Gfx extended with AGPR32-255.
def CSR_AMDGPU_SI_Gfx_GFX90AInsts : CalleeSavedRegs<
  (add CSR_AMDGPU_SI_Gfx, CSR_AMDGPU_AGPRs)
>;

// VGPR8 through VGPR255.
def CSR_AMDGPU_CS_ChainPreserve : CalleeSavedRegs<
  (sequence "VGPR%u", 8, 255)
>;

// Empty set: no callee-saved registers.
def CSR_AMDGPU_NoRegs : CalleeSavedRegs<(add)>;

// Calling convention for leaf functions
def CC_AMDGPU_Func : CallingConv<[
  // byval arguments are passed on the stack (minimum size 4, minimum align 4).
  CCIfByVal<CCPassByVal<4, 4>>,
  CCIfType<[i1], CCPromoteToType<i32>>,
  CCIfType<[i8, i16], CCIfExtend<CCPromoteToType<i32>>>,

  // inreg arguments: SGPR0-29.
  CCIfInReg<CCIfType<[f32, i32, f16, i16, v2i16, v2f16, bf16, v2bf16],
    CCAssignToReg<!foreach(i, !range(0, 30), !cast<Register>("SGPR"#i))>>>,

  // Remaining arguments (including inreg ones that did not get an SGPR):
  // VGPR0-31, then 4-byte, 4-byte-aligned stack slots.
  CCIfType<[i32, f32, i16, f16, v2i16, v2f16, i1, bf16, v2bf16],
    CCAssignToReg<!foreach(i, !range(0, 32), !cast<Register>("VGPR"#i))>>,
  CCIfType<[i32, f32, v2i16, v2f16, i16, f16, i1, bf16, v2bf16],
    CCAssignToStack<4, 4>>
]>;

// Return-value convention for leaf functions
def RetCC_AMDGPU_Func : CallingConv<[
  CCIfType<[i1], CCPromoteToType<i32>>,
  CCIfType<[i1, i16], CCIfExtend<CCPromoteToType<i32>>>,

  // VGPR0-31.
  CCIfType<[i32, f32, i16, f16, v2i16, v2f16, bf16, v2bf16],
    CCAssignToReg<!foreach(i, !range(0, 32), !cast<Register>("VGPR"#i))>>
]>;

// Top-level dispatcher. Both rules require generation >= SOUTHERN_ISLANDS;
// the second additionally requires the C calling convention.
// NOTE(review): rules are tried in order, so CC_AMDGPU_Func is consulted only
// when CC_SI_SHADER did not assign a location -- confirm this ordering is
// intended.
def CC_AMDGPU : CallingConv<[
  CCIf<"static_cast<const GCNSubtarget&>"
       "(State.getMachineFunction().getSubtarget()).getGeneration() >= "
       "AMDGPUSubtarget::SOUTHERN_ISLANDS",
       CCDelegateTo<CC_SI_SHADER>>,
  CCIf<"static_cast<const GCNSubtarget&>"
       "(State.getMachineFunction().getSubtarget()).getGeneration() >= "
       "AMDGPUSubtarget::SOUTHERN_ISLANDS && State.getCallingConv() == CallingConv::C",
       CCDelegateTo<CC_AMDGPU_Func>>
]>;

// Argument rules for CS chain functions: inreg arguments are assigned SGPRs,
// all other arguments are assigned VGPRs starting at VGPR8.
// NOTE(review): !range is end-exclusive, so the lists below stop at SGPR104
// and VGPR254, whereas CSR_AMDGPU_CS_ChainPreserve runs through VGPR255 --
// confirm the upper bounds are intentional.
def CC_AMDGPU_CS_CHAIN : CallingConv<[
  // SGPR0-104.
  CCIfInReg<CCIfType<[f32, i32, f16, i16, v2i16, v2f16, bf16, v2bf16],
    CCAssignToReg<!foreach(i, !range(105), !cast<Register>("SGPR"#i))>>>,

  // VGPR8-254.
  CCIfNotInReg<CCIfType<[f32, i32, f16, i16, v2i16, v2f16, bf16, v2bf16],
    CCAssignToReg<!foreach(i, !range(8, 255), !cast<Register>("VGPR"#i))>>>
]>;

// Trivial class to denote when a def is used only to get a RegMask, i.e.
// SaveList is ignored and the def is not used as part of any calling
// convention.
class RegMask<dag mask> : CalleeSavedRegs<mask>;

// VGPR0 through VGPR255.
def AMDGPU_AllVGPRs : RegMask<(sequence "VGPR%u", 0, 255)>;

// AGPR0 through AGPR255.
def AMDGPU_AllAGPRs : RegMask<(sequence "AGPR%u", 0, 255)>;

// Union of AMDGPU_AllVGPRs and AMDGPU_AllAGPRs.
def AMDGPU_AllVectorRegs : RegMask<(add AMDGPU_AllVGPRs, AMDGPU_AllAGPRs)>;

// SGPR0 through SGPR105, plus VCC_LO and VCC_HI.
def AMDGPU_AllAllocatableSRegs : RegMask<
  (add (sequence "SGPR%u", 0, 105), VCC_LO, VCC_HI)
>;