//===---- AMDCallingConv.td - Calling Conventions for Radeon GPUs ---------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This describes the calling conventions for the AMD Radeon GPUs.
//
//===----------------------------------------------------------------------===//

// Applies action A only when the argument does NOT carry the inreg flag
// (the inverse of CCIfInReg).
class CCIfNotInReg<CCAction A> : CCIf<"!ArgFlags.isInReg()", A>;

// Applies action A only when the argument is marked sign- or zero-extended.
class CCIfExtend<CCAction A>
  : CCIf<"ArgFlags.isSExt() || ArgFlags.isZExt()", A>;

//===----------------------------------------------------------------------===//
// Argument / return-value assignment rules
//===----------------------------------------------------------------------===//

// Calling convention for SI
//
// SGPR usage that is off-limits for argument passing here:
//   0-3   stack buffer descriptor
//   30-31 return address
//   32    stack pointer
//   33    frame pointer
//   34    base pointer
def CC_SI_Gfx : CallingConv<[
  // inreg arguments are placed in SGPR4-29.
  CCIfInReg<
    CCIfType<[f32, i32, f16, i16, v2i16, v2f16, bf16, v2bf16],
      CCAssignToReg<
        !foreach(RegNo, !range(4, 30), !cast<Register>("SGPR" # RegNo))
      >>>,

  // Everything else is placed in VGPR0-31.
  CCIfNotInReg<
    CCIfType<[f32, i32, f16, i16, v2i16, v2f16, bf16, v2bf16],
      CCAssignToReg<
        !foreach(RegNo, !range(0, 32), !cast<Register>("VGPR" # RegNo))
      >>>,

  // Whatever did not get a register is given a 4-byte, 4-byte-aligned stack
  // slot.
  CCIfType<[i32, f32, v2i16, v2f16, i16, f16, i1, bf16, v2bf16],
    CCAssignToStack<4, 4>>
]>;

// Return-value rules matching CC_SI_Gfx: i1 is widened to i32, extended
// i16 values are widened to i32, and non-inreg values use VGPR0-135.
def RetCC_SI_Gfx : CallingConv<[
  CCIfType<[i1], CCPromoteToType<i32>>,
  CCIfType<[i1, i16], CCIfExtend<CCPromoteToType<i32>>>,

  CCIfNotInReg<
    CCIfType<[f32, i32, f16, i16, v2i16, v2f16, bf16, v2bf16],
      CCAssignToReg<
        !foreach(RegNo, !range(0, 136), !cast<Register>("VGPR" # RegNo))
      >>>,
]>;

// Argument rules for shader entry points.
def CC_SI_SHADER : CallingConv<[

  CCIfType<[i1], CCPromoteToType<i32>>,

  // inreg arguments are placed in SGPR0-43.
  CCIfInReg<
    CCIfType<[f32, i32, f16, i16, v2i16, v2f16, bf16, v2bf16],
      CCAssignToReg<
        !foreach(RegNo, !range(0, 44), !cast<Register>("SGPR" # RegNo))
      >>>,

  // Non-inreg arguments are placed in VGPR0-135.
  // 32*4 + 4 is the minimum for a fetch shader consumer with 32 inputs.
  CCIfNotInReg<
    CCIfType<[f32, i32, f16, i16, v2i16, v2f16, bf16, v2bf16],
      CCAssignToReg<
        !foreach(RegNo, !range(0, 136), !cast<Register>("VGPR" # RegNo))
      >>>
]>;

// Return-value rules for shaders: integer-typed values are returned in
// SGPR0-43 and floating-point-typed values in VGPR0-135.
def RetCC_SI_Shader : CallingConv<[
  CCIfType<[i1, i16], CCIfExtend<CCPromoteToType<i32>>>,

  CCIfType<[i32, i16, v2i16],
    CCAssignToReg<
      !foreach(RegNo, !range(0, 44), !cast<Register>("SGPR" # RegNo))
    >>,

  // 32*4 + 4 is the minimum for a fetch shader with 32 outputs.
  CCIfType<[f32, f16, v2f16, bf16, v2bf16],
    CCAssignToReg<
      !foreach(RegNo, !range(0, 136), !cast<Register>("VGPR" # RegNo))
    >>
]>;

//===----------------------------------------------------------------------===//
// Callee-saved register sets
//===----------------------------------------------------------------------===//

// The CSRs & scratch-registers are interleaved at a split boundary of 8:
// starting at VGPR40, every other group of eight VGPRs is callee-saved.
def CSR_AMDGPU_VGPRs : CalleeSavedRegs<
  (add (sequence "VGPR%u",  40,  47),
       (sequence "VGPR%u",  56,  63),
       (sequence "VGPR%u",  72,  79),
       (sequence "VGPR%u",  88,  95),
       (sequence "VGPR%u", 104, 111),
       (sequence "VGPR%u", 120, 127),
       (sequence "VGPR%u", 136, 143),
       (sequence "VGPR%u", 152, 159),
       (sequence "VGPR%u", 168, 175),
       (sequence "VGPR%u", 184, 191),
       (sequence "VGPR%u", 200, 207),
       (sequence "VGPR%u", 216, 223),
       (sequence "VGPR%u", 232, 239),
       (sequence "VGPR%u", 248, 255))
>;

// AGPR32-255 are callee-saved.
def CSR_AMDGPU_AGPRs : CalleeSavedRegs<
  (sequence "AGPR%u", 32, 255)
>;

// Ensure that s30-s31 (return address), s32 (stack pointer), s33 (frame
// pointer), and s34 (base pointer) are callee-saved. The striped layout starts
// from s40, with a stripe width of 8. The last stripe is 10 wide instead of 8,
// to avoid ending with a 2-wide stripe.
def CSR_AMDGPU_SGPRs : CalleeSavedRegs<
  (add (sequence "SGPR%u", 30,  39),
       (sequence "SGPR%u", 48,  55),
       (sequence "SGPR%u", 64,  71),
       (sequence "SGPR%u", 80,  87),
       (sequence "SGPR%u", 96, 105))
>;

// Callee-saved SGPRs for the SI_Gfx convention: SGPR4-31 and SGPR64-105.
def CSR_AMDGPU_SI_Gfx_SGPRs : CalleeSavedRegs<
  (add (sequence "SGPR%u", 4, 31),
       (sequence "SGPR%u", 64, 105))
>;

// Combined sets built from the pieces above.
def CSR_AMDGPU : CalleeSavedRegs<
  (add CSR_AMDGPU_VGPRs, CSR_AMDGPU_SGPRs)
>;

def CSR_AMDGPU_GFX90AInsts : CalleeSavedRegs<
  (add CSR_AMDGPU, CSR_AMDGPU_AGPRs)
>;

def CSR_AMDGPU_SI_Gfx : CalleeSavedRegs<
  (add CSR_AMDGPU_VGPRs, CSR_AMDGPU_SI_Gfx_SGPRs)
>;

def CSR_AMDGPU_SI_Gfx_GFX90AInsts : CalleeSavedRegs<
  (add CSR_AMDGPU_SI_Gfx, CSR_AMDGPU_AGPRs)
>;

// VGPR8-255 are preserved.
def CSR_AMDGPU_CS_ChainPreserve : CalleeSavedRegs<
  (sequence "VGPR%u", 8, 255)
>;

// Empty set: nothing is callee-saved.
def CSR_AMDGPU_NoRegs : CalleeSavedRegs<(add)>;

//===----------------------------------------------------------------------===//
// Function calling conventions
//===----------------------------------------------------------------------===//

// Calling convention for leaf functions
def CC_AMDGPU_Func : CallingConv<[
  CCIfByVal<CCPassByVal<4, 4>>,
  CCIfType<[i1], CCPromoteToType<i32>>,
  CCIfType<[i8, i16], CCIfExtend<CCPromoteToType<i32>>>,

  // inreg arguments are placed in SGPR0-29.
  CCIfInReg<
    CCIfType<[f32, i32, f16, i16, v2i16, v2f16, bf16, v2bf16],
      CCAssignToReg<
        !foreach(RegNo, !range(0, 30), !cast<Register>("SGPR" # RegNo))
      >>>,

  // Remaining arguments are placed in VGPR0-31, then in 4-byte stack slots.
  CCIfType<[i32, f32, i16, f16, v2i16, v2f16, i1, bf16, v2bf16],
    CCAssignToReg<
      !foreach(RegNo, !range(0, 32), !cast<Register>("VGPR" # RegNo))
    >>,
  CCIfType<[i32, f32, v2i16, v2f16, i16, f16, i1, bf16, v2bf16],
    CCAssignToStack<4, 4>>
]>;

// Return-value rules for the function calling convention: values are
// returned in VGPR0-31.
def RetCC_AMDGPU_Func : CallingConv<[
  CCIfType<[i1], CCPromoteToType<i32>>,
  CCIfType<[i1, i16], CCIfExtend<CCPromoteToType<i32>>>,
  CCIfType<[i32, f32, i16, f16, v2i16, v2f16, bf16, v2bf16],
    CCAssignToReg<
      !foreach(RegNo, !range(0, 32), !cast<Register>("VGPR" # RegNo))
    >>,
]>;

// Top-level dispatcher. Both rules require a subtarget generation of at least
// SOUTHERN_ISLANDS; the second additionally requires CallingConv::C.
// NOTE(review): the first rule has no calling-convention check, so
// CC_SI_SHADER is consulted before CC_AMDGPU_Func for every calling
// convention -- confirm this ordering is intended.
def CC_AMDGPU : CallingConv<[
  CCIf<"State.getMachineFunction().getSubtarget<GCNSubtarget>().getGeneration() >= "
       "AMDGPUSubtarget::SOUTHERN_ISLANDS",
       CCDelegateTo<CC_SI_SHADER>>,
  CCIf<"State.getMachineFunction().getSubtarget<GCNSubtarget>().getGeneration() >= "
       "AMDGPUSubtarget::SOUTHERN_ISLANDS && State.getCallingConv() == CallingConv::C",
       CCDelegateTo<CC_AMDGPU_Func>>
]>;

// Argument rules for the CS chain convention.
// NOTE(review): !range excludes its end value, so the VGPR list stops at
// VGPR254 whereas CSR_AMDGPU_CS_ChainPreserve runs through VGPR255 --
// confirm the difference is intentional.
def CC_AMDGPU_CS_CHAIN : CallingConv<[
  // inreg arguments are placed in SGPR0-104.
  CCIfInReg<
    CCIfType<[f32, i32, f16, i16, v2i16, v2f16, bf16, v2bf16],
      CCAssignToReg<
        !foreach(RegNo, !range(0, 105), !cast<Register>("SGPR" # RegNo))
      >>>,

  // Non-inreg arguments are placed in VGPR8-254.
  CCIfNotInReg<
    CCIfType<[f32, i32, f16, i16, v2i16, v2f16, bf16, v2bf16],
      CCAssignToReg<
        !foreach(RegNo, !range(8, 255), !cast<Register>("VGPR" # RegNo))
      >>>
]>;

//===----------------------------------------------------------------------===//
// Register masks
//===----------------------------------------------------------------------===//

// Trivial class to denote when a def is used only to get a RegMask, i.e.
// SaveList is ignored and the def is not used as part of any calling
// convention.
class RegMask<dag mask> : CalleeSavedRegs<mask>;

def AMDGPU_AllVGPRs : RegMask<
  (sequence "VGPR%u", 0, 255)
>;

def AMDGPU_AllAGPRs : RegMask<
  (sequence "AGPR%u", 0, 255)
>;

def AMDGPU_AllVectorRegs : RegMask<
  (add AMDGPU_AllVGPRs, AMDGPU_AllAGPRs)
>;

def AMDGPU_AllAllocatableSRegs : RegMask<
  (add (sequence "SGPR%u", 0, 105), VCC_LO, VCC_HI)
>;