//=== X86CallingConv.cpp - X86 Custom Calling Convention Impl -*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the implementation of custom routines for the X86
// Calling Convention that aren't done by tablegen.
//
//===----------------------------------------------------------------------===//

#include "X86CallingConv.h"
#include "X86Subtarget.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/IR/CallingConv.h"

using namespace llvm;

/// When regcall calling convention compiled to 32 bit arch, special treatment
/// is required for 64 bit masks.
/// The value should be assigned to two GPRs.
/// Each half of the split value is recorded as a *custom* register assignment
/// (getCustomReg) so that target-specific lowering recombines the two GPRs.
/// \return true if registers were allocated and false otherwise.
static bool CC_X86_32_RegCall_Assign2Regs(unsigned &ValNo, MVT &ValVT,
                                          MVT &LocVT,
                                          CCValAssign::LocInfo &LocInfo,
                                          ISD::ArgFlagsTy &ArgFlags,
                                          CCState &State) {
  // List of GPR registers that are available to store values in regcall
  // calling convention.
  static const MCPhysReg RegList[] = {X86::EAX, X86::ECX, X86::EDX, X86::EDI,
                                      X86::ESI};

  // The vector will save all the available registers for allocation.
  SmallVector<unsigned, 5> AvailableRegs;

  // Searching for the available registers. Order of RegList is preserved so
  // the two halves land in the first two currently-free GPRs.
  for (auto Reg : RegList) {
    if (!State.isAllocated(Reg))
      AvailableRegs.push_back(Reg);
  }

  const size_t RequiredGprsUponSplit = 2;
  if (AvailableRegs.size() < RequiredGprsUponSplit)
    return false; // Not enough free registers - continue the search.

  // Allocating the available registers.
  for (unsigned I = 0; I < RequiredGprsUponSplit; I++) {

    // Marking the register as located.
    unsigned Reg = State.AllocateReg(AvailableRegs[I]);

    // Since we previously made sure that 2 registers are available
    // we expect that a real register number will be returned.
    assert(Reg && "Expecting a register will be available");

    // Assign the value to the allocated register.
    State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
  }

  // Successful in allocating registers - stop scanning next rules.
  return true;
}

/// Return the vectorcall SSE register pool matching the width of \p ValVT:
/// ZMM0-5 for 512-bit vectors, YMM0-5 for 256-bit vectors, and XMM0-5
/// otherwise (scalar FP and 128-bit vectors).
static ArrayRef<MCPhysReg> CC_X86_VectorCallGetSSEs(const MVT &ValVT) {
  if (ValVT.is512BitVector()) {
    static const MCPhysReg RegListZMM[] = {X86::ZMM0, X86::ZMM1, X86::ZMM2,
                                           X86::ZMM3, X86::ZMM4, X86::ZMM5};
    return ArrayRef(std::begin(RegListZMM), std::end(RegListZMM));
  }

  if (ValVT.is256BitVector()) {
    static const MCPhysReg RegListYMM[] = {X86::YMM0, X86::YMM1, X86::YMM2,
                                           X86::YMM3, X86::YMM4, X86::YMM5};
    return ArrayRef(std::begin(RegListYMM), std::end(RegListYMM));
  }

  static const MCPhysReg RegListXMM[] = {X86::XMM0, X86::XMM1, X86::XMM2,
                                         X86::XMM3, X86::XMM4, X86::XMM5};
  return ArrayRef(std::begin(RegListXMM), std::end(RegListXMM));
}

/// Return the GPR argument registers of the Win64 vectorcall convention
/// (RCX, RDX, R8, R9), used here for shadow-register bookkeeping.
static ArrayRef<MCPhysReg> CC_X86_64_VectorCallGetGPRs() {
  static const MCPhysReg RegListGPR[] = {X86::RCX, X86::RDX, X86::R8, X86::R9};
  return ArrayRef(std::begin(RegListGPR), std::end(RegListGPR));
}

/// Assign \p ValVT to the first suitable SSE register: either one that is
/// completely free, or (on 64 bit only) one that was merely shadow-allocated
/// on the first pass.
/// \return true on success; unreachable if no register qualifies, since the
/// front end guarantees HVA members fit in the register pool.
static bool CC_X86_VectorCallAssignRegister(unsigned &ValNo, MVT &ValVT,
                                            MVT &LocVT,
                                            CCValAssign::LocInfo &LocInfo,
                                            ISD::ArgFlagsTy &ArgFlags,
                                            CCState &State) {

  ArrayRef<MCPhysReg> RegList = CC_X86_VectorCallGetSSEs(ValVT);
  bool Is64bit = static_cast<const X86Subtarget &>(
                     State.getMachineFunction().getSubtarget())
                     .is64Bit();

  for (auto Reg : RegList) {
    // If the register is not marked as allocated - assign to it.
    if (!State.isAllocated(Reg)) {
      unsigned AssigedReg = State.AllocateReg(Reg);
      assert(AssigedReg == Reg && "Expecting a valid register allocation");
      State.addLoc(
          CCValAssign::getReg(ValNo, ValVT, AssigedReg, LocVT, LocInfo));
      return true;
    }
    // If the register is marked as shadow allocated - assign to it.
    // (Shadow allocation only happens on the 64-bit path; see
    // CC_X86_64_VectorCall's first pass.)
    if (Is64bit && State.IsShadowAllocatedReg(Reg)) {
      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
      return true;
    }
  }

  llvm_unreachable("Clang should ensure that hva marked vectors will have "
                   "an available register.");
  // Not reached in asserts builds; keeps release builds well-formed.
  return false;
}

/// Vectorcall calling convention has special handling for vector types or
/// HVA for 64 bit arch.
/// For HVAs shadow registers might be allocated on the first pass
/// and actual XMM registers are allocated on the second pass.
/// For vector types, actual XMM registers are allocated on the first pass.
/// \return true if registers were allocated and false otherwise.
static bool CC_X86_64_VectorCall(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
                                 CCValAssign::LocInfo &LocInfo,
                                 ISD::ArgFlagsTy &ArgFlags, CCState &State) {
  // On the second pass, go through the HVAs only.
  if (ArgFlags.isSecArgPass()) {
    if (ArgFlags.isHva())
      return CC_X86_VectorCallAssignRegister(ValNo, ValVT, LocVT, LocInfo,
                                             ArgFlags, State);
    // Non-HVA arguments were fully handled on the first pass.
    return true;
  }

  // Process only vector types as defined by vectorcall spec:
  // "A vector type is either a floating-point type, for example,
  // a float or double, or an SIMD vector type, for example, __m128 or __m256".
  if (!(ValVT.isFloatingPoint() ||
        (ValVT.isVector() && ValVT.getSizeInBits() >= 128))) {
    // If R9 was already assigned it means that we are after the fourth element
    // and because this is not an HVA / Vector type, we need to allocate
    // shadow XMM register.
    if (State.isAllocated(X86::R9)) {
      // Assign shadow XMM register.
      (void)State.AllocateReg(CC_X86_VectorCallGetSSEs(ValVT));
    }

    // Continue the search - let the tablegen-generated rules place it.
    return false;
  }

  if (!ArgFlags.isHva() || ArgFlags.isHvaStart()) {
    // Assign shadow GPR register.
    (void)State.AllocateReg(CC_X86_64_VectorCallGetGPRs());

    // Assign XMM register - (shadow for HVA and non-shadow for non HVA).
    if (unsigned Reg = State.AllocateReg(CC_X86_VectorCallGetSSEs(ValVT))) {
      // In Vectorcall Calling convention, additional shadow stack can be
      // created on top of the basic 32 bytes of win64.
      // It can happen if the fifth or sixth argument is vector type or HVA.
      // At that case for each argument a shadow stack of 8 bytes is allocated.
      const TargetRegisterInfo *TRI =
          State.getMachineFunction().getSubtarget().getRegisterInfo();
      if (TRI->regsOverlap(Reg, X86::XMM4) ||
          TRI->regsOverlap(Reg, X86::XMM5))
        State.AllocateStack(8, Align(8));

      if (!ArgFlags.isHva()) {
        State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
        return true; // Allocated a register - Stop the search.
      }
    }
  }

  // If this is an HVA - Stop the search,
  // otherwise continue the search.
  return ArgFlags.isHva();
}

/// Vectorcall calling convention has special handling for vector types or
/// HVA for 32 bit arch.
/// For HVAs actual XMM registers are allocated on the second pass.
/// For vector types, actual XMM registers are allocated on the first pass.
/// \return true if registers were allocated and false otherwise.
static bool CC_X86_32_VectorCall(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
                                 CCValAssign::LocInfo &LocInfo,
                                 ISD::ArgFlagsTy &ArgFlags, CCState &State) {
  // On the second pass, go through the HVAs only.
  if (ArgFlags.isSecArgPass()) {
    if (ArgFlags.isHva())
      return CC_X86_VectorCallAssignRegister(ValNo, ValVT, LocVT, LocInfo,
                                             ArgFlags, State);
    // Non-HVA arguments were fully handled on the first pass.
    return true;
  }

  // Process only vector types as defined by vectorcall spec:
  // "A vector type is either a floating point type, for example,
  // a float or double, or an SIMD vector type, for example, __m128 or __m256".
  if (!(ValVT.isFloatingPoint() ||
        (ValVT.isVector() && ValVT.getSizeInBits() >= 128))) {
    return false;
  }

  if (ArgFlags.isHva())
    return true; // If this is an HVA - Stop the search.

  // Assign XMM register.
  if (unsigned Reg = State.AllocateReg(CC_X86_VectorCallGetSSEs(ValVT))) {
    State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
    return true;
  }

  // In case we did not find an available XMM register for a vector -
  // pass it indirectly.
  // It is similar to CCPassIndirect, with the addition of inreg.
  if (!ValVT.isFloatingPoint()) {
    LocVT = MVT::i32;
    LocInfo = CCValAssign::Indirect;
    ArgFlags.setInReg();
  }

  return false; // No register was assigned - Continue the search.
}

/// Diagnose (mis)use of the AnyReg convention: it is only valid for the
/// stackmap/patchpoint intrinsics, which never reach this assignment path.
static bool CC_X86_AnyReg_Error(unsigned &, MVT &, MVT &,
                                CCValAssign::LocInfo &, ISD::ArgFlagsTy &,
                                CCState &) {
  llvm_unreachable("The AnyReg calling convention is only supported by the "
                   "stackmap and patchpoint intrinsics.");
  // gracefully fallback to X86 C calling convention on Release builds.
  return false;
}

/// MCU inreg argument assignment: place arguments in EAX/EDX/ECX, but never
/// split a multi-part value (double/i64/i128) between registers and the
/// stack - the whole argument goes to registers or the whole argument goes
/// to memory.
/// \return true if the argument (or split part) was fully handled.
static bool CC_X86_32_MCUInReg(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
                               CCValAssign::LocInfo &LocInfo,
                               ISD::ArgFlagsTy &ArgFlags, CCState &State) {
  // This is similar to CCAssignToReg<[EAX, EDX, ECX]>, but makes sure
  // not to split i64 and double between a register and stack
  static const MCPhysReg RegList[] = {X86::EAX, X86::EDX, X86::ECX};
  static const unsigned NumRegs = std::size(RegList);

  SmallVectorImpl<CCValAssign> &PendingMembers = State.getPendingLocs();

  // If this is the first part of an double/i64/i128, or if we're already
  // in the middle of a split, add to the pending list. If this is not
  // the end of the split, return, otherwise go on to process the pending
  // list
  if (ArgFlags.isSplit() || !PendingMembers.empty()) {
    PendingMembers.push_back(
        CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
    if (!ArgFlags.isSplitEnd())
      return true;
  }

  // If there are no pending members, we are not in the middle of a split,
  // so do the usual inreg stuff.
  if (PendingMembers.empty()) {
    if (unsigned Reg = State.AllocateReg(RegList)) {
      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
      return true;
    }
    return false;
  }

  assert(ArgFlags.isSplitEnd());

  // We now have the entire original argument in PendingMembers, so decide
  // whether to use registers or the stack.
  // Per the MCU ABI:
  // a) To use registers, we need to have enough of them free to contain
  // the entire argument.
  // b) We never want to use more than 2 registers for a single argument.

  unsigned FirstFree = State.getFirstUnallocated(RegList);
  bool UseRegs = PendingMembers.size() <= std::min(2U, NumRegs - FirstFree);

  for (auto &It : PendingMembers) {
    if (UseRegs)
      It.convertToReg(State.AllocateReg(RegList[FirstFree++]));
    else
      It.convertToMem(State.AllocateStack(4, Align(4)));
    State.addLoc(It);
  }

  PendingMembers.clear();

  return true;
}

/// X86 interrupt handlers can only take one or two stack arguments, but if
/// there are two arguments, they are in the opposite order from the standard
/// convention. Therefore, we have to look at the argument count up front before
/// allocating stack for each argument.
static bool CC_X86_Intr(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
                        CCValAssign::LocInfo &LocInfo,
                        ISD::ArgFlagsTy &ArgFlags, CCState &State) {
  const MachineFunction &MF = State.getMachineFunction();
  size_t ArgCount = State.getMachineFunction().getFunction().arg_size();
  bool Is64Bit = MF.getSubtarget<X86Subtarget>().is64Bit();
  // Stack slot width follows the pointer size of the target.
  unsigned SlotSize = Is64Bit ? 8 : 4;
  unsigned Offset;
  if (ArgCount == 1 && ValNo == 0) {
    // If we have one argument, the argument is five stack slots big, at fixed
    // offset zero.
    Offset = State.AllocateStack(5 * SlotSize, Align(4));
  } else if (ArgCount == 2 && ValNo == 0) {
    // If we have two arguments, the stack slot is *after* the error code
    // argument. Pretend it doesn't consume stack space, and account for it when
    // we assign the second argument.
    Offset = SlotSize;
  } else if (ArgCount == 2 && ValNo == 1) {
    // If this is the second of two arguments, it must be the error code. It
    // appears first on the stack, and is then followed by the five slot
    // interrupt struct.
    Offset = 0;
    (void)State.AllocateStack(6 * SlotSize, Align(4));
  } else {
    report_fatal_error("unsupported x86 interrupt prototype");
  }

  // FIXME: This should be accounted for in
  // X86FrameLowering::getFrameIndexReference, not here.
  if (Is64Bit && ArgCount == 2)
    Offset += SlotSize;

  State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
  return true;
}

/// Widen a sub-64-bit pointer location to a zero-extended i64 location, then
/// fall through (return false) so later rules pick the actual register/slot.
static bool CC_X86_64_Pointer(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
                              CCValAssign::LocInfo &LocInfo,
                              ISD::ArgFlagsTy &ArgFlags, CCState &State) {
  if (LocVT != MVT::i64) {
    LocVT = MVT::i64;
    LocInfo = CCValAssign::ZExt;
  }
  return false;
}

// Provides entry points of CC_X86 and RetCC_X86.
#include "X86GenCallingConv.inc"