//=== X86CallingConv.cpp - X86 Custom Calling Convention Impl -*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the implementation of custom routines for the X86
// Calling Convention that aren't done by tablegen.
//
//===----------------------------------------------------------------------===//

#include "X86CallingConv.h"
#include "X86Subtarget.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Module.h"

using namespace llvm;

/// When the regcall calling convention is compiled for a 32-bit arch, 64-bit
/// mask values require special treatment: each such value is assigned to two
/// GPRs.
/// \return true if registers were allocated and false otherwise.
static bool CC_X86_32_RegCall_Assign2Regs(unsigned &ValNo, MVT &ValVT,
                                          MVT &LocVT,
                                          CCValAssign::LocInfo &LocInfo,
                                          ISD::ArgFlagsTy &ArgFlags,
                                          CCState &State) {
  // List of GPR registers that are available for storing values in the
  // regcall calling convention.
  static const MCPhysReg RegList[] = {X86::EAX, X86::ECX, X86::EDX, X86::EDI,
                                      X86::ESI};

  // Collect all the registers that are still available for allocation.
  SmallVector<unsigned, 5> AvailableRegs;
  for (auto Reg : RegList) {
    if (!State.isAllocated(Reg))
      AvailableRegs.push_back(Reg);
  }

  const size_t RequiredGprsUponSplit = 2;
  if (AvailableRegs.size() < RequiredGprsUponSplit)
    return false; // Not enough free registers - continue the search.

  // Allocate the available registers.
  for (unsigned I = 0; I < RequiredGprsUponSplit; I++) {
    // Mark the register as allocated.
    unsigned Reg = State.AllocateReg(AvailableRegs[I]);

    // Since we previously made sure that 2 registers are available,
    // we expect that a real register number will be returned.
    assert(Reg && "Expecting a register will be available");

    // Assign the value to the allocated register.
    State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
  }

  // Successfully allocated the registers - stop scanning further rules.
  return true;
}
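// Illustrative example (editorial note, not part of the original source): for
// a prototype such as
//   void __regcall f(__mmask64 M);
// compiled for i386, the i64 mask is split into two i32 halves. With no
// registers allocated yet, the loop above assigns the low half to EAX and the
// high half to ECX, the first two entries of RegList.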
static ArrayRef<MCPhysReg> CC_X86_VectorCallGetSSEs(const MVT &ValVT) {
  if (ValVT.is512BitVector()) {
    static const MCPhysReg RegListZMM[] = {X86::ZMM0, X86::ZMM1, X86::ZMM2,
                                           X86::ZMM3, X86::ZMM4, X86::ZMM5};
    return ArrayRef(std::begin(RegListZMM), std::end(RegListZMM));
  }

  if (ValVT.is256BitVector()) {
    static const MCPhysReg RegListYMM[] = {X86::YMM0, X86::YMM1, X86::YMM2,
                                           X86::YMM3, X86::YMM4, X86::YMM5};
    return ArrayRef(std::begin(RegListYMM), std::end(RegListYMM));
  }

  static const MCPhysReg RegListXMM[] = {X86::XMM0, X86::XMM1, X86::XMM2,
                                         X86::XMM3, X86::XMM4, X86::XMM5};
  return ArrayRef(std::begin(RegListXMM), std::end(RegListXMM));
}

static ArrayRef<MCPhysReg> CC_X86_64_VectorCallGetGPRs() {
  static const MCPhysReg RegListGPR[] = {X86::RCX, X86::RDX, X86::R8, X86::R9};
  return ArrayRef(std::begin(RegListGPR), std::end(RegListGPR));
}

static bool CC_X86_VectorCallAssignRegister(unsigned &ValNo, MVT &ValVT,
                                            MVT &LocVT,
                                            CCValAssign::LocInfo &LocInfo,
                                            ISD::ArgFlagsTy &ArgFlags,
                                            CCState &State) {
  ArrayRef<MCPhysReg> RegList = CC_X86_VectorCallGetSSEs(ValVT);
  bool Is64bit = static_cast<const X86Subtarget &>(
                     State.getMachineFunction().getSubtarget())
                     .is64Bit();

  for (auto Reg : RegList) {
    // If the register is not marked as allocated - assign to it.
    if (!State.isAllocated(Reg)) {
      unsigned AssignedReg = State.AllocateReg(Reg);
      assert(AssignedReg == Reg && "Expecting a valid register allocation");
      State.addLoc(
          CCValAssign::getReg(ValNo, ValVT, AssignedReg, LocVT, LocInfo));
      return true;
    }
    // If the register is marked as shadow allocated - assign to it.
    if (Is64bit && State.IsShadowAllocatedReg(Reg)) {
      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
      return true;
    }
  }

  llvm_unreachable("Clang should ensure that hva marked vectors will have "
                   "an available register.");
  return false;
}
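// Illustrative note (editorial, not part of the original source): for HVA
// arguments, only the first element reserves registers on the first pass (see
// CC_X86_64_VectorCall below); on the second pass each element is then given
// the first XMM register that is either still free or only shadow-allocated.
// With no other vector arguments, an HVA of four __m128 values would
// typically end up in XMM0-XMM3.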
/// The vectorcall calling convention has special handling for vector types
/// and HVAs on 64-bit arch.
/// For HVAs shadow registers might be allocated on the first pass
/// and actual XMM registers are allocated on the second pass.
/// For vector types, actual XMM registers are allocated on the first pass.
/// \return true if registers were allocated and false otherwise.
static bool CC_X86_64_VectorCall(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
                                 CCValAssign::LocInfo &LocInfo,
                                 ISD::ArgFlagsTy &ArgFlags, CCState &State) {
  // On the second pass, go through the HVAs only.
  if (ArgFlags.isSecArgPass()) {
    if (ArgFlags.isHva())
      return CC_X86_VectorCallAssignRegister(ValNo, ValVT, LocVT, LocInfo,
                                             ArgFlags, State);
    return true;
  }

  // Process only vector types as defined by the vectorcall spec:
  // "A vector type is either a floating-point type, for example,
  // a float or double, or an SIMD vector type, for example, __m128 or __m256".
  if (!(ValVT.isFloatingPoint() ||
        (ValVT.isVector() && ValVT.getSizeInBits() >= 128))) {
    // If R9 was already assigned, we are past the fourth argument, and
    // because this is not an HVA / vector type, we need to allocate a shadow
    // XMM register.
    if (State.isAllocated(X86::R9)) {
      // Assign shadow XMM register.
      (void)State.AllocateReg(CC_X86_VectorCallGetSSEs(ValVT));
    }

    return false;
  }

  if (!ArgFlags.isHva() || ArgFlags.isHvaStart()) {
    // Assign shadow GPR register.
    (void)State.AllocateReg(CC_X86_64_VectorCallGetGPRs());

    // Assign XMM register - (shadow for HVA and non-shadow for non HVA).
    if (unsigned Reg = State.AllocateReg(CC_X86_VectorCallGetSSEs(ValVT))) {
      // In the vectorcall calling convention, additional shadow stack can be
      // created on top of the basic 32 bytes of win64. This happens when the
      // fifth or sixth argument is a vector type or HVA; in that case a
      // shadow stack slot of 8 bytes is allocated for each such argument.
      const TargetRegisterInfo *TRI =
          State.getMachineFunction().getSubtarget().getRegisterInfo();
      if (TRI->regsOverlap(Reg, X86::XMM4) ||
          TRI->regsOverlap(Reg, X86::XMM5))
        State.AllocateStack(8, Align(8));

      if (!ArgFlags.isHva()) {
        State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
        return true; // Allocated a register - stop the search.
      }
    }
  }

  // If this is an HVA - stop the search;
  // otherwise continue the search.
  return ArgFlags.isHva();
}
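// Illustrative example (editorial, not part of the original source),
// following the Microsoft vectorcall documentation:
//   int __vectorcall fn(float a, double b, int c, __m128 d);
// a and b are vector types per the spec and take XMM0 and XMM1, each also
// shadow-allocating a GPR; c is not a vector type, so it falls through to the
// regular win64 rules and lands in R8D while shadowing XMM2; d therefore
// receives XMM3.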
/// The vectorcall calling convention has special handling for vector types
/// and HVAs on 32-bit arch.
/// For HVAs actual XMM registers are allocated on the second pass.
/// For vector types, actual XMM registers are allocated on the first pass.
/// \return true if registers were allocated and false otherwise.
static bool CC_X86_32_VectorCall(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
                                 CCValAssign::LocInfo &LocInfo,
                                 ISD::ArgFlagsTy &ArgFlags, CCState &State) {
  // On the second pass, go through the HVAs only.
  if (ArgFlags.isSecArgPass()) {
    if (ArgFlags.isHva())
      return CC_X86_VectorCallAssignRegister(ValNo, ValVT, LocVT, LocInfo,
                                             ArgFlags, State);
    return true;
  }

  // Process only vector types as defined by the vectorcall spec:
  // "A vector type is either a floating-point type, for example,
  // a float or double, or an SIMD vector type, for example, __m128 or __m256".
  if (!(ValVT.isFloatingPoint() ||
        (ValVT.isVector() && ValVT.getSizeInBits() >= 128))) {
    return false;
  }

  if (ArgFlags.isHva())
    return true; // If this is an HVA - stop the search.

  // Assign an XMM register.
  if (unsigned Reg = State.AllocateReg(CC_X86_VectorCallGetSSEs(ValVT))) {
    State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
    return true;
  }

  // In case we did not find an available XMM register for a vector -
  // pass it indirectly.
  // This is similar to CCPassIndirect, with the addition of inreg.
  if (!ValVT.isFloatingPoint()) {
    LocVT = MVT::i32;
    LocInfo = CCValAssign::Indirect;
    ArgFlags.setInReg();
  }

  return false; // No register was assigned - continue the search.
}

static bool CC_X86_AnyReg_Error(unsigned &, MVT &, MVT &,
                                CCValAssign::LocInfo &, ISD::ArgFlagsTy &,
                                CCState &) {
  llvm_unreachable("The AnyReg calling convention is only supported by the "
                   "stackmap and patchpoint intrinsics.");
  // Gracefully fall back to the X86 C calling convention on Release builds.
  return false;
}

static bool CC_X86_32_MCUInReg(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
                               CCValAssign::LocInfo &LocInfo,
                               ISD::ArgFlagsTy &ArgFlags, CCState &State) {
  // This is similar to CCAssignToReg<[EAX, EDX, ECX]>, but makes sure
  // not to split an i64 or double between a register and the stack.
  static const MCPhysReg RegList[] = {X86::EAX, X86::EDX, X86::ECX};
  static const unsigned NumRegs = std::size(RegList);

  SmallVectorImpl<CCValAssign> &PendingMembers = State.getPendingLocs();

  // If this is the first part of a double/i64/i128, or if we're already in
  // the middle of a split, add to the pending list. If this is not the end
  // of the split, return; otherwise go on to process the pending list.
  if (ArgFlags.isSplit() || !PendingMembers.empty()) {
    PendingMembers.push_back(
        CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
    if (!ArgFlags.isSplitEnd())
      return true;
  }

  // If there are no pending members, we are not in the middle of a split,
  // so do the usual inreg stuff.
  if (PendingMembers.empty()) {
    if (unsigned Reg = State.AllocateReg(RegList)) {
      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
      return true;
    }
    return false;
  }

  assert(ArgFlags.isSplitEnd());

  // We now have the entire original argument in PendingMembers, so decide
  // whether to use registers or the stack.
  // Per the MCU ABI:
  // a) To use registers, we need to have enough of them free to contain
  //    the entire argument.
  // b) We never want to use more than 2 registers for a single argument.

  unsigned FirstFree = State.getFirstUnallocated(RegList);
  bool UseRegs = PendingMembers.size() <= std::min(2U, NumRegs - FirstFree);

  for (auto &It : PendingMembers) {
    if (UseRegs)
      It.convertToReg(State.AllocateReg(RegList[FirstFree++]));
    else
      It.convertToMem(State.AllocateStack(4, Align(4)));
    State.addLoc(It);
  }

  PendingMembers.clear();

  return true;
}
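// Illustrative example (editorial, not part of the original source): with all
// of EAX/EDX/ECX free, an i64 argument arrives as two pending i32 parts, so
// UseRegs is true and the parts land in EAX and EDX. An i128 arrives as four
// parts, which exceeds the two-register limit, so all four parts go to the
// stack - the argument is never split between registers and memory.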
/// X86 interrupt handlers can only take one or two stack arguments, but if
/// there are two arguments, they are in the opposite order from the standard
/// convention. Therefore, we have to look at the argument count up front
/// before allocating stack for each argument.
static bool CC_X86_Intr(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
                        CCValAssign::LocInfo &LocInfo,
                        ISD::ArgFlagsTy &ArgFlags, CCState &State) {
  const MachineFunction &MF = State.getMachineFunction();
  size_t ArgCount = MF.getFunction().arg_size();
  bool Is64Bit = MF.getSubtarget<X86Subtarget>().is64Bit();
  unsigned SlotSize = Is64Bit ? 8 : 4;
  unsigned Offset;
  if (ArgCount == 1 && ValNo == 0) {
    // If we have one argument, the argument is five stack slots big, at fixed
    // offset zero.
    Offset = State.AllocateStack(5 * SlotSize, Align(4));
  } else if (ArgCount == 2 && ValNo == 0) {
    // If we have two arguments, the stack slot is *after* the error code
    // argument. Pretend it doesn't consume stack space, and account for it
    // when we assign the second argument.
    Offset = SlotSize;
  } else if (ArgCount == 2 && ValNo == 1) {
    // If this is the second of two arguments, it must be the error code. It
    // appears first on the stack, and is then followed by the five-slot
    // interrupt struct.
    Offset = 0;
    (void)State.AllocateStack(6 * SlotSize, Align(4));
  } else {
    report_fatal_error("unsupported x86 interrupt prototype");
  }

  // FIXME: This should be accounted for in
  // X86FrameLowering::getFrameIndexReference, not here.
  if (Is64Bit && ArgCount == 2)
    Offset += SlotSize;

  State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
  return true;
}

static bool CC_X86_64_Pointer(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
                              CCValAssign::LocInfo &LocInfo,
                              ISD::ArgFlagsTy &ArgFlags, CCState &State) {
  if (LocVT != MVT::i64) {
    LocVT = MVT::i64;
    LocInfo = CCValAssign::ZExt;
  }
  return false;
}

// Provides entry points of CC_X86 and RetCC_X86.
#include "X86GenCallingConv.inc"
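// Note (editorial, not part of the original source): the custom routines in
// this file are wired into the generated tables through CCCustom rules in
// X86CallingConv.td, roughly of the form
//   CCIfType<[i32, i64, f64], CCCustom<"CC_X86_32_MCUInReg">>
// and tablegen emits the dispatching CC_X86 / RetCC_X86 entry points provided
// by the include above.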