1 //===- llvm/lib/Target/X86/X86ISelCallLowering.cpp - Call lowering --------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// This file implements the lowering of LLVM calls to DAG nodes.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #include "X86.h"
15 #include "X86CallingConv.h"
16 #include "X86FrameLowering.h"
17 #include "X86ISelLowering.h"
18 #include "X86InstrBuilder.h"
19 #include "X86MachineFunctionInfo.h"
20 #include "X86TargetMachine.h"
21 #include "X86TargetObjectFile.h"
22 #include "llvm/ADT/Statistic.h"
23 #include "llvm/Analysis/ObjCARCUtil.h"
24 #include "llvm/CodeGen/MachineJumpTableInfo.h"
25 #include "llvm/CodeGen/MachineModuleInfo.h"
26 #include "llvm/CodeGen/WinEHFuncInfo.h"
27 #include "llvm/IR/DiagnosticInfo.h"
28 #include "llvm/IR/IRBuilder.h"
29 #include "llvm/IR/Module.h"
30
31 #define DEBUG_TYPE "x86-isel"
32
33 using namespace llvm;
34
35 STATISTIC(NumTailCalls, "Number of tail calls");
36
37 /// Call this when the user attempts to do something unsupported, like
38 /// returning a double without SSE2 enabled on x86_64. This is not fatal, unlike
39 /// report_fatal_error, so calling code should attempt to recover without
40 /// crashing.
41 static void errorUnsupported(SelectionDAG &DAG, const SDLoc &dl,
42 const char *Msg) {
43 MachineFunction &MF = DAG.getMachineFunction();
44 DAG.getContext()->diagnose(
45 DiagnosticInfoUnsupported(MF.getFunction(), Msg, dl.getDebugLoc()));
46 }
47
48 /// Returns true if a CC can dynamically exclude a register from the list of
49 /// callee-saved registers (TargetRegisterInfo::getCalleeSavedRegs()) based on
50 /// the return registers.
51 static bool shouldDisableRetRegFromCSR(CallingConv::ID CC) {
52 switch (CC) {
53 default:
54 return false;
55 case CallingConv::X86_RegCall:
56 case CallingConv::PreserveMost:
57 case CallingConv::PreserveAll:
58 return true;
59 }
60 }
61
62 /// Returns true if a CC can dynamically exclude a register from the list of
63 /// callee-saved registers (TargetRegisterInfo::getCalleeSavedRegs()) based on
64 /// the parameters.
65 static bool shouldDisableArgRegFromCSR(CallingConv::ID CC) {
66 return CC == CallingConv::X86_RegCall;
67 }
68
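/// Map a vXi1 mask argument to the register type and register count used to
/// pass it under the given calling convention. For example, with AVX-512 but
/// no BWI, a v64i1 value is broken into 64 i8 scalars, while with BWI and
/// 512-bit registers it travels as a single v64i8. Returns
/// {INVALID_SIMPLE_VALUE_TYPE, 0} if the generic breakdown logic should be
/// used instead.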
69 static std::pair<MVT, unsigned>
70 handleMaskRegisterForCallingConv(unsigned NumElts, CallingConv::ID CC,
71 const X86Subtarget &Subtarget) {
72 // v2i1/v4i1/v8i1/v16i1 all pass in xmm registers unless the calling
73 // convention is one that uses k registers.
74 if (NumElts == 2)
75 return {MVT::v2i64, 1};
76 if (NumElts == 4)
77 return {MVT::v4i32, 1};
78 if (NumElts == 8 && CC != CallingConv::X86_RegCall &&
79 CC != CallingConv::Intel_OCL_BI)
80 return {MVT::v8i16, 1};
81 if (NumElts == 16 && CC != CallingConv::X86_RegCall &&
82 CC != CallingConv::Intel_OCL_BI)
83 return {MVT::v16i8, 1};
84 // v32i1 passes in ymm unless we have BWI and the calling convention is
85 // regcall.
86 if (NumElts == 32 && (!Subtarget.hasBWI() || CC != CallingConv::X86_RegCall))
87 return {MVT::v32i8, 1};
88 // Split v64i1 vectors if we don't have v64i8 available.
89 if (NumElts == 64 && Subtarget.hasBWI() && CC != CallingConv::X86_RegCall) {
90 if (Subtarget.useAVX512Regs())
91 return {MVT::v64i8, 1};
92 return {MVT::v32i8, 2};
93 }
94
95 // Break wide or odd vXi1 vectors into scalars to match avx2 behavior.
96 if (!isPowerOf2_32(NumElts) || (NumElts == 64 && !Subtarget.hasBWI()) ||
97 NumElts > 64)
98 return {MVT::i8, NumElts};
99
100 return {MVT::INVALID_SIMPLE_VALUE_TYPE, 0};
101 }
102
103 MVT X86TargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
104 CallingConv::ID CC,
105 EVT VT) const {
106 if (VT.isVector()) {
107 if (VT.getVectorElementType() == MVT::i1 && Subtarget.hasAVX512()) {
108 unsigned NumElts = VT.getVectorNumElements();
109
110 MVT RegisterVT;
111 unsigned NumRegisters;
112 std::tie(RegisterVT, NumRegisters) =
113 handleMaskRegisterForCallingConv(NumElts, CC, Subtarget);
114 if (RegisterVT != MVT::INVALID_SIMPLE_VALUE_TYPE)
115 return RegisterVT;
116 }
117
118 if (VT.getVectorElementType() == MVT::f16 && VT.getVectorNumElements() < 8)
119 return MVT::v8f16;
120 }
121
122 // We use more GPRs for f64 and f80 in 32-bit mode when x87 is disabled.
123 if ((VT == MVT::f64 || VT == MVT::f80) && !Subtarget.is64Bit() &&
124 !Subtarget.hasX87())
125 return MVT::i32;
126
127 if (VT.isVector() && VT.getVectorElementType() == MVT::bf16)
128 return getRegisterTypeForCallingConv(Context, CC,
129 VT.changeVectorElementType(MVT::f16));
130
131 if (VT == MVT::bf16)
132 return MVT::f16;
133
134 return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
135 }
136
137 unsigned X86TargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
138 CallingConv::ID CC,
139 EVT VT) const {
140 if (VT.isVector()) {
141 if (VT.getVectorElementType() == MVT::i1 && Subtarget.hasAVX512()) {
142 unsigned NumElts = VT.getVectorNumElements();
143
144 MVT RegisterVT;
145 unsigned NumRegisters;
146 std::tie(RegisterVT, NumRegisters) =
147 handleMaskRegisterForCallingConv(NumElts, CC, Subtarget);
148 if (RegisterVT != MVT::INVALID_SIMPLE_VALUE_TYPE)
149 return NumRegisters;
150 }
151
152 if (VT.getVectorElementType() == MVT::f16 && VT.getVectorNumElements() < 8)
153 return 1;
154 }
155
156 // We have to split f64 into 2 registers and f80 into 3 registers in 32-bit
157 // mode if x87 is disabled.
158 if (!Subtarget.is64Bit() && !Subtarget.hasX87()) {
159 if (VT == MVT::f64)
160 return 2;
161 if (VT == MVT::f80)
162 return 3;
163 }
164
165 if (VT.isVector() && VT.getVectorElementType() == MVT::bf16)
166 return getNumRegistersForCallingConv(Context, CC,
167 VT.changeVectorElementType(MVT::f16));
168
169 return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
170 }
171
172 unsigned X86TargetLowering::getVectorTypeBreakdownForCallingConv(
173 LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
174 unsigned &NumIntermediates, MVT &RegisterVT) const {
175 // Break wide or odd vXi1 vectors into scalars to match avx2 behavior.
176 if (VT.isVector() && VT.getVectorElementType() == MVT::i1 &&
177 Subtarget.hasAVX512() &&
178 (!isPowerOf2_32(VT.getVectorNumElements()) ||
179 (VT.getVectorNumElements() == 64 && !Subtarget.hasBWI()) ||
180 VT.getVectorNumElements() > 64)) {
181 RegisterVT = MVT::i8;
182 IntermediateVT = MVT::i1;
183 NumIntermediates = VT.getVectorNumElements();
184 return NumIntermediates;
185 }
186
187 // Split v64i1 vectors if we don't have v64i8 available.
188 if (VT == MVT::v64i1 && Subtarget.hasBWI() && !Subtarget.useAVX512Regs() &&
189 CC != CallingConv::X86_RegCall) {
190 RegisterVT = MVT::v32i8;
191 IntermediateVT = MVT::v32i1;
192 NumIntermediates = 2;
193 return 2;
194 }
195
196 // Split vNbf16 vectors according to vNf16.
197 if (VT.isVector() && VT.getVectorElementType() == MVT::bf16)
198 VT = VT.changeVectorElementType(MVT::f16);
199
200 return TargetLowering::getVectorTypeBreakdownForCallingConv(Context, CC, VT, IntermediateVT,
201 NumIntermediates, RegisterVT);
202 }
203
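/// Return the type produced by ISD::SETCC: i8 for scalar compares, a vXi1
/// mask when AVX-512 makes that legal for the legalized operand type, and
/// otherwise a vector of element-sized integers.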
204 EVT X86TargetLowering::getSetCCResultType(const DataLayout &DL,
205 LLVMContext& Context,
206 EVT VT) const {
207 if (!VT.isVector())
208 return MVT::i8;
209
210 if (Subtarget.hasAVX512()) {
211 // Figure out what this type will be legalized to.
212 EVT LegalVT = VT;
213 while (getTypeAction(Context, LegalVT) != TypeLegal)
214 LegalVT = getTypeToTransformTo(Context, LegalVT);
215
216 // If we got a 512-bit vector then we'll definitely have a vXi1 compare.
217 if (LegalVT.getSimpleVT().is512BitVector())
218 return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
219
220 if (LegalVT.getSimpleVT().isVector() && Subtarget.hasVLX()) {
221 // If we legalized to less than a 512-bit vector, then we will use a vXi1
222 // compare for vXi32/vXi64 for sure. If we have BWI we will also support
223 // vXi16/vXi8.
224 MVT EltVT = LegalVT.getSimpleVT().getVectorElementType();
225 if (Subtarget.hasBWI() || EltVT.getSizeInBits() >= 32)
226 return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
227 }
228 }
229
230 return VT.changeVectorElementTypeToInteger();
231 }
232
233 /// Helper for getByValTypeAlignment to determine
234 /// the desired ByVal argument alignment.
235 static void getMaxByValAlign(Type *Ty, Align &MaxAlign) {
236 if (MaxAlign == 16)
237 return;
238 if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
239 if (VTy->getPrimitiveSizeInBits().getFixedValue() == 128)
240 MaxAlign = Align(16);
241 } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
242 Align EltAlign;
243 getMaxByValAlign(ATy->getElementType(), EltAlign);
244 if (EltAlign > MaxAlign)
245 MaxAlign = EltAlign;
246 } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
247 for (auto *EltTy : STy->elements()) {
248 Align EltAlign;
249 getMaxByValAlign(EltTy, EltAlign);
250 if (EltAlign > MaxAlign)
251 MaxAlign = EltAlign;
252 if (MaxAlign == 16)
253 break;
254 }
255 }
256 }
257
258 /// Return the desired alignment for ByVal aggregate
259 /// function arguments in the caller parameter area. For X86, aggregates
260 /// that contain SSE vectors are placed at 16-byte boundaries while the rest
261 /// are at 4-byte boundaries.
262 uint64_t X86TargetLowering::getByValTypeAlignment(Type *Ty,
263 const DataLayout &DL) const {
264 if (Subtarget.is64Bit()) {
265 // Max of 8 and alignment of type.
266 Align TyAlign = DL.getABITypeAlign(Ty);
267 if (TyAlign > 8)
268 return TyAlign.value();
269 return 8;
270 }
271
272 Align Alignment(4);
273 if (Subtarget.hasSSE1())
274 getMaxByValAlign(Ty, Alignment);
275 return Alignment.value();
276 }
277
278 /// It returns EVT::Other if the type should be determined using generic
279 /// target-independent logic.
280 /// For vector ops we check that the overall size isn't larger than our
281 /// preferred vector width.
282 EVT X86TargetLowering::getOptimalMemOpType(
283 const MemOp &Op, const AttributeList &FuncAttributes) const {
284 if (!FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat)) {
285 if (Op.size() >= 16 &&
286 (!Subtarget.isUnalignedMem16Slow() || Op.isAligned(Align(16)))) {
287 // FIXME: Check if unaligned 64-byte accesses are slow.
288 if (Op.size() >= 64 && Subtarget.hasAVX512() && Subtarget.hasEVEX512() &&
289 (Subtarget.getPreferVectorWidth() >= 512)) {
290 return Subtarget.hasBWI() ? MVT::v64i8 : MVT::v16i32;
291 }
292 // FIXME: Check if unaligned 32-byte accesses are slow.
293 if (Op.size() >= 32 && Subtarget.hasAVX() &&
294 Subtarget.useLight256BitInstructions()) {
295 // Although this isn't a well-supported type for AVX1, we'll let
296 // legalization and shuffle lowering produce the optimal codegen. If we
297 // choose an optimal type with a vector element larger than a byte,
298 // getMemsetStores() may create an intermediate splat (using an integer
299 // multiply) before we splat as a vector.
300 return MVT::v32i8;
301 }
302 if (Subtarget.hasSSE2() && (Subtarget.getPreferVectorWidth() >= 128))
303 return MVT::v16i8;
304 // TODO: Can SSE1 handle a byte vector?
305 // If we have SSE1 registers we should be able to use them.
306 if (Subtarget.hasSSE1() && (Subtarget.is64Bit() || Subtarget.hasX87()) &&
307 (Subtarget.getPreferVectorWidth() >= 128))
308 return MVT::v4f32;
309 } else if (((Op.isMemcpy() && !Op.isMemcpyStrSrc()) || Op.isZeroMemset()) &&
310 Op.size() >= 8 && !Subtarget.is64Bit() && Subtarget.hasSSE2()) {
311 // Do not use f64 to lower memcpy if source is string constant. It's
312 // better to use i32 to avoid the loads.
313 // Also, do not use f64 to lower memset unless this is a memset of zeros.
314 // The gymnastics of splatting a byte value into an XMM register and then
315 // only using 8-byte stores (because this is a CPU with slow unaligned
316 // 16-byte accesses) makes that a loser.
317 return MVT::f64;
318 }
319 }
320 // This is a compromise. If we reach here, unaligned accesses may be slow on
321 // this target. However, creating smaller, aligned accesses could be even
322 // slower and would certainly be a lot more code.
323 if (Subtarget.is64Bit() && Op.size() >= 8)
324 return MVT::i64;
325 return MVT::i32;
326 }
327
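/// Returns true if it is safe to use loads/stores of the given type when
/// expanding memcpy/memset inline. Without SSE, an f32/f64 copy would have to
/// round-trip through the x87 stack, which does not preserve all bit patterns
/// (e.g. signaling NaNs), so those types are only safe with SSE1/SSE2.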
328 bool X86TargetLowering::isSafeMemOpType(MVT VT) const {
329 if (VT == MVT::f32)
330 return Subtarget.hasSSE1();
331 if (VT == MVT::f64)
332 return Subtarget.hasSSE2();
333 return true;
334 }
335
336 static bool isBitAligned(Align Alignment, uint64_t SizeInBits) {
337 return (8 * Alignment.value()) % SizeInBits == 0;
338 }
339
340 bool X86TargetLowering::isMemoryAccessFast(EVT VT, Align Alignment) const {
341 if (isBitAligned(Alignment, VT.getSizeInBits()))
342 return true;
343 switch (VT.getSizeInBits()) {
344 default:
345 // 8-byte and under are always assumed to be fast.
346 return true;
347 case 128:
348 return !Subtarget.isUnalignedMem16Slow();
349 case 256:
350 return !Subtarget.isUnalignedMem32Slow();
351 // TODO: What about AVX-512 (512-bit) accesses?
352 }
353 }
354
355 bool X86TargetLowering::allowsMisalignedMemoryAccesses(
356 EVT VT, unsigned, Align Alignment, MachineMemOperand::Flags Flags,
357 unsigned *Fast) const {
358 if (Fast)
359 *Fast = isMemoryAccessFast(VT, Alignment);
360 // NonTemporal vector memory ops must be aligned.
361 if (!!(Flags & MachineMemOperand::MONonTemporal) && VT.isVector()) {
362 // NT loads can only be vector aligned, so if it's less aligned than the
363 // minimum vector size (which we can split the vector down to), we might as
364 // well use a regular unaligned vector load.
365 // We don't have any NT loads pre-SSE41.
366 if (!!(Flags & MachineMemOperand::MOLoad))
367 return (Alignment < 16 || !Subtarget.hasSSE41());
368 return false;
369 }
370 // Misaligned accesses of any size are always allowed.
371 return true;
372 }
373
374 bool X86TargetLowering::allowsMemoryAccess(LLVMContext &Context,
375 const DataLayout &DL, EVT VT,
376 unsigned AddrSpace, Align Alignment,
377 MachineMemOperand::Flags Flags,
378 unsigned *Fast) const {
379 if (Fast)
380 *Fast = isMemoryAccessFast(VT, Alignment);
381 if (!!(Flags & MachineMemOperand::MONonTemporal) && VT.isVector()) {
382 if (allowsMisalignedMemoryAccesses(VT, AddrSpace, Alignment, Flags,
383 /*Fast=*/nullptr))
384 return true;
385 // NonTemporal vector memory ops are special, and must be aligned.
386 if (!isBitAligned(Alignment, VT.getSizeInBits()))
387 return false;
388 switch (VT.getSizeInBits()) {
389 case 128:
390 if (!!(Flags & MachineMemOperand::MOLoad) && Subtarget.hasSSE41())
391 return true;
392 if (!!(Flags & MachineMemOperand::MOStore) && Subtarget.hasSSE2())
393 return true;
394 return false;
395 case 256:
396 if (!!(Flags & MachineMemOperand::MOLoad) && Subtarget.hasAVX2())
397 return true;
398 if (!!(Flags & MachineMemOperand::MOStore) && Subtarget.hasAVX())
399 return true;
400 return false;
401 case 512:
402 if (Subtarget.hasAVX512() && Subtarget.hasEVEX512())
403 return true;
404 return false;
405 default:
406 return false; // Don't have NonTemporal vector memory ops of this size.
407 }
408 }
409 return true;
410 }
411
412 /// Return the entry encoding for a jump table in the
413 /// current function. The returned value is a member of the
414 /// MachineJumpTableInfo::JTEntryKind enum.
415 unsigned X86TargetLowering::getJumpTableEncoding() const {
416 // In GOT pic mode, each entry in the jump table is emitted as a @GOTOFF
417 // symbol.
418 if (isPositionIndependent() && Subtarget.isPICStyleGOT())
419 return MachineJumpTableInfo::EK_Custom32;
420 if (isPositionIndependent() &&
421 getTargetMachine().getCodeModel() == CodeModel::Large &&
422 !Subtarget.isTargetCOFF())
423 return MachineJumpTableInfo::EK_LabelDifference64;
424
425 // Otherwise, use the normal jump table encoding heuristics.
426 return TargetLowering::getJumpTableEncoding();
427 }
428
429 bool X86TargetLowering::useSoftFloat() const {
430 return Subtarget.useSoftFloat();
431 }
432
433 void X86TargetLowering::markLibCallAttributes(MachineFunction *MF, unsigned CC,
434 ArgListTy &Args) const {
435
436 // Only relabel X86-32 for C / Stdcall CCs.
437 if (Subtarget.is64Bit())
438 return;
439 if (CC != CallingConv::C && CC != CallingConv::X86_StdCall)
440 return;
441 unsigned ParamRegs = 0;
442 if (auto *M = MF->getFunction().getParent())
443 ParamRegs = M->getNumberRegisterParameters();
444
445 // Mark the first N integer (or pointer) arguments as being passed in registers.
446 for (auto &Arg : Args) {
447 Type *T = Arg.Ty;
448 if (T->isIntOrPtrTy())
449 if (MF->getDataLayout().getTypeAllocSize(T) <= 8) {
450 unsigned numRegs = 1;
451 if (MF->getDataLayout().getTypeAllocSize(T) > 4)
452 numRegs = 2;
453 if (ParamRegs < numRegs)
454 return;
455 ParamRegs -= numRegs;
456 Arg.IsInReg = true;
457 }
458 }
459 }
460
461 const MCExpr *
462 X86TargetLowering::LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
463 const MachineBasicBlock *MBB,
464 unsigned uid, MCContext &Ctx) const {
465 assert(isPositionIndependent() && Subtarget.isPICStyleGOT());
466 // In 32-bit ELF systems, our jump table entries are formed with @GOTOFF
467 // entries.
468 return MCSymbolRefExpr::create(MBB->getSymbol(),
469 MCSymbolRefExpr::VK_GOTOFF, Ctx);
470 }
471
472 /// Returns relocation base for the given PIC jumptable.
473 SDValue X86TargetLowering::getPICJumpTableRelocBase(SDValue Table,
474 SelectionDAG &DAG) const {
475 if (!Subtarget.is64Bit())
476 // This doesn't have SDLoc associated with it, but is not really the
477 // same as a Register.
478 return DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(),
479 getPointerTy(DAG.getDataLayout()));
480 return Table;
481 }
482
483 /// This returns the relocation base for the given PIC jumptable,
484 /// the same as getPICJumpTableRelocBase, but as an MCExpr.
485 const MCExpr *X86TargetLowering::
486 getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI,
487 MCContext &Ctx) const {
488 // X86-64 uses RIP relative addressing based on the jump table label.
489 if (Subtarget.isPICStyleRIPRel() ||
490 (Subtarget.is64Bit() &&
491 getTargetMachine().getCodeModel() == CodeModel::Large))
492 return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
493
494 // Otherwise, the reference is relative to the PIC base.
495 return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
496 }
497
498 std::pair<const TargetRegisterClass *, uint8_t>
499 X86TargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
500 MVT VT) const {
501 const TargetRegisterClass *RRC = nullptr;
502 uint8_t Cost = 1;
503 switch (VT.SimpleTy) {
504 default:
505 return TargetLowering::findRepresentativeClass(TRI, VT);
506 case MVT::i8: case MVT::i16: case MVT::i32: case MVT::i64:
507 RRC = Subtarget.is64Bit() ? &X86::GR64RegClass : &X86::GR32RegClass;
508 break;
509 case MVT::x86mmx:
510 RRC = &X86::VR64RegClass;
511 break;
512 case MVT::f32: case MVT::f64:
513 case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
514 case MVT::v4f32: case MVT::v2f64:
515 case MVT::v32i8: case MVT::v16i16: case MVT::v8i32: case MVT::v4i64:
516 case MVT::v8f32: case MVT::v4f64:
517 case MVT::v64i8: case MVT::v32i16: case MVT::v16i32: case MVT::v8i64:
518 case MVT::v16f32: case MVT::v8f64:
519 RRC = &X86::VR128XRegClass;
520 break;
521 }
522 return std::make_pair(RRC, Cost);
523 }
524
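/// Return the segment-register address space used for TLS slot addressing:
/// 256 (%gs) on 32-bit targets and under the kernel code model, 257 (%fs)
/// otherwise (see X86AS::GS/X86AS::FS).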
525 unsigned X86TargetLowering::getAddressSpace() const {
526 if (Subtarget.is64Bit())
527 return (getTargetMachine().getCodeModel() == CodeModel::Kernel) ? 256 : 257;
528 return 256;
529 }
530
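/// Returns true if the target C library reserves a fixed TLS slot for the
/// stack guard (glibc, Fuchsia, and Android API level 17 and newer).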
531 static bool hasStackGuardSlotTLS(const Triple &TargetTriple) {
532 return TargetTriple.isOSGlibc() || TargetTriple.isOSFuchsia() ||
533 (TargetTriple.isAndroid() && !TargetTriple.isAndroidVersionLT(17));
534 }
535
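/// Build a constant pointer with the given offset in the given segment-based
/// address space (e.g. 256 = %gs, 257 = %fs), i.e. an inttoptr of the offset.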
536 static Constant *SegmentOffset(IRBuilderBase &IRB,
537 int Offset, unsigned AddressSpace) {
538 return ConstantExpr::getIntToPtr(
539 ConstantInt::get(Type::getInt32Ty(IRB.getContext()), Offset),
540 IRB.getPtrTy(AddressSpace));
541 }
542
543 Value *X86TargetLowering::getIRStackGuard(IRBuilderBase &IRB) const {
544 // glibc, bionic, and Fuchsia have a special slot for the stack guard in
545 // tcbhead_t; use it instead of the usual global variable (see
546 // sysdeps/{i386,x86_64}/nptl/tls.h)
547 if (hasStackGuardSlotTLS(Subtarget.getTargetTriple())) {
548 unsigned AddressSpace = getAddressSpace();
549
550 // <zircon/tls.h> defines ZX_TLS_STACK_GUARD_OFFSET with this value.
551 if (Subtarget.isTargetFuchsia())
552 return SegmentOffset(IRB, 0x10, AddressSpace);
553
554 Module *M = IRB.GetInsertBlock()->getParent()->getParent();
555 // Some users may customize the guard base register and offset.
556 int Offset = M->getStackProtectorGuardOffset();
557 // If -stack-protector-guard-offset was not specified, the default is
558 // %fs:0x28 on x86_64 (%gs:0x28 when using the kernel code model) and
559 // %gs:0x14 on i386.
560 if (Offset == INT_MAX)
561 Offset = (Subtarget.is64Bit()) ? 0x28 : 0x14;
562
563 StringRef GuardReg = M->getStackProtectorGuardReg();
564 if (GuardReg == "fs")
565 AddressSpace = X86AS::FS;
566 else if (GuardReg == "gs")
567 AddressSpace = X86AS::GS;
568
569 // Use the guard symbol if the user specified one.
570 StringRef GuardSymb = M->getStackProtectorGuardSymbol();
571 if (!GuardSymb.empty()) {
572 GlobalVariable *GV = M->getGlobalVariable(GuardSymb);
573 if (!GV) {
574 Type *Ty = Subtarget.is64Bit() ? Type::getInt64Ty(M->getContext())
575 : Type::getInt32Ty(M->getContext());
576 GV = new GlobalVariable(*M, Ty, false, GlobalValue::ExternalLinkage,
577 nullptr, GuardSymb, nullptr,
578 GlobalValue::NotThreadLocal, AddressSpace);
579 if (!Subtarget.isTargetDarwin())
580 GV->setDSOLocal(M->getDirectAccessExternalData());
581 }
582 return GV;
583 }
584
585 return SegmentOffset(IRB, Offset, AddressSpace);
586 }
587 return TargetLowering::getIRStackGuard(IRB);
588 }
589
590 void X86TargetLowering::insertSSPDeclarations(Module &M) const {
591 // The MSVC CRT provides functionality for stack protection.
592 if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
593 Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
594 // MSVC CRT has a global variable holding security cookie.
595 M.getOrInsertGlobal("__security_cookie",
596 PointerType::getUnqual(M.getContext()));
597
598 // MSVC CRT has a function to validate security cookie.
599 FunctionCallee SecurityCheckCookie = M.getOrInsertFunction(
600 "__security_check_cookie", Type::getVoidTy(M.getContext()),
601 PointerType::getUnqual(M.getContext()));
602 if (Function *F = dyn_cast<Function>(SecurityCheckCookie.getCallee())) {
603 F->setCallingConv(CallingConv::X86_FastCall);
604 F->addParamAttr(0, Attribute::AttrKind::InReg);
605 }
606 return;
607 }
608
609 StringRef GuardMode = M.getStackProtectorGuard();
610
611 // glibc, bionic, and Fuchsia have a special slot for the stack guard.
612 if ((GuardMode == "tls" || GuardMode.empty()) &&
613 hasStackGuardSlotTLS(Subtarget.getTargetTriple()))
614 return;
615 TargetLowering::insertSSPDeclarations(M);
616 }
617
618 Value *X86TargetLowering::getSDagStackGuard(const Module &M) const {
619 // MSVC CRT has a global variable holding security cookie.
620 if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
621 Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
622 return M.getGlobalVariable("__security_cookie");
623 }
624 return TargetLowering::getSDagStackGuard(M);
625 }
626
627 Function *X86TargetLowering::getSSPStackGuardCheck(const Module &M) const {
628 // MSVC CRT has a function to validate security cookie.
629 if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
630 Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
631 return M.getFunction("__security_check_cookie");
632 }
633 return TargetLowering::getSSPStackGuardCheck(M);
634 }
635
636 Value *
637 X86TargetLowering::getSafeStackPointerLocation(IRBuilderBase &IRB) const {
638 // Android provides a fixed TLS slot for the SafeStack pointer. See the
639 // definition of TLS_SLOT_SAFESTACK in
640 // https://android.googlesource.com/platform/bionic/+/master/libc/private/bionic_tls.h
641 if (Subtarget.isTargetAndroid()) {
642 // %fs:0x48, unless we're using a Kernel code model, in which case it's
643 // %gs:0x48; %gs:0x24 on i386.
644 int Offset = (Subtarget.is64Bit()) ? 0x48 : 0x24;
645 return SegmentOffset(IRB, Offset, getAddressSpace());
646 }
647
648 // Fuchsia is similar.
649 if (Subtarget.isTargetFuchsia()) {
650 // <zircon/tls.h> defines ZX_TLS_UNSAFE_SP_OFFSET with this value.
651 return SegmentOffset(IRB, 0x18, getAddressSpace());
652 }
653
654 return TargetLowering::getSafeStackPointerLocation(IRB);
655 }
656
657 //===----------------------------------------------------------------------===//
658 // Return Value Calling Convention Implementation
659 //===----------------------------------------------------------------------===//
660
661 bool X86TargetLowering::CanLowerReturn(
662 CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg,
663 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
664 SmallVector<CCValAssign, 16> RVLocs;
665 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
666 return CCInfo.CheckReturn(Outs, RetCC_X86);
667 }
668
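// R11 is caller-saved and is not used for argument passing or return values
// in either the SysV or Win64 C calling conventions, so it is safe to use as
// a scratch register around calls.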
669 const MCPhysReg *X86TargetLowering::getScratchRegisters(CallingConv::ID) const {
670 static const MCPhysReg ScratchRegs[] = { X86::R11, 0 };
671 return ScratchRegs;
672 }
673
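// FPCW is the x87 control word and MXCSR the SSE control/status register;
// both hold the dynamic rounding mode.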
674 ArrayRef<MCPhysReg> X86TargetLowering::getRoundingControlRegisters() const {
675 static const MCPhysReg RCRegs[] = {X86::FPCW, X86::MXCSR};
676 return RCRegs;
677 }
678
679 /// Lowers mask values (v*i1) to the local register values.
680 /// \returns DAG node after lowering to register type
681 static SDValue lowerMasksToReg(const SDValue &ValArg, const EVT &ValLoc,
682 const SDLoc &DL, SelectionDAG &DAG) {
683 EVT ValVT = ValArg.getValueType();
684
685 if (ValVT == MVT::v1i1)
686 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ValLoc, ValArg,
687 DAG.getIntPtrConstant(0, DL));
688
689 if ((ValVT == MVT::v8i1 && (ValLoc == MVT::i8 || ValLoc == MVT::i32)) ||
690 (ValVT == MVT::v16i1 && (ValLoc == MVT::i16 || ValLoc == MVT::i32))) {
691 // Two stage lowering might be required
692 // bitcast: v8i1 -> i8 / v16i1 -> i16
693 // anyextend: i8 -> i32 / i16 -> i32
694 EVT TempValLoc = ValVT == MVT::v8i1 ? MVT::i8 : MVT::i16;
695 SDValue ValToCopy = DAG.getBitcast(TempValLoc, ValArg);
696 if (ValLoc == MVT::i32)
697 ValToCopy = DAG.getNode(ISD::ANY_EXTEND, DL, ValLoc, ValToCopy);
698 return ValToCopy;
699 }
700
701 if ((ValVT == MVT::v32i1 && ValLoc == MVT::i32) ||
702 (ValVT == MVT::v64i1 && ValLoc == MVT::i64)) {
703 // One stage lowering is required
704 // bitcast: v32i1 -> i32 / v64i1 -> i64
705 return DAG.getBitcast(ValLoc, ValArg);
706 }
707
708 return DAG.getNode(ISD::ANY_EXTEND, DL, ValLoc, ValArg);
709 }
710
711 /// Breaks v64i1 value into two registers and adds the new node to the DAG
712 static void Passv64i1ArgInRegs(
713 const SDLoc &DL, SelectionDAG &DAG, SDValue &Arg,
714 SmallVectorImpl<std::pair<Register, SDValue>> &RegsToPass, CCValAssign &VA,
715 CCValAssign &NextVA, const X86Subtarget &Subtarget) {
716 assert(Subtarget.hasBWI() && "Expected AVX512BW target!");
717 assert(Subtarget.is32Bit() && "Expecting 32 bit target");
718 assert(Arg.getValueType() == MVT::i64 && "Expecting 64 bit value");
719 assert(VA.isRegLoc() && NextVA.isRegLoc() &&
720 "The value should reside in two registers");
721
722 // Before splitting the value we cast it to i64
723 Arg = DAG.getBitcast(MVT::i64, Arg);
724
725 // Splitting the value into two i32 types
726 SDValue Lo, Hi;
727 std::tie(Lo, Hi) = DAG.SplitScalar(Arg, DL, MVT::i32, MVT::i32);
728
729 // Attach the two i32 types into corresponding registers
730 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Lo));
731 RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), Hi));
732 }
733
734 SDValue
735 X86TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
736 bool isVarArg,
737 const SmallVectorImpl<ISD::OutputArg> &Outs,
738 const SmallVectorImpl<SDValue> &OutVals,
739 const SDLoc &dl, SelectionDAG &DAG) const {
740 MachineFunction &MF = DAG.getMachineFunction();
741 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
742
743 // In some cases we need to disable registers from the default CSR list.
744 // For example, when they are used as return registers (preserve_* and X86's
745 // regcall) or for argument passing (X86's regcall).
746 bool ShouldDisableCalleeSavedRegister =
747 shouldDisableRetRegFromCSR(CallConv) ||
748 MF.getFunction().hasFnAttribute("no_caller_saved_registers");
749
750 if (CallConv == CallingConv::X86_INTR && !Outs.empty())
751 report_fatal_error("X86 interrupts may not return any value");
752
753 SmallVector<CCValAssign, 16> RVLocs;
754 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, *DAG.getContext());
755 CCInfo.AnalyzeReturn(Outs, RetCC_X86);
756
757 SmallVector<std::pair<Register, SDValue>, 4> RetVals;
758 for (unsigned I = 0, OutsIndex = 0, E = RVLocs.size(); I != E;
759 ++I, ++OutsIndex) {
760 CCValAssign &VA = RVLocs[I];
761 assert(VA.isRegLoc() && "Can only return in registers!");
762
763 // Add the register to the CalleeSaveDisableRegs list.
764 if (ShouldDisableCalleeSavedRegister)
765 MF.getRegInfo().disableCalleeSavedRegister(VA.getLocReg());
766
767 SDValue ValToCopy = OutVals[OutsIndex];
768 EVT ValVT = ValToCopy.getValueType();
769
770 // Promote values to the appropriate types.
771 if (VA.getLocInfo() == CCValAssign::SExt)
772 ValToCopy = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), ValToCopy);
773 else if (VA.getLocInfo() == CCValAssign::ZExt)
774 ValToCopy = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), ValToCopy);
775 else if (VA.getLocInfo() == CCValAssign::AExt) {
776 if (ValVT.isVector() && ValVT.getVectorElementType() == MVT::i1)
777 ValToCopy = lowerMasksToReg(ValToCopy, VA.getLocVT(), dl, DAG);
778 else
779 ValToCopy = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), ValToCopy);
780 }
781 else if (VA.getLocInfo() == CCValAssign::BCvt)
782 ValToCopy = DAG.getBitcast(VA.getLocVT(), ValToCopy);
783
784 assert(VA.getLocInfo() != CCValAssign::FPExt &&
785 "Unexpected FP-extend for return value.");
786
787 // Report an error if we have attempted to return a value via an XMM
788 // register and SSE was disabled.
789 if (!Subtarget.hasSSE1() && X86::FR32XRegClass.contains(VA.getLocReg())) {
790 errorUnsupported(DAG, dl, "SSE register return with SSE disabled");
791 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
792 } else if (!Subtarget.hasSSE2() &&
793 X86::FR64XRegClass.contains(VA.getLocReg()) &&
794 ValVT == MVT::f64) {
795 // When returning a double via an XMM register, report an error if SSE2 is
796 // not enabled.
797 errorUnsupported(DAG, dl, "SSE2 register return with SSE2 disabled");
798 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
799 }
800
801 // Returns in ST0/ST1 are handled specially: these are pushed as operands to
802 // the RET instruction and handled by the FP Stackifier.
803 if (VA.getLocReg() == X86::FP0 ||
804 VA.getLocReg() == X86::FP1) {
805 // If this is a copy from an xmm register to ST(0), use an FPExtend to
806 // change the value to the FP stack register class.
807 if (isScalarFPTypeInSSEReg(VA.getValVT()))
808 ValToCopy = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f80, ValToCopy);
809 RetVals.push_back(std::make_pair(VA.getLocReg(), ValToCopy));
810 // Don't emit a copytoreg.
811 continue;
812 }
813
814 // 64-bit vector (MMX) values are returned in XMM0 / XMM1 except for v1i64
815 // which is returned in RAX / RDX.
816 if (Subtarget.is64Bit()) {
817 if (ValVT == MVT::x86mmx) {
818 if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) {
819 ValToCopy = DAG.getBitcast(MVT::i64, ValToCopy);
820 ValToCopy = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64,
821 ValToCopy);
822 // If we don't have SSE2 available, convert to v4f32 so the generated
823 // register is legal.
824 if (!Subtarget.hasSSE2())
825 ValToCopy = DAG.getBitcast(MVT::v4f32, ValToCopy);
826 }
827 }
828 }
829
830 if (VA.needsCustom()) {
831 assert(VA.getValVT() == MVT::v64i1 &&
832 "Currently the only custom case is when we split v64i1 to 2 regs");
833
834 Passv64i1ArgInRegs(dl, DAG, ValToCopy, RetVals, VA, RVLocs[++I],
835 Subtarget);
836
837 // Add the second register to the CalleeSaveDisableRegs list.
838 if (ShouldDisableCalleeSavedRegister)
839 MF.getRegInfo().disableCalleeSavedRegister(RVLocs[I].getLocReg());
840 } else {
841 RetVals.push_back(std::make_pair(VA.getLocReg(), ValToCopy));
842 }
843 }
844
845 SDValue Glue;
846 SmallVector<SDValue, 6> RetOps;
847 RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
848 // Operand #1 = Bytes To Pop
849 RetOps.push_back(DAG.getTargetConstant(FuncInfo->getBytesToPopOnReturn(), dl,
850 MVT::i32));
851
852 // Copy the result values into the output registers.
853 for (auto &RetVal : RetVals) {
854 if (RetVal.first == X86::FP0 || RetVal.first == X86::FP1) {
855 RetOps.push_back(RetVal.second);
856 continue; // Don't emit a copytoreg.
857 }
858
859 Chain = DAG.getCopyToReg(Chain, dl, RetVal.first, RetVal.second, Glue);
860 Glue = Chain.getValue(1);
861 RetOps.push_back(
862 DAG.getRegister(RetVal.first, RetVal.second.getValueType()));
863 }
864
865 // Swift calling convention does not require we copy the sret argument
866 // into %rax/%eax for the return, and SRetReturnReg is not set for Swift.
867
868 // All x86 ABIs require that for returning structs by value we copy
869 // the sret argument into %rax/%eax (depending on ABI) for the return.
870 // We saved the argument into a virtual register in the entry block,
871 // so now we copy the value out and into %rax/%eax.
872 //
873 // Checking Function.hasStructRetAttr() here is insufficient because the IR
874 // may not have an explicit sret argument. If FuncInfo.CanLowerReturn is
875 // false, then an sret argument may be implicitly inserted in the SelDAG. In
876 // either case FuncInfo->setSRetReturnReg() will have been called.
877 if (Register SRetReg = FuncInfo->getSRetReturnReg()) {
878 // When we have both sret and another return value, we should use the
879 // original Chain stored in RetOps[0], instead of the current Chain updated
880 // in the above loop. If we only have sret, RetOps[0] equals to Chain.
881
882 // For the case of sret and another return value, we have
883 // Chain_0 at the function entry
884 // Chain_1 = getCopyToReg(Chain_0) in the above loop
885 // If we use Chain_1 in getCopyFromReg, we will have
886 // Val = getCopyFromReg(Chain_1)
887 // Chain_2 = getCopyToReg(Chain_1, Val) from below
888
889 // getCopyToReg(Chain_0) will be glued together with
890 // getCopyToReg(Chain_1, Val) into Unit A, getCopyFromReg(Chain_1) will be
891 // in Unit B, and we will have cyclic dependency between Unit A and Unit B:
892 // Data dependency from Unit B to Unit A due to usage of Val in
893 // getCopyToReg(Chain_1, Val)
894 // Chain dependency from Unit A to Unit B
895
896 // So here, we use RetOps[0] (i.e Chain_0) for getCopyFromReg.
897 SDValue Val = DAG.getCopyFromReg(RetOps[0], dl, SRetReg,
898 getPointerTy(MF.getDataLayout()));
899
900 Register RetValReg
901 = (Subtarget.is64Bit() && !Subtarget.isTarget64BitILP32()) ?
902 X86::RAX : X86::EAX;
903 Chain = DAG.getCopyToReg(Chain, dl, RetValReg, Val, Glue);
904 Glue = Chain.getValue(1);
905
906 // RAX/EAX now acts like a return value.
907 RetOps.push_back(
908 DAG.getRegister(RetValReg, getPointerTy(DAG.getDataLayout())));
909
910 // Add the returned register to the CalleeSaveDisableRegs list. Don't do
911 // this however for preserve_most/preserve_all to minimize the number of
912 // callee-saved registers for these CCs.
913 if (ShouldDisableCalleeSavedRegister &&
914 CallConv != CallingConv::PreserveAll &&
915 CallConv != CallingConv::PreserveMost)
916 MF.getRegInfo().disableCalleeSavedRegister(RetValReg);
917 }
918
919 const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
920 const MCPhysReg *I =
921 TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
922 if (I) {
923 for (; *I; ++I) {
924 if (X86::GR64RegClass.contains(*I))
925 RetOps.push_back(DAG.getRegister(*I, MVT::i64));
926 else
927 llvm_unreachable("Unexpected register class in CSRsViaCopy!");
928 }
929 }
930
931 RetOps[0] = Chain; // Update chain.
932
933 // Add the glue if we have it.
934 if (Glue.getNode())
935 RetOps.push_back(Glue);
936
937 X86ISD::NodeType opcode = X86ISD::RET_GLUE;
938 if (CallConv == CallingConv::X86_INTR)
939 opcode = X86ISD::IRET;
940 return DAG.getNode(opcode, dl, MVT::Other, RetOps);
941 }
942
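// Returns true if the result of N is only used by a return, which makes it
// possible to emit the computation (e.g. a libcall) as a tail call; the chain
// to use for that tail call is returned in Chain.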
943 bool X86TargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
944 if (N->getNumValues() != 1 || !N->hasNUsesOfValue(1, 0))
945 return false;
946
947 SDValue TCChain = Chain;
948 SDNode *Copy = *N->use_begin();
949 if (Copy->getOpcode() == ISD::CopyToReg) {
950 // If the copy has a glue operand, we conservatively assume it isn't safe to
951 // perform a tail call.
952 if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
953 return false;
954 TCChain = Copy->getOperand(0);
955 } else if (Copy->getOpcode() != ISD::FP_EXTEND)
956 return false;
957
958 bool HasRet = false;
959 for (const SDNode *U : Copy->uses()) {
960 if (U->getOpcode() != X86ISD::RET_GLUE)
961 return false;
962 // If we are returning more than one value, we can definitely
963 // not make a tail call; see PR19530.
964 if (U->getNumOperands() > 4)
965 return false;
966 if (U->getNumOperands() == 4 &&
967 U->getOperand(U->getNumOperands() - 1).getValueType() != MVT::Glue)
968 return false;
969 HasRet = true;
970 }
971
972 if (!HasRet)
973 return false;
974
975 Chain = TCChain;
976 return true;
977 }
978
979 EVT X86TargetLowering::getTypeForExtReturn(LLVMContext &Context, EVT VT,
980 ISD::NodeType ExtendKind) const {
981 MVT ReturnMVT = MVT::i32;
982
983 bool Darwin = Subtarget.getTargetTriple().isOSDarwin();
984 if (VT == MVT::i1 || (!Darwin && (VT == MVT::i8 || VT == MVT::i16))) {
985 // The ABI does not require i1, i8 or i16 to be extended.
986 //
987 // On Darwin, there is code in the wild relying on Clang's old behaviour of
988 // always extending i8/i16 return values, so keep doing that for now.
989 // (PR26665).
990 ReturnMVT = MVT::i8;
991 }
992
993 EVT MinVT = getRegisterType(Context, ReturnMVT);
994 return VT.bitsLT(MinVT) ? MinVT : VT;
995 }
996
997 /// Reads two 32 bit registers and creates a 64 bit mask value.
998 /// \param VA The current 32 bit value that needs to be assigned.
999 /// \param NextVA The next 32 bit value that needs to be assigned.
1000 /// \param Root The parent DAG node.
1001 /// \param [in,out] InGlue Represents SDValue in the parent DAG node for
1002 /// glue purposes. In case the DAG is already using a
1003 /// physical register instead of a virtual one, we should glue
1004 /// our new SDValue to the InGlue SDValue.
1005 /// \returns a new 64-bit SDValue.
1006 static SDValue getv64i1Argument(CCValAssign &VA, CCValAssign &NextVA,
1007 SDValue &Root, SelectionDAG &DAG,
1008 const SDLoc &DL, const X86Subtarget &Subtarget,
1009 SDValue *InGlue = nullptr) {
1010 assert((Subtarget.hasBWI()) && "Expected AVX512BW target!");
1011 assert(Subtarget.is32Bit() && "Expecting 32 bit target");
1012 assert(VA.getValVT() == MVT::v64i1 &&
1013 "Expecting first location of 64 bit width type");
1014 assert(NextVA.getValVT() == VA.getValVT() &&
1015 "The locations should have the same type");
1016 assert(VA.isRegLoc() && NextVA.isRegLoc() &&
1017 "The values should reside in two registers");
1018
1019 SDValue Lo, Hi;
1020 SDValue ArgValueLo, ArgValueHi;
1021
1022 MachineFunction &MF = DAG.getMachineFunction();
1023 const TargetRegisterClass *RC = &X86::GR32RegClass;
1024
1025 // Read a 32 bit value from the registers.
1026 if (nullptr == InGlue) {
1027 // When no physical register is present,
1028 // create an intermediate virtual register.
1029 Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
1030 ArgValueLo = DAG.getCopyFromReg(Root, DL, Reg, MVT::i32);
1031 Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
1032 ArgValueHi = DAG.getCopyFromReg(Root, DL, Reg, MVT::i32);
1033 } else {
1034 // When a physical register is available read the value from it and glue
1035 // the reads together.
1036 ArgValueLo =
1037 DAG.getCopyFromReg(Root, DL, VA.getLocReg(), MVT::i32, *InGlue);
1038 *InGlue = ArgValueLo.getValue(2);
1039 ArgValueHi =
1040 DAG.getCopyFromReg(Root, DL, NextVA.getLocReg(), MVT::i32, *InGlue);
1041 *InGlue = ArgValueHi.getValue(2);
1042 }
1043
1044 // Convert the i32 type into v32i1 type.
1045 Lo = DAG.getBitcast(MVT::v32i1, ArgValueLo);
1046
1047 // Convert the i32 type into v32i1 type.
1048 Hi = DAG.getBitcast(MVT::v32i1, ArgValueHi);
1049
1050 // Concatenate the two values together.
1051 return DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v64i1, Lo, Hi);
1052 }
1053
1054 /// The function will lower a register of various sizes (8/16/32/64)
1055 /// to a mask value of the expected size (v8i1/v16i1/v32i1/v64i1)
1056 /// \returns a DAG node containing the operand after lowering to mask type.
1057 static SDValue lowerRegToMasks(const SDValue &ValArg, const EVT &ValVT,
1058 const EVT &ValLoc, const SDLoc &DL,
1059 SelectionDAG &DAG) {
1060 SDValue ValReturned = ValArg;
1061
1062 if (ValVT == MVT::v1i1)
1063 return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v1i1, ValReturned);
1064
1065 if (ValVT == MVT::v64i1) {
1066 // On 32-bit targets this case is handled by getv64i1Argument.
1067 assert(ValLoc == MVT::i64 && "Expecting only i64 locations");
1068 // On 64-bit targets there is no need to truncate; a bitcast is enough.
1069 } else {
1070 MVT MaskLenVT;
1071 switch (ValVT.getSimpleVT().SimpleTy) {
1072 case MVT::v8i1:
1073 MaskLenVT = MVT::i8;
1074 break;
1075 case MVT::v16i1:
1076 MaskLenVT = MVT::i16;
1077 break;
1078 case MVT::v32i1:
1079 MaskLenVT = MVT::i32;
1080 break;
1081 default:
1082 llvm_unreachable("Expecting a vector of i1 types");
1083 }
1084
1085 ValReturned = DAG.getNode(ISD::TRUNCATE, DL, MaskLenVT, ValReturned);
1086 }
1087 return DAG.getBitcast(ValVT, ValReturned);
1088 }
1089
1090 /// Lower the result values of a call into the
1091 /// appropriate copies out of appropriate physical registers.
1092 ///
1093 SDValue X86TargetLowering::LowerCallResult(
1094 SDValue Chain, SDValue InGlue, CallingConv::ID CallConv, bool isVarArg,
1095 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
1096 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
1097 uint32_t *RegMask) const {
1098
1099 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
1100 // Assign locations to each value returned by this call.
1101 SmallVector<CCValAssign, 16> RVLocs;
1102 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
1103 *DAG.getContext());
1104 CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
1105
1106 // Copy all of the result registers out of their specified physreg.
1107 for (unsigned I = 0, InsIndex = 0, E = RVLocs.size(); I != E;
1108 ++I, ++InsIndex) {
1109 CCValAssign &VA = RVLocs[I];
1110 EVT CopyVT = VA.getLocVT();
1111
1112 // In some calling conventions we need to remove the used registers
1113 // from the register mask.
1114 if (RegMask) {
1115 for (MCPhysReg SubReg : TRI->subregs_inclusive(VA.getLocReg()))
1116 RegMask[SubReg / 32] &= ~(1u << (SubReg % 32));
1117 }
1118
1119 // Report an error if there was an attempt to return FP values via XMM
1120 // registers.
1121 if (!Subtarget.hasSSE1() && X86::FR32XRegClass.contains(VA.getLocReg())) {
1122 errorUnsupported(DAG, dl, "SSE register return with SSE disabled");
1123 if (VA.getLocReg() == X86::XMM1)
1124 VA.convertToReg(X86::FP1); // Set reg to FP1, avoid hitting asserts.
1125 else
1126 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
1127 } else if (!Subtarget.hasSSE2() &&
1128 X86::FR64XRegClass.contains(VA.getLocReg()) &&
1129 CopyVT == MVT::f64) {
1130 errorUnsupported(DAG, dl, "SSE2 register return with SSE2 disabled");
1131 if (VA.getLocReg() == X86::XMM1)
1132 VA.convertToReg(X86::FP1); // Set reg to FP1, avoid hitting asserts.
1133 else
1134 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
1135 }
1136
1137 // If we prefer to use the value in xmm registers, copy it out as f80 and
1138 // use a truncate to move it from fp stack reg to xmm reg.
1139 bool RoundAfterCopy = false;
1140 if ((VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1) &&
1141 isScalarFPTypeInSSEReg(VA.getValVT())) {
1142 if (!Subtarget.hasX87())
1143 report_fatal_error("X87 register return with X87 disabled");
1144 CopyVT = MVT::f80;
1145 RoundAfterCopy = (CopyVT != VA.getLocVT());
1146 }
1147
1148 SDValue Val;
1149 if (VA.needsCustom()) {
1150 assert(VA.getValVT() == MVT::v64i1 &&
1151 "Currently the only custom case is when we split v64i1 to 2 regs");
1152 Val =
1153 getv64i1Argument(VA, RVLocs[++I], Chain, DAG, dl, Subtarget, &InGlue);
1154 } else {
1155 Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), CopyVT, InGlue)
1156 .getValue(1);
1157 Val = Chain.getValue(0);
1158 InGlue = Chain.getValue(2);
1159 }
1160
1161 if (RoundAfterCopy)
1162 Val = DAG.getNode(ISD::FP_ROUND, dl, VA.getValVT(), Val,
1163 // This truncation won't change the value.
1164 DAG.getIntPtrConstant(1, dl, /*isTarget=*/true));
1165
1166 if (VA.isExtInLoc()) {
1167 if (VA.getValVT().isVector() &&
1168 VA.getValVT().getScalarType() == MVT::i1 &&
1169 ((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) ||
1170 (VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) {
1171 // promoting a mask type (v*i1) into a register of type i64/i32/i16/i8
1172 Val = lowerRegToMasks(Val, VA.getValVT(), VA.getLocVT(), dl, DAG);
1173 } else
1174 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
1175 }
1176
1177 if (VA.getLocInfo() == CCValAssign::BCvt)
1178 Val = DAG.getBitcast(VA.getValVT(), Val);
1179
1180 InVals.push_back(Val);
1181 }
1182
1183 return Chain;
1184 }
1185
1186 //===----------------------------------------------------------------------===//
1187 // C & StdCall & Fast Calling Convention implementation
1188 //===----------------------------------------------------------------------===//
1189 // The StdCall calling convention is used by many Windows API routines. It
1190 // differs from the C calling convention in that the callee, not the caller,
1191 // cleans up the stack, and symbols are decorated (e.g. _foo@12). It does not
1192 // support vector arguments.
1193 // For info on fast calling convention see Fast Calling Convention (tail call)
1194 // implementation LowerX86_32FastCCCallTo.
1195
1196 /// Determines whether Args, either a set of outgoing arguments to a call, or a
1197 /// set of incoming args of a call, contains an sret pointer that the callee
1198 /// pops
1199 template <typename T>
1200 static bool hasCalleePopSRet(const SmallVectorImpl<T> &Args,
1201 const X86Subtarget &Subtarget) {
1202 // Not C++20 (yet), so no concepts available.
1203 static_assert(std::is_same_v<T, ISD::OutputArg> ||
1204 std::is_same_v<T, ISD::InputArg>,
1205 "requires ISD::OutputArg or ISD::InputArg");
1206
1207 // Only 32-bit pops the sret. It's a 64-bit world these days, so early-out
1208 // for most compilations.
1209 if (!Subtarget.is32Bit())
1210 return false;
1211
1212 if (Args.empty())
1213 return false;
1214
1215 // Most calls do not have an sret argument, check the arg next.
1216 const ISD::ArgFlagsTy &Flags = Args[0].Flags;
1217 if (!Flags.isSRet() || Flags.isInReg())
1218 return false;
1219
1220 // The MSVC ABI does not pop the sret.
1221 if (Subtarget.getTargetTriple().isOSMSVCRT())
1222 return false;
1223
1224 // MCU targets don't pop the sret.
1225 if (Subtarget.isTargetMCU())
1226 return false;
1227
1228 // Callee pops argument
1229 return true;
1230 }
1231
1232 /// Make a copy of an aggregate at address specified by "Src" to address
1233 /// "Dst" with size and alignment information specified by the specific
1234 /// parameter attribute. The copy will be passed as a byval function parameter.
1235 static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
1236 SDValue Chain, ISD::ArgFlagsTy Flags,
1237 SelectionDAG &DAG, const SDLoc &dl) {
1238 SDValue SizeNode = DAG.getIntPtrConstant(Flags.getByValSize(), dl);
1239
1240 return DAG.getMemcpy(
1241 Chain, dl, Dst, Src, SizeNode, Flags.getNonZeroByValAlign(),
1242 /*isVolatile*/ false, /*AlwaysInline=*/true,
1243 /*CI=*/nullptr, std::nullopt, MachinePointerInfo(), MachinePointerInfo());
1244 }
1245
1246 /// Return true if the calling convention is one that we can guarantee TCO for.
1247 static bool canGuaranteeTCO(CallingConv::ID CC) {
1248 return (CC == CallingConv::Fast || CC == CallingConv::GHC ||
1249 CC == CallingConv::X86_RegCall || CC == CallingConv::HiPE ||
1250 CC == CallingConv::Tail || CC == CallingConv::SwiftTail);
1251 }
1252
1253 /// Return true if we might ever do TCO for calls with this calling convention.
1254 static bool mayTailCallThisCC(CallingConv::ID CC) {
1255 switch (CC) {
1256 // C calling conventions:
1257 case CallingConv::C:
1258 case CallingConv::Win64:
1259 case CallingConv::X86_64_SysV:
1260 case CallingConv::PreserveNone:
1261 // Callee pop conventions:
1262 case CallingConv::X86_ThisCall:
1263 case CallingConv::X86_StdCall:
1264 case CallingConv::X86_VectorCall:
1265 case CallingConv::X86_FastCall:
1266 // Swift:
1267 case CallingConv::Swift:
1268 return true;
1269 default:
1270 return canGuaranteeTCO(CC);
1271 }
1272 }
1273
1274 /// Return true if the function is being made into a tailcall target by
1275 /// changing its ABI.
1276 static bool shouldGuaranteeTCO(CallingConv::ID CC, bool GuaranteedTailCallOpt) {
1277 return (GuaranteedTailCallOpt && canGuaranteeTCO(CC)) ||
1278 CC == CallingConv::Tail || CC == CallingConv::SwiftTail;
1279 }
1280
1281 bool X86TargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
1282 if (!CI->isTailCall())
1283 return false;
1284
1285 CallingConv::ID CalleeCC = CI->getCallingConv();
1286 if (!mayTailCallThisCC(CalleeCC))
1287 return false;
1288
1289 return true;
1290 }
1291
1292 SDValue
1293 X86TargetLowering::LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
1294 const SmallVectorImpl<ISD::InputArg> &Ins,
1295 const SDLoc &dl, SelectionDAG &DAG,
1296 const CCValAssign &VA,
1297 MachineFrameInfo &MFI, unsigned i) const {
1298 // Create the nodes corresponding to a load from this parameter slot.
1299 ISD::ArgFlagsTy Flags = Ins[i].Flags;
1300 bool AlwaysUseMutable = shouldGuaranteeTCO(
1301 CallConv, DAG.getTarget().Options.GuaranteedTailCallOpt);
1302 bool isImmutable = !AlwaysUseMutable && !Flags.isByVal();
1303 EVT ValVT;
1304 MVT PtrVT = getPointerTy(DAG.getDataLayout());
1305
1306 // If a value is passed by pointer, we get its address instead of the value
1307 // itself. No need to extend if the mask value and location share the same
1308 // absolute size.
1309 bool ExtendedInMem =
1310 VA.isExtInLoc() && VA.getValVT().getScalarType() == MVT::i1 &&
1311 VA.getValVT().getSizeInBits() != VA.getLocVT().getSizeInBits();
1312
1313 if (VA.getLocInfo() == CCValAssign::Indirect || ExtendedInMem)
1314 ValVT = VA.getLocVT();
1315 else
1316 ValVT = VA.getValVT();
1317
1318 // FIXME: For now, all byval parameter objects are marked mutable. This can be
1319 // changed with more analysis.
1320 // In case of tail call optimization, mark all arguments mutable, since they
1321 // could be overwritten when the arguments of a tail call are lowered.
1322 if (Flags.isByVal()) {
1323 unsigned Bytes = Flags.getByValSize();
1324 if (Bytes == 0) Bytes = 1; // Don't create zero-sized stack objects.
1325
1326 // FIXME: For now, all byval parameter objects are marked as aliasing. This
1327 // can be improved with deeper analysis.
1328 int FI = MFI.CreateFixedObject(Bytes, VA.getLocMemOffset(), isImmutable,
1329 /*isAliased=*/true);
1330 return DAG.getFrameIndex(FI, PtrVT);
1331 }
1332
1333 EVT ArgVT = Ins[i].ArgVT;
1334
1335 // If this is a vector that has been split into multiple parts, don't elide
1336 // the copy. The layout on the stack may not match the packed in-memory
1337 // layout.
1338 bool ScalarizedVector = ArgVT.isVector() && !VA.getLocVT().isVector();
1339
1340 // This is an argument in memory. We might be able to perform copy elision.
1341 // If the argument is passed directly in memory without any extension, then we
1342 // can perform copy elision. Large vector types, for example, may be passed
1343 // indirectly by pointer.
1344 if (Flags.isCopyElisionCandidate() &&
1345 VA.getLocInfo() != CCValAssign::Indirect && !ExtendedInMem &&
1346 !ScalarizedVector) {
1347 SDValue PartAddr;
1348 if (Ins[i].PartOffset == 0) {
1349 // If this is a one-part value or the first part of a multi-part value,
1350 // create a stack object for the entire argument value type and return a
1351 // load from our portion of it. This assumes that if the first part of an
1352 // argument is in memory, the rest will also be in memory.
1353 int FI = MFI.CreateFixedObject(ArgVT.getStoreSize(), VA.getLocMemOffset(),
1354 /*IsImmutable=*/false);
1355 PartAddr = DAG.getFrameIndex(FI, PtrVT);
1356 return DAG.getLoad(
1357 ValVT, dl, Chain, PartAddr,
1358 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
1359 }
1360
1361 // This is not the first piece of an argument in memory. See if there is
1362 // already a fixed stack object including this offset. If so, assume it
1363 // was created by the PartOffset == 0 branch above and create a load from
1364 // the appropriate offset into it.
1365 int64_t PartBegin = VA.getLocMemOffset();
1366 int64_t PartEnd = PartBegin + ValVT.getSizeInBits() / 8;
1367 int FI = MFI.getObjectIndexBegin();
1368 for (; MFI.isFixedObjectIndex(FI); ++FI) {
1369 int64_t ObjBegin = MFI.getObjectOffset(FI);
1370 int64_t ObjEnd = ObjBegin + MFI.getObjectSize(FI);
1371 if (ObjBegin <= PartBegin && PartEnd <= ObjEnd)
1372 break;
1373 }
1374 if (MFI.isFixedObjectIndex(FI)) {
1375 SDValue Addr =
1376 DAG.getNode(ISD::ADD, dl, PtrVT, DAG.getFrameIndex(FI, PtrVT),
1377 DAG.getIntPtrConstant(Ins[i].PartOffset, dl));
1378 return DAG.getLoad(ValVT, dl, Chain, Addr,
1379 MachinePointerInfo::getFixedStack(
1380 DAG.getMachineFunction(), FI, Ins[i].PartOffset));
1381 }
1382 }
1383
1384 int FI = MFI.CreateFixedObject(ValVT.getSizeInBits() / 8,
1385 VA.getLocMemOffset(), isImmutable);
1386
1387 // Set SExt or ZExt flag.
1388 if (VA.getLocInfo() == CCValAssign::ZExt) {
1389 MFI.setObjectZExt(FI, true);
1390 } else if (VA.getLocInfo() == CCValAssign::SExt) {
1391 MFI.setObjectSExt(FI, true);
1392 }
1393
1394 MaybeAlign Alignment;
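// The 32-bit MSVC ABI only guarantees 4-byte alignment for arguments on the
// stack, so don't claim a larger alignment here (f80 keeps its default handling).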
1395 if (Subtarget.isTargetWindowsMSVC() && !Subtarget.is64Bit() &&
1396 ValVT != MVT::f80)
1397 Alignment = MaybeAlign(4);
1398 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
1399 SDValue Val = DAG.getLoad(
1400 ValVT, dl, Chain, FIN,
1401 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI),
1402 Alignment);
1403 return ExtendedInMem
1404 ? (VA.getValVT().isVector()
1405 ? DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VA.getValVT(), Val)
1406 : DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val))
1407 : Val;
1408 }
1409
1410 // FIXME: Get this from tablegen.
1411 static ArrayRef<MCPhysReg> get64BitArgumentGPRs(CallingConv::ID CallConv,
1412 const X86Subtarget &Subtarget) {
1413 assert(Subtarget.is64Bit());
1414
1415 if (Subtarget.isCallingConvWin64(CallConv)) {
1416 static const MCPhysReg GPR64ArgRegsWin64[] = {
1417 X86::RCX, X86::RDX, X86::R8, X86::R9
1418 };
1419 return ArrayRef(std::begin(GPR64ArgRegsWin64), std::end(GPR64ArgRegsWin64));
1420 }
1421
1422 static const MCPhysReg GPR64ArgRegs64Bit[] = {
1423 X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
1424 };
1425 return ArrayRef(std::begin(GPR64ArgRegs64Bit), std::end(GPR64ArgRegs64Bit));
1426 }
1427
1428 // FIXME: Get this from tablegen.
1429 static ArrayRef<MCPhysReg> get64BitArgumentXMMs(MachineFunction &MF,
1430 CallingConv::ID CallConv,
1431 const X86Subtarget &Subtarget) {
1432 assert(Subtarget.is64Bit());
1433 if (Subtarget.isCallingConvWin64(CallConv)) {
1434 // The XMM registers which might contain var arg parameters are shadowed
1435 // in their paired GPRs, so we only need to save the GPRs to their home
1436 // slots.
1437 // TODO: __vectorcall will change this.
1438 return std::nullopt;
1439 }
1440
1441 bool isSoftFloat = Subtarget.useSoftFloat();
1442 if (isSoftFloat || !Subtarget.hasSSE1())
1443 // Kernel mode asks for SSE to be disabled, so there are no XMM argument
1444 // registers.
1445 return std::nullopt;
1446
1447 static const MCPhysReg XMMArgRegs64Bit[] = {
1448 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
1449 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
1450 };
1451 return ArrayRef(std::begin(XMMArgRegs64Bit), std::end(XMMArgRegs64Bit));
1452 }
1453
1454 #ifndef NDEBUG
1455 static bool isSortedByValueNo(ArrayRef<CCValAssign> ArgLocs) {
1456 return llvm::is_sorted(
1457 ArgLocs, [](const CCValAssign &A, const CCValAssign &B) -> bool {
1458 return A.getValNo() < B.getValNo();
1459 });
1460 }
1461 #endif
1462
1463 namespace {
1464 /// This is a helper class for lowering variable arguments parameters.
1465 class VarArgsLoweringHelper {
1466 public:
1467 VarArgsLoweringHelper(X86MachineFunctionInfo *FuncInfo, const SDLoc &Loc,
1468 SelectionDAG &DAG, const X86Subtarget &Subtarget,
1469 CallingConv::ID CallConv, CCState &CCInfo)
1470 : FuncInfo(FuncInfo), DL(Loc), DAG(DAG), Subtarget(Subtarget),
1471 TheMachineFunction(DAG.getMachineFunction()),
1472 TheFunction(TheMachineFunction.getFunction()),
1473 FrameInfo(TheMachineFunction.getFrameInfo()),
1474 FrameLowering(*Subtarget.getFrameLowering()),
1475 TargLowering(DAG.getTargetLoweringInfo()), CallConv(CallConv),
1476 CCInfo(CCInfo) {}
1477
1478 // Lower variable arguments parameters.
1479 void lowerVarArgsParameters(SDValue &Chain, unsigned StackSize);
1480
1481 private:
1482 void createVarArgAreaAndStoreRegisters(SDValue &Chain, unsigned StackSize);
1483
1484 void forwardMustTailParameters(SDValue &Chain);
1485
1486 bool is64Bit() const { return Subtarget.is64Bit(); }
1487 bool isWin64() const { return Subtarget.isCallingConvWin64(CallConv); }
1488
1489 X86MachineFunctionInfo *FuncInfo;
1490 const SDLoc &DL;
1491 SelectionDAG &DAG;
1492 const X86Subtarget &Subtarget;
1493 MachineFunction &TheMachineFunction;
1494 const Function &TheFunction;
1495 MachineFrameInfo &FrameInfo;
1496 const TargetFrameLowering &FrameLowering;
1497 const TargetLowering &TargLowering;
1498 CallingConv::ID CallConv;
1499 CCState &CCInfo;
1500 };
1501 } // namespace
1502
1503 void VarArgsLoweringHelper::createVarArgAreaAndStoreRegisters(
1504 SDValue &Chain, unsigned StackSize) {
1505 // If the function takes a variable number of arguments, make a frame index for
1506 // the start of the first vararg value... for expansion of llvm.va_start. We
1507 // can skip this if there are no va_start calls.
1508 if (is64Bit() || (CallConv != CallingConv::X86_FastCall &&
1509 CallConv != CallingConv::X86_ThisCall)) {
1510 FuncInfo->setVarArgsFrameIndex(
1511 FrameInfo.CreateFixedObject(1, StackSize, true));
1512 }
1513
1514 // 64-bit calling conventions support varargs and register parameters, so we
1515 // have to do extra work to spill them in the prologue.
1516 if (is64Bit()) {
1517 // Find the first unallocated argument registers.
1518 ArrayRef<MCPhysReg> ArgGPRs = get64BitArgumentGPRs(CallConv, Subtarget);
1519 ArrayRef<MCPhysReg> ArgXMMs =
1520 get64BitArgumentXMMs(TheMachineFunction, CallConv, Subtarget);
1521 unsigned NumIntRegs = CCInfo.getFirstUnallocated(ArgGPRs);
1522 unsigned NumXMMRegs = CCInfo.getFirstUnallocated(ArgXMMs);
1523
1524 assert(!(NumXMMRegs && !Subtarget.hasSSE1()) &&
1525 "SSE register cannot be used when SSE is disabled!");
1526
1527 if (isWin64()) {
1528 // Get to the caller-allocated home save location. Add 8 to account
1529 // for the return address.
1530 int HomeOffset = FrameLowering.getOffsetOfLocalArea() + 8;
1531 FuncInfo->setRegSaveFrameIndex(
1532 FrameInfo.CreateFixedObject(1, NumIntRegs * 8 + HomeOffset, false));
1533 // Fixup to set vararg frame on shadow area (4 x i64).
1534 if (NumIntRegs < 4)
1535 FuncInfo->setVarArgsFrameIndex(FuncInfo->getRegSaveFrameIndex());
1536 } else {
1537 // For X86-64, if there are vararg parameters that are passed via
1538 // registers, then we must store them to their spots on the stack so
1539 // they may be loaded by dereferencing the result of va_next.
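// The register save area is laid out as 6 GPR slots (48 bytes) followed by
// 8 XMM slots (128 bytes); the GP/FP offsets set below index into it.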
1540 FuncInfo->setVarArgsGPOffset(NumIntRegs * 8);
1541 FuncInfo->setVarArgsFPOffset(ArgGPRs.size() * 8 + NumXMMRegs * 16);
1542 FuncInfo->setRegSaveFrameIndex(FrameInfo.CreateStackObject(
1543 ArgGPRs.size() * 8 + ArgXMMs.size() * 16, Align(16), false));
1544 }
1545
1546 SmallVector<SDValue, 6>
1547 LiveGPRs; // list of SDValue for GPR registers keeping live input value
1548 SmallVector<SDValue, 8> LiveXMMRegs; // list of SDValue for XMM registers
1549 // keeping live input value
1550 SDValue ALVal; // if applicable keeps SDValue for %al register
1551
1552 // Gather all the live in physical registers.
1553 for (MCPhysReg Reg : ArgGPRs.slice(NumIntRegs)) {
1554 Register GPR = TheMachineFunction.addLiveIn(Reg, &X86::GR64RegClass);
1555 LiveGPRs.push_back(DAG.getCopyFromReg(Chain, DL, GPR, MVT::i64));
1556 }
1557 const auto &AvailableXmms = ArgXMMs.slice(NumXMMRegs);
1558 if (!AvailableXmms.empty()) {
1559 Register AL = TheMachineFunction.addLiveIn(X86::AL, &X86::GR8RegClass);
1560 ALVal = DAG.getCopyFromReg(Chain, DL, AL, MVT::i8);
1561 for (MCPhysReg Reg : AvailableXmms) {
1562 // The fast register allocator spills virtual registers at basic
1563 // block boundaries, which can lead to uses of XMM registers
1564 // outside of the check on %al. Pass physical registers to
1565 // VASTART_SAVE_XMM_REGS to avoid unnecessary spilling.
1566 TheMachineFunction.getRegInfo().addLiveIn(Reg);
1567 LiveXMMRegs.push_back(DAG.getRegister(Reg, MVT::v4f32));
1568 }
1569 }
1570
1571 // Store the integer parameter registers.
1572 SmallVector<SDValue, 8> MemOps;
1573 SDValue RSFIN =
1574 DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(),
1575 TargLowering.getPointerTy(DAG.getDataLayout()));
1576 unsigned Offset = FuncInfo->getVarArgsGPOffset();
1577 for (SDValue Val : LiveGPRs) {
1578 SDValue FIN = DAG.getNode(ISD::ADD, DL,
1579 TargLowering.getPointerTy(DAG.getDataLayout()),
1580 RSFIN, DAG.getIntPtrConstant(Offset, DL));
1581 SDValue Store =
1582 DAG.getStore(Val.getValue(1), DL, Val, FIN,
1583 MachinePointerInfo::getFixedStack(
1584 DAG.getMachineFunction(),
1585 FuncInfo->getRegSaveFrameIndex(), Offset));
1586 MemOps.push_back(Store);
1587 Offset += 8;
1588 }
1589
1590 // Now store the XMM (fp + vector) parameter registers.
1591 if (!LiveXMMRegs.empty()) {
1592 SmallVector<SDValue, 12> SaveXMMOps;
1593 SaveXMMOps.push_back(Chain);
1594 SaveXMMOps.push_back(ALVal);
1595 SaveXMMOps.push_back(RSFIN);
1596 SaveXMMOps.push_back(
1597 DAG.getTargetConstant(FuncInfo->getVarArgsFPOffset(), DL, MVT::i32));
1598 llvm::append_range(SaveXMMOps, LiveXMMRegs);
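// The memory operand covers the entire XMM portion of the register save area:
// 8 registers x 16 bytes = 128 bytes, matching the MMO size below.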
1599 MachineMemOperand *StoreMMO =
1600 DAG.getMachineFunction().getMachineMemOperand(
1601 MachinePointerInfo::getFixedStack(
1602 DAG.getMachineFunction(), FuncInfo->getRegSaveFrameIndex(),
1603 Offset),
1604 MachineMemOperand::MOStore, 128, Align(16));
1605 MemOps.push_back(DAG.getMemIntrinsicNode(X86ISD::VASTART_SAVE_XMM_REGS,
1606 DL, DAG.getVTList(MVT::Other),
1607 SaveXMMOps, MVT::i8, StoreMMO));
1608 }
1609
1610 if (!MemOps.empty())
1611 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
1612 }
1613 }
1614
1615 void VarArgsLoweringHelper::forwardMustTailParameters(SDValue &Chain) {
1616 // Find the largest legal vector type.
1617 MVT VecVT = MVT::Other;
1618 // FIXME: Only some x86_32 calling conventions support AVX512.
1619 if (Subtarget.useAVX512Regs() &&
1620 (is64Bit() || (CallConv == CallingConv::X86_VectorCall ||
1621 CallConv == CallingConv::Intel_OCL_BI)))
1622 VecVT = MVT::v16f32;
1623 else if (Subtarget.hasAVX())
1624 VecVT = MVT::v8f32;
1625 else if (Subtarget.hasSSE2())
1626 VecVT = MVT::v4f32;
1627
1628 // We forward some GPRs and some vector types.
1629 SmallVector<MVT, 2> RegParmTypes;
1630 MVT IntVT = is64Bit() ? MVT::i64 : MVT::i32;
1631 RegParmTypes.push_back(IntVT);
1632 if (VecVT != MVT::Other)
1633 RegParmTypes.push_back(VecVT);
1634
1635 // Compute the set of forwarded registers. The rest are scratch.
1636 SmallVectorImpl<ForwardedRegister> &Forwards =
1637 FuncInfo->getForwardedMustTailRegParms();
1638 CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes, CC_X86);
1639
1640 // Forward AL for SysV x86_64 targets, since it is used for varargs.
1641 if (is64Bit() && !isWin64() && !CCInfo.isAllocated(X86::AL)) {
1642 Register ALVReg = TheMachineFunction.addLiveIn(X86::AL, &X86::GR8RegClass);
1643 Forwards.push_back(ForwardedRegister(ALVReg, X86::AL, MVT::i8));
1644 }
1645
1646 // Copy all forwards from physical to virtual registers.
1647 for (ForwardedRegister &FR : Forwards) {
1648 // FIXME: Can we use a less constrained schedule?
1649 SDValue RegVal = DAG.getCopyFromReg(Chain, DL, FR.VReg, FR.VT);
1650 FR.VReg = TheMachineFunction.getRegInfo().createVirtualRegister(
1651 TargLowering.getRegClassFor(FR.VT));
1652 Chain = DAG.getCopyToReg(Chain, DL, FR.VReg, RegVal);
1653 }
1654 }
1655
1656 void VarArgsLoweringHelper::lowerVarArgsParameters(SDValue &Chain,
1657 unsigned StackSize) {
1658 // Set FrameIndex to the 0xAAAAAAA sentinel value to mark the unset state.
1659 // If necessary, it will be set to the correct value later.
1660 FuncInfo->setVarArgsFrameIndex(0xAAAAAAA);
1661 FuncInfo->setRegSaveFrameIndex(0xAAAAAAA);
1662
1663 if (FrameInfo.hasVAStart())
1664 createVarArgAreaAndStoreRegisters(Chain, StackSize);
1665
1666 if (FrameInfo.hasMustTailInVarArgFunc())
1667 forwardMustTailParameters(Chain);
1668 }
1669
1670 SDValue X86TargetLowering::LowerFormalArguments(
1671 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
1672 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
1673 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
1674 MachineFunction &MF = DAG.getMachineFunction();
1675 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
1676
1677 const Function &F = MF.getFunction();
1678 if (F.hasExternalLinkage() && Subtarget.isTargetCygMing() &&
1679 F.getName() == "main")
1680 FuncInfo->setForceFramePointer(true);
1681
1682 MachineFrameInfo &MFI = MF.getFrameInfo();
1683 bool Is64Bit = Subtarget.is64Bit();
1684 bool IsWin64 = Subtarget.isCallingConvWin64(CallConv);
1685
1686 assert(
1687 !(IsVarArg && canGuaranteeTCO(CallConv)) &&
1688 "Var args not supported with calling conv' regcall, fastcc, ghc or hipe");
1689
1690 // Assign locations to all of the incoming arguments.
1691 SmallVector<CCValAssign, 16> ArgLocs;
1692 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
1693
1694 // Allocate shadow area for Win64.
1695 if (IsWin64)
1696 CCInfo.AllocateStack(32, Align(8));
1697
1698 CCInfo.AnalyzeArguments(Ins, CC_X86);
1699
1700 // In the vectorcall calling convention, a second pass is required for the HVA
1701 // types.
1702 if (CallingConv::X86_VectorCall == CallConv) {
1703 CCInfo.AnalyzeArgumentsSecondPass(Ins, CC_X86);
1704 }
1705
1706 // The next loop assumes that the locations are in the same order as the
1707 // input arguments.
1708 assert(isSortedByValueNo(ArgLocs) &&
1709 "Argument Location list must be sorted before lowering");
1710
1711 SDValue ArgValue;
1712 for (unsigned I = 0, InsIndex = 0, E = ArgLocs.size(); I != E;
1713 ++I, ++InsIndex) {
1714 assert(InsIndex < Ins.size() && "Invalid Ins index");
1715 CCValAssign &VA = ArgLocs[I];
1716
1717 if (VA.isRegLoc()) {
1718 EVT RegVT = VA.getLocVT();
1719 if (VA.needsCustom()) {
1720 assert(
1721 VA.getValVT() == MVT::v64i1 &&
1722 "Currently the only custom case is when we split v64i1 to 2 regs");
1723
1724 // v64i1 values, in regcall calling convention, that are
1725 // compiled to 32 bit arch, are split up into two registers.
1726 ArgValue =
1727 getv64i1Argument(VA, ArgLocs[++I], Chain, DAG, dl, Subtarget);
1728 } else {
1729 const TargetRegisterClass *RC;
1730 if (RegVT == MVT::i8)
1731 RC = &X86::GR8RegClass;
1732 else if (RegVT == MVT::i16)
1733 RC = &X86::GR16RegClass;
1734 else if (RegVT == MVT::i32)
1735 RC = &X86::GR32RegClass;
1736 else if (Is64Bit && RegVT == MVT::i64)
1737 RC = &X86::GR64RegClass;
1738 else if (RegVT == MVT::f16)
1739 RC = Subtarget.hasAVX512() ? &X86::FR16XRegClass : &X86::FR16RegClass;
1740 else if (RegVT == MVT::f32)
1741 RC = Subtarget.hasAVX512() ? &X86::FR32XRegClass : &X86::FR32RegClass;
1742 else if (RegVT == MVT::f64)
1743 RC = Subtarget.hasAVX512() ? &X86::FR64XRegClass : &X86::FR64RegClass;
1744 else if (RegVT == MVT::f80)
1745 RC = &X86::RFP80RegClass;
1746 else if (RegVT == MVT::f128)
1747 RC = &X86::VR128RegClass;
1748 else if (RegVT.is512BitVector())
1749 RC = &X86::VR512RegClass;
1750 else if (RegVT.is256BitVector())
1751 RC = Subtarget.hasVLX() ? &X86::VR256XRegClass : &X86::VR256RegClass;
1752 else if (RegVT.is128BitVector())
1753 RC = Subtarget.hasVLX() ? &X86::VR128XRegClass : &X86::VR128RegClass;
1754 else if (RegVT == MVT::x86mmx)
1755 RC = &X86::VR64RegClass;
1756 else if (RegVT == MVT::v1i1)
1757 RC = &X86::VK1RegClass;
1758 else if (RegVT == MVT::v8i1)
1759 RC = &X86::VK8RegClass;
1760 else if (RegVT == MVT::v16i1)
1761 RC = &X86::VK16RegClass;
1762 else if (RegVT == MVT::v32i1)
1763 RC = &X86::VK32RegClass;
1764 else if (RegVT == MVT::v64i1)
1765 RC = &X86::VK64RegClass;
1766 else
1767 llvm_unreachable("Unknown argument type!");
1768
1769 Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
1770 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
1771 }
1772
1773 // If this is an 8 or 16-bit value, it is really passed promoted to 32
1774 // bits. Insert an assert[sz]ext to capture this, then truncate to the
1775 // right size.
1776 if (VA.getLocInfo() == CCValAssign::SExt)
1777 ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
1778 DAG.getValueType(VA.getValVT()));
1779 else if (VA.getLocInfo() == CCValAssign::ZExt)
1780 ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
1781 DAG.getValueType(VA.getValVT()));
1782 else if (VA.getLocInfo() == CCValAssign::BCvt)
1783 ArgValue = DAG.getBitcast(VA.getValVT(), ArgValue);
1784
1785 if (VA.isExtInLoc()) {
1786 // Handle MMX values passed in XMM regs.
1787 if (RegVT.isVector() && VA.getValVT().getScalarType() != MVT::i1)
1788 ArgValue = DAG.getNode(X86ISD::MOVDQ2Q, dl, VA.getValVT(), ArgValue);
1789 else if (VA.getValVT().isVector() &&
1790 VA.getValVT().getScalarType() == MVT::i1 &&
1791 ((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) ||
1792 (VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) {
1793 // Promoting a mask type (v*i1) into a register of type i64/i32/i16/i8
1794 ArgValue = lowerRegToMasks(ArgValue, VA.getValVT(), RegVT, dl, DAG);
1795 } else
1796 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
1797 }
1798 } else {
1799 assert(VA.isMemLoc());
1800 ArgValue =
1801 LowerMemArgument(Chain, CallConv, Ins, dl, DAG, VA, MFI, InsIndex);
1802 }
1803
1804 // If value is passed via pointer - do a load.
1805 if (VA.getLocInfo() == CCValAssign::Indirect &&
1806 !(Ins[I].Flags.isByVal() && VA.isRegLoc())) {
1807 ArgValue =
1808 DAG.getLoad(VA.getValVT(), dl, Chain, ArgValue, MachinePointerInfo());
1809 }
1810
1811 InVals.push_back(ArgValue);
1812 }
1813
1814 for (unsigned I = 0, E = Ins.size(); I != E; ++I) {
1815 if (Ins[I].Flags.isSwiftAsync()) {
1816 auto X86FI = MF.getInfo<X86MachineFunctionInfo>();
1817 if (X86::isExtendedSwiftAsyncFrameSupported(Subtarget, MF))
1818 X86FI->setHasSwiftAsyncContext(true);
1819 else {
1820 int PtrSize = Subtarget.is64Bit() ? 8 : 4;
1821 int FI =
1822 MF.getFrameInfo().CreateStackObject(PtrSize, Align(PtrSize), false);
1823 X86FI->setSwiftAsyncContextFrameIdx(FI);
1824 SDValue St = DAG.getStore(
1825 DAG.getEntryNode(), dl, InVals[I],
1826 DAG.getFrameIndex(FI, PtrSize == 8 ? MVT::i64 : MVT::i32),
1827 MachinePointerInfo::getFixedStack(MF, FI));
1828 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, St, Chain);
1829 }
1830 }
1831
1832 // The Swift calling convention does not require us to copy the sret argument
1833 // into %rax/%eax for the return, so we don't set SRetReturnReg for Swift.
1834 if (CallConv == CallingConv::Swift || CallConv == CallingConv::SwiftTail)
1835 continue;
1836
1837 // All x86 ABIs require that for returning structs by value we copy the
1838 // sret argument into %rax/%eax (depending on ABI) for the return. Save
1839 // the argument into a virtual register so that we can access it from the
1840 // return points.
1841 if (Ins[I].Flags.isSRet()) {
1842 assert(!FuncInfo->getSRetReturnReg() &&
1843 "SRet return has already been set");
1844 MVT PtrTy = getPointerTy(DAG.getDataLayout());
1845 Register Reg =
1846 MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy));
1847 FuncInfo->setSRetReturnReg(Reg);
1848 SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[I]);
1849 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain);
1850 break;
1851 }
1852 }
1853
1854 unsigned StackSize = CCInfo.getStackSize();
1855 // Align stack specially for tail calls.
1856 if (shouldGuaranteeTCO(CallConv,
1857 MF.getTarget().Options.GuaranteedTailCallOpt))
1858 StackSize = GetAlignedArgumentStackSize(StackSize, DAG);
1859
1860 if (IsVarArg)
1861 VarArgsLoweringHelper(FuncInfo, dl, DAG, Subtarget, CallConv, CCInfo)
1862 .lowerVarArgsParameters(Chain, StackSize);
1863
1864 // Some CCs need callee pop.
1865 if (X86::isCalleePop(CallConv, Is64Bit, IsVarArg,
1866 MF.getTarget().Options.GuaranteedTailCallOpt)) {
1867 FuncInfo->setBytesToPopOnReturn(StackSize); // Callee pops everything.
1868 } else if (CallConv == CallingConv::X86_INTR && Ins.size() == 2) {
1869 // X86 interrupts must pop the error code (and the alignment padding) if
1870 // present.
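// On 64-bit targets this is the 8-byte error code plus 8 bytes of alignment
// padding; on 32-bit it is just the 4-byte error code.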
1871 FuncInfo->setBytesToPopOnReturn(Is64Bit ? 16 : 4);
1872 } else {
1873 FuncInfo->setBytesToPopOnReturn(0); // Callee pops nothing.
1874 // If this is an sret function, the return should pop the hidden pointer.
1875 if (!canGuaranteeTCO(CallConv) && hasCalleePopSRet(Ins, Subtarget))
1876 FuncInfo->setBytesToPopOnReturn(4);
1877 }
1878
1879 if (!Is64Bit) {
1880 // RegSaveFrameIndex is X86-64 only.
1881 FuncInfo->setRegSaveFrameIndex(0xAAAAAAA);
1882 }
1883
1884 FuncInfo->setArgumentStackSize(StackSize);
1885
1886 if (WinEHFuncInfo *EHInfo = MF.getWinEHFuncInfo()) {
1887 EHPersonality Personality = classifyEHPersonality(F.getPersonalityFn());
1888 if (Personality == EHPersonality::CoreCLR) {
1889 assert(Is64Bit);
1890 // TODO: Add a mechanism to frame lowering that will allow us to indicate
1891 // that we'd prefer this slot be allocated towards the bottom of the frame
1892 // (i.e. near the stack pointer after allocating the frame). Every
1893 // funclet needs a copy of this slot in its (mostly empty) frame, and the
1894 // offset from the bottom of this and each funclet's frame must be the
1895 // same, so the size of funclets' (mostly empty) frames is dictated by
1896 // how far this slot is from the bottom (since they allocate just enough
1897 // space to accommodate holding this slot at the correct offset).
1898 int PSPSymFI = MFI.CreateStackObject(8, Align(8), /*isSpillSlot=*/false);
1899 EHInfo->PSPSymFrameIdx = PSPSymFI;
1900 }
1901 }
1902
1903 if (shouldDisableArgRegFromCSR(CallConv) ||
1904 F.hasFnAttribute("no_caller_saved_registers")) {
1905 MachineRegisterInfo &MRI = MF.getRegInfo();
1906 for (std::pair<Register, Register> Pair : MRI.liveins())
1907 MRI.disableCalleeSavedRegister(Pair.first);
1908 }
1909
1910 if (CallingConv::PreserveNone == CallConv)
1911 for (unsigned I = 0, E = Ins.size(); I != E; ++I) {
1912 if (Ins[I].Flags.isSwiftSelf() || Ins[I].Flags.isSwiftAsync() ||
1913 Ins[I].Flags.isSwiftError()) {
1914 errorUnsupported(DAG, dl,
1915 "Swift attributes can't be used with preserve_none");
1916 break;
1917 }
1918 }
1919
1920 return Chain;
1921 }
1922
1923 SDValue X86TargetLowering::LowerMemOpCallTo(SDValue Chain, SDValue StackPtr,
1924 SDValue Arg, const SDLoc &dl,
1925 SelectionDAG &DAG,
1926 const CCValAssign &VA,
1927 ISD::ArgFlagsTy Flags,
1928 bool isByVal) const {
1929 unsigned LocMemOffset = VA.getLocMemOffset();
1930 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
1931 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
1932 StackPtr, PtrOff);
1933 if (isByVal)
1934 return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl);
1935
1936 MaybeAlign Alignment;
1937 if (Subtarget.isTargetWindowsMSVC() && !Subtarget.is64Bit() &&
1938 Arg.getSimpleValueType() != MVT::f80)
1939 Alignment = MaybeAlign(4);
1940 return DAG.getStore(
1941 Chain, dl, Arg, PtrOff,
1942 MachinePointerInfo::getStack(DAG.getMachineFunction(), LocMemOffset),
1943 Alignment);
1944 }
1945
1946 /// Emit a load of return address if tail call
1947 /// optimization is performed and it is required.
1948 SDValue X86TargetLowering::EmitTailCallLoadRetAddr(
1949 SelectionDAG &DAG, SDValue &OutRetAddr, SDValue Chain, bool IsTailCall,
1950 bool Is64Bit, int FPDiff, const SDLoc &dl) const {
1951 // Adjust the Return address stack slot.
1952 EVT VT = getPointerTy(DAG.getDataLayout());
1953 OutRetAddr = getReturnAddressFrameIndex(DAG);
1954
1955 // Load the "old" Return address.
1956 OutRetAddr = DAG.getLoad(VT, dl, Chain, OutRetAddr, MachinePointerInfo());
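// The load's result #1 is its chain; return it so callers can order later
// stack stores after this load.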
1957 return SDValue(OutRetAddr.getNode(), 1);
1958 }
1959
1960 /// Emit a store of the return address if tail call
1961 /// optimization is performed and it is required (FPDiff!=0).
1962 static SDValue EmitTailCallStoreRetAddr(SelectionDAG &DAG, MachineFunction &MF,
1963 SDValue Chain, SDValue RetAddrFrIdx,
1964 EVT PtrVT, unsigned SlotSize,
1965 int FPDiff, const SDLoc &dl) {
1966 // Store the return address to the appropriate stack slot.
1967 if (!FPDiff) return Chain;
1968 // Calculate the new stack slot for the return address.
1969 int NewReturnAddrFI =
1970 MF.getFrameInfo().CreateFixedObject(SlotSize, (int64_t)FPDiff - SlotSize,
1971 false);
1972 SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, PtrVT);
1973 Chain = DAG.getStore(Chain, dl, RetAddrFrIdx, NewRetAddrFrIdx,
1974 MachinePointerInfo::getFixedStack(
1975 DAG.getMachineFunction(), NewReturnAddrFI));
1976 return Chain;
1977 }
1978
1979 /// Returns a vector_shuffle node for a movs{s|d} or movd
1980 /// operation of the specified width.
1981 SDValue X86TargetLowering::getMOVL(SelectionDAG &DAG, const SDLoc &dl, MVT VT,
1982 SDValue V1, SDValue V2) const {
1983 unsigned NumElems = VT.getVectorNumElements();
1984 SmallVector<int, 8> Mask;
1985 Mask.push_back(NumElems);
1986 for (unsigned i = 1; i != NumElems; ++i)
1987 Mask.push_back(i);
1988 return DAG.getVectorShuffle(VT, dl, V1, V2, Mask);
1989 }
1990
1991 SDValue
1992 X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
1993 SmallVectorImpl<SDValue> &InVals) const {
1994 SelectionDAG &DAG = CLI.DAG;
1995 SDLoc &dl = CLI.DL;
1996 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
1997 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
1998 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
1999 SDValue Chain = CLI.Chain;
2000 SDValue Callee = CLI.Callee;
2001 CallingConv::ID CallConv = CLI.CallConv;
2002 bool &isTailCall = CLI.IsTailCall;
2003 bool isVarArg = CLI.IsVarArg;
2004 const auto *CB = CLI.CB;
2005
2006 MachineFunction &MF = DAG.getMachineFunction();
2007 bool Is64Bit = Subtarget.is64Bit();
2008 bool IsWin64 = Subtarget.isCallingConvWin64(CallConv);
2009 bool IsSibcall = false;
2010 bool IsGuaranteeTCO = MF.getTarget().Options.GuaranteedTailCallOpt ||
2011 CallConv == CallingConv::Tail || CallConv == CallingConv::SwiftTail;
2012 bool IsCalleePopSRet = !IsGuaranteeTCO && hasCalleePopSRet(Outs, Subtarget);
2013 X86MachineFunctionInfo *X86Info = MF.getInfo<X86MachineFunctionInfo>();
2014 bool HasNCSR = (CB && isa<CallInst>(CB) &&
2015 CB->hasFnAttr("no_caller_saved_registers"));
2016 bool HasNoCfCheck = (CB && CB->doesNoCfCheck());
2017 bool IsIndirectCall = (CB && isa<CallInst>(CB) && CB->isIndirectCall());
2018 bool IsCFICall = IsIndirectCall && CLI.CFIType;
2019 const Module *M = MF.getFunction().getParent();
2020 Metadata *IsCFProtectionSupported = M->getModuleFlag("cf-protection-branch");
2021
2022 MachineFunction::CallSiteInfo CSInfo;
2023 if (CallConv == CallingConv::X86_INTR)
2024 report_fatal_error("X86 interrupts may not be called directly");
2025
2026 // Analyze operands of the call, assigning locations to each operand.
2027 SmallVector<CCValAssign, 16> ArgLocs;
2028 CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
2029
2030 // Allocate shadow area for Win64.
2031 if (IsWin64)
2032 CCInfo.AllocateStack(32, Align(8));
2033
2034 CCInfo.AnalyzeArguments(Outs, CC_X86);
2035
2036 // In the vectorcall calling convention, a second pass is required for the HVA
2037 // types.
2038 if (CallingConv::X86_VectorCall == CallConv) {
2039 CCInfo.AnalyzeArgumentsSecondPass(Outs, CC_X86);
2040 }
2041
2042 bool IsMustTail = CLI.CB && CLI.CB->isMustTailCall();
2043 if (Subtarget.isPICStyleGOT() && !IsGuaranteeTCO && !IsMustTail) {
2044 // If we are using a GOT, disable tail calls to external symbols with
2045 // default visibility. Tail calling such a symbol requires using a GOT
2046 // relocation, which forces early binding of the symbol. This breaks code
2047 // that requires lazy function symbol resolution. Using musttail or
2048 // GuaranteedTailCallOpt will override this.
2049 GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
2050 if (!G || (!G->getGlobal()->hasLocalLinkage() &&
2051 G->getGlobal()->hasDefaultVisibility()))
2052 isTailCall = false;
2053 }
2054
2055 if (isTailCall && !IsMustTail) {
2056 // Check if it's really possible to do a tail call.
2057 isTailCall = IsEligibleForTailCallOptimization(CLI, CCInfo, ArgLocs,
2058 IsCalleePopSRet);
2059
2060 // Sibcalls are automatically detected tailcalls which do not require
2061 // ABI changes.
2062 if (!IsGuaranteeTCO && isTailCall)
2063 IsSibcall = true;
2064
2065 if (isTailCall)
2066 ++NumTailCalls;
2067 }
2068
2069 if (IsMustTail && !isTailCall)
2070 report_fatal_error("failed to perform tail call elimination on a call "
2071 "site marked musttail");
2072
2073 assert(!(isVarArg && canGuaranteeTCO(CallConv)) &&
2074 "Var args not supported with calling convention fastcc, ghc or hipe");
2075
2076 // Get a count of how many bytes are to be pushed on the stack.
2077 unsigned NumBytes = CCInfo.getAlignedCallFrameSize();
2078 if (IsSibcall)
2079 // This is a sibcall. The memory operands are already in place in the
2080 // caller's incoming argument area (its own caller's stack).
2081 NumBytes = 0;
2082 else if (IsGuaranteeTCO && canGuaranteeTCO(CallConv))
2083 NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);
2084
2085 int FPDiff = 0;
2086 if (isTailCall &&
2087 shouldGuaranteeTCO(CallConv,
2088 MF.getTarget().Options.GuaranteedTailCallOpt)) {
2089 // Lower arguments at fp - stackoffset + fpdiff.
2090 unsigned NumBytesCallerPushed = X86Info->getBytesToPopOnReturn();
2091
2092 FPDiff = NumBytesCallerPushed - NumBytes;
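// For example, if the caller's popped argument area is 16 bytes but this call
// needs 48 bytes, FPDiff is -32 and the return address slot must move by 32 bytes.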
2093
2094 // Record how far the return address stack slot has to move, keeping the
2095 // largest movement (most negative delta) seen so far.
2096 if (FPDiff < X86Info->getTCReturnAddrDelta())
2097 X86Info->setTCReturnAddrDelta(FPDiff);
2098 }
2099
2100 unsigned NumBytesToPush = NumBytes;
2101 unsigned NumBytesToPop = NumBytes;
2102
2103 // If we have an inalloca argument, all stack space has already been allocated
2104 // for us and is right at the top of the stack. We don't support multiple
2105 // arguments passed in memory when using inalloca.
2106 if (!Outs.empty() && Outs.back().Flags.isInAlloca()) {
2107 NumBytesToPush = 0;
2108 if (!ArgLocs.back().isMemLoc())
2109 report_fatal_error("cannot use inalloca attribute on a register "
2110 "parameter");
2111 if (ArgLocs.back().getLocMemOffset() != 0)
2112 report_fatal_error("any parameter with the inalloca attribute must be "
2113 "the only memory argument");
2114 } else if (CLI.IsPreallocated) {
2115 assert(ArgLocs.back().isMemLoc() &&
2116 "cannot use preallocated attribute on a register "
2117 "parameter");
2118 SmallVector<size_t, 4> PreallocatedOffsets;
2119 for (size_t i = 0; i < CLI.OutVals.size(); ++i) {
2120 if (CLI.CB->paramHasAttr(i, Attribute::Preallocated)) {
2121 PreallocatedOffsets.push_back(ArgLocs[i].getLocMemOffset());
2122 }
2123 }
2124 auto *MFI = DAG.getMachineFunction().getInfo<X86MachineFunctionInfo>();
2125 size_t PreallocatedId = MFI->getPreallocatedIdForCallSite(CLI.CB);
2126 MFI->setPreallocatedStackSize(PreallocatedId, NumBytes);
2127 MFI->setPreallocatedArgOffsets(PreallocatedId, PreallocatedOffsets);
2128 NumBytesToPush = 0;
2129 }
2130
2131 if (!IsSibcall && !IsMustTail)
2132 Chain = DAG.getCALLSEQ_START(Chain, NumBytesToPush,
2133 NumBytes - NumBytesToPush, dl);
2134
2135 SDValue RetAddrFrIdx;
2136 // Load return address for tail calls.
2137 if (isTailCall && FPDiff)
2138 Chain = EmitTailCallLoadRetAddr(DAG, RetAddrFrIdx, Chain, isTailCall,
2139 Is64Bit, FPDiff, dl);
2140
2141 SmallVector<std::pair<Register, SDValue>, 8> RegsToPass;
2142 SmallVector<SDValue, 8> MemOpChains;
2143 SDValue StackPtr;
2144
2145 // The next loop assumes that the locations are in the same order as the
2146 // input arguments.
2147 assert(isSortedByValueNo(ArgLocs) &&
2148 "Argument Location list must be sorted before lowering");
2149
2150 // Walk the register/memloc assignments, inserting copies/loads. In the case
2151 // of tail call optimization, arguments are handled later.
2152 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
2153 for (unsigned I = 0, OutIndex = 0, E = ArgLocs.size(); I != E;
2154 ++I, ++OutIndex) {
2155 assert(OutIndex < Outs.size() && "Invalid Out index");
2156 // Skip inalloca/preallocated arguments, they have already been written.
2157 ISD::ArgFlagsTy Flags = Outs[OutIndex].Flags;
2158 if (Flags.isInAlloca() || Flags.isPreallocated())
2159 continue;
2160
2161 CCValAssign &VA = ArgLocs[I];
2162 EVT RegVT = VA.getLocVT();
2163 SDValue Arg = OutVals[OutIndex];
2164 bool isByVal = Flags.isByVal();
2165
2166 // Promote the value if needed.
2167 switch (VA.getLocInfo()) {
2168 default: llvm_unreachable("Unknown loc info!");
2169 case CCValAssign::Full: break;
2170 case CCValAssign::SExt:
2171 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, RegVT, Arg);
2172 break;
2173 case CCValAssign::ZExt:
2174 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, RegVT, Arg);
2175 break;
2176 case CCValAssign::AExt:
2177 if (Arg.getValueType().isVector() &&
2178 Arg.getValueType().getVectorElementType() == MVT::i1)
2179 Arg = lowerMasksToReg(Arg, RegVT, dl, DAG);
2180 else if (RegVT.is128BitVector()) {
2181 // Special case: passing MMX values in XMM registers.
2182 Arg = DAG.getBitcast(MVT::i64, Arg);
2183 Arg = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Arg);
2184 Arg = getMOVL(DAG, dl, MVT::v2i64, DAG.getUNDEF(MVT::v2i64), Arg);
2185 } else
2186 Arg = DAG.getNode(ISD::ANY_EXTEND, dl, RegVT, Arg);
2187 break;
2188 case CCValAssign::BCvt:
2189 Arg = DAG.getBitcast(RegVT, Arg);
2190 break;
2191 case CCValAssign::Indirect: {
2192 if (isByVal) {
2193 // Memcpy the argument to a temporary stack slot to prevent
2194 // the caller from seeing any modifications the callee may make
2195 // as guaranteed by the `byval` attribute.
2196 int FrameIdx = MF.getFrameInfo().CreateStackObject(
2197 Flags.getByValSize(),
2198 std::max(Align(16), Flags.getNonZeroByValAlign()), false);
2199 SDValue StackSlot =
2200 DAG.getFrameIndex(FrameIdx, getPointerTy(DAG.getDataLayout()));
2201 Chain =
2202 CreateCopyOfByValArgument(Arg, StackSlot, Chain, Flags, DAG, dl);
2203 // From now on treat this as a regular pointer
2204 Arg = StackSlot;
2205 isByVal = false;
2206 } else {
2207 // Store the argument.
2208 SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT());
2209 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
2210 Chain = DAG.getStore(
2211 Chain, dl, Arg, SpillSlot,
2212 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
2213 Arg = SpillSlot;
2214 }
2215 break;
2216 }
2217 }
2218
2219 if (VA.needsCustom()) {
2220 assert(VA.getValVT() == MVT::v64i1 &&
2221 "Currently the only custom case is when we split v64i1 to 2 regs");
2222 // Split v64i1 value into two registers
2223 Passv64i1ArgInRegs(dl, DAG, Arg, RegsToPass, VA, ArgLocs[++I], Subtarget);
2224 } else if (VA.isRegLoc()) {
2225 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
2226 const TargetOptions &Options = DAG.getTarget().Options;
2227 if (Options.EmitCallSiteInfo)
2228 CSInfo.ArgRegPairs.emplace_back(VA.getLocReg(), I);
2229 if (isVarArg && IsWin64) {
2230 // The Win64 ABI requires an argument passed in an XMM register to also be
2231 // copied to the corresponding shadow GPR if the callee is a varargs function.
2232 Register ShadowReg;
2233 switch (VA.getLocReg()) {
2234 case X86::XMM0: ShadowReg = X86::RCX; break;
2235 case X86::XMM1: ShadowReg = X86::RDX; break;
2236 case X86::XMM2: ShadowReg = X86::R8; break;
2237 case X86::XMM3: ShadowReg = X86::R9; break;
2238 }
2239 if (ShadowReg)
2240 RegsToPass.push_back(std::make_pair(ShadowReg, Arg));
2241 }
2242 } else if (!IsSibcall && (!isTailCall || isByVal)) {
2243 assert(VA.isMemLoc());
2244 if (!StackPtr.getNode())
2245 StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
2246 getPointerTy(DAG.getDataLayout()));
2247 MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
2248 dl, DAG, VA, Flags, isByVal));
2249 }
2250 }
2251
2252 if (!MemOpChains.empty())
2253 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
2254
2255 if (Subtarget.isPICStyleGOT()) {
2256 // ELF / PIC requires GOT in the EBX register before function calls via PLT
2257 // GOT pointer (except regcall).
2258 if (!isTailCall) {
2259 // An indirect call with the RegCall calling convention may use up all the
2260 // general registers, so it is not suitable to pin the EBX register to the
2261 // GOT address; just let the register allocator handle it.
2262 if (CallConv != CallingConv::X86_RegCall)
2263 RegsToPass.push_back(std::make_pair(
2264 Register(X86::EBX), DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(),
2265 getPointerTy(DAG.getDataLayout()))));
2266 } else {
2267 // If we are tail calling and generating PIC/GOT style code load the
2268 // address of the callee into ECX. The value in ecx is used as target of
2269 // the tail jump. This is done to circumvent the ebx/callee-saved problem
2270 // for tail calls on PIC/GOT architectures. Normally we would just put the
2271 // address of GOT into ebx and then call target@PLT. But for tail calls
2272 // ebx would be restored (since ebx is callee saved) before jumping to the
2273 // target@PLT.
2274
2275 // Note: The actual moving to ECX is done further down.
2276 GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
2277 if (G && !G->getGlobal()->hasLocalLinkage() &&
2278 G->getGlobal()->hasDefaultVisibility())
2279 Callee = LowerGlobalAddress(Callee, DAG);
2280 else if (isa<ExternalSymbolSDNode>(Callee))
2281 Callee = LowerExternalSymbol(Callee, DAG);
2282 }
2283 }
2284
2285 if (Is64Bit && isVarArg && !IsWin64 && !IsMustTail &&
2286 (Subtarget.hasSSE1() || !M->getModuleFlag("SkipRaxSetup"))) {
2287 // From AMD64 ABI document:
2288 // For calls that may call functions that use varargs or stdargs
2289 // (prototype-less calls or calls to functions containing ellipsis (...) in
2290 // the declaration) %al is used as hidden argument to specify the number
2291 // of SSE registers used. The contents of %al do not need to match exactly
2292 // the number of registers, but must be an upper bound on the number of SSE
2293 // registers used and is in the range 0 - 8 inclusive.
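// For example, a call such as printf("%f", x) that passes one double in XMM0
// would set %al to 1.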
2294
2295 // Count the number of XMM registers allocated.
2296 static const MCPhysReg XMMArgRegs[] = {
2297 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
2298 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
2299 };
2300 unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs);
2301 assert((Subtarget.hasSSE1() || !NumXMMRegs)
2302 && "SSE registers cannot be used when SSE is disabled");
2303 RegsToPass.push_back(std::make_pair(Register(X86::AL),
2304 DAG.getConstant(NumXMMRegs, dl,
2305 MVT::i8)));
2306 }
2307
2308 if (isVarArg && IsMustTail) {
2309 const auto &Forwards = X86Info->getForwardedMustTailRegParms();
2310 for (const auto &F : Forwards) {
2311 SDValue Val = DAG.getCopyFromReg(Chain, dl, F.VReg, F.VT);
2312 RegsToPass.push_back(std::make_pair(F.PReg, Val));
2313 }
2314 }
2315
2316 // For tail calls lower the arguments to the 'real' stack slots. Sibcalls
2317 // don't need this because the eligibility check rejects calls that require
2318 // shuffling arguments passed in memory.
2319 if (!IsSibcall && isTailCall) {
2320 // Force all the incoming stack arguments to be loaded from the stack
2321 // before any new outgoing arguments are stored to the stack, because the
2322 // outgoing stack slots may alias the incoming argument stack slots, and
2323 // the alias isn't otherwise explicit. This is slightly more conservative
2324 // than necessary, because it means that each store effectively depends
2325 // on every argument instead of just those arguments it would clobber.
2326 SDValue ArgChain = DAG.getStackArgumentTokenFactor(Chain);
2327
2328 SmallVector<SDValue, 8> MemOpChains2;
2329 SDValue FIN;
2330 int FI = 0;
2331 for (unsigned I = 0, OutsIndex = 0, E = ArgLocs.size(); I != E;
2332 ++I, ++OutsIndex) {
2333 CCValAssign &VA = ArgLocs[I];
2334
2335 if (VA.isRegLoc()) {
2336 if (VA.needsCustom()) {
2337 assert((CallConv == CallingConv::X86_RegCall) &&
2338 "Expecting custom case only in regcall calling convention");
2339 // This means that we are in the special case where one argument was
2340 // passed through two register locations - skip the next location.
2341 ++I;
2342 }
2343
2344 continue;
2345 }
2346
2347 assert(VA.isMemLoc());
2348 SDValue Arg = OutVals[OutsIndex];
2349 ISD::ArgFlagsTy Flags = Outs[OutsIndex].Flags;
2350 // Skip inalloca/preallocated arguments. They don't require any work.
2351 if (Flags.isInAlloca() || Flags.isPreallocated())
2352 continue;
2353 // Create frame index.
2354 int32_t Offset = VA.getLocMemOffset()+FPDiff;
2355 uint32_t OpSize = (VA.getLocVT().getSizeInBits()+7)/8;
2356 FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
2357 FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
2358
2359 if (Flags.isByVal()) {
2360 // Copy relative to framepointer.
2361 SDValue Source = DAG.getIntPtrConstant(VA.getLocMemOffset(), dl);
2362 if (!StackPtr.getNode())
2363 StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
2364 getPointerTy(DAG.getDataLayout()));
2365 Source = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
2366 StackPtr, Source);
2367
2368 MemOpChains2.push_back(CreateCopyOfByValArgument(Source, FIN,
2369 ArgChain,
2370 Flags, DAG, dl));
2371 } else {
2372 // Store relative to framepointer.
2373 MemOpChains2.push_back(DAG.getStore(
2374 ArgChain, dl, Arg, FIN,
2375 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)));
2376 }
2377 }
2378
2379 if (!MemOpChains2.empty())
2380 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
2381
2382 // Store the return address to the appropriate stack slot.
2383 Chain = EmitTailCallStoreRetAddr(DAG, MF, Chain, RetAddrFrIdx,
2384 getPointerTy(DAG.getDataLayout()),
2385 RegInfo->getSlotSize(), FPDiff, dl);
2386 }
2387
2388 // Build a sequence of copy-to-reg nodes chained together with token chain
2389 // and glue operands which copy the outgoing args into registers.
2390 SDValue InGlue;
2391 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
2392 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
2393 RegsToPass[i].second, InGlue);
2394 InGlue = Chain.getValue(1);
2395 }
2396
2397 if (DAG.getTarget().getCodeModel() == CodeModel::Large) {
2398 assert(Is64Bit && "Large code model is only legal in 64-bit mode.");
2399 // In the 64-bit large code model, we have to make all calls
2400 // through a register, since the call instruction's 32-bit
2401 // pc-relative offset may not be large enough to hold the whole
2402 // address.
2403 } else if (Callee->getOpcode() == ISD::GlobalAddress ||
2404 Callee->getOpcode() == ISD::ExternalSymbol) {
2405 // Lower direct calls to global addresses and external symbols. Setting
2406 // ForCall to true here has the effect of removing WrapperRIP when possible
2407 // to allow direct calls to be selected without first materializing the
2408 // address into a register.
2409 Callee = LowerGlobalOrExternal(Callee, DAG, /*ForCall=*/true);
2410 } else if (Subtarget.isTarget64BitILP32() &&
2411 Callee.getValueType() == MVT::i32) {
2412 // Zero-extend the 32-bit Callee address into a 64-bit one, per the x32 ABI.
2413 Callee = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, Callee);
2414 }
2415
2416 // Returns a chain & a glue for retval copy to use.
2417 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
2418 SmallVector<SDValue, 8> Ops;
2419
2420 if (!IsSibcall && isTailCall && !IsMustTail) {
2421 Chain = DAG.getCALLSEQ_END(Chain, NumBytesToPop, 0, InGlue, dl);
2422 InGlue = Chain.getValue(1);
2423 }
2424
2425 Ops.push_back(Chain);
2426 Ops.push_back(Callee);
2427
2428 if (isTailCall)
2429 Ops.push_back(DAG.getTargetConstant(FPDiff, dl, MVT::i32));
2430
2431 // Add argument registers to the end of the list so that they are known live
2432 // into the call.
2433 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
2434 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
2435 RegsToPass[i].second.getValueType()));
2436
2437 // Add a register mask operand representing the call-preserved registers.
2438 const uint32_t *Mask = [&]() {
2439 auto AdaptedCC = CallConv;
2440 // If HasNCSR is asserted (attribute NoCallerSavedRegisters exists),
2441 // use X86_INTR calling convention because it has the same CSR mask
2442 // (same preserved registers).
2443 if (HasNCSR)
2444 AdaptedCC = (CallingConv::ID)CallingConv::X86_INTR;
2445 // If NoCalleeSavedRegisters is requested, then use GHC since it happens
2446 // to use the CSR_NoRegs_RegMask.
2447 if (CB && CB->hasFnAttr("no_callee_saved_registers"))
2448 AdaptedCC = (CallingConv::ID)CallingConv::GHC;
2449 return RegInfo->getCallPreservedMask(MF, AdaptedCC);
2450 }();
2451 assert(Mask && "Missing call preserved mask for calling convention");
2452
2453 // If this is an invoke in a 32-bit function using a funclet-based
2454 // personality, assume the function clobbers all registers. If an exception
2455 // is thrown, the runtime will not restore CSRs.
2456 // FIXME: Model this more precisely so that we can register allocate across
2457 // the normal edge and spill and fill across the exceptional edge.
2458 if (!Is64Bit && CLI.CB && isa<InvokeInst>(CLI.CB)) {
2459 const Function &CallerFn = MF.getFunction();
2460 EHPersonality Pers =
2461 CallerFn.hasPersonalityFn()
2462 ? classifyEHPersonality(CallerFn.getPersonalityFn())
2463 : EHPersonality::Unknown;
2464 if (isFuncletEHPersonality(Pers))
2465 Mask = RegInfo->getNoPreservedMask();
2466 }
2467
2468 // Define a new register mask from the existing mask.
2469 uint32_t *RegMask = nullptr;
2470
2471 // In some calling conventions we need to remove the used physical registers
2472 // from the reg mask. Create a new RegMask for such calling conventions.
2473 // RegMask for calling conventions that disable only return registers (e.g.
2474 // preserve_most) will be modified later in LowerCallResult.
2475 bool ShouldDisableArgRegs = shouldDisableArgRegFromCSR(CallConv) || HasNCSR;
2476 if (ShouldDisableArgRegs || shouldDisableRetRegFromCSR(CallConv)) {
2477 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
2478
2479 // Allocate a new Reg Mask and copy Mask.
2480 RegMask = MF.allocateRegMask();
2481 unsigned RegMaskSize = MachineOperand::getRegMaskSize(TRI->getNumRegs());
2482 memcpy(RegMask, Mask, sizeof(RegMask[0]) * RegMaskSize);
2483
2484 // Make sure all sub registers of the argument registers are reset
2485 // in the RegMask.
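// A set bit in the register mask means the register is preserved across the
// call, so clearing the bit marks the register and its sub-registers as clobbered.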
2486 if (ShouldDisableArgRegs) {
2487 for (auto const &RegPair : RegsToPass)
2488 for (MCPhysReg SubReg : TRI->subregs_inclusive(RegPair.first))
2489 RegMask[SubReg / 32] &= ~(1u << (SubReg % 32));
2490 }
2491
2492 // Create the RegMask Operand according to our updated mask.
2493 Ops.push_back(DAG.getRegisterMask(RegMask));
2494 } else {
2495 // Create the RegMask Operand according to the static mask.
2496 Ops.push_back(DAG.getRegisterMask(Mask));
2497 }
2498
2499 if (InGlue.getNode())
2500 Ops.push_back(InGlue);
2501
2502 if (isTailCall) {
2503 // We used to do:
2504 //// If this is the first return lowered for this function, add the regs
2505 //// to the liveout set for the function.
2506 // This isn't right, although it's probably harmless on x86; liveouts
2507 // should be computed from returns not tail calls. Consider a void
2508 // function making a tail call to a function returning int.
2509 MF.getFrameInfo().setHasTailCall();
2510 SDValue Ret = DAG.getNode(X86ISD::TC_RETURN, dl, NodeTys, Ops);
2511
2512 if (IsCFICall)
2513 Ret.getNode()->setCFIType(CLI.CFIType->getZExtValue());
2514
2515 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
2516 DAG.addCallSiteInfo(Ret.getNode(), std::move(CSInfo));
2517 return Ret;
2518 }
2519
2520 if (HasNoCfCheck && IsCFProtectionSupported && IsIndirectCall) {
2521 Chain = DAG.getNode(X86ISD::NT_CALL, dl, NodeTys, Ops);
2522 } else if (CLI.CB && objcarc::hasAttachedCallOpBundle(CLI.CB)) {
2523 // Calls with a "clang.arc.attachedcall" bundle are special. They should be
2524 // expanded to the call, directly followed by a special marker sequence and
2525 // a call to an ObjC library function. Use the CALL_RVMARKER to do that.
2526 assert(!isTailCall &&
2527 "tail calls cannot be marked with clang.arc.attachedcall");
2528 assert(Is64Bit && "clang.arc.attachedcall is only supported in 64bit mode");
2529
2530 // Add a target global address for the retainRV/claimRV runtime function
2531 // just before the call target.
2532 Function *ARCFn = *objcarc::getAttachedARCFunction(CLI.CB);
2533 auto PtrVT = getPointerTy(DAG.getDataLayout());
2534 auto GA = DAG.getTargetGlobalAddress(ARCFn, dl, PtrVT);
2535 Ops.insert(Ops.begin() + 1, GA);
2536 Chain = DAG.getNode(X86ISD::CALL_RVMARKER, dl, NodeTys, Ops);
2537 } else {
2538 Chain = DAG.getNode(X86ISD::CALL, dl, NodeTys, Ops);
2539 }
2540
2541 if (IsCFICall)
2542 Chain.getNode()->setCFIType(CLI.CFIType->getZExtValue());
2543
2544 InGlue = Chain.getValue(1);
2545 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
2546 DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo));
2547
2548 // Save heapallocsite metadata.
2549 if (CLI.CB)
2550 if (MDNode *HeapAlloc = CLI.CB->getMetadata("heapallocsite"))
2551 DAG.addHeapAllocSite(Chain.getNode(), HeapAlloc);
2552
2553 // Create the CALLSEQ_END node.
2554 unsigned NumBytesForCalleeToPop = 0; // Callee pops nothing.
2555 if (X86::isCalleePop(CallConv, Is64Bit, isVarArg,
2556 DAG.getTarget().Options.GuaranteedTailCallOpt))
2557 NumBytesForCalleeToPop = NumBytes; // Callee pops everything
2558 else if (!canGuaranteeTCO(CallConv) && IsCalleePopSRet)
2559 // If this call passes a struct-return pointer, the callee
2560 // pops that struct pointer.
2561 NumBytesForCalleeToPop = 4;
2562
2563 // Returns a glue for retval copy to use.
2564 if (!IsSibcall) {
2565 Chain = DAG.getCALLSEQ_END(Chain, NumBytesToPop, NumBytesForCalleeToPop,
2566 InGlue, dl);
2567 InGlue = Chain.getValue(1);
2568 }
2569
2570 if (CallingConv::PreserveNone == CallConv)
2571 for (unsigned I = 0, E = Outs.size(); I != E; ++I) {
2572 if (Outs[I].Flags.isSwiftSelf() || Outs[I].Flags.isSwiftAsync() ||
2573 Outs[I].Flags.isSwiftError()) {
2574 errorUnsupported(DAG, dl,
2575 "Swift attributes can't be used with preserve_none");
2576 break;
2577 }
2578 }
2579
2580 // Handle result values, copying them out of physregs into vregs that we
2581 // return.
2582 return LowerCallResult(Chain, InGlue, CallConv, isVarArg, Ins, dl, DAG,
2583 InVals, RegMask);
2584 }
2585
2586 //===----------------------------------------------------------------------===//
2587 // Fast Calling Convention (tail call) implementation
2588 //===----------------------------------------------------------------------===//
2589
2590 // Like stdcall, the callee cleans up the arguments, except that ECX is
2591 // reserved for storing the address of the tail-called function. Only 2
2592 // registers are free for argument passing (inreg). Tail call optimization is performed
2593 // provided:
2594 // * tailcallopt is enabled
2595 // * caller/callee are fastcc
2596 // On X86_64 architecture with GOT-style position independent code only local
2597 // (within module) calls are supported at the moment.
2598 // To keep the stack aligned according to the platform ABI, the function
2599 // GetAlignedArgumentStackSize ensures that the argument delta is always a
2600 // multiple of the stack alignment. (Dynamic linkers need this - Darwin's dyld for example)
2601 // If a tail called function callee has more arguments than the caller the
2602 // caller needs to make sure that there is room to move the RETADDR to. This is
2603 // achieved by reserving an area the size of the argument delta right after the
2604 // original RETADDR, but before the saved framepointer or the spilled registers
2605 // e.g. caller(arg1, arg2) calls callee(arg1, arg2,arg3,arg4)
2606 // stack layout:
2607 // arg1
2608 // arg2
2609 // RETADDR
2610 // [ new RETADDR
2611 // move area ]
2612 // (possible EBP)
2613 // ESI
2614 // EDI
2615 // local1 ..
2616
2617 /// Align the stack size so that, together with the return address slot, it
2618 /// meets the stack alignment requirement (e.g. 16n + 12 for a 16-byte alignment with 4-byte slots).
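/// For example, with 4-byte slots and 16-byte stack alignment, a StackSize of 20
/// becomes alignTo(20 + 4, 16) - 4 == 28, i.e. 16n + 12.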
2619 unsigned
2620 X86TargetLowering::GetAlignedArgumentStackSize(const unsigned StackSize,
2621 SelectionDAG &DAG) const {
2622 const Align StackAlignment = Subtarget.getFrameLowering()->getStackAlign();
2623 const uint64_t SlotSize = Subtarget.getRegisterInfo()->getSlotSize();
2624 assert(StackSize % SlotSize == 0 &&
2625 "StackSize must be a multiple of SlotSize");
2626 return alignTo(StackSize + SlotSize, StackAlignment) - SlotSize;
2627 }
2628
2629 /// Return true if the given stack call argument is already available at the
2630 /// same (relative) position in the caller's incoming argument stack.
2631 static
2632 bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
2633 MachineFrameInfo &MFI, const MachineRegisterInfo *MRI,
2634 const X86InstrInfo *TII, const CCValAssign &VA) {
2635 unsigned Bytes = Arg.getValueSizeInBits() / 8;
2636
2637 for (;;) {
2638 // Look through nodes that don't alter the bits of the incoming value.
2639 unsigned Op = Arg.getOpcode();
2640 if (Op == ISD::ZERO_EXTEND || Op == ISD::ANY_EXTEND || Op == ISD::BITCAST ||
2641 Op == ISD::AssertZext) {
2642 Arg = Arg.getOperand(0);
2643 continue;
2644 }
2645 if (Op == ISD::TRUNCATE) {
2646 const SDValue &TruncInput = Arg.getOperand(0);
2647 if (TruncInput.getOpcode() == ISD::AssertZext &&
2648 cast<VTSDNode>(TruncInput.getOperand(1))->getVT() ==
2649 Arg.getValueType()) {
2650 Arg = TruncInput.getOperand(0);
2651 continue;
2652 }
2653 }
2654 break;
2655 }

  int FI = INT_MAX;
  if (Arg.getOpcode() == ISD::CopyFromReg) {
    Register VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
    if (!VR.isVirtual())
      return false;
    MachineInstr *Def = MRI->getVRegDef(VR);
    if (!Def)
      return false;
    if (!Flags.isByVal()) {
      if (!TII->isLoadFromStackSlot(*Def, FI))
        return false;
    } else {
      unsigned Opcode = Def->getOpcode();
      if ((Opcode == X86::LEA32r || Opcode == X86::LEA64r ||
           Opcode == X86::LEA64_32r) &&
          Def->getOperand(1).isFI()) {
        FI = Def->getOperand(1).getIndex();
        Bytes = Flags.getByValSize();
      } else
        return false;
    }
  } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
    if (Flags.isByVal())
      // ByVal argument is passed in as a pointer but it's now being
      // dereferenced. e.g.
      // define @foo(%struct.X* %A) {
      //   tail call @bar(%struct.X* byval %A)
      // }
      return false;
    SDValue Ptr = Ld->getBasePtr();
    FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
    if (!FINode)
      return false;
    FI = FINode->getIndex();
  } else if (Arg.getOpcode() == ISD::FrameIndex && Flags.isByVal()) {
    FrameIndexSDNode *FINode = cast<FrameIndexSDNode>(Arg);
    FI = FINode->getIndex();
    Bytes = Flags.getByValSize();
  } else
    return false;

  assert(FI != INT_MAX);
  if (!MFI.isFixedObjectIndex(FI))
    return false;

  if (Offset != MFI.getObjectOffset(FI))
    return false;

  // If this is not byval, check that the argument stack object is immutable.
  // inalloca and argument copy elision can create mutable argument stack
  // objects. Byval objects can be mutated, but a byval call intends to pass the
  // mutated memory.
  if (!Flags.isByVal() && !MFI.isImmutableObjectIndex(FI))
    return false;

  if (VA.getLocVT().getFixedSizeInBits() >
      Arg.getValueSizeInBits().getFixedValue()) {
    // If the argument location is wider than the argument type, check that any
    // extension flags match.
    if (Flags.isZExt() != MFI.isObjectZExt(FI) ||
        Flags.isSExt() != MFI.isObjectSExt(FI)) {
      return false;
    }
  }

  return Bytes == MFI.getObjectSize(FI);
}

/// Check whether the call is eligible for tail call optimization. Targets
/// that want to do tail call optimization should implement this function.
/// Note that the x86 backend does not check musttail calls for eligibility! The
/// rest of x86 tail call lowering must be prepared to forward arguments of any
/// type.
bool X86TargetLowering::IsEligibleForTailCallOptimization(
    TargetLowering::CallLoweringInfo &CLI, CCState &CCInfo,
    SmallVectorImpl<CCValAssign> &ArgLocs, bool IsCalleePopSRet) const {
  SelectionDAG &DAG = CLI.DAG;
  const SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
  const SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
  const SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
  SDValue Callee = CLI.Callee;
  CallingConv::ID CalleeCC = CLI.CallConv;
  bool isVarArg = CLI.IsVarArg;

  if (!mayTailCallThisCC(CalleeCC))
    return false;

  // If -tailcallopt is specified, make fastcc functions tail-callable.
  MachineFunction &MF = DAG.getMachineFunction();
  const Function &CallerF = MF.getFunction();

  // If the function return type is x86_fp80 and the callee return type is not,
  // then the FP_EXTEND of the call result is not a nop. It's not safe to
  // perform a tailcall optimization here.
  if (CallerF.getReturnType()->isX86_FP80Ty() && !CLI.RetTy->isX86_FP80Ty())
    return false;

  CallingConv::ID CallerCC = CallerF.getCallingConv();
  bool CCMatch = CallerCC == CalleeCC;
  bool IsCalleeWin64 = Subtarget.isCallingConvWin64(CalleeCC);
  bool IsCallerWin64 = Subtarget.isCallingConvWin64(CallerCC);
  bool IsGuaranteeTCO = DAG.getTarget().Options.GuaranteedTailCallOpt ||
                        CalleeCC == CallingConv::Tail ||
                        CalleeCC == CallingConv::SwiftTail;

  // Win64 functions have extra shadow space for argument homing. Don't do the
  // sibcall if the caller and callee have mismatched expectations for this
  // space.
  if (IsCalleeWin64 != IsCallerWin64)
    return false;

  if (IsGuaranteeTCO) {
    if (canGuaranteeTCO(CalleeCC) && CCMatch)
      return true;
    return false;
  }

  // Look for obvious safe cases to perform tail call optimization that do not
  // require ABI changes. This is what gcc calls sibcall.

  // Can't do sibcall if stack needs to be dynamically re-aligned. PEI needs to
  // emit a special epilogue.
  const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
  if (RegInfo->hasStackRealignment(MF))
    return false;

  // Also avoid sibcall optimization if we're an sret-returning function and the
  // callee is incompatible. See comment in LowerReturn about why
  // hasStructRetAttr is insufficient.
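  //
  // For illustration (hypothetical IR), a compatible pair would look like
  //   define void @caller(%struct.S* sret %out) {
  //     tail call void @callee(%struct.S* sret %out)
  //     ret void
  //   }
  // i.e. the callee must itself take an sret argument and be handed our own
  // sret pointer; since the second condition is not tracked here, we
  // conservatively refuse below whenever we have an sret return register.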
  if (MF.getInfo<X86MachineFunctionInfo>()->getSRetReturnReg()) {
    // For a compatible tail call the callee must return our sret pointer. So it
    // needs to (a) be an sret function itself and (b) be passed our sret as its
    // sret argument. Condition (b) is harder to determine, so be conservative.
    return false;
  } else if (IsCalleePopSRet)
    // The callee pops an sret, so we cannot tail-call, as our caller doesn't
    // expect that.
    return false;

  // Do not sibcall optimize vararg calls unless all arguments are passed via
  // registers.
  LLVMContext &C = *DAG.getContext();
  if (isVarArg && !Outs.empty()) {
    // Optimizing for varargs on Win64 is unlikely to be safe without
    // additional testing.
    if (IsCalleeWin64 || IsCallerWin64)
      return false;

    for (const auto &VA : ArgLocs)
      if (!VA.isRegLoc())
        return false;
  }

  // If the call result is in ST0 / ST1, it needs to be popped off the x87
  // stack. Therefore, if it's not used by the caller it is not safe to optimize
  // this into a sibcall.
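  //
  // For instance, a call whose x86_fp80 result the caller never uses would
  // leave a value on the x87 register stack that only the normal call-result
  // handling (which pops it) can clean up; a sibcall has no place to do that.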
  bool Unused = false;
  for (const auto &In : Ins) {
    if (!In.Used) {
      Unused = true;
      break;
    }
  }
  if (Unused) {
    SmallVector<CCValAssign, 16> RVLocs;
    CCState RVCCInfo(CalleeCC, false, MF, RVLocs, C);
    RVCCInfo.AnalyzeCallResult(Ins, RetCC_X86);
    for (const auto &VA : RVLocs) {
      if (VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1)
        return false;
    }
  }

  // Check that the call results are passed in the same way.
  if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins,
                                  RetCC_X86, RetCC_X86))
    return false;
  // The callee has to preserve all registers the caller needs to preserve.
  const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
  const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
  if (!CCMatch) {
    const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
    if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
      return false;
  }

  unsigned StackArgsSize = CCInfo.getStackSize();

  // If the callee takes no arguments then go on to check the results of the
  // call.
  if (!Outs.empty()) {
    if (StackArgsSize > 0) {
      // Check if the arguments are already laid out in the right way as
      // the caller's fixed stack objects.
      MachineFrameInfo &MFI = MF.getFrameInfo();
      const MachineRegisterInfo *MRI = &MF.getRegInfo();
      const X86InstrInfo *TII = Subtarget.getInstrInfo();
      for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
        const CCValAssign &VA = ArgLocs[I];
        SDValue Arg = OutVals[I];
        ISD::ArgFlagsTy Flags = Outs[I].Flags;
        if (VA.getLocInfo() == CCValAssign::Indirect)
          return false;
        if (!VA.isRegLoc()) {
          if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags, MFI, MRI,
                                   TII, VA))
            return false;
        }
      }
    }

    bool PositionIndependent = isPositionIndependent();
    // If the tailcall address may be in a register, then make sure it's
    // possible to register allocate for it. In 32-bit, the call address can
    // only target EAX, EDX, or ECX since the tail call must be scheduled after
    // callee-saved registers are restored. These happen to be the same
    // registers used to pass 'inreg' arguments so watch out for those.
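    // For example (hypothetical), a 32-bit PIC indirect tail call whose
    // arguments occupy two of EAX, ECX, and EDX is rejected by the check
    // below, since PIC address formation needs a register on top of the one
    // holding the call target.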
    if (!Subtarget.is64Bit() && ((!isa<GlobalAddressSDNode>(Callee) &&
                                  !isa<ExternalSymbolSDNode>(Callee)) ||
                                 PositionIndependent)) {
      unsigned NumInRegs = 0;
      // In PIC we need an extra register to formulate the address computation
      // for the callee.
      unsigned MaxInRegs = PositionIndependent ? 2 : 3;

      for (const auto &VA : ArgLocs) {
        if (!VA.isRegLoc())
          continue;
        Register Reg = VA.getLocReg();
        switch (Reg) {
        default: break;
        case X86::EAX: case X86::EDX: case X86::ECX:
          if (++NumInRegs == MaxInRegs)
            return false;
          break;
        }
      }
    }

    const MachineRegisterInfo &MRI = MF.getRegInfo();
    if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
      return false;
  }

  bool CalleeWillPop =
      X86::isCalleePop(CalleeCC, Subtarget.is64Bit(), isVarArg,
                       MF.getTarget().Options.GuaranteedTailCallOpt);

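  // For instance, an x86_stdcall caller that pops 8 bytes on return can only
  // tail-call a callee that itself pops exactly 8 bytes of stack arguments;
  // anything else would break the stack contract with the caller's own caller.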
  if (unsigned BytesToPop =
          MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn()) {
    // If we have bytes to pop, the callee must pop them.
    bool CalleePopMatches = CalleeWillPop && BytesToPop == StackArgsSize;
    if (!CalleePopMatches)
      return false;
  } else if (CalleeWillPop && StackArgsSize > 0) {
    // If we don't have bytes to pop, make sure the callee doesn't pop any.
    return false;
  }

  return true;
}

/// Determines whether the callee is required to pop its own arguments.
/// Callee pop is necessary to support tail calls.
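///
/// For example, the 32-bit stdcall, fastcall, thiscall, and vectorcall
/// conventions are callee-pop, while the default C convention is not; with
/// GuaranteedTailCallOpt, non-vararg calls whose convention can guarantee TCO
/// are also forced to be callee-pop.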
bool X86::isCalleePop(CallingConv::ID CallingConv,
                      bool is64Bit, bool IsVarArg, bool GuaranteeTCO) {
  // If GuaranteeTCO is true, we force some calls to be callee pop so that we
  // can guarantee TCO.
  if (!IsVarArg && shouldGuaranteeTCO(CallingConv, GuaranteeTCO))
    return true;

  switch (CallingConv) {
  default:
    return false;
  case CallingConv::X86_StdCall:
  case CallingConv::X86_FastCall:
  case CallingConv::X86_ThisCall:
  case CallingConv::X86_VectorCall:
    return !is64Bit;
  }
}
