//===- X86.cpp ------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "ABIInfoImpl.h"
#include "TargetInfo.h"
#include "clang/Basic/DiagnosticFrontend.h"
#include "llvm/ADT/SmallBitVector.h"

using namespace clang;
using namespace clang::CodeGen;

namespace {

/// IsX86_MMXType - Return true if this is an MMX type.
bool IsX86_MMXType(llvm::Type *IRType) {
  // Return true if the type is an MMX type <2 x i32>, <4 x i16>, or <8 x i8>.
  return IRType->isVectorTy() && IRType->getPrimitiveSizeInBits() == 64 &&
    cast<llvm::VectorType>(IRType)->getElementType()->isIntegerTy() &&
    IRType->getScalarSizeInBits() != 64;
}
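
// Illustrative sketch (not part of the ABI logic): how the predicate behaves
// on a few vector types, assuming an llvm::LLVMContext &Ctx is available.
//   auto *V4I16 = llvm::FixedVectorType::get(llvm::Type::getInt16Ty(Ctx), 4);
//   IsX86_MMXType(V4I16); // true: 64-bit vector of sub-64-bit integer elements
//   auto *V1I64 = llvm::FixedVectorType::get(llvm::Type::getInt64Ty(Ctx), 1);
//   IsX86_MMXType(V1I64); // false: the scalar element itself is 64 bits
//   auto *V2F32 = llvm::FixedVectorType::get(llvm::Type::getFloatTy(Ctx), 2);
//   IsX86_MMXType(V2F32); // false: elements are not integers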

static llvm::Type *X86AdjustInlineAsmType(CodeGen::CodeGenFunction &CGF,
                                          StringRef Constraint,
                                          llvm::Type *Ty) {
  if (Constraint == "k") {
    llvm::Type *Int1Ty = llvm::Type::getInt1Ty(CGF.getLLVMContext());
    return llvm::FixedVectorType::get(Int1Ty, Ty->getScalarSizeInBits());
  }

  // No operation needed
  return Ty;
}
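
// Illustrative mapping (sketch only): the "k" constraint names an AVX-512
// mask register, so the operand type is remapped to a vector of i1 with one
// lane per bit of the original scalar:
//   i8  with constraint "k"  ->  <8 x i1>   (8-bit kmask)
//   i32 with constraint "k"  ->  <32 x i1>  (32-bit kmask)
// Any other constraint returns Ty unchanged.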

/// Returns true if this type can be passed in SSE registers with the
/// X86_VectorCall calling convention. Shared between x86_32 and x86_64.
static bool isX86VectorTypeForVectorCall(ASTContext &Context, QualType Ty) {
  if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) {
    if (BT->isFloatingPoint() && BT->getKind() != BuiltinType::Half) {
      if (BT->getKind() == BuiltinType::LongDouble) {
        if (&Context.getTargetInfo().getLongDoubleFormat() ==
            &llvm::APFloat::x87DoubleExtended())
          return false;
      }
      return true;
    }
  } else if (const VectorType *VT = Ty->getAs<VectorType>()) {
    // vectorcall can pass XMM, YMM, and ZMM vectors. We don't pass SSE1/MMX
    // registers specially.
    unsigned VecSize = Context.getTypeSize(VT);
    if (VecSize == 128 || VecSize == 256 || VecSize == 512)
      return true;
  }
  return false;
}
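
// Illustrative examples (sketch only) of what qualifies under vectorcall:
//   float, double                -> true  (SSE-capable floating point)
//   long double (x87 80-bit)     -> false (lives in x87 registers)
//   __m128 / __m256 / __m512     -> true  (128/256/512-bit vectors)
//   __m64 (64-bit MMX vector)    -> false (not passed in SSE registers)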

/// Returns true if this aggregate is small enough to be passed in SSE registers
/// in the X86_VectorCall calling convention. Shared between x86_32 and x86_64.
static bool isX86VectorCallAggregateSmallEnough(uint64_t NumMembers) {
  return NumMembers <= 4;
}

/// Returns a Homogeneous Vector Aggregate ABIArgInfo, used in X86.
static ABIArgInfo getDirectX86Hva(llvm::Type *T = nullptr) {
  auto AI = ABIArgInfo::getDirect(T);
  AI.setInReg(true);
  AI.setCanBeFlattened(false);
  return AI;
}

//===----------------------------------------------------------------------===//
// X86-32 ABI Implementation
//===----------------------------------------------------------------------===//

/// Similar to llvm::CCState, but for Clang.
struct CCState {
  CCState(CGFunctionInfo &FI)
      : IsPreassigned(FI.arg_size()), CC(FI.getCallingConvention()),
        Required(FI.getRequiredArgs()), IsDelegateCall(FI.isDelegateCall()) {}

  llvm::SmallBitVector IsPreassigned;
  unsigned CC = CallingConv::CC_C;
  unsigned FreeRegs = 0;
  unsigned FreeSSERegs = 0;
  RequiredArgs Required;
  bool IsDelegateCall = false;
};

/// X86_32ABIInfo - The X86-32 ABI information.
class X86_32ABIInfo : public ABIInfo {
  enum Class {
    Integer,
    Float
  };

  static const unsigned MinABIStackAlignInBytes = 4;

  bool IsDarwinVectorABI;
  bool IsRetSmallStructInRegABI;
  bool IsWin32StructABI;
  bool IsSoftFloatABI;
  bool IsMCUABI;
  bool IsLinuxABI;
  unsigned DefaultNumRegisterParameters;

  static bool isRegisterSize(unsigned Size) {
    return (Size == 8 || Size == 16 || Size == 32 || Size == 64);
  }

  bool isHomogeneousAggregateBaseType(QualType Ty) const override {
    // FIXME: Assumes vectorcall is in use.
    return isX86VectorTypeForVectorCall(getContext(), Ty);
  }

  bool isHomogeneousAggregateSmallEnough(const Type *Ty,
                                         uint64_t NumMembers) const override {
    // FIXME: Assumes vectorcall is in use.
    return isX86VectorCallAggregateSmallEnough(NumMembers);
  }

  bool shouldReturnTypeInRegister(QualType Ty, ASTContext &Context) const;

  /// getIndirectResult - Given a source type \arg Ty, return a suitable result
  /// such that the argument will be passed in memory.
  ABIArgInfo getIndirectResult(QualType Ty, bool ByVal, CCState &State) const;

  ABIArgInfo getIndirectReturnResult(QualType Ty, CCState &State) const;

  /// Return the alignment to use for the given type on the stack.
  unsigned getTypeStackAlignInBytes(QualType Ty, unsigned Align) const;

  Class classify(QualType Ty) const;
  ABIArgInfo classifyReturnType(QualType RetTy, CCState &State) const;
  ABIArgInfo classifyArgumentType(QualType RetTy, CCState &State,
                                  unsigned ArgIndex) const;

  /// Updates the number of available free registers, returns
  /// true if any registers were allocated.
  bool updateFreeRegs(QualType Ty, CCState &State) const;

  bool shouldAggregateUseDirect(QualType Ty, CCState &State, bool &InReg,
                                bool &NeedsPadding) const;
  bool shouldPrimitiveUseInReg(QualType Ty, CCState &State) const;

  bool canExpandIndirectArgument(QualType Ty) const;

  /// Rewrite the function info so that all memory arguments use
  /// inalloca.
  void rewriteWithInAlloca(CGFunctionInfo &FI) const;

  void addFieldToArgStruct(SmallVector<llvm::Type *, 6> &FrameFields,
                           CharUnits &StackOffset, ABIArgInfo &Info,
                           QualType Type) const;
  void runVectorCallFirstPass(CGFunctionInfo &FI, CCState &State) const;

public:
  void computeInfo(CGFunctionInfo &FI) const override;
  RValue EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty,
                   AggValueSlot Slot) const override;

  X86_32ABIInfo(CodeGen::CodeGenTypes &CGT, bool DarwinVectorABI,
                bool RetSmallStructInRegABI, bool Win32StructABI,
                unsigned NumRegisterParameters, bool SoftFloatABI)
      : ABIInfo(CGT), IsDarwinVectorABI(DarwinVectorABI),
        IsRetSmallStructInRegABI(RetSmallStructInRegABI),
        IsWin32StructABI(Win32StructABI), IsSoftFloatABI(SoftFloatABI),
        IsMCUABI(CGT.getTarget().getTriple().isOSIAMCU()),
        IsLinuxABI(CGT.getTarget().getTriple().isOSLinux() ||
                   CGT.getTarget().getTriple().isOSCygMing()),
        DefaultNumRegisterParameters(NumRegisterParameters) {}
};

class X86_32SwiftABIInfo : public SwiftABIInfo {
public:
  explicit X86_32SwiftABIInfo(CodeGenTypes &CGT)
      : SwiftABIInfo(CGT, /*SwiftErrorInRegister=*/false) {}

  bool shouldPassIndirectly(ArrayRef<llvm::Type *> ComponentTys,
                            bool AsReturnValue) const override {
    // LLVM's x86-32 lowering currently only assigns up to three
    // integer registers and three fp registers.  Oddly, it'll use up to
    // four vector registers for vectors, but those can overlap with the
    // scalar registers.
    return occupiesMoreThan(ComponentTys, /*total=*/3);
  }
};

class X86_32TargetCodeGenInfo : public TargetCodeGenInfo {
public:
  X86_32TargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, bool DarwinVectorABI,
                          bool RetSmallStructInRegABI, bool Win32StructABI,
                          unsigned NumRegisterParameters, bool SoftFloatABI)
      : TargetCodeGenInfo(std::make_unique<X86_32ABIInfo>(
            CGT, DarwinVectorABI, RetSmallStructInRegABI, Win32StructABI,
            NumRegisterParameters, SoftFloatABI)) {
    SwiftInfo = std::make_unique<X86_32SwiftABIInfo>(CGT);
  }

  static bool isStructReturnInRegABI(
      const llvm::Triple &Triple, const CodeGenOptions &Opts);

  void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
                           CodeGen::CodeGenModule &CGM) const override;

  int getDwarfEHStackPointer(CodeGen::CodeGenModule &CGM) const override {
    // Darwin uses different dwarf register numbers for EH.
    if (CGM.getTarget().getTriple().isOSDarwin()) return 5;
    return 4;
  }

  bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF,
                               llvm::Value *Address) const override;

  llvm::Type *adjustInlineAsmType(CodeGen::CodeGenFunction &CGF,
                                  StringRef Constraint,
                                  llvm::Type *Ty) const override {
    return X86AdjustInlineAsmType(CGF, Constraint, Ty);
  }

  void addReturnRegisterOutputs(CodeGenFunction &CGF, LValue ReturnValue,
                                std::string &Constraints,
                                std::vector<llvm::Type *> &ResultRegTypes,
                                std::vector<llvm::Type *> &ResultTruncRegTypes,
                                std::vector<LValue> &ResultRegDests,
                                std::string &AsmString,
                                unsigned NumOutputs) const override;

  StringRef getARCRetainAutoreleasedReturnValueMarker() const override {
    return "movl\t%ebp, %ebp"
           "\t\t// marker for objc_retainAutoreleaseReturnValue";
  }
};

} // namespace

/// Rewrite input constraint references after adding some output constraints.
/// In the case where there is one output and one input and we add one output,
/// we need to replace all operand references greater than or equal to 1:
///     mov $0, $1
///     mov eax, $1
/// The result will be:
///     mov $0, $2
///     mov eax, $2
static void rewriteInputConstraintReferences(unsigned FirstIn,
                                             unsigned NumNewOuts,
                                             std::string &AsmString) {
  std::string Buf;
  llvm::raw_string_ostream OS(Buf);
  size_t Pos = 0;
  while (Pos < AsmString.size()) {
    size_t DollarStart = AsmString.find('$', Pos);
    if (DollarStart == std::string::npos)
      DollarStart = AsmString.size();
    size_t DollarEnd = AsmString.find_first_not_of('$', DollarStart);
    if (DollarEnd == std::string::npos)
      DollarEnd = AsmString.size();
    OS << StringRef(&AsmString[Pos], DollarEnd - Pos);
    Pos = DollarEnd;
    size_t NumDollars = DollarEnd - DollarStart;
    if (NumDollars % 2 != 0 && Pos < AsmString.size()) {
      // We have an operand reference.
      size_t DigitStart = Pos;
      if (AsmString[DigitStart] == '{') {
        OS << '{';
        ++DigitStart;
      }
      size_t DigitEnd = AsmString.find_first_not_of("0123456789", DigitStart);
      if (DigitEnd == std::string::npos)
        DigitEnd = AsmString.size();
      StringRef OperandStr(&AsmString[DigitStart], DigitEnd - DigitStart);
      unsigned OperandIndex;
      if (!OperandStr.getAsInteger(10, OperandIndex)) {
        if (OperandIndex >= FirstIn)
          OperandIndex += NumNewOuts;
        OS << OperandIndex;
      } else {
        OS << OperandStr;
      }
      Pos = DigitEnd;
    }
  }
  AsmString = std::move(Buf);
}
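
// Worked example (sketch, not executed here): with FirstIn = 1 and
// NumNewOuts = 1, references at or above index 1 are shifted up by one:
//   std::string S = "mov $0, $1; add $$1, $1";
//   rewriteInputConstraintReferences(1, 1, S);
//   // S == "mov $0, $2; add $$1, $2"
// Note that "$$1" is untouched: an even run of '$' is an escaped literal
// dollar sign, which is why the loop checks NumDollars % 2 above.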

/// Add output constraints for EAX:EDX because they are return registers.
void X86_32TargetCodeGenInfo::addReturnRegisterOutputs(
    CodeGenFunction &CGF, LValue ReturnSlot, std::string &Constraints,
    std::vector<llvm::Type *> &ResultRegTypes,
    std::vector<llvm::Type *> &ResultTruncRegTypes,
    std::vector<LValue> &ResultRegDests, std::string &AsmString,
    unsigned NumOutputs) const {
  uint64_t RetWidth = CGF.getContext().getTypeSize(ReturnSlot.getType());

  // Use the EAX constraint if the width is 32 or smaller and EAX:EDX if it is
  // larger.
  if (!Constraints.empty())
    Constraints += ',';
  if (RetWidth <= 32) {
    Constraints += "={eax}";
    ResultRegTypes.push_back(CGF.Int32Ty);
  } else {
    // Use the 'A' constraint for EAX:EDX.
    Constraints += "=A";
    ResultRegTypes.push_back(CGF.Int64Ty);
  }

  // Truncate EAX or EAX:EDX to an integer of the appropriate size.
  llvm::Type *CoerceTy = llvm::IntegerType::get(CGF.getLLVMContext(), RetWidth);
  ResultTruncRegTypes.push_back(CoerceTy);

  // Coerce the integer by bitcasting the return slot pointer.
  ReturnSlot.setAddress(ReturnSlot.getAddress().withElementType(CoerceTy));
  ResultRegDests.push_back(ReturnSlot);

  rewriteInputConstraintReferences(NumOutputs, 1, AsmString);
}

/// shouldReturnTypeInRegister - Determine if the given type should be
/// returned in a register (for the Darwin and MCU ABI).
bool X86_32ABIInfo::shouldReturnTypeInRegister(QualType Ty,
                                               ASTContext &Context) const {
  uint64_t Size = Context.getTypeSize(Ty);

  // For i386, the type must be register sized.
  // For the MCU ABI, it only needs to be <= 8 bytes.
  if ((IsMCUABI && Size > 64) || (!IsMCUABI && !isRegisterSize(Size)))
    return false;

  if (Ty->isVectorType()) {
    // 64- and 128-bit vectors inside structures are not returned in
    // registers.
    if (Size == 64 || Size == 128)
      return false;

    return true;
  }

  // If this is a builtin, pointer, enum, complex type, member pointer, or
  // member function pointer it is ok.
  if (Ty->getAs<BuiltinType>() || Ty->hasPointerRepresentation() ||
      Ty->isAnyComplexType() || Ty->isEnumeralType() ||
      Ty->isBlockPointerType() || Ty->isMemberPointerType())
    return true;

  // Arrays are treated like records.
  if (const ConstantArrayType *AT = Context.getAsConstantArrayType(Ty))
    return shouldReturnTypeInRegister(AT->getElementType(), Context);

  // Otherwise, it must be a record type.
  const RecordType *RT = Ty->getAs<RecordType>();
  if (!RT) return false;

  // FIXME: Traverse bases here too.

  // Structure types are returned in a register if all fields would be
  // returned in a register.
  for (const auto *FD : RT->getDecl()->fields()) {
    // Empty fields are ignored.
    if (isEmptyField(Context, FD, true))
      continue;

    // Check fields recursively.
    if (!shouldReturnTypeInRegister(FD->getType(), Context))
      return false;
  }
  return true;
}
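
// Illustrative examples (sketch only, under the i386 rules above):
//   struct A { int x; };            // 32 bits         -> register
//   struct B { short s; char c; };  // 32 bits padded  -> register
//   struct C { int x[3]; };         // 96 bits         -> not register sized
//   struct D { float f, g; };       // 64 bits, fields recurse OK -> register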

static bool is32Or64BitBasicType(QualType Ty, ASTContext &Context) {
  // Treat complex types as the element type.
  if (const ComplexType *CTy = Ty->getAs<ComplexType>())
    Ty = CTy->getElementType();

  // Check for a type which we know has a simple scalar argument-passing
  // convention without any padding.  (We're specifically looking for 32
  // and 64-bit integer and integer-equivalents, float, and double.)
  if (!Ty->getAs<BuiltinType>() && !Ty->hasPointerRepresentation() &&
      !Ty->isEnumeralType() && !Ty->isBlockPointerType())
    return false;

  uint64_t Size = Context.getTypeSize(Ty);
  return Size == 32 || Size == 64;
}

static bool addFieldSizes(ASTContext &Context, const RecordDecl *RD,
                          uint64_t &Size) {
  for (const auto *FD : RD->fields()) {
    // Scalar arguments on the stack get 4-byte alignment on x86. If the
    // argument is smaller than 32 bits, expanding the struct will create
    // alignment padding.
    if (!is32Or64BitBasicType(FD->getType(), Context))
      return false;

    // FIXME: Reject bit-fields wholesale; there are two problems: we don't
    // know how to expand them yet, and the predicate for telling if a bitfield
    // still counts as "basic" is more complicated than what we were doing
    // previously.
    if (FD->isBitField())
      return false;

    Size += Context.getTypeSize(FD->getType());
  }
  return true;
}

static bool addBaseAndFieldSizes(ASTContext &Context, const CXXRecordDecl *RD,
                                 uint64_t &Size) {
  // Don't do this if there are any non-empty bases.
  for (const CXXBaseSpecifier &Base : RD->bases()) {
    if (!addBaseAndFieldSizes(Context, Base.getType()->getAsCXXRecordDecl(),
                              Size))
      return false;
  }
  if (!addFieldSizes(Context, RD, Size))
    return false;
  return true;
}

/// Test whether an argument type which is to be passed indirectly (on the
/// stack) would have the equivalent layout if it was expanded into separate
/// arguments. If so, we prefer to do the latter to avoid inhibiting
/// optimizations.
bool X86_32ABIInfo::canExpandIndirectArgument(QualType Ty) const {
  // We can only expand structure types.
  const RecordType *RT = Ty->getAs<RecordType>();
  if (!RT)
    return false;
  const RecordDecl *RD = RT->getDecl();
  uint64_t Size = 0;
  if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD)) {
    if (!IsWin32StructABI) {
      // On non-Windows, we have to conservatively match our old bitcode
      // prototypes in order to be ABI-compatible at the bitcode level.
      if (!CXXRD->isCLike())
        return false;
    } else {
      // Don't do this for dynamic classes.
      if (CXXRD->isDynamicClass())
        return false;
    }
    if (!addBaseAndFieldSizes(getContext(), CXXRD, Size))
      return false;
  } else {
    if (!addFieldSizes(getContext(), RD, Size))
      return false;
  }

  // We can do this if there was no alignment padding.
  return Size == getContext().getTypeSize(Ty);
}
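
// Illustrative examples (sketch only):
//   struct P { int a; int b; };    // field sum 64 == type size 64 -> expand
//   struct Q { char c; int a; };   // char is not a 32/64-bit basic type -> no
//   struct R { int a; double d; }; // expandable only where double gets 4-byte
//                                  // struct alignment (i386 System V); with
//                                  // 8-byte alignment (MSVC) the padding
//                                  // makes 96 != 128 -> no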
455 
getIndirectReturnResult(QualType RetTy,CCState & State) const456 ABIArgInfo X86_32ABIInfo::getIndirectReturnResult(QualType RetTy, CCState &State) const {
457   // If the return value is indirect, then the hidden argument is consuming one
458   // integer register.
459   if (State.CC != llvm::CallingConv::X86_FastCall &&
460       State.CC != llvm::CallingConv::X86_VectorCall && State.FreeRegs) {
461     --State.FreeRegs;
462     if (!IsMCUABI)
463       return getNaturalAlignIndirectInReg(RetTy);
464   }
465   return getNaturalAlignIndirect(
466       RetTy, /*AddrSpace=*/getDataLayout().getAllocaAddrSpace(),
467       /*ByVal=*/false);
468 }

ABIArgInfo X86_32ABIInfo::classifyReturnType(QualType RetTy,
                                             CCState &State) const {
  if (RetTy->isVoidType())
    return ABIArgInfo::getIgnore();

  const Type *Base = nullptr;
  uint64_t NumElts = 0;
  if ((State.CC == llvm::CallingConv::X86_VectorCall ||
       State.CC == llvm::CallingConv::X86_RegCall) &&
      isHomogeneousAggregate(RetTy, Base, NumElts)) {
    // The LLVM struct type for such an aggregate should lower properly.
    return ABIArgInfo::getDirect();
  }

  if (const VectorType *VT = RetTy->getAs<VectorType>()) {
    // On Darwin, some vectors are returned in registers.
    if (IsDarwinVectorABI) {
      uint64_t Size = getContext().getTypeSize(RetTy);

      // 128-bit vectors are a special case; they are returned in
      // registers and we need to make sure to pick a type the LLVM
      // backend will like.
      if (Size == 128)
        return ABIArgInfo::getDirect(llvm::FixedVectorType::get(
            llvm::Type::getInt64Ty(getVMContext()), 2));

      // Always return in register if it fits in a general purpose
      // register, or if it is 64 bits and has a single element.
      if ((Size == 8 || Size == 16 || Size == 32) ||
          (Size == 64 && VT->getNumElements() == 1))
        return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(),
                                                            Size));

      return getIndirectReturnResult(RetTy, State);
    }

    return ABIArgInfo::getDirect();
  }

  if (isAggregateTypeForABI(RetTy)) {
    if (const RecordType *RT = RetTy->getAs<RecordType>()) {
      // Structures with flexible arrays are always indirect.
      if (RT->getDecl()->hasFlexibleArrayMember())
        return getIndirectReturnResult(RetTy, State);
    }

    // If specified, structs and unions are always indirect.
    if (!IsRetSmallStructInRegABI && !RetTy->isAnyComplexType())
      return getIndirectReturnResult(RetTy, State);

    // Ignore empty structs/unions.
    if (isEmptyRecord(getContext(), RetTy, true))
      return ABIArgInfo::getIgnore();

    // Return complex of _Float16 as <2 x half> so the backend will use xmm0.
    if (const ComplexType *CT = RetTy->getAs<ComplexType>()) {
      QualType ET = getContext().getCanonicalType(CT->getElementType());
      if (ET->isFloat16Type())
        return ABIArgInfo::getDirect(llvm::FixedVectorType::get(
            llvm::Type::getHalfTy(getVMContext()), 2));
    }

    // Small structures which are register sized are generally returned
    // in a register.
    if (shouldReturnTypeInRegister(RetTy, getContext())) {
      uint64_t Size = getContext().getTypeSize(RetTy);

      // As a special-case, if the struct is a "single-element" struct, and
      // the field is of type "float" or "double", return it in a
      // floating-point register. (MSVC does not apply this special case.)
      // We apply a similar transformation for pointer types to improve the
      // quality of the generated IR.
      if (const Type *SeltTy = isSingleElementStruct(RetTy, getContext()))
        if ((!IsWin32StructABI && SeltTy->isRealFloatingType()) ||
            SeltTy->hasPointerRepresentation())
          return ABIArgInfo::getDirect(CGT.ConvertType(QualType(SeltTy, 0)));

      // FIXME: We should be able to narrow this integer in cases with dead
      // padding.
      return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(), Size));
    }

    return getIndirectReturnResult(RetTy, State);
  }

  // Treat an enum type as its underlying type.
  if (const EnumType *EnumTy = RetTy->getAs<EnumType>())
    RetTy = EnumTy->getDecl()->getIntegerType();

  if (const auto *EIT = RetTy->getAs<BitIntType>())
    if (EIT->getNumBits() > 64)
      return getIndirectReturnResult(RetTy, State);

  return (isPromotableIntegerTypeForABI(RetTy) ? ABIArgInfo::getExtend(RetTy)
                                               : ABIArgInfo::getDirect());
}
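
// Illustrative outcomes (sketch only, assuming IsRetSmallStructInRegABI):
//   void f();                       -> Ignore
//   struct S2 { short a, b; } f();  -> Direct(i32): register-sized struct
//   _Complex _Float16 f();          -> Direct(<2 x half>), returned in xmm0
//   struct F1 { float f; } f();     -> Direct(float) on non-Windows targets
//   struct Big { int a[4]; } f();   -> indirect via hidden sret pointer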

unsigned X86_32ABIInfo::getTypeStackAlignInBytes(QualType Ty,
                                                 unsigned Align) const {
  // If the alignment is less than or equal to the minimum ABI alignment,
  // just use the default; the backend will handle this.
  if (Align <= MinABIStackAlignInBytes)
    return 0; // Use default alignment.

  if (IsLinuxABI) {
    // Exclude other System V OSes (e.g. Darwin, PS4 and FreeBSD) since we don't
    // want to spend any effort dealing with the ramifications of ABI breaks.
    //
    // If the vector type is __m128/__m256/__m512, return the default alignment.
    if (Ty->isVectorType() && (Align == 16 || Align == 32 || Align == 64))
      return Align;
  }
  // On non-Darwin, the stack type alignment is always 4.
  if (!IsDarwinVectorABI) {
    // Set explicit alignment, since we may need to realign the top.
    return MinABIStackAlignInBytes;
  }

  // Otherwise, if the type contains an SSE vector type, the alignment is 16.
  if (Align >= 16 && (isSIMDVectorType(getContext(), Ty) ||
                      isRecordWithSIMDVectorType(getContext(), Ty)))
    return 16;

  return MinABIStackAlignInBytes;
}
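
// Illustrative results (sketch only; Align is the natural alignment in bytes):
//   int (Align 4)                 -> 0, use the default stack alignment
//   __m128 on Linux (Align 16)    -> 16, keep the vector's natural alignment
//   __m128 on Darwin (Align 16)   -> 16, SSE vectors get 16-byte stack slots
//   over-aligned type elsewhere   -> 4, flattened to MinABIStackAlignInBytes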

ABIArgInfo X86_32ABIInfo::getIndirectResult(QualType Ty, bool ByVal,
                                            CCState &State) const {
  if (!ByVal) {
    if (State.FreeRegs) {
      --State.FreeRegs; // Non-byval indirects just use one pointer.
      if (!IsMCUABI)
        return getNaturalAlignIndirectInReg(Ty);
    }
    return getNaturalAlignIndirect(Ty, getDataLayout().getAllocaAddrSpace(),
                                   /*ByVal=*/false);
  }

  // Compute the byval alignment.
  unsigned TypeAlign = getContext().getTypeAlign(Ty) / 8;
  unsigned StackAlign = getTypeStackAlignInBytes(Ty, TypeAlign);
  if (StackAlign == 0)
    return ABIArgInfo::getIndirect(
        CharUnits::fromQuantity(4),
        /*AddrSpace=*/getDataLayout().getAllocaAddrSpace(),
        /*ByVal=*/true);

  // If the stack alignment is less than the type alignment, realign the
  // argument.
  bool Realign = TypeAlign > StackAlign;
  return ABIArgInfo::getIndirect(
      CharUnits::fromQuantity(StackAlign),
      /*AddrSpace=*/getDataLayout().getAllocaAddrSpace(), /*ByVal=*/true,
      Realign);
}

X86_32ABIInfo::Class X86_32ABIInfo::classify(QualType Ty) const {
  const Type *T = isSingleElementStruct(Ty, getContext());
  if (!T)
    T = Ty.getTypePtr();

  if (const BuiltinType *BT = T->getAs<BuiltinType>()) {
    BuiltinType::Kind K = BT->getKind();
    if (K == BuiltinType::Float || K == BuiltinType::Double)
      return Float;
  }
  return Integer;
}

bool X86_32ABIInfo::updateFreeRegs(QualType Ty, CCState &State) const {
  if (!IsSoftFloatABI) {
    Class C = classify(Ty);
    if (C == Float)
      return false;
  }

  unsigned Size = getContext().getTypeSize(Ty);
  unsigned SizeInRegs = (Size + 31) / 32;

  if (SizeInRegs == 0)
    return false;

  if (!IsMCUABI) {
    if (SizeInRegs > State.FreeRegs) {
      State.FreeRegs = 0;
      return false;
    }
  } else {
    // The MCU psABI allows passing parameters in-reg even if there are
    // earlier parameters that are passed on the stack. Also,
    // it does not allow passing >8-byte structs in-register,
    // even if there are 3 free registers available.
    if (SizeInRegs > State.FreeRegs || SizeInRegs > 2)
      return false;
  }

  State.FreeRegs -= SizeInRegs;
  return true;
}
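
// Worked example (sketch only): under regcall, State.FreeRegs starts at 5.
//   updateFreeRegs(int, State);       // needs 1 reg  -> true, FreeRegs = 4
//   updateFreeRegs(long long, State); // needs 2 regs -> true, FreeRegs = 2
//   updateFreeRegs(double, State);    // classified Float -> false, unchanged
// On non-MCU targets, a type that does not fit zeroes FreeRegs, so later
// arguments cannot claim registers either.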

bool X86_32ABIInfo::shouldAggregateUseDirect(QualType Ty, CCState &State,
                                             bool &InReg,
                                             bool &NeedsPadding) const {
  // On Windows, aggregates other than HFAs are never passed in registers, and
  // they do not consume register slots. Homogeneous floating-point aggregates
  // (HFAs) have already been dealt with at this point.
  if (IsWin32StructABI && isAggregateTypeForABI(Ty))
    return false;

  NeedsPadding = false;
  InReg = !IsMCUABI;

  if (!updateFreeRegs(Ty, State))
    return false;

  if (IsMCUABI)
    return true;

  if (State.CC == llvm::CallingConv::X86_FastCall ||
      State.CC == llvm::CallingConv::X86_VectorCall ||
      State.CC == llvm::CallingConv::X86_RegCall) {
    if (getContext().getTypeSize(Ty) <= 32 && State.FreeRegs)
      NeedsPadding = true;

    return false;
  }

  return true;
}

bool X86_32ABIInfo::shouldPrimitiveUseInReg(QualType Ty, CCState &State) const {
  bool IsPtrOrInt = (getContext().getTypeSize(Ty) <= 32) &&
                    (Ty->isIntegralOrEnumerationType() || Ty->isPointerType() ||
                     Ty->isReferenceType());

  if (!IsPtrOrInt && (State.CC == llvm::CallingConv::X86_FastCall ||
                      State.CC == llvm::CallingConv::X86_VectorCall))
    return false;

  if (!updateFreeRegs(Ty, State))
    return false;

  if (!IsPtrOrInt && State.CC == llvm::CallingConv::X86_RegCall)
    return false;

  // Return true to apply inreg to all legal parameters except for MCU targets.
  return !IsMCUABI;
}

void X86_32ABIInfo::runVectorCallFirstPass(CGFunctionInfo &FI,
                                           CCState &State) const {
  // Vectorcall on x86 works subtly differently than on x64, so the format is
  // a bit different than the x64 version.  First, all vector types (not HVAs)
  // are assigned, with the first 6 ending up in the [XYZ]MM0-5 registers.
  // This differs from the x64 implementation, where the first 6 arguments by
  // index get registers.
  // In the second pass over the arguments, HVAs are passed in the remaining
  // vector registers if possible, or indirectly by address. The address will be
  // passed in ECX/EDX if available. Any other arguments are passed according to
  // the usual fastcall rules.
  MutableArrayRef<CGFunctionInfoArgInfo> Args = FI.arguments();
  for (int I = 0, E = Args.size(); I < E; ++I) {
    const Type *Base = nullptr;
    uint64_t NumElts = 0;
    const QualType &Ty = Args[I].type;
    if ((Ty->isVectorType() || Ty->isBuiltinType()) &&
        isHomogeneousAggregate(Ty, Base, NumElts)) {
      if (State.FreeSSERegs >= NumElts) {
        State.FreeSSERegs -= NumElts;
        Args[I].info = ABIArgInfo::getDirectInReg();
        State.IsPreassigned.set(I);
      }
    }
  }
}
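
// Illustrative two-pass behavior (sketch only), for a vectorcall function
//   void f(struct HVA4 h, __m128 v);  // HVA4 = struct of four __m128
// Pass 1 preassigns only the plain vector: v takes one of the six SSE slots,
// leaving five free. Pass 2 (in classifyArgumentType) then still has room to
// place the four-element HVA h in registers; assigning HVAs greedily in pass 1
// could instead force a later single vector onto the stack.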
744 
classifyArgumentType(QualType Ty,CCState & State,unsigned ArgIndex) const745 ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty, CCState &State,
746                                                unsigned ArgIndex) const {
747   // FIXME: Set alignment on indirect arguments.
748   bool IsFastCall = State.CC == llvm::CallingConv::X86_FastCall;
749   bool IsRegCall = State.CC == llvm::CallingConv::X86_RegCall;
750   bool IsVectorCall = State.CC == llvm::CallingConv::X86_VectorCall;
751 
752   Ty = useFirstFieldIfTransparentUnion(Ty);
753   TypeInfo TI = getContext().getTypeInfo(Ty);
754 
755   // Check with the C++ ABI first.
756   const RecordType *RT = Ty->getAs<RecordType>();
757   if (RT) {
758     CGCXXABI::RecordArgABI RAA = getRecordArgABI(RT, getCXXABI());
759     if (RAA == CGCXXABI::RAA_Indirect) {
760       return getIndirectResult(Ty, false, State);
761     } else if (State.IsDelegateCall) {
762       // Avoid having different alignments on delegate call args by always
763       // setting the alignment to 4, which is what we do for inallocas.
764       ABIArgInfo Res = getIndirectResult(Ty, false, State);
765       Res.setIndirectAlign(CharUnits::fromQuantity(4));
766       return Res;
767     } else if (RAA == CGCXXABI::RAA_DirectInMemory) {
768       // The field index doesn't matter, we'll fix it up later.
769       return ABIArgInfo::getInAlloca(/*FieldIndex=*/0);
770     }
771   }
772 
773   // Regcall uses the concept of a homogenous vector aggregate, similar
774   // to other targets.
775   const Type *Base = nullptr;
776   uint64_t NumElts = 0;
777   if ((IsRegCall || IsVectorCall) &&
778       isHomogeneousAggregate(Ty, Base, NumElts)) {
779     if (State.FreeSSERegs >= NumElts) {
780       State.FreeSSERegs -= NumElts;
781 
782       // Vectorcall passes HVAs directly and does not flatten them, but regcall
783       // does.
784       if (IsVectorCall)
785         return getDirectX86Hva();
786 
787       if (Ty->isBuiltinType() || Ty->isVectorType())
788         return ABIArgInfo::getDirect();
789       return ABIArgInfo::getExpand();
790     }
791     if (IsVectorCall && Ty->isBuiltinType())
792       return ABIArgInfo::getDirect();
793     return getIndirectResult(Ty, /*ByVal=*/false, State);
794   }
795 
796   if (isAggregateTypeForABI(Ty)) {
797     // Structures with flexible arrays are always indirect.
798     // FIXME: This should not be byval!
799     if (RT && RT->getDecl()->hasFlexibleArrayMember())
800       return getIndirectResult(Ty, true, State);
801 
802     // Ignore empty structs/unions on non-Windows.
803     if (!IsWin32StructABI && isEmptyRecord(getContext(), Ty, true))
804       return ABIArgInfo::getIgnore();
805 
806     // Ignore 0 sized structs.
807     if (TI.Width == 0)
808       return ABIArgInfo::getIgnore();
809 
810     llvm::LLVMContext &LLVMContext = getVMContext();
811     llvm::IntegerType *Int32 = llvm::Type::getInt32Ty(LLVMContext);
812     bool NeedsPadding = false;
813     bool InReg;
814     if (shouldAggregateUseDirect(Ty, State, InReg, NeedsPadding)) {
815       unsigned SizeInRegs = (TI.Width + 31) / 32;
816       SmallVector<llvm::Type*, 3> Elements(SizeInRegs, Int32);
817       llvm::Type *Result = llvm::StructType::get(LLVMContext, Elements);
818       if (InReg)
819         return ABIArgInfo::getDirectInReg(Result);
820       else
821         return ABIArgInfo::getDirect(Result);
822     }
823     llvm::IntegerType *PaddingType = NeedsPadding ? Int32 : nullptr;
824 
825     // Pass over-aligned aggregates to non-variadic functions on Windows
826     // indirectly. This behavior was added in MSVC 2015. Use the required
827     // alignment from the record layout, since that may be less than the
828     // regular type alignment, and types with required alignment of less than 4
829     // bytes are not passed indirectly.
830     if (IsWin32StructABI && State.Required.isRequiredArg(ArgIndex)) {
831       unsigned AlignInBits = 0;
832       if (RT) {
833         const ASTRecordLayout &Layout =
834           getContext().getASTRecordLayout(RT->getDecl());
835         AlignInBits = getContext().toBits(Layout.getRequiredAlignment());
836       } else if (TI.isAlignRequired()) {
837         AlignInBits = TI.Align;
838       }
839       if (AlignInBits > 32)
840         return getIndirectResult(Ty, /*ByVal=*/false, State);
841     }
842 
843     // Expand small (<= 128-bit) record types when we know that the stack layout
844     // of those arguments will match the struct. This is important because the
845     // LLVM backend isn't smart enough to remove byval, which inhibits many
846     // optimizations.
847     // Don't do this for the MCU if there are still free integer registers
848     // (see X86_64 ABI for full explanation).
849     if (TI.Width <= 4 * 32 && (!IsMCUABI || State.FreeRegs == 0) &&
850         canExpandIndirectArgument(Ty))
851       return ABIArgInfo::getExpandWithPadding(
852           IsFastCall || IsVectorCall || IsRegCall, PaddingType);
853 
854     return getIndirectResult(Ty, true, State);
855   }
856 
857   if (const VectorType *VT = Ty->getAs<VectorType>()) {
858     // On Windows, vectors are passed directly if registers are available, or
859     // indirectly if not. This avoids the need to align argument memory. Pass
860     // user-defined vector types larger than 512 bits indirectly for simplicity.
861     if (IsWin32StructABI) {
862       if (TI.Width <= 512 && State.FreeSSERegs > 0) {
863         --State.FreeSSERegs;
864         return ABIArgInfo::getDirectInReg();
865       }
866       return getIndirectResult(Ty, /*ByVal=*/false, State);
867     }
868 
869     // On Darwin, some vectors are passed in memory, we handle this by passing
870     // it as an i8/i16/i32/i64.
871     if (IsDarwinVectorABI) {
872       if ((TI.Width == 8 || TI.Width == 16 || TI.Width == 32) ||
873           (TI.Width == 64 && VT->getNumElements() == 1))
874         return ABIArgInfo::getDirect(
875             llvm::IntegerType::get(getVMContext(), TI.Width));
876     }
877 
878     if (IsX86_MMXType(CGT.ConvertType(Ty)))
879       return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(), 64));
880 
881     return ABIArgInfo::getDirect();
882   }
883 
884 
885   if (const EnumType *EnumTy = Ty->getAs<EnumType>())
886     Ty = EnumTy->getDecl()->getIntegerType();
887 
888   bool InReg = shouldPrimitiveUseInReg(Ty, State);
889 
890   if (isPromotableIntegerTypeForABI(Ty)) {
891     if (InReg)
892       return ABIArgInfo::getExtendInReg(Ty, CGT.ConvertType(Ty));
893     return ABIArgInfo::getExtend(Ty, CGT.ConvertType(Ty));
894   }
895 
896   if (const auto *EIT = Ty->getAs<BitIntType>()) {
897     if (EIT->getNumBits() <= 64) {
898       if (InReg)
899         return ABIArgInfo::getDirectInReg();
900       return ABIArgInfo::getDirect();
901     }
902     return getIndirectResult(Ty, /*ByVal=*/false, State);
903   }
904 
905   if (InReg)
906     return ABIArgInfo::getDirectInReg();
907   return ABIArgInfo::getDirect();
908 }

void X86_32ABIInfo::computeInfo(CGFunctionInfo &FI) const {
  CCState State(FI);
  if (IsMCUABI)
    State.FreeRegs = 3;
  else if (State.CC == llvm::CallingConv::X86_FastCall) {
    State.FreeRegs = 2;
    State.FreeSSERegs = 3;
  } else if (State.CC == llvm::CallingConv::X86_VectorCall) {
    State.FreeRegs = 2;
    State.FreeSSERegs = 6;
  } else if (FI.getHasRegParm())
    State.FreeRegs = FI.getRegParm();
  else if (State.CC == llvm::CallingConv::X86_RegCall) {
    State.FreeRegs = 5;
    State.FreeSSERegs = 8;
  } else if (IsWin32StructABI) {
    // Since MSVC 2015, the first three SSE vectors have been passed in
    // registers. The rest are passed indirectly.
    State.FreeRegs = DefaultNumRegisterParameters;
    State.FreeSSERegs = 3;
  } else
    State.FreeRegs = DefaultNumRegisterParameters;

  if (!::classifyReturnType(getCXXABI(), FI, *this)) {
    FI.getReturnInfo() = classifyReturnType(FI.getReturnType(), State);
  } else if (FI.getReturnInfo().isIndirect()) {
    // The C++ ABI is not aware of register usage, so we have to check if the
    // return value was sret and put it in a register ourselves if appropriate.
    if (State.FreeRegs) {
      --State.FreeRegs; // The sret parameter consumes a register.
      if (!IsMCUABI)
        FI.getReturnInfo().setInReg(true);
    }
  }

  // The chain argument effectively gives us another free register.
  if (FI.isChainCall())
    ++State.FreeRegs;

  // For vectorcall, do a first pass over the arguments, assigning FP and vector
  // arguments to XMM registers as available.
  if (State.CC == llvm::CallingConv::X86_VectorCall)
    runVectorCallFirstPass(FI, State);

  bool UsedInAlloca = false;
  MutableArrayRef<CGFunctionInfoArgInfo> Args = FI.arguments();
  for (unsigned I = 0, E = Args.size(); I < E; ++I) {
    // Skip arguments that have already been assigned.
    if (State.IsPreassigned.test(I))
      continue;

    Args[I].info = classifyArgumentType(Args[I].type, State, I);
    UsedInAlloca |= (Args[I].info.getKind() == ABIArgInfo::InAlloca);
  }

  // If we needed to use inalloca for any argument, do a second pass and rewrite
  // all the memory arguments to use inalloca.
  if (UsedInAlloca)
    rewriteWithInAlloca(FI);
}
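
// Worked example (sketch only): for __attribute__((fastcall))
//   void f(int a, int b, int c);
// State.FreeRegs starts at 2, so a and b are classified DirectInReg (the
// backend assigns them to ECX and EDX) while c, finding FreeRegs exhausted,
// is passed on the stack.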

void
X86_32ABIInfo::addFieldToArgStruct(SmallVector<llvm::Type *, 6> &FrameFields,
                                   CharUnits &StackOffset, ABIArgInfo &Info,
                                   QualType Type) const {
  // Arguments are always 4-byte-aligned.
  CharUnits WordSize = CharUnits::fromQuantity(4);
  assert(StackOffset.isMultipleOf(WordSize) && "unaligned inalloca struct");

  // sret pointers and indirect things will require an extra pointer
  // indirection, unless they are byval. Most things are byval, and will not
  // require this indirection.
  bool IsIndirect = false;
  if (Info.isIndirect() && !Info.getIndirectByVal())
    IsIndirect = true;
  Info = ABIArgInfo::getInAlloca(FrameFields.size(), IsIndirect);
  llvm::Type *LLTy = CGT.ConvertTypeForMem(Type);
  if (IsIndirect)
    LLTy = llvm::PointerType::getUnqual(getVMContext());
  FrameFields.push_back(LLTy);
  StackOffset += IsIndirect ? WordSize : getContext().getTypeSizeInChars(Type);

  // Insert padding bytes to respect alignment.
  CharUnits FieldEnd = StackOffset;
  StackOffset = FieldEnd.alignTo(WordSize);
  if (StackOffset != FieldEnd) {
    CharUnits NumBytes = StackOffset - FieldEnd;
    llvm::Type *Ty = llvm::Type::getInt8Ty(getVMContext());
    Ty = llvm::ArrayType::get(Ty, NumBytes.getQuantity());
    FrameFields.push_back(Ty);
  }
}

static bool isArgInAlloca(const ABIArgInfo &Info) {
  // Leave ignored and inreg arguments alone.
  switch (Info.getKind()) {
  case ABIArgInfo::InAlloca:
    return true;
  case ABIArgInfo::Ignore:
  case ABIArgInfo::IndirectAliased:
    return false;
  case ABIArgInfo::Indirect:
  case ABIArgInfo::Direct:
  case ABIArgInfo::Extend:
    return !Info.getInReg();
  case ABIArgInfo::Expand:
  case ABIArgInfo::CoerceAndExpand:
    // These are aggregate types which are never passed in registers when
    // inalloca is involved.
    return true;
  }
  llvm_unreachable("invalid enum");
}

void X86_32ABIInfo::rewriteWithInAlloca(CGFunctionInfo &FI) const {
  assert(IsWin32StructABI && "inalloca only supported on win32");

  // Build a packed struct type for all of the arguments in memory.
  SmallVector<llvm::Type *, 6> FrameFields;

  // The stack alignment is always 4.
  CharUnits StackAlign = CharUnits::fromQuantity(4);

  CharUnits StackOffset;
  CGFunctionInfo::arg_iterator I = FI.arg_begin(), E = FI.arg_end();

  // Put 'this' into the struct before 'sret', if necessary.
  bool IsThisCall =
      FI.getCallingConvention() == llvm::CallingConv::X86_ThisCall;
  ABIArgInfo &Ret = FI.getReturnInfo();
  if (Ret.isIndirect() && Ret.isSRetAfterThis() && !IsThisCall &&
      isArgInAlloca(I->info)) {
    addFieldToArgStruct(FrameFields, StackOffset, I->info, I->type);
    ++I;
  }

  // Put the sret parameter into the inalloca struct if it's in memory.
  if (Ret.isIndirect() && !Ret.getInReg()) {
    addFieldToArgStruct(FrameFields, StackOffset, Ret, FI.getReturnType());
    // On Windows, the hidden sret parameter is always returned in eax.
    Ret.setInAllocaSRet(IsWin32StructABI);
  }

  // Skip the 'this' parameter in ecx.
  if (IsThisCall)
    ++I;

  // Put arguments passed in memory into the struct.
  for (; I != E; ++I) {
    if (isArgInAlloca(I->info))
      addFieldToArgStruct(FrameFields, StackOffset, I->info, I->type);
  }

  FI.setArgStruct(llvm::StructType::get(getVMContext(), FrameFields,
                                        /*isPacked=*/true),
                  StackAlign);
}
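
// Illustrative frame layout (sketch only): for a win32 method
//   struct S { S(const S &); int x; };
//   void __thiscall C::m(S s, int i);
// 'this' stays in ECX, while s and i land in one packed inalloca struct,
// e.g. <{ %struct.S, i32 }>, allocated by the caller at 4-byte stack
// alignment; each field's offset mirrors the argument's stack slot.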

RValue X86_32ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
                                QualType Ty, AggValueSlot Slot) const {

  auto TypeInfo = getContext().getTypeInfoInChars(Ty);

  CCState State(*const_cast<CGFunctionInfo *>(CGF.CurFnInfo));
  ABIArgInfo AI = classifyArgumentType(Ty, State, /*ArgIndex=*/0);
  // Empty records are ignored for parameter passing purposes.
  if (AI.isIgnore())
    return Slot.asRValue();

  // x86-32 changes the alignment of certain arguments on the stack.
  //
  // Just messing with TypeInfo like this works because we never pass
  // anything indirectly.
  TypeInfo.Align = CharUnits::fromQuantity(
      getTypeStackAlignInBytes(Ty, TypeInfo.Align.getQuantity()));

  return emitVoidPtrVAArg(CGF, VAListAddr, Ty, /*Indirect=*/false, TypeInfo,
                          CharUnits::fromQuantity(4),
                          /*AllowHigherAlign=*/true, Slot);
}

bool X86_32TargetCodeGenInfo::isStructReturnInRegABI(
    const llvm::Triple &Triple, const CodeGenOptions &Opts) {
  assert(Triple.getArch() == llvm::Triple::x86);

  switch (Opts.getStructReturnConvention()) {
  case CodeGenOptions::SRCK_Default:
    break;
  case CodeGenOptions::SRCK_OnStack: // -fpcc-struct-return
    return false;
  case CodeGenOptions::SRCK_InRegs: // -freg-struct-return
    return true;
  }

  if (Triple.isOSDarwin() || Triple.isOSIAMCU())
    return true;

  switch (Triple.getOS()) {
  case llvm::Triple::DragonFly:
  case llvm::Triple::FreeBSD:
  case llvm::Triple::OpenBSD:
  case llvm::Triple::Win32:
    return true;
  default:
    return false;
  }
}
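
// Illustrative resolution order (sketch only):
//   clang -freg-struct-return -> true, regardless of OS
//   clang -fpcc-struct-return -> false, regardless of OS
//   no flag, Darwin/IAMCU or FreeBSD/OpenBSD/DragonFly/Win32 -> true
//   no flag, i386-pc-linux-gnu -> false (small structs returned on the stack)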

static void addX86InterruptAttrs(const FunctionDecl *FD, llvm::GlobalValue *GV,
                                 CodeGen::CodeGenModule &CGM) {
  if (!FD->hasAttr<AnyX86InterruptAttr>())
    return;

  llvm::Function *Fn = cast<llvm::Function>(GV);
  Fn->setCallingConv(llvm::CallingConv::X86_INTR);
  if (FD->getNumParams() == 0)
    return;

  auto PtrTy = cast<PointerType>(FD->getParamDecl(0)->getType());
  llvm::Type *ByValTy = CGM.getTypes().ConvertType(PtrTy->getPointeeType());
  llvm::Attribute NewAttr = llvm::Attribute::getWithByValType(
      Fn->getContext(), ByValTy);
  Fn->addParamAttr(0, NewAttr);
}

void X86_32TargetCodeGenInfo::setTargetAttributes(
    const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const {
  if (GV->isDeclaration())
    return;
  if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) {
    if (FD->hasAttr<X86ForceAlignArgPointerAttr>()) {
      llvm::Function *Fn = cast<llvm::Function>(GV);
      Fn->addFnAttr("stackrealign");
    }

    addX86InterruptAttrs(FD, GV, CGM);
  }
}

bool X86_32TargetCodeGenInfo::initDwarfEHRegSizeTable(
    CodeGen::CodeGenFunction &CGF, llvm::Value *Address) const {
  CodeGen::CGBuilderTy &Builder = CGF.Builder;

  llvm::Value *Four8 = llvm::ConstantInt::get(CGF.Int8Ty, 4);

  // 0-7 are the eight integer registers; the order is different
  // on Darwin (for EH), but the range is the same.
  // 8 is %eip.
  AssignToArrayRange(Builder, Address, Four8, 0, 8);

  if (CGF.CGM.getTarget().getTriple().isOSDarwin()) {
    // 12-16 are st(0..4).  Not sure why we stop at 4.
    // These have size 16, which is sizeof(long double) on
    // platforms with 8-byte alignment for that type.
    llvm::Value *Sixteen8 = llvm::ConstantInt::get(CGF.Int8Ty, 16);
    AssignToArrayRange(Builder, Address, Sixteen8, 12, 16);

  } else {
    // 9 is %eflags, which doesn't get a size on Darwin for some
    // reason.
    Builder.CreateAlignedStore(
        Four8, Builder.CreateConstInBoundsGEP1_32(CGF.Int8Ty, Address, 9),
        CharUnits::One());

    // 11-16 are st(0..5).  Not sure why we stop at 5.
    // These have size 12, which is sizeof(long double) on
    // platforms with 4-byte alignment for that type.
    llvm::Value *Twelve8 = llvm::ConstantInt::get(CGF.Int8Ty, 12);
    AssignToArrayRange(Builder, Address, Twelve8, 11, 16);
  }

  return false;
}

//===----------------------------------------------------------------------===//
// X86-64 ABI Implementation
//===----------------------------------------------------------------------===//

namespace {

/// Returns the size in bits of the largest (native) vector for \p AVXLevel.
static unsigned getNativeVectorSizeForAVXABI(X86AVXABILevel AVXLevel) {
  switch (AVXLevel) {
  case X86AVXABILevel::AVX512:
    return 512;
  case X86AVXABILevel::AVX:
    return 256;
  case X86AVXABILevel::None:
    return 128;
  }
  llvm_unreachable("Unknown AVXLevel");
}

/// X86_64ABIInfo - The X86_64 ABI information.
class X86_64ABIInfo : public ABIInfo {
  enum Class {
    Integer = 0,
    SSE,
    SSEUp,
    X87,
    X87Up,
    ComplexX87,
    NoClass,
    Memory
  };

  /// merge - Implement the X86_64 ABI merging algorithm.
  ///
  /// Merge an accumulating classification \arg Accum with a field
  /// classification \arg Field.
  ///
  /// \param Accum - The accumulating classification. This should
  /// always be either NoClass or the result of a previous merge
  /// call. In addition, this should never be Memory (the caller
  /// should just return Memory for the aggregate).
  static Class merge(Class Accum, Class Field);

  /// postMerge - Implement the X86_64 ABI post merging algorithm.
  ///
  /// Post merger cleanup, reduces a malformed Hi and Lo pair to
  /// final MEMORY or SSE classes when necessary.
  ///
  /// \param AggregateSize - The size of the current aggregate in
  /// the classification process.
  ///
  /// \param Lo - The classification for the parts of the type
  /// residing in the low word of the containing object.
  ///
  /// \param Hi - The classification for the parts of the type
  /// residing in the higher words of the containing object.
  ///
  void postMerge(unsigned AggregateSize, Class &Lo, Class &Hi) const;

  /// classify - Determine the x86_64 register classes in which the
  /// given type T should be passed.
  ///
  /// \param Lo - The classification for the parts of the type
  /// residing in the low word of the containing object.
  ///
  /// \param Hi - The classification for the parts of the type
  /// residing in the high word of the containing object.
  ///
  /// \param OffsetBase - The bit offset of this type in the
  /// containing object.  Some parameters are classified differently
  /// depending on whether they straddle an eightbyte boundary.
  ///
  /// \param isNamedArg - Whether the argument in question is a "named"
  /// argument, as used in AMD64-ABI 3.5.7.
  ///
  /// \param IsRegCall - Whether the calling convention is regcall.
  ///
  /// If a word is unused its result will be NoClass; if a type should
  /// be passed in Memory then at least the classification of \arg Lo
  /// will be Memory.
  ///
  /// The \arg Lo class will be NoClass iff the argument is ignored.
  ///
  /// If the \arg Lo class is ComplexX87, then the \arg Hi class will
  /// also be ComplexX87.
  void classify(QualType T, uint64_t OffsetBase, Class &Lo, Class &Hi,
                bool isNamedArg, bool IsRegCall = false) const;

  llvm::Type *GetByteVectorType(QualType Ty) const;
  llvm::Type *GetSSETypeAtOffset(llvm::Type *IRType,
                                 unsigned IROffset, QualType SourceTy,
                                 unsigned SourceOffset) const;
  llvm::Type *GetINTEGERTypeAtOffset(llvm::Type *IRType,
                                     unsigned IROffset, QualType SourceTy,
                                     unsigned SourceOffset) const;

  /// getIndirectReturnResult - Given a source type \arg Ty, return a suitable
  /// result such that the argument will be returned in memory.
  ABIArgInfo getIndirectReturnResult(QualType Ty) const;

  /// getIndirectResult - Given a source type \arg Ty, return a suitable result
  /// such that the argument will be passed in memory.
  ///
  /// \param freeIntRegs - The number of free integer registers remaining
  /// available.
  ABIArgInfo getIndirectResult(QualType Ty, unsigned freeIntRegs) const;

  ABIArgInfo classifyReturnType(QualType RetTy) const;

  ABIArgInfo classifyArgumentType(QualType Ty, unsigned freeIntRegs,
                                  unsigned &neededInt, unsigned &neededSSE,
                                  bool isNamedArg,
                                  bool IsRegCall = false) const;

  ABIArgInfo classifyRegCallStructType(QualType Ty, unsigned &NeededInt,
                                       unsigned &NeededSSE,
                                       unsigned &MaxVectorWidth) const;

  ABIArgInfo classifyRegCallStructTypeImpl(QualType Ty, unsigned &NeededInt,
                                           unsigned &NeededSSE,
                                           unsigned &MaxVectorWidth) const;

  bool IsIllegalVectorType(QualType Ty) const;
1311   /// The 0.98 ABI revision clarified a lot of ambiguities,
1312   /// unfortunately in ways that were not always consistent with
1313   /// certain previous compilers.  In particular, platforms which
1314   /// required strict binary compatibility with older versions of GCC
1315   /// may need to exempt themselves.
1316   bool honorsRevision0_98() const {
1317     return !getTarget().getTriple().isOSDarwin();
1318   }
1319 
1320   /// GCC classifies <1 x long long> as SSE but some platform ABIs choose to
1321   /// classify it as INTEGER (for compatibility with older clang compilers).
1322   bool classifyIntegerMMXAsSSE() const {
1323     // Clang <= 3.8 did not do this.
1324     if (getContext().getLangOpts().getClangABICompat() <=
1325         LangOptions::ClangABI::Ver3_8)
1326       return false;
1327 
1328     const llvm::Triple &Triple = getTarget().getTriple();
1329     if (Triple.isOSDarwin() || Triple.isPS() || Triple.isOSFreeBSD())
1330       return false;
1331     return true;
1332   }
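  // Illustrative (hypothetical typedef, not from this file):
  //   typedef long long v1i64 __attribute__((vector_size(8)));
  // Where classifyIntegerMMXAsSSE() returns false (e.g. Darwin, PS,
  // FreeBSD), a v1i64 argument is classified INTEGER and travels in a GPR;
  // where it returns true, it is classified SSE and travels in an XMM
  // register.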
1333 
1334   // GCC classifies vectors of __int128 as memory.
1335   bool passInt128VectorsInMem() const {
1336     // Clang <= 9.0 did not do this.
1337     if (getContext().getLangOpts().getClangABICompat() <=
1338         LangOptions::ClangABI::Ver9)
1339       return false;
1340 
1341     const llvm::Triple &T = getTarget().getTriple();
1342     return T.isOSLinux() || T.isOSNetBSD();
1343   }
1344 
1345   bool returnCXXRecordGreaterThan128InMem() const {
1346     // Clang <= 20.0 did not do this.
1347     if (getContext().getLangOpts().getClangABICompat() <=
1348         LangOptions::ClangABI::Ver20)
1349       return false;
1350 
1351     return true;
1352   }
1353 
1354   X86AVXABILevel AVXLevel;
1355   // Some ABIs (e.g. the X32 ABI and Native Client OS) use 32-bit pointers on
1356   // 64-bit hardware.
1357   bool Has64BitPointers;
1358 
1359 public:
1360   X86_64ABIInfo(CodeGen::CodeGenTypes &CGT, X86AVXABILevel AVXLevel)
1361       : ABIInfo(CGT), AVXLevel(AVXLevel),
1362         Has64BitPointers(CGT.getDataLayout().getPointerSize(0) == 8) {}
1363 
1364   bool isPassedUsingAVXType(QualType type) const {
1365     unsigned neededInt, neededSSE;
1366     // The freeIntRegs argument doesn't matter here.
1367     ABIArgInfo info = classifyArgumentType(type, 0, neededInt, neededSSE,
1368                                            /*isNamedArg*/true);
1369     if (info.isDirect()) {
1370       llvm::Type *ty = info.getCoerceToType();
1371       if (llvm::VectorType *vectorTy = dyn_cast_or_null<llvm::VectorType>(ty))
1372         return vectorTy->getPrimitiveSizeInBits().getFixedValue() > 128;
1373     }
1374     return false;
1375   }
1376 
1377   void computeInfo(CGFunctionInfo &FI) const override;
1378 
1379   RValue EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty,
1380                    AggValueSlot Slot) const override;
1381   RValue EmitMSVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty,
1382                      AggValueSlot Slot) const override;
1383 
1384   bool has64BitPointers() const {
1385     return Has64BitPointers;
1386   }
1387 };
1388 
1389 /// WinX86_64ABIInfo - The Windows X86_64 ABI information.
1390 class WinX86_64ABIInfo : public ABIInfo {
1391 public:
1392   WinX86_64ABIInfo(CodeGen::CodeGenTypes &CGT, X86AVXABILevel AVXLevel)
1393       : ABIInfo(CGT), AVXLevel(AVXLevel),
1394         IsMingw64(getTarget().getTriple().isWindowsGNUEnvironment()) {}
1395 
1396   void computeInfo(CGFunctionInfo &FI) const override;
1397 
1398   RValue EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty,
1399                    AggValueSlot Slot) const override;
1400 
1401   bool isHomogeneousAggregateBaseType(QualType Ty) const override {
1402     // FIXME: Assumes vectorcall is in use.
1403     return isX86VectorTypeForVectorCall(getContext(), Ty);
1404   }
1405 
1406   bool isHomogeneousAggregateSmallEnough(const Type *Ty,
1407                                          uint64_t NumMembers) const override {
1408     // FIXME: Assumes vectorcall is in use.
1409     return isX86VectorCallAggregateSmallEnough(NumMembers);
1410   }
1411 
1412 private:
1413   ABIArgInfo classify(QualType Ty, unsigned &FreeSSERegs, bool IsReturnType,
1414                       bool IsVectorCall, bool IsRegCall) const;
1415   ABIArgInfo reclassifyHvaArgForVectorCall(QualType Ty, unsigned &FreeSSERegs,
1416                                            const ABIArgInfo &current) const;
1417 
1418   X86AVXABILevel AVXLevel;
1419 
1420   bool IsMingw64;
1421 };
1422 
1423 class X86_64TargetCodeGenInfo : public TargetCodeGenInfo {
1424 public:
1425   X86_64TargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, X86AVXABILevel AVXLevel)
1426       : TargetCodeGenInfo(std::make_unique<X86_64ABIInfo>(CGT, AVXLevel)) {
1427     SwiftInfo =
1428         std::make_unique<SwiftABIInfo>(CGT, /*SwiftErrorInRegister=*/true);
1429   }
1430 
1431   /// Disable tail call on x86-64. The epilogue code before the tail jump blocks
1432   /// autoreleaseRV/retainRV and autoreleaseRV/unsafeClaimRV optimizations.
1433   bool markARCOptimizedReturnCallsAsNoTail() const override { return true; }
1434 
1435   int getDwarfEHStackPointer(CodeGen::CodeGenModule &CGM) const override {
1436     return 7;
1437   }
1438 
1439   bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF,
1440                                llvm::Value *Address) const override {
1441     llvm::Value *Eight8 = llvm::ConstantInt::get(CGF.Int8Ty, 8);
1442 
1443     // 0-15 are the 16 integer registers.
1444     // 16 is %rip.
1445     AssignToArrayRange(CGF.Builder, Address, Eight8, 0, 16);
1446     return false;
1447   }
1448 
1449   llvm::Type* adjustInlineAsmType(CodeGen::CodeGenFunction &CGF,
1450                                   StringRef Constraint,
1451                                   llvm::Type* Ty) const override {
1452     return X86AdjustInlineAsmType(CGF, Constraint, Ty);
1453   }
1454 
1455   bool isNoProtoCallVariadic(const CallArgList &args,
1456                              const FunctionNoProtoType *fnType) const override {
1457     // The default CC on x86-64 sets %al to the number of SSE
1458     // registers used, and GCC sets this when calling an unprototyped
1459     // function, so we override the default behavior.  However, don't do
1460     // that when AVX types are involved: the ABI explicitly states it is
1461     // undefined, and it doesn't work in practice because of how the ABI
1462     // defines varargs anyway.
1463     if (fnType->getCallConv() == CC_C) {
1464       bool HasAVXType = false;
1465       for (const CallArg &arg : args) {
1466         if (getABIInfo<X86_64ABIInfo>().isPassedUsingAVXType(arg.Ty)) {
1467           HasAVXType = true;
1468           break;
1469         }
1470       }
1471 
1472       if (!HasAVXType)
1473         return true;
1474     }
1475 
1476     return TargetCodeGenInfo::isNoProtoCallVariadic(args, fnType);
1477   }
1478 
1479   void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
1480                            CodeGen::CodeGenModule &CGM) const override {
1481     if (GV->isDeclaration())
1482       return;
1483     if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) {
1484       if (FD->hasAttr<X86ForceAlignArgPointerAttr>()) {
1485         llvm::Function *Fn = cast<llvm::Function>(GV);
1486         Fn->addFnAttr("stackrealign");
1487       }
1488 
1489       addX86InterruptAttrs(FD, GV, CGM);
1490     }
1491   }
1492 
1493   void checkFunctionCallABI(CodeGenModule &CGM, SourceLocation CallLoc,
1494                             const FunctionDecl *Caller,
1495                             const FunctionDecl *Callee, const CallArgList &Args,
1496                             QualType ReturnType) const override;
1497 };
1498 } // namespace
1499 
1500 static void initFeatureMaps(const ASTContext &Ctx,
1501                             llvm::StringMap<bool> &CallerMap,
1502                             const FunctionDecl *Caller,
1503                             llvm::StringMap<bool> &CalleeMap,
1504                             const FunctionDecl *Callee) {
1505   if (CalleeMap.empty() && CallerMap.empty()) {
1506     // The caller is potentially nullptr in the case where the call isn't in a
1507     // function.  In this case, getFunctionFeatureMap ensures we just get
1508     // the TU-level setting (since it cannot be modified by 'target').
1509     Ctx.getFunctionFeatureMap(CallerMap, Caller);
1510     Ctx.getFunctionFeatureMap(CalleeMap, Callee);
1511   }
1512 }
1513 
1514 static bool checkAVXParamFeature(DiagnosticsEngine &Diag,
1515                                  SourceLocation CallLoc,
1516                                  const llvm::StringMap<bool> &CallerMap,
1517                                  const llvm::StringMap<bool> &CalleeMap,
1518                                  QualType Ty, StringRef Feature,
1519                                  bool IsArgument) {
1520   bool CallerHasFeat = CallerMap.lookup(Feature);
1521   bool CalleeHasFeat = CalleeMap.lookup(Feature);
1522   if (!CallerHasFeat && !CalleeHasFeat)
1523     return Diag.Report(CallLoc, diag::warn_avx_calling_convention)
1524            << IsArgument << Ty << Feature;
1525 
1526   // Mixing calling conventions here is very clearly an error.
1527   if (!CallerHasFeat || !CalleeHasFeat)
1528     return Diag.Report(CallLoc, diag::err_avx_calling_convention)
1529            << IsArgument << Ty << Feature;
1530 
1531   // Else, both caller and callee have the required feature, so there is no need
1532   // to diagnose.
1533   return false;
1534 }
1535 
1536 static bool checkAVX512ParamFeature(DiagnosticsEngine &Diag,
1537                                     SourceLocation CallLoc,
1538                                     const llvm::StringMap<bool> &CallerMap,
1539                                     const llvm::StringMap<bool> &CalleeMap,
1540                                     QualType Ty, bool IsArgument) {
1541   bool Caller256 = CallerMap.lookup("avx512f") && !CallerMap.lookup("evex512");
1542   bool Callee256 = CalleeMap.lookup("avx512f") && !CalleeMap.lookup("evex512");
1543 
1544   // Forbid passing or returning 512-bit or larger vectors when ZMM
1545   // instructions are disabled.
1546   if (Caller256 || Callee256)
1547     return Diag.Report(CallLoc, diag::err_avx_calling_convention)
1548            << IsArgument << Ty << "evex512";
1549 
1550   return checkAVXParamFeature(Diag, CallLoc, CallerMap, CalleeMap, Ty,
1551                               "avx512f", IsArgument);
1552 }
1553 
1554 static bool checkAVXParam(DiagnosticsEngine &Diag, ASTContext &Ctx,
1555                           SourceLocation CallLoc,
1556                           const llvm::StringMap<bool> &CallerMap,
1557                           const llvm::StringMap<bool> &CalleeMap, QualType Ty,
1558                           bool IsArgument) {
1559   uint64_t Size = Ctx.getTypeSize(Ty);
1560   if (Size > 256)
1561     return checkAVX512ParamFeature(Diag, CallLoc, CallerMap, CalleeMap, Ty,
1562                                    IsArgument);
1563 
1564   if (Size > 128)
1565     return checkAVXParamFeature(Diag, CallLoc, CallerMap, CalleeMap, Ty, "avx",
1566                                 IsArgument);
1567 
1568   return false;
1569 }
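// A sketch of the mismatch this diagnoses (hypothetical user code):
//   __attribute__((target("avx"))) void callee(__m256 v);
//   void caller(__m256 v) { callee(v); }  // caller compiled without AVX
// Only one side has the "avx" feature, so the 256-bit argument would be
// passed with two different ABIs; checkAVXParamFeature reports
// err_avx_calling_convention instead of silently miscompiling the call.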
1570 
1571 void X86_64TargetCodeGenInfo::checkFunctionCallABI(CodeGenModule &CGM,
1572                                                    SourceLocation CallLoc,
1573                                                    const FunctionDecl *Caller,
1574                                                    const FunctionDecl *Callee,
1575                                                    const CallArgList &Args,
1576                                                    QualType ReturnType) const {
1577   if (!Callee)
1578     return;
1579 
1580   llvm::StringMap<bool> CallerMap;
1581   llvm::StringMap<bool> CalleeMap;
1582   unsigned ArgIndex = 0;
1583 
1584   // We need to loop through the actual call arguments rather than the
1585   // function's parameters, in case this call is variadic.
1586   for (const CallArg &Arg : Args) {
1587     // The "avx" feature changes how vectors >128 in size are passed. "avx512f"
1588     // additionally changes how vectors >256 in size are passed. Like GCC, we
1589     // warn when a function is called with an argument where this will change.
1590     // Unlike GCC, we also error when it is an obvious ABI mismatch, that is,
1591     // the caller and callee features are mismatched.
1592     // Unfortunately, we cannot do this diagnostic in SEMA, since the callee can
1593     // change its ABI with attribute-target after this call.
1594     if (Arg.getType()->isVectorType() &&
1595         CGM.getContext().getTypeSize(Arg.getType()) > 128) {
1596       initFeatureMaps(CGM.getContext(), CallerMap, Caller, CalleeMap, Callee);
1597       QualType Ty = Arg.getType();
1598       // The CallArg seems to have desugared the type already, so for clearer
1599       // diagnostics, replace it with the type in the FunctionDecl if possible.
1600       if (ArgIndex < Callee->getNumParams())
1601         Ty = Callee->getParamDecl(ArgIndex)->getType();
1602 
1603       if (checkAVXParam(CGM.getDiags(), CGM.getContext(), CallLoc, CallerMap,
1604                         CalleeMap, Ty, /*IsArgument*/ true))
1605         return;
1606     }
1607     ++ArgIndex;
1608   }
1609 
1610   // Check return always, as we don't have a good way of knowing in codegen
1611   // whether this value is used, tail-called, etc.
1612   if (Callee->getReturnType()->isVectorType() &&
1613       CGM.getContext().getTypeSize(Callee->getReturnType()) > 128) {
1614     initFeatureMaps(CGM.getContext(), CallerMap, Caller, CalleeMap, Callee);
1615     checkAVXParam(CGM.getDiags(), CGM.getContext(), CallLoc, CallerMap,
1616                   CalleeMap, Callee->getReturnType(),
1617                   /*IsArgument*/ false);
1618   }
1619 }
1620 
1621 std::string TargetCodeGenInfo::qualifyWindowsLibrary(StringRef Lib) {
1622   // If the argument does not end in .lib, automatically add the suffix.
1623   // If the argument contains a space, enclose it in quotes.
1624   // This matches the behavior of MSVC.
1625   bool Quote = Lib.contains(' ');
1626   std::string ArgStr = Quote ? "\"" : "";
1627   ArgStr += Lib;
1628   if (!Lib.ends_with_insensitive(".lib") && !Lib.ends_with_insensitive(".a"))
1629     ArgStr += ".lib";
1630   ArgStr += Quote ? "\"" : "";
1631   return ArgStr;
1632 }
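// Illustrative results of the rules above:
//   qualifyWindowsLibrary("foo")     -> foo.lib
//   qualifyWindowsLibrary("foo.lib") -> foo.lib
//   qualifyWindowsLibrary("my lib")  -> "my lib.lib"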
1633 
1634 namespace {
1635 class WinX86_32TargetCodeGenInfo : public X86_32TargetCodeGenInfo {
1636 public:
1637   WinX86_32TargetCodeGenInfo(CodeGen::CodeGenTypes &CGT,
1638         bool DarwinVectorABI, bool RetSmallStructInRegABI, bool Win32StructABI,
1639         unsigned NumRegisterParameters)
1640     : X86_32TargetCodeGenInfo(CGT, DarwinVectorABI, RetSmallStructInRegABI,
1641         Win32StructABI, NumRegisterParameters, false) {}
1642 
1643   void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
1644                            CodeGen::CodeGenModule &CGM) const override;
1645 
1646   void getDependentLibraryOption(llvm::StringRef Lib,
1647                                  llvm::SmallString<24> &Opt) const override {
1648     Opt = "/DEFAULTLIB:";
1649     Opt += qualifyWindowsLibrary(Lib);
1650   }
1651 
1652   void getDetectMismatchOption(llvm::StringRef Name,
1653                                llvm::StringRef Value,
1654                                llvm::SmallString<32> &Opt) const override {
1655     Opt = "/FAILIFMISMATCH:\"" + Name.str() + "=" + Value.str() + "\"";
1656   }
1657 };
1658 } // namespace
1659 
1660 void WinX86_32TargetCodeGenInfo::setTargetAttributes(
1661     const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const {
1662   X86_32TargetCodeGenInfo::setTargetAttributes(D, GV, CGM);
1663   if (GV->isDeclaration())
1664     return;
1665   addStackProbeTargetAttributes(D, GV, CGM);
1666 }
1667 
1668 namespace {
1669 class WinX86_64TargetCodeGenInfo : public TargetCodeGenInfo {
1670 public:
1671   WinX86_64TargetCodeGenInfo(CodeGen::CodeGenTypes &CGT,
1672                              X86AVXABILevel AVXLevel)
1673       : TargetCodeGenInfo(std::make_unique<WinX86_64ABIInfo>(CGT, AVXLevel)) {
1674     SwiftInfo =
1675         std::make_unique<SwiftABIInfo>(CGT, /*SwiftErrorInRegister=*/true);
1676   }
1677 
1678   void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
1679                            CodeGen::CodeGenModule &CGM) const override;
1680 
1681   int getDwarfEHStackPointer(CodeGen::CodeGenModule &CGM) const override {
1682     return 7;
1683   }
1684 
1685   bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF,
1686                                llvm::Value *Address) const override {
1687     llvm::Value *Eight8 = llvm::ConstantInt::get(CGF.Int8Ty, 8);
1688 
1689     // 0-15 are the 16 integer registers.
1690     // 16 is %rip.
1691     AssignToArrayRange(CGF.Builder, Address, Eight8, 0, 16);
1692     return false;
1693   }
1694 
1695   void getDependentLibraryOption(llvm::StringRef Lib,
1696                                  llvm::SmallString<24> &Opt) const override {
1697     Opt = "/DEFAULTLIB:";
1698     Opt += qualifyWindowsLibrary(Lib);
1699   }
1700 
1701   void getDetectMismatchOption(llvm::StringRef Name,
1702                                llvm::StringRef Value,
1703                                llvm::SmallString<32> &Opt) const override {
1704     Opt = "/FAILIFMISMATCH:\"" + Name.str() + "=" + Value.str() + "\"";
1705   }
1706 };
1707 } // namespace
1708 
1709 void WinX86_64TargetCodeGenInfo::setTargetAttributes(
1710     const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const {
1711   TargetCodeGenInfo::setTargetAttributes(D, GV, CGM);
1712   if (GV->isDeclaration())
1713     return;
1714   if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) {
1715     if (FD->hasAttr<X86ForceAlignArgPointerAttr>()) {
1716       llvm::Function *Fn = cast<llvm::Function>(GV);
1717       Fn->addFnAttr("stackrealign");
1718     }
1719 
1720     addX86InterruptAttrs(FD, GV, CGM);
1721   }
1722 
1723   addStackProbeTargetAttributes(D, GV, CGM);
1724 }
1725 
1726 void X86_64ABIInfo::postMerge(unsigned AggregateSize, Class &Lo,
1727                               Class &Hi) const {
1728   // AMD64-ABI 3.2.3p2: Rule 5. Then a post merger cleanup is done:
1729   //
1730   // (a) If one of the classes is Memory, the whole argument is passed in
1731   //     memory.
1732   //
1733   // (b) If X87UP is not preceded by X87, the whole argument is passed in
1734   //     memory.
1735   //
1736   // (c) If the size of the aggregate exceeds two eightbytes and the first
1737   //     eightbyte isn't SSE or any other eightbyte isn't SSEUP, the whole
1738   //     argument is passed in memory. NOTE: This is necessary to keep the
1739   //     ABI working for processors that don't support the __m256 type.
1740   //
1741   // (d) If SSEUP is not preceded by SSE or SSEUP, it is converted to SSE.
1742   //
1743   // Some of these are enforced by the merging logic.  Others can arise
1744   // only with unions; for example:
1745   //   union { _Complex double; unsigned; }
1746   //
1747   // Note that clauses (b) and (c) were added in 0.98.
1748   //
1749   if (Hi == Memory)
1750     Lo = Memory;
1751   if (Hi == X87Up && Lo != X87 && honorsRevision0_98())
1752     Lo = Memory;
1753   if (AggregateSize > 128 && (Lo != SSE || Hi != SSEUp))
1754     Lo = Memory;
1755   if (Hi == SSEUp && Lo != SSE)
1756     Hi = SSE;
1757 }
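// Worked example of clauses (c) and (d), assuming AVX is enabled and the
// argument is named (hypothetical type):
//   union U { __m256 v; int i; };  // 256 bits
// Merging the members yields Lo = Integer, Hi = SSEUp. Clause (c) forces
// Lo = Memory because Lo != SSE, clause (d) rewrites Hi to SSE, and the
// union is ultimately passed in memory.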
1758 
1759 X86_64ABIInfo::Class X86_64ABIInfo::merge(Class Accum, Class Field) {
1760   // AMD64-ABI 3.2.3p2: Rule 4. Each field of an object is
1761   // classified recursively so that always two fields are
1762   // considered. The resulting class is calculated according to
1763   // the classes of the fields in the eightbyte:
1764   //
1765   // (a) If both classes are equal, this is the resulting class.
1766   //
1767   // (b) If one of the classes is NO_CLASS, the resulting class is
1768   // the other class.
1769   //
1770   // (c) If one of the classes is MEMORY, the result is the MEMORY
1771   // class.
1772   //
1773   // (d) If one of the classes is INTEGER, the result is the
1774   // INTEGER.
1775   //
1776   // (e) If one of the classes is X87, X87UP, COMPLEX_X87 class,
1777   // MEMORY is used as class.
1778   //
1779   // (f) Otherwise class SSE is used.
1780 
1781   // Accum should never be memory (we should have returned) or
1782   // ComplexX87 (because this cannot be passed in a structure).
1783   assert((Accum != Memory && Accum != ComplexX87) &&
1784          "Invalid accumulated classification during merge.");
1785   if (Accum == Field || Field == NoClass)
1786     return Accum;
1787   if (Field == Memory)
1788     return Memory;
1789   if (Accum == NoClass)
1790     return Field;
1791   if (Accum == Integer || Field == Integer)
1792     return Integer;
1793   if (Field == X87 || Field == X87Up || Field == ComplexX87 ||
1794       Accum == X87 || Accum == X87Up)
1795     return Memory;
1796   return SSE;
1797 }
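// A small worked example of rule (d) above (hypothetical type):
//   struct S { int a; float b; };  // both fields share eightbyte 0
// 'a' classifies as INTEGER and 'b' as SSE; merging gives INTEGER, so the
// whole struct travels in a single GPR.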
1798 
1799 void X86_64ABIInfo::classify(QualType Ty, uint64_t OffsetBase, Class &Lo,
1800                              Class &Hi, bool isNamedArg, bool IsRegCall) const {
1801   // FIXME: This code can be simplified by introducing a simple value class for
1802   // Class pairs with appropriate constructor methods for the various
1803   // situations.
1804 
1805   // FIXME: Some of the split computations are wrong; unaligned vectors
1806   // shouldn't be passed in registers for example, so there is no chance they
1807   // can straddle an eightbyte. Verify & simplify.
1808 
1809   Lo = Hi = NoClass;
1810 
1811   Class &Current = OffsetBase < 64 ? Lo : Hi;
1812   Current = Memory;
1813 
1814   if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) {
1815     BuiltinType::Kind k = BT->getKind();
1816 
1817     if (k == BuiltinType::Void) {
1818       Current = NoClass;
1819     } else if (k == BuiltinType::Int128 || k == BuiltinType::UInt128) {
1820       Lo = Integer;
1821       Hi = Integer;
1822     } else if (k >= BuiltinType::Bool && k <= BuiltinType::LongLong) {
1823       Current = Integer;
1824     } else if (k == BuiltinType::Float || k == BuiltinType::Double ||
1825                k == BuiltinType::Float16 || k == BuiltinType::BFloat16) {
1826       Current = SSE;
1827     } else if (k == BuiltinType::Float128) {
1828       Lo = SSE;
1829       Hi = SSEUp;
1830     } else if (k == BuiltinType::LongDouble) {
1831       const llvm::fltSemantics *LDF = &getTarget().getLongDoubleFormat();
1832       if (LDF == &llvm::APFloat::IEEEquad()) {
1833         Lo = SSE;
1834         Hi = SSEUp;
1835       } else if (LDF == &llvm::APFloat::x87DoubleExtended()) {
1836         Lo = X87;
1837         Hi = X87Up;
1838       } else if (LDF == &llvm::APFloat::IEEEdouble()) {
1839         Current = SSE;
1840       } else
1841         llvm_unreachable("unexpected long double representation!");
1842     }
1843     // FIXME: _Decimal32 and _Decimal64 are SSE.
1844     // FIXME: __float128 and _Decimal128 are (SSE, SSEUp).
1845     return;
1846   }
1847 
1848   if (const EnumType *ET = Ty->getAs<EnumType>()) {
1849     // Classify the underlying integer type.
1850     classify(ET->getDecl()->getIntegerType(), OffsetBase, Lo, Hi, isNamedArg);
1851     return;
1852   }
1853 
1854   if (Ty->hasPointerRepresentation()) {
1855     Current = Integer;
1856     return;
1857   }
1858 
1859   if (Ty->isMemberPointerType()) {
1860     if (Ty->isMemberFunctionPointerType()) {
1861       if (Has64BitPointers) {
1862         // If Has64BitPointers, this is an {i64, i64}, so classify both
1863         // Lo and Hi now.
1864         Lo = Hi = Integer;
1865       } else {
1866         // Otherwise, with 32-bit pointers, this is an {i32, i32}. If that
1867         // straddles an eightbyte boundary, Hi should be classified as well.
1868         uint64_t EB_FuncPtr = (OffsetBase) / 64;
1869         uint64_t EB_ThisAdj = (OffsetBase + 64 - 1) / 64;
1870         if (EB_FuncPtr != EB_ThisAdj) {
1871           Lo = Hi = Integer;
1872         } else {
1873           Current = Integer;
1874         }
1875       }
1876     } else {
1877       Current = Integer;
1878     }
1879     return;
1880   }
1881 
1882   if (const VectorType *VT = Ty->getAs<VectorType>()) {
1883     uint64_t Size = getContext().getTypeSize(VT);
1884     if (Size == 1 || Size == 8 || Size == 16 || Size == 32) {
1885       // gcc passes the following as integer:
1886       // 4 bytes - <4 x char>, <2 x short>, <1 x int>, <1 x float>
1887       // 2 bytes - <2 x char>, <1 x short>
1888       // 1 byte  - <1 x char>
1889       Current = Integer;
1890 
1891       // If this type crosses an eightbyte boundary, it should be
1892       // split.
1893       uint64_t EB_Lo = (OffsetBase) / 64;
1894       uint64_t EB_Hi = (OffsetBase + Size - 1) / 64;
1895       if (EB_Lo != EB_Hi)
1896         Hi = Lo;
1897     } else if (Size == 64) {
1898       QualType ElementType = VT->getElementType();
1899 
1900       // gcc passes <1 x double> in memory. :(
1901       if (ElementType->isSpecificBuiltinType(BuiltinType::Double))
1902         return;
1903 
1904       // gcc passes <1 x long long> as SSE but clang used to unconditionally
1905       // pass them as integer.  For platforms where clang is the de facto
1906       // platform compiler, we must continue to use integer.
1907       if (!classifyIntegerMMXAsSSE() &&
1908           (ElementType->isSpecificBuiltinType(BuiltinType::LongLong) ||
1909            ElementType->isSpecificBuiltinType(BuiltinType::ULongLong) ||
1910            ElementType->isSpecificBuiltinType(BuiltinType::Long) ||
1911            ElementType->isSpecificBuiltinType(BuiltinType::ULong)))
1912         Current = Integer;
1913       else
1914         Current = SSE;
1915 
1916       // If this type crosses an eightbyte boundary, it should be
1917       // split.
1918       if (OffsetBase && OffsetBase != 64)
1919         Hi = Lo;
1920     } else if (Size == 128 ||
1921                (isNamedArg && Size <= getNativeVectorSizeForAVXABI(AVXLevel))) {
1922       QualType ElementType = VT->getElementType();
1923 
1924       // gcc passes 256 and 512 bit <X x __int128> vectors in memory. :(
1925       if (passInt128VectorsInMem() && Size != 128 &&
1926           (ElementType->isSpecificBuiltinType(BuiltinType::Int128) ||
1927            ElementType->isSpecificBuiltinType(BuiltinType::UInt128)))
1928         return;
1929 
1930       // Arguments of 256 bits are split into four eightbyte chunks. The
1931       // least significant one belongs to class SSE and all the others to class
1932       // SSEUP. The original Lo and Hi design assumes that types can't be
1933       // greater than 128 bits, so a 64-bit split into Hi and Lo makes sense.
1934       // This design isn't correct for 256 bits, but since there are no cases
1935       // where the upper parts would need to be inspected, avoid adding
1936       // complexity and just consider Hi to match the 64-256 part.
1937       //
1938       // Note that per 3.5.7 of AMD64-ABI, 256-bit args are only passed in
1939       // registers if they are "named", i.e. not part of the "..." of a
1940       // variadic function.
1941       //
1942       // Similarly, per 3.2.3. of the AVX512 draft, 512-bits ("named") args are
1943       // split into eight eightbyte chunks, one SSE and seven SSEUP.
1944       Lo = SSE;
1945       Hi = SSEUp;
1946     }
1947     return;
1948   }
1949 
1950   if (const ComplexType *CT = Ty->getAs<ComplexType>()) {
1951     QualType ET = getContext().getCanonicalType(CT->getElementType());
1952 
1953     uint64_t Size = getContext().getTypeSize(Ty);
1954     if (ET->isIntegralOrEnumerationType()) {
1955       if (Size <= 64)
1956         Current = Integer;
1957       else if (Size <= 128)
1958         Lo = Hi = Integer;
1959     } else if (ET->isFloat16Type() || ET == getContext().FloatTy ||
1960                ET->isBFloat16Type()) {
1961       Current = SSE;
1962     } else if (ET == getContext().DoubleTy) {
1963       Lo = Hi = SSE;
1964     } else if (ET == getContext().LongDoubleTy) {
1965       const llvm::fltSemantics *LDF = &getTarget().getLongDoubleFormat();
1966       if (LDF == &llvm::APFloat::IEEEquad())
1967         Current = Memory;
1968       else if (LDF == &llvm::APFloat::x87DoubleExtended())
1969         Current = ComplexX87;
1970       else if (LDF == &llvm::APFloat::IEEEdouble())
1971         Lo = Hi = SSE;
1972       else
1973         llvm_unreachable("unexpected long double representation!");
1974     }
1975 
1976     // If this complex type crosses an eightbyte boundary then it
1977     // should be split.
1978     uint64_t EB_Real = (OffsetBase) / 64;
1979     uint64_t EB_Imag = (OffsetBase + getContext().getTypeSize(ET)) / 64;
1980     if (Hi == NoClass && EB_Real != EB_Imag)
1981       Hi = Lo;
1982 
1983     return;
1984   }
1985 
1986   if (const auto *EITy = Ty->getAs<BitIntType>()) {
1987     if (EITy->getNumBits() <= 64)
1988       Current = Integer;
1989     else if (EITy->getNumBits() <= 128)
1990       Lo = Hi = Integer;
1991     // Larger values need to get passed in memory.
1992     return;
1993   }
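  // Concretely for the _BitInt case above: a _BitInt(128) occupies two
  // GPRs (Lo = Hi = Integer), while e.g. a _BitInt(129) keeps the default
  // Lo = Memory and is passed on the stack.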
1994 
1995   if (const ConstantArrayType *AT = getContext().getAsConstantArrayType(Ty)) {
1996     // Arrays are treated like structures.
1997 
1998     uint64_t Size = getContext().getTypeSize(Ty);
1999 
2000     // AMD64-ABI 3.2.3p2: Rule 1. If the size of an object is larger
2001     // than eight eightbytes, ..., it has class MEMORY.
2002     // The regcall ABI doesn't limit the size of an object; the only
2003     // limitation is the number of free registers, which is checked in
2004     // computeInfo.
2004     if (!IsRegCall && Size > 512)
2005       return;
2006 
2007     // AMD64-ABI 3.2.3p2: Rule 1. If ..., or it contains unaligned
2008     // fields, it has class MEMORY.
2009     //
2010     // Only need to check alignment of array base.
2011     if (OffsetBase % getContext().getTypeAlign(AT->getElementType()))
2012       return;
2013 
2014     // Otherwise implement simplified merge. We could be smarter about
2015     // this, but it isn't worth it and would be harder to verify.
2016     Current = NoClass;
2017     uint64_t EltSize = getContext().getTypeSize(AT->getElementType());
2018     uint64_t ArraySize = AT->getZExtSize();
2019 
2020     // The only case a 256-bit wide vector could be used is when the array
2021     // contains a single 256-bit element. Since Lo and Hi logic isn't extended
2022     // to work for sizes wider than 128, early check and fallback to memory.
2023     //
2024     if (Size > 128 &&
2025         (Size != EltSize || Size > getNativeVectorSizeForAVXABI(AVXLevel)))
2026       return;
2027 
2028     for (uint64_t i=0, Offset=OffsetBase; i<ArraySize; ++i, Offset += EltSize) {
2029       Class FieldLo, FieldHi;
2030       classify(AT->getElementType(), Offset, FieldLo, FieldHi, isNamedArg);
2031       Lo = merge(Lo, FieldLo);
2032       Hi = merge(Hi, FieldHi);
2033       if (Lo == Memory || Hi == Memory)
2034         break;
2035     }
2036 
2037     postMerge(Size, Lo, Hi);
2038     assert((Hi != SSEUp || Lo == SSE) && "Invalid SSEUp array classification.");
2039     return;
2040   }
2041 
2042   if (const RecordType *RT = Ty->getAs<RecordType>()) {
2043     uint64_t Size = getContext().getTypeSize(Ty);
2044 
2045     // AMD64-ABI 3.2.3p2: Rule 1. If the size of an object is larger
2046     // than eight eightbytes, ..., it has class MEMORY.
2047     if (Size > 512)
2048       return;
2049 
2050     // AMD64-ABI 3.2.3p2: Rule 2. If a C++ object has either a non-trivial
2051     // copy constructor or a non-trivial destructor, it is passed by invisible
2052     // reference.
2053     if (getRecordArgABI(RT, getCXXABI()))
2054       return;
2055 
2056     const RecordDecl *RD = RT->getDecl();
2057 
2058     // Assume variable sized types are passed in memory.
2059     if (RD->hasFlexibleArrayMember())
2060       return;
2061 
2062     const ASTRecordLayout &Layout = getContext().getASTRecordLayout(RD);
2063 
2064     // Reset Lo class, this will be recomputed.
2065     Current = NoClass;
2066 
2067     // If this is a C++ record, classify the bases first.
2068     if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD)) {
2069       for (const auto &I : CXXRD->bases()) {
2070         assert(!I.isVirtual() && !I.getType()->isDependentType() &&
2071                "Unexpected base class!");
2072         const auto *Base =
2073             cast<CXXRecordDecl>(I.getType()->castAs<RecordType>()->getDecl());
2074 
2075         // Classify this field.
2076         //
2077         // AMD64-ABI 3.2.3p2: Rule 3. If the size of the aggregate exceeds a
2078         // single eightbyte, each is classified separately. Each eightbyte gets
2079         // initialized to class NO_CLASS.
2080         Class FieldLo, FieldHi;
2081         uint64_t Offset =
2082           OffsetBase + getContext().toBits(Layout.getBaseClassOffset(Base));
2083         classify(I.getType(), Offset, FieldLo, FieldHi, isNamedArg);
2084         Lo = merge(Lo, FieldLo);
2085         Hi = merge(Hi, FieldHi);
2086         if (returnCXXRecordGreaterThan128InMem() &&
2087             (Size > 128 && (Size != getContext().getTypeSize(I.getType()) ||
2088                             Size > getNativeVectorSizeForAVXABI(AVXLevel)))) {
2089           // The only case in which a 256-bit (or 512-bit) wide vector can be
2090           // returned is when the C++ record contains a single such element.
2091           Lo = Memory;
2092         }
2093         if (Lo == Memory || Hi == Memory) {
2094           postMerge(Size, Lo, Hi);
2095           return;
2096         }
2097       }
2098     }
2099 
2100     // Classify the fields one at a time, merging the results.
2101     unsigned idx = 0;
2102     bool UseClang11Compat = getContext().getLangOpts().getClangABICompat() <=
2103                                 LangOptions::ClangABI::Ver11 ||
2104                             getContext().getTargetInfo().getTriple().isPS();
2105     bool IsUnion = RT->isUnionType() && !UseClang11Compat;
2106 
2107     for (RecordDecl::field_iterator i = RD->field_begin(), e = RD->field_end();
2108            i != e; ++i, ++idx) {
2109       uint64_t Offset = OffsetBase + Layout.getFieldOffset(idx);
2110       bool BitField = i->isBitField();
2111 
2112       // Ignore padding bit-fields.
2113       if (BitField && i->isUnnamedBitField())
2114         continue;
2115 
2116       // AMD64-ABI 3.2.3p2: Rule 1. If the size of an object is larger than
2117       // eight eightbytes, or it contains unaligned fields, it has class MEMORY.
2118       //
2119       // The only case a 256-bit or a 512-bit wide vector could be used is when
2120       // the struct contains a single 256-bit or 512-bit element. Early check
2121       // and fallback to memory.
2122       //
2123       // FIXME: Extend the Lo and Hi logic properly to work for sizes wider
2124       // than 128.
2125       if (Size > 128 &&
2126           ((!IsUnion && Size != getContext().getTypeSize(i->getType())) ||
2127            Size > getNativeVectorSizeForAVXABI(AVXLevel))) {
2128         Lo = Memory;
2129         postMerge(Size, Lo, Hi);
2130         return;
2131       }
2132 
2133       bool IsInMemory =
2134           Offset % getContext().getTypeAlign(i->getType().getCanonicalType());
2135       // Note, skip this test for bit-fields, see below.
2136       if (!BitField && IsInMemory) {
2137         Lo = Memory;
2138         postMerge(Size, Lo, Hi);
2139         return;
2140       }
2141 
2142       // Classify this field.
2143       //
2144       // AMD64-ABI 3.2.3p2: Rule 3. If the size of the aggregate
2145       // exceeds a single eightbyte, each is classified
2146       // separately. Each eightbyte gets initialized to class
2147       // NO_CLASS.
2148       Class FieldLo, FieldHi;
2149 
2150       // Bit-fields require special handling, they do not force the
2151       // structure to be passed in memory even if unaligned, and
2152       // therefore they can straddle an eightbyte.
2153       if (BitField) {
2154         assert(!i->isUnnamedBitField());
2155         uint64_t Offset = OffsetBase + Layout.getFieldOffset(idx);
2156         uint64_t Size = i->getBitWidthValue();
2157 
2158         uint64_t EB_Lo = Offset / 64;
2159         uint64_t EB_Hi = (Offset + Size - 1) / 64;
2160 
2161         if (EB_Lo) {
2162           assert(EB_Hi == EB_Lo && "Invalid classification, type > 16 bytes.");
2163           FieldLo = NoClass;
2164           FieldHi = Integer;
2165         } else {
2166           FieldLo = Integer;
2167           FieldHi = EB_Hi ? Integer : NoClass;
2168         }
2169       } else
2170         classify(i->getType(), Offset, FieldLo, FieldHi, isNamedArg);
2171       Lo = merge(Lo, FieldLo);
2172       Hi = merge(Hi, FieldHi);
2173       if (Lo == Memory || Hi == Memory)
2174         break;
2175     }
2176 
2177     postMerge(Size, Lo, Hi);
2178   }
2179 }
2180 
2181 ABIArgInfo X86_64ABIInfo::getIndirectReturnResult(QualType Ty) const {
2182   // If this is a scalar LLVM value then assume LLVM will pass it in the right
2183   // place naturally.
2184   if (!isAggregateTypeForABI(Ty)) {
2185     // Treat an enum type as its underlying type.
2186     if (const EnumType *EnumTy = Ty->getAs<EnumType>())
2187       Ty = EnumTy->getDecl()->getIntegerType();
2188 
2189     if (Ty->isBitIntType())
2190       return getNaturalAlignIndirect(Ty, getDataLayout().getAllocaAddrSpace());
2191 
2192     return (isPromotableIntegerTypeForABI(Ty) ? ABIArgInfo::getExtend(Ty)
2193                                               : ABIArgInfo::getDirect());
2194   }
2195 
2196   return getNaturalAlignIndirect(Ty, getDataLayout().getAllocaAddrSpace());
2197 }
2198 
2199 bool X86_64ABIInfo::IsIllegalVectorType(QualType Ty) const {
2200   if (const VectorType *VecTy = Ty->getAs<VectorType>()) {
2201     uint64_t Size = getContext().getTypeSize(VecTy);
2202     unsigned LargestVector = getNativeVectorSizeForAVXABI(AVXLevel);
2203     if (Size <= 64 || Size > LargestVector)
2204       return true;
2205     QualType EltTy = VecTy->getElementType();
2206     if (passInt128VectorsInMem() &&
2207         (EltTy->isSpecificBuiltinType(BuiltinType::Int128) ||
2208          EltTy->isSpecificBuiltinType(BuiltinType::UInt128)))
2209       return true;
2210   }
2211 
2212   return false;
2213 }
2214 
2215 ABIArgInfo X86_64ABIInfo::getIndirectResult(QualType Ty,
2216                                             unsigned freeIntRegs) const {
2217   // If this is a scalar LLVM value then assume LLVM will pass it in the right
2218   // place naturally.
2219   //
2220   // This assumption is optimistic, as there could be free registers available
2221   // when we need to pass this argument in memory, and LLVM could try to pass
2222   // the argument in the free register. This does not seem to happen currently,
2223   // but this code would be much safer if we could mark the argument with
2224   // 'onstack'. See PR12193.
2225   if (!isAggregateTypeForABI(Ty) && !IsIllegalVectorType(Ty) &&
2226       !Ty->isBitIntType()) {
2227     // Treat an enum type as its underlying type.
2228     if (const EnumType *EnumTy = Ty->getAs<EnumType>())
2229       Ty = EnumTy->getDecl()->getIntegerType();
2230 
2231     return (isPromotableIntegerTypeForABI(Ty) ? ABIArgInfo::getExtend(Ty)
2232                                               : ABIArgInfo::getDirect());
2233   }
2234 
2235   if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI()))
2236     return getNaturalAlignIndirect(Ty, getDataLayout().getAllocaAddrSpace(),
2237                                    RAA == CGCXXABI::RAA_DirectInMemory);
2238 
2239   // Compute the byval alignment. We specify the alignment of the byval in all
2240   // cases so that the mid-level optimizer knows the alignment of the byval.
2241   unsigned Align = std::max(getContext().getTypeAlign(Ty) / 8, 8U);
2242 
2243   // Attempt to avoid passing indirect results using byval when possible. This
2244   // is important for good codegen.
2245   //
2246   // We do this by coercing the value into a scalar type which the backend can
2247   // handle naturally (i.e., without using byval).
2248   //
2249   // For simplicity, we currently only do this when we have exhausted all of the
2250   // free integer registers. Doing this when there are free integer registers
2251   // would require more care, as we would have to ensure that the coerced value
2252   // did not claim the unused register. That would require either reordering the
2253   // arguments to the function (so that any subsequent inreg values came first),
2254   // or only doing this optimization when there were no following arguments that
2255   // might be inreg.
2256   //
2257   // We currently expect it to be rare (particularly in well written code) for
2258   // arguments to be passed on the stack when there are still free integer
2259   // registers available (this would typically imply large structs being passed
2260   // by value), so this seems like a fair tradeoff for now.
2261   //
2262   // We can revisit this if the backend grows support for 'onstack' parameter
2263   // attributes. See PR12193.
2264   if (freeIntRegs == 0) {
2265     uint64_t Size = getContext().getTypeSize(Ty);
2266 
2267     // If this type fits in an eightbyte, coerce it into the matching integral
2268     // type, which will end up on the stack (with alignment 8).
2269     if (Align == 8 && Size <= 64)
2270       return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(),
2271                                                           Size));
2272   }
2273 
2274   return ABIArgInfo::getIndirect(CharUnits::fromQuantity(Align),
2275                                  getDataLayout().getAllocaAddrSpace());
2276 }
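// Illustrative (hypothetical types): once the integer registers are
// exhausted,
//   struct A { long x; };     // 8 bytes, align 8
// is coerced to a bare i64 and lands on the stack without byval, whereas
//   struct B { long x, y; };  // 16 bytes
// is too large for the coercion and stays indirect byval with align 8.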
2277 
2278 /// The ABI specifies that a value should be passed in a full vector XMM/YMM
2279 /// register. Pick an LLVM IR type that will be passed as a vector register.
2280 llvm::Type *X86_64ABIInfo::GetByteVectorType(QualType Ty) const {
2281   // Wrapper structs/arrays that only contain vectors are passed just like
2282   // vectors; strip them off if present.
2283   if (const Type *InnerTy = isSingleElementStruct(Ty, getContext()))
2284     Ty = QualType(InnerTy, 0);
2285 
2286   llvm::Type *IRType = CGT.ConvertType(Ty);
2287   if (isa<llvm::VectorType>(IRType)) {
2288     // Don't pass vXi128 vectors in their native type, the backend can't
2289     // legalize them.
2290     if (passInt128VectorsInMem() &&
2291         cast<llvm::VectorType>(IRType)->getElementType()->isIntegerTy(128)) {
2292       // Use a vXi64 vector.
2293       uint64_t Size = getContext().getTypeSize(Ty);
2294       return llvm::FixedVectorType::get(llvm::Type::getInt64Ty(getVMContext()),
2295                                         Size / 64);
2296     }
2297 
2298     return IRType;
2299   }
2300 
2301   if (IRType->getTypeID() == llvm::Type::FP128TyID)
2302     return IRType;
2303 
2304   // We couldn't find the preferred IR vector type for 'Ty'.
2305   uint64_t Size = getContext().getTypeSize(Ty);
2306   assert((Size == 128 || Size == 256 || Size == 512) && "Invalid type found!");
2307 
2308 
2309   // Return an LLVM IR vector type based on the size of 'Ty'.
2310   return llvm::FixedVectorType::get(llvm::Type::getDoubleTy(getVMContext()),
2311                                     Size / 64);
2312 }
2313 
2314 /// BitsContainNoUserData - Return true if the specified [start,end) bit range
2315 /// is known to either be off the end of the specified type or being in
2316 /// alignment padding.  The user type specified is known to be at most 128 bits
2317 /// in size, and have passed through X86_64ABIInfo::classify with a successful
2318 /// classification that put one of the two halves in the INTEGER class.
2319 ///
2320 /// It is conservatively correct to return false.
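///
/// For example (illustrative): in struct { double d; int i; } on x86-64,
/// bits [96, 128) are tail padding, so querying that range returns true;
/// this is what allows the high eightbyte to be represented as a bare i32.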
2321 static bool BitsContainNoUserData(QualType Ty, unsigned StartBit,
2322                                   unsigned EndBit, ASTContext &Context) {
2323   // If the bytes being queried are off the end of the type, there is no user
2324   // data hiding here.  This handles analysis of builtins, vectors and other
2325   // types that don't contain interesting padding.
2326   unsigned TySize = (unsigned)Context.getTypeSize(Ty);
2327   if (TySize <= StartBit)
2328     return true;
2329 
2330   if (const ConstantArrayType *AT = Context.getAsConstantArrayType(Ty)) {
2331     unsigned EltSize = (unsigned)Context.getTypeSize(AT->getElementType());
2332     unsigned NumElts = (unsigned)AT->getZExtSize();
2333 
2334     // Check each element to see if the element overlaps with the queried range.
2335     for (unsigned i = 0; i != NumElts; ++i) {
2336       // If the element is after the span we care about, then we're done..
2337       unsigned EltOffset = i*EltSize;
2338       if (EltOffset >= EndBit) break;
2339 
2340       unsigned EltStart = EltOffset < StartBit ? StartBit - EltOffset : 0;
2341       if (!BitsContainNoUserData(AT->getElementType(), EltStart,
2342                                  EndBit-EltOffset, Context))
2343         return false;
2344     }
2345     // If it overlaps no elements, then it is safe to process as padding.
2346     return true;
2347   }
2348 
2349   if (const RecordType *RT = Ty->getAs<RecordType>()) {
2350     const RecordDecl *RD = RT->getDecl();
2351     const ASTRecordLayout &Layout = Context.getASTRecordLayout(RD);
2352 
2353     // If this is a C++ record, check the bases first.
2354     if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD)) {
2355       for (const auto &I : CXXRD->bases()) {
2356         assert(!I.isVirtual() && !I.getType()->isDependentType() &&
2357                "Unexpected base class!");
2358         const auto *Base =
2359             cast<CXXRecordDecl>(I.getType()->castAs<RecordType>()->getDecl());
2360 
2361         // If the base is after the span we care about, ignore it.
2362         unsigned BaseOffset = Context.toBits(Layout.getBaseClassOffset(Base));
2363         if (BaseOffset >= EndBit) continue;
2364 
2365         unsigned BaseStart = BaseOffset < StartBit ? StartBit - BaseOffset : 0;
2366         if (!BitsContainNoUserData(I.getType(), BaseStart,
2367                                    EndBit-BaseOffset, Context))
2368           return false;
2369       }
2370     }
2371 
2372     // Verify that no field has data that overlaps the region of interest.  Yes
2373     // this could be sped up a lot by being smarter about queried fields,
2374     // however we're only looking at structs up to 16 bytes, so we don't care
2375     // much.
2376     unsigned idx = 0;
2377     for (RecordDecl::field_iterator i = RD->field_begin(), e = RD->field_end();
2378          i != e; ++i, ++idx) {
2379       unsigned FieldOffset = (unsigned)Layout.getFieldOffset(idx);
2380 
2381       // If we found a field after the region we care about, then we're done.
2382       if (FieldOffset >= EndBit) break;
2383 
2384       unsigned FieldStart = FieldOffset < StartBit ? StartBit - FieldOffset : 0;
2385       if (!BitsContainNoUserData(i->getType(), FieldStart, EndBit-FieldOffset,
2386                                  Context))
2387         return false;
2388     }
2389 
2390     // If nothing in this record overlapped the area of interest, then we're
2391     // clean.
2392     return true;
2393   }
2394 
2395   return false;
2396 }
2397 
2398 /// getFPTypeAtOffset - Return a floating point type at the specified offset.
2399 static llvm::Type *getFPTypeAtOffset(llvm::Type *IRType, unsigned IROffset,
2400                                      const llvm::DataLayout &TD) {
2401   if (IROffset == 0 && IRType->isFloatingPointTy())
2402     return IRType;
2403 
2404   // If this is a struct, recurse into the field at the specified offset.
2405   if (llvm::StructType *STy = dyn_cast<llvm::StructType>(IRType)) {
2406     if (!STy->getNumContainedTypes())
2407       return nullptr;
2408 
2409     const llvm::StructLayout *SL = TD.getStructLayout(STy);
2410     unsigned Elt = SL->getElementContainingOffset(IROffset);
2411     IROffset -= SL->getElementOffset(Elt);
2412     return getFPTypeAtOffset(STy->getElementType(Elt), IROffset, TD);
2413   }
2414 
2415   // If this is an array, recurse into the field at the specified offset.
2416   if (llvm::ArrayType *ATy = dyn_cast<llvm::ArrayType>(IRType)) {
2417     llvm::Type *EltTy = ATy->getElementType();
2418     unsigned EltSize = TD.getTypeAllocSize(EltTy);
2419     IROffset -= IROffset / EltSize * EltSize;
2420     return getFPTypeAtOffset(EltTy, IROffset, TD);
2421   }
2422 
2423   return nullptr;
2424 }
2425 
2426 /// GetSSETypeAtOffset - Return a type that will be passed by the backend in the
2427 /// low 8 bytes of an XMM register, corresponding to the SSE class.
2428 llvm::Type *X86_64ABIInfo::
2429 GetSSETypeAtOffset(llvm::Type *IRType, unsigned IROffset,
2430                    QualType SourceTy, unsigned SourceOffset) const {
2431   const llvm::DataLayout &TD = getDataLayout();
2432   unsigned SourceSize =
2433       (unsigned)getContext().getTypeSize(SourceTy) / 8 - SourceOffset;
2434   llvm::Type *T0 = getFPTypeAtOffset(IRType, IROffset, TD);
2435   if (!T0 || T0->isDoubleTy())
2436     return llvm::Type::getDoubleTy(getVMContext());
2437 
2438   // Get the adjacent FP type.
2439   llvm::Type *T1 = nullptr;
2440   unsigned T0Size = TD.getTypeAllocSize(T0);
2441   if (SourceSize > T0Size)
2442       T1 = getFPTypeAtOffset(IRType, IROffset + T0Size, TD);
2443   if (T1 == nullptr) {
2444     // Check if IRType is a half/bfloat + float pair. The float will be at
2445     // IROffset+4 due to its alignment.
2446     if (T0->is16bitFPTy() && SourceSize > 4)
2447       T1 = getFPTypeAtOffset(IRType, IROffset + 4, TD);
2448     // If we can't get a second FP type, return a simple half or float.
2449     // avx512fp16-abi.c:pr51813_2 shows it works to return float for
2450     // {float, i8} too.
2451     if (T1 == nullptr)
2452       return T0;
2453   }
2454 
2455   if (T0->isFloatTy() && T1->isFloatTy())
2456     return llvm::FixedVectorType::get(T0, 2);
2457 
2458   if (T0->is16bitFPTy() && T1->is16bitFPTy()) {
2459     llvm::Type *T2 = nullptr;
2460     if (SourceSize > 4)
2461       T2 = getFPTypeAtOffset(IRType, IROffset + 4, TD);
2462     if (T2 == nullptr)
2463       return llvm::FixedVectorType::get(T0, 2);
2464     return llvm::FixedVectorType::get(T0, 4);
2465   }
2466 
2467   if (T0->is16bitFPTy() || T1->is16bitFPTy())
2468     return llvm::FixedVectorType::get(llvm::Type::getHalfTy(getVMContext()), 4);
2469 
2470   return llvm::Type::getDoubleTy(getVMContext());
2471 }
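// E.g. (illustrative): for struct { float a, b; } the low eightbyte holds
// two floats, so this returns <2 x float> and both fields travel together
// in one XMM register.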
2472 
2473 
2474 /// GetINTEGERTypeAtOffset - The ABI specifies that a value should be passed in
2475 /// an 8-byte GPR.  This means that we either have a scalar or we are talking
2476 /// about the high or low part of an up-to-16-byte struct.  This routine picks
2477 /// the best LLVM IR type to represent this, which may be i64 or may be anything
2478 /// else that the backend will pass in a GPR that works better (e.g. i8, %foo*,
2479 /// etc).
2480 ///
2481 /// PrefType is an LLVM IR type that corresponds to (part of) the IR type for
2482 /// the source type.  IROffset is an offset in bytes into the LLVM IR type that
2483 /// the 8-byte value references.  PrefType may be null.
2484 ///
2485 /// SourceTy is the source-level type for the entire argument.  SourceOffset is
2486 /// an offset into this that we're processing (which is always either 0 or 8).
2487 ///
2488 llvm::Type *X86_64ABIInfo::
2489 GetINTEGERTypeAtOffset(llvm::Type *IRType, unsigned IROffset,
2490                        QualType SourceTy, unsigned SourceOffset) const {
2491   // If we're dealing with an un-offset LLVM IR type, then it means that we're
2492   // returning an 8-byte unit starting with it.  See if we can safely use it.
2493   if (IROffset == 0) {
2494     // Pointers and int64's always fill the 8-byte unit.
2495     if ((isa<llvm::PointerType>(IRType) && Has64BitPointers) ||
2496         IRType->isIntegerTy(64))
2497       return IRType;
2498 
2499     // If we have a 1/2/4-byte integer, we can use it only if the rest of the
2500     // goodness in the source type is just tail padding.  This is allowed to
2501     // kick in for struct {double,int} on the int, but not on
2502     // struct{double,int,int} because we wouldn't return the second int.  We
2503     // have to do this analysis on the source type because we can't depend on
2504     // unions being lowered a specific way etc.
2505     if (IRType->isIntegerTy(8) || IRType->isIntegerTy(16) ||
2506         IRType->isIntegerTy(32) ||
2507         (isa<llvm::PointerType>(IRType) && !Has64BitPointers)) {
2508       unsigned BitWidth = isa<llvm::PointerType>(IRType) ? 32 :
2509           cast<llvm::IntegerType>(IRType)->getBitWidth();
2510 
2511       if (BitsContainNoUserData(SourceTy, SourceOffset*8+BitWidth,
2512                                 SourceOffset*8+64, getContext()))
2513         return IRType;
2514     }
2515   }
2516 
2517   if (llvm::StructType *STy = dyn_cast<llvm::StructType>(IRType)) {
2518     // If this is a struct, recurse into the field at the specified offset.
2519     const llvm::StructLayout *SL = getDataLayout().getStructLayout(STy);
2520     if (IROffset < SL->getSizeInBytes()) {
2521       unsigned FieldIdx = SL->getElementContainingOffset(IROffset);
2522       IROffset -= SL->getElementOffset(FieldIdx);
2523 
2524       return GetINTEGERTypeAtOffset(STy->getElementType(FieldIdx), IROffset,
2525                                     SourceTy, SourceOffset);
2526     }
2527   }
2528 
2529   if (llvm::ArrayType *ATy = dyn_cast<llvm::ArrayType>(IRType)) {
2530     llvm::Type *EltTy = ATy->getElementType();
2531     unsigned EltSize = getDataLayout().getTypeAllocSize(EltTy);
2532     unsigned EltOffset = IROffset/EltSize*EltSize;
2533     return GetINTEGERTypeAtOffset(EltTy, IROffset-EltOffset, SourceTy,
2534                                   SourceOffset);
2535   }
2536 
2537   // Okay, we don't have any better idea of what to pass, so we pass this in an
2538   // integer register that is no bigger than the remaining bytes of the struct.
2539   unsigned TySizeInBytes =
2540     (unsigned)getContext().getTypeSizeInChars(SourceTy).getQuantity();
2541 
2542   assert(TySizeInBytes != SourceOffset && "Empty field?");
2543 
2544   // It is always safe to classify this as an integer type up to i64 that
2545   // isn't larger than the structure.
2546   return llvm::IntegerType::get(getVMContext(),
2547                                 std::min(TySizeInBytes-SourceOffset, 8U)*8);
2548 }
2549 
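// Editorial example (not in the upstream source): the tail-padding rule in
// GetINTEGERTypeAtOffset at work on the high eightbyte (SourceOffset == 8):
//    ```
//    struct E { double d; int i; };         // hi part -> i32; bytes 12-15
//                                           // are tail padding, so i32 is safe
//    struct F { double d; int i; int j; };  // hi part -> i64; an i32 would
//                                           // drop j, so fall back to i64
//    ```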
2550 
2551 /// GetX86_64ByValArgumentPair - Given a high and low type that can ideally
2552 /// be used as elements of a two register pair to pass or return, return a
2553 /// first class aggregate to represent them.  For example, if the low part of
2554 /// a by-value argument should be passed as i32* and the high part as float,
2555 /// return {i32*, float}.
2556 static llvm::Type *
2557 GetX86_64ByValArgumentPair(llvm::Type *Lo, llvm::Type *Hi,
2558                            const llvm::DataLayout &TD) {
2559   // In order to correctly satisfy the ABI, we need the high part to start
2560   // at offset 8.  If the high and low parts we inferred are both 4-byte types
2561   // (e.g. i32 and i32) then the resultant struct type ({i32,i32}) won't have
2562   // the second element at offset 8.  Check for this:
2563   unsigned LoSize = (unsigned)TD.getTypeAllocSize(Lo);
2564   llvm::Align HiAlign = TD.getABITypeAlign(Hi);
2565   unsigned HiStart = llvm::alignTo(LoSize, HiAlign);
2566   assert(HiStart != 0 && HiStart <= 8 && "Invalid x86-64 argument pair!");
2567 
2568   // To handle this, we have to increase the size of the low part so that the
2569   // second element will start at an 8 byte offset.  We can't increase the size
2570   // of the second element because it might make us access off the end of the
2571   // struct.
2572   if (HiStart != 8) {
2573     // There are usually two sorts of types the ABI generation code produces
2574     // for the low part of a pair that aren't 8 bytes in size: FP types (half,
2575     // float) and small integers (i8/i16/i32), the latter also covering
2576     // pointers when they are 32-bit (X32 and NaCl).
2577     // Promote these to a larger type.
2578     if (Lo->isHalfTy() || Lo->isFloatTy())
2579       Lo = llvm::Type::getDoubleTy(Lo->getContext());
2580     else {
2581       assert((Lo->isIntegerTy() || Lo->isPointerTy())
2582              && "Invalid/unknown lo type");
2583       Lo = llvm::Type::getInt64Ty(Lo->getContext());
2584     }
2585   }
2586 
2587   llvm::StructType *Result = llvm::StructType::get(Lo, Hi);
2588 
2589   // Verify that the second element is at an 8-byte offset.
2590   assert(TD.getStructLayout(Result)->getElementOffset(1) == 8 &&
2591          "Invalid x86-64 argument pair!");
2592   return Result;
2593 }
2594 
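// Editorial example (not in the upstream source): a case where the low part
// must be widened so that the high part lands at offset 8:
//    ```
//    struct G {
//      float f __attribute__((aligned(8)));  // lo eightbyte: float + padding
//      float g __attribute__((aligned(8)));  // hi eightbyte: float at offset 8
//    };
//    // {float, float} would place g at offset 4, so Lo is promoted to double
//    // and the coerced type becomes {double, float}, with g at offset 8.
//    ```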
2595 ABIArgInfo X86_64ABIInfo::
2596 classifyReturnType(QualType RetTy) const {
2597   // AMD64-ABI 3.2.3p4: Rule 1. Classify the return type with the
2598   // classification algorithm.
2599   X86_64ABIInfo::Class Lo, Hi;
2600   classify(RetTy, 0, Lo, Hi, /*isNamedArg*/ true);
2601 
2602   // Check some invariants.
2603   assert((Hi != Memory || Lo == Memory) && "Invalid memory classification.");
2604   assert((Hi != SSEUp || Lo == SSE) && "Invalid SSEUp classification.");
2605 
2606   llvm::Type *ResType = nullptr;
2607   switch (Lo) {
2608   case NoClass:
2609     if (Hi == NoClass)
2610       return ABIArgInfo::getIgnore();
2611     // If the low part is just padding, it takes no register; leave ResType
2612     // null.
2613     assert((Hi == SSE || Hi == Integer || Hi == X87Up) &&
2614            "Unknown missing lo part");
2615     break;
2616 
2617   case SSEUp:
2618   case X87Up:
2619     llvm_unreachable("Invalid classification for lo word.");
2620 
2621     // AMD64-ABI 3.2.3p4: Rule 2. Types of class memory are returned via
2622     // hidden argument.
2623   case Memory:
2624     return getIndirectReturnResult(RetTy);
2625 
2626     // AMD64-ABI 3.2.3p4: Rule 3. If the class is INTEGER, the next
2627     // available register of the sequence %rax, %rdx is used.
2628   case Integer:
2629     ResType = GetINTEGERTypeAtOffset(CGT.ConvertType(RetTy), 0, RetTy, 0);
2630 
2631     // If we have a sign or zero extended integer, make sure to return Extend
2632     // so that the parameter gets the right LLVM IR attributes.
2633     if (Hi == NoClass && isa<llvm::IntegerType>(ResType)) {
2634       // Treat an enum type as its underlying type.
2635       if (const EnumType *EnumTy = RetTy->getAs<EnumType>())
2636         RetTy = EnumTy->getDecl()->getIntegerType();
2637 
2638       if (RetTy->isIntegralOrEnumerationType() &&
2639           isPromotableIntegerTypeForABI(RetTy))
2640         return ABIArgInfo::getExtend(RetTy);
2641     }
2642     break;
2643 
2644     // AMD64-ABI 3.2.3p4: Rule 4. If the class is SSE, the next
2645     // available SSE register of the sequence %xmm0, %xmm1 is used.
2646   case SSE:
2647     ResType = GetSSETypeAtOffset(CGT.ConvertType(RetTy), 0, RetTy, 0);
2648     break;
2649 
2650     // AMD64-ABI 3.2.3p4: Rule 6. If the class is X87, the value is
2651     // returned on the X87 stack in %st0 as 80-bit x87 number.
2652   case X87:
2653     ResType = llvm::Type::getX86_FP80Ty(getVMContext());
2654     break;
2655 
2656     // AMD64-ABI 3.2.3p4: Rule 8. If the class is COMPLEX_X87, the real
2657     // part of the value is returned in %st0 and the imaginary part in
2658     // %st1.
2659   case ComplexX87:
2660     assert(Hi == ComplexX87 && "Unexpected ComplexX87 classification.");
2661     ResType = llvm::StructType::get(llvm::Type::getX86_FP80Ty(getVMContext()),
2662                                     llvm::Type::getX86_FP80Ty(getVMContext()));
2663     break;
2664   }
2665 
2666   llvm::Type *HighPart = nullptr;
2667   switch (Hi) {
2668     // Memory was handled previously and X87 should
2669     // never occur as a hi class.
2670   case Memory:
2671   case X87:
2672     llvm_unreachable("Invalid classification for hi word.");
2673 
2674   case ComplexX87: // Previously handled.
2675   case NoClass:
2676     break;
2677 
2678   case Integer:
2679     HighPart = GetINTEGERTypeAtOffset(CGT.ConvertType(RetTy), 8, RetTy, 8);
2680     if (Lo == NoClass)  // Return HighPart at offset 8 in memory.
2681       return ABIArgInfo::getDirect(HighPart, 8);
2682     break;
2683   case SSE:
2684     HighPart = GetSSETypeAtOffset(CGT.ConvertType(RetTy), 8, RetTy, 8);
2685     if (Lo == NoClass)  // Return HighPart at offset 8 in memory.
2686       return ABIArgInfo::getDirect(HighPart, 8);
2687     break;
2688 
2689     // AMD64-ABI 3.2.3p4: Rule 5. If the class is SSEUP, the eightbyte
2690     // is passed in the next available eightbyte chunk of the last used
2691     // vector register.
2692     //
2693     // SSEUP should always be preceded by SSE, just widen.
2694   case SSEUp:
2695     assert(Lo == SSE && "Unexpected SSEUp classification.");
2696     ResType = GetByteVectorType(RetTy);
2697     break;
2698 
2699     // AMD64-ABI 3.2.3p4: Rule 7. If the class is X87UP, the value is
2700     // returned together with the previous X87 value in %st0.
2701   case X87Up:
2702     // If X87Up is preceded by X87, we don't need to do
2703     // anything. However, in some cases with unions it may not be
2704     // preceded by X87. In such situations we follow gcc and pass the
2705     // extra bits in an SSE reg.
2706     if (Lo != X87) {
2707       HighPart = GetSSETypeAtOffset(CGT.ConvertType(RetTy), 8, RetTy, 8);
2708       if (Lo == NoClass)  // Return HighPart at offset 8 in memory.
2709         return ABIArgInfo::getDirect(HighPart, 8);
2710     }
2711     break;
2712   }
2713 
2714   // If a high part was specified, merge it together with the low part.  It is
2715   // known to pass in the high eightbyte of the result.  We do this by forming a
2716   // first class struct aggregate with the high and low part: {low, high}
2717   if (HighPart)
2718     ResType = GetX86_64ByValArgumentPair(ResType, HighPart, getDataLayout());
2719 
2720   return ABIArgInfo::getDirect(ResType);
2721 }
2722 
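// Editorial example (not in the upstream source): return classification
// outcomes under the rules above:
//    ```
//    struct R { long a; double b; };  // Lo=Integer, Hi=SSE -> {i64, double},
//                                     // returned in %rax and %xmm0
//    long double ld;                  // Lo=X87, Hi=X87Up -> x86_fp80 in %st0
//    _Complex long double c;          // ComplexX87 -> {x86_fp80, x86_fp80}
//    ```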
2723 ABIArgInfo
2724 X86_64ABIInfo::classifyArgumentType(QualType Ty, unsigned freeIntRegs,
2725                                     unsigned &neededInt, unsigned &neededSSE,
2726                                     bool isNamedArg, bool IsRegCall) const {
2727   Ty = useFirstFieldIfTransparentUnion(Ty);
2728 
2729   X86_64ABIInfo::Class Lo, Hi;
2730   classify(Ty, 0, Lo, Hi, isNamedArg, IsRegCall);
2731 
2732   // Check some invariants.
2733   // FIXME: Enforce these by construction.
2734   assert((Hi != Memory || Lo == Memory) && "Invalid memory classification.");
2735   assert((Hi != SSEUp || Lo == SSE) && "Invalid SSEUp classification.");
2736 
2737   neededInt = 0;
2738   neededSSE = 0;
2739   llvm::Type *ResType = nullptr;
2740   switch (Lo) {
2741   case NoClass:
2742     if (Hi == NoClass)
2743       return ABIArgInfo::getIgnore();
2744     // If the low part is just padding, it takes no register; leave ResType
2745     // null.
2746     assert((Hi == SSE || Hi == Integer || Hi == X87Up) &&
2747            "Unknown missing lo part");
2748     break;
2749 
2750     // AMD64-ABI 3.2.3p3: Rule 1. If the class is MEMORY, pass the argument
2751     // on the stack.
2752   case Memory:
2753 
2754     // AMD64-ABI 3.2.3p3: Rule 5. If the class is X87, X87UP or
2755     // COMPLEX_X87, it is passed in memory.
2756   case X87:
2757   case ComplexX87:
2758     if (getRecordArgABI(Ty, getCXXABI()) == CGCXXABI::RAA_Indirect)
2759       ++neededInt;
2760     return getIndirectResult(Ty, freeIntRegs);
2761 
2762   case SSEUp:
2763   case X87Up:
2764     llvm_unreachable("Invalid classification for lo word.");
2765 
2766     // AMD64-ABI 3.2.3p3: Rule 2. If the class is INTEGER, the next
2767     // available register of the sequence %rdi, %rsi, %rdx, %rcx, %r8
2768     // and %r9 is used.
2769   case Integer:
2770     ++neededInt;
2771 
2772     // Pick an 8-byte type based on the preferred type.
2773     ResType = GetINTEGERTypeAtOffset(CGT.ConvertType(Ty), 0, Ty, 0);
2774 
2775     // If we have a sign or zero extended integer, make sure to return Extend
2776     // so that the parameter gets the right LLVM IR attributes.
2777     if (Hi == NoClass && isa<llvm::IntegerType>(ResType)) {
2778       // Treat an enum type as its underlying type.
2779       if (const EnumType *EnumTy = Ty->getAs<EnumType>())
2780         Ty = EnumTy->getDecl()->getIntegerType();
2781 
2782       if (Ty->isIntegralOrEnumerationType() &&
2783           isPromotableIntegerTypeForABI(Ty))
2784         return ABIArgInfo::getExtend(Ty, CGT.ConvertType(Ty));
2785     }
2786 
2787     break;
2788 
2789     // AMD64-ABI 3.2.3p3: Rule 3. If the class is SSE, the next
2790     // available SSE register is used, the registers are taken in the
2791     // order from %xmm0 to %xmm7.
2792   case SSE: {
2793     llvm::Type *IRType = CGT.ConvertType(Ty);
2794     ResType = GetSSETypeAtOffset(IRType, 0, Ty, 0);
2795     ++neededSSE;
2796     break;
2797   }
2798   }
2799 
2800   llvm::Type *HighPart = nullptr;
2801   switch (Hi) {
2802     // Memory was handled previously, ComplexX87 and X87 should
2803     // never occur as hi classes, and X87Up must be preceded by X87,
2804     // which is passed in memory.
2805   case Memory:
2806   case X87:
2807   case ComplexX87:
2808     llvm_unreachable("Invalid classification for hi word.");
2809 
2810   case NoClass: break;
2811 
2812   case Integer:
2813     ++neededInt;
2814     // Pick an 8-byte type based on the preferred type.
2815     HighPart = GetINTEGERTypeAtOffset(CGT.ConvertType(Ty), 8, Ty, 8);
2816 
2817     if (Lo == NoClass)  // Pass HighPart at offset 8 in memory.
2818       return ABIArgInfo::getDirect(HighPart, 8);
2819     break;
2820 
2821     // X87Up generally doesn't occur here (long double is passed in
2822     // memory), except in situations involving unions.
2823   case X87Up:
2824   case SSE:
2825     ++neededSSE;
2826     HighPart = GetSSETypeAtOffset(CGT.ConvertType(Ty), 8, Ty, 8);
2827 
2828     if (Lo == NoClass)  // Pass HighPart at offset 8 in memory.
2829       return ABIArgInfo::getDirect(HighPart, 8);
2830     break;
2831 
2832     // AMD64-ABI 3.2.3p3: Rule 4. If the class is SSEUP, the
2833     // eightbyte is passed in the upper half of the last used SSE
2834     // register.  This only happens when 128-bit vectors are passed.
2835   case SSEUp:
2836     assert(Lo == SSE && "Unexpected SSEUp classification");
2837     ResType = GetByteVectorType(Ty);
2838     break;
2839   }
2840 
2841   // If a high part was specified, merge it together with the low part.  It is
2842   // known to pass in the high eightbyte of the result.  We do this by forming a
2843   // first class struct aggregate with the high and low part: {low, high}
2844   if (HighPart)
2845     ResType = GetX86_64ByValArgumentPair(ResType, HighPart, getDataLayout());
2846 
2847   return ABIArgInfo::getDirect(ResType);
2848 }
2849 
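// Editorial example (not in the upstream source): argument classification
// and the register demand it reports:
//    ```
//    struct P { int a; int b; double d; };
//    // Lo=Integer (a and b packed into one GPR), Hi=SSE (d in one XMM reg);
//    // coerced to {i64, double} with neededInt = 1 and neededSSE = 1.
//    ```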
2850 ABIArgInfo
2851 X86_64ABIInfo::classifyRegCallStructTypeImpl(QualType Ty, unsigned &NeededInt,
2852                                              unsigned &NeededSSE,
2853                                              unsigned &MaxVectorWidth) const {
2854   auto RT = Ty->getAs<RecordType>();
2855   assert(RT && "classifyRegCallStructType only valid with struct types");
2856 
2857   if (RT->getDecl()->hasFlexibleArrayMember())
2858     return getIndirectReturnResult(Ty);
2859 
2860   // Sum up bases
2861   if (auto CXXRD = dyn_cast<CXXRecordDecl>(RT->getDecl())) {
2862     if (CXXRD->isDynamicClass()) {
2863       NeededInt = NeededSSE = 0;
2864       return getIndirectReturnResult(Ty);
2865     }
2866 
2867     for (const auto &I : CXXRD->bases())
2868       if (classifyRegCallStructTypeImpl(I.getType(), NeededInt, NeededSSE,
2869                                         MaxVectorWidth)
2870               .isIndirect()) {
2871         NeededInt = NeededSSE = 0;
2872         return getIndirectReturnResult(Ty);
2873       }
2874   }
2875 
2876   // Sum up members
2877   for (const auto *FD : RT->getDecl()->fields()) {
2878     QualType MTy = FD->getType();
2879     if (MTy->isRecordType() && !MTy->isUnionType()) {
2880       if (classifyRegCallStructTypeImpl(MTy, NeededInt, NeededSSE,
2881                                         MaxVectorWidth)
2882               .isIndirect()) {
2883         NeededInt = NeededSSE = 0;
2884         return getIndirectReturnResult(Ty);
2885       }
2886     } else {
2887       unsigned LocalNeededInt, LocalNeededSSE;
2888       if (classifyArgumentType(MTy, UINT_MAX, LocalNeededInt, LocalNeededSSE,
2889                                true, true)
2890               .isIndirect()) {
2891         NeededInt = NeededSSE = 0;
2892         return getIndirectReturnResult(Ty);
2893       }
2894       if (const auto *AT = getContext().getAsConstantArrayType(MTy))
2895         MTy = AT->getElementType();
2896       if (const auto *VT = MTy->getAs<VectorType>())
2897         if (getContext().getTypeSize(VT) > MaxVectorWidth)
2898           MaxVectorWidth = getContext().getTypeSize(VT);
2899       NeededInt += LocalNeededInt;
2900       NeededSSE += LocalNeededSSE;
2901     }
2902   }
2903 
2904   return ABIArgInfo::getDirect();
2905 }
2906 
2907 ABIArgInfo
2908 X86_64ABIInfo::classifyRegCallStructType(QualType Ty, unsigned &NeededInt,
2909                                          unsigned &NeededSSE,
2910                                          unsigned &MaxVectorWidth) const {
2911 
2912   NeededInt = 0;
2913   NeededSSE = 0;
2914   MaxVectorWidth = 0;
2915 
2916   return classifyRegCallStructTypeImpl(Ty, NeededInt, NeededSSE,
2917                                        MaxVectorWidth);
2918 }
2919 
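// Editorial example (not in the upstream source): under regcall, a struct's
// fields are summed up individually instead of going to memory wholesale:
//    ```
//    struct RC { double x; double y; long n; };
//    // __regcall: NeededSSE = 2 (x, y), NeededInt = 1 (n); passed directly
//    // while enough registers remain, otherwise reverted to indirect.
//    ```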
2920 void X86_64ABIInfo::computeInfo(CGFunctionInfo &FI) const {
2921 
2922   const unsigned CallingConv = FI.getCallingConvention();
2923   // It is possible to force Win64 calling convention on any x86_64 target by
2924   // using __attribute__((ms_abi)). In such case to correctly emit Win64
2925   // compatible code delegate this call to WinX86_64ABIInfo::computeInfo.
2926   if (CallingConv == llvm::CallingConv::Win64) {
2927     WinX86_64ABIInfo Win64ABIInfo(CGT, AVXLevel);
2928     Win64ABIInfo.computeInfo(FI);
2929     return;
2930   }
2931 
2932   bool IsRegCall = CallingConv == llvm::CallingConv::X86_RegCall;
2933 
2934   // Keep track of the number of assigned registers.
2935   unsigned FreeIntRegs = IsRegCall ? 11 : 6;
2936   unsigned FreeSSERegs = IsRegCall ? 16 : 8;
2937   unsigned NeededInt = 0, NeededSSE = 0, MaxVectorWidth = 0;
2938 
2939   if (!::classifyReturnType(getCXXABI(), FI, *this)) {
2940     if (IsRegCall && FI.getReturnType()->getTypePtr()->isRecordType() &&
2941         !FI.getReturnType()->getTypePtr()->isUnionType()) {
2942       FI.getReturnInfo() = classifyRegCallStructType(
2943           FI.getReturnType(), NeededInt, NeededSSE, MaxVectorWidth);
2944       if (FreeIntRegs >= NeededInt && FreeSSERegs >= NeededSSE) {
2945         FreeIntRegs -= NeededInt;
2946         FreeSSERegs -= NeededSSE;
2947       } else {
2948         FI.getReturnInfo() = getIndirectReturnResult(FI.getReturnType());
2949       }
2950     } else if (IsRegCall && FI.getReturnType()->getAs<ComplexType>() &&
2951                getContext().getCanonicalType(FI.getReturnType()
2952                                                  ->getAs<ComplexType>()
2953                                                  ->getElementType()) ==
2954                    getContext().LongDoubleTy)
2955       // A complex long double is returned in memory when the regcall
2956       // calling convention is used.
2957       FI.getReturnInfo() = getIndirectReturnResult(FI.getReturnType());
2958     else
2959       FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
2960   }
2961 
2962   // If the return value is indirect, then the hidden argument is consuming one
2963   // integer register.
2964   if (FI.getReturnInfo().isIndirect())
2965     --FreeIntRegs;
2966   else if (NeededSSE && MaxVectorWidth > 0)
2967     FI.setMaxVectorWidth(MaxVectorWidth);
2968 
2969   // The chain argument effectively gives us another free register.
2970   if (FI.isChainCall())
2971     ++FreeIntRegs;
2972 
2973   unsigned NumRequiredArgs = FI.getNumRequiredArgs();
2974   // AMD64-ABI 3.2.3p3: Once arguments are classified, the registers
2975   // get assigned (in left-to-right order) for passing as follows...
2976   unsigned ArgNo = 0;
2977   for (CGFunctionInfo::arg_iterator it = FI.arg_begin(), ie = FI.arg_end();
2978        it != ie; ++it, ++ArgNo) {
2979     bool IsNamedArg = ArgNo < NumRequiredArgs;
2980 
2981     if (IsRegCall && it->type->isStructureOrClassType())
2982       it->info = classifyRegCallStructType(it->type, NeededInt, NeededSSE,
2983                                            MaxVectorWidth);
2984     else
2985       it->info = classifyArgumentType(it->type, FreeIntRegs, NeededInt,
2986                                       NeededSSE, IsNamedArg);
2987 
2988     // AMD64-ABI 3.2.3p3: If there are no registers available for any
2989     // eightbyte of an argument, the whole argument is passed on the
2990     // stack. If registers have already been assigned for some
2991     // eightbytes of such an argument, the assignments get reverted.
2992     if (FreeIntRegs >= NeededInt && FreeSSERegs >= NeededSSE) {
2993       FreeIntRegs -= NeededInt;
2994       FreeSSERegs -= NeededSSE;
2995       if (MaxVectorWidth > FI.getMaxVectorWidth())
2996         FI.setMaxVectorWidth(MaxVectorWidth);
2997     } else {
2998       it->info = getIndirectResult(it->type, FreeIntRegs);
2999     }
3000   }
3001 }
3002 
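// Editorial example (not in the upstream source): the free-register
// bookkeeping above for the SysV convention (6 GPRs, 8 SSE regs):
//    ```
//    struct S { long x; long y; };
//    void f(long a, long b, long c, long d, long e, long g,  // all 6 GPRs used
//           struct S s);
//    // s would need 2 GPRs but none remain, so its eightbyte assignments are
//    // reverted and the whole struct goes to the stack (getIndirectResult).
//    ```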
3003 static Address EmitX86_64VAArgFromMemory(CodeGenFunction &CGF,
3004                                          Address VAListAddr, QualType Ty) {
3005   Address overflow_arg_area_p =
3006       CGF.Builder.CreateStructGEP(VAListAddr, 2, "overflow_arg_area_p");
3007   llvm::Value *overflow_arg_area =
3008     CGF.Builder.CreateLoad(overflow_arg_area_p, "overflow_arg_area");
3009 
3010   // AMD64-ABI 3.5.7p5: Step 7. Align l->overflow_arg_area upwards to a 16
3011   // byte boundary if alignment needed by type exceeds 8 byte boundary.
3012   // It isn't stated explicitly in the standard, but in practice we use
3013   // alignment greater than 16 where necessary.
3014   CharUnits Align = CGF.getContext().getTypeAlignInChars(Ty);
3015   if (Align > CharUnits::fromQuantity(8)) {
3016     overflow_arg_area = emitRoundPointerUpToAlignment(CGF, overflow_arg_area,
3017                                                       Align);
3018   }
3019 
3020   // AMD64-ABI 3.5.7p5: Step 8. Fetch type from l->overflow_arg_area.
3021   llvm::Type *LTy = CGF.ConvertTypeForMem(Ty);
3022   llvm::Value *Res = overflow_arg_area;
3023 
3024   // AMD64-ABI 3.5.7p5: Step 9. Set l->overflow_arg_area to:
3025   // l->overflow_arg_area + sizeof(type).
3026   // AMD64-ABI 3.5.7p5: Step 10. Align l->overflow_arg_area upwards to
3027   // an 8 byte boundary.
3028 
3029   uint64_t SizeInBytes = (CGF.getContext().getTypeSize(Ty) + 7) / 8;
3030   llvm::Value *Offset =
3031       llvm::ConstantInt::get(CGF.Int32Ty, (SizeInBytes + 7)  & ~7);
3032   overflow_arg_area = CGF.Builder.CreateGEP(CGF.Int8Ty, overflow_arg_area,
3033                                             Offset, "overflow_arg_area.next");
3034   CGF.Builder.CreateStore(overflow_arg_area, overflow_arg_area_p);
3035 
3036   // AMD64-ABI 3.5.7p5: Step 11. Return the fetched type.
3037   return Address(Res, LTy, Align);
3038 }
3039 
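// Editorial example (not in the upstream source): overflow-area advance for
// a 12-byte argument type:
//    ```
//    // getTypeSize = 96 bits -> SizeInBytes = 12; rounded up to a multiple
//    // of 8: (12 + 7) & ~7 = 16, so overflow_arg_area advances by 16 and
//    // remains 8-byte aligned for the next memory argument.
//    ```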
3040 RValue X86_64ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
3041                                 QualType Ty, AggValueSlot Slot) const {
3042   // Assume that va_list type is correct; should be pointer to LLVM type:
3043   // struct {
3044   //   i32 gp_offset;
3045   //   i32 fp_offset;
3046   //   i8* overflow_arg_area;
3047   //   i8* reg_save_area;
3048   // };
3049   unsigned neededInt, neededSSE;
3050 
3051   Ty = getContext().getCanonicalType(Ty);
3052   ABIArgInfo AI = classifyArgumentType(Ty, 0, neededInt, neededSSE,
3053                                        /*isNamedArg*/false);
3054 
3055   // Empty records are ignored for parameter passing purposes.
3056   if (AI.isIgnore())
3057     return Slot.asRValue();
3058 
3059   // AMD64-ABI 3.5.7p5: Step 1. Determine whether type may be passed
3060   // in the registers. If not go to step 7.
3061   if (!neededInt && !neededSSE)
3062     return CGF.EmitLoadOfAnyValue(
3063         CGF.MakeAddrLValue(EmitX86_64VAArgFromMemory(CGF, VAListAddr, Ty), Ty),
3064         Slot);
3065 
3066   // AMD64-ABI 3.5.7p5: Step 2. Compute num_gp to hold the number of
3067   // general purpose registers needed to pass type and num_fp to hold
3068   // the number of floating point registers needed.
3069 
3070   // AMD64-ABI 3.5.7p5: Step 3. Verify whether arguments fit into
3071   // registers. In the case: l->gp_offset > 48 - num_gp * 8 or
3072   // l->fp_offset > 304 - num_fp * 16 go to step 7.
3073   //
3074   // NOTE: 304 is a typo in the ABI document; there are only
3075   // (6 * 8 + 8 * 16) = 176 bytes of register save space.
3076 
3077   llvm::Value *InRegs = nullptr;
3078   Address gp_offset_p = Address::invalid(), fp_offset_p = Address::invalid();
3079   llvm::Value *gp_offset = nullptr, *fp_offset = nullptr;
3080   if (neededInt) {
3081     gp_offset_p = CGF.Builder.CreateStructGEP(VAListAddr, 0, "gp_offset_p");
3082     gp_offset = CGF.Builder.CreateLoad(gp_offset_p, "gp_offset");
3083     InRegs = llvm::ConstantInt::get(CGF.Int32Ty, 48 - neededInt * 8);
3084     InRegs = CGF.Builder.CreateICmpULE(gp_offset, InRegs, "fits_in_gp");
3085   }
3086 
3087   if (neededSSE) {
3088     fp_offset_p = CGF.Builder.CreateStructGEP(VAListAddr, 1, "fp_offset_p");
3089     fp_offset = CGF.Builder.CreateLoad(fp_offset_p, "fp_offset");
3090     llvm::Value *FitsInFP =
3091       llvm::ConstantInt::get(CGF.Int32Ty, 176 - neededSSE * 16);
3092     FitsInFP = CGF.Builder.CreateICmpULE(fp_offset, FitsInFP, "fits_in_fp");
3093     InRegs = InRegs ? CGF.Builder.CreateAnd(InRegs, FitsInFP) : FitsInFP;
3094   }
3095 
3096   llvm::BasicBlock *InRegBlock = CGF.createBasicBlock("vaarg.in_reg");
3097   llvm::BasicBlock *InMemBlock = CGF.createBasicBlock("vaarg.in_mem");
3098   llvm::BasicBlock *ContBlock = CGF.createBasicBlock("vaarg.end");
3099   CGF.Builder.CreateCondBr(InRegs, InRegBlock, InMemBlock);
3100 
3101   // Emit code to load the value if it was passed in registers.
3102 
3103   CGF.EmitBlock(InRegBlock);
3104 
3105   // AMD64-ABI 3.5.7p5: Step 4. Fetch type from l->reg_save_area with
3106   // an offset of l->gp_offset and/or l->fp_offset. This may require
3107   // copying to a temporary location in case the parameter is passed
3108   // in different register classes or requires an alignment greater
3109   // than 8 for general purpose registers and 16 for XMM registers.
3110   //
3111   // FIXME: This really results in shameful code when we end up needing to
3112   // collect arguments from different places; often what should result in a
3113   // simple assembling of a structure from scattered addresses has many more
3114   // loads than necessary. Can we clean this up?
3115   llvm::Type *LTy = CGF.ConvertTypeForMem(Ty);
3116   llvm::Value *RegSaveArea = CGF.Builder.CreateLoad(
3117       CGF.Builder.CreateStructGEP(VAListAddr, 3), "reg_save_area");
3118 
3119   Address RegAddr = Address::invalid();
3120   if (neededInt && neededSSE) {
3121     // FIXME: Cleanup.
3122     assert(AI.isDirect() && "Unexpected ABI info for mixed regs");
3123     llvm::StructType *ST = cast<llvm::StructType>(AI.getCoerceToType());
3124     Address Tmp = CGF.CreateMemTemp(Ty);
3125     Tmp = Tmp.withElementType(ST);
3126     assert(ST->getNumElements() == 2 && "Unexpected ABI info for mixed regs");
3127     llvm::Type *TyLo = ST->getElementType(0);
3128     llvm::Type *TyHi = ST->getElementType(1);
3129     assert((TyLo->isFPOrFPVectorTy() ^ TyHi->isFPOrFPVectorTy()) &&
3130            "Unexpected ABI info for mixed regs");
3131     llvm::Value *GPAddr =
3132         CGF.Builder.CreateGEP(CGF.Int8Ty, RegSaveArea, gp_offset);
3133     llvm::Value *FPAddr =
3134         CGF.Builder.CreateGEP(CGF.Int8Ty, RegSaveArea, fp_offset);
3135     llvm::Value *RegLoAddr = TyLo->isFPOrFPVectorTy() ? FPAddr : GPAddr;
3136     llvm::Value *RegHiAddr = TyLo->isFPOrFPVectorTy() ? GPAddr : FPAddr;
3137 
3138     // Copy the first element.
3139     // FIXME: Our choice of alignment here and below is probably pessimistic.
3140     llvm::Value *V = CGF.Builder.CreateAlignedLoad(
3141         TyLo, RegLoAddr,
3142         CharUnits::fromQuantity(getDataLayout().getABITypeAlign(TyLo)));
3143     CGF.Builder.CreateStore(V, CGF.Builder.CreateStructGEP(Tmp, 0));
3144 
3145     // Copy the second element.
3146     V = CGF.Builder.CreateAlignedLoad(
3147         TyHi, RegHiAddr,
3148         CharUnits::fromQuantity(getDataLayout().getABITypeAlign(TyHi)));
3149     CGF.Builder.CreateStore(V, CGF.Builder.CreateStructGEP(Tmp, 1));
3150 
3151     RegAddr = Tmp.withElementType(LTy);
3152   } else if (neededInt || neededSSE == 1) {
3153     // Copy to a temporary if necessary to ensure the appropriate alignment.
3154     auto TInfo = getContext().getTypeInfoInChars(Ty);
3155     uint64_t TySize = TInfo.Width.getQuantity();
3156     CharUnits TyAlign = TInfo.Align;
3157     llvm::Type *CoTy = nullptr;
3158     if (AI.isDirect())
3159       CoTy = AI.getCoerceToType();
3160 
3161     llvm::Value *GpOrFpOffset = neededInt ? gp_offset : fp_offset;
3162     uint64_t Alignment = neededInt ? 8 : 16;
3163     uint64_t RegSize = neededInt ? neededInt * 8 : 16;
3164     // There are two cases that require special handling:
3165     // 1)
3166     //    ```
3167     //    struct {
3168     //      struct {} a[8];
3169     //      int b;
3170     //    };
3171     //    ```
3172     //    The lower 8 bytes of the structure are not stored,
3173     //    so an 8-byte offset is needed when accessing the structure.
3174     // 2)
3175     //   ```
3176     //   struct {
3177     //     long long a;
3178     //     struct {} b;
3179     //   };
3180     //   ```
3181     //   The stored size of this structure is smaller than its actual size,
3182     //   which may lead to reading past the end of the register save area.
3183     if (CoTy && (AI.getDirectOffset() == 8 || RegSize < TySize)) {
3184       Address Tmp = CGF.CreateMemTemp(Ty);
3185       llvm::Value *Addr =
3186           CGF.Builder.CreateGEP(CGF.Int8Ty, RegSaveArea, GpOrFpOffset);
3187       llvm::Value *Src = CGF.Builder.CreateAlignedLoad(CoTy, Addr, TyAlign);
3188       llvm::Value *PtrOffset =
3189           llvm::ConstantInt::get(CGF.Int32Ty, AI.getDirectOffset());
3190       Address Dst = Address(
3191           CGF.Builder.CreateGEP(CGF.Int8Ty, Tmp.getBasePointer(), PtrOffset),
3192           LTy, TyAlign);
3193       CGF.Builder.CreateStore(Src, Dst);
3194       RegAddr = Tmp.withElementType(LTy);
3195     } else {
3196       RegAddr =
3197           Address(CGF.Builder.CreateGEP(CGF.Int8Ty, RegSaveArea, GpOrFpOffset),
3198                   LTy, CharUnits::fromQuantity(Alignment));
3199 
3200       // Copy into a temporary if the type is more aligned than the
3201       // register save area.
3202       if (neededInt && TyAlign.getQuantity() > 8) {
3203         Address Tmp = CGF.CreateMemTemp(Ty);
3204         CGF.Builder.CreateMemCpy(Tmp, RegAddr, TySize, false);
3205         RegAddr = Tmp;
3206       }
3207     }
3208 
3209   } else {
3210     assert(neededSSE == 2 && "Invalid number of needed registers!");
3211     // SSE registers are spaced 16 bytes apart in the register save
3212     // area, we need to collect the two eightbytes together.
3213     // The ABI isn't explicit about this, but it seems reasonable
3214     // to assume that the slots are 16-byte aligned, since the stack is
3215     // naturally 16-byte aligned and the prologue is expected to store
3216     // all the SSE registers to the RSA.
3217     Address RegAddrLo = Address(CGF.Builder.CreateGEP(CGF.Int8Ty, RegSaveArea,
3218                                                       fp_offset),
3219                                 CGF.Int8Ty, CharUnits::fromQuantity(16));
3220     Address RegAddrHi =
3221       CGF.Builder.CreateConstInBoundsByteGEP(RegAddrLo,
3222                                              CharUnits::fromQuantity(16));
3223     llvm::Type *ST = AI.canHaveCoerceToType()
3224                          ? AI.getCoerceToType()
3225                          : llvm::StructType::get(CGF.DoubleTy, CGF.DoubleTy);
3226     llvm::Value *V;
3227     Address Tmp = CGF.CreateMemTemp(Ty);
3228     Tmp = Tmp.withElementType(ST);
3229     V = CGF.Builder.CreateLoad(
3230         RegAddrLo.withElementType(ST->getStructElementType(0)));
3231     CGF.Builder.CreateStore(V, CGF.Builder.CreateStructGEP(Tmp, 0));
3232     V = CGF.Builder.CreateLoad(
3233         RegAddrHi.withElementType(ST->getStructElementType(1)));
3234     CGF.Builder.CreateStore(V, CGF.Builder.CreateStructGEP(Tmp, 1));
3235 
3236     RegAddr = Tmp.withElementType(LTy);
3237   }
3238 
3239   // AMD64-ABI 3.5.7p5: Step 5. Set:
3240   // l->gp_offset = l->gp_offset + num_gp * 8
3241   // l->fp_offset = l->fp_offset + num_fp * 16.
3242   if (neededInt) {
3243     llvm::Value *Offset = llvm::ConstantInt::get(CGF.Int32Ty, neededInt * 8);
3244     CGF.Builder.CreateStore(CGF.Builder.CreateAdd(gp_offset, Offset),
3245                             gp_offset_p);
3246   }
3247   if (neededSSE) {
3248     llvm::Value *Offset = llvm::ConstantInt::get(CGF.Int32Ty, neededSSE * 16);
3249     CGF.Builder.CreateStore(CGF.Builder.CreateAdd(fp_offset, Offset),
3250                             fp_offset_p);
3251   }
3252   CGF.EmitBranch(ContBlock);
3253 
3254   // Emit code to load the value if it was passed in memory.
3255 
3256   CGF.EmitBlock(InMemBlock);
3257   Address MemAddr = EmitX86_64VAArgFromMemory(CGF, VAListAddr, Ty);
3258 
3259   // Return the appropriate result.
3260 
3261   CGF.EmitBlock(ContBlock);
3262   Address ResAddr = emitMergePHI(CGF, RegAddr, InRegBlock, MemAddr, InMemBlock,
3263                                  "vaarg.addr");
3264   return CGF.EmitLoadOfAnyValue(CGF.MakeAddrLValue(ResAddr, Ty), Slot);
3265 }
3266 
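// Editorial sketch (not in the upstream source): for
// va_arg(ap, struct { long l; double d; }) the code above emits, in outline:
//    ```
//    // fits_in_gp = gp_offset <= 40    ; 48 - neededInt * 8
//    // fits_in_fp = fp_offset <= 160   ; 176 - neededSSE * 16
//    // if both: assemble {i64, double} from one GPR slot and one XMM slot
//    //          of reg_save_area, then bump gp_offset by 8, fp_offset by 16
//    // else:    take the value from overflow_arg_area (memory path)
//    ```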
3267 RValue X86_64ABIInfo::EmitMSVAArg(CodeGenFunction &CGF, Address VAListAddr,
3268                                   QualType Ty, AggValueSlot Slot) const {
3269   // MS x64 ABI requirement: "Any argument that doesn't fit in 8 bytes, or is
3270   // not 1, 2, 4, or 8 bytes, must be passed by reference."
3271   uint64_t Width = getContext().getTypeSize(Ty);
3272   bool IsIndirect = Width > 64 || !llvm::isPowerOf2_64(Width);
3273 
3274   return emitVoidPtrVAArg(CGF, VAListAddr, Ty, IsIndirect,
3275                           CGF.getContext().getTypeInfoInChars(Ty),
3276                           CharUnits::fromQuantity(8),
3277                           /*allowHigherAlign*/ false, Slot);
3278 }
3279 
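// Editorial example (not in the upstream source): the MS "8 bytes and a
// power of 2" rule as applied above:
//    ```
//    // sizeof(T) == 1, 2, 4, or 8 -> value lives directly in the va_list slot
//    // sizeof(T) == 3 or 12       -> the slot holds a pointer to the value
//    //                               (IsIndirect == true)
//    ```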
3280 ABIArgInfo WinX86_64ABIInfo::reclassifyHvaArgForVectorCall(
3281     QualType Ty, unsigned &FreeSSERegs, const ABIArgInfo &current) const {
3282   const Type *Base = nullptr;
3283   uint64_t NumElts = 0;
3284 
3285   if (!Ty->isBuiltinType() && !Ty->isVectorType() &&
3286       isHomogeneousAggregate(Ty, Base, NumElts) && FreeSSERegs >= NumElts) {
3287     FreeSSERegs -= NumElts;
3288     return getDirectX86Hva();
3289   }
3290   return current;
3291 }
3292 
3293 ABIArgInfo WinX86_64ABIInfo::classify(QualType Ty, unsigned &FreeSSERegs,
3294                                       bool IsReturnType, bool IsVectorCall,
3295                                       bool IsRegCall) const {
3296 
3297   if (Ty->isVoidType())
3298     return ABIArgInfo::getIgnore();
3299 
3300   if (const EnumType *EnumTy = Ty->getAs<EnumType>())
3301     Ty = EnumTy->getDecl()->getIntegerType();
3302 
3303   TypeInfo Info = getContext().getTypeInfo(Ty);
3304   uint64_t Width = Info.Width;
3305   CharUnits Align = getContext().toCharUnitsFromBits(Info.Align);
3306 
3307   const RecordType *RT = Ty->getAs<RecordType>();
3308   if (RT) {
3309     if (!IsReturnType) {
3310       if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(RT, getCXXABI()))
3311         return getNaturalAlignIndirect(Ty, getDataLayout().getAllocaAddrSpace(),
3312                                        RAA == CGCXXABI::RAA_DirectInMemory);
3313     }
3314 
3315     if (RT->getDecl()->hasFlexibleArrayMember())
3316       return getNaturalAlignIndirect(Ty, getDataLayout().getAllocaAddrSpace(),
3317                                      /*ByVal=*/false);
3318   }
3319 
3320   const Type *Base = nullptr;
3321   uint64_t NumElts = 0;
3322   // vectorcall adds the concept of a homogeneous vector aggregate, similar to
3323   // other targets.
3324   if ((IsVectorCall || IsRegCall) &&
3325       isHomogeneousAggregate(Ty, Base, NumElts)) {
3326     if (IsRegCall) {
3327       if (FreeSSERegs >= NumElts) {
3328         FreeSSERegs -= NumElts;
3329         if (IsReturnType || Ty->isBuiltinType() || Ty->isVectorType())
3330           return ABIArgInfo::getDirect();
3331         return ABIArgInfo::getExpand();
3332       }
3333       return ABIArgInfo::getIndirect(
3334           Align, /*AddrSpace=*/getDataLayout().getAllocaAddrSpace(),
3335           /*ByVal=*/false);
3336     } else if (IsVectorCall) {
3337       if (FreeSSERegs >= NumElts &&
3338           (IsReturnType || Ty->isBuiltinType() || Ty->isVectorType())) {
3339         FreeSSERegs -= NumElts;
3340         return ABIArgInfo::getDirect();
3341       } else if (IsReturnType) {
3342         return ABIArgInfo::getExpand();
3343       } else if (!Ty->isBuiltinType() && !Ty->isVectorType()) {
3344         // HVAs are delayed and reclassified in the 2nd step.
3345         return ABIArgInfo::getIndirect(
3346             Align, /*AddrSpace=*/getDataLayout().getAllocaAddrSpace(),
3347             /*ByVal=*/false);
3348       }
3349     }
3350   }
3351 
3352   if (Ty->isMemberPointerType()) {
3353     // If the member pointer is represented by an LLVM int or ptr, pass it
3354     // directly.
3355     llvm::Type *LLTy = CGT.ConvertType(Ty);
3356     if (LLTy->isPointerTy() || LLTy->isIntegerTy())
3357       return ABIArgInfo::getDirect();
3358   }
3359 
3360   if (RT || Ty->isAnyComplexType() || Ty->isMemberPointerType()) {
3361     // MS x64 ABI requirement: "Any argument that doesn't fit in 8 bytes, or is
3362     // not 1, 2, 4, or 8 bytes, must be passed by reference."
3363     if (Width > 64 || !llvm::isPowerOf2_64(Width))
3364       return getNaturalAlignIndirect(Ty, getDataLayout().getAllocaAddrSpace(),
3365                                      /*ByVal=*/false);
3366 
3367     // Otherwise, coerce it to a small integer.
3368     return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(), Width));
3369   }
3370 
3371   if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) {
3372     switch (BT->getKind()) {
3373     case BuiltinType::Bool:
3374       // Bool is always extended per the ABI; other builtin types are not
3375       // extended.
3376       return ABIArgInfo::getExtend(Ty);
3377 
3378     case BuiltinType::LongDouble:
3379       // Mingw64 GCC uses the old 80 bit extended precision floating point
3380       // unit. It passes them indirectly through memory.
3381       if (IsMingw64) {
3382         const llvm::fltSemantics *LDF = &getTarget().getLongDoubleFormat();
3383         if (LDF == &llvm::APFloat::x87DoubleExtended())
3384           return ABIArgInfo::getIndirect(
3385               Align, /*AddrSpace=*/getDataLayout().getAllocaAddrSpace(),
3386               /*ByVal=*/false);
3387       }
3388       break;
3389 
3390     case BuiltinType::Int128:
3391     case BuiltinType::UInt128:
3392     case BuiltinType::Float128:
3393       // 128-bit float and integer types share the same ABI.
3394 
3395       // If it's a parameter type, the normal ABI rule is that arguments larger
3396       // than 8 bytes are passed indirectly. GCC follows it. We follow it too,
3397       // even though it isn't particularly efficient.
3398       if (!IsReturnType)
3399         return ABIArgInfo::getIndirect(
3400             Align, /*AddrSpace=*/getDataLayout().getAllocaAddrSpace(),
3401             /*ByVal=*/false);
3402 
3403       // Mingw64 GCC returns i128 in XMM0. Coerce to v2i64 to handle that.
3404       // Clang matches them for compatibility.
3405       // NOTE: GCC actually returns f128 indirectly but will hopefully change.
3406       // See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=115054#c8.
3407       return ABIArgInfo::getDirect(llvm::FixedVectorType::get(
3408           llvm::Type::getInt64Ty(getVMContext()), 2));
3409 
3410     default:
3411       break;
3412     }
3413   }
3414 
3415   if (Ty->isBitIntType()) {
3416     // MS x64 ABI requirement: "Any argument that doesn't fit in 8 bytes, or is
3417     // not 1, 2, 4, or 8 bytes, must be passed by reference."
3418     // However, non-power-of-two bit-precise integers will be passed as 1, 2, 4,
3419     // or 8 bytes anyway as long as they fit, so we don't have to check
3420     // the power of 2.
3421     if (Width <= 64)
3422       return ABIArgInfo::getDirect();
3423     return ABIArgInfo::getIndirect(
3424         Align, /*AddrSpace=*/getDataLayout().getAllocaAddrSpace(),
3425         /*ByVal=*/false);
3426   }
3427 
3428   return ABIArgInfo::getDirect();
3429 }
3430 
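// Editorial example (not in the upstream source): Win64 classification
// outcomes from the rules above:
//    ```
//    struct W1 { int a; int b; };         // 8 bytes, power of 2 -> direct i64
//    struct W2 { int a; int b; int c; };  // 12 bytes -> indirect, by pointer
//    bool flag;                           // -> direct with extension
//    __int128 v;                          // parameter -> indirect;
//                                         // return -> coerced to <2 x i64>
//    ```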
3431 void WinX86_64ABIInfo::computeInfo(CGFunctionInfo &FI) const {
3432   const unsigned CC = FI.getCallingConvention();
3433   bool IsVectorCall = CC == llvm::CallingConv::X86_VectorCall;
3434   bool IsRegCall = CC == llvm::CallingConv::X86_RegCall;
3435 
3436   // If __attribute__((sysv_abi)) is in use, use the SysV argument
3437   // classification rules.
3438   if (CC == llvm::CallingConv::X86_64_SysV) {
3439     X86_64ABIInfo SysVABIInfo(CGT, AVXLevel);
3440     SysVABIInfo.computeInfo(FI);
3441     return;
3442   }
3443 
3444   unsigned FreeSSERegs = 0;
3445   if (IsVectorCall) {
3446     // We can use up to 4 SSE return registers with vectorcall.
3447     FreeSSERegs = 4;
3448   } else if (IsRegCall) {
3449     // RegCall gives us 16 SSE registers.
3450     FreeSSERegs = 16;
3451   }
3452 
3453   if (!getCXXABI().classifyReturnType(FI))
3454     FI.getReturnInfo() = classify(FI.getReturnType(), FreeSSERegs, true,
3455                                   IsVectorCall, IsRegCall);
3456 
3457   if (IsVectorCall) {
3458     // We can use up to 6 SSE register parameters with vectorcall.
3459     FreeSSERegs = 6;
3460   } else if (IsRegCall) {
3461     // RegCall gives us 16 SSE registers, we can reuse the return registers.
3462     FreeSSERegs = 16;
3463   }
3464 
3465   unsigned ArgNum = 0;
3466   unsigned ZeroSSERegs = 0;
3467   for (auto &I : FI.arguments()) {
3468     // Vectorcall in x64 only permits the first 6 arguments to be passed in
3469     // XMM/YMM registers. After the sixth argument, pretend no vector
3470     // registers are left.
3471     unsigned *MaybeFreeSSERegs =
3472         (IsVectorCall && ArgNum >= 6) ? &ZeroSSERegs : &FreeSSERegs;
3473     I.info =
3474         classify(I.type, *MaybeFreeSSERegs, false, IsVectorCall, IsRegCall);
3475     ++ArgNum;
3476   }
3477 
3478   if (IsVectorCall) {
3479     // For vectorcall, assign aggregate HVAs to any free vector registers in a
3480     // second pass.
3481     for (auto &I : FI.arguments())
3482       I.info = reclassifyHvaArgForVectorCall(I.type, FreeSSERegs, I.info);
3483   }
3484 }
3485 
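// Editorial example (not in the upstream source): the two-pass vectorcall
// scheme implemented above:
//    ```
//    struct HVA4 { __m128 a, b, c, d; };   // homogeneous vector aggregate
//    void __vectorcall f(HVA4 h, __m128 v);
//    // Pass 1: v takes one of the 6 SSE registers; h stays indirect for now.
//    // Pass 2: 4 SSE registers are still free, so h is reclassified to be
//    //         passed in registers (getDirectX86Hva) instead of by address.
//    ```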
3486 RValue WinX86_64ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
3487                                    QualType Ty, AggValueSlot Slot) const {
3488   // MS x64 ABI requirement: "Any argument that doesn't fit in 8 bytes, or is
3489   // not 1, 2, 4, or 8 bytes, must be passed by reference."
3490   uint64_t Width = getContext().getTypeSize(Ty);
3491   bool IsIndirect = Width > 64 || !llvm::isPowerOf2_64(Width);
3492 
3493   return emitVoidPtrVAArg(CGF, VAListAddr, Ty, IsIndirect,
3494                           CGF.getContext().getTypeInfoInChars(Ty),
3495                           CharUnits::fromQuantity(8),
3496                           /*allowHigherAlign*/ false, Slot);
3497 }
3498 
3499 std::unique_ptr<TargetCodeGenInfo> CodeGen::createX86_32TargetCodeGenInfo(
3500     CodeGenModule &CGM, bool DarwinVectorABI, bool Win32StructABI,
3501     unsigned NumRegisterParameters, bool SoftFloatABI) {
3502   bool RetSmallStructInRegABI = X86_32TargetCodeGenInfo::isStructReturnInRegABI(
3503       CGM.getTriple(), CGM.getCodeGenOpts());
3504   return std::make_unique<X86_32TargetCodeGenInfo>(
3505       CGM.getTypes(), DarwinVectorABI, RetSmallStructInRegABI, Win32StructABI,
3506       NumRegisterParameters, SoftFloatABI);
3507 }
3508 
3509 std::unique_ptr<TargetCodeGenInfo> CodeGen::createWinX86_32TargetCodeGenInfo(
3510     CodeGenModule &CGM, bool DarwinVectorABI, bool Win32StructABI,
3511     unsigned NumRegisterParameters) {
3512   bool RetSmallStructInRegABI = X86_32TargetCodeGenInfo::isStructReturnInRegABI(
3513       CGM.getTriple(), CGM.getCodeGenOpts());
3514   return std::make_unique<WinX86_32TargetCodeGenInfo>(
3515       CGM.getTypes(), DarwinVectorABI, RetSmallStructInRegABI, Win32StructABI,
3516       NumRegisterParameters);
3517 }
3518 
3519 std::unique_ptr<TargetCodeGenInfo>
3520 CodeGen::createX86_64TargetCodeGenInfo(CodeGenModule &CGM,
3521                                        X86AVXABILevel AVXLevel) {
3522   return std::make_unique<X86_64TargetCodeGenInfo>(CGM.getTypes(), AVXLevel);
3523 }
3524 
3525 std::unique_ptr<TargetCodeGenInfo>
3526 CodeGen::createWinX86_64TargetCodeGenInfo(CodeGenModule &CGM,
3527                                           X86AVXABILevel AVXLevel) {
3528   return std::make_unique<WinX86_64TargetCodeGenInfo>(CGM.getTypes(), AVXLevel);
3529 }
3530