xref: /freebsd/contrib/llvm-project/clang/lib/CodeGen/Targets/AArch64.cpp (revision e64bea71c21eb42e97aa615188ba91f6cce0d36d)
//===- AArch64.cpp --------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "ABIInfoImpl.h"
#include "TargetInfo.h"
#include "clang/AST/Decl.h"
#include "clang/Basic/DiagnosticFrontend.h"
#include "llvm/TargetParser/AArch64TargetParser.h"

using namespace clang;
using namespace clang::CodeGen;

//===----------------------------------------------------------------------===//
// AArch64 ABI Implementation
//===----------------------------------------------------------------------===//

namespace {

class AArch64ABIInfo : public ABIInfo {
  AArch64ABIKind Kind;

public:
  AArch64ABIInfo(CodeGenTypes &CGT, AArch64ABIKind Kind)
      : ABIInfo(CGT), Kind(Kind) {}

  bool isSoftFloat() const { return Kind == AArch64ABIKind::AAPCSSoft; }

private:
  AArch64ABIKind getABIKind() const { return Kind; }
  bool isDarwinPCS() const { return Kind == AArch64ABIKind::DarwinPCS; }

  ABIArgInfo classifyReturnType(QualType RetTy, bool IsVariadicFn) const;
  ABIArgInfo classifyArgumentType(QualType RetTy, bool IsVariadicFn,
                                  bool IsNamedArg, unsigned CallingConvention,
                                  unsigned &NSRN, unsigned &NPRN) const;
  llvm::Type *convertFixedToScalableVectorType(const VectorType *VT) const;
  ABIArgInfo coerceIllegalVector(QualType Ty, unsigned &NSRN,
                                 unsigned &NPRN) const;
  ABIArgInfo coerceAndExpandPureScalableAggregate(
      QualType Ty, bool IsNamedArg, unsigned NVec, unsigned NPred,
      const SmallVectorImpl<llvm::Type *> &UnpaddedCoerceToSeq, unsigned &NSRN,
      unsigned &NPRN) const;
  bool isHomogeneousAggregateBaseType(QualType Ty) const override;
  bool isHomogeneousAggregateSmallEnough(const Type *Ty,
                                         uint64_t Members) const override;
  bool isZeroLengthBitfieldPermittedInHomogeneousAggregate() const override;

  bool isIllegalVectorType(QualType Ty) const;

  bool passAsAggregateType(QualType Ty) const;
  bool passAsPureScalableType(QualType Ty, unsigned &NV, unsigned &NP,
                              SmallVectorImpl<llvm::Type *> &CoerceToSeq) const;

  void flattenType(llvm::Type *Ty,
                   SmallVectorImpl<llvm::Type *> &Flattened) const;

  void computeInfo(CGFunctionInfo &FI) const override {
    if (!::classifyReturnType(getCXXABI(), FI, *this))
      FI.getReturnInfo() =
          classifyReturnType(FI.getReturnType(), FI.isVariadic());

    unsigned ArgNo = 0;
    unsigned NSRN = 0, NPRN = 0;
    for (auto &it : FI.arguments()) {
      const bool IsNamedArg =
          !FI.isVariadic() || ArgNo < FI.getRequiredArgs().getNumRequiredArgs();
      ++ArgNo;
      it.info = classifyArgumentType(it.type, FI.isVariadic(), IsNamedArg,
                                     FI.getCallingConvention(), NSRN, NPRN);
    }
  }

  RValue EmitDarwinVAArg(Address VAListAddr, QualType Ty, CodeGenFunction &CGF,
                         AggValueSlot Slot) const;

  RValue EmitAAPCSVAArg(Address VAListAddr, QualType Ty, CodeGenFunction &CGF,
                        AArch64ABIKind Kind, AggValueSlot Slot) const;

  RValue EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty,
                   AggValueSlot Slot) const override {
    llvm::Type *BaseTy = CGF.ConvertType(Ty);
    if (isa<llvm::ScalableVectorType>(BaseTy))
      llvm::report_fatal_error("Passing SVE types to variadic functions is "
                               "currently not supported");

    return Kind == AArch64ABIKind::Win64
               ? EmitMSVAArg(CGF, VAListAddr, Ty, Slot)
           : isDarwinPCS() ? EmitDarwinVAArg(VAListAddr, Ty, CGF, Slot)
                           : EmitAAPCSVAArg(VAListAddr, Ty, CGF, Kind, Slot);
  }

  RValue EmitMSVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty,
                     AggValueSlot Slot) const override;

  bool allowBFloatArgsAndRet() const override {
    return getTarget().hasBFloat16Type();
  }

  using ABIInfo::appendAttributeMangling;
  void appendAttributeMangling(TargetClonesAttr *Attr, unsigned Index,
                               raw_ostream &Out) const override;
  void appendAttributeMangling(StringRef AttrStr,
                               raw_ostream &Out) const override;
};

class AArch64SwiftABIInfo : public SwiftABIInfo {
public:
  explicit AArch64SwiftABIInfo(CodeGenTypes &CGT)
      : SwiftABIInfo(CGT, /*SwiftErrorInRegister=*/true) {}

  bool isLegalVectorType(CharUnits VectorSize, llvm::Type *EltTy,
                         unsigned NumElts) const override;
};

class AArch64TargetCodeGenInfo : public TargetCodeGenInfo {
public:
  AArch64TargetCodeGenInfo(CodeGenTypes &CGT, AArch64ABIKind Kind)
      : TargetCodeGenInfo(std::make_unique<AArch64ABIInfo>(CGT, Kind)) {
    SwiftInfo = std::make_unique<AArch64SwiftABIInfo>(CGT);
  }

  StringRef getARCRetainAutoreleasedReturnValueMarker() const override {
    return "mov\tfp, fp\t\t// marker for objc_retainAutoreleaseReturnValue";
  }

  int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const override {
    return 31;
  }

  bool doesReturnSlotInterfereWithArgs() const override { return false; }

  void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
                           CodeGen::CodeGenModule &CGM) const override {
    auto *Fn = dyn_cast<llvm::Function>(GV);
    if (!Fn)
      return;

    const auto *FD = dyn_cast_or_null<FunctionDecl>(D);
    TargetInfo::BranchProtectionInfo BPI(CGM.getLangOpts());

    if (FD && FD->hasAttr<TargetAttr>()) {
      const auto *TA = FD->getAttr<TargetAttr>();
      ParsedTargetAttr Attr =
          CGM.getTarget().parseTargetAttr(TA->getFeaturesStr());
      if (!Attr.BranchProtection.empty()) {
        StringRef Error;
        (void)CGM.getTarget().validateBranchProtection(
            Attr.BranchProtection, Attr.CPU, BPI, CGM.getLangOpts(), Error);
        assert(Error.empty());
      }
    }
    setBranchProtectionFnAttributes(BPI, *Fn);
    setPointerAuthFnAttributes(CGM.getCodeGenOpts().PointerAuth, *Fn);
  }

  bool isScalarizableAsmOperand(CodeGen::CodeGenFunction &CGF,
                                llvm::Type *Ty) const override {
    if (CGF.getTarget().hasFeature("ls64")) {
      auto *ST = dyn_cast<llvm::StructType>(Ty);
      if (ST && ST->getNumElements() == 1) {
        auto *AT = dyn_cast<llvm::ArrayType>(ST->getElementType(0));
        if (AT && AT->getNumElements() == 8 &&
            AT->getElementType()->isIntegerTy(64))
          return true;
      }
    }
    return TargetCodeGenInfo::isScalarizableAsmOperand(CGF, Ty);
  }
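
  // Example (illustrative): with +ls64, an inline-asm operand whose type
  // lowers to the LLVM struct { [8 x i64] } -- e.g. a C struct wrapping a
  // uint64_t[8] -- passes the check above and can be scalarized to a single
  // 512-bit operand.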

  void checkFunctionABI(CodeGenModule &CGM,
                        const FunctionDecl *Decl) const override;

  void checkFunctionCallABI(CodeGenModule &CGM, SourceLocation CallLoc,
                            const FunctionDecl *Caller,
                            const FunctionDecl *Callee, const CallArgList &Args,
                            QualType ReturnType) const override;

  bool wouldInliningViolateFunctionCallABI(
      const FunctionDecl *Caller, const FunctionDecl *Callee) const override;

private:
  // Diagnose calls between functions with incompatible Streaming SVE
  // attributes.
  void checkFunctionCallABIStreaming(CodeGenModule &CGM, SourceLocation CallLoc,
                                     const FunctionDecl *Caller,
                                     const FunctionDecl *Callee) const;
  // Diagnose calls which must pass arguments in floating-point registers when
  // the selected target does not have floating-point registers.
  void checkFunctionCallABISoftFloat(CodeGenModule &CGM, SourceLocation CallLoc,
                                     const FunctionDecl *Caller,
                                     const FunctionDecl *Callee,
                                     const CallArgList &Args,
                                     QualType ReturnType) const;
};

class WindowsAArch64TargetCodeGenInfo : public AArch64TargetCodeGenInfo {
public:
  WindowsAArch64TargetCodeGenInfo(CodeGenTypes &CGT, AArch64ABIKind K)
      : AArch64TargetCodeGenInfo(CGT, K) {}

  void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
                           CodeGen::CodeGenModule &CGM) const override;

  void getDependentLibraryOption(llvm::StringRef Lib,
                                 llvm::SmallString<24> &Opt) const override {
    Opt = "/DEFAULTLIB:" + qualifyWindowsLibrary(Lib);
  }

  void getDetectMismatchOption(llvm::StringRef Name, llvm::StringRef Value,
                               llvm::SmallString<32> &Opt) const override {
    Opt = "/FAILIFMISMATCH:\"" + Name.str() + "=" + Value.str() + "\"";
  }
};

void WindowsAArch64TargetCodeGenInfo::setTargetAttributes(
    const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const {
  AArch64TargetCodeGenInfo::setTargetAttributes(D, GV, CGM);
  if (GV->isDeclaration())
    return;
  addStackProbeTargetAttributes(D, GV, CGM);
}
}

llvm::Type *
AArch64ABIInfo::convertFixedToScalableVectorType(const VectorType *VT) const {
  assert(VT->getElementType()->isBuiltinType() && "expected builtin type!");

  if (VT->getVectorKind() == VectorKind::SveFixedLengthPredicate) {
    assert(VT->getElementType()->castAs<BuiltinType>()->getKind() ==
               BuiltinType::UChar &&
           "unexpected builtin type for SVE predicate!");
    return llvm::ScalableVectorType::get(llvm::Type::getInt1Ty(getVMContext()),
                                         16);
  }

  if (VT->getVectorKind() == VectorKind::SveFixedLengthData) {
    const auto *BT = VT->getElementType()->castAs<BuiltinType>();
    switch (BT->getKind()) {
    default:
      llvm_unreachable("unexpected builtin type for SVE vector!");

    case BuiltinType::SChar:
    case BuiltinType::UChar:
    case BuiltinType::MFloat8:
      return llvm::ScalableVectorType::get(
          llvm::Type::getInt8Ty(getVMContext()), 16);

    case BuiltinType::Short:
    case BuiltinType::UShort:
      return llvm::ScalableVectorType::get(
          llvm::Type::getInt16Ty(getVMContext()), 8);

    case BuiltinType::Int:
    case BuiltinType::UInt:
      return llvm::ScalableVectorType::get(
          llvm::Type::getInt32Ty(getVMContext()), 4);

    case BuiltinType::Long:
    case BuiltinType::ULong:
      return llvm::ScalableVectorType::get(
          llvm::Type::getInt64Ty(getVMContext()), 2);

    case BuiltinType::Half:
      return llvm::ScalableVectorType::get(
          llvm::Type::getHalfTy(getVMContext()), 8);

    case BuiltinType::Float:
      return llvm::ScalableVectorType::get(
          llvm::Type::getFloatTy(getVMContext()), 4);

    case BuiltinType::Double:
      return llvm::ScalableVectorType::get(
          llvm::Type::getDoubleTy(getVMContext()), 2);

    case BuiltinType::BFloat16:
      return llvm::ScalableVectorType::get(
          llvm::Type::getBFloatTy(getVMContext()), 8);
    }
  }

  llvm_unreachable("expected fixed-length SVE vector");
}
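
// Example (illustrative): with -msve-vector-bits=512, a fixed-length
// svfloat32_t (16 floats) maps to <vscale x 4 x float> and a fixed-length
// svbool_t maps to <vscale x 16 x i1>; the scalable result depends only on
// the element type, not on the selected fixed width.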

ABIArgInfo AArch64ABIInfo::coerceIllegalVector(QualType Ty, unsigned &NSRN,
                                               unsigned &NPRN) const {
  assert(Ty->isVectorType() && "expected vector type!");

  const auto *VT = Ty->castAs<VectorType>();
  if (VT->getVectorKind() == VectorKind::SveFixedLengthPredicate) {
    assert(VT->getElementType()->isBuiltinType() && "expected builtin type!");
    assert(VT->getElementType()->castAs<BuiltinType>()->getKind() ==
               BuiltinType::UChar &&
           "unexpected builtin type for SVE predicate!");
    NPRN = std::min(NPRN + 1, 4u);
    return ABIArgInfo::getDirect(llvm::ScalableVectorType::get(
        llvm::Type::getInt1Ty(getVMContext()), 16));
  }

  if (VT->getVectorKind() == VectorKind::SveFixedLengthData) {
    NSRN = std::min(NSRN + 1, 8u);
    return ABIArgInfo::getDirect(convertFixedToScalableVectorType(VT));
  }

  uint64_t Size = getContext().getTypeSize(Ty);
  // Android promotes <2 x i8> to i16, not i32
  if ((isAndroid() || isOHOSFamily()) && (Size <= 16)) {
    llvm::Type *ResType = llvm::Type::getInt16Ty(getVMContext());
    return ABIArgInfo::getDirect(ResType);
  }
  if (Size <= 32) {
    llvm::Type *ResType = llvm::Type::getInt32Ty(getVMContext());
    return ABIArgInfo::getDirect(ResType);
  }
  if (Size == 64) {
    NSRN = std::min(NSRN + 1, 8u);
    auto *ResType =
        llvm::FixedVectorType::get(llvm::Type::getInt32Ty(getVMContext()), 2);
    return ABIArgInfo::getDirect(ResType);
  }
  if (Size == 128) {
    NSRN = std::min(NSRN + 1, 8u);
    auto *ResType =
        llvm::FixedVectorType::get(llvm::Type::getInt32Ty(getVMContext()), 4);
    return ABIArgInfo::getDirect(ResType);
  }

  return getNaturalAlignIndirect(Ty, getDataLayout().getAllocaAddrSpace(),
                                 /*ByVal=*/false);
}
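
// Example (illustrative): a 3-element float vector occupies 128 bits (Clang
// rounds vector sizes up to a power of two), so it is coerced to <4 x i32>;
// a 2 x i8 vector is coerced to i32 (i16 on Android/OHOS); anything larger
// than 128 bits is passed indirectly.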

ABIArgInfo AArch64ABIInfo::coerceAndExpandPureScalableAggregate(
    QualType Ty, bool IsNamedArg, unsigned NVec, unsigned NPred,
    const SmallVectorImpl<llvm::Type *> &UnpaddedCoerceToSeq, unsigned &NSRN,
    unsigned &NPRN) const {
  if (!IsNamedArg || NSRN + NVec > 8 || NPRN + NPred > 4)
    return getNaturalAlignIndirect(Ty, getDataLayout().getAllocaAddrSpace(),
                                   /*ByVal=*/false);
  NSRN += NVec;
  NPRN += NPred;

  // Handle SVE vector tuples.
  if (Ty->isSVESizelessBuiltinType())
    return ABIArgInfo::getDirect();

  llvm::Type *UnpaddedCoerceToType =
      UnpaddedCoerceToSeq.size() == 1
          ? UnpaddedCoerceToSeq[0]
          : llvm::StructType::get(CGT.getLLVMContext(), UnpaddedCoerceToSeq,
                                  true);

  SmallVector<llvm::Type *> CoerceToSeq;
  flattenType(CGT.ConvertType(Ty), CoerceToSeq);
  auto *CoerceToType =
      llvm::StructType::get(CGT.getLLVMContext(), CoerceToSeq, false);

  return ABIArgInfo::getCoerceAndExpand(CoerceToType, UnpaddedCoerceToType);
}

ABIArgInfo AArch64ABIInfo::classifyArgumentType(QualType Ty, bool IsVariadicFn,
                                                bool IsNamedArg,
                                                unsigned CallingConvention,
                                                unsigned &NSRN,
                                                unsigned &NPRN) const {
  Ty = useFirstFieldIfTransparentUnion(Ty);

  // Handle illegal vector types here.
  if (isIllegalVectorType(Ty))
    return coerceIllegalVector(Ty, NSRN, NPRN);

  if (!passAsAggregateType(Ty)) {
    // Treat an enum type as its underlying type.
    if (const EnumType *EnumTy = Ty->getAs<EnumType>())
      Ty = EnumTy->getDecl()->getIntegerType();

    if (const auto *EIT = Ty->getAs<BitIntType>())
      if (EIT->getNumBits() > 128)
        return getNaturalAlignIndirect(Ty, getDataLayout().getAllocaAddrSpace(),
                                       false);

    if (Ty->isVectorType())
      NSRN = std::min(NSRN + 1, 8u);
    else if (const auto *BT = Ty->getAs<BuiltinType>()) {
      if (BT->isFloatingPoint())
        NSRN = std::min(NSRN + 1, 8u);
      else {
        switch (BT->getKind()) {
        case BuiltinType::SveBool:
        case BuiltinType::SveCount:
          NPRN = std::min(NPRN + 1, 4u);
          break;
        case BuiltinType::SveBoolx2:
          NPRN = std::min(NPRN + 2, 4u);
          break;
        case BuiltinType::SveBoolx4:
          NPRN = std::min(NPRN + 4, 4u);
          break;
        default:
          if (BT->isSVESizelessBuiltinType())
            NSRN = std::min(
                NSRN + getContext().getBuiltinVectorTypeInfo(BT).NumVectors,
                8u);
        }
      }
    }

    return (isPromotableIntegerTypeForABI(Ty) && isDarwinPCS()
                ? ABIArgInfo::getExtend(Ty, CGT.ConvertType(Ty))
                : ABIArgInfo::getDirect());
  }

  // Structures with either a non-trivial destructor or a non-trivial
  // copy constructor are always indirect.
  if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) {
    return getNaturalAlignIndirect(
        Ty, /*AddrSpace=*/getDataLayout().getAllocaAddrSpace(),
        /*ByVal=*/RAA == CGCXXABI::RAA_DirectInMemory);
  }

  // Empty records:
  uint64_t Size = getContext().getTypeSize(Ty);
  bool IsEmpty = isEmptyRecord(getContext(), Ty, true);
  if (!Ty->isSVESizelessBuiltinType() && (IsEmpty || Size == 0)) {
    // Empty records are ignored in C mode, and in C++ on Darwin.
    if (!getContext().getLangOpts().CPlusPlus || isDarwinPCS())
      return ABIArgInfo::getIgnore();

    // In C++ mode, arguments which have sizeof() == 0 (which are non-standard
    // C++) are ignored. This isn't defined by any standard, so we copy GCC's
    // behaviour here.
    if (Size == 0)
      return ABIArgInfo::getIgnore();

    // Otherwise, they are passed as if they have a size of 1 byte.
    return ABIArgInfo::getDirect(llvm::Type::getInt8Ty(getVMContext()));
  }

  // Homogeneous Floating-point Aggregates (HFAs) need to be expanded.
  const Type *Base = nullptr;
  uint64_t Members = 0;
  bool IsWin64 = Kind == AArch64ABIKind::Win64 ||
                 CallingConvention == llvm::CallingConv::Win64;
  bool IsWinVariadic = IsWin64 && IsVariadicFn;
  // In variadic functions on Windows, all composite types are treated alike;
  // there is no special handling of HFAs/HVAs.
  if (!IsWinVariadic && isHomogeneousAggregate(Ty, Base, Members)) {
    NSRN = std::min(NSRN + Members, uint64_t(8));
    if (Kind != AArch64ABIKind::AAPCS)
      return ABIArgInfo::getDirect(
          llvm::ArrayType::get(CGT.ConvertType(QualType(Base, 0)), Members));

    // For HFAs/HVAs, cap the argument alignment at 16; otherwise set it to 8,
    // per the AAPCS64 document.
    unsigned Align =
        getContext().getTypeUnadjustedAlignInChars(Ty).getQuantity();
    Align = (Align >= 16) ? 16 : 8;
    return ABIArgInfo::getDirect(
        llvm::ArrayType::get(CGT.ConvertType(QualType(Base, 0)), Members), 0,
        nullptr, true, Align);
  }
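
  // Example (illustrative): struct { double x, y, z; } is an HFA with
  // Base == double and Members == 3, so it is passed as [3 x double] and
  // consumes three FP registers.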

  // In AAPCS named arguments of a Pure Scalable Type are passed expanded in
  // registers, or indirectly if there are not enough registers.
  if (Kind == AArch64ABIKind::AAPCS) {
    unsigned NVec = 0, NPred = 0;
    SmallVector<llvm::Type *> UnpaddedCoerceToSeq;
    if (passAsPureScalableType(Ty, NVec, NPred, UnpaddedCoerceToSeq) &&
        (NVec + NPred) > 0)
      return coerceAndExpandPureScalableAggregate(
          Ty, IsNamedArg, NVec, NPred, UnpaddedCoerceToSeq, NSRN, NPRN);
  }

  // Aggregates <= 16 bytes are passed directly in registers or on the stack.
  if (Size <= 128) {
    unsigned Alignment;
    if (Kind == AArch64ABIKind::AAPCS) {
      Alignment = getContext().getTypeUnadjustedAlign(Ty);
      Alignment = Alignment < 128 ? 64 : 128;
    } else {
      Alignment =
          std::max(getContext().getTypeAlign(Ty),
                   (unsigned)getTarget().getPointerWidth(LangAS::Default));
    }
    Size = llvm::alignTo(Size, Alignment);

    // If the aggregate is made up of pointers, use an array of pointers for
    // the coerced type. This avoids having to convert ptr2int->int2ptr through
    // the call, allowing alias analysis to produce better code.
    auto ContainsOnlyPointers = [&](const auto &Self, QualType Ty) {
      if (isEmptyRecord(getContext(), Ty, true))
        return false;
      const RecordType *RT = Ty->getAs<RecordType>();
      if (!RT)
        return false;
      const RecordDecl *RD = RT->getDecl();
      if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD)) {
        for (const auto &I : CXXRD->bases())
          if (!Self(Self, I.getType()))
            return false;
      }
      return all_of(RD->fields(), [&](FieldDecl *FD) {
        QualType FDTy = FD->getType();
        if (FDTy->isArrayType())
          FDTy = getContext().getBaseElementType(FDTy);
        return (FDTy->isPointerOrReferenceType() &&
                getContext().getTypeSize(FDTy) == 64 &&
                !FDTy->getPointeeType().hasAddressSpace()) ||
               Self(Self, FDTy);
      });
    };

    // We use a pair of i64 for 16-byte aggregate with 8-byte alignment.
    // For aggregates with 16-byte alignment, we use i128.
    llvm::Type *BaseTy = llvm::Type::getIntNTy(getVMContext(), Alignment);
    if ((Size == 64 || Size == 128) && Alignment == 64 &&
        ContainsOnlyPointers(ContainsOnlyPointers, Ty))
      BaseTy = llvm::PointerType::getUnqual(getVMContext());
    return ABIArgInfo::getDirect(
        Size == Alignment ? BaseTy
                          : llvm::ArrayType::get(BaseTy, Size / Alignment));
  }
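
  // Example (illustrative): struct { int a, b, c; } (12 bytes) is coerced to
  // [2 x i64]; struct { void *p, *q; } is coerced to [2 x ptr]; a 16-byte
  // struct with 16-byte alignment is coerced to i128.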

  return getNaturalAlignIndirect(Ty, getDataLayout().getAllocaAddrSpace(),
                                 /*ByVal=*/false);
}

ABIArgInfo AArch64ABIInfo::classifyReturnType(QualType RetTy,
                                              bool IsVariadicFn) const {
  if (RetTy->isVoidType())
    return ABIArgInfo::getIgnore();

  if (const auto *VT = RetTy->getAs<VectorType>()) {
    if (VT->getVectorKind() == VectorKind::SveFixedLengthData ||
        VT->getVectorKind() == VectorKind::SveFixedLengthPredicate) {
      unsigned NSRN = 0, NPRN = 0;
      return coerceIllegalVector(RetTy, NSRN, NPRN);
    }
  }

  // Large vector types should be returned via memory.
  if (RetTy->isVectorType() && getContext().getTypeSize(RetTy) > 128)
    return getNaturalAlignIndirect(RetTy, getDataLayout().getAllocaAddrSpace());

  if (!passAsAggregateType(RetTy)) {
    // Treat an enum type as its underlying type.
    if (const EnumType *EnumTy = RetTy->getAs<EnumType>())
      RetTy = EnumTy->getDecl()->getIntegerType();

    if (const auto *EIT = RetTy->getAs<BitIntType>())
      if (EIT->getNumBits() > 128)
        return getNaturalAlignIndirect(RetTy,
                                       getDataLayout().getAllocaAddrSpace());

    return (isPromotableIntegerTypeForABI(RetTy) && isDarwinPCS()
                ? ABIArgInfo::getExtend(RetTy)
                : ABIArgInfo::getDirect());
  }

  uint64_t Size = getContext().getTypeSize(RetTy);
  if (!RetTy->isSVESizelessBuiltinType() &&
      (isEmptyRecord(getContext(), RetTy, true) || Size == 0))
    return ABIArgInfo::getIgnore();

  const Type *Base = nullptr;
  uint64_t Members = 0;
  if (isHomogeneousAggregate(RetTy, Base, Members) &&
      !(getTarget().getTriple().getArch() == llvm::Triple::aarch64_32 &&
        IsVariadicFn))
    // Homogeneous Floating-point Aggregates (HFAs) are returned directly.
    return ABIArgInfo::getDirect();

  // In AAPCS return values of a Pure Scalable type are treated as a single
  // named argument and passed expanded in registers, or indirectly if there are
  // not enough registers.
  if (Kind == AArch64ABIKind::AAPCS) {
    unsigned NSRN = 0, NPRN = 0;
    unsigned NVec = 0, NPred = 0;
    SmallVector<llvm::Type *> UnpaddedCoerceToSeq;
    if (passAsPureScalableType(RetTy, NVec, NPred, UnpaddedCoerceToSeq) &&
        (NVec + NPred) > 0)
      return coerceAndExpandPureScalableAggregate(
          RetTy, /* IsNamedArg */ true, NVec, NPred, UnpaddedCoerceToSeq, NSRN,
          NPRN);
  }

  // Aggregates <= 16 bytes are returned directly in registers or on the stack.
  if (Size <= 128) {
    if (Size <= 64 && getDataLayout().isLittleEndian()) {
      // Composite types are returned in lower bits of a 64-bit register for LE,
      // and in higher bits for BE. However, integer types are always returned
      // in lower bits for both LE and BE, and they are not rounded up to
      // 64-bits. We can skip rounding up of composite types for LE, but not for
      // BE, otherwise composite types will be indistinguishable from integer
      // types.
      return ABIArgInfo::getDirect(
          llvm::IntegerType::get(getVMContext(), Size));
    }

    unsigned Alignment = getContext().getTypeAlign(RetTy);
    Size = llvm::alignTo(Size, 64); // round up to multiple of 8 bytes

    // We use a pair of i64 for 16-byte aggregate with 8-byte alignment.
    // For aggregates with 16-byte alignment, we use i128.
    if (Alignment < 128 && Size == 128) {
      llvm::Type *BaseTy = llvm::Type::getInt64Ty(getVMContext());
      return ABIArgInfo::getDirect(llvm::ArrayType::get(BaseTy, Size / 64));
    }
    return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(), Size));
  }
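
  // Example (illustrative): on little-endian targets, struct { short a, b, c; }
  // (6 bytes) is returned directly as i48, while a 12-byte struct is rounded
  // up and returned as [2 x i64].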

  return getNaturalAlignIndirect(RetTy, getDataLayout().getAllocaAddrSpace());
}

/// isIllegalVectorType - check whether the vector type is legal for AArch64.
bool AArch64ABIInfo::isIllegalVectorType(QualType Ty) const {
  if (const VectorType *VT = Ty->getAs<VectorType>()) {
    // Check whether VT is a fixed-length SVE vector. These types are
    // represented as scalable vectors in function args/return and must be
    // coerced from fixed vectors.
    if (VT->getVectorKind() == VectorKind::SveFixedLengthData ||
        VT->getVectorKind() == VectorKind::SveFixedLengthPredicate)
      return true;

    // Check whether VT is legal.
    unsigned NumElements = VT->getNumElements();
    uint64_t Size = getContext().getTypeSize(VT);
    // NumElements should be a power of 2.
    if (!llvm::isPowerOf2_32(NumElements))
      return true;

    // arm64_32 has to be compatible with the ARM logic here, which allows huge
    // vectors for some reason.
    llvm::Triple Triple = getTarget().getTriple();
    if (Triple.getArch() == llvm::Triple::aarch64_32 &&
        Triple.isOSBinFormatMachO())
      return Size <= 32;

    return Size != 64 && (Size != 128 || NumElements == 1);
  }
  return false;
}
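
// Example (illustrative): <2 x float> (64 bits) and <4 x i32> (128 bits) are
// legal; a 3-element vector is illegal (element count is not a power of two),
// as is any generic vector wider than 128 bits.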

bool AArch64SwiftABIInfo::isLegalVectorType(CharUnits VectorSize,
                                            llvm::Type *EltTy,
                                            unsigned NumElts) const {
  if (!llvm::isPowerOf2_32(NumElts))
    return false;
  if (VectorSize.getQuantity() != 8 &&
      (VectorSize.getQuantity() != 16 || NumElts == 1))
    return false;
  return true;
}

bool AArch64ABIInfo::isHomogeneousAggregateBaseType(QualType Ty) const {
  // For the soft-float ABI variant, no types are considered to be homogeneous
  // aggregates.
  if (isSoftFloat())
    return false;

  // Homogeneous aggregates for AAPCS64 must have base types of a floating
  // point type or a short-vector type. This is the same as the 32-bit ABI,
  // but with the difference that any floating-point type is allowed,
  // including __fp16.
  if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) {
    if (BT->isFloatingPoint())
      return true;
  } else if (const VectorType *VT = Ty->getAs<VectorType>()) {
    if (auto Kind = VT->getVectorKind();
        Kind == VectorKind::SveFixedLengthData ||
        Kind == VectorKind::SveFixedLengthPredicate)
      return false;

    unsigned VecSize = getContext().getTypeSize(VT);
    if (VecSize == 64 || VecSize == 128)
      return true;
  }
  return false;
}
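
// Example (illustrative): __fp16, float, double and 64/128-bit NEON vectors
// such as float32x4_t all qualify as HFA base types; fixed-length SVE vectors
// never do.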

bool AArch64ABIInfo::isHomogeneousAggregateSmallEnough(const Type *Base,
                                                       uint64_t Members) const {
  return Members <= 4;
}

bool AArch64ABIInfo::isZeroLengthBitfieldPermittedInHomogeneousAggregate()
    const {
  // AAPCS64 says that the rule for whether something is a homogeneous
  // aggregate is applied to the output of the data layout decision. So
  // anything that doesn't affect the data layout also does not affect
  // homogeneity. In particular, zero-length bitfields don't stop a struct
  // being homogeneous.
  return true;
}
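
// Example (illustrative): struct { float a; int : 0; float b; } is still an
// HFA, because the zero-length bitfield does not change the data layout.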

bool AArch64ABIInfo::passAsAggregateType(QualType Ty) const {
  if (Kind == AArch64ABIKind::AAPCS && Ty->isSVESizelessBuiltinType()) {
    const auto *BT = Ty->castAs<BuiltinType>();
    return !BT->isSVECount() &&
           getContext().getBuiltinVectorTypeInfo(BT).NumVectors > 1;
  }
  return isAggregateTypeForABI(Ty);
}
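
// Example (illustrative): under AAPCS, an SVE tuple such as svfloat32x2_t
// (NumVectors == 2) is treated as an aggregate here, whereas single-vector
// types like svfloat32_t, and svbool_t/svcount_t, are not.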

// Check if a type needs to be passed in registers as a Pure Scalable Type (as
// defined by AAPCS64). Return the number of data vectors and the number of
// predicate vectors in the type, into `NVec` and `NPred`, respectively. Upon
// return `CoerceToSeq` contains an expanded sequence of LLVM IR types, one
// element for each non-composite member. For practical purposes, limit the
// length of `CoerceToSeq` to about 12 (the maximum that could possibly fit
// in registers) and return false, the effect of which will be to pass the
// argument under the rules for a large (> 128 bytes) composite.
bool AArch64ABIInfo::passAsPureScalableType(
    QualType Ty, unsigned &NVec, unsigned &NPred,
    SmallVectorImpl<llvm::Type *> &CoerceToSeq) const {
  if (const ConstantArrayType *AT = getContext().getAsConstantArrayType(Ty)) {
    uint64_t NElt = AT->getZExtSize();
    if (NElt == 0)
      return false;

    unsigned NV = 0, NP = 0;
    SmallVector<llvm::Type *> EltCoerceToSeq;
    if (!passAsPureScalableType(AT->getElementType(), NV, NP, EltCoerceToSeq))
      return false;

    if (CoerceToSeq.size() + NElt * EltCoerceToSeq.size() > 12)
      return false;

    for (uint64_t I = 0; I < NElt; ++I)
      llvm::append_range(CoerceToSeq, EltCoerceToSeq);

    NVec += NElt * NV;
    NPred += NElt * NP;
    return true;
  }

  if (const RecordType *RT = Ty->getAs<RecordType>()) {
    // If the record cannot be passed in registers, then it's not a PST.
    if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(RT, getCXXABI());
        RAA != CGCXXABI::RAA_Default)
      return false;

    // Pure scalable types are never unions and never contain unions.
    const RecordDecl *RD = RT->getDecl();
    if (RD->isUnion())
      return false;

    // If this is a C++ record, check the bases.
    if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD)) {
      for (const auto &I : CXXRD->bases()) {
        if (isEmptyRecord(getContext(), I.getType(), true))
          continue;
        if (!passAsPureScalableType(I.getType(), NVec, NPred, CoerceToSeq))
          return false;
      }
    }

    // Check members.
    for (const auto *FD : RD->fields()) {
      QualType FT = FD->getType();
      if (isEmptyField(getContext(), FD, /* AllowArrays */ true))
        continue;
      if (!passAsPureScalableType(FT, NVec, NPred, CoerceToSeq))
        return false;
    }

    return true;
  }

  if (const auto *VT = Ty->getAs<VectorType>()) {
    if (VT->getVectorKind() == VectorKind::SveFixedLengthPredicate) {
      ++NPred;
      if (CoerceToSeq.size() + 1 > 12)
        return false;
      CoerceToSeq.push_back(convertFixedToScalableVectorType(VT));
      return true;
    }

    if (VT->getVectorKind() == VectorKind::SveFixedLengthData) {
      ++NVec;
      if (CoerceToSeq.size() + 1 > 12)
        return false;
      CoerceToSeq.push_back(convertFixedToScalableVectorType(VT));
      return true;
    }

    return false;
  }

  if (!Ty->isBuiltinType())
    return false;

  bool isPredicate;
  switch (Ty->castAs<BuiltinType>()->getKind()) {
#define SVE_VECTOR_TYPE(Name, MangledName, Id, SingletonId)                    \
  case BuiltinType::Id:                                                        \
    isPredicate = false;                                                       \
    break;
#define SVE_PREDICATE_TYPE(Name, MangledName, Id, SingletonId)                 \
  case BuiltinType::Id:                                                        \
    isPredicate = true;                                                        \
    break;
#include "clang/Basic/AArch64ACLETypes.def"
  default:
    return false;
  }

  ASTContext::BuiltinVectorTypeInfo Info =
      getContext().getBuiltinVectorTypeInfo(cast<BuiltinType>(Ty));
  assert(Info.NumVectors > 0 && Info.NumVectors <= 4 &&
         "Expected 1, 2, 3 or 4 vectors!");
  if (isPredicate)
    NPred += Info.NumVectors;
  else
    NVec += Info.NumVectors;
  llvm::Type *EltTy = Info.ElementType->isMFloat8Type()
                          ? llvm::Type::getInt8Ty(getVMContext())
                          : CGT.ConvertType(Info.ElementType);
  auto *VTy = llvm::ScalableVectorType::get(EltTy, Info.EC.getKnownMinValue());

  if (CoerceToSeq.size() + Info.NumVectors > 12)
    return false;
  std::fill_n(std::back_inserter(CoerceToSeq), Info.NumVectors, VTy);

  return true;
}
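
// Example (illustrative): given
//   typedef svfloat32_t vec __attribute__((arm_sve_vector_bits(N)));
//   typedef svbool_t pred __attribute__((arm_sve_vector_bits(N)));
// the type struct { vec v[2]; pred p; } is a Pure Scalable Type with
// NVec == 2, NPred == 1 and CoerceToSeq == { <vscale x 4 x float>,
// <vscale x 4 x float>, <vscale x 16 x i1> }.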

// Expand an LLVM IR type into a sequence with an element for each non-struct,
// non-array member of the type, with the exception of the padding types, which
// are retained.
void AArch64ABIInfo::flattenType(
    llvm::Type *Ty, SmallVectorImpl<llvm::Type *> &Flattened) const {

  if (ABIArgInfo::isPaddingForCoerceAndExpand(Ty)) {
    Flattened.push_back(Ty);
    return;
  }

  if (const auto *AT = dyn_cast<llvm::ArrayType>(Ty)) {
    uint64_t NElt = AT->getNumElements();
    if (NElt == 0)
      return;

    SmallVector<llvm::Type *> EltFlattened;
    flattenType(AT->getElementType(), EltFlattened);

    for (uint64_t I = 0; I < NElt; ++I)
      llvm::append_range(Flattened, EltFlattened);
    return;
  }

  if (const auto *ST = dyn_cast<llvm::StructType>(Ty)) {
    for (auto *ET : ST->elements())
      flattenType(ET, Flattened);
    return;
  }

  Flattened.push_back(Ty);
}
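
// Example (illustrative): flattening the struct type
// { [2 x <vscale x 4 x float>], <vscale x 16 x i1> } yields the sequence
// { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 16 x i1> }.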

RValue AArch64ABIInfo::EmitAAPCSVAArg(Address VAListAddr, QualType Ty,
                                      CodeGenFunction &CGF, AArch64ABIKind Kind,
                                      AggValueSlot Slot) const {
  // These numbers are not used for variadic arguments, hence it doesn't matter
  // that they don't retain their values across multiple calls to
  // `classifyArgumentType` here.
  unsigned NSRN = 0, NPRN = 0;
  ABIArgInfo AI =
      classifyArgumentType(Ty, /*IsVariadicFn=*/true, /* IsNamedArg */ false,
                           CGF.CurFnInfo->getCallingConvention(), NSRN, NPRN);
  // Empty records are ignored for parameter passing purposes.
  if (AI.isIgnore())
    return Slot.asRValue();

  bool IsIndirect = AI.isIndirect();

  llvm::Type *BaseTy = CGF.ConvertType(Ty);
  if (IsIndirect)
    BaseTy = llvm::PointerType::getUnqual(BaseTy->getContext());
  else if (AI.getCoerceToType())
    BaseTy = AI.getCoerceToType();

  unsigned NumRegs = 1;
  if (llvm::ArrayType *ArrTy = dyn_cast<llvm::ArrayType>(BaseTy)) {
    BaseTy = ArrTy->getElementType();
    NumRegs = ArrTy->getNumElements();
  }
  bool IsFPR =
      !isSoftFloat() && (BaseTy->isFloatingPointTy() || BaseTy->isVectorTy());

  // The AArch64 va_list type and its handling are specified in the Procedure
  // Call Standard, section B.4:
  //
  // struct {
  //   void *__stack;
  //   void *__gr_top;
  //   void *__vr_top;
  //   int __gr_offs;
  //   int __vr_offs;
  // };

  llvm::BasicBlock *MaybeRegBlock = CGF.createBasicBlock("vaarg.maybe_reg");
  llvm::BasicBlock *InRegBlock = CGF.createBasicBlock("vaarg.in_reg");
  llvm::BasicBlock *OnStackBlock = CGF.createBasicBlock("vaarg.on_stack");
  llvm::BasicBlock *ContBlock = CGF.createBasicBlock("vaarg.end");

  CharUnits TySize = getContext().getTypeSizeInChars(Ty);
  CharUnits TyAlign = getContext().getTypeUnadjustedAlignInChars(Ty);

  Address reg_offs_p = Address::invalid();
  llvm::Value *reg_offs = nullptr;
  int reg_top_index;
  int RegSize = IsIndirect ? 8 : TySize.getQuantity();
  if (!IsFPR) {
    // 3 is the field number of __gr_offs.
    reg_offs_p = CGF.Builder.CreateStructGEP(VAListAddr, 3, "gr_offs_p");
    reg_offs = CGF.Builder.CreateLoad(reg_offs_p, "gr_offs");
    reg_top_index = 1; // field number for __gr_top
    RegSize = llvm::alignTo(RegSize, 8);
  } else {
    // 4 is the field number of __vr_offs.
    reg_offs_p = CGF.Builder.CreateStructGEP(VAListAddr, 4, "vr_offs_p");
    reg_offs = CGF.Builder.CreateLoad(reg_offs_p, "vr_offs");
    reg_top_index = 2; // field number for __vr_top
    RegSize = 16 * NumRegs;
  }

  //=======================================
  // Find out where argument was passed
  //=======================================

  // If reg_offs >= 0 we're already using the stack for this type of
  // argument. We don't want to keep updating reg_offs (in case it overflows,
  // though anyone passing 2GB of arguments, each at most 16 bytes, deserves
  // whatever they get).
  llvm::Value *UsingStack = nullptr;
  UsingStack = CGF.Builder.CreateICmpSGE(
      reg_offs, llvm::ConstantInt::get(CGF.Int32Ty, 0));

  CGF.Builder.CreateCondBr(UsingStack, OnStackBlock, MaybeRegBlock);

  // Otherwise, at least some kind of argument could go in these registers; the
  // question is whether this particular type is too big.
  CGF.EmitBlock(MaybeRegBlock);

  // Integer arguments may need their register alignment corrected (for example
  // a "struct { __int128 a; };" gets passed in x_2N, x_{2N+1}). In this case we
  // align __gr_offs to calculate the potential address.
  if (!IsFPR && !IsIndirect && TyAlign.getQuantity() > 8) {
    int Align = TyAlign.getQuantity();

    reg_offs = CGF.Builder.CreateAdd(
        reg_offs, llvm::ConstantInt::get(CGF.Int32Ty, Align - 1),
        "align_regoffs");
    reg_offs = CGF.Builder.CreateAnd(
        reg_offs, llvm::ConstantInt::get(CGF.Int32Ty, -Align),
        "aligned_regoffs");
  }

  // Update the gr_offs/vr_offs pointer for the next call to va_arg on this
  // va_list. The fact that this is done unconditionally reflects the fact that
  // allocating an argument to the stack also uses up all the remaining
  // registers of the appropriate kind.
  llvm::Value *NewOffset = nullptr;
  NewOffset = CGF.Builder.CreateAdd(
      reg_offs, llvm::ConstantInt::get(CGF.Int32Ty, RegSize), "new_reg_offs");
  CGF.Builder.CreateStore(NewOffset, reg_offs_p);

  // Now we're in a position to decide whether this argument really was in
  // registers or not.
  llvm::Value *InRegs = nullptr;
  InRegs = CGF.Builder.CreateICmpSLE(
      NewOffset, llvm::ConstantInt::get(CGF.Int32Ty, 0), "inreg");

  CGF.Builder.CreateCondBr(InRegs, InRegBlock, OnStackBlock);

  //=======================================
  // Argument was in registers
  //=======================================

  // Now we emit the code for if the argument was originally passed in
  // registers. First start the appropriate block:
  CGF.EmitBlock(InRegBlock);

  llvm::Value *reg_top = nullptr;
  Address reg_top_p =
      CGF.Builder.CreateStructGEP(VAListAddr, reg_top_index, "reg_top_p");
  reg_top = CGF.Builder.CreateLoad(reg_top_p, "reg_top");
  Address BaseAddr(CGF.Builder.CreateInBoundsGEP(CGF.Int8Ty, reg_top, reg_offs),
                   CGF.Int8Ty, CharUnits::fromQuantity(IsFPR ? 16 : 8));
  Address RegAddr = Address::invalid();
  llvm::Type *MemTy = CGF.ConvertTypeForMem(Ty), *ElementTy = MemTy;

  if (IsIndirect) {
    // If the argument has been passed indirectly (actually a struct), whatever
    // we find in stored registers or on the stack will actually be a pointer
    // to the struct.
    MemTy = llvm::PointerType::getUnqual(MemTy->getContext());
  }

  const Type *Base = nullptr;
  uint64_t NumMembers = 0;
  bool IsHFA = isHomogeneousAggregate(Ty, Base, NumMembers);
  if (IsHFA && NumMembers > 1) {
    // Homogeneous aggregates passed in registers will have their elements split
    // and stored 16 bytes apart regardless of size (they're notionally in qN,
    // qN+1, ...). We reload the elements and store them contiguously into a
    // temporary local variable.
    assert(!IsIndirect && "Homogeneous aggregates should be passed directly");
    auto BaseTyInfo = getContext().getTypeInfoInChars(QualType(Base, 0));
    llvm::Type *BaseTy = CGF.ConvertType(QualType(Base, 0));
    llvm::Type *HFATy = llvm::ArrayType::get(BaseTy, NumMembers);
    Address Tmp = CGF.CreateTempAlloca(HFATy,
                                       std::max(TyAlign, BaseTyInfo.Align));

    // On big-endian platforms, the value will be right-aligned in its slot.
    int Offset = 0;
    if (CGF.CGM.getDataLayout().isBigEndian() &&
        BaseTyInfo.Width.getQuantity() < 16)
      Offset = 16 - BaseTyInfo.Width.getQuantity();

    for (unsigned i = 0; i < NumMembers; ++i) {
      CharUnits BaseOffset = CharUnits::fromQuantity(16 * i + Offset);
      Address LoadAddr =
        CGF.Builder.CreateConstInBoundsByteGEP(BaseAddr, BaseOffset);
      LoadAddr = LoadAddr.withElementType(BaseTy);

      Address StoreAddr = CGF.Builder.CreateConstArrayGEP(Tmp, i);

      llvm::Value *Elem = CGF.Builder.CreateLoad(LoadAddr);
      CGF.Builder.CreateStore(Elem, StoreAddr);
    }

    RegAddr = Tmp.withElementType(MemTy);
  } else {
    // Otherwise the object is contiguous in memory.

    // It might be right-aligned in its slot.
    CharUnits SlotSize = BaseAddr.getAlignment();
    if (CGF.CGM.getDataLayout().isBigEndian() && !IsIndirect &&
        (IsHFA || !isAggregateTypeForABI(Ty)) &&
        TySize < SlotSize) {
      CharUnits Offset = SlotSize - TySize;
      BaseAddr = CGF.Builder.CreateConstInBoundsByteGEP(BaseAddr, Offset);
    }

    RegAddr = BaseAddr.withElementType(MemTy);
  }

  CGF.EmitBranch(ContBlock);

  //=======================================
  // Argument was on the stack
  //=======================================
  CGF.EmitBlock(OnStackBlock);

  Address stack_p = CGF.Builder.CreateStructGEP(VAListAddr, 0, "stack_p");
  llvm::Value *OnStackPtr = CGF.Builder.CreateLoad(stack_p, "stack");

  // Again, stack arguments may need realignment. In this case both integer and
  // floating-point ones might be affected.
  if (!IsIndirect && TyAlign.getQuantity() > 8) {
    OnStackPtr = emitRoundPointerUpToAlignment(CGF, OnStackPtr, TyAlign);
  }
  Address OnStackAddr = Address(OnStackPtr, CGF.Int8Ty,
                                std::max(CharUnits::fromQuantity(8), TyAlign));

  // All stack slots are multiples of 8 bytes.
  CharUnits StackSlotSize = CharUnits::fromQuantity(8);
  CharUnits StackSize;
  if (IsIndirect)
    StackSize = StackSlotSize;
  else
    StackSize = TySize.alignTo(StackSlotSize);

  llvm::Value *StackSizeC = CGF.Builder.getSize(StackSize);
  llvm::Value *NewStack = CGF.Builder.CreateInBoundsGEP(
      CGF.Int8Ty, OnStackPtr, StackSizeC, "new_stack");

  // Write the new value of __stack for the next call to va_arg.
  CGF.Builder.CreateStore(NewStack, stack_p);

  if (CGF.CGM.getDataLayout().isBigEndian() && !isAggregateTypeForABI(Ty) &&
      TySize < StackSlotSize) {
    CharUnits Offset = StackSlotSize - TySize;
    OnStackAddr = CGF.Builder.CreateConstInBoundsByteGEP(OnStackAddr, Offset);
  }

  OnStackAddr = OnStackAddr.withElementType(MemTy);

  CGF.EmitBranch(ContBlock);

  //=======================================
  // Tidy up
  //=======================================
  CGF.EmitBlock(ContBlock);

  Address ResAddr = emitMergePHI(CGF, RegAddr, InRegBlock, OnStackAddr,
                                 OnStackBlock, "vaargs.addr");

  if (IsIndirect)
    return CGF.EmitLoadOfAnyValue(
        CGF.MakeAddrLValue(
            Address(CGF.Builder.CreateLoad(ResAddr, "vaarg.addr"), ElementTy,
                    TyAlign),
            Ty),
        Slot);

  return CGF.EmitLoadOfAnyValue(CGF.MakeAddrLValue(ResAddr, Ty), Slot);
}

RValue AArch64ABIInfo::EmitDarwinVAArg(Address VAListAddr, QualType Ty,
                                       CodeGenFunction &CGF,
                                       AggValueSlot Slot) const {
  // The backend's lowering doesn't support va_arg for aggregates or
  // illegal vector types.  Lower VAArg here for these cases and use
  // the LLVM va_arg instruction for everything else.
  if (!isAggregateTypeForABI(Ty) && !isIllegalVectorType(Ty))
    return CGF.EmitLoadOfAnyValue(
        CGF.MakeAddrLValue(
            EmitVAArgInstr(CGF, VAListAddr, Ty, ABIArgInfo::getDirect()), Ty),
        Slot);

  uint64_t PointerSize = getTarget().getPointerWidth(LangAS::Default) / 8;
  CharUnits SlotSize = CharUnits::fromQuantity(PointerSize);

  // Empty records are ignored for parameter passing purposes.
  if (isEmptyRecord(getContext(), Ty, true))
    return Slot.asRValue();

  // The size of the actual thing passed, which might end up just
  // being a pointer for indirect types.
  auto TyInfo = getContext().getTypeInfoInChars(Ty);

  // Arguments bigger than 16 bytes which aren't homogeneous
  // aggregates should be passed indirectly.
  bool IsIndirect = false;
  if (TyInfo.Width.getQuantity() > 16) {
    const Type *Base = nullptr;
    uint64_t Members = 0;
    IsIndirect = !isHomogeneousAggregate(Ty, Base, Members);
  }

  return emitVoidPtrVAArg(CGF, VAListAddr, Ty, IsIndirect, TyInfo, SlotSize,
                          /*AllowHigherAlign*/ true, Slot);
}

RValue AArch64ABIInfo::EmitMSVAArg(CodeGenFunction &CGF, Address VAListAddr,
                                   QualType Ty, AggValueSlot Slot) const {
  bool IsIndirect = false;

  // Composites larger than 16 bytes are passed by reference.
  if (isAggregateTypeForABI(Ty) && getContext().getTypeSize(Ty) > 128)
    IsIndirect = true;

  return emitVoidPtrVAArg(CGF, VAListAddr, Ty, IsIndirect,
                          CGF.getContext().getTypeInfoInChars(Ty),
                          CharUnits::fromQuantity(8),
                          /*allowHigherAlign*/ false, Slot);
}

static bool isStreamingCompatible(const FunctionDecl *F) {
  if (const auto *T = F->getType()->getAs<FunctionProtoType>())
    return T->getAArch64SMEAttributes() &
           FunctionType::SME_PStateSMCompatibleMask;
  return false;
}

// Report an error if an argument or return value of type Ty would need to be
// passed in a floating-point register.
static void diagnoseIfNeedsFPReg(DiagnosticsEngine &Diags,
                                 const StringRef ABIName,
                                 const AArch64ABIInfo &ABIInfo,
                                 const QualType &Ty, const NamedDecl *D,
                                 SourceLocation loc) {
  const Type *HABase = nullptr;
  uint64_t HAMembers = 0;
  if (Ty->isFloatingType() || Ty->isVectorType() ||
      ABIInfo.isHomogeneousAggregate(Ty, HABase, HAMembers)) {
    Diags.Report(loc, diag::err_target_unsupported_type_for_abi)
        << D->getDeclName() << Ty << ABIName;
  }
}

// If we are using a hard-float ABI, but do not have floating point registers,
// then report an error for any function arguments or returns which would be
// passed in floating-point registers.
void AArch64TargetCodeGenInfo::checkFunctionABI(
    CodeGenModule &CGM, const FunctionDecl *FuncDecl) const {
  const AArch64ABIInfo &ABIInfo = getABIInfo<AArch64ABIInfo>();
  const TargetInfo &TI = ABIInfo.getContext().getTargetInfo();

  if (!TI.hasFeature("fp") && !ABIInfo.isSoftFloat()) {
    diagnoseIfNeedsFPReg(CGM.getDiags(), TI.getABI(), ABIInfo,
                         FuncDecl->getReturnType(), FuncDecl,
                         FuncDecl->getLocation());
    for (ParmVarDecl *PVD : FuncDecl->parameters()) {
      diagnoseIfNeedsFPReg(CGM.getDiags(), TI.getABI(), ABIInfo, PVD->getType(),
                           PVD, FuncDecl->getLocation());
    }
  }
}

enum class ArmSMEInlinability : uint8_t {
  Ok = 0,
  ErrorCalleeRequiresNewZA = 1 << 0,
  ErrorCalleeRequiresNewZT0 = 1 << 1,
  WarnIncompatibleStreamingModes = 1 << 2,
  ErrorIncompatibleStreamingModes = 1 << 3,

  IncompatibleStreamingModes =
      WarnIncompatibleStreamingModes | ErrorIncompatibleStreamingModes,

  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/ErrorIncompatibleStreamingModes),
};

/// Determines if there are any Arm SME ABI issues with inlining \p Callee into
/// \p Caller. Returns the issue (if any) in the ArmSMEInlinability bit enum.
static ArmSMEInlinability GetArmSMEInlinability(const FunctionDecl *Caller,
                                                const FunctionDecl *Callee) {
  bool CallerIsStreaming =
      IsArmStreamingFunction(Caller, /*IncludeLocallyStreaming=*/true);
  bool CalleeIsStreaming =
      IsArmStreamingFunction(Callee, /*IncludeLocallyStreaming=*/true);
  bool CallerIsStreamingCompatible = isStreamingCompatible(Caller);
  bool CalleeIsStreamingCompatible = isStreamingCompatible(Callee);

  ArmSMEInlinability Inlinability = ArmSMEInlinability::Ok;

  if (!CalleeIsStreamingCompatible &&
      (CallerIsStreaming != CalleeIsStreaming || CallerIsStreamingCompatible)) {
    if (CalleeIsStreaming)
      Inlinability |= ArmSMEInlinability::ErrorIncompatibleStreamingModes;
    else
      Inlinability |= ArmSMEInlinability::WarnIncompatibleStreamingModes;
  }
  if (auto *NewAttr = Callee->getAttr<ArmNewAttr>()) {
    if (NewAttr->isNewZA())
      Inlinability |= ArmSMEInlinability::ErrorCalleeRequiresNewZA;
    if (NewAttr->isNewZT0())
      Inlinability |= ArmSMEInlinability::ErrorCalleeRequiresNewZT0;
  }

  return Inlinability;
}
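
// Example (illustrative): inlining an always_inline callee declared
// __arm_streaming into a non-streaming caller yields
// ErrorIncompatibleStreamingModes; the reverse case (non-streaming callee,
// streaming caller) only yields WarnIncompatibleStreamingModes.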

void AArch64TargetCodeGenInfo::checkFunctionCallABIStreaming(
    CodeGenModule &CGM, SourceLocation CallLoc, const FunctionDecl *Caller,
    const FunctionDecl *Callee) const {
  if (!Caller || !Callee || !Callee->hasAttr<AlwaysInlineAttr>())
    return;

  ArmSMEInlinability Inlinability = GetArmSMEInlinability(Caller, Callee);

  if ((Inlinability & ArmSMEInlinability::IncompatibleStreamingModes) !=
      ArmSMEInlinability::Ok)
    CGM.getDiags().Report(
        CallLoc,
        (Inlinability & ArmSMEInlinability::ErrorIncompatibleStreamingModes) ==
                ArmSMEInlinability::ErrorIncompatibleStreamingModes
            ? diag::err_function_always_inline_attribute_mismatch
            : diag::warn_function_always_inline_attribute_mismatch)
        << Caller->getDeclName() << Callee->getDeclName() << "streaming";

  if ((Inlinability & ArmSMEInlinability::ErrorCalleeRequiresNewZA) ==
      ArmSMEInlinability::ErrorCalleeRequiresNewZA)
    CGM.getDiags().Report(CallLoc, diag::err_function_always_inline_new_za)
        << Callee->getDeclName();

  if ((Inlinability & ArmSMEInlinability::ErrorCalleeRequiresNewZT0) ==
      ArmSMEInlinability::ErrorCalleeRequiresNewZT0)
    CGM.getDiags().Report(CallLoc, diag::err_function_always_inline_new_zt0)
        << Callee->getDeclName();
}

// If the target does not have floating-point registers, but we are using a
// hard-float ABI, there is no way to pass floating-point, vector or HFA values
// to functions, so we report an error.
void AArch64TargetCodeGenInfo::checkFunctionCallABISoftFloat(
    CodeGenModule &CGM, SourceLocation CallLoc, const FunctionDecl *Caller,
    const FunctionDecl *Callee, const CallArgList &Args,
    QualType ReturnType) const {
  const AArch64ABIInfo &ABIInfo = getABIInfo<AArch64ABIInfo>();
  const TargetInfo &TI = ABIInfo.getContext().getTargetInfo();

  if (!Caller || TI.hasFeature("fp") || ABIInfo.isSoftFloat())
    return;

  diagnoseIfNeedsFPReg(CGM.getDiags(), TI.getABI(), ABIInfo, ReturnType,
                       Callee ? Callee : Caller, CallLoc);

  for (const CallArg &Arg : Args)
    diagnoseIfNeedsFPReg(CGM.getDiags(), TI.getABI(), ABIInfo, Arg.getType(),
                         Callee ? Callee : Caller, CallLoc);
}

void AArch64TargetCodeGenInfo::checkFunctionCallABI(CodeGenModule &CGM,
                                                    SourceLocation CallLoc,
                                                    const FunctionDecl *Caller,
                                                    const FunctionDecl *Callee,
                                                    const CallArgList &Args,
                                                    QualType ReturnType) const {
  checkFunctionCallABIStreaming(CGM, CallLoc, Caller, Callee);
  checkFunctionCallABISoftFloat(CGM, CallLoc, Caller, Callee, Args, ReturnType);
}

bool AArch64TargetCodeGenInfo::wouldInliningViolateFunctionCallABI(
    const FunctionDecl *Caller, const FunctionDecl *Callee) const {
  return Caller && Callee &&
         GetArmSMEInlinability(Caller, Callee) != ArmSMEInlinability::Ok;
}

void AArch64ABIInfo::appendAttributeMangling(TargetClonesAttr *Attr,
                                             unsigned Index,
                                             raw_ostream &Out) const {
  appendAttributeMangling(Attr->getFeatureStr(Index), Out);
}

void AArch64ABIInfo::appendAttributeMangling(StringRef AttrStr,
                                             raw_ostream &Out) const {
  if (AttrStr == "default") {
    Out << ".default";
    return;
  }

  Out << "._";
  SmallVector<StringRef, 8> Features;
  AttrStr.split(Features, "+");
  for (auto &Feat : Features)
    Feat = Feat.trim();

  llvm::sort(Features, [](const StringRef LHS, const StringRef RHS) {
    return LHS.compare(RHS) < 0;
  });

  llvm::SmallDenseSet<StringRef, 8> UniqueFeats;
  for (auto &Feat : Features)
    if (auto Ext = llvm::AArch64::parseFMVExtension(Feat))
      if (UniqueFeats.insert(Ext->Name).second)
        Out << 'M' << Ext->Name;
}
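
// Example (illustrative): for a target_clones variant with feature string
// "sve2+dotprod", the features are trimmed, sorted and deduplicated, so
// (assuming both parse as FMV extensions) the mangled suffix is
// "._MdotprodMsve2".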

std::unique_ptr<TargetCodeGenInfo>
CodeGen::createAArch64TargetCodeGenInfo(CodeGenModule &CGM,
                                        AArch64ABIKind Kind) {
  return std::make_unique<AArch64TargetCodeGenInfo>(CGM.getTypes(), Kind);
}

std::unique_ptr<TargetCodeGenInfo>
CodeGen::createWindowsAArch64TargetCodeGenInfo(CodeGenModule &CGM,
                                               AArch64ABIKind K) {
  return std::make_unique<WindowsAArch64TargetCodeGenInfo>(CGM.getTypes(), K);
}