xref: /freebsd/contrib/llvm-project/clang/lib/CodeGen/Targets/AArch64.cpp (revision 3ceba58a7509418b47b8fca2d2b6bbf088714e26)
1 //===- AArch64.cpp --------------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "ABIInfoImpl.h"
10 #include "TargetInfo.h"
11 #include "clang/AST/Decl.h"
12 #include "clang/Basic/DiagnosticFrontend.h"
13 #include "llvm/TargetParser/AArch64TargetParser.h"
14 
15 using namespace clang;
16 using namespace clang::CodeGen;
17 
18 //===----------------------------------------------------------------------===//
19 // AArch64 ABI Implementation
20 //===----------------------------------------------------------------------===//
21 
22 namespace {
23 
24 class AArch64ABIInfo : public ABIInfo {
25   AArch64ABIKind Kind;
26 
27 public:
28   AArch64ABIInfo(CodeGenTypes &CGT, AArch64ABIKind Kind)
29       : ABIInfo(CGT), Kind(Kind) {}
30 
31   bool isSoftFloat() const { return Kind == AArch64ABIKind::AAPCSSoft; }
32 
33 private:
34   AArch64ABIKind getABIKind() const { return Kind; }
35   bool isDarwinPCS() const { return Kind == AArch64ABIKind::DarwinPCS; }
36 
37   ABIArgInfo classifyReturnType(QualType RetTy, bool IsVariadic) const;
38   ABIArgInfo classifyArgumentType(QualType RetTy, bool IsVariadic,
39                                   unsigned CallingConvention) const;
40   ABIArgInfo coerceIllegalVector(QualType Ty) const;
41   bool isHomogeneousAggregateBaseType(QualType Ty) const override;
42   bool isHomogeneousAggregateSmallEnough(const Type *Ty,
43                                          uint64_t Members) const override;
44   bool isZeroLengthBitfieldPermittedInHomogeneousAggregate() const override;
45 
46   bool isIllegalVectorType(QualType Ty) const;
47 
48   void computeInfo(CGFunctionInfo &FI) const override {
49     if (!::classifyReturnType(getCXXABI(), FI, *this))
50       FI.getReturnInfo() =
51           classifyReturnType(FI.getReturnType(), FI.isVariadic());
52 
53     for (auto &it : FI.arguments())
54       it.info = classifyArgumentType(it.type, FI.isVariadic(),
55                                      FI.getCallingConvention());
56   }
57 
58   RValue EmitDarwinVAArg(Address VAListAddr, QualType Ty, CodeGenFunction &CGF,
59                          AggValueSlot Slot) const;
60 
61   RValue EmitAAPCSVAArg(Address VAListAddr, QualType Ty, CodeGenFunction &CGF,
62                         AArch64ABIKind Kind, AggValueSlot Slot) const;
63 
64   RValue EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty,
65                    AggValueSlot Slot) const override {
66     llvm::Type *BaseTy = CGF.ConvertType(Ty);
67     if (isa<llvm::ScalableVectorType>(BaseTy))
68       llvm::report_fatal_error("Passing SVE types to variadic functions is "
69                                "currently not supported");
70 
71     return Kind == AArch64ABIKind::Win64
72                ? EmitMSVAArg(CGF, VAListAddr, Ty, Slot)
73            : isDarwinPCS() ? EmitDarwinVAArg(VAListAddr, Ty, CGF, Slot)
74                            : EmitAAPCSVAArg(VAListAddr, Ty, CGF, Kind, Slot);
75   }
76 
77   RValue EmitMSVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty,
78                      AggValueSlot Slot) const override;
79 
80   bool allowBFloatArgsAndRet() const override {
81     return getTarget().hasBFloat16Type();
82   }
83 
84   using ABIInfo::appendAttributeMangling;
85   void appendAttributeMangling(TargetClonesAttr *Attr, unsigned Index,
86                                raw_ostream &Out) const override;
87   void appendAttributeMangling(StringRef AttrStr,
88                                raw_ostream &Out) const override;
89 };
90 
91 class AArch64SwiftABIInfo : public SwiftABIInfo {
92 public:
93   explicit AArch64SwiftABIInfo(CodeGenTypes &CGT)
94       : SwiftABIInfo(CGT, /*SwiftErrorInRegister=*/true) {}
95 
96   bool isLegalVectorType(CharUnits VectorSize, llvm::Type *EltTy,
97                          unsigned NumElts) const override;
98 };
99 
100 class AArch64TargetCodeGenInfo : public TargetCodeGenInfo {
101 public:
102   AArch64TargetCodeGenInfo(CodeGenTypes &CGT, AArch64ABIKind Kind)
103       : TargetCodeGenInfo(std::make_unique<AArch64ABIInfo>(CGT, Kind)) {
104     SwiftInfo = std::make_unique<AArch64SwiftABIInfo>(CGT);
105   }
106 
107   StringRef getARCRetainAutoreleasedReturnValueMarker() const override {
108     return "mov\tfp, fp\t\t// marker for objc_retainAutoreleaseReturnValue";
109   }
110 
111   int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const override {
112     return 31;
113   }
114 
115   bool doesReturnSlotInterfereWithArgs() const override { return false; }
116 
117   void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
118                            CodeGen::CodeGenModule &CGM) const override {
119     const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D);
120     if (!FD)
121       return;
122 
123     TargetInfo::BranchProtectionInfo BPI(CGM.getLangOpts());
124 
125     if (const auto *TA = FD->getAttr<TargetAttr>()) {
126       ParsedTargetAttr Attr =
127           CGM.getTarget().parseTargetAttr(TA->getFeaturesStr());
128       if (!Attr.BranchProtection.empty()) {
129         StringRef Error;
130         (void)CGM.getTarget().validateBranchProtection(Attr.BranchProtection,
131                                                        Attr.CPU, BPI, Error);
132         assert(Error.empty());
133       }
134     }
135     auto *Fn = cast<llvm::Function>(GV);
136     setBranchProtectionFnAttributes(BPI, *Fn);
137   }
138 
139   bool isScalarizableAsmOperand(CodeGen::CodeGenFunction &CGF,
140                                 llvm::Type *Ty) const override {
141     if (CGF.getTarget().hasFeature("ls64")) {
142       auto *ST = dyn_cast<llvm::StructType>(Ty);
143       if (ST && ST->getNumElements() == 1) {
144         auto *AT = dyn_cast<llvm::ArrayType>(ST->getElementType(0));
145         if (AT && AT->getNumElements() == 8 &&
146             AT->getElementType()->isIntegerTy(64))
147           return true;
148       }
149     }
150     return TargetCodeGenInfo::isScalarizableAsmOperand(CGF, Ty);
151   }
152 
153   void checkFunctionABI(CodeGenModule &CGM,
154                         const FunctionDecl *Decl) const override;
155 
156   void checkFunctionCallABI(CodeGenModule &CGM, SourceLocation CallLoc,
157                             const FunctionDecl *Caller,
158                             const FunctionDecl *Callee, const CallArgList &Args,
159                             QualType ReturnType) const override;
160 
161 private:
162   // Diagnose calls between functions with incompatible Streaming SVE
163   // attributes.
164   void checkFunctionCallABIStreaming(CodeGenModule &CGM, SourceLocation CallLoc,
165                                      const FunctionDecl *Caller,
166                                      const FunctionDecl *Callee) const;
167   // Diagnose calls which must pass arguments in floating-point registers when
168   // the selected target does not have floating-point registers.
169   void checkFunctionCallABISoftFloat(CodeGenModule &CGM, SourceLocation CallLoc,
170                                      const FunctionDecl *Caller,
171                                      const FunctionDecl *Callee,
172                                      const CallArgList &Args,
173                                      QualType ReturnType) const;
174 };
175 
176 class WindowsAArch64TargetCodeGenInfo : public AArch64TargetCodeGenInfo {
177 public:
178   WindowsAArch64TargetCodeGenInfo(CodeGenTypes &CGT, AArch64ABIKind K)
179       : AArch64TargetCodeGenInfo(CGT, K) {}
180 
181   void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
182                            CodeGen::CodeGenModule &CGM) const override;
183 
184   void getDependentLibraryOption(llvm::StringRef Lib,
185                                  llvm::SmallString<24> &Opt) const override {
186     Opt = "/DEFAULTLIB:" + qualifyWindowsLibrary(Lib);
187   }
188 
189   void getDetectMismatchOption(llvm::StringRef Name, llvm::StringRef Value,
190                                llvm::SmallString<32> &Opt) const override {
191     Opt = "/FAILIFMISMATCH:\"" + Name.str() + "=" + Value.str() + "\"";
192   }
193 };
194 
195 void WindowsAArch64TargetCodeGenInfo::setTargetAttributes(
196     const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const {
197   AArch64TargetCodeGenInfo::setTargetAttributes(D, GV, CGM);
198   if (GV->isDeclaration())
199     return;
200   addStackProbeTargetAttributes(D, GV, CGM);
201 }
202 }
203 
204 ABIArgInfo AArch64ABIInfo::coerceIllegalVector(QualType Ty) const {
205   assert(Ty->isVectorType() && "expected vector type!");
206 
207   const auto *VT = Ty->castAs<VectorType>();
208   if (VT->getVectorKind() == VectorKind::SveFixedLengthPredicate) {
209     assert(VT->getElementType()->isBuiltinType() && "expected builtin type!");
210     assert(VT->getElementType()->castAs<BuiltinType>()->getKind() ==
211                BuiltinType::UChar &&
212            "unexpected builtin type for SVE predicate!");
213     return ABIArgInfo::getDirect(llvm::ScalableVectorType::get(
214         llvm::Type::getInt1Ty(getVMContext()), 16));
215   }
216 
217   if (VT->getVectorKind() == VectorKind::SveFixedLengthData) {
218     assert(VT->getElementType()->isBuiltinType() && "expected builtin type!");
219 
220     const auto *BT = VT->getElementType()->castAs<BuiltinType>();
221     llvm::ScalableVectorType *ResType = nullptr;
222     switch (BT->getKind()) {
223     default:
224       llvm_unreachable("unexpected builtin type for SVE vector!");
225     case BuiltinType::SChar:
226     case BuiltinType::UChar:
227       ResType = llvm::ScalableVectorType::get(
228           llvm::Type::getInt8Ty(getVMContext()), 16);
229       break;
230     case BuiltinType::Short:
231     case BuiltinType::UShort:
232       ResType = llvm::ScalableVectorType::get(
233           llvm::Type::getInt16Ty(getVMContext()), 8);
234       break;
235     case BuiltinType::Int:
236     case BuiltinType::UInt:
237       ResType = llvm::ScalableVectorType::get(
238           llvm::Type::getInt32Ty(getVMContext()), 4);
239       break;
240     case BuiltinType::Long:
241     case BuiltinType::ULong:
242       ResType = llvm::ScalableVectorType::get(
243           llvm::Type::getInt64Ty(getVMContext()), 2);
244       break;
245     case BuiltinType::Half:
246       ResType = llvm::ScalableVectorType::get(
247           llvm::Type::getHalfTy(getVMContext()), 8);
248       break;
249     case BuiltinType::Float:
250       ResType = llvm::ScalableVectorType::get(
251           llvm::Type::getFloatTy(getVMContext()), 4);
252       break;
253     case BuiltinType::Double:
254       ResType = llvm::ScalableVectorType::get(
255           llvm::Type::getDoubleTy(getVMContext()), 2);
256       break;
257     case BuiltinType::BFloat16:
258       ResType = llvm::ScalableVectorType::get(
259           llvm::Type::getBFloatTy(getVMContext()), 8);
260       break;
261     }
262     return ABIArgInfo::getDirect(ResType);
263   }
264 
265   uint64_t Size = getContext().getTypeSize(Ty);
266   // Android promotes <2 x i8> to i16, not i32
267   if ((isAndroid() || isOHOSFamily()) && (Size <= 16)) {
268     llvm::Type *ResType = llvm::Type::getInt16Ty(getVMContext());
269     return ABIArgInfo::getDirect(ResType);
270   }
271   if (Size <= 32) {
272     llvm::Type *ResType = llvm::Type::getInt32Ty(getVMContext());
273     return ABIArgInfo::getDirect(ResType);
274   }
275   if (Size == 64) {
276     auto *ResType =
277         llvm::FixedVectorType::get(llvm::Type::getInt32Ty(getVMContext()), 2);
278     return ABIArgInfo::getDirect(ResType);
279   }
280   if (Size == 128) {
281     auto *ResType =
282         llvm::FixedVectorType::get(llvm::Type::getInt32Ty(getVMContext()), 4);
283     return ABIArgInfo::getDirect(ResType);
284   }
285   return getNaturalAlignIndirect(Ty, /*ByVal=*/false);
286 }
287 
288 ABIArgInfo
289 AArch64ABIInfo::classifyArgumentType(QualType Ty, bool IsVariadic,
290                                      unsigned CallingConvention) const {
291   Ty = useFirstFieldIfTransparentUnion(Ty);
292 
293   // Handle illegal vector types here.
294   if (isIllegalVectorType(Ty))
295     return coerceIllegalVector(Ty);
296 
297   if (!isAggregateTypeForABI(Ty)) {
298     // Treat an enum type as its underlying type.
299     if (const EnumType *EnumTy = Ty->getAs<EnumType>())
300       Ty = EnumTy->getDecl()->getIntegerType();
301 
302     if (const auto *EIT = Ty->getAs<BitIntType>())
303       if (EIT->getNumBits() > 128)
304         return getNaturalAlignIndirect(Ty, false);
305 
306     return (isPromotableIntegerTypeForABI(Ty) && isDarwinPCS()
307                 ? ABIArgInfo::getExtend(Ty)
308                 : ABIArgInfo::getDirect());
309   }
310 
311   // Structures with either a non-trivial destructor or a non-trivial
312   // copy constructor are always indirect.
313   if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) {
314     return getNaturalAlignIndirect(Ty, /*ByVal=*/RAA ==
315                                      CGCXXABI::RAA_DirectInMemory);
316   }
317 
318   // Empty records are always ignored on Darwin, but actually passed in C++ mode
319   // elsewhere for GNU compatibility.
320   uint64_t Size = getContext().getTypeSize(Ty);
321   bool IsEmpty = isEmptyRecord(getContext(), Ty, true);
322   if (IsEmpty || Size == 0) {
323     if (!getContext().getLangOpts().CPlusPlus || isDarwinPCS())
324       return ABIArgInfo::getIgnore();
325 
326     // GNU C mode. The only argument that gets ignored is an empty one with size
327     // 0.
328     if (IsEmpty && Size == 0)
329       return ABIArgInfo::getIgnore();
330     return ABIArgInfo::getDirect(llvm::Type::getInt8Ty(getVMContext()));
331   }
332 
333   // Homogeneous Floating-point Aggregates (HFAs) need to be expanded.
334   const Type *Base = nullptr;
335   uint64_t Members = 0;
336   bool IsWin64 = Kind == AArch64ABIKind::Win64 ||
337                  CallingConvention == llvm::CallingConv::Win64;
338   bool IsWinVariadic = IsWin64 && IsVariadic;
339   // In variadic functions on Windows, all composite types are treated alike,
340   // no special handling of HFAs/HVAs.
341   if (!IsWinVariadic && isHomogeneousAggregate(Ty, Base, Members)) {
342     if (Kind != AArch64ABIKind::AAPCS)
343       return ABIArgInfo::getDirect(
344           llvm::ArrayType::get(CGT.ConvertType(QualType(Base, 0)), Members));
345 
346     // For HFAs/HVAs, cap the argument alignment to 16, otherwise
347     // set it to 8 according to the AAPCS64 document.
348     unsigned Align =
349         getContext().getTypeUnadjustedAlignInChars(Ty).getQuantity();
350     Align = (Align >= 16) ? 16 : 8;
351     return ABIArgInfo::getDirect(
352         llvm::ArrayType::get(CGT.ConvertType(QualType(Base, 0)), Members), 0,
353         nullptr, true, Align);
354   }
355 
356   // Aggregates <= 16 bytes are passed directly in registers or on the stack.
357   if (Size <= 128) {
358     // On RenderScript, coerce Aggregates <= 16 bytes to an integer array of
359     // same size and alignment.
360     if (getTarget().isRenderScriptTarget()) {
361       return coerceToIntArray(Ty, getContext(), getVMContext());
362     }
363     unsigned Alignment;
364     if (Kind == AArch64ABIKind::AAPCS) {
365       Alignment = getContext().getTypeUnadjustedAlign(Ty);
366       Alignment = Alignment < 128 ? 64 : 128;
367     } else {
368       Alignment =
369           std::max(getContext().getTypeAlign(Ty),
370                    (unsigned)getTarget().getPointerWidth(LangAS::Default));
371     }
372     Size = llvm::alignTo(Size, Alignment);
373 
374     // We use a pair of i64 for 16-byte aggregate with 8-byte alignment.
375     // For aggregates with 16-byte alignment, we use i128.
376     llvm::Type *BaseTy = llvm::Type::getIntNTy(getVMContext(), Alignment);
377     return ABIArgInfo::getDirect(
378         Size == Alignment ? BaseTy
379                           : llvm::ArrayType::get(BaseTy, Size / Alignment));
380   }
381 
382   return getNaturalAlignIndirect(Ty, /*ByVal=*/false);
383 }
384 
385 ABIArgInfo AArch64ABIInfo::classifyReturnType(QualType RetTy,
386                                               bool IsVariadic) const {
387   if (RetTy->isVoidType())
388     return ABIArgInfo::getIgnore();
389 
390   if (const auto *VT = RetTy->getAs<VectorType>()) {
391     if (VT->getVectorKind() == VectorKind::SveFixedLengthData ||
392         VT->getVectorKind() == VectorKind::SveFixedLengthPredicate)
393       return coerceIllegalVector(RetTy);
394   }
395 
396   // Large vector types should be returned via memory.
397   if (RetTy->isVectorType() && getContext().getTypeSize(RetTy) > 128)
398     return getNaturalAlignIndirect(RetTy);
399 
400   if (!isAggregateTypeForABI(RetTy)) {
401     // Treat an enum type as its underlying type.
402     if (const EnumType *EnumTy = RetTy->getAs<EnumType>())
403       RetTy = EnumTy->getDecl()->getIntegerType();
404 
405     if (const auto *EIT = RetTy->getAs<BitIntType>())
406       if (EIT->getNumBits() > 128)
407         return getNaturalAlignIndirect(RetTy);
408 
409     return (isPromotableIntegerTypeForABI(RetTy) && isDarwinPCS()
410                 ? ABIArgInfo::getExtend(RetTy)
411                 : ABIArgInfo::getDirect());
412   }
413 
414   uint64_t Size = getContext().getTypeSize(RetTy);
415   if (isEmptyRecord(getContext(), RetTy, true) || Size == 0)
416     return ABIArgInfo::getIgnore();
417 
418   const Type *Base = nullptr;
419   uint64_t Members = 0;
420   if (isHomogeneousAggregate(RetTy, Base, Members) &&
421       !(getTarget().getTriple().getArch() == llvm::Triple::aarch64_32 &&
422         IsVariadic))
423     // Homogeneous Floating-point Aggregates (HFAs) are returned directly.
424     return ABIArgInfo::getDirect();
425 
426   // Aggregates <= 16 bytes are returned directly in registers or on the stack.
427   if (Size <= 128) {
428     // On RenderScript, coerce Aggregates <= 16 bytes to an integer array of
429     // same size and alignment.
430     if (getTarget().isRenderScriptTarget()) {
431       return coerceToIntArray(RetTy, getContext(), getVMContext());
432     }
433 
434     if (Size <= 64 && getDataLayout().isLittleEndian()) {
435       // Composite types are returned in lower bits of a 64-bit register for LE,
436       // and in higher bits for BE. However, integer types are always returned
437       // in lower bits for both LE and BE, and they are not rounded up to
438       // 64-bits. We can skip rounding up of composite types for LE, but not for
439       // BE, otherwise composite types will be indistinguishable from integer
440       // types.
441       return ABIArgInfo::getDirect(
442           llvm::IntegerType::get(getVMContext(), Size));
443     }
444 
445     unsigned Alignment = getContext().getTypeAlign(RetTy);
446     Size = llvm::alignTo(Size, 64); // round up to multiple of 8 bytes
447 
448     // We use a pair of i64 for 16-byte aggregate with 8-byte alignment.
449     // For aggregates with 16-byte alignment, we use i128.
450     if (Alignment < 128 && Size == 128) {
451       llvm::Type *BaseTy = llvm::Type::getInt64Ty(getVMContext());
452       return ABIArgInfo::getDirect(llvm::ArrayType::get(BaseTy, Size / 64));
453     }
454     return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(), Size));
455   }
456 
457   return getNaturalAlignIndirect(RetTy);
458 }
459 
460 /// isIllegalVectorType - check whether the vector type is legal for AArch64.
461 bool AArch64ABIInfo::isIllegalVectorType(QualType Ty) const {
462   if (const VectorType *VT = Ty->getAs<VectorType>()) {
463     // Check whether VT is a fixed-length SVE vector. These types are
464     // represented as scalable vectors in function args/return and must be
465     // coerced from fixed vectors.
466     if (VT->getVectorKind() == VectorKind::SveFixedLengthData ||
467         VT->getVectorKind() == VectorKind::SveFixedLengthPredicate)
468       return true;
469 
470     // Check whether VT is legal.
471     unsigned NumElements = VT->getNumElements();
472     uint64_t Size = getContext().getTypeSize(VT);
473     // NumElements should be power of 2.
474     if (!llvm::isPowerOf2_32(NumElements))
475       return true;
476 
477     // arm64_32 has to be compatible with the ARM logic here, which allows huge
478     // vectors for some reason.
479     llvm::Triple Triple = getTarget().getTriple();
480     if (Triple.getArch() == llvm::Triple::aarch64_32 &&
481         Triple.isOSBinFormatMachO())
482       return Size <= 32;
483 
484     return Size != 64 && (Size != 128 || NumElements == 1);
485   }
486   return false;
487 }
488 
489 bool AArch64SwiftABIInfo::isLegalVectorType(CharUnits VectorSize,
490                                             llvm::Type *EltTy,
491                                             unsigned NumElts) const {
492   if (!llvm::isPowerOf2_32(NumElts))
493     return false;
494   if (VectorSize.getQuantity() != 8 &&
495       (VectorSize.getQuantity() != 16 || NumElts == 1))
496     return false;
497   return true;
498 }
499 
500 bool AArch64ABIInfo::isHomogeneousAggregateBaseType(QualType Ty) const {
501   // For the soft-float ABI variant, no types are considered to be homogeneous
502   // aggregates.
503   if (Kind == AArch64ABIKind::AAPCSSoft)
504     return false;
505 
506   // Homogeneous aggregates for AAPCS64 must have base types of a floating
507   // point type or a short-vector type. This is the same as the 32-bit ABI,
508   // but with the difference that any floating-point type is allowed,
509   // including __fp16.
510   if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) {
511     if (BT->isFloatingPoint())
512       return true;
513   } else if (const VectorType *VT = Ty->getAs<VectorType>()) {
514     unsigned VecSize = getContext().getTypeSize(VT);
515     if (VecSize == 64 || VecSize == 128)
516       return true;
517   }
518   return false;
519 }
520 
521 bool AArch64ABIInfo::isHomogeneousAggregateSmallEnough(const Type *Base,
522                                                        uint64_t Members) const {
523   return Members <= 4;
524 }
525 
526 bool AArch64ABIInfo::isZeroLengthBitfieldPermittedInHomogeneousAggregate()
527     const {
528   // AAPCS64 says that the rule for whether something is a homogeneous
529   // aggregate is applied to the output of the data layout decision. So
530   // anything that doesn't affect the data layout also does not affect
531   // homogeneity. In particular, zero-length bitfields don't stop a struct
532   // being homogeneous.
533   return true;
534 }
535 
536 RValue AArch64ABIInfo::EmitAAPCSVAArg(Address VAListAddr, QualType Ty,
537                                       CodeGenFunction &CGF, AArch64ABIKind Kind,
538                                       AggValueSlot Slot) const {
539   ABIArgInfo AI = classifyArgumentType(Ty, /*IsVariadic=*/true,
540                                        CGF.CurFnInfo->getCallingConvention());
541   // Empty records are ignored for parameter passing purposes.
542   if (AI.isIgnore())
543     return Slot.asRValue();
544 
545   bool IsIndirect = AI.isIndirect();
546 
547   llvm::Type *BaseTy = CGF.ConvertType(Ty);
548   if (IsIndirect)
549     BaseTy = llvm::PointerType::getUnqual(BaseTy);
550   else if (AI.getCoerceToType())
551     BaseTy = AI.getCoerceToType();
552 
553   unsigned NumRegs = 1;
554   if (llvm::ArrayType *ArrTy = dyn_cast<llvm::ArrayType>(BaseTy)) {
555     BaseTy = ArrTy->getElementType();
556     NumRegs = ArrTy->getNumElements();
557   }
558   bool IsFPR = Kind != AArch64ABIKind::AAPCSSoft &&
559                (BaseTy->isFloatingPointTy() || BaseTy->isVectorTy());
560 
561   // The AArch64 va_list type and handling is specified in the Procedure Call
562   // Standard, section B.4:
563   //
564   // struct {
565   //   void *__stack;
566   //   void *__gr_top;
567   //   void *__vr_top;
568   //   int __gr_offs;
569   //   int __vr_offs;
570   // };
571 
572   llvm::BasicBlock *MaybeRegBlock = CGF.createBasicBlock("vaarg.maybe_reg");
573   llvm::BasicBlock *InRegBlock = CGF.createBasicBlock("vaarg.in_reg");
574   llvm::BasicBlock *OnStackBlock = CGF.createBasicBlock("vaarg.on_stack");
575   llvm::BasicBlock *ContBlock = CGF.createBasicBlock("vaarg.end");
576 
577   CharUnits TySize = getContext().getTypeSizeInChars(Ty);
578   CharUnits TyAlign = getContext().getTypeUnadjustedAlignInChars(Ty);
579 
580   Address reg_offs_p = Address::invalid();
581   llvm::Value *reg_offs = nullptr;
582   int reg_top_index;
583   int RegSize = IsIndirect ? 8 : TySize.getQuantity();
584   if (!IsFPR) {
585     // 3 is the field number of __gr_offs
586     reg_offs_p = CGF.Builder.CreateStructGEP(VAListAddr, 3, "gr_offs_p");
587     reg_offs = CGF.Builder.CreateLoad(reg_offs_p, "gr_offs");
588     reg_top_index = 1; // field number for __gr_top
589     RegSize = llvm::alignTo(RegSize, 8);
590   } else {
591     // 4 is the field number of __vr_offs.
592     reg_offs_p = CGF.Builder.CreateStructGEP(VAListAddr, 4, "vr_offs_p");
593     reg_offs = CGF.Builder.CreateLoad(reg_offs_p, "vr_offs");
594     reg_top_index = 2; // field number for __vr_top
595     RegSize = 16 * NumRegs;
596   }
597 
598   //=======================================
599   // Find out where argument was passed
600   //=======================================
601 
602   // If reg_offs >= 0 we're already using the stack for this type of
603   // argument. We don't want to keep updating reg_offs (in case it overflows,
604   // though anyone passing 2GB of arguments, each at most 16 bytes, deserves
605   // whatever they get).
606   llvm::Value *UsingStack = nullptr;
607   UsingStack = CGF.Builder.CreateICmpSGE(
608       reg_offs, llvm::ConstantInt::get(CGF.Int32Ty, 0));
609 
610   CGF.Builder.CreateCondBr(UsingStack, OnStackBlock, MaybeRegBlock);
611 
612   // Otherwise, at least some kind of argument could go in these registers, the
613   // question is whether this particular type is too big.
614   CGF.EmitBlock(MaybeRegBlock);
615 
616   // Integer arguments may need to correct register alignment (for example a
617   // "struct { __int128 a; };" gets passed in x_2N, x_{2N+1}). In this case we
618   // align __gr_offs to calculate the potential address.
619   if (!IsFPR && !IsIndirect && TyAlign.getQuantity() > 8) {
620     int Align = TyAlign.getQuantity();
621 
622     reg_offs = CGF.Builder.CreateAdd(
623         reg_offs, llvm::ConstantInt::get(CGF.Int32Ty, Align - 1),
624         "align_regoffs");
625     reg_offs = CGF.Builder.CreateAnd(
626         reg_offs, llvm::ConstantInt::get(CGF.Int32Ty, -Align),
627         "aligned_regoffs");
628   }
629 
630   // Update the gr_offs/vr_offs pointer for next call to va_arg on this va_list.
631   // The fact that this is done unconditionally reflects the fact that
632   // allocating an argument to the stack also uses up all the remaining
633   // registers of the appropriate kind.
634   llvm::Value *NewOffset = nullptr;
635   NewOffset = CGF.Builder.CreateAdd(
636       reg_offs, llvm::ConstantInt::get(CGF.Int32Ty, RegSize), "new_reg_offs");
637   CGF.Builder.CreateStore(NewOffset, reg_offs_p);
638 
639   // Now we're in a position to decide whether this argument really was in
640   // registers or not.
641   llvm::Value *InRegs = nullptr;
642   InRegs = CGF.Builder.CreateICmpSLE(
643       NewOffset, llvm::ConstantInt::get(CGF.Int32Ty, 0), "inreg");
644 
645   CGF.Builder.CreateCondBr(InRegs, InRegBlock, OnStackBlock);
646 
647   //=======================================
648   // Argument was in registers
649   //=======================================
650 
651   // Now we emit the code for if the argument was originally passed in
652   // registers. First start the appropriate block:
653   CGF.EmitBlock(InRegBlock);
654 
655   llvm::Value *reg_top = nullptr;
656   Address reg_top_p =
657       CGF.Builder.CreateStructGEP(VAListAddr, reg_top_index, "reg_top_p");
658   reg_top = CGF.Builder.CreateLoad(reg_top_p, "reg_top");
659   Address BaseAddr(CGF.Builder.CreateInBoundsGEP(CGF.Int8Ty, reg_top, reg_offs),
660                    CGF.Int8Ty, CharUnits::fromQuantity(IsFPR ? 16 : 8));
661   Address RegAddr = Address::invalid();
662   llvm::Type *MemTy = CGF.ConvertTypeForMem(Ty), *ElementTy = MemTy;
663 
664   if (IsIndirect) {
665     // If it's been passed indirectly (actually a struct), whatever we find from
666     // stored registers or on the stack will actually be a struct **.
667     MemTy = llvm::PointerType::getUnqual(MemTy);
668   }
669 
670   const Type *Base = nullptr;
671   uint64_t NumMembers = 0;
672   bool IsHFA = isHomogeneousAggregate(Ty, Base, NumMembers);
673   if (IsHFA && NumMembers > 1) {
674     // Homogeneous aggregates passed in registers will have their elements split
675     // and stored 16-bytes apart regardless of size (they're notionally in qN,
676     // qN+1, ...). We reload and store into a temporary local variable
677     // contiguously.
678     assert(!IsIndirect && "Homogeneous aggregates should be passed directly");
679     auto BaseTyInfo = getContext().getTypeInfoInChars(QualType(Base, 0));
680     llvm::Type *BaseTy = CGF.ConvertType(QualType(Base, 0));
681     llvm::Type *HFATy = llvm::ArrayType::get(BaseTy, NumMembers);
682     Address Tmp = CGF.CreateTempAlloca(HFATy,
683                                        std::max(TyAlign, BaseTyInfo.Align));
684 
685     // On big-endian platforms, the value will be right-aligned in its slot.
686     int Offset = 0;
687     if (CGF.CGM.getDataLayout().isBigEndian() &&
688         BaseTyInfo.Width.getQuantity() < 16)
689       Offset = 16 - BaseTyInfo.Width.getQuantity();
690 
691     for (unsigned i = 0; i < NumMembers; ++i) {
692       CharUnits BaseOffset = CharUnits::fromQuantity(16 * i + Offset);
693       Address LoadAddr =
694         CGF.Builder.CreateConstInBoundsByteGEP(BaseAddr, BaseOffset);
695       LoadAddr = LoadAddr.withElementType(BaseTy);
696 
697       Address StoreAddr = CGF.Builder.CreateConstArrayGEP(Tmp, i);
698 
699       llvm::Value *Elem = CGF.Builder.CreateLoad(LoadAddr);
700       CGF.Builder.CreateStore(Elem, StoreAddr);
701     }
702 
703     RegAddr = Tmp.withElementType(MemTy);
704   } else {
705     // Otherwise the object is contiguous in memory.
706 
707     // It might be right-aligned in its slot.
708     CharUnits SlotSize = BaseAddr.getAlignment();
709     if (CGF.CGM.getDataLayout().isBigEndian() && !IsIndirect &&
710         (IsHFA || !isAggregateTypeForABI(Ty)) &&
711         TySize < SlotSize) {
712       CharUnits Offset = SlotSize - TySize;
713       BaseAddr = CGF.Builder.CreateConstInBoundsByteGEP(BaseAddr, Offset);
714     }
715 
716     RegAddr = BaseAddr.withElementType(MemTy);
717   }
718 
719   CGF.EmitBranch(ContBlock);
720 
721   //=======================================
722   // Argument was on the stack
723   //=======================================
724   CGF.EmitBlock(OnStackBlock);
725 
726   Address stack_p = CGF.Builder.CreateStructGEP(VAListAddr, 0, "stack_p");
727   llvm::Value *OnStackPtr = CGF.Builder.CreateLoad(stack_p, "stack");
728 
729   // Again, stack arguments may need realignment. In this case both integer and
730   // floating-point ones might be affected.
731   if (!IsIndirect && TyAlign.getQuantity() > 8) {
732     OnStackPtr = emitRoundPointerUpToAlignment(CGF, OnStackPtr, TyAlign);
733   }
734   Address OnStackAddr = Address(OnStackPtr, CGF.Int8Ty,
735                                 std::max(CharUnits::fromQuantity(8), TyAlign));
736 
737   // All stack slots are multiples of 8 bytes.
738   CharUnits StackSlotSize = CharUnits::fromQuantity(8);
739   CharUnits StackSize;
740   if (IsIndirect)
741     StackSize = StackSlotSize;
742   else
743     StackSize = TySize.alignTo(StackSlotSize);
744 
745   llvm::Value *StackSizeC = CGF.Builder.getSize(StackSize);
746   llvm::Value *NewStack = CGF.Builder.CreateInBoundsGEP(
747       CGF.Int8Ty, OnStackPtr, StackSizeC, "new_stack");
748 
749   // Write the new value of __stack for the next call to va_arg
750   CGF.Builder.CreateStore(NewStack, stack_p);
751 
752   if (CGF.CGM.getDataLayout().isBigEndian() && !isAggregateTypeForABI(Ty) &&
753       TySize < StackSlotSize) {
754     CharUnits Offset = StackSlotSize - TySize;
755     OnStackAddr = CGF.Builder.CreateConstInBoundsByteGEP(OnStackAddr, Offset);
756   }
757 
758   OnStackAddr = OnStackAddr.withElementType(MemTy);
759 
760   CGF.EmitBranch(ContBlock);
761 
762   //=======================================
763   // Tidy up
764   //=======================================
765   CGF.EmitBlock(ContBlock);
766 
767   Address ResAddr = emitMergePHI(CGF, RegAddr, InRegBlock, OnStackAddr,
768                                  OnStackBlock, "vaargs.addr");
769 
770   if (IsIndirect)
771     return CGF.EmitLoadOfAnyValue(
772         CGF.MakeAddrLValue(
773             Address(CGF.Builder.CreateLoad(ResAddr, "vaarg.addr"), ElementTy,
774                     TyAlign),
775             Ty),
776         Slot);
777 
778   return CGF.EmitLoadOfAnyValue(CGF.MakeAddrLValue(ResAddr, Ty), Slot);
779 }
780 
781 RValue AArch64ABIInfo::EmitDarwinVAArg(Address VAListAddr, QualType Ty,
782                                        CodeGenFunction &CGF,
783                                        AggValueSlot Slot) const {
784   // The backend's lowering doesn't support va_arg for aggregates or
785   // illegal vector types.  Lower VAArg here for these cases and use
786   // the LLVM va_arg instruction for everything else.
787   if (!isAggregateTypeForABI(Ty) && !isIllegalVectorType(Ty))
788     return CGF.EmitLoadOfAnyValue(
789         CGF.MakeAddrLValue(
790             EmitVAArgInstr(CGF, VAListAddr, Ty, ABIArgInfo::getDirect()), Ty),
791         Slot);
792 
793   uint64_t PointerSize = getTarget().getPointerWidth(LangAS::Default) / 8;
794   CharUnits SlotSize = CharUnits::fromQuantity(PointerSize);
795 
796   // Empty records are ignored for parameter passing purposes.
797   if (isEmptyRecord(getContext(), Ty, true))
798     return Slot.asRValue();
799 
800   // The size of the actual thing passed, which might end up just
801   // being a pointer for indirect types.
802   auto TyInfo = getContext().getTypeInfoInChars(Ty);
803 
804   // Arguments bigger than 16 bytes which aren't homogeneous
805   // aggregates should be passed indirectly.
806   bool IsIndirect = false;
807   if (TyInfo.Width.getQuantity() > 16) {
808     const Type *Base = nullptr;
809     uint64_t Members = 0;
810     IsIndirect = !isHomogeneousAggregate(Ty, Base, Members);
811   }
812 
813   return emitVoidPtrVAArg(CGF, VAListAddr, Ty, IsIndirect, TyInfo, SlotSize,
814                           /*AllowHigherAlign*/ true, Slot);
815 }
816 
817 RValue AArch64ABIInfo::EmitMSVAArg(CodeGenFunction &CGF, Address VAListAddr,
818                                    QualType Ty, AggValueSlot Slot) const {
819   bool IsIndirect = false;
820 
821   // Composites larger than 16 bytes are passed by reference.
822   if (isAggregateTypeForABI(Ty) && getContext().getTypeSize(Ty) > 128)
823     IsIndirect = true;
824 
825   return emitVoidPtrVAArg(CGF, VAListAddr, Ty, IsIndirect,
826                           CGF.getContext().getTypeInfoInChars(Ty),
827                           CharUnits::fromQuantity(8),
828                           /*allowHigherAlign*/ false, Slot);
829 }
830 
831 static bool isStreamingCompatible(const FunctionDecl *F) {
832   if (const auto *T = F->getType()->getAs<FunctionProtoType>())
833     return T->getAArch64SMEAttributes() &
834            FunctionType::SME_PStateSMCompatibleMask;
835   return false;
836 }
837 
838 // Report an error if an argument or return value of type Ty would need to be
839 // passed in a floating-point register.
840 static void diagnoseIfNeedsFPReg(DiagnosticsEngine &Diags,
841                                  const StringRef ABIName,
842                                  const AArch64ABIInfo &ABIInfo,
843                                  const QualType &Ty, const NamedDecl *D,
844                                  SourceLocation loc) {
845   const Type *HABase = nullptr;
846   uint64_t HAMembers = 0;
847   if (Ty->isFloatingType() || Ty->isVectorType() ||
848       ABIInfo.isHomogeneousAggregate(Ty, HABase, HAMembers)) {
849     Diags.Report(loc, diag::err_target_unsupported_type_for_abi)
850         << D->getDeclName() << Ty << ABIName;
851   }
852 }
853 
854 // If we are using a hard-float ABI, but do not have floating point registers,
855 // then report an error for any function arguments or returns which would be
856 // passed in floating-pint registers.
857 void AArch64TargetCodeGenInfo::checkFunctionABI(
858     CodeGenModule &CGM, const FunctionDecl *FuncDecl) const {
859   const AArch64ABIInfo &ABIInfo = getABIInfo<AArch64ABIInfo>();
860   const TargetInfo &TI = ABIInfo.getContext().getTargetInfo();
861 
862   if (!TI.hasFeature("fp") && !ABIInfo.isSoftFloat()) {
863     diagnoseIfNeedsFPReg(CGM.getDiags(), TI.getABI(), ABIInfo,
864                          FuncDecl->getReturnType(), FuncDecl,
865                          FuncDecl->getLocation());
866     for (ParmVarDecl *PVD : FuncDecl->parameters()) {
867       diagnoseIfNeedsFPReg(CGM.getDiags(), TI.getABI(), ABIInfo, PVD->getType(),
868                            PVD, FuncDecl->getLocation());
869     }
870   }
871 }
872 
873 void AArch64TargetCodeGenInfo::checkFunctionCallABIStreaming(
874     CodeGenModule &CGM, SourceLocation CallLoc, const FunctionDecl *Caller,
875     const FunctionDecl *Callee) const {
876   if (!Caller || !Callee || !Callee->hasAttr<AlwaysInlineAttr>())
877     return;
878 
879   bool CallerIsStreaming =
880       IsArmStreamingFunction(Caller, /*IncludeLocallyStreaming=*/true);
881   bool CalleeIsStreaming =
882       IsArmStreamingFunction(Callee, /*IncludeLocallyStreaming=*/true);
883   bool CallerIsStreamingCompatible = isStreamingCompatible(Caller);
884   bool CalleeIsStreamingCompatible = isStreamingCompatible(Callee);
885 
886   if (!CalleeIsStreamingCompatible &&
887       (CallerIsStreaming != CalleeIsStreaming || CallerIsStreamingCompatible))
888     CGM.getDiags().Report(
889         CallLoc, CalleeIsStreaming
890                      ? diag::err_function_always_inline_attribute_mismatch
891                      : diag::warn_function_always_inline_attribute_mismatch)
892         << Caller->getDeclName() << Callee->getDeclName() << "streaming";
893   if (auto *NewAttr = Callee->getAttr<ArmNewAttr>())
894     if (NewAttr->isNewZA())
895       CGM.getDiags().Report(CallLoc, diag::err_function_always_inline_new_za)
896           << Callee->getDeclName();
897 }
898 
899 // If the target does not have floating-point registers, but we are using a
900 // hard-float ABI, there is no way to pass floating-point, vector or HFA values
901 // to functions, so we report an error.
902 void AArch64TargetCodeGenInfo::checkFunctionCallABISoftFloat(
903     CodeGenModule &CGM, SourceLocation CallLoc, const FunctionDecl *Caller,
904     const FunctionDecl *Callee, const CallArgList &Args,
905     QualType ReturnType) const {
906   const AArch64ABIInfo &ABIInfo = getABIInfo<AArch64ABIInfo>();
907   const TargetInfo &TI = ABIInfo.getContext().getTargetInfo();
908 
909   if (!Caller || TI.hasFeature("fp") || ABIInfo.isSoftFloat())
910     return;
911 
912   diagnoseIfNeedsFPReg(CGM.getDiags(), TI.getABI(), ABIInfo, ReturnType,
913                        Callee ? Callee : Caller, CallLoc);
914 
915   for (const CallArg &Arg : Args)
916     diagnoseIfNeedsFPReg(CGM.getDiags(), TI.getABI(), ABIInfo, Arg.getType(),
917                          Callee ? Callee : Caller, CallLoc);
918 }
919 
920 void AArch64TargetCodeGenInfo::checkFunctionCallABI(CodeGenModule &CGM,
921                                                     SourceLocation CallLoc,
922                                                     const FunctionDecl *Caller,
923                                                     const FunctionDecl *Callee,
924                                                     const CallArgList &Args,
925                                                     QualType ReturnType) const {
926   checkFunctionCallABIStreaming(CGM, CallLoc, Caller, Callee);
927   checkFunctionCallABISoftFloat(CGM, CallLoc, Caller, Callee, Args, ReturnType);
928 }
929 
930 void AArch64ABIInfo::appendAttributeMangling(TargetClonesAttr *Attr,
931                                              unsigned Index,
932                                              raw_ostream &Out) const {
933   appendAttributeMangling(Attr->getFeatureStr(Index), Out);
934 }
935 
936 void AArch64ABIInfo::appendAttributeMangling(StringRef AttrStr,
937                                              raw_ostream &Out) const {
938   if (AttrStr == "default") {
939     Out << ".default";
940     return;
941   }
942 
943   Out << "._";
944   SmallVector<StringRef, 8> Features;
945   AttrStr.split(Features, "+");
946   for (auto &Feat : Features)
947     Feat = Feat.trim();
948 
949   llvm::sort(Features, [](const StringRef LHS, const StringRef RHS) {
950     return LHS.compare(RHS) < 0;
951   });
952 
953   llvm::SmallDenseSet<StringRef, 8> UniqueFeats;
954   for (auto &Feat : Features)
955     if (auto Ext = llvm::AArch64::parseFMVExtension(Feat))
956       if (UniqueFeats.insert(Ext->Name).second)
957         Out << 'M' << Ext->Name;
958 }
959 
960 std::unique_ptr<TargetCodeGenInfo>
961 CodeGen::createAArch64TargetCodeGenInfo(CodeGenModule &CGM,
962                                         AArch64ABIKind Kind) {
963   return std::make_unique<AArch64TargetCodeGenInfo>(CGM.getTypes(), Kind);
964 }
965 
966 std::unique_ptr<TargetCodeGenInfo>
967 CodeGen::createWindowsAArch64TargetCodeGenInfo(CodeGenModule &CGM,
968                                                AArch64ABIKind K) {
969   return std::make_unique<WindowsAArch64TargetCodeGenInfo>(CGM.getTypes(), K);
970 }
971