//===- AArch64.cpp --------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "ABIInfoImpl.h"
#include "TargetInfo.h"
#include "clang/AST/Decl.h"
#include "clang/Basic/DiagnosticFrontend.h"
#include "llvm/TargetParser/AArch64TargetParser.h"

using namespace clang;
using namespace clang::CodeGen;

//===----------------------------------------------------------------------===//
// AArch64 ABI Implementation
//===----------------------------------------------------------------------===//

namespace {

class AArch64ABIInfo : public ABIInfo {
  AArch64ABIKind Kind;

public:
  AArch64ABIInfo(CodeGenTypes &CGT, AArch64ABIKind Kind)
      : ABIInfo(CGT), Kind(Kind) {}

  bool isSoftFloat() const { return Kind == AArch64ABIKind::AAPCSSoft; }

private:
  AArch64ABIKind getABIKind() const { return Kind; }
  bool isDarwinPCS() const { return Kind == AArch64ABIKind::DarwinPCS; }

  ABIArgInfo classifyReturnType(QualType RetTy, bool IsVariadicFn) const;
  ABIArgInfo classifyArgumentType(QualType Ty, bool IsVariadicFn,
                                  bool IsNamedArg, unsigned CallingConvention,
                                  unsigned &NSRN, unsigned &NPRN) const;
  llvm::Type *convertFixedToScalableVectorType(const VectorType *VT) const;
  ABIArgInfo coerceIllegalVector(QualType Ty, unsigned &NSRN,
                                 unsigned &NPRN) const;
  ABIArgInfo coerceAndExpandPureScalableAggregate(
      QualType Ty, bool IsNamedArg, unsigned NVec, unsigned NPred,
      const SmallVectorImpl<llvm::Type *> &UnpaddedCoerceToSeq, unsigned &NSRN,
      unsigned &NPRN) const;
  bool isHomogeneousAggregateBaseType(QualType Ty) const override;
  bool isHomogeneousAggregateSmallEnough(const Type *Ty,
                                         uint64_t Members) const override;
  bool isZeroLengthBitfieldPermittedInHomogeneousAggregate() const override;

  bool isIllegalVectorType(QualType Ty) const;

  bool passAsAggregateType(QualType Ty) const;
  bool passAsPureScalableType(QualType Ty, unsigned &NV, unsigned &NP,
                              SmallVectorImpl<llvm::Type *> &CoerceToSeq) const;

  void flattenType(llvm::Type *Ty,
                   SmallVectorImpl<llvm::Type *> &Flattened) const;

  void computeInfo(CGFunctionInfo &FI) const override {
    if (!::classifyReturnType(getCXXABI(), FI, *this))
      FI.getReturnInfo() =
          classifyReturnType(FI.getReturnType(), FI.isVariadic());

    unsigned ArgNo = 0;
    unsigned NSRN = 0, NPRN = 0;
    for (auto &it : FI.arguments()) {
      const bool IsNamedArg =
          !FI.isVariadic() ||
          ArgNo < FI.getRequiredArgs().getNumRequiredArgs();
      ++ArgNo;
      it.info = classifyArgumentType(it.type, FI.isVariadic(), IsNamedArg,
                                     FI.getCallingConvention(), NSRN, NPRN);
    }
  }
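
  // Note on the register counters (illustrative, not exhaustive): NSRN is the
  // AAPCS64 "Next SIMD and Floating-point Register Number" and NPRN the "Next
  // scalable Predicate Register Number"; v0-v7 and p0-p3 are available. For a
  // declaration such as `void f(float a, svbool_t p, int b)`, classifying the
  // arguments leaves NSRN == 1 (`a` in s0) and NPRN == 1 (`p` in p0), while
  // `b` uses a general-purpose register and affects neither counter.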

  RValue EmitDarwinVAArg(Address VAListAddr, QualType Ty, CodeGenFunction &CGF,
                         AggValueSlot Slot) const;

  RValue EmitAAPCSVAArg(Address VAListAddr, QualType Ty, CodeGenFunction &CGF,
                        AArch64ABIKind Kind, AggValueSlot Slot) const;

  RValue EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty,
                   AggValueSlot Slot) const override {
    llvm::Type *BaseTy = CGF.ConvertType(Ty);
    if (isa<llvm::ScalableVectorType>(BaseTy))
      llvm::report_fatal_error("Passing SVE types to variadic functions is "
                               "currently not supported");

    return Kind == AArch64ABIKind::Win64
               ? EmitMSVAArg(CGF, VAListAddr, Ty, Slot)
           : isDarwinPCS() ? EmitDarwinVAArg(VAListAddr, Ty, CGF, Slot)
                           : EmitAAPCSVAArg(VAListAddr, Ty, CGF, Kind, Slot);
  }

  RValue EmitMSVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty,
                     AggValueSlot Slot) const override;

  bool allowBFloatArgsAndRet() const override {
    return getTarget().hasBFloat16Type();
  }

  using ABIInfo::appendAttributeMangling;
  void appendAttributeMangling(TargetClonesAttr *Attr, unsigned Index,
                               raw_ostream &Out) const override;
  void appendAttributeMangling(StringRef AttrStr,
                               raw_ostream &Out) const override;
};

class AArch64SwiftABIInfo : public SwiftABIInfo {
public:
  explicit AArch64SwiftABIInfo(CodeGenTypes &CGT)
      : SwiftABIInfo(CGT, /*SwiftErrorInRegister=*/true) {}

  bool isLegalVectorType(CharUnits VectorSize, llvm::Type *EltTy,
                         unsigned NumElts) const override;
};

class AArch64TargetCodeGenInfo : public TargetCodeGenInfo {
public:
  AArch64TargetCodeGenInfo(CodeGenTypes &CGT, AArch64ABIKind Kind)
      : TargetCodeGenInfo(std::make_unique<AArch64ABIInfo>(CGT, Kind)) {
    SwiftInfo = std::make_unique<AArch64SwiftABIInfo>(CGT);
  }

  StringRef getARCRetainAutoreleasedReturnValueMarker() const override {
    return "mov\tfp, fp\t\t// marker for objc_retainAutoreleaseReturnValue";
  }

  int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const override {
    return 31;
  }

  bool doesReturnSlotInterfereWithArgs() const override { return false; }

  void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
                           CodeGen::CodeGenModule &CGM) const override {
    auto *Fn = dyn_cast<llvm::Function>(GV);
    if (!Fn)
      return;

    const auto *FD = dyn_cast_or_null<FunctionDecl>(D);
    TargetInfo::BranchProtectionInfo BPI(CGM.getLangOpts());

    if (FD && FD->hasAttr<TargetAttr>()) {
      const auto *TA = FD->getAttr<TargetAttr>();
      ParsedTargetAttr Attr =
          CGM.getTarget().parseTargetAttr(TA->getFeaturesStr());
      if (!Attr.BranchProtection.empty()) {
        StringRef Error;
        (void)CGM.getTarget().validateBranchProtection(
            Attr.BranchProtection, Attr.CPU, BPI, CGM.getLangOpts(), Error);
        assert(Error.empty());
      }
    }
    setBranchProtectionFnAttributes(BPI, *Fn);
    setPointerAuthFnAttributes(CGM.getCodeGenOpts().PointerAuth, *Fn);
  }
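
  // Illustrative: with +ls64, a 512-bit accumulator type such as ACLE's
  // `data512_t` (a struct wrapping `uint64_t val[8]`, lowered to
  // `{ [8 x i64] }`) can be used directly as an inline-asm operand instead
  // of being scalarized.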
  bool isScalarizableAsmOperand(CodeGen::CodeGenFunction &CGF,
                                llvm::Type *Ty) const override {
    if (CGF.getTarget().hasFeature("ls64")) {
      auto *ST = dyn_cast<llvm::StructType>(Ty);
      if (ST && ST->getNumElements() == 1) {
        auto *AT = dyn_cast<llvm::ArrayType>(ST->getElementType(0));
        if (AT && AT->getNumElements() == 8 &&
            AT->getElementType()->isIntegerTy(64))
          return true;
      }
    }
    return TargetCodeGenInfo::isScalarizableAsmOperand(CGF, Ty);
  }

  void checkFunctionABI(CodeGenModule &CGM,
                        const FunctionDecl *Decl) const override;

  void checkFunctionCallABI(CodeGenModule &CGM, SourceLocation CallLoc,
                            const FunctionDecl *Caller,
                            const FunctionDecl *Callee, const CallArgList &Args,
                            QualType ReturnType) const override;

  bool wouldInliningViolateFunctionCallABI(
      const FunctionDecl *Caller, const FunctionDecl *Callee) const override;

private:
  // Diagnose calls between functions with incompatible Streaming SVE
  // attributes.
  void checkFunctionCallABIStreaming(CodeGenModule &CGM,
                                     SourceLocation CallLoc,
                                     const FunctionDecl *Caller,
                                     const FunctionDecl *Callee) const;
  // Diagnose calls which must pass arguments in floating-point registers when
  // the selected target does not have floating-point registers.
  void checkFunctionCallABISoftFloat(CodeGenModule &CGM,
                                     SourceLocation CallLoc,
                                     const FunctionDecl *Caller,
                                     const FunctionDecl *Callee,
                                     const CallArgList &Args,
                                     QualType ReturnType) const;
};

class WindowsAArch64TargetCodeGenInfo : public AArch64TargetCodeGenInfo {
public:
  WindowsAArch64TargetCodeGenInfo(CodeGenTypes &CGT, AArch64ABIKind K)
      : AArch64TargetCodeGenInfo(CGT, K) {}

  void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
                           CodeGen::CodeGenModule &CGM) const override;

  void getDependentLibraryOption(llvm::StringRef Lib,
                                 llvm::SmallString<24> &Opt) const override {
    Opt = "/DEFAULTLIB:" + qualifyWindowsLibrary(Lib);
  }

  void getDetectMismatchOption(llvm::StringRef Name, llvm::StringRef Value,
                               llvm::SmallString<32> &Opt) const override {
    Opt = "/FAILIFMISMATCH:\"" + Name.str() + "=" + Value.str() + "\"";
  }
};

void WindowsAArch64TargetCodeGenInfo::setTargetAttributes(
    const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const {
  AArch64TargetCodeGenInfo::setTargetAttributes(D, GV, CGM);
  if (GV->isDeclaration())
    return;
  addStackProbeTargetAttributes(D, GV, CGM);
}
} // namespace

llvm::Type *
AArch64ABIInfo::convertFixedToScalableVectorType(const VectorType *VT) const {
  assert(VT->getElementType()->isBuiltinType() && "expected builtin type!");

  if (VT->getVectorKind() == VectorKind::SveFixedLengthPredicate) {
    assert(VT->getElementType()->castAs<BuiltinType>()->getKind() ==
               BuiltinType::UChar &&
           "unexpected builtin type for SVE predicate!");
    return llvm::ScalableVectorType::get(llvm::Type::getInt1Ty(getVMContext()),
                                         16);
  }

  if (VT->getVectorKind() == VectorKind::SveFixedLengthData) {
    const auto *BT = VT->getElementType()->castAs<BuiltinType>();
    switch (BT->getKind()) {
    default:
      llvm_unreachable("unexpected builtin type for SVE vector!");

    case BuiltinType::SChar:
    case BuiltinType::UChar:
    case BuiltinType::MFloat8:
      return llvm::ScalableVectorType::get(
          llvm::Type::getInt8Ty(getVMContext()), 16);

    case BuiltinType::Short:
    case BuiltinType::UShort:
      return llvm::ScalableVectorType::get(
          llvm::Type::getInt16Ty(getVMContext()), 8);

    case BuiltinType::Int:
    case BuiltinType::UInt:
      return llvm::ScalableVectorType::get(
          llvm::Type::getInt32Ty(getVMContext()), 4);

    case BuiltinType::Long:
    case BuiltinType::ULong:
      return llvm::ScalableVectorType::get(
          llvm::Type::getInt64Ty(getVMContext()), 2);

    case BuiltinType::Half:
      return llvm::ScalableVectorType::get(
          llvm::Type::getHalfTy(getVMContext()), 8);

    case BuiltinType::Float:
      return llvm::ScalableVectorType::get(
          llvm::Type::getFloatTy(getVMContext()), 4);

    case BuiltinType::Double:
      return llvm::ScalableVectorType::get(
          llvm::Type::getDoubleTy(getVMContext()), 2);

    case BuiltinType::BFloat16:
      return llvm::ScalableVectorType::get(
          llvm::Type::getBFloatTy(getVMContext()), 8);
    }
  }

  llvm_unreachable("expected fixed-length SVE vector");
}
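
// Illustrative coercions (assuming a little-endian AAPCS target):
//  * an OpenCL-style `int3` (3 x i32, 16 bytes, non-power-of-2 element
//    count) is coerced to `<4 x i32>`;
//  * a 2-byte `<2 x i8>` vector becomes i32 (i16 on Android/OHOS);
//  * a fixed-length SVE vector under -msve-vector-bits is coerced to the
//    corresponding scalable type, e.g. svfloat32_t to <vscale x 4 x float>.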
ABIArgInfo AArch64ABIInfo::coerceIllegalVector(QualType Ty, unsigned &NSRN,
                                               unsigned &NPRN) const {
  assert(Ty->isVectorType() && "expected vector type!");

  const auto *VT = Ty->castAs<VectorType>();
  if (VT->getVectorKind() == VectorKind::SveFixedLengthPredicate) {
    assert(VT->getElementType()->isBuiltinType() && "expected builtin type!");
    assert(VT->getElementType()->castAs<BuiltinType>()->getKind() ==
               BuiltinType::UChar &&
           "unexpected builtin type for SVE predicate!");
    NPRN = std::min(NPRN + 1, 4u);
    return ABIArgInfo::getDirect(llvm::ScalableVectorType::get(
        llvm::Type::getInt1Ty(getVMContext()), 16));
  }

  if (VT->getVectorKind() == VectorKind::SveFixedLengthData) {
    NSRN = std::min(NSRN + 1, 8u);
    return ABIArgInfo::getDirect(convertFixedToScalableVectorType(VT));
  }

  uint64_t Size = getContext().getTypeSize(Ty);
  // Android promotes <2 x i8> to i16, not i32
  if ((isAndroid() || isOHOSFamily()) && (Size <= 16)) {
    llvm::Type *ResType = llvm::Type::getInt16Ty(getVMContext());
    return ABIArgInfo::getDirect(ResType);
  }
  if (Size <= 32) {
    llvm::Type *ResType = llvm::Type::getInt32Ty(getVMContext());
    return ABIArgInfo::getDirect(ResType);
  }
  if (Size == 64) {
    NSRN = std::min(NSRN + 1, 8u);
    auto *ResType =
        llvm::FixedVectorType::get(llvm::Type::getInt32Ty(getVMContext()), 2);
    return ABIArgInfo::getDirect(ResType);
  }
  if (Size == 128) {
    NSRN = std::min(NSRN + 1, 8u);
    auto *ResType =
        llvm::FixedVectorType::get(llvm::Type::getInt32Ty(getVMContext()), 4);
    return ABIArgInfo::getDirect(ResType);
  }

  return getNaturalAlignIndirect(Ty, getDataLayout().getAllocaAddrSpace(),
                                 /*ByVal=*/false);
}

ABIArgInfo AArch64ABIInfo::coerceAndExpandPureScalableAggregate(
    QualType Ty, bool IsNamedArg, unsigned NVec, unsigned NPred,
    const SmallVectorImpl<llvm::Type *> &UnpaddedCoerceToSeq, unsigned &NSRN,
    unsigned &NPRN) const {
  if (!IsNamedArg || NSRN + NVec > 8 || NPRN + NPred > 4)
    return getNaturalAlignIndirect(Ty, getDataLayout().getAllocaAddrSpace(),
                                   /*ByVal=*/false);
  NSRN += NVec;
  NPRN += NPred;

  // Handle SVE vector tuples.
  if (Ty->isSVESizelessBuiltinType())
    return ABIArgInfo::getDirect();

  llvm::Type *UnpaddedCoerceToType =
      UnpaddedCoerceToSeq.size() == 1
          ? UnpaddedCoerceToSeq[0]
          : llvm::StructType::get(CGT.getLLVMContext(), UnpaddedCoerceToSeq,
                                  true);

  SmallVector<llvm::Type *> CoerceToSeq;
  flattenType(CGT.ConvertType(Ty), CoerceToSeq);
  auto *CoerceToType =
      llvm::StructType::get(CGT.getLLVMContext(), CoerceToSeq, false);

  return ABIArgInfo::getCoerceAndExpand(CoerceToType, UnpaddedCoerceToType);
}
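
// Illustrative classifications under this function (little-endian AAPCS):
//  * `struct { float x, y; }` is an HFA and is coerced to `[2 x float]`;
//  * `struct { void *a, *b; }` becomes `[2 x ptr]` via the pointer-only
//    special case below;
//  * a 32-byte non-HFA struct exceeds 16 bytes and is passed indirectly.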
ABIArgInfo AArch64ABIInfo::classifyArgumentType(QualType Ty, bool IsVariadicFn,
                                                bool IsNamedArg,
                                                unsigned CallingConvention,
                                                unsigned &NSRN,
                                                unsigned &NPRN) const {
  Ty = useFirstFieldIfTransparentUnion(Ty);

  // Handle illegal vector types here.
  if (isIllegalVectorType(Ty))
    return coerceIllegalVector(Ty, NSRN, NPRN);

  if (!passAsAggregateType(Ty)) {
    // Treat an enum type as its underlying type.
    if (const EnumType *EnumTy = Ty->getAs<EnumType>())
      Ty = EnumTy->getDecl()->getIntegerType();

    if (const auto *EIT = Ty->getAs<BitIntType>())
      if (EIT->getNumBits() > 128)
        return getNaturalAlignIndirect(
            Ty, getDataLayout().getAllocaAddrSpace(), false);

    if (Ty->isVectorType())
      NSRN = std::min(NSRN + 1, 8u);
    else if (const auto *BT = Ty->getAs<BuiltinType>()) {
      if (BT->isFloatingPoint())
        NSRN = std::min(NSRN + 1, 8u);
      else {
        switch (BT->getKind()) {
        case BuiltinType::SveBool:
        case BuiltinType::SveCount:
          NPRN = std::min(NPRN + 1, 4u);
          break;
        case BuiltinType::SveBoolx2:
          NPRN = std::min(NPRN + 2, 4u);
          break;
        case BuiltinType::SveBoolx4:
          NPRN = std::min(NPRN + 4, 4u);
          break;
        default:
          if (BT->isSVESizelessBuiltinType())
            NSRN = std::min(
                NSRN + getContext().getBuiltinVectorTypeInfo(BT).NumVectors,
                8u);
        }
      }
    }

    return (isPromotableIntegerTypeForABI(Ty) && isDarwinPCS()
                ? ABIArgInfo::getExtend(Ty, CGT.ConvertType(Ty))
                : ABIArgInfo::getDirect());
  }

  // Structures with either a non-trivial destructor or a non-trivial
  // copy constructor are always indirect.
  if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) {
    return getNaturalAlignIndirect(
        Ty, /*AddrSpace=*/getDataLayout().getAllocaAddrSpace(),
        /*ByVal=*/RAA == CGCXXABI::RAA_DirectInMemory);
  }

  // Empty records:
  uint64_t Size = getContext().getTypeSize(Ty);
  bool IsEmpty = isEmptyRecord(getContext(), Ty, true);
  if (!Ty->isSVESizelessBuiltinType() && (IsEmpty || Size == 0)) {
    // Empty records are ignored in C mode, and in C++ on Darwin.
    if (!getContext().getLangOpts().CPlusPlus || isDarwinPCS())
      return ABIArgInfo::getIgnore();

    // In C++ mode, arguments which have sizeof() == 0 (which are non-standard
    // C++) are ignored. This isn't defined by any standard, so we copy GCC's
    // behaviour here.
    if (Size == 0)
      return ABIArgInfo::getIgnore();

    // Otherwise, they are passed as if they have a size of 1 byte.
    return ABIArgInfo::getDirect(llvm::Type::getInt8Ty(getVMContext()));
  }

  // Homogeneous Floating-point Aggregates (HFAs) need to be expanded.
  const Type *Base = nullptr;
  uint64_t Members = 0;
  bool IsWin64 = Kind == AArch64ABIKind::Win64 ||
                 CallingConvention == llvm::CallingConv::Win64;
  bool IsWinVariadic = IsWin64 && IsVariadicFn;
  // In variadic functions on Windows, all composite types are treated alike,
  // no special handling of HFAs/HVAs.
  if (!IsWinVariadic && isHomogeneousAggregate(Ty, Base, Members)) {
    NSRN = std::min(NSRN + Members, uint64_t(8));
    if (Kind != AArch64ABIKind::AAPCS)
      return ABIArgInfo::getDirect(
          llvm::ArrayType::get(CGT.ConvertType(QualType(Base, 0)), Members));

    // For HFAs/HVAs, cap the argument alignment to 16, otherwise
    // set it to 8 according to the AAPCS64 document.
    unsigned Align =
        getContext().getTypeUnadjustedAlignInChars(Ty).getQuantity();
    Align = (Align >= 16) ? 16 : 8;
    return ABIArgInfo::getDirect(
        llvm::ArrayType::get(CGT.ConvertType(QualType(Base, 0)), Members), 0,
        nullptr, true, Align);
  }

  // In AAPCS named arguments of a Pure Scalable Type are passed expanded in
  // registers, or indirectly if there are not enough registers.
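  // For example, a named `struct { svfloat32_t v; svbool_t p; }` argument is
  // expanded into one Z-register and one P-register operand, provided
  // NSRN + 1 <= 8 and NPRN + 1 <= 4 still hold; otherwise (or for variadic
  // arguments) it is passed indirectly.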
  if (Kind == AArch64ABIKind::AAPCS) {
    unsigned NVec = 0, NPred = 0;
    SmallVector<llvm::Type *> UnpaddedCoerceToSeq;
    if (passAsPureScalableType(Ty, NVec, NPred, UnpaddedCoerceToSeq) &&
        (NVec + NPred) > 0)
      return coerceAndExpandPureScalableAggregate(
          Ty, IsNamedArg, NVec, NPred, UnpaddedCoerceToSeq, NSRN, NPRN);
  }

  // Aggregates <= 16 bytes are passed directly in registers or on the stack.
  if (Size <= 128) {
    unsigned Alignment;
    if (Kind == AArch64ABIKind::AAPCS) {
      Alignment = getContext().getTypeUnadjustedAlign(Ty);
      Alignment = Alignment < 128 ? 64 : 128;
    } else {
      Alignment =
          std::max(getContext().getTypeAlign(Ty),
                   (unsigned)getTarget().getPointerWidth(LangAS::Default));
    }
    Size = llvm::alignTo(Size, Alignment);

    // If the Aggregate is made up of pointers, use an array of pointers for
    // the coerced type. This prevents having to convert ptr2int->int2ptr
    // through the call, allowing alias analysis to produce better code.
    auto ContainsOnlyPointers = [&](const auto &Self, QualType Ty) {
      if (isEmptyRecord(getContext(), Ty, true))
        return false;
      const RecordType *RT = Ty->getAs<RecordType>();
      if (!RT)
        return false;
      const RecordDecl *RD = RT->getDecl();
      if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD)) {
        for (const auto &I : CXXRD->bases())
          if (!Self(Self, I.getType()))
            return false;
      }
      return all_of(RD->fields(), [&](FieldDecl *FD) {
        QualType FDTy = FD->getType();
        if (FDTy->isArrayType())
          FDTy = getContext().getBaseElementType(FDTy);
        return (FDTy->isPointerOrReferenceType() &&
                getContext().getTypeSize(FDTy) == 64 &&
                !FDTy->getPointeeType().hasAddressSpace()) ||
               Self(Self, FDTy);
      });
    };

    // We use a pair of i64 for 16-byte aggregate with 8-byte alignment.
    // For aggregates with 16-byte alignment, we use i128.
    llvm::Type *BaseTy = llvm::Type::getIntNTy(getVMContext(), Alignment);
    if ((Size == 64 || Size == 128) && Alignment == 64 &&
        ContainsOnlyPointers(ContainsOnlyPointers, Ty))
      BaseTy = llvm::PointerType::getUnqual(getVMContext());
    return ABIArgInfo::getDirect(
        Size == Alignment ? BaseTy
                          : llvm::ArrayType::get(BaseTy, Size / Alignment));
  }

  return getNaturalAlignIndirect(Ty, getDataLayout().getAllocaAddrSpace(),
                                 /*ByVal=*/false);
}
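
// Illustrative return classifications (little-endian): a 4-byte
// `struct { short a; char b; }` comes back as i32 in w0, while on big-endian
// targets small composites are widened to a full register (i64) so that they
// remain distinguishable from genuine integer returns.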
ABIArgInfo AArch64ABIInfo::classifyReturnType(QualType RetTy,
                                              bool IsVariadicFn) const {
  if (RetTy->isVoidType())
    return ABIArgInfo::getIgnore();

  if (const auto *VT = RetTy->getAs<VectorType>()) {
    if (VT->getVectorKind() == VectorKind::SveFixedLengthData ||
        VT->getVectorKind() == VectorKind::SveFixedLengthPredicate) {
      unsigned NSRN = 0, NPRN = 0;
      return coerceIllegalVector(RetTy, NSRN, NPRN);
    }
  }

  // Large vector types should be returned via memory.
  if (RetTy->isVectorType() && getContext().getTypeSize(RetTy) > 128)
    return getNaturalAlignIndirect(RetTy,
                                   getDataLayout().getAllocaAddrSpace());

  if (!passAsAggregateType(RetTy)) {
    // Treat an enum type as its underlying type.
    if (const EnumType *EnumTy = RetTy->getAs<EnumType>())
      RetTy = EnumTy->getDecl()->getIntegerType();

    if (const auto *EIT = RetTy->getAs<BitIntType>())
      if (EIT->getNumBits() > 128)
        return getNaturalAlignIndirect(RetTy,
                                       getDataLayout().getAllocaAddrSpace());

    return (isPromotableIntegerTypeForABI(RetTy) && isDarwinPCS()
                ? ABIArgInfo::getExtend(RetTy)
                : ABIArgInfo::getDirect());
  }

  uint64_t Size = getContext().getTypeSize(RetTy);
  if (!RetTy->isSVESizelessBuiltinType() &&
      (isEmptyRecord(getContext(), RetTy, true) || Size == 0))
    return ABIArgInfo::getIgnore();

  const Type *Base = nullptr;
  uint64_t Members = 0;
  if (isHomogeneousAggregate(RetTy, Base, Members) &&
      !(getTarget().getTriple().getArch() == llvm::Triple::aarch64_32 &&
        IsVariadicFn))
    // Homogeneous Floating-point Aggregates (HFAs) are returned directly.
    return ABIArgInfo::getDirect();

  // In AAPCS return values of a Pure Scalable type are treated as a single
  // named argument and passed expanded in registers, or indirectly if there
  // are not enough registers.
  if (Kind == AArch64ABIKind::AAPCS) {
    unsigned NSRN = 0, NPRN = 0;
    unsigned NVec = 0, NPred = 0;
    SmallVector<llvm::Type *> UnpaddedCoerceToSeq;
    if (passAsPureScalableType(RetTy, NVec, NPred, UnpaddedCoerceToSeq) &&
        (NVec + NPred) > 0)
      return coerceAndExpandPureScalableAggregate(
          RetTy, /* IsNamedArg */ true, NVec, NPred, UnpaddedCoerceToSeq, NSRN,
          NPRN);
  }

  // Aggregates <= 16 bytes are returned directly in registers or on the stack.
  if (Size <= 128) {
    if (Size <= 64 && getDataLayout().isLittleEndian()) {
      // Composite types are returned in lower bits of a 64-bit register for
      // LE, and in higher bits for BE. However, integer types are always
      // returned in lower bits for both LE and BE, and they are not rounded
      // up to 64-bits. We can skip rounding up of composite types for LE, but
      // not for BE, otherwise composite types will be indistinguishable from
      // integer types.
      return ABIArgInfo::getDirect(
          llvm::IntegerType::get(getVMContext(), Size));
    }

    unsigned Alignment = getContext().getTypeAlign(RetTy);
    Size = llvm::alignTo(Size, 64); // round up to multiple of 8 bytes

    // We use a pair of i64 for 16-byte aggregate with 8-byte alignment.
    // For aggregates with 16-byte alignment, we use i128.
    if (Alignment < 128 && Size == 128) {
      llvm::Type *BaseTy = llvm::Type::getInt64Ty(getVMContext());
      return ABIArgInfo::getDirect(llvm::ArrayType::get(BaseTy, Size / 64));
    }
    return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(), Size));
  }

  return getNaturalAlignIndirect(RetTy, getDataLayout().getAllocaAddrSpace());
}

/// isIllegalVectorType - check whether the vector type is legal for AArch64.
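/// For example, `<2 x float>` (64 bits) and `<4 x i32>` (128 bits) are legal,
/// while a 3-element ext-vector (non-power-of-2 element count), a 128-bit
/// single-element vector, and any fixed-length SVE vector are not and must be
/// coerced.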
bool AArch64ABIInfo::isIllegalVectorType(QualType Ty) const {
  if (const VectorType *VT = Ty->getAs<VectorType>()) {
    // Check whether VT is a fixed-length SVE vector. These types are
    // represented as scalable vectors in function args/return and must be
    // coerced from fixed vectors.
    if (VT->getVectorKind() == VectorKind::SveFixedLengthData ||
        VT->getVectorKind() == VectorKind::SveFixedLengthPredicate)
      return true;

    // Check whether VT is legal.
    unsigned NumElements = VT->getNumElements();
    uint64_t Size = getContext().getTypeSize(VT);
    // NumElements should be power of 2.
    if (!llvm::isPowerOf2_32(NumElements))
      return true;

    // arm64_32 has to be compatible with the ARM logic here, which allows huge
    // vectors for some reason.
    llvm::Triple Triple = getTarget().getTriple();
    if (Triple.getArch() == llvm::Triple::aarch64_32 &&
        Triple.isOSBinFormatMachO())
      return Size <= 32;

    return Size != 64 && (Size != 128 || NumElements == 1);
  }
  return false;
}

bool AArch64SwiftABIInfo::isLegalVectorType(CharUnits VectorSize,
                                            llvm::Type *EltTy,
                                            unsigned NumElts) const {
  if (!llvm::isPowerOf2_32(NumElts))
    return false;
  if (VectorSize.getQuantity() != 8 &&
      (VectorSize.getQuantity() != 16 || NumElts == 1))
    return false;
  return true;
}

bool AArch64ABIInfo::isHomogeneousAggregateBaseType(QualType Ty) const {
  // For the soft-float ABI variant, no types are considered to be homogeneous
  // aggregates.
  if (isSoftFloat())
    return false;

  // Homogeneous aggregates for AAPCS64 must have base types of a floating
  // point type or a short-vector type. This is the same as the 32-bit ABI,
  // but with the difference that any floating-point type is allowed,
  // including __fp16.
  if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) {
    if (BT->isFloatingPoint())
      return true;
  } else if (const VectorType *VT = Ty->getAs<VectorType>()) {
    if (auto Kind = VT->getVectorKind();
        Kind == VectorKind::SveFixedLengthData ||
        Kind == VectorKind::SveFixedLengthPredicate)
      return false;

    unsigned VecSize = getContext().getTypeSize(VT);
    if (VecSize == 64 || VecSize == 128)
      return true;
  }
  return false;
}

bool AArch64ABIInfo::isHomogeneousAggregateSmallEnough(
    const Type *Base, uint64_t Members) const {
  return Members <= 4;
}

bool AArch64ABIInfo::isZeroLengthBitfieldPermittedInHomogeneousAggregate()
    const {
  // AAPCS64 says that the rule for whether something is a homogeneous
  // aggregate is applied to the output of the data layout decision. So
  // anything that doesn't affect the data layout also does not affect
  // homogeneity. In particular, zero-length bitfields don't stop a struct
  // being homogeneous.
  return true;
}

bool AArch64ABIInfo::passAsAggregateType(QualType Ty) const {
  if (Kind == AArch64ABIKind::AAPCS && Ty->isSVESizelessBuiltinType()) {
    const auto *BT = Ty->castAs<BuiltinType>();
    return !BT->isSVECount() &&
           getContext().getBuiltinVectorTypeInfo(BT).NumVectors > 1;
  }
  return isAggregateTypeForABI(Ty);
}

// Check if a type needs to be passed in registers as a Pure Scalable Type (as
// defined by AAPCS64). Return the number of data vectors and the number of
// predicate vectors in the type, into `NVec` and `NPred`, respectively. Upon
// return `CoerceToSeq` contains an expanded sequence of LLVM IR types, one
// element for each non-composite member. For practical purposes, limit the
// length of `CoerceToSeq` to about 12 (the maximum that could possibly fit
// in registers) and return false, the effect of which will be to pass the
// argument under the rules for a large (> 128 bytes) composite.
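//
// For example, `struct { svfloat32_t v[2]; svbool_t p; }` is a PST with
// NVec == 2 and NPred == 1, yielding CoerceToSeq = { <vscale x 4 x float>,
// <vscale x 4 x float>, <vscale x 16 x i1> }.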
bool AArch64ABIInfo::passAsPureScalableType(
    QualType Ty, unsigned &NVec, unsigned &NPred,
    SmallVectorImpl<llvm::Type *> &CoerceToSeq) const {
  if (const ConstantArrayType *AT = getContext().getAsConstantArrayType(Ty)) {
    uint64_t NElt = AT->getZExtSize();
    if (NElt == 0)
      return false;

    unsigned NV = 0, NP = 0;
    SmallVector<llvm::Type *> EltCoerceToSeq;
    if (!passAsPureScalableType(AT->getElementType(), NV, NP, EltCoerceToSeq))
      return false;

    if (CoerceToSeq.size() + NElt * EltCoerceToSeq.size() > 12)
      return false;

    for (uint64_t I = 0; I < NElt; ++I)
      llvm::append_range(CoerceToSeq, EltCoerceToSeq);

    NVec += NElt * NV;
    NPred += NElt * NP;
    return true;
  }

  if (const RecordType *RT = Ty->getAs<RecordType>()) {
    // If the record cannot be passed in registers, then it's not a PST.
    if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(RT, getCXXABI());
        RAA != CGCXXABI::RAA_Default)
      return false;

    // Pure scalable types are never unions and never contain unions.
    const RecordDecl *RD = RT->getDecl();
    if (RD->isUnion())
      return false;

    // If this is a C++ record, check the bases.
    if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD)) {
      for (const auto &I : CXXRD->bases()) {
        if (isEmptyRecord(getContext(), I.getType(), true))
          continue;
        if (!passAsPureScalableType(I.getType(), NVec, NPred, CoerceToSeq))
          return false;
      }
    }

    // Check members.
    for (const auto *FD : RD->fields()) {
      QualType FT = FD->getType();
      if (isEmptyField(getContext(), FD, /* AllowArrays */ true))
        continue;
      if (!passAsPureScalableType(FT, NVec, NPred, CoerceToSeq))
        return false;
    }

    return true;
  }

  if (const auto *VT = Ty->getAs<VectorType>()) {
    if (VT->getVectorKind() == VectorKind::SveFixedLengthPredicate) {
      ++NPred;
      if (CoerceToSeq.size() + 1 > 12)
        return false;
      CoerceToSeq.push_back(convertFixedToScalableVectorType(VT));
      return true;
    }

    if (VT->getVectorKind() == VectorKind::SveFixedLengthData) {
      ++NVec;
      if (CoerceToSeq.size() + 1 > 12)
        return false;
      CoerceToSeq.push_back(convertFixedToScalableVectorType(VT));
      return true;
    }

    return false;
  }

  if (!Ty->isBuiltinType())
    return false;

  bool isPredicate;
  switch (Ty->castAs<BuiltinType>()->getKind()) {
#define SVE_VECTOR_TYPE(Name, MangledName, Id, SingletonId)                    \
  case BuiltinType::Id:                                                        \
    isPredicate = false;                                                       \
    break;
#define SVE_PREDICATE_TYPE(Name, MangledName, Id, SingletonId)                 \
  case BuiltinType::Id:                                                        \
    isPredicate = true;                                                        \
    break;
#include "clang/Basic/AArch64ACLETypes.def"
  default:
    return false;
  }

  ASTContext::BuiltinVectorTypeInfo Info =
      getContext().getBuiltinVectorTypeInfo(cast<BuiltinType>(Ty));
  assert(Info.NumVectors > 0 && Info.NumVectors <= 4 &&
         "Expected 1, 2, 3 or 4 vectors!");
  if (isPredicate)
    NPred += Info.NumVectors;
  else
    NVec += Info.NumVectors;
  llvm::Type *EltTy = Info.ElementType->isMFloat8Type()
                          ? llvm::Type::getInt8Ty(getVMContext())
                          : CGT.ConvertType(Info.ElementType);
  auto *VTy = llvm::ScalableVectorType::get(EltTy, Info.EC.getKnownMinValue());

  if (CoerceToSeq.size() + Info.NumVectors > 12)
    return false;
  std::fill_n(std::back_inserter(CoerceToSeq), Info.NumVectors, VTy);

  return true;
}

// Expand an LLVM IR type into a sequence with an element for each non-struct,
// non-array member of the type, with the exception of the padding types, which
// are retained.
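// For example, { [2 x <vscale x 4 x float>], i64 } flattens to the sequence
// <vscale x 4 x float>, <vscale x 4 x float>, i64.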
void AArch64ABIInfo::flattenType(
    llvm::Type *Ty, SmallVectorImpl<llvm::Type *> &Flattened) const {

  if (ABIArgInfo::isPaddingForCoerceAndExpand(Ty)) {
    Flattened.push_back(Ty);
    return;
  }

  if (const auto *AT = dyn_cast<llvm::ArrayType>(Ty)) {
    uint64_t NElt = AT->getNumElements();
    if (NElt == 0)
      return;

    SmallVector<llvm::Type *> EltFlattened;
    flattenType(AT->getElementType(), EltFlattened);

    for (uint64_t I = 0; I < NElt; ++I)
      llvm::append_range(Flattened, EltFlattened);
    return;
  }

  if (const auto *ST = dyn_cast<llvm::StructType>(Ty)) {
    for (auto *ET : ST->elements())
      flattenType(ET, Flattened);
    return;
  }

  Flattened.push_back(Ty);
}
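
// Illustrative AAPCS va_arg flow for `va_arg(ap, int)`: load __gr_offs; if it
// is non-negative the register save area is exhausted and the value comes
// from __stack; otherwise __gr_offs is advanced by 8 and, if the new offset
// is still <= 0, the value is loaded from __gr_top + old offset.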
RValue AArch64ABIInfo::EmitAAPCSVAArg(Address VAListAddr, QualType Ty,
                                      CodeGenFunction &CGF,
                                      AArch64ABIKind Kind,
                                      AggValueSlot Slot) const {
  // These numbers are not used for variadic arguments, hence it doesn't
  // matter that they don't retain their values across multiple calls to
  // `classifyArgumentType` here.
  unsigned NSRN = 0, NPRN = 0;
  ABIArgInfo AI =
      classifyArgumentType(Ty, /*IsVariadicFn=*/true, /* IsNamedArg */ false,
                           CGF.CurFnInfo->getCallingConvention(), NSRN, NPRN);
  // Empty records are ignored for parameter passing purposes.
  if (AI.isIgnore())
    return Slot.asRValue();

  bool IsIndirect = AI.isIndirect();

  llvm::Type *BaseTy = CGF.ConvertType(Ty);
  if (IsIndirect)
    BaseTy = llvm::PointerType::getUnqual(BaseTy->getContext());
  else if (AI.getCoerceToType())
    BaseTy = AI.getCoerceToType();

  unsigned NumRegs = 1;
  if (llvm::ArrayType *ArrTy = dyn_cast<llvm::ArrayType>(BaseTy)) {
    BaseTy = ArrTy->getElementType();
    NumRegs = ArrTy->getNumElements();
  }
  bool IsFPR =
      !isSoftFloat() && (BaseTy->isFloatingPointTy() || BaseTy->isVectorTy());

  // The AArch64 va_list type and handling is specified in the Procedure Call
  // Standard, section B.4:
  //
  // struct {
  //   void *__stack;
  //   void *__gr_top;
  //   void *__vr_top;
  //   int __gr_offs;
  //   int __vr_offs;
  // };

  llvm::BasicBlock *MaybeRegBlock = CGF.createBasicBlock("vaarg.maybe_reg");
  llvm::BasicBlock *InRegBlock = CGF.createBasicBlock("vaarg.in_reg");
  llvm::BasicBlock *OnStackBlock = CGF.createBasicBlock("vaarg.on_stack");
  llvm::BasicBlock *ContBlock = CGF.createBasicBlock("vaarg.end");

  CharUnits TySize = getContext().getTypeSizeInChars(Ty);
  CharUnits TyAlign = getContext().getTypeUnadjustedAlignInChars(Ty);

  Address reg_offs_p = Address::invalid();
  llvm::Value *reg_offs = nullptr;
  int reg_top_index;
  int RegSize = IsIndirect ? 8 : TySize.getQuantity();
  if (!IsFPR) {
    // 3 is the field number of __gr_offs
    reg_offs_p = CGF.Builder.CreateStructGEP(VAListAddr, 3, "gr_offs_p");
    reg_offs = CGF.Builder.CreateLoad(reg_offs_p, "gr_offs");
    reg_top_index = 1; // field number for __gr_top
    RegSize = llvm::alignTo(RegSize, 8);
  } else {
    // 4 is the field number of __vr_offs.
    reg_offs_p = CGF.Builder.CreateStructGEP(VAListAddr, 4, "vr_offs_p");
    reg_offs = CGF.Builder.CreateLoad(reg_offs_p, "vr_offs");
    reg_top_index = 2; // field number for __vr_top
    RegSize = 16 * NumRegs;
  }

  //=======================================
  // Find out where argument was passed
  //=======================================

  // If reg_offs >= 0 we're already using the stack for this type of
  // argument. We don't want to keep updating reg_offs (in case it overflows,
  // though anyone passing 2GB of arguments, each at most 16 bytes, deserves
  // whatever they get).
  llvm::Value *UsingStack = nullptr;
  UsingStack = CGF.Builder.CreateICmpSGE(
      reg_offs, llvm::ConstantInt::get(CGF.Int32Ty, 0));

  CGF.Builder.CreateCondBr(UsingStack, OnStackBlock, MaybeRegBlock);

  // Otherwise, at least some kind of argument could go in these registers, the
  // question is whether this particular type is too big.
  CGF.EmitBlock(MaybeRegBlock);

  // Integer arguments may need to be adjusted for correct register alignment
  // (for example a "struct { __int128 a; };" gets passed in x_2N, x_{2N+1}).
  // In this case we align __gr_offs to calculate the potential address.
  if (!IsFPR && !IsIndirect && TyAlign.getQuantity() > 8) {
    int Align = TyAlign.getQuantity();

    reg_offs = CGF.Builder.CreateAdd(
        reg_offs, llvm::ConstantInt::get(CGF.Int32Ty, Align - 1),
        "align_regoffs");
    reg_offs = CGF.Builder.CreateAnd(
        reg_offs, llvm::ConstantInt::get(CGF.Int32Ty, -Align),
        "aligned_regoffs");
  }

  // Update the gr_offs/vr_offs pointer for next call to va_arg on this
  // va_list. The fact that this is done unconditionally reflects the fact
  // that allocating an argument to the stack also uses up all the remaining
  // registers of the appropriate kind.
  llvm::Value *NewOffset = nullptr;
  NewOffset = CGF.Builder.CreateAdd(
      reg_offs, llvm::ConstantInt::get(CGF.Int32Ty, RegSize), "new_reg_offs");
  CGF.Builder.CreateStore(NewOffset, reg_offs_p);

  // Now we're in a position to decide whether this argument really was in
  // registers or not.
  llvm::Value *InRegs = nullptr;
  InRegs = CGF.Builder.CreateICmpSLE(
      NewOffset, llvm::ConstantInt::get(CGF.Int32Ty, 0), "inreg");

  CGF.Builder.CreateCondBr(InRegs, InRegBlock, OnStackBlock);

  //=======================================
  // Argument was in registers
  //=======================================

  // Now we emit the code for if the argument was originally passed in
  // registers. First start the appropriate block:
  CGF.EmitBlock(InRegBlock);

  llvm::Value *reg_top = nullptr;
  Address reg_top_p =
      CGF.Builder.CreateStructGEP(VAListAddr, reg_top_index, "reg_top_p");
  reg_top = CGF.Builder.CreateLoad(reg_top_p, "reg_top");
  Address BaseAddr(CGF.Builder.CreateInBoundsGEP(CGF.Int8Ty, reg_top,
                                                 reg_offs),
                   CGF.Int8Ty, CharUnits::fromQuantity(IsFPR ? 16 : 8));
  Address RegAddr = Address::invalid();
  llvm::Type *MemTy = CGF.ConvertTypeForMem(Ty), *ElementTy = MemTy;

  if (IsIndirect) {
    // If it's been passed indirectly (actually a struct), whatever we find
    // from stored registers or on the stack will actually be a struct **.
    MemTy = llvm::PointerType::getUnqual(MemTy->getContext());
  }
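
  // Illustrative: an HFA such as `struct { float x, y; }` passed in q0/q1
  // occupies bytes 0-3 and 16-19 of the FP register save area, so the loop
  // below copies the members into a contiguous temporary before use.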
  const Type *Base = nullptr;
  uint64_t NumMembers = 0;
  bool IsHFA = isHomogeneousAggregate(Ty, Base, NumMembers);
  if (IsHFA && NumMembers > 1) {
    // Homogeneous aggregates passed in registers will have their elements
    // split and stored 16 bytes apart regardless of size (they're notionally
    // in qN, qN+1, ...). We reload and store into a temporary local variable
    // contiguously.
    assert(!IsIndirect && "Homogeneous aggregates should be passed directly");
    auto BaseTyInfo = getContext().getTypeInfoInChars(QualType(Base, 0));
    llvm::Type *BaseTy = CGF.ConvertType(QualType(Base, 0));
    llvm::Type *HFATy = llvm::ArrayType::get(BaseTy, NumMembers);
    Address Tmp =
        CGF.CreateTempAlloca(HFATy, std::max(TyAlign, BaseTyInfo.Align));

    // On big-endian platforms, the value will be right-aligned in its slot.
    int Offset = 0;
    if (CGF.CGM.getDataLayout().isBigEndian() &&
        BaseTyInfo.Width.getQuantity() < 16)
      Offset = 16 - BaseTyInfo.Width.getQuantity();

    for (unsigned i = 0; i < NumMembers; ++i) {
      CharUnits BaseOffset = CharUnits::fromQuantity(16 * i + Offset);
      Address LoadAddr =
          CGF.Builder.CreateConstInBoundsByteGEP(BaseAddr, BaseOffset);
      LoadAddr = LoadAddr.withElementType(BaseTy);

      Address StoreAddr = CGF.Builder.CreateConstArrayGEP(Tmp, i);

      llvm::Value *Elem = CGF.Builder.CreateLoad(LoadAddr);
      CGF.Builder.CreateStore(Elem, StoreAddr);
    }

    RegAddr = Tmp.withElementType(MemTy);
  } else {
    // Otherwise the object is contiguous in memory.

    // It might be right-aligned in its slot.
    CharUnits SlotSize = BaseAddr.getAlignment();
    if (CGF.CGM.getDataLayout().isBigEndian() && !IsIndirect &&
        (IsHFA || !isAggregateTypeForABI(Ty)) && TySize < SlotSize) {
      CharUnits Offset = SlotSize - TySize;
      BaseAddr = CGF.Builder.CreateConstInBoundsByteGEP(BaseAddr, Offset);
    }

    RegAddr = BaseAddr.withElementType(MemTy);
  }

  CGF.EmitBranch(ContBlock);

  //=======================================
  // Argument was on the stack
  //=======================================
  CGF.EmitBlock(OnStackBlock);

  Address stack_p = CGF.Builder.CreateStructGEP(VAListAddr, 0, "stack_p");
  llvm::Value *OnStackPtr = CGF.Builder.CreateLoad(stack_p, "stack");

  // Again, stack arguments may need realignment. In this case both integer
  // and floating-point ones might be affected.
  if (!IsIndirect && TyAlign.getQuantity() > 8) {
    OnStackPtr = emitRoundPointerUpToAlignment(CGF, OnStackPtr, TyAlign);
  }
  Address OnStackAddr = Address(OnStackPtr, CGF.Int8Ty,
                                std::max(CharUnits::fromQuantity(8), TyAlign));

  // All stack slots are multiples of 8 bytes.
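  // For example, a 12-byte struct consumes two 8-byte slots (16 bytes) of
  // stack, and on big-endian targets small scalars are right-aligned within
  // their slot.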
  CharUnits StackSlotSize = CharUnits::fromQuantity(8);
  CharUnits StackSize;
  if (IsIndirect)
    StackSize = StackSlotSize;
  else
    StackSize = TySize.alignTo(StackSlotSize);

  llvm::Value *StackSizeC = CGF.Builder.getSize(StackSize);
  llvm::Value *NewStack = CGF.Builder.CreateInBoundsGEP(
      CGF.Int8Ty, OnStackPtr, StackSizeC, "new_stack");

  // Write the new value of __stack for the next call to va_arg
  CGF.Builder.CreateStore(NewStack, stack_p);

  if (CGF.CGM.getDataLayout().isBigEndian() && !isAggregateTypeForABI(Ty) &&
      TySize < StackSlotSize) {
    CharUnits Offset = StackSlotSize - TySize;
    OnStackAddr = CGF.Builder.CreateConstInBoundsByteGEP(OnStackAddr, Offset);
  }

  OnStackAddr = OnStackAddr.withElementType(MemTy);

  CGF.EmitBranch(ContBlock);

  //=======================================
  // Tidy up
  //=======================================
  CGF.EmitBlock(ContBlock);

  Address ResAddr = emitMergePHI(CGF, RegAddr, InRegBlock, OnStackAddr,
                                 OnStackBlock, "vaargs.addr");

  if (IsIndirect)
    return CGF.EmitLoadOfAnyValue(
        CGF.MakeAddrLValue(
            Address(CGF.Builder.CreateLoad(ResAddr, "vaarg.addr"), ElementTy,
                    TyAlign),
            Ty),
        Slot);

  return CGF.EmitLoadOfAnyValue(CGF.MakeAddrLValue(ResAddr, Ty), Slot);
}

RValue AArch64ABIInfo::EmitDarwinVAArg(Address VAListAddr, QualType Ty,
                                       CodeGenFunction &CGF,
                                       AggValueSlot Slot) const {
  // The backend's lowering doesn't support va_arg for aggregates or
  // illegal vector types. Lower VAArg here for these cases and use
  // the LLVM va_arg instruction for everything else.
  if (!isAggregateTypeForABI(Ty) && !isIllegalVectorType(Ty))
    return CGF.EmitLoadOfAnyValue(
        CGF.MakeAddrLValue(
            EmitVAArgInstr(CGF, VAListAddr, Ty, ABIArgInfo::getDirect()), Ty),
        Slot);

  uint64_t PointerSize = getTarget().getPointerWidth(LangAS::Default) / 8;
  CharUnits SlotSize = CharUnits::fromQuantity(PointerSize);

  // Empty records are ignored for parameter passing purposes.
  if (isEmptyRecord(getContext(), Ty, true))
    return Slot.asRValue();

  // The size of the actual thing passed, which might end up just
  // being a pointer for indirect types.
  auto TyInfo = getContext().getTypeInfoInChars(Ty);

  // Arguments bigger than 16 bytes which aren't homogeneous
  // aggregates should be passed indirectly.
  bool IsIndirect = false;
  if (TyInfo.Width.getQuantity() > 16) {
    const Type *Base = nullptr;
    uint64_t Members = 0;
    IsIndirect = !isHomogeneousAggregate(Ty, Base, Members);
  }

  return emitVoidPtrVAArg(CGF, VAListAddr, Ty, IsIndirect, TyInfo, SlotSize,
                          /*AllowHigherAlign*/ true, Slot);
}
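
// Illustrative: for a Win64 variadic call, a 32-byte struct argument is
// passed by reference, so the va_list slot holds a pointer that
// emitVoidPtrVAArg dereferences; slots are always 8 bytes with no extra
// alignment.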
RValue AArch64ABIInfo::EmitMSVAArg(CodeGenFunction &CGF, Address VAListAddr,
                                   QualType Ty, AggValueSlot Slot) const {
  bool IsIndirect = false;

  // Composites larger than 16 bytes are passed by reference.
  if (isAggregateTypeForABI(Ty) && getContext().getTypeSize(Ty) > 128)
    IsIndirect = true;

  return emitVoidPtrVAArg(CGF, VAListAddr, Ty, IsIndirect,
                          CGF.getContext().getTypeInfoInChars(Ty),
                          CharUnits::fromQuantity(8),
                          /*allowHigherAlign*/ false, Slot);
}

static bool isStreamingCompatible(const FunctionDecl *F) {
  if (const auto *T = F->getType()->getAs<FunctionProtoType>())
    return T->getAArch64SMEAttributes() &
           FunctionType::SME_PStateSMCompatibleMask;
  return false;
}

// Report an error if an argument or return value of type Ty would need to be
// passed in a floating-point register.
static void diagnoseIfNeedsFPReg(DiagnosticsEngine &Diags,
                                 const StringRef ABIName,
                                 const AArch64ABIInfo &ABIInfo,
                                 const QualType &Ty, const NamedDecl *D,
                                 SourceLocation loc) {
  const Type *HABase = nullptr;
  uint64_t HAMembers = 0;
  if (Ty->isFloatingType() || Ty->isVectorType() ||
      ABIInfo.isHomogeneousAggregate(Ty, HABase, HAMembers)) {
    Diags.Report(loc, diag::err_target_unsupported_type_for_abi)
        << D->getDeclName() << Ty << ABIName;
  }
}

// If we are using a hard-float ABI, but do not have floating point registers,
// then report an error for any function arguments or returns which would be
// passed in floating-point registers.
void AArch64TargetCodeGenInfo::checkFunctionABI(
    CodeGenModule &CGM, const FunctionDecl *FuncDecl) const {
  const AArch64ABIInfo &ABIInfo = getABIInfo<AArch64ABIInfo>();
  const TargetInfo &TI = ABIInfo.getContext().getTargetInfo();

  if (!TI.hasFeature("fp") && !ABIInfo.isSoftFloat()) {
    diagnoseIfNeedsFPReg(CGM.getDiags(), TI.getABI(), ABIInfo,
                         FuncDecl->getReturnType(), FuncDecl,
                         FuncDecl->getLocation());
    for (ParmVarDecl *PVD : FuncDecl->parameters()) {
      diagnoseIfNeedsFPReg(CGM.getDiags(), TI.getABI(), ABIInfo,
                           PVD->getType(), PVD, FuncDecl->getLocation());
    }
  }
}

enum class ArmSMEInlinability : uint8_t {
  Ok = 0,
  ErrorCalleeRequiresNewZA = 1 << 0,
  ErrorCalleeRequiresNewZT0 = 1 << 1,
  WarnIncompatibleStreamingModes = 1 << 2,
  ErrorIncompatibleStreamingModes = 1 << 3,

  IncompatibleStreamingModes =
      WarnIncompatibleStreamingModes | ErrorIncompatibleStreamingModes,

  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/ErrorIncompatibleStreamingModes),
};

/// Determines if there are any Arm SME ABI issues with inlining \p Callee into
/// \p Caller. Returns the issue (if any) in the ArmSMEInlinability bit enum.
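/// For example, inlining an `__arm_streaming` callee into a non-streaming
/// caller yields ErrorIncompatibleStreamingModes, and a callee marked
/// `__arm_new("za")` yields ErrorCalleeRequiresNewZA.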
static ArmSMEInlinability GetArmSMEInlinability(const FunctionDecl *Caller,
                                                const FunctionDecl *Callee) {
  bool CallerIsStreaming =
      IsArmStreamingFunction(Caller, /*IncludeLocallyStreaming=*/true);
  bool CalleeIsStreaming =
      IsArmStreamingFunction(Callee, /*IncludeLocallyStreaming=*/true);
  bool CallerIsStreamingCompatible = isStreamingCompatible(Caller);
  bool CalleeIsStreamingCompatible = isStreamingCompatible(Callee);

  ArmSMEInlinability Inlinability = ArmSMEInlinability::Ok;

  if (!CalleeIsStreamingCompatible &&
      (CallerIsStreaming != CalleeIsStreaming ||
       CallerIsStreamingCompatible)) {
    if (CalleeIsStreaming)
      Inlinability |= ArmSMEInlinability::ErrorIncompatibleStreamingModes;
    else
      Inlinability |= ArmSMEInlinability::WarnIncompatibleStreamingModes;
  }
  if (auto *NewAttr = Callee->getAttr<ArmNewAttr>()) {
    if (NewAttr->isNewZA())
      Inlinability |= ArmSMEInlinability::ErrorCalleeRequiresNewZA;
    if (NewAttr->isNewZT0())
      Inlinability |= ArmSMEInlinability::ErrorCalleeRequiresNewZT0;
  }

  return Inlinability;
}

void AArch64TargetCodeGenInfo::checkFunctionCallABIStreaming(
    CodeGenModule &CGM, SourceLocation CallLoc, const FunctionDecl *Caller,
    const FunctionDecl *Callee) const {
  if (!Caller || !Callee || !Callee->hasAttr<AlwaysInlineAttr>())
    return;

  ArmSMEInlinability Inlinability = GetArmSMEInlinability(Caller, Callee);

  if ((Inlinability & ArmSMEInlinability::IncompatibleStreamingModes) !=
      ArmSMEInlinability::Ok)
    CGM.getDiags().Report(
        CallLoc,
        (Inlinability & ArmSMEInlinability::ErrorIncompatibleStreamingModes) ==
                ArmSMEInlinability::ErrorIncompatibleStreamingModes
            ? diag::err_function_always_inline_attribute_mismatch
            : diag::warn_function_always_inline_attribute_mismatch)
        << Caller->getDeclName() << Callee->getDeclName() << "streaming";

  if ((Inlinability & ArmSMEInlinability::ErrorCalleeRequiresNewZA) ==
      ArmSMEInlinability::ErrorCalleeRequiresNewZA)
    CGM.getDiags().Report(CallLoc, diag::err_function_always_inline_new_za)
        << Callee->getDeclName();

  if ((Inlinability & ArmSMEInlinability::ErrorCalleeRequiresNewZT0) ==
      ArmSMEInlinability::ErrorCalleeRequiresNewZT0)
    CGM.getDiags().Report(CallLoc, diag::err_function_always_inline_new_zt0)
        << Callee->getDeclName();
}

// If the target does not have floating-point registers, but we are using a
// hard-float ABI, there is no way to pass floating-point, vector or HFA values
// to functions, so we report an error.
void AArch64TargetCodeGenInfo::checkFunctionCallABISoftFloat(
    CodeGenModule &CGM, SourceLocation CallLoc, const FunctionDecl *Caller,
    const FunctionDecl *Callee, const CallArgList &Args,
    QualType ReturnType) const {
  const AArch64ABIInfo &ABIInfo = getABIInfo<AArch64ABIInfo>();
  const TargetInfo &TI = ABIInfo.getContext().getTargetInfo();

  if (!Caller || TI.hasFeature("fp") || ABIInfo.isSoftFloat())
    return;

  diagnoseIfNeedsFPReg(CGM.getDiags(), TI.getABI(), ABIInfo, ReturnType,
                       Callee ? Callee : Caller, CallLoc);

  for (const CallArg &Arg : Args)
    diagnoseIfNeedsFPReg(CGM.getDiags(), TI.getABI(), ABIInfo, Arg.getType(),
                         Callee ? Callee : Caller, CallLoc);
}

void AArch64TargetCodeGenInfo::checkFunctionCallABI(
    CodeGenModule &CGM, SourceLocation CallLoc, const FunctionDecl *Caller,
    const FunctionDecl *Callee, const CallArgList &Args,
    QualType ReturnType) const {
  checkFunctionCallABIStreaming(CGM, CallLoc, Caller, Callee);
  checkFunctionCallABISoftFloat(CGM, CallLoc, Caller, Callee, Args,
                                ReturnType);
}

bool AArch64TargetCodeGenInfo::wouldInliningViolateFunctionCallABI(
    const FunctionDecl *Caller, const FunctionDecl *Callee) const {
  return Caller && Callee &&
         GetArmSMEInlinability(Caller, Callee) != ArmSMEInlinability::Ok;
}

void AArch64ABIInfo::appendAttributeMangling(TargetClonesAttr *Attr,
                                             unsigned Index,
                                             raw_ostream &Out) const {
  appendAttributeMangling(Attr->getFeatureStr(Index), Out);
}
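
// Illustrative: "default" mangles to ".default", while a feature string such
// as "sve2+aes" is split, trimmed, sorted and deduplicated, mangling roughly
// to "._MaesMsve2" (each recognised FMV extension prefixed with 'M',
// assuming both features parse as FMV extensions of the same name).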
void AArch64ABIInfo::appendAttributeMangling(StringRef AttrStr,
                                             raw_ostream &Out) const {
  if (AttrStr == "default") {
    Out << ".default";
    return;
  }

  Out << "._";
  SmallVector<StringRef, 8> Features;
  AttrStr.split(Features, "+");
  for (auto &Feat : Features)
    Feat = Feat.trim();

  llvm::sort(Features, [](const StringRef LHS, const StringRef RHS) {
    return LHS.compare(RHS) < 0;
  });

  llvm::SmallDenseSet<StringRef, 8> UniqueFeats;
  for (auto &Feat : Features)
    if (auto Ext = llvm::AArch64::parseFMVExtension(Feat))
      if (UniqueFeats.insert(Ext->Name).second)
        Out << 'M' << Ext->Name;
}

std::unique_ptr<TargetCodeGenInfo>
CodeGen::createAArch64TargetCodeGenInfo(CodeGenModule &CGM,
                                        AArch64ABIKind Kind) {
  return std::make_unique<AArch64TargetCodeGenInfo>(CGM.getTypes(), Kind);
}

std::unique_ptr<TargetCodeGenInfo>
CodeGen::createWindowsAArch64TargetCodeGenInfo(CodeGenModule &CGM,
                                               AArch64ABIKind K) {
  return std::make_unique<WindowsAArch64TargetCodeGenInfo>(CGM.getTypes(), K);
}