//===- X86.cpp ------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "ABIInfoImpl.h"
#include "TargetInfo.h"
#include "clang/Basic/DiagnosticFrontend.h"
#include "llvm/ADT/SmallBitVector.h"

using namespace clang;
using namespace clang::CodeGen;

namespace {

/// IsX86_MMXType - Return true if this is an MMX type.
bool IsX86_MMXType(llvm::Type *IRType) {
  // Return true if the type is an MMX type <2 x i32>, <4 x i16>, or <8 x i8>.
  return IRType->isVectorTy() && IRType->getPrimitiveSizeInBits() == 64 &&
         cast<llvm::VectorType>(IRType)->getElementType()->isIntegerTy() &&
         IRType->getScalarSizeInBits() != 64;
}

static llvm::Type* X86AdjustInlineAsmType(CodeGen::CodeGenFunction &CGF,
                                          StringRef Constraint,
                                          llvm::Type* Ty) {
  bool IsMMXCons = llvm::StringSwitch<bool>(Constraint)
                       .Cases("y", "&y", "^Ym", true)
                       .Default(false);
  if (IsMMXCons && Ty->isVectorTy()) {
    if (cast<llvm::VectorType>(Ty)->getPrimitiveSizeInBits().getFixedValue() !=
        64) {
      // Invalid MMX constraint
      return nullptr;
    }

    return llvm::Type::getX86_MMXTy(CGF.getLLVMContext());
  }

  // No operation needed
  return Ty;
}

/// Returns true if this type can be passed in SSE registers with the
/// X86_VectorCall calling convention. Shared between x86_32 and x86_64.
static bool isX86VectorTypeForVectorCall(ASTContext &Context, QualType Ty) {
  if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) {
    if (BT->isFloatingPoint() && BT->getKind() != BuiltinType::Half) {
      if (BT->getKind() == BuiltinType::LongDouble) {
        if (&Context.getTargetInfo().getLongDoubleFormat() ==
            &llvm::APFloat::x87DoubleExtended())
          return false;
      }
      return true;
    }
  } else if (const VectorType *VT = Ty->getAs<VectorType>()) {
    // vectorcall can pass XMM, YMM, and ZMM vectors. We don't pass SSE1 MMX
    // registers specially.
    unsigned VecSize = Context.getTypeSize(VT);
    if (VecSize == 128 || VecSize == 256 || VecSize == 512)
      return true;
  }
  return false;
}

/// Returns true if this aggregate is small enough to be passed in SSE registers
/// in the X86_VectorCall calling convention. Shared between x86_32 and x86_64.
static bool isX86VectorCallAggregateSmallEnough(uint64_t NumMembers) {
  return NumMembers <= 4;
}

/// Returns a Homogeneous Vector Aggregate ABIArgInfo, used in X86.
static ABIArgInfo getDirectX86Hva(llvm::Type* T = nullptr) {
  auto AI = ABIArgInfo::getDirect(T);
  AI.setInReg(true);
  AI.setCanBeFlattened(false);
  return AI;
}

//===----------------------------------------------------------------------===//
// X86-32 ABI Implementation
//===----------------------------------------------------------------------===//

/// Similar to llvm::CCState, but for Clang.
struct CCState {
  CCState(CGFunctionInfo &FI)
      : IsPreassigned(FI.arg_size()), CC(FI.getCallingConvention()) {}

  llvm::SmallBitVector IsPreassigned;
  unsigned CC = CallingConv::CC_C;
  unsigned FreeRegs = 0;
  unsigned FreeSSERegs = 0;
};

/// X86_32ABIInfo - The X86-32 ABI information.
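/// Implements the i386 C calling convention together with the Darwin,
/// MSVC-compatible Win32, MCU, Linux, and soft-float variations selected by
/// the flags below.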
class X86_32ABIInfo : public ABIInfo {
  enum Class {
    Integer,
    Float
  };

  static const unsigned MinABIStackAlignInBytes = 4;

  bool IsDarwinVectorABI;
  bool IsRetSmallStructInRegABI;
  bool IsWin32StructABI;
  bool IsSoftFloatABI;
  bool IsMCUABI;
  bool IsLinuxABI;
  unsigned DefaultNumRegisterParameters;

  static bool isRegisterSize(unsigned Size) {
    return (Size == 8 || Size == 16 || Size == 32 || Size == 64);
  }

  bool isHomogeneousAggregateBaseType(QualType Ty) const override {
    // FIXME: Assumes vectorcall is in use.
    return isX86VectorTypeForVectorCall(getContext(), Ty);
  }

  bool isHomogeneousAggregateSmallEnough(const Type *Ty,
                                         uint64_t NumMembers) const override {
    // FIXME: Assumes vectorcall is in use.
    return isX86VectorCallAggregateSmallEnough(NumMembers);
  }

  bool shouldReturnTypeInRegister(QualType Ty, ASTContext &Context) const;

  /// getIndirectResult - Given a source type \arg Ty, return a suitable result
  /// such that the argument will be passed in memory.
  ABIArgInfo getIndirectResult(QualType Ty, bool ByVal, CCState &State) const;

  ABIArgInfo getIndirectReturnResult(QualType Ty, CCState &State) const;

  /// Return the alignment to use for the given type on the stack.
  unsigned getTypeStackAlignInBytes(QualType Ty, unsigned Align) const;

  Class classify(QualType Ty) const;
  ABIArgInfo classifyReturnType(QualType RetTy, CCState &State) const;
  ABIArgInfo classifyArgumentType(QualType RetTy, CCState &State,
                                  bool isDelegateCall) const;

  /// Updates the number of available free registers, returns
  /// true if any registers were allocated.
  bool updateFreeRegs(QualType Ty, CCState &State) const;

  bool shouldAggregateUseDirect(QualType Ty, CCState &State, bool &InReg,
                                bool &NeedsPadding) const;
  bool shouldPrimitiveUseInReg(QualType Ty, CCState &State) const;

  bool canExpandIndirectArgument(QualType Ty) const;

  /// Rewrite the function info so that all memory arguments use
  /// inalloca.
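  /// (Only used for the MSVC-compatible Win32 ABI; the memory arguments are
  /// laid out in a single packed argument struct.)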
  void rewriteWithInAlloca(CGFunctionInfo &FI) const;

  void addFieldToArgStruct(SmallVector<llvm::Type *, 6> &FrameFields,
                           CharUnits &StackOffset, ABIArgInfo &Info,
                           QualType Type) const;
  void runVectorCallFirstPass(CGFunctionInfo &FI, CCState &State) const;

public:

  void computeInfo(CGFunctionInfo &FI) const override;
  Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
                    QualType Ty) const override;

  X86_32ABIInfo(CodeGen::CodeGenTypes &CGT, bool DarwinVectorABI,
                bool RetSmallStructInRegABI, bool Win32StructABI,
                unsigned NumRegisterParameters, bool SoftFloatABI)
      : ABIInfo(CGT), IsDarwinVectorABI(DarwinVectorABI),
        IsRetSmallStructInRegABI(RetSmallStructInRegABI),
        IsWin32StructABI(Win32StructABI), IsSoftFloatABI(SoftFloatABI),
        IsMCUABI(CGT.getTarget().getTriple().isOSIAMCU()),
        IsLinuxABI(CGT.getTarget().getTriple().isOSLinux() ||
                   CGT.getTarget().getTriple().isOSCygMing()),
        DefaultNumRegisterParameters(NumRegisterParameters) {}
};

class X86_32SwiftABIInfo : public SwiftABIInfo {
public:
  explicit X86_32SwiftABIInfo(CodeGenTypes &CGT)
      : SwiftABIInfo(CGT, /*SwiftErrorInRegister=*/false) {}

  bool shouldPassIndirectly(ArrayRef<llvm::Type *> ComponentTys,
                            bool AsReturnValue) const override {
    // LLVM's x86-32 lowering currently only assigns up to three
    // integer registers and three fp registers. Oddly, it'll use up to
    // four vector registers for vectors, but those can overlap with the
    // scalar registers.
    return occupiesMoreThan(ComponentTys, /*total=*/3);
  }
};

class X86_32TargetCodeGenInfo : public TargetCodeGenInfo {
public:
  X86_32TargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, bool DarwinVectorABI,
                          bool RetSmallStructInRegABI, bool Win32StructABI,
                          unsigned NumRegisterParameters, bool SoftFloatABI)
      : TargetCodeGenInfo(std::make_unique<X86_32ABIInfo>(
            CGT, DarwinVectorABI, RetSmallStructInRegABI, Win32StructABI,
            NumRegisterParameters, SoftFloatABI)) {
    SwiftInfo = std::make_unique<X86_32SwiftABIInfo>(CGT);
  }

  static bool isStructReturnInRegABI(
      const llvm::Triple &Triple, const CodeGenOptions &Opts);

  void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
                           CodeGen::CodeGenModule &CGM) const override;

  int getDwarfEHStackPointer(CodeGen::CodeGenModule &CGM) const override {
    // Darwin uses different dwarf register numbers for EH.
    if (CGM.getTarget().getTriple().isOSDarwin()) return 5;
    return 4;
  }

  bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF,
                               llvm::Value *Address) const override;

  llvm::Type* adjustInlineAsmType(CodeGen::CodeGenFunction &CGF,
                                  StringRef Constraint,
                                  llvm::Type* Ty) const override {
    return X86AdjustInlineAsmType(CGF, Constraint, Ty);
  }

  void addReturnRegisterOutputs(CodeGenFunction &CGF, LValue ReturnValue,
                                std::string &Constraints,
                                std::vector<llvm::Type *> &ResultRegTypes,
                                std::vector<llvm::Type *> &ResultTruncRegTypes,
                                std::vector<LValue> &ResultRegDests,
                                std::string &AsmString,
                                unsigned NumOutputs) const override;

  StringRef getARCRetainAutoreleasedReturnValueMarker() const override {
    return "movl\t%ebp, %ebp"
           "\t\t// marker for objc_retainAutoreleaseReturnValue";
  }
};

}

/// Rewrite input constraint references after adding some output constraints.
/// In the case where there is one output and one input and we add one output,
/// we need to replace all operand references greater than or equal to 1:
///     mov $0, $1
///     mov eax, $1
/// The result will be:
///     mov $0, $2
///     mov eax, $2
static void rewriteInputConstraintReferences(unsigned FirstIn,
                                             unsigned NumNewOuts,
                                             std::string &AsmString) {
  std::string Buf;
  llvm::raw_string_ostream OS(Buf);
  size_t Pos = 0;
  while (Pos < AsmString.size()) {
    size_t DollarStart = AsmString.find('$', Pos);
    if (DollarStart == std::string::npos)
      DollarStart = AsmString.size();
    size_t DollarEnd = AsmString.find_first_not_of('$', DollarStart);
    if (DollarEnd == std::string::npos)
      DollarEnd = AsmString.size();
    OS << StringRef(&AsmString[Pos], DollarEnd - Pos);
    Pos = DollarEnd;
    size_t NumDollars = DollarEnd - DollarStart;
    if (NumDollars % 2 != 0 && Pos < AsmString.size()) {
      // We have an operand reference.
      size_t DigitStart = Pos;
      if (AsmString[DigitStart] == '{') {
        OS << '{';
        ++DigitStart;
      }
      size_t DigitEnd = AsmString.find_first_not_of("0123456789", DigitStart);
      if (DigitEnd == std::string::npos)
        DigitEnd = AsmString.size();
      StringRef OperandStr(&AsmString[DigitStart], DigitEnd - DigitStart);
      unsigned OperandIndex;
      if (!OperandStr.getAsInteger(10, OperandIndex)) {
        if (OperandIndex >= FirstIn)
          OperandIndex += NumNewOuts;
        OS << OperandIndex;
      } else {
        OS << OperandStr;
      }
      Pos = DigitEnd;
    }
  }
  AsmString = std::move(OS.str());
}

/// Add output constraints for EAX:EDX because they are return registers.
void X86_32TargetCodeGenInfo::addReturnRegisterOutputs(
    CodeGenFunction &CGF, LValue ReturnSlot, std::string &Constraints,
    std::vector<llvm::Type *> &ResultRegTypes,
    std::vector<llvm::Type *> &ResultTruncRegTypes,
    std::vector<LValue> &ResultRegDests, std::string &AsmString,
    unsigned NumOutputs) const {
  uint64_t RetWidth = CGF.getContext().getTypeSize(ReturnSlot.getType());

  // Use the EAX constraint if the width is 32 or smaller and EAX:EDX if it is
  // larger.
  if (!Constraints.empty())
    Constraints += ',';
  if (RetWidth <= 32) {
    Constraints += "={eax}";
    ResultRegTypes.push_back(CGF.Int32Ty);
  } else {
    // Use the 'A' constraint for EAX:EDX.
    Constraints += "=A";
    ResultRegTypes.push_back(CGF.Int64Ty);
  }

  // Truncate EAX or EAX:EDX to an integer of the appropriate size.
  llvm::Type *CoerceTy = llvm::IntegerType::get(CGF.getLLVMContext(), RetWidth);
  ResultTruncRegTypes.push_back(CoerceTy);

  // Coerce the integer by bitcasting the return slot pointer.
  ReturnSlot.setAddress(ReturnSlot.getAddress(CGF).withElementType(CoerceTy));
  ResultRegDests.push_back(ReturnSlot);

  rewriteInputConstraintReferences(NumOutputs, 1, AsmString);
}

/// shouldReturnTypeInRegister - Determine if the given type should be
/// returned in a register (for the Darwin and MCU ABI).
bool X86_32ABIInfo::shouldReturnTypeInRegister(QualType Ty,
                                               ASTContext &Context) const {
  uint64_t Size = Context.getTypeSize(Ty);

  // For i386, the type must be register sized.
  // For the MCU ABI, it only needs to be <= 8 bytes.
  if ((IsMCUABI && Size > 64) || (!IsMCUABI && !isRegisterSize(Size)))
    return false;

  if (Ty->isVectorType()) {
    // 64- and 128-bit vectors inside structures are not returned in
    // registers.
    if (Size == 64 || Size == 128)
      return false;

    return true;
  }

  // If this is a builtin, pointer, enum, complex type, member pointer, or
  // member function pointer it is ok.
  if (Ty->getAs<BuiltinType>() || Ty->hasPointerRepresentation() ||
      Ty->isAnyComplexType() || Ty->isEnumeralType() ||
      Ty->isBlockPointerType() || Ty->isMemberPointerType())
    return true;

  // Arrays are treated like records.
  if (const ConstantArrayType *AT = Context.getAsConstantArrayType(Ty))
    return shouldReturnTypeInRegister(AT->getElementType(), Context);

  // Otherwise, it must be a record type.
  const RecordType *RT = Ty->getAs<RecordType>();
  if (!RT) return false;

  // FIXME: Traverse bases here too.

  // Structure types are passed in register if all fields would be
  // passed in a register.
  for (const auto *FD : RT->getDecl()->fields()) {
    // Empty fields are ignored.
    if (isEmptyField(Context, FD, true))
      continue;

    // Check fields recursively.
    if (!shouldReturnTypeInRegister(FD->getType(), Context))
      return false;
  }
  return true;
}

static bool is32Or64BitBasicType(QualType Ty, ASTContext &Context) {
  // Treat complex types as the element type.
  if (const ComplexType *CTy = Ty->getAs<ComplexType>())
    Ty = CTy->getElementType();

  // Check for a type which we know has a simple scalar argument-passing
  // convention without any padding. (We're specifically looking for 32
  // and 64-bit integer and integer-equivalents, float, and double.)
  if (!Ty->getAs<BuiltinType>() && !Ty->hasPointerRepresentation() &&
      !Ty->isEnumeralType() && !Ty->isBlockPointerType())
    return false;

  uint64_t Size = Context.getTypeSize(Ty);
  return Size == 32 || Size == 64;
}

static bool addFieldSizes(ASTContext &Context, const RecordDecl *RD,
                          uint64_t &Size) {
  for (const auto *FD : RD->fields()) {
    // Scalar arguments on the stack get 4 byte alignment on x86. If the
    // argument is smaller than 32-bits, expanding the struct will create
    // alignment padding.
    if (!is32Or64BitBasicType(FD->getType(), Context))
      return false;

    // FIXME: Reject bit-fields wholesale; there are two problems, we don't know
    // how to expand them yet, and the predicate for telling if a bitfield still
    // counts as "basic" is more complicated than what we were doing previously.
    if (FD->isBitField())
      return false;

    Size += Context.getTypeSize(FD->getType());
  }
  return true;
}

static bool addBaseAndFieldSizes(ASTContext &Context, const CXXRecordDecl *RD,
                                 uint64_t &Size) {
  // Don't do this if there are any non-empty bases.
  for (const CXXBaseSpecifier &Base : RD->bases()) {
    if (!addBaseAndFieldSizes(Context, Base.getType()->getAsCXXRecordDecl(),
                              Size))
      return false;
  }
  if (!addFieldSizes(Context, RD, Size))
    return false;
  return true;
}

/// Test whether an argument type which is to be passed indirectly (on the
/// stack) would have the equivalent layout if it was expanded into separate
/// arguments. If so, we prefer to do the latter to avoid inhibiting
/// optimizations.
bool X86_32ABIInfo::canExpandIndirectArgument(QualType Ty) const {
  // We can only expand structure types.
  const RecordType *RT = Ty->getAs<RecordType>();
  if (!RT)
    return false;
  const RecordDecl *RD = RT->getDecl();
  uint64_t Size = 0;
  if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD)) {
    if (!IsWin32StructABI) {
      // On non-Windows, we have to conservatively match our old bitcode
      // prototypes in order to be ABI-compatible at the bitcode level.
      if (!CXXRD->isCLike())
        return false;
    } else {
      // Don't do this for dynamic classes.
      if (CXXRD->isDynamicClass())
        return false;
    }
    if (!addBaseAndFieldSizes(getContext(), CXXRD, Size))
      return false;
  } else {
    if (!addFieldSizes(getContext(), RD, Size))
      return false;
  }

  // We can do this if there was no alignment padding.
  return Size == getContext().getTypeSize(Ty);
}

ABIArgInfo X86_32ABIInfo::getIndirectReturnResult(QualType RetTy,
                                                  CCState &State) const {
  // If the return value is indirect, then the hidden argument is consuming one
  // integer register.
  if (State.FreeRegs) {
    --State.FreeRegs;
    if (!IsMCUABI)
      return getNaturalAlignIndirectInReg(RetTy);
  }
  return getNaturalAlignIndirect(RetTy, /*ByVal=*/false);
}

ABIArgInfo X86_32ABIInfo::classifyReturnType(QualType RetTy,
                                             CCState &State) const {
  if (RetTy->isVoidType())
    return ABIArgInfo::getIgnore();

  const Type *Base = nullptr;
  uint64_t NumElts = 0;
  if ((State.CC == llvm::CallingConv::X86_VectorCall ||
       State.CC == llvm::CallingConv::X86_RegCall) &&
      isHomogeneousAggregate(RetTy, Base, NumElts)) {
    // The LLVM struct type for such an aggregate should lower properly.
    return ABIArgInfo::getDirect();
  }

  if (const VectorType *VT = RetTy->getAs<VectorType>()) {
    // On Darwin, some vectors are returned in registers.
    if (IsDarwinVectorABI) {
      uint64_t Size = getContext().getTypeSize(RetTy);

      // 128-bit vectors are a special case; they are returned in
      // registers and we need to make sure to pick a type the LLVM
      // backend will like.
      if (Size == 128)
        return ABIArgInfo::getDirect(llvm::FixedVectorType::get(
            llvm::Type::getInt64Ty(getVMContext()), 2));

      // Always return in register if it fits in a general purpose
      // register, or if it is 64 bits and has a single element.
      if ((Size == 8 || Size == 16 || Size == 32) ||
          (Size == 64 && VT->getNumElements() == 1))
        return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(),
                                                            Size));

      return getIndirectReturnResult(RetTy, State);
    }

    return ABIArgInfo::getDirect();
  }

  if (isAggregateTypeForABI(RetTy)) {
    if (const RecordType *RT = RetTy->getAs<RecordType>()) {
      // Structures with flexible arrays are always indirect.
      if (RT->getDecl()->hasFlexibleArrayMember())
        return getIndirectReturnResult(RetTy, State);
    }

    // If specified, structs and unions are always indirect.
    if (!IsRetSmallStructInRegABI && !RetTy->isAnyComplexType())
      return getIndirectReturnResult(RetTy, State);

    // Ignore empty structs/unions.
    if (isEmptyRecord(getContext(), RetTy, true))
      return ABIArgInfo::getIgnore();

    // Return complex of _Float16 as <2 x half> so the backend will use xmm0.
    if (const ComplexType *CT = RetTy->getAs<ComplexType>()) {
      QualType ET = getContext().getCanonicalType(CT->getElementType());
      if (ET->isFloat16Type())
        return ABIArgInfo::getDirect(llvm::FixedVectorType::get(
            llvm::Type::getHalfTy(getVMContext()), 2));
    }

    // Small structures which are register sized are generally returned
    // in a register.
    if (shouldReturnTypeInRegister(RetTy, getContext())) {
      uint64_t Size = getContext().getTypeSize(RetTy);

      // As a special-case, if the struct is a "single-element" struct, and
      // the field is of type "float" or "double", return it in a
      // floating-point register. (MSVC does not apply this special case.)
      // We apply a similar transformation for pointer types to improve the
      // quality of the generated IR.
      if (const Type *SeltTy = isSingleElementStruct(RetTy, getContext()))
        if ((!IsWin32StructABI && SeltTy->isRealFloatingType())
            || SeltTy->hasPointerRepresentation())
          return ABIArgInfo::getDirect(CGT.ConvertType(QualType(SeltTy, 0)));

      // FIXME: We should be able to narrow this integer in cases with dead
      // padding.
      return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(), Size));
    }

    return getIndirectReturnResult(RetTy, State);
  }

  // Treat an enum type as its underlying type.
  if (const EnumType *EnumTy = RetTy->getAs<EnumType>())
    RetTy = EnumTy->getDecl()->getIntegerType();

  if (const auto *EIT = RetTy->getAs<BitIntType>())
    if (EIT->getNumBits() > 64)
      return getIndirectReturnResult(RetTy, State);

  return (isPromotableIntegerTypeForABI(RetTy) ? ABIArgInfo::getExtend(RetTy)
                                               : ABIArgInfo::getDirect());
}

unsigned X86_32ABIInfo::getTypeStackAlignInBytes(QualType Ty,
                                                 unsigned Align) const {
  // Otherwise, if the alignment is less than or equal to the minimum ABI
  // alignment, just use the default; the backend will handle this.
  if (Align <= MinABIStackAlignInBytes)
    return 0; // Use default alignment.

  if (IsLinuxABI) {
    // Exclude other System V OSes (e.g. Darwin, PS4 and FreeBSD) since we don't
    // want to spend any effort dealing with the ramifications of ABI breaks.
    //
    // If the vector type is __m128/__m256/__m512, return the default alignment.
    if (Ty->isVectorType() && (Align == 16 || Align == 32 || Align == 64))
      return Align;
  }
  // On non-Darwin, the stack type alignment is always 4.
  if (!IsDarwinVectorABI) {
    // Set explicit alignment, since we may need to realign the top.
    return MinABIStackAlignInBytes;
  }

  // Otherwise, if the type contains an SSE vector type, the alignment is 16.
  if (Align >= 16 && (isSIMDVectorType(getContext(), Ty) ||
                      isRecordWithSIMDVectorType(getContext(), Ty)))
    return 16;

  return MinABIStackAlignInBytes;
}

ABIArgInfo X86_32ABIInfo::getIndirectResult(QualType Ty, bool ByVal,
                                            CCState &State) const {
  if (!ByVal) {
    if (State.FreeRegs) {
      --State.FreeRegs; // Non-byval indirects just use one pointer.
      if (!IsMCUABI)
        return getNaturalAlignIndirectInReg(Ty);
    }
    return getNaturalAlignIndirect(Ty, false);
  }

  // Compute the byval alignment.
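  // (getTypeAlign() reports alignment in bits; the division by 8 below
  // converts it to bytes.)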
  unsigned TypeAlign = getContext().getTypeAlign(Ty) / 8;
  unsigned StackAlign = getTypeStackAlignInBytes(Ty, TypeAlign);
  if (StackAlign == 0)
    return ABIArgInfo::getIndirect(CharUnits::fromQuantity(4), /*ByVal=*/true);

  // If the stack alignment is less than the type alignment, realign the
  // argument.
  bool Realign = TypeAlign > StackAlign;
  return ABIArgInfo::getIndirect(CharUnits::fromQuantity(StackAlign),
                                 /*ByVal=*/true, Realign);
}

X86_32ABIInfo::Class X86_32ABIInfo::classify(QualType Ty) const {
  const Type *T = isSingleElementStruct(Ty, getContext());
  if (!T)
    T = Ty.getTypePtr();

  if (const BuiltinType *BT = T->getAs<BuiltinType>()) {
    BuiltinType::Kind K = BT->getKind();
    if (K == BuiltinType::Float || K == BuiltinType::Double)
      return Float;
  }
  return Integer;
}

bool X86_32ABIInfo::updateFreeRegs(QualType Ty, CCState &State) const {
  if (!IsSoftFloatABI) {
    Class C = classify(Ty);
    if (C == Float)
      return false;
  }

  unsigned Size = getContext().getTypeSize(Ty);
  unsigned SizeInRegs = (Size + 31) / 32;

  if (SizeInRegs == 0)
    return false;

  if (!IsMCUABI) {
    if (SizeInRegs > State.FreeRegs) {
      State.FreeRegs = 0;
      return false;
    }
  } else {
    // The MCU psABI allows passing parameters in-reg even if there are
    // earlier parameters that are passed on the stack. Also,
    // it does not allow passing >8-byte structs in-register,
    // even if there are 3 free registers available.
    if (SizeInRegs > State.FreeRegs || SizeInRegs > 2)
      return false;
  }

  State.FreeRegs -= SizeInRegs;
  return true;
}

bool X86_32ABIInfo::shouldAggregateUseDirect(QualType Ty, CCState &State,
                                             bool &InReg,
                                             bool &NeedsPadding) const {
  // On Windows, aggregates other than HFAs are never passed in registers, and
  // they do not consume register slots. Homogenous floating-point aggregates
  // (HFAs) have already been dealt with at this point.
  if (IsWin32StructABI && isAggregateTypeForABI(Ty))
    return false;

  NeedsPadding = false;
  InReg = !IsMCUABI;

  if (!updateFreeRegs(Ty, State))
    return false;

  if (IsMCUABI)
    return true;

  if (State.CC == llvm::CallingConv::X86_FastCall ||
      State.CC == llvm::CallingConv::X86_VectorCall ||
      State.CC == llvm::CallingConv::X86_RegCall) {
    if (getContext().getTypeSize(Ty) <= 32 && State.FreeRegs)
      NeedsPadding = true;

    return false;
  }

  return true;
}

bool X86_32ABIInfo::shouldPrimitiveUseInReg(QualType Ty, CCState &State) const {
  bool IsPtrOrInt = (getContext().getTypeSize(Ty) <= 32) &&
                    (Ty->isIntegralOrEnumerationType() || Ty->isPointerType() ||
                     Ty->isReferenceType());

  if (!IsPtrOrInt && (State.CC == llvm::CallingConv::X86_FastCall ||
                      State.CC == llvm::CallingConv::X86_VectorCall))
    return false;

  if (!updateFreeRegs(Ty, State))
    return false;

  if (!IsPtrOrInt && State.CC == llvm::CallingConv::X86_RegCall)
    return false;

  // Return true to apply inreg to all legal parameters except for MCU targets.
  return !IsMCUABI;
}

void X86_32ABIInfo::runVectorCallFirstPass(CGFunctionInfo &FI,
                                           CCState &State) const {
  // Vectorcall x86 works subtly differently than x64, so the format is
  // a bit different than the x64 version. First, all vector types (not HVAs)
  // are assigned, with the first 6 ending up in the [XYZ]MM0-5 registers.
  // This differs from the x64 implementation, where the first 6 arguments by
  // index get registers.
  // In the second pass over the arguments, HVAs are passed in the remaining
  // vector registers if possible, or indirectly by address. The address will be
  // passed in ECX/EDX if available. Any other arguments are passed according to
  // the usual fastcall rules.
  MutableArrayRef<CGFunctionInfoArgInfo> Args = FI.arguments();
  for (int I = 0, E = Args.size(); I < E; ++I) {
    const Type *Base = nullptr;
    uint64_t NumElts = 0;
    const QualType &Ty = Args[I].type;
    if ((Ty->isVectorType() || Ty->isBuiltinType()) &&
        isHomogeneousAggregate(Ty, Base, NumElts)) {
      if (State.FreeSSERegs >= NumElts) {
        State.FreeSSERegs -= NumElts;
        Args[I].info = ABIArgInfo::getDirectInReg();
        State.IsPreassigned.set(I);
      }
    }
  }
}

ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty, CCState &State,
                                               bool isDelegateCall) const {
  // FIXME: Set alignment on indirect arguments.
  bool IsFastCall = State.CC == llvm::CallingConv::X86_FastCall;
  bool IsRegCall = State.CC == llvm::CallingConv::X86_RegCall;
  bool IsVectorCall = State.CC == llvm::CallingConv::X86_VectorCall;

  Ty = useFirstFieldIfTransparentUnion(Ty);
  TypeInfo TI = getContext().getTypeInfo(Ty);

  // Check with the C++ ABI first.
  const RecordType *RT = Ty->getAs<RecordType>();
  if (RT) {
    CGCXXABI::RecordArgABI RAA = getRecordArgABI(RT, getCXXABI());
    if (RAA == CGCXXABI::RAA_Indirect) {
      return getIndirectResult(Ty, false, State);
    } else if (isDelegateCall) {
      // Avoid having different alignments on delegate call args by always
      // setting the alignment to 4, which is what we do for inallocas.
      ABIArgInfo Res = getIndirectResult(Ty, false, State);
      Res.setIndirectAlign(CharUnits::fromQuantity(4));
      return Res;
    } else if (RAA == CGCXXABI::RAA_DirectInMemory) {
      // The field index doesn't matter, we'll fix it up later.
      return ABIArgInfo::getInAlloca(/*FieldIndex=*/0);
    }
  }

  // Regcall uses the concept of a homogenous vector aggregate, similar
  // to other targets.
  const Type *Base = nullptr;
  uint64_t NumElts = 0;
  if ((IsRegCall || IsVectorCall) &&
      isHomogeneousAggregate(Ty, Base, NumElts)) {
    if (State.FreeSSERegs >= NumElts) {
      State.FreeSSERegs -= NumElts;

      // Vectorcall passes HVAs directly and does not flatten them, but regcall
      // does.
      if (IsVectorCall)
        return getDirectX86Hva();

      if (Ty->isBuiltinType() || Ty->isVectorType())
        return ABIArgInfo::getDirect();
      return ABIArgInfo::getExpand();
    }
    return getIndirectResult(Ty, /*ByVal=*/false, State);
  }

  if (isAggregateTypeForABI(Ty)) {
    // Structures with flexible arrays are always indirect.
    // FIXME: This should not be byval!
    if (RT && RT->getDecl()->hasFlexibleArrayMember())
      return getIndirectResult(Ty, true, State);

    // Ignore empty structs/unions on non-Windows.
    if (!IsWin32StructABI && isEmptyRecord(getContext(), Ty, true))
      return ABIArgInfo::getIgnore();

    llvm::LLVMContext &LLVMContext = getVMContext();
    llvm::IntegerType *Int32 = llvm::Type::getInt32Ty(LLVMContext);
    bool NeedsPadding = false;
    bool InReg;
    if (shouldAggregateUseDirect(Ty, State, InReg, NeedsPadding)) {
      unsigned SizeInRegs = (TI.Width + 31) / 32;
      SmallVector<llvm::Type*, 3> Elements(SizeInRegs, Int32);
      llvm::Type *Result = llvm::StructType::get(LLVMContext, Elements);
      if (InReg)
        return ABIArgInfo::getDirectInReg(Result);
      else
        return ABIArgInfo::getDirect(Result);
    }
    llvm::IntegerType *PaddingType = NeedsPadding ? Int32 : nullptr;

    // Pass over-aligned aggregates on Windows indirectly. This behavior was
    // added in MSVC 2015. Use the required alignment from the record layout,
    // since that may be less than the regular type alignment, and types with
    // required alignment of less than 4 bytes are not passed indirectly.
    if (IsWin32StructABI) {
      unsigned AlignInBits = 0;
      if (RT) {
        const ASTRecordLayout &Layout =
            getContext().getASTRecordLayout(RT->getDecl());
        AlignInBits = getContext().toBits(Layout.getRequiredAlignment());
      } else if (TI.isAlignRequired()) {
        AlignInBits = TI.Align;
      }
      if (AlignInBits > 32)
        return getIndirectResult(Ty, /*ByVal=*/false, State);
    }

    // Expand small (<= 128-bit) record types when we know that the stack layout
    // of those arguments will match the struct. This is important because the
    // LLVM backend isn't smart enough to remove byval, which inhibits many
    // optimizations.
    // Don't do this for the MCU if there are still free integer registers
    // (see X86_64 ABI for full explanation).
    if (TI.Width <= 4 * 32 && (!IsMCUABI || State.FreeRegs == 0) &&
        canExpandIndirectArgument(Ty))
      return ABIArgInfo::getExpandWithPadding(
          IsFastCall || IsVectorCall || IsRegCall, PaddingType);

    return getIndirectResult(Ty, true, State);
  }

  if (const VectorType *VT = Ty->getAs<VectorType>()) {
    // On Windows, vectors are passed directly if registers are available, or
    // indirectly if not. This avoids the need to align argument memory. Pass
    // user-defined vector types larger than 512 bits indirectly for simplicity.
    if (IsWin32StructABI) {
      if (TI.Width <= 512 && State.FreeSSERegs > 0) {
        --State.FreeSSERegs;
        return ABIArgInfo::getDirectInReg();
      }
      return getIndirectResult(Ty, /*ByVal=*/false, State);
    }

    // On Darwin, some vectors are passed in memory, we handle this by passing
    // it as an i8/i16/i32/i64.
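    // (Only vectors of at most 32 bits, or single-element 64-bit vectors, take
    // this integer path; larger vectors fall through to the MMX/direct cases
    // below.)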
    if (IsDarwinVectorABI) {
      if ((TI.Width == 8 || TI.Width == 16 || TI.Width == 32) ||
          (TI.Width == 64 && VT->getNumElements() == 1))
        return ABIArgInfo::getDirect(
            llvm::IntegerType::get(getVMContext(), TI.Width));
    }

    if (IsX86_MMXType(CGT.ConvertType(Ty)))
      return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(), 64));

    return ABIArgInfo::getDirect();
  }

  if (const EnumType *EnumTy = Ty->getAs<EnumType>())
    Ty = EnumTy->getDecl()->getIntegerType();

  bool InReg = shouldPrimitiveUseInReg(Ty, State);

  if (isPromotableIntegerTypeForABI(Ty)) {
    if (InReg)
      return ABIArgInfo::getExtendInReg(Ty);
    return ABIArgInfo::getExtend(Ty);
  }

  if (const auto *EIT = Ty->getAs<BitIntType>()) {
    if (EIT->getNumBits() <= 64) {
      if (InReg)
        return ABIArgInfo::getDirectInReg();
      return ABIArgInfo::getDirect();
    }
    return getIndirectResult(Ty, /*ByVal=*/false, State);
  }

  if (InReg)
    return ABIArgInfo::getDirectInReg();
  return ABIArgInfo::getDirect();
}

void X86_32ABIInfo::computeInfo(CGFunctionInfo &FI) const {
  CCState State(FI);
  if (IsMCUABI)
    State.FreeRegs = 3;
  else if (State.CC == llvm::CallingConv::X86_FastCall) {
    State.FreeRegs = 2;
    State.FreeSSERegs = 3;
  } else if (State.CC == llvm::CallingConv::X86_VectorCall) {
    State.FreeRegs = 2;
    State.FreeSSERegs = 6;
  } else if (FI.getHasRegParm())
    State.FreeRegs = FI.getRegParm();
  else if (State.CC == llvm::CallingConv::X86_RegCall) {
    State.FreeRegs = 5;
    State.FreeSSERegs = 8;
  } else if (IsWin32StructABI) {
    // Since MSVC 2015, the first three SSE vectors have been passed in
    // registers. The rest are passed indirectly.
    State.FreeRegs = DefaultNumRegisterParameters;
    State.FreeSSERegs = 3;
  } else
    State.FreeRegs = DefaultNumRegisterParameters;

  if (!::classifyReturnType(getCXXABI(), FI, *this)) {
    FI.getReturnInfo() = classifyReturnType(FI.getReturnType(), State);
  } else if (FI.getReturnInfo().isIndirect()) {
    // The C++ ABI is not aware of register usage, so we have to check if the
    // return value was sret and put it in a register ourselves if appropriate.
    if (State.FreeRegs) {
      --State.FreeRegs; // The sret parameter consumes a register.
      if (!IsMCUABI)
        FI.getReturnInfo().setInReg(true);
    }
  }

  // The chain argument effectively gives us another free register.
  if (FI.isChainCall())
    ++State.FreeRegs;

  // For vectorcall, do a first pass over the arguments, assigning FP and vector
  // arguments to XMM registers as available.
  if (State.CC == llvm::CallingConv::X86_VectorCall)
    runVectorCallFirstPass(FI, State);

  bool UsedInAlloca = false;
  MutableArrayRef<CGFunctionInfoArgInfo> Args = FI.arguments();
  for (int I = 0, E = Args.size(); I < E; ++I) {
    // Skip arguments that have already been assigned.
    if (State.IsPreassigned.test(I))
      continue;

    Args[I].info =
        classifyArgumentType(Args[I].type, State, FI.isDelegateCall());
    UsedInAlloca |= (Args[I].info.getKind() == ABIArgInfo::InAlloca);
  }

  // If we needed to use inalloca for any argument, do a second pass and rewrite
  // all the memory arguments to use inalloca.
  if (UsedInAlloca)
    rewriteWithInAlloca(FI);
}

void
X86_32ABIInfo::addFieldToArgStruct(SmallVector<llvm::Type *, 6> &FrameFields,
                                   CharUnits &StackOffset, ABIArgInfo &Info,
                                   QualType Type) const {
  // Arguments are always 4-byte-aligned.
  CharUnits WordSize = CharUnits::fromQuantity(4);
  assert(StackOffset.isMultipleOf(WordSize) && "unaligned inalloca struct");

  // sret pointers and indirect things will require an extra pointer
  // indirection, unless they are byval. Most things are byval, and will not
  // require this indirection.
  bool IsIndirect = false;
  if (Info.isIndirect() && !Info.getIndirectByVal())
    IsIndirect = true;
  Info = ABIArgInfo::getInAlloca(FrameFields.size(), IsIndirect);
  llvm::Type *LLTy = CGT.ConvertTypeForMem(Type);
  if (IsIndirect)
    LLTy = llvm::PointerType::getUnqual(getVMContext());
  FrameFields.push_back(LLTy);
  StackOffset += IsIndirect ? WordSize : getContext().getTypeSizeInChars(Type);

  // Insert padding bytes to respect alignment.
  CharUnits FieldEnd = StackOffset;
  StackOffset = FieldEnd.alignTo(WordSize);
  if (StackOffset != FieldEnd) {
    CharUnits NumBytes = StackOffset - FieldEnd;
    llvm::Type *Ty = llvm::Type::getInt8Ty(getVMContext());
    Ty = llvm::ArrayType::get(Ty, NumBytes.getQuantity());
    FrameFields.push_back(Ty);
  }
}

static bool isArgInAlloca(const ABIArgInfo &Info) {
  // Leave ignored and inreg arguments alone.
  switch (Info.getKind()) {
  case ABIArgInfo::InAlloca:
    return true;
  case ABIArgInfo::Ignore:
  case ABIArgInfo::IndirectAliased:
    return false;
  case ABIArgInfo::Indirect:
  case ABIArgInfo::Direct:
  case ABIArgInfo::Extend:
    return !Info.getInReg();
  case ABIArgInfo::Expand:
  case ABIArgInfo::CoerceAndExpand:
    // These are aggregate types which are never passed in registers when
    // inalloca is involved.
    return true;
  }
  llvm_unreachable("invalid enum");
}

void X86_32ABIInfo::rewriteWithInAlloca(CGFunctionInfo &FI) const {
  assert(IsWin32StructABI && "inalloca only supported on win32");

  // Build a packed struct type for all of the arguments in memory.
  SmallVector<llvm::Type *, 6> FrameFields;

  // The stack alignment is always 4.
  CharUnits StackAlign = CharUnits::fromQuantity(4);

  CharUnits StackOffset;
  CGFunctionInfo::arg_iterator I = FI.arg_begin(), E = FI.arg_end();

  // Put 'this' into the struct before 'sret', if necessary.
  bool IsThisCall =
      FI.getCallingConvention() == llvm::CallingConv::X86_ThisCall;
  ABIArgInfo &Ret = FI.getReturnInfo();
  if (Ret.isIndirect() && Ret.isSRetAfterThis() && !IsThisCall &&
      isArgInAlloca(I->info)) {
    addFieldToArgStruct(FrameFields, StackOffset, I->info, I->type);
    ++I;
  }

  // Put the sret parameter into the inalloca struct if it's in memory.
  if (Ret.isIndirect() && !Ret.getInReg()) {
    addFieldToArgStruct(FrameFields, StackOffset, Ret, FI.getReturnType());
    // On Windows, the hidden sret parameter is always returned in eax.
    Ret.setInAllocaSRet(IsWin32StructABI);
  }

  // Skip the 'this' parameter in ecx.
  if (IsThisCall)
    ++I;

  // Put arguments passed in memory into the struct.
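  // (isArgInAlloca above decides what counts as "memory" here: everything
  // except ignored and in-register arguments.)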
  for (; I != E; ++I) {
    if (isArgInAlloca(I->info))
      addFieldToArgStruct(FrameFields, StackOffset, I->info, I->type);
  }

  FI.setArgStruct(llvm::StructType::get(getVMContext(), FrameFields,
                                        /*isPacked=*/true),
                  StackAlign);
}

Address X86_32ABIInfo::EmitVAArg(CodeGenFunction &CGF,
                                 Address VAListAddr, QualType Ty) const {

  auto TypeInfo = getContext().getTypeInfoInChars(Ty);

  // x86-32 changes the alignment of certain arguments on the stack.
  //
  // Just messing with TypeInfo like this works because we never pass
  // anything indirectly.
  TypeInfo.Align = CharUnits::fromQuantity(
      getTypeStackAlignInBytes(Ty, TypeInfo.Align.getQuantity()));

  return emitVoidPtrVAArg(CGF, VAListAddr, Ty, /*Indirect*/ false,
                          TypeInfo, CharUnits::fromQuantity(4),
                          /*AllowHigherAlign*/ true);
}

bool X86_32TargetCodeGenInfo::isStructReturnInRegABI(
    const llvm::Triple &Triple, const CodeGenOptions &Opts) {
  assert(Triple.getArch() == llvm::Triple::x86);

  switch (Opts.getStructReturnConvention()) {
  case CodeGenOptions::SRCK_Default:
    break;
  case CodeGenOptions::SRCK_OnStack: // -fpcc-struct-return
    return false;
  case CodeGenOptions::SRCK_InRegs: // -freg-struct-return
    return true;
  }

  if (Triple.isOSDarwin() || Triple.isOSIAMCU())
    return true;

  switch (Triple.getOS()) {
  case llvm::Triple::DragonFly:
  case llvm::Triple::FreeBSD:
  case llvm::Triple::OpenBSD:
  case llvm::Triple::Win32:
    return true;
  default:
    return false;
  }
}

static void addX86InterruptAttrs(const FunctionDecl *FD, llvm::GlobalValue *GV,
                                 CodeGen::CodeGenModule &CGM) {
  if (!FD->hasAttr<AnyX86InterruptAttr>())
    return;

  llvm::Function *Fn = cast<llvm::Function>(GV);
  Fn->setCallingConv(llvm::CallingConv::X86_INTR);
  if (FD->getNumParams() == 0)
    return;

  auto PtrTy = cast<PointerType>(FD->getParamDecl(0)->getType());
  llvm::Type *ByValTy = CGM.getTypes().ConvertType(PtrTy->getPointeeType());
  llvm::Attribute NewAttr = llvm::Attribute::getWithByValType(
      Fn->getContext(), ByValTy);
  Fn->addParamAttr(0, NewAttr);
}

void X86_32TargetCodeGenInfo::setTargetAttributes(
    const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const {
  if (GV->isDeclaration())
    return;
  if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) {
    if (FD->hasAttr<X86ForceAlignArgPointerAttr>()) {
      llvm::Function *Fn = cast<llvm::Function>(GV);
      Fn->addFnAttr("stackrealign");
    }

    addX86InterruptAttrs(FD, GV, CGM);
  }
}

bool X86_32TargetCodeGenInfo::initDwarfEHRegSizeTable(
    CodeGen::CodeGenFunction &CGF,
    llvm::Value *Address) const {
  CodeGen::CGBuilderTy &Builder = CGF.Builder;

  llvm::Value *Four8 = llvm::ConstantInt::get(CGF.Int8Ty, 4);

  // 0-7 are the eight integer registers; the order is different
  // on Darwin (for EH), but the range is the same.
  // 8 is %eip.
  AssignToArrayRange(Builder, Address, Four8, 0, 8);

  if (CGF.CGM.getTarget().getTriple().isOSDarwin()) {
    // 12-16 are st(0..4). Not sure why we stop at 4.
    // These have size 16, which is sizeof(long double) on
    // platforms with 8-byte alignment for that type.
    llvm::Value *Sixteen8 = llvm::ConstantInt::get(CGF.Int8Ty, 16);
    AssignToArrayRange(Builder, Address, Sixteen8, 12, 16);

  } else {
    // 9 is %eflags, which doesn't get a size on Darwin for some
    // reason.
    Builder.CreateAlignedStore(
        Four8, Builder.CreateConstInBoundsGEP1_32(CGF.Int8Ty, Address, 9),
        CharUnits::One());

    // 11-16 are st(0..5). Not sure why we stop at 5.
    // These have size 12, which is sizeof(long double) on
    // platforms with 4-byte alignment for that type.
    llvm::Value *Twelve8 = llvm::ConstantInt::get(CGF.Int8Ty, 12);
    AssignToArrayRange(Builder, Address, Twelve8, 11, 16);
  }

  return false;
}

//===----------------------------------------------------------------------===//
// X86-64 ABI Implementation
//===----------------------------------------------------------------------===//

namespace {

/// Returns the size in bits of the largest (native) vector for \p AVXLevel.
static unsigned getNativeVectorSizeForAVXABI(X86AVXABILevel AVXLevel) {
  switch (AVXLevel) {
  case X86AVXABILevel::AVX512:
    return 512;
  case X86AVXABILevel::AVX:
    return 256;
  case X86AVXABILevel::None:
    return 128;
  }
  llvm_unreachable("Unknown AVXLevel");
}

/// X86_64ABIInfo - The X86_64 ABI information.
class X86_64ABIInfo : public ABIInfo {
  enum Class {
    Integer = 0,
    SSE,
    SSEUp,
    X87,
    X87Up,
    ComplexX87,
    NoClass,
    Memory
  };

  /// merge - Implement the X86_64 ABI merging algorithm.
  ///
  /// Merge an accumulating classification \arg Accum with a field
  /// classification \arg Field.
  ///
  /// \param Accum - The accumulating classification. This should
  /// always be either NoClass or the result of a previous merge
  /// call. In addition, this should never be Memory (the caller
  /// should just return Memory for the aggregate).
  static Class merge(Class Accum, Class Field);

  /// postMerge - Implement the X86_64 ABI post merging algorithm.
  ///
  /// Post merger cleanup, reduces a malformed Hi and Lo pair to
  /// final MEMORY or SSE classes when necessary.
  ///
  /// \param AggregateSize - The size of the current aggregate in
  /// the classification process.
  ///
  /// \param Lo - The classification for the parts of the type
  /// residing in the low word of the containing object.
  ///
  /// \param Hi - The classification for the parts of the type
  /// residing in the higher words of the containing object.
  ///
  void postMerge(unsigned AggregateSize, Class &Lo, Class &Hi) const;

  /// classify - Determine the x86_64 register classes in which the
  /// given type T should be passed.
  ///
  /// \param Lo - The classification for the parts of the type
  /// residing in the low word of the containing object.
  ///
  /// \param Hi - The classification for the parts of the type
  /// residing in the high word of the containing object.
  ///
  /// \param OffsetBase - The bit offset of this type in the
  /// containing object. Some parameters are classified differently
  /// depending on whether they straddle an eightbyte boundary.
  ///
  /// \param isNamedArg - Whether the argument in question is a "named"
  /// argument, as used in AMD64-ABI 3.5.7.
  ///
  /// \param IsRegCall - Whether the calling convention is regcall.
  ///
  /// If a word is unused its result will be NoClass; if a type should
  /// be passed in Memory then at least the classification of \arg Lo
  /// will be Memory.
  ///
  /// The \arg Lo class will be NoClass iff the argument is ignored.
  ///
  /// If the \arg Lo class is ComplexX87, then the \arg Hi class will
  /// also be ComplexX87.
  void classify(QualType T, uint64_t OffsetBase, Class &Lo, Class &Hi,
                bool isNamedArg, bool IsRegCall = false) const;

  llvm::Type *GetByteVectorType(QualType Ty) const;
  llvm::Type *GetSSETypeAtOffset(llvm::Type *IRType,
                                 unsigned IROffset, QualType SourceTy,
                                 unsigned SourceOffset) const;
  llvm::Type *GetINTEGERTypeAtOffset(llvm::Type *IRType,
                                     unsigned IROffset, QualType SourceTy,
                                     unsigned SourceOffset) const;

  /// getIndirectReturnResult - Given a source type \arg Ty, return a suitable
  /// result such that the argument will be returned in memory.
  ABIArgInfo getIndirectReturnResult(QualType Ty) const;

  /// getIndirectResult - Given a source type \arg Ty, return a suitable result
  /// such that the argument will be passed in memory.
  ///
  /// \param freeIntRegs - The number of free integer registers remaining
  /// available.
  ABIArgInfo getIndirectResult(QualType Ty, unsigned freeIntRegs) const;

  ABIArgInfo classifyReturnType(QualType RetTy) const;

  ABIArgInfo classifyArgumentType(QualType Ty, unsigned freeIntRegs,
                                  unsigned &neededInt, unsigned &neededSSE,
                                  bool isNamedArg,
                                  bool IsRegCall = false) const;

  ABIArgInfo classifyRegCallStructType(QualType Ty, unsigned &NeededInt,
                                       unsigned &NeededSSE,
                                       unsigned &MaxVectorWidth) const;

  ABIArgInfo classifyRegCallStructTypeImpl(QualType Ty, unsigned &NeededInt,
                                           unsigned &NeededSSE,
                                           unsigned &MaxVectorWidth) const;

  bool IsIllegalVectorType(QualType Ty) const;

  /// The 0.98 ABI revision clarified a lot of ambiguities,
  /// unfortunately in ways that were not always consistent with
  /// certain previous compilers. In particular, platforms which
  /// required strict binary compatibility with older versions of GCC
  /// may need to exempt themselves.
  bool honorsRevision0_98() const {
    return !getTarget().getTriple().isOSDarwin();
  }

  /// GCC classifies <1 x long long> as SSE but some platform ABIs choose to
  /// classify it as INTEGER (for compatibility with older clang compilers).
  bool classifyIntegerMMXAsSSE() const {
    // Clang <= 3.8 did not do this.
    if (getContext().getLangOpts().getClangABICompat() <=
        LangOptions::ClangABI::Ver3_8)
      return false;

    const llvm::Triple &Triple = getTarget().getTriple();
    if (Triple.isOSDarwin() || Triple.isPS() || Triple.isOSFreeBSD())
      return false;
    return true;
  }

  // GCC classifies vectors of __int128 as memory.
  bool passInt128VectorsInMem() const {
    // Clang <= 9.0 did not do this.
    if (getContext().getLangOpts().getClangABICompat() <=
        LangOptions::ClangABI::Ver9)
      return false;

    const llvm::Triple &T = getTarget().getTriple();
    return T.isOSLinux() || T.isOSNetBSD();
  }

  X86AVXABILevel AVXLevel;
  // Some ABIs (e.g. X32 ABI and Native Client OS) use 32 bit pointers on
  // 64-bit hardware.
  bool Has64BitPointers;

public:
  X86_64ABIInfo(CodeGen::CodeGenTypes &CGT, X86AVXABILevel AVXLevel)
      : ABIInfo(CGT), AVXLevel(AVXLevel),
        Has64BitPointers(CGT.getDataLayout().getPointerSize(0) == 8) {}

  bool isPassedUsingAVXType(QualType type) const {
    unsigned neededInt, neededSSE;
    // The freeIntRegs argument doesn't matter here.
    ABIArgInfo info = classifyArgumentType(type, 0, neededInt, neededSSE,
                                           /*isNamedArg*/true);
    if (info.isDirect()) {
      llvm::Type *ty = info.getCoerceToType();
      if (llvm::VectorType *vectorTy = dyn_cast_or_null<llvm::VectorType>(ty))
        return vectorTy->getPrimitiveSizeInBits().getFixedValue() > 128;
    }
    return false;
  }

  void computeInfo(CGFunctionInfo &FI) const override;

  Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
                    QualType Ty) const override;
  Address EmitMSVAArg(CodeGenFunction &CGF, Address VAListAddr,
                      QualType Ty) const override;

  bool has64BitPointers() const {
    return Has64BitPointers;
  }
};

/// WinX86_64ABIInfo - The Windows X86_64 ABI information.
class WinX86_64ABIInfo : public ABIInfo {
public:
  WinX86_64ABIInfo(CodeGen::CodeGenTypes &CGT, X86AVXABILevel AVXLevel)
      : ABIInfo(CGT), AVXLevel(AVXLevel),
        IsMingw64(getTarget().getTriple().isWindowsGNUEnvironment()) {}

  void computeInfo(CGFunctionInfo &FI) const override;

  Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
                    QualType Ty) const override;

  bool isHomogeneousAggregateBaseType(QualType Ty) const override {
    // FIXME: Assumes vectorcall is in use.
    return isX86VectorTypeForVectorCall(getContext(), Ty);
  }

  bool isHomogeneousAggregateSmallEnough(const Type *Ty,
                                         uint64_t NumMembers) const override {
    // FIXME: Assumes vectorcall is in use.
    return isX86VectorCallAggregateSmallEnough(NumMembers);
  }

private:
  ABIArgInfo classify(QualType Ty, unsigned &FreeSSERegs, bool IsReturnType,
                      bool IsVectorCall, bool IsRegCall) const;
  ABIArgInfo reclassifyHvaArgForVectorCall(QualType Ty, unsigned &FreeSSERegs,
                                           const ABIArgInfo &current) const;

  X86AVXABILevel AVXLevel;

  bool IsMingw64;
};

class X86_64TargetCodeGenInfo : public TargetCodeGenInfo {
public:
  X86_64TargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, X86AVXABILevel AVXLevel)
      : TargetCodeGenInfo(std::make_unique<X86_64ABIInfo>(CGT, AVXLevel)) {
    SwiftInfo =
        std::make_unique<SwiftABIInfo>(CGT, /*SwiftErrorInRegister=*/true);
  }

  /// Disable tail call on x86-64. The epilogue code before the tail jump blocks
  /// autoreleaseRV/retainRV and autoreleaseRV/unsafeClaimRV optimizations.
  bool markARCOptimizedReturnCallsAsNoTail() const override { return true; }

  int getDwarfEHStackPointer(CodeGen::CodeGenModule &CGM) const override {
    return 7;
  }

  bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF,
                               llvm::Value *Address) const override {
    llvm::Value *Eight8 = llvm::ConstantInt::get(CGF.Int8Ty, 8);

    // 0-15 are the 16 integer registers.
    // 16 is %rip.
    AssignToArrayRange(CGF.Builder, Address, Eight8, 0, 16);
    return false;
  }

  llvm::Type* adjustInlineAsmType(CodeGen::CodeGenFunction &CGF,
                                  StringRef Constraint,
                                  llvm::Type* Ty) const override {
    return X86AdjustInlineAsmType(CGF, Constraint, Ty);
  }

  bool isNoProtoCallVariadic(const CallArgList &args,
                             const FunctionNoProtoType *fnType) const override {
    // The default CC on x86-64 sets %al to the number of SSE
    // registers used, and GCC sets this when calling an unprototyped
    // function, so we override the default behavior. However, don't do
    // that when AVX types are involved: the ABI explicitly states it is
    // undefined, and it doesn't work in practice because of how the ABI
    // defines varargs anyway.
    if (fnType->getCallConv() == CC_C) {
      bool HasAVXType = false;
      for (CallArgList::const_iterator
             it = args.begin(), ie = args.end(); it != ie; ++it) {
        if (getABIInfo<X86_64ABIInfo>().isPassedUsingAVXType(it->Ty)) {
          HasAVXType = true;
          break;
        }
      }

      if (!HasAVXType)
        return true;
    }

    return TargetCodeGenInfo::isNoProtoCallVariadic(args, fnType);
  }

  void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
                           CodeGen::CodeGenModule &CGM) const override {
    if (GV->isDeclaration())
      return;
    if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) {
      if (FD->hasAttr<X86ForceAlignArgPointerAttr>()) {
        llvm::Function *Fn = cast<llvm::Function>(GV);
        Fn->addFnAttr("stackrealign");
      }

      addX86InterruptAttrs(FD, GV, CGM);
    }
  }

  void checkFunctionCallABI(CodeGenModule &CGM, SourceLocation CallLoc,
                            const FunctionDecl *Caller,
                            const FunctionDecl *Callee,
                            const CallArgList &Args) const override;
};
} // namespace

static void initFeatureMaps(const ASTContext &Ctx,
                            llvm::StringMap<bool> &CallerMap,
                            const FunctionDecl *Caller,
                            llvm::StringMap<bool> &CalleeMap,
                            const FunctionDecl *Callee) {
  if (CalleeMap.empty() && CallerMap.empty()) {
    // The caller is potentially nullptr in the case where the call isn't in a
    // function. In this case, getFunctionFeatureMap ensures we just get
    // the TU level setting (since it cannot be modified by 'target').
    Ctx.getFunctionFeatureMap(CallerMap, Caller);
    Ctx.getFunctionFeatureMap(CalleeMap, Callee);
  }
}

static bool checkAVXParamFeature(DiagnosticsEngine &Diag,
                                 SourceLocation CallLoc,
                                 const llvm::StringMap<bool> &CallerMap,
                                 const llvm::StringMap<bool> &CalleeMap,
                                 QualType Ty, StringRef Feature,
                                 bool IsArgument) {
  bool CallerHasFeat = CallerMap.lookup(Feature);
  bool CalleeHasFeat = CalleeMap.lookup(Feature);
  if (!CallerHasFeat && !CalleeHasFeat)
    return Diag.Report(CallLoc, diag::warn_avx_calling_convention)
           << IsArgument << Ty << Feature;

  // Mixing calling conventions here is very clearly an error.
  if (!CallerHasFeat || !CalleeHasFeat)
    return Diag.Report(CallLoc, diag::err_avx_calling_convention)
           << IsArgument << Ty << Feature;

  // Else, both caller and callee have the required feature, so there is no need
  // to diagnose.
  return false;
}

static bool checkAVXParam(DiagnosticsEngine &Diag, ASTContext &Ctx,
                          SourceLocation CallLoc,
                          const llvm::StringMap<bool> &CallerMap,
                          const llvm::StringMap<bool> &CalleeMap, QualType Ty,
                          bool IsArgument) {
  uint64_t Size = Ctx.getTypeSize(Ty);
  if (Size > 256)
    return checkAVXParamFeature(Diag, CallLoc, CallerMap, CalleeMap, Ty,
                                "avx512f", IsArgument);

  if (Size > 128)
    return checkAVXParamFeature(Diag, CallLoc, CallerMap, CalleeMap, Ty, "avx",
                                IsArgument);

  return false;
}

void X86_64TargetCodeGenInfo::checkFunctionCallABI(
    CodeGenModule &CGM, SourceLocation CallLoc, const FunctionDecl *Caller,
    const FunctionDecl *Callee, const CallArgList &Args) const {
  llvm::StringMap<bool> CallerMap;
  llvm::StringMap<bool> CalleeMap;
  unsigned ArgIndex = 0;

  // We need to loop through the actual call arguments rather than the
  // function's parameters, in case this is variadic.
  for (const CallArg &Arg : Args) {
    // The "avx" feature changes how vectors >128 in size are passed. "avx512f"
    // additionally changes how vectors >256 in size are passed. Like GCC, we
    // warn when a function is called with an argument where this will change.
    // Unlike GCC, we also error when it is an obvious ABI mismatch, that is,
    // the caller and callee features are mismatched.
    // Unfortunately, we cannot do this diagnostic in SEMA, since the callee can
    // change its ABI with attribute-target after this call.
    if (Arg.getType()->isVectorType() &&
        CGM.getContext().getTypeSize(Arg.getType()) > 128) {
      initFeatureMaps(CGM.getContext(), CallerMap, Caller, CalleeMap, Callee);
      QualType Ty = Arg.getType();
      // The CallArg seems to have desugared the type already, so for clearer
      // diagnostics, replace it with the type in the FunctionDecl if possible.
      if (ArgIndex < Callee->getNumParams())
        Ty = Callee->getParamDecl(ArgIndex)->getType();

      if (checkAVXParam(CGM.getDiags(), CGM.getContext(), CallLoc, CallerMap,
                        CalleeMap, Ty, /*IsArgument*/ true))
        return;
    }
    ++ArgIndex;
  }

  // Check return always, as we don't have a good way of knowing in codegen
  // whether this value is used, tail-called, etc.
  if (Callee->getReturnType()->isVectorType() &&
      CGM.getContext().getTypeSize(Callee->getReturnType()) > 128) {
    initFeatureMaps(CGM.getContext(), CallerMap, Caller, CalleeMap, Callee);
    checkAVXParam(CGM.getDiags(), CGM.getContext(), CallLoc, CallerMap,
                  CalleeMap, Callee->getReturnType(),
                  /*IsArgument*/ false);
  }
}

std::string TargetCodeGenInfo::qualifyWindowsLibrary(StringRef Lib) {
  // If the argument does not end in .lib, automatically add the suffix.
  // If the argument contains a space, enclose it in quotes.
  // This matches the behavior of MSVC.
  bool Quote = Lib.contains(' ');
  std::string ArgStr = Quote ? "\"" : "";
  ArgStr += Lib;
  if (!Lib.ends_with_insensitive(".lib") && !Lib.ends_with_insensitive(".a"))
    ArgStr += ".lib";
  ArgStr += Quote ? "\"" : "";
  return ArgStr;
}

namespace {
class WinX86_32TargetCodeGenInfo : public X86_32TargetCodeGenInfo {
public:
  WinX86_32TargetCodeGenInfo(CodeGen::CodeGenTypes &CGT,
                             bool DarwinVectorABI, bool RetSmallStructInRegABI,
                             bool Win32StructABI,
                             unsigned NumRegisterParameters)
      : X86_32TargetCodeGenInfo(CGT, DarwinVectorABI, RetSmallStructInRegABI,
                                Win32StructABI, NumRegisterParameters, false) {}

  void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
                           CodeGen::CodeGenModule &CGM) const override;

  void getDependentLibraryOption(llvm::StringRef Lib,
                                 llvm::SmallString<24> &Opt) const override {
    Opt = "/DEFAULTLIB:";
    Opt += qualifyWindowsLibrary(Lib);
  }

  void getDetectMismatchOption(llvm::StringRef Name,
                               llvm::StringRef Value,
                               llvm::SmallString<32> &Opt) const override {
    Opt = "/FAILIFMISMATCH:\"" + Name.str() + "=" + Value.str() + "\"";
  }
};
} // namespace

void WinX86_32TargetCodeGenInfo::setTargetAttributes(
    const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const {
  X86_32TargetCodeGenInfo::setTargetAttributes(D, GV, CGM);
  if (GV->isDeclaration())
    return;
  addStackProbeTargetAttributes(D, GV, CGM);
}

namespace {
class WinX86_64TargetCodeGenInfo : public TargetCodeGenInfo {
public:
  WinX86_64TargetCodeGenInfo(CodeGen::CodeGenTypes &CGT,
                             X86AVXABILevel AVXLevel)
      : TargetCodeGenInfo(std::make_unique<WinX86_64ABIInfo>(CGT, AVXLevel)) {
    SwiftInfo =
        std::make_unique<SwiftABIInfo>(CGT, /*SwiftErrorInRegister=*/true);
  }

  void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
                           CodeGen::CodeGenModule &CGM) const override;

  int getDwarfEHStackPointer(CodeGen::CodeGenModule &CGM) const override {
    return 7;
  }

  bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF,
                               llvm::Value *Address) const override {
    llvm::Value *Eight8 = llvm::ConstantInt::get(CGF.Int8Ty, 8);

    // 0-15 are the 16 integer registers.
    // 16 is %rip.
    AssignToArrayRange(CGF.Builder, Address, Eight8, 0, 16);
    return false;
  }

  void getDependentLibraryOption(llvm::StringRef Lib,
                                 llvm::SmallString<24> &Opt) const override {
    Opt = "/DEFAULTLIB:";
    Opt += qualifyWindowsLibrary(Lib);
  }

  void getDetectMismatchOption(llvm::StringRef Name,
                               llvm::StringRef Value,
                               llvm::SmallString<32> &Opt) const override {
    Opt = "/FAILIFMISMATCH:\"" + Name.str() + "=" + Value.str() + "\"";
  }
};
} // namespace

void WinX86_64TargetCodeGenInfo::setTargetAttributes(
    const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const {
  TargetCodeGenInfo::setTargetAttributes(D, GV, CGM);
  if (GV->isDeclaration())
    return;
  if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) {
    if (FD->hasAttr<X86ForceAlignArgPointerAttr>()) {
      llvm::Function *Fn = cast<llvm::Function>(GV);
      Fn->addFnAttr("stackrealign");
    }

    addX86InterruptAttrs(FD, GV, CGM);
  }

  addStackProbeTargetAttributes(D, GV, CGM);
}

void X86_64ABIInfo::postMerge(unsigned AggregateSize, Class &Lo,
                              Class &Hi) const {
  // AMD64-ABI 3.2.3p2: Rule 5. Then a post merger cleanup is done:
  //
  // (a) If one of the classes is Memory, the whole argument is passed in
  //     memory.
1683 // 1684 // (b) If X87UP is not preceded by X87, the whole argument is passed in 1685 // memory. 1686 // 1687 // (c) If the size of the aggregate exceeds two eightbytes and the first 1688 // eightbyte isn't SSE or any other eightbyte isn't SSEUP, the whole 1689 // argument is passed in memory. NOTE: This is necessary to keep the 1690 // ABI working for processors that don't support the __m256 type. 1691 // 1692 // (d) If SSEUP is not preceded by SSE or SSEUP, it is converted to SSE. 1693 // 1694 // Some of these are enforced by the merging logic. Others can arise 1695 // only with unions; for example: 1696 // union { _Complex double; unsigned; } 1697 // 1698 // Note that clauses (b) and (c) were added in 0.98. 1699 // 1700 if (Hi == Memory) 1701 Lo = Memory; 1702 if (Hi == X87Up && Lo != X87 && honorsRevision0_98()) 1703 Lo = Memory; 1704 if (AggregateSize > 128 && (Lo != SSE || Hi != SSEUp)) 1705 Lo = Memory; 1706 if (Hi == SSEUp && Lo != SSE) 1707 Hi = SSE; 1708 } 1709 1710 X86_64ABIInfo::Class X86_64ABIInfo::merge(Class Accum, Class Field) { 1711 // AMD64-ABI 3.2.3p2: Rule 4. Each field of an object is 1712 // classified recursively so that always two fields are 1713 // considered. The resulting class is calculated according to 1714 // the classes of the fields in the eightbyte: 1715 // 1716 // (a) If both classes are equal, this is the resulting class. 1717 // 1718 // (b) If one of the classes is NO_CLASS, the resulting class is 1719 // the other class. 1720 // 1721 // (c) If one of the classes is MEMORY, the result is the MEMORY 1722 // class. 1723 // 1724 // (d) If one of the classes is INTEGER, the result is the 1725 // INTEGER. 1726 // 1727 // (e) If one of the classes is X87, X87UP, COMPLEX_X87 class, 1728 // MEMORY is used as class. 1729 // 1730 // (f) Otherwise class SSE is used. 1731 1732 // Accum should never be memory (we should have returned) or 1733 // ComplexX87 (because this cannot be passed in a structure). 1734 assert((Accum != Memory && Accum != ComplexX87) && 1735 "Invalid accumulated classification during merge."); 1736 if (Accum == Field || Field == NoClass) 1737 return Accum; 1738 if (Field == Memory) 1739 return Memory; 1740 if (Accum == NoClass) 1741 return Field; 1742 if (Accum == Integer || Field == Integer) 1743 return Integer; 1744 if (Field == X87 || Field == X87Up || Field == ComplexX87 || 1745 Accum == X87 || Accum == X87Up) 1746 return Memory; 1747 return SSE; 1748 } 1749 1750 void X86_64ABIInfo::classify(QualType Ty, uint64_t OffsetBase, Class &Lo, 1751 Class &Hi, bool isNamedArg, bool IsRegCall) const { 1752 // FIXME: This code can be simplified by introducing a simple value class for 1753 // Class pairs with appropriate constructor methods for the various 1754 // situations. 1755 1756 // FIXME: Some of the split computations are wrong; unaligned vectors 1757 // shouldn't be passed in registers for example, so there is no chance they 1758 // can straddle an eightbyte. Verify & simplify. 1759 1760 Lo = Hi = NoClass; 1761 1762 Class &Current = OffsetBase < 64 ? 
Lo : Hi; 1763 Current = Memory; 1764 1765 if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) { 1766 BuiltinType::Kind k = BT->getKind(); 1767 1768 if (k == BuiltinType::Void) { 1769 Current = NoClass; 1770 } else if (k == BuiltinType::Int128 || k == BuiltinType::UInt128) { 1771 Lo = Integer; 1772 Hi = Integer; 1773 } else if (k >= BuiltinType::Bool && k <= BuiltinType::LongLong) { 1774 Current = Integer; 1775 } else if (k == BuiltinType::Float || k == BuiltinType::Double || 1776 k == BuiltinType::Float16 || k == BuiltinType::BFloat16) { 1777 Current = SSE; 1778 } else if (k == BuiltinType::LongDouble) { 1779 const llvm::fltSemantics *LDF = &getTarget().getLongDoubleFormat(); 1780 if (LDF == &llvm::APFloat::IEEEquad()) { 1781 Lo = SSE; 1782 Hi = SSEUp; 1783 } else if (LDF == &llvm::APFloat::x87DoubleExtended()) { 1784 Lo = X87; 1785 Hi = X87Up; 1786 } else if (LDF == &llvm::APFloat::IEEEdouble()) { 1787 Current = SSE; 1788 } else 1789 llvm_unreachable("unexpected long double representation!"); 1790 } 1791 // FIXME: _Decimal32 and _Decimal64 are SSE. 1792 // FIXME: _float128 and _Decimal128 are (SSE, SSEUp). 1793 return; 1794 } 1795 1796 if (const EnumType *ET = Ty->getAs<EnumType>()) { 1797 // Classify the underlying integer type. 1798 classify(ET->getDecl()->getIntegerType(), OffsetBase, Lo, Hi, isNamedArg); 1799 return; 1800 } 1801 1802 if (Ty->hasPointerRepresentation()) { 1803 Current = Integer; 1804 return; 1805 } 1806 1807 if (Ty->isMemberPointerType()) { 1808 if (Ty->isMemberFunctionPointerType()) { 1809 if (Has64BitPointers) { 1810 // If Has64BitPointers, this is an {i64, i64}, so classify both 1811 // Lo and Hi now. 1812 Lo = Hi = Integer; 1813 } else { 1814 // Otherwise, with 32-bit pointers, this is an {i32, i32}. If that 1815 // straddles an eightbyte boundary, Hi should be classified as well. 1816 uint64_t EB_FuncPtr = (OffsetBase) / 64; 1817 uint64_t EB_ThisAdj = (OffsetBase + 64 - 1) / 64; 1818 if (EB_FuncPtr != EB_ThisAdj) { 1819 Lo = Hi = Integer; 1820 } else { 1821 Current = Integer; 1822 } 1823 } 1824 } else { 1825 Current = Integer; 1826 } 1827 return; 1828 } 1829 1830 if (const VectorType *VT = Ty->getAs<VectorType>()) { 1831 uint64_t Size = getContext().getTypeSize(VT); 1832 if (Size == 1 || Size == 8 || Size == 16 || Size == 32) { 1833 // gcc passes the following as integer: 1834 // 4 bytes - <4 x char>, <2 x short>, <1 x int>, <1 x float> 1835 // 2 bytes - <2 x char>, <1 x short> 1836 // 1 byte - <1 x char> 1837 Current = Integer; 1838 1839 // If this type crosses an eightbyte boundary, it should be 1840 // split. 1841 uint64_t EB_Lo = (OffsetBase) / 64; 1842 uint64_t EB_Hi = (OffsetBase + Size - 1) / 64; 1843 if (EB_Lo != EB_Hi) 1844 Hi = Lo; 1845 } else if (Size == 64) { 1846 QualType ElementType = VT->getElementType(); 1847 1848 // gcc passes <1 x double> in memory. :( 1849 if (ElementType->isSpecificBuiltinType(BuiltinType::Double)) 1850 return; 1851 1852 // gcc passes <1 x long long> as SSE but clang used to unconditionally 1853 // pass them as integer. For platforms where clang is the de facto 1854 // platform compiler, we must continue to use integer. 
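// For example (illustrative): __m64, i.e. <1 x long long>, stays INTEGER
// (a GPR) when classifyIntegerMMXAsSSE() is false and becomes SSE (an XMM
// register) otherwise, whereas a same-sized <2 x int> vector is classified
// SSE either way, because the carve-out below only covers the long and
// long long element types.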
1855 if (!classifyIntegerMMXAsSSE() && 1856 (ElementType->isSpecificBuiltinType(BuiltinType::LongLong) || 1857 ElementType->isSpecificBuiltinType(BuiltinType::ULongLong) || 1858 ElementType->isSpecificBuiltinType(BuiltinType::Long) || 1859 ElementType->isSpecificBuiltinType(BuiltinType::ULong))) 1860 Current = Integer; 1861 else 1862 Current = SSE; 1863 1864 // If this type crosses an eightbyte boundary, it should be 1865 // split. 1866 if (OffsetBase && OffsetBase != 64) 1867 Hi = Lo; 1868 } else if (Size == 128 || 1869 (isNamedArg && Size <= getNativeVectorSizeForAVXABI(AVXLevel))) { 1870 QualType ElementType = VT->getElementType(); 1871 1872 // gcc passes 256 and 512 bit <X x __int128> vectors in memory. :( 1873 if (passInt128VectorsInMem() && Size != 128 && 1874 (ElementType->isSpecificBuiltinType(BuiltinType::Int128) || 1875 ElementType->isSpecificBuiltinType(BuiltinType::UInt128))) 1876 return; 1877 1878 // Arguments of 256-bits are split into four eightbyte chunks. The 1879 // least significant one belongs to class SSE and all the others to class 1880 // SSEUP. The original Lo and Hi design considers that types can't be 1881 // greater than 128-bits, so a 64-bit split in Hi and Lo makes sense. 1882 // This design isn't correct for 256-bits, but since there're no cases 1883 // where the upper parts would need to be inspected, avoid adding 1884 // complexity and just consider Hi to match the 64-256 part. 1885 // 1886 // Note that per 3.5.7 of AMD64-ABI, 256-bit args are only passed in 1887 // registers if they are "named", i.e. not part of the "..." of a 1888 // variadic function. 1889 // 1890 // Similarly, per 3.2.3. of the AVX512 draft, 512-bits ("named") args are 1891 // split into eight eightbyte chunks, one SSE and seven SSEUP. 1892 Lo = SSE; 1893 Hi = SSEUp; 1894 } 1895 return; 1896 } 1897 1898 if (const ComplexType *CT = Ty->getAs<ComplexType>()) { 1899 QualType ET = getContext().getCanonicalType(CT->getElementType()); 1900 1901 uint64_t Size = getContext().getTypeSize(Ty); 1902 if (ET->isIntegralOrEnumerationType()) { 1903 if (Size <= 64) 1904 Current = Integer; 1905 else if (Size <= 128) 1906 Lo = Hi = Integer; 1907 } else if (ET->isFloat16Type() || ET == getContext().FloatTy || 1908 ET->isBFloat16Type()) { 1909 Current = SSE; 1910 } else if (ET == getContext().DoubleTy) { 1911 Lo = Hi = SSE; 1912 } else if (ET == getContext().LongDoubleTy) { 1913 const llvm::fltSemantics *LDF = &getTarget().getLongDoubleFormat(); 1914 if (LDF == &llvm::APFloat::IEEEquad()) 1915 Current = Memory; 1916 else if (LDF == &llvm::APFloat::x87DoubleExtended()) 1917 Current = ComplexX87; 1918 else if (LDF == &llvm::APFloat::IEEEdouble()) 1919 Lo = Hi = SSE; 1920 else 1921 llvm_unreachable("unexpected long double representation!"); 1922 } 1923 1924 // If this complex type crosses an eightbyte boundary then it 1925 // should be split. 1926 uint64_t EB_Real = (OffsetBase) / 64; 1927 uint64_t EB_Imag = (OffsetBase + getContext().getTypeSize(ET)) / 64; 1928 if (Hi == NoClass && EB_Real != EB_Imag) 1929 Hi = Lo; 1930 1931 return; 1932 } 1933 1934 if (const auto *EITy = Ty->getAs<BitIntType>()) { 1935 if (EITy->getNumBits() <= 64) 1936 Current = Integer; 1937 else if (EITy->getNumBits() <= 128) 1938 Lo = Hi = Integer; 1939 // Larger values need to get passed in memory. 1940 return; 1941 } 1942 1943 if (const ConstantArrayType *AT = getContext().getAsConstantArrayType(Ty)) { 1944 // Arrays are treated like structures. 
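// For example (illustrative): double[2] classifies its two eightbytes as
// SSE/SSE and int[3] as INTEGER/INTEGER, while double[4] (32 bytes) falls
// back to MEMORY because the Lo/Hi scheme below only covers 16 bytes unless
// the array is a single large vector element.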
1945 1946 uint64_t Size = getContext().getTypeSize(Ty); 1947 1948 // AMD64-ABI 3.2.3p2: Rule 1. If the size of an object is larger 1949 // than eight eightbytes, ..., it has class MEMORY. 1950 // regcall ABI doesn't have limitation to an object. The only limitation 1951 // is the free registers, which will be checked in computeInfo. 1952 if (!IsRegCall && Size > 512) 1953 return; 1954 1955 // AMD64-ABI 3.2.3p2: Rule 1. If ..., or it contains unaligned 1956 // fields, it has class MEMORY. 1957 // 1958 // Only need to check alignment of array base. 1959 if (OffsetBase % getContext().getTypeAlign(AT->getElementType())) 1960 return; 1961 1962 // Otherwise implement simplified merge. We could be smarter about 1963 // this, but it isn't worth it and would be harder to verify. 1964 Current = NoClass; 1965 uint64_t EltSize = getContext().getTypeSize(AT->getElementType()); 1966 uint64_t ArraySize = AT->getSize().getZExtValue(); 1967 1968 // The only case a 256-bit wide vector could be used is when the array 1969 // contains a single 256-bit element. Since Lo and Hi logic isn't extended 1970 // to work for sizes wider than 128, early check and fallback to memory. 1971 // 1972 if (Size > 128 && 1973 (Size != EltSize || Size > getNativeVectorSizeForAVXABI(AVXLevel))) 1974 return; 1975 1976 for (uint64_t i=0, Offset=OffsetBase; i<ArraySize; ++i, Offset += EltSize) { 1977 Class FieldLo, FieldHi; 1978 classify(AT->getElementType(), Offset, FieldLo, FieldHi, isNamedArg); 1979 Lo = merge(Lo, FieldLo); 1980 Hi = merge(Hi, FieldHi); 1981 if (Lo == Memory || Hi == Memory) 1982 break; 1983 } 1984 1985 postMerge(Size, Lo, Hi); 1986 assert((Hi != SSEUp || Lo == SSE) && "Invalid SSEUp array classification."); 1987 return; 1988 } 1989 1990 if (const RecordType *RT = Ty->getAs<RecordType>()) { 1991 uint64_t Size = getContext().getTypeSize(Ty); 1992 1993 // AMD64-ABI 3.2.3p2: Rule 1. If the size of an object is larger 1994 // than eight eightbytes, ..., it has class MEMORY. 1995 if (Size > 512) 1996 return; 1997 1998 // AMD64-ABI 3.2.3p2: Rule 2. If a C++ object has either a non-trivial 1999 // copy constructor or a non-trivial destructor, it is passed by invisible 2000 // reference. 2001 if (getRecordArgABI(RT, getCXXABI())) 2002 return; 2003 2004 const RecordDecl *RD = RT->getDecl(); 2005 2006 // Assume variable sized types are passed in memory. 2007 if (RD->hasFlexibleArrayMember()) 2008 return; 2009 2010 const ASTRecordLayout &Layout = getContext().getASTRecordLayout(RD); 2011 2012 // Reset Lo class, this will be recomputed. 2013 Current = NoClass; 2014 2015 // If this is a C++ record, classify the bases first. 2016 if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD)) { 2017 for (const auto &I : CXXRD->bases()) { 2018 assert(!I.isVirtual() && !I.getType()->isDependentType() && 2019 "Unexpected base class!"); 2020 const auto *Base = 2021 cast<CXXRecordDecl>(I.getType()->castAs<RecordType>()->getDecl()); 2022 2023 // Classify this field. 2024 // 2025 // AMD64-ABI 3.2.3p2: Rule 3. If the size of the aggregate exceeds a 2026 // single eightbyte, each is classified separately. Each eightbyte gets 2027 // initialized to class NO_CLASS. 
2028 Class FieldLo, FieldHi; 2029 uint64_t Offset = 2030 OffsetBase + getContext().toBits(Layout.getBaseClassOffset(Base)); 2031 classify(I.getType(), Offset, FieldLo, FieldHi, isNamedArg); 2032 Lo = merge(Lo, FieldLo); 2033 Hi = merge(Hi, FieldHi); 2034 if (Lo == Memory || Hi == Memory) { 2035 postMerge(Size, Lo, Hi); 2036 return; 2037 } 2038 } 2039 } 2040 2041 // Classify the fields one at a time, merging the results. 2042 unsigned idx = 0; 2043 bool UseClang11Compat = getContext().getLangOpts().getClangABICompat() <= 2044 LangOptions::ClangABI::Ver11 || 2045 getContext().getTargetInfo().getTriple().isPS(); 2046 bool IsUnion = RT->isUnionType() && !UseClang11Compat; 2047 2048 for (RecordDecl::field_iterator i = RD->field_begin(), e = RD->field_end(); 2049 i != e; ++i, ++idx) { 2050 uint64_t Offset = OffsetBase + Layout.getFieldOffset(idx); 2051 bool BitField = i->isBitField(); 2052 2053 // Ignore padding bit-fields. 2054 if (BitField && i->isUnnamedBitfield()) 2055 continue; 2056 2057 // AMD64-ABI 3.2.3p2: Rule 1. If the size of an object is larger than 2058 // eight eightbytes, or it contains unaligned fields, it has class MEMORY. 2059 // 2060 // The only case a 256-bit or a 512-bit wide vector could be used is when 2061 // the struct contains a single 256-bit or 512-bit element. Early check 2062 // and fallback to memory. 2063 // 2064 // FIXME: Extended the Lo and Hi logic properly to work for size wider 2065 // than 128. 2066 if (Size > 128 && 2067 ((!IsUnion && Size != getContext().getTypeSize(i->getType())) || 2068 Size > getNativeVectorSizeForAVXABI(AVXLevel))) { 2069 Lo = Memory; 2070 postMerge(Size, Lo, Hi); 2071 return; 2072 } 2073 // Note, skip this test for bit-fields, see below. 2074 if (!BitField && Offset % getContext().getTypeAlign(i->getType())) { 2075 Lo = Memory; 2076 postMerge(Size, Lo, Hi); 2077 return; 2078 } 2079 2080 // Classify this field. 2081 // 2082 // AMD64-ABI 3.2.3p2: Rule 3. If the size of the aggregate 2083 // exceeds a single eightbyte, each is classified 2084 // separately. Each eightbyte gets initialized to class 2085 // NO_CLASS. 2086 Class FieldLo, FieldHi; 2087 2088 // Bit-fields require special handling, they do not force the 2089 // structure to be passed in memory even if unaligned, and 2090 // therefore they can straddle an eightbyte. 2091 if (BitField) { 2092 assert(!i->isUnnamedBitfield()); 2093 uint64_t Offset = OffsetBase + Layout.getFieldOffset(idx); 2094 uint64_t Size = i->getBitWidthValue(getContext()); 2095 2096 uint64_t EB_Lo = Offset / 64; 2097 uint64_t EB_Hi = (Offset + Size - 1) / 64; 2098 2099 if (EB_Lo) { 2100 assert(EB_Hi == EB_Lo && "Invalid classification, type > 16 bytes."); 2101 FieldLo = NoClass; 2102 FieldHi = Integer; 2103 } else { 2104 FieldLo = Integer; 2105 FieldHi = EB_Hi ? Integer : NoClass; 2106 } 2107 } else 2108 classify(i->getType(), Offset, FieldLo, FieldHi, isNamedArg); 2109 Lo = merge(Lo, FieldLo); 2110 Hi = merge(Hi, FieldHi); 2111 if (Lo == Memory || Hi == Memory) 2112 break; 2113 } 2114 2115 postMerge(Size, Lo, Hi); 2116 } 2117 } 2118 2119 ABIArgInfo X86_64ABIInfo::getIndirectReturnResult(QualType Ty) const { 2120 // If this is a scalar LLVM value then assume LLVM will pass it in the right 2121 // place naturally. 2122 if (!isAggregateTypeForABI(Ty)) { 2123 // Treat an enum type as its underlying type. 
2124 if (const EnumType *EnumTy = Ty->getAs<EnumType>()) 2125 Ty = EnumTy->getDecl()->getIntegerType(); 2126 2127 if (Ty->isBitIntType()) 2128 return getNaturalAlignIndirect(Ty); 2129 2130 return (isPromotableIntegerTypeForABI(Ty) ? ABIArgInfo::getExtend(Ty) 2131 : ABIArgInfo::getDirect()); 2132 } 2133 2134 return getNaturalAlignIndirect(Ty); 2135 } 2136 2137 bool X86_64ABIInfo::IsIllegalVectorType(QualType Ty) const { 2138 if (const VectorType *VecTy = Ty->getAs<VectorType>()) { 2139 uint64_t Size = getContext().getTypeSize(VecTy); 2140 unsigned LargestVector = getNativeVectorSizeForAVXABI(AVXLevel); 2141 if (Size <= 64 || Size > LargestVector) 2142 return true; 2143 QualType EltTy = VecTy->getElementType(); 2144 if (passInt128VectorsInMem() && 2145 (EltTy->isSpecificBuiltinType(BuiltinType::Int128) || 2146 EltTy->isSpecificBuiltinType(BuiltinType::UInt128))) 2147 return true; 2148 } 2149 2150 return false; 2151 } 2152 2153 ABIArgInfo X86_64ABIInfo::getIndirectResult(QualType Ty, 2154 unsigned freeIntRegs) const { 2155 // If this is a scalar LLVM value then assume LLVM will pass it in the right 2156 // place naturally. 2157 // 2158 // This assumption is optimistic, as there could be free registers available 2159 // when we need to pass this argument in memory, and LLVM could try to pass 2160 // the argument in the free register. This does not seem to happen currently, 2161 // but this code would be much safer if we could mark the argument with 2162 // 'onstack'. See PR12193. 2163 if (!isAggregateTypeForABI(Ty) && !IsIllegalVectorType(Ty) && 2164 !Ty->isBitIntType()) { 2165 // Treat an enum type as its underlying type. 2166 if (const EnumType *EnumTy = Ty->getAs<EnumType>()) 2167 Ty = EnumTy->getDecl()->getIntegerType(); 2168 2169 return (isPromotableIntegerTypeForABI(Ty) ? ABIArgInfo::getExtend(Ty) 2170 : ABIArgInfo::getDirect()); 2171 } 2172 2173 if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) 2174 return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory); 2175 2176 // Compute the byval alignment. We specify the alignment of the byval in all 2177 // cases so that the mid-level optimizer knows the alignment of the byval. 2178 unsigned Align = std::max(getContext().getTypeAlign(Ty) / 8, 8U); 2179 2180 // Attempt to avoid passing indirect results using byval when possible. This 2181 // is important for good codegen. 2182 // 2183 // We do this by coercing the value into a scalar type which the backend can 2184 // handle naturally (i.e., without using byval). 2185 // 2186 // For simplicity, we currently only do this when we have exhausted all of the 2187 // free integer registers. Doing this when there are free integer registers 2188 // would require more care, as we would have to ensure that the coerced value 2189 // did not claim the unused register. That would require either reording the 2190 // arguments to the function (so that any subsequent inreg values came first), 2191 // or only doing this optimization when there were no following arguments that 2192 // might be inreg. 2193 // 2194 // We currently expect it to be rare (particularly in well written code) for 2195 // arguments to be passed on the stack when there are still free integer 2196 // registers available (this would typically imply large structs being passed 2197 // by value), so this seems like a fair tradeoff for now. 2198 // 2199 // We can revisit this if the backend grows support for 'onstack' parameter 2200 // attributes. See PR12193. 
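// For example (illustrative): once the free integer registers are exhausted,
// a struct { int a, b; } (one eightbyte, natural alignment <= 8) is coerced
// below to a plain i64 on the stack, whereas an over-aligned or larger
// aggregate still goes indirect with the byval alignment computed above.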
2201 if (freeIntRegs == 0) { 2202 uint64_t Size = getContext().getTypeSize(Ty); 2203 2204 // If this type fits in an eightbyte, coerce it into the matching integral 2205 // type, which will end up on the stack (with alignment 8). 2206 if (Align == 8 && Size <= 64) 2207 return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(), 2208 Size)); 2209 } 2210 2211 return ABIArgInfo::getIndirect(CharUnits::fromQuantity(Align)); 2212 } 2213 2214 /// The ABI specifies that a value should be passed in a full vector XMM/YMM 2215 /// register. Pick an LLVM IR type that will be passed as a vector register. 2216 llvm::Type *X86_64ABIInfo::GetByteVectorType(QualType Ty) const { 2217 // Wrapper structs/arrays that only contain vectors are passed just like 2218 // vectors; strip them off if present. 2219 if (const Type *InnerTy = isSingleElementStruct(Ty, getContext())) 2220 Ty = QualType(InnerTy, 0); 2221 2222 llvm::Type *IRType = CGT.ConvertType(Ty); 2223 if (isa<llvm::VectorType>(IRType)) { 2224 // Don't pass vXi128 vectors in their native type, the backend can't 2225 // legalize them. 2226 if (passInt128VectorsInMem() && 2227 cast<llvm::VectorType>(IRType)->getElementType()->isIntegerTy(128)) { 2228 // Use a vXi64 vector. 2229 uint64_t Size = getContext().getTypeSize(Ty); 2230 return llvm::FixedVectorType::get(llvm::Type::getInt64Ty(getVMContext()), 2231 Size / 64); 2232 } 2233 2234 return IRType; 2235 } 2236 2237 if (IRType->getTypeID() == llvm::Type::FP128TyID) 2238 return IRType; 2239 2240 // We couldn't find the preferred IR vector type for 'Ty'. 2241 uint64_t Size = getContext().getTypeSize(Ty); 2242 assert((Size == 128 || Size == 256 || Size == 512) && "Invalid type found!"); 2243 2244 2245 // Return a LLVM IR vector type based on the size of 'Ty'. 2246 return llvm::FixedVectorType::get(llvm::Type::getDoubleTy(getVMContext()), 2247 Size / 64); 2248 } 2249 2250 /// BitsContainNoUserData - Return true if the specified [start,end) bit range 2251 /// is known to either be off the end of the specified type or being in 2252 /// alignment padding. The user type specified is known to be at most 128 bits 2253 /// in size, and have passed through X86_64ABIInfo::classify with a successful 2254 /// classification that put one of the two halves in the INTEGER class. 2255 /// 2256 /// It is conservatively correct to return false. 2257 static bool BitsContainNoUserData(QualType Ty, unsigned StartBit, 2258 unsigned EndBit, ASTContext &Context) { 2259 // If the bytes being queried are off the end of the type, there is no user 2260 // data hiding here. This handles analysis of builtins, vectors and other 2261 // types that don't contain interesting padding. 2262 unsigned TySize = (unsigned)Context.getTypeSize(Ty); 2263 if (TySize <= StartBit) 2264 return true; 2265 2266 if (const ConstantArrayType *AT = Context.getAsConstantArrayType(Ty)) { 2267 unsigned EltSize = (unsigned)Context.getTypeSize(AT->getElementType()); 2268 unsigned NumElts = (unsigned)AT->getSize().getZExtValue(); 2269 2270 // Check each element to see if the element overlaps with the queried range. 2271 for (unsigned i = 0; i != NumElts; ++i) { 2272 // If the element is after the span we care about, then we're done.. 2273 unsigned EltOffset = i*EltSize; 2274 if (EltOffset >= EndBit) break; 2275 2276 unsigned EltStart = EltOffset < StartBit ? 
StartBit-EltOffset :0; 2277 if (!BitsContainNoUserData(AT->getElementType(), EltStart, 2278 EndBit-EltOffset, Context)) 2279 return false; 2280 } 2281 // If it overlaps no elements, then it is safe to process as padding. 2282 return true; 2283 } 2284 2285 if (const RecordType *RT = Ty->getAs<RecordType>()) { 2286 const RecordDecl *RD = RT->getDecl(); 2287 const ASTRecordLayout &Layout = Context.getASTRecordLayout(RD); 2288 2289 // If this is a C++ record, check the bases first. 2290 if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD)) { 2291 for (const auto &I : CXXRD->bases()) { 2292 assert(!I.isVirtual() && !I.getType()->isDependentType() && 2293 "Unexpected base class!"); 2294 const auto *Base = 2295 cast<CXXRecordDecl>(I.getType()->castAs<RecordType>()->getDecl()); 2296 2297 // If the base is after the span we care about, ignore it. 2298 unsigned BaseOffset = Context.toBits(Layout.getBaseClassOffset(Base)); 2299 if (BaseOffset >= EndBit) continue; 2300 2301 unsigned BaseStart = BaseOffset < StartBit ? StartBit-BaseOffset :0; 2302 if (!BitsContainNoUserData(I.getType(), BaseStart, 2303 EndBit-BaseOffset, Context)) 2304 return false; 2305 } 2306 } 2307 2308 // Verify that no field has data that overlaps the region of interest. Yes 2309 // this could be sped up a lot by being smarter about queried fields, 2310 // however we're only looking at structs up to 16 bytes, so we don't care 2311 // much. 2312 unsigned idx = 0; 2313 for (RecordDecl::field_iterator i = RD->field_begin(), e = RD->field_end(); 2314 i != e; ++i, ++idx) { 2315 unsigned FieldOffset = (unsigned)Layout.getFieldOffset(idx); 2316 2317 // If we found a field after the region we care about, then we're done. 2318 if (FieldOffset >= EndBit) break; 2319 2320 unsigned FieldStart = FieldOffset < StartBit ? StartBit-FieldOffset :0; 2321 if (!BitsContainNoUserData(i->getType(), FieldStart, EndBit-FieldOffset, 2322 Context)) 2323 return false; 2324 } 2325 2326 // If nothing in this record overlapped the area of interest, then we're 2327 // clean. 2328 return true; 2329 } 2330 2331 return false; 2332 } 2333 2334 /// getFPTypeAtOffset - Return a floating point type at the specified offset. 2335 static llvm::Type *getFPTypeAtOffset(llvm::Type *IRType, unsigned IROffset, 2336 const llvm::DataLayout &TD) { 2337 if (IROffset == 0 && IRType->isFloatingPointTy()) 2338 return IRType; 2339 2340 // If this is a struct, recurse into the field at the specified offset. 2341 if (llvm::StructType *STy = dyn_cast<llvm::StructType>(IRType)) { 2342 if (!STy->getNumContainedTypes()) 2343 return nullptr; 2344 2345 const llvm::StructLayout *SL = TD.getStructLayout(STy); 2346 unsigned Elt = SL->getElementContainingOffset(IROffset); 2347 IROffset -= SL->getElementOffset(Elt); 2348 return getFPTypeAtOffset(STy->getElementType(Elt), IROffset, TD); 2349 } 2350 2351 // If this is an array, recurse into the field at the specified offset. 2352 if (llvm::ArrayType *ATy = dyn_cast<llvm::ArrayType>(IRType)) { 2353 llvm::Type *EltTy = ATy->getElementType(); 2354 unsigned EltSize = TD.getTypeAllocSize(EltTy); 2355 IROffset -= IROffset / EltSize * EltSize; 2356 return getFPTypeAtOffset(EltTy, IROffset, TD); 2357 } 2358 2359 return nullptr; 2360 } 2361 2362 /// GetSSETypeAtOffset - Return a type that will be passed by the backend in the 2363 /// low 8 bytes of an XMM register, corresponding to the SSE class. 
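/// For example (illustrative): a lone float maps to float, two adjacent
/// floats map to <2 x float>, a double maps to double, and two or four
/// adjacent _Float16 values map to <2 x half> or <4 x half> respectively.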
2364 llvm::Type *X86_64ABIInfo:: 2365 GetSSETypeAtOffset(llvm::Type *IRType, unsigned IROffset, 2366 QualType SourceTy, unsigned SourceOffset) const { 2367 const llvm::DataLayout &TD = getDataLayout(); 2368 unsigned SourceSize = 2369 (unsigned)getContext().getTypeSize(SourceTy) / 8 - SourceOffset; 2370 llvm::Type *T0 = getFPTypeAtOffset(IRType, IROffset, TD); 2371 if (!T0 || T0->isDoubleTy()) 2372 return llvm::Type::getDoubleTy(getVMContext()); 2373 2374 // Get the adjacent FP type. 2375 llvm::Type *T1 = nullptr; 2376 unsigned T0Size = TD.getTypeAllocSize(T0); 2377 if (SourceSize > T0Size) 2378 T1 = getFPTypeAtOffset(IRType, IROffset + T0Size, TD); 2379 if (T1 == nullptr) { 2380 // Check if IRType is a half/bfloat + float. float type will be in IROffset+4 due 2381 // to its alignment. 2382 if (T0->is16bitFPTy() && SourceSize > 4) 2383 T1 = getFPTypeAtOffset(IRType, IROffset + 4, TD); 2384 // If we can't get a second FP type, return a simple half or float. 2385 // avx512fp16-abi.c:pr51813_2 shows it works to return float for 2386 // {float, i8} too. 2387 if (T1 == nullptr) 2388 return T0; 2389 } 2390 2391 if (T0->isFloatTy() && T1->isFloatTy()) 2392 return llvm::FixedVectorType::get(T0, 2); 2393 2394 if (T0->is16bitFPTy() && T1->is16bitFPTy()) { 2395 llvm::Type *T2 = nullptr; 2396 if (SourceSize > 4) 2397 T2 = getFPTypeAtOffset(IRType, IROffset + 4, TD); 2398 if (T2 == nullptr) 2399 return llvm::FixedVectorType::get(T0, 2); 2400 return llvm::FixedVectorType::get(T0, 4); 2401 } 2402 2403 if (T0->is16bitFPTy() || T1->is16bitFPTy()) 2404 return llvm::FixedVectorType::get(llvm::Type::getHalfTy(getVMContext()), 4); 2405 2406 return llvm::Type::getDoubleTy(getVMContext()); 2407 } 2408 2409 2410 /// GetINTEGERTypeAtOffset - The ABI specifies that a value should be passed in 2411 /// an 8-byte GPR. This means that we either have a scalar or we are talking 2412 /// about the high or low part of an up-to-16-byte struct. This routine picks 2413 /// the best LLVM IR type to represent this, which may be i64 or may be anything 2414 /// else that the backend will pass in a GPR that works better (e.g. i8, %foo*, 2415 /// etc). 2416 /// 2417 /// PrefType is an LLVM IR type that corresponds to (part of) the IR type for 2418 /// the source type. IROffset is an offset in bytes into the LLVM IR type that 2419 /// the 8-byte value references. PrefType may be null. 2420 /// 2421 /// SourceTy is the source-level type for the entire argument. SourceOffset is 2422 /// an offset into this that we're processing (which is always either 0 or 8). 2423 /// 2424 llvm::Type *X86_64ABIInfo:: 2425 GetINTEGERTypeAtOffset(llvm::Type *IRType, unsigned IROffset, 2426 QualType SourceTy, unsigned SourceOffset) const { 2427 // If we're dealing with an un-offset LLVM IR type, then it means that we're 2428 // returning an 8-byte unit starting with it. See if we can safely use it. 2429 if (IROffset == 0) { 2430 // Pointers and int64's always fill the 8-byte unit. 2431 if ((isa<llvm::PointerType>(IRType) && Has64BitPointers) || 2432 IRType->isIntegerTy(64)) 2433 return IRType; 2434 2435 // If we have a 1/2/4-byte integer, we can use it only if the rest of the 2436 // goodness in the source type is just tail padding. This is allowed to 2437 // kick in for struct {double,int} on the int, but not on 2438 // struct{double,int,int} because we wouldn't return the second int. We 2439 // have to do this analysis on the source type because we can't depend on 2440 // unions being lowered a specific way etc. 
2441 if (IRType->isIntegerTy(8) || IRType->isIntegerTy(16) || 2442 IRType->isIntegerTy(32) || 2443 (isa<llvm::PointerType>(IRType) && !Has64BitPointers)) { 2444 unsigned BitWidth = isa<llvm::PointerType>(IRType) ? 32 : 2445 cast<llvm::IntegerType>(IRType)->getBitWidth(); 2446 2447 if (BitsContainNoUserData(SourceTy, SourceOffset*8+BitWidth, 2448 SourceOffset*8+64, getContext())) 2449 return IRType; 2450 } 2451 } 2452 2453 if (llvm::StructType *STy = dyn_cast<llvm::StructType>(IRType)) { 2454 // If this is a struct, recurse into the field at the specified offset. 2455 const llvm::StructLayout *SL = getDataLayout().getStructLayout(STy); 2456 if (IROffset < SL->getSizeInBytes()) { 2457 unsigned FieldIdx = SL->getElementContainingOffset(IROffset); 2458 IROffset -= SL->getElementOffset(FieldIdx); 2459 2460 return GetINTEGERTypeAtOffset(STy->getElementType(FieldIdx), IROffset, 2461 SourceTy, SourceOffset); 2462 } 2463 } 2464 2465 if (llvm::ArrayType *ATy = dyn_cast<llvm::ArrayType>(IRType)) { 2466 llvm::Type *EltTy = ATy->getElementType(); 2467 unsigned EltSize = getDataLayout().getTypeAllocSize(EltTy); 2468 unsigned EltOffset = IROffset/EltSize*EltSize; 2469 return GetINTEGERTypeAtOffset(EltTy, IROffset-EltOffset, SourceTy, 2470 SourceOffset); 2471 } 2472 2473 // Okay, we don't have any better idea of what to pass, so we pass this in an 2474 // integer register that isn't too big to fit the rest of the struct. 2475 unsigned TySizeInBytes = 2476 (unsigned)getContext().getTypeSizeInChars(SourceTy).getQuantity(); 2477 2478 assert(TySizeInBytes != SourceOffset && "Empty field?"); 2479 2480 // It is always safe to classify this as an integer type up to i64 that 2481 // isn't larger than the structure. 2482 return llvm::IntegerType::get(getVMContext(), 2483 std::min(TySizeInBytes-SourceOffset, 8U)*8); 2484 } 2485 2486 2487 /// GetX86_64ByValArgumentPair - Given a high and low type that can ideally 2488 /// be used as elements of a two register pair to pass or return, return a 2489 /// first class aggregate to represent them. For example, if the low part of 2490 /// a by-value argument should be passed as i32* and the high part as float, 2491 /// return {i32*, float}. 2492 static llvm::Type * 2493 GetX86_64ByValArgumentPair(llvm::Type *Lo, llvm::Type *Hi, 2494 const llvm::DataLayout &TD) { 2495 // In order to correctly satisfy the ABI, we need to the high part to start 2496 // at offset 8. If the high and low parts we inferred are both 4-byte types 2497 // (e.g. i32 and i32) then the resultant struct type ({i32,i32}) won't have 2498 // the second element at offset 8. Check for this: 2499 unsigned LoSize = (unsigned)TD.getTypeAllocSize(Lo); 2500 llvm::Align HiAlign = TD.getABITypeAlign(Hi); 2501 unsigned HiStart = llvm::alignTo(LoSize, HiAlign); 2502 assert(HiStart != 0 && HiStart <= 8 && "Invalid x86-64 argument pair!"); 2503 2504 // To handle this, we have to increase the size of the low part so that the 2505 // second element will start at an 8 byte offset. We can't increase the size 2506 // of the second element because it might make us access off the end of the 2507 // struct. 2508 if (HiStart != 8) { 2509 // There are usually two sorts of types the ABI generation code can produce 2510 // for the low part of a pair that aren't 8 bytes in size: half, float or 2511 // i8/i16/i32. This can also include pointers when they are 32-bit (X32 and 2512 // NaCl). 2513 // Promote these to a larger type. 
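// For example (illustrative): if the inferred pair is (float, float), a
// naive {float, float} would put the high part at offset 4, so the low
// part is widened to double and the pair becomes {double, float}; an
// integer or pointer low part is widened to i64 instead.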
2514 if (Lo->isHalfTy() || Lo->isFloatTy()) 2515 Lo = llvm::Type::getDoubleTy(Lo->getContext()); 2516 else { 2517 assert((Lo->isIntegerTy() || Lo->isPointerTy()) 2518 && "Invalid/unknown lo type"); 2519 Lo = llvm::Type::getInt64Ty(Lo->getContext()); 2520 } 2521 } 2522 2523 llvm::StructType *Result = llvm::StructType::get(Lo, Hi); 2524 2525 // Verify that the second element is at an 8-byte offset. 2526 assert(TD.getStructLayout(Result)->getElementOffset(1) == 8 && 2527 "Invalid x86-64 argument pair!"); 2528 return Result; 2529 } 2530 2531 ABIArgInfo X86_64ABIInfo:: 2532 classifyReturnType(QualType RetTy) const { 2533 // AMD64-ABI 3.2.3p4: Rule 1. Classify the return type with the 2534 // classification algorithm. 2535 X86_64ABIInfo::Class Lo, Hi; 2536 classify(RetTy, 0, Lo, Hi, /*isNamedArg*/ true); 2537 2538 // Check some invariants. 2539 assert((Hi != Memory || Lo == Memory) && "Invalid memory classification."); 2540 assert((Hi != SSEUp || Lo == SSE) && "Invalid SSEUp classification."); 2541 2542 llvm::Type *ResType = nullptr; 2543 switch (Lo) { 2544 case NoClass: 2545 if (Hi == NoClass) 2546 return ABIArgInfo::getIgnore(); 2547 // If the low part is just padding, it takes no register, leave ResType 2548 // null. 2549 assert((Hi == SSE || Hi == Integer || Hi == X87Up) && 2550 "Unknown missing lo part"); 2551 break; 2552 2553 case SSEUp: 2554 case X87Up: 2555 llvm_unreachable("Invalid classification for lo word."); 2556 2557 // AMD64-ABI 3.2.3p4: Rule 2. Types of class memory are returned via 2558 // hidden argument. 2559 case Memory: 2560 return getIndirectReturnResult(RetTy); 2561 2562 // AMD64-ABI 3.2.3p4: Rule 3. If the class is INTEGER, the next 2563 // available register of the sequence %rax, %rdx is used. 2564 case Integer: 2565 ResType = GetINTEGERTypeAtOffset(CGT.ConvertType(RetTy), 0, RetTy, 0); 2566 2567 // If we have a sign or zero extended integer, make sure to return Extend 2568 // so that the parameter gets the right LLVM IR attributes. 2569 if (Hi == NoClass && isa<llvm::IntegerType>(ResType)) { 2570 // Treat an enum type as its underlying type. 2571 if (const EnumType *EnumTy = RetTy->getAs<EnumType>()) 2572 RetTy = EnumTy->getDecl()->getIntegerType(); 2573 2574 if (RetTy->isIntegralOrEnumerationType() && 2575 isPromotableIntegerTypeForABI(RetTy)) 2576 return ABIArgInfo::getExtend(RetTy); 2577 } 2578 break; 2579 2580 // AMD64-ABI 3.2.3p4: Rule 4. If the class is SSE, the next 2581 // available SSE register of the sequence %xmm0, %xmm1 is used. 2582 case SSE: 2583 ResType = GetSSETypeAtOffset(CGT.ConvertType(RetTy), 0, RetTy, 0); 2584 break; 2585 2586 // AMD64-ABI 3.2.3p4: Rule 6. If the class is X87, the value is 2587 // returned on the X87 stack in %st0 as 80-bit x87 number. 2588 case X87: 2589 ResType = llvm::Type::getX86_FP80Ty(getVMContext()); 2590 break; 2591 2592 // AMD64-ABI 3.2.3p4: Rule 8. If the class is COMPLEX_X87, the real 2593 // part of the value is returned in %st0 and the imaginary part in 2594 // %st1. 2595 case ComplexX87: 2596 assert(Hi == ComplexX87 && "Unexpected ComplexX87 classification."); 2597 ResType = llvm::StructType::get(llvm::Type::getX86_FP80Ty(getVMContext()), 2598 llvm::Type::getX86_FP80Ty(getVMContext())); 2599 break; 2600 } 2601 2602 llvm::Type *HighPart = nullptr; 2603 switch (Hi) { 2604 // Memory was handled previously and X87 should 2605 // never occur as a hi class. 2606 case Memory: 2607 case X87: 2608 llvm_unreachable("Invalid classification for hi word."); 2609 2610 case ComplexX87: // Previously handled. 
2611 case NoClass: 2612 break; 2613 2614 case Integer: 2615 HighPart = GetINTEGERTypeAtOffset(CGT.ConvertType(RetTy), 8, RetTy, 8); 2616 if (Lo == NoClass) // Return HighPart at offset 8 in memory. 2617 return ABIArgInfo::getDirect(HighPart, 8); 2618 break; 2619 case SSE: 2620 HighPart = GetSSETypeAtOffset(CGT.ConvertType(RetTy), 8, RetTy, 8); 2621 if (Lo == NoClass) // Return HighPart at offset 8 in memory. 2622 return ABIArgInfo::getDirect(HighPart, 8); 2623 break; 2624 2625 // AMD64-ABI 3.2.3p4: Rule 5. If the class is SSEUP, the eightbyte 2626 // is passed in the next available eightbyte chunk if the last used 2627 // vector register. 2628 // 2629 // SSEUP should always be preceded by SSE, just widen. 2630 case SSEUp: 2631 assert(Lo == SSE && "Unexpected SSEUp classification."); 2632 ResType = GetByteVectorType(RetTy); 2633 break; 2634 2635 // AMD64-ABI 3.2.3p4: Rule 7. If the class is X87UP, the value is 2636 // returned together with the previous X87 value in %st0. 2637 case X87Up: 2638 // If X87Up is preceded by X87, we don't need to do 2639 // anything. However, in some cases with unions it may not be 2640 // preceded by X87. In such situations we follow gcc and pass the 2641 // extra bits in an SSE reg. 2642 if (Lo != X87) { 2643 HighPart = GetSSETypeAtOffset(CGT.ConvertType(RetTy), 8, RetTy, 8); 2644 if (Lo == NoClass) // Return HighPart at offset 8 in memory. 2645 return ABIArgInfo::getDirect(HighPart, 8); 2646 } 2647 break; 2648 } 2649 2650 // If a high part was specified, merge it together with the low part. It is 2651 // known to pass in the high eightbyte of the result. We do this by forming a 2652 // first class struct aggregate with the high and low part: {low, high} 2653 if (HighPart) 2654 ResType = GetX86_64ByValArgumentPair(ResType, HighPart, getDataLayout()); 2655 2656 return ABIArgInfo::getDirect(ResType); 2657 } 2658 2659 ABIArgInfo 2660 X86_64ABIInfo::classifyArgumentType(QualType Ty, unsigned freeIntRegs, 2661 unsigned &neededInt, unsigned &neededSSE, 2662 bool isNamedArg, bool IsRegCall) const { 2663 Ty = useFirstFieldIfTransparentUnion(Ty); 2664 2665 X86_64ABIInfo::Class Lo, Hi; 2666 classify(Ty, 0, Lo, Hi, isNamedArg, IsRegCall); 2667 2668 // Check some invariants. 2669 // FIXME: Enforce these by construction. 2670 assert((Hi != Memory || Lo == Memory) && "Invalid memory classification."); 2671 assert((Hi != SSEUp || Lo == SSE) && "Invalid SSEUp classification."); 2672 2673 neededInt = 0; 2674 neededSSE = 0; 2675 llvm::Type *ResType = nullptr; 2676 switch (Lo) { 2677 case NoClass: 2678 if (Hi == NoClass) 2679 return ABIArgInfo::getIgnore(); 2680 // If the low part is just padding, it takes no register, leave ResType 2681 // null. 2682 assert((Hi == SSE || Hi == Integer || Hi == X87Up) && 2683 "Unknown missing lo part"); 2684 break; 2685 2686 // AMD64-ABI 3.2.3p3: Rule 1. If the class is MEMORY, pass the argument 2687 // on the stack. 2688 case Memory: 2689 2690 // AMD64-ABI 3.2.3p3: Rule 5. If the class is X87, X87UP or 2691 // COMPLEX_X87, it is passed in memory. 2692 case X87: 2693 case ComplexX87: 2694 if (getRecordArgABI(Ty, getCXXABI()) == CGCXXABI::RAA_Indirect) 2695 ++neededInt; 2696 return getIndirectResult(Ty, freeIntRegs); 2697 2698 case SSEUp: 2699 case X87Up: 2700 llvm_unreachable("Invalid classification for lo word."); 2701 2702 // AMD64-ABI 3.2.3p3: Rule 2. If the class is INTEGER, the next 2703 // available register of the sequence %rdi, %rsi, %rdx, %rcx, %r8 2704 // and %r9 is used. 
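// For example (illustrative): a struct { long a, b; } argument classifies
// as INTEGER/INTEGER and, when enough registers remain, consumes two
// consecutive GPRs from this sequence.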
2705 case Integer: 2706 ++neededInt; 2707 2708 // Pick an 8-byte type based on the preferred type. 2709 ResType = GetINTEGERTypeAtOffset(CGT.ConvertType(Ty), 0, Ty, 0); 2710 2711 // If we have a sign or zero extended integer, make sure to return Extend 2712 // so that the parameter gets the right LLVM IR attributes. 2713 if (Hi == NoClass && isa<llvm::IntegerType>(ResType)) { 2714 // Treat an enum type as its underlying type. 2715 if (const EnumType *EnumTy = Ty->getAs<EnumType>()) 2716 Ty = EnumTy->getDecl()->getIntegerType(); 2717 2718 if (Ty->isIntegralOrEnumerationType() && 2719 isPromotableIntegerTypeForABI(Ty)) 2720 return ABIArgInfo::getExtend(Ty); 2721 } 2722 2723 break; 2724 2725 // AMD64-ABI 3.2.3p3: Rule 3. If the class is SSE, the next 2726 // available SSE register is used, the registers are taken in the 2727 // order from %xmm0 to %xmm7. 2728 case SSE: { 2729 llvm::Type *IRType = CGT.ConvertType(Ty); 2730 ResType = GetSSETypeAtOffset(IRType, 0, Ty, 0); 2731 ++neededSSE; 2732 break; 2733 } 2734 } 2735 2736 llvm::Type *HighPart = nullptr; 2737 switch (Hi) { 2738 // Memory was handled previously, ComplexX87 and X87 should 2739 // never occur as hi classes, and X87Up must be preceded by X87, 2740 // which is passed in memory. 2741 case Memory: 2742 case X87: 2743 case ComplexX87: 2744 llvm_unreachable("Invalid classification for hi word."); 2745 2746 case NoClass: break; 2747 2748 case Integer: 2749 ++neededInt; 2750 // Pick an 8-byte type based on the preferred type. 2751 HighPart = GetINTEGERTypeAtOffset(CGT.ConvertType(Ty), 8, Ty, 8); 2752 2753 if (Lo == NoClass) // Pass HighPart at offset 8 in memory. 2754 return ABIArgInfo::getDirect(HighPart, 8); 2755 break; 2756 2757 // X87Up generally doesn't occur here (long double is passed in 2758 // memory), except in situations involving unions. 2759 case X87Up: 2760 case SSE: 2761 HighPart = GetSSETypeAtOffset(CGT.ConvertType(Ty), 8, Ty, 8); 2762 2763 if (Lo == NoClass) // Pass HighPart at offset 8 in memory. 2764 return ABIArgInfo::getDirect(HighPart, 8); 2765 2766 ++neededSSE; 2767 break; 2768 2769 // AMD64-ABI 3.2.3p3: Rule 4. If the class is SSEUP, the 2770 // eightbyte is passed in the upper half of the last used SSE 2771 // register. This only happens when 128-bit vectors are passed. 2772 case SSEUp: 2773 assert(Lo == SSE && "Unexpected SSEUp classification"); 2774 ResType = GetByteVectorType(Ty); 2775 break; 2776 } 2777 2778 // If a high part was specified, merge it together with the low part. It is 2779 // known to pass in the high eightbyte of the result. 
We do this by forming a 2780 // first class struct aggregate with the high and low part: {low, high} 2781 if (HighPart) 2782 ResType = GetX86_64ByValArgumentPair(ResType, HighPart, getDataLayout()); 2783 2784 return ABIArgInfo::getDirect(ResType); 2785 } 2786 2787 ABIArgInfo 2788 X86_64ABIInfo::classifyRegCallStructTypeImpl(QualType Ty, unsigned &NeededInt, 2789 unsigned &NeededSSE, 2790 unsigned &MaxVectorWidth) const { 2791 auto RT = Ty->getAs<RecordType>(); 2792 assert(RT && "classifyRegCallStructType only valid with struct types"); 2793 2794 if (RT->getDecl()->hasFlexibleArrayMember()) 2795 return getIndirectReturnResult(Ty); 2796 2797 // Sum up bases 2798 if (auto CXXRD = dyn_cast<CXXRecordDecl>(RT->getDecl())) { 2799 if (CXXRD->isDynamicClass()) { 2800 NeededInt = NeededSSE = 0; 2801 return getIndirectReturnResult(Ty); 2802 } 2803 2804 for (const auto &I : CXXRD->bases()) 2805 if (classifyRegCallStructTypeImpl(I.getType(), NeededInt, NeededSSE, 2806 MaxVectorWidth) 2807 .isIndirect()) { 2808 NeededInt = NeededSSE = 0; 2809 return getIndirectReturnResult(Ty); 2810 } 2811 } 2812 2813 // Sum up members 2814 for (const auto *FD : RT->getDecl()->fields()) { 2815 QualType MTy = FD->getType(); 2816 if (MTy->isRecordType() && !MTy->isUnionType()) { 2817 if (classifyRegCallStructTypeImpl(MTy, NeededInt, NeededSSE, 2818 MaxVectorWidth) 2819 .isIndirect()) { 2820 NeededInt = NeededSSE = 0; 2821 return getIndirectReturnResult(Ty); 2822 } 2823 } else { 2824 unsigned LocalNeededInt, LocalNeededSSE; 2825 if (classifyArgumentType(MTy, UINT_MAX, LocalNeededInt, LocalNeededSSE, 2826 true, true) 2827 .isIndirect()) { 2828 NeededInt = NeededSSE = 0; 2829 return getIndirectReturnResult(Ty); 2830 } 2831 if (const auto *AT = getContext().getAsConstantArrayType(MTy)) 2832 MTy = AT->getElementType(); 2833 if (const auto *VT = MTy->getAs<VectorType>()) 2834 if (getContext().getTypeSize(VT) > MaxVectorWidth) 2835 MaxVectorWidth = getContext().getTypeSize(VT); 2836 NeededInt += LocalNeededInt; 2837 NeededSSE += LocalNeededSSE; 2838 } 2839 } 2840 2841 return ABIArgInfo::getDirect(); 2842 } 2843 2844 ABIArgInfo 2845 X86_64ABIInfo::classifyRegCallStructType(QualType Ty, unsigned &NeededInt, 2846 unsigned &NeededSSE, 2847 unsigned &MaxVectorWidth) const { 2848 2849 NeededInt = 0; 2850 NeededSSE = 0; 2851 MaxVectorWidth = 0; 2852 2853 return classifyRegCallStructTypeImpl(Ty, NeededInt, NeededSSE, 2854 MaxVectorWidth); 2855 } 2856 2857 void X86_64ABIInfo::computeInfo(CGFunctionInfo &FI) const { 2858 2859 const unsigned CallingConv = FI.getCallingConvention(); 2860 // It is possible to force Win64 calling convention on any x86_64 target by 2861 // using __attribute__((ms_abi)). In such case to correctly emit Win64 2862 // compatible code delegate this call to WinX86_64ABIInfo::computeInfo. 2863 if (CallingConv == llvm::CallingConv::Win64) { 2864 WinX86_64ABIInfo Win64ABIInfo(CGT, AVXLevel); 2865 Win64ABIInfo.computeInfo(FI); 2866 return; 2867 } 2868 2869 bool IsRegCall = CallingConv == llvm::CallingConv::X86_RegCall; 2870 2871 // Keep track of the number of assigned registers. 2872 unsigned FreeIntRegs = IsRegCall ? 11 : 6; 2873 unsigned FreeSSERegs = IsRegCall ? 
16 : 8; 2874 unsigned NeededInt = 0, NeededSSE = 0, MaxVectorWidth = 0; 2875 2876 if (!::classifyReturnType(getCXXABI(), FI, *this)) { 2877 if (IsRegCall && FI.getReturnType()->getTypePtr()->isRecordType() && 2878 !FI.getReturnType()->getTypePtr()->isUnionType()) { 2879 FI.getReturnInfo() = classifyRegCallStructType( 2880 FI.getReturnType(), NeededInt, NeededSSE, MaxVectorWidth); 2881 if (FreeIntRegs >= NeededInt && FreeSSERegs >= NeededSSE) { 2882 FreeIntRegs -= NeededInt; 2883 FreeSSERegs -= NeededSSE; 2884 } else { 2885 FI.getReturnInfo() = getIndirectReturnResult(FI.getReturnType()); 2886 } 2887 } else if (IsRegCall && FI.getReturnType()->getAs<ComplexType>() && 2888 getContext().getCanonicalType(FI.getReturnType() 2889 ->getAs<ComplexType>() 2890 ->getElementType()) == 2891 getContext().LongDoubleTy) 2892 // Complex Long Double Type is passed in Memory when Regcall 2893 // calling convention is used. 2894 FI.getReturnInfo() = getIndirectReturnResult(FI.getReturnType()); 2895 else 2896 FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); 2897 } 2898 2899 // If the return value is indirect, then the hidden argument is consuming one 2900 // integer register. 2901 if (FI.getReturnInfo().isIndirect()) 2902 --FreeIntRegs; 2903 else if (NeededSSE && MaxVectorWidth > 0) 2904 FI.setMaxVectorWidth(MaxVectorWidth); 2905 2906 // The chain argument effectively gives us another free register. 2907 if (FI.isChainCall()) 2908 ++FreeIntRegs; 2909 2910 unsigned NumRequiredArgs = FI.getNumRequiredArgs(); 2911 // AMD64-ABI 3.2.3p3: Once arguments are classified, the registers 2912 // get assigned (in left-to-right order) for passing as follows... 2913 unsigned ArgNo = 0; 2914 for (CGFunctionInfo::arg_iterator it = FI.arg_begin(), ie = FI.arg_end(); 2915 it != ie; ++it, ++ArgNo) { 2916 bool IsNamedArg = ArgNo < NumRequiredArgs; 2917 2918 if (IsRegCall && it->type->isStructureOrClassType()) 2919 it->info = classifyRegCallStructType(it->type, NeededInt, NeededSSE, 2920 MaxVectorWidth); 2921 else 2922 it->info = classifyArgumentType(it->type, FreeIntRegs, NeededInt, 2923 NeededSSE, IsNamedArg); 2924 2925 // AMD64-ABI 3.2.3p3: If there are no registers available for any 2926 // eightbyte of an argument, the whole argument is passed on the 2927 // stack. If registers have already been assigned for some 2928 // eightbytes of such an argument, the assignments get reverted. 2929 if (FreeIntRegs >= NeededInt && FreeSSERegs >= NeededSSE) { 2930 FreeIntRegs -= NeededInt; 2931 FreeSSERegs -= NeededSSE; 2932 if (MaxVectorWidth > FI.getMaxVectorWidth()) 2933 FI.setMaxVectorWidth(MaxVectorWidth); 2934 } else { 2935 it->info = getIndirectResult(it->type, FreeIntRegs); 2936 } 2937 } 2938 } 2939 2940 static Address EmitX86_64VAArgFromMemory(CodeGenFunction &CGF, 2941 Address VAListAddr, QualType Ty) { 2942 Address overflow_arg_area_p = 2943 CGF.Builder.CreateStructGEP(VAListAddr, 2, "overflow_arg_area_p"); 2944 llvm::Value *overflow_arg_area = 2945 CGF.Builder.CreateLoad(overflow_arg_area_p, "overflow_arg_area"); 2946 2947 // AMD64-ABI 3.5.7p5: Step 7. Align l->overflow_arg_area upwards to a 16 2948 // byte boundary if alignment needed by type exceeds 8 byte boundary. 2949 // It isn't stated explicitly in the standard, but in practice we use 2950 // alignment greater than 16 where necessary. 
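// For example (illustrative): a 32-byte aligned argument such as __m256
// rounds l->overflow_arg_area up to a 32-byte boundary here, not just 16.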
2951 CharUnits Align = CGF.getContext().getTypeAlignInChars(Ty); 2952 if (Align > CharUnits::fromQuantity(8)) { 2953 overflow_arg_area = emitRoundPointerUpToAlignment(CGF, overflow_arg_area, 2954 Align); 2955 } 2956 2957 // AMD64-ABI 3.5.7p5: Step 8. Fetch type from l->overflow_arg_area. 2958 llvm::Type *LTy = CGF.ConvertTypeForMem(Ty); 2959 llvm::Value *Res = 2960 CGF.Builder.CreateBitCast(overflow_arg_area, 2961 llvm::PointerType::getUnqual(LTy)); 2962 2963 // AMD64-ABI 3.5.7p5: Step 9. Set l->overflow_arg_area to: 2964 // l->overflow_arg_area + sizeof(type). 2965 // AMD64-ABI 3.5.7p5: Step 10. Align l->overflow_arg_area upwards to 2966 // an 8 byte boundary. 2967 2968 uint64_t SizeInBytes = (CGF.getContext().getTypeSize(Ty) + 7) / 8; 2969 llvm::Value *Offset = 2970 llvm::ConstantInt::get(CGF.Int32Ty, (SizeInBytes + 7) & ~7); 2971 overflow_arg_area = CGF.Builder.CreateGEP(CGF.Int8Ty, overflow_arg_area, 2972 Offset, "overflow_arg_area.next"); 2973 CGF.Builder.CreateStore(overflow_arg_area, overflow_arg_area_p); 2974 2975 // AMD64-ABI 3.5.7p5: Step 11. Return the fetched type. 2976 return Address(Res, LTy, Align); 2977 } 2978 2979 Address X86_64ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, 2980 QualType Ty) const { 2981 // Assume that va_list type is correct; should be pointer to LLVM type: 2982 // struct { 2983 // i32 gp_offset; 2984 // i32 fp_offset; 2985 // i8* overflow_arg_area; 2986 // i8* reg_save_area; 2987 // }; 2988 unsigned neededInt, neededSSE; 2989 2990 Ty = getContext().getCanonicalType(Ty); 2991 ABIArgInfo AI = classifyArgumentType(Ty, 0, neededInt, neededSSE, 2992 /*isNamedArg*/false); 2993 2994 // AMD64-ABI 3.5.7p5: Step 1. Determine whether type may be passed 2995 // in the registers. If not go to step 7. 2996 if (!neededInt && !neededSSE) 2997 return EmitX86_64VAArgFromMemory(CGF, VAListAddr, Ty); 2998 2999 // AMD64-ABI 3.5.7p5: Step 2. Compute num_gp to hold the number of 3000 // general purpose registers needed to pass type and num_fp to hold 3001 // the number of floating point registers needed. 3002 3003 // AMD64-ABI 3.5.7p5: Step 3. Verify whether arguments fit into 3004 // registers. In the case: l->gp_offset > 48 - num_gp * 8 or 3005 // l->fp_offset > 304 - num_fp * 16 go to step 7. 3006 // 3007 // NOTE: 304 is a typo, there are (6 * 8 + 8 * 16) = 176 bytes of 3008 // register save space). 3009 3010 llvm::Value *InRegs = nullptr; 3011 Address gp_offset_p = Address::invalid(), fp_offset_p = Address::invalid(); 3012 llvm::Value *gp_offset = nullptr, *fp_offset = nullptr; 3013 if (neededInt) { 3014 gp_offset_p = CGF.Builder.CreateStructGEP(VAListAddr, 0, "gp_offset_p"); 3015 gp_offset = CGF.Builder.CreateLoad(gp_offset_p, "gp_offset"); 3016 InRegs = llvm::ConstantInt::get(CGF.Int32Ty, 48 - neededInt * 8); 3017 InRegs = CGF.Builder.CreateICmpULE(gp_offset, InRegs, "fits_in_gp"); 3018 } 3019 3020 if (neededSSE) { 3021 fp_offset_p = CGF.Builder.CreateStructGEP(VAListAddr, 1, "fp_offset_p"); 3022 fp_offset = CGF.Builder.CreateLoad(fp_offset_p, "fp_offset"); 3023 llvm::Value *FitsInFP = 3024 llvm::ConstantInt::get(CGF.Int32Ty, 176 - neededSSE * 16); 3025 FitsInFP = CGF.Builder.CreateICmpULE(fp_offset, FitsInFP, "fits_in_fp"); 3026 InRegs = InRegs ? 
CGF.Builder.CreateAnd(InRegs, FitsInFP) : FitsInFP; 3027 } 3028 3029 llvm::BasicBlock *InRegBlock = CGF.createBasicBlock("vaarg.in_reg"); 3030 llvm::BasicBlock *InMemBlock = CGF.createBasicBlock("vaarg.in_mem"); 3031 llvm::BasicBlock *ContBlock = CGF.createBasicBlock("vaarg.end"); 3032 CGF.Builder.CreateCondBr(InRegs, InRegBlock, InMemBlock); 3033 3034 // Emit code to load the value if it was passed in registers. 3035 3036 CGF.EmitBlock(InRegBlock); 3037 3038 // AMD64-ABI 3.5.7p5: Step 4. Fetch type from l->reg_save_area with 3039 // an offset of l->gp_offset and/or l->fp_offset. This may require 3040 // copying to a temporary location in case the parameter is passed 3041 // in different register classes or requires an alignment greater 3042 // than 8 for general purpose registers and 16 for XMM registers. 3043 // 3044 // FIXME: This really results in shameful code when we end up needing to 3045 // collect arguments from different places; often what should result in a 3046 // simple assembling of a structure from scattered addresses has many more 3047 // loads than necessary. Can we clean this up? 3048 llvm::Type *LTy = CGF.ConvertTypeForMem(Ty); 3049 llvm::Value *RegSaveArea = CGF.Builder.CreateLoad( 3050 CGF.Builder.CreateStructGEP(VAListAddr, 3), "reg_save_area"); 3051 3052 Address RegAddr = Address::invalid(); 3053 if (neededInt && neededSSE) { 3054 // FIXME: Cleanup. 3055 assert(AI.isDirect() && "Unexpected ABI info for mixed regs"); 3056 llvm::StructType *ST = cast<llvm::StructType>(AI.getCoerceToType()); 3057 Address Tmp = CGF.CreateMemTemp(Ty); 3058 Tmp = Tmp.withElementType(ST); 3059 assert(ST->getNumElements() == 2 && "Unexpected ABI info for mixed regs"); 3060 llvm::Type *TyLo = ST->getElementType(0); 3061 llvm::Type *TyHi = ST->getElementType(1); 3062 assert((TyLo->isFPOrFPVectorTy() ^ TyHi->isFPOrFPVectorTy()) && 3063 "Unexpected ABI info for mixed regs"); 3064 llvm::Type *PTyLo = llvm::PointerType::getUnqual(TyLo); 3065 llvm::Type *PTyHi = llvm::PointerType::getUnqual(TyHi); 3066 llvm::Value *GPAddr = 3067 CGF.Builder.CreateGEP(CGF.Int8Ty, RegSaveArea, gp_offset); 3068 llvm::Value *FPAddr = 3069 CGF.Builder.CreateGEP(CGF.Int8Ty, RegSaveArea, fp_offset); 3070 llvm::Value *RegLoAddr = TyLo->isFPOrFPVectorTy() ? FPAddr : GPAddr; 3071 llvm::Value *RegHiAddr = TyLo->isFPOrFPVectorTy() ? GPAddr : FPAddr; 3072 3073 // Copy the first element. 3074 // FIXME: Our choice of alignment here and below is probably pessimistic. 3075 llvm::Value *V = CGF.Builder.CreateAlignedLoad( 3076 TyLo, CGF.Builder.CreateBitCast(RegLoAddr, PTyLo), 3077 CharUnits::fromQuantity(getDataLayout().getABITypeAlign(TyLo))); 3078 CGF.Builder.CreateStore(V, CGF.Builder.CreateStructGEP(Tmp, 0)); 3079 3080 // Copy the second element. 3081 V = CGF.Builder.CreateAlignedLoad( 3082 TyHi, CGF.Builder.CreateBitCast(RegHiAddr, PTyHi), 3083 CharUnits::fromQuantity(getDataLayout().getABITypeAlign(TyHi))); 3084 CGF.Builder.CreateStore(V, CGF.Builder.CreateStructGEP(Tmp, 1)); 3085 3086 RegAddr = Tmp.withElementType(LTy); 3087 } else if (neededInt) { 3088 RegAddr = Address(CGF.Builder.CreateGEP(CGF.Int8Ty, RegSaveArea, gp_offset), 3089 LTy, CharUnits::fromQuantity(8)); 3090 3091 // Copy to a temporary if necessary to ensure the appropriate alignment. 3092 auto TInfo = getContext().getTypeInfoInChars(Ty); 3093 uint64_t TySize = TInfo.Width.getQuantity(); 3094 CharUnits TyAlign = TInfo.Align; 3095 3096 // Copy into a temporary if the type is more aligned than the 3097 // register save area. 
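// For example (illustrative): a 16-byte aligned aggregate that still
// classifies as INTEGER/INTEGER (say, a struct of two longs marked
// alignas(16)) is loaded from the 8-byte aligned register save area and
// copied into a suitably aligned temporary below.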
    if (TyAlign.getQuantity() > 8) {
      Address Tmp = CGF.CreateMemTemp(Ty);
      CGF.Builder.CreateMemCpy(Tmp, RegAddr, TySize, false);
      RegAddr = Tmp;
    }

  } else if (neededSSE == 1) {
    RegAddr = Address(CGF.Builder.CreateGEP(CGF.Int8Ty, RegSaveArea, fp_offset),
                      LTy, CharUnits::fromQuantity(16));
  } else {
    assert(neededSSE == 2 && "Invalid number of needed registers!");
    // SSE registers are spaced 16 bytes apart in the register save
    // area, so we need to collect the two eightbytes together.
    // The ABI isn't explicit about this, but it seems reasonable
    // to assume that the slots are 16-byte aligned, since the stack is
    // naturally 16-byte aligned and the prologue is expected to store
    // all the SSE registers to the RSA.
    Address RegAddrLo = Address(CGF.Builder.CreateGEP(CGF.Int8Ty, RegSaveArea,
                                                      fp_offset),
                                CGF.Int8Ty, CharUnits::fromQuantity(16));
    Address RegAddrHi =
        CGF.Builder.CreateConstInBoundsByteGEP(RegAddrLo,
                                               CharUnits::fromQuantity(16));
    llvm::Type *ST = AI.canHaveCoerceToType()
                         ? AI.getCoerceToType()
                         : llvm::StructType::get(CGF.DoubleTy, CGF.DoubleTy);
    llvm::Value *V;
    Address Tmp = CGF.CreateMemTemp(Ty);
    Tmp = Tmp.withElementType(ST);
    V = CGF.Builder.CreateLoad(
        RegAddrLo.withElementType(ST->getStructElementType(0)));
    CGF.Builder.CreateStore(V, CGF.Builder.CreateStructGEP(Tmp, 0));
    V = CGF.Builder.CreateLoad(
        RegAddrHi.withElementType(ST->getStructElementType(1)));
    CGF.Builder.CreateStore(V, CGF.Builder.CreateStructGEP(Tmp, 1));

    RegAddr = Tmp.withElementType(LTy);
  }

  // AMD64-ABI 3.5.7p5: Step 5. Set:
  // l->gp_offset = l->gp_offset + num_gp * 8
  // l->fp_offset = l->fp_offset + num_fp * 16.
  if (neededInt) {
    llvm::Value *Offset = llvm::ConstantInt::get(CGF.Int32Ty, neededInt * 8);
    CGF.Builder.CreateStore(CGF.Builder.CreateAdd(gp_offset, Offset),
                            gp_offset_p);
  }
  if (neededSSE) {
    llvm::Value *Offset = llvm::ConstantInt::get(CGF.Int32Ty, neededSSE * 16);
    CGF.Builder.CreateStore(CGF.Builder.CreateAdd(fp_offset, Offset),
                            fp_offset_p);
  }
  CGF.EmitBranch(ContBlock);

  // Emit code to load the value if it was passed in memory.

  CGF.EmitBlock(InMemBlock);
  Address MemAddr = EmitX86_64VAArgFromMemory(CGF, VAListAddr, Ty);

  // Return the appropriate result.

  CGF.EmitBlock(ContBlock);
  Address ResAddr = emitMergePHI(CGF, RegAddr, InRegBlock, MemAddr, InMemBlock,
                                 "vaarg.addr");
  return ResAddr;
}

Address X86_64ABIInfo::EmitMSVAArg(CodeGenFunction &CGF, Address VAListAddr,
                                   QualType Ty) const {
  // MS x64 ABI requirement: "Any argument that doesn't fit in 8 bytes, or is
  // not 1, 2, 4, or 8 bytes, must be passed by reference."
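  // Illustrative examples only: a 3-byte or 12-byte aggregate has a
  // non-power-of-two size and a 16-byte aggregate does not fit in 8 bytes, so
  // all three are read through a pointer; a 4-byte aggregate is read directly
  // from its 8-byte slot.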
  uint64_t Width = getContext().getTypeSize(Ty);
  bool IsIndirect = Width > 64 || !llvm::isPowerOf2_64(Width);

  return emitVoidPtrVAArg(CGF, VAListAddr, Ty, IsIndirect,
                          CGF.getContext().getTypeInfoInChars(Ty),
                          CharUnits::fromQuantity(8),
                          /*allowHigherAlign*/ false);
}

ABIArgInfo WinX86_64ABIInfo::reclassifyHvaArgForVectorCall(
    QualType Ty, unsigned &FreeSSERegs, const ABIArgInfo &current) const {
  const Type *Base = nullptr;
  uint64_t NumElts = 0;

  if (!Ty->isBuiltinType() && !Ty->isVectorType() &&
      isHomogeneousAggregate(Ty, Base, NumElts) && FreeSSERegs >= NumElts) {
    FreeSSERegs -= NumElts;
    return getDirectX86Hva();
  }
  return current;
}

ABIArgInfo WinX86_64ABIInfo::classify(QualType Ty, unsigned &FreeSSERegs,
                                      bool IsReturnType, bool IsVectorCall,
                                      bool IsRegCall) const {

  if (Ty->isVoidType())
    return ABIArgInfo::getIgnore();

  if (const EnumType *EnumTy = Ty->getAs<EnumType>())
    Ty = EnumTy->getDecl()->getIntegerType();

  TypeInfo Info = getContext().getTypeInfo(Ty);
  uint64_t Width = Info.Width;
  CharUnits Align = getContext().toCharUnitsFromBits(Info.Align);

  const RecordType *RT = Ty->getAs<RecordType>();
  if (RT) {
    if (!IsReturnType) {
      if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(RT, getCXXABI()))
        return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory);
    }

    if (RT->getDecl()->hasFlexibleArrayMember())
      return getNaturalAlignIndirect(Ty, /*ByVal=*/false);
  }

  const Type *Base = nullptr;
  uint64_t NumElts = 0;
  // vectorcall adds the concept of a homogeneous vector aggregate, similar to
  // other targets.
  if ((IsVectorCall || IsRegCall) &&
      isHomogeneousAggregate(Ty, Base, NumElts)) {
    if (IsRegCall) {
      if (FreeSSERegs >= NumElts) {
        FreeSSERegs -= NumElts;
        if (IsReturnType || Ty->isBuiltinType() || Ty->isVectorType())
          return ABIArgInfo::getDirect();
        return ABIArgInfo::getExpand();
      }
      return ABIArgInfo::getIndirect(Align, /*ByVal=*/false);
    } else if (IsVectorCall) {
      if (FreeSSERegs >= NumElts &&
          (IsReturnType || Ty->isBuiltinType() || Ty->isVectorType())) {
        FreeSSERegs -= NumElts;
        return ABIArgInfo::getDirect();
      } else if (IsReturnType) {
        return ABIArgInfo::getExpand();
      } else if (!Ty->isBuiltinType() && !Ty->isVectorType()) {
        // HVAs are delayed and reclassified in the 2nd step.
        return ABIArgInfo::getIndirect(Align, /*ByVal=*/false);
      }
    }
  }

  if (Ty->isMemberPointerType()) {
    // If the member pointer is represented by an LLVM int or ptr, pass it
    // directly.
    llvm::Type *LLTy = CGT.ConvertType(Ty);
    if (LLTy->isPointerTy() || LLTy->isIntegerTy())
      return ABIArgInfo::getDirect();
  }

  if (RT || Ty->isAnyComplexType() || Ty->isMemberPointerType()) {
    // MS x64 ABI requirement: "Any argument that doesn't fit in 8 bytes, or is
    // not 1, 2, 4, or 8 bytes, must be passed by reference."
    if (Width > 64 || !llvm::isPowerOf2_64(Width))
      return getNaturalAlignIndirect(Ty, /*ByVal=*/false);

    // Otherwise, coerce it to a small integer.
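    // (For example, an 8-byte struct such as { int i; float f; } is passed as
    // its raw bits in a single 64-bit integer register or stack slot;
    // illustrative example only.)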
    return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(), Width));
  }

  if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) {
    switch (BT->getKind()) {
    case BuiltinType::Bool:
      // Bool is always extended for the ABI; other builtin types are not
      // extended.
      return ABIArgInfo::getExtend(Ty);

    case BuiltinType::LongDouble:
      // Mingw64 GCC uses the old 80 bit extended precision floating point
      // unit. It passes them indirectly through memory.
      if (IsMingw64) {
        const llvm::fltSemantics *LDF = &getTarget().getLongDoubleFormat();
        if (LDF == &llvm::APFloat::x87DoubleExtended())
          return ABIArgInfo::getIndirect(Align, /*ByVal=*/false);
      }
      break;

    case BuiltinType::Int128:
    case BuiltinType::UInt128:
      // If it's a parameter type, the normal ABI rule is that arguments larger
      // than 8 bytes are passed indirectly. GCC follows it. We follow it too,
      // even though it isn't particularly efficient.
      if (!IsReturnType)
        return ABIArgInfo::getIndirect(Align, /*ByVal=*/false);

      // Mingw64 GCC returns i128 in XMM0. Coerce to v2i64 to handle that.
      // Clang matches GCC for compatibility.
      return ABIArgInfo::getDirect(llvm::FixedVectorType::get(
          llvm::Type::getInt64Ty(getVMContext()), 2));

    default:
      break;
    }
  }

  if (Ty->isBitIntType()) {
    // MS x64 ABI requirement: "Any argument that doesn't fit in 8 bytes, or is
    // not 1, 2, 4, or 8 bytes, must be passed by reference."
    // However, non-power-of-two bit-precise integers will be passed as 1, 2, 4,
    // or 8 bytes anyway as long as they fit, so we don't have to check for a
    // power of 2.
    if (Width <= 64)
      return ABIArgInfo::getDirect();
    return ABIArgInfo::getIndirect(Align, /*ByVal=*/false);
  }

  return ABIArgInfo::getDirect();
}

void WinX86_64ABIInfo::computeInfo(CGFunctionInfo &FI) const {
  const unsigned CC = FI.getCallingConvention();
  bool IsVectorCall = CC == llvm::CallingConv::X86_VectorCall;
  bool IsRegCall = CC == llvm::CallingConv::X86_RegCall;

  // If __attribute__((sysv_abi)) is in use, use the SysV argument
  // classification rules.
  if (CC == llvm::CallingConv::X86_64_SysV) {
    X86_64ABIInfo SysVABIInfo(CGT, AVXLevel);
    SysVABIInfo.computeInfo(FI);
    return;
  }

  unsigned FreeSSERegs = 0;
  if (IsVectorCall) {
    // We can use up to 4 SSE return registers with vectorcall.
    FreeSSERegs = 4;
  } else if (IsRegCall) {
    // RegCall gives us 16 SSE registers.
    FreeSSERegs = 16;
  }

  if (!getCXXABI().classifyReturnType(FI))
    FI.getReturnInfo() = classify(FI.getReturnType(), FreeSSERegs, true,
                                  IsVectorCall, IsRegCall);

  if (IsVectorCall) {
    // We can use up to 6 SSE register parameters with vectorcall.
    FreeSSERegs = 6;
  } else if (IsRegCall) {
    // RegCall gives us 16 SSE registers; we can reuse the return registers.
    FreeSSERegs = 16;
  }

  unsigned ArgNum = 0;
  unsigned ZeroSSERegs = 0;
  for (auto &I : FI.arguments()) {
    // Vectorcall in x64 only permits the first 6 arguments to be passed as
    // XMM/YMM registers. After the sixth argument, pretend no vector
    // registers are left.
    unsigned *MaybeFreeSSERegs =
        (IsVectorCall && ArgNum >= 6) ? &ZeroSSERegs : &FreeSSERegs;
    I.info =
        classify(I.type, *MaybeFreeSSERegs, false, IsVectorCall, IsRegCall);
    ++ArgNum;
  }

  if (IsVectorCall) {
    // For vectorcall, assign aggregate HVAs to any free vector registers in a
    // second pass.
    for (auto &I : FI.arguments())
      I.info = reclassifyHvaArgForVectorCall(I.type, FreeSSERegs, I.info);
  }
}

Address WinX86_64ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
                                    QualType Ty) const {
  // MS x64 ABI requirement: "Any argument that doesn't fit in 8 bytes, or is
  // not 1, 2, 4, or 8 bytes, must be passed by reference."
  uint64_t Width = getContext().getTypeSize(Ty);
  bool IsIndirect = Width > 64 || !llvm::isPowerOf2_64(Width);

  return emitVoidPtrVAArg(CGF, VAListAddr, Ty, IsIndirect,
                          CGF.getContext().getTypeInfoInChars(Ty),
                          CharUnits::fromQuantity(8),
                          /*allowHigherAlign*/ false);
}

std::unique_ptr<TargetCodeGenInfo> CodeGen::createX86_32TargetCodeGenInfo(
    CodeGenModule &CGM, bool DarwinVectorABI, bool Win32StructABI,
    unsigned NumRegisterParameters, bool SoftFloatABI) {
  bool RetSmallStructInRegABI = X86_32TargetCodeGenInfo::isStructReturnInRegABI(
      CGM.getTriple(), CGM.getCodeGenOpts());
  return std::make_unique<X86_32TargetCodeGenInfo>(
      CGM.getTypes(), DarwinVectorABI, RetSmallStructInRegABI, Win32StructABI,
      NumRegisterParameters, SoftFloatABI);
}

std::unique_ptr<TargetCodeGenInfo> CodeGen::createWinX86_32TargetCodeGenInfo(
    CodeGenModule &CGM, bool DarwinVectorABI, bool Win32StructABI,
    unsigned NumRegisterParameters) {
  bool RetSmallStructInRegABI = X86_32TargetCodeGenInfo::isStructReturnInRegABI(
      CGM.getTriple(), CGM.getCodeGenOpts());
  return std::make_unique<WinX86_32TargetCodeGenInfo>(
      CGM.getTypes(), DarwinVectorABI, RetSmallStructInRegABI, Win32StructABI,
      NumRegisterParameters);
}

std::unique_ptr<TargetCodeGenInfo>
CodeGen::createX86_64TargetCodeGenInfo(CodeGenModule &CGM,
                                       X86AVXABILevel AVXLevel) {
  return std::make_unique<X86_64TargetCodeGenInfo>(CGM.getTypes(), AVXLevel);
}

std::unique_ptr<TargetCodeGenInfo>
CodeGen::createWinX86_64TargetCodeGenInfo(CodeGenModule &CGM,
                                          X86AVXABILevel AVXLevel) {
  return std::make_unique<WinX86_64TargetCodeGenInfo>(CGM.getTypes(), AVXLevel);
}