//===- X86.cpp ------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "ABIInfoImpl.h"
#include "TargetInfo.h"
#include "clang/Basic/DiagnosticFrontend.h"
#include "llvm/ADT/SmallBitVector.h"

using namespace clang;
using namespace clang::CodeGen;

namespace {

/// IsX86_MMXType - Return true if this is an MMX type.
bool IsX86_MMXType(llvm::Type *IRType) {
  // Return true if the type is an MMX type <2 x i32>, <4 x i16>, or <8 x i8>.
  return IRType->isVectorTy() && IRType->getPrimitiveSizeInBits() == 64 &&
         cast<llvm::VectorType>(IRType)->getElementType()->isIntegerTy() &&
         IRType->getScalarSizeInBits() != 64;
}

static llvm::Type* X86AdjustInlineAsmType(CodeGen::CodeGenFunction &CGF,
                                          StringRef Constraint,
                                          llvm::Type* Ty) {
  bool IsMMXCons = llvm::StringSwitch<bool>(Constraint)
                     .Cases("y", "&y", "^Ym", true)
                     .Default(false);
  if (IsMMXCons && Ty->isVectorTy()) {
    if (cast<llvm::VectorType>(Ty)->getPrimitiveSizeInBits().getFixedValue() !=
        64) {
      // Invalid MMX constraint
      return nullptr;
    }

    return llvm::Type::getX86_MMXTy(CGF.getLLVMContext());
  }

  // No operation needed
  return Ty;
}

/// Returns true if this type can be passed in SSE registers with the
/// X86_VectorCall calling convention. Shared between x86_32 and x86_64.
static bool isX86VectorTypeForVectorCall(ASTContext &Context, QualType Ty) {
  if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) {
    if (BT->isFloatingPoint() && BT->getKind() != BuiltinType::Half) {
      if (BT->getKind() == BuiltinType::LongDouble) {
        if (&Context.getTargetInfo().getLongDoubleFormat() ==
            &llvm::APFloat::x87DoubleExtended())
          return false;
      }
      return true;
    }
  } else if (const VectorType *VT = Ty->getAs<VectorType>()) {
    // vectorcall can pass XMM, YMM, and ZMM vectors. We don't pass SSE1 MMX
    // registers specially.
    unsigned VecSize = Context.getTypeSize(VT);
    if (VecSize == 128 || VecSize == 256 || VecSize == 512)
      return true;
  }
  return false;
}

/// Returns true if this aggregate is small enough to be passed in SSE registers
/// in the X86_VectorCall calling convention. Shared between x86_32 and x86_64.
static bool isX86VectorCallAggregateSmallEnough(uint64_t NumMembers) {
  return NumMembers <= 4;
}

/// Returns a Homogeneous Vector Aggregate ABIArgInfo, used in X86.
static ABIArgInfo getDirectX86Hva(llvm::Type* T = nullptr) {
  auto AI = ABIArgInfo::getDirect(T);
  AI.setInReg(true);
  AI.setCanBeFlattened(false);
  return AI;
}

//===----------------------------------------------------------------------===//
// X86-32 ABI Implementation
//===----------------------------------------------------------------------===//

/// Similar to llvm::CCState, but for Clang.
struct CCState {
  CCState(CGFunctionInfo &FI)
      : IsPreassigned(FI.arg_size()), CC(FI.getCallingConvention()) {}

  llvm::SmallBitVector IsPreassigned;
  unsigned CC = CallingConv::CC_C;
  unsigned FreeRegs = 0;
  unsigned FreeSSERegs = 0;
};

/// X86_32ABIInfo - The X86-32 ABI information.
class X86_32ABIInfo : public ABIInfo {
  enum Class {
    Integer,
    Float
  };

  static const unsigned MinABIStackAlignInBytes = 4;

  bool IsDarwinVectorABI;
  bool IsRetSmallStructInRegABI;
  bool IsWin32StructABI;
  bool IsSoftFloatABI;
  bool IsMCUABI;
  bool IsLinuxABI;
  unsigned DefaultNumRegisterParameters;

  static bool isRegisterSize(unsigned Size) {
    return (Size == 8 || Size == 16 || Size == 32 || Size == 64);
  }

  bool isHomogeneousAggregateBaseType(QualType Ty) const override {
    // FIXME: Assumes vectorcall is in use.
    return isX86VectorTypeForVectorCall(getContext(), Ty);
  }

  bool isHomogeneousAggregateSmallEnough(const Type *Ty,
                                         uint64_t NumMembers) const override {
    // FIXME: Assumes vectorcall is in use.
    return isX86VectorCallAggregateSmallEnough(NumMembers);
  }

  bool shouldReturnTypeInRegister(QualType Ty, ASTContext &Context) const;

  /// getIndirectResult - Given a source type \arg Ty, return a suitable result
  /// such that the argument will be passed in memory.
  ABIArgInfo getIndirectResult(QualType Ty, bool ByVal, CCState &State) const;

  ABIArgInfo getIndirectReturnResult(QualType Ty, CCState &State) const;

  /// Return the alignment to use for the given type on the stack.
  unsigned getTypeStackAlignInBytes(QualType Ty, unsigned Align) const;

  Class classify(QualType Ty) const;
  ABIArgInfo classifyReturnType(QualType RetTy, CCState &State) const;
  ABIArgInfo classifyArgumentType(QualType RetTy, CCState &State) const;

  /// Updates the number of available free registers, returns
  /// true if any registers were allocated.
  bool updateFreeRegs(QualType Ty, CCState &State) const;

  bool shouldAggregateUseDirect(QualType Ty, CCState &State, bool &InReg,
                                bool &NeedsPadding) const;
  bool shouldPrimitiveUseInReg(QualType Ty, CCState &State) const;

  bool canExpandIndirectArgument(QualType Ty) const;

  /// Rewrite the function info so that all memory arguments use
  /// inalloca.
  void rewriteWithInAlloca(CGFunctionInfo &FI) const;

  void addFieldToArgStruct(SmallVector<llvm::Type *, 6> &FrameFields,
                           CharUnits &StackOffset, ABIArgInfo &Info,
                           QualType Type) const;
  void runVectorCallFirstPass(CGFunctionInfo &FI, CCState &State) const;

public:

  void computeInfo(CGFunctionInfo &FI) const override;
  Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
                    QualType Ty) const override;

  X86_32ABIInfo(CodeGen::CodeGenTypes &CGT, bool DarwinVectorABI,
                bool RetSmallStructInRegABI, bool Win32StructABI,
                unsigned NumRegisterParameters, bool SoftFloatABI)
      : ABIInfo(CGT), IsDarwinVectorABI(DarwinVectorABI),
        IsRetSmallStructInRegABI(RetSmallStructInRegABI),
        IsWin32StructABI(Win32StructABI), IsSoftFloatABI(SoftFloatABI),
        IsMCUABI(CGT.getTarget().getTriple().isOSIAMCU()),
        IsLinuxABI(CGT.getTarget().getTriple().isOSLinux() ||
                   CGT.getTarget().getTriple().isOSCygMing()),
        DefaultNumRegisterParameters(NumRegisterParameters) {}
};

class X86_32SwiftABIInfo : public SwiftABIInfo {
public:
  explicit X86_32SwiftABIInfo(CodeGenTypes &CGT)
      : SwiftABIInfo(CGT, /*SwiftErrorInRegister=*/false) {}

  bool shouldPassIndirectly(ArrayRef<llvm::Type *> ComponentTys,
                            bool AsReturnValue) const override {
    // LLVM's x86-32 lowering currently only assigns up to three
    // integer registers and three fp registers. Oddly, it'll use up to
    // four vector registers for vectors, but those can overlap with the
    // scalar registers.
    return occupiesMoreThan(ComponentTys, /*total=*/3);
  }
};

class X86_32TargetCodeGenInfo : public TargetCodeGenInfo {
public:
  X86_32TargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, bool DarwinVectorABI,
                          bool RetSmallStructInRegABI, bool Win32StructABI,
                          unsigned NumRegisterParameters, bool SoftFloatABI)
      : TargetCodeGenInfo(std::make_unique<X86_32ABIInfo>(
            CGT, DarwinVectorABI, RetSmallStructInRegABI, Win32StructABI,
            NumRegisterParameters, SoftFloatABI)) {
    SwiftInfo = std::make_unique<X86_32SwiftABIInfo>(CGT);
  }

  static bool isStructReturnInRegABI(
      const llvm::Triple &Triple, const CodeGenOptions &Opts);

  void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
                           CodeGen::CodeGenModule &CGM) const override;

  int getDwarfEHStackPointer(CodeGen::CodeGenModule &CGM) const override {
    // Darwin uses different dwarf register numbers for EH.
    if (CGM.getTarget().getTriple().isOSDarwin()) return 5;
    return 4;
  }

  bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF,
                               llvm::Value *Address) const override;

  llvm::Type* adjustInlineAsmType(CodeGen::CodeGenFunction &CGF,
                                  StringRef Constraint,
                                  llvm::Type* Ty) const override {
    return X86AdjustInlineAsmType(CGF, Constraint, Ty);
  }

  void addReturnRegisterOutputs(CodeGenFunction &CGF, LValue ReturnValue,
                                std::string &Constraints,
                                std::vector<llvm::Type *> &ResultRegTypes,
                                std::vector<llvm::Type *> &ResultTruncRegTypes,
                                std::vector<LValue> &ResultRegDests,
                                std::string &AsmString,
                                unsigned NumOutputs) const override;

  StringRef getARCRetainAutoreleasedReturnValueMarker() const override {
    return "movl\t%ebp, %ebp"
           "\t\t// marker for objc_retainAutoreleaseReturnValue";
  }
};

} // namespace

/// Rewrite input constraint references after adding some output constraints.
/// In the case where there is one output and one input and we add one output,
/// we need to replace all operand references greater than or equal to 1:
///  mov $0, $1
///  mov eax, $1
/// The result will be:
///  mov $0, $2
///  mov eax, $2
static void rewriteInputConstraintReferences(unsigned FirstIn,
                                             unsigned NumNewOuts,
                                             std::string &AsmString) {
  std::string Buf;
  llvm::raw_string_ostream OS(Buf);
  size_t Pos = 0;
  while (Pos < AsmString.size()) {
    size_t DollarStart = AsmString.find('$', Pos);
    if (DollarStart == std::string::npos)
      DollarStart = AsmString.size();
    size_t DollarEnd = AsmString.find_first_not_of('$', DollarStart);
    if (DollarEnd == std::string::npos)
      DollarEnd = AsmString.size();
    OS << StringRef(&AsmString[Pos], DollarEnd - Pos);
    Pos = DollarEnd;
    size_t NumDollars = DollarEnd - DollarStart;
    if (NumDollars % 2 != 0 && Pos < AsmString.size()) {
      // We have an operand reference.
      size_t DigitStart = Pos;
      if (AsmString[DigitStart] == '{') {
        OS << '{';
        ++DigitStart;
      }
      size_t DigitEnd = AsmString.find_first_not_of("0123456789", DigitStart);
      if (DigitEnd == std::string::npos)
        DigitEnd = AsmString.size();
      StringRef OperandStr(&AsmString[DigitStart], DigitEnd - DigitStart);
      unsigned OperandIndex;
      if (!OperandStr.getAsInteger(10, OperandIndex)) {
        if (OperandIndex >= FirstIn)
          OperandIndex += NumNewOuts;
        OS << OperandIndex;
      } else {
        OS << OperandStr;
      }
      Pos = DigitEnd;
    }
  }
  AsmString = std::move(OS.str());
}

/// Add output constraints for EAX:EDX because they are return registers.
void X86_32TargetCodeGenInfo::addReturnRegisterOutputs(
    CodeGenFunction &CGF, LValue ReturnSlot, std::string &Constraints,
    std::vector<llvm::Type *> &ResultRegTypes,
    std::vector<llvm::Type *> &ResultTruncRegTypes,
    std::vector<LValue> &ResultRegDests, std::string &AsmString,
    unsigned NumOutputs) const {
  uint64_t RetWidth = CGF.getContext().getTypeSize(ReturnSlot.getType());

  // Use the EAX constraint if the width is 32 or smaller and EAX:EDX if it is
  // larger.
  if (!Constraints.empty())
    Constraints += ',';
  if (RetWidth <= 32) {
    Constraints += "={eax}";
    ResultRegTypes.push_back(CGF.Int32Ty);
  } else {
    // Use the 'A' constraint for EAX:EDX.
    Constraints += "=A";
    ResultRegTypes.push_back(CGF.Int64Ty);
  }

  // Truncate EAX or EAX:EDX to an integer of the appropriate size.
  llvm::Type *CoerceTy = llvm::IntegerType::get(CGF.getLLVMContext(), RetWidth);
  ResultTruncRegTypes.push_back(CoerceTy);

  // Coerce the integer by bitcasting the return slot pointer.
  ReturnSlot.setAddress(ReturnSlot.getAddress(CGF).withElementType(CoerceTy));
  ResultRegDests.push_back(ReturnSlot);

  rewriteInputConstraintReferences(NumOutputs, 1, AsmString);
}

/// shouldReturnTypeInRegister - Determine if the given type should be
/// returned in a register (for the Darwin and MCU ABI).
bool X86_32ABIInfo::shouldReturnTypeInRegister(QualType Ty,
                                               ASTContext &Context) const {
  uint64_t Size = Context.getTypeSize(Ty);

  // For i386, type must be register sized.
  // For the MCU ABI, it only needs to be <= 8-byte
  if ((IsMCUABI && Size > 64) || (!IsMCUABI && !isRegisterSize(Size)))
    return false;

  if (Ty->isVectorType()) {
    // 64- and 128- bit vectors inside structures are not returned in
    // registers.
    if (Size == 64 || Size == 128)
      return false;

    return true;
  }

  // If this is a builtin, pointer, enum, complex type, member pointer, or
  // member function pointer it is ok.
  if (Ty->getAs<BuiltinType>() || Ty->hasPointerRepresentation() ||
      Ty->isAnyComplexType() || Ty->isEnumeralType() ||
      Ty->isBlockPointerType() || Ty->isMemberPointerType())
    return true;

  // Arrays are treated like records.
  if (const ConstantArrayType *AT = Context.getAsConstantArrayType(Ty))
    return shouldReturnTypeInRegister(AT->getElementType(), Context);

  // Otherwise, it must be a record type.
  const RecordType *RT = Ty->getAs<RecordType>();
  if (!RT) return false;

  // FIXME: Traverse bases here too.

  // Structure types are passed in register if all fields would be
  // passed in a register.
  for (const auto *FD : RT->getDecl()->fields()) {
    // Empty fields are ignored.
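    // (For example, unnamed bit-fields and fields whose type is an empty
    // record are treated as empty and do not affect the decision.)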
    if (isEmptyField(Context, FD, true))
      continue;

    // Check fields recursively.
    if (!shouldReturnTypeInRegister(FD->getType(), Context))
      return false;
  }
  return true;
}

static bool is32Or64BitBasicType(QualType Ty, ASTContext &Context) {
  // Treat complex types as the element type.
  if (const ComplexType *CTy = Ty->getAs<ComplexType>())
    Ty = CTy->getElementType();

  // Check for a type which we know has a simple scalar argument-passing
  // convention without any padding. (We're specifically looking for 32
  // and 64-bit integer and integer-equivalents, float, and double.)
  if (!Ty->getAs<BuiltinType>() && !Ty->hasPointerRepresentation() &&
      !Ty->isEnumeralType() && !Ty->isBlockPointerType())
    return false;

  uint64_t Size = Context.getTypeSize(Ty);
  return Size == 32 || Size == 64;
}

static bool addFieldSizes(ASTContext &Context, const RecordDecl *RD,
                          uint64_t &Size) {
  for (const auto *FD : RD->fields()) {
    // Scalar arguments on the stack get 4 byte alignment on x86. If the
    // argument is smaller than 32-bits, expanding the struct will create
    // alignment padding.
    if (!is32Or64BitBasicType(FD->getType(), Context))
      return false;

    // FIXME: Reject bit-fields wholesale; there are two problems, we don't know
    // how to expand them yet, and the predicate for telling if a bitfield still
    // counts as "basic" is more complicated than what we were doing previously.
    if (FD->isBitField())
      return false;

    Size += Context.getTypeSize(FD->getType());
  }
  return true;
}

static bool addBaseAndFieldSizes(ASTContext &Context, const CXXRecordDecl *RD,
                                 uint64_t &Size) {
  // Don't do this if there are any non-empty bases.
  for (const CXXBaseSpecifier &Base : RD->bases()) {
    if (!addBaseAndFieldSizes(Context, Base.getType()->getAsCXXRecordDecl(),
                              Size))
      return false;
  }
  if (!addFieldSizes(Context, RD, Size))
    return false;
  return true;
}

/// Test whether an argument type which is to be passed indirectly (on the
/// stack) would have the equivalent layout if it was expanded into separate
/// arguments. If so, we prefer to do the latter to avoid inhibiting
/// optimizations.
bool X86_32ABIInfo::canExpandIndirectArgument(QualType Ty) const {
  // We can only expand structure types.
  const RecordType *RT = Ty->getAs<RecordType>();
  if (!RT)
    return false;
  const RecordDecl *RD = RT->getDecl();
  uint64_t Size = 0;
  if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD)) {
    if (!IsWin32StructABI) {
      // On non-Windows, we have to conservatively match our old bitcode
      // prototypes in order to be ABI-compatible at the bitcode level.
      if (!CXXRD->isCLike())
        return false;
    } else {
      // Don't do this for dynamic classes.
      if (CXXRD->isDynamicClass())
        return false;
    }
    if (!addBaseAndFieldSizes(getContext(), CXXRD, Size))
      return false;
  } else {
    if (!addFieldSizes(getContext(), RD, Size))
      return false;
  }

  // We can do this if there was no alignment padding.
  return Size == getContext().getTypeSize(Ty);
}

ABIArgInfo X86_32ABIInfo::getIndirectReturnResult(QualType RetTy,
                                                  CCState &State) const {
  // If the return value is indirect, then the hidden argument is consuming one
  // integer register.
  if (State.FreeRegs) {
    --State.FreeRegs;
    if (!IsMCUABI)
      return getNaturalAlignIndirectInReg(RetTy);
  }
  return getNaturalAlignIndirect(RetTy, /*ByVal=*/false);
}

ABIArgInfo X86_32ABIInfo::classifyReturnType(QualType RetTy,
                                             CCState &State) const {
  if (RetTy->isVoidType())
    return ABIArgInfo::getIgnore();

  const Type *Base = nullptr;
  uint64_t NumElts = 0;
  if ((State.CC == llvm::CallingConv::X86_VectorCall ||
       State.CC == llvm::CallingConv::X86_RegCall) &&
      isHomogeneousAggregate(RetTy, Base, NumElts)) {
    // The LLVM struct type for such an aggregate should lower properly.
    return ABIArgInfo::getDirect();
  }

  if (const VectorType *VT = RetTy->getAs<VectorType>()) {
    // On Darwin, some vectors are returned in registers.
    if (IsDarwinVectorABI) {
      uint64_t Size = getContext().getTypeSize(RetTy);

      // 128-bit vectors are a special case; they are returned in
      // registers and we need to make sure to pick a type the LLVM
      // backend will like.
      if (Size == 128)
        return ABIArgInfo::getDirect(llvm::FixedVectorType::get(
            llvm::Type::getInt64Ty(getVMContext()), 2));

      // Always return in register if it fits in a general purpose
      // register, or if it is 64 bits and has a single element.
      if ((Size == 8 || Size == 16 || Size == 32) ||
          (Size == 64 && VT->getNumElements() == 1))
        return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(),
                                                            Size));

      return getIndirectReturnResult(RetTy, State);
    }

    return ABIArgInfo::getDirect();
  }

  if (isAggregateTypeForABI(RetTy)) {
    if (const RecordType *RT = RetTy->getAs<RecordType>()) {
      // Structures with flexible arrays are always indirect.
      if (RT->getDecl()->hasFlexibleArrayMember())
        return getIndirectReturnResult(RetTy, State);
    }

    // If specified, structs and unions are always indirect.
    if (!IsRetSmallStructInRegABI && !RetTy->isAnyComplexType())
      return getIndirectReturnResult(RetTy, State);

    // Ignore empty structs/unions.
    if (isEmptyRecord(getContext(), RetTy, true))
      return ABIArgInfo::getIgnore();

    // Return complex of _Float16 as <2 x half> so the backend will use xmm0.
    if (const ComplexType *CT = RetTy->getAs<ComplexType>()) {
      QualType ET = getContext().getCanonicalType(CT->getElementType());
      if (ET->isFloat16Type())
        return ABIArgInfo::getDirect(llvm::FixedVectorType::get(
            llvm::Type::getHalfTy(getVMContext()), 2));
    }

    // Small structures which are register sized are generally returned
    // in a register.
    if (shouldReturnTypeInRegister(RetTy, getContext())) {
      uint64_t Size = getContext().getTypeSize(RetTy);

      // As a special-case, if the struct is a "single-element" struct, and
      // the field is of type "float" or "double", return it in a
      // floating-point register. (MSVC does not apply this special case.)
      // We apply a similar transformation for pointer types to improve the
      // quality of the generated IR.
      if (const Type *SeltTy = isSingleElementStruct(RetTy, getContext()))
        if ((!IsWin32StructABI && SeltTy->isRealFloatingType())
            || SeltTy->hasPointerRepresentation())
          return ABIArgInfo::getDirect(CGT.ConvertType(QualType(SeltTy, 0)));

      // FIXME: We should be able to narrow this integer in cases with dead
      // padding.
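      // For example, struct { char c; short s; } is 32 bits wide (including
      // one byte of padding) and is returned as an i32.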
      return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(),Size));
    }

    return getIndirectReturnResult(RetTy, State);
  }

  // Treat an enum type as its underlying type.
  if (const EnumType *EnumTy = RetTy->getAs<EnumType>())
    RetTy = EnumTy->getDecl()->getIntegerType();

  if (const auto *EIT = RetTy->getAs<BitIntType>())
    if (EIT->getNumBits() > 64)
      return getIndirectReturnResult(RetTy, State);

  return (isPromotableIntegerTypeForABI(RetTy) ? ABIArgInfo::getExtend(RetTy)
                                               : ABIArgInfo::getDirect());
}

unsigned X86_32ABIInfo::getTypeStackAlignInBytes(QualType Ty,
                                                 unsigned Align) const {
  // Otherwise, if the alignment is less than or equal to the minimum ABI
  // alignment, just use the default; the backend will handle this.
  if (Align <= MinABIStackAlignInBytes)
    return 0; // Use default alignment.

  if (IsLinuxABI) {
    // Exclude other System V OS (e.g. Darwin, PS4 and FreeBSD) since we don't
    // want to spend any effort dealing with the ramifications of ABI breaks.
    //
    // If the vector type is __m128/__m256/__m512, return the default alignment.
    if (Ty->isVectorType() && (Align == 16 || Align == 32 || Align == 64))
      return Align;
  }
  // On non-Darwin, the stack type alignment is always 4.
  if (!IsDarwinVectorABI) {
    // Set explicit alignment, since we may need to realign the top.
    return MinABIStackAlignInBytes;
  }

  // Otherwise, if the type contains an SSE vector type, the alignment is 16.
  if (Align >= 16 && (isSIMDVectorType(getContext(), Ty) ||
                      isRecordWithSIMDVectorType(getContext(), Ty)))
    return 16;

  return MinABIStackAlignInBytes;
}

ABIArgInfo X86_32ABIInfo::getIndirectResult(QualType Ty, bool ByVal,
                                            CCState &State) const {
  if (!ByVal) {
    if (State.FreeRegs) {
      --State.FreeRegs; // Non-byval indirects just use one pointer.
      if (!IsMCUABI)
        return getNaturalAlignIndirectInReg(Ty);
    }
    return getNaturalAlignIndirect(Ty, false);
  }

  // Compute the byval alignment.
  unsigned TypeAlign = getContext().getTypeAlign(Ty) / 8;
  unsigned StackAlign = getTypeStackAlignInBytes(Ty, TypeAlign);
  if (StackAlign == 0)
    return ABIArgInfo::getIndirect(CharUnits::fromQuantity(4), /*ByVal=*/true);

  // If the stack alignment is less than the type alignment, realign the
  // argument.
  bool Realign = TypeAlign > StackAlign;
  return ABIArgInfo::getIndirect(CharUnits::fromQuantity(StackAlign),
                                 /*ByVal=*/true, Realign);
}

X86_32ABIInfo::Class X86_32ABIInfo::classify(QualType Ty) const {
  const Type *T = isSingleElementStruct(Ty, getContext());
  if (!T)
    T = Ty.getTypePtr();

  if (const BuiltinType *BT = T->getAs<BuiltinType>()) {
    BuiltinType::Kind K = BT->getKind();
    if (K == BuiltinType::Float || K == BuiltinType::Double)
      return Float;
  }
  return Integer;
}

bool X86_32ABIInfo::updateFreeRegs(QualType Ty, CCState &State) const {
  if (!IsSoftFloatABI) {
    Class C = classify(Ty);
    if (C == Float)
      return false;
  }

  unsigned Size = getContext().getTypeSize(Ty);
  unsigned SizeInRegs = (Size + 31) / 32;

  if (SizeInRegs == 0)
    return false;

  if (!IsMCUABI) {
    if (SizeInRegs > State.FreeRegs) {
      State.FreeRegs = 0;
      return false;
    }
  } else {
    // The MCU psABI allows passing parameters in-reg even if there are
    // earlier parameters that are passed on the stack. Also,
    // it does not allow passing >8-byte structs in-register,
    // even if there are 3 free registers available.
    if (SizeInRegs > State.FreeRegs || SizeInRegs > 2)
      return false;
  }

  State.FreeRegs -= SizeInRegs;
  return true;
}

bool X86_32ABIInfo::shouldAggregateUseDirect(QualType Ty, CCState &State,
                                             bool &InReg,
                                             bool &NeedsPadding) const {
  // On Windows, aggregates other than HFAs are never passed in registers, and
  // they do not consume register slots. Homogeneous floating-point aggregates
  // (HFAs) have already been dealt with at this point.
  if (IsWin32StructABI && isAggregateTypeForABI(Ty))
    return false;

  NeedsPadding = false;
  InReg = !IsMCUABI;

  if (!updateFreeRegs(Ty, State))
    return false;

  if (IsMCUABI)
    return true;

  if (State.CC == llvm::CallingConv::X86_FastCall ||
      State.CC == llvm::CallingConv::X86_VectorCall ||
      State.CC == llvm::CallingConv::X86_RegCall) {
    if (getContext().getTypeSize(Ty) <= 32 && State.FreeRegs)
      NeedsPadding = true;

    return false;
  }

  return true;
}

bool X86_32ABIInfo::shouldPrimitiveUseInReg(QualType Ty, CCState &State) const {
  bool IsPtrOrInt = (getContext().getTypeSize(Ty) <= 32) &&
                    (Ty->isIntegralOrEnumerationType() || Ty->isPointerType() ||
                     Ty->isReferenceType());

  if (!IsPtrOrInt && (State.CC == llvm::CallingConv::X86_FastCall ||
                      State.CC == llvm::CallingConv::X86_VectorCall))
    return false;

  if (!updateFreeRegs(Ty, State))
    return false;

  if (!IsPtrOrInt && State.CC == llvm::CallingConv::X86_RegCall)
    return false;

  // Return true to apply inreg to all legal parameters except for MCU targets.
  return !IsMCUABI;
}

void X86_32ABIInfo::runVectorCallFirstPass(CGFunctionInfo &FI,
                                           CCState &State) const {
  // Vectorcall x86 works subtly differently than in x64, so the format is
  // a bit different than the x64 version. First, all vector types (not HVAs)
  // are assigned, with the first 6 ending up in the [XYZ]MM0-5 registers.
  // This differs from the x64 implementation, where the first 6 by INDEX get
  // registers.
  // In the second pass over the arguments, HVAs are passed in the remaining
  // vector registers if possible, or indirectly by address. The address will be
  // passed in ECX/EDX if available. Any other arguments are passed according to
  // the usual fastcall rules.
  MutableArrayRef<CGFunctionInfoArgInfo> Args = FI.arguments();
  for (int I = 0, E = Args.size(); I < E; ++I) {
    const Type *Base = nullptr;
    uint64_t NumElts = 0;
    const QualType &Ty = Args[I].type;
    if ((Ty->isVectorType() || Ty->isBuiltinType()) &&
        isHomogeneousAggregate(Ty, Base, NumElts)) {
      if (State.FreeSSERegs >= NumElts) {
        State.FreeSSERegs -= NumElts;
        Args[I].info = ABIArgInfo::getDirectInReg();
        State.IsPreassigned.set(I);
      }
    }
  }
}

ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty,
                                               CCState &State) const {
  // FIXME: Set alignment on indirect arguments.
  bool IsFastCall = State.CC == llvm::CallingConv::X86_FastCall;
  bool IsRegCall = State.CC == llvm::CallingConv::X86_RegCall;
  bool IsVectorCall = State.CC == llvm::CallingConv::X86_VectorCall;

  Ty = useFirstFieldIfTransparentUnion(Ty);
  TypeInfo TI = getContext().getTypeInfo(Ty);

  // Check with the C++ ABI first.
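  // For instance, under the Itanium C++ ABI a record with a non-trivial
  // destructor or copy constructor is normally passed indirectly
  // (RAA_Indirect), while the 32-bit Microsoft ABI tends to pass such records
  // directly in memory (RAA_DirectInMemory).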
  const RecordType *RT = Ty->getAs<RecordType>();
  if (RT) {
    CGCXXABI::RecordArgABI RAA = getRecordArgABI(RT, getCXXABI());
    if (RAA == CGCXXABI::RAA_Indirect) {
      return getIndirectResult(Ty, false, State);
    } else if (RAA == CGCXXABI::RAA_DirectInMemory) {
      // The field index doesn't matter, we'll fix it up later.
      return ABIArgInfo::getInAlloca(/*FieldIndex=*/0);
    }
  }

  // Regcall uses the concept of a homogeneous vector aggregate, similar
  // to other targets.
  const Type *Base = nullptr;
  uint64_t NumElts = 0;
  if ((IsRegCall || IsVectorCall) &&
      isHomogeneousAggregate(Ty, Base, NumElts)) {
    if (State.FreeSSERegs >= NumElts) {
      State.FreeSSERegs -= NumElts;

      // Vectorcall passes HVAs directly and does not flatten them, but regcall
      // does.
      if (IsVectorCall)
        return getDirectX86Hva();

      if (Ty->isBuiltinType() || Ty->isVectorType())
        return ABIArgInfo::getDirect();
      return ABIArgInfo::getExpand();
    }
    return getIndirectResult(Ty, /*ByVal=*/false, State);
  }

  if (isAggregateTypeForABI(Ty)) {
    // Structures with flexible arrays are always indirect.
    // FIXME: This should not be byval!
    if (RT && RT->getDecl()->hasFlexibleArrayMember())
      return getIndirectResult(Ty, true, State);

    // Ignore empty structs/unions on non-Windows.
    if (!IsWin32StructABI && isEmptyRecord(getContext(), Ty, true))
      return ABIArgInfo::getIgnore();

    llvm::LLVMContext &LLVMContext = getVMContext();
    llvm::IntegerType *Int32 = llvm::Type::getInt32Ty(LLVMContext);
    bool NeedsPadding = false;
    bool InReg;
    if (shouldAggregateUseDirect(Ty, State, InReg, NeedsPadding)) {
      unsigned SizeInRegs = (TI.Width + 31) / 32;
      SmallVector<llvm::Type*, 3> Elements(SizeInRegs, Int32);
      llvm::Type *Result = llvm::StructType::get(LLVMContext, Elements);
      if (InReg)
        return ABIArgInfo::getDirectInReg(Result);
      else
        return ABIArgInfo::getDirect(Result);
    }
    llvm::IntegerType *PaddingType = NeedsPadding ? Int32 : nullptr;

    // Pass over-aligned aggregates on Windows indirectly. This behavior was
    // added in MSVC 2015. Use the required alignment from the record layout,
    // since that may be less than the regular type alignment, and types with
    // required alignment of less than 4 bytes are not passed indirectly.
    if (IsWin32StructABI) {
      unsigned AlignInBits = 0;
      if (RT) {
        const ASTRecordLayout &Layout =
          getContext().getASTRecordLayout(RT->getDecl());
        AlignInBits = getContext().toBits(Layout.getRequiredAlignment());
      } else if (TI.isAlignRequired()) {
        AlignInBits = TI.Align;
      }
      if (AlignInBits > 32)
        return getIndirectResult(Ty, /*ByVal=*/false, State);
    }

    // Expand small (<= 128-bit) record types when we know that the stack layout
    // of those arguments will match the struct. This is important because the
    // LLVM backend isn't smart enough to remove byval, which inhibits many
    // optimizations.
    // Don't do this for the MCU if there are still free integer registers
    // (see X86_64 ABI for full explanation).
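    // For example, struct { int a; int b; } can be expanded into two separate
    // i32 arguments that occupy the same stack slots the struct itself would.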
    if (TI.Width <= 4 * 32 && (!IsMCUABI || State.FreeRegs == 0) &&
        canExpandIndirectArgument(Ty))
      return ABIArgInfo::getExpandWithPadding(
          IsFastCall || IsVectorCall || IsRegCall, PaddingType);

    return getIndirectResult(Ty, true, State);
  }

  if (const VectorType *VT = Ty->getAs<VectorType>()) {
    // On Windows, vectors are passed directly if registers are available, or
    // indirectly if not. This avoids the need to align argument memory. Pass
    // user-defined vector types larger than 512 bits indirectly for simplicity.
    if (IsWin32StructABI) {
      if (TI.Width <= 512 && State.FreeSSERegs > 0) {
        --State.FreeSSERegs;
        return ABIArgInfo::getDirectInReg();
      }
      return getIndirectResult(Ty, /*ByVal=*/false, State);
    }

    // On Darwin, some vectors are passed in memory, we handle this by passing
    // it as an i8/i16/i32/i64.
    if (IsDarwinVectorABI) {
      if ((TI.Width == 8 || TI.Width == 16 || TI.Width == 32) ||
          (TI.Width == 64 && VT->getNumElements() == 1))
        return ABIArgInfo::getDirect(
            llvm::IntegerType::get(getVMContext(), TI.Width));
    }

    if (IsX86_MMXType(CGT.ConvertType(Ty)))
      return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(), 64));

    return ABIArgInfo::getDirect();
  }

  if (const EnumType *EnumTy = Ty->getAs<EnumType>())
    Ty = EnumTy->getDecl()->getIntegerType();

  bool InReg = shouldPrimitiveUseInReg(Ty, State);

  if (isPromotableIntegerTypeForABI(Ty)) {
    if (InReg)
      return ABIArgInfo::getExtendInReg(Ty);
    return ABIArgInfo::getExtend(Ty);
  }

  if (const auto *EIT = Ty->getAs<BitIntType>()) {
    if (EIT->getNumBits() <= 64) {
      if (InReg)
        return ABIArgInfo::getDirectInReg();
      return ABIArgInfo::getDirect();
    }
    return getIndirectResult(Ty, /*ByVal=*/false, State);
  }

  if (InReg)
    return ABIArgInfo::getDirectInReg();
  return ABIArgInfo::getDirect();
}

void X86_32ABIInfo::computeInfo(CGFunctionInfo &FI) const {
  CCState State(FI);
  if (IsMCUABI)
    State.FreeRegs = 3;
  else if (State.CC == llvm::CallingConv::X86_FastCall) {
    State.FreeRegs = 2;
    State.FreeSSERegs = 3;
  } else if (State.CC == llvm::CallingConv::X86_VectorCall) {
    State.FreeRegs = 2;
    State.FreeSSERegs = 6;
  } else if (FI.getHasRegParm())
    State.FreeRegs = FI.getRegParm();
  else if (State.CC == llvm::CallingConv::X86_RegCall) {
    State.FreeRegs = 5;
    State.FreeSSERegs = 8;
  } else if (IsWin32StructABI) {
    // Since MSVC 2015, the first three SSE vectors have been passed in
    // registers. The rest are passed indirectly.
    State.FreeRegs = DefaultNumRegisterParameters;
    State.FreeSSERegs = 3;
  } else
    State.FreeRegs = DefaultNumRegisterParameters;

  if (!::classifyReturnType(getCXXABI(), FI, *this)) {
    FI.getReturnInfo() = classifyReturnType(FI.getReturnType(), State);
  } else if (FI.getReturnInfo().isIndirect()) {
    // The C++ ABI is not aware of register usage, so we have to check if the
    // return value was sret and put it in a register ourselves if appropriate.
    if (State.FreeRegs) {
      --State.FreeRegs; // The sret parameter consumes a register.
      if (!IsMCUABI)
        FI.getReturnInfo().setInReg(true);
    }
  }

  // The chain argument effectively gives us another free register.
  if (FI.isChainCall())
    ++State.FreeRegs;

  // For vectorcall, do a first pass over the arguments, assigning FP and vector
  // arguments to XMM registers as available.
  if (State.CC == llvm::CallingConv::X86_VectorCall)
    runVectorCallFirstPass(FI, State);

  bool UsedInAlloca = false;
  MutableArrayRef<CGFunctionInfoArgInfo> Args = FI.arguments();
  for (int I = 0, E = Args.size(); I < E; ++I) {
    // Skip arguments that have already been assigned.
    if (State.IsPreassigned.test(I))
      continue;

    Args[I].info = classifyArgumentType(Args[I].type, State);
    UsedInAlloca |= (Args[I].info.getKind() == ABIArgInfo::InAlloca);
  }

  // If we needed to use inalloca for any argument, do a second pass and rewrite
  // all the memory arguments to use inalloca.
  if (UsedInAlloca)
    rewriteWithInAlloca(FI);
}

void
X86_32ABIInfo::addFieldToArgStruct(SmallVector<llvm::Type *, 6> &FrameFields,
                                   CharUnits &StackOffset, ABIArgInfo &Info,
                                   QualType Type) const {
  // Arguments are always 4-byte-aligned.
  CharUnits WordSize = CharUnits::fromQuantity(4);
  assert(StackOffset.isMultipleOf(WordSize) && "unaligned inalloca struct");

  // sret pointers and indirect things will require an extra pointer
  // indirection, unless they are byval. Most things are byval, and will not
  // require this indirection.
  bool IsIndirect = false;
  if (Info.isIndirect() && !Info.getIndirectByVal())
    IsIndirect = true;
  Info = ABIArgInfo::getInAlloca(FrameFields.size(), IsIndirect);
  llvm::Type *LLTy = CGT.ConvertTypeForMem(Type);
  if (IsIndirect)
    LLTy = llvm::PointerType::getUnqual(getVMContext());
  FrameFields.push_back(LLTy);
  StackOffset += IsIndirect ? WordSize : getContext().getTypeSizeInChars(Type);

  // Insert padding bytes to respect alignment.
  CharUnits FieldEnd = StackOffset;
  StackOffset = FieldEnd.alignTo(WordSize);
  if (StackOffset != FieldEnd) {
    CharUnits NumBytes = StackOffset - FieldEnd;
    llvm::Type *Ty = llvm::Type::getInt8Ty(getVMContext());
    Ty = llvm::ArrayType::get(Ty, NumBytes.getQuantity());
    FrameFields.push_back(Ty);
  }
}

static bool isArgInAlloca(const ABIArgInfo &Info) {
  // Leave ignored and inreg arguments alone.
  switch (Info.getKind()) {
  case ABIArgInfo::InAlloca:
    return true;
  case ABIArgInfo::Ignore:
  case ABIArgInfo::IndirectAliased:
    return false;
  case ABIArgInfo::Indirect:
  case ABIArgInfo::Direct:
  case ABIArgInfo::Extend:
    return !Info.getInReg();
  case ABIArgInfo::Expand:
  case ABIArgInfo::CoerceAndExpand:
    // These are aggregate types which are never passed in registers when
    // inalloca is involved.
    return true;
  }
  llvm_unreachable("invalid enum");
}

void X86_32ABIInfo::rewriteWithInAlloca(CGFunctionInfo &FI) const {
  assert(IsWin32StructABI && "inalloca only supported on win32");

  // Build a packed struct type for all of the arguments in memory.
  SmallVector<llvm::Type *, 6> FrameFields;

  // The stack alignment is always 4.
  CharUnits StackAlign = CharUnits::fromQuantity(4);

  CharUnits StackOffset;
  CGFunctionInfo::arg_iterator I = FI.arg_begin(), E = FI.arg_end();

  // Put 'this' into the struct before 'sret', if necessary.
  bool IsThisCall =
      FI.getCallingConvention() == llvm::CallingConv::X86_ThisCall;
  ABIArgInfo &Ret = FI.getReturnInfo();
  if (Ret.isIndirect() && Ret.isSRetAfterThis() && !IsThisCall &&
      isArgInAlloca(I->info)) {
    addFieldToArgStruct(FrameFields, StackOffset, I->info, I->type);
    ++I;
  }

  // Put the sret parameter into the inalloca struct if it's in memory.
  if (Ret.isIndirect() && !Ret.getInReg()) {
    addFieldToArgStruct(FrameFields, StackOffset, Ret, FI.getReturnType());
    // On Windows, the hidden sret parameter is always returned in eax.
    Ret.setInAllocaSRet(IsWin32StructABI);
  }

  // Skip the 'this' parameter in ecx.
  if (IsThisCall)
    ++I;

  // Put arguments passed in memory into the struct.
  for (; I != E; ++I) {
    if (isArgInAlloca(I->info))
      addFieldToArgStruct(FrameFields, StackOffset, I->info, I->type);
  }

  FI.setArgStruct(llvm::StructType::get(getVMContext(), FrameFields,
                                        /*isPacked=*/true),
                  StackAlign);
}

Address X86_32ABIInfo::EmitVAArg(CodeGenFunction &CGF,
                                 Address VAListAddr, QualType Ty) const {

  auto TypeInfo = getContext().getTypeInfoInChars(Ty);

  // x86-32 changes the alignment of certain arguments on the stack.
  //
  // Just messing with TypeInfo like this works because we never pass
  // anything indirectly.
  TypeInfo.Align = CharUnits::fromQuantity(
      getTypeStackAlignInBytes(Ty, TypeInfo.Align.getQuantity()));

  return emitVoidPtrVAArg(CGF, VAListAddr, Ty, /*Indirect*/ false,
                          TypeInfo, CharUnits::fromQuantity(4),
                          /*AllowHigherAlign*/ true);
}

bool X86_32TargetCodeGenInfo::isStructReturnInRegABI(
    const llvm::Triple &Triple, const CodeGenOptions &Opts) {
  assert(Triple.getArch() == llvm::Triple::x86);

  switch (Opts.getStructReturnConvention()) {
  case CodeGenOptions::SRCK_Default:
    break;
  case CodeGenOptions::SRCK_OnStack: // -fpcc-struct-return
    return false;
  case CodeGenOptions::SRCK_InRegs: // -freg-struct-return
    return true;
  }

  if (Triple.isOSDarwin() || Triple.isOSIAMCU())
    return true;

  switch (Triple.getOS()) {
  case llvm::Triple::DragonFly:
  case llvm::Triple::FreeBSD:
  case llvm::Triple::OpenBSD:
  case llvm::Triple::Win32:
    return true;
  default:
    return false;
  }
}

static void addX86InterruptAttrs(const FunctionDecl *FD, llvm::GlobalValue *GV,
                                 CodeGen::CodeGenModule &CGM) {
  if (!FD->hasAttr<AnyX86InterruptAttr>())
    return;

  llvm::Function *Fn = cast<llvm::Function>(GV);
  Fn->setCallingConv(llvm::CallingConv::X86_INTR);
  if (FD->getNumParams() == 0)
    return;

  auto PtrTy = cast<PointerType>(FD->getParamDecl(0)->getType());
  llvm::Type *ByValTy = CGM.getTypes().ConvertType(PtrTy->getPointeeType());
  llvm::Attribute NewAttr = llvm::Attribute::getWithByValType(
      Fn->getContext(), ByValTy);
  Fn->addParamAttr(0, NewAttr);
}

void X86_32TargetCodeGenInfo::setTargetAttributes(
    const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const {
  if (GV->isDeclaration())
    return;
  if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) {
    if (FD->hasAttr<X86ForceAlignArgPointerAttr>()) {
      llvm::Function *Fn = cast<llvm::Function>(GV);
      Fn->addFnAttr("stackrealign");
    }

    addX86InterruptAttrs(FD, GV, CGM);
  }
}

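// Fill in the table used by __builtin_init_dwarf_reg_size_table(): each entry
// holds the size in bytes of the corresponding DWARF register, which the EH
// support code consults when restoring registers.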
bool X86_32TargetCodeGenInfo::initDwarfEHRegSizeTable(
    CodeGen::CodeGenFunction &CGF, llvm::Value *Address) const {
  CodeGen::CGBuilderTy &Builder = CGF.Builder;

  llvm::Value *Four8 = llvm::ConstantInt::get(CGF.Int8Ty, 4);

  // 0-7 are the eight integer registers; the order is different
  // on Darwin (for EH), but the range is the same.
  // 8 is %eip.
  AssignToArrayRange(Builder, Address, Four8, 0, 8);

  if (CGF.CGM.getTarget().getTriple().isOSDarwin()) {
    // 12-16 are st(0..4). Not sure why we stop at 4.
    // These have size 16, which is sizeof(long double) on
    // platforms with 8-byte alignment for that type.
    llvm::Value *Sixteen8 = llvm::ConstantInt::get(CGF.Int8Ty, 16);
    AssignToArrayRange(Builder, Address, Sixteen8, 12, 16);

  } else {
    // 9 is %eflags, which doesn't get a size on Darwin for some
    // reason.
    Builder.CreateAlignedStore(
        Four8, Builder.CreateConstInBoundsGEP1_32(CGF.Int8Ty, Address, 9),
        CharUnits::One());

    // 11-16 are st(0..5). Not sure why we stop at 5.
    // These have size 12, which is sizeof(long double) on
    // platforms with 4-byte alignment for that type.
    llvm::Value *Twelve8 = llvm::ConstantInt::get(CGF.Int8Ty, 12);
    AssignToArrayRange(Builder, Address, Twelve8, 11, 16);
  }

  return false;
}

//===----------------------------------------------------------------------===//
// X86-64 ABI Implementation
//===----------------------------------------------------------------------===//

namespace {

/// \returns the size in bits of the largest (native) vector for \p AVXLevel.
static unsigned getNativeVectorSizeForAVXABI(X86AVXABILevel AVXLevel) {
  switch (AVXLevel) {
  case X86AVXABILevel::AVX512:
    return 512;
  case X86AVXABILevel::AVX:
    return 256;
  case X86AVXABILevel::None:
    return 128;
  }
  llvm_unreachable("Unknown AVXLevel");
}

/// X86_64ABIInfo - The X86_64 ABI information.
class X86_64ABIInfo : public ABIInfo {
  enum Class {
    Integer = 0,
    SSE,
    SSEUp,
    X87,
    X87Up,
    ComplexX87,
    NoClass,
    Memory
  };

  /// merge - Implement the X86_64 ABI merging algorithm.
  ///
  /// Merge an accumulating classification \arg Accum with a field
  /// classification \arg Field.
  ///
  /// \param Accum - The accumulating classification. This should
  /// always be either NoClass or the result of a previous merge
  /// call. In addition, this should never be Memory (the caller
  /// should just return Memory for the aggregate).
  static Class merge(Class Accum, Class Field);

  /// postMerge - Implement the X86_64 ABI post merging algorithm.
  ///
  /// Post merger cleanup, reduces a malformed Hi and Lo pair to
  /// final MEMORY or SSE classes when necessary.
  ///
  /// \param AggregateSize - The size of the current aggregate in
  /// the classification process.
  ///
  /// \param Lo - The classification for the parts of the type
  /// residing in the low word of the containing object.
  ///
  /// \param Hi - The classification for the parts of the type
  /// residing in the higher words of the containing object.
  ///
  void postMerge(unsigned AggregateSize, Class &Lo, Class &Hi) const;

  /// classify - Determine the x86_64 register classes in which the
  /// given type T should be passed.
  ///
  /// \param Lo - The classification for the parts of the type
  /// residing in the low word of the containing object.
  ///
  /// \param Hi - The classification for the parts of the type
  /// residing in the high word of the containing object.
  ///
  /// \param OffsetBase - The bit offset of this type in the
  /// containing object. Some parameters are classified differently
  /// depending on whether they straddle an eightbyte boundary.
  ///
  /// \param isNamedArg - Whether the argument in question is a "named"
  /// argument, as used in AMD64-ABI 3.5.7.
  ///
  /// \param IsRegCall - Whether the calling convention is regcall.
  ///
  /// If a word is unused its result will be NoClass; if a type should
  /// be passed in Memory then at least the classification of \arg Lo
  /// will be Memory.
  ///
  /// The \arg Lo class will be NoClass iff the argument is ignored.
  ///
  /// If the \arg Lo class is ComplexX87, then the \arg Hi class will
  /// also be ComplexX87.
  void classify(QualType T, uint64_t OffsetBase, Class &Lo, Class &Hi,
                bool isNamedArg, bool IsRegCall = false) const;

  llvm::Type *GetByteVectorType(QualType Ty) const;
  llvm::Type *GetSSETypeAtOffset(llvm::Type *IRType,
                                 unsigned IROffset, QualType SourceTy,
                                 unsigned SourceOffset) const;
  llvm::Type *GetINTEGERTypeAtOffset(llvm::Type *IRType,
                                     unsigned IROffset, QualType SourceTy,
                                     unsigned SourceOffset) const;

  /// getIndirectReturnResult - Given a source type \arg Ty, return a suitable
  /// result such that the argument will be returned in memory.
  ABIArgInfo getIndirectReturnResult(QualType Ty) const;

  /// getIndirectResult - Given a source type \arg Ty, return a suitable result
  /// such that the argument will be passed in memory.
  ///
  /// \param freeIntRegs - The number of free integer registers remaining
  /// available.
  ABIArgInfo getIndirectResult(QualType Ty, unsigned freeIntRegs) const;

  ABIArgInfo classifyReturnType(QualType RetTy) const;

  ABIArgInfo classifyArgumentType(QualType Ty, unsigned freeIntRegs,
                                  unsigned &neededInt, unsigned &neededSSE,
                                  bool isNamedArg,
                                  bool IsRegCall = false) const;

  ABIArgInfo classifyRegCallStructType(QualType Ty, unsigned &NeededInt,
                                       unsigned &NeededSSE,
                                       unsigned &MaxVectorWidth) const;

  ABIArgInfo classifyRegCallStructTypeImpl(QualType Ty, unsigned &NeededInt,
                                           unsigned &NeededSSE,
                                           unsigned &MaxVectorWidth) const;

  bool IsIllegalVectorType(QualType Ty) const;

  /// The 0.98 ABI revision clarified a lot of ambiguities,
  /// unfortunately in ways that were not always consistent with
  /// certain previous compilers. In particular, platforms which
  /// required strict binary compatibility with older versions of GCC
  /// may need to exempt themselves.
  bool honorsRevision0_98() const {
    return !getTarget().getTriple().isOSDarwin();
  }

  /// GCC classifies <1 x long long> as SSE but some platform ABIs choose to
  /// classify it as INTEGER (for compatibility with older clang compilers).
  bool classifyIntegerMMXAsSSE() const {
    // Clang <= 3.8 did not do this.
    if (getContext().getLangOpts().getClangABICompat() <=
        LangOptions::ClangABI::Ver3_8)
      return false;

    const llvm::Triple &Triple = getTarget().getTriple();
    if (Triple.isOSDarwin() || Triple.isPS() || Triple.isOSFreeBSD())
      return false;
    return true;
  }

  // GCC classifies vectors of __int128 as memory.
  bool passInt128VectorsInMem() const {
    // Clang <= 9.0 did not do this.
    if (getContext().getLangOpts().getClangABICompat() <=
        LangOptions::ClangABI::Ver9)
      return false;

    const llvm::Triple &T = getTarget().getTriple();
    return T.isOSLinux() || T.isOSNetBSD();
  }

  X86AVXABILevel AVXLevel;
  // Some ABIs (e.g. X32 ABI and Native Client OS) use 32 bit pointers on
  // 64-bit hardware.
  bool Has64BitPointers;

public:
  X86_64ABIInfo(CodeGen::CodeGenTypes &CGT, X86AVXABILevel AVXLevel)
      : ABIInfo(CGT), AVXLevel(AVXLevel),
        Has64BitPointers(CGT.getDataLayout().getPointerSize(0) == 8) {}

  bool isPassedUsingAVXType(QualType type) const {
    unsigned neededInt, neededSSE;
    // The freeIntRegs argument doesn't matter here.
    ABIArgInfo info = classifyArgumentType(type, 0, neededInt, neededSSE,
                                           /*isNamedArg*/true);
    if (info.isDirect()) {
      llvm::Type *ty = info.getCoerceToType();
      if (llvm::VectorType *vectorTy = dyn_cast_or_null<llvm::VectorType>(ty))
        return vectorTy->getPrimitiveSizeInBits().getFixedValue() > 128;
    }
    return false;
  }

  void computeInfo(CGFunctionInfo &FI) const override;

  Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
                    QualType Ty) const override;
  Address EmitMSVAArg(CodeGenFunction &CGF, Address VAListAddr,
                      QualType Ty) const override;

  bool has64BitPointers() const {
    return Has64BitPointers;
  }
};

/// WinX86_64ABIInfo - The Windows X86_64 ABI information.
class WinX86_64ABIInfo : public ABIInfo {
public:
  WinX86_64ABIInfo(CodeGen::CodeGenTypes &CGT, X86AVXABILevel AVXLevel)
      : ABIInfo(CGT), AVXLevel(AVXLevel),
        IsMingw64(getTarget().getTriple().isWindowsGNUEnvironment()) {}

  void computeInfo(CGFunctionInfo &FI) const override;

  Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
                    QualType Ty) const override;

  bool isHomogeneousAggregateBaseType(QualType Ty) const override {
    // FIXME: Assumes vectorcall is in use.
    return isX86VectorTypeForVectorCall(getContext(), Ty);
  }

  bool isHomogeneousAggregateSmallEnough(const Type *Ty,
                                         uint64_t NumMembers) const override {
    // FIXME: Assumes vectorcall is in use.
    return isX86VectorCallAggregateSmallEnough(NumMembers);
  }

private:
  ABIArgInfo classify(QualType Ty, unsigned &FreeSSERegs, bool IsReturnType,
                      bool IsVectorCall, bool IsRegCall) const;
  ABIArgInfo reclassifyHvaArgForVectorCall(QualType Ty, unsigned &FreeSSERegs,
                                           const ABIArgInfo &current) const;

  X86AVXABILevel AVXLevel;

  bool IsMingw64;
};

class X86_64TargetCodeGenInfo : public TargetCodeGenInfo {
public:
  X86_64TargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, X86AVXABILevel AVXLevel)
      : TargetCodeGenInfo(std::make_unique<X86_64ABIInfo>(CGT, AVXLevel)) {
    SwiftInfo =
        std::make_unique<SwiftABIInfo>(CGT, /*SwiftErrorInRegister=*/true);
  }

  /// Disable tail call on x86-64. The epilogue code before the tail jump blocks
  /// autoreleaseRV/retainRV and autoreleaseRV/unsafeClaimRV optimizations.
  bool markARCOptimizedReturnCallsAsNoTail() const override { return true; }

  int getDwarfEHStackPointer(CodeGen::CodeGenModule &CGM) const override {
    return 7;
  }

  bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF,
                               llvm::Value *Address) const override {
    llvm::Value *Eight8 = llvm::ConstantInt::get(CGF.Int8Ty, 8);

    // 0-15 are the 16 integer registers.
    // 16 is %rip.
    AssignToArrayRange(CGF.Builder, Address, Eight8, 0, 16);
    return false;
  }

  llvm::Type* adjustInlineAsmType(CodeGen::CodeGenFunction &CGF,
                                  StringRef Constraint,
                                  llvm::Type* Ty) const override {
    return X86AdjustInlineAsmType(CGF, Constraint, Ty);
  }

  bool isNoProtoCallVariadic(const CallArgList &args,
                             const FunctionNoProtoType *fnType) const override {
    // The default CC on x86-64 sets %al to the number of SSE
    // registers used, and GCC sets this when calling an unprototyped
    // function, so we override the default behavior. However, don't do
    // that when AVX types are involved: the ABI explicitly states it is
    // undefined, and it doesn't work in practice because of how the ABI
    // defines varargs anyway.
    if (fnType->getCallConv() == CC_C) {
      bool HasAVXType = false;
      for (CallArgList::const_iterator
             it = args.begin(), ie = args.end(); it != ie; ++it) {
        if (getABIInfo<X86_64ABIInfo>().isPassedUsingAVXType(it->Ty)) {
          HasAVXType = true;
          break;
        }
      }

      if (!HasAVXType)
        return true;
    }

    return TargetCodeGenInfo::isNoProtoCallVariadic(args, fnType);
  }

  void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
                           CodeGen::CodeGenModule &CGM) const override {
    if (GV->isDeclaration())
      return;
    if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) {
      if (FD->hasAttr<X86ForceAlignArgPointerAttr>()) {
        llvm::Function *Fn = cast<llvm::Function>(GV);
        Fn->addFnAttr("stackrealign");
      }

      addX86InterruptAttrs(FD, GV, CGM);
    }
  }

  void checkFunctionCallABI(CodeGenModule &CGM, SourceLocation CallLoc,
                            const FunctionDecl *Caller,
                            const FunctionDecl *Callee,
                            const CallArgList &Args) const override;
};
} // namespace

static void initFeatureMaps(const ASTContext &Ctx,
                            llvm::StringMap<bool> &CallerMap,
                            const FunctionDecl *Caller,
                            llvm::StringMap<bool> &CalleeMap,
                            const FunctionDecl *Callee) {
  if (CalleeMap.empty() && CallerMap.empty()) {
    // The caller is potentially nullptr in the case where the call isn't in a
    // function. In this case, the getFunctionFeatureMap ensures we just get
    // the TU level setting (since it cannot be modified by 'target').
    Ctx.getFunctionFeatureMap(CallerMap, Caller);
    Ctx.getFunctionFeatureMap(CalleeMap, Callee);
  }
}

static bool checkAVXParamFeature(DiagnosticsEngine &Diag,
                                 SourceLocation CallLoc,
                                 const llvm::StringMap<bool> &CallerMap,
                                 const llvm::StringMap<bool> &CalleeMap,
                                 QualType Ty, StringRef Feature,
                                 bool IsArgument) {
  bool CallerHasFeat = CallerMap.lookup(Feature);
  bool CalleeHasFeat = CalleeMap.lookup(Feature);
  if (!CallerHasFeat && !CalleeHasFeat)
    return Diag.Report(CallLoc, diag::warn_avx_calling_convention)
           << IsArgument << Ty << Feature;

  // Mixing calling conventions here is very clearly an error.
  if (!CallerHasFeat || !CalleeHasFeat)
    return Diag.Report(CallLoc, diag::err_avx_calling_convention)
           << IsArgument << Ty << Feature;

  // Else, both caller and callee have the required feature, so there is no need
  // to diagnose.
  return false;
}

static bool checkAVXParam(DiagnosticsEngine &Diag, ASTContext &Ctx,
                          SourceLocation CallLoc,
                          const llvm::StringMap<bool> &CallerMap,
                          const llvm::StringMap<bool> &CalleeMap, QualType Ty,
                          bool IsArgument) {
  uint64_t Size = Ctx.getTypeSize(Ty);
  if (Size > 256)
    return checkAVXParamFeature(Diag, CallLoc, CallerMap, CalleeMap, Ty,
                                "avx512f", IsArgument);

  if (Size > 128)
    return checkAVXParamFeature(Diag, CallLoc, CallerMap, CalleeMap, Ty, "avx",
                                IsArgument);

  return false;
}

void X86_64TargetCodeGenInfo::checkFunctionCallABI(
    CodeGenModule &CGM, SourceLocation CallLoc, const FunctionDecl *Caller,
    const FunctionDecl *Callee, const CallArgList &Args) const {
  llvm::StringMap<bool> CallerMap;
  llvm::StringMap<bool> CalleeMap;
  unsigned ArgIndex = 0;

  // We need to loop through the actual call arguments rather than the
  // function's parameters, in case this is variadic.
  for (const CallArg &Arg : Args) {
    // The "avx" feature changes how vectors >128 in size are passed. "avx512f"
    // additionally changes how vectors >256 in size are passed. Like GCC, we
    // warn when a function is called with an argument where this will change.
    // Unlike GCC, we also error when it is an obvious ABI mismatch, that is,
    // the caller and callee features are mismatched.
    // Unfortunately, we cannot do this diagnostic in SEMA, since the callee can
    // change its ABI with attribute-target after this call.
    if (Arg.getType()->isVectorType() &&
        CGM.getContext().getTypeSize(Arg.getType()) > 128) {
      initFeatureMaps(CGM.getContext(), CallerMap, Caller, CalleeMap, Callee);
      QualType Ty = Arg.getType();
      // The CallArg seems to have desugared the type already, so for clearer
      // diagnostics, replace it with the type in the FunctionDecl if possible.
      if (ArgIndex < Callee->getNumParams())
        Ty = Callee->getParamDecl(ArgIndex)->getType();

      if (checkAVXParam(CGM.getDiags(), CGM.getContext(), CallLoc, CallerMap,
                        CalleeMap, Ty, /*IsArgument*/ true))
        return;
    }
    ++ArgIndex;
  }

  // Check return always, as we don't have a good way of knowing in codegen
  // whether this value is used, tail-called, etc.
1555 if (Callee->getReturnType()->isVectorType() && 1556 CGM.getContext().getTypeSize(Callee->getReturnType()) > 128) { 1557 initFeatureMaps(CGM.getContext(), CallerMap, Caller, CalleeMap, Callee); 1558 checkAVXParam(CGM.getDiags(), CGM.getContext(), CallLoc, CallerMap, 1559 CalleeMap, Callee->getReturnType(), 1560 /*IsArgument*/ false); 1561 } 1562 } 1563 1564 std::string TargetCodeGenInfo::qualifyWindowsLibrary(StringRef Lib) { 1565 // If the argument does not end in .lib, automatically add the suffix. 1566 // If the argument contains a space, enclose it in quotes. 1567 // This matches the behavior of MSVC. 1568 bool Quote = Lib.contains(' '); 1569 std::string ArgStr = Quote ? "\"" : ""; 1570 ArgStr += Lib; 1571 if (!Lib.ends_with_insensitive(".lib") && !Lib.ends_with_insensitive(".a")) 1572 ArgStr += ".lib"; 1573 ArgStr += Quote ? "\"" : ""; 1574 return ArgStr; 1575 } 1576 1577 namespace { 1578 class WinX86_32TargetCodeGenInfo : public X86_32TargetCodeGenInfo { 1579 public: 1580 WinX86_32TargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, 1581 bool DarwinVectorABI, bool RetSmallStructInRegABI, bool Win32StructABI, 1582 unsigned NumRegisterParameters) 1583 : X86_32TargetCodeGenInfo(CGT, DarwinVectorABI, RetSmallStructInRegABI, 1584 Win32StructABI, NumRegisterParameters, false) {} 1585 1586 void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, 1587 CodeGen::CodeGenModule &CGM) const override; 1588 1589 void getDependentLibraryOption(llvm::StringRef Lib, 1590 llvm::SmallString<24> &Opt) const override { 1591 Opt = "/DEFAULTLIB:"; 1592 Opt += qualifyWindowsLibrary(Lib); 1593 } 1594 1595 void getDetectMismatchOption(llvm::StringRef Name, 1596 llvm::StringRef Value, 1597 llvm::SmallString<32> &Opt) const override { 1598 Opt = "/FAILIFMISMATCH:\"" + Name.str() + "=" + Value.str() + "\""; 1599 } 1600 }; 1601 } // namespace 1602 1603 void WinX86_32TargetCodeGenInfo::setTargetAttributes( 1604 const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const { 1605 X86_32TargetCodeGenInfo::setTargetAttributes(D, GV, CGM); 1606 if (GV->isDeclaration()) 1607 return; 1608 addStackProbeTargetAttributes(D, GV, CGM); 1609 } 1610 1611 namespace { 1612 class WinX86_64TargetCodeGenInfo : public TargetCodeGenInfo { 1613 public: 1614 WinX86_64TargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, 1615 X86AVXABILevel AVXLevel) 1616 : TargetCodeGenInfo(std::make_unique<WinX86_64ABIInfo>(CGT, AVXLevel)) { 1617 SwiftInfo = 1618 std::make_unique<SwiftABIInfo>(CGT, /*SwiftErrorInRegister=*/true); 1619 } 1620 1621 void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, 1622 CodeGen::CodeGenModule &CGM) const override; 1623 1624 int getDwarfEHStackPointer(CodeGen::CodeGenModule &CGM) const override { 1625 return 7; 1626 } 1627 1628 bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, 1629 llvm::Value *Address) const override { 1630 llvm::Value *Eight8 = llvm::ConstantInt::get(CGF.Int8Ty, 8); 1631 1632 // 0-15 are the 16 integer registers. 1633 // 16 is %rip. 
1634 AssignToArrayRange(CGF.Builder, Address, Eight8, 0, 16); 1635 return false; 1636 } 1637 1638 void getDependentLibraryOption(llvm::StringRef Lib, 1639 llvm::SmallString<24> &Opt) const override { 1640 Opt = "/DEFAULTLIB:"; 1641 Opt += qualifyWindowsLibrary(Lib); 1642 } 1643 1644 void getDetectMismatchOption(llvm::StringRef Name, 1645 llvm::StringRef Value, 1646 llvm::SmallString<32> &Opt) const override { 1647 Opt = "/FAILIFMISMATCH:\"" + Name.str() + "=" + Value.str() + "\""; 1648 } 1649 }; 1650 } // namespace 1651 1652 void WinX86_64TargetCodeGenInfo::setTargetAttributes( 1653 const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const { 1654 TargetCodeGenInfo::setTargetAttributes(D, GV, CGM); 1655 if (GV->isDeclaration()) 1656 return; 1657 if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) { 1658 if (FD->hasAttr<X86ForceAlignArgPointerAttr>()) { 1659 llvm::Function *Fn = cast<llvm::Function>(GV); 1660 Fn->addFnAttr("stackrealign"); 1661 } 1662 1663 addX86InterruptAttrs(FD, GV, CGM); 1664 } 1665 1666 addStackProbeTargetAttributes(D, GV, CGM); 1667 } 1668 1669 void X86_64ABIInfo::postMerge(unsigned AggregateSize, Class &Lo, 1670 Class &Hi) const { 1671 // AMD64-ABI 3.2.3p2: Rule 5. Then a post merger cleanup is done: 1672 // 1673 // (a) If one of the classes is Memory, the whole argument is passed in 1674 // memory. 1675 // 1676 // (b) If X87UP is not preceded by X87, the whole argument is passed in 1677 // memory. 1678 // 1679 // (c) If the size of the aggregate exceeds two eightbytes and the first 1680 // eightbyte isn't SSE or any other eightbyte isn't SSEUP, the whole 1681 // argument is passed in memory. NOTE: This is necessary to keep the 1682 // ABI working for processors that don't support the __m256 type. 1683 // 1684 // (d) If SSEUP is not preceded by SSE or SSEUP, it is converted to SSE. 1685 // 1686 // Some of these are enforced by the merging logic. Others can arise 1687 // only with unions; for example: 1688 // union { _Complex double; unsigned; } 1689 // 1690 // Note that clauses (b) and (c) were added in 0.98. 1691 // 1692 if (Hi == Memory) 1693 Lo = Memory; 1694 if (Hi == X87Up && Lo != X87 && honorsRevision0_98()) 1695 Lo = Memory; 1696 if (AggregateSize > 128 && (Lo != SSE || Hi != SSEUp)) 1697 Lo = Memory; 1698 if (Hi == SSEUp && Lo != SSE) 1699 Hi = SSE; 1700 } 1701 1702 X86_64ABIInfo::Class X86_64ABIInfo::merge(Class Accum, Class Field) { 1703 // AMD64-ABI 3.2.3p2: Rule 4. Each field of an object is 1704 // classified recursively so that always two fields are 1705 // considered. The resulting class is calculated according to 1706 // the classes of the fields in the eightbyte: 1707 // 1708 // (a) If both classes are equal, this is the resulting class. 1709 // 1710 // (b) If one of the classes is NO_CLASS, the resulting class is 1711 // the other class. 1712 // 1713 // (c) If one of the classes is MEMORY, the result is the MEMORY 1714 // class. 1715 // 1716 // (d) If one of the classes is INTEGER, the result is the 1717 // INTEGER. 1718 // 1719 // (e) If one of the classes is X87, X87UP, COMPLEX_X87 class, 1720 // MEMORY is used as class. 1721 // 1722 // (f) Otherwise class SSE is used. 1723 1724 // Accum should never be memory (we should have returned) or 1725 // ComplexX87 (because this cannot be passed in a structure). 
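// Illustrative example (not part of the ABI text): for
//   struct S { int a; float b; };
// both fields share the first eightbyte; merging INTEGER with SSE gives
// INTEGER by rule (d), so the eightbyte is passed in a general purpose
// register.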
1726 assert((Accum != Memory && Accum != ComplexX87) && 1727 "Invalid accumulated classification during merge."); 1728 if (Accum == Field || Field == NoClass) 1729 return Accum; 1730 if (Field == Memory) 1731 return Memory; 1732 if (Accum == NoClass) 1733 return Field; 1734 if (Accum == Integer || Field == Integer) 1735 return Integer; 1736 if (Field == X87 || Field == X87Up || Field == ComplexX87 || 1737 Accum == X87 || Accum == X87Up) 1738 return Memory; 1739 return SSE; 1740 } 1741 1742 void X86_64ABIInfo::classify(QualType Ty, uint64_t OffsetBase, Class &Lo, 1743 Class &Hi, bool isNamedArg, bool IsRegCall) const { 1744 // FIXME: This code can be simplified by introducing a simple value class for 1745 // Class pairs with appropriate constructor methods for the various 1746 // situations. 1747 1748 // FIXME: Some of the split computations are wrong; unaligned vectors 1749 // shouldn't be passed in registers for example, so there is no chance they 1750 // can straddle an eightbyte. Verify & simplify. 1751 1752 Lo = Hi = NoClass; 1753 1754 Class &Current = OffsetBase < 64 ? Lo : Hi; 1755 Current = Memory; 1756 1757 if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) { 1758 BuiltinType::Kind k = BT->getKind(); 1759 1760 if (k == BuiltinType::Void) { 1761 Current = NoClass; 1762 } else if (k == BuiltinType::Int128 || k == BuiltinType::UInt128) { 1763 Lo = Integer; 1764 Hi = Integer; 1765 } else if (k >= BuiltinType::Bool && k <= BuiltinType::LongLong) { 1766 Current = Integer; 1767 } else if (k == BuiltinType::Float || k == BuiltinType::Double || 1768 k == BuiltinType::Float16 || k == BuiltinType::BFloat16) { 1769 Current = SSE; 1770 } else if (k == BuiltinType::LongDouble) { 1771 const llvm::fltSemantics *LDF = &getTarget().getLongDoubleFormat(); 1772 if (LDF == &llvm::APFloat::IEEEquad()) { 1773 Lo = SSE; 1774 Hi = SSEUp; 1775 } else if (LDF == &llvm::APFloat::x87DoubleExtended()) { 1776 Lo = X87; 1777 Hi = X87Up; 1778 } else if (LDF == &llvm::APFloat::IEEEdouble()) { 1779 Current = SSE; 1780 } else 1781 llvm_unreachable("unexpected long double representation!"); 1782 } 1783 // FIXME: _Decimal32 and _Decimal64 are SSE. 1784 // FIXME: _float128 and _Decimal128 are (SSE, SSEUp). 1785 return; 1786 } 1787 1788 if (const EnumType *ET = Ty->getAs<EnumType>()) { 1789 // Classify the underlying integer type. 1790 classify(ET->getDecl()->getIntegerType(), OffsetBase, Lo, Hi, isNamedArg); 1791 return; 1792 } 1793 1794 if (Ty->hasPointerRepresentation()) { 1795 Current = Integer; 1796 return; 1797 } 1798 1799 if (Ty->isMemberPointerType()) { 1800 if (Ty->isMemberFunctionPointerType()) { 1801 if (Has64BitPointers) { 1802 // If Has64BitPointers, this is an {i64, i64}, so classify both 1803 // Lo and Hi now. 1804 Lo = Hi = Integer; 1805 } else { 1806 // Otherwise, with 32-bit pointers, this is an {i32, i32}. If that 1807 // straddles an eightbyte boundary, Hi should be classified as well. 
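// Illustrative example: with 32-bit pointers, a member function pointer at
// OffsetBase 32 occupies bits [32, 96) and straddles the first eightbyte
// boundary, so both Lo and Hi are classified INTEGER below.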
1808 uint64_t EB_FuncPtr = (OffsetBase) / 64; 1809 uint64_t EB_ThisAdj = (OffsetBase + 64 - 1) / 64; 1810 if (EB_FuncPtr != EB_ThisAdj) { 1811 Lo = Hi = Integer; 1812 } else { 1813 Current = Integer; 1814 } 1815 } 1816 } else { 1817 Current = Integer; 1818 } 1819 return; 1820 } 1821 1822 if (const VectorType *VT = Ty->getAs<VectorType>()) { 1823 uint64_t Size = getContext().getTypeSize(VT); 1824 if (Size == 1 || Size == 8 || Size == 16 || Size == 32) { 1825 // gcc passes the following as integer: 1826 // 4 bytes - <4 x char>, <2 x short>, <1 x int>, <1 x float> 1827 // 2 bytes - <2 x char>, <1 x short> 1828 // 1 byte - <1 x char> 1829 Current = Integer; 1830 1831 // If this type crosses an eightbyte boundary, it should be 1832 // split. 1833 uint64_t EB_Lo = (OffsetBase) / 64; 1834 uint64_t EB_Hi = (OffsetBase + Size - 1) / 64; 1835 if (EB_Lo != EB_Hi) 1836 Hi = Lo; 1837 } else if (Size == 64) { 1838 QualType ElementType = VT->getElementType(); 1839 1840 // gcc passes <1 x double> in memory. :( 1841 if (ElementType->isSpecificBuiltinType(BuiltinType::Double)) 1842 return; 1843 1844 // gcc passes <1 x long long> as SSE but clang used to unconditionally 1845 // pass them as integer. For platforms where clang is the de facto 1846 // platform compiler, we must continue to use integer. 1847 if (!classifyIntegerMMXAsSSE() && 1848 (ElementType->isSpecificBuiltinType(BuiltinType::LongLong) || 1849 ElementType->isSpecificBuiltinType(BuiltinType::ULongLong) || 1850 ElementType->isSpecificBuiltinType(BuiltinType::Long) || 1851 ElementType->isSpecificBuiltinType(BuiltinType::ULong))) 1852 Current = Integer; 1853 else 1854 Current = SSE; 1855 1856 // If this type crosses an eightbyte boundary, it should be 1857 // split. 1858 if (OffsetBase && OffsetBase != 64) 1859 Hi = Lo; 1860 } else if (Size == 128 || 1861 (isNamedArg && Size <= getNativeVectorSizeForAVXABI(AVXLevel))) { 1862 QualType ElementType = VT->getElementType(); 1863 1864 // gcc passes 256 and 512 bit <X x __int128> vectors in memory. :( 1865 if (passInt128VectorsInMem() && Size != 128 && 1866 (ElementType->isSpecificBuiltinType(BuiltinType::Int128) || 1867 ElementType->isSpecificBuiltinType(BuiltinType::UInt128))) 1868 return; 1869 1870 // Arguments of 256-bits are split into four eightbyte chunks. The 1871 // least significant one belongs to class SSE and all the others to class 1872 // SSEUP. The original Lo and Hi design considers that types can't be 1873 // greater than 128-bits, so a 64-bit split in Hi and Lo makes sense. 1874 // This design isn't correct for 256-bits, but since there're no cases 1875 // where the upper parts would need to be inspected, avoid adding 1876 // complexity and just consider Hi to match the 64-256 part. 1877 // 1878 // Note that per 3.5.7 of AMD64-ABI, 256-bit args are only passed in 1879 // registers if they are "named", i.e. not part of the "..." of a 1880 // variadic function. 1881 // 1882 // Similarly, per 3.2.3. of the AVX512 draft, 512-bits ("named") args are 1883 // split into eight eightbyte chunks, one SSE and seven SSEUP. 
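// Illustrative example (assuming AVX is enabled): a named __m256 argument
// reaches this point and is classified (SSE, SSEUP), ending up in one YMM
// register, while the same value passed through the "..." of a variadic call
// is unnamed and falls back to memory.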
1884 Lo = SSE; 1885 Hi = SSEUp; 1886 } 1887 return; 1888 } 1889 1890 if (const ComplexType *CT = Ty->getAs<ComplexType>()) { 1891 QualType ET = getContext().getCanonicalType(CT->getElementType()); 1892 1893 uint64_t Size = getContext().getTypeSize(Ty); 1894 if (ET->isIntegralOrEnumerationType()) { 1895 if (Size <= 64) 1896 Current = Integer; 1897 else if (Size <= 128) 1898 Lo = Hi = Integer; 1899 } else if (ET->isFloat16Type() || ET == getContext().FloatTy || 1900 ET->isBFloat16Type()) { 1901 Current = SSE; 1902 } else if (ET == getContext().DoubleTy) { 1903 Lo = Hi = SSE; 1904 } else if (ET == getContext().LongDoubleTy) { 1905 const llvm::fltSemantics *LDF = &getTarget().getLongDoubleFormat(); 1906 if (LDF == &llvm::APFloat::IEEEquad()) 1907 Current = Memory; 1908 else if (LDF == &llvm::APFloat::x87DoubleExtended()) 1909 Current = ComplexX87; 1910 else if (LDF == &llvm::APFloat::IEEEdouble()) 1911 Lo = Hi = SSE; 1912 else 1913 llvm_unreachable("unexpected long double representation!"); 1914 } 1915 1916 // If this complex type crosses an eightbyte boundary then it 1917 // should be split. 1918 uint64_t EB_Real = (OffsetBase) / 64; 1919 uint64_t EB_Imag = (OffsetBase + getContext().getTypeSize(ET)) / 64; 1920 if (Hi == NoClass && EB_Real != EB_Imag) 1921 Hi = Lo; 1922 1923 return; 1924 } 1925 1926 if (const auto *EITy = Ty->getAs<BitIntType>()) { 1927 if (EITy->getNumBits() <= 64) 1928 Current = Integer; 1929 else if (EITy->getNumBits() <= 128) 1930 Lo = Hi = Integer; 1931 // Larger values need to get passed in memory. 1932 return; 1933 } 1934 1935 if (const ConstantArrayType *AT = getContext().getAsConstantArrayType(Ty)) { 1936 // Arrays are treated like structures. 1937 1938 uint64_t Size = getContext().getTypeSize(Ty); 1939 1940 // AMD64-ABI 3.2.3p2: Rule 1. If the size of an object is larger 1941 // than eight eightbytes, ..., it has class MEMORY. 1942 // regcall ABI doesn't have limitation to an object. The only limitation 1943 // is the free registers, which will be checked in computeInfo. 1944 if (!IsRegCall && Size > 512) 1945 return; 1946 1947 // AMD64-ABI 3.2.3p2: Rule 1. If ..., or it contains unaligned 1948 // fields, it has class MEMORY. 1949 // 1950 // Only need to check alignment of array base. 1951 if (OffsetBase % getContext().getTypeAlign(AT->getElementType())) 1952 return; 1953 1954 // Otherwise implement simplified merge. We could be smarter about 1955 // this, but it isn't worth it and would be harder to verify. 1956 Current = NoClass; 1957 uint64_t EltSize = getContext().getTypeSize(AT->getElementType()); 1958 uint64_t ArraySize = AT->getSize().getZExtValue(); 1959 1960 // The only case a 256-bit wide vector could be used is when the array 1961 // contains a single 256-bit element. Since Lo and Hi logic isn't extended 1962 // to work for sizes wider than 128, early check and fallback to memory. 
1963 // 1964 if (Size > 128 && 1965 (Size != EltSize || Size > getNativeVectorSizeForAVXABI(AVXLevel))) 1966 return; 1967 1968 for (uint64_t i=0, Offset=OffsetBase; i<ArraySize; ++i, Offset += EltSize) { 1969 Class FieldLo, FieldHi; 1970 classify(AT->getElementType(), Offset, FieldLo, FieldHi, isNamedArg); 1971 Lo = merge(Lo, FieldLo); 1972 Hi = merge(Hi, FieldHi); 1973 if (Lo == Memory || Hi == Memory) 1974 break; 1975 } 1976 1977 postMerge(Size, Lo, Hi); 1978 assert((Hi != SSEUp || Lo == SSE) && "Invalid SSEUp array classification."); 1979 return; 1980 } 1981 1982 if (const RecordType *RT = Ty->getAs<RecordType>()) { 1983 uint64_t Size = getContext().getTypeSize(Ty); 1984 1985 // AMD64-ABI 3.2.3p2: Rule 1. If the size of an object is larger 1986 // than eight eightbytes, ..., it has class MEMORY. 1987 if (Size > 512) 1988 return; 1989 1990 // AMD64-ABI 3.2.3p2: Rule 2. If a C++ object has either a non-trivial 1991 // copy constructor or a non-trivial destructor, it is passed by invisible 1992 // reference. 1993 if (getRecordArgABI(RT, getCXXABI())) 1994 return; 1995 1996 const RecordDecl *RD = RT->getDecl(); 1997 1998 // Assume variable sized types are passed in memory. 1999 if (RD->hasFlexibleArrayMember()) 2000 return; 2001 2002 const ASTRecordLayout &Layout = getContext().getASTRecordLayout(RD); 2003 2004 // Reset Lo class, this will be recomputed. 2005 Current = NoClass; 2006 2007 // If this is a C++ record, classify the bases first. 2008 if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD)) { 2009 for (const auto &I : CXXRD->bases()) { 2010 assert(!I.isVirtual() && !I.getType()->isDependentType() && 2011 "Unexpected base class!"); 2012 const auto *Base = 2013 cast<CXXRecordDecl>(I.getType()->castAs<RecordType>()->getDecl()); 2014 2015 // Classify this field. 2016 // 2017 // AMD64-ABI 3.2.3p2: Rule 3. If the size of the aggregate exceeds a 2018 // single eightbyte, each is classified separately. Each eightbyte gets 2019 // initialized to class NO_CLASS. 2020 Class FieldLo, FieldHi; 2021 uint64_t Offset = 2022 OffsetBase + getContext().toBits(Layout.getBaseClassOffset(Base)); 2023 classify(I.getType(), Offset, FieldLo, FieldHi, isNamedArg); 2024 Lo = merge(Lo, FieldLo); 2025 Hi = merge(Hi, FieldHi); 2026 if (Lo == Memory || Hi == Memory) { 2027 postMerge(Size, Lo, Hi); 2028 return; 2029 } 2030 } 2031 } 2032 2033 // Classify the fields one at a time, merging the results. 2034 unsigned idx = 0; 2035 bool UseClang11Compat = getContext().getLangOpts().getClangABICompat() <= 2036 LangOptions::ClangABI::Ver11 || 2037 getContext().getTargetInfo().getTriple().isPS(); 2038 bool IsUnion = RT->isUnionType() && !UseClang11Compat; 2039 2040 for (RecordDecl::field_iterator i = RD->field_begin(), e = RD->field_end(); 2041 i != e; ++i, ++idx) { 2042 uint64_t Offset = OffsetBase + Layout.getFieldOffset(idx); 2043 bool BitField = i->isBitField(); 2044 2045 // Ignore padding bit-fields. 2046 if (BitField && i->isUnnamedBitfield()) 2047 continue; 2048 2049 // AMD64-ABI 3.2.3p2: Rule 1. If the size of an object is larger than 2050 // eight eightbytes, or it contains unaligned fields, it has class MEMORY. 2051 // 2052 // The only case a 256-bit or a 512-bit wide vector could be used is when 2053 // the struct contains a single 256-bit or 512-bit element. Early check 2054 // and fallback to memory. 2055 // 2056 // FIXME: Extended the Lo and Hi logic properly to work for size wider 2057 // than 128. 
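// Illustrative examples (assuming AVX is enabled): struct { __m256 v; } has
// Size == 256 matching its only field, so it passes the check below and is
// classified (SSE, SSEUP); struct { __m256 v; int i; } is padded to 512 bits,
// fails the check, and is passed in memory.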
2058 if (Size > 128 && 2059 ((!IsUnion && Size != getContext().getTypeSize(i->getType())) || 2060 Size > getNativeVectorSizeForAVXABI(AVXLevel))) { 2061 Lo = Memory; 2062 postMerge(Size, Lo, Hi); 2063 return; 2064 } 2065 // Note, skip this test for bit-fields, see below. 2066 if (!BitField && Offset % getContext().getTypeAlign(i->getType())) { 2067 Lo = Memory; 2068 postMerge(Size, Lo, Hi); 2069 return; 2070 } 2071 2072 // Classify this field. 2073 // 2074 // AMD64-ABI 3.2.3p2: Rule 3. If the size of the aggregate 2075 // exceeds a single eightbyte, each is classified 2076 // separately. Each eightbyte gets initialized to class 2077 // NO_CLASS. 2078 Class FieldLo, FieldHi; 2079 2080 // Bit-fields require special handling, they do not force the 2081 // structure to be passed in memory even if unaligned, and 2082 // therefore they can straddle an eightbyte. 2083 if (BitField) { 2084 assert(!i->isUnnamedBitfield()); 2085 uint64_t Offset = OffsetBase + Layout.getFieldOffset(idx); 2086 uint64_t Size = i->getBitWidthValue(getContext()); 2087 2088 uint64_t EB_Lo = Offset / 64; 2089 uint64_t EB_Hi = (Offset + Size - 1) / 64; 2090 2091 if (EB_Lo) { 2092 assert(EB_Hi == EB_Lo && "Invalid classification, type > 16 bytes."); 2093 FieldLo = NoClass; 2094 FieldHi = Integer; 2095 } else { 2096 FieldLo = Integer; 2097 FieldHi = EB_Hi ? Integer : NoClass; 2098 } 2099 } else 2100 classify(i->getType(), Offset, FieldLo, FieldHi, isNamedArg); 2101 Lo = merge(Lo, FieldLo); 2102 Hi = merge(Hi, FieldHi); 2103 if (Lo == Memory || Hi == Memory) 2104 break; 2105 } 2106 2107 postMerge(Size, Lo, Hi); 2108 } 2109 } 2110 2111 ABIArgInfo X86_64ABIInfo::getIndirectReturnResult(QualType Ty) const { 2112 // If this is a scalar LLVM value then assume LLVM will pass it in the right 2113 // place naturally. 2114 if (!isAggregateTypeForABI(Ty)) { 2115 // Treat an enum type as its underlying type. 2116 if (const EnumType *EnumTy = Ty->getAs<EnumType>()) 2117 Ty = EnumTy->getDecl()->getIntegerType(); 2118 2119 if (Ty->isBitIntType()) 2120 return getNaturalAlignIndirect(Ty); 2121 2122 return (isPromotableIntegerTypeForABI(Ty) ? ABIArgInfo::getExtend(Ty) 2123 : ABIArgInfo::getDirect()); 2124 } 2125 2126 return getNaturalAlignIndirect(Ty); 2127 } 2128 2129 bool X86_64ABIInfo::IsIllegalVectorType(QualType Ty) const { 2130 if (const VectorType *VecTy = Ty->getAs<VectorType>()) { 2131 uint64_t Size = getContext().getTypeSize(VecTy); 2132 unsigned LargestVector = getNativeVectorSizeForAVXABI(AVXLevel); 2133 if (Size <= 64 || Size > LargestVector) 2134 return true; 2135 QualType EltTy = VecTy->getElementType(); 2136 if (passInt128VectorsInMem() && 2137 (EltTy->isSpecificBuiltinType(BuiltinType::Int128) || 2138 EltTy->isSpecificBuiltinType(BuiltinType::UInt128))) 2139 return true; 2140 } 2141 2142 return false; 2143 } 2144 2145 ABIArgInfo X86_64ABIInfo::getIndirectResult(QualType Ty, 2146 unsigned freeIntRegs) const { 2147 // If this is a scalar LLVM value then assume LLVM will pass it in the right 2148 // place naturally. 2149 // 2150 // This assumption is optimistic, as there could be free registers available 2151 // when we need to pass this argument in memory, and LLVM could try to pass 2152 // the argument in the free register. This does not seem to happen currently, 2153 // but this code would be much safer if we could mark the argument with 2154 // 'onstack'. See PR12193. 2155 if (!isAggregateTypeForABI(Ty) && !IsIllegalVectorType(Ty) && 2156 !Ty->isBitIntType()) { 2157 // Treat an enum type as its underlying type. 
2158 if (const EnumType *EnumTy = Ty->getAs<EnumType>()) 2159 Ty = EnumTy->getDecl()->getIntegerType(); 2160 2161 return (isPromotableIntegerTypeForABI(Ty) ? ABIArgInfo::getExtend(Ty) 2162 : ABIArgInfo::getDirect()); 2163 } 2164 2165 if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) 2166 return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory); 2167 2168 // Compute the byval alignment. We specify the alignment of the byval in all 2169 // cases so that the mid-level optimizer knows the alignment of the byval. 2170 unsigned Align = std::max(getContext().getTypeAlign(Ty) / 8, 8U); 2171 2172 // Attempt to avoid passing indirect results using byval when possible. This 2173 // is important for good codegen. 2174 // 2175 // We do this by coercing the value into a scalar type which the backend can 2176 // handle naturally (i.e., without using byval). 2177 // 2178 // For simplicity, we currently only do this when we have exhausted all of the 2179 // free integer registers. Doing this when there are free integer registers 2180 // would require more care, as we would have to ensure that the coerced value 2181 // did not claim the unused register. That would require either reording the 2182 // arguments to the function (so that any subsequent inreg values came first), 2183 // or only doing this optimization when there were no following arguments that 2184 // might be inreg. 2185 // 2186 // We currently expect it to be rare (particularly in well written code) for 2187 // arguments to be passed on the stack when there are still free integer 2188 // registers available (this would typically imply large structs being passed 2189 // by value), so this seems like a fair tradeoff for now. 2190 // 2191 // We can revisit this if the backend grows support for 'onstack' parameter 2192 // attributes. See PR12193. 2193 if (freeIntRegs == 0) { 2194 uint64_t Size = getContext().getTypeSize(Ty); 2195 2196 // If this type fits in an eightbyte, coerce it into the matching integral 2197 // type, which will end up on the stack (with alignment 8). 2198 if (Align == 8 && Size <= 64) 2199 return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(), 2200 Size)); 2201 } 2202 2203 return ABIArgInfo::getIndirect(CharUnits::fromQuantity(Align)); 2204 } 2205 2206 /// The ABI specifies that a value should be passed in a full vector XMM/YMM 2207 /// register. Pick an LLVM IR type that will be passed as a vector register. 2208 llvm::Type *X86_64ABIInfo::GetByteVectorType(QualType Ty) const { 2209 // Wrapper structs/arrays that only contain vectors are passed just like 2210 // vectors; strip them off if present. 2211 if (const Type *InnerTy = isSingleElementStruct(Ty, getContext())) 2212 Ty = QualType(InnerTy, 0); 2213 2214 llvm::Type *IRType = CGT.ConvertType(Ty); 2215 if (isa<llvm::VectorType>(IRType)) { 2216 // Don't pass vXi128 vectors in their native type, the backend can't 2217 // legalize them. 2218 if (passInt128VectorsInMem() && 2219 cast<llvm::VectorType>(IRType)->getElementType()->isIntegerTy(128)) { 2220 // Use a vXi64 vector. 2221 uint64_t Size = getContext().getTypeSize(Ty); 2222 return llvm::FixedVectorType::get(llvm::Type::getInt64Ty(getVMContext()), 2223 Size / 64); 2224 } 2225 2226 return IRType; 2227 } 2228 2229 if (IRType->getTypeID() == llvm::Type::FP128TyID) 2230 return IRType; 2231 2232 // We couldn't find the preferred IR vector type for 'Ty'. 
2233 uint64_t Size = getContext().getTypeSize(Ty); 2234 assert((Size == 128 || Size == 256 || Size == 512) && "Invalid type found!"); 2235 2236 2237 // Return a LLVM IR vector type based on the size of 'Ty'. 2238 return llvm::FixedVectorType::get(llvm::Type::getDoubleTy(getVMContext()), 2239 Size / 64); 2240 } 2241 2242 /// BitsContainNoUserData - Return true if the specified [start,end) bit range 2243 /// is known to either be off the end of the specified type or being in 2244 /// alignment padding. The user type specified is known to be at most 128 bits 2245 /// in size, and have passed through X86_64ABIInfo::classify with a successful 2246 /// classification that put one of the two halves in the INTEGER class. 2247 /// 2248 /// It is conservatively correct to return false. 2249 static bool BitsContainNoUserData(QualType Ty, unsigned StartBit, 2250 unsigned EndBit, ASTContext &Context) { 2251 // If the bytes being queried are off the end of the type, there is no user 2252 // data hiding here. This handles analysis of builtins, vectors and other 2253 // types that don't contain interesting padding. 2254 unsigned TySize = (unsigned)Context.getTypeSize(Ty); 2255 if (TySize <= StartBit) 2256 return true; 2257 2258 if (const ConstantArrayType *AT = Context.getAsConstantArrayType(Ty)) { 2259 unsigned EltSize = (unsigned)Context.getTypeSize(AT->getElementType()); 2260 unsigned NumElts = (unsigned)AT->getSize().getZExtValue(); 2261 2262 // Check each element to see if the element overlaps with the queried range. 2263 for (unsigned i = 0; i != NumElts; ++i) { 2264 // If the element is after the span we care about, then we're done.. 2265 unsigned EltOffset = i*EltSize; 2266 if (EltOffset >= EndBit) break; 2267 2268 unsigned EltStart = EltOffset < StartBit ? StartBit-EltOffset :0; 2269 if (!BitsContainNoUserData(AT->getElementType(), EltStart, 2270 EndBit-EltOffset, Context)) 2271 return false; 2272 } 2273 // If it overlaps no elements, then it is safe to process as padding. 2274 return true; 2275 } 2276 2277 if (const RecordType *RT = Ty->getAs<RecordType>()) { 2278 const RecordDecl *RD = RT->getDecl(); 2279 const ASTRecordLayout &Layout = Context.getASTRecordLayout(RD); 2280 2281 // If this is a C++ record, check the bases first. 2282 if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD)) { 2283 for (const auto &I : CXXRD->bases()) { 2284 assert(!I.isVirtual() && !I.getType()->isDependentType() && 2285 "Unexpected base class!"); 2286 const auto *Base = 2287 cast<CXXRecordDecl>(I.getType()->castAs<RecordType>()->getDecl()); 2288 2289 // If the base is after the span we care about, ignore it. 2290 unsigned BaseOffset = Context.toBits(Layout.getBaseClassOffset(Base)); 2291 if (BaseOffset >= EndBit) continue; 2292 2293 unsigned BaseStart = BaseOffset < StartBit ? StartBit-BaseOffset :0; 2294 if (!BitsContainNoUserData(I.getType(), BaseStart, 2295 EndBit-BaseOffset, Context)) 2296 return false; 2297 } 2298 } 2299 2300 // Verify that no field has data that overlaps the region of interest. Yes 2301 // this could be sped up a lot by being smarter about queried fields, 2302 // however we're only looking at structs up to 16 bytes, so we don't care 2303 // much. 2304 unsigned idx = 0; 2305 for (RecordDecl::field_iterator i = RD->field_begin(), e = RD->field_end(); 2306 i != e; ++i, ++idx) { 2307 unsigned FieldOffset = (unsigned)Layout.getFieldOffset(idx); 2308 2309 // If we found a field after the region we care about, then we're done. 
2310 if (FieldOffset >= EndBit) break; 2311 2312 unsigned FieldStart = FieldOffset < StartBit ? StartBit-FieldOffset :0; 2313 if (!BitsContainNoUserData(i->getType(), FieldStart, EndBit-FieldOffset, 2314 Context)) 2315 return false; 2316 } 2317 2318 // If nothing in this record overlapped the area of interest, then we're 2319 // clean. 2320 return true; 2321 } 2322 2323 return false; 2324 } 2325 2326 /// getFPTypeAtOffset - Return a floating point type at the specified offset. 2327 static llvm::Type *getFPTypeAtOffset(llvm::Type *IRType, unsigned IROffset, 2328 const llvm::DataLayout &TD) { 2329 if (IROffset == 0 && IRType->isFloatingPointTy()) 2330 return IRType; 2331 2332 // If this is a struct, recurse into the field at the specified offset. 2333 if (llvm::StructType *STy = dyn_cast<llvm::StructType>(IRType)) { 2334 if (!STy->getNumContainedTypes()) 2335 return nullptr; 2336 2337 const llvm::StructLayout *SL = TD.getStructLayout(STy); 2338 unsigned Elt = SL->getElementContainingOffset(IROffset); 2339 IROffset -= SL->getElementOffset(Elt); 2340 return getFPTypeAtOffset(STy->getElementType(Elt), IROffset, TD); 2341 } 2342 2343 // If this is an array, recurse into the field at the specified offset. 2344 if (llvm::ArrayType *ATy = dyn_cast<llvm::ArrayType>(IRType)) { 2345 llvm::Type *EltTy = ATy->getElementType(); 2346 unsigned EltSize = TD.getTypeAllocSize(EltTy); 2347 IROffset -= IROffset / EltSize * EltSize; 2348 return getFPTypeAtOffset(EltTy, IROffset, TD); 2349 } 2350 2351 return nullptr; 2352 } 2353 2354 /// GetSSETypeAtOffset - Return a type that will be passed by the backend in the 2355 /// low 8 bytes of an XMM register, corresponding to the SSE class. 2356 llvm::Type *X86_64ABIInfo:: 2357 GetSSETypeAtOffset(llvm::Type *IRType, unsigned IROffset, 2358 QualType SourceTy, unsigned SourceOffset) const { 2359 const llvm::DataLayout &TD = getDataLayout(); 2360 unsigned SourceSize = 2361 (unsigned)getContext().getTypeSize(SourceTy) / 8 - SourceOffset; 2362 llvm::Type *T0 = getFPTypeAtOffset(IRType, IROffset, TD); 2363 if (!T0 || T0->isDoubleTy()) 2364 return llvm::Type::getDoubleTy(getVMContext()); 2365 2366 // Get the adjacent FP type. 2367 llvm::Type *T1 = nullptr; 2368 unsigned T0Size = TD.getTypeAllocSize(T0); 2369 if (SourceSize > T0Size) 2370 T1 = getFPTypeAtOffset(IRType, IROffset + T0Size, TD); 2371 if (T1 == nullptr) { 2372 // Check if IRType is a half/bfloat + float. float type will be in IROffset+4 due 2373 // to its alignment. 2374 if (T0->is16bitFPTy() && SourceSize > 4) 2375 T1 = getFPTypeAtOffset(IRType, IROffset + 4, TD); 2376 // If we can't get a second FP type, return a simple half or float. 2377 // avx512fp16-abi.c:pr51813_2 shows it works to return float for 2378 // {float, i8} too. 2379 if (T1 == nullptr) 2380 return T0; 2381 } 2382 2383 if (T0->isFloatTy() && T1->isFloatTy()) 2384 return llvm::FixedVectorType::get(T0, 2); 2385 2386 if (T0->is16bitFPTy() && T1->is16bitFPTy()) { 2387 llvm::Type *T2 = nullptr; 2388 if (SourceSize > 4) 2389 T2 = getFPTypeAtOffset(IRType, IROffset + 4, TD); 2390 if (T2 == nullptr) 2391 return llvm::FixedVectorType::get(T0, 2); 2392 return llvm::FixedVectorType::get(T0, 4); 2393 } 2394 2395 if (T0->is16bitFPTy() || T1->is16bitFPTy()) 2396 return llvm::FixedVectorType::get(llvm::Type::getHalfTy(getVMContext()), 4); 2397 2398 return llvm::Type::getDoubleTy(getVMContext()); 2399 } 2400 2401 2402 /// GetINTEGERTypeAtOffset - The ABI specifies that a value should be passed in 2403 /// an 8-byte GPR. 
This means that we either have a scalar or we are talking 2404 /// about the high or low part of an up-to-16-byte struct. This routine picks 2405 /// the best LLVM IR type to represent this, which may be i64 or may be anything 2406 /// else that the backend will pass in a GPR that works better (e.g. i8, %foo*, 2407 /// etc). 2408 /// 2409 /// PrefType is an LLVM IR type that corresponds to (part of) the IR type for 2410 /// the source type. IROffset is an offset in bytes into the LLVM IR type that 2411 /// the 8-byte value references. PrefType may be null. 2412 /// 2413 /// SourceTy is the source-level type for the entire argument. SourceOffset is 2414 /// an offset into this that we're processing (which is always either 0 or 8). 2415 /// 2416 llvm::Type *X86_64ABIInfo:: 2417 GetINTEGERTypeAtOffset(llvm::Type *IRType, unsigned IROffset, 2418 QualType SourceTy, unsigned SourceOffset) const { 2419 // If we're dealing with an un-offset LLVM IR type, then it means that we're 2420 // returning an 8-byte unit starting with it. See if we can safely use it. 2421 if (IROffset == 0) { 2422 // Pointers and int64's always fill the 8-byte unit. 2423 if ((isa<llvm::PointerType>(IRType) && Has64BitPointers) || 2424 IRType->isIntegerTy(64)) 2425 return IRType; 2426 2427 // If we have a 1/2/4-byte integer, we can use it only if the rest of the 2428 // goodness in the source type is just tail padding. This is allowed to 2429 // kick in for struct {double,int} on the int, but not on 2430 // struct{double,int,int} because we wouldn't return the second int. We 2431 // have to do this analysis on the source type because we can't depend on 2432 // unions being lowered a specific way etc. 2433 if (IRType->isIntegerTy(8) || IRType->isIntegerTy(16) || 2434 IRType->isIntegerTy(32) || 2435 (isa<llvm::PointerType>(IRType) && !Has64BitPointers)) { 2436 unsigned BitWidth = isa<llvm::PointerType>(IRType) ? 32 : 2437 cast<llvm::IntegerType>(IRType)->getBitWidth(); 2438 2439 if (BitsContainNoUserData(SourceTy, SourceOffset*8+BitWidth, 2440 SourceOffset*8+64, getContext())) 2441 return IRType; 2442 } 2443 } 2444 2445 if (llvm::StructType *STy = dyn_cast<llvm::StructType>(IRType)) { 2446 // If this is a struct, recurse into the field at the specified offset. 2447 const llvm::StructLayout *SL = getDataLayout().getStructLayout(STy); 2448 if (IROffset < SL->getSizeInBytes()) { 2449 unsigned FieldIdx = SL->getElementContainingOffset(IROffset); 2450 IROffset -= SL->getElementOffset(FieldIdx); 2451 2452 return GetINTEGERTypeAtOffset(STy->getElementType(FieldIdx), IROffset, 2453 SourceTy, SourceOffset); 2454 } 2455 } 2456 2457 if (llvm::ArrayType *ATy = dyn_cast<llvm::ArrayType>(IRType)) { 2458 llvm::Type *EltTy = ATy->getElementType(); 2459 unsigned EltSize = getDataLayout().getTypeAllocSize(EltTy); 2460 unsigned EltOffset = IROffset/EltSize*EltSize; 2461 return GetINTEGERTypeAtOffset(EltTy, IROffset-EltOffset, SourceTy, 2462 SourceOffset); 2463 } 2464 2465 // Okay, we don't have any better idea of what to pass, so we pass this in an 2466 // integer register that isn't too big to fit the rest of the struct. 2467 unsigned TySizeInBytes = 2468 (unsigned)getContext().getTypeSizeInChars(SourceTy).getQuantity(); 2469 2470 assert(TySizeInBytes != SourceOffset && "Empty field?"); 2471 2472 // It is always safe to classify this as an integer type up to i64 that 2473 // isn't larger than the structure. 
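// Illustrative examples: for struct { double d; int i; } the second eightbyte
// holds a lone i32 whose remaining bits are padding, so the i32 itself is
// returned by the IROffset == 0 case above; for struct { char c[3]; } nothing
// better is found and we build an i24 here, the widest integer that stays
// within the structure.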
2474 return llvm::IntegerType::get(getVMContext(), 2475 std::min(TySizeInBytes-SourceOffset, 8U)*8); 2476 } 2477 2478 2479 /// GetX86_64ByValArgumentPair - Given a high and low type that can ideally 2480 /// be used as elements of a two register pair to pass or return, return a 2481 /// first class aggregate to represent them. For example, if the low part of 2482 /// a by-value argument should be passed as i32* and the high part as float, 2483 /// return {i32*, float}. 2484 static llvm::Type * 2485 GetX86_64ByValArgumentPair(llvm::Type *Lo, llvm::Type *Hi, 2486 const llvm::DataLayout &TD) { 2487 // In order to correctly satisfy the ABI, we need to the high part to start 2488 // at offset 8. If the high and low parts we inferred are both 4-byte types 2489 // (e.g. i32 and i32) then the resultant struct type ({i32,i32}) won't have 2490 // the second element at offset 8. Check for this: 2491 unsigned LoSize = (unsigned)TD.getTypeAllocSize(Lo); 2492 llvm::Align HiAlign = TD.getABITypeAlign(Hi); 2493 unsigned HiStart = llvm::alignTo(LoSize, HiAlign); 2494 assert(HiStart != 0 && HiStart <= 8 && "Invalid x86-64 argument pair!"); 2495 2496 // To handle this, we have to increase the size of the low part so that the 2497 // second element will start at an 8 byte offset. We can't increase the size 2498 // of the second element because it might make us access off the end of the 2499 // struct. 2500 if (HiStart != 8) { 2501 // There are usually two sorts of types the ABI generation code can produce 2502 // for the low part of a pair that aren't 8 bytes in size: half, float or 2503 // i8/i16/i32. This can also include pointers when they are 32-bit (X32 and 2504 // NaCl). 2505 // Promote these to a larger type. 2506 if (Lo->isHalfTy() || Lo->isFloatTy()) 2507 Lo = llvm::Type::getDoubleTy(Lo->getContext()); 2508 else { 2509 assert((Lo->isIntegerTy() || Lo->isPointerTy()) 2510 && "Invalid/unknown lo type"); 2511 Lo = llvm::Type::getInt64Ty(Lo->getContext()); 2512 } 2513 } 2514 2515 llvm::StructType *Result = llvm::StructType::get(Lo, Hi); 2516 2517 // Verify that the second element is at an 8-byte offset. 2518 assert(TD.getStructLayout(Result)->getElementOffset(1) == 8 && 2519 "Invalid x86-64 argument pair!"); 2520 return Result; 2521 } 2522 2523 ABIArgInfo X86_64ABIInfo:: 2524 classifyReturnType(QualType RetTy) const { 2525 // AMD64-ABI 3.2.3p4: Rule 1. Classify the return type with the 2526 // classification algorithm. 2527 X86_64ABIInfo::Class Lo, Hi; 2528 classify(RetTy, 0, Lo, Hi, /*isNamedArg*/ true); 2529 2530 // Check some invariants. 2531 assert((Hi != Memory || Lo == Memory) && "Invalid memory classification."); 2532 assert((Hi != SSEUp || Lo == SSE) && "Invalid SSEUp classification."); 2533 2534 llvm::Type *ResType = nullptr; 2535 switch (Lo) { 2536 case NoClass: 2537 if (Hi == NoClass) 2538 return ABIArgInfo::getIgnore(); 2539 // If the low part is just padding, it takes no register, leave ResType 2540 // null. 2541 assert((Hi == SSE || Hi == Integer || Hi == X87Up) && 2542 "Unknown missing lo part"); 2543 break; 2544 2545 case SSEUp: 2546 case X87Up: 2547 llvm_unreachable("Invalid classification for lo word."); 2548 2549 // AMD64-ABI 3.2.3p4: Rule 2. Types of class memory are returned via 2550 // hidden argument. 2551 case Memory: 2552 return getIndirectReturnResult(RetTy); 2553 2554 // AMD64-ABI 3.2.3p4: Rule 3. If the class is INTEGER, the next 2555 // available register of the sequence %rax, %rdx is used. 
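// Illustrative example: struct { long x; long y; } classifies as
// (INTEGER, INTEGER) and is returned as { i64, i64 } in %rax and %rdx.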
2556 case Integer: 2557 ResType = GetINTEGERTypeAtOffset(CGT.ConvertType(RetTy), 0, RetTy, 0); 2558 2559 // If we have a sign or zero extended integer, make sure to return Extend 2560 // so that the parameter gets the right LLVM IR attributes. 2561 if (Hi == NoClass && isa<llvm::IntegerType>(ResType)) { 2562 // Treat an enum type as its underlying type. 2563 if (const EnumType *EnumTy = RetTy->getAs<EnumType>()) 2564 RetTy = EnumTy->getDecl()->getIntegerType(); 2565 2566 if (RetTy->isIntegralOrEnumerationType() && 2567 isPromotableIntegerTypeForABI(RetTy)) 2568 return ABIArgInfo::getExtend(RetTy); 2569 } 2570 break; 2571 2572 // AMD64-ABI 3.2.3p4: Rule 4. If the class is SSE, the next 2573 // available SSE register of the sequence %xmm0, %xmm1 is used. 2574 case SSE: 2575 ResType = GetSSETypeAtOffset(CGT.ConvertType(RetTy), 0, RetTy, 0); 2576 break; 2577 2578 // AMD64-ABI 3.2.3p4: Rule 6. If the class is X87, the value is 2579 // returned on the X87 stack in %st0 as 80-bit x87 number. 2580 case X87: 2581 ResType = llvm::Type::getX86_FP80Ty(getVMContext()); 2582 break; 2583 2584 // AMD64-ABI 3.2.3p4: Rule 8. If the class is COMPLEX_X87, the real 2585 // part of the value is returned in %st0 and the imaginary part in 2586 // %st1. 2587 case ComplexX87: 2588 assert(Hi == ComplexX87 && "Unexpected ComplexX87 classification."); 2589 ResType = llvm::StructType::get(llvm::Type::getX86_FP80Ty(getVMContext()), 2590 llvm::Type::getX86_FP80Ty(getVMContext())); 2591 break; 2592 } 2593 2594 llvm::Type *HighPart = nullptr; 2595 switch (Hi) { 2596 // Memory was handled previously and X87 should 2597 // never occur as a hi class. 2598 case Memory: 2599 case X87: 2600 llvm_unreachable("Invalid classification for hi word."); 2601 2602 case ComplexX87: // Previously handled. 2603 case NoClass: 2604 break; 2605 2606 case Integer: 2607 HighPart = GetINTEGERTypeAtOffset(CGT.ConvertType(RetTy), 8, RetTy, 8); 2608 if (Lo == NoClass) // Return HighPart at offset 8 in memory. 2609 return ABIArgInfo::getDirect(HighPart, 8); 2610 break; 2611 case SSE: 2612 HighPart = GetSSETypeAtOffset(CGT.ConvertType(RetTy), 8, RetTy, 8); 2613 if (Lo == NoClass) // Return HighPart at offset 8 in memory. 2614 return ABIArgInfo::getDirect(HighPart, 8); 2615 break; 2616 2617 // AMD64-ABI 3.2.3p4: Rule 5. If the class is SSEUP, the eightbyte 2618 // is passed in the next available eightbyte chunk if the last used 2619 // vector register. 2620 // 2621 // SSEUP should always be preceded by SSE, just widen. 2622 case SSEUp: 2623 assert(Lo == SSE && "Unexpected SSEUp classification."); 2624 ResType = GetByteVectorType(RetTy); 2625 break; 2626 2627 // AMD64-ABI 3.2.3p4: Rule 7. If the class is X87UP, the value is 2628 // returned together with the previous X87 value in %st0. 2629 case X87Up: 2630 // If X87Up is preceded by X87, we don't need to do 2631 // anything. However, in some cases with unions it may not be 2632 // preceded by X87. In such situations we follow gcc and pass the 2633 // extra bits in an SSE reg. 2634 if (Lo != X87) { 2635 HighPart = GetSSETypeAtOffset(CGT.ConvertType(RetTy), 8, RetTy, 8); 2636 if (Lo == NoClass) // Return HighPart at offset 8 in memory. 2637 return ABIArgInfo::getDirect(HighPart, 8); 2638 } 2639 break; 2640 } 2641 2642 // If a high part was specified, merge it together with the low part. It is 2643 // known to pass in the high eightbyte of the result. 
We do this by forming a 2644 // first class struct aggregate with the high and low part: {low, high} 2645 if (HighPart) 2646 ResType = GetX86_64ByValArgumentPair(ResType, HighPart, getDataLayout()); 2647 2648 return ABIArgInfo::getDirect(ResType); 2649 } 2650 2651 ABIArgInfo 2652 X86_64ABIInfo::classifyArgumentType(QualType Ty, unsigned freeIntRegs, 2653 unsigned &neededInt, unsigned &neededSSE, 2654 bool isNamedArg, bool IsRegCall) const { 2655 Ty = useFirstFieldIfTransparentUnion(Ty); 2656 2657 X86_64ABIInfo::Class Lo, Hi; 2658 classify(Ty, 0, Lo, Hi, isNamedArg, IsRegCall); 2659 2660 // Check some invariants. 2661 // FIXME: Enforce these by construction. 2662 assert((Hi != Memory || Lo == Memory) && "Invalid memory classification."); 2663 assert((Hi != SSEUp || Lo == SSE) && "Invalid SSEUp classification."); 2664 2665 neededInt = 0; 2666 neededSSE = 0; 2667 llvm::Type *ResType = nullptr; 2668 switch (Lo) { 2669 case NoClass: 2670 if (Hi == NoClass) 2671 return ABIArgInfo::getIgnore(); 2672 // If the low part is just padding, it takes no register, leave ResType 2673 // null. 2674 assert((Hi == SSE || Hi == Integer || Hi == X87Up) && 2675 "Unknown missing lo part"); 2676 break; 2677 2678 // AMD64-ABI 3.2.3p3: Rule 1. If the class is MEMORY, pass the argument 2679 // on the stack. 2680 case Memory: 2681 2682 // AMD64-ABI 3.2.3p3: Rule 5. If the class is X87, X87UP or 2683 // COMPLEX_X87, it is passed in memory. 2684 case X87: 2685 case ComplexX87: 2686 if (getRecordArgABI(Ty, getCXXABI()) == CGCXXABI::RAA_Indirect) 2687 ++neededInt; 2688 return getIndirectResult(Ty, freeIntRegs); 2689 2690 case SSEUp: 2691 case X87Up: 2692 llvm_unreachable("Invalid classification for lo word."); 2693 2694 // AMD64-ABI 3.2.3p3: Rule 2. If the class is INTEGER, the next 2695 // available register of the sequence %rdi, %rsi, %rdx, %rcx, %r8 2696 // and %r9 is used. 2697 case Integer: 2698 ++neededInt; 2699 2700 // Pick an 8-byte type based on the preferred type. 2701 ResType = GetINTEGERTypeAtOffset(CGT.ConvertType(Ty), 0, Ty, 0); 2702 2703 // If we have a sign or zero extended integer, make sure to return Extend 2704 // so that the parameter gets the right LLVM IR attributes. 2705 if (Hi == NoClass && isa<llvm::IntegerType>(ResType)) { 2706 // Treat an enum type as its underlying type. 2707 if (const EnumType *EnumTy = Ty->getAs<EnumType>()) 2708 Ty = EnumTy->getDecl()->getIntegerType(); 2709 2710 if (Ty->isIntegralOrEnumerationType() && 2711 isPromotableIntegerTypeForABI(Ty)) 2712 return ABIArgInfo::getExtend(Ty); 2713 } 2714 2715 break; 2716 2717 // AMD64-ABI 3.2.3p3: Rule 3. If the class is SSE, the next 2718 // available SSE register is used, the registers are taken in the 2719 // order from %xmm0 to %xmm7. 2720 case SSE: { 2721 llvm::Type *IRType = CGT.ConvertType(Ty); 2722 ResType = GetSSETypeAtOffset(IRType, 0, Ty, 0); 2723 ++neededSSE; 2724 break; 2725 } 2726 } 2727 2728 llvm::Type *HighPart = nullptr; 2729 switch (Hi) { 2730 // Memory was handled previously, ComplexX87 and X87 should 2731 // never occur as hi classes, and X87Up must be preceded by X87, 2732 // which is passed in memory. 2733 case Memory: 2734 case X87: 2735 case ComplexX87: 2736 llvm_unreachable("Invalid classification for hi word."); 2737 2738 case NoClass: break; 2739 2740 case Integer: 2741 ++neededInt; 2742 // Pick an 8-byte type based on the preferred type. 2743 HighPart = GetINTEGERTypeAtOffset(CGT.ConvertType(Ty), 8, Ty, 8); 2744 2745 if (Lo == NoClass) // Pass HighPart at offset 8 in memory. 
2746 return ABIArgInfo::getDirect(HighPart, 8); 2747 break; 2748 2749 // X87Up generally doesn't occur here (long double is passed in 2750 // memory), except in situations involving unions. 2751 case X87Up: 2752 case SSE: 2753 HighPart = GetSSETypeAtOffset(CGT.ConvertType(Ty), 8, Ty, 8); 2754 2755 if (Lo == NoClass) // Pass HighPart at offset 8 in memory. 2756 return ABIArgInfo::getDirect(HighPart, 8); 2757 2758 ++neededSSE; 2759 break; 2760 2761 // AMD64-ABI 3.2.3p3: Rule 4. If the class is SSEUP, the 2762 // eightbyte is passed in the upper half of the last used SSE 2763 // register. This only happens when 128-bit vectors are passed. 2764 case SSEUp: 2765 assert(Lo == SSE && "Unexpected SSEUp classification"); 2766 ResType = GetByteVectorType(Ty); 2767 break; 2768 } 2769 2770 // If a high part was specified, merge it together with the low part. It is 2771 // known to pass in the high eightbyte of the result. We do this by forming a 2772 // first class struct aggregate with the high and low part: {low, high} 2773 if (HighPart) 2774 ResType = GetX86_64ByValArgumentPair(ResType, HighPart, getDataLayout()); 2775 2776 return ABIArgInfo::getDirect(ResType); 2777 } 2778 2779 ABIArgInfo 2780 X86_64ABIInfo::classifyRegCallStructTypeImpl(QualType Ty, unsigned &NeededInt, 2781 unsigned &NeededSSE, 2782 unsigned &MaxVectorWidth) const { 2783 auto RT = Ty->getAs<RecordType>(); 2784 assert(RT && "classifyRegCallStructType only valid with struct types"); 2785 2786 if (RT->getDecl()->hasFlexibleArrayMember()) 2787 return getIndirectReturnResult(Ty); 2788 2789 // Sum up bases 2790 if (auto CXXRD = dyn_cast<CXXRecordDecl>(RT->getDecl())) { 2791 if (CXXRD->isDynamicClass()) { 2792 NeededInt = NeededSSE = 0; 2793 return getIndirectReturnResult(Ty); 2794 } 2795 2796 for (const auto &I : CXXRD->bases()) 2797 if (classifyRegCallStructTypeImpl(I.getType(), NeededInt, NeededSSE, 2798 MaxVectorWidth) 2799 .isIndirect()) { 2800 NeededInt = NeededSSE = 0; 2801 return getIndirectReturnResult(Ty); 2802 } 2803 } 2804 2805 // Sum up members 2806 for (const auto *FD : RT->getDecl()->fields()) { 2807 QualType MTy = FD->getType(); 2808 if (MTy->isRecordType() && !MTy->isUnionType()) { 2809 if (classifyRegCallStructTypeImpl(MTy, NeededInt, NeededSSE, 2810 MaxVectorWidth) 2811 .isIndirect()) { 2812 NeededInt = NeededSSE = 0; 2813 return getIndirectReturnResult(Ty); 2814 } 2815 } else { 2816 unsigned LocalNeededInt, LocalNeededSSE; 2817 if (classifyArgumentType(MTy, UINT_MAX, LocalNeededInt, LocalNeededSSE, 2818 true, true) 2819 .isIndirect()) { 2820 NeededInt = NeededSSE = 0; 2821 return getIndirectReturnResult(Ty); 2822 } 2823 if (const auto *AT = getContext().getAsConstantArrayType(MTy)) 2824 MTy = AT->getElementType(); 2825 if (const auto *VT = MTy->getAs<VectorType>()) 2826 if (getContext().getTypeSize(VT) > MaxVectorWidth) 2827 MaxVectorWidth = getContext().getTypeSize(VT); 2828 NeededInt += LocalNeededInt; 2829 NeededSSE += LocalNeededSSE; 2830 } 2831 } 2832 2833 return ABIArgInfo::getDirect(); 2834 } 2835 2836 ABIArgInfo 2837 X86_64ABIInfo::classifyRegCallStructType(QualType Ty, unsigned &NeededInt, 2838 unsigned &NeededSSE, 2839 unsigned &MaxVectorWidth) const { 2840 2841 NeededInt = 0; 2842 NeededSSE = 0; 2843 MaxVectorWidth = 0; 2844 2845 return classifyRegCallStructTypeImpl(Ty, NeededInt, NeededSSE, 2846 MaxVectorWidth); 2847 } 2848 2849 void X86_64ABIInfo::computeInfo(CGFunctionInfo &FI) const { 2850 2851 const unsigned CallingConv = FI.getCallingConvention(); 2852 // It is possible to force Win64 calling 
convention on any x86_64 target by 2853 // using __attribute__((ms_abi)). In such case to correctly emit Win64 2854 // compatible code delegate this call to WinX86_64ABIInfo::computeInfo. 2855 if (CallingConv == llvm::CallingConv::Win64) { 2856 WinX86_64ABIInfo Win64ABIInfo(CGT, AVXLevel); 2857 Win64ABIInfo.computeInfo(FI); 2858 return; 2859 } 2860 2861 bool IsRegCall = CallingConv == llvm::CallingConv::X86_RegCall; 2862 2863 // Keep track of the number of assigned registers. 2864 unsigned FreeIntRegs = IsRegCall ? 11 : 6; 2865 unsigned FreeSSERegs = IsRegCall ? 16 : 8; 2866 unsigned NeededInt = 0, NeededSSE = 0, MaxVectorWidth = 0; 2867 2868 if (!::classifyReturnType(getCXXABI(), FI, *this)) { 2869 if (IsRegCall && FI.getReturnType()->getTypePtr()->isRecordType() && 2870 !FI.getReturnType()->getTypePtr()->isUnionType()) { 2871 FI.getReturnInfo() = classifyRegCallStructType( 2872 FI.getReturnType(), NeededInt, NeededSSE, MaxVectorWidth); 2873 if (FreeIntRegs >= NeededInt && FreeSSERegs >= NeededSSE) { 2874 FreeIntRegs -= NeededInt; 2875 FreeSSERegs -= NeededSSE; 2876 } else { 2877 FI.getReturnInfo() = getIndirectReturnResult(FI.getReturnType()); 2878 } 2879 } else if (IsRegCall && FI.getReturnType()->getAs<ComplexType>() && 2880 getContext().getCanonicalType(FI.getReturnType() 2881 ->getAs<ComplexType>() 2882 ->getElementType()) == 2883 getContext().LongDoubleTy) 2884 // Complex Long Double Type is passed in Memory when Regcall 2885 // calling convention is used. 2886 FI.getReturnInfo() = getIndirectReturnResult(FI.getReturnType()); 2887 else 2888 FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); 2889 } 2890 2891 // If the return value is indirect, then the hidden argument is consuming one 2892 // integer register. 2893 if (FI.getReturnInfo().isIndirect()) 2894 --FreeIntRegs; 2895 else if (NeededSSE && MaxVectorWidth > 0) 2896 FI.setMaxVectorWidth(MaxVectorWidth); 2897 2898 // The chain argument effectively gives us another free register. 2899 if (FI.isChainCall()) 2900 ++FreeIntRegs; 2901 2902 unsigned NumRequiredArgs = FI.getNumRequiredArgs(); 2903 // AMD64-ABI 3.2.3p3: Once arguments are classified, the registers 2904 // get assigned (in left-to-right order) for passing as follows... 2905 unsigned ArgNo = 0; 2906 for (CGFunctionInfo::arg_iterator it = FI.arg_begin(), ie = FI.arg_end(); 2907 it != ie; ++it, ++ArgNo) { 2908 bool IsNamedArg = ArgNo < NumRequiredArgs; 2909 2910 if (IsRegCall && it->type->isStructureOrClassType()) 2911 it->info = classifyRegCallStructType(it->type, NeededInt, NeededSSE, 2912 MaxVectorWidth); 2913 else 2914 it->info = classifyArgumentType(it->type, FreeIntRegs, NeededInt, 2915 NeededSSE, IsNamedArg); 2916 2917 // AMD64-ABI 3.2.3p3: If there are no registers available for any 2918 // eightbyte of an argument, the whole argument is passed on the 2919 // stack. If registers have already been assigned for some 2920 // eightbytes of such an argument, the assignments get reverted. 
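// Illustrative example: if a single integer register remains and the next
// argument classifies as (INTEGER, INTEGER), NeededInt is 2, the check below
// fails, and the whole argument goes to the stack via getIndirectResult.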
2921 if (FreeIntRegs >= NeededInt && FreeSSERegs >= NeededSSE) { 2922 FreeIntRegs -= NeededInt; 2923 FreeSSERegs -= NeededSSE; 2924 if (MaxVectorWidth > FI.getMaxVectorWidth()) 2925 FI.setMaxVectorWidth(MaxVectorWidth); 2926 } else { 2927 it->info = getIndirectResult(it->type, FreeIntRegs); 2928 } 2929 } 2930 } 2931 2932 static Address EmitX86_64VAArgFromMemory(CodeGenFunction &CGF, 2933 Address VAListAddr, QualType Ty) { 2934 Address overflow_arg_area_p = 2935 CGF.Builder.CreateStructGEP(VAListAddr, 2, "overflow_arg_area_p"); 2936 llvm::Value *overflow_arg_area = 2937 CGF.Builder.CreateLoad(overflow_arg_area_p, "overflow_arg_area"); 2938 2939 // AMD64-ABI 3.5.7p5: Step 7. Align l->overflow_arg_area upwards to a 16 2940 // byte boundary if alignment needed by type exceeds 8 byte boundary. 2941 // It isn't stated explicitly in the standard, but in practice we use 2942 // alignment greater than 16 where necessary. 2943 CharUnits Align = CGF.getContext().getTypeAlignInChars(Ty); 2944 if (Align > CharUnits::fromQuantity(8)) { 2945 overflow_arg_area = emitRoundPointerUpToAlignment(CGF, overflow_arg_area, 2946 Align); 2947 } 2948 2949 // AMD64-ABI 3.5.7p5: Step 8. Fetch type from l->overflow_arg_area. 2950 llvm::Type *LTy = CGF.ConvertTypeForMem(Ty); 2951 llvm::Value *Res = 2952 CGF.Builder.CreateBitCast(overflow_arg_area, 2953 llvm::PointerType::getUnqual(LTy)); 2954 2955 // AMD64-ABI 3.5.7p5: Step 9. Set l->overflow_arg_area to: 2956 // l->overflow_arg_area + sizeof(type). 2957 // AMD64-ABI 3.5.7p5: Step 10. Align l->overflow_arg_area upwards to 2958 // an 8 byte boundary. 2959 2960 uint64_t SizeInBytes = (CGF.getContext().getTypeSize(Ty) + 7) / 8; 2961 llvm::Value *Offset = 2962 llvm::ConstantInt::get(CGF.Int32Ty, (SizeInBytes + 7) & ~7); 2963 overflow_arg_area = CGF.Builder.CreateGEP(CGF.Int8Ty, overflow_arg_area, 2964 Offset, "overflow_arg_area.next"); 2965 CGF.Builder.CreateStore(overflow_arg_area, overflow_arg_area_p); 2966 2967 // AMD64-ABI 3.5.7p5: Step 11. Return the fetched type. 2968 return Address(Res, LTy, Align); 2969 } 2970 2971 Address X86_64ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, 2972 QualType Ty) const { 2973 // Assume that va_list type is correct; should be pointer to LLVM type: 2974 // struct { 2975 // i32 gp_offset; 2976 // i32 fp_offset; 2977 // i8* overflow_arg_area; 2978 // i8* reg_save_area; 2979 // }; 2980 unsigned neededInt, neededSSE; 2981 2982 Ty = getContext().getCanonicalType(Ty); 2983 ABIArgInfo AI = classifyArgumentType(Ty, 0, neededInt, neededSSE, 2984 /*isNamedArg*/false); 2985 2986 // AMD64-ABI 3.5.7p5: Step 1. Determine whether type may be passed 2987 // in the registers. If not go to step 7. 2988 if (!neededInt && !neededSSE) 2989 return EmitX86_64VAArgFromMemory(CGF, VAListAddr, Ty); 2990 2991 // AMD64-ABI 3.5.7p5: Step 2. Compute num_gp to hold the number of 2992 // general purpose registers needed to pass type and num_fp to hold 2993 // the number of floating point registers needed. 2994 2995 // AMD64-ABI 3.5.7p5: Step 3. Verify whether arguments fit into 2996 // registers. In the case: l->gp_offset > 48 - num_gp * 8 or 2997 // l->fp_offset > 304 - num_fp * 16 go to step 7. 2998 // 2999 // NOTE: 304 is a typo, there are (6 * 8 + 8 * 16) = 176 bytes of 3000 // register save space). 
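// Illustrative example: with neededInt == 1 and neededSSE == 1, the value is
// taken from the register save area only when gp_offset <= 40 and
// fp_offset <= 160; otherwise it is read from the overflow area.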
3001 3002 llvm::Value *InRegs = nullptr; 3003 Address gp_offset_p = Address::invalid(), fp_offset_p = Address::invalid(); 3004 llvm::Value *gp_offset = nullptr, *fp_offset = nullptr; 3005 if (neededInt) { 3006 gp_offset_p = CGF.Builder.CreateStructGEP(VAListAddr, 0, "gp_offset_p"); 3007 gp_offset = CGF.Builder.CreateLoad(gp_offset_p, "gp_offset"); 3008 InRegs = llvm::ConstantInt::get(CGF.Int32Ty, 48 - neededInt * 8); 3009 InRegs = CGF.Builder.CreateICmpULE(gp_offset, InRegs, "fits_in_gp"); 3010 } 3011 3012 if (neededSSE) { 3013 fp_offset_p = CGF.Builder.CreateStructGEP(VAListAddr, 1, "fp_offset_p"); 3014 fp_offset = CGF.Builder.CreateLoad(fp_offset_p, "fp_offset"); 3015 llvm::Value *FitsInFP = 3016 llvm::ConstantInt::get(CGF.Int32Ty, 176 - neededSSE * 16); 3017 FitsInFP = CGF.Builder.CreateICmpULE(fp_offset, FitsInFP, "fits_in_fp"); 3018 InRegs = InRegs ? CGF.Builder.CreateAnd(InRegs, FitsInFP) : FitsInFP; 3019 } 3020 3021 llvm::BasicBlock *InRegBlock = CGF.createBasicBlock("vaarg.in_reg"); 3022 llvm::BasicBlock *InMemBlock = CGF.createBasicBlock("vaarg.in_mem"); 3023 llvm::BasicBlock *ContBlock = CGF.createBasicBlock("vaarg.end"); 3024 CGF.Builder.CreateCondBr(InRegs, InRegBlock, InMemBlock); 3025 3026 // Emit code to load the value if it was passed in registers. 3027 3028 CGF.EmitBlock(InRegBlock); 3029 3030 // AMD64-ABI 3.5.7p5: Step 4. Fetch type from l->reg_save_area with 3031 // an offset of l->gp_offset and/or l->fp_offset. This may require 3032 // copying to a temporary location in case the parameter is passed 3033 // in different register classes or requires an alignment greater 3034 // than 8 for general purpose registers and 16 for XMM registers. 3035 // 3036 // FIXME: This really results in shameful code when we end up needing to 3037 // collect arguments from different places; often what should result in a 3038 // simple assembling of a structure from scattered addresses has many more 3039 // loads than necessary. Can we clean this up? 3040 llvm::Type *LTy = CGF.ConvertTypeForMem(Ty); 3041 llvm::Value *RegSaveArea = CGF.Builder.CreateLoad( 3042 CGF.Builder.CreateStructGEP(VAListAddr, 3), "reg_save_area"); 3043 3044 Address RegAddr = Address::invalid(); 3045 if (neededInt && neededSSE) { 3046 // FIXME: Cleanup. 3047 assert(AI.isDirect() && "Unexpected ABI info for mixed regs"); 3048 llvm::StructType *ST = cast<llvm::StructType>(AI.getCoerceToType()); 3049 Address Tmp = CGF.CreateMemTemp(Ty); 3050 Tmp = Tmp.withElementType(ST); 3051 assert(ST->getNumElements() == 2 && "Unexpected ABI info for mixed regs"); 3052 llvm::Type *TyLo = ST->getElementType(0); 3053 llvm::Type *TyHi = ST->getElementType(1); 3054 assert((TyLo->isFPOrFPVectorTy() ^ TyHi->isFPOrFPVectorTy()) && 3055 "Unexpected ABI info for mixed regs"); 3056 llvm::Type *PTyLo = llvm::PointerType::getUnqual(TyLo); 3057 llvm::Type *PTyHi = llvm::PointerType::getUnqual(TyHi); 3058 llvm::Value *GPAddr = 3059 CGF.Builder.CreateGEP(CGF.Int8Ty, RegSaveArea, gp_offset); 3060 llvm::Value *FPAddr = 3061 CGF.Builder.CreateGEP(CGF.Int8Ty, RegSaveArea, fp_offset); 3062 llvm::Value *RegLoAddr = TyLo->isFPOrFPVectorTy() ? FPAddr : GPAddr; 3063 llvm::Value *RegHiAddr = TyLo->isFPOrFPVectorTy() ? GPAddr : FPAddr; 3064 3065 // Copy the first element. 3066 // FIXME: Our choice of alignment here and below is probably pessimistic. 
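// Illustrative example: for struct { int i; double d; } the coerce type is
// { i32, double }; the i32 is loaded from RegSaveArea + gp_offset and the
// double from RegSaveArea + fp_offset, then both are stored into Tmp, which
// is finally reinterpreted with the memory type of Ty.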
    llvm::Value *V = CGF.Builder.CreateAlignedLoad(
        TyLo, CGF.Builder.CreateBitCast(RegLoAddr, PTyLo),
        CharUnits::fromQuantity(getDataLayout().getABITypeAlign(TyLo)));
    CGF.Builder.CreateStore(V, CGF.Builder.CreateStructGEP(Tmp, 0));

    // Copy the second element.
    V = CGF.Builder.CreateAlignedLoad(
        TyHi, CGF.Builder.CreateBitCast(RegHiAddr, PTyHi),
        CharUnits::fromQuantity(getDataLayout().getABITypeAlign(TyHi)));
    CGF.Builder.CreateStore(V, CGF.Builder.CreateStructGEP(Tmp, 1));

    RegAddr = Tmp.withElementType(LTy);
  } else if (neededInt) {
    RegAddr = Address(CGF.Builder.CreateGEP(CGF.Int8Ty, RegSaveArea, gp_offset),
                      LTy, CharUnits::fromQuantity(8));

    // Copy to a temporary if necessary to ensure the appropriate alignment.
    auto TInfo = getContext().getTypeInfoInChars(Ty);
    uint64_t TySize = TInfo.Width.getQuantity();
    CharUnits TyAlign = TInfo.Align;

    // Copy into a temporary if the type is more aligned than the
    // register save area.
    if (TyAlign.getQuantity() > 8) {
      Address Tmp = CGF.CreateMemTemp(Ty);
      CGF.Builder.CreateMemCpy(Tmp, RegAddr, TySize, false);
      RegAddr = Tmp;
    }

  } else if (neededSSE == 1) {
    RegAddr = Address(CGF.Builder.CreateGEP(CGF.Int8Ty, RegSaveArea, fp_offset),
                      LTy, CharUnits::fromQuantity(16));
  } else {
    assert(neededSSE == 2 && "Invalid number of needed registers!");
    // SSE registers are spaced 16 bytes apart in the register save
    // area, so we need to collect the two eightbytes together.
    // The ABI isn't explicit about this, but it seems reasonable
    // to assume that the slots are 16-byte aligned, since the stack is
    // naturally 16-byte aligned and the prologue is expected to store
    // all the SSE registers to the RSA.
    Address RegAddrLo =
        Address(CGF.Builder.CreateGEP(CGF.Int8Ty, RegSaveArea, fp_offset),
                CGF.Int8Ty, CharUnits::fromQuantity(16));
    Address RegAddrHi =
        CGF.Builder.CreateConstInBoundsByteGEP(RegAddrLo,
                                               CharUnits::fromQuantity(16));
    llvm::Type *ST = AI.canHaveCoerceToType()
                         ? AI.getCoerceToType()
                         : llvm::StructType::get(CGF.DoubleTy, CGF.DoubleTy);
    llvm::Value *V;
    Address Tmp = CGF.CreateMemTemp(Ty);
    Tmp = Tmp.withElementType(ST);
    V = CGF.Builder.CreateLoad(
        RegAddrLo.withElementType(ST->getStructElementType(0)));
    CGF.Builder.CreateStore(V, CGF.Builder.CreateStructGEP(Tmp, 0));
    V = CGF.Builder.CreateLoad(
        RegAddrHi.withElementType(ST->getStructElementType(1)));
    CGF.Builder.CreateStore(V, CGF.Builder.CreateStructGEP(Tmp, 1));

    RegAddr = Tmp.withElementType(LTy);
  }

  // AMD64-ABI 3.5.7p5: Step 5. Set:
  // l->gp_offset = l->gp_offset + num_gp * 8
  // l->fp_offset = l->fp_offset + num_fp * 16.
  if (neededInt) {
    llvm::Value *Offset = llvm::ConstantInt::get(CGF.Int32Ty, neededInt * 8);
    CGF.Builder.CreateStore(CGF.Builder.CreateAdd(gp_offset, Offset),
                            gp_offset_p);
  }
  if (neededSSE) {
    llvm::Value *Offset = llvm::ConstantInt::get(CGF.Int32Ty, neededSSE * 16);
    CGF.Builder.CreateStore(CGF.Builder.CreateAdd(fp_offset, Offset),
                            fp_offset_p);
  }
  CGF.EmitBranch(ContBlock);

  // Emit code to load the value if it was passed in memory.

  CGF.EmitBlock(InMemBlock);
  Address MemAddr = EmitX86_64VAArgFromMemory(CGF, VAListAddr, Ty);

  // Return the appropriate result.
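  // Both branches above produced an address for the argument (RegAddr when it
  // was fetched from the register save area, MemAddr when it came from the
  // overflow area); merge them with a phi so callers see a single address.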

  CGF.EmitBlock(ContBlock);
  Address ResAddr = emitMergePHI(CGF, RegAddr, InRegBlock, MemAddr, InMemBlock,
                                 "vaarg.addr");
  return ResAddr;
}

Address X86_64ABIInfo::EmitMSVAArg(CodeGenFunction &CGF, Address VAListAddr,
                                   QualType Ty) const {
  // MS x64 ABI requirement: "Any argument that doesn't fit in 8 bytes, or is
  // not 1, 2, 4, or 8 bytes, must be passed by reference."
  uint64_t Width = getContext().getTypeSize(Ty);
  bool IsIndirect = Width > 64 || !llvm::isPowerOf2_64(Width);

  return emitVoidPtrVAArg(CGF, VAListAddr, Ty, IsIndirect,
                          CGF.getContext().getTypeInfoInChars(Ty),
                          CharUnits::fromQuantity(8),
                          /*allowHigherAlign*/ false);
}

ABIArgInfo WinX86_64ABIInfo::reclassifyHvaArgForVectorCall(
    QualType Ty, unsigned &FreeSSERegs, const ABIArgInfo &current) const {
  const Type *Base = nullptr;
  uint64_t NumElts = 0;

  if (!Ty->isBuiltinType() && !Ty->isVectorType() &&
      isHomogeneousAggregate(Ty, Base, NumElts) && FreeSSERegs >= NumElts) {
    FreeSSERegs -= NumElts;
    return getDirectX86Hva();
  }
  return current;
}

ABIArgInfo WinX86_64ABIInfo::classify(QualType Ty, unsigned &FreeSSERegs,
                                      bool IsReturnType, bool IsVectorCall,
                                      bool IsRegCall) const {

  if (Ty->isVoidType())
    return ABIArgInfo::getIgnore();

  if (const EnumType *EnumTy = Ty->getAs<EnumType>())
    Ty = EnumTy->getDecl()->getIntegerType();

  TypeInfo Info = getContext().getTypeInfo(Ty);
  uint64_t Width = Info.Width;
  CharUnits Align = getContext().toCharUnitsFromBits(Info.Align);

  const RecordType *RT = Ty->getAs<RecordType>();
  if (RT) {
    if (!IsReturnType) {
      if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(RT, getCXXABI()))
        return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory);
    }

    if (RT->getDecl()->hasFlexibleArrayMember())
      return getNaturalAlignIndirect(Ty, /*ByVal=*/false);
  }

  const Type *Base = nullptr;
  uint64_t NumElts = 0;
  // vectorcall adds the concept of a homogeneous vector aggregate, similar to
  // other targets.
  if ((IsVectorCall || IsRegCall) &&
      isHomogeneousAggregate(Ty, Base, NumElts)) {
    if (IsRegCall) {
      if (FreeSSERegs >= NumElts) {
        FreeSSERegs -= NumElts;
        if (IsReturnType || Ty->isBuiltinType() || Ty->isVectorType())
          return ABIArgInfo::getDirect();
        return ABIArgInfo::getExpand();
      }
      return ABIArgInfo::getIndirect(Align, /*ByVal=*/false);
    } else if (IsVectorCall) {
      if (FreeSSERegs >= NumElts &&
          (IsReturnType || Ty->isBuiltinType() || Ty->isVectorType())) {
        FreeSSERegs -= NumElts;
        return ABIArgInfo::getDirect();
      } else if (IsReturnType) {
        return ABIArgInfo::getExpand();
      } else if (!Ty->isBuiltinType() && !Ty->isVectorType()) {
        // HVAs are delayed and reclassified in the second pass.
        return ABIArgInfo::getIndirect(Align, /*ByVal=*/false);
      }
    }
  }

  if (Ty->isMemberPointerType()) {
    // If the member pointer is represented by an LLVM int or ptr, pass it
    // directly.
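    // (Under the Microsoft ABI a pointer to member function can instead lower
    // to a struct when the class uses multiple or virtual inheritance; those
    // representations fall through to the aggregate handling below.)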
    llvm::Type *LLTy = CGT.ConvertType(Ty);
    if (LLTy->isPointerTy() || LLTy->isIntegerTy())
      return ABIArgInfo::getDirect();
  }

  if (RT || Ty->isAnyComplexType() || Ty->isMemberPointerType()) {
    // MS x64 ABI requirement: "Any argument that doesn't fit in 8 bytes, or is
    // not 1, 2, 4, or 8 bytes, must be passed by reference."
    if (Width > 64 || !llvm::isPowerOf2_64(Width))
      return getNaturalAlignIndirect(Ty, /*ByVal=*/false);

    // Otherwise, coerce it to a small integer.
    return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(), Width));
  }

  if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) {
    switch (BT->getKind()) {
    case BuiltinType::Bool:
      // Bool is extended as the ABI requires; other builtin types are passed
      // without extension.
      return ABIArgInfo::getExtend(Ty);

    case BuiltinType::LongDouble:
      // Mingw64 GCC uses the old 80 bit extended precision floating point
      // unit. It passes such values indirectly through memory.
      if (IsMingw64) {
        const llvm::fltSemantics *LDF = &getTarget().getLongDoubleFormat();
        if (LDF == &llvm::APFloat::x87DoubleExtended())
          return ABIArgInfo::getIndirect(Align, /*ByVal=*/false);
      }
      break;

    case BuiltinType::Int128:
    case BuiltinType::UInt128:
      // If it's a parameter type, the normal ABI rule is that arguments larger
      // than 8 bytes are passed indirectly. GCC follows it. We follow it too,
      // even though it isn't particularly efficient.
      if (!IsReturnType)
        return ABIArgInfo::getIndirect(Align, /*ByVal=*/false);

      // Mingw64 GCC returns i128 in XMM0. Coerce to v2i64 to handle that.
      // Clang matches that behavior for compatibility.
      return ABIArgInfo::getDirect(llvm::FixedVectorType::get(
          llvm::Type::getInt64Ty(getVMContext()), 2));

    default:
      break;
    }
  }

  if (Ty->isBitIntType()) {
    // MS x64 ABI requirement: "Any argument that doesn't fit in 8 bytes, or is
    // not 1, 2, 4, or 8 bytes, must be passed by reference."
    // However, non-power-of-two bit-precise integers will be passed as 1, 2,
    // 4, or 8 bytes anyway as long as they fit, so we don't have to check for
    // a power of 2.
    if (Width <= 64)
      return ABIArgInfo::getDirect();
    return ABIArgInfo::getIndirect(Align, /*ByVal=*/false);
  }

  return ABIArgInfo::getDirect();
}

void WinX86_64ABIInfo::computeInfo(CGFunctionInfo &FI) const {
  const unsigned CC = FI.getCallingConvention();
  bool IsVectorCall = CC == llvm::CallingConv::X86_VectorCall;
  bool IsRegCall = CC == llvm::CallingConv::X86_RegCall;

  // If __attribute__((sysv_abi)) is in use, use the SysV argument
  // classification rules.
  if (CC == llvm::CallingConv::X86_64_SysV) {
    X86_64ABIInfo SysVABIInfo(CGT, AVXLevel);
    SysVABIInfo.computeInfo(FI);
    return;
  }

  unsigned FreeSSERegs = 0;
  if (IsVectorCall) {
    // We can use up to 4 SSE return registers with vectorcall.
    FreeSSERegs = 4;
  } else if (IsRegCall) {
    // RegCall gives us 16 SSE registers.
    FreeSSERegs = 16;
  }

  if (!getCXXABI().classifyReturnType(FI))
    FI.getReturnInfo() = classify(FI.getReturnType(), FreeSSERegs, true,
                                  IsVectorCall, IsRegCall);

  if (IsVectorCall) {
    // We can use up to 6 SSE register parameters with vectorcall.
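    // (Those are the first six vector registers, XMM0/YMM0 through XMM5/YMM5;
    // arguments beyond the sixth position are handled in the loop below by
    // pretending no vector registers remain.)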
    FreeSSERegs = 6;
  } else if (IsRegCall) {
    // RegCall gives us 16 SSE registers; we can reuse the return registers.
    FreeSSERegs = 16;
  }

  unsigned ArgNum = 0;
  unsigned ZeroSSERegs = 0;
  for (auto &I : FI.arguments()) {
    // Vectorcall in x64 only permits the first 6 arguments to be passed as
    // XMM/YMM registers. After the sixth argument, pretend no vector
    // registers are left.
    unsigned *MaybeFreeSSERegs =
        (IsVectorCall && ArgNum >= 6) ? &ZeroSSERegs : &FreeSSERegs;
    I.info =
        classify(I.type, *MaybeFreeSSERegs, false, IsVectorCall, IsRegCall);
    ++ArgNum;
  }

  if (IsVectorCall) {
    // For vectorcall, assign aggregate HVAs to any free vector registers in a
    // second pass.
    for (auto &I : FI.arguments())
      I.info = reclassifyHvaArgForVectorCall(I.type, FreeSSERegs, I.info);
  }
}

Address WinX86_64ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
                                    QualType Ty) const {
  // MS x64 ABI requirement: "Any argument that doesn't fit in 8 bytes, or is
  // not 1, 2, 4, or 8 bytes, must be passed by reference."
  uint64_t Width = getContext().getTypeSize(Ty);
  bool IsIndirect = Width > 64 || !llvm::isPowerOf2_64(Width);

  return emitVoidPtrVAArg(CGF, VAListAddr, Ty, IsIndirect,
                          CGF.getContext().getTypeInfoInChars(Ty),
                          CharUnits::fromQuantity(8),
                          /*allowHigherAlign*/ false);
}

std::unique_ptr<TargetCodeGenInfo> CodeGen::createX86_32TargetCodeGenInfo(
    CodeGenModule &CGM, bool DarwinVectorABI, bool Win32StructABI,
    unsigned NumRegisterParameters, bool SoftFloatABI) {
  bool RetSmallStructInRegABI = X86_32TargetCodeGenInfo::isStructReturnInRegABI(
      CGM.getTriple(), CGM.getCodeGenOpts());
  return std::make_unique<X86_32TargetCodeGenInfo>(
      CGM.getTypes(), DarwinVectorABI, RetSmallStructInRegABI, Win32StructABI,
      NumRegisterParameters, SoftFloatABI);
}

std::unique_ptr<TargetCodeGenInfo> CodeGen::createWinX86_32TargetCodeGenInfo(
    CodeGenModule &CGM, bool DarwinVectorABI, bool Win32StructABI,
    unsigned NumRegisterParameters) {
  bool RetSmallStructInRegABI = X86_32TargetCodeGenInfo::isStructReturnInRegABI(
      CGM.getTriple(), CGM.getCodeGenOpts());
  return std::make_unique<WinX86_32TargetCodeGenInfo>(
      CGM.getTypes(), DarwinVectorABI, RetSmallStructInRegABI, Win32StructABI,
      NumRegisterParameters);
}

std::unique_ptr<TargetCodeGenInfo>
CodeGen::createX86_64TargetCodeGenInfo(CodeGenModule &CGM,
                                       X86AVXABILevel AVXLevel) {
  return std::make_unique<X86_64TargetCodeGenInfo>(CGM.getTypes(), AVXLevel);
}

std::unique_ptr<TargetCodeGenInfo>
CodeGen::createWinX86_64TargetCodeGenInfo(CodeGenModule &CGM,
                                          X86AVXABILevel AVXLevel) {
  return std::make_unique<WinX86_64TargetCodeGenInfo>(CGM.getTypes(), AVXLevel);
}
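
// Illustrative use of the factories above (a hypothetical call site, not a
// definition from this file): given a CodeGenModule `CGM` for an x86-64
// target built with AVX, the dispatching code could construct the target
// info roughly as
//
//   std::unique_ptr<TargetCodeGenInfo> Info =
//       CodeGen::createX86_64TargetCodeGenInfo(CGM, X86AVXABILevel::AVX);
//
// The real selection lives in clang's target-dispatch code, keyed on the
// target triple and the available AVX ABI level.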