//===- X86.cpp ------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "ABIInfoImpl.h"
#include "TargetInfo.h"
#include "clang/Basic/DiagnosticFrontend.h"
#include "llvm/ADT/SmallBitVector.h"

using namespace clang;
using namespace clang::CodeGen;

namespace {

/// IsX86_MMXType - Return true if this is an MMX type.
bool IsX86_MMXType(llvm::Type *IRType) {
  // Return true if the type is an MMX type <2 x i32>, <4 x i16>, or <8 x i8>.
  return IRType->isVectorTy() && IRType->getPrimitiveSizeInBits() == 64 &&
         cast<llvm::VectorType>(IRType)->getElementType()->isIntegerTy() &&
         IRType->getScalarSizeInBits() != 64;
}

static llvm::Type *X86AdjustInlineAsmType(CodeGen::CodeGenFunction &CGF,
                                          StringRef Constraint,
                                          llvm::Type *Ty) {
  if (Constraint == "k") {
    llvm::Type *Int1Ty = llvm::Type::getInt1Ty(CGF.getLLVMContext());
    return llvm::FixedVectorType::get(Int1Ty, Ty->getScalarSizeInBits());
  }

  // No operation needed
  return Ty;
}

/// Returns true if this type can be passed in SSE registers with the
/// X86_VectorCall calling convention. Shared between x86_32 and x86_64.
static bool isX86VectorTypeForVectorCall(ASTContext &Context, QualType Ty) {
  if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) {
    if (BT->isFloatingPoint() && BT->getKind() != BuiltinType::Half) {
      if (BT->getKind() == BuiltinType::LongDouble) {
        if (&Context.getTargetInfo().getLongDoubleFormat() ==
            &llvm::APFloat::x87DoubleExtended())
          return false;
      }
      return true;
    }
  } else if (const VectorType *VT = Ty->getAs<VectorType>()) {
    // vectorcall can pass XMM, YMM, and ZMM vectors. We don't pass SSE1 MMX
    // registers specially.
    unsigned VecSize = Context.getTypeSize(VT);
    if (VecSize == 128 || VecSize == 256 || VecSize == 512)
      return true;
  }
  return false;
}

/// Returns true if this aggregate is small enough to be passed in SSE registers
/// in the X86_VectorCall calling convention. Shared between x86_32 and x86_64.
static bool isX86VectorCallAggregateSmallEnough(uint64_t NumMembers) {
  return NumMembers <= 4;
}

/// Returns a Homogeneous Vector Aggregate ABIArgInfo, used in X86.
static ABIArgInfo getDirectX86Hva(llvm::Type *T = nullptr) {
  auto AI = ABIArgInfo::getDirect(T);
  AI.setInReg(true);
  AI.setCanBeFlattened(false);
  return AI;
}

//===----------------------------------------------------------------------===//
// X86-32 ABI Implementation
//===----------------------------------------------------------------------===//

/// Similar to llvm::CCState, but for Clang.
struct CCState {
  CCState(CGFunctionInfo &FI)
      : IsPreassigned(FI.arg_size()), CC(FI.getCallingConvention()),
        Required(FI.getRequiredArgs()), IsDelegateCall(FI.isDelegateCall()) {}

  llvm::SmallBitVector IsPreassigned;
  unsigned CC = CallingConv::CC_C;
  unsigned FreeRegs = 0;
  unsigned FreeSSERegs = 0;
  RequiredArgs Required;
  bool IsDelegateCall = false;
};

/// X86_32ABIInfo - The X86-32 ABI information.
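/// For illustration (a hypothetical invocation, not exercised by this file):
/// under -mregparm=3, a prototype such as
///   int f(int a, int b, int c);  // a -> EAX, b -> EDX, c -> ECX
/// is classified here so that each parameter is marked 'inreg' for the
/// backend.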
class X86_32ABIInfo : public ABIInfo {
  enum Class {
    Integer,
    Float
  };

  static const unsigned MinABIStackAlignInBytes = 4;

  bool IsDarwinVectorABI;
  bool IsRetSmallStructInRegABI;
  bool IsWin32StructABI;
  bool IsSoftFloatABI;
  bool IsMCUABI;
  bool IsLinuxABI;
  unsigned DefaultNumRegisterParameters;

  static bool isRegisterSize(unsigned Size) {
    return (Size == 8 || Size == 16 || Size == 32 || Size == 64);
  }

  bool isHomogeneousAggregateBaseType(QualType Ty) const override {
    // FIXME: Assumes vectorcall is in use.
    return isX86VectorTypeForVectorCall(getContext(), Ty);
  }

  bool isHomogeneousAggregateSmallEnough(const Type *Ty,
                                         uint64_t NumMembers) const override {
    // FIXME: Assumes vectorcall is in use.
    return isX86VectorCallAggregateSmallEnough(NumMembers);
  }

  bool shouldReturnTypeInRegister(QualType Ty, ASTContext &Context) const;

  /// getIndirectResult - Given a source type \arg Ty, return a suitable result
  /// such that the argument will be passed in memory.
  ABIArgInfo getIndirectResult(QualType Ty, bool ByVal, CCState &State) const;

  ABIArgInfo getIndirectReturnResult(QualType Ty, CCState &State) const;

  /// Return the alignment to use for the given type on the stack.
  unsigned getTypeStackAlignInBytes(QualType Ty, unsigned Align) const;

  Class classify(QualType Ty) const;
  ABIArgInfo classifyReturnType(QualType RetTy, CCState &State) const;
  ABIArgInfo classifyArgumentType(QualType RetTy, CCState &State,
                                  unsigned ArgIndex) const;

  /// Updates the number of available free registers, returns
  /// true if any registers were allocated.
  bool updateFreeRegs(QualType Ty, CCState &State) const;

  bool shouldAggregateUseDirect(QualType Ty, CCState &State, bool &InReg,
                                bool &NeedsPadding) const;
  bool shouldPrimitiveUseInReg(QualType Ty, CCState &State) const;

  bool canExpandIndirectArgument(QualType Ty) const;

  /// Rewrite the function info so that all memory arguments use
  /// inalloca.
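  /// A sketch of the resulting IR at a call site, assuming a hypothetical
  /// struct S passed in memory on win32:
  ///   %argmem = alloca inalloca <{ %struct.S }>
  ///   ...initialize %argmem...
  ///   call void @g(ptr inalloca(<{ %struct.S }>) %argmem)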
  void rewriteWithInAlloca(CGFunctionInfo &FI) const;

  void addFieldToArgStruct(SmallVector<llvm::Type *, 6> &FrameFields,
                           CharUnits &StackOffset, ABIArgInfo &Info,
                           QualType Type) const;
  void runVectorCallFirstPass(CGFunctionInfo &FI, CCState &State) const;

public:
  void computeInfo(CGFunctionInfo &FI) const override;
  RValue EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty,
                   AggValueSlot Slot) const override;

  X86_32ABIInfo(CodeGen::CodeGenTypes &CGT, bool DarwinVectorABI,
                bool RetSmallStructInRegABI, bool Win32StructABI,
                unsigned NumRegisterParameters, bool SoftFloatABI)
      : ABIInfo(CGT), IsDarwinVectorABI(DarwinVectorABI),
        IsRetSmallStructInRegABI(RetSmallStructInRegABI),
        IsWin32StructABI(Win32StructABI), IsSoftFloatABI(SoftFloatABI),
        IsMCUABI(CGT.getTarget().getTriple().isOSIAMCU()),
        IsLinuxABI(CGT.getTarget().getTriple().isOSLinux() ||
                   CGT.getTarget().getTriple().isOSCygMing()),
        DefaultNumRegisterParameters(NumRegisterParameters) {}
};

class X86_32SwiftABIInfo : public SwiftABIInfo {
public:
  explicit X86_32SwiftABIInfo(CodeGenTypes &CGT)
      : SwiftABIInfo(CGT, /*SwiftErrorInRegister=*/false) {}

  bool shouldPassIndirectly(ArrayRef<llvm::Type *> ComponentTys,
                            bool AsReturnValue) const override {
    // LLVM's x86-32 lowering currently only assigns up to three
    // integer registers and three fp registers. Oddly, it'll use up to
    // four vector registers for vectors, but those can overlap with the
    // scalar registers.
    return occupiesMoreThan(ComponentTys, /*total=*/3);
  }
};

class X86_32TargetCodeGenInfo : public TargetCodeGenInfo {
public:
  X86_32TargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, bool DarwinVectorABI,
                          bool RetSmallStructInRegABI, bool Win32StructABI,
                          unsigned NumRegisterParameters, bool SoftFloatABI)
      : TargetCodeGenInfo(std::make_unique<X86_32ABIInfo>(
            CGT, DarwinVectorABI, RetSmallStructInRegABI, Win32StructABI,
            NumRegisterParameters, SoftFloatABI)) {
    SwiftInfo = std::make_unique<X86_32SwiftABIInfo>(CGT);
  }

  static bool isStructReturnInRegABI(
      const llvm::Triple &Triple, const CodeGenOptions &Opts);

  void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
                           CodeGen::CodeGenModule &CGM) const override;

  int getDwarfEHStackPointer(CodeGen::CodeGenModule &CGM) const override {
    // Darwin uses different dwarf register numbers for EH.
    if (CGM.getTarget().getTriple().isOSDarwin()) return 5;
    return 4;
  }

  bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF,
                               llvm::Value *Address) const override;

  llvm::Type *adjustInlineAsmType(CodeGen::CodeGenFunction &CGF,
                                  StringRef Constraint,
                                  llvm::Type *Ty) const override {
    return X86AdjustInlineAsmType(CGF, Constraint, Ty);
  }

  void addReturnRegisterOutputs(CodeGenFunction &CGF, LValue ReturnValue,
                                std::string &Constraints,
                                std::vector<llvm::Type *> &ResultRegTypes,
                                std::vector<llvm::Type *> &ResultTruncRegTypes,
                                std::vector<LValue> &ResultRegDests,
                                std::string &AsmString,
                                unsigned NumOutputs) const override;

  StringRef getARCRetainAutoreleasedReturnValueMarker() const override {
    return "movl\t%ebp, %ebp"
           "\t\t// marker for objc_retainAutoreleaseReturnValue";
  }
};

} // namespace

/// Rewrite input constraint references after adding some output constraints.
/// In the case where there is one output and one input and we add one output,
/// we need to replace all operand references greater than or equal to 1:
///     mov $0, $1
///     mov eax, $1
/// The result will be:
///     mov $0, $2
///     mov eax, $2
static void rewriteInputConstraintReferences(unsigned FirstIn,
                                             unsigned NumNewOuts,
                                             std::string &AsmString) {
  std::string Buf;
  llvm::raw_string_ostream OS(Buf);
  size_t Pos = 0;
  while (Pos < AsmString.size()) {
    size_t DollarStart = AsmString.find('$', Pos);
    if (DollarStart == std::string::npos)
      DollarStart = AsmString.size();
    size_t DollarEnd = AsmString.find_first_not_of('$', DollarStart);
    if (DollarEnd == std::string::npos)
      DollarEnd = AsmString.size();
    OS << StringRef(&AsmString[Pos], DollarEnd - Pos);
    Pos = DollarEnd;
    size_t NumDollars = DollarEnd - DollarStart;
    if (NumDollars % 2 != 0 && Pos < AsmString.size()) {
      // We have an operand reference.
      size_t DigitStart = Pos;
      if (AsmString[DigitStart] == '{') {
        OS << '{';
        ++DigitStart;
      }
      size_t DigitEnd = AsmString.find_first_not_of("0123456789", DigitStart);
      if (DigitEnd == std::string::npos)
        DigitEnd = AsmString.size();
      StringRef OperandStr(&AsmString[DigitStart], DigitEnd - DigitStart);
      unsigned OperandIndex;
      if (!OperandStr.getAsInteger(10, OperandIndex)) {
        if (OperandIndex >= FirstIn)
          OperandIndex += NumNewOuts;
        OS << OperandIndex;
      } else {
        OS << OperandStr;
      }
      Pos = DigitEnd;
    }
  }
  AsmString = std::move(Buf);
}

/// Add output constraints for EAX:EDX because they are return registers.
void X86_32TargetCodeGenInfo::addReturnRegisterOutputs(
    CodeGenFunction &CGF, LValue ReturnSlot, std::string &Constraints,
    std::vector<llvm::Type *> &ResultRegTypes,
    std::vector<llvm::Type *> &ResultTruncRegTypes,
    std::vector<LValue> &ResultRegDests, std::string &AsmString,
    unsigned NumOutputs) const {
  uint64_t RetWidth = CGF.getContext().getTypeSize(ReturnSlot.getType());

  // Use the EAX constraint if the width is 32 or smaller and EAX:EDX if it is
  // larger.
  if (!Constraints.empty())
    Constraints += ',';
  if (RetWidth <= 32) {
    Constraints += "={eax}";
    ResultRegTypes.push_back(CGF.Int32Ty);
  } else {
    // Use the 'A' constraint for EAX:EDX.
    Constraints += "=A";
    ResultRegTypes.push_back(CGF.Int64Ty);
  }

  // Truncate EAX or EAX:EDX to an integer of the appropriate size.
  llvm::Type *CoerceTy = llvm::IntegerType::get(CGF.getLLVMContext(), RetWidth);
  ResultTruncRegTypes.push_back(CoerceTy);

  // Coerce the integer by bitcasting the return slot pointer.
  ReturnSlot.setAddress(ReturnSlot.getAddress().withElementType(CoerceTy));
  ResultRegDests.push_back(ReturnSlot);

  rewriteInputConstraintReferences(NumOutputs, 1, AsmString);
}

/// shouldReturnTypeInRegister - Determine if the given type should be
/// returned in a register (for the Darwin and MCU ABI).
bool X86_32ABIInfo::shouldReturnTypeInRegister(QualType Ty,
                                               ASTContext &Context) const {
  uint64_t Size = Context.getTypeSize(Ty);

  // For i386, the type must be register sized.
  // For the MCU ABI, it only needs to be <= 8 bytes.
  if ((IsMCUABI && Size > 64) || (!IsMCUABI && !isRegisterSize(Size)))
    return false;

  if (Ty->isVectorType()) {
    // 64- and 128-bit vectors inside structures are not returned in
    // registers.
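    // (For example, <8 x i8> and <4 x float> are returned indirectly.)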
    if (Size == 64 || Size == 128)
      return false;

    return true;
  }

  // If this is a builtin, pointer, enum, complex type, member pointer, or
  // member function pointer, it is ok.
  if (Ty->getAs<BuiltinType>() || Ty->hasPointerRepresentation() ||
      Ty->isAnyComplexType() || Ty->isEnumeralType() ||
      Ty->isBlockPointerType() || Ty->isMemberPointerType())
    return true;

  // Arrays are treated like records.
  if (const ConstantArrayType *AT = Context.getAsConstantArrayType(Ty))
    return shouldReturnTypeInRegister(AT->getElementType(), Context);

  // Otherwise, it must be a record type.
  const RecordType *RT = Ty->getAs<RecordType>();
  if (!RT) return false;

  // FIXME: Traverse bases here too.

  // Structure types are passed in register if all fields would be
  // passed in a register.
  for (const auto *FD : RT->getDecl()->fields()) {
    // Empty fields are ignored.
    if (isEmptyField(Context, FD, true))
      continue;

    // Check fields recursively.
    if (!shouldReturnTypeInRegister(FD->getType(), Context))
      return false;
  }
  return true;
}

static bool is32Or64BitBasicType(QualType Ty, ASTContext &Context) {
  // Treat complex types as the element type.
  if (const ComplexType *CTy = Ty->getAs<ComplexType>())
    Ty = CTy->getElementType();

  // Check for a type which we know has a simple scalar argument-passing
  // convention without any padding. (We're specifically looking for 32-
  // and 64-bit integers and integer-equivalents, float, and double.)
  if (!Ty->getAs<BuiltinType>() && !Ty->hasPointerRepresentation() &&
      !Ty->isEnumeralType() && !Ty->isBlockPointerType())
    return false;

  uint64_t Size = Context.getTypeSize(Ty);
  return Size == 32 || Size == 64;
}

static bool addFieldSizes(ASTContext &Context, const RecordDecl *RD,
                          uint64_t &Size) {
  for (const auto *FD : RD->fields()) {
    // Scalar arguments on the stack get 4 byte alignment on x86. If the
    // argument is smaller than 32 bits, expanding the struct will create
    // alignment padding.
    if (!is32Or64BitBasicType(FD->getType(), Context))
      return false;

    // FIXME: Reject bit-fields wholesale; there are two problems: we don't
    // know how to expand them yet, and the predicate for telling if a
    // bit-field still counts as "basic" is more complicated than what we were
    // doing previously.
    if (FD->isBitField())
      return false;

    Size += Context.getTypeSize(FD->getType());
  }
  return true;
}

static bool addBaseAndFieldSizes(ASTContext &Context, const CXXRecordDecl *RD,
                                 uint64_t &Size) {
  // Don't do this if there are any non-empty bases.
  for (const CXXBaseSpecifier &Base : RD->bases()) {
    if (!addBaseAndFieldSizes(Context, Base.getType()->getAsCXXRecordDecl(),
                              Size))
      return false;
  }
  if (!addFieldSizes(Context, RD, Size))
    return false;
  return true;
}

/// Test whether an argument type which is to be passed indirectly (on the
/// stack) would have the equivalent layout if it was expanded into separate
/// arguments. If so, we prefer to do the latter to avoid inhibiting
/// optimizations.
bool X86_32ABIInfo::canExpandIndirectArgument(QualType Ty) const {
  // We can only expand structure types.
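  // For example (illustrative): a hypothetical struct { int a; float b; }
  // occupies exactly 8 bytes with no padding, so it can be expanded into two
  // separate 4-byte arguments with an identical stack layout.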
  const RecordType *RT = Ty->getAs<RecordType>();
  if (!RT)
    return false;
  const RecordDecl *RD = RT->getDecl();
  uint64_t Size = 0;
  if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD)) {
    if (!IsWin32StructABI) {
      // On non-Windows, we have to conservatively match our old bitcode
      // prototypes in order to be ABI-compatible at the bitcode level.
      if (!CXXRD->isCLike())
        return false;
    } else {
      // Don't do this for dynamic classes.
      if (CXXRD->isDynamicClass())
        return false;
    }
    if (!addBaseAndFieldSizes(getContext(), CXXRD, Size))
      return false;
  } else {
    if (!addFieldSizes(getContext(), RD, Size))
      return false;
  }

  // We can do this if there was no alignment padding.
  return Size == getContext().getTypeSize(Ty);
}

ABIArgInfo X86_32ABIInfo::getIndirectReturnResult(QualType RetTy,
                                                  CCState &State) const {
  // If the return value is indirect, then the hidden argument is consuming one
  // integer register.
  if (State.CC != llvm::CallingConv::X86_FastCall &&
      State.CC != llvm::CallingConv::X86_VectorCall && State.FreeRegs) {
    --State.FreeRegs;
    if (!IsMCUABI)
      return getNaturalAlignIndirectInReg(RetTy);
  }
  return getNaturalAlignIndirect(
      RetTy, /*AddrSpace=*/getDataLayout().getAllocaAddrSpace(),
      /*ByVal=*/false);
}

ABIArgInfo X86_32ABIInfo::classifyReturnType(QualType RetTy,
                                             CCState &State) const {
  if (RetTy->isVoidType())
    return ABIArgInfo::getIgnore();

  const Type *Base = nullptr;
  uint64_t NumElts = 0;
  if ((State.CC == llvm::CallingConv::X86_VectorCall ||
       State.CC == llvm::CallingConv::X86_RegCall) &&
      isHomogeneousAggregate(RetTy, Base, NumElts)) {
    // The LLVM struct type for such an aggregate should lower properly.
    return ABIArgInfo::getDirect();
  }

  if (const VectorType *VT = RetTy->getAs<VectorType>()) {
    // On Darwin, some vectors are returned in registers.
    if (IsDarwinVectorABI) {
      uint64_t Size = getContext().getTypeSize(RetTy);

      // 128-bit vectors are a special case; they are returned in
      // registers and we need to make sure to pick a type the LLVM
      // backend will like.
      if (Size == 128)
        return ABIArgInfo::getDirect(llvm::FixedVectorType::get(
            llvm::Type::getInt64Ty(getVMContext()), 2));

      // Always return in register if it fits in a general purpose
      // register, or if it is 64 bits and has a single element.
      if ((Size == 8 || Size == 16 || Size == 32) ||
          (Size == 64 && VT->getNumElements() == 1))
        return ABIArgInfo::getDirect(
            llvm::IntegerType::get(getVMContext(), Size));

      return getIndirectReturnResult(RetTy, State);
    }

    return ABIArgInfo::getDirect();
  }

  if (isAggregateTypeForABI(RetTy)) {
    if (const RecordType *RT = RetTy->getAs<RecordType>()) {
      // Structures with flexible arrays are always indirect.
      if (RT->getDecl()->hasFlexibleArrayMember())
        return getIndirectReturnResult(RetTy, State);
    }

    // If specified, structs and unions are always indirect.
    if (!IsRetSmallStructInRegABI && !RetTy->isAnyComplexType())
      return getIndirectReturnResult(RetTy, State);

    // Ignore empty structs/unions.
    if (isEmptyRecord(getContext(), RetTy, true))
      return ABIArgInfo::getIgnore();

    // Return complex of _Float16 as <2 x half> so the backend will use xmm0.
    if (const ComplexType *CT = RetTy->getAs<ComplexType>()) {
      QualType ET = getContext().getCanonicalType(CT->getElementType());
      if (ET->isFloat16Type())
        return ABIArgInfo::getDirect(llvm::FixedVectorType::get(
            llvm::Type::getHalfTy(getVMContext()), 2));
    }

    // Small structures which are register sized are generally returned
    // in a register.
    if (shouldReturnTypeInRegister(RetTy, getContext())) {
      uint64_t Size = getContext().getTypeSize(RetTy);

      // As a special-case, if the struct is a "single-element" struct, and
      // the field is of type "float" or "double", return it in a
      // floating-point register. (MSVC does not apply this special case.)
      // We apply a similar transformation for pointer types to improve the
      // quality of the generated IR.
      if (const Type *SeltTy = isSingleElementStruct(RetTy, getContext()))
        if ((!IsWin32StructABI && SeltTy->isRealFloatingType()) ||
            SeltTy->hasPointerRepresentation())
          return ABIArgInfo::getDirect(CGT.ConvertType(QualType(SeltTy, 0)));

      // FIXME: We should be able to narrow this integer in cases with dead
      // padding.
      return ABIArgInfo::getDirect(
          llvm::IntegerType::get(getVMContext(), Size));
    }

    return getIndirectReturnResult(RetTy, State);
  }

  // Treat an enum type as its underlying type.
  if (const EnumType *EnumTy = RetTy->getAs<EnumType>())
    RetTy = EnumTy->getDecl()->getIntegerType();

  if (const auto *EIT = RetTy->getAs<BitIntType>())
    if (EIT->getNumBits() > 64)
      return getIndirectReturnResult(RetTy, State);

  return (isPromotableIntegerTypeForABI(RetTy) ? ABIArgInfo::getExtend(RetTy)
                                               : ABIArgInfo::getDirect());
}

unsigned X86_32ABIInfo::getTypeStackAlignInBytes(QualType Ty,
                                                 unsigned Align) const {
  // If the alignment is less than or equal to the minimum ABI alignment,
  // just use the default; the backend will handle this.
  if (Align <= MinABIStackAlignInBytes)
    return 0; // Use default alignment.

  if (IsLinuxABI) {
    // Exclude other System V OSes (e.g. Darwin, PS4 and FreeBSD) since we
    // don't want to spend any effort dealing with the ramifications of ABI
    // breaks.
    //
    // If the vector type is __m128/__m256/__m512, return the default
    // alignment.
    if (Ty->isVectorType() && (Align == 16 || Align == 32 || Align == 64))
      return Align;
  }
  // On non-Darwin, the stack type alignment is always 4.
  if (!IsDarwinVectorABI) {
    // Set explicit alignment, since we may need to realign the top.
    return MinABIStackAlignInBytes;
  }

  // Otherwise, if the type contains an SSE vector type, the alignment is 16.
  if (Align >= 16 && (isSIMDVectorType(getContext(), Ty) ||
                      isRecordWithSIMDVectorType(getContext(), Ty)))
    return 16;

  return MinABIStackAlignInBytes;
}

ABIArgInfo X86_32ABIInfo::getIndirectResult(QualType Ty, bool ByVal,
                                            CCState &State) const {
  if (!ByVal) {
    if (State.FreeRegs) {
      --State.FreeRegs; // Non-byval indirects just use one pointer.
      if (!IsMCUABI)
        return getNaturalAlignIndirectInReg(Ty);
    }
    return getNaturalAlignIndirect(Ty, getDataLayout().getAllocaAddrSpace(),
                                   false);
  }

  // Compute the byval alignment.
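  // (A result of 0 from getTypeStackAlignInBytes means the type only needs
  // the default 4-byte stack slot alignment, handled just below.)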
  unsigned TypeAlign = getContext().getTypeAlign(Ty) / 8;
  unsigned StackAlign = getTypeStackAlignInBytes(Ty, TypeAlign);
  if (StackAlign == 0)
    return ABIArgInfo::getIndirect(
        CharUnits::fromQuantity(4),
        /*AddrSpace=*/getDataLayout().getAllocaAddrSpace(),
        /*ByVal=*/true);

  // If the stack alignment is less than the type alignment, realign the
  // argument.
  bool Realign = TypeAlign > StackAlign;
  return ABIArgInfo::getIndirect(
      CharUnits::fromQuantity(StackAlign),
      /*AddrSpace=*/getDataLayout().getAllocaAddrSpace(), /*ByVal=*/true,
      Realign);
}

X86_32ABIInfo::Class X86_32ABIInfo::classify(QualType Ty) const {
  const Type *T = isSingleElementStruct(Ty, getContext());
  if (!T)
    T = Ty.getTypePtr();

  if (const BuiltinType *BT = T->getAs<BuiltinType>()) {
    BuiltinType::Kind K = BT->getKind();
    if (K == BuiltinType::Float || K == BuiltinType::Double)
      return Float;
  }
  return Integer;
}

bool X86_32ABIInfo::updateFreeRegs(QualType Ty, CCState &State) const {
  if (!IsSoftFloatABI) {
    Class C = classify(Ty);
    if (C == Float)
      return false;
  }

  unsigned Size = getContext().getTypeSize(Ty);
  unsigned SizeInRegs = (Size + 31) / 32;

  if (SizeInRegs == 0)
    return false;

  if (!IsMCUABI) {
    if (SizeInRegs > State.FreeRegs) {
      State.FreeRegs = 0;
      return false;
    }
  } else {
    // The MCU psABI allows passing parameters in-reg even if there are
    // earlier parameters that are passed on the stack. Also,
    // it does not allow passing >8-byte structs in-register,
    // even if there are 3 free registers available.
    if (SizeInRegs > State.FreeRegs || SizeInRegs > 2)
      return false;
  }

  State.FreeRegs -= SizeInRegs;
  return true;
}

bool X86_32ABIInfo::shouldAggregateUseDirect(QualType Ty, CCState &State,
                                             bool &InReg,
                                             bool &NeedsPadding) const {
  // On Windows, aggregates other than HFAs are never passed in registers, and
  // they do not consume register slots. Homogeneous floating-point aggregates
  // (HFAs) have already been dealt with at this point.
  if (IsWin32StructABI && isAggregateTypeForABI(Ty))
    return false;

  NeedsPadding = false;
  InReg = !IsMCUABI;

  if (!updateFreeRegs(Ty, State))
    return false;

  if (IsMCUABI)
    return true;

  if (State.CC == llvm::CallingConv::X86_FastCall ||
      State.CC == llvm::CallingConv::X86_VectorCall ||
      State.CC == llvm::CallingConv::X86_RegCall) {
    if (getContext().getTypeSize(Ty) <= 32 && State.FreeRegs)
      NeedsPadding = true;

    return false;
  }

  return true;
}

bool X86_32ABIInfo::shouldPrimitiveUseInReg(QualType Ty, CCState &State) const {
  bool IsPtrOrInt = (getContext().getTypeSize(Ty) <= 32) &&
                    (Ty->isIntegralOrEnumerationType() || Ty->isPointerType() ||
                     Ty->isReferenceType());

  if (!IsPtrOrInt && (State.CC == llvm::CallingConv::X86_FastCall ||
                      State.CC == llvm::CallingConv::X86_VectorCall))
    return false;

  if (!updateFreeRegs(Ty, State))
    return false;

  if (!IsPtrOrInt && State.CC == llvm::CallingConv::X86_RegCall)
    return false;

  // Return true to apply inreg to all legal parameters except for MCU targets.
  return !IsMCUABI;
}

void X86_32ABIInfo::runVectorCallFirstPass(CGFunctionInfo &FI,
                                           CCState &State) const {
  // Vectorcall on x86 works subtly differently than on x64, so the format is
  // a bit different from the x64 version. First, all vector types (not HVAs)
  // are assigned, with the first six ending up in the [XYZ]MM0-5 registers.
  // This differs from the x64 implementation, where the first six arguments
  // by index get registers.
  // In the second pass over the arguments, HVAs are passed in the remaining
  // vector registers if possible, or indirectly by address. The address will
  // be passed in ECX/EDX if available. Any other arguments are passed
  // according to the usual fastcall rules.
  MutableArrayRef<CGFunctionInfoArgInfo> Args = FI.arguments();
  for (int I = 0, E = Args.size(); I < E; ++I) {
    const Type *Base = nullptr;
    uint64_t NumElts = 0;
    const QualType &Ty = Args[I].type;
    if ((Ty->isVectorType() || Ty->isBuiltinType()) &&
        isHomogeneousAggregate(Ty, Base, NumElts)) {
      if (State.FreeSSERegs >= NumElts) {
        State.FreeSSERegs -= NumElts;
        Args[I].info = ABIArgInfo::getDirectInReg();
        State.IsPreassigned.set(I);
      }
    }
  }
}

ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty, CCState &State,
                                               unsigned ArgIndex) const {
  // FIXME: Set alignment on indirect arguments.
  bool IsFastCall = State.CC == llvm::CallingConv::X86_FastCall;
  bool IsRegCall = State.CC == llvm::CallingConv::X86_RegCall;
  bool IsVectorCall = State.CC == llvm::CallingConv::X86_VectorCall;

  Ty = useFirstFieldIfTransparentUnion(Ty);
  TypeInfo TI = getContext().getTypeInfo(Ty);

  // Check with the C++ ABI first.
  const RecordType *RT = Ty->getAs<RecordType>();
  if (RT) {
    CGCXXABI::RecordArgABI RAA = getRecordArgABI(RT, getCXXABI());
    if (RAA == CGCXXABI::RAA_Indirect) {
      return getIndirectResult(Ty, false, State);
    } else if (State.IsDelegateCall) {
      // Avoid having different alignments on delegate call args by always
      // setting the alignment to 4, which is what we do for inallocas.
      ABIArgInfo Res = getIndirectResult(Ty, false, State);
      Res.setIndirectAlign(CharUnits::fromQuantity(4));
      return Res;
    } else if (RAA == CGCXXABI::RAA_DirectInMemory) {
      // The field index doesn't matter, we'll fix it up later.
      return ABIArgInfo::getInAlloca(/*FieldIndex=*/0);
    }
  }

  // Regcall uses the concept of a homogeneous vector aggregate, similar
  // to other targets.
  const Type *Base = nullptr;
  uint64_t NumElts = 0;
  if ((IsRegCall || IsVectorCall) &&
      isHomogeneousAggregate(Ty, Base, NumElts)) {
    if (State.FreeSSERegs >= NumElts) {
      State.FreeSSERegs -= NumElts;

      // Vectorcall passes HVAs directly and does not flatten them, but regcall
      // does.
      if (IsVectorCall)
        return getDirectX86Hva();

      if (Ty->isBuiltinType() || Ty->isVectorType())
        return ABIArgInfo::getDirect();
      return ABIArgInfo::getExpand();
    }
    if (IsVectorCall && Ty->isBuiltinType())
      return ABIArgInfo::getDirect();
    return getIndirectResult(Ty, /*ByVal=*/false, State);
  }

  if (isAggregateTypeForABI(Ty)) {
    // Structures with flexible arrays are always indirect.
    // FIXME: This should not be byval!
    if (RT && RT->getDecl()->hasFlexibleArrayMember())
      return getIndirectResult(Ty, true, State);

    // Ignore empty structs/unions on non-Windows.
    if (!IsWin32StructABI && isEmptyRecord(getContext(), Ty, true))
      return ABIArgInfo::getIgnore();

    // Ignore 0 sized structs.
    if (TI.Width == 0)
      return ABIArgInfo::getIgnore();

    llvm::LLVMContext &LLVMContext = getVMContext();
    llvm::IntegerType *Int32 = llvm::Type::getInt32Ty(LLVMContext);
    bool NeedsPadding = false;
    bool InReg;
    if (shouldAggregateUseDirect(Ty, State, InReg, NeedsPadding)) {
      unsigned SizeInRegs = (TI.Width + 31) / 32;
      SmallVector<llvm::Type *, 3> Elements(SizeInRegs, Int32);
      llvm::Type *Result = llvm::StructType::get(LLVMContext, Elements);
      if (InReg)
        return ABIArgInfo::getDirectInReg(Result);
      else
        return ABIArgInfo::getDirect(Result);
    }
    llvm::IntegerType *PaddingType = NeedsPadding ? Int32 : nullptr;

    // Pass over-aligned aggregates to non-variadic functions on Windows
    // indirectly. This behavior was added in MSVC 2015. Use the required
    // alignment from the record layout, since that may be less than the
    // regular type alignment, and types with required alignment of less than
    // 4 bytes are not passed indirectly.
    if (IsWin32StructABI && State.Required.isRequiredArg(ArgIndex)) {
      unsigned AlignInBits = 0;
      if (RT) {
        const ASTRecordLayout &Layout =
            getContext().getASTRecordLayout(RT->getDecl());
        AlignInBits = getContext().toBits(Layout.getRequiredAlignment());
      } else if (TI.isAlignRequired()) {
        AlignInBits = TI.Align;
      }
      if (AlignInBits > 32)
        return getIndirectResult(Ty, /*ByVal=*/false, State);
    }

    // Expand small (<= 128-bit) record types when we know that the stack
    // layout of those arguments will match the struct. This is important
    // because the LLVM backend isn't smart enough to remove byval, which
    // inhibits many optimizations.
    // Don't do this for the MCU if there are still free integer registers
    // (see X86_64 ABI for full explanation).
    if (TI.Width <= 4 * 32 && (!IsMCUABI || State.FreeRegs == 0) &&
        canExpandIndirectArgument(Ty))
      return ABIArgInfo::getExpandWithPadding(
          IsFastCall || IsVectorCall || IsRegCall, PaddingType);

    return getIndirectResult(Ty, true, State);
  }

  if (const VectorType *VT = Ty->getAs<VectorType>()) {
    // On Windows, vectors are passed directly if registers are available, or
    // indirectly if not. This avoids the need to align argument memory. Pass
    // user-defined vector types larger than 512 bits indirectly for
    // simplicity.
    if (IsWin32StructABI) {
      if (TI.Width <= 512 && State.FreeSSERegs > 0) {
        --State.FreeSSERegs;
        return ABIArgInfo::getDirectInReg();
      }
      return getIndirectResult(Ty, /*ByVal=*/false, State);
    }

    // On Darwin, some vectors are passed in memory; we handle this by passing
    // them as an i8/i16/i32/i64.
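    // (For example, a <1 x i64> argument is coerced to a plain i64.)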
    if (IsDarwinVectorABI) {
      if ((TI.Width == 8 || TI.Width == 16 || TI.Width == 32) ||
          (TI.Width == 64 && VT->getNumElements() == 1))
        return ABIArgInfo::getDirect(
            llvm::IntegerType::get(getVMContext(), TI.Width));
    }

    if (IsX86_MMXType(CGT.ConvertType(Ty)))
      return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(), 64));

    return ABIArgInfo::getDirect();
  }

  if (const EnumType *EnumTy = Ty->getAs<EnumType>())
    Ty = EnumTy->getDecl()->getIntegerType();

  bool InReg = shouldPrimitiveUseInReg(Ty, State);

  if (isPromotableIntegerTypeForABI(Ty)) {
    if (InReg)
      return ABIArgInfo::getExtendInReg(Ty, CGT.ConvertType(Ty));
    return ABIArgInfo::getExtend(Ty, CGT.ConvertType(Ty));
  }

  if (const auto *EIT = Ty->getAs<BitIntType>()) {
    if (EIT->getNumBits() <= 64) {
      if (InReg)
        return ABIArgInfo::getDirectInReg();
      return ABIArgInfo::getDirect();
    }
    return getIndirectResult(Ty, /*ByVal=*/false, State);
  }

  if (InReg)
    return ABIArgInfo::getDirectInReg();
  return ABIArgInfo::getDirect();
}

void X86_32ABIInfo::computeInfo(CGFunctionInfo &FI) const {
  CCState State(FI);
  if (IsMCUABI)
    State.FreeRegs = 3;
  else if (State.CC == llvm::CallingConv::X86_FastCall) {
    State.FreeRegs = 2;
    State.FreeSSERegs = 3;
  } else if (State.CC == llvm::CallingConv::X86_VectorCall) {
    State.FreeRegs = 2;
    State.FreeSSERegs = 6;
  } else if (FI.getHasRegParm())
    State.FreeRegs = FI.getRegParm();
  else if (State.CC == llvm::CallingConv::X86_RegCall) {
    State.FreeRegs = 5;
    State.FreeSSERegs = 8;
  } else if (IsWin32StructABI) {
    // Since MSVC 2015, the first three SSE vectors have been passed in
    // registers. The rest are passed indirectly.
    State.FreeRegs = DefaultNumRegisterParameters;
    State.FreeSSERegs = 3;
  } else
    State.FreeRegs = DefaultNumRegisterParameters;

  if (!::classifyReturnType(getCXXABI(), FI, *this)) {
    FI.getReturnInfo() = classifyReturnType(FI.getReturnType(), State);
  } else if (FI.getReturnInfo().isIndirect()) {
    // The C++ ABI is not aware of register usage, so we have to check if the
    // return value was sret and put it in a register ourselves if appropriate.
    if (State.FreeRegs) {
      --State.FreeRegs; // The sret parameter consumes a register.
      if (!IsMCUABI)
        FI.getReturnInfo().setInReg(true);
    }
  }

  // The chain argument effectively gives us another free register.
  if (FI.isChainCall())
    ++State.FreeRegs;

  // For vectorcall, do a first pass over the arguments, assigning FP and
  // vector arguments to XMM registers as available.
  if (State.CC == llvm::CallingConv::X86_VectorCall)
    runVectorCallFirstPass(FI, State);

  bool UsedInAlloca = false;
  MutableArrayRef<CGFunctionInfoArgInfo> Args = FI.arguments();
  for (unsigned I = 0, E = Args.size(); I < E; ++I) {
    // Skip arguments that have already been assigned.
    if (State.IsPreassigned.test(I))
      continue;

    Args[I].info = classifyArgumentType(Args[I].type, State, I);
    UsedInAlloca |= (Args[I].info.getKind() == ABIArgInfo::InAlloca);
  }

  // If we needed to use inalloca for any argument, do a second pass and
  // rewrite all the memory arguments to use inalloca.
  if (UsedInAlloca)
    rewriteWithInAlloca(FI);
}

void
X86_32ABIInfo::addFieldToArgStruct(SmallVector<llvm::Type *, 6> &FrameFields,
                                   CharUnits &StackOffset, ABIArgInfo &Info,
                                   QualType Type) const {
  // Arguments are always 4-byte-aligned.
  CharUnits WordSize = CharUnits::fromQuantity(4);
  assert(StackOffset.isMultipleOf(WordSize) && "unaligned inalloca struct");

  // sret pointers and indirect things will require an extra pointer
  // indirection, unless they are byval. Most things are byval, and will not
  // require this indirection.
  bool IsIndirect = false;
  if (Info.isIndirect() && !Info.getIndirectByVal())
    IsIndirect = true;
  Info = ABIArgInfo::getInAlloca(FrameFields.size(), IsIndirect);
  llvm::Type *LLTy = CGT.ConvertTypeForMem(Type);
  if (IsIndirect)
    LLTy = llvm::PointerType::getUnqual(getVMContext());
  FrameFields.push_back(LLTy);
  StackOffset += IsIndirect ? WordSize : getContext().getTypeSizeInChars(Type);

  // Insert padding bytes to respect alignment.
  CharUnits FieldEnd = StackOffset;
  StackOffset = FieldEnd.alignTo(WordSize);
  if (StackOffset != FieldEnd) {
    CharUnits NumBytes = StackOffset - FieldEnd;
    llvm::Type *Ty = llvm::Type::getInt8Ty(getVMContext());
    Ty = llvm::ArrayType::get(Ty, NumBytes.getQuantity());
    FrameFields.push_back(Ty);
  }
}

static bool isArgInAlloca(const ABIArgInfo &Info) {
  // Leave ignored and inreg arguments alone.
  switch (Info.getKind()) {
  case ABIArgInfo::InAlloca:
    return true;
  case ABIArgInfo::Ignore:
  case ABIArgInfo::IndirectAliased:
    return false;
  case ABIArgInfo::Indirect:
  case ABIArgInfo::Direct:
  case ABIArgInfo::Extend:
    return !Info.getInReg();
  case ABIArgInfo::Expand:
  case ABIArgInfo::CoerceAndExpand:
    // These are aggregate types which are never passed in registers when
    // inalloca is involved.
    return true;
  }
  llvm_unreachable("invalid enum");
}

void X86_32ABIInfo::rewriteWithInAlloca(CGFunctionInfo &FI) const {
  assert(IsWin32StructABI && "inalloca only supported on win32");

  // Build a packed struct type for all of the arguments in memory.
  SmallVector<llvm::Type *, 6> FrameFields;

  // The stack alignment is always 4.
  CharUnits StackAlign = CharUnits::fromQuantity(4);

  CharUnits StackOffset;
  CGFunctionInfo::arg_iterator I = FI.arg_begin(), E = FI.arg_end();

  // Put 'this' into the struct before 'sret', if necessary.
  bool IsThisCall =
      FI.getCallingConvention() == llvm::CallingConv::X86_ThisCall;
  ABIArgInfo &Ret = FI.getReturnInfo();
  if (Ret.isIndirect() && Ret.isSRetAfterThis() && !IsThisCall &&
      isArgInAlloca(I->info)) {
    addFieldToArgStruct(FrameFields, StackOffset, I->info, I->type);
    ++I;
  }

  // Put the sret parameter into the inalloca struct if it's in memory.
  if (Ret.isIndirect() && !Ret.getInReg()) {
    addFieldToArgStruct(FrameFields, StackOffset, Ret, FI.getReturnType());
    // On Windows, the hidden sret parameter is always returned in eax.
    Ret.setInAllocaSRet(IsWin32StructABI);
  }

  // Skip the 'this' parameter in ecx.
  if (IsThisCall)
    ++I;

  // Put arguments passed in memory into the struct.
  for (; I != E; ++I) {
    if (isArgInAlloca(I->info))
      addFieldToArgStruct(FrameFields, StackOffset, I->info, I->type);
  }

  FI.setArgStruct(llvm::StructType::get(getVMContext(), FrameFields,
                                        /*isPacked=*/true),
                  StackAlign);
}

RValue X86_32ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
                                QualType Ty, AggValueSlot Slot) const {
  auto TypeInfo = getContext().getTypeInfoInChars(Ty);

  CCState State(*const_cast<CGFunctionInfo *>(CGF.CurFnInfo));
  ABIArgInfo AI = classifyArgumentType(Ty, State, /*ArgIndex*/ 0);
  // Empty records are ignored for parameter passing purposes.
  if (AI.isIgnore())
    return Slot.asRValue();

  // x86-32 changes the alignment of certain arguments on the stack.
  //
  // Just messing with TypeInfo like this works because we never pass
  // anything indirectly.
  TypeInfo.Align = CharUnits::fromQuantity(
      getTypeStackAlignInBytes(Ty, TypeInfo.Align.getQuantity()));

  return emitVoidPtrVAArg(CGF, VAListAddr, Ty, /*Indirect*/ false, TypeInfo,
                          CharUnits::fromQuantity(4),
                          /*AllowHigherAlign*/ true, Slot);
}

bool X86_32TargetCodeGenInfo::isStructReturnInRegABI(
    const llvm::Triple &Triple, const CodeGenOptions &Opts) {
  assert(Triple.getArch() == llvm::Triple::x86);

  switch (Opts.getStructReturnConvention()) {
  case CodeGenOptions::SRCK_Default:
    break;
  case CodeGenOptions::SRCK_OnStack: // -fpcc-struct-return
    return false;
  case CodeGenOptions::SRCK_InRegs: // -freg-struct-return
    return true;
  }

  if (Triple.isOSDarwin() || Triple.isOSIAMCU())
    return true;

  switch (Triple.getOS()) {
  case llvm::Triple::DragonFly:
  case llvm::Triple::FreeBSD:
  case llvm::Triple::OpenBSD:
  case llvm::Triple::Win32:
    return true;
  default:
    return false;
  }
}

static void addX86InterruptAttrs(const FunctionDecl *FD, llvm::GlobalValue *GV,
                                 CodeGen::CodeGenModule &CGM) {
  if (!FD->hasAttr<AnyX86InterruptAttr>())
    return;

  llvm::Function *Fn = cast<llvm::Function>(GV);
  Fn->setCallingConv(llvm::CallingConv::X86_INTR);
  if (FD->getNumParams() == 0)
    return;

  auto PtrTy = cast<PointerType>(FD->getParamDecl(0)->getType());
  llvm::Type *ByValTy = CGM.getTypes().ConvertType(PtrTy->getPointeeType());
  llvm::Attribute NewAttr = llvm::Attribute::getWithByValType(
      Fn->getContext(), ByValTy);
  Fn->addParamAttr(0, NewAttr);
}

void X86_32TargetCodeGenInfo::setTargetAttributes(
    const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const {
  if (GV->isDeclaration())
    return;
  if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) {
    if (FD->hasAttr<X86ForceAlignArgPointerAttr>()) {
      llvm::Function *Fn = cast<llvm::Function>(GV);
      Fn->addFnAttr("stackrealign");
    }

    addX86InterruptAttrs(FD, GV, CGM);
  }
}

bool X86_32TargetCodeGenInfo::initDwarfEHRegSizeTable(
    CodeGen::CodeGenFunction &CGF, llvm::Value *Address) const {
  CodeGen::CGBuilderTy &Builder = CGF.Builder;

  llvm::Value *Four8 = llvm::ConstantInt::get(CGF.Int8Ty, 4);

  // 0-7 are the eight integer registers; the order is different
  // on Darwin (for EH), but the range is the same.
  // 8 is %eip.
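  // (AssignToArrayRange writes the 4-byte size for DWARF register numbers 0
  // through 8, inclusive.)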
  AssignToArrayRange(Builder, Address, Four8, 0, 8);

  if (CGF.CGM.getTarget().getTriple().isOSDarwin()) {
    // 12-16 are st(0..4). Not sure why we stop at 4.
    // These have size 16, which is sizeof(long double) on
    // platforms with 8-byte alignment for that type.
    llvm::Value *Sixteen8 = llvm::ConstantInt::get(CGF.Int8Ty, 16);
    AssignToArrayRange(Builder, Address, Sixteen8, 12, 16);
  } else {
    // 9 is %eflags, which doesn't get a size on Darwin for some
    // reason.
    Builder.CreateAlignedStore(
        Four8, Builder.CreateConstInBoundsGEP1_32(CGF.Int8Ty, Address, 9),
        CharUnits::One());

    // 11-16 are st(0..5). Not sure why we stop at 5.
    // These have size 12, which is sizeof(long double) on
    // platforms with 4-byte alignment for that type.
    llvm::Value *Twelve8 = llvm::ConstantInt::get(CGF.Int8Ty, 12);
    AssignToArrayRange(Builder, Address, Twelve8, 11, 16);
  }

  return false;
}

//===----------------------------------------------------------------------===//
// X86-64 ABI Implementation
//===----------------------------------------------------------------------===//

namespace {

/// \returns the size in bits of the largest (native) vector for \p AVXLevel.
static unsigned getNativeVectorSizeForAVXABI(X86AVXABILevel AVXLevel) {
  switch (AVXLevel) {
  case X86AVXABILevel::AVX512:
    return 512;
  case X86AVXABILevel::AVX:
    return 256;
  case X86AVXABILevel::None:
    return 128;
  }
  llvm_unreachable("Unknown AVXLevel");
}

/// X86_64ABIInfo - The X86_64 ABI information.
class X86_64ABIInfo : public ABIInfo {
  enum Class {
    Integer = 0,
    SSE,
    SSEUp,
    X87,
    X87Up,
    ComplexX87,
    NoClass,
    Memory
  };

  /// merge - Implement the X86_64 ABI merging algorithm.
  ///
  /// Merge an accumulating classification \arg Accum with a field
  /// classification \arg Field.
  ///
  /// \param Accum - The accumulating classification. This should
  /// always be either NoClass or the result of a previous merge
  /// call. In addition, this should never be Memory (the caller
  /// should just return Memory for the aggregate).
  static Class merge(Class Accum, Class Field);

  /// postMerge - Implement the X86_64 ABI post merging algorithm.
  ///
  /// Post merger cleanup, reduces a malformed Hi and Lo pair to
  /// final MEMORY or SSE classes when necessary.
  ///
  /// \param AggregateSize - The size of the current aggregate in
  /// the classification process.
  ///
  /// \param Lo - The classification for the parts of the type
  /// residing in the low word of the containing object.
  ///
  /// \param Hi - The classification for the parts of the type
  /// residing in the higher words of the containing object.
  ///
  void postMerge(unsigned AggregateSize, Class &Lo, Class &Hi) const;

  /// classify - Determine the x86_64 register classes in which the
  /// given type T should be passed.
  ///
  /// \param Lo - The classification for the parts of the type
  /// residing in the low word of the containing object.
  ///
  /// \param Hi - The classification for the parts of the type
  /// residing in the high word of the containing object.
  ///
  /// \param OffsetBase - The bit offset of this type in the
  /// containing object. Some parameters are classified differently
Some parameters are classified different 1257 /// depending on whether they straddle an eightbyte boundary. 1258 /// 1259 /// \param isNamedArg - Whether the argument in question is a "named" 1260 /// argument, as used in AMD64-ABI 3.5.7. 1261 /// 1262 /// \param IsRegCall - Whether the calling conversion is regcall. 1263 /// 1264 /// If a word is unused its result will be NoClass; if a type should 1265 /// be passed in Memory then at least the classification of \arg Lo 1266 /// will be Memory. 1267 /// 1268 /// The \arg Lo class will be NoClass iff the argument is ignored. 1269 /// 1270 /// If the \arg Lo class is ComplexX87, then the \arg Hi class will 1271 /// also be ComplexX87. 1272 void classify(QualType T, uint64_t OffsetBase, Class &Lo, Class &Hi, 1273 bool isNamedArg, bool IsRegCall = false) const; 1274 1275 llvm::Type *GetByteVectorType(QualType Ty) const; 1276 llvm::Type *GetSSETypeAtOffset(llvm::Type *IRType, 1277 unsigned IROffset, QualType SourceTy, 1278 unsigned SourceOffset) const; 1279 llvm::Type *GetINTEGERTypeAtOffset(llvm::Type *IRType, 1280 unsigned IROffset, QualType SourceTy, 1281 unsigned SourceOffset) const; 1282 1283 /// getIndirectResult - Give a source type \arg Ty, return a suitable result 1284 /// such that the argument will be returned in memory. 1285 ABIArgInfo getIndirectReturnResult(QualType Ty) const; 1286 1287 /// getIndirectResult - Give a source type \arg Ty, return a suitable result 1288 /// such that the argument will be passed in memory. 1289 /// 1290 /// \param freeIntRegs - The number of free integer registers remaining 1291 /// available. 1292 ABIArgInfo getIndirectResult(QualType Ty, unsigned freeIntRegs) const; 1293 1294 ABIArgInfo classifyReturnType(QualType RetTy) const; 1295 1296 ABIArgInfo classifyArgumentType(QualType Ty, unsigned freeIntRegs, 1297 unsigned &neededInt, unsigned &neededSSE, 1298 bool isNamedArg, 1299 bool IsRegCall = false) const; 1300 1301 ABIArgInfo classifyRegCallStructType(QualType Ty, unsigned &NeededInt, 1302 unsigned &NeededSSE, 1303 unsigned &MaxVectorWidth) const; 1304 1305 ABIArgInfo classifyRegCallStructTypeImpl(QualType Ty, unsigned &NeededInt, 1306 unsigned &NeededSSE, 1307 unsigned &MaxVectorWidth) const; 1308 1309 bool IsIllegalVectorType(QualType Ty) const; 1310 1311 /// The 0.98 ABI revision clarified a lot of ambiguities, 1312 /// unfortunately in ways that were not always consistent with 1313 /// certain previous compilers. In particular, platforms which 1314 /// required strict binary compatibility with older versions of GCC 1315 /// may need to exempt themselves. 1316 bool honorsRevision0_98() const { 1317 return !getTarget().getTriple().isOSDarwin(); 1318 } 1319 1320 /// GCC classifies <1 x long long> as SSE but some platform ABIs choose to 1321 /// classify it as INTEGER (for compatibility with older clang compilers). 1322 bool classifyIntegerMMXAsSSE() const { 1323 // Clang <= 3.8 did not do this. 1324 if (getContext().getLangOpts().getClangABICompat() <= 1325 LangOptions::ClangABI::Ver3_8) 1326 return false; 1327 1328 const llvm::Triple &Triple = getTarget().getTriple(); 1329 if (Triple.isOSDarwin() || Triple.isPS() || Triple.isOSFreeBSD()) 1330 return false; 1331 return true; 1332 } 1333 1334 // GCC classifies vectors of __int128 as memory. 1335 bool passInt128VectorsInMem() const { 1336 // Clang <= 9.0 did not do this. 
    if (getContext().getLangOpts().getClangABICompat() <=
        LangOptions::ClangABI::Ver9)
      return false;

    const llvm::Triple &T = getTarget().getTriple();
    return T.isOSLinux() || T.isOSNetBSD();
  }

  bool returnCXXRecordGreaterThan128InMem() const {
    // Clang <= 20.0 did not do this.
    if (getContext().getLangOpts().getClangABICompat() <=
        LangOptions::ClangABI::Ver20)
      return false;

    return true;
  }

  X86AVXABILevel AVXLevel;
  // Some ABIs (e.g. X32 ABI and Native Client OS) use 32-bit pointers on
  // 64-bit hardware.
  bool Has64BitPointers;

public:
  X86_64ABIInfo(CodeGen::CodeGenTypes &CGT, X86AVXABILevel AVXLevel)
      : ABIInfo(CGT), AVXLevel(AVXLevel),
        Has64BitPointers(CGT.getDataLayout().getPointerSize(0) == 8) {}

  bool isPassedUsingAVXType(QualType type) const {
    unsigned neededInt, neededSSE;
    // The freeIntRegs argument doesn't matter here.
    ABIArgInfo info = classifyArgumentType(type, 0, neededInt, neededSSE,
                                           /*isNamedArg*/ true);
    if (info.isDirect()) {
      llvm::Type *ty = info.getCoerceToType();
      if (llvm::VectorType *vectorTy = dyn_cast_or_null<llvm::VectorType>(ty))
        return vectorTy->getPrimitiveSizeInBits().getFixedValue() > 128;
    }
    return false;
  }

  void computeInfo(CGFunctionInfo &FI) const override;

  RValue EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty,
                   AggValueSlot Slot) const override;
  RValue EmitMSVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty,
                     AggValueSlot Slot) const override;

  bool has64BitPointers() const {
    return Has64BitPointers;
  }
};

/// WinX86_64ABIInfo - The Windows X86_64 ABI information.
class WinX86_64ABIInfo : public ABIInfo {
public:
  WinX86_64ABIInfo(CodeGen::CodeGenTypes &CGT, X86AVXABILevel AVXLevel)
      : ABIInfo(CGT), AVXLevel(AVXLevel),
        IsMingw64(getTarget().getTriple().isWindowsGNUEnvironment()) {}

  void computeInfo(CGFunctionInfo &FI) const override;

  RValue EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty,
                   AggValueSlot Slot) const override;

  bool isHomogeneousAggregateBaseType(QualType Ty) const override {
    // FIXME: Assumes vectorcall is in use.
    return isX86VectorTypeForVectorCall(getContext(), Ty);
  }

  bool isHomogeneousAggregateSmallEnough(const Type *Ty,
                                         uint64_t NumMembers) const override {
    // FIXME: Assumes vectorcall is in use.
    return isX86VectorCallAggregateSmallEnough(NumMembers);
  }

private:
  ABIArgInfo classify(QualType Ty, unsigned &FreeSSERegs, bool IsReturnType,
                      bool IsVectorCall, bool IsRegCall) const;
  ABIArgInfo reclassifyHvaArgForVectorCall(QualType Ty, unsigned &FreeSSERegs,
                                           const ABIArgInfo &current) const;

  X86AVXABILevel AVXLevel;

  bool IsMingw64;
};

class X86_64TargetCodeGenInfo : public TargetCodeGenInfo {
public:
  X86_64TargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, X86AVXABILevel AVXLevel)
      : TargetCodeGenInfo(std::make_unique<X86_64ABIInfo>(CGT, AVXLevel)) {
    SwiftInfo =
        std::make_unique<SwiftABIInfo>(CGT, /*SwiftErrorInRegister=*/true);
  }

  /// Disable tail call on x86-64. The epilogue code before the tail jump
  /// blocks autoreleaseRV/retainRV and autoreleaseRV/unsafeClaimRV
  /// optimizations.
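  /// (In practice, when this returns true CodeGen marks calls to
  /// objc_retainAutoreleasedReturnValue and
  /// objc_unsafeClaimAutoreleasedReturnValue as `notail`.)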
  bool markARCOptimizedReturnCallsAsNoTail() const override { return true; }

  int getDwarfEHStackPointer(CodeGen::CodeGenModule &CGM) const override {
    return 7;
  }

  bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF,
                               llvm::Value *Address) const override {
    llvm::Value *Eight8 = llvm::ConstantInt::get(CGF.Int8Ty, 8);

    // 0-15 are the 16 integer registers.
    // 16 is %rip.
    AssignToArrayRange(CGF.Builder, Address, Eight8, 0, 16);
    return false;
  }

  llvm::Type *adjustInlineAsmType(CodeGen::CodeGenFunction &CGF,
                                  StringRef Constraint,
                                  llvm::Type *Ty) const override {
    return X86AdjustInlineAsmType(CGF, Constraint, Ty);
  }

  bool isNoProtoCallVariadic(const CallArgList &args,
                             const FunctionNoProtoType *fnType) const override {
    // The default CC on x86-64 sets %al to the number of SSE
    // registers used, and GCC sets this when calling an unprototyped
    // function, so we override the default behavior. However, don't do
    // that when AVX types are involved: the ABI explicitly states it is
    // undefined, and it doesn't work in practice because of how the ABI
    // defines varargs anyway.
    if (fnType->getCallConv() == CC_C) {
      bool HasAVXType = false;
      for (const CallArg &arg : args) {
        if (getABIInfo<X86_64ABIInfo>().isPassedUsingAVXType(arg.Ty)) {
          HasAVXType = true;
          break;
        }
      }

      if (!HasAVXType)
        return true;
    }

    return TargetCodeGenInfo::isNoProtoCallVariadic(args, fnType);
  }

  void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
                           CodeGen::CodeGenModule &CGM) const override {
    if (GV->isDeclaration())
      return;
    if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) {
      if (FD->hasAttr<X86ForceAlignArgPointerAttr>()) {
        llvm::Function *Fn = cast<llvm::Function>(GV);
        Fn->addFnAttr("stackrealign");
      }

      addX86InterruptAttrs(FD, GV, CGM);
    }
  }

  void checkFunctionCallABI(CodeGenModule &CGM, SourceLocation CallLoc,
                            const FunctionDecl *Caller,
                            const FunctionDecl *Callee, const CallArgList &Args,
                            QualType ReturnType) const override;
};
} // namespace

static void initFeatureMaps(const ASTContext &Ctx,
                            llvm::StringMap<bool> &CallerMap,
                            const FunctionDecl *Caller,
                            llvm::StringMap<bool> &CalleeMap,
                            const FunctionDecl *Callee) {
  if (CalleeMap.empty() && CallerMap.empty()) {
    // The caller is potentially nullptr in the case where the call isn't in a
    // function. In this case, getFunctionFeatureMap ensures we just get the
    // TU-level setting (since it cannot be modified by 'target').
    Ctx.getFunctionFeatureMap(CallerMap, Caller);
    Ctx.getFunctionFeatureMap(CalleeMap, Callee);
  }
}

static bool checkAVXParamFeature(DiagnosticsEngine &Diag,
                                 SourceLocation CallLoc,
                                 const llvm::StringMap<bool> &CallerMap,
                                 const llvm::StringMap<bool> &CalleeMap,
                                 QualType Ty, StringRef Feature,
                                 bool IsArgument) {
  bool CallerHasFeat = CallerMap.lookup(Feature);
  bool CalleeHasFeat = CalleeMap.lookup(Feature);
  if (!CallerHasFeat && !CalleeHasFeat)
    return Diag.Report(CallLoc, diag::warn_avx_calling_convention)
           << IsArgument << Ty << Feature;

  // Mixing calling conventions here is very clearly an error.
  if (!CallerHasFeat || !CalleeHasFeat)
    return Diag.Report(CallLoc, diag::err_avx_calling_convention)
           << IsArgument << Ty << Feature;

  // Otherwise, both the caller and the callee have the required feature, so
  // there is no need to diagnose.
  return false;
}

static bool checkAVX512ParamFeature(DiagnosticsEngine &Diag,
                                    SourceLocation CallLoc,
                                    const llvm::StringMap<bool> &CallerMap,
                                    const llvm::StringMap<bool> &CalleeMap,
                                    QualType Ty, bool IsArgument) {
  bool Caller256 = CallerMap.lookup("avx512f") && !CallerMap.lookup("evex512");
  bool Callee256 = CalleeMap.lookup("avx512f") && !CalleeMap.lookup("evex512");

  // Forbid passing or returning 512-bit or larger vectors when ZMM
  // instructions are disabled.
  if (Caller256 || Callee256)
    return Diag.Report(CallLoc, diag::err_avx_calling_convention)
           << IsArgument << Ty << "evex512";

  return checkAVXParamFeature(Diag, CallLoc, CallerMap, CalleeMap, Ty,
                              "avx512f", IsArgument);
}

static bool checkAVXParam(DiagnosticsEngine &Diag, ASTContext &Ctx,
                          SourceLocation CallLoc,
                          const llvm::StringMap<bool> &CallerMap,
                          const llvm::StringMap<bool> &CalleeMap, QualType Ty,
                          bool IsArgument) {
  uint64_t Size = Ctx.getTypeSize(Ty);
  if (Size > 256)
    return checkAVX512ParamFeature(Diag, CallLoc, CallerMap, CalleeMap, Ty,
                                   IsArgument);

  if (Size > 128)
    return checkAVXParamFeature(Diag, CallLoc, CallerMap, CalleeMap, Ty, "avx",
                                IsArgument);

  return false;
}

void X86_64TargetCodeGenInfo::checkFunctionCallABI(CodeGenModule &CGM,
                                                   SourceLocation CallLoc,
                                                   const FunctionDecl *Caller,
                                                   const FunctionDecl *Callee,
                                                   const CallArgList &Args,
                                                   QualType ReturnType) const {
  if (!Callee)
    return;

  llvm::StringMap<bool> CallerMap;
  llvm::StringMap<bool> CalleeMap;
  unsigned ArgIndex = 0;

  // We need to loop through the actual call arguments rather than the
  // function's parameters, in case this is variadic.
  for (const CallArg &Arg : Args) {
    // The "avx" feature changes how vectors >128 in size are passed. "avx512f"
    // additionally changes how vectors >256 in size are passed. Like GCC, we
    // warn when a function is called with an argument where this will change.
    // Unlike GCC, we also error when it is an obvious ABI mismatch, that is,
    // the caller and callee features are mismatched.
    // Unfortunately, we cannot do this diagnostic in Sema, since the callee
    // can change its ABI with attribute-target after this call.
    if (Arg.getType()->isVectorType() &&
        CGM.getContext().getTypeSize(Arg.getType()) > 128) {
      initFeatureMaps(CGM.getContext(), CallerMap, Caller, CalleeMap, Callee);
      QualType Ty = Arg.getType();
      // The CallArg seems to have desugared the type already, so for clearer
      // diagnostics, replace it with the type in the FunctionDecl if possible.
      if (ArgIndex < Callee->getNumParams())
        Ty = Callee->getParamDecl(ArgIndex)->getType();

      if (checkAVXParam(CGM.getDiags(), CGM.getContext(), CallLoc, CallerMap,
                        CalleeMap, Ty, /*IsArgument*/ true))
        return;
    }
    ++ArgIndex;
  }

  // Check the return value always, as we don't have a good way of knowing in
  // codegen whether this value is used, tail-called, etc.
1612 if (Callee->getReturnType()->isVectorType() &&
1613 CGM.getContext().getTypeSize(Callee->getReturnType()) > 128) {
1614 initFeatureMaps(CGM.getContext(), CallerMap, Caller, CalleeMap, Callee);
1615 checkAVXParam(CGM.getDiags(), CGM.getContext(), CallLoc, CallerMap,
1616 CalleeMap, Callee->getReturnType(),
1617 /*IsArgument*/ false);
1618 }
1619 }
1620
1621 std::string TargetCodeGenInfo::qualifyWindowsLibrary(StringRef Lib) {
1622 // If the argument does not end in .lib or .a, automatically add the .lib
1623 // suffix. If the argument contains a space, enclose it in quotes.
1624 // This matches the behavior of MSVC.
1625 bool Quote = Lib.contains(' ');
1626 std::string ArgStr = Quote ? "\"" : "";
1627 ArgStr += Lib;
1628 if (!Lib.ends_with_insensitive(".lib") && !Lib.ends_with_insensitive(".a"))
1629 ArgStr += ".lib";
1630 ArgStr += Quote ? "\"" : "";
1631 return ArgStr;
1632 }
1633
1634 namespace {
1635 class WinX86_32TargetCodeGenInfo : public X86_32TargetCodeGenInfo {
1636 public:
1637 WinX86_32TargetCodeGenInfo(CodeGen::CodeGenTypes &CGT,
1638 bool DarwinVectorABI, bool RetSmallStructInRegABI, bool Win32StructABI,
1639 unsigned NumRegisterParameters)
1640 : X86_32TargetCodeGenInfo(CGT, DarwinVectorABI, RetSmallStructInRegABI,
1641 Win32StructABI, NumRegisterParameters, false) {}
1642
1643 void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
1644 CodeGen::CodeGenModule &CGM) const override;
1645
1646 void getDependentLibraryOption(llvm::StringRef Lib,
1647 llvm::SmallString<24> &Opt) const override {
1648 Opt = "/DEFAULTLIB:";
1649 Opt += qualifyWindowsLibrary(Lib);
1650 }
1651
1652 void getDetectMismatchOption(llvm::StringRef Name,
1653 llvm::StringRef Value,
1654 llvm::SmallString<32> &Opt) const override {
1655 Opt = "/FAILIFMISMATCH:\"" + Name.str() + "=" + Value.str() + "\"";
1656 }
1657 };
1658 } // namespace
1659
1660 void WinX86_32TargetCodeGenInfo::setTargetAttributes(
1661 const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const {
1662 X86_32TargetCodeGenInfo::setTargetAttributes(D, GV, CGM);
1663 if (GV->isDeclaration())
1664 return;
1665 addStackProbeTargetAttributes(D, GV, CGM);
1666 }
1667
1668 namespace {
1669 class WinX86_64TargetCodeGenInfo : public TargetCodeGenInfo {
1670 public:
1671 WinX86_64TargetCodeGenInfo(CodeGen::CodeGenTypes &CGT,
1672 X86AVXABILevel AVXLevel)
1673 : TargetCodeGenInfo(std::make_unique<WinX86_64ABIInfo>(CGT, AVXLevel)) {
1674 SwiftInfo =
1675 std::make_unique<SwiftABIInfo>(CGT, /*SwiftErrorInRegister=*/true);
1676 }
1677
1678 void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
1679 CodeGen::CodeGenModule &CGM) const override;
1680
1681 int getDwarfEHStackPointer(CodeGen::CodeGenModule &CGM) const override {
1682 return 7;
1683 }
1684
1685 bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF,
1686 llvm::Value *Address) const override {
1687 llvm::Value *Eight8 = llvm::ConstantInt::get(CGF.Int8Ty, 8);
1688
1689 // 0-15 are the 16 integer registers.
1690 // 16 is %rip.
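// (Annotation, not in the upstream source: the call below stores the constant
// 8 into slots 0 through 16 of the table, i.e. each of these DWARF registers
// is 8 bytes wide.)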
1691 AssignToArrayRange(CGF.Builder, Address, Eight8, 0, 16); 1692 return false; 1693 } 1694 1695 void getDependentLibraryOption(llvm::StringRef Lib, 1696 llvm::SmallString<24> &Opt) const override { 1697 Opt = "/DEFAULTLIB:"; 1698 Opt += qualifyWindowsLibrary(Lib); 1699 } 1700 1701 void getDetectMismatchOption(llvm::StringRef Name, 1702 llvm::StringRef Value, 1703 llvm::SmallString<32> &Opt) const override { 1704 Opt = "/FAILIFMISMATCH:\"" + Name.str() + "=" + Value.str() + "\""; 1705 } 1706 }; 1707 } // namespace 1708 1709 void WinX86_64TargetCodeGenInfo::setTargetAttributes( 1710 const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const { 1711 TargetCodeGenInfo::setTargetAttributes(D, GV, CGM); 1712 if (GV->isDeclaration()) 1713 return; 1714 if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) { 1715 if (FD->hasAttr<X86ForceAlignArgPointerAttr>()) { 1716 llvm::Function *Fn = cast<llvm::Function>(GV); 1717 Fn->addFnAttr("stackrealign"); 1718 } 1719 1720 addX86InterruptAttrs(FD, GV, CGM); 1721 } 1722 1723 addStackProbeTargetAttributes(D, GV, CGM); 1724 } 1725 1726 void X86_64ABIInfo::postMerge(unsigned AggregateSize, Class &Lo, 1727 Class &Hi) const { 1728 // AMD64-ABI 3.2.3p2: Rule 5. Then a post merger cleanup is done: 1729 // 1730 // (a) If one of the classes is Memory, the whole argument is passed in 1731 // memory. 1732 // 1733 // (b) If X87UP is not preceded by X87, the whole argument is passed in 1734 // memory. 1735 // 1736 // (c) If the size of the aggregate exceeds two eightbytes and the first 1737 // eightbyte isn't SSE or any other eightbyte isn't SSEUP, the whole 1738 // argument is passed in memory. NOTE: This is necessary to keep the 1739 // ABI working for processors that don't support the __m256 type. 1740 // 1741 // (d) If SSEUP is not preceded by SSE or SSEUP, it is converted to SSE. 1742 // 1743 // Some of these are enforced by the merging logic. Others can arise 1744 // only with unions; for example: 1745 // union { _Complex double; unsigned; } 1746 // 1747 // Note that clauses (b) and (c) were added in 0.98. 1748 // 1749 if (Hi == Memory) 1750 Lo = Memory; 1751 if (Hi == X87Up && Lo != X87 && honorsRevision0_98()) 1752 Lo = Memory; 1753 if (AggregateSize > 128 && (Lo != SSE || Hi != SSEUp)) 1754 Lo = Memory; 1755 if (Hi == SSEUp && Lo != SSE) 1756 Hi = SSE; 1757 } 1758 1759 X86_64ABIInfo::Class X86_64ABIInfo::merge(Class Accum, Class Field) { 1760 // AMD64-ABI 3.2.3p2: Rule 4. Each field of an object is 1761 // classified recursively so that always two fields are 1762 // considered. The resulting class is calculated according to 1763 // the classes of the fields in the eightbyte: 1764 // 1765 // (a) If both classes are equal, this is the resulting class. 1766 // 1767 // (b) If one of the classes is NO_CLASS, the resulting class is 1768 // the other class. 1769 // 1770 // (c) If one of the classes is MEMORY, the result is the MEMORY 1771 // class. 1772 // 1773 // (d) If one of the classes is INTEGER, the result is the 1774 // INTEGER. 1775 // 1776 // (e) If one of the classes is X87, X87UP, COMPLEX_X87 class, 1777 // MEMORY is used as class. 1778 // 1779 // (f) Otherwise class SSE is used. 1780 1781 // Accum should never be memory (we should have returned) or 1782 // ComplexX87 (because this cannot be passed in a structure). 
1783 assert((Accum != Memory && Accum != ComplexX87) && 1784 "Invalid accumulated classification during merge."); 1785 if (Accum == Field || Field == NoClass) 1786 return Accum; 1787 if (Field == Memory) 1788 return Memory; 1789 if (Accum == NoClass) 1790 return Field; 1791 if (Accum == Integer || Field == Integer) 1792 return Integer; 1793 if (Field == X87 || Field == X87Up || Field == ComplexX87 || 1794 Accum == X87 || Accum == X87Up) 1795 return Memory; 1796 return SSE; 1797 } 1798 1799 void X86_64ABIInfo::classify(QualType Ty, uint64_t OffsetBase, Class &Lo, 1800 Class &Hi, bool isNamedArg, bool IsRegCall) const { 1801 // FIXME: This code can be simplified by introducing a simple value class for 1802 // Class pairs with appropriate constructor methods for the various 1803 // situations. 1804 1805 // FIXME: Some of the split computations are wrong; unaligned vectors 1806 // shouldn't be passed in registers for example, so there is no chance they 1807 // can straddle an eightbyte. Verify & simplify. 1808 1809 Lo = Hi = NoClass; 1810 1811 Class &Current = OffsetBase < 64 ? Lo : Hi; 1812 Current = Memory; 1813 1814 if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) { 1815 BuiltinType::Kind k = BT->getKind(); 1816 1817 if (k == BuiltinType::Void) { 1818 Current = NoClass; 1819 } else if (k == BuiltinType::Int128 || k == BuiltinType::UInt128) { 1820 Lo = Integer; 1821 Hi = Integer; 1822 } else if (k >= BuiltinType::Bool && k <= BuiltinType::LongLong) { 1823 Current = Integer; 1824 } else if (k == BuiltinType::Float || k == BuiltinType::Double || 1825 k == BuiltinType::Float16 || k == BuiltinType::BFloat16) { 1826 Current = SSE; 1827 } else if (k == BuiltinType::Float128) { 1828 Lo = SSE; 1829 Hi = SSEUp; 1830 } else if (k == BuiltinType::LongDouble) { 1831 const llvm::fltSemantics *LDF = &getTarget().getLongDoubleFormat(); 1832 if (LDF == &llvm::APFloat::IEEEquad()) { 1833 Lo = SSE; 1834 Hi = SSEUp; 1835 } else if (LDF == &llvm::APFloat::x87DoubleExtended()) { 1836 Lo = X87; 1837 Hi = X87Up; 1838 } else if (LDF == &llvm::APFloat::IEEEdouble()) { 1839 Current = SSE; 1840 } else 1841 llvm_unreachable("unexpected long double representation!"); 1842 } 1843 // FIXME: _Decimal32 and _Decimal64 are SSE. 1844 // FIXME: _float128 and _Decimal128 are (SSE, SSEUp). 1845 return; 1846 } 1847 1848 if (const EnumType *ET = Ty->getAs<EnumType>()) { 1849 // Classify the underlying integer type. 1850 classify(ET->getDecl()->getIntegerType(), OffsetBase, Lo, Hi, isNamedArg); 1851 return; 1852 } 1853 1854 if (Ty->hasPointerRepresentation()) { 1855 Current = Integer; 1856 return; 1857 } 1858 1859 if (Ty->isMemberPointerType()) { 1860 if (Ty->isMemberFunctionPointerType()) { 1861 if (Has64BitPointers) { 1862 // If Has64BitPointers, this is an {i64, i64}, so classify both 1863 // Lo and Hi now. 1864 Lo = Hi = Integer; 1865 } else { 1866 // Otherwise, with 32-bit pointers, this is an {i32, i32}. If that 1867 // straddles an eightbyte boundary, Hi should be classified as well. 
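// (Worked example, annotation not in the upstream source: with 32-bit
// pointers and OffsetBase == 32, the {i32, i32} pair occupies bits [32, 96),
// so EB_FuncPtr == 0 while EB_ThisAdj == 1 below, and both halves get
// classified.)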
1868 uint64_t EB_FuncPtr = (OffsetBase) / 64; 1869 uint64_t EB_ThisAdj = (OffsetBase + 64 - 1) / 64; 1870 if (EB_FuncPtr != EB_ThisAdj) { 1871 Lo = Hi = Integer; 1872 } else { 1873 Current = Integer; 1874 } 1875 } 1876 } else { 1877 Current = Integer; 1878 } 1879 return; 1880 } 1881 1882 if (const VectorType *VT = Ty->getAs<VectorType>()) { 1883 uint64_t Size = getContext().getTypeSize(VT); 1884 if (Size == 1 || Size == 8 || Size == 16 || Size == 32) { 1885 // gcc passes the following as integer: 1886 // 4 bytes - <4 x char>, <2 x short>, <1 x int>, <1 x float> 1887 // 2 bytes - <2 x char>, <1 x short> 1888 // 1 byte - <1 x char> 1889 Current = Integer; 1890 1891 // If this type crosses an eightbyte boundary, it should be 1892 // split. 1893 uint64_t EB_Lo = (OffsetBase) / 64; 1894 uint64_t EB_Hi = (OffsetBase + Size - 1) / 64; 1895 if (EB_Lo != EB_Hi) 1896 Hi = Lo; 1897 } else if (Size == 64) { 1898 QualType ElementType = VT->getElementType(); 1899 1900 // gcc passes <1 x double> in memory. :( 1901 if (ElementType->isSpecificBuiltinType(BuiltinType::Double)) 1902 return; 1903 1904 // gcc passes <1 x long long> as SSE but clang used to unconditionally 1905 // pass them as integer. For platforms where clang is the de facto 1906 // platform compiler, we must continue to use integer. 1907 if (!classifyIntegerMMXAsSSE() && 1908 (ElementType->isSpecificBuiltinType(BuiltinType::LongLong) || 1909 ElementType->isSpecificBuiltinType(BuiltinType::ULongLong) || 1910 ElementType->isSpecificBuiltinType(BuiltinType::Long) || 1911 ElementType->isSpecificBuiltinType(BuiltinType::ULong))) 1912 Current = Integer; 1913 else 1914 Current = SSE; 1915 1916 // If this type crosses an eightbyte boundary, it should be 1917 // split. 1918 if (OffsetBase && OffsetBase != 64) 1919 Hi = Lo; 1920 } else if (Size == 128 || 1921 (isNamedArg && Size <= getNativeVectorSizeForAVXABI(AVXLevel))) { 1922 QualType ElementType = VT->getElementType(); 1923 1924 // gcc passes 256 and 512 bit <X x __int128> vectors in memory. :( 1925 if (passInt128VectorsInMem() && Size != 128 && 1926 (ElementType->isSpecificBuiltinType(BuiltinType::Int128) || 1927 ElementType->isSpecificBuiltinType(BuiltinType::UInt128))) 1928 return; 1929 1930 // Arguments of 256-bits are split into four eightbyte chunks. The 1931 // least significant one belongs to class SSE and all the others to class 1932 // SSEUP. The original Lo and Hi design considers that types can't be 1933 // greater than 128-bits, so a 64-bit split in Hi and Lo makes sense. 1934 // This design isn't correct for 256-bits, but since there're no cases 1935 // where the upper parts would need to be inspected, avoid adding 1936 // complexity and just consider Hi to match the 64-256 part. 1937 // 1938 // Note that per 3.5.7 of AMD64-ABI, 256-bit args are only passed in 1939 // registers if they are "named", i.e. not part of the "..." of a 1940 // variadic function. 1941 // 1942 // Similarly, per 3.2.3. of the AVX512 draft, 512-bits ("named") args are 1943 // split into eight eightbyte chunks, one SSE and seven SSEUP. 
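// (Illustrative annotation, not in the upstream source, assuming an AVX
// native vector size of 256: a named __m256 argument reaches this point and
// becomes Lo = SSE, Hi = SSEUp, i.e. one whole YMM register, while the same
// type in the "..." of a variadic call fails the isNamedArg check above and
// stays classified as Memory.)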
1944 Lo = SSE;
1945 Hi = SSEUp;
1946 }
1947 return;
1948 }
1949
1950 if (const ComplexType *CT = Ty->getAs<ComplexType>()) {
1951 QualType ET = getContext().getCanonicalType(CT->getElementType());
1952
1953 uint64_t Size = getContext().getTypeSize(Ty);
1954 if (ET->isIntegralOrEnumerationType()) {
1955 if (Size <= 64)
1956 Current = Integer;
1957 else if (Size <= 128)
1958 Lo = Hi = Integer;
1959 } else if (ET->isFloat16Type() || ET == getContext().FloatTy ||
1960 ET->isBFloat16Type()) {
1961 Current = SSE;
1962 } else if (ET == getContext().DoubleTy) {
1963 Lo = Hi = SSE;
1964 } else if (ET == getContext().LongDoubleTy) {
1965 const llvm::fltSemantics *LDF = &getTarget().getLongDoubleFormat();
1966 if (LDF == &llvm::APFloat::IEEEquad())
1967 Current = Memory;
1968 else if (LDF == &llvm::APFloat::x87DoubleExtended())
1969 Current = ComplexX87;
1970 else if (LDF == &llvm::APFloat::IEEEdouble())
1971 Lo = Hi = SSE;
1972 else
1973 llvm_unreachable("unexpected long double representation!");
1974 }
1975
1976 // If this complex type crosses an eightbyte boundary then it
1977 // should be split.
1978 uint64_t EB_Real = (OffsetBase) / 64;
1979 uint64_t EB_Imag = (OffsetBase + getContext().getTypeSize(ET)) / 64;
1980 if (Hi == NoClass && EB_Real != EB_Imag)
1981 Hi = Lo;
1982
1983 return;
1984 }
1985
1986 if (const auto *EITy = Ty->getAs<BitIntType>()) {
1987 if (EITy->getNumBits() <= 64)
1988 Current = Integer;
1989 else if (EITy->getNumBits() <= 128)
1990 Lo = Hi = Integer;
1991 // Larger values need to get passed in memory.
1992 return;
1993 }
1994
1995 if (const ConstantArrayType *AT = getContext().getAsConstantArrayType(Ty)) {
1996 // Arrays are treated like structures.
1997
1998 uint64_t Size = getContext().getTypeSize(Ty);
1999
2000 // AMD64-ABI 3.2.3p2: Rule 1. If the size of an object is larger
2001 // than eight eightbytes, ..., it has class MEMORY.
2002 // The regcall ABI doesn't limit the size of an object; the only limit
2003 // is the number of free registers, which is checked in computeInfo.
2004 if (!IsRegCall && Size > 512)
2005 return;
2006
2007 // AMD64-ABI 3.2.3p2: Rule 1. If ..., or it contains unaligned
2008 // fields, it has class MEMORY.
2009 //
2010 // Only need to check alignment of array base.
2011 if (OffsetBase % getContext().getTypeAlign(AT->getElementType()))
2012 return;
2013
2014 // Otherwise implement simplified merge. We could be smarter about
2015 // this, but it isn't worth it and would be harder to verify.
2016 Current = NoClass;
2017 uint64_t EltSize = getContext().getTypeSize(AT->getElementType());
2018 uint64_t ArraySize = AT->getZExtSize();
2019
2020 // The only case a 256-bit wide vector could be used is when the array
2021 // contains a single 256-bit element. Since the Lo and Hi logic isn't
2022 // extended to work for sizes wider than 128, check early and fall back to memory.
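// (Example, annotation not in the upstream source, assuming AVX: __m256 a[1]
// has Size == EltSize == 256 and passes the check below, while double a[4]
// (Size 256, EltSize 64) bails out here as described above.)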
2023 // 2024 if (Size > 128 && 2025 (Size != EltSize || Size > getNativeVectorSizeForAVXABI(AVXLevel))) 2026 return; 2027 2028 for (uint64_t i=0, Offset=OffsetBase; i<ArraySize; ++i, Offset += EltSize) { 2029 Class FieldLo, FieldHi; 2030 classify(AT->getElementType(), Offset, FieldLo, FieldHi, isNamedArg); 2031 Lo = merge(Lo, FieldLo); 2032 Hi = merge(Hi, FieldHi); 2033 if (Lo == Memory || Hi == Memory) 2034 break; 2035 } 2036 2037 postMerge(Size, Lo, Hi); 2038 assert((Hi != SSEUp || Lo == SSE) && "Invalid SSEUp array classification."); 2039 return; 2040 } 2041 2042 if (const RecordType *RT = Ty->getAs<RecordType>()) { 2043 uint64_t Size = getContext().getTypeSize(Ty); 2044 2045 // AMD64-ABI 3.2.3p2: Rule 1. If the size of an object is larger 2046 // than eight eightbytes, ..., it has class MEMORY. 2047 if (Size > 512) 2048 return; 2049 2050 // AMD64-ABI 3.2.3p2: Rule 2. If a C++ object has either a non-trivial 2051 // copy constructor or a non-trivial destructor, it is passed by invisible 2052 // reference. 2053 if (getRecordArgABI(RT, getCXXABI())) 2054 return; 2055 2056 const RecordDecl *RD = RT->getDecl(); 2057 2058 // Assume variable sized types are passed in memory. 2059 if (RD->hasFlexibleArrayMember()) 2060 return; 2061 2062 const ASTRecordLayout &Layout = getContext().getASTRecordLayout(RD); 2063 2064 // Reset Lo class, this will be recomputed. 2065 Current = NoClass; 2066 2067 // If this is a C++ record, classify the bases first. 2068 if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD)) { 2069 for (const auto &I : CXXRD->bases()) { 2070 assert(!I.isVirtual() && !I.getType()->isDependentType() && 2071 "Unexpected base class!"); 2072 const auto *Base = 2073 cast<CXXRecordDecl>(I.getType()->castAs<RecordType>()->getDecl()); 2074 2075 // Classify this field. 2076 // 2077 // AMD64-ABI 3.2.3p2: Rule 3. If the size of the aggregate exceeds a 2078 // single eightbyte, each is classified separately. Each eightbyte gets 2079 // initialized to class NO_CLASS. 2080 Class FieldLo, FieldHi; 2081 uint64_t Offset = 2082 OffsetBase + getContext().toBits(Layout.getBaseClassOffset(Base)); 2083 classify(I.getType(), Offset, FieldLo, FieldHi, isNamedArg); 2084 Lo = merge(Lo, FieldLo); 2085 Hi = merge(Hi, FieldHi); 2086 if (returnCXXRecordGreaterThan128InMem() && 2087 (Size > 128 && (Size != getContext().getTypeSize(I.getType()) || 2088 Size > getNativeVectorSizeForAVXABI(AVXLevel)))) { 2089 // The only case a 256(or 512)-bit wide vector could be used to return 2090 // is when CXX record contains a single 256(or 512)-bit element. 2091 Lo = Memory; 2092 } 2093 if (Lo == Memory || Hi == Memory) { 2094 postMerge(Size, Lo, Hi); 2095 return; 2096 } 2097 } 2098 } 2099 2100 // Classify the fields one at a time, merging the results. 2101 unsigned idx = 0; 2102 bool UseClang11Compat = getContext().getLangOpts().getClangABICompat() <= 2103 LangOptions::ClangABI::Ver11 || 2104 getContext().getTargetInfo().getTriple().isPS(); 2105 bool IsUnion = RT->isUnionType() && !UseClang11Compat; 2106 2107 for (RecordDecl::field_iterator i = RD->field_begin(), e = RD->field_end(); 2108 i != e; ++i, ++idx) { 2109 uint64_t Offset = OffsetBase + Layout.getFieldOffset(idx); 2110 bool BitField = i->isBitField(); 2111 2112 // Ignore padding bit-fields. 2113 if (BitField && i->isUnnamedBitField()) 2114 continue; 2115 2116 // AMD64-ABI 3.2.3p2: Rule 1. If the size of an object is larger than 2117 // eight eightbytes, or it contains unaligned fields, it has class MEMORY. 
2118 //
2119 // The only case a 256-bit or a 512-bit wide vector could be used is when
2120 // the struct contains a single 256-bit or 512-bit element. Check early
2121 // and fall back to memory.
2122 //
2123 // FIXME: Extend the Lo and Hi logic properly to work for sizes wider
2124 // than 128.
2125 if (Size > 128 &&
2126 ((!IsUnion && Size != getContext().getTypeSize(i->getType())) ||
2127 Size > getNativeVectorSizeForAVXABI(AVXLevel))) {
2128 Lo = Memory;
2129 postMerge(Size, Lo, Hi);
2130 return;
2131 }
2132
2133 bool IsInMemory =
2134 Offset % getContext().getTypeAlign(i->getType().getCanonicalType());
2135 // Note: skip this test for bit-fields; see below.
2136 if (!BitField && IsInMemory) {
2137 Lo = Memory;
2138 postMerge(Size, Lo, Hi);
2139 return;
2140 }
2141
2142 // Classify this field.
2143 //
2144 // AMD64-ABI 3.2.3p2: Rule 3. If the size of the aggregate
2145 // exceeds a single eightbyte, each is classified
2146 // separately. Each eightbyte gets initialized to class
2147 // NO_CLASS.
2148 Class FieldLo, FieldHi;
2149
2150 // Bit-fields require special handling: they do not force the
2151 // structure to be passed in memory even if unaligned, and
2152 // therefore they can straddle an eightbyte.
2153 if (BitField) {
2154 assert(!i->isUnnamedBitField());
2155 uint64_t Offset = OffsetBase + Layout.getFieldOffset(idx);
2156 uint64_t Size = i->getBitWidthValue();
2157
2158 uint64_t EB_Lo = Offset / 64;
2159 uint64_t EB_Hi = (Offset + Size - 1) / 64;
2160
2161 if (EB_Lo) {
2162 assert(EB_Hi == EB_Lo && "Invalid classification, type > 16 bytes.");
2163 FieldLo = NoClass;
2164 FieldHi = Integer;
2165 } else {
2166 FieldLo = Integer;
2167 FieldHi = EB_Hi ? Integer : NoClass;
2168 }
2169 } else
2170 classify(i->getType(), Offset, FieldLo, FieldHi, isNamedArg);
2171 Lo = merge(Lo, FieldLo);
2172 Hi = merge(Hi, FieldHi);
2173 if (Lo == Memory || Hi == Memory)
2174 break;
2175 }
2176
2177 postMerge(Size, Lo, Hi);
2178 }
2179 }
2180
2181 ABIArgInfo X86_64ABIInfo::getIndirectReturnResult(QualType Ty) const {
2182 // If this is a scalar LLVM value then assume LLVM will pass it in the right
2183 // place naturally.
2184 if (!isAggregateTypeForABI(Ty)) {
2185 // Treat an enum type as its underlying type.
2186 if (const EnumType *EnumTy = Ty->getAs<EnumType>())
2187 Ty = EnumTy->getDecl()->getIntegerType();
2188
2189 if (Ty->isBitIntType())
2190 return getNaturalAlignIndirect(Ty, getDataLayout().getAllocaAddrSpace());
2191
2192 return (isPromotableIntegerTypeForABI(Ty) ? ABIArgInfo::getExtend(Ty)
2193 : ABIArgInfo::getDirect());
2194 }
2195
2196 return getNaturalAlignIndirect(Ty, getDataLayout().getAllocaAddrSpace());
2197 }
2198
2199 bool X86_64ABIInfo::IsIllegalVectorType(QualType Ty) const {
2200 if (const VectorType *VecTy = Ty->getAs<VectorType>()) {
2201 uint64_t Size = getContext().getTypeSize(VecTy);
2202 unsigned LargestVector = getNativeVectorSizeForAVXABI(AVXLevel);
2203 if (Size <= 64 || Size > LargestVector)
2204 return true;
2205 QualType EltTy = VecTy->getElementType();
2206 if (passInt128VectorsInMem() &&
2207 (EltTy->isSpecificBuiltinType(BuiltinType::Int128) ||
2208 EltTy->isSpecificBuiltinType(BuiltinType::UInt128)))
2209 return true;
2210 }
2211
2212 return false;
2213 }
2214
2215 ABIArgInfo X86_64ABIInfo::getIndirectResult(QualType Ty,
2216 unsigned freeIntRegs) const {
2217 // If this is a scalar LLVM value then assume LLVM will pass it in the right
2218 // place naturally.
2219 //
2220 // This assumption is optimistic, as there could be free registers available
2221 // when we need to pass this argument in memory, and LLVM could try to pass
2222 // the argument in the free register. This does not seem to happen currently,
2223 // but this code would be much safer if we could mark the argument with
2224 // 'onstack'. See PR12193.
2225 if (!isAggregateTypeForABI(Ty) && !IsIllegalVectorType(Ty) &&
2226 !Ty->isBitIntType()) {
2227 // Treat an enum type as its underlying type.
2228 if (const EnumType *EnumTy = Ty->getAs<EnumType>())
2229 Ty = EnumTy->getDecl()->getIntegerType();
2230
2231 return (isPromotableIntegerTypeForABI(Ty) ? ABIArgInfo::getExtend(Ty)
2232 : ABIArgInfo::getDirect());
2233 }
2234
2235 if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI()))
2236 return getNaturalAlignIndirect(Ty, getDataLayout().getAllocaAddrSpace(),
2237 RAA == CGCXXABI::RAA_DirectInMemory);
2238
2239 // Compute the byval alignment. We specify the alignment of the byval in all
2240 // cases so that the mid-level optimizer knows the alignment of the byval.
2241 unsigned Align = std::max(getContext().getTypeAlign(Ty) / 8, 8U);
2242
2243 // Attempt to avoid passing indirect results using byval when possible. This
2244 // is important for good codegen.
2245 //
2246 // We do this by coercing the value into a scalar type which the backend can
2247 // handle naturally (i.e., without using byval).
2248 //
2249 // For simplicity, we currently only do this when we have exhausted all of the
2250 // free integer registers. Doing this when there are free integer registers
2251 // would require more care, as we would have to ensure that the coerced value
2252 // did not claim the unused register. That would require either reordering the
2253 // arguments to the function (so that any subsequent inreg values came first),
2254 // or only doing this optimization when there were no following arguments that
2255 // might be inreg.
2256 //
2257 // We currently expect it to be rare (particularly in well written code) for
2258 // arguments to be passed on the stack when there are still free integer
2259 // registers available (this would typically imply large structs being passed
2260 // by value), so this seems like a fair tradeoff for now.
2261 //
2262 // We can revisit this if the backend grows support for 'onstack' parameter
2263 // attributes. See PR12193.
2264 if (freeIntRegs == 0) {
2265 uint64_t Size = getContext().getTypeSize(Ty);
2266
2267 // If this type fits in an eightbyte, coerce it into the matching integral
2268 // type, which will end up on the stack (with alignment 8).
2269 if (Align == 8 && Size <= 64)
2270 return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(),
2271 Size));
2272 }
2273
2274 return ABIArgInfo::getIndirect(CharUnits::fromQuantity(Align),
2275 getDataLayout().getAllocaAddrSpace());
2276 }
2277
2278 /// The ABI specifies that a value should be passed in a full vector XMM/YMM
2279 /// register. Pick an LLVM IR type that will be passed as a vector register.
2280 llvm::Type *X86_64ABIInfo::GetByteVectorType(QualType Ty) const {
2281 // Wrapper structs/arrays that only contain vectors are passed just like
2282 // vectors; strip them off if present.
2283 if (const Type *InnerTy = isSingleElementStruct(Ty, getContext()))
2284 Ty = QualType(InnerTy, 0);
2285
2286 llvm::Type *IRType = CGT.ConvertType(Ty);
2287 if (isa<llvm::VectorType>(IRType)) {
2288 // Don't pass vXi128 vectors in their native type; the backend can't
2289 // legalize them.
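// (Example, annotation not in the upstream source: a <1 x i128> produced for
// a single-element __int128 vector is rewritten below to <2 x i64>, which
// covers the same 128 bits.)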
2290 if (passInt128VectorsInMem() &&
2291 cast<llvm::VectorType>(IRType)->getElementType()->isIntegerTy(128)) {
2292 // Use a vXi64 vector.
2293 uint64_t Size = getContext().getTypeSize(Ty);
2294 return llvm::FixedVectorType::get(llvm::Type::getInt64Ty(getVMContext()),
2295 Size / 64);
2296 }
2297
2298 return IRType;
2299 }
2300
2301 if (IRType->getTypeID() == llvm::Type::FP128TyID)
2302 return IRType;
2303
2304 // We couldn't find the preferred IR vector type for 'Ty'.
2305 uint64_t Size = getContext().getTypeSize(Ty);
2306 assert((Size == 128 || Size == 256 || Size == 512) && "Invalid type found!");
2307
2308
2309 // Return an LLVM IR vector type based on the size of 'Ty'.
2310 return llvm::FixedVectorType::get(llvm::Type::getDoubleTy(getVMContext()),
2311 Size / 64);
2312 }
2313
2314 /// BitsContainNoUserData - Return true if the specified [start,end) bit range
2315 /// is known to either be off the end of the specified type or be in
2316 /// alignment padding. The user type specified is known to be at most 128 bits
2317 /// in size, and have passed through X86_64ABIInfo::classify with a successful
2318 /// classification that put one of the two halves in the INTEGER class.
2319 ///
2320 /// It is conservatively correct to return false.
2321 static bool BitsContainNoUserData(QualType Ty, unsigned StartBit,
2322 unsigned EndBit, ASTContext &Context) {
2323 // If the bytes being queried are off the end of the type, there is no user
2324 // data hiding here. This handles analysis of builtins, vectors and other
2325 // types that don't contain interesting padding.
2326 unsigned TySize = (unsigned)Context.getTypeSize(Ty);
2327 if (TySize <= StartBit)
2328 return true;
2329
2330 if (const ConstantArrayType *AT = Context.getAsConstantArrayType(Ty)) {
2331 unsigned EltSize = (unsigned)Context.getTypeSize(AT->getElementType());
2332 unsigned NumElts = (unsigned)AT->getZExtSize();
2333
2334 // Check each element to see if the element overlaps with the queried range.
2335 for (unsigned i = 0; i != NumElts; ++i) {
2336 // If the element is after the span we care about, then we're done.
2337 unsigned EltOffset = i*EltSize;
2338 if (EltOffset >= EndBit) break;
2339
2340 unsigned EltStart = EltOffset < StartBit ? StartBit-EltOffset :0;
2341 if (!BitsContainNoUserData(AT->getElementType(), EltStart,
2342 EndBit-EltOffset, Context))
2343 return false;
2344 }
2345 // If it overlaps no elements, then it is safe to process as padding.
2346 return true;
2347 }
2348
2349 if (const RecordType *RT = Ty->getAs<RecordType>()) {
2350 const RecordDecl *RD = RT->getDecl();
2351 const ASTRecordLayout &Layout = Context.getASTRecordLayout(RD);
2352
2353 // If this is a C++ record, check the bases first.
2354 if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD)) {
2355 for (const auto &I : CXXRD->bases()) {
2356 assert(!I.isVirtual() && !I.getType()->isDependentType() &&
2357 "Unexpected base class!");
2358 const auto *Base =
2359 cast<CXXRecordDecl>(I.getType()->castAs<RecordType>()->getDecl());
2360
2361 // If the base is after the span we care about, ignore it.
2362 unsigned BaseOffset = Context.toBits(Layout.getBaseClassOffset(Base));
2363 if (BaseOffset >= EndBit) continue;
2364
2365 unsigned BaseStart = BaseOffset < StartBit ? StartBit-BaseOffset :0;
2366 if (!BitsContainNoUserData(I.getType(), BaseStart,
2367 EndBit-BaseOffset, Context))
2368 return false;
2369 }
2370 }
2371
2372 // Verify that no field has data that overlaps the region of interest. Yes,
2373 // this could be sped up a lot by being smarter about queried fields;
2374 // however, we're only looking at structs up to 16 bytes, so we don't care
2375 // much.
2376 unsigned idx = 0;
2377 for (RecordDecl::field_iterator i = RD->field_begin(), e = RD->field_end();
2378 i != e; ++i, ++idx) {
2379 unsigned FieldOffset = (unsigned)Layout.getFieldOffset(idx);
2380
2381 // If we found a field after the region we care about, then we're done.
2382 if (FieldOffset >= EndBit) break;
2383
2384 unsigned FieldStart = FieldOffset < StartBit ? StartBit-FieldOffset :0;
2385 if (!BitsContainNoUserData(i->getType(), FieldStart, EndBit-FieldOffset,
2386 Context))
2387 return false;
2388 }
2389
2390 // If nothing in this record overlapped the area of interest, then we're
2391 // clean.
2392 return true;
2393 }
2394
2395 return false;
2396 }
2397
2398 /// getFPTypeAtOffset - Return a floating point type at the specified offset.
2399 static llvm::Type *getFPTypeAtOffset(llvm::Type *IRType, unsigned IROffset,
2400 const llvm::DataLayout &TD) {
2401 if (IROffset == 0 && IRType->isFloatingPointTy())
2402 return IRType;
2403
2404 // If this is a struct, recurse into the field at the specified offset.
2405 if (llvm::StructType *STy = dyn_cast<llvm::StructType>(IRType)) {
2406 if (!STy->getNumContainedTypes())
2407 return nullptr;
2408
2409 const llvm::StructLayout *SL = TD.getStructLayout(STy);
2410 unsigned Elt = SL->getElementContainingOffset(IROffset);
2411 IROffset -= SL->getElementOffset(Elt);
2412 return getFPTypeAtOffset(STy->getElementType(Elt), IROffset, TD);
2413 }
2414
2415 // If this is an array, recurse into the field at the specified offset.
2416 if (llvm::ArrayType *ATy = dyn_cast<llvm::ArrayType>(IRType)) {
2417 llvm::Type *EltTy = ATy->getElementType();
2418 unsigned EltSize = TD.getTypeAllocSize(EltTy);
2419 IROffset -= IROffset / EltSize * EltSize;
2420 return getFPTypeAtOffset(EltTy, IROffset, TD);
2421 }
2422
2423 return nullptr;
2424 }
2425
2426 /// GetSSETypeAtOffset - Return a type that will be passed by the backend in the
2427 /// low 8 bytes of an XMM register, corresponding to the SSE class.
2428 llvm::Type *X86_64ABIInfo::
2429 GetSSETypeAtOffset(llvm::Type *IRType, unsigned IROffset,
2430 QualType SourceTy, unsigned SourceOffset) const {
2431 const llvm::DataLayout &TD = getDataLayout();
2432 unsigned SourceSize =
2433 (unsigned)getContext().getTypeSize(SourceTy) / 8 - SourceOffset;
2434 llvm::Type *T0 = getFPTypeAtOffset(IRType, IROffset, TD);
2435 if (!T0 || T0->isDoubleTy())
2436 return llvm::Type::getDoubleTy(getVMContext());
2437
2438 // Get the adjacent FP type.
2439 llvm::Type *T1 = nullptr;
2440 unsigned T0Size = TD.getTypeAllocSize(T0);
2441 if (SourceSize > T0Size)
2442 T1 = getFPTypeAtOffset(IRType, IROffset + T0Size, TD);
2443 if (T1 == nullptr) {
2444 // Check if IRType is a half/bfloat + float pair; the float will be at
2445 // IROffset + 4 due to its alignment.
2446 if (T0->is16bitFPTy() && SourceSize > 4)
2447 T1 = getFPTypeAtOffset(IRType, IROffset + 4, TD);
2448 // If we can't get a second FP type, return a simple half or float.
2449 // avx512fp16-abi.c:pr51813_2 shows it works to return float for
2450 // {float, i8} too.
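// (Example, annotation not in the upstream source: for struct { _Float16 a;
// float b; }, T0 is half at offset 0 and the float sits at IROffset + 4,
// which the probe above finds; only when that probe also fails do we fall
// back to returning T0 alone below.)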
2451 if (T1 == nullptr) 2452 return T0; 2453 } 2454 2455 if (T0->isFloatTy() && T1->isFloatTy()) 2456 return llvm::FixedVectorType::get(T0, 2); 2457 2458 if (T0->is16bitFPTy() && T1->is16bitFPTy()) { 2459 llvm::Type *T2 = nullptr; 2460 if (SourceSize > 4) 2461 T2 = getFPTypeAtOffset(IRType, IROffset + 4, TD); 2462 if (T2 == nullptr) 2463 return llvm::FixedVectorType::get(T0, 2); 2464 return llvm::FixedVectorType::get(T0, 4); 2465 } 2466 2467 if (T0->is16bitFPTy() || T1->is16bitFPTy()) 2468 return llvm::FixedVectorType::get(llvm::Type::getHalfTy(getVMContext()), 4); 2469 2470 return llvm::Type::getDoubleTy(getVMContext()); 2471 } 2472 2473 2474 /// GetINTEGERTypeAtOffset - The ABI specifies that a value should be passed in 2475 /// an 8-byte GPR. This means that we either have a scalar or we are talking 2476 /// about the high or low part of an up-to-16-byte struct. This routine picks 2477 /// the best LLVM IR type to represent this, which may be i64 or may be anything 2478 /// else that the backend will pass in a GPR that works better (e.g. i8, %foo*, 2479 /// etc). 2480 /// 2481 /// PrefType is an LLVM IR type that corresponds to (part of) the IR type for 2482 /// the source type. IROffset is an offset in bytes into the LLVM IR type that 2483 /// the 8-byte value references. PrefType may be null. 2484 /// 2485 /// SourceTy is the source-level type for the entire argument. SourceOffset is 2486 /// an offset into this that we're processing (which is always either 0 or 8). 2487 /// 2488 llvm::Type *X86_64ABIInfo:: 2489 GetINTEGERTypeAtOffset(llvm::Type *IRType, unsigned IROffset, 2490 QualType SourceTy, unsigned SourceOffset) const { 2491 // If we're dealing with an un-offset LLVM IR type, then it means that we're 2492 // returning an 8-byte unit starting with it. See if we can safely use it. 2493 if (IROffset == 0) { 2494 // Pointers and int64's always fill the 8-byte unit. 2495 if ((isa<llvm::PointerType>(IRType) && Has64BitPointers) || 2496 IRType->isIntegerTy(64)) 2497 return IRType; 2498 2499 // If we have a 1/2/4-byte integer, we can use it only if the rest of the 2500 // goodness in the source type is just tail padding. This is allowed to 2501 // kick in for struct {double,int} on the int, but not on 2502 // struct{double,int,int} because we wouldn't return the second int. We 2503 // have to do this analysis on the source type because we can't depend on 2504 // unions being lowered a specific way etc. 2505 if (IRType->isIntegerTy(8) || IRType->isIntegerTy(16) || 2506 IRType->isIntegerTy(32) || 2507 (isa<llvm::PointerType>(IRType) && !Has64BitPointers)) { 2508 unsigned BitWidth = isa<llvm::PointerType>(IRType) ? 32 : 2509 cast<llvm::IntegerType>(IRType)->getBitWidth(); 2510 2511 if (BitsContainNoUserData(SourceTy, SourceOffset*8+BitWidth, 2512 SourceOffset*8+64, getContext())) 2513 return IRType; 2514 } 2515 } 2516 2517 if (llvm::StructType *STy = dyn_cast<llvm::StructType>(IRType)) { 2518 // If this is a struct, recurse into the field at the specified offset. 
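// (Example, annotation not in the upstream source: for { double, i32 } with
// IROffset == 8, the layout query below selects the i32 field, and the
// recursion returns i32 once BitsContainNoUserData has confirmed the tail
// bytes are padding.)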
2519 const llvm::StructLayout *SL = getDataLayout().getStructLayout(STy);
2520 if (IROffset < SL->getSizeInBytes()) {
2521 unsigned FieldIdx = SL->getElementContainingOffset(IROffset);
2522 IROffset -= SL->getElementOffset(FieldIdx);
2523
2524 return GetINTEGERTypeAtOffset(STy->getElementType(FieldIdx), IROffset,
2525 SourceTy, SourceOffset);
2526 }
2527 }
2528
2529 if (llvm::ArrayType *ATy = dyn_cast<llvm::ArrayType>(IRType)) {
2530 llvm::Type *EltTy = ATy->getElementType();
2531 unsigned EltSize = getDataLayout().getTypeAllocSize(EltTy);
2532 unsigned EltOffset = IROffset/EltSize*EltSize;
2533 return GetINTEGERTypeAtOffset(EltTy, IROffset-EltOffset, SourceTy,
2534 SourceOffset);
2535 }
2536
2537 // Okay, we don't have any better idea of what to pass, so we pass this in an
2538 // integer register that isn't too big to fit the rest of the struct.
2539 unsigned TySizeInBytes =
2540 (unsigned)getContext().getTypeSizeInChars(SourceTy).getQuantity();
2541
2542 assert(TySizeInBytes != SourceOffset && "Empty field?");
2543
2544 // It is always safe to classify this as an integer type up to i64 that
2545 // isn't larger than the structure.
2546 return llvm::IntegerType::get(getVMContext(),
2547 std::min(TySizeInBytes-SourceOffset, 8U)*8);
2548 }
2549
2550
2551 /// GetX86_64ByValArgumentPair - Given a high and low type that can ideally
2552 /// be used as elements of a two register pair to pass or return, return a
2553 /// first class aggregate to represent them. For example, if the low part of
2554 /// a by-value argument should be passed as i32* and the high part as float,
2555 /// return {i32*, float}.
2556 static llvm::Type *
2557 GetX86_64ByValArgumentPair(llvm::Type *Lo, llvm::Type *Hi,
2558 const llvm::DataLayout &TD) {
2559 // In order to correctly satisfy the ABI, we need the high part to start
2560 // at offset 8. If the high and low parts we inferred are both 4-byte types
2561 // (e.g. i32 and i32) then the resultant struct type ({i32,i32}) won't have
2562 // the second element at offset 8. Check for this:
2563 unsigned LoSize = (unsigned)TD.getTypeAllocSize(Lo);
2564 llvm::Align HiAlign = TD.getABITypeAlign(Hi);
2565 unsigned HiStart = llvm::alignTo(LoSize, HiAlign);
2566 assert(HiStart != 0 && HiStart <= 8 && "Invalid x86-64 argument pair!");
2567
2568 // To handle this, we have to increase the size of the low part so that the
2569 // second element will start at an 8 byte offset. We can't increase the size
2570 // of the second element because it might make us access off the end of the
2571 // struct.
2572 if (HiStart != 8) {
2573 // There are usually two sorts of types the ABI generation code can produce
2574 // for the low part of a pair that aren't 8 bytes in size: half, float or
2575 // i8/i16/i32. This can also include pointers when they are 32-bit (X32 and
2576 // NaCl).
2577 // Promote these to a larger type.
2578 if (Lo->isHalfTy() || Lo->isFloatTy())
2579 Lo = llvm::Type::getDoubleTy(Lo->getContext());
2580 else {
2581 assert((Lo->isIntegerTy() || Lo->isPointerTy())
2582 && "Invalid/unknown lo type");
2583 Lo = llvm::Type::getInt64Ty(Lo->getContext());
2584 }
2585 }
2586
2587 llvm::StructType *Result = llvm::StructType::get(Lo, Hi);
2588
2589 // Verify that the second element is at an 8-byte offset.
2590 assert(TD.getStructLayout(Result)->getElementOffset(1) == 8 &&
2591 "Invalid x86-64 argument pair!");
2592 return Result;
2593 }
2594
2595 ABIArgInfo X86_64ABIInfo::
2596 classifyReturnType(QualType RetTy) const {
2597 // AMD64-ABI 3.2.3p4: Rule 1. Classify the return type with the
2598 // classification algorithm.
2599 X86_64ABIInfo::Class Lo, Hi;
2600 classify(RetTy, 0, Lo, Hi, /*isNamedArg*/ true);
2601
2602 // Check some invariants.
2603 assert((Hi != Memory || Lo == Memory) && "Invalid memory classification.");
2604 assert((Hi != SSEUp || Lo == SSE) && "Invalid SSEUp classification.");
2605
2606 llvm::Type *ResType = nullptr;
2607 switch (Lo) {
2608 case NoClass:
2609 if (Hi == NoClass)
2610 return ABIArgInfo::getIgnore();
2611 // If the low part is just padding, it takes no register; leave ResType
2612 // null.
2613 assert((Hi == SSE || Hi == Integer || Hi == X87Up) &&
2614 "Unknown missing lo part");
2615 break;
2616
2617 case SSEUp:
2618 case X87Up:
2619 llvm_unreachable("Invalid classification for lo word.");
2620
2621 // AMD64-ABI 3.2.3p4: Rule 2. Types of class memory are returned via
2622 // hidden argument.
2623 case Memory:
2624 return getIndirectReturnResult(RetTy);
2625
2626 // AMD64-ABI 3.2.3p4: Rule 3. If the class is INTEGER, the next
2627 // available register of the sequence %rax, %rdx is used.
2628 case Integer:
2629 ResType = GetINTEGERTypeAtOffset(CGT.ConvertType(RetTy), 0, RetTy, 0);
2630
2631 // If we have a sign or zero extended integer, make sure to return Extend
2632 // so that the parameter gets the right LLVM IR attributes.
2633 if (Hi == NoClass && isa<llvm::IntegerType>(ResType)) {
2634 // Treat an enum type as its underlying type.
2635 if (const EnumType *EnumTy = RetTy->getAs<EnumType>())
2636 RetTy = EnumTy->getDecl()->getIntegerType();
2637
2638 if (RetTy->isIntegralOrEnumerationType() &&
2639 isPromotableIntegerTypeForABI(RetTy))
2640 return ABIArgInfo::getExtend(RetTy);
2641 }
2642 break;
2643
2644 // AMD64-ABI 3.2.3p4: Rule 4. If the class is SSE, the next
2645 // available SSE register of the sequence %xmm0, %xmm1 is used.
2646 case SSE:
2647 ResType = GetSSETypeAtOffset(CGT.ConvertType(RetTy), 0, RetTy, 0);
2648 break;
2649
2650 // AMD64-ABI 3.2.3p4: Rule 6. If the class is X87, the value is
2651 // returned on the X87 stack in %st0 as 80-bit x87 number.
2652 case X87:
2653 ResType = llvm::Type::getX86_FP80Ty(getVMContext());
2654 break;
2655
2656 // AMD64-ABI 3.2.3p4: Rule 8. If the class is COMPLEX_X87, the real
2657 // part of the value is returned in %st0 and the imaginary part in
2658 // %st1.
2659 case ComplexX87:
2660 assert(Hi == ComplexX87 && "Unexpected ComplexX87 classification.");
2661 ResType = llvm::StructType::get(llvm::Type::getX86_FP80Ty(getVMContext()),
2662 llvm::Type::getX86_FP80Ty(getVMContext()));
2663 break;
2664 }
2665
2666 llvm::Type *HighPart = nullptr;
2667 switch (Hi) {
2668 // Memory was handled previously and X87 should
2669 // never occur as a hi class.
2670 case Memory:
2671 case X87:
2672 llvm_unreachable("Invalid classification for hi word.");
2673
2674 case ComplexX87: // Previously handled.
2675 case NoClass:
2676 break;
2677
2678 case Integer:
2679 HighPart = GetINTEGERTypeAtOffset(CGT.ConvertType(RetTy), 8, RetTy, 8);
2680 if (Lo == NoClass) // Return HighPart at offset 8 in memory.
2681 return ABIArgInfo::getDirect(HighPart, 8);
2682 break;
2683 case SSE:
2684 HighPart = GetSSETypeAtOffset(CGT.ConvertType(RetTy), 8, RetTy, 8);
2685 if (Lo == NoClass) // Return HighPart at offset 8 in memory.
2686 return ABIArgInfo::getDirect(HighPart, 8);
2687 break;
2688
2689 // AMD64-ABI 3.2.3p4: Rule 5. If the class is SSEUP, the eightbyte
2690 // is passed in the next available eightbyte chunk of the last used
2691 // vector register.
2692 // 2693 // SSEUP should always be preceded by SSE, just widen. 2694 case SSEUp: 2695 assert(Lo == SSE && "Unexpected SSEUp classification."); 2696 ResType = GetByteVectorType(RetTy); 2697 break; 2698 2699 // AMD64-ABI 3.2.3p4: Rule 7. If the class is X87UP, the value is 2700 // returned together with the previous X87 value in %st0. 2701 case X87Up: 2702 // If X87Up is preceded by X87, we don't need to do 2703 // anything. However, in some cases with unions it may not be 2704 // preceded by X87. In such situations we follow gcc and pass the 2705 // extra bits in an SSE reg. 2706 if (Lo != X87) { 2707 HighPart = GetSSETypeAtOffset(CGT.ConvertType(RetTy), 8, RetTy, 8); 2708 if (Lo == NoClass) // Return HighPart at offset 8 in memory. 2709 return ABIArgInfo::getDirect(HighPart, 8); 2710 } 2711 break; 2712 } 2713 2714 // If a high part was specified, merge it together with the low part. It is 2715 // known to pass in the high eightbyte of the result. We do this by forming a 2716 // first class struct aggregate with the high and low part: {low, high} 2717 if (HighPart) 2718 ResType = GetX86_64ByValArgumentPair(ResType, HighPart, getDataLayout()); 2719 2720 return ABIArgInfo::getDirect(ResType); 2721 } 2722 2723 ABIArgInfo 2724 X86_64ABIInfo::classifyArgumentType(QualType Ty, unsigned freeIntRegs, 2725 unsigned &neededInt, unsigned &neededSSE, 2726 bool isNamedArg, bool IsRegCall) const { 2727 Ty = useFirstFieldIfTransparentUnion(Ty); 2728 2729 X86_64ABIInfo::Class Lo, Hi; 2730 classify(Ty, 0, Lo, Hi, isNamedArg, IsRegCall); 2731 2732 // Check some invariants. 2733 // FIXME: Enforce these by construction. 2734 assert((Hi != Memory || Lo == Memory) && "Invalid memory classification."); 2735 assert((Hi != SSEUp || Lo == SSE) && "Invalid SSEUp classification."); 2736 2737 neededInt = 0; 2738 neededSSE = 0; 2739 llvm::Type *ResType = nullptr; 2740 switch (Lo) { 2741 case NoClass: 2742 if (Hi == NoClass) 2743 return ABIArgInfo::getIgnore(); 2744 // If the low part is just padding, it takes no register, leave ResType 2745 // null. 2746 assert((Hi == SSE || Hi == Integer || Hi == X87Up) && 2747 "Unknown missing lo part"); 2748 break; 2749 2750 // AMD64-ABI 3.2.3p3: Rule 1. If the class is MEMORY, pass the argument 2751 // on the stack. 2752 case Memory: 2753 2754 // AMD64-ABI 3.2.3p3: Rule 5. If the class is X87, X87UP or 2755 // COMPLEX_X87, it is passed in memory. 2756 case X87: 2757 case ComplexX87: 2758 if (getRecordArgABI(Ty, getCXXABI()) == CGCXXABI::RAA_Indirect) 2759 ++neededInt; 2760 return getIndirectResult(Ty, freeIntRegs); 2761 2762 case SSEUp: 2763 case X87Up: 2764 llvm_unreachable("Invalid classification for lo word."); 2765 2766 // AMD64-ABI 3.2.3p3: Rule 2. If the class is INTEGER, the next 2767 // available register of the sequence %rdi, %rsi, %rdx, %rcx, %r8 2768 // and %r9 is used. 2769 case Integer: 2770 ++neededInt; 2771 2772 // Pick an 8-byte type based on the preferred type. 2773 ResType = GetINTEGERTypeAtOffset(CGT.ConvertType(Ty), 0, Ty, 0); 2774 2775 // If we have a sign or zero extended integer, make sure to return Extend 2776 // so that the parameter gets the right LLVM IR attributes. 2777 if (Hi == NoClass && isa<llvm::IntegerType>(ResType)) { 2778 // Treat an enum type as its underlying type. 
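// (Example, annotation not in the upstream source: an enum whose underlying
// type is short takes the promotable path below, and the resulting getExtend
// adds a signext attribute at the IR level.)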
2779 if (const EnumType *EnumTy = Ty->getAs<EnumType>()) 2780 Ty = EnumTy->getDecl()->getIntegerType(); 2781 2782 if (Ty->isIntegralOrEnumerationType() && 2783 isPromotableIntegerTypeForABI(Ty)) 2784 return ABIArgInfo::getExtend(Ty, CGT.ConvertType(Ty)); 2785 } 2786 2787 break; 2788 2789 // AMD64-ABI 3.2.3p3: Rule 3. If the class is SSE, the next 2790 // available SSE register is used, the registers are taken in the 2791 // order from %xmm0 to %xmm7. 2792 case SSE: { 2793 llvm::Type *IRType = CGT.ConvertType(Ty); 2794 ResType = GetSSETypeAtOffset(IRType, 0, Ty, 0); 2795 ++neededSSE; 2796 break; 2797 } 2798 } 2799 2800 llvm::Type *HighPart = nullptr; 2801 switch (Hi) { 2802 // Memory was handled previously, ComplexX87 and X87 should 2803 // never occur as hi classes, and X87Up must be preceded by X87, 2804 // which is passed in memory. 2805 case Memory: 2806 case X87: 2807 case ComplexX87: 2808 llvm_unreachable("Invalid classification for hi word."); 2809 2810 case NoClass: break; 2811 2812 case Integer: 2813 ++neededInt; 2814 // Pick an 8-byte type based on the preferred type. 2815 HighPart = GetINTEGERTypeAtOffset(CGT.ConvertType(Ty), 8, Ty, 8); 2816 2817 if (Lo == NoClass) // Pass HighPart at offset 8 in memory. 2818 return ABIArgInfo::getDirect(HighPart, 8); 2819 break; 2820 2821 // X87Up generally doesn't occur here (long double is passed in 2822 // memory), except in situations involving unions. 2823 case X87Up: 2824 case SSE: 2825 ++neededSSE; 2826 HighPart = GetSSETypeAtOffset(CGT.ConvertType(Ty), 8, Ty, 8); 2827 2828 if (Lo == NoClass) // Pass HighPart at offset 8 in memory. 2829 return ABIArgInfo::getDirect(HighPart, 8); 2830 break; 2831 2832 // AMD64-ABI 3.2.3p3: Rule 4. If the class is SSEUP, the 2833 // eightbyte is passed in the upper half of the last used SSE 2834 // register. This only happens when 128-bit vectors are passed. 2835 case SSEUp: 2836 assert(Lo == SSE && "Unexpected SSEUp classification"); 2837 ResType = GetByteVectorType(Ty); 2838 break; 2839 } 2840 2841 // If a high part was specified, merge it together with the low part. It is 2842 // known to pass in the high eightbyte of the result. 
We do this by forming a
2843 // first class struct aggregate with the high and low part: {low, high}
2844 if (HighPart)
2845 ResType = GetX86_64ByValArgumentPair(ResType, HighPart, getDataLayout());
2846
2847 return ABIArgInfo::getDirect(ResType);
2848 }
2849
2850 ABIArgInfo
2851 X86_64ABIInfo::classifyRegCallStructTypeImpl(QualType Ty, unsigned &NeededInt,
2852 unsigned &NeededSSE,
2853 unsigned &MaxVectorWidth) const {
2854 auto RT = Ty->getAs<RecordType>();
2855 assert(RT && "classifyRegCallStructType only valid with struct types");
2856
2857 if (RT->getDecl()->hasFlexibleArrayMember())
2858 return getIndirectReturnResult(Ty);
2859
2860 // Sum up bases.
2861 if (auto CXXRD = dyn_cast<CXXRecordDecl>(RT->getDecl())) {
2862 if (CXXRD->isDynamicClass()) {
2863 NeededInt = NeededSSE = 0;
2864 return getIndirectReturnResult(Ty);
2865 }
2866
2867 for (const auto &I : CXXRD->bases())
2868 if (classifyRegCallStructTypeImpl(I.getType(), NeededInt, NeededSSE,
2869 MaxVectorWidth)
2870 .isIndirect()) {
2871 NeededInt = NeededSSE = 0;
2872 return getIndirectReturnResult(Ty);
2873 }
2874 }
2875
2876 // Sum up members.
2877 for (const auto *FD : RT->getDecl()->fields()) {
2878 QualType MTy = FD->getType();
2879 if (MTy->isRecordType() && !MTy->isUnionType()) {
2880 if (classifyRegCallStructTypeImpl(MTy, NeededInt, NeededSSE,
2881 MaxVectorWidth)
2882 .isIndirect()) {
2883 NeededInt = NeededSSE = 0;
2884 return getIndirectReturnResult(Ty);
2885 }
2886 } else {
2887 unsigned LocalNeededInt, LocalNeededSSE;
2888 if (classifyArgumentType(MTy, UINT_MAX, LocalNeededInt, LocalNeededSSE,
2889 true, true)
2890 .isIndirect()) {
2891 NeededInt = NeededSSE = 0;
2892 return getIndirectReturnResult(Ty);
2893 }
2894 if (const auto *AT = getContext().getAsConstantArrayType(MTy))
2895 MTy = AT->getElementType();
2896 if (const auto *VT = MTy->getAs<VectorType>())
2897 if (getContext().getTypeSize(VT) > MaxVectorWidth)
2898 MaxVectorWidth = getContext().getTypeSize(VT);
2899 NeededInt += LocalNeededInt;
2900 NeededSSE += LocalNeededSSE;
2901 }
2902 }
2903
2904 return ABIArgInfo::getDirect();
2905 }
2906
2907 ABIArgInfo
2908 X86_64ABIInfo::classifyRegCallStructType(QualType Ty, unsigned &NeededInt,
2909 unsigned &NeededSSE,
2910 unsigned &MaxVectorWidth) const {
2911
2912 NeededInt = 0;
2913 NeededSSE = 0;
2914 MaxVectorWidth = 0;
2915
2916 return classifyRegCallStructTypeImpl(Ty, NeededInt, NeededSSE,
2917 MaxVectorWidth);
2918 }
2919
2920 void X86_64ABIInfo::computeInfo(CGFunctionInfo &FI) const {
2921
2922 const unsigned CallingConv = FI.getCallingConvention();
2923 // It is possible to force the Win64 calling convention on any x86_64 target
2924 // by using __attribute__((ms_abi)). In that case, delegate this call to
2925 // WinX86_64ABIInfo::computeInfo so that we emit Win64-compatible code.
2926 if (CallingConv == llvm::CallingConv::Win64) {
2927 WinX86_64ABIInfo Win64ABIInfo(CGT, AVXLevel);
2928 Win64ABIInfo.computeInfo(FI);
2929 return;
2930 }
2931
2932 bool IsRegCall = CallingConv == llvm::CallingConv::X86_RegCall;
2933
2934 // Keep track of the number of assigned registers.
2935 unsigned FreeIntRegs = IsRegCall ? 11 : 6;
2936 unsigned FreeSSERegs = IsRegCall ?
16 : 8;
2937 unsigned NeededInt = 0, NeededSSE = 0, MaxVectorWidth = 0;
2938
2939 if (!::classifyReturnType(getCXXABI(), FI, *this)) {
2940 if (IsRegCall && FI.getReturnType()->getTypePtr()->isRecordType() &&
2941 !FI.getReturnType()->getTypePtr()->isUnionType()) {
2942 FI.getReturnInfo() = classifyRegCallStructType(
2943 FI.getReturnType(), NeededInt, NeededSSE, MaxVectorWidth);
2944 if (FreeIntRegs >= NeededInt && FreeSSERegs >= NeededSSE) {
2945 FreeIntRegs -= NeededInt;
2946 FreeSSERegs -= NeededSSE;
2947 } else {
2948 FI.getReturnInfo() = getIndirectReturnResult(FI.getReturnType());
2949 }
2950 } else if (IsRegCall && FI.getReturnType()->getAs<ComplexType>() &&
2951 getContext().getCanonicalType(FI.getReturnType()
2952 ->getAs<ComplexType>()
2953 ->getElementType()) ==
2954 getContext().LongDoubleTy)
2955 // A complex long double is returned in memory when the regcall
2956 // calling convention is used.
2957 FI.getReturnInfo() = getIndirectReturnResult(FI.getReturnType());
2958 else
2959 FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
2960 }
2961
2962 // If the return value is indirect, then the hidden argument is consuming one
2963 // integer register.
2964 if (FI.getReturnInfo().isIndirect())
2965 --FreeIntRegs;
2966 else if (NeededSSE && MaxVectorWidth > 0)
2967 FI.setMaxVectorWidth(MaxVectorWidth);
2968
2969 // The chain argument effectively gives us another free register.
2970 if (FI.isChainCall())
2971 ++FreeIntRegs;
2972
2973 unsigned NumRequiredArgs = FI.getNumRequiredArgs();
2974 // AMD64-ABI 3.2.3p3: Once arguments are classified, the registers
2975 // get assigned (in left-to-right order) for passing as follows...
2976 unsigned ArgNo = 0;
2977 for (CGFunctionInfo::arg_iterator it = FI.arg_begin(), ie = FI.arg_end();
2978 it != ie; ++it, ++ArgNo) {
2979 bool IsNamedArg = ArgNo < NumRequiredArgs;
2980
2981 if (IsRegCall && it->type->isStructureOrClassType())
2982 it->info = classifyRegCallStructType(it->type, NeededInt, NeededSSE,
2983 MaxVectorWidth);
2984 else
2985 it->info = classifyArgumentType(it->type, FreeIntRegs, NeededInt,
2986 NeededSSE, IsNamedArg);
2987
2988 // AMD64-ABI 3.2.3p3: If there are no registers available for any
2989 // eightbyte of an argument, the whole argument is passed on the
2990 // stack. If registers have already been assigned for some
2991 // eightbytes of such an argument, the assignments get reverted.
2992 if (FreeIntRegs >= NeededInt && FreeSSERegs >= NeededSSE) {
2993 FreeIntRegs -= NeededInt;
2994 FreeSSERegs -= NeededSSE;
2995 if (MaxVectorWidth > FI.getMaxVectorWidth())
2996 FI.setMaxVectorWidth(MaxVectorWidth);
2997 } else {
2998 it->info = getIndirectResult(it->type, FreeIntRegs);
2999 }
3000 }
3001 }
3002
3003 static Address EmitX86_64VAArgFromMemory(CodeGenFunction &CGF,
3004 Address VAListAddr, QualType Ty) {
3005 Address overflow_arg_area_p =
3006 CGF.Builder.CreateStructGEP(VAListAddr, 2, "overflow_arg_area_p");
3007 llvm::Value *overflow_arg_area =
3008 CGF.Builder.CreateLoad(overflow_arg_area_p, "overflow_arg_area");
3009
3010 // AMD64-ABI 3.5.7p5: Step 7. Align l->overflow_arg_area upwards to a 16-
3011 // byte boundary if the alignment needed by the type exceeds 8 bytes.
3012 // It isn't stated explicitly in the standard, but in practice we use
3013 // alignment greater than 16 where necessary.
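// (Example, annotation not in the upstream source: va_arg of a 32-byte
// aligned type such as __m256 rounds overflow_arg_area up to a 32-byte
// boundary below, while an 8-byte aligned double skips the rounding.)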
3014 CharUnits Align = CGF.getContext().getTypeAlignInChars(Ty);
3015 if (Align > CharUnits::fromQuantity(8)) {
3016 overflow_arg_area = emitRoundPointerUpToAlignment(CGF, overflow_arg_area,
3017 Align);
3018 }
3019
3020 // AMD64-ABI 3.5.7p5: Step 8. Fetch type from l->overflow_arg_area.
3021 llvm::Type *LTy = CGF.ConvertTypeForMem(Ty);
3022 llvm::Value *Res = overflow_arg_area;
3023
3024 // AMD64-ABI 3.5.7p5: Step 9. Set l->overflow_arg_area to:
3025 // l->overflow_arg_area + sizeof(type).
3026 // AMD64-ABI 3.5.7p5: Step 10. Align l->overflow_arg_area upwards to
3027 // an 8 byte boundary.
3028
3029 uint64_t SizeInBytes = (CGF.getContext().getTypeSize(Ty) + 7) / 8;
3030 llvm::Value *Offset =
3031 llvm::ConstantInt::get(CGF.Int32Ty, (SizeInBytes + 7) & ~7);
3032 overflow_arg_area = CGF.Builder.CreateGEP(CGF.Int8Ty, overflow_arg_area,
3033 Offset, "overflow_arg_area.next");
3034 CGF.Builder.CreateStore(overflow_arg_area, overflow_arg_area_p);
3035
3036 // AMD64-ABI 3.5.7p5: Step 11. Return the fetched type.
3037 return Address(Res, LTy, Align);
3038 }
3039
3040 RValue X86_64ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
3041 QualType Ty, AggValueSlot Slot) const {
3042 // Assume that va_list type is correct; should be pointer to LLVM type:
3043 // struct {
3044 // i32 gp_offset;
3045 // i32 fp_offset;
3046 // i8* overflow_arg_area;
3047 // i8* reg_save_area;
3048 // };
3049 unsigned neededInt, neededSSE;
3050
3051 Ty = getContext().getCanonicalType(Ty);
3052 ABIArgInfo AI = classifyArgumentType(Ty, 0, neededInt, neededSSE,
3053 /*isNamedArg*/false);
3054
3055 // Empty records are ignored for parameter passing purposes.
3056 if (AI.isIgnore())
3057 return Slot.asRValue();
3058
3059 // AMD64-ABI 3.5.7p5: Step 1. Determine whether type may be passed
3060 // in the registers. If not, go to step 7.
3061 if (!neededInt && !neededSSE)
3062 return CGF.EmitLoadOfAnyValue(
3063 CGF.MakeAddrLValue(EmitX86_64VAArgFromMemory(CGF, VAListAddr, Ty), Ty),
3064 Slot);
3065
3066 // AMD64-ABI 3.5.7p5: Step 2. Compute num_gp to hold the number of
3067 // general purpose registers needed to pass type and num_fp to hold
3068 // the number of floating point registers needed.
3069
3070 // AMD64-ABI 3.5.7p5: Step 3. Verify whether arguments fit into
3071 // registers. In the case: l->gp_offset > 48 - num_gp * 8 or
3072 // l->fp_offset > 304 - num_fp * 16 go to step 7.
3073 //
3074 // NOTE: 304 is a typo in the spec; there are (6 * 8 + 8 * 16) = 176 bytes
3075 // of register save space.
3076
3077 llvm::Value *InRegs = nullptr;
3078 Address gp_offset_p = Address::invalid(), fp_offset_p = Address::invalid();
3079 llvm::Value *gp_offset = nullptr, *fp_offset = nullptr;
3080 if (neededInt) {
3081 gp_offset_p = CGF.Builder.CreateStructGEP(VAListAddr, 0, "gp_offset_p");
3082 gp_offset = CGF.Builder.CreateLoad(gp_offset_p, "gp_offset");
3083 InRegs = llvm::ConstantInt::get(CGF.Int32Ty, 48 - neededInt * 8);
3084 InRegs = CGF.Builder.CreateICmpULE(gp_offset, InRegs, "fits_in_gp");
3085 }
3086
3087 if (neededSSE) {
3088 fp_offset_p = CGF.Builder.CreateStructGEP(VAListAddr, 1, "fp_offset_p");
3089 fp_offset = CGF.Builder.CreateLoad(fp_offset_p, "fp_offset");
3090 llvm::Value *FitsInFP =
3091 llvm::ConstantInt::get(CGF.Int32Ty, 176 - neededSSE * 16);
3092 FitsInFP = CGF.Builder.CreateICmpULE(fp_offset, FitsInFP, "fits_in_fp");
3093 InRegs = InRegs ?

RValue X86_64ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
                                QualType Ty, AggValueSlot Slot) const {
  // Assume that va_list type is correct; should be pointer to LLVM type:
  // struct {
  //   i32 gp_offset;
  //   i32 fp_offset;
  //   i8* overflow_arg_area;
  //   i8* reg_save_area;
  // };
  unsigned neededInt, neededSSE;

  Ty = getContext().getCanonicalType(Ty);
  ABIArgInfo AI = classifyArgumentType(Ty, 0, neededInt, neededSSE,
                                       /*isNamedArg*/false);

  // Empty records are ignored for parameter passing purposes.
  if (AI.isIgnore())
    return Slot.asRValue();

  // AMD64-ABI 3.5.7p5: Step 1. Determine whether type may be passed
  // in the registers. If not go to step 7.
  if (!neededInt && !neededSSE)
    return CGF.EmitLoadOfAnyValue(
        CGF.MakeAddrLValue(EmitX86_64VAArgFromMemory(CGF, VAListAddr, Ty), Ty),
        Slot);

  // AMD64-ABI 3.5.7p5: Step 2. Compute num_gp to hold the number of
  // general purpose registers needed to pass type and num_fp to hold
  // the number of floating point registers needed.

  // AMD64-ABI 3.5.7p5: Step 3. Verify whether arguments fit into
  // registers. In the case: l->gp_offset > 48 - num_gp * 8 or
  // l->fp_offset > 304 - num_fp * 16 go to step 7.
  //
  // NOTE: 304 is a typo in the ABI document; there are only
  // (6 * 8 + 8 * 16) = 176 bytes of register save space.
  llvm::Value *InRegs = nullptr;
  Address gp_offset_p = Address::invalid(), fp_offset_p = Address::invalid();
  llvm::Value *gp_offset = nullptr, *fp_offset = nullptr;
  if (neededInt) {
    gp_offset_p = CGF.Builder.CreateStructGEP(VAListAddr, 0, "gp_offset_p");
    gp_offset = CGF.Builder.CreateLoad(gp_offset_p, "gp_offset");
    InRegs = llvm::ConstantInt::get(CGF.Int32Ty, 48 - neededInt * 8);
    InRegs = CGF.Builder.CreateICmpULE(gp_offset, InRegs, "fits_in_gp");
  }

  if (neededSSE) {
    fp_offset_p = CGF.Builder.CreateStructGEP(VAListAddr, 1, "fp_offset_p");
    fp_offset = CGF.Builder.CreateLoad(fp_offset_p, "fp_offset");
    llvm::Value *FitsInFP =
        llvm::ConstantInt::get(CGF.Int32Ty, 176 - neededSSE * 16);
    FitsInFP = CGF.Builder.CreateICmpULE(fp_offset, FitsInFP, "fits_in_fp");
    InRegs = InRegs ? CGF.Builder.CreateAnd(InRegs, FitsInFP) : FitsInFP;
  }

  llvm::BasicBlock *InRegBlock = CGF.createBasicBlock("vaarg.in_reg");
  llvm::BasicBlock *InMemBlock = CGF.createBasicBlock("vaarg.in_mem");
  llvm::BasicBlock *ContBlock = CGF.createBasicBlock("vaarg.end");
  CGF.Builder.CreateCondBr(InRegs, InRegBlock, InMemBlock);

  // Emit code to load the value if it was passed in registers.
  CGF.EmitBlock(InRegBlock);

  // AMD64-ABI 3.5.7p5: Step 4. Fetch type from l->reg_save_area with
  // an offset of l->gp_offset and/or l->fp_offset. This may require
  // copying to a temporary location in case the parameter is passed
  // in different register classes or requires an alignment greater
  // than 8 for general purpose registers and 16 for XMM registers.
  //
  // FIXME: This really results in shameful code when we end up needing to
  // collect arguments from different places; often what should result in a
  // simple assembling of a structure from scattered addresses has many more
  // loads than necessary. Can we clean this up?
  llvm::Type *LTy = CGF.ConvertTypeForMem(Ty);
  llvm::Value *RegSaveArea = CGF.Builder.CreateLoad(
      CGF.Builder.CreateStructGEP(VAListAddr, 3), "reg_save_area");

  Address RegAddr = Address::invalid();
  if (neededInt && neededSSE) {
    // FIXME: Cleanup.
    assert(AI.isDirect() && "Unexpected ABI info for mixed regs");
    llvm::StructType *ST = cast<llvm::StructType>(AI.getCoerceToType());
    Address Tmp = CGF.CreateMemTemp(Ty);
    Tmp = Tmp.withElementType(ST);
    assert(ST->getNumElements() == 2 && "Unexpected ABI info for mixed regs");
    llvm::Type *TyLo = ST->getElementType(0);
    llvm::Type *TyHi = ST->getElementType(1);
    assert((TyLo->isFPOrFPVectorTy() ^ TyHi->isFPOrFPVectorTy()) &&
           "Unexpected ABI info for mixed regs");
    llvm::Value *GPAddr =
        CGF.Builder.CreateGEP(CGF.Int8Ty, RegSaveArea, gp_offset);
    llvm::Value *FPAddr =
        CGF.Builder.CreateGEP(CGF.Int8Ty, RegSaveArea, fp_offset);
    llvm::Value *RegLoAddr = TyLo->isFPOrFPVectorTy() ? FPAddr : GPAddr;
    llvm::Value *RegHiAddr = TyLo->isFPOrFPVectorTy() ? GPAddr : FPAddr;

    // Copy the first element.
    // FIXME: Our choice of alignment here and below is probably pessimistic.
    llvm::Value *V = CGF.Builder.CreateAlignedLoad(
        TyLo, RegLoAddr,
        CharUnits::fromQuantity(getDataLayout().getABITypeAlign(TyLo)));
    CGF.Builder.CreateStore(V, CGF.Builder.CreateStructGEP(Tmp, 0));

    // Copy the second element.
    V = CGF.Builder.CreateAlignedLoad(
        TyHi, RegHiAddr,
        CharUnits::fromQuantity(getDataLayout().getABITypeAlign(TyHi)));
    CGF.Builder.CreateStore(V, CGF.Builder.CreateStructGEP(Tmp, 1));

    RegAddr = Tmp.withElementType(LTy);
  } else if (neededInt || neededSSE == 1) {
    // Copy to a temporary if necessary to ensure the appropriate alignment.
    auto TInfo = getContext().getTypeInfoInChars(Ty);
    uint64_t TySize = TInfo.Width.getQuantity();
    CharUnits TyAlign = TInfo.Align;
    llvm::Type *CoTy = nullptr;
    if (AI.isDirect())
      CoTy = AI.getCoerceToType();

    llvm::Value *GpOrFpOffset = neededInt ? gp_offset : fp_offset;
    uint64_t Alignment = neededInt ? 8 : 16;
    uint64_t RegSize = neededInt ? neededInt * 8 : 16;
    // There are two cases that require special handling:
    // 1)
    // ```
    // struct {
    //   struct {} a[8];
    //   int b;
    // };
    // ```
    // The lower 8 bytes of the structure are not stored,
    // so an 8-byte offset is needed when accessing the structure.
    // 2)
    // ```
    // struct {
    //   long long a;
    //   struct {} b;
    // };
    // ```
    // The stored size of this structure is smaller than its actual size,
    // which may lead to reading past the end of the register save area.
    if (CoTy && (AI.getDirectOffset() == 8 || RegSize < TySize)) {
      Address Tmp = CGF.CreateMemTemp(Ty);
      llvm::Value *Addr =
          CGF.Builder.CreateGEP(CGF.Int8Ty, RegSaveArea, GpOrFpOffset);
      llvm::Value *Src = CGF.Builder.CreateAlignedLoad(CoTy, Addr, TyAlign);
      llvm::Value *PtrOffset =
          llvm::ConstantInt::get(CGF.Int32Ty, AI.getDirectOffset());
      Address Dst = Address(
          CGF.Builder.CreateGEP(CGF.Int8Ty, Tmp.getBasePointer(), PtrOffset),
          LTy, TyAlign);
      CGF.Builder.CreateStore(Src, Dst);
      RegAddr = Tmp.withElementType(LTy);
    } else {
      RegAddr =
          Address(CGF.Builder.CreateGEP(CGF.Int8Ty, RegSaveArea, GpOrFpOffset),
                  LTy, CharUnits::fromQuantity(Alignment));

      // Copy into a temporary if the type is more aligned than the
      // register save area.
      if (neededInt && TyAlign.getQuantity() > 8) {
        Address Tmp = CGF.CreateMemTemp(Ty);
        CGF.Builder.CreateMemCpy(Tmp, RegAddr, TySize, false);
        RegAddr = Tmp;
      }
    }
  } else {
    assert(neededSSE == 2 && "Invalid number of needed registers!");
    // SSE registers are spaced 16 bytes apart in the register save
    // area, so we need to collect the two eightbytes together.
    // The ABI isn't explicit about this, but it seems reasonable
    // to assume that the slots are 16-byte aligned, since the stack is
    // naturally 16-byte aligned and the prologue is expected to store
    // all the SSE registers to the RSA.
    Address RegAddrLo =
        Address(CGF.Builder.CreateGEP(CGF.Int8Ty, RegSaveArea, fp_offset),
                CGF.Int8Ty, CharUnits::fromQuantity(16));
    Address RegAddrHi = CGF.Builder.CreateConstInBoundsByteGEP(
        RegAddrLo, CharUnits::fromQuantity(16));
    llvm::Type *ST = AI.canHaveCoerceToType()
                         ? AI.getCoerceToType()
                         : llvm::StructType::get(CGF.DoubleTy, CGF.DoubleTy);
    llvm::Value *V;
    Address Tmp = CGF.CreateMemTemp(Ty);
    Tmp = Tmp.withElementType(ST);
    V = CGF.Builder.CreateLoad(
        RegAddrLo.withElementType(ST->getStructElementType(0)));
    CGF.Builder.CreateStore(V, CGF.Builder.CreateStructGEP(Tmp, 0));
    V = CGF.Builder.CreateLoad(
        RegAddrHi.withElementType(ST->getStructElementType(1)));
    CGF.Builder.CreateStore(V, CGF.Builder.CreateStructGEP(Tmp, 1));

    RegAddr = Tmp.withElementType(LTy);
  }

  // AMD64-ABI 3.5.7p5: Step 5. Set:
  // l->gp_offset = l->gp_offset + num_gp * 8
  // l->fp_offset = l->fp_offset + num_fp * 16.
  if (neededInt) {
    llvm::Value *Offset = llvm::ConstantInt::get(CGF.Int32Ty, neededInt * 8);
    CGF.Builder.CreateStore(CGF.Builder.CreateAdd(gp_offset, Offset),
                            gp_offset_p);
  }
  if (neededSSE) {
    llvm::Value *Offset = llvm::ConstantInt::get(CGF.Int32Ty, neededSSE * 16);
    CGF.Builder.CreateStore(CGF.Builder.CreateAdd(fp_offset, Offset),
                            fp_offset_p);
  }
  CGF.EmitBranch(ContBlock);

  // Emit code to load the value if it was passed in memory.
  CGF.EmitBlock(InMemBlock);
  Address MemAddr = EmitX86_64VAArgFromMemory(CGF, VAListAddr, Ty);

  // Return the appropriate result.
  CGF.EmitBlock(ContBlock);
  Address ResAddr = emitMergePHI(CGF, RegAddr, InRegBlock, MemAddr, InMemBlock,
                                 "vaarg.addr");
  return CGF.EmitLoadOfAnyValue(CGF.MakeAddrLValue(ResAddr, Ty), Slot);
}
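
// Illustration: the mixed-register path above handles aggregates classified
// as one INTEGER and one SSE eightbyte, whose pieces live in different parts
// of the register save area and must be reassembled through a temporary.
// A minimal sketch, using a hypothetical `IntDouble` type:
//
// ```
// #include <stdarg.h>
// struct IntDouble { long i; double d; };  // eightbyte 0: INTEGER,
//                                          // eightbyte 1: SSE
// long sum(int n, ...) {
//   va_list ap;
//   va_start(ap, n);
//   struct IntDouble v = va_arg(ap, struct IntDouble); // reassembled from a
//   va_end(ap);                                        // GP and an XMM slot
//   return v.i + (long)v.d;
// }
// ```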

RValue X86_64ABIInfo::EmitMSVAArg(CodeGenFunction &CGF, Address VAListAddr,
                                  QualType Ty, AggValueSlot Slot) const {
  // MS x64 ABI requirement: "Any argument that doesn't fit in 8 bytes, or is
  // not 1, 2, 4, or 8 bytes, must be passed by reference."
  uint64_t Width = getContext().getTypeSize(Ty);
  bool IsIndirect = Width > 64 || !llvm::isPowerOf2_64(Width);

  return emitVoidPtrVAArg(CGF, VAListAddr, Ty, IsIndirect,
                          CGF.getContext().getTypeInfoInChars(Ty),
                          CharUnits::fromQuantity(8),
                          /*allowHigherAlign*/ false, Slot);
}
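
// Illustration: under the MS x64 rule quoted above, size alone decides the
// passing convention. A minimal sketch with hypothetical types:
//
// ```
// struct S8  { long long x; };   // 8 bytes, power of two: passed by value
// struct S12 { int x, y, z; };   // 12 bytes, doesn't fit: passed by reference
// struct S3  { char x[3]; };     // 3 bytes, not 1/2/4/8: passed by reference
// ```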

ABIArgInfo WinX86_64ABIInfo::reclassifyHvaArgForVectorCall(
    QualType Ty, unsigned &FreeSSERegs, const ABIArgInfo &current) const {
  const Type *Base = nullptr;
  uint64_t NumElts = 0;

  if (!Ty->isBuiltinType() && !Ty->isVectorType() &&
      isHomogeneousAggregate(Ty, Base, NumElts) && FreeSSERegs >= NumElts) {
    FreeSSERegs -= NumElts;
    return getDirectX86Hva();
  }
  return current;
}
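
// Illustration: an aggregate qualifies as a homogeneous vector aggregate
// (HVA) for vectorcall when all of its members are the same vector type and
// there are at most four of them. A minimal sketch, using a hypothetical
// `Hva4` type:
//
// ```
// #include <xmmintrin.h>
// struct Hva4 { __m128 a, b, c, d; };  // four identical XMM-sized members:
//                                      // reclassified by the pass above and
//                                      // passed directly if four SSE
//                                      // registers are still free.
// ```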

ABIArgInfo WinX86_64ABIInfo::classify(QualType Ty, unsigned &FreeSSERegs,
                                      bool IsReturnType, bool IsVectorCall,
                                      bool IsRegCall) const {

  if (Ty->isVoidType())
    return ABIArgInfo::getIgnore();

  if (const EnumType *EnumTy = Ty->getAs<EnumType>())
    Ty = EnumTy->getDecl()->getIntegerType();

  TypeInfo Info = getContext().getTypeInfo(Ty);
  uint64_t Width = Info.Width;
  CharUnits Align = getContext().toCharUnitsFromBits(Info.Align);

  const RecordType *RT = Ty->getAs<RecordType>();
  if (RT) {
    if (!IsReturnType) {
      if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(RT, getCXXABI()))
        return getNaturalAlignIndirect(Ty,
                                       getDataLayout().getAllocaAddrSpace(),
                                       RAA == CGCXXABI::RAA_DirectInMemory);
    }

    if (RT->getDecl()->hasFlexibleArrayMember())
      return getNaturalAlignIndirect(Ty, getDataLayout().getAllocaAddrSpace(),
                                     /*ByVal=*/false);
  }

  const Type *Base = nullptr;
  uint64_t NumElts = 0;
  // vectorcall adds the concept of a homogeneous vector aggregate, similar
  // to other targets.
  if ((IsVectorCall || IsRegCall) &&
      isHomogeneousAggregate(Ty, Base, NumElts)) {
    if (IsRegCall) {
      if (FreeSSERegs >= NumElts) {
        FreeSSERegs -= NumElts;
        if (IsReturnType || Ty->isBuiltinType() || Ty->isVectorType())
          return ABIArgInfo::getDirect();
        return ABIArgInfo::getExpand();
      }
      return ABIArgInfo::getIndirect(
          Align, /*AddrSpace=*/getDataLayout().getAllocaAddrSpace(),
          /*ByVal=*/false);
    } else if (IsVectorCall) {
      if (FreeSSERegs >= NumElts &&
          (IsReturnType || Ty->isBuiltinType() || Ty->isVectorType())) {
        FreeSSERegs -= NumElts;
        return ABIArgInfo::getDirect();
      } else if (IsReturnType) {
        return ABIArgInfo::getExpand();
      } else if (!Ty->isBuiltinType() && !Ty->isVectorType()) {
        // HVAs are delayed and reclassified in the 2nd step.
        return ABIArgInfo::getIndirect(
            Align, /*AddrSpace=*/getDataLayout().getAllocaAddrSpace(),
            /*ByVal=*/false);
      }
    }
  }

  if (Ty->isMemberPointerType()) {
    // If the member pointer is represented by an LLVM int or ptr, pass it
    // directly.
    llvm::Type *LLTy = CGT.ConvertType(Ty);
    if (LLTy->isPointerTy() || LLTy->isIntegerTy())
      return ABIArgInfo::getDirect();
  }

  if (RT || Ty->isAnyComplexType() || Ty->isMemberPointerType()) {
    // MS x64 ABI requirement: "Any argument that doesn't fit in 8 bytes, or is
    // not 1, 2, 4, or 8 bytes, must be passed by reference."
    if (Width > 64 || !llvm::isPowerOf2_64(Width))
      return getNaturalAlignIndirect(Ty, getDataLayout().getAllocaAddrSpace(),
                                     /*ByVal=*/false);

    // Otherwise, coerce it to a small integer.
    return ABIArgInfo::getDirect(
        llvm::IntegerType::get(getVMContext(), Width));
  }

  if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) {
    switch (BT->getKind()) {
    case BuiltinType::Bool:
      // Bool is always extended in the ABI; other builtin types are not
      // extended.
      return ABIArgInfo::getExtend(Ty);

    case BuiltinType::LongDouble:
      // Mingw64 GCC uses the old 80 bit extended precision floating point
      // unit and passes long doubles indirectly through memory.
      if (IsMingw64) {
        const llvm::fltSemantics *LDF = &getTarget().getLongDoubleFormat();
        if (LDF == &llvm::APFloat::x87DoubleExtended())
          return ABIArgInfo::getIndirect(
              Align, /*AddrSpace=*/getDataLayout().getAllocaAddrSpace(),
              /*ByVal=*/false);
      }
      break;

    case BuiltinType::Int128:
    case BuiltinType::UInt128:
    case BuiltinType::Float128:
      // 128-bit float and integer types share the same ABI.

      // If it's a parameter type, the normal ABI rule is that arguments larger
      // than 8 bytes are passed indirectly. GCC follows it. We follow it too,
      // even though it isn't particularly efficient.
      if (!IsReturnType)
        return ABIArgInfo::getIndirect(
            Align, /*AddrSpace=*/getDataLayout().getAllocaAddrSpace(),
            /*ByVal=*/false);

      // Mingw64 GCC returns i128 in XMM0. Coerce to v2i64 to handle that.
      // Clang matches them for compatibility.
      // NOTE: GCC actually returns f128 indirectly but will hopefully change.
      // See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=115054#c8.
      return ABIArgInfo::getDirect(llvm::FixedVectorType::get(
          llvm::Type::getInt64Ty(getVMContext()), 2));

    default:
      break;
    }
  }

  if (Ty->isBitIntType()) {
    // MS x64 ABI requirement: "Any argument that doesn't fit in 8 bytes, or is
    // not 1, 2, 4, or 8 bytes, must be passed by reference."
    // However, non-power-of-two bit-precise integers will be passed as 1, 2,
    // 4, or 8 bytes anyway as long as they fit, so we don't have to check for
    // a power of 2.
    if (Width <= 64)
      return ABIArgInfo::getDirect();
    return ABIArgInfo::getIndirect(
        Align, /*AddrSpace=*/getDataLayout().getAllocaAddrSpace(),
        /*ByVal=*/false);
  }

  return ABIArgInfo::getDirect();
}
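
// Illustration: per the 128-bit and _BitInt rules above, the width alone
// picks the convention. A minimal sketch with hypothetical declarations:
//
// ```
// void f(__int128 x);      // parameter: 16 bytes > 8, passed indirectly
// __int128 g(void);        // return: coerced to <2 x i64> for GCC compat
// void h(_BitInt(24) b);   // fits in 8 bytes: passed directly
// void k(_BitInt(100) b);  // wider than 64 bits: passed indirectly
// ```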

void WinX86_64ABIInfo::computeInfo(CGFunctionInfo &FI) const {
  const unsigned CC = FI.getCallingConvention();
  bool IsVectorCall = CC == llvm::CallingConv::X86_VectorCall;
  bool IsRegCall = CC == llvm::CallingConv::X86_RegCall;

  // If __attribute__((sysv_abi)) is in use, use the SysV argument
  // classification rules.
  if (CC == llvm::CallingConv::X86_64_SysV) {
    X86_64ABIInfo SysVABIInfo(CGT, AVXLevel);
    SysVABIInfo.computeInfo(FI);
    return;
  }

  unsigned FreeSSERegs = 0;
  if (IsVectorCall) {
    // We can use up to 4 SSE return registers with vectorcall.
    FreeSSERegs = 4;
  } else if (IsRegCall) {
    // RegCall gives us 16 SSE registers.
    FreeSSERegs = 16;
  }

  if (!getCXXABI().classifyReturnType(FI))
    FI.getReturnInfo() = classify(FI.getReturnType(), FreeSSERegs, true,
                                  IsVectorCall, IsRegCall);

  if (IsVectorCall) {
    // We can use up to 6 SSE register parameters with vectorcall.
    FreeSSERegs = 6;
  } else if (IsRegCall) {
    // RegCall gives us 16 SSE registers; we can reuse the return registers.
    FreeSSERegs = 16;
  }

  unsigned ArgNum = 0;
  unsigned ZeroSSERegs = 0;
  for (auto &I : FI.arguments()) {
    // Vectorcall on x64 only permits the first 6 arguments to be passed in
    // XMM/YMM registers. After the sixth argument, pretend no vector
    // registers are left.
    unsigned *MaybeFreeSSERegs =
        (IsVectorCall && ArgNum >= 6) ? &ZeroSSERegs : &FreeSSERegs;
    I.info =
        classify(I.type, *MaybeFreeSSERegs, false, IsVectorCall, IsRegCall);
    ++ArgNum;
  }

  if (IsVectorCall) {
    // For vectorcall, assign aggregate HVAs to any free vector registers in a
    // second pass.
    for (auto &I : FI.arguments())
      I.info = reclassifyHvaArgForVectorCall(I.type, FreeSSERegs, I.info);
  }
}
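
// Illustration: with vectorcall, scalar vector arguments claim SSE registers
// in the first pass above, and aggregate HVAs only take whatever remains in
// the second pass. A minimal sketch with hypothetical declarations:
//
// ```
// #include <xmmintrin.h>
// struct Hva2 { __m128 a, b; };
// void __vectorcall f(__m128 v0, __m128 v1, struct Hva2 h);
// // Pass 1: v0 and v1 take two of the six XMM registers; h is provisionally
// //         classified indirect.
// // Pass 2: four registers remain, enough for h's two members, so h is
// //         reclassified to be passed directly in XMM registers.
// ```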

RValue WinX86_64ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
                                   QualType Ty, AggValueSlot Slot) const {
  // MS x64 ABI requirement: "Any argument that doesn't fit in 8 bytes, or is
  // not 1, 2, 4, or 8 bytes, must be passed by reference."
  uint64_t Width = getContext().getTypeSize(Ty);
  bool IsIndirect = Width > 64 || !llvm::isPowerOf2_64(Width);

  return emitVoidPtrVAArg(CGF, VAListAddr, Ty, IsIndirect,
                          CGF.getContext().getTypeInfoInChars(Ty),
                          CharUnits::fromQuantity(8),
                          /*allowHigherAlign*/ false, Slot);
}

std::unique_ptr<TargetCodeGenInfo> CodeGen::createX86_32TargetCodeGenInfo(
    CodeGenModule &CGM, bool DarwinVectorABI, bool Win32StructABI,
    unsigned NumRegisterParameters, bool SoftFloatABI) {
  bool RetSmallStructInRegABI = X86_32TargetCodeGenInfo::isStructReturnInRegABI(
      CGM.getTriple(), CGM.getCodeGenOpts());
  return std::make_unique<X86_32TargetCodeGenInfo>(
      CGM.getTypes(), DarwinVectorABI, RetSmallStructInRegABI, Win32StructABI,
      NumRegisterParameters, SoftFloatABI);
}

std::unique_ptr<TargetCodeGenInfo> CodeGen::createWinX86_32TargetCodeGenInfo(
    CodeGenModule &CGM, bool DarwinVectorABI, bool Win32StructABI,
    unsigned NumRegisterParameters) {
  bool RetSmallStructInRegABI = X86_32TargetCodeGenInfo::isStructReturnInRegABI(
      CGM.getTriple(), CGM.getCodeGenOpts());
  return std::make_unique<WinX86_32TargetCodeGenInfo>(
      CGM.getTypes(), DarwinVectorABI, RetSmallStructInRegABI, Win32StructABI,
      NumRegisterParameters);
}

std::unique_ptr<TargetCodeGenInfo>
CodeGen::createX86_64TargetCodeGenInfo(CodeGenModule &CGM,
                                       X86AVXABILevel AVXLevel) {
  return std::make_unique<X86_64TargetCodeGenInfo>(CGM.getTypes(), AVXLevel);
}

std::unique_ptr<TargetCodeGenInfo>
CodeGen::createWinX86_64TargetCodeGenInfo(CodeGenModule &CGM,
                                          X86AVXABILevel AVXLevel) {
  return std::make_unique<WinX86_64TargetCodeGenInfo>(CGM.getTypes(), AVXLevel);
}
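
// Illustration: callers are expected to pick the factory matching the target
// triple. A minimal sketch, assuming a hypothetical selection helper (the
// actual dispatch lives in the CodeGenModule setup code):
//
// ```
// std::unique_ptr<TargetCodeGenInfo> pickX86_64Info(CodeGenModule &CGM,
//                                                   X86AVXABILevel AVX) {
//   return CGM.getTriple().isOSWindows()
//              ? CodeGen::createWinX86_64TargetCodeGenInfo(CGM, AVX)
//              : CodeGen::createX86_64TargetCodeGenInfo(CGM, AVX);
// }
// ```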