1 //===- X86.cpp ------------------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "ABIInfoImpl.h" 10 #include "TargetInfo.h" 11 #include "clang/Basic/DiagnosticFrontend.h" 12 #include "llvm/ADT/SmallBitVector.h" 13 14 using namespace clang; 15 using namespace clang::CodeGen; 16 17 namespace { 18 19 /// IsX86_MMXType - Return true if this is an MMX type. 20 bool IsX86_MMXType(llvm::Type *IRType) { 21 // Return true if the type is an MMX type <2 x i32>, <4 x i16>, or <8 x i8>. 22 return IRType->isVectorTy() && IRType->getPrimitiveSizeInBits() == 64 && 23 cast<llvm::VectorType>(IRType)->getElementType()->isIntegerTy() && 24 IRType->getScalarSizeInBits() != 64; 25 } 26 27 static llvm::Type* X86AdjustInlineAsmType(CodeGen::CodeGenFunction &CGF, 28 StringRef Constraint, 29 llvm::Type* Ty) { 30 bool IsMMXCons = llvm::StringSwitch<bool>(Constraint) 31 .Cases("y", "&y", "^Ym", true) 32 .Default(false); 33 if (IsMMXCons && Ty->isVectorTy()) { 34 if (cast<llvm::VectorType>(Ty)->getPrimitiveSizeInBits().getFixedValue() != 35 64) { 36 // Invalid MMX constraint 37 return nullptr; 38 } 39 40 return llvm::Type::getX86_MMXTy(CGF.getLLVMContext()); 41 } 42 43 // No operation needed 44 return Ty; 45 } 46 47 /// Returns true if this type can be passed in SSE registers with the 48 /// X86_VectorCall calling convention. Shared between x86_32 and x86_64. 49 static bool isX86VectorTypeForVectorCall(ASTContext &Context, QualType Ty) { 50 if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) { 51 if (BT->isFloatingPoint() && BT->getKind() != BuiltinType::Half) { 52 if (BT->getKind() == BuiltinType::LongDouble) { 53 if (&Context.getTargetInfo().getLongDoubleFormat() == 54 &llvm::APFloat::x87DoubleExtended()) 55 return false; 56 } 57 return true; 58 } 59 } else if (const VectorType *VT = Ty->getAs<VectorType>()) { 60 // vectorcall can pass XMM, YMM, and ZMM vectors. We don't pass SSE1 MMX 61 // registers specially. 62 unsigned VecSize = Context.getTypeSize(VT); 63 if (VecSize == 128 || VecSize == 256 || VecSize == 512) 64 return true; 65 } 66 return false; 67 } 68 69 /// Returns true if this aggregate is small enough to be passed in SSE registers 70 /// in the X86_VectorCall calling convention. Shared between x86_32 and x86_64. 71 static bool isX86VectorCallAggregateSmallEnough(uint64_t NumMembers) { 72 return NumMembers <= 4; 73 } 74 75 /// Returns a Homogeneous Vector Aggregate ABIArgInfo, used in X86. 76 static ABIArgInfo getDirectX86Hva(llvm::Type* T = nullptr) { 77 auto AI = ABIArgInfo::getDirect(T); 78 AI.setInReg(true); 79 AI.setCanBeFlattened(false); 80 return AI; 81 } 82 83 //===----------------------------------------------------------------------===// 84 // X86-32 ABI Implementation 85 //===----------------------------------------------------------------------===// 86 87 /// Similar to llvm::CCState, but for Clang. 
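/// Tracks the calling convention plus the integer and SSE registers that are
/// still free while arguments are being classified, and remembers which
/// arguments were already assigned by the vectorcall first pass.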
88 struct CCState { 89 CCState(CGFunctionInfo &FI) 90 : IsPreassigned(FI.arg_size()), CC(FI.getCallingConvention()), 91 Required(FI.getRequiredArgs()), IsDelegateCall(FI.isDelegateCall()) {} 92 93 llvm::SmallBitVector IsPreassigned; 94 unsigned CC = CallingConv::CC_C; 95 unsigned FreeRegs = 0; 96 unsigned FreeSSERegs = 0; 97 RequiredArgs Required; 98 bool IsDelegateCall = false; 99 }; 100 101 /// X86_32ABIInfo - The X86-32 ABI information. 102 class X86_32ABIInfo : public ABIInfo { 103 enum Class { 104 Integer, 105 Float 106 }; 107 108 static const unsigned MinABIStackAlignInBytes = 4; 109 110 bool IsDarwinVectorABI; 111 bool IsRetSmallStructInRegABI; 112 bool IsWin32StructABI; 113 bool IsSoftFloatABI; 114 bool IsMCUABI; 115 bool IsLinuxABI; 116 unsigned DefaultNumRegisterParameters; 117 118 static bool isRegisterSize(unsigned Size) { 119 return (Size == 8 || Size == 16 || Size == 32 || Size == 64); 120 } 121 122 bool isHomogeneousAggregateBaseType(QualType Ty) const override { 123 // FIXME: Assumes vectorcall is in use. 124 return isX86VectorTypeForVectorCall(getContext(), Ty); 125 } 126 127 bool isHomogeneousAggregateSmallEnough(const Type *Ty, 128 uint64_t NumMembers) const override { 129 // FIXME: Assumes vectorcall is in use. 130 return isX86VectorCallAggregateSmallEnough(NumMembers); 131 } 132 133 bool shouldReturnTypeInRegister(QualType Ty, ASTContext &Context) const; 134 135 /// getIndirectResult - Give a source type \arg Ty, return a suitable result 136 /// such that the argument will be passed in memory. 137 ABIArgInfo getIndirectResult(QualType Ty, bool ByVal, CCState &State) const; 138 139 ABIArgInfo getIndirectReturnResult(QualType Ty, CCState &State) const; 140 141 /// Return the alignment to use for the given type on the stack. 142 unsigned getTypeStackAlignInBytes(QualType Ty, unsigned Align) const; 143 144 Class classify(QualType Ty) const; 145 ABIArgInfo classifyReturnType(QualType RetTy, CCState &State) const; 146 ABIArgInfo classifyArgumentType(QualType RetTy, CCState &State, 147 unsigned ArgIndex) const; 148 149 /// Updates the number of available free registers, returns 150 /// true if any registers were allocated. 151 bool updateFreeRegs(QualType Ty, CCState &State) const; 152 153 bool shouldAggregateUseDirect(QualType Ty, CCState &State, bool &InReg, 154 bool &NeedsPadding) const; 155 bool shouldPrimitiveUseInReg(QualType Ty, CCState &State) const; 156 157 bool canExpandIndirectArgument(QualType Ty) const; 158 159 /// Rewrite the function info so that all memory arguments use 160 /// inalloca. 
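  /// Only used for the Win32 struct ABI, where such arguments are laid out in
  /// a single packed struct on the stack (see the definition below).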
161 void rewriteWithInAlloca(CGFunctionInfo &FI) const; 162 163 void addFieldToArgStruct(SmallVector<llvm::Type *, 6> &FrameFields, 164 CharUnits &StackOffset, ABIArgInfo &Info, 165 QualType Type) const; 166 void runVectorCallFirstPass(CGFunctionInfo &FI, CCState &State) const; 167 168 public: 169 170 void computeInfo(CGFunctionInfo &FI) const override; 171 Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, 172 QualType Ty) const override; 173 174 X86_32ABIInfo(CodeGen::CodeGenTypes &CGT, bool DarwinVectorABI, 175 bool RetSmallStructInRegABI, bool Win32StructABI, 176 unsigned NumRegisterParameters, bool SoftFloatABI) 177 : ABIInfo(CGT), IsDarwinVectorABI(DarwinVectorABI), 178 IsRetSmallStructInRegABI(RetSmallStructInRegABI), 179 IsWin32StructABI(Win32StructABI), IsSoftFloatABI(SoftFloatABI), 180 IsMCUABI(CGT.getTarget().getTriple().isOSIAMCU()), 181 IsLinuxABI(CGT.getTarget().getTriple().isOSLinux() || 182 CGT.getTarget().getTriple().isOSCygMing()), 183 DefaultNumRegisterParameters(NumRegisterParameters) {} 184 }; 185 186 class X86_32SwiftABIInfo : public SwiftABIInfo { 187 public: 188 explicit X86_32SwiftABIInfo(CodeGenTypes &CGT) 189 : SwiftABIInfo(CGT, /*SwiftErrorInRegister=*/false) {} 190 191 bool shouldPassIndirectly(ArrayRef<llvm::Type *> ComponentTys, 192 bool AsReturnValue) const override { 193 // LLVM's x86-32 lowering currently only assigns up to three 194 // integer registers and three fp registers. Oddly, it'll use up to 195 // four vector registers for vectors, but those can overlap with the 196 // scalar registers. 197 return occupiesMoreThan(ComponentTys, /*total=*/3); 198 } 199 }; 200 201 class X86_32TargetCodeGenInfo : public TargetCodeGenInfo { 202 public: 203 X86_32TargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, bool DarwinVectorABI, 204 bool RetSmallStructInRegABI, bool Win32StructABI, 205 unsigned NumRegisterParameters, bool SoftFloatABI) 206 : TargetCodeGenInfo(std::make_unique<X86_32ABIInfo>( 207 CGT, DarwinVectorABI, RetSmallStructInRegABI, Win32StructABI, 208 NumRegisterParameters, SoftFloatABI)) { 209 SwiftInfo = std::make_unique<X86_32SwiftABIInfo>(CGT); 210 } 211 212 static bool isStructReturnInRegABI( 213 const llvm::Triple &Triple, const CodeGenOptions &Opts); 214 215 void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, 216 CodeGen::CodeGenModule &CGM) const override; 217 218 int getDwarfEHStackPointer(CodeGen::CodeGenModule &CGM) const override { 219 // Darwin uses different dwarf register numbers for EH. 220 if (CGM.getTarget().getTriple().isOSDarwin()) return 5; 221 return 4; 222 } 223 224 bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, 225 llvm::Value *Address) const override; 226 227 llvm::Type* adjustInlineAsmType(CodeGen::CodeGenFunction &CGF, 228 StringRef Constraint, 229 llvm::Type* Ty) const override { 230 return X86AdjustInlineAsmType(CGF, Constraint, Ty); 231 } 232 233 void addReturnRegisterOutputs(CodeGenFunction &CGF, LValue ReturnValue, 234 std::string &Constraints, 235 std::vector<llvm::Type *> &ResultRegTypes, 236 std::vector<llvm::Type *> &ResultTruncRegTypes, 237 std::vector<LValue> &ResultRegDests, 238 std::string &AsmString, 239 unsigned NumOutputs) const override; 240 241 StringRef getARCRetainAutoreleasedReturnValueMarker() const override { 242 return "movl\t%ebp, %ebp" 243 "\t\t// marker for objc_retainAutoreleaseReturnValue"; 244 } 245 }; 246 247 } 248 249 /// Rewrite input constraint references after adding some output constraints. 
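/// The references are the '$N' operand numbers in the asm template string.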
250 /// In the case where there is one output and one input and we add one output, 251 /// we need to replace all operand references greater than or equal to 1: 252 /// mov $0, $1 253 /// mov eax, $1 254 /// The result will be: 255 /// mov $0, $2 256 /// mov eax, $2 257 static void rewriteInputConstraintReferences(unsigned FirstIn, 258 unsigned NumNewOuts, 259 std::string &AsmString) { 260 std::string Buf; 261 llvm::raw_string_ostream OS(Buf); 262 size_t Pos = 0; 263 while (Pos < AsmString.size()) { 264 size_t DollarStart = AsmString.find('$', Pos); 265 if (DollarStart == std::string::npos) 266 DollarStart = AsmString.size(); 267 size_t DollarEnd = AsmString.find_first_not_of('$', DollarStart); 268 if (DollarEnd == std::string::npos) 269 DollarEnd = AsmString.size(); 270 OS << StringRef(&AsmString[Pos], DollarEnd - Pos); 271 Pos = DollarEnd; 272 size_t NumDollars = DollarEnd - DollarStart; 273 if (NumDollars % 2 != 0 && Pos < AsmString.size()) { 274 // We have an operand reference. 275 size_t DigitStart = Pos; 276 if (AsmString[DigitStart] == '{') { 277 OS << '{'; 278 ++DigitStart; 279 } 280 size_t DigitEnd = AsmString.find_first_not_of("0123456789", DigitStart); 281 if (DigitEnd == std::string::npos) 282 DigitEnd = AsmString.size(); 283 StringRef OperandStr(&AsmString[DigitStart], DigitEnd - DigitStart); 284 unsigned OperandIndex; 285 if (!OperandStr.getAsInteger(10, OperandIndex)) { 286 if (OperandIndex >= FirstIn) 287 OperandIndex += NumNewOuts; 288 OS << OperandIndex; 289 } else { 290 OS << OperandStr; 291 } 292 Pos = DigitEnd; 293 } 294 } 295 AsmString = std::move(OS.str()); 296 } 297 298 /// Add output constraints for EAX:EDX because they are return registers. 299 void X86_32TargetCodeGenInfo::addReturnRegisterOutputs( 300 CodeGenFunction &CGF, LValue ReturnSlot, std::string &Constraints, 301 std::vector<llvm::Type *> &ResultRegTypes, 302 std::vector<llvm::Type *> &ResultTruncRegTypes, 303 std::vector<LValue> &ResultRegDests, std::string &AsmString, 304 unsigned NumOutputs) const { 305 uint64_t RetWidth = CGF.getContext().getTypeSize(ReturnSlot.getType()); 306 307 // Use the EAX constraint if the width is 32 or smaller and EAX:EDX if it is 308 // larger. 309 if (!Constraints.empty()) 310 Constraints += ','; 311 if (RetWidth <= 32) { 312 Constraints += "={eax}"; 313 ResultRegTypes.push_back(CGF.Int32Ty); 314 } else { 315 // Use the 'A' constraint for EAX:EDX. 316 Constraints += "=A"; 317 ResultRegTypes.push_back(CGF.Int64Ty); 318 } 319 320 // Truncate EAX or EAX:EDX to an integer of the appropriate size. 321 llvm::Type *CoerceTy = llvm::IntegerType::get(CGF.getLLVMContext(), RetWidth); 322 ResultTruncRegTypes.push_back(CoerceTy); 323 324 // Coerce the integer by bitcasting the return slot pointer. 325 ReturnSlot.setAddress(ReturnSlot.getAddress(CGF).withElementType(CoerceTy)); 326 ResultRegDests.push_back(ReturnSlot); 327 328 rewriteInputConstraintReferences(NumOutputs, 1, AsmString); 329 } 330 331 /// shouldReturnTypeInRegister - Determine if the given type should be 332 /// returned in a register (for the Darwin and MCU ABI). 333 bool X86_32ABIInfo::shouldReturnTypeInRegister(QualType Ty, 334 ASTContext &Context) const { 335 uint64_t Size = Context.getTypeSize(Ty); 336 337 // For i386, type must be register sized. 338 // For the MCU ABI, it only needs to be <= 8-byte 339 if ((IsMCUABI && Size > 64) || (!IsMCUABI && !isRegisterSize(Size))) 340 return false; 341 342 if (Ty->isVectorType()) { 343 // 64- and 128- bit vectors inside structures are not returned in 344 // registers. 
345 if (Size == 64 || Size == 128) 346 return false; 347 348 return true; 349 } 350 351 // If this is a builtin, pointer, enum, complex type, member pointer, or 352 // member function pointer it is ok. 353 if (Ty->getAs<BuiltinType>() || Ty->hasPointerRepresentation() || 354 Ty->isAnyComplexType() || Ty->isEnumeralType() || 355 Ty->isBlockPointerType() || Ty->isMemberPointerType()) 356 return true; 357 358 // Arrays are treated like records. 359 if (const ConstantArrayType *AT = Context.getAsConstantArrayType(Ty)) 360 return shouldReturnTypeInRegister(AT->getElementType(), Context); 361 362 // Otherwise, it must be a record type. 363 const RecordType *RT = Ty->getAs<RecordType>(); 364 if (!RT) return false; 365 366 // FIXME: Traverse bases here too. 367 368 // Structure types are passed in register if all fields would be 369 // passed in a register. 370 for (const auto *FD : RT->getDecl()->fields()) { 371 // Empty fields are ignored. 372 if (isEmptyField(Context, FD, true)) 373 continue; 374 375 // Check fields recursively. 376 if (!shouldReturnTypeInRegister(FD->getType(), Context)) 377 return false; 378 } 379 return true; 380 } 381 382 static bool is32Or64BitBasicType(QualType Ty, ASTContext &Context) { 383 // Treat complex types as the element type. 384 if (const ComplexType *CTy = Ty->getAs<ComplexType>()) 385 Ty = CTy->getElementType(); 386 387 // Check for a type which we know has a simple scalar argument-passing 388 // convention without any padding. (We're specifically looking for 32 389 // and 64-bit integer and integer-equivalents, float, and double.) 390 if (!Ty->getAs<BuiltinType>() && !Ty->hasPointerRepresentation() && 391 !Ty->isEnumeralType() && !Ty->isBlockPointerType()) 392 return false; 393 394 uint64_t Size = Context.getTypeSize(Ty); 395 return Size == 32 || Size == 64; 396 } 397 398 static bool addFieldSizes(ASTContext &Context, const RecordDecl *RD, 399 uint64_t &Size) { 400 for (const auto *FD : RD->fields()) { 401 // Scalar arguments on the stack get 4 byte alignment on x86. If the 402 // argument is smaller than 32-bits, expanding the struct will create 403 // alignment padding. 404 if (!is32Or64BitBasicType(FD->getType(), Context)) 405 return false; 406 407 // FIXME: Reject bit-fields wholesale; there are two problems, we don't know 408 // how to expand them yet, and the predicate for telling if a bitfield still 409 // counts as "basic" is more complicated than what we were doing previously. 410 if (FD->isBitField()) 411 return false; 412 413 Size += Context.getTypeSize(FD->getType()); 414 } 415 return true; 416 } 417 418 static bool addBaseAndFieldSizes(ASTContext &Context, const CXXRecordDecl *RD, 419 uint64_t &Size) { 420 // Don't do this if there are any non-empty bases. 421 for (const CXXBaseSpecifier &Base : RD->bases()) { 422 if (!addBaseAndFieldSizes(Context, Base.getType()->getAsCXXRecordDecl(), 423 Size)) 424 return false; 425 } 426 if (!addFieldSizes(Context, RD, Size)) 427 return false; 428 return true; 429 } 430 431 /// Test whether an argument type which is to be passed indirectly (on the 432 /// stack) would have the equivalent layout if it was expanded into separate 433 /// arguments. If so, we prefer to do the latter to avoid inhibiting 434 /// optimizations. 435 bool X86_32ABIInfo::canExpandIndirectArgument(QualType Ty) const { 436 // We can only expand structure types. 
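  // For illustration, something like
  //   struct S { int a; float b; };   // 8 bytes of 32-bit fields -> expandable
  //   struct T { short s; };          // sub-32-bit field -> not expandable
  // only qualifies when the flattened fields cover the whole object with no
  // padding, which is what the size comparison at the end verifies.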
437 const RecordType *RT = Ty->getAs<RecordType>(); 438 if (!RT) 439 return false; 440 const RecordDecl *RD = RT->getDecl(); 441 uint64_t Size = 0; 442 if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD)) { 443 if (!IsWin32StructABI) { 444 // On non-Windows, we have to conservatively match our old bitcode 445 // prototypes in order to be ABI-compatible at the bitcode level. 446 if (!CXXRD->isCLike()) 447 return false; 448 } else { 449 // Don't do this for dynamic classes. 450 if (CXXRD->isDynamicClass()) 451 return false; 452 } 453 if (!addBaseAndFieldSizes(getContext(), CXXRD, Size)) 454 return false; 455 } else { 456 if (!addFieldSizes(getContext(), RD, Size)) 457 return false; 458 } 459 460 // We can do this if there was no alignment padding. 461 return Size == getContext().getTypeSize(Ty); 462 } 463 464 ABIArgInfo X86_32ABIInfo::getIndirectReturnResult(QualType RetTy, CCState &State) const { 465 // If the return value is indirect, then the hidden argument is consuming one 466 // integer register. 467 if (State.FreeRegs) { 468 --State.FreeRegs; 469 if (!IsMCUABI) 470 return getNaturalAlignIndirectInReg(RetTy); 471 } 472 return getNaturalAlignIndirect(RetTy, /*ByVal=*/false); 473 } 474 475 ABIArgInfo X86_32ABIInfo::classifyReturnType(QualType RetTy, 476 CCState &State) const { 477 if (RetTy->isVoidType()) 478 return ABIArgInfo::getIgnore(); 479 480 const Type *Base = nullptr; 481 uint64_t NumElts = 0; 482 if ((State.CC == llvm::CallingConv::X86_VectorCall || 483 State.CC == llvm::CallingConv::X86_RegCall) && 484 isHomogeneousAggregate(RetTy, Base, NumElts)) { 485 // The LLVM struct type for such an aggregate should lower properly. 486 return ABIArgInfo::getDirect(); 487 } 488 489 if (const VectorType *VT = RetTy->getAs<VectorType>()) { 490 // On Darwin, some vectors are returned in registers. 491 if (IsDarwinVectorABI) { 492 uint64_t Size = getContext().getTypeSize(RetTy); 493 494 // 128-bit vectors are a special case; they are returned in 495 // registers and we need to make sure to pick a type the LLVM 496 // backend will like. 497 if (Size == 128) 498 return ABIArgInfo::getDirect(llvm::FixedVectorType::get( 499 llvm::Type::getInt64Ty(getVMContext()), 2)); 500 501 // Always return in register if it fits in a general purpose 502 // register, or if it is 64 bits and has a single element. 503 if ((Size == 8 || Size == 16 || Size == 32) || 504 (Size == 64 && VT->getNumElements() == 1)) 505 return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(), 506 Size)); 507 508 return getIndirectReturnResult(RetTy, State); 509 } 510 511 return ABIArgInfo::getDirect(); 512 } 513 514 if (isAggregateTypeForABI(RetTy)) { 515 if (const RecordType *RT = RetTy->getAs<RecordType>()) { 516 // Structures with flexible arrays are always indirect. 517 if (RT->getDecl()->hasFlexibleArrayMember()) 518 return getIndirectReturnResult(RetTy, State); 519 } 520 521 // If specified, structs and unions are always indirect. 522 if (!IsRetSmallStructInRegABI && !RetTy->isAnyComplexType()) 523 return getIndirectReturnResult(RetTy, State); 524 525 // Ignore empty structs/unions. 526 if (isEmptyRecord(getContext(), RetTy, true)) 527 return ABIArgInfo::getIgnore(); 528 529 // Return complex of _Float16 as <2 x half> so the backend will use xmm0. 
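    // (Coercing it to a plain i32 would return it in eax instead.)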
530 if (const ComplexType *CT = RetTy->getAs<ComplexType>()) { 531 QualType ET = getContext().getCanonicalType(CT->getElementType()); 532 if (ET->isFloat16Type()) 533 return ABIArgInfo::getDirect(llvm::FixedVectorType::get( 534 llvm::Type::getHalfTy(getVMContext()), 2)); 535 } 536 537 // Small structures which are register sized are generally returned 538 // in a register. 539 if (shouldReturnTypeInRegister(RetTy, getContext())) { 540 uint64_t Size = getContext().getTypeSize(RetTy); 541 542 // As a special-case, if the struct is a "single-element" struct, and 543 // the field is of type "float" or "double", return it in a 544 // floating-point register. (MSVC does not apply this special case.) 545 // We apply a similar transformation for pointer types to improve the 546 // quality of the generated IR. 547 if (const Type *SeltTy = isSingleElementStruct(RetTy, getContext())) 548 if ((!IsWin32StructABI && SeltTy->isRealFloatingType()) 549 || SeltTy->hasPointerRepresentation()) 550 return ABIArgInfo::getDirect(CGT.ConvertType(QualType(SeltTy, 0))); 551 552 // FIXME: We should be able to narrow this integer in cases with dead 553 // padding. 554 return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(),Size)); 555 } 556 557 return getIndirectReturnResult(RetTy, State); 558 } 559 560 // Treat an enum type as its underlying type. 561 if (const EnumType *EnumTy = RetTy->getAs<EnumType>()) 562 RetTy = EnumTy->getDecl()->getIntegerType(); 563 564 if (const auto *EIT = RetTy->getAs<BitIntType>()) 565 if (EIT->getNumBits() > 64) 566 return getIndirectReturnResult(RetTy, State); 567 568 return (isPromotableIntegerTypeForABI(RetTy) ? ABIArgInfo::getExtend(RetTy) 569 : ABIArgInfo::getDirect()); 570 } 571 572 unsigned X86_32ABIInfo::getTypeStackAlignInBytes(QualType Ty, 573 unsigned Align) const { 574 // Otherwise, if the alignment is less than or equal to the minimum ABI 575 // alignment, just use the default; the backend will handle this. 576 if (Align <= MinABIStackAlignInBytes) 577 return 0; // Use default alignment. 578 579 if (IsLinuxABI) { 580 // Exclude other System V OS (e.g Darwin, PS4 and FreeBSD) since we don't 581 // want to spend any effort dealing with the ramifications of ABI breaks. 582 // 583 // If the vector type is __m128/__m256/__m512, return the default alignment. 584 if (Ty->isVectorType() && (Align == 16 || Align == 32 || Align == 64)) 585 return Align; 586 } 587 // On non-Darwin, the stack type alignment is always 4. 588 if (!IsDarwinVectorABI) { 589 // Set explicit alignment, since we may need to realign the top. 590 return MinABIStackAlignInBytes; 591 } 592 593 // Otherwise, if the type contains an SSE vector type, the alignment is 16. 594 if (Align >= 16 && (isSIMDVectorType(getContext(), Ty) || 595 isRecordWithSIMDVectorType(getContext(), Ty))) 596 return 16; 597 598 return MinABIStackAlignInBytes; 599 } 600 601 ABIArgInfo X86_32ABIInfo::getIndirectResult(QualType Ty, bool ByVal, 602 CCState &State) const { 603 if (!ByVal) { 604 if (State.FreeRegs) { 605 --State.FreeRegs; // Non-byval indirects just use one pointer. 606 if (!IsMCUABI) 607 return getNaturalAlignIndirectInReg(Ty); 608 } 609 return getNaturalAlignIndirect(Ty, false); 610 } 611 612 // Compute the byval alignment. 
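  // For illustration: on Darwin a struct containing an __m128 member keeps a
  // 16-byte byval alignment here, while most other targets clamp the stack
  // alignment for byval aggregates back down to 4.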
613 unsigned TypeAlign = getContext().getTypeAlign(Ty) / 8; 614 unsigned StackAlign = getTypeStackAlignInBytes(Ty, TypeAlign); 615 if (StackAlign == 0) 616 return ABIArgInfo::getIndirect(CharUnits::fromQuantity(4), /*ByVal=*/true); 617 618 // If the stack alignment is less than the type alignment, realign the 619 // argument. 620 bool Realign = TypeAlign > StackAlign; 621 return ABIArgInfo::getIndirect(CharUnits::fromQuantity(StackAlign), 622 /*ByVal=*/true, Realign); 623 } 624 625 X86_32ABIInfo::Class X86_32ABIInfo::classify(QualType Ty) const { 626 const Type *T = isSingleElementStruct(Ty, getContext()); 627 if (!T) 628 T = Ty.getTypePtr(); 629 630 if (const BuiltinType *BT = T->getAs<BuiltinType>()) { 631 BuiltinType::Kind K = BT->getKind(); 632 if (K == BuiltinType::Float || K == BuiltinType::Double) 633 return Float; 634 } 635 return Integer; 636 } 637 638 bool X86_32ABIInfo::updateFreeRegs(QualType Ty, CCState &State) const { 639 if (!IsSoftFloatABI) { 640 Class C = classify(Ty); 641 if (C == Float) 642 return false; 643 } 644 645 unsigned Size = getContext().getTypeSize(Ty); 646 unsigned SizeInRegs = (Size + 31) / 32; 647 648 if (SizeInRegs == 0) 649 return false; 650 651 if (!IsMCUABI) { 652 if (SizeInRegs > State.FreeRegs) { 653 State.FreeRegs = 0; 654 return false; 655 } 656 } else { 657 // The MCU psABI allows passing parameters in-reg even if there are 658 // earlier parameters that are passed on the stack. Also, 659 // it does not allow passing >8-byte structs in-register, 660 // even if there are 3 free registers available. 661 if (SizeInRegs > State.FreeRegs || SizeInRegs > 2) 662 return false; 663 } 664 665 State.FreeRegs -= SizeInRegs; 666 return true; 667 } 668 669 bool X86_32ABIInfo::shouldAggregateUseDirect(QualType Ty, CCState &State, 670 bool &InReg, 671 bool &NeedsPadding) const { 672 // On Windows, aggregates other than HFAs are never passed in registers, and 673 // they do not consume register slots. Homogenous floating-point aggregates 674 // (HFAs) have already been dealt with at this point. 675 if (IsWin32StructABI && isAggregateTypeForABI(Ty)) 676 return false; 677 678 NeedsPadding = false; 679 InReg = !IsMCUABI; 680 681 if (!updateFreeRegs(Ty, State)) 682 return false; 683 684 if (IsMCUABI) 685 return true; 686 687 if (State.CC == llvm::CallingConv::X86_FastCall || 688 State.CC == llvm::CallingConv::X86_VectorCall || 689 State.CC == llvm::CallingConv::X86_RegCall) { 690 if (getContext().getTypeSize(Ty) <= 32 && State.FreeRegs) 691 NeedsPadding = true; 692 693 return false; 694 } 695 696 return true; 697 } 698 699 bool X86_32ABIInfo::shouldPrimitiveUseInReg(QualType Ty, CCState &State) const { 700 bool IsPtrOrInt = (getContext().getTypeSize(Ty) <= 32) && 701 (Ty->isIntegralOrEnumerationType() || Ty->isPointerType() || 702 Ty->isReferenceType()); 703 704 if (!IsPtrOrInt && (State.CC == llvm::CallingConv::X86_FastCall || 705 State.CC == llvm::CallingConv::X86_VectorCall)) 706 return false; 707 708 if (!updateFreeRegs(Ty, State)) 709 return false; 710 711 if (!IsPtrOrInt && State.CC == llvm::CallingConv::X86_RegCall) 712 return false; 713 714 // Return true to apply inreg to all legal parameters except for MCU targets. 715 return !IsMCUABI; 716 } 717 718 void X86_32ABIInfo::runVectorCallFirstPass(CGFunctionInfo &FI, CCState &State) const { 719 // Vectorcall x86 works subtly different than in x64, so the format is 720 // a bit different than the x64 version. 
First, all vector types (not HVAs) 721 // are assigned, with the first 6 ending up in the [XYZ]MM0-5 registers. 722 // This differs from the x64 implementation, where the first 6 by INDEX get 723 // registers. 724 // In the second pass over the arguments, HVAs are passed in the remaining 725 // vector registers if possible, or indirectly by address. The address will be 726 // passed in ECX/EDX if available. Any other arguments are passed according to 727 // the usual fastcall rules. 728 MutableArrayRef<CGFunctionInfoArgInfo> Args = FI.arguments(); 729 for (int I = 0, E = Args.size(); I < E; ++I) { 730 const Type *Base = nullptr; 731 uint64_t NumElts = 0; 732 const QualType &Ty = Args[I].type; 733 if ((Ty->isVectorType() || Ty->isBuiltinType()) && 734 isHomogeneousAggregate(Ty, Base, NumElts)) { 735 if (State.FreeSSERegs >= NumElts) { 736 State.FreeSSERegs -= NumElts; 737 Args[I].info = ABIArgInfo::getDirectInReg(); 738 State.IsPreassigned.set(I); 739 } 740 } 741 } 742 } 743 744 ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty, CCState &State, 745 unsigned ArgIndex) const { 746 // FIXME: Set alignment on indirect arguments. 747 bool IsFastCall = State.CC == llvm::CallingConv::X86_FastCall; 748 bool IsRegCall = State.CC == llvm::CallingConv::X86_RegCall; 749 bool IsVectorCall = State.CC == llvm::CallingConv::X86_VectorCall; 750 751 Ty = useFirstFieldIfTransparentUnion(Ty); 752 TypeInfo TI = getContext().getTypeInfo(Ty); 753 754 // Check with the C++ ABI first. 755 const RecordType *RT = Ty->getAs<RecordType>(); 756 if (RT) { 757 CGCXXABI::RecordArgABI RAA = getRecordArgABI(RT, getCXXABI()); 758 if (RAA == CGCXXABI::RAA_Indirect) { 759 return getIndirectResult(Ty, false, State); 760 } else if (State.IsDelegateCall) { 761 // Avoid having different alignments on delegate call args by always 762 // setting the alignment to 4, which is what we do for inallocas. 763 ABIArgInfo Res = getIndirectResult(Ty, false, State); 764 Res.setIndirectAlign(CharUnits::fromQuantity(4)); 765 return Res; 766 } else if (RAA == CGCXXABI::RAA_DirectInMemory) { 767 // The field index doesn't matter, we'll fix it up later. 768 return ABIArgInfo::getInAlloca(/*FieldIndex=*/0); 769 } 770 } 771 772 // Regcall uses the concept of a homogenous vector aggregate, similar 773 // to other targets. 774 const Type *Base = nullptr; 775 uint64_t NumElts = 0; 776 if ((IsRegCall || IsVectorCall) && 777 isHomogeneousAggregate(Ty, Base, NumElts)) { 778 if (State.FreeSSERegs >= NumElts) { 779 State.FreeSSERegs -= NumElts; 780 781 // Vectorcall passes HVAs directly and does not flatten them, but regcall 782 // does. 783 if (IsVectorCall) 784 return getDirectX86Hva(); 785 786 if (Ty->isBuiltinType() || Ty->isVectorType()) 787 return ABIArgInfo::getDirect(); 788 return ABIArgInfo::getExpand(); 789 } 790 return getIndirectResult(Ty, /*ByVal=*/false, State); 791 } 792 793 if (isAggregateTypeForABI(Ty)) { 794 // Structures with flexible arrays are always indirect. 795 // FIXME: This should not be byval! 796 if (RT && RT->getDecl()->hasFlexibleArrayMember()) 797 return getIndirectResult(Ty, true, State); 798 799 // Ignore empty structs/unions on non-Windows. 
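    // (MSVC still allocates an argument slot for an empty class, so the Win32
    // ABI does not drop such arguments here.)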
800 if (!IsWin32StructABI && isEmptyRecord(getContext(), Ty, true)) 801 return ABIArgInfo::getIgnore(); 802 803 llvm::LLVMContext &LLVMContext = getVMContext(); 804 llvm::IntegerType *Int32 = llvm::Type::getInt32Ty(LLVMContext); 805 bool NeedsPadding = false; 806 bool InReg; 807 if (shouldAggregateUseDirect(Ty, State, InReg, NeedsPadding)) { 808 unsigned SizeInRegs = (TI.Width + 31) / 32; 809 SmallVector<llvm::Type*, 3> Elements(SizeInRegs, Int32); 810 llvm::Type *Result = llvm::StructType::get(LLVMContext, Elements); 811 if (InReg) 812 return ABIArgInfo::getDirectInReg(Result); 813 else 814 return ABIArgInfo::getDirect(Result); 815 } 816 llvm::IntegerType *PaddingType = NeedsPadding ? Int32 : nullptr; 817 818 // Pass over-aligned aggregates to non-variadic functions on Windows 819 // indirectly. This behavior was added in MSVC 2015. Use the required 820 // alignment from the record layout, since that may be less than the 821 // regular type alignment, and types with required alignment of less than 4 822 // bytes are not passed indirectly. 823 if (IsWin32StructABI && State.Required.isRequiredArg(ArgIndex)) { 824 unsigned AlignInBits = 0; 825 if (RT) { 826 const ASTRecordLayout &Layout = 827 getContext().getASTRecordLayout(RT->getDecl()); 828 AlignInBits = getContext().toBits(Layout.getRequiredAlignment()); 829 } else if (TI.isAlignRequired()) { 830 AlignInBits = TI.Align; 831 } 832 if (AlignInBits > 32) 833 return getIndirectResult(Ty, /*ByVal=*/false, State); 834 } 835 836 // Expand small (<= 128-bit) record types when we know that the stack layout 837 // of those arguments will match the struct. This is important because the 838 // LLVM backend isn't smart enough to remove byval, which inhibits many 839 // optimizations. 840 // Don't do this for the MCU if there are still free integer registers 841 // (see X86_64 ABI for full explanation). 842 if (TI.Width <= 4 * 32 && (!IsMCUABI || State.FreeRegs == 0) && 843 canExpandIndirectArgument(Ty)) 844 return ABIArgInfo::getExpandWithPadding( 845 IsFastCall || IsVectorCall || IsRegCall, PaddingType); 846 847 return getIndirectResult(Ty, true, State); 848 } 849 850 if (const VectorType *VT = Ty->getAs<VectorType>()) { 851 // On Windows, vectors are passed directly if registers are available, or 852 // indirectly if not. This avoids the need to align argument memory. Pass 853 // user-defined vector types larger than 512 bits indirectly for simplicity. 854 if (IsWin32StructABI) { 855 if (TI.Width <= 512 && State.FreeSSERegs > 0) { 856 --State.FreeSSERegs; 857 return ABIArgInfo::getDirectInReg(); 858 } 859 return getIndirectResult(Ty, /*ByVal=*/false, State); 860 } 861 862 // On Darwin, some vectors are passed in memory, we handle this by passing 863 // it as an i8/i16/i32/i64. 
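    // For example, a 16-bit <2 x i8> vector is coerced to an i16 argument by
    // the check below.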
864 if (IsDarwinVectorABI) { 865 if ((TI.Width == 8 || TI.Width == 16 || TI.Width == 32) || 866 (TI.Width == 64 && VT->getNumElements() == 1)) 867 return ABIArgInfo::getDirect( 868 llvm::IntegerType::get(getVMContext(), TI.Width)); 869 } 870 871 if (IsX86_MMXType(CGT.ConvertType(Ty))) 872 return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(), 64)); 873 874 return ABIArgInfo::getDirect(); 875 } 876 877 878 if (const EnumType *EnumTy = Ty->getAs<EnumType>()) 879 Ty = EnumTy->getDecl()->getIntegerType(); 880 881 bool InReg = shouldPrimitiveUseInReg(Ty, State); 882 883 if (isPromotableIntegerTypeForABI(Ty)) { 884 if (InReg) 885 return ABIArgInfo::getExtendInReg(Ty); 886 return ABIArgInfo::getExtend(Ty); 887 } 888 889 if (const auto *EIT = Ty->getAs<BitIntType>()) { 890 if (EIT->getNumBits() <= 64) { 891 if (InReg) 892 return ABIArgInfo::getDirectInReg(); 893 return ABIArgInfo::getDirect(); 894 } 895 return getIndirectResult(Ty, /*ByVal=*/false, State); 896 } 897 898 if (InReg) 899 return ABIArgInfo::getDirectInReg(); 900 return ABIArgInfo::getDirect(); 901 } 902 903 void X86_32ABIInfo::computeInfo(CGFunctionInfo &FI) const { 904 CCState State(FI); 905 if (IsMCUABI) 906 State.FreeRegs = 3; 907 else if (State.CC == llvm::CallingConv::X86_FastCall) { 908 State.FreeRegs = 2; 909 State.FreeSSERegs = 3; 910 } else if (State.CC == llvm::CallingConv::X86_VectorCall) { 911 State.FreeRegs = 2; 912 State.FreeSSERegs = 6; 913 } else if (FI.getHasRegParm()) 914 State.FreeRegs = FI.getRegParm(); 915 else if (State.CC == llvm::CallingConv::X86_RegCall) { 916 State.FreeRegs = 5; 917 State.FreeSSERegs = 8; 918 } else if (IsWin32StructABI) { 919 // Since MSVC 2015, the first three SSE vectors have been passed in 920 // registers. The rest are passed indirectly. 921 State.FreeRegs = DefaultNumRegisterParameters; 922 State.FreeSSERegs = 3; 923 } else 924 State.FreeRegs = DefaultNumRegisterParameters; 925 926 if (!::classifyReturnType(getCXXABI(), FI, *this)) { 927 FI.getReturnInfo() = classifyReturnType(FI.getReturnType(), State); 928 } else if (FI.getReturnInfo().isIndirect()) { 929 // The C++ ABI is not aware of register usage, so we have to check if the 930 // return value was sret and put it in a register ourselves if appropriate. 931 if (State.FreeRegs) { 932 --State.FreeRegs; // The sret parameter consumes a register. 933 if (!IsMCUABI) 934 FI.getReturnInfo().setInReg(true); 935 } 936 } 937 938 // The chain argument effectively gives us another free register. 939 if (FI.isChainCall()) 940 ++State.FreeRegs; 941 942 // For vectorcall, do a first pass over the arguments, assigning FP and vector 943 // arguments to XMM registers as available. 944 if (State.CC == llvm::CallingConv::X86_VectorCall) 945 runVectorCallFirstPass(FI, State); 946 947 bool UsedInAlloca = false; 948 MutableArrayRef<CGFunctionInfoArgInfo> Args = FI.arguments(); 949 for (unsigned I = 0, E = Args.size(); I < E; ++I) { 950 // Skip arguments that have already been assigned. 951 if (State.IsPreassigned.test(I)) 952 continue; 953 954 Args[I].info = 955 classifyArgumentType(Args[I].type, State, I); 956 UsedInAlloca |= (Args[I].info.getKind() == ABIArgInfo::InAlloca); 957 } 958 959 // If we needed to use inalloca for any argument, do a second pass and rewrite 960 // all the memory arguments to use inalloca. 
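  // For illustration: on Win32 with the default register-parameter settings, a
  // call such as f(NonTrivial, int) - where NonTrivial is any class with a
  // non-trivial copy constructor - ends up with both arguments as fields of a
  // single packed inalloca struct.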
961 if (UsedInAlloca) 962 rewriteWithInAlloca(FI); 963 } 964 965 void 966 X86_32ABIInfo::addFieldToArgStruct(SmallVector<llvm::Type *, 6> &FrameFields, 967 CharUnits &StackOffset, ABIArgInfo &Info, 968 QualType Type) const { 969 // Arguments are always 4-byte-aligned. 970 CharUnits WordSize = CharUnits::fromQuantity(4); 971 assert(StackOffset.isMultipleOf(WordSize) && "unaligned inalloca struct"); 972 973 // sret pointers and indirect things will require an extra pointer 974 // indirection, unless they are byval. Most things are byval, and will not 975 // require this indirection. 976 bool IsIndirect = false; 977 if (Info.isIndirect() && !Info.getIndirectByVal()) 978 IsIndirect = true; 979 Info = ABIArgInfo::getInAlloca(FrameFields.size(), IsIndirect); 980 llvm::Type *LLTy = CGT.ConvertTypeForMem(Type); 981 if (IsIndirect) 982 LLTy = llvm::PointerType::getUnqual(getVMContext()); 983 FrameFields.push_back(LLTy); 984 StackOffset += IsIndirect ? WordSize : getContext().getTypeSizeInChars(Type); 985 986 // Insert padding bytes to respect alignment. 987 CharUnits FieldEnd = StackOffset; 988 StackOffset = FieldEnd.alignTo(WordSize); 989 if (StackOffset != FieldEnd) { 990 CharUnits NumBytes = StackOffset - FieldEnd; 991 llvm::Type *Ty = llvm::Type::getInt8Ty(getVMContext()); 992 Ty = llvm::ArrayType::get(Ty, NumBytes.getQuantity()); 993 FrameFields.push_back(Ty); 994 } 995 } 996 997 static bool isArgInAlloca(const ABIArgInfo &Info) { 998 // Leave ignored and inreg arguments alone. 999 switch (Info.getKind()) { 1000 case ABIArgInfo::InAlloca: 1001 return true; 1002 case ABIArgInfo::Ignore: 1003 case ABIArgInfo::IndirectAliased: 1004 return false; 1005 case ABIArgInfo::Indirect: 1006 case ABIArgInfo::Direct: 1007 case ABIArgInfo::Extend: 1008 return !Info.getInReg(); 1009 case ABIArgInfo::Expand: 1010 case ABIArgInfo::CoerceAndExpand: 1011 // These are aggregate types which are never passed in registers when 1012 // inalloca is involved. 1013 return true; 1014 } 1015 llvm_unreachable("invalid enum"); 1016 } 1017 1018 void X86_32ABIInfo::rewriteWithInAlloca(CGFunctionInfo &FI) const { 1019 assert(IsWin32StructABI && "inalloca only supported on win32"); 1020 1021 // Build a packed struct type for all of the arguments in memory. 1022 SmallVector<llvm::Type *, 6> FrameFields; 1023 1024 // The stack alignment is always 4. 1025 CharUnits StackAlign = CharUnits::fromQuantity(4); 1026 1027 CharUnits StackOffset; 1028 CGFunctionInfo::arg_iterator I = FI.arg_begin(), E = FI.arg_end(); 1029 1030 // Put 'this' into the struct before 'sret', if necessary. 1031 bool IsThisCall = 1032 FI.getCallingConvention() == llvm::CallingConv::X86_ThisCall; 1033 ABIArgInfo &Ret = FI.getReturnInfo(); 1034 if (Ret.isIndirect() && Ret.isSRetAfterThis() && !IsThisCall && 1035 isArgInAlloca(I->info)) { 1036 addFieldToArgStruct(FrameFields, StackOffset, I->info, I->type); 1037 ++I; 1038 } 1039 1040 // Put the sret parameter into the inalloca struct if it's in memory. 1041 if (Ret.isIndirect() && !Ret.getInReg()) { 1042 addFieldToArgStruct(FrameFields, StackOffset, Ret, FI.getReturnType()); 1043 // On Windows, the hidden sret parameter is always returned in eax. 1044 Ret.setInAllocaSRet(IsWin32StructABI); 1045 } 1046 1047 // Skip the 'this' parameter in ecx. 1048 if (IsThisCall) 1049 ++I; 1050 1051 // Put arguments passed in memory into the struct. 
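  // Arguments already assigned to registers keep their ABIArgInfo untouched;
  // isArgInAlloca filters them out.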
1052 for (; I != E; ++I) { 1053 if (isArgInAlloca(I->info)) 1054 addFieldToArgStruct(FrameFields, StackOffset, I->info, I->type); 1055 } 1056 1057 FI.setArgStruct(llvm::StructType::get(getVMContext(), FrameFields, 1058 /*isPacked=*/true), 1059 StackAlign); 1060 } 1061 1062 Address X86_32ABIInfo::EmitVAArg(CodeGenFunction &CGF, 1063 Address VAListAddr, QualType Ty) const { 1064 1065 auto TypeInfo = getContext().getTypeInfoInChars(Ty); 1066 1067 // x86-32 changes the alignment of certain arguments on the stack. 1068 // 1069 // Just messing with TypeInfo like this works because we never pass 1070 // anything indirectly. 1071 TypeInfo.Align = CharUnits::fromQuantity( 1072 getTypeStackAlignInBytes(Ty, TypeInfo.Align.getQuantity())); 1073 1074 return emitVoidPtrVAArg(CGF, VAListAddr, Ty, /*Indirect*/ false, 1075 TypeInfo, CharUnits::fromQuantity(4), 1076 /*AllowHigherAlign*/ true); 1077 } 1078 1079 bool X86_32TargetCodeGenInfo::isStructReturnInRegABI( 1080 const llvm::Triple &Triple, const CodeGenOptions &Opts) { 1081 assert(Triple.getArch() == llvm::Triple::x86); 1082 1083 switch (Opts.getStructReturnConvention()) { 1084 case CodeGenOptions::SRCK_Default: 1085 break; 1086 case CodeGenOptions::SRCK_OnStack: // -fpcc-struct-return 1087 return false; 1088 case CodeGenOptions::SRCK_InRegs: // -freg-struct-return 1089 return true; 1090 } 1091 1092 if (Triple.isOSDarwin() || Triple.isOSIAMCU()) 1093 return true; 1094 1095 switch (Triple.getOS()) { 1096 case llvm::Triple::DragonFly: 1097 case llvm::Triple::FreeBSD: 1098 case llvm::Triple::OpenBSD: 1099 case llvm::Triple::Win32: 1100 return true; 1101 default: 1102 return false; 1103 } 1104 } 1105 1106 static void addX86InterruptAttrs(const FunctionDecl *FD, llvm::GlobalValue *GV, 1107 CodeGen::CodeGenModule &CGM) { 1108 if (!FD->hasAttr<AnyX86InterruptAttr>()) 1109 return; 1110 1111 llvm::Function *Fn = cast<llvm::Function>(GV); 1112 Fn->setCallingConv(llvm::CallingConv::X86_INTR); 1113 if (FD->getNumParams() == 0) 1114 return; 1115 1116 auto PtrTy = cast<PointerType>(FD->getParamDecl(0)->getType()); 1117 llvm::Type *ByValTy = CGM.getTypes().ConvertType(PtrTy->getPointeeType()); 1118 llvm::Attribute NewAttr = llvm::Attribute::getWithByValType( 1119 Fn->getContext(), ByValTy); 1120 Fn->addParamAttr(0, NewAttr); 1121 } 1122 1123 void X86_32TargetCodeGenInfo::setTargetAttributes( 1124 const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const { 1125 if (GV->isDeclaration()) 1126 return; 1127 if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) { 1128 if (FD->hasAttr<X86ForceAlignArgPointerAttr>()) { 1129 llvm::Function *Fn = cast<llvm::Function>(GV); 1130 Fn->addFnAttr("stackrealign"); 1131 } 1132 1133 addX86InterruptAttrs(FD, GV, CGM); 1134 } 1135 } 1136 1137 bool X86_32TargetCodeGenInfo::initDwarfEHRegSizeTable( 1138 CodeGen::CodeGenFunction &CGF, 1139 llvm::Value *Address) const { 1140 CodeGen::CGBuilderTy &Builder = CGF.Builder; 1141 1142 llvm::Value *Four8 = llvm::ConstantInt::get(CGF.Int8Ty, 4); 1143 1144 // 0-7 are the eight integer registers; the order is different 1145 // on Darwin (for EH), but the range is the same. 1146 // 8 is %eip. 1147 AssignToArrayRange(Builder, Address, Four8, 0, 8); 1148 1149 if (CGF.CGM.getTarget().getTriple().isOSDarwin()) { 1150 // 12-16 are st(0..4). Not sure why we stop at 4. 1151 // These have size 16, which is sizeof(long double) on 1152 // platforms with 8-byte alignment for that type. 
    llvm::Value *Sixteen8 = llvm::ConstantInt::get(CGF.Int8Ty, 16);
    AssignToArrayRange(Builder, Address, Sixteen8, 12, 16);

  } else {
    // 9 is %eflags, which doesn't get a size on Darwin for some
    // reason.
    Builder.CreateAlignedStore(
        Four8, Builder.CreateConstInBoundsGEP1_32(CGF.Int8Ty, Address, 9),
        CharUnits::One());

    // 11-16 are st(0..5). Not sure why we stop at 5.
    // These have size 12, which is sizeof(long double) on
    // platforms with 4-byte alignment for that type.
    llvm::Value *Twelve8 = llvm::ConstantInt::get(CGF.Int8Ty, 12);
    AssignToArrayRange(Builder, Address, Twelve8, 11, 16);
  }

  return false;
}

//===----------------------------------------------------------------------===//
// X86-64 ABI Implementation
//===----------------------------------------------------------------------===//


namespace {

/// Returns the size in bits of the largest (native) vector for \p AVXLevel.
static unsigned getNativeVectorSizeForAVXABI(X86AVXABILevel AVXLevel) {
  switch (AVXLevel) {
  case X86AVXABILevel::AVX512:
    return 512;
  case X86AVXABILevel::AVX:
    return 256;
  case X86AVXABILevel::None:
    return 128;
  }
  llvm_unreachable("Unknown AVXLevel");
}

/// X86_64ABIInfo - The X86_64 ABI information.
class X86_64ABIInfo : public ABIInfo {
  enum Class {
    Integer = 0,
    SSE,
    SSEUp,
    X87,
    X87Up,
    ComplexX87,
    NoClass,
    Memory
  };

  /// merge - Implement the X86_64 ABI merging algorithm.
  ///
  /// Merge an accumulating classification \arg Accum with a field
  /// classification \arg Field.
  ///
  /// \param Accum - The accumulating classification. This should
  /// always be either NoClass or the result of a previous merge
  /// call. In addition, this should never be Memory (the caller
  /// should just return Memory for the aggregate).
  static Class merge(Class Accum, Class Field);

  /// postMerge - Implement the X86_64 ABI post merging algorithm.
  ///
  /// Post merger cleanup, reduces a malformed Hi and Lo pair to
  /// final MEMORY or SSE classes when necessary.
  ///
  /// \param AggregateSize - The size of the current aggregate in
  /// the classification process.
  ///
  /// \param Lo - The classification for the parts of the type
  /// residing in the low word of the containing object.
  ///
  /// \param Hi - The classification for the parts of the type
  /// residing in the higher words of the containing object.
  ///
  void postMerge(unsigned AggregateSize, Class &Lo, Class &Hi) const;

  /// classify - Determine the x86_64 register classes in which the
  /// given type T should be passed.
  ///
  /// \param Lo - The classification for the parts of the type
  /// residing in the low word of the containing object.
  ///
  /// \param Hi - The classification for the parts of the type
  /// residing in the high word of the containing object.
  ///
  /// \param OffsetBase - The bit offset of this type in the
  /// containing object. Some parameters are classified differently
  /// depending on whether they straddle an eightbyte boundary.
  ///
  /// \param isNamedArg - Whether the argument in question is a "named"
  /// argument, as used in AMD64-ABI 3.5.7.
  ///
  /// \param IsRegCall - Whether the calling convention is regcall.
  ///
  /// If a word is unused its result will be NoClass; if a type should
  /// be passed in Memory then at least the classification of \arg Lo
  /// will be Memory.
  ///
  /// The \arg Lo class will be NoClass iff the argument is ignored.
  ///
  /// If the \arg Lo class is ComplexX87, then the \arg Hi class will
  /// also be ComplexX87.
  void classify(QualType T, uint64_t OffsetBase, Class &Lo, Class &Hi,
                bool isNamedArg, bool IsRegCall = false) const;

  llvm::Type *GetByteVectorType(QualType Ty) const;
  llvm::Type *GetSSETypeAtOffset(llvm::Type *IRType,
                                 unsigned IROffset, QualType SourceTy,
                                 unsigned SourceOffset) const;
  llvm::Type *GetINTEGERTypeAtOffset(llvm::Type *IRType,
                                     unsigned IROffset, QualType SourceTy,
                                     unsigned SourceOffset) const;

  /// getIndirectReturnResult - Given a source type \arg Ty, return a suitable
  /// result such that the argument will be returned in memory.
  ABIArgInfo getIndirectReturnResult(QualType Ty) const;

  /// getIndirectResult - Given a source type \arg Ty, return a suitable result
  /// such that the argument will be passed in memory.
  ///
  /// \param freeIntRegs - The number of free integer registers remaining
  /// available.
  ABIArgInfo getIndirectResult(QualType Ty, unsigned freeIntRegs) const;

  ABIArgInfo classifyReturnType(QualType RetTy) const;

  ABIArgInfo classifyArgumentType(QualType Ty, unsigned freeIntRegs,
                                  unsigned &neededInt, unsigned &neededSSE,
                                  bool isNamedArg,
                                  bool IsRegCall = false) const;

  ABIArgInfo classifyRegCallStructType(QualType Ty, unsigned &NeededInt,
                                       unsigned &NeededSSE,
                                       unsigned &MaxVectorWidth) const;

  ABIArgInfo classifyRegCallStructTypeImpl(QualType Ty, unsigned &NeededInt,
                                           unsigned &NeededSSE,
                                           unsigned &MaxVectorWidth) const;

  bool IsIllegalVectorType(QualType Ty) const;

  /// The 0.98 ABI revision clarified a lot of ambiguities,
  /// unfortunately in ways that were not always consistent with
  /// certain previous compilers. In particular, platforms which
  /// required strict binary compatibility with older versions of GCC
  /// may need to exempt themselves.
  bool honorsRevision0_98() const {
    return !getTarget().getTriple().isOSDarwin();
  }

  /// GCC classifies <1 x long long> as SSE but some platform ABIs choose to
  /// classify it as INTEGER (for compatibility with older clang compilers).
  bool classifyIntegerMMXAsSSE() const {
    // Clang <= 3.8 did not do this.
    if (getContext().getLangOpts().getClangABICompat() <=
        LangOptions::ClangABI::Ver3_8)
      return false;

    const llvm::Triple &Triple = getTarget().getTriple();
    if (Triple.isOSDarwin() || Triple.isPS() || Triple.isOSFreeBSD())
      return false;
    return true;
  }

  // GCC classifies vectors of __int128 as memory.
  bool passInt128VectorsInMem() const {
    // Clang <= 9.0 did not do this.
    if (getContext().getLangOpts().getClangABICompat() <=
        LangOptions::ClangABI::Ver9)
      return false;

    const llvm::Triple &T = getTarget().getTriple();
    return T.isOSLinux() || T.isOSNetBSD();
  }

  X86AVXABILevel AVXLevel;
  // Some ABIs (e.g. X32 ABI and Native Client OS) use 32 bit pointers on
  // 64-bit hardware.
  bool Has64BitPointers;

public:
  X86_64ABIInfo(CodeGen::CodeGenTypes &CGT, X86AVXABILevel AVXLevel)
      : ABIInfo(CGT), AVXLevel(AVXLevel),
        Has64BitPointers(CGT.getDataLayout().getPointerSize(0) == 8) {}

  bool isPassedUsingAVXType(QualType type) const {
    unsigned neededInt, neededSSE;
    // The freeIntRegs argument doesn't matter here.
    ABIArgInfo info = classifyArgumentType(type, 0, neededInt, neededSSE,
                                           /*isNamedArg*/true);
    if (info.isDirect()) {
      llvm::Type *ty = info.getCoerceToType();
      if (llvm::VectorType *vectorTy = dyn_cast_or_null<llvm::VectorType>(ty))
        return vectorTy->getPrimitiveSizeInBits().getFixedValue() > 128;
    }
    return false;
  }

  void computeInfo(CGFunctionInfo &FI) const override;

  Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
                    QualType Ty) const override;
  Address EmitMSVAArg(CodeGenFunction &CGF, Address VAListAddr,
                      QualType Ty) const override;

  bool has64BitPointers() const {
    return Has64BitPointers;
  }
};

/// WinX86_64ABIInfo - The Windows X86_64 ABI information.
class WinX86_64ABIInfo : public ABIInfo {
public:
  WinX86_64ABIInfo(CodeGen::CodeGenTypes &CGT, X86AVXABILevel AVXLevel)
      : ABIInfo(CGT), AVXLevel(AVXLevel),
        IsMingw64(getTarget().getTriple().isWindowsGNUEnvironment()) {}

  void computeInfo(CGFunctionInfo &FI) const override;

  Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
                    QualType Ty) const override;

  bool isHomogeneousAggregateBaseType(QualType Ty) const override {
    // FIXME: Assumes vectorcall is in use.
    return isX86VectorTypeForVectorCall(getContext(), Ty);
  }

  bool isHomogeneousAggregateSmallEnough(const Type *Ty,
                                         uint64_t NumMembers) const override {
    // FIXME: Assumes vectorcall is in use.
    return isX86VectorCallAggregateSmallEnough(NumMembers);
  }

private:
  ABIArgInfo classify(QualType Ty, unsigned &FreeSSERegs, bool IsReturnType,
                      bool IsVectorCall, bool IsRegCall) const;
  ABIArgInfo reclassifyHvaArgForVectorCall(QualType Ty, unsigned &FreeSSERegs,
                                           const ABIArgInfo &current) const;

  X86AVXABILevel AVXLevel;

  bool IsMingw64;
};

class X86_64TargetCodeGenInfo : public TargetCodeGenInfo {
public:
  X86_64TargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, X86AVXABILevel AVXLevel)
      : TargetCodeGenInfo(std::make_unique<X86_64ABIInfo>(CGT, AVXLevel)) {
    SwiftInfo =
        std::make_unique<SwiftABIInfo>(CGT, /*SwiftErrorInRegister=*/true);
  }

  /// Disable tail call on x86-64. The epilogue code before the tail jump blocks
  /// autoreleaseRV/retainRV and autoreleaseRV/unsafeClaimRV optimizations.
  bool markARCOptimizedReturnCallsAsNoTail() const override { return true; }

  int getDwarfEHStackPointer(CodeGen::CodeGenModule &CGM) const override {
    return 7;
  }

  bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF,
                               llvm::Value *Address) const override {
    llvm::Value *Eight8 = llvm::ConstantInt::get(CGF.Int8Ty, 8);

    // 0-15 are the 16 integer registers.
    // 16 is %rip.
    AssignToArrayRange(CGF.Builder, Address, Eight8, 0, 16);
    return false;
  }

  llvm::Type* adjustInlineAsmType(CodeGen::CodeGenFunction &CGF,
                                  StringRef Constraint,
                                  llvm::Type* Ty) const override {
    return X86AdjustInlineAsmType(CGF, Constraint, Ty);
  }

  bool isNoProtoCallVariadic(const CallArgList &args,
                             const FunctionNoProtoType *fnType) const override {
    // The default CC on x86-64 sets %al to the number of SSE
    // registers used, and GCC sets this when calling an unprototyped
    // function, so we override the default behavior. However, don't do
    // that when AVX types are involved: the ABI explicitly states it is
    // undefined, and it doesn't work in practice because of how the ABI
    // defines varargs anyway.
    if (fnType->getCallConv() == CC_C) {
      bool HasAVXType = false;
      for (CallArgList::const_iterator
             it = args.begin(), ie = args.end(); it != ie; ++it) {
        if (getABIInfo<X86_64ABIInfo>().isPassedUsingAVXType(it->Ty)) {
          HasAVXType = true;
          break;
        }
      }

      if (!HasAVXType)
        return true;
    }

    return TargetCodeGenInfo::isNoProtoCallVariadic(args, fnType);
  }

  void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
                           CodeGen::CodeGenModule &CGM) const override {
    if (GV->isDeclaration())
      return;
    if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) {
      if (FD->hasAttr<X86ForceAlignArgPointerAttr>()) {
        llvm::Function *Fn = cast<llvm::Function>(GV);
        Fn->addFnAttr("stackrealign");
      }

      addX86InterruptAttrs(FD, GV, CGM);
    }
  }

  void checkFunctionCallABI(CodeGenModule &CGM, SourceLocation CallLoc,
                            const FunctionDecl *Caller,
                            const FunctionDecl *Callee,
                            const CallArgList &Args) const override;
};
} // namespace

static void initFeatureMaps(const ASTContext &Ctx,
                            llvm::StringMap<bool> &CallerMap,
                            const FunctionDecl *Caller,
                            llvm::StringMap<bool> &CalleeMap,
                            const FunctionDecl *Callee) {
  if (CalleeMap.empty() && CallerMap.empty()) {
    // The caller is potentially nullptr in the case where the call isn't in a
    // function. In this case, getFunctionFeatureMap ensures we just get the
    // TU-level setting (since it cannot be modified by 'target').
    Ctx.getFunctionFeatureMap(CallerMap, Caller);
    Ctx.getFunctionFeatureMap(CalleeMap, Callee);
  }
}

static bool checkAVXParamFeature(DiagnosticsEngine &Diag,
                                 SourceLocation CallLoc,
                                 const llvm::StringMap<bool> &CallerMap,
                                 const llvm::StringMap<bool> &CalleeMap,
                                 QualType Ty, StringRef Feature,
                                 bool IsArgument) {
  bool CallerHasFeat = CallerMap.lookup(Feature);
  bool CalleeHasFeat = CalleeMap.lookup(Feature);
  if (!CallerHasFeat && !CalleeHasFeat)
    return Diag.Report(CallLoc, diag::warn_avx_calling_convention)
           << IsArgument << Ty << Feature;

  // Mixing calling conventions here is very clearly an error.
  if (!CallerHasFeat || !CalleeHasFeat)
    return Diag.Report(CallLoc, diag::err_avx_calling_convention)
           << IsArgument << Ty << Feature;

  // Else, both caller and callee have the required feature, so there is no need
  // to diagnose.
  return false;
}

static bool checkAVX512ParamFeature(DiagnosticsEngine &Diag,
                                    SourceLocation CallLoc,
                                    const llvm::StringMap<bool> &CallerMap,
                                    const llvm::StringMap<bool> &CalleeMap,
                                    QualType Ty, bool IsArgument) {
  bool Caller256 = CallerMap.lookup("avx512f") && !CallerMap.lookup("evex512");
  bool Callee256 = CalleeMap.lookup("avx512f") && !CalleeMap.lookup("evex512");

  // Forbid 512-bit or larger vector pass or return when we disabled ZMM
  // instructions.
  if (Caller256 || Callee256)
    return Diag.Report(CallLoc, diag::err_avx_calling_convention)
           << IsArgument << Ty << "evex512";

  return checkAVXParamFeature(Diag, CallLoc, CallerMap, CalleeMap, Ty,
                              "avx512f", IsArgument);
}

static bool checkAVXParam(DiagnosticsEngine &Diag, ASTContext &Ctx,
                          SourceLocation CallLoc,
                          const llvm::StringMap<bool> &CallerMap,
                          const llvm::StringMap<bool> &CalleeMap, QualType Ty,
                          bool IsArgument) {
  uint64_t Size = Ctx.getTypeSize(Ty);
  if (Size > 256)
    return checkAVX512ParamFeature(Diag, CallLoc, CallerMap, CalleeMap, Ty,
                                   IsArgument);

  if (Size > 128)
    return checkAVXParamFeature(Diag, CallLoc, CallerMap, CalleeMap, Ty, "avx",
                                IsArgument);

  return false;
}

void X86_64TargetCodeGenInfo::checkFunctionCallABI(
    CodeGenModule &CGM, SourceLocation CallLoc, const FunctionDecl *Caller,
    const FunctionDecl *Callee, const CallArgList &Args) const {
  llvm::StringMap<bool> CallerMap;
  llvm::StringMap<bool> CalleeMap;
  unsigned ArgIndex = 0;

  // We need to loop through the actual call arguments rather than the
  // function's parameters, in case this call is variadic.
  for (const CallArg &Arg : Args) {
    // The "avx" feature changes how vectors >128 bits in size are passed.
    // "avx512f" additionally changes how vectors >256 bits in size are passed.
    // Like GCC, we warn when a function is called with an argument where this
    // will change. Unlike GCC, we also error when it is an obvious ABI
    // mismatch, that is, the caller and callee features are mismatched.
    // Unfortunately, we cannot do this diagnostic in SEMA, since the callee can
    // change its ABI with attribute-target after this call.
    if (Arg.getType()->isVectorType() &&
        CGM.getContext().getTypeSize(Arg.getType()) > 128) {
      initFeatureMaps(CGM.getContext(), CallerMap, Caller, CalleeMap, Callee);
      QualType Ty = Arg.getType();
      // The CallArg seems to have desugared the type already, so for clearer
      // diagnostics, replace it with the type in the FunctionDecl if possible.
      if (ArgIndex < Callee->getNumParams())
        Ty = Callee->getParamDecl(ArgIndex)->getType();

      if (checkAVXParam(CGM.getDiags(), CGM.getContext(), CallLoc, CallerMap,
                        CalleeMap, Ty, /*IsArgument*/ true))
        return;
    }
    ++ArgIndex;
  }

  // Check return always, as we don't have a good way of knowing in codegen
  // whether this value is used, tail-called, etc.
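  // For example, a callee returning __m256 is checked here the same way an
  // over-wide vector argument is, against the same feature requirements.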
1585 if (Callee->getReturnType()->isVectorType() && 1586 CGM.getContext().getTypeSize(Callee->getReturnType()) > 128) { 1587 initFeatureMaps(CGM.getContext(), CallerMap, Caller, CalleeMap, Callee); 1588 checkAVXParam(CGM.getDiags(), CGM.getContext(), CallLoc, CallerMap, 1589 CalleeMap, Callee->getReturnType(), 1590 /*IsArgument*/ false); 1591 } 1592 } 1593 1594 std::string TargetCodeGenInfo::qualifyWindowsLibrary(StringRef Lib) { 1595 // If the argument does not end in .lib, automatically add the suffix. 1596 // If the argument contains a space, enclose it in quotes. 1597 // This matches the behavior of MSVC. 1598 bool Quote = Lib.contains(' '); 1599 std::string ArgStr = Quote ? "\"" : ""; 1600 ArgStr += Lib; 1601 if (!Lib.ends_with_insensitive(".lib") && !Lib.ends_with_insensitive(".a")) 1602 ArgStr += ".lib"; 1603 ArgStr += Quote ? "\"" : ""; 1604 return ArgStr; 1605 } 1606 1607 namespace { 1608 class WinX86_32TargetCodeGenInfo : public X86_32TargetCodeGenInfo { 1609 public: 1610 WinX86_32TargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, 1611 bool DarwinVectorABI, bool RetSmallStructInRegABI, bool Win32StructABI, 1612 unsigned NumRegisterParameters) 1613 : X86_32TargetCodeGenInfo(CGT, DarwinVectorABI, RetSmallStructInRegABI, 1614 Win32StructABI, NumRegisterParameters, false) {} 1615 1616 void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, 1617 CodeGen::CodeGenModule &CGM) const override; 1618 1619 void getDependentLibraryOption(llvm::StringRef Lib, 1620 llvm::SmallString<24> &Opt) const override { 1621 Opt = "/DEFAULTLIB:"; 1622 Opt += qualifyWindowsLibrary(Lib); 1623 } 1624 1625 void getDetectMismatchOption(llvm::StringRef Name, 1626 llvm::StringRef Value, 1627 llvm::SmallString<32> &Opt) const override { 1628 Opt = "/FAILIFMISMATCH:\"" + Name.str() + "=" + Value.str() + "\""; 1629 } 1630 }; 1631 } // namespace 1632 1633 void WinX86_32TargetCodeGenInfo::setTargetAttributes( 1634 const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const { 1635 X86_32TargetCodeGenInfo::setTargetAttributes(D, GV, CGM); 1636 if (GV->isDeclaration()) 1637 return; 1638 addStackProbeTargetAttributes(D, GV, CGM); 1639 } 1640 1641 namespace { 1642 class WinX86_64TargetCodeGenInfo : public TargetCodeGenInfo { 1643 public: 1644 WinX86_64TargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, 1645 X86AVXABILevel AVXLevel) 1646 : TargetCodeGenInfo(std::make_unique<WinX86_64ABIInfo>(CGT, AVXLevel)) { 1647 SwiftInfo = 1648 std::make_unique<SwiftABIInfo>(CGT, /*SwiftErrorInRegister=*/true); 1649 } 1650 1651 void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, 1652 CodeGen::CodeGenModule &CGM) const override; 1653 1654 int getDwarfEHStackPointer(CodeGen::CodeGenModule &CGM) const override { 1655 return 7; 1656 } 1657 1658 bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, 1659 llvm::Value *Address) const override { 1660 llvm::Value *Eight8 = llvm::ConstantInt::get(CGF.Int8Ty, 8); 1661 1662 // 0-15 are the 16 integer registers. 1663 // 16 is %rip. 
1664 AssignToArrayRange(CGF.Builder, Address, Eight8, 0, 16); 1665 return false; 1666 } 1667 1668 void getDependentLibraryOption(llvm::StringRef Lib, 1669 llvm::SmallString<24> &Opt) const override { 1670 Opt = "/DEFAULTLIB:"; 1671 Opt += qualifyWindowsLibrary(Lib); 1672 } 1673 1674 void getDetectMismatchOption(llvm::StringRef Name, 1675 llvm::StringRef Value, 1676 llvm::SmallString<32> &Opt) const override { 1677 Opt = "/FAILIFMISMATCH:\"" + Name.str() + "=" + Value.str() + "\""; 1678 } 1679 }; 1680 } // namespace 1681 1682 void WinX86_64TargetCodeGenInfo::setTargetAttributes( 1683 const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const { 1684 TargetCodeGenInfo::setTargetAttributes(D, GV, CGM); 1685 if (GV->isDeclaration()) 1686 return; 1687 if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) { 1688 if (FD->hasAttr<X86ForceAlignArgPointerAttr>()) { 1689 llvm::Function *Fn = cast<llvm::Function>(GV); 1690 Fn->addFnAttr("stackrealign"); 1691 } 1692 1693 addX86InterruptAttrs(FD, GV, CGM); 1694 } 1695 1696 addStackProbeTargetAttributes(D, GV, CGM); 1697 } 1698 1699 void X86_64ABIInfo::postMerge(unsigned AggregateSize, Class &Lo, 1700 Class &Hi) const { 1701 // AMD64-ABI 3.2.3p2: Rule 5. Then a post merger cleanup is done: 1702 // 1703 // (a) If one of the classes is Memory, the whole argument is passed in 1704 // memory. 1705 // 1706 // (b) If X87UP is not preceded by X87, the whole argument is passed in 1707 // memory. 1708 // 1709 // (c) If the size of the aggregate exceeds two eightbytes and the first 1710 // eightbyte isn't SSE or any other eightbyte isn't SSEUP, the whole 1711 // argument is passed in memory. NOTE: This is necessary to keep the 1712 // ABI working for processors that don't support the __m256 type. 1713 // 1714 // (d) If SSEUP is not preceded by SSE or SSEUP, it is converted to SSE. 1715 // 1716 // Some of these are enforced by the merging logic. Others can arise 1717 // only with unions; for example: 1718 // union { _Complex double; unsigned; } 1719 // 1720 // Note that clauses (b) and (c) were added in 0.98. 1721 // 1722 if (Hi == Memory) 1723 Lo = Memory; 1724 if (Hi == X87Up && Lo != X87 && honorsRevision0_98()) 1725 Lo = Memory; 1726 if (AggregateSize > 128 && (Lo != SSE || Hi != SSEUp)) 1727 Lo = Memory; 1728 if (Hi == SSEUp && Lo != SSE) 1729 Hi = SSE; 1730 } 1731 1732 X86_64ABIInfo::Class X86_64ABIInfo::merge(Class Accum, Class Field) { 1733 // AMD64-ABI 3.2.3p2: Rule 4. Each field of an object is 1734 // classified recursively so that always two fields are 1735 // considered. The resulting class is calculated according to 1736 // the classes of the fields in the eightbyte: 1737 // 1738 // (a) If both classes are equal, this is the resulting class. 1739 // 1740 // (b) If one of the classes is NO_CLASS, the resulting class is 1741 // the other class. 1742 // 1743 // (c) If one of the classes is MEMORY, the result is the MEMORY 1744 // class. 1745 // 1746 // (d) If one of the classes is INTEGER, the result is the 1747 // INTEGER. 1748 // 1749 // (e) If one of the classes is X87, X87UP, COMPLEX_X87 class, 1750 // MEMORY is used as class. 1751 // 1752 // (f) Otherwise class SSE is used. 1753 1754 // Accum should never be memory (we should have returned) or 1755 // ComplexX87 (because this cannot be passed in a structure). 
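  // For example (illustrative): in struct { float f; int i; } both fields land
  // in the same eightbyte; the float classifies as SSE and the int as INTEGER,
  // so rule (d) merges the eightbyte to INTEGER and it is passed in a GPR.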
1756 assert((Accum != Memory && Accum != ComplexX87) && 1757 "Invalid accumulated classification during merge."); 1758 if (Accum == Field || Field == NoClass) 1759 return Accum; 1760 if (Field == Memory) 1761 return Memory; 1762 if (Accum == NoClass) 1763 return Field; 1764 if (Accum == Integer || Field == Integer) 1765 return Integer; 1766 if (Field == X87 || Field == X87Up || Field == ComplexX87 || 1767 Accum == X87 || Accum == X87Up) 1768 return Memory; 1769 return SSE; 1770 } 1771 1772 void X86_64ABIInfo::classify(QualType Ty, uint64_t OffsetBase, Class &Lo, 1773 Class &Hi, bool isNamedArg, bool IsRegCall) const { 1774 // FIXME: This code can be simplified by introducing a simple value class for 1775 // Class pairs with appropriate constructor methods for the various 1776 // situations. 1777 1778 // FIXME: Some of the split computations are wrong; unaligned vectors 1779 // shouldn't be passed in registers for example, so there is no chance they 1780 // can straddle an eightbyte. Verify & simplify. 1781 1782 Lo = Hi = NoClass; 1783 1784 Class &Current = OffsetBase < 64 ? Lo : Hi; 1785 Current = Memory; 1786 1787 if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) { 1788 BuiltinType::Kind k = BT->getKind(); 1789 1790 if (k == BuiltinType::Void) { 1791 Current = NoClass; 1792 } else if (k == BuiltinType::Int128 || k == BuiltinType::UInt128) { 1793 Lo = Integer; 1794 Hi = Integer; 1795 } else if (k >= BuiltinType::Bool && k <= BuiltinType::LongLong) { 1796 Current = Integer; 1797 } else if (k == BuiltinType::Float || k == BuiltinType::Double || 1798 k == BuiltinType::Float16 || k == BuiltinType::BFloat16) { 1799 Current = SSE; 1800 } else if (k == BuiltinType::Float128) { 1801 Lo = SSE; 1802 Hi = SSEUp; 1803 } else if (k == BuiltinType::LongDouble) { 1804 const llvm::fltSemantics *LDF = &getTarget().getLongDoubleFormat(); 1805 if (LDF == &llvm::APFloat::IEEEquad()) { 1806 Lo = SSE; 1807 Hi = SSEUp; 1808 } else if (LDF == &llvm::APFloat::x87DoubleExtended()) { 1809 Lo = X87; 1810 Hi = X87Up; 1811 } else if (LDF == &llvm::APFloat::IEEEdouble()) { 1812 Current = SSE; 1813 } else 1814 llvm_unreachable("unexpected long double representation!"); 1815 } 1816 // FIXME: _Decimal32 and _Decimal64 are SSE. 1817 // FIXME: _float128 and _Decimal128 are (SSE, SSEUp). 1818 return; 1819 } 1820 1821 if (const EnumType *ET = Ty->getAs<EnumType>()) { 1822 // Classify the underlying integer type. 1823 classify(ET->getDecl()->getIntegerType(), OffsetBase, Lo, Hi, isNamedArg); 1824 return; 1825 } 1826 1827 if (Ty->hasPointerRepresentation()) { 1828 Current = Integer; 1829 return; 1830 } 1831 1832 if (Ty->isMemberPointerType()) { 1833 if (Ty->isMemberFunctionPointerType()) { 1834 if (Has64BitPointers) { 1835 // If Has64BitPointers, this is an {i64, i64}, so classify both 1836 // Lo and Hi now. 1837 Lo = Hi = Integer; 1838 } else { 1839 // Otherwise, with 32-bit pointers, this is an {i32, i32}. If that 1840 // straddles an eightbyte boundary, Hi should be classified as well. 
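        // Illustrative case: with 32-bit pointers and OffsetBase == 32, the
        // {i32, i32} pair occupies bits [32, 96) and crosses the 64-bit
        // boundary, so both Lo and Hi become Integer; at OffsetBase == 0 it
        // stays within a single eightbyte.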
1841 uint64_t EB_FuncPtr = (OffsetBase) / 64; 1842 uint64_t EB_ThisAdj = (OffsetBase + 64 - 1) / 64; 1843 if (EB_FuncPtr != EB_ThisAdj) { 1844 Lo = Hi = Integer; 1845 } else { 1846 Current = Integer; 1847 } 1848 } 1849 } else { 1850 Current = Integer; 1851 } 1852 return; 1853 } 1854 1855 if (const VectorType *VT = Ty->getAs<VectorType>()) { 1856 uint64_t Size = getContext().getTypeSize(VT); 1857 if (Size == 1 || Size == 8 || Size == 16 || Size == 32) { 1858 // gcc passes the following as integer: 1859 // 4 bytes - <4 x char>, <2 x short>, <1 x int>, <1 x float> 1860 // 2 bytes - <2 x char>, <1 x short> 1861 // 1 byte - <1 x char> 1862 Current = Integer; 1863 1864 // If this type crosses an eightbyte boundary, it should be 1865 // split. 1866 uint64_t EB_Lo = (OffsetBase) / 64; 1867 uint64_t EB_Hi = (OffsetBase + Size - 1) / 64; 1868 if (EB_Lo != EB_Hi) 1869 Hi = Lo; 1870 } else if (Size == 64) { 1871 QualType ElementType = VT->getElementType(); 1872 1873 // gcc passes <1 x double> in memory. :( 1874 if (ElementType->isSpecificBuiltinType(BuiltinType::Double)) 1875 return; 1876 1877 // gcc passes <1 x long long> as SSE but clang used to unconditionally 1878 // pass them as integer. For platforms where clang is the de facto 1879 // platform compiler, we must continue to use integer. 1880 if (!classifyIntegerMMXAsSSE() && 1881 (ElementType->isSpecificBuiltinType(BuiltinType::LongLong) || 1882 ElementType->isSpecificBuiltinType(BuiltinType::ULongLong) || 1883 ElementType->isSpecificBuiltinType(BuiltinType::Long) || 1884 ElementType->isSpecificBuiltinType(BuiltinType::ULong))) 1885 Current = Integer; 1886 else 1887 Current = SSE; 1888 1889 // If this type crosses an eightbyte boundary, it should be 1890 // split. 1891 if (OffsetBase && OffsetBase != 64) 1892 Hi = Lo; 1893 } else if (Size == 128 || 1894 (isNamedArg && Size <= getNativeVectorSizeForAVXABI(AVXLevel))) { 1895 QualType ElementType = VT->getElementType(); 1896 1897 // gcc passes 256 and 512 bit <X x __int128> vectors in memory. :( 1898 if (passInt128VectorsInMem() && Size != 128 && 1899 (ElementType->isSpecificBuiltinType(BuiltinType::Int128) || 1900 ElementType->isSpecificBuiltinType(BuiltinType::UInt128))) 1901 return; 1902 1903 // Arguments of 256-bits are split into four eightbyte chunks. The 1904 // least significant one belongs to class SSE and all the others to class 1905 // SSEUP. The original Lo and Hi design considers that types can't be 1906 // greater than 128-bits, so a 64-bit split in Hi and Lo makes sense. 1907 // This design isn't correct for 256-bits, but since there're no cases 1908 // where the upper parts would need to be inspected, avoid adding 1909 // complexity and just consider Hi to match the 64-256 part. 1910 // 1911 // Note that per 3.5.7 of AMD64-ABI, 256-bit args are only passed in 1912 // registers if they are "named", i.e. not part of the "..." of a 1913 // variadic function. 1914 // 1915 // Similarly, per 3.2.3. of the AVX512 draft, 512-bits ("named") args are 1916 // split into eight eightbyte chunks, one SSE and seven SSEUP. 
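    // e.g., a named __m256 argument compiled with AVX reaches this point, gets
    // Lo = SSE and Hi = SSEUp, and travels in a single YMM register; the same
    // argument in the "..." portion of a variadic call is passed in memory.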
1917 Lo = SSE; 1918 Hi = SSEUp; 1919 } 1920 return; 1921 } 1922 1923 if (const ComplexType *CT = Ty->getAs<ComplexType>()) { 1924 QualType ET = getContext().getCanonicalType(CT->getElementType()); 1925 1926 uint64_t Size = getContext().getTypeSize(Ty); 1927 if (ET->isIntegralOrEnumerationType()) { 1928 if (Size <= 64) 1929 Current = Integer; 1930 else if (Size <= 128) 1931 Lo = Hi = Integer; 1932 } else if (ET->isFloat16Type() || ET == getContext().FloatTy || 1933 ET->isBFloat16Type()) { 1934 Current = SSE; 1935 } else if (ET == getContext().DoubleTy) { 1936 Lo = Hi = SSE; 1937 } else if (ET == getContext().LongDoubleTy) { 1938 const llvm::fltSemantics *LDF = &getTarget().getLongDoubleFormat(); 1939 if (LDF == &llvm::APFloat::IEEEquad()) 1940 Current = Memory; 1941 else if (LDF == &llvm::APFloat::x87DoubleExtended()) 1942 Current = ComplexX87; 1943 else if (LDF == &llvm::APFloat::IEEEdouble()) 1944 Lo = Hi = SSE; 1945 else 1946 llvm_unreachable("unexpected long double representation!"); 1947 } 1948 1949 // If this complex type crosses an eightbyte boundary then it 1950 // should be split. 1951 uint64_t EB_Real = (OffsetBase) / 64; 1952 uint64_t EB_Imag = (OffsetBase + getContext().getTypeSize(ET)) / 64; 1953 if (Hi == NoClass && EB_Real != EB_Imag) 1954 Hi = Lo; 1955 1956 return; 1957 } 1958 1959 if (const auto *EITy = Ty->getAs<BitIntType>()) { 1960 if (EITy->getNumBits() <= 64) 1961 Current = Integer; 1962 else if (EITy->getNumBits() <= 128) 1963 Lo = Hi = Integer; 1964 // Larger values need to get passed in memory. 1965 return; 1966 } 1967 1968 if (const ConstantArrayType *AT = getContext().getAsConstantArrayType(Ty)) { 1969 // Arrays are treated like structures. 1970 1971 uint64_t Size = getContext().getTypeSize(Ty); 1972 1973 // AMD64-ABI 3.2.3p2: Rule 1. If the size of an object is larger 1974 // than eight eightbytes, ..., it has class MEMORY. 1975 // regcall ABI doesn't have limitation to an object. The only limitation 1976 // is the free registers, which will be checked in computeInfo. 1977 if (!IsRegCall && Size > 512) 1978 return; 1979 1980 // AMD64-ABI 3.2.3p2: Rule 1. If ..., or it contains unaligned 1981 // fields, it has class MEMORY. 1982 // 1983 // Only need to check alignment of array base. 1984 if (OffsetBase % getContext().getTypeAlign(AT->getElementType())) 1985 return; 1986 1987 // Otherwise implement simplified merge. We could be smarter about 1988 // this, but it isn't worth it and would be harder to verify. 1989 Current = NoClass; 1990 uint64_t EltSize = getContext().getTypeSize(AT->getElementType()); 1991 uint64_t ArraySize = AT->getSize().getZExtValue(); 1992 1993 // The only case a 256-bit wide vector could be used is when the array 1994 // contains a single 256-bit element. Since Lo and Hi logic isn't extended 1995 // to work for sizes wider than 128, early check and fallback to memory. 
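    // For example (illustrative): a field of type float[4] at offset 0 is 128
    // bits; every element classifies as SSE, the per-eightbyte merges yield
    // Lo = Hi = SSE, and a struct containing just this field is passed in two
    // XMM registers.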
1996 // 1997 if (Size > 128 && 1998 (Size != EltSize || Size > getNativeVectorSizeForAVXABI(AVXLevel))) 1999 return; 2000 2001 for (uint64_t i=0, Offset=OffsetBase; i<ArraySize; ++i, Offset += EltSize) { 2002 Class FieldLo, FieldHi; 2003 classify(AT->getElementType(), Offset, FieldLo, FieldHi, isNamedArg); 2004 Lo = merge(Lo, FieldLo); 2005 Hi = merge(Hi, FieldHi); 2006 if (Lo == Memory || Hi == Memory) 2007 break; 2008 } 2009 2010 postMerge(Size, Lo, Hi); 2011 assert((Hi != SSEUp || Lo == SSE) && "Invalid SSEUp array classification."); 2012 return; 2013 } 2014 2015 if (const RecordType *RT = Ty->getAs<RecordType>()) { 2016 uint64_t Size = getContext().getTypeSize(Ty); 2017 2018 // AMD64-ABI 3.2.3p2: Rule 1. If the size of an object is larger 2019 // than eight eightbytes, ..., it has class MEMORY. 2020 if (Size > 512) 2021 return; 2022 2023 // AMD64-ABI 3.2.3p2: Rule 2. If a C++ object has either a non-trivial 2024 // copy constructor or a non-trivial destructor, it is passed by invisible 2025 // reference. 2026 if (getRecordArgABI(RT, getCXXABI())) 2027 return; 2028 2029 const RecordDecl *RD = RT->getDecl(); 2030 2031 // Assume variable sized types are passed in memory. 2032 if (RD->hasFlexibleArrayMember()) 2033 return; 2034 2035 const ASTRecordLayout &Layout = getContext().getASTRecordLayout(RD); 2036 2037 // Reset Lo class, this will be recomputed. 2038 Current = NoClass; 2039 2040 // If this is a C++ record, classify the bases first. 2041 if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD)) { 2042 for (const auto &I : CXXRD->bases()) { 2043 assert(!I.isVirtual() && !I.getType()->isDependentType() && 2044 "Unexpected base class!"); 2045 const auto *Base = 2046 cast<CXXRecordDecl>(I.getType()->castAs<RecordType>()->getDecl()); 2047 2048 // Classify this field. 2049 // 2050 // AMD64-ABI 3.2.3p2: Rule 3. If the size of the aggregate exceeds a 2051 // single eightbyte, each is classified separately. Each eightbyte gets 2052 // initialized to class NO_CLASS. 2053 Class FieldLo, FieldHi; 2054 uint64_t Offset = 2055 OffsetBase + getContext().toBits(Layout.getBaseClassOffset(Base)); 2056 classify(I.getType(), Offset, FieldLo, FieldHi, isNamedArg); 2057 Lo = merge(Lo, FieldLo); 2058 Hi = merge(Hi, FieldHi); 2059 if (Lo == Memory || Hi == Memory) { 2060 postMerge(Size, Lo, Hi); 2061 return; 2062 } 2063 } 2064 } 2065 2066 // Classify the fields one at a time, merging the results. 2067 unsigned idx = 0; 2068 bool UseClang11Compat = getContext().getLangOpts().getClangABICompat() <= 2069 LangOptions::ClangABI::Ver11 || 2070 getContext().getTargetInfo().getTriple().isPS(); 2071 bool IsUnion = RT->isUnionType() && !UseClang11Compat; 2072 2073 for (RecordDecl::field_iterator i = RD->field_begin(), e = RD->field_end(); 2074 i != e; ++i, ++idx) { 2075 uint64_t Offset = OffsetBase + Layout.getFieldOffset(idx); 2076 bool BitField = i->isBitField(); 2077 2078 // Ignore padding bit-fields. 2079 if (BitField && i->isUnnamedBitfield()) 2080 continue; 2081 2082 // AMD64-ABI 3.2.3p2: Rule 1. If the size of an object is larger than 2083 // eight eightbytes, or it contains unaligned fields, it has class MEMORY. 2084 // 2085 // The only case a 256-bit or a 512-bit wide vector could be used is when 2086 // the struct contains a single 256-bit or 512-bit element. Early check 2087 // and fallback to memory. 2088 // 2089 // FIXME: Extended the Lo and Hi logic properly to work for size wider 2090 // than 128. 
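      // Illustrative cases: struct { __m256 v; } has a single 256-bit field,
      // so (with AVX and a named argument) it survives this check and is
      // classified SSE/SSEUp like the vector itself, while
      // struct { __m128 a, b; } is also 256 bits but split across two fields
      // and falls back to MEMORY.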
2091 if (Size > 128 && 2092 ((!IsUnion && Size != getContext().getTypeSize(i->getType())) || 2093 Size > getNativeVectorSizeForAVXABI(AVXLevel))) { 2094 Lo = Memory; 2095 postMerge(Size, Lo, Hi); 2096 return; 2097 } 2098 // Note, skip this test for bit-fields, see below. 2099 if (!BitField && Offset % getContext().getTypeAlign(i->getType())) { 2100 Lo = Memory; 2101 postMerge(Size, Lo, Hi); 2102 return; 2103 } 2104 2105 // Classify this field. 2106 // 2107 // AMD64-ABI 3.2.3p2: Rule 3. If the size of the aggregate 2108 // exceeds a single eightbyte, each is classified 2109 // separately. Each eightbyte gets initialized to class 2110 // NO_CLASS. 2111 Class FieldLo, FieldHi; 2112 2113 // Bit-fields require special handling, they do not force the 2114 // structure to be passed in memory even if unaligned, and 2115 // therefore they can straddle an eightbyte. 2116 if (BitField) { 2117 assert(!i->isUnnamedBitfield()); 2118 uint64_t Offset = OffsetBase + Layout.getFieldOffset(idx); 2119 uint64_t Size = i->getBitWidthValue(getContext()); 2120 2121 uint64_t EB_Lo = Offset / 64; 2122 uint64_t EB_Hi = (Offset + Size - 1) / 64; 2123 2124 if (EB_Lo) { 2125 assert(EB_Hi == EB_Lo && "Invalid classification, type > 16 bytes."); 2126 FieldLo = NoClass; 2127 FieldHi = Integer; 2128 } else { 2129 FieldLo = Integer; 2130 FieldHi = EB_Hi ? Integer : NoClass; 2131 } 2132 } else 2133 classify(i->getType(), Offset, FieldLo, FieldHi, isNamedArg); 2134 Lo = merge(Lo, FieldLo); 2135 Hi = merge(Hi, FieldHi); 2136 if (Lo == Memory || Hi == Memory) 2137 break; 2138 } 2139 2140 postMerge(Size, Lo, Hi); 2141 } 2142 } 2143 2144 ABIArgInfo X86_64ABIInfo::getIndirectReturnResult(QualType Ty) const { 2145 // If this is a scalar LLVM value then assume LLVM will pass it in the right 2146 // place naturally. 2147 if (!isAggregateTypeForABI(Ty)) { 2148 // Treat an enum type as its underlying type. 2149 if (const EnumType *EnumTy = Ty->getAs<EnumType>()) 2150 Ty = EnumTy->getDecl()->getIntegerType(); 2151 2152 if (Ty->isBitIntType()) 2153 return getNaturalAlignIndirect(Ty); 2154 2155 return (isPromotableIntegerTypeForABI(Ty) ? ABIArgInfo::getExtend(Ty) 2156 : ABIArgInfo::getDirect()); 2157 } 2158 2159 return getNaturalAlignIndirect(Ty); 2160 } 2161 2162 bool X86_64ABIInfo::IsIllegalVectorType(QualType Ty) const { 2163 if (const VectorType *VecTy = Ty->getAs<VectorType>()) { 2164 uint64_t Size = getContext().getTypeSize(VecTy); 2165 unsigned LargestVector = getNativeVectorSizeForAVXABI(AVXLevel); 2166 if (Size <= 64 || Size > LargestVector) 2167 return true; 2168 QualType EltTy = VecTy->getElementType(); 2169 if (passInt128VectorsInMem() && 2170 (EltTy->isSpecificBuiltinType(BuiltinType::Int128) || 2171 EltTy->isSpecificBuiltinType(BuiltinType::UInt128))) 2172 return true; 2173 } 2174 2175 return false; 2176 } 2177 2178 ABIArgInfo X86_64ABIInfo::getIndirectResult(QualType Ty, 2179 unsigned freeIntRegs) const { 2180 // If this is a scalar LLVM value then assume LLVM will pass it in the right 2181 // place naturally. 2182 // 2183 // This assumption is optimistic, as there could be free registers available 2184 // when we need to pass this argument in memory, and LLVM could try to pass 2185 // the argument in the free register. This does not seem to happen currently, 2186 // but this code would be much safer if we could mark the argument with 2187 // 'onstack'. See PR12193. 2188 if (!isAggregateTypeForABI(Ty) && !IsIllegalVectorType(Ty) && 2189 !Ty->isBitIntType()) { 2190 // Treat an enum type as its underlying type. 
2191 if (const EnumType *EnumTy = Ty->getAs<EnumType>()) 2192 Ty = EnumTy->getDecl()->getIntegerType(); 2193 2194 return (isPromotableIntegerTypeForABI(Ty) ? ABIArgInfo::getExtend(Ty) 2195 : ABIArgInfo::getDirect()); 2196 } 2197 2198 if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) 2199 return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory); 2200 2201 // Compute the byval alignment. We specify the alignment of the byval in all 2202 // cases so that the mid-level optimizer knows the alignment of the byval. 2203 unsigned Align = std::max(getContext().getTypeAlign(Ty) / 8, 8U); 2204 2205 // Attempt to avoid passing indirect results using byval when possible. This 2206 // is important for good codegen. 2207 // 2208 // We do this by coercing the value into a scalar type which the backend can 2209 // handle naturally (i.e., without using byval). 2210 // 2211 // For simplicity, we currently only do this when we have exhausted all of the 2212 // free integer registers. Doing this when there are free integer registers 2213 // would require more care, as we would have to ensure that the coerced value 2214 // did not claim the unused register. That would require either reording the 2215 // arguments to the function (so that any subsequent inreg values came first), 2216 // or only doing this optimization when there were no following arguments that 2217 // might be inreg. 2218 // 2219 // We currently expect it to be rare (particularly in well written code) for 2220 // arguments to be passed on the stack when there are still free integer 2221 // registers available (this would typically imply large structs being passed 2222 // by value), so this seems like a fair tradeoff for now. 2223 // 2224 // We can revisit this if the backend grows support for 'onstack' parameter 2225 // attributes. See PR12193. 2226 if (freeIntRegs == 0) { 2227 uint64_t Size = getContext().getTypeSize(Ty); 2228 2229 // If this type fits in an eightbyte, coerce it into the matching integral 2230 // type, which will end up on the stack (with alignment 8). 2231 if (Align == 8 && Size <= 64) 2232 return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(), 2233 Size)); 2234 } 2235 2236 return ABIArgInfo::getIndirect(CharUnits::fromQuantity(Align)); 2237 } 2238 2239 /// The ABI specifies that a value should be passed in a full vector XMM/YMM 2240 /// register. Pick an LLVM IR type that will be passed as a vector register. 2241 llvm::Type *X86_64ABIInfo::GetByteVectorType(QualType Ty) const { 2242 // Wrapper structs/arrays that only contain vectors are passed just like 2243 // vectors; strip them off if present. 2244 if (const Type *InnerTy = isSingleElementStruct(Ty, getContext())) 2245 Ty = QualType(InnerTy, 0); 2246 2247 llvm::Type *IRType = CGT.ConvertType(Ty); 2248 if (isa<llvm::VectorType>(IRType)) { 2249 // Don't pass vXi128 vectors in their native type, the backend can't 2250 // legalize them. 2251 if (passInt128VectorsInMem() && 2252 cast<llvm::VectorType>(IRType)->getElementType()->isIntegerTy(128)) { 2253 // Use a vXi64 vector. 2254 uint64_t Size = getContext().getTypeSize(Ty); 2255 return llvm::FixedVectorType::get(llvm::Type::getInt64Ty(getVMContext()), 2256 Size / 64); 2257 } 2258 2259 return IRType; 2260 } 2261 2262 if (IRType->getTypeID() == llvm::Type::FP128TyID) 2263 return IRType; 2264 2265 // We couldn't find the preferred IR vector type for 'Ty'. 
2266 uint64_t Size = getContext().getTypeSize(Ty); 2267 assert((Size == 128 || Size == 256 || Size == 512) && "Invalid type found!"); 2268 2269 2270 // Return a LLVM IR vector type based on the size of 'Ty'. 2271 return llvm::FixedVectorType::get(llvm::Type::getDoubleTy(getVMContext()), 2272 Size / 64); 2273 } 2274 2275 /// BitsContainNoUserData - Return true if the specified [start,end) bit range 2276 /// is known to either be off the end of the specified type or being in 2277 /// alignment padding. The user type specified is known to be at most 128 bits 2278 /// in size, and have passed through X86_64ABIInfo::classify with a successful 2279 /// classification that put one of the two halves in the INTEGER class. 2280 /// 2281 /// It is conservatively correct to return false. 2282 static bool BitsContainNoUserData(QualType Ty, unsigned StartBit, 2283 unsigned EndBit, ASTContext &Context) { 2284 // If the bytes being queried are off the end of the type, there is no user 2285 // data hiding here. This handles analysis of builtins, vectors and other 2286 // types that don't contain interesting padding. 2287 unsigned TySize = (unsigned)Context.getTypeSize(Ty); 2288 if (TySize <= StartBit) 2289 return true; 2290 2291 if (const ConstantArrayType *AT = Context.getAsConstantArrayType(Ty)) { 2292 unsigned EltSize = (unsigned)Context.getTypeSize(AT->getElementType()); 2293 unsigned NumElts = (unsigned)AT->getSize().getZExtValue(); 2294 2295 // Check each element to see if the element overlaps with the queried range. 2296 for (unsigned i = 0; i != NumElts; ++i) { 2297 // If the element is after the span we care about, then we're done.. 2298 unsigned EltOffset = i*EltSize; 2299 if (EltOffset >= EndBit) break; 2300 2301 unsigned EltStart = EltOffset < StartBit ? StartBit-EltOffset :0; 2302 if (!BitsContainNoUserData(AT->getElementType(), EltStart, 2303 EndBit-EltOffset, Context)) 2304 return false; 2305 } 2306 // If it overlaps no elements, then it is safe to process as padding. 2307 return true; 2308 } 2309 2310 if (const RecordType *RT = Ty->getAs<RecordType>()) { 2311 const RecordDecl *RD = RT->getDecl(); 2312 const ASTRecordLayout &Layout = Context.getASTRecordLayout(RD); 2313 2314 // If this is a C++ record, check the bases first. 2315 if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD)) { 2316 for (const auto &I : CXXRD->bases()) { 2317 assert(!I.isVirtual() && !I.getType()->isDependentType() && 2318 "Unexpected base class!"); 2319 const auto *Base = 2320 cast<CXXRecordDecl>(I.getType()->castAs<RecordType>()->getDecl()); 2321 2322 // If the base is after the span we care about, ignore it. 2323 unsigned BaseOffset = Context.toBits(Layout.getBaseClassOffset(Base)); 2324 if (BaseOffset >= EndBit) continue; 2325 2326 unsigned BaseStart = BaseOffset < StartBit ? StartBit-BaseOffset :0; 2327 if (!BitsContainNoUserData(I.getType(), BaseStart, 2328 EndBit-BaseOffset, Context)) 2329 return false; 2330 } 2331 } 2332 2333 // Verify that no field has data that overlaps the region of interest. Yes 2334 // this could be sped up a lot by being smarter about queried fields, 2335 // however we're only looking at structs up to 16 bytes, so we don't care 2336 // much. 2337 unsigned idx = 0; 2338 for (RecordDecl::field_iterator i = RD->field_begin(), e = RD->field_end(); 2339 i != e; ++i, ++idx) { 2340 unsigned FieldOffset = (unsigned)Layout.getFieldOffset(idx); 2341 2342 // If we found a field after the region we care about, then we're done. 
2343 if (FieldOffset >= EndBit) break; 2344 2345 unsigned FieldStart = FieldOffset < StartBit ? StartBit-FieldOffset :0; 2346 if (!BitsContainNoUserData(i->getType(), FieldStart, EndBit-FieldOffset, 2347 Context)) 2348 return false; 2349 } 2350 2351 // If nothing in this record overlapped the area of interest, then we're 2352 // clean. 2353 return true; 2354 } 2355 2356 return false; 2357 } 2358 2359 /// getFPTypeAtOffset - Return a floating point type at the specified offset. 2360 static llvm::Type *getFPTypeAtOffset(llvm::Type *IRType, unsigned IROffset, 2361 const llvm::DataLayout &TD) { 2362 if (IROffset == 0 && IRType->isFloatingPointTy()) 2363 return IRType; 2364 2365 // If this is a struct, recurse into the field at the specified offset. 2366 if (llvm::StructType *STy = dyn_cast<llvm::StructType>(IRType)) { 2367 if (!STy->getNumContainedTypes()) 2368 return nullptr; 2369 2370 const llvm::StructLayout *SL = TD.getStructLayout(STy); 2371 unsigned Elt = SL->getElementContainingOffset(IROffset); 2372 IROffset -= SL->getElementOffset(Elt); 2373 return getFPTypeAtOffset(STy->getElementType(Elt), IROffset, TD); 2374 } 2375 2376 // If this is an array, recurse into the field at the specified offset. 2377 if (llvm::ArrayType *ATy = dyn_cast<llvm::ArrayType>(IRType)) { 2378 llvm::Type *EltTy = ATy->getElementType(); 2379 unsigned EltSize = TD.getTypeAllocSize(EltTy); 2380 IROffset -= IROffset / EltSize * EltSize; 2381 return getFPTypeAtOffset(EltTy, IROffset, TD); 2382 } 2383 2384 return nullptr; 2385 } 2386 2387 /// GetSSETypeAtOffset - Return a type that will be passed by the backend in the 2388 /// low 8 bytes of an XMM register, corresponding to the SSE class. 2389 llvm::Type *X86_64ABIInfo:: 2390 GetSSETypeAtOffset(llvm::Type *IRType, unsigned IROffset, 2391 QualType SourceTy, unsigned SourceOffset) const { 2392 const llvm::DataLayout &TD = getDataLayout(); 2393 unsigned SourceSize = 2394 (unsigned)getContext().getTypeSize(SourceTy) / 8 - SourceOffset; 2395 llvm::Type *T0 = getFPTypeAtOffset(IRType, IROffset, TD); 2396 if (!T0 || T0->isDoubleTy()) 2397 return llvm::Type::getDoubleTy(getVMContext()); 2398 2399 // Get the adjacent FP type. 2400 llvm::Type *T1 = nullptr; 2401 unsigned T0Size = TD.getTypeAllocSize(T0); 2402 if (SourceSize > T0Size) 2403 T1 = getFPTypeAtOffset(IRType, IROffset + T0Size, TD); 2404 if (T1 == nullptr) { 2405 // Check if IRType is a half/bfloat + float. float type will be in IROffset+4 due 2406 // to its alignment. 2407 if (T0->is16bitFPTy() && SourceSize > 4) 2408 T1 = getFPTypeAtOffset(IRType, IROffset + 4, TD); 2409 // If we can't get a second FP type, return a simple half or float. 2410 // avx512fp16-abi.c:pr51813_2 shows it works to return float for 2411 // {float, i8} too. 2412 if (T1 == nullptr) 2413 return T0; 2414 } 2415 2416 if (T0->isFloatTy() && T1->isFloatTy()) 2417 return llvm::FixedVectorType::get(T0, 2); 2418 2419 if (T0->is16bitFPTy() && T1->is16bitFPTy()) { 2420 llvm::Type *T2 = nullptr; 2421 if (SourceSize > 4) 2422 T2 = getFPTypeAtOffset(IRType, IROffset + 4, TD); 2423 if (T2 == nullptr) 2424 return llvm::FixedVectorType::get(T0, 2); 2425 return llvm::FixedVectorType::get(T0, 4); 2426 } 2427 2428 if (T0->is16bitFPTy() || T1->is16bitFPTy()) 2429 return llvm::FixedVectorType::get(llvm::Type::getHalfTy(getVMContext()), 4); 2430 2431 return llvm::Type::getDoubleTy(getVMContext()); 2432 } 2433 2434 2435 /// GetINTEGERTypeAtOffset - The ABI specifies that a value should be passed in 2436 /// an 8-byte GPR. 
This means that we either have a scalar or we are talking 2437 /// about the high or low part of an up-to-16-byte struct. This routine picks 2438 /// the best LLVM IR type to represent this, which may be i64 or may be anything 2439 /// else that the backend will pass in a GPR that works better (e.g. i8, %foo*, 2440 /// etc). 2441 /// 2442 /// PrefType is an LLVM IR type that corresponds to (part of) the IR type for 2443 /// the source type. IROffset is an offset in bytes into the LLVM IR type that 2444 /// the 8-byte value references. PrefType may be null. 2445 /// 2446 /// SourceTy is the source-level type for the entire argument. SourceOffset is 2447 /// an offset into this that we're processing (which is always either 0 or 8). 2448 /// 2449 llvm::Type *X86_64ABIInfo:: 2450 GetINTEGERTypeAtOffset(llvm::Type *IRType, unsigned IROffset, 2451 QualType SourceTy, unsigned SourceOffset) const { 2452 // If we're dealing with an un-offset LLVM IR type, then it means that we're 2453 // returning an 8-byte unit starting with it. See if we can safely use it. 2454 if (IROffset == 0) { 2455 // Pointers and int64's always fill the 8-byte unit. 2456 if ((isa<llvm::PointerType>(IRType) && Has64BitPointers) || 2457 IRType->isIntegerTy(64)) 2458 return IRType; 2459 2460 // If we have a 1/2/4-byte integer, we can use it only if the rest of the 2461 // goodness in the source type is just tail padding. This is allowed to 2462 // kick in for struct {double,int} on the int, but not on 2463 // struct{double,int,int} because we wouldn't return the second int. We 2464 // have to do this analysis on the source type because we can't depend on 2465 // unions being lowered a specific way etc. 2466 if (IRType->isIntegerTy(8) || IRType->isIntegerTy(16) || 2467 IRType->isIntegerTy(32) || 2468 (isa<llvm::PointerType>(IRType) && !Has64BitPointers)) { 2469 unsigned BitWidth = isa<llvm::PointerType>(IRType) ? 32 : 2470 cast<llvm::IntegerType>(IRType)->getBitWidth(); 2471 2472 if (BitsContainNoUserData(SourceTy, SourceOffset*8+BitWidth, 2473 SourceOffset*8+64, getContext())) 2474 return IRType; 2475 } 2476 } 2477 2478 if (llvm::StructType *STy = dyn_cast<llvm::StructType>(IRType)) { 2479 // If this is a struct, recurse into the field at the specified offset. 2480 const llvm::StructLayout *SL = getDataLayout().getStructLayout(STy); 2481 if (IROffset < SL->getSizeInBytes()) { 2482 unsigned FieldIdx = SL->getElementContainingOffset(IROffset); 2483 IROffset -= SL->getElementOffset(FieldIdx); 2484 2485 return GetINTEGERTypeAtOffset(STy->getElementType(FieldIdx), IROffset, 2486 SourceTy, SourceOffset); 2487 } 2488 } 2489 2490 if (llvm::ArrayType *ATy = dyn_cast<llvm::ArrayType>(IRType)) { 2491 llvm::Type *EltTy = ATy->getElementType(); 2492 unsigned EltSize = getDataLayout().getTypeAllocSize(EltTy); 2493 unsigned EltOffset = IROffset/EltSize*EltSize; 2494 return GetINTEGERTypeAtOffset(EltTy, IROffset-EltOffset, SourceTy, 2495 SourceOffset); 2496 } 2497 2498 // Okay, we don't have any better idea of what to pass, so we pass this in an 2499 // integer register that isn't too big to fit the rest of the struct. 2500 unsigned TySizeInBytes = 2501 (unsigned)getContext().getTypeSizeInChars(SourceTy).getQuantity(); 2502 2503 assert(TySizeInBytes != SourceOffset && "Empty field?"); 2504 2505 // It is always safe to classify this as an integer type up to i64 that 2506 // isn't larger than the structure. 
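  // For example (illustrative): struct { int i; float f; } classifies its
  // single eightbyte as INTEGER, but the i32 at offset 0 does not cover the
  // float, so the fallback below hands back an i64 spanning the whole 8-byte
  // struct.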
2507 return llvm::IntegerType::get(getVMContext(), 2508 std::min(TySizeInBytes-SourceOffset, 8U)*8); 2509 } 2510 2511 2512 /// GetX86_64ByValArgumentPair - Given a high and low type that can ideally 2513 /// be used as elements of a two register pair to pass or return, return a 2514 /// first class aggregate to represent them. For example, if the low part of 2515 /// a by-value argument should be passed as i32* and the high part as float, 2516 /// return {i32*, float}. 2517 static llvm::Type * 2518 GetX86_64ByValArgumentPair(llvm::Type *Lo, llvm::Type *Hi, 2519 const llvm::DataLayout &TD) { 2520 // In order to correctly satisfy the ABI, we need to the high part to start 2521 // at offset 8. If the high and low parts we inferred are both 4-byte types 2522 // (e.g. i32 and i32) then the resultant struct type ({i32,i32}) won't have 2523 // the second element at offset 8. Check for this: 2524 unsigned LoSize = (unsigned)TD.getTypeAllocSize(Lo); 2525 llvm::Align HiAlign = TD.getABITypeAlign(Hi); 2526 unsigned HiStart = llvm::alignTo(LoSize, HiAlign); 2527 assert(HiStart != 0 && HiStart <= 8 && "Invalid x86-64 argument pair!"); 2528 2529 // To handle this, we have to increase the size of the low part so that the 2530 // second element will start at an 8 byte offset. We can't increase the size 2531 // of the second element because it might make us access off the end of the 2532 // struct. 2533 if (HiStart != 8) { 2534 // There are usually two sorts of types the ABI generation code can produce 2535 // for the low part of a pair that aren't 8 bytes in size: half, float or 2536 // i8/i16/i32. This can also include pointers when they are 32-bit (X32 and 2537 // NaCl). 2538 // Promote these to a larger type. 2539 if (Lo->isHalfTy() || Lo->isFloatTy()) 2540 Lo = llvm::Type::getDoubleTy(Lo->getContext()); 2541 else { 2542 assert((Lo->isIntegerTy() || Lo->isPointerTy()) 2543 && "Invalid/unknown lo type"); 2544 Lo = llvm::Type::getInt64Ty(Lo->getContext()); 2545 } 2546 } 2547 2548 llvm::StructType *Result = llvm::StructType::get(Lo, Hi); 2549 2550 // Verify that the second element is at an 8-byte offset. 2551 assert(TD.getStructLayout(Result)->getElementOffset(1) == 8 && 2552 "Invalid x86-64 argument pair!"); 2553 return Result; 2554 } 2555 2556 ABIArgInfo X86_64ABIInfo:: 2557 classifyReturnType(QualType RetTy) const { 2558 // AMD64-ABI 3.2.3p4: Rule 1. Classify the return type with the 2559 // classification algorithm. 2560 X86_64ABIInfo::Class Lo, Hi; 2561 classify(RetTy, 0, Lo, Hi, /*isNamedArg*/ true); 2562 2563 // Check some invariants. 2564 assert((Hi != Memory || Lo == Memory) && "Invalid memory classification."); 2565 assert((Hi != SSEUp || Lo == SSE) && "Invalid SSEUp classification."); 2566 2567 llvm::Type *ResType = nullptr; 2568 switch (Lo) { 2569 case NoClass: 2570 if (Hi == NoClass) 2571 return ABIArgInfo::getIgnore(); 2572 // If the low part is just padding, it takes no register, leave ResType 2573 // null. 2574 assert((Hi == SSE || Hi == Integer || Hi == X87Up) && 2575 "Unknown missing lo part"); 2576 break; 2577 2578 case SSEUp: 2579 case X87Up: 2580 llvm_unreachable("Invalid classification for lo word."); 2581 2582 // AMD64-ABI 3.2.3p4: Rule 2. Types of class memory are returned via 2583 // hidden argument. 2584 case Memory: 2585 return getIndirectReturnResult(RetTy); 2586 2587 // AMD64-ABI 3.2.3p4: Rule 3. If the class is INTEGER, the next 2588 // available register of the sequence %rax, %rdx is used. 
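  // e.g., struct { long a, b; } classifies as INTEGER/INTEGER and is returned
  // in %rax:%rdx.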
2589   case Integer:
2590     ResType = GetINTEGERTypeAtOffset(CGT.ConvertType(RetTy), 0, RetTy, 0);
2591
2592     // If we have a sign or zero extended integer, make sure to return Extend
2593     // so that the parameter gets the right LLVM IR attributes.
2594     if (Hi == NoClass && isa<llvm::IntegerType>(ResType)) {
2595       // Treat an enum type as its underlying type.
2596       if (const EnumType *EnumTy = RetTy->getAs<EnumType>())
2597         RetTy = EnumTy->getDecl()->getIntegerType();
2598
2599       if (RetTy->isIntegralOrEnumerationType() &&
2600           isPromotableIntegerTypeForABI(RetTy))
2601         return ABIArgInfo::getExtend(RetTy);
2602     }
2603     break;
2604
2605   // AMD64-ABI 3.2.3p4: Rule 4. If the class is SSE, the next
2606   // available SSE register of the sequence %xmm0, %xmm1 is used.
2607   case SSE:
2608     ResType = GetSSETypeAtOffset(CGT.ConvertType(RetTy), 0, RetTy, 0);
2609     break;
2610
2611   // AMD64-ABI 3.2.3p4: Rule 6. If the class is X87, the value is
2612   // returned on the X87 stack in %st0 as an 80-bit x87 number.
2613   case X87:
2614     ResType = llvm::Type::getX86_FP80Ty(getVMContext());
2615     break;
2616
2617   // AMD64-ABI 3.2.3p4: Rule 8. If the class is COMPLEX_X87, the real
2618   // part of the value is returned in %st0 and the imaginary part in
2619   // %st1.
2620   case ComplexX87:
2621     assert(Hi == ComplexX87 && "Unexpected ComplexX87 classification.");
2622     ResType = llvm::StructType::get(llvm::Type::getX86_FP80Ty(getVMContext()),
2623                                     llvm::Type::getX86_FP80Ty(getVMContext()));
2624     break;
2625   }
2626
2627   llvm::Type *HighPart = nullptr;
2628   switch (Hi) {
2629   // Memory was handled previously and X87 should
2630   // never occur as a hi class.
2631   case Memory:
2632   case X87:
2633     llvm_unreachable("Invalid classification for hi word.");
2634
2635   case ComplexX87: // Previously handled.
2636   case NoClass:
2637     break;
2638
2639   case Integer:
2640     HighPart = GetINTEGERTypeAtOffset(CGT.ConvertType(RetTy), 8, RetTy, 8);
2641     if (Lo == NoClass) // Return HighPart at offset 8 in memory.
2642       return ABIArgInfo::getDirect(HighPart, 8);
2643     break;
2644   case SSE:
2645     HighPart = GetSSETypeAtOffset(CGT.ConvertType(RetTy), 8, RetTy, 8);
2646     if (Lo == NoClass) // Return HighPart at offset 8 in memory.
2647       return ABIArgInfo::getDirect(HighPart, 8);
2648     break;
2649
2650   // AMD64-ABI 3.2.3p4: Rule 5. If the class is SSEUP, the eightbyte
2651   // is passed in the next available eightbyte chunk of the last used
2652   // vector register.
2653   //
2654   // SSEUP should always be preceded by SSE, just widen.
2655   case SSEUp:
2656     assert(Lo == SSE && "Unexpected SSEUp classification.");
2657     ResType = GetByteVectorType(RetTy);
2658     break;
2659
2660   // AMD64-ABI 3.2.3p4: Rule 7. If the class is X87UP, the value is
2661   // returned together with the previous X87 value in %st0.
2662   case X87Up:
2663     // If X87Up is preceded by X87, we don't need to do
2664     // anything. However, in some cases with unions it may not be
2665     // preceded by X87. In such situations we follow gcc and pass the
2666     // extra bits in an SSE reg.
2667     if (Lo != X87) {
2668       HighPart = GetSSETypeAtOffset(CGT.ConvertType(RetTy), 8, RetTy, 8);
2669       if (Lo == NoClass) // Return HighPart at offset 8 in memory.
2670         return ABIArgInfo::getDirect(HighPart, 8);
2671     }
2672     break;
2673   }
2674
2675   // If a high part was specified, merge it together with the low part. It is
2676   // known to pass in the high eightbyte of the result.
We do this by forming a 2677 // first class struct aggregate with the high and low part: {low, high} 2678 if (HighPart) 2679 ResType = GetX86_64ByValArgumentPair(ResType, HighPart, getDataLayout()); 2680 2681 return ABIArgInfo::getDirect(ResType); 2682 } 2683 2684 ABIArgInfo 2685 X86_64ABIInfo::classifyArgumentType(QualType Ty, unsigned freeIntRegs, 2686 unsigned &neededInt, unsigned &neededSSE, 2687 bool isNamedArg, bool IsRegCall) const { 2688 Ty = useFirstFieldIfTransparentUnion(Ty); 2689 2690 X86_64ABIInfo::Class Lo, Hi; 2691 classify(Ty, 0, Lo, Hi, isNamedArg, IsRegCall); 2692 2693 // Check some invariants. 2694 // FIXME: Enforce these by construction. 2695 assert((Hi != Memory || Lo == Memory) && "Invalid memory classification."); 2696 assert((Hi != SSEUp || Lo == SSE) && "Invalid SSEUp classification."); 2697 2698 neededInt = 0; 2699 neededSSE = 0; 2700 llvm::Type *ResType = nullptr; 2701 switch (Lo) { 2702 case NoClass: 2703 if (Hi == NoClass) 2704 return ABIArgInfo::getIgnore(); 2705 // If the low part is just padding, it takes no register, leave ResType 2706 // null. 2707 assert((Hi == SSE || Hi == Integer || Hi == X87Up) && 2708 "Unknown missing lo part"); 2709 break; 2710 2711 // AMD64-ABI 3.2.3p3: Rule 1. If the class is MEMORY, pass the argument 2712 // on the stack. 2713 case Memory: 2714 2715 // AMD64-ABI 3.2.3p3: Rule 5. If the class is X87, X87UP or 2716 // COMPLEX_X87, it is passed in memory. 2717 case X87: 2718 case ComplexX87: 2719 if (getRecordArgABI(Ty, getCXXABI()) == CGCXXABI::RAA_Indirect) 2720 ++neededInt; 2721 return getIndirectResult(Ty, freeIntRegs); 2722 2723 case SSEUp: 2724 case X87Up: 2725 llvm_unreachable("Invalid classification for lo word."); 2726 2727 // AMD64-ABI 3.2.3p3: Rule 2. If the class is INTEGER, the next 2728 // available register of the sequence %rdi, %rsi, %rdx, %rcx, %r8 2729 // and %r9 is used. 2730 case Integer: 2731 ++neededInt; 2732 2733 // Pick an 8-byte type based on the preferred type. 2734 ResType = GetINTEGERTypeAtOffset(CGT.ConvertType(Ty), 0, Ty, 0); 2735 2736 // If we have a sign or zero extended integer, make sure to return Extend 2737 // so that the parameter gets the right LLVM IR attributes. 2738 if (Hi == NoClass && isa<llvm::IntegerType>(ResType)) { 2739 // Treat an enum type as its underlying type. 2740 if (const EnumType *EnumTy = Ty->getAs<EnumType>()) 2741 Ty = EnumTy->getDecl()->getIntegerType(); 2742 2743 if (Ty->isIntegralOrEnumerationType() && 2744 isPromotableIntegerTypeForABI(Ty)) 2745 return ABIArgInfo::getExtend(Ty); 2746 } 2747 2748 break; 2749 2750 // AMD64-ABI 3.2.3p3: Rule 3. If the class is SSE, the next 2751 // available SSE register is used, the registers are taken in the 2752 // order from %xmm0 to %xmm7. 2753 case SSE: { 2754 llvm::Type *IRType = CGT.ConvertType(Ty); 2755 ResType = GetSSETypeAtOffset(IRType, 0, Ty, 0); 2756 ++neededSSE; 2757 break; 2758 } 2759 } 2760 2761 llvm::Type *HighPart = nullptr; 2762 switch (Hi) { 2763 // Memory was handled previously, ComplexX87 and X87 should 2764 // never occur as hi classes, and X87Up must be preceded by X87, 2765 // which is passed in memory. 2766 case Memory: 2767 case X87: 2768 case ComplexX87: 2769 llvm_unreachable("Invalid classification for hi word."); 2770 2771 case NoClass: break; 2772 2773 case Integer: 2774 ++neededInt; 2775 // Pick an 8-byte type based on the preferred type. 2776 HighPart = GetINTEGERTypeAtOffset(CGT.ConvertType(Ty), 8, Ty, 8); 2777 2778 if (Lo == NoClass) // Pass HighPart at offset 8 in memory. 
2779 return ABIArgInfo::getDirect(HighPart, 8); 2780 break; 2781 2782 // X87Up generally doesn't occur here (long double is passed in 2783 // memory), except in situations involving unions. 2784 case X87Up: 2785 case SSE: 2786 HighPart = GetSSETypeAtOffset(CGT.ConvertType(Ty), 8, Ty, 8); 2787 2788 if (Lo == NoClass) // Pass HighPart at offset 8 in memory. 2789 return ABIArgInfo::getDirect(HighPart, 8); 2790 2791 ++neededSSE; 2792 break; 2793 2794 // AMD64-ABI 3.2.3p3: Rule 4. If the class is SSEUP, the 2795 // eightbyte is passed in the upper half of the last used SSE 2796 // register. This only happens when 128-bit vectors are passed. 2797 case SSEUp: 2798 assert(Lo == SSE && "Unexpected SSEUp classification"); 2799 ResType = GetByteVectorType(Ty); 2800 break; 2801 } 2802 2803 // If a high part was specified, merge it together with the low part. It is 2804 // known to pass in the high eightbyte of the result. We do this by forming a 2805 // first class struct aggregate with the high and low part: {low, high} 2806 if (HighPart) 2807 ResType = GetX86_64ByValArgumentPair(ResType, HighPart, getDataLayout()); 2808 2809 return ABIArgInfo::getDirect(ResType); 2810 } 2811 2812 ABIArgInfo 2813 X86_64ABIInfo::classifyRegCallStructTypeImpl(QualType Ty, unsigned &NeededInt, 2814 unsigned &NeededSSE, 2815 unsigned &MaxVectorWidth) const { 2816 auto RT = Ty->getAs<RecordType>(); 2817 assert(RT && "classifyRegCallStructType only valid with struct types"); 2818 2819 if (RT->getDecl()->hasFlexibleArrayMember()) 2820 return getIndirectReturnResult(Ty); 2821 2822 // Sum up bases 2823 if (auto CXXRD = dyn_cast<CXXRecordDecl>(RT->getDecl())) { 2824 if (CXXRD->isDynamicClass()) { 2825 NeededInt = NeededSSE = 0; 2826 return getIndirectReturnResult(Ty); 2827 } 2828 2829 for (const auto &I : CXXRD->bases()) 2830 if (classifyRegCallStructTypeImpl(I.getType(), NeededInt, NeededSSE, 2831 MaxVectorWidth) 2832 .isIndirect()) { 2833 NeededInt = NeededSSE = 0; 2834 return getIndirectReturnResult(Ty); 2835 } 2836 } 2837 2838 // Sum up members 2839 for (const auto *FD : RT->getDecl()->fields()) { 2840 QualType MTy = FD->getType(); 2841 if (MTy->isRecordType() && !MTy->isUnionType()) { 2842 if (classifyRegCallStructTypeImpl(MTy, NeededInt, NeededSSE, 2843 MaxVectorWidth) 2844 .isIndirect()) { 2845 NeededInt = NeededSSE = 0; 2846 return getIndirectReturnResult(Ty); 2847 } 2848 } else { 2849 unsigned LocalNeededInt, LocalNeededSSE; 2850 if (classifyArgumentType(MTy, UINT_MAX, LocalNeededInt, LocalNeededSSE, 2851 true, true) 2852 .isIndirect()) { 2853 NeededInt = NeededSSE = 0; 2854 return getIndirectReturnResult(Ty); 2855 } 2856 if (const auto *AT = getContext().getAsConstantArrayType(MTy)) 2857 MTy = AT->getElementType(); 2858 if (const auto *VT = MTy->getAs<VectorType>()) 2859 if (getContext().getTypeSize(VT) > MaxVectorWidth) 2860 MaxVectorWidth = getContext().getTypeSize(VT); 2861 NeededInt += LocalNeededInt; 2862 NeededSSE += LocalNeededSSE; 2863 } 2864 } 2865 2866 return ABIArgInfo::getDirect(); 2867 } 2868 2869 ABIArgInfo 2870 X86_64ABIInfo::classifyRegCallStructType(QualType Ty, unsigned &NeededInt, 2871 unsigned &NeededSSE, 2872 unsigned &MaxVectorWidth) const { 2873 2874 NeededInt = 0; 2875 NeededSSE = 0; 2876 MaxVectorWidth = 0; 2877 2878 return classifyRegCallStructTypeImpl(Ty, NeededInt, NeededSSE, 2879 MaxVectorWidth); 2880 } 2881 2882 void X86_64ABIInfo::computeInfo(CGFunctionInfo &FI) const { 2883 2884 const unsigned CallingConv = FI.getCallingConvention(); 2885 // It is possible to force Win64 calling 
convention on any x86_64 target by 2886 // using __attribute__((ms_abi)). In such case to correctly emit Win64 2887 // compatible code delegate this call to WinX86_64ABIInfo::computeInfo. 2888 if (CallingConv == llvm::CallingConv::Win64) { 2889 WinX86_64ABIInfo Win64ABIInfo(CGT, AVXLevel); 2890 Win64ABIInfo.computeInfo(FI); 2891 return; 2892 } 2893 2894 bool IsRegCall = CallingConv == llvm::CallingConv::X86_RegCall; 2895 2896 // Keep track of the number of assigned registers. 2897 unsigned FreeIntRegs = IsRegCall ? 11 : 6; 2898 unsigned FreeSSERegs = IsRegCall ? 16 : 8; 2899 unsigned NeededInt = 0, NeededSSE = 0, MaxVectorWidth = 0; 2900 2901 if (!::classifyReturnType(getCXXABI(), FI, *this)) { 2902 if (IsRegCall && FI.getReturnType()->getTypePtr()->isRecordType() && 2903 !FI.getReturnType()->getTypePtr()->isUnionType()) { 2904 FI.getReturnInfo() = classifyRegCallStructType( 2905 FI.getReturnType(), NeededInt, NeededSSE, MaxVectorWidth); 2906 if (FreeIntRegs >= NeededInt && FreeSSERegs >= NeededSSE) { 2907 FreeIntRegs -= NeededInt; 2908 FreeSSERegs -= NeededSSE; 2909 } else { 2910 FI.getReturnInfo() = getIndirectReturnResult(FI.getReturnType()); 2911 } 2912 } else if (IsRegCall && FI.getReturnType()->getAs<ComplexType>() && 2913 getContext().getCanonicalType(FI.getReturnType() 2914 ->getAs<ComplexType>() 2915 ->getElementType()) == 2916 getContext().LongDoubleTy) 2917 // Complex Long Double Type is passed in Memory when Regcall 2918 // calling convention is used. 2919 FI.getReturnInfo() = getIndirectReturnResult(FI.getReturnType()); 2920 else 2921 FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); 2922 } 2923 2924 // If the return value is indirect, then the hidden argument is consuming one 2925 // integer register. 2926 if (FI.getReturnInfo().isIndirect()) 2927 --FreeIntRegs; 2928 else if (NeededSSE && MaxVectorWidth > 0) 2929 FI.setMaxVectorWidth(MaxVectorWidth); 2930 2931 // The chain argument effectively gives us another free register. 2932 if (FI.isChainCall()) 2933 ++FreeIntRegs; 2934 2935 unsigned NumRequiredArgs = FI.getNumRequiredArgs(); 2936 // AMD64-ABI 3.2.3p3: Once arguments are classified, the registers 2937 // get assigned (in left-to-right order) for passing as follows... 2938 unsigned ArgNo = 0; 2939 for (CGFunctionInfo::arg_iterator it = FI.arg_begin(), ie = FI.arg_end(); 2940 it != ie; ++it, ++ArgNo) { 2941 bool IsNamedArg = ArgNo < NumRequiredArgs; 2942 2943 if (IsRegCall && it->type->isStructureOrClassType()) 2944 it->info = classifyRegCallStructType(it->type, NeededInt, NeededSSE, 2945 MaxVectorWidth); 2946 else 2947 it->info = classifyArgumentType(it->type, FreeIntRegs, NeededInt, 2948 NeededSSE, IsNamedArg); 2949 2950 // AMD64-ABI 3.2.3p3: If there are no registers available for any 2951 // eightbyte of an argument, the whole argument is passed on the 2952 // stack. If registers have already been assigned for some 2953 // eightbytes of such an argument, the assignments get reverted. 
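    // e.g., if only one SSE register remains free and an argument needs two
    // SSE eightbytes, it is not split; the whole argument is instead passed in
    // memory via getIndirectResult below.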
2954 if (FreeIntRegs >= NeededInt && FreeSSERegs >= NeededSSE) { 2955 FreeIntRegs -= NeededInt; 2956 FreeSSERegs -= NeededSSE; 2957 if (MaxVectorWidth > FI.getMaxVectorWidth()) 2958 FI.setMaxVectorWidth(MaxVectorWidth); 2959 } else { 2960 it->info = getIndirectResult(it->type, FreeIntRegs); 2961 } 2962 } 2963 } 2964 2965 static Address EmitX86_64VAArgFromMemory(CodeGenFunction &CGF, 2966 Address VAListAddr, QualType Ty) { 2967 Address overflow_arg_area_p = 2968 CGF.Builder.CreateStructGEP(VAListAddr, 2, "overflow_arg_area_p"); 2969 llvm::Value *overflow_arg_area = 2970 CGF.Builder.CreateLoad(overflow_arg_area_p, "overflow_arg_area"); 2971 2972 // AMD64-ABI 3.5.7p5: Step 7. Align l->overflow_arg_area upwards to a 16 2973 // byte boundary if alignment needed by type exceeds 8 byte boundary. 2974 // It isn't stated explicitly in the standard, but in practice we use 2975 // alignment greater than 16 where necessary. 2976 CharUnits Align = CGF.getContext().getTypeAlignInChars(Ty); 2977 if (Align > CharUnits::fromQuantity(8)) { 2978 overflow_arg_area = emitRoundPointerUpToAlignment(CGF, overflow_arg_area, 2979 Align); 2980 } 2981 2982 // AMD64-ABI 3.5.7p5: Step 8. Fetch type from l->overflow_arg_area. 2983 llvm::Type *LTy = CGF.ConvertTypeForMem(Ty); 2984 llvm::Value *Res = overflow_arg_area; 2985 2986 // AMD64-ABI 3.5.7p5: Step 9. Set l->overflow_arg_area to: 2987 // l->overflow_arg_area + sizeof(type). 2988 // AMD64-ABI 3.5.7p5: Step 10. Align l->overflow_arg_area upwards to 2989 // an 8 byte boundary. 2990 2991 uint64_t SizeInBytes = (CGF.getContext().getTypeSize(Ty) + 7) / 8; 2992 llvm::Value *Offset = 2993 llvm::ConstantInt::get(CGF.Int32Ty, (SizeInBytes + 7) & ~7); 2994 overflow_arg_area = CGF.Builder.CreateGEP(CGF.Int8Ty, overflow_arg_area, 2995 Offset, "overflow_arg_area.next"); 2996 CGF.Builder.CreateStore(overflow_arg_area, overflow_arg_area_p); 2997 2998 // AMD64-ABI 3.5.7p5: Step 11. Return the fetched type. 2999 return Address(Res, LTy, Align); 3000 } 3001 3002 Address X86_64ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, 3003 QualType Ty) const { 3004 // Assume that va_list type is correct; should be pointer to LLVM type: 3005 // struct { 3006 // i32 gp_offset; 3007 // i32 fp_offset; 3008 // i8* overflow_arg_area; 3009 // i8* reg_save_area; 3010 // }; 3011 unsigned neededInt, neededSSE; 3012 3013 Ty = getContext().getCanonicalType(Ty); 3014 ABIArgInfo AI = classifyArgumentType(Ty, 0, neededInt, neededSSE, 3015 /*isNamedArg*/false); 3016 3017 // AMD64-ABI 3.5.7p5: Step 1. Determine whether type may be passed 3018 // in the registers. If not go to step 7. 3019 if (!neededInt && !neededSSE) 3020 return EmitX86_64VAArgFromMemory(CGF, VAListAddr, Ty); 3021 3022 // AMD64-ABI 3.5.7p5: Step 2. Compute num_gp to hold the number of 3023 // general purpose registers needed to pass type and num_fp to hold 3024 // the number of floating point registers needed. 3025 3026 // AMD64-ABI 3.5.7p5: Step 3. Verify whether arguments fit into 3027 // registers. In the case: l->gp_offset > 48 - num_gp * 8 or 3028 // l->fp_offset > 304 - num_fp * 16 go to step 7. 3029 // 3030 // NOTE: 304 is a typo, there are (6 * 8 + 8 * 16) = 176 bytes of 3031 // register save space). 
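  // Layout reminder (illustrative): the register save area holds 48 bytes of
  // GPR spill slots (%rdi, %rsi, %rdx, %rcx, %r8, %r9) followed by 8 XMM slots
  // of 16 bytes each, so an argument needing one GPR fits while
  // gp_offset <= 40 and one needing a single XMM register fits while
  // fp_offset <= 160.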
3032 3033 llvm::Value *InRegs = nullptr; 3034 Address gp_offset_p = Address::invalid(), fp_offset_p = Address::invalid(); 3035 llvm::Value *gp_offset = nullptr, *fp_offset = nullptr; 3036 if (neededInt) { 3037 gp_offset_p = CGF.Builder.CreateStructGEP(VAListAddr, 0, "gp_offset_p"); 3038 gp_offset = CGF.Builder.CreateLoad(gp_offset_p, "gp_offset"); 3039 InRegs = llvm::ConstantInt::get(CGF.Int32Ty, 48 - neededInt * 8); 3040 InRegs = CGF.Builder.CreateICmpULE(gp_offset, InRegs, "fits_in_gp"); 3041 } 3042 3043 if (neededSSE) { 3044 fp_offset_p = CGF.Builder.CreateStructGEP(VAListAddr, 1, "fp_offset_p"); 3045 fp_offset = CGF.Builder.CreateLoad(fp_offset_p, "fp_offset"); 3046 llvm::Value *FitsInFP = 3047 llvm::ConstantInt::get(CGF.Int32Ty, 176 - neededSSE * 16); 3048 FitsInFP = CGF.Builder.CreateICmpULE(fp_offset, FitsInFP, "fits_in_fp"); 3049 InRegs = InRegs ? CGF.Builder.CreateAnd(InRegs, FitsInFP) : FitsInFP; 3050 } 3051 3052 llvm::BasicBlock *InRegBlock = CGF.createBasicBlock("vaarg.in_reg"); 3053 llvm::BasicBlock *InMemBlock = CGF.createBasicBlock("vaarg.in_mem"); 3054 llvm::BasicBlock *ContBlock = CGF.createBasicBlock("vaarg.end"); 3055 CGF.Builder.CreateCondBr(InRegs, InRegBlock, InMemBlock); 3056 3057 // Emit code to load the value if it was passed in registers. 3058 3059 CGF.EmitBlock(InRegBlock); 3060 3061 // AMD64-ABI 3.5.7p5: Step 4. Fetch type from l->reg_save_area with 3062 // an offset of l->gp_offset and/or l->fp_offset. This may require 3063 // copying to a temporary location in case the parameter is passed 3064 // in different register classes or requires an alignment greater 3065 // than 8 for general purpose registers and 16 for XMM registers. 3066 // 3067 // FIXME: This really results in shameful code when we end up needing to 3068 // collect arguments from different places; often what should result in a 3069 // simple assembling of a structure from scattered addresses has many more 3070 // loads than necessary. Can we clean this up? 3071 llvm::Type *LTy = CGF.ConvertTypeForMem(Ty); 3072 llvm::Value *RegSaveArea = CGF.Builder.CreateLoad( 3073 CGF.Builder.CreateStructGEP(VAListAddr, 3), "reg_save_area"); 3074 3075 Address RegAddr = Address::invalid(); 3076 if (neededInt && neededSSE) { 3077 // FIXME: Cleanup. 3078 assert(AI.isDirect() && "Unexpected ABI info for mixed regs"); 3079 llvm::StructType *ST = cast<llvm::StructType>(AI.getCoerceToType()); 3080 Address Tmp = CGF.CreateMemTemp(Ty); 3081 Tmp = Tmp.withElementType(ST); 3082 assert(ST->getNumElements() == 2 && "Unexpected ABI info for mixed regs"); 3083 llvm::Type *TyLo = ST->getElementType(0); 3084 llvm::Type *TyHi = ST->getElementType(1); 3085 assert((TyLo->isFPOrFPVectorTy() ^ TyHi->isFPOrFPVectorTy()) && 3086 "Unexpected ABI info for mixed regs"); 3087 llvm::Value *GPAddr = 3088 CGF.Builder.CreateGEP(CGF.Int8Ty, RegSaveArea, gp_offset); 3089 llvm::Value *FPAddr = 3090 CGF.Builder.CreateGEP(CGF.Int8Ty, RegSaveArea, fp_offset); 3091 llvm::Value *RegLoAddr = TyLo->isFPOrFPVectorTy() ? FPAddr : GPAddr; 3092 llvm::Value *RegHiAddr = TyLo->isFPOrFPVectorTy() ? GPAddr : FPAddr; 3093 3094 // Copy the first element. 3095 // FIXME: Our choice of alignment here and below is probably pessimistic. 3096 llvm::Value *V = CGF.Builder.CreateAlignedLoad( 3097 TyLo, RegLoAddr, 3098 CharUnits::fromQuantity(getDataLayout().getABITypeAlign(TyLo))); 3099 CGF.Builder.CreateStore(V, CGF.Builder.CreateStructGEP(Tmp, 0)); 3100 3101 // Copy the second element. 
    // Copy the second element.
    V = CGF.Builder.CreateAlignedLoad(
        TyHi, RegHiAddr,
        CharUnits::fromQuantity(getDataLayout().getABITypeAlign(TyHi)));
    CGF.Builder.CreateStore(V, CGF.Builder.CreateStructGEP(Tmp, 1));

    RegAddr = Tmp.withElementType(LTy);
  } else if (neededInt) {
    RegAddr = Address(CGF.Builder.CreateGEP(CGF.Int8Ty, RegSaveArea, gp_offset),
                      LTy, CharUnits::fromQuantity(8));

    // Copy to a temporary if necessary to ensure the appropriate alignment.
    auto TInfo = getContext().getTypeInfoInChars(Ty);
    uint64_t TySize = TInfo.Width.getQuantity();
    CharUnits TyAlign = TInfo.Align;

    // Copy into a temporary if the type is more aligned than the
    // register save area.
    if (TyAlign.getQuantity() > 8) {
      Address Tmp = CGF.CreateMemTemp(Ty);
      CGF.Builder.CreateMemCpy(Tmp, RegAddr, TySize, false);
      RegAddr = Tmp;
    }

  } else if (neededSSE == 1) {
    RegAddr = Address(CGF.Builder.CreateGEP(CGF.Int8Ty, RegSaveArea, fp_offset),
                      LTy, CharUnits::fromQuantity(16));
  } else {
    assert(neededSSE == 2 && "Invalid number of needed registers!");
    // SSE registers are spaced 16 bytes apart in the register save
    // area, so we need to collect the two eightbytes together.
    // The ABI isn't explicit about this, but it seems reasonable
    // to assume that the slots are 16-byte aligned, since the stack is
    // naturally 16-byte aligned and the prologue is expected to store
    // all the SSE registers to the RSA.
    Address RegAddrLo = Address(CGF.Builder.CreateGEP(CGF.Int8Ty, RegSaveArea,
                                                      fp_offset),
                                CGF.Int8Ty, CharUnits::fromQuantity(16));
    Address RegAddrHi =
        CGF.Builder.CreateConstInBoundsByteGEP(RegAddrLo,
                                               CharUnits::fromQuantity(16));
    llvm::Type *ST = AI.canHaveCoerceToType()
                         ? AI.getCoerceToType()
                         : llvm::StructType::get(CGF.DoubleTy, CGF.DoubleTy);
    llvm::Value *V;
    Address Tmp = CGF.CreateMemTemp(Ty);
    Tmp = Tmp.withElementType(ST);
    V = CGF.Builder.CreateLoad(
        RegAddrLo.withElementType(ST->getStructElementType(0)));
    CGF.Builder.CreateStore(V, CGF.Builder.CreateStructGEP(Tmp, 0));
    V = CGF.Builder.CreateLoad(
        RegAddrHi.withElementType(ST->getStructElementType(1)));
    CGF.Builder.CreateStore(V, CGF.Builder.CreateStructGEP(Tmp, 1));

    RegAddr = Tmp.withElementType(LTy);
  }

  // AMD64-ABI 3.5.7p5: Step 5. Set:
  // l->gp_offset = l->gp_offset + num_gp * 8
  // l->fp_offset = l->fp_offset + num_fp * 16.
  if (neededInt) {
    llvm::Value *Offset = llvm::ConstantInt::get(CGF.Int32Ty, neededInt * 8);
    CGF.Builder.CreateStore(CGF.Builder.CreateAdd(gp_offset, Offset),
                            gp_offset_p);
  }
  if (neededSSE) {
    llvm::Value *Offset = llvm::ConstantInt::get(CGF.Int32Ty, neededSSE * 16);
    CGF.Builder.CreateStore(CGF.Builder.CreateAdd(fp_offset, Offset),
                            fp_offset_p);
  }
  CGF.EmitBranch(ContBlock);

  // Emit code to load the value if it was passed in memory.

  CGF.EmitBlock(InMemBlock);
  Address MemAddr = EmitX86_64VAArgFromMemory(CGF, VAListAddr, Ty);
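
  // Both the in-register and the in-memory paths end up in the "vaarg.end"
  // block, where the two candidate addresses are merged with a phi
  // (emitMergePHI below) so the caller sees a single address for the
  // fetched argument.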

  // Return the appropriate result.

  CGF.EmitBlock(ContBlock);
  Address ResAddr = emitMergePHI(CGF, RegAddr, InRegBlock, MemAddr, InMemBlock,
                                 "vaarg.addr");
  return ResAddr;
}

Address X86_64ABIInfo::EmitMSVAArg(CodeGenFunction &CGF, Address VAListAddr,
                                   QualType Ty) const {
  // MS x64 ABI requirement: "Any argument that doesn't fit in 8 bytes, or is
  // not 1, 2, 4, or 8 bytes, must be passed by reference."
  uint64_t Width = getContext().getTypeSize(Ty);
  bool IsIndirect = Width > 64 || !llvm::isPowerOf2_64(Width);

  return emitVoidPtrVAArg(CGF, VAListAddr, Ty, IsIndirect,
                          CGF.getContext().getTypeInfoInChars(Ty),
                          CharUnits::fromQuantity(8),
                          /*allowHigherAlign*/ false);
}

ABIArgInfo WinX86_64ABIInfo::reclassifyHvaArgForVectorCall(
    QualType Ty, unsigned &FreeSSERegs, const ABIArgInfo &current) const {
  const Type *Base = nullptr;
  uint64_t NumElts = 0;

  if (!Ty->isBuiltinType() && !Ty->isVectorType() &&
      isHomogeneousAggregate(Ty, Base, NumElts) && FreeSSERegs >= NumElts) {
    FreeSSERegs -= NumElts;
    return getDirectX86Hva();
  }
  return current;
}

ABIArgInfo WinX86_64ABIInfo::classify(QualType Ty, unsigned &FreeSSERegs,
                                      bool IsReturnType, bool IsVectorCall,
                                      bool IsRegCall) const {

  if (Ty->isVoidType())
    return ABIArgInfo::getIgnore();

  if (const EnumType *EnumTy = Ty->getAs<EnumType>())
    Ty = EnumTy->getDecl()->getIntegerType();

  TypeInfo Info = getContext().getTypeInfo(Ty);
  uint64_t Width = Info.Width;
  CharUnits Align = getContext().toCharUnitsFromBits(Info.Align);

  const RecordType *RT = Ty->getAs<RecordType>();
  if (RT) {
    if (!IsReturnType) {
      if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(RT, getCXXABI()))
        return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory);
    }

    if (RT->getDecl()->hasFlexibleArrayMember())
      return getNaturalAlignIndirect(Ty, /*ByVal=*/false);
  }

  const Type *Base = nullptr;
  uint64_t NumElts = 0;
  // vectorcall adds the concept of a homogeneous vector aggregate, similar to
  // other targets.
  if ((IsVectorCall || IsRegCall) &&
      isHomogeneousAggregate(Ty, Base, NumElts)) {
    if (IsRegCall) {
      if (FreeSSERegs >= NumElts) {
        FreeSSERegs -= NumElts;
        if (IsReturnType || Ty->isBuiltinType() || Ty->isVectorType())
          return ABIArgInfo::getDirect();
        return ABIArgInfo::getExpand();
      }
      return ABIArgInfo::getIndirect(Align, /*ByVal=*/false);
    } else if (IsVectorCall) {
      if (FreeSSERegs >= NumElts &&
          (IsReturnType || Ty->isBuiltinType() || Ty->isVectorType())) {
        FreeSSERegs -= NumElts;
        return ABIArgInfo::getDirect();
      } else if (IsReturnType) {
        return ABIArgInfo::getExpand();
      } else if (!Ty->isBuiltinType() && !Ty->isVectorType()) {
        // HVAs are delayed and reclassified in the 2nd step.
        return ABIArgInfo::getIndirect(Align, /*ByVal=*/false);
      }
    }
  }
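
  // Illustrative note (assumed example, not exhaustive): under vectorcall a
  // plain vector argument such as __m128 takes the direct path above whenever
  // an SSE register is still free, whereas an aggregate HVA such as
  //   struct Pair { __m128 Lo, Hi; };
  // is classified indirect on this first pass and only claims leftover SSE
  // registers in the second pass, via reclassifyHvaArgForVectorCall.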
  if (Ty->isMemberPointerType()) {
    // If the member pointer is represented by an LLVM int or ptr, pass it
    // directly.
    llvm::Type *LLTy = CGT.ConvertType(Ty);
    if (LLTy->isPointerTy() || LLTy->isIntegerTy())
      return ABIArgInfo::getDirect();
  }

  if (RT || Ty->isAnyComplexType() || Ty->isMemberPointerType()) {
    // MS x64 ABI requirement: "Any argument that doesn't fit in 8 bytes, or is
    // not 1, 2, 4, or 8 bytes, must be passed by reference."
    if (Width > 64 || !llvm::isPowerOf2_64(Width))
      return getNaturalAlignIndirect(Ty, /*ByVal=*/false);

    // Otherwise, coerce it to a small integer.
    return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(), Width));
  }

  if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) {
    switch (BT->getKind()) {
    case BuiltinType::Bool:
      // Bool is always extended for the ABI; other builtin types are not
      // extended.
      return ABIArgInfo::getExtend(Ty);

    case BuiltinType::LongDouble:
      // Mingw64 GCC uses the old 80 bit extended precision floating point
      // unit. It passes long doubles indirectly through memory.
      if (IsMingw64) {
        const llvm::fltSemantics *LDF = &getTarget().getLongDoubleFormat();
        if (LDF == &llvm::APFloat::x87DoubleExtended())
          return ABIArgInfo::getIndirect(Align, /*ByVal=*/false);
      }
      break;

    case BuiltinType::Int128:
    case BuiltinType::UInt128:
      // If it's a parameter type, the normal ABI rule is that arguments larger
      // than 8 bytes are passed indirectly. GCC follows it. We follow it too,
      // even though it isn't particularly efficient.
      if (!IsReturnType)
        return ABIArgInfo::getIndirect(Align, /*ByVal=*/false);

      // Mingw64 GCC returns i128 in XMM0. Coerce to v2i64 to handle that.
      // Clang matches them for compatibility.
      return ABIArgInfo::getDirect(llvm::FixedVectorType::get(
          llvm::Type::getInt64Ty(getVMContext()), 2));

    default:
      break;
    }
  }

  if (Ty->isBitIntType()) {
    // MS x64 ABI requirement: "Any argument that doesn't fit in 8 bytes, or is
    // not 1, 2, 4, or 8 bytes, must be passed by reference."
    // However, non-power-of-two bit-precise integers will be passed as 1, 2, 4,
    // or 8 bytes anyway as long as they fit in them, so we don't have to check
    // for a power of 2.
    if (Width <= 64)
      return ABIArgInfo::getDirect();
    return ABIArgInfo::getIndirect(Align, /*ByVal=*/false);
  }

  return ABIArgInfo::getDirect();
}

void WinX86_64ABIInfo::computeInfo(CGFunctionInfo &FI) const {
  const unsigned CC = FI.getCallingConvention();
  bool IsVectorCall = CC == llvm::CallingConv::X86_VectorCall;
  bool IsRegCall = CC == llvm::CallingConv::X86_RegCall;

  // If __attribute__((sysv_abi)) is in use, use the SysV argument
  // classification rules.
  if (CC == llvm::CallingConv::X86_64_SysV) {
    X86_64ABIInfo SysVABIInfo(CGT, AVXLevel);
    SysVABIInfo.computeInfo(FI);
    return;
  }

  unsigned FreeSSERegs = 0;
  if (IsVectorCall) {
    // We can use up to 4 SSE return registers with vectorcall.
    FreeSSERegs = 4;
  } else if (IsRegCall) {
    // RegCall gives us 16 SSE registers.
    FreeSSERegs = 16;
  }

  if (!getCXXABI().classifyReturnType(FI))
    FI.getReturnInfo() = classify(FI.getReturnType(), FreeSSERegs, true,
                                  IsVectorCall, IsRegCall);
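
  // The return value above is classified against its own SSE register budget;
  // the parameter budget is reset separately below (4 return registers vs. 6
  // parameter registers under vectorcall, 16 either way under regcall).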
  if (IsVectorCall) {
    // We can use up to 6 SSE register parameters with vectorcall.
    FreeSSERegs = 6;
  } else if (IsRegCall) {
    // RegCall gives us 16 SSE registers; we can reuse the return registers.
    FreeSSERegs = 16;
  }

  unsigned ArgNum = 0;
  unsigned ZeroSSERegs = 0;
  for (auto &I : FI.arguments()) {
    // Vectorcall in x64 only permits the first 6 arguments to be passed as
    // XMM/YMM registers. After the sixth argument, pretend no vector
    // registers are left.
    unsigned *MaybeFreeSSERegs =
        (IsVectorCall && ArgNum >= 6) ? &ZeroSSERegs : &FreeSSERegs;
    I.info =
        classify(I.type, *MaybeFreeSSERegs, false, IsVectorCall, IsRegCall);
    ++ArgNum;
  }

  if (IsVectorCall) {
    // For vectorcall, assign aggregate HVAs to any free vector registers in a
    // second pass.
    for (auto &I : FI.arguments())
      I.info = reclassifyHvaArgForVectorCall(I.type, FreeSSERegs, I.info);
  }
}

Address WinX86_64ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
                                    QualType Ty) const {
  // MS x64 ABI requirement: "Any argument that doesn't fit in 8 bytes, or is
  // not 1, 2, 4, or 8 bytes, must be passed by reference."
  uint64_t Width = getContext().getTypeSize(Ty);
  bool IsIndirect = Width > 64 || !llvm::isPowerOf2_64(Width);

  return emitVoidPtrVAArg(CGF, VAListAddr, Ty, IsIndirect,
                          CGF.getContext().getTypeInfoInChars(Ty),
                          CharUnits::fromQuantity(8),
                          /*allowHigherAlign*/ false);
}

std::unique_ptr<TargetCodeGenInfo> CodeGen::createX86_32TargetCodeGenInfo(
    CodeGenModule &CGM, bool DarwinVectorABI, bool Win32StructABI,
    unsigned NumRegisterParameters, bool SoftFloatABI) {
  bool RetSmallStructInRegABI = X86_32TargetCodeGenInfo::isStructReturnInRegABI(
      CGM.getTriple(), CGM.getCodeGenOpts());
  return std::make_unique<X86_32TargetCodeGenInfo>(
      CGM.getTypes(), DarwinVectorABI, RetSmallStructInRegABI, Win32StructABI,
      NumRegisterParameters, SoftFloatABI);
}

std::unique_ptr<TargetCodeGenInfo> CodeGen::createWinX86_32TargetCodeGenInfo(
    CodeGenModule &CGM, bool DarwinVectorABI, bool Win32StructABI,
    unsigned NumRegisterParameters) {
  bool RetSmallStructInRegABI = X86_32TargetCodeGenInfo::isStructReturnInRegABI(
      CGM.getTriple(), CGM.getCodeGenOpts());
  return std::make_unique<WinX86_32TargetCodeGenInfo>(
      CGM.getTypes(), DarwinVectorABI, RetSmallStructInRegABI, Win32StructABI,
      NumRegisterParameters);
}

std::unique_ptr<TargetCodeGenInfo>
CodeGen::createX86_64TargetCodeGenInfo(CodeGenModule &CGM,
                                       X86AVXABILevel AVXLevel) {
  return std::make_unique<X86_64TargetCodeGenInfo>(CGM.getTypes(), AVXLevel);
}

std::unique_ptr<TargetCodeGenInfo>
CodeGen::createWinX86_64TargetCodeGenInfo(CodeGenModule &CGM,
                                          X86AVXABILevel AVXLevel) {
  return std::make_unique<WinX86_64TargetCodeGenInfo>(CGM.getTypes(), AVXLevel);
}
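
// Usage sketch (illustrative; the actual dispatch lives in the CodeGenModule
// target setup, and "Triple" here is assumed from that context): a Windows
// x86-64 triple is wired to the MS ABI variant, other x86-64 triples to the
// SysV one.
//
//   std::unique_ptr<TargetCodeGenInfo> TCGI =
//       Triple.isOSWindows()
//           ? CodeGen::createWinX86_64TargetCodeGenInfo(CGM, AVXLevel)
//           : CodeGen::createX86_64TargetCodeGenInfo(CGM, AVXLevel);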