1 //===---- CGBuiltin.cpp - Emit LLVM Code for builtins ---------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This contains code to emit Builtin calls as LLVM code. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "ABIInfo.h" 14 #include "CGCUDARuntime.h" 15 #include "CGCXXABI.h" 16 #include "CGHLSLRuntime.h" 17 #include "CGObjCRuntime.h" 18 #include "CGOpenCLRuntime.h" 19 #include "CGRecordLayout.h" 20 #include "CodeGenFunction.h" 21 #include "CodeGenModule.h" 22 #include "ConstantEmitter.h" 23 #include "PatternInit.h" 24 #include "TargetInfo.h" 25 #include "clang/AST/ASTContext.h" 26 #include "clang/AST/Attr.h" 27 #include "clang/AST/Decl.h" 28 #include "clang/AST/OSLog.h" 29 #include "clang/AST/OperationKinds.h" 30 #include "clang/Basic/TargetBuiltins.h" 31 #include "clang/Basic/TargetInfo.h" 32 #include "clang/Basic/TargetOptions.h" 33 #include "clang/CodeGen/CGFunctionInfo.h" 34 #include "clang/Frontend/FrontendDiagnostic.h" 35 #include "llvm/ADT/APFloat.h" 36 #include "llvm/ADT/APInt.h" 37 #include "llvm/ADT/FloatingPointMode.h" 38 #include "llvm/ADT/SmallPtrSet.h" 39 #include "llvm/ADT/StringExtras.h" 40 #include "llvm/Analysis/ValueTracking.h" 41 #include "llvm/IR/DataLayout.h" 42 #include "llvm/IR/InlineAsm.h" 43 #include "llvm/IR/Intrinsics.h" 44 #include "llvm/IR/IntrinsicsAArch64.h" 45 #include "llvm/IR/IntrinsicsAMDGPU.h" 46 #include "llvm/IR/IntrinsicsARM.h" 47 #include "llvm/IR/IntrinsicsBPF.h" 48 #include "llvm/IR/IntrinsicsDirectX.h" 49 #include "llvm/IR/IntrinsicsHexagon.h" 50 #include "llvm/IR/IntrinsicsNVPTX.h" 51 #include "llvm/IR/IntrinsicsPowerPC.h" 52 #include "llvm/IR/IntrinsicsR600.h" 53 #include "llvm/IR/IntrinsicsRISCV.h" 54 #include "llvm/IR/IntrinsicsS390.h" 55 #include "llvm/IR/IntrinsicsVE.h" 56 #include "llvm/IR/IntrinsicsWebAssembly.h" 57 #include "llvm/IR/IntrinsicsX86.h" 58 #include "llvm/IR/MDBuilder.h" 59 #include "llvm/IR/MatrixBuilder.h" 60 #include "llvm/IR/MemoryModelRelaxationAnnotations.h" 61 #include "llvm/Support/ConvertUTF.h" 62 #include "llvm/Support/MathExtras.h" 63 #include "llvm/Support/ScopedPrinter.h" 64 #include "llvm/TargetParser/AArch64TargetParser.h" 65 #include "llvm/TargetParser/X86TargetParser.h" 66 #include <optional> 67 #include <sstream> 68 69 using namespace clang; 70 using namespace CodeGen; 71 using namespace llvm; 72 73 static void initializeAlloca(CodeGenFunction &CGF, AllocaInst *AI, Value *Size, 74 Align AlignmentInBytes) { 75 ConstantInt *Byte; 76 switch (CGF.getLangOpts().getTrivialAutoVarInit()) { 77 case LangOptions::TrivialAutoVarInitKind::Uninitialized: 78 // Nothing to initialize. 
79 return; 80 case LangOptions::TrivialAutoVarInitKind::Zero: 81 Byte = CGF.Builder.getInt8(0x00); 82 break; 83 case LangOptions::TrivialAutoVarInitKind::Pattern: { 84 llvm::Type *Int8 = llvm::IntegerType::getInt8Ty(CGF.CGM.getLLVMContext()); 85 Byte = llvm::dyn_cast<llvm::ConstantInt>( 86 initializationPatternFor(CGF.CGM, Int8)); 87 break; 88 } 89 } 90 if (CGF.CGM.stopAutoInit()) 91 return; 92 auto *I = CGF.Builder.CreateMemSet(AI, Byte, Size, AlignmentInBytes); 93 I->addAnnotationMetadata("auto-init"); 94 } 95 96 /// getBuiltinLibFunction - Given a builtin id for a function like 97 /// "__builtin_fabsf", return a Function* for "fabsf". 98 llvm::Constant *CodeGenModule::getBuiltinLibFunction(const FunctionDecl *FD, 99 unsigned BuiltinID) { 100 assert(Context.BuiltinInfo.isLibFunction(BuiltinID)); 101 102 // Get the name, skip over the __builtin_ prefix (if necessary). 103 StringRef Name; 104 GlobalDecl D(FD); 105 106 // TODO: This list should be expanded or refactored after all GCC-compatible 107 // std libcall builtins are implemented. 108 static SmallDenseMap<unsigned, StringRef, 64> F128Builtins{ 109 {Builtin::BI__builtin___fprintf_chk, "__fprintf_chkieee128"}, 110 {Builtin::BI__builtin___printf_chk, "__printf_chkieee128"}, 111 {Builtin::BI__builtin___snprintf_chk, "__snprintf_chkieee128"}, 112 {Builtin::BI__builtin___sprintf_chk, "__sprintf_chkieee128"}, 113 {Builtin::BI__builtin___vfprintf_chk, "__vfprintf_chkieee128"}, 114 {Builtin::BI__builtin___vprintf_chk, "__vprintf_chkieee128"}, 115 {Builtin::BI__builtin___vsnprintf_chk, "__vsnprintf_chkieee128"}, 116 {Builtin::BI__builtin___vsprintf_chk, "__vsprintf_chkieee128"}, 117 {Builtin::BI__builtin_fprintf, "__fprintfieee128"}, 118 {Builtin::BI__builtin_printf, "__printfieee128"}, 119 {Builtin::BI__builtin_snprintf, "__snprintfieee128"}, 120 {Builtin::BI__builtin_sprintf, "__sprintfieee128"}, 121 {Builtin::BI__builtin_vfprintf, "__vfprintfieee128"}, 122 {Builtin::BI__builtin_vprintf, "__vprintfieee128"}, 123 {Builtin::BI__builtin_vsnprintf, "__vsnprintfieee128"}, 124 {Builtin::BI__builtin_vsprintf, "__vsprintfieee128"}, 125 {Builtin::BI__builtin_fscanf, "__fscanfieee128"}, 126 {Builtin::BI__builtin_scanf, "__scanfieee128"}, 127 {Builtin::BI__builtin_sscanf, "__sscanfieee128"}, 128 {Builtin::BI__builtin_vfscanf, "__vfscanfieee128"}, 129 {Builtin::BI__builtin_vscanf, "__vscanfieee128"}, 130 {Builtin::BI__builtin_vsscanf, "__vsscanfieee128"}, 131 {Builtin::BI__builtin_nexttowardf128, "__nexttowardieee128"}, 132 }; 133 134 // The AIX library functions frexpl, ldexpl, and modfl are for 128-bit 135 // IBM 'long double' (i.e. __ibm128). Map to the 'double' versions 136 // if it is 64-bit 'long double' mode. 137 static SmallDenseMap<unsigned, StringRef, 4> AIXLongDouble64Builtins{ 138 {Builtin::BI__builtin_frexpl, "frexp"}, 139 {Builtin::BI__builtin_ldexpl, "ldexp"}, 140 {Builtin::BI__builtin_modfl, "modf"}, 141 }; 142 143 // If the builtin has been declared explicitly with an assembler label, 144 // use the mangled name. This differs from the plain label on platforms 145 // that prefix labels. 146 if (FD->hasAttr<AsmLabelAttr>()) 147 Name = getMangledName(D); 148 else { 149 // TODO: This mutation should also be applied to other targets other than 150 // PPC, after backend supports IEEE 128-bit style libcalls. 
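    // Illustrative example (derived from the tables above, not exhaustive):
    // on a PPC64 target whose 'long double' is IEEE quad, __builtin_printf
    // resolves to "__printfieee128" via F128Builtins; otherwise the name is
    // just the builtin name with the "__builtin_" prefix stripped, e.g.
    // __builtin_printf -> "printf".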
151 if (getTriple().isPPC64() && 152 &getTarget().getLongDoubleFormat() == &llvm::APFloat::IEEEquad() && 153 F128Builtins.contains(BuiltinID)) 154 Name = F128Builtins[BuiltinID]; 155 else if (getTriple().isOSAIX() && 156 &getTarget().getLongDoubleFormat() == 157 &llvm::APFloat::IEEEdouble() && 158 AIXLongDouble64Builtins.contains(BuiltinID)) 159 Name = AIXLongDouble64Builtins[BuiltinID]; 160 else 161 Name = Context.BuiltinInfo.getName(BuiltinID).substr(10); 162 } 163 164 llvm::FunctionType *Ty = 165 cast<llvm::FunctionType>(getTypes().ConvertType(FD->getType())); 166 167 return GetOrCreateLLVMFunction(Name, Ty, D, /*ForVTable=*/false); 168 } 169 170 /// Emit the conversions required to turn the given value into an 171 /// integer of the given size. 172 static Value *EmitToInt(CodeGenFunction &CGF, llvm::Value *V, 173 QualType T, llvm::IntegerType *IntType) { 174 V = CGF.EmitToMemory(V, T); 175 176 if (V->getType()->isPointerTy()) 177 return CGF.Builder.CreatePtrToInt(V, IntType); 178 179 assert(V->getType() == IntType); 180 return V; 181 } 182 183 static Value *EmitFromInt(CodeGenFunction &CGF, llvm::Value *V, 184 QualType T, llvm::Type *ResultType) { 185 V = CGF.EmitFromMemory(V, T); 186 187 if (ResultType->isPointerTy()) 188 return CGF.Builder.CreateIntToPtr(V, ResultType); 189 190 assert(V->getType() == ResultType); 191 return V; 192 } 193 194 static Address CheckAtomicAlignment(CodeGenFunction &CGF, const CallExpr *E) { 195 ASTContext &Ctx = CGF.getContext(); 196 Address Ptr = CGF.EmitPointerWithAlignment(E->getArg(0)); 197 unsigned Bytes = Ptr.getElementType()->isPointerTy() 198 ? Ctx.getTypeSizeInChars(Ctx.VoidPtrTy).getQuantity() 199 : Ptr.getElementType()->getScalarSizeInBits() / 8; 200 unsigned Align = Ptr.getAlignment().getQuantity(); 201 if (Align % Bytes != 0) { 202 DiagnosticsEngine &Diags = CGF.CGM.getDiags(); 203 Diags.Report(E->getBeginLoc(), diag::warn_sync_op_misaligned); 204 // Force address to be at least naturally-aligned. 205 return Ptr.withAlignment(CharUnits::fromQuantity(Bytes)); 206 } 207 return Ptr; 208 } 209 210 /// Utility to insert an atomic instruction based on Intrinsic::ID 211 /// and the expression node. 
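/// For example, a call such as __sync_fetch_and_add(ptr, val) is lowered
/// through this helper into a single 'atomicrmw add' instruction, using the
/// default seq_cst ordering unless a weaker Ordering is passed in.
/// (Illustrative sketch of the mapping, not an exhaustive list of users.)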
static Value *MakeBinaryAtomicValue(
    CodeGenFunction &CGF, llvm::AtomicRMWInst::BinOp Kind, const CallExpr *E,
    AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent) {

  QualType T = E->getType();
  assert(E->getArg(0)->getType()->isPointerType());
  assert(CGF.getContext().hasSameUnqualifiedType(T,
                                  E->getArg(0)->getType()->getPointeeType()));
  assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));

  Address DestAddr = CheckAtomicAlignment(CGF, E);

  llvm::IntegerType *IntType = llvm::IntegerType::get(
      CGF.getLLVMContext(), CGF.getContext().getTypeSize(T));

  llvm::Value *Val = CGF.EmitScalarExpr(E->getArg(1));
  llvm::Type *ValueType = Val->getType();
  Val = EmitToInt(CGF, Val, T, IntType);

  llvm::Value *Result =
      CGF.Builder.CreateAtomicRMW(Kind, DestAddr, Val, Ordering);
  return EmitFromInt(CGF, Result, T, ValueType);
}

static Value *EmitNontemporalStore(CodeGenFunction &CGF, const CallExpr *E) {
  Value *Val = CGF.EmitScalarExpr(E->getArg(0));
  Address Addr = CGF.EmitPointerWithAlignment(E->getArg(1));

  Val = CGF.EmitToMemory(Val, E->getArg(0)->getType());
  LValue LV = CGF.MakeAddrLValue(Addr, E->getArg(0)->getType());
  LV.setNontemporal(true);
  CGF.EmitStoreOfScalar(Val, LV, false);
  return nullptr;
}

static Value *EmitNontemporalLoad(CodeGenFunction &CGF, const CallExpr *E) {
  Address Addr = CGF.EmitPointerWithAlignment(E->getArg(0));

  LValue LV = CGF.MakeAddrLValue(Addr, E->getType());
  LV.setNontemporal(true);
  return CGF.EmitLoadOfScalar(LV, E->getExprLoc());
}

static RValue EmitBinaryAtomic(CodeGenFunction &CGF,
                               llvm::AtomicRMWInst::BinOp Kind,
                               const CallExpr *E) {
  return RValue::get(MakeBinaryAtomicValue(CGF, Kind, E));
}

/// Utility to insert an atomic instruction based on Intrinsic::ID and
/// the expression node, where the return value is the result of the
/// operation.
static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF,
                                   llvm::AtomicRMWInst::BinOp Kind,
                                   const CallExpr *E,
                                   Instruction::BinaryOps Op,
                                   bool Invert = false) {
  QualType T = E->getType();
  assert(E->getArg(0)->getType()->isPointerType());
  assert(CGF.getContext().hasSameUnqualifiedType(T,
                                  E->getArg(0)->getType()->getPointeeType()));
  assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));

  Address DestAddr = CheckAtomicAlignment(CGF, E);

  llvm::IntegerType *IntType = llvm::IntegerType::get(
      CGF.getLLVMContext(), CGF.getContext().getTypeSize(T));

  llvm::Value *Val = CGF.EmitScalarExpr(E->getArg(1));
  llvm::Type *ValueType = Val->getType();
  Val = EmitToInt(CGF, Val, T, IntType);

  llvm::Value *Result = CGF.Builder.CreateAtomicRMW(
      Kind, DestAddr, Val, llvm::AtomicOrdering::SequentiallyConsistent);
  Result = CGF.Builder.CreateBinOp(Op, Result, Val);
  if (Invert)
    Result =
        CGF.Builder.CreateBinOp(llvm::Instruction::Xor, Result,
                                llvm::ConstantInt::getAllOnesValue(IntType));
  Result = EmitFromInt(CGF, Result, T, ValueType);
  return RValue::get(Result);
}

/// Utility to insert an atomic cmpxchg instruction.
///
/// @param CGF The current codegen function.
/// @param E Builtin call expression to convert to cmpxchg.
299 /// arg0 - address to operate on 300 /// arg1 - value to compare with 301 /// arg2 - new value 302 /// @param ReturnBool Specifies whether to return success flag of 303 /// cmpxchg result or the old value. 304 /// 305 /// @returns result of cmpxchg, according to ReturnBool 306 /// 307 /// Note: In order to lower Microsoft's _InterlockedCompareExchange* intrinsics 308 /// invoke the function EmitAtomicCmpXchgForMSIntrin. 309 static Value *MakeAtomicCmpXchgValue(CodeGenFunction &CGF, const CallExpr *E, 310 bool ReturnBool) { 311 QualType T = ReturnBool ? E->getArg(1)->getType() : E->getType(); 312 Address DestAddr = CheckAtomicAlignment(CGF, E); 313 314 llvm::IntegerType *IntType = llvm::IntegerType::get( 315 CGF.getLLVMContext(), CGF.getContext().getTypeSize(T)); 316 317 Value *Cmp = CGF.EmitScalarExpr(E->getArg(1)); 318 llvm::Type *ValueType = Cmp->getType(); 319 Cmp = EmitToInt(CGF, Cmp, T, IntType); 320 Value *New = EmitToInt(CGF, CGF.EmitScalarExpr(E->getArg(2)), T, IntType); 321 322 Value *Pair = CGF.Builder.CreateAtomicCmpXchg( 323 DestAddr, Cmp, New, llvm::AtomicOrdering::SequentiallyConsistent, 324 llvm::AtomicOrdering::SequentiallyConsistent); 325 if (ReturnBool) 326 // Extract boolean success flag and zext it to int. 327 return CGF.Builder.CreateZExt(CGF.Builder.CreateExtractValue(Pair, 1), 328 CGF.ConvertType(E->getType())); 329 else 330 // Extract old value and emit it using the same type as compare value. 331 return EmitFromInt(CGF, CGF.Builder.CreateExtractValue(Pair, 0), T, 332 ValueType); 333 } 334 335 /// This function should be invoked to emit atomic cmpxchg for Microsoft's 336 /// _InterlockedCompareExchange* intrinsics which have the following signature: 337 /// T _InterlockedCompareExchange(T volatile *Destination, 338 /// T Exchange, 339 /// T Comparand); 340 /// 341 /// Whereas the llvm 'cmpxchg' instruction has the following syntax: 342 /// cmpxchg *Destination, Comparand, Exchange. 343 /// So we need to swap Comparand and Exchange when invoking 344 /// CreateAtomicCmpXchg. That is the reason we could not use the above utility 345 /// function MakeAtomicCmpXchgValue since it expects the arguments to be 346 /// already swapped. 347 348 static 349 Value *EmitAtomicCmpXchgForMSIntrin(CodeGenFunction &CGF, const CallExpr *E, 350 AtomicOrdering SuccessOrdering = AtomicOrdering::SequentiallyConsistent) { 351 assert(E->getArg(0)->getType()->isPointerType()); 352 assert(CGF.getContext().hasSameUnqualifiedType( 353 E->getType(), E->getArg(0)->getType()->getPointeeType())); 354 assert(CGF.getContext().hasSameUnqualifiedType(E->getType(), 355 E->getArg(1)->getType())); 356 assert(CGF.getContext().hasSameUnqualifiedType(E->getType(), 357 E->getArg(2)->getType())); 358 359 Address DestAddr = CheckAtomicAlignment(CGF, E); 360 361 auto *Comparand = CGF.EmitScalarExpr(E->getArg(2)); 362 auto *Exchange = CGF.EmitScalarExpr(E->getArg(1)); 363 364 // For Release ordering, the failure ordering should be Monotonic. 365 auto FailureOrdering = SuccessOrdering == AtomicOrdering::Release ? 366 AtomicOrdering::Monotonic : 367 SuccessOrdering; 368 369 // The atomic instruction is marked volatile for consistency with MSVC. This 370 // blocks the few atomics optimizations that LLVM has. If we want to optimize 371 // _Interlocked* operations in the future, we will have to remove the volatile 372 // marker. 
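  // Note (descriptive): Comparand (arg 2) and Exchange (arg 1) are passed to
  // CreateAtomicCmpXchg below in cmpxchg operand order (ptr, cmp, new), i.e.
  // swapped relative to the _InterlockedCompareExchange argument order, as
  // explained in the function comment above.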
373 auto *Result = CGF.Builder.CreateAtomicCmpXchg( 374 DestAddr, Comparand, Exchange, SuccessOrdering, FailureOrdering); 375 Result->setVolatile(true); 376 return CGF.Builder.CreateExtractValue(Result, 0); 377 } 378 379 // 64-bit Microsoft platforms support 128 bit cmpxchg operations. They are 380 // prototyped like this: 381 // 382 // unsigned char _InterlockedCompareExchange128...( 383 // __int64 volatile * _Destination, 384 // __int64 _ExchangeHigh, 385 // __int64 _ExchangeLow, 386 // __int64 * _ComparandResult); 387 // 388 // Note that Destination is assumed to be at least 16-byte aligned, despite 389 // being typed int64. 390 391 static Value *EmitAtomicCmpXchg128ForMSIntrin(CodeGenFunction &CGF, 392 const CallExpr *E, 393 AtomicOrdering SuccessOrdering) { 394 assert(E->getNumArgs() == 4); 395 llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0)); 396 llvm::Value *ExchangeHigh = CGF.EmitScalarExpr(E->getArg(1)); 397 llvm::Value *ExchangeLow = CGF.EmitScalarExpr(E->getArg(2)); 398 Address ComparandAddr = CGF.EmitPointerWithAlignment(E->getArg(3)); 399 400 assert(DestPtr->getType()->isPointerTy()); 401 assert(!ExchangeHigh->getType()->isPointerTy()); 402 assert(!ExchangeLow->getType()->isPointerTy()); 403 404 // For Release ordering, the failure ordering should be Monotonic. 405 auto FailureOrdering = SuccessOrdering == AtomicOrdering::Release 406 ? AtomicOrdering::Monotonic 407 : SuccessOrdering; 408 409 // Convert to i128 pointers and values. Alignment is also overridden for 410 // destination pointer. 411 llvm::Type *Int128Ty = llvm::IntegerType::get(CGF.getLLVMContext(), 128); 412 Address DestAddr(DestPtr, Int128Ty, 413 CGF.getContext().toCharUnitsFromBits(128)); 414 ComparandAddr = ComparandAddr.withElementType(Int128Ty); 415 416 // (((i128)hi) << 64) | ((i128)lo) 417 ExchangeHigh = CGF.Builder.CreateZExt(ExchangeHigh, Int128Ty); 418 ExchangeLow = CGF.Builder.CreateZExt(ExchangeLow, Int128Ty); 419 ExchangeHigh = 420 CGF.Builder.CreateShl(ExchangeHigh, llvm::ConstantInt::get(Int128Ty, 64)); 421 llvm::Value *Exchange = CGF.Builder.CreateOr(ExchangeHigh, ExchangeLow); 422 423 // Load the comparand for the instruction. 424 llvm::Value *Comparand = CGF.Builder.CreateLoad(ComparandAddr); 425 426 auto *CXI = CGF.Builder.CreateAtomicCmpXchg(DestAddr, Comparand, Exchange, 427 SuccessOrdering, FailureOrdering); 428 429 // The atomic instruction is marked volatile for consistency with MSVC. This 430 // blocks the few atomics optimizations that LLVM has. If we want to optimize 431 // _Interlocked* operations in the future, we will have to remove the volatile 432 // marker. 433 CXI->setVolatile(true); 434 435 // Store the result as an outparameter. 436 CGF.Builder.CreateStore(CGF.Builder.CreateExtractValue(CXI, 0), 437 ComparandAddr); 438 439 // Get the success boolean and zero extend it to i8. 
440 Value *Success = CGF.Builder.CreateExtractValue(CXI, 1); 441 return CGF.Builder.CreateZExt(Success, CGF.Int8Ty); 442 } 443 444 static Value *EmitAtomicIncrementValue(CodeGenFunction &CGF, const CallExpr *E, 445 AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent) { 446 assert(E->getArg(0)->getType()->isPointerType()); 447 448 auto *IntTy = CGF.ConvertType(E->getType()); 449 Address DestAddr = CheckAtomicAlignment(CGF, E); 450 auto *Result = CGF.Builder.CreateAtomicRMW( 451 AtomicRMWInst::Add, DestAddr, ConstantInt::get(IntTy, 1), Ordering); 452 return CGF.Builder.CreateAdd(Result, ConstantInt::get(IntTy, 1)); 453 } 454 455 static Value *EmitAtomicDecrementValue( 456 CodeGenFunction &CGF, const CallExpr *E, 457 AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent) { 458 assert(E->getArg(0)->getType()->isPointerType()); 459 460 auto *IntTy = CGF.ConvertType(E->getType()); 461 Address DestAddr = CheckAtomicAlignment(CGF, E); 462 auto *Result = CGF.Builder.CreateAtomicRMW( 463 AtomicRMWInst::Sub, DestAddr, ConstantInt::get(IntTy, 1), Ordering); 464 return CGF.Builder.CreateSub(Result, ConstantInt::get(IntTy, 1)); 465 } 466 467 // Build a plain volatile load. 468 static Value *EmitISOVolatileLoad(CodeGenFunction &CGF, const CallExpr *E) { 469 Value *Ptr = CGF.EmitScalarExpr(E->getArg(0)); 470 QualType ElTy = E->getArg(0)->getType()->getPointeeType(); 471 CharUnits LoadSize = CGF.getContext().getTypeSizeInChars(ElTy); 472 llvm::Type *ITy = 473 llvm::IntegerType::get(CGF.getLLVMContext(), LoadSize.getQuantity() * 8); 474 llvm::LoadInst *Load = CGF.Builder.CreateAlignedLoad(ITy, Ptr, LoadSize); 475 Load->setVolatile(true); 476 return Load; 477 } 478 479 // Build a plain volatile store. 480 static Value *EmitISOVolatileStore(CodeGenFunction &CGF, const CallExpr *E) { 481 Value *Ptr = CGF.EmitScalarExpr(E->getArg(0)); 482 Value *Value = CGF.EmitScalarExpr(E->getArg(1)); 483 QualType ElTy = E->getArg(0)->getType()->getPointeeType(); 484 CharUnits StoreSize = CGF.getContext().getTypeSizeInChars(ElTy); 485 llvm::StoreInst *Store = 486 CGF.Builder.CreateAlignedStore(Value, Ptr, StoreSize); 487 Store->setVolatile(true); 488 return Store; 489 } 490 491 // Emit a simple mangled intrinsic that has 1 argument and a return type 492 // matching the argument type. Depending on mode, this may be a constrained 493 // floating-point intrinsic. 494 static Value *emitUnaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF, 495 const CallExpr *E, unsigned IntrinsicID, 496 unsigned ConstrainedIntrinsicID) { 497 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); 498 499 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E); 500 if (CGF.Builder.getIsFPConstrained()) { 501 Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Src0->getType()); 502 return CGF.Builder.CreateConstrainedFPCall(F, { Src0 }); 503 } else { 504 Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); 505 return CGF.Builder.CreateCall(F, Src0); 506 } 507 } 508 509 // Emit an intrinsic that has 2 operands of the same type as its result. 510 // Depending on mode, this may be a constrained floating-point intrinsic. 
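// Illustrative example (assuming the usual mapping, not exhaustive): a call
// to __builtin_pow is emitted through this helper as llvm.pow.* in the
// default mode, or as llvm.experimental.constrained.pow.* when the builder
// is in FP-constrained mode (e.g. under #pragma STDC FENV_ACCESS ON).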
511 static Value *emitBinaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF, 512 const CallExpr *E, unsigned IntrinsicID, 513 unsigned ConstrainedIntrinsicID) { 514 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); 515 llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1)); 516 517 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E); 518 if (CGF.Builder.getIsFPConstrained()) { 519 Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Src0->getType()); 520 return CGF.Builder.CreateConstrainedFPCall(F, { Src0, Src1 }); 521 } else { 522 Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); 523 return CGF.Builder.CreateCall(F, { Src0, Src1 }); 524 } 525 } 526 527 // Has second type mangled argument. 528 static Value *emitBinaryExpMaybeConstrainedFPBuiltin( 529 CodeGenFunction &CGF, const CallExpr *E, llvm::Intrinsic::ID IntrinsicID, 530 llvm::Intrinsic::ID ConstrainedIntrinsicID) { 531 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); 532 llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1)); 533 534 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E); 535 if (CGF.Builder.getIsFPConstrained()) { 536 Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, 537 {Src0->getType(), Src1->getType()}); 538 return CGF.Builder.CreateConstrainedFPCall(F, {Src0, Src1}); 539 } 540 541 Function *F = 542 CGF.CGM.getIntrinsic(IntrinsicID, {Src0->getType(), Src1->getType()}); 543 return CGF.Builder.CreateCall(F, {Src0, Src1}); 544 } 545 546 // Emit an intrinsic that has 3 operands of the same type as its result. 547 // Depending on mode, this may be a constrained floating-point intrinsic. 548 static Value *emitTernaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF, 549 const CallExpr *E, unsigned IntrinsicID, 550 unsigned ConstrainedIntrinsicID) { 551 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); 552 llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1)); 553 llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2)); 554 555 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E); 556 if (CGF.Builder.getIsFPConstrained()) { 557 Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Src0->getType()); 558 return CGF.Builder.CreateConstrainedFPCall(F, { Src0, Src1, Src2 }); 559 } else { 560 Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); 561 return CGF.Builder.CreateCall(F, { Src0, Src1, Src2 }); 562 } 563 } 564 565 // Emit an intrinsic where all operands are of the same type as the result. 566 // Depending on mode, this may be a constrained floating-point intrinsic. 567 static Value *emitCallMaybeConstrainedFPBuiltin(CodeGenFunction &CGF, 568 unsigned IntrinsicID, 569 unsigned ConstrainedIntrinsicID, 570 llvm::Type *Ty, 571 ArrayRef<Value *> Args) { 572 Function *F; 573 if (CGF.Builder.getIsFPConstrained()) 574 F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Ty); 575 else 576 F = CGF.CGM.getIntrinsic(IntrinsicID, Ty); 577 578 if (CGF.Builder.getIsFPConstrained()) 579 return CGF.Builder.CreateConstrainedFPCall(F, Args); 580 else 581 return CGF.Builder.CreateCall(F, Args); 582 } 583 584 // Emit a simple intrinsic that has N scalar arguments and a return type 585 // matching the argument type. It is assumed that only the first argument is 586 // overloaded. 
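// Illustrative example: instantiating emitBuiltinWithOneOverloadedType<1>
// with Intrinsic::bswap emits llvm.bswap.iN, where iN is taken from the type
// of the builtin call's single argument. (Example intrinsic chosen for
// illustration; the helper itself is generic.)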
587 template <unsigned N> 588 Value *emitBuiltinWithOneOverloadedType(CodeGenFunction &CGF, const CallExpr *E, 589 unsigned IntrinsicID, 590 llvm::StringRef Name = "") { 591 static_assert(N, "expect non-empty argument"); 592 SmallVector<Value *, N> Args; 593 for (unsigned I = 0; I < N; ++I) 594 Args.push_back(CGF.EmitScalarExpr(E->getArg(I))); 595 Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Args[0]->getType()); 596 return CGF.Builder.CreateCall(F, Args, Name); 597 } 598 599 // Emit an intrinsic that has 1 float or double operand, and 1 integer. 600 static Value *emitFPIntBuiltin(CodeGenFunction &CGF, 601 const CallExpr *E, 602 unsigned IntrinsicID) { 603 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); 604 llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1)); 605 606 Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); 607 return CGF.Builder.CreateCall(F, {Src0, Src1}); 608 } 609 610 // Emit an intrinsic that has overloaded integer result and fp operand. 611 static Value * 612 emitMaybeConstrainedFPToIntRoundBuiltin(CodeGenFunction &CGF, const CallExpr *E, 613 unsigned IntrinsicID, 614 unsigned ConstrainedIntrinsicID) { 615 llvm::Type *ResultType = CGF.ConvertType(E->getType()); 616 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); 617 618 if (CGF.Builder.getIsFPConstrained()) { 619 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E); 620 Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, 621 {ResultType, Src0->getType()}); 622 return CGF.Builder.CreateConstrainedFPCall(F, {Src0}); 623 } else { 624 Function *F = 625 CGF.CGM.getIntrinsic(IntrinsicID, {ResultType, Src0->getType()}); 626 return CGF.Builder.CreateCall(F, Src0); 627 } 628 } 629 630 static Value *emitFrexpBuiltin(CodeGenFunction &CGF, const CallExpr *E, 631 llvm::Intrinsic::ID IntrinsicID) { 632 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); 633 llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1)); 634 635 QualType IntPtrTy = E->getArg(1)->getType()->getPointeeType(); 636 llvm::Type *IntTy = CGF.ConvertType(IntPtrTy); 637 llvm::Function *F = 638 CGF.CGM.getIntrinsic(IntrinsicID, {Src0->getType(), IntTy}); 639 llvm::Value *Call = CGF.Builder.CreateCall(F, Src0); 640 641 llvm::Value *Exp = CGF.Builder.CreateExtractValue(Call, 1); 642 LValue LV = CGF.MakeNaturalAlignAddrLValue(Src1, IntPtrTy); 643 CGF.EmitStoreOfScalar(Exp, LV); 644 645 return CGF.Builder.CreateExtractValue(Call, 0); 646 } 647 648 /// EmitFAbs - Emit a call to @llvm.fabs(). 649 static Value *EmitFAbs(CodeGenFunction &CGF, Value *V) { 650 Function *F = CGF.CGM.getIntrinsic(Intrinsic::fabs, V->getType()); 651 llvm::CallInst *Call = CGF.Builder.CreateCall(F, V); 652 Call->setDoesNotAccessMemory(); 653 return Call; 654 } 655 656 /// Emit the computation of the sign bit for a floating point value. Returns 657 /// the i1 sign bit value. 658 static Value *EmitSignBit(CodeGenFunction &CGF, Value *V) { 659 LLVMContext &C = CGF.CGM.getLLVMContext(); 660 661 llvm::Type *Ty = V->getType(); 662 int Width = Ty->getPrimitiveSizeInBits(); 663 llvm::Type *IntTy = llvm::IntegerType::get(C, Width); 664 V = CGF.Builder.CreateBitCast(V, IntTy); 665 if (Ty->isPPC_FP128Ty()) { 666 // We want the sign bit of the higher-order double. The bitcast we just 667 // did works as if the double-double was stored to memory and then 668 // read as an i128. 
The "store" will put the higher-order double in the 669 // lower address in both little- and big-Endian modes, but the "load" 670 // will treat those bits as a different part of the i128: the low bits in 671 // little-Endian, the high bits in big-Endian. Therefore, on big-Endian 672 // we need to shift the high bits down to the low before truncating. 673 Width >>= 1; 674 if (CGF.getTarget().isBigEndian()) { 675 Value *ShiftCst = llvm::ConstantInt::get(IntTy, Width); 676 V = CGF.Builder.CreateLShr(V, ShiftCst); 677 } 678 // We are truncating value in order to extract the higher-order 679 // double, which we will be using to extract the sign from. 680 IntTy = llvm::IntegerType::get(C, Width); 681 V = CGF.Builder.CreateTrunc(V, IntTy); 682 } 683 Value *Zero = llvm::Constant::getNullValue(IntTy); 684 return CGF.Builder.CreateICmpSLT(V, Zero); 685 } 686 687 static RValue emitLibraryCall(CodeGenFunction &CGF, const FunctionDecl *FD, 688 const CallExpr *E, llvm::Constant *calleeValue) { 689 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E); 690 CGCallee callee = CGCallee::forDirect(calleeValue, GlobalDecl(FD)); 691 RValue Call = 692 CGF.EmitCall(E->getCallee()->getType(), callee, E, ReturnValueSlot()); 693 694 // Check the supported intrinsic. 695 if (unsigned BuiltinID = FD->getBuiltinID()) { 696 auto IsErrnoIntrinsic = [&]() -> unsigned { 697 switch (BuiltinID) { 698 case Builtin::BIexpf: 699 case Builtin::BI__builtin_expf: 700 case Builtin::BI__builtin_expf128: 701 return true; 702 } 703 // TODO: support more FP math libcalls 704 return false; 705 }(); 706 707 // Restrict to target with errno, for example, MacOS doesn't set errno. 708 if (IsErrnoIntrinsic && CGF.CGM.getLangOpts().MathErrno && 709 !CGF.Builder.getIsFPConstrained()) { 710 ASTContext &Context = CGF.getContext(); 711 // Emit "int" TBAA metadata on FP math libcalls. 712 clang::QualType IntTy = Context.IntTy; 713 TBAAAccessInfo TBAAInfo = CGF.CGM.getTBAAAccessInfo(IntTy); 714 Instruction *Inst = cast<llvm::Instruction>(Call.getScalarVal()); 715 CGF.CGM.DecorateInstructionWithTBAA(Inst, TBAAInfo); 716 } 717 } 718 return Call; 719 } 720 721 /// Emit a call to llvm.{sadd,uadd,ssub,usub,smul,umul}.with.overflow.* 722 /// depending on IntrinsicID. 723 /// 724 /// \arg CGF The current codegen function. 725 /// \arg IntrinsicID The ID for the Intrinsic we wish to generate. 726 /// \arg X The first argument to the llvm.*.with.overflow.*. 727 /// \arg Y The second argument to the llvm.*.with.overflow.*. 728 /// \arg Carry The carry returned by the llvm.*.with.overflow.*. 729 /// \returns The result (i.e. sum/product) returned by the intrinsic. 730 static llvm::Value *EmitOverflowIntrinsic(CodeGenFunction &CGF, 731 const llvm::Intrinsic::ID IntrinsicID, 732 llvm::Value *X, llvm::Value *Y, 733 llvm::Value *&Carry) { 734 // Make sure we have integers of the same width. 735 assert(X->getType() == Y->getType() && 736 "Arguments must be the same type. 
(Did you forget to make sure both " 737 "arguments have the same integer width?)"); 738 739 Function *Callee = CGF.CGM.getIntrinsic(IntrinsicID, X->getType()); 740 llvm::Value *Tmp = CGF.Builder.CreateCall(Callee, {X, Y}); 741 Carry = CGF.Builder.CreateExtractValue(Tmp, 1); 742 return CGF.Builder.CreateExtractValue(Tmp, 0); 743 } 744 745 static Value *emitRangedBuiltin(CodeGenFunction &CGF, unsigned IntrinsicID, 746 int low, int high) { 747 Function *F = CGF.CGM.getIntrinsic(IntrinsicID, {}); 748 llvm::CallInst *Call = CGF.Builder.CreateCall(F); 749 llvm::ConstantRange CR(APInt(32, low), APInt(32, high)); 750 Call->addRangeRetAttr(CR); 751 Call->addRetAttr(llvm::Attribute::AttrKind::NoUndef); 752 return Call; 753 } 754 755 namespace { 756 struct WidthAndSignedness { 757 unsigned Width; 758 bool Signed; 759 }; 760 } 761 762 static WidthAndSignedness 763 getIntegerWidthAndSignedness(const clang::ASTContext &context, 764 const clang::QualType Type) { 765 assert(Type->isIntegerType() && "Given type is not an integer."); 766 unsigned Width = Type->isBooleanType() ? 1 767 : Type->isBitIntType() ? context.getIntWidth(Type) 768 : context.getTypeInfo(Type).Width; 769 bool Signed = Type->isSignedIntegerType(); 770 return {Width, Signed}; 771 } 772 773 // Given one or more integer types, this function produces an integer type that 774 // encompasses them: any value in one of the given types could be expressed in 775 // the encompassing type. 776 static struct WidthAndSignedness 777 EncompassingIntegerType(ArrayRef<struct WidthAndSignedness> Types) { 778 assert(Types.size() > 0 && "Empty list of types."); 779 780 // If any of the given types is signed, we must return a signed type. 781 bool Signed = false; 782 for (const auto &Type : Types) { 783 Signed |= Type.Signed; 784 } 785 786 // The encompassing type must have a width greater than or equal to the width 787 // of the specified types. Additionally, if the encompassing type is signed, 788 // its width must be strictly greater than the width of any unsigned types 789 // given. 790 unsigned Width = 0; 791 for (const auto &Type : Types) { 792 unsigned MinWidth = Type.Width + (Signed && !Type.Signed); 793 if (Width < MinWidth) { 794 Width = MinWidth; 795 } 796 } 797 798 return {Width, Signed}; 799 } 800 801 Value *CodeGenFunction::EmitVAStartEnd(Value *ArgValue, bool IsStart) { 802 Intrinsic::ID inst = IsStart ? Intrinsic::vastart : Intrinsic::vaend; 803 return Builder.CreateCall(CGM.getIntrinsic(inst, {ArgValue->getType()}), 804 ArgValue); 805 } 806 807 /// Checks if using the result of __builtin_object_size(p, @p From) in place of 808 /// __builtin_object_size(p, @p To) is correct 809 static bool areBOSTypesCompatible(int From, int To) { 810 // Note: Our __builtin_object_size implementation currently treats Type=0 and 811 // Type=2 identically. Encoding this implementation detail here may make 812 // improving __builtin_object_size difficult in the future, so it's omitted. 813 return From == To || (From == 0 && To == 1) || (From == 3 && To == 2); 814 } 815 816 static llvm::Value * 817 getDefaultBuiltinObjectSizeResult(unsigned Type, llvm::IntegerType *ResType) { 818 return ConstantInt::get(ResType, (Type & 2) ? 
0 : -1, /*isSigned=*/true); 819 } 820 821 llvm::Value * 822 CodeGenFunction::evaluateOrEmitBuiltinObjectSize(const Expr *E, unsigned Type, 823 llvm::IntegerType *ResType, 824 llvm::Value *EmittedE, 825 bool IsDynamic) { 826 uint64_t ObjectSize; 827 if (!E->tryEvaluateObjectSize(ObjectSize, getContext(), Type)) 828 return emitBuiltinObjectSize(E, Type, ResType, EmittedE, IsDynamic); 829 return ConstantInt::get(ResType, ObjectSize, /*isSigned=*/true); 830 } 831 832 const FieldDecl *CodeGenFunction::FindFlexibleArrayMemberFieldAndOffset( 833 ASTContext &Ctx, const RecordDecl *RD, const FieldDecl *FAMDecl, 834 uint64_t &Offset) { 835 const LangOptions::StrictFlexArraysLevelKind StrictFlexArraysLevel = 836 getLangOpts().getStrictFlexArraysLevel(); 837 uint32_t FieldNo = 0; 838 839 if (RD->isImplicit()) 840 return nullptr; 841 842 for (const FieldDecl *FD : RD->fields()) { 843 if ((!FAMDecl || FD == FAMDecl) && 844 Decl::isFlexibleArrayMemberLike( 845 Ctx, FD, FD->getType(), StrictFlexArraysLevel, 846 /*IgnoreTemplateOrMacroSubstitution=*/true)) { 847 const ASTRecordLayout &Layout = Ctx.getASTRecordLayout(RD); 848 Offset += Layout.getFieldOffset(FieldNo); 849 return FD; 850 } 851 852 QualType Ty = FD->getType(); 853 if (Ty->isRecordType()) { 854 if (const FieldDecl *Field = FindFlexibleArrayMemberFieldAndOffset( 855 Ctx, Ty->getAsRecordDecl(), FAMDecl, Offset)) { 856 const ASTRecordLayout &Layout = Ctx.getASTRecordLayout(RD); 857 Offset += Layout.getFieldOffset(FieldNo); 858 return Field; 859 } 860 } 861 862 if (!RD->isUnion()) 863 ++FieldNo; 864 } 865 866 return nullptr; 867 } 868 869 static unsigned CountCountedByAttrs(const RecordDecl *RD) { 870 unsigned Num = 0; 871 872 for (const FieldDecl *FD : RD->fields()) { 873 if (FD->getType()->isCountAttributedType()) 874 return ++Num; 875 876 QualType Ty = FD->getType(); 877 if (Ty->isRecordType()) 878 Num += CountCountedByAttrs(Ty->getAsRecordDecl()); 879 } 880 881 return Num; 882 } 883 884 llvm::Value * 885 CodeGenFunction::emitFlexibleArrayMemberSize(const Expr *E, unsigned Type, 886 llvm::IntegerType *ResType) { 887 // The code generated here calculates the size of a struct with a flexible 888 // array member that uses the counted_by attribute. 
  // There are three cases we handle:
  //
  //       struct s {
  //         unsigned long flags;
  //         int count;
  //         int array[] __attribute__((counted_by(count)));
  //       }
  //
  //   1) bdos of the flexible array itself:
  //
  //       __builtin_dynamic_object_size(p->array, 1) ==
  //           p->count * sizeof(*p->array)
  //
  //   2) bdos of a pointer into the flexible array:
  //
  //       __builtin_dynamic_object_size(&p->array[42], 1) ==
  //           (p->count - 42) * sizeof(*p->array)
  //
  //   3) bdos of the whole struct, including the flexible array:
  //
  //       __builtin_dynamic_object_size(p, 1) ==
  //          max(sizeof(struct s),
  //              offsetof(struct s, array) + p->count * sizeof(*p->array))
  //
  ASTContext &Ctx = getContext();
  const Expr *Base = E->IgnoreParenImpCasts();
  const Expr *Idx = nullptr;

  if (const auto *UO = dyn_cast<UnaryOperator>(Base);
      UO && UO->getOpcode() == UO_AddrOf) {
    Expr *SubExpr = UO->getSubExpr()->IgnoreParenImpCasts();
    if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(SubExpr)) {
      Base = ASE->getBase()->IgnoreParenImpCasts();
      Idx = ASE->getIdx()->IgnoreParenImpCasts();

      if (const auto *IL = dyn_cast<IntegerLiteral>(Idx)) {
        int64_t Val = IL->getValue().getSExtValue();
        if (Val < 0)
          return getDefaultBuiltinObjectSizeResult(Type, ResType);

        if (Val == 0)
          // The index is 0, so we don't need to take it into account.
          Idx = nullptr;
      }
    } else {
      // Potential pointer to another element in the struct.
      Base = SubExpr;
    }
  }

  // Get the flexible array member Decl.
  const RecordDecl *OuterRD = nullptr;
  const FieldDecl *FAMDecl = nullptr;
  if (const auto *ME = dyn_cast<MemberExpr>(Base)) {
    // Check if \p Base is referencing the FAM itself.
    const ValueDecl *VD = ME->getMemberDecl();
    OuterRD = VD->getDeclContext()->getOuterLexicalRecordContext();
    FAMDecl = dyn_cast<FieldDecl>(VD);
    if (!FAMDecl)
      return nullptr;
  } else if (const auto *DRE = dyn_cast<DeclRefExpr>(Base)) {
    // Check if we're pointing to the whole struct.
    QualType Ty = DRE->getDecl()->getType();
    if (Ty->isPointerType())
      Ty = Ty->getPointeeType();
    OuterRD = Ty->getAsRecordDecl();

    // If we have a situation like this:
    //
    //     struct union_of_fams {
    //         int flags;
    //         union {
    //             signed char normal_field;
    //             struct {
    //                 int count1;
    //                 int arr1[] __counted_by(count1);
    //             };
    //             struct {
    //                 signed char count2;
    //                 int arr2[] __counted_by(count2);
    //             };
    //         };
    //     };
    //
    // We don't know which 'count' to use in this scenario:
    //
    //     size_t get_size(struct union_of_fams *p) {
    //         return __builtin_dynamic_object_size(p, 1);
    //     }
    //
    // Instead of calculating a wrong number, we give up.
    if (OuterRD && CountCountedByAttrs(OuterRD) > 1)
      return nullptr;
  }

  if (!OuterRD)
    return nullptr;

  // We call FindFlexibleArrayMemberFieldAndOffset even if FAMDecl is non-null
  // to get its offset.
  uint64_t Offset = 0;
  FAMDecl =
      FindFlexibleArrayMemberFieldAndOffset(Ctx, OuterRD, FAMDecl, Offset);
  Offset = Ctx.toCharUnitsFromBits(Offset).getQuantity();

  if (!FAMDecl || !FAMDecl->getType()->isCountAttributedType())
    // No flexible array member found or it doesn't have the "counted_by"
    // attribute.
    return nullptr;

  const FieldDecl *CountedByFD = FindCountedByField(FAMDecl);
  if (!CountedByFD)
    // Can't find the field referenced by the "counted_by" attribute.
    return nullptr;

  if (isa<DeclRefExpr>(Base))
    // The whole struct is specified in the __bdos. The calculation of the
    // whole size of the structure can be done in two ways:
    //
    //   1) sizeof(struct S) + count * sizeof(typeof(fam))
    //   2) offsetof(struct S, fam) + count * sizeof(typeof(fam))
    //
    // The first will add additional padding after the end of the array
    // allocation, while the second method is more precise, but not quite
    // expected from programmers. See
    // https://lore.kernel.org/lkml/ZvV6X5FPBBW7CO1f@archlinux/ for a
    // discussion of the topic.
    //
    // GCC isn't (currently) able to calculate __bdos on a pointer to the whole
    // structure. Therefore, because of the above issue, we'll choose to match
    // what GCC does for consistency's sake.
    return nullptr;

  // Build a load of the counted_by field.
  bool IsSigned = CountedByFD->getType()->isSignedIntegerType();
  Value *CountedByInst = EmitCountedByFieldExpr(Base, FAMDecl, CountedByFD);
  if (!CountedByInst)
    return getDefaultBuiltinObjectSizeResult(Type, ResType);

  CountedByInst = Builder.CreateIntCast(CountedByInst, ResType, IsSigned);

  // Build a load of the index and subtract it from the count.
  Value *IdxInst = nullptr;
  if (Idx) {
    if (Idx->HasSideEffects(getContext()))
      // We can't have side-effects.
      return getDefaultBuiltinObjectSizeResult(Type, ResType);

    bool IdxSigned = Idx->getType()->isSignedIntegerType();
    IdxInst = EmitAnyExprToTemp(Idx).getScalarVal();
    IdxInst = Builder.CreateIntCast(IdxInst, ResType, IdxSigned);

    // We go ahead with the calculation here. If the index turns out to be
    // negative, we'll catch it at the end.
    CountedByInst =
        Builder.CreateSub(CountedByInst, IdxInst, "", !IsSigned, IsSigned);
  }

  // Calculate how large the flexible array member is in bytes.
  const ArrayType *ArrayTy = Ctx.getAsArrayType(FAMDecl->getType());
  CharUnits Size = Ctx.getTypeSizeInChars(ArrayTy->getElementType());
  llvm::Constant *ElemSize =
      llvm::ConstantInt::get(ResType, Size.getQuantity(), IsSigned);
  Value *Res =
      Builder.CreateMul(CountedByInst, ElemSize, "", !IsSigned, IsSigned);
  Res = Builder.CreateIntCast(Res, ResType, IsSigned);

  // A negative \p IdxInst or \p CountedByInst means that the index lands
  // outside of the flexible array member. If that's the case, we want to
  // return 0.
  Value *Cmp = Builder.CreateIsNotNeg(CountedByInst);
  if (IdxInst)
    Cmp = Builder.CreateAnd(Builder.CreateIsNotNeg(IdxInst), Cmp);

  return Builder.CreateSelect(Cmp, Res, ConstantInt::get(ResType, 0, IsSigned));
}

/// Returns a Value corresponding to the size of the given expression.
/// This Value may be either of the following:
///   - A llvm::Argument (if E is a param with the pass_object_size attribute
///     on it)
///   - A call to the @llvm.objectsize intrinsic
///
/// EmittedE is the result of emitting `E` as a scalar expr. If it's non-null
/// and we wouldn't otherwise try to reference a pass_object_size parameter,
/// we'll call @llvm.objectsize on EmittedE, rather than emitting E.
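/// For instance, on a typical 64-bit target and with no applicable
/// pass_object_size parameter, __builtin_object_size(p, 0) lowers to
///   call i64 @llvm.objectsize.i64.p0(ptr %p, i1 false, i1 true, i1 false)
/// i.e. min=false for type 0, null-is-unknown=true for GCC compatibility,
/// and dynamic=false. (Illustrative example of the lowering below.)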
llvm::Value *
CodeGenFunction::emitBuiltinObjectSize(const Expr *E, unsigned Type,
                                       llvm::IntegerType *ResType,
                                       llvm::Value *EmittedE, bool IsDynamic) {
  // We need to reference an argument if the pointer is a parameter with the
  // pass_object_size attribute.
  if (auto *D = dyn_cast<DeclRefExpr>(E->IgnoreParenImpCasts())) {
    auto *Param = dyn_cast<ParmVarDecl>(D->getDecl());
    auto *PS = D->getDecl()->getAttr<PassObjectSizeAttr>();
    if (Param != nullptr && PS != nullptr &&
        areBOSTypesCompatible(PS->getType(), Type)) {
      auto Iter = SizeArguments.find(Param);
      assert(Iter != SizeArguments.end());

      const ImplicitParamDecl *D = Iter->second;
      auto DIter = LocalDeclMap.find(D);
      assert(DIter != LocalDeclMap.end());

      return EmitLoadOfScalar(DIter->second, /*Volatile=*/false,
                              getContext().getSizeType(), E->getBeginLoc());
    }
  }

  if (IsDynamic) {
    // Emit special code for a flexible array member with the "counted_by"
    // attribute.
    if (Value *V = emitFlexibleArrayMemberSize(E, Type, ResType))
      return V;
  }

  // LLVM can't handle Type=3 appropriately, and __builtin_object_size
  // shouldn't evaluate E for side-effects. In either case, we shouldn't lower
  // to @llvm.objectsize.
  if (Type == 3 || (!EmittedE && E->HasSideEffects(getContext())))
    return getDefaultBuiltinObjectSizeResult(Type, ResType);

  Value *Ptr = EmittedE ? EmittedE : EmitScalarExpr(E);
  assert(Ptr->getType()->isPointerTy() &&
         "Non-pointer passed to __builtin_object_size?");

  Function *F =
      CGM.getIntrinsic(Intrinsic::objectsize, {ResType, Ptr->getType()});

  // LLVM only supports 0 and 2, make sure that we pass along that as a
  // boolean.
  Value *Min = Builder.getInt1((Type & 2) != 0);
  // For GCC compatibility, __builtin_object_size treats NULL as unknown size.
  Value *NullIsUnknown = Builder.getTrue();
  Value *Dynamic = Builder.getInt1(IsDynamic);
  return Builder.CreateCall(F, {Ptr, Min, NullIsUnknown, Dynamic});
}

namespace {
/// A struct to generically describe a bit test intrinsic.
struct BitTest {
  enum ActionKind : uint8_t { TestOnly, Complement, Reset, Set };
  enum InterlockingKind : uint8_t {
    Unlocked,
    Sequential,
    Acquire,
    Release,
    NoFence
  };

  ActionKind Action;
  InterlockingKind Interlocking;
  bool Is64Bit;

  static BitTest decodeBitTestBuiltin(unsigned BuiltinID);
};

} // namespace

BitTest BitTest::decodeBitTestBuiltin(unsigned BuiltinID) {
  switch (BuiltinID) {
    // Main portable variants.
  case Builtin::BI_bittest:
    return {TestOnly, Unlocked, false};
  case Builtin::BI_bittestandcomplement:
    return {Complement, Unlocked, false};
  case Builtin::BI_bittestandreset:
    return {Reset, Unlocked, false};
  case Builtin::BI_bittestandset:
    return {Set, Unlocked, false};
  case Builtin::BI_interlockedbittestandreset:
    return {Reset, Sequential, false};
  case Builtin::BI_interlockedbittestandset:
    return {Set, Sequential, false};

    // X86-specific 64-bit variants.
1164 case Builtin::BI_bittest64: 1165 return {TestOnly, Unlocked, true}; 1166 case Builtin::BI_bittestandcomplement64: 1167 return {Complement, Unlocked, true}; 1168 case Builtin::BI_bittestandreset64: 1169 return {Reset, Unlocked, true}; 1170 case Builtin::BI_bittestandset64: 1171 return {Set, Unlocked, true}; 1172 case Builtin::BI_interlockedbittestandreset64: 1173 return {Reset, Sequential, true}; 1174 case Builtin::BI_interlockedbittestandset64: 1175 return {Set, Sequential, true}; 1176 1177 // ARM/AArch64-specific ordering variants. 1178 case Builtin::BI_interlockedbittestandset_acq: 1179 return {Set, Acquire, false}; 1180 case Builtin::BI_interlockedbittestandset_rel: 1181 return {Set, Release, false}; 1182 case Builtin::BI_interlockedbittestandset_nf: 1183 return {Set, NoFence, false}; 1184 case Builtin::BI_interlockedbittestandreset_acq: 1185 return {Reset, Acquire, false}; 1186 case Builtin::BI_interlockedbittestandreset_rel: 1187 return {Reset, Release, false}; 1188 case Builtin::BI_interlockedbittestandreset_nf: 1189 return {Reset, NoFence, false}; 1190 } 1191 llvm_unreachable("expected only bittest intrinsics"); 1192 } 1193 1194 static char bitActionToX86BTCode(BitTest::ActionKind A) { 1195 switch (A) { 1196 case BitTest::TestOnly: return '\0'; 1197 case BitTest::Complement: return 'c'; 1198 case BitTest::Reset: return 'r'; 1199 case BitTest::Set: return 's'; 1200 } 1201 llvm_unreachable("invalid action"); 1202 } 1203 1204 static llvm::Value *EmitX86BitTestIntrinsic(CodeGenFunction &CGF, 1205 BitTest BT, 1206 const CallExpr *E, Value *BitBase, 1207 Value *BitPos) { 1208 char Action = bitActionToX86BTCode(BT.Action); 1209 char SizeSuffix = BT.Is64Bit ? 'q' : 'l'; 1210 1211 // Build the assembly. 1212 SmallString<64> Asm; 1213 raw_svector_ostream AsmOS(Asm); 1214 if (BT.Interlocking != BitTest::Unlocked) 1215 AsmOS << "lock "; 1216 AsmOS << "bt"; 1217 if (Action) 1218 AsmOS << Action; 1219 AsmOS << SizeSuffix << " $2, ($1)"; 1220 1221 // Build the constraints. FIXME: We should support immediates when possible. 1222 std::string Constraints = "={@ccc},r,r,~{cc},~{memory}"; 1223 std::string_view MachineClobbers = CGF.getTarget().getClobbers(); 1224 if (!MachineClobbers.empty()) { 1225 Constraints += ','; 1226 Constraints += MachineClobbers; 1227 } 1228 llvm::IntegerType *IntType = llvm::IntegerType::get( 1229 CGF.getLLVMContext(), 1230 CGF.getContext().getTypeSize(E->getArg(1)->getType())); 1231 llvm::FunctionType *FTy = 1232 llvm::FunctionType::get(CGF.Int8Ty, {CGF.UnqualPtrTy, IntType}, false); 1233 1234 llvm::InlineAsm *IA = 1235 llvm::InlineAsm::get(FTy, Asm, Constraints, /*hasSideEffects=*/true); 1236 return CGF.Builder.CreateCall(IA, {BitBase, BitPos}); 1237 } 1238 1239 static llvm::AtomicOrdering 1240 getBitTestAtomicOrdering(BitTest::InterlockingKind I) { 1241 switch (I) { 1242 case BitTest::Unlocked: return llvm::AtomicOrdering::NotAtomic; 1243 case BitTest::Sequential: return llvm::AtomicOrdering::SequentiallyConsistent; 1244 case BitTest::Acquire: return llvm::AtomicOrdering::Acquire; 1245 case BitTest::Release: return llvm::AtomicOrdering::Release; 1246 case BitTest::NoFence: return llvm::AtomicOrdering::Monotonic; 1247 } 1248 llvm_unreachable("invalid interlocking"); 1249 } 1250 1251 /// Emit a _bittest* intrinsic. These intrinsics take a pointer to an array of 1252 /// bits and a bit position and read and optionally modify the bit at that 1253 /// position. The position index can be arbitrarily large, i.e. 
it can be larger 1254 /// than 31 or 63, so we need an indexed load in the general case. 1255 static llvm::Value *EmitBitTestIntrinsic(CodeGenFunction &CGF, 1256 unsigned BuiltinID, 1257 const CallExpr *E) { 1258 Value *BitBase = CGF.EmitScalarExpr(E->getArg(0)); 1259 Value *BitPos = CGF.EmitScalarExpr(E->getArg(1)); 1260 1261 BitTest BT = BitTest::decodeBitTestBuiltin(BuiltinID); 1262 1263 // X86 has special BT, BTC, BTR, and BTS instructions that handle the array 1264 // indexing operation internally. Use them if possible. 1265 if (CGF.getTarget().getTriple().isX86()) 1266 return EmitX86BitTestIntrinsic(CGF, BT, E, BitBase, BitPos); 1267 1268 // Otherwise, use generic code to load one byte and test the bit. Use all but 1269 // the bottom three bits as the array index, and the bottom three bits to form 1270 // a mask. 1271 // Bit = BitBaseI8[BitPos >> 3] & (1 << (BitPos & 0x7)) != 0; 1272 Value *ByteIndex = CGF.Builder.CreateAShr( 1273 BitPos, llvm::ConstantInt::get(BitPos->getType(), 3), "bittest.byteidx"); 1274 Value *BitBaseI8 = CGF.Builder.CreatePointerCast(BitBase, CGF.Int8PtrTy); 1275 Address ByteAddr(CGF.Builder.CreateInBoundsGEP(CGF.Int8Ty, BitBaseI8, 1276 ByteIndex, "bittest.byteaddr"), 1277 CGF.Int8Ty, CharUnits::One()); 1278 Value *PosLow = 1279 CGF.Builder.CreateAnd(CGF.Builder.CreateTrunc(BitPos, CGF.Int8Ty), 1280 llvm::ConstantInt::get(CGF.Int8Ty, 0x7)); 1281 1282 // The updating instructions will need a mask. 1283 Value *Mask = nullptr; 1284 if (BT.Action != BitTest::TestOnly) { 1285 Mask = CGF.Builder.CreateShl(llvm::ConstantInt::get(CGF.Int8Ty, 1), PosLow, 1286 "bittest.mask"); 1287 } 1288 1289 // Check the action and ordering of the interlocked intrinsics. 1290 llvm::AtomicOrdering Ordering = getBitTestAtomicOrdering(BT.Interlocking); 1291 1292 Value *OldByte = nullptr; 1293 if (Ordering != llvm::AtomicOrdering::NotAtomic) { 1294 // Emit a combined atomicrmw load/store operation for the interlocked 1295 // intrinsics. 1296 llvm::AtomicRMWInst::BinOp RMWOp = llvm::AtomicRMWInst::Or; 1297 if (BT.Action == BitTest::Reset) { 1298 Mask = CGF.Builder.CreateNot(Mask); 1299 RMWOp = llvm::AtomicRMWInst::And; 1300 } 1301 OldByte = CGF.Builder.CreateAtomicRMW(RMWOp, ByteAddr, Mask, Ordering); 1302 } else { 1303 // Emit a plain load for the non-interlocked intrinsics. 1304 OldByte = CGF.Builder.CreateLoad(ByteAddr, "bittest.byte"); 1305 Value *NewByte = nullptr; 1306 switch (BT.Action) { 1307 case BitTest::TestOnly: 1308 // Don't store anything. 1309 break; 1310 case BitTest::Complement: 1311 NewByte = CGF.Builder.CreateXor(OldByte, Mask); 1312 break; 1313 case BitTest::Reset: 1314 NewByte = CGF.Builder.CreateAnd(OldByte, CGF.Builder.CreateNot(Mask)); 1315 break; 1316 case BitTest::Set: 1317 NewByte = CGF.Builder.CreateOr(OldByte, Mask); 1318 break; 1319 } 1320 if (NewByte) 1321 CGF.Builder.CreateStore(NewByte, ByteAddr); 1322 } 1323 1324 // However we loaded the old byte, either by plain load or atomicrmw, shift 1325 // the bit into the low position and mask it to 0 or 1. 
1326 Value *ShiftedByte = CGF.Builder.CreateLShr(OldByte, PosLow, "bittest.shr"); 1327 return CGF.Builder.CreateAnd( 1328 ShiftedByte, llvm::ConstantInt::get(CGF.Int8Ty, 1), "bittest.res"); 1329 } 1330 1331 static llvm::Value *emitPPCLoadReserveIntrinsic(CodeGenFunction &CGF, 1332 unsigned BuiltinID, 1333 const CallExpr *E) { 1334 Value *Addr = CGF.EmitScalarExpr(E->getArg(0)); 1335 1336 SmallString<64> Asm; 1337 raw_svector_ostream AsmOS(Asm); 1338 llvm::IntegerType *RetType = CGF.Int32Ty; 1339 1340 switch (BuiltinID) { 1341 case clang::PPC::BI__builtin_ppc_ldarx: 1342 AsmOS << "ldarx "; 1343 RetType = CGF.Int64Ty; 1344 break; 1345 case clang::PPC::BI__builtin_ppc_lwarx: 1346 AsmOS << "lwarx "; 1347 RetType = CGF.Int32Ty; 1348 break; 1349 case clang::PPC::BI__builtin_ppc_lharx: 1350 AsmOS << "lharx "; 1351 RetType = CGF.Int16Ty; 1352 break; 1353 case clang::PPC::BI__builtin_ppc_lbarx: 1354 AsmOS << "lbarx "; 1355 RetType = CGF.Int8Ty; 1356 break; 1357 default: 1358 llvm_unreachable("Expected only PowerPC load reserve intrinsics"); 1359 } 1360 1361 AsmOS << "$0, ${1:y}"; 1362 1363 std::string Constraints = "=r,*Z,~{memory}"; 1364 std::string_view MachineClobbers = CGF.getTarget().getClobbers(); 1365 if (!MachineClobbers.empty()) { 1366 Constraints += ','; 1367 Constraints += MachineClobbers; 1368 } 1369 1370 llvm::Type *PtrType = CGF.UnqualPtrTy; 1371 llvm::FunctionType *FTy = llvm::FunctionType::get(RetType, {PtrType}, false); 1372 1373 llvm::InlineAsm *IA = 1374 llvm::InlineAsm::get(FTy, Asm, Constraints, /*hasSideEffects=*/true); 1375 llvm::CallInst *CI = CGF.Builder.CreateCall(IA, {Addr}); 1376 CI->addParamAttr( 1377 0, Attribute::get(CGF.getLLVMContext(), Attribute::ElementType, RetType)); 1378 return CI; 1379 } 1380 1381 namespace { 1382 enum class MSVCSetJmpKind { 1383 _setjmpex, 1384 _setjmp3, 1385 _setjmp 1386 }; 1387 } 1388 1389 /// MSVC handles setjmp a bit differently on different platforms. On every 1390 /// architecture except 32-bit x86, the frame address is passed. On x86, extra 1391 /// parameters can be passed as variadic arguments, but we always pass none. 1392 static RValue EmitMSVCRTSetJmp(CodeGenFunction &CGF, MSVCSetJmpKind SJKind, 1393 const CallExpr *E) { 1394 llvm::Value *Arg1 = nullptr; 1395 llvm::Type *Arg1Ty = nullptr; 1396 StringRef Name; 1397 bool IsVarArg = false; 1398 if (SJKind == MSVCSetJmpKind::_setjmp3) { 1399 Name = "_setjmp3"; 1400 Arg1Ty = CGF.Int32Ty; 1401 Arg1 = llvm::ConstantInt::get(CGF.IntTy, 0); 1402 IsVarArg = true; 1403 } else { 1404 Name = SJKind == MSVCSetJmpKind::_setjmp ? "_setjmp" : "_setjmpex"; 1405 Arg1Ty = CGF.Int8PtrTy; 1406 if (CGF.getTarget().getTriple().getArch() == llvm::Triple::aarch64) { 1407 Arg1 = CGF.Builder.CreateCall( 1408 CGF.CGM.getIntrinsic(Intrinsic::sponentry, CGF.AllocaInt8PtrTy)); 1409 } else 1410 Arg1 = CGF.Builder.CreateCall( 1411 CGF.CGM.getIntrinsic(Intrinsic::frameaddress, CGF.AllocaInt8PtrTy), 1412 llvm::ConstantInt::get(CGF.Int32Ty, 0)); 1413 } 1414 1415 // Mark the call site and declaration with ReturnsTwice. 
1416 llvm::Type *ArgTypes[2] = {CGF.Int8PtrTy, Arg1Ty}; 1417 llvm::AttributeList ReturnsTwiceAttr = llvm::AttributeList::get( 1418 CGF.getLLVMContext(), llvm::AttributeList::FunctionIndex, 1419 llvm::Attribute::ReturnsTwice); 1420 llvm::FunctionCallee SetJmpFn = CGF.CGM.CreateRuntimeFunction( 1421 llvm::FunctionType::get(CGF.IntTy, ArgTypes, IsVarArg), Name, 1422 ReturnsTwiceAttr, /*Local=*/true); 1423 1424 llvm::Value *Buf = CGF.Builder.CreateBitOrPointerCast( 1425 CGF.EmitScalarExpr(E->getArg(0)), CGF.Int8PtrTy); 1426 llvm::Value *Args[] = {Buf, Arg1}; 1427 llvm::CallBase *CB = CGF.EmitRuntimeCallOrInvoke(SetJmpFn, Args); 1428 CB->setAttributes(ReturnsTwiceAttr); 1429 return RValue::get(CB); 1430 } 1431 1432 // Many of MSVC builtins are on x64, ARM and AArch64; to avoid repeating code, 1433 // we handle them here. 1434 enum class CodeGenFunction::MSVCIntrin { 1435 _BitScanForward, 1436 _BitScanReverse, 1437 _InterlockedAnd, 1438 _InterlockedDecrement, 1439 _InterlockedExchange, 1440 _InterlockedExchangeAdd, 1441 _InterlockedExchangeSub, 1442 _InterlockedIncrement, 1443 _InterlockedOr, 1444 _InterlockedXor, 1445 _InterlockedExchangeAdd_acq, 1446 _InterlockedExchangeAdd_rel, 1447 _InterlockedExchangeAdd_nf, 1448 _InterlockedExchange_acq, 1449 _InterlockedExchange_rel, 1450 _InterlockedExchange_nf, 1451 _InterlockedCompareExchange_acq, 1452 _InterlockedCompareExchange_rel, 1453 _InterlockedCompareExchange_nf, 1454 _InterlockedCompareExchange128, 1455 _InterlockedCompareExchange128_acq, 1456 _InterlockedCompareExchange128_rel, 1457 _InterlockedCompareExchange128_nf, 1458 _InterlockedOr_acq, 1459 _InterlockedOr_rel, 1460 _InterlockedOr_nf, 1461 _InterlockedXor_acq, 1462 _InterlockedXor_rel, 1463 _InterlockedXor_nf, 1464 _InterlockedAnd_acq, 1465 _InterlockedAnd_rel, 1466 _InterlockedAnd_nf, 1467 _InterlockedIncrement_acq, 1468 _InterlockedIncrement_rel, 1469 _InterlockedIncrement_nf, 1470 _InterlockedDecrement_acq, 1471 _InterlockedDecrement_rel, 1472 _InterlockedDecrement_nf, 1473 __fastfail, 1474 }; 1475 1476 static std::optional<CodeGenFunction::MSVCIntrin> 1477 translateArmToMsvcIntrin(unsigned BuiltinID) { 1478 using MSVCIntrin = CodeGenFunction::MSVCIntrin; 1479 switch (BuiltinID) { 1480 default: 1481 return std::nullopt; 1482 case clang::ARM::BI_BitScanForward: 1483 case clang::ARM::BI_BitScanForward64: 1484 return MSVCIntrin::_BitScanForward; 1485 case clang::ARM::BI_BitScanReverse: 1486 case clang::ARM::BI_BitScanReverse64: 1487 return MSVCIntrin::_BitScanReverse; 1488 case clang::ARM::BI_InterlockedAnd64: 1489 return MSVCIntrin::_InterlockedAnd; 1490 case clang::ARM::BI_InterlockedExchange64: 1491 return MSVCIntrin::_InterlockedExchange; 1492 case clang::ARM::BI_InterlockedExchangeAdd64: 1493 return MSVCIntrin::_InterlockedExchangeAdd; 1494 case clang::ARM::BI_InterlockedExchangeSub64: 1495 return MSVCIntrin::_InterlockedExchangeSub; 1496 case clang::ARM::BI_InterlockedOr64: 1497 return MSVCIntrin::_InterlockedOr; 1498 case clang::ARM::BI_InterlockedXor64: 1499 return MSVCIntrin::_InterlockedXor; 1500 case clang::ARM::BI_InterlockedDecrement64: 1501 return MSVCIntrin::_InterlockedDecrement; 1502 case clang::ARM::BI_InterlockedIncrement64: 1503 return MSVCIntrin::_InterlockedIncrement; 1504 case clang::ARM::BI_InterlockedExchangeAdd8_acq: 1505 case clang::ARM::BI_InterlockedExchangeAdd16_acq: 1506 case clang::ARM::BI_InterlockedExchangeAdd_acq: 1507 case clang::ARM::BI_InterlockedExchangeAdd64_acq: 1508 return MSVCIntrin::_InterlockedExchangeAdd_acq; 1509 case 
clang::ARM::BI_InterlockedExchangeAdd8_rel: 1510 case clang::ARM::BI_InterlockedExchangeAdd16_rel: 1511 case clang::ARM::BI_InterlockedExchangeAdd_rel: 1512 case clang::ARM::BI_InterlockedExchangeAdd64_rel: 1513 return MSVCIntrin::_InterlockedExchangeAdd_rel; 1514 case clang::ARM::BI_InterlockedExchangeAdd8_nf: 1515 case clang::ARM::BI_InterlockedExchangeAdd16_nf: 1516 case clang::ARM::BI_InterlockedExchangeAdd_nf: 1517 case clang::ARM::BI_InterlockedExchangeAdd64_nf: 1518 return MSVCIntrin::_InterlockedExchangeAdd_nf; 1519 case clang::ARM::BI_InterlockedExchange8_acq: 1520 case clang::ARM::BI_InterlockedExchange16_acq: 1521 case clang::ARM::BI_InterlockedExchange_acq: 1522 case clang::ARM::BI_InterlockedExchange64_acq: 1523 return MSVCIntrin::_InterlockedExchange_acq; 1524 case clang::ARM::BI_InterlockedExchange8_rel: 1525 case clang::ARM::BI_InterlockedExchange16_rel: 1526 case clang::ARM::BI_InterlockedExchange_rel: 1527 case clang::ARM::BI_InterlockedExchange64_rel: 1528 return MSVCIntrin::_InterlockedExchange_rel; 1529 case clang::ARM::BI_InterlockedExchange8_nf: 1530 case clang::ARM::BI_InterlockedExchange16_nf: 1531 case clang::ARM::BI_InterlockedExchange_nf: 1532 case clang::ARM::BI_InterlockedExchange64_nf: 1533 return MSVCIntrin::_InterlockedExchange_nf; 1534 case clang::ARM::BI_InterlockedCompareExchange8_acq: 1535 case clang::ARM::BI_InterlockedCompareExchange16_acq: 1536 case clang::ARM::BI_InterlockedCompareExchange_acq: 1537 case clang::ARM::BI_InterlockedCompareExchange64_acq: 1538 return MSVCIntrin::_InterlockedCompareExchange_acq; 1539 case clang::ARM::BI_InterlockedCompareExchange8_rel: 1540 case clang::ARM::BI_InterlockedCompareExchange16_rel: 1541 case clang::ARM::BI_InterlockedCompareExchange_rel: 1542 case clang::ARM::BI_InterlockedCompareExchange64_rel: 1543 return MSVCIntrin::_InterlockedCompareExchange_rel; 1544 case clang::ARM::BI_InterlockedCompareExchange8_nf: 1545 case clang::ARM::BI_InterlockedCompareExchange16_nf: 1546 case clang::ARM::BI_InterlockedCompareExchange_nf: 1547 case clang::ARM::BI_InterlockedCompareExchange64_nf: 1548 return MSVCIntrin::_InterlockedCompareExchange_nf; 1549 case clang::ARM::BI_InterlockedOr8_acq: 1550 case clang::ARM::BI_InterlockedOr16_acq: 1551 case clang::ARM::BI_InterlockedOr_acq: 1552 case clang::ARM::BI_InterlockedOr64_acq: 1553 return MSVCIntrin::_InterlockedOr_acq; 1554 case clang::ARM::BI_InterlockedOr8_rel: 1555 case clang::ARM::BI_InterlockedOr16_rel: 1556 case clang::ARM::BI_InterlockedOr_rel: 1557 case clang::ARM::BI_InterlockedOr64_rel: 1558 return MSVCIntrin::_InterlockedOr_rel; 1559 case clang::ARM::BI_InterlockedOr8_nf: 1560 case clang::ARM::BI_InterlockedOr16_nf: 1561 case clang::ARM::BI_InterlockedOr_nf: 1562 case clang::ARM::BI_InterlockedOr64_nf: 1563 return MSVCIntrin::_InterlockedOr_nf; 1564 case clang::ARM::BI_InterlockedXor8_acq: 1565 case clang::ARM::BI_InterlockedXor16_acq: 1566 case clang::ARM::BI_InterlockedXor_acq: 1567 case clang::ARM::BI_InterlockedXor64_acq: 1568 return MSVCIntrin::_InterlockedXor_acq; 1569 case clang::ARM::BI_InterlockedXor8_rel: 1570 case clang::ARM::BI_InterlockedXor16_rel: 1571 case clang::ARM::BI_InterlockedXor_rel: 1572 case clang::ARM::BI_InterlockedXor64_rel: 1573 return MSVCIntrin::_InterlockedXor_rel; 1574 case clang::ARM::BI_InterlockedXor8_nf: 1575 case clang::ARM::BI_InterlockedXor16_nf: 1576 case clang::ARM::BI_InterlockedXor_nf: 1577 case clang::ARM::BI_InterlockedXor64_nf: 1578 return MSVCIntrin::_InterlockedXor_nf; 1579 case clang::ARM::BI_InterlockedAnd8_acq: 
1580 case clang::ARM::BI_InterlockedAnd16_acq: 1581 case clang::ARM::BI_InterlockedAnd_acq: 1582 case clang::ARM::BI_InterlockedAnd64_acq: 1583 return MSVCIntrin::_InterlockedAnd_acq; 1584 case clang::ARM::BI_InterlockedAnd8_rel: 1585 case clang::ARM::BI_InterlockedAnd16_rel: 1586 case clang::ARM::BI_InterlockedAnd_rel: 1587 case clang::ARM::BI_InterlockedAnd64_rel: 1588 return MSVCIntrin::_InterlockedAnd_rel; 1589 case clang::ARM::BI_InterlockedAnd8_nf: 1590 case clang::ARM::BI_InterlockedAnd16_nf: 1591 case clang::ARM::BI_InterlockedAnd_nf: 1592 case clang::ARM::BI_InterlockedAnd64_nf: 1593 return MSVCIntrin::_InterlockedAnd_nf; 1594 case clang::ARM::BI_InterlockedIncrement16_acq: 1595 case clang::ARM::BI_InterlockedIncrement_acq: 1596 case clang::ARM::BI_InterlockedIncrement64_acq: 1597 return MSVCIntrin::_InterlockedIncrement_acq; 1598 case clang::ARM::BI_InterlockedIncrement16_rel: 1599 case clang::ARM::BI_InterlockedIncrement_rel: 1600 case clang::ARM::BI_InterlockedIncrement64_rel: 1601 return MSVCIntrin::_InterlockedIncrement_rel; 1602 case clang::ARM::BI_InterlockedIncrement16_nf: 1603 case clang::ARM::BI_InterlockedIncrement_nf: 1604 case clang::ARM::BI_InterlockedIncrement64_nf: 1605 return MSVCIntrin::_InterlockedIncrement_nf; 1606 case clang::ARM::BI_InterlockedDecrement16_acq: 1607 case clang::ARM::BI_InterlockedDecrement_acq: 1608 case clang::ARM::BI_InterlockedDecrement64_acq: 1609 return MSVCIntrin::_InterlockedDecrement_acq; 1610 case clang::ARM::BI_InterlockedDecrement16_rel: 1611 case clang::ARM::BI_InterlockedDecrement_rel: 1612 case clang::ARM::BI_InterlockedDecrement64_rel: 1613 return MSVCIntrin::_InterlockedDecrement_rel; 1614 case clang::ARM::BI_InterlockedDecrement16_nf: 1615 case clang::ARM::BI_InterlockedDecrement_nf: 1616 case clang::ARM::BI_InterlockedDecrement64_nf: 1617 return MSVCIntrin::_InterlockedDecrement_nf; 1618 } 1619 llvm_unreachable("must return from switch"); 1620 } 1621 1622 static std::optional<CodeGenFunction::MSVCIntrin> 1623 translateAarch64ToMsvcIntrin(unsigned BuiltinID) { 1624 using MSVCIntrin = CodeGenFunction::MSVCIntrin; 1625 switch (BuiltinID) { 1626 default: 1627 return std::nullopt; 1628 case clang::AArch64::BI_BitScanForward: 1629 case clang::AArch64::BI_BitScanForward64: 1630 return MSVCIntrin::_BitScanForward; 1631 case clang::AArch64::BI_BitScanReverse: 1632 case clang::AArch64::BI_BitScanReverse64: 1633 return MSVCIntrin::_BitScanReverse; 1634 case clang::AArch64::BI_InterlockedAnd64: 1635 return MSVCIntrin::_InterlockedAnd; 1636 case clang::AArch64::BI_InterlockedExchange64: 1637 return MSVCIntrin::_InterlockedExchange; 1638 case clang::AArch64::BI_InterlockedExchangeAdd64: 1639 return MSVCIntrin::_InterlockedExchangeAdd; 1640 case clang::AArch64::BI_InterlockedExchangeSub64: 1641 return MSVCIntrin::_InterlockedExchangeSub; 1642 case clang::AArch64::BI_InterlockedOr64: 1643 return MSVCIntrin::_InterlockedOr; 1644 case clang::AArch64::BI_InterlockedXor64: 1645 return MSVCIntrin::_InterlockedXor; 1646 case clang::AArch64::BI_InterlockedDecrement64: 1647 return MSVCIntrin::_InterlockedDecrement; 1648 case clang::AArch64::BI_InterlockedIncrement64: 1649 return MSVCIntrin::_InterlockedIncrement; 1650 case clang::AArch64::BI_InterlockedExchangeAdd8_acq: 1651 case clang::AArch64::BI_InterlockedExchangeAdd16_acq: 1652 case clang::AArch64::BI_InterlockedExchangeAdd_acq: 1653 case clang::AArch64::BI_InterlockedExchangeAdd64_acq: 1654 return MSVCIntrin::_InterlockedExchangeAdd_acq; 1655 case 
clang::AArch64::BI_InterlockedExchangeAdd8_rel: 1656 case clang::AArch64::BI_InterlockedExchangeAdd16_rel: 1657 case clang::AArch64::BI_InterlockedExchangeAdd_rel: 1658 case clang::AArch64::BI_InterlockedExchangeAdd64_rel: 1659 return MSVCIntrin::_InterlockedExchangeAdd_rel; 1660 case clang::AArch64::BI_InterlockedExchangeAdd8_nf: 1661 case clang::AArch64::BI_InterlockedExchangeAdd16_nf: 1662 case clang::AArch64::BI_InterlockedExchangeAdd_nf: 1663 case clang::AArch64::BI_InterlockedExchangeAdd64_nf: 1664 return MSVCIntrin::_InterlockedExchangeAdd_nf; 1665 case clang::AArch64::BI_InterlockedExchange8_acq: 1666 case clang::AArch64::BI_InterlockedExchange16_acq: 1667 case clang::AArch64::BI_InterlockedExchange_acq: 1668 case clang::AArch64::BI_InterlockedExchange64_acq: 1669 return MSVCIntrin::_InterlockedExchange_acq; 1670 case clang::AArch64::BI_InterlockedExchange8_rel: 1671 case clang::AArch64::BI_InterlockedExchange16_rel: 1672 case clang::AArch64::BI_InterlockedExchange_rel: 1673 case clang::AArch64::BI_InterlockedExchange64_rel: 1674 return MSVCIntrin::_InterlockedExchange_rel; 1675 case clang::AArch64::BI_InterlockedExchange8_nf: 1676 case clang::AArch64::BI_InterlockedExchange16_nf: 1677 case clang::AArch64::BI_InterlockedExchange_nf: 1678 case clang::AArch64::BI_InterlockedExchange64_nf: 1679 return MSVCIntrin::_InterlockedExchange_nf; 1680 case clang::AArch64::BI_InterlockedCompareExchange8_acq: 1681 case clang::AArch64::BI_InterlockedCompareExchange16_acq: 1682 case clang::AArch64::BI_InterlockedCompareExchange_acq: 1683 case clang::AArch64::BI_InterlockedCompareExchange64_acq: 1684 return MSVCIntrin::_InterlockedCompareExchange_acq; 1685 case clang::AArch64::BI_InterlockedCompareExchange8_rel: 1686 case clang::AArch64::BI_InterlockedCompareExchange16_rel: 1687 case clang::AArch64::BI_InterlockedCompareExchange_rel: 1688 case clang::AArch64::BI_InterlockedCompareExchange64_rel: 1689 return MSVCIntrin::_InterlockedCompareExchange_rel; 1690 case clang::AArch64::BI_InterlockedCompareExchange8_nf: 1691 case clang::AArch64::BI_InterlockedCompareExchange16_nf: 1692 case clang::AArch64::BI_InterlockedCompareExchange_nf: 1693 case clang::AArch64::BI_InterlockedCompareExchange64_nf: 1694 return MSVCIntrin::_InterlockedCompareExchange_nf; 1695 case clang::AArch64::BI_InterlockedCompareExchange128: 1696 return MSVCIntrin::_InterlockedCompareExchange128; 1697 case clang::AArch64::BI_InterlockedCompareExchange128_acq: 1698 return MSVCIntrin::_InterlockedCompareExchange128_acq; 1699 case clang::AArch64::BI_InterlockedCompareExchange128_nf: 1700 return MSVCIntrin::_InterlockedCompareExchange128_nf; 1701 case clang::AArch64::BI_InterlockedCompareExchange128_rel: 1702 return MSVCIntrin::_InterlockedCompareExchange128_rel; 1703 case clang::AArch64::BI_InterlockedOr8_acq: 1704 case clang::AArch64::BI_InterlockedOr16_acq: 1705 case clang::AArch64::BI_InterlockedOr_acq: 1706 case clang::AArch64::BI_InterlockedOr64_acq: 1707 return MSVCIntrin::_InterlockedOr_acq; 1708 case clang::AArch64::BI_InterlockedOr8_rel: 1709 case clang::AArch64::BI_InterlockedOr16_rel: 1710 case clang::AArch64::BI_InterlockedOr_rel: 1711 case clang::AArch64::BI_InterlockedOr64_rel: 1712 return MSVCIntrin::_InterlockedOr_rel; 1713 case clang::AArch64::BI_InterlockedOr8_nf: 1714 case clang::AArch64::BI_InterlockedOr16_nf: 1715 case clang::AArch64::BI_InterlockedOr_nf: 1716 case clang::AArch64::BI_InterlockedOr64_nf: 1717 return MSVCIntrin::_InterlockedOr_nf; 1718 case clang::AArch64::BI_InterlockedXor8_acq: 1719 case 
clang::AArch64::BI_InterlockedXor16_acq: 1720 case clang::AArch64::BI_InterlockedXor_acq: 1721 case clang::AArch64::BI_InterlockedXor64_acq: 1722 return MSVCIntrin::_InterlockedXor_acq; 1723 case clang::AArch64::BI_InterlockedXor8_rel: 1724 case clang::AArch64::BI_InterlockedXor16_rel: 1725 case clang::AArch64::BI_InterlockedXor_rel: 1726 case clang::AArch64::BI_InterlockedXor64_rel: 1727 return MSVCIntrin::_InterlockedXor_rel; 1728 case clang::AArch64::BI_InterlockedXor8_nf: 1729 case clang::AArch64::BI_InterlockedXor16_nf: 1730 case clang::AArch64::BI_InterlockedXor_nf: 1731 case clang::AArch64::BI_InterlockedXor64_nf: 1732 return MSVCIntrin::_InterlockedXor_nf; 1733 case clang::AArch64::BI_InterlockedAnd8_acq: 1734 case clang::AArch64::BI_InterlockedAnd16_acq: 1735 case clang::AArch64::BI_InterlockedAnd_acq: 1736 case clang::AArch64::BI_InterlockedAnd64_acq: 1737 return MSVCIntrin::_InterlockedAnd_acq; 1738 case clang::AArch64::BI_InterlockedAnd8_rel: 1739 case clang::AArch64::BI_InterlockedAnd16_rel: 1740 case clang::AArch64::BI_InterlockedAnd_rel: 1741 case clang::AArch64::BI_InterlockedAnd64_rel: 1742 return MSVCIntrin::_InterlockedAnd_rel; 1743 case clang::AArch64::BI_InterlockedAnd8_nf: 1744 case clang::AArch64::BI_InterlockedAnd16_nf: 1745 case clang::AArch64::BI_InterlockedAnd_nf: 1746 case clang::AArch64::BI_InterlockedAnd64_nf: 1747 return MSVCIntrin::_InterlockedAnd_nf; 1748 case clang::AArch64::BI_InterlockedIncrement16_acq: 1749 case clang::AArch64::BI_InterlockedIncrement_acq: 1750 case clang::AArch64::BI_InterlockedIncrement64_acq: 1751 return MSVCIntrin::_InterlockedIncrement_acq; 1752 case clang::AArch64::BI_InterlockedIncrement16_rel: 1753 case clang::AArch64::BI_InterlockedIncrement_rel: 1754 case clang::AArch64::BI_InterlockedIncrement64_rel: 1755 return MSVCIntrin::_InterlockedIncrement_rel; 1756 case clang::AArch64::BI_InterlockedIncrement16_nf: 1757 case clang::AArch64::BI_InterlockedIncrement_nf: 1758 case clang::AArch64::BI_InterlockedIncrement64_nf: 1759 return MSVCIntrin::_InterlockedIncrement_nf; 1760 case clang::AArch64::BI_InterlockedDecrement16_acq: 1761 case clang::AArch64::BI_InterlockedDecrement_acq: 1762 case clang::AArch64::BI_InterlockedDecrement64_acq: 1763 return MSVCIntrin::_InterlockedDecrement_acq; 1764 case clang::AArch64::BI_InterlockedDecrement16_rel: 1765 case clang::AArch64::BI_InterlockedDecrement_rel: 1766 case clang::AArch64::BI_InterlockedDecrement64_rel: 1767 return MSVCIntrin::_InterlockedDecrement_rel; 1768 case clang::AArch64::BI_InterlockedDecrement16_nf: 1769 case clang::AArch64::BI_InterlockedDecrement_nf: 1770 case clang::AArch64::BI_InterlockedDecrement64_nf: 1771 return MSVCIntrin::_InterlockedDecrement_nf; 1772 } 1773 llvm_unreachable("must return from switch"); 1774 } 1775 1776 static std::optional<CodeGenFunction::MSVCIntrin> 1777 translateX86ToMsvcIntrin(unsigned BuiltinID) { 1778 using MSVCIntrin = CodeGenFunction::MSVCIntrin; 1779 switch (BuiltinID) { 1780 default: 1781 return std::nullopt; 1782 case clang::X86::BI_BitScanForward: 1783 case clang::X86::BI_BitScanForward64: 1784 return MSVCIntrin::_BitScanForward; 1785 case clang::X86::BI_BitScanReverse: 1786 case clang::X86::BI_BitScanReverse64: 1787 return MSVCIntrin::_BitScanReverse; 1788 case clang::X86::BI_InterlockedAnd64: 1789 return MSVCIntrin::_InterlockedAnd; 1790 case clang::X86::BI_InterlockedCompareExchange128: 1791 return MSVCIntrin::_InterlockedCompareExchange128; 1792 case clang::X86::BI_InterlockedExchange64: 1793 return MSVCIntrin::_InterlockedExchange; 
1794 case clang::X86::BI_InterlockedExchangeAdd64: 1795 return MSVCIntrin::_InterlockedExchangeAdd; 1796 case clang::X86::BI_InterlockedExchangeSub64: 1797 return MSVCIntrin::_InterlockedExchangeSub; 1798 case clang::X86::BI_InterlockedOr64: 1799 return MSVCIntrin::_InterlockedOr; 1800 case clang::X86::BI_InterlockedXor64: 1801 return MSVCIntrin::_InterlockedXor; 1802 case clang::X86::BI_InterlockedDecrement64: 1803 return MSVCIntrin::_InterlockedDecrement; 1804 case clang::X86::BI_InterlockedIncrement64: 1805 return MSVCIntrin::_InterlockedIncrement; 1806 } 1807 llvm_unreachable("must return from switch"); 1808 } 1809 1810 // Emit an MSVC intrinsic. Assumes that arguments have *not* been evaluated. 1811 Value *CodeGenFunction::EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID, 1812 const CallExpr *E) { 1813 switch (BuiltinID) { 1814 case MSVCIntrin::_BitScanForward: 1815 case MSVCIntrin::_BitScanReverse: { 1816 Address IndexAddress(EmitPointerWithAlignment(E->getArg(0))); 1817 Value *ArgValue = EmitScalarExpr(E->getArg(1)); 1818 1819 llvm::Type *ArgType = ArgValue->getType(); 1820 llvm::Type *IndexType = IndexAddress.getElementType(); 1821 llvm::Type *ResultType = ConvertType(E->getType()); 1822 1823 Value *ArgZero = llvm::Constant::getNullValue(ArgType); 1824 Value *ResZero = llvm::Constant::getNullValue(ResultType); 1825 Value *ResOne = llvm::ConstantInt::get(ResultType, 1); 1826 1827 BasicBlock *Begin = Builder.GetInsertBlock(); 1828 BasicBlock *End = createBasicBlock("bitscan_end", this->CurFn); 1829 Builder.SetInsertPoint(End); 1830 PHINode *Result = Builder.CreatePHI(ResultType, 2, "bitscan_result"); 1831 1832 Builder.SetInsertPoint(Begin); 1833 Value *IsZero = Builder.CreateICmpEQ(ArgValue, ArgZero); 1834 BasicBlock *NotZero = createBasicBlock("bitscan_not_zero", this->CurFn); 1835 Builder.CreateCondBr(IsZero, End, NotZero); 1836 Result->addIncoming(ResZero, Begin); 1837 1838 Builder.SetInsertPoint(NotZero); 1839 1840 if (BuiltinID == MSVCIntrin::_BitScanForward) { 1841 Function *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType); 1842 Value *ZeroCount = Builder.CreateCall(F, {ArgValue, Builder.getTrue()}); 1843 ZeroCount = Builder.CreateIntCast(ZeroCount, IndexType, false); 1844 Builder.CreateStore(ZeroCount, IndexAddress, false); 1845 } else { 1846 unsigned ArgWidth = cast<llvm::IntegerType>(ArgType)->getBitWidth(); 1847 Value *ArgTypeLastIndex = llvm::ConstantInt::get(IndexType, ArgWidth - 1); 1848 1849 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType); 1850 Value *ZeroCount = Builder.CreateCall(F, {ArgValue, Builder.getTrue()}); 1851 ZeroCount = Builder.CreateIntCast(ZeroCount, IndexType, false); 1852 Value *Index = Builder.CreateNSWSub(ArgTypeLastIndex, ZeroCount); 1853 Builder.CreateStore(Index, IndexAddress, false); 1854 } 1855 Builder.CreateBr(End); 1856 Result->addIncoming(ResOne, NotZero); 1857 1858 Builder.SetInsertPoint(End); 1859 return Result; 1860 } 1861 case MSVCIntrin::_InterlockedAnd: 1862 return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E); 1863 case MSVCIntrin::_InterlockedExchange: 1864 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E); 1865 case MSVCIntrin::_InterlockedExchangeAdd: 1866 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E); 1867 case MSVCIntrin::_InterlockedExchangeSub: 1868 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Sub, E); 1869 case MSVCIntrin::_InterlockedOr: 1870 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E); 1871 case MSVCIntrin::_InterlockedXor: 1872 return MakeBinaryAtomicValue(*this, 
AtomicRMWInst::Xor, E); 1873 case MSVCIntrin::_InterlockedExchangeAdd_acq: 1874 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E, 1875 AtomicOrdering::Acquire); 1876 case MSVCIntrin::_InterlockedExchangeAdd_rel: 1877 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E, 1878 AtomicOrdering::Release); 1879 case MSVCIntrin::_InterlockedExchangeAdd_nf: 1880 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E, 1881 AtomicOrdering::Monotonic); 1882 case MSVCIntrin::_InterlockedExchange_acq: 1883 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E, 1884 AtomicOrdering::Acquire); 1885 case MSVCIntrin::_InterlockedExchange_rel: 1886 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E, 1887 AtomicOrdering::Release); 1888 case MSVCIntrin::_InterlockedExchange_nf: 1889 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E, 1890 AtomicOrdering::Monotonic); 1891 case MSVCIntrin::_InterlockedCompareExchange_acq: 1892 return EmitAtomicCmpXchgForMSIntrin(*this, E, AtomicOrdering::Acquire); 1893 case MSVCIntrin::_InterlockedCompareExchange_rel: 1894 return EmitAtomicCmpXchgForMSIntrin(*this, E, AtomicOrdering::Release); 1895 case MSVCIntrin::_InterlockedCompareExchange_nf: 1896 return EmitAtomicCmpXchgForMSIntrin(*this, E, AtomicOrdering::Monotonic); 1897 case MSVCIntrin::_InterlockedCompareExchange128: 1898 return EmitAtomicCmpXchg128ForMSIntrin( 1899 *this, E, AtomicOrdering::SequentiallyConsistent); 1900 case MSVCIntrin::_InterlockedCompareExchange128_acq: 1901 return EmitAtomicCmpXchg128ForMSIntrin(*this, E, AtomicOrdering::Acquire); 1902 case MSVCIntrin::_InterlockedCompareExchange128_rel: 1903 return EmitAtomicCmpXchg128ForMSIntrin(*this, E, AtomicOrdering::Release); 1904 case MSVCIntrin::_InterlockedCompareExchange128_nf: 1905 return EmitAtomicCmpXchg128ForMSIntrin(*this, E, AtomicOrdering::Monotonic); 1906 case MSVCIntrin::_InterlockedOr_acq: 1907 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E, 1908 AtomicOrdering::Acquire); 1909 case MSVCIntrin::_InterlockedOr_rel: 1910 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E, 1911 AtomicOrdering::Release); 1912 case MSVCIntrin::_InterlockedOr_nf: 1913 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E, 1914 AtomicOrdering::Monotonic); 1915 case MSVCIntrin::_InterlockedXor_acq: 1916 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E, 1917 AtomicOrdering::Acquire); 1918 case MSVCIntrin::_InterlockedXor_rel: 1919 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E, 1920 AtomicOrdering::Release); 1921 case MSVCIntrin::_InterlockedXor_nf: 1922 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E, 1923 AtomicOrdering::Monotonic); 1924 case MSVCIntrin::_InterlockedAnd_acq: 1925 return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E, 1926 AtomicOrdering::Acquire); 1927 case MSVCIntrin::_InterlockedAnd_rel: 1928 return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E, 1929 AtomicOrdering::Release); 1930 case MSVCIntrin::_InterlockedAnd_nf: 1931 return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E, 1932 AtomicOrdering::Monotonic); 1933 case MSVCIntrin::_InterlockedIncrement_acq: 1934 return EmitAtomicIncrementValue(*this, E, AtomicOrdering::Acquire); 1935 case MSVCIntrin::_InterlockedIncrement_rel: 1936 return EmitAtomicIncrementValue(*this, E, AtomicOrdering::Release); 1937 case MSVCIntrin::_InterlockedIncrement_nf: 1938 return EmitAtomicIncrementValue(*this, E, AtomicOrdering::Monotonic); 1939 case MSVCIntrin::_InterlockedDecrement_acq: 1940 return 
EmitAtomicDecrementValue(*this, E, AtomicOrdering::Acquire); 1941 case MSVCIntrin::_InterlockedDecrement_rel: 1942 return EmitAtomicDecrementValue(*this, E, AtomicOrdering::Release); 1943 case MSVCIntrin::_InterlockedDecrement_nf: 1944 return EmitAtomicDecrementValue(*this, E, AtomicOrdering::Monotonic); 1945 1946 case MSVCIntrin::_InterlockedDecrement: 1947 return EmitAtomicDecrementValue(*this, E); 1948 case MSVCIntrin::_InterlockedIncrement: 1949 return EmitAtomicIncrementValue(*this, E); 1950 1951 case MSVCIntrin::__fastfail: { 1952 // Request immediate process termination from the kernel. The instruction 1953 // sequences to do this are documented on MSDN: 1954 // https://msdn.microsoft.com/en-us/library/dn774154.aspx 1955 llvm::Triple::ArchType ISA = getTarget().getTriple().getArch(); 1956 StringRef Asm, Constraints; 1957 switch (ISA) { 1958 default: 1959 ErrorUnsupported(E, "__fastfail call for this architecture"); 1960 break; 1961 case llvm::Triple::x86: 1962 case llvm::Triple::x86_64: 1963 Asm = "int $$0x29"; 1964 Constraints = "{cx}"; 1965 break; 1966 case llvm::Triple::thumb: 1967 Asm = "udf #251"; 1968 Constraints = "{r0}"; 1969 break; 1970 case llvm::Triple::aarch64: 1971 Asm = "brk #0xF003"; 1972 Constraints = "{w0}"; 1973 } 1974 llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, {Int32Ty}, false); 1975 llvm::InlineAsm *IA = 1976 llvm::InlineAsm::get(FTy, Asm, Constraints, /*hasSideEffects=*/true); 1977 llvm::AttributeList NoReturnAttr = llvm::AttributeList::get( 1978 getLLVMContext(), llvm::AttributeList::FunctionIndex, 1979 llvm::Attribute::NoReturn); 1980 llvm::CallInst *CI = Builder.CreateCall(IA, EmitScalarExpr(E->getArg(0))); 1981 CI->setAttributes(NoReturnAttr); 1982 return CI; 1983 } 1984 } 1985 llvm_unreachable("Incorrect MSVC intrinsic!"); 1986 } 1987 1988 namespace { 1989 // ARC cleanup for __builtin_os_log_format 1990 struct CallObjCArcUse final : EHScopeStack::Cleanup { 1991 CallObjCArcUse(llvm::Value *object) : object(object) {} 1992 llvm::Value *object; 1993 1994 void Emit(CodeGenFunction &CGF, Flags flags) override { 1995 CGF.EmitARCIntrinsicUse(object); 1996 } 1997 }; 1998 } 1999 2000 Value *CodeGenFunction::EmitCheckedArgForBuiltin(const Expr *E, 2001 BuiltinCheckKind Kind) { 2002 assert((Kind == BCK_CLZPassedZero || Kind == BCK_CTZPassedZero) 2003 && "Unsupported builtin check kind"); 2004 2005 Value *ArgValue = EmitScalarExpr(E); 2006 if (!SanOpts.has(SanitizerKind::Builtin)) 2007 return ArgValue; 2008 2009 SanitizerScope SanScope(this); 2010 Value *Cond = Builder.CreateICmpNE( 2011 ArgValue, llvm::Constant::getNullValue(ArgValue->getType())); 2012 EmitCheck(std::make_pair(Cond, SanitizerKind::Builtin), 2013 SanitizerHandler::InvalidBuiltin, 2014 {EmitCheckSourceLocation(E->getExprLoc()), 2015 llvm::ConstantInt::get(Builder.getInt8Ty(), Kind)}, 2016 std::nullopt); 2017 return ArgValue; 2018 } 2019 2020 static Value *EmitAbs(CodeGenFunction &CGF, Value *ArgValue, bool HasNSW) { 2021 return CGF.Builder.CreateBinaryIntrinsic( 2022 Intrinsic::abs, ArgValue, 2023 ConstantInt::get(CGF.Builder.getInt1Ty(), HasNSW)); 2024 } 2025 2026 static Value *EmitOverflowCheckedAbs(CodeGenFunction &CGF, const CallExpr *E, 2027 bool SanitizeOverflow) { 2028 Value *ArgValue = CGF.EmitScalarExpr(E->getArg(0)); 2029 2030 // Try to eliminate overflow check. 
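// (For example, if ArgValue is already the constant -42 it is provably not
// INT_MIN, so the early return below lowers it straight to `llvm.abs` with the
// is-int-min-poison flag set and no overflow check is emitted.)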
2031 if (const auto *VCI = dyn_cast<llvm::ConstantInt>(ArgValue)) { 2032 if (!VCI->isMinSignedValue()) 2033 return EmitAbs(CGF, ArgValue, true); 2034 } 2035 2036 CodeGenFunction::SanitizerScope SanScope(&CGF); 2037 2038 Constant *Zero = Constant::getNullValue(ArgValue->getType()); 2039 Value *ResultAndOverflow = CGF.Builder.CreateBinaryIntrinsic( 2040 Intrinsic::ssub_with_overflow, Zero, ArgValue); 2041 Value *Result = CGF.Builder.CreateExtractValue(ResultAndOverflow, 0); 2042 Value *NotOverflow = CGF.Builder.CreateNot( 2043 CGF.Builder.CreateExtractValue(ResultAndOverflow, 1)); 2044 2045 // TODO: support -ftrapv-handler. 2046 if (SanitizeOverflow) { 2047 CGF.EmitCheck({{NotOverflow, SanitizerKind::SignedIntegerOverflow}}, 2048 SanitizerHandler::NegateOverflow, 2049 {CGF.EmitCheckSourceLocation(E->getArg(0)->getExprLoc()), 2050 CGF.EmitCheckTypeDescriptor(E->getType())}, 2051 {ArgValue}); 2052 } else 2053 CGF.EmitTrapCheck(NotOverflow, SanitizerHandler::SubOverflow); 2054 2055 Value *CmpResult = CGF.Builder.CreateICmpSLT(ArgValue, Zero, "abscond"); 2056 return CGF.Builder.CreateSelect(CmpResult, Result, ArgValue, "abs"); 2057 } 2058 2059 /// Get the argument type for arguments to os_log_helper. 2060 static CanQualType getOSLogArgType(ASTContext &C, int Size) { 2061 QualType UnsignedTy = C.getIntTypeForBitwidth(Size * 8, /*Signed=*/false); 2062 return C.getCanonicalType(UnsignedTy); 2063 } 2064 2065 llvm::Function *CodeGenFunction::generateBuiltinOSLogHelperFunction( 2066 const analyze_os_log::OSLogBufferLayout &Layout, 2067 CharUnits BufferAlignment) { 2068 ASTContext &Ctx = getContext(); 2069 2070 llvm::SmallString<64> Name; 2071 { 2072 raw_svector_ostream OS(Name); 2073 OS << "__os_log_helper"; 2074 OS << "_" << BufferAlignment.getQuantity(); 2075 OS << "_" << int(Layout.getSummaryByte()); 2076 OS << "_" << int(Layout.getNumArgsByte()); 2077 for (const auto &Item : Layout.Items) 2078 OS << "_" << int(Item.getSizeByte()) << "_" 2079 << int(Item.getDescriptorByte()); 2080 } 2081 2082 if (llvm::Function *F = CGM.getModule().getFunction(Name)) 2083 return F; 2084 2085 llvm::SmallVector<QualType, 4> ArgTys; 2086 FunctionArgList Args; 2087 Args.push_back(ImplicitParamDecl::Create( 2088 Ctx, nullptr, SourceLocation(), &Ctx.Idents.get("buffer"), Ctx.VoidPtrTy, 2089 ImplicitParamKind::Other)); 2090 ArgTys.emplace_back(Ctx.VoidPtrTy); 2091 2092 for (unsigned int I = 0, E = Layout.Items.size(); I < E; ++I) { 2093 char Size = Layout.Items[I].getSizeByte(); 2094 if (!Size) 2095 continue; 2096 2097 QualType ArgTy = getOSLogArgType(Ctx, Size); 2098 Args.push_back(ImplicitParamDecl::Create( 2099 Ctx, nullptr, SourceLocation(), 2100 &Ctx.Idents.get(std::string("arg") + llvm::to_string(I)), ArgTy, 2101 ImplicitParamKind::Other)); 2102 ArgTys.emplace_back(ArgTy); 2103 } 2104 2105 QualType ReturnTy = Ctx.VoidTy; 2106 2107 // The helper function has linkonce_odr linkage to enable the linker to merge 2108 // identical functions. To ensure the merging always happens, 'noinline' is 2109 // attached to the function when compiling with -Oz. 
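// (Illustrative, hypothetical layout: a buffer with alignment 1, summary byte
// 2, one argument of size 4 with descriptor byte 0 yields a helper named
// "__os_log_helper_1_2_1_4_0", so every translation unit that sees the same
// layout deduplicates to one definition at link time.)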
2110 const CGFunctionInfo &FI = 2111 CGM.getTypes().arrangeBuiltinFunctionDeclaration(ReturnTy, Args); 2112 llvm::FunctionType *FuncTy = CGM.getTypes().GetFunctionType(FI); 2113 llvm::Function *Fn = llvm::Function::Create( 2114 FuncTy, llvm::GlobalValue::LinkOnceODRLinkage, Name, &CGM.getModule()); 2115 Fn->setVisibility(llvm::GlobalValue::HiddenVisibility); 2116 CGM.SetLLVMFunctionAttributes(GlobalDecl(), FI, Fn, /*IsThunk=*/false); 2117 CGM.SetLLVMFunctionAttributesForDefinition(nullptr, Fn); 2118 Fn->setDoesNotThrow(); 2119 2120 // Attach 'noinline' at -Oz. 2121 if (CGM.getCodeGenOpts().OptimizeSize == 2) 2122 Fn->addFnAttr(llvm::Attribute::NoInline); 2123 2124 auto NL = ApplyDebugLocation::CreateEmpty(*this); 2125 StartFunction(GlobalDecl(), ReturnTy, Fn, FI, Args); 2126 2127 // Create a scope with an artificial location for the body of this function. 2128 auto AL = ApplyDebugLocation::CreateArtificial(*this); 2129 2130 CharUnits Offset; 2131 Address BufAddr = makeNaturalAddressForPointer( 2132 Builder.CreateLoad(GetAddrOfLocalVar(Args[0]), "buf"), Ctx.VoidTy, 2133 BufferAlignment); 2134 Builder.CreateStore(Builder.getInt8(Layout.getSummaryByte()), 2135 Builder.CreateConstByteGEP(BufAddr, Offset++, "summary")); 2136 Builder.CreateStore(Builder.getInt8(Layout.getNumArgsByte()), 2137 Builder.CreateConstByteGEP(BufAddr, Offset++, "numArgs")); 2138 2139 unsigned I = 1; 2140 for (const auto &Item : Layout.Items) { 2141 Builder.CreateStore( 2142 Builder.getInt8(Item.getDescriptorByte()), 2143 Builder.CreateConstByteGEP(BufAddr, Offset++, "argDescriptor")); 2144 Builder.CreateStore( 2145 Builder.getInt8(Item.getSizeByte()), 2146 Builder.CreateConstByteGEP(BufAddr, Offset++, "argSize")); 2147 2148 CharUnits Size = Item.size(); 2149 if (!Size.getQuantity()) 2150 continue; 2151 2152 Address Arg = GetAddrOfLocalVar(Args[I]); 2153 Address Addr = Builder.CreateConstByteGEP(BufAddr, Offset, "argData"); 2154 Addr = Addr.withElementType(Arg.getElementType()); 2155 Builder.CreateStore(Builder.CreateLoad(Arg), Addr); 2156 Offset += Size; 2157 ++I; 2158 } 2159 2160 FinishFunction(); 2161 2162 return Fn; 2163 } 2164 2165 RValue CodeGenFunction::emitBuiltinOSLogFormat(const CallExpr &E) { 2166 assert(E.getNumArgs() >= 2 && 2167 "__builtin_os_log_format takes at least 2 arguments"); 2168 ASTContext &Ctx = getContext(); 2169 analyze_os_log::OSLogBufferLayout Layout; 2170 analyze_os_log::computeOSLogBufferLayout(Ctx, &E, Layout); 2171 Address BufAddr = EmitPointerWithAlignment(E.getArg(0)); 2172 llvm::SmallVector<llvm::Value *, 4> RetainableOperands; 2173 2174 // Ignore argument 1, the format string. It is not currently used. 2175 CallArgList Args; 2176 Args.add(RValue::get(BufAddr.emitRawPointer(*this)), Ctx.VoidPtrTy); 2177 2178 for (const auto &Item : Layout.Items) { 2179 int Size = Item.getSizeByte(); 2180 if (!Size) 2181 continue; 2182 2183 llvm::Value *ArgVal; 2184 2185 if (Item.getKind() == analyze_os_log::OSLogBufferItem::MaskKind) { 2186 uint64_t Val = 0; 2187 for (unsigned I = 0, E = Item.getMaskType().size(); I < E; ++I) 2188 Val |= ((uint64_t)Item.getMaskType()[I]) << I * 8; 2189 ArgVal = llvm::Constant::getIntegerValue(Int64Ty, llvm::APInt(64, Val)); 2190 } else if (const Expr *TheExpr = Item.getExpr()) { 2191 ArgVal = EmitScalarExpr(TheExpr, /*Ignore*/ false); 2192 2193 // If a temporary object that requires destruction after the full 2194 // expression is passed, push a lifetime-extended cleanup to extend its 2195 // lifetime to the end of the enclosing block scope. 
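// (Assumed ObjC example: in a call like `os_log(log, "%@", [obj description])`
// the message send produces a temporary object; the code below retains it and
// registers a cleanup so the pointer written into the log buffer stays valid
// until the end of the enclosing block scope.)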
2196 auto LifetimeExtendObject = [&](const Expr *E) { 2197 E = E->IgnoreParenCasts(); 2198 // Extend lifetimes of objects returned by function calls and message 2199 // sends. 2200 2201 // FIXME: We should do this in other cases in which temporaries are 2202 // created including arguments of non-ARC types (e.g., C++ 2203 // temporaries). 2204 if (isa<CallExpr>(E) || isa<ObjCMessageExpr>(E)) 2205 return true; 2206 return false; 2207 }; 2208 2209 if (TheExpr->getType()->isObjCRetainableType() && 2210 getLangOpts().ObjCAutoRefCount && LifetimeExtendObject(TheExpr)) { 2211 assert(getEvaluationKind(TheExpr->getType()) == TEK_Scalar && 2212 "Only scalar can be a ObjC retainable type"); 2213 if (!isa<Constant>(ArgVal)) { 2214 CleanupKind Cleanup = getARCCleanupKind(); 2215 QualType Ty = TheExpr->getType(); 2216 RawAddress Alloca = RawAddress::invalid(); 2217 RawAddress Addr = CreateMemTemp(Ty, "os.log.arg", &Alloca); 2218 ArgVal = EmitARCRetain(Ty, ArgVal); 2219 Builder.CreateStore(ArgVal, Addr); 2220 pushLifetimeExtendedDestroy(Cleanup, Alloca, Ty, 2221 CodeGenFunction::destroyARCStrongPrecise, 2222 Cleanup & EHCleanup); 2223 2224 // Push a clang.arc.use call to ensure ARC optimizer knows that the 2225 // argument has to be alive. 2226 if (CGM.getCodeGenOpts().OptimizationLevel != 0) 2227 pushCleanupAfterFullExpr<CallObjCArcUse>(Cleanup, ArgVal); 2228 } 2229 } 2230 } else { 2231 ArgVal = Builder.getInt32(Item.getConstValue().getQuantity()); 2232 } 2233 2234 unsigned ArgValSize = 2235 CGM.getDataLayout().getTypeSizeInBits(ArgVal->getType()); 2236 llvm::IntegerType *IntTy = llvm::Type::getIntNTy(getLLVMContext(), 2237 ArgValSize); 2238 ArgVal = Builder.CreateBitOrPointerCast(ArgVal, IntTy); 2239 CanQualType ArgTy = getOSLogArgType(Ctx, Size); 2240 // If ArgVal has type x86_fp80, zero-extend ArgVal. 
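// (Rationale sketch: x86_fp80 carries only 80 value bits, while the buffer
// slot is modelled as an integer of the argument's full byte size (e.g. i128
// when sizeof(long double) is 16), so the bit pattern is zero-extended rather
// than bitcast to an equal-width type.)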
2241 ArgVal = Builder.CreateZExtOrBitCast(ArgVal, ConvertType(ArgTy)); 2242 Args.add(RValue::get(ArgVal), ArgTy); 2243 } 2244 2245 const CGFunctionInfo &FI = 2246 CGM.getTypes().arrangeBuiltinFunctionCall(Ctx.VoidTy, Args); 2247 llvm::Function *F = CodeGenFunction(CGM).generateBuiltinOSLogHelperFunction( 2248 Layout, BufAddr.getAlignment()); 2249 EmitCall(FI, CGCallee::forDirect(F), ReturnValueSlot(), Args); 2250 return RValue::get(BufAddr, *this); 2251 } 2252 2253 static bool isSpecialUnsignedMultiplySignedResult( 2254 unsigned BuiltinID, WidthAndSignedness Op1Info, WidthAndSignedness Op2Info, 2255 WidthAndSignedness ResultInfo) { 2256 return BuiltinID == Builtin::BI__builtin_mul_overflow && 2257 Op1Info.Width == Op2Info.Width && Op2Info.Width == ResultInfo.Width && 2258 !Op1Info.Signed && !Op2Info.Signed && ResultInfo.Signed; 2259 } 2260 2261 static RValue EmitCheckedUnsignedMultiplySignedResult( 2262 CodeGenFunction &CGF, const clang::Expr *Op1, WidthAndSignedness Op1Info, 2263 const clang::Expr *Op2, WidthAndSignedness Op2Info, 2264 const clang::Expr *ResultArg, QualType ResultQTy, 2265 WidthAndSignedness ResultInfo) { 2266 assert(isSpecialUnsignedMultiplySignedResult( 2267 Builtin::BI__builtin_mul_overflow, Op1Info, Op2Info, ResultInfo) && 2268 "Cannot specialize this multiply"); 2269 2270 llvm::Value *V1 = CGF.EmitScalarExpr(Op1); 2271 llvm::Value *V2 = CGF.EmitScalarExpr(Op2); 2272 2273 llvm::Value *HasOverflow; 2274 llvm::Value *Result = EmitOverflowIntrinsic( 2275 CGF, llvm::Intrinsic::umul_with_overflow, V1, V2, HasOverflow); 2276 2277 // The intrinsic call will detect overflow when the value is > UINT_MAX, 2278 // however, since the original builtin had a signed result, we need to report 2279 // an overflow when the result is greater than INT_MAX. 2280 auto IntMax = llvm::APInt::getSignedMaxValue(ResultInfo.Width); 2281 llvm::Value *IntMaxValue = llvm::ConstantInt::get(Result->getType(), IntMax); 2282 2283 llvm::Value *IntMaxOverflow = CGF.Builder.CreateICmpUGT(Result, IntMaxValue); 2284 HasOverflow = CGF.Builder.CreateOr(HasOverflow, IntMaxOverflow); 2285 2286 bool isVolatile = 2287 ResultArg->getType()->getPointeeType().isVolatileQualified(); 2288 Address ResultPtr = CGF.EmitPointerWithAlignment(ResultArg); 2289 CGF.Builder.CreateStore(CGF.EmitToMemory(Result, ResultQTy), ResultPtr, 2290 isVolatile); 2291 return RValue::get(HasOverflow); 2292 } 2293 2294 /// Determine if a binop is a checked mixed-sign multiply we can specialize. 2295 static bool isSpecialMixedSignMultiply(unsigned BuiltinID, 2296 WidthAndSignedness Op1Info, 2297 WidthAndSignedness Op2Info, 2298 WidthAndSignedness ResultInfo) { 2299 return BuiltinID == Builtin::BI__builtin_mul_overflow && 2300 std::max(Op1Info.Width, Op2Info.Width) >= ResultInfo.Width && 2301 Op1Info.Signed != Op2Info.Signed; 2302 } 2303 2304 /// Emit a checked mixed-sign multiply. This is a cheaper specialization of 2305 /// the generic checked-binop irgen. 2306 static RValue 2307 EmitCheckedMixedSignMultiply(CodeGenFunction &CGF, const clang::Expr *Op1, 2308 WidthAndSignedness Op1Info, const clang::Expr *Op2, 2309 WidthAndSignedness Op2Info, 2310 const clang::Expr *ResultArg, QualType ResultQTy, 2311 WidthAndSignedness ResultInfo) { 2312 assert(isSpecialMixedSignMultiply(Builtin::BI__builtin_mul_overflow, Op1Info, 2313 Op2Info, ResultInfo) && 2314 "Not a mixed-sign multipliction we can specialize"); 2315 2316 // Emit the signed and unsigned operands. 2317 const clang::Expr *SignedOp = Op1Info.Signed ? 
Op1 : Op2; 2318 const clang::Expr *UnsignedOp = Op1Info.Signed ? Op2 : Op1; 2319 llvm::Value *Signed = CGF.EmitScalarExpr(SignedOp); 2320 llvm::Value *Unsigned = CGF.EmitScalarExpr(UnsignedOp); 2321 unsigned SignedOpWidth = Op1Info.Signed ? Op1Info.Width : Op2Info.Width; 2322 unsigned UnsignedOpWidth = Op1Info.Signed ? Op2Info.Width : Op1Info.Width; 2323 2324 // One of the operands may be smaller than the other. If so, [s|z]ext it. 2325 if (SignedOpWidth < UnsignedOpWidth) 2326 Signed = CGF.Builder.CreateSExt(Signed, Unsigned->getType(), "op.sext"); 2327 if (UnsignedOpWidth < SignedOpWidth) 2328 Unsigned = CGF.Builder.CreateZExt(Unsigned, Signed->getType(), "op.zext"); 2329 2330 llvm::Type *OpTy = Signed->getType(); 2331 llvm::Value *Zero = llvm::Constant::getNullValue(OpTy); 2332 Address ResultPtr = CGF.EmitPointerWithAlignment(ResultArg); 2333 llvm::Type *ResTy = ResultPtr.getElementType(); 2334 unsigned OpWidth = std::max(Op1Info.Width, Op2Info.Width); 2335 2336 // Take the absolute value of the signed operand. 2337 llvm::Value *IsNegative = CGF.Builder.CreateICmpSLT(Signed, Zero); 2338 llvm::Value *AbsOfNegative = CGF.Builder.CreateSub(Zero, Signed); 2339 llvm::Value *AbsSigned = 2340 CGF.Builder.CreateSelect(IsNegative, AbsOfNegative, Signed); 2341 2342 // Perform a checked unsigned multiplication. 2343 llvm::Value *UnsignedOverflow; 2344 llvm::Value *UnsignedResult = 2345 EmitOverflowIntrinsic(CGF, llvm::Intrinsic::umul_with_overflow, AbsSigned, 2346 Unsigned, UnsignedOverflow); 2347 2348 llvm::Value *Overflow, *Result; 2349 if (ResultInfo.Signed) { 2350 // Signed overflow occurs if the result is greater than INT_MAX or lesser 2351 // than INT_MIN, i.e when |Result| > (INT_MAX + IsNegative). 2352 auto IntMax = 2353 llvm::APInt::getSignedMaxValue(ResultInfo.Width).zext(OpWidth); 2354 llvm::Value *MaxResult = 2355 CGF.Builder.CreateAdd(llvm::ConstantInt::get(OpTy, IntMax), 2356 CGF.Builder.CreateZExt(IsNegative, OpTy)); 2357 llvm::Value *SignedOverflow = 2358 CGF.Builder.CreateICmpUGT(UnsignedResult, MaxResult); 2359 Overflow = CGF.Builder.CreateOr(UnsignedOverflow, SignedOverflow); 2360 2361 // Prepare the signed result (possibly by negating it). 2362 llvm::Value *NegativeResult = CGF.Builder.CreateNeg(UnsignedResult); 2363 llvm::Value *SignedResult = 2364 CGF.Builder.CreateSelect(IsNegative, NegativeResult, UnsignedResult); 2365 Result = CGF.Builder.CreateTrunc(SignedResult, ResTy); 2366 } else { 2367 // Unsigned overflow occurs if the result is < 0 or greater than UINT_MAX. 2368 llvm::Value *Underflow = CGF.Builder.CreateAnd( 2369 IsNegative, CGF.Builder.CreateIsNotNull(UnsignedResult)); 2370 Overflow = CGF.Builder.CreateOr(UnsignedOverflow, Underflow); 2371 if (ResultInfo.Width < OpWidth) { 2372 auto IntMax = 2373 llvm::APInt::getMaxValue(ResultInfo.Width).zext(OpWidth); 2374 llvm::Value *TruncOverflow = CGF.Builder.CreateICmpUGT( 2375 UnsignedResult, llvm::ConstantInt::get(OpTy, IntMax)); 2376 Overflow = CGF.Builder.CreateOr(Overflow, TruncOverflow); 2377 } 2378 2379 // Negate the product if it would be negative in infinite precision. 
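// (Worked example with hypothetical operands: for
// `__builtin_mul_overflow(-3, 5u, &u32res)` the unsigned product is 15 and
// IsNegative is true, so the value stored below is -15 wrapped to 4294967281
// and the builtin reports overflow through the Underflow term above.)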
2380 Result = CGF.Builder.CreateSelect( 2381 IsNegative, CGF.Builder.CreateNeg(UnsignedResult), UnsignedResult); 2382 2383 Result = CGF.Builder.CreateTrunc(Result, ResTy); 2384 } 2385 assert(Overflow && Result && "Missing overflow or result"); 2386 2387 bool isVolatile = 2388 ResultArg->getType()->getPointeeType().isVolatileQualified(); 2389 CGF.Builder.CreateStore(CGF.EmitToMemory(Result, ResultQTy), ResultPtr, 2390 isVolatile); 2391 return RValue::get(Overflow); 2392 } 2393 2394 static bool 2395 TypeRequiresBuiltinLaunderImp(const ASTContext &Ctx, QualType Ty, 2396 llvm::SmallPtrSetImpl<const Decl *> &Seen) { 2397 if (const auto *Arr = Ctx.getAsArrayType(Ty)) 2398 Ty = Ctx.getBaseElementType(Arr); 2399 2400 const auto *Record = Ty->getAsCXXRecordDecl(); 2401 if (!Record) 2402 return false; 2403 2404 // We've already checked this type, or are in the process of checking it. 2405 if (!Seen.insert(Record).second) 2406 return false; 2407 2408 assert(Record->hasDefinition() && 2409 "Incomplete types should already be diagnosed"); 2410 2411 if (Record->isDynamicClass()) 2412 return true; 2413 2414 for (FieldDecl *F : Record->fields()) { 2415 if (TypeRequiresBuiltinLaunderImp(Ctx, F->getType(), Seen)) 2416 return true; 2417 } 2418 return false; 2419 } 2420 2421 /// Determine if the specified type requires laundering by checking if it is a 2422 /// dynamic class type or contains a subobject which is a dynamic class type. 2423 static bool TypeRequiresBuiltinLaunder(CodeGenModule &CGM, QualType Ty) { 2424 if (!CGM.getCodeGenOpts().StrictVTablePointers) 2425 return false; 2426 llvm::SmallPtrSet<const Decl *, 16> Seen; 2427 return TypeRequiresBuiltinLaunderImp(CGM.getContext(), Ty, Seen); 2428 } 2429 2430 RValue CodeGenFunction::emitRotate(const CallExpr *E, bool IsRotateRight) { 2431 llvm::Value *Src = EmitScalarExpr(E->getArg(0)); 2432 llvm::Value *ShiftAmt = EmitScalarExpr(E->getArg(1)); 2433 2434 // The builtin's shift arg may have a different type than the source arg and 2435 // result, but the LLVM intrinsic uses the same type for all values. 2436 llvm::Type *Ty = Src->getType(); 2437 ShiftAmt = Builder.CreateIntCast(ShiftAmt, Ty, false); 2438 2439 // Rotate is a special case of LLVM funnel shift - 1st 2 args are the same. 2440 unsigned IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl; 2441 Function *F = CGM.getIntrinsic(IID, Ty); 2442 return RValue::get(Builder.CreateCall(F, { Src, Src, ShiftAmt })); 2443 } 2444 2445 // Map math builtins for long-double to f128 version. 
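// For example, when 'long double' uses the IEEE 128-bit format (see the PPC64
// check in EmitBuiltinExpr below), Builtin::BI__builtin_sqrtl is rewritten to
// Builtin::BI__builtin_sqrtf128 so the rest of the lowering selects the f128
// variant.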
2446 static unsigned mutateLongDoubleBuiltin(unsigned BuiltinID) { 2447 switch (BuiltinID) { 2448 #define MUTATE_LDBL(func) \ 2449 case Builtin::BI__builtin_##func##l: \ 2450 return Builtin::BI__builtin_##func##f128; 2451 MUTATE_LDBL(sqrt) 2452 MUTATE_LDBL(cbrt) 2453 MUTATE_LDBL(fabs) 2454 MUTATE_LDBL(log) 2455 MUTATE_LDBL(log2) 2456 MUTATE_LDBL(log10) 2457 MUTATE_LDBL(log1p) 2458 MUTATE_LDBL(logb) 2459 MUTATE_LDBL(exp) 2460 MUTATE_LDBL(exp2) 2461 MUTATE_LDBL(expm1) 2462 MUTATE_LDBL(fdim) 2463 MUTATE_LDBL(hypot) 2464 MUTATE_LDBL(ilogb) 2465 MUTATE_LDBL(pow) 2466 MUTATE_LDBL(fmin) 2467 MUTATE_LDBL(fmax) 2468 MUTATE_LDBL(ceil) 2469 MUTATE_LDBL(trunc) 2470 MUTATE_LDBL(rint) 2471 MUTATE_LDBL(nearbyint) 2472 MUTATE_LDBL(round) 2473 MUTATE_LDBL(floor) 2474 MUTATE_LDBL(lround) 2475 MUTATE_LDBL(llround) 2476 MUTATE_LDBL(lrint) 2477 MUTATE_LDBL(llrint) 2478 MUTATE_LDBL(fmod) 2479 MUTATE_LDBL(modf) 2480 MUTATE_LDBL(nan) 2481 MUTATE_LDBL(nans) 2482 MUTATE_LDBL(inf) 2483 MUTATE_LDBL(fma) 2484 MUTATE_LDBL(sin) 2485 MUTATE_LDBL(cos) 2486 MUTATE_LDBL(tan) 2487 MUTATE_LDBL(sinh) 2488 MUTATE_LDBL(cosh) 2489 MUTATE_LDBL(tanh) 2490 MUTATE_LDBL(asin) 2491 MUTATE_LDBL(acos) 2492 MUTATE_LDBL(atan) 2493 MUTATE_LDBL(asinh) 2494 MUTATE_LDBL(acosh) 2495 MUTATE_LDBL(atanh) 2496 MUTATE_LDBL(atan2) 2497 MUTATE_LDBL(erf) 2498 MUTATE_LDBL(erfc) 2499 MUTATE_LDBL(ldexp) 2500 MUTATE_LDBL(frexp) 2501 MUTATE_LDBL(huge_val) 2502 MUTATE_LDBL(copysign) 2503 MUTATE_LDBL(nextafter) 2504 MUTATE_LDBL(nexttoward) 2505 MUTATE_LDBL(remainder) 2506 MUTATE_LDBL(remquo) 2507 MUTATE_LDBL(scalbln) 2508 MUTATE_LDBL(scalbn) 2509 MUTATE_LDBL(tgamma) 2510 MUTATE_LDBL(lgamma) 2511 #undef MUTATE_LDBL 2512 default: 2513 return BuiltinID; 2514 } 2515 } 2516 2517 static Value *tryUseTestFPKind(CodeGenFunction &CGF, unsigned BuiltinID, 2518 Value *V) { 2519 if (CGF.Builder.getIsFPConstrained() && 2520 CGF.Builder.getDefaultConstrainedExcept() != fp::ebIgnore) { 2521 if (Value *Result = 2522 CGF.getTargetHooks().testFPKind(V, BuiltinID, CGF.Builder, CGF.CGM)) 2523 return Result; 2524 } 2525 return nullptr; 2526 } 2527 2528 static RValue EmitHipStdParUnsupportedBuiltin(CodeGenFunction *CGF, 2529 const FunctionDecl *FD) { 2530 auto Name = FD->getNameAsString() + "__hipstdpar_unsupported"; 2531 auto FnTy = CGF->CGM.getTypes().GetFunctionType(FD); 2532 auto UBF = CGF->CGM.getModule().getOrInsertFunction(Name, FnTy); 2533 2534 SmallVector<Value *, 16> Args; 2535 for (auto &&FormalTy : FnTy->params()) 2536 Args.push_back(llvm::PoisonValue::get(FormalTy)); 2537 2538 return RValue::get(CGF->Builder.CreateCall(UBF, Args)); 2539 } 2540 2541 RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, 2542 const CallExpr *E, 2543 ReturnValueSlot ReturnValue) { 2544 const FunctionDecl *FD = GD.getDecl()->getAsFunction(); 2545 // See if we can constant fold this builtin. If so, don't emit it at all. 2546 // TODO: Extend this handling to all builtin calls that we can constant-fold. 2547 Expr::EvalResult Result; 2548 if (E->isPRValue() && E->EvaluateAsRValue(Result, CGM.getContext()) && 2549 !Result.hasSideEffects()) { 2550 if (Result.Val.isInt()) 2551 return RValue::get(llvm::ConstantInt::get(getLLVMContext(), 2552 Result.Val.getInt())); 2553 if (Result.Val.isFloat()) 2554 return RValue::get(llvm::ConstantFP::get(getLLVMContext(), 2555 Result.Val.getFloat())); 2556 } 2557 2558 // If current long-double semantics is IEEE 128-bit, replace math builtins 2559 // of long-double with f128 equivalent. 
// TODO: This mutation should also be applied to targets other than PPC,
2561 // after the backend supports IEEE 128-bit style libcalls.
2562 if (getTarget().getTriple().isPPC64() &&
2563 &getTarget().getLongDoubleFormat() == &llvm::APFloat::IEEEquad())
2564 BuiltinID = mutateLongDoubleBuiltin(BuiltinID);
2565
2566 // If the builtin has been declared explicitly with an assembler label,
2567 // disable the specialized emitting below. Ideally we should communicate the
2568 // rename in IR, or at least avoid generating the intrinsic calls that are
2569 // likely to get lowered to the renamed library functions.
2570 const unsigned BuiltinIDIfNoAsmLabel =
2571 FD->hasAttr<AsmLabelAttr>() ? 0 : BuiltinID;
2572
2573 std::optional<bool> ErrnoOverriden;
2574 // ErrnoOverriden is true if math-errno is overridden via the
2575 // '#pragma float_control(precise, on)'. This pragma disables fast-math,
2576 // which implies math-errno.
2577 if (E->hasStoredFPFeatures()) {
2578 FPOptionsOverride OP = E->getFPFeatures();
2579 if (OP.hasMathErrnoOverride())
2580 ErrnoOverriden = OP.getMathErrnoOverride();
2581 }
2582 // True if '__attribute__((optnone))' is used. This attribute overrides
2583 // fast-math, which implies math-errno.
2584 bool OptNone = CurFuncDecl && CurFuncDecl->hasAttr<OptimizeNoneAttr>();
2585
2586 // True if we are compiling at -O2 and errno has been disabled
2587 // using the '#pragma float_control(precise, off)', and
2588 // '__attribute__((optnone))' hasn't been seen.
2589 bool ErrnoOverridenToFalseWithOpt =
2590 ErrnoOverriden.has_value() && !ErrnoOverriden.value() && !OptNone &&
2591 CGM.getCodeGenOpts().OptimizationLevel != 0;
2592
2593 // There are LLVM math intrinsics/instructions corresponding to math library
2594 // functions, except that the LLVM op will never set errno while the math library
2595 // might. Also, math builtins have the same semantics as their math library
2596 // twins. Thus, we can transform math library and builtin calls to their
2597 // LLVM counterparts if the call is marked 'const' (known to never set errno).
2598 // In case FP exceptions are enabled, the experimental versions of the
2599 // intrinsics model those.
2600 bool ConstAlways =
2601 getContext().BuiltinInfo.isConst(BuiltinID);
2602
2603 // There's a special case with the fma builtins where they are always const
2604 // if the target environment is GNU or the target OS is Windows and we're
2605 // targeting the MSVCRT.dll environment.
2606 // FIXME: This list can become outdated. Need to find a way to get it some
2607 // other way.
2608 switch (BuiltinID) {
2609 case Builtin::BI__builtin_fma:
2610 case Builtin::BI__builtin_fmaf:
2611 case Builtin::BI__builtin_fmal:
2612 case Builtin::BI__builtin_fmaf16:
2613 case Builtin::BIfma:
2614 case Builtin::BIfmaf:
2615 case Builtin::BIfmal: {
2616 auto &Trip = CGM.getTriple();
2617 if (Trip.isGNUEnvironment() || Trip.isOSMSVCRT())
2618 ConstAlways = true;
2619 break;
2620 }
2621 default:
2622 break;
2623 }
2624
2625 bool ConstWithoutErrnoAndExceptions =
2626 getContext().BuiltinInfo.isConstWithoutErrnoAndExceptions(BuiltinID);
2627 bool ConstWithoutExceptions =
2628 getContext().BuiltinInfo.isConstWithoutExceptions(BuiltinID);
2629
2630 // ConstAttr is enabled in fast-math mode. In fast-math mode, math-errno is
2631 // disabled.
2632 // Math intrinsics are generated only when math-errno is disabled. Any pragmas
2633 // or attributes that affect math-errno should prevent or allow math
2634 // intrinsics to be generated. Intrinsics are generated:
2635 //   1- In fast math mode, unless math-errno is overridden
2636 //      via '#pragma float_control(precise, on)', or via an
2637 //      '__attribute__((optnone))'.
2638 //   2- If math-errno was enabled on the command line but overridden
2639 //      to false via '#pragma float_control(precise, off)', and
2640 //      '__attribute__((optnone))' hasn't been used.
2641 //   3- If we are compiling with optimization and errno has been disabled
2642 //      via '#pragma float_control(precise, off)', and
2643 //      '__attribute__((optnone))' hasn't been used.
2644
2645 bool ConstWithoutErrnoOrExceptions =
2646 ConstWithoutErrnoAndExceptions || ConstWithoutExceptions;
2647 bool GenerateIntrinsics =
2648 (ConstAlways && !OptNone) ||
2649 (!getLangOpts().MathErrno &&
2650 !(ErrnoOverriden.has_value() && ErrnoOverriden.value()) && !OptNone);
2651 if (!GenerateIntrinsics) {
2652 GenerateIntrinsics =
2653 ConstWithoutErrnoOrExceptions && !ConstWithoutErrnoAndExceptions;
2654 if (!GenerateIntrinsics)
2655 GenerateIntrinsics =
2656 ConstWithoutErrnoOrExceptions &&
2657 (!getLangOpts().MathErrno &&
2658 !(ErrnoOverriden.has_value() && ErrnoOverriden.value()) && !OptNone);
2659 if (!GenerateIntrinsics)
2660 GenerateIntrinsics =
2661 ConstWithoutErrnoOrExceptions && ErrnoOverridenToFalseWithOpt;
2662 }
2663 if (GenerateIntrinsics) {
2664 switch (BuiltinIDIfNoAsmLabel) {
2665 case Builtin::BIacos:
2666 case Builtin::BIacosf:
2667 case Builtin::BIacosl:
2668 case Builtin::BI__builtin_acos:
2669 case Builtin::BI__builtin_acosf:
2670 case Builtin::BI__builtin_acosf16:
2671 case Builtin::BI__builtin_acosl:
2672 case Builtin::BI__builtin_acosf128:
2673 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
2674 *this, E, Intrinsic::acos, Intrinsic::experimental_constrained_acos));
2675
2676 case Builtin::BIasin:
2677 case Builtin::BIasinf:
2678 case Builtin::BIasinl:
2679 case Builtin::BI__builtin_asin:
2680 case Builtin::BI__builtin_asinf:
2681 case Builtin::BI__builtin_asinf16:
2682 case Builtin::BI__builtin_asinl:
2683 case Builtin::BI__builtin_asinf128:
2684 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
2685 *this, E, Intrinsic::asin, Intrinsic::experimental_constrained_asin));
2686
2687 case Builtin::BIatan:
2688 case Builtin::BIatanf:
2689 case Builtin::BIatanl:
2690 case Builtin::BI__builtin_atan:
2691 case Builtin::BI__builtin_atanf:
2692 case Builtin::BI__builtin_atanf16:
2693 case Builtin::BI__builtin_atanl:
2694 case Builtin::BI__builtin_atanf128:
2695 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
2696 *this, E, Intrinsic::atan, Intrinsic::experimental_constrained_atan));
2697
2698 case Builtin::BIceil:
2699 case Builtin::BIceilf:
2700 case Builtin::BIceill:
2701 case Builtin::BI__builtin_ceil:
2702 case Builtin::BI__builtin_ceilf:
2703 case Builtin::BI__builtin_ceilf16:
2704 case Builtin::BI__builtin_ceill:
2705 case Builtin::BI__builtin_ceilf128:
2706 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
2707 Intrinsic::ceil,
2708 Intrinsic::experimental_constrained_ceil));
2709
2710 case Builtin::BIcopysign:
2711 case Builtin::BIcopysignf:
2712 case Builtin::BIcopysignl:
2713 case Builtin::BI__builtin_copysign:
2714 case Builtin::BI__builtin_copysignf:
2715 case Builtin::BI__builtin_copysignf16:
2716 case Builtin::BI__builtin_copysignl:
2717 case Builtin::BI__builtin_copysignf128:
2718 return RValue::get(
2719 emitBuiltinWithOneOverloadedType<2>(*this, E, Intrinsic::copysign));
2720
2721 case Builtin::BIcos:
2722 case Builtin::BIcosf:
2723 case Builtin::BIcosl:
2724 case
Builtin::BI__builtin_cos: 2725 case Builtin::BI__builtin_cosf: 2726 case Builtin::BI__builtin_cosf16: 2727 case Builtin::BI__builtin_cosl: 2728 case Builtin::BI__builtin_cosf128: 2729 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E, 2730 Intrinsic::cos, 2731 Intrinsic::experimental_constrained_cos)); 2732 2733 case Builtin::BIcosh: 2734 case Builtin::BIcoshf: 2735 case Builtin::BIcoshl: 2736 case Builtin::BI__builtin_cosh: 2737 case Builtin::BI__builtin_coshf: 2738 case Builtin::BI__builtin_coshf16: 2739 case Builtin::BI__builtin_coshl: 2740 case Builtin::BI__builtin_coshf128: 2741 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin( 2742 *this, E, Intrinsic::cosh, Intrinsic::experimental_constrained_cosh)); 2743 2744 case Builtin::BIexp: 2745 case Builtin::BIexpf: 2746 case Builtin::BIexpl: 2747 case Builtin::BI__builtin_exp: 2748 case Builtin::BI__builtin_expf: 2749 case Builtin::BI__builtin_expf16: 2750 case Builtin::BI__builtin_expl: 2751 case Builtin::BI__builtin_expf128: 2752 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E, 2753 Intrinsic::exp, 2754 Intrinsic::experimental_constrained_exp)); 2755 2756 case Builtin::BIexp2: 2757 case Builtin::BIexp2f: 2758 case Builtin::BIexp2l: 2759 case Builtin::BI__builtin_exp2: 2760 case Builtin::BI__builtin_exp2f: 2761 case Builtin::BI__builtin_exp2f16: 2762 case Builtin::BI__builtin_exp2l: 2763 case Builtin::BI__builtin_exp2f128: 2764 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E, 2765 Intrinsic::exp2, 2766 Intrinsic::experimental_constrained_exp2)); 2767 case Builtin::BI__builtin_exp10: 2768 case Builtin::BI__builtin_exp10f: 2769 case Builtin::BI__builtin_exp10f16: 2770 case Builtin::BI__builtin_exp10l: 2771 case Builtin::BI__builtin_exp10f128: { 2772 // TODO: strictfp support 2773 if (Builder.getIsFPConstrained()) 2774 break; 2775 return RValue::get( 2776 emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::exp10)); 2777 } 2778 case Builtin::BIfabs: 2779 case Builtin::BIfabsf: 2780 case Builtin::BIfabsl: 2781 case Builtin::BI__builtin_fabs: 2782 case Builtin::BI__builtin_fabsf: 2783 case Builtin::BI__builtin_fabsf16: 2784 case Builtin::BI__builtin_fabsl: 2785 case Builtin::BI__builtin_fabsf128: 2786 return RValue::get( 2787 emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::fabs)); 2788 2789 case Builtin::BIfloor: 2790 case Builtin::BIfloorf: 2791 case Builtin::BIfloorl: 2792 case Builtin::BI__builtin_floor: 2793 case Builtin::BI__builtin_floorf: 2794 case Builtin::BI__builtin_floorf16: 2795 case Builtin::BI__builtin_floorl: 2796 case Builtin::BI__builtin_floorf128: 2797 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E, 2798 Intrinsic::floor, 2799 Intrinsic::experimental_constrained_floor)); 2800 2801 case Builtin::BIfma: 2802 case Builtin::BIfmaf: 2803 case Builtin::BIfmal: 2804 case Builtin::BI__builtin_fma: 2805 case Builtin::BI__builtin_fmaf: 2806 case Builtin::BI__builtin_fmaf16: 2807 case Builtin::BI__builtin_fmal: 2808 case Builtin::BI__builtin_fmaf128: 2809 return RValue::get(emitTernaryMaybeConstrainedFPBuiltin(*this, E, 2810 Intrinsic::fma, 2811 Intrinsic::experimental_constrained_fma)); 2812 2813 case Builtin::BIfmax: 2814 case Builtin::BIfmaxf: 2815 case Builtin::BIfmaxl: 2816 case Builtin::BI__builtin_fmax: 2817 case Builtin::BI__builtin_fmaxf: 2818 case Builtin::BI__builtin_fmaxf16: 2819 case Builtin::BI__builtin_fmaxl: 2820 case Builtin::BI__builtin_fmaxf128: 2821 return RValue::get(emitBinaryMaybeConstrainedFPBuiltin(*this, E, 2822 Intrinsic::maxnum, 2823 
                                   Intrinsic::experimental_constrained_maxnum));

    case Builtin::BIfmin:
    case Builtin::BIfminf:
    case Builtin::BIfminl:
    case Builtin::BI__builtin_fmin:
    case Builtin::BI__builtin_fminf:
    case Builtin::BI__builtin_fminf16:
    case Builtin::BI__builtin_fminl:
    case Builtin::BI__builtin_fminf128:
      return RValue::get(emitBinaryMaybeConstrainedFPBuiltin(
          *this, E, Intrinsic::minnum,
          Intrinsic::experimental_constrained_minnum));

    // fmod() is a special-case. It maps to the frem instruction rather than an
    // LLVM intrinsic.
    case Builtin::BIfmod:
    case Builtin::BIfmodf:
    case Builtin::BIfmodl:
    case Builtin::BI__builtin_fmod:
    case Builtin::BI__builtin_fmodf:
    case Builtin::BI__builtin_fmodf16:
    case Builtin::BI__builtin_fmodl:
    case Builtin::BI__builtin_fmodf128: {
      CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
      Value *Arg1 = EmitScalarExpr(E->getArg(0));
      Value *Arg2 = EmitScalarExpr(E->getArg(1));
      return RValue::get(Builder.CreateFRem(Arg1, Arg2, "fmod"));
    }

    case Builtin::BIlog:
    case Builtin::BIlogf:
    case Builtin::BIlogl:
    case Builtin::BI__builtin_log:
    case Builtin::BI__builtin_logf:
    case Builtin::BI__builtin_logf16:
    case Builtin::BI__builtin_logl:
    case Builtin::BI__builtin_logf128:
      return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
          *this, E, Intrinsic::log, Intrinsic::experimental_constrained_log));

    case Builtin::BIlog10:
    case Builtin::BIlog10f:
    case Builtin::BIlog10l:
    case Builtin::BI__builtin_log10:
    case Builtin::BI__builtin_log10f:
    case Builtin::BI__builtin_log10f16:
    case Builtin::BI__builtin_log10l:
    case Builtin::BI__builtin_log10f128:
      return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
          *this, E, Intrinsic::log10,
          Intrinsic::experimental_constrained_log10));

    case Builtin::BIlog2:
    case Builtin::BIlog2f:
    case Builtin::BIlog2l:
    case Builtin::BI__builtin_log2:
    case Builtin::BI__builtin_log2f:
    case Builtin::BI__builtin_log2f16:
    case Builtin::BI__builtin_log2l:
    case Builtin::BI__builtin_log2f128:
      return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
          *this, E, Intrinsic::log2, Intrinsic::experimental_constrained_log2));

    case Builtin::BInearbyint:
    case Builtin::BInearbyintf:
    case Builtin::BInearbyintl:
    case Builtin::BI__builtin_nearbyint:
    case Builtin::BI__builtin_nearbyintf:
    case Builtin::BI__builtin_nearbyintl:
    case Builtin::BI__builtin_nearbyintf128:
      return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
          *this, E, Intrinsic::nearbyint,
          Intrinsic::experimental_constrained_nearbyint));

    case Builtin::BIpow:
    case Builtin::BIpowf:
    case Builtin::BIpowl:
    case Builtin::BI__builtin_pow:
    case Builtin::BI__builtin_powf:
    case Builtin::BI__builtin_powf16:
    case Builtin::BI__builtin_powl:
    case Builtin::BI__builtin_powf128:
      return RValue::get(emitBinaryMaybeConstrainedFPBuiltin(
          *this, E, Intrinsic::pow, Intrinsic::experimental_constrained_pow));

    case Builtin::BIrint:
    case Builtin::BIrintf:
    case Builtin::BIrintl:
    case Builtin::BI__builtin_rint:
    case Builtin::BI__builtin_rintf:
    case Builtin::BI__builtin_rintf16:
    case Builtin::BI__builtin_rintl:
    case Builtin::BI__builtin_rintf128:
      return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
          *this, E, Intrinsic::rint, Intrinsic::experimental_constrained_rint));

    case Builtin::BIround:
    case Builtin::BIroundf:
    case Builtin::BIroundl:
    case Builtin::BI__builtin_round:
    case Builtin::BI__builtin_roundf:
    case Builtin::BI__builtin_roundf16:
    case Builtin::BI__builtin_roundl:
    case Builtin::BI__builtin_roundf128:
      return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
          *this, E, Intrinsic::round,
          Intrinsic::experimental_constrained_round));

    case Builtin::BIroundeven:
    case Builtin::BIroundevenf:
    case Builtin::BIroundevenl:
    case Builtin::BI__builtin_roundeven:
    case Builtin::BI__builtin_roundevenf:
    case Builtin::BI__builtin_roundevenf16:
    case Builtin::BI__builtin_roundevenl:
    case Builtin::BI__builtin_roundevenf128:
      return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
          *this, E, Intrinsic::roundeven,
          Intrinsic::experimental_constrained_roundeven));

    case Builtin::BIsin:
    case Builtin::BIsinf:
    case Builtin::BIsinl:
    case Builtin::BI__builtin_sin:
    case Builtin::BI__builtin_sinf:
    case Builtin::BI__builtin_sinf16:
    case Builtin::BI__builtin_sinl:
    case Builtin::BI__builtin_sinf128:
      return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
          *this, E, Intrinsic::sin, Intrinsic::experimental_constrained_sin));

    case Builtin::BIsinh:
    case Builtin::BIsinhf:
    case Builtin::BIsinhl:
    case Builtin::BI__builtin_sinh:
    case Builtin::BI__builtin_sinhf:
    case Builtin::BI__builtin_sinhf16:
    case Builtin::BI__builtin_sinhl:
    case Builtin::BI__builtin_sinhf128:
      return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
          *this, E, Intrinsic::sinh, Intrinsic::experimental_constrained_sinh));

    case Builtin::BIsqrt:
    case Builtin::BIsqrtf:
    case Builtin::BIsqrtl:
    case Builtin::BI__builtin_sqrt:
    case Builtin::BI__builtin_sqrtf:
    case Builtin::BI__builtin_sqrtf16:
    case Builtin::BI__builtin_sqrtl:
    case Builtin::BI__builtin_sqrtf128:
    case Builtin::BI__builtin_elementwise_sqrt: {
      llvm::Value *Call = emitUnaryMaybeConstrainedFPBuiltin(
          *this, E, Intrinsic::sqrt, Intrinsic::experimental_constrained_sqrt);
      SetSqrtFPAccuracy(Call);
      return RValue::get(Call);
    }

    case Builtin::BItan:
    case Builtin::BItanf:
    case Builtin::BItanl:
    case Builtin::BI__builtin_tan:
    case Builtin::BI__builtin_tanf:
    case Builtin::BI__builtin_tanf16:
    case Builtin::BI__builtin_tanl:
    case Builtin::BI__builtin_tanf128:
      return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
          *this, E, Intrinsic::tan, Intrinsic::experimental_constrained_tan));

    case Builtin::BItanh:
    case Builtin::BItanhf:
    case Builtin::BItanhl:
    case Builtin::BI__builtin_tanh:
    case Builtin::BI__builtin_tanhf:
    case Builtin::BI__builtin_tanhf16:
    case Builtin::BI__builtin_tanhl:
    case Builtin::BI__builtin_tanhf128:
      return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
          *this, E, Intrinsic::tanh, Intrinsic::experimental_constrained_tanh));

    case Builtin::BItrunc:
    case Builtin::BItruncf:
    case Builtin::BItruncl:
    case Builtin::BI__builtin_trunc:
    case Builtin::BI__builtin_truncf:
    case Builtin::BI__builtin_truncf16:
    case Builtin::BI__builtin_truncl:
    case Builtin::BI__builtin_truncf128:
      return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
          *this, E, Intrinsic::trunc,
          Intrinsic::experimental_constrained_trunc));

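    // The l*/ll* rounding builtins below return an integer, so they go
    // through emitMaybeConstrainedFPToIntRoundBuiltin, which likewise switches
    // to the llvm.experimental.constrained.* variants under strict FP
    // semantics.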
    case Builtin::BIlround:
    case Builtin::BIlroundf:
    case Builtin::BIlroundl:
    case Builtin::BI__builtin_lround:
    case Builtin::BI__builtin_lroundf:
    case Builtin::BI__builtin_lroundl:
    case Builtin::BI__builtin_lroundf128:
      return RValue::get(emitMaybeConstrainedFPToIntRoundBuiltin(
          *this, E, Intrinsic::lround,
          Intrinsic::experimental_constrained_lround));

    case Builtin::BIllround:
    case Builtin::BIllroundf:
    case Builtin::BIllroundl:
    case Builtin::BI__builtin_llround:
    case Builtin::BI__builtin_llroundf:
    case Builtin::BI__builtin_llroundl:
    case Builtin::BI__builtin_llroundf128:
      return RValue::get(emitMaybeConstrainedFPToIntRoundBuiltin(
          *this, E, Intrinsic::llround,
          Intrinsic::experimental_constrained_llround));

    case Builtin::BIlrint:
    case Builtin::BIlrintf:
    case Builtin::BIlrintl:
    case Builtin::BI__builtin_lrint:
    case Builtin::BI__builtin_lrintf:
    case Builtin::BI__builtin_lrintl:
    case Builtin::BI__builtin_lrintf128:
      return RValue::get(emitMaybeConstrainedFPToIntRoundBuiltin(
          *this, E, Intrinsic::lrint,
          Intrinsic::experimental_constrained_lrint));

    case Builtin::BIllrint:
    case Builtin::BIllrintf:
    case Builtin::BIllrintl:
    case Builtin::BI__builtin_llrint:
    case Builtin::BI__builtin_llrintf:
    case Builtin::BI__builtin_llrintl:
    case Builtin::BI__builtin_llrintf128:
      return RValue::get(emitMaybeConstrainedFPToIntRoundBuiltin(
          *this, E, Intrinsic::llrint,
          Intrinsic::experimental_constrained_llrint));
    case Builtin::BI__builtin_ldexp:
    case Builtin::BI__builtin_ldexpf:
    case Builtin::BI__builtin_ldexpl:
    case Builtin::BI__builtin_ldexpf16:
    case Builtin::BI__builtin_ldexpf128: {
      return RValue::get(emitBinaryExpMaybeConstrainedFPBuiltin(
          *this, E, Intrinsic::ldexp,
          Intrinsic::experimental_constrained_ldexp));
    }
    default:
      break;
    }
  }

  // Check NonnullAttribute/NullabilityArg and Alignment.
  auto EmitArgCheck = [&](TypeCheckKind Kind, Address A, const Expr *Arg,
                          unsigned ParmNum) {
    Value *Val = A.emitRawPointer(*this);
    EmitNonNullArgCheck(RValue::get(Val), Arg->getType(), Arg->getExprLoc(), FD,
                        ParmNum);

    if (SanOpts.has(SanitizerKind::Alignment)) {
      SanitizerSet SkippedChecks;
      SkippedChecks.set(SanitizerKind::All);
      SkippedChecks.clear(SanitizerKind::Alignment);
      SourceLocation Loc = Arg->getExprLoc();
      // Strip an implicit cast.
      if (auto *CE = dyn_cast<ImplicitCastExpr>(Arg))
        if (CE->getCastKind() == CK_BitCast)
          Arg = CE->getSubExpr();
      EmitTypeCheck(Kind, Loc, Val, Arg->getType(), A.getAlignment(),
                    SkippedChecks);
    }
  };

  switch (BuiltinIDIfNoAsmLabel) {
  default: break;
  case Builtin::BI__builtin___CFStringMakeConstantString:
  case Builtin::BI__builtin___NSStringMakeConstantString:
    return RValue::get(ConstantEmitter(*this).emitAbstract(E, E->getType()));
  case Builtin::BI__builtin_stdarg_start:
  case Builtin::BI__builtin_va_start:
  case Builtin::BI__va_start:
  case Builtin::BI__builtin_va_end:
    EmitVAStartEnd(BuiltinID == Builtin::BI__va_start
                       ? EmitScalarExpr(E->getArg(0))
                       : EmitVAListRef(E->getArg(0)).emitRawPointer(*this),
                   BuiltinID != Builtin::BI__builtin_va_end);
    return RValue::get(nullptr);
  case Builtin::BI__builtin_va_copy: {
    Value *DstPtr = EmitVAListRef(E->getArg(0)).emitRawPointer(*this);
    Value *SrcPtr = EmitVAListRef(E->getArg(1)).emitRawPointer(*this);
    Builder.CreateCall(CGM.getIntrinsic(Intrinsic::vacopy, {DstPtr->getType()}),
                       {DstPtr, SrcPtr});
    return RValue::get(nullptr);
  }
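  // abs() lowering depends on the signed-overflow mode: with -fwrapv the
  // negation may simply wrap, with undefined overflow an NSW-style abs is
  // emitted, and with -ftrapv or -fsanitize=signed-integer-overflow an
  // overflow-checked form is used (see the switch below).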
  case Builtin::BIabs:
  case Builtin::BIlabs:
  case Builtin::BIllabs:
  case Builtin::BI__builtin_abs:
  case Builtin::BI__builtin_labs:
  case Builtin::BI__builtin_llabs: {
    bool SanitizeOverflow = SanOpts.has(SanitizerKind::SignedIntegerOverflow);

    Value *Result;
    switch (getLangOpts().getSignedOverflowBehavior()) {
    case LangOptions::SOB_Defined:
      Result = EmitAbs(*this, EmitScalarExpr(E->getArg(0)), false);
      break;
    case LangOptions::SOB_Undefined:
      if (!SanitizeOverflow) {
        Result = EmitAbs(*this, EmitScalarExpr(E->getArg(0)), true);
        break;
      }
      [[fallthrough]];
    case LangOptions::SOB_Trapping:
      // TODO: Somehow handle the corner case when the address of abs is taken.
      Result = EmitOverflowCheckedAbs(*this, E, SanitizeOverflow);
      break;
    }
    return RValue::get(Result);
  }
  case Builtin::BI__builtin_complex: {
    Value *Real = EmitScalarExpr(E->getArg(0));
    Value *Imag = EmitScalarExpr(E->getArg(1));
    return RValue::getComplex({Real, Imag});
  }
  case Builtin::BI__builtin_conj:
  case Builtin::BI__builtin_conjf:
  case Builtin::BI__builtin_conjl:
  case Builtin::BIconj:
  case Builtin::BIconjf:
  case Builtin::BIconjl: {
    ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
    Value *Real = ComplexVal.first;
    Value *Imag = ComplexVal.second;
    Imag = Builder.CreateFNeg(Imag, "neg");
    return RValue::getComplex(std::make_pair(Real, Imag));
  }
  case Builtin::BI__builtin_creal:
  case Builtin::BI__builtin_crealf:
  case Builtin::BI__builtin_creall:
  case Builtin::BIcreal:
  case Builtin::BIcrealf:
  case Builtin::BIcreall: {
    ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
    return RValue::get(ComplexVal.first);
  }

  case Builtin::BI__builtin_preserve_access_index: {
    // Only enable the preserved access index region when debug info is
    // available, since debug info is needed to preserve the user-level
    // access pattern.
    if (!getDebugInfo()) {
      CGM.Error(E->getExprLoc(),
                "using builtin_preserve_access_index() without -g");
      return RValue::get(EmitScalarExpr(E->getArg(0)));
    }

    // Nested builtin_preserve_access_index() is not supported.
    if (IsInPreservedAIRegion) {
      CGM.Error(E->getExprLoc(),
                "nested builtin_preserve_access_index() not supported");
      return RValue::get(EmitScalarExpr(E->getArg(0)));
    }

    IsInPreservedAIRegion = true;
    Value *Res = EmitScalarExpr(E->getArg(0));
    IsInPreservedAIRegion = false;
    return RValue::get(Res);
  }

  case Builtin::BI__builtin_cimag:
  case Builtin::BI__builtin_cimagf:
  case Builtin::BI__builtin_cimagl:
  case Builtin::BIcimag:
  case Builtin::BIcimagf:
  case Builtin::BIcimagl: {
    ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
    return RValue::get(ComplexVal.second);
  }

  case Builtin::BI__builtin_clrsb:
  case Builtin::BI__builtin_clrsbl:
  case Builtin::BI__builtin_clrsbll: {
    // clrsb(x) -> clz(x < 0 ? ~x : x) - 1
    Value *ArgValue = EmitScalarExpr(E->getArg(0));

    llvm::Type *ArgType = ArgValue->getType();
    Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);

    llvm::Type *ResultType = ConvertType(E->getType());
    Value *Zero = llvm::Constant::getNullValue(ArgType);
    Value *IsNeg = Builder.CreateICmpSLT(ArgValue, Zero, "isneg");
    Value *Inverse = Builder.CreateNot(ArgValue, "not");
    Value *Tmp = Builder.CreateSelect(IsNeg, Inverse, ArgValue);
    Value *Ctlz = Builder.CreateCall(F, {Tmp, Builder.getFalse()});
    Value *Result = Builder.CreateSub(Ctlz, llvm::ConstantInt::get(ArgType, 1));
    Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
                                   "cast");
    return RValue::get(Result);
  }
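  // __builtin_ctzg (and __builtin_clzg below) optionally take a second
  // argument that is returned when the input is zero; in that form the
  // lowering is select(x == 0, fallback, cttz/ctlz(x)). Without a fallback, a
  // zero input is undefined and is diagnosed via EmitCheckedArgForBuiltin.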
  case Builtin::BI__builtin_ctzs:
  case Builtin::BI__builtin_ctz:
  case Builtin::BI__builtin_ctzl:
  case Builtin::BI__builtin_ctzll:
  case Builtin::BI__builtin_ctzg: {
    bool HasFallback = BuiltinIDIfNoAsmLabel == Builtin::BI__builtin_ctzg &&
                       E->getNumArgs() > 1;

    Value *ArgValue =
        HasFallback ? EmitScalarExpr(E->getArg(0))
                    : EmitCheckedArgForBuiltin(E->getArg(0), BCK_CTZPassedZero);

    llvm::Type *ArgType = ArgValue->getType();
    Function *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);

    llvm::Type *ResultType = ConvertType(E->getType());
    Value *ZeroUndef =
        Builder.getInt1(HasFallback || getTarget().isCLZForZeroUndef());
    Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef});
    if (Result->getType() != ResultType)
      Result =
          Builder.CreateIntCast(Result, ResultType, /*isSigned*/ false, "cast");
    if (!HasFallback)
      return RValue::get(Result);

    Value *Zero = Constant::getNullValue(ArgType);
    Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero");
    Value *FallbackValue = EmitScalarExpr(E->getArg(1));
    Value *ResultOrFallback =
        Builder.CreateSelect(IsZero, FallbackValue, Result, "ctzg");
    return RValue::get(ResultOrFallback);
  }
  case Builtin::BI__builtin_clzs:
  case Builtin::BI__builtin_clz:
  case Builtin::BI__builtin_clzl:
  case Builtin::BI__builtin_clzll:
  case Builtin::BI__builtin_clzg: {
    bool HasFallback = BuiltinIDIfNoAsmLabel == Builtin::BI__builtin_clzg &&
                       E->getNumArgs() > 1;

    Value *ArgValue =
        HasFallback ? EmitScalarExpr(E->getArg(0))
                    : EmitCheckedArgForBuiltin(E->getArg(0), BCK_CLZPassedZero);

    llvm::Type *ArgType = ArgValue->getType();
    Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);

    llvm::Type *ResultType = ConvertType(E->getType());
    Value *ZeroUndef =
        Builder.getInt1(HasFallback || getTarget().isCLZForZeroUndef());
    Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef});
    if (Result->getType() != ResultType)
      Result =
          Builder.CreateIntCast(Result, ResultType, /*isSigned*/ false, "cast");
    if (!HasFallback)
      return RValue::get(Result);

    Value *Zero = Constant::getNullValue(ArgType);
    Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero");
    Value *FallbackValue = EmitScalarExpr(E->getArg(1));
    Value *ResultOrFallback =
        Builder.CreateSelect(IsZero, FallbackValue, Result, "clzg");
    return RValue::get(ResultOrFallback);
  }
  case Builtin::BI__builtin_ffs:
  case Builtin::BI__builtin_ffsl:
  case Builtin::BI__builtin_ffsll: {
    // ffs(x) -> x ? cttz(x) + 1 : 0
    Value *ArgValue = EmitScalarExpr(E->getArg(0));

    llvm::Type *ArgType = ArgValue->getType();
    Function *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);

    llvm::Type *ResultType = ConvertType(E->getType());
    Value *Tmp =
        Builder.CreateAdd(Builder.CreateCall(F, {ArgValue, Builder.getTrue()}),
                          llvm::ConstantInt::get(ArgType, 1));
    Value *Zero = llvm::Constant::getNullValue(ArgType);
    Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero");
    Value *Result = Builder.CreateSelect(IsZero, Zero, Tmp, "ffs");
    if (Result->getType() != ResultType)
      Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
                                     "cast");
    return RValue::get(Result);
  }
  case Builtin::BI__builtin_parity:
  case Builtin::BI__builtin_parityl:
  case Builtin::BI__builtin_parityll: {
    // parity(x) -> ctpop(x) & 1
    Value *ArgValue = EmitScalarExpr(E->getArg(0));

    llvm::Type *ArgType = ArgValue->getType();
    Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);

    llvm::Type *ResultType = ConvertType(E->getType());
    Value *Tmp = Builder.CreateCall(F, ArgValue);
    Value *Result = Builder.CreateAnd(Tmp, llvm::ConstantInt::get(ArgType, 1));
    if (Result->getType() != ResultType)
      Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
                                     "cast");
    return RValue::get(Result);
  }
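  // The Microsoft __lzcnt* forms, unlike __builtin_clz*, are well defined for
  // a zero input: ctlz is emitted with is_zero_poison = false, so zero yields
  // the operand bit width.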
  case Builtin::BI__lzcnt16:
  case Builtin::BI__lzcnt:
  case Builtin::BI__lzcnt64: {
    Value *ArgValue = EmitScalarExpr(E->getArg(0));

    llvm::Type *ArgType = ArgValue->getType();
    Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);

    llvm::Type *ResultType = ConvertType(E->getType());
    Value *Result = Builder.CreateCall(F, {ArgValue, Builder.getFalse()});
    if (Result->getType() != ResultType)
      Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
                                     "cast");
    return RValue::get(Result);
  }
  case Builtin::BI__popcnt16:
  case Builtin::BI__popcnt:
  case Builtin::BI__popcnt64:
  case Builtin::BI__builtin_popcount:
  case Builtin::BI__builtin_popcountl:
  case Builtin::BI__builtin_popcountll:
  case Builtin::BI__builtin_popcountg: {
    Value *ArgValue = EmitScalarExpr(E->getArg(0));

    llvm::Type *ArgType = ArgValue->getType();
    Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);

    llvm::Type *ResultType = ConvertType(E->getType());
    Value *Result = Builder.CreateCall(F, ArgValue);
    if (Result->getType() != ResultType)
      Result =
          Builder.CreateIntCast(Result, ResultType, /*isSigned*/ false, "cast");
    return RValue::get(Result);
  }
  case Builtin::BI__builtin_unpredictable: {
    // Always return the argument of __builtin_unpredictable. LLVM does not
    // handle this builtin. Metadata for this builtin should be added directly
    // to instructions such as branches or switches that use it.
    return RValue::get(EmitScalarExpr(E->getArg(0)));
  }
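  // __builtin_expect(x, c) lowers (at -O1 and above) to a call such as
  //   %expval = call i64 @llvm.expect.i64(i64 %x, i64 %c)
  // (illustrative; the overload follows the argument type). Later passes turn
  // the expectation into branch weights.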
  case Builtin::BI__builtin_expect: {
    Value *ArgValue = EmitScalarExpr(E->getArg(0));
    llvm::Type *ArgType = ArgValue->getType();

    Value *ExpectedValue = EmitScalarExpr(E->getArg(1));
    // Don't generate llvm.expect on -O0 as the backend won't use it for
    // anything.
    // Note, we still IRGen ExpectedValue because it could have side-effects.
    if (CGM.getCodeGenOpts().OptimizationLevel == 0)
      return RValue::get(ArgValue);

    Function *FnExpect = CGM.getIntrinsic(Intrinsic::expect, ArgType);
    Value *Result =
        Builder.CreateCall(FnExpect, {ArgValue, ExpectedValue}, "expval");
    return RValue::get(Result);
  }
  case Builtin::BI__builtin_expect_with_probability: {
    Value *ArgValue = EmitScalarExpr(E->getArg(0));
    llvm::Type *ArgType = ArgValue->getType();

    Value *ExpectedValue = EmitScalarExpr(E->getArg(1));
    llvm::APFloat Probability(0.0);
    const Expr *ProbArg = E->getArg(2);
    bool EvalSucceed = ProbArg->EvaluateAsFloat(Probability, CGM.getContext());
    assert(EvalSucceed && "probability should be able to evaluate as float");
    (void)EvalSucceed;
    bool LoseInfo = false;
    Probability.convert(llvm::APFloat::IEEEdouble(),
                        llvm::RoundingMode::Dynamic, &LoseInfo);
    llvm::Type *Ty = ConvertType(ProbArg->getType());
    Constant *Confidence = ConstantFP::get(Ty, Probability);
    // Don't generate llvm.expect.with.probability on -O0 as the backend
    // won't use it for anything.
    // Note, we still IRGen ExpectedValue because it could have side-effects.
    if (CGM.getCodeGenOpts().OptimizationLevel == 0)
      return RValue::get(ArgValue);

    Function *FnExpect =
        CGM.getIntrinsic(Intrinsic::expect_with_probability, ArgType);
    Value *Result = Builder.CreateCall(
        FnExpect, {ArgValue, ExpectedValue, Confidence}, "expval");
    return RValue::get(Result);
  }
  case Builtin::BI__builtin_assume_aligned: {
    const Expr *Ptr = E->getArg(0);
    Value *PtrValue = EmitScalarExpr(Ptr);
    Value *OffsetValue =
        (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) : nullptr;

    Value *AlignmentValue = EmitScalarExpr(E->getArg(1));
    ConstantInt *AlignmentCI = cast<ConstantInt>(AlignmentValue);
    if (AlignmentCI->getValue().ugt(llvm::Value::MaximumAlignment))
      AlignmentCI = ConstantInt::get(AlignmentCI->getIntegerType(),
                                     llvm::Value::MaximumAlignment);

    emitAlignmentAssumption(PtrValue, Ptr,
                            /*The expr loc is sufficient.*/ SourceLocation(),
                            AlignmentCI, OffsetValue);
    return RValue::get(PtrValue);
  }
  case Builtin::BI__assume:
  case Builtin::BI__builtin_assume: {
    if (E->getArg(0)->HasSideEffects(getContext()))
      return RValue::get(nullptr);

    Value *ArgValue = EmitScalarExpr(E->getArg(0));
    Function *FnAssume = CGM.getIntrinsic(Intrinsic::assume);
    Builder.CreateCall(FnAssume, ArgValue);
    return RValue::get(nullptr);
  }
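  // __builtin_assume_separate_storage(p, q) is encoded as an operand bundle
  // on llvm.assume, roughly:
  //   call void @llvm.assume(i1 true) [ "separate_storage"(ptr %p, ptr %q) ]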
  case Builtin::BI__builtin_assume_separate_storage: {
    const Expr *Arg0 = E->getArg(0);
    const Expr *Arg1 = E->getArg(1);

    Value *Value0 = EmitScalarExpr(Arg0);
    Value *Value1 = EmitScalarExpr(Arg1);

    Value *Values[] = {Value0, Value1};
    OperandBundleDefT<Value *> OBD("separate_storage", Values);
    Builder.CreateAssumption(ConstantInt::getTrue(getLLVMContext()), {OBD});
    return RValue::get(nullptr);
  }
  case Builtin::BI__builtin_allow_runtime_check: {
    StringRef Kind =
        cast<StringLiteral>(E->getArg(0)->IgnoreParenCasts())->getString();
    LLVMContext &Ctx = CGM.getLLVMContext();
    llvm::Value *Allow = Builder.CreateCall(
        CGM.getIntrinsic(llvm::Intrinsic::allow_runtime_check),
        llvm::MetadataAsValue::get(Ctx, llvm::MDString::get(Ctx, Kind)));
    return RValue::get(Allow);
  }
  case Builtin::BI__arithmetic_fence: {
    // Create the builtin call if FastMath is selected, and the target
    // supports the builtin, otherwise just return the argument.
    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
    llvm::FastMathFlags FMF = Builder.getFastMathFlags();
    bool isArithmeticFenceEnabled =
        FMF.allowReassoc() &&
        getContext().getTargetInfo().checkArithmeticFenceSupported();
    QualType ArgType = E->getArg(0)->getType();
    if (ArgType->isComplexType()) {
      if (isArithmeticFenceEnabled) {
        QualType ElementType = ArgType->castAs<ComplexType>()->getElementType();
        ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
        Value *Real = Builder.CreateArithmeticFence(ComplexVal.first,
                                                    ConvertType(ElementType));
        Value *Imag = Builder.CreateArithmeticFence(ComplexVal.second,
                                                    ConvertType(ElementType));
        return RValue::getComplex(std::make_pair(Real, Imag));
      }
      ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
      Value *Real = ComplexVal.first;
      Value *Imag = ComplexVal.second;
      return RValue::getComplex(std::make_pair(Real, Imag));
    }
    Value *ArgValue = EmitScalarExpr(E->getArg(0));
    if (isArithmeticFenceEnabled)
      return RValue::get(
          Builder.CreateArithmeticFence(ArgValue, ConvertType(ArgType)));
    return RValue::get(ArgValue);
  }
  case Builtin::BI__builtin_bswap16:
  case Builtin::BI__builtin_bswap32:
  case Builtin::BI__builtin_bswap64:
  case Builtin::BI_byteswap_ushort:
  case Builtin::BI_byteswap_ulong:
  case Builtin::BI_byteswap_uint64: {
    return RValue::get(
        emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::bswap));
  }
  case Builtin::BI__builtin_bitreverse8:
  case Builtin::BI__builtin_bitreverse16:
  case Builtin::BI__builtin_bitreverse32:
  case Builtin::BI__builtin_bitreverse64: {
    return RValue::get(
        emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::bitreverse));
  }
  case Builtin::BI__builtin_rotateleft8:
  case Builtin::BI__builtin_rotateleft16:
  case Builtin::BI__builtin_rotateleft32:
  case Builtin::BI__builtin_rotateleft64:
  case Builtin::BI_rotl8: // Microsoft variants of rotate left
  case Builtin::BI_rotl16:
  case Builtin::BI_rotl:
  case Builtin::BI_lrotl:
  case Builtin::BI_rotl64:
    return emitRotate(E, false);

  case Builtin::BI__builtin_rotateright8:
  case Builtin::BI__builtin_rotateright16:
  case Builtin::BI__builtin_rotateright32:
  case Builtin::BI__builtin_rotateright64:
  case Builtin::BI_rotr8: // Microsoft variants of rotate right
  case Builtin::BI_rotr16:
  case Builtin::BI_rotr:
  case Builtin::BI_lrotr:
  case Builtin::BI_rotr64:
    return emitRotate(E, true);

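  // __builtin_constant_p is deferred to the optimizer via llvm.is.constant,
  // which folds to true or false once inlining and constant propagation have
  // run; anything still unknown at that point becomes false.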
  case Builtin::BI__builtin_constant_p: {
    llvm::Type *ResultType = ConvertType(E->getType());

    const Expr *Arg = E->getArg(0);
    QualType ArgType = Arg->getType();
    // FIXME: The allowance for Obj-C pointers and block pointers is historical
    // and likely a mistake.
    if (!ArgType->isIntegralOrEnumerationType() && !ArgType->isFloatingType() &&
        !ArgType->isObjCObjectPointerType() && !ArgType->isBlockPointerType())
      // Per the GCC documentation, only numeric constants are recognized after
      // inlining.
      return RValue::get(ConstantInt::get(ResultType, 0));

    if (Arg->HasSideEffects(getContext()))
      // The argument is unevaluated, so be conservative if it might have
      // side-effects.
      return RValue::get(ConstantInt::get(ResultType, 0));

    Value *ArgValue = EmitScalarExpr(Arg);
    if (ArgType->isObjCObjectPointerType()) {
      // Convert Objective-C objects to id because we cannot distinguish between
      // LLVM types for Obj-C classes as they are opaque.
      ArgType = CGM.getContext().getObjCIdType();
      ArgValue = Builder.CreateBitCast(ArgValue, ConvertType(ArgType));
    }
    Function *F =
        CGM.getIntrinsic(Intrinsic::is_constant, ConvertType(ArgType));
    Value *Result = Builder.CreateCall(F, ArgValue);
    if (Result->getType() != ResultType)
      Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/false);
    return RValue::get(Result);
  }
  case Builtin::BI__builtin_dynamic_object_size:
  case Builtin::BI__builtin_object_size: {
    unsigned Type =
        E->getArg(1)->EvaluateKnownConstInt(getContext()).getZExtValue();
    auto *ResType = cast<llvm::IntegerType>(ConvertType(E->getType()));

    // We pass this builtin onto the optimizer so that it can figure out the
    // object size in more complex cases.
    bool IsDynamic = BuiltinID == Builtin::BI__builtin_dynamic_object_size;
    return RValue::get(emitBuiltinObjectSize(E->getArg(0), Type, ResType,
                                             /*EmittedE=*/nullptr, IsDynamic));
  }
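  // __builtin_prefetch defaults when the optional arguments are omitted:
  // rw = 0 (read) and locality = 3 (high temporal locality); the trailing
  // constant 1 selects the data cache, as llvm.prefetch expects.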
  case Builtin::BI__builtin_prefetch: {
    Value *Locality, *RW, *Address = EmitScalarExpr(E->getArg(0));
    // FIXME: Technically these constants should be of type 'int', yes?
    RW = (E->getNumArgs() > 1) ? EmitScalarExpr(E->getArg(1)) :
      llvm::ConstantInt::get(Int32Ty, 0);
    Locality = (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) :
      llvm::ConstantInt::get(Int32Ty, 3);
    Value *Data = llvm::ConstantInt::get(Int32Ty, 1);
    Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
    Builder.CreateCall(F, {Address, RW, Locality, Data});
    return RValue::get(nullptr);
  }
  case Builtin::BI__builtin_readcyclecounter: {
    Function *F = CGM.getIntrinsic(Intrinsic::readcyclecounter);
    return RValue::get(Builder.CreateCall(F));
  }
  case Builtin::BI__builtin_readsteadycounter: {
    Function *F = CGM.getIntrinsic(Intrinsic::readsteadycounter);
    return RValue::get(Builder.CreateCall(F));
  }
  case Builtin::BI__builtin___clear_cache: {
    Value *Begin = EmitScalarExpr(E->getArg(0));
    Value *End = EmitScalarExpr(E->getArg(1));
    Function *F = CGM.getIntrinsic(Intrinsic::clear_cache);
    return RValue::get(Builder.CreateCall(F, {Begin, End}));
  }
  case Builtin::BI__builtin_trap:
    EmitTrapCall(Intrinsic::trap);
    return RValue::get(nullptr);
  case Builtin::BI__builtin_verbose_trap: {
    llvm::DILocation *TrapLocation = Builder.getCurrentDebugLocation();
    if (getDebugInfo()) {
      TrapLocation = getDebugInfo()->CreateTrapFailureMessageFor(
          TrapLocation, *E->getArg(0)->tryEvaluateString(getContext()),
          *E->getArg(1)->tryEvaluateString(getContext()));
    }
    ApplyDebugLocation ApplyTrapDI(*this, TrapLocation);
    // Currently no attempt is made to prevent traps from being merged.
    EmitTrapCall(Intrinsic::trap);
    return RValue::get(nullptr);
  }
  case Builtin::BI__debugbreak:
    EmitTrapCall(Intrinsic::debugtrap);
    return RValue::get(nullptr);
  case Builtin::BI__builtin_unreachable: {
    EmitUnreachable(E->getExprLoc());

    // We do need to preserve an insertion point.
    EmitBlock(createBasicBlock("unreachable.cont"));

    return RValue::get(nullptr);
  }

  case Builtin::BI__builtin_powi:
  case Builtin::BI__builtin_powif:
  case Builtin::BI__builtin_powil: {
    llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
    llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));

    if (Builder.getIsFPConstrained()) {
      // FIXME: llvm.powi has 2 mangling types,
      // llvm.experimental.constrained.powi has one.
      CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
      Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_powi,
                                     Src0->getType());
      return RValue::get(Builder.CreateConstrainedFPCall(F, { Src0, Src1 }));
    }

    Function *F = CGM.getIntrinsic(Intrinsic::powi,
                                   { Src0->getType(), Src1->getType() });
    return RValue::get(Builder.CreateCall(F, { Src0, Src1 }));
  }
  case Builtin::BI__builtin_frexpl: {
    // Linux PPC will not be adding additional PPCDoubleDouble support; work is
    // in progress to switch the default to IEEE long double. Emit a libcall
    // for frexpl instead of legalizing this type in the backend.
    if (&getTarget().getLongDoubleFormat() == &llvm::APFloat::PPCDoubleDouble())
      break;
    [[fallthrough]];
  }
  case Builtin::BI__builtin_frexp:
  case Builtin::BI__builtin_frexpf:
  case Builtin::BI__builtin_frexpf128:
  case Builtin::BI__builtin_frexpf16:
    return RValue::get(emitFrexpBuiltin(*this, E, Intrinsic::frexp));
  case Builtin::BI__builtin_isgreater:
  case Builtin::BI__builtin_isgreaterequal:
  case Builtin::BI__builtin_isless:
  case Builtin::BI__builtin_islessequal:
  case Builtin::BI__builtin_islessgreater:
  case Builtin::BI__builtin_isunordered: {
    // Ordered comparisons: we know the arguments to these are matching scalar
    // floating point values.
    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
    Value *LHS = EmitScalarExpr(E->getArg(0));
    Value *RHS = EmitScalarExpr(E->getArg(1));

    switch (BuiltinID) {
    default: llvm_unreachable("Unknown ordered comparison");
    case Builtin::BI__builtin_isgreater:
      LHS = Builder.CreateFCmpOGT(LHS, RHS, "cmp");
      break;
    case Builtin::BI__builtin_isgreaterequal:
      LHS = Builder.CreateFCmpOGE(LHS, RHS, "cmp");
      break;
    case Builtin::BI__builtin_isless:
      LHS = Builder.CreateFCmpOLT(LHS, RHS, "cmp");
      break;
    case Builtin::BI__builtin_islessequal:
      LHS = Builder.CreateFCmpOLE(LHS, RHS, "cmp");
      break;
    case Builtin::BI__builtin_islessgreater:
      LHS = Builder.CreateFCmpONE(LHS, RHS, "cmp");
      break;
    case Builtin::BI__builtin_isunordered:
      LHS = Builder.CreateFCmpUNO(LHS, RHS, "cmp");
      break;
    }
    // ZExt bool to int type.
    return RValue::get(Builder.CreateZExt(LHS, ConvertType(E->getType())));
  }

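  // The floating-point classification builtins below (isnan, issignaling,
  // isinf, isfinite, isnormal, issubnormal, iszero, isfpclass) all funnel into
  // llvm.is.fpclass with the matching FPClassTest mask, zero-extended to the
  // result type; tryUseTestFPKind gives targets a chance to emit a native
  // class-test operation for some of them instead.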
  case Builtin::BI__builtin_isnan: {
    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
    Value *V = EmitScalarExpr(E->getArg(0));
    if (Value *Result = tryUseTestFPKind(*this, BuiltinID, V))
      return RValue::get(Result);
    return RValue::get(
        Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcNan),
                           ConvertType(E->getType())));
  }

  case Builtin::BI__builtin_issignaling: {
    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
    Value *V = EmitScalarExpr(E->getArg(0));
    return RValue::get(
        Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcSNan),
                           ConvertType(E->getType())));
  }

  case Builtin::BI__builtin_isinf: {
    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
    Value *V = EmitScalarExpr(E->getArg(0));
    if (Value *Result = tryUseTestFPKind(*this, BuiltinID, V))
      return RValue::get(Result);
    return RValue::get(
        Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcInf),
                           ConvertType(E->getType())));
  }

  case Builtin::BIfinite:
  case Builtin::BI__finite:
  case Builtin::BIfinitef:
  case Builtin::BI__finitef:
  case Builtin::BIfinitel:
  case Builtin::BI__finitel:
  case Builtin::BI__builtin_isfinite: {
    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
    Value *V = EmitScalarExpr(E->getArg(0));
    if (Value *Result = tryUseTestFPKind(*this, BuiltinID, V))
      return RValue::get(Result);
    return RValue::get(
        Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcFinite),
                           ConvertType(E->getType())));
  }

  case Builtin::BI__builtin_isnormal: {
    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
    Value *V = EmitScalarExpr(E->getArg(0));
    return RValue::get(
        Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcNormal),
                           ConvertType(E->getType())));
  }

  case Builtin::BI__builtin_issubnormal: {
    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
    Value *V = EmitScalarExpr(E->getArg(0));
    return RValue::get(
        Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcSubnormal),
                           ConvertType(E->getType())));
  }

  case Builtin::BI__builtin_iszero: {
    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
    Value *V = EmitScalarExpr(E->getArg(0));
    return RValue::get(
        Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcZero),
                           ConvertType(E->getType())));
  }

  case Builtin::BI__builtin_isfpclass: {
    Expr::EvalResult Result;
    if (!E->getArg(1)->EvaluateAsInt(Result, CGM.getContext()))
      break;
    uint64_t Test = Result.Val.getInt().getLimitedValue();
    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
    Value *V = EmitScalarExpr(E->getArg(0));
    return RValue::get(Builder.CreateZExt(Builder.createIsFPClass(V, Test),
                                          ConvertType(E->getType())));
  }

  case Builtin::BI__builtin_nondeterministic_value: {
    llvm::Type *Ty = ConvertType(E->getArg(0)->getType());

    Value *Result = PoisonValue::get(Ty);
    Result = Builder.CreateFreeze(Result);

    return RValue::get(Result);
  }

  case Builtin::BI__builtin_elementwise_abs: {
    Value *Result;
    QualType QT = E->getArg(0)->getType();

    if (auto *VecTy = QT->getAs<VectorType>())
      QT = VecTy->getElementType();
    if (QT->isIntegerType())
      Result = Builder.CreateBinaryIntrinsic(
          llvm::Intrinsic::abs, EmitScalarExpr(E->getArg(0)),
          Builder.getFalse(), nullptr, "elt.abs");
    else
      Result = emitBuiltinWithOneOverloadedType<1>(
          *this, E, llvm::Intrinsic::fabs, "elt.abs");

    return RValue::get(Result);
  }
  case Builtin::BI__builtin_elementwise_acos:
    return RValue::get(emitBuiltinWithOneOverloadedType<1>(
        *this, E, llvm::Intrinsic::acos, "elt.acos"));
  case Builtin::BI__builtin_elementwise_asin:
    return RValue::get(emitBuiltinWithOneOverloadedType<1>(
        *this, E, llvm::Intrinsic::asin, "elt.asin"));
  case Builtin::BI__builtin_elementwise_atan:
    return RValue::get(emitBuiltinWithOneOverloadedType<1>(
        *this, E, llvm::Intrinsic::atan, "elt.atan"));
  case Builtin::BI__builtin_elementwise_ceil:
    return RValue::get(emitBuiltinWithOneOverloadedType<1>(
        *this, E, llvm::Intrinsic::ceil, "elt.ceil"));
  case Builtin::BI__builtin_elementwise_exp:
    return RValue::get(emitBuiltinWithOneOverloadedType<1>(
        *this, E, llvm::Intrinsic::exp, "elt.exp"));
  case Builtin::BI__builtin_elementwise_exp2:
    return RValue::get(emitBuiltinWithOneOverloadedType<1>(
        *this, E, llvm::Intrinsic::exp2, "elt.exp2"));
  case Builtin::BI__builtin_elementwise_log:
    return RValue::get(emitBuiltinWithOneOverloadedType<1>(
        *this, E, llvm::Intrinsic::log, "elt.log"));
  case Builtin::BI__builtin_elementwise_log2:
    return RValue::get(emitBuiltinWithOneOverloadedType<1>(
        *this, E, llvm::Intrinsic::log2, "elt.log2"));
  case Builtin::BI__builtin_elementwise_log10:
    return RValue::get(emitBuiltinWithOneOverloadedType<1>(
        *this, E, llvm::Intrinsic::log10, "elt.log10"));
  case Builtin::BI__builtin_elementwise_pow: {
    return RValue::get(
        emitBuiltinWithOneOverloadedType<2>(*this, E, llvm::Intrinsic::pow));
  }
  case Builtin::BI__builtin_elementwise_bitreverse:
    return RValue::get(emitBuiltinWithOneOverloadedType<1>(
        *this, E, llvm::Intrinsic::bitreverse, "elt.bitreverse"));
  case Builtin::BI__builtin_elementwise_cos:
    return RValue::get(emitBuiltinWithOneOverloadedType<1>(
        *this, E, llvm::Intrinsic::cos, "elt.cos"));
  case Builtin::BI__builtin_elementwise_cosh:
    return RValue::get(emitBuiltinWithOneOverloadedType<1>(
        *this, E, llvm::Intrinsic::cosh, "elt.cosh"));
  case Builtin::BI__builtin_elementwise_floor:
    return RValue::get(emitBuiltinWithOneOverloadedType<1>(
        *this, E, llvm::Intrinsic::floor, "elt.floor"));
  case Builtin::BI__builtin_elementwise_roundeven:
    return RValue::get(emitBuiltinWithOneOverloadedType<1>(
        *this, E, llvm::Intrinsic::roundeven, "elt.roundeven"));
  case Builtin::BI__builtin_elementwise_round:
    return RValue::get(emitBuiltinWithOneOverloadedType<1>(
        *this, E, llvm::Intrinsic::round, "elt.round"));
  case Builtin::BI__builtin_elementwise_rint:
    return RValue::get(emitBuiltinWithOneOverloadedType<1>(
        *this, E, llvm::Intrinsic::rint, "elt.rint"));
  case Builtin::BI__builtin_elementwise_nearbyint:
    return RValue::get(emitBuiltinWithOneOverloadedType<1>(
        *this, E, llvm::Intrinsic::nearbyint, "elt.nearbyint"));
  case Builtin::BI__builtin_elementwise_sin:
    return RValue::get(emitBuiltinWithOneOverloadedType<1>(
        *this, E, llvm::Intrinsic::sin, "elt.sin"));
  case Builtin::BI__builtin_elementwise_sinh:
    return RValue::get(emitBuiltinWithOneOverloadedType<1>(
        *this, E, llvm::Intrinsic::sinh, "elt.sinh"));
  case Builtin::BI__builtin_elementwise_tan:
    return RValue::get(emitBuiltinWithOneOverloadedType<1>(
        *this, E, llvm::Intrinsic::tan, "elt.tan"));
  case Builtin::BI__builtin_elementwise_tanh:
    return RValue::get(emitBuiltinWithOneOverloadedType<1>(
        *this, E, llvm::Intrinsic::tanh, "elt.tanh"));
  case Builtin::BI__builtin_elementwise_trunc:
    return RValue::get(emitBuiltinWithOneOverloadedType<1>(
        *this, E, llvm::Intrinsic::trunc, "elt.trunc"));
  case Builtin::BI__builtin_elementwise_canonicalize:
    return RValue::get(emitBuiltinWithOneOverloadedType<1>(
        *this, E, llvm::Intrinsic::canonicalize, "elt.canonicalize"));
  case Builtin::BI__builtin_elementwise_copysign:
    return RValue::get(emitBuiltinWithOneOverloadedType<2>(
        *this, E, llvm::Intrinsic::copysign));
  case Builtin::BI__builtin_elementwise_fma:
    return RValue::get(
        emitBuiltinWithOneOverloadedType<3>(*this, E, llvm::Intrinsic::fma));
  case Builtin::BI__builtin_elementwise_add_sat:
  case Builtin::BI__builtin_elementwise_sub_sat: {
    Value *Op0 = EmitScalarExpr(E->getArg(0));
    Value *Op1 = EmitScalarExpr(E->getArg(1));
    Value *Result;
    assert(Op0->getType()->isIntOrIntVectorTy() && "integer type expected");
    QualType Ty = E->getArg(0)->getType();
    if (auto *VecTy = Ty->getAs<VectorType>())
      Ty = VecTy->getElementType();
    bool IsSigned = Ty->isSignedIntegerType();
    unsigned Opc;
    if (BuiltinIDIfNoAsmLabel == Builtin::BI__builtin_elementwise_add_sat)
      Opc = IsSigned ? llvm::Intrinsic::sadd_sat : llvm::Intrinsic::uadd_sat;
    else
      Opc = IsSigned ? llvm::Intrinsic::ssub_sat : llvm::Intrinsic::usub_sat;
    Result = Builder.CreateBinaryIntrinsic(Opc, Op0, Op1, nullptr, "elt.sat");
    return RValue::get(Result);
  }

  case Builtin::BI__builtin_elementwise_max: {
    Value *Op0 = EmitScalarExpr(E->getArg(0));
    Value *Op1 = EmitScalarExpr(E->getArg(1));
    Value *Result;
    if (Op0->getType()->isIntOrIntVectorTy()) {
      QualType Ty = E->getArg(0)->getType();
      if (auto *VecTy = Ty->getAs<VectorType>())
        Ty = VecTy->getElementType();
      Result = Builder.CreateBinaryIntrinsic(Ty->isSignedIntegerType()
                                                 ? llvm::Intrinsic::smax
                                                 : llvm::Intrinsic::umax,
                                             Op0, Op1, nullptr, "elt.max");
    } else
      Result = Builder.CreateMaxNum(Op0, Op1, "elt.max");
    return RValue::get(Result);
  }
  case Builtin::BI__builtin_elementwise_min: {
    Value *Op0 = EmitScalarExpr(E->getArg(0));
    Value *Op1 = EmitScalarExpr(E->getArg(1));
    Value *Result;
    if (Op0->getType()->isIntOrIntVectorTy()) {
      QualType Ty = E->getArg(0)->getType();
      if (auto *VecTy = Ty->getAs<VectorType>())
        Ty = VecTy->getElementType();
      Result = Builder.CreateBinaryIntrinsic(Ty->isSignedIntegerType()
                                                 ? llvm::Intrinsic::smin
                                                 : llvm::Intrinsic::umin,
                                             Op0, Op1, nullptr, "elt.min");
    } else
      Result = Builder.CreateMinNum(Op0, Op1, "elt.min");
    return RValue::get(Result);
  }

  case Builtin::BI__builtin_reduce_max: {
    auto GetIntrinsicID = [this](QualType QT) {
      if (auto *VecTy = QT->getAs<VectorType>())
        QT = VecTy->getElementType();
      else if (QT->isSizelessVectorType())
        QT = QT->getSizelessVectorEltType(CGM.getContext());

      if (QT->isSignedIntegerType())
        return llvm::Intrinsic::vector_reduce_smax;
      if (QT->isUnsignedIntegerType())
        return llvm::Intrinsic::vector_reduce_umax;
      assert(QT->isFloatingType() && "must have a float here");
      return llvm::Intrinsic::vector_reduce_fmax;
    };
    return RValue::get(emitBuiltinWithOneOverloadedType<1>(
        *this, E, GetIntrinsicID(E->getArg(0)->getType()), "rdx.max"));
  }

  case Builtin::BI__builtin_reduce_min: {
    auto GetIntrinsicID = [this](QualType QT) {
      if (auto *VecTy = QT->getAs<VectorType>())
        QT = VecTy->getElementType();
      else if (QT->isSizelessVectorType())
        QT = QT->getSizelessVectorEltType(CGM.getContext());

      if (QT->isSignedIntegerType())
        return llvm::Intrinsic::vector_reduce_smin;
      if (QT->isUnsignedIntegerType())
        return llvm::Intrinsic::vector_reduce_umin;
      assert(QT->isFloatingType() && "must have a float here");
      return llvm::Intrinsic::vector_reduce_fmin;
    };

    return RValue::get(emitBuiltinWithOneOverloadedType<1>(
        *this, E, GetIntrinsicID(E->getArg(0)->getType()), "rdx.min"));
  }

  case Builtin::BI__builtin_reduce_add:
    return RValue::get(emitBuiltinWithOneOverloadedType<1>(
        *this, E, llvm::Intrinsic::vector_reduce_add, "rdx.add"));
  case Builtin::BI__builtin_reduce_mul:
    return RValue::get(emitBuiltinWithOneOverloadedType<1>(
        *this, E, llvm::Intrinsic::vector_reduce_mul, "rdx.mul"));
  case Builtin::BI__builtin_reduce_xor:
    return RValue::get(emitBuiltinWithOneOverloadedType<1>(
        *this, E, llvm::Intrinsic::vector_reduce_xor, "rdx.xor"));
  case Builtin::BI__builtin_reduce_or:
    return RValue::get(emitBuiltinWithOneOverloadedType<1>(
        *this, E, llvm::Intrinsic::vector_reduce_or, "rdx.or"));
  case Builtin::BI__builtin_reduce_and:
    return RValue::get(emitBuiltinWithOneOverloadedType<1>(
        *this, E, llvm::Intrinsic::vector_reduce_and, "rdx.and"));

  case Builtin::BI__builtin_matrix_transpose: {
    auto *MatrixTy = E->getArg(0)->getType()->castAs<ConstantMatrixType>();
    Value *MatValue = EmitScalarExpr(E->getArg(0));
    MatrixBuilder MB(Builder);
    Value *Result = MB.CreateMatrixTranspose(MatValue, MatrixTy->getNumRows(),
                                             MatrixTy->getNumColumns());
    return RValue::get(Result);
  }

  case Builtin::BI__builtin_matrix_column_major_load: {
    MatrixBuilder MB(Builder);
    // Emit everything that isn't dependent on the first parameter type.
    Value *Stride = EmitScalarExpr(E->getArg(3));
    const auto *ResultTy = E->getType()->getAs<ConstantMatrixType>();
    auto *PtrTy = E->getArg(0)->getType()->getAs<PointerType>();
    assert(PtrTy && "arg0 must be of pointer type");
    bool IsVolatile = PtrTy->getPointeeType().isVolatileQualified();

    Address Src = EmitPointerWithAlignment(E->getArg(0));
    EmitNonNullArgCheck(RValue::get(Src.emitRawPointer(*this)),
                        E->getArg(0)->getType(), E->getArg(0)->getExprLoc(), FD,
                        0);
    Value *Result = MB.CreateColumnMajorLoad(
        Src.getElementType(), Src.emitRawPointer(*this),
        Align(Src.getAlignment().getQuantity()), Stride, IsVolatile,
        ResultTy->getNumRows(), ResultTy->getNumColumns(), "matrix");
    return RValue::get(Result);
  }

  case Builtin::BI__builtin_matrix_column_major_store: {
    MatrixBuilder MB(Builder);
    Value *Matrix = EmitScalarExpr(E->getArg(0));
    Address Dst = EmitPointerWithAlignment(E->getArg(1));
    Value *Stride = EmitScalarExpr(E->getArg(2));

    const auto *MatrixTy = E->getArg(0)->getType()->getAs<ConstantMatrixType>();
    auto *PtrTy = E->getArg(1)->getType()->getAs<PointerType>();
    assert(PtrTy && "arg1 must be of pointer type");
    bool IsVolatile = PtrTy->getPointeeType().isVolatileQualified();

    EmitNonNullArgCheck(RValue::get(Dst.emitRawPointer(*this)),
                        E->getArg(1)->getType(), E->getArg(1)->getExprLoc(), FD,
                        0);
    Value *Result = MB.CreateColumnMajorStore(
        Matrix, Dst.emitRawPointer(*this),
        Align(Dst.getAlignment().getQuantity()), Stride, IsVolatile,
        MatrixTy->getNumRows(), MatrixTy->getNumColumns());
    return RValue::get(Result);
  }

  case Builtin::BI__builtin_isinf_sign: {
    // isinf_sign(x) -> fabs(x) == infinity ? (signbit(x) ? -1 : 1) : 0
    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
    // FIXME: for strictfp/IEEE-754 we need to not trap on SNaN here.
    Value *Arg = EmitScalarExpr(E->getArg(0));
    Value *AbsArg = EmitFAbs(*this, Arg);
    Value *IsInf = Builder.CreateFCmpOEQ(
        AbsArg, ConstantFP::getInfinity(Arg->getType()), "isinf");
    Value *IsNeg = EmitSignBit(*this, Arg);

    llvm::Type *IntTy = ConvertType(E->getType());
    Value *Zero = Constant::getNullValue(IntTy);
    Value *One = ConstantInt::get(IntTy, 1);
    Value *NegativeOne = ConstantInt::get(IntTy, -1);
    Value *SignResult = Builder.CreateSelect(IsNeg, NegativeOne, One);
    Value *Result = Builder.CreateSelect(IsInf, SignResult, Zero);
    return RValue::get(Result);
  }

  case Builtin::BI__builtin_flt_rounds: {
    Function *F = CGM.getIntrinsic(Intrinsic::get_rounding);

    llvm::Type *ResultType = ConvertType(E->getType());
    Value *Result = Builder.CreateCall(F);
    if (Result->getType() != ResultType)
      Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
                                     "cast");
    return RValue::get(Result);
  }

  case Builtin::BI__builtin_set_flt_rounds: {
    Function *F = CGM.getIntrinsic(Intrinsic::set_rounding);

    Value *V = EmitScalarExpr(E->getArg(0));
    Builder.CreateCall(F, V);
    return RValue::get(nullptr);
  }

  case Builtin::BI__builtin_fpclassify: {
    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
    // FIXME: for strictfp/IEEE-754 we need to not trap on SNaN here.
    Value *V = EmitScalarExpr(E->getArg(5));
    llvm::Type *Ty = ConvertType(E->getArg(5)->getType());

    // Create Result
    BasicBlock *Begin = Builder.GetInsertBlock();
    BasicBlock *End = createBasicBlock("fpclassify_end", this->CurFn);
    Builder.SetInsertPoint(End);
    PHINode *Result =
        Builder.CreatePHI(ConvertType(E->getArg(0)->getType()), 4,
                          "fpclassify_result");

    // if (V==0) return FP_ZERO
    Builder.SetInsertPoint(Begin);
    Value *IsZero = Builder.CreateFCmpOEQ(V, Constant::getNullValue(Ty),
                                          "iszero");
    Value *ZeroLiteral = EmitScalarExpr(E->getArg(4));
    BasicBlock *NotZero = createBasicBlock("fpclassify_not_zero", this->CurFn);
    Builder.CreateCondBr(IsZero, End, NotZero);
    Result->addIncoming(ZeroLiteral, Begin);

    // if (V != V) return FP_NAN
    Builder.SetInsertPoint(NotZero);
    Value *IsNan = Builder.CreateFCmpUNO(V, V, "cmp");
    Value *NanLiteral = EmitScalarExpr(E->getArg(0));
    BasicBlock *NotNan = createBasicBlock("fpclassify_not_nan", this->CurFn);
    Builder.CreateCondBr(IsNan, End, NotNan);
    Result->addIncoming(NanLiteral, NotZero);

    // if (fabs(V) == infinity) return FP_INFINITY
    Builder.SetInsertPoint(NotNan);
    Value *VAbs = EmitFAbs(*this, V);
    Value *IsInf =
        Builder.CreateFCmpOEQ(VAbs, ConstantFP::getInfinity(V->getType()),
                              "isinf");
    Value *InfLiteral = EmitScalarExpr(E->getArg(1));
    BasicBlock *NotInf = createBasicBlock("fpclassify_not_inf", this->CurFn);
    Builder.CreateCondBr(IsInf, End, NotInf);
    Result->addIncoming(InfLiteral, NotNan);

    // if (fabs(V) >= MIN_NORMAL) return FP_NORMAL else FP_SUBNORMAL
    Builder.SetInsertPoint(NotInf);
    APFloat Smallest = APFloat::getSmallestNormalized(
        getContext().getFloatTypeSemantics(E->getArg(5)->getType()));
    Value *IsNormal =
        Builder.CreateFCmpUGE(VAbs, ConstantFP::get(V->getContext(), Smallest),
                              "isnormal");
    Value *NormalResult =
        Builder.CreateSelect(IsNormal, EmitScalarExpr(E->getArg(2)),
                             EmitScalarExpr(E->getArg(3)));
    Builder.CreateBr(End);
    Result->addIncoming(NormalResult, NotInf);

    // return Result
    Builder.SetInsertPoint(End);
    return RValue::get(Result);
  }

  // An alloca will always return a pointer to the alloca (stack) address
  // space. This address space need not be the same as the AST / Language
  // default (e.g. in C / C++ auto vars are in the generic address space). At
  // the AST level this is handled within CreateTempAlloca et al., but for the
  // builtin / dynamic alloca we have to handle it here. We use an explicit cast
  // instead of passing an AS to CreateAlloca so as to not inhibit optimisation.
  case Builtin::BIalloca:
  case Builtin::BI_alloca:
  case Builtin::BI__builtin_alloca_uninitialized:
  case Builtin::BI__builtin_alloca: {
    Value *Size = EmitScalarExpr(E->getArg(0));
    const TargetInfo &TI = getContext().getTargetInfo();
    // The alignment of the alloca should correspond to __BIGGEST_ALIGNMENT__.
    const Align SuitableAlignmentInBytes =
        CGM.getContext()
            .toCharUnitsFromBits(TI.getSuitableAlign())
            .getAsAlign();
    AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size);
    AI->setAlignment(SuitableAlignmentInBytes);
    if (BuiltinID != Builtin::BI__builtin_alloca_uninitialized)
      initializeAlloca(*this, AI, Size, SuitableAlignmentInBytes);
    LangAS AAS = getASTAllocaAddressSpace();
    LangAS EAS = E->getType()->getPointeeType().getAddressSpace();
    if (AAS != EAS) {
      llvm::Type *Ty = CGM.getTypes().ConvertType(E->getType());
      return RValue::get(getTargetHooks().performAddrSpaceCast(*this, AI, AAS,
                                                               EAS, Ty));
    }
    return RValue::get(AI);
  }

  case Builtin::BI__builtin_alloca_with_align_uninitialized:
  case Builtin::BI__builtin_alloca_with_align: {
    Value *Size = EmitScalarExpr(E->getArg(0));
    Value *AlignmentInBitsValue = EmitScalarExpr(E->getArg(1));
    auto *AlignmentInBitsCI = cast<ConstantInt>(AlignmentInBitsValue);
    unsigned AlignmentInBits = AlignmentInBitsCI->getZExtValue();
    const Align AlignmentInBytes =
        CGM.getContext().toCharUnitsFromBits(AlignmentInBits).getAsAlign();
    AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size);
    AI->setAlignment(AlignmentInBytes);
    if (BuiltinID != Builtin::BI__builtin_alloca_with_align_uninitialized)
      initializeAlloca(*this, AI, Size, AlignmentInBytes);
    LangAS AAS = getASTAllocaAddressSpace();
    LangAS EAS = E->getType()->getPointeeType().getAddressSpace();
    if (AAS != EAS) {
      llvm::Type *Ty = CGM.getTypes().ConvertType(E->getType());
      return RValue::get(getTargetHooks().performAddrSpaceCast(*this, AI, AAS,
                                                               EAS, Ty));
    }
    return RValue::get(AI);
  }

  case Builtin::BIbzero:
  case Builtin::BI__builtin_bzero: {
    Address Dest = EmitPointerWithAlignment(E->getArg(0));
    Value *SizeVal = EmitScalarExpr(E->getArg(1));
    EmitNonNullArgCheck(Dest, E->getArg(0)->getType(),
                        E->getArg(0)->getExprLoc(), FD, 0);
    Builder.CreateMemSet(Dest, Builder.getInt8(0), SizeVal, false);
    return RValue::get(nullptr);
  }

  case Builtin::BIbcopy:
  case Builtin::BI__builtin_bcopy: {
    Address Src = EmitPointerWithAlignment(E->getArg(0));
    Address Dest = EmitPointerWithAlignment(E->getArg(1));
    Value *SizeVal = EmitScalarExpr(E->getArg(2));
    EmitNonNullArgCheck(RValue::get(Src.emitRawPointer(*this)),
                        E->getArg(0)->getType(), E->getArg(0)->getExprLoc(), FD,
                        0);
    EmitNonNullArgCheck(RValue::get(Dest.emitRawPointer(*this)),
                        E->getArg(1)->getType(), E->getArg(1)->getExprLoc(), FD,
                        0);
    Builder.CreateMemMove(Dest, Src, SizeVal, false);
    return RValue::get(nullptr);
  }

  case Builtin::BImemcpy:
  case Builtin::BI__builtin_memcpy:
  case Builtin::BImempcpy:
  case Builtin::BI__builtin_mempcpy: {
    Address Dest = EmitPointerWithAlignment(E->getArg(0));
    Address Src = EmitPointerWithAlignment(E->getArg(1));
    Value *SizeVal = EmitScalarExpr(E->getArg(2));
    EmitArgCheck(TCK_Store, Dest, E->getArg(0), 0);
    EmitArgCheck(TCK_Load, Src, E->getArg(1), 1);
    Builder.CreateMemCpy(Dest, Src, SizeVal, false);
    if (BuiltinID == Builtin::BImempcpy ||
        BuiltinID == Builtin::BI__builtin_mempcpy)
      // mempcpy returns a pointer one past the last byte written.
      return RValue::get(Builder.CreateInBoundsGEP(
          Dest.getElementType(), Dest.emitRawPointer(*this), SizeVal));
    else
      return RValue::get(Dest, *this);
  }

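  // __builtin_memcpy_inline requires a constant size and is emitted through
  // llvm.memcpy.inline, which is guaranteed to be expanded inline rather than
  // becoming a call to memcpy.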
  case Builtin::BI__builtin_memcpy_inline: {
    Address Dest = EmitPointerWithAlignment(E->getArg(0));
    Address Src = EmitPointerWithAlignment(E->getArg(1));
    uint64_t Size =
        E->getArg(2)->EvaluateKnownConstInt(getContext()).getZExtValue();
    EmitArgCheck(TCK_Store, Dest, E->getArg(0), 0);
    EmitArgCheck(TCK_Load, Src, E->getArg(1), 1);
    Builder.CreateMemCpyInline(Dest, Src, Size);
    return RValue::get(nullptr);
  }

  case Builtin::BI__builtin_char_memchr:
    BuiltinID = Builtin::BI__builtin_memchr;
    break;

  case Builtin::BI__builtin___memcpy_chk: {
    // fold __builtin_memcpy_chk(x, y, cst1, cst2) to memcpy iff cst1<=cst2.
    Expr::EvalResult SizeResult, DstSizeResult;
    if (!E->getArg(2)->EvaluateAsInt(SizeResult, CGM.getContext()) ||
        !E->getArg(3)->EvaluateAsInt(DstSizeResult, CGM.getContext()))
      break;
    llvm::APSInt Size = SizeResult.Val.getInt();
    llvm::APSInt DstSize = DstSizeResult.Val.getInt();
    if (Size.ugt(DstSize))
      break;
    Address Dest = EmitPointerWithAlignment(E->getArg(0));
    Address Src = EmitPointerWithAlignment(E->getArg(1));
    Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
    Builder.CreateMemCpy(Dest, Src, SizeVal, false);
    return RValue::get(Dest, *this);
  }

  case Builtin::BI__builtin_objc_memmove_collectable: {
    Address DestAddr = EmitPointerWithAlignment(E->getArg(0));
    Address SrcAddr = EmitPointerWithAlignment(E->getArg(1));
    Value *SizeVal = EmitScalarExpr(E->getArg(2));
    CGM.getObjCRuntime().EmitGCMemmoveCollectable(*this,
                                                  DestAddr, SrcAddr, SizeVal);
    return RValue::get(DestAddr, *this);
  }

  case Builtin::BI__builtin___memmove_chk: {
    // fold __builtin_memmove_chk(x, y, cst1, cst2) to memmove iff cst1<=cst2.
    Expr::EvalResult SizeResult, DstSizeResult;
    if (!E->getArg(2)->EvaluateAsInt(SizeResult, CGM.getContext()) ||
        !E->getArg(3)->EvaluateAsInt(DstSizeResult, CGM.getContext()))
      break;
    llvm::APSInt Size = SizeResult.Val.getInt();
    llvm::APSInt DstSize = DstSizeResult.Val.getInt();
    if (Size.ugt(DstSize))
      break;
    Address Dest = EmitPointerWithAlignment(E->getArg(0));
    Address Src = EmitPointerWithAlignment(E->getArg(1));
    Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
    Builder.CreateMemMove(Dest, Src, SizeVal, false);
    return RValue::get(Dest, *this);
  }

  case Builtin::BImemmove:
  case Builtin::BI__builtin_memmove: {
    Address Dest = EmitPointerWithAlignment(E->getArg(0));
    Address Src = EmitPointerWithAlignment(E->getArg(1));
    Value *SizeVal = EmitScalarExpr(E->getArg(2));
    EmitArgCheck(TCK_Store, Dest, E->getArg(0), 0);
    EmitArgCheck(TCK_Load, Src, E->getArg(1), 1);
    Builder.CreateMemMove(Dest, Src, SizeVal, false);
    return RValue::get(Dest, *this);
  }
  case Builtin::BImemset:
  case Builtin::BI__builtin_memset: {
    Address Dest = EmitPointerWithAlignment(E->getArg(0));
    Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
                                         Builder.getInt8Ty());
    Value *SizeVal = EmitScalarExpr(E->getArg(2));
    EmitNonNullArgCheck(Dest, E->getArg(0)->getType(),
                        E->getArg(0)->getExprLoc(), FD, 0);
    Builder.CreateMemSet(Dest, ByteVal, SizeVal, false);
    return RValue::get(Dest, *this);
  }
  case Builtin::BI__builtin_memset_inline: {
    Address Dest = EmitPointerWithAlignment(E->getArg(0));
    Value *ByteVal =
        Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)), Builder.getInt8Ty());
    uint64_t Size =
        E->getArg(2)->EvaluateKnownConstInt(getContext()).getZExtValue();
    EmitNonNullArgCheck(RValue::get(Dest.emitRawPointer(*this)),
                        E->getArg(0)->getType(), E->getArg(0)->getExprLoc(), FD,
                        0);
    Builder.CreateMemSetInline(Dest, ByteVal, Size);
    return RValue::get(nullptr);
  }
  case Builtin::BI__builtin___memset_chk: {
    // fold __builtin_memset_chk(x, y, cst1, cst2) to memset iff cst1<=cst2.
    Expr::EvalResult SizeResult, DstSizeResult;
    if (!E->getArg(2)->EvaluateAsInt(SizeResult, CGM.getContext()) ||
        !E->getArg(3)->EvaluateAsInt(DstSizeResult, CGM.getContext()))
      break;
    llvm::APSInt Size = SizeResult.Val.getInt();
    llvm::APSInt DstSize = DstSizeResult.Val.getInt();
    if (Size.ugt(DstSize))
      break;
    Address Dest = EmitPointerWithAlignment(E->getArg(0));
    Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
                                         Builder.getInt8Ty());
    Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
    Builder.CreateMemSet(Dest, ByteVal, SizeVal, false);
    return RValue::get(Dest, *this);
  }
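  // The inline wmemchr expansion below is roughly equivalent to:
  //   for (; size; --size, ++s)
  //     if (*s == c) return (wchar_t *)s;
  //   return nullptr;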
4333 if (!getTarget().getTriple().isOSMSVCRT()) 4334 break; 4335 4336 llvm::Type *WCharTy = ConvertType(getContext().WCharTy); 4337 Value *Str = EmitScalarExpr(E->getArg(0)); 4338 Value *Chr = EmitScalarExpr(E->getArg(1)); 4339 Value *Size = EmitScalarExpr(E->getArg(2)); 4340 4341 BasicBlock *Entry = Builder.GetInsertBlock(); 4342 BasicBlock *CmpEq = createBasicBlock("wmemchr.eq"); 4343 BasicBlock *Next = createBasicBlock("wmemchr.next"); 4344 BasicBlock *Exit = createBasicBlock("wmemchr.exit"); 4345 Value *SizeEq0 = Builder.CreateICmpEQ(Size, ConstantInt::get(SizeTy, 0)); 4346 Builder.CreateCondBr(SizeEq0, Exit, CmpEq); 4347 4348 EmitBlock(CmpEq); 4349 PHINode *StrPhi = Builder.CreatePHI(Str->getType(), 2); 4350 StrPhi->addIncoming(Str, Entry); 4351 PHINode *SizePhi = Builder.CreatePHI(SizeTy, 2); 4352 SizePhi->addIncoming(Size, Entry); 4353 CharUnits WCharAlign = 4354 getContext().getTypeAlignInChars(getContext().WCharTy); 4355 Value *StrCh = Builder.CreateAlignedLoad(WCharTy, StrPhi, WCharAlign); 4356 Value *FoundChr = Builder.CreateConstInBoundsGEP1_32(WCharTy, StrPhi, 0); 4357 Value *StrEqChr = Builder.CreateICmpEQ(StrCh, Chr); 4358 Builder.CreateCondBr(StrEqChr, Exit, Next); 4359 4360 EmitBlock(Next); 4361 Value *NextStr = Builder.CreateConstInBoundsGEP1_32(WCharTy, StrPhi, 1); 4362 Value *NextSize = Builder.CreateSub(SizePhi, ConstantInt::get(SizeTy, 1)); 4363 Value *NextSizeEq0 = 4364 Builder.CreateICmpEQ(NextSize, ConstantInt::get(SizeTy, 0)); 4365 Builder.CreateCondBr(NextSizeEq0, Exit, CmpEq); 4366 StrPhi->addIncoming(NextStr, Next); 4367 SizePhi->addIncoming(NextSize, Next); 4368 4369 EmitBlock(Exit); 4370 PHINode *Ret = Builder.CreatePHI(Str->getType(), 3); 4371 Ret->addIncoming(llvm::Constant::getNullValue(Str->getType()), Entry); 4372 Ret->addIncoming(llvm::Constant::getNullValue(Str->getType()), Next); 4373 Ret->addIncoming(FoundChr, CmpEq); 4374 return RValue::get(Ret); 4375 } 4376 case Builtin::BI__builtin_wmemcmp: { 4377 // The MSVC runtime library does not provide a definition of wmemcmp, so we 4378 // need an inline implementation. 
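  // Rough sketch (not the exact IR) of the loop emitted below:
  //   while (size != 0) {
  //     if (*dst > *src) return 1;
  //     if (*dst < *src) return -1;
  //     ++dst; ++src; --size;
  //   }
  //   return 0;
  // The element compares are unsigned, matching MSVC's unsigned wchar_t.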
4379 if (!getTarget().getTriple().isOSMSVCRT()) 4380 break; 4381 4382 llvm::Type *WCharTy = ConvertType(getContext().WCharTy); 4383 4384 Value *Dst = EmitScalarExpr(E->getArg(0)); 4385 Value *Src = EmitScalarExpr(E->getArg(1)); 4386 Value *Size = EmitScalarExpr(E->getArg(2)); 4387 4388 BasicBlock *Entry = Builder.GetInsertBlock(); 4389 BasicBlock *CmpGT = createBasicBlock("wmemcmp.gt"); 4390 BasicBlock *CmpLT = createBasicBlock("wmemcmp.lt"); 4391 BasicBlock *Next = createBasicBlock("wmemcmp.next"); 4392 BasicBlock *Exit = createBasicBlock("wmemcmp.exit"); 4393 Value *SizeEq0 = Builder.CreateICmpEQ(Size, ConstantInt::get(SizeTy, 0)); 4394 Builder.CreateCondBr(SizeEq0, Exit, CmpGT); 4395 4396 EmitBlock(CmpGT); 4397 PHINode *DstPhi = Builder.CreatePHI(Dst->getType(), 2); 4398 DstPhi->addIncoming(Dst, Entry); 4399 PHINode *SrcPhi = Builder.CreatePHI(Src->getType(), 2); 4400 SrcPhi->addIncoming(Src, Entry); 4401 PHINode *SizePhi = Builder.CreatePHI(SizeTy, 2); 4402 SizePhi->addIncoming(Size, Entry); 4403 CharUnits WCharAlign = 4404 getContext().getTypeAlignInChars(getContext().WCharTy); 4405 Value *DstCh = Builder.CreateAlignedLoad(WCharTy, DstPhi, WCharAlign); 4406 Value *SrcCh = Builder.CreateAlignedLoad(WCharTy, SrcPhi, WCharAlign); 4407 Value *DstGtSrc = Builder.CreateICmpUGT(DstCh, SrcCh); 4408 Builder.CreateCondBr(DstGtSrc, Exit, CmpLT); 4409 4410 EmitBlock(CmpLT); 4411 Value *DstLtSrc = Builder.CreateICmpULT(DstCh, SrcCh); 4412 Builder.CreateCondBr(DstLtSrc, Exit, Next); 4413 4414 EmitBlock(Next); 4415 Value *NextDst = Builder.CreateConstInBoundsGEP1_32(WCharTy, DstPhi, 1); 4416 Value *NextSrc = Builder.CreateConstInBoundsGEP1_32(WCharTy, SrcPhi, 1); 4417 Value *NextSize = Builder.CreateSub(SizePhi, ConstantInt::get(SizeTy, 1)); 4418 Value *NextSizeEq0 = 4419 Builder.CreateICmpEQ(NextSize, ConstantInt::get(SizeTy, 0)); 4420 Builder.CreateCondBr(NextSizeEq0, Exit, CmpGT); 4421 DstPhi->addIncoming(NextDst, Next); 4422 SrcPhi->addIncoming(NextSrc, Next); 4423 SizePhi->addIncoming(NextSize, Next); 4424 4425 EmitBlock(Exit); 4426 PHINode *Ret = Builder.CreatePHI(IntTy, 4); 4427 Ret->addIncoming(ConstantInt::get(IntTy, 0), Entry); 4428 Ret->addIncoming(ConstantInt::get(IntTy, 1), CmpGT); 4429 Ret->addIncoming(ConstantInt::get(IntTy, -1), CmpLT); 4430 Ret->addIncoming(ConstantInt::get(IntTy, 0), Next); 4431 return RValue::get(Ret); 4432 } 4433 case Builtin::BI__builtin_dwarf_cfa: { 4434 // The offset in bytes from the first argument to the CFA. 4435 // 4436 // Why on earth is this in the frontend? Is there any reason at 4437 // all that the backend can't reasonably determine this while 4438 // lowering llvm.eh.dwarf.cfa()? 4439 // 4440 // TODO: If there's a satisfactory reason, add a target hook for 4441 // this instead of hard-coding 0, which is correct for most targets. 
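  // Illustrative lowering (sketch): __builtin_dwarf_cfa() is expected to
  // become roughly
  //   %cfa = call ptr @llvm.eh.dwarf.cfa(i32 0)
  // with the i32 operand being the hard-coded offset discussed above.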
4442 int32_t Offset = 0; 4443 4444 Function *F = CGM.getIntrinsic(Intrinsic::eh_dwarf_cfa); 4445 return RValue::get(Builder.CreateCall(F, 4446 llvm::ConstantInt::get(Int32Ty, Offset))); 4447 } 4448 case Builtin::BI__builtin_return_address: { 4449 Value *Depth = ConstantEmitter(*this).emitAbstract(E->getArg(0), 4450 getContext().UnsignedIntTy); 4451 Function *F = CGM.getIntrinsic(Intrinsic::returnaddress); 4452 return RValue::get(Builder.CreateCall(F, Depth)); 4453 } 4454 case Builtin::BI_ReturnAddress: { 4455 Function *F = CGM.getIntrinsic(Intrinsic::returnaddress); 4456 return RValue::get(Builder.CreateCall(F, Builder.getInt32(0))); 4457 } 4458 case Builtin::BI__builtin_frame_address: { 4459 Value *Depth = ConstantEmitter(*this).emitAbstract(E->getArg(0), 4460 getContext().UnsignedIntTy); 4461 Function *F = CGM.getIntrinsic(Intrinsic::frameaddress, AllocaInt8PtrTy); 4462 return RValue::get(Builder.CreateCall(F, Depth)); 4463 } 4464 case Builtin::BI__builtin_extract_return_addr: { 4465 Value *Address = EmitScalarExpr(E->getArg(0)); 4466 Value *Result = getTargetHooks().decodeReturnAddress(*this, Address); 4467 return RValue::get(Result); 4468 } 4469 case Builtin::BI__builtin_frob_return_addr: { 4470 Value *Address = EmitScalarExpr(E->getArg(0)); 4471 Value *Result = getTargetHooks().encodeReturnAddress(*this, Address); 4472 return RValue::get(Result); 4473 } 4474 case Builtin::BI__builtin_dwarf_sp_column: { 4475 llvm::IntegerType *Ty 4476 = cast<llvm::IntegerType>(ConvertType(E->getType())); 4477 int Column = getTargetHooks().getDwarfEHStackPointer(CGM); 4478 if (Column == -1) { 4479 CGM.ErrorUnsupported(E, "__builtin_dwarf_sp_column"); 4480 return RValue::get(llvm::UndefValue::get(Ty)); 4481 } 4482 return RValue::get(llvm::ConstantInt::get(Ty, Column, true)); 4483 } 4484 case Builtin::BI__builtin_init_dwarf_reg_size_table: { 4485 Value *Address = EmitScalarExpr(E->getArg(0)); 4486 if (getTargetHooks().initDwarfEHRegSizeTable(*this, Address)) 4487 CGM.ErrorUnsupported(E, "__builtin_init_dwarf_reg_size_table"); 4488 return RValue::get(llvm::UndefValue::get(ConvertType(E->getType()))); 4489 } 4490 case Builtin::BI__builtin_eh_return: { 4491 Value *Int = EmitScalarExpr(E->getArg(0)); 4492 Value *Ptr = EmitScalarExpr(E->getArg(1)); 4493 4494 llvm::IntegerType *IntTy = cast<llvm::IntegerType>(Int->getType()); 4495 assert((IntTy->getBitWidth() == 32 || IntTy->getBitWidth() == 64) && 4496 "LLVM's __builtin_eh_return only supports 32- and 64-bit variants"); 4497 Function *F = 4498 CGM.getIntrinsic(IntTy->getBitWidth() == 32 ? Intrinsic::eh_return_i32 4499 : Intrinsic::eh_return_i64); 4500 Builder.CreateCall(F, {Int, Ptr}); 4501 Builder.CreateUnreachable(); 4502 4503 // We do need to preserve an insertion point. 4504 EmitBlock(createBasicBlock("builtin_eh_return.cont")); 4505 4506 return RValue::get(nullptr); 4507 } 4508 case Builtin::BI__builtin_unwind_init: { 4509 Function *F = CGM.getIntrinsic(Intrinsic::eh_unwind_init); 4510 Builder.CreateCall(F); 4511 return RValue::get(nullptr); 4512 } 4513 case Builtin::BI__builtin_extend_pointer: { 4514 // Extends a pointer to the size of an _Unwind_Word, which is 4515 // uint64_t on all platforms. Generally this gets poked into a 4516 // register and eventually used as an address, so if the 4517 // addressing registers are wider than pointers and the platform 4518 // doesn't implicitly ignore high-order bits when doing 4519 // addressing, we need to make sure we zext / sext based on 4520 // the platform's expectations. 
4521 // 4522 // See: http://gcc.gnu.org/ml/gcc-bugs/2002-02/msg00237.html 4523 4524 // Cast the pointer to intptr_t. 4525 Value *Ptr = EmitScalarExpr(E->getArg(0)); 4526 Value *Result = Builder.CreatePtrToInt(Ptr, IntPtrTy, "extend.cast"); 4527 4528 // If that's 64 bits, we're done. 4529 if (IntPtrTy->getBitWidth() == 64) 4530 return RValue::get(Result); 4531 4532 // Otherwise, ask the codegen data what to do. 4533 if (getTargetHooks().extendPointerWithSExt()) 4534 return RValue::get(Builder.CreateSExt(Result, Int64Ty, "extend.sext")); 4535 else 4536 return RValue::get(Builder.CreateZExt(Result, Int64Ty, "extend.zext")); 4537 } 4538 case Builtin::BI__builtin_setjmp: { 4539 // Buffer is a void**. 4540 Address Buf = EmitPointerWithAlignment(E->getArg(0)); 4541 4542 // Store the frame pointer to the setjmp buffer. 4543 Value *FrameAddr = Builder.CreateCall( 4544 CGM.getIntrinsic(Intrinsic::frameaddress, AllocaInt8PtrTy), 4545 ConstantInt::get(Int32Ty, 0)); 4546 Builder.CreateStore(FrameAddr, Buf); 4547 4548 // Store the stack pointer to the setjmp buffer. 4549 Value *StackAddr = Builder.CreateStackSave(); 4550 assert(Buf.emitRawPointer(*this)->getType() == StackAddr->getType()); 4551 4552 Address StackSaveSlot = Builder.CreateConstInBoundsGEP(Buf, 2); 4553 Builder.CreateStore(StackAddr, StackSaveSlot); 4554 4555 // Call LLVM's EH setjmp, which is lightweight. 4556 Function *F = CGM.getIntrinsic(Intrinsic::eh_sjlj_setjmp); 4557 return RValue::get(Builder.CreateCall(F, Buf.emitRawPointer(*this))); 4558 } 4559 case Builtin::BI__builtin_longjmp: { 4560 Value *Buf = EmitScalarExpr(E->getArg(0)); 4561 4562 // Call LLVM's EH longjmp, which is lightweight. 4563 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::eh_sjlj_longjmp), Buf); 4564 4565 // longjmp doesn't return; mark this as unreachable. 4566 Builder.CreateUnreachable(); 4567 4568 // We do need to preserve an insertion point. 
4569 EmitBlock(createBasicBlock("longjmp.cont")); 4570 4571 return RValue::get(nullptr); 4572 } 4573 case Builtin::BI__builtin_launder: { 4574 const Expr *Arg = E->getArg(0); 4575 QualType ArgTy = Arg->getType()->getPointeeType(); 4576 Value *Ptr = EmitScalarExpr(Arg); 4577 if (TypeRequiresBuiltinLaunder(CGM, ArgTy)) 4578 Ptr = Builder.CreateLaunderInvariantGroup(Ptr); 4579 4580 return RValue::get(Ptr); 4581 } 4582 case Builtin::BI__sync_fetch_and_add: 4583 case Builtin::BI__sync_fetch_and_sub: 4584 case Builtin::BI__sync_fetch_and_or: 4585 case Builtin::BI__sync_fetch_and_and: 4586 case Builtin::BI__sync_fetch_and_xor: 4587 case Builtin::BI__sync_fetch_and_nand: 4588 case Builtin::BI__sync_add_and_fetch: 4589 case Builtin::BI__sync_sub_and_fetch: 4590 case Builtin::BI__sync_and_and_fetch: 4591 case Builtin::BI__sync_or_and_fetch: 4592 case Builtin::BI__sync_xor_and_fetch: 4593 case Builtin::BI__sync_nand_and_fetch: 4594 case Builtin::BI__sync_val_compare_and_swap: 4595 case Builtin::BI__sync_bool_compare_and_swap: 4596 case Builtin::BI__sync_lock_test_and_set: 4597 case Builtin::BI__sync_lock_release: 4598 case Builtin::BI__sync_swap: 4599 llvm_unreachable("Shouldn't make it through sema"); 4600 case Builtin::BI__sync_fetch_and_add_1: 4601 case Builtin::BI__sync_fetch_and_add_2: 4602 case Builtin::BI__sync_fetch_and_add_4: 4603 case Builtin::BI__sync_fetch_and_add_8: 4604 case Builtin::BI__sync_fetch_and_add_16: 4605 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Add, E); 4606 case Builtin::BI__sync_fetch_and_sub_1: 4607 case Builtin::BI__sync_fetch_and_sub_2: 4608 case Builtin::BI__sync_fetch_and_sub_4: 4609 case Builtin::BI__sync_fetch_and_sub_8: 4610 case Builtin::BI__sync_fetch_and_sub_16: 4611 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Sub, E); 4612 case Builtin::BI__sync_fetch_and_or_1: 4613 case Builtin::BI__sync_fetch_and_or_2: 4614 case Builtin::BI__sync_fetch_and_or_4: 4615 case Builtin::BI__sync_fetch_and_or_8: 4616 case Builtin::BI__sync_fetch_and_or_16: 4617 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Or, E); 4618 case Builtin::BI__sync_fetch_and_and_1: 4619 case Builtin::BI__sync_fetch_and_and_2: 4620 case Builtin::BI__sync_fetch_and_and_4: 4621 case Builtin::BI__sync_fetch_and_and_8: 4622 case Builtin::BI__sync_fetch_and_and_16: 4623 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::And, E); 4624 case Builtin::BI__sync_fetch_and_xor_1: 4625 case Builtin::BI__sync_fetch_and_xor_2: 4626 case Builtin::BI__sync_fetch_and_xor_4: 4627 case Builtin::BI__sync_fetch_and_xor_8: 4628 case Builtin::BI__sync_fetch_and_xor_16: 4629 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xor, E); 4630 case Builtin::BI__sync_fetch_and_nand_1: 4631 case Builtin::BI__sync_fetch_and_nand_2: 4632 case Builtin::BI__sync_fetch_and_nand_4: 4633 case Builtin::BI__sync_fetch_and_nand_8: 4634 case Builtin::BI__sync_fetch_and_nand_16: 4635 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Nand, E); 4636 4637 // Clang extensions: not overloaded yet. 
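  // Illustrative lowering (sketch, signed int operand assumed):
  //   __sync_fetch_and_min(p, v)  ->  atomicrmw min ptr %p, i32 %v seq_cst
  // max, umin, and umax map to the corresponding atomicrmw operations below.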
4638 case Builtin::BI__sync_fetch_and_min: 4639 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Min, E); 4640 case Builtin::BI__sync_fetch_and_max: 4641 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Max, E); 4642 case Builtin::BI__sync_fetch_and_umin: 4643 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMin, E); 4644 case Builtin::BI__sync_fetch_and_umax: 4645 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMax, E); 4646 4647 case Builtin::BI__sync_add_and_fetch_1: 4648 case Builtin::BI__sync_add_and_fetch_2: 4649 case Builtin::BI__sync_add_and_fetch_4: 4650 case Builtin::BI__sync_add_and_fetch_8: 4651 case Builtin::BI__sync_add_and_fetch_16: 4652 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Add, E, 4653 llvm::Instruction::Add); 4654 case Builtin::BI__sync_sub_and_fetch_1: 4655 case Builtin::BI__sync_sub_and_fetch_2: 4656 case Builtin::BI__sync_sub_and_fetch_4: 4657 case Builtin::BI__sync_sub_and_fetch_8: 4658 case Builtin::BI__sync_sub_and_fetch_16: 4659 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Sub, E, 4660 llvm::Instruction::Sub); 4661 case Builtin::BI__sync_and_and_fetch_1: 4662 case Builtin::BI__sync_and_and_fetch_2: 4663 case Builtin::BI__sync_and_and_fetch_4: 4664 case Builtin::BI__sync_and_and_fetch_8: 4665 case Builtin::BI__sync_and_and_fetch_16: 4666 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::And, E, 4667 llvm::Instruction::And); 4668 case Builtin::BI__sync_or_and_fetch_1: 4669 case Builtin::BI__sync_or_and_fetch_2: 4670 case Builtin::BI__sync_or_and_fetch_4: 4671 case Builtin::BI__sync_or_and_fetch_8: 4672 case Builtin::BI__sync_or_and_fetch_16: 4673 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Or, E, 4674 llvm::Instruction::Or); 4675 case Builtin::BI__sync_xor_and_fetch_1: 4676 case Builtin::BI__sync_xor_and_fetch_2: 4677 case Builtin::BI__sync_xor_and_fetch_4: 4678 case Builtin::BI__sync_xor_and_fetch_8: 4679 case Builtin::BI__sync_xor_and_fetch_16: 4680 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Xor, E, 4681 llvm::Instruction::Xor); 4682 case Builtin::BI__sync_nand_and_fetch_1: 4683 case Builtin::BI__sync_nand_and_fetch_2: 4684 case Builtin::BI__sync_nand_and_fetch_4: 4685 case Builtin::BI__sync_nand_and_fetch_8: 4686 case Builtin::BI__sync_nand_and_fetch_16: 4687 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Nand, E, 4688 llvm::Instruction::And, true); 4689 4690 case Builtin::BI__sync_val_compare_and_swap_1: 4691 case Builtin::BI__sync_val_compare_and_swap_2: 4692 case Builtin::BI__sync_val_compare_and_swap_4: 4693 case Builtin::BI__sync_val_compare_and_swap_8: 4694 case Builtin::BI__sync_val_compare_and_swap_16: 4695 return RValue::get(MakeAtomicCmpXchgValue(*this, E, false)); 4696 4697 case Builtin::BI__sync_bool_compare_and_swap_1: 4698 case Builtin::BI__sync_bool_compare_and_swap_2: 4699 case Builtin::BI__sync_bool_compare_and_swap_4: 4700 case Builtin::BI__sync_bool_compare_and_swap_8: 4701 case Builtin::BI__sync_bool_compare_and_swap_16: 4702 return RValue::get(MakeAtomicCmpXchgValue(*this, E, true)); 4703 4704 case Builtin::BI__sync_swap_1: 4705 case Builtin::BI__sync_swap_2: 4706 case Builtin::BI__sync_swap_4: 4707 case Builtin::BI__sync_swap_8: 4708 case Builtin::BI__sync_swap_16: 4709 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E); 4710 4711 case Builtin::BI__sync_lock_test_and_set_1: 4712 case Builtin::BI__sync_lock_test_and_set_2: 4713 case Builtin::BI__sync_lock_test_and_set_4: 4714 case Builtin::BI__sync_lock_test_and_set_8: 4715 case 
Builtin::BI__sync_lock_test_and_set_16: 4716 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E); 4717 4718 case Builtin::BI__sync_lock_release_1: 4719 case Builtin::BI__sync_lock_release_2: 4720 case Builtin::BI__sync_lock_release_4: 4721 case Builtin::BI__sync_lock_release_8: 4722 case Builtin::BI__sync_lock_release_16: { 4723 Address Ptr = CheckAtomicAlignment(*this, E); 4724 QualType ElTy = E->getArg(0)->getType()->getPointeeType(); 4725 4726 llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(), 4727 getContext().getTypeSize(ElTy)); 4728 llvm::StoreInst *Store = 4729 Builder.CreateStore(llvm::Constant::getNullValue(ITy), Ptr); 4730 Store->setAtomic(llvm::AtomicOrdering::Release); 4731 return RValue::get(nullptr); 4732 } 4733 4734 case Builtin::BI__sync_synchronize: { 4735 // We assume this is supposed to correspond to a C++0x-style 4736 // sequentially-consistent fence (i.e. this is only usable for 4737 // synchronization, not device I/O or anything like that). This intrinsic 4738 // is really badly designed in the sense that in theory, there isn't 4739 // any way to safely use it... but in practice, it mostly works 4740 // to use it with non-atomic loads and stores to get acquire/release 4741 // semantics. 4742 Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent); 4743 return RValue::get(nullptr); 4744 } 4745 4746 case Builtin::BI__builtin_nontemporal_load: 4747 return RValue::get(EmitNontemporalLoad(*this, E)); 4748 case Builtin::BI__builtin_nontemporal_store: 4749 return RValue::get(EmitNontemporalStore(*this, E)); 4750 case Builtin::BI__c11_atomic_is_lock_free: 4751 case Builtin::BI__atomic_is_lock_free: { 4752 // Call "bool __atomic_is_lock_free(size_t size, void *ptr)". For the 4753 // __c11 builtin, ptr is 0 (indicating a properly-aligned object), since 4754 // _Atomic(T) is always properly-aligned. 4755 const char *LibCallName = "__atomic_is_lock_free"; 4756 CallArgList Args; 4757 Args.add(RValue::get(EmitScalarExpr(E->getArg(0))), 4758 getContext().getSizeType()); 4759 if (BuiltinID == Builtin::BI__atomic_is_lock_free) 4760 Args.add(RValue::get(EmitScalarExpr(E->getArg(1))), 4761 getContext().VoidPtrTy); 4762 else 4763 Args.add(RValue::get(llvm::Constant::getNullValue(VoidPtrTy)), 4764 getContext().VoidPtrTy); 4765 const CGFunctionInfo &FuncInfo = 4766 CGM.getTypes().arrangeBuiltinFunctionCall(E->getType(), Args); 4767 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FuncInfo); 4768 llvm::FunctionCallee Func = CGM.CreateRuntimeFunction(FTy, LibCallName); 4769 return EmitCall(FuncInfo, CGCallee::forDirect(Func), 4770 ReturnValueSlot(), Args); 4771 } 4772 4773 case Builtin::BI__atomic_test_and_set: { 4774 // Look at the argument type to determine whether this is a volatile 4775 // operation. The parameter type is always volatile. 
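  // Illustrative constant-order case (sketch):
  //   __atomic_test_and_set(p, __ATOMIC_ACQUIRE)
  // is expected to become roughly
  //   %old = atomicrmw xchg ptr %p, i8 1 acquire
  //   %tobool = icmp ne i8 %old, 0
  // For a non-constant order, the switch emitted further down picks the
  // ordering at run time.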
4776 QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType(); 4777 bool Volatile = 4778 PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified(); 4779 4780 Address Ptr = 4781 EmitPointerWithAlignment(E->getArg(0)).withElementType(Int8Ty); 4782 4783 Value *NewVal = Builder.getInt8(1); 4784 Value *Order = EmitScalarExpr(E->getArg(1)); 4785 if (isa<llvm::ConstantInt>(Order)) { 4786 int ord = cast<llvm::ConstantInt>(Order)->getZExtValue(); 4787 AtomicRMWInst *Result = nullptr; 4788 switch (ord) { 4789 case 0: // memory_order_relaxed 4790 default: // invalid order 4791 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal, 4792 llvm::AtomicOrdering::Monotonic); 4793 break; 4794 case 1: // memory_order_consume 4795 case 2: // memory_order_acquire 4796 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal, 4797 llvm::AtomicOrdering::Acquire); 4798 break; 4799 case 3: // memory_order_release 4800 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal, 4801 llvm::AtomicOrdering::Release); 4802 break; 4803 case 4: // memory_order_acq_rel 4804 4805 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal, 4806 llvm::AtomicOrdering::AcquireRelease); 4807 break; 4808 case 5: // memory_order_seq_cst 4809 Result = Builder.CreateAtomicRMW( 4810 llvm::AtomicRMWInst::Xchg, Ptr, NewVal, 4811 llvm::AtomicOrdering::SequentiallyConsistent); 4812 break; 4813 } 4814 Result->setVolatile(Volatile); 4815 return RValue::get(Builder.CreateIsNotNull(Result, "tobool")); 4816 } 4817 4818 llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn); 4819 4820 llvm::BasicBlock *BBs[5] = { 4821 createBasicBlock("monotonic", CurFn), 4822 createBasicBlock("acquire", CurFn), 4823 createBasicBlock("release", CurFn), 4824 createBasicBlock("acqrel", CurFn), 4825 createBasicBlock("seqcst", CurFn) 4826 }; 4827 llvm::AtomicOrdering Orders[5] = { 4828 llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Acquire, 4829 llvm::AtomicOrdering::Release, llvm::AtomicOrdering::AcquireRelease, 4830 llvm::AtomicOrdering::SequentiallyConsistent}; 4831 4832 Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false); 4833 llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]); 4834 4835 Builder.SetInsertPoint(ContBB); 4836 PHINode *Result = Builder.CreatePHI(Int8Ty, 5, "was_set"); 4837 4838 for (unsigned i = 0; i < 5; ++i) { 4839 Builder.SetInsertPoint(BBs[i]); 4840 AtomicRMWInst *RMW = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, 4841 Ptr, NewVal, Orders[i]); 4842 RMW->setVolatile(Volatile); 4843 Result->addIncoming(RMW, BBs[i]); 4844 Builder.CreateBr(ContBB); 4845 } 4846 4847 SI->addCase(Builder.getInt32(0), BBs[0]); 4848 SI->addCase(Builder.getInt32(1), BBs[1]); 4849 SI->addCase(Builder.getInt32(2), BBs[1]); 4850 SI->addCase(Builder.getInt32(3), BBs[2]); 4851 SI->addCase(Builder.getInt32(4), BBs[3]); 4852 SI->addCase(Builder.getInt32(5), BBs[4]); 4853 4854 Builder.SetInsertPoint(ContBB); 4855 return RValue::get(Builder.CreateIsNotNull(Result, "tobool")); 4856 } 4857 4858 case Builtin::BI__atomic_clear: { 4859 QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType(); 4860 bool Volatile = 4861 PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified(); 4862 4863 Address Ptr = EmitPointerWithAlignment(E->getArg(0)); 4864 Ptr = Ptr.withElementType(Int8Ty); 4865 Value *NewVal = Builder.getInt8(0); 4866 Value *Order = EmitScalarExpr(E->getArg(1)); 4867 if (isa<llvm::ConstantInt>(Order)) { 4868 int ord = 
cast<llvm::ConstantInt>(Order)->getZExtValue(); 4869 StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile); 4870 switch (ord) { 4871 case 0: // memory_order_relaxed 4872 default: // invalid order 4873 Store->setOrdering(llvm::AtomicOrdering::Monotonic); 4874 break; 4875 case 3: // memory_order_release 4876 Store->setOrdering(llvm::AtomicOrdering::Release); 4877 break; 4878 case 5: // memory_order_seq_cst 4879 Store->setOrdering(llvm::AtomicOrdering::SequentiallyConsistent); 4880 break; 4881 } 4882 return RValue::get(nullptr); 4883 } 4884 4885 llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn); 4886 4887 llvm::BasicBlock *BBs[3] = { 4888 createBasicBlock("monotonic", CurFn), 4889 createBasicBlock("release", CurFn), 4890 createBasicBlock("seqcst", CurFn) 4891 }; 4892 llvm::AtomicOrdering Orders[3] = { 4893 llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Release, 4894 llvm::AtomicOrdering::SequentiallyConsistent}; 4895 4896 Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false); 4897 llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]); 4898 4899 for (unsigned i = 0; i < 3; ++i) { 4900 Builder.SetInsertPoint(BBs[i]); 4901 StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile); 4902 Store->setOrdering(Orders[i]); 4903 Builder.CreateBr(ContBB); 4904 } 4905 4906 SI->addCase(Builder.getInt32(0), BBs[0]); 4907 SI->addCase(Builder.getInt32(3), BBs[1]); 4908 SI->addCase(Builder.getInt32(5), BBs[2]); 4909 4910 Builder.SetInsertPoint(ContBB); 4911 return RValue::get(nullptr); 4912 } 4913 4914 case Builtin::BI__atomic_thread_fence: 4915 case Builtin::BI__atomic_signal_fence: 4916 case Builtin::BI__c11_atomic_thread_fence: 4917 case Builtin::BI__c11_atomic_signal_fence: { 4918 llvm::SyncScope::ID SSID; 4919 if (BuiltinID == Builtin::BI__atomic_signal_fence || 4920 BuiltinID == Builtin::BI__c11_atomic_signal_fence) 4921 SSID = llvm::SyncScope::SingleThread; 4922 else 4923 SSID = llvm::SyncScope::System; 4924 Value *Order = EmitScalarExpr(E->getArg(0)); 4925 if (isa<llvm::ConstantInt>(Order)) { 4926 int ord = cast<llvm::ConstantInt>(Order)->getZExtValue(); 4927 switch (ord) { 4928 case 0: // memory_order_relaxed 4929 default: // invalid order 4930 break; 4931 case 1: // memory_order_consume 4932 case 2: // memory_order_acquire 4933 Builder.CreateFence(llvm::AtomicOrdering::Acquire, SSID); 4934 break; 4935 case 3: // memory_order_release 4936 Builder.CreateFence(llvm::AtomicOrdering::Release, SSID); 4937 break; 4938 case 4: // memory_order_acq_rel 4939 Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, SSID); 4940 break; 4941 case 5: // memory_order_seq_cst 4942 Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, SSID); 4943 break; 4944 } 4945 return RValue::get(nullptr); 4946 } 4947 4948 llvm::BasicBlock *AcquireBB, *ReleaseBB, *AcqRelBB, *SeqCstBB; 4949 AcquireBB = createBasicBlock("acquire", CurFn); 4950 ReleaseBB = createBasicBlock("release", CurFn); 4951 AcqRelBB = createBasicBlock("acqrel", CurFn); 4952 SeqCstBB = createBasicBlock("seqcst", CurFn); 4953 llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn); 4954 4955 Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false); 4956 llvm::SwitchInst *SI = Builder.CreateSwitch(Order, ContBB); 4957 4958 Builder.SetInsertPoint(AcquireBB); 4959 Builder.CreateFence(llvm::AtomicOrdering::Acquire, SSID); 4960 Builder.CreateBr(ContBB); 4961 SI->addCase(Builder.getInt32(1), AcquireBB); 4962 SI->addCase(Builder.getInt32(2), AcquireBB); 4963 4964 
    Builder.SetInsertPoint(ReleaseBB);
    Builder.CreateFence(llvm::AtomicOrdering::Release, SSID);
    Builder.CreateBr(ContBB);
    SI->addCase(Builder.getInt32(3), ReleaseBB);

    Builder.SetInsertPoint(AcqRelBB);
    Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, SSID);
    Builder.CreateBr(ContBB);
    SI->addCase(Builder.getInt32(4), AcqRelBB);

    Builder.SetInsertPoint(SeqCstBB);
    Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, SSID);
    Builder.CreateBr(ContBB);
    SI->addCase(Builder.getInt32(5), SeqCstBB);

    Builder.SetInsertPoint(ContBB);
    return RValue::get(nullptr);
  }

  case Builtin::BI__builtin_signbit:
  case Builtin::BI__builtin_signbitf:
  case Builtin::BI__builtin_signbitl: {
    return RValue::get(
        Builder.CreateZExt(EmitSignBit(*this, EmitScalarExpr(E->getArg(0))),
                           ConvertType(E->getType())));
  }
  case Builtin::BI__warn_memset_zero_len:
    return RValue::getIgnored();
  case Builtin::BI__annotation: {
    // Re-encode each wide string to UTF8 and make an MDString.
    SmallVector<Metadata *, 1> Strings;
    for (const Expr *Arg : E->arguments()) {
      const auto *Str = cast<StringLiteral>(Arg->IgnoreParenCasts());
      assert(Str->getCharByteWidth() == 2);
      StringRef WideBytes = Str->getBytes();
      std::string StrUtf8;
      if (!convertUTF16ToUTF8String(
              ArrayRef(WideBytes.data(), WideBytes.size()), StrUtf8)) {
        CGM.ErrorUnsupported(E, "non-UTF16 __annotation argument");
        continue;
      }
      Strings.push_back(llvm::MDString::get(getLLVMContext(), StrUtf8));
    }

    // Build an MDTuple of MDStrings and emit the intrinsic call.
    llvm::Function *F =
        CGM.getIntrinsic(llvm::Intrinsic::codeview_annotation, {});
    MDTuple *StrTuple = MDTuple::get(getLLVMContext(), Strings);
    Builder.CreateCall(F, MetadataAsValue::get(getLLVMContext(), StrTuple));
    return RValue::getIgnored();
  }
  case Builtin::BI__builtin_annotation: {
    llvm::Value *AnnVal = EmitScalarExpr(E->getArg(0));
    llvm::Function *F =
        CGM.getIntrinsic(llvm::Intrinsic::annotation,
                         {AnnVal->getType(), CGM.ConstGlobalsPtrTy});

    // Get the annotation string, go through casts. Sema requires this to be a
    // non-wide string literal, potentially cast, so the cast<> is safe.
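    // Illustrative use (names hypothetical):
    //   int traced = __builtin_annotation(value, "my.tag");
    // which is expected to lower to a call to the llvm.annotation intrinsic
    // carrying the annotation string and the source location.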
5023 const Expr *AnnotationStrExpr = E->getArg(1)->IgnoreParenCasts(); 5024 StringRef Str = cast<StringLiteral>(AnnotationStrExpr)->getString(); 5025 return RValue::get( 5026 EmitAnnotationCall(F, AnnVal, Str, E->getExprLoc(), nullptr)); 5027 } 5028 case Builtin::BI__builtin_addcb: 5029 case Builtin::BI__builtin_addcs: 5030 case Builtin::BI__builtin_addc: 5031 case Builtin::BI__builtin_addcl: 5032 case Builtin::BI__builtin_addcll: 5033 case Builtin::BI__builtin_subcb: 5034 case Builtin::BI__builtin_subcs: 5035 case Builtin::BI__builtin_subc: 5036 case Builtin::BI__builtin_subcl: 5037 case Builtin::BI__builtin_subcll: { 5038 5039 // We translate all of these builtins from expressions of the form: 5040 // int x = ..., y = ..., carryin = ..., carryout, result; 5041 // result = __builtin_addc(x, y, carryin, &carryout); 5042 // 5043 // to LLVM IR of the form: 5044 // 5045 // %tmp1 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %x, i32 %y) 5046 // %tmpsum1 = extractvalue {i32, i1} %tmp1, 0 5047 // %carry1 = extractvalue {i32, i1} %tmp1, 1 5048 // %tmp2 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %tmpsum1, 5049 // i32 %carryin) 5050 // %result = extractvalue {i32, i1} %tmp2, 0 5051 // %carry2 = extractvalue {i32, i1} %tmp2, 1 5052 // %tmp3 = or i1 %carry1, %carry2 5053 // %tmp4 = zext i1 %tmp3 to i32 5054 // store i32 %tmp4, i32* %carryout 5055 5056 // Scalarize our inputs. 5057 llvm::Value *X = EmitScalarExpr(E->getArg(0)); 5058 llvm::Value *Y = EmitScalarExpr(E->getArg(1)); 5059 llvm::Value *Carryin = EmitScalarExpr(E->getArg(2)); 5060 Address CarryOutPtr = EmitPointerWithAlignment(E->getArg(3)); 5061 5062 // Decide if we are lowering to a uadd.with.overflow or usub.with.overflow. 5063 llvm::Intrinsic::ID IntrinsicId; 5064 switch (BuiltinID) { 5065 default: llvm_unreachable("Unknown multiprecision builtin id."); 5066 case Builtin::BI__builtin_addcb: 5067 case Builtin::BI__builtin_addcs: 5068 case Builtin::BI__builtin_addc: 5069 case Builtin::BI__builtin_addcl: 5070 case Builtin::BI__builtin_addcll: 5071 IntrinsicId = llvm::Intrinsic::uadd_with_overflow; 5072 break; 5073 case Builtin::BI__builtin_subcb: 5074 case Builtin::BI__builtin_subcs: 5075 case Builtin::BI__builtin_subc: 5076 case Builtin::BI__builtin_subcl: 5077 case Builtin::BI__builtin_subcll: 5078 IntrinsicId = llvm::Intrinsic::usub_with_overflow; 5079 break; 5080 } 5081 5082 // Construct our resulting LLVM IR expression. 
5083 llvm::Value *Carry1; 5084 llvm::Value *Sum1 = EmitOverflowIntrinsic(*this, IntrinsicId, 5085 X, Y, Carry1); 5086 llvm::Value *Carry2; 5087 llvm::Value *Sum2 = EmitOverflowIntrinsic(*this, IntrinsicId, 5088 Sum1, Carryin, Carry2); 5089 llvm::Value *CarryOut = Builder.CreateZExt(Builder.CreateOr(Carry1, Carry2), 5090 X->getType()); 5091 Builder.CreateStore(CarryOut, CarryOutPtr); 5092 return RValue::get(Sum2); 5093 } 5094 5095 case Builtin::BI__builtin_add_overflow: 5096 case Builtin::BI__builtin_sub_overflow: 5097 case Builtin::BI__builtin_mul_overflow: { 5098 const clang::Expr *LeftArg = E->getArg(0); 5099 const clang::Expr *RightArg = E->getArg(1); 5100 const clang::Expr *ResultArg = E->getArg(2); 5101 5102 clang::QualType ResultQTy = 5103 ResultArg->getType()->castAs<PointerType>()->getPointeeType(); 5104 5105 WidthAndSignedness LeftInfo = 5106 getIntegerWidthAndSignedness(CGM.getContext(), LeftArg->getType()); 5107 WidthAndSignedness RightInfo = 5108 getIntegerWidthAndSignedness(CGM.getContext(), RightArg->getType()); 5109 WidthAndSignedness ResultInfo = 5110 getIntegerWidthAndSignedness(CGM.getContext(), ResultQTy); 5111 5112 // Handle mixed-sign multiplication as a special case, because adding 5113 // runtime or backend support for our generic irgen would be too expensive. 5114 if (isSpecialMixedSignMultiply(BuiltinID, LeftInfo, RightInfo, ResultInfo)) 5115 return EmitCheckedMixedSignMultiply(*this, LeftArg, LeftInfo, RightArg, 5116 RightInfo, ResultArg, ResultQTy, 5117 ResultInfo); 5118 5119 if (isSpecialUnsignedMultiplySignedResult(BuiltinID, LeftInfo, RightInfo, 5120 ResultInfo)) 5121 return EmitCheckedUnsignedMultiplySignedResult( 5122 *this, LeftArg, LeftInfo, RightArg, RightInfo, ResultArg, ResultQTy, 5123 ResultInfo); 5124 5125 WidthAndSignedness EncompassingInfo = 5126 EncompassingIntegerType({LeftInfo, RightInfo, ResultInfo}); 5127 5128 llvm::Type *EncompassingLLVMTy = 5129 llvm::IntegerType::get(CGM.getLLVMContext(), EncompassingInfo.Width); 5130 5131 llvm::Type *ResultLLVMTy = CGM.getTypes().ConvertType(ResultQTy); 5132 5133 llvm::Intrinsic::ID IntrinsicId; 5134 switch (BuiltinID) { 5135 default: 5136 llvm_unreachable("Unknown overflow builtin id."); 5137 case Builtin::BI__builtin_add_overflow: 5138 IntrinsicId = EncompassingInfo.Signed 5139 ? llvm::Intrinsic::sadd_with_overflow 5140 : llvm::Intrinsic::uadd_with_overflow; 5141 break; 5142 case Builtin::BI__builtin_sub_overflow: 5143 IntrinsicId = EncompassingInfo.Signed 5144 ? llvm::Intrinsic::ssub_with_overflow 5145 : llvm::Intrinsic::usub_with_overflow; 5146 break; 5147 case Builtin::BI__builtin_mul_overflow: 5148 IntrinsicId = EncompassingInfo.Signed 5149 ? llvm::Intrinsic::smul_with_overflow 5150 : llvm::Intrinsic::umul_with_overflow; 5151 break; 5152 } 5153 5154 llvm::Value *Left = EmitScalarExpr(LeftArg); 5155 llvm::Value *Right = EmitScalarExpr(RightArg); 5156 Address ResultPtr = EmitPointerWithAlignment(ResultArg); 5157 5158 // Extend each operand to the encompassing type. 5159 Left = Builder.CreateIntCast(Left, EncompassingLLVMTy, LeftInfo.Signed); 5160 Right = Builder.CreateIntCast(Right, EncompassingLLVMTy, RightInfo.Signed); 5161 5162 // Perform the operation on the extended values. 5163 llvm::Value *Overflow, *Result; 5164 Result = EmitOverflowIntrinsic(*this, IntrinsicId, Left, Right, Overflow); 5165 5166 if (EncompassingInfo.Width > ResultInfo.Width) { 5167 // The encompassing type is wider than the result type, so we need to 5168 // truncate it. 
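      // Illustrative example (types hypothetical): for
      //   short r; bool ovf = __builtin_add_overflow(int_a, int_b, &r);
      // the addition is performed in the 32-bit encompassing type, the low 16
      // bits are stored to r, and overflow is reported either when the i32 add
      // overflows or when the i32 sum does not round-trip through i16, which
      // the truncate/re-extend/compare sequence below detects.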
5169 llvm::Value *ResultTrunc = Builder.CreateTrunc(Result, ResultLLVMTy); 5170 5171 // To see if the truncation caused an overflow, we will extend 5172 // the result and then compare it to the original result. 5173 llvm::Value *ResultTruncExt = Builder.CreateIntCast( 5174 ResultTrunc, EncompassingLLVMTy, ResultInfo.Signed); 5175 llvm::Value *TruncationOverflow = 5176 Builder.CreateICmpNE(Result, ResultTruncExt); 5177 5178 Overflow = Builder.CreateOr(Overflow, TruncationOverflow); 5179 Result = ResultTrunc; 5180 } 5181 5182 // Finally, store the result using the pointer. 5183 bool isVolatile = 5184 ResultArg->getType()->getPointeeType().isVolatileQualified(); 5185 Builder.CreateStore(EmitToMemory(Result, ResultQTy), ResultPtr, isVolatile); 5186 5187 return RValue::get(Overflow); 5188 } 5189 5190 case Builtin::BI__builtin_uadd_overflow: 5191 case Builtin::BI__builtin_uaddl_overflow: 5192 case Builtin::BI__builtin_uaddll_overflow: 5193 case Builtin::BI__builtin_usub_overflow: 5194 case Builtin::BI__builtin_usubl_overflow: 5195 case Builtin::BI__builtin_usubll_overflow: 5196 case Builtin::BI__builtin_umul_overflow: 5197 case Builtin::BI__builtin_umull_overflow: 5198 case Builtin::BI__builtin_umulll_overflow: 5199 case Builtin::BI__builtin_sadd_overflow: 5200 case Builtin::BI__builtin_saddl_overflow: 5201 case Builtin::BI__builtin_saddll_overflow: 5202 case Builtin::BI__builtin_ssub_overflow: 5203 case Builtin::BI__builtin_ssubl_overflow: 5204 case Builtin::BI__builtin_ssubll_overflow: 5205 case Builtin::BI__builtin_smul_overflow: 5206 case Builtin::BI__builtin_smull_overflow: 5207 case Builtin::BI__builtin_smulll_overflow: { 5208 5209 // We translate all of these builtins directly to the relevant llvm IR node. 5210 5211 // Scalarize our inputs. 5212 llvm::Value *X = EmitScalarExpr(E->getArg(0)); 5213 llvm::Value *Y = EmitScalarExpr(E->getArg(1)); 5214 Address SumOutPtr = EmitPointerWithAlignment(E->getArg(2)); 5215 5216 // Decide which of the overflow intrinsics we are lowering to: 5217 llvm::Intrinsic::ID IntrinsicId; 5218 switch (BuiltinID) { 5219 default: llvm_unreachable("Unknown overflow builtin id."); 5220 case Builtin::BI__builtin_uadd_overflow: 5221 case Builtin::BI__builtin_uaddl_overflow: 5222 case Builtin::BI__builtin_uaddll_overflow: 5223 IntrinsicId = llvm::Intrinsic::uadd_with_overflow; 5224 break; 5225 case Builtin::BI__builtin_usub_overflow: 5226 case Builtin::BI__builtin_usubl_overflow: 5227 case Builtin::BI__builtin_usubll_overflow: 5228 IntrinsicId = llvm::Intrinsic::usub_with_overflow; 5229 break; 5230 case Builtin::BI__builtin_umul_overflow: 5231 case Builtin::BI__builtin_umull_overflow: 5232 case Builtin::BI__builtin_umulll_overflow: 5233 IntrinsicId = llvm::Intrinsic::umul_with_overflow; 5234 break; 5235 case Builtin::BI__builtin_sadd_overflow: 5236 case Builtin::BI__builtin_saddl_overflow: 5237 case Builtin::BI__builtin_saddll_overflow: 5238 IntrinsicId = llvm::Intrinsic::sadd_with_overflow; 5239 break; 5240 case Builtin::BI__builtin_ssub_overflow: 5241 case Builtin::BI__builtin_ssubl_overflow: 5242 case Builtin::BI__builtin_ssubll_overflow: 5243 IntrinsicId = llvm::Intrinsic::ssub_with_overflow; 5244 break; 5245 case Builtin::BI__builtin_smul_overflow: 5246 case Builtin::BI__builtin_smull_overflow: 5247 case Builtin::BI__builtin_smulll_overflow: 5248 IntrinsicId = llvm::Intrinsic::smul_with_overflow; 5249 break; 5250 } 5251 5252 5253 llvm::Value *Carry; 5254 llvm::Value *Sum = EmitOverflowIntrinsic(*this, IntrinsicId, X, Y, Carry); 5255 Builder.CreateStore(Sum, 
SumOutPtr); 5256 5257 return RValue::get(Carry); 5258 } 5259 case Builtin::BIaddressof: 5260 case Builtin::BI__addressof: 5261 case Builtin::BI__builtin_addressof: 5262 return RValue::get(EmitLValue(E->getArg(0)).getPointer(*this)); 5263 case Builtin::BI__builtin_function_start: 5264 return RValue::get(CGM.GetFunctionStart( 5265 E->getArg(0)->getAsBuiltinConstantDeclRef(CGM.getContext()))); 5266 case Builtin::BI__builtin_operator_new: 5267 return EmitBuiltinNewDeleteCall( 5268 E->getCallee()->getType()->castAs<FunctionProtoType>(), E, false); 5269 case Builtin::BI__builtin_operator_delete: 5270 EmitBuiltinNewDeleteCall( 5271 E->getCallee()->getType()->castAs<FunctionProtoType>(), E, true); 5272 return RValue::get(nullptr); 5273 5274 case Builtin::BI__builtin_is_aligned: 5275 return EmitBuiltinIsAligned(E); 5276 case Builtin::BI__builtin_align_up: 5277 return EmitBuiltinAlignTo(E, true); 5278 case Builtin::BI__builtin_align_down: 5279 return EmitBuiltinAlignTo(E, false); 5280 5281 case Builtin::BI__noop: 5282 // __noop always evaluates to an integer literal zero. 5283 return RValue::get(ConstantInt::get(IntTy, 0)); 5284 case Builtin::BI__builtin_call_with_static_chain: { 5285 const CallExpr *Call = cast<CallExpr>(E->getArg(0)); 5286 const Expr *Chain = E->getArg(1); 5287 return EmitCall(Call->getCallee()->getType(), 5288 EmitCallee(Call->getCallee()), Call, ReturnValue, 5289 EmitScalarExpr(Chain)); 5290 } 5291 case Builtin::BI_InterlockedExchange8: 5292 case Builtin::BI_InterlockedExchange16: 5293 case Builtin::BI_InterlockedExchange: 5294 case Builtin::BI_InterlockedExchangePointer: 5295 return RValue::get( 5296 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E)); 5297 case Builtin::BI_InterlockedCompareExchangePointer: 5298 case Builtin::BI_InterlockedCompareExchangePointer_nf: { 5299 llvm::Type *RTy; 5300 llvm::IntegerType *IntType = IntegerType::get( 5301 getLLVMContext(), getContext().getTypeSize(E->getType())); 5302 5303 Address DestAddr = CheckAtomicAlignment(*this, E); 5304 5305 llvm::Value *Exchange = EmitScalarExpr(E->getArg(1)); 5306 RTy = Exchange->getType(); 5307 Exchange = Builder.CreatePtrToInt(Exchange, IntType); 5308 5309 llvm::Value *Comparand = 5310 Builder.CreatePtrToInt(EmitScalarExpr(E->getArg(2)), IntType); 5311 5312 auto Ordering = 5313 BuiltinID == Builtin::BI_InterlockedCompareExchangePointer_nf ? 
5314 AtomicOrdering::Monotonic : AtomicOrdering::SequentiallyConsistent; 5315 5316 auto Result = Builder.CreateAtomicCmpXchg(DestAddr, Comparand, Exchange, 5317 Ordering, Ordering); 5318 Result->setVolatile(true); 5319 5320 return RValue::get(Builder.CreateIntToPtr(Builder.CreateExtractValue(Result, 5321 0), 5322 RTy)); 5323 } 5324 case Builtin::BI_InterlockedCompareExchange8: 5325 case Builtin::BI_InterlockedCompareExchange16: 5326 case Builtin::BI_InterlockedCompareExchange: 5327 case Builtin::BI_InterlockedCompareExchange64: 5328 return RValue::get(EmitAtomicCmpXchgForMSIntrin(*this, E)); 5329 case Builtin::BI_InterlockedIncrement16: 5330 case Builtin::BI_InterlockedIncrement: 5331 return RValue::get( 5332 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E)); 5333 case Builtin::BI_InterlockedDecrement16: 5334 case Builtin::BI_InterlockedDecrement: 5335 return RValue::get( 5336 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E)); 5337 case Builtin::BI_InterlockedAnd8: 5338 case Builtin::BI_InterlockedAnd16: 5339 case Builtin::BI_InterlockedAnd: 5340 return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E)); 5341 case Builtin::BI_InterlockedExchangeAdd8: 5342 case Builtin::BI_InterlockedExchangeAdd16: 5343 case Builtin::BI_InterlockedExchangeAdd: 5344 return RValue::get( 5345 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E)); 5346 case Builtin::BI_InterlockedExchangeSub8: 5347 case Builtin::BI_InterlockedExchangeSub16: 5348 case Builtin::BI_InterlockedExchangeSub: 5349 return RValue::get( 5350 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E)); 5351 case Builtin::BI_InterlockedOr8: 5352 case Builtin::BI_InterlockedOr16: 5353 case Builtin::BI_InterlockedOr: 5354 return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E)); 5355 case Builtin::BI_InterlockedXor8: 5356 case Builtin::BI_InterlockedXor16: 5357 case Builtin::BI_InterlockedXor: 5358 return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E)); 5359 5360 case Builtin::BI_bittest64: 5361 case Builtin::BI_bittest: 5362 case Builtin::BI_bittestandcomplement64: 5363 case Builtin::BI_bittestandcomplement: 5364 case Builtin::BI_bittestandreset64: 5365 case Builtin::BI_bittestandreset: 5366 case Builtin::BI_bittestandset64: 5367 case Builtin::BI_bittestandset: 5368 case Builtin::BI_interlockedbittestandreset: 5369 case Builtin::BI_interlockedbittestandreset64: 5370 case Builtin::BI_interlockedbittestandset64: 5371 case Builtin::BI_interlockedbittestandset: 5372 case Builtin::BI_interlockedbittestandset_acq: 5373 case Builtin::BI_interlockedbittestandset_rel: 5374 case Builtin::BI_interlockedbittestandset_nf: 5375 case Builtin::BI_interlockedbittestandreset_acq: 5376 case Builtin::BI_interlockedbittestandreset_rel: 5377 case Builtin::BI_interlockedbittestandreset_nf: 5378 return RValue::get(EmitBitTestIntrinsic(*this, BuiltinID, E)); 5379 5380 // These builtins exist to emit regular volatile loads and stores not 5381 // affected by the -fms-volatile setting. 
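  // Illustrative lowering (sketch): __iso_volatile_load32(p) emits a plain
  //   %v = load volatile i32, ptr %p
  // regardless of -fms-volatile, and the store variants emit the matching
  // volatile store.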
5382 case Builtin::BI__iso_volatile_load8: 5383 case Builtin::BI__iso_volatile_load16: 5384 case Builtin::BI__iso_volatile_load32: 5385 case Builtin::BI__iso_volatile_load64: 5386 return RValue::get(EmitISOVolatileLoad(*this, E)); 5387 case Builtin::BI__iso_volatile_store8: 5388 case Builtin::BI__iso_volatile_store16: 5389 case Builtin::BI__iso_volatile_store32: 5390 case Builtin::BI__iso_volatile_store64: 5391 return RValue::get(EmitISOVolatileStore(*this, E)); 5392 5393 case Builtin::BI__builtin_ptrauth_sign_constant: 5394 return RValue::get(ConstantEmitter(*this).emitAbstract(E, E->getType())); 5395 5396 case Builtin::BI__builtin_ptrauth_auth: 5397 case Builtin::BI__builtin_ptrauth_auth_and_resign: 5398 case Builtin::BI__builtin_ptrauth_blend_discriminator: 5399 case Builtin::BI__builtin_ptrauth_sign_generic_data: 5400 case Builtin::BI__builtin_ptrauth_sign_unauthenticated: 5401 case Builtin::BI__builtin_ptrauth_strip: { 5402 // Emit the arguments. 5403 SmallVector<llvm::Value *, 5> Args; 5404 for (auto argExpr : E->arguments()) 5405 Args.push_back(EmitScalarExpr(argExpr)); 5406 5407 // Cast the value to intptr_t, saving its original type. 5408 llvm::Type *OrigValueType = Args[0]->getType(); 5409 if (OrigValueType->isPointerTy()) 5410 Args[0] = Builder.CreatePtrToInt(Args[0], IntPtrTy); 5411 5412 switch (BuiltinID) { 5413 case Builtin::BI__builtin_ptrauth_auth_and_resign: 5414 if (Args[4]->getType()->isPointerTy()) 5415 Args[4] = Builder.CreatePtrToInt(Args[4], IntPtrTy); 5416 [[fallthrough]]; 5417 5418 case Builtin::BI__builtin_ptrauth_auth: 5419 case Builtin::BI__builtin_ptrauth_sign_unauthenticated: 5420 if (Args[2]->getType()->isPointerTy()) 5421 Args[2] = Builder.CreatePtrToInt(Args[2], IntPtrTy); 5422 break; 5423 5424 case Builtin::BI__builtin_ptrauth_sign_generic_data: 5425 if (Args[1]->getType()->isPointerTy()) 5426 Args[1] = Builder.CreatePtrToInt(Args[1], IntPtrTy); 5427 break; 5428 5429 case Builtin::BI__builtin_ptrauth_blend_discriminator: 5430 case Builtin::BI__builtin_ptrauth_strip: 5431 break; 5432 } 5433 5434 // Call the intrinsic. 
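    // Illustrative lowering for a pointer-typed value (sketch):
    //   void *stripped = __builtin_ptrauth_strip(fn, 0);
    // is expected to become ptrtoint -> call @llvm.ptrauth.strip -> inttoptr,
    // with the original pointer type restored below after the intrinsic call.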
5435 auto IntrinsicID = [&]() -> unsigned { 5436 switch (BuiltinID) { 5437 case Builtin::BI__builtin_ptrauth_auth: 5438 return llvm::Intrinsic::ptrauth_auth; 5439 case Builtin::BI__builtin_ptrauth_auth_and_resign: 5440 return llvm::Intrinsic::ptrauth_resign; 5441 case Builtin::BI__builtin_ptrauth_blend_discriminator: 5442 return llvm::Intrinsic::ptrauth_blend; 5443 case Builtin::BI__builtin_ptrauth_sign_generic_data: 5444 return llvm::Intrinsic::ptrauth_sign_generic; 5445 case Builtin::BI__builtin_ptrauth_sign_unauthenticated: 5446 return llvm::Intrinsic::ptrauth_sign; 5447 case Builtin::BI__builtin_ptrauth_strip: 5448 return llvm::Intrinsic::ptrauth_strip; 5449 } 5450 llvm_unreachable("bad ptrauth intrinsic"); 5451 }(); 5452 auto Intrinsic = CGM.getIntrinsic(IntrinsicID); 5453 llvm::Value *Result = EmitRuntimeCall(Intrinsic, Args); 5454 5455 if (BuiltinID != Builtin::BI__builtin_ptrauth_sign_generic_data && 5456 BuiltinID != Builtin::BI__builtin_ptrauth_blend_discriminator && 5457 OrigValueType->isPointerTy()) { 5458 Result = Builder.CreateIntToPtr(Result, OrigValueType); 5459 } 5460 return RValue::get(Result); 5461 } 5462 5463 case Builtin::BI__exception_code: 5464 case Builtin::BI_exception_code: 5465 return RValue::get(EmitSEHExceptionCode()); 5466 case Builtin::BI__exception_info: 5467 case Builtin::BI_exception_info: 5468 return RValue::get(EmitSEHExceptionInfo()); 5469 case Builtin::BI__abnormal_termination: 5470 case Builtin::BI_abnormal_termination: 5471 return RValue::get(EmitSEHAbnormalTermination()); 5472 case Builtin::BI_setjmpex: 5473 if (getTarget().getTriple().isOSMSVCRT() && E->getNumArgs() == 1 && 5474 E->getArg(0)->getType()->isPointerType()) 5475 return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmpex, E); 5476 break; 5477 case Builtin::BI_setjmp: 5478 if (getTarget().getTriple().isOSMSVCRT() && E->getNumArgs() == 1 && 5479 E->getArg(0)->getType()->isPointerType()) { 5480 if (getTarget().getTriple().getArch() == llvm::Triple::x86) 5481 return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmp3, E); 5482 else if (getTarget().getTriple().getArch() == llvm::Triple::aarch64) 5483 return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmpex, E); 5484 return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmp, E); 5485 } 5486 break; 5487 5488 // C++ std:: builtins. 
5489 case Builtin::BImove: 5490 case Builtin::BImove_if_noexcept: 5491 case Builtin::BIforward: 5492 case Builtin::BIforward_like: 5493 case Builtin::BIas_const: 5494 return RValue::get(EmitLValue(E->getArg(0)).getPointer(*this)); 5495 case Builtin::BI__GetExceptionInfo: { 5496 if (llvm::GlobalVariable *GV = 5497 CGM.getCXXABI().getThrowInfo(FD->getParamDecl(0)->getType())) 5498 return RValue::get(GV); 5499 break; 5500 } 5501 5502 case Builtin::BI__fastfail: 5503 return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::__fastfail, E)); 5504 5505 case Builtin::BI__builtin_coro_id: 5506 return EmitCoroutineIntrinsic(E, Intrinsic::coro_id); 5507 case Builtin::BI__builtin_coro_promise: 5508 return EmitCoroutineIntrinsic(E, Intrinsic::coro_promise); 5509 case Builtin::BI__builtin_coro_resume: 5510 EmitCoroutineIntrinsic(E, Intrinsic::coro_resume); 5511 return RValue::get(nullptr); 5512 case Builtin::BI__builtin_coro_frame: 5513 return EmitCoroutineIntrinsic(E, Intrinsic::coro_frame); 5514 case Builtin::BI__builtin_coro_noop: 5515 return EmitCoroutineIntrinsic(E, Intrinsic::coro_noop); 5516 case Builtin::BI__builtin_coro_free: 5517 return EmitCoroutineIntrinsic(E, Intrinsic::coro_free); 5518 case Builtin::BI__builtin_coro_destroy: 5519 EmitCoroutineIntrinsic(E, Intrinsic::coro_destroy); 5520 return RValue::get(nullptr); 5521 case Builtin::BI__builtin_coro_done: 5522 return EmitCoroutineIntrinsic(E, Intrinsic::coro_done); 5523 case Builtin::BI__builtin_coro_alloc: 5524 return EmitCoroutineIntrinsic(E, Intrinsic::coro_alloc); 5525 case Builtin::BI__builtin_coro_begin: 5526 return EmitCoroutineIntrinsic(E, Intrinsic::coro_begin); 5527 case Builtin::BI__builtin_coro_end: 5528 return EmitCoroutineIntrinsic(E, Intrinsic::coro_end); 5529 case Builtin::BI__builtin_coro_suspend: 5530 return EmitCoroutineIntrinsic(E, Intrinsic::coro_suspend); 5531 case Builtin::BI__builtin_coro_size: 5532 return EmitCoroutineIntrinsic(E, Intrinsic::coro_size); 5533 case Builtin::BI__builtin_coro_align: 5534 return EmitCoroutineIntrinsic(E, Intrinsic::coro_align); 5535 5536 // OpenCL v2.0 s6.13.16.2, Built-in pipe read and write functions 5537 case Builtin::BIread_pipe: 5538 case Builtin::BIwrite_pipe: { 5539 Value *Arg0 = EmitScalarExpr(E->getArg(0)), 5540 *Arg1 = EmitScalarExpr(E->getArg(1)); 5541 CGOpenCLRuntime OpenCLRT(CGM); 5542 Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0)); 5543 Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0)); 5544 5545 // Type of the generic packet parameter. 5546 unsigned GenericAS = 5547 getContext().getTargetAddressSpace(LangAS::opencl_generic); 5548 llvm::Type *I8PTy = llvm::PointerType::get(getLLVMContext(), GenericAS); 5549 5550 // Testing which overloaded version we should generate the call for. 5551 if (2U == E->getNumArgs()) { 5552 const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_2" 5553 : "__write_pipe_2"; 5554 // Creating a generic function type to be able to call with any builtin or 5555 // user defined type. 5556 llvm::Type *ArgTys[] = {Arg0->getType(), I8PTy, Int32Ty, Int32Ty}; 5557 llvm::FunctionType *FTy = llvm::FunctionType::get( 5558 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false); 5559 Value *BCast = Builder.CreatePointerCast(Arg1, I8PTy); 5560 return RValue::get( 5561 EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), 5562 {Arg0, BCast, PacketSize, PacketAlign})); 5563 } else { 5564 assert(4 == E->getNumArgs() && 5565 "Illegal number of parameters to pipe function"); 5566 const char *Name = (BuiltinID == Builtin::BIread_pipe) ? 
"__read_pipe_4" 5567 : "__write_pipe_4"; 5568 5569 llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, I8PTy, 5570 Int32Ty, Int32Ty}; 5571 Value *Arg2 = EmitScalarExpr(E->getArg(2)), 5572 *Arg3 = EmitScalarExpr(E->getArg(3)); 5573 llvm::FunctionType *FTy = llvm::FunctionType::get( 5574 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false); 5575 Value *BCast = Builder.CreatePointerCast(Arg3, I8PTy); 5576 // We know the third argument is an integer type, but we may need to cast 5577 // it to i32. 5578 if (Arg2->getType() != Int32Ty) 5579 Arg2 = Builder.CreateZExtOrTrunc(Arg2, Int32Ty); 5580 return RValue::get( 5581 EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), 5582 {Arg0, Arg1, Arg2, BCast, PacketSize, PacketAlign})); 5583 } 5584 } 5585 // OpenCL v2.0 s6.13.16 ,s9.17.3.5 - Built-in pipe reserve read and write 5586 // functions 5587 case Builtin::BIreserve_read_pipe: 5588 case Builtin::BIreserve_write_pipe: 5589 case Builtin::BIwork_group_reserve_read_pipe: 5590 case Builtin::BIwork_group_reserve_write_pipe: 5591 case Builtin::BIsub_group_reserve_read_pipe: 5592 case Builtin::BIsub_group_reserve_write_pipe: { 5593 // Composing the mangled name for the function. 5594 const char *Name; 5595 if (BuiltinID == Builtin::BIreserve_read_pipe) 5596 Name = "__reserve_read_pipe"; 5597 else if (BuiltinID == Builtin::BIreserve_write_pipe) 5598 Name = "__reserve_write_pipe"; 5599 else if (BuiltinID == Builtin::BIwork_group_reserve_read_pipe) 5600 Name = "__work_group_reserve_read_pipe"; 5601 else if (BuiltinID == Builtin::BIwork_group_reserve_write_pipe) 5602 Name = "__work_group_reserve_write_pipe"; 5603 else if (BuiltinID == Builtin::BIsub_group_reserve_read_pipe) 5604 Name = "__sub_group_reserve_read_pipe"; 5605 else 5606 Name = "__sub_group_reserve_write_pipe"; 5607 5608 Value *Arg0 = EmitScalarExpr(E->getArg(0)), 5609 *Arg1 = EmitScalarExpr(E->getArg(1)); 5610 llvm::Type *ReservedIDTy = ConvertType(getContext().OCLReserveIDTy); 5611 CGOpenCLRuntime OpenCLRT(CGM); 5612 Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0)); 5613 Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0)); 5614 5615 // Building the generic function prototype. 5616 llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty, Int32Ty, Int32Ty}; 5617 llvm::FunctionType *FTy = llvm::FunctionType::get( 5618 ReservedIDTy, llvm::ArrayRef<llvm::Type *>(ArgTys), false); 5619 // We know the second argument is an integer type, but we may need to cast 5620 // it to i32. 
5621 if (Arg1->getType() != Int32Ty) 5622 Arg1 = Builder.CreateZExtOrTrunc(Arg1, Int32Ty); 5623 return RValue::get(EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), 5624 {Arg0, Arg1, PacketSize, PacketAlign})); 5625 } 5626 // OpenCL v2.0 s6.13.16, s9.17.3.5 - Built-in pipe commit read and write 5627 // functions 5628 case Builtin::BIcommit_read_pipe: 5629 case Builtin::BIcommit_write_pipe: 5630 case Builtin::BIwork_group_commit_read_pipe: 5631 case Builtin::BIwork_group_commit_write_pipe: 5632 case Builtin::BIsub_group_commit_read_pipe: 5633 case Builtin::BIsub_group_commit_write_pipe: { 5634 const char *Name; 5635 if (BuiltinID == Builtin::BIcommit_read_pipe) 5636 Name = "__commit_read_pipe"; 5637 else if (BuiltinID == Builtin::BIcommit_write_pipe) 5638 Name = "__commit_write_pipe"; 5639 else if (BuiltinID == Builtin::BIwork_group_commit_read_pipe) 5640 Name = "__work_group_commit_read_pipe"; 5641 else if (BuiltinID == Builtin::BIwork_group_commit_write_pipe) 5642 Name = "__work_group_commit_write_pipe"; 5643 else if (BuiltinID == Builtin::BIsub_group_commit_read_pipe) 5644 Name = "__sub_group_commit_read_pipe"; 5645 else 5646 Name = "__sub_group_commit_write_pipe"; 5647 5648 Value *Arg0 = EmitScalarExpr(E->getArg(0)), 5649 *Arg1 = EmitScalarExpr(E->getArg(1)); 5650 CGOpenCLRuntime OpenCLRT(CGM); 5651 Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0)); 5652 Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0)); 5653 5654 // Building the generic function prototype. 5655 llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, Int32Ty}; 5656 llvm::FunctionType *FTy = 5657 llvm::FunctionType::get(llvm::Type::getVoidTy(getLLVMContext()), 5658 llvm::ArrayRef<llvm::Type *>(ArgTys), false); 5659 5660 return RValue::get(EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), 5661 {Arg0, Arg1, PacketSize, PacketAlign})); 5662 } 5663 // OpenCL v2.0 s6.13.16.4 Built-in pipe query functions 5664 case Builtin::BIget_pipe_num_packets: 5665 case Builtin::BIget_pipe_max_packets: { 5666 const char *BaseName; 5667 const auto *PipeTy = E->getArg(0)->getType()->castAs<PipeType>(); 5668 if (BuiltinID == Builtin::BIget_pipe_num_packets) 5669 BaseName = "__get_pipe_num_packets"; 5670 else 5671 BaseName = "__get_pipe_max_packets"; 5672 std::string Name = std::string(BaseName) + 5673 std::string(PipeTy->isReadOnly() ? "_ro" : "_wo"); 5674 5675 // Building the generic function prototype. 5676 Value *Arg0 = EmitScalarExpr(E->getArg(0)); 5677 CGOpenCLRuntime OpenCLRT(CGM); 5678 Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0)); 5679 Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0)); 5680 llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty, Int32Ty}; 5681 llvm::FunctionType *FTy = llvm::FunctionType::get( 5682 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false); 5683 5684 return RValue::get(EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), 5685 {Arg0, PacketSize, PacketAlign})); 5686 } 5687 5688 // OpenCL v2.0 s6.13.9 - Address space qualifier functions. 
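  // Illustrative lowering (sketch): to_global(p), with p a generic pointer,
  // is expected to become a call to the runtime helper __to_global, with the
  // argument address-space-cast to the generic AS when needed and the result
  // cast back to the builtin's return type, as the code below shows.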
5689 case Builtin::BIto_global: 5690 case Builtin::BIto_local: 5691 case Builtin::BIto_private: { 5692 auto Arg0 = EmitScalarExpr(E->getArg(0)); 5693 auto NewArgT = llvm::PointerType::get( 5694 getLLVMContext(), 5695 CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic)); 5696 auto NewRetT = llvm::PointerType::get( 5697 getLLVMContext(), 5698 CGM.getContext().getTargetAddressSpace( 5699 E->getType()->getPointeeType().getAddressSpace())); 5700 auto FTy = llvm::FunctionType::get(NewRetT, {NewArgT}, false); 5701 llvm::Value *NewArg; 5702 if (Arg0->getType()->getPointerAddressSpace() != 5703 NewArgT->getPointerAddressSpace()) 5704 NewArg = Builder.CreateAddrSpaceCast(Arg0, NewArgT); 5705 else 5706 NewArg = Builder.CreateBitOrPointerCast(Arg0, NewArgT); 5707 auto NewName = std::string("__") + E->getDirectCallee()->getName().str(); 5708 auto NewCall = 5709 EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, NewName), {NewArg}); 5710 return RValue::get(Builder.CreateBitOrPointerCast(NewCall, 5711 ConvertType(E->getType()))); 5712 } 5713 5714 // OpenCL v2.0, s6.13.17 - Enqueue kernel function. 5715 // Table 6.13.17.1 specifies four overload forms of enqueue_kernel. 5716 // The code below expands the builtin call to a call to one of the following 5717 // functions that an OpenCL runtime library will have to provide: 5718 // __enqueue_kernel_basic 5719 // __enqueue_kernel_varargs 5720 // __enqueue_kernel_basic_events 5721 // __enqueue_kernel_events_varargs 5722 case Builtin::BIenqueue_kernel: { 5723 StringRef Name; // Generated function call name 5724 unsigned NumArgs = E->getNumArgs(); 5725 5726 llvm::Type *QueueTy = ConvertType(getContext().OCLQueueTy); 5727 llvm::Type *GenericVoidPtrTy = Builder.getPtrTy( 5728 getContext().getTargetAddressSpace(LangAS::opencl_generic)); 5729 5730 llvm::Value *Queue = EmitScalarExpr(E->getArg(0)); 5731 llvm::Value *Flags = EmitScalarExpr(E->getArg(1)); 5732 LValue NDRangeL = EmitAggExprToLValue(E->getArg(2)); 5733 llvm::Value *Range = NDRangeL.getAddress().emitRawPointer(*this); 5734 llvm::Type *RangeTy = NDRangeL.getAddress().getType(); 5735 5736 if (NumArgs == 4) { 5737 // The most basic form of the call with parameters: 5738 // queue_t, kernel_enqueue_flags_t, ndrange_t, block(void) 5739 Name = "__enqueue_kernel_basic"; 5740 llvm::Type *ArgTys[] = {QueueTy, Int32Ty, RangeTy, GenericVoidPtrTy, 5741 GenericVoidPtrTy}; 5742 llvm::FunctionType *FTy = llvm::FunctionType::get( 5743 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false); 5744 5745 auto Info = 5746 CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(3)); 5747 llvm::Value *Kernel = 5748 Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy); 5749 llvm::Value *Block = 5750 Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy); 5751 5752 AttrBuilder B(Builder.getContext()); 5753 B.addByValAttr(NDRangeL.getAddress().getElementType()); 5754 llvm::AttributeList ByValAttrSet = 5755 llvm::AttributeList::get(CGM.getModule().getContext(), 3U, B); 5756 5757 auto RTCall = 5758 EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name, ByValAttrSet), 5759 {Queue, Flags, Range, Kernel, Block}); 5760 RTCall->setAttributes(ByValAttrSet); 5761 return RValue::get(RTCall); 5762 } 5763 assert(NumArgs >= 5 && "Invalid enqueue_kernel signature"); 5764 5765 // Create a temporary array to hold the sizes of local pointer arguments 5766 // for the block. \p First is the position of the first size argument. 
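// As an illustration of the size array built below (a sketch; 'q', 'flags',
// 'ndr', 'n1' and 'n2' are hypothetical): for a call such as
//   enqueue_kernel(q, flags, ndr, ^(local int *a, local float *b){ ... }, n1, n2);
// the lambda materializes a temporary "block_sizes" array holding {n1, n2}
// (each value zero-extended or truncated to size_t), and the variadic
// __enqueue_kernel_* runtime call is passed the count of trailing size
// arguments together with a pointer to that array.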
5767 auto CreateArrayForSizeVar = [=](unsigned First) 5768 -> std::tuple<llvm::Value *, llvm::Value *, llvm::Value *> { 5769 llvm::APInt ArraySize(32, NumArgs - First); 5770 QualType SizeArrayTy = getContext().getConstantArrayType( 5771 getContext().getSizeType(), ArraySize, nullptr, 5772 ArraySizeModifier::Normal, 5773 /*IndexTypeQuals=*/0); 5774 auto Tmp = CreateMemTemp(SizeArrayTy, "block_sizes"); 5775 llvm::Value *TmpPtr = Tmp.getPointer(); 5776 llvm::Value *TmpSize = EmitLifetimeStart( 5777 CGM.getDataLayout().getTypeAllocSize(Tmp.getElementType()), TmpPtr); 5778 llvm::Value *ElemPtr; 5779 // Each of the following arguments specifies the size of the corresponding 5780 // argument passed to the enqueued block. 5781 auto *Zero = llvm::ConstantInt::get(IntTy, 0); 5782 for (unsigned I = First; I < NumArgs; ++I) { 5783 auto *Index = llvm::ConstantInt::get(IntTy, I - First); 5784 auto *GEP = Builder.CreateGEP(Tmp.getElementType(), TmpPtr, 5785 {Zero, Index}); 5786 if (I == First) 5787 ElemPtr = GEP; 5788 auto *V = 5789 Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(I)), SizeTy); 5790 Builder.CreateAlignedStore( 5791 V, GEP, CGM.getDataLayout().getPrefTypeAlign(SizeTy)); 5792 } 5793 return std::tie(ElemPtr, TmpSize, TmpPtr); 5794 }; 5795 5796 // Could have events and/or varargs. 5797 if (E->getArg(3)->getType()->isBlockPointerType()) { 5798 // No events passed, but has variadic arguments. 5799 Name = "__enqueue_kernel_varargs"; 5800 auto Info = 5801 CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(3)); 5802 llvm::Value *Kernel = 5803 Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy); 5804 auto *Block = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy); 5805 llvm::Value *ElemPtr, *TmpSize, *TmpPtr; 5806 std::tie(ElemPtr, TmpSize, TmpPtr) = CreateArrayForSizeVar(4); 5807 5808 // Create a vector of the arguments, as well as a constant value to 5809 // express to the runtime the number of variadic arguments. 5810 llvm::Value *const Args[] = {Queue, Flags, 5811 Range, Kernel, 5812 Block, ConstantInt::get(IntTy, NumArgs - 4), 5813 ElemPtr}; 5814 llvm::Type *const ArgTys[] = { 5815 QueueTy, IntTy, RangeTy, GenericVoidPtrTy, 5816 GenericVoidPtrTy, IntTy, ElemPtr->getType()}; 5817 5818 llvm::FunctionType *FTy = llvm::FunctionType::get(Int32Ty, ArgTys, false); 5819 auto Call = RValue::get( 5820 EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Args)); 5821 if (TmpSize) 5822 EmitLifetimeEnd(TmpSize, TmpPtr); 5823 return Call; 5824 } 5825 // Any calls now have event arguments passed. 5826 if (NumArgs >= 7) { 5827 llvm::PointerType *PtrTy = llvm::PointerType::get( 5828 CGM.getLLVMContext(), 5829 CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic)); 5830 5831 llvm::Value *NumEvents = 5832 Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(3)), Int32Ty); 5833 5834 // Since SemaOpenCLBuiltinEnqueueKernel allows fifth and sixth arguments 5835 // to be a null pointer constant (including `0` literal), we can take it 5836 // into account and emit null pointer directly. 5837 llvm::Value *EventWaitList = nullptr; 5838 if (E->getArg(4)->isNullPointerConstant( 5839 getContext(), Expr::NPC_ValueDependentIsNotNull)) { 5840 EventWaitList = llvm::ConstantPointerNull::get(PtrTy); 5841 } else { 5842 EventWaitList = 5843 E->getArg(4)->getType()->isArrayType() 5844 ? EmitArrayToPointerDecay(E->getArg(4)).emitRawPointer(*this) 5845 : EmitScalarExpr(E->getArg(4)); 5846 // Convert to generic address space. 
5847 EventWaitList = Builder.CreatePointerCast(EventWaitList, PtrTy); 5848 } 5849 llvm::Value *EventRet = nullptr; 5850 if (E->getArg(5)->isNullPointerConstant( 5851 getContext(), Expr::NPC_ValueDependentIsNotNull)) { 5852 EventRet = llvm::ConstantPointerNull::get(PtrTy); 5853 } else { 5854 EventRet = 5855 Builder.CreatePointerCast(EmitScalarExpr(E->getArg(5)), PtrTy); 5856 } 5857 5858 auto Info = 5859 CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(6)); 5860 llvm::Value *Kernel = 5861 Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy); 5862 llvm::Value *Block = 5863 Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy); 5864 5865 std::vector<llvm::Type *> ArgTys = { 5866 QueueTy, Int32Ty, RangeTy, Int32Ty, 5867 PtrTy, PtrTy, GenericVoidPtrTy, GenericVoidPtrTy}; 5868 5869 std::vector<llvm::Value *> Args = {Queue, Flags, Range, 5870 NumEvents, EventWaitList, EventRet, 5871 Kernel, Block}; 5872 5873 if (NumArgs == 7) { 5874 // Has events but no variadics. 5875 Name = "__enqueue_kernel_basic_events"; 5876 llvm::FunctionType *FTy = llvm::FunctionType::get( 5877 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false); 5878 return RValue::get( 5879 EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), 5880 llvm::ArrayRef<llvm::Value *>(Args))); 5881 } 5882 // Has event info and variadics 5883 // Pass the number of variadics to the runtime function too. 5884 Args.push_back(ConstantInt::get(Int32Ty, NumArgs - 7)); 5885 ArgTys.push_back(Int32Ty); 5886 Name = "__enqueue_kernel_events_varargs"; 5887 5888 llvm::Value *ElemPtr, *TmpSize, *TmpPtr; 5889 std::tie(ElemPtr, TmpSize, TmpPtr) = CreateArrayForSizeVar(7); 5890 Args.push_back(ElemPtr); 5891 ArgTys.push_back(ElemPtr->getType()); 5892 5893 llvm::FunctionType *FTy = llvm::FunctionType::get( 5894 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false); 5895 auto Call = 5896 RValue::get(EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), 5897 llvm::ArrayRef<llvm::Value *>(Args))); 5898 if (TmpSize) 5899 EmitLifetimeEnd(TmpSize, TmpPtr); 5900 return Call; 5901 } 5902 llvm_unreachable("Unexpected enqueue_kernel signature"); 5903 } 5904 // OpenCL v2.0 s6.13.17.6 - Kernel query functions need bitcast of block 5905 // parameter. 
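// As an illustration (a sketch of the lowering, not the literal IR): a query
// such as
//   size_t n = get_kernel_work_group_size(^{ ... });
// becomes a call to __get_kernel_work_group_size_impl(kernel, block), with
// both the kernel handle and the block argument cast to generic void
// pointers by the code below.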
5906 case Builtin::BIget_kernel_work_group_size: { 5907 llvm::Type *GenericVoidPtrTy = Builder.getPtrTy( 5908 getContext().getTargetAddressSpace(LangAS::opencl_generic)); 5909 auto Info = 5910 CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(0)); 5911 Value *Kernel = 5912 Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy); 5913 Value *Arg = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy); 5914 return RValue::get(EmitRuntimeCall( 5915 CGM.CreateRuntimeFunction( 5916 llvm::FunctionType::get(IntTy, {GenericVoidPtrTy, GenericVoidPtrTy}, 5917 false), 5918 "__get_kernel_work_group_size_impl"), 5919 {Kernel, Arg})); 5920 } 5921 case Builtin::BIget_kernel_preferred_work_group_size_multiple: { 5922 llvm::Type *GenericVoidPtrTy = Builder.getPtrTy( 5923 getContext().getTargetAddressSpace(LangAS::opencl_generic)); 5924 auto Info = 5925 CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(0)); 5926 Value *Kernel = 5927 Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy); 5928 Value *Arg = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy); 5929 return RValue::get(EmitRuntimeCall( 5930 CGM.CreateRuntimeFunction( 5931 llvm::FunctionType::get(IntTy, {GenericVoidPtrTy, GenericVoidPtrTy}, 5932 false), 5933 "__get_kernel_preferred_work_group_size_multiple_impl"), 5934 {Kernel, Arg})); 5935 } 5936 case Builtin::BIget_kernel_max_sub_group_size_for_ndrange: 5937 case Builtin::BIget_kernel_sub_group_count_for_ndrange: { 5938 llvm::Type *GenericVoidPtrTy = Builder.getPtrTy( 5939 getContext().getTargetAddressSpace(LangAS::opencl_generic)); 5940 LValue NDRangeL = EmitAggExprToLValue(E->getArg(0)); 5941 llvm::Value *NDRange = NDRangeL.getAddress().emitRawPointer(*this); 5942 auto Info = 5943 CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(1)); 5944 Value *Kernel = 5945 Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy); 5946 Value *Block = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy); 5947 const char *Name = 5948 BuiltinID == Builtin::BIget_kernel_max_sub_group_size_for_ndrange 5949 ? 
"__get_kernel_max_sub_group_size_for_ndrange_impl" 5950 : "__get_kernel_sub_group_count_for_ndrange_impl"; 5951 return RValue::get(EmitRuntimeCall( 5952 CGM.CreateRuntimeFunction( 5953 llvm::FunctionType::get( 5954 IntTy, {NDRange->getType(), GenericVoidPtrTy, GenericVoidPtrTy}, 5955 false), 5956 Name), 5957 {NDRange, Kernel, Block})); 5958 } 5959 case Builtin::BI__builtin_store_half: 5960 case Builtin::BI__builtin_store_halff: { 5961 Value *Val = EmitScalarExpr(E->getArg(0)); 5962 Address Address = EmitPointerWithAlignment(E->getArg(1)); 5963 Value *HalfVal = Builder.CreateFPTrunc(Val, Builder.getHalfTy()); 5964 Builder.CreateStore(HalfVal, Address); 5965 return RValue::get(nullptr); 5966 } 5967 case Builtin::BI__builtin_load_half: { 5968 Address Address = EmitPointerWithAlignment(E->getArg(0)); 5969 Value *HalfVal = Builder.CreateLoad(Address); 5970 return RValue::get(Builder.CreateFPExt(HalfVal, Builder.getDoubleTy())); 5971 } 5972 case Builtin::BI__builtin_load_halff: { 5973 Address Address = EmitPointerWithAlignment(E->getArg(0)); 5974 Value *HalfVal = Builder.CreateLoad(Address); 5975 return RValue::get(Builder.CreateFPExt(HalfVal, Builder.getFloatTy())); 5976 } 5977 case Builtin::BI__builtin_printf: 5978 case Builtin::BIprintf: 5979 if (getTarget().getTriple().isNVPTX() || 5980 getTarget().getTriple().isAMDGCN() || 5981 (getTarget().getTriple().isSPIRV() && 5982 getTarget().getTriple().getVendor() == Triple::VendorType::AMD)) { 5983 if (getLangOpts().OpenMPIsTargetDevice) 5984 return EmitOpenMPDevicePrintfCallExpr(E); 5985 if (getTarget().getTriple().isNVPTX()) 5986 return EmitNVPTXDevicePrintfCallExpr(E); 5987 if ((getTarget().getTriple().isAMDGCN() || 5988 getTarget().getTriple().isSPIRV()) && 5989 getLangOpts().HIP) 5990 return EmitAMDGPUDevicePrintfCallExpr(E); 5991 } 5992 5993 break; 5994 case Builtin::BI__builtin_canonicalize: 5995 case Builtin::BI__builtin_canonicalizef: 5996 case Builtin::BI__builtin_canonicalizef16: 5997 case Builtin::BI__builtin_canonicalizel: 5998 return RValue::get( 5999 emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::canonicalize)); 6000 6001 case Builtin::BI__builtin_thread_pointer: { 6002 if (!getContext().getTargetInfo().isTLSSupported()) 6003 CGM.ErrorUnsupported(E, "__builtin_thread_pointer"); 6004 // Fall through - it's already mapped to the intrinsic by ClangBuiltin. 
6005 break; 6006 } 6007 case Builtin::BI__builtin_os_log_format: 6008 return emitBuiltinOSLogFormat(*E); 6009 6010 case Builtin::BI__xray_customevent: { 6011 if (!ShouldXRayInstrumentFunction()) 6012 return RValue::getIgnored(); 6013 6014 if (!CGM.getCodeGenOpts().XRayInstrumentationBundle.has( 6015 XRayInstrKind::Custom)) 6016 return RValue::getIgnored(); 6017 6018 if (const auto *XRayAttr = CurFuncDecl->getAttr<XRayInstrumentAttr>()) 6019 if (XRayAttr->neverXRayInstrument() && !AlwaysEmitXRayCustomEvents()) 6020 return RValue::getIgnored(); 6021 6022 Function *F = CGM.getIntrinsic(Intrinsic::xray_customevent); 6023 auto FTy = F->getFunctionType(); 6024 auto Arg0 = E->getArg(0); 6025 auto Arg0Val = EmitScalarExpr(Arg0); 6026 auto Arg0Ty = Arg0->getType(); 6027 auto PTy0 = FTy->getParamType(0); 6028 if (PTy0 != Arg0Val->getType()) { 6029 if (Arg0Ty->isArrayType()) 6030 Arg0Val = EmitArrayToPointerDecay(Arg0).emitRawPointer(*this); 6031 else 6032 Arg0Val = Builder.CreatePointerCast(Arg0Val, PTy0); 6033 } 6034 auto Arg1 = EmitScalarExpr(E->getArg(1)); 6035 auto PTy1 = FTy->getParamType(1); 6036 if (PTy1 != Arg1->getType()) 6037 Arg1 = Builder.CreateTruncOrBitCast(Arg1, PTy1); 6038 return RValue::get(Builder.CreateCall(F, {Arg0Val, Arg1})); 6039 } 6040 6041 case Builtin::BI__xray_typedevent: { 6042 // TODO: There should be a way to always emit events even if the current 6043 // function is not instrumented. Losing events in a stream can cripple 6044 // a trace. 6045 if (!ShouldXRayInstrumentFunction()) 6046 return RValue::getIgnored(); 6047 6048 if (!CGM.getCodeGenOpts().XRayInstrumentationBundle.has( 6049 XRayInstrKind::Typed)) 6050 return RValue::getIgnored(); 6051 6052 if (const auto *XRayAttr = CurFuncDecl->getAttr<XRayInstrumentAttr>()) 6053 if (XRayAttr->neverXRayInstrument() && !AlwaysEmitXRayTypedEvents()) 6054 return RValue::getIgnored(); 6055 6056 Function *F = CGM.getIntrinsic(Intrinsic::xray_typedevent); 6057 auto FTy = F->getFunctionType(); 6058 auto Arg0 = EmitScalarExpr(E->getArg(0)); 6059 auto PTy0 = FTy->getParamType(0); 6060 if (PTy0 != Arg0->getType()) 6061 Arg0 = Builder.CreateTruncOrBitCast(Arg0, PTy0); 6062 auto Arg1 = E->getArg(1); 6063 auto Arg1Val = EmitScalarExpr(Arg1); 6064 auto Arg1Ty = Arg1->getType(); 6065 auto PTy1 = FTy->getParamType(1); 6066 if (PTy1 != Arg1Val->getType()) { 6067 if (Arg1Ty->isArrayType()) 6068 Arg1Val = EmitArrayToPointerDecay(Arg1).emitRawPointer(*this); 6069 else 6070 Arg1Val = Builder.CreatePointerCast(Arg1Val, PTy1); 6071 } 6072 auto Arg2 = EmitScalarExpr(E->getArg(2)); 6073 auto PTy2 = FTy->getParamType(2); 6074 if (PTy2 != Arg2->getType()) 6075 Arg2 = Builder.CreateTruncOrBitCast(Arg2, PTy2); 6076 return RValue::get(Builder.CreateCall(F, {Arg0, Arg1Val, Arg2})); 6077 } 6078 6079 case Builtin::BI__builtin_ms_va_start: 6080 case Builtin::BI__builtin_ms_va_end: 6081 return RValue::get( 6082 EmitVAStartEnd(EmitMSVAListRef(E->getArg(0)).emitRawPointer(*this), 6083 BuiltinID == Builtin::BI__builtin_ms_va_start)); 6084 6085 case Builtin::BI__builtin_ms_va_copy: { 6086 // Lower this manually. We can't reliably determine whether or not any 6087 // given va_copy() is for a Win64 va_list from the calling convention 6088 // alone, because it's legal to do this from a System V ABI function. 6089 // With opaque pointer types, we won't have enough information in LLVM 6090 // IR to determine this from the argument types, either. Best to do it 6091 // now, while we have enough information. 
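// Conceptually, the manual lowering below is just a pointer-sized copy:
//   char *tmp = *(char **)src; *(char **)dest = tmp;
// i.e. load the current argument pointer from the source va_list and store
// it into the destination va_list.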
6092 Address DestAddr = EmitMSVAListRef(E->getArg(0));
6093 Address SrcAddr = EmitMSVAListRef(E->getArg(1));
6094
6095 DestAddr = DestAddr.withElementType(Int8PtrTy);
6096 SrcAddr = SrcAddr.withElementType(Int8PtrTy);
6097
6098 Value *ArgPtr = Builder.CreateLoad(SrcAddr, "ap.val");
6099 return RValue::get(Builder.CreateStore(ArgPtr, DestAddr));
6100 }
6101
6102 case Builtin::BI__builtin_get_device_side_mangled_name: {
6103 auto Name = CGM.getCUDARuntime().getDeviceSideName(
6104 cast<DeclRefExpr>(E->getArg(0)->IgnoreImpCasts())->getDecl());
6105 auto Str = CGM.GetAddrOfConstantCString(Name, "");
6106 return RValue::get(Str.getPointer());
6107 }
6108 }
6109
6110 // If this is an alias for a lib function (e.g. __builtin_sin), emit
6111 // the call using the normal call path, but using the unmangled
6112 // version of the function name.
6113 if (getContext().BuiltinInfo.isLibFunction(BuiltinID))
6114 return emitLibraryCall(*this, FD, E,
6115 CGM.getBuiltinLibFunction(FD, BuiltinID));
6116
6117 // If this is a predefined lib function (e.g. malloc), emit the call
6118 // using exactly the normal call path.
6119 if (getContext().BuiltinInfo.isPredefinedLibFunction(BuiltinID))
6120 return emitLibraryCall(*this, FD, E, CGM.getRawFunctionPointer(FD));
6121
6122 // Check that a call to a target specific builtin has the correct target
6123 // features.
6124 // This is placed down here so that it only applies to target-specific
6125 // builtins; if generic builtins ever start to require generic target
6126 // features, this can move up to the beginning of the function.
6127 checkTargetFeatures(E, FD);
6128
6129 if (unsigned VectorWidth = getContext().BuiltinInfo.getRequiredVectorWidth(BuiltinID))
6130 LargestVectorWidth = std::max(LargestVectorWidth, VectorWidth);
6131
6132 // See if we have a target specific intrinsic.
6133 StringRef Name = getContext().BuiltinInfo.getName(BuiltinID);
6134 Intrinsic::ID IntrinsicID = Intrinsic::not_intrinsic;
6135 StringRef Prefix =
6136 llvm::Triple::getArchTypePrefix(getTarget().getTriple().getArch());
6137 if (!Prefix.empty()) {
6138 IntrinsicID = Intrinsic::getIntrinsicForClangBuiltin(Prefix.data(), Name);
6139 if (IntrinsicID == Intrinsic::not_intrinsic && Prefix == "spv" &&
6140 getTarget().getTriple().getOS() == llvm::Triple::OSType::AMDHSA)
6141 IntrinsicID = Intrinsic::getIntrinsicForClangBuiltin("amdgcn", Name);
6142 // NOTE: we don't need to perform a compatibility flag check here since the
6143 // intrinsics are declared in Builtins*.def via LANGBUILTIN, which filters the
6144 // MS builtins via ALL_MS_LANGUAGES, so they have already been rejected earlier.
6145 if (IntrinsicID == Intrinsic::not_intrinsic)
6146 IntrinsicID = Intrinsic::getIntrinsicForMSBuiltin(Prefix.data(), Name);
6147 }
6148
6149 if (IntrinsicID != Intrinsic::not_intrinsic) {
6150 SmallVector<Value*, 16> Args;
6151
6152 // Find out if any arguments are required to be integer constant
6153 // expressions.
6154 unsigned ICEArguments = 0;
6155 ASTContext::GetBuiltinTypeError Error;
6156 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
6157 assert(Error == ASTContext::GE_None && "Should not codegen an error");
6158
6159 Function *F = CGM.getIntrinsic(IntrinsicID);
6160 llvm::FunctionType *FTy = F->getFunctionType();
6161
6162 for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) {
6163 Value *ArgValue = EmitScalarOrConstFoldImmArg(ICEArguments, i, E);
6164 // If the intrinsic arg type is different from the builtin arg type
6165 // we need to do a bit cast.
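// For example (a sketch; address-space numbers are target-dependent): if the
// builtin argument is a pointer whose address space differs from the one the
// intrinsic expects, an addrspacecast is inserted; AMX tile parameters go
// through Intrinsic::x86_cast_vector_to_tile; everything else falls back to
// a plain bitcast, as done below.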
6166 llvm::Type *PTy = FTy->getParamType(i);
6167 if (PTy != ArgValue->getType()) {
6168 // XXX - vector of pointers?
6169 if (auto *PtrTy = dyn_cast<llvm::PointerType>(PTy)) {
6170 if (PtrTy->getAddressSpace() !=
6171 ArgValue->getType()->getPointerAddressSpace()) {
6172 ArgValue = Builder.CreateAddrSpaceCast(
6173 ArgValue, llvm::PointerType::get(getLLVMContext(),
6174 PtrTy->getAddressSpace()));
6175 }
6176 }
6177
6178 // Cast a vector type (e.g., v256i32) to x86_amx; this only happens
6179 // in AMX intrinsics.
6180 if (PTy->isX86_AMXTy())
6181 ArgValue = Builder.CreateIntrinsic(Intrinsic::x86_cast_vector_to_tile,
6182 {ArgValue->getType()}, {ArgValue});
6183 else
6184 ArgValue = Builder.CreateBitCast(ArgValue, PTy);
6185 }
6186
6187 Args.push_back(ArgValue);
6188 }
6189
6190 Value *V = Builder.CreateCall(F, Args);
6191 QualType BuiltinRetType = E->getType();
6192
6193 llvm::Type *RetTy = VoidTy;
6194 if (!BuiltinRetType->isVoidType())
6195 RetTy = ConvertType(BuiltinRetType);
6196
6197 if (RetTy != V->getType()) {
6198 // XXX - vector of pointers?
6199 if (auto *PtrTy = dyn_cast<llvm::PointerType>(RetTy)) {
6200 if (PtrTy->getAddressSpace() != V->getType()->getPointerAddressSpace()) {
6201 V = Builder.CreateAddrSpaceCast(
6202 V, llvm::PointerType::get(getLLVMContext(),
6203 PtrTy->getAddressSpace()));
6204 }
6205 }
6206
6207 // Cast x86_amx to a vector type (e.g., v256i32); this only happens
6208 // in AMX intrinsics.
6209 if (V->getType()->isX86_AMXTy())
6210 V = Builder.CreateIntrinsic(Intrinsic::x86_cast_tile_to_vector, {RetTy},
6211 {V});
6212 else
6213 V = Builder.CreateBitCast(V, RetTy);
6214 }
6215
6216 if (RetTy->isVoidTy())
6217 return RValue::get(nullptr);
6218
6219 return RValue::get(V);
6220 }
6221
6222 // Some target-specific builtins can have aggregate return values, e.g.
6223 // __builtin_arm_mve_vld2q_u32. So if the result is an aggregate, force
6224 // ReturnValue to be non-null, so that the target-specific emission code can
6225 // always just emit into it.
6226 TypeEvaluationKind EvalKind = getEvaluationKind(E->getType());
6227 if (EvalKind == TEK_Aggregate && ReturnValue.isNull()) {
6228 Address DestPtr = CreateMemTemp(E->getType(), "agg.tmp");
6229 ReturnValue = ReturnValueSlot(DestPtr, false);
6230 }
6231
6232 // Now see if we can emit a target-specific builtin.
6233 if (Value *V = EmitTargetBuiltinExpr(BuiltinID, E, ReturnValue)) {
6234 switch (EvalKind) {
6235 case TEK_Scalar:
6236 if (V->getType()->isVoidTy())
6237 return RValue::get(nullptr);
6238 return RValue::get(V);
6239 case TEK_Aggregate:
6240 return RValue::getAggregate(ReturnValue.getAddress(),
6241 ReturnValue.isVolatile());
6242 case TEK_Complex:
6243 llvm_unreachable("No current target builtin returns complex");
6244 }
6245 llvm_unreachable("Bad evaluation kind in EmitBuiltinExpr");
6246 }
6247
6248 // EmitHLSLBuiltinExpr will check getLangOpts().HLSL
6249 if (Value *V = EmitHLSLBuiltinExpr(BuiltinID, E))
6250 return RValue::get(V);
6251
6252 if (getLangOpts().HIPStdPar && getLangOpts().CUDAIsDevice)
6253 return EmitHipStdParUnsupportedBuiltin(this, FD);
6254
6255 ErrorUnsupported(E, "builtin function");
6256
6257 // Unknown builtin; for now, just dump it out and return undef.
6258 return GetUndefRValue(E->getType()); 6259 } 6260 6261 static Value *EmitTargetArchBuiltinExpr(CodeGenFunction *CGF, 6262 unsigned BuiltinID, const CallExpr *E, 6263 ReturnValueSlot ReturnValue, 6264 llvm::Triple::ArchType Arch) { 6265 // When compiling in HipStdPar mode we have to be conservative in rejecting 6266 // target specific features in the FE, and defer the possible error to the 6267 // AcceleratorCodeSelection pass, wherein iff an unsupported target builtin is 6268 // referenced by an accelerator executable function, we emit an error. 6269 // Returning nullptr here leads to the builtin being handled in 6270 // EmitStdParUnsupportedBuiltin. 6271 if (CGF->getLangOpts().HIPStdPar && CGF->getLangOpts().CUDAIsDevice && 6272 Arch != CGF->getTarget().getTriple().getArch()) 6273 return nullptr; 6274 6275 switch (Arch) { 6276 case llvm::Triple::arm: 6277 case llvm::Triple::armeb: 6278 case llvm::Triple::thumb: 6279 case llvm::Triple::thumbeb: 6280 return CGF->EmitARMBuiltinExpr(BuiltinID, E, ReturnValue, Arch); 6281 case llvm::Triple::aarch64: 6282 case llvm::Triple::aarch64_32: 6283 case llvm::Triple::aarch64_be: 6284 return CGF->EmitAArch64BuiltinExpr(BuiltinID, E, Arch); 6285 case llvm::Triple::bpfeb: 6286 case llvm::Triple::bpfel: 6287 return CGF->EmitBPFBuiltinExpr(BuiltinID, E); 6288 case llvm::Triple::x86: 6289 case llvm::Triple::x86_64: 6290 return CGF->EmitX86BuiltinExpr(BuiltinID, E); 6291 case llvm::Triple::ppc: 6292 case llvm::Triple::ppcle: 6293 case llvm::Triple::ppc64: 6294 case llvm::Triple::ppc64le: 6295 return CGF->EmitPPCBuiltinExpr(BuiltinID, E); 6296 case llvm::Triple::r600: 6297 case llvm::Triple::amdgcn: 6298 return CGF->EmitAMDGPUBuiltinExpr(BuiltinID, E); 6299 case llvm::Triple::systemz: 6300 return CGF->EmitSystemZBuiltinExpr(BuiltinID, E); 6301 case llvm::Triple::nvptx: 6302 case llvm::Triple::nvptx64: 6303 return CGF->EmitNVPTXBuiltinExpr(BuiltinID, E); 6304 case llvm::Triple::wasm32: 6305 case llvm::Triple::wasm64: 6306 return CGF->EmitWebAssemblyBuiltinExpr(BuiltinID, E); 6307 case llvm::Triple::hexagon: 6308 return CGF->EmitHexagonBuiltinExpr(BuiltinID, E); 6309 case llvm::Triple::riscv32: 6310 case llvm::Triple::riscv64: 6311 return CGF->EmitRISCVBuiltinExpr(BuiltinID, E, ReturnValue); 6312 case llvm::Triple::spirv64: 6313 if (CGF->getTarget().getTriple().getOS() != llvm::Triple::OSType::AMDHSA) 6314 return nullptr; 6315 return CGF->EmitAMDGPUBuiltinExpr(BuiltinID, E); 6316 default: 6317 return nullptr; 6318 } 6319 } 6320 6321 Value *CodeGenFunction::EmitTargetBuiltinExpr(unsigned BuiltinID, 6322 const CallExpr *E, 6323 ReturnValueSlot ReturnValue) { 6324 if (getContext().BuiltinInfo.isAuxBuiltinID(BuiltinID)) { 6325 assert(getContext().getAuxTargetInfo() && "Missing aux target info"); 6326 return EmitTargetArchBuiltinExpr( 6327 this, getContext().BuiltinInfo.getAuxBuiltinID(BuiltinID), E, 6328 ReturnValue, getContext().getAuxTargetInfo()->getTriple().getArch()); 6329 } 6330 6331 return EmitTargetArchBuiltinExpr(this, BuiltinID, E, ReturnValue, 6332 getTarget().getTriple().getArch()); 6333 } 6334 6335 static llvm::FixedVectorType *GetNeonType(CodeGenFunction *CGF, 6336 NeonTypeFlags TypeFlags, 6337 bool HasLegalHalfType = true, 6338 bool V1Ty = false, 6339 bool AllowBFloatArgsAndRet = true) { 6340 int IsQuad = TypeFlags.isQuad(); 6341 switch (TypeFlags.getEltType()) { 6342 case NeonTypeFlags::Int8: 6343 case NeonTypeFlags::Poly8: 6344 return llvm::FixedVectorType::get(CGF->Int8Ty, V1Ty ? 
1 : (8 << IsQuad));
6345 case NeonTypeFlags::Int16:
6346 case NeonTypeFlags::Poly16:
6347 return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
6348 case NeonTypeFlags::BFloat16:
6349 if (AllowBFloatArgsAndRet)
6350 return llvm::FixedVectorType::get(CGF->BFloatTy, V1Ty ? 1 : (4 << IsQuad));
6351 else
6352 return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
6353 case NeonTypeFlags::Float16:
6354 if (HasLegalHalfType)
6355 return llvm::FixedVectorType::get(CGF->HalfTy, V1Ty ? 1 : (4 << IsQuad));
6356 else
6357 return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
6358 case NeonTypeFlags::Int32:
6359 return llvm::FixedVectorType::get(CGF->Int32Ty, V1Ty ? 1 : (2 << IsQuad));
6360 case NeonTypeFlags::Int64:
6361 case NeonTypeFlags::Poly64:
6362 return llvm::FixedVectorType::get(CGF->Int64Ty, V1Ty ? 1 : (1 << IsQuad));
6363 case NeonTypeFlags::Poly128:
6364 // FIXME: i128 and f128 aren't fully supported in Clang and LLVM;
6365 // a lot of the i128 and f128 API is still missing,
6366 // so we use v16i8 to represent poly128 and rely on pattern matching.
6367 return llvm::FixedVectorType::get(CGF->Int8Ty, 16);
6368 case NeonTypeFlags::Float32:
6369 return llvm::FixedVectorType::get(CGF->FloatTy, V1Ty ? 1 : (2 << IsQuad));
6370 case NeonTypeFlags::Float64:
6371 return llvm::FixedVectorType::get(CGF->DoubleTy, V1Ty ? 1 : (1 << IsQuad));
6372 }
6373 llvm_unreachable("Unknown vector element type!");
6374 }
6375
6376 static llvm::VectorType *GetFloatNeonType(CodeGenFunction *CGF,
6377 NeonTypeFlags IntTypeFlags) {
6378 int IsQuad = IntTypeFlags.isQuad();
6379 switch (IntTypeFlags.getEltType()) {
6380 case NeonTypeFlags::Int16:
6381 return llvm::FixedVectorType::get(CGF->HalfTy, (4 << IsQuad));
6382 case NeonTypeFlags::Int32:
6383 return llvm::FixedVectorType::get(CGF->FloatTy, (2 << IsQuad));
6384 case NeonTypeFlags::Int64:
6385 return llvm::FixedVectorType::get(CGF->DoubleTy, (1 << IsQuad));
6386 default:
6387 llvm_unreachable("Type can't be converted to floating-point!");
6388 }
6389 }
6390
6391 Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C,
6392 const ElementCount &Count) {
6393 Value *SV = llvm::ConstantVector::getSplat(Count, C);
6394 return Builder.CreateShuffleVector(V, V, SV, "lane");
6395 }
6396
6397 Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C) {
6398 ElementCount EC = cast<llvm::VectorType>(V->getType())->getElementCount();
6399 return EmitNeonSplat(V, C, EC);
6400 }
6401
6402 Value *CodeGenFunction::EmitNeonCall(Function *F, SmallVectorImpl<Value*> &Ops,
6403 const char *name,
6404 unsigned shift, bool rightshift) {
6405 unsigned j = 0;
6406 for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
6407 ai != ae; ++ai, ++j) {
6408 if (F->isConstrainedFPIntrinsic())
6409 if (ai->getType()->isMetadataTy())
6410 continue;
6411 if (shift > 0 && shift == j)
6412 Ops[j] = EmitNeonShiftVector(Ops[j], ai->getType(), rightshift);
6413 else
6414 Ops[j] = Builder.CreateBitCast(Ops[j], ai->getType(), name);
6415 }
6416
6417 if (F->isConstrainedFPIntrinsic())
6418 return Builder.CreateConstrainedFPCall(F, Ops, name);
6419 else
6420 return Builder.CreateCall(F, Ops, name);
6421 }
6422
6423 Value *CodeGenFunction::EmitNeonShiftVector(Value *V, llvm::Type *Ty,
6424 bool neg) {
6425 int SV = cast<ConstantInt>(V)->getSExtValue();
6426 return ConstantInt::get(Ty, neg ? -SV : SV);
6427 }
6428
6429 // Right-shift a vector by a constant.
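// Behavioral sketch (assuming the NEON vshr_n family is routed through this
// helper): a signed shift by the full element width, e.g. vshrq_n_s32(v, 32),
// is emitted as an ashr by 31, whereas the unsigned vshrq_n_u32(v, 32) folds
// directly to a zero vector, matching the ShiftAmt == EltSize handling below.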
6430 Value *CodeGenFunction::EmitNeonRShiftImm(Value *Vec, Value *Shift, 6431 llvm::Type *Ty, bool usgn, 6432 const char *name) { 6433 llvm::VectorType *VTy = cast<llvm::VectorType>(Ty); 6434 6435 int ShiftAmt = cast<ConstantInt>(Shift)->getSExtValue(); 6436 int EltSize = VTy->getScalarSizeInBits(); 6437 6438 Vec = Builder.CreateBitCast(Vec, Ty); 6439 6440 // lshr/ashr are undefined when the shift amount is equal to the vector 6441 // element size. 6442 if (ShiftAmt == EltSize) { 6443 if (usgn) { 6444 // Right-shifting an unsigned value by its size yields 0. 6445 return llvm::ConstantAggregateZero::get(VTy); 6446 } else { 6447 // Right-shifting a signed value by its size is equivalent 6448 // to a shift of size-1. 6449 --ShiftAmt; 6450 Shift = ConstantInt::get(VTy->getElementType(), ShiftAmt); 6451 } 6452 } 6453 6454 Shift = EmitNeonShiftVector(Shift, Ty, false); 6455 if (usgn) 6456 return Builder.CreateLShr(Vec, Shift, name); 6457 else 6458 return Builder.CreateAShr(Vec, Shift, name); 6459 } 6460 6461 enum { 6462 AddRetType = (1 << 0), 6463 Add1ArgType = (1 << 1), 6464 Add2ArgTypes = (1 << 2), 6465 6466 VectorizeRetType = (1 << 3), 6467 VectorizeArgTypes = (1 << 4), 6468 6469 InventFloatType = (1 << 5), 6470 UnsignedAlts = (1 << 6), 6471 6472 Use64BitVectors = (1 << 7), 6473 Use128BitVectors = (1 << 8), 6474 6475 Vectorize1ArgType = Add1ArgType | VectorizeArgTypes, 6476 VectorRet = AddRetType | VectorizeRetType, 6477 VectorRetGetArgs01 = 6478 AddRetType | Add2ArgTypes | VectorizeRetType | VectorizeArgTypes, 6479 FpCmpzModifiers = 6480 AddRetType | VectorizeRetType | Add1ArgType | InventFloatType 6481 }; 6482 6483 namespace { 6484 struct ARMVectorIntrinsicInfo { 6485 const char *NameHint; 6486 unsigned BuiltinID; 6487 unsigned LLVMIntrinsic; 6488 unsigned AltLLVMIntrinsic; 6489 uint64_t TypeModifier; 6490 6491 bool operator<(unsigned RHSBuiltinID) const { 6492 return BuiltinID < RHSBuiltinID; 6493 } 6494 bool operator<(const ARMVectorIntrinsicInfo &TE) const { 6495 return BuiltinID < TE.BuiltinID; 6496 } 6497 }; 6498 } // end anonymous namespace 6499 6500 #define NEONMAP0(NameBase) \ 6501 { #NameBase, NEON::BI__builtin_neon_ ## NameBase, 0, 0, 0 } 6502 6503 #define NEONMAP1(NameBase, LLVMIntrinsic, TypeModifier) \ 6504 { #NameBase, NEON:: BI__builtin_neon_ ## NameBase, \ 6505 Intrinsic::LLVMIntrinsic, 0, TypeModifier } 6506 6507 #define NEONMAP2(NameBase, LLVMIntrinsic, AltLLVMIntrinsic, TypeModifier) \ 6508 { #NameBase, NEON:: BI__builtin_neon_ ## NameBase, \ 6509 Intrinsic::LLVMIntrinsic, Intrinsic::AltLLVMIntrinsic, \ 6510 TypeModifier } 6511 6512 static const ARMVectorIntrinsicInfo ARMSIMDIntrinsicMap [] = { 6513 NEONMAP1(__a32_vcvt_bf16_f32, arm_neon_vcvtfp2bf, 0), 6514 NEONMAP0(splat_lane_v), 6515 NEONMAP0(splat_laneq_v), 6516 NEONMAP0(splatq_lane_v), 6517 NEONMAP0(splatq_laneq_v), 6518 NEONMAP2(vabd_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts), 6519 NEONMAP2(vabdq_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts), 6520 NEONMAP1(vabs_v, arm_neon_vabs, 0), 6521 NEONMAP1(vabsq_v, arm_neon_vabs, 0), 6522 NEONMAP0(vadd_v), 6523 NEONMAP0(vaddhn_v), 6524 NEONMAP0(vaddq_v), 6525 NEONMAP1(vaesdq_u8, arm_neon_aesd, 0), 6526 NEONMAP1(vaeseq_u8, arm_neon_aese, 0), 6527 NEONMAP1(vaesimcq_u8, arm_neon_aesimc, 0), 6528 NEONMAP1(vaesmcq_u8, arm_neon_aesmc, 0), 6529 NEONMAP1(vbfdot_f32, arm_neon_bfdot, 0), 6530 NEONMAP1(vbfdotq_f32, arm_neon_bfdot, 0), 6531 NEONMAP1(vbfmlalbq_f32, arm_neon_bfmlalb, 0), 6532 NEONMAP1(vbfmlaltq_f32, arm_neon_bfmlalt, 0), 6533 
NEONMAP1(vbfmmlaq_f32, arm_neon_bfmmla, 0), 6534 NEONMAP1(vbsl_v, arm_neon_vbsl, AddRetType), 6535 NEONMAP1(vbslq_v, arm_neon_vbsl, AddRetType), 6536 NEONMAP1(vcadd_rot270_f16, arm_neon_vcadd_rot270, Add1ArgType), 6537 NEONMAP1(vcadd_rot270_f32, arm_neon_vcadd_rot270, Add1ArgType), 6538 NEONMAP1(vcadd_rot90_f16, arm_neon_vcadd_rot90, Add1ArgType), 6539 NEONMAP1(vcadd_rot90_f32, arm_neon_vcadd_rot90, Add1ArgType), 6540 NEONMAP1(vcaddq_rot270_f16, arm_neon_vcadd_rot270, Add1ArgType), 6541 NEONMAP1(vcaddq_rot270_f32, arm_neon_vcadd_rot270, Add1ArgType), 6542 NEONMAP1(vcaddq_rot270_f64, arm_neon_vcadd_rot270, Add1ArgType), 6543 NEONMAP1(vcaddq_rot90_f16, arm_neon_vcadd_rot90, Add1ArgType), 6544 NEONMAP1(vcaddq_rot90_f32, arm_neon_vcadd_rot90, Add1ArgType), 6545 NEONMAP1(vcaddq_rot90_f64, arm_neon_vcadd_rot90, Add1ArgType), 6546 NEONMAP1(vcage_v, arm_neon_vacge, 0), 6547 NEONMAP1(vcageq_v, arm_neon_vacge, 0), 6548 NEONMAP1(vcagt_v, arm_neon_vacgt, 0), 6549 NEONMAP1(vcagtq_v, arm_neon_vacgt, 0), 6550 NEONMAP1(vcale_v, arm_neon_vacge, 0), 6551 NEONMAP1(vcaleq_v, arm_neon_vacge, 0), 6552 NEONMAP1(vcalt_v, arm_neon_vacgt, 0), 6553 NEONMAP1(vcaltq_v, arm_neon_vacgt, 0), 6554 NEONMAP0(vceqz_v), 6555 NEONMAP0(vceqzq_v), 6556 NEONMAP0(vcgez_v), 6557 NEONMAP0(vcgezq_v), 6558 NEONMAP0(vcgtz_v), 6559 NEONMAP0(vcgtzq_v), 6560 NEONMAP0(vclez_v), 6561 NEONMAP0(vclezq_v), 6562 NEONMAP1(vcls_v, arm_neon_vcls, Add1ArgType), 6563 NEONMAP1(vclsq_v, arm_neon_vcls, Add1ArgType), 6564 NEONMAP0(vcltz_v), 6565 NEONMAP0(vcltzq_v), 6566 NEONMAP1(vclz_v, ctlz, Add1ArgType), 6567 NEONMAP1(vclzq_v, ctlz, Add1ArgType), 6568 NEONMAP1(vcnt_v, ctpop, Add1ArgType), 6569 NEONMAP1(vcntq_v, ctpop, Add1ArgType), 6570 NEONMAP1(vcvt_f16_f32, arm_neon_vcvtfp2hf, 0), 6571 NEONMAP0(vcvt_f16_s16), 6572 NEONMAP0(vcvt_f16_u16), 6573 NEONMAP1(vcvt_f32_f16, arm_neon_vcvthf2fp, 0), 6574 NEONMAP0(vcvt_f32_v), 6575 NEONMAP1(vcvt_n_f16_s16, arm_neon_vcvtfxs2fp, 0), 6576 NEONMAP1(vcvt_n_f16_u16, arm_neon_vcvtfxu2fp, 0), 6577 NEONMAP2(vcvt_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0), 6578 NEONMAP1(vcvt_n_s16_f16, arm_neon_vcvtfp2fxs, 0), 6579 NEONMAP1(vcvt_n_s32_v, arm_neon_vcvtfp2fxs, 0), 6580 NEONMAP1(vcvt_n_s64_v, arm_neon_vcvtfp2fxs, 0), 6581 NEONMAP1(vcvt_n_u16_f16, arm_neon_vcvtfp2fxu, 0), 6582 NEONMAP1(vcvt_n_u32_v, arm_neon_vcvtfp2fxu, 0), 6583 NEONMAP1(vcvt_n_u64_v, arm_neon_vcvtfp2fxu, 0), 6584 NEONMAP0(vcvt_s16_f16), 6585 NEONMAP0(vcvt_s32_v), 6586 NEONMAP0(vcvt_s64_v), 6587 NEONMAP0(vcvt_u16_f16), 6588 NEONMAP0(vcvt_u32_v), 6589 NEONMAP0(vcvt_u64_v), 6590 NEONMAP1(vcvta_s16_f16, arm_neon_vcvtas, 0), 6591 NEONMAP1(vcvta_s32_v, arm_neon_vcvtas, 0), 6592 NEONMAP1(vcvta_s64_v, arm_neon_vcvtas, 0), 6593 NEONMAP1(vcvta_u16_f16, arm_neon_vcvtau, 0), 6594 NEONMAP1(vcvta_u32_v, arm_neon_vcvtau, 0), 6595 NEONMAP1(vcvta_u64_v, arm_neon_vcvtau, 0), 6596 NEONMAP1(vcvtaq_s16_f16, arm_neon_vcvtas, 0), 6597 NEONMAP1(vcvtaq_s32_v, arm_neon_vcvtas, 0), 6598 NEONMAP1(vcvtaq_s64_v, arm_neon_vcvtas, 0), 6599 NEONMAP1(vcvtaq_u16_f16, arm_neon_vcvtau, 0), 6600 NEONMAP1(vcvtaq_u32_v, arm_neon_vcvtau, 0), 6601 NEONMAP1(vcvtaq_u64_v, arm_neon_vcvtau, 0), 6602 NEONMAP1(vcvth_bf16_f32, arm_neon_vcvtbfp2bf, 0), 6603 NEONMAP1(vcvtm_s16_f16, arm_neon_vcvtms, 0), 6604 NEONMAP1(vcvtm_s32_v, arm_neon_vcvtms, 0), 6605 NEONMAP1(vcvtm_s64_v, arm_neon_vcvtms, 0), 6606 NEONMAP1(vcvtm_u16_f16, arm_neon_vcvtmu, 0), 6607 NEONMAP1(vcvtm_u32_v, arm_neon_vcvtmu, 0), 6608 NEONMAP1(vcvtm_u64_v, arm_neon_vcvtmu, 0), 6609 NEONMAP1(vcvtmq_s16_f16, arm_neon_vcvtms, 0), 6610 
NEONMAP1(vcvtmq_s32_v, arm_neon_vcvtms, 0), 6611 NEONMAP1(vcvtmq_s64_v, arm_neon_vcvtms, 0), 6612 NEONMAP1(vcvtmq_u16_f16, arm_neon_vcvtmu, 0), 6613 NEONMAP1(vcvtmq_u32_v, arm_neon_vcvtmu, 0), 6614 NEONMAP1(vcvtmq_u64_v, arm_neon_vcvtmu, 0), 6615 NEONMAP1(vcvtn_s16_f16, arm_neon_vcvtns, 0), 6616 NEONMAP1(vcvtn_s32_v, arm_neon_vcvtns, 0), 6617 NEONMAP1(vcvtn_s64_v, arm_neon_vcvtns, 0), 6618 NEONMAP1(vcvtn_u16_f16, arm_neon_vcvtnu, 0), 6619 NEONMAP1(vcvtn_u32_v, arm_neon_vcvtnu, 0), 6620 NEONMAP1(vcvtn_u64_v, arm_neon_vcvtnu, 0), 6621 NEONMAP1(vcvtnq_s16_f16, arm_neon_vcvtns, 0), 6622 NEONMAP1(vcvtnq_s32_v, arm_neon_vcvtns, 0), 6623 NEONMAP1(vcvtnq_s64_v, arm_neon_vcvtns, 0), 6624 NEONMAP1(vcvtnq_u16_f16, arm_neon_vcvtnu, 0), 6625 NEONMAP1(vcvtnq_u32_v, arm_neon_vcvtnu, 0), 6626 NEONMAP1(vcvtnq_u64_v, arm_neon_vcvtnu, 0), 6627 NEONMAP1(vcvtp_s16_f16, arm_neon_vcvtps, 0), 6628 NEONMAP1(vcvtp_s32_v, arm_neon_vcvtps, 0), 6629 NEONMAP1(vcvtp_s64_v, arm_neon_vcvtps, 0), 6630 NEONMAP1(vcvtp_u16_f16, arm_neon_vcvtpu, 0), 6631 NEONMAP1(vcvtp_u32_v, arm_neon_vcvtpu, 0), 6632 NEONMAP1(vcvtp_u64_v, arm_neon_vcvtpu, 0), 6633 NEONMAP1(vcvtpq_s16_f16, arm_neon_vcvtps, 0), 6634 NEONMAP1(vcvtpq_s32_v, arm_neon_vcvtps, 0), 6635 NEONMAP1(vcvtpq_s64_v, arm_neon_vcvtps, 0), 6636 NEONMAP1(vcvtpq_u16_f16, arm_neon_vcvtpu, 0), 6637 NEONMAP1(vcvtpq_u32_v, arm_neon_vcvtpu, 0), 6638 NEONMAP1(vcvtpq_u64_v, arm_neon_vcvtpu, 0), 6639 NEONMAP0(vcvtq_f16_s16), 6640 NEONMAP0(vcvtq_f16_u16), 6641 NEONMAP0(vcvtq_f32_v), 6642 NEONMAP1(vcvtq_n_f16_s16, arm_neon_vcvtfxs2fp, 0), 6643 NEONMAP1(vcvtq_n_f16_u16, arm_neon_vcvtfxu2fp, 0), 6644 NEONMAP2(vcvtq_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0), 6645 NEONMAP1(vcvtq_n_s16_f16, arm_neon_vcvtfp2fxs, 0), 6646 NEONMAP1(vcvtq_n_s32_v, arm_neon_vcvtfp2fxs, 0), 6647 NEONMAP1(vcvtq_n_s64_v, arm_neon_vcvtfp2fxs, 0), 6648 NEONMAP1(vcvtq_n_u16_f16, arm_neon_vcvtfp2fxu, 0), 6649 NEONMAP1(vcvtq_n_u32_v, arm_neon_vcvtfp2fxu, 0), 6650 NEONMAP1(vcvtq_n_u64_v, arm_neon_vcvtfp2fxu, 0), 6651 NEONMAP0(vcvtq_s16_f16), 6652 NEONMAP0(vcvtq_s32_v), 6653 NEONMAP0(vcvtq_s64_v), 6654 NEONMAP0(vcvtq_u16_f16), 6655 NEONMAP0(vcvtq_u32_v), 6656 NEONMAP0(vcvtq_u64_v), 6657 NEONMAP1(vdot_s32, arm_neon_sdot, 0), 6658 NEONMAP1(vdot_u32, arm_neon_udot, 0), 6659 NEONMAP1(vdotq_s32, arm_neon_sdot, 0), 6660 NEONMAP1(vdotq_u32, arm_neon_udot, 0), 6661 NEONMAP0(vext_v), 6662 NEONMAP0(vextq_v), 6663 NEONMAP0(vfma_v), 6664 NEONMAP0(vfmaq_v), 6665 NEONMAP2(vhadd_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts), 6666 NEONMAP2(vhaddq_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts), 6667 NEONMAP2(vhsub_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts), 6668 NEONMAP2(vhsubq_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts), 6669 NEONMAP0(vld1_dup_v), 6670 NEONMAP1(vld1_v, arm_neon_vld1, 0), 6671 NEONMAP1(vld1_x2_v, arm_neon_vld1x2, 0), 6672 NEONMAP1(vld1_x3_v, arm_neon_vld1x3, 0), 6673 NEONMAP1(vld1_x4_v, arm_neon_vld1x4, 0), 6674 NEONMAP0(vld1q_dup_v), 6675 NEONMAP1(vld1q_v, arm_neon_vld1, 0), 6676 NEONMAP1(vld1q_x2_v, arm_neon_vld1x2, 0), 6677 NEONMAP1(vld1q_x3_v, arm_neon_vld1x3, 0), 6678 NEONMAP1(vld1q_x4_v, arm_neon_vld1x4, 0), 6679 NEONMAP1(vld2_dup_v, arm_neon_vld2dup, 0), 6680 NEONMAP1(vld2_lane_v, arm_neon_vld2lane, 0), 6681 NEONMAP1(vld2_v, arm_neon_vld2, 0), 6682 NEONMAP1(vld2q_dup_v, arm_neon_vld2dup, 0), 6683 NEONMAP1(vld2q_lane_v, arm_neon_vld2lane, 0), 6684 NEONMAP1(vld2q_v, arm_neon_vld2, 0), 6685 NEONMAP1(vld3_dup_v, arm_neon_vld3dup, 
0), 6686 NEONMAP1(vld3_lane_v, arm_neon_vld3lane, 0), 6687 NEONMAP1(vld3_v, arm_neon_vld3, 0), 6688 NEONMAP1(vld3q_dup_v, arm_neon_vld3dup, 0), 6689 NEONMAP1(vld3q_lane_v, arm_neon_vld3lane, 0), 6690 NEONMAP1(vld3q_v, arm_neon_vld3, 0), 6691 NEONMAP1(vld4_dup_v, arm_neon_vld4dup, 0), 6692 NEONMAP1(vld4_lane_v, arm_neon_vld4lane, 0), 6693 NEONMAP1(vld4_v, arm_neon_vld4, 0), 6694 NEONMAP1(vld4q_dup_v, arm_neon_vld4dup, 0), 6695 NEONMAP1(vld4q_lane_v, arm_neon_vld4lane, 0), 6696 NEONMAP1(vld4q_v, arm_neon_vld4, 0), 6697 NEONMAP2(vmax_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts), 6698 NEONMAP1(vmaxnm_v, arm_neon_vmaxnm, Add1ArgType), 6699 NEONMAP1(vmaxnmq_v, arm_neon_vmaxnm, Add1ArgType), 6700 NEONMAP2(vmaxq_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts), 6701 NEONMAP2(vmin_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts), 6702 NEONMAP1(vminnm_v, arm_neon_vminnm, Add1ArgType), 6703 NEONMAP1(vminnmq_v, arm_neon_vminnm, Add1ArgType), 6704 NEONMAP2(vminq_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts), 6705 NEONMAP1(vmmlaq_s32, arm_neon_smmla, 0), 6706 NEONMAP1(vmmlaq_u32, arm_neon_ummla, 0), 6707 NEONMAP0(vmovl_v), 6708 NEONMAP0(vmovn_v), 6709 NEONMAP1(vmul_v, arm_neon_vmulp, Add1ArgType), 6710 NEONMAP0(vmull_v), 6711 NEONMAP1(vmulq_v, arm_neon_vmulp, Add1ArgType), 6712 NEONMAP2(vpadal_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts), 6713 NEONMAP2(vpadalq_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts), 6714 NEONMAP1(vpadd_v, arm_neon_vpadd, Add1ArgType), 6715 NEONMAP2(vpaddl_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts), 6716 NEONMAP2(vpaddlq_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts), 6717 NEONMAP1(vpaddq_v, arm_neon_vpadd, Add1ArgType), 6718 NEONMAP2(vpmax_v, arm_neon_vpmaxu, arm_neon_vpmaxs, Add1ArgType | UnsignedAlts), 6719 NEONMAP2(vpmin_v, arm_neon_vpminu, arm_neon_vpmins, Add1ArgType | UnsignedAlts), 6720 NEONMAP1(vqabs_v, arm_neon_vqabs, Add1ArgType), 6721 NEONMAP1(vqabsq_v, arm_neon_vqabs, Add1ArgType), 6722 NEONMAP2(vqadd_v, uadd_sat, sadd_sat, Add1ArgType | UnsignedAlts), 6723 NEONMAP2(vqaddq_v, uadd_sat, sadd_sat, Add1ArgType | UnsignedAlts), 6724 NEONMAP2(vqdmlal_v, arm_neon_vqdmull, sadd_sat, 0), 6725 NEONMAP2(vqdmlsl_v, arm_neon_vqdmull, ssub_sat, 0), 6726 NEONMAP1(vqdmulh_v, arm_neon_vqdmulh, Add1ArgType), 6727 NEONMAP1(vqdmulhq_v, arm_neon_vqdmulh, Add1ArgType), 6728 NEONMAP1(vqdmull_v, arm_neon_vqdmull, Add1ArgType), 6729 NEONMAP2(vqmovn_v, arm_neon_vqmovnu, arm_neon_vqmovns, Add1ArgType | UnsignedAlts), 6730 NEONMAP1(vqmovun_v, arm_neon_vqmovnsu, Add1ArgType), 6731 NEONMAP1(vqneg_v, arm_neon_vqneg, Add1ArgType), 6732 NEONMAP1(vqnegq_v, arm_neon_vqneg, Add1ArgType), 6733 NEONMAP1(vqrdmlah_s16, arm_neon_vqrdmlah, Add1ArgType), 6734 NEONMAP1(vqrdmlah_s32, arm_neon_vqrdmlah, Add1ArgType), 6735 NEONMAP1(vqrdmlahq_s16, arm_neon_vqrdmlah, Add1ArgType), 6736 NEONMAP1(vqrdmlahq_s32, arm_neon_vqrdmlah, Add1ArgType), 6737 NEONMAP1(vqrdmlsh_s16, arm_neon_vqrdmlsh, Add1ArgType), 6738 NEONMAP1(vqrdmlsh_s32, arm_neon_vqrdmlsh, Add1ArgType), 6739 NEONMAP1(vqrdmlshq_s16, arm_neon_vqrdmlsh, Add1ArgType), 6740 NEONMAP1(vqrdmlshq_s32, arm_neon_vqrdmlsh, Add1ArgType), 6741 NEONMAP1(vqrdmulh_v, arm_neon_vqrdmulh, Add1ArgType), 6742 NEONMAP1(vqrdmulhq_v, arm_neon_vqrdmulh, Add1ArgType), 6743 NEONMAP2(vqrshl_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts), 6744 NEONMAP2(vqrshlq_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts), 6745 NEONMAP2(vqshl_n_v, 
arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts), 6746 NEONMAP2(vqshl_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts), 6747 NEONMAP2(vqshlq_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts), 6748 NEONMAP2(vqshlq_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts), 6749 NEONMAP1(vqshlu_n_v, arm_neon_vqshiftsu, 0), 6750 NEONMAP1(vqshluq_n_v, arm_neon_vqshiftsu, 0), 6751 NEONMAP2(vqsub_v, usub_sat, ssub_sat, Add1ArgType | UnsignedAlts), 6752 NEONMAP2(vqsubq_v, usub_sat, ssub_sat, Add1ArgType | UnsignedAlts), 6753 NEONMAP1(vraddhn_v, arm_neon_vraddhn, Add1ArgType), 6754 NEONMAP2(vrecpe_v, arm_neon_vrecpe, arm_neon_vrecpe, 0), 6755 NEONMAP2(vrecpeq_v, arm_neon_vrecpe, arm_neon_vrecpe, 0), 6756 NEONMAP1(vrecps_v, arm_neon_vrecps, Add1ArgType), 6757 NEONMAP1(vrecpsq_v, arm_neon_vrecps, Add1ArgType), 6758 NEONMAP2(vrhadd_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts), 6759 NEONMAP2(vrhaddq_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts), 6760 NEONMAP1(vrnd_v, arm_neon_vrintz, Add1ArgType), 6761 NEONMAP1(vrnda_v, arm_neon_vrinta, Add1ArgType), 6762 NEONMAP1(vrndaq_v, arm_neon_vrinta, Add1ArgType), 6763 NEONMAP0(vrndi_v), 6764 NEONMAP0(vrndiq_v), 6765 NEONMAP1(vrndm_v, arm_neon_vrintm, Add1ArgType), 6766 NEONMAP1(vrndmq_v, arm_neon_vrintm, Add1ArgType), 6767 NEONMAP1(vrndn_v, arm_neon_vrintn, Add1ArgType), 6768 NEONMAP1(vrndnq_v, arm_neon_vrintn, Add1ArgType), 6769 NEONMAP1(vrndp_v, arm_neon_vrintp, Add1ArgType), 6770 NEONMAP1(vrndpq_v, arm_neon_vrintp, Add1ArgType), 6771 NEONMAP1(vrndq_v, arm_neon_vrintz, Add1ArgType), 6772 NEONMAP1(vrndx_v, arm_neon_vrintx, Add1ArgType), 6773 NEONMAP1(vrndxq_v, arm_neon_vrintx, Add1ArgType), 6774 NEONMAP2(vrshl_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts), 6775 NEONMAP2(vrshlq_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts), 6776 NEONMAP2(vrshr_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts), 6777 NEONMAP2(vrshrq_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts), 6778 NEONMAP2(vrsqrte_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0), 6779 NEONMAP2(vrsqrteq_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0), 6780 NEONMAP1(vrsqrts_v, arm_neon_vrsqrts, Add1ArgType), 6781 NEONMAP1(vrsqrtsq_v, arm_neon_vrsqrts, Add1ArgType), 6782 NEONMAP1(vrsubhn_v, arm_neon_vrsubhn, Add1ArgType), 6783 NEONMAP1(vsha1su0q_u32, arm_neon_sha1su0, 0), 6784 NEONMAP1(vsha1su1q_u32, arm_neon_sha1su1, 0), 6785 NEONMAP1(vsha256h2q_u32, arm_neon_sha256h2, 0), 6786 NEONMAP1(vsha256hq_u32, arm_neon_sha256h, 0), 6787 NEONMAP1(vsha256su0q_u32, arm_neon_sha256su0, 0), 6788 NEONMAP1(vsha256su1q_u32, arm_neon_sha256su1, 0), 6789 NEONMAP0(vshl_n_v), 6790 NEONMAP2(vshl_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts), 6791 NEONMAP0(vshll_n_v), 6792 NEONMAP0(vshlq_n_v), 6793 NEONMAP2(vshlq_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts), 6794 NEONMAP0(vshr_n_v), 6795 NEONMAP0(vshrn_n_v), 6796 NEONMAP0(vshrq_n_v), 6797 NEONMAP1(vst1_v, arm_neon_vst1, 0), 6798 NEONMAP1(vst1_x2_v, arm_neon_vst1x2, 0), 6799 NEONMAP1(vst1_x3_v, arm_neon_vst1x3, 0), 6800 NEONMAP1(vst1_x4_v, arm_neon_vst1x4, 0), 6801 NEONMAP1(vst1q_v, arm_neon_vst1, 0), 6802 NEONMAP1(vst1q_x2_v, arm_neon_vst1x2, 0), 6803 NEONMAP1(vst1q_x3_v, arm_neon_vst1x3, 0), 6804 NEONMAP1(vst1q_x4_v, arm_neon_vst1x4, 0), 6805 NEONMAP1(vst2_lane_v, arm_neon_vst2lane, 0), 6806 NEONMAP1(vst2_v, arm_neon_vst2, 0), 6807 NEONMAP1(vst2q_lane_v, arm_neon_vst2lane, 0), 6808 NEONMAP1(vst2q_v, 
arm_neon_vst2, 0), 6809 NEONMAP1(vst3_lane_v, arm_neon_vst3lane, 0), 6810 NEONMAP1(vst3_v, arm_neon_vst3, 0), 6811 NEONMAP1(vst3q_lane_v, arm_neon_vst3lane, 0), 6812 NEONMAP1(vst3q_v, arm_neon_vst3, 0), 6813 NEONMAP1(vst4_lane_v, arm_neon_vst4lane, 0), 6814 NEONMAP1(vst4_v, arm_neon_vst4, 0), 6815 NEONMAP1(vst4q_lane_v, arm_neon_vst4lane, 0), 6816 NEONMAP1(vst4q_v, arm_neon_vst4, 0), 6817 NEONMAP0(vsubhn_v), 6818 NEONMAP0(vtrn_v), 6819 NEONMAP0(vtrnq_v), 6820 NEONMAP0(vtst_v), 6821 NEONMAP0(vtstq_v), 6822 NEONMAP1(vusdot_s32, arm_neon_usdot, 0), 6823 NEONMAP1(vusdotq_s32, arm_neon_usdot, 0), 6824 NEONMAP1(vusmmlaq_s32, arm_neon_usmmla, 0), 6825 NEONMAP0(vuzp_v), 6826 NEONMAP0(vuzpq_v), 6827 NEONMAP0(vzip_v), 6828 NEONMAP0(vzipq_v) 6829 }; 6830 6831 static const ARMVectorIntrinsicInfo AArch64SIMDIntrinsicMap[] = { 6832 NEONMAP1(__a64_vcvtq_low_bf16_f32, aarch64_neon_bfcvtn, 0), 6833 NEONMAP0(splat_lane_v), 6834 NEONMAP0(splat_laneq_v), 6835 NEONMAP0(splatq_lane_v), 6836 NEONMAP0(splatq_laneq_v), 6837 NEONMAP1(vabs_v, aarch64_neon_abs, 0), 6838 NEONMAP1(vabsq_v, aarch64_neon_abs, 0), 6839 NEONMAP0(vadd_v), 6840 NEONMAP0(vaddhn_v), 6841 NEONMAP0(vaddq_p128), 6842 NEONMAP0(vaddq_v), 6843 NEONMAP1(vaesdq_u8, aarch64_crypto_aesd, 0), 6844 NEONMAP1(vaeseq_u8, aarch64_crypto_aese, 0), 6845 NEONMAP1(vaesimcq_u8, aarch64_crypto_aesimc, 0), 6846 NEONMAP1(vaesmcq_u8, aarch64_crypto_aesmc, 0), 6847 NEONMAP2(vbcaxq_s16, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts), 6848 NEONMAP2(vbcaxq_s32, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts), 6849 NEONMAP2(vbcaxq_s64, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts), 6850 NEONMAP2(vbcaxq_s8, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts), 6851 NEONMAP2(vbcaxq_u16, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts), 6852 NEONMAP2(vbcaxq_u32, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts), 6853 NEONMAP2(vbcaxq_u64, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts), 6854 NEONMAP2(vbcaxq_u8, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts), 6855 NEONMAP1(vbfdot_f32, aarch64_neon_bfdot, 0), 6856 NEONMAP1(vbfdotq_f32, aarch64_neon_bfdot, 0), 6857 NEONMAP1(vbfmlalbq_f32, aarch64_neon_bfmlalb, 0), 6858 NEONMAP1(vbfmlaltq_f32, aarch64_neon_bfmlalt, 0), 6859 NEONMAP1(vbfmmlaq_f32, aarch64_neon_bfmmla, 0), 6860 NEONMAP1(vcadd_rot270_f16, aarch64_neon_vcadd_rot270, Add1ArgType), 6861 NEONMAP1(vcadd_rot270_f32, aarch64_neon_vcadd_rot270, Add1ArgType), 6862 NEONMAP1(vcadd_rot90_f16, aarch64_neon_vcadd_rot90, Add1ArgType), 6863 NEONMAP1(vcadd_rot90_f32, aarch64_neon_vcadd_rot90, Add1ArgType), 6864 NEONMAP1(vcaddq_rot270_f16, aarch64_neon_vcadd_rot270, Add1ArgType), 6865 NEONMAP1(vcaddq_rot270_f32, aarch64_neon_vcadd_rot270, Add1ArgType), 6866 NEONMAP1(vcaddq_rot270_f64, aarch64_neon_vcadd_rot270, Add1ArgType), 6867 NEONMAP1(vcaddq_rot90_f16, aarch64_neon_vcadd_rot90, Add1ArgType), 6868 NEONMAP1(vcaddq_rot90_f32, aarch64_neon_vcadd_rot90, Add1ArgType), 6869 NEONMAP1(vcaddq_rot90_f64, aarch64_neon_vcadd_rot90, Add1ArgType), 6870 NEONMAP1(vcage_v, aarch64_neon_facge, 0), 6871 NEONMAP1(vcageq_v, aarch64_neon_facge, 0), 6872 NEONMAP1(vcagt_v, aarch64_neon_facgt, 0), 6873 NEONMAP1(vcagtq_v, aarch64_neon_facgt, 0), 6874 NEONMAP1(vcale_v, aarch64_neon_facge, 0), 6875 NEONMAP1(vcaleq_v, aarch64_neon_facge, 0), 6876 NEONMAP1(vcalt_v, aarch64_neon_facgt, 0), 6877 NEONMAP1(vcaltq_v, 
aarch64_neon_facgt, 0), 6878 NEONMAP0(vceqz_v), 6879 NEONMAP0(vceqzq_v), 6880 NEONMAP0(vcgez_v), 6881 NEONMAP0(vcgezq_v), 6882 NEONMAP0(vcgtz_v), 6883 NEONMAP0(vcgtzq_v), 6884 NEONMAP0(vclez_v), 6885 NEONMAP0(vclezq_v), 6886 NEONMAP1(vcls_v, aarch64_neon_cls, Add1ArgType), 6887 NEONMAP1(vclsq_v, aarch64_neon_cls, Add1ArgType), 6888 NEONMAP0(vcltz_v), 6889 NEONMAP0(vcltzq_v), 6890 NEONMAP1(vclz_v, ctlz, Add1ArgType), 6891 NEONMAP1(vclzq_v, ctlz, Add1ArgType), 6892 NEONMAP1(vcmla_f16, aarch64_neon_vcmla_rot0, Add1ArgType), 6893 NEONMAP1(vcmla_f32, aarch64_neon_vcmla_rot0, Add1ArgType), 6894 NEONMAP1(vcmla_rot180_f16, aarch64_neon_vcmla_rot180, Add1ArgType), 6895 NEONMAP1(vcmla_rot180_f32, aarch64_neon_vcmla_rot180, Add1ArgType), 6896 NEONMAP1(vcmla_rot270_f16, aarch64_neon_vcmla_rot270, Add1ArgType), 6897 NEONMAP1(vcmla_rot270_f32, aarch64_neon_vcmla_rot270, Add1ArgType), 6898 NEONMAP1(vcmla_rot90_f16, aarch64_neon_vcmla_rot90, Add1ArgType), 6899 NEONMAP1(vcmla_rot90_f32, aarch64_neon_vcmla_rot90, Add1ArgType), 6900 NEONMAP1(vcmlaq_f16, aarch64_neon_vcmla_rot0, Add1ArgType), 6901 NEONMAP1(vcmlaq_f32, aarch64_neon_vcmla_rot0, Add1ArgType), 6902 NEONMAP1(vcmlaq_f64, aarch64_neon_vcmla_rot0, Add1ArgType), 6903 NEONMAP1(vcmlaq_rot180_f16, aarch64_neon_vcmla_rot180, Add1ArgType), 6904 NEONMAP1(vcmlaq_rot180_f32, aarch64_neon_vcmla_rot180, Add1ArgType), 6905 NEONMAP1(vcmlaq_rot180_f64, aarch64_neon_vcmla_rot180, Add1ArgType), 6906 NEONMAP1(vcmlaq_rot270_f16, aarch64_neon_vcmla_rot270, Add1ArgType), 6907 NEONMAP1(vcmlaq_rot270_f32, aarch64_neon_vcmla_rot270, Add1ArgType), 6908 NEONMAP1(vcmlaq_rot270_f64, aarch64_neon_vcmla_rot270, Add1ArgType), 6909 NEONMAP1(vcmlaq_rot90_f16, aarch64_neon_vcmla_rot90, Add1ArgType), 6910 NEONMAP1(vcmlaq_rot90_f32, aarch64_neon_vcmla_rot90, Add1ArgType), 6911 NEONMAP1(vcmlaq_rot90_f64, aarch64_neon_vcmla_rot90, Add1ArgType), 6912 NEONMAP1(vcnt_v, ctpop, Add1ArgType), 6913 NEONMAP1(vcntq_v, ctpop, Add1ArgType), 6914 NEONMAP1(vcvt_f16_f32, aarch64_neon_vcvtfp2hf, 0), 6915 NEONMAP0(vcvt_f16_s16), 6916 NEONMAP0(vcvt_f16_u16), 6917 NEONMAP1(vcvt_f32_f16, aarch64_neon_vcvthf2fp, 0), 6918 NEONMAP0(vcvt_f32_v), 6919 NEONMAP1(vcvt_n_f16_s16, aarch64_neon_vcvtfxs2fp, 0), 6920 NEONMAP1(vcvt_n_f16_u16, aarch64_neon_vcvtfxu2fp, 0), 6921 NEONMAP2(vcvt_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0), 6922 NEONMAP2(vcvt_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0), 6923 NEONMAP1(vcvt_n_s16_f16, aarch64_neon_vcvtfp2fxs, 0), 6924 NEONMAP1(vcvt_n_s32_v, aarch64_neon_vcvtfp2fxs, 0), 6925 NEONMAP1(vcvt_n_s64_v, aarch64_neon_vcvtfp2fxs, 0), 6926 NEONMAP1(vcvt_n_u16_f16, aarch64_neon_vcvtfp2fxu, 0), 6927 NEONMAP1(vcvt_n_u32_v, aarch64_neon_vcvtfp2fxu, 0), 6928 NEONMAP1(vcvt_n_u64_v, aarch64_neon_vcvtfp2fxu, 0), 6929 NEONMAP0(vcvtq_f16_s16), 6930 NEONMAP0(vcvtq_f16_u16), 6931 NEONMAP0(vcvtq_f32_v), 6932 NEONMAP1(vcvtq_high_bf16_f32, aarch64_neon_bfcvtn2, 0), 6933 NEONMAP1(vcvtq_n_f16_s16, aarch64_neon_vcvtfxs2fp, 0), 6934 NEONMAP1(vcvtq_n_f16_u16, aarch64_neon_vcvtfxu2fp, 0), 6935 NEONMAP2(vcvtq_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0), 6936 NEONMAP2(vcvtq_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0), 6937 NEONMAP1(vcvtq_n_s16_f16, aarch64_neon_vcvtfp2fxs, 0), 6938 NEONMAP1(vcvtq_n_s32_v, aarch64_neon_vcvtfp2fxs, 0), 6939 NEONMAP1(vcvtq_n_s64_v, aarch64_neon_vcvtfp2fxs, 0), 6940 NEONMAP1(vcvtq_n_u16_f16, aarch64_neon_vcvtfp2fxu, 0), 6941 NEONMAP1(vcvtq_n_u32_v, aarch64_neon_vcvtfp2fxu, 0), 6942 NEONMAP1(vcvtq_n_u64_v, 
aarch64_neon_vcvtfp2fxu, 0), 6943 NEONMAP1(vcvtx_f32_v, aarch64_neon_fcvtxn, AddRetType | Add1ArgType), 6944 NEONMAP1(vdot_s32, aarch64_neon_sdot, 0), 6945 NEONMAP1(vdot_u32, aarch64_neon_udot, 0), 6946 NEONMAP1(vdotq_s32, aarch64_neon_sdot, 0), 6947 NEONMAP1(vdotq_u32, aarch64_neon_udot, 0), 6948 NEONMAP2(veor3q_s16, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts), 6949 NEONMAP2(veor3q_s32, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts), 6950 NEONMAP2(veor3q_s64, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts), 6951 NEONMAP2(veor3q_s8, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts), 6952 NEONMAP2(veor3q_u16, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts), 6953 NEONMAP2(veor3q_u32, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts), 6954 NEONMAP2(veor3q_u64, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts), 6955 NEONMAP2(veor3q_u8, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts), 6956 NEONMAP0(vext_v), 6957 NEONMAP0(vextq_v), 6958 NEONMAP0(vfma_v), 6959 NEONMAP0(vfmaq_v), 6960 NEONMAP1(vfmlal_high_f16, aarch64_neon_fmlal2, 0), 6961 NEONMAP1(vfmlal_low_f16, aarch64_neon_fmlal, 0), 6962 NEONMAP1(vfmlalq_high_f16, aarch64_neon_fmlal2, 0), 6963 NEONMAP1(vfmlalq_low_f16, aarch64_neon_fmlal, 0), 6964 NEONMAP1(vfmlsl_high_f16, aarch64_neon_fmlsl2, 0), 6965 NEONMAP1(vfmlsl_low_f16, aarch64_neon_fmlsl, 0), 6966 NEONMAP1(vfmlslq_high_f16, aarch64_neon_fmlsl2, 0), 6967 NEONMAP1(vfmlslq_low_f16, aarch64_neon_fmlsl, 0), 6968 NEONMAP2(vhadd_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts), 6969 NEONMAP2(vhaddq_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts), 6970 NEONMAP2(vhsub_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts), 6971 NEONMAP2(vhsubq_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts), 6972 NEONMAP1(vld1_x2_v, aarch64_neon_ld1x2, 0), 6973 NEONMAP1(vld1_x3_v, aarch64_neon_ld1x3, 0), 6974 NEONMAP1(vld1_x4_v, aarch64_neon_ld1x4, 0), 6975 NEONMAP1(vld1q_x2_v, aarch64_neon_ld1x2, 0), 6976 NEONMAP1(vld1q_x3_v, aarch64_neon_ld1x3, 0), 6977 NEONMAP1(vld1q_x4_v, aarch64_neon_ld1x4, 0), 6978 NEONMAP1(vmmlaq_s32, aarch64_neon_smmla, 0), 6979 NEONMAP1(vmmlaq_u32, aarch64_neon_ummla, 0), 6980 NEONMAP0(vmovl_v), 6981 NEONMAP0(vmovn_v), 6982 NEONMAP1(vmul_v, aarch64_neon_pmul, Add1ArgType), 6983 NEONMAP1(vmulq_v, aarch64_neon_pmul, Add1ArgType), 6984 NEONMAP1(vpadd_v, aarch64_neon_addp, Add1ArgType), 6985 NEONMAP2(vpaddl_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts), 6986 NEONMAP2(vpaddlq_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts), 6987 NEONMAP1(vpaddq_v, aarch64_neon_addp, Add1ArgType), 6988 NEONMAP1(vqabs_v, aarch64_neon_sqabs, Add1ArgType), 6989 NEONMAP1(vqabsq_v, aarch64_neon_sqabs, Add1ArgType), 6990 NEONMAP2(vqadd_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts), 6991 NEONMAP2(vqaddq_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts), 6992 NEONMAP2(vqdmlal_v, aarch64_neon_sqdmull, aarch64_neon_sqadd, 0), 6993 NEONMAP2(vqdmlsl_v, aarch64_neon_sqdmull, aarch64_neon_sqsub, 0), 6994 NEONMAP1(vqdmulh_lane_v, aarch64_neon_sqdmulh_lane, 0), 6995 NEONMAP1(vqdmulh_laneq_v, aarch64_neon_sqdmulh_laneq, 0), 6996 NEONMAP1(vqdmulh_v, aarch64_neon_sqdmulh, Add1ArgType), 6997 NEONMAP1(vqdmulhq_lane_v, aarch64_neon_sqdmulh_lane, 0), 6998 NEONMAP1(vqdmulhq_laneq_v, 
aarch64_neon_sqdmulh_laneq, 0), 6999 NEONMAP1(vqdmulhq_v, aarch64_neon_sqdmulh, Add1ArgType), 7000 NEONMAP1(vqdmull_v, aarch64_neon_sqdmull, Add1ArgType), 7001 NEONMAP2(vqmovn_v, aarch64_neon_uqxtn, aarch64_neon_sqxtn, Add1ArgType | UnsignedAlts), 7002 NEONMAP1(vqmovun_v, aarch64_neon_sqxtun, Add1ArgType), 7003 NEONMAP1(vqneg_v, aarch64_neon_sqneg, Add1ArgType), 7004 NEONMAP1(vqnegq_v, aarch64_neon_sqneg, Add1ArgType), 7005 NEONMAP1(vqrdmlah_s16, aarch64_neon_sqrdmlah, Add1ArgType), 7006 NEONMAP1(vqrdmlah_s32, aarch64_neon_sqrdmlah, Add1ArgType), 7007 NEONMAP1(vqrdmlahq_s16, aarch64_neon_sqrdmlah, Add1ArgType), 7008 NEONMAP1(vqrdmlahq_s32, aarch64_neon_sqrdmlah, Add1ArgType), 7009 NEONMAP1(vqrdmlsh_s16, aarch64_neon_sqrdmlsh, Add1ArgType), 7010 NEONMAP1(vqrdmlsh_s32, aarch64_neon_sqrdmlsh, Add1ArgType), 7011 NEONMAP1(vqrdmlshq_s16, aarch64_neon_sqrdmlsh, Add1ArgType), 7012 NEONMAP1(vqrdmlshq_s32, aarch64_neon_sqrdmlsh, Add1ArgType), 7013 NEONMAP1(vqrdmulh_lane_v, aarch64_neon_sqrdmulh_lane, 0), 7014 NEONMAP1(vqrdmulh_laneq_v, aarch64_neon_sqrdmulh_laneq, 0), 7015 NEONMAP1(vqrdmulh_v, aarch64_neon_sqrdmulh, Add1ArgType), 7016 NEONMAP1(vqrdmulhq_lane_v, aarch64_neon_sqrdmulh_lane, 0), 7017 NEONMAP1(vqrdmulhq_laneq_v, aarch64_neon_sqrdmulh_laneq, 0), 7018 NEONMAP1(vqrdmulhq_v, aarch64_neon_sqrdmulh, Add1ArgType), 7019 NEONMAP2(vqrshl_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts), 7020 NEONMAP2(vqrshlq_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts), 7021 NEONMAP2(vqshl_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts), 7022 NEONMAP2(vqshl_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts), 7023 NEONMAP2(vqshlq_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl,UnsignedAlts), 7024 NEONMAP2(vqshlq_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts), 7025 NEONMAP1(vqshlu_n_v, aarch64_neon_sqshlu, 0), 7026 NEONMAP1(vqshluq_n_v, aarch64_neon_sqshlu, 0), 7027 NEONMAP2(vqsub_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts), 7028 NEONMAP2(vqsubq_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts), 7029 NEONMAP1(vraddhn_v, aarch64_neon_raddhn, Add1ArgType), 7030 NEONMAP1(vrax1q_u64, aarch64_crypto_rax1, 0), 7031 NEONMAP2(vrecpe_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0), 7032 NEONMAP2(vrecpeq_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0), 7033 NEONMAP1(vrecps_v, aarch64_neon_frecps, Add1ArgType), 7034 NEONMAP1(vrecpsq_v, aarch64_neon_frecps, Add1ArgType), 7035 NEONMAP2(vrhadd_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts), 7036 NEONMAP2(vrhaddq_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts), 7037 NEONMAP1(vrnd32x_f32, aarch64_neon_frint32x, Add1ArgType), 7038 NEONMAP1(vrnd32x_f64, aarch64_neon_frint32x, Add1ArgType), 7039 NEONMAP1(vrnd32xq_f32, aarch64_neon_frint32x, Add1ArgType), 7040 NEONMAP1(vrnd32xq_f64, aarch64_neon_frint32x, Add1ArgType), 7041 NEONMAP1(vrnd32z_f32, aarch64_neon_frint32z, Add1ArgType), 7042 NEONMAP1(vrnd32z_f64, aarch64_neon_frint32z, Add1ArgType), 7043 NEONMAP1(vrnd32zq_f32, aarch64_neon_frint32z, Add1ArgType), 7044 NEONMAP1(vrnd32zq_f64, aarch64_neon_frint32z, Add1ArgType), 7045 NEONMAP1(vrnd64x_f32, aarch64_neon_frint64x, Add1ArgType), 7046 NEONMAP1(vrnd64x_f64, aarch64_neon_frint64x, Add1ArgType), 7047 NEONMAP1(vrnd64xq_f32, aarch64_neon_frint64x, Add1ArgType), 7048 NEONMAP1(vrnd64xq_f64, aarch64_neon_frint64x, Add1ArgType), 7049 NEONMAP1(vrnd64z_f32, aarch64_neon_frint64z, 
Add1ArgType), 7050 NEONMAP1(vrnd64z_f64, aarch64_neon_frint64z, Add1ArgType), 7051 NEONMAP1(vrnd64zq_f32, aarch64_neon_frint64z, Add1ArgType), 7052 NEONMAP1(vrnd64zq_f64, aarch64_neon_frint64z, Add1ArgType), 7053 NEONMAP0(vrndi_v), 7054 NEONMAP0(vrndiq_v), 7055 NEONMAP2(vrshl_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts), 7056 NEONMAP2(vrshlq_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts), 7057 NEONMAP2(vrshr_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts), 7058 NEONMAP2(vrshrq_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts), 7059 NEONMAP2(vrsqrte_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0), 7060 NEONMAP2(vrsqrteq_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0), 7061 NEONMAP1(vrsqrts_v, aarch64_neon_frsqrts, Add1ArgType), 7062 NEONMAP1(vrsqrtsq_v, aarch64_neon_frsqrts, Add1ArgType), 7063 NEONMAP1(vrsubhn_v, aarch64_neon_rsubhn, Add1ArgType), 7064 NEONMAP1(vsha1su0q_u32, aarch64_crypto_sha1su0, 0), 7065 NEONMAP1(vsha1su1q_u32, aarch64_crypto_sha1su1, 0), 7066 NEONMAP1(vsha256h2q_u32, aarch64_crypto_sha256h2, 0), 7067 NEONMAP1(vsha256hq_u32, aarch64_crypto_sha256h, 0), 7068 NEONMAP1(vsha256su0q_u32, aarch64_crypto_sha256su0, 0), 7069 NEONMAP1(vsha256su1q_u32, aarch64_crypto_sha256su1, 0), 7070 NEONMAP1(vsha512h2q_u64, aarch64_crypto_sha512h2, 0), 7071 NEONMAP1(vsha512hq_u64, aarch64_crypto_sha512h, 0), 7072 NEONMAP1(vsha512su0q_u64, aarch64_crypto_sha512su0, 0), 7073 NEONMAP1(vsha512su1q_u64, aarch64_crypto_sha512su1, 0), 7074 NEONMAP0(vshl_n_v), 7075 NEONMAP2(vshl_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts), 7076 NEONMAP0(vshll_n_v), 7077 NEONMAP0(vshlq_n_v), 7078 NEONMAP2(vshlq_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts), 7079 NEONMAP0(vshr_n_v), 7080 NEONMAP0(vshrn_n_v), 7081 NEONMAP0(vshrq_n_v), 7082 NEONMAP1(vsm3partw1q_u32, aarch64_crypto_sm3partw1, 0), 7083 NEONMAP1(vsm3partw2q_u32, aarch64_crypto_sm3partw2, 0), 7084 NEONMAP1(vsm3ss1q_u32, aarch64_crypto_sm3ss1, 0), 7085 NEONMAP1(vsm3tt1aq_u32, aarch64_crypto_sm3tt1a, 0), 7086 NEONMAP1(vsm3tt1bq_u32, aarch64_crypto_sm3tt1b, 0), 7087 NEONMAP1(vsm3tt2aq_u32, aarch64_crypto_sm3tt2a, 0), 7088 NEONMAP1(vsm3tt2bq_u32, aarch64_crypto_sm3tt2b, 0), 7089 NEONMAP1(vsm4ekeyq_u32, aarch64_crypto_sm4ekey, 0), 7090 NEONMAP1(vsm4eq_u32, aarch64_crypto_sm4e, 0), 7091 NEONMAP1(vst1_x2_v, aarch64_neon_st1x2, 0), 7092 NEONMAP1(vst1_x3_v, aarch64_neon_st1x3, 0), 7093 NEONMAP1(vst1_x4_v, aarch64_neon_st1x4, 0), 7094 NEONMAP1(vst1q_x2_v, aarch64_neon_st1x2, 0), 7095 NEONMAP1(vst1q_x3_v, aarch64_neon_st1x3, 0), 7096 NEONMAP1(vst1q_x4_v, aarch64_neon_st1x4, 0), 7097 NEONMAP0(vsubhn_v), 7098 NEONMAP0(vtst_v), 7099 NEONMAP0(vtstq_v), 7100 NEONMAP1(vusdot_s32, aarch64_neon_usdot, 0), 7101 NEONMAP1(vusdotq_s32, aarch64_neon_usdot, 0), 7102 NEONMAP1(vusmmlaq_s32, aarch64_neon_usmmla, 0), 7103 NEONMAP1(vxarq_u64, aarch64_crypto_xar, 0), 7104 }; 7105 7106 static const ARMVectorIntrinsicInfo AArch64SISDIntrinsicMap[] = { 7107 NEONMAP1(vabdd_f64, aarch64_sisd_fabd, Add1ArgType), 7108 NEONMAP1(vabds_f32, aarch64_sisd_fabd, Add1ArgType), 7109 NEONMAP1(vabsd_s64, aarch64_neon_abs, Add1ArgType), 7110 NEONMAP1(vaddlv_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType), 7111 NEONMAP1(vaddlv_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType), 7112 NEONMAP1(vaddlvq_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType), 7113 NEONMAP1(vaddlvq_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType), 7114 NEONMAP1(vaddv_f32, aarch64_neon_faddv, AddRetType | 
Add1ArgType), 7115 NEONMAP1(vaddv_s32, aarch64_neon_saddv, AddRetType | Add1ArgType), 7116 NEONMAP1(vaddv_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType), 7117 NEONMAP1(vaddvq_f32, aarch64_neon_faddv, AddRetType | Add1ArgType), 7118 NEONMAP1(vaddvq_f64, aarch64_neon_faddv, AddRetType | Add1ArgType), 7119 NEONMAP1(vaddvq_s32, aarch64_neon_saddv, AddRetType | Add1ArgType), 7120 NEONMAP1(vaddvq_s64, aarch64_neon_saddv, AddRetType | Add1ArgType), 7121 NEONMAP1(vaddvq_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType), 7122 NEONMAP1(vaddvq_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType), 7123 NEONMAP1(vcaged_f64, aarch64_neon_facge, AddRetType | Add1ArgType), 7124 NEONMAP1(vcages_f32, aarch64_neon_facge, AddRetType | Add1ArgType), 7125 NEONMAP1(vcagtd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType), 7126 NEONMAP1(vcagts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType), 7127 NEONMAP1(vcaled_f64, aarch64_neon_facge, AddRetType | Add1ArgType), 7128 NEONMAP1(vcales_f32, aarch64_neon_facge, AddRetType | Add1ArgType), 7129 NEONMAP1(vcaltd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType), 7130 NEONMAP1(vcalts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType), 7131 NEONMAP1(vcvtad_s64_f64, aarch64_neon_fcvtas, AddRetType | Add1ArgType), 7132 NEONMAP1(vcvtad_u64_f64, aarch64_neon_fcvtau, AddRetType | Add1ArgType), 7133 NEONMAP1(vcvtas_s32_f32, aarch64_neon_fcvtas, AddRetType | Add1ArgType), 7134 NEONMAP1(vcvtas_u32_f32, aarch64_neon_fcvtau, AddRetType | Add1ArgType), 7135 NEONMAP1(vcvtd_n_f64_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType), 7136 NEONMAP1(vcvtd_n_f64_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType), 7137 NEONMAP1(vcvtd_n_s64_f64, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType), 7138 NEONMAP1(vcvtd_n_u64_f64, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType), 7139 NEONMAP1(vcvtd_s64_f64, aarch64_neon_fcvtzs, AddRetType | Add1ArgType), 7140 NEONMAP1(vcvtd_u64_f64, aarch64_neon_fcvtzu, AddRetType | Add1ArgType), 7141 NEONMAP1(vcvth_bf16_f32, aarch64_neon_bfcvt, 0), 7142 NEONMAP1(vcvtmd_s64_f64, aarch64_neon_fcvtms, AddRetType | Add1ArgType), 7143 NEONMAP1(vcvtmd_u64_f64, aarch64_neon_fcvtmu, AddRetType | Add1ArgType), 7144 NEONMAP1(vcvtms_s32_f32, aarch64_neon_fcvtms, AddRetType | Add1ArgType), 7145 NEONMAP1(vcvtms_u32_f32, aarch64_neon_fcvtmu, AddRetType | Add1ArgType), 7146 NEONMAP1(vcvtnd_s64_f64, aarch64_neon_fcvtns, AddRetType | Add1ArgType), 7147 NEONMAP1(vcvtnd_u64_f64, aarch64_neon_fcvtnu, AddRetType | Add1ArgType), 7148 NEONMAP1(vcvtns_s32_f32, aarch64_neon_fcvtns, AddRetType | Add1ArgType), 7149 NEONMAP1(vcvtns_u32_f32, aarch64_neon_fcvtnu, AddRetType | Add1ArgType), 7150 NEONMAP1(vcvtpd_s64_f64, aarch64_neon_fcvtps, AddRetType | Add1ArgType), 7151 NEONMAP1(vcvtpd_u64_f64, aarch64_neon_fcvtpu, AddRetType | Add1ArgType), 7152 NEONMAP1(vcvtps_s32_f32, aarch64_neon_fcvtps, AddRetType | Add1ArgType), 7153 NEONMAP1(vcvtps_u32_f32, aarch64_neon_fcvtpu, AddRetType | Add1ArgType), 7154 NEONMAP1(vcvts_n_f32_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType), 7155 NEONMAP1(vcvts_n_f32_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType), 7156 NEONMAP1(vcvts_n_s32_f32, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType), 7157 NEONMAP1(vcvts_n_u32_f32, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType), 7158 NEONMAP1(vcvts_s32_f32, aarch64_neon_fcvtzs, AddRetType | Add1ArgType), 7159 NEONMAP1(vcvts_u32_f32, aarch64_neon_fcvtzu, AddRetType | Add1ArgType), 7160 NEONMAP1(vcvtxd_f32_f64, aarch64_sisd_fcvtxn, 0), 7161 NEONMAP1(vmaxnmv_f32, 
aarch64_neon_fmaxnmv, AddRetType | Add1ArgType), 7162 NEONMAP1(vmaxnmvq_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType), 7163 NEONMAP1(vmaxnmvq_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType), 7164 NEONMAP1(vmaxv_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType), 7165 NEONMAP1(vmaxv_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType), 7166 NEONMAP1(vmaxv_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType), 7167 NEONMAP1(vmaxvq_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType), 7168 NEONMAP1(vmaxvq_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType), 7169 NEONMAP1(vmaxvq_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType), 7170 NEONMAP1(vmaxvq_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType), 7171 NEONMAP1(vminnmv_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType), 7172 NEONMAP1(vminnmvq_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType), 7173 NEONMAP1(vminnmvq_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType), 7174 NEONMAP1(vminv_f32, aarch64_neon_fminv, AddRetType | Add1ArgType), 7175 NEONMAP1(vminv_s32, aarch64_neon_sminv, AddRetType | Add1ArgType), 7176 NEONMAP1(vminv_u32, aarch64_neon_uminv, AddRetType | Add1ArgType), 7177 NEONMAP1(vminvq_f32, aarch64_neon_fminv, AddRetType | Add1ArgType), 7178 NEONMAP1(vminvq_f64, aarch64_neon_fminv, AddRetType | Add1ArgType), 7179 NEONMAP1(vminvq_s32, aarch64_neon_sminv, AddRetType | Add1ArgType), 7180 NEONMAP1(vminvq_u32, aarch64_neon_uminv, AddRetType | Add1ArgType), 7181 NEONMAP1(vmull_p64, aarch64_neon_pmull64, 0), 7182 NEONMAP1(vmulxd_f64, aarch64_neon_fmulx, Add1ArgType), 7183 NEONMAP1(vmulxs_f32, aarch64_neon_fmulx, Add1ArgType), 7184 NEONMAP1(vpaddd_s64, aarch64_neon_uaddv, AddRetType | Add1ArgType), 7185 NEONMAP1(vpaddd_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType), 7186 NEONMAP1(vpmaxnmqd_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType), 7187 NEONMAP1(vpmaxnms_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType), 7188 NEONMAP1(vpmaxqd_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType), 7189 NEONMAP1(vpmaxs_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType), 7190 NEONMAP1(vpminnmqd_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType), 7191 NEONMAP1(vpminnms_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType), 7192 NEONMAP1(vpminqd_f64, aarch64_neon_fminv, AddRetType | Add1ArgType), 7193 NEONMAP1(vpmins_f32, aarch64_neon_fminv, AddRetType | Add1ArgType), 7194 NEONMAP1(vqabsb_s8, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors), 7195 NEONMAP1(vqabsd_s64, aarch64_neon_sqabs, Add1ArgType), 7196 NEONMAP1(vqabsh_s16, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors), 7197 NEONMAP1(vqabss_s32, aarch64_neon_sqabs, Add1ArgType), 7198 NEONMAP1(vqaddb_s8, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors), 7199 NEONMAP1(vqaddb_u8, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors), 7200 NEONMAP1(vqaddd_s64, aarch64_neon_sqadd, Add1ArgType), 7201 NEONMAP1(vqaddd_u64, aarch64_neon_uqadd, Add1ArgType), 7202 NEONMAP1(vqaddh_s16, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors), 7203 NEONMAP1(vqaddh_u16, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors), 7204 NEONMAP1(vqadds_s32, aarch64_neon_sqadd, Add1ArgType), 7205 NEONMAP1(vqadds_u32, aarch64_neon_uqadd, Add1ArgType), 7206 NEONMAP1(vqdmulhh_s16, aarch64_neon_sqdmulh, Vectorize1ArgType | Use64BitVectors), 7207 NEONMAP1(vqdmulhs_s32, aarch64_neon_sqdmulh, Add1ArgType), 7208 NEONMAP1(vqdmullh_s16, aarch64_neon_sqdmull, VectorRet | Use128BitVectors), 7209 NEONMAP1(vqdmulls_s32, aarch64_neon_sqdmulls_scalar, 0), 7210 NEONMAP1(vqmovnd_s64, 
aarch64_neon_scalar_sqxtn, AddRetType | Add1ArgType), 7211 NEONMAP1(vqmovnd_u64, aarch64_neon_scalar_uqxtn, AddRetType | Add1ArgType), 7212 NEONMAP1(vqmovnh_s16, aarch64_neon_sqxtn, VectorRet | Use64BitVectors), 7213 NEONMAP1(vqmovnh_u16, aarch64_neon_uqxtn, VectorRet | Use64BitVectors), 7214 NEONMAP1(vqmovns_s32, aarch64_neon_sqxtn, VectorRet | Use64BitVectors), 7215 NEONMAP1(vqmovns_u32, aarch64_neon_uqxtn, VectorRet | Use64BitVectors), 7216 NEONMAP1(vqmovund_s64, aarch64_neon_scalar_sqxtun, AddRetType | Add1ArgType), 7217 NEONMAP1(vqmovunh_s16, aarch64_neon_sqxtun, VectorRet | Use64BitVectors), 7218 NEONMAP1(vqmovuns_s32, aarch64_neon_sqxtun, VectorRet | Use64BitVectors), 7219 NEONMAP1(vqnegb_s8, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors), 7220 NEONMAP1(vqnegd_s64, aarch64_neon_sqneg, Add1ArgType), 7221 NEONMAP1(vqnegh_s16, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors), 7222 NEONMAP1(vqnegs_s32, aarch64_neon_sqneg, Add1ArgType), 7223 NEONMAP1(vqrdmlahh_s16, aarch64_neon_sqrdmlah, Vectorize1ArgType | Use64BitVectors), 7224 NEONMAP1(vqrdmlahs_s32, aarch64_neon_sqrdmlah, Add1ArgType), 7225 NEONMAP1(vqrdmlshh_s16, aarch64_neon_sqrdmlsh, Vectorize1ArgType | Use64BitVectors), 7226 NEONMAP1(vqrdmlshs_s32, aarch64_neon_sqrdmlsh, Add1ArgType), 7227 NEONMAP1(vqrdmulhh_s16, aarch64_neon_sqrdmulh, Vectorize1ArgType | Use64BitVectors), 7228 NEONMAP1(vqrdmulhs_s32, aarch64_neon_sqrdmulh, Add1ArgType), 7229 NEONMAP1(vqrshlb_s8, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors), 7230 NEONMAP1(vqrshlb_u8, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors), 7231 NEONMAP1(vqrshld_s64, aarch64_neon_sqrshl, Add1ArgType), 7232 NEONMAP1(vqrshld_u64, aarch64_neon_uqrshl, Add1ArgType), 7233 NEONMAP1(vqrshlh_s16, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors), 7234 NEONMAP1(vqrshlh_u16, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors), 7235 NEONMAP1(vqrshls_s32, aarch64_neon_sqrshl, Add1ArgType), 7236 NEONMAP1(vqrshls_u32, aarch64_neon_uqrshl, Add1ArgType), 7237 NEONMAP1(vqrshrnd_n_s64, aarch64_neon_sqrshrn, AddRetType), 7238 NEONMAP1(vqrshrnd_n_u64, aarch64_neon_uqrshrn, AddRetType), 7239 NEONMAP1(vqrshrnh_n_s16, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors), 7240 NEONMAP1(vqrshrnh_n_u16, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors), 7241 NEONMAP1(vqrshrns_n_s32, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors), 7242 NEONMAP1(vqrshrns_n_u32, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors), 7243 NEONMAP1(vqrshrund_n_s64, aarch64_neon_sqrshrun, AddRetType), 7244 NEONMAP1(vqrshrunh_n_s16, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors), 7245 NEONMAP1(vqrshruns_n_s32, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors), 7246 NEONMAP1(vqshlb_n_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors), 7247 NEONMAP1(vqshlb_n_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors), 7248 NEONMAP1(vqshlb_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors), 7249 NEONMAP1(vqshlb_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors), 7250 NEONMAP1(vqshld_s64, aarch64_neon_sqshl, Add1ArgType), 7251 NEONMAP1(vqshld_u64, aarch64_neon_uqshl, Add1ArgType), 7252 NEONMAP1(vqshlh_n_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors), 7253 NEONMAP1(vqshlh_n_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors), 7254 NEONMAP1(vqshlh_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors), 7255 NEONMAP1(vqshlh_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors), 7256 NEONMAP1(vqshls_n_s32, 
    aarch64_neon_sqshl, Add1ArgType),
  NEONMAP1(vqshls_n_u32, aarch64_neon_uqshl, Add1ArgType),
  NEONMAP1(vqshls_s32, aarch64_neon_sqshl, Add1ArgType),
  NEONMAP1(vqshls_u32, aarch64_neon_uqshl, Add1ArgType),
  NEONMAP1(vqshlub_n_s8, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshluh_n_s16, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshlus_n_s32, aarch64_neon_sqshlu, Add1ArgType),
  NEONMAP1(vqshrnd_n_s64, aarch64_neon_sqshrn, AddRetType),
  NEONMAP1(vqshrnd_n_u64, aarch64_neon_uqshrn, AddRetType),
  NEONMAP1(vqshrnh_n_s16, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
  NEONMAP1(vqshrnh_n_u16, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
  NEONMAP1(vqshrns_n_s32, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
  NEONMAP1(vqshrns_n_u32, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
  NEONMAP1(vqshrund_n_s64, aarch64_neon_sqshrun, AddRetType),
  NEONMAP1(vqshrunh_n_s16, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
  NEONMAP1(vqshruns_n_s32, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
  NEONMAP1(vqsubb_s8, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqsubb_u8, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqsubd_s64, aarch64_neon_sqsub, Add1ArgType),
  NEONMAP1(vqsubd_u64, aarch64_neon_uqsub, Add1ArgType),
  NEONMAP1(vqsubh_s16, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqsubh_u16, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqsubs_s32, aarch64_neon_sqsub, Add1ArgType),
  NEONMAP1(vqsubs_u32, aarch64_neon_uqsub, Add1ArgType),
  NEONMAP1(vrecped_f64, aarch64_neon_frecpe, Add1ArgType),
  NEONMAP1(vrecpes_f32, aarch64_neon_frecpe, Add1ArgType),
  NEONMAP1(vrecpxd_f64, aarch64_neon_frecpx, Add1ArgType),
  NEONMAP1(vrecpxs_f32, aarch64_neon_frecpx, Add1ArgType),
  NEONMAP1(vrshld_s64, aarch64_neon_srshl, Add1ArgType),
  NEONMAP1(vrshld_u64, aarch64_neon_urshl, Add1ArgType),
  NEONMAP1(vrsqrted_f64, aarch64_neon_frsqrte, Add1ArgType),
  NEONMAP1(vrsqrtes_f32, aarch64_neon_frsqrte, Add1ArgType),
  NEONMAP1(vrsqrtsd_f64, aarch64_neon_frsqrts, Add1ArgType),
  NEONMAP1(vrsqrtss_f32, aarch64_neon_frsqrts, Add1ArgType),
  NEONMAP1(vsha1cq_u32, aarch64_crypto_sha1c, 0),
  NEONMAP1(vsha1h_u32, aarch64_crypto_sha1h, 0),
  NEONMAP1(vsha1mq_u32, aarch64_crypto_sha1m, 0),
  NEONMAP1(vsha1pq_u32, aarch64_crypto_sha1p, 0),
  NEONMAP1(vshld_s64, aarch64_neon_sshl, Add1ArgType),
  NEONMAP1(vshld_u64, aarch64_neon_ushl, Add1ArgType),
  NEONMAP1(vslid_n_s64, aarch64_neon_vsli, Vectorize1ArgType),
  NEONMAP1(vslid_n_u64, aarch64_neon_vsli, Vectorize1ArgType),
  NEONMAP1(vsqaddb_u8, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vsqaddd_u64, aarch64_neon_usqadd, Add1ArgType),
  NEONMAP1(vsqaddh_u16, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vsqadds_u32, aarch64_neon_usqadd, Add1ArgType),
  NEONMAP1(vsrid_n_s64, aarch64_neon_vsri, Vectorize1ArgType),
  NEONMAP1(vsrid_n_u64, aarch64_neon_vsri, Vectorize1ArgType),
  NEONMAP1(vuqaddb_s8, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vuqaddd_s64, aarch64_neon_suqadd, Add1ArgType),
  NEONMAP1(vuqaddh_s16, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vuqadds_s32, aarch64_neon_suqadd, Add1ArgType),
  // FP16 scalar intrinsics go here.
7309 NEONMAP1(vabdh_f16, aarch64_sisd_fabd, Add1ArgType), 7310 NEONMAP1(vcvtah_s32_f16, aarch64_neon_fcvtas, AddRetType | Add1ArgType), 7311 NEONMAP1(vcvtah_s64_f16, aarch64_neon_fcvtas, AddRetType | Add1ArgType), 7312 NEONMAP1(vcvtah_u32_f16, aarch64_neon_fcvtau, AddRetType | Add1ArgType), 7313 NEONMAP1(vcvtah_u64_f16, aarch64_neon_fcvtau, AddRetType | Add1ArgType), 7314 NEONMAP1(vcvth_n_f16_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType), 7315 NEONMAP1(vcvth_n_f16_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType), 7316 NEONMAP1(vcvth_n_f16_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType), 7317 NEONMAP1(vcvth_n_f16_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType), 7318 NEONMAP1(vcvth_n_s32_f16, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType), 7319 NEONMAP1(vcvth_n_s64_f16, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType), 7320 NEONMAP1(vcvth_n_u32_f16, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType), 7321 NEONMAP1(vcvth_n_u64_f16, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType), 7322 NEONMAP1(vcvth_s32_f16, aarch64_neon_fcvtzs, AddRetType | Add1ArgType), 7323 NEONMAP1(vcvth_s64_f16, aarch64_neon_fcvtzs, AddRetType | Add1ArgType), 7324 NEONMAP1(vcvth_u32_f16, aarch64_neon_fcvtzu, AddRetType | Add1ArgType), 7325 NEONMAP1(vcvth_u64_f16, aarch64_neon_fcvtzu, AddRetType | Add1ArgType), 7326 NEONMAP1(vcvtmh_s32_f16, aarch64_neon_fcvtms, AddRetType | Add1ArgType), 7327 NEONMAP1(vcvtmh_s64_f16, aarch64_neon_fcvtms, AddRetType | Add1ArgType), 7328 NEONMAP1(vcvtmh_u32_f16, aarch64_neon_fcvtmu, AddRetType | Add1ArgType), 7329 NEONMAP1(vcvtmh_u64_f16, aarch64_neon_fcvtmu, AddRetType | Add1ArgType), 7330 NEONMAP1(vcvtnh_s32_f16, aarch64_neon_fcvtns, AddRetType | Add1ArgType), 7331 NEONMAP1(vcvtnh_s64_f16, aarch64_neon_fcvtns, AddRetType | Add1ArgType), 7332 NEONMAP1(vcvtnh_u32_f16, aarch64_neon_fcvtnu, AddRetType | Add1ArgType), 7333 NEONMAP1(vcvtnh_u64_f16, aarch64_neon_fcvtnu, AddRetType | Add1ArgType), 7334 NEONMAP1(vcvtph_s32_f16, aarch64_neon_fcvtps, AddRetType | Add1ArgType), 7335 NEONMAP1(vcvtph_s64_f16, aarch64_neon_fcvtps, AddRetType | Add1ArgType), 7336 NEONMAP1(vcvtph_u32_f16, aarch64_neon_fcvtpu, AddRetType | Add1ArgType), 7337 NEONMAP1(vcvtph_u64_f16, aarch64_neon_fcvtpu, AddRetType | Add1ArgType), 7338 NEONMAP1(vmulxh_f16, aarch64_neon_fmulx, Add1ArgType), 7339 NEONMAP1(vrecpeh_f16, aarch64_neon_frecpe, Add1ArgType), 7340 NEONMAP1(vrecpxh_f16, aarch64_neon_frecpx, Add1ArgType), 7341 NEONMAP1(vrsqrteh_f16, aarch64_neon_frsqrte, Add1ArgType), 7342 NEONMAP1(vrsqrtsh_f16, aarch64_neon_frsqrts, Add1ArgType), 7343 }; 7344 7345 // Some intrinsics are equivalent for codegen. 
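// Each pair maps the builtin in the first slot to the equivalent builtin in
// the second slot, whose codegen path is reused for it.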
7346 static const std::pair<unsigned, unsigned> NEONEquivalentIntrinsicMap[] = { 7347 { NEON::BI__builtin_neon_splat_lane_bf16, NEON::BI__builtin_neon_splat_lane_v, }, 7348 { NEON::BI__builtin_neon_splat_laneq_bf16, NEON::BI__builtin_neon_splat_laneq_v, }, 7349 { NEON::BI__builtin_neon_splatq_lane_bf16, NEON::BI__builtin_neon_splatq_lane_v, }, 7350 { NEON::BI__builtin_neon_splatq_laneq_bf16, NEON::BI__builtin_neon_splatq_laneq_v, }, 7351 { NEON::BI__builtin_neon_vabd_f16, NEON::BI__builtin_neon_vabd_v, }, 7352 { NEON::BI__builtin_neon_vabdq_f16, NEON::BI__builtin_neon_vabdq_v, }, 7353 { NEON::BI__builtin_neon_vabs_f16, NEON::BI__builtin_neon_vabs_v, }, 7354 { NEON::BI__builtin_neon_vabsq_f16, NEON::BI__builtin_neon_vabsq_v, }, 7355 { NEON::BI__builtin_neon_vcage_f16, NEON::BI__builtin_neon_vcage_v, }, 7356 { NEON::BI__builtin_neon_vcageq_f16, NEON::BI__builtin_neon_vcageq_v, }, 7357 { NEON::BI__builtin_neon_vcagt_f16, NEON::BI__builtin_neon_vcagt_v, }, 7358 { NEON::BI__builtin_neon_vcagtq_f16, NEON::BI__builtin_neon_vcagtq_v, }, 7359 { NEON::BI__builtin_neon_vcale_f16, NEON::BI__builtin_neon_vcale_v, }, 7360 { NEON::BI__builtin_neon_vcaleq_f16, NEON::BI__builtin_neon_vcaleq_v, }, 7361 { NEON::BI__builtin_neon_vcalt_f16, NEON::BI__builtin_neon_vcalt_v, }, 7362 { NEON::BI__builtin_neon_vcaltq_f16, NEON::BI__builtin_neon_vcaltq_v, }, 7363 { NEON::BI__builtin_neon_vceqz_f16, NEON::BI__builtin_neon_vceqz_v, }, 7364 { NEON::BI__builtin_neon_vceqzq_f16, NEON::BI__builtin_neon_vceqzq_v, }, 7365 { NEON::BI__builtin_neon_vcgez_f16, NEON::BI__builtin_neon_vcgez_v, }, 7366 { NEON::BI__builtin_neon_vcgezq_f16, NEON::BI__builtin_neon_vcgezq_v, }, 7367 { NEON::BI__builtin_neon_vcgtz_f16, NEON::BI__builtin_neon_vcgtz_v, }, 7368 { NEON::BI__builtin_neon_vcgtzq_f16, NEON::BI__builtin_neon_vcgtzq_v, }, 7369 { NEON::BI__builtin_neon_vclez_f16, NEON::BI__builtin_neon_vclez_v, }, 7370 { NEON::BI__builtin_neon_vclezq_f16, NEON::BI__builtin_neon_vclezq_v, }, 7371 { NEON::BI__builtin_neon_vcltz_f16, NEON::BI__builtin_neon_vcltz_v, }, 7372 { NEON::BI__builtin_neon_vcltzq_f16, NEON::BI__builtin_neon_vcltzq_v, }, 7373 { NEON::BI__builtin_neon_vfma_f16, NEON::BI__builtin_neon_vfma_v, }, 7374 { NEON::BI__builtin_neon_vfma_lane_f16, NEON::BI__builtin_neon_vfma_lane_v, }, 7375 { NEON::BI__builtin_neon_vfma_laneq_f16, NEON::BI__builtin_neon_vfma_laneq_v, }, 7376 { NEON::BI__builtin_neon_vfmaq_f16, NEON::BI__builtin_neon_vfmaq_v, }, 7377 { NEON::BI__builtin_neon_vfmaq_lane_f16, NEON::BI__builtin_neon_vfmaq_lane_v, }, 7378 { NEON::BI__builtin_neon_vfmaq_laneq_f16, NEON::BI__builtin_neon_vfmaq_laneq_v, }, 7379 { NEON::BI__builtin_neon_vld1_bf16_x2, NEON::BI__builtin_neon_vld1_x2_v }, 7380 { NEON::BI__builtin_neon_vld1_bf16_x3, NEON::BI__builtin_neon_vld1_x3_v }, 7381 { NEON::BI__builtin_neon_vld1_bf16_x4, NEON::BI__builtin_neon_vld1_x4_v }, 7382 { NEON::BI__builtin_neon_vld1_bf16, NEON::BI__builtin_neon_vld1_v }, 7383 { NEON::BI__builtin_neon_vld1_dup_bf16, NEON::BI__builtin_neon_vld1_dup_v }, 7384 { NEON::BI__builtin_neon_vld1_lane_bf16, NEON::BI__builtin_neon_vld1_lane_v }, 7385 { NEON::BI__builtin_neon_vld1q_bf16_x2, NEON::BI__builtin_neon_vld1q_x2_v }, 7386 { NEON::BI__builtin_neon_vld1q_bf16_x3, NEON::BI__builtin_neon_vld1q_x3_v }, 7387 { NEON::BI__builtin_neon_vld1q_bf16_x4, NEON::BI__builtin_neon_vld1q_x4_v }, 7388 { NEON::BI__builtin_neon_vld1q_bf16, NEON::BI__builtin_neon_vld1q_v }, 7389 { NEON::BI__builtin_neon_vld1q_dup_bf16, NEON::BI__builtin_neon_vld1q_dup_v }, 7390 { NEON::BI__builtin_neon_vld1q_lane_bf16, 
NEON::BI__builtin_neon_vld1q_lane_v }, 7391 { NEON::BI__builtin_neon_vld2_bf16, NEON::BI__builtin_neon_vld2_v }, 7392 { NEON::BI__builtin_neon_vld2_dup_bf16, NEON::BI__builtin_neon_vld2_dup_v }, 7393 { NEON::BI__builtin_neon_vld2_lane_bf16, NEON::BI__builtin_neon_vld2_lane_v }, 7394 { NEON::BI__builtin_neon_vld2q_bf16, NEON::BI__builtin_neon_vld2q_v }, 7395 { NEON::BI__builtin_neon_vld2q_dup_bf16, NEON::BI__builtin_neon_vld2q_dup_v }, 7396 { NEON::BI__builtin_neon_vld2q_lane_bf16, NEON::BI__builtin_neon_vld2q_lane_v }, 7397 { NEON::BI__builtin_neon_vld3_bf16, NEON::BI__builtin_neon_vld3_v }, 7398 { NEON::BI__builtin_neon_vld3_dup_bf16, NEON::BI__builtin_neon_vld3_dup_v }, 7399 { NEON::BI__builtin_neon_vld3_lane_bf16, NEON::BI__builtin_neon_vld3_lane_v }, 7400 { NEON::BI__builtin_neon_vld3q_bf16, NEON::BI__builtin_neon_vld3q_v }, 7401 { NEON::BI__builtin_neon_vld3q_dup_bf16, NEON::BI__builtin_neon_vld3q_dup_v }, 7402 { NEON::BI__builtin_neon_vld3q_lane_bf16, NEON::BI__builtin_neon_vld3q_lane_v }, 7403 { NEON::BI__builtin_neon_vld4_bf16, NEON::BI__builtin_neon_vld4_v }, 7404 { NEON::BI__builtin_neon_vld4_dup_bf16, NEON::BI__builtin_neon_vld4_dup_v }, 7405 { NEON::BI__builtin_neon_vld4_lane_bf16, NEON::BI__builtin_neon_vld4_lane_v }, 7406 { NEON::BI__builtin_neon_vld4q_bf16, NEON::BI__builtin_neon_vld4q_v }, 7407 { NEON::BI__builtin_neon_vld4q_dup_bf16, NEON::BI__builtin_neon_vld4q_dup_v }, 7408 { NEON::BI__builtin_neon_vld4q_lane_bf16, NEON::BI__builtin_neon_vld4q_lane_v }, 7409 { NEON::BI__builtin_neon_vmax_f16, NEON::BI__builtin_neon_vmax_v, }, 7410 { NEON::BI__builtin_neon_vmaxnm_f16, NEON::BI__builtin_neon_vmaxnm_v, }, 7411 { NEON::BI__builtin_neon_vmaxnmq_f16, NEON::BI__builtin_neon_vmaxnmq_v, }, 7412 { NEON::BI__builtin_neon_vmaxq_f16, NEON::BI__builtin_neon_vmaxq_v, }, 7413 { NEON::BI__builtin_neon_vmin_f16, NEON::BI__builtin_neon_vmin_v, }, 7414 { NEON::BI__builtin_neon_vminnm_f16, NEON::BI__builtin_neon_vminnm_v, }, 7415 { NEON::BI__builtin_neon_vminnmq_f16, NEON::BI__builtin_neon_vminnmq_v, }, 7416 { NEON::BI__builtin_neon_vminq_f16, NEON::BI__builtin_neon_vminq_v, }, 7417 { NEON::BI__builtin_neon_vmulx_f16, NEON::BI__builtin_neon_vmulx_v, }, 7418 { NEON::BI__builtin_neon_vmulxq_f16, NEON::BI__builtin_neon_vmulxq_v, }, 7419 { NEON::BI__builtin_neon_vpadd_f16, NEON::BI__builtin_neon_vpadd_v, }, 7420 { NEON::BI__builtin_neon_vpaddq_f16, NEON::BI__builtin_neon_vpaddq_v, }, 7421 { NEON::BI__builtin_neon_vpmax_f16, NEON::BI__builtin_neon_vpmax_v, }, 7422 { NEON::BI__builtin_neon_vpmaxnm_f16, NEON::BI__builtin_neon_vpmaxnm_v, }, 7423 { NEON::BI__builtin_neon_vpmaxnmq_f16, NEON::BI__builtin_neon_vpmaxnmq_v, }, 7424 { NEON::BI__builtin_neon_vpmaxq_f16, NEON::BI__builtin_neon_vpmaxq_v, }, 7425 { NEON::BI__builtin_neon_vpmin_f16, NEON::BI__builtin_neon_vpmin_v, }, 7426 { NEON::BI__builtin_neon_vpminnm_f16, NEON::BI__builtin_neon_vpminnm_v, }, 7427 { NEON::BI__builtin_neon_vpminnmq_f16, NEON::BI__builtin_neon_vpminnmq_v, }, 7428 { NEON::BI__builtin_neon_vpminq_f16, NEON::BI__builtin_neon_vpminq_v, }, 7429 { NEON::BI__builtin_neon_vrecpe_f16, NEON::BI__builtin_neon_vrecpe_v, }, 7430 { NEON::BI__builtin_neon_vrecpeq_f16, NEON::BI__builtin_neon_vrecpeq_v, }, 7431 { NEON::BI__builtin_neon_vrecps_f16, NEON::BI__builtin_neon_vrecps_v, }, 7432 { NEON::BI__builtin_neon_vrecpsq_f16, NEON::BI__builtin_neon_vrecpsq_v, }, 7433 { NEON::BI__builtin_neon_vrnd_f16, NEON::BI__builtin_neon_vrnd_v, }, 7434 { NEON::BI__builtin_neon_vrnda_f16, NEON::BI__builtin_neon_vrnda_v, }, 7435 { 
    NEON::BI__builtin_neon_vrndaq_f16, NEON::BI__builtin_neon_vrndaq_v, },
  { NEON::BI__builtin_neon_vrndi_f16, NEON::BI__builtin_neon_vrndi_v, },
  { NEON::BI__builtin_neon_vrndiq_f16, NEON::BI__builtin_neon_vrndiq_v, },
  { NEON::BI__builtin_neon_vrndm_f16, NEON::BI__builtin_neon_vrndm_v, },
  { NEON::BI__builtin_neon_vrndmq_f16, NEON::BI__builtin_neon_vrndmq_v, },
  { NEON::BI__builtin_neon_vrndn_f16, NEON::BI__builtin_neon_vrndn_v, },
  { NEON::BI__builtin_neon_vrndnq_f16, NEON::BI__builtin_neon_vrndnq_v, },
  { NEON::BI__builtin_neon_vrndp_f16, NEON::BI__builtin_neon_vrndp_v, },
  { NEON::BI__builtin_neon_vrndpq_f16, NEON::BI__builtin_neon_vrndpq_v, },
  { NEON::BI__builtin_neon_vrndq_f16, NEON::BI__builtin_neon_vrndq_v, },
  { NEON::BI__builtin_neon_vrndx_f16, NEON::BI__builtin_neon_vrndx_v, },
  { NEON::BI__builtin_neon_vrndxq_f16, NEON::BI__builtin_neon_vrndxq_v, },
  { NEON::BI__builtin_neon_vrsqrte_f16, NEON::BI__builtin_neon_vrsqrte_v, },
  { NEON::BI__builtin_neon_vrsqrteq_f16, NEON::BI__builtin_neon_vrsqrteq_v, },
  { NEON::BI__builtin_neon_vrsqrts_f16, NEON::BI__builtin_neon_vrsqrts_v, },
  { NEON::BI__builtin_neon_vrsqrtsq_f16, NEON::BI__builtin_neon_vrsqrtsq_v, },
  { NEON::BI__builtin_neon_vsqrt_f16, NEON::BI__builtin_neon_vsqrt_v, },
  { NEON::BI__builtin_neon_vsqrtq_f16, NEON::BI__builtin_neon_vsqrtq_v, },
  { NEON::BI__builtin_neon_vst1_bf16_x2, NEON::BI__builtin_neon_vst1_x2_v },
  { NEON::BI__builtin_neon_vst1_bf16_x3, NEON::BI__builtin_neon_vst1_x3_v },
  { NEON::BI__builtin_neon_vst1_bf16_x4, NEON::BI__builtin_neon_vst1_x4_v },
  { NEON::BI__builtin_neon_vst1_bf16, NEON::BI__builtin_neon_vst1_v },
  { NEON::BI__builtin_neon_vst1_lane_bf16, NEON::BI__builtin_neon_vst1_lane_v },
  { NEON::BI__builtin_neon_vst1q_bf16_x2, NEON::BI__builtin_neon_vst1q_x2_v },
  { NEON::BI__builtin_neon_vst1q_bf16_x3, NEON::BI__builtin_neon_vst1q_x3_v },
  { NEON::BI__builtin_neon_vst1q_bf16_x4, NEON::BI__builtin_neon_vst1q_x4_v },
  { NEON::BI__builtin_neon_vst1q_bf16, NEON::BI__builtin_neon_vst1q_v },
  { NEON::BI__builtin_neon_vst1q_lane_bf16, NEON::BI__builtin_neon_vst1q_lane_v },
  { NEON::BI__builtin_neon_vst2_bf16, NEON::BI__builtin_neon_vst2_v },
  { NEON::BI__builtin_neon_vst2_lane_bf16, NEON::BI__builtin_neon_vst2_lane_v },
  { NEON::BI__builtin_neon_vst2q_bf16, NEON::BI__builtin_neon_vst2q_v },
  { NEON::BI__builtin_neon_vst2q_lane_bf16, NEON::BI__builtin_neon_vst2q_lane_v },
  { NEON::BI__builtin_neon_vst3_bf16, NEON::BI__builtin_neon_vst3_v },
  { NEON::BI__builtin_neon_vst3_lane_bf16, NEON::BI__builtin_neon_vst3_lane_v },
  { NEON::BI__builtin_neon_vst3q_bf16, NEON::BI__builtin_neon_vst3q_v },
  { NEON::BI__builtin_neon_vst3q_lane_bf16, NEON::BI__builtin_neon_vst3q_lane_v },
  { NEON::BI__builtin_neon_vst4_bf16, NEON::BI__builtin_neon_vst4_v },
  { NEON::BI__builtin_neon_vst4_lane_bf16, NEON::BI__builtin_neon_vst4_lane_v },
  { NEON::BI__builtin_neon_vst4q_bf16, NEON::BI__builtin_neon_vst4q_v },
  { NEON::BI__builtin_neon_vst4q_lane_bf16, NEON::BI__builtin_neon_vst4q_lane_v },
  // The mangling rules cause us to have one ID for each type for vldap1(q)_lane
  // and vstl1(q)_lane, but codegen is equivalent for all of them. Choose an
  // arbitrary one to be handled as the canonical variation.
7478 { NEON::BI__builtin_neon_vldap1_lane_u64, NEON::BI__builtin_neon_vldap1_lane_s64 }, 7479 { NEON::BI__builtin_neon_vldap1_lane_f64, NEON::BI__builtin_neon_vldap1_lane_s64 }, 7480 { NEON::BI__builtin_neon_vldap1_lane_p64, NEON::BI__builtin_neon_vldap1_lane_s64 }, 7481 { NEON::BI__builtin_neon_vldap1q_lane_u64, NEON::BI__builtin_neon_vldap1q_lane_s64 }, 7482 { NEON::BI__builtin_neon_vldap1q_lane_f64, NEON::BI__builtin_neon_vldap1q_lane_s64 }, 7483 { NEON::BI__builtin_neon_vldap1q_lane_p64, NEON::BI__builtin_neon_vldap1q_lane_s64 }, 7484 { NEON::BI__builtin_neon_vstl1_lane_u64, NEON::BI__builtin_neon_vstl1_lane_s64 }, 7485 { NEON::BI__builtin_neon_vstl1_lane_f64, NEON::BI__builtin_neon_vstl1_lane_s64 }, 7486 { NEON::BI__builtin_neon_vstl1_lane_p64, NEON::BI__builtin_neon_vstl1_lane_s64 }, 7487 { NEON::BI__builtin_neon_vstl1q_lane_u64, NEON::BI__builtin_neon_vstl1q_lane_s64 }, 7488 { NEON::BI__builtin_neon_vstl1q_lane_f64, NEON::BI__builtin_neon_vstl1q_lane_s64 }, 7489 { NEON::BI__builtin_neon_vstl1q_lane_p64, NEON::BI__builtin_neon_vstl1q_lane_s64 }, 7490 }; 7491 7492 #undef NEONMAP0 7493 #undef NEONMAP1 7494 #undef NEONMAP2 7495 7496 #define SVEMAP1(NameBase, LLVMIntrinsic, TypeModifier) \ 7497 { \ 7498 #NameBase, SVE::BI__builtin_sve_##NameBase, Intrinsic::LLVMIntrinsic, 0, \ 7499 TypeModifier \ 7500 } 7501 7502 #define SVEMAP2(NameBase, TypeModifier) \ 7503 { #NameBase, SVE::BI__builtin_sve_##NameBase, 0, 0, TypeModifier } 7504 static const ARMVectorIntrinsicInfo AArch64SVEIntrinsicMap[] = { 7505 #define GET_SVE_LLVM_INTRINSIC_MAP 7506 #include "clang/Basic/arm_sve_builtin_cg.inc" 7507 #include "clang/Basic/BuiltinsAArch64NeonSVEBridge_cg.def" 7508 #undef GET_SVE_LLVM_INTRINSIC_MAP 7509 }; 7510 7511 #undef SVEMAP1 7512 #undef SVEMAP2 7513 7514 #define SMEMAP1(NameBase, LLVMIntrinsic, TypeModifier) \ 7515 { \ 7516 #NameBase, SME::BI__builtin_sme_##NameBase, Intrinsic::LLVMIntrinsic, 0, \ 7517 TypeModifier \ 7518 } 7519 7520 #define SMEMAP2(NameBase, TypeModifier) \ 7521 { #NameBase, SME::BI__builtin_sme_##NameBase, 0, 0, TypeModifier } 7522 static const ARMVectorIntrinsicInfo AArch64SMEIntrinsicMap[] = { 7523 #define GET_SME_LLVM_INTRINSIC_MAP 7524 #include "clang/Basic/arm_sme_builtin_cg.inc" 7525 #undef GET_SME_LLVM_INTRINSIC_MAP 7526 }; 7527 7528 #undef SMEMAP1 7529 #undef SMEMAP2 7530 7531 static bool NEONSIMDIntrinsicsProvenSorted = false; 7532 7533 static bool AArch64SIMDIntrinsicsProvenSorted = false; 7534 static bool AArch64SISDIntrinsicsProvenSorted = false; 7535 static bool AArch64SVEIntrinsicsProvenSorted = false; 7536 static bool AArch64SMEIntrinsicsProvenSorted = false; 7537 7538 static const ARMVectorIntrinsicInfo * 7539 findARMVectorIntrinsicInMap(ArrayRef<ARMVectorIntrinsicInfo> IntrinsicMap, 7540 unsigned BuiltinID, bool &MapProvenSorted) { 7541 7542 #ifndef NDEBUG 7543 if (!MapProvenSorted) { 7544 assert(llvm::is_sorted(IntrinsicMap)); 7545 MapProvenSorted = true; 7546 } 7547 #endif 7548 7549 const ARMVectorIntrinsicInfo *Builtin = 7550 llvm::lower_bound(IntrinsicMap, BuiltinID); 7551 7552 if (Builtin != IntrinsicMap.end() && Builtin->BuiltinID == BuiltinID) 7553 return Builtin; 7554 7555 return nullptr; 7556 } 7557 7558 Function *CodeGenFunction::LookupNeonLLVMIntrinsic(unsigned IntrinsicID, 7559 unsigned Modifier, 7560 llvm::Type *ArgType, 7561 const CallExpr *E) { 7562 int VectorSize = 0; 7563 if (Modifier & Use64BitVectors) 7564 VectorSize = 64; 7565 else if (Modifier & Use128BitVectors) 7566 VectorSize = 128; 7567 7568 // Return type. 
  SmallVector<llvm::Type *, 3> Tys;
  if (Modifier & AddRetType) {
    llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
    if (Modifier & VectorizeRetType)
      Ty = llvm::FixedVectorType::get(
          Ty, VectorSize ? VectorSize / Ty->getPrimitiveSizeInBits() : 1);

    Tys.push_back(Ty);
  }

  // Arguments.
  if (Modifier & VectorizeArgTypes) {
    int Elts = VectorSize ? VectorSize / ArgType->getPrimitiveSizeInBits() : 1;
    ArgType = llvm::FixedVectorType::get(ArgType, Elts);
  }

  if (Modifier & (Add1ArgType | Add2ArgTypes))
    Tys.push_back(ArgType);

  if (Modifier & Add2ArgTypes)
    Tys.push_back(ArgType);

  if (Modifier & InventFloatType)
    Tys.push_back(FloatTy);

  return CGM.getIntrinsic(IntrinsicID, Tys);
}

// Emit a scalar (SISD) NEON builtin. Scalar operands are promoted to the
// vector types the corresponding LLVM intrinsic expects, and a scalar result
// is extracted again if the intrinsic returns a wider vector.
static Value *EmitCommonNeonSISDBuiltinExpr(
    CodeGenFunction &CGF, const ARMVectorIntrinsicInfo &SISDInfo,
    SmallVectorImpl<Value *> &Ops, const CallExpr *E) {
  unsigned BuiltinID = SISDInfo.BuiltinID;
  unsigned int Int = SISDInfo.LLVMIntrinsic;
  unsigned Modifier = SISDInfo.TypeModifier;
  const char *s = SISDInfo.NameHint;

  switch (BuiltinID) {
  case NEON::BI__builtin_neon_vcled_s64:
  case NEON::BI__builtin_neon_vcled_u64:
  case NEON::BI__builtin_neon_vcles_f32:
  case NEON::BI__builtin_neon_vcled_f64:
  case NEON::BI__builtin_neon_vcltd_s64:
  case NEON::BI__builtin_neon_vcltd_u64:
  case NEON::BI__builtin_neon_vclts_f32:
  case NEON::BI__builtin_neon_vcltd_f64:
  case NEON::BI__builtin_neon_vcales_f32:
  case NEON::BI__builtin_neon_vcaled_f64:
  case NEON::BI__builtin_neon_vcalts_f32:
  case NEON::BI__builtin_neon_vcaltd_f64:
    // Only one direction of comparisons actually exists: cmle is really a
    // cmge with swapped operands. The table gives us the right intrinsic, but
    // we still need to do the swap.
    std::swap(Ops[0], Ops[1]);
    break;
  }

  assert(Int && "Generic code assumes a valid intrinsic");

  // Determine the type(s) of this overloaded AArch64 intrinsic.
  const Expr *Arg = E->getArg(0);
  llvm::Type *ArgTy = CGF.ConvertType(Arg->getType());
  Function *F = CGF.LookupNeonLLVMIntrinsic(Int, Modifier, ArgTy, E);

  int j = 0;
  ConstantInt *C0 = ConstantInt::get(CGF.SizeTy, 0);
  for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
       ai != ae; ++ai, ++j) {
    llvm::Type *ArgTy = ai->getType();
    if (Ops[j]->getType()->getPrimitiveSizeInBits() ==
        ArgTy->getPrimitiveSizeInBits())
      continue;

    assert(ArgTy->isVectorTy() && !Ops[j]->getType()->isVectorTy());
    // The constant argument to an _n_ intrinsic always has Int32Ty, so
    // truncate it before inserting.
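    // The scalar is then inserted into lane 0 of a poison vector of the
    // intrinsic's expected argument type.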
7644 Ops[j] = CGF.Builder.CreateTruncOrBitCast( 7645 Ops[j], cast<llvm::VectorType>(ArgTy)->getElementType()); 7646 Ops[j] = 7647 CGF.Builder.CreateInsertElement(PoisonValue::get(ArgTy), Ops[j], C0); 7648 } 7649 7650 Value *Result = CGF.EmitNeonCall(F, Ops, s); 7651 llvm::Type *ResultType = CGF.ConvertType(E->getType()); 7652 if (ResultType->getPrimitiveSizeInBits().getFixedValue() < 7653 Result->getType()->getPrimitiveSizeInBits().getFixedValue()) 7654 return CGF.Builder.CreateExtractElement(Result, C0); 7655 7656 return CGF.Builder.CreateBitCast(Result, ResultType, s); 7657 } 7658 7659 Value *CodeGenFunction::EmitCommonNeonBuiltinExpr( 7660 unsigned BuiltinID, unsigned LLVMIntrinsic, unsigned AltLLVMIntrinsic, 7661 const char *NameHint, unsigned Modifier, const CallExpr *E, 7662 SmallVectorImpl<llvm::Value *> &Ops, Address PtrOp0, Address PtrOp1, 7663 llvm::Triple::ArchType Arch) { 7664 // Get the last argument, which specifies the vector type. 7665 const Expr *Arg = E->getArg(E->getNumArgs() - 1); 7666 std::optional<llvm::APSInt> NeonTypeConst = 7667 Arg->getIntegerConstantExpr(getContext()); 7668 if (!NeonTypeConst) 7669 return nullptr; 7670 7671 // Determine the type of this overloaded NEON intrinsic. 7672 NeonTypeFlags Type(NeonTypeConst->getZExtValue()); 7673 bool Usgn = Type.isUnsigned(); 7674 bool Quad = Type.isQuad(); 7675 const bool HasLegalHalfType = getTarget().hasLegalHalfType(); 7676 const bool AllowBFloatArgsAndRet = 7677 getTargetHooks().getABIInfo().allowBFloatArgsAndRet(); 7678 7679 llvm::FixedVectorType *VTy = 7680 GetNeonType(this, Type, HasLegalHalfType, false, AllowBFloatArgsAndRet); 7681 llvm::Type *Ty = VTy; 7682 if (!Ty) 7683 return nullptr; 7684 7685 auto getAlignmentValue32 = [&](Address addr) -> Value* { 7686 return Builder.getInt32(addr.getAlignment().getQuantity()); 7687 }; 7688 7689 unsigned Int = LLVMIntrinsic; 7690 if ((Modifier & UnsignedAlts) && !Usgn) 7691 Int = AltLLVMIntrinsic; 7692 7693 switch (BuiltinID) { 7694 default: break; 7695 case NEON::BI__builtin_neon_splat_lane_v: 7696 case NEON::BI__builtin_neon_splat_laneq_v: 7697 case NEON::BI__builtin_neon_splatq_lane_v: 7698 case NEON::BI__builtin_neon_splatq_laneq_v: { 7699 auto NumElements = VTy->getElementCount(); 7700 if (BuiltinID == NEON::BI__builtin_neon_splatq_lane_v) 7701 NumElements = NumElements * 2; 7702 if (BuiltinID == NEON::BI__builtin_neon_splat_laneq_v) 7703 NumElements = NumElements.divideCoefficientBy(2); 7704 7705 Ops[0] = Builder.CreateBitCast(Ops[0], VTy); 7706 return EmitNeonSplat(Ops[0], cast<ConstantInt>(Ops[1]), NumElements); 7707 } 7708 case NEON::BI__builtin_neon_vpadd_v: 7709 case NEON::BI__builtin_neon_vpaddq_v: 7710 // We don't allow fp/int overloading of intrinsics. 7711 if (VTy->getElementType()->isFloatingPointTy() && 7712 Int == Intrinsic::aarch64_neon_addp) 7713 Int = Intrinsic::aarch64_neon_faddp; 7714 break; 7715 case NEON::BI__builtin_neon_vabs_v: 7716 case NEON::BI__builtin_neon_vabsq_v: 7717 if (VTy->getElementType()->isFloatingPointTy()) 7718 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, Ty), Ops, "vabs"); 7719 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), Ops, "vabs"); 7720 case NEON::BI__builtin_neon_vadd_v: 7721 case NEON::BI__builtin_neon_vaddq_v: { 7722 llvm::Type *VTy = llvm::FixedVectorType::get(Int8Ty, Quad ? 
16 : 8); 7723 Ops[0] = Builder.CreateBitCast(Ops[0], VTy); 7724 Ops[1] = Builder.CreateBitCast(Ops[1], VTy); 7725 Ops[0] = Builder.CreateXor(Ops[0], Ops[1]); 7726 return Builder.CreateBitCast(Ops[0], Ty); 7727 } 7728 case NEON::BI__builtin_neon_vaddhn_v: { 7729 llvm::FixedVectorType *SrcTy = 7730 llvm::FixedVectorType::getExtendedElementVectorType(VTy); 7731 7732 // %sum = add <4 x i32> %lhs, %rhs 7733 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy); 7734 Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy); 7735 Ops[0] = Builder.CreateAdd(Ops[0], Ops[1], "vaddhn"); 7736 7737 // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16> 7738 Constant *ShiftAmt = 7739 ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2); 7740 Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vaddhn"); 7741 7742 // %res = trunc <4 x i32> %high to <4 x i16> 7743 return Builder.CreateTrunc(Ops[0], VTy, "vaddhn"); 7744 } 7745 case NEON::BI__builtin_neon_vcale_v: 7746 case NEON::BI__builtin_neon_vcaleq_v: 7747 case NEON::BI__builtin_neon_vcalt_v: 7748 case NEON::BI__builtin_neon_vcaltq_v: 7749 std::swap(Ops[0], Ops[1]); 7750 [[fallthrough]]; 7751 case NEON::BI__builtin_neon_vcage_v: 7752 case NEON::BI__builtin_neon_vcageq_v: 7753 case NEON::BI__builtin_neon_vcagt_v: 7754 case NEON::BI__builtin_neon_vcagtq_v: { 7755 llvm::Type *Ty; 7756 switch (VTy->getScalarSizeInBits()) { 7757 default: llvm_unreachable("unexpected type"); 7758 case 32: 7759 Ty = FloatTy; 7760 break; 7761 case 64: 7762 Ty = DoubleTy; 7763 break; 7764 case 16: 7765 Ty = HalfTy; 7766 break; 7767 } 7768 auto *VecFlt = llvm::FixedVectorType::get(Ty, VTy->getNumElements()); 7769 llvm::Type *Tys[] = { VTy, VecFlt }; 7770 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys); 7771 return EmitNeonCall(F, Ops, NameHint); 7772 } 7773 case NEON::BI__builtin_neon_vceqz_v: 7774 case NEON::BI__builtin_neon_vceqzq_v: 7775 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OEQ, 7776 ICmpInst::ICMP_EQ, "vceqz"); 7777 case NEON::BI__builtin_neon_vcgez_v: 7778 case NEON::BI__builtin_neon_vcgezq_v: 7779 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGE, 7780 ICmpInst::ICMP_SGE, "vcgez"); 7781 case NEON::BI__builtin_neon_vclez_v: 7782 case NEON::BI__builtin_neon_vclezq_v: 7783 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLE, 7784 ICmpInst::ICMP_SLE, "vclez"); 7785 case NEON::BI__builtin_neon_vcgtz_v: 7786 case NEON::BI__builtin_neon_vcgtzq_v: 7787 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGT, 7788 ICmpInst::ICMP_SGT, "vcgtz"); 7789 case NEON::BI__builtin_neon_vcltz_v: 7790 case NEON::BI__builtin_neon_vcltzq_v: 7791 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLT, 7792 ICmpInst::ICMP_SLT, "vcltz"); 7793 case NEON::BI__builtin_neon_vclz_v: 7794 case NEON::BI__builtin_neon_vclzq_v: 7795 // We generate target-independent intrinsic, which needs a second argument 7796 // for whether or not clz of zero is undefined; on ARM it isn't. 7797 Ops.push_back(Builder.getInt1(getTarget().isCLZForZeroUndef())); 7798 break; 7799 case NEON::BI__builtin_neon_vcvt_f32_v: 7800 case NEON::BI__builtin_neon_vcvtq_f32_v: 7801 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 7802 Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, Quad), 7803 HasLegalHalfType); 7804 return Usgn ? 
Builder.CreateUIToFP(Ops[0], Ty, "vcvt") 7805 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt"); 7806 case NEON::BI__builtin_neon_vcvt_f16_s16: 7807 case NEON::BI__builtin_neon_vcvt_f16_u16: 7808 case NEON::BI__builtin_neon_vcvtq_f16_s16: 7809 case NEON::BI__builtin_neon_vcvtq_f16_u16: 7810 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 7811 Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float16, false, Quad), 7812 HasLegalHalfType); 7813 return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt") 7814 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt"); 7815 case NEON::BI__builtin_neon_vcvt_n_f16_s16: 7816 case NEON::BI__builtin_neon_vcvt_n_f16_u16: 7817 case NEON::BI__builtin_neon_vcvtq_n_f16_s16: 7818 case NEON::BI__builtin_neon_vcvtq_n_f16_u16: { 7819 llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty }; 7820 Function *F = CGM.getIntrinsic(Int, Tys); 7821 return EmitNeonCall(F, Ops, "vcvt_n"); 7822 } 7823 case NEON::BI__builtin_neon_vcvt_n_f32_v: 7824 case NEON::BI__builtin_neon_vcvt_n_f64_v: 7825 case NEON::BI__builtin_neon_vcvtq_n_f32_v: 7826 case NEON::BI__builtin_neon_vcvtq_n_f64_v: { 7827 llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty }; 7828 Int = Usgn ? LLVMIntrinsic : AltLLVMIntrinsic; 7829 Function *F = CGM.getIntrinsic(Int, Tys); 7830 return EmitNeonCall(F, Ops, "vcvt_n"); 7831 } 7832 case NEON::BI__builtin_neon_vcvt_n_s16_f16: 7833 case NEON::BI__builtin_neon_vcvt_n_s32_v: 7834 case NEON::BI__builtin_neon_vcvt_n_u16_f16: 7835 case NEON::BI__builtin_neon_vcvt_n_u32_v: 7836 case NEON::BI__builtin_neon_vcvt_n_s64_v: 7837 case NEON::BI__builtin_neon_vcvt_n_u64_v: 7838 case NEON::BI__builtin_neon_vcvtq_n_s16_f16: 7839 case NEON::BI__builtin_neon_vcvtq_n_s32_v: 7840 case NEON::BI__builtin_neon_vcvtq_n_u16_f16: 7841 case NEON::BI__builtin_neon_vcvtq_n_u32_v: 7842 case NEON::BI__builtin_neon_vcvtq_n_s64_v: 7843 case NEON::BI__builtin_neon_vcvtq_n_u64_v: { 7844 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) }; 7845 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys); 7846 return EmitNeonCall(F, Ops, "vcvt_n"); 7847 } 7848 case NEON::BI__builtin_neon_vcvt_s32_v: 7849 case NEON::BI__builtin_neon_vcvt_u32_v: 7850 case NEON::BI__builtin_neon_vcvt_s64_v: 7851 case NEON::BI__builtin_neon_vcvt_u64_v: 7852 case NEON::BI__builtin_neon_vcvt_s16_f16: 7853 case NEON::BI__builtin_neon_vcvt_u16_f16: 7854 case NEON::BI__builtin_neon_vcvtq_s32_v: 7855 case NEON::BI__builtin_neon_vcvtq_u32_v: 7856 case NEON::BI__builtin_neon_vcvtq_s64_v: 7857 case NEON::BI__builtin_neon_vcvtq_u64_v: 7858 case NEON::BI__builtin_neon_vcvtq_s16_f16: 7859 case NEON::BI__builtin_neon_vcvtq_u16_f16: { 7860 Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type)); 7861 return Usgn ? 
Builder.CreateFPToUI(Ops[0], Ty, "vcvt") 7862 : Builder.CreateFPToSI(Ops[0], Ty, "vcvt"); 7863 } 7864 case NEON::BI__builtin_neon_vcvta_s16_f16: 7865 case NEON::BI__builtin_neon_vcvta_s32_v: 7866 case NEON::BI__builtin_neon_vcvta_s64_v: 7867 case NEON::BI__builtin_neon_vcvta_u16_f16: 7868 case NEON::BI__builtin_neon_vcvta_u32_v: 7869 case NEON::BI__builtin_neon_vcvta_u64_v: 7870 case NEON::BI__builtin_neon_vcvtaq_s16_f16: 7871 case NEON::BI__builtin_neon_vcvtaq_s32_v: 7872 case NEON::BI__builtin_neon_vcvtaq_s64_v: 7873 case NEON::BI__builtin_neon_vcvtaq_u16_f16: 7874 case NEON::BI__builtin_neon_vcvtaq_u32_v: 7875 case NEON::BI__builtin_neon_vcvtaq_u64_v: 7876 case NEON::BI__builtin_neon_vcvtn_s16_f16: 7877 case NEON::BI__builtin_neon_vcvtn_s32_v: 7878 case NEON::BI__builtin_neon_vcvtn_s64_v: 7879 case NEON::BI__builtin_neon_vcvtn_u16_f16: 7880 case NEON::BI__builtin_neon_vcvtn_u32_v: 7881 case NEON::BI__builtin_neon_vcvtn_u64_v: 7882 case NEON::BI__builtin_neon_vcvtnq_s16_f16: 7883 case NEON::BI__builtin_neon_vcvtnq_s32_v: 7884 case NEON::BI__builtin_neon_vcvtnq_s64_v: 7885 case NEON::BI__builtin_neon_vcvtnq_u16_f16: 7886 case NEON::BI__builtin_neon_vcvtnq_u32_v: 7887 case NEON::BI__builtin_neon_vcvtnq_u64_v: 7888 case NEON::BI__builtin_neon_vcvtp_s16_f16: 7889 case NEON::BI__builtin_neon_vcvtp_s32_v: 7890 case NEON::BI__builtin_neon_vcvtp_s64_v: 7891 case NEON::BI__builtin_neon_vcvtp_u16_f16: 7892 case NEON::BI__builtin_neon_vcvtp_u32_v: 7893 case NEON::BI__builtin_neon_vcvtp_u64_v: 7894 case NEON::BI__builtin_neon_vcvtpq_s16_f16: 7895 case NEON::BI__builtin_neon_vcvtpq_s32_v: 7896 case NEON::BI__builtin_neon_vcvtpq_s64_v: 7897 case NEON::BI__builtin_neon_vcvtpq_u16_f16: 7898 case NEON::BI__builtin_neon_vcvtpq_u32_v: 7899 case NEON::BI__builtin_neon_vcvtpq_u64_v: 7900 case NEON::BI__builtin_neon_vcvtm_s16_f16: 7901 case NEON::BI__builtin_neon_vcvtm_s32_v: 7902 case NEON::BI__builtin_neon_vcvtm_s64_v: 7903 case NEON::BI__builtin_neon_vcvtm_u16_f16: 7904 case NEON::BI__builtin_neon_vcvtm_u32_v: 7905 case NEON::BI__builtin_neon_vcvtm_u64_v: 7906 case NEON::BI__builtin_neon_vcvtmq_s16_f16: 7907 case NEON::BI__builtin_neon_vcvtmq_s32_v: 7908 case NEON::BI__builtin_neon_vcvtmq_s64_v: 7909 case NEON::BI__builtin_neon_vcvtmq_u16_f16: 7910 case NEON::BI__builtin_neon_vcvtmq_u32_v: 7911 case NEON::BI__builtin_neon_vcvtmq_u64_v: { 7912 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) }; 7913 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint); 7914 } 7915 case NEON::BI__builtin_neon_vcvtx_f32_v: { 7916 llvm::Type *Tys[2] = { VTy->getTruncatedElementVectorType(VTy), Ty}; 7917 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint); 7918 7919 } 7920 case NEON::BI__builtin_neon_vext_v: 7921 case NEON::BI__builtin_neon_vextq_v: { 7922 int CV = cast<ConstantInt>(Ops[2])->getSExtValue(); 7923 SmallVector<int, 16> Indices; 7924 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) 7925 Indices.push_back(i+CV); 7926 7927 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 7928 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 7929 return Builder.CreateShuffleVector(Ops[0], Ops[1], Indices, "vext"); 7930 } 7931 case NEON::BI__builtin_neon_vfma_v: 7932 case NEON::BI__builtin_neon_vfmaq_v: { 7933 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 7934 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 7935 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 7936 7937 // NEON intrinsic puts accumulator first, unlike the LLVM fma. 
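    // vfma(a, b, c) computes a + b * c, while llvm.fma(x, y, z) computes
    // x * y + z, so the operands are passed as {b, c, a}.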
7938 return emitCallMaybeConstrainedFPBuiltin( 7939 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty, 7940 {Ops[1], Ops[2], Ops[0]}); 7941 } 7942 case NEON::BI__builtin_neon_vld1_v: 7943 case NEON::BI__builtin_neon_vld1q_v: { 7944 llvm::Type *Tys[] = {Ty, Int8PtrTy}; 7945 Ops.push_back(getAlignmentValue32(PtrOp0)); 7946 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vld1"); 7947 } 7948 case NEON::BI__builtin_neon_vld1_x2_v: 7949 case NEON::BI__builtin_neon_vld1q_x2_v: 7950 case NEON::BI__builtin_neon_vld1_x3_v: 7951 case NEON::BI__builtin_neon_vld1q_x3_v: 7952 case NEON::BI__builtin_neon_vld1_x4_v: 7953 case NEON::BI__builtin_neon_vld1q_x4_v: { 7954 llvm::Type *Tys[2] = {VTy, UnqualPtrTy}; 7955 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys); 7956 Ops[1] = Builder.CreateCall(F, Ops[1], "vld1xN"); 7957 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 7958 } 7959 case NEON::BI__builtin_neon_vld2_v: 7960 case NEON::BI__builtin_neon_vld2q_v: 7961 case NEON::BI__builtin_neon_vld3_v: 7962 case NEON::BI__builtin_neon_vld3q_v: 7963 case NEON::BI__builtin_neon_vld4_v: 7964 case NEON::BI__builtin_neon_vld4q_v: 7965 case NEON::BI__builtin_neon_vld2_dup_v: 7966 case NEON::BI__builtin_neon_vld2q_dup_v: 7967 case NEON::BI__builtin_neon_vld3_dup_v: 7968 case NEON::BI__builtin_neon_vld3q_dup_v: 7969 case NEON::BI__builtin_neon_vld4_dup_v: 7970 case NEON::BI__builtin_neon_vld4q_dup_v: { 7971 llvm::Type *Tys[] = {Ty, Int8PtrTy}; 7972 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys); 7973 Value *Align = getAlignmentValue32(PtrOp1); 7974 Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, NameHint); 7975 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 7976 } 7977 case NEON::BI__builtin_neon_vld1_dup_v: 7978 case NEON::BI__builtin_neon_vld1q_dup_v: { 7979 Value *V = PoisonValue::get(Ty); 7980 PtrOp0 = PtrOp0.withElementType(VTy->getElementType()); 7981 LoadInst *Ld = Builder.CreateLoad(PtrOp0); 7982 llvm::Constant *CI = ConstantInt::get(SizeTy, 0); 7983 Ops[0] = Builder.CreateInsertElement(V, Ld, CI); 7984 return EmitNeonSplat(Ops[0], CI); 7985 } 7986 case NEON::BI__builtin_neon_vld2_lane_v: 7987 case NEON::BI__builtin_neon_vld2q_lane_v: 7988 case NEON::BI__builtin_neon_vld3_lane_v: 7989 case NEON::BI__builtin_neon_vld3q_lane_v: 7990 case NEON::BI__builtin_neon_vld4_lane_v: 7991 case NEON::BI__builtin_neon_vld4q_lane_v: { 7992 llvm::Type *Tys[] = {Ty, Int8PtrTy}; 7993 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys); 7994 for (unsigned I = 2; I < Ops.size() - 1; ++I) 7995 Ops[I] = Builder.CreateBitCast(Ops[I], Ty); 7996 Ops.push_back(getAlignmentValue32(PtrOp1)); 7997 Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), NameHint); 7998 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 7999 } 8000 case NEON::BI__builtin_neon_vmovl_v: { 8001 llvm::FixedVectorType *DTy = 8002 llvm::FixedVectorType::getTruncatedElementVectorType(VTy); 8003 Ops[0] = Builder.CreateBitCast(Ops[0], DTy); 8004 if (Usgn) 8005 return Builder.CreateZExt(Ops[0], Ty, "vmovl"); 8006 return Builder.CreateSExt(Ops[0], Ty, "vmovl"); 8007 } 8008 case NEON::BI__builtin_neon_vmovn_v: { 8009 llvm::FixedVectorType *QTy = 8010 llvm::FixedVectorType::getExtendedElementVectorType(VTy); 8011 Ops[0] = Builder.CreateBitCast(Ops[0], QTy); 8012 return Builder.CreateTrunc(Ops[0], Ty, "vmovn"); 8013 } 8014 case NEON::BI__builtin_neon_vmull_v: 8015 // FIXME: the integer vmull operations could be emitted in terms of pure 8016 // LLVM IR (2 exts followed by a mul). 
Unfortunately LLVM has a habit of 8017 // hoisting the exts outside loops. Until global ISel comes along that can 8018 // see through such movement this leads to bad CodeGen. So we need an 8019 // intrinsic for now. 8020 Int = Usgn ? Intrinsic::arm_neon_vmullu : Intrinsic::arm_neon_vmulls; 8021 Int = Type.isPoly() ? (unsigned)Intrinsic::arm_neon_vmullp : Int; 8022 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull"); 8023 case NEON::BI__builtin_neon_vpadal_v: 8024 case NEON::BI__builtin_neon_vpadalq_v: { 8025 // The source operand type has twice as many elements of half the size. 8026 unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits(); 8027 llvm::Type *EltTy = 8028 llvm::IntegerType::get(getLLVMContext(), EltBits / 2); 8029 auto *NarrowTy = 8030 llvm::FixedVectorType::get(EltTy, VTy->getNumElements() * 2); 8031 llvm::Type *Tys[2] = { Ty, NarrowTy }; 8032 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint); 8033 } 8034 case NEON::BI__builtin_neon_vpaddl_v: 8035 case NEON::BI__builtin_neon_vpaddlq_v: { 8036 // The source operand type has twice as many elements of half the size. 8037 unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits(); 8038 llvm::Type *EltTy = llvm::IntegerType::get(getLLVMContext(), EltBits / 2); 8039 auto *NarrowTy = 8040 llvm::FixedVectorType::get(EltTy, VTy->getNumElements() * 2); 8041 llvm::Type *Tys[2] = { Ty, NarrowTy }; 8042 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vpaddl"); 8043 } 8044 case NEON::BI__builtin_neon_vqdmlal_v: 8045 case NEON::BI__builtin_neon_vqdmlsl_v: { 8046 SmallVector<Value *, 2> MulOps(Ops.begin() + 1, Ops.end()); 8047 Ops[1] = 8048 EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), MulOps, "vqdmlal"); 8049 Ops.resize(2); 8050 return EmitNeonCall(CGM.getIntrinsic(AltLLVMIntrinsic, Ty), Ops, NameHint); 8051 } 8052 case NEON::BI__builtin_neon_vqdmulhq_lane_v: 8053 case NEON::BI__builtin_neon_vqdmulh_lane_v: 8054 case NEON::BI__builtin_neon_vqrdmulhq_lane_v: 8055 case NEON::BI__builtin_neon_vqrdmulh_lane_v: { 8056 auto *RTy = cast<llvm::FixedVectorType>(Ty); 8057 if (BuiltinID == NEON::BI__builtin_neon_vqdmulhq_lane_v || 8058 BuiltinID == NEON::BI__builtin_neon_vqrdmulhq_lane_v) 8059 RTy = llvm::FixedVectorType::get(RTy->getElementType(), 8060 RTy->getNumElements() * 2); 8061 llvm::Type *Tys[2] = { 8062 RTy, GetNeonType(this, NeonTypeFlags(Type.getEltType(), false, 8063 /*isQuad*/ false))}; 8064 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint); 8065 } 8066 case NEON::BI__builtin_neon_vqdmulhq_laneq_v: 8067 case NEON::BI__builtin_neon_vqdmulh_laneq_v: 8068 case NEON::BI__builtin_neon_vqrdmulhq_laneq_v: 8069 case NEON::BI__builtin_neon_vqrdmulh_laneq_v: { 8070 llvm::Type *Tys[2] = { 8071 Ty, GetNeonType(this, NeonTypeFlags(Type.getEltType(), false, 8072 /*isQuad*/ true))}; 8073 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint); 8074 } 8075 case NEON::BI__builtin_neon_vqshl_n_v: 8076 case NEON::BI__builtin_neon_vqshlq_n_v: 8077 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshl_n", 8078 1, false); 8079 case NEON::BI__builtin_neon_vqshlu_n_v: 8080 case NEON::BI__builtin_neon_vqshluq_n_v: 8081 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshlu_n", 8082 1, false); 8083 case NEON::BI__builtin_neon_vrecpe_v: 8084 case NEON::BI__builtin_neon_vrecpeq_v: 8085 case NEON::BI__builtin_neon_vrsqrte_v: 8086 case NEON::BI__builtin_neon_vrsqrteq_v: 8087 Int = Ty->isFPOrFPVectorTy() ? 
LLVMIntrinsic : AltLLVMIntrinsic; 8088 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint); 8089 case NEON::BI__builtin_neon_vrndi_v: 8090 case NEON::BI__builtin_neon_vrndiq_v: 8091 Int = Builder.getIsFPConstrained() 8092 ? Intrinsic::experimental_constrained_nearbyint 8093 : Intrinsic::nearbyint; 8094 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint); 8095 case NEON::BI__builtin_neon_vrshr_n_v: 8096 case NEON::BI__builtin_neon_vrshrq_n_v: 8097 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshr_n", 8098 1, true); 8099 case NEON::BI__builtin_neon_vsha512hq_u64: 8100 case NEON::BI__builtin_neon_vsha512h2q_u64: 8101 case NEON::BI__builtin_neon_vsha512su0q_u64: 8102 case NEON::BI__builtin_neon_vsha512su1q_u64: { 8103 Function *F = CGM.getIntrinsic(Int); 8104 return EmitNeonCall(F, Ops, ""); 8105 } 8106 case NEON::BI__builtin_neon_vshl_n_v: 8107 case NEON::BI__builtin_neon_vshlq_n_v: 8108 Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false); 8109 return Builder.CreateShl(Builder.CreateBitCast(Ops[0],Ty), Ops[1], 8110 "vshl_n"); 8111 case NEON::BI__builtin_neon_vshll_n_v: { 8112 llvm::FixedVectorType *SrcTy = 8113 llvm::FixedVectorType::getTruncatedElementVectorType(VTy); 8114 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy); 8115 if (Usgn) 8116 Ops[0] = Builder.CreateZExt(Ops[0], VTy); 8117 else 8118 Ops[0] = Builder.CreateSExt(Ops[0], VTy); 8119 Ops[1] = EmitNeonShiftVector(Ops[1], VTy, false); 8120 return Builder.CreateShl(Ops[0], Ops[1], "vshll_n"); 8121 } 8122 case NEON::BI__builtin_neon_vshrn_n_v: { 8123 llvm::FixedVectorType *SrcTy = 8124 llvm::FixedVectorType::getExtendedElementVectorType(VTy); 8125 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy); 8126 Ops[1] = EmitNeonShiftVector(Ops[1], SrcTy, false); 8127 if (Usgn) 8128 Ops[0] = Builder.CreateLShr(Ops[0], Ops[1]); 8129 else 8130 Ops[0] = Builder.CreateAShr(Ops[0], Ops[1]); 8131 return Builder.CreateTrunc(Ops[0], Ty, "vshrn_n"); 8132 } 8133 case NEON::BI__builtin_neon_vshr_n_v: 8134 case NEON::BI__builtin_neon_vshrq_n_v: 8135 return EmitNeonRShiftImm(Ops[0], Ops[1], Ty, Usgn, "vshr_n"); 8136 case NEON::BI__builtin_neon_vst1_v: 8137 case NEON::BI__builtin_neon_vst1q_v: 8138 case NEON::BI__builtin_neon_vst2_v: 8139 case NEON::BI__builtin_neon_vst2q_v: 8140 case NEON::BI__builtin_neon_vst3_v: 8141 case NEON::BI__builtin_neon_vst3q_v: 8142 case NEON::BI__builtin_neon_vst4_v: 8143 case NEON::BI__builtin_neon_vst4q_v: 8144 case NEON::BI__builtin_neon_vst2_lane_v: 8145 case NEON::BI__builtin_neon_vst2q_lane_v: 8146 case NEON::BI__builtin_neon_vst3_lane_v: 8147 case NEON::BI__builtin_neon_vst3q_lane_v: 8148 case NEON::BI__builtin_neon_vst4_lane_v: 8149 case NEON::BI__builtin_neon_vst4q_lane_v: { 8150 llvm::Type *Tys[] = {Int8PtrTy, Ty}; 8151 Ops.push_back(getAlignmentValue32(PtrOp0)); 8152 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, ""); 8153 } 8154 case NEON::BI__builtin_neon_vsm3partw1q_u32: 8155 case NEON::BI__builtin_neon_vsm3partw2q_u32: 8156 case NEON::BI__builtin_neon_vsm3ss1q_u32: 8157 case NEON::BI__builtin_neon_vsm4ekeyq_u32: 8158 case NEON::BI__builtin_neon_vsm4eq_u32: { 8159 Function *F = CGM.getIntrinsic(Int); 8160 return EmitNeonCall(F, Ops, ""); 8161 } 8162 case NEON::BI__builtin_neon_vsm3tt1aq_u32: 8163 case NEON::BI__builtin_neon_vsm3tt1bq_u32: 8164 case NEON::BI__builtin_neon_vsm3tt2aq_u32: 8165 case NEON::BI__builtin_neon_vsm3tt2bq_u32: { 8166 Function *F = CGM.getIntrinsic(Int); 8167 Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty); 8168 return EmitNeonCall(F, Ops, ""); 8169 } 8170 case 
NEON::BI__builtin_neon_vst1_x2_v: 8171 case NEON::BI__builtin_neon_vst1q_x2_v: 8172 case NEON::BI__builtin_neon_vst1_x3_v: 8173 case NEON::BI__builtin_neon_vst1q_x3_v: 8174 case NEON::BI__builtin_neon_vst1_x4_v: 8175 case NEON::BI__builtin_neon_vst1q_x4_v: { 8176 // TODO: Currently in AArch32 mode the pointer operand comes first, whereas 8177 // in AArch64 it comes last. We may want to stick to one or another. 8178 if (Arch == llvm::Triple::aarch64 || Arch == llvm::Triple::aarch64_be || 8179 Arch == llvm::Triple::aarch64_32) { 8180 llvm::Type *Tys[2] = {VTy, UnqualPtrTy}; 8181 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end()); 8182 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, ""); 8183 } 8184 llvm::Type *Tys[2] = {UnqualPtrTy, VTy}; 8185 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, ""); 8186 } 8187 case NEON::BI__builtin_neon_vsubhn_v: { 8188 llvm::FixedVectorType *SrcTy = 8189 llvm::FixedVectorType::getExtendedElementVectorType(VTy); 8190 8191 // %sum = add <4 x i32> %lhs, %rhs 8192 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy); 8193 Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy); 8194 Ops[0] = Builder.CreateSub(Ops[0], Ops[1], "vsubhn"); 8195 8196 // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16> 8197 Constant *ShiftAmt = 8198 ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2); 8199 Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vsubhn"); 8200 8201 // %res = trunc <4 x i32> %high to <4 x i16> 8202 return Builder.CreateTrunc(Ops[0], VTy, "vsubhn"); 8203 } 8204 case NEON::BI__builtin_neon_vtrn_v: 8205 case NEON::BI__builtin_neon_vtrnq_v: { 8206 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 8207 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 8208 Value *SV = nullptr; 8209 8210 for (unsigned vi = 0; vi != 2; ++vi) { 8211 SmallVector<int, 16> Indices; 8212 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) { 8213 Indices.push_back(i+vi); 8214 Indices.push_back(i+e+vi); 8215 } 8216 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi); 8217 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn"); 8218 SV = Builder.CreateDefaultAlignedStore(SV, Addr); 8219 } 8220 return SV; 8221 } 8222 case NEON::BI__builtin_neon_vtst_v: 8223 case NEON::BI__builtin_neon_vtstq_v: { 8224 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 8225 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 8226 Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]); 8227 Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0], 8228 ConstantAggregateZero::get(Ty)); 8229 return Builder.CreateSExt(Ops[0], Ty, "vtst"); 8230 } 8231 case NEON::BI__builtin_neon_vuzp_v: 8232 case NEON::BI__builtin_neon_vuzpq_v: { 8233 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 8234 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 8235 Value *SV = nullptr; 8236 8237 for (unsigned vi = 0; vi != 2; ++vi) { 8238 SmallVector<int, 16> Indices; 8239 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) 8240 Indices.push_back(2*i+vi); 8241 8242 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi); 8243 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp"); 8244 SV = Builder.CreateDefaultAlignedStore(SV, Addr); 8245 } 8246 return SV; 8247 } 8248 case NEON::BI__builtin_neon_vxarq_u64: { 8249 Function *F = CGM.getIntrinsic(Int); 8250 Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty); 8251 return EmitNeonCall(F, Ops, ""); 8252 } 8253 case NEON::BI__builtin_neon_vzip_v: 8254 case NEON::BI__builtin_neon_vzipq_v: { 8255 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 8256 Ops[2] = 
Builder.CreateBitCast(Ops[2], Ty); 8257 Value *SV = nullptr; 8258 8259 for (unsigned vi = 0; vi != 2; ++vi) { 8260 SmallVector<int, 16> Indices; 8261 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) { 8262 Indices.push_back((i + vi*e) >> 1); 8263 Indices.push_back(((i + vi*e) >> 1)+e); 8264 } 8265 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi); 8266 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip"); 8267 SV = Builder.CreateDefaultAlignedStore(SV, Addr); 8268 } 8269 return SV; 8270 } 8271 case NEON::BI__builtin_neon_vdot_s32: 8272 case NEON::BI__builtin_neon_vdot_u32: 8273 case NEON::BI__builtin_neon_vdotq_s32: 8274 case NEON::BI__builtin_neon_vdotq_u32: { 8275 auto *InputTy = 8276 llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8); 8277 llvm::Type *Tys[2] = { Ty, InputTy }; 8278 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vdot"); 8279 } 8280 case NEON::BI__builtin_neon_vfmlal_low_f16: 8281 case NEON::BI__builtin_neon_vfmlalq_low_f16: { 8282 auto *InputTy = 8283 llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16); 8284 llvm::Type *Tys[2] = { Ty, InputTy }; 8285 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlal_low"); 8286 } 8287 case NEON::BI__builtin_neon_vfmlsl_low_f16: 8288 case NEON::BI__builtin_neon_vfmlslq_low_f16: { 8289 auto *InputTy = 8290 llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16); 8291 llvm::Type *Tys[2] = { Ty, InputTy }; 8292 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlsl_low"); 8293 } 8294 case NEON::BI__builtin_neon_vfmlal_high_f16: 8295 case NEON::BI__builtin_neon_vfmlalq_high_f16: { 8296 auto *InputTy = 8297 llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16); 8298 llvm::Type *Tys[2] = { Ty, InputTy }; 8299 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlal_high"); 8300 } 8301 case NEON::BI__builtin_neon_vfmlsl_high_f16: 8302 case NEON::BI__builtin_neon_vfmlslq_high_f16: { 8303 auto *InputTy = 8304 llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16); 8305 llvm::Type *Tys[2] = { Ty, InputTy }; 8306 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlsl_high"); 8307 } 8308 case NEON::BI__builtin_neon_vmmlaq_s32: 8309 case NEON::BI__builtin_neon_vmmlaq_u32: { 8310 auto *InputTy = 8311 llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8); 8312 llvm::Type *Tys[2] = { Ty, InputTy }; 8313 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vmmla"); 8314 } 8315 case NEON::BI__builtin_neon_vusmmlaq_s32: { 8316 auto *InputTy = 8317 llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8); 8318 llvm::Type *Tys[2] = { Ty, InputTy }; 8319 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vusmmla"); 8320 } 8321 case NEON::BI__builtin_neon_vusdot_s32: 8322 case NEON::BI__builtin_neon_vusdotq_s32: { 8323 auto *InputTy = 8324 llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8); 8325 llvm::Type *Tys[2] = { Ty, InputTy }; 8326 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vusdot"); 8327 } 8328 case NEON::BI__builtin_neon_vbfdot_f32: 8329 case NEON::BI__builtin_neon_vbfdotq_f32: { 8330 llvm::Type *InputTy = 8331 llvm::FixedVectorType::get(BFloatTy, Ty->getPrimitiveSizeInBits() / 16); 8332 llvm::Type *Tys[2] = { Ty, InputTy }; 8333 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vbfdot"); 8334 } 8335 case NEON::BI__builtin_neon___a32_vcvt_bf16_f32: { 8336 llvm::Type *Tys[1] = { Ty }; 8337 Function *F = CGM.getIntrinsic(Int, 
Tys); 8338 return EmitNeonCall(F, Ops, "vcvtfp2bf"); 8339 } 8340 8341 } 8342 8343 assert(Int && "Expected valid intrinsic number"); 8344 8345 // Determine the type(s) of this overloaded AArch64 intrinsic. 8346 Function *F = LookupNeonLLVMIntrinsic(Int, Modifier, Ty, E); 8347 8348 Value *Result = EmitNeonCall(F, Ops, NameHint); 8349 llvm::Type *ResultType = ConvertType(E->getType()); 8350 // Cast the AArch64 intrinsic's one-element vector result back to the 8351 // scalar type expected by the builtin. 8352 return Builder.CreateBitCast(Result, ResultType, NameHint); 8353 } 8354 8355 Value *CodeGenFunction::EmitAArch64CompareBuiltinExpr( 8356 Value *Op, llvm::Type *Ty, const CmpInst::Predicate Fp, 8357 const CmpInst::Predicate Ip, const Twine &Name) { 8358 llvm::Type *OTy = Op->getType(); 8359 8360 // FIXME: this is utterly horrific. We should not be looking at previous 8361 // codegen context to find out what needs doing. Unfortunately TableGen 8362 // currently gives us exactly the same calls for vceqz_f32 and vceqz_s32 8363 // (etc). 8364 if (BitCastInst *BI = dyn_cast<BitCastInst>(Op)) 8365 OTy = BI->getOperand(0)->getType(); 8366 8367 Op = Builder.CreateBitCast(Op, OTy); 8368 if (OTy->getScalarType()->isFloatingPointTy()) { 8369 if (Fp == CmpInst::FCMP_OEQ) 8370 Op = Builder.CreateFCmp(Fp, Op, Constant::getNullValue(OTy)); 8371 else 8372 Op = Builder.CreateFCmpS(Fp, Op, Constant::getNullValue(OTy)); 8373 } else { 8374 Op = Builder.CreateICmp(Ip, Op, Constant::getNullValue(OTy)); 8375 } 8376 return Builder.CreateSExt(Op, Ty, Name); 8377 } 8378 8379 static Value *packTBLDVectorList(CodeGenFunction &CGF, ArrayRef<Value *> Ops, 8380 Value *ExtOp, Value *IndexOp, 8381 llvm::Type *ResTy, unsigned IntID, 8382 const char *Name) { 8383 SmallVector<Value *, 2> TblOps; 8384 if (ExtOp) 8385 TblOps.push_back(ExtOp); 8386 8387 // Build a vector containing sequential numbers like (0, 1, 2, ..., 15) 8388 SmallVector<int, 16> Indices; 8389 auto *TblTy = cast<llvm::FixedVectorType>(Ops[0]->getType()); 8390 for (unsigned i = 0, e = TblTy->getNumElements(); i != e; ++i) { 8391 Indices.push_back(2*i); 8392 Indices.push_back(2*i+1); 8393 } 8394 8395 int PairPos = 0, End = Ops.size() - 1; 8396 while (PairPos < End) { 8397 TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos], 8398 Ops[PairPos+1], Indices, 8399 Name)); 8400 PairPos += 2; 8401 } 8402 8403 // If there's an odd number of 64-bit lookup tables, fill the high 64 bits 8404 // of the 128-bit lookup table with zero.
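// For example, given three 64-bit tables {t0, t1, t2} (an illustrative case,
// not tied to a specific builtin), the loop above concatenates t0:t1 into one
// 128-bit table via a shufflevector, and the code below pads the leftover t2
// with an all-zero vector to form t2:zero.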
8405 if (PairPos == End) { 8406 Value *ZeroTbl = ConstantAggregateZero::get(TblTy); 8407 TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos], 8408 ZeroTbl, Indices, Name)); 8409 } 8410 8411 Function *TblF; 8412 TblOps.push_back(IndexOp); 8413 TblF = CGF.CGM.getIntrinsic(IntID, ResTy); 8414 8415 return CGF.EmitNeonCall(TblF, TblOps, Name); 8416 } 8417 8418 Value *CodeGenFunction::GetValueForARMHint(unsigned BuiltinID) { 8419 unsigned Value; 8420 switch (BuiltinID) { 8421 default: 8422 return nullptr; 8423 case clang::ARM::BI__builtin_arm_nop: 8424 Value = 0; 8425 break; 8426 case clang::ARM::BI__builtin_arm_yield: 8427 case clang::ARM::BI__yield: 8428 Value = 1; 8429 break; 8430 case clang::ARM::BI__builtin_arm_wfe: 8431 case clang::ARM::BI__wfe: 8432 Value = 2; 8433 break; 8434 case clang::ARM::BI__builtin_arm_wfi: 8435 case clang::ARM::BI__wfi: 8436 Value = 3; 8437 break; 8438 case clang::ARM::BI__builtin_arm_sev: 8439 case clang::ARM::BI__sev: 8440 Value = 4; 8441 break; 8442 case clang::ARM::BI__builtin_arm_sevl: 8443 case clang::ARM::BI__sevl: 8444 Value = 5; 8445 break; 8446 } 8447 8448 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_hint), 8449 llvm::ConstantInt::get(Int32Ty, Value)); 8450 } 8451 8452 enum SpecialRegisterAccessKind { 8453 NormalRead, 8454 VolatileRead, 8455 Write, 8456 }; 8457 8458 // Generates the IR for __builtin_read_exec_*. 8459 // Lowers the builtin to amdgcn_ballot intrinsic. 8460 static Value *EmitAMDGCNBallotForExec(CodeGenFunction &CGF, const CallExpr *E, 8461 llvm::Type *RegisterType, 8462 llvm::Type *ValueType, bool isExecHi) { 8463 CodeGen::CGBuilderTy &Builder = CGF.Builder; 8464 CodeGen::CodeGenModule &CGM = CGF.CGM; 8465 8466 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, {RegisterType}); 8467 llvm::Value *Call = Builder.CreateCall(F, {Builder.getInt1(true)}); 8468 8469 if (isExecHi) { 8470 Value *Rt2 = Builder.CreateLShr(Call, 32); 8471 Rt2 = Builder.CreateTrunc(Rt2, CGF.Int32Ty); 8472 return Rt2; 8473 } 8474 8475 return Call; 8476 } 8477 8478 // Generates the IR for the read/write special register builtin, 8479 // ValueType is the type of the value that is to be written or read, 8480 // RegisterType is the type of the register being written to or read from. 8481 static Value *EmitSpecialRegisterBuiltin(CodeGenFunction &CGF, 8482 const CallExpr *E, 8483 llvm::Type *RegisterType, 8484 llvm::Type *ValueType, 8485 SpecialRegisterAccessKind AccessKind, 8486 StringRef SysReg = "") { 8487 // write and register intrinsics only support 32, 64 and 128 bit operations. 
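// Roughly, the IR produced for a 32-bit read looks like (illustrative only):
//   %v = call i32 @llvm.read_register.i32(metadata !0)  ; !0 = !{!"<sysreg>"}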
8488 assert((RegisterType->isIntegerTy(32) || RegisterType->isIntegerTy(64) || 8489 RegisterType->isIntegerTy(128)) && 8490 "Unsupported size for register."); 8491 8492 CodeGen::CGBuilderTy &Builder = CGF.Builder; 8493 CodeGen::CodeGenModule &CGM = CGF.CGM; 8494 LLVMContext &Context = CGM.getLLVMContext(); 8495 8496 if (SysReg.empty()) { 8497 const Expr *SysRegStrExpr = E->getArg(0)->IgnoreParenCasts(); 8498 SysReg = cast<clang::StringLiteral>(SysRegStrExpr)->getString(); 8499 } 8500 8501 llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysReg) }; 8502 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops); 8503 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName); 8504 8505 llvm::Type *Types[] = { RegisterType }; 8506 8507 bool MixedTypes = RegisterType->isIntegerTy(64) && ValueType->isIntegerTy(32); 8508 assert(!(RegisterType->isIntegerTy(32) && ValueType->isIntegerTy(64)) 8509 && "Can't fit 64-bit value in 32-bit register"); 8510 8511 if (AccessKind != Write) { 8512 assert(AccessKind == NormalRead || AccessKind == VolatileRead); 8513 llvm::Function *F = CGM.getIntrinsic( 8514 AccessKind == VolatileRead ? llvm::Intrinsic::read_volatile_register 8515 : llvm::Intrinsic::read_register, 8516 Types); 8517 llvm::Value *Call = Builder.CreateCall(F, Metadata); 8518 8519 if (MixedTypes) 8520 // Read into 64 bit register and then truncate result to 32 bit. 8521 return Builder.CreateTrunc(Call, ValueType); 8522 8523 if (ValueType->isPointerTy()) 8524 // Have i32/i64 result (Call) but want to return a VoidPtrTy (i8*). 8525 return Builder.CreateIntToPtr(Call, ValueType); 8526 8527 return Call; 8528 } 8529 8530 llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types); 8531 llvm::Value *ArgValue = CGF.EmitScalarExpr(E->getArg(1)); 8532 if (MixedTypes) { 8533 // Extend 32 bit write value to 64 bit to pass to write. 8534 ArgValue = Builder.CreateZExt(ArgValue, RegisterType); 8535 return Builder.CreateCall(F, { Metadata, ArgValue }); 8536 } 8537 8538 if (ValueType->isPointerTy()) { 8539 // Have VoidPtrTy ArgValue but want to return an i32/i64. 8540 ArgValue = Builder.CreatePtrToInt(ArgValue, RegisterType); 8541 return Builder.CreateCall(F, { Metadata, ArgValue }); 8542 } 8543 8544 return Builder.CreateCall(F, { Metadata, ArgValue }); 8545 } 8546 8547 /// Return true if BuiltinID is an overloaded Neon intrinsic with an extra 8548 /// argument that specifies the vector type. 
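/// (The extra argument is a trailing integer constant, typically encoding
/// NeonTypeFlags; see how the last builtin argument is consumed in
/// EmitARMBuiltinExpr below. The builtins listed here take no such operand.)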
8549 static bool HasExtraNeonArgument(unsigned BuiltinID) { 8550 switch (BuiltinID) { 8551 default: break; 8552 case NEON::BI__builtin_neon_vget_lane_i8: 8553 case NEON::BI__builtin_neon_vget_lane_i16: 8554 case NEON::BI__builtin_neon_vget_lane_bf16: 8555 case NEON::BI__builtin_neon_vget_lane_i32: 8556 case NEON::BI__builtin_neon_vget_lane_i64: 8557 case NEON::BI__builtin_neon_vget_lane_f32: 8558 case NEON::BI__builtin_neon_vgetq_lane_i8: 8559 case NEON::BI__builtin_neon_vgetq_lane_i16: 8560 case NEON::BI__builtin_neon_vgetq_lane_bf16: 8561 case NEON::BI__builtin_neon_vgetq_lane_i32: 8562 case NEON::BI__builtin_neon_vgetq_lane_i64: 8563 case NEON::BI__builtin_neon_vgetq_lane_f32: 8564 case NEON::BI__builtin_neon_vduph_lane_bf16: 8565 case NEON::BI__builtin_neon_vduph_laneq_bf16: 8566 case NEON::BI__builtin_neon_vset_lane_i8: 8567 case NEON::BI__builtin_neon_vset_lane_i16: 8568 case NEON::BI__builtin_neon_vset_lane_bf16: 8569 case NEON::BI__builtin_neon_vset_lane_i32: 8570 case NEON::BI__builtin_neon_vset_lane_i64: 8571 case NEON::BI__builtin_neon_vset_lane_f32: 8572 case NEON::BI__builtin_neon_vsetq_lane_i8: 8573 case NEON::BI__builtin_neon_vsetq_lane_i16: 8574 case NEON::BI__builtin_neon_vsetq_lane_bf16: 8575 case NEON::BI__builtin_neon_vsetq_lane_i32: 8576 case NEON::BI__builtin_neon_vsetq_lane_i64: 8577 case NEON::BI__builtin_neon_vsetq_lane_f32: 8578 case NEON::BI__builtin_neon_vsha1h_u32: 8579 case NEON::BI__builtin_neon_vsha1cq_u32: 8580 case NEON::BI__builtin_neon_vsha1pq_u32: 8581 case NEON::BI__builtin_neon_vsha1mq_u32: 8582 case NEON::BI__builtin_neon_vcvth_bf16_f32: 8583 case clang::ARM::BI_MoveToCoprocessor: 8584 case clang::ARM::BI_MoveToCoprocessor2: 8585 return false; 8586 } 8587 return true; 8588 } 8589 8590 Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, 8591 const CallExpr *E, 8592 ReturnValueSlot ReturnValue, 8593 llvm::Triple::ArchType Arch) { 8594 if (auto Hint = GetValueForARMHint(BuiltinID)) 8595 return Hint; 8596 8597 if (BuiltinID == clang::ARM::BI__emit) { 8598 bool IsThumb = getTarget().getTriple().getArch() == llvm::Triple::thumb; 8599 llvm::FunctionType *FTy = 8600 llvm::FunctionType::get(VoidTy, /*Variadic=*/false); 8601 8602 Expr::EvalResult Result; 8603 if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext())) 8604 llvm_unreachable("Sema will ensure that the parameter is constant"); 8605 8606 llvm::APSInt Value = Result.Val.getInt(); 8607 uint64_t ZExtValue = Value.zextOrTrunc(IsThumb ? 16 : 32).getZExtValue(); 8608 8609 llvm::InlineAsm *Emit = 8610 IsThumb ? 
InlineAsm::get(FTy, ".inst.n 0x" + utohexstr(ZExtValue), "", 8611 /*hasSideEffects=*/true) 8612 : InlineAsm::get(FTy, ".inst 0x" + utohexstr(ZExtValue), "", 8613 /*hasSideEffects=*/true); 8614 8615 return Builder.CreateCall(Emit); 8616 } 8617 8618 if (BuiltinID == clang::ARM::BI__builtin_arm_dbg) { 8619 Value *Option = EmitScalarExpr(E->getArg(0)); 8620 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_dbg), Option); 8621 } 8622 8623 if (BuiltinID == clang::ARM::BI__builtin_arm_prefetch) { 8624 Value *Address = EmitScalarExpr(E->getArg(0)); 8625 Value *RW = EmitScalarExpr(E->getArg(1)); 8626 Value *IsData = EmitScalarExpr(E->getArg(2)); 8627 8628 // Locality is not supported on ARM target 8629 Value *Locality = llvm::ConstantInt::get(Int32Ty, 3); 8630 8631 Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType()); 8632 return Builder.CreateCall(F, {Address, RW, Locality, IsData}); 8633 } 8634 8635 if (BuiltinID == clang::ARM::BI__builtin_arm_rbit) { 8636 llvm::Value *Arg = EmitScalarExpr(E->getArg(0)); 8637 return Builder.CreateCall( 8638 CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit"); 8639 } 8640 8641 if (BuiltinID == clang::ARM::BI__builtin_arm_clz || 8642 BuiltinID == clang::ARM::BI__builtin_arm_clz64) { 8643 llvm::Value *Arg = EmitScalarExpr(E->getArg(0)); 8644 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Arg->getType()); 8645 Value *Res = Builder.CreateCall(F, {Arg, Builder.getInt1(false)}); 8646 if (BuiltinID == clang::ARM::BI__builtin_arm_clz64) 8647 Res = Builder.CreateTrunc(Res, Builder.getInt32Ty()); 8648 return Res; 8649 } 8650 8651 8652 if (BuiltinID == clang::ARM::BI__builtin_arm_cls) { 8653 llvm::Value *Arg = EmitScalarExpr(E->getArg(0)); 8654 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_cls), Arg, "cls"); 8655 } 8656 if (BuiltinID == clang::ARM::BI__builtin_arm_cls64) { 8657 llvm::Value *Arg = EmitScalarExpr(E->getArg(0)); 8658 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_cls64), Arg, 8659 "cls"); 8660 } 8661 8662 if (BuiltinID == clang::ARM::BI__clear_cache) { 8663 assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments"); 8664 const FunctionDecl *FD = E->getDirectCallee(); 8665 Value *Ops[2]; 8666 for (unsigned i = 0; i < 2; i++) 8667 Ops[i] = EmitScalarExpr(E->getArg(i)); 8668 llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType()); 8669 llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty); 8670 StringRef Name = FD->getName(); 8671 return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops); 8672 } 8673 8674 if (BuiltinID == clang::ARM::BI__builtin_arm_mcrr || 8675 BuiltinID == clang::ARM::BI__builtin_arm_mcrr2) { 8676 Function *F; 8677 8678 switch (BuiltinID) { 8679 default: llvm_unreachable("unexpected builtin"); 8680 case clang::ARM::BI__builtin_arm_mcrr: 8681 F = CGM.getIntrinsic(Intrinsic::arm_mcrr); 8682 break; 8683 case clang::ARM::BI__builtin_arm_mcrr2: 8684 F = CGM.getIntrinsic(Intrinsic::arm_mcrr2); 8685 break; 8686 } 8687 8688 // MCRR{2} instruction has 5 operands but 8689 // the intrinsic has 4 because Rt and Rt2 8690 // are represented as a single unsigned 64 8691 // bit integer in the intrinsic definition 8692 // but internally it's represented as 2 32 8693 // bit integers. 
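// A rough sketch of the split performed below on the 64-bit RtAndRt2 value:
//   Rt  = trunc i64 %RtAndRt2 to i32               ; low 32 bits
//   Rt2 = trunc (lshr i64 %RtAndRt2, 32) to i32    ; high 32 bits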
8694 8695 Value *Coproc = EmitScalarExpr(E->getArg(0)); 8696 Value *Opc1 = EmitScalarExpr(E->getArg(1)); 8697 Value *RtAndRt2 = EmitScalarExpr(E->getArg(2)); 8698 Value *CRm = EmitScalarExpr(E->getArg(3)); 8699 8700 Value *C1 = llvm::ConstantInt::get(Int64Ty, 32); 8701 Value *Rt = Builder.CreateTruncOrBitCast(RtAndRt2, Int32Ty); 8702 Value *Rt2 = Builder.CreateLShr(RtAndRt2, C1); 8703 Rt2 = Builder.CreateTruncOrBitCast(Rt2, Int32Ty); 8704 8705 return Builder.CreateCall(F, {Coproc, Opc1, Rt, Rt2, CRm}); 8706 } 8707 8708 if (BuiltinID == clang::ARM::BI__builtin_arm_mrrc || 8709 BuiltinID == clang::ARM::BI__builtin_arm_mrrc2) { 8710 Function *F; 8711 8712 switch (BuiltinID) { 8713 default: llvm_unreachable("unexpected builtin"); 8714 case clang::ARM::BI__builtin_arm_mrrc: 8715 F = CGM.getIntrinsic(Intrinsic::arm_mrrc); 8716 break; 8717 case clang::ARM::BI__builtin_arm_mrrc2: 8718 F = CGM.getIntrinsic(Intrinsic::arm_mrrc2); 8719 break; 8720 } 8721 8722 Value *Coproc = EmitScalarExpr(E->getArg(0)); 8723 Value *Opc1 = EmitScalarExpr(E->getArg(1)); 8724 Value *CRm = EmitScalarExpr(E->getArg(2)); 8725 Value *RtAndRt2 = Builder.CreateCall(F, {Coproc, Opc1, CRm}); 8726 8727 // Returns an unsigned 64 bit integer, represented 8728 // as two 32 bit integers. 8729 8730 Value *Rt = Builder.CreateExtractValue(RtAndRt2, 1); 8731 Value *Rt1 = Builder.CreateExtractValue(RtAndRt2, 0); 8732 Rt = Builder.CreateZExt(Rt, Int64Ty); 8733 Rt1 = Builder.CreateZExt(Rt1, Int64Ty); 8734 8735 Value *ShiftCast = llvm::ConstantInt::get(Int64Ty, 32); 8736 RtAndRt2 = Builder.CreateShl(Rt, ShiftCast, "shl", true); 8737 RtAndRt2 = Builder.CreateOr(RtAndRt2, Rt1); 8738 8739 return Builder.CreateBitCast(RtAndRt2, ConvertType(E->getType())); 8740 } 8741 8742 if (BuiltinID == clang::ARM::BI__builtin_arm_ldrexd || 8743 ((BuiltinID == clang::ARM::BI__builtin_arm_ldrex || 8744 BuiltinID == clang::ARM::BI__builtin_arm_ldaex) && 8745 getContext().getTypeSize(E->getType()) == 64) || 8746 BuiltinID == clang::ARM::BI__ldrexd) { 8747 Function *F; 8748 8749 switch (BuiltinID) { 8750 default: llvm_unreachable("unexpected builtin"); 8751 case clang::ARM::BI__builtin_arm_ldaex: 8752 F = CGM.getIntrinsic(Intrinsic::arm_ldaexd); 8753 break; 8754 case clang::ARM::BI__builtin_arm_ldrexd: 8755 case clang::ARM::BI__builtin_arm_ldrex: 8756 case clang::ARM::BI__ldrexd: 8757 F = CGM.getIntrinsic(Intrinsic::arm_ldrexd); 8758 break; 8759 } 8760 8761 Value *LdPtr = EmitScalarExpr(E->getArg(0)); 8762 Value *Val = Builder.CreateCall(F, LdPtr, "ldrexd"); 8763 8764 Value *Val0 = Builder.CreateExtractValue(Val, 1); 8765 Value *Val1 = Builder.CreateExtractValue(Val, 0); 8766 Val0 = Builder.CreateZExt(Val0, Int64Ty); 8767 Val1 = Builder.CreateZExt(Val1, Int64Ty); 8768 8769 Value *ShiftCst = llvm::ConstantInt::get(Int64Ty, 32); 8770 Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */); 8771 Val = Builder.CreateOr(Val, Val1); 8772 return Builder.CreateBitCast(Val, ConvertType(E->getType())); 8773 } 8774 8775 if (BuiltinID == clang::ARM::BI__builtin_arm_ldrex || 8776 BuiltinID == clang::ARM::BI__builtin_arm_ldaex) { 8777 Value *LoadAddr = EmitScalarExpr(E->getArg(0)); 8778 8779 QualType Ty = E->getType(); 8780 llvm::Type *RealResTy = ConvertType(Ty); 8781 llvm::Type *IntTy = 8782 llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty)); 8783 8784 Function *F = CGM.getIntrinsic( 8785 BuiltinID == clang::ARM::BI__builtin_arm_ldaex ? 
Intrinsic::arm_ldaex 8786 : Intrinsic::arm_ldrex, 8787 UnqualPtrTy); 8788 CallInst *Val = Builder.CreateCall(F, LoadAddr, "ldrex"); 8789 Val->addParamAttr( 8790 0, Attribute::get(getLLVMContext(), Attribute::ElementType, IntTy)); 8791 8792 if (RealResTy->isPointerTy()) 8793 return Builder.CreateIntToPtr(Val, RealResTy); 8794 else { 8795 llvm::Type *IntResTy = llvm::IntegerType::get( 8796 getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy)); 8797 return Builder.CreateBitCast(Builder.CreateTruncOrBitCast(Val, IntResTy), 8798 RealResTy); 8799 } 8800 } 8801 8802 if (BuiltinID == clang::ARM::BI__builtin_arm_strexd || 8803 ((BuiltinID == clang::ARM::BI__builtin_arm_stlex || 8804 BuiltinID == clang::ARM::BI__builtin_arm_strex) && 8805 getContext().getTypeSize(E->getArg(0)->getType()) == 64)) { 8806 Function *F = CGM.getIntrinsic( 8807 BuiltinID == clang::ARM::BI__builtin_arm_stlex ? Intrinsic::arm_stlexd 8808 : Intrinsic::arm_strexd); 8809 llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty); 8810 8811 Address Tmp = CreateMemTemp(E->getArg(0)->getType()); 8812 Value *Val = EmitScalarExpr(E->getArg(0)); 8813 Builder.CreateStore(Val, Tmp); 8814 8815 Address LdPtr = Tmp.withElementType(STy); 8816 Val = Builder.CreateLoad(LdPtr); 8817 8818 Value *Arg0 = Builder.CreateExtractValue(Val, 0); 8819 Value *Arg1 = Builder.CreateExtractValue(Val, 1); 8820 Value *StPtr = EmitScalarExpr(E->getArg(1)); 8821 return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "strexd"); 8822 } 8823 8824 if (BuiltinID == clang::ARM::BI__builtin_arm_strex || 8825 BuiltinID == clang::ARM::BI__builtin_arm_stlex) { 8826 Value *StoreVal = EmitScalarExpr(E->getArg(0)); 8827 Value *StoreAddr = EmitScalarExpr(E->getArg(1)); 8828 8829 QualType Ty = E->getArg(0)->getType(); 8830 llvm::Type *StoreTy = 8831 llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty)); 8832 8833 if (StoreVal->getType()->isPointerTy()) 8834 StoreVal = Builder.CreatePtrToInt(StoreVal, Int32Ty); 8835 else { 8836 llvm::Type *IntTy = llvm::IntegerType::get( 8837 getLLVMContext(), 8838 CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType())); 8839 StoreVal = Builder.CreateBitCast(StoreVal, IntTy); 8840 StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int32Ty); 8841 } 8842 8843 Function *F = CGM.getIntrinsic( 8844 BuiltinID == clang::ARM::BI__builtin_arm_stlex ? 
Intrinsic::arm_stlex 8845 : Intrinsic::arm_strex, 8846 StoreAddr->getType()); 8847 8848 CallInst *CI = Builder.CreateCall(F, {StoreVal, StoreAddr}, "strex"); 8849 CI->addParamAttr( 8850 1, Attribute::get(getLLVMContext(), Attribute::ElementType, StoreTy)); 8851 return CI; 8852 } 8853 8854 if (BuiltinID == clang::ARM::BI__builtin_arm_clrex) { 8855 Function *F = CGM.getIntrinsic(Intrinsic::arm_clrex); 8856 return Builder.CreateCall(F); 8857 } 8858 8859 // CRC32 8860 Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic; 8861 switch (BuiltinID) { 8862 case clang::ARM::BI__builtin_arm_crc32b: 8863 CRCIntrinsicID = Intrinsic::arm_crc32b; break; 8864 case clang::ARM::BI__builtin_arm_crc32cb: 8865 CRCIntrinsicID = Intrinsic::arm_crc32cb; break; 8866 case clang::ARM::BI__builtin_arm_crc32h: 8867 CRCIntrinsicID = Intrinsic::arm_crc32h; break; 8868 case clang::ARM::BI__builtin_arm_crc32ch: 8869 CRCIntrinsicID = Intrinsic::arm_crc32ch; break; 8870 case clang::ARM::BI__builtin_arm_crc32w: 8871 case clang::ARM::BI__builtin_arm_crc32d: 8872 CRCIntrinsicID = Intrinsic::arm_crc32w; break; 8873 case clang::ARM::BI__builtin_arm_crc32cw: 8874 case clang::ARM::BI__builtin_arm_crc32cd: 8875 CRCIntrinsicID = Intrinsic::arm_crc32cw; break; 8876 } 8877 8878 if (CRCIntrinsicID != Intrinsic::not_intrinsic) { 8879 Value *Arg0 = EmitScalarExpr(E->getArg(0)); 8880 Value *Arg1 = EmitScalarExpr(E->getArg(1)); 8881 8882 // crc32{c,}d intrinsics are implemented as two calls to crc32{c,}w 8883 // intrinsics, hence we need different codegen for these cases. 8884 if (BuiltinID == clang::ARM::BI__builtin_arm_crc32d || 8885 BuiltinID == clang::ARM::BI__builtin_arm_crc32cd) { 8886 Value *C1 = llvm::ConstantInt::get(Int64Ty, 32); 8887 Value *Arg1a = Builder.CreateTruncOrBitCast(Arg1, Int32Ty); 8888 Value *Arg1b = Builder.CreateLShr(Arg1, C1); 8889 Arg1b = Builder.CreateTruncOrBitCast(Arg1b, Int32Ty); 8890 8891 Function *F = CGM.getIntrinsic(CRCIntrinsicID); 8892 Value *Res = Builder.CreateCall(F, {Arg0, Arg1a}); 8893 return Builder.CreateCall(F, {Res, Arg1b}); 8894 } else { 8895 Arg1 = Builder.CreateZExtOrBitCast(Arg1, Int32Ty); 8896 8897 Function *F = CGM.getIntrinsic(CRCIntrinsicID); 8898 return Builder.CreateCall(F, {Arg0, Arg1}); 8899 } 8900 } 8901 8902 if (BuiltinID == clang::ARM::BI__builtin_arm_rsr || 8903 BuiltinID == clang::ARM::BI__builtin_arm_rsr64 || 8904 BuiltinID == clang::ARM::BI__builtin_arm_rsrp || 8905 BuiltinID == clang::ARM::BI__builtin_arm_wsr || 8906 BuiltinID == clang::ARM::BI__builtin_arm_wsr64 || 8907 BuiltinID == clang::ARM::BI__builtin_arm_wsrp) { 8908 8909 SpecialRegisterAccessKind AccessKind = Write; 8910 if (BuiltinID == clang::ARM::BI__builtin_arm_rsr || 8911 BuiltinID == clang::ARM::BI__builtin_arm_rsr64 || 8912 BuiltinID == clang::ARM::BI__builtin_arm_rsrp) 8913 AccessKind = VolatileRead; 8914 8915 bool IsPointerBuiltin = BuiltinID == clang::ARM::BI__builtin_arm_rsrp || 8916 BuiltinID == clang::ARM::BI__builtin_arm_wsrp; 8917 8918 bool Is64Bit = BuiltinID == clang::ARM::BI__builtin_arm_rsr64 || 8919 BuiltinID == clang::ARM::BI__builtin_arm_wsr64; 8920 8921 llvm::Type *ValueType; 8922 llvm::Type *RegisterType; 8923 if (IsPointerBuiltin) { 8924 ValueType = VoidPtrTy; 8925 RegisterType = Int32Ty; 8926 } else if (Is64Bit) { 8927 ValueType = RegisterType = Int64Ty; 8928 } else { 8929 ValueType = RegisterType = Int32Ty; 8930 } 8931 8932 return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType, 8933 AccessKind); 8934 } 8935 8936 if (BuiltinID == ARM::BI__builtin_sponentry) { 8937 
llvm::Function *F = CGM.getIntrinsic(Intrinsic::sponentry, AllocaInt8PtrTy); 8938 return Builder.CreateCall(F); 8939 } 8940 8941 // Handle MSVC intrinsics before argument evaluation to prevent double 8942 // evaluation. 8943 if (std::optional<MSVCIntrin> MsvcIntId = translateArmToMsvcIntrin(BuiltinID)) 8944 return EmitMSVCBuiltinExpr(*MsvcIntId, E); 8945 8946 // Deal with MVE builtins 8947 if (Value *Result = EmitARMMVEBuiltinExpr(BuiltinID, E, ReturnValue, Arch)) 8948 return Result; 8949 // Handle CDE builtins 8950 if (Value *Result = EmitARMCDEBuiltinExpr(BuiltinID, E, ReturnValue, Arch)) 8951 return Result; 8952 8953 // Some intrinsics are equivalent - if they are use the base intrinsic ID. 8954 auto It = llvm::find_if(NEONEquivalentIntrinsicMap, [BuiltinID](auto &P) { 8955 return P.first == BuiltinID; 8956 }); 8957 if (It != end(NEONEquivalentIntrinsicMap)) 8958 BuiltinID = It->second; 8959 8960 // Find out if any arguments are required to be integer constant 8961 // expressions. 8962 unsigned ICEArguments = 0; 8963 ASTContext::GetBuiltinTypeError Error; 8964 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments); 8965 assert(Error == ASTContext::GE_None && "Should not codegen an error"); 8966 8967 auto getAlignmentValue32 = [&](Address addr) -> Value* { 8968 return Builder.getInt32(addr.getAlignment().getQuantity()); 8969 }; 8970 8971 Address PtrOp0 = Address::invalid(); 8972 Address PtrOp1 = Address::invalid(); 8973 SmallVector<Value*, 4> Ops; 8974 bool HasExtraArg = HasExtraNeonArgument(BuiltinID); 8975 unsigned NumArgs = E->getNumArgs() - (HasExtraArg ? 1 : 0); 8976 for (unsigned i = 0, e = NumArgs; i != e; i++) { 8977 if (i == 0) { 8978 switch (BuiltinID) { 8979 case NEON::BI__builtin_neon_vld1_v: 8980 case NEON::BI__builtin_neon_vld1q_v: 8981 case NEON::BI__builtin_neon_vld1q_lane_v: 8982 case NEON::BI__builtin_neon_vld1_lane_v: 8983 case NEON::BI__builtin_neon_vld1_dup_v: 8984 case NEON::BI__builtin_neon_vld1q_dup_v: 8985 case NEON::BI__builtin_neon_vst1_v: 8986 case NEON::BI__builtin_neon_vst1q_v: 8987 case NEON::BI__builtin_neon_vst1q_lane_v: 8988 case NEON::BI__builtin_neon_vst1_lane_v: 8989 case NEON::BI__builtin_neon_vst2_v: 8990 case NEON::BI__builtin_neon_vst2q_v: 8991 case NEON::BI__builtin_neon_vst2_lane_v: 8992 case NEON::BI__builtin_neon_vst2q_lane_v: 8993 case NEON::BI__builtin_neon_vst3_v: 8994 case NEON::BI__builtin_neon_vst3q_v: 8995 case NEON::BI__builtin_neon_vst3_lane_v: 8996 case NEON::BI__builtin_neon_vst3q_lane_v: 8997 case NEON::BI__builtin_neon_vst4_v: 8998 case NEON::BI__builtin_neon_vst4q_v: 8999 case NEON::BI__builtin_neon_vst4_lane_v: 9000 case NEON::BI__builtin_neon_vst4q_lane_v: 9001 // Get the alignment for the argument in addition to the value; 9002 // we'll use it later. 
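// (For instance, for vld1_v/vld1q_v the alignment captured here is later
// turned into an i32 by getAlignmentValue32 and passed as the explicit
// alignment operand of the arm.neon.vld1 intrinsic call.)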
9003 PtrOp0 = EmitPointerWithAlignment(E->getArg(0)); 9004 Ops.push_back(PtrOp0.emitRawPointer(*this)); 9005 continue; 9006 } 9007 } 9008 if (i == 1) { 9009 switch (BuiltinID) { 9010 case NEON::BI__builtin_neon_vld2_v: 9011 case NEON::BI__builtin_neon_vld2q_v: 9012 case NEON::BI__builtin_neon_vld3_v: 9013 case NEON::BI__builtin_neon_vld3q_v: 9014 case NEON::BI__builtin_neon_vld4_v: 9015 case NEON::BI__builtin_neon_vld4q_v: 9016 case NEON::BI__builtin_neon_vld2_lane_v: 9017 case NEON::BI__builtin_neon_vld2q_lane_v: 9018 case NEON::BI__builtin_neon_vld3_lane_v: 9019 case NEON::BI__builtin_neon_vld3q_lane_v: 9020 case NEON::BI__builtin_neon_vld4_lane_v: 9021 case NEON::BI__builtin_neon_vld4q_lane_v: 9022 case NEON::BI__builtin_neon_vld2_dup_v: 9023 case NEON::BI__builtin_neon_vld2q_dup_v: 9024 case NEON::BI__builtin_neon_vld3_dup_v: 9025 case NEON::BI__builtin_neon_vld3q_dup_v: 9026 case NEON::BI__builtin_neon_vld4_dup_v: 9027 case NEON::BI__builtin_neon_vld4q_dup_v: 9028 // Get the alignment for the argument in addition to the value; 9029 // we'll use it later. 9030 PtrOp1 = EmitPointerWithAlignment(E->getArg(1)); 9031 Ops.push_back(PtrOp1.emitRawPointer(*this)); 9032 continue; 9033 } 9034 } 9035 9036 Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E)); 9037 } 9038 9039 switch (BuiltinID) { 9040 default: break; 9041 9042 case NEON::BI__builtin_neon_vget_lane_i8: 9043 case NEON::BI__builtin_neon_vget_lane_i16: 9044 case NEON::BI__builtin_neon_vget_lane_i32: 9045 case NEON::BI__builtin_neon_vget_lane_i64: 9046 case NEON::BI__builtin_neon_vget_lane_bf16: 9047 case NEON::BI__builtin_neon_vget_lane_f32: 9048 case NEON::BI__builtin_neon_vgetq_lane_i8: 9049 case NEON::BI__builtin_neon_vgetq_lane_i16: 9050 case NEON::BI__builtin_neon_vgetq_lane_i32: 9051 case NEON::BI__builtin_neon_vgetq_lane_i64: 9052 case NEON::BI__builtin_neon_vgetq_lane_bf16: 9053 case NEON::BI__builtin_neon_vgetq_lane_f32: 9054 case NEON::BI__builtin_neon_vduph_lane_bf16: 9055 case NEON::BI__builtin_neon_vduph_laneq_bf16: 9056 return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane"); 9057 9058 case NEON::BI__builtin_neon_vrndns_f32: { 9059 Value *Arg = EmitScalarExpr(E->getArg(0)); 9060 llvm::Type *Tys[] = {Arg->getType()}; 9061 Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vrintn, Tys); 9062 return Builder.CreateCall(F, {Arg}, "vrndn"); } 9063 9064 case NEON::BI__builtin_neon_vset_lane_i8: 9065 case NEON::BI__builtin_neon_vset_lane_i16: 9066 case NEON::BI__builtin_neon_vset_lane_i32: 9067 case NEON::BI__builtin_neon_vset_lane_i64: 9068 case NEON::BI__builtin_neon_vset_lane_bf16: 9069 case NEON::BI__builtin_neon_vset_lane_f32: 9070 case NEON::BI__builtin_neon_vsetq_lane_i8: 9071 case NEON::BI__builtin_neon_vsetq_lane_i16: 9072 case NEON::BI__builtin_neon_vsetq_lane_i32: 9073 case NEON::BI__builtin_neon_vsetq_lane_i64: 9074 case NEON::BI__builtin_neon_vsetq_lane_bf16: 9075 case NEON::BI__builtin_neon_vsetq_lane_f32: 9076 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane"); 9077 9078 case NEON::BI__builtin_neon_vsha1h_u32: 9079 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1h), Ops, 9080 "vsha1h"); 9081 case NEON::BI__builtin_neon_vsha1cq_u32: 9082 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1c), Ops, 9083 "vsha1h"); 9084 case NEON::BI__builtin_neon_vsha1pq_u32: 9085 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1p), Ops, 9086 "vsha1h"); 9087 case NEON::BI__builtin_neon_vsha1mq_u32: 9088 return 
EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1m), Ops, 9089 "vsha1h"); 9090 9091 case NEON::BI__builtin_neon_vcvth_bf16_f32: { 9092 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vcvtbfp2bf), Ops, 9093 "vcvtbfp2bf"); 9094 } 9095 9096 // The ARM _MoveToCoprocessor builtins put the input register value as 9097 // the first argument, but the LLVM intrinsic expects it as the third one. 9098 case clang::ARM::BI_MoveToCoprocessor: 9099 case clang::ARM::BI_MoveToCoprocessor2: { 9100 Function *F = CGM.getIntrinsic(BuiltinID == clang::ARM::BI_MoveToCoprocessor 9101 ? Intrinsic::arm_mcr 9102 : Intrinsic::arm_mcr2); 9103 return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0], 9104 Ops[3], Ops[4], Ops[5]}); 9105 } 9106 } 9107 9108 // Get the last argument, which specifies the vector type. 9109 assert(HasExtraArg); 9110 const Expr *Arg = E->getArg(E->getNumArgs()-1); 9111 std::optional<llvm::APSInt> Result = 9112 Arg->getIntegerConstantExpr(getContext()); 9113 if (!Result) 9114 return nullptr; 9115 9116 if (BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_f || 9117 BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_d) { 9118 // Determine the overloaded type of this builtin. 9119 llvm::Type *Ty; 9120 if (BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_f) 9121 Ty = FloatTy; 9122 else 9123 Ty = DoubleTy; 9124 9125 // Determine whether this is an unsigned conversion or not. 9126 bool usgn = Result->getZExtValue() == 1; 9127 unsigned Int = usgn ? Intrinsic::arm_vcvtru : Intrinsic::arm_vcvtr; 9128 9129 // Call the appropriate intrinsic. 9130 Function *F = CGM.getIntrinsic(Int, Ty); 9131 return Builder.CreateCall(F, Ops, "vcvtr"); 9132 } 9133 9134 // Determine the type of this overloaded NEON intrinsic. 9135 NeonTypeFlags Type = Result->getZExtValue(); 9136 bool usgn = Type.isUnsigned(); 9137 bool rightShift = false; 9138 9139 llvm::FixedVectorType *VTy = 9140 GetNeonType(this, Type, getTarget().hasLegalHalfType(), false, 9141 getTarget().hasBFloat16Type()); 9142 llvm::Type *Ty = VTy; 9143 if (!Ty) 9144 return nullptr; 9145 9146 // Many NEON builtins have identical semantics and uses in ARM and 9147 // AArch64. Emit these in a single function. 9148 auto IntrinsicMap = ArrayRef(ARMSIMDIntrinsicMap); 9149 const ARMVectorIntrinsicInfo *Builtin = findARMVectorIntrinsicInMap( 9150 IntrinsicMap, BuiltinID, NEONSIMDIntrinsicsProvenSorted); 9151 if (Builtin) 9152 return EmitCommonNeonBuiltinExpr( 9153 Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic, 9154 Builtin->NameHint, Builtin->TypeModifier, E, Ops, PtrOp0, PtrOp1, Arch); 9155 9156 unsigned Int; 9157 switch (BuiltinID) { 9158 default: return nullptr; 9159 case NEON::BI__builtin_neon_vld1q_lane_v: 9160 // Handle 64-bit integer elements as a special case. Use shuffles of 9161 // one-element vectors to avoid poor code for i64 in the backend. 9162 if (VTy->getElementType()->isIntegerTy(64)) { 9163 // Extract the other lane. 9164 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 9165 int Lane = cast<ConstantInt>(Ops[2])->getZExtValue(); 9166 Value *SV = llvm::ConstantVector::get(ConstantInt::get(Int32Ty, 1-Lane)); 9167 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV); 9168 // Load the value as a one-element vector. 9169 Ty = llvm::FixedVectorType::get(VTy->getElementType(), 1); 9170 llvm::Type *Tys[] = {Ty, Int8PtrTy}; 9171 Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld1, Tys); 9172 Value *Align = getAlignmentValue32(PtrOp0); 9173 Value *Ld = Builder.CreateCall(F, {Ops[0], Align}); 9174 // Combine them. 
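// (e.g. for Lane == 1 the indices below are {0, 1}: element 0 keeps the
// untouched lane extracted above, element 1 takes the newly loaded value.)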
9175 int Indices[] = {1 - Lane, Lane}; 9176 return Builder.CreateShuffleVector(Ops[1], Ld, Indices, "vld1q_lane"); 9177 } 9178 [[fallthrough]]; 9179 case NEON::BI__builtin_neon_vld1_lane_v: { 9180 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 9181 PtrOp0 = PtrOp0.withElementType(VTy->getElementType()); 9182 Value *Ld = Builder.CreateLoad(PtrOp0); 9183 return Builder.CreateInsertElement(Ops[1], Ld, Ops[2], "vld1_lane"); 9184 } 9185 case NEON::BI__builtin_neon_vqrshrn_n_v: 9186 Int = 9187 usgn ? Intrinsic::arm_neon_vqrshiftnu : Intrinsic::arm_neon_vqrshiftns; 9188 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n", 9189 1, true); 9190 case NEON::BI__builtin_neon_vqrshrun_n_v: 9191 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrshiftnsu, Ty), 9192 Ops, "vqrshrun_n", 1, true); 9193 case NEON::BI__builtin_neon_vqshrn_n_v: 9194 Int = usgn ? Intrinsic::arm_neon_vqshiftnu : Intrinsic::arm_neon_vqshiftns; 9195 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n", 9196 1, true); 9197 case NEON::BI__builtin_neon_vqshrun_n_v: 9198 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftnsu, Ty), 9199 Ops, "vqshrun_n", 1, true); 9200 case NEON::BI__builtin_neon_vrecpe_v: 9201 case NEON::BI__builtin_neon_vrecpeq_v: 9202 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecpe, Ty), 9203 Ops, "vrecpe"); 9204 case NEON::BI__builtin_neon_vrshrn_n_v: 9205 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrshiftn, Ty), 9206 Ops, "vrshrn_n", 1, true); 9207 case NEON::BI__builtin_neon_vrsra_n_v: 9208 case NEON::BI__builtin_neon_vrsraq_n_v: 9209 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 9210 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 9211 Ops[2] = EmitNeonShiftVector(Ops[2], Ty, true); 9212 Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts; 9213 Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Ty), {Ops[1], Ops[2]}); 9214 return Builder.CreateAdd(Ops[0], Ops[1], "vrsra_n"); 9215 case NEON::BI__builtin_neon_vsri_n_v: 9216 case NEON::BI__builtin_neon_vsriq_n_v: 9217 rightShift = true; 9218 [[fallthrough]]; 9219 case NEON::BI__builtin_neon_vsli_n_v: 9220 case NEON::BI__builtin_neon_vsliq_n_v: 9221 Ops[2] = EmitNeonShiftVector(Ops[2], Ty, rightShift); 9222 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vshiftins, Ty), 9223 Ops, "vsli_n"); 9224 case NEON::BI__builtin_neon_vsra_n_v: 9225 case NEON::BI__builtin_neon_vsraq_n_v: 9226 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 9227 Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n"); 9228 return Builder.CreateAdd(Ops[0], Ops[1]); 9229 case NEON::BI__builtin_neon_vst1q_lane_v: 9230 // Handle 64-bit integer elements as a special case. Use a shuffle to get 9231 // a one-element vector and avoid poor code for i64 in the backend. 
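// Sketch of the i64 path below: the requested lane is extracted as a
// one-element <1 x i64> vector via a shufflevector, and that vector is then
// stored through the arm.neon.vst1 intrinsic with an explicit alignment
// operand.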
9232 if (VTy->getElementType()->isIntegerTy(64)) { 9233 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 9234 Value *SV = llvm::ConstantVector::get(cast<llvm::Constant>(Ops[2])); 9235 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV); 9236 Ops[2] = getAlignmentValue32(PtrOp0); 9237 llvm::Type *Tys[] = {Int8PtrTy, Ops[1]->getType()}; 9238 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst1, 9239 Tys), Ops); 9240 } 9241 [[fallthrough]]; 9242 case NEON::BI__builtin_neon_vst1_lane_v: { 9243 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 9244 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]); 9245 return Builder.CreateStore(Ops[1], 9246 PtrOp0.withElementType(Ops[1]->getType())); 9247 } 9248 case NEON::BI__builtin_neon_vtbl1_v: 9249 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl1), 9250 Ops, "vtbl1"); 9251 case NEON::BI__builtin_neon_vtbl2_v: 9252 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl2), 9253 Ops, "vtbl2"); 9254 case NEON::BI__builtin_neon_vtbl3_v: 9255 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl3), 9256 Ops, "vtbl3"); 9257 case NEON::BI__builtin_neon_vtbl4_v: 9258 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl4), 9259 Ops, "vtbl4"); 9260 case NEON::BI__builtin_neon_vtbx1_v: 9261 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx1), 9262 Ops, "vtbx1"); 9263 case NEON::BI__builtin_neon_vtbx2_v: 9264 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx2), 9265 Ops, "vtbx2"); 9266 case NEON::BI__builtin_neon_vtbx3_v: 9267 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx3), 9268 Ops, "vtbx3"); 9269 case NEON::BI__builtin_neon_vtbx4_v: 9270 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx4), 9271 Ops, "vtbx4"); 9272 } 9273 } 9274 9275 template<typename Integer> 9276 static Integer GetIntegerConstantValue(const Expr *E, ASTContext &Context) { 9277 return E->getIntegerConstantExpr(Context)->getExtValue(); 9278 } 9279 9280 static llvm::Value *SignOrZeroExtend(CGBuilderTy &Builder, llvm::Value *V, 9281 llvm::Type *T, bool Unsigned) { 9282 // Helper function called by Tablegen-constructed ARM MVE builtin codegen, 9283 // which finds it convenient to specify signed/unsigned as a boolean flag. 9284 return Unsigned ? Builder.CreateZExt(V, T) : Builder.CreateSExt(V, T); 9285 } 9286 9287 static llvm::Value *MVEImmediateShr(CGBuilderTy &Builder, llvm::Value *V, 9288 uint32_t Shift, bool Unsigned) { 9289 // MVE helper function for integer shift right. This must handle signed vs 9290 // unsigned, and also deal specially with the case where the shift count is 9291 // equal to the lane size. In LLVM IR, an LShr with that parameter would be 9292 // undefined behavior, but in MVE it's legal, so we must convert it to code 9293 // that is not undefined in IR. 9294 unsigned LaneBits = cast<llvm::VectorType>(V->getType()) 9295 ->getElementType() 9296 ->getPrimitiveSizeInBits(); 9297 if (Shift == LaneBits) { 9298 // An unsigned shift of the full lane size always generates zero, so we can 9299 // simply emit a zero vector. A signed shift of the full lane size does the 9300 // same thing as shifting by one bit fewer. 9301 if (Unsigned) 9302 return llvm::Constant::getNullValue(V->getType()); 9303 else 9304 --Shift; 9305 } 9306 return Unsigned ? 
Builder.CreateLShr(V, Shift) : Builder.CreateAShr(V, Shift); 9307 } 9308 9309 static llvm::Value *ARMMVEVectorSplat(CGBuilderTy &Builder, llvm::Value *V) { 9310 // MVE-specific helper function for a vector splat, which infers the element 9311 // count of the output vector by knowing that MVE vectors are all 128 bits 9312 // wide. 9313 unsigned Elements = 128 / V->getType()->getPrimitiveSizeInBits(); 9314 return Builder.CreateVectorSplat(Elements, V); 9315 } 9316 9317 static llvm::Value *ARMMVEVectorReinterpret(CGBuilderTy &Builder, 9318 CodeGenFunction *CGF, 9319 llvm::Value *V, 9320 llvm::Type *DestType) { 9321 // Convert one MVE vector type into another by reinterpreting its in-register 9322 // format. 9323 // 9324 // Little-endian, this is identical to a bitcast (which reinterprets the 9325 // memory format). But big-endian, they're not necessarily the same, because 9326 // the register and memory formats map to each other differently depending on 9327 // the lane size. 9328 // 9329 // We generate a bitcast whenever we can (if we're little-endian, or if the 9330 // lane sizes are the same anyway). Otherwise we fall back to an IR intrinsic 9331 // that performs the different kind of reinterpretation. 9332 if (CGF->getTarget().isBigEndian() && 9333 V->getType()->getScalarSizeInBits() != DestType->getScalarSizeInBits()) { 9334 return Builder.CreateCall( 9335 CGF->CGM.getIntrinsic(Intrinsic::arm_mve_vreinterpretq, 9336 {DestType, V->getType()}), 9337 V); 9338 } else { 9339 return Builder.CreateBitCast(V, DestType); 9340 } 9341 } 9342 9343 static llvm::Value *VectorUnzip(CGBuilderTy &Builder, llvm::Value *V, bool Odd) { 9344 // Make a shufflevector that extracts every other element of a vector (evens 9345 // or odds, as desired). 9346 SmallVector<int, 16> Indices; 9347 unsigned InputElements = 9348 cast<llvm::FixedVectorType>(V->getType())->getNumElements(); 9349 for (unsigned i = 0; i < InputElements; i += 2) 9350 Indices.push_back(i + Odd); 9351 return Builder.CreateShuffleVector(V, Indices); 9352 } 9353 9354 static llvm::Value *VectorZip(CGBuilderTy &Builder, llvm::Value *V0, 9355 llvm::Value *V1) { 9356 // Make a shufflevector that interleaves two vectors element by element. 9357 assert(V0->getType() == V1->getType() && "Can't zip different vector types"); 9358 SmallVector<int, 16> Indices; 9359 unsigned InputElements = 9360 cast<llvm::FixedVectorType>(V0->getType())->getNumElements(); 9361 for (unsigned i = 0; i < InputElements; i++) { 9362 Indices.push_back(i); 9363 Indices.push_back(i + InputElements); 9364 } 9365 return Builder.CreateShuffleVector(V0, V1, Indices); 9366 } 9367 9368 template<unsigned HighBit, unsigned OtherBits> 9369 static llvm::Value *ARMMVEConstantSplat(CGBuilderTy &Builder, llvm::Type *VT) { 9370 // MVE-specific helper function to make a vector splat of a constant such as 9371 // UINT_MAX or INT_MIN, in which all bits below the highest one are equal. 9372 llvm::Type *T = cast<llvm::VectorType>(VT)->getElementType(); 9373 unsigned LaneBits = T->getPrimitiveSizeInBits(); 9374 uint32_t Value = HighBit << (LaneBits - 1); 9375 if (OtherBits) 9376 Value |= (1UL << (LaneBits - 1)) - 1; 9377 llvm::Value *Lane = llvm::ConstantInt::get(T, Value); 9378 return ARMMVEVectorSplat(Builder, Lane); 9379 } 9380 9381 static llvm::Value *ARMMVEVectorElementReverse(CGBuilderTy &Builder, 9382 llvm::Value *V, 9383 unsigned ReverseWidth) { 9384 // MVE-specific helper function which reverses the elements of a 9385 // vector within every (ReverseWidth)-bit collection of lanes. 
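// For example, with 8-bit lanes and ReverseWidth == 32 the mask computed
// below is 3, so the shuffle indices become 3,2,1,0, 7,6,5,4, ... reversing
// each 32-bit group of lanes in place.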
9386 SmallVector<int, 16> Indices; 9387 unsigned LaneSize = V->getType()->getScalarSizeInBits(); 9388 unsigned Elements = 128 / LaneSize; 9389 unsigned Mask = ReverseWidth / LaneSize - 1; 9390 for (unsigned i = 0; i < Elements; i++) 9391 Indices.push_back(i ^ Mask); 9392 return Builder.CreateShuffleVector(V, Indices); 9393 } 9394 9395 Value *CodeGenFunction::EmitARMMVEBuiltinExpr(unsigned BuiltinID, 9396 const CallExpr *E, 9397 ReturnValueSlot ReturnValue, 9398 llvm::Triple::ArchType Arch) { 9399 enum class CustomCodeGen { VLD24, VST24 } CustomCodeGenType; 9400 Intrinsic::ID IRIntr; 9401 unsigned NumVectors; 9402 9403 // Code autogenerated by Tablegen will handle all the simple builtins. 9404 switch (BuiltinID) { 9405 #include "clang/Basic/arm_mve_builtin_cg.inc" 9406 9407 // If we didn't match an MVE builtin id at all, go back to the 9408 // main EmitARMBuiltinExpr. 9409 default: 9410 return nullptr; 9411 } 9412 9413 // Anything that breaks from that switch is an MVE builtin that 9414 // needs handwritten code to generate. 9415 9416 switch (CustomCodeGenType) { 9417 9418 case CustomCodeGen::VLD24: { 9419 llvm::SmallVector<Value *, 4> Ops; 9420 llvm::SmallVector<llvm::Type *, 4> Tys; 9421 9422 auto MvecCType = E->getType(); 9423 auto MvecLType = ConvertType(MvecCType); 9424 assert(MvecLType->isStructTy() && 9425 "Return type for vld[24]q should be a struct"); 9426 assert(MvecLType->getStructNumElements() == 1 && 9427 "Return-type struct for vld[24]q should have one element"); 9428 auto MvecLTypeInner = MvecLType->getStructElementType(0); 9429 assert(MvecLTypeInner->isArrayTy() && 9430 "Return-type struct for vld[24]q should contain an array"); 9431 assert(MvecLTypeInner->getArrayNumElements() == NumVectors && 9432 "Array member of return-type struct vld[24]q has wrong length"); 9433 auto VecLType = MvecLTypeInner->getArrayElementType(); 9434 9435 Tys.push_back(VecLType); 9436 9437 auto Addr = E->getArg(0); 9438 Ops.push_back(EmitScalarExpr(Addr)); 9439 Tys.push_back(ConvertType(Addr->getType())); 9440 9441 Function *F = CGM.getIntrinsic(IRIntr, ArrayRef(Tys)); 9442 Value *LoadResult = Builder.CreateCall(F, Ops); 9443 Value *MvecOut = PoisonValue::get(MvecLType); 9444 for (unsigned i = 0; i < NumVectors; ++i) { 9445 Value *Vec = Builder.CreateExtractValue(LoadResult, i); 9446 MvecOut = Builder.CreateInsertValue(MvecOut, Vec, {0, i}); 9447 } 9448 9449 if (ReturnValue.isNull()) 9450 return MvecOut; 9451 else 9452 return Builder.CreateStore(MvecOut, ReturnValue.getAddress()); 9453 } 9454 9455 case CustomCodeGen::VST24: { 9456 llvm::SmallVector<Value *, 4> Ops; 9457 llvm::SmallVector<llvm::Type *, 4> Tys; 9458 9459 auto Addr = E->getArg(0); 9460 Ops.push_back(EmitScalarExpr(Addr)); 9461 Tys.push_back(ConvertType(Addr->getType())); 9462 9463 auto MvecCType = E->getArg(1)->getType(); 9464 auto MvecLType = ConvertType(MvecCType); 9465 assert(MvecLType->isStructTy() && "Data type for vst2q should be a struct"); 9466 assert(MvecLType->getStructNumElements() == 1 && 9467 "Data-type struct for vst2q should have one element"); 9468 auto MvecLTypeInner = MvecLType->getStructElementType(0); 9469 assert(MvecLTypeInner->isArrayTy() && 9470 "Data-type struct for vst2q should contain an array"); 9471 assert(MvecLTypeInner->getArrayNumElements() == NumVectors && 9472 "Array member of return-type struct vld[24]q has wrong length"); 9473 auto VecLType = MvecLTypeInner->getArrayElementType(); 9474 9475 Tys.push_back(VecLType); 9476 9477 AggValueSlot MvecSlot = CreateAggTemp(MvecCType); 9478 
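// Evaluate the vector-struct argument into the temporary slot so its
// individual vectors can be extracted below.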
EmitAggExpr(E->getArg(1), MvecSlot); 9479 auto Mvec = Builder.CreateLoad(MvecSlot.getAddress()); 9480 for (unsigned i = 0; i < NumVectors; i++) 9481 Ops.push_back(Builder.CreateExtractValue(Mvec, {0, i})); 9482 9483 Function *F = CGM.getIntrinsic(IRIntr, ArrayRef(Tys)); 9484 Value *ToReturn = nullptr; 9485 for (unsigned i = 0; i < NumVectors; i++) { 9486 Ops.push_back(llvm::ConstantInt::get(Int32Ty, i)); 9487 ToReturn = Builder.CreateCall(F, Ops); 9488 Ops.pop_back(); 9489 } 9490 return ToReturn; 9491 } 9492 } 9493 llvm_unreachable("unknown custom codegen type."); 9494 } 9495 9496 Value *CodeGenFunction::EmitARMCDEBuiltinExpr(unsigned BuiltinID, 9497 const CallExpr *E, 9498 ReturnValueSlot ReturnValue, 9499 llvm::Triple::ArchType Arch) { 9500 switch (BuiltinID) { 9501 default: 9502 return nullptr; 9503 #include "clang/Basic/arm_cde_builtin_cg.inc" 9504 } 9505 } 9506 9507 static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID, 9508 const CallExpr *E, 9509 SmallVectorImpl<Value *> &Ops, 9510 llvm::Triple::ArchType Arch) { 9511 unsigned int Int = 0; 9512 const char *s = nullptr; 9513 9514 switch (BuiltinID) { 9515 default: 9516 return nullptr; 9517 case NEON::BI__builtin_neon_vtbl1_v: 9518 case NEON::BI__builtin_neon_vqtbl1_v: 9519 case NEON::BI__builtin_neon_vqtbl1q_v: 9520 case NEON::BI__builtin_neon_vtbl2_v: 9521 case NEON::BI__builtin_neon_vqtbl2_v: 9522 case NEON::BI__builtin_neon_vqtbl2q_v: 9523 case NEON::BI__builtin_neon_vtbl3_v: 9524 case NEON::BI__builtin_neon_vqtbl3_v: 9525 case NEON::BI__builtin_neon_vqtbl3q_v: 9526 case NEON::BI__builtin_neon_vtbl4_v: 9527 case NEON::BI__builtin_neon_vqtbl4_v: 9528 case NEON::BI__builtin_neon_vqtbl4q_v: 9529 break; 9530 case NEON::BI__builtin_neon_vtbx1_v: 9531 case NEON::BI__builtin_neon_vqtbx1_v: 9532 case NEON::BI__builtin_neon_vqtbx1q_v: 9533 case NEON::BI__builtin_neon_vtbx2_v: 9534 case NEON::BI__builtin_neon_vqtbx2_v: 9535 case NEON::BI__builtin_neon_vqtbx2q_v: 9536 case NEON::BI__builtin_neon_vtbx3_v: 9537 case NEON::BI__builtin_neon_vqtbx3_v: 9538 case NEON::BI__builtin_neon_vqtbx3q_v: 9539 case NEON::BI__builtin_neon_vtbx4_v: 9540 case NEON::BI__builtin_neon_vqtbx4_v: 9541 case NEON::BI__builtin_neon_vqtbx4q_v: 9542 break; 9543 } 9544 9545 assert(E->getNumArgs() >= 3); 9546 9547 // Get the last argument, which specifies the vector type. 9548 const Expr *Arg = E->getArg(E->getNumArgs() - 1); 9549 std::optional<llvm::APSInt> Result = 9550 Arg->getIntegerConstantExpr(CGF.getContext()); 9551 if (!Result) 9552 return nullptr; 9553 9554 // Determine the type of this overloaded NEON intrinsic. 9555 NeonTypeFlags Type = Result->getZExtValue(); 9556 llvm::FixedVectorType *Ty = GetNeonType(&CGF, Type); 9557 if (!Ty) 9558 return nullptr; 9559 9560 CodeGen::CGBuilderTy &Builder = CGF.Builder; 9561 9562 // AArch64 scalar builtins are not overloaded, they do not have an extra 9563 // argument that specifies the vector type, need to handle each case. 
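  // For instance, the 64-bit vtbl2 builtin takes two d-sized table vectors; in
  // the cases below they are packed into a single 128-bit table register and
  // lowered through the aarch64_neon_tbl1 intrinsic, so several builtins can
  // share one underlying intrinsic.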
9564 switch (BuiltinID) { 9565 case NEON::BI__builtin_neon_vtbl1_v: { 9566 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 1), nullptr, Ops[1], 9567 Ty, Intrinsic::aarch64_neon_tbl1, "vtbl1"); 9568 } 9569 case NEON::BI__builtin_neon_vtbl2_v: { 9570 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 2), nullptr, Ops[2], 9571 Ty, Intrinsic::aarch64_neon_tbl1, "vtbl1"); 9572 } 9573 case NEON::BI__builtin_neon_vtbl3_v: { 9574 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 3), nullptr, Ops[3], 9575 Ty, Intrinsic::aarch64_neon_tbl2, "vtbl2"); 9576 } 9577 case NEON::BI__builtin_neon_vtbl4_v: { 9578 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 4), nullptr, Ops[4], 9579 Ty, Intrinsic::aarch64_neon_tbl2, "vtbl2"); 9580 } 9581 case NEON::BI__builtin_neon_vtbx1_v: { 9582 Value *TblRes = 9583 packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 1), nullptr, Ops[2], Ty, 9584 Intrinsic::aarch64_neon_tbl1, "vtbl1"); 9585 9586 llvm::Constant *EightV = ConstantInt::get(Ty, 8); 9587 Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[2], EightV); 9588 CmpRes = Builder.CreateSExt(CmpRes, Ty); 9589 9590 Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]); 9591 Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes); 9592 return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx"); 9593 } 9594 case NEON::BI__builtin_neon_vtbx2_v: { 9595 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 2), Ops[0], Ops[3], 9596 Ty, Intrinsic::aarch64_neon_tbx1, "vtbx1"); 9597 } 9598 case NEON::BI__builtin_neon_vtbx3_v: { 9599 Value *TblRes = 9600 packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 3), nullptr, Ops[4], Ty, 9601 Intrinsic::aarch64_neon_tbl2, "vtbl2"); 9602 9603 llvm::Constant *TwentyFourV = ConstantInt::get(Ty, 24); 9604 Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[4], 9605 TwentyFourV); 9606 CmpRes = Builder.CreateSExt(CmpRes, Ty); 9607 9608 Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]); 9609 Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes); 9610 return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx"); 9611 } 9612 case NEON::BI__builtin_neon_vtbx4_v: { 9613 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 4), Ops[0], Ops[5], 9614 Ty, Intrinsic::aarch64_neon_tbx2, "vtbx2"); 9615 } 9616 case NEON::BI__builtin_neon_vqtbl1_v: 9617 case NEON::BI__builtin_neon_vqtbl1q_v: 9618 Int = Intrinsic::aarch64_neon_tbl1; s = "vtbl1"; break; 9619 case NEON::BI__builtin_neon_vqtbl2_v: 9620 case NEON::BI__builtin_neon_vqtbl2q_v: { 9621 Int = Intrinsic::aarch64_neon_tbl2; s = "vtbl2"; break; 9622 case NEON::BI__builtin_neon_vqtbl3_v: 9623 case NEON::BI__builtin_neon_vqtbl3q_v: 9624 Int = Intrinsic::aarch64_neon_tbl3; s = "vtbl3"; break; 9625 case NEON::BI__builtin_neon_vqtbl4_v: 9626 case NEON::BI__builtin_neon_vqtbl4q_v: 9627 Int = Intrinsic::aarch64_neon_tbl4; s = "vtbl4"; break; 9628 case NEON::BI__builtin_neon_vqtbx1_v: 9629 case NEON::BI__builtin_neon_vqtbx1q_v: 9630 Int = Intrinsic::aarch64_neon_tbx1; s = "vtbx1"; break; 9631 case NEON::BI__builtin_neon_vqtbx2_v: 9632 case NEON::BI__builtin_neon_vqtbx2q_v: 9633 Int = Intrinsic::aarch64_neon_tbx2; s = "vtbx2"; break; 9634 case NEON::BI__builtin_neon_vqtbx3_v: 9635 case NEON::BI__builtin_neon_vqtbx3q_v: 9636 Int = Intrinsic::aarch64_neon_tbx3; s = "vtbx3"; break; 9637 case NEON::BI__builtin_neon_vqtbx4_v: 9638 case NEON::BI__builtin_neon_vqtbx4q_v: 9639 Int = Intrinsic::aarch64_neon_tbx4; s = "vtbx4"; break; 9640 } 9641 } 9642 9643 if (!Int) 9644 return nullptr; 9645 9646 
Function *F = CGF.CGM.getIntrinsic(Int, Ty); 9647 return CGF.EmitNeonCall(F, Ops, s); 9648 } 9649 9650 Value *CodeGenFunction::vectorWrapScalar16(Value *Op) { 9651 auto *VTy = llvm::FixedVectorType::get(Int16Ty, 4); 9652 Op = Builder.CreateBitCast(Op, Int16Ty); 9653 Value *V = PoisonValue::get(VTy); 9654 llvm::Constant *CI = ConstantInt::get(SizeTy, 0); 9655 Op = Builder.CreateInsertElement(V, Op, CI); 9656 return Op; 9657 } 9658 9659 /// SVEBuiltinMemEltTy - Returns the memory element type for this memory 9660 /// access builtin. Only required if it can't be inferred from the base pointer 9661 /// operand. 9662 llvm::Type *CodeGenFunction::SVEBuiltinMemEltTy(const SVETypeFlags &TypeFlags) { 9663 switch (TypeFlags.getMemEltType()) { 9664 case SVETypeFlags::MemEltTyDefault: 9665 return getEltType(TypeFlags); 9666 case SVETypeFlags::MemEltTyInt8: 9667 return Builder.getInt8Ty(); 9668 case SVETypeFlags::MemEltTyInt16: 9669 return Builder.getInt16Ty(); 9670 case SVETypeFlags::MemEltTyInt32: 9671 return Builder.getInt32Ty(); 9672 case SVETypeFlags::MemEltTyInt64: 9673 return Builder.getInt64Ty(); 9674 } 9675 llvm_unreachable("Unknown MemEltType"); 9676 } 9677 9678 llvm::Type *CodeGenFunction::getEltType(const SVETypeFlags &TypeFlags) { 9679 switch (TypeFlags.getEltType()) { 9680 default: 9681 llvm_unreachable("Invalid SVETypeFlag!"); 9682 9683 case SVETypeFlags::EltTyInt8: 9684 return Builder.getInt8Ty(); 9685 case SVETypeFlags::EltTyInt16: 9686 return Builder.getInt16Ty(); 9687 case SVETypeFlags::EltTyInt32: 9688 return Builder.getInt32Ty(); 9689 case SVETypeFlags::EltTyInt64: 9690 return Builder.getInt64Ty(); 9691 case SVETypeFlags::EltTyInt128: 9692 return Builder.getInt128Ty(); 9693 9694 case SVETypeFlags::EltTyFloat16: 9695 return Builder.getHalfTy(); 9696 case SVETypeFlags::EltTyFloat32: 9697 return Builder.getFloatTy(); 9698 case SVETypeFlags::EltTyFloat64: 9699 return Builder.getDoubleTy(); 9700 9701 case SVETypeFlags::EltTyBFloat16: 9702 return Builder.getBFloatTy(); 9703 9704 case SVETypeFlags::EltTyBool8: 9705 case SVETypeFlags::EltTyBool16: 9706 case SVETypeFlags::EltTyBool32: 9707 case SVETypeFlags::EltTyBool64: 9708 return Builder.getInt1Ty(); 9709 } 9710 } 9711 9712 // Return the llvm predicate vector type corresponding to the specified element 9713 // TypeFlags. 
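// For example, 8-bit elements map to <vscale x 16 x i1>, while 64-bit elements
// (including EltTyFloat64 and EltTyBool64) map to <vscale x 2 x i1>.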
9714 llvm::ScalableVectorType * 9715 CodeGenFunction::getSVEPredType(const SVETypeFlags &TypeFlags) { 9716 switch (TypeFlags.getEltType()) { 9717 default: llvm_unreachable("Unhandled SVETypeFlag!"); 9718 9719 case SVETypeFlags::EltTyInt8: 9720 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16); 9721 case SVETypeFlags::EltTyInt16: 9722 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8); 9723 case SVETypeFlags::EltTyInt32: 9724 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4); 9725 case SVETypeFlags::EltTyInt64: 9726 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2); 9727 9728 case SVETypeFlags::EltTyBFloat16: 9729 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8); 9730 case SVETypeFlags::EltTyFloat16: 9731 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8); 9732 case SVETypeFlags::EltTyFloat32: 9733 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4); 9734 case SVETypeFlags::EltTyFloat64: 9735 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2); 9736 9737 case SVETypeFlags::EltTyBool8: 9738 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16); 9739 case SVETypeFlags::EltTyBool16: 9740 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8); 9741 case SVETypeFlags::EltTyBool32: 9742 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4); 9743 case SVETypeFlags::EltTyBool64: 9744 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2); 9745 } 9746 } 9747 9748 // Return the llvm vector type corresponding to the specified element TypeFlags. 9749 llvm::ScalableVectorType * 9750 CodeGenFunction::getSVEType(const SVETypeFlags &TypeFlags) { 9751 switch (TypeFlags.getEltType()) { 9752 default: 9753 llvm_unreachable("Invalid SVETypeFlag!"); 9754 9755 case SVETypeFlags::EltTyInt8: 9756 return llvm::ScalableVectorType::get(Builder.getInt8Ty(), 16); 9757 case SVETypeFlags::EltTyInt16: 9758 return llvm::ScalableVectorType::get(Builder.getInt16Ty(), 8); 9759 case SVETypeFlags::EltTyInt32: 9760 return llvm::ScalableVectorType::get(Builder.getInt32Ty(), 4); 9761 case SVETypeFlags::EltTyInt64: 9762 return llvm::ScalableVectorType::get(Builder.getInt64Ty(), 2); 9763 9764 case SVETypeFlags::EltTyFloat16: 9765 return llvm::ScalableVectorType::get(Builder.getHalfTy(), 8); 9766 case SVETypeFlags::EltTyBFloat16: 9767 return llvm::ScalableVectorType::get(Builder.getBFloatTy(), 8); 9768 case SVETypeFlags::EltTyFloat32: 9769 return llvm::ScalableVectorType::get(Builder.getFloatTy(), 4); 9770 case SVETypeFlags::EltTyFloat64: 9771 return llvm::ScalableVectorType::get(Builder.getDoubleTy(), 2); 9772 9773 case SVETypeFlags::EltTyBool8: 9774 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16); 9775 case SVETypeFlags::EltTyBool16: 9776 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8); 9777 case SVETypeFlags::EltTyBool32: 9778 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4); 9779 case SVETypeFlags::EltTyBool64: 9780 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2); 9781 } 9782 } 9783 9784 llvm::Value * 9785 CodeGenFunction::EmitSVEAllTruePred(const SVETypeFlags &TypeFlags) { 9786 Function *Ptrue = 9787 CGM.getIntrinsic(Intrinsic::aarch64_sve_ptrue, getSVEPredType(TypeFlags)); 9788 return Builder.CreateCall(Ptrue, {Builder.getInt32(/*SV_ALL*/ 31)}); 9789 } 9790 9791 constexpr unsigned SVEBitsPerBlock = 128; 9792 9793 static llvm::ScalableVectorType *getSVEVectorForElementType(llvm::Type *EltTy) { 9794 unsigned NumElts = SVEBitsPerBlock / EltTy->getScalarSizeInBits(); 9795 
return llvm::ScalableVectorType::get(EltTy, NumElts); 9796 } 9797 9798 // Reinterpret the input predicate so that it can be used to correctly isolate 9799 // the elements of the specified datatype. 9800 Value *CodeGenFunction::EmitSVEPredicateCast(Value *Pred, 9801 llvm::ScalableVectorType *VTy) { 9802 9803 if (isa<TargetExtType>(Pred->getType()) && 9804 cast<TargetExtType>(Pred->getType())->getName() == "aarch64.svcount") 9805 return Pred; 9806 9807 auto *RTy = llvm::VectorType::get(IntegerType::get(getLLVMContext(), 1), VTy); 9808 if (Pred->getType() == RTy) 9809 return Pred; 9810 9811 unsigned IntID; 9812 llvm::Type *IntrinsicTy; 9813 switch (VTy->getMinNumElements()) { 9814 default: 9815 llvm_unreachable("unsupported element count!"); 9816 case 1: 9817 case 2: 9818 case 4: 9819 case 8: 9820 IntID = Intrinsic::aarch64_sve_convert_from_svbool; 9821 IntrinsicTy = RTy; 9822 break; 9823 case 16: 9824 IntID = Intrinsic::aarch64_sve_convert_to_svbool; 9825 IntrinsicTy = Pred->getType(); 9826 break; 9827 } 9828 9829 Function *F = CGM.getIntrinsic(IntID, IntrinsicTy); 9830 Value *C = Builder.CreateCall(F, Pred); 9831 assert(C->getType() == RTy && "Unexpected return type!"); 9832 return C; 9833 } 9834 9835 Value *CodeGenFunction::EmitSVEGatherLoad(const SVETypeFlags &TypeFlags, 9836 SmallVectorImpl<Value *> &Ops, 9837 unsigned IntID) { 9838 auto *ResultTy = getSVEType(TypeFlags); 9839 auto *OverloadedTy = 9840 llvm::ScalableVectorType::get(SVEBuiltinMemEltTy(TypeFlags), ResultTy); 9841 9842 Function *F = nullptr; 9843 if (Ops[1]->getType()->isVectorTy()) 9844 // This is the "vector base, scalar offset" case. In order to uniquely 9845 // map this built-in to an LLVM IR intrinsic, we need both the return type 9846 // and the type of the vector base. 9847 F = CGM.getIntrinsic(IntID, {OverloadedTy, Ops[1]->getType()}); 9848 else 9849 // This is the "scalar base, vector offset case". The type of the offset 9850 // is encoded in the name of the intrinsic. We only need to specify the 9851 // return type in order to uniquely map this built-in to an LLVM IR 9852 // intrinsic. 9853 F = CGM.getIntrinsic(IntID, OverloadedTy); 9854 9855 // At the ACLE level there's only one predicate type, svbool_t, which is 9856 // mapped to <n x 16 x i1>. However, this might be incompatible with the 9857 // actual type being loaded. For example, when loading doubles (i64) the 9858 // predicate should be <n x 2 x i1> instead. At the IR level the type of 9859 // the predicate and the data being loaded must match. Cast to the type 9860 // expected by the intrinsic. The intrinsic itself should be defined in 9861 // a way than enforces relations between parameter types. 9862 Ops[0] = EmitSVEPredicateCast( 9863 Ops[0], cast<llvm::ScalableVectorType>(F->getArg(0)->getType())); 9864 9865 // Pass 0 when the offset is missing. This can only be applied when using 9866 // the "vector base" addressing mode for which ACLE allows no offset. The 9867 // corresponding LLVM IR always requires an offset. 9868 if (Ops.size() == 2) { 9869 assert(Ops[1]->getType()->isVectorTy() && "Scalar base requires an offset"); 9870 Ops.push_back(ConstantInt::get(Int64Ty, 0)); 9871 } 9872 9873 // For "vector base, scalar index" scale the index so that it becomes a 9874 // scalar offset. 
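  // For example, with 32-bit elements BytesPerElt is 4, so an index i is
  // turned into the byte offset i << 2 before being passed to the intrinsic.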
9875   if (!TypeFlags.isByteIndexed() && Ops[1]->getType()->isVectorTy()) {
9876     unsigned BytesPerElt =
9877         OverloadedTy->getElementType()->getScalarSizeInBits() / 8;
9878     Ops[2] = Builder.CreateShl(Ops[2], Log2_32(BytesPerElt));
9879   }
9880 
9881   Value *Call = Builder.CreateCall(F, Ops);
9882 
9883   // The following sext/zext is only needed when ResultTy != OverloadedTy. In
9884   // other cases it's folded into a nop.
9885   return TypeFlags.isZExtReturn() ? Builder.CreateZExt(Call, ResultTy)
9886                                   : Builder.CreateSExt(Call, ResultTy);
9887 }
9888 
9889 Value *CodeGenFunction::EmitSVEScatterStore(const SVETypeFlags &TypeFlags,
9890                                             SmallVectorImpl<Value *> &Ops,
9891                                             unsigned IntID) {
9892   auto *SrcDataTy = getSVEType(TypeFlags);
9893   auto *OverloadedTy =
9894       llvm::ScalableVectorType::get(SVEBuiltinMemEltTy(TypeFlags), SrcDataTy);
9895 
9896   // In ACLE the source data is passed in the last argument, whereas in LLVM IR
9897   // it's the first argument. Move it accordingly.
9898   Ops.insert(Ops.begin(), Ops.pop_back_val());
9899 
9900   Function *F = nullptr;
9901   if (Ops[2]->getType()->isVectorTy())
9902     // This is the "vector base, scalar offset" case. In order to uniquely
9903     // map this built-in to an LLVM IR intrinsic, we need both the return type
9904     // and the type of the vector base.
9905     F = CGM.getIntrinsic(IntID, {OverloadedTy, Ops[2]->getType()});
9906   else
9907     // This is the "scalar base, vector offset" case. The type of the offset
9908     // is encoded in the name of the intrinsic. We only need to specify the
9909     // return type in order to uniquely map this built-in to an LLVM IR
9910     // intrinsic.
9911     F = CGM.getIntrinsic(IntID, OverloadedTy);
9912 
9913   // Pass 0 when the offset is missing. This can only be applied when using
9914   // the "vector base" addressing mode for which ACLE allows no offset. The
9915   // corresponding LLVM IR always requires an offset.
9916   if (Ops.size() == 3) {
9917     assert(Ops[1]->getType()->isVectorTy() && "Scalar base requires an offset");
9918     Ops.push_back(ConstantInt::get(Int64Ty, 0));
9919   }
9920 
9921   // Truncation is needed when SrcDataTy != OverloadedTy. In other cases it's
9922   // folded into a nop.
9923   Ops[0] = Builder.CreateTrunc(Ops[0], OverloadedTy);
9924 
9925   // At the ACLE level there's only one predicate type, svbool_t, which is
9926   // mapped to <n x 16 x i1>. However, this might be incompatible with the
9927   // actual type being stored. For example, when storing doubles (i64) the
9928   // predicate should be <n x 2 x i1> instead. At the IR level the type of
9929   // the predicate and the data being stored must match. Cast to the type
9930   // expected by the intrinsic. The intrinsic itself should be defined in
9931   // a way that enforces relations between parameter types.
9932   Ops[1] = EmitSVEPredicateCast(
9933       Ops[1], cast<llvm::ScalableVectorType>(F->getArg(1)->getType()));
9934 
9935   // For "vector base, scalar index" scale the index so that it becomes a
9936   // scalar offset.
9937 if (!TypeFlags.isByteIndexed() && Ops[2]->getType()->isVectorTy()) { 9938 unsigned BytesPerElt = 9939 OverloadedTy->getElementType()->getScalarSizeInBits() / 8; 9940 Ops[3] = Builder.CreateShl(Ops[3], Log2_32(BytesPerElt)); 9941 } 9942 9943 return Builder.CreateCall(F, Ops); 9944 } 9945 9946 Value *CodeGenFunction::EmitSVEGatherPrefetch(const SVETypeFlags &TypeFlags, 9947 SmallVectorImpl<Value *> &Ops, 9948 unsigned IntID) { 9949 // The gather prefetches are overloaded on the vector input - this can either 9950 // be the vector of base addresses or vector of offsets. 9951 auto *OverloadedTy = dyn_cast<llvm::ScalableVectorType>(Ops[1]->getType()); 9952 if (!OverloadedTy) 9953 OverloadedTy = cast<llvm::ScalableVectorType>(Ops[2]->getType()); 9954 9955 // Cast the predicate from svbool_t to the right number of elements. 9956 Ops[0] = EmitSVEPredicateCast(Ops[0], OverloadedTy); 9957 9958 // vector + imm addressing modes 9959 if (Ops[1]->getType()->isVectorTy()) { 9960 if (Ops.size() == 3) { 9961 // Pass 0 for 'vector+imm' when the index is omitted. 9962 Ops.push_back(ConstantInt::get(Int64Ty, 0)); 9963 9964 // The sv_prfop is the last operand in the builtin and IR intrinsic. 9965 std::swap(Ops[2], Ops[3]); 9966 } else { 9967 // Index needs to be passed as scaled offset. 9968 llvm::Type *MemEltTy = SVEBuiltinMemEltTy(TypeFlags); 9969 unsigned BytesPerElt = MemEltTy->getPrimitiveSizeInBits() / 8; 9970 if (BytesPerElt > 1) 9971 Ops[2] = Builder.CreateShl(Ops[2], Log2_32(BytesPerElt)); 9972 } 9973 } 9974 9975 Function *F = CGM.getIntrinsic(IntID, OverloadedTy); 9976 return Builder.CreateCall(F, Ops); 9977 } 9978 9979 Value *CodeGenFunction::EmitSVEStructLoad(const SVETypeFlags &TypeFlags, 9980 SmallVectorImpl<Value*> &Ops, 9981 unsigned IntID) { 9982 llvm::ScalableVectorType *VTy = getSVEType(TypeFlags); 9983 9984 unsigned N; 9985 switch (IntID) { 9986 case Intrinsic::aarch64_sve_ld2_sret: 9987 case Intrinsic::aarch64_sve_ld1_pn_x2: 9988 case Intrinsic::aarch64_sve_ldnt1_pn_x2: 9989 case Intrinsic::aarch64_sve_ld2q_sret: 9990 N = 2; 9991 break; 9992 case Intrinsic::aarch64_sve_ld3_sret: 9993 case Intrinsic::aarch64_sve_ld3q_sret: 9994 N = 3; 9995 break; 9996 case Intrinsic::aarch64_sve_ld4_sret: 9997 case Intrinsic::aarch64_sve_ld1_pn_x4: 9998 case Intrinsic::aarch64_sve_ldnt1_pn_x4: 9999 case Intrinsic::aarch64_sve_ld4q_sret: 10000 N = 4; 10001 break; 10002 default: 10003 llvm_unreachable("unknown intrinsic!"); 10004 } 10005 auto RetTy = llvm::VectorType::get(VTy->getElementType(), 10006 VTy->getElementCount() * N); 10007 10008 Value *Predicate = EmitSVEPredicateCast(Ops[0], VTy); 10009 Value *BasePtr = Ops[1]; 10010 10011 // Does the load have an offset? 
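  // If so, the offset counts whole vectors: the GEP below is typed on VTy, so
  // e.g. an offset of 1 advances the base pointer by one full SVE vector.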
10012 if (Ops.size() > 2) 10013 BasePtr = Builder.CreateGEP(VTy, BasePtr, Ops[2]); 10014 10015 Function *F = CGM.getIntrinsic(IntID, {VTy}); 10016 Value *Call = Builder.CreateCall(F, {Predicate, BasePtr}); 10017 unsigned MinElts = VTy->getMinNumElements(); 10018 Value *Ret = llvm::PoisonValue::get(RetTy); 10019 for (unsigned I = 0; I < N; I++) { 10020 Value *Idx = ConstantInt::get(CGM.Int64Ty, I * MinElts); 10021 Value *SRet = Builder.CreateExtractValue(Call, I); 10022 Ret = Builder.CreateInsertVector(RetTy, Ret, SRet, Idx); 10023 } 10024 return Ret; 10025 } 10026 10027 Value *CodeGenFunction::EmitSVEStructStore(const SVETypeFlags &TypeFlags, 10028 SmallVectorImpl<Value*> &Ops, 10029 unsigned IntID) { 10030 llvm::ScalableVectorType *VTy = getSVEType(TypeFlags); 10031 10032 unsigned N; 10033 switch (IntID) { 10034 case Intrinsic::aarch64_sve_st2: 10035 case Intrinsic::aarch64_sve_st1_pn_x2: 10036 case Intrinsic::aarch64_sve_stnt1_pn_x2: 10037 case Intrinsic::aarch64_sve_st2q: 10038 N = 2; 10039 break; 10040 case Intrinsic::aarch64_sve_st3: 10041 case Intrinsic::aarch64_sve_st3q: 10042 N = 3; 10043 break; 10044 case Intrinsic::aarch64_sve_st4: 10045 case Intrinsic::aarch64_sve_st1_pn_x4: 10046 case Intrinsic::aarch64_sve_stnt1_pn_x4: 10047 case Intrinsic::aarch64_sve_st4q: 10048 N = 4; 10049 break; 10050 default: 10051 llvm_unreachable("unknown intrinsic!"); 10052 } 10053 10054 Value *Predicate = EmitSVEPredicateCast(Ops[0], VTy); 10055 Value *BasePtr = Ops[1]; 10056 10057 // Does the store have an offset? 10058 if (Ops.size() > (2 + N)) 10059 BasePtr = Builder.CreateGEP(VTy, BasePtr, Ops[2]); 10060 10061 // The llvm.aarch64.sve.st2/3/4 intrinsics take legal part vectors, so we 10062 // need to break up the tuple vector. 10063 SmallVector<llvm::Value*, 5> Operands; 10064 for (unsigned I = Ops.size() - N; I < Ops.size(); ++I) 10065 Operands.push_back(Ops[I]); 10066 Operands.append({Predicate, BasePtr}); 10067 Function *F = CGM.getIntrinsic(IntID, { VTy }); 10068 10069 return Builder.CreateCall(F, Operands); 10070 } 10071 10072 // SVE2's svpmullb and svpmullt builtins are similar to the svpmullb_pair and 10073 // svpmullt_pair intrinsics, with the exception that their results are bitcast 10074 // to a wider type. 10075 Value *CodeGenFunction::EmitSVEPMull(const SVETypeFlags &TypeFlags, 10076 SmallVectorImpl<Value *> &Ops, 10077 unsigned BuiltinID) { 10078 // Splat scalar operand to vector (intrinsics with _n infix) 10079 if (TypeFlags.hasSplatOperand()) { 10080 unsigned OpNo = TypeFlags.getSplatOperand(); 10081 Ops[OpNo] = EmitSVEDupX(Ops[OpNo]); 10082 } 10083 10084 // The pair-wise function has a narrower overloaded type. 10085 Function *F = CGM.getIntrinsic(BuiltinID, Ops[0]->getType()); 10086 Value *Call = Builder.CreateCall(F, {Ops[0], Ops[1]}); 10087 10088 // Now bitcast to the wider result type. 
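  // For example, for svpmullb_u16 the pair intrinsic (operating, roughly, on
  // the even-numbered u8 lanes) yields a <vscale x 16 x i8> value, which is
  // reinterpreted below as the <vscale x 8 x i16> result type the ACLE builtin
  // advertises. This is a sketch of the widening; the exact lane layout is
  // defined by the ISA.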
10089 llvm::ScalableVectorType *Ty = getSVEType(TypeFlags); 10090 return EmitSVEReinterpret(Call, Ty); 10091 } 10092 10093 Value *CodeGenFunction::EmitSVEMovl(const SVETypeFlags &TypeFlags, 10094 ArrayRef<Value *> Ops, unsigned BuiltinID) { 10095 llvm::Type *OverloadedTy = getSVEType(TypeFlags); 10096 Function *F = CGM.getIntrinsic(BuiltinID, OverloadedTy); 10097 return Builder.CreateCall(F, {Ops[0], Builder.getInt32(0)}); 10098 } 10099 10100 Value *CodeGenFunction::EmitSVEPrefetchLoad(const SVETypeFlags &TypeFlags, 10101 SmallVectorImpl<Value *> &Ops, 10102 unsigned BuiltinID) { 10103 auto *MemEltTy = SVEBuiltinMemEltTy(TypeFlags); 10104 auto *VectorTy = getSVEVectorForElementType(MemEltTy); 10105 auto *MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy); 10106 10107 Value *Predicate = EmitSVEPredicateCast(Ops[0], MemoryTy); 10108 Value *BasePtr = Ops[1]; 10109 10110 // Implement the index operand if not omitted. 10111 if (Ops.size() > 3) 10112 BasePtr = Builder.CreateGEP(MemoryTy, BasePtr, Ops[2]); 10113 10114 Value *PrfOp = Ops.back(); 10115 10116 Function *F = CGM.getIntrinsic(BuiltinID, Predicate->getType()); 10117 return Builder.CreateCall(F, {Predicate, BasePtr, PrfOp}); 10118 } 10119 10120 Value *CodeGenFunction::EmitSVEMaskedLoad(const CallExpr *E, 10121 llvm::Type *ReturnTy, 10122 SmallVectorImpl<Value *> &Ops, 10123 unsigned IntrinsicID, 10124 bool IsZExtReturn) { 10125 QualType LangPTy = E->getArg(1)->getType(); 10126 llvm::Type *MemEltTy = CGM.getTypes().ConvertType( 10127 LangPTy->castAs<PointerType>()->getPointeeType()); 10128 10129 // The vector type that is returned may be different from the 10130 // eventual type loaded from memory. 10131 auto VectorTy = cast<llvm::ScalableVectorType>(ReturnTy); 10132 llvm::ScalableVectorType *MemoryTy = nullptr; 10133 llvm::ScalableVectorType *PredTy = nullptr; 10134 bool IsQuadLoad = false; 10135 switch (IntrinsicID) { 10136 case Intrinsic::aarch64_sve_ld1uwq: 10137 case Intrinsic::aarch64_sve_ld1udq: 10138 MemoryTy = llvm::ScalableVectorType::get(MemEltTy, 1); 10139 PredTy = llvm::ScalableVectorType::get( 10140 llvm::Type::getInt1Ty(getLLVMContext()), 1); 10141 IsQuadLoad = true; 10142 break; 10143 default: 10144 MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy); 10145 PredTy = MemoryTy; 10146 break; 10147 } 10148 10149 Value *Predicate = EmitSVEPredicateCast(Ops[0], PredTy); 10150 Value *BasePtr = Ops[1]; 10151 10152 // Does the load have an offset? 10153 if (Ops.size() > 2) 10154 BasePtr = Builder.CreateGEP(MemoryTy, BasePtr, Ops[2]); 10155 10156 Function *F = CGM.getIntrinsic(IntrinsicID, IsQuadLoad ? VectorTy : MemoryTy); 10157 auto *Load = 10158 cast<llvm::Instruction>(Builder.CreateCall(F, {Predicate, BasePtr})); 10159 auto TBAAInfo = CGM.getTBAAAccessInfo(LangPTy->getPointeeType()); 10160 CGM.DecorateInstructionWithTBAA(Load, TBAAInfo); 10161 10162 if (IsQuadLoad) 10163 return Load; 10164 10165 return IsZExtReturn ? Builder.CreateZExt(Load, VectorTy) 10166 : Builder.CreateSExt(Load, VectorTy); 10167 } 10168 10169 Value *CodeGenFunction::EmitSVEMaskedStore(const CallExpr *E, 10170 SmallVectorImpl<Value *> &Ops, 10171 unsigned IntrinsicID) { 10172 QualType LangPTy = E->getArg(1)->getType(); 10173 llvm::Type *MemEltTy = CGM.getTypes().ConvertType( 10174 LangPTy->castAs<PointerType>()->getPointeeType()); 10175 10176 // The vector type that is stored may be different from the 10177 // eventual type stored to memory. 
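  // For example, svst1b_s32 is given an svint32_t (<vscale x 4 x i32>) value,
  // but only an i8 per element reaches memory, hence the truncation to
  // MemoryTy further down.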
10178 auto VectorTy = cast<llvm::ScalableVectorType>(Ops.back()->getType()); 10179 auto MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy); 10180 10181 auto PredTy = MemoryTy; 10182 auto AddrMemoryTy = MemoryTy; 10183 bool IsQuadStore = false; 10184 10185 switch (IntrinsicID) { 10186 case Intrinsic::aarch64_sve_st1wq: 10187 case Intrinsic::aarch64_sve_st1dq: 10188 AddrMemoryTy = llvm::ScalableVectorType::get(MemEltTy, 1); 10189 PredTy = 10190 llvm::ScalableVectorType::get(IntegerType::get(getLLVMContext(), 1), 1); 10191 IsQuadStore = true; 10192 break; 10193 default: 10194 break; 10195 } 10196 Value *Predicate = EmitSVEPredicateCast(Ops[0], PredTy); 10197 Value *BasePtr = Ops[1]; 10198 10199 // Does the store have an offset? 10200 if (Ops.size() == 4) 10201 BasePtr = Builder.CreateGEP(AddrMemoryTy, BasePtr, Ops[2]); 10202 10203 // Last value is always the data 10204 Value *Val = 10205 IsQuadStore ? Ops.back() : Builder.CreateTrunc(Ops.back(), MemoryTy); 10206 10207 Function *F = 10208 CGM.getIntrinsic(IntrinsicID, IsQuadStore ? VectorTy : MemoryTy); 10209 auto *Store = 10210 cast<llvm::Instruction>(Builder.CreateCall(F, {Val, Predicate, BasePtr})); 10211 auto TBAAInfo = CGM.getTBAAAccessInfo(LangPTy->getPointeeType()); 10212 CGM.DecorateInstructionWithTBAA(Store, TBAAInfo); 10213 return Store; 10214 } 10215 10216 Value *CodeGenFunction::EmitSMELd1St1(const SVETypeFlags &TypeFlags, 10217 SmallVectorImpl<Value *> &Ops, 10218 unsigned IntID) { 10219 Ops[2] = EmitSVEPredicateCast( 10220 Ops[2], getSVEVectorForElementType(SVEBuiltinMemEltTy(TypeFlags))); 10221 10222 SmallVector<Value *> NewOps; 10223 NewOps.push_back(Ops[2]); 10224 10225 llvm::Value *BasePtr = Ops[3]; 10226 10227 // If the intrinsic contains the vnum parameter, multiply it with the vector 10228 // size in bytes. 10229 if (Ops.size() == 5) { 10230 Function *StreamingVectorLength = 10231 CGM.getIntrinsic(Intrinsic::aarch64_sme_cntsb); 10232 llvm::Value *StreamingVectorLengthCall = 10233 Builder.CreateCall(StreamingVectorLength); 10234 llvm::Value *Mulvl = 10235 Builder.CreateMul(StreamingVectorLengthCall, Ops[4], "mulvl"); 10236 // The type of the ptr parameter is void *, so use Int8Ty here. 10237 BasePtr = Builder.CreateGEP(Int8Ty, Ops[3], Mulvl); 10238 } 10239 NewOps.push_back(BasePtr); 10240 NewOps.push_back(Ops[0]); 10241 NewOps.push_back(Ops[1]); 10242 Function *F = CGM.getIntrinsic(IntID); 10243 return Builder.CreateCall(F, NewOps); 10244 } 10245 10246 Value *CodeGenFunction::EmitSMEReadWrite(const SVETypeFlags &TypeFlags, 10247 SmallVectorImpl<Value *> &Ops, 10248 unsigned IntID) { 10249 auto *VecTy = getSVEType(TypeFlags); 10250 Function *F = CGM.getIntrinsic(IntID, VecTy); 10251 if (TypeFlags.isReadZA()) 10252 Ops[1] = EmitSVEPredicateCast(Ops[1], VecTy); 10253 else if (TypeFlags.isWriteZA()) 10254 Ops[2] = EmitSVEPredicateCast(Ops[2], VecTy); 10255 return Builder.CreateCall(F, Ops); 10256 } 10257 10258 Value *CodeGenFunction::EmitSMEZero(const SVETypeFlags &TypeFlags, 10259 SmallVectorImpl<Value *> &Ops, 10260 unsigned IntID) { 10261 // svzero_za() intrinsic zeros the entire za tile and has no paramters. 
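  // If no mask operand was supplied, pass an all-ones 8-bit tile mask (255),
  // which selects every 64-bit ZA tile, so svzero_za() is emitted as if it
  // were svzero_mask_za(255). This is a sketch of the mapping; the mask
  // encoding follows the SME zero instruction.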
10262 if (Ops.size() == 0) 10263 Ops.push_back(llvm::ConstantInt::get(Int32Ty, 255)); 10264 Function *F = CGM.getIntrinsic(IntID, {}); 10265 return Builder.CreateCall(F, Ops); 10266 } 10267 10268 Value *CodeGenFunction::EmitSMELdrStr(const SVETypeFlags &TypeFlags, 10269 SmallVectorImpl<Value *> &Ops, 10270 unsigned IntID) { 10271 if (Ops.size() == 2) 10272 Ops.push_back(Builder.getInt32(0)); 10273 else 10274 Ops[2] = Builder.CreateIntCast(Ops[2], Int32Ty, true); 10275 Function *F = CGM.getIntrinsic(IntID, {}); 10276 return Builder.CreateCall(F, Ops); 10277 } 10278 10279 // Limit the usage of scalable llvm IR generated by the ACLE by using the 10280 // sve dup.x intrinsic instead of IRBuilder::CreateVectorSplat. 10281 Value *CodeGenFunction::EmitSVEDupX(Value *Scalar, llvm::Type *Ty) { 10282 return Builder.CreateVectorSplat( 10283 cast<llvm::VectorType>(Ty)->getElementCount(), Scalar); 10284 } 10285 10286 Value *CodeGenFunction::EmitSVEDupX(Value* Scalar) { 10287 return EmitSVEDupX(Scalar, getSVEVectorForElementType(Scalar->getType())); 10288 } 10289 10290 Value *CodeGenFunction::EmitSVEReinterpret(Value *Val, llvm::Type *Ty) { 10291 // FIXME: For big endian this needs an additional REV, or needs a separate 10292 // intrinsic that is code-generated as a no-op, because the LLVM bitcast 10293 // instruction is defined as 'bitwise' equivalent from memory point of 10294 // view (when storing/reloading), whereas the svreinterpret builtin 10295 // implements bitwise equivalent cast from register point of view. 10296 // LLVM CodeGen for a bitcast must add an explicit REV for big-endian. 10297 return Builder.CreateBitCast(Val, Ty); 10298 } 10299 10300 static void InsertExplicitZeroOperand(CGBuilderTy &Builder, llvm::Type *Ty, 10301 SmallVectorImpl<Value *> &Ops) { 10302 auto *SplatZero = Constant::getNullValue(Ty); 10303 Ops.insert(Ops.begin(), SplatZero); 10304 } 10305 10306 static void InsertExplicitUndefOperand(CGBuilderTy &Builder, llvm::Type *Ty, 10307 SmallVectorImpl<Value *> &Ops) { 10308 auto *SplatUndef = UndefValue::get(Ty); 10309 Ops.insert(Ops.begin(), SplatUndef); 10310 } 10311 10312 SmallVector<llvm::Type *, 2> 10313 CodeGenFunction::getSVEOverloadTypes(const SVETypeFlags &TypeFlags, 10314 llvm::Type *ResultType, 10315 ArrayRef<Value *> Ops) { 10316 if (TypeFlags.isOverloadNone()) 10317 return {}; 10318 10319 llvm::Type *DefaultType = getSVEType(TypeFlags); 10320 10321 if (TypeFlags.isOverloadWhileOrMultiVecCvt()) 10322 return {DefaultType, Ops[1]->getType()}; 10323 10324 if (TypeFlags.isOverloadWhileRW()) 10325 return {getSVEPredType(TypeFlags), Ops[0]->getType()}; 10326 10327 if (TypeFlags.isOverloadCvt()) 10328 return {Ops[0]->getType(), Ops.back()->getType()}; 10329 10330 if (TypeFlags.isReductionQV() && !ResultType->isScalableTy() && 10331 ResultType->isVectorTy()) 10332 return {ResultType, Ops[1]->getType()}; 10333 10334 assert(TypeFlags.isOverloadDefault() && "Unexpected value for overloads"); 10335 return {DefaultType}; 10336 } 10337 10338 Value *CodeGenFunction::EmitSVETupleSetOrGet(const SVETypeFlags &TypeFlags, 10339 llvm::Type *Ty, 10340 ArrayRef<Value *> Ops) { 10341 assert((TypeFlags.isTupleSet() || TypeFlags.isTupleGet()) && 10342 "Expects TypleFlags.isTupleSet() or TypeFlags.isTupleGet()"); 10343 10344 unsigned I = cast<ConstantInt>(Ops[1])->getSExtValue(); 10345 auto *SingleVecTy = dyn_cast<llvm::ScalableVectorType>( 10346 TypeFlags.isTupleSet() ? 
Ops[2]->getType() : Ty); 10347 10348 if (!SingleVecTy) 10349 return nullptr; 10350 10351 Value *Idx = ConstantInt::get(CGM.Int64Ty, 10352 I * SingleVecTy->getMinNumElements()); 10353 10354 if (TypeFlags.isTupleSet()) 10355 return Builder.CreateInsertVector(Ty, Ops[0], Ops[2], Idx); 10356 return Builder.CreateExtractVector(Ty, Ops[0], Idx); 10357 } 10358 10359 Value *CodeGenFunction::EmitSVETupleCreate(const SVETypeFlags &TypeFlags, 10360 llvm::Type *Ty, 10361 ArrayRef<Value *> Ops) { 10362 assert(TypeFlags.isTupleCreate() && "Expects TypleFlag isTupleCreate"); 10363 10364 auto *SrcTy = dyn_cast<llvm::ScalableVectorType>(Ops[0]->getType()); 10365 10366 if (!SrcTy) 10367 return nullptr; 10368 10369 unsigned MinElts = SrcTy->getMinNumElements(); 10370 Value *Call = llvm::PoisonValue::get(Ty); 10371 for (unsigned I = 0; I < Ops.size(); I++) { 10372 Value *Idx = ConstantInt::get(CGM.Int64Ty, I * MinElts); 10373 Call = Builder.CreateInsertVector(Ty, Call, Ops[I], Idx); 10374 } 10375 10376 return Call; 10377 } 10378 10379 Value *CodeGenFunction::FormSVEBuiltinResult(Value *Call) { 10380 // Multi-vector results should be broken up into a single (wide) result 10381 // vector. 10382 auto *StructTy = dyn_cast<StructType>(Call->getType()); 10383 if (!StructTy) 10384 return Call; 10385 10386 auto *VTy = dyn_cast<ScalableVectorType>(StructTy->getTypeAtIndex(0U)); 10387 if (!VTy) 10388 return Call; 10389 unsigned N = StructTy->getNumElements(); 10390 10391 // We may need to emit a cast to a svbool_t 10392 bool IsPredTy = VTy->getElementType()->isIntegerTy(1); 10393 unsigned MinElts = IsPredTy ? 16 : VTy->getMinNumElements(); 10394 10395 ScalableVectorType *WideVTy = 10396 ScalableVectorType::get(VTy->getElementType(), MinElts * N); 10397 Value *Ret = llvm::PoisonValue::get(WideVTy); 10398 for (unsigned I = 0; I < N; ++I) { 10399 Value *SRet = Builder.CreateExtractValue(Call, I); 10400 assert(SRet->getType() == VTy && "Unexpected type for result value"); 10401 Value *Idx = ConstantInt::get(CGM.Int64Ty, I * MinElts); 10402 10403 if (IsPredTy) 10404 SRet = EmitSVEPredicateCast( 10405 SRet, ScalableVectorType::get(Builder.getInt1Ty(), 16)); 10406 10407 Ret = Builder.CreateInsertVector(WideVTy, Ret, SRet, Idx); 10408 } 10409 Call = Ret; 10410 10411 return Call; 10412 } 10413 10414 void CodeGenFunction::GetAArch64SVEProcessedOperands( 10415 unsigned BuiltinID, const CallExpr *E, SmallVectorImpl<Value *> &Ops, 10416 SVETypeFlags TypeFlags) { 10417 // Find out if any arguments are required to be integer constant expressions. 10418 unsigned ICEArguments = 0; 10419 ASTContext::GetBuiltinTypeError Error; 10420 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments); 10421 assert(Error == ASTContext::GE_None && "Should not codegen an error"); 10422 10423 // Tuple set/get only requires one insert/extract vector, which is 10424 // created by EmitSVETupleSetOrGet. 10425 bool IsTupleGetOrSet = TypeFlags.isTupleSet() || TypeFlags.isTupleGet(); 10426 10427 for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) { 10428 bool IsICE = ICEArguments & (1 << i); 10429 Value *Arg = EmitScalarExpr(E->getArg(i)); 10430 10431 if (IsICE) { 10432 // If this is required to be a constant, constant fold it so that we know 10433 // that the generated intrinsic gets a ConstantInt. 10434 std::optional<llvm::APSInt> Result = 10435 E->getArg(i)->getIntegerConstantExpr(getContext()); 10436 assert(Result && "Expected argument to be a constant"); 10437 10438 // Immediates for SVE llvm intrinsics are always 32bit. 
We can safely 10439 // truncate because the immediate has been range checked and no valid 10440 // immediate requires more than a handful of bits. 10441 *Result = Result->extOrTrunc(32); 10442 Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), *Result)); 10443 continue; 10444 } 10445 10446 if (IsTupleGetOrSet || !isa<ScalableVectorType>(Arg->getType())) { 10447 Ops.push_back(Arg); 10448 continue; 10449 } 10450 10451 auto *VTy = cast<ScalableVectorType>(Arg->getType()); 10452 unsigned MinElts = VTy->getMinNumElements(); 10453 bool IsPred = VTy->getElementType()->isIntegerTy(1); 10454 unsigned N = (MinElts * VTy->getScalarSizeInBits()) / (IsPred ? 16 : 128); 10455 10456 if (N == 1) { 10457 Ops.push_back(Arg); 10458 continue; 10459 } 10460 10461 for (unsigned I = 0; I < N; ++I) { 10462 Value *Idx = ConstantInt::get(CGM.Int64Ty, (I * MinElts) / N); 10463 auto *NewVTy = 10464 ScalableVectorType::get(VTy->getElementType(), MinElts / N); 10465 Ops.push_back(Builder.CreateExtractVector(NewVTy, Arg, Idx)); 10466 } 10467 } 10468 } 10469 10470 Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID, 10471 const CallExpr *E) { 10472 llvm::Type *Ty = ConvertType(E->getType()); 10473 if (BuiltinID >= SVE::BI__builtin_sve_reinterpret_s8_s8 && 10474 BuiltinID <= SVE::BI__builtin_sve_reinterpret_f64_f64_x4) { 10475 Value *Val = EmitScalarExpr(E->getArg(0)); 10476 return EmitSVEReinterpret(Val, Ty); 10477 } 10478 10479 auto *Builtin = findARMVectorIntrinsicInMap(AArch64SVEIntrinsicMap, BuiltinID, 10480 AArch64SVEIntrinsicsProvenSorted); 10481 10482 llvm::SmallVector<Value *, 4> Ops; 10483 SVETypeFlags TypeFlags(Builtin->TypeModifier); 10484 GetAArch64SVEProcessedOperands(BuiltinID, E, Ops, TypeFlags); 10485 10486 if (TypeFlags.isLoad()) 10487 return EmitSVEMaskedLoad(E, Ty, Ops, Builtin->LLVMIntrinsic, 10488 TypeFlags.isZExtReturn()); 10489 else if (TypeFlags.isStore()) 10490 return EmitSVEMaskedStore(E, Ops, Builtin->LLVMIntrinsic); 10491 else if (TypeFlags.isGatherLoad()) 10492 return EmitSVEGatherLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic); 10493 else if (TypeFlags.isScatterStore()) 10494 return EmitSVEScatterStore(TypeFlags, Ops, Builtin->LLVMIntrinsic); 10495 else if (TypeFlags.isPrefetch()) 10496 return EmitSVEPrefetchLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic); 10497 else if (TypeFlags.isGatherPrefetch()) 10498 return EmitSVEGatherPrefetch(TypeFlags, Ops, Builtin->LLVMIntrinsic); 10499 else if (TypeFlags.isStructLoad()) 10500 return EmitSVEStructLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic); 10501 else if (TypeFlags.isStructStore()) 10502 return EmitSVEStructStore(TypeFlags, Ops, Builtin->LLVMIntrinsic); 10503 else if (TypeFlags.isTupleSet() || TypeFlags.isTupleGet()) 10504 return EmitSVETupleSetOrGet(TypeFlags, Ty, Ops); 10505 else if (TypeFlags.isTupleCreate()) 10506 return EmitSVETupleCreate(TypeFlags, Ty, Ops); 10507 else if (TypeFlags.isUndef()) 10508 return UndefValue::get(Ty); 10509 else if (Builtin->LLVMIntrinsic != 0) { 10510 if (TypeFlags.getMergeType() == SVETypeFlags::MergeZeroExp) 10511 InsertExplicitZeroOperand(Builder, Ty, Ops); 10512 10513 if (TypeFlags.getMergeType() == SVETypeFlags::MergeAnyExp) 10514 InsertExplicitUndefOperand(Builder, Ty, Ops); 10515 10516 // Some ACLE builtins leave out the argument to specify the predicate 10517 // pattern, which is expected to be expanded to an SV_ALL pattern. 
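    // For instance, a builtin such as svcntb() carries no pattern operand but
    // is meant to count all elements, so an SV_ALL (31) immediate is appended
    // or inserted below. svcntb is an illustrative example here; the affected
    // builtins are identified purely by their TypeFlags.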
10518 if (TypeFlags.isAppendSVALL()) 10519 Ops.push_back(Builder.getInt32(/*SV_ALL*/ 31)); 10520 if (TypeFlags.isInsertOp1SVALL()) 10521 Ops.insert(&Ops[1], Builder.getInt32(/*SV_ALL*/ 31)); 10522 10523 // Predicates must match the main datatype. 10524 for (unsigned i = 0, e = Ops.size(); i != e; ++i) 10525 if (auto PredTy = dyn_cast<llvm::VectorType>(Ops[i]->getType())) 10526 if (PredTy->getElementType()->isIntegerTy(1)) 10527 Ops[i] = EmitSVEPredicateCast(Ops[i], getSVEType(TypeFlags)); 10528 10529 // Splat scalar operand to vector (intrinsics with _n infix) 10530 if (TypeFlags.hasSplatOperand()) { 10531 unsigned OpNo = TypeFlags.getSplatOperand(); 10532 Ops[OpNo] = EmitSVEDupX(Ops[OpNo]); 10533 } 10534 10535 if (TypeFlags.isReverseCompare()) 10536 std::swap(Ops[1], Ops[2]); 10537 else if (TypeFlags.isReverseUSDOT()) 10538 std::swap(Ops[1], Ops[2]); 10539 else if (TypeFlags.isReverseMergeAnyBinOp() && 10540 TypeFlags.getMergeType() == SVETypeFlags::MergeAny) 10541 std::swap(Ops[1], Ops[2]); 10542 else if (TypeFlags.isReverseMergeAnyAccOp() && 10543 TypeFlags.getMergeType() == SVETypeFlags::MergeAny) 10544 std::swap(Ops[1], Ops[3]); 10545 10546 // Predicated intrinsics with _z suffix need a select w/ zeroinitializer. 10547 if (TypeFlags.getMergeType() == SVETypeFlags::MergeZero) { 10548 llvm::Type *OpndTy = Ops[1]->getType(); 10549 auto *SplatZero = Constant::getNullValue(OpndTy); 10550 Ops[1] = Builder.CreateSelect(Ops[0], Ops[1], SplatZero); 10551 } 10552 10553 Function *F = CGM.getIntrinsic(Builtin->LLVMIntrinsic, 10554 getSVEOverloadTypes(TypeFlags, Ty, Ops)); 10555 Value *Call = Builder.CreateCall(F, Ops); 10556 10557 // Predicate results must be converted to svbool_t. 10558 if (auto PredTy = dyn_cast<llvm::VectorType>(Call->getType())) 10559 if (PredTy->getScalarType()->isIntegerTy(1)) 10560 Call = EmitSVEPredicateCast(Call, cast<llvm::ScalableVectorType>(Ty)); 10561 10562 return FormSVEBuiltinResult(Call); 10563 } 10564 10565 switch (BuiltinID) { 10566 default: 10567 return nullptr; 10568 10569 case SVE::BI__builtin_sve_svreinterpret_b: { 10570 auto SVCountTy = 10571 llvm::TargetExtType::get(getLLVMContext(), "aarch64.svcount"); 10572 Function *CastFromSVCountF = 10573 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool, SVCountTy); 10574 return Builder.CreateCall(CastFromSVCountF, Ops[0]); 10575 } 10576 case SVE::BI__builtin_sve_svreinterpret_c: { 10577 auto SVCountTy = 10578 llvm::TargetExtType::get(getLLVMContext(), "aarch64.svcount"); 10579 Function *CastToSVCountF = 10580 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool, SVCountTy); 10581 return Builder.CreateCall(CastToSVCountF, Ops[0]); 10582 } 10583 10584 case SVE::BI__builtin_sve_svpsel_lane_b8: 10585 case SVE::BI__builtin_sve_svpsel_lane_b16: 10586 case SVE::BI__builtin_sve_svpsel_lane_b32: 10587 case SVE::BI__builtin_sve_svpsel_lane_b64: 10588 case SVE::BI__builtin_sve_svpsel_lane_c8: 10589 case SVE::BI__builtin_sve_svpsel_lane_c16: 10590 case SVE::BI__builtin_sve_svpsel_lane_c32: 10591 case SVE::BI__builtin_sve_svpsel_lane_c64: { 10592 bool IsSVCount = isa<TargetExtType>(Ops[0]->getType()); 10593 assert(((!IsSVCount || cast<TargetExtType>(Ops[0]->getType())->getName() == 10594 "aarch64.svcount")) && 10595 "Unexpected TargetExtType"); 10596 auto SVCountTy = 10597 llvm::TargetExtType::get(getLLVMContext(), "aarch64.svcount"); 10598 Function *CastFromSVCountF = 10599 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool, SVCountTy); 10600 Function *CastToSVCountF = 10601 
CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool, SVCountTy); 10602 10603 auto OverloadedTy = getSVEType(SVETypeFlags(Builtin->TypeModifier)); 10604 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_psel, OverloadedTy); 10605 llvm::Value *Ops0 = 10606 IsSVCount ? Builder.CreateCall(CastFromSVCountF, Ops[0]) : Ops[0]; 10607 llvm::Value *Ops1 = EmitSVEPredicateCast(Ops[1], OverloadedTy); 10608 llvm::Value *PSel = Builder.CreateCall(F, {Ops0, Ops1, Ops[2]}); 10609 return IsSVCount ? Builder.CreateCall(CastToSVCountF, PSel) : PSel; 10610 } 10611 case SVE::BI__builtin_sve_svmov_b_z: { 10612 // svmov_b_z(pg, op) <=> svand_b_z(pg, op, op) 10613 SVETypeFlags TypeFlags(Builtin->TypeModifier); 10614 llvm::Type* OverloadedTy = getSVEType(TypeFlags); 10615 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_and_z, OverloadedTy); 10616 return Builder.CreateCall(F, {Ops[0], Ops[1], Ops[1]}); 10617 } 10618 10619 case SVE::BI__builtin_sve_svnot_b_z: { 10620 // svnot_b_z(pg, op) <=> sveor_b_z(pg, op, pg) 10621 SVETypeFlags TypeFlags(Builtin->TypeModifier); 10622 llvm::Type* OverloadedTy = getSVEType(TypeFlags); 10623 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_eor_z, OverloadedTy); 10624 return Builder.CreateCall(F, {Ops[0], Ops[1], Ops[0]}); 10625 } 10626 10627 case SVE::BI__builtin_sve_svmovlb_u16: 10628 case SVE::BI__builtin_sve_svmovlb_u32: 10629 case SVE::BI__builtin_sve_svmovlb_u64: 10630 return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_ushllb); 10631 10632 case SVE::BI__builtin_sve_svmovlb_s16: 10633 case SVE::BI__builtin_sve_svmovlb_s32: 10634 case SVE::BI__builtin_sve_svmovlb_s64: 10635 return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_sshllb); 10636 10637 case SVE::BI__builtin_sve_svmovlt_u16: 10638 case SVE::BI__builtin_sve_svmovlt_u32: 10639 case SVE::BI__builtin_sve_svmovlt_u64: 10640 return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_ushllt); 10641 10642 case SVE::BI__builtin_sve_svmovlt_s16: 10643 case SVE::BI__builtin_sve_svmovlt_s32: 10644 case SVE::BI__builtin_sve_svmovlt_s64: 10645 return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_sshllt); 10646 10647 case SVE::BI__builtin_sve_svpmullt_u16: 10648 case SVE::BI__builtin_sve_svpmullt_u64: 10649 case SVE::BI__builtin_sve_svpmullt_n_u16: 10650 case SVE::BI__builtin_sve_svpmullt_n_u64: 10651 return EmitSVEPMull(TypeFlags, Ops, Intrinsic::aarch64_sve_pmullt_pair); 10652 10653 case SVE::BI__builtin_sve_svpmullb_u16: 10654 case SVE::BI__builtin_sve_svpmullb_u64: 10655 case SVE::BI__builtin_sve_svpmullb_n_u16: 10656 case SVE::BI__builtin_sve_svpmullb_n_u64: 10657 return EmitSVEPMull(TypeFlags, Ops, Intrinsic::aarch64_sve_pmullb_pair); 10658 10659 case SVE::BI__builtin_sve_svdup_n_b8: 10660 case SVE::BI__builtin_sve_svdup_n_b16: 10661 case SVE::BI__builtin_sve_svdup_n_b32: 10662 case SVE::BI__builtin_sve_svdup_n_b64: { 10663 Value *CmpNE = 10664 Builder.CreateICmpNE(Ops[0], Constant::getNullValue(Ops[0]->getType())); 10665 llvm::ScalableVectorType *OverloadedTy = getSVEType(TypeFlags); 10666 Value *Dup = EmitSVEDupX(CmpNE, OverloadedTy); 10667 return EmitSVEPredicateCast(Dup, cast<llvm::ScalableVectorType>(Ty)); 10668 } 10669 10670 case SVE::BI__builtin_sve_svdupq_n_b8: 10671 case SVE::BI__builtin_sve_svdupq_n_b16: 10672 case SVE::BI__builtin_sve_svdupq_n_b32: 10673 case SVE::BI__builtin_sve_svdupq_n_b64: 10674 case SVE::BI__builtin_sve_svdupq_n_u8: 10675 case SVE::BI__builtin_sve_svdupq_n_s8: 10676 case SVE::BI__builtin_sve_svdupq_n_u64: 10677 case SVE::BI__builtin_sve_svdupq_n_f64: 10678 
  case SVE::BI__builtin_sve_svdupq_n_s64:
10679   case SVE::BI__builtin_sve_svdupq_n_u16:
10680   case SVE::BI__builtin_sve_svdupq_n_f16:
10681   case SVE::BI__builtin_sve_svdupq_n_bf16:
10682   case SVE::BI__builtin_sve_svdupq_n_s16:
10683   case SVE::BI__builtin_sve_svdupq_n_u32:
10684   case SVE::BI__builtin_sve_svdupq_n_f32:
10685   case SVE::BI__builtin_sve_svdupq_n_s32: {
10686     // These builtins build a 128-bit fixed-length vector from the scalar
10687     // operands and splat it across the register with the dupq_lane intrinsic.
10688     unsigned NumOpnds = Ops.size();
10689 
10690     bool IsBoolTy =
10691         cast<llvm::VectorType>(Ty)->getElementType()->isIntegerTy(1);
10692 
10693     // For svdupq_n_b* the element type is an integer of width 128/numelts,
10694     // so that the compare can use the width that is natural for the expected
10695     // number of predicate lanes.
10696     llvm::Type *EltTy = Ops[0]->getType();
10697     if (IsBoolTy)
10698       EltTy = IntegerType::get(getLLVMContext(), SVEBitsPerBlock / NumOpnds);
10699 
10700     SmallVector<llvm::Value *, 16> VecOps;
10701     for (unsigned I = 0; I < NumOpnds; ++I)
10702       VecOps.push_back(Builder.CreateZExt(Ops[I], EltTy));
10703     Value *Vec = BuildVector(VecOps);
10704 
10705     llvm::Type *OverloadedTy = getSVEVectorForElementType(EltTy);
10706     Value *InsertSubVec = Builder.CreateInsertVector(
10707         OverloadedTy, PoisonValue::get(OverloadedTy), Vec, Builder.getInt64(0));
10708 
10709     Function *F =
10710         CGM.getIntrinsic(Intrinsic::aarch64_sve_dupq_lane, OverloadedTy);
10711     Value *DupQLane =
10712         Builder.CreateCall(F, {InsertSubVec, Builder.getInt64(0)});
10713 
10714     if (!IsBoolTy)
10715       return DupQLane;
10716 
10717     SVETypeFlags TypeFlags(Builtin->TypeModifier);
10718     Value *Pred = EmitSVEAllTruePred(TypeFlags);
10719 
10720     // For svdupq_n_b* we need to add an additional 'cmpne' with '0'.
10721     F = CGM.getIntrinsic(NumOpnds == 2 ?
Intrinsic::aarch64_sve_cmpne 10722 : Intrinsic::aarch64_sve_cmpne_wide, 10723 OverloadedTy); 10724 Value *Call = Builder.CreateCall( 10725 F, {Pred, DupQLane, EmitSVEDupX(Builder.getInt64(0))}); 10726 return EmitSVEPredicateCast(Call, cast<llvm::ScalableVectorType>(Ty)); 10727 } 10728 10729 case SVE::BI__builtin_sve_svpfalse_b: 10730 return ConstantInt::getFalse(Ty); 10731 10732 case SVE::BI__builtin_sve_svpfalse_c: { 10733 auto SVBoolTy = ScalableVectorType::get(Builder.getInt1Ty(), 16); 10734 Function *CastToSVCountF = 10735 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool, Ty); 10736 return Builder.CreateCall(CastToSVCountF, ConstantInt::getFalse(SVBoolTy)); 10737 } 10738 10739 case SVE::BI__builtin_sve_svlen_bf16: 10740 case SVE::BI__builtin_sve_svlen_f16: 10741 case SVE::BI__builtin_sve_svlen_f32: 10742 case SVE::BI__builtin_sve_svlen_f64: 10743 case SVE::BI__builtin_sve_svlen_s8: 10744 case SVE::BI__builtin_sve_svlen_s16: 10745 case SVE::BI__builtin_sve_svlen_s32: 10746 case SVE::BI__builtin_sve_svlen_s64: 10747 case SVE::BI__builtin_sve_svlen_u8: 10748 case SVE::BI__builtin_sve_svlen_u16: 10749 case SVE::BI__builtin_sve_svlen_u32: 10750 case SVE::BI__builtin_sve_svlen_u64: { 10751 SVETypeFlags TF(Builtin->TypeModifier); 10752 auto VTy = cast<llvm::VectorType>(getSVEType(TF)); 10753 auto *NumEls = 10754 llvm::ConstantInt::get(Ty, VTy->getElementCount().getKnownMinValue()); 10755 10756 Function *F = CGM.getIntrinsic(Intrinsic::vscale, Ty); 10757 return Builder.CreateMul(NumEls, Builder.CreateCall(F)); 10758 } 10759 10760 case SVE::BI__builtin_sve_svtbl2_u8: 10761 case SVE::BI__builtin_sve_svtbl2_s8: 10762 case SVE::BI__builtin_sve_svtbl2_u16: 10763 case SVE::BI__builtin_sve_svtbl2_s16: 10764 case SVE::BI__builtin_sve_svtbl2_u32: 10765 case SVE::BI__builtin_sve_svtbl2_s32: 10766 case SVE::BI__builtin_sve_svtbl2_u64: 10767 case SVE::BI__builtin_sve_svtbl2_s64: 10768 case SVE::BI__builtin_sve_svtbl2_f16: 10769 case SVE::BI__builtin_sve_svtbl2_bf16: 10770 case SVE::BI__builtin_sve_svtbl2_f32: 10771 case SVE::BI__builtin_sve_svtbl2_f64: { 10772 SVETypeFlags TF(Builtin->TypeModifier); 10773 auto VTy = cast<llvm::ScalableVectorType>(getSVEType(TF)); 10774 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_tbl2, VTy); 10775 return Builder.CreateCall(F, Ops); 10776 } 10777 10778 case SVE::BI__builtin_sve_svset_neonq_s8: 10779 case SVE::BI__builtin_sve_svset_neonq_s16: 10780 case SVE::BI__builtin_sve_svset_neonq_s32: 10781 case SVE::BI__builtin_sve_svset_neonq_s64: 10782 case SVE::BI__builtin_sve_svset_neonq_u8: 10783 case SVE::BI__builtin_sve_svset_neonq_u16: 10784 case SVE::BI__builtin_sve_svset_neonq_u32: 10785 case SVE::BI__builtin_sve_svset_neonq_u64: 10786 case SVE::BI__builtin_sve_svset_neonq_f16: 10787 case SVE::BI__builtin_sve_svset_neonq_f32: 10788 case SVE::BI__builtin_sve_svset_neonq_f64: 10789 case SVE::BI__builtin_sve_svset_neonq_bf16: { 10790 return Builder.CreateInsertVector(Ty, Ops[0], Ops[1], Builder.getInt64(0)); 10791 } 10792 10793 case SVE::BI__builtin_sve_svget_neonq_s8: 10794 case SVE::BI__builtin_sve_svget_neonq_s16: 10795 case SVE::BI__builtin_sve_svget_neonq_s32: 10796 case SVE::BI__builtin_sve_svget_neonq_s64: 10797 case SVE::BI__builtin_sve_svget_neonq_u8: 10798 case SVE::BI__builtin_sve_svget_neonq_u16: 10799 case SVE::BI__builtin_sve_svget_neonq_u32: 10800 case SVE::BI__builtin_sve_svget_neonq_u64: 10801 case SVE::BI__builtin_sve_svget_neonq_f16: 10802 case SVE::BI__builtin_sve_svget_neonq_f32: 10803 case SVE::BI__builtin_sve_svget_neonq_f64: 10804 
case SVE::BI__builtin_sve_svget_neonq_bf16: { 10805 return Builder.CreateExtractVector(Ty, Ops[0], Builder.getInt64(0)); 10806 } 10807 10808 case SVE::BI__builtin_sve_svdup_neonq_s8: 10809 case SVE::BI__builtin_sve_svdup_neonq_s16: 10810 case SVE::BI__builtin_sve_svdup_neonq_s32: 10811 case SVE::BI__builtin_sve_svdup_neonq_s64: 10812 case SVE::BI__builtin_sve_svdup_neonq_u8: 10813 case SVE::BI__builtin_sve_svdup_neonq_u16: 10814 case SVE::BI__builtin_sve_svdup_neonq_u32: 10815 case SVE::BI__builtin_sve_svdup_neonq_u64: 10816 case SVE::BI__builtin_sve_svdup_neonq_f16: 10817 case SVE::BI__builtin_sve_svdup_neonq_f32: 10818 case SVE::BI__builtin_sve_svdup_neonq_f64: 10819 case SVE::BI__builtin_sve_svdup_neonq_bf16: { 10820 Value *Insert = Builder.CreateInsertVector(Ty, PoisonValue::get(Ty), Ops[0], 10821 Builder.getInt64(0)); 10822 return Builder.CreateIntrinsic(Intrinsic::aarch64_sve_dupq_lane, {Ty}, 10823 {Insert, Builder.getInt64(0)}); 10824 } 10825 } 10826 10827 /// Should not happen 10828 return nullptr; 10829 } 10830 10831 static void swapCommutativeSMEOperands(unsigned BuiltinID, 10832 SmallVectorImpl<Value *> &Ops) { 10833 unsigned MultiVec; 10834 switch (BuiltinID) { 10835 default: 10836 return; 10837 case SME::BI__builtin_sme_svsumla_za32_s8_vg4x1: 10838 MultiVec = 1; 10839 break; 10840 case SME::BI__builtin_sme_svsumla_za32_s8_vg4x2: 10841 case SME::BI__builtin_sme_svsudot_za32_s8_vg1x2: 10842 MultiVec = 2; 10843 break; 10844 case SME::BI__builtin_sme_svsudot_za32_s8_vg1x4: 10845 case SME::BI__builtin_sme_svsumla_za32_s8_vg4x4: 10846 MultiVec = 4; 10847 break; 10848 } 10849 10850 if (MultiVec > 0) 10851 for (unsigned I = 0; I < MultiVec; ++I) 10852 std::swap(Ops[I + 1], Ops[I + 1 + MultiVec]); 10853 } 10854 10855 Value *CodeGenFunction::EmitAArch64SMEBuiltinExpr(unsigned BuiltinID, 10856 const CallExpr *E) { 10857 auto *Builtin = findARMVectorIntrinsicInMap(AArch64SMEIntrinsicMap, BuiltinID, 10858 AArch64SMEIntrinsicsProvenSorted); 10859 10860 llvm::SmallVector<Value *, 4> Ops; 10861 SVETypeFlags TypeFlags(Builtin->TypeModifier); 10862 GetAArch64SVEProcessedOperands(BuiltinID, E, Ops, TypeFlags); 10863 10864 if (TypeFlags.isLoad() || TypeFlags.isStore()) 10865 return EmitSMELd1St1(TypeFlags, Ops, Builtin->LLVMIntrinsic); 10866 else if (TypeFlags.isReadZA() || TypeFlags.isWriteZA()) 10867 return EmitSMEReadWrite(TypeFlags, Ops, Builtin->LLVMIntrinsic); 10868 else if (BuiltinID == SME::BI__builtin_sme_svzero_mask_za || 10869 BuiltinID == SME::BI__builtin_sme_svzero_za) 10870 return EmitSMEZero(TypeFlags, Ops, Builtin->LLVMIntrinsic); 10871 else if (BuiltinID == SME::BI__builtin_sme_svldr_vnum_za || 10872 BuiltinID == SME::BI__builtin_sme_svstr_vnum_za || 10873 BuiltinID == SME::BI__builtin_sme_svldr_za || 10874 BuiltinID == SME::BI__builtin_sme_svstr_za) 10875 return EmitSMELdrStr(TypeFlags, Ops, Builtin->LLVMIntrinsic); 10876 10877 // Handle builtins which require their multi-vector operands to be swapped 10878 swapCommutativeSMEOperands(BuiltinID, Ops); 10879 10880 // Should not happen! 10881 if (Builtin->LLVMIntrinsic == 0) 10882 return nullptr; 10883 10884 // Predicates must match the main datatype. 10885 for (unsigned i = 0, e = Ops.size(); i != e; ++i) 10886 if (auto PredTy = dyn_cast<llvm::VectorType>(Ops[i]->getType())) 10887 if (PredTy->getElementType()->isIntegerTy(1)) 10888 Ops[i] = EmitSVEPredicateCast(Ops[i], getSVEType(TypeFlags)); 10889 10890 Function *F = 10891 TypeFlags.isOverloadNone() 10892 ? 
          CGM.getIntrinsic(Builtin->LLVMIntrinsic)
          : CGM.getIntrinsic(Builtin->LLVMIntrinsic, {getSVEType(TypeFlags)});
  Value *Call = Builder.CreateCall(F, Ops);

  return FormSVEBuiltinResult(Call);
}

Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
                                               const CallExpr *E,
                                               llvm::Triple::ArchType Arch) {
  if (BuiltinID >= clang::AArch64::FirstSVEBuiltin &&
      BuiltinID <= clang::AArch64::LastSVEBuiltin)
    return EmitAArch64SVEBuiltinExpr(BuiltinID, E);

  if (BuiltinID >= clang::AArch64::FirstSMEBuiltin &&
      BuiltinID <= clang::AArch64::LastSMEBuiltin)
    return EmitAArch64SMEBuiltinExpr(BuiltinID, E);

  if (BuiltinID == Builtin::BI__builtin_cpu_supports)
    return EmitAArch64CpuSupports(E);

  unsigned HintID = static_cast<unsigned>(-1);
  switch (BuiltinID) {
  default: break;
  case clang::AArch64::BI__builtin_arm_nop:
    HintID = 0;
    break;
  case clang::AArch64::BI__builtin_arm_yield:
  case clang::AArch64::BI__yield:
    HintID = 1;
    break;
  case clang::AArch64::BI__builtin_arm_wfe:
  case clang::AArch64::BI__wfe:
    HintID = 2;
    break;
  case clang::AArch64::BI__builtin_arm_wfi:
  case clang::AArch64::BI__wfi:
    HintID = 3;
    break;
  case clang::AArch64::BI__builtin_arm_sev:
  case clang::AArch64::BI__sev:
    HintID = 4;
    break;
  case clang::AArch64::BI__builtin_arm_sevl:
  case clang::AArch64::BI__sevl:
    HintID = 5;
    break;
  }

  if (HintID != static_cast<unsigned>(-1)) {
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_hint);
    return Builder.CreateCall(F, llvm::ConstantInt::get(Int32Ty, HintID));
  }

  if (BuiltinID == clang::AArch64::BI__builtin_arm_trap) {
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_break);
    llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
    return Builder.CreateCall(F, Builder.CreateZExt(Arg, CGM.Int32Ty));
  }

  if (BuiltinID == clang::AArch64::BI__builtin_arm_get_sme_state) {
    // Create call to __arm_sme_state and store the results to the two
    // pointers.
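    // A rough sketch of the IR this produces (illustrative only; the value
    // names are invented):
    //   %state = call { i64, i64 } @__arm_sme_state()
    //   %lo = extractvalue { i64, i64 } %state, 0
    //   %hi = extractvalue { i64, i64 } %state, 1
    //   store i64 %lo, ptr %arg0
    //   store i64 %hi, ptr %arg1
    // The call is marked "aarch64_pstate_sm_compatible" and uses the SME ABI
    // support-routine calling convention, as set up below.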
10954 CallInst *CI = EmitRuntimeCall(CGM.CreateRuntimeFunction( 10955 llvm::FunctionType::get(StructType::get(CGM.Int64Ty, CGM.Int64Ty), {}, 10956 false), 10957 "__arm_sme_state")); 10958 auto Attrs = AttributeList().addFnAttribute(getLLVMContext(), 10959 "aarch64_pstate_sm_compatible"); 10960 CI->setAttributes(Attrs); 10961 CI->setCallingConv( 10962 llvm::CallingConv:: 10963 AArch64_SME_ABI_Support_Routines_PreserveMost_From_X2); 10964 Builder.CreateStore(Builder.CreateExtractValue(CI, 0), 10965 EmitPointerWithAlignment(E->getArg(0))); 10966 return Builder.CreateStore(Builder.CreateExtractValue(CI, 1), 10967 EmitPointerWithAlignment(E->getArg(1))); 10968 } 10969 10970 if (BuiltinID == clang::AArch64::BI__builtin_arm_rbit) { 10971 assert((getContext().getTypeSize(E->getType()) == 32) && 10972 "rbit of unusual size!"); 10973 llvm::Value *Arg = EmitScalarExpr(E->getArg(0)); 10974 return Builder.CreateCall( 10975 CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit"); 10976 } 10977 if (BuiltinID == clang::AArch64::BI__builtin_arm_rbit64) { 10978 assert((getContext().getTypeSize(E->getType()) == 64) && 10979 "rbit of unusual size!"); 10980 llvm::Value *Arg = EmitScalarExpr(E->getArg(0)); 10981 return Builder.CreateCall( 10982 CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit"); 10983 } 10984 10985 if (BuiltinID == clang::AArch64::BI__builtin_arm_clz || 10986 BuiltinID == clang::AArch64::BI__builtin_arm_clz64) { 10987 llvm::Value *Arg = EmitScalarExpr(E->getArg(0)); 10988 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Arg->getType()); 10989 Value *Res = Builder.CreateCall(F, {Arg, Builder.getInt1(false)}); 10990 if (BuiltinID == clang::AArch64::BI__builtin_arm_clz64) 10991 Res = Builder.CreateTrunc(Res, Builder.getInt32Ty()); 10992 return Res; 10993 } 10994 10995 if (BuiltinID == clang::AArch64::BI__builtin_arm_cls) { 10996 llvm::Value *Arg = EmitScalarExpr(E->getArg(0)); 10997 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_cls), Arg, 10998 "cls"); 10999 } 11000 if (BuiltinID == clang::AArch64::BI__builtin_arm_cls64) { 11001 llvm::Value *Arg = EmitScalarExpr(E->getArg(0)); 11002 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_cls64), Arg, 11003 "cls"); 11004 } 11005 11006 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint32zf || 11007 BuiltinID == clang::AArch64::BI__builtin_arm_rint32z) { 11008 llvm::Value *Arg = EmitScalarExpr(E->getArg(0)); 11009 llvm::Type *Ty = Arg->getType(); 11010 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint32z, Ty), 11011 Arg, "frint32z"); 11012 } 11013 11014 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint64zf || 11015 BuiltinID == clang::AArch64::BI__builtin_arm_rint64z) { 11016 llvm::Value *Arg = EmitScalarExpr(E->getArg(0)); 11017 llvm::Type *Ty = Arg->getType(); 11018 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint64z, Ty), 11019 Arg, "frint64z"); 11020 } 11021 11022 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint32xf || 11023 BuiltinID == clang::AArch64::BI__builtin_arm_rint32x) { 11024 llvm::Value *Arg = EmitScalarExpr(E->getArg(0)); 11025 llvm::Type *Ty = Arg->getType(); 11026 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint32x, Ty), 11027 Arg, "frint32x"); 11028 } 11029 11030 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint64xf || 11031 BuiltinID == clang::AArch64::BI__builtin_arm_rint64x) { 11032 llvm::Value *Arg = EmitScalarExpr(E->getArg(0)); 11033 llvm::Type *Ty = Arg->getType(); 11034 return 
Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint64x, Ty), 11035 Arg, "frint64x"); 11036 } 11037 11038 if (BuiltinID == clang::AArch64::BI__builtin_arm_jcvt) { 11039 assert((getContext().getTypeSize(E->getType()) == 32) && 11040 "__jcvt of unusual size!"); 11041 llvm::Value *Arg = EmitScalarExpr(E->getArg(0)); 11042 return Builder.CreateCall( 11043 CGM.getIntrinsic(Intrinsic::aarch64_fjcvtzs), Arg); 11044 } 11045 11046 if (BuiltinID == clang::AArch64::BI__builtin_arm_ld64b || 11047 BuiltinID == clang::AArch64::BI__builtin_arm_st64b || 11048 BuiltinID == clang::AArch64::BI__builtin_arm_st64bv || 11049 BuiltinID == clang::AArch64::BI__builtin_arm_st64bv0) { 11050 llvm::Value *MemAddr = EmitScalarExpr(E->getArg(0)); 11051 llvm::Value *ValPtr = EmitScalarExpr(E->getArg(1)); 11052 11053 if (BuiltinID == clang::AArch64::BI__builtin_arm_ld64b) { 11054 // Load from the address via an LLVM intrinsic, receiving a 11055 // tuple of 8 i64 words, and store each one to ValPtr. 11056 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_ld64b); 11057 llvm::Value *Val = Builder.CreateCall(F, MemAddr); 11058 llvm::Value *ToRet; 11059 for (size_t i = 0; i < 8; i++) { 11060 llvm::Value *ValOffsetPtr = 11061 Builder.CreateGEP(Int64Ty, ValPtr, Builder.getInt32(i)); 11062 Address Addr = 11063 Address(ValOffsetPtr, Int64Ty, CharUnits::fromQuantity(8)); 11064 ToRet = Builder.CreateStore(Builder.CreateExtractValue(Val, i), Addr); 11065 } 11066 return ToRet; 11067 } else { 11068 // Load 8 i64 words from ValPtr, and store them to the address 11069 // via an LLVM intrinsic. 11070 SmallVector<llvm::Value *, 9> Args; 11071 Args.push_back(MemAddr); 11072 for (size_t i = 0; i < 8; i++) { 11073 llvm::Value *ValOffsetPtr = 11074 Builder.CreateGEP(Int64Ty, ValPtr, Builder.getInt32(i)); 11075 Address Addr = 11076 Address(ValOffsetPtr, Int64Ty, CharUnits::fromQuantity(8)); 11077 Args.push_back(Builder.CreateLoad(Addr)); 11078 } 11079 11080 auto Intr = (BuiltinID == clang::AArch64::BI__builtin_arm_st64b 11081 ? Intrinsic::aarch64_st64b 11082 : BuiltinID == clang::AArch64::BI__builtin_arm_st64bv 11083 ? Intrinsic::aarch64_st64bv 11084 : Intrinsic::aarch64_st64bv0); 11085 Function *F = CGM.getIntrinsic(Intr); 11086 return Builder.CreateCall(F, Args); 11087 } 11088 } 11089 11090 if (BuiltinID == clang::AArch64::BI__builtin_arm_rndr || 11091 BuiltinID == clang::AArch64::BI__builtin_arm_rndrrs) { 11092 11093 auto Intr = (BuiltinID == clang::AArch64::BI__builtin_arm_rndr 11094 ? 
Intrinsic::aarch64_rndr 11095 : Intrinsic::aarch64_rndrrs); 11096 Function *F = CGM.getIntrinsic(Intr); 11097 llvm::Value *Val = Builder.CreateCall(F); 11098 Value *RandomValue = Builder.CreateExtractValue(Val, 0); 11099 Value *Status = Builder.CreateExtractValue(Val, 1); 11100 11101 Address MemAddress = EmitPointerWithAlignment(E->getArg(0)); 11102 Builder.CreateStore(RandomValue, MemAddress); 11103 Status = Builder.CreateZExt(Status, Int32Ty); 11104 return Status; 11105 } 11106 11107 if (BuiltinID == clang::AArch64::BI__clear_cache) { 11108 assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments"); 11109 const FunctionDecl *FD = E->getDirectCallee(); 11110 Value *Ops[2]; 11111 for (unsigned i = 0; i < 2; i++) 11112 Ops[i] = EmitScalarExpr(E->getArg(i)); 11113 llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType()); 11114 llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty); 11115 StringRef Name = FD->getName(); 11116 return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops); 11117 } 11118 11119 if ((BuiltinID == clang::AArch64::BI__builtin_arm_ldrex || 11120 BuiltinID == clang::AArch64::BI__builtin_arm_ldaex) && 11121 getContext().getTypeSize(E->getType()) == 128) { 11122 Function *F = 11123 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_ldaex 11124 ? Intrinsic::aarch64_ldaxp 11125 : Intrinsic::aarch64_ldxp); 11126 11127 Value *LdPtr = EmitScalarExpr(E->getArg(0)); 11128 Value *Val = Builder.CreateCall(F, LdPtr, "ldxp"); 11129 11130 Value *Val0 = Builder.CreateExtractValue(Val, 1); 11131 Value *Val1 = Builder.CreateExtractValue(Val, 0); 11132 llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128); 11133 Val0 = Builder.CreateZExt(Val0, Int128Ty); 11134 Val1 = Builder.CreateZExt(Val1, Int128Ty); 11135 11136 Value *ShiftCst = llvm::ConstantInt::get(Int128Ty, 64); 11137 Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */); 11138 Val = Builder.CreateOr(Val, Val1); 11139 return Builder.CreateBitCast(Val, ConvertType(E->getType())); 11140 } else if (BuiltinID == clang::AArch64::BI__builtin_arm_ldrex || 11141 BuiltinID == clang::AArch64::BI__builtin_arm_ldaex) { 11142 Value *LoadAddr = EmitScalarExpr(E->getArg(0)); 11143 11144 QualType Ty = E->getType(); 11145 llvm::Type *RealResTy = ConvertType(Ty); 11146 llvm::Type *IntTy = 11147 llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty)); 11148 11149 Function *F = 11150 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_ldaex 11151 ? Intrinsic::aarch64_ldaxr 11152 : Intrinsic::aarch64_ldxr, 11153 UnqualPtrTy); 11154 CallInst *Val = Builder.CreateCall(F, LoadAddr, "ldxr"); 11155 Val->addParamAttr( 11156 0, Attribute::get(getLLVMContext(), Attribute::ElementType, IntTy)); 11157 11158 if (RealResTy->isPointerTy()) 11159 return Builder.CreateIntToPtr(Val, RealResTy); 11160 11161 llvm::Type *IntResTy = llvm::IntegerType::get( 11162 getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy)); 11163 return Builder.CreateBitCast(Builder.CreateTruncOrBitCast(Val, IntResTy), 11164 RealResTy); 11165 } 11166 11167 if ((BuiltinID == clang::AArch64::BI__builtin_arm_strex || 11168 BuiltinID == clang::AArch64::BI__builtin_arm_stlex) && 11169 getContext().getTypeSize(E->getArg(0)->getType()) == 128) { 11170 Function *F = 11171 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_stlex 11172 ? 
Intrinsic::aarch64_stlxp 11173 : Intrinsic::aarch64_stxp); 11174 llvm::Type *STy = llvm::StructType::get(Int64Ty, Int64Ty); 11175 11176 Address Tmp = CreateMemTemp(E->getArg(0)->getType()); 11177 EmitAnyExprToMem(E->getArg(0), Tmp, Qualifiers(), /*init*/ true); 11178 11179 Tmp = Tmp.withElementType(STy); 11180 llvm::Value *Val = Builder.CreateLoad(Tmp); 11181 11182 Value *Arg0 = Builder.CreateExtractValue(Val, 0); 11183 Value *Arg1 = Builder.CreateExtractValue(Val, 1); 11184 Value *StPtr = EmitScalarExpr(E->getArg(1)); 11185 return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "stxp"); 11186 } 11187 11188 if (BuiltinID == clang::AArch64::BI__builtin_arm_strex || 11189 BuiltinID == clang::AArch64::BI__builtin_arm_stlex) { 11190 Value *StoreVal = EmitScalarExpr(E->getArg(0)); 11191 Value *StoreAddr = EmitScalarExpr(E->getArg(1)); 11192 11193 QualType Ty = E->getArg(0)->getType(); 11194 llvm::Type *StoreTy = 11195 llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty)); 11196 11197 if (StoreVal->getType()->isPointerTy()) 11198 StoreVal = Builder.CreatePtrToInt(StoreVal, Int64Ty); 11199 else { 11200 llvm::Type *IntTy = llvm::IntegerType::get( 11201 getLLVMContext(), 11202 CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType())); 11203 StoreVal = Builder.CreateBitCast(StoreVal, IntTy); 11204 StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int64Ty); 11205 } 11206 11207 Function *F = 11208 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_stlex 11209 ? Intrinsic::aarch64_stlxr 11210 : Intrinsic::aarch64_stxr, 11211 StoreAddr->getType()); 11212 CallInst *CI = Builder.CreateCall(F, {StoreVal, StoreAddr}, "stxr"); 11213 CI->addParamAttr( 11214 1, Attribute::get(getLLVMContext(), Attribute::ElementType, StoreTy)); 11215 return CI; 11216 } 11217 11218 if (BuiltinID == clang::AArch64::BI__getReg) { 11219 Expr::EvalResult Result; 11220 if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext())) 11221 llvm_unreachable("Sema will ensure that the parameter is constant"); 11222 11223 llvm::APSInt Value = Result.Val.getInt(); 11224 LLVMContext &Context = CGM.getLLVMContext(); 11225 std::string Reg = Value == 31 ? 
"sp" : "x" + toString(Value, 10); 11226 11227 llvm::Metadata *Ops[] = {llvm::MDString::get(Context, Reg)}; 11228 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops); 11229 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName); 11230 11231 llvm::Function *F = 11232 CGM.getIntrinsic(llvm::Intrinsic::read_register, {Int64Ty}); 11233 return Builder.CreateCall(F, Metadata); 11234 } 11235 11236 if (BuiltinID == clang::AArch64::BI__break) { 11237 Expr::EvalResult Result; 11238 if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext())) 11239 llvm_unreachable("Sema will ensure that the parameter is constant"); 11240 11241 llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::aarch64_break); 11242 return Builder.CreateCall(F, {EmitScalarExpr(E->getArg(0))}); 11243 } 11244 11245 if (BuiltinID == clang::AArch64::BI__builtin_arm_clrex) { 11246 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_clrex); 11247 return Builder.CreateCall(F); 11248 } 11249 11250 if (BuiltinID == clang::AArch64::BI_ReadWriteBarrier) 11251 return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, 11252 llvm::SyncScope::SingleThread); 11253 11254 // CRC32 11255 Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic; 11256 switch (BuiltinID) { 11257 case clang::AArch64::BI__builtin_arm_crc32b: 11258 CRCIntrinsicID = Intrinsic::aarch64_crc32b; break; 11259 case clang::AArch64::BI__builtin_arm_crc32cb: 11260 CRCIntrinsicID = Intrinsic::aarch64_crc32cb; break; 11261 case clang::AArch64::BI__builtin_arm_crc32h: 11262 CRCIntrinsicID = Intrinsic::aarch64_crc32h; break; 11263 case clang::AArch64::BI__builtin_arm_crc32ch: 11264 CRCIntrinsicID = Intrinsic::aarch64_crc32ch; break; 11265 case clang::AArch64::BI__builtin_arm_crc32w: 11266 CRCIntrinsicID = Intrinsic::aarch64_crc32w; break; 11267 case clang::AArch64::BI__builtin_arm_crc32cw: 11268 CRCIntrinsicID = Intrinsic::aarch64_crc32cw; break; 11269 case clang::AArch64::BI__builtin_arm_crc32d: 11270 CRCIntrinsicID = Intrinsic::aarch64_crc32x; break; 11271 case clang::AArch64::BI__builtin_arm_crc32cd: 11272 CRCIntrinsicID = Intrinsic::aarch64_crc32cx; break; 11273 } 11274 11275 if (CRCIntrinsicID != Intrinsic::not_intrinsic) { 11276 Value *Arg0 = EmitScalarExpr(E->getArg(0)); 11277 Value *Arg1 = EmitScalarExpr(E->getArg(1)); 11278 Function *F = CGM.getIntrinsic(CRCIntrinsicID); 11279 11280 llvm::Type *DataTy = F->getFunctionType()->getParamType(1); 11281 Arg1 = Builder.CreateZExtOrBitCast(Arg1, DataTy); 11282 11283 return Builder.CreateCall(F, {Arg0, Arg1}); 11284 } 11285 11286 // Memory Operations (MOPS) 11287 if (BuiltinID == AArch64::BI__builtin_arm_mops_memset_tag) { 11288 Value *Dst = EmitScalarExpr(E->getArg(0)); 11289 Value *Val = EmitScalarExpr(E->getArg(1)); 11290 Value *Size = EmitScalarExpr(E->getArg(2)); 11291 Dst = Builder.CreatePointerCast(Dst, Int8PtrTy); 11292 Val = Builder.CreateTrunc(Val, Int8Ty); 11293 Size = Builder.CreateIntCast(Size, Int64Ty, false); 11294 return Builder.CreateCall( 11295 CGM.getIntrinsic(Intrinsic::aarch64_mops_memset_tag), {Dst, Val, Size}); 11296 } 11297 11298 // Memory Tagging Extensions (MTE) Intrinsics 11299 Intrinsic::ID MTEIntrinsicID = Intrinsic::not_intrinsic; 11300 switch (BuiltinID) { 11301 case clang::AArch64::BI__builtin_arm_irg: 11302 MTEIntrinsicID = Intrinsic::aarch64_irg; break; 11303 case clang::AArch64::BI__builtin_arm_addg: 11304 MTEIntrinsicID = Intrinsic::aarch64_addg; break; 11305 case clang::AArch64::BI__builtin_arm_gmi: 11306 MTEIntrinsicID = Intrinsic::aarch64_gmi; break; 11307 case 
       clang::AArch64::BI__builtin_arm_ldg:
    MTEIntrinsicID = Intrinsic::aarch64_ldg; break;
  case clang::AArch64::BI__builtin_arm_stg:
    MTEIntrinsicID = Intrinsic::aarch64_stg; break;
  case clang::AArch64::BI__builtin_arm_subp:
    MTEIntrinsicID = Intrinsic::aarch64_subp; break;
  }

  if (MTEIntrinsicID != Intrinsic::not_intrinsic) {
    llvm::Type *T = ConvertType(E->getType());

    if (MTEIntrinsicID == Intrinsic::aarch64_irg) {
      Value *Pointer = EmitScalarExpr(E->getArg(0));
      Value *Mask = EmitScalarExpr(E->getArg(1));

      Pointer = Builder.CreatePointerCast(Pointer, Int8PtrTy);
      Mask = Builder.CreateZExt(Mask, Int64Ty);
      Value *RV = Builder.CreateCall(
          CGM.getIntrinsic(MTEIntrinsicID), {Pointer, Mask});
      return Builder.CreatePointerCast(RV, T);
    }
    if (MTEIntrinsicID == Intrinsic::aarch64_addg) {
      Value *Pointer = EmitScalarExpr(E->getArg(0));
      Value *TagOffset = EmitScalarExpr(E->getArg(1));

      Pointer = Builder.CreatePointerCast(Pointer, Int8PtrTy);
      TagOffset = Builder.CreateZExt(TagOffset, Int64Ty);
      Value *RV = Builder.CreateCall(
          CGM.getIntrinsic(MTEIntrinsicID), {Pointer, TagOffset});
      return Builder.CreatePointerCast(RV, T);
    }
    if (MTEIntrinsicID == Intrinsic::aarch64_gmi) {
      Value *Pointer = EmitScalarExpr(E->getArg(0));
      Value *ExcludedMask = EmitScalarExpr(E->getArg(1));

      ExcludedMask = Builder.CreateZExt(ExcludedMask, Int64Ty);
      Pointer = Builder.CreatePointerCast(Pointer, Int8PtrTy);
      return Builder.CreateCall(
          CGM.getIntrinsic(MTEIntrinsicID), {Pointer, ExcludedMask});
    }
    // Although it is possible to supply a different return address (first
    // arg) to this intrinsic, for now we set the return address to the same
    // value as the input address.
    if (MTEIntrinsicID == Intrinsic::aarch64_ldg) {
      Value *TagAddress = EmitScalarExpr(E->getArg(0));
      TagAddress = Builder.CreatePointerCast(TagAddress, Int8PtrTy);
      Value *RV = Builder.CreateCall(
          CGM.getIntrinsic(MTEIntrinsicID), {TagAddress, TagAddress});
      return Builder.CreatePointerCast(RV, T);
    }
    // Although it is possible to supply a different tag (to set) to this
    // intrinsic (as the first arg), for now we supply the tag that is in the
    // input address arg (the common use case).
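    // For example, __builtin_arm_stg(p) is emitted below as a call to
    // @llvm.aarch64.stg(p, p): the tag stored to memory is taken from the
    // pointer operand itself rather than from a separately supplied tag.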
11360 if (MTEIntrinsicID == Intrinsic::aarch64_stg) { 11361 Value *TagAddress = EmitScalarExpr(E->getArg(0)); 11362 TagAddress = Builder.CreatePointerCast(TagAddress, Int8PtrTy); 11363 return Builder.CreateCall( 11364 CGM.getIntrinsic(MTEIntrinsicID), {TagAddress, TagAddress}); 11365 } 11366 if (MTEIntrinsicID == Intrinsic::aarch64_subp) { 11367 Value *PointerA = EmitScalarExpr(E->getArg(0)); 11368 Value *PointerB = EmitScalarExpr(E->getArg(1)); 11369 PointerA = Builder.CreatePointerCast(PointerA, Int8PtrTy); 11370 PointerB = Builder.CreatePointerCast(PointerB, Int8PtrTy); 11371 return Builder.CreateCall( 11372 CGM.getIntrinsic(MTEIntrinsicID), {PointerA, PointerB}); 11373 } 11374 } 11375 11376 if (BuiltinID == clang::AArch64::BI__builtin_arm_rsr || 11377 BuiltinID == clang::AArch64::BI__builtin_arm_rsr64 || 11378 BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 || 11379 BuiltinID == clang::AArch64::BI__builtin_arm_rsrp || 11380 BuiltinID == clang::AArch64::BI__builtin_arm_wsr || 11381 BuiltinID == clang::AArch64::BI__builtin_arm_wsr64 || 11382 BuiltinID == clang::AArch64::BI__builtin_arm_wsr128 || 11383 BuiltinID == clang::AArch64::BI__builtin_arm_wsrp) { 11384 11385 SpecialRegisterAccessKind AccessKind = Write; 11386 if (BuiltinID == clang::AArch64::BI__builtin_arm_rsr || 11387 BuiltinID == clang::AArch64::BI__builtin_arm_rsr64 || 11388 BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 || 11389 BuiltinID == clang::AArch64::BI__builtin_arm_rsrp) 11390 AccessKind = VolatileRead; 11391 11392 bool IsPointerBuiltin = BuiltinID == clang::AArch64::BI__builtin_arm_rsrp || 11393 BuiltinID == clang::AArch64::BI__builtin_arm_wsrp; 11394 11395 bool Is32Bit = BuiltinID == clang::AArch64::BI__builtin_arm_rsr || 11396 BuiltinID == clang::AArch64::BI__builtin_arm_wsr; 11397 11398 bool Is128Bit = BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 || 11399 BuiltinID == clang::AArch64::BI__builtin_arm_wsr128; 11400 11401 llvm::Type *ValueType; 11402 llvm::Type *RegisterType = Int64Ty; 11403 if (Is32Bit) { 11404 ValueType = Int32Ty; 11405 } else if (Is128Bit) { 11406 llvm::Type *Int128Ty = 11407 llvm::IntegerType::getInt128Ty(CGM.getLLVMContext()); 11408 ValueType = Int128Ty; 11409 RegisterType = Int128Ty; 11410 } else if (IsPointerBuiltin) { 11411 ValueType = VoidPtrTy; 11412 } else { 11413 ValueType = Int64Ty; 11414 }; 11415 11416 return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType, 11417 AccessKind); 11418 } 11419 11420 if (BuiltinID == clang::AArch64::BI_ReadStatusReg || 11421 BuiltinID == clang::AArch64::BI_WriteStatusReg) { 11422 LLVMContext &Context = CGM.getLLVMContext(); 11423 11424 unsigned SysReg = 11425 E->getArg(0)->EvaluateKnownConstInt(getContext()).getZExtValue(); 11426 11427 std::string SysRegStr; 11428 llvm::raw_string_ostream(SysRegStr) << 11429 ((1 << 1) | ((SysReg >> 14) & 1)) << ":" << 11430 ((SysReg >> 11) & 7) << ":" << 11431 ((SysReg >> 7) & 15) << ":" << 11432 ((SysReg >> 3) & 15) << ":" << 11433 ( SysReg & 7); 11434 11435 llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysRegStr) }; 11436 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops); 11437 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName); 11438 11439 llvm::Type *RegisterType = Int64Ty; 11440 llvm::Type *Types[] = { RegisterType }; 11441 11442 if (BuiltinID == clang::AArch64::BI_ReadStatusReg) { 11443 llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::read_register, Types); 11444 11445 return Builder.CreateCall(F, Metadata); 11446 } 11447 11448 llvm::Function *F = 
CGM.getIntrinsic(llvm::Intrinsic::write_register, Types); 11449 llvm::Value *ArgValue = EmitScalarExpr(E->getArg(1)); 11450 11451 return Builder.CreateCall(F, { Metadata, ArgValue }); 11452 } 11453 11454 if (BuiltinID == clang::AArch64::BI_AddressOfReturnAddress) { 11455 llvm::Function *F = 11456 CGM.getIntrinsic(Intrinsic::addressofreturnaddress, AllocaInt8PtrTy); 11457 return Builder.CreateCall(F); 11458 } 11459 11460 if (BuiltinID == clang::AArch64::BI__builtin_sponentry) { 11461 llvm::Function *F = CGM.getIntrinsic(Intrinsic::sponentry, AllocaInt8PtrTy); 11462 return Builder.CreateCall(F); 11463 } 11464 11465 if (BuiltinID == clang::AArch64::BI__mulh || 11466 BuiltinID == clang::AArch64::BI__umulh) { 11467 llvm::Type *ResType = ConvertType(E->getType()); 11468 llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128); 11469 11470 bool IsSigned = BuiltinID == clang::AArch64::BI__mulh; 11471 Value *LHS = 11472 Builder.CreateIntCast(EmitScalarExpr(E->getArg(0)), Int128Ty, IsSigned); 11473 Value *RHS = 11474 Builder.CreateIntCast(EmitScalarExpr(E->getArg(1)), Int128Ty, IsSigned); 11475 11476 Value *MulResult, *HigherBits; 11477 if (IsSigned) { 11478 MulResult = Builder.CreateNSWMul(LHS, RHS); 11479 HigherBits = Builder.CreateAShr(MulResult, 64); 11480 } else { 11481 MulResult = Builder.CreateNUWMul(LHS, RHS); 11482 HigherBits = Builder.CreateLShr(MulResult, 64); 11483 } 11484 HigherBits = Builder.CreateIntCast(HigherBits, ResType, IsSigned); 11485 11486 return HigherBits; 11487 } 11488 11489 if (BuiltinID == AArch64::BI__writex18byte || 11490 BuiltinID == AArch64::BI__writex18word || 11491 BuiltinID == AArch64::BI__writex18dword || 11492 BuiltinID == AArch64::BI__writex18qword) { 11493 // Read x18 as i8* 11494 LLVMContext &Context = CGM.getLLVMContext(); 11495 llvm::Metadata *Ops[] = {llvm::MDString::get(Context, "x18")}; 11496 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops); 11497 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName); 11498 llvm::Function *F = 11499 CGM.getIntrinsic(llvm::Intrinsic::read_register, {Int64Ty}); 11500 llvm::Value *X18 = Builder.CreateCall(F, Metadata); 11501 X18 = Builder.CreateIntToPtr(X18, Int8PtrTy); 11502 11503 // Store val at x18 + offset 11504 Value *Offset = Builder.CreateZExt(EmitScalarExpr(E->getArg(0)), Int64Ty); 11505 Value *Ptr = Builder.CreateGEP(Int8Ty, X18, Offset); 11506 Value *Val = EmitScalarExpr(E->getArg(1)); 11507 StoreInst *Store = Builder.CreateAlignedStore(Val, Ptr, CharUnits::One()); 11508 return Store; 11509 } 11510 11511 if (BuiltinID == AArch64::BI__readx18byte || 11512 BuiltinID == AArch64::BI__readx18word || 11513 BuiltinID == AArch64::BI__readx18dword || 11514 BuiltinID == AArch64::BI__readx18qword) { 11515 llvm::Type *IntTy = ConvertType(E->getType()); 11516 11517 // Read x18 as i8* 11518 LLVMContext &Context = CGM.getLLVMContext(); 11519 llvm::Metadata *Ops[] = {llvm::MDString::get(Context, "x18")}; 11520 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops); 11521 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName); 11522 llvm::Function *F = 11523 CGM.getIntrinsic(llvm::Intrinsic::read_register, {Int64Ty}); 11524 llvm::Value *X18 = Builder.CreateCall(F, Metadata); 11525 X18 = Builder.CreateIntToPtr(X18, Int8PtrTy); 11526 11527 // Load x18 + offset 11528 Value *Offset = Builder.CreateZExt(EmitScalarExpr(E->getArg(0)), Int64Ty); 11529 Value *Ptr = Builder.CreateGEP(Int8Ty, X18, Offset); 11530 LoadInst *Load = Builder.CreateAlignedLoad(IntTy, Ptr, CharUnits::One()); 11531 
return Load; 11532 } 11533 11534 if (BuiltinID == AArch64::BI_CopyDoubleFromInt64 || 11535 BuiltinID == AArch64::BI_CopyFloatFromInt32 || 11536 BuiltinID == AArch64::BI_CopyInt32FromFloat || 11537 BuiltinID == AArch64::BI_CopyInt64FromDouble) { 11538 Value *Arg = EmitScalarExpr(E->getArg(0)); 11539 llvm::Type *RetTy = ConvertType(E->getType()); 11540 return Builder.CreateBitCast(Arg, RetTy); 11541 } 11542 11543 if (BuiltinID == AArch64::BI_CountLeadingOnes || 11544 BuiltinID == AArch64::BI_CountLeadingOnes64 || 11545 BuiltinID == AArch64::BI_CountLeadingZeros || 11546 BuiltinID == AArch64::BI_CountLeadingZeros64) { 11547 Value *Arg = EmitScalarExpr(E->getArg(0)); 11548 llvm::Type *ArgType = Arg->getType(); 11549 11550 if (BuiltinID == AArch64::BI_CountLeadingOnes || 11551 BuiltinID == AArch64::BI_CountLeadingOnes64) 11552 Arg = Builder.CreateXor(Arg, Constant::getAllOnesValue(ArgType)); 11553 11554 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType); 11555 Value *Result = Builder.CreateCall(F, {Arg, Builder.getInt1(false)}); 11556 11557 if (BuiltinID == AArch64::BI_CountLeadingOnes64 || 11558 BuiltinID == AArch64::BI_CountLeadingZeros64) 11559 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty()); 11560 return Result; 11561 } 11562 11563 if (BuiltinID == AArch64::BI_CountLeadingSigns || 11564 BuiltinID == AArch64::BI_CountLeadingSigns64) { 11565 Value *Arg = EmitScalarExpr(E->getArg(0)); 11566 11567 Function *F = (BuiltinID == AArch64::BI_CountLeadingSigns) 11568 ? CGM.getIntrinsic(Intrinsic::aarch64_cls) 11569 : CGM.getIntrinsic(Intrinsic::aarch64_cls64); 11570 11571 Value *Result = Builder.CreateCall(F, Arg, "cls"); 11572 if (BuiltinID == AArch64::BI_CountLeadingSigns64) 11573 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty()); 11574 return Result; 11575 } 11576 11577 if (BuiltinID == AArch64::BI_CountOneBits || 11578 BuiltinID == AArch64::BI_CountOneBits64) { 11579 Value *ArgValue = EmitScalarExpr(E->getArg(0)); 11580 llvm::Type *ArgType = ArgValue->getType(); 11581 Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType); 11582 11583 Value *Result = Builder.CreateCall(F, ArgValue); 11584 if (BuiltinID == AArch64::BI_CountOneBits64) 11585 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty()); 11586 return Result; 11587 } 11588 11589 if (BuiltinID == AArch64::BI__prefetch) { 11590 Value *Address = EmitScalarExpr(E->getArg(0)); 11591 Value *RW = llvm::ConstantInt::get(Int32Ty, 0); 11592 Value *Locality = ConstantInt::get(Int32Ty, 3); 11593 Value *Data = llvm::ConstantInt::get(Int32Ty, 1); 11594 Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType()); 11595 return Builder.CreateCall(F, {Address, RW, Locality, Data}); 11596 } 11597 11598 if (BuiltinID == AArch64::BI__hlt) { 11599 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_hlt); 11600 Builder.CreateCall(F, {EmitScalarExpr(E->getArg(0))}); 11601 11602 // Return 0 for convenience, even though MSVC returns some other undefined 11603 // value. 11604 return ConstantInt::get(Builder.getInt32Ty(), 0); 11605 } 11606 11607 // Handle MSVC intrinsics before argument evaluation to prevent double 11608 // evaluation. 11609 if (std::optional<MSVCIntrin> MsvcIntId = 11610 translateAarch64ToMsvcIntrin(BuiltinID)) 11611 return EmitMSVCBuiltinExpr(*MsvcIntId, E); 11612 11613 // Some intrinsics are equivalent - if they are use the base intrinsic ID. 
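  // In other words, a builtin that lowers identically to another is remapped
  // to that canonical ("base") BuiltinID here, so only one entry per lowering
  // needs to exist in the tables consulted below.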
11614 auto It = llvm::find_if(NEONEquivalentIntrinsicMap, [BuiltinID](auto &P) { 11615 return P.first == BuiltinID; 11616 }); 11617 if (It != end(NEONEquivalentIntrinsicMap)) 11618 BuiltinID = It->second; 11619 11620 // Find out if any arguments are required to be integer constant 11621 // expressions. 11622 unsigned ICEArguments = 0; 11623 ASTContext::GetBuiltinTypeError Error; 11624 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments); 11625 assert(Error == ASTContext::GE_None && "Should not codegen an error"); 11626 11627 llvm::SmallVector<Value*, 4> Ops; 11628 Address PtrOp0 = Address::invalid(); 11629 for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) { 11630 if (i == 0) { 11631 switch (BuiltinID) { 11632 case NEON::BI__builtin_neon_vld1_v: 11633 case NEON::BI__builtin_neon_vld1q_v: 11634 case NEON::BI__builtin_neon_vld1_dup_v: 11635 case NEON::BI__builtin_neon_vld1q_dup_v: 11636 case NEON::BI__builtin_neon_vld1_lane_v: 11637 case NEON::BI__builtin_neon_vld1q_lane_v: 11638 case NEON::BI__builtin_neon_vst1_v: 11639 case NEON::BI__builtin_neon_vst1q_v: 11640 case NEON::BI__builtin_neon_vst1_lane_v: 11641 case NEON::BI__builtin_neon_vst1q_lane_v: 11642 case NEON::BI__builtin_neon_vldap1_lane_s64: 11643 case NEON::BI__builtin_neon_vldap1q_lane_s64: 11644 case NEON::BI__builtin_neon_vstl1_lane_s64: 11645 case NEON::BI__builtin_neon_vstl1q_lane_s64: 11646 // Get the alignment for the argument in addition to the value; 11647 // we'll use it later. 11648 PtrOp0 = EmitPointerWithAlignment(E->getArg(0)); 11649 Ops.push_back(PtrOp0.emitRawPointer(*this)); 11650 continue; 11651 } 11652 } 11653 Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E)); 11654 } 11655 11656 auto SISDMap = ArrayRef(AArch64SISDIntrinsicMap); 11657 const ARMVectorIntrinsicInfo *Builtin = findARMVectorIntrinsicInMap( 11658 SISDMap, BuiltinID, AArch64SISDIntrinsicsProvenSorted); 11659 11660 if (Builtin) { 11661 Ops.push_back(EmitScalarExpr(E->getArg(E->getNumArgs() - 1))); 11662 Value *Result = EmitCommonNeonSISDBuiltinExpr(*this, *Builtin, Ops, E); 11663 assert(Result && "SISD intrinsic should have been handled"); 11664 return Result; 11665 } 11666 11667 const Expr *Arg = E->getArg(E->getNumArgs()-1); 11668 NeonTypeFlags Type(0); 11669 if (std::optional<llvm::APSInt> Result = 11670 Arg->getIntegerConstantExpr(getContext())) 11671 // Determine the type of this overloaded NEON intrinsic. 11672 Type = NeonTypeFlags(Result->getZExtValue()); 11673 11674 bool usgn = Type.isUnsigned(); 11675 bool quad = Type.isQuad(); 11676 11677 // Handle non-overloaded intrinsics first. 
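  // The cases below are emitted directly rather than via the shared
  // table-driven path; most are scalar (single-lane) operations whose result
  // is built from plain IR (bitcasts, compares, extract/insertelement) or a
  // single AArch64 intrinsic call.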
11678 switch (BuiltinID) { 11679 default: break; 11680 case NEON::BI__builtin_neon_vabsh_f16: 11681 Ops.push_back(EmitScalarExpr(E->getArg(0))); 11682 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, HalfTy), Ops, "vabs"); 11683 case NEON::BI__builtin_neon_vaddq_p128: { 11684 llvm::Type *Ty = GetNeonType(this, NeonTypeFlags::Poly128); 11685 Ops.push_back(EmitScalarExpr(E->getArg(1))); 11686 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 11687 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 11688 Ops[0] = Builder.CreateXor(Ops[0], Ops[1]); 11689 llvm::Type *Int128Ty = llvm::Type::getIntNTy(getLLVMContext(), 128); 11690 return Builder.CreateBitCast(Ops[0], Int128Ty); 11691 } 11692 case NEON::BI__builtin_neon_vldrq_p128: { 11693 llvm::Type *Int128Ty = llvm::Type::getIntNTy(getLLVMContext(), 128); 11694 Value *Ptr = EmitScalarExpr(E->getArg(0)); 11695 return Builder.CreateAlignedLoad(Int128Ty, Ptr, 11696 CharUnits::fromQuantity(16)); 11697 } 11698 case NEON::BI__builtin_neon_vstrq_p128: { 11699 Value *Ptr = Ops[0]; 11700 return Builder.CreateDefaultAlignedStore(EmitScalarExpr(E->getArg(1)), Ptr); 11701 } 11702 case NEON::BI__builtin_neon_vcvts_f32_u32: 11703 case NEON::BI__builtin_neon_vcvtd_f64_u64: 11704 usgn = true; 11705 [[fallthrough]]; 11706 case NEON::BI__builtin_neon_vcvts_f32_s32: 11707 case NEON::BI__builtin_neon_vcvtd_f64_s64: { 11708 Ops.push_back(EmitScalarExpr(E->getArg(0))); 11709 bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64; 11710 llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty; 11711 llvm::Type *FTy = Is64 ? DoubleTy : FloatTy; 11712 Ops[0] = Builder.CreateBitCast(Ops[0], InTy); 11713 if (usgn) 11714 return Builder.CreateUIToFP(Ops[0], FTy); 11715 return Builder.CreateSIToFP(Ops[0], FTy); 11716 } 11717 case NEON::BI__builtin_neon_vcvth_f16_u16: 11718 case NEON::BI__builtin_neon_vcvth_f16_u32: 11719 case NEON::BI__builtin_neon_vcvth_f16_u64: 11720 usgn = true; 11721 [[fallthrough]]; 11722 case NEON::BI__builtin_neon_vcvth_f16_s16: 11723 case NEON::BI__builtin_neon_vcvth_f16_s32: 11724 case NEON::BI__builtin_neon_vcvth_f16_s64: { 11725 Ops.push_back(EmitScalarExpr(E->getArg(0))); 11726 llvm::Type *FTy = HalfTy; 11727 llvm::Type *InTy; 11728 if (Ops[0]->getType()->getPrimitiveSizeInBits() == 64) 11729 InTy = Int64Ty; 11730 else if (Ops[0]->getType()->getPrimitiveSizeInBits() == 32) 11731 InTy = Int32Ty; 11732 else 11733 InTy = Int16Ty; 11734 Ops[0] = Builder.CreateBitCast(Ops[0], InTy); 11735 if (usgn) 11736 return Builder.CreateUIToFP(Ops[0], FTy); 11737 return Builder.CreateSIToFP(Ops[0], FTy); 11738 } 11739 case NEON::BI__builtin_neon_vcvtah_u16_f16: 11740 case NEON::BI__builtin_neon_vcvtmh_u16_f16: 11741 case NEON::BI__builtin_neon_vcvtnh_u16_f16: 11742 case NEON::BI__builtin_neon_vcvtph_u16_f16: 11743 case NEON::BI__builtin_neon_vcvth_u16_f16: 11744 case NEON::BI__builtin_neon_vcvtah_s16_f16: 11745 case NEON::BI__builtin_neon_vcvtmh_s16_f16: 11746 case NEON::BI__builtin_neon_vcvtnh_s16_f16: 11747 case NEON::BI__builtin_neon_vcvtph_s16_f16: 11748 case NEON::BI__builtin_neon_vcvth_s16_f16: { 11749 unsigned Int; 11750 llvm::Type* InTy = Int32Ty; 11751 llvm::Type* FTy = HalfTy; 11752 llvm::Type *Tys[2] = {InTy, FTy}; 11753 Ops.push_back(EmitScalarExpr(E->getArg(0))); 11754 switch (BuiltinID) { 11755 default: llvm_unreachable("missing builtin ID in switch!"); 11756 case NEON::BI__builtin_neon_vcvtah_u16_f16: 11757 Int = Intrinsic::aarch64_neon_fcvtau; break; 11758 case NEON::BI__builtin_neon_vcvtmh_u16_f16: 11759 Int = Intrinsic::aarch64_neon_fcvtmu; break; 11760 case 
NEON::BI__builtin_neon_vcvtnh_u16_f16: 11761 Int = Intrinsic::aarch64_neon_fcvtnu; break; 11762 case NEON::BI__builtin_neon_vcvtph_u16_f16: 11763 Int = Intrinsic::aarch64_neon_fcvtpu; break; 11764 case NEON::BI__builtin_neon_vcvth_u16_f16: 11765 Int = Intrinsic::aarch64_neon_fcvtzu; break; 11766 case NEON::BI__builtin_neon_vcvtah_s16_f16: 11767 Int = Intrinsic::aarch64_neon_fcvtas; break; 11768 case NEON::BI__builtin_neon_vcvtmh_s16_f16: 11769 Int = Intrinsic::aarch64_neon_fcvtms; break; 11770 case NEON::BI__builtin_neon_vcvtnh_s16_f16: 11771 Int = Intrinsic::aarch64_neon_fcvtns; break; 11772 case NEON::BI__builtin_neon_vcvtph_s16_f16: 11773 Int = Intrinsic::aarch64_neon_fcvtps; break; 11774 case NEON::BI__builtin_neon_vcvth_s16_f16: 11775 Int = Intrinsic::aarch64_neon_fcvtzs; break; 11776 } 11777 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvt"); 11778 return Builder.CreateTrunc(Ops[0], Int16Ty); 11779 } 11780 case NEON::BI__builtin_neon_vcaleh_f16: 11781 case NEON::BI__builtin_neon_vcalth_f16: 11782 case NEON::BI__builtin_neon_vcageh_f16: 11783 case NEON::BI__builtin_neon_vcagth_f16: { 11784 unsigned Int; 11785 llvm::Type* InTy = Int32Ty; 11786 llvm::Type* FTy = HalfTy; 11787 llvm::Type *Tys[2] = {InTy, FTy}; 11788 Ops.push_back(EmitScalarExpr(E->getArg(1))); 11789 switch (BuiltinID) { 11790 default: llvm_unreachable("missing builtin ID in switch!"); 11791 case NEON::BI__builtin_neon_vcageh_f16: 11792 Int = Intrinsic::aarch64_neon_facge; break; 11793 case NEON::BI__builtin_neon_vcagth_f16: 11794 Int = Intrinsic::aarch64_neon_facgt; break; 11795 case NEON::BI__builtin_neon_vcaleh_f16: 11796 Int = Intrinsic::aarch64_neon_facge; std::swap(Ops[0], Ops[1]); break; 11797 case NEON::BI__builtin_neon_vcalth_f16: 11798 Int = Intrinsic::aarch64_neon_facgt; std::swap(Ops[0], Ops[1]); break; 11799 } 11800 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "facg"); 11801 return Builder.CreateTrunc(Ops[0], Int16Ty); 11802 } 11803 case NEON::BI__builtin_neon_vcvth_n_s16_f16: 11804 case NEON::BI__builtin_neon_vcvth_n_u16_f16: { 11805 unsigned Int; 11806 llvm::Type* InTy = Int32Ty; 11807 llvm::Type* FTy = HalfTy; 11808 llvm::Type *Tys[2] = {InTy, FTy}; 11809 Ops.push_back(EmitScalarExpr(E->getArg(1))); 11810 switch (BuiltinID) { 11811 default: llvm_unreachable("missing builtin ID in switch!"); 11812 case NEON::BI__builtin_neon_vcvth_n_s16_f16: 11813 Int = Intrinsic::aarch64_neon_vcvtfp2fxs; break; 11814 case NEON::BI__builtin_neon_vcvth_n_u16_f16: 11815 Int = Intrinsic::aarch64_neon_vcvtfp2fxu; break; 11816 } 11817 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvth_n"); 11818 return Builder.CreateTrunc(Ops[0], Int16Ty); 11819 } 11820 case NEON::BI__builtin_neon_vcvth_n_f16_s16: 11821 case NEON::BI__builtin_neon_vcvth_n_f16_u16: { 11822 unsigned Int; 11823 llvm::Type* FTy = HalfTy; 11824 llvm::Type* InTy = Int32Ty; 11825 llvm::Type *Tys[2] = {FTy, InTy}; 11826 Ops.push_back(EmitScalarExpr(E->getArg(1))); 11827 switch (BuiltinID) { 11828 default: llvm_unreachable("missing builtin ID in switch!"); 11829 case NEON::BI__builtin_neon_vcvth_n_f16_s16: 11830 Int = Intrinsic::aarch64_neon_vcvtfxs2fp; 11831 Ops[0] = Builder.CreateSExt(Ops[0], InTy, "sext"); 11832 break; 11833 case NEON::BI__builtin_neon_vcvth_n_f16_u16: 11834 Int = Intrinsic::aarch64_neon_vcvtfxu2fp; 11835 Ops[0] = Builder.CreateZExt(Ops[0], InTy); 11836 break; 11837 } 11838 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvth_n"); 11839 } 11840 case NEON::BI__builtin_neon_vpaddd_s64: { 11841 auto *Ty = 
        llvm::FixedVectorType::get(Int64Ty, 2);
    Value *Vec = EmitScalarExpr(E->getArg(0));
    // The vector is v2i64, so make sure it's bitcast to that.
    Vec = Builder.CreateBitCast(Vec, Ty, "v2i64");
    llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
    llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
    Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
    Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
    // Pairwise addition of a v2i64 into a scalar i64.
    return Builder.CreateAdd(Op0, Op1, "vpaddd");
  }
  case NEON::BI__builtin_neon_vpaddd_f64: {
    auto *Ty = llvm::FixedVectorType::get(DoubleTy, 2);
    Value *Vec = EmitScalarExpr(E->getArg(0));
    // The vector is v2f64, so make sure it's bitcast to that.
    Vec = Builder.CreateBitCast(Vec, Ty, "v2f64");
    llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
    llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
    Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
    Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
    // Pairwise addition of a v2f64 into a scalar f64.
    return Builder.CreateFAdd(Op0, Op1, "vpaddd");
  }
  case NEON::BI__builtin_neon_vpadds_f32: {
    auto *Ty = llvm::FixedVectorType::get(FloatTy, 2);
    Value *Vec = EmitScalarExpr(E->getArg(0));
    // The vector is v2f32, so make sure it's bitcast to that.
    Vec = Builder.CreateBitCast(Vec, Ty, "v2f32");
    llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
    llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
    Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
    Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
    // Pairwise addition of a v2f32 into a scalar f32.
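    // i.e. (illustrative IR):
    //   %lo = extractelement <2 x float> %vec, i64 0
    //   %hi = extractelement <2 x float> %vec, i64 1
    //   %r  = fadd float %lo, %hi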
11874 return Builder.CreateFAdd(Op0, Op1, "vpaddd"); 11875 } 11876 case NEON::BI__builtin_neon_vceqzd_s64: 11877 case NEON::BI__builtin_neon_vceqzd_f64: 11878 case NEON::BI__builtin_neon_vceqzs_f32: 11879 case NEON::BI__builtin_neon_vceqzh_f16: 11880 Ops.push_back(EmitScalarExpr(E->getArg(0))); 11881 return EmitAArch64CompareBuiltinExpr( 11882 Ops[0], ConvertType(E->getCallReturnType(getContext())), 11883 ICmpInst::FCMP_OEQ, ICmpInst::ICMP_EQ, "vceqz"); 11884 case NEON::BI__builtin_neon_vcgezd_s64: 11885 case NEON::BI__builtin_neon_vcgezd_f64: 11886 case NEON::BI__builtin_neon_vcgezs_f32: 11887 case NEON::BI__builtin_neon_vcgezh_f16: 11888 Ops.push_back(EmitScalarExpr(E->getArg(0))); 11889 return EmitAArch64CompareBuiltinExpr( 11890 Ops[0], ConvertType(E->getCallReturnType(getContext())), 11891 ICmpInst::FCMP_OGE, ICmpInst::ICMP_SGE, "vcgez"); 11892 case NEON::BI__builtin_neon_vclezd_s64: 11893 case NEON::BI__builtin_neon_vclezd_f64: 11894 case NEON::BI__builtin_neon_vclezs_f32: 11895 case NEON::BI__builtin_neon_vclezh_f16: 11896 Ops.push_back(EmitScalarExpr(E->getArg(0))); 11897 return EmitAArch64CompareBuiltinExpr( 11898 Ops[0], ConvertType(E->getCallReturnType(getContext())), 11899 ICmpInst::FCMP_OLE, ICmpInst::ICMP_SLE, "vclez"); 11900 case NEON::BI__builtin_neon_vcgtzd_s64: 11901 case NEON::BI__builtin_neon_vcgtzd_f64: 11902 case NEON::BI__builtin_neon_vcgtzs_f32: 11903 case NEON::BI__builtin_neon_vcgtzh_f16: 11904 Ops.push_back(EmitScalarExpr(E->getArg(0))); 11905 return EmitAArch64CompareBuiltinExpr( 11906 Ops[0], ConvertType(E->getCallReturnType(getContext())), 11907 ICmpInst::FCMP_OGT, ICmpInst::ICMP_SGT, "vcgtz"); 11908 case NEON::BI__builtin_neon_vcltzd_s64: 11909 case NEON::BI__builtin_neon_vcltzd_f64: 11910 case NEON::BI__builtin_neon_vcltzs_f32: 11911 case NEON::BI__builtin_neon_vcltzh_f16: 11912 Ops.push_back(EmitScalarExpr(E->getArg(0))); 11913 return EmitAArch64CompareBuiltinExpr( 11914 Ops[0], ConvertType(E->getCallReturnType(getContext())), 11915 ICmpInst::FCMP_OLT, ICmpInst::ICMP_SLT, "vcltz"); 11916 11917 case NEON::BI__builtin_neon_vceqzd_u64: { 11918 Ops.push_back(EmitScalarExpr(E->getArg(0))); 11919 Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty); 11920 Ops[0] = 11921 Builder.CreateICmpEQ(Ops[0], llvm::Constant::getNullValue(Int64Ty)); 11922 return Builder.CreateSExt(Ops[0], Int64Ty, "vceqzd"); 11923 } 11924 case NEON::BI__builtin_neon_vceqd_f64: 11925 case NEON::BI__builtin_neon_vcled_f64: 11926 case NEON::BI__builtin_neon_vcltd_f64: 11927 case NEON::BI__builtin_neon_vcged_f64: 11928 case NEON::BI__builtin_neon_vcgtd_f64: { 11929 llvm::CmpInst::Predicate P; 11930 switch (BuiltinID) { 11931 default: llvm_unreachable("missing builtin ID in switch!"); 11932 case NEON::BI__builtin_neon_vceqd_f64: P = llvm::FCmpInst::FCMP_OEQ; break; 11933 case NEON::BI__builtin_neon_vcled_f64: P = llvm::FCmpInst::FCMP_OLE; break; 11934 case NEON::BI__builtin_neon_vcltd_f64: P = llvm::FCmpInst::FCMP_OLT; break; 11935 case NEON::BI__builtin_neon_vcged_f64: P = llvm::FCmpInst::FCMP_OGE; break; 11936 case NEON::BI__builtin_neon_vcgtd_f64: P = llvm::FCmpInst::FCMP_OGT; break; 11937 } 11938 Ops.push_back(EmitScalarExpr(E->getArg(1))); 11939 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy); 11940 Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy); 11941 if (P == llvm::FCmpInst::FCMP_OEQ) 11942 Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]); 11943 else 11944 Ops[0] = Builder.CreateFCmpS(P, Ops[0], Ops[1]); 11945 return Builder.CreateSExt(Ops[0], Int64Ty, "vcmpd"); 11946 } 11947 case 
NEON::BI__builtin_neon_vceqs_f32: 11948 case NEON::BI__builtin_neon_vcles_f32: 11949 case NEON::BI__builtin_neon_vclts_f32: 11950 case NEON::BI__builtin_neon_vcges_f32: 11951 case NEON::BI__builtin_neon_vcgts_f32: { 11952 llvm::CmpInst::Predicate P; 11953 switch (BuiltinID) { 11954 default: llvm_unreachable("missing builtin ID in switch!"); 11955 case NEON::BI__builtin_neon_vceqs_f32: P = llvm::FCmpInst::FCMP_OEQ; break; 11956 case NEON::BI__builtin_neon_vcles_f32: P = llvm::FCmpInst::FCMP_OLE; break; 11957 case NEON::BI__builtin_neon_vclts_f32: P = llvm::FCmpInst::FCMP_OLT; break; 11958 case NEON::BI__builtin_neon_vcges_f32: P = llvm::FCmpInst::FCMP_OGE; break; 11959 case NEON::BI__builtin_neon_vcgts_f32: P = llvm::FCmpInst::FCMP_OGT; break; 11960 } 11961 Ops.push_back(EmitScalarExpr(E->getArg(1))); 11962 Ops[0] = Builder.CreateBitCast(Ops[0], FloatTy); 11963 Ops[1] = Builder.CreateBitCast(Ops[1], FloatTy); 11964 if (P == llvm::FCmpInst::FCMP_OEQ) 11965 Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]); 11966 else 11967 Ops[0] = Builder.CreateFCmpS(P, Ops[0], Ops[1]); 11968 return Builder.CreateSExt(Ops[0], Int32Ty, "vcmpd"); 11969 } 11970 case NEON::BI__builtin_neon_vceqh_f16: 11971 case NEON::BI__builtin_neon_vcleh_f16: 11972 case NEON::BI__builtin_neon_vclth_f16: 11973 case NEON::BI__builtin_neon_vcgeh_f16: 11974 case NEON::BI__builtin_neon_vcgth_f16: { 11975 llvm::CmpInst::Predicate P; 11976 switch (BuiltinID) { 11977 default: llvm_unreachable("missing builtin ID in switch!"); 11978 case NEON::BI__builtin_neon_vceqh_f16: P = llvm::FCmpInst::FCMP_OEQ; break; 11979 case NEON::BI__builtin_neon_vcleh_f16: P = llvm::FCmpInst::FCMP_OLE; break; 11980 case NEON::BI__builtin_neon_vclth_f16: P = llvm::FCmpInst::FCMP_OLT; break; 11981 case NEON::BI__builtin_neon_vcgeh_f16: P = llvm::FCmpInst::FCMP_OGE; break; 11982 case NEON::BI__builtin_neon_vcgth_f16: P = llvm::FCmpInst::FCMP_OGT; break; 11983 } 11984 Ops.push_back(EmitScalarExpr(E->getArg(1))); 11985 Ops[0] = Builder.CreateBitCast(Ops[0], HalfTy); 11986 Ops[1] = Builder.CreateBitCast(Ops[1], HalfTy); 11987 if (P == llvm::FCmpInst::FCMP_OEQ) 11988 Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]); 11989 else 11990 Ops[0] = Builder.CreateFCmpS(P, Ops[0], Ops[1]); 11991 return Builder.CreateSExt(Ops[0], Int16Ty, "vcmpd"); 11992 } 11993 case NEON::BI__builtin_neon_vceqd_s64: 11994 case NEON::BI__builtin_neon_vceqd_u64: 11995 case NEON::BI__builtin_neon_vcgtd_s64: 11996 case NEON::BI__builtin_neon_vcgtd_u64: 11997 case NEON::BI__builtin_neon_vcltd_s64: 11998 case NEON::BI__builtin_neon_vcltd_u64: 11999 case NEON::BI__builtin_neon_vcged_u64: 12000 case NEON::BI__builtin_neon_vcged_s64: 12001 case NEON::BI__builtin_neon_vcled_u64: 12002 case NEON::BI__builtin_neon_vcled_s64: { 12003 llvm::CmpInst::Predicate P; 12004 switch (BuiltinID) { 12005 default: llvm_unreachable("missing builtin ID in switch!"); 12006 case NEON::BI__builtin_neon_vceqd_s64: 12007 case NEON::BI__builtin_neon_vceqd_u64:P = llvm::ICmpInst::ICMP_EQ;break; 12008 case NEON::BI__builtin_neon_vcgtd_s64:P = llvm::ICmpInst::ICMP_SGT;break; 12009 case NEON::BI__builtin_neon_vcgtd_u64:P = llvm::ICmpInst::ICMP_UGT;break; 12010 case NEON::BI__builtin_neon_vcltd_s64:P = llvm::ICmpInst::ICMP_SLT;break; 12011 case NEON::BI__builtin_neon_vcltd_u64:P = llvm::ICmpInst::ICMP_ULT;break; 12012 case NEON::BI__builtin_neon_vcged_u64:P = llvm::ICmpInst::ICMP_UGE;break; 12013 case NEON::BI__builtin_neon_vcged_s64:P = llvm::ICmpInst::ICMP_SGE;break; 12014 case NEON::BI__builtin_neon_vcled_u64:P = 
                                           llvm::ICmpInst::ICMP_ULE; break;
    case NEON::BI__builtin_neon_vcled_s64: P = llvm::ICmpInst::ICMP_SLE; break;
    }
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
    Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
    Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
    Ops[0] = Builder.CreateICmp(P, Ops[0], Ops[1]);
    return Builder.CreateSExt(Ops[0], Int64Ty, "vceqd");
  }
  case NEON::BI__builtin_neon_vtstd_s64:
  case NEON::BI__builtin_neon_vtstd_u64: {
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
    Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
    Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
    Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
    Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
                                llvm::Constant::getNullValue(Int64Ty));
    return Builder.CreateSExt(Ops[0], Int64Ty, "vtstd");
  }
  case NEON::BI__builtin_neon_vset_lane_i8:
  case NEON::BI__builtin_neon_vset_lane_i16:
  case NEON::BI__builtin_neon_vset_lane_i32:
  case NEON::BI__builtin_neon_vset_lane_i64:
  case NEON::BI__builtin_neon_vset_lane_bf16:
  case NEON::BI__builtin_neon_vset_lane_f32:
  case NEON::BI__builtin_neon_vsetq_lane_i8:
  case NEON::BI__builtin_neon_vsetq_lane_i16:
  case NEON::BI__builtin_neon_vsetq_lane_i32:
  case NEON::BI__builtin_neon_vsetq_lane_i64:
  case NEON::BI__builtin_neon_vsetq_lane_bf16:
  case NEON::BI__builtin_neon_vsetq_lane_f32:
    Ops.push_back(EmitScalarExpr(E->getArg(2)));
    return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
  case NEON::BI__builtin_neon_vset_lane_f64:
    // The vector type needs a cast for the v1f64 variant.
    Ops[1] =
        Builder.CreateBitCast(Ops[1], llvm::FixedVectorType::get(DoubleTy, 1));
    Ops.push_back(EmitScalarExpr(E->getArg(2)));
    return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
  case NEON::BI__builtin_neon_vsetq_lane_f64:
    // The vector type needs a cast for the v2f64 variant.
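    // As in the v1f64 case above: bitcast the vector operand to <2 x double>
    // first, then place the scalar with insertelement.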
12055 Ops[1] = 12056 Builder.CreateBitCast(Ops[1], llvm::FixedVectorType::get(DoubleTy, 2)); 12057 Ops.push_back(EmitScalarExpr(E->getArg(2))); 12058 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane"); 12059 12060 case NEON::BI__builtin_neon_vget_lane_i8: 12061 case NEON::BI__builtin_neon_vdupb_lane_i8: 12062 Ops[0] = 12063 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int8Ty, 8)); 12064 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 12065 "vget_lane"); 12066 case NEON::BI__builtin_neon_vgetq_lane_i8: 12067 case NEON::BI__builtin_neon_vdupb_laneq_i8: 12068 Ops[0] = 12069 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int8Ty, 16)); 12070 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 12071 "vgetq_lane"); 12072 case NEON::BI__builtin_neon_vget_lane_i16: 12073 case NEON::BI__builtin_neon_vduph_lane_i16: 12074 Ops[0] = 12075 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int16Ty, 4)); 12076 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 12077 "vget_lane"); 12078 case NEON::BI__builtin_neon_vgetq_lane_i16: 12079 case NEON::BI__builtin_neon_vduph_laneq_i16: 12080 Ops[0] = 12081 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int16Ty, 8)); 12082 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 12083 "vgetq_lane"); 12084 case NEON::BI__builtin_neon_vget_lane_i32: 12085 case NEON::BI__builtin_neon_vdups_lane_i32: 12086 Ops[0] = 12087 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int32Ty, 2)); 12088 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 12089 "vget_lane"); 12090 case NEON::BI__builtin_neon_vdups_lane_f32: 12091 Ops[0] = 12092 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(FloatTy, 2)); 12093 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 12094 "vdups_lane"); 12095 case NEON::BI__builtin_neon_vgetq_lane_i32: 12096 case NEON::BI__builtin_neon_vdups_laneq_i32: 12097 Ops[0] = 12098 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int32Ty, 4)); 12099 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 12100 "vgetq_lane"); 12101 case NEON::BI__builtin_neon_vget_lane_i64: 12102 case NEON::BI__builtin_neon_vdupd_lane_i64: 12103 Ops[0] = 12104 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int64Ty, 1)); 12105 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 12106 "vget_lane"); 12107 case NEON::BI__builtin_neon_vdupd_lane_f64: 12108 Ops[0] = 12109 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(DoubleTy, 1)); 12110 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 12111 "vdupd_lane"); 12112 case NEON::BI__builtin_neon_vgetq_lane_i64: 12113 case NEON::BI__builtin_neon_vdupd_laneq_i64: 12114 Ops[0] = 12115 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int64Ty, 2)); 12116 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 12117 "vgetq_lane"); 12118 case NEON::BI__builtin_neon_vget_lane_f32: 12119 Ops[0] = 12120 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(FloatTy, 2)); 12121 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 12122 "vget_lane"); 12123 case NEON::BI__builtin_neon_vget_lane_f64: 12124 Ops[0] = 12125 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(DoubleTy, 1)); 12126 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 12127 "vget_lane"); 12128 case 
NEON::BI__builtin_neon_vgetq_lane_f32: 12129 case NEON::BI__builtin_neon_vdups_laneq_f32: 12130 Ops[0] = 12131 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(FloatTy, 4)); 12132 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 12133 "vgetq_lane"); 12134 case NEON::BI__builtin_neon_vgetq_lane_f64: 12135 case NEON::BI__builtin_neon_vdupd_laneq_f64: 12136 Ops[0] = 12137 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(DoubleTy, 2)); 12138 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 12139 "vgetq_lane"); 12140 case NEON::BI__builtin_neon_vaddh_f16: 12141 Ops.push_back(EmitScalarExpr(E->getArg(1))); 12142 return Builder.CreateFAdd(Ops[0], Ops[1], "vaddh"); 12143 case NEON::BI__builtin_neon_vsubh_f16: 12144 Ops.push_back(EmitScalarExpr(E->getArg(1))); 12145 return Builder.CreateFSub(Ops[0], Ops[1], "vsubh"); 12146 case NEON::BI__builtin_neon_vmulh_f16: 12147 Ops.push_back(EmitScalarExpr(E->getArg(1))); 12148 return Builder.CreateFMul(Ops[0], Ops[1], "vmulh"); 12149 case NEON::BI__builtin_neon_vdivh_f16: 12150 Ops.push_back(EmitScalarExpr(E->getArg(1))); 12151 return Builder.CreateFDiv(Ops[0], Ops[1], "vdivh"); 12152 case NEON::BI__builtin_neon_vfmah_f16: 12153 // NEON intrinsic puts accumulator first, unlike the LLVM fma. 12154 return emitCallMaybeConstrainedFPBuiltin( 12155 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, HalfTy, 12156 {EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2)), Ops[0]}); 12157 case NEON::BI__builtin_neon_vfmsh_f16: { 12158 Value* Neg = Builder.CreateFNeg(EmitScalarExpr(E->getArg(1)), "vsubh"); 12159 12160 // NEON intrinsic puts accumulator first, unlike the LLVM fma. 12161 return emitCallMaybeConstrainedFPBuiltin( 12162 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, HalfTy, 12163 {Neg, EmitScalarExpr(E->getArg(2)), Ops[0]}); 12164 } 12165 case NEON::BI__builtin_neon_vaddd_s64: 12166 case NEON::BI__builtin_neon_vaddd_u64: 12167 return Builder.CreateAdd(Ops[0], EmitScalarExpr(E->getArg(1)), "vaddd"); 12168 case NEON::BI__builtin_neon_vsubd_s64: 12169 case NEON::BI__builtin_neon_vsubd_u64: 12170 return Builder.CreateSub(Ops[0], EmitScalarExpr(E->getArg(1)), "vsubd"); 12171 case NEON::BI__builtin_neon_vqdmlalh_s16: 12172 case NEON::BI__builtin_neon_vqdmlslh_s16: { 12173 SmallVector<Value *, 2> ProductOps; 12174 ProductOps.push_back(vectorWrapScalar16(Ops[1])); 12175 ProductOps.push_back(vectorWrapScalar16(EmitScalarExpr(E->getArg(2)))); 12176 auto *VTy = llvm::FixedVectorType::get(Int32Ty, 4); 12177 Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy), 12178 ProductOps, "vqdmlXl"); 12179 Constant *CI = ConstantInt::get(SizeTy, 0); 12180 Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0"); 12181 12182 unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlalh_s16 12183 ? Intrinsic::aarch64_neon_sqadd 12184 : Intrinsic::aarch64_neon_sqsub; 12185 return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int32Ty), Ops, "vqdmlXl"); 12186 } 12187 case NEON::BI__builtin_neon_vqshlud_n_s64: { 12188 Ops.push_back(EmitScalarExpr(E->getArg(1))); 12189 Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty); 12190 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqshlu, Int64Ty), 12191 Ops, "vqshlu_n"); 12192 } 12193 case NEON::BI__builtin_neon_vqshld_n_u64: 12194 case NEON::BI__builtin_neon_vqshld_n_s64: { 12195 unsigned Int = BuiltinID == NEON::BI__builtin_neon_vqshld_n_u64 12196 ? 
Intrinsic::aarch64_neon_uqshl 12197 : Intrinsic::aarch64_neon_sqshl; 12198 Ops.push_back(EmitScalarExpr(E->getArg(1))); 12199 Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty); 12200 return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vqshl_n"); 12201 } 12202 case NEON::BI__builtin_neon_vrshrd_n_u64: 12203 case NEON::BI__builtin_neon_vrshrd_n_s64: { 12204 unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrshrd_n_u64 12205 ? Intrinsic::aarch64_neon_urshl 12206 : Intrinsic::aarch64_neon_srshl; 12207 Ops.push_back(EmitScalarExpr(E->getArg(1))); 12208 int SV = cast<ConstantInt>(Ops[1])->getSExtValue(); 12209 Ops[1] = ConstantInt::get(Int64Ty, -SV); 12210 return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vrshr_n"); 12211 } 12212 case NEON::BI__builtin_neon_vrsrad_n_u64: 12213 case NEON::BI__builtin_neon_vrsrad_n_s64: { 12214 unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrsrad_n_u64 12215 ? Intrinsic::aarch64_neon_urshl 12216 : Intrinsic::aarch64_neon_srshl; 12217 Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty); 12218 Ops.push_back(Builder.CreateNeg(EmitScalarExpr(E->getArg(2)))); 12219 Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Int64Ty), 12220 {Ops[1], Builder.CreateSExt(Ops[2], Int64Ty)}); 12221 return Builder.CreateAdd(Ops[0], Builder.CreateBitCast(Ops[1], Int64Ty)); 12222 } 12223 case NEON::BI__builtin_neon_vshld_n_s64: 12224 case NEON::BI__builtin_neon_vshld_n_u64: { 12225 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1))); 12226 return Builder.CreateShl( 12227 Ops[0], ConstantInt::get(Int64Ty, Amt->getZExtValue()), "shld_n"); 12228 } 12229 case NEON::BI__builtin_neon_vshrd_n_s64: { 12230 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1))); 12231 return Builder.CreateAShr( 12232 Ops[0], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63), 12233 Amt->getZExtValue())), 12234 "shrd_n"); 12235 } 12236 case NEON::BI__builtin_neon_vshrd_n_u64: { 12237 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1))); 12238 uint64_t ShiftAmt = Amt->getZExtValue(); 12239 // Right-shifting an unsigned value by its size yields 0. 12240 if (ShiftAmt == 64) 12241 return ConstantInt::get(Int64Ty, 0); 12242 return Builder.CreateLShr(Ops[0], ConstantInt::get(Int64Ty, ShiftAmt), 12243 "shrd_n"); 12244 } 12245 case NEON::BI__builtin_neon_vsrad_n_s64: { 12246 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2))); 12247 Ops[1] = Builder.CreateAShr( 12248 Ops[1], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63), 12249 Amt->getZExtValue())), 12250 "shrd_n"); 12251 return Builder.CreateAdd(Ops[0], Ops[1]); 12252 } 12253 case NEON::BI__builtin_neon_vsrad_n_u64: { 12254 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2))); 12255 uint64_t ShiftAmt = Amt->getZExtValue(); 12256 // Right-shifting an unsigned value by its size yields 0. 12257 // As Op + 0 = Op, return Ops[0] directly. 
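    // (An i64 lshr by 64 is poison in LLVM IR, so the full-width shift is
    // folded to its known result here instead of being emitted.)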
12258 if (ShiftAmt == 64) 12259 return Ops[0]; 12260 Ops[1] = Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, ShiftAmt), 12261 "shrd_n"); 12262 return Builder.CreateAdd(Ops[0], Ops[1]); 12263 } 12264 case NEON::BI__builtin_neon_vqdmlalh_lane_s16: 12265 case NEON::BI__builtin_neon_vqdmlalh_laneq_s16: 12266 case NEON::BI__builtin_neon_vqdmlslh_lane_s16: 12267 case NEON::BI__builtin_neon_vqdmlslh_laneq_s16: { 12268 Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)), 12269 "lane"); 12270 SmallVector<Value *, 2> ProductOps; 12271 ProductOps.push_back(vectorWrapScalar16(Ops[1])); 12272 ProductOps.push_back(vectorWrapScalar16(Ops[2])); 12273 auto *VTy = llvm::FixedVectorType::get(Int32Ty, 4); 12274 Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy), 12275 ProductOps, "vqdmlXl"); 12276 Constant *CI = ConstantInt::get(SizeTy, 0); 12277 Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0"); 12278 Ops.pop_back(); 12279 12280 unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlalh_lane_s16 || 12281 BuiltinID == NEON::BI__builtin_neon_vqdmlalh_laneq_s16) 12282 ? Intrinsic::aarch64_neon_sqadd 12283 : Intrinsic::aarch64_neon_sqsub; 12284 return EmitNeonCall(CGM.getIntrinsic(AccInt, Int32Ty), Ops, "vqdmlXl"); 12285 } 12286 case NEON::BI__builtin_neon_vqdmlals_s32: 12287 case NEON::BI__builtin_neon_vqdmlsls_s32: { 12288 SmallVector<Value *, 2> ProductOps; 12289 ProductOps.push_back(Ops[1]); 12290 ProductOps.push_back(EmitScalarExpr(E->getArg(2))); 12291 Ops[1] = 12292 EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar), 12293 ProductOps, "vqdmlXl"); 12294 12295 unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlals_s32 12296 ? Intrinsic::aarch64_neon_sqadd 12297 : Intrinsic::aarch64_neon_sqsub; 12298 return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int64Ty), Ops, "vqdmlXl"); 12299 } 12300 case NEON::BI__builtin_neon_vqdmlals_lane_s32: 12301 case NEON::BI__builtin_neon_vqdmlals_laneq_s32: 12302 case NEON::BI__builtin_neon_vqdmlsls_lane_s32: 12303 case NEON::BI__builtin_neon_vqdmlsls_laneq_s32: { 12304 Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)), 12305 "lane"); 12306 SmallVector<Value *, 2> ProductOps; 12307 ProductOps.push_back(Ops[1]); 12308 ProductOps.push_back(Ops[2]); 12309 Ops[1] = 12310 EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar), 12311 ProductOps, "vqdmlXl"); 12312 Ops.pop_back(); 12313 12314 unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlals_lane_s32 || 12315 BuiltinID == NEON::BI__builtin_neon_vqdmlals_laneq_s32) 12316 ? 
Intrinsic::aarch64_neon_sqadd 12317 : Intrinsic::aarch64_neon_sqsub; 12318 return EmitNeonCall(CGM.getIntrinsic(AccInt, Int64Ty), Ops, "vqdmlXl"); 12319 } 12320 case NEON::BI__builtin_neon_vget_lane_bf16: 12321 case NEON::BI__builtin_neon_vduph_lane_bf16: 12322 case NEON::BI__builtin_neon_vduph_lane_f16: { 12323 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 12324 "vget_lane"); 12325 } 12326 case NEON::BI__builtin_neon_vgetq_lane_bf16: 12327 case NEON::BI__builtin_neon_vduph_laneq_bf16: 12328 case NEON::BI__builtin_neon_vduph_laneq_f16: { 12329 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 12330 "vgetq_lane"); 12331 } 12332 12333 case clang::AArch64::BI_InterlockedAdd: 12334 case clang::AArch64::BI_InterlockedAdd64: { 12335 Address DestAddr = CheckAtomicAlignment(*this, E); 12336 Value *Val = EmitScalarExpr(E->getArg(1)); 12337 AtomicRMWInst *RMWI = 12338 Builder.CreateAtomicRMW(AtomicRMWInst::Add, DestAddr, Val, 12339 llvm::AtomicOrdering::SequentiallyConsistent); 12340 return Builder.CreateAdd(RMWI, Val); 12341 } 12342 } 12343 12344 llvm::FixedVectorType *VTy = GetNeonType(this, Type); 12345 llvm::Type *Ty = VTy; 12346 if (!Ty) 12347 return nullptr; 12348 12349 // Not all intrinsics handled by the common case work for AArch64 yet, so only 12350 // defer to common code if it's been added to our special map. 12351 Builtin = findARMVectorIntrinsicInMap(AArch64SIMDIntrinsicMap, BuiltinID, 12352 AArch64SIMDIntrinsicsProvenSorted); 12353 12354 if (Builtin) 12355 return EmitCommonNeonBuiltinExpr( 12356 Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic, 12357 Builtin->NameHint, Builtin->TypeModifier, E, Ops, 12358 /*never use addresses*/ Address::invalid(), Address::invalid(), Arch); 12359 12360 if (Value *V = EmitAArch64TblBuiltinExpr(*this, BuiltinID, E, Ops, Arch)) 12361 return V; 12362 12363 unsigned Int; 12364 switch (BuiltinID) { 12365 default: return nullptr; 12366 case NEON::BI__builtin_neon_vbsl_v: 12367 case NEON::BI__builtin_neon_vbslq_v: { 12368 llvm::Type *BitTy = llvm::VectorType::getInteger(VTy); 12369 Ops[0] = Builder.CreateBitCast(Ops[0], BitTy, "vbsl"); 12370 Ops[1] = Builder.CreateBitCast(Ops[1], BitTy, "vbsl"); 12371 Ops[2] = Builder.CreateBitCast(Ops[2], BitTy, "vbsl"); 12372 12373 Ops[1] = Builder.CreateAnd(Ops[0], Ops[1], "vbsl"); 12374 Ops[2] = Builder.CreateAnd(Builder.CreateNot(Ops[0]), Ops[2], "vbsl"); 12375 Ops[0] = Builder.CreateOr(Ops[1], Ops[2], "vbsl"); 12376 return Builder.CreateBitCast(Ops[0], Ty); 12377 } 12378 case NEON::BI__builtin_neon_vfma_lane_v: 12379 case NEON::BI__builtin_neon_vfmaq_lane_v: { // Only used for FP types 12380 // The ARM builtins (and instructions) have the addend as the first 12381 // operand, but the 'fma' intrinsics have it last. Swap it around here. 12382 Value *Addend = Ops[0]; 12383 Value *Multiplicand = Ops[1]; 12384 Value *LaneSource = Ops[2]; 12385 Ops[0] = Multiplicand; 12386 Ops[1] = LaneSource; 12387 Ops[2] = Addend; 12388 12389 // Now adjust things to handle the lane access. 12390 auto *SourceTy = BuiltinID == NEON::BI__builtin_neon_vfmaq_lane_v 12391 ? 
llvm::FixedVectorType::get(VTy->getElementType(), 12392 VTy->getNumElements() / 2) 12393 : VTy; 12394 llvm::Constant *cst = cast<Constant>(Ops[3]); 12395 Value *SV = llvm::ConstantVector::getSplat(VTy->getElementCount(), cst); 12396 Ops[1] = Builder.CreateBitCast(Ops[1], SourceTy); 12397 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV, "lane"); 12398 12399 Ops.pop_back(); 12400 Int = Builder.getIsFPConstrained() ? Intrinsic::experimental_constrained_fma 12401 : Intrinsic::fma; 12402 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fmla"); 12403 } 12404 case NEON::BI__builtin_neon_vfma_laneq_v: { 12405 auto *VTy = cast<llvm::FixedVectorType>(Ty); 12406 // v1f64 fma should be mapped to Neon scalar f64 fma 12407 if (VTy && VTy->getElementType() == DoubleTy) { 12408 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy); 12409 Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy); 12410 llvm::FixedVectorType *VTy = 12411 GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, true)); 12412 Ops[2] = Builder.CreateBitCast(Ops[2], VTy); 12413 Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract"); 12414 Value *Result; 12415 Result = emitCallMaybeConstrainedFPBuiltin( 12416 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, 12417 DoubleTy, {Ops[1], Ops[2], Ops[0]}); 12418 return Builder.CreateBitCast(Result, Ty); 12419 } 12420 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 12421 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 12422 12423 auto *STy = llvm::FixedVectorType::get(VTy->getElementType(), 12424 VTy->getNumElements() * 2); 12425 Ops[2] = Builder.CreateBitCast(Ops[2], STy); 12426 Value *SV = llvm::ConstantVector::getSplat(VTy->getElementCount(), 12427 cast<ConstantInt>(Ops[3])); 12428 Ops[2] = Builder.CreateShuffleVector(Ops[2], Ops[2], SV, "lane"); 12429 12430 return emitCallMaybeConstrainedFPBuiltin( 12431 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty, 12432 {Ops[2], Ops[1], Ops[0]}); 12433 } 12434 case NEON::BI__builtin_neon_vfmaq_laneq_v: { 12435 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 12436 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 12437 12438 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 12439 Ops[2] = EmitNeonSplat(Ops[2], cast<ConstantInt>(Ops[3])); 12440 return emitCallMaybeConstrainedFPBuiltin( 12441 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty, 12442 {Ops[2], Ops[1], Ops[0]}); 12443 } 12444 case NEON::BI__builtin_neon_vfmah_lane_f16: 12445 case NEON::BI__builtin_neon_vfmas_lane_f32: 12446 case NEON::BI__builtin_neon_vfmah_laneq_f16: 12447 case NEON::BI__builtin_neon_vfmas_laneq_f32: 12448 case NEON::BI__builtin_neon_vfmad_lane_f64: 12449 case NEON::BI__builtin_neon_vfmad_laneq_f64: { 12450 Ops.push_back(EmitScalarExpr(E->getArg(3))); 12451 llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext())); 12452 Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract"); 12453 return emitCallMaybeConstrainedFPBuiltin( 12454 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty, 12455 {Ops[1], Ops[2], Ops[0]}); 12456 } 12457 case NEON::BI__builtin_neon_vmull_v: 12458 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics. 12459 Int = usgn ? 
Intrinsic::aarch64_neon_umull : Intrinsic::aarch64_neon_smull; 12460 if (Type.isPoly()) Int = Intrinsic::aarch64_neon_pmull; 12461 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull"); 12462 case NEON::BI__builtin_neon_vmax_v: 12463 case NEON::BI__builtin_neon_vmaxq_v: 12464 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics. 12465 Int = usgn ? Intrinsic::aarch64_neon_umax : Intrinsic::aarch64_neon_smax; 12466 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmax; 12467 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmax"); 12468 case NEON::BI__builtin_neon_vmaxh_f16: { 12469 Ops.push_back(EmitScalarExpr(E->getArg(1))); 12470 Int = Intrinsic::aarch64_neon_fmax; 12471 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmax"); 12472 } 12473 case NEON::BI__builtin_neon_vmin_v: 12474 case NEON::BI__builtin_neon_vminq_v: 12475 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics. 12476 Int = usgn ? Intrinsic::aarch64_neon_umin : Intrinsic::aarch64_neon_smin; 12477 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmin; 12478 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmin"); 12479 case NEON::BI__builtin_neon_vminh_f16: { 12480 Ops.push_back(EmitScalarExpr(E->getArg(1))); 12481 Int = Intrinsic::aarch64_neon_fmin; 12482 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmin"); 12483 } 12484 case NEON::BI__builtin_neon_vabd_v: 12485 case NEON::BI__builtin_neon_vabdq_v: 12486 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics. 12487 Int = usgn ? Intrinsic::aarch64_neon_uabd : Intrinsic::aarch64_neon_sabd; 12488 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fabd; 12489 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vabd"); 12490 case NEON::BI__builtin_neon_vpadal_v: 12491 case NEON::BI__builtin_neon_vpadalq_v: { 12492 unsigned ArgElts = VTy->getNumElements(); 12493 llvm::IntegerType *EltTy = cast<IntegerType>(VTy->getElementType()); 12494 unsigned BitWidth = EltTy->getBitWidth(); 12495 auto *ArgTy = llvm::FixedVectorType::get( 12496 llvm::IntegerType::get(getLLVMContext(), BitWidth / 2), 2 * ArgElts); 12497 llvm::Type* Tys[2] = { VTy, ArgTy }; 12498 Int = usgn ? Intrinsic::aarch64_neon_uaddlp : Intrinsic::aarch64_neon_saddlp; 12499 SmallVector<llvm::Value*, 1> TmpOps; 12500 TmpOps.push_back(Ops[1]); 12501 Function *F = CGM.getIntrinsic(Int, Tys); 12502 llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vpadal"); 12503 llvm::Value *addend = Builder.CreateBitCast(Ops[0], tmp->getType()); 12504 return Builder.CreateAdd(tmp, addend); 12505 } 12506 case NEON::BI__builtin_neon_vpmin_v: 12507 case NEON::BI__builtin_neon_vpminq_v: 12508 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics. 12509 Int = usgn ? Intrinsic::aarch64_neon_uminp : Intrinsic::aarch64_neon_sminp; 12510 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fminp; 12511 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmin"); 12512 case NEON::BI__builtin_neon_vpmax_v: 12513 case NEON::BI__builtin_neon_vpmaxq_v: 12514 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics. 12515 Int = usgn ? 
Intrinsic::aarch64_neon_umaxp : Intrinsic::aarch64_neon_smaxp; 12516 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmaxp; 12517 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmax"); 12518 case NEON::BI__builtin_neon_vminnm_v: 12519 case NEON::BI__builtin_neon_vminnmq_v: 12520 Int = Intrinsic::aarch64_neon_fminnm; 12521 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vminnm"); 12522 case NEON::BI__builtin_neon_vminnmh_f16: 12523 Ops.push_back(EmitScalarExpr(E->getArg(1))); 12524 Int = Intrinsic::aarch64_neon_fminnm; 12525 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vminnm"); 12526 case NEON::BI__builtin_neon_vmaxnm_v: 12527 case NEON::BI__builtin_neon_vmaxnmq_v: 12528 Int = Intrinsic::aarch64_neon_fmaxnm; 12529 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmaxnm"); 12530 case NEON::BI__builtin_neon_vmaxnmh_f16: 12531 Ops.push_back(EmitScalarExpr(E->getArg(1))); 12532 Int = Intrinsic::aarch64_neon_fmaxnm; 12533 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmaxnm"); 12534 case NEON::BI__builtin_neon_vrecpss_f32: { 12535 Ops.push_back(EmitScalarExpr(E->getArg(1))); 12536 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, FloatTy), 12537 Ops, "vrecps"); 12538 } 12539 case NEON::BI__builtin_neon_vrecpsd_f64: 12540 Ops.push_back(EmitScalarExpr(E->getArg(1))); 12541 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, DoubleTy), 12542 Ops, "vrecps"); 12543 case NEON::BI__builtin_neon_vrecpsh_f16: 12544 Ops.push_back(EmitScalarExpr(E->getArg(1))); 12545 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, HalfTy), 12546 Ops, "vrecps"); 12547 case NEON::BI__builtin_neon_vqshrun_n_v: 12548 Int = Intrinsic::aarch64_neon_sqshrun; 12549 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrun_n"); 12550 case NEON::BI__builtin_neon_vqrshrun_n_v: 12551 Int = Intrinsic::aarch64_neon_sqrshrun; 12552 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrun_n"); 12553 case NEON::BI__builtin_neon_vqshrn_n_v: 12554 Int = usgn ? Intrinsic::aarch64_neon_uqshrn : Intrinsic::aarch64_neon_sqshrn; 12555 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n"); 12556 case NEON::BI__builtin_neon_vrshrn_n_v: 12557 Int = Intrinsic::aarch64_neon_rshrn; 12558 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshrn_n"); 12559 case NEON::BI__builtin_neon_vqrshrn_n_v: 12560 Int = usgn ? Intrinsic::aarch64_neon_uqrshrn : Intrinsic::aarch64_neon_sqrshrn; 12561 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n"); 12562 case NEON::BI__builtin_neon_vrndah_f16: { 12563 Ops.push_back(EmitScalarExpr(E->getArg(0))); 12564 Int = Builder.getIsFPConstrained() 12565 ? Intrinsic::experimental_constrained_round 12566 : Intrinsic::round; 12567 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrnda"); 12568 } 12569 case NEON::BI__builtin_neon_vrnda_v: 12570 case NEON::BI__builtin_neon_vrndaq_v: { 12571 Int = Builder.getIsFPConstrained() 12572 ? Intrinsic::experimental_constrained_round 12573 : Intrinsic::round; 12574 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnda"); 12575 } 12576 case NEON::BI__builtin_neon_vrndih_f16: { 12577 Ops.push_back(EmitScalarExpr(E->getArg(0))); 12578 Int = Builder.getIsFPConstrained() 12579 ? 
Intrinsic::experimental_constrained_nearbyint 12580 : Intrinsic::nearbyint; 12581 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndi"); 12582 } 12583 case NEON::BI__builtin_neon_vrndmh_f16: { 12584 Ops.push_back(EmitScalarExpr(E->getArg(0))); 12585 Int = Builder.getIsFPConstrained() 12586 ? Intrinsic::experimental_constrained_floor 12587 : Intrinsic::floor; 12588 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndm"); 12589 } 12590 case NEON::BI__builtin_neon_vrndm_v: 12591 case NEON::BI__builtin_neon_vrndmq_v: { 12592 Int = Builder.getIsFPConstrained() 12593 ? Intrinsic::experimental_constrained_floor 12594 : Intrinsic::floor; 12595 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndm"); 12596 } 12597 case NEON::BI__builtin_neon_vrndnh_f16: { 12598 Ops.push_back(EmitScalarExpr(E->getArg(0))); 12599 Int = Builder.getIsFPConstrained() 12600 ? Intrinsic::experimental_constrained_roundeven 12601 : Intrinsic::roundeven; 12602 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndn"); 12603 } 12604 case NEON::BI__builtin_neon_vrndn_v: 12605 case NEON::BI__builtin_neon_vrndnq_v: { 12606 Int = Builder.getIsFPConstrained() 12607 ? Intrinsic::experimental_constrained_roundeven 12608 : Intrinsic::roundeven; 12609 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndn"); 12610 } 12611 case NEON::BI__builtin_neon_vrndns_f32: { 12612 Ops.push_back(EmitScalarExpr(E->getArg(0))); 12613 Int = Builder.getIsFPConstrained() 12614 ? Intrinsic::experimental_constrained_roundeven 12615 : Intrinsic::roundeven; 12616 return EmitNeonCall(CGM.getIntrinsic(Int, FloatTy), Ops, "vrndn"); 12617 } 12618 case NEON::BI__builtin_neon_vrndph_f16: { 12619 Ops.push_back(EmitScalarExpr(E->getArg(0))); 12620 Int = Builder.getIsFPConstrained() 12621 ? Intrinsic::experimental_constrained_ceil 12622 : Intrinsic::ceil; 12623 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndp"); 12624 } 12625 case NEON::BI__builtin_neon_vrndp_v: 12626 case NEON::BI__builtin_neon_vrndpq_v: { 12627 Int = Builder.getIsFPConstrained() 12628 ? Intrinsic::experimental_constrained_ceil 12629 : Intrinsic::ceil; 12630 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndp"); 12631 } 12632 case NEON::BI__builtin_neon_vrndxh_f16: { 12633 Ops.push_back(EmitScalarExpr(E->getArg(0))); 12634 Int = Builder.getIsFPConstrained() 12635 ? Intrinsic::experimental_constrained_rint 12636 : Intrinsic::rint; 12637 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndx"); 12638 } 12639 case NEON::BI__builtin_neon_vrndx_v: 12640 case NEON::BI__builtin_neon_vrndxq_v: { 12641 Int = Builder.getIsFPConstrained() 12642 ? Intrinsic::experimental_constrained_rint 12643 : Intrinsic::rint; 12644 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndx"); 12645 } 12646 case NEON::BI__builtin_neon_vrndh_f16: { 12647 Ops.push_back(EmitScalarExpr(E->getArg(0))); 12648 Int = Builder.getIsFPConstrained() 12649 ? 
Intrinsic::experimental_constrained_trunc 12650 : Intrinsic::trunc; 12651 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndz"); 12652 } 12653 case NEON::BI__builtin_neon_vrnd32x_f32: 12654 case NEON::BI__builtin_neon_vrnd32xq_f32: 12655 case NEON::BI__builtin_neon_vrnd32x_f64: 12656 case NEON::BI__builtin_neon_vrnd32xq_f64: { 12657 Ops.push_back(EmitScalarExpr(E->getArg(0))); 12658 Int = Intrinsic::aarch64_neon_frint32x; 12659 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd32x"); 12660 } 12661 case NEON::BI__builtin_neon_vrnd32z_f32: 12662 case NEON::BI__builtin_neon_vrnd32zq_f32: 12663 case NEON::BI__builtin_neon_vrnd32z_f64: 12664 case NEON::BI__builtin_neon_vrnd32zq_f64: { 12665 Ops.push_back(EmitScalarExpr(E->getArg(0))); 12666 Int = Intrinsic::aarch64_neon_frint32z; 12667 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd32z"); 12668 } 12669 case NEON::BI__builtin_neon_vrnd64x_f32: 12670 case NEON::BI__builtin_neon_vrnd64xq_f32: 12671 case NEON::BI__builtin_neon_vrnd64x_f64: 12672 case NEON::BI__builtin_neon_vrnd64xq_f64: { 12673 Ops.push_back(EmitScalarExpr(E->getArg(0))); 12674 Int = Intrinsic::aarch64_neon_frint64x; 12675 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd64x"); 12676 } 12677 case NEON::BI__builtin_neon_vrnd64z_f32: 12678 case NEON::BI__builtin_neon_vrnd64zq_f32: 12679 case NEON::BI__builtin_neon_vrnd64z_f64: 12680 case NEON::BI__builtin_neon_vrnd64zq_f64: { 12681 Ops.push_back(EmitScalarExpr(E->getArg(0))); 12682 Int = Intrinsic::aarch64_neon_frint64z; 12683 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd64z"); 12684 } 12685 case NEON::BI__builtin_neon_vrnd_v: 12686 case NEON::BI__builtin_neon_vrndq_v: { 12687 Int = Builder.getIsFPConstrained() 12688 ? Intrinsic::experimental_constrained_trunc 12689 : Intrinsic::trunc; 12690 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndz"); 12691 } 12692 case NEON::BI__builtin_neon_vcvt_f64_v: 12693 case NEON::BI__builtin_neon_vcvtq_f64_v: 12694 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 12695 Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, quad)); 12696 return usgn ? 
Builder.CreateUIToFP(Ops[0], Ty, "vcvt") 12697 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt"); 12698 case NEON::BI__builtin_neon_vcvt_f64_f32: { 12699 assert(Type.getEltType() == NeonTypeFlags::Float64 && quad && 12700 "unexpected vcvt_f64_f32 builtin"); 12701 NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float32, false, false); 12702 Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag)); 12703 12704 return Builder.CreateFPExt(Ops[0], Ty, "vcvt"); 12705 } 12706 case NEON::BI__builtin_neon_vcvt_f32_f64: { 12707 assert(Type.getEltType() == NeonTypeFlags::Float32 && 12708 "unexpected vcvt_f32_f64 builtin"); 12709 NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float64, false, true); 12710 Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag)); 12711 12712 return Builder.CreateFPTrunc(Ops[0], Ty, "vcvt"); 12713 } 12714 case NEON::BI__builtin_neon_vcvt_s32_v: 12715 case NEON::BI__builtin_neon_vcvt_u32_v: 12716 case NEON::BI__builtin_neon_vcvt_s64_v: 12717 case NEON::BI__builtin_neon_vcvt_u64_v: 12718 case NEON::BI__builtin_neon_vcvt_s16_f16: 12719 case NEON::BI__builtin_neon_vcvt_u16_f16: 12720 case NEON::BI__builtin_neon_vcvtq_s32_v: 12721 case NEON::BI__builtin_neon_vcvtq_u32_v: 12722 case NEON::BI__builtin_neon_vcvtq_s64_v: 12723 case NEON::BI__builtin_neon_vcvtq_u64_v: 12724 case NEON::BI__builtin_neon_vcvtq_s16_f16: 12725 case NEON::BI__builtin_neon_vcvtq_u16_f16: { 12726 Int = 12727 usgn ? Intrinsic::aarch64_neon_fcvtzu : Intrinsic::aarch64_neon_fcvtzs; 12728 llvm::Type *Tys[2] = {Ty, GetFloatNeonType(this, Type)}; 12729 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtz"); 12730 } 12731 case NEON::BI__builtin_neon_vcvta_s16_f16: 12732 case NEON::BI__builtin_neon_vcvta_u16_f16: 12733 case NEON::BI__builtin_neon_vcvta_s32_v: 12734 case NEON::BI__builtin_neon_vcvtaq_s16_f16: 12735 case NEON::BI__builtin_neon_vcvtaq_s32_v: 12736 case NEON::BI__builtin_neon_vcvta_u32_v: 12737 case NEON::BI__builtin_neon_vcvtaq_u16_f16: 12738 case NEON::BI__builtin_neon_vcvtaq_u32_v: 12739 case NEON::BI__builtin_neon_vcvta_s64_v: 12740 case NEON::BI__builtin_neon_vcvtaq_s64_v: 12741 case NEON::BI__builtin_neon_vcvta_u64_v: 12742 case NEON::BI__builtin_neon_vcvtaq_u64_v: { 12743 Int = usgn ? Intrinsic::aarch64_neon_fcvtau : Intrinsic::aarch64_neon_fcvtas; 12744 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) }; 12745 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvta"); 12746 } 12747 case NEON::BI__builtin_neon_vcvtm_s16_f16: 12748 case NEON::BI__builtin_neon_vcvtm_s32_v: 12749 case NEON::BI__builtin_neon_vcvtmq_s16_f16: 12750 case NEON::BI__builtin_neon_vcvtmq_s32_v: 12751 case NEON::BI__builtin_neon_vcvtm_u16_f16: 12752 case NEON::BI__builtin_neon_vcvtm_u32_v: 12753 case NEON::BI__builtin_neon_vcvtmq_u16_f16: 12754 case NEON::BI__builtin_neon_vcvtmq_u32_v: 12755 case NEON::BI__builtin_neon_vcvtm_s64_v: 12756 case NEON::BI__builtin_neon_vcvtmq_s64_v: 12757 case NEON::BI__builtin_neon_vcvtm_u64_v: 12758 case NEON::BI__builtin_neon_vcvtmq_u64_v: { 12759 Int = usgn ? 
Intrinsic::aarch64_neon_fcvtmu : Intrinsic::aarch64_neon_fcvtms; 12760 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) }; 12761 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtm"); 12762 } 12763 case NEON::BI__builtin_neon_vcvtn_s16_f16: 12764 case NEON::BI__builtin_neon_vcvtn_s32_v: 12765 case NEON::BI__builtin_neon_vcvtnq_s16_f16: 12766 case NEON::BI__builtin_neon_vcvtnq_s32_v: 12767 case NEON::BI__builtin_neon_vcvtn_u16_f16: 12768 case NEON::BI__builtin_neon_vcvtn_u32_v: 12769 case NEON::BI__builtin_neon_vcvtnq_u16_f16: 12770 case NEON::BI__builtin_neon_vcvtnq_u32_v: 12771 case NEON::BI__builtin_neon_vcvtn_s64_v: 12772 case NEON::BI__builtin_neon_vcvtnq_s64_v: 12773 case NEON::BI__builtin_neon_vcvtn_u64_v: 12774 case NEON::BI__builtin_neon_vcvtnq_u64_v: { 12775 Int = usgn ? Intrinsic::aarch64_neon_fcvtnu : Intrinsic::aarch64_neon_fcvtns; 12776 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) }; 12777 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtn"); 12778 } 12779 case NEON::BI__builtin_neon_vcvtp_s16_f16: 12780 case NEON::BI__builtin_neon_vcvtp_s32_v: 12781 case NEON::BI__builtin_neon_vcvtpq_s16_f16: 12782 case NEON::BI__builtin_neon_vcvtpq_s32_v: 12783 case NEON::BI__builtin_neon_vcvtp_u16_f16: 12784 case NEON::BI__builtin_neon_vcvtp_u32_v: 12785 case NEON::BI__builtin_neon_vcvtpq_u16_f16: 12786 case NEON::BI__builtin_neon_vcvtpq_u32_v: 12787 case NEON::BI__builtin_neon_vcvtp_s64_v: 12788 case NEON::BI__builtin_neon_vcvtpq_s64_v: 12789 case NEON::BI__builtin_neon_vcvtp_u64_v: 12790 case NEON::BI__builtin_neon_vcvtpq_u64_v: { 12791 Int = usgn ? Intrinsic::aarch64_neon_fcvtpu : Intrinsic::aarch64_neon_fcvtps; 12792 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) }; 12793 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtp"); 12794 } 12795 case NEON::BI__builtin_neon_vmulx_v: 12796 case NEON::BI__builtin_neon_vmulxq_v: { 12797 Int = Intrinsic::aarch64_neon_fmulx; 12798 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmulx"); 12799 } 12800 case NEON::BI__builtin_neon_vmulxh_lane_f16: 12801 case NEON::BI__builtin_neon_vmulxh_laneq_f16: { 12802 // vmulx_lane should be mapped to Neon scalar mulx after 12803 // extracting the scalar element 12804 Ops.push_back(EmitScalarExpr(E->getArg(2))); 12805 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract"); 12806 Ops.pop_back(); 12807 Int = Intrinsic::aarch64_neon_fmulx; 12808 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmulx"); 12809 } 12810 case NEON::BI__builtin_neon_vmul_lane_v: 12811 case NEON::BI__builtin_neon_vmul_laneq_v: { 12812 // v1f64 vmul_lane should be mapped to Neon scalar mul lane 12813 bool Quad = false; 12814 if (BuiltinID == NEON::BI__builtin_neon_vmul_laneq_v) 12815 Quad = true; 12816 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy); 12817 llvm::FixedVectorType *VTy = 12818 GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, Quad)); 12819 Ops[1] = Builder.CreateBitCast(Ops[1], VTy); 12820 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract"); 12821 Value *Result = Builder.CreateFMul(Ops[0], Ops[1]); 12822 return Builder.CreateBitCast(Result, Ty); 12823 } 12824 case NEON::BI__builtin_neon_vnegd_s64: 12825 return Builder.CreateNeg(EmitScalarExpr(E->getArg(0)), "vnegd"); 12826 case NEON::BI__builtin_neon_vnegh_f16: 12827 return Builder.CreateFNeg(EmitScalarExpr(E->getArg(0)), "vnegh"); 12828 case NEON::BI__builtin_neon_vpmaxnm_v: 12829 case NEON::BI__builtin_neon_vpmaxnmq_v: { 12830 Int = Intrinsic::aarch64_neon_fmaxnmp; 12831 
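    // fmaxnmp (and fminnmp below) follow IEEE-754 maxNum/minNum semantics:
    // a quiet NaN lane loses to a numeric lane.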
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmaxnm"); 12832 } 12833 case NEON::BI__builtin_neon_vpminnm_v: 12834 case NEON::BI__builtin_neon_vpminnmq_v: { 12835 Int = Intrinsic::aarch64_neon_fminnmp; 12836 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpminnm"); 12837 } 12838 case NEON::BI__builtin_neon_vsqrth_f16: { 12839 Ops.push_back(EmitScalarExpr(E->getArg(0))); 12840 Int = Builder.getIsFPConstrained() 12841 ? Intrinsic::experimental_constrained_sqrt 12842 : Intrinsic::sqrt; 12843 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vsqrt"); 12844 } 12845 case NEON::BI__builtin_neon_vsqrt_v: 12846 case NEON::BI__builtin_neon_vsqrtq_v: { 12847 Int = Builder.getIsFPConstrained() 12848 ? Intrinsic::experimental_constrained_sqrt 12849 : Intrinsic::sqrt; 12850 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 12851 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqrt"); 12852 } 12853 case NEON::BI__builtin_neon_vrbit_v: 12854 case NEON::BI__builtin_neon_vrbitq_v: { 12855 Int = Intrinsic::bitreverse; 12856 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrbit"); 12857 } 12858 case NEON::BI__builtin_neon_vaddv_u8: 12859 // FIXME: These are handled by the AArch64 scalar code. 12860 usgn = true; 12861 [[fallthrough]]; 12862 case NEON::BI__builtin_neon_vaddv_s8: { 12863 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv; 12864 Ty = Int32Ty; 12865 VTy = llvm::FixedVectorType::get(Int8Ty, 8); 12866 llvm::Type *Tys[2] = { Ty, VTy }; 12867 Ops.push_back(EmitScalarExpr(E->getArg(0))); 12868 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv"); 12869 return Builder.CreateTrunc(Ops[0], Int8Ty); 12870 } 12871 case NEON::BI__builtin_neon_vaddv_u16: 12872 usgn = true; 12873 [[fallthrough]]; 12874 case NEON::BI__builtin_neon_vaddv_s16: { 12875 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv; 12876 Ty = Int32Ty; 12877 VTy = llvm::FixedVectorType::get(Int16Ty, 4); 12878 llvm::Type *Tys[2] = { Ty, VTy }; 12879 Ops.push_back(EmitScalarExpr(E->getArg(0))); 12880 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv"); 12881 return Builder.CreateTrunc(Ops[0], Int16Ty); 12882 } 12883 case NEON::BI__builtin_neon_vaddvq_u8: 12884 usgn = true; 12885 [[fallthrough]]; 12886 case NEON::BI__builtin_neon_vaddvq_s8: { 12887 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv; 12888 Ty = Int32Ty; 12889 VTy = llvm::FixedVectorType::get(Int8Ty, 16); 12890 llvm::Type *Tys[2] = { Ty, VTy }; 12891 Ops.push_back(EmitScalarExpr(E->getArg(0))); 12892 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv"); 12893 return Builder.CreateTrunc(Ops[0], Int8Ty); 12894 } 12895 case NEON::BI__builtin_neon_vaddvq_u16: 12896 usgn = true; 12897 [[fallthrough]]; 12898 case NEON::BI__builtin_neon_vaddvq_s16: { 12899 Int = usgn ? 
Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv; 12900 Ty = Int32Ty; 12901 VTy = llvm::FixedVectorType::get(Int16Ty, 8); 12902 llvm::Type *Tys[2] = { Ty, VTy }; 12903 Ops.push_back(EmitScalarExpr(E->getArg(0))); 12904 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv"); 12905 return Builder.CreateTrunc(Ops[0], Int16Ty); 12906 } 12907 case NEON::BI__builtin_neon_vmaxv_u8: { 12908 Int = Intrinsic::aarch64_neon_umaxv; 12909 Ty = Int32Ty; 12910 VTy = llvm::FixedVectorType::get(Int8Ty, 8); 12911 llvm::Type *Tys[2] = { Ty, VTy }; 12912 Ops.push_back(EmitScalarExpr(E->getArg(0))); 12913 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 12914 return Builder.CreateTrunc(Ops[0], Int8Ty); 12915 } 12916 case NEON::BI__builtin_neon_vmaxv_u16: { 12917 Int = Intrinsic::aarch64_neon_umaxv; 12918 Ty = Int32Ty; 12919 VTy = llvm::FixedVectorType::get(Int16Ty, 4); 12920 llvm::Type *Tys[2] = { Ty, VTy }; 12921 Ops.push_back(EmitScalarExpr(E->getArg(0))); 12922 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 12923 return Builder.CreateTrunc(Ops[0], Int16Ty); 12924 } 12925 case NEON::BI__builtin_neon_vmaxvq_u8: { 12926 Int = Intrinsic::aarch64_neon_umaxv; 12927 Ty = Int32Ty; 12928 VTy = llvm::FixedVectorType::get(Int8Ty, 16); 12929 llvm::Type *Tys[2] = { Ty, VTy }; 12930 Ops.push_back(EmitScalarExpr(E->getArg(0))); 12931 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 12932 return Builder.CreateTrunc(Ops[0], Int8Ty); 12933 } 12934 case NEON::BI__builtin_neon_vmaxvq_u16: { 12935 Int = Intrinsic::aarch64_neon_umaxv; 12936 Ty = Int32Ty; 12937 VTy = llvm::FixedVectorType::get(Int16Ty, 8); 12938 llvm::Type *Tys[2] = { Ty, VTy }; 12939 Ops.push_back(EmitScalarExpr(E->getArg(0))); 12940 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 12941 return Builder.CreateTrunc(Ops[0], Int16Ty); 12942 } 12943 case NEON::BI__builtin_neon_vmaxv_s8: { 12944 Int = Intrinsic::aarch64_neon_smaxv; 12945 Ty = Int32Ty; 12946 VTy = llvm::FixedVectorType::get(Int8Ty, 8); 12947 llvm::Type *Tys[2] = { Ty, VTy }; 12948 Ops.push_back(EmitScalarExpr(E->getArg(0))); 12949 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 12950 return Builder.CreateTrunc(Ops[0], Int8Ty); 12951 } 12952 case NEON::BI__builtin_neon_vmaxv_s16: { 12953 Int = Intrinsic::aarch64_neon_smaxv; 12954 Ty = Int32Ty; 12955 VTy = llvm::FixedVectorType::get(Int16Ty, 4); 12956 llvm::Type *Tys[2] = { Ty, VTy }; 12957 Ops.push_back(EmitScalarExpr(E->getArg(0))); 12958 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 12959 return Builder.CreateTrunc(Ops[0], Int16Ty); 12960 } 12961 case NEON::BI__builtin_neon_vmaxvq_s8: { 12962 Int = Intrinsic::aarch64_neon_smaxv; 12963 Ty = Int32Ty; 12964 VTy = llvm::FixedVectorType::get(Int8Ty, 16); 12965 llvm::Type *Tys[2] = { Ty, VTy }; 12966 Ops.push_back(EmitScalarExpr(E->getArg(0))); 12967 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 12968 return Builder.CreateTrunc(Ops[0], Int8Ty); 12969 } 12970 case NEON::BI__builtin_neon_vmaxvq_s16: { 12971 Int = Intrinsic::aarch64_neon_smaxv; 12972 Ty = Int32Ty; 12973 VTy = llvm::FixedVectorType::get(Int16Ty, 8); 12974 llvm::Type *Tys[2] = { Ty, VTy }; 12975 Ops.push_back(EmitScalarExpr(E->getArg(0))); 12976 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 12977 return Builder.CreateTrunc(Ops[0], Int16Ty); 12978 } 12979 case NEON::BI__builtin_neon_vmaxv_f16: { 12980 Int = Intrinsic::aarch64_neon_fmaxv; 12981 Ty = HalfTy; 12982 VTy = 
llvm::FixedVectorType::get(HalfTy, 4); 12983 llvm::Type *Tys[2] = { Ty, VTy }; 12984 Ops.push_back(EmitScalarExpr(E->getArg(0))); 12985 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 12986 return Builder.CreateTrunc(Ops[0], HalfTy); 12987 } 12988 case NEON::BI__builtin_neon_vmaxvq_f16: { 12989 Int = Intrinsic::aarch64_neon_fmaxv; 12990 Ty = HalfTy; 12991 VTy = llvm::FixedVectorType::get(HalfTy, 8); 12992 llvm::Type *Tys[2] = { Ty, VTy }; 12993 Ops.push_back(EmitScalarExpr(E->getArg(0))); 12994 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 12995 return Builder.CreateTrunc(Ops[0], HalfTy); 12996 } 12997 case NEON::BI__builtin_neon_vminv_u8: { 12998 Int = Intrinsic::aarch64_neon_uminv; 12999 Ty = Int32Ty; 13000 VTy = llvm::FixedVectorType::get(Int8Ty, 8); 13001 llvm::Type *Tys[2] = { Ty, VTy }; 13002 Ops.push_back(EmitScalarExpr(E->getArg(0))); 13003 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 13004 return Builder.CreateTrunc(Ops[0], Int8Ty); 13005 } 13006 case NEON::BI__builtin_neon_vminv_u16: { 13007 Int = Intrinsic::aarch64_neon_uminv; 13008 Ty = Int32Ty; 13009 VTy = llvm::FixedVectorType::get(Int16Ty, 4); 13010 llvm::Type *Tys[2] = { Ty, VTy }; 13011 Ops.push_back(EmitScalarExpr(E->getArg(0))); 13012 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 13013 return Builder.CreateTrunc(Ops[0], Int16Ty); 13014 } 13015 case NEON::BI__builtin_neon_vminvq_u8: { 13016 Int = Intrinsic::aarch64_neon_uminv; 13017 Ty = Int32Ty; 13018 VTy = llvm::FixedVectorType::get(Int8Ty, 16); 13019 llvm::Type *Tys[2] = { Ty, VTy }; 13020 Ops.push_back(EmitScalarExpr(E->getArg(0))); 13021 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 13022 return Builder.CreateTrunc(Ops[0], Int8Ty); 13023 } 13024 case NEON::BI__builtin_neon_vminvq_u16: { 13025 Int = Intrinsic::aarch64_neon_uminv; 13026 Ty = Int32Ty; 13027 VTy = llvm::FixedVectorType::get(Int16Ty, 8); 13028 llvm::Type *Tys[2] = { Ty, VTy }; 13029 Ops.push_back(EmitScalarExpr(E->getArg(0))); 13030 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 13031 return Builder.CreateTrunc(Ops[0], Int16Ty); 13032 } 13033 case NEON::BI__builtin_neon_vminv_s8: { 13034 Int = Intrinsic::aarch64_neon_sminv; 13035 Ty = Int32Ty; 13036 VTy = llvm::FixedVectorType::get(Int8Ty, 8); 13037 llvm::Type *Tys[2] = { Ty, VTy }; 13038 Ops.push_back(EmitScalarExpr(E->getArg(0))); 13039 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 13040 return Builder.CreateTrunc(Ops[0], Int8Ty); 13041 } 13042 case NEON::BI__builtin_neon_vminv_s16: { 13043 Int = Intrinsic::aarch64_neon_sminv; 13044 Ty = Int32Ty; 13045 VTy = llvm::FixedVectorType::get(Int16Ty, 4); 13046 llvm::Type *Tys[2] = { Ty, VTy }; 13047 Ops.push_back(EmitScalarExpr(E->getArg(0))); 13048 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 13049 return Builder.CreateTrunc(Ops[0], Int16Ty); 13050 } 13051 case NEON::BI__builtin_neon_vminvq_s8: { 13052 Int = Intrinsic::aarch64_neon_sminv; 13053 Ty = Int32Ty; 13054 VTy = llvm::FixedVectorType::get(Int8Ty, 16); 13055 llvm::Type *Tys[2] = { Ty, VTy }; 13056 Ops.push_back(EmitScalarExpr(E->getArg(0))); 13057 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 13058 return Builder.CreateTrunc(Ops[0], Int8Ty); 13059 } 13060 case NEON::BI__builtin_neon_vminvq_s16: { 13061 Int = Intrinsic::aarch64_neon_sminv; 13062 Ty = Int32Ty; 13063 VTy = llvm::FixedVectorType::get(Int16Ty, 8); 13064 llvm::Type *Tys[2] = { Ty, VTy }; 13065 
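    // The across-vector reduction yields an i32; it is narrowed back to the
    // i16 element width below.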
Ops.push_back(EmitScalarExpr(E->getArg(0))); 13066 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 13067 return Builder.CreateTrunc(Ops[0], Int16Ty); 13068 } 13069 case NEON::BI__builtin_neon_vminv_f16: { 13070 Int = Intrinsic::aarch64_neon_fminv; 13071 Ty = HalfTy; 13072 VTy = llvm::FixedVectorType::get(HalfTy, 4); 13073 llvm::Type *Tys[2] = { Ty, VTy }; 13074 Ops.push_back(EmitScalarExpr(E->getArg(0))); 13075 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 13076 return Builder.CreateTrunc(Ops[0], HalfTy); 13077 } 13078 case NEON::BI__builtin_neon_vminvq_f16: { 13079 Int = Intrinsic::aarch64_neon_fminv; 13080 Ty = HalfTy; 13081 VTy = llvm::FixedVectorType::get(HalfTy, 8); 13082 llvm::Type *Tys[2] = { Ty, VTy }; 13083 Ops.push_back(EmitScalarExpr(E->getArg(0))); 13084 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 13085 return Builder.CreateTrunc(Ops[0], HalfTy); 13086 } 13087 case NEON::BI__builtin_neon_vmaxnmv_f16: { 13088 Int = Intrinsic::aarch64_neon_fmaxnmv; 13089 Ty = HalfTy; 13090 VTy = llvm::FixedVectorType::get(HalfTy, 4); 13091 llvm::Type *Tys[2] = { Ty, VTy }; 13092 Ops.push_back(EmitScalarExpr(E->getArg(0))); 13093 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxnmv"); 13094 return Builder.CreateTrunc(Ops[0], HalfTy); 13095 } 13096 case NEON::BI__builtin_neon_vmaxnmvq_f16: { 13097 Int = Intrinsic::aarch64_neon_fmaxnmv; 13098 Ty = HalfTy; 13099 VTy = llvm::FixedVectorType::get(HalfTy, 8); 13100 llvm::Type *Tys[2] = { Ty, VTy }; 13101 Ops.push_back(EmitScalarExpr(E->getArg(0))); 13102 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxnmv"); 13103 return Builder.CreateTrunc(Ops[0], HalfTy); 13104 } 13105 case NEON::BI__builtin_neon_vminnmv_f16: { 13106 Int = Intrinsic::aarch64_neon_fminnmv; 13107 Ty = HalfTy; 13108 VTy = llvm::FixedVectorType::get(HalfTy, 4); 13109 llvm::Type *Tys[2] = { Ty, VTy }; 13110 Ops.push_back(EmitScalarExpr(E->getArg(0))); 13111 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminnmv"); 13112 return Builder.CreateTrunc(Ops[0], HalfTy); 13113 } 13114 case NEON::BI__builtin_neon_vminnmvq_f16: { 13115 Int = Intrinsic::aarch64_neon_fminnmv; 13116 Ty = HalfTy; 13117 VTy = llvm::FixedVectorType::get(HalfTy, 8); 13118 llvm::Type *Tys[2] = { Ty, VTy }; 13119 Ops.push_back(EmitScalarExpr(E->getArg(0))); 13120 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminnmv"); 13121 return Builder.CreateTrunc(Ops[0], HalfTy); 13122 } 13123 case NEON::BI__builtin_neon_vmul_n_f64: { 13124 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy); 13125 Value *RHS = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), DoubleTy); 13126 return Builder.CreateFMul(Ops[0], RHS); 13127 } 13128 case NEON::BI__builtin_neon_vaddlv_u8: { 13129 Int = Intrinsic::aarch64_neon_uaddlv; 13130 Ty = Int32Ty; 13131 VTy = llvm::FixedVectorType::get(Int8Ty, 8); 13132 llvm::Type *Tys[2] = { Ty, VTy }; 13133 Ops.push_back(EmitScalarExpr(E->getArg(0))); 13134 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); 13135 return Builder.CreateTrunc(Ops[0], Int16Ty); 13136 } 13137 case NEON::BI__builtin_neon_vaddlv_u16: { 13138 Int = Intrinsic::aarch64_neon_uaddlv; 13139 Ty = Int32Ty; 13140 VTy = llvm::FixedVectorType::get(Int16Ty, 4); 13141 llvm::Type *Tys[2] = { Ty, VTy }; 13142 Ops.push_back(EmitScalarExpr(E->getArg(0))); 13143 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); 13144 } 13145 case NEON::BI__builtin_neon_vaddlvq_u8: { 13146 Int = Intrinsic::aarch64_neon_uaddlv; 13147 Ty = Int32Ty; 
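    // uaddlv widens each byte before summing, so the accumulator is an i32
    // even though the builtin returns a 16-bit value.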
13148 VTy = llvm::FixedVectorType::get(Int8Ty, 16); 13149 llvm::Type *Tys[2] = { Ty, VTy }; 13150 Ops.push_back(EmitScalarExpr(E->getArg(0))); 13151 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); 13152 return Builder.CreateTrunc(Ops[0], Int16Ty); 13153 } 13154 case NEON::BI__builtin_neon_vaddlvq_u16: { 13155 Int = Intrinsic::aarch64_neon_uaddlv; 13156 Ty = Int32Ty; 13157 VTy = llvm::FixedVectorType::get(Int16Ty, 8); 13158 llvm::Type *Tys[2] = { Ty, VTy }; 13159 Ops.push_back(EmitScalarExpr(E->getArg(0))); 13160 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); 13161 } 13162 case NEON::BI__builtin_neon_vaddlv_s8: { 13163 Int = Intrinsic::aarch64_neon_saddlv; 13164 Ty = Int32Ty; 13165 VTy = llvm::FixedVectorType::get(Int8Ty, 8); 13166 llvm::Type *Tys[2] = { Ty, VTy }; 13167 Ops.push_back(EmitScalarExpr(E->getArg(0))); 13168 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); 13169 return Builder.CreateTrunc(Ops[0], Int16Ty); 13170 } 13171 case NEON::BI__builtin_neon_vaddlv_s16: { 13172 Int = Intrinsic::aarch64_neon_saddlv; 13173 Ty = Int32Ty; 13174 VTy = llvm::FixedVectorType::get(Int16Ty, 4); 13175 llvm::Type *Tys[2] = { Ty, VTy }; 13176 Ops.push_back(EmitScalarExpr(E->getArg(0))); 13177 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); 13178 } 13179 case NEON::BI__builtin_neon_vaddlvq_s8: { 13180 Int = Intrinsic::aarch64_neon_saddlv; 13181 Ty = Int32Ty; 13182 VTy = llvm::FixedVectorType::get(Int8Ty, 16); 13183 llvm::Type *Tys[2] = { Ty, VTy }; 13184 Ops.push_back(EmitScalarExpr(E->getArg(0))); 13185 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); 13186 return Builder.CreateTrunc(Ops[0], Int16Ty); 13187 } 13188 case NEON::BI__builtin_neon_vaddlvq_s16: { 13189 Int = Intrinsic::aarch64_neon_saddlv; 13190 Ty = Int32Ty; 13191 VTy = llvm::FixedVectorType::get(Int16Ty, 8); 13192 llvm::Type *Tys[2] = { Ty, VTy }; 13193 Ops.push_back(EmitScalarExpr(E->getArg(0))); 13194 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); 13195 } 13196 case NEON::BI__builtin_neon_vsri_n_v: 13197 case NEON::BI__builtin_neon_vsriq_n_v: { 13198 Int = Intrinsic::aarch64_neon_vsri; 13199 llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty); 13200 return EmitNeonCall(Intrin, Ops, "vsri_n"); 13201 } 13202 case NEON::BI__builtin_neon_vsli_n_v: 13203 case NEON::BI__builtin_neon_vsliq_n_v: { 13204 Int = Intrinsic::aarch64_neon_vsli; 13205 llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty); 13206 return EmitNeonCall(Intrin, Ops, "vsli_n"); 13207 } 13208 case NEON::BI__builtin_neon_vsra_n_v: 13209 case NEON::BI__builtin_neon_vsraq_n_v: 13210 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 13211 Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n"); 13212 return Builder.CreateAdd(Ops[0], Ops[1]); 13213 case NEON::BI__builtin_neon_vrsra_n_v: 13214 case NEON::BI__builtin_neon_vrsraq_n_v: { 13215 Int = usgn ? 
Intrinsic::aarch64_neon_urshl : Intrinsic::aarch64_neon_srshl; 13216 SmallVector<llvm::Value*,2> TmpOps; 13217 TmpOps.push_back(Ops[1]); 13218 TmpOps.push_back(Ops[2]); 13219 Function* F = CGM.getIntrinsic(Int, Ty); 13220 llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vrshr_n", 1, true); 13221 Ops[0] = Builder.CreateBitCast(Ops[0], VTy); 13222 return Builder.CreateAdd(Ops[0], tmp); 13223 } 13224 case NEON::BI__builtin_neon_vld1_v: 13225 case NEON::BI__builtin_neon_vld1q_v: { 13226 return Builder.CreateAlignedLoad(VTy, Ops[0], PtrOp0.getAlignment()); 13227 } 13228 case NEON::BI__builtin_neon_vst1_v: 13229 case NEON::BI__builtin_neon_vst1q_v: 13230 Ops[1] = Builder.CreateBitCast(Ops[1], VTy); 13231 return Builder.CreateAlignedStore(Ops[1], Ops[0], PtrOp0.getAlignment()); 13232 case NEON::BI__builtin_neon_vld1_lane_v: 13233 case NEON::BI__builtin_neon_vld1q_lane_v: { 13234 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 13235 Ops[0] = Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0], 13236 PtrOp0.getAlignment()); 13237 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vld1_lane"); 13238 } 13239 case NEON::BI__builtin_neon_vldap1_lane_s64: 13240 case NEON::BI__builtin_neon_vldap1q_lane_s64: { 13241 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 13242 llvm::LoadInst *LI = Builder.CreateAlignedLoad( 13243 VTy->getElementType(), Ops[0], PtrOp0.getAlignment()); 13244 LI->setAtomic(llvm::AtomicOrdering::Acquire); 13245 Ops[0] = LI; 13246 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vldap1_lane"); 13247 } 13248 case NEON::BI__builtin_neon_vld1_dup_v: 13249 case NEON::BI__builtin_neon_vld1q_dup_v: { 13250 Value *V = PoisonValue::get(Ty); 13251 Ops[0] = Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0], 13252 PtrOp0.getAlignment()); 13253 llvm::Constant *CI = ConstantInt::get(Int32Ty, 0); 13254 Ops[0] = Builder.CreateInsertElement(V, Ops[0], CI); 13255 return EmitNeonSplat(Ops[0], CI); 13256 } 13257 case NEON::BI__builtin_neon_vst1_lane_v: 13258 case NEON::BI__builtin_neon_vst1q_lane_v: 13259 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 13260 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]); 13261 return Builder.CreateAlignedStore(Ops[1], Ops[0], PtrOp0.getAlignment()); 13262 case NEON::BI__builtin_neon_vstl1_lane_s64: 13263 case NEON::BI__builtin_neon_vstl1q_lane_s64: { 13264 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 13265 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]); 13266 llvm::StoreInst *SI = 13267 Builder.CreateAlignedStore(Ops[1], Ops[0], PtrOp0.getAlignment()); 13268 SI->setAtomic(llvm::AtomicOrdering::Release); 13269 return SI; 13270 } 13271 case NEON::BI__builtin_neon_vld2_v: 13272 case NEON::BI__builtin_neon_vld2q_v: { 13273 llvm::Type *Tys[2] = {VTy, UnqualPtrTy}; 13274 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2, Tys); 13275 Ops[1] = Builder.CreateCall(F, Ops[1], "vld2"); 13276 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 13277 } 13278 case NEON::BI__builtin_neon_vld3_v: 13279 case NEON::BI__builtin_neon_vld3q_v: { 13280 llvm::Type *Tys[2] = {VTy, UnqualPtrTy}; 13281 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3, Tys); 13282 Ops[1] = Builder.CreateCall(F, Ops[1], "vld3"); 13283 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 13284 } 13285 case NEON::BI__builtin_neon_vld4_v: 13286 case NEON::BI__builtin_neon_vld4q_v: { 13287 llvm::Type *Tys[2] = {VTy, UnqualPtrTy}; 13288 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4, Tys); 13289 Ops[1] = Builder.CreateCall(F, Ops[1], "vld4"); 13290 
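    // ld4 returns a struct of four vectors; write the whole aggregate out
    // through the result pointer in Ops[0].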
return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 13291 } 13292 case NEON::BI__builtin_neon_vld2_dup_v: 13293 case NEON::BI__builtin_neon_vld2q_dup_v: { 13294 llvm::Type *Tys[2] = {VTy, UnqualPtrTy}; 13295 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2r, Tys); 13296 Ops[1] = Builder.CreateCall(F, Ops[1], "vld2"); 13297 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 13298 } 13299 case NEON::BI__builtin_neon_vld3_dup_v: 13300 case NEON::BI__builtin_neon_vld3q_dup_v: { 13301 llvm::Type *Tys[2] = {VTy, UnqualPtrTy}; 13302 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3r, Tys); 13303 Ops[1] = Builder.CreateCall(F, Ops[1], "vld3"); 13304 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 13305 } 13306 case NEON::BI__builtin_neon_vld4_dup_v: 13307 case NEON::BI__builtin_neon_vld4q_dup_v: { 13308 llvm::Type *Tys[2] = {VTy, UnqualPtrTy}; 13309 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4r, Tys); 13310 Ops[1] = Builder.CreateCall(F, Ops[1], "vld4"); 13311 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 13312 } 13313 case NEON::BI__builtin_neon_vld2_lane_v: 13314 case NEON::BI__builtin_neon_vld2q_lane_v: { 13315 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() }; 13316 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2lane, Tys); 13317 std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end()); 13318 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 13319 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 13320 Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty); 13321 Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), "vld2_lane"); 13322 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 13323 } 13324 case NEON::BI__builtin_neon_vld3_lane_v: 13325 case NEON::BI__builtin_neon_vld3q_lane_v: { 13326 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() }; 13327 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3lane, Tys); 13328 std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end()); 13329 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 13330 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 13331 Ops[3] = Builder.CreateBitCast(Ops[3], Ty); 13332 Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty); 13333 Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), "vld3_lane"); 13334 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 13335 } 13336 case NEON::BI__builtin_neon_vld4_lane_v: 13337 case NEON::BI__builtin_neon_vld4q_lane_v: { 13338 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() }; 13339 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4lane, Tys); 13340 std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end()); 13341 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 13342 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 13343 Ops[3] = Builder.CreateBitCast(Ops[3], Ty); 13344 Ops[4] = Builder.CreateBitCast(Ops[4], Ty); 13345 Ops[5] = Builder.CreateZExt(Ops[5], Int64Ty); 13346 Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), "vld4_lane"); 13347 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 13348 } 13349 case NEON::BI__builtin_neon_vst2_v: 13350 case NEON::BI__builtin_neon_vst2q_v: { 13351 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end()); 13352 llvm::Type *Tys[2] = { VTy, Ops[2]->getType() }; 13353 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2, Tys), 13354 Ops, ""); 13355 } 13356 case NEON::BI__builtin_neon_vst2_lane_v: 13357 case NEON::BI__builtin_neon_vst2q_lane_v: { 13358 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end()); 13359 Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty); 13360 llvm::Type *Tys[2] = { 
VTy, Ops[3]->getType() }; 13361 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2lane, Tys), 13362 Ops, ""); 13363 } 13364 case NEON::BI__builtin_neon_vst3_v: 13365 case NEON::BI__builtin_neon_vst3q_v: { 13366 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end()); 13367 llvm::Type *Tys[2] = { VTy, Ops[3]->getType() }; 13368 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3, Tys), 13369 Ops, ""); 13370 } 13371 case NEON::BI__builtin_neon_vst3_lane_v: 13372 case NEON::BI__builtin_neon_vst3q_lane_v: { 13373 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end()); 13374 Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty); 13375 llvm::Type *Tys[2] = { VTy, Ops[4]->getType() }; 13376 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3lane, Tys), 13377 Ops, ""); 13378 } 13379 case NEON::BI__builtin_neon_vst4_v: 13380 case NEON::BI__builtin_neon_vst4q_v: { 13381 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end()); 13382 llvm::Type *Tys[2] = { VTy, Ops[4]->getType() }; 13383 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4, Tys), 13384 Ops, ""); 13385 } 13386 case NEON::BI__builtin_neon_vst4_lane_v: 13387 case NEON::BI__builtin_neon_vst4q_lane_v: { 13388 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end()); 13389 Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty); 13390 llvm::Type *Tys[2] = { VTy, Ops[5]->getType() }; 13391 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4lane, Tys), 13392 Ops, ""); 13393 } 13394 case NEON::BI__builtin_neon_vtrn_v: 13395 case NEON::BI__builtin_neon_vtrnq_v: { 13396 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 13397 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 13398 Value *SV = nullptr; 13399 13400 for (unsigned vi = 0; vi != 2; ++vi) { 13401 SmallVector<int, 16> Indices; 13402 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) { 13403 Indices.push_back(i+vi); 13404 Indices.push_back(i+e+vi); 13405 } 13406 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi); 13407 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn"); 13408 SV = Builder.CreateDefaultAlignedStore(SV, Addr); 13409 } 13410 return SV; 13411 } 13412 case NEON::BI__builtin_neon_vuzp_v: 13413 case NEON::BI__builtin_neon_vuzpq_v: { 13414 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 13415 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 13416 Value *SV = nullptr; 13417 13418 for (unsigned vi = 0; vi != 2; ++vi) { 13419 SmallVector<int, 16> Indices; 13420 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) 13421 Indices.push_back(2*i+vi); 13422 13423 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi); 13424 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp"); 13425 SV = Builder.CreateDefaultAlignedStore(SV, Addr); 13426 } 13427 return SV; 13428 } 13429 case NEON::BI__builtin_neon_vzip_v: 13430 case NEON::BI__builtin_neon_vzipq_v: { 13431 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 13432 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 13433 Value *SV = nullptr; 13434 13435 for (unsigned vi = 0; vi != 2; ++vi) { 13436 SmallVector<int, 16> Indices; 13437 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) { 13438 Indices.push_back((i + vi*e) >> 1); 13439 Indices.push_back(((i + vi*e) >> 1)+e); 13440 } 13441 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi); 13442 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip"); 13443 SV = Builder.CreateDefaultAlignedStore(SV, Addr); 13444 } 13445 return SV; 13446 } 13447 case NEON::BI__builtin_neon_vqtbl1q_v: { 
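    // The quad-register table-lookup builtins below map one-to-one onto the
    // AArch64 tbl1..tbl4 / tbx1..tbx4 intrinsics.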
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl1, Ty),
                        Ops, "vtbl1");
  }
  case NEON::BI__builtin_neon_vqtbl2q_v: {
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl2, Ty),
                        Ops, "vtbl2");
  }
  case NEON::BI__builtin_neon_vqtbl3q_v: {
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl3, Ty),
                        Ops, "vtbl3");
  }
  case NEON::BI__builtin_neon_vqtbl4q_v: {
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl4, Ty),
                        Ops, "vtbl4");
  }
  case NEON::BI__builtin_neon_vqtbx1q_v: {
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx1, Ty),
                        Ops, "vtbx1");
  }
  case NEON::BI__builtin_neon_vqtbx2q_v: {
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx2, Ty),
                        Ops, "vtbx2");
  }
  case NEON::BI__builtin_neon_vqtbx3q_v: {
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx3, Ty),
                        Ops, "vtbx3");
  }
  case NEON::BI__builtin_neon_vqtbx4q_v: {
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx4, Ty),
                        Ops, "vtbx4");
  }
  case NEON::BI__builtin_neon_vsqadd_v:
  case NEON::BI__builtin_neon_vsqaddq_v: {
    Int = Intrinsic::aarch64_neon_usqadd;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqadd");
  }
  case NEON::BI__builtin_neon_vuqadd_v:
  case NEON::BI__builtin_neon_vuqaddq_v: {
    Int = Intrinsic::aarch64_neon_suqadd;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vuqadd");
  }
  }
}

Value *CodeGenFunction::EmitBPFBuiltinExpr(unsigned BuiltinID,
                                           const CallExpr *E) {
  assert((BuiltinID == BPF::BI__builtin_preserve_field_info ||
          BuiltinID == BPF::BI__builtin_btf_type_id ||
          BuiltinID == BPF::BI__builtin_preserve_type_info ||
          BuiltinID == BPF::BI__builtin_preserve_enum_value) &&
         "unexpected BPF builtin");

  // A sequence number, injected into IR builtin functions, to prevent CSE
  // when the only difference between the generated calls may be the
  // debuginfo metadata.
  static uint32_t BuiltinSeqNum;

  switch (BuiltinID) {
  default:
    llvm_unreachable("Unexpected BPF builtin");
  case BPF::BI__builtin_preserve_field_info: {
    const Expr *Arg = E->getArg(0);
    bool IsBitField = Arg->IgnoreParens()->getObjectKind() == OK_BitField;

    if (!getDebugInfo()) {
      CGM.Error(E->getExprLoc(),
                "using __builtin_preserve_field_info() without -g");
      return IsBitField ? EmitLValue(Arg).getRawBitFieldPointer(*this)
                        : EmitLValue(Arg).emitRawPointer(*this);
    }

    // Enable underlying preserve_*_access_index() generation.
    bool OldIsInPreservedAIRegion = IsInPreservedAIRegion;
    IsInPreservedAIRegion = true;
    Value *FieldAddr = IsBitField ? EmitLValue(Arg).getRawBitFieldPointer(*this)
                                  : EmitLValue(Arg).emitRawPointer(*this);
    IsInPreservedAIRegion = OldIsInPreservedAIRegion;

    ConstantInt *C = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
    Value *InfoKind = ConstantInt::get(Int64Ty, C->getSExtValue());

    // Build the IR for the preserve_field_info intrinsic.
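    // Illustrative only: the call emitted below is expected to look roughly
    // like
    //   %info = call i32 @llvm.bpf.preserve.field.info.p0(ptr %field, i64 %kind)
    // which the BPF backend later turns into a CO-RE relocation record.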
    llvm::Function *FnGetFieldInfo = llvm::Intrinsic::getDeclaration(
        &CGM.getModule(), llvm::Intrinsic::bpf_preserve_field_info,
        {FieldAddr->getType()});
    return Builder.CreateCall(FnGetFieldInfo, {FieldAddr, InfoKind});
  }
  case BPF::BI__builtin_btf_type_id:
  case BPF::BI__builtin_preserve_type_info: {
    if (!getDebugInfo()) {
      CGM.Error(E->getExprLoc(), "using builtin function without -g");
      return nullptr;
    }

    const Expr *Arg0 = E->getArg(0);
    llvm::DIType *DbgInfo = getDebugInfo()->getOrCreateStandaloneType(
        Arg0->getType(), Arg0->getExprLoc());

    ConstantInt *Flag = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
    Value *FlagValue = ConstantInt::get(Int64Ty, Flag->getSExtValue());
    Value *SeqNumVal = ConstantInt::get(Int32Ty, BuiltinSeqNum++);

    llvm::Function *FnDecl;
    if (BuiltinID == BPF::BI__builtin_btf_type_id)
      FnDecl = llvm::Intrinsic::getDeclaration(
          &CGM.getModule(), llvm::Intrinsic::bpf_btf_type_id, {});
    else
      FnDecl = llvm::Intrinsic::getDeclaration(
          &CGM.getModule(), llvm::Intrinsic::bpf_preserve_type_info, {});
    CallInst *Fn = Builder.CreateCall(FnDecl, {SeqNumVal, FlagValue});
    Fn->setMetadata(LLVMContext::MD_preserve_access_index, DbgInfo);
    return Fn;
  }
  case BPF::BI__builtin_preserve_enum_value: {
    if (!getDebugInfo()) {
      CGM.Error(E->getExprLoc(), "using builtin function without -g");
      return nullptr;
    }

    const Expr *Arg0 = E->getArg(0);
    llvm::DIType *DbgInfo = getDebugInfo()->getOrCreateStandaloneType(
        Arg0->getType(), Arg0->getExprLoc());

    // Find enumerator
    const auto *UO = cast<UnaryOperator>(Arg0->IgnoreParens());
    const auto *CE = cast<CStyleCastExpr>(UO->getSubExpr());
    const auto *DR = cast<DeclRefExpr>(CE->getSubExpr());
    const auto *Enumerator = cast<EnumConstantDecl>(DR->getDecl());

    auto InitVal = Enumerator->getInitVal();
    std::string InitValStr;
    if (InitVal.isNegative() || InitVal > uint64_t(INT64_MAX))
      InitValStr = std::to_string(InitVal.getSExtValue());
    else
      InitValStr = std::to_string(InitVal.getZExtValue());
    std::string EnumStr = Enumerator->getNameAsString() + ":" + InitValStr;
    Value *EnumStrVal = Builder.CreateGlobalStringPtr(EnumStr);

    ConstantInt *Flag = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
    Value *FlagValue = ConstantInt::get(Int64Ty, Flag->getSExtValue());
    Value *SeqNumVal = ConstantInt::get(Int32Ty, BuiltinSeqNum++);

    llvm::Function *IntrinsicFn = llvm::Intrinsic::getDeclaration(
        &CGM.getModule(), llvm::Intrinsic::bpf_preserve_enum_value, {});
    CallInst *Fn =
        Builder.CreateCall(IntrinsicFn, {SeqNumVal, EnumStrVal, FlagValue});
    Fn->setMetadata(LLVMContext::MD_preserve_access_index, DbgInfo);
    return Fn;
  }
  }
}

llvm::Value *CodeGenFunction::
BuildVector(ArrayRef<llvm::Value*> Ops) {
  assert((Ops.size() & (Ops.size() - 1)) == 0 &&
         "Not a power-of-two sized vector!");
  bool AllConstants = true;
  for (unsigned i = 0, e = Ops.size(); i != e && AllConstants; ++i)
    AllConstants &= isa<Constant>(Ops[i]);

  // If this is a constant vector, create a ConstantVector.
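  // For example, BuildVector({i32 0, i32 1, i32 2, i32 3}) takes this path and
  // folds straight to the constant <4 x i32> <i32 0, i32 1, i32 2, i32 3>;
  // only non-constant operands reach the insertelement loop further down.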
  if (AllConstants) {
    SmallVector<llvm::Constant*, 16> CstOps;
    for (unsigned i = 0, e = Ops.size(); i != e; ++i)
      CstOps.push_back(cast<Constant>(Ops[i]));
    return llvm::ConstantVector::get(CstOps);
  }

  // Otherwise, insertelement the values to build the vector.
  Value *Result = llvm::PoisonValue::get(
      llvm::FixedVectorType::get(Ops[0]->getType(), Ops.size()));

  for (unsigned i = 0, e = Ops.size(); i != e; ++i)
    Result = Builder.CreateInsertElement(Result, Ops[i], Builder.getInt64(i));

  return Result;
}

// Convert the mask from an integer type to a vector of i1.
static Value *getMaskVecValue(CodeGenFunction &CGF, Value *Mask,
                              unsigned NumElts) {

  auto *MaskTy = llvm::FixedVectorType::get(
      CGF.Builder.getInt1Ty(),
      cast<IntegerType>(Mask->getType())->getBitWidth());
  Value *MaskVec = CGF.Builder.CreateBitCast(Mask, MaskTy);

  // If we have less than 8 elements, then the starting mask was an i8 and
  // we need to extract down to the right number of elements.
  if (NumElts < 8) {
    int Indices[4];
    for (unsigned i = 0; i != NumElts; ++i)
      Indices[i] = i;
    MaskVec = CGF.Builder.CreateShuffleVector(
        MaskVec, MaskVec, ArrayRef(Indices, NumElts), "extract");
  }
  return MaskVec;
}

static Value *EmitX86MaskedStore(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
                                 Align Alignment) {
  Value *Ptr = Ops[0];

  Value *MaskVec = getMaskVecValue(
      CGF, Ops[2],
      cast<llvm::FixedVectorType>(Ops[1]->getType())->getNumElements());

  return CGF.Builder.CreateMaskedStore(Ops[1], Ptr, Alignment, MaskVec);
}

static Value *EmitX86MaskedLoad(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
                                Align Alignment) {
  llvm::Type *Ty = Ops[1]->getType();
  Value *Ptr = Ops[0];

  Value *MaskVec = getMaskVecValue(
      CGF, Ops[2], cast<llvm::FixedVectorType>(Ty)->getNumElements());

  return CGF.Builder.CreateMaskedLoad(Ty, Ptr, Alignment, MaskVec, Ops[1]);
}

static Value *EmitX86ExpandLoad(CodeGenFunction &CGF,
                                ArrayRef<Value *> Ops) {
  auto *ResultTy = cast<llvm::VectorType>(Ops[1]->getType());
  Value *Ptr = Ops[0];

  Value *MaskVec = getMaskVecValue(
      CGF, Ops[2], cast<FixedVectorType>(ResultTy)->getNumElements());

  llvm::Function *F = CGF.CGM.getIntrinsic(Intrinsic::masked_expandload,
                                           ResultTy);
  return CGF.Builder.CreateCall(F, { Ptr, MaskVec, Ops[1] });
}

static Value *EmitX86CompressExpand(CodeGenFunction &CGF,
                                    ArrayRef<Value *> Ops,
                                    bool IsCompress) {
  auto *ResultTy = cast<llvm::FixedVectorType>(Ops[1]->getType());

  Value *MaskVec = getMaskVecValue(CGF, Ops[2], ResultTy->getNumElements());

  Intrinsic::ID IID = IsCompress ?
      Intrinsic::x86_avx512_mask_compress : Intrinsic::x86_avx512_mask_expand;
  llvm::Function *F = CGF.CGM.getIntrinsic(IID, ResultTy);
  return CGF.Builder.CreateCall(F, { Ops[0], Ops[1], MaskVec });
}

static Value *EmitX86CompressStore(CodeGenFunction &CGF,
                                   ArrayRef<Value *> Ops) {
  auto *ResultTy = cast<llvm::FixedVectorType>(Ops[1]->getType());
  Value *Ptr = Ops[0];

  Value *MaskVec = getMaskVecValue(CGF, Ops[2], ResultTy->getNumElements());

  llvm::Function *F = CGF.CGM.getIntrinsic(Intrinsic::masked_compressstore,
                                           ResultTy);
  return CGF.Builder.CreateCall(F, { Ops[1], Ptr, MaskVec });
}

static Value *EmitX86MaskLogic(CodeGenFunction &CGF, Instruction::BinaryOps Opc,
                               ArrayRef<Value *> Ops,
                               bool InvertLHS = false) {
  unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
  Value *LHS = getMaskVecValue(CGF, Ops[0], NumElts);
  Value *RHS = getMaskVecValue(CGF, Ops[1], NumElts);

  if (InvertLHS)
    LHS = CGF.Builder.CreateNot(LHS);

  return CGF.Builder.CreateBitCast(CGF.Builder.CreateBinOp(Opc, LHS, RHS),
                                   Ops[0]->getType());
}

static Value *EmitX86FunnelShift(CodeGenFunction &CGF, Value *Op0, Value *Op1,
                                 Value *Amt, bool IsRight) {
  llvm::Type *Ty = Op0->getType();

  // The amount may be a scalar immediate, in which case create a splat vector.
  // Funnel shift amounts are treated as modulo and types are all power-of-2 so
  // we only care about the lowest log2 bits anyway.
  if (Amt->getType() != Ty) {
    unsigned NumElts = cast<llvm::FixedVectorType>(Ty)->getNumElements();
    Amt = CGF.Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
    Amt = CGF.Builder.CreateVectorSplat(NumElts, Amt);
  }

  unsigned IID = IsRight ? Intrinsic::fshr : Intrinsic::fshl;
  Function *F = CGF.CGM.getIntrinsic(IID, Ty);
  return CGF.Builder.CreateCall(F, {Op0, Op1, Amt});
}

static Value *EmitX86vpcom(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
                           bool IsSigned) {
  Value *Op0 = Ops[0];
  Value *Op1 = Ops[1];
  llvm::Type *Ty = Op0->getType();
  uint64_t Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;

  CmpInst::Predicate Pred;
  switch (Imm) {
  case 0x0:
    Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
    break;
  case 0x1:
    Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
    break;
  case 0x2:
    Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
    break;
  case 0x3:
    Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
    break;
  case 0x4:
    Pred = ICmpInst::ICMP_EQ;
    break;
  case 0x5:
    Pred = ICmpInst::ICMP_NE;
    break;
  case 0x6:
    return llvm::Constant::getNullValue(Ty); // FALSE
  case 0x7:
    return llvm::Constant::getAllOnesValue(Ty); // TRUE
  default:
    llvm_unreachable("Unexpected XOP vpcom/vpcomu predicate");
  }

  Value *Cmp = CGF.Builder.CreateICmp(Pred, Op0, Op1);
  Value *Res = CGF.Builder.CreateSExt(Cmp, Ty);
  return Res;
}

static Value *EmitX86Select(CodeGenFunction &CGF,
                            Value *Mask, Value *Op0, Value *Op1) {

  // If the mask is all ones just return first argument.
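  // (A constant all-ones mask selects every lane from Op0, so materializing
  // the i1 vector and the select below would be wasted work.)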
13783 if (const auto *C = dyn_cast<Constant>(Mask)) 13784 if (C->isAllOnesValue()) 13785 return Op0; 13786 13787 Mask = getMaskVecValue( 13788 CGF, Mask, cast<llvm::FixedVectorType>(Op0->getType())->getNumElements()); 13789 13790 return CGF.Builder.CreateSelect(Mask, Op0, Op1); 13791 } 13792 13793 static Value *EmitX86ScalarSelect(CodeGenFunction &CGF, 13794 Value *Mask, Value *Op0, Value *Op1) { 13795 // If the mask is all ones just return first argument. 13796 if (const auto *C = dyn_cast<Constant>(Mask)) 13797 if (C->isAllOnesValue()) 13798 return Op0; 13799 13800 auto *MaskTy = llvm::FixedVectorType::get( 13801 CGF.Builder.getInt1Ty(), Mask->getType()->getIntegerBitWidth()); 13802 Mask = CGF.Builder.CreateBitCast(Mask, MaskTy); 13803 Mask = CGF.Builder.CreateExtractElement(Mask, (uint64_t)0); 13804 return CGF.Builder.CreateSelect(Mask, Op0, Op1); 13805 } 13806 13807 static Value *EmitX86MaskedCompareResult(CodeGenFunction &CGF, Value *Cmp, 13808 unsigned NumElts, Value *MaskIn) { 13809 if (MaskIn) { 13810 const auto *C = dyn_cast<Constant>(MaskIn); 13811 if (!C || !C->isAllOnesValue()) 13812 Cmp = CGF.Builder.CreateAnd(Cmp, getMaskVecValue(CGF, MaskIn, NumElts)); 13813 } 13814 13815 if (NumElts < 8) { 13816 int Indices[8]; 13817 for (unsigned i = 0; i != NumElts; ++i) 13818 Indices[i] = i; 13819 for (unsigned i = NumElts; i != 8; ++i) 13820 Indices[i] = i % NumElts + NumElts; 13821 Cmp = CGF.Builder.CreateShuffleVector( 13822 Cmp, llvm::Constant::getNullValue(Cmp->getType()), Indices); 13823 } 13824 13825 return CGF.Builder.CreateBitCast(Cmp, 13826 IntegerType::get(CGF.getLLVMContext(), 13827 std::max(NumElts, 8U))); 13828 } 13829 13830 static Value *EmitX86MaskedCompare(CodeGenFunction &CGF, unsigned CC, 13831 bool Signed, ArrayRef<Value *> Ops) { 13832 assert((Ops.size() == 2 || Ops.size() == 4) && 13833 "Unexpected number of arguments"); 13834 unsigned NumElts = 13835 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements(); 13836 Value *Cmp; 13837 13838 if (CC == 3) { 13839 Cmp = Constant::getNullValue( 13840 llvm::FixedVectorType::get(CGF.Builder.getInt1Ty(), NumElts)); 13841 } else if (CC == 7) { 13842 Cmp = Constant::getAllOnesValue( 13843 llvm::FixedVectorType::get(CGF.Builder.getInt1Ty(), NumElts)); 13844 } else { 13845 ICmpInst::Predicate Pred; 13846 switch (CC) { 13847 default: llvm_unreachable("Unknown condition code"); 13848 case 0: Pred = ICmpInst::ICMP_EQ; break; 13849 case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break; 13850 case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break; 13851 case 4: Pred = ICmpInst::ICMP_NE; break; 13852 case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break; 13853 case 6: Pred = Signed ? 
ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break; 13854 } 13855 Cmp = CGF.Builder.CreateICmp(Pred, Ops[0], Ops[1]); 13856 } 13857 13858 Value *MaskIn = nullptr; 13859 if (Ops.size() == 4) 13860 MaskIn = Ops[3]; 13861 13862 return EmitX86MaskedCompareResult(CGF, Cmp, NumElts, MaskIn); 13863 } 13864 13865 static Value *EmitX86ConvertToMask(CodeGenFunction &CGF, Value *In) { 13866 Value *Zero = Constant::getNullValue(In->getType()); 13867 return EmitX86MaskedCompare(CGF, 1, true, { In, Zero }); 13868 } 13869 13870 static Value *EmitX86ConvertIntToFp(CodeGenFunction &CGF, const CallExpr *E, 13871 ArrayRef<Value *> Ops, bool IsSigned) { 13872 unsigned Rnd = cast<llvm::ConstantInt>(Ops[3])->getZExtValue(); 13873 llvm::Type *Ty = Ops[1]->getType(); 13874 13875 Value *Res; 13876 if (Rnd != 4) { 13877 Intrinsic::ID IID = IsSigned ? Intrinsic::x86_avx512_sitofp_round 13878 : Intrinsic::x86_avx512_uitofp_round; 13879 Function *F = CGF.CGM.getIntrinsic(IID, { Ty, Ops[0]->getType() }); 13880 Res = CGF.Builder.CreateCall(F, { Ops[0], Ops[3] }); 13881 } else { 13882 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E); 13883 Res = IsSigned ? CGF.Builder.CreateSIToFP(Ops[0], Ty) 13884 : CGF.Builder.CreateUIToFP(Ops[0], Ty); 13885 } 13886 13887 return EmitX86Select(CGF, Ops[2], Res, Ops[1]); 13888 } 13889 13890 // Lowers X86 FMA intrinsics to IR. 13891 static Value *EmitX86FMAExpr(CodeGenFunction &CGF, const CallExpr *E, 13892 ArrayRef<Value *> Ops, unsigned BuiltinID, 13893 bool IsAddSub) { 13894 13895 bool Subtract = false; 13896 Intrinsic::ID IID = Intrinsic::not_intrinsic; 13897 switch (BuiltinID) { 13898 default: break; 13899 case clang::X86::BI__builtin_ia32_vfmsubph512_mask3: 13900 Subtract = true; 13901 [[fallthrough]]; 13902 case clang::X86::BI__builtin_ia32_vfmaddph512_mask: 13903 case clang::X86::BI__builtin_ia32_vfmaddph512_maskz: 13904 case clang::X86::BI__builtin_ia32_vfmaddph512_mask3: 13905 IID = llvm::Intrinsic::x86_avx512fp16_vfmadd_ph_512; 13906 break; 13907 case clang::X86::BI__builtin_ia32_vfmsubaddph512_mask3: 13908 Subtract = true; 13909 [[fallthrough]]; 13910 case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask: 13911 case clang::X86::BI__builtin_ia32_vfmaddsubph512_maskz: 13912 case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask3: 13913 IID = llvm::Intrinsic::x86_avx512fp16_vfmaddsub_ph_512; 13914 break; 13915 case clang::X86::BI__builtin_ia32_vfmsubps512_mask3: 13916 Subtract = true; 13917 [[fallthrough]]; 13918 case clang::X86::BI__builtin_ia32_vfmaddps512_mask: 13919 case clang::X86::BI__builtin_ia32_vfmaddps512_maskz: 13920 case clang::X86::BI__builtin_ia32_vfmaddps512_mask3: 13921 IID = llvm::Intrinsic::x86_avx512_vfmadd_ps_512; break; 13922 case clang::X86::BI__builtin_ia32_vfmsubpd512_mask3: 13923 Subtract = true; 13924 [[fallthrough]]; 13925 case clang::X86::BI__builtin_ia32_vfmaddpd512_mask: 13926 case clang::X86::BI__builtin_ia32_vfmaddpd512_maskz: 13927 case clang::X86::BI__builtin_ia32_vfmaddpd512_mask3: 13928 IID = llvm::Intrinsic::x86_avx512_vfmadd_pd_512; break; 13929 case clang::X86::BI__builtin_ia32_vfmsubaddps512_mask3: 13930 Subtract = true; 13931 [[fallthrough]]; 13932 case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask: 13933 case clang::X86::BI__builtin_ia32_vfmaddsubps512_maskz: 13934 case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask3: 13935 IID = llvm::Intrinsic::x86_avx512_vfmaddsub_ps_512; 13936 break; 13937 case clang::X86::BI__builtin_ia32_vfmsubaddpd512_mask3: 13938 Subtract = true; 13939 [[fallthrough]]; 13940 case 
clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask: 13941 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_maskz: 13942 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask3: 13943 IID = llvm::Intrinsic::x86_avx512_vfmaddsub_pd_512; 13944 break; 13945 } 13946 13947 Value *A = Ops[0]; 13948 Value *B = Ops[1]; 13949 Value *C = Ops[2]; 13950 13951 if (Subtract) 13952 C = CGF.Builder.CreateFNeg(C); 13953 13954 Value *Res; 13955 13956 // Only handle in case of _MM_FROUND_CUR_DIRECTION/4 (no rounding). 13957 if (IID != Intrinsic::not_intrinsic && 13958 (cast<llvm::ConstantInt>(Ops.back())->getZExtValue() != (uint64_t)4 || 13959 IsAddSub)) { 13960 Function *Intr = CGF.CGM.getIntrinsic(IID); 13961 Res = CGF.Builder.CreateCall(Intr, {A, B, C, Ops.back() }); 13962 } else { 13963 llvm::Type *Ty = A->getType(); 13964 Function *FMA; 13965 if (CGF.Builder.getIsFPConstrained()) { 13966 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E); 13967 FMA = CGF.CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, Ty); 13968 Res = CGF.Builder.CreateConstrainedFPCall(FMA, {A, B, C}); 13969 } else { 13970 FMA = CGF.CGM.getIntrinsic(Intrinsic::fma, Ty); 13971 Res = CGF.Builder.CreateCall(FMA, {A, B, C}); 13972 } 13973 } 13974 13975 // Handle any required masking. 13976 Value *MaskFalseVal = nullptr; 13977 switch (BuiltinID) { 13978 case clang::X86::BI__builtin_ia32_vfmaddph512_mask: 13979 case clang::X86::BI__builtin_ia32_vfmaddps512_mask: 13980 case clang::X86::BI__builtin_ia32_vfmaddpd512_mask: 13981 case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask: 13982 case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask: 13983 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask: 13984 MaskFalseVal = Ops[0]; 13985 break; 13986 case clang::X86::BI__builtin_ia32_vfmaddph512_maskz: 13987 case clang::X86::BI__builtin_ia32_vfmaddps512_maskz: 13988 case clang::X86::BI__builtin_ia32_vfmaddpd512_maskz: 13989 case clang::X86::BI__builtin_ia32_vfmaddsubph512_maskz: 13990 case clang::X86::BI__builtin_ia32_vfmaddsubps512_maskz: 13991 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_maskz: 13992 MaskFalseVal = Constant::getNullValue(Ops[0]->getType()); 13993 break; 13994 case clang::X86::BI__builtin_ia32_vfmsubph512_mask3: 13995 case clang::X86::BI__builtin_ia32_vfmaddph512_mask3: 13996 case clang::X86::BI__builtin_ia32_vfmsubps512_mask3: 13997 case clang::X86::BI__builtin_ia32_vfmaddps512_mask3: 13998 case clang::X86::BI__builtin_ia32_vfmsubpd512_mask3: 13999 case clang::X86::BI__builtin_ia32_vfmaddpd512_mask3: 14000 case clang::X86::BI__builtin_ia32_vfmsubaddph512_mask3: 14001 case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask3: 14002 case clang::X86::BI__builtin_ia32_vfmsubaddps512_mask3: 14003 case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask3: 14004 case clang::X86::BI__builtin_ia32_vfmsubaddpd512_mask3: 14005 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask3: 14006 MaskFalseVal = Ops[2]; 14007 break; 14008 } 14009 14010 if (MaskFalseVal) 14011 return EmitX86Select(CGF, Ops[3], Res, MaskFalseVal); 14012 14013 return Res; 14014 } 14015 14016 static Value *EmitScalarFMAExpr(CodeGenFunction &CGF, const CallExpr *E, 14017 MutableArrayRef<Value *> Ops, Value *Upper, 14018 bool ZeroMask = false, unsigned PTIdx = 0, 14019 bool NegAcc = false) { 14020 unsigned Rnd = 4; 14021 if (Ops.size() > 4) 14022 Rnd = cast<llvm::ConstantInt>(Ops[4])->getZExtValue(); 14023 14024 if (NegAcc) 14025 Ops[2] = CGF.Builder.CreateFNeg(Ops[2]); 14026 14027 Ops[0] = CGF.Builder.CreateExtractElement(Ops[0], (uint64_t)0); 14028 Ops[1] = 
      CGF.Builder.CreateExtractElement(Ops[1], (uint64_t)0);
  Ops[2] = CGF.Builder.CreateExtractElement(Ops[2], (uint64_t)0);
  Value *Res;
  if (Rnd != 4) {
    Intrinsic::ID IID;

    switch (Ops[0]->getType()->getPrimitiveSizeInBits()) {
    case 16:
      IID = Intrinsic::x86_avx512fp16_vfmadd_f16;
      break;
    case 32:
      IID = Intrinsic::x86_avx512_vfmadd_f32;
      break;
    case 64:
      IID = Intrinsic::x86_avx512_vfmadd_f64;
      break;
    default:
      llvm_unreachable("Unexpected size");
    }
    Res = CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IID),
                                 {Ops[0], Ops[1], Ops[2], Ops[4]});
  } else if (CGF.Builder.getIsFPConstrained()) {
    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
    Function *FMA = CGF.CGM.getIntrinsic(
        Intrinsic::experimental_constrained_fma, Ops[0]->getType());
    Res = CGF.Builder.CreateConstrainedFPCall(FMA, Ops.slice(0, 3));
  } else {
    Function *FMA = CGF.CGM.getIntrinsic(Intrinsic::fma, Ops[0]->getType());
    Res = CGF.Builder.CreateCall(FMA, Ops.slice(0, 3));
  }
  // If we have more than 3 arguments, we need to do masking.
  if (Ops.size() > 3) {
    Value *PassThru = ZeroMask ? Constant::getNullValue(Res->getType())
                               : Ops[PTIdx];

    // If we negated the accumulator and it is the PassThru value, we need to
    // bypass the negate. Conveniently, Upper should be the same thing in this
    // case.
    if (NegAcc && PTIdx == 2)
      PassThru = CGF.Builder.CreateExtractElement(Upper, (uint64_t)0);

    Res = EmitX86ScalarSelect(CGF, Ops[3], Res, PassThru);
  }
  return CGF.Builder.CreateInsertElement(Upper, Res, (uint64_t)0);
}

static Value *EmitX86Muldq(CodeGenFunction &CGF, bool IsSigned,
                           ArrayRef<Value *> Ops) {
  llvm::Type *Ty = Ops[0]->getType();
  // Arguments have a vXi32 type so cast to vXi64.
  Ty = llvm::FixedVectorType::get(CGF.Int64Ty,
                                  Ty->getPrimitiveSizeInBits() / 64);
  Value *LHS = CGF.Builder.CreateBitCast(Ops[0], Ty);
  Value *RHS = CGF.Builder.CreateBitCast(Ops[1], Ty);

  if (IsSigned) {
    // Shift left then arithmetic shift right.
    Constant *ShiftAmt = ConstantInt::get(Ty, 32);
    LHS = CGF.Builder.CreateShl(LHS, ShiftAmt);
    LHS = CGF.Builder.CreateAShr(LHS, ShiftAmt);
    RHS = CGF.Builder.CreateShl(RHS, ShiftAmt);
    RHS = CGF.Builder.CreateAShr(RHS, ShiftAmt);
  } else {
    // Clear the upper bits.
    Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
    LHS = CGF.Builder.CreateAnd(LHS, Mask);
    RHS = CGF.Builder.CreateAnd(RHS, Mask);
  }

  return CGF.Builder.CreateMul(LHS, RHS);
}

// Emit a masked pternlog intrinsic. This only exists because the header has to
// use a macro and we aren't able to pass the input argument to a pternlog
// builtin and a select builtin without evaluating it twice.
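// Illustrative sketch of the header pattern this avoids (names hypothetical):
//   #define _mm512_mask_ternarylogic_epi32(A, U, B, C, I) \
//     (__builtin_select(U, __builtin_pternlog(A, B, C, I), A))
// where A would be evaluated twice; folding the select into this one builtin
// keeps each macro argument single-evaluation.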
static Value *EmitX86Ternlog(CodeGenFunction &CGF, bool ZeroMask,
                             ArrayRef<Value *> Ops) {
  llvm::Type *Ty = Ops[0]->getType();

  unsigned VecWidth = Ty->getPrimitiveSizeInBits();
  unsigned EltWidth = Ty->getScalarSizeInBits();
  Intrinsic::ID IID;
  if (VecWidth == 128 && EltWidth == 32)
    IID = Intrinsic::x86_avx512_pternlog_d_128;
  else if (VecWidth == 256 && EltWidth == 32)
    IID = Intrinsic::x86_avx512_pternlog_d_256;
  else if (VecWidth == 512 && EltWidth == 32)
    IID = Intrinsic::x86_avx512_pternlog_d_512;
  else if (VecWidth == 128 && EltWidth == 64)
    IID = Intrinsic::x86_avx512_pternlog_q_128;
  else if (VecWidth == 256 && EltWidth == 64)
    IID = Intrinsic::x86_avx512_pternlog_q_256;
  else if (VecWidth == 512 && EltWidth == 64)
    IID = Intrinsic::x86_avx512_pternlog_q_512;
  else
    llvm_unreachable("Unexpected intrinsic");

  Value *Ternlog = CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IID),
                                          Ops.drop_back());
  Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty) : Ops[0];
  return EmitX86Select(CGF, Ops[4], Ternlog, PassThru);
}

static Value *EmitX86SExtMask(CodeGenFunction &CGF, Value *Op,
                              llvm::Type *DstTy) {
  unsigned NumberOfElements =
      cast<llvm::FixedVectorType>(DstTy)->getNumElements();
  Value *Mask = getMaskVecValue(CGF, Op, NumberOfElements);
  return CGF.Builder.CreateSExt(Mask, DstTy, "vpmovm2");
}

Value *CodeGenFunction::EmitX86CpuIs(const CallExpr *E) {
  const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts();
  StringRef CPUStr = cast<clang::StringLiteral>(CPUExpr)->getString();
  return EmitX86CpuIs(CPUStr);
}

// Convert F16 halves to floats.
static Value *EmitX86CvtF16ToFloatExpr(CodeGenFunction &CGF,
                                       ArrayRef<Value *> Ops,
                                       llvm::Type *DstTy) {
  assert((Ops.size() == 1 || Ops.size() == 3 || Ops.size() == 4) &&
         "Unknown cvtph2ps intrinsic");

  // If the SAE intrinsic doesn't use default rounding then we can't upgrade.
  if (Ops.size() == 4 && cast<llvm::ConstantInt>(Ops[3])->getZExtValue() != 4) {
    Function *F =
        CGF.CGM.getIntrinsic(Intrinsic::x86_avx512_mask_vcvtph2ps_512);
    return CGF.Builder.CreateCall(F, {Ops[0], Ops[1], Ops[2], Ops[3]});
  }

  unsigned NumDstElts = cast<llvm::FixedVectorType>(DstTy)->getNumElements();
  Value *Src = Ops[0];

  // Extract the subvector.
  if (NumDstElts !=
      cast<llvm::FixedVectorType>(Src->getType())->getNumElements()) {
    assert(NumDstElts == 4 && "Unexpected vector size");
    Src = CGF.Builder.CreateShuffleVector(Src, ArrayRef<int>{0, 1, 2, 3});
  }

  // Bitcast from vXi16 to vXf16.
  auto *HalfTy = llvm::FixedVectorType::get(
      llvm::Type::getHalfTy(CGF.getLLVMContext()), NumDstElts);
  Src = CGF.Builder.CreateBitCast(Src, HalfTy);

  // Perform the fp-extension.
  Value *Res = CGF.Builder.CreateFPExt(Src, DstTy, "cvtph2ps");

  if (Ops.size() >= 3)
    Res = EmitX86Select(CGF, Ops[2], Res, Ops[1]);
  return Res;
}

Value *CodeGenFunction::EmitX86CpuIs(StringRef CPUStr) {

  llvm::Type *Int32Ty = Builder.getInt32Ty();

  // Matching the struct layout from the compiler-rt/libgcc structure that is
  // filled in:
  // unsigned int __cpu_vendor;
  // unsigned int __cpu_type;
  // unsigned int __cpu_subtype;
  // unsigned int __cpu_features[1];
  llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty,
                                          llvm::ArrayType::get(Int32Ty, 1));

  // Grab the global __cpu_model.
  llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model");
  cast<llvm::GlobalValue>(CpuModel)->setDSOLocal(true);

  // Calculate the index needed to access the correct field based on the
  // range. Also adjust the expected value.
  unsigned Index;
  unsigned Value;
  std::tie(Index, Value) = StringSwitch<std::pair<unsigned, unsigned>>(CPUStr)
#define X86_VENDOR(ENUM, STRING)                                               \
  .Case(STRING, {0u, static_cast<unsigned>(llvm::X86::ENUM)})
#define X86_CPU_TYPE_ALIAS(ENUM, ALIAS)                                        \
  .Case(ALIAS, {1u, static_cast<unsigned>(llvm::X86::ENUM)})
#define X86_CPU_TYPE(ENUM, STR)                                                \
  .Case(STR, {1u, static_cast<unsigned>(llvm::X86::ENUM)})
#define X86_CPU_SUBTYPE_ALIAS(ENUM, ALIAS)                                     \
  .Case(ALIAS, {2u, static_cast<unsigned>(llvm::X86::ENUM)})
#define X86_CPU_SUBTYPE(ENUM, STR)                                             \
  .Case(STR, {2u, static_cast<unsigned>(llvm::X86::ENUM)})
#include "llvm/TargetParser/X86TargetParser.def"
                               .Default({0, 0});
  assert(Value != 0 && "Invalid CPUStr passed to CpuIs");

  // Grab the appropriate field from __cpu_model.
  llvm::Value *Idxs[] = {ConstantInt::get(Int32Ty, 0),
                         ConstantInt::get(Int32Ty, Index)};
  llvm::Value *CpuValue = Builder.CreateInBoundsGEP(STy, CpuModel, Idxs);
  CpuValue = Builder.CreateAlignedLoad(Int32Ty, CpuValue,
                                       CharUnits::fromQuantity(4));

  // Check the value of the field against the requested value.
  return Builder.CreateICmpEQ(CpuValue,
                              llvm::ConstantInt::get(Int32Ty, Value));
}

Value *CodeGenFunction::EmitX86CpuSupports(const CallExpr *E) {
  const Expr *FeatureExpr = E->getArg(0)->IgnoreParenCasts();
  StringRef FeatureStr = cast<StringLiteral>(FeatureExpr)->getString();
  if (!getContext().getTargetInfo().validateCpuSupports(FeatureStr))
    return Builder.getFalse();
  return EmitX86CpuSupports(FeatureStr);
}

Value *CodeGenFunction::EmitX86CpuSupports(ArrayRef<StringRef> FeatureStrs) {
  return EmitX86CpuSupports(llvm::X86::getCpuSupportsMask(FeatureStrs));
}

llvm::Value *
CodeGenFunction::EmitX86CpuSupports(std::array<uint32_t, 4> FeatureMask) {
  Value *Result = Builder.getTrue();
  if (FeatureMask[0] != 0) {
    // Matching the struct layout from the compiler-rt/libgcc structure that is
    // filled in:
    // unsigned int __cpu_vendor;
    // unsigned int __cpu_type;
    // unsigned int __cpu_subtype;
    // unsigned int __cpu_features[1];
    llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty,
                                            llvm::ArrayType::get(Int32Ty, 1));

    // Grab the global __cpu_model.
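    // For example, a query such as __builtin_cpu_supports("sse4.2") is
    // typically answered from a single bit of __cpu_model.__cpu_features[0]
    // here; features whose mask falls in FeatureMask[1..3] are instead checked
    // against the separate __cpu_features2 array below.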
14256 llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model"); 14257 cast<llvm::GlobalValue>(CpuModel)->setDSOLocal(true); 14258 14259 // Grab the first (0th) element from the field __cpu_features off of the 14260 // global in the struct STy. 14261 Value *Idxs[] = {Builder.getInt32(0), Builder.getInt32(3), 14262 Builder.getInt32(0)}; 14263 Value *CpuFeatures = Builder.CreateInBoundsGEP(STy, CpuModel, Idxs); 14264 Value *Features = Builder.CreateAlignedLoad(Int32Ty, CpuFeatures, 14265 CharUnits::fromQuantity(4)); 14266 14267 // Check the value of the bit corresponding to the feature requested. 14268 Value *Mask = Builder.getInt32(FeatureMask[0]); 14269 Value *Bitset = Builder.CreateAnd(Features, Mask); 14270 Value *Cmp = Builder.CreateICmpEQ(Bitset, Mask); 14271 Result = Builder.CreateAnd(Result, Cmp); 14272 } 14273 14274 llvm::Type *ATy = llvm::ArrayType::get(Int32Ty, 3); 14275 llvm::Constant *CpuFeatures2 = 14276 CGM.CreateRuntimeVariable(ATy, "__cpu_features2"); 14277 cast<llvm::GlobalValue>(CpuFeatures2)->setDSOLocal(true); 14278 for (int i = 1; i != 4; ++i) { 14279 const uint32_t M = FeatureMask[i]; 14280 if (!M) 14281 continue; 14282 Value *Idxs[] = {Builder.getInt32(0), Builder.getInt32(i - 1)}; 14283 Value *Features = Builder.CreateAlignedLoad( 14284 Int32Ty, Builder.CreateInBoundsGEP(ATy, CpuFeatures2, Idxs), 14285 CharUnits::fromQuantity(4)); 14286 // Check the value of the bit corresponding to the feature requested. 14287 Value *Mask = Builder.getInt32(M); 14288 Value *Bitset = Builder.CreateAnd(Features, Mask); 14289 Value *Cmp = Builder.CreateICmpEQ(Bitset, Mask); 14290 Result = Builder.CreateAnd(Result, Cmp); 14291 } 14292 14293 return Result; 14294 } 14295 14296 Value *CodeGenFunction::EmitAArch64CpuInit() { 14297 llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false); 14298 llvm::FunctionCallee Func = 14299 CGM.CreateRuntimeFunction(FTy, "__init_cpu_features_resolver"); 14300 cast<llvm::GlobalValue>(Func.getCallee())->setDSOLocal(true); 14301 cast<llvm::GlobalValue>(Func.getCallee()) 14302 ->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass); 14303 return Builder.CreateCall(Func); 14304 } 14305 14306 Value *CodeGenFunction::EmitX86CpuInit() { 14307 llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, 14308 /*Variadic*/ false); 14309 llvm::FunctionCallee Func = 14310 CGM.CreateRuntimeFunction(FTy, "__cpu_indicator_init"); 14311 cast<llvm::GlobalValue>(Func.getCallee())->setDSOLocal(true); 14312 cast<llvm::GlobalValue>(Func.getCallee()) 14313 ->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass); 14314 return Builder.CreateCall(Func); 14315 } 14316 14317 Value *CodeGenFunction::EmitAArch64CpuSupports(const CallExpr *E) { 14318 const Expr *ArgExpr = E->getArg(0)->IgnoreParenCasts(); 14319 StringRef ArgStr = cast<StringLiteral>(ArgExpr)->getString(); 14320 llvm::SmallVector<StringRef, 8> Features; 14321 ArgStr.split(Features, "+"); 14322 for (auto &Feature : Features) { 14323 Feature = Feature.trim(); 14324 if (!llvm::AArch64::parseFMVExtension(Feature)) 14325 return Builder.getFalse(); 14326 if (Feature != "default") 14327 Features.push_back(Feature); 14328 } 14329 return EmitAArch64CpuSupports(Features); 14330 } 14331 14332 llvm::Value * 14333 CodeGenFunction::EmitAArch64CpuSupports(ArrayRef<StringRef> FeaturesStrs) { 14334 uint64_t FeaturesMask = llvm::AArch64::getCpuSupportsMask(FeaturesStrs); 14335 Value *Result = Builder.getTrue(); 14336 if (FeaturesMask != 0) { 14337 // Get features from structure in runtime library 14338 // 
struct { 14339 // unsigned long long features; 14340 // } __aarch64_cpu_features; 14341 llvm::Type *STy = llvm::StructType::get(Int64Ty); 14342 llvm::Constant *AArch64CPUFeatures = 14343 CGM.CreateRuntimeVariable(STy, "__aarch64_cpu_features"); 14344 cast<llvm::GlobalValue>(AArch64CPUFeatures)->setDSOLocal(true); 14345 llvm::Value *CpuFeatures = Builder.CreateGEP( 14346 STy, AArch64CPUFeatures, 14347 {ConstantInt::get(Int32Ty, 0), ConstantInt::get(Int32Ty, 0)}); 14348 Value *Features = Builder.CreateAlignedLoad(Int64Ty, CpuFeatures, 14349 CharUnits::fromQuantity(8)); 14350 Value *Mask = Builder.getInt64(FeaturesMask); 14351 Value *Bitset = Builder.CreateAnd(Features, Mask); 14352 Value *Cmp = Builder.CreateICmpEQ(Bitset, Mask); 14353 Result = Builder.CreateAnd(Result, Cmp); 14354 } 14355 return Result; 14356 } 14357 14358 Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, 14359 const CallExpr *E) { 14360 if (BuiltinID == Builtin::BI__builtin_cpu_is) 14361 return EmitX86CpuIs(E); 14362 if (BuiltinID == Builtin::BI__builtin_cpu_supports) 14363 return EmitX86CpuSupports(E); 14364 if (BuiltinID == Builtin::BI__builtin_cpu_init) 14365 return EmitX86CpuInit(); 14366 14367 // Handle MSVC intrinsics before argument evaluation to prevent double 14368 // evaluation. 14369 if (std::optional<MSVCIntrin> MsvcIntId = translateX86ToMsvcIntrin(BuiltinID)) 14370 return EmitMSVCBuiltinExpr(*MsvcIntId, E); 14371 14372 SmallVector<Value*, 4> Ops; 14373 bool IsMaskFCmp = false; 14374 bool IsConjFMA = false; 14375 14376 // Find out if any arguments are required to be integer constant expressions. 14377 unsigned ICEArguments = 0; 14378 ASTContext::GetBuiltinTypeError Error; 14379 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments); 14380 assert(Error == ASTContext::GE_None && "Should not codegen an error"); 14381 14382 for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) { 14383 Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E)); 14384 } 14385 14386 // These exist so that the builtin that takes an immediate can be bounds 14387 // checked by clang to avoid passing bad immediates to the backend. Since 14388 // AVX has a larger immediate than SSE we would need separate builtins to 14389 // do the different bounds checking. Rather than create a clang specific 14390 // SSE only builtin, this implements eight separate builtins to match gcc 14391 // implementation. 14392 auto getCmpIntrinsicCall = [this, &Ops](Intrinsic::ID ID, unsigned Imm) { 14393 Ops.push_back(llvm::ConstantInt::get(Int8Ty, Imm)); 14394 llvm::Function *F = CGM.getIntrinsic(ID); 14395 return Builder.CreateCall(F, Ops); 14396 }; 14397 14398 // For the vector forms of FP comparisons, translate the builtins directly to 14399 // IR. 14400 // TODO: The builtins could be removed if the SSE header files used vector 14401 // extension comparisons directly (vector ordered/unordered may need 14402 // additional support via __builtin_isnan()). 
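  // Illustrative only: a packed comparison such as _mm_cmpeq_ps is expected to
  // come through getVectorFCmpIR below and lower to roughly
  //   %cmp  = fcmp oeq <4 x float> %a, %b
  //   %sext = sext <4 x i1> %cmp to <4 x i32>
  //   %res  = bitcast <4 x i32> %sext to <4 x float>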
14403 auto getVectorFCmpIR = [this, &Ops, E](CmpInst::Predicate Pred, 14404 bool IsSignaling) { 14405 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E); 14406 Value *Cmp; 14407 if (IsSignaling) 14408 Cmp = Builder.CreateFCmpS(Pred, Ops[0], Ops[1]); 14409 else 14410 Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]); 14411 llvm::VectorType *FPVecTy = cast<llvm::VectorType>(Ops[0]->getType()); 14412 llvm::VectorType *IntVecTy = llvm::VectorType::getInteger(FPVecTy); 14413 Value *Sext = Builder.CreateSExt(Cmp, IntVecTy); 14414 return Builder.CreateBitCast(Sext, FPVecTy); 14415 }; 14416 14417 switch (BuiltinID) { 14418 default: return nullptr; 14419 case X86::BI_mm_prefetch: { 14420 Value *Address = Ops[0]; 14421 ConstantInt *C = cast<ConstantInt>(Ops[1]); 14422 Value *RW = ConstantInt::get(Int32Ty, (C->getZExtValue() >> 2) & 0x1); 14423 Value *Locality = ConstantInt::get(Int32Ty, C->getZExtValue() & 0x3); 14424 Value *Data = ConstantInt::get(Int32Ty, 1); 14425 Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType()); 14426 return Builder.CreateCall(F, {Address, RW, Locality, Data}); 14427 } 14428 case X86::BI_mm_clflush: { 14429 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_clflush), 14430 Ops[0]); 14431 } 14432 case X86::BI_mm_lfence: { 14433 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_lfence)); 14434 } 14435 case X86::BI_mm_mfence: { 14436 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_mfence)); 14437 } 14438 case X86::BI_mm_sfence: { 14439 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_sfence)); 14440 } 14441 case X86::BI_mm_pause: { 14442 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_pause)); 14443 } 14444 case X86::BI__rdtsc: { 14445 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_rdtsc)); 14446 } 14447 case X86::BI__builtin_ia32_rdtscp: { 14448 Value *Call = Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_rdtscp)); 14449 Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 1), 14450 Ops[0]); 14451 return Builder.CreateExtractValue(Call, 0); 14452 } 14453 case X86::BI__builtin_ia32_lzcnt_u16: 14454 case X86::BI__builtin_ia32_lzcnt_u32: 14455 case X86::BI__builtin_ia32_lzcnt_u64: { 14456 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType()); 14457 return Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)}); 14458 } 14459 case X86::BI__builtin_ia32_tzcnt_u16: 14460 case X86::BI__builtin_ia32_tzcnt_u32: 14461 case X86::BI__builtin_ia32_tzcnt_u64: { 14462 Function *F = CGM.getIntrinsic(Intrinsic::cttz, Ops[0]->getType()); 14463 return Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)}); 14464 } 14465 case X86::BI__builtin_ia32_undef128: 14466 case X86::BI__builtin_ia32_undef256: 14467 case X86::BI__builtin_ia32_undef512: 14468 // The x86 definition of "undef" is not the same as the LLVM definition 14469 // (PR32176). We leave optimizing away an unnecessary zero constant to the 14470 // IR optimizer and backend. 14471 // TODO: If we had a "freeze" IR instruction to generate a fixed undef 14472 // value, we should use that here instead of a zero. 
    return llvm::Constant::getNullValue(ConvertType(E->getType()));
  case X86::BI__builtin_ia32_vec_init_v8qi:
  case X86::BI__builtin_ia32_vec_init_v4hi:
  case X86::BI__builtin_ia32_vec_init_v2si:
    return Builder.CreateBitCast(BuildVector(Ops),
                                 llvm::Type::getX86_MMXTy(getLLVMContext()));
  case X86::BI__builtin_ia32_vec_ext_v2si:
  case X86::BI__builtin_ia32_vec_ext_v16qi:
  case X86::BI__builtin_ia32_vec_ext_v8hi:
  case X86::BI__builtin_ia32_vec_ext_v4si:
  case X86::BI__builtin_ia32_vec_ext_v4sf:
  case X86::BI__builtin_ia32_vec_ext_v2di:
  case X86::BI__builtin_ia32_vec_ext_v32qi:
  case X86::BI__builtin_ia32_vec_ext_v16hi:
  case X86::BI__builtin_ia32_vec_ext_v8si:
  case X86::BI__builtin_ia32_vec_ext_v4di: {
    unsigned NumElts =
        cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
    uint64_t Index = cast<ConstantInt>(Ops[1])->getZExtValue();
    Index &= NumElts - 1;
    // These builtins exist so we can ensure the index is an ICE and in range.
    // Otherwise we could just do this in the header file.
    return Builder.CreateExtractElement(Ops[0], Index);
  }
  case X86::BI__builtin_ia32_vec_set_v16qi:
  case X86::BI__builtin_ia32_vec_set_v8hi:
  case X86::BI__builtin_ia32_vec_set_v4si:
  case X86::BI__builtin_ia32_vec_set_v2di:
  case X86::BI__builtin_ia32_vec_set_v32qi:
  case X86::BI__builtin_ia32_vec_set_v16hi:
  case X86::BI__builtin_ia32_vec_set_v8si:
  case X86::BI__builtin_ia32_vec_set_v4di: {
    unsigned NumElts =
        cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
    unsigned Index = cast<ConstantInt>(Ops[2])->getZExtValue();
    Index &= NumElts - 1;
    // These builtins exist so we can ensure the index is an ICE and in range.
    // Otherwise we could just do this in the header file.
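    // For example, with a v4si vector an index of 7 wraps to 7 & 3 == 3 before
    // the insertelement is emitted.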
14511 return Builder.CreateInsertElement(Ops[0], Ops[1], Index); 14512 } 14513 case X86::BI_mm_setcsr: 14514 case X86::BI__builtin_ia32_ldmxcsr: { 14515 RawAddress Tmp = CreateMemTemp(E->getArg(0)->getType()); 14516 Builder.CreateStore(Ops[0], Tmp); 14517 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_ldmxcsr), 14518 Tmp.getPointer()); 14519 } 14520 case X86::BI_mm_getcsr: 14521 case X86::BI__builtin_ia32_stmxcsr: { 14522 RawAddress Tmp = CreateMemTemp(E->getType()); 14523 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_stmxcsr), 14524 Tmp.getPointer()); 14525 return Builder.CreateLoad(Tmp, "stmxcsr"); 14526 } 14527 case X86::BI__builtin_ia32_xsave: 14528 case X86::BI__builtin_ia32_xsave64: 14529 case X86::BI__builtin_ia32_xrstor: 14530 case X86::BI__builtin_ia32_xrstor64: 14531 case X86::BI__builtin_ia32_xsaveopt: 14532 case X86::BI__builtin_ia32_xsaveopt64: 14533 case X86::BI__builtin_ia32_xrstors: 14534 case X86::BI__builtin_ia32_xrstors64: 14535 case X86::BI__builtin_ia32_xsavec: 14536 case X86::BI__builtin_ia32_xsavec64: 14537 case X86::BI__builtin_ia32_xsaves: 14538 case X86::BI__builtin_ia32_xsaves64: 14539 case X86::BI__builtin_ia32_xsetbv: 14540 case X86::BI_xsetbv: { 14541 Intrinsic::ID ID; 14542 #define INTRINSIC_X86_XSAVE_ID(NAME) \ 14543 case X86::BI__builtin_ia32_##NAME: \ 14544 ID = Intrinsic::x86_##NAME; \ 14545 break 14546 switch (BuiltinID) { 14547 default: llvm_unreachable("Unsupported intrinsic!"); 14548 INTRINSIC_X86_XSAVE_ID(xsave); 14549 INTRINSIC_X86_XSAVE_ID(xsave64); 14550 INTRINSIC_X86_XSAVE_ID(xrstor); 14551 INTRINSIC_X86_XSAVE_ID(xrstor64); 14552 INTRINSIC_X86_XSAVE_ID(xsaveopt); 14553 INTRINSIC_X86_XSAVE_ID(xsaveopt64); 14554 INTRINSIC_X86_XSAVE_ID(xrstors); 14555 INTRINSIC_X86_XSAVE_ID(xrstors64); 14556 INTRINSIC_X86_XSAVE_ID(xsavec); 14557 INTRINSIC_X86_XSAVE_ID(xsavec64); 14558 INTRINSIC_X86_XSAVE_ID(xsaves); 14559 INTRINSIC_X86_XSAVE_ID(xsaves64); 14560 INTRINSIC_X86_XSAVE_ID(xsetbv); 14561 case X86::BI_xsetbv: 14562 ID = Intrinsic::x86_xsetbv; 14563 break; 14564 } 14565 #undef INTRINSIC_X86_XSAVE_ID 14566 Value *Mhi = Builder.CreateTrunc( 14567 Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, 32)), Int32Ty); 14568 Value *Mlo = Builder.CreateTrunc(Ops[1], Int32Ty); 14569 Ops[1] = Mhi; 14570 Ops.push_back(Mlo); 14571 return Builder.CreateCall(CGM.getIntrinsic(ID), Ops); 14572 } 14573 case X86::BI__builtin_ia32_xgetbv: 14574 case X86::BI_xgetbv: 14575 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_xgetbv), Ops); 14576 case X86::BI__builtin_ia32_storedqudi128_mask: 14577 case X86::BI__builtin_ia32_storedqusi128_mask: 14578 case X86::BI__builtin_ia32_storedquhi128_mask: 14579 case X86::BI__builtin_ia32_storedquqi128_mask: 14580 case X86::BI__builtin_ia32_storeupd128_mask: 14581 case X86::BI__builtin_ia32_storeups128_mask: 14582 case X86::BI__builtin_ia32_storedqudi256_mask: 14583 case X86::BI__builtin_ia32_storedqusi256_mask: 14584 case X86::BI__builtin_ia32_storedquhi256_mask: 14585 case X86::BI__builtin_ia32_storedquqi256_mask: 14586 case X86::BI__builtin_ia32_storeupd256_mask: 14587 case X86::BI__builtin_ia32_storeups256_mask: 14588 case X86::BI__builtin_ia32_storedqudi512_mask: 14589 case X86::BI__builtin_ia32_storedqusi512_mask: 14590 case X86::BI__builtin_ia32_storedquhi512_mask: 14591 case X86::BI__builtin_ia32_storedquqi512_mask: 14592 case X86::BI__builtin_ia32_storeupd512_mask: 14593 case X86::BI__builtin_ia32_storeups512_mask: 14594 return EmitX86MaskedStore(*this, Ops, Align(1)); 14595 14596 case 
X86::BI__builtin_ia32_storesh128_mask: 14597 case X86::BI__builtin_ia32_storess128_mask: 14598 case X86::BI__builtin_ia32_storesd128_mask: 14599 return EmitX86MaskedStore(*this, Ops, Align(1)); 14600 14601 case X86::BI__builtin_ia32_vpopcntb_128: 14602 case X86::BI__builtin_ia32_vpopcntd_128: 14603 case X86::BI__builtin_ia32_vpopcntq_128: 14604 case X86::BI__builtin_ia32_vpopcntw_128: 14605 case X86::BI__builtin_ia32_vpopcntb_256: 14606 case X86::BI__builtin_ia32_vpopcntd_256: 14607 case X86::BI__builtin_ia32_vpopcntq_256: 14608 case X86::BI__builtin_ia32_vpopcntw_256: 14609 case X86::BI__builtin_ia32_vpopcntb_512: 14610 case X86::BI__builtin_ia32_vpopcntd_512: 14611 case X86::BI__builtin_ia32_vpopcntq_512: 14612 case X86::BI__builtin_ia32_vpopcntw_512: { 14613 llvm::Type *ResultType = ConvertType(E->getType()); 14614 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType); 14615 return Builder.CreateCall(F, Ops); 14616 } 14617 case X86::BI__builtin_ia32_cvtmask2b128: 14618 case X86::BI__builtin_ia32_cvtmask2b256: 14619 case X86::BI__builtin_ia32_cvtmask2b512: 14620 case X86::BI__builtin_ia32_cvtmask2w128: 14621 case X86::BI__builtin_ia32_cvtmask2w256: 14622 case X86::BI__builtin_ia32_cvtmask2w512: 14623 case X86::BI__builtin_ia32_cvtmask2d128: 14624 case X86::BI__builtin_ia32_cvtmask2d256: 14625 case X86::BI__builtin_ia32_cvtmask2d512: 14626 case X86::BI__builtin_ia32_cvtmask2q128: 14627 case X86::BI__builtin_ia32_cvtmask2q256: 14628 case X86::BI__builtin_ia32_cvtmask2q512: 14629 return EmitX86SExtMask(*this, Ops[0], ConvertType(E->getType())); 14630 14631 case X86::BI__builtin_ia32_cvtb2mask128: 14632 case X86::BI__builtin_ia32_cvtb2mask256: 14633 case X86::BI__builtin_ia32_cvtb2mask512: 14634 case X86::BI__builtin_ia32_cvtw2mask128: 14635 case X86::BI__builtin_ia32_cvtw2mask256: 14636 case X86::BI__builtin_ia32_cvtw2mask512: 14637 case X86::BI__builtin_ia32_cvtd2mask128: 14638 case X86::BI__builtin_ia32_cvtd2mask256: 14639 case X86::BI__builtin_ia32_cvtd2mask512: 14640 case X86::BI__builtin_ia32_cvtq2mask128: 14641 case X86::BI__builtin_ia32_cvtq2mask256: 14642 case X86::BI__builtin_ia32_cvtq2mask512: 14643 return EmitX86ConvertToMask(*this, Ops[0]); 14644 14645 case X86::BI__builtin_ia32_cvtdq2ps512_mask: 14646 case X86::BI__builtin_ia32_cvtqq2ps512_mask: 14647 case X86::BI__builtin_ia32_cvtqq2pd512_mask: 14648 case X86::BI__builtin_ia32_vcvtw2ph512_mask: 14649 case X86::BI__builtin_ia32_vcvtdq2ph512_mask: 14650 case X86::BI__builtin_ia32_vcvtqq2ph512_mask: 14651 return EmitX86ConvertIntToFp(*this, E, Ops, /*IsSigned*/ true); 14652 case X86::BI__builtin_ia32_cvtudq2ps512_mask: 14653 case X86::BI__builtin_ia32_cvtuqq2ps512_mask: 14654 case X86::BI__builtin_ia32_cvtuqq2pd512_mask: 14655 case X86::BI__builtin_ia32_vcvtuw2ph512_mask: 14656 case X86::BI__builtin_ia32_vcvtudq2ph512_mask: 14657 case X86::BI__builtin_ia32_vcvtuqq2ph512_mask: 14658 return EmitX86ConvertIntToFp(*this, E, Ops, /*IsSigned*/ false); 14659 14660 case X86::BI__builtin_ia32_vfmaddss3: 14661 case X86::BI__builtin_ia32_vfmaddsd3: 14662 case X86::BI__builtin_ia32_vfmaddsh3_mask: 14663 case X86::BI__builtin_ia32_vfmaddss3_mask: 14664 case X86::BI__builtin_ia32_vfmaddsd3_mask: 14665 return EmitScalarFMAExpr(*this, E, Ops, Ops[0]); 14666 case X86::BI__builtin_ia32_vfmaddss: 14667 case X86::BI__builtin_ia32_vfmaddsd: 14668 return EmitScalarFMAExpr(*this, E, Ops, 14669 Constant::getNullValue(Ops[0]->getType())); 14670 case X86::BI__builtin_ia32_vfmaddsh3_maskz: 14671 case X86::BI__builtin_ia32_vfmaddss3_maskz: 
14672 case X86::BI__builtin_ia32_vfmaddsd3_maskz: 14673 return EmitScalarFMAExpr(*this, E, Ops, Ops[0], /*ZeroMask*/ true); 14674 case X86::BI__builtin_ia32_vfmaddsh3_mask3: 14675 case X86::BI__builtin_ia32_vfmaddss3_mask3: 14676 case X86::BI__builtin_ia32_vfmaddsd3_mask3: 14677 return EmitScalarFMAExpr(*this, E, Ops, Ops[2], /*ZeroMask*/ false, 2); 14678 case X86::BI__builtin_ia32_vfmsubsh3_mask3: 14679 case X86::BI__builtin_ia32_vfmsubss3_mask3: 14680 case X86::BI__builtin_ia32_vfmsubsd3_mask3: 14681 return EmitScalarFMAExpr(*this, E, Ops, Ops[2], /*ZeroMask*/ false, 2, 14682 /*NegAcc*/ true); 14683 case X86::BI__builtin_ia32_vfmaddph: 14684 case X86::BI__builtin_ia32_vfmaddps: 14685 case X86::BI__builtin_ia32_vfmaddpd: 14686 case X86::BI__builtin_ia32_vfmaddph256: 14687 case X86::BI__builtin_ia32_vfmaddps256: 14688 case X86::BI__builtin_ia32_vfmaddpd256: 14689 case X86::BI__builtin_ia32_vfmaddph512_mask: 14690 case X86::BI__builtin_ia32_vfmaddph512_maskz: 14691 case X86::BI__builtin_ia32_vfmaddph512_mask3: 14692 case X86::BI__builtin_ia32_vfmaddps512_mask: 14693 case X86::BI__builtin_ia32_vfmaddps512_maskz: 14694 case X86::BI__builtin_ia32_vfmaddps512_mask3: 14695 case X86::BI__builtin_ia32_vfmsubps512_mask3: 14696 case X86::BI__builtin_ia32_vfmaddpd512_mask: 14697 case X86::BI__builtin_ia32_vfmaddpd512_maskz: 14698 case X86::BI__builtin_ia32_vfmaddpd512_mask3: 14699 case X86::BI__builtin_ia32_vfmsubpd512_mask3: 14700 case X86::BI__builtin_ia32_vfmsubph512_mask3: 14701 return EmitX86FMAExpr(*this, E, Ops, BuiltinID, /*IsAddSub*/ false); 14702 case X86::BI__builtin_ia32_vfmaddsubph512_mask: 14703 case X86::BI__builtin_ia32_vfmaddsubph512_maskz: 14704 case X86::BI__builtin_ia32_vfmaddsubph512_mask3: 14705 case X86::BI__builtin_ia32_vfmsubaddph512_mask3: 14706 case X86::BI__builtin_ia32_vfmaddsubps512_mask: 14707 case X86::BI__builtin_ia32_vfmaddsubps512_maskz: 14708 case X86::BI__builtin_ia32_vfmaddsubps512_mask3: 14709 case X86::BI__builtin_ia32_vfmsubaddps512_mask3: 14710 case X86::BI__builtin_ia32_vfmaddsubpd512_mask: 14711 case X86::BI__builtin_ia32_vfmaddsubpd512_maskz: 14712 case X86::BI__builtin_ia32_vfmaddsubpd512_mask3: 14713 case X86::BI__builtin_ia32_vfmsubaddpd512_mask3: 14714 return EmitX86FMAExpr(*this, E, Ops, BuiltinID, /*IsAddSub*/ true); 14715 14716 case X86::BI__builtin_ia32_movdqa32store128_mask: 14717 case X86::BI__builtin_ia32_movdqa64store128_mask: 14718 case X86::BI__builtin_ia32_storeaps128_mask: 14719 case X86::BI__builtin_ia32_storeapd128_mask: 14720 case X86::BI__builtin_ia32_movdqa32store256_mask: 14721 case X86::BI__builtin_ia32_movdqa64store256_mask: 14722 case X86::BI__builtin_ia32_storeaps256_mask: 14723 case X86::BI__builtin_ia32_storeapd256_mask: 14724 case X86::BI__builtin_ia32_movdqa32store512_mask: 14725 case X86::BI__builtin_ia32_movdqa64store512_mask: 14726 case X86::BI__builtin_ia32_storeaps512_mask: 14727 case X86::BI__builtin_ia32_storeapd512_mask: 14728 return EmitX86MaskedStore( 14729 *this, Ops, 14730 getContext().getTypeAlignInChars(E->getArg(1)->getType()).getAsAlign()); 14731 14732 case X86::BI__builtin_ia32_loadups128_mask: 14733 case X86::BI__builtin_ia32_loadups256_mask: 14734 case X86::BI__builtin_ia32_loadups512_mask: 14735 case X86::BI__builtin_ia32_loadupd128_mask: 14736 case X86::BI__builtin_ia32_loadupd256_mask: 14737 case X86::BI__builtin_ia32_loadupd512_mask: 14738 case X86::BI__builtin_ia32_loaddquqi128_mask: 14739 case X86::BI__builtin_ia32_loaddquqi256_mask: 14740 case X86::BI__builtin_ia32_loaddquqi512_mask: 14741 case 
X86::BI__builtin_ia32_loaddquhi128_mask: 14742 case X86::BI__builtin_ia32_loaddquhi256_mask: 14743 case X86::BI__builtin_ia32_loaddquhi512_mask: 14744 case X86::BI__builtin_ia32_loaddqusi128_mask: 14745 case X86::BI__builtin_ia32_loaddqusi256_mask: 14746 case X86::BI__builtin_ia32_loaddqusi512_mask: 14747 case X86::BI__builtin_ia32_loaddqudi128_mask: 14748 case X86::BI__builtin_ia32_loaddqudi256_mask: 14749 case X86::BI__builtin_ia32_loaddqudi512_mask: 14750 return EmitX86MaskedLoad(*this, Ops, Align(1)); 14751 14752 case X86::BI__builtin_ia32_loadsh128_mask: 14753 case X86::BI__builtin_ia32_loadss128_mask: 14754 case X86::BI__builtin_ia32_loadsd128_mask: 14755 return EmitX86MaskedLoad(*this, Ops, Align(1)); 14756 14757 case X86::BI__builtin_ia32_loadaps128_mask: 14758 case X86::BI__builtin_ia32_loadaps256_mask: 14759 case X86::BI__builtin_ia32_loadaps512_mask: 14760 case X86::BI__builtin_ia32_loadapd128_mask: 14761 case X86::BI__builtin_ia32_loadapd256_mask: 14762 case X86::BI__builtin_ia32_loadapd512_mask: 14763 case X86::BI__builtin_ia32_movdqa32load128_mask: 14764 case X86::BI__builtin_ia32_movdqa32load256_mask: 14765 case X86::BI__builtin_ia32_movdqa32load512_mask: 14766 case X86::BI__builtin_ia32_movdqa64load128_mask: 14767 case X86::BI__builtin_ia32_movdqa64load256_mask: 14768 case X86::BI__builtin_ia32_movdqa64load512_mask: 14769 return EmitX86MaskedLoad( 14770 *this, Ops, 14771 getContext().getTypeAlignInChars(E->getArg(1)->getType()).getAsAlign()); 14772 14773 case X86::BI__builtin_ia32_expandloaddf128_mask: 14774 case X86::BI__builtin_ia32_expandloaddf256_mask: 14775 case X86::BI__builtin_ia32_expandloaddf512_mask: 14776 case X86::BI__builtin_ia32_expandloadsf128_mask: 14777 case X86::BI__builtin_ia32_expandloadsf256_mask: 14778 case X86::BI__builtin_ia32_expandloadsf512_mask: 14779 case X86::BI__builtin_ia32_expandloaddi128_mask: 14780 case X86::BI__builtin_ia32_expandloaddi256_mask: 14781 case X86::BI__builtin_ia32_expandloaddi512_mask: 14782 case X86::BI__builtin_ia32_expandloadsi128_mask: 14783 case X86::BI__builtin_ia32_expandloadsi256_mask: 14784 case X86::BI__builtin_ia32_expandloadsi512_mask: 14785 case X86::BI__builtin_ia32_expandloadhi128_mask: 14786 case X86::BI__builtin_ia32_expandloadhi256_mask: 14787 case X86::BI__builtin_ia32_expandloadhi512_mask: 14788 case X86::BI__builtin_ia32_expandloadqi128_mask: 14789 case X86::BI__builtin_ia32_expandloadqi256_mask: 14790 case X86::BI__builtin_ia32_expandloadqi512_mask: 14791 return EmitX86ExpandLoad(*this, Ops); 14792 14793 case X86::BI__builtin_ia32_compressstoredf128_mask: 14794 case X86::BI__builtin_ia32_compressstoredf256_mask: 14795 case X86::BI__builtin_ia32_compressstoredf512_mask: 14796 case X86::BI__builtin_ia32_compressstoresf128_mask: 14797 case X86::BI__builtin_ia32_compressstoresf256_mask: 14798 case X86::BI__builtin_ia32_compressstoresf512_mask: 14799 case X86::BI__builtin_ia32_compressstoredi128_mask: 14800 case X86::BI__builtin_ia32_compressstoredi256_mask: 14801 case X86::BI__builtin_ia32_compressstoredi512_mask: 14802 case X86::BI__builtin_ia32_compressstoresi128_mask: 14803 case X86::BI__builtin_ia32_compressstoresi256_mask: 14804 case X86::BI__builtin_ia32_compressstoresi512_mask: 14805 case X86::BI__builtin_ia32_compressstorehi128_mask: 14806 case X86::BI__builtin_ia32_compressstorehi256_mask: 14807 case X86::BI__builtin_ia32_compressstorehi512_mask: 14808 case X86::BI__builtin_ia32_compressstoreqi128_mask: 14809 case X86::BI__builtin_ia32_compressstoreqi256_mask: 14810 case 
X86::BI__builtin_ia32_compressstoreqi512_mask: 14811 return EmitX86CompressStore(*this, Ops); 14812 14813 case X86::BI__builtin_ia32_expanddf128_mask: 14814 case X86::BI__builtin_ia32_expanddf256_mask: 14815 case X86::BI__builtin_ia32_expanddf512_mask: 14816 case X86::BI__builtin_ia32_expandsf128_mask: 14817 case X86::BI__builtin_ia32_expandsf256_mask: 14818 case X86::BI__builtin_ia32_expandsf512_mask: 14819 case X86::BI__builtin_ia32_expanddi128_mask: 14820 case X86::BI__builtin_ia32_expanddi256_mask: 14821 case X86::BI__builtin_ia32_expanddi512_mask: 14822 case X86::BI__builtin_ia32_expandsi128_mask: 14823 case X86::BI__builtin_ia32_expandsi256_mask: 14824 case X86::BI__builtin_ia32_expandsi512_mask: 14825 case X86::BI__builtin_ia32_expandhi128_mask: 14826 case X86::BI__builtin_ia32_expandhi256_mask: 14827 case X86::BI__builtin_ia32_expandhi512_mask: 14828 case X86::BI__builtin_ia32_expandqi128_mask: 14829 case X86::BI__builtin_ia32_expandqi256_mask: 14830 case X86::BI__builtin_ia32_expandqi512_mask: 14831 return EmitX86CompressExpand(*this, Ops, /*IsCompress*/false); 14832 14833 case X86::BI__builtin_ia32_compressdf128_mask: 14834 case X86::BI__builtin_ia32_compressdf256_mask: 14835 case X86::BI__builtin_ia32_compressdf512_mask: 14836 case X86::BI__builtin_ia32_compresssf128_mask: 14837 case X86::BI__builtin_ia32_compresssf256_mask: 14838 case X86::BI__builtin_ia32_compresssf512_mask: 14839 case X86::BI__builtin_ia32_compressdi128_mask: 14840 case X86::BI__builtin_ia32_compressdi256_mask: 14841 case X86::BI__builtin_ia32_compressdi512_mask: 14842 case X86::BI__builtin_ia32_compresssi128_mask: 14843 case X86::BI__builtin_ia32_compresssi256_mask: 14844 case X86::BI__builtin_ia32_compresssi512_mask: 14845 case X86::BI__builtin_ia32_compresshi128_mask: 14846 case X86::BI__builtin_ia32_compresshi256_mask: 14847 case X86::BI__builtin_ia32_compresshi512_mask: 14848 case X86::BI__builtin_ia32_compressqi128_mask: 14849 case X86::BI__builtin_ia32_compressqi256_mask: 14850 case X86::BI__builtin_ia32_compressqi512_mask: 14851 return EmitX86CompressExpand(*this, Ops, /*IsCompress*/true); 14852 14853 case X86::BI__builtin_ia32_gather3div2df: 14854 case X86::BI__builtin_ia32_gather3div2di: 14855 case X86::BI__builtin_ia32_gather3div4df: 14856 case X86::BI__builtin_ia32_gather3div4di: 14857 case X86::BI__builtin_ia32_gather3div4sf: 14858 case X86::BI__builtin_ia32_gather3div4si: 14859 case X86::BI__builtin_ia32_gather3div8sf: 14860 case X86::BI__builtin_ia32_gather3div8si: 14861 case X86::BI__builtin_ia32_gather3siv2df: 14862 case X86::BI__builtin_ia32_gather3siv2di: 14863 case X86::BI__builtin_ia32_gather3siv4df: 14864 case X86::BI__builtin_ia32_gather3siv4di: 14865 case X86::BI__builtin_ia32_gather3siv4sf: 14866 case X86::BI__builtin_ia32_gather3siv4si: 14867 case X86::BI__builtin_ia32_gather3siv8sf: 14868 case X86::BI__builtin_ia32_gather3siv8si: 14869 case X86::BI__builtin_ia32_gathersiv8df: 14870 case X86::BI__builtin_ia32_gathersiv16sf: 14871 case X86::BI__builtin_ia32_gatherdiv8df: 14872 case X86::BI__builtin_ia32_gatherdiv16sf: 14873 case X86::BI__builtin_ia32_gathersiv8di: 14874 case X86::BI__builtin_ia32_gathersiv16si: 14875 case X86::BI__builtin_ia32_gatherdiv8di: 14876 case X86::BI__builtin_ia32_gatherdiv16si: { 14877 Intrinsic::ID IID; 14878 switch (BuiltinID) { 14879 default: llvm_unreachable("Unexpected builtin"); 14880 case X86::BI__builtin_ia32_gather3div2df: 14881 IID = Intrinsic::x86_avx512_mask_gather3div2_df; 14882 break; 14883 case X86::BI__builtin_ia32_gather3div2di: 14884 IID = 
Intrinsic::x86_avx512_mask_gather3div2_di; 14885 break; 14886 case X86::BI__builtin_ia32_gather3div4df: 14887 IID = Intrinsic::x86_avx512_mask_gather3div4_df; 14888 break; 14889 case X86::BI__builtin_ia32_gather3div4di: 14890 IID = Intrinsic::x86_avx512_mask_gather3div4_di; 14891 break; 14892 case X86::BI__builtin_ia32_gather3div4sf: 14893 IID = Intrinsic::x86_avx512_mask_gather3div4_sf; 14894 break; 14895 case X86::BI__builtin_ia32_gather3div4si: 14896 IID = Intrinsic::x86_avx512_mask_gather3div4_si; 14897 break; 14898 case X86::BI__builtin_ia32_gather3div8sf: 14899 IID = Intrinsic::x86_avx512_mask_gather3div8_sf; 14900 break; 14901 case X86::BI__builtin_ia32_gather3div8si: 14902 IID = Intrinsic::x86_avx512_mask_gather3div8_si; 14903 break; 14904 case X86::BI__builtin_ia32_gather3siv2df: 14905 IID = Intrinsic::x86_avx512_mask_gather3siv2_df; 14906 break; 14907 case X86::BI__builtin_ia32_gather3siv2di: 14908 IID = Intrinsic::x86_avx512_mask_gather3siv2_di; 14909 break; 14910 case X86::BI__builtin_ia32_gather3siv4df: 14911 IID = Intrinsic::x86_avx512_mask_gather3siv4_df; 14912 break; 14913 case X86::BI__builtin_ia32_gather3siv4di: 14914 IID = Intrinsic::x86_avx512_mask_gather3siv4_di; 14915 break; 14916 case X86::BI__builtin_ia32_gather3siv4sf: 14917 IID = Intrinsic::x86_avx512_mask_gather3siv4_sf; 14918 break; 14919 case X86::BI__builtin_ia32_gather3siv4si: 14920 IID = Intrinsic::x86_avx512_mask_gather3siv4_si; 14921 break; 14922 case X86::BI__builtin_ia32_gather3siv8sf: 14923 IID = Intrinsic::x86_avx512_mask_gather3siv8_sf; 14924 break; 14925 case X86::BI__builtin_ia32_gather3siv8si: 14926 IID = Intrinsic::x86_avx512_mask_gather3siv8_si; 14927 break; 14928 case X86::BI__builtin_ia32_gathersiv8df: 14929 IID = Intrinsic::x86_avx512_mask_gather_dpd_512; 14930 break; 14931 case X86::BI__builtin_ia32_gathersiv16sf: 14932 IID = Intrinsic::x86_avx512_mask_gather_dps_512; 14933 break; 14934 case X86::BI__builtin_ia32_gatherdiv8df: 14935 IID = Intrinsic::x86_avx512_mask_gather_qpd_512; 14936 break; 14937 case X86::BI__builtin_ia32_gatherdiv16sf: 14938 IID = Intrinsic::x86_avx512_mask_gather_qps_512; 14939 break; 14940 case X86::BI__builtin_ia32_gathersiv8di: 14941 IID = Intrinsic::x86_avx512_mask_gather_dpq_512; 14942 break; 14943 case X86::BI__builtin_ia32_gathersiv16si: 14944 IID = Intrinsic::x86_avx512_mask_gather_dpi_512; 14945 break; 14946 case X86::BI__builtin_ia32_gatherdiv8di: 14947 IID = Intrinsic::x86_avx512_mask_gather_qpq_512; 14948 break; 14949 case X86::BI__builtin_ia32_gatherdiv16si: 14950 IID = Intrinsic::x86_avx512_mask_gather_qpi_512; 14951 break; 14952 } 14953 14954 unsigned MinElts = std::min( 14955 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements(), 14956 cast<llvm::FixedVectorType>(Ops[2]->getType())->getNumElements()); 14957 Ops[3] = getMaskVecValue(*this, Ops[3], MinElts); 14958 Function *Intr = CGM.getIntrinsic(IID); 14959 return Builder.CreateCall(Intr, Ops); 14960 } 14961 14962 case X86::BI__builtin_ia32_scattersiv8df: 14963 case X86::BI__builtin_ia32_scattersiv16sf: 14964 case X86::BI__builtin_ia32_scatterdiv8df: 14965 case X86::BI__builtin_ia32_scatterdiv16sf: 14966 case X86::BI__builtin_ia32_scattersiv8di: 14967 case X86::BI__builtin_ia32_scattersiv16si: 14968 case X86::BI__builtin_ia32_scatterdiv8di: 14969 case X86::BI__builtin_ia32_scatterdiv16si: 14970 case X86::BI__builtin_ia32_scatterdiv2df: 14971 case X86::BI__builtin_ia32_scatterdiv2di: 14972 case X86::BI__builtin_ia32_scatterdiv4df: 14973 case X86::BI__builtin_ia32_scatterdiv4di: 14974 case 
X86::BI__builtin_ia32_scatterdiv4sf: 14975 case X86::BI__builtin_ia32_scatterdiv4si: 14976 case X86::BI__builtin_ia32_scatterdiv8sf: 14977 case X86::BI__builtin_ia32_scatterdiv8si: 14978 case X86::BI__builtin_ia32_scattersiv2df: 14979 case X86::BI__builtin_ia32_scattersiv2di: 14980 case X86::BI__builtin_ia32_scattersiv4df: 14981 case X86::BI__builtin_ia32_scattersiv4di: 14982 case X86::BI__builtin_ia32_scattersiv4sf: 14983 case X86::BI__builtin_ia32_scattersiv4si: 14984 case X86::BI__builtin_ia32_scattersiv8sf: 14985 case X86::BI__builtin_ia32_scattersiv8si: { 14986 Intrinsic::ID IID; 14987 switch (BuiltinID) { 14988 default: llvm_unreachable("Unexpected builtin"); 14989 case X86::BI__builtin_ia32_scattersiv8df: 14990 IID = Intrinsic::x86_avx512_mask_scatter_dpd_512; 14991 break; 14992 case X86::BI__builtin_ia32_scattersiv16sf: 14993 IID = Intrinsic::x86_avx512_mask_scatter_dps_512; 14994 break; 14995 case X86::BI__builtin_ia32_scatterdiv8df: 14996 IID = Intrinsic::x86_avx512_mask_scatter_qpd_512; 14997 break; 14998 case X86::BI__builtin_ia32_scatterdiv16sf: 14999 IID = Intrinsic::x86_avx512_mask_scatter_qps_512; 15000 break; 15001 case X86::BI__builtin_ia32_scattersiv8di: 15002 IID = Intrinsic::x86_avx512_mask_scatter_dpq_512; 15003 break; 15004 case X86::BI__builtin_ia32_scattersiv16si: 15005 IID = Intrinsic::x86_avx512_mask_scatter_dpi_512; 15006 break; 15007 case X86::BI__builtin_ia32_scatterdiv8di: 15008 IID = Intrinsic::x86_avx512_mask_scatter_qpq_512; 15009 break; 15010 case X86::BI__builtin_ia32_scatterdiv16si: 15011 IID = Intrinsic::x86_avx512_mask_scatter_qpi_512; 15012 break; 15013 case X86::BI__builtin_ia32_scatterdiv2df: 15014 IID = Intrinsic::x86_avx512_mask_scatterdiv2_df; 15015 break; 15016 case X86::BI__builtin_ia32_scatterdiv2di: 15017 IID = Intrinsic::x86_avx512_mask_scatterdiv2_di; 15018 break; 15019 case X86::BI__builtin_ia32_scatterdiv4df: 15020 IID = Intrinsic::x86_avx512_mask_scatterdiv4_df; 15021 break; 15022 case X86::BI__builtin_ia32_scatterdiv4di: 15023 IID = Intrinsic::x86_avx512_mask_scatterdiv4_di; 15024 break; 15025 case X86::BI__builtin_ia32_scatterdiv4sf: 15026 IID = Intrinsic::x86_avx512_mask_scatterdiv4_sf; 15027 break; 15028 case X86::BI__builtin_ia32_scatterdiv4si: 15029 IID = Intrinsic::x86_avx512_mask_scatterdiv4_si; 15030 break; 15031 case X86::BI__builtin_ia32_scatterdiv8sf: 15032 IID = Intrinsic::x86_avx512_mask_scatterdiv8_sf; 15033 break; 15034 case X86::BI__builtin_ia32_scatterdiv8si: 15035 IID = Intrinsic::x86_avx512_mask_scatterdiv8_si; 15036 break; 15037 case X86::BI__builtin_ia32_scattersiv2df: 15038 IID = Intrinsic::x86_avx512_mask_scattersiv2_df; 15039 break; 15040 case X86::BI__builtin_ia32_scattersiv2di: 15041 IID = Intrinsic::x86_avx512_mask_scattersiv2_di; 15042 break; 15043 case X86::BI__builtin_ia32_scattersiv4df: 15044 IID = Intrinsic::x86_avx512_mask_scattersiv4_df; 15045 break; 15046 case X86::BI__builtin_ia32_scattersiv4di: 15047 IID = Intrinsic::x86_avx512_mask_scattersiv4_di; 15048 break; 15049 case X86::BI__builtin_ia32_scattersiv4sf: 15050 IID = Intrinsic::x86_avx512_mask_scattersiv4_sf; 15051 break; 15052 case X86::BI__builtin_ia32_scattersiv4si: 15053 IID = Intrinsic::x86_avx512_mask_scattersiv4_si; 15054 break; 15055 case X86::BI__builtin_ia32_scattersiv8sf: 15056 IID = Intrinsic::x86_avx512_mask_scattersiv8_sf; 15057 break; 15058 case X86::BI__builtin_ia32_scattersiv8si: 15059 IID = Intrinsic::x86_avx512_mask_scattersiv8_si; 15060 break; 15061 } 15062 15063 unsigned MinElts = std::min( 15064 
cast<llvm::FixedVectorType>(Ops[2]->getType())->getNumElements(), 15065 cast<llvm::FixedVectorType>(Ops[3]->getType())->getNumElements()); 15066 Ops[1] = getMaskVecValue(*this, Ops[1], MinElts); 15067 Function *Intr = CGM.getIntrinsic(IID); 15068 return Builder.CreateCall(Intr, Ops); 15069 } 15070 15071 case X86::BI__builtin_ia32_vextractf128_pd256: 15072 case X86::BI__builtin_ia32_vextractf128_ps256: 15073 case X86::BI__builtin_ia32_vextractf128_si256: 15074 case X86::BI__builtin_ia32_extract128i256: 15075 case X86::BI__builtin_ia32_extractf64x4_mask: 15076 case X86::BI__builtin_ia32_extractf32x4_mask: 15077 case X86::BI__builtin_ia32_extracti64x4_mask: 15078 case X86::BI__builtin_ia32_extracti32x4_mask: 15079 case X86::BI__builtin_ia32_extractf32x8_mask: 15080 case X86::BI__builtin_ia32_extracti32x8_mask: 15081 case X86::BI__builtin_ia32_extractf32x4_256_mask: 15082 case X86::BI__builtin_ia32_extracti32x4_256_mask: 15083 case X86::BI__builtin_ia32_extractf64x2_256_mask: 15084 case X86::BI__builtin_ia32_extracti64x2_256_mask: 15085 case X86::BI__builtin_ia32_extractf64x2_512_mask: 15086 case X86::BI__builtin_ia32_extracti64x2_512_mask: { 15087 auto *DstTy = cast<llvm::FixedVectorType>(ConvertType(E->getType())); 15088 unsigned NumElts = DstTy->getNumElements(); 15089 unsigned SrcNumElts = 15090 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements(); 15091 unsigned SubVectors = SrcNumElts / NumElts; 15092 unsigned Index = cast<ConstantInt>(Ops[1])->getZExtValue(); 15093 assert(llvm::isPowerOf2_32(SubVectors) && "Expected power of 2 subvectors"); 15094 Index &= SubVectors - 1; // Remove any extra bits. 15095 Index *= NumElts; 15096 15097 int Indices[16]; 15098 for (unsigned i = 0; i != NumElts; ++i) 15099 Indices[i] = i + Index; 15100 15101 Value *Res = Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts), 15102 "extract"); 15103 15104 if (Ops.size() == 4) 15105 Res = EmitX86Select(*this, Ops[3], Res, Ops[2]); 15106 15107 return Res; 15108 } 15109 case X86::BI__builtin_ia32_vinsertf128_pd256: 15110 case X86::BI__builtin_ia32_vinsertf128_ps256: 15111 case X86::BI__builtin_ia32_vinsertf128_si256: 15112 case X86::BI__builtin_ia32_insert128i256: 15113 case X86::BI__builtin_ia32_insertf64x4: 15114 case X86::BI__builtin_ia32_insertf32x4: 15115 case X86::BI__builtin_ia32_inserti64x4: 15116 case X86::BI__builtin_ia32_inserti32x4: 15117 case X86::BI__builtin_ia32_insertf32x8: 15118 case X86::BI__builtin_ia32_inserti32x8: 15119 case X86::BI__builtin_ia32_insertf32x4_256: 15120 case X86::BI__builtin_ia32_inserti32x4_256: 15121 case X86::BI__builtin_ia32_insertf64x2_256: 15122 case X86::BI__builtin_ia32_inserti64x2_256: 15123 case X86::BI__builtin_ia32_insertf64x2_512: 15124 case X86::BI__builtin_ia32_inserti64x2_512: { 15125 unsigned DstNumElts = 15126 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements(); 15127 unsigned SrcNumElts = 15128 cast<llvm::FixedVectorType>(Ops[1]->getType())->getNumElements(); 15129 unsigned SubVectors = DstNumElts / SrcNumElts; 15130 unsigned Index = cast<ConstantInt>(Ops[2])->getZExtValue(); 15131 assert(llvm::isPowerOf2_32(SubVectors) && "Expected power of 2 subvectors"); 15132 Index &= SubVectors - 1; // Remove any extra bits. 15133 Index *= SrcNumElts; 15134 15135 int Indices[16]; 15136 for (unsigned i = 0; i != DstNumElts; ++i) 15137 Indices[i] = (i >= SrcNumElts) ? 
SrcNumElts + (i % SrcNumElts) : i; 15138 15139 Value *Op1 = Builder.CreateShuffleVector( 15140 Ops[1], ArrayRef(Indices, DstNumElts), "widen"); 15141 15142 for (unsigned i = 0; i != DstNumElts; ++i) { 15143 if (i >= Index && i < (Index + SrcNumElts)) 15144 Indices[i] = (i - Index) + DstNumElts; 15145 else 15146 Indices[i] = i; 15147 } 15148 15149 return Builder.CreateShuffleVector(Ops[0], Op1, 15150 ArrayRef(Indices, DstNumElts), "insert"); 15151 } 15152 case X86::BI__builtin_ia32_pmovqd512_mask: 15153 case X86::BI__builtin_ia32_pmovwb512_mask: { 15154 Value *Res = Builder.CreateTrunc(Ops[0], Ops[1]->getType()); 15155 return EmitX86Select(*this, Ops[2], Res, Ops[1]); 15156 } 15157 case X86::BI__builtin_ia32_pmovdb512_mask: 15158 case X86::BI__builtin_ia32_pmovdw512_mask: 15159 case X86::BI__builtin_ia32_pmovqw512_mask: { 15160 if (const auto *C = dyn_cast<Constant>(Ops[2])) 15161 if (C->isAllOnesValue()) 15162 return Builder.CreateTrunc(Ops[0], Ops[1]->getType()); 15163 15164 Intrinsic::ID IID; 15165 switch (BuiltinID) { 15166 default: llvm_unreachable("Unsupported intrinsic!"); 15167 case X86::BI__builtin_ia32_pmovdb512_mask: 15168 IID = Intrinsic::x86_avx512_mask_pmov_db_512; 15169 break; 15170 case X86::BI__builtin_ia32_pmovdw512_mask: 15171 IID = Intrinsic::x86_avx512_mask_pmov_dw_512; 15172 break; 15173 case X86::BI__builtin_ia32_pmovqw512_mask: 15174 IID = Intrinsic::x86_avx512_mask_pmov_qw_512; 15175 break; 15176 } 15177 15178 Function *Intr = CGM.getIntrinsic(IID); 15179 return Builder.CreateCall(Intr, Ops); 15180 } 15181 case X86::BI__builtin_ia32_pblendw128: 15182 case X86::BI__builtin_ia32_blendpd: 15183 case X86::BI__builtin_ia32_blendps: 15184 case X86::BI__builtin_ia32_blendpd256: 15185 case X86::BI__builtin_ia32_blendps256: 15186 case X86::BI__builtin_ia32_pblendw256: 15187 case X86::BI__builtin_ia32_pblendd128: 15188 case X86::BI__builtin_ia32_pblendd256: { 15189 unsigned NumElts = 15190 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements(); 15191 unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue(); 15192 15193 int Indices[16]; 15194 // If there are more than 8 elements, the immediate is used twice so make 15195 // sure we handle that. 15196 for (unsigned i = 0; i != NumElts; ++i) 15197 Indices[i] = ((Imm >> (i % 8)) & 0x1) ? NumElts + i : i; 15198 15199 return Builder.CreateShuffleVector(Ops[0], Ops[1], 15200 ArrayRef(Indices, NumElts), "blend"); 15201 } 15202 case X86::BI__builtin_ia32_pshuflw: 15203 case X86::BI__builtin_ia32_pshuflw256: 15204 case X86::BI__builtin_ia32_pshuflw512: { 15205 uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue(); 15206 auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType()); 15207 unsigned NumElts = Ty->getNumElements(); 15208 15209 // Splat the 8-bits of immediate 4 times to help the loop wrap around. 
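    // For example, with Imm = 0x1B the low four words of each 128-bit lane are
    // reversed: lane 0 gets indices {3, 2, 1, 0, 4, 5, 6, 7} and, for the
    // wider vectors, lane 1 gets {11, 10, 9, 8, 12, 13, 14, 15}.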
15210 Imm = (Imm & 0xff) * 0x01010101; 15211 15212 int Indices[32]; 15213 for (unsigned l = 0; l != NumElts; l += 8) { 15214 for (unsigned i = 0; i != 4; ++i) { 15215 Indices[l + i] = l + (Imm & 3); 15216 Imm >>= 2; 15217 } 15218 for (unsigned i = 4; i != 8; ++i) 15219 Indices[l + i] = l + i; 15220 } 15221 15222 return Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts), 15223 "pshuflw"); 15224 } 15225 case X86::BI__builtin_ia32_pshufhw: 15226 case X86::BI__builtin_ia32_pshufhw256: 15227 case X86::BI__builtin_ia32_pshufhw512: { 15228 uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue(); 15229 auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType()); 15230 unsigned NumElts = Ty->getNumElements(); 15231 15232 // Splat the 8-bits of immediate 4 times to help the loop wrap around. 15233 Imm = (Imm & 0xff) * 0x01010101; 15234 15235 int Indices[32]; 15236 for (unsigned l = 0; l != NumElts; l += 8) { 15237 for (unsigned i = 0; i != 4; ++i) 15238 Indices[l + i] = l + i; 15239 for (unsigned i = 4; i != 8; ++i) { 15240 Indices[l + i] = l + 4 + (Imm & 3); 15241 Imm >>= 2; 15242 } 15243 } 15244 15245 return Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts), 15246 "pshufhw"); 15247 } 15248 case X86::BI__builtin_ia32_pshufd: 15249 case X86::BI__builtin_ia32_pshufd256: 15250 case X86::BI__builtin_ia32_pshufd512: 15251 case X86::BI__builtin_ia32_vpermilpd: 15252 case X86::BI__builtin_ia32_vpermilps: 15253 case X86::BI__builtin_ia32_vpermilpd256: 15254 case X86::BI__builtin_ia32_vpermilps256: 15255 case X86::BI__builtin_ia32_vpermilpd512: 15256 case X86::BI__builtin_ia32_vpermilps512: { 15257 uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue(); 15258 auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType()); 15259 unsigned NumElts = Ty->getNumElements(); 15260 unsigned NumLanes = Ty->getPrimitiveSizeInBits() / 128; 15261 unsigned NumLaneElts = NumElts / NumLanes; 15262 15263 // Splat the 8-bits of immediate 4 times to help the loop wrap around. 15264 Imm = (Imm & 0xff) * 0x01010101; 15265 15266 int Indices[16]; 15267 for (unsigned l = 0; l != NumElts; l += NumLaneElts) { 15268 for (unsigned i = 0; i != NumLaneElts; ++i) { 15269 Indices[i + l] = (Imm % NumLaneElts) + l; 15270 Imm /= NumLaneElts; 15271 } 15272 } 15273 15274 return Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts), 15275 "permil"); 15276 } 15277 case X86::BI__builtin_ia32_shufpd: 15278 case X86::BI__builtin_ia32_shufpd256: 15279 case X86::BI__builtin_ia32_shufpd512: 15280 case X86::BI__builtin_ia32_shufps: 15281 case X86::BI__builtin_ia32_shufps256: 15282 case X86::BI__builtin_ia32_shufps512: { 15283 uint32_t Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue(); 15284 auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType()); 15285 unsigned NumElts = Ty->getNumElements(); 15286 unsigned NumLanes = Ty->getPrimitiveSizeInBits() / 128; 15287 unsigned NumLaneElts = NumElts / NumLanes; 15288 15289 // Splat the 8-bits of immediate 4 times to help the loop wrap around. 
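    // For example, __builtin_ia32_shufps with Imm = 0x4E (_MM_SHUFFLE(1, 0, 3, 2))
    // produces Indices = {2, 3, 4, 5}: the low half of the result is the high
    // half of Ops[0] and the high half is the low half of Ops[1].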
15290 Imm = (Imm & 0xff) * 0x01010101; 15291 15292 int Indices[16]; 15293 for (unsigned l = 0; l != NumElts; l += NumLaneElts) { 15294 for (unsigned i = 0; i != NumLaneElts; ++i) { 15295 unsigned Index = Imm % NumLaneElts; 15296 Imm /= NumLaneElts; 15297 if (i >= (NumLaneElts / 2)) 15298 Index += NumElts; 15299 Indices[l + i] = l + Index; 15300 } 15301 } 15302 15303 return Builder.CreateShuffleVector(Ops[0], Ops[1], 15304 ArrayRef(Indices, NumElts), "shufp"); 15305 } 15306 case X86::BI__builtin_ia32_permdi256: 15307 case X86::BI__builtin_ia32_permdf256: 15308 case X86::BI__builtin_ia32_permdi512: 15309 case X86::BI__builtin_ia32_permdf512: { 15310 unsigned Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue(); 15311 auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType()); 15312 unsigned NumElts = Ty->getNumElements(); 15313 15314 // These intrinsics operate on 256-bit lanes of four 64-bit elements. 15315 int Indices[8]; 15316 for (unsigned l = 0; l != NumElts; l += 4) 15317 for (unsigned i = 0; i != 4; ++i) 15318 Indices[l + i] = l + ((Imm >> (2 * i)) & 0x3); 15319 15320 return Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts), 15321 "perm"); 15322 } 15323 case X86::BI__builtin_ia32_palignr128: 15324 case X86::BI__builtin_ia32_palignr256: 15325 case X86::BI__builtin_ia32_palignr512: { 15326 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0xff; 15327 15328 unsigned NumElts = 15329 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements(); 15330 assert(NumElts % 16 == 0); 15331 15332 // If palignr is shifting the pair of vectors more than the size of two 15333 // lanes, emit zero. 15334 if (ShiftVal >= 32) 15335 return llvm::Constant::getNullValue(ConvertType(E->getType())); 15336 15337 // If palignr is shifting the pair of input vectors more than one lane, 15338 // but less than two lanes, convert to shifting in zeroes. 15339 if (ShiftVal > 16) { 15340 ShiftVal -= 16; 15341 Ops[1] = Ops[0]; 15342 Ops[0] = llvm::Constant::getNullValue(Ops[0]->getType()); 15343 } 15344 15345 int Indices[64]; 15346 // 256-bit palignr operates on 128-bit lanes so we need to handle that 15347 for (unsigned l = 0; l != NumElts; l += 16) { 15348 for (unsigned i = 0; i != 16; ++i) { 15349 unsigned Idx = ShiftVal + i; 15350 if (Idx >= 16) 15351 Idx += NumElts - 16; // End of lane, switch operand. 15352 Indices[l + i] = Idx + l; 15353 } 15354 } 15355 15356 return Builder.CreateShuffleVector(Ops[1], Ops[0], 15357 ArrayRef(Indices, NumElts), "palignr"); 15358 } 15359 case X86::BI__builtin_ia32_alignd128: 15360 case X86::BI__builtin_ia32_alignd256: 15361 case X86::BI__builtin_ia32_alignd512: 15362 case X86::BI__builtin_ia32_alignq128: 15363 case X86::BI__builtin_ia32_alignq256: 15364 case X86::BI__builtin_ia32_alignq512: { 15365 unsigned NumElts = 15366 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements(); 15367 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0xff; 15368 15369 // Mask the shift amount to width of a vector. 
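    // For example, __builtin_ia32_alignd128 with an immediate of 6 masks the
    // shift amount down to 2 and produces Indices = {2, 3, 4, 5}: the two high
    // elements of Ops[1] followed by the two low elements of Ops[0].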
15370 ShiftVal &= NumElts - 1; 15371 15372 int Indices[16]; 15373 for (unsigned i = 0; i != NumElts; ++i) 15374 Indices[i] = i + ShiftVal; 15375 15376 return Builder.CreateShuffleVector(Ops[1], Ops[0], 15377 ArrayRef(Indices, NumElts), "valign"); 15378 } 15379 case X86::BI__builtin_ia32_shuf_f32x4_256: 15380 case X86::BI__builtin_ia32_shuf_f64x2_256: 15381 case X86::BI__builtin_ia32_shuf_i32x4_256: 15382 case X86::BI__builtin_ia32_shuf_i64x2_256: 15383 case X86::BI__builtin_ia32_shuf_f32x4: 15384 case X86::BI__builtin_ia32_shuf_f64x2: 15385 case X86::BI__builtin_ia32_shuf_i32x4: 15386 case X86::BI__builtin_ia32_shuf_i64x2: { 15387 unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue(); 15388 auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType()); 15389 unsigned NumElts = Ty->getNumElements(); 15390 unsigned NumLanes = Ty->getPrimitiveSizeInBits() == 512 ? 4 : 2; 15391 unsigned NumLaneElts = NumElts / NumLanes; 15392 15393 int Indices[16]; 15394 for (unsigned l = 0; l != NumElts; l += NumLaneElts) { 15395 unsigned Index = (Imm % NumLanes) * NumLaneElts; 15396 Imm /= NumLanes; // Discard the bits we just used. 15397 if (l >= (NumElts / 2)) 15398 Index += NumElts; // Switch to other source. 15399 for (unsigned i = 0; i != NumLaneElts; ++i) { 15400 Indices[l + i] = Index + i; 15401 } 15402 } 15403 15404 return Builder.CreateShuffleVector(Ops[0], Ops[1], 15405 ArrayRef(Indices, NumElts), "shuf"); 15406 } 15407 15408 case X86::BI__builtin_ia32_vperm2f128_pd256: 15409 case X86::BI__builtin_ia32_vperm2f128_ps256: 15410 case X86::BI__builtin_ia32_vperm2f128_si256: 15411 case X86::BI__builtin_ia32_permti256: { 15412 unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue(); 15413 unsigned NumElts = 15414 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements(); 15415 15416 // This takes a very simple approach since there are two lanes and a 15417 // shuffle can have 2 inputs. So we reserve the first input for the first 15418 // lane and the second input for the second lane. This may result in 15419 // duplicate sources, but this can be dealt with in the backend. 15420 15421 Value *OutOps[2]; 15422 int Indices[8]; 15423 for (unsigned l = 0; l != 2; ++l) { 15424 // Determine the source for this lane. 15425 if (Imm & (1 << ((l * 4) + 3))) 15426 OutOps[l] = llvm::ConstantAggregateZero::get(Ops[0]->getType()); 15427 else if (Imm & (1 << ((l * 4) + 1))) 15428 OutOps[l] = Ops[1]; 15429 else 15430 OutOps[l] = Ops[0]; 15431 15432 for (unsigned i = 0; i != NumElts/2; ++i) { 15433 // Start with ith element of the source for this lane. 15434 unsigned Idx = (l * NumElts) + i; 15435 // If bit 0 of the immediate half is set, switch to the high half of 15436 // the source. 15437 if (Imm & (1 << (l * 4))) 15438 Idx += NumElts/2; 15439 Indices[(l * (NumElts/2)) + i] = Idx; 15440 } 15441 } 15442 15443 return Builder.CreateShuffleVector(OutOps[0], OutOps[1], 15444 ArrayRef(Indices, NumElts), "vperm"); 15445 } 15446 15447 case X86::BI__builtin_ia32_pslldqi128_byteshift: 15448 case X86::BI__builtin_ia32_pslldqi256_byteshift: 15449 case X86::BI__builtin_ia32_pslldqi512_byteshift: { 15450 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff; 15451 auto *ResultType = cast<llvm::FixedVectorType>(Ops[0]->getType()); 15452 // Builtin type is vXi64 so multiply by 8 to get bytes. 15453 unsigned NumElts = ResultType->getNumElements() * 8; 15454 15455 // If pslldq is shifting the vector more than 15 bytes, emit zero. 
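    // For shifts of at most 15 bytes the operation becomes a byte shuffle
    // against a zero vector; e.g. a 128-bit pslldq by 3 bytes yields
    // Indices = {13, 14, 15, 16, 17, ..., 28}: three zero bytes followed by
    // the low 13 bytes of the source.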
15456 if (ShiftVal >= 16) 15457 return llvm::Constant::getNullValue(ResultType); 15458 15459 int Indices[64]; 15460 // 256/512-bit pslldq operates on 128-bit lanes so we need to handle that 15461 for (unsigned l = 0; l != NumElts; l += 16) { 15462 for (unsigned i = 0; i != 16; ++i) { 15463 unsigned Idx = NumElts + i - ShiftVal; 15464 if (Idx < NumElts) Idx -= NumElts - 16; // end of lane, switch operand. 15465 Indices[l + i] = Idx + l; 15466 } 15467 } 15468 15469 auto *VecTy = llvm::FixedVectorType::get(Int8Ty, NumElts); 15470 Value *Cast = Builder.CreateBitCast(Ops[0], VecTy, "cast"); 15471 Value *Zero = llvm::Constant::getNullValue(VecTy); 15472 Value *SV = Builder.CreateShuffleVector( 15473 Zero, Cast, ArrayRef(Indices, NumElts), "pslldq"); 15474 return Builder.CreateBitCast(SV, Ops[0]->getType(), "cast"); 15475 } 15476 case X86::BI__builtin_ia32_psrldqi128_byteshift: 15477 case X86::BI__builtin_ia32_psrldqi256_byteshift: 15478 case X86::BI__builtin_ia32_psrldqi512_byteshift: { 15479 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff; 15480 auto *ResultType = cast<llvm::FixedVectorType>(Ops[0]->getType()); 15481 // Builtin type is vXi64 so multiply by 8 to get bytes. 15482 unsigned NumElts = ResultType->getNumElements() * 8; 15483 15484 // If psrldq is shifting the vector more than 15 bytes, emit zero. 15485 if (ShiftVal >= 16) 15486 return llvm::Constant::getNullValue(ResultType); 15487 15488 int Indices[64]; 15489 // 256/512-bit psrldq operates on 128-bit lanes so we need to handle that 15490 for (unsigned l = 0; l != NumElts; l += 16) { 15491 for (unsigned i = 0; i != 16; ++i) { 15492 unsigned Idx = i + ShiftVal; 15493 if (Idx >= 16) Idx += NumElts - 16; // end of lane, switch operand. 15494 Indices[l + i] = Idx + l; 15495 } 15496 } 15497 15498 auto *VecTy = llvm::FixedVectorType::get(Int8Ty, NumElts); 15499 Value *Cast = Builder.CreateBitCast(Ops[0], VecTy, "cast"); 15500 Value *Zero = llvm::Constant::getNullValue(VecTy); 15501 Value *SV = Builder.CreateShuffleVector( 15502 Cast, Zero, ArrayRef(Indices, NumElts), "psrldq"); 15503 return Builder.CreateBitCast(SV, ResultType, "cast"); 15504 } 15505 case X86::BI__builtin_ia32_kshiftliqi: 15506 case X86::BI__builtin_ia32_kshiftlihi: 15507 case X86::BI__builtin_ia32_kshiftlisi: 15508 case X86::BI__builtin_ia32_kshiftlidi: { 15509 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff; 15510 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth(); 15511 15512 if (ShiftVal >= NumElts) 15513 return llvm::Constant::getNullValue(Ops[0]->getType()); 15514 15515 Value *In = getMaskVecValue(*this, Ops[0], NumElts); 15516 15517 int Indices[64]; 15518 for (unsigned i = 0; i != NumElts; ++i) 15519 Indices[i] = NumElts + i - ShiftVal; 15520 15521 Value *Zero = llvm::Constant::getNullValue(In->getType()); 15522 Value *SV = Builder.CreateShuffleVector( 15523 Zero, In, ArrayRef(Indices, NumElts), "kshiftl"); 15524 return Builder.CreateBitCast(SV, Ops[0]->getType()); 15525 } 15526 case X86::BI__builtin_ia32_kshiftriqi: 15527 case X86::BI__builtin_ia32_kshiftrihi: 15528 case X86::BI__builtin_ia32_kshiftrisi: 15529 case X86::BI__builtin_ia32_kshiftridi: { 15530 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff; 15531 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth(); 15532 15533 if (ShiftVal >= NumElts) 15534 return llvm::Constant::getNullValue(Ops[0]->getType()); 15535 15536 Value *In = getMaskVecValue(*this, Ops[0], NumElts); 15537 15538 int Indices[64]; 15539 for (unsigned 
i = 0; i != NumElts; ++i) 15540 Indices[i] = i + ShiftVal; 15541 15542 Value *Zero = llvm::Constant::getNullValue(In->getType()); 15543 Value *SV = Builder.CreateShuffleVector( 15544 In, Zero, ArrayRef(Indices, NumElts), "kshiftr"); 15545 return Builder.CreateBitCast(SV, Ops[0]->getType()); 15546 } 15547 case X86::BI__builtin_ia32_movnti: 15548 case X86::BI__builtin_ia32_movnti64: 15549 case X86::BI__builtin_ia32_movntsd: 15550 case X86::BI__builtin_ia32_movntss: { 15551 llvm::MDNode *Node = llvm::MDNode::get( 15552 getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1))); 15553 15554 Value *Ptr = Ops[0]; 15555 Value *Src = Ops[1]; 15556 15557 // Extract the 0'th element of the source vector. 15558 if (BuiltinID == X86::BI__builtin_ia32_movntsd || 15559 BuiltinID == X86::BI__builtin_ia32_movntss) 15560 Src = Builder.CreateExtractElement(Src, (uint64_t)0, "extract"); 15561 15562 // Unaligned nontemporal store of the scalar value. 15563 StoreInst *SI = Builder.CreateDefaultAlignedStore(Src, Ptr); 15564 SI->setMetadata(llvm::LLVMContext::MD_nontemporal, Node); 15565 SI->setAlignment(llvm::Align(1)); 15566 return SI; 15567 } 15568 // Rotate is a special case of funnel shift - 1st 2 args are the same. 15569 case X86::BI__builtin_ia32_vprotb: 15570 case X86::BI__builtin_ia32_vprotw: 15571 case X86::BI__builtin_ia32_vprotd: 15572 case X86::BI__builtin_ia32_vprotq: 15573 case X86::BI__builtin_ia32_vprotbi: 15574 case X86::BI__builtin_ia32_vprotwi: 15575 case X86::BI__builtin_ia32_vprotdi: 15576 case X86::BI__builtin_ia32_vprotqi: 15577 case X86::BI__builtin_ia32_prold128: 15578 case X86::BI__builtin_ia32_prold256: 15579 case X86::BI__builtin_ia32_prold512: 15580 case X86::BI__builtin_ia32_prolq128: 15581 case X86::BI__builtin_ia32_prolq256: 15582 case X86::BI__builtin_ia32_prolq512: 15583 case X86::BI__builtin_ia32_prolvd128: 15584 case X86::BI__builtin_ia32_prolvd256: 15585 case X86::BI__builtin_ia32_prolvd512: 15586 case X86::BI__builtin_ia32_prolvq128: 15587 case X86::BI__builtin_ia32_prolvq256: 15588 case X86::BI__builtin_ia32_prolvq512: 15589 return EmitX86FunnelShift(*this, Ops[0], Ops[0], Ops[1], false); 15590 case X86::BI__builtin_ia32_prord128: 15591 case X86::BI__builtin_ia32_prord256: 15592 case X86::BI__builtin_ia32_prord512: 15593 case X86::BI__builtin_ia32_prorq128: 15594 case X86::BI__builtin_ia32_prorq256: 15595 case X86::BI__builtin_ia32_prorq512: 15596 case X86::BI__builtin_ia32_prorvd128: 15597 case X86::BI__builtin_ia32_prorvd256: 15598 case X86::BI__builtin_ia32_prorvd512: 15599 case X86::BI__builtin_ia32_prorvq128: 15600 case X86::BI__builtin_ia32_prorvq256: 15601 case X86::BI__builtin_ia32_prorvq512: 15602 return EmitX86FunnelShift(*this, Ops[0], Ops[0], Ops[1], true); 15603 case X86::BI__builtin_ia32_selectb_128: 15604 case X86::BI__builtin_ia32_selectb_256: 15605 case X86::BI__builtin_ia32_selectb_512: 15606 case X86::BI__builtin_ia32_selectw_128: 15607 case X86::BI__builtin_ia32_selectw_256: 15608 case X86::BI__builtin_ia32_selectw_512: 15609 case X86::BI__builtin_ia32_selectd_128: 15610 case X86::BI__builtin_ia32_selectd_256: 15611 case X86::BI__builtin_ia32_selectd_512: 15612 case X86::BI__builtin_ia32_selectq_128: 15613 case X86::BI__builtin_ia32_selectq_256: 15614 case X86::BI__builtin_ia32_selectq_512: 15615 case X86::BI__builtin_ia32_selectph_128: 15616 case X86::BI__builtin_ia32_selectph_256: 15617 case X86::BI__builtin_ia32_selectph_512: 15618 case X86::BI__builtin_ia32_selectpbf_128: 15619 case X86::BI__builtin_ia32_selectpbf_256: 15620 case 
X86::BI__builtin_ia32_selectpbf_512: 15621 case X86::BI__builtin_ia32_selectps_128: 15622 case X86::BI__builtin_ia32_selectps_256: 15623 case X86::BI__builtin_ia32_selectps_512: 15624 case X86::BI__builtin_ia32_selectpd_128: 15625 case X86::BI__builtin_ia32_selectpd_256: 15626 case X86::BI__builtin_ia32_selectpd_512: 15627 return EmitX86Select(*this, Ops[0], Ops[1], Ops[2]); 15628 case X86::BI__builtin_ia32_selectsh_128: 15629 case X86::BI__builtin_ia32_selectsbf_128: 15630 case X86::BI__builtin_ia32_selectss_128: 15631 case X86::BI__builtin_ia32_selectsd_128: { 15632 Value *A = Builder.CreateExtractElement(Ops[1], (uint64_t)0); 15633 Value *B = Builder.CreateExtractElement(Ops[2], (uint64_t)0); 15634 A = EmitX86ScalarSelect(*this, Ops[0], A, B); 15635 return Builder.CreateInsertElement(Ops[1], A, (uint64_t)0); 15636 } 15637 case X86::BI__builtin_ia32_cmpb128_mask: 15638 case X86::BI__builtin_ia32_cmpb256_mask: 15639 case X86::BI__builtin_ia32_cmpb512_mask: 15640 case X86::BI__builtin_ia32_cmpw128_mask: 15641 case X86::BI__builtin_ia32_cmpw256_mask: 15642 case X86::BI__builtin_ia32_cmpw512_mask: 15643 case X86::BI__builtin_ia32_cmpd128_mask: 15644 case X86::BI__builtin_ia32_cmpd256_mask: 15645 case X86::BI__builtin_ia32_cmpd512_mask: 15646 case X86::BI__builtin_ia32_cmpq128_mask: 15647 case X86::BI__builtin_ia32_cmpq256_mask: 15648 case X86::BI__builtin_ia32_cmpq512_mask: { 15649 unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7; 15650 return EmitX86MaskedCompare(*this, CC, true, Ops); 15651 } 15652 case X86::BI__builtin_ia32_ucmpb128_mask: 15653 case X86::BI__builtin_ia32_ucmpb256_mask: 15654 case X86::BI__builtin_ia32_ucmpb512_mask: 15655 case X86::BI__builtin_ia32_ucmpw128_mask: 15656 case X86::BI__builtin_ia32_ucmpw256_mask: 15657 case X86::BI__builtin_ia32_ucmpw512_mask: 15658 case X86::BI__builtin_ia32_ucmpd128_mask: 15659 case X86::BI__builtin_ia32_ucmpd256_mask: 15660 case X86::BI__builtin_ia32_ucmpd512_mask: 15661 case X86::BI__builtin_ia32_ucmpq128_mask: 15662 case X86::BI__builtin_ia32_ucmpq256_mask: 15663 case X86::BI__builtin_ia32_ucmpq512_mask: { 15664 unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7; 15665 return EmitX86MaskedCompare(*this, CC, false, Ops); 15666 } 15667 case X86::BI__builtin_ia32_vpcomb: 15668 case X86::BI__builtin_ia32_vpcomw: 15669 case X86::BI__builtin_ia32_vpcomd: 15670 case X86::BI__builtin_ia32_vpcomq: 15671 return EmitX86vpcom(*this, Ops, true); 15672 case X86::BI__builtin_ia32_vpcomub: 15673 case X86::BI__builtin_ia32_vpcomuw: 15674 case X86::BI__builtin_ia32_vpcomud: 15675 case X86::BI__builtin_ia32_vpcomuq: 15676 return EmitX86vpcom(*this, Ops, false); 15677 15678 case X86::BI__builtin_ia32_kortestcqi: 15679 case X86::BI__builtin_ia32_kortestchi: 15680 case X86::BI__builtin_ia32_kortestcsi: 15681 case X86::BI__builtin_ia32_kortestcdi: { 15682 Value *Or = EmitX86MaskLogic(*this, Instruction::Or, Ops); 15683 Value *C = llvm::Constant::getAllOnesValue(Ops[0]->getType()); 15684 Value *Cmp = Builder.CreateICmpEQ(Or, C); 15685 return Builder.CreateZExt(Cmp, ConvertType(E->getType())); 15686 } 15687 case X86::BI__builtin_ia32_kortestzqi: 15688 case X86::BI__builtin_ia32_kortestzhi: 15689 case X86::BI__builtin_ia32_kortestzsi: 15690 case X86::BI__builtin_ia32_kortestzdi: { 15691 Value *Or = EmitX86MaskLogic(*this, Instruction::Or, Ops); 15692 Value *C = llvm::Constant::getNullValue(Ops[0]->getType()); 15693 Value *Cmp = Builder.CreateICmpEQ(Or, C); 15694 return Builder.CreateZExt(Cmp, ConvertType(E->getType())); 15695 
} 15696 15697 case X86::BI__builtin_ia32_ktestcqi: 15698 case X86::BI__builtin_ia32_ktestzqi: 15699 case X86::BI__builtin_ia32_ktestchi: 15700 case X86::BI__builtin_ia32_ktestzhi: 15701 case X86::BI__builtin_ia32_ktestcsi: 15702 case X86::BI__builtin_ia32_ktestzsi: 15703 case X86::BI__builtin_ia32_ktestcdi: 15704 case X86::BI__builtin_ia32_ktestzdi: { 15705 Intrinsic::ID IID; 15706 switch (BuiltinID) { 15707 default: llvm_unreachable("Unsupported intrinsic!"); 15708 case X86::BI__builtin_ia32_ktestcqi: 15709 IID = Intrinsic::x86_avx512_ktestc_b; 15710 break; 15711 case X86::BI__builtin_ia32_ktestzqi: 15712 IID = Intrinsic::x86_avx512_ktestz_b; 15713 break; 15714 case X86::BI__builtin_ia32_ktestchi: 15715 IID = Intrinsic::x86_avx512_ktestc_w; 15716 break; 15717 case X86::BI__builtin_ia32_ktestzhi: 15718 IID = Intrinsic::x86_avx512_ktestz_w; 15719 break; 15720 case X86::BI__builtin_ia32_ktestcsi: 15721 IID = Intrinsic::x86_avx512_ktestc_d; 15722 break; 15723 case X86::BI__builtin_ia32_ktestzsi: 15724 IID = Intrinsic::x86_avx512_ktestz_d; 15725 break; 15726 case X86::BI__builtin_ia32_ktestcdi: 15727 IID = Intrinsic::x86_avx512_ktestc_q; 15728 break; 15729 case X86::BI__builtin_ia32_ktestzdi: 15730 IID = Intrinsic::x86_avx512_ktestz_q; 15731 break; 15732 } 15733 15734 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth(); 15735 Value *LHS = getMaskVecValue(*this, Ops[0], NumElts); 15736 Value *RHS = getMaskVecValue(*this, Ops[1], NumElts); 15737 Function *Intr = CGM.getIntrinsic(IID); 15738 return Builder.CreateCall(Intr, {LHS, RHS}); 15739 } 15740 15741 case X86::BI__builtin_ia32_kaddqi: 15742 case X86::BI__builtin_ia32_kaddhi: 15743 case X86::BI__builtin_ia32_kaddsi: 15744 case X86::BI__builtin_ia32_kadddi: { 15745 Intrinsic::ID IID; 15746 switch (BuiltinID) { 15747 default: llvm_unreachable("Unsupported intrinsic!"); 15748 case X86::BI__builtin_ia32_kaddqi: 15749 IID = Intrinsic::x86_avx512_kadd_b; 15750 break; 15751 case X86::BI__builtin_ia32_kaddhi: 15752 IID = Intrinsic::x86_avx512_kadd_w; 15753 break; 15754 case X86::BI__builtin_ia32_kaddsi: 15755 IID = Intrinsic::x86_avx512_kadd_d; 15756 break; 15757 case X86::BI__builtin_ia32_kadddi: 15758 IID = Intrinsic::x86_avx512_kadd_q; 15759 break; 15760 } 15761 15762 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth(); 15763 Value *LHS = getMaskVecValue(*this, Ops[0], NumElts); 15764 Value *RHS = getMaskVecValue(*this, Ops[1], NumElts); 15765 Function *Intr = CGM.getIntrinsic(IID); 15766 Value *Res = Builder.CreateCall(Intr, {LHS, RHS}); 15767 return Builder.CreateBitCast(Res, Ops[0]->getType()); 15768 } 15769 case X86::BI__builtin_ia32_kandqi: 15770 case X86::BI__builtin_ia32_kandhi: 15771 case X86::BI__builtin_ia32_kandsi: 15772 case X86::BI__builtin_ia32_kanddi: 15773 return EmitX86MaskLogic(*this, Instruction::And, Ops); 15774 case X86::BI__builtin_ia32_kandnqi: 15775 case X86::BI__builtin_ia32_kandnhi: 15776 case X86::BI__builtin_ia32_kandnsi: 15777 case X86::BI__builtin_ia32_kandndi: 15778 return EmitX86MaskLogic(*this, Instruction::And, Ops, true); 15779 case X86::BI__builtin_ia32_korqi: 15780 case X86::BI__builtin_ia32_korhi: 15781 case X86::BI__builtin_ia32_korsi: 15782 case X86::BI__builtin_ia32_kordi: 15783 return EmitX86MaskLogic(*this, Instruction::Or, Ops); 15784 case X86::BI__builtin_ia32_kxnorqi: 15785 case X86::BI__builtin_ia32_kxnorhi: 15786 case X86::BI__builtin_ia32_kxnorsi: 15787 case X86::BI__builtin_ia32_kxnordi: 15788 return EmitX86MaskLogic(*this, Instruction::Xor, Ops, true); 15789 case 
X86::BI__builtin_ia32_kxorqi: 15790 case X86::BI__builtin_ia32_kxorhi: 15791 case X86::BI__builtin_ia32_kxorsi: 15792 case X86::BI__builtin_ia32_kxordi: 15793 return EmitX86MaskLogic(*this, Instruction::Xor, Ops); 15794 case X86::BI__builtin_ia32_knotqi: 15795 case X86::BI__builtin_ia32_knothi: 15796 case X86::BI__builtin_ia32_knotsi: 15797 case X86::BI__builtin_ia32_knotdi: { 15798 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth(); 15799 Value *Res = getMaskVecValue(*this, Ops[0], NumElts); 15800 return Builder.CreateBitCast(Builder.CreateNot(Res), 15801 Ops[0]->getType()); 15802 } 15803 case X86::BI__builtin_ia32_kmovb: 15804 case X86::BI__builtin_ia32_kmovw: 15805 case X86::BI__builtin_ia32_kmovd: 15806 case X86::BI__builtin_ia32_kmovq: { 15807 // Bitcast to vXi1 type and then back to integer. This gets the mask 15808 // register type into the IR, but might be optimized out depending on 15809 // what's around it. 15810 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth(); 15811 Value *Res = getMaskVecValue(*this, Ops[0], NumElts); 15812 return Builder.CreateBitCast(Res, Ops[0]->getType()); 15813 } 15814 15815 case X86::BI__builtin_ia32_kunpckdi: 15816 case X86::BI__builtin_ia32_kunpcksi: 15817 case X86::BI__builtin_ia32_kunpckhi: { 15818 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth(); 15819 Value *LHS = getMaskVecValue(*this, Ops[0], NumElts); 15820 Value *RHS = getMaskVecValue(*this, Ops[1], NumElts); 15821 int Indices[64]; 15822 for (unsigned i = 0; i != NumElts; ++i) 15823 Indices[i] = i; 15824 15825 // First extract half of each vector. This gives better codegen than 15826 // doing it in a single shuffle. 15827 LHS = Builder.CreateShuffleVector(LHS, LHS, ArrayRef(Indices, NumElts / 2)); 15828 RHS = Builder.CreateShuffleVector(RHS, RHS, ArrayRef(Indices, NumElts / 2)); 15829 // Concat the vectors. 15830 // NOTE: Operands are swapped to match the intrinsic definition. 15831 Value *Res = 15832 Builder.CreateShuffleVector(RHS, LHS, ArrayRef(Indices, NumElts)); 15833 return Builder.CreateBitCast(Res, Ops[0]->getType()); 15834 } 15835 15836 case X86::BI__builtin_ia32_vplzcntd_128: 15837 case X86::BI__builtin_ia32_vplzcntd_256: 15838 case X86::BI__builtin_ia32_vplzcntd_512: 15839 case X86::BI__builtin_ia32_vplzcntq_128: 15840 case X86::BI__builtin_ia32_vplzcntq_256: 15841 case X86::BI__builtin_ia32_vplzcntq_512: { 15842 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType()); 15843 return Builder.CreateCall(F, {Ops[0],Builder.getInt1(false)}); 15844 } 15845 case X86::BI__builtin_ia32_sqrtss: 15846 case X86::BI__builtin_ia32_sqrtsd: { 15847 Value *A = Builder.CreateExtractElement(Ops[0], (uint64_t)0); 15848 Function *F; 15849 if (Builder.getIsFPConstrained()) { 15850 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E); 15851 F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt, 15852 A->getType()); 15853 A = Builder.CreateConstrainedFPCall(F, {A}); 15854 } else { 15855 F = CGM.getIntrinsic(Intrinsic::sqrt, A->getType()); 15856 A = Builder.CreateCall(F, {A}); 15857 } 15858 return Builder.CreateInsertElement(Ops[0], A, (uint64_t)0); 15859 } 15860 case X86::BI__builtin_ia32_sqrtsh_round_mask: 15861 case X86::BI__builtin_ia32_sqrtsd_round_mask: 15862 case X86::BI__builtin_ia32_sqrtss_round_mask: { 15863 unsigned CC = cast<llvm::ConstantInt>(Ops[4])->getZExtValue(); 15864 // Support only if the rounding mode is 4 (AKA CUR_DIRECTION), 15865 // otherwise keep the intrinsic. 
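    // 4 is _MM_FROUND_CUR_DIRECTION, i.e. "round using the current MXCSR
    // mode"; that is the only case where falling back to the generic
    // (possibly constrained) sqrt intrinsic below preserves the intended
    // semantics.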
15866 if (CC != 4) { 15867 Intrinsic::ID IID; 15868 15869 switch (BuiltinID) { 15870 default: 15871 llvm_unreachable("Unsupported intrinsic!"); 15872 case X86::BI__builtin_ia32_sqrtsh_round_mask: 15873 IID = Intrinsic::x86_avx512fp16_mask_sqrt_sh; 15874 break; 15875 case X86::BI__builtin_ia32_sqrtsd_round_mask: 15876 IID = Intrinsic::x86_avx512_mask_sqrt_sd; 15877 break; 15878 case X86::BI__builtin_ia32_sqrtss_round_mask: 15879 IID = Intrinsic::x86_avx512_mask_sqrt_ss; 15880 break; 15881 } 15882 return Builder.CreateCall(CGM.getIntrinsic(IID), Ops); 15883 } 15884 Value *A = Builder.CreateExtractElement(Ops[1], (uint64_t)0); 15885 Function *F; 15886 if (Builder.getIsFPConstrained()) { 15887 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E); 15888 F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt, 15889 A->getType()); 15890 A = Builder.CreateConstrainedFPCall(F, A); 15891 } else { 15892 F = CGM.getIntrinsic(Intrinsic::sqrt, A->getType()); 15893 A = Builder.CreateCall(F, A); 15894 } 15895 Value *Src = Builder.CreateExtractElement(Ops[2], (uint64_t)0); 15896 A = EmitX86ScalarSelect(*this, Ops[3], A, Src); 15897 return Builder.CreateInsertElement(Ops[0], A, (uint64_t)0); 15898 } 15899 case X86::BI__builtin_ia32_sqrtpd256: 15900 case X86::BI__builtin_ia32_sqrtpd: 15901 case X86::BI__builtin_ia32_sqrtps256: 15902 case X86::BI__builtin_ia32_sqrtps: 15903 case X86::BI__builtin_ia32_sqrtph256: 15904 case X86::BI__builtin_ia32_sqrtph: 15905 case X86::BI__builtin_ia32_sqrtph512: 15906 case X86::BI__builtin_ia32_sqrtps512: 15907 case X86::BI__builtin_ia32_sqrtpd512: { 15908 if (Ops.size() == 2) { 15909 unsigned CC = cast<llvm::ConstantInt>(Ops[1])->getZExtValue(); 15910 // Support only if the rounding mode is 4 (AKA CUR_DIRECTION), 15911 // otherwise keep the intrinsic. 
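    // Of the builtins handled here only the 512-bit forms carry a rounding
    // operand (hence the Ops.size() == 2 check above); as in the scalar case,
    // anything other than CUR_DIRECTION keeps the target intrinsic so the
    // embedded rounding mode is not lost.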
15912 if (CC != 4) { 15913 Intrinsic::ID IID; 15914 15915 switch (BuiltinID) { 15916 default: 15917 llvm_unreachable("Unsupported intrinsic!"); 15918 case X86::BI__builtin_ia32_sqrtph512: 15919 IID = Intrinsic::x86_avx512fp16_sqrt_ph_512; 15920 break; 15921 case X86::BI__builtin_ia32_sqrtps512: 15922 IID = Intrinsic::x86_avx512_sqrt_ps_512; 15923 break; 15924 case X86::BI__builtin_ia32_sqrtpd512: 15925 IID = Intrinsic::x86_avx512_sqrt_pd_512; 15926 break; 15927 } 15928 return Builder.CreateCall(CGM.getIntrinsic(IID), Ops); 15929 } 15930 } 15931 if (Builder.getIsFPConstrained()) { 15932 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E); 15933 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt, 15934 Ops[0]->getType()); 15935 return Builder.CreateConstrainedFPCall(F, Ops[0]); 15936 } else { 15937 Function *F = CGM.getIntrinsic(Intrinsic::sqrt, Ops[0]->getType()); 15938 return Builder.CreateCall(F, Ops[0]); 15939 } 15940 } 15941 15942 case X86::BI__builtin_ia32_pmuludq128: 15943 case X86::BI__builtin_ia32_pmuludq256: 15944 case X86::BI__builtin_ia32_pmuludq512: 15945 return EmitX86Muldq(*this, /*IsSigned*/false, Ops); 15946 15947 case X86::BI__builtin_ia32_pmuldq128: 15948 case X86::BI__builtin_ia32_pmuldq256: 15949 case X86::BI__builtin_ia32_pmuldq512: 15950 return EmitX86Muldq(*this, /*IsSigned*/true, Ops); 15951 15952 case X86::BI__builtin_ia32_pternlogd512_mask: 15953 case X86::BI__builtin_ia32_pternlogq512_mask: 15954 case X86::BI__builtin_ia32_pternlogd128_mask: 15955 case X86::BI__builtin_ia32_pternlogd256_mask: 15956 case X86::BI__builtin_ia32_pternlogq128_mask: 15957 case X86::BI__builtin_ia32_pternlogq256_mask: 15958 return EmitX86Ternlog(*this, /*ZeroMask*/false, Ops); 15959 15960 case X86::BI__builtin_ia32_pternlogd512_maskz: 15961 case X86::BI__builtin_ia32_pternlogq512_maskz: 15962 case X86::BI__builtin_ia32_pternlogd128_maskz: 15963 case X86::BI__builtin_ia32_pternlogd256_maskz: 15964 case X86::BI__builtin_ia32_pternlogq128_maskz: 15965 case X86::BI__builtin_ia32_pternlogq256_maskz: 15966 return EmitX86Ternlog(*this, /*ZeroMask*/true, Ops); 15967 15968 case X86::BI__builtin_ia32_vpshldd128: 15969 case X86::BI__builtin_ia32_vpshldd256: 15970 case X86::BI__builtin_ia32_vpshldd512: 15971 case X86::BI__builtin_ia32_vpshldq128: 15972 case X86::BI__builtin_ia32_vpshldq256: 15973 case X86::BI__builtin_ia32_vpshldq512: 15974 case X86::BI__builtin_ia32_vpshldw128: 15975 case X86::BI__builtin_ia32_vpshldw256: 15976 case X86::BI__builtin_ia32_vpshldw512: 15977 return EmitX86FunnelShift(*this, Ops[0], Ops[1], Ops[2], false); 15978 15979 case X86::BI__builtin_ia32_vpshrdd128: 15980 case X86::BI__builtin_ia32_vpshrdd256: 15981 case X86::BI__builtin_ia32_vpshrdd512: 15982 case X86::BI__builtin_ia32_vpshrdq128: 15983 case X86::BI__builtin_ia32_vpshrdq256: 15984 case X86::BI__builtin_ia32_vpshrdq512: 15985 case X86::BI__builtin_ia32_vpshrdw128: 15986 case X86::BI__builtin_ia32_vpshrdw256: 15987 case X86::BI__builtin_ia32_vpshrdw512: 15988 // Ops 0 and 1 are swapped. 
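    // llvm.fshr takes (MostSignificant, LeastSignificant, Amt); these builtins
    // put the most-significant half of the concatenation in their second
    // operand, so the first two operands are passed in reverse order.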
15989 return EmitX86FunnelShift(*this, Ops[1], Ops[0], Ops[2], true); 15990 15991 case X86::BI__builtin_ia32_vpshldvd128: 15992 case X86::BI__builtin_ia32_vpshldvd256: 15993 case X86::BI__builtin_ia32_vpshldvd512: 15994 case X86::BI__builtin_ia32_vpshldvq128: 15995 case X86::BI__builtin_ia32_vpshldvq256: 15996 case X86::BI__builtin_ia32_vpshldvq512: 15997 case X86::BI__builtin_ia32_vpshldvw128: 15998 case X86::BI__builtin_ia32_vpshldvw256: 15999 case X86::BI__builtin_ia32_vpshldvw512: 16000 return EmitX86FunnelShift(*this, Ops[0], Ops[1], Ops[2], false); 16001 16002 case X86::BI__builtin_ia32_vpshrdvd128: 16003 case X86::BI__builtin_ia32_vpshrdvd256: 16004 case X86::BI__builtin_ia32_vpshrdvd512: 16005 case X86::BI__builtin_ia32_vpshrdvq128: 16006 case X86::BI__builtin_ia32_vpshrdvq256: 16007 case X86::BI__builtin_ia32_vpshrdvq512: 16008 case X86::BI__builtin_ia32_vpshrdvw128: 16009 case X86::BI__builtin_ia32_vpshrdvw256: 16010 case X86::BI__builtin_ia32_vpshrdvw512: 16011 // Ops 0 and 1 are swapped. 16012 return EmitX86FunnelShift(*this, Ops[1], Ops[0], Ops[2], true); 16013 16014 // Reductions 16015 case X86::BI__builtin_ia32_reduce_fadd_pd512: 16016 case X86::BI__builtin_ia32_reduce_fadd_ps512: 16017 case X86::BI__builtin_ia32_reduce_fadd_ph512: 16018 case X86::BI__builtin_ia32_reduce_fadd_ph256: 16019 case X86::BI__builtin_ia32_reduce_fadd_ph128: { 16020 Function *F = 16021 CGM.getIntrinsic(Intrinsic::vector_reduce_fadd, Ops[1]->getType()); 16022 IRBuilder<>::FastMathFlagGuard FMFGuard(Builder); 16023 Builder.getFastMathFlags().setAllowReassoc(); 16024 return Builder.CreateCall(F, {Ops[0], Ops[1]}); 16025 } 16026 case X86::BI__builtin_ia32_reduce_fmul_pd512: 16027 case X86::BI__builtin_ia32_reduce_fmul_ps512: 16028 case X86::BI__builtin_ia32_reduce_fmul_ph512: 16029 case X86::BI__builtin_ia32_reduce_fmul_ph256: 16030 case X86::BI__builtin_ia32_reduce_fmul_ph128: { 16031 Function *F = 16032 CGM.getIntrinsic(Intrinsic::vector_reduce_fmul, Ops[1]->getType()); 16033 IRBuilder<>::FastMathFlagGuard FMFGuard(Builder); 16034 Builder.getFastMathFlags().setAllowReassoc(); 16035 return Builder.CreateCall(F, {Ops[0], Ops[1]}); 16036 } 16037 case X86::BI__builtin_ia32_reduce_fmax_pd512: 16038 case X86::BI__builtin_ia32_reduce_fmax_ps512: 16039 case X86::BI__builtin_ia32_reduce_fmax_ph512: 16040 case X86::BI__builtin_ia32_reduce_fmax_ph256: 16041 case X86::BI__builtin_ia32_reduce_fmax_ph128: { 16042 Function *F = 16043 CGM.getIntrinsic(Intrinsic::vector_reduce_fmax, Ops[0]->getType()); 16044 IRBuilder<>::FastMathFlagGuard FMFGuard(Builder); 16045 Builder.getFastMathFlags().setNoNaNs(); 16046 return Builder.CreateCall(F, {Ops[0]}); 16047 } 16048 case X86::BI__builtin_ia32_reduce_fmin_pd512: 16049 case X86::BI__builtin_ia32_reduce_fmin_ps512: 16050 case X86::BI__builtin_ia32_reduce_fmin_ph512: 16051 case X86::BI__builtin_ia32_reduce_fmin_ph256: 16052 case X86::BI__builtin_ia32_reduce_fmin_ph128: { 16053 Function *F = 16054 CGM.getIntrinsic(Intrinsic::vector_reduce_fmin, Ops[0]->getType()); 16055 IRBuilder<>::FastMathFlagGuard FMFGuard(Builder); 16056 Builder.getFastMathFlags().setNoNaNs(); 16057 return Builder.CreateCall(F, {Ops[0]}); 16058 } 16059 16060 case X86::BI__builtin_ia32_rdrand16_step: 16061 case X86::BI__builtin_ia32_rdrand32_step: 16062 case X86::BI__builtin_ia32_rdrand64_step: 16063 case X86::BI__builtin_ia32_rdseed16_step: 16064 case X86::BI__builtin_ia32_rdseed32_step: 16065 case X86::BI__builtin_ia32_rdseed64_step: { 16066 Intrinsic::ID ID; 16067 switch (BuiltinID) { 16068 default: 
llvm_unreachable("Unsupported intrinsic!"); 16069 case X86::BI__builtin_ia32_rdrand16_step: 16070 ID = Intrinsic::x86_rdrand_16; 16071 break; 16072 case X86::BI__builtin_ia32_rdrand32_step: 16073 ID = Intrinsic::x86_rdrand_32; 16074 break; 16075 case X86::BI__builtin_ia32_rdrand64_step: 16076 ID = Intrinsic::x86_rdrand_64; 16077 break; 16078 case X86::BI__builtin_ia32_rdseed16_step: 16079 ID = Intrinsic::x86_rdseed_16; 16080 break; 16081 case X86::BI__builtin_ia32_rdseed32_step: 16082 ID = Intrinsic::x86_rdseed_32; 16083 break; 16084 case X86::BI__builtin_ia32_rdseed64_step: 16085 ID = Intrinsic::x86_rdseed_64; 16086 break; 16087 } 16088 16089 Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID)); 16090 Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 0), 16091 Ops[0]); 16092 return Builder.CreateExtractValue(Call, 1); 16093 } 16094 case X86::BI__builtin_ia32_addcarryx_u32: 16095 case X86::BI__builtin_ia32_addcarryx_u64: 16096 case X86::BI__builtin_ia32_subborrow_u32: 16097 case X86::BI__builtin_ia32_subborrow_u64: { 16098 Intrinsic::ID IID; 16099 switch (BuiltinID) { 16100 default: llvm_unreachable("Unsupported intrinsic!"); 16101 case X86::BI__builtin_ia32_addcarryx_u32: 16102 IID = Intrinsic::x86_addcarry_32; 16103 break; 16104 case X86::BI__builtin_ia32_addcarryx_u64: 16105 IID = Intrinsic::x86_addcarry_64; 16106 break; 16107 case X86::BI__builtin_ia32_subborrow_u32: 16108 IID = Intrinsic::x86_subborrow_32; 16109 break; 16110 case X86::BI__builtin_ia32_subborrow_u64: 16111 IID = Intrinsic::x86_subborrow_64; 16112 break; 16113 } 16114 16115 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), 16116 { Ops[0], Ops[1], Ops[2] }); 16117 Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 1), 16118 Ops[3]); 16119 return Builder.CreateExtractValue(Call, 0); 16120 } 16121 16122 case X86::BI__builtin_ia32_fpclassps128_mask: 16123 case X86::BI__builtin_ia32_fpclassps256_mask: 16124 case X86::BI__builtin_ia32_fpclassps512_mask: 16125 case X86::BI__builtin_ia32_fpclassph128_mask: 16126 case X86::BI__builtin_ia32_fpclassph256_mask: 16127 case X86::BI__builtin_ia32_fpclassph512_mask: 16128 case X86::BI__builtin_ia32_fpclasspd128_mask: 16129 case X86::BI__builtin_ia32_fpclasspd256_mask: 16130 case X86::BI__builtin_ia32_fpclasspd512_mask: { 16131 unsigned NumElts = 16132 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements(); 16133 Value *MaskIn = Ops[2]; 16134 Ops.erase(&Ops[2]); 16135 16136 Intrinsic::ID ID; 16137 switch (BuiltinID) { 16138 default: llvm_unreachable("Unsupported intrinsic!"); 16139 case X86::BI__builtin_ia32_fpclassph128_mask: 16140 ID = Intrinsic::x86_avx512fp16_fpclass_ph_128; 16141 break; 16142 case X86::BI__builtin_ia32_fpclassph256_mask: 16143 ID = Intrinsic::x86_avx512fp16_fpclass_ph_256; 16144 break; 16145 case X86::BI__builtin_ia32_fpclassph512_mask: 16146 ID = Intrinsic::x86_avx512fp16_fpclass_ph_512; 16147 break; 16148 case X86::BI__builtin_ia32_fpclassps128_mask: 16149 ID = Intrinsic::x86_avx512_fpclass_ps_128; 16150 break; 16151 case X86::BI__builtin_ia32_fpclassps256_mask: 16152 ID = Intrinsic::x86_avx512_fpclass_ps_256; 16153 break; 16154 case X86::BI__builtin_ia32_fpclassps512_mask: 16155 ID = Intrinsic::x86_avx512_fpclass_ps_512; 16156 break; 16157 case X86::BI__builtin_ia32_fpclasspd128_mask: 16158 ID = Intrinsic::x86_avx512_fpclass_pd_128; 16159 break; 16160 case X86::BI__builtin_ia32_fpclasspd256_mask: 16161 ID = Intrinsic::x86_avx512_fpclass_pd_256; 16162 break; 16163 case X86::BI__builtin_ia32_fpclasspd512_mask: 
16164 ID = Intrinsic::x86_avx512_fpclass_pd_512; 16165 break; 16166 } 16167 16168 Value *Fpclass = Builder.CreateCall(CGM.getIntrinsic(ID), Ops); 16169 return EmitX86MaskedCompareResult(*this, Fpclass, NumElts, MaskIn); 16170 } 16171 16172 case X86::BI__builtin_ia32_vp2intersect_q_512: 16173 case X86::BI__builtin_ia32_vp2intersect_q_256: 16174 case X86::BI__builtin_ia32_vp2intersect_q_128: 16175 case X86::BI__builtin_ia32_vp2intersect_d_512: 16176 case X86::BI__builtin_ia32_vp2intersect_d_256: 16177 case X86::BI__builtin_ia32_vp2intersect_d_128: { 16178 unsigned NumElts = 16179 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements(); 16180 Intrinsic::ID ID; 16181 16182 switch (BuiltinID) { 16183 default: llvm_unreachable("Unsupported intrinsic!"); 16184 case X86::BI__builtin_ia32_vp2intersect_q_512: 16185 ID = Intrinsic::x86_avx512_vp2intersect_q_512; 16186 break; 16187 case X86::BI__builtin_ia32_vp2intersect_q_256: 16188 ID = Intrinsic::x86_avx512_vp2intersect_q_256; 16189 break; 16190 case X86::BI__builtin_ia32_vp2intersect_q_128: 16191 ID = Intrinsic::x86_avx512_vp2intersect_q_128; 16192 break; 16193 case X86::BI__builtin_ia32_vp2intersect_d_512: 16194 ID = Intrinsic::x86_avx512_vp2intersect_d_512; 16195 break; 16196 case X86::BI__builtin_ia32_vp2intersect_d_256: 16197 ID = Intrinsic::x86_avx512_vp2intersect_d_256; 16198 break; 16199 case X86::BI__builtin_ia32_vp2intersect_d_128: 16200 ID = Intrinsic::x86_avx512_vp2intersect_d_128; 16201 break; 16202 } 16203 16204 Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID), {Ops[0], Ops[1]}); 16205 Value *Result = Builder.CreateExtractValue(Call, 0); 16206 Result = EmitX86MaskedCompareResult(*this, Result, NumElts, nullptr); 16207 Builder.CreateDefaultAlignedStore(Result, Ops[2]); 16208 16209 Result = Builder.CreateExtractValue(Call, 1); 16210 Result = EmitX86MaskedCompareResult(*this, Result, NumElts, nullptr); 16211 return Builder.CreateDefaultAlignedStore(Result, Ops[3]); 16212 } 16213 16214 case X86::BI__builtin_ia32_vpmultishiftqb128: 16215 case X86::BI__builtin_ia32_vpmultishiftqb256: 16216 case X86::BI__builtin_ia32_vpmultishiftqb512: { 16217 Intrinsic::ID ID; 16218 switch (BuiltinID) { 16219 default: llvm_unreachable("Unsupported intrinsic!"); 16220 case X86::BI__builtin_ia32_vpmultishiftqb128: 16221 ID = Intrinsic::x86_avx512_pmultishift_qb_128; 16222 break; 16223 case X86::BI__builtin_ia32_vpmultishiftqb256: 16224 ID = Intrinsic::x86_avx512_pmultishift_qb_256; 16225 break; 16226 case X86::BI__builtin_ia32_vpmultishiftqb512: 16227 ID = Intrinsic::x86_avx512_pmultishift_qb_512; 16228 break; 16229 } 16230 16231 return Builder.CreateCall(CGM.getIntrinsic(ID), Ops); 16232 } 16233 16234 case X86::BI__builtin_ia32_vpshufbitqmb128_mask: 16235 case X86::BI__builtin_ia32_vpshufbitqmb256_mask: 16236 case X86::BI__builtin_ia32_vpshufbitqmb512_mask: { 16237 unsigned NumElts = 16238 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements(); 16239 Value *MaskIn = Ops[2]; 16240 Ops.erase(&Ops[2]); 16241 16242 Intrinsic::ID ID; 16243 switch (BuiltinID) { 16244 default: llvm_unreachable("Unsupported intrinsic!"); 16245 case X86::BI__builtin_ia32_vpshufbitqmb128_mask: 16246 ID = Intrinsic::x86_avx512_vpshufbitqmb_128; 16247 break; 16248 case X86::BI__builtin_ia32_vpshufbitqmb256_mask: 16249 ID = Intrinsic::x86_avx512_vpshufbitqmb_256; 16250 break; 16251 case X86::BI__builtin_ia32_vpshufbitqmb512_mask: 16252 ID = Intrinsic::x86_avx512_vpshufbitqmb_512; 16253 break; 16254 } 16255 16256 Value *Shufbit = 
Builder.CreateCall(CGM.getIntrinsic(ID), Ops); 16257 return EmitX86MaskedCompareResult(*this, Shufbit, NumElts, MaskIn); 16258 } 16259 16260 // packed comparison intrinsics 16261 case X86::BI__builtin_ia32_cmpeqps: 16262 case X86::BI__builtin_ia32_cmpeqpd: 16263 return getVectorFCmpIR(CmpInst::FCMP_OEQ, /*IsSignaling*/false); 16264 case X86::BI__builtin_ia32_cmpltps: 16265 case X86::BI__builtin_ia32_cmpltpd: 16266 return getVectorFCmpIR(CmpInst::FCMP_OLT, /*IsSignaling*/true); 16267 case X86::BI__builtin_ia32_cmpleps: 16268 case X86::BI__builtin_ia32_cmplepd: 16269 return getVectorFCmpIR(CmpInst::FCMP_OLE, /*IsSignaling*/true); 16270 case X86::BI__builtin_ia32_cmpunordps: 16271 case X86::BI__builtin_ia32_cmpunordpd: 16272 return getVectorFCmpIR(CmpInst::FCMP_UNO, /*IsSignaling*/false); 16273 case X86::BI__builtin_ia32_cmpneqps: 16274 case X86::BI__builtin_ia32_cmpneqpd: 16275 return getVectorFCmpIR(CmpInst::FCMP_UNE, /*IsSignaling*/false); 16276 case X86::BI__builtin_ia32_cmpnltps: 16277 case X86::BI__builtin_ia32_cmpnltpd: 16278 return getVectorFCmpIR(CmpInst::FCMP_UGE, /*IsSignaling*/true); 16279 case X86::BI__builtin_ia32_cmpnleps: 16280 case X86::BI__builtin_ia32_cmpnlepd: 16281 return getVectorFCmpIR(CmpInst::FCMP_UGT, /*IsSignaling*/true); 16282 case X86::BI__builtin_ia32_cmpordps: 16283 case X86::BI__builtin_ia32_cmpordpd: 16284 return getVectorFCmpIR(CmpInst::FCMP_ORD, /*IsSignaling*/false); 16285 case X86::BI__builtin_ia32_cmpph128_mask: 16286 case X86::BI__builtin_ia32_cmpph256_mask: 16287 case X86::BI__builtin_ia32_cmpph512_mask: 16288 case X86::BI__builtin_ia32_cmpps128_mask: 16289 case X86::BI__builtin_ia32_cmpps256_mask: 16290 case X86::BI__builtin_ia32_cmpps512_mask: 16291 case X86::BI__builtin_ia32_cmppd128_mask: 16292 case X86::BI__builtin_ia32_cmppd256_mask: 16293 case X86::BI__builtin_ia32_cmppd512_mask: 16294 IsMaskFCmp = true; 16295 [[fallthrough]]; 16296 case X86::BI__builtin_ia32_cmpps: 16297 case X86::BI__builtin_ia32_cmpps256: 16298 case X86::BI__builtin_ia32_cmppd: 16299 case X86::BI__builtin_ia32_cmppd256: { 16300 // Lowering vector comparisons to fcmp instructions, while 16301 // ignoring signalling behaviour requested 16302 // ignoring rounding mode requested 16303 // This is only possible if fp-model is not strict and FENV_ACCESS is off. 16304 16305 // The third argument is the comparison condition, and integer in the 16306 // range [0, 31] 16307 unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x1f; 16308 16309 // Lowering to IR fcmp instruction. 16310 // Ignoring requested signaling behaviour, 16311 // e.g. both _CMP_GT_OS & _CMP_GT_OQ are translated to FCMP_OGT. 16312 FCmpInst::Predicate Pred; 16313 bool IsSignaling; 16314 // Predicates for 16-31 repeat the 0-15 predicates. Only the signalling 16315 // behavior is inverted. We'll handle that after the switch. 
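    // Concretely: CC & 0xf picks the predicate and CC & 0x10 only flips
    // IsSignaling, so (using the usual <immintrin.h> immediates) 0x01
    // (_CMP_LT_OS, signaling) and 0x11 (_CMP_LT_OQ, quiet) both end up as
    // FCMP_OLT below.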
16316 switch (CC & 0xf) { 16317 case 0x00: Pred = FCmpInst::FCMP_OEQ; IsSignaling = false; break; 16318 case 0x01: Pred = FCmpInst::FCMP_OLT; IsSignaling = true; break; 16319 case 0x02: Pred = FCmpInst::FCMP_OLE; IsSignaling = true; break; 16320 case 0x03: Pred = FCmpInst::FCMP_UNO; IsSignaling = false; break; 16321 case 0x04: Pred = FCmpInst::FCMP_UNE; IsSignaling = false; break; 16322 case 0x05: Pred = FCmpInst::FCMP_UGE; IsSignaling = true; break; 16323 case 0x06: Pred = FCmpInst::FCMP_UGT; IsSignaling = true; break; 16324 case 0x07: Pred = FCmpInst::FCMP_ORD; IsSignaling = false; break; 16325 case 0x08: Pred = FCmpInst::FCMP_UEQ; IsSignaling = false; break; 16326 case 0x09: Pred = FCmpInst::FCMP_ULT; IsSignaling = true; break; 16327 case 0x0a: Pred = FCmpInst::FCMP_ULE; IsSignaling = true; break; 16328 case 0x0b: Pred = FCmpInst::FCMP_FALSE; IsSignaling = false; break; 16329 case 0x0c: Pred = FCmpInst::FCMP_ONE; IsSignaling = false; break; 16330 case 0x0d: Pred = FCmpInst::FCMP_OGE; IsSignaling = true; break; 16331 case 0x0e: Pred = FCmpInst::FCMP_OGT; IsSignaling = true; break; 16332 case 0x0f: Pred = FCmpInst::FCMP_TRUE; IsSignaling = false; break; 16333 default: llvm_unreachable("Unhandled CC"); 16334 } 16335 16336 // Invert the signalling behavior for 16-31. 16337 if (CC & 0x10) 16338 IsSignaling = !IsSignaling; 16339 16340 // If the predicate is true or false and we're using constrained intrinsics, 16341 // we don't have a compare intrinsic we can use. Just use the legacy X86 16342 // specific intrinsic. 16343 // If the intrinsic is mask enabled and we're using constrained intrinsics, 16344 // use the legacy X86 specific intrinsic. 16345 if (Builder.getIsFPConstrained() && 16346 (Pred == FCmpInst::FCMP_TRUE || Pred == FCmpInst::FCMP_FALSE || 16347 IsMaskFCmp)) { 16348 16349 Intrinsic::ID IID; 16350 switch (BuiltinID) { 16351 default: llvm_unreachable("Unexpected builtin"); 16352 case X86::BI__builtin_ia32_cmpps: 16353 IID = Intrinsic::x86_sse_cmp_ps; 16354 break; 16355 case X86::BI__builtin_ia32_cmpps256: 16356 IID = Intrinsic::x86_avx_cmp_ps_256; 16357 break; 16358 case X86::BI__builtin_ia32_cmppd: 16359 IID = Intrinsic::x86_sse2_cmp_pd; 16360 break; 16361 case X86::BI__builtin_ia32_cmppd256: 16362 IID = Intrinsic::x86_avx_cmp_pd_256; 16363 break; 16364 case X86::BI__builtin_ia32_cmpph128_mask: 16365 IID = Intrinsic::x86_avx512fp16_mask_cmp_ph_128; 16366 break; 16367 case X86::BI__builtin_ia32_cmpph256_mask: 16368 IID = Intrinsic::x86_avx512fp16_mask_cmp_ph_256; 16369 break; 16370 case X86::BI__builtin_ia32_cmpph512_mask: 16371 IID = Intrinsic::x86_avx512fp16_mask_cmp_ph_512; 16372 break; 16373 case X86::BI__builtin_ia32_cmpps512_mask: 16374 IID = Intrinsic::x86_avx512_mask_cmp_ps_512; 16375 break; 16376 case X86::BI__builtin_ia32_cmppd512_mask: 16377 IID = Intrinsic::x86_avx512_mask_cmp_pd_512; 16378 break; 16379 case X86::BI__builtin_ia32_cmpps128_mask: 16380 IID = Intrinsic::x86_avx512_mask_cmp_ps_128; 16381 break; 16382 case X86::BI__builtin_ia32_cmpps256_mask: 16383 IID = Intrinsic::x86_avx512_mask_cmp_ps_256; 16384 break; 16385 case X86::BI__builtin_ia32_cmppd128_mask: 16386 IID = Intrinsic::x86_avx512_mask_cmp_pd_128; 16387 break; 16388 case X86::BI__builtin_ia32_cmppd256_mask: 16389 IID = Intrinsic::x86_avx512_mask_cmp_pd_256; 16390 break; 16391 } 16392 16393 Function *Intr = CGM.getIntrinsic(IID); 16394 if (IsMaskFCmp) { 16395 unsigned NumElts = 16396 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements(); 16397 Ops[3] = getMaskVecValue(*this, Ops[3], NumElts); 
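      // (getMaskVecValue converts the integer mask operand, e.g. a __mmask8
      // value, into a <NumElts x i1> vector, which is the form the masked
      // compare intrinsics expect.)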
16398 Value *Cmp = Builder.CreateCall(Intr, Ops); 16399 return EmitX86MaskedCompareResult(*this, Cmp, NumElts, nullptr); 16400 } 16401 16402 return Builder.CreateCall(Intr, Ops); 16403 } 16404 16405 // Builtins without the _mask suffix return a vector of integers 16406 // of the same width as the input vectors 16407 if (IsMaskFCmp) { 16408 // We ignore SAE if strict FP is disabled. We only keep precise 16409 // exception behavior under strict FP. 16410 // NOTE: If strict FP does ever go through here a CGFPOptionsRAII 16411 // object will be required. 16412 unsigned NumElts = 16413 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements(); 16414 Value *Cmp; 16415 if (IsSignaling) 16416 Cmp = Builder.CreateFCmpS(Pred, Ops[0], Ops[1]); 16417 else 16418 Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]); 16419 return EmitX86MaskedCompareResult(*this, Cmp, NumElts, Ops[3]); 16420 } 16421 16422 return getVectorFCmpIR(Pred, IsSignaling); 16423 } 16424 16425 // SSE scalar comparison intrinsics 16426 case X86::BI__builtin_ia32_cmpeqss: 16427 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 0); 16428 case X86::BI__builtin_ia32_cmpltss: 16429 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 1); 16430 case X86::BI__builtin_ia32_cmpless: 16431 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 2); 16432 case X86::BI__builtin_ia32_cmpunordss: 16433 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 3); 16434 case X86::BI__builtin_ia32_cmpneqss: 16435 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 4); 16436 case X86::BI__builtin_ia32_cmpnltss: 16437 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 5); 16438 case X86::BI__builtin_ia32_cmpnless: 16439 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 6); 16440 case X86::BI__builtin_ia32_cmpordss: 16441 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 7); 16442 case X86::BI__builtin_ia32_cmpeqsd: 16443 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 0); 16444 case X86::BI__builtin_ia32_cmpltsd: 16445 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 1); 16446 case X86::BI__builtin_ia32_cmplesd: 16447 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 2); 16448 case X86::BI__builtin_ia32_cmpunordsd: 16449 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 3); 16450 case X86::BI__builtin_ia32_cmpneqsd: 16451 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 4); 16452 case X86::BI__builtin_ia32_cmpnltsd: 16453 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 5); 16454 case X86::BI__builtin_ia32_cmpnlesd: 16455 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 6); 16456 case X86::BI__builtin_ia32_cmpordsd: 16457 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 7); 16458 16459 // f16c half2float intrinsics 16460 case X86::BI__builtin_ia32_vcvtph2ps: 16461 case X86::BI__builtin_ia32_vcvtph2ps256: 16462 case X86::BI__builtin_ia32_vcvtph2ps_mask: 16463 case X86::BI__builtin_ia32_vcvtph2ps256_mask: 16464 case X86::BI__builtin_ia32_vcvtph2ps512_mask: { 16465 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E); 16466 return EmitX86CvtF16ToFloatExpr(*this, Ops, ConvertType(E->getType())); 16467 } 16468 16469 // AVX512 bf16 intrinsics 16470 case X86::BI__builtin_ia32_cvtneps2bf16_128_mask: { 16471 Ops[2] = getMaskVecValue( 16472 *this, Ops[2], 16473 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements()); 16474 Intrinsic::ID IID = Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128; 16475 return Builder.CreateCall(CGM.getIntrinsic(IID), Ops); 16476 } 16477 case 
X86::BI__builtin_ia32_cvtsbf162ss_32: 16478 return Builder.CreateFPExt(Ops[0], Builder.getFloatTy()); 16479 16480 case X86::BI__builtin_ia32_cvtneps2bf16_256_mask: 16481 case X86::BI__builtin_ia32_cvtneps2bf16_512_mask: { 16482 Intrinsic::ID IID; 16483 switch (BuiltinID) { 16484 default: llvm_unreachable("Unsupported intrinsic!"); 16485 case X86::BI__builtin_ia32_cvtneps2bf16_256_mask: 16486 IID = Intrinsic::x86_avx512bf16_cvtneps2bf16_256; 16487 break; 16488 case X86::BI__builtin_ia32_cvtneps2bf16_512_mask: 16489 IID = Intrinsic::x86_avx512bf16_cvtneps2bf16_512; 16490 break; 16491 } 16492 Value *Res = Builder.CreateCall(CGM.getIntrinsic(IID), Ops[0]); 16493 return EmitX86Select(*this, Ops[2], Res, Ops[1]); 16494 } 16495 16496 case X86::BI__cpuid: 16497 case X86::BI__cpuidex: { 16498 Value *FuncId = EmitScalarExpr(E->getArg(1)); 16499 Value *SubFuncId = BuiltinID == X86::BI__cpuidex 16500 ? EmitScalarExpr(E->getArg(2)) 16501 : llvm::ConstantInt::get(Int32Ty, 0); 16502 16503 llvm::StructType *CpuidRetTy = 16504 llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty, Int32Ty); 16505 llvm::FunctionType *FTy = 16506 llvm::FunctionType::get(CpuidRetTy, {Int32Ty, Int32Ty}, false); 16507 16508 StringRef Asm, Constraints; 16509 if (getTarget().getTriple().getArch() == llvm::Triple::x86) { 16510 Asm = "cpuid"; 16511 Constraints = "={ax},={bx},={cx},={dx},{ax},{cx}"; 16512 } else { 16513 // x86-64 uses %rbx as the base register, so preserve it. 16514 Asm = "xchgq %rbx, ${1:q}\n" 16515 "cpuid\n" 16516 "xchgq %rbx, ${1:q}"; 16517 Constraints = "={ax},=r,={cx},={dx},0,2"; 16518 } 16519 16520 llvm::InlineAsm *IA = llvm::InlineAsm::get(FTy, Asm, Constraints, 16521 /*hasSideEffects=*/false); 16522 Value *IACall = Builder.CreateCall(IA, {FuncId, SubFuncId}); 16523 Value *BasePtr = EmitScalarExpr(E->getArg(0)); 16524 Value *Store = nullptr; 16525 for (unsigned i = 0; i < 4; i++) { 16526 Value *Extracted = Builder.CreateExtractValue(IACall, i); 16527 Value *StorePtr = Builder.CreateConstInBoundsGEP1_32(Int32Ty, BasePtr, i); 16528 Store = Builder.CreateAlignedStore(Extracted, StorePtr, getIntAlign()); 16529 } 16530 16531 // Return the last store instruction to signal that we have emitted the 16532 // the intrinsic. 
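    // (Illustrative usage, not part of the lowering: for a call such as
    //   int Regs[4]; __cpuid(Regs, 1);
    // the loop above produces four i32 stores of the asm results into Regs.)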
16533 return Store; 16534 } 16535 16536 case X86::BI__emul: 16537 case X86::BI__emulu: { 16538 llvm::Type *Int64Ty = llvm::IntegerType::get(getLLVMContext(), 64); 16539 bool isSigned = (BuiltinID == X86::BI__emul); 16540 Value *LHS = Builder.CreateIntCast(Ops[0], Int64Ty, isSigned); 16541 Value *RHS = Builder.CreateIntCast(Ops[1], Int64Ty, isSigned); 16542 return Builder.CreateMul(LHS, RHS, "", !isSigned, isSigned); 16543 } 16544 case X86::BI__mulh: 16545 case X86::BI__umulh: 16546 case X86::BI_mul128: 16547 case X86::BI_umul128: { 16548 llvm::Type *ResType = ConvertType(E->getType()); 16549 llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128); 16550 16551 bool IsSigned = (BuiltinID == X86::BI__mulh || BuiltinID == X86::BI_mul128); 16552 Value *LHS = Builder.CreateIntCast(Ops[0], Int128Ty, IsSigned); 16553 Value *RHS = Builder.CreateIntCast(Ops[1], Int128Ty, IsSigned); 16554 16555 Value *MulResult, *HigherBits; 16556 if (IsSigned) { 16557 MulResult = Builder.CreateNSWMul(LHS, RHS); 16558 HigherBits = Builder.CreateAShr(MulResult, 64); 16559 } else { 16560 MulResult = Builder.CreateNUWMul(LHS, RHS); 16561 HigherBits = Builder.CreateLShr(MulResult, 64); 16562 } 16563 HigherBits = Builder.CreateIntCast(HigherBits, ResType, IsSigned); 16564 16565 if (BuiltinID == X86::BI__mulh || BuiltinID == X86::BI__umulh) 16566 return HigherBits; 16567 16568 Address HighBitsAddress = EmitPointerWithAlignment(E->getArg(2)); 16569 Builder.CreateStore(HigherBits, HighBitsAddress); 16570 return Builder.CreateIntCast(MulResult, ResType, IsSigned); 16571 } 16572 16573 case X86::BI__faststorefence: { 16574 return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, 16575 llvm::SyncScope::System); 16576 } 16577 case X86::BI__shiftleft128: 16578 case X86::BI__shiftright128: { 16579 llvm::Function *F = CGM.getIntrinsic( 16580 BuiltinID == X86::BI__shiftleft128 ? Intrinsic::fshl : Intrinsic::fshr, 16581 Int64Ty); 16582 // Flip low/high ops and zero-extend amount to matching type. 16583 // shiftleft128(Low, High, Amt) -> fshl(High, Low, Amt) 16584 // shiftright128(Low, High, Amt) -> fshr(High, Low, Amt) 16585 std::swap(Ops[0], Ops[1]); 16586 Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty); 16587 return Builder.CreateCall(F, Ops); 16588 } 16589 case X86::BI_ReadWriteBarrier: 16590 case X86::BI_ReadBarrier: 16591 case X86::BI_WriteBarrier: { 16592 return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, 16593 llvm::SyncScope::SingleThread); 16594 } 16595 16596 case X86::BI_AddressOfReturnAddress: { 16597 Function *F = 16598 CGM.getIntrinsic(Intrinsic::addressofreturnaddress, AllocaInt8PtrTy); 16599 return Builder.CreateCall(F); 16600 } 16601 case X86::BI__stosb: { 16602 // We treat __stosb as a volatile memset - it may not generate "rep stosb" 16603 // instruction, but it will create a memset that won't be optimized away. 16604 return Builder.CreateMemSet(Ops[0], Ops[1], Ops[2], Align(1), true); 16605 } 16606 case X86::BI__ud2: 16607 // llvm.trap makes a ud2a instruction on x86. 16608 return EmitTrapCall(Intrinsic::trap); 16609 case X86::BI__int2c: { 16610 // This syscall signals a driver assertion failure in x86 NT kernels. 
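    // It is modeled as not returning, hence the noreturn attribute attached
    // to the call below.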
16611 llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false); 16612 llvm::InlineAsm *IA = 16613 llvm::InlineAsm::get(FTy, "int $$0x2c", "", /*hasSideEffects=*/true); 16614 llvm::AttributeList NoReturnAttr = llvm::AttributeList::get( 16615 getLLVMContext(), llvm::AttributeList::FunctionIndex, 16616 llvm::Attribute::NoReturn); 16617 llvm::CallInst *CI = Builder.CreateCall(IA); 16618 CI->setAttributes(NoReturnAttr); 16619 return CI; 16620 } 16621 case X86::BI__readfsbyte: 16622 case X86::BI__readfsword: 16623 case X86::BI__readfsdword: 16624 case X86::BI__readfsqword: { 16625 llvm::Type *IntTy = ConvertType(E->getType()); 16626 Value *Ptr = Builder.CreateIntToPtr( 16627 Ops[0], llvm::PointerType::get(getLLVMContext(), 257)); 16628 LoadInst *Load = Builder.CreateAlignedLoad( 16629 IntTy, Ptr, getContext().getTypeAlignInChars(E->getType())); 16630 Load->setVolatile(true); 16631 return Load; 16632 } 16633 case X86::BI__readgsbyte: 16634 case X86::BI__readgsword: 16635 case X86::BI__readgsdword: 16636 case X86::BI__readgsqword: { 16637 llvm::Type *IntTy = ConvertType(E->getType()); 16638 Value *Ptr = Builder.CreateIntToPtr( 16639 Ops[0], llvm::PointerType::get(getLLVMContext(), 256)); 16640 LoadInst *Load = Builder.CreateAlignedLoad( 16641 IntTy, Ptr, getContext().getTypeAlignInChars(E->getType())); 16642 Load->setVolatile(true); 16643 return Load; 16644 } 16645 case X86::BI__builtin_ia32_encodekey128_u32: { 16646 Intrinsic::ID IID = Intrinsic::x86_encodekey128; 16647 16648 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), {Ops[0], Ops[1]}); 16649 16650 for (int i = 0; i < 3; ++i) { 16651 Value *Extract = Builder.CreateExtractValue(Call, i + 1); 16652 Value *Ptr = Builder.CreateConstGEP1_32(Int8Ty, Ops[2], i * 16); 16653 Builder.CreateAlignedStore(Extract, Ptr, Align(1)); 16654 } 16655 16656 return Builder.CreateExtractValue(Call, 0); 16657 } 16658 case X86::BI__builtin_ia32_encodekey256_u32: { 16659 Intrinsic::ID IID = Intrinsic::x86_encodekey256; 16660 16661 Value *Call = 16662 Builder.CreateCall(CGM.getIntrinsic(IID), {Ops[0], Ops[1], Ops[2]}); 16663 16664 for (int i = 0; i < 4; ++i) { 16665 Value *Extract = Builder.CreateExtractValue(Call, i + 1); 16666 Value *Ptr = Builder.CreateConstGEP1_32(Int8Ty, Ops[3], i * 16); 16667 Builder.CreateAlignedStore(Extract, Ptr, Align(1)); 16668 } 16669 16670 return Builder.CreateExtractValue(Call, 0); 16671 } 16672 case X86::BI__builtin_ia32_aesenc128kl_u8: 16673 case X86::BI__builtin_ia32_aesdec128kl_u8: 16674 case X86::BI__builtin_ia32_aesenc256kl_u8: 16675 case X86::BI__builtin_ia32_aesdec256kl_u8: { 16676 Intrinsic::ID IID; 16677 StringRef BlockName; 16678 switch (BuiltinID) { 16679 default: 16680 llvm_unreachable("Unexpected builtin"); 16681 case X86::BI__builtin_ia32_aesenc128kl_u8: 16682 IID = Intrinsic::x86_aesenc128kl; 16683 BlockName = "aesenc128kl"; 16684 break; 16685 case X86::BI__builtin_ia32_aesdec128kl_u8: 16686 IID = Intrinsic::x86_aesdec128kl; 16687 BlockName = "aesdec128kl"; 16688 break; 16689 case X86::BI__builtin_ia32_aesenc256kl_u8: 16690 IID = Intrinsic::x86_aesenc256kl; 16691 BlockName = "aesenc256kl"; 16692 break; 16693 case X86::BI__builtin_ia32_aesdec256kl_u8: 16694 IID = Intrinsic::x86_aesdec256kl; 16695 BlockName = "aesdec256kl"; 16696 break; 16697 } 16698 16699 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), {Ops[1], Ops[2]}); 16700 16701 BasicBlock *NoError = 16702 createBasicBlock(BlockName + "_no_error", this->CurFn); 16703 BasicBlock *Error = createBasicBlock(BlockName + "_error", this->CurFn); 16704 
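    // (End is the common join block: the success and failure paths created
    // below each store into Ops[0] and then branch to it.)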
BasicBlock *End = createBasicBlock(BlockName + "_end", this->CurFn); 16705 16706 Value *Ret = Builder.CreateExtractValue(Call, 0); 16707 Value *Succ = Builder.CreateTrunc(Ret, Builder.getInt1Ty()); 16708 Value *Out = Builder.CreateExtractValue(Call, 1); 16709 Builder.CreateCondBr(Succ, NoError, Error); 16710 16711 Builder.SetInsertPoint(NoError); 16712 Builder.CreateDefaultAlignedStore(Out, Ops[0]); 16713 Builder.CreateBr(End); 16714 16715 Builder.SetInsertPoint(Error); 16716 Constant *Zero = llvm::Constant::getNullValue(Out->getType()); 16717 Builder.CreateDefaultAlignedStore(Zero, Ops[0]); 16718 Builder.CreateBr(End); 16719 16720 Builder.SetInsertPoint(End); 16721 return Builder.CreateExtractValue(Call, 0); 16722 } 16723 case X86::BI__builtin_ia32_aesencwide128kl_u8: 16724 case X86::BI__builtin_ia32_aesdecwide128kl_u8: 16725 case X86::BI__builtin_ia32_aesencwide256kl_u8: 16726 case X86::BI__builtin_ia32_aesdecwide256kl_u8: { 16727 Intrinsic::ID IID; 16728 StringRef BlockName; 16729 switch (BuiltinID) { 16730 case X86::BI__builtin_ia32_aesencwide128kl_u8: 16731 IID = Intrinsic::x86_aesencwide128kl; 16732 BlockName = "aesencwide128kl"; 16733 break; 16734 case X86::BI__builtin_ia32_aesdecwide128kl_u8: 16735 IID = Intrinsic::x86_aesdecwide128kl; 16736 BlockName = "aesdecwide128kl"; 16737 break; 16738 case X86::BI__builtin_ia32_aesencwide256kl_u8: 16739 IID = Intrinsic::x86_aesencwide256kl; 16740 BlockName = "aesencwide256kl"; 16741 break; 16742 case X86::BI__builtin_ia32_aesdecwide256kl_u8: 16743 IID = Intrinsic::x86_aesdecwide256kl; 16744 BlockName = "aesdecwide256kl"; 16745 break; 16746 } 16747 16748 llvm::Type *Ty = FixedVectorType::get(Builder.getInt64Ty(), 2); 16749 Value *InOps[9]; 16750 InOps[0] = Ops[2]; 16751 for (int i = 0; i != 8; ++i) { 16752 Value *Ptr = Builder.CreateConstGEP1_32(Ty, Ops[1], i); 16753 InOps[i + 1] = Builder.CreateAlignedLoad(Ty, Ptr, Align(16)); 16754 } 16755 16756 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), InOps); 16757 16758 BasicBlock *NoError = 16759 createBasicBlock(BlockName + "_no_error", this->CurFn); 16760 BasicBlock *Error = createBasicBlock(BlockName + "_error", this->CurFn); 16761 BasicBlock *End = createBasicBlock(BlockName + "_end", this->CurFn); 16762 16763 Value *Ret = Builder.CreateExtractValue(Call, 0); 16764 Value *Succ = Builder.CreateTrunc(Ret, Builder.getInt1Ty()); 16765 Builder.CreateCondBr(Succ, NoError, Error); 16766 16767 Builder.SetInsertPoint(NoError); 16768 for (int i = 0; i != 8; ++i) { 16769 Value *Extract = Builder.CreateExtractValue(Call, i + 1); 16770 Value *Ptr = Builder.CreateConstGEP1_32(Extract->getType(), Ops[0], i); 16771 Builder.CreateAlignedStore(Extract, Ptr, Align(16)); 16772 } 16773 Builder.CreateBr(End); 16774 16775 Builder.SetInsertPoint(Error); 16776 for (int i = 0; i != 8; ++i) { 16777 Value *Out = Builder.CreateExtractValue(Call, i + 1); 16778 Constant *Zero = llvm::Constant::getNullValue(Out->getType()); 16779 Value *Ptr = Builder.CreateConstGEP1_32(Out->getType(), Ops[0], i); 16780 Builder.CreateAlignedStore(Zero, Ptr, Align(16)); 16781 } 16782 Builder.CreateBr(End); 16783 16784 Builder.SetInsertPoint(End); 16785 return Builder.CreateExtractValue(Call, 0); 16786 } 16787 case X86::BI__builtin_ia32_vfcmaddcph512_mask: 16788 IsConjFMA = true; 16789 [[fallthrough]]; 16790 case X86::BI__builtin_ia32_vfmaddcph512_mask: { 16791 Intrinsic::ID IID = IsConjFMA 16792 ? 
Intrinsic::x86_avx512fp16_mask_vfcmadd_cph_512 16793 : Intrinsic::x86_avx512fp16_mask_vfmadd_cph_512; 16794 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), Ops); 16795 return EmitX86Select(*this, Ops[3], Call, Ops[0]); 16796 } 16797 case X86::BI__builtin_ia32_vfcmaddcsh_round_mask: 16798 IsConjFMA = true; 16799 [[fallthrough]]; 16800 case X86::BI__builtin_ia32_vfmaddcsh_round_mask: { 16801 Intrinsic::ID IID = IsConjFMA ? Intrinsic::x86_avx512fp16_mask_vfcmadd_csh 16802 : Intrinsic::x86_avx512fp16_mask_vfmadd_csh; 16803 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), Ops); 16804 Value *And = Builder.CreateAnd(Ops[3], llvm::ConstantInt::get(Int8Ty, 1)); 16805 return EmitX86Select(*this, And, Call, Ops[0]); 16806 } 16807 case X86::BI__builtin_ia32_vfcmaddcsh_round_mask3: 16808 IsConjFMA = true; 16809 [[fallthrough]]; 16810 case X86::BI__builtin_ia32_vfmaddcsh_round_mask3: { 16811 Intrinsic::ID IID = IsConjFMA ? Intrinsic::x86_avx512fp16_mask_vfcmadd_csh 16812 : Intrinsic::x86_avx512fp16_mask_vfmadd_csh; 16813 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), Ops); 16814 static constexpr int Mask[] = {0, 5, 6, 7}; 16815 return Builder.CreateShuffleVector(Call, Ops[2], Mask); 16816 } 16817 case X86::BI__builtin_ia32_prefetchi: 16818 return Builder.CreateCall( 16819 CGM.getIntrinsic(Intrinsic::prefetch, Ops[0]->getType()), 16820 {Ops[0], llvm::ConstantInt::get(Int32Ty, 0), Ops[1], 16821 llvm::ConstantInt::get(Int32Ty, 0)}); 16822 } 16823 } 16824 16825 Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, 16826 const CallExpr *E) { 16827 // Do not emit the builtin arguments in the arguments of a function call, 16828 // because the evaluation order of function arguments is not specified in C++. 16829 // This is important when testing to ensure the arguments are emitted in the 16830 // same order every time. Eg: 16831 // Instead of: 16832 // return Builder.CreateFDiv(EmitScalarExpr(E->getArg(0)), 16833 // EmitScalarExpr(E->getArg(1)), "swdiv"); 16834 // Use: 16835 // Value *Op0 = EmitScalarExpr(E->getArg(0)); 16836 // Value *Op1 = EmitScalarExpr(E->getArg(1)); 16837 // return Builder.CreateFDiv(Op0, Op1, "swdiv") 16838 16839 Intrinsic::ID ID = Intrinsic::not_intrinsic; 16840 16841 #include "llvm/TargetParser/PPCTargetParser.def" 16842 auto GenAIXPPCBuiltinCpuExpr = [&](unsigned SupportMethod, unsigned FieldIdx, 16843 unsigned Mask, CmpInst::Predicate CompOp, 16844 unsigned OpValue) -> Value * { 16845 if (SupportMethod == BUILTIN_PPC_FALSE) 16846 return llvm::ConstantInt::getFalse(ConvertType(E->getType())); 16847 16848 if (SupportMethod == BUILTIN_PPC_TRUE) 16849 return llvm::ConstantInt::getTrue(ConvertType(E->getType())); 16850 16851 assert(SupportMethod <= SYS_CALL && "Invalid value for SupportMethod."); 16852 16853 llvm::Value *FieldValue = nullptr; 16854 if (SupportMethod == USE_SYS_CONF) { 16855 llvm::Type *STy = llvm::StructType::get(PPC_SYSTEMCONFIG_TYPE); 16856 llvm::Constant *SysConf = 16857 CGM.CreateRuntimeVariable(STy, "_system_configuration"); 16858 16859 // Grab the appropriate field from _system_configuration. 
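      // (That is, load the 32-bit word at index FieldIdx of AIX's
      // _system_configuration structure; the SYS_CALL path below queries
      // getsystemcfg() instead.)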
16860 llvm::Value *Idxs[] = {ConstantInt::get(Int32Ty, 0), 16861 ConstantInt::get(Int32Ty, FieldIdx)}; 16862 16863 FieldValue = Builder.CreateInBoundsGEP(STy, SysConf, Idxs); 16864 FieldValue = Builder.CreateAlignedLoad(Int32Ty, FieldValue, 16865 CharUnits::fromQuantity(4)); 16866 } else if (SupportMethod == SYS_CALL) { 16867 llvm::FunctionType *FTy = 16868 llvm::FunctionType::get(Int64Ty, Int32Ty, false); 16869 llvm::FunctionCallee Func = 16870 CGM.CreateRuntimeFunction(FTy, "getsystemcfg"); 16871 16872 FieldValue = 16873 Builder.CreateCall(Func, {ConstantInt::get(Int32Ty, FieldIdx)}); 16874 } 16875 assert(FieldValue && 16876 "SupportMethod value is not defined in PPCTargetParser.def."); 16877 16878 if (Mask) 16879 FieldValue = Builder.CreateAnd(FieldValue, Mask); 16880 16881 llvm::Type *ValueType = FieldValue->getType(); 16882 bool IsValueType64Bit = ValueType->isIntegerTy(64); 16883 assert( 16884 (IsValueType64Bit || ValueType->isIntegerTy(32)) && 16885 "Only 32/64-bit integers are supported in GenAIXPPCBuiltinCpuExpr()."); 16886 16887 return Builder.CreateICmp( 16888 CompOp, FieldValue, 16889 ConstantInt::get(IsValueType64Bit ? Int64Ty : Int32Ty, OpValue)); 16890 }; 16891 16892 switch (BuiltinID) { 16893 default: return nullptr; 16894 16895 case Builtin::BI__builtin_cpu_is: { 16896 const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts(); 16897 StringRef CPUStr = cast<clang::StringLiteral>(CPUExpr)->getString(); 16898 llvm::Triple Triple = getTarget().getTriple(); 16899 16900 unsigned LinuxSupportMethod, LinuxIDValue, AIXSupportMethod, AIXIDValue; 16901 typedef std::tuple<unsigned, unsigned, unsigned, unsigned> CPUInfo; 16902 16903 std::tie(LinuxSupportMethod, LinuxIDValue, AIXSupportMethod, AIXIDValue) = 16904 static_cast<CPUInfo>(StringSwitch<CPUInfo>(CPUStr) 16905 #define PPC_CPU(NAME, Linux_SUPPORT_METHOD, LinuxID, AIX_SUPPORT_METHOD, \ 16906 AIXID) \ 16907 .Case(NAME, {Linux_SUPPORT_METHOD, LinuxID, AIX_SUPPORT_METHOD, AIXID}) 16908 #include "llvm/TargetParser/PPCTargetParser.def" 16909 .Default({BUILTIN_PPC_UNSUPPORTED, 0, 16910 BUILTIN_PPC_UNSUPPORTED, 0})); 16911 16912 if (Triple.isOSAIX()) { 16913 assert((AIXSupportMethod != BUILTIN_PPC_UNSUPPORTED) && 16914 "Invalid CPU name. Missed by SemaChecking?"); 16915 return GenAIXPPCBuiltinCpuExpr(AIXSupportMethod, AIX_SYSCON_IMPL_IDX, 0, 16916 ICmpInst::ICMP_EQ, AIXIDValue); 16917 } 16918 16919 assert(Triple.isOSLinux() && 16920 "__builtin_cpu_is() is only supported for AIX and Linux."); 16921 16922 assert((LinuxSupportMethod != BUILTIN_PPC_UNSUPPORTED) && 16923 "Invalid CPU name. 
Missed by SemaChecking?"); 16924 16925 if (LinuxSupportMethod == BUILTIN_PPC_FALSE) 16926 return llvm::ConstantInt::getFalse(ConvertType(E->getType())); 16927 16928 Value *Op0 = llvm::ConstantInt::get(Int32Ty, PPC_FAWORD_CPUID); 16929 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_fixed_addr_ld); 16930 Value *TheCall = Builder.CreateCall(F, {Op0}, "cpu_is"); 16931 return Builder.CreateICmpEQ(TheCall, 16932 llvm::ConstantInt::get(Int32Ty, LinuxIDValue)); 16933 } 16934 case Builtin::BI__builtin_cpu_supports: { 16935 llvm::Triple Triple = getTarget().getTriple(); 16936 const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts(); 16937 StringRef CPUStr = cast<clang::StringLiteral>(CPUExpr)->getString(); 16938 if (Triple.isOSAIX()) { 16939 unsigned SupportMethod, FieldIdx, Mask, Value; 16940 CmpInst::Predicate CompOp; 16941 typedef std::tuple<unsigned, unsigned, unsigned, CmpInst::Predicate, 16942 unsigned> 16943 CPUSupportType; 16944 std::tie(SupportMethod, FieldIdx, Mask, CompOp, Value) = 16945 static_cast<CPUSupportType>(StringSwitch<CPUSupportType>(CPUStr) 16946 #define PPC_AIX_FEATURE(NAME, DESC, SUPPORT_METHOD, INDEX, MASK, COMP_OP, \ 16947 VALUE) \ 16948 .Case(NAME, {SUPPORT_METHOD, INDEX, MASK, COMP_OP, VALUE}) 16949 #include "llvm/TargetParser/PPCTargetParser.def" 16950 .Default({BUILTIN_PPC_FALSE, 0, 0, 16951 CmpInst::Predicate(), 0})); 16952 return GenAIXPPCBuiltinCpuExpr(SupportMethod, FieldIdx, Mask, CompOp, 16953 Value); 16954 } 16955 16956 assert(Triple.isOSLinux() && 16957 "__builtin_cpu_supports() is only supported for AIX and Linux."); 16958 unsigned FeatureWord; 16959 unsigned BitMask; 16960 std::tie(FeatureWord, BitMask) = 16961 StringSwitch<std::pair<unsigned, unsigned>>(CPUStr) 16962 #define PPC_LNX_FEATURE(Name, Description, EnumName, Bitmask, FA_WORD) \ 16963 .Case(Name, {FA_WORD, Bitmask}) 16964 #include "llvm/TargetParser/PPCTargetParser.def" 16965 .Default({0, 0}); 16966 if (!BitMask) 16967 return Builder.getFalse(); 16968 Value *Op0 = llvm::ConstantInt::get(Int32Ty, FeatureWord); 16969 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_fixed_addr_ld); 16970 Value *TheCall = Builder.CreateCall(F, {Op0}, "cpu_supports"); 16971 Value *Mask = 16972 Builder.CreateAnd(TheCall, llvm::ConstantInt::get(Int32Ty, BitMask)); 16973 return Builder.CreateICmpNE(Mask, llvm::Constant::getNullValue(Int32Ty)); 16974 #undef PPC_FAWORD_HWCAP 16975 #undef PPC_FAWORD_HWCAP2 16976 #undef PPC_FAWORD_CPUID 16977 } 16978 16979 // __builtin_ppc_get_timebase is GCC 4.8+'s PowerPC-specific name for what we 16980 // call __builtin_readcyclecounter. 
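  // Illustrative equivalence at the source level:
  //   __builtin_ppc_get_timebase();   // PPC-specific spelling
  //   __builtin_readcyclecounter();   // generic Clang spelling
  // both lower to a single call to llvm.readcyclecounter.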
16981 case PPC::BI__builtin_ppc_get_timebase: 16982 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::readcyclecounter)); 16983 16984 // vec_ld, vec_xl_be, vec_lvsl, vec_lvsr 16985 case PPC::BI__builtin_altivec_lvx: 16986 case PPC::BI__builtin_altivec_lvxl: 16987 case PPC::BI__builtin_altivec_lvebx: 16988 case PPC::BI__builtin_altivec_lvehx: 16989 case PPC::BI__builtin_altivec_lvewx: 16990 case PPC::BI__builtin_altivec_lvsl: 16991 case PPC::BI__builtin_altivec_lvsr: 16992 case PPC::BI__builtin_vsx_lxvd2x: 16993 case PPC::BI__builtin_vsx_lxvw4x: 16994 case PPC::BI__builtin_vsx_lxvd2x_be: 16995 case PPC::BI__builtin_vsx_lxvw4x_be: 16996 case PPC::BI__builtin_vsx_lxvl: 16997 case PPC::BI__builtin_vsx_lxvll: 16998 { 16999 SmallVector<Value *, 2> Ops; 17000 Ops.push_back(EmitScalarExpr(E->getArg(0))); 17001 Ops.push_back(EmitScalarExpr(E->getArg(1))); 17002 if (!(BuiltinID == PPC::BI__builtin_vsx_lxvl || 17003 BuiltinID == PPC::BI__builtin_vsx_lxvll)) { 17004 Ops[0] = Builder.CreateGEP(Int8Ty, Ops[1], Ops[0]); 17005 Ops.pop_back(); 17006 } 17007 17008 switch (BuiltinID) { 17009 default: llvm_unreachable("Unsupported ld/lvsl/lvsr intrinsic!"); 17010 case PPC::BI__builtin_altivec_lvx: 17011 ID = Intrinsic::ppc_altivec_lvx; 17012 break; 17013 case PPC::BI__builtin_altivec_lvxl: 17014 ID = Intrinsic::ppc_altivec_lvxl; 17015 break; 17016 case PPC::BI__builtin_altivec_lvebx: 17017 ID = Intrinsic::ppc_altivec_lvebx; 17018 break; 17019 case PPC::BI__builtin_altivec_lvehx: 17020 ID = Intrinsic::ppc_altivec_lvehx; 17021 break; 17022 case PPC::BI__builtin_altivec_lvewx: 17023 ID = Intrinsic::ppc_altivec_lvewx; 17024 break; 17025 case PPC::BI__builtin_altivec_lvsl: 17026 ID = Intrinsic::ppc_altivec_lvsl; 17027 break; 17028 case PPC::BI__builtin_altivec_lvsr: 17029 ID = Intrinsic::ppc_altivec_lvsr; 17030 break; 17031 case PPC::BI__builtin_vsx_lxvd2x: 17032 ID = Intrinsic::ppc_vsx_lxvd2x; 17033 break; 17034 case PPC::BI__builtin_vsx_lxvw4x: 17035 ID = Intrinsic::ppc_vsx_lxvw4x; 17036 break; 17037 case PPC::BI__builtin_vsx_lxvd2x_be: 17038 ID = Intrinsic::ppc_vsx_lxvd2x_be; 17039 break; 17040 case PPC::BI__builtin_vsx_lxvw4x_be: 17041 ID = Intrinsic::ppc_vsx_lxvw4x_be; 17042 break; 17043 case PPC::BI__builtin_vsx_lxvl: 17044 ID = Intrinsic::ppc_vsx_lxvl; 17045 break; 17046 case PPC::BI__builtin_vsx_lxvll: 17047 ID = Intrinsic::ppc_vsx_lxvll; 17048 break; 17049 } 17050 llvm::Function *F = CGM.getIntrinsic(ID); 17051 return Builder.CreateCall(F, Ops, ""); 17052 } 17053 17054 // vec_st, vec_xst_be 17055 case PPC::BI__builtin_altivec_stvx: 17056 case PPC::BI__builtin_altivec_stvxl: 17057 case PPC::BI__builtin_altivec_stvebx: 17058 case PPC::BI__builtin_altivec_stvehx: 17059 case PPC::BI__builtin_altivec_stvewx: 17060 case PPC::BI__builtin_vsx_stxvd2x: 17061 case PPC::BI__builtin_vsx_stxvw4x: 17062 case PPC::BI__builtin_vsx_stxvd2x_be: 17063 case PPC::BI__builtin_vsx_stxvw4x_be: 17064 case PPC::BI__builtin_vsx_stxvl: 17065 case PPC::BI__builtin_vsx_stxvll: 17066 { 17067 SmallVector<Value *, 3> Ops; 17068 Ops.push_back(EmitScalarExpr(E->getArg(0))); 17069 Ops.push_back(EmitScalarExpr(E->getArg(1))); 17070 Ops.push_back(EmitScalarExpr(E->getArg(2))); 17071 if (!(BuiltinID == PPC::BI__builtin_vsx_stxvl || 17072 BuiltinID == PPC::BI__builtin_vsx_stxvll)) { 17073 Ops[1] = Builder.CreateGEP(Int8Ty, Ops[2], Ops[1]); 17074 Ops.pop_back(); 17075 } 17076 17077 switch (BuiltinID) { 17078 default: llvm_unreachable("Unsupported st intrinsic!"); 17079 case PPC::BI__builtin_altivec_stvx: 17080 ID = 
Intrinsic::ppc_altivec_stvx; 17081 break; 17082 case PPC::BI__builtin_altivec_stvxl: 17083 ID = Intrinsic::ppc_altivec_stvxl; 17084 break; 17085 case PPC::BI__builtin_altivec_stvebx: 17086 ID = Intrinsic::ppc_altivec_stvebx; 17087 break; 17088 case PPC::BI__builtin_altivec_stvehx: 17089 ID = Intrinsic::ppc_altivec_stvehx; 17090 break; 17091 case PPC::BI__builtin_altivec_stvewx: 17092 ID = Intrinsic::ppc_altivec_stvewx; 17093 break; 17094 case PPC::BI__builtin_vsx_stxvd2x: 17095 ID = Intrinsic::ppc_vsx_stxvd2x; 17096 break; 17097 case PPC::BI__builtin_vsx_stxvw4x: 17098 ID = Intrinsic::ppc_vsx_stxvw4x; 17099 break; 17100 case PPC::BI__builtin_vsx_stxvd2x_be: 17101 ID = Intrinsic::ppc_vsx_stxvd2x_be; 17102 break; 17103 case PPC::BI__builtin_vsx_stxvw4x_be: 17104 ID = Intrinsic::ppc_vsx_stxvw4x_be; 17105 break; 17106 case PPC::BI__builtin_vsx_stxvl: 17107 ID = Intrinsic::ppc_vsx_stxvl; 17108 break; 17109 case PPC::BI__builtin_vsx_stxvll: 17110 ID = Intrinsic::ppc_vsx_stxvll; 17111 break; 17112 } 17113 llvm::Function *F = CGM.getIntrinsic(ID); 17114 return Builder.CreateCall(F, Ops, ""); 17115 } 17116 case PPC::BI__builtin_vsx_ldrmb: { 17117 // Essentially boils down to performing an unaligned VMX load sequence so 17118 // as to avoid crossing a page boundary and then shuffling the elements 17119 // into the right side of the vector register. 17120 Value *Op0 = EmitScalarExpr(E->getArg(0)); 17121 Value *Op1 = EmitScalarExpr(E->getArg(1)); 17122 int64_t NumBytes = cast<ConstantInt>(Op1)->getZExtValue(); 17123 llvm::Type *ResTy = ConvertType(E->getType()); 17124 bool IsLE = getTarget().isLittleEndian(); 17125 17126 // If the user wants the entire vector, just load the entire vector. 17127 if (NumBytes == 16) { 17128 Value *LD = 17129 Builder.CreateLoad(Address(Op0, ResTy, CharUnits::fromQuantity(1))); 17130 if (!IsLE) 17131 return LD; 17132 17133 // Reverse the bytes on LE. 17134 SmallVector<int, 16> RevMask; 17135 for (int Idx = 0; Idx < 16; Idx++) 17136 RevMask.push_back(15 - Idx); 17137 return Builder.CreateShuffleVector(LD, LD, RevMask); 17138 } 17139 17140 llvm::Function *Lvx = CGM.getIntrinsic(Intrinsic::ppc_altivec_lvx); 17141 llvm::Function *Lvs = CGM.getIntrinsic(IsLE ? Intrinsic::ppc_altivec_lvsr 17142 : Intrinsic::ppc_altivec_lvsl); 17143 llvm::Function *Vperm = CGM.getIntrinsic(Intrinsic::ppc_altivec_vperm); 17144 Value *HiMem = Builder.CreateGEP( 17145 Int8Ty, Op0, ConstantInt::get(Op1->getType(), NumBytes - 1)); 17146 Value *LoLd = Builder.CreateCall(Lvx, Op0, "ld.lo"); 17147 Value *HiLd = Builder.CreateCall(Lvx, HiMem, "ld.hi"); 17148 Value *Mask1 = Builder.CreateCall(Lvs, Op0, "mask1"); 17149 17150 Op0 = IsLE ? HiLd : LoLd; 17151 Op1 = IsLE ? LoLd : HiLd; 17152 Value *AllElts = Builder.CreateCall(Vperm, {Op0, Op1, Mask1}, "shuffle1"); 17153 Constant *Zero = llvm::Constant::getNullValue(IsLE ? ResTy : AllElts->getType()); 17154 17155 if (IsLE) { 17156 SmallVector<int, 16> Consts; 17157 for (int Idx = 0; Idx < 16; Idx++) { 17158 int Val = (NumBytes - Idx - 1 >= 0) ? 
(NumBytes - Idx - 1) 17159 : 16 - (NumBytes - Idx); 17160 Consts.push_back(Val); 17161 } 17162 return Builder.CreateShuffleVector(Builder.CreateBitCast(AllElts, ResTy), 17163 Zero, Consts); 17164 } 17165 SmallVector<Constant *, 16> Consts; 17166 for (int Idx = 0; Idx < 16; Idx++) 17167 Consts.push_back(Builder.getInt8(NumBytes + Idx)); 17168 Value *Mask2 = ConstantVector::get(Consts); 17169 return Builder.CreateBitCast( 17170 Builder.CreateCall(Vperm, {Zero, AllElts, Mask2}, "shuffle2"), ResTy); 17171 } 17172 case PPC::BI__builtin_vsx_strmb: { 17173 Value *Op0 = EmitScalarExpr(E->getArg(0)); 17174 Value *Op1 = EmitScalarExpr(E->getArg(1)); 17175 Value *Op2 = EmitScalarExpr(E->getArg(2)); 17176 int64_t NumBytes = cast<ConstantInt>(Op1)->getZExtValue(); 17177 bool IsLE = getTarget().isLittleEndian(); 17178 auto StoreSubVec = [&](unsigned Width, unsigned Offset, unsigned EltNo) { 17179 // Storing the whole vector, simply store it on BE and reverse bytes and 17180 // store on LE. 17181 if (Width == 16) { 17182 Value *StVec = Op2; 17183 if (IsLE) { 17184 SmallVector<int, 16> RevMask; 17185 for (int Idx = 0; Idx < 16; Idx++) 17186 RevMask.push_back(15 - Idx); 17187 StVec = Builder.CreateShuffleVector(Op2, Op2, RevMask); 17188 } 17189 return Builder.CreateStore( 17190 StVec, Address(Op0, Op2->getType(), CharUnits::fromQuantity(1))); 17191 } 17192 auto *ConvTy = Int64Ty; 17193 unsigned NumElts = 0; 17194 switch (Width) { 17195 default: 17196 llvm_unreachable("width for stores must be a power of 2"); 17197 case 8: 17198 ConvTy = Int64Ty; 17199 NumElts = 2; 17200 break; 17201 case 4: 17202 ConvTy = Int32Ty; 17203 NumElts = 4; 17204 break; 17205 case 2: 17206 ConvTy = Int16Ty; 17207 NumElts = 8; 17208 break; 17209 case 1: 17210 ConvTy = Int8Ty; 17211 NumElts = 16; 17212 break; 17213 } 17214 Value *Vec = Builder.CreateBitCast( 17215 Op2, llvm::FixedVectorType::get(ConvTy, NumElts)); 17216 Value *Ptr = 17217 Builder.CreateGEP(Int8Ty, Op0, ConstantInt::get(Int64Ty, Offset)); 17218 Value *Elt = Builder.CreateExtractElement(Vec, EltNo); 17219 if (IsLE && Width > 1) { 17220 Function *F = CGM.getIntrinsic(Intrinsic::bswap, ConvTy); 17221 Elt = Builder.CreateCall(F, Elt); 17222 } 17223 return Builder.CreateStore( 17224 Elt, Address(Ptr, ConvTy, CharUnits::fromQuantity(1))); 17225 }; 17226 unsigned Stored = 0; 17227 unsigned RemainingBytes = NumBytes; 17228 Value *Result; 17229 if (NumBytes == 16) 17230 return StoreSubVec(16, 0, 0); 17231 if (NumBytes >= 8) { 17232 Result = StoreSubVec(8, NumBytes - 8, IsLE ? 0 : 1); 17233 RemainingBytes -= 8; 17234 Stored += 8; 17235 } 17236 if (RemainingBytes >= 4) { 17237 Result = StoreSubVec(4, NumBytes - Stored - 4, 17238 IsLE ? (Stored >> 2) : 3 - (Stored >> 2)); 17239 RemainingBytes -= 4; 17240 Stored += 4; 17241 } 17242 if (RemainingBytes >= 2) { 17243 Result = StoreSubVec(2, NumBytes - Stored - 2, 17244 IsLE ? (Stored >> 1) : 7 - (Stored >> 1)); 17245 RemainingBytes -= 2; 17246 Stored += 2; 17247 } 17248 if (RemainingBytes) 17249 Result = 17250 StoreSubVec(1, NumBytes - Stored - 1, IsLE ? 
Stored : 15 - Stored); 17251 return Result; 17252 } 17253 // Square root 17254 case PPC::BI__builtin_vsx_xvsqrtsp: 17255 case PPC::BI__builtin_vsx_xvsqrtdp: { 17256 llvm::Type *ResultType = ConvertType(E->getType()); 17257 Value *X = EmitScalarExpr(E->getArg(0)); 17258 if (Builder.getIsFPConstrained()) { 17259 llvm::Function *F = CGM.getIntrinsic( 17260 Intrinsic::experimental_constrained_sqrt, ResultType); 17261 return Builder.CreateConstrainedFPCall(F, X); 17262 } else { 17263 llvm::Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType); 17264 return Builder.CreateCall(F, X); 17265 } 17266 } 17267 // Count leading zeros 17268 case PPC::BI__builtin_altivec_vclzb: 17269 case PPC::BI__builtin_altivec_vclzh: 17270 case PPC::BI__builtin_altivec_vclzw: 17271 case PPC::BI__builtin_altivec_vclzd: { 17272 llvm::Type *ResultType = ConvertType(E->getType()); 17273 Value *X = EmitScalarExpr(E->getArg(0)); 17274 Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false); 17275 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType); 17276 return Builder.CreateCall(F, {X, Undef}); 17277 } 17278 case PPC::BI__builtin_altivec_vctzb: 17279 case PPC::BI__builtin_altivec_vctzh: 17280 case PPC::BI__builtin_altivec_vctzw: 17281 case PPC::BI__builtin_altivec_vctzd: { 17282 llvm::Type *ResultType = ConvertType(E->getType()); 17283 Value *X = EmitScalarExpr(E->getArg(0)); 17284 Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false); 17285 Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType); 17286 return Builder.CreateCall(F, {X, Undef}); 17287 } 17288 case PPC::BI__builtin_altivec_vinsd: 17289 case PPC::BI__builtin_altivec_vinsw: 17290 case PPC::BI__builtin_altivec_vinsd_elt: 17291 case PPC::BI__builtin_altivec_vinsw_elt: { 17292 llvm::Type *ResultType = ConvertType(E->getType()); 17293 Value *Op0 = EmitScalarExpr(E->getArg(0)); 17294 Value *Op1 = EmitScalarExpr(E->getArg(1)); 17295 Value *Op2 = EmitScalarExpr(E->getArg(2)); 17296 17297 bool IsUnaligned = (BuiltinID == PPC::BI__builtin_altivec_vinsw || 17298 BuiltinID == PPC::BI__builtin_altivec_vinsd); 17299 17300 bool Is32bit = (BuiltinID == PPC::BI__builtin_altivec_vinsw || 17301 BuiltinID == PPC::BI__builtin_altivec_vinsw_elt); 17302 17303 // The third argument must be a compile time constant. 17304 ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op2); 17305 assert(ArgCI && 17306 "Third Arg to vinsw/vinsd intrinsic must be a constant integer!"); 17307 17308 // Valid value for the third argument is dependent on the input type and 17309 // builtin called. 17310 int ValidMaxValue = 0; 17311 if (IsUnaligned) 17312 ValidMaxValue = (Is32bit) ? 12 : 8; 17313 else 17314 ValidMaxValue = (Is32bit) ? 3 : 1; 17315 17316 // Get value of third argument. 17317 int64_t ConstArg = ArgCI->getSExtValue(); 17318 17319 // Compose range checking error message. 17320 std::string RangeErrMsg = IsUnaligned ? "byte" : "element"; 17321 RangeErrMsg += " number " + llvm::to_string(ConstArg); 17322 RangeErrMsg += " is outside of the valid range [0, "; 17323 RangeErrMsg += llvm::to_string(ValidMaxValue) + "]"; 17324 17325 // Issue error if third argument is not within the valid range. 17326 if (ConstArg < 0 || ConstArg > ValidMaxValue) 17327 CGM.Error(E->getExprLoc(), RangeErrMsg); 17328 17329 // Input to vec_replace_elt is an element index, convert to byte index. 17330 if (!IsUnaligned) { 17331 ConstArg *= Is32bit ? 4 : 8; 17332 // Fix the constant according to endianess. 17333 if (getTarget().isLittleEndian()) 17334 ConstArg = (Is32bit ? 
12 : 8) - ConstArg; 17335 } 17336 17337 ID = Is32bit ? Intrinsic::ppc_altivec_vinsw : Intrinsic::ppc_altivec_vinsd; 17338 Op2 = ConstantInt::getSigned(Int32Ty, ConstArg); 17339 // Casting input to vector int as per intrinsic definition. 17340 Op0 = 17341 Is32bit 17342 ? Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int32Ty, 4)) 17343 : Builder.CreateBitCast(Op0, 17344 llvm::FixedVectorType::get(Int64Ty, 2)); 17345 return Builder.CreateBitCast( 17346 Builder.CreateCall(CGM.getIntrinsic(ID), {Op0, Op1, Op2}), ResultType); 17347 } 17348 case PPC::BI__builtin_altivec_vpopcntb: 17349 case PPC::BI__builtin_altivec_vpopcnth: 17350 case PPC::BI__builtin_altivec_vpopcntw: 17351 case PPC::BI__builtin_altivec_vpopcntd: { 17352 llvm::Type *ResultType = ConvertType(E->getType()); 17353 Value *X = EmitScalarExpr(E->getArg(0)); 17354 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType); 17355 return Builder.CreateCall(F, X); 17356 } 17357 case PPC::BI__builtin_altivec_vadduqm: 17358 case PPC::BI__builtin_altivec_vsubuqm: { 17359 Value *Op0 = EmitScalarExpr(E->getArg(0)); 17360 Value *Op1 = EmitScalarExpr(E->getArg(1)); 17361 llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128); 17362 Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int128Ty, 1)); 17363 Op1 = Builder.CreateBitCast(Op1, llvm::FixedVectorType::get(Int128Ty, 1)); 17364 if (BuiltinID == PPC::BI__builtin_altivec_vadduqm) 17365 return Builder.CreateAdd(Op0, Op1, "vadduqm"); 17366 else 17367 return Builder.CreateSub(Op0, Op1, "vsubuqm"); 17368 } 17369 case PPC::BI__builtin_altivec_vaddcuq_c: 17370 case PPC::BI__builtin_altivec_vsubcuq_c: { 17371 SmallVector<Value *, 2> Ops; 17372 Value *Op0 = EmitScalarExpr(E->getArg(0)); 17373 Value *Op1 = EmitScalarExpr(E->getArg(1)); 17374 llvm::Type *V1I128Ty = llvm::FixedVectorType::get( 17375 llvm::IntegerType::get(getLLVMContext(), 128), 1); 17376 Ops.push_back(Builder.CreateBitCast(Op0, V1I128Ty)); 17377 Ops.push_back(Builder.CreateBitCast(Op1, V1I128Ty)); 17378 ID = (BuiltinID == PPC::BI__builtin_altivec_vaddcuq_c) 17379 ? 
Intrinsic::ppc_altivec_vaddcuq 17380 : Intrinsic::ppc_altivec_vsubcuq; 17381 return Builder.CreateCall(CGM.getIntrinsic(ID), Ops, ""); 17382 } 17383 case PPC::BI__builtin_altivec_vaddeuqm_c: 17384 case PPC::BI__builtin_altivec_vaddecuq_c: 17385 case PPC::BI__builtin_altivec_vsubeuqm_c: 17386 case PPC::BI__builtin_altivec_vsubecuq_c: { 17387 SmallVector<Value *, 3> Ops; 17388 Value *Op0 = EmitScalarExpr(E->getArg(0)); 17389 Value *Op1 = EmitScalarExpr(E->getArg(1)); 17390 Value *Op2 = EmitScalarExpr(E->getArg(2)); 17391 llvm::Type *V1I128Ty = llvm::FixedVectorType::get( 17392 llvm::IntegerType::get(getLLVMContext(), 128), 1); 17393 Ops.push_back(Builder.CreateBitCast(Op0, V1I128Ty)); 17394 Ops.push_back(Builder.CreateBitCast(Op1, V1I128Ty)); 17395 Ops.push_back(Builder.CreateBitCast(Op2, V1I128Ty)); 17396 switch (BuiltinID) { 17397 default: 17398 llvm_unreachable("Unsupported intrinsic!"); 17399 case PPC::BI__builtin_altivec_vaddeuqm_c: 17400 ID = Intrinsic::ppc_altivec_vaddeuqm; 17401 break; 17402 case PPC::BI__builtin_altivec_vaddecuq_c: 17403 ID = Intrinsic::ppc_altivec_vaddecuq; 17404 break; 17405 case PPC::BI__builtin_altivec_vsubeuqm_c: 17406 ID = Intrinsic::ppc_altivec_vsubeuqm; 17407 break; 17408 case PPC::BI__builtin_altivec_vsubecuq_c: 17409 ID = Intrinsic::ppc_altivec_vsubecuq; 17410 break; 17411 } 17412 return Builder.CreateCall(CGM.getIntrinsic(ID), Ops, ""); 17413 } 17414 case PPC::BI__builtin_ppc_rldimi: 17415 case PPC::BI__builtin_ppc_rlwimi: { 17416 Value *Op0 = EmitScalarExpr(E->getArg(0)); 17417 Value *Op1 = EmitScalarExpr(E->getArg(1)); 17418 Value *Op2 = EmitScalarExpr(E->getArg(2)); 17419 Value *Op3 = EmitScalarExpr(E->getArg(3)); 17420 // rldimi is 64-bit instruction, expand the intrinsic before isel to 17421 // leverage peephole and avoid legalization efforts. 17422 if (BuiltinID == PPC::BI__builtin_ppc_rldimi && 17423 !getTarget().getTriple().isPPC64()) { 17424 Function *F = CGM.getIntrinsic(Intrinsic::fshl, Op0->getType()); 17425 Op2 = Builder.CreateZExt(Op2, Int64Ty); 17426 Value *Shift = Builder.CreateCall(F, {Op0, Op0, Op2}); 17427 return Builder.CreateOr(Builder.CreateAnd(Shift, Op3), 17428 Builder.CreateAnd(Op1, Builder.CreateNot(Op3))); 17429 } 17430 return Builder.CreateCall( 17431 CGM.getIntrinsic(BuiltinID == PPC::BI__builtin_ppc_rldimi 17432 ? 
Intrinsic::ppc_rldimi 17433 : Intrinsic::ppc_rlwimi), 17434 {Op0, Op1, Op2, Op3}); 17435 } 17436 case PPC::BI__builtin_ppc_rlwnm: { 17437 Value *Op0 = EmitScalarExpr(E->getArg(0)); 17438 Value *Op1 = EmitScalarExpr(E->getArg(1)); 17439 Value *Op2 = EmitScalarExpr(E->getArg(2)); 17440 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_rlwnm), 17441 {Op0, Op1, Op2}); 17442 } 17443 case PPC::BI__builtin_ppc_poppar4: 17444 case PPC::BI__builtin_ppc_poppar8: { 17445 Value *Op0 = EmitScalarExpr(E->getArg(0)); 17446 llvm::Type *ArgType = Op0->getType(); 17447 Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType); 17448 Value *Tmp = Builder.CreateCall(F, Op0); 17449 17450 llvm::Type *ResultType = ConvertType(E->getType()); 17451 Value *Result = Builder.CreateAnd(Tmp, llvm::ConstantInt::get(ArgType, 1)); 17452 if (Result->getType() != ResultType) 17453 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true, 17454 "cast"); 17455 return Result; 17456 } 17457 case PPC::BI__builtin_ppc_cmpb: { 17458 Value *Op0 = EmitScalarExpr(E->getArg(0)); 17459 Value *Op1 = EmitScalarExpr(E->getArg(1)); 17460 if (getTarget().getTriple().isPPC64()) { 17461 Function *F = 17462 CGM.getIntrinsic(Intrinsic::ppc_cmpb, {Int64Ty, Int64Ty, Int64Ty}); 17463 return Builder.CreateCall(F, {Op0, Op1}, "cmpb"); 17464 } 17465 // For 32 bit, emit the code as below: 17466 // %conv = trunc i64 %a to i32 17467 // %conv1 = trunc i64 %b to i32 17468 // %shr = lshr i64 %a, 32 17469 // %conv2 = trunc i64 %shr to i32 17470 // %shr3 = lshr i64 %b, 32 17471 // %conv4 = trunc i64 %shr3 to i32 17472 // %0 = tail call i32 @llvm.ppc.cmpb32(i32 %conv, i32 %conv1) 17473 // %conv5 = zext i32 %0 to i64 17474 // %1 = tail call i32 @llvm.ppc.cmpb32(i32 %conv2, i32 %conv4) 17475 // %conv614 = zext i32 %1 to i64 17476 // %shl = shl nuw i64 %conv614, 32 17477 // %or = or i64 %shl, %conv5 17478 // ret i64 %or 17479 Function *F = 17480 CGM.getIntrinsic(Intrinsic::ppc_cmpb, {Int32Ty, Int32Ty, Int32Ty}); 17481 Value *ArgOneLo = Builder.CreateTrunc(Op0, Int32Ty); 17482 Value *ArgTwoLo = Builder.CreateTrunc(Op1, Int32Ty); 17483 Constant *ShiftAmt = ConstantInt::get(Int64Ty, 32); 17484 Value *ArgOneHi = 17485 Builder.CreateTrunc(Builder.CreateLShr(Op0, ShiftAmt), Int32Ty); 17486 Value *ArgTwoHi = 17487 Builder.CreateTrunc(Builder.CreateLShr(Op1, ShiftAmt), Int32Ty); 17488 Value *ResLo = Builder.CreateZExt( 17489 Builder.CreateCall(F, {ArgOneLo, ArgTwoLo}, "cmpb"), Int64Ty); 17490 Value *ResHiShift = Builder.CreateZExt( 17491 Builder.CreateCall(F, {ArgOneHi, ArgTwoHi}, "cmpb"), Int64Ty); 17492 Value *ResHi = Builder.CreateShl(ResHiShift, ShiftAmt); 17493 return Builder.CreateOr(ResLo, ResHi); 17494 } 17495 // Copy sign 17496 case PPC::BI__builtin_vsx_xvcpsgnsp: 17497 case PPC::BI__builtin_vsx_xvcpsgndp: { 17498 llvm::Type *ResultType = ConvertType(E->getType()); 17499 Value *X = EmitScalarExpr(E->getArg(0)); 17500 Value *Y = EmitScalarExpr(E->getArg(1)); 17501 ID = Intrinsic::copysign; 17502 llvm::Function *F = CGM.getIntrinsic(ID, ResultType); 17503 return Builder.CreateCall(F, {X, Y}); 17504 } 17505 // Rounding/truncation 17506 case PPC::BI__builtin_vsx_xvrspip: 17507 case PPC::BI__builtin_vsx_xvrdpip: 17508 case PPC::BI__builtin_vsx_xvrdpim: 17509 case PPC::BI__builtin_vsx_xvrspim: 17510 case PPC::BI__builtin_vsx_xvrdpi: 17511 case PPC::BI__builtin_vsx_xvrspi: 17512 case PPC::BI__builtin_vsx_xvrdpic: 17513 case PPC::BI__builtin_vsx_xvrspic: 17514 case PPC::BI__builtin_vsx_xvrdpiz: 17515 case PPC::BI__builtin_vsx_xvrspiz: { 17516 
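    // Each builtin pair maps onto one generic rounding intrinsic:
    //   xvr*im -> floor, xvr*i -> round, xvr*ic -> rint,
    //   xvr*ip -> ceil,  xvr*iz -> trunc
    // (or its experimental.constrained.* counterpart under strict FP).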
llvm::Type *ResultType = ConvertType(E->getType()); 17517 Value *X = EmitScalarExpr(E->getArg(0)); 17518 if (BuiltinID == PPC::BI__builtin_vsx_xvrdpim || 17519 BuiltinID == PPC::BI__builtin_vsx_xvrspim) 17520 ID = Builder.getIsFPConstrained() 17521 ? Intrinsic::experimental_constrained_floor 17522 : Intrinsic::floor; 17523 else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpi || 17524 BuiltinID == PPC::BI__builtin_vsx_xvrspi) 17525 ID = Builder.getIsFPConstrained() 17526 ? Intrinsic::experimental_constrained_round 17527 : Intrinsic::round; 17528 else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpic || 17529 BuiltinID == PPC::BI__builtin_vsx_xvrspic) 17530 ID = Builder.getIsFPConstrained() 17531 ? Intrinsic::experimental_constrained_rint 17532 : Intrinsic::rint; 17533 else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpip || 17534 BuiltinID == PPC::BI__builtin_vsx_xvrspip) 17535 ID = Builder.getIsFPConstrained() 17536 ? Intrinsic::experimental_constrained_ceil 17537 : Intrinsic::ceil; 17538 else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpiz || 17539 BuiltinID == PPC::BI__builtin_vsx_xvrspiz) 17540 ID = Builder.getIsFPConstrained() 17541 ? Intrinsic::experimental_constrained_trunc 17542 : Intrinsic::trunc; 17543 llvm::Function *F = CGM.getIntrinsic(ID, ResultType); 17544 return Builder.getIsFPConstrained() ? Builder.CreateConstrainedFPCall(F, X) 17545 : Builder.CreateCall(F, X); 17546 } 17547 17548 // Absolute value 17549 case PPC::BI__builtin_vsx_xvabsdp: 17550 case PPC::BI__builtin_vsx_xvabssp: { 17551 llvm::Type *ResultType = ConvertType(E->getType()); 17552 Value *X = EmitScalarExpr(E->getArg(0)); 17553 llvm::Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType); 17554 return Builder.CreateCall(F, X); 17555 } 17556 17557 // Fastmath by default 17558 case PPC::BI__builtin_ppc_recipdivf: 17559 case PPC::BI__builtin_ppc_recipdivd: 17560 case PPC::BI__builtin_ppc_rsqrtf: 17561 case PPC::BI__builtin_ppc_rsqrtd: { 17562 FastMathFlags FMF = Builder.getFastMathFlags(); 17563 Builder.getFastMathFlags().setFast(); 17564 llvm::Type *ResultType = ConvertType(E->getType()); 17565 Value *X = EmitScalarExpr(E->getArg(0)); 17566 17567 if (BuiltinID == PPC::BI__builtin_ppc_recipdivf || 17568 BuiltinID == PPC::BI__builtin_ppc_recipdivd) { 17569 Value *Y = EmitScalarExpr(E->getArg(1)); 17570 Value *FDiv = Builder.CreateFDiv(X, Y, "recipdiv"); 17571 Builder.getFastMathFlags() &= (FMF); 17572 return FDiv; 17573 } 17574 auto *One = ConstantFP::get(ResultType, 1.0); 17575 llvm::Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType); 17576 Value *FDiv = Builder.CreateFDiv(One, Builder.CreateCall(F, X), "rsqrt"); 17577 Builder.getFastMathFlags() &= (FMF); 17578 return FDiv; 17579 } 17580 case PPC::BI__builtin_ppc_alignx: { 17581 Value *Op0 = EmitScalarExpr(E->getArg(0)); 17582 Value *Op1 = EmitScalarExpr(E->getArg(1)); 17583 ConstantInt *AlignmentCI = cast<ConstantInt>(Op0); 17584 if (AlignmentCI->getValue().ugt(llvm::Value::MaximumAlignment)) 17585 AlignmentCI = ConstantInt::get(AlignmentCI->getIntegerType(), 17586 llvm::Value::MaximumAlignment); 17587 17588 emitAlignmentAssumption(Op1, E->getArg(1), 17589 /*The expr loc is sufficient.*/ SourceLocation(), 17590 AlignmentCI, nullptr); 17591 return Op1; 17592 } 17593 case PPC::BI__builtin_ppc_rdlam: { 17594 Value *Op0 = EmitScalarExpr(E->getArg(0)); 17595 Value *Op1 = EmitScalarExpr(E->getArg(1)); 17596 Value *Op2 = EmitScalarExpr(E->getArg(2)); 17597 llvm::Type *Ty = Op0->getType(); 17598 Value *ShiftAmt = Builder.CreateIntCast(Op1, Ty, false); 17599 Function *F 
= CGM.getIntrinsic(Intrinsic::fshl, Ty); 17600 Value *Rotate = Builder.CreateCall(F, {Op0, Op0, ShiftAmt}); 17601 return Builder.CreateAnd(Rotate, Op2); 17602 } 17603 case PPC::BI__builtin_ppc_load2r: { 17604 Function *F = CGM.getIntrinsic(Intrinsic::ppc_load2r); 17605 Value *Op0 = EmitScalarExpr(E->getArg(0)); 17606 Value *LoadIntrinsic = Builder.CreateCall(F, {Op0}); 17607 return Builder.CreateTrunc(LoadIntrinsic, Int16Ty); 17608 } 17609 // FMA variations 17610 case PPC::BI__builtin_ppc_fnmsub: 17611 case PPC::BI__builtin_ppc_fnmsubs: 17612 case PPC::BI__builtin_vsx_xvmaddadp: 17613 case PPC::BI__builtin_vsx_xvmaddasp: 17614 case PPC::BI__builtin_vsx_xvnmaddadp: 17615 case PPC::BI__builtin_vsx_xvnmaddasp: 17616 case PPC::BI__builtin_vsx_xvmsubadp: 17617 case PPC::BI__builtin_vsx_xvmsubasp: 17618 case PPC::BI__builtin_vsx_xvnmsubadp: 17619 case PPC::BI__builtin_vsx_xvnmsubasp: { 17620 llvm::Type *ResultType = ConvertType(E->getType()); 17621 Value *X = EmitScalarExpr(E->getArg(0)); 17622 Value *Y = EmitScalarExpr(E->getArg(1)); 17623 Value *Z = EmitScalarExpr(E->getArg(2)); 17624 llvm::Function *F; 17625 if (Builder.getIsFPConstrained()) 17626 F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType); 17627 else 17628 F = CGM.getIntrinsic(Intrinsic::fma, ResultType); 17629 switch (BuiltinID) { 17630 case PPC::BI__builtin_vsx_xvmaddadp: 17631 case PPC::BI__builtin_vsx_xvmaddasp: 17632 if (Builder.getIsFPConstrained()) 17633 return Builder.CreateConstrainedFPCall(F, {X, Y, Z}); 17634 else 17635 return Builder.CreateCall(F, {X, Y, Z}); 17636 case PPC::BI__builtin_vsx_xvnmaddadp: 17637 case PPC::BI__builtin_vsx_xvnmaddasp: 17638 if (Builder.getIsFPConstrained()) 17639 return Builder.CreateFNeg( 17640 Builder.CreateConstrainedFPCall(F, {X, Y, Z}), "neg"); 17641 else 17642 return Builder.CreateFNeg(Builder.CreateCall(F, {X, Y, Z}), "neg"); 17643 case PPC::BI__builtin_vsx_xvmsubadp: 17644 case PPC::BI__builtin_vsx_xvmsubasp: 17645 if (Builder.getIsFPConstrained()) 17646 return Builder.CreateConstrainedFPCall( 17647 F, {X, Y, Builder.CreateFNeg(Z, "neg")}); 17648 else 17649 return Builder.CreateCall(F, {X, Y, Builder.CreateFNeg(Z, "neg")}); 17650 case PPC::BI__builtin_ppc_fnmsub: 17651 case PPC::BI__builtin_ppc_fnmsubs: 17652 case PPC::BI__builtin_vsx_xvnmsubadp: 17653 case PPC::BI__builtin_vsx_xvnmsubasp: 17654 if (Builder.getIsFPConstrained()) 17655 return Builder.CreateFNeg( 17656 Builder.CreateConstrainedFPCall( 17657 F, {X, Y, Builder.CreateFNeg(Z, "neg")}), 17658 "neg"); 17659 else 17660 return Builder.CreateCall( 17661 CGM.getIntrinsic(Intrinsic::ppc_fnmsub, ResultType), {X, Y, Z}); 17662 } 17663 llvm_unreachable("Unknown FMA operation"); 17664 return nullptr; // Suppress no-return warning 17665 } 17666 17667 case PPC::BI__builtin_vsx_insertword: { 17668 Value *Op0 = EmitScalarExpr(E->getArg(0)); 17669 Value *Op1 = EmitScalarExpr(E->getArg(1)); 17670 Value *Op2 = EmitScalarExpr(E->getArg(2)); 17671 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxinsertw); 17672 17673 // Third argument is a compile time constant int. It must be clamped to 17674 // to the range [0, 12]. 17675 ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op2); 17676 assert(ArgCI && 17677 "Third arg to xxinsertw intrinsic must be constant integer"); 17678 const int64_t MaxIndex = 12; 17679 int64_t Index = std::clamp(ArgCI->getSExtValue(), (int64_t)0, MaxIndex); 17680 17681 // The builtin semantics don't exactly match the xxinsertw instructions 17682 // semantics (which ppc_vsx_xxinsertw follows). 
The builtin extracts the 17683 // word from the first argument, and inserts it in the second argument. The 17684 // instruction extracts the word from its second input register and inserts 17685 // it into its first input register, so swap the first and second arguments. 17686 std::swap(Op0, Op1); 17687 17688 // Need to cast the second argument from a vector of unsigned int to a 17689 // vector of long long. 17690 Op1 = Builder.CreateBitCast(Op1, llvm::FixedVectorType::get(Int64Ty, 2)); 17691 17692 if (getTarget().isLittleEndian()) { 17693 // Reverse the double words in the vector we will extract from. 17694 Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int64Ty, 2)); 17695 Op0 = Builder.CreateShuffleVector(Op0, Op0, ArrayRef<int>{1, 0}); 17696 17697 // Reverse the index. 17698 Index = MaxIndex - Index; 17699 } 17700 17701 // Intrinsic expects the first arg to be a vector of int. 17702 Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int32Ty, 4)); 17703 Op2 = ConstantInt::getSigned(Int32Ty, Index); 17704 return Builder.CreateCall(F, {Op0, Op1, Op2}); 17705 } 17706 17707 case PPC::BI__builtin_vsx_extractuword: { 17708 Value *Op0 = EmitScalarExpr(E->getArg(0)); 17709 Value *Op1 = EmitScalarExpr(E->getArg(1)); 17710 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxextractuw); 17711 17712 // Intrinsic expects the first argument to be a vector of doublewords. 17713 Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int64Ty, 2)); 17714 17715 // The second argument is a compile time constant int that needs to 17716 // be clamped to the range [0, 12]. 17717 ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op1); 17718 assert(ArgCI && 17719 "Second Arg to xxextractuw intrinsic must be a constant integer!"); 17720 const int64_t MaxIndex = 12; 17721 int64_t Index = std::clamp(ArgCI->getSExtValue(), (int64_t)0, MaxIndex); 17722 17723 if (getTarget().isLittleEndian()) { 17724 // Reverse the index. 17725 Index = MaxIndex - Index; 17726 Op1 = ConstantInt::getSigned(Int32Ty, Index); 17727 17728 // Emit the call, then reverse the double words of the results vector. 17729 Value *Call = Builder.CreateCall(F, {Op0, Op1}); 17730 17731 Value *ShuffleCall = 17732 Builder.CreateShuffleVector(Call, Call, ArrayRef<int>{1, 0}); 17733 return ShuffleCall; 17734 } else { 17735 Op1 = ConstantInt::getSigned(Int32Ty, Index); 17736 return Builder.CreateCall(F, {Op0, Op1}); 17737 } 17738 } 17739 17740 case PPC::BI__builtin_vsx_xxpermdi: { 17741 Value *Op0 = EmitScalarExpr(E->getArg(0)); 17742 Value *Op1 = EmitScalarExpr(E->getArg(1)); 17743 Value *Op2 = EmitScalarExpr(E->getArg(2)); 17744 ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op2); 17745 assert(ArgCI && "Third arg must be constant integer!"); 17746 17747 unsigned Index = ArgCI->getZExtValue(); 17748 Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int64Ty, 2)); 17749 Op1 = Builder.CreateBitCast(Op1, llvm::FixedVectorType::get(Int64Ty, 2)); 17750 17751 // Account for endianness by treating this as just a shuffle. So we use the 17752 // same indices for both LE and BE in order to produce expected results in 17753 // both cases. 
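// A worked illustration (a sketch added for exposition, not upstream text):
// after both operands are bitcast to <2 x i64>, the pair is treated as the
// concatenation {Op0[0], Op0[1], Op1[0], Op1[1]}, so the two shuffle indices
// computed below pick
//   result[0] = Op0[(Index & 2) >> 1]   (shuffle index 0 or 1)
//   result[1] = Op1[Index & 1]          (shuffle index 2 or 3)
// For example, Index == 0 selects {Op0[0], Op1[0]} and Index == 3 selects
// {Op0[1], Op1[1]}, and the same indices are correct on both endiannesses.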
17754 int ElemIdx0 = (Index & 2) >> 1; 17755 int ElemIdx1 = 2 + (Index & 1); 17756 17757 int ShuffleElts[2] = {ElemIdx0, ElemIdx1}; 17758 Value *ShuffleCall = Builder.CreateShuffleVector(Op0, Op1, ShuffleElts); 17759 QualType BIRetType = E->getType(); 17760 auto RetTy = ConvertType(BIRetType); 17761 return Builder.CreateBitCast(ShuffleCall, RetTy); 17762 } 17763 17764 case PPC::BI__builtin_vsx_xxsldwi: { 17765 Value *Op0 = EmitScalarExpr(E->getArg(0)); 17766 Value *Op1 = EmitScalarExpr(E->getArg(1)); 17767 Value *Op2 = EmitScalarExpr(E->getArg(2)); 17768 ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op2); 17769 assert(ArgCI && "Third argument must be a compile time constant"); 17770 unsigned Index = ArgCI->getZExtValue() & 0x3; 17771 Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int32Ty, 4)); 17772 Op1 = Builder.CreateBitCast(Op1, llvm::FixedVectorType::get(Int32Ty, 4)); 17773 17774 // Create a shuffle mask 17775 int ElemIdx0; 17776 int ElemIdx1; 17777 int ElemIdx2; 17778 int ElemIdx3; 17779 if (getTarget().isLittleEndian()) { 17780 // Little endian element N comes from element 8+N-Index of the 17781 // concatenated wide vector (of course, using modulo arithmetic on 17782 // the total number of elements). 17783 ElemIdx0 = (8 - Index) % 8; 17784 ElemIdx1 = (9 - Index) % 8; 17785 ElemIdx2 = (10 - Index) % 8; 17786 ElemIdx3 = (11 - Index) % 8; 17787 } else { 17788 // Big endian ElemIdx<N> = Index + N 17789 ElemIdx0 = Index; 17790 ElemIdx1 = Index + 1; 17791 ElemIdx2 = Index + 2; 17792 ElemIdx3 = Index + 3; 17793 } 17794 17795 int ShuffleElts[4] = {ElemIdx0, ElemIdx1, ElemIdx2, ElemIdx3}; 17796 Value *ShuffleCall = Builder.CreateShuffleVector(Op0, Op1, ShuffleElts); 17797 QualType BIRetType = E->getType(); 17798 auto RetTy = ConvertType(BIRetType); 17799 return Builder.CreateBitCast(ShuffleCall, RetTy); 17800 } 17801 17802 case PPC::BI__builtin_pack_vector_int128: { 17803 Value *Op0 = EmitScalarExpr(E->getArg(0)); 17804 Value *Op1 = EmitScalarExpr(E->getArg(1)); 17805 bool isLittleEndian = getTarget().isLittleEndian(); 17806 Value *PoisonValue = 17807 llvm::PoisonValue::get(llvm::FixedVectorType::get(Op0->getType(), 2)); 17808 Value *Res = Builder.CreateInsertElement( 17809 PoisonValue, Op0, (uint64_t)(isLittleEndian ? 1 : 0)); 17810 Res = Builder.CreateInsertElement(Res, Op1, 17811 (uint64_t)(isLittleEndian ? 0 : 1)); 17812 return Builder.CreateBitCast(Res, ConvertType(E->getType())); 17813 } 17814 17815 case PPC::BI__builtin_unpack_vector_int128: { 17816 Value *Op0 = EmitScalarExpr(E->getArg(0)); 17817 Value *Op1 = EmitScalarExpr(E->getArg(1)); 17818 ConstantInt *Index = cast<ConstantInt>(Op1); 17819 Value *Unpacked = Builder.CreateBitCast( 17820 Op0, llvm::FixedVectorType::get(ConvertType(E->getType()), 2)); 17821 17822 if (getTarget().isLittleEndian()) 17823 Index = 17824 ConstantInt::get(Index->getIntegerType(), 1 - Index->getZExtValue()); 17825 17826 return Builder.CreateExtractElement(Unpacked, Index); 17827 } 17828 17829 case PPC::BI__builtin_ppc_sthcx: { 17830 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_sthcx); 17831 Value *Op0 = EmitScalarExpr(E->getArg(0)); 17832 Value *Op1 = Builder.CreateSExt(EmitScalarExpr(E->getArg(1)), Int32Ty); 17833 return Builder.CreateCall(F, {Op0, Op1}); 17834 } 17835 17836 // The PPC MMA builtins take a pointer to a __vector_quad as an argument. 17837 // Some of the MMA instructions accumulate their result into an existing 17838 // accumulator whereas the others generate a new accumulator. 
So we need to 17839 // use custom code generation to expand a builtin call with a pointer to a 17840 // load (if the corresponding instruction accumulates its result) followed by 17841 // the call to the intrinsic and a store of the result. 17842 #define CUSTOM_BUILTIN(Name, Intr, Types, Accumulate, Feature) \ 17843 case PPC::BI__builtin_##Name: 17844 #include "clang/Basic/BuiltinsPPC.def" 17845 { 17846 SmallVector<Value *, 4> Ops; 17847 for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) 17848 if (E->getArg(i)->getType()->isArrayType()) 17849 Ops.push_back( 17850 EmitArrayToPointerDecay(E->getArg(i)).emitRawPointer(*this)); 17851 else 17852 Ops.push_back(EmitScalarExpr(E->getArg(i))); 17853 // The first argument of these two builtins is a pointer used to store their 17854 // result. However, the llvm intrinsics return their result in multiple 17855 // return values. So, here we emit code extracting these values from the 17856 // intrinsic results and storing them using that pointer. 17857 if (BuiltinID == PPC::BI__builtin_mma_disassemble_acc || 17858 BuiltinID == PPC::BI__builtin_vsx_disassemble_pair || 17859 BuiltinID == PPC::BI__builtin_mma_disassemble_pair) { 17860 unsigned NumVecs = 2; 17861 auto Intrinsic = Intrinsic::ppc_vsx_disassemble_pair; 17862 if (BuiltinID == PPC::BI__builtin_mma_disassemble_acc) { 17863 NumVecs = 4; 17864 Intrinsic = Intrinsic::ppc_mma_disassemble_acc; 17865 } 17866 llvm::Function *F = CGM.getIntrinsic(Intrinsic); 17867 Address Addr = EmitPointerWithAlignment(E->getArg(1)); 17868 Value *Vec = Builder.CreateLoad(Addr); 17869 Value *Call = Builder.CreateCall(F, {Vec}); 17870 llvm::Type *VTy = llvm::FixedVectorType::get(Int8Ty, 16); 17871 Value *Ptr = Ops[0]; 17872 for (unsigned i=0; i<NumVecs; i++) { 17873 Value *Vec = Builder.CreateExtractValue(Call, i); 17874 llvm::ConstantInt* Index = llvm::ConstantInt::get(IntTy, i); 17875 Value *GEP = Builder.CreateInBoundsGEP(VTy, Ptr, Index); 17876 Builder.CreateAlignedStore(Vec, GEP, MaybeAlign(16)); 17877 } 17878 return Call; 17879 } 17880 if (BuiltinID == PPC::BI__builtin_vsx_build_pair || 17881 BuiltinID == PPC::BI__builtin_mma_build_acc) { 17882 // Reverse the order of the operands for LE, so the 17883 // same builtin call can be used on both LE and BE 17884 // without the need for the programmer to swap operands. 17885 // The operands are reversed starting from the second argument, 17886 // the first operand is the pointer to the pair/accumulator 17887 // that is being built. 
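// A brief sketch of the effect (illustrative, assuming a call such as
//   __builtin_vsx_build_pair(&res, vA, vB);
// ): Ops[0] remains the result pointer, while on a little-endian target the
// remaining operands are reversed so the underlying intrinsic sees {vB, vA};
// on a big-endian target it sees {vA, vB} unchanged. The source-level builtin
// therefore behaves identically on both endiannesses.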
17888 if (getTarget().isLittleEndian()) 17889 std::reverse(Ops.begin() + 1, Ops.end()); 17890 } 17891 bool Accumulate; 17892 switch (BuiltinID) { 17893 #define CUSTOM_BUILTIN(Name, Intr, Types, Acc, Feature) \ 17894 case PPC::BI__builtin_##Name: \ 17895 ID = Intrinsic::ppc_##Intr; \ 17896 Accumulate = Acc; \ 17897 break; 17898 #include "clang/Basic/BuiltinsPPC.def" 17899 } 17900 if (BuiltinID == PPC::BI__builtin_vsx_lxvp || 17901 BuiltinID == PPC::BI__builtin_vsx_stxvp || 17902 BuiltinID == PPC::BI__builtin_mma_lxvp || 17903 BuiltinID == PPC::BI__builtin_mma_stxvp) { 17904 if (BuiltinID == PPC::BI__builtin_vsx_lxvp || 17905 BuiltinID == PPC::BI__builtin_mma_lxvp) { 17906 Ops[0] = Builder.CreateGEP(Int8Ty, Ops[1], Ops[0]); 17907 } else { 17908 Ops[1] = Builder.CreateGEP(Int8Ty, Ops[2], Ops[1]); 17909 } 17910 Ops.pop_back(); 17911 llvm::Function *F = CGM.getIntrinsic(ID); 17912 return Builder.CreateCall(F, Ops, ""); 17913 } 17914 SmallVector<Value*, 4> CallOps; 17915 if (Accumulate) { 17916 Address Addr = EmitPointerWithAlignment(E->getArg(0)); 17917 Value *Acc = Builder.CreateLoad(Addr); 17918 CallOps.push_back(Acc); 17919 } 17920 for (unsigned i=1; i<Ops.size(); i++) 17921 CallOps.push_back(Ops[i]); 17922 llvm::Function *F = CGM.getIntrinsic(ID); 17923 Value *Call = Builder.CreateCall(F, CallOps); 17924 return Builder.CreateAlignedStore(Call, Ops[0], MaybeAlign(64)); 17925 } 17926 17927 case PPC::BI__builtin_ppc_compare_and_swap: 17928 case PPC::BI__builtin_ppc_compare_and_swaplp: { 17929 Address Addr = EmitPointerWithAlignment(E->getArg(0)); 17930 Address OldValAddr = EmitPointerWithAlignment(E->getArg(1)); 17931 Value *OldVal = Builder.CreateLoad(OldValAddr); 17932 QualType AtomicTy = E->getArg(0)->getType()->getPointeeType(); 17933 LValue LV = MakeAddrLValue(Addr, AtomicTy); 17934 Value *Op2 = EmitScalarExpr(E->getArg(2)); 17935 auto Pair = EmitAtomicCompareExchange( 17936 LV, RValue::get(OldVal), RValue::get(Op2), E->getExprLoc(), 17937 llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Monotonic, true); 17938 // Unlike c11's atomic_compare_exchange, according to 17939 // https://www.ibm.com/docs/en/xl-c-and-cpp-aix/16.1?topic=functions-compare-swap-compare-swaplp 17940 // > In either case, the contents of the memory location specified by addr 17941 // > are copied into the memory location specified by old_val_addr. 17942 // But it hasn't specified storing to OldValAddr is atomic or not and 17943 // which order to use. Now following XL's codegen, treat it as a normal 17944 // store. 
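// An illustrative sketch of the resulting lowering (not from the original
// source), assuming a call like
//   int ok = __builtin_ppc_compare_and_swap(addr, &expected, desired);
// : the code below emits a monotonic compare-exchange of *addr against the
// value previously loaded from &expected, then writes the value observed in
// memory back to &expected with an ordinary (non-atomic) store, and finally
// zero-extends the success bit to i32 as the builtin's result.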
17945 Value *LoadedVal = Pair.first.getScalarVal(); 17946 Builder.CreateStore(LoadedVal, OldValAddr); 17947 return Builder.CreateZExt(Pair.second, Builder.getInt32Ty()); 17948 } 17949 case PPC::BI__builtin_ppc_fetch_and_add: 17950 case PPC::BI__builtin_ppc_fetch_and_addlp: { 17951 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E, 17952 llvm::AtomicOrdering::Monotonic); 17953 } 17954 case PPC::BI__builtin_ppc_fetch_and_and: 17955 case PPC::BI__builtin_ppc_fetch_and_andlp: { 17956 return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E, 17957 llvm::AtomicOrdering::Monotonic); 17958 } 17959 17960 case PPC::BI__builtin_ppc_fetch_and_or: 17961 case PPC::BI__builtin_ppc_fetch_and_orlp: { 17962 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E, 17963 llvm::AtomicOrdering::Monotonic); 17964 } 17965 case PPC::BI__builtin_ppc_fetch_and_swap: 17966 case PPC::BI__builtin_ppc_fetch_and_swaplp: { 17967 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E, 17968 llvm::AtomicOrdering::Monotonic); 17969 } 17970 case PPC::BI__builtin_ppc_ldarx: 17971 case PPC::BI__builtin_ppc_lwarx: 17972 case PPC::BI__builtin_ppc_lharx: 17973 case PPC::BI__builtin_ppc_lbarx: 17974 return emitPPCLoadReserveIntrinsic(*this, BuiltinID, E); 17975 case PPC::BI__builtin_ppc_mfspr: { 17976 Value *Op0 = EmitScalarExpr(E->getArg(0)); 17977 llvm::Type *RetType = CGM.getDataLayout().getTypeSizeInBits(VoidPtrTy) == 32 17978 ? Int32Ty 17979 : Int64Ty; 17980 Function *F = CGM.getIntrinsic(Intrinsic::ppc_mfspr, RetType); 17981 return Builder.CreateCall(F, {Op0}); 17982 } 17983 case PPC::BI__builtin_ppc_mtspr: { 17984 Value *Op0 = EmitScalarExpr(E->getArg(0)); 17985 Value *Op1 = EmitScalarExpr(E->getArg(1)); 17986 llvm::Type *RetType = CGM.getDataLayout().getTypeSizeInBits(VoidPtrTy) == 32 17987 ? Int32Ty 17988 : Int64Ty; 17989 Function *F = CGM.getIntrinsic(Intrinsic::ppc_mtspr, RetType); 17990 return Builder.CreateCall(F, {Op0, Op1}); 17991 } 17992 case PPC::BI__builtin_ppc_popcntb: { 17993 Value *ArgValue = EmitScalarExpr(E->getArg(0)); 17994 llvm::Type *ArgType = ArgValue->getType(); 17995 Function *F = CGM.getIntrinsic(Intrinsic::ppc_popcntb, {ArgType, ArgType}); 17996 return Builder.CreateCall(F, {ArgValue}, "popcntb"); 17997 } 17998 case PPC::BI__builtin_ppc_mtfsf: { 17999 // The builtin takes a uint32 that needs to be cast to an 18000 // f64 to be passed to the intrinsic. 
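// For example (a sketch, not upstream text): for __builtin_ppc_mtfsf the code
// below converts the second (i32) operand to double with a uitofp and passes
// {first operand, converted value} to the ppc_mtfsf intrinsic.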
18001 Value *Op0 = EmitScalarExpr(E->getArg(0)); 18002 Value *Op1 = EmitScalarExpr(E->getArg(1)); 18003 Value *Cast = Builder.CreateUIToFP(Op1, DoubleTy); 18004 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_mtfsf); 18005 return Builder.CreateCall(F, {Op0, Cast}, ""); 18006 } 18007 18008 case PPC::BI__builtin_ppc_swdiv_nochk: 18009 case PPC::BI__builtin_ppc_swdivs_nochk: { 18010 Value *Op0 = EmitScalarExpr(E->getArg(0)); 18011 Value *Op1 = EmitScalarExpr(E->getArg(1)); 18012 FastMathFlags FMF = Builder.getFastMathFlags(); 18013 Builder.getFastMathFlags().setFast(); 18014 Value *FDiv = Builder.CreateFDiv(Op0, Op1, "swdiv_nochk"); 18015 Builder.getFastMathFlags() &= (FMF); 18016 return FDiv; 18017 } 18018 case PPC::BI__builtin_ppc_fric: 18019 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin( 18020 *this, E, Intrinsic::rint, 18021 Intrinsic::experimental_constrained_rint)) 18022 .getScalarVal(); 18023 case PPC::BI__builtin_ppc_frim: 18024 case PPC::BI__builtin_ppc_frims: 18025 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin( 18026 *this, E, Intrinsic::floor, 18027 Intrinsic::experimental_constrained_floor)) 18028 .getScalarVal(); 18029 case PPC::BI__builtin_ppc_frin: 18030 case PPC::BI__builtin_ppc_frins: 18031 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin( 18032 *this, E, Intrinsic::round, 18033 Intrinsic::experimental_constrained_round)) 18034 .getScalarVal(); 18035 case PPC::BI__builtin_ppc_frip: 18036 case PPC::BI__builtin_ppc_frips: 18037 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin( 18038 *this, E, Intrinsic::ceil, 18039 Intrinsic::experimental_constrained_ceil)) 18040 .getScalarVal(); 18041 case PPC::BI__builtin_ppc_friz: 18042 case PPC::BI__builtin_ppc_frizs: 18043 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin( 18044 *this, E, Intrinsic::trunc, 18045 Intrinsic::experimental_constrained_trunc)) 18046 .getScalarVal(); 18047 case PPC::BI__builtin_ppc_fsqrt: 18048 case PPC::BI__builtin_ppc_fsqrts: 18049 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin( 18050 *this, E, Intrinsic::sqrt, 18051 Intrinsic::experimental_constrained_sqrt)) 18052 .getScalarVal(); 18053 case PPC::BI__builtin_ppc_test_data_class: { 18054 Value *Op0 = EmitScalarExpr(E->getArg(0)); 18055 Value *Op1 = EmitScalarExpr(E->getArg(1)); 18056 return Builder.CreateCall( 18057 CGM.getIntrinsic(Intrinsic::ppc_test_data_class, Op0->getType()), 18058 {Op0, Op1}, "test_data_class"); 18059 } 18060 case PPC::BI__builtin_ppc_maxfe: { 18061 Value *Op0 = EmitScalarExpr(E->getArg(0)); 18062 Value *Op1 = EmitScalarExpr(E->getArg(1)); 18063 Value *Op2 = EmitScalarExpr(E->getArg(2)); 18064 Value *Op3 = EmitScalarExpr(E->getArg(3)); 18065 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_maxfe), 18066 {Op0, Op1, Op2, Op3}); 18067 } 18068 case PPC::BI__builtin_ppc_maxfl: { 18069 Value *Op0 = EmitScalarExpr(E->getArg(0)); 18070 Value *Op1 = EmitScalarExpr(E->getArg(1)); 18071 Value *Op2 = EmitScalarExpr(E->getArg(2)); 18072 Value *Op3 = EmitScalarExpr(E->getArg(3)); 18073 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_maxfl), 18074 {Op0, Op1, Op2, Op3}); 18075 } 18076 case PPC::BI__builtin_ppc_maxfs: { 18077 Value *Op0 = EmitScalarExpr(E->getArg(0)); 18078 Value *Op1 = EmitScalarExpr(E->getArg(1)); 18079 Value *Op2 = EmitScalarExpr(E->getArg(2)); 18080 Value *Op3 = EmitScalarExpr(E->getArg(3)); 18081 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_maxfs), 18082 {Op0, Op1, Op2, Op3}); 18083 } 18084 case PPC::BI__builtin_ppc_minfe: { 18085 Value *Op0 = 
EmitScalarExpr(E->getArg(0));
18086 Value *Op1 = EmitScalarExpr(E->getArg(1));
18087 Value *Op2 = EmitScalarExpr(E->getArg(2));
18088 Value *Op3 = EmitScalarExpr(E->getArg(3));
18089 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_minfe),
18090 {Op0, Op1, Op2, Op3});
18091 }
18092 case PPC::BI__builtin_ppc_minfl: {
18093 Value *Op0 = EmitScalarExpr(E->getArg(0));
18094 Value *Op1 = EmitScalarExpr(E->getArg(1));
18095 Value *Op2 = EmitScalarExpr(E->getArg(2));
18096 Value *Op3 = EmitScalarExpr(E->getArg(3));
18097 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_minfl),
18098 {Op0, Op1, Op2, Op3});
18099 }
18100 case PPC::BI__builtin_ppc_minfs: {
18101 Value *Op0 = EmitScalarExpr(E->getArg(0));
18102 Value *Op1 = EmitScalarExpr(E->getArg(1));
18103 Value *Op2 = EmitScalarExpr(E->getArg(2));
18104 Value *Op3 = EmitScalarExpr(E->getArg(3));
18105 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_minfs),
18106 {Op0, Op1, Op2, Op3});
18107 }
18108 case PPC::BI__builtin_ppc_swdiv:
18109 case PPC::BI__builtin_ppc_swdivs: {
18110 Value *Op0 = EmitScalarExpr(E->getArg(0));
18111 Value *Op1 = EmitScalarExpr(E->getArg(1));
18112 return Builder.CreateFDiv(Op0, Op1, "swdiv");
18113 }
18114 case PPC::BI__builtin_ppc_set_fpscr_rn:
18115 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_setrnd),
18116 {EmitScalarExpr(E->getArg(0))});
18117 case PPC::BI__builtin_ppc_mffs:
18118 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_readflm));
18119 }
18120 }
18121
18122 namespace {
18123 // If \p E is not a null pointer, insert an address space cast to match the
18124 // return type of \p E if necessary.
18125 Value *EmitAMDGPUDispatchPtr(CodeGenFunction &CGF,
18126 const CallExpr *E = nullptr) {
18127 auto *F = CGF.CGM.getIntrinsic(Intrinsic::amdgcn_dispatch_ptr);
18128 auto *Call = CGF.Builder.CreateCall(F);
18129 Call->addRetAttr(
18130 Attribute::getWithDereferenceableBytes(Call->getContext(), 64));
18131 Call->addRetAttr(Attribute::getWithAlignment(Call->getContext(), Align(4)));
18132 if (!E)
18133 return Call;
18134 QualType BuiltinRetType = E->getType();
18135 auto *RetTy = cast<llvm::PointerType>(CGF.ConvertType(BuiltinRetType));
18136 if (RetTy == Call->getType())
18137 return Call;
18138 return CGF.Builder.CreateAddrSpaceCast(Call, RetTy);
18139 }
18140
18141 Value *EmitAMDGPUImplicitArgPtr(CodeGenFunction &CGF) {
18142 auto *F = CGF.CGM.getIntrinsic(Intrinsic::amdgcn_implicitarg_ptr);
18143 auto *Call = CGF.Builder.CreateCall(F);
18144 Call->addRetAttr(
18145 Attribute::getWithDereferenceableBytes(Call->getContext(), 256));
18146 Call->addRetAttr(Attribute::getWithAlignment(Call->getContext(), Align(8)));
18147 return Call;
18148 }
18149
18150 // \p Index is 0, 1, and 2 for x, y, and z dimension, respectively.
18151 /// Emit code based on Code Object ABI version.
18152 /// COV_4 : Emit code to use dispatch ptr
18153 /// COV_5+ : Emit code to use implicitarg ptr
18154 /// COV_NONE : Emit code to load a global variable "__oclc_ABI_version"
18155 /// and use its value for COV_4 or COV_5+ approach. It is used for
18156 /// compiling device libraries in an ABI-agnostic way.
18157 ///
18158 /// Note: "__oclc_ABI_version" is supposed to be emitted and initialized by
18159 /// clang during compilation of user code.
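/// A summary of the offsets used below (restating the code as an aid, not an
/// upstream comment):
///   COV_5+  : load an i16 at byte offset 12 + Index * 2 of the implicit
///             kernarg segment returned by EmitAMDGPUImplicitArgPtr.
///   COV_4   : load an i16 at byte offset 4 + Index * 2 of the HSA
///             kernel_dispatch_packet returned by EmitAMDGPUDispatchPtr.
///   COV_NONE: compute both addresses and select between them on the
///             run-time value of "__oclc_ABI_version".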
18160 Value *EmitAMDGPUWorkGroupSize(CodeGenFunction &CGF, unsigned Index) { 18161 llvm::LoadInst *LD; 18162 18163 auto Cov = CGF.getTarget().getTargetOpts().CodeObjectVersion; 18164 18165 if (Cov == CodeObjectVersionKind::COV_None) { 18166 StringRef Name = "__oclc_ABI_version"; 18167 auto *ABIVersionC = CGF.CGM.getModule().getNamedGlobal(Name); 18168 if (!ABIVersionC) 18169 ABIVersionC = new llvm::GlobalVariable( 18170 CGF.CGM.getModule(), CGF.Int32Ty, false, 18171 llvm::GlobalValue::ExternalLinkage, nullptr, Name, nullptr, 18172 llvm::GlobalVariable::NotThreadLocal, 18173 CGF.CGM.getContext().getTargetAddressSpace(LangAS::opencl_constant)); 18174 18175 // This load will be eliminated by the IPSCCP because it is constant 18176 // weak_odr without externally_initialized. Either changing it to weak or 18177 // adding externally_initialized will keep the load. 18178 Value *ABIVersion = CGF.Builder.CreateAlignedLoad(CGF.Int32Ty, ABIVersionC, 18179 CGF.CGM.getIntAlign()); 18180 18181 Value *IsCOV5 = CGF.Builder.CreateICmpSGE( 18182 ABIVersion, 18183 llvm::ConstantInt::get(CGF.Int32Ty, CodeObjectVersionKind::COV_5)); 18184 18185 // Indexing the implicit kernarg segment. 18186 Value *ImplicitGEP = CGF.Builder.CreateConstGEP1_32( 18187 CGF.Int8Ty, EmitAMDGPUImplicitArgPtr(CGF), 12 + Index * 2); 18188 18189 // Indexing the HSA kernel_dispatch_packet struct. 18190 Value *DispatchGEP = CGF.Builder.CreateConstGEP1_32( 18191 CGF.Int8Ty, EmitAMDGPUDispatchPtr(CGF), 4 + Index * 2); 18192 18193 auto Result = CGF.Builder.CreateSelect(IsCOV5, ImplicitGEP, DispatchGEP); 18194 LD = CGF.Builder.CreateLoad( 18195 Address(Result, CGF.Int16Ty, CharUnits::fromQuantity(2))); 18196 } else { 18197 Value *GEP = nullptr; 18198 if (Cov >= CodeObjectVersionKind::COV_5) { 18199 // Indexing the implicit kernarg segment. 18200 GEP = CGF.Builder.CreateConstGEP1_32( 18201 CGF.Int8Ty, EmitAMDGPUImplicitArgPtr(CGF), 12 + Index * 2); 18202 } else { 18203 // Indexing the HSA kernel_dispatch_packet struct. 18204 GEP = CGF.Builder.CreateConstGEP1_32( 18205 CGF.Int8Ty, EmitAMDGPUDispatchPtr(CGF), 4 + Index * 2); 18206 } 18207 LD = CGF.Builder.CreateLoad( 18208 Address(GEP, CGF.Int16Ty, CharUnits::fromQuantity(2))); 18209 } 18210 18211 llvm::MDBuilder MDHelper(CGF.getLLVMContext()); 18212 llvm::MDNode *RNode = MDHelper.createRange(APInt(16, 1), 18213 APInt(16, CGF.getTarget().getMaxOpenCLWorkGroupSize() + 1)); 18214 LD->setMetadata(llvm::LLVMContext::MD_range, RNode); 18215 LD->setMetadata(llvm::LLVMContext::MD_noundef, 18216 llvm::MDNode::get(CGF.getLLVMContext(), std::nullopt)); 18217 LD->setMetadata(llvm::LLVMContext::MD_invariant_load, 18218 llvm::MDNode::get(CGF.getLLVMContext(), std::nullopt)); 18219 return LD; 18220 } 18221 18222 // \p Index is 0, 1, and 2 for x, y, and z dimension, respectively. 18223 Value *EmitAMDGPUGridSize(CodeGenFunction &CGF, unsigned Index) { 18224 const unsigned XOffset = 12; 18225 auto *DP = EmitAMDGPUDispatchPtr(CGF); 18226 // Indexing the HSA kernel_dispatch_packet struct. 18227 auto *Offset = llvm::ConstantInt::get(CGF.Int32Ty, XOffset + Index * 4); 18228 auto *GEP = CGF.Builder.CreateGEP(CGF.Int8Ty, DP, Offset); 18229 auto *LD = CGF.Builder.CreateLoad( 18230 Address(GEP, CGF.Int32Ty, CharUnits::fromQuantity(4))); 18231 LD->setMetadata(llvm::LLVMContext::MD_invariant_load, 18232 llvm::MDNode::get(CGF.getLLVMContext(), std::nullopt)); 18233 return LD; 18234 } 18235 } // namespace 18236 18237 // For processing memory ordering and memory scope arguments of various 18238 // amdgcn builtins. 
18239 // \p Order takes a C++11 compatible memory-ordering specifier and converts
18240 // it into LLVM's memory ordering specifier using the atomic C ABI, and writes
18241 // to \p AO. \p Scope takes a const char * and converts it into AMDGCN
18242 // specific SyncScopeID and writes it to \p SSID.
18243 void CodeGenFunction::ProcessOrderScopeAMDGCN(Value *Order, Value *Scope,
18244 llvm::AtomicOrdering &AO,
18245 llvm::SyncScope::ID &SSID) {
18246 int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
18247
18248 // Map C11/C++11 memory ordering to LLVM memory ordering
18249 assert(llvm::isValidAtomicOrderingCABI(ord));
18250 switch (static_cast<llvm::AtomicOrderingCABI>(ord)) {
18251 case llvm::AtomicOrderingCABI::acquire:
18252 case llvm::AtomicOrderingCABI::consume:
18253 AO = llvm::AtomicOrdering::Acquire;
18254 break;
18255 case llvm::AtomicOrderingCABI::release:
18256 AO = llvm::AtomicOrdering::Release;
18257 break;
18258 case llvm::AtomicOrderingCABI::acq_rel:
18259 AO = llvm::AtomicOrdering::AcquireRelease;
18260 break;
18261 case llvm::AtomicOrderingCABI::seq_cst:
18262 AO = llvm::AtomicOrdering::SequentiallyConsistent;
18263 break;
18264 case llvm::AtomicOrderingCABI::relaxed:
18265 AO = llvm::AtomicOrdering::Monotonic;
18266 break;
18267 }
18268
18269 // Some of the atomic builtins take the scope as a string name.
18270 StringRef scp;
18271 if (llvm::getConstantStringInfo(Scope, scp)) {
18272 SSID = getLLVMContext().getOrInsertSyncScopeID(scp);
18273 return;
18274 }
18275
18276 // Older builtins had an enum argument for the memory scope.
18277 int scope = cast<llvm::ConstantInt>(Scope)->getZExtValue();
18278 switch (scope) {
18279 case 0: // __MEMORY_SCOPE_SYSTEM
18280 SSID = llvm::SyncScope::System;
18281 break;
18282 case 1: // __MEMORY_SCOPE_DEVICE
18283 SSID = getLLVMContext().getOrInsertSyncScopeID("agent");
18284 break;
18285 case 2: // __MEMORY_SCOPE_WRKGRP
18286 SSID = getLLVMContext().getOrInsertSyncScopeID("workgroup");
18287 break;
18288 case 3: // __MEMORY_SCOPE_WVFRNT
18289 SSID = getLLVMContext().getOrInsertSyncScopeID("wavefront");
18290 break;
18291 case 4: // __MEMORY_SCOPE_SINGLE
18292 SSID = llvm::SyncScope::SingleThread;
18293 break;
18294 default:
18295 SSID = llvm::SyncScope::System;
18296 break;
18297 }
18298 }
18299
18300 llvm::Value *CodeGenFunction::EmitScalarOrConstFoldImmArg(unsigned ICEArguments,
18301 unsigned Idx,
18302 const CallExpr *E) {
18303 llvm::Value *Arg = nullptr;
18304 if ((ICEArguments & (1 << Idx)) == 0) {
18305 Arg = EmitScalarExpr(E->getArg(Idx));
18306 } else {
18307 // If this is required to be a constant, constant fold it so that we
18308 // know that the generated intrinsic gets a ConstantInt.
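// For instance (illustrative restatement): a builtin whose prototype marks
// argument Idx as an integer constant expression has bit Idx set in
// ICEArguments; such an argument is evaluated with getIntegerConstantExpr()
// below and materialized directly as a ConstantInt, so the intrinsic call
// always receives an immediate rather than an arbitrary scalar value.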
18309 std::optional<llvm::APSInt> Result = 18310 E->getArg(Idx)->getIntegerConstantExpr(getContext()); 18311 assert(Result && "Expected argument to be a constant"); 18312 Arg = llvm::ConstantInt::get(getLLVMContext(), *Result); 18313 } 18314 return Arg; 18315 } 18316 18317 Intrinsic::ID getDotProductIntrinsic(QualType QT, int elementCount) { 18318 if (QT->hasFloatingRepresentation()) { 18319 switch (elementCount) { 18320 case 2: 18321 return Intrinsic::dx_dot2; 18322 case 3: 18323 return Intrinsic::dx_dot3; 18324 case 4: 18325 return Intrinsic::dx_dot4; 18326 } 18327 } 18328 if (QT->hasSignedIntegerRepresentation()) 18329 return Intrinsic::dx_sdot; 18330 18331 assert(QT->hasUnsignedIntegerRepresentation()); 18332 return Intrinsic::dx_udot; 18333 } 18334 18335 Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID, 18336 const CallExpr *E) { 18337 if (!getLangOpts().HLSL) 18338 return nullptr; 18339 18340 switch (BuiltinID) { 18341 case Builtin::BI__builtin_hlsl_elementwise_all: { 18342 Value *Op0 = EmitScalarExpr(E->getArg(0)); 18343 return Builder.CreateIntrinsic( 18344 /*ReturnType=*/llvm::Type::getInt1Ty(getLLVMContext()), 18345 CGM.getHLSLRuntime().getAllIntrinsic(), ArrayRef<Value *>{Op0}, nullptr, 18346 "hlsl.all"); 18347 } 18348 case Builtin::BI__builtin_hlsl_elementwise_any: { 18349 Value *Op0 = EmitScalarExpr(E->getArg(0)); 18350 return Builder.CreateIntrinsic( 18351 /*ReturnType=*/llvm::Type::getInt1Ty(getLLVMContext()), 18352 CGM.getHLSLRuntime().getAnyIntrinsic(), ArrayRef<Value *>{Op0}, nullptr, 18353 "hlsl.any"); 18354 } 18355 case Builtin::BI__builtin_hlsl_elementwise_clamp: { 18356 Value *OpX = EmitScalarExpr(E->getArg(0)); 18357 Value *OpMin = EmitScalarExpr(E->getArg(1)); 18358 Value *OpMax = EmitScalarExpr(E->getArg(2)); 18359 18360 QualType Ty = E->getArg(0)->getType(); 18361 bool IsUnsigned = false; 18362 if (auto *VecTy = Ty->getAs<VectorType>()) 18363 Ty = VecTy->getElementType(); 18364 IsUnsigned = Ty->isUnsignedIntegerType(); 18365 return Builder.CreateIntrinsic( 18366 /*ReturnType=*/OpX->getType(), 18367 IsUnsigned ? 
Intrinsic::dx_uclamp : Intrinsic::dx_clamp,
18368 ArrayRef<Value *>{OpX, OpMin, OpMax}, nullptr, "dx.clamp");
18369 }
18370 case Builtin::BI__builtin_hlsl_dot: {
18371 Value *Op0 = EmitScalarExpr(E->getArg(0));
18372 Value *Op1 = EmitScalarExpr(E->getArg(1));
18373 llvm::Type *T0 = Op0->getType();
18374 llvm::Type *T1 = Op1->getType();
18375 if (!T0->isVectorTy() && !T1->isVectorTy()) {
18376 if (T0->isFloatingPointTy())
18377 return Builder.CreateFMul(Op0, Op1, "dx.dot");
18378
18379 if (T0->isIntegerTy())
18380 return Builder.CreateMul(Op0, Op1, "dx.dot");
18381
18382 // Bools should have been promoted
18383 llvm_unreachable(
18384 "Scalar dot product is only supported on ints and floats.");
18385 }
18386 // A VectorSplat should have happened
18387 assert(T0->isVectorTy() && T1->isVectorTy() &&
18388 "Dot product of vector and scalar is not supported.");
18389
18390 // A vector sext or sitofp should have happened
18391 assert(T0->getScalarType() == T1->getScalarType() &&
18392 "Dot product of vectors needs the same element types.");
18393
18394 auto *VecTy0 = E->getArg(0)->getType()->getAs<VectorType>();
18395 [[maybe_unused]] auto *VecTy1 =
18396 E->getArg(1)->getType()->getAs<VectorType>();
18397 // A HLSLVectorTruncation should have happened
18398 assert(VecTy0->getNumElements() == VecTy1->getNumElements() &&
18399 "Dot product requires vectors to be of the same size.");
18400
18401 return Builder.CreateIntrinsic(
18402 /*ReturnType=*/T0->getScalarType(),
18403 getDotProductIntrinsic(E->getArg(0)->getType(),
18404 VecTy0->getNumElements()),
18405 ArrayRef<Value *>{Op0, Op1}, nullptr, "dx.dot");
18406 } break;
18407 case Builtin::BI__builtin_hlsl_lerp: {
18408 Value *X = EmitScalarExpr(E->getArg(0));
18409 Value *Y = EmitScalarExpr(E->getArg(1));
18410 Value *S = EmitScalarExpr(E->getArg(2));
18411 if (!E->getArg(0)->getType()->hasFloatingRepresentation())
18412 llvm_unreachable("lerp operand must have a float representation");
18413 return Builder.CreateIntrinsic(
18414 /*ReturnType=*/X->getType(), CGM.getHLSLRuntime().getLerpIntrinsic(),
18415 ArrayRef<Value *>{X, Y, S}, nullptr, "hlsl.lerp");
18416 }
18417 case Builtin::BI__builtin_hlsl_elementwise_frac: {
18418 Value *Op0 = EmitScalarExpr(E->getArg(0));
18419 if (!E->getArg(0)->getType()->hasFloatingRepresentation())
18420 llvm_unreachable("frac operand must have a float representation");
18421 return Builder.CreateIntrinsic(
18422 /*ReturnType=*/Op0->getType(), Intrinsic::dx_frac,
18423 ArrayRef<Value *>{Op0}, nullptr, "dx.frac");
18424 }
18425 case Builtin::BI__builtin_hlsl_elementwise_isinf: {
18426 Value *Op0 = EmitScalarExpr(E->getArg(0));
18427 llvm::Type *Xty = Op0->getType();
18428 llvm::Type *retType = llvm::Type::getInt1Ty(this->getLLVMContext());
18429 if (Xty->isVectorTy()) {
18430 auto *XVecTy = E->getArg(0)->getType()->getAs<VectorType>();
18431 retType = llvm::VectorType::get(
18432 retType, ElementCount::getFixed(XVecTy->getNumElements()));
18433 }
18434 if (!E->getArg(0)->getType()->hasFloatingRepresentation())
18435 llvm_unreachable("isinf operand must have a float representation");
18436 return Builder.CreateIntrinsic(retType, Intrinsic::dx_isinf,
18437 ArrayRef<Value *>{Op0}, nullptr, "dx.isinf");
18438 }
18439 case Builtin::BI__builtin_hlsl_mad: {
18440 Value *M = EmitScalarExpr(E->getArg(0));
18441 Value *A = EmitScalarExpr(E->getArg(1));
18442 Value *B = EmitScalarExpr(E->getArg(2));
18443 if (E->getArg(0)->getType()->hasFloatingRepresentation())
18444 return Builder.CreateIntrinsic(
18445 /*ReturnType*/
M->getType(), Intrinsic::fmuladd, 18446 ArrayRef<Value *>{M, A, B}, nullptr, "hlsl.fmad"); 18447 18448 if (E->getArg(0)->getType()->hasSignedIntegerRepresentation()) { 18449 if (CGM.getTarget().getTriple().getArch() == llvm::Triple::dxil) 18450 return Builder.CreateIntrinsic( 18451 /*ReturnType*/ M->getType(), Intrinsic::dx_imad, 18452 ArrayRef<Value *>{M, A, B}, nullptr, "dx.imad"); 18453 18454 Value *Mul = Builder.CreateNSWMul(M, A); 18455 return Builder.CreateNSWAdd(Mul, B); 18456 } 18457 assert(E->getArg(0)->getType()->hasUnsignedIntegerRepresentation()); 18458 if (CGM.getTarget().getTriple().getArch() == llvm::Triple::dxil) 18459 return Builder.CreateIntrinsic( 18460 /*ReturnType=*/M->getType(), Intrinsic::dx_umad, 18461 ArrayRef<Value *>{M, A, B}, nullptr, "dx.umad"); 18462 18463 Value *Mul = Builder.CreateNUWMul(M, A); 18464 return Builder.CreateNUWAdd(Mul, B); 18465 } 18466 case Builtin::BI__builtin_hlsl_elementwise_rcp: { 18467 Value *Op0 = EmitScalarExpr(E->getArg(0)); 18468 if (!E->getArg(0)->getType()->hasFloatingRepresentation()) 18469 llvm_unreachable("rcp operand must have a float representation"); 18470 llvm::Type *Ty = Op0->getType(); 18471 llvm::Type *EltTy = Ty->getScalarType(); 18472 Constant *One = Ty->isVectorTy() 18473 ? ConstantVector::getSplat( 18474 ElementCount::getFixed( 18475 cast<FixedVectorType>(Ty)->getNumElements()), 18476 ConstantFP::get(EltTy, 1.0)) 18477 : ConstantFP::get(EltTy, 1.0); 18478 return Builder.CreateFDiv(One, Op0, "hlsl.rcp"); 18479 } 18480 case Builtin::BI__builtin_hlsl_elementwise_rsqrt: { 18481 Value *Op0 = EmitScalarExpr(E->getArg(0)); 18482 if (!E->getArg(0)->getType()->hasFloatingRepresentation()) 18483 llvm_unreachable("rsqrt operand must have a float representation"); 18484 return Builder.CreateIntrinsic( 18485 /*ReturnType=*/Op0->getType(), CGM.getHLSLRuntime().getRsqrtIntrinsic(), 18486 ArrayRef<Value *>{Op0}, nullptr, "hlsl.rsqrt"); 18487 } 18488 case Builtin::BI__builtin_hlsl_wave_get_lane_index: { 18489 return EmitRuntimeCall(CGM.CreateRuntimeFunction( 18490 llvm::FunctionType::get(IntTy, {}, false), "__hlsl_wave_get_lane_index", 18491 {}, false, true)); 18492 } 18493 } 18494 return nullptr; 18495 } 18496 18497 void CodeGenFunction::AddAMDGPUFenceAddressSpaceMMRA(llvm::Instruction *Inst, 18498 const CallExpr *E) { 18499 constexpr const char *Tag = "amdgpu-as"; 18500 18501 LLVMContext &Ctx = Inst->getContext(); 18502 SmallVector<MMRAMetadata::TagT, 3> MMRAs; 18503 for (unsigned K = 2; K < E->getNumArgs(); ++K) { 18504 llvm::Value *V = EmitScalarExpr(E->getArg(K)); 18505 StringRef AS; 18506 if (llvm::getConstantStringInfo(V, AS)) { 18507 MMRAs.push_back({Tag, AS}); 18508 // TODO: Delete the resulting unused constant? 18509 continue; 18510 } 18511 CGM.Error(E->getExprLoc(), 18512 "expected an address space name as a string literal"); 18513 } 18514 18515 llvm::sort(MMRAs); 18516 MMRAs.erase(llvm::unique(MMRAs), MMRAs.end()); 18517 Inst->setMetadata(LLVMContext::MD_mmra, MMRAMetadata::getMD(Ctx, MMRAs)); 18518 } 18519 18520 Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, 18521 const CallExpr *E) { 18522 llvm::AtomicOrdering AO = llvm::AtomicOrdering::SequentiallyConsistent; 18523 llvm::SyncScope::ID SSID; 18524 switch (BuiltinID) { 18525 case AMDGPU::BI__builtin_amdgcn_div_scale: 18526 case AMDGPU::BI__builtin_amdgcn_div_scalef: { 18527 // Translate from the intrinsics's struct return to the builtin's out 18528 // argument. 
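// A sketch of the translation performed below (an illustrative restatement,
// not part of the upstream comment): the div.scale intrinsic returns a
// two-element aggregate of {scaled result, i1 flag}, while the builtin takes
// a pointer as its fourth argument. Element 0 of the aggregate becomes the
// return value, and element 1 is zero-extended and stored through that
// pointer.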
18529 18530 Address FlagOutPtr = EmitPointerWithAlignment(E->getArg(3)); 18531 18532 llvm::Value *X = EmitScalarExpr(E->getArg(0)); 18533 llvm::Value *Y = EmitScalarExpr(E->getArg(1)); 18534 llvm::Value *Z = EmitScalarExpr(E->getArg(2)); 18535 18536 llvm::Function *Callee = CGM.getIntrinsic(Intrinsic::amdgcn_div_scale, 18537 X->getType()); 18538 18539 llvm::Value *Tmp = Builder.CreateCall(Callee, {X, Y, Z}); 18540 18541 llvm::Value *Result = Builder.CreateExtractValue(Tmp, 0); 18542 llvm::Value *Flag = Builder.CreateExtractValue(Tmp, 1); 18543 18544 llvm::Type *RealFlagType = FlagOutPtr.getElementType(); 18545 18546 llvm::Value *FlagExt = Builder.CreateZExt(Flag, RealFlagType); 18547 Builder.CreateStore(FlagExt, FlagOutPtr); 18548 return Result; 18549 } 18550 case AMDGPU::BI__builtin_amdgcn_div_fmas: 18551 case AMDGPU::BI__builtin_amdgcn_div_fmasf: { 18552 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0)); 18553 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1)); 18554 llvm::Value *Src2 = EmitScalarExpr(E->getArg(2)); 18555 llvm::Value *Src3 = EmitScalarExpr(E->getArg(3)); 18556 18557 llvm::Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_div_fmas, 18558 Src0->getType()); 18559 llvm::Value *Src3ToBool = Builder.CreateIsNotNull(Src3); 18560 return Builder.CreateCall(F, {Src0, Src1, Src2, Src3ToBool}); 18561 } 18562 18563 case AMDGPU::BI__builtin_amdgcn_ds_swizzle: 18564 return emitBuiltinWithOneOverloadedType<2>(*this, E, 18565 Intrinsic::amdgcn_ds_swizzle); 18566 case AMDGPU::BI__builtin_amdgcn_mov_dpp8: 18567 return emitBuiltinWithOneOverloadedType<2>(*this, E, 18568 Intrinsic::amdgcn_mov_dpp8); 18569 case AMDGPU::BI__builtin_amdgcn_mov_dpp: 18570 case AMDGPU::BI__builtin_amdgcn_update_dpp: { 18571 llvm::SmallVector<llvm::Value *, 6> Args; 18572 // Find out if any arguments are required to be integer constant 18573 // expressions. 18574 unsigned ICEArguments = 0; 18575 ASTContext::GetBuiltinTypeError Error; 18576 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments); 18577 assert(Error == ASTContext::GE_None && "Should not codegen an error"); 18578 for (unsigned I = 0; I != E->getNumArgs(); ++I) { 18579 Args.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, I, E)); 18580 } 18581 assert(Args.size() == 5 || Args.size() == 6); 18582 if (Args.size() == 5) 18583 Args.insert(Args.begin(), llvm::PoisonValue::get(Args[0]->getType())); 18584 Function *F = 18585 CGM.getIntrinsic(Intrinsic::amdgcn_update_dpp, Args[0]->getType()); 18586 return Builder.CreateCall(F, Args); 18587 } 18588 case AMDGPU::BI__builtin_amdgcn_permlane16: 18589 case AMDGPU::BI__builtin_amdgcn_permlanex16: 18590 return emitBuiltinWithOneOverloadedType<6>( 18591 *this, E, 18592 BuiltinID == AMDGPU::BI__builtin_amdgcn_permlane16 18593 ? 
Intrinsic::amdgcn_permlane16 18594 : Intrinsic::amdgcn_permlanex16); 18595 case AMDGPU::BI__builtin_amdgcn_permlane64: 18596 return emitBuiltinWithOneOverloadedType<1>(*this, E, 18597 Intrinsic::amdgcn_permlane64); 18598 case AMDGPU::BI__builtin_amdgcn_readlane: 18599 return emitBuiltinWithOneOverloadedType<2>(*this, E, 18600 Intrinsic::amdgcn_readlane); 18601 case AMDGPU::BI__builtin_amdgcn_readfirstlane: 18602 return emitBuiltinWithOneOverloadedType<1>(*this, E, 18603 Intrinsic::amdgcn_readfirstlane); 18604 case AMDGPU::BI__builtin_amdgcn_div_fixup: 18605 case AMDGPU::BI__builtin_amdgcn_div_fixupf: 18606 case AMDGPU::BI__builtin_amdgcn_div_fixuph: 18607 return emitBuiltinWithOneOverloadedType<3>(*this, E, 18608 Intrinsic::amdgcn_div_fixup); 18609 case AMDGPU::BI__builtin_amdgcn_trig_preop: 18610 case AMDGPU::BI__builtin_amdgcn_trig_preopf: 18611 return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_trig_preop); 18612 case AMDGPU::BI__builtin_amdgcn_rcp: 18613 case AMDGPU::BI__builtin_amdgcn_rcpf: 18614 case AMDGPU::BI__builtin_amdgcn_rcph: 18615 return emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::amdgcn_rcp); 18616 case AMDGPU::BI__builtin_amdgcn_sqrt: 18617 case AMDGPU::BI__builtin_amdgcn_sqrtf: 18618 case AMDGPU::BI__builtin_amdgcn_sqrth: 18619 return emitBuiltinWithOneOverloadedType<1>(*this, E, 18620 Intrinsic::amdgcn_sqrt); 18621 case AMDGPU::BI__builtin_amdgcn_rsq: 18622 case AMDGPU::BI__builtin_amdgcn_rsqf: 18623 case AMDGPU::BI__builtin_amdgcn_rsqh: 18624 return emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::amdgcn_rsq); 18625 case AMDGPU::BI__builtin_amdgcn_rsq_clamp: 18626 case AMDGPU::BI__builtin_amdgcn_rsq_clampf: 18627 return emitBuiltinWithOneOverloadedType<1>(*this, E, 18628 Intrinsic::amdgcn_rsq_clamp); 18629 case AMDGPU::BI__builtin_amdgcn_sinf: 18630 case AMDGPU::BI__builtin_amdgcn_sinh: 18631 return emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::amdgcn_sin); 18632 case AMDGPU::BI__builtin_amdgcn_cosf: 18633 case AMDGPU::BI__builtin_amdgcn_cosh: 18634 return emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::amdgcn_cos); 18635 case AMDGPU::BI__builtin_amdgcn_dispatch_ptr: 18636 return EmitAMDGPUDispatchPtr(*this, E); 18637 case AMDGPU::BI__builtin_amdgcn_logf: 18638 return emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::amdgcn_log); 18639 case AMDGPU::BI__builtin_amdgcn_exp2f: 18640 return emitBuiltinWithOneOverloadedType<1>(*this, E, 18641 Intrinsic::amdgcn_exp2); 18642 case AMDGPU::BI__builtin_amdgcn_log_clampf: 18643 return emitBuiltinWithOneOverloadedType<1>(*this, E, 18644 Intrinsic::amdgcn_log_clamp); 18645 case AMDGPU::BI__builtin_amdgcn_ldexp: 18646 case AMDGPU::BI__builtin_amdgcn_ldexpf: { 18647 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0)); 18648 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1)); 18649 llvm::Function *F = 18650 CGM.getIntrinsic(Intrinsic::ldexp, {Src0->getType(), Src1->getType()}); 18651 return Builder.CreateCall(F, {Src0, Src1}); 18652 } 18653 case AMDGPU::BI__builtin_amdgcn_ldexph: { 18654 // The raw instruction has a different behavior for out of bounds exponent 18655 // values (implicit truncation instead of saturate to short_min/short_max). 
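// In other words (an illustrative note): the builtin's i32 exponent is
// explicitly truncated to i16 below and passed to the overloaded llvm.ldexp
// intrinsic, roughly
//   call half @llvm.ldexp.f16.i16(half %x, i16 trunc(%e))
// rather than being clamped to the i16 range first.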
18656 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0)); 18657 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1)); 18658 llvm::Function *F = 18659 CGM.getIntrinsic(Intrinsic::ldexp, {Src0->getType(), Int16Ty}); 18660 return Builder.CreateCall(F, {Src0, Builder.CreateTrunc(Src1, Int16Ty)}); 18661 } 18662 case AMDGPU::BI__builtin_amdgcn_frexp_mant: 18663 case AMDGPU::BI__builtin_amdgcn_frexp_mantf: 18664 case AMDGPU::BI__builtin_amdgcn_frexp_manth: 18665 return emitBuiltinWithOneOverloadedType<1>(*this, E, 18666 Intrinsic::amdgcn_frexp_mant); 18667 case AMDGPU::BI__builtin_amdgcn_frexp_exp: 18668 case AMDGPU::BI__builtin_amdgcn_frexp_expf: { 18669 Value *Src0 = EmitScalarExpr(E->getArg(0)); 18670 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp, 18671 { Builder.getInt32Ty(), Src0->getType() }); 18672 return Builder.CreateCall(F, Src0); 18673 } 18674 case AMDGPU::BI__builtin_amdgcn_frexp_exph: { 18675 Value *Src0 = EmitScalarExpr(E->getArg(0)); 18676 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp, 18677 { Builder.getInt16Ty(), Src0->getType() }); 18678 return Builder.CreateCall(F, Src0); 18679 } 18680 case AMDGPU::BI__builtin_amdgcn_fract: 18681 case AMDGPU::BI__builtin_amdgcn_fractf: 18682 case AMDGPU::BI__builtin_amdgcn_fracth: 18683 return emitBuiltinWithOneOverloadedType<1>(*this, E, 18684 Intrinsic::amdgcn_fract); 18685 case AMDGPU::BI__builtin_amdgcn_lerp: 18686 return emitBuiltinWithOneOverloadedType<3>(*this, E, 18687 Intrinsic::amdgcn_lerp); 18688 case AMDGPU::BI__builtin_amdgcn_ubfe: 18689 return emitBuiltinWithOneOverloadedType<3>(*this, E, 18690 Intrinsic::amdgcn_ubfe); 18691 case AMDGPU::BI__builtin_amdgcn_sbfe: 18692 return emitBuiltinWithOneOverloadedType<3>(*this, E, 18693 Intrinsic::amdgcn_sbfe); 18694 case AMDGPU::BI__builtin_amdgcn_ballot_w32: 18695 case AMDGPU::BI__builtin_amdgcn_ballot_w64: { 18696 llvm::Type *ResultType = ConvertType(E->getType()); 18697 llvm::Value *Src = EmitScalarExpr(E->getArg(0)); 18698 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, { ResultType }); 18699 return Builder.CreateCall(F, { Src }); 18700 } 18701 case AMDGPU::BI__builtin_amdgcn_uicmp: 18702 case AMDGPU::BI__builtin_amdgcn_uicmpl: 18703 case AMDGPU::BI__builtin_amdgcn_sicmp: 18704 case AMDGPU::BI__builtin_amdgcn_sicmpl: { 18705 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0)); 18706 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1)); 18707 llvm::Value *Src2 = EmitScalarExpr(E->getArg(2)); 18708 18709 // FIXME-GFX10: How should 32 bit mask be handled? 18710 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_icmp, 18711 { Builder.getInt64Ty(), Src0->getType() }); 18712 return Builder.CreateCall(F, { Src0, Src1, Src2 }); 18713 } 18714 case AMDGPU::BI__builtin_amdgcn_fcmp: 18715 case AMDGPU::BI__builtin_amdgcn_fcmpf: { 18716 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0)); 18717 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1)); 18718 llvm::Value *Src2 = EmitScalarExpr(E->getArg(2)); 18719 18720 // FIXME-GFX10: How should 32 bit mask be handled? 
18721 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_fcmp, 18722 { Builder.getInt64Ty(), Src0->getType() }); 18723 return Builder.CreateCall(F, { Src0, Src1, Src2 }); 18724 } 18725 case AMDGPU::BI__builtin_amdgcn_class: 18726 case AMDGPU::BI__builtin_amdgcn_classf: 18727 case AMDGPU::BI__builtin_amdgcn_classh: 18728 return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_class); 18729 case AMDGPU::BI__builtin_amdgcn_fmed3f: 18730 case AMDGPU::BI__builtin_amdgcn_fmed3h: 18731 return emitBuiltinWithOneOverloadedType<3>(*this, E, 18732 Intrinsic::amdgcn_fmed3); 18733 case AMDGPU::BI__builtin_amdgcn_ds_append: 18734 case AMDGPU::BI__builtin_amdgcn_ds_consume: { 18735 Intrinsic::ID Intrin = BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_append ? 18736 Intrinsic::amdgcn_ds_append : Intrinsic::amdgcn_ds_consume; 18737 Value *Src0 = EmitScalarExpr(E->getArg(0)); 18738 Function *F = CGM.getIntrinsic(Intrin, { Src0->getType() }); 18739 return Builder.CreateCall(F, { Src0, Builder.getFalse() }); 18740 } 18741 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64: 18742 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32: 18743 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16: 18744 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64: 18745 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64: 18746 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64: 18747 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64: 18748 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64: 18749 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32: 18750 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16: { 18751 Intrinsic::ID IID; 18752 llvm::Type *ArgTy = llvm::Type::getDoubleTy(getLLVMContext()); 18753 switch (BuiltinID) { 18754 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32: 18755 ArgTy = llvm::Type::getFloatTy(getLLVMContext()); 18756 IID = Intrinsic::amdgcn_global_atomic_fadd; 18757 break; 18758 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16: 18759 ArgTy = llvm::FixedVectorType::get( 18760 llvm::Type::getHalfTy(getLLVMContext()), 2); 18761 IID = Intrinsic::amdgcn_global_atomic_fadd; 18762 break; 18763 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64: 18764 IID = Intrinsic::amdgcn_global_atomic_fadd; 18765 break; 18766 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64: 18767 IID = Intrinsic::amdgcn_global_atomic_fmin; 18768 break; 18769 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64: 18770 IID = Intrinsic::amdgcn_global_atomic_fmax; 18771 break; 18772 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64: 18773 IID = Intrinsic::amdgcn_flat_atomic_fadd; 18774 break; 18775 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64: 18776 IID = Intrinsic::amdgcn_flat_atomic_fmin; 18777 break; 18778 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64: 18779 IID = Intrinsic::amdgcn_flat_atomic_fmax; 18780 break; 18781 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32: 18782 ArgTy = llvm::Type::getFloatTy(getLLVMContext()); 18783 IID = Intrinsic::amdgcn_flat_atomic_fadd; 18784 break; 18785 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16: 18786 ArgTy = llvm::FixedVectorType::get( 18787 llvm::Type::getHalfTy(getLLVMContext()), 2); 18788 IID = Intrinsic::amdgcn_flat_atomic_fadd; 18789 break; 18790 } 18791 llvm::Value *Addr = EmitScalarExpr(E->getArg(0)); 18792 llvm::Value *Val = EmitScalarExpr(E->getArg(1)); 18793 llvm::Function *F = 18794 CGM.getIntrinsic(IID, {ArgTy, Addr->getType(), Val->getType()}); 18795 return Builder.CreateCall(F, {Addr, Val}); 18796 } 18797 case 
AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16: 18798 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16: { 18799 Intrinsic::ID IID; 18800 switch (BuiltinID) { 18801 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16: 18802 IID = Intrinsic::amdgcn_global_atomic_fadd_v2bf16; 18803 break; 18804 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16: 18805 IID = Intrinsic::amdgcn_flat_atomic_fadd_v2bf16; 18806 break; 18807 } 18808 llvm::Value *Addr = EmitScalarExpr(E->getArg(0)); 18809 llvm::Value *Val = EmitScalarExpr(E->getArg(1)); 18810 llvm::Function *F = CGM.getIntrinsic(IID, {Addr->getType()}); 18811 return Builder.CreateCall(F, {Addr, Val}); 18812 } 18813 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_i32: 18814 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_v2i32: 18815 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4i16: 18816 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4f16: 18817 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4bf16: 18818 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8i16: 18819 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8f16: 18820 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8bf16: { 18821 18822 Intrinsic::ID IID; 18823 switch (BuiltinID) { 18824 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_i32: 18825 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_v2i32: 18826 IID = Intrinsic::amdgcn_global_load_tr_b64; 18827 break; 18828 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4i16: 18829 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4f16: 18830 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4bf16: 18831 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8i16: 18832 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8f16: 18833 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8bf16: 18834 IID = Intrinsic::amdgcn_global_load_tr_b128; 18835 break; 18836 } 18837 llvm::Type *LoadTy = ConvertType(E->getType()); 18838 llvm::Value *Addr = EmitScalarExpr(E->getArg(0)); 18839 llvm::Function *F = CGM.getIntrinsic(IID, {LoadTy}); 18840 return Builder.CreateCall(F, {Addr}); 18841 } 18842 case AMDGPU::BI__builtin_amdgcn_get_fpenv: { 18843 Function *F = CGM.getIntrinsic(Intrinsic::get_fpenv, 18844 {llvm::Type::getInt64Ty(getLLVMContext())}); 18845 return Builder.CreateCall(F); 18846 } 18847 case AMDGPU::BI__builtin_amdgcn_set_fpenv: { 18848 Function *F = CGM.getIntrinsic(Intrinsic::set_fpenv, 18849 {llvm::Type::getInt64Ty(getLLVMContext())}); 18850 llvm::Value *Env = EmitScalarExpr(E->getArg(0)); 18851 return Builder.CreateCall(F, {Env}); 18852 } 18853 case AMDGPU::BI__builtin_amdgcn_read_exec: 18854 return EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty, false); 18855 case AMDGPU::BI__builtin_amdgcn_read_exec_lo: 18856 return EmitAMDGCNBallotForExec(*this, E, Int32Ty, Int32Ty, false); 18857 case AMDGPU::BI__builtin_amdgcn_read_exec_hi: 18858 return EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty, true); 18859 case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray: 18860 case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_h: 18861 case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_l: 18862 case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_lh: { 18863 llvm::Value *NodePtr = EmitScalarExpr(E->getArg(0)); 18864 llvm::Value *RayExtent = EmitScalarExpr(E->getArg(1)); 18865 llvm::Value *RayOrigin = EmitScalarExpr(E->getArg(2)); 18866 llvm::Value *RayDir = EmitScalarExpr(E->getArg(3)); 18867 llvm::Value *RayInverseDir = EmitScalarExpr(E->getArg(4)); 18868 
llvm::Value *TextureDescr = EmitScalarExpr(E->getArg(5)); 18869 18870 // The builtins take these arguments as vec4 where the last element is 18871 // ignored. The intrinsic takes them as vec3. 18872 RayOrigin = Builder.CreateShuffleVector(RayOrigin, RayOrigin, 18873 ArrayRef<int>{0, 1, 2}); 18874 RayDir = 18875 Builder.CreateShuffleVector(RayDir, RayDir, ArrayRef<int>{0, 1, 2}); 18876 RayInverseDir = Builder.CreateShuffleVector(RayInverseDir, RayInverseDir, 18877 ArrayRef<int>{0, 1, 2}); 18878 18879 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_image_bvh_intersect_ray, 18880 {NodePtr->getType(), RayDir->getType()}); 18881 return Builder.CreateCall(F, {NodePtr, RayExtent, RayOrigin, RayDir, 18882 RayInverseDir, TextureDescr}); 18883 } 18884 18885 case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_rtn: { 18886 SmallVector<Value *, 4> Args; 18887 for (int i = 0, e = E->getNumArgs(); i != e; ++i) 18888 Args.push_back(EmitScalarExpr(E->getArg(i))); 18889 18890 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ds_bvh_stack_rtn); 18891 Value *Call = Builder.CreateCall(F, Args); 18892 Value *Rtn = Builder.CreateExtractValue(Call, 0); 18893 Value *A = Builder.CreateExtractValue(Call, 1); 18894 llvm::Type *RetTy = ConvertType(E->getType()); 18895 Value *I0 = Builder.CreateInsertElement(PoisonValue::get(RetTy), Rtn, 18896 (uint64_t)0); 18897 return Builder.CreateInsertElement(I0, A, 1); 18898 } 18899 18900 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32: 18901 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w32: 18902 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64: 18903 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w64: 18904 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32: 18905 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w32: 18906 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64: 18907 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w64: 18908 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32: 18909 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64: 18910 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32: 18911 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64: 18912 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32: 18913 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64: 18914 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32: 18915 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64: 18916 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32_gfx12: 18917 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64_gfx12: 18918 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32_gfx12: 18919 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64_gfx12: 18920 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32_gfx12: 18921 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64_gfx12: 18922 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32_gfx12: 18923 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64_gfx12: 18924 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32_gfx12: 18925 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64_gfx12: 18926 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32_gfx12: 18927 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64_gfx12: 18928 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w32_gfx12: 18929 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w64_gfx12: 18930 case 
AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w32_gfx12: 18931 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w64_gfx12: 18932 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w32_gfx12: 18933 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w64_gfx12: 18934 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w32_gfx12: 18935 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w64_gfx12: 18936 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w32_gfx12: 18937 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w64_gfx12: 18938 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w32: 18939 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w64: 18940 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w32: 18941 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w64: 18942 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w32: 18943 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w64: 18944 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w32: 18945 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w64: 18946 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w32: 18947 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w64: 18948 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w32: 18949 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w64: 18950 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w32: 18951 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w64: 18952 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w32: 18953 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w64: 18954 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w32: 18955 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w64: 18956 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w32: 18957 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w64: 18958 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w32: 18959 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w64: { 18960 18961 // These operations perform a matrix multiplication and accumulation of 18962 // the form: 18963 // D = A * B + C 18964 // We need to specify one type for matrices AB and one for matrices CD. 18965 // Sparse matrix operations can have different types for A and B as well as 18966 // an additional type for sparsity index. 18967 // Destination type should be put before types used for source operands. 18968 SmallVector<unsigned, 2> ArgsForMatchingMatrixTypes; 18969 // On GFX12, the intrinsics with 16-bit accumulator use a packed layout. 18970 // There is no need for the variable opsel argument, so always set it to 18971 // "false". 
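// As an illustration (restating the table below, not upstream text): an entry
// such as ArgsForMatchingMatrixTypes = {2, 0} records which operand positions
// supply the types the overloaded intrinsic is instantiated on, here operand 2
// (the C/D accumulator) and operand 0 (the A/B inputs). The SWMMAC entries
// list four positions because A, B, the accumulator, and the sparsity index
// may all have distinct types.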
18972 bool AppendFalseForOpselArg = false; 18973 unsigned BuiltinWMMAOp; 18974 18975 switch (BuiltinID) { 18976 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32: 18977 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64: 18978 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32_gfx12: 18979 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64_gfx12: 18980 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB 18981 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_f16; 18982 break; 18983 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32: 18984 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64: 18985 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32_gfx12: 18986 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64_gfx12: 18987 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB 18988 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_bf16; 18989 break; 18990 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32_gfx12: 18991 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64_gfx12: 18992 AppendFalseForOpselArg = true; 18993 [[fallthrough]]; 18994 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32: 18995 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64: 18996 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB 18997 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x16_f16; 18998 break; 18999 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32_gfx12: 19000 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64_gfx12: 19001 AppendFalseForOpselArg = true; 19002 [[fallthrough]]; 19003 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32: 19004 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64: 19005 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB 19006 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_bf16_16x16x16_bf16; 19007 break; 19008 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w32: 19009 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w64: 19010 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB 19011 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x16_f16_tied; 19012 break; 19013 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w32: 19014 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w64: 19015 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB 19016 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_bf16_16x16x16_bf16_tied; 19017 break; 19018 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32: 19019 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64: 19020 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32_gfx12: 19021 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64_gfx12: 19022 ArgsForMatchingMatrixTypes = {4, 1}; // CD, AB 19023 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x16_iu8; 19024 break; 19025 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32: 19026 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64: 19027 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32_gfx12: 19028 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64_gfx12: 19029 ArgsForMatchingMatrixTypes = {4, 1}; // CD, AB 19030 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x16_iu4; 19031 break; 19032 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w32_gfx12: 19033 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w64_gfx12: 19034 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB 19035 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_fp8_fp8; 19036 break; 19037 case 
AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w32_gfx12: 19038 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w64_gfx12: 19039 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB 19040 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_fp8_bf8; 19041 break; 19042 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w32_gfx12: 19043 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w64_gfx12: 19044 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB 19045 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_bf8_fp8; 19046 break; 19047 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w32_gfx12: 19048 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w64_gfx12: 19049 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB 19050 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_bf8_bf8; 19051 break; 19052 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w32_gfx12: 19053 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w64_gfx12: 19054 ArgsForMatchingMatrixTypes = {4, 1}; // CD, AB 19055 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x32_iu4; 19056 break; 19057 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w32: 19058 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w64: 19059 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index 19060 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_f16; 19061 break; 19062 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w32: 19063 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w64: 19064 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index 19065 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_bf16; 19066 break; 19067 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w32: 19068 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w64: 19069 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index 19070 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f16_16x16x32_f16; 19071 break; 19072 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w32: 19073 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w64: 19074 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index 19075 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_bf16_16x16x32_bf16; 19076 break; 19077 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w32: 19078 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w64: 19079 ArgsForMatchingMatrixTypes = {4, 1, 3, 5}; // CD, A, B, Index 19080 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_i32_16x16x32_iu8; 19081 break; 19082 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w32: 19083 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w64: 19084 ArgsForMatchingMatrixTypes = {4, 1, 3, 5}; // CD, A, B, Index 19085 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_i32_16x16x32_iu4; 19086 break; 19087 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w32: 19088 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w64: 19089 ArgsForMatchingMatrixTypes = {4, 1, 3, 5}; // CD, A, B, Index 19090 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_i32_16x16x64_iu4; 19091 break; 19092 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w32: 19093 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w64: 19094 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index 19095 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_fp8_fp8; 19096 break; 19097 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w32: 19098 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w64: 19099 ArgsForMatchingMatrixTypes = {2, 0, 
1, 3}; // CD, A, B, Index 19100 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_fp8_bf8; 19101 break; 19102 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w32: 19103 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w64: 19104 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index 19105 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_bf8_fp8; 19106 break; 19107 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w32: 19108 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w64: 19109 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index 19110 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_bf8_bf8; 19111 break; 19112 } 19113 19114 SmallVector<Value *, 6> Args; 19115 for (int i = 0, e = E->getNumArgs(); i != e; ++i) 19116 Args.push_back(EmitScalarExpr(E->getArg(i))); 19117 if (AppendFalseForOpselArg) 19118 Args.push_back(Builder.getFalse()); 19119 19120 SmallVector<llvm::Type *, 6> ArgTypes; 19121 for (auto ArgIdx : ArgsForMatchingMatrixTypes) 19122 ArgTypes.push_back(Args[ArgIdx]->getType()); 19123 19124 Function *F = CGM.getIntrinsic(BuiltinWMMAOp, ArgTypes); 19125 return Builder.CreateCall(F, Args); 19126 } 19127 19128 // amdgcn workitem 19129 case AMDGPU::BI__builtin_amdgcn_workitem_id_x: 19130 return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_x, 0, 1024); 19131 case AMDGPU::BI__builtin_amdgcn_workitem_id_y: 19132 return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_y, 0, 1024); 19133 case AMDGPU::BI__builtin_amdgcn_workitem_id_z: 19134 return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_z, 0, 1024); 19135 19136 // amdgcn workgroup size 19137 case AMDGPU::BI__builtin_amdgcn_workgroup_size_x: 19138 return EmitAMDGPUWorkGroupSize(*this, 0); 19139 case AMDGPU::BI__builtin_amdgcn_workgroup_size_y: 19140 return EmitAMDGPUWorkGroupSize(*this, 1); 19141 case AMDGPU::BI__builtin_amdgcn_workgroup_size_z: 19142 return EmitAMDGPUWorkGroupSize(*this, 2); 19143 19144 // amdgcn grid size 19145 case AMDGPU::BI__builtin_amdgcn_grid_size_x: 19146 return EmitAMDGPUGridSize(*this, 0); 19147 case AMDGPU::BI__builtin_amdgcn_grid_size_y: 19148 return EmitAMDGPUGridSize(*this, 1); 19149 case AMDGPU::BI__builtin_amdgcn_grid_size_z: 19150 return EmitAMDGPUGridSize(*this, 2); 19151 19152 // r600 intrinsics 19153 case AMDGPU::BI__builtin_r600_recipsqrt_ieee: 19154 case AMDGPU::BI__builtin_r600_recipsqrt_ieeef: 19155 return emitBuiltinWithOneOverloadedType<1>(*this, E, 19156 Intrinsic::r600_recipsqrt_ieee); 19157 case AMDGPU::BI__builtin_r600_read_tidig_x: 19158 return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_x, 0, 1024); 19159 case AMDGPU::BI__builtin_r600_read_tidig_y: 19160 return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_y, 0, 1024); 19161 case AMDGPU::BI__builtin_r600_read_tidig_z: 19162 return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_z, 0, 1024); 19163 case AMDGPU::BI__builtin_amdgcn_alignbit: { 19164 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0)); 19165 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1)); 19166 llvm::Value *Src2 = EmitScalarExpr(E->getArg(2)); 19167 Function *F = CGM.getIntrinsic(Intrinsic::fshr, Src0->getType()); 19168 return Builder.CreateCall(F, { Src0, Src1, Src2 }); 19169 } 19170 case AMDGPU::BI__builtin_amdgcn_fence: { 19171 ProcessOrderScopeAMDGCN(EmitScalarExpr(E->getArg(0)), 19172 EmitScalarExpr(E->getArg(1)), AO, SSID); 19173 FenceInst *Fence = Builder.CreateFence(AO, SSID); 19174 if (E->getNumArgs() > 2) 19175 AddAMDGPUFenceAddressSpaceMMRA(Fence, 
E); 19176 return Fence; 19177 } 19178 case AMDGPU::BI__builtin_amdgcn_atomic_inc32: 19179 case AMDGPU::BI__builtin_amdgcn_atomic_inc64: 19180 case AMDGPU::BI__builtin_amdgcn_atomic_dec32: 19181 case AMDGPU::BI__builtin_amdgcn_atomic_dec64: 19182 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f64: 19183 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f32: 19184 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2f16: 19185 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16: 19186 case AMDGPU::BI__builtin_amdgcn_ds_faddf: 19187 case AMDGPU::BI__builtin_amdgcn_ds_fminf: 19188 case AMDGPU::BI__builtin_amdgcn_ds_fmaxf: { 19189 llvm::AtomicRMWInst::BinOp BinOp; 19190 switch (BuiltinID) { 19191 case AMDGPU::BI__builtin_amdgcn_atomic_inc32: 19192 case AMDGPU::BI__builtin_amdgcn_atomic_inc64: 19193 BinOp = llvm::AtomicRMWInst::UIncWrap; 19194 break; 19195 case AMDGPU::BI__builtin_amdgcn_atomic_dec32: 19196 case AMDGPU::BI__builtin_amdgcn_atomic_dec64: 19197 BinOp = llvm::AtomicRMWInst::UDecWrap; 19198 break; 19199 case AMDGPU::BI__builtin_amdgcn_ds_faddf: 19200 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f64: 19201 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f32: 19202 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2f16: 19203 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16: 19204 BinOp = llvm::AtomicRMWInst::FAdd; 19205 break; 19206 case AMDGPU::BI__builtin_amdgcn_ds_fminf: 19207 BinOp = llvm::AtomicRMWInst::FMin; 19208 break; 19209 case AMDGPU::BI__builtin_amdgcn_ds_fmaxf: 19210 BinOp = llvm::AtomicRMWInst::FMax; 19211 break; 19212 } 19213 19214 Address Ptr = CheckAtomicAlignment(*this, E); 19215 Value *Val = EmitScalarExpr(E->getArg(1)); 19216 llvm::Type *OrigTy = Val->getType(); 19217 QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType(); 19218 19219 bool Volatile; 19220 19221 if (BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_faddf || 19222 BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_fminf || 19223 BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_fmaxf) { 19224 // __builtin_amdgcn_ds_faddf/fminf/fmaxf has an explicit volatile argument 19225 Volatile = 19226 cast<ConstantInt>(EmitScalarExpr(E->getArg(4)))->getZExtValue(); 19227 } else { 19228 // Infer volatile from the passed type. 19229 Volatile = 19230 PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified(); 19231 } 19232 19233 if (E->getNumArgs() >= 4) { 19234 // Some of the builtins have explicit ordering and scope arguments. 19235 ProcessOrderScopeAMDGCN(EmitScalarExpr(E->getArg(2)), 19236 EmitScalarExpr(E->getArg(3)), AO, SSID); 19237 } else { 19238 // The ds_atomic_fadd_* builtins do not have syncscope/order arguments. 19239 SSID = llvm::SyncScope::System; 19240 AO = AtomicOrdering::SequentiallyConsistent; 19241 19242 // The v2bf16 builtin uses i16 instead of a natural bfloat type. 19243 if (BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16) { 19244 llvm::Type *V2BF16Ty = FixedVectorType::get( 19245 llvm::Type::getBFloatTy(Builder.getContext()), 2); 19246 Val = Builder.CreateBitCast(Val, V2BF16Ty); 19247 } 19248 } 19249 19250 llvm::AtomicRMWInst *RMW = 19251 Builder.CreateAtomicRMW(BinOp, Ptr, Val, AO, SSID); 19252 if (Volatile) 19253 RMW->setVolatile(true); 19254 return Builder.CreateBitCast(RMW, OrigTy); 19255 } 19256 case AMDGPU::BI__builtin_amdgcn_s_sendmsg_rtn: 19257 case AMDGPU::BI__builtin_amdgcn_s_sendmsg_rtnl: { 19258 llvm::Value *Arg = EmitScalarExpr(E->getArg(0)); 19259 llvm::Type *ResultType = ConvertType(E->getType()); 19260 // s_sendmsg_rtn is mangled using return type only. 
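    // For example (roughly), the 64-bit __builtin_amdgcn_s_sendmsg_rtnl form
    // selects the llvm.amdgcn.s.sendmsg.rtn.i64 overload.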
    Function *F =
        CGM.getIntrinsic(Intrinsic::amdgcn_s_sendmsg_rtn, {ResultType});
    return Builder.CreateCall(F, {Arg});
  }
  case AMDGPU::BI__builtin_amdgcn_make_buffer_rsrc:
    return emitBuiltinWithOneOverloadedType<4>(
        *this, E, Intrinsic::amdgcn_make_buffer_rsrc);
  case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b8:
  case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b16:
  case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b32:
  case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b64:
  case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b96:
  case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b128:
    return emitBuiltinWithOneOverloadedType<5>(
        *this, E, Intrinsic::amdgcn_raw_ptr_buffer_store);
  case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b8:
  case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b16:
  case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b32:
  case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b64:
  case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b96:
  case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b128: {
    llvm::Type *RetTy = nullptr;
    switch (BuiltinID) {
    case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b8:
      RetTy = Int8Ty;
      break;
    case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b16:
      RetTy = Int16Ty;
      break;
    case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b32:
      RetTy = Int32Ty;
      break;
    case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b64:
      RetTy = llvm::FixedVectorType::get(Int32Ty, /*NumElements=*/2);
      break;
    case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b96:
      RetTy = llvm::FixedVectorType::get(Int32Ty, /*NumElements=*/3);
      break;
    case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b128:
      RetTy = llvm::FixedVectorType::get(Int32Ty, /*NumElements=*/4);
      break;
    }
    Function *F =
        CGM.getIntrinsic(Intrinsic::amdgcn_raw_ptr_buffer_load, RetTy);
    return Builder.CreateCall(
        F, {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1)),
            EmitScalarExpr(E->getArg(2)), EmitScalarExpr(E->getArg(3))});
  }
  default:
    return nullptr;
  }
}

/// Handle a SystemZ function in which the final argument is a pointer
/// to an int that receives the post-instruction CC value. At the LLVM level
/// this is represented as a function that returns a {result, cc} pair.
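/// For example (a rough sketch), a call such as
///   int cc;
///   vector signed char r = __builtin_s390_vpkshs(a, b, &cc);
/// becomes a call to the llvm.s390.vpkshs intrinsic returning a {result, i32}
/// pair: element 1 is stored through the CC pointer and element 0 becomes the
/// value of the builtin.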
static Value *EmitSystemZIntrinsicWithCC(CodeGenFunction &CGF,
                                         unsigned IntrinsicID,
                                         const CallExpr *E) {
  unsigned NumArgs = E->getNumArgs() - 1;
  SmallVector<Value *, 8> Args(NumArgs);
  for (unsigned I = 0; I < NumArgs; ++I)
    Args[I] = CGF.EmitScalarExpr(E->getArg(I));
  Address CCPtr = CGF.EmitPointerWithAlignment(E->getArg(NumArgs));
  Function *F = CGF.CGM.getIntrinsic(IntrinsicID);
  Value *Call = CGF.Builder.CreateCall(F, Args);
  Value *CC = CGF.Builder.CreateExtractValue(Call, 1);
  CGF.Builder.CreateStore(CC, CCPtr);
  return CGF.Builder.CreateExtractValue(Call, 0);
}

Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID,
                                               const CallExpr *E) {
  switch (BuiltinID) {
  case SystemZ::BI__builtin_tbegin: {
    Value *TDB = EmitScalarExpr(E->getArg(0));
    Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c);
    Function *F = CGM.getIntrinsic(Intrinsic::s390_tbegin);
    return Builder.CreateCall(F, {TDB, Control});
  }
  case SystemZ::BI__builtin_tbegin_nofloat: {
    Value *TDB = EmitScalarExpr(E->getArg(0));
    Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c);
    Function *F = CGM.getIntrinsic(Intrinsic::s390_tbegin_nofloat);
    return Builder.CreateCall(F, {TDB, Control});
  }
  case SystemZ::BI__builtin_tbeginc: {
    Value *TDB = llvm::ConstantPointerNull::get(Int8PtrTy);
    Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff08);
    Function *F = CGM.getIntrinsic(Intrinsic::s390_tbeginc);
    return Builder.CreateCall(F, {TDB, Control});
  }
  case SystemZ::BI__builtin_tabort: {
    Value *Data = EmitScalarExpr(E->getArg(0));
    Function *F = CGM.getIntrinsic(Intrinsic::s390_tabort);
    return Builder.CreateCall(F, Builder.CreateSExt(Data, Int64Ty, "tabort"));
  }
  case SystemZ::BI__builtin_non_tx_store: {
    Value *Address = EmitScalarExpr(E->getArg(0));
    Value *Data = EmitScalarExpr(E->getArg(1));
    Function *F = CGM.getIntrinsic(Intrinsic::s390_ntstg);
    return Builder.CreateCall(F, {Data, Address});
  }

  // Vector builtins. Note that most vector builtins are mapped automatically
  // to target-specific LLVM intrinsics. The ones handled specially here can
  // be represented via standard LLVM IR, which is preferable to enable common
  // LLVM optimizations.
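  // For example, __builtin_s390_vpopctb is emitted below as a generic
  // llvm.ctpop call on its vector type rather than as a target-specific
  // intrinsic.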
19369 19370 case SystemZ::BI__builtin_s390_vpopctb: 19371 case SystemZ::BI__builtin_s390_vpopcth: 19372 case SystemZ::BI__builtin_s390_vpopctf: 19373 case SystemZ::BI__builtin_s390_vpopctg: { 19374 llvm::Type *ResultType = ConvertType(E->getType()); 19375 Value *X = EmitScalarExpr(E->getArg(0)); 19376 Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType); 19377 return Builder.CreateCall(F, X); 19378 } 19379 19380 case SystemZ::BI__builtin_s390_vclzb: 19381 case SystemZ::BI__builtin_s390_vclzh: 19382 case SystemZ::BI__builtin_s390_vclzf: 19383 case SystemZ::BI__builtin_s390_vclzg: { 19384 llvm::Type *ResultType = ConvertType(E->getType()); 19385 Value *X = EmitScalarExpr(E->getArg(0)); 19386 Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false); 19387 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType); 19388 return Builder.CreateCall(F, {X, Undef}); 19389 } 19390 19391 case SystemZ::BI__builtin_s390_vctzb: 19392 case SystemZ::BI__builtin_s390_vctzh: 19393 case SystemZ::BI__builtin_s390_vctzf: 19394 case SystemZ::BI__builtin_s390_vctzg: { 19395 llvm::Type *ResultType = ConvertType(E->getType()); 19396 Value *X = EmitScalarExpr(E->getArg(0)); 19397 Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false); 19398 Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType); 19399 return Builder.CreateCall(F, {X, Undef}); 19400 } 19401 19402 case SystemZ::BI__builtin_s390_verllb: 19403 case SystemZ::BI__builtin_s390_verllh: 19404 case SystemZ::BI__builtin_s390_verllf: 19405 case SystemZ::BI__builtin_s390_verllg: { 19406 llvm::Type *ResultType = ConvertType(E->getType()); 19407 llvm::Value *Src = EmitScalarExpr(E->getArg(0)); 19408 llvm::Value *Amt = EmitScalarExpr(E->getArg(1)); 19409 // Splat scalar rotate amount to vector type. 19410 unsigned NumElts = cast<llvm::FixedVectorType>(ResultType)->getNumElements(); 19411 Amt = Builder.CreateIntCast(Amt, ResultType->getScalarType(), false); 19412 Amt = Builder.CreateVectorSplat(NumElts, Amt); 19413 Function *F = CGM.getIntrinsic(Intrinsic::fshl, ResultType); 19414 return Builder.CreateCall(F, { Src, Src, Amt }); 19415 } 19416 19417 case SystemZ::BI__builtin_s390_verllvb: 19418 case SystemZ::BI__builtin_s390_verllvh: 19419 case SystemZ::BI__builtin_s390_verllvf: 19420 case SystemZ::BI__builtin_s390_verllvg: { 19421 llvm::Type *ResultType = ConvertType(E->getType()); 19422 llvm::Value *Src = EmitScalarExpr(E->getArg(0)); 19423 llvm::Value *Amt = EmitScalarExpr(E->getArg(1)); 19424 Function *F = CGM.getIntrinsic(Intrinsic::fshl, ResultType); 19425 return Builder.CreateCall(F, { Src, Src, Amt }); 19426 } 19427 19428 case SystemZ::BI__builtin_s390_vfsqsb: 19429 case SystemZ::BI__builtin_s390_vfsqdb: { 19430 llvm::Type *ResultType = ConvertType(E->getType()); 19431 Value *X = EmitScalarExpr(E->getArg(0)); 19432 if (Builder.getIsFPConstrained()) { 19433 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt, ResultType); 19434 return Builder.CreateConstrainedFPCall(F, { X }); 19435 } else { 19436 Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType); 19437 return Builder.CreateCall(F, X); 19438 } 19439 } 19440 case SystemZ::BI__builtin_s390_vfmasb: 19441 case SystemZ::BI__builtin_s390_vfmadb: { 19442 llvm::Type *ResultType = ConvertType(E->getType()); 19443 Value *X = EmitScalarExpr(E->getArg(0)); 19444 Value *Y = EmitScalarExpr(E->getArg(1)); 19445 Value *Z = EmitScalarExpr(E->getArg(2)); 19446 if (Builder.getIsFPConstrained()) { 19447 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, 
ResultType); 19448 return Builder.CreateConstrainedFPCall(F, {X, Y, Z}); 19449 } else { 19450 Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType); 19451 return Builder.CreateCall(F, {X, Y, Z}); 19452 } 19453 } 19454 case SystemZ::BI__builtin_s390_vfmssb: 19455 case SystemZ::BI__builtin_s390_vfmsdb: { 19456 llvm::Type *ResultType = ConvertType(E->getType()); 19457 Value *X = EmitScalarExpr(E->getArg(0)); 19458 Value *Y = EmitScalarExpr(E->getArg(1)); 19459 Value *Z = EmitScalarExpr(E->getArg(2)); 19460 if (Builder.getIsFPConstrained()) { 19461 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType); 19462 return Builder.CreateConstrainedFPCall(F, {X, Y, Builder.CreateFNeg(Z, "neg")}); 19463 } else { 19464 Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType); 19465 return Builder.CreateCall(F, {X, Y, Builder.CreateFNeg(Z, "neg")}); 19466 } 19467 } 19468 case SystemZ::BI__builtin_s390_vfnmasb: 19469 case SystemZ::BI__builtin_s390_vfnmadb: { 19470 llvm::Type *ResultType = ConvertType(E->getType()); 19471 Value *X = EmitScalarExpr(E->getArg(0)); 19472 Value *Y = EmitScalarExpr(E->getArg(1)); 19473 Value *Z = EmitScalarExpr(E->getArg(2)); 19474 if (Builder.getIsFPConstrained()) { 19475 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType); 19476 return Builder.CreateFNeg(Builder.CreateConstrainedFPCall(F, {X, Y, Z}), "neg"); 19477 } else { 19478 Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType); 19479 return Builder.CreateFNeg(Builder.CreateCall(F, {X, Y, Z}), "neg"); 19480 } 19481 } 19482 case SystemZ::BI__builtin_s390_vfnmssb: 19483 case SystemZ::BI__builtin_s390_vfnmsdb: { 19484 llvm::Type *ResultType = ConvertType(E->getType()); 19485 Value *X = EmitScalarExpr(E->getArg(0)); 19486 Value *Y = EmitScalarExpr(E->getArg(1)); 19487 Value *Z = EmitScalarExpr(E->getArg(2)); 19488 if (Builder.getIsFPConstrained()) { 19489 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType); 19490 Value *NegZ = Builder.CreateFNeg(Z, "sub"); 19491 return Builder.CreateFNeg(Builder.CreateConstrainedFPCall(F, {X, Y, NegZ})); 19492 } else { 19493 Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType); 19494 Value *NegZ = Builder.CreateFNeg(Z, "neg"); 19495 return Builder.CreateFNeg(Builder.CreateCall(F, {X, Y, NegZ})); 19496 } 19497 } 19498 case SystemZ::BI__builtin_s390_vflpsb: 19499 case SystemZ::BI__builtin_s390_vflpdb: { 19500 llvm::Type *ResultType = ConvertType(E->getType()); 19501 Value *X = EmitScalarExpr(E->getArg(0)); 19502 Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType); 19503 return Builder.CreateCall(F, X); 19504 } 19505 case SystemZ::BI__builtin_s390_vflnsb: 19506 case SystemZ::BI__builtin_s390_vflndb: { 19507 llvm::Type *ResultType = ConvertType(E->getType()); 19508 Value *X = EmitScalarExpr(E->getArg(0)); 19509 Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType); 19510 return Builder.CreateFNeg(Builder.CreateCall(F, X), "neg"); 19511 } 19512 case SystemZ::BI__builtin_s390_vfisb: 19513 case SystemZ::BI__builtin_s390_vfidb: { 19514 llvm::Type *ResultType = ConvertType(E->getType()); 19515 Value *X = EmitScalarExpr(E->getArg(0)); 19516 // Constant-fold the M4 and M5 mask arguments. 19517 llvm::APSInt M4 = *E->getArg(1)->getIntegerConstantExpr(getContext()); 19518 llvm::APSInt M5 = *E->getArg(2)->getIntegerConstantExpr(getContext()); 19519 // Check whether this instance can be represented via a LLVM standard 19520 // intrinsic. We only support some combinations of M4 and M5. 
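    // For example, M4 == 4 with M5 == 5 is emitted as llvm.trunc below;
    // combinations with no generic equivalent fall through to the
    // target-specific s390.vfisb/s390.vfidb intrinsics.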
19521 Intrinsic::ID ID = Intrinsic::not_intrinsic; 19522 Intrinsic::ID CI; 19523 switch (M4.getZExtValue()) { 19524 default: break; 19525 case 0: // IEEE-inexact exception allowed 19526 switch (M5.getZExtValue()) { 19527 default: break; 19528 case 0: ID = Intrinsic::rint; 19529 CI = Intrinsic::experimental_constrained_rint; break; 19530 } 19531 break; 19532 case 4: // IEEE-inexact exception suppressed 19533 switch (M5.getZExtValue()) { 19534 default: break; 19535 case 0: ID = Intrinsic::nearbyint; 19536 CI = Intrinsic::experimental_constrained_nearbyint; break; 19537 case 1: ID = Intrinsic::round; 19538 CI = Intrinsic::experimental_constrained_round; break; 19539 case 5: ID = Intrinsic::trunc; 19540 CI = Intrinsic::experimental_constrained_trunc; break; 19541 case 6: ID = Intrinsic::ceil; 19542 CI = Intrinsic::experimental_constrained_ceil; break; 19543 case 7: ID = Intrinsic::floor; 19544 CI = Intrinsic::experimental_constrained_floor; break; 19545 } 19546 break; 19547 } 19548 if (ID != Intrinsic::not_intrinsic) { 19549 if (Builder.getIsFPConstrained()) { 19550 Function *F = CGM.getIntrinsic(CI, ResultType); 19551 return Builder.CreateConstrainedFPCall(F, X); 19552 } else { 19553 Function *F = CGM.getIntrinsic(ID, ResultType); 19554 return Builder.CreateCall(F, X); 19555 } 19556 } 19557 switch (BuiltinID) { // FIXME: constrained version? 19558 case SystemZ::BI__builtin_s390_vfisb: ID = Intrinsic::s390_vfisb; break; 19559 case SystemZ::BI__builtin_s390_vfidb: ID = Intrinsic::s390_vfidb; break; 19560 default: llvm_unreachable("Unknown BuiltinID"); 19561 } 19562 Function *F = CGM.getIntrinsic(ID); 19563 Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4); 19564 Value *M5Value = llvm::ConstantInt::get(getLLVMContext(), M5); 19565 return Builder.CreateCall(F, {X, M4Value, M5Value}); 19566 } 19567 case SystemZ::BI__builtin_s390_vfmaxsb: 19568 case SystemZ::BI__builtin_s390_vfmaxdb: { 19569 llvm::Type *ResultType = ConvertType(E->getType()); 19570 Value *X = EmitScalarExpr(E->getArg(0)); 19571 Value *Y = EmitScalarExpr(E->getArg(1)); 19572 // Constant-fold the M4 mask argument. 19573 llvm::APSInt M4 = *E->getArg(2)->getIntegerConstantExpr(getContext()); 19574 // Check whether this instance can be represented via a LLVM standard 19575 // intrinsic. We only support some values of M4. 
19576 Intrinsic::ID ID = Intrinsic::not_intrinsic; 19577 Intrinsic::ID CI; 19578 switch (M4.getZExtValue()) { 19579 default: break; 19580 case 4: ID = Intrinsic::maxnum; 19581 CI = Intrinsic::experimental_constrained_maxnum; break; 19582 } 19583 if (ID != Intrinsic::not_intrinsic) { 19584 if (Builder.getIsFPConstrained()) { 19585 Function *F = CGM.getIntrinsic(CI, ResultType); 19586 return Builder.CreateConstrainedFPCall(F, {X, Y}); 19587 } else { 19588 Function *F = CGM.getIntrinsic(ID, ResultType); 19589 return Builder.CreateCall(F, {X, Y}); 19590 } 19591 } 19592 switch (BuiltinID) { 19593 case SystemZ::BI__builtin_s390_vfmaxsb: ID = Intrinsic::s390_vfmaxsb; break; 19594 case SystemZ::BI__builtin_s390_vfmaxdb: ID = Intrinsic::s390_vfmaxdb; break; 19595 default: llvm_unreachable("Unknown BuiltinID"); 19596 } 19597 Function *F = CGM.getIntrinsic(ID); 19598 Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4); 19599 return Builder.CreateCall(F, {X, Y, M4Value}); 19600 } 19601 case SystemZ::BI__builtin_s390_vfminsb: 19602 case SystemZ::BI__builtin_s390_vfmindb: { 19603 llvm::Type *ResultType = ConvertType(E->getType()); 19604 Value *X = EmitScalarExpr(E->getArg(0)); 19605 Value *Y = EmitScalarExpr(E->getArg(1)); 19606 // Constant-fold the M4 mask argument. 19607 llvm::APSInt M4 = *E->getArg(2)->getIntegerConstantExpr(getContext()); 19608 // Check whether this instance can be represented via a LLVM standard 19609 // intrinsic. We only support some values of M4. 19610 Intrinsic::ID ID = Intrinsic::not_intrinsic; 19611 Intrinsic::ID CI; 19612 switch (M4.getZExtValue()) { 19613 default: break; 19614 case 4: ID = Intrinsic::minnum; 19615 CI = Intrinsic::experimental_constrained_minnum; break; 19616 } 19617 if (ID != Intrinsic::not_intrinsic) { 19618 if (Builder.getIsFPConstrained()) { 19619 Function *F = CGM.getIntrinsic(CI, ResultType); 19620 return Builder.CreateConstrainedFPCall(F, {X, Y}); 19621 } else { 19622 Function *F = CGM.getIntrinsic(ID, ResultType); 19623 return Builder.CreateCall(F, {X, Y}); 19624 } 19625 } 19626 switch (BuiltinID) { 19627 case SystemZ::BI__builtin_s390_vfminsb: ID = Intrinsic::s390_vfminsb; break; 19628 case SystemZ::BI__builtin_s390_vfmindb: ID = Intrinsic::s390_vfmindb; break; 19629 default: llvm_unreachable("Unknown BuiltinID"); 19630 } 19631 Function *F = CGM.getIntrinsic(ID); 19632 Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4); 19633 return Builder.CreateCall(F, {X, Y, M4Value}); 19634 } 19635 19636 case SystemZ::BI__builtin_s390_vlbrh: 19637 case SystemZ::BI__builtin_s390_vlbrf: 19638 case SystemZ::BI__builtin_s390_vlbrg: { 19639 llvm::Type *ResultType = ConvertType(E->getType()); 19640 Value *X = EmitScalarExpr(E->getArg(0)); 19641 Function *F = CGM.getIntrinsic(Intrinsic::bswap, ResultType); 19642 return Builder.CreateCall(F, X); 19643 } 19644 19645 // Vector intrinsics that output the post-instruction CC value. 
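  // Each INTRINSIC_WITH_CC(X) entry below expands to roughly
  //   case SystemZ::BI__builtin_X:
  //     return EmitSystemZIntrinsicWithCC(*this, Intrinsic::X, E);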
19646 19647 #define INTRINSIC_WITH_CC(NAME) \ 19648 case SystemZ::BI__builtin_##NAME: \ 19649 return EmitSystemZIntrinsicWithCC(*this, Intrinsic::NAME, E) 19650 19651 INTRINSIC_WITH_CC(s390_vpkshs); 19652 INTRINSIC_WITH_CC(s390_vpksfs); 19653 INTRINSIC_WITH_CC(s390_vpksgs); 19654 19655 INTRINSIC_WITH_CC(s390_vpklshs); 19656 INTRINSIC_WITH_CC(s390_vpklsfs); 19657 INTRINSIC_WITH_CC(s390_vpklsgs); 19658 19659 INTRINSIC_WITH_CC(s390_vceqbs); 19660 INTRINSIC_WITH_CC(s390_vceqhs); 19661 INTRINSIC_WITH_CC(s390_vceqfs); 19662 INTRINSIC_WITH_CC(s390_vceqgs); 19663 19664 INTRINSIC_WITH_CC(s390_vchbs); 19665 INTRINSIC_WITH_CC(s390_vchhs); 19666 INTRINSIC_WITH_CC(s390_vchfs); 19667 INTRINSIC_WITH_CC(s390_vchgs); 19668 19669 INTRINSIC_WITH_CC(s390_vchlbs); 19670 INTRINSIC_WITH_CC(s390_vchlhs); 19671 INTRINSIC_WITH_CC(s390_vchlfs); 19672 INTRINSIC_WITH_CC(s390_vchlgs); 19673 19674 INTRINSIC_WITH_CC(s390_vfaebs); 19675 INTRINSIC_WITH_CC(s390_vfaehs); 19676 INTRINSIC_WITH_CC(s390_vfaefs); 19677 19678 INTRINSIC_WITH_CC(s390_vfaezbs); 19679 INTRINSIC_WITH_CC(s390_vfaezhs); 19680 INTRINSIC_WITH_CC(s390_vfaezfs); 19681 19682 INTRINSIC_WITH_CC(s390_vfeebs); 19683 INTRINSIC_WITH_CC(s390_vfeehs); 19684 INTRINSIC_WITH_CC(s390_vfeefs); 19685 19686 INTRINSIC_WITH_CC(s390_vfeezbs); 19687 INTRINSIC_WITH_CC(s390_vfeezhs); 19688 INTRINSIC_WITH_CC(s390_vfeezfs); 19689 19690 INTRINSIC_WITH_CC(s390_vfenebs); 19691 INTRINSIC_WITH_CC(s390_vfenehs); 19692 INTRINSIC_WITH_CC(s390_vfenefs); 19693 19694 INTRINSIC_WITH_CC(s390_vfenezbs); 19695 INTRINSIC_WITH_CC(s390_vfenezhs); 19696 INTRINSIC_WITH_CC(s390_vfenezfs); 19697 19698 INTRINSIC_WITH_CC(s390_vistrbs); 19699 INTRINSIC_WITH_CC(s390_vistrhs); 19700 INTRINSIC_WITH_CC(s390_vistrfs); 19701 19702 INTRINSIC_WITH_CC(s390_vstrcbs); 19703 INTRINSIC_WITH_CC(s390_vstrchs); 19704 INTRINSIC_WITH_CC(s390_vstrcfs); 19705 19706 INTRINSIC_WITH_CC(s390_vstrczbs); 19707 INTRINSIC_WITH_CC(s390_vstrczhs); 19708 INTRINSIC_WITH_CC(s390_vstrczfs); 19709 19710 INTRINSIC_WITH_CC(s390_vfcesbs); 19711 INTRINSIC_WITH_CC(s390_vfcedbs); 19712 INTRINSIC_WITH_CC(s390_vfchsbs); 19713 INTRINSIC_WITH_CC(s390_vfchdbs); 19714 INTRINSIC_WITH_CC(s390_vfchesbs); 19715 INTRINSIC_WITH_CC(s390_vfchedbs); 19716 19717 INTRINSIC_WITH_CC(s390_vftcisb); 19718 INTRINSIC_WITH_CC(s390_vftcidb); 19719 19720 INTRINSIC_WITH_CC(s390_vstrsb); 19721 INTRINSIC_WITH_CC(s390_vstrsh); 19722 INTRINSIC_WITH_CC(s390_vstrsf); 19723 19724 INTRINSIC_WITH_CC(s390_vstrszb); 19725 INTRINSIC_WITH_CC(s390_vstrszh); 19726 INTRINSIC_WITH_CC(s390_vstrszf); 19727 19728 #undef INTRINSIC_WITH_CC 19729 19730 default: 19731 return nullptr; 19732 } 19733 } 19734 19735 namespace { 19736 // Helper classes for mapping MMA builtins to particular LLVM intrinsic variant. 19737 struct NVPTXMmaLdstInfo { 19738 unsigned NumResults; // Number of elements to load/store 19739 // Intrinsic IDs for row/col variants. 0 if particular layout is unsupported. 
19740 unsigned IID_col; 19741 unsigned IID_row; 19742 }; 19743 19744 #define MMA_INTR(geom_op_type, layout) \ 19745 Intrinsic::nvvm_wmma_##geom_op_type##_##layout##_stride 19746 #define MMA_LDST(n, geom_op_type) \ 19747 { n, MMA_INTR(geom_op_type, col), MMA_INTR(geom_op_type, row) } 19748 19749 static NVPTXMmaLdstInfo getNVPTXMmaLdstInfo(unsigned BuiltinID) { 19750 switch (BuiltinID) { 19751 // FP MMA loads 19752 case NVPTX::BI__hmma_m16n16k16_ld_a: 19753 return MMA_LDST(8, m16n16k16_load_a_f16); 19754 case NVPTX::BI__hmma_m16n16k16_ld_b: 19755 return MMA_LDST(8, m16n16k16_load_b_f16); 19756 case NVPTX::BI__hmma_m16n16k16_ld_c_f16: 19757 return MMA_LDST(4, m16n16k16_load_c_f16); 19758 case NVPTX::BI__hmma_m16n16k16_ld_c_f32: 19759 return MMA_LDST(8, m16n16k16_load_c_f32); 19760 case NVPTX::BI__hmma_m32n8k16_ld_a: 19761 return MMA_LDST(8, m32n8k16_load_a_f16); 19762 case NVPTX::BI__hmma_m32n8k16_ld_b: 19763 return MMA_LDST(8, m32n8k16_load_b_f16); 19764 case NVPTX::BI__hmma_m32n8k16_ld_c_f16: 19765 return MMA_LDST(4, m32n8k16_load_c_f16); 19766 case NVPTX::BI__hmma_m32n8k16_ld_c_f32: 19767 return MMA_LDST(8, m32n8k16_load_c_f32); 19768 case NVPTX::BI__hmma_m8n32k16_ld_a: 19769 return MMA_LDST(8, m8n32k16_load_a_f16); 19770 case NVPTX::BI__hmma_m8n32k16_ld_b: 19771 return MMA_LDST(8, m8n32k16_load_b_f16); 19772 case NVPTX::BI__hmma_m8n32k16_ld_c_f16: 19773 return MMA_LDST(4, m8n32k16_load_c_f16); 19774 case NVPTX::BI__hmma_m8n32k16_ld_c_f32: 19775 return MMA_LDST(8, m8n32k16_load_c_f32); 19776 19777 // Integer MMA loads 19778 case NVPTX::BI__imma_m16n16k16_ld_a_s8: 19779 return MMA_LDST(2, m16n16k16_load_a_s8); 19780 case NVPTX::BI__imma_m16n16k16_ld_a_u8: 19781 return MMA_LDST(2, m16n16k16_load_a_u8); 19782 case NVPTX::BI__imma_m16n16k16_ld_b_s8: 19783 return MMA_LDST(2, m16n16k16_load_b_s8); 19784 case NVPTX::BI__imma_m16n16k16_ld_b_u8: 19785 return MMA_LDST(2, m16n16k16_load_b_u8); 19786 case NVPTX::BI__imma_m16n16k16_ld_c: 19787 return MMA_LDST(8, m16n16k16_load_c_s32); 19788 case NVPTX::BI__imma_m32n8k16_ld_a_s8: 19789 return MMA_LDST(4, m32n8k16_load_a_s8); 19790 case NVPTX::BI__imma_m32n8k16_ld_a_u8: 19791 return MMA_LDST(4, m32n8k16_load_a_u8); 19792 case NVPTX::BI__imma_m32n8k16_ld_b_s8: 19793 return MMA_LDST(1, m32n8k16_load_b_s8); 19794 case NVPTX::BI__imma_m32n8k16_ld_b_u8: 19795 return MMA_LDST(1, m32n8k16_load_b_u8); 19796 case NVPTX::BI__imma_m32n8k16_ld_c: 19797 return MMA_LDST(8, m32n8k16_load_c_s32); 19798 case NVPTX::BI__imma_m8n32k16_ld_a_s8: 19799 return MMA_LDST(1, m8n32k16_load_a_s8); 19800 case NVPTX::BI__imma_m8n32k16_ld_a_u8: 19801 return MMA_LDST(1, m8n32k16_load_a_u8); 19802 case NVPTX::BI__imma_m8n32k16_ld_b_s8: 19803 return MMA_LDST(4, m8n32k16_load_b_s8); 19804 case NVPTX::BI__imma_m8n32k16_ld_b_u8: 19805 return MMA_LDST(4, m8n32k16_load_b_u8); 19806 case NVPTX::BI__imma_m8n32k16_ld_c: 19807 return MMA_LDST(8, m8n32k16_load_c_s32); 19808 19809 // Sub-integer MMA loads. 19810 // Only row/col layout is supported by A/B fragments. 
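  // An unsupported layout is encoded as intrinsic ID 0 in the entries below:
  // the A fragments only provide a row-major variant and the B fragments
  // only a column-major one.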
  case NVPTX::BI__imma_m8n8k32_ld_a_s4:
    return {1, 0, MMA_INTR(m8n8k32_load_a_s4, row)};
  case NVPTX::BI__imma_m8n8k32_ld_a_u4:
    return {1, 0, MMA_INTR(m8n8k32_load_a_u4, row)};
  case NVPTX::BI__imma_m8n8k32_ld_b_s4:
    return {1, MMA_INTR(m8n8k32_load_b_s4, col), 0};
  case NVPTX::BI__imma_m8n8k32_ld_b_u4:
    return {1, MMA_INTR(m8n8k32_load_b_u4, col), 0};
  case NVPTX::BI__imma_m8n8k32_ld_c:
    return MMA_LDST(2, m8n8k32_load_c_s32);
  case NVPTX::BI__bmma_m8n8k128_ld_a_b1:
    return {1, 0, MMA_INTR(m8n8k128_load_a_b1, row)};
  case NVPTX::BI__bmma_m8n8k128_ld_b_b1:
    return {1, MMA_INTR(m8n8k128_load_b_b1, col), 0};
  case NVPTX::BI__bmma_m8n8k128_ld_c:
    return MMA_LDST(2, m8n8k128_load_c_s32);

  // Double MMA loads
  case NVPTX::BI__dmma_m8n8k4_ld_a:
    return MMA_LDST(1, m8n8k4_load_a_f64);
  case NVPTX::BI__dmma_m8n8k4_ld_b:
    return MMA_LDST(1, m8n8k4_load_b_f64);
  case NVPTX::BI__dmma_m8n8k4_ld_c:
    return MMA_LDST(2, m8n8k4_load_c_f64);

  // Alternate float MMA loads
  case NVPTX::BI__mma_bf16_m16n16k16_ld_a:
    return MMA_LDST(4, m16n16k16_load_a_bf16);
  case NVPTX::BI__mma_bf16_m16n16k16_ld_b:
    return MMA_LDST(4, m16n16k16_load_b_bf16);
  case NVPTX::BI__mma_bf16_m8n32k16_ld_a:
    return MMA_LDST(2, m8n32k16_load_a_bf16);
  case NVPTX::BI__mma_bf16_m8n32k16_ld_b:
    return MMA_LDST(8, m8n32k16_load_b_bf16);
  case NVPTX::BI__mma_bf16_m32n8k16_ld_a:
    return MMA_LDST(8, m32n8k16_load_a_bf16);
  case NVPTX::BI__mma_bf16_m32n8k16_ld_b:
    return MMA_LDST(2, m32n8k16_load_b_bf16);
  case NVPTX::BI__mma_tf32_m16n16k8_ld_a:
    return MMA_LDST(4, m16n16k8_load_a_tf32);
  case NVPTX::BI__mma_tf32_m16n16k8_ld_b:
    return MMA_LDST(4, m16n16k8_load_b_tf32);
  case NVPTX::BI__mma_tf32_m16n16k8_ld_c:
    return MMA_LDST(8, m16n16k8_load_c_f32);

  // NOTE: We need to follow the inconsistent naming scheme used by NVCC.
  // Unlike PTX and LLVM IR, where stores always use fragment D, the NVCC
  // builtins always use fragment C for both loads and stores.
  // FP MMA stores.
  case NVPTX::BI__hmma_m16n16k16_st_c_f16:
    return MMA_LDST(4, m16n16k16_store_d_f16);
  case NVPTX::BI__hmma_m16n16k16_st_c_f32:
    return MMA_LDST(8, m16n16k16_store_d_f32);
  case NVPTX::BI__hmma_m32n8k16_st_c_f16:
    return MMA_LDST(4, m32n8k16_store_d_f16);
  case NVPTX::BI__hmma_m32n8k16_st_c_f32:
    return MMA_LDST(8, m32n8k16_store_d_f32);
  case NVPTX::BI__hmma_m8n32k16_st_c_f16:
    return MMA_LDST(4, m8n32k16_store_d_f16);
  case NVPTX::BI__hmma_m8n32k16_st_c_f32:
    return MMA_LDST(8, m8n32k16_store_d_f32);

  // Integer and sub-integer MMA stores.
  // Another naming quirk. Unlike other MMA builtins that use PTX types in the
  // name, integer loads/stores use LLVM's i32.
19876 case NVPTX::BI__imma_m16n16k16_st_c_i32: 19877 return MMA_LDST(8, m16n16k16_store_d_s32); 19878 case NVPTX::BI__imma_m32n8k16_st_c_i32: 19879 return MMA_LDST(8, m32n8k16_store_d_s32); 19880 case NVPTX::BI__imma_m8n32k16_st_c_i32: 19881 return MMA_LDST(8, m8n32k16_store_d_s32); 19882 case NVPTX::BI__imma_m8n8k32_st_c_i32: 19883 return MMA_LDST(2, m8n8k32_store_d_s32); 19884 case NVPTX::BI__bmma_m8n8k128_st_c_i32: 19885 return MMA_LDST(2, m8n8k128_store_d_s32); 19886 19887 // Double MMA store 19888 case NVPTX::BI__dmma_m8n8k4_st_c_f64: 19889 return MMA_LDST(2, m8n8k4_store_d_f64); 19890 19891 // Alternate float MMA store 19892 case NVPTX::BI__mma_m16n16k8_st_c_f32: 19893 return MMA_LDST(8, m16n16k8_store_d_f32); 19894 19895 default: 19896 llvm_unreachable("Unknown MMA builtin"); 19897 } 19898 } 19899 #undef MMA_LDST 19900 #undef MMA_INTR 19901 19902 19903 struct NVPTXMmaInfo { 19904 unsigned NumEltsA; 19905 unsigned NumEltsB; 19906 unsigned NumEltsC; 19907 unsigned NumEltsD; 19908 19909 // Variants are ordered by layout-A/layout-B/satf, where 'row' has priority 19910 // over 'col' for layout. The index of non-satf variants is expected to match 19911 // the undocumented layout constants used by CUDA's mma.hpp. 19912 std::array<unsigned, 8> Variants; 19913 19914 unsigned getMMAIntrinsic(int Layout, bool Satf) { 19915 unsigned Index = Layout + 4 * Satf; 19916 if (Index >= Variants.size()) 19917 return 0; 19918 return Variants[Index]; 19919 } 19920 }; 19921 19922 // Returns an intrinsic that matches Layout and Satf for valid combinations of 19923 // Layout and Satf, 0 otherwise. 19924 static NVPTXMmaInfo getNVPTXMmaInfo(unsigned BuiltinID) { 19925 // clang-format off 19926 #define MMA_VARIANTS(geom, type) \ 19927 Intrinsic::nvvm_wmma_##geom##_mma_row_row_##type, \ 19928 Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type, \ 19929 Intrinsic::nvvm_wmma_##geom##_mma_col_row_##type, \ 19930 Intrinsic::nvvm_wmma_##geom##_mma_col_col_##type 19931 #define MMA_SATF_VARIANTS(geom, type) \ 19932 MMA_VARIANTS(geom, type), \ 19933 Intrinsic::nvvm_wmma_##geom##_mma_row_row_##type##_satfinite, \ 19934 Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type##_satfinite, \ 19935 Intrinsic::nvvm_wmma_##geom##_mma_col_row_##type##_satfinite, \ 19936 Intrinsic::nvvm_wmma_##geom##_mma_col_col_##type##_satfinite 19937 // Sub-integer MMA only supports row.col layout. 19938 #define MMA_VARIANTS_I4(geom, type) \ 19939 0, \ 19940 Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type, \ 19941 0, \ 19942 0, \ 19943 0, \ 19944 Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type##_satfinite, \ 19945 0, \ 19946 0 19947 // b1 MMA does not support .satfinite. 19948 #define MMA_VARIANTS_B1_XOR(geom, type) \ 19949 0, \ 19950 Intrinsic::nvvm_wmma_##geom##_mma_xor_popc_row_col_##type, \ 19951 0, \ 19952 0, \ 19953 0, \ 19954 0, \ 19955 0, \ 19956 0 19957 #define MMA_VARIANTS_B1_AND(geom, type) \ 19958 0, \ 19959 Intrinsic::nvvm_wmma_##geom##_mma_and_popc_row_col_##type, \ 19960 0, \ 19961 0, \ 19962 0, \ 19963 0, \ 19964 0, \ 19965 0 19966 // clang-format on 19967 switch (BuiltinID) { 19968 // FP MMA 19969 // Note that 'type' argument of MMA_SATF_VARIANTS uses D_C notation, while 19970 // NumEltsN of return value are ordered as A,B,C,D. 
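  // For example, __hmma_m16n16k16_mma_f32f16 (D = f32, C = f16) selects the
  // f32_f16 intrinsic variants and returns {8, 8, 4, 8} as the element counts
  // of A, B, C and D.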
19971 case NVPTX::BI__hmma_m16n16k16_mma_f16f16: 19972 return {8, 8, 4, 4, {{MMA_SATF_VARIANTS(m16n16k16, f16_f16)}}}; 19973 case NVPTX::BI__hmma_m16n16k16_mma_f32f16: 19974 return {8, 8, 4, 8, {{MMA_SATF_VARIANTS(m16n16k16, f32_f16)}}}; 19975 case NVPTX::BI__hmma_m16n16k16_mma_f16f32: 19976 return {8, 8, 8, 4, {{MMA_SATF_VARIANTS(m16n16k16, f16_f32)}}}; 19977 case NVPTX::BI__hmma_m16n16k16_mma_f32f32: 19978 return {8, 8, 8, 8, {{MMA_SATF_VARIANTS(m16n16k16, f32_f32)}}}; 19979 case NVPTX::BI__hmma_m32n8k16_mma_f16f16: 19980 return {8, 8, 4, 4, {{MMA_SATF_VARIANTS(m32n8k16, f16_f16)}}}; 19981 case NVPTX::BI__hmma_m32n8k16_mma_f32f16: 19982 return {8, 8, 4, 8, {{MMA_SATF_VARIANTS(m32n8k16, f32_f16)}}}; 19983 case NVPTX::BI__hmma_m32n8k16_mma_f16f32: 19984 return {8, 8, 8, 4, {{MMA_SATF_VARIANTS(m32n8k16, f16_f32)}}}; 19985 case NVPTX::BI__hmma_m32n8k16_mma_f32f32: 19986 return {8, 8, 8, 8, {{MMA_SATF_VARIANTS(m32n8k16, f32_f32)}}}; 19987 case NVPTX::BI__hmma_m8n32k16_mma_f16f16: 19988 return {8, 8, 4, 4, {{MMA_SATF_VARIANTS(m8n32k16, f16_f16)}}}; 19989 case NVPTX::BI__hmma_m8n32k16_mma_f32f16: 19990 return {8, 8, 4, 8, {{MMA_SATF_VARIANTS(m8n32k16, f32_f16)}}}; 19991 case NVPTX::BI__hmma_m8n32k16_mma_f16f32: 19992 return {8, 8, 8, 4, {{MMA_SATF_VARIANTS(m8n32k16, f16_f32)}}}; 19993 case NVPTX::BI__hmma_m8n32k16_mma_f32f32: 19994 return {8, 8, 8, 8, {{MMA_SATF_VARIANTS(m8n32k16, f32_f32)}}}; 19995 19996 // Integer MMA 19997 case NVPTX::BI__imma_m16n16k16_mma_s8: 19998 return {2, 2, 8, 8, {{MMA_SATF_VARIANTS(m16n16k16, s8)}}}; 19999 case NVPTX::BI__imma_m16n16k16_mma_u8: 20000 return {2, 2, 8, 8, {{MMA_SATF_VARIANTS(m16n16k16, u8)}}}; 20001 case NVPTX::BI__imma_m32n8k16_mma_s8: 20002 return {4, 1, 8, 8, {{MMA_SATF_VARIANTS(m32n8k16, s8)}}}; 20003 case NVPTX::BI__imma_m32n8k16_mma_u8: 20004 return {4, 1, 8, 8, {{MMA_SATF_VARIANTS(m32n8k16, u8)}}}; 20005 case NVPTX::BI__imma_m8n32k16_mma_s8: 20006 return {1, 4, 8, 8, {{MMA_SATF_VARIANTS(m8n32k16, s8)}}}; 20007 case NVPTX::BI__imma_m8n32k16_mma_u8: 20008 return {1, 4, 8, 8, {{MMA_SATF_VARIANTS(m8n32k16, u8)}}}; 20009 20010 // Sub-integer MMA 20011 case NVPTX::BI__imma_m8n8k32_mma_s4: 20012 return {1, 1, 2, 2, {{MMA_VARIANTS_I4(m8n8k32, s4)}}}; 20013 case NVPTX::BI__imma_m8n8k32_mma_u4: 20014 return {1, 1, 2, 2, {{MMA_VARIANTS_I4(m8n8k32, u4)}}}; 20015 case NVPTX::BI__bmma_m8n8k128_mma_xor_popc_b1: 20016 return {1, 1, 2, 2, {{MMA_VARIANTS_B1_XOR(m8n8k128, b1)}}}; 20017 case NVPTX::BI__bmma_m8n8k128_mma_and_popc_b1: 20018 return {1, 1, 2, 2, {{MMA_VARIANTS_B1_AND(m8n8k128, b1)}}}; 20019 20020 // Double MMA 20021 case NVPTX::BI__dmma_m8n8k4_mma_f64: 20022 return {1, 1, 2, 2, {{MMA_VARIANTS(m8n8k4, f64)}}}; 20023 20024 // Alternate FP MMA 20025 case NVPTX::BI__mma_bf16_m16n16k16_mma_f32: 20026 return {4, 4, 8, 8, {{MMA_VARIANTS(m16n16k16, bf16)}}}; 20027 case NVPTX::BI__mma_bf16_m8n32k16_mma_f32: 20028 return {2, 8, 8, 8, {{MMA_VARIANTS(m8n32k16, bf16)}}}; 20029 case NVPTX::BI__mma_bf16_m32n8k16_mma_f32: 20030 return {8, 2, 8, 8, {{MMA_VARIANTS(m32n8k16, bf16)}}}; 20031 case NVPTX::BI__mma_tf32_m16n16k8_mma_f32: 20032 return {4, 4, 8, 8, {{MMA_VARIANTS(m16n16k8, tf32)}}}; 20033 default: 20034 llvm_unreachable("Unexpected builtin ID."); 20035 } 20036 #undef MMA_VARIANTS 20037 #undef MMA_SATF_VARIANTS 20038 #undef MMA_VARIANTS_I4 20039 #undef MMA_VARIANTS_B1_AND 20040 #undef MMA_VARIANTS_B1_XOR 20041 } 20042 20043 static Value *MakeLdgLdu(unsigned IntrinsicID, CodeGenFunction &CGF, 20044 const CallExpr *E) { 20045 Value *Ptr = 
CGF.EmitScalarExpr(E->getArg(0)); 20046 QualType ArgType = E->getArg(0)->getType(); 20047 clang::CharUnits Align = CGF.CGM.getNaturalPointeeTypeAlignment(ArgType); 20048 llvm::Type *ElemTy = CGF.ConvertTypeForMem(ArgType->getPointeeType()); 20049 return CGF.Builder.CreateCall( 20050 CGF.CGM.getIntrinsic(IntrinsicID, {ElemTy, Ptr->getType()}), 20051 {Ptr, ConstantInt::get(CGF.Builder.getInt32Ty(), Align.getQuantity())}); 20052 } 20053 20054 static Value *MakeScopedAtomic(unsigned IntrinsicID, CodeGenFunction &CGF, 20055 const CallExpr *E) { 20056 Value *Ptr = CGF.EmitScalarExpr(E->getArg(0)); 20057 llvm::Type *ElemTy = 20058 CGF.ConvertTypeForMem(E->getArg(0)->getType()->getPointeeType()); 20059 return CGF.Builder.CreateCall( 20060 CGF.CGM.getIntrinsic(IntrinsicID, {ElemTy, Ptr->getType()}), 20061 {Ptr, CGF.EmitScalarExpr(E->getArg(1))}); 20062 } 20063 20064 static Value *MakeCpAsync(unsigned IntrinsicID, unsigned IntrinsicIDS, 20065 CodeGenFunction &CGF, const CallExpr *E, 20066 int SrcSize) { 20067 return E->getNumArgs() == 3 20068 ? CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IntrinsicIDS), 20069 {CGF.EmitScalarExpr(E->getArg(0)), 20070 CGF.EmitScalarExpr(E->getArg(1)), 20071 CGF.EmitScalarExpr(E->getArg(2))}) 20072 : CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IntrinsicID), 20073 {CGF.EmitScalarExpr(E->getArg(0)), 20074 CGF.EmitScalarExpr(E->getArg(1))}); 20075 } 20076 20077 static Value *MakeHalfType(unsigned IntrinsicID, unsigned BuiltinID, 20078 const CallExpr *E, CodeGenFunction &CGF) { 20079 auto &C = CGF.CGM.getContext(); 20080 if (!(C.getLangOpts().NativeHalfType || 20081 !C.getTargetInfo().useFP16ConversionIntrinsics())) { 20082 CGF.CGM.Error(E->getExprLoc(), C.BuiltinInfo.getName(BuiltinID).str() + 20083 " requires native half type support."); 20084 return nullptr; 20085 } 20086 20087 if (IntrinsicID == Intrinsic::nvvm_ldg_global_f || 20088 IntrinsicID == Intrinsic::nvvm_ldu_global_f) 20089 return MakeLdgLdu(IntrinsicID, CGF, E); 20090 20091 SmallVector<Value *, 16> Args; 20092 auto *F = CGF.CGM.getIntrinsic(IntrinsicID); 20093 auto *FTy = F->getFunctionType(); 20094 unsigned ICEArguments = 0; 20095 ASTContext::GetBuiltinTypeError Error; 20096 C.GetBuiltinType(BuiltinID, Error, &ICEArguments); 20097 assert(Error == ASTContext::GE_None && "Should not codegen an error"); 20098 for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) { 20099 assert((ICEArguments & (1 << i)) == 0); 20100 auto *ArgValue = CGF.EmitScalarExpr(E->getArg(i)); 20101 auto *PTy = FTy->getParamType(i); 20102 if (PTy != ArgValue->getType()) 20103 ArgValue = CGF.Builder.CreateBitCast(ArgValue, PTy); 20104 Args.push_back(ArgValue); 20105 } 20106 20107 return CGF.Builder.CreateCall(F, Args); 20108 } 20109 } // namespace 20110 20111 Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, 20112 const CallExpr *E) { 20113 switch (BuiltinID) { 20114 case NVPTX::BI__nvvm_atom_add_gen_i: 20115 case NVPTX::BI__nvvm_atom_add_gen_l: 20116 case NVPTX::BI__nvvm_atom_add_gen_ll: 20117 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Add, E); 20118 20119 case NVPTX::BI__nvvm_atom_sub_gen_i: 20120 case NVPTX::BI__nvvm_atom_sub_gen_l: 20121 case NVPTX::BI__nvvm_atom_sub_gen_ll: 20122 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Sub, E); 20123 20124 case NVPTX::BI__nvvm_atom_and_gen_i: 20125 case NVPTX::BI__nvvm_atom_and_gen_l: 20126 case NVPTX::BI__nvvm_atom_and_gen_ll: 20127 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::And, E); 20128 20129 case NVPTX::BI__nvvm_atom_or_gen_i: 20130 case 
NVPTX::BI__nvvm_atom_or_gen_l: 20131 case NVPTX::BI__nvvm_atom_or_gen_ll: 20132 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Or, E); 20133 20134 case NVPTX::BI__nvvm_atom_xor_gen_i: 20135 case NVPTX::BI__nvvm_atom_xor_gen_l: 20136 case NVPTX::BI__nvvm_atom_xor_gen_ll: 20137 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xor, E); 20138 20139 case NVPTX::BI__nvvm_atom_xchg_gen_i: 20140 case NVPTX::BI__nvvm_atom_xchg_gen_l: 20141 case NVPTX::BI__nvvm_atom_xchg_gen_ll: 20142 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xchg, E); 20143 20144 case NVPTX::BI__nvvm_atom_max_gen_i: 20145 case NVPTX::BI__nvvm_atom_max_gen_l: 20146 case NVPTX::BI__nvvm_atom_max_gen_ll: 20147 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Max, E); 20148 20149 case NVPTX::BI__nvvm_atom_max_gen_ui: 20150 case NVPTX::BI__nvvm_atom_max_gen_ul: 20151 case NVPTX::BI__nvvm_atom_max_gen_ull: 20152 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMax, E); 20153 20154 case NVPTX::BI__nvvm_atom_min_gen_i: 20155 case NVPTX::BI__nvvm_atom_min_gen_l: 20156 case NVPTX::BI__nvvm_atom_min_gen_ll: 20157 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Min, E); 20158 20159 case NVPTX::BI__nvvm_atom_min_gen_ui: 20160 case NVPTX::BI__nvvm_atom_min_gen_ul: 20161 case NVPTX::BI__nvvm_atom_min_gen_ull: 20162 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMin, E); 20163 20164 case NVPTX::BI__nvvm_atom_cas_gen_i: 20165 case NVPTX::BI__nvvm_atom_cas_gen_l: 20166 case NVPTX::BI__nvvm_atom_cas_gen_ll: 20167 // __nvvm_atom_cas_gen_* should return the old value rather than the 20168 // success flag. 20169 return MakeAtomicCmpXchgValue(*this, E, /*ReturnBool=*/false); 20170 20171 case NVPTX::BI__nvvm_atom_add_gen_f: 20172 case NVPTX::BI__nvvm_atom_add_gen_d: { 20173 Address DestAddr = EmitPointerWithAlignment(E->getArg(0)); 20174 Value *Val = EmitScalarExpr(E->getArg(1)); 20175 20176 return Builder.CreateAtomicRMW(llvm::AtomicRMWInst::FAdd, DestAddr, Val, 20177 AtomicOrdering::SequentiallyConsistent); 20178 } 20179 20180 case NVPTX::BI__nvvm_atom_inc_gen_ui: { 20181 Value *Ptr = EmitScalarExpr(E->getArg(0)); 20182 Value *Val = EmitScalarExpr(E->getArg(1)); 20183 Function *FnALI32 = 20184 CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_inc_32, Ptr->getType()); 20185 return Builder.CreateCall(FnALI32, {Ptr, Val}); 20186 } 20187 20188 case NVPTX::BI__nvvm_atom_dec_gen_ui: { 20189 Value *Ptr = EmitScalarExpr(E->getArg(0)); 20190 Value *Val = EmitScalarExpr(E->getArg(1)); 20191 Function *FnALD32 = 20192 CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_dec_32, Ptr->getType()); 20193 return Builder.CreateCall(FnALD32, {Ptr, Val}); 20194 } 20195 20196 case NVPTX::BI__nvvm_ldg_c: 20197 case NVPTX::BI__nvvm_ldg_sc: 20198 case NVPTX::BI__nvvm_ldg_c2: 20199 case NVPTX::BI__nvvm_ldg_sc2: 20200 case NVPTX::BI__nvvm_ldg_c4: 20201 case NVPTX::BI__nvvm_ldg_sc4: 20202 case NVPTX::BI__nvvm_ldg_s: 20203 case NVPTX::BI__nvvm_ldg_s2: 20204 case NVPTX::BI__nvvm_ldg_s4: 20205 case NVPTX::BI__nvvm_ldg_i: 20206 case NVPTX::BI__nvvm_ldg_i2: 20207 case NVPTX::BI__nvvm_ldg_i4: 20208 case NVPTX::BI__nvvm_ldg_l: 20209 case NVPTX::BI__nvvm_ldg_l2: 20210 case NVPTX::BI__nvvm_ldg_ll: 20211 case NVPTX::BI__nvvm_ldg_ll2: 20212 case NVPTX::BI__nvvm_ldg_uc: 20213 case NVPTX::BI__nvvm_ldg_uc2: 20214 case NVPTX::BI__nvvm_ldg_uc4: 20215 case NVPTX::BI__nvvm_ldg_us: 20216 case NVPTX::BI__nvvm_ldg_us2: 20217 case NVPTX::BI__nvvm_ldg_us4: 20218 case NVPTX::BI__nvvm_ldg_ui: 20219 case NVPTX::BI__nvvm_ldg_ui2: 20220 case 
NVPTX::BI__nvvm_ldg_ui4: 20221 case NVPTX::BI__nvvm_ldg_ul: 20222 case NVPTX::BI__nvvm_ldg_ul2: 20223 case NVPTX::BI__nvvm_ldg_ull: 20224 case NVPTX::BI__nvvm_ldg_ull2: 20225 // PTX Interoperability section 2.2: "For a vector with an even number of 20226 // elements, its alignment is set to number of elements times the alignment 20227 // of its member: n*alignof(t)." 20228 return MakeLdgLdu(Intrinsic::nvvm_ldg_global_i, *this, E); 20229 case NVPTX::BI__nvvm_ldg_f: 20230 case NVPTX::BI__nvvm_ldg_f2: 20231 case NVPTX::BI__nvvm_ldg_f4: 20232 case NVPTX::BI__nvvm_ldg_d: 20233 case NVPTX::BI__nvvm_ldg_d2: 20234 return MakeLdgLdu(Intrinsic::nvvm_ldg_global_f, *this, E); 20235 20236 case NVPTX::BI__nvvm_ldu_c: 20237 case NVPTX::BI__nvvm_ldu_sc: 20238 case NVPTX::BI__nvvm_ldu_c2: 20239 case NVPTX::BI__nvvm_ldu_sc2: 20240 case NVPTX::BI__nvvm_ldu_c4: 20241 case NVPTX::BI__nvvm_ldu_sc4: 20242 case NVPTX::BI__nvvm_ldu_s: 20243 case NVPTX::BI__nvvm_ldu_s2: 20244 case NVPTX::BI__nvvm_ldu_s4: 20245 case NVPTX::BI__nvvm_ldu_i: 20246 case NVPTX::BI__nvvm_ldu_i2: 20247 case NVPTX::BI__nvvm_ldu_i4: 20248 case NVPTX::BI__nvvm_ldu_l: 20249 case NVPTX::BI__nvvm_ldu_l2: 20250 case NVPTX::BI__nvvm_ldu_ll: 20251 case NVPTX::BI__nvvm_ldu_ll2: 20252 case NVPTX::BI__nvvm_ldu_uc: 20253 case NVPTX::BI__nvvm_ldu_uc2: 20254 case NVPTX::BI__nvvm_ldu_uc4: 20255 case NVPTX::BI__nvvm_ldu_us: 20256 case NVPTX::BI__nvvm_ldu_us2: 20257 case NVPTX::BI__nvvm_ldu_us4: 20258 case NVPTX::BI__nvvm_ldu_ui: 20259 case NVPTX::BI__nvvm_ldu_ui2: 20260 case NVPTX::BI__nvvm_ldu_ui4: 20261 case NVPTX::BI__nvvm_ldu_ul: 20262 case NVPTX::BI__nvvm_ldu_ul2: 20263 case NVPTX::BI__nvvm_ldu_ull: 20264 case NVPTX::BI__nvvm_ldu_ull2: 20265 return MakeLdgLdu(Intrinsic::nvvm_ldu_global_i, *this, E); 20266 case NVPTX::BI__nvvm_ldu_f: 20267 case NVPTX::BI__nvvm_ldu_f2: 20268 case NVPTX::BI__nvvm_ldu_f4: 20269 case NVPTX::BI__nvvm_ldu_d: 20270 case NVPTX::BI__nvvm_ldu_d2: 20271 return MakeLdgLdu(Intrinsic::nvvm_ldu_global_f, *this, E); 20272 20273 case NVPTX::BI__nvvm_atom_cta_add_gen_i: 20274 case NVPTX::BI__nvvm_atom_cta_add_gen_l: 20275 case NVPTX::BI__nvvm_atom_cta_add_gen_ll: 20276 return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_cta, *this, E); 20277 case NVPTX::BI__nvvm_atom_sys_add_gen_i: 20278 case NVPTX::BI__nvvm_atom_sys_add_gen_l: 20279 case NVPTX::BI__nvvm_atom_sys_add_gen_ll: 20280 return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_sys, *this, E); 20281 case NVPTX::BI__nvvm_atom_cta_add_gen_f: 20282 case NVPTX::BI__nvvm_atom_cta_add_gen_d: 20283 return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_cta, *this, E); 20284 case NVPTX::BI__nvvm_atom_sys_add_gen_f: 20285 case NVPTX::BI__nvvm_atom_sys_add_gen_d: 20286 return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_sys, *this, E); 20287 case NVPTX::BI__nvvm_atom_cta_xchg_gen_i: 20288 case NVPTX::BI__nvvm_atom_cta_xchg_gen_l: 20289 case NVPTX::BI__nvvm_atom_cta_xchg_gen_ll: 20290 return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_cta, *this, E); 20291 case NVPTX::BI__nvvm_atom_sys_xchg_gen_i: 20292 case NVPTX::BI__nvvm_atom_sys_xchg_gen_l: 20293 case NVPTX::BI__nvvm_atom_sys_xchg_gen_ll: 20294 return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_sys, *this, E); 20295 case NVPTX::BI__nvvm_atom_cta_max_gen_i: 20296 case NVPTX::BI__nvvm_atom_cta_max_gen_ui: 20297 case NVPTX::BI__nvvm_atom_cta_max_gen_l: 20298 case NVPTX::BI__nvvm_atom_cta_max_gen_ul: 20299 case NVPTX::BI__nvvm_atom_cta_max_gen_ll: 20300 case NVPTX::BI__nvvm_atom_cta_max_gen_ull: 20301 return 
MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_cta, *this, E); 20302 case NVPTX::BI__nvvm_atom_sys_max_gen_i: 20303 case NVPTX::BI__nvvm_atom_sys_max_gen_ui: 20304 case NVPTX::BI__nvvm_atom_sys_max_gen_l: 20305 case NVPTX::BI__nvvm_atom_sys_max_gen_ul: 20306 case NVPTX::BI__nvvm_atom_sys_max_gen_ll: 20307 case NVPTX::BI__nvvm_atom_sys_max_gen_ull: 20308 return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_sys, *this, E); 20309 case NVPTX::BI__nvvm_atom_cta_min_gen_i: 20310 case NVPTX::BI__nvvm_atom_cta_min_gen_ui: 20311 case NVPTX::BI__nvvm_atom_cta_min_gen_l: 20312 case NVPTX::BI__nvvm_atom_cta_min_gen_ul: 20313 case NVPTX::BI__nvvm_atom_cta_min_gen_ll: 20314 case NVPTX::BI__nvvm_atom_cta_min_gen_ull: 20315 return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_cta, *this, E); 20316 case NVPTX::BI__nvvm_atom_sys_min_gen_i: 20317 case NVPTX::BI__nvvm_atom_sys_min_gen_ui: 20318 case NVPTX::BI__nvvm_atom_sys_min_gen_l: 20319 case NVPTX::BI__nvvm_atom_sys_min_gen_ul: 20320 case NVPTX::BI__nvvm_atom_sys_min_gen_ll: 20321 case NVPTX::BI__nvvm_atom_sys_min_gen_ull: 20322 return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_sys, *this, E); 20323 case NVPTX::BI__nvvm_atom_cta_inc_gen_ui: 20324 return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_cta, *this, E); 20325 case NVPTX::BI__nvvm_atom_cta_dec_gen_ui: 20326 return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_cta, *this, E); 20327 case NVPTX::BI__nvvm_atom_sys_inc_gen_ui: 20328 return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_sys, *this, E); 20329 case NVPTX::BI__nvvm_atom_sys_dec_gen_ui: 20330 return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_sys, *this, E); 20331 case NVPTX::BI__nvvm_atom_cta_and_gen_i: 20332 case NVPTX::BI__nvvm_atom_cta_and_gen_l: 20333 case NVPTX::BI__nvvm_atom_cta_and_gen_ll: 20334 return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_cta, *this, E); 20335 case NVPTX::BI__nvvm_atom_sys_and_gen_i: 20336 case NVPTX::BI__nvvm_atom_sys_and_gen_l: 20337 case NVPTX::BI__nvvm_atom_sys_and_gen_ll: 20338 return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_sys, *this, E); 20339 case NVPTX::BI__nvvm_atom_cta_or_gen_i: 20340 case NVPTX::BI__nvvm_atom_cta_or_gen_l: 20341 case NVPTX::BI__nvvm_atom_cta_or_gen_ll: 20342 return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_cta, *this, E); 20343 case NVPTX::BI__nvvm_atom_sys_or_gen_i: 20344 case NVPTX::BI__nvvm_atom_sys_or_gen_l: 20345 case NVPTX::BI__nvvm_atom_sys_or_gen_ll: 20346 return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_sys, *this, E); 20347 case NVPTX::BI__nvvm_atom_cta_xor_gen_i: 20348 case NVPTX::BI__nvvm_atom_cta_xor_gen_l: 20349 case NVPTX::BI__nvvm_atom_cta_xor_gen_ll: 20350 return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_cta, *this, E); 20351 case NVPTX::BI__nvvm_atom_sys_xor_gen_i: 20352 case NVPTX::BI__nvvm_atom_sys_xor_gen_l: 20353 case NVPTX::BI__nvvm_atom_sys_xor_gen_ll: 20354 return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_sys, *this, E); 20355 case NVPTX::BI__nvvm_atom_cta_cas_gen_i: 20356 case NVPTX::BI__nvvm_atom_cta_cas_gen_l: 20357 case NVPTX::BI__nvvm_atom_cta_cas_gen_ll: { 20358 Value *Ptr = EmitScalarExpr(E->getArg(0)); 20359 llvm::Type *ElemTy = 20360 ConvertTypeForMem(E->getArg(0)->getType()->getPointeeType()); 20361 return Builder.CreateCall( 20362 CGM.getIntrinsic( 20363 Intrinsic::nvvm_atomic_cas_gen_i_cta, {ElemTy, Ptr->getType()}), 20364 {Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))}); 20365 } 20366 case NVPTX::BI__nvvm_atom_sys_cas_gen_i: 20367 case 
NVPTX::BI__nvvm_atom_sys_cas_gen_l: 20368 case NVPTX::BI__nvvm_atom_sys_cas_gen_ll: { 20369 Value *Ptr = EmitScalarExpr(E->getArg(0)); 20370 llvm::Type *ElemTy = 20371 ConvertTypeForMem(E->getArg(0)->getType()->getPointeeType()); 20372 return Builder.CreateCall( 20373 CGM.getIntrinsic( 20374 Intrinsic::nvvm_atomic_cas_gen_i_sys, {ElemTy, Ptr->getType()}), 20375 {Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))}); 20376 } 20377 case NVPTX::BI__nvvm_match_all_sync_i32p: 20378 case NVPTX::BI__nvvm_match_all_sync_i64p: { 20379 Value *Mask = EmitScalarExpr(E->getArg(0)); 20380 Value *Val = EmitScalarExpr(E->getArg(1)); 20381 Address PredOutPtr = EmitPointerWithAlignment(E->getArg(2)); 20382 Value *ResultPair = Builder.CreateCall( 20383 CGM.getIntrinsic(BuiltinID == NVPTX::BI__nvvm_match_all_sync_i32p 20384 ? Intrinsic::nvvm_match_all_sync_i32p 20385 : Intrinsic::nvvm_match_all_sync_i64p), 20386 {Mask, Val}); 20387 Value *Pred = Builder.CreateZExt(Builder.CreateExtractValue(ResultPair, 1), 20388 PredOutPtr.getElementType()); 20389 Builder.CreateStore(Pred, PredOutPtr); 20390 return Builder.CreateExtractValue(ResultPair, 0); 20391 } 20392 20393 // FP MMA loads 20394 case NVPTX::BI__hmma_m16n16k16_ld_a: 20395 case NVPTX::BI__hmma_m16n16k16_ld_b: 20396 case NVPTX::BI__hmma_m16n16k16_ld_c_f16: 20397 case NVPTX::BI__hmma_m16n16k16_ld_c_f32: 20398 case NVPTX::BI__hmma_m32n8k16_ld_a: 20399 case NVPTX::BI__hmma_m32n8k16_ld_b: 20400 case NVPTX::BI__hmma_m32n8k16_ld_c_f16: 20401 case NVPTX::BI__hmma_m32n8k16_ld_c_f32: 20402 case NVPTX::BI__hmma_m8n32k16_ld_a: 20403 case NVPTX::BI__hmma_m8n32k16_ld_b: 20404 case NVPTX::BI__hmma_m8n32k16_ld_c_f16: 20405 case NVPTX::BI__hmma_m8n32k16_ld_c_f32: 20406 // Integer MMA loads. 20407 case NVPTX::BI__imma_m16n16k16_ld_a_s8: 20408 case NVPTX::BI__imma_m16n16k16_ld_a_u8: 20409 case NVPTX::BI__imma_m16n16k16_ld_b_s8: 20410 case NVPTX::BI__imma_m16n16k16_ld_b_u8: 20411 case NVPTX::BI__imma_m16n16k16_ld_c: 20412 case NVPTX::BI__imma_m32n8k16_ld_a_s8: 20413 case NVPTX::BI__imma_m32n8k16_ld_a_u8: 20414 case NVPTX::BI__imma_m32n8k16_ld_b_s8: 20415 case NVPTX::BI__imma_m32n8k16_ld_b_u8: 20416 case NVPTX::BI__imma_m32n8k16_ld_c: 20417 case NVPTX::BI__imma_m8n32k16_ld_a_s8: 20418 case NVPTX::BI__imma_m8n32k16_ld_a_u8: 20419 case NVPTX::BI__imma_m8n32k16_ld_b_s8: 20420 case NVPTX::BI__imma_m8n32k16_ld_b_u8: 20421 case NVPTX::BI__imma_m8n32k16_ld_c: 20422 // Sub-integer MMA loads. 20423 case NVPTX::BI__imma_m8n8k32_ld_a_s4: 20424 case NVPTX::BI__imma_m8n8k32_ld_a_u4: 20425 case NVPTX::BI__imma_m8n8k32_ld_b_s4: 20426 case NVPTX::BI__imma_m8n8k32_ld_b_u4: 20427 case NVPTX::BI__imma_m8n8k32_ld_c: 20428 case NVPTX::BI__bmma_m8n8k128_ld_a_b1: 20429 case NVPTX::BI__bmma_m8n8k128_ld_b_b1: 20430 case NVPTX::BI__bmma_m8n8k128_ld_c: 20431 // Double MMA loads. 20432 case NVPTX::BI__dmma_m8n8k4_ld_a: 20433 case NVPTX::BI__dmma_m8n8k4_ld_b: 20434 case NVPTX::BI__dmma_m8n8k4_ld_c: 20435 // Alternate float MMA loads. 
  case NVPTX::BI__mma_bf16_m16n16k16_ld_a:
  case NVPTX::BI__mma_bf16_m16n16k16_ld_b:
  case NVPTX::BI__mma_bf16_m8n32k16_ld_a:
  case NVPTX::BI__mma_bf16_m8n32k16_ld_b:
  case NVPTX::BI__mma_bf16_m32n8k16_ld_a:
  case NVPTX::BI__mma_bf16_m32n8k16_ld_b:
  case NVPTX::BI__mma_tf32_m16n16k8_ld_a:
  case NVPTX::BI__mma_tf32_m16n16k8_ld_b:
  case NVPTX::BI__mma_tf32_m16n16k8_ld_c: {
    Address Dst = EmitPointerWithAlignment(E->getArg(0));
    Value *Src = EmitScalarExpr(E->getArg(1));
    Value *Ldm = EmitScalarExpr(E->getArg(2));
    std::optional<llvm::APSInt> isColMajorArg =
        E->getArg(3)->getIntegerConstantExpr(getContext());
    if (!isColMajorArg)
      return nullptr;
    bool isColMajor = isColMajorArg->getSExtValue();
    NVPTXMmaLdstInfo II = getNVPTXMmaLdstInfo(BuiltinID);
    unsigned IID = isColMajor ? II.IID_col : II.IID_row;
    if (IID == 0)
      return nullptr;

    Value *Result =
        Builder.CreateCall(CGM.getIntrinsic(IID, Src->getType()), {Src, Ldm});

    // Save returned values.
    assert(II.NumResults);
    if (II.NumResults == 1) {
      Builder.CreateAlignedStore(Result, Dst.emitRawPointer(*this),
                                 CharUnits::fromQuantity(4));
    } else {
      for (unsigned i = 0; i < II.NumResults; ++i) {
        Builder.CreateAlignedStore(
            Builder.CreateBitCast(Builder.CreateExtractValue(Result, i),
                                  Dst.getElementType()),
            Builder.CreateGEP(Dst.getElementType(), Dst.emitRawPointer(*this),
                              llvm::ConstantInt::get(IntTy, i)),
            CharUnits::fromQuantity(4));
      }
    }
    return Result;
  }

  case NVPTX::BI__hmma_m16n16k16_st_c_f16:
  case NVPTX::BI__hmma_m16n16k16_st_c_f32:
  case NVPTX::BI__hmma_m32n8k16_st_c_f16:
  case NVPTX::BI__hmma_m32n8k16_st_c_f32:
  case NVPTX::BI__hmma_m8n32k16_st_c_f16:
  case NVPTX::BI__hmma_m8n32k16_st_c_f32:
  case NVPTX::BI__imma_m16n16k16_st_c_i32:
  case NVPTX::BI__imma_m32n8k16_st_c_i32:
  case NVPTX::BI__imma_m8n32k16_st_c_i32:
  case NVPTX::BI__imma_m8n8k32_st_c_i32:
  case NVPTX::BI__bmma_m8n8k128_st_c_i32:
  case NVPTX::BI__dmma_m8n8k4_st_c_f64:
  case NVPTX::BI__mma_m16n16k8_st_c_f32: {
    Value *Dst = EmitScalarExpr(E->getArg(0));
    Address Src = EmitPointerWithAlignment(E->getArg(1));
    Value *Ldm = EmitScalarExpr(E->getArg(2));
    std::optional<llvm::APSInt> isColMajorArg =
        E->getArg(3)->getIntegerConstantExpr(getContext());
    if (!isColMajorArg)
      return nullptr;
    bool isColMajor = isColMajorArg->getSExtValue();
    NVPTXMmaLdstInfo II = getNVPTXMmaLdstInfo(BuiltinID);
    unsigned IID = isColMajor ? II.IID_col : II.IID_row;
    if (IID == 0)
      return nullptr;
    Function *Intrinsic =
        CGM.getIntrinsic(IID, Dst->getType());
    llvm::Type *ParamType = Intrinsic->getFunctionType()->getParamType(1);
    SmallVector<Value *, 10> Values = {Dst};
    for (unsigned i = 0; i < II.NumResults; ++i) {
      Value *V = Builder.CreateAlignedLoad(
          Src.getElementType(),
          Builder.CreateGEP(Src.getElementType(), Src.emitRawPointer(*this),
                            llvm::ConstantInt::get(IntTy, i)),
          CharUnits::fromQuantity(4));
      Values.push_back(Builder.CreateBitCast(V, ParamType));
    }
    Values.push_back(Ldm);
    Value *Result = Builder.CreateCall(Intrinsic, Values);
    return Result;
  }

  // BI__hmma_m16n16k16_mma_<Dtype><CType>(d, a, b, c, layout, satf) -->
  // Intrinsic::nvvm_wmma_m16n16k16_mma_sync<layout A,B><DType><CType><Satf>
  case NVPTX::BI__hmma_m16n16k16_mma_f16f16:
  case NVPTX::BI__hmma_m16n16k16_mma_f32f16:
  case NVPTX::BI__hmma_m16n16k16_mma_f32f32:
  case NVPTX::BI__hmma_m16n16k16_mma_f16f32:
  case NVPTX::BI__hmma_m32n8k16_mma_f16f16:
  case NVPTX::BI__hmma_m32n8k16_mma_f32f16:
  case NVPTX::BI__hmma_m32n8k16_mma_f32f32:
  case NVPTX::BI__hmma_m32n8k16_mma_f16f32:
  case NVPTX::BI__hmma_m8n32k16_mma_f16f16:
  case NVPTX::BI__hmma_m8n32k16_mma_f32f16:
  case NVPTX::BI__hmma_m8n32k16_mma_f32f32:
  case NVPTX::BI__hmma_m8n32k16_mma_f16f32:
  case NVPTX::BI__imma_m16n16k16_mma_s8:
  case NVPTX::BI__imma_m16n16k16_mma_u8:
  case NVPTX::BI__imma_m32n8k16_mma_s8:
  case NVPTX::BI__imma_m32n8k16_mma_u8:
  case NVPTX::BI__imma_m8n32k16_mma_s8:
  case NVPTX::BI__imma_m8n32k16_mma_u8:
  case NVPTX::BI__imma_m8n8k32_mma_s4:
  case NVPTX::BI__imma_m8n8k32_mma_u4:
  case NVPTX::BI__bmma_m8n8k128_mma_xor_popc_b1:
  case NVPTX::BI__bmma_m8n8k128_mma_and_popc_b1:
  case NVPTX::BI__dmma_m8n8k4_mma_f64:
  case NVPTX::BI__mma_bf16_m16n16k16_mma_f32:
  case NVPTX::BI__mma_bf16_m8n32k16_mma_f32:
  case NVPTX::BI__mma_bf16_m32n8k16_mma_f32:
  case NVPTX::BI__mma_tf32_m16n16k8_mma_f32: {
    Address Dst = EmitPointerWithAlignment(E->getArg(0));
    Address SrcA = EmitPointerWithAlignment(E->getArg(1));
    Address SrcB = EmitPointerWithAlignment(E->getArg(2));
    Address SrcC = EmitPointerWithAlignment(E->getArg(3));
    std::optional<llvm::APSInt> LayoutArg =
        E->getArg(4)->getIntegerConstantExpr(getContext());
    if (!LayoutArg)
      return nullptr;
    int Layout = LayoutArg->getSExtValue();
    if (Layout < 0 || Layout > 3)
      return nullptr;
    llvm::APSInt SatfArg;
    if (BuiltinID == NVPTX::BI__bmma_m8n8k128_mma_xor_popc_b1 ||
        BuiltinID == NVPTX::BI__bmma_m8n8k128_mma_and_popc_b1)
      SatfArg = 0; // .b1 does not have satf argument.
    else if (std::optional<llvm::APSInt> OptSatfArg =
                 E->getArg(5)->getIntegerConstantExpr(getContext()))
      SatfArg = *OptSatfArg;
    else
      return nullptr;
    bool Satf = SatfArg.getSExtValue();
    NVPTXMmaInfo MI = getNVPTXMmaInfo(BuiltinID);
    unsigned IID = MI.getMMAIntrinsic(Layout, Satf);
    if (IID == 0) // Unsupported combination of Layout/Satf.
      return nullptr;

    SmallVector<Value *, 24> Values;
    Function *Intrinsic = CGM.getIntrinsic(IID);
    llvm::Type *AType = Intrinsic->getFunctionType()->getParamType(0);
    // Load A
    for (unsigned i = 0; i < MI.NumEltsA; ++i) {
      Value *V = Builder.CreateAlignedLoad(
          SrcA.getElementType(),
          Builder.CreateGEP(SrcA.getElementType(), SrcA.emitRawPointer(*this),
                            llvm::ConstantInt::get(IntTy, i)),
          CharUnits::fromQuantity(4));
      Values.push_back(Builder.CreateBitCast(V, AType));
    }
    // Load B
    llvm::Type *BType = Intrinsic->getFunctionType()->getParamType(MI.NumEltsA);
    for (unsigned i = 0; i < MI.NumEltsB; ++i) {
      Value *V = Builder.CreateAlignedLoad(
          SrcB.getElementType(),
          Builder.CreateGEP(SrcB.getElementType(), SrcB.emitRawPointer(*this),
                            llvm::ConstantInt::get(IntTy, i)),
          CharUnits::fromQuantity(4));
      Values.push_back(Builder.CreateBitCast(V, BType));
    }
    // Load C
    llvm::Type *CType =
        Intrinsic->getFunctionType()->getParamType(MI.NumEltsA + MI.NumEltsB);
    for (unsigned i = 0; i < MI.NumEltsC; ++i) {
      Value *V = Builder.CreateAlignedLoad(
          SrcC.getElementType(),
          Builder.CreateGEP(SrcC.getElementType(), SrcC.emitRawPointer(*this),
                            llvm::ConstantInt::get(IntTy, i)),
          CharUnits::fromQuantity(4));
      Values.push_back(Builder.CreateBitCast(V, CType));
    }
    Value *Result = Builder.CreateCall(Intrinsic, Values);
    llvm::Type *DType = Dst.getElementType();
    for (unsigned i = 0; i < MI.NumEltsD; ++i)
      Builder.CreateAlignedStore(
          Builder.CreateBitCast(Builder.CreateExtractValue(Result, i), DType),
          Builder.CreateGEP(Dst.getElementType(), Dst.emitRawPointer(*this),
                            llvm::ConstantInt::get(IntTy, i)),
          CharUnits::fromQuantity(4));
    return Result;
  }
  // The following builtins require half type support
  case NVPTX::BI__nvvm_ex2_approx_f16:
    return MakeHalfType(Intrinsic::nvvm_ex2_approx_f16, BuiltinID, E, *this);
  case NVPTX::BI__nvvm_ex2_approx_f16x2:
    return MakeHalfType(Intrinsic::nvvm_ex2_approx_f16x2, BuiltinID, E, *this);
  case NVPTX::BI__nvvm_ff2f16x2_rn:
    return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rn, BuiltinID, E, *this);
  case NVPTX::BI__nvvm_ff2f16x2_rn_relu:
    return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rn_relu, BuiltinID, E, *this);
  case NVPTX::BI__nvvm_ff2f16x2_rz:
    return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rz, BuiltinID, E, *this);
  case NVPTX::BI__nvvm_ff2f16x2_rz_relu:
    return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rz_relu, BuiltinID, E, *this);
  case NVPTX::BI__nvvm_fma_rn_f16:
    return MakeHalfType(Intrinsic::nvvm_fma_rn_f16, BuiltinID, E, *this);
  case NVPTX::BI__nvvm_fma_rn_f16x2:
    return MakeHalfType(Intrinsic::nvvm_fma_rn_f16x2, BuiltinID, E, *this);
  case NVPTX::BI__nvvm_fma_rn_ftz_f16:
    return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_f16, BuiltinID, E, *this);
  case NVPTX::BI__nvvm_fma_rn_ftz_f16x2:
    return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_f16x2, BuiltinID, E, *this);
  case NVPTX::BI__nvvm_fma_rn_ftz_relu_f16:
    return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_relu_f16, BuiltinID, E,
                        *this);
  case NVPTX::BI__nvvm_fma_rn_ftz_relu_f16x2:
    return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_relu_f16x2, BuiltinID, E,
                        *this);
  case NVPTX::BI__nvvm_fma_rn_ftz_sat_f16:
    return
MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_sat_f16, BuiltinID, E, 20648 *this); 20649 case NVPTX::BI__nvvm_fma_rn_ftz_sat_f16x2: 20650 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_sat_f16x2, BuiltinID, E, 20651 *this); 20652 case NVPTX::BI__nvvm_fma_rn_relu_f16: 20653 return MakeHalfType(Intrinsic::nvvm_fma_rn_relu_f16, BuiltinID, E, *this); 20654 case NVPTX::BI__nvvm_fma_rn_relu_f16x2: 20655 return MakeHalfType(Intrinsic::nvvm_fma_rn_relu_f16x2, BuiltinID, E, *this); 20656 case NVPTX::BI__nvvm_fma_rn_sat_f16: 20657 return MakeHalfType(Intrinsic::nvvm_fma_rn_sat_f16, BuiltinID, E, *this); 20658 case NVPTX::BI__nvvm_fma_rn_sat_f16x2: 20659 return MakeHalfType(Intrinsic::nvvm_fma_rn_sat_f16x2, BuiltinID, E, *this); 20660 case NVPTX::BI__nvvm_fmax_f16: 20661 return MakeHalfType(Intrinsic::nvvm_fmax_f16, BuiltinID, E, *this); 20662 case NVPTX::BI__nvvm_fmax_f16x2: 20663 return MakeHalfType(Intrinsic::nvvm_fmax_f16x2, BuiltinID, E, *this); 20664 case NVPTX::BI__nvvm_fmax_ftz_f16: 20665 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_f16, BuiltinID, E, *this); 20666 case NVPTX::BI__nvvm_fmax_ftz_f16x2: 20667 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_f16x2, BuiltinID, E, *this); 20668 case NVPTX::BI__nvvm_fmax_ftz_nan_f16: 20669 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_f16, BuiltinID, E, *this); 20670 case NVPTX::BI__nvvm_fmax_ftz_nan_f16x2: 20671 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_f16x2, BuiltinID, E, 20672 *this); 20673 case NVPTX::BI__nvvm_fmax_ftz_nan_xorsign_abs_f16: 20674 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f16, BuiltinID, 20675 E, *this); 20676 case NVPTX::BI__nvvm_fmax_ftz_nan_xorsign_abs_f16x2: 20677 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f16x2, 20678 BuiltinID, E, *this); 20679 case NVPTX::BI__nvvm_fmax_ftz_xorsign_abs_f16: 20680 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_xorsign_abs_f16, BuiltinID, E, 20681 *this); 20682 case NVPTX::BI__nvvm_fmax_ftz_xorsign_abs_f16x2: 20683 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_xorsign_abs_f16x2, BuiltinID, 20684 E, *this); 20685 case NVPTX::BI__nvvm_fmax_nan_f16: 20686 return MakeHalfType(Intrinsic::nvvm_fmax_nan_f16, BuiltinID, E, *this); 20687 case NVPTX::BI__nvvm_fmax_nan_f16x2: 20688 return MakeHalfType(Intrinsic::nvvm_fmax_nan_f16x2, BuiltinID, E, *this); 20689 case NVPTX::BI__nvvm_fmax_nan_xorsign_abs_f16: 20690 return MakeHalfType(Intrinsic::nvvm_fmax_nan_xorsign_abs_f16, BuiltinID, E, 20691 *this); 20692 case NVPTX::BI__nvvm_fmax_nan_xorsign_abs_f16x2: 20693 return MakeHalfType(Intrinsic::nvvm_fmax_nan_xorsign_abs_f16x2, BuiltinID, 20694 E, *this); 20695 case NVPTX::BI__nvvm_fmax_xorsign_abs_f16: 20696 return MakeHalfType(Intrinsic::nvvm_fmax_xorsign_abs_f16, BuiltinID, E, 20697 *this); 20698 case NVPTX::BI__nvvm_fmax_xorsign_abs_f16x2: 20699 return MakeHalfType(Intrinsic::nvvm_fmax_xorsign_abs_f16x2, BuiltinID, E, 20700 *this); 20701 case NVPTX::BI__nvvm_fmin_f16: 20702 return MakeHalfType(Intrinsic::nvvm_fmin_f16, BuiltinID, E, *this); 20703 case NVPTX::BI__nvvm_fmin_f16x2: 20704 return MakeHalfType(Intrinsic::nvvm_fmin_f16x2, BuiltinID, E, *this); 20705 case NVPTX::BI__nvvm_fmin_ftz_f16: 20706 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_f16, BuiltinID, E, *this); 20707 case NVPTX::BI__nvvm_fmin_ftz_f16x2: 20708 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_f16x2, BuiltinID, E, *this); 20709 case NVPTX::BI__nvvm_fmin_ftz_nan_f16: 20710 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_f16, BuiltinID, E, *this); 20711 case NVPTX::BI__nvvm_fmin_ftz_nan_f16x2: 20712 
return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_f16x2, BuiltinID, E, 20713 *this); 20714 case NVPTX::BI__nvvm_fmin_ftz_nan_xorsign_abs_f16: 20715 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_f16, BuiltinID, 20716 E, *this); 20717 case NVPTX::BI__nvvm_fmin_ftz_nan_xorsign_abs_f16x2: 20718 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_f16x2, 20719 BuiltinID, E, *this); 20720 case NVPTX::BI__nvvm_fmin_ftz_xorsign_abs_f16: 20721 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_xorsign_abs_f16, BuiltinID, E, 20722 *this); 20723 case NVPTX::BI__nvvm_fmin_ftz_xorsign_abs_f16x2: 20724 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_xorsign_abs_f16x2, BuiltinID, 20725 E, *this); 20726 case NVPTX::BI__nvvm_fmin_nan_f16: 20727 return MakeHalfType(Intrinsic::nvvm_fmin_nan_f16, BuiltinID, E, *this); 20728 case NVPTX::BI__nvvm_fmin_nan_f16x2: 20729 return MakeHalfType(Intrinsic::nvvm_fmin_nan_f16x2, BuiltinID, E, *this); 20730 case NVPTX::BI__nvvm_fmin_nan_xorsign_abs_f16: 20731 return MakeHalfType(Intrinsic::nvvm_fmin_nan_xorsign_abs_f16, BuiltinID, E, 20732 *this); 20733 case NVPTX::BI__nvvm_fmin_nan_xorsign_abs_f16x2: 20734 return MakeHalfType(Intrinsic::nvvm_fmin_nan_xorsign_abs_f16x2, BuiltinID, 20735 E, *this); 20736 case NVPTX::BI__nvvm_fmin_xorsign_abs_f16: 20737 return MakeHalfType(Intrinsic::nvvm_fmin_xorsign_abs_f16, BuiltinID, E, 20738 *this); 20739 case NVPTX::BI__nvvm_fmin_xorsign_abs_f16x2: 20740 return MakeHalfType(Intrinsic::nvvm_fmin_xorsign_abs_f16x2, BuiltinID, E, 20741 *this); 20742 case NVPTX::BI__nvvm_ldg_h: 20743 return MakeHalfType(Intrinsic::nvvm_ldg_global_f, BuiltinID, E, *this); 20744 case NVPTX::BI__nvvm_ldg_h2: 20745 return MakeHalfType(Intrinsic::nvvm_ldg_global_f, BuiltinID, E, *this); 20746 case NVPTX::BI__nvvm_ldu_h: 20747 return MakeHalfType(Intrinsic::nvvm_ldu_global_f, BuiltinID, E, *this); 20748 case NVPTX::BI__nvvm_ldu_h2: { 20749 return MakeHalfType(Intrinsic::nvvm_ldu_global_f, BuiltinID, E, *this); 20750 } 20751 case NVPTX::BI__nvvm_cp_async_ca_shared_global_4: 20752 return MakeCpAsync(Intrinsic::nvvm_cp_async_ca_shared_global_4, 20753 Intrinsic::nvvm_cp_async_ca_shared_global_4_s, *this, E, 20754 4); 20755 case NVPTX::BI__nvvm_cp_async_ca_shared_global_8: 20756 return MakeCpAsync(Intrinsic::nvvm_cp_async_ca_shared_global_8, 20757 Intrinsic::nvvm_cp_async_ca_shared_global_8_s, *this, E, 20758 8); 20759 case NVPTX::BI__nvvm_cp_async_ca_shared_global_16: 20760 return MakeCpAsync(Intrinsic::nvvm_cp_async_ca_shared_global_16, 20761 Intrinsic::nvvm_cp_async_ca_shared_global_16_s, *this, E, 20762 16); 20763 case NVPTX::BI__nvvm_cp_async_cg_shared_global_16: 20764 return MakeCpAsync(Intrinsic::nvvm_cp_async_cg_shared_global_16, 20765 Intrinsic::nvvm_cp_async_cg_shared_global_16_s, *this, E, 20766 16); 20767 case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_x: 20768 return Builder.CreateCall( 20769 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_x)); 20770 case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_y: 20771 return Builder.CreateCall( 20772 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_y)); 20773 case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_z: 20774 return Builder.CreateCall( 20775 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_z)); 20776 case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_w: 20777 return Builder.CreateCall( 20778 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_w)); 20779 case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_x: 20780 return Builder.CreateCall( 20781 
CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_x)); 20782 case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_y: 20783 return Builder.CreateCall( 20784 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_y)); 20785 case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_z: 20786 return Builder.CreateCall( 20787 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_z)); 20788 case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_w: 20789 return Builder.CreateCall( 20790 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_w)); 20791 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_x: 20792 return Builder.CreateCall( 20793 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_x)); 20794 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_y: 20795 return Builder.CreateCall( 20796 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_y)); 20797 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_z: 20798 return Builder.CreateCall( 20799 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_z)); 20800 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_w: 20801 return Builder.CreateCall( 20802 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_w)); 20803 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_x: 20804 return Builder.CreateCall( 20805 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_x)); 20806 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_y: 20807 return Builder.CreateCall( 20808 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_y)); 20809 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_z: 20810 return Builder.CreateCall( 20811 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_z)); 20812 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_w: 20813 return Builder.CreateCall( 20814 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_w)); 20815 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctarank: 20816 return Builder.CreateCall( 20817 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctarank)); 20818 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctarank: 20819 return Builder.CreateCall( 20820 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctarank)); 20821 case NVPTX::BI__nvvm_is_explicit_cluster: 20822 return Builder.CreateCall( 20823 CGM.getIntrinsic(Intrinsic::nvvm_is_explicit_cluster)); 20824 case NVPTX::BI__nvvm_isspacep_shared_cluster: 20825 return Builder.CreateCall( 20826 CGM.getIntrinsic(Intrinsic::nvvm_isspacep_shared_cluster), 20827 EmitScalarExpr(E->getArg(0))); 20828 case NVPTX::BI__nvvm_mapa: 20829 return Builder.CreateCall( 20830 CGM.getIntrinsic(Intrinsic::nvvm_mapa), 20831 {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1))}); 20832 case NVPTX::BI__nvvm_mapa_shared_cluster: 20833 return Builder.CreateCall( 20834 CGM.getIntrinsic(Intrinsic::nvvm_mapa_shared_cluster), 20835 {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1))}); 20836 case NVPTX::BI__nvvm_getctarank: 20837 return Builder.CreateCall( 20838 CGM.getIntrinsic(Intrinsic::nvvm_getctarank), 20839 EmitScalarExpr(E->getArg(0))); 20840 case NVPTX::BI__nvvm_getctarank_shared_cluster: 20841 return Builder.CreateCall( 20842 CGM.getIntrinsic(Intrinsic::nvvm_getctarank_shared_cluster), 20843 EmitScalarExpr(E->getArg(0))); 20844 case NVPTX::BI__nvvm_barrier_cluster_arrive: 20845 return Builder.CreateCall( 20846 CGM.getIntrinsic(Intrinsic::nvvm_barrier_cluster_arrive)); 20847 case NVPTX::BI__nvvm_barrier_cluster_arrive_relaxed: 20848 return Builder.CreateCall( 20849 
        CGM.getIntrinsic(Intrinsic::nvvm_barrier_cluster_arrive_relaxed));
  case NVPTX::BI__nvvm_barrier_cluster_wait:
    return Builder.CreateCall(
        CGM.getIntrinsic(Intrinsic::nvvm_barrier_cluster_wait));
  case NVPTX::BI__nvvm_fence_sc_cluster:
    return Builder.CreateCall(
        CGM.getIntrinsic(Intrinsic::nvvm_fence_sc_cluster));
  default:
    return nullptr;
  }
}

namespace {
struct BuiltinAlignArgs {
  llvm::Value *Src = nullptr;
  llvm::Type *SrcType = nullptr;
  llvm::Value *Alignment = nullptr;
  llvm::Value *Mask = nullptr;
  llvm::IntegerType *IntType = nullptr;

  BuiltinAlignArgs(const CallExpr *E, CodeGenFunction &CGF) {
    QualType AstType = E->getArg(0)->getType();
    if (AstType->isArrayType())
      Src = CGF.EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(CGF);
    else
      Src = CGF.EmitScalarExpr(E->getArg(0));
    SrcType = Src->getType();
    if (SrcType->isPointerTy()) {
      IntType = IntegerType::get(
          CGF.getLLVMContext(),
          CGF.CGM.getDataLayout().getIndexTypeSizeInBits(SrcType));
    } else {
      assert(SrcType->isIntegerTy());
      IntType = cast<llvm::IntegerType>(SrcType);
    }
    Alignment = CGF.EmitScalarExpr(E->getArg(1));
    Alignment = CGF.Builder.CreateZExtOrTrunc(Alignment, IntType, "alignment");
    auto *One = llvm::ConstantInt::get(IntType, 1);
    Mask = CGF.Builder.CreateSub(Alignment, One, "mask");
  }
};
} // namespace

/// Generate (x & (y-1)) == 0.
RValue CodeGenFunction::EmitBuiltinIsAligned(const CallExpr *E) {
  BuiltinAlignArgs Args(E, *this);
  llvm::Value *SrcAddress = Args.Src;
  if (Args.SrcType->isPointerTy())
    SrcAddress =
        Builder.CreateBitOrPointerCast(Args.Src, Args.IntType, "src_addr");
  return RValue::get(Builder.CreateICmpEQ(
      Builder.CreateAnd(SrcAddress, Args.Mask, "set_bits"),
      llvm::Constant::getNullValue(Args.IntType), "is_aligned"));
}

/// Generate (x & ~(y-1)) to align down or ((x+(y-1)) & ~(y-1)) to align up.
/// Note: For pointer types we can avoid ptrtoint/inttoptr pairs by using the
/// llvm.ptrmask intrinsic (with a GEP before in the align_up case).
RValue CodeGenFunction::EmitBuiltinAlignTo(const CallExpr *E, bool AlignUp) {
  BuiltinAlignArgs Args(E, *this);
  llvm::Value *SrcForMask = Args.Src;
  if (AlignUp) {
    // When aligning up we have to first add the mask to ensure we go over the
    // next alignment value and then align down to the next valid multiple.
    // By adding the mask, we ensure that align_up on an already aligned
    // value will not change the value.
    if (Args.Src->getType()->isPointerTy()) {
      if (getLangOpts().isSignedOverflowDefined())
        SrcForMask =
            Builder.CreateGEP(Int8Ty, SrcForMask, Args.Mask, "over_boundary");
      else
        SrcForMask = EmitCheckedInBoundsGEP(Int8Ty, SrcForMask, Args.Mask,
                                            /*SignedIndices=*/true,
                                            /*isSubtraction=*/false,
                                            E->getExprLoc(), "over_boundary");
    } else {
      SrcForMask = Builder.CreateAdd(SrcForMask, Args.Mask, "over_boundary");
    }
  }
  // Invert the mask to only clear the lower bits.
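  // Illustrative note (editorial, not from the original source): these
  // helpers back __builtin_is_aligned, __builtin_align_up and
  // __builtin_align_down. For example, __builtin_align_up(13, 8) computes
  // mask = 7, adds it to get 20, and the AND with ~7 emitted below yields 16;
  // an already aligned input such as 16 is left unchanged because only the
  // freshly added low bits are cleared again. __builtin_align_down(13, 8)
  // skips the addition and simply clears the low bits, giving 8.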
  llvm::Value *InvertedMask = Builder.CreateNot(Args.Mask, "inverted_mask");
  llvm::Value *Result = nullptr;
  if (Args.Src->getType()->isPointerTy()) {
    Result = Builder.CreateIntrinsic(
        Intrinsic::ptrmask, {Args.SrcType, Args.IntType},
        {SrcForMask, InvertedMask}, nullptr, "aligned_result");
  } else {
    Result = Builder.CreateAnd(SrcForMask, InvertedMask, "aligned_result");
  }
  assert(Result->getType() == Args.SrcType);
  return RValue::get(Result);
}

Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
                                                   const CallExpr *E) {
  switch (BuiltinID) {
  case WebAssembly::BI__builtin_wasm_memory_size: {
    llvm::Type *ResultType = ConvertType(E->getType());
    Value *I = EmitScalarExpr(E->getArg(0));
    Function *Callee =
        CGM.getIntrinsic(Intrinsic::wasm_memory_size, ResultType);
    return Builder.CreateCall(Callee, I);
  }
  case WebAssembly::BI__builtin_wasm_memory_grow: {
    llvm::Type *ResultType = ConvertType(E->getType());
    Value *Args[] = {EmitScalarExpr(E->getArg(0)),
                     EmitScalarExpr(E->getArg(1))};
    Function *Callee =
        CGM.getIntrinsic(Intrinsic::wasm_memory_grow, ResultType);
    return Builder.CreateCall(Callee, Args);
  }
  case WebAssembly::BI__builtin_wasm_tls_size: {
    llvm::Type *ResultType = ConvertType(E->getType());
    Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_tls_size, ResultType);
    return Builder.CreateCall(Callee);
  }
  case WebAssembly::BI__builtin_wasm_tls_align: {
    llvm::Type *ResultType = ConvertType(E->getType());
    Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_tls_align, ResultType);
    return Builder.CreateCall(Callee);
  }
  case WebAssembly::BI__builtin_wasm_tls_base: {
    Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_tls_base);
    return Builder.CreateCall(Callee);
  }
  case WebAssembly::BI__builtin_wasm_throw: {
    Value *Tag = EmitScalarExpr(E->getArg(0));
    Value *Obj = EmitScalarExpr(E->getArg(1));
    Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_throw);
    return Builder.CreateCall(Callee, {Tag, Obj});
  }
  case WebAssembly::BI__builtin_wasm_rethrow: {
    Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_rethrow);
    return Builder.CreateCall(Callee);
  }
  case WebAssembly::BI__builtin_wasm_memory_atomic_wait32: {
    Value *Addr = EmitScalarExpr(E->getArg(0));
    Value *Expected = EmitScalarExpr(E->getArg(1));
    Value *Timeout = EmitScalarExpr(E->getArg(2));
    Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_atomic_wait32);
    return Builder.CreateCall(Callee, {Addr, Expected, Timeout});
  }
  case WebAssembly::BI__builtin_wasm_memory_atomic_wait64: {
    Value *Addr = EmitScalarExpr(E->getArg(0));
    Value *Expected = EmitScalarExpr(E->getArg(1));
    Value *Timeout = EmitScalarExpr(E->getArg(2));
    Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_atomic_wait64);
    return Builder.CreateCall(Callee, {Addr, Expected, Timeout});
  }
  case WebAssembly::BI__builtin_wasm_memory_atomic_notify: {
    Value *Addr = EmitScalarExpr(E->getArg(0));
    Value *Count = EmitScalarExpr(E->getArg(1));
    Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_atomic_notify);
    return Builder.CreateCall(Callee, {Addr, Count});
  }
  case WebAssembly::BI__builtin_wasm_trunc_s_i32_f32:
  case WebAssembly::BI__builtin_wasm_trunc_s_i32_f64:
  case WebAssembly::BI__builtin_wasm_trunc_s_i64_f32:
  case WebAssembly::BI__builtin_wasm_trunc_s_i64_f64: {
    Value *Src = EmitScalarExpr(E->getArg(0));
    llvm::Type *ResT = ConvertType(E->getType());
    Function *Callee =
        CGM.getIntrinsic(Intrinsic::wasm_trunc_signed, {ResT, Src->getType()});
    return Builder.CreateCall(Callee, {Src});
  }
  case WebAssembly::BI__builtin_wasm_trunc_u_i32_f32:
  case WebAssembly::BI__builtin_wasm_trunc_u_i32_f64:
  case WebAssembly::BI__builtin_wasm_trunc_u_i64_f32:
  case WebAssembly::BI__builtin_wasm_trunc_u_i64_f64: {
    Value *Src = EmitScalarExpr(E->getArg(0));
    llvm::Type *ResT = ConvertType(E->getType());
    Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_trunc_unsigned,
                                        {ResT, Src->getType()});
    return Builder.CreateCall(Callee, {Src});
  }
  case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32_f32:
  case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32_f64:
  case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i64_f32:
  case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i64_f64:
  case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32x4_f32x4: {
    Value *Src = EmitScalarExpr(E->getArg(0));
    llvm::Type *ResT = ConvertType(E->getType());
    Function *Callee =
        CGM.getIntrinsic(Intrinsic::fptosi_sat, {ResT, Src->getType()});
    return Builder.CreateCall(Callee, {Src});
  }
  case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32_f32:
  case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32_f64:
  case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i64_f32:
  case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i64_f64:
  case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32x4_f32x4: {
    Value *Src = EmitScalarExpr(E->getArg(0));
    llvm::Type *ResT = ConvertType(E->getType());
    Function *Callee =
        CGM.getIntrinsic(Intrinsic::fptoui_sat, {ResT, Src->getType()});
    return Builder.CreateCall(Callee, {Src});
  }
  case WebAssembly::BI__builtin_wasm_min_f32:
  case WebAssembly::BI__builtin_wasm_min_f64:
  case WebAssembly::BI__builtin_wasm_min_f16x8:
  case WebAssembly::BI__builtin_wasm_min_f32x4:
  case WebAssembly::BI__builtin_wasm_min_f64x2: {
    Value *LHS = EmitScalarExpr(E->getArg(0));
    Value *RHS = EmitScalarExpr(E->getArg(1));
    Function *Callee =
        CGM.getIntrinsic(Intrinsic::minimum, ConvertType(E->getType()));
    return Builder.CreateCall(Callee, {LHS, RHS});
  }
  case WebAssembly::BI__builtin_wasm_max_f32:
  case WebAssembly::BI__builtin_wasm_max_f64:
  case WebAssembly::BI__builtin_wasm_max_f16x8:
  case WebAssembly::BI__builtin_wasm_max_f32x4:
  case WebAssembly::BI__builtin_wasm_max_f64x2: {
    Value *LHS = EmitScalarExpr(E->getArg(0));
    Value *RHS = EmitScalarExpr(E->getArg(1));
    Function *Callee =
        CGM.getIntrinsic(Intrinsic::maximum, ConvertType(E->getType()));
    return Builder.CreateCall(Callee, {LHS, RHS});
  }
  case WebAssembly::BI__builtin_wasm_pmin_f16x8:
  case WebAssembly::BI__builtin_wasm_pmin_f32x4:
  case WebAssembly::BI__builtin_wasm_pmin_f64x2: {
    Value *LHS = EmitScalarExpr(E->getArg(0));
    Value *RHS = EmitScalarExpr(E->getArg(1));
    Function *Callee =
        CGM.getIntrinsic(Intrinsic::wasm_pmin, ConvertType(E->getType()));
    return Builder.CreateCall(Callee, {LHS, RHS});
  }
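  // Explanatory note (editorial, not from the original source): unlike the
  // min/max builtins above, which map onto llvm.minimum/llvm.maximum, the
  // pmin/pmax builtins (the case above and the pmax case below) use the
  // target-specific wasm_pmin/wasm_pmax intrinsics, since WebAssembly's
  // "pseudo-minimum/maximum" is specified as a lane-wise select
  // (b < a ? b : a) whose NaN and signed-zero behavior differs from the
  // IEEE-754 operations.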
21077 case WebAssembly::BI__builtin_wasm_pmax_f16x8: 21078 case WebAssembly::BI__builtin_wasm_pmax_f32x4: 21079 case WebAssembly::BI__builtin_wasm_pmax_f64x2: { 21080 Value *LHS = EmitScalarExpr(E->getArg(0)); 21081 Value *RHS = EmitScalarExpr(E->getArg(1)); 21082 Function *Callee = 21083 CGM.getIntrinsic(Intrinsic::wasm_pmax, ConvertType(E->getType())); 21084 return Builder.CreateCall(Callee, {LHS, RHS}); 21085 } 21086 case WebAssembly::BI__builtin_wasm_ceil_f32x4: 21087 case WebAssembly::BI__builtin_wasm_floor_f32x4: 21088 case WebAssembly::BI__builtin_wasm_trunc_f32x4: 21089 case WebAssembly::BI__builtin_wasm_nearest_f32x4: 21090 case WebAssembly::BI__builtin_wasm_ceil_f64x2: 21091 case WebAssembly::BI__builtin_wasm_floor_f64x2: 21092 case WebAssembly::BI__builtin_wasm_trunc_f64x2: 21093 case WebAssembly::BI__builtin_wasm_nearest_f64x2: { 21094 unsigned IntNo; 21095 switch (BuiltinID) { 21096 case WebAssembly::BI__builtin_wasm_ceil_f32x4: 21097 case WebAssembly::BI__builtin_wasm_ceil_f64x2: 21098 IntNo = Intrinsic::ceil; 21099 break; 21100 case WebAssembly::BI__builtin_wasm_floor_f32x4: 21101 case WebAssembly::BI__builtin_wasm_floor_f64x2: 21102 IntNo = Intrinsic::floor; 21103 break; 21104 case WebAssembly::BI__builtin_wasm_trunc_f32x4: 21105 case WebAssembly::BI__builtin_wasm_trunc_f64x2: 21106 IntNo = Intrinsic::trunc; 21107 break; 21108 case WebAssembly::BI__builtin_wasm_nearest_f32x4: 21109 case WebAssembly::BI__builtin_wasm_nearest_f64x2: 21110 IntNo = Intrinsic::nearbyint; 21111 break; 21112 default: 21113 llvm_unreachable("unexpected builtin ID"); 21114 } 21115 Value *Value = EmitScalarExpr(E->getArg(0)); 21116 Function *Callee = CGM.getIntrinsic(IntNo, ConvertType(E->getType())); 21117 return Builder.CreateCall(Callee, Value); 21118 } 21119 case WebAssembly::BI__builtin_wasm_ref_null_extern: { 21120 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_ref_null_extern); 21121 return Builder.CreateCall(Callee); 21122 } 21123 case WebAssembly::BI__builtin_wasm_ref_null_func: { 21124 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_ref_null_func); 21125 return Builder.CreateCall(Callee); 21126 } 21127 case WebAssembly::BI__builtin_wasm_swizzle_i8x16: { 21128 Value *Src = EmitScalarExpr(E->getArg(0)); 21129 Value *Indices = EmitScalarExpr(E->getArg(1)); 21130 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_swizzle); 21131 return Builder.CreateCall(Callee, {Src, Indices}); 21132 } 21133 case WebAssembly::BI__builtin_wasm_add_sat_s_i8x16: 21134 case WebAssembly::BI__builtin_wasm_add_sat_u_i8x16: 21135 case WebAssembly::BI__builtin_wasm_add_sat_s_i16x8: 21136 case WebAssembly::BI__builtin_wasm_add_sat_u_i16x8: 21137 case WebAssembly::BI__builtin_wasm_sub_sat_s_i8x16: 21138 case WebAssembly::BI__builtin_wasm_sub_sat_u_i8x16: 21139 case WebAssembly::BI__builtin_wasm_sub_sat_s_i16x8: 21140 case WebAssembly::BI__builtin_wasm_sub_sat_u_i16x8: { 21141 unsigned IntNo; 21142 switch (BuiltinID) { 21143 case WebAssembly::BI__builtin_wasm_add_sat_s_i8x16: 21144 case WebAssembly::BI__builtin_wasm_add_sat_s_i16x8: 21145 IntNo = Intrinsic::sadd_sat; 21146 break; 21147 case WebAssembly::BI__builtin_wasm_add_sat_u_i8x16: 21148 case WebAssembly::BI__builtin_wasm_add_sat_u_i16x8: 21149 IntNo = Intrinsic::uadd_sat; 21150 break; 21151 case WebAssembly::BI__builtin_wasm_sub_sat_s_i8x16: 21152 case WebAssembly::BI__builtin_wasm_sub_sat_s_i16x8: 21153 IntNo = Intrinsic::wasm_sub_sat_signed; 21154 break; 21155 case WebAssembly::BI__builtin_wasm_sub_sat_u_i8x16: 21156 case 
WebAssembly::BI__builtin_wasm_sub_sat_u_i16x8: 21157 IntNo = Intrinsic::wasm_sub_sat_unsigned; 21158 break; 21159 default: 21160 llvm_unreachable("unexpected builtin ID"); 21161 } 21162 Value *LHS = EmitScalarExpr(E->getArg(0)); 21163 Value *RHS = EmitScalarExpr(E->getArg(1)); 21164 Function *Callee = CGM.getIntrinsic(IntNo, ConvertType(E->getType())); 21165 return Builder.CreateCall(Callee, {LHS, RHS}); 21166 } 21167 case WebAssembly::BI__builtin_wasm_abs_i8x16: 21168 case WebAssembly::BI__builtin_wasm_abs_i16x8: 21169 case WebAssembly::BI__builtin_wasm_abs_i32x4: 21170 case WebAssembly::BI__builtin_wasm_abs_i64x2: { 21171 Value *Vec = EmitScalarExpr(E->getArg(0)); 21172 Value *Neg = Builder.CreateNeg(Vec, "neg"); 21173 Constant *Zero = llvm::Constant::getNullValue(Vec->getType()); 21174 Value *ICmp = Builder.CreateICmpSLT(Vec, Zero, "abscond"); 21175 return Builder.CreateSelect(ICmp, Neg, Vec, "abs"); 21176 } 21177 case WebAssembly::BI__builtin_wasm_min_s_i8x16: 21178 case WebAssembly::BI__builtin_wasm_min_u_i8x16: 21179 case WebAssembly::BI__builtin_wasm_max_s_i8x16: 21180 case WebAssembly::BI__builtin_wasm_max_u_i8x16: 21181 case WebAssembly::BI__builtin_wasm_min_s_i16x8: 21182 case WebAssembly::BI__builtin_wasm_min_u_i16x8: 21183 case WebAssembly::BI__builtin_wasm_max_s_i16x8: 21184 case WebAssembly::BI__builtin_wasm_max_u_i16x8: 21185 case WebAssembly::BI__builtin_wasm_min_s_i32x4: 21186 case WebAssembly::BI__builtin_wasm_min_u_i32x4: 21187 case WebAssembly::BI__builtin_wasm_max_s_i32x4: 21188 case WebAssembly::BI__builtin_wasm_max_u_i32x4: { 21189 Value *LHS = EmitScalarExpr(E->getArg(0)); 21190 Value *RHS = EmitScalarExpr(E->getArg(1)); 21191 Value *ICmp; 21192 switch (BuiltinID) { 21193 case WebAssembly::BI__builtin_wasm_min_s_i8x16: 21194 case WebAssembly::BI__builtin_wasm_min_s_i16x8: 21195 case WebAssembly::BI__builtin_wasm_min_s_i32x4: 21196 ICmp = Builder.CreateICmpSLT(LHS, RHS); 21197 break; 21198 case WebAssembly::BI__builtin_wasm_min_u_i8x16: 21199 case WebAssembly::BI__builtin_wasm_min_u_i16x8: 21200 case WebAssembly::BI__builtin_wasm_min_u_i32x4: 21201 ICmp = Builder.CreateICmpULT(LHS, RHS); 21202 break; 21203 case WebAssembly::BI__builtin_wasm_max_s_i8x16: 21204 case WebAssembly::BI__builtin_wasm_max_s_i16x8: 21205 case WebAssembly::BI__builtin_wasm_max_s_i32x4: 21206 ICmp = Builder.CreateICmpSGT(LHS, RHS); 21207 break; 21208 case WebAssembly::BI__builtin_wasm_max_u_i8x16: 21209 case WebAssembly::BI__builtin_wasm_max_u_i16x8: 21210 case WebAssembly::BI__builtin_wasm_max_u_i32x4: 21211 ICmp = Builder.CreateICmpUGT(LHS, RHS); 21212 break; 21213 default: 21214 llvm_unreachable("unexpected builtin ID"); 21215 } 21216 return Builder.CreateSelect(ICmp, LHS, RHS); 21217 } 21218 case WebAssembly::BI__builtin_wasm_avgr_u_i8x16: 21219 case WebAssembly::BI__builtin_wasm_avgr_u_i16x8: { 21220 Value *LHS = EmitScalarExpr(E->getArg(0)); 21221 Value *RHS = EmitScalarExpr(E->getArg(1)); 21222 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_avgr_unsigned, 21223 ConvertType(E->getType())); 21224 return Builder.CreateCall(Callee, {LHS, RHS}); 21225 } 21226 case WebAssembly::BI__builtin_wasm_q15mulr_sat_s_i16x8: { 21227 Value *LHS = EmitScalarExpr(E->getArg(0)); 21228 Value *RHS = EmitScalarExpr(E->getArg(1)); 21229 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_q15mulr_sat_signed); 21230 return Builder.CreateCall(Callee, {LHS, RHS}); 21231 } 21232 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i8x16_s_i16x8: 21233 case 
WebAssembly::BI__builtin_wasm_extadd_pairwise_i8x16_u_i16x8: 21234 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i16x8_s_i32x4: 21235 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i16x8_u_i32x4: { 21236 Value *Vec = EmitScalarExpr(E->getArg(0)); 21237 unsigned IntNo; 21238 switch (BuiltinID) { 21239 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i8x16_s_i16x8: 21240 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i16x8_s_i32x4: 21241 IntNo = Intrinsic::wasm_extadd_pairwise_signed; 21242 break; 21243 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i8x16_u_i16x8: 21244 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i16x8_u_i32x4: 21245 IntNo = Intrinsic::wasm_extadd_pairwise_unsigned; 21246 break; 21247 default: 21248 llvm_unreachable("unexpected builtin ID"); 21249 } 21250 21251 Function *Callee = CGM.getIntrinsic(IntNo, ConvertType(E->getType())); 21252 return Builder.CreateCall(Callee, Vec); 21253 } 21254 case WebAssembly::BI__builtin_wasm_bitselect: { 21255 Value *V1 = EmitScalarExpr(E->getArg(0)); 21256 Value *V2 = EmitScalarExpr(E->getArg(1)); 21257 Value *C = EmitScalarExpr(E->getArg(2)); 21258 Function *Callee = 21259 CGM.getIntrinsic(Intrinsic::wasm_bitselect, ConvertType(E->getType())); 21260 return Builder.CreateCall(Callee, {V1, V2, C}); 21261 } 21262 case WebAssembly::BI__builtin_wasm_dot_s_i32x4_i16x8: { 21263 Value *LHS = EmitScalarExpr(E->getArg(0)); 21264 Value *RHS = EmitScalarExpr(E->getArg(1)); 21265 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_dot); 21266 return Builder.CreateCall(Callee, {LHS, RHS}); 21267 } 21268 case WebAssembly::BI__builtin_wasm_popcnt_i8x16: { 21269 Value *Vec = EmitScalarExpr(E->getArg(0)); 21270 Function *Callee = 21271 CGM.getIntrinsic(Intrinsic::ctpop, ConvertType(E->getType())); 21272 return Builder.CreateCall(Callee, {Vec}); 21273 } 21274 case WebAssembly::BI__builtin_wasm_any_true_v128: 21275 case WebAssembly::BI__builtin_wasm_all_true_i8x16: 21276 case WebAssembly::BI__builtin_wasm_all_true_i16x8: 21277 case WebAssembly::BI__builtin_wasm_all_true_i32x4: 21278 case WebAssembly::BI__builtin_wasm_all_true_i64x2: { 21279 unsigned IntNo; 21280 switch (BuiltinID) { 21281 case WebAssembly::BI__builtin_wasm_any_true_v128: 21282 IntNo = Intrinsic::wasm_anytrue; 21283 break; 21284 case WebAssembly::BI__builtin_wasm_all_true_i8x16: 21285 case WebAssembly::BI__builtin_wasm_all_true_i16x8: 21286 case WebAssembly::BI__builtin_wasm_all_true_i32x4: 21287 case WebAssembly::BI__builtin_wasm_all_true_i64x2: 21288 IntNo = Intrinsic::wasm_alltrue; 21289 break; 21290 default: 21291 llvm_unreachable("unexpected builtin ID"); 21292 } 21293 Value *Vec = EmitScalarExpr(E->getArg(0)); 21294 Function *Callee = CGM.getIntrinsic(IntNo, Vec->getType()); 21295 return Builder.CreateCall(Callee, {Vec}); 21296 } 21297 case WebAssembly::BI__builtin_wasm_bitmask_i8x16: 21298 case WebAssembly::BI__builtin_wasm_bitmask_i16x8: 21299 case WebAssembly::BI__builtin_wasm_bitmask_i32x4: 21300 case WebAssembly::BI__builtin_wasm_bitmask_i64x2: { 21301 Value *Vec = EmitScalarExpr(E->getArg(0)); 21302 Function *Callee = 21303 CGM.getIntrinsic(Intrinsic::wasm_bitmask, Vec->getType()); 21304 return Builder.CreateCall(Callee, {Vec}); 21305 } 21306 case WebAssembly::BI__builtin_wasm_abs_f32x4: 21307 case WebAssembly::BI__builtin_wasm_abs_f64x2: { 21308 Value *Vec = EmitScalarExpr(E->getArg(0)); 21309 Function *Callee = CGM.getIntrinsic(Intrinsic::fabs, Vec->getType()); 21310 return Builder.CreateCall(Callee, {Vec}); 21311 } 21312 case 
WebAssembly::BI__builtin_wasm_sqrt_f32x4: 21313 case WebAssembly::BI__builtin_wasm_sqrt_f64x2: { 21314 Value *Vec = EmitScalarExpr(E->getArg(0)); 21315 Function *Callee = CGM.getIntrinsic(Intrinsic::sqrt, Vec->getType()); 21316 return Builder.CreateCall(Callee, {Vec}); 21317 } 21318 case WebAssembly::BI__builtin_wasm_narrow_s_i8x16_i16x8: 21319 case WebAssembly::BI__builtin_wasm_narrow_u_i8x16_i16x8: 21320 case WebAssembly::BI__builtin_wasm_narrow_s_i16x8_i32x4: 21321 case WebAssembly::BI__builtin_wasm_narrow_u_i16x8_i32x4: { 21322 Value *Low = EmitScalarExpr(E->getArg(0)); 21323 Value *High = EmitScalarExpr(E->getArg(1)); 21324 unsigned IntNo; 21325 switch (BuiltinID) { 21326 case WebAssembly::BI__builtin_wasm_narrow_s_i8x16_i16x8: 21327 case WebAssembly::BI__builtin_wasm_narrow_s_i16x8_i32x4: 21328 IntNo = Intrinsic::wasm_narrow_signed; 21329 break; 21330 case WebAssembly::BI__builtin_wasm_narrow_u_i8x16_i16x8: 21331 case WebAssembly::BI__builtin_wasm_narrow_u_i16x8_i32x4: 21332 IntNo = Intrinsic::wasm_narrow_unsigned; 21333 break; 21334 default: 21335 llvm_unreachable("unexpected builtin ID"); 21336 } 21337 Function *Callee = 21338 CGM.getIntrinsic(IntNo, {ConvertType(E->getType()), Low->getType()}); 21339 return Builder.CreateCall(Callee, {Low, High}); 21340 } 21341 case WebAssembly::BI__builtin_wasm_trunc_sat_s_zero_f64x2_i32x4: 21342 case WebAssembly::BI__builtin_wasm_trunc_sat_u_zero_f64x2_i32x4: { 21343 Value *Vec = EmitScalarExpr(E->getArg(0)); 21344 unsigned IntNo; 21345 switch (BuiltinID) { 21346 case WebAssembly::BI__builtin_wasm_trunc_sat_s_zero_f64x2_i32x4: 21347 IntNo = Intrinsic::fptosi_sat; 21348 break; 21349 case WebAssembly::BI__builtin_wasm_trunc_sat_u_zero_f64x2_i32x4: 21350 IntNo = Intrinsic::fptoui_sat; 21351 break; 21352 default: 21353 llvm_unreachable("unexpected builtin ID"); 21354 } 21355 llvm::Type *SrcT = Vec->getType(); 21356 llvm::Type *TruncT = SrcT->getWithNewType(Builder.getInt32Ty()); 21357 Function *Callee = CGM.getIntrinsic(IntNo, {TruncT, SrcT}); 21358 Value *Trunc = Builder.CreateCall(Callee, Vec); 21359 Value *Splat = Constant::getNullValue(TruncT); 21360 return Builder.CreateShuffleVector(Trunc, Splat, ArrayRef<int>{0, 1, 2, 3}); 21361 } 21362 case WebAssembly::BI__builtin_wasm_shuffle_i8x16: { 21363 Value *Ops[18]; 21364 size_t OpIdx = 0; 21365 Ops[OpIdx++] = EmitScalarExpr(E->getArg(0)); 21366 Ops[OpIdx++] = EmitScalarExpr(E->getArg(1)); 21367 while (OpIdx < 18) { 21368 std::optional<llvm::APSInt> LaneConst = 21369 E->getArg(OpIdx)->getIntegerConstantExpr(getContext()); 21370 assert(LaneConst && "Constant arg isn't actually constant?"); 21371 Ops[OpIdx++] = llvm::ConstantInt::get(getLLVMContext(), *LaneConst); 21372 } 21373 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_shuffle); 21374 return Builder.CreateCall(Callee, Ops); 21375 } 21376 case WebAssembly::BI__builtin_wasm_relaxed_madd_f16x8: 21377 case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f16x8: 21378 case WebAssembly::BI__builtin_wasm_relaxed_madd_f32x4: 21379 case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f32x4: 21380 case WebAssembly::BI__builtin_wasm_relaxed_madd_f64x2: 21381 case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f64x2: { 21382 Value *A = EmitScalarExpr(E->getArg(0)); 21383 Value *B = EmitScalarExpr(E->getArg(1)); 21384 Value *C = EmitScalarExpr(E->getArg(2)); 21385 unsigned IntNo; 21386 switch (BuiltinID) { 21387 case WebAssembly::BI__builtin_wasm_relaxed_madd_f16x8: 21388 case WebAssembly::BI__builtin_wasm_relaxed_madd_f32x4: 21389 case 
WebAssembly::BI__builtin_wasm_relaxed_madd_f64x2: 21390 IntNo = Intrinsic::wasm_relaxed_madd; 21391 break; 21392 case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f16x8: 21393 case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f32x4: 21394 case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f64x2: 21395 IntNo = Intrinsic::wasm_relaxed_nmadd; 21396 break; 21397 default: 21398 llvm_unreachable("unexpected builtin ID"); 21399 } 21400 Function *Callee = CGM.getIntrinsic(IntNo, A->getType()); 21401 return Builder.CreateCall(Callee, {A, B, C}); 21402 } 21403 case WebAssembly::BI__builtin_wasm_relaxed_laneselect_i8x16: 21404 case WebAssembly::BI__builtin_wasm_relaxed_laneselect_i16x8: 21405 case WebAssembly::BI__builtin_wasm_relaxed_laneselect_i32x4: 21406 case WebAssembly::BI__builtin_wasm_relaxed_laneselect_i64x2: { 21407 Value *A = EmitScalarExpr(E->getArg(0)); 21408 Value *B = EmitScalarExpr(E->getArg(1)); 21409 Value *C = EmitScalarExpr(E->getArg(2)); 21410 Function *Callee = 21411 CGM.getIntrinsic(Intrinsic::wasm_relaxed_laneselect, A->getType()); 21412 return Builder.CreateCall(Callee, {A, B, C}); 21413 } 21414 case WebAssembly::BI__builtin_wasm_relaxed_swizzle_i8x16: { 21415 Value *Src = EmitScalarExpr(E->getArg(0)); 21416 Value *Indices = EmitScalarExpr(E->getArg(1)); 21417 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_relaxed_swizzle); 21418 return Builder.CreateCall(Callee, {Src, Indices}); 21419 } 21420 case WebAssembly::BI__builtin_wasm_relaxed_min_f32x4: 21421 case WebAssembly::BI__builtin_wasm_relaxed_max_f32x4: 21422 case WebAssembly::BI__builtin_wasm_relaxed_min_f64x2: 21423 case WebAssembly::BI__builtin_wasm_relaxed_max_f64x2: { 21424 Value *LHS = EmitScalarExpr(E->getArg(0)); 21425 Value *RHS = EmitScalarExpr(E->getArg(1)); 21426 unsigned IntNo; 21427 switch (BuiltinID) { 21428 case WebAssembly::BI__builtin_wasm_relaxed_min_f32x4: 21429 case WebAssembly::BI__builtin_wasm_relaxed_min_f64x2: 21430 IntNo = Intrinsic::wasm_relaxed_min; 21431 break; 21432 case WebAssembly::BI__builtin_wasm_relaxed_max_f32x4: 21433 case WebAssembly::BI__builtin_wasm_relaxed_max_f64x2: 21434 IntNo = Intrinsic::wasm_relaxed_max; 21435 break; 21436 default: 21437 llvm_unreachable("unexpected builtin ID"); 21438 } 21439 Function *Callee = CGM.getIntrinsic(IntNo, LHS->getType()); 21440 return Builder.CreateCall(Callee, {LHS, RHS}); 21441 } 21442 case WebAssembly::BI__builtin_wasm_relaxed_trunc_s_i32x4_f32x4: 21443 case WebAssembly::BI__builtin_wasm_relaxed_trunc_u_i32x4_f32x4: 21444 case WebAssembly::BI__builtin_wasm_relaxed_trunc_s_zero_i32x4_f64x2: 21445 case WebAssembly::BI__builtin_wasm_relaxed_trunc_u_zero_i32x4_f64x2: { 21446 Value *Vec = EmitScalarExpr(E->getArg(0)); 21447 unsigned IntNo; 21448 switch (BuiltinID) { 21449 case WebAssembly::BI__builtin_wasm_relaxed_trunc_s_i32x4_f32x4: 21450 IntNo = Intrinsic::wasm_relaxed_trunc_signed; 21451 break; 21452 case WebAssembly::BI__builtin_wasm_relaxed_trunc_u_i32x4_f32x4: 21453 IntNo = Intrinsic::wasm_relaxed_trunc_unsigned; 21454 break; 21455 case WebAssembly::BI__builtin_wasm_relaxed_trunc_s_zero_i32x4_f64x2: 21456 IntNo = Intrinsic::wasm_relaxed_trunc_signed_zero; 21457 break; 21458 case WebAssembly::BI__builtin_wasm_relaxed_trunc_u_zero_i32x4_f64x2: 21459 IntNo = Intrinsic::wasm_relaxed_trunc_unsigned_zero; 21460 break; 21461 default: 21462 llvm_unreachable("unexpected builtin ID"); 21463 } 21464 Function *Callee = CGM.getIntrinsic(IntNo); 21465 return Builder.CreateCall(Callee, {Vec}); 21466 } 21467 case 
WebAssembly::BI__builtin_wasm_relaxed_q15mulr_s_i16x8: { 21468 Value *LHS = EmitScalarExpr(E->getArg(0)); 21469 Value *RHS = EmitScalarExpr(E->getArg(1)); 21470 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_relaxed_q15mulr_signed); 21471 return Builder.CreateCall(Callee, {LHS, RHS}); 21472 } 21473 case WebAssembly::BI__builtin_wasm_relaxed_dot_i8x16_i7x16_s_i16x8: { 21474 Value *LHS = EmitScalarExpr(E->getArg(0)); 21475 Value *RHS = EmitScalarExpr(E->getArg(1)); 21476 Function *Callee = 21477 CGM.getIntrinsic(Intrinsic::wasm_relaxed_dot_i8x16_i7x16_signed); 21478 return Builder.CreateCall(Callee, {LHS, RHS}); 21479 } 21480 case WebAssembly::BI__builtin_wasm_relaxed_dot_i8x16_i7x16_add_s_i32x4: { 21481 Value *LHS = EmitScalarExpr(E->getArg(0)); 21482 Value *RHS = EmitScalarExpr(E->getArg(1)); 21483 Value *Acc = EmitScalarExpr(E->getArg(2)); 21484 Function *Callee = 21485 CGM.getIntrinsic(Intrinsic::wasm_relaxed_dot_i8x16_i7x16_add_signed); 21486 return Builder.CreateCall(Callee, {LHS, RHS, Acc}); 21487 } 21488 case WebAssembly::BI__builtin_wasm_relaxed_dot_bf16x8_add_f32_f32x4: { 21489 Value *LHS = EmitScalarExpr(E->getArg(0)); 21490 Value *RHS = EmitScalarExpr(E->getArg(1)); 21491 Value *Acc = EmitScalarExpr(E->getArg(2)); 21492 Function *Callee = 21493 CGM.getIntrinsic(Intrinsic::wasm_relaxed_dot_bf16x8_add_f32); 21494 return Builder.CreateCall(Callee, {LHS, RHS, Acc}); 21495 } 21496 case WebAssembly::BI__builtin_wasm_loadf16_f32: { 21497 Value *Addr = EmitScalarExpr(E->getArg(0)); 21498 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_loadf16_f32); 21499 return Builder.CreateCall(Callee, {Addr}); 21500 } 21501 case WebAssembly::BI__builtin_wasm_storef16_f32: { 21502 Value *Val = EmitScalarExpr(E->getArg(0)); 21503 Value *Addr = EmitScalarExpr(E->getArg(1)); 21504 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_storef16_f32); 21505 return Builder.CreateCall(Callee, {Val, Addr}); 21506 } 21507 case WebAssembly::BI__builtin_wasm_splat_f16x8: { 21508 Value *Val = EmitScalarExpr(E->getArg(0)); 21509 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_splat_f16x8); 21510 return Builder.CreateCall(Callee, {Val}); 21511 } 21512 case WebAssembly::BI__builtin_wasm_extract_lane_f16x8: { 21513 Value *Vector = EmitScalarExpr(E->getArg(0)); 21514 Value *Index = EmitScalarExpr(E->getArg(1)); 21515 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_extract_lane_f16x8); 21516 return Builder.CreateCall(Callee, {Vector, Index}); 21517 } 21518 case WebAssembly::BI__builtin_wasm_table_get: { 21519 assert(E->getArg(0)->getType()->isArrayType()); 21520 Value *Table = EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(*this); 21521 Value *Index = EmitScalarExpr(E->getArg(1)); 21522 Function *Callee; 21523 if (E->getType().isWebAssemblyExternrefType()) 21524 Callee = CGM.getIntrinsic(Intrinsic::wasm_table_get_externref); 21525 else if (E->getType().isWebAssemblyFuncrefType()) 21526 Callee = CGM.getIntrinsic(Intrinsic::wasm_table_get_funcref); 21527 else 21528 llvm_unreachable( 21529 "Unexpected reference type for __builtin_wasm_table_get"); 21530 return Builder.CreateCall(Callee, {Table, Index}); 21531 } 21532 case WebAssembly::BI__builtin_wasm_table_set: { 21533 assert(E->getArg(0)->getType()->isArrayType()); 21534 Value *Table = EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(*this); 21535 Value *Index = EmitScalarExpr(E->getArg(1)); 21536 Value *Val = EmitScalarExpr(E->getArg(2)); 21537 Function *Callee; 21538 if (E->getArg(2)->getType().isWebAssemblyExternrefType()) 21539 Callee = 
  case WebAssembly::BI__builtin_wasm_table_get: {
    assert(E->getArg(0)->getType()->isArrayType());
    Value *Table = EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(*this);
    Value *Index = EmitScalarExpr(E->getArg(1));
    Function *Callee;
    if (E->getType().isWebAssemblyExternrefType())
      Callee = CGM.getIntrinsic(Intrinsic::wasm_table_get_externref);
    else if (E->getType().isWebAssemblyFuncrefType())
      Callee = CGM.getIntrinsic(Intrinsic::wasm_table_get_funcref);
    else
      llvm_unreachable(
          "Unexpected reference type for __builtin_wasm_table_get");
    return Builder.CreateCall(Callee, {Table, Index});
  }
  case WebAssembly::BI__builtin_wasm_table_set: {
    assert(E->getArg(0)->getType()->isArrayType());
    Value *Table = EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(*this);
    Value *Index = EmitScalarExpr(E->getArg(1));
    Value *Val = EmitScalarExpr(E->getArg(2));
    Function *Callee;
    if (E->getArg(2)->getType().isWebAssemblyExternrefType())
      Callee = CGM.getIntrinsic(Intrinsic::wasm_table_set_externref);
    else if (E->getArg(2)->getType().isWebAssemblyFuncrefType())
      Callee = CGM.getIntrinsic(Intrinsic::wasm_table_set_funcref);
    else
      llvm_unreachable(
          "Unexpected reference type for __builtin_wasm_table_set");
    return Builder.CreateCall(Callee, {Table, Index, Val});
  }
  case WebAssembly::BI__builtin_wasm_table_size: {
    assert(E->getArg(0)->getType()->isArrayType());
    Value *Value = EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(*this);
    Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_table_size);
    return Builder.CreateCall(Callee, Value);
  }
  case WebAssembly::BI__builtin_wasm_table_grow: {
    assert(E->getArg(0)->getType()->isArrayType());
    Value *Table = EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(*this);
    Value *Val = EmitScalarExpr(E->getArg(1));
    Value *NElems = EmitScalarExpr(E->getArg(2));

    Function *Callee;
    if (E->getArg(1)->getType().isWebAssemblyExternrefType())
      Callee = CGM.getIntrinsic(Intrinsic::wasm_table_grow_externref);
    else if (E->getArg(2)->getType().isWebAssemblyFuncrefType())
      Callee = CGM.getIntrinsic(Intrinsic::wasm_table_fill_funcref);
    else
      llvm_unreachable(
          "Unexpected reference type for __builtin_wasm_table_grow");

    return Builder.CreateCall(Callee, {Table, Val, NElems});
  }
  case WebAssembly::BI__builtin_wasm_table_fill: {
    assert(E->getArg(0)->getType()->isArrayType());
    Value *Table = EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(*this);
    Value *Index = EmitScalarExpr(E->getArg(1));
    Value *Val = EmitScalarExpr(E->getArg(2));
    Value *NElems = EmitScalarExpr(E->getArg(3));

    Function *Callee;
    if (E->getArg(2)->getType().isWebAssemblyExternrefType())
      Callee = CGM.getIntrinsic(Intrinsic::wasm_table_fill_externref);
    else if (E->getArg(2)->getType().isWebAssemblyFuncrefType())
      Callee = CGM.getIntrinsic(Intrinsic::wasm_table_fill_funcref);
    else
      llvm_unreachable(
          "Unexpected reference type for __builtin_wasm_table_fill");

    return Builder.CreateCall(Callee, {Table, Index, Val, NElems});
  }
  case WebAssembly::BI__builtin_wasm_table_copy: {
    assert(E->getArg(0)->getType()->isArrayType());
    Value *TableX = EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(*this);
    Value *TableY = EmitArrayToPointerDecay(E->getArg(1)).emitRawPointer(*this);
    Value *DstIdx = EmitScalarExpr(E->getArg(2));
    Value *SrcIdx = EmitScalarExpr(E->getArg(3));
    Value *NElems = EmitScalarExpr(E->getArg(4));

    Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_table_copy);

    return Builder.CreateCall(Callee, {TableX, TableY, SrcIdx, DstIdx, NElems});
  }
  default:
    return nullptr;
  }
}
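
// Maps a Hexagon builtin that needs custom handling to the corresponding
// LLVM intrinsic and, for the HVX masked-store builtins, the vector length
// in bytes (0 for the scalar circular load/store builtins). Returns
// Intrinsic::not_intrinsic for builtins that are not in the table.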
static std::pair<Intrinsic::ID, unsigned>
getIntrinsicForHexagonNonClangBuiltin(unsigned BuiltinID) {
  struct Info {
    unsigned BuiltinID;
    Intrinsic::ID IntrinsicID;
    unsigned VecLen;
  };
  static Info Infos[] = {
#define CUSTOM_BUILTIN_MAPPING(x,s) \
  { Hexagon::BI__builtin_HEXAGON_##x, Intrinsic::hexagon_##x, s },
    CUSTOM_BUILTIN_MAPPING(L2_loadrub_pci, 0)
    CUSTOM_BUILTIN_MAPPING(L2_loadrb_pci, 0)
    CUSTOM_BUILTIN_MAPPING(L2_loadruh_pci, 0)
    CUSTOM_BUILTIN_MAPPING(L2_loadrh_pci, 0)
    CUSTOM_BUILTIN_MAPPING(L2_loadri_pci, 0)
    CUSTOM_BUILTIN_MAPPING(L2_loadrd_pci, 0)
    CUSTOM_BUILTIN_MAPPING(L2_loadrub_pcr, 0)
    CUSTOM_BUILTIN_MAPPING(L2_loadrb_pcr, 0)
    CUSTOM_BUILTIN_MAPPING(L2_loadruh_pcr, 0)
    CUSTOM_BUILTIN_MAPPING(L2_loadrh_pcr, 0)
    CUSTOM_BUILTIN_MAPPING(L2_loadri_pcr, 0)
    CUSTOM_BUILTIN_MAPPING(L2_loadrd_pcr, 0)
    CUSTOM_BUILTIN_MAPPING(S2_storerb_pci, 0)
    CUSTOM_BUILTIN_MAPPING(S2_storerh_pci, 0)
    CUSTOM_BUILTIN_MAPPING(S2_storerf_pci, 0)
    CUSTOM_BUILTIN_MAPPING(S2_storeri_pci, 0)
    CUSTOM_BUILTIN_MAPPING(S2_storerd_pci, 0)
    CUSTOM_BUILTIN_MAPPING(S2_storerb_pcr, 0)
    CUSTOM_BUILTIN_MAPPING(S2_storerh_pcr, 0)
    CUSTOM_BUILTIN_MAPPING(S2_storerf_pcr, 0)
    CUSTOM_BUILTIN_MAPPING(S2_storeri_pcr, 0)
    CUSTOM_BUILTIN_MAPPING(S2_storerd_pcr, 0)
    // Legacy builtins that take a vector in place of a vector predicate.
    CUSTOM_BUILTIN_MAPPING(V6_vmaskedstoreq, 64)
    CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorenq, 64)
    CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentq, 64)
    CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentnq, 64)
    CUSTOM_BUILTIN_MAPPING(V6_vmaskedstoreq_128B, 128)
    CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorenq_128B, 128)
    CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentq_128B, 128)
    CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentnq_128B, 128)
#include "clang/Basic/BuiltinsHexagonMapCustomDep.def"
#undef CUSTOM_BUILTIN_MAPPING
  };

  auto CmpInfo = [] (Info A, Info B) { return A.BuiltinID < B.BuiltinID; };
  static const bool SortOnce = (llvm::sort(Infos, CmpInfo), true);
  (void)SortOnce;

  const Info *F = llvm::lower_bound(Infos, Info{BuiltinID, 0, 0}, CmpInfo);
  if (F == std::end(Infos) || F->BuiltinID != BuiltinID)
    return {Intrinsic::not_intrinsic, 0};

  return {F->IntrinsicID, F->VecLen};
}
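
// Emits code for the Hexagon builtins that need treatment beyond a direct
// 1:1 mapping to an intrinsic: circular and bit-reverse addressing
// loads/stores (which update a base pointer passed by reference), the HVX
// carry builtins (which pass a predicate by address), and the legacy HVX
// masked stores (which take a plain vector where the intrinsic expects a
// predicate).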
Value *CodeGenFunction::EmitHexagonBuiltinExpr(unsigned BuiltinID,
                                               const CallExpr *E) {
  Intrinsic::ID ID;
  unsigned VecLen;
  std::tie(ID, VecLen) = getIntrinsicForHexagonNonClangBuiltin(BuiltinID);

  auto MakeCircOp = [this, E](unsigned IntID, bool IsLoad) {
    // The base pointer is passed by address, so it needs to be loaded.
    Address A = EmitPointerWithAlignment(E->getArg(0));
    Address BP = Address(A.emitRawPointer(*this), Int8PtrTy, A.getAlignment());
    llvm::Value *Base = Builder.CreateLoad(BP);
    // The treatment of both loads and stores is the same: the arguments for
    // the builtin are the same as the arguments for the intrinsic.
    // Load:
    //   builtin(Base, Inc, Mod, Start)      -> intr(Base, Inc, Mod, Start)
    //   builtin(Base, Mod, Start)           -> intr(Base, Mod, Start)
    // Store:
    //   builtin(Base, Inc, Mod, Val, Start) -> intr(Base, Inc, Mod, Val, Start)
    //   builtin(Base, Mod, Val, Start)      -> intr(Base, Mod, Val, Start)
    SmallVector<llvm::Value*,5> Ops = { Base };
    for (unsigned i = 1, e = E->getNumArgs(); i != e; ++i)
      Ops.push_back(EmitScalarExpr(E->getArg(i)));

    llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(IntID), Ops);
    // The load intrinsics generate two results (Value, NewBase), while the
    // stores generate one (NewBase). The new base address needs to be stored.
    llvm::Value *NewBase = IsLoad ? Builder.CreateExtractValue(Result, 1)
                                  : Result;
    llvm::Value *LV = EmitScalarExpr(E->getArg(0));
    Address Dest = EmitPointerWithAlignment(E->getArg(0));
    llvm::Value *RetVal =
        Builder.CreateAlignedStore(NewBase, LV, Dest.getAlignment());
    if (IsLoad)
      RetVal = Builder.CreateExtractValue(Result, 0);
    return RetVal;
  };

  // Handle the conversion of bit-reverse load intrinsics to bit code.
  // The intrinsic call after this function only reads from memory; the
  // write to memory is handled by the store instruction.
  auto MakeBrevLd = [this, E](unsigned IntID, llvm::Type *DestTy) {
    // The intrinsic generates one result, which is the new value for the base
    // pointer. It needs to be returned. The result of the load instruction is
    // passed to the intrinsic by address, so the value needs to be stored.
    llvm::Value *BaseAddress = EmitScalarExpr(E->getArg(0));

    // Expressions like &(*pt++) will be incremented per evaluation.
    // EmitPointerWithAlignment and EmitScalarExpr evaluate the expression
    // once per call.
    Address DestAddr = EmitPointerWithAlignment(E->getArg(1));
    DestAddr = DestAddr.withElementType(Int8Ty);
    llvm::Value *DestAddress = DestAddr.emitRawPointer(*this);

    // Operands are Base, Dest, Modifier.
    // The intrinsic format in LLVM IR is defined as
    // { ValueType, i8* } (i8*, i32).
    llvm::Value *Result = Builder.CreateCall(
        CGM.getIntrinsic(IntID), {BaseAddress, EmitScalarExpr(E->getArg(2))});

    // The value needs to be stored as the variable is passed by reference.
    llvm::Value *DestVal = Builder.CreateExtractValue(Result, 0);

    // The store needs to be truncated to fit the destination type.
    // While i32 and i64 are natively supported on Hexagon, i8 and i16 need
    // to be handled with stores of the respective destination type.
    DestVal = Builder.CreateTrunc(DestVal, DestTy);

    Builder.CreateAlignedStore(DestVal, DestAddress, DestAddr.getAlignment());
    // The updated value of the base pointer is returned.
    return Builder.CreateExtractValue(Result, 1);
  };

  // V2Q converts an HVX vector into a vector predicate, and Q2V converts a
  // vector predicate back into a vector; both go through vandvrt/vandqrt
  // with an all-ones mask.
  auto V2Q = [this, VecLen] (llvm::Value *Vec) {
    Intrinsic::ID ID = VecLen == 128 ? Intrinsic::hexagon_V6_vandvrt_128B
                                     : Intrinsic::hexagon_V6_vandvrt;
    return Builder.CreateCall(CGM.getIntrinsic(ID),
                              {Vec, Builder.getInt32(-1)});
  };
  auto Q2V = [this, VecLen] (llvm::Value *Pred) {
    Intrinsic::ID ID = VecLen == 128 ? Intrinsic::hexagon_V6_vandqrt_128B
                                     : Intrinsic::hexagon_V6_vandqrt;
    return Builder.CreateCall(CGM.getIntrinsic(ID),
                              {Pred, Builder.getInt32(-1)});
  };
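
  // Illustrative sketch (types assumed, not taken from this file) of how the
  // carry builtins handled first below are written at the source level; the
  // predicate is passed by address and updated in place:
  //   HVX_VectorPred Carry = ...;
  //   HVX_Vector Sum = __builtin_HEXAGON_V6_vaddcarry(A, B, &Carry);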
  switch (BuiltinID) {
  // These intrinsics return a tuple {Vector, VectorPred} in LLVM IR,
  // and the corresponding C/C++ builtins use loads/stores to update
  // the predicate.
  case Hexagon::BI__builtin_HEXAGON_V6_vaddcarry:
  case Hexagon::BI__builtin_HEXAGON_V6_vaddcarry_128B:
  case Hexagon::BI__builtin_HEXAGON_V6_vsubcarry:
  case Hexagon::BI__builtin_HEXAGON_V6_vsubcarry_128B: {
    // Get the type from the 0-th argument.
    llvm::Type *VecType = ConvertType(E->getArg(0)->getType());
    Address PredAddr =
        EmitPointerWithAlignment(E->getArg(2)).withElementType(VecType);
    llvm::Value *PredIn = V2Q(Builder.CreateLoad(PredAddr));
    llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(ID),
        {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1)), PredIn});

    llvm::Value *PredOut = Builder.CreateExtractValue(Result, 1);
    Builder.CreateAlignedStore(Q2V(PredOut), PredAddr.emitRawPointer(*this),
                               PredAddr.getAlignment());
    return Builder.CreateExtractValue(Result, 0);
  }
  // These are identical to the builtins above, except they don't consume the
  // input carry; they only generate a carry-out. Since they still produce two
  // outputs, generate the store of the predicate, but no load.
  case Hexagon::BI__builtin_HEXAGON_V6_vaddcarryo:
  case Hexagon::BI__builtin_HEXAGON_V6_vaddcarryo_128B:
  case Hexagon::BI__builtin_HEXAGON_V6_vsubcarryo:
  case Hexagon::BI__builtin_HEXAGON_V6_vsubcarryo_128B: {
    // Get the type from the 0-th argument.
    llvm::Type *VecType = ConvertType(E->getArg(0)->getType());
    Address PredAddr =
        EmitPointerWithAlignment(E->getArg(2)).withElementType(VecType);
    llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(ID),
        {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1))});

    llvm::Value *PredOut = Builder.CreateExtractValue(Result, 1);
    Builder.CreateAlignedStore(Q2V(PredOut), PredAddr.emitRawPointer(*this),
                               PredAddr.getAlignment());
    return Builder.CreateExtractValue(Result, 0);
  }
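
  // The legacy masked-store builtins below take an ordinary HVX vector as the
  // predicate operand, while the underlying intrinsics expect a vector
  // predicate, so the first operand is converted with V2Q. The implicit
  // bitcast inserted for the boolean-vector argument is stripped first.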
  case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstoreq:
  case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorenq:
  case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorentq:
  case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorentnq:
  case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstoreq_128B:
  case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorenq_128B:
  case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorentq_128B:
  case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorentnq_128B: {
    SmallVector<llvm::Value*,4> Ops;
    const Expr *PredOp = E->getArg(0);
    // There will be an implicit cast to a boolean vector. Strip it.
    if (auto *Cast = dyn_cast<ImplicitCastExpr>(PredOp)) {
      if (Cast->getCastKind() == CK_BitCast)
        PredOp = Cast->getSubExpr();
      Ops.push_back(V2Q(EmitScalarExpr(PredOp)));
    }
    for (int i = 1, e = E->getNumArgs(); i != e; ++i)
      Ops.push_back(EmitScalarExpr(E->getArg(i)));
    return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
  }

  case Hexagon::BI__builtin_HEXAGON_L2_loadrub_pci:
  case Hexagon::BI__builtin_HEXAGON_L2_loadrb_pci:
  case Hexagon::BI__builtin_HEXAGON_L2_loadruh_pci:
  case Hexagon::BI__builtin_HEXAGON_L2_loadrh_pci:
  case Hexagon::BI__builtin_HEXAGON_L2_loadri_pci:
  case Hexagon::BI__builtin_HEXAGON_L2_loadrd_pci:
  case Hexagon::BI__builtin_HEXAGON_L2_loadrub_pcr:
  case Hexagon::BI__builtin_HEXAGON_L2_loadrb_pcr:
  case Hexagon::BI__builtin_HEXAGON_L2_loadruh_pcr:
  case Hexagon::BI__builtin_HEXAGON_L2_loadrh_pcr:
  case Hexagon::BI__builtin_HEXAGON_L2_loadri_pcr:
  case Hexagon::BI__builtin_HEXAGON_L2_loadrd_pcr:
    return MakeCircOp(ID, /*IsLoad=*/true);
  case Hexagon::BI__builtin_HEXAGON_S2_storerb_pci:
  case Hexagon::BI__builtin_HEXAGON_S2_storerh_pci:
  case Hexagon::BI__builtin_HEXAGON_S2_storerf_pci:
  case Hexagon::BI__builtin_HEXAGON_S2_storeri_pci:
  case Hexagon::BI__builtin_HEXAGON_S2_storerd_pci:
  case Hexagon::BI__builtin_HEXAGON_S2_storerb_pcr:
  case Hexagon::BI__builtin_HEXAGON_S2_storerh_pcr:
  case Hexagon::BI__builtin_HEXAGON_S2_storerf_pcr:
  case Hexagon::BI__builtin_HEXAGON_S2_storeri_pcr:
  case Hexagon::BI__builtin_HEXAGON_S2_storerd_pcr:
    return MakeCircOp(ID, /*IsLoad=*/false);
  case Hexagon::BI__builtin_brev_ldub:
    return MakeBrevLd(Intrinsic::hexagon_L2_loadrub_pbr, Int8Ty);
  case Hexagon::BI__builtin_brev_ldb:
    return MakeBrevLd(Intrinsic::hexagon_L2_loadrb_pbr, Int8Ty);
  case Hexagon::BI__builtin_brev_lduh:
    return MakeBrevLd(Intrinsic::hexagon_L2_loadruh_pbr, Int16Ty);
  case Hexagon::BI__builtin_brev_ldh:
    return MakeBrevLd(Intrinsic::hexagon_L2_loadrh_pbr, Int16Ty);
  case Hexagon::BI__builtin_brev_ldw:
    return MakeBrevLd(Intrinsic::hexagon_L2_loadri_pbr, Int32Ty);
  case Hexagon::BI__builtin_brev_ldd:
    return MakeBrevLd(Intrinsic::hexagon_L2_loadrd_pbr, Int64Ty);
  } // switch

  return nullptr;
}
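
// Handles both the scalar RISC-V builtins (bit manipulation, scalar crypto,
// and the Zihintntl non-temporal load/store hints), which are mapped inline
// below, and the RVV / SiFive vector builtins, whose codegen is pulled in
// from the generated riscv_vector_builtin_cg.inc and
// riscv_sifive_vector_builtin_cg.inc files.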
Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID,
                                             const CallExpr *E,
                                             ReturnValueSlot ReturnValue) {
  SmallVector<Value *, 4> Ops;
  llvm::Type *ResultType = ConvertType(E->getType());

  // Find out if any arguments are required to be integer constant expressions.
  unsigned ICEArguments = 0;
  ASTContext::GetBuiltinTypeError Error;
  getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
  if (Error == ASTContext::GE_Missing_type) {
    // Vector intrinsics don't have a type string.
    assert(BuiltinID >= clang::RISCV::FirstRVVBuiltin &&
           BuiltinID <= clang::RISCV::LastRVVBuiltin);
    ICEArguments = 0;
    if (BuiltinID == RISCVVector::BI__builtin_rvv_vget_v ||
        BuiltinID == RISCVVector::BI__builtin_rvv_vset_v)
      ICEArguments = 1 << 1;
  } else {
    assert(Error == ASTContext::GE_None && "Unexpected error");
  }

  if (BuiltinID == RISCV::BI__builtin_riscv_ntl_load)
    ICEArguments |= (1 << 1);
  if (BuiltinID == RISCV::BI__builtin_riscv_ntl_store)
    ICEArguments |= (1 << 2);

  for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
    // Handle aggregate arguments, namely RVV tuple types in segment
    // loads/stores.
    if (hasAggregateEvaluationKind(E->getArg(i)->getType())) {
      LValue L = EmitAggExprToLValue(E->getArg(i));
      llvm::Value *AggValue = Builder.CreateLoad(L.getAddress());
      Ops.push_back(AggValue);
      continue;
    }
    Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));
  }

  Intrinsic::ID ID = Intrinsic::not_intrinsic;
  unsigned NF = 1;
  // The 0th bit simulates the `vta` of RVV.
  // The 1st bit simulates the `vma` of RVV.
  constexpr unsigned RVV_VTA = 0x1;
  constexpr unsigned RVV_VMA = 0x2;
  int PolicyAttrs = 0;
  bool IsMasked = false;

  // Required for overloaded intrinsics.
  llvm::SmallVector<llvm::Type *, 2> IntrinsicTypes;
  switch (BuiltinID) {
  default: llvm_unreachable("unexpected builtin ID");
  case RISCV::BI__builtin_riscv_orc_b_32:
  case RISCV::BI__builtin_riscv_orc_b_64:
  case RISCV::BI__builtin_riscv_clz_32:
  case RISCV::BI__builtin_riscv_clz_64:
  case RISCV::BI__builtin_riscv_ctz_32:
  case RISCV::BI__builtin_riscv_ctz_64:
  case RISCV::BI__builtin_riscv_clmul_32:
  case RISCV::BI__builtin_riscv_clmul_64:
  case RISCV::BI__builtin_riscv_clmulh_32:
  case RISCV::BI__builtin_riscv_clmulh_64:
  case RISCV::BI__builtin_riscv_clmulr_32:
  case RISCV::BI__builtin_riscv_clmulr_64:
  case RISCV::BI__builtin_riscv_xperm4_32:
  case RISCV::BI__builtin_riscv_xperm4_64:
  case RISCV::BI__builtin_riscv_xperm8_32:
  case RISCV::BI__builtin_riscv_xperm8_64:
  case RISCV::BI__builtin_riscv_brev8_32:
  case RISCV::BI__builtin_riscv_brev8_64:
  case RISCV::BI__builtin_riscv_zip_32:
  case RISCV::BI__builtin_riscv_unzip_32: {
    switch (BuiltinID) {
    default: llvm_unreachable("unexpected builtin ID");
    // Zbb
    case RISCV::BI__builtin_riscv_orc_b_32:
    case RISCV::BI__builtin_riscv_orc_b_64:
      ID = Intrinsic::riscv_orc_b;
      break;
    case RISCV::BI__builtin_riscv_clz_32:
    case RISCV::BI__builtin_riscv_clz_64: {
      Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType());
      Value *Result = Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)});
      if (Result->getType() != ResultType)
        Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
                                       "cast");
      return Result;
    }
    case RISCV::BI__builtin_riscv_ctz_32:
    case RISCV::BI__builtin_riscv_ctz_64: {
      Function *F = CGM.getIntrinsic(Intrinsic::cttz, Ops[0]->getType());
      Value *Result = Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)});
      if (Result->getType() != ResultType)
        Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
                                       "cast");
      return Result;
    }
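
    // Note that the second operand of llvm.ctlz/llvm.cttz above is the
    // is-zero-poison flag; passing false keeps these builtins well defined
    // for a zero input (e.g. __builtin_riscv_clz_32(0) yields 32).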

    // Zbc
    case RISCV::BI__builtin_riscv_clmul_32:
    case RISCV::BI__builtin_riscv_clmul_64:
      ID = Intrinsic::riscv_clmul;
      break;
    case RISCV::BI__builtin_riscv_clmulh_32:
    case RISCV::BI__builtin_riscv_clmulh_64:
      ID = Intrinsic::riscv_clmulh;
      break;
    case RISCV::BI__builtin_riscv_clmulr_32:
    case RISCV::BI__builtin_riscv_clmulr_64:
      ID = Intrinsic::riscv_clmulr;
      break;

    // Zbkx
    case RISCV::BI__builtin_riscv_xperm8_32:
    case RISCV::BI__builtin_riscv_xperm8_64:
      ID = Intrinsic::riscv_xperm8;
      break;
    case RISCV::BI__builtin_riscv_xperm4_32:
    case RISCV::BI__builtin_riscv_xperm4_64:
      ID = Intrinsic::riscv_xperm4;
      break;

    // Zbkb
    case RISCV::BI__builtin_riscv_brev8_32:
    case RISCV::BI__builtin_riscv_brev8_64:
      ID = Intrinsic::riscv_brev8;
      break;
    case RISCV::BI__builtin_riscv_zip_32:
      ID = Intrinsic::riscv_zip;
      break;
    case RISCV::BI__builtin_riscv_unzip_32:
      ID = Intrinsic::riscv_unzip;
      break;
    }

    IntrinsicTypes = {ResultType};
    break;
  }

  // Zk builtins

  // Zknh
  case RISCV::BI__builtin_riscv_sha256sig0:
    ID = Intrinsic::riscv_sha256sig0;
    break;
  case RISCV::BI__builtin_riscv_sha256sig1:
    ID = Intrinsic::riscv_sha256sig1;
    break;
  case RISCV::BI__builtin_riscv_sha256sum0:
    ID = Intrinsic::riscv_sha256sum0;
    break;
  case RISCV::BI__builtin_riscv_sha256sum1:
    ID = Intrinsic::riscv_sha256sum1;
    break;

  // Zksed
  case RISCV::BI__builtin_riscv_sm4ks:
    ID = Intrinsic::riscv_sm4ks;
    break;
  case RISCV::BI__builtin_riscv_sm4ed:
    ID = Intrinsic::riscv_sm4ed;
    break;

  // Zksh
  case RISCV::BI__builtin_riscv_sm3p0:
    ID = Intrinsic::riscv_sm3p0;
    break;
  case RISCV::BI__builtin_riscv_sm3p1:
    ID = Intrinsic::riscv_sm3p1;
    break;

  // Zihintntl
  case RISCV::BI__builtin_riscv_ntl_load: {
    llvm::Type *ResTy = ConvertType(E->getType());
    unsigned DomainVal = 5; // Default __RISCV_NTLH_ALL
    if (Ops.size() == 2)
      DomainVal = cast<ConstantInt>(Ops[1])->getZExtValue();

    llvm::MDNode *RISCVDomainNode = llvm::MDNode::get(
        getLLVMContext(),
        llvm::ConstantAsMetadata::get(Builder.getInt32(DomainVal)));
    llvm::MDNode *NontemporalNode = llvm::MDNode::get(
        getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1)));

    int Width;
    if (ResTy->isScalableTy()) {
      const ScalableVectorType *SVTy = cast<ScalableVectorType>(ResTy);
      llvm::Type *ScalarTy = ResTy->getScalarType();
      Width = ScalarTy->getPrimitiveSizeInBits() *
              SVTy->getElementCount().getKnownMinValue();
    } else
      Width = ResTy->getPrimitiveSizeInBits();
    LoadInst *Load = Builder.CreateLoad(
        Address(Ops[0], ResTy, CharUnits::fromQuantity(Width / 8)));

    Load->setMetadata(llvm::LLVMContext::MD_nontemporal, NontemporalNode);
    Load->setMetadata(CGM.getModule().getMDKindID("riscv-nontemporal-domain"),
                      RISCVDomainNode);

    return Load;
  }
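  // Illustrative sketch (not taken from this file): at the source level these
  // hints look roughly like
  //   int X = __builtin_riscv_ntl_load(&P, __RISCV_NTLH_ALL);
  //   __builtin_riscv_ntl_store(&P, X, __RISCV_NTLH_ALL);
  // The store case below mirrors the load case above; the domain value is
  // attached as "riscv-nontemporal-domain" metadata alongside the generic
  // !nontemporal marker.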
  case RISCV::BI__builtin_riscv_ntl_store: {
    unsigned DomainVal = 5; // Default __RISCV_NTLH_ALL
    if (Ops.size() == 3)
      DomainVal = cast<ConstantInt>(Ops[2])->getZExtValue();

    llvm::MDNode *RISCVDomainNode = llvm::MDNode::get(
        getLLVMContext(),
        llvm::ConstantAsMetadata::get(Builder.getInt32(DomainVal)));
    llvm::MDNode *NontemporalNode = llvm::MDNode::get(
        getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1)));

    StoreInst *Store = Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
    Store->setMetadata(llvm::LLVMContext::MD_nontemporal, NontemporalNode);
    Store->setMetadata(CGM.getModule().getMDKindID("riscv-nontemporal-domain"),
                       RISCVDomainNode);

    return Store;
  }

  // Vector builtins are handled from here.
#include "clang/Basic/riscv_vector_builtin_cg.inc"
  // SiFive Vector builtins are handled from here.
#include "clang/Basic/riscv_sifive_vector_builtin_cg.inc"
  }

  assert(ID != Intrinsic::not_intrinsic);

  llvm::Function *F = CGM.getIntrinsic(ID, IntrinsicTypes);
  return Builder.CreateCall(F, Ops, "");
}