1 //===---- CGBuiltin.cpp - Emit LLVM Code for builtins ---------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This contains code to emit Builtin calls as LLVM code.
10 //
11 //===----------------------------------------------------------------------===//
12
13 #include "ABIInfo.h"
14 #include "CGCUDARuntime.h"
15 #include "CGCXXABI.h"
16 #include "CGHLSLRuntime.h"
17 #include "CGObjCRuntime.h"
18 #include "CGOpenCLRuntime.h"
19 #include "CGRecordLayout.h"
20 #include "CodeGenFunction.h"
21 #include "CodeGenModule.h"
22 #include "ConstantEmitter.h"
23 #include "PatternInit.h"
24 #include "TargetInfo.h"
25 #include "clang/AST/ASTContext.h"
26 #include "clang/AST/Attr.h"
27 #include "clang/AST/Decl.h"
28 #include "clang/AST/OSLog.h"
29 #include "clang/AST/OperationKinds.h"
30 #include "clang/Basic/TargetBuiltins.h"
31 #include "clang/Basic/TargetInfo.h"
32 #include "clang/Basic/TargetOptions.h"
33 #include "clang/CodeGen/CGFunctionInfo.h"
34 #include "clang/Frontend/FrontendDiagnostic.h"
35 #include "llvm/ADT/APFloat.h"
36 #include "llvm/ADT/APInt.h"
37 #include "llvm/ADT/FloatingPointMode.h"
38 #include "llvm/ADT/SmallPtrSet.h"
39 #include "llvm/ADT/StringExtras.h"
40 #include "llvm/Analysis/ValueTracking.h"
41 #include "llvm/IR/DataLayout.h"
42 #include "llvm/IR/InlineAsm.h"
43 #include "llvm/IR/Intrinsics.h"
44 #include "llvm/IR/IntrinsicsAArch64.h"
45 #include "llvm/IR/IntrinsicsAMDGPU.h"
46 #include "llvm/IR/IntrinsicsARM.h"
47 #include "llvm/IR/IntrinsicsBPF.h"
48 #include "llvm/IR/IntrinsicsDirectX.h"
49 #include "llvm/IR/IntrinsicsHexagon.h"
50 #include "llvm/IR/IntrinsicsNVPTX.h"
51 #include "llvm/IR/IntrinsicsPowerPC.h"
52 #include "llvm/IR/IntrinsicsR600.h"
53 #include "llvm/IR/IntrinsicsRISCV.h"
54 #include "llvm/IR/IntrinsicsS390.h"
55 #include "llvm/IR/IntrinsicsVE.h"
56 #include "llvm/IR/IntrinsicsWebAssembly.h"
57 #include "llvm/IR/IntrinsicsX86.h"
58 #include "llvm/IR/MDBuilder.h"
59 #include "llvm/IR/MatrixBuilder.h"
60 #include "llvm/IR/MemoryModelRelaxationAnnotations.h"
61 #include "llvm/Support/ConvertUTF.h"
62 #include "llvm/Support/MathExtras.h"
63 #include "llvm/Support/ScopedPrinter.h"
64 #include "llvm/TargetParser/AArch64TargetParser.h"
65 #include "llvm/TargetParser/X86TargetParser.h"
66 #include <optional>
67 #include <sstream>
68
69 using namespace clang;
70 using namespace CodeGen;
71 using namespace llvm;
72
73 static void initializeAlloca(CodeGenFunction &CGF, AllocaInst *AI, Value *Size,
74 Align AlignmentInBytes) {
75 ConstantInt *Byte;
76 switch (CGF.getLangOpts().getTrivialAutoVarInit()) {
77 case LangOptions::TrivialAutoVarInitKind::Uninitialized:
78 // Nothing to initialize.
79 return;
80 case LangOptions::TrivialAutoVarInitKind::Zero:
81 Byte = CGF.Builder.getInt8(0x00);
82 break;
83 case LangOptions::TrivialAutoVarInitKind::Pattern: {
84 llvm::Type *Int8 = llvm::IntegerType::getInt8Ty(CGF.CGM.getLLVMContext());
85 Byte = llvm::dyn_cast<llvm::ConstantInt>(
86 initializationPatternFor(CGF.CGM, Int8));
87 break;
88 }
89 }
90 if (CGF.CGM.stopAutoInit())
91 return;
92 auto *I = CGF.Builder.CreateMemSet(AI, Byte, Size, AlignmentInBytes);
93 I->addAnnotationMetadata("auto-init");
94 }
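// Illustrative sketch (not the exact IR this function emits): with
// -ftrivial-auto-var-init=pattern, a call such as
//   void *p = __builtin_alloca(n);
// lowers roughly to
//   %p = alloca i8, i64 %n, align 16
//   call void @llvm.memset.p0.i64(ptr align 16 %p, i8 -86, i64 %n, i1 false)
// where the memset carries "auto-init" annotation metadata; the fill byte
// shown (0xAA) is the usual integer pattern, and =zero uses 0x00 instead.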
95
96 /// getBuiltinLibFunction - Given a builtin id for a function like
97 /// "__builtin_fabsf", return a Function* for "fabsf".
98 llvm::Constant *CodeGenModule::getBuiltinLibFunction(const FunctionDecl *FD,
99 unsigned BuiltinID) {
100 assert(Context.BuiltinInfo.isLibFunction(BuiltinID));
101
102 // Get the name, skip over the __builtin_ prefix (if necessary).
103 StringRef Name;
104 GlobalDecl D(FD);
105
106 // TODO: This list should be expanded or refactored after all GCC-compatible
107 // std libcall builtins are implemented.
108 static SmallDenseMap<unsigned, StringRef, 64> F128Builtins{
109 {Builtin::BI__builtin___fprintf_chk, "__fprintf_chkieee128"},
110 {Builtin::BI__builtin___printf_chk, "__printf_chkieee128"},
111 {Builtin::BI__builtin___snprintf_chk, "__snprintf_chkieee128"},
112 {Builtin::BI__builtin___sprintf_chk, "__sprintf_chkieee128"},
113 {Builtin::BI__builtin___vfprintf_chk, "__vfprintf_chkieee128"},
114 {Builtin::BI__builtin___vprintf_chk, "__vprintf_chkieee128"},
115 {Builtin::BI__builtin___vsnprintf_chk, "__vsnprintf_chkieee128"},
116 {Builtin::BI__builtin___vsprintf_chk, "__vsprintf_chkieee128"},
117 {Builtin::BI__builtin_fprintf, "__fprintfieee128"},
118 {Builtin::BI__builtin_printf, "__printfieee128"},
119 {Builtin::BI__builtin_snprintf, "__snprintfieee128"},
120 {Builtin::BI__builtin_sprintf, "__sprintfieee128"},
121 {Builtin::BI__builtin_vfprintf, "__vfprintfieee128"},
122 {Builtin::BI__builtin_vprintf, "__vprintfieee128"},
123 {Builtin::BI__builtin_vsnprintf, "__vsnprintfieee128"},
124 {Builtin::BI__builtin_vsprintf, "__vsprintfieee128"},
125 {Builtin::BI__builtin_fscanf, "__fscanfieee128"},
126 {Builtin::BI__builtin_scanf, "__scanfieee128"},
127 {Builtin::BI__builtin_sscanf, "__sscanfieee128"},
128 {Builtin::BI__builtin_vfscanf, "__vfscanfieee128"},
129 {Builtin::BI__builtin_vscanf, "__vscanfieee128"},
130 {Builtin::BI__builtin_vsscanf, "__vsscanfieee128"},
131 {Builtin::BI__builtin_nexttowardf128, "__nexttowardieee128"},
132 };
133
134 // The AIX library functions frexpl, ldexpl, and modfl are for 128-bit
135 // IBM 'long double' (i.e. __ibm128). Map to the 'double' versions
136 // if it is 64-bit 'long double' mode.
137 static SmallDenseMap<unsigned, StringRef, 4> AIXLongDouble64Builtins{
138 {Builtin::BI__builtin_frexpl, "frexp"},
139 {Builtin::BI__builtin_ldexpl, "ldexp"},
140 {Builtin::BI__builtin_modfl, "modf"},
141 };
142
143 // If the builtin has been declared explicitly with an assembler label,
144 // use the mangled name. This differs from the plain label on platforms
145 // that prefix labels.
146 if (FD->hasAttr<AsmLabelAttr>())
147 Name = getMangledName(D);
148 else {
149     // TODO: This mutation should also be applied to targets other than PPC,
150     // once their backends support IEEE 128-bit style libcalls.
151 if (getTriple().isPPC64() &&
152 &getTarget().getLongDoubleFormat() == &llvm::APFloat::IEEEquad() &&
153 F128Builtins.contains(BuiltinID))
154 Name = F128Builtins[BuiltinID];
155 else if (getTriple().isOSAIX() &&
156 &getTarget().getLongDoubleFormat() ==
157 &llvm::APFloat::IEEEdouble() &&
158 AIXLongDouble64Builtins.contains(BuiltinID))
159 Name = AIXLongDouble64Builtins[BuiltinID];
160 else
161 Name = Context.BuiltinInfo.getName(BuiltinID).substr(10);
162 }
163
164 llvm::FunctionType *Ty =
165 cast<llvm::FunctionType>(getTypes().ConvertType(FD->getType()));
166
167 return GetOrCreateLLVMFunction(Name, Ty, D, /*ForVTable=*/false);
168 }
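// Illustrative example (assumed target/flags, not taken from this file): when
// targeting powerpc64le with 128-bit IEEE 'long double', a call to
// __builtin_printf is looked up in F128Builtins above, so the emitted
// declaration and call use the name "__printfieee128" instead of "printf".
// On other targets the "__builtin_" prefix (10 characters) is simply dropped.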
169
170 /// Emit the conversions required to turn the given value into an
171 /// integer of the given size.
172 static Value *EmitToInt(CodeGenFunction &CGF, llvm::Value *V,
173 QualType T, llvm::IntegerType *IntType) {
174 V = CGF.EmitToMemory(V, T);
175
176 if (V->getType()->isPointerTy())
177 return CGF.Builder.CreatePtrToInt(V, IntType);
178
179 assert(V->getType() == IntType);
180 return V;
181 }
182
183 static Value *EmitFromInt(CodeGenFunction &CGF, llvm::Value *V,
184 QualType T, llvm::Type *ResultType) {
185 V = CGF.EmitFromMemory(V, T);
186
187 if (ResultType->isPointerTy())
188 return CGF.Builder.CreateIntToPtr(V, ResultType);
189
190 assert(V->getType() == ResultType);
191 return V;
192 }
193
194 static Address CheckAtomicAlignment(CodeGenFunction &CGF, const CallExpr *E) {
195 ASTContext &Ctx = CGF.getContext();
196 Address Ptr = CGF.EmitPointerWithAlignment(E->getArg(0));
197 unsigned Bytes = Ptr.getElementType()->isPointerTy()
198 ? Ctx.getTypeSizeInChars(Ctx.VoidPtrTy).getQuantity()
199 : Ptr.getElementType()->getScalarSizeInBits() / 8;
200 unsigned Align = Ptr.getAlignment().getQuantity();
201 if (Align % Bytes != 0) {
202 DiagnosticsEngine &Diags = CGF.CGM.getDiags();
203 Diags.Report(E->getBeginLoc(), diag::warn_sync_op_misaligned);
204 // Force address to be at least naturally-aligned.
205 return Ptr.withAlignment(CharUnits::fromQuantity(Bytes));
206 }
207 return Ptr;
208 }
209
210 /// Utility to insert an atomic instruction based on Intrinsic::ID
211 /// and the expression node.
212 static Value *MakeBinaryAtomicValue(
213 CodeGenFunction &CGF, llvm::AtomicRMWInst::BinOp Kind, const CallExpr *E,
214 AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent) {
215
216 QualType T = E->getType();
217 assert(E->getArg(0)->getType()->isPointerType());
218 assert(CGF.getContext().hasSameUnqualifiedType(T,
219 E->getArg(0)->getType()->getPointeeType()));
220 assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));
221
222 Address DestAddr = CheckAtomicAlignment(CGF, E);
223
224 llvm::IntegerType *IntType = llvm::IntegerType::get(
225 CGF.getLLVMContext(), CGF.getContext().getTypeSize(T));
226
227 llvm::Value *Val = CGF.EmitScalarExpr(E->getArg(1));
228 llvm::Type *ValueType = Val->getType();
229 Val = EmitToInt(CGF, Val, T, IntType);
230
231 llvm::Value *Result =
232 CGF.Builder.CreateAtomicRMW(Kind, DestAddr, Val, Ordering);
233 return EmitFromInt(CGF, Result, T, ValueType);
234 }
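// Illustrative sketch of the lowering (roughly, not the exact IR): for an
// 'int' x,
//   __sync_fetch_and_add(&x, 1)
// becomes
//   %old = atomicrmw add ptr %x, i32 1 seq_cst
// and %old (the value before the operation) is the result, converted back to
// the source type by EmitFromInt.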
235
236 static Value *EmitNontemporalStore(CodeGenFunction &CGF, const CallExpr *E) {
237 Value *Val = CGF.EmitScalarExpr(E->getArg(0));
238 Address Addr = CGF.EmitPointerWithAlignment(E->getArg(1));
239
240 Val = CGF.EmitToMemory(Val, E->getArg(0)->getType());
241 LValue LV = CGF.MakeAddrLValue(Addr, E->getArg(0)->getType());
242 LV.setNontemporal(true);
243 CGF.EmitStoreOfScalar(Val, LV, false);
244 return nullptr;
245 }
246
247 static Value *EmitNontemporalLoad(CodeGenFunction &CGF, const CallExpr *E) {
248 Address Addr = CGF.EmitPointerWithAlignment(E->getArg(0));
249
250 LValue LV = CGF.MakeAddrLValue(Addr, E->getType());
251 LV.setNontemporal(true);
252 return CGF.EmitLoadOfScalar(LV, E->getExprLoc());
253 }
254
255 static RValue EmitBinaryAtomic(CodeGenFunction &CGF,
256 llvm::AtomicRMWInst::BinOp Kind,
257 const CallExpr *E) {
258 return RValue::get(MakeBinaryAtomicValue(CGF, Kind, E));
259 }
260
261 /// Utility to insert an atomic instruction based on Intrinsic::ID and
262 /// the expression node, where the return value is the result of the
263 /// operation.
264 static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF,
265 llvm::AtomicRMWInst::BinOp Kind,
266 const CallExpr *E,
267 Instruction::BinaryOps Op,
268 bool Invert = false) {
269 QualType T = E->getType();
270 assert(E->getArg(0)->getType()->isPointerType());
271 assert(CGF.getContext().hasSameUnqualifiedType(T,
272 E->getArg(0)->getType()->getPointeeType()));
273 assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));
274
275 Address DestAddr = CheckAtomicAlignment(CGF, E);
276
277 llvm::IntegerType *IntType = llvm::IntegerType::get(
278 CGF.getLLVMContext(), CGF.getContext().getTypeSize(T));
279
280 llvm::Value *Val = CGF.EmitScalarExpr(E->getArg(1));
281 llvm::Type *ValueType = Val->getType();
282 Val = EmitToInt(CGF, Val, T, IntType);
283
284 llvm::Value *Result = CGF.Builder.CreateAtomicRMW(
285 Kind, DestAddr, Val, llvm::AtomicOrdering::SequentiallyConsistent);
286 Result = CGF.Builder.CreateBinOp(Op, Result, Val);
287 if (Invert)
288 Result =
289 CGF.Builder.CreateBinOp(llvm::Instruction::Xor, Result,
290 llvm::ConstantInt::getAllOnesValue(IntType));
291 Result = EmitFromInt(CGF, Result, T, ValueType);
292 return RValue::get(Result);
293 }
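// Illustrative sketch: for an 'int' x,
//   __sync_add_and_fetch(&x, 1)
// lowers roughly to
//   %old = atomicrmw add ptr %x, i32 1 seq_cst
//   %new = add i32 %old, 1
// returning the post-operation value; the Invert path additionally xors the
// result with -1, which is what the nand variants need.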
294
295 /// Utility to insert an atomic cmpxchg instruction.
296 ///
297 /// @param CGF The current codegen function.
298 /// @param E Builtin call expression to convert to cmpxchg.
299 /// arg0 - address to operate on
300 /// arg1 - value to compare with
301 /// arg2 - new value
302 /// @param ReturnBool Specifies whether to return success flag of
303 /// cmpxchg result or the old value.
304 ///
305 /// @returns result of cmpxchg, according to ReturnBool
306 ///
307 /// Note: To lower Microsoft's _InterlockedCompareExchange* intrinsics, invoke
308 /// the function EmitAtomicCmpXchgForMSIntrin instead.
309 static Value *MakeAtomicCmpXchgValue(CodeGenFunction &CGF, const CallExpr *E,
310 bool ReturnBool) {
311 QualType T = ReturnBool ? E->getArg(1)->getType() : E->getType();
312 Address DestAddr = CheckAtomicAlignment(CGF, E);
313
314 llvm::IntegerType *IntType = llvm::IntegerType::get(
315 CGF.getLLVMContext(), CGF.getContext().getTypeSize(T));
316
317 Value *Cmp = CGF.EmitScalarExpr(E->getArg(1));
318 llvm::Type *ValueType = Cmp->getType();
319 Cmp = EmitToInt(CGF, Cmp, T, IntType);
320 Value *New = EmitToInt(CGF, CGF.EmitScalarExpr(E->getArg(2)), T, IntType);
321
322 Value *Pair = CGF.Builder.CreateAtomicCmpXchg(
323 DestAddr, Cmp, New, llvm::AtomicOrdering::SequentiallyConsistent,
324 llvm::AtomicOrdering::SequentiallyConsistent);
325 if (ReturnBool)
326 // Extract boolean success flag and zext it to int.
327 return CGF.Builder.CreateZExt(CGF.Builder.CreateExtractValue(Pair, 1),
328 CGF.ConvertType(E->getType()));
329 else
330 // Extract old value and emit it using the same type as compare value.
331 return EmitFromInt(CGF, CGF.Builder.CreateExtractValue(Pair, 0), T,
332 ValueType);
333 }
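// Illustrative sketch: for an 'int' x,
//   __sync_bool_compare_and_swap(&x, expected, desired)
// lowers roughly to
//   %pair = cmpxchg ptr %x, i32 %expected, i32 %desired seq_cst seq_cst
//   %ok   = extractvalue { i32, i1 } %pair, 1
// with %ok zero-extended to the result type; the *_val_* form instead returns
// extractvalue 0, i.e. the old value.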
334
335 /// This function should be invoked to emit atomic cmpxchg for Microsoft's
336 /// _InterlockedCompareExchange* intrinsics which have the following signature:
337 /// T _InterlockedCompareExchange(T volatile *Destination,
338 /// T Exchange,
339 /// T Comparand);
340 ///
341 /// Whereas the llvm 'cmpxchg' instruction has the following syntax:
342 /// cmpxchg *Destination, Comparand, Exchange.
343 /// So we need to swap Comparand and Exchange when invoking
344 /// CreateAtomicCmpXchg. That is the reason we could not use the above utility
345 /// function MakeAtomicCmpXchgValue since it expects the arguments to be
346 /// already swapped.
347
348 static
349 Value *EmitAtomicCmpXchgForMSIntrin(CodeGenFunction &CGF, const CallExpr *E,
350 AtomicOrdering SuccessOrdering = AtomicOrdering::SequentiallyConsistent) {
351 assert(E->getArg(0)->getType()->isPointerType());
352 assert(CGF.getContext().hasSameUnqualifiedType(
353 E->getType(), E->getArg(0)->getType()->getPointeeType()));
354 assert(CGF.getContext().hasSameUnqualifiedType(E->getType(),
355 E->getArg(1)->getType()));
356 assert(CGF.getContext().hasSameUnqualifiedType(E->getType(),
357 E->getArg(2)->getType()));
358
359 Address DestAddr = CheckAtomicAlignment(CGF, E);
360
361 auto *Comparand = CGF.EmitScalarExpr(E->getArg(2));
362 auto *Exchange = CGF.EmitScalarExpr(E->getArg(1));
363
364 // For Release ordering, the failure ordering should be Monotonic.
365 auto FailureOrdering = SuccessOrdering == AtomicOrdering::Release ?
366 AtomicOrdering::Monotonic :
367 SuccessOrdering;
368
369 // The atomic instruction is marked volatile for consistency with MSVC. This
370 // blocks the few atomics optimizations that LLVM has. If we want to optimize
371 // _Interlocked* operations in the future, we will have to remove the volatile
372 // marker.
373 auto *Result = CGF.Builder.CreateAtomicCmpXchg(
374 DestAddr, Comparand, Exchange, SuccessOrdering, FailureOrdering);
375 Result->setVolatile(true);
376 return CGF.Builder.CreateExtractValue(Result, 0);
377 }
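// Illustrative sketch: _InterlockedCompareExchange(&x, Exchange, Comparand)
// on a 'long' becomes roughly
//   %pair = cmpxchg volatile ptr %x, i32 %Comparand, i32 %Exchange seq_cst seq_cst
//   %old  = extractvalue { i32, i1 } %pair, 0
// Note the operand swap relative to the source-level argument order.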
378
379 // 64-bit Microsoft platforms support 128-bit cmpxchg operations. They are
380 // prototyped like this:
381 //
382 // unsigned char _InterlockedCompareExchange128...(
383 // __int64 volatile * _Destination,
384 // __int64 _ExchangeHigh,
385 // __int64 _ExchangeLow,
386 // __int64 * _ComparandResult);
387 //
388 // Note that Destination is assumed to be at least 16-byte aligned, despite
389 // being typed int64.
390
391 static Value *EmitAtomicCmpXchg128ForMSIntrin(CodeGenFunction &CGF,
392 const CallExpr *E,
393 AtomicOrdering SuccessOrdering) {
394 assert(E->getNumArgs() == 4);
395 llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
396 llvm::Value *ExchangeHigh = CGF.EmitScalarExpr(E->getArg(1));
397 llvm::Value *ExchangeLow = CGF.EmitScalarExpr(E->getArg(2));
398 Address ComparandAddr = CGF.EmitPointerWithAlignment(E->getArg(3));
399
400 assert(DestPtr->getType()->isPointerTy());
401 assert(!ExchangeHigh->getType()->isPointerTy());
402 assert(!ExchangeLow->getType()->isPointerTy());
403
404 // For Release ordering, the failure ordering should be Monotonic.
405 auto FailureOrdering = SuccessOrdering == AtomicOrdering::Release
406 ? AtomicOrdering::Monotonic
407 : SuccessOrdering;
408
409 // Convert to i128 pointers and values. Alignment is also overridden for
410   // the destination pointer.
411 llvm::Type *Int128Ty = llvm::IntegerType::get(CGF.getLLVMContext(), 128);
412 Address DestAddr(DestPtr, Int128Ty,
413 CGF.getContext().toCharUnitsFromBits(128));
414 ComparandAddr = ComparandAddr.withElementType(Int128Ty);
415
416 // (((i128)hi) << 64) | ((i128)lo)
417 ExchangeHigh = CGF.Builder.CreateZExt(ExchangeHigh, Int128Ty);
418 ExchangeLow = CGF.Builder.CreateZExt(ExchangeLow, Int128Ty);
419 ExchangeHigh =
420 CGF.Builder.CreateShl(ExchangeHigh, llvm::ConstantInt::get(Int128Ty, 64));
421 llvm::Value *Exchange = CGF.Builder.CreateOr(ExchangeHigh, ExchangeLow);
422
423 // Load the comparand for the instruction.
424 llvm::Value *Comparand = CGF.Builder.CreateLoad(ComparandAddr);
425
426 auto *CXI = CGF.Builder.CreateAtomicCmpXchg(DestAddr, Comparand, Exchange,
427 SuccessOrdering, FailureOrdering);
428
429 // The atomic instruction is marked volatile for consistency with MSVC. This
430 // blocks the few atomics optimizations that LLVM has. If we want to optimize
431 // _Interlocked* operations in the future, we will have to remove the volatile
432 // marker.
433 CXI->setVolatile(true);
434
435 // Store the result as an outparameter.
436 CGF.Builder.CreateStore(CGF.Builder.CreateExtractValue(CXI, 0),
437 ComparandAddr);
438
439 // Get the success boolean and zero extend it to i8.
440 Value *Success = CGF.Builder.CreateExtractValue(CXI, 1);
441 return CGF.Builder.CreateZExt(Success, CGF.Int8Ty);
442 }
443
444 static Value *EmitAtomicIncrementValue(CodeGenFunction &CGF, const CallExpr *E,
445 AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent) {
446 assert(E->getArg(0)->getType()->isPointerType());
447
448 auto *IntTy = CGF.ConvertType(E->getType());
449 Address DestAddr = CheckAtomicAlignment(CGF, E);
450 auto *Result = CGF.Builder.CreateAtomicRMW(
451 AtomicRMWInst::Add, DestAddr, ConstantInt::get(IntTy, 1), Ordering);
452 return CGF.Builder.CreateAdd(Result, ConstantInt::get(IntTy, 1));
453 }
454
455 static Value *EmitAtomicDecrementValue(
456 CodeGenFunction &CGF, const CallExpr *E,
457 AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent) {
458 assert(E->getArg(0)->getType()->isPointerType());
459
460 auto *IntTy = CGF.ConvertType(E->getType());
461 Address DestAddr = CheckAtomicAlignment(CGF, E);
462 auto *Result = CGF.Builder.CreateAtomicRMW(
463 AtomicRMWInst::Sub, DestAddr, ConstantInt::get(IntTy, 1), Ordering);
464 return CGF.Builder.CreateSub(Result, ConstantInt::get(IntTy, 1));
465 }
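// Illustrative sketch: _InterlockedIncrement(&x) on a 'long' becomes roughly
//   %old = atomicrmw add ptr %x, i32 1 seq_cst
//   %res = add i32 %old, 1
// i.e. the intrinsic returns the *new* value while atomicrmw yields the old
// one; the decrement variant uses sub and a matching trailing sub.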
466
467 // Build a plain volatile load.
468 static Value *EmitISOVolatileLoad(CodeGenFunction &CGF, const CallExpr *E) {
469 Value *Ptr = CGF.EmitScalarExpr(E->getArg(0));
470 QualType ElTy = E->getArg(0)->getType()->getPointeeType();
471 CharUnits LoadSize = CGF.getContext().getTypeSizeInChars(ElTy);
472 llvm::Type *ITy =
473 llvm::IntegerType::get(CGF.getLLVMContext(), LoadSize.getQuantity() * 8);
474 llvm::LoadInst *Load = CGF.Builder.CreateAlignedLoad(ITy, Ptr, LoadSize);
475 Load->setVolatile(true);
476 return Load;
477 }
478
479 // Build a plain volatile store.
480 static Value *EmitISOVolatileStore(CodeGenFunction &CGF, const CallExpr *E) {
481 Value *Ptr = CGF.EmitScalarExpr(E->getArg(0));
482 Value *Value = CGF.EmitScalarExpr(E->getArg(1));
483 QualType ElTy = E->getArg(0)->getType()->getPointeeType();
484 CharUnits StoreSize = CGF.getContext().getTypeSizeInChars(ElTy);
485 llvm::StoreInst *Store =
486 CGF.Builder.CreateAlignedStore(Value, Ptr, StoreSize);
487 Store->setVolatile(true);
488 return Store;
489 }
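// Illustrative sketch: __iso_volatile_load32(p) becomes a plain
//   %v = load volatile i32, ptr %p
// and __iso_volatile_store32(p, v) a matching
//   store volatile i32 %v, ptr %p
// with no atomic ordering attached.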
490
491 // Emit a simple mangled intrinsic that has 1 argument and a return type
492 // matching the argument type. Depending on mode, this may be a constrained
493 // floating-point intrinsic.
494 static Value *emitUnaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF,
495 const CallExpr *E, unsigned IntrinsicID,
496 unsigned ConstrainedIntrinsicID) {
497 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
498
499 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
500 if (CGF.Builder.getIsFPConstrained()) {
501 Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Src0->getType());
502 return CGF.Builder.CreateConstrainedFPCall(F, { Src0 });
503 } else {
504 Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
505 return CGF.Builder.CreateCall(F, Src0);
506 }
507 }
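// Illustrative sketch: __builtin_sqrt(x) for a 'double' normally lowers to
//   %r = call double @llvm.sqrt.f64(double %x)
// but when constrained floating-point evaluation is in effect, the constrained
// counterpart (e.g. @llvm.experimental.constrained.sqrt.f64) is used instead,
// with the rounding/exception metadata appended by CreateConstrainedFPCall.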
508
509 // Emit an intrinsic that has 2 operands of the same type as its result.
510 // Depending on mode, this may be a constrained floating-point intrinsic.
511 static Value *emitBinaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF,
512 const CallExpr *E, unsigned IntrinsicID,
513 unsigned ConstrainedIntrinsicID) {
514 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
515 llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
516
517 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
518 if (CGF.Builder.getIsFPConstrained()) {
519 Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Src0->getType());
520 return CGF.Builder.CreateConstrainedFPCall(F, { Src0, Src1 });
521 } else {
522 Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
523 return CGF.Builder.CreateCall(F, { Src0, Src1 });
524 }
525 }
526
527 // Like the above, but the second argument's type is also mangled into the intrinsic name.
528 static Value *emitBinaryExpMaybeConstrainedFPBuiltin(
529 CodeGenFunction &CGF, const CallExpr *E, llvm::Intrinsic::ID IntrinsicID,
530 llvm::Intrinsic::ID ConstrainedIntrinsicID) {
531 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
532 llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
533
534 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
535 if (CGF.Builder.getIsFPConstrained()) {
536 Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID,
537 {Src0->getType(), Src1->getType()});
538 return CGF.Builder.CreateConstrainedFPCall(F, {Src0, Src1});
539 }
540
541 Function *F =
542 CGF.CGM.getIntrinsic(IntrinsicID, {Src0->getType(), Src1->getType()});
543 return CGF.Builder.CreateCall(F, {Src0, Src1});
544 }
545
546 // Emit an intrinsic that has 3 operands of the same type as its result.
547 // Depending on mode, this may be a constrained floating-point intrinsic.
548 static Value *emitTernaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF,
549 const CallExpr *E, unsigned IntrinsicID,
550 unsigned ConstrainedIntrinsicID) {
551 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
552 llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
553 llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2));
554
555 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
556 if (CGF.Builder.getIsFPConstrained()) {
557 Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Src0->getType());
558 return CGF.Builder.CreateConstrainedFPCall(F, { Src0, Src1, Src2 });
559 } else {
560 Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
561 return CGF.Builder.CreateCall(F, { Src0, Src1, Src2 });
562 }
563 }
564
565 // Emit an intrinsic where all operands are of the same type as the result.
566 // Depending on mode, this may be a constrained floating-point intrinsic.
567 static Value *emitCallMaybeConstrainedFPBuiltin(CodeGenFunction &CGF,
568 unsigned IntrinsicID,
569 unsigned ConstrainedIntrinsicID,
570 llvm::Type *Ty,
571 ArrayRef<Value *> Args) {
572 Function *F;
573 if (CGF.Builder.getIsFPConstrained())
574 F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Ty);
575 else
576 F = CGF.CGM.getIntrinsic(IntrinsicID, Ty);
577
578 if (CGF.Builder.getIsFPConstrained())
579 return CGF.Builder.CreateConstrainedFPCall(F, Args);
580 else
581 return CGF.Builder.CreateCall(F, Args);
582 }
583
584 // Emit a simple intrinsic that has N scalar arguments and a return type
585 // matching the argument type. It is assumed that only the first argument is
586 // overloaded.
587 template <unsigned N>
588 Value *emitBuiltinWithOneOverloadedType(CodeGenFunction &CGF, const CallExpr *E,
589 unsigned IntrinsicID,
590 llvm::StringRef Name = "") {
591 static_assert(N, "expect non-empty argument");
592 SmallVector<Value *, N> Args;
593 for (unsigned I = 0; I < N; ++I)
594 Args.push_back(CGF.EmitScalarExpr(E->getArg(I)));
595 Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Args[0]->getType());
596 return CGF.Builder.CreateCall(F, Args, Name);
597 }
598
599 // Emit an intrinsic that has 1 float or double operand, and 1 integer.
600 static Value *emitFPIntBuiltin(CodeGenFunction &CGF,
601 const CallExpr *E,
602 unsigned IntrinsicID) {
603 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
604 llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
605
606 Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
607 return CGF.Builder.CreateCall(F, {Src0, Src1});
608 }
609
610 // Emit an intrinsic that has overloaded integer result and fp operand.
611 static Value *
612 emitMaybeConstrainedFPToIntRoundBuiltin(CodeGenFunction &CGF, const CallExpr *E,
613 unsigned IntrinsicID,
614 unsigned ConstrainedIntrinsicID) {
615 llvm::Type *ResultType = CGF.ConvertType(E->getType());
616 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
617
618 if (CGF.Builder.getIsFPConstrained()) {
619 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
620 Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID,
621 {ResultType, Src0->getType()});
622 return CGF.Builder.CreateConstrainedFPCall(F, {Src0});
623 } else {
624 Function *F =
625 CGF.CGM.getIntrinsic(IntrinsicID, {ResultType, Src0->getType()});
626 return CGF.Builder.CreateCall(F, Src0);
627 }
628 }
629
630 static Value *emitFrexpBuiltin(CodeGenFunction &CGF, const CallExpr *E,
631 llvm::Intrinsic::ID IntrinsicID) {
632 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
633 llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
634
635 QualType IntPtrTy = E->getArg(1)->getType()->getPointeeType();
636 llvm::Type *IntTy = CGF.ConvertType(IntPtrTy);
637 llvm::Function *F =
638 CGF.CGM.getIntrinsic(IntrinsicID, {Src0->getType(), IntTy});
639 llvm::Value *Call = CGF.Builder.CreateCall(F, Src0);
640
641 llvm::Value *Exp = CGF.Builder.CreateExtractValue(Call, 1);
642 LValue LV = CGF.MakeNaturalAlignAddrLValue(Src1, IntPtrTy);
643 CGF.EmitStoreOfScalar(Exp, LV);
644
645 return CGF.Builder.CreateExtractValue(Call, 0);
646 }
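// Illustrative sketch: __builtin_frexp(x, &e) with 'double' x and 'int' e
// lowers roughly to
//   %r    = call { double, i32 } @llvm.frexp.f64.i32(double %x)
//   %exp  = extractvalue { double, i32 } %r, 1   ; stored to e
//   %mant = extractvalue { double, i32 } %r, 0   ; returned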
647
648 /// EmitFAbs - Emit a call to @llvm.fabs().
649 static Value *EmitFAbs(CodeGenFunction &CGF, Value *V) {
650 Function *F = CGF.CGM.getIntrinsic(Intrinsic::fabs, V->getType());
651 llvm::CallInst *Call = CGF.Builder.CreateCall(F, V);
652 Call->setDoesNotAccessMemory();
653 return Call;
654 }
655
656 /// Emit the computation of the sign bit for a floating point value. Returns
657 /// the i1 sign bit value.
658 static Value *EmitSignBit(CodeGenFunction &CGF, Value *V) {
659 LLVMContext &C = CGF.CGM.getLLVMContext();
660
661 llvm::Type *Ty = V->getType();
662 int Width = Ty->getPrimitiveSizeInBits();
663 llvm::Type *IntTy = llvm::IntegerType::get(C, Width);
664 V = CGF.Builder.CreateBitCast(V, IntTy);
665 if (Ty->isPPC_FP128Ty()) {
666 // We want the sign bit of the higher-order double. The bitcast we just
667 // did works as if the double-double was stored to memory and then
668 // read as an i128. The "store" will put the higher-order double in the
669 // lower address in both little- and big-Endian modes, but the "load"
670 // will treat those bits as a different part of the i128: the low bits in
671 // little-Endian, the high bits in big-Endian. Therefore, on big-Endian
672 // we need to shift the high bits down to the low before truncating.
673 Width >>= 1;
674 if (CGF.getTarget().isBigEndian()) {
675 Value *ShiftCst = llvm::ConstantInt::get(IntTy, Width);
676 V = CGF.Builder.CreateLShr(V, ShiftCst);
677 }
678     // We are truncating the value in order to extract the higher-order
679     // double, from which we will extract the sign.
680 IntTy = llvm::IntegerType::get(C, Width);
681 V = CGF.Builder.CreateTrunc(V, IntTy);
682 }
683 Value *Zero = llvm::Constant::getNullValue(IntTy);
684 return CGF.Builder.CreateICmpSLT(V, Zero);
685 }
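// Illustrative sketch: for a 'double' V this produces roughly
//   %bits = bitcast double %V to i64
//   %sign = icmp slt i64 %bits, 0
// i.e. the sign bit is read as the sign of the bit pattern reinterpreted as a
// signed integer; ppc_fp128 first narrows to the high-order double as
// described above.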
686
687 static RValue emitLibraryCall(CodeGenFunction &CGF, const FunctionDecl *FD,
688 const CallExpr *E, llvm::Constant *calleeValue) {
689 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
690 CGCallee callee = CGCallee::forDirect(calleeValue, GlobalDecl(FD));
691 RValue Call =
692 CGF.EmitCall(E->getCallee()->getType(), callee, E, ReturnValueSlot());
693
694   // Check whether this is one of the supported errno-setting FP intrinsics.
695 if (unsigned BuiltinID = FD->getBuiltinID()) {
696 auto IsErrnoIntrinsic = [&]() -> unsigned {
697 switch (BuiltinID) {
698 case Builtin::BIexpf:
699 case Builtin::BI__builtin_expf:
700 case Builtin::BI__builtin_expf128:
701 return true;
702 }
703 // TODO: support more FP math libcalls
704 return false;
705 }();
706
707     // Restrict this to targets with errno; for example, macOS does not set errno.
708 if (IsErrnoIntrinsic && CGF.CGM.getLangOpts().MathErrno &&
709 !CGF.Builder.getIsFPConstrained()) {
710 ASTContext &Context = CGF.getContext();
711 // Emit "int" TBAA metadata on FP math libcalls.
712 clang::QualType IntTy = Context.IntTy;
713 TBAAAccessInfo TBAAInfo = CGF.CGM.getTBAAAccessInfo(IntTy);
714 Instruction *Inst = cast<llvm::Instruction>(Call.getScalarVal());
715 CGF.CGM.DecorateInstructionWithTBAA(Inst, TBAAInfo);
716 }
717 }
718 return Call;
719 }
720
721 /// Emit a call to llvm.{sadd,uadd,ssub,usub,smul,umul}.with.overflow.*
722 /// depending on IntrinsicID.
723 ///
724 /// \arg CGF The current codegen function.
725 /// \arg IntrinsicID The ID for the Intrinsic we wish to generate.
726 /// \arg X The first argument to the llvm.*.with.overflow.*.
727 /// \arg Y The second argument to the llvm.*.with.overflow.*.
728 /// \arg Carry The carry returned by the llvm.*.with.overflow.*.
729 /// \returns The result (i.e. sum/product) returned by the intrinsic.
730 static llvm::Value *EmitOverflowIntrinsic(CodeGenFunction &CGF,
731 const llvm::Intrinsic::ID IntrinsicID,
732 llvm::Value *X, llvm::Value *Y,
733 llvm::Value *&Carry) {
734 // Make sure we have integers of the same width.
735 assert(X->getType() == Y->getType() &&
736 "Arguments must be the same type. (Did you forget to make sure both "
737 "arguments have the same integer width?)");
738
739 Function *Callee = CGF.CGM.getIntrinsic(IntrinsicID, X->getType());
740 llvm::Value *Tmp = CGF.Builder.CreateCall(Callee, {X, Y});
741 Carry = CGF.Builder.CreateExtractValue(Tmp, 1);
742 return CGF.Builder.CreateExtractValue(Tmp, 0);
743 }
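// Illustrative sketch: __builtin_sadd_overflow(a, b, &r) with 'int' operands
// lowers roughly to
//   %pair  = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
//   %carry = extractvalue { i32, i1 } %pair, 1   ; the builtin's return value
//   %sum   = extractvalue { i32, i1 } %pair, 0   ; stored to r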
744
745 static Value *emitRangedBuiltin(CodeGenFunction &CGF, unsigned IntrinsicID,
746 int low, int high) {
747 Function *F = CGF.CGM.getIntrinsic(IntrinsicID, {});
748 llvm::CallInst *Call = CGF.Builder.CreateCall(F);
749 llvm::ConstantRange CR(APInt(32, low), APInt(32, high));
750 Call->addRangeRetAttr(CR);
751 Call->addRetAttr(llvm::Attribute::AttrKind::NoUndef);
752 return Call;
753 }
754
755 namespace {
756 struct WidthAndSignedness {
757 unsigned Width;
758 bool Signed;
759 };
760 }
761
762 static WidthAndSignedness
763 getIntegerWidthAndSignedness(const clang::ASTContext &context,
764 const clang::QualType Type) {
765 assert(Type->isIntegerType() && "Given type is not an integer.");
766 unsigned Width = Type->isBooleanType() ? 1
767 : Type->isBitIntType() ? context.getIntWidth(Type)
768 : context.getTypeInfo(Type).Width;
769 bool Signed = Type->isSignedIntegerType();
770 return {Width, Signed};
771 }
772
773 // Given one or more integer types, this function produces an integer type that
774 // encompasses them: any value in one of the given types could be expressed in
775 // the encompassing type.
776 static struct WidthAndSignedness
777 EncompassingIntegerType(ArrayRef<struct WidthAndSignedness> Types) {
778 assert(Types.size() > 0 && "Empty list of types.");
779
780 // If any of the given types is signed, we must return a signed type.
781 bool Signed = false;
782 for (const auto &Type : Types) {
783 Signed |= Type.Signed;
784 }
785
786 // The encompassing type must have a width greater than or equal to the width
787 // of the specified types. Additionally, if the encompassing type is signed,
788 // its width must be strictly greater than the width of any unsigned types
789 // given.
790 unsigned Width = 0;
791 for (const auto &Type : Types) {
792 unsigned MinWidth = Type.Width + (Signed && !Type.Signed);
793 if (Width < MinWidth) {
794 Width = MinWidth;
795 }
796 }
797
798 return {Width, Signed};
799 }
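// Worked example (illustrative): encompassing {32-bit signed, 32-bit unsigned}
// yields a 33-bit signed type, since a signed result must be strictly wider
// than any unsigned input; {16-bit unsigned, 32-bit unsigned} simply yields a
// 32-bit unsigned type.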
800
801 Value *CodeGenFunction::EmitVAStartEnd(Value *ArgValue, bool IsStart) {
802 Intrinsic::ID inst = IsStart ? Intrinsic::vastart : Intrinsic::vaend;
803 return Builder.CreateCall(CGM.getIntrinsic(inst, {ArgValue->getType()}),
804 ArgValue);
805 }
806
807 /// Checks if using the result of __builtin_object_size(p, @p From) in place of
808 /// __builtin_object_size(p, @p To) is correct
809 static bool areBOSTypesCompatible(int From, int To) {
810 // Note: Our __builtin_object_size implementation currently treats Type=0 and
811 // Type=2 identically. Encoding this implementation detail here may make
812 // improving __builtin_object_size difficult in the future, so it's omitted.
813 return From == To || (From == 0 && To == 1) || (From == 3 && To == 2);
814 }
815
816 static llvm::Value *
817 getDefaultBuiltinObjectSizeResult(unsigned Type, llvm::IntegerType *ResType) {
818 return ConstantInt::get(ResType, (Type & 2) ? 0 : -1, /*isSigned=*/true);
819 }
820
821 llvm::Value *
822 CodeGenFunction::evaluateOrEmitBuiltinObjectSize(const Expr *E, unsigned Type,
823 llvm::IntegerType *ResType,
824 llvm::Value *EmittedE,
825 bool IsDynamic) {
826 uint64_t ObjectSize;
827 if (!E->tryEvaluateObjectSize(ObjectSize, getContext(), Type))
828 return emitBuiltinObjectSize(E, Type, ResType, EmittedE, IsDynamic);
829 return ConstantInt::get(ResType, ObjectSize, /*isSigned=*/true);
830 }
831
832 const FieldDecl *CodeGenFunction::FindFlexibleArrayMemberFieldAndOffset(
833 ASTContext &Ctx, const RecordDecl *RD, const FieldDecl *FAMDecl,
834 uint64_t &Offset) {
835 const LangOptions::StrictFlexArraysLevelKind StrictFlexArraysLevel =
836 getLangOpts().getStrictFlexArraysLevel();
837 uint32_t FieldNo = 0;
838
839 if (RD->isImplicit())
840 return nullptr;
841
842 for (const FieldDecl *FD : RD->fields()) {
843 if ((!FAMDecl || FD == FAMDecl) &&
844 Decl::isFlexibleArrayMemberLike(
845 Ctx, FD, FD->getType(), StrictFlexArraysLevel,
846 /*IgnoreTemplateOrMacroSubstitution=*/true)) {
847 const ASTRecordLayout &Layout = Ctx.getASTRecordLayout(RD);
848 Offset += Layout.getFieldOffset(FieldNo);
849 return FD;
850 }
851
852 QualType Ty = FD->getType();
853 if (Ty->isRecordType()) {
854 if (const FieldDecl *Field = FindFlexibleArrayMemberFieldAndOffset(
855 Ctx, Ty->getAsRecordDecl(), FAMDecl, Offset)) {
856 const ASTRecordLayout &Layout = Ctx.getASTRecordLayout(RD);
857 Offset += Layout.getFieldOffset(FieldNo);
858 return Field;
859 }
860 }
861
862 if (!RD->isUnion())
863 ++FieldNo;
864 }
865
866 return nullptr;
867 }
868
869 static unsigned CountCountedByAttrs(const RecordDecl *RD) {
870 unsigned Num = 0;
871
872 for (const FieldDecl *FD : RD->fields()) {
873 if (FD->getType()->isCountAttributedType())
874 return ++Num;
875
876 QualType Ty = FD->getType();
877 if (Ty->isRecordType())
878 Num += CountCountedByAttrs(Ty->getAsRecordDecl());
879 }
880
881 return Num;
882 }
883
884 llvm::Value *
885 CodeGenFunction::emitFlexibleArrayMemberSize(const Expr *E, unsigned Type,
886 llvm::IntegerType *ResType) {
887 // The code generated here calculates the size of a struct with a flexible
888   // array member that uses the counted_by attribute. There are three cases
889 // we handle:
890 //
891 // struct s {
892 // unsigned long flags;
893 // int count;
894 // int array[] __attribute__((counted_by(count)));
895 // }
896 //
897 // 1) bdos of the flexible array itself:
898 //
899 // __builtin_dynamic_object_size(p->array, 1) ==
900 // p->count * sizeof(*p->array)
901 //
902 // 2) bdos of a pointer into the flexible array:
903 //
904 // __builtin_dynamic_object_size(&p->array[42], 1) ==
905 // (p->count - 42) * sizeof(*p->array)
906 //
907   // 3) bdos of the whole struct, including the flexible array:
908 //
909 // __builtin_dynamic_object_size(p, 1) ==
910 // max(sizeof(struct s),
911 // offsetof(struct s, array) + p->count * sizeof(*p->array))
912 //
913 ASTContext &Ctx = getContext();
914 const Expr *Base = E->IgnoreParenImpCasts();
915 const Expr *Idx = nullptr;
916
917 if (const auto *UO = dyn_cast<UnaryOperator>(Base);
918 UO && UO->getOpcode() == UO_AddrOf) {
919 Expr *SubExpr = UO->getSubExpr()->IgnoreParenImpCasts();
920 if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(SubExpr)) {
921 Base = ASE->getBase()->IgnoreParenImpCasts();
922 Idx = ASE->getIdx()->IgnoreParenImpCasts();
923
924 if (const auto *IL = dyn_cast<IntegerLiteral>(Idx)) {
925 int64_t Val = IL->getValue().getSExtValue();
926 if (Val < 0)
927 return getDefaultBuiltinObjectSizeResult(Type, ResType);
928
929 if (Val == 0)
930 // The index is 0, so we don't need to take it into account.
931 Idx = nullptr;
932 }
933 } else {
934 // Potential pointer to another element in the struct.
935 Base = SubExpr;
936 }
937 }
938
939 // Get the flexible array member Decl.
940 const RecordDecl *OuterRD = nullptr;
941 const FieldDecl *FAMDecl = nullptr;
942 if (const auto *ME = dyn_cast<MemberExpr>(Base)) {
943 // Check if \p Base is referencing the FAM itself.
944 const ValueDecl *VD = ME->getMemberDecl();
945 OuterRD = VD->getDeclContext()->getOuterLexicalRecordContext();
946 FAMDecl = dyn_cast<FieldDecl>(VD);
947 if (!FAMDecl)
948 return nullptr;
949 } else if (const auto *DRE = dyn_cast<DeclRefExpr>(Base)) {
950 // Check if we're pointing to the whole struct.
951 QualType Ty = DRE->getDecl()->getType();
952 if (Ty->isPointerType())
953 Ty = Ty->getPointeeType();
954 OuterRD = Ty->getAsRecordDecl();
955
956 // If we have a situation like this:
957 //
958 // struct union_of_fams {
959 // int flags;
960 // union {
961 // signed char normal_field;
962 // struct {
963 // int count1;
964 // int arr1[] __counted_by(count1);
965 // };
966 // struct {
967 // signed char count2;
968 // int arr2[] __counted_by(count2);
969 // };
970 // };
971 // };
972 //
973 // We don't know which 'count' to use in this scenario:
974 //
975 // size_t get_size(struct union_of_fams *p) {
976 // return __builtin_dynamic_object_size(p, 1);
977 // }
978 //
979 // Instead of calculating a wrong number, we give up.
980 if (OuterRD && CountCountedByAttrs(OuterRD) > 1)
981 return nullptr;
982 }
983
984 if (!OuterRD)
985 return nullptr;
986
987   // We call FindFlexibleArrayMemberFieldAndOffset even if FAMDecl is non-null
988   // to get its offset.
989 uint64_t Offset = 0;
990 FAMDecl =
991 FindFlexibleArrayMemberFieldAndOffset(Ctx, OuterRD, FAMDecl, Offset);
992 Offset = Ctx.toCharUnitsFromBits(Offset).getQuantity();
993
994 if (!FAMDecl || !FAMDecl->getType()->isCountAttributedType())
995 // No flexible array member found or it doesn't have the "counted_by"
996 // attribute.
997 return nullptr;
998
999 const FieldDecl *CountedByFD = FindCountedByField(FAMDecl);
1000 if (!CountedByFD)
1001 // Can't find the field referenced by the "counted_by" attribute.
1002 return nullptr;
1003
1004 if (isa<DeclRefExpr>(Base))
1005     // The whole struct is specified in the __bdos. The calculation of the
1006 // whole size of the structure can be done in two ways:
1007 //
1008 // 1) sizeof(struct S) + count * sizeof(typeof(fam))
1009 // 2) offsetof(struct S, fam) + count * sizeof(typeof(fam))
1010 //
1011     // The first will add additional padding after the end of the array
1012     // allocation, while the second method is more precise but not quite what
1013     // programmers expect. See
1014 // https://lore.kernel.org/lkml/ZvV6X5FPBBW7CO1f@archlinux/ for a
1015 // discussion of the topic.
1016 //
1017 // GCC isn't (currently) able to calculate __bdos on a pointer to the whole
1018 // structure. Therefore, because of the above issue, we'll choose to match
1019 // what GCC does for consistency's sake.
1020 return nullptr;
1021
1022 // Build a load of the counted_by field.
1023 bool IsSigned = CountedByFD->getType()->isSignedIntegerType();
1024 Value *CountedByInst = EmitCountedByFieldExpr(Base, FAMDecl, CountedByFD);
1025 if (!CountedByInst)
1026 return getDefaultBuiltinObjectSizeResult(Type, ResType);
1027
1028 CountedByInst = Builder.CreateIntCast(CountedByInst, ResType, IsSigned);
1029
1030 // Build a load of the index and subtract it from the count.
1031 Value *IdxInst = nullptr;
1032 if (Idx) {
1033 if (Idx->HasSideEffects(getContext()))
1034 // We can't have side-effects.
1035 return getDefaultBuiltinObjectSizeResult(Type, ResType);
1036
1037 bool IdxSigned = Idx->getType()->isSignedIntegerType();
1038 IdxInst = EmitAnyExprToTemp(Idx).getScalarVal();
1039 IdxInst = Builder.CreateIntCast(IdxInst, ResType, IdxSigned);
1040
1041 // We go ahead with the calculation here. If the index turns out to be
1042 // negative, we'll catch it at the end.
1043 CountedByInst =
1044 Builder.CreateSub(CountedByInst, IdxInst, "", !IsSigned, IsSigned);
1045 }
1046
1047 // Calculate how large the flexible array member is in bytes.
1048 const ArrayType *ArrayTy = Ctx.getAsArrayType(FAMDecl->getType());
1049 CharUnits Size = Ctx.getTypeSizeInChars(ArrayTy->getElementType());
1050 llvm::Constant *ElemSize =
1051 llvm::ConstantInt::get(ResType, Size.getQuantity(), IsSigned);
1052 Value *Res =
1053 Builder.CreateMul(CountedByInst, ElemSize, "", !IsSigned, IsSigned);
1054 Res = Builder.CreateIntCast(Res, ResType, IsSigned);
1055
1056 // A negative \p IdxInst or \p CountedByInst means that the index lands
1057 // outside of the flexible array member. If that's the case, we want to
1058 // return 0.
1059 Value *Cmp = Builder.CreateIsNotNeg(CountedByInst);
1060 if (IdxInst)
1061 Cmp = Builder.CreateAnd(Builder.CreateIsNotNeg(IdxInst), Cmp);
1062
1063 return Builder.CreateSelect(Cmp, Res, ConstantInt::get(ResType, 0, IsSigned));
1064 }
1065
1066 /// Returns a Value corresponding to the size of the given expression.
1067 /// This Value may be either of the following:
1068 /// - A llvm::Argument (if E is a param with the pass_object_size attribute on
1069 /// it)
1070 /// - A call to the @llvm.objectsize intrinsic
1071 ///
1072 /// EmittedE is the result of emitting `E` as a scalar expr. If it's non-null
1073 /// and we wouldn't otherwise try to reference a pass_object_size parameter,
1074 /// we'll call @llvm.objectsize on EmittedE, rather than emitting E.
1075 llvm::Value *
1076 CodeGenFunction::emitBuiltinObjectSize(const Expr *E, unsigned Type,
1077 llvm::IntegerType *ResType,
1078 llvm::Value *EmittedE, bool IsDynamic) {
1079 // We need to reference an argument if the pointer is a parameter with the
1080 // pass_object_size attribute.
1081 if (auto *D = dyn_cast<DeclRefExpr>(E->IgnoreParenImpCasts())) {
1082 auto *Param = dyn_cast<ParmVarDecl>(D->getDecl());
1083 auto *PS = D->getDecl()->getAttr<PassObjectSizeAttr>();
1084 if (Param != nullptr && PS != nullptr &&
1085 areBOSTypesCompatible(PS->getType(), Type)) {
1086 auto Iter = SizeArguments.find(Param);
1087 assert(Iter != SizeArguments.end());
1088
1089 const ImplicitParamDecl *D = Iter->second;
1090 auto DIter = LocalDeclMap.find(D);
1091 assert(DIter != LocalDeclMap.end());
1092
1093 return EmitLoadOfScalar(DIter->second, /*Volatile=*/false,
1094 getContext().getSizeType(), E->getBeginLoc());
1095 }
1096 }
1097
1098 if (IsDynamic) {
1099 // Emit special code for a flexible array member with the "counted_by"
1100 // attribute.
1101 if (Value *V = emitFlexibleArrayMemberSize(E, Type, ResType))
1102 return V;
1103 }
1104
1105 // LLVM can't handle Type=3 appropriately, and __builtin_object_size shouldn't
1106 // evaluate E for side-effects. In either case, we shouldn't lower to
1107 // @llvm.objectsize.
1108 if (Type == 3 || (!EmittedE && E->HasSideEffects(getContext())))
1109 return getDefaultBuiltinObjectSizeResult(Type, ResType);
1110
1111 Value *Ptr = EmittedE ? EmittedE : EmitScalarExpr(E);
1112 assert(Ptr->getType()->isPointerTy() &&
1113 "Non-pointer passed to __builtin_object_size?");
1114
1115 Function *F =
1116 CGM.getIntrinsic(Intrinsic::objectsize, {ResType, Ptr->getType()});
1117
1118   // LLVM only supports 0 and 2; make sure we pass that along as a boolean.
1119 Value *Min = Builder.getInt1((Type & 2) != 0);
1120   // For GCC compatibility, __builtin_object_size treats NULL as unknown size.
1121 Value *NullIsUnknown = Builder.getTrue();
1122 Value *Dynamic = Builder.getInt1(IsDynamic);
1123 return Builder.CreateCall(F, {Ptr, Min, NullIsUnknown, Dynamic});
1124 }
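// Illustrative sketch: a __builtin_object_size(p, 2) that cannot be folded
// lowers roughly to
//   %sz = call i64 @llvm.objectsize.i64.p0(ptr %p, i1 true, i1 true, i1 false)
// where the flags are (min, nullunknown, dynamic);
// __builtin_dynamic_object_size sets the last flag to true instead.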
1125
1126 namespace {
1127 /// A struct to generically describe a bit test intrinsic.
1128 struct BitTest {
1129 enum ActionKind : uint8_t { TestOnly, Complement, Reset, Set };
1130 enum InterlockingKind : uint8_t {
1131 Unlocked,
1132 Sequential,
1133 Acquire,
1134 Release,
1135 NoFence
1136 };
1137
1138 ActionKind Action;
1139 InterlockingKind Interlocking;
1140 bool Is64Bit;
1141
1142 static BitTest decodeBitTestBuiltin(unsigned BuiltinID);
1143 };
1144
1145 } // namespace
1146
1147 BitTest BitTest::decodeBitTestBuiltin(unsigned BuiltinID) {
1148 switch (BuiltinID) {
1149 // Main portable variants.
1150 case Builtin::BI_bittest:
1151 return {TestOnly, Unlocked, false};
1152 case Builtin::BI_bittestandcomplement:
1153 return {Complement, Unlocked, false};
1154 case Builtin::BI_bittestandreset:
1155 return {Reset, Unlocked, false};
1156 case Builtin::BI_bittestandset:
1157 return {Set, Unlocked, false};
1158 case Builtin::BI_interlockedbittestandreset:
1159 return {Reset, Sequential, false};
1160 case Builtin::BI_interlockedbittestandset:
1161 return {Set, Sequential, false};
1162
1163 // X86-specific 64-bit variants.
1164 case Builtin::BI_bittest64:
1165 return {TestOnly, Unlocked, true};
1166 case Builtin::BI_bittestandcomplement64:
1167 return {Complement, Unlocked, true};
1168 case Builtin::BI_bittestandreset64:
1169 return {Reset, Unlocked, true};
1170 case Builtin::BI_bittestandset64:
1171 return {Set, Unlocked, true};
1172 case Builtin::BI_interlockedbittestandreset64:
1173 return {Reset, Sequential, true};
1174 case Builtin::BI_interlockedbittestandset64:
1175 return {Set, Sequential, true};
1176
1177 // ARM/AArch64-specific ordering variants.
1178 case Builtin::BI_interlockedbittestandset_acq:
1179 return {Set, Acquire, false};
1180 case Builtin::BI_interlockedbittestandset_rel:
1181 return {Set, Release, false};
1182 case Builtin::BI_interlockedbittestandset_nf:
1183 return {Set, NoFence, false};
1184 case Builtin::BI_interlockedbittestandreset_acq:
1185 return {Reset, Acquire, false};
1186 case Builtin::BI_interlockedbittestandreset_rel:
1187 return {Reset, Release, false};
1188 case Builtin::BI_interlockedbittestandreset_nf:
1189 return {Reset, NoFence, false};
1190 }
1191 llvm_unreachable("expected only bittest intrinsics");
1192 }
1193
1194 static char bitActionToX86BTCode(BitTest::ActionKind A) {
1195 switch (A) {
1196 case BitTest::TestOnly: return '\0';
1197 case BitTest::Complement: return 'c';
1198 case BitTest::Reset: return 'r';
1199 case BitTest::Set: return 's';
1200 }
1201 llvm_unreachable("invalid action");
1202 }
1203
1204 static llvm::Value *EmitX86BitTestIntrinsic(CodeGenFunction &CGF,
1205 BitTest BT,
1206 const CallExpr *E, Value *BitBase,
1207 Value *BitPos) {
1208 char Action = bitActionToX86BTCode(BT.Action);
1209 char SizeSuffix = BT.Is64Bit ? 'q' : 'l';
1210
1211 // Build the assembly.
1212 SmallString<64> Asm;
1213 raw_svector_ostream AsmOS(Asm);
1214 if (BT.Interlocking != BitTest::Unlocked)
1215 AsmOS << "lock ";
1216 AsmOS << "bt";
1217 if (Action)
1218 AsmOS << Action;
1219 AsmOS << SizeSuffix << " $2, ($1)";
1220
1221 // Build the constraints. FIXME: We should support immediates when possible.
1222 std::string Constraints = "={@ccc},r,r,~{cc},~{memory}";
1223 std::string_view MachineClobbers = CGF.getTarget().getClobbers();
1224 if (!MachineClobbers.empty()) {
1225 Constraints += ',';
1226 Constraints += MachineClobbers;
1227 }
1228 llvm::IntegerType *IntType = llvm::IntegerType::get(
1229 CGF.getLLVMContext(),
1230 CGF.getContext().getTypeSize(E->getArg(1)->getType()));
1231 llvm::FunctionType *FTy =
1232 llvm::FunctionType::get(CGF.Int8Ty, {CGF.UnqualPtrTy, IntType}, false);
1233
1234 llvm::InlineAsm *IA =
1235 llvm::InlineAsm::get(FTy, Asm, Constraints, /*hasSideEffects=*/true);
1236 return CGF.Builder.CreateCall(IA, {BitBase, BitPos});
1237 }
1238
1239 static llvm::AtomicOrdering
1240 getBitTestAtomicOrdering(BitTest::InterlockingKind I) {
1241 switch (I) {
1242 case BitTest::Unlocked: return llvm::AtomicOrdering::NotAtomic;
1243 case BitTest::Sequential: return llvm::AtomicOrdering::SequentiallyConsistent;
1244 case BitTest::Acquire: return llvm::AtomicOrdering::Acquire;
1245 case BitTest::Release: return llvm::AtomicOrdering::Release;
1246 case BitTest::NoFence: return llvm::AtomicOrdering::Monotonic;
1247 }
1248 llvm_unreachable("invalid interlocking");
1249 }
1250
1251 /// Emit a _bittest* intrinsic. These intrinsics take a pointer to an array of
1252 /// bits and a bit position and read and optionally modify the bit at that
1253 /// position. The position index can be arbitrarily large, i.e. it can be larger
1254 /// than 31 or 63, so we need an indexed load in the general case.
1255 static llvm::Value *EmitBitTestIntrinsic(CodeGenFunction &CGF,
1256 unsigned BuiltinID,
1257 const CallExpr *E) {
1258 Value *BitBase = CGF.EmitScalarExpr(E->getArg(0));
1259 Value *BitPos = CGF.EmitScalarExpr(E->getArg(1));
1260
1261 BitTest BT = BitTest::decodeBitTestBuiltin(BuiltinID);
1262
1263 // X86 has special BT, BTC, BTR, and BTS instructions that handle the array
1264 // indexing operation internally. Use them if possible.
1265 if (CGF.getTarget().getTriple().isX86())
1266 return EmitX86BitTestIntrinsic(CGF, BT, E, BitBase, BitPos);
1267
1268 // Otherwise, use generic code to load one byte and test the bit. Use all but
1269 // the bottom three bits as the array index, and the bottom three bits to form
1270 // a mask.
1271 // Bit = BitBaseI8[BitPos >> 3] & (1 << (BitPos & 0x7)) != 0;
1272 Value *ByteIndex = CGF.Builder.CreateAShr(
1273 BitPos, llvm::ConstantInt::get(BitPos->getType(), 3), "bittest.byteidx");
1274 Value *BitBaseI8 = CGF.Builder.CreatePointerCast(BitBase, CGF.Int8PtrTy);
1275 Address ByteAddr(CGF.Builder.CreateInBoundsGEP(CGF.Int8Ty, BitBaseI8,
1276 ByteIndex, "bittest.byteaddr"),
1277 CGF.Int8Ty, CharUnits::One());
1278 Value *PosLow =
1279 CGF.Builder.CreateAnd(CGF.Builder.CreateTrunc(BitPos, CGF.Int8Ty),
1280 llvm::ConstantInt::get(CGF.Int8Ty, 0x7));
1281
1282 // The updating instructions will need a mask.
1283 Value *Mask = nullptr;
1284 if (BT.Action != BitTest::TestOnly) {
1285 Mask = CGF.Builder.CreateShl(llvm::ConstantInt::get(CGF.Int8Ty, 1), PosLow,
1286 "bittest.mask");
1287 }
1288
1289 // Check the action and ordering of the interlocked intrinsics.
1290 llvm::AtomicOrdering Ordering = getBitTestAtomicOrdering(BT.Interlocking);
1291
1292 Value *OldByte = nullptr;
1293 if (Ordering != llvm::AtomicOrdering::NotAtomic) {
1294 // Emit a combined atomicrmw load/store operation for the interlocked
1295 // intrinsics.
1296 llvm::AtomicRMWInst::BinOp RMWOp = llvm::AtomicRMWInst::Or;
1297 if (BT.Action == BitTest::Reset) {
1298 Mask = CGF.Builder.CreateNot(Mask);
1299 RMWOp = llvm::AtomicRMWInst::And;
1300 }
1301 OldByte = CGF.Builder.CreateAtomicRMW(RMWOp, ByteAddr, Mask, Ordering);
1302 } else {
1303 // Emit a plain load for the non-interlocked intrinsics.
1304 OldByte = CGF.Builder.CreateLoad(ByteAddr, "bittest.byte");
1305 Value *NewByte = nullptr;
1306 switch (BT.Action) {
1307 case BitTest::TestOnly:
1308 // Don't store anything.
1309 break;
1310 case BitTest::Complement:
1311 NewByte = CGF.Builder.CreateXor(OldByte, Mask);
1312 break;
1313 case BitTest::Reset:
1314 NewByte = CGF.Builder.CreateAnd(OldByte, CGF.Builder.CreateNot(Mask));
1315 break;
1316 case BitTest::Set:
1317 NewByte = CGF.Builder.CreateOr(OldByte, Mask);
1318 break;
1319 }
1320 if (NewByte)
1321 CGF.Builder.CreateStore(NewByte, ByteAddr);
1322 }
1323
1324 // However we loaded the old byte, either by plain load or atomicrmw, shift
1325 // the bit into the low position and mask it to 0 or 1.
1326 Value *ShiftedByte = CGF.Builder.CreateLShr(OldByte, PosLow, "bittest.shr");
1327 return CGF.Builder.CreateAnd(
1328 ShiftedByte, llvm::ConstantInt::get(CGF.Int8Ty, 1), "bittest.res");
1329 }
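
// Rough sketch (illustrative names and types) of what the generic, non-x86
// path above produces for an interlocked bit-test-and-set style builtin with
// a 64-bit bit index; the actual ordering comes from getBitTestAtomicOrdering:
//
//   %byteidx  = ashr i64 %bitpos, 3                      ; bittest.byteidx
//   %byteaddr = getelementptr inbounds i8, ptr %base, i64 %byteidx
//   %poslow   = and i8 (trunc of %bitpos to i8), 7
//   %mask     = shl i8 1, %poslow                        ; bittest.mask
//   %old      = atomicrmw or ptr %byteaddr, i8 %mask <ordering>
//   %shr      = lshr i8 %old, %poslow                    ; bittest.shr
//   %res      = and i8 %shr, 1                           ; bittest.res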
1330
1331 static llvm::Value *emitPPCLoadReserveIntrinsic(CodeGenFunction &CGF,
1332 unsigned BuiltinID,
1333 const CallExpr *E) {
1334 Value *Addr = CGF.EmitScalarExpr(E->getArg(0));
1335
1336 SmallString<64> Asm;
1337 raw_svector_ostream AsmOS(Asm);
1338 llvm::IntegerType *RetType = CGF.Int32Ty;
1339
1340 switch (BuiltinID) {
1341 case clang::PPC::BI__builtin_ppc_ldarx:
1342 AsmOS << "ldarx ";
1343 RetType = CGF.Int64Ty;
1344 break;
1345 case clang::PPC::BI__builtin_ppc_lwarx:
1346 AsmOS << "lwarx ";
1347 RetType = CGF.Int32Ty;
1348 break;
1349 case clang::PPC::BI__builtin_ppc_lharx:
1350 AsmOS << "lharx ";
1351 RetType = CGF.Int16Ty;
1352 break;
1353 case clang::PPC::BI__builtin_ppc_lbarx:
1354 AsmOS << "lbarx ";
1355 RetType = CGF.Int8Ty;
1356 break;
1357 default:
1358 llvm_unreachable("Expected only PowerPC load reserve intrinsics");
1359 }
1360
1361 AsmOS << "$0, ${1:y}";
1362
1363 std::string Constraints = "=r,*Z,~{memory}";
1364 std::string_view MachineClobbers = CGF.getTarget().getClobbers();
1365 if (!MachineClobbers.empty()) {
1366 Constraints += ',';
1367 Constraints += MachineClobbers;
1368 }
1369
1370 llvm::Type *PtrType = CGF.UnqualPtrTy;
1371 llvm::FunctionType *FTy = llvm::FunctionType::get(RetType, {PtrType}, false);
1372
1373 llvm::InlineAsm *IA =
1374 llvm::InlineAsm::get(FTy, Asm, Constraints, /*hasSideEffects=*/true);
1375 llvm::CallInst *CI = CGF.Builder.CreateCall(IA, {Addr});
1376 CI->addParamAttr(
1377 0, Attribute::get(CGF.getLLVMContext(), Attribute::ElementType, RetType));
1378 return CI;
1379 }
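
// Rough sketch (illustrative) of the lowering above for
// `int v = __builtin_ppc_lwarx(p);` — a side-effecting inline-asm call whose
// constraint string gains any extra machine clobbers the target reports:
//
//   %v = call i32 asm sideeffect "lwarx $0, ${1:y}",
//                  "=r,*Z,~{memory}"(ptr elementtype(i32) %p)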
1380
1381 namespace {
1382 enum class MSVCSetJmpKind {
1383 _setjmpex,
1384 _setjmp3,
1385 _setjmp
1386 };
1387 }
1388
1389 /// MSVC handles setjmp a bit differently on different platforms. On every
1390 /// architecture except 32-bit x86, the frame address is passed. On x86, extra
1391 /// parameters can be passed as variadic arguments, but we always pass none.
1392 static RValue EmitMSVCRTSetJmp(CodeGenFunction &CGF, MSVCSetJmpKind SJKind,
1393 const CallExpr *E) {
1394 llvm::Value *Arg1 = nullptr;
1395 llvm::Type *Arg1Ty = nullptr;
1396 StringRef Name;
1397 bool IsVarArg = false;
1398 if (SJKind == MSVCSetJmpKind::_setjmp3) {
1399 Name = "_setjmp3";
1400 Arg1Ty = CGF.Int32Ty;
1401 Arg1 = llvm::ConstantInt::get(CGF.IntTy, 0);
1402 IsVarArg = true;
1403 } else {
1404 Name = SJKind == MSVCSetJmpKind::_setjmp ? "_setjmp" : "_setjmpex";
1405 Arg1Ty = CGF.Int8PtrTy;
1406 if (CGF.getTarget().getTriple().getArch() == llvm::Triple::aarch64) {
1407 Arg1 = CGF.Builder.CreateCall(
1408 CGF.CGM.getIntrinsic(Intrinsic::sponentry, CGF.AllocaInt8PtrTy));
1409 } else
1410 Arg1 = CGF.Builder.CreateCall(
1411 CGF.CGM.getIntrinsic(Intrinsic::frameaddress, CGF.AllocaInt8PtrTy),
1412 llvm::ConstantInt::get(CGF.Int32Ty, 0));
1413 }
1414
1415 // Mark the call site and declaration with ReturnsTwice.
1416 llvm::Type *ArgTypes[2] = {CGF.Int8PtrTy, Arg1Ty};
1417 llvm::AttributeList ReturnsTwiceAttr = llvm::AttributeList::get(
1418 CGF.getLLVMContext(), llvm::AttributeList::FunctionIndex,
1419 llvm::Attribute::ReturnsTwice);
1420 llvm::FunctionCallee SetJmpFn = CGF.CGM.CreateRuntimeFunction(
1421 llvm::FunctionType::get(CGF.IntTy, ArgTypes, IsVarArg), Name,
1422 ReturnsTwiceAttr, /*Local=*/true);
1423
1424 llvm::Value *Buf = CGF.Builder.CreateBitOrPointerCast(
1425 CGF.EmitScalarExpr(E->getArg(0)), CGF.Int8PtrTy);
1426 llvm::Value *Args[] = {Buf, Arg1};
1427 llvm::CallBase *CB = CGF.EmitRuntimeCallOrInvoke(SetJmpFn, Args);
1428 CB->setAttributes(ReturnsTwiceAttr);
1429 return RValue::get(CB);
1430 }
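
// Rough sketch (illustrative) of the non-x86 lowering above for the
// MSVCSetJmpKind::_setjmpex kind; the AArch64 path substitutes llvm.sponentry
// for llvm.frameaddress, and 32-bit x86 instead calls the variadic _setjmp3
// with an extra i32 0:
//
//   %fa  = call ptr @llvm.frameaddress.p0(i32 0)
//   %ret = call i32 @_setjmpex(ptr %buf, ptr %fa)   ; marked returns_twice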
1431
1432 // Many MSVC builtins are shared between x64, ARM, and AArch64; to avoid
1433 // repeating code, we handle them here.
1434 enum class CodeGenFunction::MSVCIntrin {
1435 _BitScanForward,
1436 _BitScanReverse,
1437 _InterlockedAnd,
1438 _InterlockedDecrement,
1439 _InterlockedExchange,
1440 _InterlockedExchangeAdd,
1441 _InterlockedExchangeSub,
1442 _InterlockedIncrement,
1443 _InterlockedOr,
1444 _InterlockedXor,
1445 _InterlockedExchangeAdd_acq,
1446 _InterlockedExchangeAdd_rel,
1447 _InterlockedExchangeAdd_nf,
1448 _InterlockedExchange_acq,
1449 _InterlockedExchange_rel,
1450 _InterlockedExchange_nf,
1451 _InterlockedCompareExchange_acq,
1452 _InterlockedCompareExchange_rel,
1453 _InterlockedCompareExchange_nf,
1454 _InterlockedCompareExchange128,
1455 _InterlockedCompareExchange128_acq,
1456 _InterlockedCompareExchange128_rel,
1457 _InterlockedCompareExchange128_nf,
1458 _InterlockedOr_acq,
1459 _InterlockedOr_rel,
1460 _InterlockedOr_nf,
1461 _InterlockedXor_acq,
1462 _InterlockedXor_rel,
1463 _InterlockedXor_nf,
1464 _InterlockedAnd_acq,
1465 _InterlockedAnd_rel,
1466 _InterlockedAnd_nf,
1467 _InterlockedIncrement_acq,
1468 _InterlockedIncrement_rel,
1469 _InterlockedIncrement_nf,
1470 _InterlockedDecrement_acq,
1471 _InterlockedDecrement_rel,
1472 _InterlockedDecrement_nf,
1473 __fastfail,
1474 };
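
// Illustrative usage sketch: a target-specific emitter is expected to first
// map its own builtin ID onto this shared enum and then defer to
// EmitMSVCBuiltinExpr, roughly:
//
//   if (std::optional<MSVCIntrin> MsvcIntId = translateX86ToMsvcIntrin(BuiltinID))
//     return RValue::get(EmitMSVCBuiltinExpr(*MsvcIntId, E));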
1475
1476 static std::optional<CodeGenFunction::MSVCIntrin>
1477 translateArmToMsvcIntrin(unsigned BuiltinID) {
1478 using MSVCIntrin = CodeGenFunction::MSVCIntrin;
1479 switch (BuiltinID) {
1480 default:
1481 return std::nullopt;
1482 case clang::ARM::BI_BitScanForward:
1483 case clang::ARM::BI_BitScanForward64:
1484 return MSVCIntrin::_BitScanForward;
1485 case clang::ARM::BI_BitScanReverse:
1486 case clang::ARM::BI_BitScanReverse64:
1487 return MSVCIntrin::_BitScanReverse;
1488 case clang::ARM::BI_InterlockedAnd64:
1489 return MSVCIntrin::_InterlockedAnd;
1490 case clang::ARM::BI_InterlockedExchange64:
1491 return MSVCIntrin::_InterlockedExchange;
1492 case clang::ARM::BI_InterlockedExchangeAdd64:
1493 return MSVCIntrin::_InterlockedExchangeAdd;
1494 case clang::ARM::BI_InterlockedExchangeSub64:
1495 return MSVCIntrin::_InterlockedExchangeSub;
1496 case clang::ARM::BI_InterlockedOr64:
1497 return MSVCIntrin::_InterlockedOr;
1498 case clang::ARM::BI_InterlockedXor64:
1499 return MSVCIntrin::_InterlockedXor;
1500 case clang::ARM::BI_InterlockedDecrement64:
1501 return MSVCIntrin::_InterlockedDecrement;
1502 case clang::ARM::BI_InterlockedIncrement64:
1503 return MSVCIntrin::_InterlockedIncrement;
1504 case clang::ARM::BI_InterlockedExchangeAdd8_acq:
1505 case clang::ARM::BI_InterlockedExchangeAdd16_acq:
1506 case clang::ARM::BI_InterlockedExchangeAdd_acq:
1507 case clang::ARM::BI_InterlockedExchangeAdd64_acq:
1508 return MSVCIntrin::_InterlockedExchangeAdd_acq;
1509 case clang::ARM::BI_InterlockedExchangeAdd8_rel:
1510 case clang::ARM::BI_InterlockedExchangeAdd16_rel:
1511 case clang::ARM::BI_InterlockedExchangeAdd_rel:
1512 case clang::ARM::BI_InterlockedExchangeAdd64_rel:
1513 return MSVCIntrin::_InterlockedExchangeAdd_rel;
1514 case clang::ARM::BI_InterlockedExchangeAdd8_nf:
1515 case clang::ARM::BI_InterlockedExchangeAdd16_nf:
1516 case clang::ARM::BI_InterlockedExchangeAdd_nf:
1517 case clang::ARM::BI_InterlockedExchangeAdd64_nf:
1518 return MSVCIntrin::_InterlockedExchangeAdd_nf;
1519 case clang::ARM::BI_InterlockedExchange8_acq:
1520 case clang::ARM::BI_InterlockedExchange16_acq:
1521 case clang::ARM::BI_InterlockedExchange_acq:
1522 case clang::ARM::BI_InterlockedExchange64_acq:
1523 return MSVCIntrin::_InterlockedExchange_acq;
1524 case clang::ARM::BI_InterlockedExchange8_rel:
1525 case clang::ARM::BI_InterlockedExchange16_rel:
1526 case clang::ARM::BI_InterlockedExchange_rel:
1527 case clang::ARM::BI_InterlockedExchange64_rel:
1528 return MSVCIntrin::_InterlockedExchange_rel;
1529 case clang::ARM::BI_InterlockedExchange8_nf:
1530 case clang::ARM::BI_InterlockedExchange16_nf:
1531 case clang::ARM::BI_InterlockedExchange_nf:
1532 case clang::ARM::BI_InterlockedExchange64_nf:
1533 return MSVCIntrin::_InterlockedExchange_nf;
1534 case clang::ARM::BI_InterlockedCompareExchange8_acq:
1535 case clang::ARM::BI_InterlockedCompareExchange16_acq:
1536 case clang::ARM::BI_InterlockedCompareExchange_acq:
1537 case clang::ARM::BI_InterlockedCompareExchange64_acq:
1538 return MSVCIntrin::_InterlockedCompareExchange_acq;
1539 case clang::ARM::BI_InterlockedCompareExchange8_rel:
1540 case clang::ARM::BI_InterlockedCompareExchange16_rel:
1541 case clang::ARM::BI_InterlockedCompareExchange_rel:
1542 case clang::ARM::BI_InterlockedCompareExchange64_rel:
1543 return MSVCIntrin::_InterlockedCompareExchange_rel;
1544 case clang::ARM::BI_InterlockedCompareExchange8_nf:
1545 case clang::ARM::BI_InterlockedCompareExchange16_nf:
1546 case clang::ARM::BI_InterlockedCompareExchange_nf:
1547 case clang::ARM::BI_InterlockedCompareExchange64_nf:
1548 return MSVCIntrin::_InterlockedCompareExchange_nf;
1549 case clang::ARM::BI_InterlockedOr8_acq:
1550 case clang::ARM::BI_InterlockedOr16_acq:
1551 case clang::ARM::BI_InterlockedOr_acq:
1552 case clang::ARM::BI_InterlockedOr64_acq:
1553 return MSVCIntrin::_InterlockedOr_acq;
1554 case clang::ARM::BI_InterlockedOr8_rel:
1555 case clang::ARM::BI_InterlockedOr16_rel:
1556 case clang::ARM::BI_InterlockedOr_rel:
1557 case clang::ARM::BI_InterlockedOr64_rel:
1558 return MSVCIntrin::_InterlockedOr_rel;
1559 case clang::ARM::BI_InterlockedOr8_nf:
1560 case clang::ARM::BI_InterlockedOr16_nf:
1561 case clang::ARM::BI_InterlockedOr_nf:
1562 case clang::ARM::BI_InterlockedOr64_nf:
1563 return MSVCIntrin::_InterlockedOr_nf;
1564 case clang::ARM::BI_InterlockedXor8_acq:
1565 case clang::ARM::BI_InterlockedXor16_acq:
1566 case clang::ARM::BI_InterlockedXor_acq:
1567 case clang::ARM::BI_InterlockedXor64_acq:
1568 return MSVCIntrin::_InterlockedXor_acq;
1569 case clang::ARM::BI_InterlockedXor8_rel:
1570 case clang::ARM::BI_InterlockedXor16_rel:
1571 case clang::ARM::BI_InterlockedXor_rel:
1572 case clang::ARM::BI_InterlockedXor64_rel:
1573 return MSVCIntrin::_InterlockedXor_rel;
1574 case clang::ARM::BI_InterlockedXor8_nf:
1575 case clang::ARM::BI_InterlockedXor16_nf:
1576 case clang::ARM::BI_InterlockedXor_nf:
1577 case clang::ARM::BI_InterlockedXor64_nf:
1578 return MSVCIntrin::_InterlockedXor_nf;
1579 case clang::ARM::BI_InterlockedAnd8_acq:
1580 case clang::ARM::BI_InterlockedAnd16_acq:
1581 case clang::ARM::BI_InterlockedAnd_acq:
1582 case clang::ARM::BI_InterlockedAnd64_acq:
1583 return MSVCIntrin::_InterlockedAnd_acq;
1584 case clang::ARM::BI_InterlockedAnd8_rel:
1585 case clang::ARM::BI_InterlockedAnd16_rel:
1586 case clang::ARM::BI_InterlockedAnd_rel:
1587 case clang::ARM::BI_InterlockedAnd64_rel:
1588 return MSVCIntrin::_InterlockedAnd_rel;
1589 case clang::ARM::BI_InterlockedAnd8_nf:
1590 case clang::ARM::BI_InterlockedAnd16_nf:
1591 case clang::ARM::BI_InterlockedAnd_nf:
1592 case clang::ARM::BI_InterlockedAnd64_nf:
1593 return MSVCIntrin::_InterlockedAnd_nf;
1594 case clang::ARM::BI_InterlockedIncrement16_acq:
1595 case clang::ARM::BI_InterlockedIncrement_acq:
1596 case clang::ARM::BI_InterlockedIncrement64_acq:
1597 return MSVCIntrin::_InterlockedIncrement_acq;
1598 case clang::ARM::BI_InterlockedIncrement16_rel:
1599 case clang::ARM::BI_InterlockedIncrement_rel:
1600 case clang::ARM::BI_InterlockedIncrement64_rel:
1601 return MSVCIntrin::_InterlockedIncrement_rel;
1602 case clang::ARM::BI_InterlockedIncrement16_nf:
1603 case clang::ARM::BI_InterlockedIncrement_nf:
1604 case clang::ARM::BI_InterlockedIncrement64_nf:
1605 return MSVCIntrin::_InterlockedIncrement_nf;
1606 case clang::ARM::BI_InterlockedDecrement16_acq:
1607 case clang::ARM::BI_InterlockedDecrement_acq:
1608 case clang::ARM::BI_InterlockedDecrement64_acq:
1609 return MSVCIntrin::_InterlockedDecrement_acq;
1610 case clang::ARM::BI_InterlockedDecrement16_rel:
1611 case clang::ARM::BI_InterlockedDecrement_rel:
1612 case clang::ARM::BI_InterlockedDecrement64_rel:
1613 return MSVCIntrin::_InterlockedDecrement_rel;
1614 case clang::ARM::BI_InterlockedDecrement16_nf:
1615 case clang::ARM::BI_InterlockedDecrement_nf:
1616 case clang::ARM::BI_InterlockedDecrement64_nf:
1617 return MSVCIntrin::_InterlockedDecrement_nf;
1618 }
1619 llvm_unreachable("must return from switch");
1620 }
1621
1622 static std::optional<CodeGenFunction::MSVCIntrin>
1623 translateAarch64ToMsvcIntrin(unsigned BuiltinID) {
1624 using MSVCIntrin = CodeGenFunction::MSVCIntrin;
1625 switch (BuiltinID) {
1626 default:
1627 return std::nullopt;
1628 case clang::AArch64::BI_BitScanForward:
1629 case clang::AArch64::BI_BitScanForward64:
1630 return MSVCIntrin::_BitScanForward;
1631 case clang::AArch64::BI_BitScanReverse:
1632 case clang::AArch64::BI_BitScanReverse64:
1633 return MSVCIntrin::_BitScanReverse;
1634 case clang::AArch64::BI_InterlockedAnd64:
1635 return MSVCIntrin::_InterlockedAnd;
1636 case clang::AArch64::BI_InterlockedExchange64:
1637 return MSVCIntrin::_InterlockedExchange;
1638 case clang::AArch64::BI_InterlockedExchangeAdd64:
1639 return MSVCIntrin::_InterlockedExchangeAdd;
1640 case clang::AArch64::BI_InterlockedExchangeSub64:
1641 return MSVCIntrin::_InterlockedExchangeSub;
1642 case clang::AArch64::BI_InterlockedOr64:
1643 return MSVCIntrin::_InterlockedOr;
1644 case clang::AArch64::BI_InterlockedXor64:
1645 return MSVCIntrin::_InterlockedXor;
1646 case clang::AArch64::BI_InterlockedDecrement64:
1647 return MSVCIntrin::_InterlockedDecrement;
1648 case clang::AArch64::BI_InterlockedIncrement64:
1649 return MSVCIntrin::_InterlockedIncrement;
1650 case clang::AArch64::BI_InterlockedExchangeAdd8_acq:
1651 case clang::AArch64::BI_InterlockedExchangeAdd16_acq:
1652 case clang::AArch64::BI_InterlockedExchangeAdd_acq:
1653 case clang::AArch64::BI_InterlockedExchangeAdd64_acq:
1654 return MSVCIntrin::_InterlockedExchangeAdd_acq;
1655 case clang::AArch64::BI_InterlockedExchangeAdd8_rel:
1656 case clang::AArch64::BI_InterlockedExchangeAdd16_rel:
1657 case clang::AArch64::BI_InterlockedExchangeAdd_rel:
1658 case clang::AArch64::BI_InterlockedExchangeAdd64_rel:
1659 return MSVCIntrin::_InterlockedExchangeAdd_rel;
1660 case clang::AArch64::BI_InterlockedExchangeAdd8_nf:
1661 case clang::AArch64::BI_InterlockedExchangeAdd16_nf:
1662 case clang::AArch64::BI_InterlockedExchangeAdd_nf:
1663 case clang::AArch64::BI_InterlockedExchangeAdd64_nf:
1664 return MSVCIntrin::_InterlockedExchangeAdd_nf;
1665 case clang::AArch64::BI_InterlockedExchange8_acq:
1666 case clang::AArch64::BI_InterlockedExchange16_acq:
1667 case clang::AArch64::BI_InterlockedExchange_acq:
1668 case clang::AArch64::BI_InterlockedExchange64_acq:
1669 return MSVCIntrin::_InterlockedExchange_acq;
1670 case clang::AArch64::BI_InterlockedExchange8_rel:
1671 case clang::AArch64::BI_InterlockedExchange16_rel:
1672 case clang::AArch64::BI_InterlockedExchange_rel:
1673 case clang::AArch64::BI_InterlockedExchange64_rel:
1674 return MSVCIntrin::_InterlockedExchange_rel;
1675 case clang::AArch64::BI_InterlockedExchange8_nf:
1676 case clang::AArch64::BI_InterlockedExchange16_nf:
1677 case clang::AArch64::BI_InterlockedExchange_nf:
1678 case clang::AArch64::BI_InterlockedExchange64_nf:
1679 return MSVCIntrin::_InterlockedExchange_nf;
1680 case clang::AArch64::BI_InterlockedCompareExchange8_acq:
1681 case clang::AArch64::BI_InterlockedCompareExchange16_acq:
1682 case clang::AArch64::BI_InterlockedCompareExchange_acq:
1683 case clang::AArch64::BI_InterlockedCompareExchange64_acq:
1684 return MSVCIntrin::_InterlockedCompareExchange_acq;
1685 case clang::AArch64::BI_InterlockedCompareExchange8_rel:
1686 case clang::AArch64::BI_InterlockedCompareExchange16_rel:
1687 case clang::AArch64::BI_InterlockedCompareExchange_rel:
1688 case clang::AArch64::BI_InterlockedCompareExchange64_rel:
1689 return MSVCIntrin::_InterlockedCompareExchange_rel;
1690 case clang::AArch64::BI_InterlockedCompareExchange8_nf:
1691 case clang::AArch64::BI_InterlockedCompareExchange16_nf:
1692 case clang::AArch64::BI_InterlockedCompareExchange_nf:
1693 case clang::AArch64::BI_InterlockedCompareExchange64_nf:
1694 return MSVCIntrin::_InterlockedCompareExchange_nf;
1695 case clang::AArch64::BI_InterlockedCompareExchange128:
1696 return MSVCIntrin::_InterlockedCompareExchange128;
1697 case clang::AArch64::BI_InterlockedCompareExchange128_acq:
1698 return MSVCIntrin::_InterlockedCompareExchange128_acq;
1699 case clang::AArch64::BI_InterlockedCompareExchange128_nf:
1700 return MSVCIntrin::_InterlockedCompareExchange128_nf;
1701 case clang::AArch64::BI_InterlockedCompareExchange128_rel:
1702 return MSVCIntrin::_InterlockedCompareExchange128_rel;
1703 case clang::AArch64::BI_InterlockedOr8_acq:
1704 case clang::AArch64::BI_InterlockedOr16_acq:
1705 case clang::AArch64::BI_InterlockedOr_acq:
1706 case clang::AArch64::BI_InterlockedOr64_acq:
1707 return MSVCIntrin::_InterlockedOr_acq;
1708 case clang::AArch64::BI_InterlockedOr8_rel:
1709 case clang::AArch64::BI_InterlockedOr16_rel:
1710 case clang::AArch64::BI_InterlockedOr_rel:
1711 case clang::AArch64::BI_InterlockedOr64_rel:
1712 return MSVCIntrin::_InterlockedOr_rel;
1713 case clang::AArch64::BI_InterlockedOr8_nf:
1714 case clang::AArch64::BI_InterlockedOr16_nf:
1715 case clang::AArch64::BI_InterlockedOr_nf:
1716 case clang::AArch64::BI_InterlockedOr64_nf:
1717 return MSVCIntrin::_InterlockedOr_nf;
1718 case clang::AArch64::BI_InterlockedXor8_acq:
1719 case clang::AArch64::BI_InterlockedXor16_acq:
1720 case clang::AArch64::BI_InterlockedXor_acq:
1721 case clang::AArch64::BI_InterlockedXor64_acq:
1722 return MSVCIntrin::_InterlockedXor_acq;
1723 case clang::AArch64::BI_InterlockedXor8_rel:
1724 case clang::AArch64::BI_InterlockedXor16_rel:
1725 case clang::AArch64::BI_InterlockedXor_rel:
1726 case clang::AArch64::BI_InterlockedXor64_rel:
1727 return MSVCIntrin::_InterlockedXor_rel;
1728 case clang::AArch64::BI_InterlockedXor8_nf:
1729 case clang::AArch64::BI_InterlockedXor16_nf:
1730 case clang::AArch64::BI_InterlockedXor_nf:
1731 case clang::AArch64::BI_InterlockedXor64_nf:
1732 return MSVCIntrin::_InterlockedXor_nf;
1733 case clang::AArch64::BI_InterlockedAnd8_acq:
1734 case clang::AArch64::BI_InterlockedAnd16_acq:
1735 case clang::AArch64::BI_InterlockedAnd_acq:
1736 case clang::AArch64::BI_InterlockedAnd64_acq:
1737 return MSVCIntrin::_InterlockedAnd_acq;
1738 case clang::AArch64::BI_InterlockedAnd8_rel:
1739 case clang::AArch64::BI_InterlockedAnd16_rel:
1740 case clang::AArch64::BI_InterlockedAnd_rel:
1741 case clang::AArch64::BI_InterlockedAnd64_rel:
1742 return MSVCIntrin::_InterlockedAnd_rel;
1743 case clang::AArch64::BI_InterlockedAnd8_nf:
1744 case clang::AArch64::BI_InterlockedAnd16_nf:
1745 case clang::AArch64::BI_InterlockedAnd_nf:
1746 case clang::AArch64::BI_InterlockedAnd64_nf:
1747 return MSVCIntrin::_InterlockedAnd_nf;
1748 case clang::AArch64::BI_InterlockedIncrement16_acq:
1749 case clang::AArch64::BI_InterlockedIncrement_acq:
1750 case clang::AArch64::BI_InterlockedIncrement64_acq:
1751 return MSVCIntrin::_InterlockedIncrement_acq;
1752 case clang::AArch64::BI_InterlockedIncrement16_rel:
1753 case clang::AArch64::BI_InterlockedIncrement_rel:
1754 case clang::AArch64::BI_InterlockedIncrement64_rel:
1755 return MSVCIntrin::_InterlockedIncrement_rel;
1756 case clang::AArch64::BI_InterlockedIncrement16_nf:
1757 case clang::AArch64::BI_InterlockedIncrement_nf:
1758 case clang::AArch64::BI_InterlockedIncrement64_nf:
1759 return MSVCIntrin::_InterlockedIncrement_nf;
1760 case clang::AArch64::BI_InterlockedDecrement16_acq:
1761 case clang::AArch64::BI_InterlockedDecrement_acq:
1762 case clang::AArch64::BI_InterlockedDecrement64_acq:
1763 return MSVCIntrin::_InterlockedDecrement_acq;
1764 case clang::AArch64::BI_InterlockedDecrement16_rel:
1765 case clang::AArch64::BI_InterlockedDecrement_rel:
1766 case clang::AArch64::BI_InterlockedDecrement64_rel:
1767 return MSVCIntrin::_InterlockedDecrement_rel;
1768 case clang::AArch64::BI_InterlockedDecrement16_nf:
1769 case clang::AArch64::BI_InterlockedDecrement_nf:
1770 case clang::AArch64::BI_InterlockedDecrement64_nf:
1771 return MSVCIntrin::_InterlockedDecrement_nf;
1772 }
1773 llvm_unreachable("must return from switch");
1774 }
1775
1776 static std::optional<CodeGenFunction::MSVCIntrin>
1777 translateX86ToMsvcIntrin(unsigned BuiltinID) {
1778 using MSVCIntrin = CodeGenFunction::MSVCIntrin;
1779 switch (BuiltinID) {
1780 default:
1781 return std::nullopt;
1782 case clang::X86::BI_BitScanForward:
1783 case clang::X86::BI_BitScanForward64:
1784 return MSVCIntrin::_BitScanForward;
1785 case clang::X86::BI_BitScanReverse:
1786 case clang::X86::BI_BitScanReverse64:
1787 return MSVCIntrin::_BitScanReverse;
1788 case clang::X86::BI_InterlockedAnd64:
1789 return MSVCIntrin::_InterlockedAnd;
1790 case clang::X86::BI_InterlockedCompareExchange128:
1791 return MSVCIntrin::_InterlockedCompareExchange128;
1792 case clang::X86::BI_InterlockedExchange64:
1793 return MSVCIntrin::_InterlockedExchange;
1794 case clang::X86::BI_InterlockedExchangeAdd64:
1795 return MSVCIntrin::_InterlockedExchangeAdd;
1796 case clang::X86::BI_InterlockedExchangeSub64:
1797 return MSVCIntrin::_InterlockedExchangeSub;
1798 case clang::X86::BI_InterlockedOr64:
1799 return MSVCIntrin::_InterlockedOr;
1800 case clang::X86::BI_InterlockedXor64:
1801 return MSVCIntrin::_InterlockedXor;
1802 case clang::X86::BI_InterlockedDecrement64:
1803 return MSVCIntrin::_InterlockedDecrement;
1804 case clang::X86::BI_InterlockedIncrement64:
1805 return MSVCIntrin::_InterlockedIncrement;
1806 }
1807 llvm_unreachable("must return from switch");
1808 }
1809
1810 // Emit an MSVC intrinsic. Assumes that arguments have *not* been evaluated.
1811 Value *CodeGenFunction::EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID,
1812 const CallExpr *E) {
1813 switch (BuiltinID) {
1814 case MSVCIntrin::_BitScanForward:
1815 case MSVCIntrin::_BitScanReverse: {
1816 Address IndexAddress(EmitPointerWithAlignment(E->getArg(0)));
1817 Value *ArgValue = EmitScalarExpr(E->getArg(1));
1818
1819 llvm::Type *ArgType = ArgValue->getType();
1820 llvm::Type *IndexType = IndexAddress.getElementType();
1821 llvm::Type *ResultType = ConvertType(E->getType());
1822
1823 Value *ArgZero = llvm::Constant::getNullValue(ArgType);
1824 Value *ResZero = llvm::Constant::getNullValue(ResultType);
1825 Value *ResOne = llvm::ConstantInt::get(ResultType, 1);
1826
1827 BasicBlock *Begin = Builder.GetInsertBlock();
1828 BasicBlock *End = createBasicBlock("bitscan_end", this->CurFn);
1829 Builder.SetInsertPoint(End);
1830 PHINode *Result = Builder.CreatePHI(ResultType, 2, "bitscan_result");
1831
1832 Builder.SetInsertPoint(Begin);
1833 Value *IsZero = Builder.CreateICmpEQ(ArgValue, ArgZero);
1834 BasicBlock *NotZero = createBasicBlock("bitscan_not_zero", this->CurFn);
1835 Builder.CreateCondBr(IsZero, End, NotZero);
1836 Result->addIncoming(ResZero, Begin);
1837
1838 Builder.SetInsertPoint(NotZero);
1839
1840 if (BuiltinID == MSVCIntrin::_BitScanForward) {
1841 Function *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
1842 Value *ZeroCount = Builder.CreateCall(F, {ArgValue, Builder.getTrue()});
1843 ZeroCount = Builder.CreateIntCast(ZeroCount, IndexType, false);
1844 Builder.CreateStore(ZeroCount, IndexAddress, false);
1845 } else {
1846 unsigned ArgWidth = cast<llvm::IntegerType>(ArgType)->getBitWidth();
1847 Value *ArgTypeLastIndex = llvm::ConstantInt::get(IndexType, ArgWidth - 1);
1848
1849 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
1850 Value *ZeroCount = Builder.CreateCall(F, {ArgValue, Builder.getTrue()});
1851 ZeroCount = Builder.CreateIntCast(ZeroCount, IndexType, false);
1852 Value *Index = Builder.CreateNSWSub(ArgTypeLastIndex, ZeroCount);
1853 Builder.CreateStore(Index, IndexAddress, false);
1854 }
1855 Builder.CreateBr(End);
1856 Result->addIncoming(ResOne, NotZero);
1857
1858 Builder.SetInsertPoint(End);
1859 return Result;
1860 }
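  // Rough sketch (illustrative names) of the control flow built above for
  // `_BitScanForward(&i, x)`; _BitScanReverse uses ctlz and stores
  // (width - 1) - zerocount instead:
  //
  //   begin:             %iszero = icmp eq %x, 0
  //                      br %iszero, %bitscan_end, %bitscan_not_zero
  //   bitscan_not_zero:  %zc = call @llvm.cttz(%x, /*zero-is-poison*/ true)
  //                      store %zc, ptr %i
  //                      br %bitscan_end
  //   bitscan_end:       %res = phi [0, begin], [1, bitscan_not_zero]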
1861 case MSVCIntrin::_InterlockedAnd:
1862 return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E);
1863 case MSVCIntrin::_InterlockedExchange:
1864 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E);
1865 case MSVCIntrin::_InterlockedExchangeAdd:
1866 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E);
1867 case MSVCIntrin::_InterlockedExchangeSub:
1868 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Sub, E);
1869 case MSVCIntrin::_InterlockedOr:
1870 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E);
1871 case MSVCIntrin::_InterlockedXor:
1872 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E);
1873 case MSVCIntrin::_InterlockedExchangeAdd_acq:
1874 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E,
1875 AtomicOrdering::Acquire);
1876 case MSVCIntrin::_InterlockedExchangeAdd_rel:
1877 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E,
1878 AtomicOrdering::Release);
1879 case MSVCIntrin::_InterlockedExchangeAdd_nf:
1880 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E,
1881 AtomicOrdering::Monotonic);
1882 case MSVCIntrin::_InterlockedExchange_acq:
1883 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E,
1884 AtomicOrdering::Acquire);
1885 case MSVCIntrin::_InterlockedExchange_rel:
1886 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E,
1887 AtomicOrdering::Release);
1888 case MSVCIntrin::_InterlockedExchange_nf:
1889 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E,
1890 AtomicOrdering::Monotonic);
1891 case MSVCIntrin::_InterlockedCompareExchange_acq:
1892 return EmitAtomicCmpXchgForMSIntrin(*this, E, AtomicOrdering::Acquire);
1893 case MSVCIntrin::_InterlockedCompareExchange_rel:
1894 return EmitAtomicCmpXchgForMSIntrin(*this, E, AtomicOrdering::Release);
1895 case MSVCIntrin::_InterlockedCompareExchange_nf:
1896 return EmitAtomicCmpXchgForMSIntrin(*this, E, AtomicOrdering::Monotonic);
1897 case MSVCIntrin::_InterlockedCompareExchange128:
1898 return EmitAtomicCmpXchg128ForMSIntrin(
1899 *this, E, AtomicOrdering::SequentiallyConsistent);
1900 case MSVCIntrin::_InterlockedCompareExchange128_acq:
1901 return EmitAtomicCmpXchg128ForMSIntrin(*this, E, AtomicOrdering::Acquire);
1902 case MSVCIntrin::_InterlockedCompareExchange128_rel:
1903 return EmitAtomicCmpXchg128ForMSIntrin(*this, E, AtomicOrdering::Release);
1904 case MSVCIntrin::_InterlockedCompareExchange128_nf:
1905 return EmitAtomicCmpXchg128ForMSIntrin(*this, E, AtomicOrdering::Monotonic);
1906 case MSVCIntrin::_InterlockedOr_acq:
1907 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E,
1908 AtomicOrdering::Acquire);
1909 case MSVCIntrin::_InterlockedOr_rel:
1910 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E,
1911 AtomicOrdering::Release);
1912 case MSVCIntrin::_InterlockedOr_nf:
1913 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E,
1914 AtomicOrdering::Monotonic);
1915 case MSVCIntrin::_InterlockedXor_acq:
1916 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E,
1917 AtomicOrdering::Acquire);
1918 case MSVCIntrin::_InterlockedXor_rel:
1919 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E,
1920 AtomicOrdering::Release);
1921 case MSVCIntrin::_InterlockedXor_nf:
1922 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E,
1923 AtomicOrdering::Monotonic);
1924 case MSVCIntrin::_InterlockedAnd_acq:
1925 return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E,
1926 AtomicOrdering::Acquire);
1927 case MSVCIntrin::_InterlockedAnd_rel:
1928 return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E,
1929 AtomicOrdering::Release);
1930 case MSVCIntrin::_InterlockedAnd_nf:
1931 return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E,
1932 AtomicOrdering::Monotonic);
1933 case MSVCIntrin::_InterlockedIncrement_acq:
1934 return EmitAtomicIncrementValue(*this, E, AtomicOrdering::Acquire);
1935 case MSVCIntrin::_InterlockedIncrement_rel:
1936 return EmitAtomicIncrementValue(*this, E, AtomicOrdering::Release);
1937 case MSVCIntrin::_InterlockedIncrement_nf:
1938 return EmitAtomicIncrementValue(*this, E, AtomicOrdering::Monotonic);
1939 case MSVCIntrin::_InterlockedDecrement_acq:
1940 return EmitAtomicDecrementValue(*this, E, AtomicOrdering::Acquire);
1941 case MSVCIntrin::_InterlockedDecrement_rel:
1942 return EmitAtomicDecrementValue(*this, E, AtomicOrdering::Release);
1943 case MSVCIntrin::_InterlockedDecrement_nf:
1944 return EmitAtomicDecrementValue(*this, E, AtomicOrdering::Monotonic);
1945
1946 case MSVCIntrin::_InterlockedDecrement:
1947 return EmitAtomicDecrementValue(*this, E);
1948 case MSVCIntrin::_InterlockedIncrement:
1949 return EmitAtomicIncrementValue(*this, E);
1950
1951 case MSVCIntrin::__fastfail: {
1952 // Request immediate process termination from the kernel. The instruction
1953 // sequences to do this are documented on MSDN:
1954 // https://msdn.microsoft.com/en-us/library/dn774154.aspx
1955 llvm::Triple::ArchType ISA = getTarget().getTriple().getArch();
1956 StringRef Asm, Constraints;
1957 switch (ISA) {
1958 default:
1959 ErrorUnsupported(E, "__fastfail call for this architecture");
1960 break;
1961 case llvm::Triple::x86:
1962 case llvm::Triple::x86_64:
1963 Asm = "int $$0x29";
1964 Constraints = "{cx}";
1965 break;
1966 case llvm::Triple::thumb:
1967 Asm = "udf #251";
1968 Constraints = "{r0}";
1969 break;
1970 case llvm::Triple::aarch64:
1971 Asm = "brk #0xF003";
1972 Constraints = "{w0}";
1973 }
1974 llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, {Int32Ty}, false);
1975 llvm::InlineAsm *IA =
1976 llvm::InlineAsm::get(FTy, Asm, Constraints, /*hasSideEffects=*/true);
1977 llvm::AttributeList NoReturnAttr = llvm::AttributeList::get(
1978 getLLVMContext(), llvm::AttributeList::FunctionIndex,
1979 llvm::Attribute::NoReturn);
1980 llvm::CallInst *CI = Builder.CreateCall(IA, EmitScalarExpr(E->getArg(0)));
1981 CI->setAttributes(NoReturnAttr);
1982 return CI;
1983 }
1984 }
1985 llvm_unreachable("Incorrect MSVC intrinsic!");
1986 }
1987
1988 namespace {
1989 // ARC cleanup for __builtin_os_log_format
1990 struct CallObjCArcUse final : EHScopeStack::Cleanup {
1991 CallObjCArcUse(llvm::Value *object) : object(object) {}
1992 llvm::Value *object;
1993
1994 void Emit(CodeGenFunction &CGF, Flags flags) override {
1995 CGF.EmitARCIntrinsicUse(object);
1996 }
1997 };
1998 }
1999
2000 Value *CodeGenFunction::EmitCheckedArgForBuiltin(const Expr *E,
2001 BuiltinCheckKind Kind) {
2002 assert((Kind == BCK_CLZPassedZero || Kind == BCK_CTZPassedZero)
2003 && "Unsupported builtin check kind");
2004
2005 Value *ArgValue = EmitScalarExpr(E);
2006 if (!SanOpts.has(SanitizerKind::Builtin))
2007 return ArgValue;
2008
2009 SanitizerScope SanScope(this);
2010 Value *Cond = Builder.CreateICmpNE(
2011 ArgValue, llvm::Constant::getNullValue(ArgValue->getType()));
2012 EmitCheck(std::make_pair(Cond, SanitizerKind::Builtin),
2013 SanitizerHandler::InvalidBuiltin,
2014 {EmitCheckSourceLocation(E->getExprLoc()),
2015 llvm::ConstantInt::get(Builder.getInt8Ty(), Kind)},
2016 std::nullopt);
2017 return ArgValue;
2018 }
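
// Example: with -fsanitize=builtin, the check above guards calls such as
//
//   unsigned n = __builtin_clz(v);   // v == 0 is diagnosed at runtime
//
// by branching to the invalid-builtin sanitizer handler when the argument is
// zero, since clz/ctz of zero is undefined.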
2019
2020 static Value *EmitAbs(CodeGenFunction &CGF, Value *ArgValue, bool HasNSW) {
2021 return CGF.Builder.CreateBinaryIntrinsic(
2022 Intrinsic::abs, ArgValue,
2023 ConstantInt::get(CGF.Builder.getInt1Ty(), HasNSW));
2024 }
2025
2026 static Value *EmitOverflowCheckedAbs(CodeGenFunction &CGF, const CallExpr *E,
2027 bool SanitizeOverflow) {
2028 Value *ArgValue = CGF.EmitScalarExpr(E->getArg(0));
2029
2030 // Try to eliminate overflow check.
2031 if (const auto *VCI = dyn_cast<llvm::ConstantInt>(ArgValue)) {
2032 if (!VCI->isMinSignedValue())
2033 return EmitAbs(CGF, ArgValue, true);
2034 }
2035
2036 CodeGenFunction::SanitizerScope SanScope(&CGF);
2037
2038 Constant *Zero = Constant::getNullValue(ArgValue->getType());
2039 Value *ResultAndOverflow = CGF.Builder.CreateBinaryIntrinsic(
2040 Intrinsic::ssub_with_overflow, Zero, ArgValue);
2041 Value *Result = CGF.Builder.CreateExtractValue(ResultAndOverflow, 0);
2042 Value *NotOverflow = CGF.Builder.CreateNot(
2043 CGF.Builder.CreateExtractValue(ResultAndOverflow, 1));
2044
2045 // TODO: support -ftrapv-handler.
2046 if (SanitizeOverflow) {
2047 CGF.EmitCheck({{NotOverflow, SanitizerKind::SignedIntegerOverflow}},
2048 SanitizerHandler::NegateOverflow,
2049 {CGF.EmitCheckSourceLocation(E->getArg(0)->getExprLoc()),
2050 CGF.EmitCheckTypeDescriptor(E->getType())},
2051 {ArgValue});
2052 } else
2053 CGF.EmitTrapCheck(NotOverflow, SanitizerHandler::SubOverflow);
2054
2055 Value *CmpResult = CGF.Builder.CreateICmpSLT(ArgValue, Zero, "abscond");
2056 return CGF.Builder.CreateSelect(CmpResult, Result, ArgValue, "abs");
2057 }
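
// Rough sketch (illustrative) of the IR emitted above for `__builtin_abs(x)`
// on a 32-bit int when the overflow check is kept (only INT_MIN overflows):
//
//   %pair = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 0, i32 %x)
//   %neg  = extractvalue %pair, 0
//   %ok   = xor i1 (extractvalue %pair, 1), true
//   ... report or trap when %ok is false ...
//   %cond = icmp slt i32 %x, 0                           ; abscond
//   %res  = select i1 %cond, i32 %neg, i32 %x            ; abs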
2058
2059 /// Get the argument type for arguments to os_log_helper.
2060 static CanQualType getOSLogArgType(ASTContext &C, int Size) {
2061 QualType UnsignedTy = C.getIntTypeForBitwidth(Size * 8, /*Signed=*/false);
2062 return C.getCanonicalType(UnsignedTy);
2063 }
2064
2065 llvm::Function *CodeGenFunction::generateBuiltinOSLogHelperFunction(
2066 const analyze_os_log::OSLogBufferLayout &Layout,
2067 CharUnits BufferAlignment) {
2068 ASTContext &Ctx = getContext();
2069
2070 llvm::SmallString<64> Name;
2071 {
2072 raw_svector_ostream OS(Name);
2073 OS << "__os_log_helper";
2074 OS << "_" << BufferAlignment.getQuantity();
2075 OS << "_" << int(Layout.getSummaryByte());
2076 OS << "_" << int(Layout.getNumArgsByte());
2077 for (const auto &Item : Layout.Items)
2078 OS << "_" << int(Item.getSizeByte()) << "_"
2079 << int(Item.getDescriptorByte());
2080 }
2081
2082 if (llvm::Function *F = CGM.getModule().getFunction(Name))
2083 return F;
2084
2085 llvm::SmallVector<QualType, 4> ArgTys;
2086 FunctionArgList Args;
2087 Args.push_back(ImplicitParamDecl::Create(
2088 Ctx, nullptr, SourceLocation(), &Ctx.Idents.get("buffer"), Ctx.VoidPtrTy,
2089 ImplicitParamKind::Other));
2090 ArgTys.emplace_back(Ctx.VoidPtrTy);
2091
2092 for (unsigned int I = 0, E = Layout.Items.size(); I < E; ++I) {
2093 char Size = Layout.Items[I].getSizeByte();
2094 if (!Size)
2095 continue;
2096
2097 QualType ArgTy = getOSLogArgType(Ctx, Size);
2098 Args.push_back(ImplicitParamDecl::Create(
2099 Ctx, nullptr, SourceLocation(),
2100 &Ctx.Idents.get(std::string("arg") + llvm::to_string(I)), ArgTy,
2101 ImplicitParamKind::Other));
2102 ArgTys.emplace_back(ArgTy);
2103 }
2104
2105 QualType ReturnTy = Ctx.VoidTy;
2106
2107 // The helper function has linkonce_odr linkage to enable the linker to merge
2108 // identical functions. To ensure the merging always happens, 'noinline' is
2109 // attached to the function when compiling with -Oz.
2110 const CGFunctionInfo &FI =
2111 CGM.getTypes().arrangeBuiltinFunctionDeclaration(ReturnTy, Args);
2112 llvm::FunctionType *FuncTy = CGM.getTypes().GetFunctionType(FI);
2113 llvm::Function *Fn = llvm::Function::Create(
2114 FuncTy, llvm::GlobalValue::LinkOnceODRLinkage, Name, &CGM.getModule());
2115 Fn->setVisibility(llvm::GlobalValue::HiddenVisibility);
2116 CGM.SetLLVMFunctionAttributes(GlobalDecl(), FI, Fn, /*IsThunk=*/false);
2117 CGM.SetLLVMFunctionAttributesForDefinition(nullptr, Fn);
2118 Fn->setDoesNotThrow();
2119
2120 // Attach 'noinline' at -Oz.
2121 if (CGM.getCodeGenOpts().OptimizeSize == 2)
2122 Fn->addFnAttr(llvm::Attribute::NoInline);
2123
2124 auto NL = ApplyDebugLocation::CreateEmpty(*this);
2125 StartFunction(GlobalDecl(), ReturnTy, Fn, FI, Args);
2126
2127 // Create a scope with an artificial location for the body of this function.
2128 auto AL = ApplyDebugLocation::CreateArtificial(*this);
2129
2130 CharUnits Offset;
2131 Address BufAddr = makeNaturalAddressForPointer(
2132 Builder.CreateLoad(GetAddrOfLocalVar(Args[0]), "buf"), Ctx.VoidTy,
2133 BufferAlignment);
2134 Builder.CreateStore(Builder.getInt8(Layout.getSummaryByte()),
2135 Builder.CreateConstByteGEP(BufAddr, Offset++, "summary"));
2136 Builder.CreateStore(Builder.getInt8(Layout.getNumArgsByte()),
2137 Builder.CreateConstByteGEP(BufAddr, Offset++, "numArgs"));
2138
2139 unsigned I = 1;
2140 for (const auto &Item : Layout.Items) {
2141 Builder.CreateStore(
2142 Builder.getInt8(Item.getDescriptorByte()),
2143 Builder.CreateConstByteGEP(BufAddr, Offset++, "argDescriptor"));
2144 Builder.CreateStore(
2145 Builder.getInt8(Item.getSizeByte()),
2146 Builder.CreateConstByteGEP(BufAddr, Offset++, "argSize"));
2147
2148 CharUnits Size = Item.size();
2149 if (!Size.getQuantity())
2150 continue;
2151
2152 Address Arg = GetAddrOfLocalVar(Args[I]);
2153 Address Addr = Builder.CreateConstByteGEP(BufAddr, Offset, "argData");
2154 Addr = Addr.withElementType(Arg.getElementType());
2155 Builder.CreateStore(Builder.CreateLoad(Arg), Addr);
2156 Offset += Size;
2157 ++I;
2158 }
2159
2160 FinishFunction();
2161
2162 return Fn;
2163 }
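
// Example of the mangling above: for a buffer with 8-byte alignment, summary
// byte 0, one argument of size 4 with descriptor byte 0, the helper is named
// "__os_log_helper_8_0_1_4_0"; its body just stores the summary and count
// bytes followed by one (descriptor, size, data) triple into the buffer.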
2164
2165 RValue CodeGenFunction::emitBuiltinOSLogFormat(const CallExpr &E) {
2166 assert(E.getNumArgs() >= 2 &&
2167 "__builtin_os_log_format takes at least 2 arguments");
2168 ASTContext &Ctx = getContext();
2169 analyze_os_log::OSLogBufferLayout Layout;
2170 analyze_os_log::computeOSLogBufferLayout(Ctx, &E, Layout);
2171 Address BufAddr = EmitPointerWithAlignment(E.getArg(0));
2172 llvm::SmallVector<llvm::Value *, 4> RetainableOperands;
2173
2174 // Ignore argument 1, the format string. It is not currently used.
2175 CallArgList Args;
2176 Args.add(RValue::get(BufAddr.emitRawPointer(*this)), Ctx.VoidPtrTy);
2177
2178 for (const auto &Item : Layout.Items) {
2179 int Size = Item.getSizeByte();
2180 if (!Size)
2181 continue;
2182
2183 llvm::Value *ArgVal;
2184
2185 if (Item.getKind() == analyze_os_log::OSLogBufferItem::MaskKind) {
2186 uint64_t Val = 0;
2187 for (unsigned I = 0, E = Item.getMaskType().size(); I < E; ++I)
2188 Val |= ((uint64_t)Item.getMaskType()[I]) << I * 8;
2189 ArgVal = llvm::Constant::getIntegerValue(Int64Ty, llvm::APInt(64, Val));
2190 } else if (const Expr *TheExpr = Item.getExpr()) {
2191 ArgVal = EmitScalarExpr(TheExpr, /*Ignore*/ false);
2192
2193 // If a temporary object that requires destruction after the full
2194 // expression is passed, push a lifetime-extended cleanup to extend its
2195 // lifetime to the end of the enclosing block scope.
2196 auto LifetimeExtendObject = [&](const Expr *E) {
2197 E = E->IgnoreParenCasts();
2198 // Extend lifetimes of objects returned by function calls and message
2199 // sends.
2200
2201 // FIXME: We should do this in other cases in which temporaries are
2202 // created including arguments of non-ARC types (e.g., C++
2203 // temporaries).
2204 if (isa<CallExpr>(E) || isa<ObjCMessageExpr>(E))
2205 return true;
2206 return false;
2207 };
2208
2209 if (TheExpr->getType()->isObjCRetainableType() &&
2210 getLangOpts().ObjCAutoRefCount && LifetimeExtendObject(TheExpr)) {
2211 assert(getEvaluationKind(TheExpr->getType()) == TEK_Scalar &&
2212 "Only scalar can be a ObjC retainable type");
2213 if (!isa<Constant>(ArgVal)) {
2214 CleanupKind Cleanup = getARCCleanupKind();
2215 QualType Ty = TheExpr->getType();
2216 RawAddress Alloca = RawAddress::invalid();
2217 RawAddress Addr = CreateMemTemp(Ty, "os.log.arg", &Alloca);
2218 ArgVal = EmitARCRetain(Ty, ArgVal);
2219 Builder.CreateStore(ArgVal, Addr);
2220 pushLifetimeExtendedDestroy(Cleanup, Alloca, Ty,
2221 CodeGenFunction::destroyARCStrongPrecise,
2222 Cleanup & EHCleanup);
2223
2224 // Push a clang.arc.use call to ensure the ARC optimizer knows that the
2225 // argument has to be kept alive.
2226 if (CGM.getCodeGenOpts().OptimizationLevel != 0)
2227 pushCleanupAfterFullExpr<CallObjCArcUse>(Cleanup, ArgVal);
2228 }
2229 }
2230 } else {
2231 ArgVal = Builder.getInt32(Item.getConstValue().getQuantity());
2232 }
2233
2234 unsigned ArgValSize =
2235 CGM.getDataLayout().getTypeSizeInBits(ArgVal->getType());
2236 llvm::IntegerType *IntTy = llvm::Type::getIntNTy(getLLVMContext(),
2237 ArgValSize);
2238 ArgVal = Builder.CreateBitOrPointerCast(ArgVal, IntTy);
2239 CanQualType ArgTy = getOSLogArgType(Ctx, Size);
2240 // If ArgVal has type x86_fp80, zero-extend ArgVal.
2241 ArgVal = Builder.CreateZExtOrBitCast(ArgVal, ConvertType(ArgTy));
2242 Args.add(RValue::get(ArgVal), ArgTy);
2243 }
2244
2245 const CGFunctionInfo &FI =
2246 CGM.getTypes().arrangeBuiltinFunctionCall(Ctx.VoidTy, Args);
2247 llvm::Function *F = CodeGenFunction(CGM).generateBuiltinOSLogHelperFunction(
2248 Layout, BufAddr.getAlignment());
2249 EmitCall(FI, CGCallee::forDirect(F), ReturnValueSlot(), Args);
2250 return RValue::get(BufAddr, *this);
2251 }
2252
2253 static bool isSpecialUnsignedMultiplySignedResult(
2254 unsigned BuiltinID, WidthAndSignedness Op1Info, WidthAndSignedness Op2Info,
2255 WidthAndSignedness ResultInfo) {
2256 return BuiltinID == Builtin::BI__builtin_mul_overflow &&
2257 Op1Info.Width == Op2Info.Width && Op2Info.Width == ResultInfo.Width &&
2258 !Op1Info.Signed && !Op2Info.Signed && ResultInfo.Signed;
2259 }
2260
2261 static RValue EmitCheckedUnsignedMultiplySignedResult(
2262 CodeGenFunction &CGF, const clang::Expr *Op1, WidthAndSignedness Op1Info,
2263 const clang::Expr *Op2, WidthAndSignedness Op2Info,
2264 const clang::Expr *ResultArg, QualType ResultQTy,
2265 WidthAndSignedness ResultInfo) {
2266 assert(isSpecialUnsignedMultiplySignedResult(
2267 Builtin::BI__builtin_mul_overflow, Op1Info, Op2Info, ResultInfo) &&
2268 "Cannot specialize this multiply");
2269
2270 llvm::Value *V1 = CGF.EmitScalarExpr(Op1);
2271 llvm::Value *V2 = CGF.EmitScalarExpr(Op2);
2272
2273 llvm::Value *HasOverflow;
2274 llvm::Value *Result = EmitOverflowIntrinsic(
2275 CGF, llvm::Intrinsic::umul_with_overflow, V1, V2, HasOverflow);
2276
2277 // The intrinsic call will detect overflow when the value is > UINT_MAX;
2278 // however, since the original builtin had a signed result, we also need to
2279 // report an overflow when the result is greater than INT_MAX.
2280 auto IntMax = llvm::APInt::getSignedMaxValue(ResultInfo.Width);
2281 llvm::Value *IntMaxValue = llvm::ConstantInt::get(Result->getType(), IntMax);
2282
2283 llvm::Value *IntMaxOverflow = CGF.Builder.CreateICmpUGT(Result, IntMaxValue);
2284 HasOverflow = CGF.Builder.CreateOr(HasOverflow, IntMaxOverflow);
2285
2286 bool isVolatile =
2287 ResultArg->getType()->getPointeeType().isVolatileQualified();
2288 Address ResultPtr = CGF.EmitPointerWithAlignment(ResultArg);
2289 CGF.Builder.CreateStore(CGF.EmitToMemory(Result, ResultQTy), ResultPtr,
2290 isVolatile);
2291 return RValue::get(HasOverflow);
2292 }
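
// Worked example for the path above (32-bit operands, signed result):
//
//   unsigned a = 0x90000000u, b = 1u; int r;
//   bool ovf = __builtin_mul_overflow(a, b, &r);   // ovf == true
//
// The unsigned product 0x90000000 fits in 32 bits, so umul.with.overflow does
// not fire, but the extra icmp against INT_MAX reports the overflow.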
2293
2294 /// Determine if a binop is a checked mixed-sign multiply we can specialize.
2295 static bool isSpecialMixedSignMultiply(unsigned BuiltinID,
2296 WidthAndSignedness Op1Info,
2297 WidthAndSignedness Op2Info,
2298 WidthAndSignedness ResultInfo) {
2299 return BuiltinID == Builtin::BI__builtin_mul_overflow &&
2300 std::max(Op1Info.Width, Op2Info.Width) >= ResultInfo.Width &&
2301 Op1Info.Signed != Op2Info.Signed;
2302 }
2303
2304 /// Emit a checked mixed-sign multiply. This is a cheaper specialization of
2305 /// the generic checked-binop irgen.
2306 static RValue
2307 EmitCheckedMixedSignMultiply(CodeGenFunction &CGF, const clang::Expr *Op1,
2308 WidthAndSignedness Op1Info, const clang::Expr *Op2,
2309 WidthAndSignedness Op2Info,
2310 const clang::Expr *ResultArg, QualType ResultQTy,
2311 WidthAndSignedness ResultInfo) {
2312 assert(isSpecialMixedSignMultiply(Builtin::BI__builtin_mul_overflow, Op1Info,
2313 Op2Info, ResultInfo) &&
2314 "Not a mixed-sign multipliction we can specialize");
2315
2316 // Emit the signed and unsigned operands.
2317 const clang::Expr *SignedOp = Op1Info.Signed ? Op1 : Op2;
2318 const clang::Expr *UnsignedOp = Op1Info.Signed ? Op2 : Op1;
2319 llvm::Value *Signed = CGF.EmitScalarExpr(SignedOp);
2320 llvm::Value *Unsigned = CGF.EmitScalarExpr(UnsignedOp);
2321 unsigned SignedOpWidth = Op1Info.Signed ? Op1Info.Width : Op2Info.Width;
2322 unsigned UnsignedOpWidth = Op1Info.Signed ? Op2Info.Width : Op1Info.Width;
2323
2324 // One of the operands may be smaller than the other. If so, [s|z]ext it.
2325 if (SignedOpWidth < UnsignedOpWidth)
2326 Signed = CGF.Builder.CreateSExt(Signed, Unsigned->getType(), "op.sext");
2327 if (UnsignedOpWidth < SignedOpWidth)
2328 Unsigned = CGF.Builder.CreateZExt(Unsigned, Signed->getType(), "op.zext");
2329
2330 llvm::Type *OpTy = Signed->getType();
2331 llvm::Value *Zero = llvm::Constant::getNullValue(OpTy);
2332 Address ResultPtr = CGF.EmitPointerWithAlignment(ResultArg);
2333 llvm::Type *ResTy = ResultPtr.getElementType();
2334 unsigned OpWidth = std::max(Op1Info.Width, Op2Info.Width);
2335
2336 // Take the absolute value of the signed operand.
2337 llvm::Value *IsNegative = CGF.Builder.CreateICmpSLT(Signed, Zero);
2338 llvm::Value *AbsOfNegative = CGF.Builder.CreateSub(Zero, Signed);
2339 llvm::Value *AbsSigned =
2340 CGF.Builder.CreateSelect(IsNegative, AbsOfNegative, Signed);
2341
2342 // Perform a checked unsigned multiplication.
2343 llvm::Value *UnsignedOverflow;
2344 llvm::Value *UnsignedResult =
2345 EmitOverflowIntrinsic(CGF, llvm::Intrinsic::umul_with_overflow, AbsSigned,
2346 Unsigned, UnsignedOverflow);
2347
2348 llvm::Value *Overflow, *Result;
2349 if (ResultInfo.Signed) {
2350 // Signed overflow occurs if the result is greater than INT_MAX or less
2351 // than INT_MIN, i.e. when |Result| > (INT_MAX + IsNegative).
2352 auto IntMax =
2353 llvm::APInt::getSignedMaxValue(ResultInfo.Width).zext(OpWidth);
2354 llvm::Value *MaxResult =
2355 CGF.Builder.CreateAdd(llvm::ConstantInt::get(OpTy, IntMax),
2356 CGF.Builder.CreateZExt(IsNegative, OpTy));
2357 llvm::Value *SignedOverflow =
2358 CGF.Builder.CreateICmpUGT(UnsignedResult, MaxResult);
2359 Overflow = CGF.Builder.CreateOr(UnsignedOverflow, SignedOverflow);
2360
2361 // Prepare the signed result (possibly by negating it).
2362 llvm::Value *NegativeResult = CGF.Builder.CreateNeg(UnsignedResult);
2363 llvm::Value *SignedResult =
2364 CGF.Builder.CreateSelect(IsNegative, NegativeResult, UnsignedResult);
2365 Result = CGF.Builder.CreateTrunc(SignedResult, ResTy);
2366 } else {
2367 // Unsigned overflow occurs if the result is < 0 or greater than UINT_MAX.
2368 llvm::Value *Underflow = CGF.Builder.CreateAnd(
2369 IsNegative, CGF.Builder.CreateIsNotNull(UnsignedResult));
2370 Overflow = CGF.Builder.CreateOr(UnsignedOverflow, Underflow);
2371 if (ResultInfo.Width < OpWidth) {
2372 auto IntMax =
2373 llvm::APInt::getMaxValue(ResultInfo.Width).zext(OpWidth);
2374 llvm::Value *TruncOverflow = CGF.Builder.CreateICmpUGT(
2375 UnsignedResult, llvm::ConstantInt::get(OpTy, IntMax));
2376 Overflow = CGF.Builder.CreateOr(Overflow, TruncOverflow);
2377 }
2378
2379 // Negate the product if it would be negative in infinite precision.
2380 Result = CGF.Builder.CreateSelect(
2381 IsNegative, CGF.Builder.CreateNeg(UnsignedResult), UnsignedResult);
2382
2383 Result = CGF.Builder.CreateTrunc(Result, ResTy);
2384 }
2385 assert(Overflow && Result && "Missing overflow or result");
2386
2387 bool isVolatile =
2388 ResultArg->getType()->getPointeeType().isVolatileQualified();
2389 CGF.Builder.CreateStore(CGF.EmitToMemory(Result, ResultQTy), ResultPtr,
2390 isVolatile);
2391 return RValue::get(Overflow);
2392 }
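
// Worked example for the mixed-sign path above (32-bit operands, unsigned
// result):
//
//   int s = -3; unsigned u = 5; unsigned r;
//   bool ovf = __builtin_mul_overflow(s, u, &r);   // ovf == true
//
// |s| is 3, the unsigned product is 15, and because the infinitely precise
// result (-15) is negative and nonzero it cannot be represented in an
// unsigned result, so the Underflow check fires.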
2393
2394 static bool
2395 TypeRequiresBuiltinLaunderImp(const ASTContext &Ctx, QualType Ty,
2396 llvm::SmallPtrSetImpl<const Decl *> &Seen) {
2397 if (const auto *Arr = Ctx.getAsArrayType(Ty))
2398 Ty = Ctx.getBaseElementType(Arr);
2399
2400 const auto *Record = Ty->getAsCXXRecordDecl();
2401 if (!Record)
2402 return false;
2403
2404 // We've already checked this type, or are in the process of checking it.
2405 if (!Seen.insert(Record).second)
2406 return false;
2407
2408 assert(Record->hasDefinition() &&
2409 "Incomplete types should already be diagnosed");
2410
2411 if (Record->isDynamicClass())
2412 return true;
2413
2414 for (FieldDecl *F : Record->fields()) {
2415 if (TypeRequiresBuiltinLaunderImp(Ctx, F->getType(), Seen))
2416 return true;
2417 }
2418 return false;
2419 }
2420
2421 /// Determine if the specified type requires laundering by checking if it is a
2422 /// dynamic class type or contains a subobject which is a dynamic class type.
2423 static bool TypeRequiresBuiltinLaunder(CodeGenModule &CGM, QualType Ty) {
2424 if (!CGM.getCodeGenOpts().StrictVTablePointers)
2425 return false;
2426 llvm::SmallPtrSet<const Decl *, 16> Seen;
2427 return TypeRequiresBuiltinLaunderImp(CGM.getContext(), Ty, Seen);
2428 }
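
// Example: with -fstrict-vtable-pointers,
//
//   struct Dyn { virtual void f(); };   // dynamic class
//   struct Agg { Dyn d; int x; };       // contains a dynamic subobject
//
// both make TypeRequiresBuiltinLaunder return true, while a record containing
// no dynamic class anywhere returns false, so __builtin_launder only needs to
// do real work for the former (that is the intent of the check above).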
2429
2430 RValue CodeGenFunction::emitRotate(const CallExpr *E, bool IsRotateRight) {
2431 llvm::Value *Src = EmitScalarExpr(E->getArg(0));
2432 llvm::Value *ShiftAmt = EmitScalarExpr(E->getArg(1));
2433
2434 // The builtin's shift arg may have a different type than the source arg and
2435 // result, but the LLVM intrinsic uses the same type for all values.
2436 llvm::Type *Ty = Src->getType();
2437 ShiftAmt = Builder.CreateIntCast(ShiftAmt, Ty, false);
2438
2439 // Rotate is a special case of LLVM funnel shift: the first two args are the same.
2440 unsigned IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
2441 Function *F = CGM.getIntrinsic(IID, Ty);
2442 return RValue::get(Builder.CreateCall(F, { Src, Src, ShiftAmt }));
2443 }
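
// Illustration: a 32-bit rotate-left handled here becomes a funnel shift with
// the value duplicated into both data operands,
//
//   %r = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 %n)
//
// and the rotate-right flavors use @llvm.fshr in the same way.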
2444
2445 // Map math builtins for long-double to f128 version.
2446 static unsigned mutateLongDoubleBuiltin(unsigned BuiltinID) {
2447 switch (BuiltinID) {
2448 #define MUTATE_LDBL(func) \
2449 case Builtin::BI__builtin_##func##l: \
2450 return Builtin::BI__builtin_##func##f128;
2451 MUTATE_LDBL(sqrt)
2452 MUTATE_LDBL(cbrt)
2453 MUTATE_LDBL(fabs)
2454 MUTATE_LDBL(log)
2455 MUTATE_LDBL(log2)
2456 MUTATE_LDBL(log10)
2457 MUTATE_LDBL(log1p)
2458 MUTATE_LDBL(logb)
2459 MUTATE_LDBL(exp)
2460 MUTATE_LDBL(exp2)
2461 MUTATE_LDBL(expm1)
2462 MUTATE_LDBL(fdim)
2463 MUTATE_LDBL(hypot)
2464 MUTATE_LDBL(ilogb)
2465 MUTATE_LDBL(pow)
2466 MUTATE_LDBL(fmin)
2467 MUTATE_LDBL(fmax)
2468 MUTATE_LDBL(ceil)
2469 MUTATE_LDBL(trunc)
2470 MUTATE_LDBL(rint)
2471 MUTATE_LDBL(nearbyint)
2472 MUTATE_LDBL(round)
2473 MUTATE_LDBL(floor)
2474 MUTATE_LDBL(lround)
2475 MUTATE_LDBL(llround)
2476 MUTATE_LDBL(lrint)
2477 MUTATE_LDBL(llrint)
2478 MUTATE_LDBL(fmod)
2479 MUTATE_LDBL(modf)
2480 MUTATE_LDBL(nan)
2481 MUTATE_LDBL(nans)
2482 MUTATE_LDBL(inf)
2483 MUTATE_LDBL(fma)
2484 MUTATE_LDBL(sin)
2485 MUTATE_LDBL(cos)
2486 MUTATE_LDBL(tan)
2487 MUTATE_LDBL(sinh)
2488 MUTATE_LDBL(cosh)
2489 MUTATE_LDBL(tanh)
2490 MUTATE_LDBL(asin)
2491 MUTATE_LDBL(acos)
2492 MUTATE_LDBL(atan)
2493 MUTATE_LDBL(asinh)
2494 MUTATE_LDBL(acosh)
2495 MUTATE_LDBL(atanh)
2496 MUTATE_LDBL(atan2)
2497 MUTATE_LDBL(erf)
2498 MUTATE_LDBL(erfc)
2499 MUTATE_LDBL(ldexp)
2500 MUTATE_LDBL(frexp)
2501 MUTATE_LDBL(huge_val)
2502 MUTATE_LDBL(copysign)
2503 MUTATE_LDBL(nextafter)
2504 MUTATE_LDBL(nexttoward)
2505 MUTATE_LDBL(remainder)
2506 MUTATE_LDBL(remquo)
2507 MUTATE_LDBL(scalbln)
2508 MUTATE_LDBL(scalbn)
2509 MUTATE_LDBL(tgamma)
2510 MUTATE_LDBL(lgamma)
2511 #undef MUTATE_LDBL
2512 default:
2513 return BuiltinID;
2514 }
2515 }
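
// Example: on a PPC64 target whose long double is IEEE binary128, a call to
// __builtin_sqrtl is remapped by MUTATE_LDBL(sqrt) from
// Builtin::BI__builtin_sqrtl to Builtin::BI__builtin_sqrtf128 before the
// normal builtin emission runs, so it is handled like the f128 builtin.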
2516
2517 static Value *tryUseTestFPKind(CodeGenFunction &CGF, unsigned BuiltinID,
2518 Value *V) {
2519 if (CGF.Builder.getIsFPConstrained() &&
2520 CGF.Builder.getDefaultConstrainedExcept() != fp::ebIgnore) {
2521 if (Value *Result =
2522 CGF.getTargetHooks().testFPKind(V, BuiltinID, CGF.Builder, CGF.CGM))
2523 return Result;
2524 }
2525 return nullptr;
2526 }
2527
2528 static RValue EmitHipStdParUnsupportedBuiltin(CodeGenFunction *CGF,
2529 const FunctionDecl *FD) {
2530 auto Name = FD->getNameAsString() + "__hipstdpar_unsupported";
2531 auto FnTy = CGF->CGM.getTypes().GetFunctionType(FD);
2532 auto UBF = CGF->CGM.getModule().getOrInsertFunction(Name, FnTy);
2533
2534 SmallVector<Value *, 16> Args;
2535 for (auto &&FormalTy : FnTy->params())
2536 Args.push_back(llvm::PoisonValue::get(FormalTy));
2537
2538 return RValue::get(CGF->Builder.CreateCall(UBF, Args));
2539 }
2540
2541 RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
2542 const CallExpr *E,
2543 ReturnValueSlot ReturnValue) {
2544 const FunctionDecl *FD = GD.getDecl()->getAsFunction();
2545 // See if we can constant fold this builtin. If so, don't emit it at all.
2546 // TODO: Extend this handling to all builtin calls that we can constant-fold.
2547 Expr::EvalResult Result;
2548 if (E->isPRValue() && E->EvaluateAsRValue(Result, CGM.getContext()) &&
2549 !Result.hasSideEffects()) {
2550 if (Result.Val.isInt())
2551 return RValue::get(llvm::ConstantInt::get(getLLVMContext(),
2552 Result.Val.getInt()));
2553 if (Result.Val.isFloat())
2554 return RValue::get(llvm::ConstantFP::get(getLLVMContext(),
2555 Result.Val.getFloat()));
2556 }
2557
2558 // If the current long-double semantics are IEEE 128-bit, replace math
2559 // builtins of long-double with their f128 equivalents.
2560 // TODO: This mutation should also be applied to targets other than PPC, once
2561 // the backend supports IEEE 128-bit style libcalls.
2562 if (getTarget().getTriple().isPPC64() &&
2563 &getTarget().getLongDoubleFormat() == &llvm::APFloat::IEEEquad())
2564 BuiltinID = mutateLongDoubleBuiltin(BuiltinID);
2565
2566 // If the builtin has been declared explicitly with an assembler label,
2567 // disable the specialized emitting below. Ideally we should communicate the
2568 // rename in IR, or at least avoid generating the intrinsic calls that are
2569 // likely to get lowered to the renamed library functions.
2570 const unsigned BuiltinIDIfNoAsmLabel =
2571 FD->hasAttr<AsmLabelAttr>() ? 0 : BuiltinID;
2572
2573 std::optional<bool> ErrnoOverriden;
2574 // ErrnoOverriden is true if math-errno is overridden via
2575 // '#pragma float_control(precise, on)'. This pragma disables fast-math,
2576 // which implies math-errno.
2577 if (E->hasStoredFPFeatures()) {
2578 FPOptionsOverride OP = E->getFPFeatures();
2579 if (OP.hasMathErrnoOverride())
2580 ErrnoOverriden = OP.getMathErrnoOverride();
2581 }
2582 // True if '__attribute__((optnone))' is used. This attribute overrides
2583 // fast-math, which implies math-errno.
2584 bool OptNone = CurFuncDecl && CurFuncDecl->hasAttr<OptimizeNoneAttr>();
2585
2586 // True if we are compiling with optimization and errno has been disabled
2587 // using '#pragma float_control(precise, off)', and
2588 // '__attribute__((optnone))' hasn't been seen.
2589 bool ErrnoOverridenToFalseWithOpt =
2590 ErrnoOverriden.has_value() && !ErrnoOverriden.value() && !OptNone &&
2591 CGM.getCodeGenOpts().OptimizationLevel != 0;
2592
2593 // There are LLVM math intrinsics/instructions corresponding to math library
2594 // functions except the LLVM op will never set errno while the math library
2595 // might. Also, math builtins have the same semantics as their math library
2596 // twins. Thus, we can transform math library and builtin calls to their
2597 // LLVM counterparts if the call is marked 'const' (known to never set errno).
2598 // In case FP exceptions are enabled, the experimental versions of the
2599 // intrinsics model those.
2600 bool ConstAlways =
2601 getContext().BuiltinInfo.isConst(BuiltinID);
2602
2603 // There's a special case with the fma builtins where they are always const
2604 // if the target environment is GNU, or the target OS is Windows and we're
2605 // targeting the MSVCRT.dll environment.
2606 // FIXME: This list can be become outdated. Need to find a way to get it some
2607 // other way.
2608 switch (BuiltinID) {
2609 case Builtin::BI__builtin_fma:
2610 case Builtin::BI__builtin_fmaf:
2611 case Builtin::BI__builtin_fmal:
2612 case Builtin::BI__builtin_fmaf16:
2613 case Builtin::BIfma:
2614 case Builtin::BIfmaf:
2615 case Builtin::BIfmal: {
2616 auto &Trip = CGM.getTriple();
2617 if (Trip.isGNUEnvironment() || Trip.isOSMSVCRT())
2618 ConstAlways = true;
2619 break;
2620 }
2621 default:
2622 break;
2623 }
2624
2625 bool ConstWithoutErrnoAndExceptions =
2626 getContext().BuiltinInfo.isConstWithoutErrnoAndExceptions(BuiltinID);
2627 bool ConstWithoutExceptions =
2628 getContext().BuiltinInfo.isConstWithoutExceptions(BuiltinID);
2629
2630   // ConstAttr is enabled in fast-math mode. In fast-math mode, math-errno is
2631   // disabled.
2632   // Math intrinsics are generated only when math-errno is disabled. Any pragmas
2633   // or attributes that affect math-errno should prevent or allow math
2634   // intrinsics to be generated. Intrinsics are generated:
2635   // 1- In fast-math mode, unless math-errno is overridden
2636   //    via '#pragma float_control(precise, on)', or via
2637   //    '__attribute__((optnone))'.
2638   // 2- If math-errno was enabled on the command line but overridden
2639   //    to false via '#pragma float_control(precise, off)' and
2640   //    '__attribute__((optnone))' hasn't been used.
2641   // 3- If we are compiling with optimization and errno has been disabled
2642   //    via '#pragma float_control(precise, off)', and
2643   //    '__attribute__((optnone))' hasn't been used.
2644
2645 bool ConstWithoutErrnoOrExceptions =
2646 ConstWithoutErrnoAndExceptions || ConstWithoutExceptions;
2647 bool GenerateIntrinsics =
2648 (ConstAlways && !OptNone) ||
2649 (!getLangOpts().MathErrno &&
2650 !(ErrnoOverriden.has_value() && ErrnoOverriden.value()) && !OptNone);
2651 if (!GenerateIntrinsics) {
2652 GenerateIntrinsics =
2653 ConstWithoutErrnoOrExceptions && !ConstWithoutErrnoAndExceptions;
2654 if (!GenerateIntrinsics)
2655 GenerateIntrinsics =
2656 ConstWithoutErrnoOrExceptions &&
2657 (!getLangOpts().MathErrno &&
2658 !(ErrnoOverriden.has_value() && ErrnoOverriden.value()) && !OptNone);
2659 if (!GenerateIntrinsics)
2660 GenerateIntrinsics =
2661 ConstWithoutErrnoOrExceptions && ErrnoOverridenToFalseWithOpt;
2662 }
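  // Putting the pieces together (a rough summary, not additional logic): with
  // -fno-math-errno, or in fast-math mode without an overriding pragma or
  // '__attribute__((optnone))', a call like `sin(x)` below becomes the
  // `llvm.sin.*` intrinsic; when math-errno remains in effect we fall through
  // to the plain library call instead.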
2663 if (GenerateIntrinsics) {
2664 switch (BuiltinIDIfNoAsmLabel) {
2665 case Builtin::BIacos:
2666 case Builtin::BIacosf:
2667 case Builtin::BIacosl:
2668 case Builtin::BI__builtin_acos:
2669 case Builtin::BI__builtin_acosf:
2670 case Builtin::BI__builtin_acosf16:
2671 case Builtin::BI__builtin_acosl:
2672 case Builtin::BI__builtin_acosf128:
2673 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
2674 *this, E, Intrinsic::acos, Intrinsic::experimental_constrained_acos));
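    // For example, `__builtin_acos(x)` with a double argument is emitted as
    //   %0 = call double @llvm.acos.f64(double %x)
    // or, inside a strictfp function, as the corresponding
    // @llvm.experimental.constrained.acos call carrying rounding/exception
    // metadata. The other unary math builtins below follow the same pattern.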
2675
2676 case Builtin::BIasin:
2677 case Builtin::BIasinf:
2678 case Builtin::BIasinl:
2679 case Builtin::BI__builtin_asin:
2680 case Builtin::BI__builtin_asinf:
2681 case Builtin::BI__builtin_asinf16:
2682 case Builtin::BI__builtin_asinl:
2683 case Builtin::BI__builtin_asinf128:
2684 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
2685 *this, E, Intrinsic::asin, Intrinsic::experimental_constrained_asin));
2686
2687 case Builtin::BIatan:
2688 case Builtin::BIatanf:
2689 case Builtin::BIatanl:
2690 case Builtin::BI__builtin_atan:
2691 case Builtin::BI__builtin_atanf:
2692 case Builtin::BI__builtin_atanf16:
2693 case Builtin::BI__builtin_atanl:
2694 case Builtin::BI__builtin_atanf128:
2695 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
2696 *this, E, Intrinsic::atan, Intrinsic::experimental_constrained_atan));
2697
2698 case Builtin::BIceil:
2699 case Builtin::BIceilf:
2700 case Builtin::BIceill:
2701 case Builtin::BI__builtin_ceil:
2702 case Builtin::BI__builtin_ceilf:
2703 case Builtin::BI__builtin_ceilf16:
2704 case Builtin::BI__builtin_ceill:
2705 case Builtin::BI__builtin_ceilf128:
2706 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
2707 Intrinsic::ceil,
2708 Intrinsic::experimental_constrained_ceil));
2709
2710 case Builtin::BIcopysign:
2711 case Builtin::BIcopysignf:
2712 case Builtin::BIcopysignl:
2713 case Builtin::BI__builtin_copysign:
2714 case Builtin::BI__builtin_copysignf:
2715 case Builtin::BI__builtin_copysignf16:
2716 case Builtin::BI__builtin_copysignl:
2717 case Builtin::BI__builtin_copysignf128:
2718 return RValue::get(
2719 emitBuiltinWithOneOverloadedType<2>(*this, E, Intrinsic::copysign));
2720
2721 case Builtin::BIcos:
2722 case Builtin::BIcosf:
2723 case Builtin::BIcosl:
2724 case Builtin::BI__builtin_cos:
2725 case Builtin::BI__builtin_cosf:
2726 case Builtin::BI__builtin_cosf16:
2727 case Builtin::BI__builtin_cosl:
2728 case Builtin::BI__builtin_cosf128:
2729 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
2730 Intrinsic::cos,
2731 Intrinsic::experimental_constrained_cos));
2732
2733 case Builtin::BIcosh:
2734 case Builtin::BIcoshf:
2735 case Builtin::BIcoshl:
2736 case Builtin::BI__builtin_cosh:
2737 case Builtin::BI__builtin_coshf:
2738 case Builtin::BI__builtin_coshf16:
2739 case Builtin::BI__builtin_coshl:
2740 case Builtin::BI__builtin_coshf128:
2741 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
2742 *this, E, Intrinsic::cosh, Intrinsic::experimental_constrained_cosh));
2743
2744 case Builtin::BIexp:
2745 case Builtin::BIexpf:
2746 case Builtin::BIexpl:
2747 case Builtin::BI__builtin_exp:
2748 case Builtin::BI__builtin_expf:
2749 case Builtin::BI__builtin_expf16:
2750 case Builtin::BI__builtin_expl:
2751 case Builtin::BI__builtin_expf128:
2752 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
2753 Intrinsic::exp,
2754 Intrinsic::experimental_constrained_exp));
2755
2756 case Builtin::BIexp2:
2757 case Builtin::BIexp2f:
2758 case Builtin::BIexp2l:
2759 case Builtin::BI__builtin_exp2:
2760 case Builtin::BI__builtin_exp2f:
2761 case Builtin::BI__builtin_exp2f16:
2762 case Builtin::BI__builtin_exp2l:
2763 case Builtin::BI__builtin_exp2f128:
2764 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
2765 Intrinsic::exp2,
2766 Intrinsic::experimental_constrained_exp2));
2767 case Builtin::BI__builtin_exp10:
2768 case Builtin::BI__builtin_exp10f:
2769 case Builtin::BI__builtin_exp10f16:
2770 case Builtin::BI__builtin_exp10l:
2771 case Builtin::BI__builtin_exp10f128: {
2772 // TODO: strictfp support
2773 if (Builder.getIsFPConstrained())
2774 break;
2775 return RValue::get(
2776 emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::exp10));
2777 }
2778 case Builtin::BIfabs:
2779 case Builtin::BIfabsf:
2780 case Builtin::BIfabsl:
2781 case Builtin::BI__builtin_fabs:
2782 case Builtin::BI__builtin_fabsf:
2783 case Builtin::BI__builtin_fabsf16:
2784 case Builtin::BI__builtin_fabsl:
2785 case Builtin::BI__builtin_fabsf128:
2786 return RValue::get(
2787 emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::fabs));
2788
2789 case Builtin::BIfloor:
2790 case Builtin::BIfloorf:
2791 case Builtin::BIfloorl:
2792 case Builtin::BI__builtin_floor:
2793 case Builtin::BI__builtin_floorf:
2794 case Builtin::BI__builtin_floorf16:
2795 case Builtin::BI__builtin_floorl:
2796 case Builtin::BI__builtin_floorf128:
2797 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
2798 Intrinsic::floor,
2799 Intrinsic::experimental_constrained_floor));
2800
2801 case Builtin::BIfma:
2802 case Builtin::BIfmaf:
2803 case Builtin::BIfmal:
2804 case Builtin::BI__builtin_fma:
2805 case Builtin::BI__builtin_fmaf:
2806 case Builtin::BI__builtin_fmaf16:
2807 case Builtin::BI__builtin_fmal:
2808 case Builtin::BI__builtin_fmaf128:
2809 return RValue::get(emitTernaryMaybeConstrainedFPBuiltin(*this, E,
2810 Intrinsic::fma,
2811 Intrinsic::experimental_constrained_fma));
2812
2813 case Builtin::BIfmax:
2814 case Builtin::BIfmaxf:
2815 case Builtin::BIfmaxl:
2816 case Builtin::BI__builtin_fmax:
2817 case Builtin::BI__builtin_fmaxf:
2818 case Builtin::BI__builtin_fmaxf16:
2819 case Builtin::BI__builtin_fmaxl:
2820 case Builtin::BI__builtin_fmaxf128:
2821 return RValue::get(emitBinaryMaybeConstrainedFPBuiltin(*this, E,
2822 Intrinsic::maxnum,
2823 Intrinsic::experimental_constrained_maxnum));
2824
2825 case Builtin::BIfmin:
2826 case Builtin::BIfminf:
2827 case Builtin::BIfminl:
2828 case Builtin::BI__builtin_fmin:
2829 case Builtin::BI__builtin_fminf:
2830 case Builtin::BI__builtin_fminf16:
2831 case Builtin::BI__builtin_fminl:
2832 case Builtin::BI__builtin_fminf128:
2833 return RValue::get(emitBinaryMaybeConstrainedFPBuiltin(*this, E,
2834 Intrinsic::minnum,
2835 Intrinsic::experimental_constrained_minnum));
2836
2837     // fmod() is a special case. It maps to the frem instruction rather than an
2838     // LLVM intrinsic.
2839 case Builtin::BIfmod:
2840 case Builtin::BIfmodf:
2841 case Builtin::BIfmodl:
2842 case Builtin::BI__builtin_fmod:
2843 case Builtin::BI__builtin_fmodf:
2844 case Builtin::BI__builtin_fmodf16:
2845 case Builtin::BI__builtin_fmodl:
2846 case Builtin::BI__builtin_fmodf128: {
2847 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
2848 Value *Arg1 = EmitScalarExpr(E->getArg(0));
2849 Value *Arg2 = EmitScalarExpr(E->getArg(1));
2850 return RValue::get(Builder.CreateFRem(Arg1, Arg2, "fmod"));
2851 }
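    // For example, `fmod(a, b)` on doubles is emitted as
    //   %fmod = frem double %a, %b
    // instead of an intrinsic call; frem provides the fmod-style remainder
    // semantics directly.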
2852
2853 case Builtin::BIlog:
2854 case Builtin::BIlogf:
2855 case Builtin::BIlogl:
2856 case Builtin::BI__builtin_log:
2857 case Builtin::BI__builtin_logf:
2858 case Builtin::BI__builtin_logf16:
2859 case Builtin::BI__builtin_logl:
2860 case Builtin::BI__builtin_logf128:
2861 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
2862 Intrinsic::log,
2863 Intrinsic::experimental_constrained_log));
2864
2865 case Builtin::BIlog10:
2866 case Builtin::BIlog10f:
2867 case Builtin::BIlog10l:
2868 case Builtin::BI__builtin_log10:
2869 case Builtin::BI__builtin_log10f:
2870 case Builtin::BI__builtin_log10f16:
2871 case Builtin::BI__builtin_log10l:
2872 case Builtin::BI__builtin_log10f128:
2873 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
2874 Intrinsic::log10,
2875 Intrinsic::experimental_constrained_log10));
2876
2877 case Builtin::BIlog2:
2878 case Builtin::BIlog2f:
2879 case Builtin::BIlog2l:
2880 case Builtin::BI__builtin_log2:
2881 case Builtin::BI__builtin_log2f:
2882 case Builtin::BI__builtin_log2f16:
2883 case Builtin::BI__builtin_log2l:
2884 case Builtin::BI__builtin_log2f128:
2885 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
2886 Intrinsic::log2,
2887 Intrinsic::experimental_constrained_log2));
2888
2889 case Builtin::BInearbyint:
2890 case Builtin::BInearbyintf:
2891 case Builtin::BInearbyintl:
2892 case Builtin::BI__builtin_nearbyint:
2893 case Builtin::BI__builtin_nearbyintf:
2894 case Builtin::BI__builtin_nearbyintl:
2895 case Builtin::BI__builtin_nearbyintf128:
2896 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
2897 Intrinsic::nearbyint,
2898 Intrinsic::experimental_constrained_nearbyint));
2899
2900 case Builtin::BIpow:
2901 case Builtin::BIpowf:
2902 case Builtin::BIpowl:
2903 case Builtin::BI__builtin_pow:
2904 case Builtin::BI__builtin_powf:
2905 case Builtin::BI__builtin_powf16:
2906 case Builtin::BI__builtin_powl:
2907 case Builtin::BI__builtin_powf128:
2908 return RValue::get(emitBinaryMaybeConstrainedFPBuiltin(*this, E,
2909 Intrinsic::pow,
2910 Intrinsic::experimental_constrained_pow));
2911
2912 case Builtin::BIrint:
2913 case Builtin::BIrintf:
2914 case Builtin::BIrintl:
2915 case Builtin::BI__builtin_rint:
2916 case Builtin::BI__builtin_rintf:
2917 case Builtin::BI__builtin_rintf16:
2918 case Builtin::BI__builtin_rintl:
2919 case Builtin::BI__builtin_rintf128:
2920 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
2921 Intrinsic::rint,
2922 Intrinsic::experimental_constrained_rint));
2923
2924 case Builtin::BIround:
2925 case Builtin::BIroundf:
2926 case Builtin::BIroundl:
2927 case Builtin::BI__builtin_round:
2928 case Builtin::BI__builtin_roundf:
2929 case Builtin::BI__builtin_roundf16:
2930 case Builtin::BI__builtin_roundl:
2931 case Builtin::BI__builtin_roundf128:
2932 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
2933 Intrinsic::round,
2934 Intrinsic::experimental_constrained_round));
2935
2936 case Builtin::BIroundeven:
2937 case Builtin::BIroundevenf:
2938 case Builtin::BIroundevenl:
2939 case Builtin::BI__builtin_roundeven:
2940 case Builtin::BI__builtin_roundevenf:
2941 case Builtin::BI__builtin_roundevenf16:
2942 case Builtin::BI__builtin_roundevenl:
2943 case Builtin::BI__builtin_roundevenf128:
2944 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
2945 Intrinsic::roundeven,
2946 Intrinsic::experimental_constrained_roundeven));
2947
2948 case Builtin::BIsin:
2949 case Builtin::BIsinf:
2950 case Builtin::BIsinl:
2951 case Builtin::BI__builtin_sin:
2952 case Builtin::BI__builtin_sinf:
2953 case Builtin::BI__builtin_sinf16:
2954 case Builtin::BI__builtin_sinl:
2955 case Builtin::BI__builtin_sinf128:
2956 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
2957 Intrinsic::sin,
2958 Intrinsic::experimental_constrained_sin));
2959
2960 case Builtin::BIsinh:
2961 case Builtin::BIsinhf:
2962 case Builtin::BIsinhl:
2963 case Builtin::BI__builtin_sinh:
2964 case Builtin::BI__builtin_sinhf:
2965 case Builtin::BI__builtin_sinhf16:
2966 case Builtin::BI__builtin_sinhl:
2967 case Builtin::BI__builtin_sinhf128:
2968 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
2969 *this, E, Intrinsic::sinh, Intrinsic::experimental_constrained_sinh));
2970
2971 case Builtin::BIsqrt:
2972 case Builtin::BIsqrtf:
2973 case Builtin::BIsqrtl:
2974 case Builtin::BI__builtin_sqrt:
2975 case Builtin::BI__builtin_sqrtf:
2976 case Builtin::BI__builtin_sqrtf16:
2977 case Builtin::BI__builtin_sqrtl:
2978 case Builtin::BI__builtin_sqrtf128:
2979 case Builtin::BI__builtin_elementwise_sqrt: {
2980 llvm::Value *Call = emitUnaryMaybeConstrainedFPBuiltin(
2981 *this, E, Intrinsic::sqrt, Intrinsic::experimental_constrained_sqrt);
2982 SetSqrtFPAccuracy(Call);
2983 return RValue::get(Call);
2984 }
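    // For example, `sqrtf(x)` (with errno ignorable) becomes
    //   %0 = call float @llvm.sqrt.f32(float %x)
    // and SetSqrtFPAccuracy may attach !fpmath metadata in language modes that
    // allow reduced-precision square roots (e.g. certain OpenCL/HLSL options).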
2985
2986 case Builtin::BItan:
2987 case Builtin::BItanf:
2988 case Builtin::BItanl:
2989 case Builtin::BI__builtin_tan:
2990 case Builtin::BI__builtin_tanf:
2991 case Builtin::BI__builtin_tanf16:
2992 case Builtin::BI__builtin_tanl:
2993 case Builtin::BI__builtin_tanf128:
2994 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
2995 *this, E, Intrinsic::tan, Intrinsic::experimental_constrained_tan));
2996
2997 case Builtin::BItanh:
2998 case Builtin::BItanhf:
2999 case Builtin::BItanhl:
3000 case Builtin::BI__builtin_tanh:
3001 case Builtin::BI__builtin_tanhf:
3002 case Builtin::BI__builtin_tanhf16:
3003 case Builtin::BI__builtin_tanhl:
3004 case Builtin::BI__builtin_tanhf128:
3005 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
3006 *this, E, Intrinsic::tanh, Intrinsic::experimental_constrained_tanh));
3007
3008 case Builtin::BItrunc:
3009 case Builtin::BItruncf:
3010 case Builtin::BItruncl:
3011 case Builtin::BI__builtin_trunc:
3012 case Builtin::BI__builtin_truncf:
3013 case Builtin::BI__builtin_truncf16:
3014 case Builtin::BI__builtin_truncl:
3015 case Builtin::BI__builtin_truncf128:
3016 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
3017 Intrinsic::trunc,
3018 Intrinsic::experimental_constrained_trunc));
3019
3020 case Builtin::BIlround:
3021 case Builtin::BIlroundf:
3022 case Builtin::BIlroundl:
3023 case Builtin::BI__builtin_lround:
3024 case Builtin::BI__builtin_lroundf:
3025 case Builtin::BI__builtin_lroundl:
3026 case Builtin::BI__builtin_lroundf128:
3027 return RValue::get(emitMaybeConstrainedFPToIntRoundBuiltin(
3028 *this, E, Intrinsic::lround,
3029 Intrinsic::experimental_constrained_lround));
3030
3031 case Builtin::BIllround:
3032 case Builtin::BIllroundf:
3033 case Builtin::BIllroundl:
3034 case Builtin::BI__builtin_llround:
3035 case Builtin::BI__builtin_llroundf:
3036 case Builtin::BI__builtin_llroundl:
3037 case Builtin::BI__builtin_llroundf128:
3038 return RValue::get(emitMaybeConstrainedFPToIntRoundBuiltin(
3039 *this, E, Intrinsic::llround,
3040 Intrinsic::experimental_constrained_llround));
3041
3042 case Builtin::BIlrint:
3043 case Builtin::BIlrintf:
3044 case Builtin::BIlrintl:
3045 case Builtin::BI__builtin_lrint:
3046 case Builtin::BI__builtin_lrintf:
3047 case Builtin::BI__builtin_lrintl:
3048 case Builtin::BI__builtin_lrintf128:
3049 return RValue::get(emitMaybeConstrainedFPToIntRoundBuiltin(
3050 *this, E, Intrinsic::lrint,
3051 Intrinsic::experimental_constrained_lrint));
3052
3053 case Builtin::BIllrint:
3054 case Builtin::BIllrintf:
3055 case Builtin::BIllrintl:
3056 case Builtin::BI__builtin_llrint:
3057 case Builtin::BI__builtin_llrintf:
3058 case Builtin::BI__builtin_llrintl:
3059 case Builtin::BI__builtin_llrintf128:
3060 return RValue::get(emitMaybeConstrainedFPToIntRoundBuiltin(
3061 *this, E, Intrinsic::llrint,
3062 Intrinsic::experimental_constrained_llrint));
3063 case Builtin::BI__builtin_ldexp:
3064 case Builtin::BI__builtin_ldexpf:
3065 case Builtin::BI__builtin_ldexpl:
3066 case Builtin::BI__builtin_ldexpf16:
3067 case Builtin::BI__builtin_ldexpf128: {
3068 return RValue::get(emitBinaryExpMaybeConstrainedFPBuiltin(
3069 *this, E, Intrinsic::ldexp,
3070 Intrinsic::experimental_constrained_ldexp));
3071 }
3072 default:
3073 break;
3074 }
3075 }
3076
3077 // Check NonnullAttribute/NullabilityArg and Alignment.
3078 auto EmitArgCheck = [&](TypeCheckKind Kind, Address A, const Expr *Arg,
3079 unsigned ParmNum) {
3080 Value *Val = A.emitRawPointer(*this);
3081 EmitNonNullArgCheck(RValue::get(Val), Arg->getType(), Arg->getExprLoc(), FD,
3082 ParmNum);
3083
3084 if (SanOpts.has(SanitizerKind::Alignment)) {
3085 SanitizerSet SkippedChecks;
3086 SkippedChecks.set(SanitizerKind::All);
3087 SkippedChecks.clear(SanitizerKind::Alignment);
3088 SourceLocation Loc = Arg->getExprLoc();
3089 // Strip an implicit cast.
3090 if (auto *CE = dyn_cast<ImplicitCastExpr>(Arg))
3091 if (CE->getCastKind() == CK_BitCast)
3092 Arg = CE->getSubExpr();
3093 EmitTypeCheck(Kind, Loc, Val, Arg->getType(), A.getAlignment(),
3094 SkippedChecks);
3095 }
3096 };
3097
3098 switch (BuiltinIDIfNoAsmLabel) {
3099 default: break;
3100 case Builtin::BI__builtin___CFStringMakeConstantString:
3101 case Builtin::BI__builtin___NSStringMakeConstantString:
3102 return RValue::get(ConstantEmitter(*this).emitAbstract(E, E->getType()));
3103 case Builtin::BI__builtin_stdarg_start:
3104 case Builtin::BI__builtin_va_start:
3105 case Builtin::BI__va_start:
3106 case Builtin::BI__builtin_va_end:
3107 EmitVAStartEnd(BuiltinID == Builtin::BI__va_start
3108 ? EmitScalarExpr(E->getArg(0))
3109 : EmitVAListRef(E->getArg(0)).emitRawPointer(*this),
3110 BuiltinID != Builtin::BI__builtin_va_end);
3111 return RValue::get(nullptr);
3112 case Builtin::BI__builtin_va_copy: {
3113 Value *DstPtr = EmitVAListRef(E->getArg(0)).emitRawPointer(*this);
3114 Value *SrcPtr = EmitVAListRef(E->getArg(1)).emitRawPointer(*this);
3115 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::vacopy, {DstPtr->getType()}),
3116 {DstPtr, SrcPtr});
3117 return RValue::get(nullptr);
3118 }
3119 case Builtin::BIabs:
3120 case Builtin::BIlabs:
3121 case Builtin::BIllabs:
3122 case Builtin::BI__builtin_abs:
3123 case Builtin::BI__builtin_labs:
3124 case Builtin::BI__builtin_llabs: {
3125 bool SanitizeOverflow = SanOpts.has(SanitizerKind::SignedIntegerOverflow);
3126
3127 Value *Result;
3128 switch (getLangOpts().getSignedOverflowBehavior()) {
3129 case LangOptions::SOB_Defined:
3130 Result = EmitAbs(*this, EmitScalarExpr(E->getArg(0)), false);
3131 break;
3132 case LangOptions::SOB_Undefined:
3133 if (!SanitizeOverflow) {
3134 Result = EmitAbs(*this, EmitScalarExpr(E->getArg(0)), true);
3135 break;
3136 }
3137 [[fallthrough]];
3138 case LangOptions::SOB_Trapping:
3139 // TODO: Somehow handle the corner case when the address of abs is taken.
3140 Result = EmitOverflowCheckedAbs(*this, E, SanitizeOverflow);
3141 break;
3142 }
3143 return RValue::get(Result);
3144 }
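  // For example, under -fwrapv (SOB_Defined) `abs(x)` for an int lowers to
  //   %r = call i32 @llvm.abs.i32(i32 %x, i1 false)
  // whereas with -fsanitize=signed-integer-overflow or -ftrapv the
  // overflow-checked expansion is used so abs(INT_MIN) is diagnosed or traps.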
3145 case Builtin::BI__builtin_complex: {
3146 Value *Real = EmitScalarExpr(E->getArg(0));
3147 Value *Imag = EmitScalarExpr(E->getArg(1));
3148 return RValue::getComplex({Real, Imag});
3149 }
3150 case Builtin::BI__builtin_conj:
3151 case Builtin::BI__builtin_conjf:
3152 case Builtin::BI__builtin_conjl:
3153 case Builtin::BIconj:
3154 case Builtin::BIconjf:
3155 case Builtin::BIconjl: {
3156 ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
3157 Value *Real = ComplexVal.first;
3158 Value *Imag = ComplexVal.second;
3159 Imag = Builder.CreateFNeg(Imag, "neg");
3160 return RValue::getComplex(std::make_pair(Real, Imag));
3161 }
3162 case Builtin::BI__builtin_creal:
3163 case Builtin::BI__builtin_crealf:
3164 case Builtin::BI__builtin_creall:
3165 case Builtin::BIcreal:
3166 case Builtin::BIcrealf:
3167 case Builtin::BIcreall: {
3168 ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
3169 return RValue::get(ComplexVal.first);
3170 }
3171
3172 case Builtin::BI__builtin_preserve_access_index: {
3173     // Only enable the preserved access index region when debug info
3174     // is available, since debug info is needed to preserve the
3175     // user-level access pattern.
3176 if (!getDebugInfo()) {
3177 CGM.Error(E->getExprLoc(), "using builtin_preserve_access_index() without -g");
3178 return RValue::get(EmitScalarExpr(E->getArg(0)));
3179 }
3180
3181 // Nested builtin_preserve_access_index() not supported
3182 if (IsInPreservedAIRegion) {
3183 CGM.Error(E->getExprLoc(), "nested builtin_preserve_access_index() not supported");
3184 return RValue::get(EmitScalarExpr(E->getArg(0)));
3185 }
3186
3187 IsInPreservedAIRegion = true;
3188 Value *Res = EmitScalarExpr(E->getArg(0));
3189 IsInPreservedAIRegion = false;
3190 return RValue::get(Res);
3191 }
3192
3193 case Builtin::BI__builtin_cimag:
3194 case Builtin::BI__builtin_cimagf:
3195 case Builtin::BI__builtin_cimagl:
3196 case Builtin::BIcimag:
3197 case Builtin::BIcimagf:
3198 case Builtin::BIcimagl: {
3199 ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
3200 return RValue::get(ComplexVal.second);
3201 }
3202
3203 case Builtin::BI__builtin_clrsb:
3204 case Builtin::BI__builtin_clrsbl:
3205 case Builtin::BI__builtin_clrsbll: {
3206     // clrsb(x) -> clz(x < 0 ? ~x : x) - 1
3207 Value *ArgValue = EmitScalarExpr(E->getArg(0));
3208
3209 llvm::Type *ArgType = ArgValue->getType();
3210 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
3211
3212 llvm::Type *ResultType = ConvertType(E->getType());
3213 Value *Zero = llvm::Constant::getNullValue(ArgType);
3214 Value *IsNeg = Builder.CreateICmpSLT(ArgValue, Zero, "isneg");
3215 Value *Inverse = Builder.CreateNot(ArgValue, "not");
3216 Value *Tmp = Builder.CreateSelect(IsNeg, Inverse, ArgValue);
3217 Value *Ctlz = Builder.CreateCall(F, {Tmp, Builder.getFalse()});
3218 Value *Result = Builder.CreateSub(Ctlz, llvm::ConstantInt::get(ArgType, 1));
3219 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
3220 "cast");
3221 return RValue::get(Result);
3222 }
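  // For a 32-bit argument this expands to roughly:
  //   %not   = xor i32 %x, -1
  //   %tmp   = select i1 %isneg, i32 %not, i32 %x
  //   %ctlz  = call i32 @llvm.ctlz.i32(i32 %tmp, i1 false)
  //   %clrsb = sub i32 %ctlz, 1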
3223 case Builtin::BI__builtin_ctzs:
3224 case Builtin::BI__builtin_ctz:
3225 case Builtin::BI__builtin_ctzl:
3226 case Builtin::BI__builtin_ctzll:
3227 case Builtin::BI__builtin_ctzg: {
3228 bool HasFallback = BuiltinIDIfNoAsmLabel == Builtin::BI__builtin_ctzg &&
3229 E->getNumArgs() > 1;
3230
3231 Value *ArgValue =
3232 HasFallback ? EmitScalarExpr(E->getArg(0))
3233 : EmitCheckedArgForBuiltin(E->getArg(0), BCK_CTZPassedZero);
3234
3235 llvm::Type *ArgType = ArgValue->getType();
3236 Function *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
3237
3238 llvm::Type *ResultType = ConvertType(E->getType());
3239 Value *ZeroUndef =
3240 Builder.getInt1(HasFallback || getTarget().isCLZForZeroUndef());
3241 Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef});
3242 if (Result->getType() != ResultType)
3243 Result =
3244 Builder.CreateIntCast(Result, ResultType, /*isSigned*/ false, "cast");
3245 if (!HasFallback)
3246 return RValue::get(Result);
3247
3248 Value *Zero = Constant::getNullValue(ArgType);
3249 Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero");
3250 Value *FallbackValue = EmitScalarExpr(E->getArg(1));
3251 Value *ResultOrFallback =
3252 Builder.CreateSelect(IsZero, FallbackValue, Result, "ctzg");
3253 return RValue::get(ResultOrFallback);
3254 }
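  // For example, `__builtin_ctzg(x, 32)` for a 32-bit x emits roughly
  //   %ctz = call i32 @llvm.cttz.i32(i32 %x, i1 true)
  //   %z   = icmp eq i32 %x, 0
  //   %r   = select i1 %z, i32 32, i32 %ctz
  // while the fallback-less forms require a nonzero argument (optionally
  // enforced by the BCK_CTZPassedZero check above).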
3255 case Builtin::BI__builtin_clzs:
3256 case Builtin::BI__builtin_clz:
3257 case Builtin::BI__builtin_clzl:
3258 case Builtin::BI__builtin_clzll:
3259 case Builtin::BI__builtin_clzg: {
3260 bool HasFallback = BuiltinIDIfNoAsmLabel == Builtin::BI__builtin_clzg &&
3261 E->getNumArgs() > 1;
3262
3263 Value *ArgValue =
3264 HasFallback ? EmitScalarExpr(E->getArg(0))
3265 : EmitCheckedArgForBuiltin(E->getArg(0), BCK_CLZPassedZero);
3266
3267 llvm::Type *ArgType = ArgValue->getType();
3268 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
3269
3270 llvm::Type *ResultType = ConvertType(E->getType());
3271 Value *ZeroUndef =
3272 Builder.getInt1(HasFallback || getTarget().isCLZForZeroUndef());
3273 Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef});
3274 if (Result->getType() != ResultType)
3275 Result =
3276 Builder.CreateIntCast(Result, ResultType, /*isSigned*/ false, "cast");
3277 if (!HasFallback)
3278 return RValue::get(Result);
3279
3280 Value *Zero = Constant::getNullValue(ArgType);
3281 Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero");
3282 Value *FallbackValue = EmitScalarExpr(E->getArg(1));
3283 Value *ResultOrFallback =
3284 Builder.CreateSelect(IsZero, FallbackValue, Result, "clzg");
3285 return RValue::get(ResultOrFallback);
3286 }
3287 case Builtin::BI__builtin_ffs:
3288 case Builtin::BI__builtin_ffsl:
3289 case Builtin::BI__builtin_ffsll: {
3290 // ffs(x) -> x ? cttz(x) + 1 : 0
3291 Value *ArgValue = EmitScalarExpr(E->getArg(0));
3292
3293 llvm::Type *ArgType = ArgValue->getType();
3294 Function *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
3295
3296 llvm::Type *ResultType = ConvertType(E->getType());
3297 Value *Tmp =
3298 Builder.CreateAdd(Builder.CreateCall(F, {ArgValue, Builder.getTrue()}),
3299 llvm::ConstantInt::get(ArgType, 1));
3300 Value *Zero = llvm::Constant::getNullValue(ArgType);
3301 Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero");
3302 Value *Result = Builder.CreateSelect(IsZero, Zero, Tmp, "ffs");
3303 if (Result->getType() != ResultType)
3304 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
3305 "cast");
3306 return RValue::get(Result);
3307 }
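  // For a 32-bit argument this expands to roughly:
  //   %ctz = call i32 @llvm.cttz.i32(i32 %x, i1 true)
  //   %tmp = add i32 %ctz, 1
  //   %ffs = select i1 %iszero, i32 0, i32 %tmp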
3308 case Builtin::BI__builtin_parity:
3309 case Builtin::BI__builtin_parityl:
3310 case Builtin::BI__builtin_parityll: {
3311 // parity(x) -> ctpop(x) & 1
3312 Value *ArgValue = EmitScalarExpr(E->getArg(0));
3313
3314 llvm::Type *ArgType = ArgValue->getType();
3315 Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
3316
3317 llvm::Type *ResultType = ConvertType(E->getType());
3318 Value *Tmp = Builder.CreateCall(F, ArgValue);
3319 Value *Result = Builder.CreateAnd(Tmp, llvm::ConstantInt::get(ArgType, 1));
3320 if (Result->getType() != ResultType)
3321 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
3322 "cast");
3323 return RValue::get(Result);
3324 }
3325 case Builtin::BI__lzcnt16:
3326 case Builtin::BI__lzcnt:
3327 case Builtin::BI__lzcnt64: {
3328 Value *ArgValue = EmitScalarExpr(E->getArg(0));
3329
3330 llvm::Type *ArgType = ArgValue->getType();
3331 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
3332
3333 llvm::Type *ResultType = ConvertType(E->getType());
3334 Value *Result = Builder.CreateCall(F, {ArgValue, Builder.getFalse()});
3335 if (Result->getType() != ResultType)
3336 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
3337 "cast");
3338 return RValue::get(Result);
3339 }
3340 case Builtin::BI__popcnt16:
3341 case Builtin::BI__popcnt:
3342 case Builtin::BI__popcnt64:
3343 case Builtin::BI__builtin_popcount:
3344 case Builtin::BI__builtin_popcountl:
3345 case Builtin::BI__builtin_popcountll:
3346 case Builtin::BI__builtin_popcountg: {
3347 Value *ArgValue = EmitScalarExpr(E->getArg(0));
3348
3349 llvm::Type *ArgType = ArgValue->getType();
3350 Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
3351
3352 llvm::Type *ResultType = ConvertType(E->getType());
3353 Value *Result = Builder.CreateCall(F, ArgValue);
3354 if (Result->getType() != ResultType)
3355 Result =
3356 Builder.CreateIntCast(Result, ResultType, /*isSigned*/ false, "cast");
3357 return RValue::get(Result);
3358 }
3359 case Builtin::BI__builtin_unpredictable: {
3360 // Always return the argument of __builtin_unpredictable. LLVM does not
3361 // handle this builtin. Metadata for this builtin should be added directly
3362 // to instructions such as branches or switches that use it.
3363 return RValue::get(EmitScalarExpr(E->getArg(0)));
3364 }
3365 case Builtin::BI__builtin_expect: {
3366 Value *ArgValue = EmitScalarExpr(E->getArg(0));
3367 llvm::Type *ArgType = ArgValue->getType();
3368
3369 Value *ExpectedValue = EmitScalarExpr(E->getArg(1));
3370     // Don't generate llvm.expect at -O0, as the backend won't use it for
3371     // anything.
3372     // Note that we still IRGen ExpectedValue because it could have side effects.
3373 if (CGM.getCodeGenOpts().OptimizationLevel == 0)
3374 return RValue::get(ArgValue);
3375
3376 Function *FnExpect = CGM.getIntrinsic(Intrinsic::expect, ArgType);
3377 Value *Result =
3378 Builder.CreateCall(FnExpect, {ArgValue, ExpectedValue}, "expval");
3379 return RValue::get(Result);
3380 }
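  // For example, on an LP64 target and at -O1 or above,
  //   if (__builtin_expect(cond, 0)) ...
  // yields
  //   %expval = call i64 @llvm.expect.i64(i64 %cond, i64 0)
  // which the optimizer later turns into branch-weight metadata; at -O0 the
  // raw condition is returned unchanged.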
3381 case Builtin::BI__builtin_expect_with_probability: {
3382 Value *ArgValue = EmitScalarExpr(E->getArg(0));
3383 llvm::Type *ArgType = ArgValue->getType();
3384
3385 Value *ExpectedValue = EmitScalarExpr(E->getArg(1));
3386 llvm::APFloat Probability(0.0);
3387 const Expr *ProbArg = E->getArg(2);
3388 bool EvalSucceed = ProbArg->EvaluateAsFloat(Probability, CGM.getContext());
3389 assert(EvalSucceed && "probability should be able to evaluate as float");
3390 (void)EvalSucceed;
3391 bool LoseInfo = false;
3392 Probability.convert(llvm::APFloat::IEEEdouble(),
3393 llvm::RoundingMode::Dynamic, &LoseInfo);
3394 llvm::Type *Ty = ConvertType(ProbArg->getType());
3395 Constant *Confidence = ConstantFP::get(Ty, Probability);
3396     // Don't generate llvm.expect.with.probability at -O0, as the backend
3397     // won't use it for anything.
3398     // Note that we still IRGen ExpectedValue because it could have side effects.
3399 if (CGM.getCodeGenOpts().OptimizationLevel == 0)
3400 return RValue::get(ArgValue);
3401
3402 Function *FnExpect =
3403 CGM.getIntrinsic(Intrinsic::expect_with_probability, ArgType);
3404 Value *Result = Builder.CreateCall(
3405 FnExpect, {ArgValue, ExpectedValue, Confidence}, "expval");
3406 return RValue::get(Result);
3407 }
3408 case Builtin::BI__builtin_assume_aligned: {
3409 const Expr *Ptr = E->getArg(0);
3410 Value *PtrValue = EmitScalarExpr(Ptr);
3411 Value *OffsetValue =
3412 (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) : nullptr;
3413
3414 Value *AlignmentValue = EmitScalarExpr(E->getArg(1));
3415 ConstantInt *AlignmentCI = cast<ConstantInt>(AlignmentValue);
3416 if (AlignmentCI->getValue().ugt(llvm::Value::MaximumAlignment))
3417 AlignmentCI = ConstantInt::get(AlignmentCI->getIntegerType(),
3418 llvm::Value::MaximumAlignment);
3419
3420 emitAlignmentAssumption(PtrValue, Ptr,
3421 /*The expr loc is sufficient.*/ SourceLocation(),
3422 AlignmentCI, OffsetValue);
3423 return RValue::get(PtrValue);
3424 }
3425 case Builtin::BI__assume:
3426 case Builtin::BI__builtin_assume: {
3427 if (E->getArg(0)->HasSideEffects(getContext()))
3428 return RValue::get(nullptr);
3429
3430 Value *ArgValue = EmitScalarExpr(E->getArg(0));
3431 Function *FnAssume = CGM.getIntrinsic(Intrinsic::assume);
3432 Builder.CreateCall(FnAssume, ArgValue);
3433 return RValue::get(nullptr);
3434 }
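  // For example, `__builtin_assume(n > 0)` becomes
  //   %cmp = icmp sgt i32 %n, 0
  //   call void @llvm.assume(i1 %cmp)
  // while an argument with side effects is simply dropped, since the builtin
  // must not evaluate its operand.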
3435 case Builtin::BI__builtin_assume_separate_storage: {
3436 const Expr *Arg0 = E->getArg(0);
3437 const Expr *Arg1 = E->getArg(1);
3438
3439 Value *Value0 = EmitScalarExpr(Arg0);
3440 Value *Value1 = EmitScalarExpr(Arg1);
3441
3442 Value *Values[] = {Value0, Value1};
3443 OperandBundleDefT<Value *> OBD("separate_storage", Values);
3444 Builder.CreateAssumption(ConstantInt::getTrue(getLLVMContext()), {OBD});
3445 return RValue::get(nullptr);
3446 }
3447 case Builtin::BI__builtin_allow_runtime_check: {
3448 StringRef Kind =
3449 cast<StringLiteral>(E->getArg(0)->IgnoreParenCasts())->getString();
3450 LLVMContext &Ctx = CGM.getLLVMContext();
3451 llvm::Value *Allow = Builder.CreateCall(
3452 CGM.getIntrinsic(llvm::Intrinsic::allow_runtime_check),
3453 llvm::MetadataAsValue::get(Ctx, llvm::MDString::get(Ctx, Kind)));
3454 return RValue::get(Allow);
3455 }
3456 case Builtin::BI__arithmetic_fence: {
3457     // Create the builtin call if FastMath is selected and the target supports
3458     // the builtin; otherwise just return the argument.
3459 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3460 llvm::FastMathFlags FMF = Builder.getFastMathFlags();
3461 bool isArithmeticFenceEnabled =
3462 FMF.allowReassoc() &&
3463 getContext().getTargetInfo().checkArithmeticFenceSupported();
3464 QualType ArgType = E->getArg(0)->getType();
3465 if (ArgType->isComplexType()) {
3466 if (isArithmeticFenceEnabled) {
3467 QualType ElementType = ArgType->castAs<ComplexType>()->getElementType();
3468 ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
3469 Value *Real = Builder.CreateArithmeticFence(ComplexVal.first,
3470 ConvertType(ElementType));
3471 Value *Imag = Builder.CreateArithmeticFence(ComplexVal.second,
3472 ConvertType(ElementType));
3473 return RValue::getComplex(std::make_pair(Real, Imag));
3474 }
3475 ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
3476 Value *Real = ComplexVal.first;
3477 Value *Imag = ComplexVal.second;
3478 return RValue::getComplex(std::make_pair(Real, Imag));
3479 }
3480 Value *ArgValue = EmitScalarExpr(E->getArg(0));
3481 if (isArithmeticFenceEnabled)
3482 return RValue::get(
3483 Builder.CreateArithmeticFence(ArgValue, ConvertType(ArgType)));
3484 return RValue::get(ArgValue);
3485 }
3486 case Builtin::BI__builtin_bswap16:
3487 case Builtin::BI__builtin_bswap32:
3488 case Builtin::BI__builtin_bswap64:
3489 case Builtin::BI_byteswap_ushort:
3490 case Builtin::BI_byteswap_ulong:
3491 case Builtin::BI_byteswap_uint64: {
3492 return RValue::get(
3493 emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::bswap));
3494 }
3495 case Builtin::BI__builtin_bitreverse8:
3496 case Builtin::BI__builtin_bitreverse16:
3497 case Builtin::BI__builtin_bitreverse32:
3498 case Builtin::BI__builtin_bitreverse64: {
3499 return RValue::get(
3500 emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::bitreverse));
3501 }
3502 case Builtin::BI__builtin_rotateleft8:
3503 case Builtin::BI__builtin_rotateleft16:
3504 case Builtin::BI__builtin_rotateleft32:
3505 case Builtin::BI__builtin_rotateleft64:
3506 case Builtin::BI_rotl8: // Microsoft variants of rotate left
3507 case Builtin::BI_rotl16:
3508 case Builtin::BI_rotl:
3509 case Builtin::BI_lrotl:
3510 case Builtin::BI_rotl64:
3511 return emitRotate(E, false);
3512
3513 case Builtin::BI__builtin_rotateright8:
3514 case Builtin::BI__builtin_rotateright16:
3515 case Builtin::BI__builtin_rotateright32:
3516 case Builtin::BI__builtin_rotateright64:
3517 case Builtin::BI_rotr8: // Microsoft variants of rotate right
3518 case Builtin::BI_rotr16:
3519 case Builtin::BI_rotr:
3520 case Builtin::BI_lrotr:
3521 case Builtin::BI_rotr64:
3522 return emitRotate(E, true);
3523
3524 case Builtin::BI__builtin_constant_p: {
3525 llvm::Type *ResultType = ConvertType(E->getType());
3526
3527 const Expr *Arg = E->getArg(0);
3528 QualType ArgType = Arg->getType();
3529 // FIXME: The allowance for Obj-C pointers and block pointers is historical
3530 // and likely a mistake.
3531 if (!ArgType->isIntegralOrEnumerationType() && !ArgType->isFloatingType() &&
3532 !ArgType->isObjCObjectPointerType() && !ArgType->isBlockPointerType())
3533 // Per the GCC documentation, only numeric constants are recognized after
3534 // inlining.
3535 return RValue::get(ConstantInt::get(ResultType, 0));
3536
3537 if (Arg->HasSideEffects(getContext()))
3538 // The argument is unevaluated, so be conservative if it might have
3539 // side-effects.
3540 return RValue::get(ConstantInt::get(ResultType, 0));
3541
3542 Value *ArgValue = EmitScalarExpr(Arg);
3543 if (ArgType->isObjCObjectPointerType()) {
3544 // Convert Objective-C objects to id because we cannot distinguish between
3545 // LLVM types for Obj-C classes as they are opaque.
3546 ArgType = CGM.getContext().getObjCIdType();
3547 ArgValue = Builder.CreateBitCast(ArgValue, ConvertType(ArgType));
3548 }
3549 Function *F =
3550 CGM.getIntrinsic(Intrinsic::is_constant, ConvertType(ArgType));
3551 Value *Result = Builder.CreateCall(F, ArgValue);
3552 if (Result->getType() != ResultType)
3553 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/false);
3554 return RValue::get(Result);
3555 }
3556 case Builtin::BI__builtin_dynamic_object_size:
3557 case Builtin::BI__builtin_object_size: {
3558 unsigned Type =
3559 E->getArg(1)->EvaluateKnownConstInt(getContext()).getZExtValue();
3560 auto *ResType = cast<llvm::IntegerType>(ConvertType(E->getType()));
3561
3562 // We pass this builtin onto the optimizer so that it can figure out the
3563 // object size in more complex cases.
3564 bool IsDynamic = BuiltinID == Builtin::BI__builtin_dynamic_object_size;
3565 return RValue::get(emitBuiltinObjectSize(E->getArg(0), Type, ResType,
3566 /*EmittedE=*/nullptr, IsDynamic));
3567 }
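  // For example, `__builtin_object_size(p, 0)` is emitted (via
  // emitBuiltinObjectSize) as a call to @llvm.objectsize.i64.p0 whose
  // min/dynamic flags are derived from the type argument, so the optimizer can
  // resolve the size after inlining.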
3568 case Builtin::BI__builtin_prefetch: {
3569 Value *Locality, *RW, *Address = EmitScalarExpr(E->getArg(0));
3570     // FIXME: Technically these constants should be of type 'int', yes?
3571 RW = (E->getNumArgs() > 1) ? EmitScalarExpr(E->getArg(1)) :
3572 llvm::ConstantInt::get(Int32Ty, 0);
3573 Locality = (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) :
3574 llvm::ConstantInt::get(Int32Ty, 3);
3575 Value *Data = llvm::ConstantInt::get(Int32Ty, 1);
3576 Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
3577 Builder.CreateCall(F, {Address, RW, Locality, Data});
3578 return RValue::get(nullptr);
3579 }
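  // For example, `__builtin_prefetch(p)` uses the defaults rw=0 (read) and
  // locality=3 and emits
  //   call void @llvm.prefetch.p0(ptr %p, i32 0, i32 3, i32 1)
  // where the trailing 1 marks a data (as opposed to instruction) prefetch.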
3580 case Builtin::BI__builtin_readcyclecounter: {
3581 Function *F = CGM.getIntrinsic(Intrinsic::readcyclecounter);
3582 return RValue::get(Builder.CreateCall(F));
3583 }
3584 case Builtin::BI__builtin_readsteadycounter: {
3585 Function *F = CGM.getIntrinsic(Intrinsic::readsteadycounter);
3586 return RValue::get(Builder.CreateCall(F));
3587 }
3588 case Builtin::BI__builtin___clear_cache: {
3589 Value *Begin = EmitScalarExpr(E->getArg(0));
3590 Value *End = EmitScalarExpr(E->getArg(1));
3591 Function *F = CGM.getIntrinsic(Intrinsic::clear_cache);
3592 return RValue::get(Builder.CreateCall(F, {Begin, End}));
3593 }
3594 case Builtin::BI__builtin_trap:
3595 EmitTrapCall(Intrinsic::trap);
3596 return RValue::get(nullptr);
3597 case Builtin::BI__builtin_verbose_trap: {
3598 llvm::DILocation *TrapLocation = Builder.getCurrentDebugLocation();
3599 if (getDebugInfo()) {
3600 TrapLocation = getDebugInfo()->CreateTrapFailureMessageFor(
3601 TrapLocation, *E->getArg(0)->tryEvaluateString(getContext()),
3602 *E->getArg(1)->tryEvaluateString(getContext()));
3603 }
3604 ApplyDebugLocation ApplyTrapDI(*this, TrapLocation);
3605 // Currently no attempt is made to prevent traps from being merged.
3606 EmitTrapCall(Intrinsic::trap);
3607 return RValue::get(nullptr);
3608 }
3609 case Builtin::BI__debugbreak:
3610 EmitTrapCall(Intrinsic::debugtrap);
3611 return RValue::get(nullptr);
3612 case Builtin::BI__builtin_unreachable: {
3613 EmitUnreachable(E->getExprLoc());
3614
3615 // We do need to preserve an insertion point.
3616 EmitBlock(createBasicBlock("unreachable.cont"));
3617
3618 return RValue::get(nullptr);
3619 }
3620
3621 case Builtin::BI__builtin_powi:
3622 case Builtin::BI__builtin_powif:
3623 case Builtin::BI__builtin_powil: {
3624 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
3625 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
3626
3627 if (Builder.getIsFPConstrained()) {
3628 // FIXME: llvm.powi has 2 mangling types,
3629 // llvm.experimental.constrained.powi has one.
3630 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3631 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_powi,
3632 Src0->getType());
3633 return RValue::get(Builder.CreateConstrainedFPCall(F, { Src0, Src1 }));
3634 }
3635
3636 Function *F = CGM.getIntrinsic(Intrinsic::powi,
3637 { Src0->getType(), Src1->getType() });
3638 return RValue::get(Builder.CreateCall(F, { Src0, Src1 }));
3639 }
3640 case Builtin::BI__builtin_frexpl: {
3641     // Linux PPC will not be adding additional PPCDoubleDouble support.
3642     // Work is in progress to switch the default to IEEE long double; emit a
3643     // libcall for frexpl instead of legalizing this type in the backend.
3644 if (&getTarget().getLongDoubleFormat() == &llvm::APFloat::PPCDoubleDouble())
3645 break;
3646 [[fallthrough]];
3647 }
3648 case Builtin::BI__builtin_frexp:
3649 case Builtin::BI__builtin_frexpf:
3650 case Builtin::BI__builtin_frexpf128:
3651 case Builtin::BI__builtin_frexpf16:
3652 return RValue::get(emitFrexpBuiltin(*this, E, Intrinsic::frexp));
3653 case Builtin::BI__builtin_isgreater:
3654 case Builtin::BI__builtin_isgreaterequal:
3655 case Builtin::BI__builtin_isless:
3656 case Builtin::BI__builtin_islessequal:
3657 case Builtin::BI__builtin_islessgreater:
3658 case Builtin::BI__builtin_isunordered: {
3659 // Ordered comparisons: we know the arguments to these are matching scalar
3660 // floating point values.
3661 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3662 Value *LHS = EmitScalarExpr(E->getArg(0));
3663 Value *RHS = EmitScalarExpr(E->getArg(1));
3664
3665 switch (BuiltinID) {
3666 default: llvm_unreachable("Unknown ordered comparison");
3667 case Builtin::BI__builtin_isgreater:
3668 LHS = Builder.CreateFCmpOGT(LHS, RHS, "cmp");
3669 break;
3670 case Builtin::BI__builtin_isgreaterequal:
3671 LHS = Builder.CreateFCmpOGE(LHS, RHS, "cmp");
3672 break;
3673 case Builtin::BI__builtin_isless:
3674 LHS = Builder.CreateFCmpOLT(LHS, RHS, "cmp");
3675 break;
3676 case Builtin::BI__builtin_islessequal:
3677 LHS = Builder.CreateFCmpOLE(LHS, RHS, "cmp");
3678 break;
3679 case Builtin::BI__builtin_islessgreater:
3680 LHS = Builder.CreateFCmpONE(LHS, RHS, "cmp");
3681 break;
3682 case Builtin::BI__builtin_isunordered:
3683 LHS = Builder.CreateFCmpUNO(LHS, RHS, "cmp");
3684 break;
3685 }
3686 // ZExt bool to int type.
3687 return RValue::get(Builder.CreateZExt(LHS, ConvertType(E->getType())));
3688 }
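  // For example, `__builtin_isless(a, b)` on doubles is emitted as
  //   %cmp = fcmp olt double %a, %b
  //   %res = zext i1 %cmp to i32
  // using an ordered compare so the result is 0 whenever either operand is NaN.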
3689
3690 case Builtin::BI__builtin_isnan: {
3691 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3692 Value *V = EmitScalarExpr(E->getArg(0));
3693 if (Value *Result = tryUseTestFPKind(*this, BuiltinID, V))
3694 return RValue::get(Result);
3695 return RValue::get(
3696 Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcNan),
3697 ConvertType(E->getType())));
3698 }
3699
3700 case Builtin::BI__builtin_issignaling: {
3701 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3702 Value *V = EmitScalarExpr(E->getArg(0));
3703 return RValue::get(
3704 Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcSNan),
3705 ConvertType(E->getType())));
3706 }
3707
3708 case Builtin::BI__builtin_isinf: {
3709 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3710 Value *V = EmitScalarExpr(E->getArg(0));
3711 if (Value *Result = tryUseTestFPKind(*this, BuiltinID, V))
3712 return RValue::get(Result);
3713 return RValue::get(
3714 Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcInf),
3715 ConvertType(E->getType())));
3716 }
3717
3718 case Builtin::BIfinite:
3719 case Builtin::BI__finite:
3720 case Builtin::BIfinitef:
3721 case Builtin::BI__finitef:
3722 case Builtin::BIfinitel:
3723 case Builtin::BI__finitel:
3724 case Builtin::BI__builtin_isfinite: {
3725 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3726 Value *V = EmitScalarExpr(E->getArg(0));
3727 if (Value *Result = tryUseTestFPKind(*this, BuiltinID, V))
3728 return RValue::get(Result);
3729 return RValue::get(
3730 Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcFinite),
3731 ConvertType(E->getType())));
3732 }
3733
3734 case Builtin::BI__builtin_isnormal: {
3735 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3736 Value *V = EmitScalarExpr(E->getArg(0));
3737 return RValue::get(
3738 Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcNormal),
3739 ConvertType(E->getType())));
3740 }
3741
3742 case Builtin::BI__builtin_issubnormal: {
3743 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3744 Value *V = EmitScalarExpr(E->getArg(0));
3745 return RValue::get(
3746 Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcSubnormal),
3747 ConvertType(E->getType())));
3748 }
3749
3750 case Builtin::BI__builtin_iszero: {
3751 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3752 Value *V = EmitScalarExpr(E->getArg(0));
3753 return RValue::get(
3754 Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcZero),
3755 ConvertType(E->getType())));
3756 }
3757
3758 case Builtin::BI__builtin_isfpclass: {
3759 Expr::EvalResult Result;
3760 if (!E->getArg(1)->EvaluateAsInt(Result, CGM.getContext()))
3761 break;
3762 uint64_t Test = Result.Val.getInt().getLimitedValue();
3763 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3764 Value *V = EmitScalarExpr(E->getArg(0));
3765 return RValue::get(Builder.CreateZExt(Builder.createIsFPClass(V, Test),
3766 ConvertType(E->getType())));
3767 }
3768
3769 case Builtin::BI__builtin_nondeterministic_value: {
3770 llvm::Type *Ty = ConvertType(E->getArg(0)->getType());
3771
3772 Value *Result = PoisonValue::get(Ty);
3773 Result = Builder.CreateFreeze(Result);
3774
3775 return RValue::get(Result);
3776 }
3777
3778 case Builtin::BI__builtin_elementwise_abs: {
3779 Value *Result;
3780 QualType QT = E->getArg(0)->getType();
3781
3782 if (auto *VecTy = QT->getAs<VectorType>())
3783 QT = VecTy->getElementType();
3784 if (QT->isIntegerType())
3785 Result = Builder.CreateBinaryIntrinsic(
3786 llvm::Intrinsic::abs, EmitScalarExpr(E->getArg(0)),
3787 Builder.getFalse(), nullptr, "elt.abs");
3788 else
3789 Result = emitBuiltinWithOneOverloadedType<1>(
3790 *this, E, llvm::Intrinsic::fabs, "elt.abs");
3791
3792 return RValue::get(Result);
3793 }
3794 case Builtin::BI__builtin_elementwise_acos:
3795 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
3796 *this, E, llvm::Intrinsic::acos, "elt.acos"));
3797 case Builtin::BI__builtin_elementwise_asin:
3798 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
3799 *this, E, llvm::Intrinsic::asin, "elt.asin"));
3800 case Builtin::BI__builtin_elementwise_atan:
3801 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
3802 *this, E, llvm::Intrinsic::atan, "elt.atan"));
3803 case Builtin::BI__builtin_elementwise_ceil:
3804 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
3805 *this, E, llvm::Intrinsic::ceil, "elt.ceil"));
3806 case Builtin::BI__builtin_elementwise_exp:
3807 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
3808 *this, E, llvm::Intrinsic::exp, "elt.exp"));
3809 case Builtin::BI__builtin_elementwise_exp2:
3810 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
3811 *this, E, llvm::Intrinsic::exp2, "elt.exp2"));
3812 case Builtin::BI__builtin_elementwise_log:
3813 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
3814 *this, E, llvm::Intrinsic::log, "elt.log"));
3815 case Builtin::BI__builtin_elementwise_log2:
3816 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
3817 *this, E, llvm::Intrinsic::log2, "elt.log2"));
3818 case Builtin::BI__builtin_elementwise_log10:
3819 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
3820 *this, E, llvm::Intrinsic::log10, "elt.log10"));
3821 case Builtin::BI__builtin_elementwise_pow: {
3822 return RValue::get(
3823 emitBuiltinWithOneOverloadedType<2>(*this, E, llvm::Intrinsic::pow));
3824 }
3825 case Builtin::BI__builtin_elementwise_bitreverse:
3826 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
3827 *this, E, llvm::Intrinsic::bitreverse, "elt.bitreverse"));
3828 case Builtin::BI__builtin_elementwise_cos:
3829 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
3830 *this, E, llvm::Intrinsic::cos, "elt.cos"));
3831 case Builtin::BI__builtin_elementwise_cosh:
3832 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
3833 *this, E, llvm::Intrinsic::cosh, "elt.cosh"));
3834 case Builtin::BI__builtin_elementwise_floor:
3835 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
3836 *this, E, llvm::Intrinsic::floor, "elt.floor"));
3837 case Builtin::BI__builtin_elementwise_roundeven:
3838 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
3839 *this, E, llvm::Intrinsic::roundeven, "elt.roundeven"));
3840 case Builtin::BI__builtin_elementwise_round:
3841 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
3842 *this, E, llvm::Intrinsic::round, "elt.round"));
3843 case Builtin::BI__builtin_elementwise_rint:
3844 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
3845 *this, E, llvm::Intrinsic::rint, "elt.rint"));
3846 case Builtin::BI__builtin_elementwise_nearbyint:
3847 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
3848 *this, E, llvm::Intrinsic::nearbyint, "elt.nearbyint"));
3849 case Builtin::BI__builtin_elementwise_sin:
3850 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
3851 *this, E, llvm::Intrinsic::sin, "elt.sin"));
3852 case Builtin::BI__builtin_elementwise_sinh:
3853 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
3854 *this, E, llvm::Intrinsic::sinh, "elt.sinh"));
3855 case Builtin::BI__builtin_elementwise_tan:
3856 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
3857 *this, E, llvm::Intrinsic::tan, "elt.tan"));
3858 case Builtin::BI__builtin_elementwise_tanh:
3859 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
3860 *this, E, llvm::Intrinsic::tanh, "elt.tanh"));
3861 case Builtin::BI__builtin_elementwise_trunc:
3862 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
3863 *this, E, llvm::Intrinsic::trunc, "elt.trunc"));
3864 case Builtin::BI__builtin_elementwise_canonicalize:
3865 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
3866 *this, E, llvm::Intrinsic::canonicalize, "elt.canonicalize"));
3867 case Builtin::BI__builtin_elementwise_copysign:
3868 return RValue::get(emitBuiltinWithOneOverloadedType<2>(
3869 *this, E, llvm::Intrinsic::copysign));
3870 case Builtin::BI__builtin_elementwise_fma:
3871 return RValue::get(
3872 emitBuiltinWithOneOverloadedType<3>(*this, E, llvm::Intrinsic::fma));
3873 case Builtin::BI__builtin_elementwise_add_sat:
3874 case Builtin::BI__builtin_elementwise_sub_sat: {
3875 Value *Op0 = EmitScalarExpr(E->getArg(0));
3876 Value *Op1 = EmitScalarExpr(E->getArg(1));
3877 Value *Result;
3878 assert(Op0->getType()->isIntOrIntVectorTy() && "integer type expected");
3879 QualType Ty = E->getArg(0)->getType();
3880 if (auto *VecTy = Ty->getAs<VectorType>())
3881 Ty = VecTy->getElementType();
3882 bool IsSigned = Ty->isSignedIntegerType();
3883 unsigned Opc;
3884 if (BuiltinIDIfNoAsmLabel == Builtin::BI__builtin_elementwise_add_sat)
3885 Opc = IsSigned ? llvm::Intrinsic::sadd_sat : llvm::Intrinsic::uadd_sat;
3886 else
3887 Opc = IsSigned ? llvm::Intrinsic::ssub_sat : llvm::Intrinsic::usub_sat;
3888 Result = Builder.CreateBinaryIntrinsic(Opc, Op0, Op1, nullptr, "elt.sat");
3889 return RValue::get(Result);
3890 }
3891
3892 case Builtin::BI__builtin_elementwise_max: {
3893 Value *Op0 = EmitScalarExpr(E->getArg(0));
3894 Value *Op1 = EmitScalarExpr(E->getArg(1));
3895 Value *Result;
3896 if (Op0->getType()->isIntOrIntVectorTy()) {
3897 QualType Ty = E->getArg(0)->getType();
3898 if (auto *VecTy = Ty->getAs<VectorType>())
3899 Ty = VecTy->getElementType();
3900 Result = Builder.CreateBinaryIntrinsic(Ty->isSignedIntegerType()
3901 ? llvm::Intrinsic::smax
3902 : llvm::Intrinsic::umax,
3903 Op0, Op1, nullptr, "elt.max");
3904 } else
3905 Result = Builder.CreateMaxNum(Op0, Op1, "elt.max");
3906 return RValue::get(Result);
3907 }
3908 case Builtin::BI__builtin_elementwise_min: {
3909 Value *Op0 = EmitScalarExpr(E->getArg(0));
3910 Value *Op1 = EmitScalarExpr(E->getArg(1));
3911 Value *Result;
3912 if (Op0->getType()->isIntOrIntVectorTy()) {
3913 QualType Ty = E->getArg(0)->getType();
3914 if (auto *VecTy = Ty->getAs<VectorType>())
3915 Ty = VecTy->getElementType();
3916 Result = Builder.CreateBinaryIntrinsic(Ty->isSignedIntegerType()
3917 ? llvm::Intrinsic::smin
3918 : llvm::Intrinsic::umin,
3919 Op0, Op1, nullptr, "elt.min");
3920 } else
3921 Result = Builder.CreateMinNum(Op0, Op1, "elt.min");
3922 return RValue::get(Result);
3923 }
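  // For example, `__builtin_elementwise_max(a, b)` picks @llvm.smax.* for
  // signed integer (vector) operands, @llvm.umax.* for unsigned ones, and a
  // maxnum-based FP max otherwise; _min mirrors this with smin/umin/minnum.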
3924
3925 case Builtin::BI__builtin_reduce_max: {
3926 auto GetIntrinsicID = [this](QualType QT) {
3927 if (auto *VecTy = QT->getAs<VectorType>())
3928 QT = VecTy->getElementType();
3929 else if (QT->isSizelessVectorType())
3930 QT = QT->getSizelessVectorEltType(CGM.getContext());
3931
3932 if (QT->isSignedIntegerType())
3933 return llvm::Intrinsic::vector_reduce_smax;
3934 if (QT->isUnsignedIntegerType())
3935 return llvm::Intrinsic::vector_reduce_umax;
3936 assert(QT->isFloatingType() && "must have a float here");
3937 return llvm::Intrinsic::vector_reduce_fmax;
3938 };
3939 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
3940         *this, E, GetIntrinsicID(E->getArg(0)->getType()), "rdx.max"));
3941 }
3942
3943 case Builtin::BI__builtin_reduce_min: {
3944 auto GetIntrinsicID = [this](QualType QT) {
3945 if (auto *VecTy = QT->getAs<VectorType>())
3946 QT = VecTy->getElementType();
3947 else if (QT->isSizelessVectorType())
3948 QT = QT->getSizelessVectorEltType(CGM.getContext());
3949
3950 if (QT->isSignedIntegerType())
3951 return llvm::Intrinsic::vector_reduce_smin;
3952 if (QT->isUnsignedIntegerType())
3953 return llvm::Intrinsic::vector_reduce_umin;
3954 assert(QT->isFloatingType() && "must have a float here");
3955 return llvm::Intrinsic::vector_reduce_fmin;
3956 };
3957
3958 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
3959 *this, E, GetIntrinsicID(E->getArg(0)->getType()), "rdx.min"));
3960 }
3961
3962 case Builtin::BI__builtin_reduce_add:
3963 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
3964 *this, E, llvm::Intrinsic::vector_reduce_add, "rdx.add"));
3965 case Builtin::BI__builtin_reduce_mul:
3966 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
3967 *this, E, llvm::Intrinsic::vector_reduce_mul, "rdx.mul"));
3968 case Builtin::BI__builtin_reduce_xor:
3969 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
3970 *this, E, llvm::Intrinsic::vector_reduce_xor, "rdx.xor"));
3971 case Builtin::BI__builtin_reduce_or:
3972 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
3973 *this, E, llvm::Intrinsic::vector_reduce_or, "rdx.or"));
3974 case Builtin::BI__builtin_reduce_and:
3975 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
3976 *this, E, llvm::Intrinsic::vector_reduce_and, "rdx.and"));
3977
3978 case Builtin::BI__builtin_matrix_transpose: {
3979 auto *MatrixTy = E->getArg(0)->getType()->castAs<ConstantMatrixType>();
3980 Value *MatValue = EmitScalarExpr(E->getArg(0));
3981 MatrixBuilder MB(Builder);
3982 Value *Result = MB.CreateMatrixTranspose(MatValue, MatrixTy->getNumRows(),
3983 MatrixTy->getNumColumns());
3984 return RValue::get(Result);
3985 }
3986
3987 case Builtin::BI__builtin_matrix_column_major_load: {
3988 MatrixBuilder MB(Builder);
3989 // Emit everything that isn't dependent on the first parameter type
3990 Value *Stride = EmitScalarExpr(E->getArg(3));
3991 const auto *ResultTy = E->getType()->getAs<ConstantMatrixType>();
3992 auto *PtrTy = E->getArg(0)->getType()->getAs<PointerType>();
3993 assert(PtrTy && "arg0 must be of pointer type");
3994 bool IsVolatile = PtrTy->getPointeeType().isVolatileQualified();
3995
3996 Address Src = EmitPointerWithAlignment(E->getArg(0));
3997 EmitNonNullArgCheck(RValue::get(Src.emitRawPointer(*this)),
3998 E->getArg(0)->getType(), E->getArg(0)->getExprLoc(), FD,
3999 0);
4000 Value *Result = MB.CreateColumnMajorLoad(
4001 Src.getElementType(), Src.emitRawPointer(*this),
4002 Align(Src.getAlignment().getQuantity()), Stride, IsVolatile,
4003 ResultTy->getNumRows(), ResultTy->getNumColumns(), "matrix");
4004 return RValue::get(Result);
4005 }
4006
4007 case Builtin::BI__builtin_matrix_column_major_store: {
4008 MatrixBuilder MB(Builder);
4009 Value *Matrix = EmitScalarExpr(E->getArg(0));
4010 Address Dst = EmitPointerWithAlignment(E->getArg(1));
4011 Value *Stride = EmitScalarExpr(E->getArg(2));
4012
4013 const auto *MatrixTy = E->getArg(0)->getType()->getAs<ConstantMatrixType>();
4014 auto *PtrTy = E->getArg(1)->getType()->getAs<PointerType>();
4015 assert(PtrTy && "arg1 must be of pointer type");
4016 bool IsVolatile = PtrTy->getPointeeType().isVolatileQualified();
4017
4018 EmitNonNullArgCheck(RValue::get(Dst.emitRawPointer(*this)),
4019 E->getArg(1)->getType(), E->getArg(1)->getExprLoc(), FD,
4020 0);
4021 Value *Result = MB.CreateColumnMajorStore(
4022 Matrix, Dst.emitRawPointer(*this),
4023 Align(Dst.getAlignment().getQuantity()), Stride, IsVolatile,
4024 MatrixTy->getNumRows(), MatrixTy->getNumColumns());
4025 return RValue::get(Result);
4026 }
4027
4028 case Builtin::BI__builtin_isinf_sign: {
4029 // isinf_sign(x) -> fabs(x) == infinity ? (signbit(x) ? -1 : 1) : 0
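    // A rough sketch of the IR for a double argument (value names are
    // illustrative only):
    //   %abs    = call double @llvm.fabs.f64(double %x)
    //   %isinf  = fcmp oeq double %abs, 0x7FF0000000000000
    //   %isneg  = <sign-bit test on %x>
    //   %sign   = select i1 %isneg, i32 -1, i32 1
    //   %result = select i1 %isinf, i32 %sign, i32 0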
4030 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
4031 // FIXME: for strictfp/IEEE-754 we need to not trap on SNaN here.
4032 Value *Arg = EmitScalarExpr(E->getArg(0));
4033 Value *AbsArg = EmitFAbs(*this, Arg);
4034 Value *IsInf = Builder.CreateFCmpOEQ(
4035 AbsArg, ConstantFP::getInfinity(Arg->getType()), "isinf");
4036 Value *IsNeg = EmitSignBit(*this, Arg);
4037
4038 llvm::Type *IntTy = ConvertType(E->getType());
4039 Value *Zero = Constant::getNullValue(IntTy);
4040 Value *One = ConstantInt::get(IntTy, 1);
4041 Value *NegativeOne = ConstantInt::get(IntTy, -1);
4042 Value *SignResult = Builder.CreateSelect(IsNeg, NegativeOne, One);
4043 Value *Result = Builder.CreateSelect(IsInf, SignResult, Zero);
4044 return RValue::get(Result);
4045 }
4046
4047 case Builtin::BI__builtin_flt_rounds: {
4048 Function *F = CGM.getIntrinsic(Intrinsic::get_rounding);
4049
4050 llvm::Type *ResultType = ConvertType(E->getType());
4051 Value *Result = Builder.CreateCall(F);
4052 if (Result->getType() != ResultType)
4053 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
4054 "cast");
4055 return RValue::get(Result);
4056 }
4057
4058 case Builtin::BI__builtin_set_flt_rounds: {
4059 Function *F = CGM.getIntrinsic(Intrinsic::set_rounding);
4060
4061 Value *V = EmitScalarExpr(E->getArg(0));
4062 Builder.CreateCall(F, V);
4063 return RValue::get(nullptr);
4064 }
4065
4066 case Builtin::BI__builtin_fpclassify: {
4067 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
4068 // FIXME: for strictfp/IEEE-754 we need to not trap on SNaN here.
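    // The classification values are passed as arguments 0-4 and the operand to
    // classify is argument 5, i.e. the builtin is used as (illustrative):
    //   __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL,
    //                        FP_ZERO, x)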
4069 Value *V = EmitScalarExpr(E->getArg(5));
4070 llvm::Type *Ty = ConvertType(E->getArg(5)->getType());
4071
4072 // Create Result
4073 BasicBlock *Begin = Builder.GetInsertBlock();
4074 BasicBlock *End = createBasicBlock("fpclassify_end", this->CurFn);
4075 Builder.SetInsertPoint(End);
4076 PHINode *Result =
4077 Builder.CreatePHI(ConvertType(E->getArg(0)->getType()), 4,
4078 "fpclassify_result");
4079
4080 // if (V==0) return FP_ZERO
4081 Builder.SetInsertPoint(Begin);
4082 Value *IsZero = Builder.CreateFCmpOEQ(V, Constant::getNullValue(Ty),
4083 "iszero");
4084 Value *ZeroLiteral = EmitScalarExpr(E->getArg(4));
4085 BasicBlock *NotZero = createBasicBlock("fpclassify_not_zero", this->CurFn);
4086 Builder.CreateCondBr(IsZero, End, NotZero);
4087 Result->addIncoming(ZeroLiteral, Begin);
4088
4089 // if (V != V) return FP_NAN
4090 Builder.SetInsertPoint(NotZero);
4091 Value *IsNan = Builder.CreateFCmpUNO(V, V, "cmp");
4092 Value *NanLiteral = EmitScalarExpr(E->getArg(0));
4093 BasicBlock *NotNan = createBasicBlock("fpclassify_not_nan", this->CurFn);
4094 Builder.CreateCondBr(IsNan, End, NotNan);
4095 Result->addIncoming(NanLiteral, NotZero);
4096
4097 // if (fabs(V) == infinity) return FP_INFINITY
4098 Builder.SetInsertPoint(NotNan);
4099 Value *VAbs = EmitFAbs(*this, V);
4100 Value *IsInf =
4101 Builder.CreateFCmpOEQ(VAbs, ConstantFP::getInfinity(V->getType()),
4102 "isinf");
4103 Value *InfLiteral = EmitScalarExpr(E->getArg(1));
4104 BasicBlock *NotInf = createBasicBlock("fpclassify_not_inf", this->CurFn);
4105 Builder.CreateCondBr(IsInf, End, NotInf);
4106 Result->addIncoming(InfLiteral, NotNan);
4107
4108 // if (fabs(V) >= MIN_NORMAL) return FP_NORMAL else FP_SUBNORMAL
4109 Builder.SetInsertPoint(NotInf);
4110 APFloat Smallest = APFloat::getSmallestNormalized(
4111 getContext().getFloatTypeSemantics(E->getArg(5)->getType()));
4112 Value *IsNormal =
4113 Builder.CreateFCmpUGE(VAbs, ConstantFP::get(V->getContext(), Smallest),
4114 "isnormal");
4115 Value *NormalResult =
4116 Builder.CreateSelect(IsNormal, EmitScalarExpr(E->getArg(2)),
4117 EmitScalarExpr(E->getArg(3)));
4118 Builder.CreateBr(End);
4119 Result->addIncoming(NormalResult, NotInf);
4120
4121 // return Result
4122 Builder.SetInsertPoint(End);
4123 return RValue::get(Result);
4124 }
4125
4126 // An alloca will always return a pointer to the alloca (stack) address
4127 // space. This address space need not be the same as the AST / Language
4128 // default (e.g. in C / C++ auto vars are in the generic address space). At
4129 // the AST level this is handled within CreateTempAlloca et al., but for the
4130 // builtin / dynamic alloca we have to handle it here. We use an explicit cast
4131   // instead of passing an AS to CreateAlloca so as not to inhibit optimization.
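  // For example (illustrative), on a target whose allocas live in a private
  // address space such as AMDGPU's addrspace(5), the emitted IR is roughly:
  //   %buf = alloca i8, i64 %size, align 16, addrspace(5)
  //   %ptr = addrspacecast ptr addrspace(5) %buf to ptr
  // where the alignment is the target's __BIGGEST_ALIGNMENT__.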
4132 case Builtin::BIalloca:
4133 case Builtin::BI_alloca:
4134 case Builtin::BI__builtin_alloca_uninitialized:
4135 case Builtin::BI__builtin_alloca: {
4136 Value *Size = EmitScalarExpr(E->getArg(0));
4137 const TargetInfo &TI = getContext().getTargetInfo();
4138 // The alignment of the alloca should correspond to __BIGGEST_ALIGNMENT__.
4139 const Align SuitableAlignmentInBytes =
4140 CGM.getContext()
4141 .toCharUnitsFromBits(TI.getSuitableAlign())
4142 .getAsAlign();
4143 AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size);
4144 AI->setAlignment(SuitableAlignmentInBytes);
4145 if (BuiltinID != Builtin::BI__builtin_alloca_uninitialized)
4146 initializeAlloca(*this, AI, Size, SuitableAlignmentInBytes);
4147 LangAS AAS = getASTAllocaAddressSpace();
4148 LangAS EAS = E->getType()->getPointeeType().getAddressSpace();
4149 if (AAS != EAS) {
4150 llvm::Type *Ty = CGM.getTypes().ConvertType(E->getType());
4151 return RValue::get(getTargetHooks().performAddrSpaceCast(*this, AI, AAS,
4152 EAS, Ty));
4153 }
4154 return RValue::get(AI);
4155 }
4156
4157 case Builtin::BI__builtin_alloca_with_align_uninitialized:
4158 case Builtin::BI__builtin_alloca_with_align: {
4159 Value *Size = EmitScalarExpr(E->getArg(0));
4160 Value *AlignmentInBitsValue = EmitScalarExpr(E->getArg(1));
4161 auto *AlignmentInBitsCI = cast<ConstantInt>(AlignmentInBitsValue);
4162 unsigned AlignmentInBits = AlignmentInBitsCI->getZExtValue();
4163 const Align AlignmentInBytes =
4164 CGM.getContext().toCharUnitsFromBits(AlignmentInBits).getAsAlign();
4165 AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size);
4166 AI->setAlignment(AlignmentInBytes);
4167 if (BuiltinID != Builtin::BI__builtin_alloca_with_align_uninitialized)
4168 initializeAlloca(*this, AI, Size, AlignmentInBytes);
4169 LangAS AAS = getASTAllocaAddressSpace();
4170 LangAS EAS = E->getType()->getPointeeType().getAddressSpace();
4171 if (AAS != EAS) {
4172 llvm::Type *Ty = CGM.getTypes().ConvertType(E->getType());
4173 return RValue::get(getTargetHooks().performAddrSpaceCast(*this, AI, AAS,
4174 EAS, Ty));
4175 }
4176 return RValue::get(AI);
4177 }
4178
4179 case Builtin::BIbzero:
4180 case Builtin::BI__builtin_bzero: {
4181 Address Dest = EmitPointerWithAlignment(E->getArg(0));
4182 Value *SizeVal = EmitScalarExpr(E->getArg(1));
4183 EmitNonNullArgCheck(Dest, E->getArg(0)->getType(),
4184 E->getArg(0)->getExprLoc(), FD, 0);
4185 Builder.CreateMemSet(Dest, Builder.getInt8(0), SizeVal, false);
4186 return RValue::get(nullptr);
4187 }
4188
4189 case Builtin::BIbcopy:
4190 case Builtin::BI__builtin_bcopy: {
4191 Address Src = EmitPointerWithAlignment(E->getArg(0));
4192 Address Dest = EmitPointerWithAlignment(E->getArg(1));
4193 Value *SizeVal = EmitScalarExpr(E->getArg(2));
4194 EmitNonNullArgCheck(RValue::get(Src.emitRawPointer(*this)),
4195 E->getArg(0)->getType(), E->getArg(0)->getExprLoc(), FD,
4196 0);
4197 EmitNonNullArgCheck(RValue::get(Dest.emitRawPointer(*this)),
4198 E->getArg(1)->getType(), E->getArg(1)->getExprLoc(), FD,
4199 0);
4200 Builder.CreateMemMove(Dest, Src, SizeVal, false);
4201 return RValue::get(nullptr);
4202 }
4203
4204 case Builtin::BImemcpy:
4205 case Builtin::BI__builtin_memcpy:
4206 case Builtin::BImempcpy:
4207 case Builtin::BI__builtin_mempcpy: {
4208 Address Dest = EmitPointerWithAlignment(E->getArg(0));
4209 Address Src = EmitPointerWithAlignment(E->getArg(1));
4210 Value *SizeVal = EmitScalarExpr(E->getArg(2));
4211 EmitArgCheck(TCK_Store, Dest, E->getArg(0), 0);
4212 EmitArgCheck(TCK_Load, Src, E->getArg(1), 1);
4213 Builder.CreateMemCpy(Dest, Src, SizeVal, false);
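    // mempcpy returns a pointer just past the last byte written (dest + n),
    // whereas memcpy returns dest itself.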
4214 if (BuiltinID == Builtin::BImempcpy ||
4215 BuiltinID == Builtin::BI__builtin_mempcpy)
4216 return RValue::get(Builder.CreateInBoundsGEP(
4217 Dest.getElementType(), Dest.emitRawPointer(*this), SizeVal));
4218 else
4219 return RValue::get(Dest, *this);
4220 }
4221
4222 case Builtin::BI__builtin_memcpy_inline: {
4223 Address Dest = EmitPointerWithAlignment(E->getArg(0));
4224 Address Src = EmitPointerWithAlignment(E->getArg(1));
4225 uint64_t Size =
4226 E->getArg(2)->EvaluateKnownConstInt(getContext()).getZExtValue();
4227 EmitArgCheck(TCK_Store, Dest, E->getArg(0), 0);
4228 EmitArgCheck(TCK_Load, Src, E->getArg(1), 1);
4229 Builder.CreateMemCpyInline(Dest, Src, Size);
4230 return RValue::get(nullptr);
4231 }
4232
4233 case Builtin::BI__builtin_char_memchr:
4234 BuiltinID = Builtin::BI__builtin_memchr;
4235 break;
4236
4237 case Builtin::BI__builtin___memcpy_chk: {
4238 // fold __builtin_memcpy_chk(x, y, cst1, cst2) to memcpy iff cst1<=cst2.
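    // For example (illustrative), __builtin___memcpy_chk(d, s, 16, 32) becomes
    // a plain 16-byte memcpy because 16 <= 32; if the sizes are not constant
    // or cst1 > cst2, we break out and fall back to the normal library-call
    // path instead.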
4239 Expr::EvalResult SizeResult, DstSizeResult;
4240 if (!E->getArg(2)->EvaluateAsInt(SizeResult, CGM.getContext()) ||
4241 !E->getArg(3)->EvaluateAsInt(DstSizeResult, CGM.getContext()))
4242 break;
4243 llvm::APSInt Size = SizeResult.Val.getInt();
4244 llvm::APSInt DstSize = DstSizeResult.Val.getInt();
4245 if (Size.ugt(DstSize))
4246 break;
4247 Address Dest = EmitPointerWithAlignment(E->getArg(0));
4248 Address Src = EmitPointerWithAlignment(E->getArg(1));
4249 Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
4250 Builder.CreateMemCpy(Dest, Src, SizeVal, false);
4251 return RValue::get(Dest, *this);
4252 }
4253
4254 case Builtin::BI__builtin_objc_memmove_collectable: {
4255 Address DestAddr = EmitPointerWithAlignment(E->getArg(0));
4256 Address SrcAddr = EmitPointerWithAlignment(E->getArg(1));
4257 Value *SizeVal = EmitScalarExpr(E->getArg(2));
4258 CGM.getObjCRuntime().EmitGCMemmoveCollectable(*this,
4259 DestAddr, SrcAddr, SizeVal);
4260 return RValue::get(DestAddr, *this);
4261 }
4262
4263 case Builtin::BI__builtin___memmove_chk: {
4264 // fold __builtin_memmove_chk(x, y, cst1, cst2) to memmove iff cst1<=cst2.
4265 Expr::EvalResult SizeResult, DstSizeResult;
4266 if (!E->getArg(2)->EvaluateAsInt(SizeResult, CGM.getContext()) ||
4267 !E->getArg(3)->EvaluateAsInt(DstSizeResult, CGM.getContext()))
4268 break;
4269 llvm::APSInt Size = SizeResult.Val.getInt();
4270 llvm::APSInt DstSize = DstSizeResult.Val.getInt();
4271 if (Size.ugt(DstSize))
4272 break;
4273 Address Dest = EmitPointerWithAlignment(E->getArg(0));
4274 Address Src = EmitPointerWithAlignment(E->getArg(1));
4275 Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
4276 Builder.CreateMemMove(Dest, Src, SizeVal, false);
4277 return RValue::get(Dest, *this);
4278 }
4279
4280 case Builtin::BImemmove:
4281 case Builtin::BI__builtin_memmove: {
4282 Address Dest = EmitPointerWithAlignment(E->getArg(0));
4283 Address Src = EmitPointerWithAlignment(E->getArg(1));
4284 Value *SizeVal = EmitScalarExpr(E->getArg(2));
4285 EmitArgCheck(TCK_Store, Dest, E->getArg(0), 0);
4286 EmitArgCheck(TCK_Load, Src, E->getArg(1), 1);
4287 Builder.CreateMemMove(Dest, Src, SizeVal, false);
4288 return RValue::get(Dest, *this);
4289 }
4290 case Builtin::BImemset:
4291 case Builtin::BI__builtin_memset: {
4292 Address Dest = EmitPointerWithAlignment(E->getArg(0));
4293 Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
4294 Builder.getInt8Ty());
4295 Value *SizeVal = EmitScalarExpr(E->getArg(2));
4296 EmitNonNullArgCheck(Dest, E->getArg(0)->getType(),
4297 E->getArg(0)->getExprLoc(), FD, 0);
4298 Builder.CreateMemSet(Dest, ByteVal, SizeVal, false);
4299 return RValue::get(Dest, *this);
4300 }
4301 case Builtin::BI__builtin_memset_inline: {
4302 Address Dest = EmitPointerWithAlignment(E->getArg(0));
4303 Value *ByteVal =
4304 Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)), Builder.getInt8Ty());
4305 uint64_t Size =
4306 E->getArg(2)->EvaluateKnownConstInt(getContext()).getZExtValue();
4307 EmitNonNullArgCheck(RValue::get(Dest.emitRawPointer(*this)),
4308 E->getArg(0)->getType(), E->getArg(0)->getExprLoc(), FD,
4309 0);
4310 Builder.CreateMemSetInline(Dest, ByteVal, Size);
4311 return RValue::get(nullptr);
4312 }
4313 case Builtin::BI__builtin___memset_chk: {
4314 // fold __builtin_memset_chk(x, y, cst1, cst2) to memset iff cst1<=cst2.
4315 Expr::EvalResult SizeResult, DstSizeResult;
4316 if (!E->getArg(2)->EvaluateAsInt(SizeResult, CGM.getContext()) ||
4317 !E->getArg(3)->EvaluateAsInt(DstSizeResult, CGM.getContext()))
4318 break;
4319 llvm::APSInt Size = SizeResult.Val.getInt();
4320 llvm::APSInt DstSize = DstSizeResult.Val.getInt();
4321 if (Size.ugt(DstSize))
4322 break;
4323 Address Dest = EmitPointerWithAlignment(E->getArg(0));
4324 Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
4325 Builder.getInt8Ty());
4326 Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
4327 Builder.CreateMemSet(Dest, ByteVal, SizeVal, false);
4328 return RValue::get(Dest, *this);
4329 }
4330 case Builtin::BI__builtin_wmemchr: {
4331 // The MSVC runtime library does not provide a definition of wmemchr, so we
4332 // need an inline implementation.
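    // The expansion below is equivalent to (illustrative C):
    //   for (; n; --n, ++s)
    //     if (*s == c)
    //       return (wchar_t *)s;
    //   return nullptr;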
4333 if (!getTarget().getTriple().isOSMSVCRT())
4334 break;
4335
4336 llvm::Type *WCharTy = ConvertType(getContext().WCharTy);
4337 Value *Str = EmitScalarExpr(E->getArg(0));
4338 Value *Chr = EmitScalarExpr(E->getArg(1));
4339 Value *Size = EmitScalarExpr(E->getArg(2));
4340
4341 BasicBlock *Entry = Builder.GetInsertBlock();
4342 BasicBlock *CmpEq = createBasicBlock("wmemchr.eq");
4343 BasicBlock *Next = createBasicBlock("wmemchr.next");
4344 BasicBlock *Exit = createBasicBlock("wmemchr.exit");
4345 Value *SizeEq0 = Builder.CreateICmpEQ(Size, ConstantInt::get(SizeTy, 0));
4346 Builder.CreateCondBr(SizeEq0, Exit, CmpEq);
4347
4348 EmitBlock(CmpEq);
4349 PHINode *StrPhi = Builder.CreatePHI(Str->getType(), 2);
4350 StrPhi->addIncoming(Str, Entry);
4351 PHINode *SizePhi = Builder.CreatePHI(SizeTy, 2);
4352 SizePhi->addIncoming(Size, Entry);
4353 CharUnits WCharAlign =
4354 getContext().getTypeAlignInChars(getContext().WCharTy);
4355 Value *StrCh = Builder.CreateAlignedLoad(WCharTy, StrPhi, WCharAlign);
4356 Value *FoundChr = Builder.CreateConstInBoundsGEP1_32(WCharTy, StrPhi, 0);
4357 Value *StrEqChr = Builder.CreateICmpEQ(StrCh, Chr);
4358 Builder.CreateCondBr(StrEqChr, Exit, Next);
4359
4360 EmitBlock(Next);
4361 Value *NextStr = Builder.CreateConstInBoundsGEP1_32(WCharTy, StrPhi, 1);
4362 Value *NextSize = Builder.CreateSub(SizePhi, ConstantInt::get(SizeTy, 1));
4363 Value *NextSizeEq0 =
4364 Builder.CreateICmpEQ(NextSize, ConstantInt::get(SizeTy, 0));
4365 Builder.CreateCondBr(NextSizeEq0, Exit, CmpEq);
4366 StrPhi->addIncoming(NextStr, Next);
4367 SizePhi->addIncoming(NextSize, Next);
4368
4369 EmitBlock(Exit);
4370 PHINode *Ret = Builder.CreatePHI(Str->getType(), 3);
4371 Ret->addIncoming(llvm::Constant::getNullValue(Str->getType()), Entry);
4372 Ret->addIncoming(llvm::Constant::getNullValue(Str->getType()), Next);
4373 Ret->addIncoming(FoundChr, CmpEq);
4374 return RValue::get(Ret);
4375 }
4376 case Builtin::BI__builtin_wmemcmp: {
4377 // The MSVC runtime library does not provide a definition of wmemcmp, so we
4378 // need an inline implementation.
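    // The expansion below is equivalent to (illustrative C, with the elements
    // compared as unsigned values):
    //   for (; n; --n, ++d, ++s) {
    //     if (*d > *s) return 1;
    //     if (*d < *s) return -1;
    //   }
    //   return 0;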
4379 if (!getTarget().getTriple().isOSMSVCRT())
4380 break;
4381
4382 llvm::Type *WCharTy = ConvertType(getContext().WCharTy);
4383
4384 Value *Dst = EmitScalarExpr(E->getArg(0));
4385 Value *Src = EmitScalarExpr(E->getArg(1));
4386 Value *Size = EmitScalarExpr(E->getArg(2));
4387
4388 BasicBlock *Entry = Builder.GetInsertBlock();
4389 BasicBlock *CmpGT = createBasicBlock("wmemcmp.gt");
4390 BasicBlock *CmpLT = createBasicBlock("wmemcmp.lt");
4391 BasicBlock *Next = createBasicBlock("wmemcmp.next");
4392 BasicBlock *Exit = createBasicBlock("wmemcmp.exit");
4393 Value *SizeEq0 = Builder.CreateICmpEQ(Size, ConstantInt::get(SizeTy, 0));
4394 Builder.CreateCondBr(SizeEq0, Exit, CmpGT);
4395
4396 EmitBlock(CmpGT);
4397 PHINode *DstPhi = Builder.CreatePHI(Dst->getType(), 2);
4398 DstPhi->addIncoming(Dst, Entry);
4399 PHINode *SrcPhi = Builder.CreatePHI(Src->getType(), 2);
4400 SrcPhi->addIncoming(Src, Entry);
4401 PHINode *SizePhi = Builder.CreatePHI(SizeTy, 2);
4402 SizePhi->addIncoming(Size, Entry);
4403 CharUnits WCharAlign =
4404 getContext().getTypeAlignInChars(getContext().WCharTy);
4405 Value *DstCh = Builder.CreateAlignedLoad(WCharTy, DstPhi, WCharAlign);
4406 Value *SrcCh = Builder.CreateAlignedLoad(WCharTy, SrcPhi, WCharAlign);
4407 Value *DstGtSrc = Builder.CreateICmpUGT(DstCh, SrcCh);
4408 Builder.CreateCondBr(DstGtSrc, Exit, CmpLT);
4409
4410 EmitBlock(CmpLT);
4411 Value *DstLtSrc = Builder.CreateICmpULT(DstCh, SrcCh);
4412 Builder.CreateCondBr(DstLtSrc, Exit, Next);
4413
4414 EmitBlock(Next);
4415 Value *NextDst = Builder.CreateConstInBoundsGEP1_32(WCharTy, DstPhi, 1);
4416 Value *NextSrc = Builder.CreateConstInBoundsGEP1_32(WCharTy, SrcPhi, 1);
4417 Value *NextSize = Builder.CreateSub(SizePhi, ConstantInt::get(SizeTy, 1));
4418 Value *NextSizeEq0 =
4419 Builder.CreateICmpEQ(NextSize, ConstantInt::get(SizeTy, 0));
4420 Builder.CreateCondBr(NextSizeEq0, Exit, CmpGT);
4421 DstPhi->addIncoming(NextDst, Next);
4422 SrcPhi->addIncoming(NextSrc, Next);
4423 SizePhi->addIncoming(NextSize, Next);
4424
4425 EmitBlock(Exit);
4426 PHINode *Ret = Builder.CreatePHI(IntTy, 4);
4427 Ret->addIncoming(ConstantInt::get(IntTy, 0), Entry);
4428 Ret->addIncoming(ConstantInt::get(IntTy, 1), CmpGT);
4429 Ret->addIncoming(ConstantInt::get(IntTy, -1), CmpLT);
4430 Ret->addIncoming(ConstantInt::get(IntTy, 0), Next);
4431 return RValue::get(Ret);
4432 }
4433 case Builtin::BI__builtin_dwarf_cfa: {
4434 // The offset in bytes from the first argument to the CFA.
4435 //
4436 // Why on earth is this in the frontend? Is there any reason at
4437 // all that the backend can't reasonably determine this while
4438 // lowering llvm.eh.dwarf.cfa()?
4439 //
4440 // TODO: If there's a satisfactory reason, add a target hook for
4441 // this instead of hard-coding 0, which is correct for most targets.
4442 int32_t Offset = 0;
4443
4444 Function *F = CGM.getIntrinsic(Intrinsic::eh_dwarf_cfa);
4445 return RValue::get(Builder.CreateCall(F,
4446 llvm::ConstantInt::get(Int32Ty, Offset)));
4447 }
4448 case Builtin::BI__builtin_return_address: {
4449 Value *Depth = ConstantEmitter(*this).emitAbstract(E->getArg(0),
4450 getContext().UnsignedIntTy);
4451 Function *F = CGM.getIntrinsic(Intrinsic::returnaddress);
4452 return RValue::get(Builder.CreateCall(F, Depth));
4453 }
4454 case Builtin::BI_ReturnAddress: {
4455 Function *F = CGM.getIntrinsic(Intrinsic::returnaddress);
4456 return RValue::get(Builder.CreateCall(F, Builder.getInt32(0)));
4457 }
4458 case Builtin::BI__builtin_frame_address: {
4459 Value *Depth = ConstantEmitter(*this).emitAbstract(E->getArg(0),
4460 getContext().UnsignedIntTy);
4461 Function *F = CGM.getIntrinsic(Intrinsic::frameaddress, AllocaInt8PtrTy);
4462 return RValue::get(Builder.CreateCall(F, Depth));
4463 }
4464 case Builtin::BI__builtin_extract_return_addr: {
4465 Value *Address = EmitScalarExpr(E->getArg(0));
4466 Value *Result = getTargetHooks().decodeReturnAddress(*this, Address);
4467 return RValue::get(Result);
4468 }
4469 case Builtin::BI__builtin_frob_return_addr: {
4470 Value *Address = EmitScalarExpr(E->getArg(0));
4471 Value *Result = getTargetHooks().encodeReturnAddress(*this, Address);
4472 return RValue::get(Result);
4473 }
4474 case Builtin::BI__builtin_dwarf_sp_column: {
4475 llvm::IntegerType *Ty
4476 = cast<llvm::IntegerType>(ConvertType(E->getType()));
4477 int Column = getTargetHooks().getDwarfEHStackPointer(CGM);
4478 if (Column == -1) {
4479 CGM.ErrorUnsupported(E, "__builtin_dwarf_sp_column");
4480 return RValue::get(llvm::UndefValue::get(Ty));
4481 }
4482 return RValue::get(llvm::ConstantInt::get(Ty, Column, true));
4483 }
4484 case Builtin::BI__builtin_init_dwarf_reg_size_table: {
4485 Value *Address = EmitScalarExpr(E->getArg(0));
4486 if (getTargetHooks().initDwarfEHRegSizeTable(*this, Address))
4487 CGM.ErrorUnsupported(E, "__builtin_init_dwarf_reg_size_table");
4488 return RValue::get(llvm::UndefValue::get(ConvertType(E->getType())));
4489 }
4490 case Builtin::BI__builtin_eh_return: {
4491 Value *Int = EmitScalarExpr(E->getArg(0));
4492 Value *Ptr = EmitScalarExpr(E->getArg(1));
4493
4494 llvm::IntegerType *IntTy = cast<llvm::IntegerType>(Int->getType());
4495 assert((IntTy->getBitWidth() == 32 || IntTy->getBitWidth() == 64) &&
4496 "LLVM's __builtin_eh_return only supports 32- and 64-bit variants");
4497 Function *F =
4498 CGM.getIntrinsic(IntTy->getBitWidth() == 32 ? Intrinsic::eh_return_i32
4499 : Intrinsic::eh_return_i64);
4500 Builder.CreateCall(F, {Int, Ptr});
4501 Builder.CreateUnreachable();
4502
4503 // We do need to preserve an insertion point.
4504 EmitBlock(createBasicBlock("builtin_eh_return.cont"));
4505
4506 return RValue::get(nullptr);
4507 }
4508 case Builtin::BI__builtin_unwind_init: {
4509 Function *F = CGM.getIntrinsic(Intrinsic::eh_unwind_init);
4510 Builder.CreateCall(F);
4511 return RValue::get(nullptr);
4512 }
4513 case Builtin::BI__builtin_extend_pointer: {
4514 // Extends a pointer to the size of an _Unwind_Word, which is
4515 // uint64_t on all platforms. Generally this gets poked into a
4516 // register and eventually used as an address, so if the
4517 // addressing registers are wider than pointers and the platform
4518 // doesn't implicitly ignore high-order bits when doing
4519 // addressing, we need to make sure we zext / sext based on
4520 // the platform's expectations.
4521 //
4522 // See: http://gcc.gnu.org/ml/gcc-bugs/2002-02/msg00237.html
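    // For example (illustrative), on a 32-bit target that sign-extends
    // pointers into its 64-bit addressing registers, this emits roughly:
    //   %i = ptrtoint ptr %p to i32
    //   %w = sext i32 %i to i64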
4523
4524 // Cast the pointer to intptr_t.
4525 Value *Ptr = EmitScalarExpr(E->getArg(0));
4526 Value *Result = Builder.CreatePtrToInt(Ptr, IntPtrTy, "extend.cast");
4527
4528 // If that's 64 bits, we're done.
4529 if (IntPtrTy->getBitWidth() == 64)
4530 return RValue::get(Result);
4531
4532     // Otherwise, ask the target hooks what to do.
4533 if (getTargetHooks().extendPointerWithSExt())
4534 return RValue::get(Builder.CreateSExt(Result, Int64Ty, "extend.sext"));
4535 else
4536 return RValue::get(Builder.CreateZExt(Result, Int64Ty, "extend.zext"));
4537 }
4538 case Builtin::BI__builtin_setjmp: {
4539 // Buffer is a void**.
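    // LLVM's EH setjmp expects a five-word jump buffer: the frame address is
    // stored in word 0 below, word 1 is reserved for the resume address
    // (filled in by the intrinsic's lowering), and the stack pointer is
    // stashed in word 2. The remaining words are target-specific.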
4540 Address Buf = EmitPointerWithAlignment(E->getArg(0));
4541
4542 // Store the frame pointer to the setjmp buffer.
4543 Value *FrameAddr = Builder.CreateCall(
4544 CGM.getIntrinsic(Intrinsic::frameaddress, AllocaInt8PtrTy),
4545 ConstantInt::get(Int32Ty, 0));
4546 Builder.CreateStore(FrameAddr, Buf);
4547
4548 // Store the stack pointer to the setjmp buffer.
4549 Value *StackAddr = Builder.CreateStackSave();
4550 assert(Buf.emitRawPointer(*this)->getType() == StackAddr->getType());
4551
4552 Address StackSaveSlot = Builder.CreateConstInBoundsGEP(Buf, 2);
4553 Builder.CreateStore(StackAddr, StackSaveSlot);
4554
4555 // Call LLVM's EH setjmp, which is lightweight.
4556 Function *F = CGM.getIntrinsic(Intrinsic::eh_sjlj_setjmp);
4557 return RValue::get(Builder.CreateCall(F, Buf.emitRawPointer(*this)));
4558 }
4559 case Builtin::BI__builtin_longjmp: {
4560 Value *Buf = EmitScalarExpr(E->getArg(0));
4561
4562 // Call LLVM's EH longjmp, which is lightweight.
4563 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::eh_sjlj_longjmp), Buf);
4564
4565 // longjmp doesn't return; mark this as unreachable.
4566 Builder.CreateUnreachable();
4567
4568 // We do need to preserve an insertion point.
4569 EmitBlock(createBasicBlock("longjmp.cont"));
4570
4571 return RValue::get(nullptr);
4572 }
4573 case Builtin::BI__builtin_launder: {
4574 const Expr *Arg = E->getArg(0);
4575 QualType ArgTy = Arg->getType()->getPointeeType();
4576 Value *Ptr = EmitScalarExpr(Arg);
4577 if (TypeRequiresBuiltinLaunder(CGM, ArgTy))
4578 Ptr = Builder.CreateLaunderInvariantGroup(Ptr);
4579
4580 return RValue::get(Ptr);
4581 }
4582 case Builtin::BI__sync_fetch_and_add:
4583 case Builtin::BI__sync_fetch_and_sub:
4584 case Builtin::BI__sync_fetch_and_or:
4585 case Builtin::BI__sync_fetch_and_and:
4586 case Builtin::BI__sync_fetch_and_xor:
4587 case Builtin::BI__sync_fetch_and_nand:
4588 case Builtin::BI__sync_add_and_fetch:
4589 case Builtin::BI__sync_sub_and_fetch:
4590 case Builtin::BI__sync_and_and_fetch:
4591 case Builtin::BI__sync_or_and_fetch:
4592 case Builtin::BI__sync_xor_and_fetch:
4593 case Builtin::BI__sync_nand_and_fetch:
4594 case Builtin::BI__sync_val_compare_and_swap:
4595 case Builtin::BI__sync_bool_compare_and_swap:
4596 case Builtin::BI__sync_lock_test_and_set:
4597 case Builtin::BI__sync_lock_release:
4598 case Builtin::BI__sync_swap:
4599 llvm_unreachable("Shouldn't make it through sema");
4600 case Builtin::BI__sync_fetch_and_add_1:
4601 case Builtin::BI__sync_fetch_and_add_2:
4602 case Builtin::BI__sync_fetch_and_add_4:
4603 case Builtin::BI__sync_fetch_and_add_8:
4604 case Builtin::BI__sync_fetch_and_add_16:
4605 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Add, E);
4606 case Builtin::BI__sync_fetch_and_sub_1:
4607 case Builtin::BI__sync_fetch_and_sub_2:
4608 case Builtin::BI__sync_fetch_and_sub_4:
4609 case Builtin::BI__sync_fetch_and_sub_8:
4610 case Builtin::BI__sync_fetch_and_sub_16:
4611 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Sub, E);
4612 case Builtin::BI__sync_fetch_and_or_1:
4613 case Builtin::BI__sync_fetch_and_or_2:
4614 case Builtin::BI__sync_fetch_and_or_4:
4615 case Builtin::BI__sync_fetch_and_or_8:
4616 case Builtin::BI__sync_fetch_and_or_16:
4617 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Or, E);
4618 case Builtin::BI__sync_fetch_and_and_1:
4619 case Builtin::BI__sync_fetch_and_and_2:
4620 case Builtin::BI__sync_fetch_and_and_4:
4621 case Builtin::BI__sync_fetch_and_and_8:
4622 case Builtin::BI__sync_fetch_and_and_16:
4623 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::And, E);
4624 case Builtin::BI__sync_fetch_and_xor_1:
4625 case Builtin::BI__sync_fetch_and_xor_2:
4626 case Builtin::BI__sync_fetch_and_xor_4:
4627 case Builtin::BI__sync_fetch_and_xor_8:
4628 case Builtin::BI__sync_fetch_and_xor_16:
4629 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xor, E);
4630 case Builtin::BI__sync_fetch_and_nand_1:
4631 case Builtin::BI__sync_fetch_and_nand_2:
4632 case Builtin::BI__sync_fetch_and_nand_4:
4633 case Builtin::BI__sync_fetch_and_nand_8:
4634 case Builtin::BI__sync_fetch_and_nand_16:
4635 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Nand, E);
4636
4637 // Clang extensions: not overloaded yet.
4638 case Builtin::BI__sync_fetch_and_min:
4639 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Min, E);
4640 case Builtin::BI__sync_fetch_and_max:
4641 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Max, E);
4642 case Builtin::BI__sync_fetch_and_umin:
4643 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMin, E);
4644 case Builtin::BI__sync_fetch_and_umax:
4645 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMax, E);
4646
4647 case Builtin::BI__sync_add_and_fetch_1:
4648 case Builtin::BI__sync_add_and_fetch_2:
4649 case Builtin::BI__sync_add_and_fetch_4:
4650 case Builtin::BI__sync_add_and_fetch_8:
4651 case Builtin::BI__sync_add_and_fetch_16:
4652 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Add, E,
4653 llvm::Instruction::Add);
4654 case Builtin::BI__sync_sub_and_fetch_1:
4655 case Builtin::BI__sync_sub_and_fetch_2:
4656 case Builtin::BI__sync_sub_and_fetch_4:
4657 case Builtin::BI__sync_sub_and_fetch_8:
4658 case Builtin::BI__sync_sub_and_fetch_16:
4659 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Sub, E,
4660 llvm::Instruction::Sub);
4661 case Builtin::BI__sync_and_and_fetch_1:
4662 case Builtin::BI__sync_and_and_fetch_2:
4663 case Builtin::BI__sync_and_and_fetch_4:
4664 case Builtin::BI__sync_and_and_fetch_8:
4665 case Builtin::BI__sync_and_and_fetch_16:
4666 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::And, E,
4667 llvm::Instruction::And);
4668 case Builtin::BI__sync_or_and_fetch_1:
4669 case Builtin::BI__sync_or_and_fetch_2:
4670 case Builtin::BI__sync_or_and_fetch_4:
4671 case Builtin::BI__sync_or_and_fetch_8:
4672 case Builtin::BI__sync_or_and_fetch_16:
4673 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Or, E,
4674 llvm::Instruction::Or);
4675 case Builtin::BI__sync_xor_and_fetch_1:
4676 case Builtin::BI__sync_xor_and_fetch_2:
4677 case Builtin::BI__sync_xor_and_fetch_4:
4678 case Builtin::BI__sync_xor_and_fetch_8:
4679 case Builtin::BI__sync_xor_and_fetch_16:
4680 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Xor, E,
4681 llvm::Instruction::Xor);
4682 case Builtin::BI__sync_nand_and_fetch_1:
4683 case Builtin::BI__sync_nand_and_fetch_2:
4684 case Builtin::BI__sync_nand_and_fetch_4:
4685 case Builtin::BI__sync_nand_and_fetch_8:
4686 case Builtin::BI__sync_nand_and_fetch_16:
4687 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Nand, E,
4688 llvm::Instruction::And, true);
4689
4690 case Builtin::BI__sync_val_compare_and_swap_1:
4691 case Builtin::BI__sync_val_compare_and_swap_2:
4692 case Builtin::BI__sync_val_compare_and_swap_4:
4693 case Builtin::BI__sync_val_compare_and_swap_8:
4694 case Builtin::BI__sync_val_compare_and_swap_16:
4695 return RValue::get(MakeAtomicCmpXchgValue(*this, E, false));
4696
4697 case Builtin::BI__sync_bool_compare_and_swap_1:
4698 case Builtin::BI__sync_bool_compare_and_swap_2:
4699 case Builtin::BI__sync_bool_compare_and_swap_4:
4700 case Builtin::BI__sync_bool_compare_and_swap_8:
4701 case Builtin::BI__sync_bool_compare_and_swap_16:
4702 return RValue::get(MakeAtomicCmpXchgValue(*this, E, true));
4703
4704 case Builtin::BI__sync_swap_1:
4705 case Builtin::BI__sync_swap_2:
4706 case Builtin::BI__sync_swap_4:
4707 case Builtin::BI__sync_swap_8:
4708 case Builtin::BI__sync_swap_16:
4709 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);
4710
4711 case Builtin::BI__sync_lock_test_and_set_1:
4712 case Builtin::BI__sync_lock_test_and_set_2:
4713 case Builtin::BI__sync_lock_test_and_set_4:
4714 case Builtin::BI__sync_lock_test_and_set_8:
4715 case Builtin::BI__sync_lock_test_and_set_16:
4716 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);
4717
4718 case Builtin::BI__sync_lock_release_1:
4719 case Builtin::BI__sync_lock_release_2:
4720 case Builtin::BI__sync_lock_release_4:
4721 case Builtin::BI__sync_lock_release_8:
4722 case Builtin::BI__sync_lock_release_16: {
4723 Address Ptr = CheckAtomicAlignment(*this, E);
4724 QualType ElTy = E->getArg(0)->getType()->getPointeeType();
4725
4726 llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(),
4727 getContext().getTypeSize(ElTy));
4728 llvm::StoreInst *Store =
4729 Builder.CreateStore(llvm::Constant::getNullValue(ITy), Ptr);
4730 Store->setAtomic(llvm::AtomicOrdering::Release);
4731 return RValue::get(nullptr);
4732 }
4733
4734 case Builtin::BI__sync_synchronize: {
4735     // We assume this is supposed to correspond to a C++11-style
4736 // sequentially-consistent fence (i.e. this is only usable for
4737 // synchronization, not device I/O or anything like that). This intrinsic
4738 // is really badly designed in the sense that in theory, there isn't
4739 // any way to safely use it... but in practice, it mostly works
4740 // to use it with non-atomic loads and stores to get acquire/release
4741 // semantics.
4742 Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent);
4743 return RValue::get(nullptr);
4744 }
4745
4746 case Builtin::BI__builtin_nontemporal_load:
4747 return RValue::get(EmitNontemporalLoad(*this, E));
4748 case Builtin::BI__builtin_nontemporal_store:
4749 return RValue::get(EmitNontemporalStore(*this, E));
4750 case Builtin::BI__c11_atomic_is_lock_free:
4751 case Builtin::BI__atomic_is_lock_free: {
4752 // Call "bool __atomic_is_lock_free(size_t size, void *ptr)". For the
4753 // __c11 builtin, ptr is 0 (indicating a properly-aligned object), since
4754 // _Atomic(T) is always properly-aligned.
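    // For example (illustrative), __c11_atomic_is_lock_free(8) lowers to
    //   call i1 @__atomic_is_lock_free(i64 8, ptr null)
    // while __atomic_is_lock_free(8, &obj) passes the object's address as the
    // second argument so the runtime can take its alignment into account.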
4755 const char *LibCallName = "__atomic_is_lock_free";
4756 CallArgList Args;
4757 Args.add(RValue::get(EmitScalarExpr(E->getArg(0))),
4758 getContext().getSizeType());
4759 if (BuiltinID == Builtin::BI__atomic_is_lock_free)
4760 Args.add(RValue::get(EmitScalarExpr(E->getArg(1))),
4761 getContext().VoidPtrTy);
4762 else
4763 Args.add(RValue::get(llvm::Constant::getNullValue(VoidPtrTy)),
4764 getContext().VoidPtrTy);
4765 const CGFunctionInfo &FuncInfo =
4766 CGM.getTypes().arrangeBuiltinFunctionCall(E->getType(), Args);
4767 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FuncInfo);
4768 llvm::FunctionCallee Func = CGM.CreateRuntimeFunction(FTy, LibCallName);
4769 return EmitCall(FuncInfo, CGCallee::forDirect(Func),
4770 ReturnValueSlot(), Args);
4771 }
4772
4773 case Builtin::BI__atomic_test_and_set: {
4774 // Look at the argument type to determine whether this is a volatile
4775 // operation. The parameter type is always volatile.
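    // For a constant memory order the lowering is a single exchange, e.g.
    // (illustrative) __atomic_test_and_set(p, __ATOMIC_ACQUIRE) becomes:
    //   %old = atomicrmw xchg ptr %p, i8 1 acquire
    //   %set = icmp ne i8 %old, 0
    // For a non-constant order we switch over the order value at runtime.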
4776 QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
4777 bool Volatile =
4778 PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
4779
4780 Address Ptr =
4781 EmitPointerWithAlignment(E->getArg(0)).withElementType(Int8Ty);
4782
4783 Value *NewVal = Builder.getInt8(1);
4784 Value *Order = EmitScalarExpr(E->getArg(1));
4785 if (isa<llvm::ConstantInt>(Order)) {
4786 int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
4787 AtomicRMWInst *Result = nullptr;
4788 switch (ord) {
4789 case 0: // memory_order_relaxed
4790 default: // invalid order
4791 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
4792 llvm::AtomicOrdering::Monotonic);
4793 break;
4794 case 1: // memory_order_consume
4795 case 2: // memory_order_acquire
4796 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
4797 llvm::AtomicOrdering::Acquire);
4798 break;
4799 case 3: // memory_order_release
4800 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
4801 llvm::AtomicOrdering::Release);
4802 break;
4803 case 4: // memory_order_acq_rel
4805 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
4806 llvm::AtomicOrdering::AcquireRelease);
4807 break;
4808 case 5: // memory_order_seq_cst
4809 Result = Builder.CreateAtomicRMW(
4810 llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
4811 llvm::AtomicOrdering::SequentiallyConsistent);
4812 break;
4813 }
4814 Result->setVolatile(Volatile);
4815 return RValue::get(Builder.CreateIsNotNull(Result, "tobool"));
4816 }
4817
4818 llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
4819
4820 llvm::BasicBlock *BBs[5] = {
4821 createBasicBlock("monotonic", CurFn),
4822 createBasicBlock("acquire", CurFn),
4823 createBasicBlock("release", CurFn),
4824 createBasicBlock("acqrel", CurFn),
4825 createBasicBlock("seqcst", CurFn)
4826 };
4827 llvm::AtomicOrdering Orders[5] = {
4828 llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Acquire,
4829 llvm::AtomicOrdering::Release, llvm::AtomicOrdering::AcquireRelease,
4830 llvm::AtomicOrdering::SequentiallyConsistent};
4831
4832 Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
4833 llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]);
4834
4835 Builder.SetInsertPoint(ContBB);
4836 PHINode *Result = Builder.CreatePHI(Int8Ty, 5, "was_set");
4837
4838 for (unsigned i = 0; i < 5; ++i) {
4839 Builder.SetInsertPoint(BBs[i]);
4840 AtomicRMWInst *RMW = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
4841 Ptr, NewVal, Orders[i]);
4842 RMW->setVolatile(Volatile);
4843 Result->addIncoming(RMW, BBs[i]);
4844 Builder.CreateBr(ContBB);
4845 }
4846
4847 SI->addCase(Builder.getInt32(0), BBs[0]);
4848 SI->addCase(Builder.getInt32(1), BBs[1]);
4849 SI->addCase(Builder.getInt32(2), BBs[1]);
4850 SI->addCase(Builder.getInt32(3), BBs[2]);
4851 SI->addCase(Builder.getInt32(4), BBs[3]);
4852 SI->addCase(Builder.getInt32(5), BBs[4]);
4853
4854 Builder.SetInsertPoint(ContBB);
4855 return RValue::get(Builder.CreateIsNotNull(Result, "tobool"));
4856 }
4857
4858 case Builtin::BI__atomic_clear: {
4859 QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
4860 bool Volatile =
4861 PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
4862
4863 Address Ptr = EmitPointerWithAlignment(E->getArg(0));
4864 Ptr = Ptr.withElementType(Int8Ty);
4865 Value *NewVal = Builder.getInt8(0);
4866 Value *Order = EmitScalarExpr(E->getArg(1));
4867 if (isa<llvm::ConstantInt>(Order)) {
4868 int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
4869 StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile);
4870 switch (ord) {
4871 case 0: // memory_order_relaxed
4872 default: // invalid order
4873 Store->setOrdering(llvm::AtomicOrdering::Monotonic);
4874 break;
4875 case 3: // memory_order_release
4876 Store->setOrdering(llvm::AtomicOrdering::Release);
4877 break;
4878 case 5: // memory_order_seq_cst
4879 Store->setOrdering(llvm::AtomicOrdering::SequentiallyConsistent);
4880 break;
4881 }
4882 return RValue::get(nullptr);
4883 }
4884
4885 llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
4886
4887 llvm::BasicBlock *BBs[3] = {
4888 createBasicBlock("monotonic", CurFn),
4889 createBasicBlock("release", CurFn),
4890 createBasicBlock("seqcst", CurFn)
4891 };
4892 llvm::AtomicOrdering Orders[3] = {
4893 llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Release,
4894 llvm::AtomicOrdering::SequentiallyConsistent};
4895
4896 Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
4897 llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]);
4898
4899 for (unsigned i = 0; i < 3; ++i) {
4900 Builder.SetInsertPoint(BBs[i]);
4901 StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile);
4902 Store->setOrdering(Orders[i]);
4903 Builder.CreateBr(ContBB);
4904 }
4905
4906 SI->addCase(Builder.getInt32(0), BBs[0]);
4907 SI->addCase(Builder.getInt32(3), BBs[1]);
4908 SI->addCase(Builder.getInt32(5), BBs[2]);
4909
4910 Builder.SetInsertPoint(ContBB);
4911 return RValue::get(nullptr);
4912 }
4913
4914 case Builtin::BI__atomic_thread_fence:
4915 case Builtin::BI__atomic_signal_fence:
4916 case Builtin::BI__c11_atomic_thread_fence:
4917 case Builtin::BI__c11_atomic_signal_fence: {
4918 llvm::SyncScope::ID SSID;
4919 if (BuiltinID == Builtin::BI__atomic_signal_fence ||
4920 BuiltinID == Builtin::BI__c11_atomic_signal_fence)
4921 SSID = llvm::SyncScope::SingleThread;
4922 else
4923 SSID = llvm::SyncScope::System;
4924 Value *Order = EmitScalarExpr(E->getArg(0));
4925 if (isa<llvm::ConstantInt>(Order)) {
4926 int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
4927 switch (ord) {
4928 case 0: // memory_order_relaxed
4929 default: // invalid order
4930 break;
4931 case 1: // memory_order_consume
4932 case 2: // memory_order_acquire
4933 Builder.CreateFence(llvm::AtomicOrdering::Acquire, SSID);
4934 break;
4935 case 3: // memory_order_release
4936 Builder.CreateFence(llvm::AtomicOrdering::Release, SSID);
4937 break;
4938 case 4: // memory_order_acq_rel
4939 Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, SSID);
4940 break;
4941 case 5: // memory_order_seq_cst
4942 Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, SSID);
4943 break;
4944 }
4945 return RValue::get(nullptr);
4946 }
4947
4948 llvm::BasicBlock *AcquireBB, *ReleaseBB, *AcqRelBB, *SeqCstBB;
4949 AcquireBB = createBasicBlock("acquire", CurFn);
4950 ReleaseBB = createBasicBlock("release", CurFn);
4951 AcqRelBB = createBasicBlock("acqrel", CurFn);
4952 SeqCstBB = createBasicBlock("seqcst", CurFn);
4953 llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
4954
4955 Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
4956 llvm::SwitchInst *SI = Builder.CreateSwitch(Order, ContBB);
4957
4958 Builder.SetInsertPoint(AcquireBB);
4959 Builder.CreateFence(llvm::AtomicOrdering::Acquire, SSID);
4960 Builder.CreateBr(ContBB);
4961 SI->addCase(Builder.getInt32(1), AcquireBB);
4962 SI->addCase(Builder.getInt32(2), AcquireBB);
4963
4964 Builder.SetInsertPoint(ReleaseBB);
4965 Builder.CreateFence(llvm::AtomicOrdering::Release, SSID);
4966 Builder.CreateBr(ContBB);
4967 SI->addCase(Builder.getInt32(3), ReleaseBB);
4968
4969 Builder.SetInsertPoint(AcqRelBB);
4970 Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, SSID);
4971 Builder.CreateBr(ContBB);
4972 SI->addCase(Builder.getInt32(4), AcqRelBB);
4973
4974 Builder.SetInsertPoint(SeqCstBB);
4975 Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, SSID);
4976 Builder.CreateBr(ContBB);
4977 SI->addCase(Builder.getInt32(5), SeqCstBB);
4978
4979 Builder.SetInsertPoint(ContBB);
4980 return RValue::get(nullptr);
4981 }
4982
4983 case Builtin::BI__builtin_signbit:
4984 case Builtin::BI__builtin_signbitf:
4985 case Builtin::BI__builtin_signbitl: {
4986 return RValue::get(
4987 Builder.CreateZExt(EmitSignBit(*this, EmitScalarExpr(E->getArg(0))),
4988 ConvertType(E->getType())));
4989 }
4990 case Builtin::BI__warn_memset_zero_len:
4991 return RValue::getIgnored();
4992 case Builtin::BI__annotation: {
4993 // Re-encode each wide string to UTF8 and make an MDString.
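    // For example (illustrative), __annotation(L"category", L"text") emits a
    // call to llvm.codeview.annotation carrying an MDTuple of the two strings,
    // e.g. !{!"category", !"text"}.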
4994 SmallVector<Metadata *, 1> Strings;
4995 for (const Expr *Arg : E->arguments()) {
4996 const auto *Str = cast<StringLiteral>(Arg->IgnoreParenCasts());
4997 assert(Str->getCharByteWidth() == 2);
4998 StringRef WideBytes = Str->getBytes();
4999 std::string StrUtf8;
5000 if (!convertUTF16ToUTF8String(
5001 ArrayRef(WideBytes.data(), WideBytes.size()), StrUtf8)) {
5002 CGM.ErrorUnsupported(E, "non-UTF16 __annotation argument");
5003 continue;
5004 }
5005 Strings.push_back(llvm::MDString::get(getLLVMContext(), StrUtf8));
5006 }
5007
5008     // Build an MDTuple of MDStrings and emit the intrinsic call.
5009 llvm::Function *F =
5010 CGM.getIntrinsic(llvm::Intrinsic::codeview_annotation, {});
5011 MDTuple *StrTuple = MDTuple::get(getLLVMContext(), Strings);
5012 Builder.CreateCall(F, MetadataAsValue::get(getLLVMContext(), StrTuple));
5013 return RValue::getIgnored();
5014 }
5015 case Builtin::BI__builtin_annotation: {
5016 llvm::Value *AnnVal = EmitScalarExpr(E->getArg(0));
5017 llvm::Function *F =
5018 CGM.getIntrinsic(llvm::Intrinsic::annotation,
5019 {AnnVal->getType(), CGM.ConstGlobalsPtrTy});
5020
5021     // Get the annotation string, going through casts. Sema requires this to
5022     // be a non-wide string literal, potentially cast, so the cast<> is safe.
5023 const Expr *AnnotationStrExpr = E->getArg(1)->IgnoreParenCasts();
5024 StringRef Str = cast<StringLiteral>(AnnotationStrExpr)->getString();
5025 return RValue::get(
5026 EmitAnnotationCall(F, AnnVal, Str, E->getExprLoc(), nullptr));
5027 }
5028 case Builtin::BI__builtin_addcb:
5029 case Builtin::BI__builtin_addcs:
5030 case Builtin::BI__builtin_addc:
5031 case Builtin::BI__builtin_addcl:
5032 case Builtin::BI__builtin_addcll:
5033 case Builtin::BI__builtin_subcb:
5034 case Builtin::BI__builtin_subcs:
5035 case Builtin::BI__builtin_subc:
5036 case Builtin::BI__builtin_subcl:
5037 case Builtin::BI__builtin_subcll: {
5038
5039 // We translate all of these builtins from expressions of the form:
5040 // int x = ..., y = ..., carryin = ..., carryout, result;
5041 // result = __builtin_addc(x, y, carryin, &carryout);
5042 //
5043 // to LLVM IR of the form:
5044 //
5045 // %tmp1 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %x, i32 %y)
5046 // %tmpsum1 = extractvalue {i32, i1} %tmp1, 0
5047 // %carry1 = extractvalue {i32, i1} %tmp1, 1
5048 // %tmp2 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %tmpsum1,
5049 // i32 %carryin)
5050 // %result = extractvalue {i32, i1} %tmp2, 0
5051 // %carry2 = extractvalue {i32, i1} %tmp2, 1
5052 // %tmp3 = or i1 %carry1, %carry2
5053 // %tmp4 = zext i1 %tmp3 to i32
5054 // store i32 %tmp4, i32* %carryout
5055
5056 // Scalarize our inputs.
5057 llvm::Value *X = EmitScalarExpr(E->getArg(0));
5058 llvm::Value *Y = EmitScalarExpr(E->getArg(1));
5059 llvm::Value *Carryin = EmitScalarExpr(E->getArg(2));
5060 Address CarryOutPtr = EmitPointerWithAlignment(E->getArg(3));
5061
5062 // Decide if we are lowering to a uadd.with.overflow or usub.with.overflow.
5063 llvm::Intrinsic::ID IntrinsicId;
5064 switch (BuiltinID) {
5065 default: llvm_unreachable("Unknown multiprecision builtin id.");
5066 case Builtin::BI__builtin_addcb:
5067 case Builtin::BI__builtin_addcs:
5068 case Builtin::BI__builtin_addc:
5069 case Builtin::BI__builtin_addcl:
5070 case Builtin::BI__builtin_addcll:
5071 IntrinsicId = llvm::Intrinsic::uadd_with_overflow;
5072 break;
5073 case Builtin::BI__builtin_subcb:
5074 case Builtin::BI__builtin_subcs:
5075 case Builtin::BI__builtin_subc:
5076 case Builtin::BI__builtin_subcl:
5077 case Builtin::BI__builtin_subcll:
5078 IntrinsicId = llvm::Intrinsic::usub_with_overflow;
5079 break;
5080 }
5081
5082 // Construct our resulting LLVM IR expression.
5083 llvm::Value *Carry1;
5084 llvm::Value *Sum1 = EmitOverflowIntrinsic(*this, IntrinsicId,
5085 X, Y, Carry1);
5086 llvm::Value *Carry2;
5087 llvm::Value *Sum2 = EmitOverflowIntrinsic(*this, IntrinsicId,
5088 Sum1, Carryin, Carry2);
5089 llvm::Value *CarryOut = Builder.CreateZExt(Builder.CreateOr(Carry1, Carry2),
5090 X->getType());
5091 Builder.CreateStore(CarryOut, CarryOutPtr);
5092 return RValue::get(Sum2);
5093 }
5094
5095 case Builtin::BI__builtin_add_overflow:
5096 case Builtin::BI__builtin_sub_overflow:
5097 case Builtin::BI__builtin_mul_overflow: {
5098 const clang::Expr *LeftArg = E->getArg(0);
5099 const clang::Expr *RightArg = E->getArg(1);
5100 const clang::Expr *ResultArg = E->getArg(2);
5101
5102 clang::QualType ResultQTy =
5103 ResultArg->getType()->castAs<PointerType>()->getPointeeType();
5104
5105 WidthAndSignedness LeftInfo =
5106 getIntegerWidthAndSignedness(CGM.getContext(), LeftArg->getType());
5107 WidthAndSignedness RightInfo =
5108 getIntegerWidthAndSignedness(CGM.getContext(), RightArg->getType());
5109 WidthAndSignedness ResultInfo =
5110 getIntegerWidthAndSignedness(CGM.getContext(), ResultQTy);
5111
5112 // Handle mixed-sign multiplication as a special case, because adding
5113 // runtime or backend support for our generic irgen would be too expensive.
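    // For example (illustrative), __builtin_mul_overflow with one signed and
    // one unsigned operand takes this path rather than the generic
    // encompassing-type path below.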
5114 if (isSpecialMixedSignMultiply(BuiltinID, LeftInfo, RightInfo, ResultInfo))
5115 return EmitCheckedMixedSignMultiply(*this, LeftArg, LeftInfo, RightArg,
5116 RightInfo, ResultArg, ResultQTy,
5117 ResultInfo);
5118
5119 if (isSpecialUnsignedMultiplySignedResult(BuiltinID, LeftInfo, RightInfo,
5120 ResultInfo))
5121 return EmitCheckedUnsignedMultiplySignedResult(
5122 *this, LeftArg, LeftInfo, RightArg, RightInfo, ResultArg, ResultQTy,
5123 ResultInfo);
5124
5125 WidthAndSignedness EncompassingInfo =
5126 EncompassingIntegerType({LeftInfo, RightInfo, ResultInfo});
5127
5128 llvm::Type *EncompassingLLVMTy =
5129 llvm::IntegerType::get(CGM.getLLVMContext(), EncompassingInfo.Width);
5130
5131 llvm::Type *ResultLLVMTy = CGM.getTypes().ConvertType(ResultQTy);
5132
5133 llvm::Intrinsic::ID IntrinsicId;
5134 switch (BuiltinID) {
5135 default:
5136 llvm_unreachable("Unknown overflow builtin id.");
5137 case Builtin::BI__builtin_add_overflow:
5138 IntrinsicId = EncompassingInfo.Signed
5139 ? llvm::Intrinsic::sadd_with_overflow
5140 : llvm::Intrinsic::uadd_with_overflow;
5141 break;
5142 case Builtin::BI__builtin_sub_overflow:
5143 IntrinsicId = EncompassingInfo.Signed
5144 ? llvm::Intrinsic::ssub_with_overflow
5145 : llvm::Intrinsic::usub_with_overflow;
5146 break;
5147 case Builtin::BI__builtin_mul_overflow:
5148 IntrinsicId = EncompassingInfo.Signed
5149 ? llvm::Intrinsic::smul_with_overflow
5150 : llvm::Intrinsic::umul_with_overflow;
5151 break;
5152 }
5153
5154 llvm::Value *Left = EmitScalarExpr(LeftArg);
5155 llvm::Value *Right = EmitScalarExpr(RightArg);
5156 Address ResultPtr = EmitPointerWithAlignment(ResultArg);
5157
5158 // Extend each operand to the encompassing type.
5159 Left = Builder.CreateIntCast(Left, EncompassingLLVMTy, LeftInfo.Signed);
5160 Right = Builder.CreateIntCast(Right, EncompassingLLVMTy, RightInfo.Signed);
5161
5162 // Perform the operation on the extended values.
5163 llvm::Value *Overflow, *Result;
5164 Result = EmitOverflowIntrinsic(*this, IntrinsicId, Left, Right, Overflow);
5165
5166 if (EncompassingInfo.Width > ResultInfo.Width) {
5167 // The encompassing type is wider than the result type, so we need to
5168 // truncate it.
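      // For example (illustrative), __builtin_add_overflow(int, int, short *)
      // adds in i32, truncates the sum to i16, and additionally reports
      // overflow if sign-extending the i16 back to i32 does not give back the
      // original i32 sum.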
5169 llvm::Value *ResultTrunc = Builder.CreateTrunc(Result, ResultLLVMTy);
5170
5171 // To see if the truncation caused an overflow, we will extend
5172 // the result and then compare it to the original result.
5173 llvm::Value *ResultTruncExt = Builder.CreateIntCast(
5174 ResultTrunc, EncompassingLLVMTy, ResultInfo.Signed);
5175 llvm::Value *TruncationOverflow =
5176 Builder.CreateICmpNE(Result, ResultTruncExt);
5177
5178 Overflow = Builder.CreateOr(Overflow, TruncationOverflow);
5179 Result = ResultTrunc;
5180 }
5181
5182 // Finally, store the result using the pointer.
5183 bool isVolatile =
5184 ResultArg->getType()->getPointeeType().isVolatileQualified();
5185 Builder.CreateStore(EmitToMemory(Result, ResultQTy), ResultPtr, isVolatile);
5186
5187 return RValue::get(Overflow);
5188 }
5189
5190 case Builtin::BI__builtin_uadd_overflow:
5191 case Builtin::BI__builtin_uaddl_overflow:
5192 case Builtin::BI__builtin_uaddll_overflow:
5193 case Builtin::BI__builtin_usub_overflow:
5194 case Builtin::BI__builtin_usubl_overflow:
5195 case Builtin::BI__builtin_usubll_overflow:
5196 case Builtin::BI__builtin_umul_overflow:
5197 case Builtin::BI__builtin_umull_overflow:
5198 case Builtin::BI__builtin_umulll_overflow:
5199 case Builtin::BI__builtin_sadd_overflow:
5200 case Builtin::BI__builtin_saddl_overflow:
5201 case Builtin::BI__builtin_saddll_overflow:
5202 case Builtin::BI__builtin_ssub_overflow:
5203 case Builtin::BI__builtin_ssubl_overflow:
5204 case Builtin::BI__builtin_ssubll_overflow:
5205 case Builtin::BI__builtin_smul_overflow:
5206 case Builtin::BI__builtin_smull_overflow:
5207 case Builtin::BI__builtin_smulll_overflow: {
5208
5209 // We translate all of these builtins directly to the relevant llvm IR node.
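    // For example (illustrative), __builtin_sadd_overflow(x, y, &res) becomes:
    //   %pair = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %x, i32 %y)
    //   %sum  = extractvalue {i32, i1} %pair, 0
    //   %ovf  = extractvalue {i32, i1} %pair, 1
    //   store i32 %sum, ptr %res
    // and the builtin's result is %ovf.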
5210
5211 // Scalarize our inputs.
5212 llvm::Value *X = EmitScalarExpr(E->getArg(0));
5213 llvm::Value *Y = EmitScalarExpr(E->getArg(1));
5214 Address SumOutPtr = EmitPointerWithAlignment(E->getArg(2));
5215
5216 // Decide which of the overflow intrinsics we are lowering to:
5217 llvm::Intrinsic::ID IntrinsicId;
5218 switch (BuiltinID) {
5219 default: llvm_unreachable("Unknown overflow builtin id.");
5220 case Builtin::BI__builtin_uadd_overflow:
5221 case Builtin::BI__builtin_uaddl_overflow:
5222 case Builtin::BI__builtin_uaddll_overflow:
5223 IntrinsicId = llvm::Intrinsic::uadd_with_overflow;
5224 break;
5225 case Builtin::BI__builtin_usub_overflow:
5226 case Builtin::BI__builtin_usubl_overflow:
5227 case Builtin::BI__builtin_usubll_overflow:
5228 IntrinsicId = llvm::Intrinsic::usub_with_overflow;
5229 break;
5230 case Builtin::BI__builtin_umul_overflow:
5231 case Builtin::BI__builtin_umull_overflow:
5232 case Builtin::BI__builtin_umulll_overflow:
5233 IntrinsicId = llvm::Intrinsic::umul_with_overflow;
5234 break;
5235 case Builtin::BI__builtin_sadd_overflow:
5236 case Builtin::BI__builtin_saddl_overflow:
5237 case Builtin::BI__builtin_saddll_overflow:
5238 IntrinsicId = llvm::Intrinsic::sadd_with_overflow;
5239 break;
5240 case Builtin::BI__builtin_ssub_overflow:
5241 case Builtin::BI__builtin_ssubl_overflow:
5242 case Builtin::BI__builtin_ssubll_overflow:
5243 IntrinsicId = llvm::Intrinsic::ssub_with_overflow;
5244 break;
5245 case Builtin::BI__builtin_smul_overflow:
5246 case Builtin::BI__builtin_smull_overflow:
5247 case Builtin::BI__builtin_smulll_overflow:
5248 IntrinsicId = llvm::Intrinsic::smul_with_overflow;
5249 break;
5250 }
5251
5253 llvm::Value *Carry;
5254 llvm::Value *Sum = EmitOverflowIntrinsic(*this, IntrinsicId, X, Y, Carry);
5255 Builder.CreateStore(Sum, SumOutPtr);
5256
5257 return RValue::get(Carry);
5258 }
5259 case Builtin::BIaddressof:
5260 case Builtin::BI__addressof:
5261 case Builtin::BI__builtin_addressof:
5262 return RValue::get(EmitLValue(E->getArg(0)).getPointer(*this));
5263 case Builtin::BI__builtin_function_start:
5264 return RValue::get(CGM.GetFunctionStart(
5265 E->getArg(0)->getAsBuiltinConstantDeclRef(CGM.getContext())));
5266 case Builtin::BI__builtin_operator_new:
5267 return EmitBuiltinNewDeleteCall(
5268 E->getCallee()->getType()->castAs<FunctionProtoType>(), E, false);
5269 case Builtin::BI__builtin_operator_delete:
5270 EmitBuiltinNewDeleteCall(
5271 E->getCallee()->getType()->castAs<FunctionProtoType>(), E, true);
5272 return RValue::get(nullptr);
5273
5274 case Builtin::BI__builtin_is_aligned:
5275 return EmitBuiltinIsAligned(E);
5276 case Builtin::BI__builtin_align_up:
5277 return EmitBuiltinAlignTo(E, true);
5278 case Builtin::BI__builtin_align_down:
5279 return EmitBuiltinAlignTo(E, false);
5280
5281 case Builtin::BI__noop:
5282 // __noop always evaluates to an integer literal zero.
5283 return RValue::get(ConstantInt::get(IntTy, 0));
5284 case Builtin::BI__builtin_call_with_static_chain: {
5285 const CallExpr *Call = cast<CallExpr>(E->getArg(0));
5286 const Expr *Chain = E->getArg(1);
5287 return EmitCall(Call->getCallee()->getType(),
5288 EmitCallee(Call->getCallee()), Call, ReturnValue,
5289 EmitScalarExpr(Chain));
5290 }
5291 case Builtin::BI_InterlockedExchange8:
5292 case Builtin::BI_InterlockedExchange16:
5293 case Builtin::BI_InterlockedExchange:
5294 case Builtin::BI_InterlockedExchangePointer:
5295 return RValue::get(
5296 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E));
5297 case Builtin::BI_InterlockedCompareExchangePointer:
5298 case Builtin::BI_InterlockedCompareExchangePointer_nf: {
5299 llvm::Type *RTy;
5300 llvm::IntegerType *IntType = IntegerType::get(
5301 getLLVMContext(), getContext().getTypeSize(E->getType()));
5302
5303 Address DestAddr = CheckAtomicAlignment(*this, E);
5304
5305 llvm::Value *Exchange = EmitScalarExpr(E->getArg(1));
5306 RTy = Exchange->getType();
5307 Exchange = Builder.CreatePtrToInt(Exchange, IntType);
5308
5309 llvm::Value *Comparand =
5310 Builder.CreatePtrToInt(EmitScalarExpr(E->getArg(2)), IntType);
5311
5312 auto Ordering =
5313 BuiltinID == Builtin::BI_InterlockedCompareExchangePointer_nf ?
5314 AtomicOrdering::Monotonic : AtomicOrdering::SequentiallyConsistent;
5315
5316 auto Result = Builder.CreateAtomicCmpXchg(DestAddr, Comparand, Exchange,
5317 Ordering, Ordering);
5318 Result->setVolatile(true);
5319
5320 return RValue::get(Builder.CreateIntToPtr(Builder.CreateExtractValue(Result,
5321 0),
5322 RTy));
5323 }
5324 case Builtin::BI_InterlockedCompareExchange8:
5325 case Builtin::BI_InterlockedCompareExchange16:
5326 case Builtin::BI_InterlockedCompareExchange:
5327 case Builtin::BI_InterlockedCompareExchange64:
5328 return RValue::get(EmitAtomicCmpXchgForMSIntrin(*this, E));
5329 case Builtin::BI_InterlockedIncrement16:
5330 case Builtin::BI_InterlockedIncrement:
5331 return RValue::get(
5332 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E));
5333 case Builtin::BI_InterlockedDecrement16:
5334 case Builtin::BI_InterlockedDecrement:
5335 return RValue::get(
5336 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E));
5337 case Builtin::BI_InterlockedAnd8:
5338 case Builtin::BI_InterlockedAnd16:
5339 case Builtin::BI_InterlockedAnd:
5340 return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E));
5341 case Builtin::BI_InterlockedExchangeAdd8:
5342 case Builtin::BI_InterlockedExchangeAdd16:
5343 case Builtin::BI_InterlockedExchangeAdd:
5344 return RValue::get(
5345 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E));
5346 case Builtin::BI_InterlockedExchangeSub8:
5347 case Builtin::BI_InterlockedExchangeSub16:
5348 case Builtin::BI_InterlockedExchangeSub:
5349 return RValue::get(
5350 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E));
5351 case Builtin::BI_InterlockedOr8:
5352 case Builtin::BI_InterlockedOr16:
5353 case Builtin::BI_InterlockedOr:
5354 return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E));
5355 case Builtin::BI_InterlockedXor8:
5356 case Builtin::BI_InterlockedXor16:
5357 case Builtin::BI_InterlockedXor:
5358 return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E));
5359
5360 case Builtin::BI_bittest64:
5361 case Builtin::BI_bittest:
5362 case Builtin::BI_bittestandcomplement64:
5363 case Builtin::BI_bittestandcomplement:
5364 case Builtin::BI_bittestandreset64:
5365 case Builtin::BI_bittestandreset:
5366 case Builtin::BI_bittestandset64:
5367 case Builtin::BI_bittestandset:
5368 case Builtin::BI_interlockedbittestandreset:
5369 case Builtin::BI_interlockedbittestandreset64:
5370 case Builtin::BI_interlockedbittestandset64:
5371 case Builtin::BI_interlockedbittestandset:
5372 case Builtin::BI_interlockedbittestandset_acq:
5373 case Builtin::BI_interlockedbittestandset_rel:
5374 case Builtin::BI_interlockedbittestandset_nf:
5375 case Builtin::BI_interlockedbittestandreset_acq:
5376 case Builtin::BI_interlockedbittestandreset_rel:
5377 case Builtin::BI_interlockedbittestandreset_nf:
5378 return RValue::get(EmitBitTestIntrinsic(*this, BuiltinID, E));
5379
5380 // These builtins exist to emit regular volatile loads and stores not
5381 // affected by the -fms-volatile setting.
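// For example, __iso_volatile_load32(p) is emitted (roughly) as a volatile
// i32 load of *p, and __iso_volatile_store32(p, v) as a volatile i32 store,
// independent of whether -fms-volatile is in effect.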
5382 case Builtin::BI__iso_volatile_load8:
5383 case Builtin::BI__iso_volatile_load16:
5384 case Builtin::BI__iso_volatile_load32:
5385 case Builtin::BI__iso_volatile_load64:
5386 return RValue::get(EmitISOVolatileLoad(*this, E));
5387 case Builtin::BI__iso_volatile_store8:
5388 case Builtin::BI__iso_volatile_store16:
5389 case Builtin::BI__iso_volatile_store32:
5390 case Builtin::BI__iso_volatile_store64:
5391 return RValue::get(EmitISOVolatileStore(*this, E));
5392
5393 case Builtin::BI__builtin_ptrauth_sign_constant:
5394 return RValue::get(ConstantEmitter(*this).emitAbstract(E, E->getType()));
5395
5396 case Builtin::BI__builtin_ptrauth_auth:
5397 case Builtin::BI__builtin_ptrauth_auth_and_resign:
5398 case Builtin::BI__builtin_ptrauth_blend_discriminator:
5399 case Builtin::BI__builtin_ptrauth_sign_generic_data:
5400 case Builtin::BI__builtin_ptrauth_sign_unauthenticated:
5401 case Builtin::BI__builtin_ptrauth_strip: {
5402 // Emit the arguments.
5403 SmallVector<llvm::Value *, 5> Args;
5404 for (auto argExpr : E->arguments())
5405 Args.push_back(EmitScalarExpr(argExpr));
5406
5407 // Cast the value to intptr_t, saving its original type.
5408 llvm::Type *OrigValueType = Args[0]->getType();
5409 if (OrigValueType->isPointerTy())
5410 Args[0] = Builder.CreatePtrToInt(Args[0], IntPtrTy);
5411
5412 switch (BuiltinID) {
5413 case Builtin::BI__builtin_ptrauth_auth_and_resign:
5414 if (Args[4]->getType()->isPointerTy())
5415 Args[4] = Builder.CreatePtrToInt(Args[4], IntPtrTy);
5416 [[fallthrough]];
5417
5418 case Builtin::BI__builtin_ptrauth_auth:
5419 case Builtin::BI__builtin_ptrauth_sign_unauthenticated:
5420 if (Args[2]->getType()->isPointerTy())
5421 Args[2] = Builder.CreatePtrToInt(Args[2], IntPtrTy);
5422 break;
5423
5424 case Builtin::BI__builtin_ptrauth_sign_generic_data:
5425 if (Args[1]->getType()->isPointerTy())
5426 Args[1] = Builder.CreatePtrToInt(Args[1], IntPtrTy);
5427 break;
5428
5429 case Builtin::BI__builtin_ptrauth_blend_discriminator:
5430 case Builtin::BI__builtin_ptrauth_strip:
5431 break;
5432 }
5433
5434 // Call the intrinsic.
5435 auto IntrinsicID = [&]() -> unsigned {
5436 switch (BuiltinID) {
5437 case Builtin::BI__builtin_ptrauth_auth:
5438 return llvm::Intrinsic::ptrauth_auth;
5439 case Builtin::BI__builtin_ptrauth_auth_and_resign:
5440 return llvm::Intrinsic::ptrauth_resign;
5441 case Builtin::BI__builtin_ptrauth_blend_discriminator:
5442 return llvm::Intrinsic::ptrauth_blend;
5443 case Builtin::BI__builtin_ptrauth_sign_generic_data:
5444 return llvm::Intrinsic::ptrauth_sign_generic;
5445 case Builtin::BI__builtin_ptrauth_sign_unauthenticated:
5446 return llvm::Intrinsic::ptrauth_sign;
5447 case Builtin::BI__builtin_ptrauth_strip:
5448 return llvm::Intrinsic::ptrauth_strip;
5449 }
5450 llvm_unreachable("bad ptrauth intrinsic");
5451 }();
5452 auto Intrinsic = CGM.getIntrinsic(IntrinsicID);
5453 llvm::Value *Result = EmitRuntimeCall(Intrinsic, Args);
5454
5455 if (BuiltinID != Builtin::BI__builtin_ptrauth_sign_generic_data &&
5456 BuiltinID != Builtin::BI__builtin_ptrauth_blend_discriminator &&
5457 OrigValueType->isPointerTy()) {
5458 Result = Builder.CreateIntToPtr(Result, OrigValueType);
5459 }
5460 return RValue::get(Result);
5461 }
5462
5463 case Builtin::BI__exception_code:
5464 case Builtin::BI_exception_code:
5465 return RValue::get(EmitSEHExceptionCode());
5466 case Builtin::BI__exception_info:
5467 case Builtin::BI_exception_info:
5468 return RValue::get(EmitSEHExceptionInfo());
5469 case Builtin::BI__abnormal_termination:
5470 case Builtin::BI_abnormal_termination:
5471 return RValue::get(EmitSEHAbnormalTermination());
5472 case Builtin::BI_setjmpex:
5473 if (getTarget().getTriple().isOSMSVCRT() && E->getNumArgs() == 1 &&
5474 E->getArg(0)->getType()->isPointerType())
5475 return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmpex, E);
5476 break;
5477 case Builtin::BI_setjmp:
5478 if (getTarget().getTriple().isOSMSVCRT() && E->getNumArgs() == 1 &&
5479 E->getArg(0)->getType()->isPointerType()) {
5480 if (getTarget().getTriple().getArch() == llvm::Triple::x86)
5481 return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmp3, E);
5482 else if (getTarget().getTriple().getArch() == llvm::Triple::aarch64)
5483 return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmpex, E);
5484 return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmp, E);
5485 }
5486 break;
5487
5488 // C++ std:: builtins.
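// These library builtins (std::move, std::forward, std::as_const, ...) are
// identity operations at the IR level, so we simply return the address of
// the argument lvalue.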
5489 case Builtin::BImove:
5490 case Builtin::BImove_if_noexcept:
5491 case Builtin::BIforward:
5492 case Builtin::BIforward_like:
5493 case Builtin::BIas_const:
5494 return RValue::get(EmitLValue(E->getArg(0)).getPointer(*this));
5495 case Builtin::BI__GetExceptionInfo: {
5496 if (llvm::GlobalVariable *GV =
5497 CGM.getCXXABI().getThrowInfo(FD->getParamDecl(0)->getType()))
5498 return RValue::get(GV);
5499 break;
5500 }
5501
5502 case Builtin::BI__fastfail:
5503 return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::__fastfail, E));
5504
5505 case Builtin::BI__builtin_coro_id:
5506 return EmitCoroutineIntrinsic(E, Intrinsic::coro_id);
5507 case Builtin::BI__builtin_coro_promise:
5508 return EmitCoroutineIntrinsic(E, Intrinsic::coro_promise);
5509 case Builtin::BI__builtin_coro_resume:
5510 EmitCoroutineIntrinsic(E, Intrinsic::coro_resume);
5511 return RValue::get(nullptr);
5512 case Builtin::BI__builtin_coro_frame:
5513 return EmitCoroutineIntrinsic(E, Intrinsic::coro_frame);
5514 case Builtin::BI__builtin_coro_noop:
5515 return EmitCoroutineIntrinsic(E, Intrinsic::coro_noop);
5516 case Builtin::BI__builtin_coro_free:
5517 return EmitCoroutineIntrinsic(E, Intrinsic::coro_free);
5518 case Builtin::BI__builtin_coro_destroy:
5519 EmitCoroutineIntrinsic(E, Intrinsic::coro_destroy);
5520 return RValue::get(nullptr);
5521 case Builtin::BI__builtin_coro_done:
5522 return EmitCoroutineIntrinsic(E, Intrinsic::coro_done);
5523 case Builtin::BI__builtin_coro_alloc:
5524 return EmitCoroutineIntrinsic(E, Intrinsic::coro_alloc);
5525 case Builtin::BI__builtin_coro_begin:
5526 return EmitCoroutineIntrinsic(E, Intrinsic::coro_begin);
5527 case Builtin::BI__builtin_coro_end:
5528 return EmitCoroutineIntrinsic(E, Intrinsic::coro_end);
5529 case Builtin::BI__builtin_coro_suspend:
5530 return EmitCoroutineIntrinsic(E, Intrinsic::coro_suspend);
5531 case Builtin::BI__builtin_coro_size:
5532 return EmitCoroutineIntrinsic(E, Intrinsic::coro_size);
5533 case Builtin::BI__builtin_coro_align:
5534 return EmitCoroutineIntrinsic(E, Intrinsic::coro_align);
5535
5536 // OpenCL v2.0 s6.13.16.2, Built-in pipe read and write functions
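// For example, a two-argument call such as read_pipe(p, &val) is lowered
// (roughly) to
//   __read_pipe_2(p, (generic void*)&val, packet_size, packet_align);
// and the four-argument reserved form maps to __read_pipe_4 analogously.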
5537 case Builtin::BIread_pipe:
5538 case Builtin::BIwrite_pipe: {
5539 Value *Arg0 = EmitScalarExpr(E->getArg(0)),
5540 *Arg1 = EmitScalarExpr(E->getArg(1));
5541 CGOpenCLRuntime OpenCLRT(CGM);
5542 Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
5543 Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
5544
5545 // Type of the generic packet parameter.
5546 unsigned GenericAS =
5547 getContext().getTargetAddressSpace(LangAS::opencl_generic);
5548 llvm::Type *I8PTy = llvm::PointerType::get(getLLVMContext(), GenericAS);
5549
5550 // Testing which overloaded version we should generate the call for.
5551 if (2U == E->getNumArgs()) {
5552 const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_2"
5553 : "__write_pipe_2";
5554 // Creating a generic function type to be able to call with any builtin or
5555 // user-defined type.
5556 llvm::Type *ArgTys[] = {Arg0->getType(), I8PTy, Int32Ty, Int32Ty};
5557 llvm::FunctionType *FTy = llvm::FunctionType::get(
5558 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
5559 Value *BCast = Builder.CreatePointerCast(Arg1, I8PTy);
5560 return RValue::get(
5561 EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
5562 {Arg0, BCast, PacketSize, PacketAlign}));
5563 } else {
5564 assert(4 == E->getNumArgs() &&
5565 "Illegal number of parameters to pipe function");
5566 const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_4"
5567 : "__write_pipe_4";
5568
5569 llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, I8PTy,
5570 Int32Ty, Int32Ty};
5571 Value *Arg2 = EmitScalarExpr(E->getArg(2)),
5572 *Arg3 = EmitScalarExpr(E->getArg(3));
5573 llvm::FunctionType *FTy = llvm::FunctionType::get(
5574 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
5575 Value *BCast = Builder.CreatePointerCast(Arg3, I8PTy);
5576 // We know the third argument is an integer type, but we may need to cast
5577 // it to i32.
5578 if (Arg2->getType() != Int32Ty)
5579 Arg2 = Builder.CreateZExtOrTrunc(Arg2, Int32Ty);
5580 return RValue::get(
5581 EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
5582 {Arg0, Arg1, Arg2, BCast, PacketSize, PacketAlign}));
5583 }
5584 }
5585 // OpenCL v2.0 s6.13.16, s9.17.3.5 - Built-in pipe reserve read and write
5586 // functions
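// For example, reserve_read_pipe(p, n) is lowered (roughly) to
//   __reserve_read_pipe(p, (int)n, packet_size, packet_align);
// which returns a reserve_id_t.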
5587 case Builtin::BIreserve_read_pipe:
5588 case Builtin::BIreserve_write_pipe:
5589 case Builtin::BIwork_group_reserve_read_pipe:
5590 case Builtin::BIwork_group_reserve_write_pipe:
5591 case Builtin::BIsub_group_reserve_read_pipe:
5592 case Builtin::BIsub_group_reserve_write_pipe: {
5593 // Composing the mangled name for the function.
5594 const char *Name;
5595 if (BuiltinID == Builtin::BIreserve_read_pipe)
5596 Name = "__reserve_read_pipe";
5597 else if (BuiltinID == Builtin::BIreserve_write_pipe)
5598 Name = "__reserve_write_pipe";
5599 else if (BuiltinID == Builtin::BIwork_group_reserve_read_pipe)
5600 Name = "__work_group_reserve_read_pipe";
5601 else if (BuiltinID == Builtin::BIwork_group_reserve_write_pipe)
5602 Name = "__work_group_reserve_write_pipe";
5603 else if (BuiltinID == Builtin::BIsub_group_reserve_read_pipe)
5604 Name = "__sub_group_reserve_read_pipe";
5605 else
5606 Name = "__sub_group_reserve_write_pipe";
5607
5608 Value *Arg0 = EmitScalarExpr(E->getArg(0)),
5609 *Arg1 = EmitScalarExpr(E->getArg(1));
5610 llvm::Type *ReservedIDTy = ConvertType(getContext().OCLReserveIDTy);
5611 CGOpenCLRuntime OpenCLRT(CGM);
5612 Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
5613 Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
5614
5615 // Building the generic function prototype.
5616 llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty, Int32Ty, Int32Ty};
5617 llvm::FunctionType *FTy = llvm::FunctionType::get(
5618 ReservedIDTy, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
5619 // We know the second argument is an integer type, but we may need to cast
5620 // it to i32.
5621 if (Arg1->getType() != Int32Ty)
5622 Arg1 = Builder.CreateZExtOrTrunc(Arg1, Int32Ty);
5623 return RValue::get(EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
5624 {Arg0, Arg1, PacketSize, PacketAlign}));
5625 }
5626 // OpenCL v2.0 s6.13.16, s9.17.3.5 - Built-in pipe commit read and write
5627 // functions
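// For example, commit_read_pipe(p, rid) is lowered (roughly) to
//   __commit_read_pipe(p, rid, packet_size, packet_align);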
5628 case Builtin::BIcommit_read_pipe:
5629 case Builtin::BIcommit_write_pipe:
5630 case Builtin::BIwork_group_commit_read_pipe:
5631 case Builtin::BIwork_group_commit_write_pipe:
5632 case Builtin::BIsub_group_commit_read_pipe:
5633 case Builtin::BIsub_group_commit_write_pipe: {
5634 const char *Name;
5635 if (BuiltinID == Builtin::BIcommit_read_pipe)
5636 Name = "__commit_read_pipe";
5637 else if (BuiltinID == Builtin::BIcommit_write_pipe)
5638 Name = "__commit_write_pipe";
5639 else if (BuiltinID == Builtin::BIwork_group_commit_read_pipe)
5640 Name = "__work_group_commit_read_pipe";
5641 else if (BuiltinID == Builtin::BIwork_group_commit_write_pipe)
5642 Name = "__work_group_commit_write_pipe";
5643 else if (BuiltinID == Builtin::BIsub_group_commit_read_pipe)
5644 Name = "__sub_group_commit_read_pipe";
5645 else
5646 Name = "__sub_group_commit_write_pipe";
5647
5648 Value *Arg0 = EmitScalarExpr(E->getArg(0)),
5649 *Arg1 = EmitScalarExpr(E->getArg(1));
5650 CGOpenCLRuntime OpenCLRT(CGM);
5651 Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
5652 Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
5653
5654 // Building the generic function prototype.
5655 llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, Int32Ty};
5656 llvm::FunctionType *FTy =
5657 llvm::FunctionType::get(llvm::Type::getVoidTy(getLLVMContext()),
5658 llvm::ArrayRef<llvm::Type *>(ArgTys), false);
5659
5660 return RValue::get(EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
5661 {Arg0, Arg1, PacketSize, PacketAlign}));
5662 }
5663 // OpenCL v2.0 s6.13.16.4 Built-in pipe query functions
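// For example, get_pipe_num_packets(p) on a read-only pipe is lowered
// (roughly) to __get_pipe_num_packets_ro(p, packet_size, packet_align).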
5664 case Builtin::BIget_pipe_num_packets:
5665 case Builtin::BIget_pipe_max_packets: {
5666 const char *BaseName;
5667 const auto *PipeTy = E->getArg(0)->getType()->castAs<PipeType>();
5668 if (BuiltinID == Builtin::BIget_pipe_num_packets)
5669 BaseName = "__get_pipe_num_packets";
5670 else
5671 BaseName = "__get_pipe_max_packets";
5672 std::string Name = std::string(BaseName) +
5673 std::string(PipeTy->isReadOnly() ? "_ro" : "_wo");
5674
5675 // Building the generic function prototype.
5676 Value *Arg0 = EmitScalarExpr(E->getArg(0));
5677 CGOpenCLRuntime OpenCLRT(CGM);
5678 Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
5679 Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
5680 llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty, Int32Ty};
5681 llvm::FunctionType *FTy = llvm::FunctionType::get(
5682 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
5683
5684 return RValue::get(EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
5685 {Arg0, PacketSize, PacketAlign}));
5686 }
5687
5688 // OpenCL v2.0 s6.13.9 - Address space qualifier functions.
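// For example, to_global(p) is lowered (roughly) to a call to __to_global on
// the pointer cast to the generic address space, with the result cast back
// to the source-level result pointer type.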
5689 case Builtin::BIto_global:
5690 case Builtin::BIto_local:
5691 case Builtin::BIto_private: {
5692 auto Arg0 = EmitScalarExpr(E->getArg(0));
5693 auto NewArgT = llvm::PointerType::get(
5694 getLLVMContext(),
5695 CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic));
5696 auto NewRetT = llvm::PointerType::get(
5697 getLLVMContext(),
5698 CGM.getContext().getTargetAddressSpace(
5699 E->getType()->getPointeeType().getAddressSpace()));
5700 auto FTy = llvm::FunctionType::get(NewRetT, {NewArgT}, false);
5701 llvm::Value *NewArg;
5702 if (Arg0->getType()->getPointerAddressSpace() !=
5703 NewArgT->getPointerAddressSpace())
5704 NewArg = Builder.CreateAddrSpaceCast(Arg0, NewArgT);
5705 else
5706 NewArg = Builder.CreateBitOrPointerCast(Arg0, NewArgT);
5707 auto NewName = std::string("__") + E->getDirectCallee()->getName().str();
5708 auto NewCall =
5709 EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, NewName), {NewArg});
5710 return RValue::get(Builder.CreateBitOrPointerCast(NewCall,
5711 ConvertType(E->getType())));
5712 }
5713
5714 // OpenCL v2.0, s6.13.17 - Enqueue kernel function.
5715 // Table 6.13.17.1 specifies four overload forms of enqueue_kernel.
5716 // The code below expands the builtin call to a call to one of the following
5717 // functions that an OpenCL runtime library will have to provide:
5718 // __enqueue_kernel_basic
5719 // __enqueue_kernel_varargs
5720 // __enqueue_kernel_basic_events
5721 // __enqueue_kernel_events_varargs
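// For example, the simplest form
//   enqueue_kernel(queue, flags, ndrange, ^{ ... });
// is lowered (roughly) to
//   __enqueue_kernel_basic(queue, flags, &ndrange, kernel, block);
// where 'kernel' and 'block' come from the enqueued block emission below.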
5722 case Builtin::BIenqueue_kernel: {
5723 StringRef Name; // Generated function call name
5724 unsigned NumArgs = E->getNumArgs();
5725
5726 llvm::Type *QueueTy = ConvertType(getContext().OCLQueueTy);
5727 llvm::Type *GenericVoidPtrTy = Builder.getPtrTy(
5728 getContext().getTargetAddressSpace(LangAS::opencl_generic));
5729
5730 llvm::Value *Queue = EmitScalarExpr(E->getArg(0));
5731 llvm::Value *Flags = EmitScalarExpr(E->getArg(1));
5732 LValue NDRangeL = EmitAggExprToLValue(E->getArg(2));
5733 llvm::Value *Range = NDRangeL.getAddress().emitRawPointer(*this);
5734 llvm::Type *RangeTy = NDRangeL.getAddress().getType();
5735
5736 if (NumArgs == 4) {
5737 // The most basic form of the call with parameters:
5738 // queue_t, kernel_enqueue_flags_t, ndrange_t, block(void)
5739 Name = "__enqueue_kernel_basic";
5740 llvm::Type *ArgTys[] = {QueueTy, Int32Ty, RangeTy, GenericVoidPtrTy,
5741 GenericVoidPtrTy};
5742 llvm::FunctionType *FTy = llvm::FunctionType::get(
5743 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
5744
5745 auto Info =
5746 CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(3));
5747 llvm::Value *Kernel =
5748 Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy);
5749 llvm::Value *Block =
5750 Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
5751
5752 AttrBuilder B(Builder.getContext());
5753 B.addByValAttr(NDRangeL.getAddress().getElementType());
5754 llvm::AttributeList ByValAttrSet =
5755 llvm::AttributeList::get(CGM.getModule().getContext(), 3U, B);
5756
5757 auto RTCall =
5758 EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name, ByValAttrSet),
5759 {Queue, Flags, Range, Kernel, Block});
5760 RTCall->setAttributes(ByValAttrSet);
5761 return RValue::get(RTCall);
5762 }
5763 assert(NumArgs >= 5 && "Invalid enqueue_kernel signature");
5764
5765 // Create a temporary array to hold the sizes of local pointer arguments
5766 // for the block. \p First is the position of the first size argument.
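// For example, with enqueue_kernel(q, flags, ndr, ^(local int *a,
// local float *b){ ... }, n0, n1), the temporary holds {n0, n1}, each
// zero-extended or truncated to size_t.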
5767 auto CreateArrayForSizeVar = [=](unsigned First)
5768 -> std::tuple<llvm::Value *, llvm::Value *, llvm::Value *> {
5769 llvm::APInt ArraySize(32, NumArgs - First);
5770 QualType SizeArrayTy = getContext().getConstantArrayType(
5771 getContext().getSizeType(), ArraySize, nullptr,
5772 ArraySizeModifier::Normal,
5773 /*IndexTypeQuals=*/0);
5774 auto Tmp = CreateMemTemp(SizeArrayTy, "block_sizes");
5775 llvm::Value *TmpPtr = Tmp.getPointer();
5776 llvm::Value *TmpSize = EmitLifetimeStart(
5777 CGM.getDataLayout().getTypeAllocSize(Tmp.getElementType()), TmpPtr);
5778 llvm::Value *ElemPtr;
5779 // Each of the following arguments specifies the size of the corresponding
5780 // argument passed to the enqueued block.
5781 auto *Zero = llvm::ConstantInt::get(IntTy, 0);
5782 for (unsigned I = First; I < NumArgs; ++I) {
5783 auto *Index = llvm::ConstantInt::get(IntTy, I - First);
5784 auto *GEP = Builder.CreateGEP(Tmp.getElementType(), TmpPtr,
5785 {Zero, Index});
5786 if (I == First)
5787 ElemPtr = GEP;
5788 auto *V =
5789 Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(I)), SizeTy);
5790 Builder.CreateAlignedStore(
5791 V, GEP, CGM.getDataLayout().getPrefTypeAlign(SizeTy));
5792 }
5793 return std::tie(ElemPtr, TmpSize, TmpPtr);
5794 };
5795
5796 // Could have events and/or varargs.
5797 if (E->getArg(3)->getType()->isBlockPointerType()) {
5798 // No events passed, but has variadic arguments.
5799 Name = "__enqueue_kernel_varargs";
5800 auto Info =
5801 CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(3));
5802 llvm::Value *Kernel =
5803 Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy);
5804 auto *Block = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
5805 llvm::Value *ElemPtr, *TmpSize, *TmpPtr;
5806 std::tie(ElemPtr, TmpSize, TmpPtr) = CreateArrayForSizeVar(4);
5807
5808 // Create a vector of the arguments, as well as a constant value to
5809 // express to the runtime the number of variadic arguments.
5810 llvm::Value *const Args[] = {Queue, Flags,
5811 Range, Kernel,
5812 Block, ConstantInt::get(IntTy, NumArgs - 4),
5813 ElemPtr};
5814 llvm::Type *const ArgTys[] = {
5815 QueueTy, IntTy, RangeTy, GenericVoidPtrTy,
5816 GenericVoidPtrTy, IntTy, ElemPtr->getType()};
5817
5818 llvm::FunctionType *FTy = llvm::FunctionType::get(Int32Ty, ArgTys, false);
5819 auto Call = RValue::get(
5820 EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Args));
5821 if (TmpSize)
5822 EmitLifetimeEnd(TmpSize, TmpPtr);
5823 return Call;
5824 }
5825 // Any remaining forms of the call have event arguments passed.
5826 if (NumArgs >= 7) {
5827 llvm::PointerType *PtrTy = llvm::PointerType::get(
5828 CGM.getLLVMContext(),
5829 CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic));
5830
5831 llvm::Value *NumEvents =
5832 Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(3)), Int32Ty);
5833
5834 // Since SemaOpenCLBuiltinEnqueueKernel allows the fifth and sixth
5835 // arguments to be null pointer constants (including the `0` literal), we
5836 // can take that into account and emit a null pointer directly.
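// For example, enqueue_kernel(q, flags, ndr, 0, NULL, NULL, block) passes
// null directly for the event wait list and the return event.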
5837 llvm::Value *EventWaitList = nullptr;
5838 if (E->getArg(4)->isNullPointerConstant(
5839 getContext(), Expr::NPC_ValueDependentIsNotNull)) {
5840 EventWaitList = llvm::ConstantPointerNull::get(PtrTy);
5841 } else {
5842 EventWaitList =
5843 E->getArg(4)->getType()->isArrayType()
5844 ? EmitArrayToPointerDecay(E->getArg(4)).emitRawPointer(*this)
5845 : EmitScalarExpr(E->getArg(4));
5846 // Convert to generic address space.
5847 EventWaitList = Builder.CreatePointerCast(EventWaitList, PtrTy);
5848 }
5849 llvm::Value *EventRet = nullptr;
5850 if (E->getArg(5)->isNullPointerConstant(
5851 getContext(), Expr::NPC_ValueDependentIsNotNull)) {
5852 EventRet = llvm::ConstantPointerNull::get(PtrTy);
5853 } else {
5854 EventRet =
5855 Builder.CreatePointerCast(EmitScalarExpr(E->getArg(5)), PtrTy);
5856 }
5857
5858 auto Info =
5859 CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(6));
5860 llvm::Value *Kernel =
5861 Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy);
5862 llvm::Value *Block =
5863 Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
5864
5865 std::vector<llvm::Type *> ArgTys = {
5866 QueueTy, Int32Ty, RangeTy, Int32Ty,
5867 PtrTy, PtrTy, GenericVoidPtrTy, GenericVoidPtrTy};
5868
5869 std::vector<llvm::Value *> Args = {Queue, Flags, Range,
5870 NumEvents, EventWaitList, EventRet,
5871 Kernel, Block};
5872
5873 if (NumArgs == 7) {
5874 // Has events but no variadics.
5875 Name = "__enqueue_kernel_basic_events";
5876 llvm::FunctionType *FTy = llvm::FunctionType::get(
5877 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
5878 return RValue::get(
5879 EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
5880 llvm::ArrayRef<llvm::Value *>(Args)));
5881 }
5882 // Has event info and variadic arguments.
5883 // Pass the number of variadics to the runtime function too.
5884 Args.push_back(ConstantInt::get(Int32Ty, NumArgs - 7));
5885 ArgTys.push_back(Int32Ty);
5886 Name = "__enqueue_kernel_events_varargs";
5887
5888 llvm::Value *ElemPtr, *TmpSize, *TmpPtr;
5889 std::tie(ElemPtr, TmpSize, TmpPtr) = CreateArrayForSizeVar(7);
5890 Args.push_back(ElemPtr);
5891 ArgTys.push_back(ElemPtr->getType());
5892
5893 llvm::FunctionType *FTy = llvm::FunctionType::get(
5894 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
5895 auto Call =
5896 RValue::get(EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
5897 llvm::ArrayRef<llvm::Value *>(Args)));
5898 if (TmpSize)
5899 EmitLifetimeEnd(TmpSize, TmpPtr);
5900 return Call;
5901 }
5902 llvm_unreachable("Unexpected enqueue_kernel signature");
5903 }
5904 // OpenCL v2.0 s6.13.17.6 - Kernel query functions need a bitcast of the
5905 // block parameter.
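// For example, get_kernel_work_group_size(block) is lowered (roughly) to
//   __get_kernel_work_group_size_impl(kernel, block);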
5906 case Builtin::BIget_kernel_work_group_size: {
5907 llvm::Type *GenericVoidPtrTy = Builder.getPtrTy(
5908 getContext().getTargetAddressSpace(LangAS::opencl_generic));
5909 auto Info =
5910 CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(0));
5911 Value *Kernel =
5912 Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy);
5913 Value *Arg = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
5914 return RValue::get(EmitRuntimeCall(
5915 CGM.CreateRuntimeFunction(
5916 llvm::FunctionType::get(IntTy, {GenericVoidPtrTy, GenericVoidPtrTy},
5917 false),
5918 "__get_kernel_work_group_size_impl"),
5919 {Kernel, Arg}));
5920 }
5921 case Builtin::BIget_kernel_preferred_work_group_size_multiple: {
5922 llvm::Type *GenericVoidPtrTy = Builder.getPtrTy(
5923 getContext().getTargetAddressSpace(LangAS::opencl_generic));
5924 auto Info =
5925 CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(0));
5926 Value *Kernel =
5927 Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy);
5928 Value *Arg = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
5929 return RValue::get(EmitRuntimeCall(
5930 CGM.CreateRuntimeFunction(
5931 llvm::FunctionType::get(IntTy, {GenericVoidPtrTy, GenericVoidPtrTy},
5932 false),
5933 "__get_kernel_preferred_work_group_size_multiple_impl"),
5934 {Kernel, Arg}));
5935 }
5936 case Builtin::BIget_kernel_max_sub_group_size_for_ndrange:
5937 case Builtin::BIget_kernel_sub_group_count_for_ndrange: {
5938 llvm::Type *GenericVoidPtrTy = Builder.getPtrTy(
5939 getContext().getTargetAddressSpace(LangAS::opencl_generic));
5940 LValue NDRangeL = EmitAggExprToLValue(E->getArg(0));
5941 llvm::Value *NDRange = NDRangeL.getAddress().emitRawPointer(*this);
5942 auto Info =
5943 CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(1));
5944 Value *Kernel =
5945 Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy);
5946 Value *Block = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
5947 const char *Name =
5948 BuiltinID == Builtin::BIget_kernel_max_sub_group_size_for_ndrange
5949 ? "__get_kernel_max_sub_group_size_for_ndrange_impl"
5950 : "__get_kernel_sub_group_count_for_ndrange_impl";
5951 return RValue::get(EmitRuntimeCall(
5952 CGM.CreateRuntimeFunction(
5953 llvm::FunctionType::get(
5954 IntTy, {NDRange->getType(), GenericVoidPtrTy, GenericVoidPtrTy},
5955 false),
5956 Name),
5957 {NDRange, Kernel, Block}));
5958 }
5959 case Builtin::BI__builtin_store_half:
5960 case Builtin::BI__builtin_store_halff: {
5961 Value *Val = EmitScalarExpr(E->getArg(0));
5962 Address Address = EmitPointerWithAlignment(E->getArg(1));
5963 Value *HalfVal = Builder.CreateFPTrunc(Val, Builder.getHalfTy());
5964 Builder.CreateStore(HalfVal, Address);
5965 return RValue::get(nullptr);
5966 }
5967 case Builtin::BI__builtin_load_half: {
5968 Address Address = EmitPointerWithAlignment(E->getArg(0));
5969 Value *HalfVal = Builder.CreateLoad(Address);
5970 return RValue::get(Builder.CreateFPExt(HalfVal, Builder.getDoubleTy()));
5971 }
5972 case Builtin::BI__builtin_load_halff: {
5973 Address Address = EmitPointerWithAlignment(E->getArg(0));
5974 Value *HalfVal = Builder.CreateLoad(Address);
5975 return RValue::get(Builder.CreateFPExt(HalfVal, Builder.getFloatTy()));
5976 }
5977 case Builtin::BI__builtin_printf:
5978 case Builtin::BIprintf:
5979 if (getTarget().getTriple().isNVPTX() ||
5980 getTarget().getTriple().isAMDGCN() ||
5981 (getTarget().getTriple().isSPIRV() &&
5982 getTarget().getTriple().getVendor() == Triple::VendorType::AMD)) {
5983 if (getLangOpts().OpenMPIsTargetDevice)
5984 return EmitOpenMPDevicePrintfCallExpr(E);
5985 if (getTarget().getTriple().isNVPTX())
5986 return EmitNVPTXDevicePrintfCallExpr(E);
5987 if ((getTarget().getTriple().isAMDGCN() ||
5988 getTarget().getTriple().isSPIRV()) &&
5989 getLangOpts().HIP)
5990 return EmitAMDGPUDevicePrintfCallExpr(E);
5991 }
5992
5993 break;
5994 case Builtin::BI__builtin_canonicalize:
5995 case Builtin::BI__builtin_canonicalizef:
5996 case Builtin::BI__builtin_canonicalizef16:
5997 case Builtin::BI__builtin_canonicalizel:
5998 return RValue::get(
5999 emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::canonicalize));
6000
6001 case Builtin::BI__builtin_thread_pointer: {
6002 if (!getContext().getTargetInfo().isTLSSupported())
6003 CGM.ErrorUnsupported(E, "__builtin_thread_pointer");
6004 // Fall through - it's already mapped to the intrinsic by ClangBuiltin.
6005 break;
6006 }
6007 case Builtin::BI__builtin_os_log_format:
6008 return emitBuiltinOSLogFormat(*E);
6009
6010 case Builtin::BI__xray_customevent: {
6011 if (!ShouldXRayInstrumentFunction())
6012 return RValue::getIgnored();
6013
6014 if (!CGM.getCodeGenOpts().XRayInstrumentationBundle.has(
6015 XRayInstrKind::Custom))
6016 return RValue::getIgnored();
6017
6018 if (const auto *XRayAttr = CurFuncDecl->getAttr<XRayInstrumentAttr>())
6019 if (XRayAttr->neverXRayInstrument() && !AlwaysEmitXRayCustomEvents())
6020 return RValue::getIgnored();
6021
6022 Function *F = CGM.getIntrinsic(Intrinsic::xray_customevent);
6023 auto FTy = F->getFunctionType();
6024 auto Arg0 = E->getArg(0);
6025 auto Arg0Val = EmitScalarExpr(Arg0);
6026 auto Arg0Ty = Arg0->getType();
6027 auto PTy0 = FTy->getParamType(0);
6028 if (PTy0 != Arg0Val->getType()) {
6029 if (Arg0Ty->isArrayType())
6030 Arg0Val = EmitArrayToPointerDecay(Arg0).emitRawPointer(*this);
6031 else
6032 Arg0Val = Builder.CreatePointerCast(Arg0Val, PTy0);
6033 }
6034 auto Arg1 = EmitScalarExpr(E->getArg(1));
6035 auto PTy1 = FTy->getParamType(1);
6036 if (PTy1 != Arg1->getType())
6037 Arg1 = Builder.CreateTruncOrBitCast(Arg1, PTy1);
6038 return RValue::get(Builder.CreateCall(F, {Arg0Val, Arg1}));
6039 }
6040
6041 case Builtin::BI__xray_typedevent: {
6042 // TODO: There should be a way to always emit events even if the current
6043 // function is not instrumented. Losing events in a stream can cripple
6044 // a trace.
6045 if (!ShouldXRayInstrumentFunction())
6046 return RValue::getIgnored();
6047
6048 if (!CGM.getCodeGenOpts().XRayInstrumentationBundle.has(
6049 XRayInstrKind::Typed))
6050 return RValue::getIgnored();
6051
6052 if (const auto *XRayAttr = CurFuncDecl->getAttr<XRayInstrumentAttr>())
6053 if (XRayAttr->neverXRayInstrument() && !AlwaysEmitXRayTypedEvents())
6054 return RValue::getIgnored();
6055
6056 Function *F = CGM.getIntrinsic(Intrinsic::xray_typedevent);
6057 auto FTy = F->getFunctionType();
6058 auto Arg0 = EmitScalarExpr(E->getArg(0));
6059 auto PTy0 = FTy->getParamType(0);
6060 if (PTy0 != Arg0->getType())
6061 Arg0 = Builder.CreateTruncOrBitCast(Arg0, PTy0);
6062 auto Arg1 = E->getArg(1);
6063 auto Arg1Val = EmitScalarExpr(Arg1);
6064 auto Arg1Ty = Arg1->getType();
6065 auto PTy1 = FTy->getParamType(1);
6066 if (PTy1 != Arg1Val->getType()) {
6067 if (Arg1Ty->isArrayType())
6068 Arg1Val = EmitArrayToPointerDecay(Arg1).emitRawPointer(*this);
6069 else
6070 Arg1Val = Builder.CreatePointerCast(Arg1Val, PTy1);
6071 }
6072 auto Arg2 = EmitScalarExpr(E->getArg(2));
6073 auto PTy2 = FTy->getParamType(2);
6074 if (PTy2 != Arg2->getType())
6075 Arg2 = Builder.CreateTruncOrBitCast(Arg2, PTy2);
6076 return RValue::get(Builder.CreateCall(F, {Arg0, Arg1Val, Arg2}));
6077 }
6078
6079 case Builtin::BI__builtin_ms_va_start:
6080 case Builtin::BI__builtin_ms_va_end:
6081 return RValue::get(
6082 EmitVAStartEnd(EmitMSVAListRef(E->getArg(0)).emitRawPointer(*this),
6083 BuiltinID == Builtin::BI__builtin_ms_va_start));
6084
6085 case Builtin::BI__builtin_ms_va_copy: {
6086 // Lower this manually. We can't reliably determine whether or not any
6087 // given va_copy() is for a Win64 va_list from the calling convention
6088 // alone, because it's legal to do this from a System V ABI function.
6089 // With opaque pointer types, we won't have enough information in LLVM
6090 // IR to determine this from the argument types, either. Best to do it
6091 // now, while we have enough information.
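// In effect this emits '*dest = *src': load the va_list pointer from the
// source and store it into the destination.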
6092 Address DestAddr = EmitMSVAListRef(E->getArg(0));
6093 Address SrcAddr = EmitMSVAListRef(E->getArg(1));
6094
6095 DestAddr = DestAddr.withElementType(Int8PtrTy);
6096 SrcAddr = SrcAddr.withElementType(Int8PtrTy);
6097
6098 Value *ArgPtr = Builder.CreateLoad(SrcAddr, "ap.val");
6099 return RValue::get(Builder.CreateStore(ArgPtr, DestAddr));
6100 }
6101
6102 case Builtin::BI__builtin_get_device_side_mangled_name: {
6103 auto Name = CGM.getCUDARuntime().getDeviceSideName(
6104 cast<DeclRefExpr>(E->getArg(0)->IgnoreImpCasts())->getDecl());
6105 auto Str = CGM.GetAddrOfConstantCString(Name, "");
6106 return RValue::get(Str.getPointer());
6107 }
6108 }
6109
6110 // If this is an alias for a lib function (e.g. __builtin_sin), emit
6111 // the call using the normal call path, but using the unmangled
6112 // version of the function name.
6113 if (getContext().BuiltinInfo.isLibFunction(BuiltinID))
6114 return emitLibraryCall(*this, FD, E,
6115 CGM.getBuiltinLibFunction(FD, BuiltinID));
6116
6117 // If this is a predefined lib function (e.g. malloc), emit the call
6118 // using exactly the normal call path.
6119 if (getContext().BuiltinInfo.isPredefinedLibFunction(BuiltinID))
6120 return emitLibraryCall(*this, FD, E, CGM.getRawFunctionPointer(FD));
6121
6122 // Check that a call to a target specific builtin has the correct target
6123 // features.
6124 // This is done down here to avoid checking non-target-specific builtins;
6125 // however, if generic builtins ever start to require generic target
6126 // features, then we can move this up to the beginning of the function.
6127 checkTargetFeatures(E, FD);
6128
6129 if (unsigned VectorWidth = getContext().BuiltinInfo.getRequiredVectorWidth(BuiltinID))
6130 LargestVectorWidth = std::max(LargestVectorWidth, VectorWidth);
6131
6132 // See if we have a target specific intrinsic.
6133 StringRef Name = getContext().BuiltinInfo.getName(BuiltinID);
6134 Intrinsic::ID IntrinsicID = Intrinsic::not_intrinsic;
6135 StringRef Prefix =
6136 llvm::Triple::getArchTypePrefix(getTarget().getTriple().getArch());
6137 if (!Prefix.empty()) {
6138 IntrinsicID = Intrinsic::getIntrinsicForClangBuiltin(Prefix.data(), Name);
6139 if (IntrinsicID == Intrinsic::not_intrinsic && Prefix == "spv" &&
6140 getTarget().getTriple().getOS() == llvm::Triple::OSType::AMDHSA)
6141 IntrinsicID = Intrinsic::getIntrinsicForClangBuiltin("amdgcn", Name);
6142 // NOTE: we don't need to perform a compatibility flag check here since
6143 // the intrinsics are declared in Builtins*.def via LANGBUILTIN, which
6144 // filters the MS builtins via ALL_MS_LANGUAGES, so they are filtered out earlier.
6145 if (IntrinsicID == Intrinsic::not_intrinsic)
6146 IntrinsicID = Intrinsic::getIntrinsicForMSBuiltin(Prefix.data(), Name);
6147 }
6148
6149 if (IntrinsicID != Intrinsic::not_intrinsic) {
6150 SmallVector<Value*, 16> Args;
6151
6152 // Find out if any arguments are required to be integer constant
6153 // expressions.
6154 unsigned ICEArguments = 0;
6155 ASTContext::GetBuiltinTypeError Error;
6156 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
6157 assert(Error == ASTContext::GE_None && "Should not codegen an error");
6158
6159 Function *F = CGM.getIntrinsic(IntrinsicID);
6160 llvm::FunctionType *FTy = F->getFunctionType();
6161
6162 for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) {
6163 Value *ArgValue = EmitScalarOrConstFoldImmArg(ICEArguments, i, E);
6164 // If the intrinsic argument type is different from the builtin argument
6165 // type, we need to do a bitcast.
6166 llvm::Type *PTy = FTy->getParamType(i);
6167 if (PTy != ArgValue->getType()) {
6168 // XXX - vector of pointers?
6169 if (auto *PtrTy = dyn_cast<llvm::PointerType>(PTy)) {
6170 if (PtrTy->getAddressSpace() !=
6171 ArgValue->getType()->getPointerAddressSpace()) {
6172 ArgValue = Builder.CreateAddrSpaceCast(
6173 ArgValue, llvm::PointerType::get(getLLVMContext(),
6174 PtrTy->getAddressSpace()));
6175 }
6176 }
6177
6178 // Cast a vector type (e.g., v256i32) to x86_amx; this only happens
6179 // in AMX intrinsics.
6180 if (PTy->isX86_AMXTy())
6181 ArgValue = Builder.CreateIntrinsic(Intrinsic::x86_cast_vector_to_tile,
6182 {ArgValue->getType()}, {ArgValue});
6183 else
6184 ArgValue = Builder.CreateBitCast(ArgValue, PTy);
6185 }
6186
6187 Args.push_back(ArgValue);
6188 }
6189
6190 Value *V = Builder.CreateCall(F, Args);
6191 QualType BuiltinRetType = E->getType();
6192
6193 llvm::Type *RetTy = VoidTy;
6194 if (!BuiltinRetType->isVoidType())
6195 RetTy = ConvertType(BuiltinRetType);
6196
6197 if (RetTy != V->getType()) {
6198 // XXX - vector of pointers?
6199 if (auto *PtrTy = dyn_cast<llvm::PointerType>(RetTy)) {
6200 if (PtrTy->getAddressSpace() != V->getType()->getPointerAddressSpace()) {
6201 V = Builder.CreateAddrSpaceCast(
6202 V, llvm::PointerType::get(getLLVMContext(),
6203 PtrTy->getAddressSpace()));
6204 }
6205 }
6206
6207 // Cast x86_amx to a vector type (e.g., v256i32); this only happens
6208 // in AMX intrinsics.
6209 if (V->getType()->isX86_AMXTy())
6210 V = Builder.CreateIntrinsic(Intrinsic::x86_cast_tile_to_vector, {RetTy},
6211 {V});
6212 else
6213 V = Builder.CreateBitCast(V, RetTy);
6214 }
6215
6216 if (RetTy->isVoidTy())
6217 return RValue::get(nullptr);
6218
6219 return RValue::get(V);
6220 }
6221
6222 // Some target-specific builtins can have aggregate return values, e.g.
6223 // __builtin_arm_mve_vld2q_u32. So if the result is an aggregate, force
6224 // ReturnValue to be non-null, so that the target-specific emission code can
6225 // always just emit into it.
6226 TypeEvaluationKind EvalKind = getEvaluationKind(E->getType());
6227 if (EvalKind == TEK_Aggregate && ReturnValue.isNull()) {
6228 Address DestPtr = CreateMemTemp(E->getType(), "agg.tmp");
6229 ReturnValue = ReturnValueSlot(DestPtr, false);
6230 }
6231
6232 // Now see if we can emit a target-specific builtin.
6233 if (Value *V = EmitTargetBuiltinExpr(BuiltinID, E, ReturnValue)) {
6234 switch (EvalKind) {
6235 case TEK_Scalar:
6236 if (V->getType()->isVoidTy())
6237 return RValue::get(nullptr);
6238 return RValue::get(V);
6239 case TEK_Aggregate:
6240 return RValue::getAggregate(ReturnValue.getAddress(),
6241 ReturnValue.isVolatile());
6242 case TEK_Complex:
6243 llvm_unreachable("No current target builtin returns complex");
6244 }
6245 llvm_unreachable("Bad evaluation kind in EmitBuiltinExpr");
6246 }
6247
6248 // EmitHLSLBuiltinExpr will check getLangOpts().HLSL
6249 if (Value *V = EmitHLSLBuiltinExpr(BuiltinID, E))
6250 return RValue::get(V);
6251
6252 if (getLangOpts().HIPStdPar && getLangOpts().CUDAIsDevice)
6253 return EmitHipStdParUnsupportedBuiltin(this, FD);
6254
6255 ErrorUnsupported(E, "builtin function");
6256
6257 // Unknown builtin, for now just dump it out and return undef.
6258 return GetUndefRValue(E->getType());
6259 }
6260
6261 static Value *EmitTargetArchBuiltinExpr(CodeGenFunction *CGF,
6262 unsigned BuiltinID, const CallExpr *E,
6263 ReturnValueSlot ReturnValue,
6264 llvm::Triple::ArchType Arch) {
6265 // When compiling in HipStdPar mode we have to be conservative in rejecting
6266 // target-specific features in the FE, and defer the possible error to the
6267 // AcceleratorCodeSelection pass: an error is emitted there only if an
6268 // unsupported target builtin is referenced by an accelerator executable
6269 // function. Returning nullptr here leads to the builtin being handled in
6270 // EmitHipStdParUnsupportedBuiltin.
6271 if (CGF->getLangOpts().HIPStdPar && CGF->getLangOpts().CUDAIsDevice &&
6272 Arch != CGF->getTarget().getTriple().getArch())
6273 return nullptr;
6274
6275 switch (Arch) {
6276 case llvm::Triple::arm:
6277 case llvm::Triple::armeb:
6278 case llvm::Triple::thumb:
6279 case llvm::Triple::thumbeb:
6280 return CGF->EmitARMBuiltinExpr(BuiltinID, E, ReturnValue, Arch);
6281 case llvm::Triple::aarch64:
6282 case llvm::Triple::aarch64_32:
6283 case llvm::Triple::aarch64_be:
6284 return CGF->EmitAArch64BuiltinExpr(BuiltinID, E, Arch);
6285 case llvm::Triple::bpfeb:
6286 case llvm::Triple::bpfel:
6287 return CGF->EmitBPFBuiltinExpr(BuiltinID, E);
6288 case llvm::Triple::x86:
6289 case llvm::Triple::x86_64:
6290 return CGF->EmitX86BuiltinExpr(BuiltinID, E);
6291 case llvm::Triple::ppc:
6292 case llvm::Triple::ppcle:
6293 case llvm::Triple::ppc64:
6294 case llvm::Triple::ppc64le:
6295 return CGF->EmitPPCBuiltinExpr(BuiltinID, E);
6296 case llvm::Triple::r600:
6297 case llvm::Triple::amdgcn:
6298 return CGF->EmitAMDGPUBuiltinExpr(BuiltinID, E);
6299 case llvm::Triple::systemz:
6300 return CGF->EmitSystemZBuiltinExpr(BuiltinID, E);
6301 case llvm::Triple::nvptx:
6302 case llvm::Triple::nvptx64:
6303 return CGF->EmitNVPTXBuiltinExpr(BuiltinID, E);
6304 case llvm::Triple::wasm32:
6305 case llvm::Triple::wasm64:
6306 return CGF->EmitWebAssemblyBuiltinExpr(BuiltinID, E);
6307 case llvm::Triple::hexagon:
6308 return CGF->EmitHexagonBuiltinExpr(BuiltinID, E);
6309 case llvm::Triple::riscv32:
6310 case llvm::Triple::riscv64:
6311 return CGF->EmitRISCVBuiltinExpr(BuiltinID, E, ReturnValue);
6312 case llvm::Triple::spirv64:
6313 if (CGF->getTarget().getTriple().getOS() != llvm::Triple::OSType::AMDHSA)
6314 return nullptr;
6315 return CGF->EmitAMDGPUBuiltinExpr(BuiltinID, E);
6316 default:
6317 return nullptr;
6318 }
6319 }
6320
6321 Value *CodeGenFunction::EmitTargetBuiltinExpr(unsigned BuiltinID,
6322 const CallExpr *E,
6323 ReturnValueSlot ReturnValue) {
6324 if (getContext().BuiltinInfo.isAuxBuiltinID(BuiltinID)) {
6325 assert(getContext().getAuxTargetInfo() && "Missing aux target info");
6326 return EmitTargetArchBuiltinExpr(
6327 this, getContext().BuiltinInfo.getAuxBuiltinID(BuiltinID), E,
6328 ReturnValue, getContext().getAuxTargetInfo()->getTriple().getArch());
6329 }
6330
6331 return EmitTargetArchBuiltinExpr(this, BuiltinID, E, ReturnValue,
6332 getTarget().getTriple().getArch());
6333 }
6334
6335 static llvm::FixedVectorType *GetNeonType(CodeGenFunction *CGF,
6336 NeonTypeFlags TypeFlags,
6337 bool HasLegalHalfType = true,
6338 bool V1Ty = false,
6339 bool AllowBFloatArgsAndRet = true) {
6340 int IsQuad = TypeFlags.isQuad();
6341 switch (TypeFlags.getEltType()) {
6342 case NeonTypeFlags::Int8:
6343 case NeonTypeFlags::Poly8:
6344 return llvm::FixedVectorType::get(CGF->Int8Ty, V1Ty ? 1 : (8 << IsQuad));
6345 case NeonTypeFlags::Int16:
6346 case NeonTypeFlags::Poly16:
6347 return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
6348 case NeonTypeFlags::BFloat16:
6349 if (AllowBFloatArgsAndRet)
6350 return llvm::FixedVectorType::get(CGF->BFloatTy, V1Ty ? 1 : (4 << IsQuad));
6351 else
6352 return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
6353 case NeonTypeFlags::Float16:
6354 if (HasLegalHalfType)
6355 return llvm::FixedVectorType::get(CGF->HalfTy, V1Ty ? 1 : (4 << IsQuad));
6356 else
6357 return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
6358 case NeonTypeFlags::Int32:
6359 return llvm::FixedVectorType::get(CGF->Int32Ty, V1Ty ? 1 : (2 << IsQuad));
6360 case NeonTypeFlags::Int64:
6361 case NeonTypeFlags::Poly64:
6362 return llvm::FixedVectorType::get(CGF->Int64Ty, V1Ty ? 1 : (1 << IsQuad));
6363 case NeonTypeFlags::Poly128:
6364 // FIXME: i128 and f128 don't get full support in Clang and LLVM;
6365 // a lot of the i128 and f128 API is missing, so we use v16i8 to
6366 // represent poly128 and rely on pattern matching.
6367 return llvm::FixedVectorType::get(CGF->Int8Ty, 16);
6368 case NeonTypeFlags::Float32:
6369 return llvm::FixedVectorType::get(CGF->FloatTy, V1Ty ? 1 : (2 << IsQuad));
6370 case NeonTypeFlags::Float64:
6371 return llvm::FixedVectorType::get(CGF->DoubleTy, V1Ty ? 1 : (1 << IsQuad));
6372 }
6373 llvm_unreachable("Unknown vector element type!");
6374 }
6375
6376 static llvm::VectorType *GetFloatNeonType(CodeGenFunction *CGF,
6377 NeonTypeFlags IntTypeFlags) {
6378 int IsQuad = IntTypeFlags.isQuad();
6379 switch (IntTypeFlags.getEltType()) {
6380 case NeonTypeFlags::Int16:
6381 return llvm::FixedVectorType::get(CGF->HalfTy, (4 << IsQuad));
6382 case NeonTypeFlags::Int32:
6383 return llvm::FixedVectorType::get(CGF->FloatTy, (2 << IsQuad));
6384 case NeonTypeFlags::Int64:
6385 return llvm::FixedVectorType::get(CGF->DoubleTy, (1 << IsQuad));
6386 default:
6387 llvm_unreachable("Type can't be converted to floating-point!");
6388 }
6389 }
6390
6391 Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C,
6392 const ElementCount &Count) {
6393 Value *SV = llvm::ConstantVector::getSplat(Count, C);
6394 return Builder.CreateShuffleVector(V, V, SV, "lane");
6395 }
6396
6397 Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C) {
6398 ElementCount EC = cast<llvm::VectorType>(V->getType())->getElementCount();
6399 return EmitNeonSplat(V, C, EC);
6400 }
6401
6402 Value *CodeGenFunction::EmitNeonCall(Function *F, SmallVectorImpl<Value*> &Ops,
6403 const char *name,
6404 unsigned shift, bool rightshift) {
6405 unsigned j = 0;
6406 for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
6407 ai != ae; ++ai, ++j) {
6408 if (F->isConstrainedFPIntrinsic())
6409 if (ai->getType()->isMetadataTy())
6410 continue;
6411 if (shift > 0 && shift == j)
6412 Ops[j] = EmitNeonShiftVector(Ops[j], ai->getType(), rightshift);
6413 else
6414 Ops[j] = Builder.CreateBitCast(Ops[j], ai->getType(), name);
6415 }
6416
6417 if (F->isConstrainedFPIntrinsic())
6418 return Builder.CreateConstrainedFPCall(F, Ops, name);
6419 else
6420 return Builder.CreateCall(F, Ops, name);
6421 }
6422
6423 Value *CodeGenFunction::EmitNeonShiftVector(Value *V, llvm::Type *Ty,
6424 bool neg) {
6425 int SV = cast<ConstantInt>(V)->getSExtValue();
6426 return ConstantInt::get(Ty, neg ? -SV : SV);
6427 }
6428
6429 // Right-shift a vector by a constant.
6430 Value *CodeGenFunction::EmitNeonRShiftImm(Value *Vec, Value *Shift,
6431 llvm::Type *Ty, bool usgn,
6432 const char *name) {
6433 llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
6434
6435 int ShiftAmt = cast<ConstantInt>(Shift)->getSExtValue();
6436 int EltSize = VTy->getScalarSizeInBits();
6437
6438 Vec = Builder.CreateBitCast(Vec, Ty);
6439
6440 // lshr/ashr are undefined when the shift amount is equal to the vector
6441 // element size.
6442 if (ShiftAmt == EltSize) {
6443 if (usgn) {
6444 // Right-shifting an unsigned value by its size yields 0.
6445 return llvm::ConstantAggregateZero::get(VTy);
6446 } else {
6447 // Right-shifting a signed value by its size is equivalent
6448 // to a shift of size-1.
6449 --ShiftAmt;
6450 Shift = ConstantInt::get(VTy->getElementType(), ShiftAmt);
6451 }
6452 }
6453
6454 Shift = EmitNeonShiftVector(Shift, Ty, false);
6455 if (usgn)
6456 return Builder.CreateLShr(Vec, Shift, name);
6457 else
6458 return Builder.CreateAShr(Vec, Shift, name);
6459 }
6460
6461 enum {
6462 AddRetType = (1 << 0),
6463 Add1ArgType = (1 << 1),
6464 Add2ArgTypes = (1 << 2),
6465
6466 VectorizeRetType = (1 << 3),
6467 VectorizeArgTypes = (1 << 4),
6468
6469 InventFloatType = (1 << 5),
6470 UnsignedAlts = (1 << 6),
6471
6472 Use64BitVectors = (1 << 7),
6473 Use128BitVectors = (1 << 8),
6474
6475 Vectorize1ArgType = Add1ArgType | VectorizeArgTypes,
6476 VectorRet = AddRetType | VectorizeRetType,
6477 VectorRetGetArgs01 =
6478 AddRetType | Add2ArgTypes | VectorizeRetType | VectorizeArgTypes,
6479 FpCmpzModifiers =
6480 AddRetType | VectorizeRetType | Add1ArgType | InventFloatType
6481 };
6482
6483 namespace {
6484 struct ARMVectorIntrinsicInfo {
6485 const char *NameHint;
6486 unsigned BuiltinID;
6487 unsigned LLVMIntrinsic;
6488 unsigned AltLLVMIntrinsic;
6489 uint64_t TypeModifier;
6490
6491 bool operator<(unsigned RHSBuiltinID) const {
6492 return BuiltinID < RHSBuiltinID;
6493 }
6494 bool operator<(const ARMVectorIntrinsicInfo &TE) const {
6495 return BuiltinID < TE.BuiltinID;
6496 }
6497 };
6498 } // end anonymous namespace
6499
6500 #define NEONMAP0(NameBase) \
6501 { #NameBase, NEON::BI__builtin_neon_ ## NameBase, 0, 0, 0 }
6502
6503 #define NEONMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
6504 { #NameBase, NEON:: BI__builtin_neon_ ## NameBase, \
6505 Intrinsic::LLVMIntrinsic, 0, TypeModifier }
6506
6507 #define NEONMAP2(NameBase, LLVMIntrinsic, AltLLVMIntrinsic, TypeModifier) \
6508 { #NameBase, NEON:: BI__builtin_neon_ ## NameBase, \
6509 Intrinsic::LLVMIntrinsic, Intrinsic::AltLLVMIntrinsic, \
6510 TypeModifier }
6511
6512 static const ARMVectorIntrinsicInfo ARMSIMDIntrinsicMap [] = {
6513 NEONMAP1(__a32_vcvt_bf16_f32, arm_neon_vcvtfp2bf, 0),
6514 NEONMAP0(splat_lane_v),
6515 NEONMAP0(splat_laneq_v),
6516 NEONMAP0(splatq_lane_v),
6517 NEONMAP0(splatq_laneq_v),
6518 NEONMAP2(vabd_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
6519 NEONMAP2(vabdq_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
6520 NEONMAP1(vabs_v, arm_neon_vabs, 0),
6521 NEONMAP1(vabsq_v, arm_neon_vabs, 0),
6522 NEONMAP0(vadd_v),
6523 NEONMAP0(vaddhn_v),
6524 NEONMAP0(vaddq_v),
6525 NEONMAP1(vaesdq_u8, arm_neon_aesd, 0),
6526 NEONMAP1(vaeseq_u8, arm_neon_aese, 0),
6527 NEONMAP1(vaesimcq_u8, arm_neon_aesimc, 0),
6528 NEONMAP1(vaesmcq_u8, arm_neon_aesmc, 0),
6529 NEONMAP1(vbfdot_f32, arm_neon_bfdot, 0),
6530 NEONMAP1(vbfdotq_f32, arm_neon_bfdot, 0),
6531 NEONMAP1(vbfmlalbq_f32, arm_neon_bfmlalb, 0),
6532 NEONMAP1(vbfmlaltq_f32, arm_neon_bfmlalt, 0),
6533 NEONMAP1(vbfmmlaq_f32, arm_neon_bfmmla, 0),
6534 NEONMAP1(vbsl_v, arm_neon_vbsl, AddRetType),
6535 NEONMAP1(vbslq_v, arm_neon_vbsl, AddRetType),
6536 NEONMAP1(vcadd_rot270_f16, arm_neon_vcadd_rot270, Add1ArgType),
6537 NEONMAP1(vcadd_rot270_f32, arm_neon_vcadd_rot270, Add1ArgType),
6538 NEONMAP1(vcadd_rot90_f16, arm_neon_vcadd_rot90, Add1ArgType),
6539 NEONMAP1(vcadd_rot90_f32, arm_neon_vcadd_rot90, Add1ArgType),
6540 NEONMAP1(vcaddq_rot270_f16, arm_neon_vcadd_rot270, Add1ArgType),
6541 NEONMAP1(vcaddq_rot270_f32, arm_neon_vcadd_rot270, Add1ArgType),
6542 NEONMAP1(vcaddq_rot270_f64, arm_neon_vcadd_rot270, Add1ArgType),
6543 NEONMAP1(vcaddq_rot90_f16, arm_neon_vcadd_rot90, Add1ArgType),
6544 NEONMAP1(vcaddq_rot90_f32, arm_neon_vcadd_rot90, Add1ArgType),
6545 NEONMAP1(vcaddq_rot90_f64, arm_neon_vcadd_rot90, Add1ArgType),
6546 NEONMAP1(vcage_v, arm_neon_vacge, 0),
6547 NEONMAP1(vcageq_v, arm_neon_vacge, 0),
6548 NEONMAP1(vcagt_v, arm_neon_vacgt, 0),
6549 NEONMAP1(vcagtq_v, arm_neon_vacgt, 0),
6550 NEONMAP1(vcale_v, arm_neon_vacge, 0),
6551 NEONMAP1(vcaleq_v, arm_neon_vacge, 0),
6552 NEONMAP1(vcalt_v, arm_neon_vacgt, 0),
6553 NEONMAP1(vcaltq_v, arm_neon_vacgt, 0),
6554 NEONMAP0(vceqz_v),
6555 NEONMAP0(vceqzq_v),
6556 NEONMAP0(vcgez_v),
6557 NEONMAP0(vcgezq_v),
6558 NEONMAP0(vcgtz_v),
6559 NEONMAP0(vcgtzq_v),
6560 NEONMAP0(vclez_v),
6561 NEONMAP0(vclezq_v),
6562 NEONMAP1(vcls_v, arm_neon_vcls, Add1ArgType),
6563 NEONMAP1(vclsq_v, arm_neon_vcls, Add1ArgType),
6564 NEONMAP0(vcltz_v),
6565 NEONMAP0(vcltzq_v),
6566 NEONMAP1(vclz_v, ctlz, Add1ArgType),
6567 NEONMAP1(vclzq_v, ctlz, Add1ArgType),
6568 NEONMAP1(vcnt_v, ctpop, Add1ArgType),
6569 NEONMAP1(vcntq_v, ctpop, Add1ArgType),
6570 NEONMAP1(vcvt_f16_f32, arm_neon_vcvtfp2hf, 0),
6571 NEONMAP0(vcvt_f16_s16),
6572 NEONMAP0(vcvt_f16_u16),
6573 NEONMAP1(vcvt_f32_f16, arm_neon_vcvthf2fp, 0),
6574 NEONMAP0(vcvt_f32_v),
6575 NEONMAP1(vcvt_n_f16_s16, arm_neon_vcvtfxs2fp, 0),
6576 NEONMAP1(vcvt_n_f16_u16, arm_neon_vcvtfxu2fp, 0),
6577 NEONMAP2(vcvt_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
6578 NEONMAP1(vcvt_n_s16_f16, arm_neon_vcvtfp2fxs, 0),
6579 NEONMAP1(vcvt_n_s32_v, arm_neon_vcvtfp2fxs, 0),
6580 NEONMAP1(vcvt_n_s64_v, arm_neon_vcvtfp2fxs, 0),
6581 NEONMAP1(vcvt_n_u16_f16, arm_neon_vcvtfp2fxu, 0),
6582 NEONMAP1(vcvt_n_u32_v, arm_neon_vcvtfp2fxu, 0),
6583 NEONMAP1(vcvt_n_u64_v, arm_neon_vcvtfp2fxu, 0),
6584 NEONMAP0(vcvt_s16_f16),
6585 NEONMAP0(vcvt_s32_v),
6586 NEONMAP0(vcvt_s64_v),
6587 NEONMAP0(vcvt_u16_f16),
6588 NEONMAP0(vcvt_u32_v),
6589 NEONMAP0(vcvt_u64_v),
6590 NEONMAP1(vcvta_s16_f16, arm_neon_vcvtas, 0),
6591 NEONMAP1(vcvta_s32_v, arm_neon_vcvtas, 0),
6592 NEONMAP1(vcvta_s64_v, arm_neon_vcvtas, 0),
6593 NEONMAP1(vcvta_u16_f16, arm_neon_vcvtau, 0),
6594 NEONMAP1(vcvta_u32_v, arm_neon_vcvtau, 0),
6595 NEONMAP1(vcvta_u64_v, arm_neon_vcvtau, 0),
6596 NEONMAP1(vcvtaq_s16_f16, arm_neon_vcvtas, 0),
6597 NEONMAP1(vcvtaq_s32_v, arm_neon_vcvtas, 0),
6598 NEONMAP1(vcvtaq_s64_v, arm_neon_vcvtas, 0),
6599 NEONMAP1(vcvtaq_u16_f16, arm_neon_vcvtau, 0),
6600 NEONMAP1(vcvtaq_u32_v, arm_neon_vcvtau, 0),
6601 NEONMAP1(vcvtaq_u64_v, arm_neon_vcvtau, 0),
6602 NEONMAP1(vcvth_bf16_f32, arm_neon_vcvtbfp2bf, 0),
6603 NEONMAP1(vcvtm_s16_f16, arm_neon_vcvtms, 0),
6604 NEONMAP1(vcvtm_s32_v, arm_neon_vcvtms, 0),
6605 NEONMAP1(vcvtm_s64_v, arm_neon_vcvtms, 0),
6606 NEONMAP1(vcvtm_u16_f16, arm_neon_vcvtmu, 0),
6607 NEONMAP1(vcvtm_u32_v, arm_neon_vcvtmu, 0),
6608 NEONMAP1(vcvtm_u64_v, arm_neon_vcvtmu, 0),
6609 NEONMAP1(vcvtmq_s16_f16, arm_neon_vcvtms, 0),
6610 NEONMAP1(vcvtmq_s32_v, arm_neon_vcvtms, 0),
6611 NEONMAP1(vcvtmq_s64_v, arm_neon_vcvtms, 0),
6612 NEONMAP1(vcvtmq_u16_f16, arm_neon_vcvtmu, 0),
6613 NEONMAP1(vcvtmq_u32_v, arm_neon_vcvtmu, 0),
6614 NEONMAP1(vcvtmq_u64_v, arm_neon_vcvtmu, 0),
6615 NEONMAP1(vcvtn_s16_f16, arm_neon_vcvtns, 0),
6616 NEONMAP1(vcvtn_s32_v, arm_neon_vcvtns, 0),
6617 NEONMAP1(vcvtn_s64_v, arm_neon_vcvtns, 0),
6618 NEONMAP1(vcvtn_u16_f16, arm_neon_vcvtnu, 0),
6619 NEONMAP1(vcvtn_u32_v, arm_neon_vcvtnu, 0),
6620 NEONMAP1(vcvtn_u64_v, arm_neon_vcvtnu, 0),
6621 NEONMAP1(vcvtnq_s16_f16, arm_neon_vcvtns, 0),
6622 NEONMAP1(vcvtnq_s32_v, arm_neon_vcvtns, 0),
6623 NEONMAP1(vcvtnq_s64_v, arm_neon_vcvtns, 0),
6624 NEONMAP1(vcvtnq_u16_f16, arm_neon_vcvtnu, 0),
6625 NEONMAP1(vcvtnq_u32_v, arm_neon_vcvtnu, 0),
6626 NEONMAP1(vcvtnq_u64_v, arm_neon_vcvtnu, 0),
6627 NEONMAP1(vcvtp_s16_f16, arm_neon_vcvtps, 0),
6628 NEONMAP1(vcvtp_s32_v, arm_neon_vcvtps, 0),
6629 NEONMAP1(vcvtp_s64_v, arm_neon_vcvtps, 0),
6630 NEONMAP1(vcvtp_u16_f16, arm_neon_vcvtpu, 0),
6631 NEONMAP1(vcvtp_u32_v, arm_neon_vcvtpu, 0),
6632 NEONMAP1(vcvtp_u64_v, arm_neon_vcvtpu, 0),
6633 NEONMAP1(vcvtpq_s16_f16, arm_neon_vcvtps, 0),
6634 NEONMAP1(vcvtpq_s32_v, arm_neon_vcvtps, 0),
6635 NEONMAP1(vcvtpq_s64_v, arm_neon_vcvtps, 0),
6636 NEONMAP1(vcvtpq_u16_f16, arm_neon_vcvtpu, 0),
6637 NEONMAP1(vcvtpq_u32_v, arm_neon_vcvtpu, 0),
6638 NEONMAP1(vcvtpq_u64_v, arm_neon_vcvtpu, 0),
6639 NEONMAP0(vcvtq_f16_s16),
6640 NEONMAP0(vcvtq_f16_u16),
6641 NEONMAP0(vcvtq_f32_v),
6642 NEONMAP1(vcvtq_n_f16_s16, arm_neon_vcvtfxs2fp, 0),
6643 NEONMAP1(vcvtq_n_f16_u16, arm_neon_vcvtfxu2fp, 0),
6644 NEONMAP2(vcvtq_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
6645 NEONMAP1(vcvtq_n_s16_f16, arm_neon_vcvtfp2fxs, 0),
6646 NEONMAP1(vcvtq_n_s32_v, arm_neon_vcvtfp2fxs, 0),
6647 NEONMAP1(vcvtq_n_s64_v, arm_neon_vcvtfp2fxs, 0),
6648 NEONMAP1(vcvtq_n_u16_f16, arm_neon_vcvtfp2fxu, 0),
6649 NEONMAP1(vcvtq_n_u32_v, arm_neon_vcvtfp2fxu, 0),
6650 NEONMAP1(vcvtq_n_u64_v, arm_neon_vcvtfp2fxu, 0),
6651 NEONMAP0(vcvtq_s16_f16),
6652 NEONMAP0(vcvtq_s32_v),
6653 NEONMAP0(vcvtq_s64_v),
6654 NEONMAP0(vcvtq_u16_f16),
6655 NEONMAP0(vcvtq_u32_v),
6656 NEONMAP0(vcvtq_u64_v),
6657 NEONMAP1(vdot_s32, arm_neon_sdot, 0),
6658 NEONMAP1(vdot_u32, arm_neon_udot, 0),
6659 NEONMAP1(vdotq_s32, arm_neon_sdot, 0),
6660 NEONMAP1(vdotq_u32, arm_neon_udot, 0),
6661 NEONMAP0(vext_v),
6662 NEONMAP0(vextq_v),
6663 NEONMAP0(vfma_v),
6664 NEONMAP0(vfmaq_v),
6665 NEONMAP2(vhadd_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
6666 NEONMAP2(vhaddq_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
6667 NEONMAP2(vhsub_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
6668 NEONMAP2(vhsubq_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
6669 NEONMAP0(vld1_dup_v),
6670 NEONMAP1(vld1_v, arm_neon_vld1, 0),
6671 NEONMAP1(vld1_x2_v, arm_neon_vld1x2, 0),
6672 NEONMAP1(vld1_x3_v, arm_neon_vld1x3, 0),
6673 NEONMAP1(vld1_x4_v, arm_neon_vld1x4, 0),
6674 NEONMAP0(vld1q_dup_v),
6675 NEONMAP1(vld1q_v, arm_neon_vld1, 0),
6676 NEONMAP1(vld1q_x2_v, arm_neon_vld1x2, 0),
6677 NEONMAP1(vld1q_x3_v, arm_neon_vld1x3, 0),
6678 NEONMAP1(vld1q_x4_v, arm_neon_vld1x4, 0),
6679 NEONMAP1(vld2_dup_v, arm_neon_vld2dup, 0),
6680 NEONMAP1(vld2_lane_v, arm_neon_vld2lane, 0),
6681 NEONMAP1(vld2_v, arm_neon_vld2, 0),
6682 NEONMAP1(vld2q_dup_v, arm_neon_vld2dup, 0),
6683 NEONMAP1(vld2q_lane_v, arm_neon_vld2lane, 0),
6684 NEONMAP1(vld2q_v, arm_neon_vld2, 0),
6685 NEONMAP1(vld3_dup_v, arm_neon_vld3dup, 0),
6686 NEONMAP1(vld3_lane_v, arm_neon_vld3lane, 0),
6687 NEONMAP1(vld3_v, arm_neon_vld3, 0),
6688 NEONMAP1(vld3q_dup_v, arm_neon_vld3dup, 0),
6689 NEONMAP1(vld3q_lane_v, arm_neon_vld3lane, 0),
6690 NEONMAP1(vld3q_v, arm_neon_vld3, 0),
6691 NEONMAP1(vld4_dup_v, arm_neon_vld4dup, 0),
6692 NEONMAP1(vld4_lane_v, arm_neon_vld4lane, 0),
6693 NEONMAP1(vld4_v, arm_neon_vld4, 0),
6694 NEONMAP1(vld4q_dup_v, arm_neon_vld4dup, 0),
6695 NEONMAP1(vld4q_lane_v, arm_neon_vld4lane, 0),
6696 NEONMAP1(vld4q_v, arm_neon_vld4, 0),
6697 NEONMAP2(vmax_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
6698 NEONMAP1(vmaxnm_v, arm_neon_vmaxnm, Add1ArgType),
6699 NEONMAP1(vmaxnmq_v, arm_neon_vmaxnm, Add1ArgType),
6700 NEONMAP2(vmaxq_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
6701 NEONMAP2(vmin_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
6702 NEONMAP1(vminnm_v, arm_neon_vminnm, Add1ArgType),
6703 NEONMAP1(vminnmq_v, arm_neon_vminnm, Add1ArgType),
6704 NEONMAP2(vminq_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
6705 NEONMAP1(vmmlaq_s32, arm_neon_smmla, 0),
6706 NEONMAP1(vmmlaq_u32, arm_neon_ummla, 0),
6707 NEONMAP0(vmovl_v),
6708 NEONMAP0(vmovn_v),
6709 NEONMAP1(vmul_v, arm_neon_vmulp, Add1ArgType),
6710 NEONMAP0(vmull_v),
6711 NEONMAP1(vmulq_v, arm_neon_vmulp, Add1ArgType),
6712 NEONMAP2(vpadal_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
6713 NEONMAP2(vpadalq_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
6714 NEONMAP1(vpadd_v, arm_neon_vpadd, Add1ArgType),
6715 NEONMAP2(vpaddl_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
6716 NEONMAP2(vpaddlq_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
6717 NEONMAP1(vpaddq_v, arm_neon_vpadd, Add1ArgType),
6718 NEONMAP2(vpmax_v, arm_neon_vpmaxu, arm_neon_vpmaxs, Add1ArgType | UnsignedAlts),
6719 NEONMAP2(vpmin_v, arm_neon_vpminu, arm_neon_vpmins, Add1ArgType | UnsignedAlts),
6720 NEONMAP1(vqabs_v, arm_neon_vqabs, Add1ArgType),
6721 NEONMAP1(vqabsq_v, arm_neon_vqabs, Add1ArgType),
6722 NEONMAP2(vqadd_v, uadd_sat, sadd_sat, Add1ArgType | UnsignedAlts),
6723 NEONMAP2(vqaddq_v, uadd_sat, sadd_sat, Add1ArgType | UnsignedAlts),
6724 NEONMAP2(vqdmlal_v, arm_neon_vqdmull, sadd_sat, 0),
6725 NEONMAP2(vqdmlsl_v, arm_neon_vqdmull, ssub_sat, 0),
6726 NEONMAP1(vqdmulh_v, arm_neon_vqdmulh, Add1ArgType),
6727 NEONMAP1(vqdmulhq_v, arm_neon_vqdmulh, Add1ArgType),
6728 NEONMAP1(vqdmull_v, arm_neon_vqdmull, Add1ArgType),
6729 NEONMAP2(vqmovn_v, arm_neon_vqmovnu, arm_neon_vqmovns, Add1ArgType | UnsignedAlts),
6730 NEONMAP1(vqmovun_v, arm_neon_vqmovnsu, Add1ArgType),
6731 NEONMAP1(vqneg_v, arm_neon_vqneg, Add1ArgType),
6732 NEONMAP1(vqnegq_v, arm_neon_vqneg, Add1ArgType),
6733 NEONMAP1(vqrdmlah_s16, arm_neon_vqrdmlah, Add1ArgType),
6734 NEONMAP1(vqrdmlah_s32, arm_neon_vqrdmlah, Add1ArgType),
6735 NEONMAP1(vqrdmlahq_s16, arm_neon_vqrdmlah, Add1ArgType),
6736 NEONMAP1(vqrdmlahq_s32, arm_neon_vqrdmlah, Add1ArgType),
6737 NEONMAP1(vqrdmlsh_s16, arm_neon_vqrdmlsh, Add1ArgType),
6738 NEONMAP1(vqrdmlsh_s32, arm_neon_vqrdmlsh, Add1ArgType),
6739 NEONMAP1(vqrdmlshq_s16, arm_neon_vqrdmlsh, Add1ArgType),
6740 NEONMAP1(vqrdmlshq_s32, arm_neon_vqrdmlsh, Add1ArgType),
6741 NEONMAP1(vqrdmulh_v, arm_neon_vqrdmulh, Add1ArgType),
6742 NEONMAP1(vqrdmulhq_v, arm_neon_vqrdmulh, Add1ArgType),
6743 NEONMAP2(vqrshl_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
6744 NEONMAP2(vqrshlq_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
6745 NEONMAP2(vqshl_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
6746 NEONMAP2(vqshl_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
6747 NEONMAP2(vqshlq_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
6748 NEONMAP2(vqshlq_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
6749 NEONMAP1(vqshlu_n_v, arm_neon_vqshiftsu, 0),
6750 NEONMAP1(vqshluq_n_v, arm_neon_vqshiftsu, 0),
6751 NEONMAP2(vqsub_v, usub_sat, ssub_sat, Add1ArgType | UnsignedAlts),
6752 NEONMAP2(vqsubq_v, usub_sat, ssub_sat, Add1ArgType | UnsignedAlts),
6753 NEONMAP1(vraddhn_v, arm_neon_vraddhn, Add1ArgType),
6754 NEONMAP2(vrecpe_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
6755 NEONMAP2(vrecpeq_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
6756 NEONMAP1(vrecps_v, arm_neon_vrecps, Add1ArgType),
6757 NEONMAP1(vrecpsq_v, arm_neon_vrecps, Add1ArgType),
6758 NEONMAP2(vrhadd_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
6759 NEONMAP2(vrhaddq_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
6760 NEONMAP1(vrnd_v, arm_neon_vrintz, Add1ArgType),
6761 NEONMAP1(vrnda_v, arm_neon_vrinta, Add1ArgType),
6762 NEONMAP1(vrndaq_v, arm_neon_vrinta, Add1ArgType),
6763 NEONMAP0(vrndi_v),
6764 NEONMAP0(vrndiq_v),
6765 NEONMAP1(vrndm_v, arm_neon_vrintm, Add1ArgType),
6766 NEONMAP1(vrndmq_v, arm_neon_vrintm, Add1ArgType),
6767 NEONMAP1(vrndn_v, arm_neon_vrintn, Add1ArgType),
6768 NEONMAP1(vrndnq_v, arm_neon_vrintn, Add1ArgType),
6769 NEONMAP1(vrndp_v, arm_neon_vrintp, Add1ArgType),
6770 NEONMAP1(vrndpq_v, arm_neon_vrintp, Add1ArgType),
6771 NEONMAP1(vrndq_v, arm_neon_vrintz, Add1ArgType),
6772 NEONMAP1(vrndx_v, arm_neon_vrintx, Add1ArgType),
6773 NEONMAP1(vrndxq_v, arm_neon_vrintx, Add1ArgType),
6774 NEONMAP2(vrshl_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
6775 NEONMAP2(vrshlq_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
6776 NEONMAP2(vrshr_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
6777 NEONMAP2(vrshrq_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
6778 NEONMAP2(vrsqrte_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
6779 NEONMAP2(vrsqrteq_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
6780 NEONMAP1(vrsqrts_v, arm_neon_vrsqrts, Add1ArgType),
6781 NEONMAP1(vrsqrtsq_v, arm_neon_vrsqrts, Add1ArgType),
6782 NEONMAP1(vrsubhn_v, arm_neon_vrsubhn, Add1ArgType),
6783 NEONMAP1(vsha1su0q_u32, arm_neon_sha1su0, 0),
6784 NEONMAP1(vsha1su1q_u32, arm_neon_sha1su1, 0),
6785 NEONMAP1(vsha256h2q_u32, arm_neon_sha256h2, 0),
6786 NEONMAP1(vsha256hq_u32, arm_neon_sha256h, 0),
6787 NEONMAP1(vsha256su0q_u32, arm_neon_sha256su0, 0),
6788 NEONMAP1(vsha256su1q_u32, arm_neon_sha256su1, 0),
6789 NEONMAP0(vshl_n_v),
6790 NEONMAP2(vshl_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
6791 NEONMAP0(vshll_n_v),
6792 NEONMAP0(vshlq_n_v),
6793 NEONMAP2(vshlq_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
6794 NEONMAP0(vshr_n_v),
6795 NEONMAP0(vshrn_n_v),
6796 NEONMAP0(vshrq_n_v),
6797 NEONMAP1(vst1_v, arm_neon_vst1, 0),
6798 NEONMAP1(vst1_x2_v, arm_neon_vst1x2, 0),
6799 NEONMAP1(vst1_x3_v, arm_neon_vst1x3, 0),
6800 NEONMAP1(vst1_x4_v, arm_neon_vst1x4, 0),
6801 NEONMAP1(vst1q_v, arm_neon_vst1, 0),
6802 NEONMAP1(vst1q_x2_v, arm_neon_vst1x2, 0),
6803 NEONMAP1(vst1q_x3_v, arm_neon_vst1x3, 0),
6804 NEONMAP1(vst1q_x4_v, arm_neon_vst1x4, 0),
6805 NEONMAP1(vst2_lane_v, arm_neon_vst2lane, 0),
6806 NEONMAP1(vst2_v, arm_neon_vst2, 0),
6807 NEONMAP1(vst2q_lane_v, arm_neon_vst2lane, 0),
6808 NEONMAP1(vst2q_v, arm_neon_vst2, 0),
6809 NEONMAP1(vst3_lane_v, arm_neon_vst3lane, 0),
6810 NEONMAP1(vst3_v, arm_neon_vst3, 0),
6811 NEONMAP1(vst3q_lane_v, arm_neon_vst3lane, 0),
6812 NEONMAP1(vst3q_v, arm_neon_vst3, 0),
6813 NEONMAP1(vst4_lane_v, arm_neon_vst4lane, 0),
6814 NEONMAP1(vst4_v, arm_neon_vst4, 0),
6815 NEONMAP1(vst4q_lane_v, arm_neon_vst4lane, 0),
6816 NEONMAP1(vst4q_v, arm_neon_vst4, 0),
6817 NEONMAP0(vsubhn_v),
6818 NEONMAP0(vtrn_v),
6819 NEONMAP0(vtrnq_v),
6820 NEONMAP0(vtst_v),
6821 NEONMAP0(vtstq_v),
6822 NEONMAP1(vusdot_s32, arm_neon_usdot, 0),
6823 NEONMAP1(vusdotq_s32, arm_neon_usdot, 0),
6824 NEONMAP1(vusmmlaq_s32, arm_neon_usmmla, 0),
6825 NEONMAP0(vuzp_v),
6826 NEONMAP0(vuzpq_v),
6827 NEONMAP0(vzip_v),
6828 NEONMAP0(vzipq_v)
6829 };
6830
6831 static const ARMVectorIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
6832 NEONMAP1(__a64_vcvtq_low_bf16_f32, aarch64_neon_bfcvtn, 0),
6833 NEONMAP0(splat_lane_v),
6834 NEONMAP0(splat_laneq_v),
6835 NEONMAP0(splatq_lane_v),
6836 NEONMAP0(splatq_laneq_v),
6837 NEONMAP1(vabs_v, aarch64_neon_abs, 0),
6838 NEONMAP1(vabsq_v, aarch64_neon_abs, 0),
6839 NEONMAP0(vadd_v),
6840 NEONMAP0(vaddhn_v),
6841 NEONMAP0(vaddq_p128),
6842 NEONMAP0(vaddq_v),
6843 NEONMAP1(vaesdq_u8, aarch64_crypto_aesd, 0),
6844 NEONMAP1(vaeseq_u8, aarch64_crypto_aese, 0),
6845 NEONMAP1(vaesimcq_u8, aarch64_crypto_aesimc, 0),
6846 NEONMAP1(vaesmcq_u8, aarch64_crypto_aesmc, 0),
6847 NEONMAP2(vbcaxq_s16, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
6848 NEONMAP2(vbcaxq_s32, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
6849 NEONMAP2(vbcaxq_s64, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
6850 NEONMAP2(vbcaxq_s8, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
6851 NEONMAP2(vbcaxq_u16, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
6852 NEONMAP2(vbcaxq_u32, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
6853 NEONMAP2(vbcaxq_u64, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
6854 NEONMAP2(vbcaxq_u8, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
6855 NEONMAP1(vbfdot_f32, aarch64_neon_bfdot, 0),
6856 NEONMAP1(vbfdotq_f32, aarch64_neon_bfdot, 0),
6857 NEONMAP1(vbfmlalbq_f32, aarch64_neon_bfmlalb, 0),
6858 NEONMAP1(vbfmlaltq_f32, aarch64_neon_bfmlalt, 0),
6859 NEONMAP1(vbfmmlaq_f32, aarch64_neon_bfmmla, 0),
6860 NEONMAP1(vcadd_rot270_f16, aarch64_neon_vcadd_rot270, Add1ArgType),
6861 NEONMAP1(vcadd_rot270_f32, aarch64_neon_vcadd_rot270, Add1ArgType),
6862 NEONMAP1(vcadd_rot90_f16, aarch64_neon_vcadd_rot90, Add1ArgType),
6863 NEONMAP1(vcadd_rot90_f32, aarch64_neon_vcadd_rot90, Add1ArgType),
6864 NEONMAP1(vcaddq_rot270_f16, aarch64_neon_vcadd_rot270, Add1ArgType),
6865 NEONMAP1(vcaddq_rot270_f32, aarch64_neon_vcadd_rot270, Add1ArgType),
6866 NEONMAP1(vcaddq_rot270_f64, aarch64_neon_vcadd_rot270, Add1ArgType),
6867 NEONMAP1(vcaddq_rot90_f16, aarch64_neon_vcadd_rot90, Add1ArgType),
6868 NEONMAP1(vcaddq_rot90_f32, aarch64_neon_vcadd_rot90, Add1ArgType),
6869 NEONMAP1(vcaddq_rot90_f64, aarch64_neon_vcadd_rot90, Add1ArgType),
6870 NEONMAP1(vcage_v, aarch64_neon_facge, 0),
6871 NEONMAP1(vcageq_v, aarch64_neon_facge, 0),
6872 NEONMAP1(vcagt_v, aarch64_neon_facgt, 0),
6873 NEONMAP1(vcagtq_v, aarch64_neon_facgt, 0),
6874 NEONMAP1(vcale_v, aarch64_neon_facge, 0),
6875 NEONMAP1(vcaleq_v, aarch64_neon_facge, 0),
6876 NEONMAP1(vcalt_v, aarch64_neon_facgt, 0),
6877 NEONMAP1(vcaltq_v, aarch64_neon_facgt, 0),
6878 NEONMAP0(vceqz_v),
6879 NEONMAP0(vceqzq_v),
6880 NEONMAP0(vcgez_v),
6881 NEONMAP0(vcgezq_v),
6882 NEONMAP0(vcgtz_v),
6883 NEONMAP0(vcgtzq_v),
6884 NEONMAP0(vclez_v),
6885 NEONMAP0(vclezq_v),
6886 NEONMAP1(vcls_v, aarch64_neon_cls, Add1ArgType),
6887 NEONMAP1(vclsq_v, aarch64_neon_cls, Add1ArgType),
6888 NEONMAP0(vcltz_v),
6889 NEONMAP0(vcltzq_v),
6890 NEONMAP1(vclz_v, ctlz, Add1ArgType),
6891 NEONMAP1(vclzq_v, ctlz, Add1ArgType),
6892 NEONMAP1(vcmla_f16, aarch64_neon_vcmla_rot0, Add1ArgType),
6893 NEONMAP1(vcmla_f32, aarch64_neon_vcmla_rot0, Add1ArgType),
6894 NEONMAP1(vcmla_rot180_f16, aarch64_neon_vcmla_rot180, Add1ArgType),
6895 NEONMAP1(vcmla_rot180_f32, aarch64_neon_vcmla_rot180, Add1ArgType),
6896 NEONMAP1(vcmla_rot270_f16, aarch64_neon_vcmla_rot270, Add1ArgType),
6897 NEONMAP1(vcmla_rot270_f32, aarch64_neon_vcmla_rot270, Add1ArgType),
6898 NEONMAP1(vcmla_rot90_f16, aarch64_neon_vcmla_rot90, Add1ArgType),
6899 NEONMAP1(vcmla_rot90_f32, aarch64_neon_vcmla_rot90, Add1ArgType),
6900 NEONMAP1(vcmlaq_f16, aarch64_neon_vcmla_rot0, Add1ArgType),
6901 NEONMAP1(vcmlaq_f32, aarch64_neon_vcmla_rot0, Add1ArgType),
6902 NEONMAP1(vcmlaq_f64, aarch64_neon_vcmla_rot0, Add1ArgType),
6903 NEONMAP1(vcmlaq_rot180_f16, aarch64_neon_vcmla_rot180, Add1ArgType),
6904 NEONMAP1(vcmlaq_rot180_f32, aarch64_neon_vcmla_rot180, Add1ArgType),
6905 NEONMAP1(vcmlaq_rot180_f64, aarch64_neon_vcmla_rot180, Add1ArgType),
6906 NEONMAP1(vcmlaq_rot270_f16, aarch64_neon_vcmla_rot270, Add1ArgType),
6907 NEONMAP1(vcmlaq_rot270_f32, aarch64_neon_vcmla_rot270, Add1ArgType),
6908 NEONMAP1(vcmlaq_rot270_f64, aarch64_neon_vcmla_rot270, Add1ArgType),
6909 NEONMAP1(vcmlaq_rot90_f16, aarch64_neon_vcmla_rot90, Add1ArgType),
6910 NEONMAP1(vcmlaq_rot90_f32, aarch64_neon_vcmla_rot90, Add1ArgType),
6911 NEONMAP1(vcmlaq_rot90_f64, aarch64_neon_vcmla_rot90, Add1ArgType),
6912 NEONMAP1(vcnt_v, ctpop, Add1ArgType),
6913 NEONMAP1(vcntq_v, ctpop, Add1ArgType),
6914 NEONMAP1(vcvt_f16_f32, aarch64_neon_vcvtfp2hf, 0),
6915 NEONMAP0(vcvt_f16_s16),
6916 NEONMAP0(vcvt_f16_u16),
6917 NEONMAP1(vcvt_f32_f16, aarch64_neon_vcvthf2fp, 0),
6918 NEONMAP0(vcvt_f32_v),
6919 NEONMAP1(vcvt_n_f16_s16, aarch64_neon_vcvtfxs2fp, 0),
6920 NEONMAP1(vcvt_n_f16_u16, aarch64_neon_vcvtfxu2fp, 0),
6921 NEONMAP2(vcvt_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
6922 NEONMAP2(vcvt_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
6923 NEONMAP1(vcvt_n_s16_f16, aarch64_neon_vcvtfp2fxs, 0),
6924 NEONMAP1(vcvt_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
6925 NEONMAP1(vcvt_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
6926 NEONMAP1(vcvt_n_u16_f16, aarch64_neon_vcvtfp2fxu, 0),
6927 NEONMAP1(vcvt_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
6928 NEONMAP1(vcvt_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
6929 NEONMAP0(vcvtq_f16_s16),
6930 NEONMAP0(vcvtq_f16_u16),
6931 NEONMAP0(vcvtq_f32_v),
6932 NEONMAP1(vcvtq_high_bf16_f32, aarch64_neon_bfcvtn2, 0),
6933 NEONMAP1(vcvtq_n_f16_s16, aarch64_neon_vcvtfxs2fp, 0),
6934 NEONMAP1(vcvtq_n_f16_u16, aarch64_neon_vcvtfxu2fp, 0),
6935 NEONMAP2(vcvtq_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
6936 NEONMAP2(vcvtq_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
6937 NEONMAP1(vcvtq_n_s16_f16, aarch64_neon_vcvtfp2fxs, 0),
6938 NEONMAP1(vcvtq_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
6939 NEONMAP1(vcvtq_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
6940 NEONMAP1(vcvtq_n_u16_f16, aarch64_neon_vcvtfp2fxu, 0),
6941 NEONMAP1(vcvtq_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
6942 NEONMAP1(vcvtq_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
6943 NEONMAP1(vcvtx_f32_v, aarch64_neon_fcvtxn, AddRetType | Add1ArgType),
6944 NEONMAP1(vdot_s32, aarch64_neon_sdot, 0),
6945 NEONMAP1(vdot_u32, aarch64_neon_udot, 0),
6946 NEONMAP1(vdotq_s32, aarch64_neon_sdot, 0),
6947 NEONMAP1(vdotq_u32, aarch64_neon_udot, 0),
6948 NEONMAP2(veor3q_s16, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
6949 NEONMAP2(veor3q_s32, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
6950 NEONMAP2(veor3q_s64, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
6951 NEONMAP2(veor3q_s8, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
6952 NEONMAP2(veor3q_u16, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
6953 NEONMAP2(veor3q_u32, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
6954 NEONMAP2(veor3q_u64, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
6955 NEONMAP2(veor3q_u8, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
6956 NEONMAP0(vext_v),
6957 NEONMAP0(vextq_v),
6958 NEONMAP0(vfma_v),
6959 NEONMAP0(vfmaq_v),
6960 NEONMAP1(vfmlal_high_f16, aarch64_neon_fmlal2, 0),
6961 NEONMAP1(vfmlal_low_f16, aarch64_neon_fmlal, 0),
6962 NEONMAP1(vfmlalq_high_f16, aarch64_neon_fmlal2, 0),
6963 NEONMAP1(vfmlalq_low_f16, aarch64_neon_fmlal, 0),
6964 NEONMAP1(vfmlsl_high_f16, aarch64_neon_fmlsl2, 0),
6965 NEONMAP1(vfmlsl_low_f16, aarch64_neon_fmlsl, 0),
6966 NEONMAP1(vfmlslq_high_f16, aarch64_neon_fmlsl2, 0),
6967 NEONMAP1(vfmlslq_low_f16, aarch64_neon_fmlsl, 0),
6968 NEONMAP2(vhadd_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
6969 NEONMAP2(vhaddq_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
6970 NEONMAP2(vhsub_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
6971 NEONMAP2(vhsubq_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
6972 NEONMAP1(vld1_x2_v, aarch64_neon_ld1x2, 0),
6973 NEONMAP1(vld1_x3_v, aarch64_neon_ld1x3, 0),
6974 NEONMAP1(vld1_x4_v, aarch64_neon_ld1x4, 0),
6975 NEONMAP1(vld1q_x2_v, aarch64_neon_ld1x2, 0),
6976 NEONMAP1(vld1q_x3_v, aarch64_neon_ld1x3, 0),
6977 NEONMAP1(vld1q_x4_v, aarch64_neon_ld1x4, 0),
6978 NEONMAP1(vmmlaq_s32, aarch64_neon_smmla, 0),
6979 NEONMAP1(vmmlaq_u32, aarch64_neon_ummla, 0),
6980 NEONMAP0(vmovl_v),
6981 NEONMAP0(vmovn_v),
6982 NEONMAP1(vmul_v, aarch64_neon_pmul, Add1ArgType),
6983 NEONMAP1(vmulq_v, aarch64_neon_pmul, Add1ArgType),
6984 NEONMAP1(vpadd_v, aarch64_neon_addp, Add1ArgType),
6985 NEONMAP2(vpaddl_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
6986 NEONMAP2(vpaddlq_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
6987 NEONMAP1(vpaddq_v, aarch64_neon_addp, Add1ArgType),
6988 NEONMAP1(vqabs_v, aarch64_neon_sqabs, Add1ArgType),
6989 NEONMAP1(vqabsq_v, aarch64_neon_sqabs, Add1ArgType),
6990 NEONMAP2(vqadd_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
6991 NEONMAP2(vqaddq_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
6992 NEONMAP2(vqdmlal_v, aarch64_neon_sqdmull, aarch64_neon_sqadd, 0),
6993 NEONMAP2(vqdmlsl_v, aarch64_neon_sqdmull, aarch64_neon_sqsub, 0),
6994 NEONMAP1(vqdmulh_lane_v, aarch64_neon_sqdmulh_lane, 0),
6995 NEONMAP1(vqdmulh_laneq_v, aarch64_neon_sqdmulh_laneq, 0),
6996 NEONMAP1(vqdmulh_v, aarch64_neon_sqdmulh, Add1ArgType),
6997 NEONMAP1(vqdmulhq_lane_v, aarch64_neon_sqdmulh_lane, 0),
6998 NEONMAP1(vqdmulhq_laneq_v, aarch64_neon_sqdmulh_laneq, 0),
6999 NEONMAP1(vqdmulhq_v, aarch64_neon_sqdmulh, Add1ArgType),
7000 NEONMAP1(vqdmull_v, aarch64_neon_sqdmull, Add1ArgType),
7001 NEONMAP2(vqmovn_v, aarch64_neon_uqxtn, aarch64_neon_sqxtn, Add1ArgType | UnsignedAlts),
7002 NEONMAP1(vqmovun_v, aarch64_neon_sqxtun, Add1ArgType),
7003 NEONMAP1(vqneg_v, aarch64_neon_sqneg, Add1ArgType),
7004 NEONMAP1(vqnegq_v, aarch64_neon_sqneg, Add1ArgType),
7005 NEONMAP1(vqrdmlah_s16, aarch64_neon_sqrdmlah, Add1ArgType),
7006 NEONMAP1(vqrdmlah_s32, aarch64_neon_sqrdmlah, Add1ArgType),
7007 NEONMAP1(vqrdmlahq_s16, aarch64_neon_sqrdmlah, Add1ArgType),
7008 NEONMAP1(vqrdmlahq_s32, aarch64_neon_sqrdmlah, Add1ArgType),
7009 NEONMAP1(vqrdmlsh_s16, aarch64_neon_sqrdmlsh, Add1ArgType),
7010 NEONMAP1(vqrdmlsh_s32, aarch64_neon_sqrdmlsh, Add1ArgType),
7011 NEONMAP1(vqrdmlshq_s16, aarch64_neon_sqrdmlsh, Add1ArgType),
7012 NEONMAP1(vqrdmlshq_s32, aarch64_neon_sqrdmlsh, Add1ArgType),
7013 NEONMAP1(vqrdmulh_lane_v, aarch64_neon_sqrdmulh_lane, 0),
7014 NEONMAP1(vqrdmulh_laneq_v, aarch64_neon_sqrdmulh_laneq, 0),
7015 NEONMAP1(vqrdmulh_v, aarch64_neon_sqrdmulh, Add1ArgType),
7016 NEONMAP1(vqrdmulhq_lane_v, aarch64_neon_sqrdmulh_lane, 0),
7017 NEONMAP1(vqrdmulhq_laneq_v, aarch64_neon_sqrdmulh_laneq, 0),
7018 NEONMAP1(vqrdmulhq_v, aarch64_neon_sqrdmulh, Add1ArgType),
7019 NEONMAP2(vqrshl_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
7020 NEONMAP2(vqrshlq_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
7021 NEONMAP2(vqshl_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts),
7022 NEONMAP2(vqshl_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
7023   NEONMAP2(vqshlq_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts),
7024 NEONMAP2(vqshlq_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
7025 NEONMAP1(vqshlu_n_v, aarch64_neon_sqshlu, 0),
7026 NEONMAP1(vqshluq_n_v, aarch64_neon_sqshlu, 0),
7027 NEONMAP2(vqsub_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
7028 NEONMAP2(vqsubq_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
7029 NEONMAP1(vraddhn_v, aarch64_neon_raddhn, Add1ArgType),
7030 NEONMAP1(vrax1q_u64, aarch64_crypto_rax1, 0),
7031 NEONMAP2(vrecpe_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
7032 NEONMAP2(vrecpeq_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
7033 NEONMAP1(vrecps_v, aarch64_neon_frecps, Add1ArgType),
7034 NEONMAP1(vrecpsq_v, aarch64_neon_frecps, Add1ArgType),
7035 NEONMAP2(vrhadd_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
7036 NEONMAP2(vrhaddq_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
7037 NEONMAP1(vrnd32x_f32, aarch64_neon_frint32x, Add1ArgType),
7038 NEONMAP1(vrnd32x_f64, aarch64_neon_frint32x, Add1ArgType),
7039 NEONMAP1(vrnd32xq_f32, aarch64_neon_frint32x, Add1ArgType),
7040 NEONMAP1(vrnd32xq_f64, aarch64_neon_frint32x, Add1ArgType),
7041 NEONMAP1(vrnd32z_f32, aarch64_neon_frint32z, Add1ArgType),
7042 NEONMAP1(vrnd32z_f64, aarch64_neon_frint32z, Add1ArgType),
7043 NEONMAP1(vrnd32zq_f32, aarch64_neon_frint32z, Add1ArgType),
7044 NEONMAP1(vrnd32zq_f64, aarch64_neon_frint32z, Add1ArgType),
7045 NEONMAP1(vrnd64x_f32, aarch64_neon_frint64x, Add1ArgType),
7046 NEONMAP1(vrnd64x_f64, aarch64_neon_frint64x, Add1ArgType),
7047 NEONMAP1(vrnd64xq_f32, aarch64_neon_frint64x, Add1ArgType),
7048 NEONMAP1(vrnd64xq_f64, aarch64_neon_frint64x, Add1ArgType),
7049 NEONMAP1(vrnd64z_f32, aarch64_neon_frint64z, Add1ArgType),
7050 NEONMAP1(vrnd64z_f64, aarch64_neon_frint64z, Add1ArgType),
7051 NEONMAP1(vrnd64zq_f32, aarch64_neon_frint64z, Add1ArgType),
7052 NEONMAP1(vrnd64zq_f64, aarch64_neon_frint64z, Add1ArgType),
7053 NEONMAP0(vrndi_v),
7054 NEONMAP0(vrndiq_v),
7055 NEONMAP2(vrshl_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
7056 NEONMAP2(vrshlq_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
7057 NEONMAP2(vrshr_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
7058 NEONMAP2(vrshrq_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
7059 NEONMAP2(vrsqrte_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
7060 NEONMAP2(vrsqrteq_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
7061 NEONMAP1(vrsqrts_v, aarch64_neon_frsqrts, Add1ArgType),
7062 NEONMAP1(vrsqrtsq_v, aarch64_neon_frsqrts, Add1ArgType),
7063 NEONMAP1(vrsubhn_v, aarch64_neon_rsubhn, Add1ArgType),
7064 NEONMAP1(vsha1su0q_u32, aarch64_crypto_sha1su0, 0),
7065 NEONMAP1(vsha1su1q_u32, aarch64_crypto_sha1su1, 0),
7066 NEONMAP1(vsha256h2q_u32, aarch64_crypto_sha256h2, 0),
7067 NEONMAP1(vsha256hq_u32, aarch64_crypto_sha256h, 0),
7068 NEONMAP1(vsha256su0q_u32, aarch64_crypto_sha256su0, 0),
7069 NEONMAP1(vsha256su1q_u32, aarch64_crypto_sha256su1, 0),
7070 NEONMAP1(vsha512h2q_u64, aarch64_crypto_sha512h2, 0),
7071 NEONMAP1(vsha512hq_u64, aarch64_crypto_sha512h, 0),
7072 NEONMAP1(vsha512su0q_u64, aarch64_crypto_sha512su0, 0),
7073 NEONMAP1(vsha512su1q_u64, aarch64_crypto_sha512su1, 0),
7074 NEONMAP0(vshl_n_v),
7075 NEONMAP2(vshl_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
7076 NEONMAP0(vshll_n_v),
7077 NEONMAP0(vshlq_n_v),
7078 NEONMAP2(vshlq_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
7079 NEONMAP0(vshr_n_v),
7080 NEONMAP0(vshrn_n_v),
7081 NEONMAP0(vshrq_n_v),
7082 NEONMAP1(vsm3partw1q_u32, aarch64_crypto_sm3partw1, 0),
7083 NEONMAP1(vsm3partw2q_u32, aarch64_crypto_sm3partw2, 0),
7084 NEONMAP1(vsm3ss1q_u32, aarch64_crypto_sm3ss1, 0),
7085 NEONMAP1(vsm3tt1aq_u32, aarch64_crypto_sm3tt1a, 0),
7086 NEONMAP1(vsm3tt1bq_u32, aarch64_crypto_sm3tt1b, 0),
7087 NEONMAP1(vsm3tt2aq_u32, aarch64_crypto_sm3tt2a, 0),
7088 NEONMAP1(vsm3tt2bq_u32, aarch64_crypto_sm3tt2b, 0),
7089 NEONMAP1(vsm4ekeyq_u32, aarch64_crypto_sm4ekey, 0),
7090 NEONMAP1(vsm4eq_u32, aarch64_crypto_sm4e, 0),
7091 NEONMAP1(vst1_x2_v, aarch64_neon_st1x2, 0),
7092 NEONMAP1(vst1_x3_v, aarch64_neon_st1x3, 0),
7093 NEONMAP1(vst1_x4_v, aarch64_neon_st1x4, 0),
7094 NEONMAP1(vst1q_x2_v, aarch64_neon_st1x2, 0),
7095 NEONMAP1(vst1q_x3_v, aarch64_neon_st1x3, 0),
7096 NEONMAP1(vst1q_x4_v, aarch64_neon_st1x4, 0),
7097 NEONMAP0(vsubhn_v),
7098 NEONMAP0(vtst_v),
7099 NEONMAP0(vtstq_v),
7100 NEONMAP1(vusdot_s32, aarch64_neon_usdot, 0),
7101 NEONMAP1(vusdotq_s32, aarch64_neon_usdot, 0),
7102 NEONMAP1(vusmmlaq_s32, aarch64_neon_usmmla, 0),
7103 NEONMAP1(vxarq_u64, aarch64_crypto_xar, 0),
7104 };
7105
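// Scalar (SISD) forms of the AArch64 NEON builtins, keyed and searched the
// same way as the vector map above.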
7106 static const ARMVectorIntrinsicInfo AArch64SISDIntrinsicMap[] = {
7107 NEONMAP1(vabdd_f64, aarch64_sisd_fabd, Add1ArgType),
7108 NEONMAP1(vabds_f32, aarch64_sisd_fabd, Add1ArgType),
7109 NEONMAP1(vabsd_s64, aarch64_neon_abs, Add1ArgType),
7110 NEONMAP1(vaddlv_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
7111 NEONMAP1(vaddlv_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
7112 NEONMAP1(vaddlvq_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
7113 NEONMAP1(vaddlvq_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
7114 NEONMAP1(vaddv_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
7115 NEONMAP1(vaddv_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
7116 NEONMAP1(vaddv_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
7117 NEONMAP1(vaddvq_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
7118 NEONMAP1(vaddvq_f64, aarch64_neon_faddv, AddRetType | Add1ArgType),
7119 NEONMAP1(vaddvq_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
7120 NEONMAP1(vaddvq_s64, aarch64_neon_saddv, AddRetType | Add1ArgType),
7121 NEONMAP1(vaddvq_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
7122 NEONMAP1(vaddvq_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
7123 NEONMAP1(vcaged_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
7124 NEONMAP1(vcages_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
7125 NEONMAP1(vcagtd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
7126 NEONMAP1(vcagts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
7127 NEONMAP1(vcaled_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
7128 NEONMAP1(vcales_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
7129 NEONMAP1(vcaltd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
7130 NEONMAP1(vcalts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
7131 NEONMAP1(vcvtad_s64_f64, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
7132 NEONMAP1(vcvtad_u64_f64, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
7133 NEONMAP1(vcvtas_s32_f32, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
7134 NEONMAP1(vcvtas_u32_f32, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
7135 NEONMAP1(vcvtd_n_f64_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
7136 NEONMAP1(vcvtd_n_f64_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
7137 NEONMAP1(vcvtd_n_s64_f64, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
7138 NEONMAP1(vcvtd_n_u64_f64, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
7139 NEONMAP1(vcvtd_s64_f64, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
7140 NEONMAP1(vcvtd_u64_f64, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
7141 NEONMAP1(vcvth_bf16_f32, aarch64_neon_bfcvt, 0),
7142 NEONMAP1(vcvtmd_s64_f64, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
7143 NEONMAP1(vcvtmd_u64_f64, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
7144 NEONMAP1(vcvtms_s32_f32, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
7145 NEONMAP1(vcvtms_u32_f32, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
7146 NEONMAP1(vcvtnd_s64_f64, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
7147 NEONMAP1(vcvtnd_u64_f64, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
7148 NEONMAP1(vcvtns_s32_f32, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
7149 NEONMAP1(vcvtns_u32_f32, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
7150 NEONMAP1(vcvtpd_s64_f64, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
7151 NEONMAP1(vcvtpd_u64_f64, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
7152 NEONMAP1(vcvtps_s32_f32, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
7153 NEONMAP1(vcvtps_u32_f32, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
7154 NEONMAP1(vcvts_n_f32_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
7155 NEONMAP1(vcvts_n_f32_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
7156 NEONMAP1(vcvts_n_s32_f32, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
7157 NEONMAP1(vcvts_n_u32_f32, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
7158 NEONMAP1(vcvts_s32_f32, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
7159 NEONMAP1(vcvts_u32_f32, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
7160 NEONMAP1(vcvtxd_f32_f64, aarch64_sisd_fcvtxn, 0),
7161 NEONMAP1(vmaxnmv_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
7162 NEONMAP1(vmaxnmvq_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
7163 NEONMAP1(vmaxnmvq_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
7164 NEONMAP1(vmaxv_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
7165 NEONMAP1(vmaxv_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
7166 NEONMAP1(vmaxv_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
7167 NEONMAP1(vmaxvq_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
7168 NEONMAP1(vmaxvq_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
7169 NEONMAP1(vmaxvq_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
7170 NEONMAP1(vmaxvq_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
7171 NEONMAP1(vminnmv_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
7172 NEONMAP1(vminnmvq_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
7173 NEONMAP1(vminnmvq_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
7174 NEONMAP1(vminv_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
7175 NEONMAP1(vminv_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
7176 NEONMAP1(vminv_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
7177 NEONMAP1(vminvq_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
7178 NEONMAP1(vminvq_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
7179 NEONMAP1(vminvq_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
7180 NEONMAP1(vminvq_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
7181 NEONMAP1(vmull_p64, aarch64_neon_pmull64, 0),
7182 NEONMAP1(vmulxd_f64, aarch64_neon_fmulx, Add1ArgType),
7183 NEONMAP1(vmulxs_f32, aarch64_neon_fmulx, Add1ArgType),
7184 NEONMAP1(vpaddd_s64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
7185 NEONMAP1(vpaddd_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
7186 NEONMAP1(vpmaxnmqd_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
7187 NEONMAP1(vpmaxnms_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
7188 NEONMAP1(vpmaxqd_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
7189 NEONMAP1(vpmaxs_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
7190 NEONMAP1(vpminnmqd_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
7191 NEONMAP1(vpminnms_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
7192 NEONMAP1(vpminqd_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
7193 NEONMAP1(vpmins_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
7194 NEONMAP1(vqabsb_s8, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
7195 NEONMAP1(vqabsd_s64, aarch64_neon_sqabs, Add1ArgType),
7196 NEONMAP1(vqabsh_s16, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
7197 NEONMAP1(vqabss_s32, aarch64_neon_sqabs, Add1ArgType),
7198 NEONMAP1(vqaddb_s8, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
7199 NEONMAP1(vqaddb_u8, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
7200 NEONMAP1(vqaddd_s64, aarch64_neon_sqadd, Add1ArgType),
7201 NEONMAP1(vqaddd_u64, aarch64_neon_uqadd, Add1ArgType),
7202 NEONMAP1(vqaddh_s16, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
7203 NEONMAP1(vqaddh_u16, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
7204 NEONMAP1(vqadds_s32, aarch64_neon_sqadd, Add1ArgType),
7205 NEONMAP1(vqadds_u32, aarch64_neon_uqadd, Add1ArgType),
7206 NEONMAP1(vqdmulhh_s16, aarch64_neon_sqdmulh, Vectorize1ArgType | Use64BitVectors),
7207 NEONMAP1(vqdmulhs_s32, aarch64_neon_sqdmulh, Add1ArgType),
7208 NEONMAP1(vqdmullh_s16, aarch64_neon_sqdmull, VectorRet | Use128BitVectors),
7209 NEONMAP1(vqdmulls_s32, aarch64_neon_sqdmulls_scalar, 0),
7210 NEONMAP1(vqmovnd_s64, aarch64_neon_scalar_sqxtn, AddRetType | Add1ArgType),
7211 NEONMAP1(vqmovnd_u64, aarch64_neon_scalar_uqxtn, AddRetType | Add1ArgType),
7212 NEONMAP1(vqmovnh_s16, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
7213 NEONMAP1(vqmovnh_u16, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
7214 NEONMAP1(vqmovns_s32, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
7215 NEONMAP1(vqmovns_u32, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
7216 NEONMAP1(vqmovund_s64, aarch64_neon_scalar_sqxtun, AddRetType | Add1ArgType),
7217 NEONMAP1(vqmovunh_s16, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
7218 NEONMAP1(vqmovuns_s32, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
7219 NEONMAP1(vqnegb_s8, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
7220 NEONMAP1(vqnegd_s64, aarch64_neon_sqneg, Add1ArgType),
7221 NEONMAP1(vqnegh_s16, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
7222 NEONMAP1(vqnegs_s32, aarch64_neon_sqneg, Add1ArgType),
7223 NEONMAP1(vqrdmlahh_s16, aarch64_neon_sqrdmlah, Vectorize1ArgType | Use64BitVectors),
7224 NEONMAP1(vqrdmlahs_s32, aarch64_neon_sqrdmlah, Add1ArgType),
7225 NEONMAP1(vqrdmlshh_s16, aarch64_neon_sqrdmlsh, Vectorize1ArgType | Use64BitVectors),
7226 NEONMAP1(vqrdmlshs_s32, aarch64_neon_sqrdmlsh, Add1ArgType),
7227 NEONMAP1(vqrdmulhh_s16, aarch64_neon_sqrdmulh, Vectorize1ArgType | Use64BitVectors),
7228 NEONMAP1(vqrdmulhs_s32, aarch64_neon_sqrdmulh, Add1ArgType),
7229 NEONMAP1(vqrshlb_s8, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
7230 NEONMAP1(vqrshlb_u8, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
7231 NEONMAP1(vqrshld_s64, aarch64_neon_sqrshl, Add1ArgType),
7232 NEONMAP1(vqrshld_u64, aarch64_neon_uqrshl, Add1ArgType),
7233 NEONMAP1(vqrshlh_s16, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
7234 NEONMAP1(vqrshlh_u16, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
7235 NEONMAP1(vqrshls_s32, aarch64_neon_sqrshl, Add1ArgType),
7236 NEONMAP1(vqrshls_u32, aarch64_neon_uqrshl, Add1ArgType),
7237 NEONMAP1(vqrshrnd_n_s64, aarch64_neon_sqrshrn, AddRetType),
7238 NEONMAP1(vqrshrnd_n_u64, aarch64_neon_uqrshrn, AddRetType),
7239 NEONMAP1(vqrshrnh_n_s16, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
7240 NEONMAP1(vqrshrnh_n_u16, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
7241 NEONMAP1(vqrshrns_n_s32, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
7242 NEONMAP1(vqrshrns_n_u32, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
7243 NEONMAP1(vqrshrund_n_s64, aarch64_neon_sqrshrun, AddRetType),
7244 NEONMAP1(vqrshrunh_n_s16, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
7245 NEONMAP1(vqrshruns_n_s32, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
7246 NEONMAP1(vqshlb_n_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
7247 NEONMAP1(vqshlb_n_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
7248 NEONMAP1(vqshlb_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
7249 NEONMAP1(vqshlb_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
7250 NEONMAP1(vqshld_s64, aarch64_neon_sqshl, Add1ArgType),
7251 NEONMAP1(vqshld_u64, aarch64_neon_uqshl, Add1ArgType),
7252 NEONMAP1(vqshlh_n_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
7253 NEONMAP1(vqshlh_n_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
7254 NEONMAP1(vqshlh_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
7255 NEONMAP1(vqshlh_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
7256 NEONMAP1(vqshls_n_s32, aarch64_neon_sqshl, Add1ArgType),
7257 NEONMAP1(vqshls_n_u32, aarch64_neon_uqshl, Add1ArgType),
7258 NEONMAP1(vqshls_s32, aarch64_neon_sqshl, Add1ArgType),
7259 NEONMAP1(vqshls_u32, aarch64_neon_uqshl, Add1ArgType),
7260 NEONMAP1(vqshlub_n_s8, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
7261 NEONMAP1(vqshluh_n_s16, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
7262 NEONMAP1(vqshlus_n_s32, aarch64_neon_sqshlu, Add1ArgType),
7263 NEONMAP1(vqshrnd_n_s64, aarch64_neon_sqshrn, AddRetType),
7264 NEONMAP1(vqshrnd_n_u64, aarch64_neon_uqshrn, AddRetType),
7265 NEONMAP1(vqshrnh_n_s16, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
7266 NEONMAP1(vqshrnh_n_u16, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
7267 NEONMAP1(vqshrns_n_s32, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
7268 NEONMAP1(vqshrns_n_u32, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
7269 NEONMAP1(vqshrund_n_s64, aarch64_neon_sqshrun, AddRetType),
7270 NEONMAP1(vqshrunh_n_s16, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
7271 NEONMAP1(vqshruns_n_s32, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
7272 NEONMAP1(vqsubb_s8, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
7273 NEONMAP1(vqsubb_u8, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
7274 NEONMAP1(vqsubd_s64, aarch64_neon_sqsub, Add1ArgType),
7275 NEONMAP1(vqsubd_u64, aarch64_neon_uqsub, Add1ArgType),
7276 NEONMAP1(vqsubh_s16, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
7277 NEONMAP1(vqsubh_u16, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
7278 NEONMAP1(vqsubs_s32, aarch64_neon_sqsub, Add1ArgType),
7279 NEONMAP1(vqsubs_u32, aarch64_neon_uqsub, Add1ArgType),
7280 NEONMAP1(vrecped_f64, aarch64_neon_frecpe, Add1ArgType),
7281 NEONMAP1(vrecpes_f32, aarch64_neon_frecpe, Add1ArgType),
7282 NEONMAP1(vrecpxd_f64, aarch64_neon_frecpx, Add1ArgType),
7283 NEONMAP1(vrecpxs_f32, aarch64_neon_frecpx, Add1ArgType),
7284 NEONMAP1(vrshld_s64, aarch64_neon_srshl, Add1ArgType),
7285 NEONMAP1(vrshld_u64, aarch64_neon_urshl, Add1ArgType),
7286 NEONMAP1(vrsqrted_f64, aarch64_neon_frsqrte, Add1ArgType),
7287 NEONMAP1(vrsqrtes_f32, aarch64_neon_frsqrte, Add1ArgType),
7288 NEONMAP1(vrsqrtsd_f64, aarch64_neon_frsqrts, Add1ArgType),
7289 NEONMAP1(vrsqrtss_f32, aarch64_neon_frsqrts, Add1ArgType),
7290 NEONMAP1(vsha1cq_u32, aarch64_crypto_sha1c, 0),
7291 NEONMAP1(vsha1h_u32, aarch64_crypto_sha1h, 0),
7292 NEONMAP1(vsha1mq_u32, aarch64_crypto_sha1m, 0),
7293 NEONMAP1(vsha1pq_u32, aarch64_crypto_sha1p, 0),
7294 NEONMAP1(vshld_s64, aarch64_neon_sshl, Add1ArgType),
7295 NEONMAP1(vshld_u64, aarch64_neon_ushl, Add1ArgType),
7296 NEONMAP1(vslid_n_s64, aarch64_neon_vsli, Vectorize1ArgType),
7297 NEONMAP1(vslid_n_u64, aarch64_neon_vsli, Vectorize1ArgType),
7298 NEONMAP1(vsqaddb_u8, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
7299 NEONMAP1(vsqaddd_u64, aarch64_neon_usqadd, Add1ArgType),
7300 NEONMAP1(vsqaddh_u16, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
7301 NEONMAP1(vsqadds_u32, aarch64_neon_usqadd, Add1ArgType),
7302 NEONMAP1(vsrid_n_s64, aarch64_neon_vsri, Vectorize1ArgType),
7303 NEONMAP1(vsrid_n_u64, aarch64_neon_vsri, Vectorize1ArgType),
7304 NEONMAP1(vuqaddb_s8, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
7305 NEONMAP1(vuqaddd_s64, aarch64_neon_suqadd, Add1ArgType),
7306 NEONMAP1(vuqaddh_s16, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
7307 NEONMAP1(vuqadds_s32, aarch64_neon_suqadd, Add1ArgType),
7308   // FP16 scalar intrinsics go here.
7309 NEONMAP1(vabdh_f16, aarch64_sisd_fabd, Add1ArgType),
7310 NEONMAP1(vcvtah_s32_f16, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
7311 NEONMAP1(vcvtah_s64_f16, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
7312 NEONMAP1(vcvtah_u32_f16, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
7313 NEONMAP1(vcvtah_u64_f16, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
7314 NEONMAP1(vcvth_n_f16_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
7315 NEONMAP1(vcvth_n_f16_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
7316 NEONMAP1(vcvth_n_f16_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
7317 NEONMAP1(vcvth_n_f16_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
7318 NEONMAP1(vcvth_n_s32_f16, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
7319 NEONMAP1(vcvth_n_s64_f16, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
7320 NEONMAP1(vcvth_n_u32_f16, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
7321 NEONMAP1(vcvth_n_u64_f16, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
7322 NEONMAP1(vcvth_s32_f16, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
7323 NEONMAP1(vcvth_s64_f16, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
7324 NEONMAP1(vcvth_u32_f16, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
7325 NEONMAP1(vcvth_u64_f16, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
7326 NEONMAP1(vcvtmh_s32_f16, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
7327 NEONMAP1(vcvtmh_s64_f16, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
7328 NEONMAP1(vcvtmh_u32_f16, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
7329 NEONMAP1(vcvtmh_u64_f16, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
7330 NEONMAP1(vcvtnh_s32_f16, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
7331 NEONMAP1(vcvtnh_s64_f16, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
7332 NEONMAP1(vcvtnh_u32_f16, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
7333 NEONMAP1(vcvtnh_u64_f16, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
7334 NEONMAP1(vcvtph_s32_f16, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
7335 NEONMAP1(vcvtph_s64_f16, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
7336 NEONMAP1(vcvtph_u32_f16, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
7337 NEONMAP1(vcvtph_u64_f16, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
7338 NEONMAP1(vmulxh_f16, aarch64_neon_fmulx, Add1ArgType),
7339 NEONMAP1(vrecpeh_f16, aarch64_neon_frecpe, Add1ArgType),
7340 NEONMAP1(vrecpxh_f16, aarch64_neon_frecpx, Add1ArgType),
7341 NEONMAP1(vrsqrteh_f16, aarch64_neon_frsqrte, Add1ArgType),
7342 NEONMAP1(vrsqrtsh_f16, aarch64_neon_frsqrts, Add1ArgType),
7343 };
7344
7345 // Some intrinsics are equivalent for codegen.
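// Each entry maps an alias builtin ID (left) to the canonical builtin ID
// (right) whose codegen path is reused for it.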
7346 static const std::pair<unsigned, unsigned> NEONEquivalentIntrinsicMap[] = {
7347 { NEON::BI__builtin_neon_splat_lane_bf16, NEON::BI__builtin_neon_splat_lane_v, },
7348 { NEON::BI__builtin_neon_splat_laneq_bf16, NEON::BI__builtin_neon_splat_laneq_v, },
7349 { NEON::BI__builtin_neon_splatq_lane_bf16, NEON::BI__builtin_neon_splatq_lane_v, },
7350 { NEON::BI__builtin_neon_splatq_laneq_bf16, NEON::BI__builtin_neon_splatq_laneq_v, },
7351 { NEON::BI__builtin_neon_vabd_f16, NEON::BI__builtin_neon_vabd_v, },
7352 { NEON::BI__builtin_neon_vabdq_f16, NEON::BI__builtin_neon_vabdq_v, },
7353 { NEON::BI__builtin_neon_vabs_f16, NEON::BI__builtin_neon_vabs_v, },
7354 { NEON::BI__builtin_neon_vabsq_f16, NEON::BI__builtin_neon_vabsq_v, },
7355 { NEON::BI__builtin_neon_vcage_f16, NEON::BI__builtin_neon_vcage_v, },
7356 { NEON::BI__builtin_neon_vcageq_f16, NEON::BI__builtin_neon_vcageq_v, },
7357 { NEON::BI__builtin_neon_vcagt_f16, NEON::BI__builtin_neon_vcagt_v, },
7358 { NEON::BI__builtin_neon_vcagtq_f16, NEON::BI__builtin_neon_vcagtq_v, },
7359 { NEON::BI__builtin_neon_vcale_f16, NEON::BI__builtin_neon_vcale_v, },
7360 { NEON::BI__builtin_neon_vcaleq_f16, NEON::BI__builtin_neon_vcaleq_v, },
7361 { NEON::BI__builtin_neon_vcalt_f16, NEON::BI__builtin_neon_vcalt_v, },
7362 { NEON::BI__builtin_neon_vcaltq_f16, NEON::BI__builtin_neon_vcaltq_v, },
7363 { NEON::BI__builtin_neon_vceqz_f16, NEON::BI__builtin_neon_vceqz_v, },
7364 { NEON::BI__builtin_neon_vceqzq_f16, NEON::BI__builtin_neon_vceqzq_v, },
7365 { NEON::BI__builtin_neon_vcgez_f16, NEON::BI__builtin_neon_vcgez_v, },
7366 { NEON::BI__builtin_neon_vcgezq_f16, NEON::BI__builtin_neon_vcgezq_v, },
7367 { NEON::BI__builtin_neon_vcgtz_f16, NEON::BI__builtin_neon_vcgtz_v, },
7368 { NEON::BI__builtin_neon_vcgtzq_f16, NEON::BI__builtin_neon_vcgtzq_v, },
7369 { NEON::BI__builtin_neon_vclez_f16, NEON::BI__builtin_neon_vclez_v, },
7370 { NEON::BI__builtin_neon_vclezq_f16, NEON::BI__builtin_neon_vclezq_v, },
7371 { NEON::BI__builtin_neon_vcltz_f16, NEON::BI__builtin_neon_vcltz_v, },
7372 { NEON::BI__builtin_neon_vcltzq_f16, NEON::BI__builtin_neon_vcltzq_v, },
7373 { NEON::BI__builtin_neon_vfma_f16, NEON::BI__builtin_neon_vfma_v, },
7374 { NEON::BI__builtin_neon_vfma_lane_f16, NEON::BI__builtin_neon_vfma_lane_v, },
7375 { NEON::BI__builtin_neon_vfma_laneq_f16, NEON::BI__builtin_neon_vfma_laneq_v, },
7376 { NEON::BI__builtin_neon_vfmaq_f16, NEON::BI__builtin_neon_vfmaq_v, },
7377 { NEON::BI__builtin_neon_vfmaq_lane_f16, NEON::BI__builtin_neon_vfmaq_lane_v, },
7378 { NEON::BI__builtin_neon_vfmaq_laneq_f16, NEON::BI__builtin_neon_vfmaq_laneq_v, },
7379 { NEON::BI__builtin_neon_vld1_bf16_x2, NEON::BI__builtin_neon_vld1_x2_v },
7380 { NEON::BI__builtin_neon_vld1_bf16_x3, NEON::BI__builtin_neon_vld1_x3_v },
7381 { NEON::BI__builtin_neon_vld1_bf16_x4, NEON::BI__builtin_neon_vld1_x4_v },
7382 { NEON::BI__builtin_neon_vld1_bf16, NEON::BI__builtin_neon_vld1_v },
7383 { NEON::BI__builtin_neon_vld1_dup_bf16, NEON::BI__builtin_neon_vld1_dup_v },
7384 { NEON::BI__builtin_neon_vld1_lane_bf16, NEON::BI__builtin_neon_vld1_lane_v },
7385 { NEON::BI__builtin_neon_vld1q_bf16_x2, NEON::BI__builtin_neon_vld1q_x2_v },
7386 { NEON::BI__builtin_neon_vld1q_bf16_x3, NEON::BI__builtin_neon_vld1q_x3_v },
7387 { NEON::BI__builtin_neon_vld1q_bf16_x4, NEON::BI__builtin_neon_vld1q_x4_v },
7388 { NEON::BI__builtin_neon_vld1q_bf16, NEON::BI__builtin_neon_vld1q_v },
7389 { NEON::BI__builtin_neon_vld1q_dup_bf16, NEON::BI__builtin_neon_vld1q_dup_v },
7390 { NEON::BI__builtin_neon_vld1q_lane_bf16, NEON::BI__builtin_neon_vld1q_lane_v },
7391 { NEON::BI__builtin_neon_vld2_bf16, NEON::BI__builtin_neon_vld2_v },
7392 { NEON::BI__builtin_neon_vld2_dup_bf16, NEON::BI__builtin_neon_vld2_dup_v },
7393 { NEON::BI__builtin_neon_vld2_lane_bf16, NEON::BI__builtin_neon_vld2_lane_v },
7394 { NEON::BI__builtin_neon_vld2q_bf16, NEON::BI__builtin_neon_vld2q_v },
7395 { NEON::BI__builtin_neon_vld2q_dup_bf16, NEON::BI__builtin_neon_vld2q_dup_v },
7396 { NEON::BI__builtin_neon_vld2q_lane_bf16, NEON::BI__builtin_neon_vld2q_lane_v },
7397 { NEON::BI__builtin_neon_vld3_bf16, NEON::BI__builtin_neon_vld3_v },
7398 { NEON::BI__builtin_neon_vld3_dup_bf16, NEON::BI__builtin_neon_vld3_dup_v },
7399 { NEON::BI__builtin_neon_vld3_lane_bf16, NEON::BI__builtin_neon_vld3_lane_v },
7400 { NEON::BI__builtin_neon_vld3q_bf16, NEON::BI__builtin_neon_vld3q_v },
7401 { NEON::BI__builtin_neon_vld3q_dup_bf16, NEON::BI__builtin_neon_vld3q_dup_v },
7402 { NEON::BI__builtin_neon_vld3q_lane_bf16, NEON::BI__builtin_neon_vld3q_lane_v },
7403 { NEON::BI__builtin_neon_vld4_bf16, NEON::BI__builtin_neon_vld4_v },
7404 { NEON::BI__builtin_neon_vld4_dup_bf16, NEON::BI__builtin_neon_vld4_dup_v },
7405 { NEON::BI__builtin_neon_vld4_lane_bf16, NEON::BI__builtin_neon_vld4_lane_v },
7406 { NEON::BI__builtin_neon_vld4q_bf16, NEON::BI__builtin_neon_vld4q_v },
7407 { NEON::BI__builtin_neon_vld4q_dup_bf16, NEON::BI__builtin_neon_vld4q_dup_v },
7408 { NEON::BI__builtin_neon_vld4q_lane_bf16, NEON::BI__builtin_neon_vld4q_lane_v },
7409 { NEON::BI__builtin_neon_vmax_f16, NEON::BI__builtin_neon_vmax_v, },
7410 { NEON::BI__builtin_neon_vmaxnm_f16, NEON::BI__builtin_neon_vmaxnm_v, },
7411 { NEON::BI__builtin_neon_vmaxnmq_f16, NEON::BI__builtin_neon_vmaxnmq_v, },
7412 { NEON::BI__builtin_neon_vmaxq_f16, NEON::BI__builtin_neon_vmaxq_v, },
7413 { NEON::BI__builtin_neon_vmin_f16, NEON::BI__builtin_neon_vmin_v, },
7414 { NEON::BI__builtin_neon_vminnm_f16, NEON::BI__builtin_neon_vminnm_v, },
7415 { NEON::BI__builtin_neon_vminnmq_f16, NEON::BI__builtin_neon_vminnmq_v, },
7416 { NEON::BI__builtin_neon_vminq_f16, NEON::BI__builtin_neon_vminq_v, },
7417 { NEON::BI__builtin_neon_vmulx_f16, NEON::BI__builtin_neon_vmulx_v, },
7418 { NEON::BI__builtin_neon_vmulxq_f16, NEON::BI__builtin_neon_vmulxq_v, },
7419 { NEON::BI__builtin_neon_vpadd_f16, NEON::BI__builtin_neon_vpadd_v, },
7420 { NEON::BI__builtin_neon_vpaddq_f16, NEON::BI__builtin_neon_vpaddq_v, },
7421 { NEON::BI__builtin_neon_vpmax_f16, NEON::BI__builtin_neon_vpmax_v, },
7422 { NEON::BI__builtin_neon_vpmaxnm_f16, NEON::BI__builtin_neon_vpmaxnm_v, },
7423 { NEON::BI__builtin_neon_vpmaxnmq_f16, NEON::BI__builtin_neon_vpmaxnmq_v, },
7424 { NEON::BI__builtin_neon_vpmaxq_f16, NEON::BI__builtin_neon_vpmaxq_v, },
7425 { NEON::BI__builtin_neon_vpmin_f16, NEON::BI__builtin_neon_vpmin_v, },
7426 { NEON::BI__builtin_neon_vpminnm_f16, NEON::BI__builtin_neon_vpminnm_v, },
7427 { NEON::BI__builtin_neon_vpminnmq_f16, NEON::BI__builtin_neon_vpminnmq_v, },
7428 { NEON::BI__builtin_neon_vpminq_f16, NEON::BI__builtin_neon_vpminq_v, },
7429 { NEON::BI__builtin_neon_vrecpe_f16, NEON::BI__builtin_neon_vrecpe_v, },
7430 { NEON::BI__builtin_neon_vrecpeq_f16, NEON::BI__builtin_neon_vrecpeq_v, },
7431 { NEON::BI__builtin_neon_vrecps_f16, NEON::BI__builtin_neon_vrecps_v, },
7432 { NEON::BI__builtin_neon_vrecpsq_f16, NEON::BI__builtin_neon_vrecpsq_v, },
7433 { NEON::BI__builtin_neon_vrnd_f16, NEON::BI__builtin_neon_vrnd_v, },
7434 { NEON::BI__builtin_neon_vrnda_f16, NEON::BI__builtin_neon_vrnda_v, },
7435 { NEON::BI__builtin_neon_vrndaq_f16, NEON::BI__builtin_neon_vrndaq_v, },
7436 { NEON::BI__builtin_neon_vrndi_f16, NEON::BI__builtin_neon_vrndi_v, },
7437 { NEON::BI__builtin_neon_vrndiq_f16, NEON::BI__builtin_neon_vrndiq_v, },
7438 { NEON::BI__builtin_neon_vrndm_f16, NEON::BI__builtin_neon_vrndm_v, },
7439 { NEON::BI__builtin_neon_vrndmq_f16, NEON::BI__builtin_neon_vrndmq_v, },
7440 { NEON::BI__builtin_neon_vrndn_f16, NEON::BI__builtin_neon_vrndn_v, },
7441 { NEON::BI__builtin_neon_vrndnq_f16, NEON::BI__builtin_neon_vrndnq_v, },
7442 { NEON::BI__builtin_neon_vrndp_f16, NEON::BI__builtin_neon_vrndp_v, },
7443 { NEON::BI__builtin_neon_vrndpq_f16, NEON::BI__builtin_neon_vrndpq_v, },
7444 { NEON::BI__builtin_neon_vrndq_f16, NEON::BI__builtin_neon_vrndq_v, },
7445 { NEON::BI__builtin_neon_vrndx_f16, NEON::BI__builtin_neon_vrndx_v, },
7446 { NEON::BI__builtin_neon_vrndxq_f16, NEON::BI__builtin_neon_vrndxq_v, },
7447 { NEON::BI__builtin_neon_vrsqrte_f16, NEON::BI__builtin_neon_vrsqrte_v, },
7448 { NEON::BI__builtin_neon_vrsqrteq_f16, NEON::BI__builtin_neon_vrsqrteq_v, },
7449 { NEON::BI__builtin_neon_vrsqrts_f16, NEON::BI__builtin_neon_vrsqrts_v, },
7450 { NEON::BI__builtin_neon_vrsqrtsq_f16, NEON::BI__builtin_neon_vrsqrtsq_v, },
7451 { NEON::BI__builtin_neon_vsqrt_f16, NEON::BI__builtin_neon_vsqrt_v, },
7452 { NEON::BI__builtin_neon_vsqrtq_f16, NEON::BI__builtin_neon_vsqrtq_v, },
7453 { NEON::BI__builtin_neon_vst1_bf16_x2, NEON::BI__builtin_neon_vst1_x2_v },
7454 { NEON::BI__builtin_neon_vst1_bf16_x3, NEON::BI__builtin_neon_vst1_x3_v },
7455 { NEON::BI__builtin_neon_vst1_bf16_x4, NEON::BI__builtin_neon_vst1_x4_v },
7456 { NEON::BI__builtin_neon_vst1_bf16, NEON::BI__builtin_neon_vst1_v },
7457 { NEON::BI__builtin_neon_vst1_lane_bf16, NEON::BI__builtin_neon_vst1_lane_v },
7458 { NEON::BI__builtin_neon_vst1q_bf16_x2, NEON::BI__builtin_neon_vst1q_x2_v },
7459 { NEON::BI__builtin_neon_vst1q_bf16_x3, NEON::BI__builtin_neon_vst1q_x3_v },
7460 { NEON::BI__builtin_neon_vst1q_bf16_x4, NEON::BI__builtin_neon_vst1q_x4_v },
7461 { NEON::BI__builtin_neon_vst1q_bf16, NEON::BI__builtin_neon_vst1q_v },
7462 { NEON::BI__builtin_neon_vst1q_lane_bf16, NEON::BI__builtin_neon_vst1q_lane_v },
7463 { NEON::BI__builtin_neon_vst2_bf16, NEON::BI__builtin_neon_vst2_v },
7464 { NEON::BI__builtin_neon_vst2_lane_bf16, NEON::BI__builtin_neon_vst2_lane_v },
7465 { NEON::BI__builtin_neon_vst2q_bf16, NEON::BI__builtin_neon_vst2q_v },
7466 { NEON::BI__builtin_neon_vst2q_lane_bf16, NEON::BI__builtin_neon_vst2q_lane_v },
7467 { NEON::BI__builtin_neon_vst3_bf16, NEON::BI__builtin_neon_vst3_v },
7468 { NEON::BI__builtin_neon_vst3_lane_bf16, NEON::BI__builtin_neon_vst3_lane_v },
7469 { NEON::BI__builtin_neon_vst3q_bf16, NEON::BI__builtin_neon_vst3q_v },
7470 { NEON::BI__builtin_neon_vst3q_lane_bf16, NEON::BI__builtin_neon_vst3q_lane_v },
7471 { NEON::BI__builtin_neon_vst4_bf16, NEON::BI__builtin_neon_vst4_v },
7472 { NEON::BI__builtin_neon_vst4_lane_bf16, NEON::BI__builtin_neon_vst4_lane_v },
7473 { NEON::BI__builtin_neon_vst4q_bf16, NEON::BI__builtin_neon_vst4q_v },
7474 { NEON::BI__builtin_neon_vst4q_lane_bf16, NEON::BI__builtin_neon_vst4q_lane_v },
7475 // The mangling rules cause us to have one ID for each type for vldap1(q)_lane
7476 // and vstl1(q)_lane, but codegen is equivalent for all of them. Choose an
7477   // arbitrary one to be handled as the canonical variation.
7478 { NEON::BI__builtin_neon_vldap1_lane_u64, NEON::BI__builtin_neon_vldap1_lane_s64 },
7479 { NEON::BI__builtin_neon_vldap1_lane_f64, NEON::BI__builtin_neon_vldap1_lane_s64 },
7480 { NEON::BI__builtin_neon_vldap1_lane_p64, NEON::BI__builtin_neon_vldap1_lane_s64 },
7481 { NEON::BI__builtin_neon_vldap1q_lane_u64, NEON::BI__builtin_neon_vldap1q_lane_s64 },
7482 { NEON::BI__builtin_neon_vldap1q_lane_f64, NEON::BI__builtin_neon_vldap1q_lane_s64 },
7483 { NEON::BI__builtin_neon_vldap1q_lane_p64, NEON::BI__builtin_neon_vldap1q_lane_s64 },
7484 { NEON::BI__builtin_neon_vstl1_lane_u64, NEON::BI__builtin_neon_vstl1_lane_s64 },
7485 { NEON::BI__builtin_neon_vstl1_lane_f64, NEON::BI__builtin_neon_vstl1_lane_s64 },
7486 { NEON::BI__builtin_neon_vstl1_lane_p64, NEON::BI__builtin_neon_vstl1_lane_s64 },
7487 { NEON::BI__builtin_neon_vstl1q_lane_u64, NEON::BI__builtin_neon_vstl1q_lane_s64 },
7488 { NEON::BI__builtin_neon_vstl1q_lane_f64, NEON::BI__builtin_neon_vstl1q_lane_s64 },
7489 { NEON::BI__builtin_neon_vstl1q_lane_p64, NEON::BI__builtin_neon_vstl1q_lane_s64 },
7490 };
7491
7492 #undef NEONMAP0
7493 #undef NEONMAP1
7494 #undef NEONMAP2
7495
7496 #define SVEMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
7497 { \
7498 #NameBase, SVE::BI__builtin_sve_##NameBase, Intrinsic::LLVMIntrinsic, 0, \
7499 TypeModifier \
7500 }
7501
7502 #define SVEMAP2(NameBase, TypeModifier) \
7503 { #NameBase, SVE::BI__builtin_sve_##NameBase, 0, 0, TypeModifier }
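// SVEMAP1 ties a builtin directly to an LLVM intrinsic; SVEMAP2 leaves the
// intrinsic ID as 0, presumably for builtins that get bespoke codegen rather
// than a one-to-one intrinsic call.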
7504 static const ARMVectorIntrinsicInfo AArch64SVEIntrinsicMap[] = {
7505 #define GET_SVE_LLVM_INTRINSIC_MAP
7506 #include "clang/Basic/arm_sve_builtin_cg.inc"
7507 #include "clang/Basic/BuiltinsAArch64NeonSVEBridge_cg.def"
7508 #undef GET_SVE_LLVM_INTRINSIC_MAP
7509 };
7510
7511 #undef SVEMAP1
7512 #undef SVEMAP2
7513
7514 #define SMEMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
7515 { \
7516 #NameBase, SME::BI__builtin_sme_##NameBase, Intrinsic::LLVMIntrinsic, 0, \
7517 TypeModifier \
7518 }
7519
7520 #define SMEMAP2(NameBase, TypeModifier) \
7521 { #NameBase, SME::BI__builtin_sme_##NameBase, 0, 0, TypeModifier }
7522 static const ARMVectorIntrinsicInfo AArch64SMEIntrinsicMap[] = {
7523 #define GET_SME_LLVM_INTRINSIC_MAP
7524 #include "clang/Basic/arm_sme_builtin_cg.inc"
7525 #undef GET_SME_LLVM_INTRINSIC_MAP
7526 };
7527
7528 #undef SMEMAP1
7529 #undef SMEMAP2
7530
7531 static bool NEONSIMDIntrinsicsProvenSorted = false;
7532
7533 static bool AArch64SIMDIntrinsicsProvenSorted = false;
7534 static bool AArch64SISDIntrinsicsProvenSorted = false;
7535 static bool AArch64SVEIntrinsicsProvenSorted = false;
7536 static bool AArch64SMEIntrinsicsProvenSorted = false;
7537
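// Binary-search one of the intrinsic tables above for a builtin ID. The
// tables must be kept sorted by builtin ID; in asserts builds this is
// verified once per table and cached via the MapProvenSorted flag.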
7538 static const ARMVectorIntrinsicInfo *
7539 findARMVectorIntrinsicInMap(ArrayRef<ARMVectorIntrinsicInfo> IntrinsicMap,
7540 unsigned BuiltinID, bool &MapProvenSorted) {
7541
7542 #ifndef NDEBUG
7543 if (!MapProvenSorted) {
7544 assert(llvm::is_sorted(IntrinsicMap));
7545 MapProvenSorted = true;
7546 }
7547 #endif
7548
7549 const ARMVectorIntrinsicInfo *Builtin =
7550 llvm::lower_bound(IntrinsicMap, BuiltinID);
7551
7552 if (Builtin != IntrinsicMap.end() && Builtin->BuiltinID == BuiltinID)
7553 return Builtin;
7554
7555 return nullptr;
7556 }
7557
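// Build the list of overload types (Tys) for a NEON LLVM intrinsic from the
// table's TypeModifier flags, then fetch the matching intrinsic declaration
// from CGM.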
7558 Function *CodeGenFunction::LookupNeonLLVMIntrinsic(unsigned IntrinsicID,
7559 unsigned Modifier,
7560 llvm::Type *ArgType,
7561 const CallExpr *E) {
7562 int VectorSize = 0;
7563 if (Modifier & Use64BitVectors)
7564 VectorSize = 64;
7565 else if (Modifier & Use128BitVectors)
7566 VectorSize = 128;
7567
7568 // Return type.
7569 SmallVector<llvm::Type *, 3> Tys;
7570 if (Modifier & AddRetType) {
7571 llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
7572 if (Modifier & VectorizeRetType)
7573 Ty = llvm::FixedVectorType::get(
7574 Ty, VectorSize ? VectorSize / Ty->getPrimitiveSizeInBits() : 1);
7575
7576 Tys.push_back(Ty);
7577 }
7578
7579 // Arguments.
7580 if (Modifier & VectorizeArgTypes) {
7581 int Elts = VectorSize ? VectorSize / ArgType->getPrimitiveSizeInBits() : 1;
7582 ArgType = llvm::FixedVectorType::get(ArgType, Elts);
7583 }
7584
7585 if (Modifier & (Add1ArgType | Add2ArgTypes))
7586 Tys.push_back(ArgType);
7587
7588 if (Modifier & Add2ArgTypes)
7589 Tys.push_back(ArgType);
7590
7591 if (Modifier & InventFloatType)
7592 Tys.push_back(FloatTy);
7593
7594 return CGM.getIntrinsic(IntrinsicID, Tys);
7595 }
7596
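// Shared emission path for scalar (SISD) NEON builtins: scalar operands are
// widened to single-element vectors where the intrinsic expects a vector
// argument, and a scalar result is extracted back out of the returned vector
// when the call's result type is narrower.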
7597 static Value *EmitCommonNeonSISDBuiltinExpr(
7598 CodeGenFunction &CGF, const ARMVectorIntrinsicInfo &SISDInfo,
7599 SmallVectorImpl<Value *> &Ops, const CallExpr *E) {
7600 unsigned BuiltinID = SISDInfo.BuiltinID;
7601 unsigned int Int = SISDInfo.LLVMIntrinsic;
7602 unsigned Modifier = SISDInfo.TypeModifier;
7603 const char *s = SISDInfo.NameHint;
7604
7605 switch (BuiltinID) {
7606 case NEON::BI__builtin_neon_vcled_s64:
7607 case NEON::BI__builtin_neon_vcled_u64:
7608 case NEON::BI__builtin_neon_vcles_f32:
7609 case NEON::BI__builtin_neon_vcled_f64:
7610 case NEON::BI__builtin_neon_vcltd_s64:
7611 case NEON::BI__builtin_neon_vcltd_u64:
7612 case NEON::BI__builtin_neon_vclts_f32:
7613 case NEON::BI__builtin_neon_vcltd_f64:
7614 case NEON::BI__builtin_neon_vcales_f32:
7615 case NEON::BI__builtin_neon_vcaled_f64:
7616 case NEON::BI__builtin_neon_vcalts_f32:
7617 case NEON::BI__builtin_neon_vcaltd_f64:
7618   // Only one direction of comparisons actually exists: cmle is actually a
7619   // cmge with swapped operands. The table gives us the right intrinsic, but
7620   // we still need to do the swap.
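  // For example, a "less than or equal" builtin such as vcled_s64(a, b) is
  // emitted as the corresponding "greater than or equal" intrinsic applied to
  // (b, a).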
7621 std::swap(Ops[0], Ops[1]);
7622 break;
7623 }
7624
7625 assert(Int && "Generic code assumes a valid intrinsic");
7626
7627 // Determine the type(s) of this overloaded AArch64 intrinsic.
7628 const Expr *Arg = E->getArg(0);
7629 llvm::Type *ArgTy = CGF.ConvertType(Arg->getType());
7630 Function *F = CGF.LookupNeonLLVMIntrinsic(Int, Modifier, ArgTy, E);
7631
7632 int j = 0;
7633 ConstantInt *C0 = ConstantInt::get(CGF.SizeTy, 0);
7634 for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
7635 ai != ae; ++ai, ++j) {
7636 llvm::Type *ArgTy = ai->getType();
7637 if (Ops[j]->getType()->getPrimitiveSizeInBits() ==
7638 ArgTy->getPrimitiveSizeInBits())
7639 continue;
7640
7641 assert(ArgTy->isVectorTy() && !Ops[j]->getType()->isVectorTy());
7642 // The constant argument to an _n_ intrinsic always has Int32Ty, so truncate
7643 // it before inserting.
7644 Ops[j] = CGF.Builder.CreateTruncOrBitCast(
7645 Ops[j], cast<llvm::VectorType>(ArgTy)->getElementType());
7646 Ops[j] =
7647 CGF.Builder.CreateInsertElement(PoisonValue::get(ArgTy), Ops[j], C0);
7648 }
7649
7650 Value *Result = CGF.EmitNeonCall(F, Ops, s);
7651 llvm::Type *ResultType = CGF.ConvertType(E->getType());
7652 if (ResultType->getPrimitiveSizeInBits().getFixedValue() <
7653 Result->getType()->getPrimitiveSizeInBits().getFixedValue())
7654 return CGF.Builder.CreateExtractElement(Result, C0);
7655
7656 return CGF.Builder.CreateBitCast(Result, ResultType, s);
7657 }
7658
7659 Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
7660 unsigned BuiltinID, unsigned LLVMIntrinsic, unsigned AltLLVMIntrinsic,
7661 const char *NameHint, unsigned Modifier, const CallExpr *E,
7662 SmallVectorImpl<llvm::Value *> &Ops, Address PtrOp0, Address PtrOp1,
7663 llvm::Triple::ArchType Arch) {
7664 // Get the last argument, which specifies the vector type.
7665 const Expr *Arg = E->getArg(E->getNumArgs() - 1);
7666 std::optional<llvm::APSInt> NeonTypeConst =
7667 Arg->getIntegerConstantExpr(getContext());
7668 if (!NeonTypeConst)
7669 return nullptr;
7670
7671 // Determine the type of this overloaded NEON intrinsic.
7672 NeonTypeFlags Type(NeonTypeConst->getZExtValue());
7673 bool Usgn = Type.isUnsigned();
7674 bool Quad = Type.isQuad();
7675 const bool HasLegalHalfType = getTarget().hasLegalHalfType();
7676 const bool AllowBFloatArgsAndRet =
7677 getTargetHooks().getABIInfo().allowBFloatArgsAndRet();
7678
7679 llvm::FixedVectorType *VTy =
7680 GetNeonType(this, Type, HasLegalHalfType, false, AllowBFloatArgsAndRet);
7681 llvm::Type *Ty = VTy;
7682 if (!Ty)
7683 return nullptr;
7684
7685 auto getAlignmentValue32 = [&](Address addr) -> Value* {
7686 return Builder.getInt32(addr.getAlignment().getQuantity());
7687 };
7688
7689 unsigned Int = LLVMIntrinsic;
7690 if ((Modifier & UnsignedAlts) && !Usgn)
7691 Int = AltLLVMIntrinsic;
7692
7693 switch (BuiltinID) {
7694 default: break;
7695 case NEON::BI__builtin_neon_splat_lane_v:
7696 case NEON::BI__builtin_neon_splat_laneq_v:
7697 case NEON::BI__builtin_neon_splatq_lane_v:
7698 case NEON::BI__builtin_neon_splatq_laneq_v: {
7699 auto NumElements = VTy->getElementCount();
7700 if (BuiltinID == NEON::BI__builtin_neon_splatq_lane_v)
7701 NumElements = NumElements * 2;
7702 if (BuiltinID == NEON::BI__builtin_neon_splat_laneq_v)
7703 NumElements = NumElements.divideCoefficientBy(2);
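    // e.g. splatq_lane_v broadcasts a lane of a 64-bit vector into a 128-bit
    // result (twice the element count), while splat_laneq_v takes a lane of a
    // 128-bit vector into a 64-bit result (half the count).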
7704
7705 Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
7706 return EmitNeonSplat(Ops[0], cast<ConstantInt>(Ops[1]), NumElements);
7707 }
7708 case NEON::BI__builtin_neon_vpadd_v:
7709 case NEON::BI__builtin_neon_vpaddq_v:
7710 // We don't allow fp/int overloading of intrinsics.
7711 if (VTy->getElementType()->isFloatingPointTy() &&
7712 Int == Intrinsic::aarch64_neon_addp)
7713 Int = Intrinsic::aarch64_neon_faddp;
7714 break;
7715 case NEON::BI__builtin_neon_vabs_v:
7716 case NEON::BI__builtin_neon_vabsq_v:
7717 if (VTy->getElementType()->isFloatingPointTy())
7718 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, Ty), Ops, "vabs");
7719 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), Ops, "vabs");
7720 case NEON::BI__builtin_neon_vadd_v:
7721 case NEON::BI__builtin_neon_vaddq_v: {
7722 llvm::Type *VTy = llvm::FixedVectorType::get(Int8Ty, Quad ? 16 : 8);
7723 Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
7724 Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
7725 Ops[0] = Builder.CreateXor(Ops[0], Ops[1]);
7726 return Builder.CreateBitCast(Ops[0], Ty);
7727 }
7728 case NEON::BI__builtin_neon_vaddhn_v: {
7729 llvm::FixedVectorType *SrcTy =
7730 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
7731
7732 // %sum = add <4 x i32> %lhs, %rhs
7733 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
7734 Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
7735 Ops[0] = Builder.CreateAdd(Ops[0], Ops[1], "vaddhn");
7736
7737 // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
7738 Constant *ShiftAmt =
7739 ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
7740 Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vaddhn");
7741
7742 // %res = trunc <4 x i32> %high to <4 x i16>
7743 return Builder.CreateTrunc(Ops[0], VTy, "vaddhn");
7744 }
7745 case NEON::BI__builtin_neon_vcale_v:
7746 case NEON::BI__builtin_neon_vcaleq_v:
7747 case NEON::BI__builtin_neon_vcalt_v:
7748 case NEON::BI__builtin_neon_vcaltq_v:
7749 std::swap(Ops[0], Ops[1]);
7750 [[fallthrough]];
7751 case NEON::BI__builtin_neon_vcage_v:
7752 case NEON::BI__builtin_neon_vcageq_v:
7753 case NEON::BI__builtin_neon_vcagt_v:
7754 case NEON::BI__builtin_neon_vcagtq_v: {
7755 llvm::Type *Ty;
7756 switch (VTy->getScalarSizeInBits()) {
7757 default: llvm_unreachable("unexpected type");
7758 case 32:
7759 Ty = FloatTy;
7760 break;
7761 case 64:
7762 Ty = DoubleTy;
7763 break;
7764 case 16:
7765 Ty = HalfTy;
7766 break;
7767 }
7768 auto *VecFlt = llvm::FixedVectorType::get(Ty, VTy->getNumElements());
7769 llvm::Type *Tys[] = { VTy, VecFlt };
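    // e.g. for a 4 x float absolute compare the overload is selected with
    // Tys == { <4 x i32> /*mask result*/, <4 x float> /*compared operands*/ }.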
7770 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
7771 return EmitNeonCall(F, Ops, NameHint);
7772 }
7773 case NEON::BI__builtin_neon_vceqz_v:
7774 case NEON::BI__builtin_neon_vceqzq_v:
7775 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OEQ,
7776 ICmpInst::ICMP_EQ, "vceqz");
7777 case NEON::BI__builtin_neon_vcgez_v:
7778 case NEON::BI__builtin_neon_vcgezq_v:
7779 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGE,
7780 ICmpInst::ICMP_SGE, "vcgez");
7781 case NEON::BI__builtin_neon_vclez_v:
7782 case NEON::BI__builtin_neon_vclezq_v:
7783 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLE,
7784 ICmpInst::ICMP_SLE, "vclez");
7785 case NEON::BI__builtin_neon_vcgtz_v:
7786 case NEON::BI__builtin_neon_vcgtzq_v:
7787 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGT,
7788 ICmpInst::ICMP_SGT, "vcgtz");
7789 case NEON::BI__builtin_neon_vcltz_v:
7790 case NEON::BI__builtin_neon_vcltzq_v:
7791 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLT,
7792 ICmpInst::ICMP_SLT, "vcltz");
7793 case NEON::BI__builtin_neon_vclz_v:
7794 case NEON::BI__builtin_neon_vclzq_v:
7795     // We generate a target-independent intrinsic, which needs a second
7796     // argument specifying whether clz of zero is undefined; on ARM it isn't.
7797 Ops.push_back(Builder.getInt1(getTarget().isCLZForZeroUndef()));
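    // On ARM this becomes llvm.ctlz(x, false), i.e. clz of zero is well
    // defined and returns the element width.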
7798 break;
7799 case NEON::BI__builtin_neon_vcvt_f32_v:
7800 case NEON::BI__builtin_neon_vcvtq_f32_v:
7801 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
7802 Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, Quad),
7803 HasLegalHalfType);
7804 return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
7805 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
7806 case NEON::BI__builtin_neon_vcvt_f16_s16:
7807 case NEON::BI__builtin_neon_vcvt_f16_u16:
7808 case NEON::BI__builtin_neon_vcvtq_f16_s16:
7809 case NEON::BI__builtin_neon_vcvtq_f16_u16:
7810 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
7811 Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float16, false, Quad),
7812 HasLegalHalfType);
7813 return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
7814 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
7815 case NEON::BI__builtin_neon_vcvt_n_f16_s16:
7816 case NEON::BI__builtin_neon_vcvt_n_f16_u16:
7817 case NEON::BI__builtin_neon_vcvtq_n_f16_s16:
7818 case NEON::BI__builtin_neon_vcvtq_n_f16_u16: {
7819 llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty };
7820 Function *F = CGM.getIntrinsic(Int, Tys);
7821 return EmitNeonCall(F, Ops, "vcvt_n");
7822 }
7823 case NEON::BI__builtin_neon_vcvt_n_f32_v:
7824 case NEON::BI__builtin_neon_vcvt_n_f64_v:
7825 case NEON::BI__builtin_neon_vcvtq_n_f32_v:
7826 case NEON::BI__builtin_neon_vcvtq_n_f64_v: {
7827 llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty };
7828 Int = Usgn ? LLVMIntrinsic : AltLLVMIntrinsic;
7829 Function *F = CGM.getIntrinsic(Int, Tys);
7830 return EmitNeonCall(F, Ops, "vcvt_n");
7831 }
7832 case NEON::BI__builtin_neon_vcvt_n_s16_f16:
7833 case NEON::BI__builtin_neon_vcvt_n_s32_v:
7834 case NEON::BI__builtin_neon_vcvt_n_u16_f16:
7835 case NEON::BI__builtin_neon_vcvt_n_u32_v:
7836 case NEON::BI__builtin_neon_vcvt_n_s64_v:
7837 case NEON::BI__builtin_neon_vcvt_n_u64_v:
7838 case NEON::BI__builtin_neon_vcvtq_n_s16_f16:
7839 case NEON::BI__builtin_neon_vcvtq_n_s32_v:
7840 case NEON::BI__builtin_neon_vcvtq_n_u16_f16:
7841 case NEON::BI__builtin_neon_vcvtq_n_u32_v:
7842 case NEON::BI__builtin_neon_vcvtq_n_s64_v:
7843 case NEON::BI__builtin_neon_vcvtq_n_u64_v: {
7844 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
7845 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
7846 return EmitNeonCall(F, Ops, "vcvt_n");
7847 }
7848 case NEON::BI__builtin_neon_vcvt_s32_v:
7849 case NEON::BI__builtin_neon_vcvt_u32_v:
7850 case NEON::BI__builtin_neon_vcvt_s64_v:
7851 case NEON::BI__builtin_neon_vcvt_u64_v:
7852 case NEON::BI__builtin_neon_vcvt_s16_f16:
7853 case NEON::BI__builtin_neon_vcvt_u16_f16:
7854 case NEON::BI__builtin_neon_vcvtq_s32_v:
7855 case NEON::BI__builtin_neon_vcvtq_u32_v:
7856 case NEON::BI__builtin_neon_vcvtq_s64_v:
7857 case NEON::BI__builtin_neon_vcvtq_u64_v:
7858 case NEON::BI__builtin_neon_vcvtq_s16_f16:
7859 case NEON::BI__builtin_neon_vcvtq_u16_f16: {
7860 Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type));
7861 return Usgn ? Builder.CreateFPToUI(Ops[0], Ty, "vcvt")
7862 : Builder.CreateFPToSI(Ops[0], Ty, "vcvt");
7863 }
7864 case NEON::BI__builtin_neon_vcvta_s16_f16:
7865 case NEON::BI__builtin_neon_vcvta_s32_v:
7866 case NEON::BI__builtin_neon_vcvta_s64_v:
7867 case NEON::BI__builtin_neon_vcvta_u16_f16:
7868 case NEON::BI__builtin_neon_vcvta_u32_v:
7869 case NEON::BI__builtin_neon_vcvta_u64_v:
7870 case NEON::BI__builtin_neon_vcvtaq_s16_f16:
7871 case NEON::BI__builtin_neon_vcvtaq_s32_v:
7872 case NEON::BI__builtin_neon_vcvtaq_s64_v:
7873 case NEON::BI__builtin_neon_vcvtaq_u16_f16:
7874 case NEON::BI__builtin_neon_vcvtaq_u32_v:
7875 case NEON::BI__builtin_neon_vcvtaq_u64_v:
7876 case NEON::BI__builtin_neon_vcvtn_s16_f16:
7877 case NEON::BI__builtin_neon_vcvtn_s32_v:
7878 case NEON::BI__builtin_neon_vcvtn_s64_v:
7879 case NEON::BI__builtin_neon_vcvtn_u16_f16:
7880 case NEON::BI__builtin_neon_vcvtn_u32_v:
7881 case NEON::BI__builtin_neon_vcvtn_u64_v:
7882 case NEON::BI__builtin_neon_vcvtnq_s16_f16:
7883 case NEON::BI__builtin_neon_vcvtnq_s32_v:
7884 case NEON::BI__builtin_neon_vcvtnq_s64_v:
7885 case NEON::BI__builtin_neon_vcvtnq_u16_f16:
7886 case NEON::BI__builtin_neon_vcvtnq_u32_v:
7887 case NEON::BI__builtin_neon_vcvtnq_u64_v:
7888 case NEON::BI__builtin_neon_vcvtp_s16_f16:
7889 case NEON::BI__builtin_neon_vcvtp_s32_v:
7890 case NEON::BI__builtin_neon_vcvtp_s64_v:
7891 case NEON::BI__builtin_neon_vcvtp_u16_f16:
7892 case NEON::BI__builtin_neon_vcvtp_u32_v:
7893 case NEON::BI__builtin_neon_vcvtp_u64_v:
7894 case NEON::BI__builtin_neon_vcvtpq_s16_f16:
7895 case NEON::BI__builtin_neon_vcvtpq_s32_v:
7896 case NEON::BI__builtin_neon_vcvtpq_s64_v:
7897 case NEON::BI__builtin_neon_vcvtpq_u16_f16:
7898 case NEON::BI__builtin_neon_vcvtpq_u32_v:
7899 case NEON::BI__builtin_neon_vcvtpq_u64_v:
7900 case NEON::BI__builtin_neon_vcvtm_s16_f16:
7901 case NEON::BI__builtin_neon_vcvtm_s32_v:
7902 case NEON::BI__builtin_neon_vcvtm_s64_v:
7903 case NEON::BI__builtin_neon_vcvtm_u16_f16:
7904 case NEON::BI__builtin_neon_vcvtm_u32_v:
7905 case NEON::BI__builtin_neon_vcvtm_u64_v:
7906 case NEON::BI__builtin_neon_vcvtmq_s16_f16:
7907 case NEON::BI__builtin_neon_vcvtmq_s32_v:
7908 case NEON::BI__builtin_neon_vcvtmq_s64_v:
7909 case NEON::BI__builtin_neon_vcvtmq_u16_f16:
7910 case NEON::BI__builtin_neon_vcvtmq_u32_v:
7911 case NEON::BI__builtin_neon_vcvtmq_u64_v: {
7912 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
7913 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
7914 }
7915 case NEON::BI__builtin_neon_vcvtx_f32_v: {
7916 llvm::Type *Tys[2] = { VTy->getTruncatedElementVectorType(VTy), Ty};
7917 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
7918
7919 }
7920 case NEON::BI__builtin_neon_vext_v:
7921 case NEON::BI__builtin_neon_vextq_v: {
7922 int CV = cast<ConstantInt>(Ops[2])->getSExtValue();
7923 SmallVector<int, 16> Indices;
7924 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
7925 Indices.push_back(i+CV);
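    // e.g. CV == 3 with <8 x i8> operands selects lanes {3..10} of the
    // concatenated 16-byte value, matching the EXT instruction.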
7926
7927 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
7928 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7929 return Builder.CreateShuffleVector(Ops[0], Ops[1], Indices, "vext");
7930 }
7931 case NEON::BI__builtin_neon_vfma_v:
7932 case NEON::BI__builtin_neon_vfmaq_v: {
7933 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
7934 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7935 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
7936
7937     // The NEON intrinsic puts the accumulator first, unlike the LLVM fma.
7938 return emitCallMaybeConstrainedFPBuiltin(
7939 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
7940 {Ops[1], Ops[2], Ops[0]});
7941 }
7942 case NEON::BI__builtin_neon_vld1_v:
7943 case NEON::BI__builtin_neon_vld1q_v: {
7944 llvm::Type *Tys[] = {Ty, Int8PtrTy};
7945 Ops.push_back(getAlignmentValue32(PtrOp0));
7946 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vld1");
7947 }
7948 case NEON::BI__builtin_neon_vld1_x2_v:
7949 case NEON::BI__builtin_neon_vld1q_x2_v:
7950 case NEON::BI__builtin_neon_vld1_x3_v:
7951 case NEON::BI__builtin_neon_vld1q_x3_v:
7952 case NEON::BI__builtin_neon_vld1_x4_v:
7953 case NEON::BI__builtin_neon_vld1q_x4_v: {
7954 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
7955 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
7956 Ops[1] = Builder.CreateCall(F, Ops[1], "vld1xN");
7957 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7958 }
7959 case NEON::BI__builtin_neon_vld2_v:
7960 case NEON::BI__builtin_neon_vld2q_v:
7961 case NEON::BI__builtin_neon_vld3_v:
7962 case NEON::BI__builtin_neon_vld3q_v:
7963 case NEON::BI__builtin_neon_vld4_v:
7964 case NEON::BI__builtin_neon_vld4q_v:
7965 case NEON::BI__builtin_neon_vld2_dup_v:
7966 case NEON::BI__builtin_neon_vld2q_dup_v:
7967 case NEON::BI__builtin_neon_vld3_dup_v:
7968 case NEON::BI__builtin_neon_vld3q_dup_v:
7969 case NEON::BI__builtin_neon_vld4_dup_v:
7970 case NEON::BI__builtin_neon_vld4q_dup_v: {
7971 llvm::Type *Tys[] = {Ty, Int8PtrTy};
7972 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
7973 Value *Align = getAlignmentValue32(PtrOp1);
7974 Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, NameHint);
7975 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7976 }
7977 case NEON::BI__builtin_neon_vld1_dup_v:
7978 case NEON::BI__builtin_neon_vld1q_dup_v: {
7979 Value *V = PoisonValue::get(Ty);
7980 PtrOp0 = PtrOp0.withElementType(VTy->getElementType());
7981 LoadInst *Ld = Builder.CreateLoad(PtrOp0);
7982 llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
7983 Ops[0] = Builder.CreateInsertElement(V, Ld, CI);
7984 return EmitNeonSplat(Ops[0], CI);
7985 }
7986 case NEON::BI__builtin_neon_vld2_lane_v:
7987 case NEON::BI__builtin_neon_vld2q_lane_v:
7988 case NEON::BI__builtin_neon_vld3_lane_v:
7989 case NEON::BI__builtin_neon_vld3q_lane_v:
7990 case NEON::BI__builtin_neon_vld4_lane_v:
7991 case NEON::BI__builtin_neon_vld4q_lane_v: {
7992 llvm::Type *Tys[] = {Ty, Int8PtrTy};
7993 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
7994 for (unsigned I = 2; I < Ops.size() - 1; ++I)
7995 Ops[I] = Builder.CreateBitCast(Ops[I], Ty);
7996 Ops.push_back(getAlignmentValue32(PtrOp1));
7997 Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), NameHint);
7998 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7999 }
8000 case NEON::BI__builtin_neon_vmovl_v: {
8001 llvm::FixedVectorType *DTy =
8002 llvm::FixedVectorType::getTruncatedElementVectorType(VTy);
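    // vmovl widens each lane to twice its width, e.g. <4 x i16> -> <4 x i32>,
    // using zext for unsigned element types and sext otherwise.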
8003 Ops[0] = Builder.CreateBitCast(Ops[0], DTy);
8004 if (Usgn)
8005 return Builder.CreateZExt(Ops[0], Ty, "vmovl");
8006 return Builder.CreateSExt(Ops[0], Ty, "vmovl");
8007 }
8008 case NEON::BI__builtin_neon_vmovn_v: {
8009 llvm::FixedVectorType *QTy =
8010 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
8011 Ops[0] = Builder.CreateBitCast(Ops[0], QTy);
8012 return Builder.CreateTrunc(Ops[0], Ty, "vmovn");
8013 }
8014 case NEON::BI__builtin_neon_vmull_v:
8015     // FIXME: the integer vmull operations could be emitted in terms of pure
8016     // LLVM IR (2 exts followed by a mul). Unfortunately LLVM has a habit of
8017     // hoisting the exts outside loops. Until GlobalISel comes along and can
8018     // see through such movement, this leads to bad CodeGen. So we need an
8019     // intrinsic for now.
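    // For reference, the pure-IR form would be roughly:
    //   %lhs.ext = sext <4 x i16> %lhs to <4 x i32>
    //   %rhs.ext = sext <4 x i16> %rhs to <4 x i32>
    //   %prod    = mul <4 x i32> %lhs.ext, %rhs.ext
    // (zext instead of sext for the unsigned variants).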
8020 Int = Usgn ? Intrinsic::arm_neon_vmullu : Intrinsic::arm_neon_vmulls;
8021 Int = Type.isPoly() ? (unsigned)Intrinsic::arm_neon_vmullp : Int;
8022 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
8023 case NEON::BI__builtin_neon_vpadal_v:
8024 case NEON::BI__builtin_neon_vpadalq_v: {
8025 // The source operand type has twice as many elements of half the size.
8026 unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
8027 llvm::Type *EltTy =
8028 llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
8029 auto *NarrowTy =
8030 llvm::FixedVectorType::get(EltTy, VTy->getNumElements() * 2);
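    // e.g. for a <4 x i32> result the pairwise source type is <8 x i16>.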
8031 llvm::Type *Tys[2] = { Ty, NarrowTy };
8032 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
8033 }
8034 case NEON::BI__builtin_neon_vpaddl_v:
8035 case NEON::BI__builtin_neon_vpaddlq_v: {
8036 // The source operand type has twice as many elements of half the size.
8037 unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
8038 llvm::Type *EltTy = llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
8039 auto *NarrowTy =
8040 llvm::FixedVectorType::get(EltTy, VTy->getNumElements() * 2);
8041 llvm::Type *Tys[2] = { Ty, NarrowTy };
8042 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vpaddl");
8043 }
8044 case NEON::BI__builtin_neon_vqdmlal_v:
8045 case NEON::BI__builtin_neon_vqdmlsl_v: {
8046 SmallVector<Value *, 2> MulOps(Ops.begin() + 1, Ops.end());
8047 Ops[1] =
8048 EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), MulOps, "vqdmlal");
8049 Ops.resize(2);
8050 return EmitNeonCall(CGM.getIntrinsic(AltLLVMIntrinsic, Ty), Ops, NameHint);
8051 }
8052 case NEON::BI__builtin_neon_vqdmulhq_lane_v:
8053 case NEON::BI__builtin_neon_vqdmulh_lane_v:
8054 case NEON::BI__builtin_neon_vqrdmulhq_lane_v:
8055 case NEON::BI__builtin_neon_vqrdmulh_lane_v: {
8056 auto *RTy = cast<llvm::FixedVectorType>(Ty);
8057 if (BuiltinID == NEON::BI__builtin_neon_vqdmulhq_lane_v ||
8058 BuiltinID == NEON::BI__builtin_neon_vqrdmulhq_lane_v)
8059 RTy = llvm::FixedVectorType::get(RTy->getElementType(),
8060 RTy->getNumElements() * 2);
8061 llvm::Type *Tys[2] = {
8062 RTy, GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
8063 /*isQuad*/ false))};
8064 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
8065 }
8066 case NEON::BI__builtin_neon_vqdmulhq_laneq_v:
8067 case NEON::BI__builtin_neon_vqdmulh_laneq_v:
8068 case NEON::BI__builtin_neon_vqrdmulhq_laneq_v:
8069 case NEON::BI__builtin_neon_vqrdmulh_laneq_v: {
8070 llvm::Type *Tys[2] = {
8071 Ty, GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
8072 /*isQuad*/ true))};
8073 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
8074 }
8075 case NEON::BI__builtin_neon_vqshl_n_v:
8076 case NEON::BI__builtin_neon_vqshlq_n_v:
8077 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshl_n",
8078 1, false);
8079 case NEON::BI__builtin_neon_vqshlu_n_v:
8080 case NEON::BI__builtin_neon_vqshluq_n_v:
8081 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshlu_n",
8082 1, false);
8083 case NEON::BI__builtin_neon_vrecpe_v:
8084 case NEON::BI__builtin_neon_vrecpeq_v:
8085 case NEON::BI__builtin_neon_vrsqrte_v:
8086 case NEON::BI__builtin_neon_vrsqrteq_v:
8087 Int = Ty->isFPOrFPVectorTy() ? LLVMIntrinsic : AltLLVMIntrinsic;
8088 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint);
8089 case NEON::BI__builtin_neon_vrndi_v:
8090 case NEON::BI__builtin_neon_vrndiq_v:
8091 Int = Builder.getIsFPConstrained()
8092 ? Intrinsic::experimental_constrained_nearbyint
8093 : Intrinsic::nearbyint;
8094 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint);
8095 case NEON::BI__builtin_neon_vrshr_n_v:
8096 case NEON::BI__builtin_neon_vrshrq_n_v:
8097 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshr_n",
8098 1, true);
8099 case NEON::BI__builtin_neon_vsha512hq_u64:
8100 case NEON::BI__builtin_neon_vsha512h2q_u64:
8101 case NEON::BI__builtin_neon_vsha512su0q_u64:
8102 case NEON::BI__builtin_neon_vsha512su1q_u64: {
8103 Function *F = CGM.getIntrinsic(Int);
8104 return EmitNeonCall(F, Ops, "");
8105 }
8106 case NEON::BI__builtin_neon_vshl_n_v:
8107 case NEON::BI__builtin_neon_vshlq_n_v:
8108 Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false);
8109 return Builder.CreateShl(Builder.CreateBitCast(Ops[0],Ty), Ops[1],
8110 "vshl_n");
8111 case NEON::BI__builtin_neon_vshll_n_v: {
8112 llvm::FixedVectorType *SrcTy =
8113 llvm::FixedVectorType::getTruncatedElementVectorType(VTy);
8114 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
8115 if (Usgn)
8116 Ops[0] = Builder.CreateZExt(Ops[0], VTy);
8117 else
8118 Ops[0] = Builder.CreateSExt(Ops[0], VTy);
8119 Ops[1] = EmitNeonShiftVector(Ops[1], VTy, false);
8120 return Builder.CreateShl(Ops[0], Ops[1], "vshll_n");
8121 }
8122 case NEON::BI__builtin_neon_vshrn_n_v: {
8123 llvm::FixedVectorType *SrcTy =
8124 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
8125 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
8126 Ops[1] = EmitNeonShiftVector(Ops[1], SrcTy, false);
8127 if (Usgn)
8128 Ops[0] = Builder.CreateLShr(Ops[0], Ops[1]);
8129 else
8130 Ops[0] = Builder.CreateAShr(Ops[0], Ops[1]);
8131 return Builder.CreateTrunc(Ops[0], Ty, "vshrn_n");
8132 }
8133 case NEON::BI__builtin_neon_vshr_n_v:
8134 case NEON::BI__builtin_neon_vshrq_n_v:
8135 return EmitNeonRShiftImm(Ops[0], Ops[1], Ty, Usgn, "vshr_n");
8136 case NEON::BI__builtin_neon_vst1_v:
8137 case NEON::BI__builtin_neon_vst1q_v:
8138 case NEON::BI__builtin_neon_vst2_v:
8139 case NEON::BI__builtin_neon_vst2q_v:
8140 case NEON::BI__builtin_neon_vst3_v:
8141 case NEON::BI__builtin_neon_vst3q_v:
8142 case NEON::BI__builtin_neon_vst4_v:
8143 case NEON::BI__builtin_neon_vst4q_v:
8144 case NEON::BI__builtin_neon_vst2_lane_v:
8145 case NEON::BI__builtin_neon_vst2q_lane_v:
8146 case NEON::BI__builtin_neon_vst3_lane_v:
8147 case NEON::BI__builtin_neon_vst3q_lane_v:
8148 case NEON::BI__builtin_neon_vst4_lane_v:
8149 case NEON::BI__builtin_neon_vst4q_lane_v: {
8150 llvm::Type *Tys[] = {Int8PtrTy, Ty};
8151 Ops.push_back(getAlignmentValue32(PtrOp0));
8152 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "");
8153 }
8154 case NEON::BI__builtin_neon_vsm3partw1q_u32:
8155 case NEON::BI__builtin_neon_vsm3partw2q_u32:
8156 case NEON::BI__builtin_neon_vsm3ss1q_u32:
8157 case NEON::BI__builtin_neon_vsm4ekeyq_u32:
8158 case NEON::BI__builtin_neon_vsm4eq_u32: {
8159 Function *F = CGM.getIntrinsic(Int);
8160 return EmitNeonCall(F, Ops, "");
8161 }
8162 case NEON::BI__builtin_neon_vsm3tt1aq_u32:
8163 case NEON::BI__builtin_neon_vsm3tt1bq_u32:
8164 case NEON::BI__builtin_neon_vsm3tt2aq_u32:
8165 case NEON::BI__builtin_neon_vsm3tt2bq_u32: {
8166 Function *F = CGM.getIntrinsic(Int);
8167 Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
8168 return EmitNeonCall(F, Ops, "");
8169 }
8170 case NEON::BI__builtin_neon_vst1_x2_v:
8171 case NEON::BI__builtin_neon_vst1q_x2_v:
8172 case NEON::BI__builtin_neon_vst1_x3_v:
8173 case NEON::BI__builtin_neon_vst1q_x3_v:
8174 case NEON::BI__builtin_neon_vst1_x4_v:
8175 case NEON::BI__builtin_neon_vst1q_x4_v: {
8176     // TODO: Currently in AArch32 mode the pointer operand comes first, whereas
8177     // in AArch64 it comes last. We may want to stick to one or the other.
8178 if (Arch == llvm::Triple::aarch64 || Arch == llvm::Triple::aarch64_be ||
8179 Arch == llvm::Triple::aarch64_32) {
8180 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
8181 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
8182 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "");
8183 }
8184 llvm::Type *Tys[2] = {UnqualPtrTy, VTy};
8185 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "");
8186 }
8187 case NEON::BI__builtin_neon_vsubhn_v: {
8188 llvm::FixedVectorType *SrcTy =
8189 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
8190
8191     // %diff = sub <4 x i32> %lhs, %rhs
8192 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
8193 Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
8194 Ops[0] = Builder.CreateSub(Ops[0], Ops[1], "vsubhn");
8195
8196     // %high = lshr <4 x i32> %diff, <i32 16, i32 16, i32 16, i32 16>
8197 Constant *ShiftAmt =
8198 ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
8199 Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vsubhn");
8200
8201 // %res = trunc <4 x i32> %high to <4 x i16>
8202 return Builder.CreateTrunc(Ops[0], VTy, "vsubhn");
8203 }
8204 case NEON::BI__builtin_neon_vtrn_v:
8205 case NEON::BI__builtin_neon_vtrnq_v: {
8206 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
8207 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
8208 Value *SV = nullptr;
8209
8210 for (unsigned vi = 0; vi != 2; ++vi) {
8211 SmallVector<int, 16> Indices;
8212 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
8213 Indices.push_back(i+vi);
8214 Indices.push_back(i+e+vi);
8215 }
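      // e.g. for <4 x i32> operands, vi == 0 gives indices {0,4,2,6} and
      // vi == 1 gives {1,5,3,7} (the two transposed results).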
8216 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
8217 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
8218 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
8219 }
8220 return SV;
8221 }
8222 case NEON::BI__builtin_neon_vtst_v:
8223 case NEON::BI__builtin_neon_vtstq_v: {
8224 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
8225 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
8226 Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
8227 Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
8228 ConstantAggregateZero::get(Ty));
8229 return Builder.CreateSExt(Ops[0], Ty, "vtst");
8230 }
8231 case NEON::BI__builtin_neon_vuzp_v:
8232 case NEON::BI__builtin_neon_vuzpq_v: {
8233 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
8234 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
8235 Value *SV = nullptr;
8236
8237 for (unsigned vi = 0; vi != 2; ++vi) {
8238 SmallVector<int, 16> Indices;
8239 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
8240 Indices.push_back(2*i+vi);
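      // e.g. for <4 x i32> operands, vi == 0 gives indices {0,2,4,6} and
      // vi == 1 gives {1,3,5,7} (the even and odd unzipped results).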
8241
8242 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
8243 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
8244 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
8245 }
8246 return SV;
8247 }
8248 case NEON::BI__builtin_neon_vxarq_u64: {
8249 Function *F = CGM.getIntrinsic(Int);
8250 Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
8251 return EmitNeonCall(F, Ops, "");
8252 }
8253 case NEON::BI__builtin_neon_vzip_v:
8254 case NEON::BI__builtin_neon_vzipq_v: {
8255 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
8256 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
8257 Value *SV = nullptr;
8258
8259 for (unsigned vi = 0; vi != 2; ++vi) {
8260 SmallVector<int, 16> Indices;
8261 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
8262 Indices.push_back((i + vi*e) >> 1);
8263 Indices.push_back(((i + vi*e) >> 1)+e);
8264 }
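      // e.g. for <4 x i32> operands, vi == 0 gives indices {0,4,1,5} and
      // vi == 1 gives {2,6,3,7} (the low and high zipped results).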
8265 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
8266 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
8267 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
8268 }
8269 return SV;
8270 }
8271 case NEON::BI__builtin_neon_vdot_s32:
8272 case NEON::BI__builtin_neon_vdot_u32:
8273 case NEON::BI__builtin_neon_vdotq_s32:
8274 case NEON::BI__builtin_neon_vdotq_u32: {
8275 auto *InputTy =
8276 llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
8277 llvm::Type *Tys[2] = { Ty, InputTy };
8278 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vdot");
8279 }
8280 case NEON::BI__builtin_neon_vfmlal_low_f16:
8281 case NEON::BI__builtin_neon_vfmlalq_low_f16: {
8282 auto *InputTy =
8283 llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
8284 llvm::Type *Tys[2] = { Ty, InputTy };
8285 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlal_low");
8286 }
8287 case NEON::BI__builtin_neon_vfmlsl_low_f16:
8288 case NEON::BI__builtin_neon_vfmlslq_low_f16: {
8289 auto *InputTy =
8290 llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
8291 llvm::Type *Tys[2] = { Ty, InputTy };
8292 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlsl_low");
8293 }
8294 case NEON::BI__builtin_neon_vfmlal_high_f16:
8295 case NEON::BI__builtin_neon_vfmlalq_high_f16: {
8296 auto *InputTy =
8297 llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
8298 llvm::Type *Tys[2] = { Ty, InputTy };
8299 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlal_high");
8300 }
8301 case NEON::BI__builtin_neon_vfmlsl_high_f16:
8302 case NEON::BI__builtin_neon_vfmlslq_high_f16: {
8303 auto *InputTy =
8304 llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
8305 llvm::Type *Tys[2] = { Ty, InputTy };
8306 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlsl_high");
8307 }
8308 case NEON::BI__builtin_neon_vmmlaq_s32:
8309 case NEON::BI__builtin_neon_vmmlaq_u32: {
8310 auto *InputTy =
8311 llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
8312 llvm::Type *Tys[2] = { Ty, InputTy };
8313 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vmmla");
8314 }
8315 case NEON::BI__builtin_neon_vusmmlaq_s32: {
8316 auto *InputTy =
8317 llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
8318 llvm::Type *Tys[2] = { Ty, InputTy };
8319 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vusmmla");
8320 }
8321 case NEON::BI__builtin_neon_vusdot_s32:
8322 case NEON::BI__builtin_neon_vusdotq_s32: {
8323 auto *InputTy =
8324 llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
8325 llvm::Type *Tys[2] = { Ty, InputTy };
8326 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vusdot");
8327 }
8328 case NEON::BI__builtin_neon_vbfdot_f32:
8329 case NEON::BI__builtin_neon_vbfdotq_f32: {
8330 llvm::Type *InputTy =
8331 llvm::FixedVectorType::get(BFloatTy, Ty->getPrimitiveSizeInBits() / 16);
8332 llvm::Type *Tys[2] = { Ty, InputTy };
8333 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vbfdot");
8334 }
8335 case NEON::BI__builtin_neon___a32_vcvt_bf16_f32: {
8336 llvm::Type *Tys[1] = { Ty };
8337 Function *F = CGM.getIntrinsic(Int, Tys);
8338 return EmitNeonCall(F, Ops, "vcvtfp2bf");
8339 }
8340
8341 }
8342
8343 assert(Int && "Expected valid intrinsic number");
8344
8345 // Determine the type(s) of this overloaded AArch64 intrinsic.
8346 Function *F = LookupNeonLLVMIntrinsic(Int, Modifier, Ty, E);
8347
8348 Value *Result = EmitNeonCall(F, Ops, NameHint);
8349 llvm::Type *ResultType = ConvertType(E->getType());
8350   // Cast a one-element vector result from the AArch64 intrinsic back to the
8351   // scalar type expected by the builtin.
8352 return Builder.CreateBitCast(Result, ResultType, NameHint);
8353 }
8354
8355 Value *CodeGenFunction::EmitAArch64CompareBuiltinExpr(
8356 Value *Op, llvm::Type *Ty, const CmpInst::Predicate Fp,
8357 const CmpInst::Predicate Ip, const Twine &Name) {
8358 llvm::Type *OTy = Op->getType();
8359
8360 // FIXME: this is utterly horrific. We should not be looking at previous
8361 // codegen context to find out what needs doing. Unfortunately TableGen
8362 // currently gives us exactly the same calls for vceqz_f32 and vceqz_s32
8363 // (etc).
8364 if (BitCastInst *BI = dyn_cast<BitCastInst>(Op))
8365 OTy = BI->getOperand(0)->getType();
8366
8367 Op = Builder.CreateBitCast(Op, OTy);
8368 if (OTy->getScalarType()->isFloatingPointTy()) {
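    // Equality uses a quiet compare while the other predicates use a
    // signaling compare (CreateFCmpS), matching IEEE 754 semantics where
    // ordered relational comparisons raise Invalid on NaN operands but
    // equality does not.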
8369 if (Fp == CmpInst::FCMP_OEQ)
8370 Op = Builder.CreateFCmp(Fp, Op, Constant::getNullValue(OTy));
8371 else
8372 Op = Builder.CreateFCmpS(Fp, Op, Constant::getNullValue(OTy));
8373 } else {
8374 Op = Builder.CreateICmp(Ip, Op, Constant::getNullValue(OTy));
8375 }
8376 return Builder.CreateSExt(Op, Ty, Name);
8377 }
8378
8379 static Value *packTBLDVectorList(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
8380 Value *ExtOp, Value *IndexOp,
8381 llvm::Type *ResTy, unsigned IntID,
8382 const char *Name) {
8383 SmallVector<Value *, 2> TblOps;
8384 if (ExtOp)
8385 TblOps.push_back(ExtOp);
8386
8387   // Build a vector containing sequential numbers like (0, 1, 2, ..., 15).
8388 SmallVector<int, 16> Indices;
8389 auto *TblTy = cast<llvm::FixedVectorType>(Ops[0]->getType());
8390 for (unsigned i = 0, e = TblTy->getNumElements(); i != e; ++i) {
8391 Indices.push_back(2*i);
8392 Indices.push_back(2*i+1);
8393 }
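  // These indices concatenate two adjacent 64-bit table registers into a
  // single 128-bit table operand, e.g. a pair of <8 x i8> into <16 x i8>.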
8394
8395 int PairPos = 0, End = Ops.size() - 1;
8396 while (PairPos < End) {
8397 TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
8398 Ops[PairPos+1], Indices,
8399 Name));
8400 PairPos += 2;
8401 }
8402
8403   // If there's an odd number of 64-bit lookup tables, fill the high 64 bits
8404   // of the last 128-bit lookup table with zeros.
8405 if (PairPos == End) {
8406 Value *ZeroTbl = ConstantAggregateZero::get(TblTy);
8407 TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
8408 ZeroTbl, Indices, Name));
8409 }
8410
8411 Function *TblF;
8412 TblOps.push_back(IndexOp);
8413 TblF = CGF.CGM.getIntrinsic(IntID, ResTy);
8414
8415 return CGF.EmitNeonCall(TblF, TblOps, Name);
8416 }
8417
8418 Value *CodeGenFunction::GetValueForARMHint(unsigned BuiltinID) {
8419 unsigned Value;
8420 switch (BuiltinID) {
8421 default:
8422 return nullptr;
8423 case clang::ARM::BI__builtin_arm_nop:
8424 Value = 0;
8425 break;
8426 case clang::ARM::BI__builtin_arm_yield:
8427 case clang::ARM::BI__yield:
8428 Value = 1;
8429 break;
8430 case clang::ARM::BI__builtin_arm_wfe:
8431 case clang::ARM::BI__wfe:
8432 Value = 2;
8433 break;
8434 case clang::ARM::BI__builtin_arm_wfi:
8435 case clang::ARM::BI__wfi:
8436 Value = 3;
8437 break;
8438 case clang::ARM::BI__builtin_arm_sev:
8439 case clang::ARM::BI__sev:
8440 Value = 4;
8441 break;
8442 case clang::ARM::BI__builtin_arm_sevl:
8443 case clang::ARM::BI__sevl:
8444 Value = 5;
8445 break;
8446 }
8447
8448 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_hint),
8449 llvm::ConstantInt::get(Int32Ty, Value));
8450 }
8451
8452 enum SpecialRegisterAccessKind {
8453 NormalRead,
8454 VolatileRead,
8455 Write,
8456 };
8457
8458 // Generates the IR for __builtin_read_exec_*.
8459 // Lowers the builtin to the amdgcn_ballot intrinsic.
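// For the *_hi variant the 64-bit ballot result is shifted right by 32 and
// truncated, yielding the upper half of the exec mask as an i32.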
8460 static Value *EmitAMDGCNBallotForExec(CodeGenFunction &CGF, const CallExpr *E,
8461 llvm::Type *RegisterType,
8462 llvm::Type *ValueType, bool isExecHi) {
8463 CodeGen::CGBuilderTy &Builder = CGF.Builder;
8464 CodeGen::CodeGenModule &CGM = CGF.CGM;
8465
8466 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, {RegisterType});
8467 llvm::Value *Call = Builder.CreateCall(F, {Builder.getInt1(true)});
8468
8469 if (isExecHi) {
8470 Value *Rt2 = Builder.CreateLShr(Call, 32);
8471 Rt2 = Builder.CreateTrunc(Rt2, CGF.Int32Ty);
8472 return Rt2;
8473 }
8474
8475 return Call;
8476 }
8477
8478 // Generates the IR for the read/write special register builtin.
8479 // ValueType is the type of the value that is to be written or read;
8480 // RegisterType is the type of the register being written to or read from.
8481 static Value *EmitSpecialRegisterBuiltin(CodeGenFunction &CGF,
8482 const CallExpr *E,
8483 llvm::Type *RegisterType,
8484 llvm::Type *ValueType,
8485 SpecialRegisterAccessKind AccessKind,
8486 StringRef SysReg = "") {
8487   // The read/write register intrinsics only support 32-, 64- and 128-bit operations.
8488 assert((RegisterType->isIntegerTy(32) || RegisterType->isIntegerTy(64) ||
8489 RegisterType->isIntegerTy(128)) &&
8490 "Unsupported size for register.");
8491
8492 CodeGen::CGBuilderTy &Builder = CGF.Builder;
8493 CodeGen::CodeGenModule &CGM = CGF.CGM;
8494 LLVMContext &Context = CGM.getLLVMContext();
8495
8496 if (SysReg.empty()) {
8497 const Expr *SysRegStrExpr = E->getArg(0)->IgnoreParenCasts();
8498 SysReg = cast<clang::StringLiteral>(SysRegStrExpr)->getString();
8499 }
8500
8501 llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysReg) };
8502 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
8503 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
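  // The register is identified purely by name metadata; the emitted IR looks
  // roughly like (sketch):
  //   %v = call i64 @llvm.read_register.i64(metadata !0), with !0 = !{!"..."}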
8504
8505 llvm::Type *Types[] = { RegisterType };
8506
8507 bool MixedTypes = RegisterType->isIntegerTy(64) && ValueType->isIntegerTy(32);
8508 assert(!(RegisterType->isIntegerTy(32) && ValueType->isIntegerTy(64))
8509 && "Can't fit 64-bit value in 32-bit register");
8510
8511 if (AccessKind != Write) {
8512 assert(AccessKind == NormalRead || AccessKind == VolatileRead);
8513 llvm::Function *F = CGM.getIntrinsic(
8514 AccessKind == VolatileRead ? llvm::Intrinsic::read_volatile_register
8515 : llvm::Intrinsic::read_register,
8516 Types);
8517 llvm::Value *Call = Builder.CreateCall(F, Metadata);
8518
8519 if (MixedTypes)
8520       // Read into a 64-bit register and then truncate the result to 32 bits.
8521 return Builder.CreateTrunc(Call, ValueType);
8522
8523 if (ValueType->isPointerTy())
8524 // Have i32/i64 result (Call) but want to return a VoidPtrTy (i8*).
8525 return Builder.CreateIntToPtr(Call, ValueType);
8526
8527 return Call;
8528 }
8529
8530 llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types);
8531 llvm::Value *ArgValue = CGF.EmitScalarExpr(E->getArg(1));
8532 if (MixedTypes) {
8533 // Extend 32 bit write value to 64 bit to pass to write.
8534 ArgValue = Builder.CreateZExt(ArgValue, RegisterType);
8535 return Builder.CreateCall(F, { Metadata, ArgValue });
8536 }
8537
8538 if (ValueType->isPointerTy()) {
8539 // Have VoidPtrTy ArgValue but want to return an i32/i64.
8540 ArgValue = Builder.CreatePtrToInt(ArgValue, RegisterType);
8541 return Builder.CreateCall(F, { Metadata, ArgValue });
8542 }
8543
8544 return Builder.CreateCall(F, { Metadata, ArgValue });
8545 }
8546
8547 /// Return true if BuiltinID is an overloaded Neon intrinsic with an extra
8548 /// argument that specifies the vector type.
8549 static bool HasExtraNeonArgument(unsigned BuiltinID) {
8550 switch (BuiltinID) {
8551 default: break;
8552 case NEON::BI__builtin_neon_vget_lane_i8:
8553 case NEON::BI__builtin_neon_vget_lane_i16:
8554 case NEON::BI__builtin_neon_vget_lane_bf16:
8555 case NEON::BI__builtin_neon_vget_lane_i32:
8556 case NEON::BI__builtin_neon_vget_lane_i64:
8557 case NEON::BI__builtin_neon_vget_lane_f32:
8558 case NEON::BI__builtin_neon_vgetq_lane_i8:
8559 case NEON::BI__builtin_neon_vgetq_lane_i16:
8560 case NEON::BI__builtin_neon_vgetq_lane_bf16:
8561 case NEON::BI__builtin_neon_vgetq_lane_i32:
8562 case NEON::BI__builtin_neon_vgetq_lane_i64:
8563 case NEON::BI__builtin_neon_vgetq_lane_f32:
8564 case NEON::BI__builtin_neon_vduph_lane_bf16:
8565 case NEON::BI__builtin_neon_vduph_laneq_bf16:
8566 case NEON::BI__builtin_neon_vset_lane_i8:
8567 case NEON::BI__builtin_neon_vset_lane_i16:
8568 case NEON::BI__builtin_neon_vset_lane_bf16:
8569 case NEON::BI__builtin_neon_vset_lane_i32:
8570 case NEON::BI__builtin_neon_vset_lane_i64:
8571 case NEON::BI__builtin_neon_vset_lane_f32:
8572 case NEON::BI__builtin_neon_vsetq_lane_i8:
8573 case NEON::BI__builtin_neon_vsetq_lane_i16:
8574 case NEON::BI__builtin_neon_vsetq_lane_bf16:
8575 case NEON::BI__builtin_neon_vsetq_lane_i32:
8576 case NEON::BI__builtin_neon_vsetq_lane_i64:
8577 case NEON::BI__builtin_neon_vsetq_lane_f32:
8578 case NEON::BI__builtin_neon_vsha1h_u32:
8579 case NEON::BI__builtin_neon_vsha1cq_u32:
8580 case NEON::BI__builtin_neon_vsha1pq_u32:
8581 case NEON::BI__builtin_neon_vsha1mq_u32:
8582 case NEON::BI__builtin_neon_vcvth_bf16_f32:
8583 case clang::ARM::BI_MoveToCoprocessor:
8584 case clang::ARM::BI_MoveToCoprocessor2:
8585 return false;
8586 }
8587 return true;
8588 }
8589
8590 Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
8591 const CallExpr *E,
8592 ReturnValueSlot ReturnValue,
8593 llvm::Triple::ArchType Arch) {
8594 if (auto Hint = GetValueForARMHint(BuiltinID))
8595 return Hint;
8596
8597 if (BuiltinID == clang::ARM::BI__emit) {
8598 bool IsThumb = getTarget().getTriple().getArch() == llvm::Triple::thumb;
8599 llvm::FunctionType *FTy =
8600 llvm::FunctionType::get(VoidTy, /*Variadic=*/false);
8601
8602 Expr::EvalResult Result;
8603 if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext()))
8604 llvm_unreachable("Sema will ensure that the parameter is constant");
8605
8606 llvm::APSInt Value = Result.Val.getInt();
8607 uint64_t ZExtValue = Value.zextOrTrunc(IsThumb ? 16 : 32).getZExtValue();
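    // e.g. in Thumb mode __emit(0xbf00) becomes the inline asm ".inst.n 0xbf00"
    // (a Thumb NOP encoding); hasSideEffects keeps it from being optimized away.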
8608
8609 llvm::InlineAsm *Emit =
8610 IsThumb ? InlineAsm::get(FTy, ".inst.n 0x" + utohexstr(ZExtValue), "",
8611 /*hasSideEffects=*/true)
8612 : InlineAsm::get(FTy, ".inst 0x" + utohexstr(ZExtValue), "",
8613 /*hasSideEffects=*/true);
8614
8615 return Builder.CreateCall(Emit);
8616 }
8617
8618 if (BuiltinID == clang::ARM::BI__builtin_arm_dbg) {
8619 Value *Option = EmitScalarExpr(E->getArg(0));
8620 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_dbg), Option);
8621 }
8622
8623 if (BuiltinID == clang::ARM::BI__builtin_arm_prefetch) {
8624 Value *Address = EmitScalarExpr(E->getArg(0));
8625 Value *RW = EmitScalarExpr(E->getArg(1));
8626 Value *IsData = EmitScalarExpr(E->getArg(2));
8627
8628     // Locality is not supported on the ARM target.
8629 Value *Locality = llvm::ConstantInt::get(Int32Ty, 3);
8630
8631 Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
8632 return Builder.CreateCall(F, {Address, RW, Locality, IsData});
8633 }
8634
8635 if (BuiltinID == clang::ARM::BI__builtin_arm_rbit) {
8636 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
8637 return Builder.CreateCall(
8638 CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
8639 }
8640
8641 if (BuiltinID == clang::ARM::BI__builtin_arm_clz ||
8642 BuiltinID == clang::ARM::BI__builtin_arm_clz64) {
8643 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
8644 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Arg->getType());
8645 Value *Res = Builder.CreateCall(F, {Arg, Builder.getInt1(false)});
8646 if (BuiltinID == clang::ARM::BI__builtin_arm_clz64)
8647 Res = Builder.CreateTrunc(Res, Builder.getInt32Ty());
8648 return Res;
8649 }
8650
8651
8652 if (BuiltinID == clang::ARM::BI__builtin_arm_cls) {
8653 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
8654 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_cls), Arg, "cls");
8655 }
8656 if (BuiltinID == clang::ARM::BI__builtin_arm_cls64) {
8657 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
8658 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_cls64), Arg,
8659 "cls");
8660 }
8661
8662 if (BuiltinID == clang::ARM::BI__clear_cache) {
8663 assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
8664 const FunctionDecl *FD = E->getDirectCallee();
8665 Value *Ops[2];
8666 for (unsigned i = 0; i < 2; i++)
8667 Ops[i] = EmitScalarExpr(E->getArg(i));
8668 llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
8669 llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
8670 StringRef Name = FD->getName();
8671 return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
8672 }
8673
8674 if (BuiltinID == clang::ARM::BI__builtin_arm_mcrr ||
8675 BuiltinID == clang::ARM::BI__builtin_arm_mcrr2) {
8676 Function *F;
8677
8678 switch (BuiltinID) {
8679 default: llvm_unreachable("unexpected builtin");
8680 case clang::ARM::BI__builtin_arm_mcrr:
8681 F = CGM.getIntrinsic(Intrinsic::arm_mcrr);
8682 break;
8683 case clang::ARM::BI__builtin_arm_mcrr2:
8684 F = CGM.getIntrinsic(Intrinsic::arm_mcrr2);
8685 break;
8686 }
8687
8688     // The MCRR{2} instruction has 5 operands, but the intrinsic has 4
8689     // because Rt and Rt2 are represented as a single unsigned 64-bit
8690     // integer in the intrinsic definition, while internally they are
8691     // represented as two 32-bit integers.
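    // e.g. RtAndRt2 == 0xAAAABBBBCCCCDDDD is split into Rt == 0xCCCCDDDD (low
    // half) and Rt2 == 0xAAAABBBB (high half) below.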
8694
8695 Value *Coproc = EmitScalarExpr(E->getArg(0));
8696 Value *Opc1 = EmitScalarExpr(E->getArg(1));
8697 Value *RtAndRt2 = EmitScalarExpr(E->getArg(2));
8698 Value *CRm = EmitScalarExpr(E->getArg(3));
8699
8700 Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
8701 Value *Rt = Builder.CreateTruncOrBitCast(RtAndRt2, Int32Ty);
8702 Value *Rt2 = Builder.CreateLShr(RtAndRt2, C1);
8703 Rt2 = Builder.CreateTruncOrBitCast(Rt2, Int32Ty);
8704
8705 return Builder.CreateCall(F, {Coproc, Opc1, Rt, Rt2, CRm});
8706 }
8707
8708 if (BuiltinID == clang::ARM::BI__builtin_arm_mrrc ||
8709 BuiltinID == clang::ARM::BI__builtin_arm_mrrc2) {
8710 Function *F;
8711
8712 switch (BuiltinID) {
8713 default: llvm_unreachable("unexpected builtin");
8714 case clang::ARM::BI__builtin_arm_mrrc:
8715 F = CGM.getIntrinsic(Intrinsic::arm_mrrc);
8716 break;
8717 case clang::ARM::BI__builtin_arm_mrrc2:
8718 F = CGM.getIntrinsic(Intrinsic::arm_mrrc2);
8719 break;
8720 }
8721
8722 Value *Coproc = EmitScalarExpr(E->getArg(0));
8723 Value *Opc1 = EmitScalarExpr(E->getArg(1));
8724 Value *CRm = EmitScalarExpr(E->getArg(2));
8725 Value *RtAndRt2 = Builder.CreateCall(F, {Coproc, Opc1, CRm});
8726
8727     // Returns an unsigned 64-bit integer, represented
8728     // as two 32-bit integers.
8729
8730 Value *Rt = Builder.CreateExtractValue(RtAndRt2, 1);
8731 Value *Rt1 = Builder.CreateExtractValue(RtAndRt2, 0);
8732 Rt = Builder.CreateZExt(Rt, Int64Ty);
8733 Rt1 = Builder.CreateZExt(Rt1, Int64Ty);
8734
8735 Value *ShiftCast = llvm::ConstantInt::get(Int64Ty, 32);
8736 RtAndRt2 = Builder.CreateShl(Rt, ShiftCast, "shl", true);
8737 RtAndRt2 = Builder.CreateOr(RtAndRt2, Rt1);
8738
8739 return Builder.CreateBitCast(RtAndRt2, ConvertType(E->getType()));
8740 }
8741
8742 if (BuiltinID == clang::ARM::BI__builtin_arm_ldrexd ||
8743 ((BuiltinID == clang::ARM::BI__builtin_arm_ldrex ||
8744 BuiltinID == clang::ARM::BI__builtin_arm_ldaex) &&
8745 getContext().getTypeSize(E->getType()) == 64) ||
8746 BuiltinID == clang::ARM::BI__ldrexd) {
8747 Function *F;
8748
8749 switch (BuiltinID) {
8750 default: llvm_unreachable("unexpected builtin");
8751 case clang::ARM::BI__builtin_arm_ldaex:
8752 F = CGM.getIntrinsic(Intrinsic::arm_ldaexd);
8753 break;
8754 case clang::ARM::BI__builtin_arm_ldrexd:
8755 case clang::ARM::BI__builtin_arm_ldrex:
8756 case clang::ARM::BI__ldrexd:
8757 F = CGM.getIntrinsic(Intrinsic::arm_ldrexd);
8758 break;
8759 }
8760
8761 Value *LdPtr = EmitScalarExpr(E->getArg(0));
8762 Value *Val = Builder.CreateCall(F, LdPtr, "ldrexd");
8763
8764 Value *Val0 = Builder.CreateExtractValue(Val, 1);
8765 Value *Val1 = Builder.CreateExtractValue(Val, 0);
8766 Val0 = Builder.CreateZExt(Val0, Int64Ty);
8767 Val1 = Builder.CreateZExt(Val1, Int64Ty);
8768
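    // Combine the two 32-bit halves into the 64-bit result:
    //   result = (Val0 << 32) | Val1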
8769 Value *ShiftCst = llvm::ConstantInt::get(Int64Ty, 32);
8770 Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
8771 Val = Builder.CreateOr(Val, Val1);
8772 return Builder.CreateBitCast(Val, ConvertType(E->getType()));
8773 }
8774
8775 if (BuiltinID == clang::ARM::BI__builtin_arm_ldrex ||
8776 BuiltinID == clang::ARM::BI__builtin_arm_ldaex) {
8777 Value *LoadAddr = EmitScalarExpr(E->getArg(0));
8778
8779 QualType Ty = E->getType();
8780 llvm::Type *RealResTy = ConvertType(Ty);
8781 llvm::Type *IntTy =
8782 llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));
8783
8784 Function *F = CGM.getIntrinsic(
8785 BuiltinID == clang::ARM::BI__builtin_arm_ldaex ? Intrinsic::arm_ldaex
8786 : Intrinsic::arm_ldrex,
8787 UnqualPtrTy);
8788 CallInst *Val = Builder.CreateCall(F, LoadAddr, "ldrex");
8789 Val->addParamAttr(
8790 0, Attribute::get(getLLVMContext(), Attribute::ElementType, IntTy));
8791
8792 if (RealResTy->isPointerTy())
8793 return Builder.CreateIntToPtr(Val, RealResTy);
8794 else {
8795 llvm::Type *IntResTy = llvm::IntegerType::get(
8796 getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy));
8797 return Builder.CreateBitCast(Builder.CreateTruncOrBitCast(Val, IntResTy),
8798 RealResTy);
8799 }
8800 }
8801
8802 if (BuiltinID == clang::ARM::BI__builtin_arm_strexd ||
8803 ((BuiltinID == clang::ARM::BI__builtin_arm_stlex ||
8804 BuiltinID == clang::ARM::BI__builtin_arm_strex) &&
8805 getContext().getTypeSize(E->getArg(0)->getType()) == 64)) {
8806 Function *F = CGM.getIntrinsic(
8807 BuiltinID == clang::ARM::BI__builtin_arm_stlex ? Intrinsic::arm_stlexd
8808 : Intrinsic::arm_strexd);
8809 llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty);
8810
8811 Address Tmp = CreateMemTemp(E->getArg(0)->getType());
8812 Value *Val = EmitScalarExpr(E->getArg(0));
8813 Builder.CreateStore(Val, Tmp);
8814
8815 Address LdPtr = Tmp.withElementType(STy);
8816 Val = Builder.CreateLoad(LdPtr);
8817
8818 Value *Arg0 = Builder.CreateExtractValue(Val, 0);
8819 Value *Arg1 = Builder.CreateExtractValue(Val, 1);
8820 Value *StPtr = EmitScalarExpr(E->getArg(1));
8821 return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "strexd");
8822 }
8823
8824 if (BuiltinID == clang::ARM::BI__builtin_arm_strex ||
8825 BuiltinID == clang::ARM::BI__builtin_arm_stlex) {
8826 Value *StoreVal = EmitScalarExpr(E->getArg(0));
8827 Value *StoreAddr = EmitScalarExpr(E->getArg(1));
8828
8829 QualType Ty = E->getArg(0)->getType();
8830 llvm::Type *StoreTy =
8831 llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));
8832
8833 if (StoreVal->getType()->isPointerTy())
8834 StoreVal = Builder.CreatePtrToInt(StoreVal, Int32Ty);
8835 else {
8836 llvm::Type *IntTy = llvm::IntegerType::get(
8837 getLLVMContext(),
8838 CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType()));
8839 StoreVal = Builder.CreateBitCast(StoreVal, IntTy);
8840 StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int32Ty);
8841 }
8842
8843 Function *F = CGM.getIntrinsic(
8844 BuiltinID == clang::ARM::BI__builtin_arm_stlex ? Intrinsic::arm_stlex
8845 : Intrinsic::arm_strex,
8846 StoreAddr->getType());
8847
8848 CallInst *CI = Builder.CreateCall(F, {StoreVal, StoreAddr}, "strex");
8849 CI->addParamAttr(
8850 1, Attribute::get(getLLVMContext(), Attribute::ElementType, StoreTy));
8851 return CI;
8852 }
8853
8854 if (BuiltinID == clang::ARM::BI__builtin_arm_clrex) {
8855 Function *F = CGM.getIntrinsic(Intrinsic::arm_clrex);
8856 return Builder.CreateCall(F);
8857 }
8858
8859 // CRC32
8860 Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
8861 switch (BuiltinID) {
8862 case clang::ARM::BI__builtin_arm_crc32b:
8863 CRCIntrinsicID = Intrinsic::arm_crc32b; break;
8864 case clang::ARM::BI__builtin_arm_crc32cb:
8865 CRCIntrinsicID = Intrinsic::arm_crc32cb; break;
8866 case clang::ARM::BI__builtin_arm_crc32h:
8867 CRCIntrinsicID = Intrinsic::arm_crc32h; break;
8868 case clang::ARM::BI__builtin_arm_crc32ch:
8869 CRCIntrinsicID = Intrinsic::arm_crc32ch; break;
8870 case clang::ARM::BI__builtin_arm_crc32w:
8871 case clang::ARM::BI__builtin_arm_crc32d:
8872 CRCIntrinsicID = Intrinsic::arm_crc32w; break;
8873 case clang::ARM::BI__builtin_arm_crc32cw:
8874 case clang::ARM::BI__builtin_arm_crc32cd:
8875 CRCIntrinsicID = Intrinsic::arm_crc32cw; break;
8876 }
8877
8878 if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
8879 Value *Arg0 = EmitScalarExpr(E->getArg(0));
8880 Value *Arg1 = EmitScalarExpr(E->getArg(1));
8881
8882 // crc32{c,}d intrinsics are implemented as two calls to crc32{c,}w
8883 // intrinsics, hence we need different codegen for these cases.
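    // i.e. crc32{c,}d(crc, x) == crc32{c,}w(crc32{c,}w(crc, lo32(x)), hi32(x)).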
8884 if (BuiltinID == clang::ARM::BI__builtin_arm_crc32d ||
8885 BuiltinID == clang::ARM::BI__builtin_arm_crc32cd) {
8886 Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
8887 Value *Arg1a = Builder.CreateTruncOrBitCast(Arg1, Int32Ty);
8888 Value *Arg1b = Builder.CreateLShr(Arg1, C1);
8889 Arg1b = Builder.CreateTruncOrBitCast(Arg1b, Int32Ty);
8890
8891 Function *F = CGM.getIntrinsic(CRCIntrinsicID);
8892 Value *Res = Builder.CreateCall(F, {Arg0, Arg1a});
8893 return Builder.CreateCall(F, {Res, Arg1b});
8894 } else {
8895 Arg1 = Builder.CreateZExtOrBitCast(Arg1, Int32Ty);
8896
8897 Function *F = CGM.getIntrinsic(CRCIntrinsicID);
8898 return Builder.CreateCall(F, {Arg0, Arg1});
8899 }
8900 }
8901
8902 if (BuiltinID == clang::ARM::BI__builtin_arm_rsr ||
8903 BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||
8904 BuiltinID == clang::ARM::BI__builtin_arm_rsrp ||
8905 BuiltinID == clang::ARM::BI__builtin_arm_wsr ||
8906 BuiltinID == clang::ARM::BI__builtin_arm_wsr64 ||
8907 BuiltinID == clang::ARM::BI__builtin_arm_wsrp) {
8908
8909 SpecialRegisterAccessKind AccessKind = Write;
8910 if (BuiltinID == clang::ARM::BI__builtin_arm_rsr ||
8911 BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||
8912 BuiltinID == clang::ARM::BI__builtin_arm_rsrp)
8913 AccessKind = VolatileRead;
8914
8915 bool IsPointerBuiltin = BuiltinID == clang::ARM::BI__builtin_arm_rsrp ||
8916 BuiltinID == clang::ARM::BI__builtin_arm_wsrp;
8917
8918 bool Is64Bit = BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||
8919 BuiltinID == clang::ARM::BI__builtin_arm_wsr64;
8920
8921 llvm::Type *ValueType;
8922 llvm::Type *RegisterType;
8923 if (IsPointerBuiltin) {
8924 ValueType = VoidPtrTy;
8925 RegisterType = Int32Ty;
8926 } else if (Is64Bit) {
8927 ValueType = RegisterType = Int64Ty;
8928 } else {
8929 ValueType = RegisterType = Int32Ty;
8930 }
8931
8932 return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType,
8933 AccessKind);
8934 }
8935
8936 if (BuiltinID == ARM::BI__builtin_sponentry) {
8937 llvm::Function *F = CGM.getIntrinsic(Intrinsic::sponentry, AllocaInt8PtrTy);
8938 return Builder.CreateCall(F);
8939 }
8940
8941 // Handle MSVC intrinsics before argument evaluation to prevent double
8942 // evaluation.
8943 if (std::optional<MSVCIntrin> MsvcIntId = translateArmToMsvcIntrin(BuiltinID))
8944 return EmitMSVCBuiltinExpr(*MsvcIntId, E);
8945
8946 // Deal with MVE builtins
8947 if (Value *Result = EmitARMMVEBuiltinExpr(BuiltinID, E, ReturnValue, Arch))
8948 return Result;
8949 // Handle CDE builtins
8950 if (Value *Result = EmitARMCDEBuiltinExpr(BuiltinID, E, ReturnValue, Arch))
8951 return Result;
8952
8953   // Some intrinsics are equivalent; if they are, use the base intrinsic ID.
8954 auto It = llvm::find_if(NEONEquivalentIntrinsicMap, [BuiltinID](auto &P) {
8955 return P.first == BuiltinID;
8956 });
8957 if (It != end(NEONEquivalentIntrinsicMap))
8958 BuiltinID = It->second;
8959
8960 // Find out if any arguments are required to be integer constant
8961 // expressions.
8962 unsigned ICEArguments = 0;
8963 ASTContext::GetBuiltinTypeError Error;
8964 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
8965 assert(Error == ASTContext::GE_None && "Should not codegen an error");
8966
8967 auto getAlignmentValue32 = [&](Address addr) -> Value* {
8968 return Builder.getInt32(addr.getAlignment().getQuantity());
8969 };
8970
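  // For the NEON load/store builtins handled below, the pointer argument is
  // emitted via EmitPointerWithAlignment so that its alignment can later be
  // passed to the arm.neon load/store intrinsics.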
8971 Address PtrOp0 = Address::invalid();
8972 Address PtrOp1 = Address::invalid();
8973 SmallVector<Value*, 4> Ops;
8974 bool HasExtraArg = HasExtraNeonArgument(BuiltinID);
8975 unsigned NumArgs = E->getNumArgs() - (HasExtraArg ? 1 : 0);
8976 for (unsigned i = 0, e = NumArgs; i != e; i++) {
8977 if (i == 0) {
8978 switch (BuiltinID) {
8979 case NEON::BI__builtin_neon_vld1_v:
8980 case NEON::BI__builtin_neon_vld1q_v:
8981 case NEON::BI__builtin_neon_vld1q_lane_v:
8982 case NEON::BI__builtin_neon_vld1_lane_v:
8983 case NEON::BI__builtin_neon_vld1_dup_v:
8984 case NEON::BI__builtin_neon_vld1q_dup_v:
8985 case NEON::BI__builtin_neon_vst1_v:
8986 case NEON::BI__builtin_neon_vst1q_v:
8987 case NEON::BI__builtin_neon_vst1q_lane_v:
8988 case NEON::BI__builtin_neon_vst1_lane_v:
8989 case NEON::BI__builtin_neon_vst2_v:
8990 case NEON::BI__builtin_neon_vst2q_v:
8991 case NEON::BI__builtin_neon_vst2_lane_v:
8992 case NEON::BI__builtin_neon_vst2q_lane_v:
8993 case NEON::BI__builtin_neon_vst3_v:
8994 case NEON::BI__builtin_neon_vst3q_v:
8995 case NEON::BI__builtin_neon_vst3_lane_v:
8996 case NEON::BI__builtin_neon_vst3q_lane_v:
8997 case NEON::BI__builtin_neon_vst4_v:
8998 case NEON::BI__builtin_neon_vst4q_v:
8999 case NEON::BI__builtin_neon_vst4_lane_v:
9000 case NEON::BI__builtin_neon_vst4q_lane_v:
9001 // Get the alignment for the argument in addition to the value;
9002 // we'll use it later.
9003 PtrOp0 = EmitPointerWithAlignment(E->getArg(0));
9004 Ops.push_back(PtrOp0.emitRawPointer(*this));
9005 continue;
9006 }
9007 }
9008 if (i == 1) {
9009 switch (BuiltinID) {
9010 case NEON::BI__builtin_neon_vld2_v:
9011 case NEON::BI__builtin_neon_vld2q_v:
9012 case NEON::BI__builtin_neon_vld3_v:
9013 case NEON::BI__builtin_neon_vld3q_v:
9014 case NEON::BI__builtin_neon_vld4_v:
9015 case NEON::BI__builtin_neon_vld4q_v:
9016 case NEON::BI__builtin_neon_vld2_lane_v:
9017 case NEON::BI__builtin_neon_vld2q_lane_v:
9018 case NEON::BI__builtin_neon_vld3_lane_v:
9019 case NEON::BI__builtin_neon_vld3q_lane_v:
9020 case NEON::BI__builtin_neon_vld4_lane_v:
9021 case NEON::BI__builtin_neon_vld4q_lane_v:
9022 case NEON::BI__builtin_neon_vld2_dup_v:
9023 case NEON::BI__builtin_neon_vld2q_dup_v:
9024 case NEON::BI__builtin_neon_vld3_dup_v:
9025 case NEON::BI__builtin_neon_vld3q_dup_v:
9026 case NEON::BI__builtin_neon_vld4_dup_v:
9027 case NEON::BI__builtin_neon_vld4q_dup_v:
9028 // Get the alignment for the argument in addition to the value;
9029 // we'll use it later.
9030 PtrOp1 = EmitPointerWithAlignment(E->getArg(1));
9031 Ops.push_back(PtrOp1.emitRawPointer(*this));
9032 continue;
9033 }
9034 }
9035
9036 Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));
9037 }
9038
9039 switch (BuiltinID) {
9040 default: break;
9041
9042 case NEON::BI__builtin_neon_vget_lane_i8:
9043 case NEON::BI__builtin_neon_vget_lane_i16:
9044 case NEON::BI__builtin_neon_vget_lane_i32:
9045 case NEON::BI__builtin_neon_vget_lane_i64:
9046 case NEON::BI__builtin_neon_vget_lane_bf16:
9047 case NEON::BI__builtin_neon_vget_lane_f32:
9048 case NEON::BI__builtin_neon_vgetq_lane_i8:
9049 case NEON::BI__builtin_neon_vgetq_lane_i16:
9050 case NEON::BI__builtin_neon_vgetq_lane_i32:
9051 case NEON::BI__builtin_neon_vgetq_lane_i64:
9052 case NEON::BI__builtin_neon_vgetq_lane_bf16:
9053 case NEON::BI__builtin_neon_vgetq_lane_f32:
9054 case NEON::BI__builtin_neon_vduph_lane_bf16:
9055 case NEON::BI__builtin_neon_vduph_laneq_bf16:
9056 return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane");
9057
9058 case NEON::BI__builtin_neon_vrndns_f32: {
9059 Value *Arg = EmitScalarExpr(E->getArg(0));
9060 llvm::Type *Tys[] = {Arg->getType()};
9061 Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vrintn, Tys);
9062 return Builder.CreateCall(F, {Arg}, "vrndn"); }
9063
9064 case NEON::BI__builtin_neon_vset_lane_i8:
9065 case NEON::BI__builtin_neon_vset_lane_i16:
9066 case NEON::BI__builtin_neon_vset_lane_i32:
9067 case NEON::BI__builtin_neon_vset_lane_i64:
9068 case NEON::BI__builtin_neon_vset_lane_bf16:
9069 case NEON::BI__builtin_neon_vset_lane_f32:
9070 case NEON::BI__builtin_neon_vsetq_lane_i8:
9071 case NEON::BI__builtin_neon_vsetq_lane_i16:
9072 case NEON::BI__builtin_neon_vsetq_lane_i32:
9073 case NEON::BI__builtin_neon_vsetq_lane_i64:
9074 case NEON::BI__builtin_neon_vsetq_lane_bf16:
9075 case NEON::BI__builtin_neon_vsetq_lane_f32:
9076 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
9077
9078 case NEON::BI__builtin_neon_vsha1h_u32:
9079 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1h), Ops,
9080 "vsha1h");
9081 case NEON::BI__builtin_neon_vsha1cq_u32:
9082 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1c), Ops,
9083 "vsha1h");
9084 case NEON::BI__builtin_neon_vsha1pq_u32:
9085 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1p), Ops,
9086 "vsha1h");
9087 case NEON::BI__builtin_neon_vsha1mq_u32:
9088 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1m), Ops,
9089 "vsha1h");
9090
9091 case NEON::BI__builtin_neon_vcvth_bf16_f32: {
9092 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vcvtbfp2bf), Ops,
9093 "vcvtbfp2bf");
9094 }
9095
9096 // The ARM _MoveToCoprocessor builtins put the input register value as
9097 // the first argument, but the LLVM intrinsic expects it as the third one.
9098 case clang::ARM::BI_MoveToCoprocessor:
9099 case clang::ARM::BI_MoveToCoprocessor2: {
9100 Function *F = CGM.getIntrinsic(BuiltinID == clang::ARM::BI_MoveToCoprocessor
9101 ? Intrinsic::arm_mcr
9102 : Intrinsic::arm_mcr2);
9103 return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0],
9104 Ops[3], Ops[4], Ops[5]});
9105 }
9106 }
9107
9108 // Get the last argument, which specifies the vector type.
9109 assert(HasExtraArg);
9110 const Expr *Arg = E->getArg(E->getNumArgs()-1);
9111 std::optional<llvm::APSInt> Result =
9112 Arg->getIntegerConstantExpr(getContext());
9113 if (!Result)
9114 return nullptr;
9115
9116 if (BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_f ||
9117 BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_d) {
9118 // Determine the overloaded type of this builtin.
9119 llvm::Type *Ty;
9120 if (BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_f)
9121 Ty = FloatTy;
9122 else
9123 Ty = DoubleTy;
9124
9125 // Determine whether this is an unsigned conversion or not.
9126 bool usgn = Result->getZExtValue() == 1;
9127 unsigned Int = usgn ? Intrinsic::arm_vcvtru : Intrinsic::arm_vcvtr;
9128
9129 // Call the appropriate intrinsic.
9130 Function *F = CGM.getIntrinsic(Int, Ty);
9131 return Builder.CreateCall(F, Ops, "vcvtr");
9132 }
9133
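  // The trailing constant packs the NEON element type, signedness and vector
  // width (D vs. Q register) into a NeonTypeFlags value; GetNeonType maps it
  // to the corresponding fixed-width LLVM vector type.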
9134 // Determine the type of this overloaded NEON intrinsic.
9135 NeonTypeFlags Type = Result->getZExtValue();
9136 bool usgn = Type.isUnsigned();
9137 bool rightShift = false;
9138
9139 llvm::FixedVectorType *VTy =
9140 GetNeonType(this, Type, getTarget().hasLegalHalfType(), false,
9141 getTarget().hasBFloat16Type());
9142 llvm::Type *Ty = VTy;
9143 if (!Ty)
9144 return nullptr;
9145
9146 // Many NEON builtins have identical semantics and uses in ARM and
9147 // AArch64. Emit these in a single function.
9148 auto IntrinsicMap = ArrayRef(ARMSIMDIntrinsicMap);
9149 const ARMVectorIntrinsicInfo *Builtin = findARMVectorIntrinsicInMap(
9150 IntrinsicMap, BuiltinID, NEONSIMDIntrinsicsProvenSorted);
9151 if (Builtin)
9152 return EmitCommonNeonBuiltinExpr(
9153 Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
9154 Builtin->NameHint, Builtin->TypeModifier, E, Ops, PtrOp0, PtrOp1, Arch);
9155
9156 unsigned Int;
9157 switch (BuiltinID) {
9158 default: return nullptr;
9159 case NEON::BI__builtin_neon_vld1q_lane_v:
9160 // Handle 64-bit integer elements as a special case. Use shuffles of
9161 // one-element vectors to avoid poor code for i64 in the backend.
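    // Illustratively, for lane L of a <2 x i64> vector: keep lane 1-L of the
    // original vector, load the new element as a <1 x i64>, then shuffle the
    // two single-element vectors back together.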
9162 if (VTy->getElementType()->isIntegerTy(64)) {
9163 // Extract the other lane.
9164 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
9165 int Lane = cast<ConstantInt>(Ops[2])->getZExtValue();
9166 Value *SV = llvm::ConstantVector::get(ConstantInt::get(Int32Ty, 1-Lane));
9167 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
9168 // Load the value as a one-element vector.
9169 Ty = llvm::FixedVectorType::get(VTy->getElementType(), 1);
9170 llvm::Type *Tys[] = {Ty, Int8PtrTy};
9171 Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld1, Tys);
9172 Value *Align = getAlignmentValue32(PtrOp0);
9173 Value *Ld = Builder.CreateCall(F, {Ops[0], Align});
9174 // Combine them.
9175 int Indices[] = {1 - Lane, Lane};
9176 return Builder.CreateShuffleVector(Ops[1], Ld, Indices, "vld1q_lane");
9177 }
9178 [[fallthrough]];
9179 case NEON::BI__builtin_neon_vld1_lane_v: {
9180 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
9181 PtrOp0 = PtrOp0.withElementType(VTy->getElementType());
9182 Value *Ld = Builder.CreateLoad(PtrOp0);
9183 return Builder.CreateInsertElement(Ops[1], Ld, Ops[2], "vld1_lane");
9184 }
9185 case NEON::BI__builtin_neon_vqrshrn_n_v:
9186 Int =
9187 usgn ? Intrinsic::arm_neon_vqrshiftnu : Intrinsic::arm_neon_vqrshiftns;
9188 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n",
9189 1, true);
9190 case NEON::BI__builtin_neon_vqrshrun_n_v:
9191 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrshiftnsu, Ty),
9192 Ops, "vqrshrun_n", 1, true);
9193 case NEON::BI__builtin_neon_vqshrn_n_v:
9194 Int = usgn ? Intrinsic::arm_neon_vqshiftnu : Intrinsic::arm_neon_vqshiftns;
9195 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n",
9196 1, true);
9197 case NEON::BI__builtin_neon_vqshrun_n_v:
9198 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftnsu, Ty),
9199 Ops, "vqshrun_n", 1, true);
9200 case NEON::BI__builtin_neon_vrecpe_v:
9201 case NEON::BI__builtin_neon_vrecpeq_v:
9202 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecpe, Ty),
9203 Ops, "vrecpe");
9204 case NEON::BI__builtin_neon_vrshrn_n_v:
9205 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrshiftn, Ty),
9206 Ops, "vrshrn_n", 1, true);
9207 case NEON::BI__builtin_neon_vrsra_n_v:
9208 case NEON::BI__builtin_neon_vrsraq_n_v:
9209 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
9210 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
9211 Ops[2] = EmitNeonShiftVector(Ops[2], Ty, true);
9212 Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
9213 Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Ty), {Ops[1], Ops[2]});
9214 return Builder.CreateAdd(Ops[0], Ops[1], "vrsra_n");
9215 case NEON::BI__builtin_neon_vsri_n_v:
9216 case NEON::BI__builtin_neon_vsriq_n_v:
9217 rightShift = true;
9218 [[fallthrough]];
9219 case NEON::BI__builtin_neon_vsli_n_v:
9220 case NEON::BI__builtin_neon_vsliq_n_v:
9221 Ops[2] = EmitNeonShiftVector(Ops[2], Ty, rightShift);
9222 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vshiftins, Ty),
9223 Ops, "vsli_n");
9224 case NEON::BI__builtin_neon_vsra_n_v:
9225 case NEON::BI__builtin_neon_vsraq_n_v:
9226 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
9227 Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
9228 return Builder.CreateAdd(Ops[0], Ops[1]);
9229 case NEON::BI__builtin_neon_vst1q_lane_v:
9230 // Handle 64-bit integer elements as a special case. Use a shuffle to get
9231 // a one-element vector and avoid poor code for i64 in the backend.
9232 if (VTy->getElementType()->isIntegerTy(64)) {
9233 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
9234 Value *SV = llvm::ConstantVector::get(cast<llvm::Constant>(Ops[2]));
9235 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
9236 Ops[2] = getAlignmentValue32(PtrOp0);
9237 llvm::Type *Tys[] = {Int8PtrTy, Ops[1]->getType()};
9238 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst1,
9239 Tys), Ops);
9240 }
9241 [[fallthrough]];
9242 case NEON::BI__builtin_neon_vst1_lane_v: {
9243 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
9244 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
9245 return Builder.CreateStore(Ops[1],
9246 PtrOp0.withElementType(Ops[1]->getType()));
9247 }
9248 case NEON::BI__builtin_neon_vtbl1_v:
9249 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl1),
9250 Ops, "vtbl1");
9251 case NEON::BI__builtin_neon_vtbl2_v:
9252 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl2),
9253 Ops, "vtbl2");
9254 case NEON::BI__builtin_neon_vtbl3_v:
9255 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl3),
9256 Ops, "vtbl3");
9257 case NEON::BI__builtin_neon_vtbl4_v:
9258 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl4),
9259 Ops, "vtbl4");
9260 case NEON::BI__builtin_neon_vtbx1_v:
9261 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx1),
9262 Ops, "vtbx1");
9263 case NEON::BI__builtin_neon_vtbx2_v:
9264 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx2),
9265 Ops, "vtbx2");
9266 case NEON::BI__builtin_neon_vtbx3_v:
9267 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx3),
9268 Ops, "vtbx3");
9269 case NEON::BI__builtin_neon_vtbx4_v:
9270 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx4),
9271 Ops, "vtbx4");
9272 }
9273 }
9274
9275 template<typename Integer>
9276 static Integer GetIntegerConstantValue(const Expr *E, ASTContext &Context) {
9277 return E->getIntegerConstantExpr(Context)->getExtValue();
9278 }
9279
9280 static llvm::Value *SignOrZeroExtend(CGBuilderTy &Builder, llvm::Value *V,
9281 llvm::Type *T, bool Unsigned) {
9282 // Helper function called by Tablegen-constructed ARM MVE builtin codegen,
9283 // which finds it convenient to specify signed/unsigned as a boolean flag.
9284 return Unsigned ? Builder.CreateZExt(V, T) : Builder.CreateSExt(V, T);
9285 }
9286
9287 static llvm::Value *MVEImmediateShr(CGBuilderTy &Builder, llvm::Value *V,
9288 uint32_t Shift, bool Unsigned) {
9289 // MVE helper function for integer shift right. This must handle signed vs
9290 // unsigned, and also deal specially with the case where the shift count is
9291 // equal to the lane size. In LLVM IR, an LShr with that parameter would be
9292 // undefined behavior, but in MVE it's legal, so we must convert it to code
9293 // that is not undefined in IR.
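  // For example, with 16-bit lanes an unsigned shift by 16 becomes a zero
  // vector, while a signed shift by 16 is emitted as a shift by 15.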
9294 unsigned LaneBits = cast<llvm::VectorType>(V->getType())
9295 ->getElementType()
9296 ->getPrimitiveSizeInBits();
9297 if (Shift == LaneBits) {
9298 // An unsigned shift of the full lane size always generates zero, so we can
9299 // simply emit a zero vector. A signed shift of the full lane size does the
9300 // same thing as shifting by one bit fewer.
9301 if (Unsigned)
9302 return llvm::Constant::getNullValue(V->getType());
9303 else
9304 --Shift;
9305 }
9306 return Unsigned ? Builder.CreateLShr(V, Shift) : Builder.CreateAShr(V, Shift);
9307 }
9308
9309 static llvm::Value *ARMMVEVectorSplat(CGBuilderTy &Builder, llvm::Value *V) {
9310 // MVE-specific helper function for a vector splat, which infers the element
9311 // count of the output vector by knowing that MVE vectors are all 128 bits
9312 // wide.
9313 unsigned Elements = 128 / V->getType()->getPrimitiveSizeInBits();
9314 return Builder.CreateVectorSplat(Elements, V);
9315 }
9316
9317 static llvm::Value *ARMMVEVectorReinterpret(CGBuilderTy &Builder,
9318 CodeGenFunction *CGF,
9319 llvm::Value *V,
9320 llvm::Type *DestType) {
9321 // Convert one MVE vector type into another by reinterpreting its in-register
9322 // format.
9323 //
9324   // On little-endian targets, this is identical to a bitcast (which
9325   // reinterprets the memory format). On big-endian targets they are not necessarily the same, because
9326 // the register and memory formats map to each other differently depending on
9327 // the lane size.
9328 //
9329 // We generate a bitcast whenever we can (if we're little-endian, or if the
9330 // lane sizes are the same anyway). Otherwise we fall back to an IR intrinsic
9331 // that performs the different kind of reinterpretation.
9332 if (CGF->getTarget().isBigEndian() &&
9333 V->getType()->getScalarSizeInBits() != DestType->getScalarSizeInBits()) {
9334 return Builder.CreateCall(
9335 CGF->CGM.getIntrinsic(Intrinsic::arm_mve_vreinterpretq,
9336 {DestType, V->getType()}),
9337 V);
9338 } else {
9339 return Builder.CreateBitCast(V, DestType);
9340 }
9341 }
9342
9343 static llvm::Value *VectorUnzip(CGBuilderTy &Builder, llvm::Value *V, bool Odd) {
9344 // Make a shufflevector that extracts every other element of a vector (evens
9345 // or odds, as desired).
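  // E.g. for an 8-element input, Odd=false selects lanes {0,2,4,6} and
  // Odd=true selects lanes {1,3,5,7}.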
9346 SmallVector<int, 16> Indices;
9347 unsigned InputElements =
9348 cast<llvm::FixedVectorType>(V->getType())->getNumElements();
9349 for (unsigned i = 0; i < InputElements; i += 2)
9350 Indices.push_back(i + Odd);
9351 return Builder.CreateShuffleVector(V, Indices);
9352 }
9353
9354 static llvm::Value *VectorZip(CGBuilderTy &Builder, llvm::Value *V0,
9355 llvm::Value *V1) {
9356 // Make a shufflevector that interleaves two vectors element by element.
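  // E.g. zipping {a0,a1,a2,a3} with {b0,b1,b2,b3} produces
  // {a0,b0,a1,b1,a2,b2,a3,b3}.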
9357 assert(V0->getType() == V1->getType() && "Can't zip different vector types");
9358 SmallVector<int, 16> Indices;
9359 unsigned InputElements =
9360 cast<llvm::FixedVectorType>(V0->getType())->getNumElements();
9361 for (unsigned i = 0; i < InputElements; i++) {
9362 Indices.push_back(i);
9363 Indices.push_back(i + InputElements);
9364 }
9365 return Builder.CreateShuffleVector(V0, V1, Indices);
9366 }
9367
9368 template<unsigned HighBit, unsigned OtherBits>
9369 static llvm::Value *ARMMVEConstantSplat(CGBuilderTy &Builder, llvm::Type *VT) {
9370 // MVE-specific helper function to make a vector splat of a constant such as
9371 // UINT_MAX or INT_MIN, in which all bits below the highest one are equal.
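  // E.g. HighBit=1, OtherBits=0 yields INT_MIN for the lane type, and
  // HighBit=1, OtherBits=1 yields UINT_MAX (all bits set).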
9372 llvm::Type *T = cast<llvm::VectorType>(VT)->getElementType();
9373 unsigned LaneBits = T->getPrimitiveSizeInBits();
9374 uint32_t Value = HighBit << (LaneBits - 1);
9375 if (OtherBits)
9376 Value |= (1UL << (LaneBits - 1)) - 1;
9377 llvm::Value *Lane = llvm::ConstantInt::get(T, Value);
9378 return ARMMVEVectorSplat(Builder, Lane);
9379 }
9380
9381 static llvm::Value *ARMMVEVectorElementReverse(CGBuilderTy &Builder,
9382 llvm::Value *V,
9383 unsigned ReverseWidth) {
9384 // MVE-specific helper function which reverses the elements of a
9385 // vector within every (ReverseWidth)-bit collection of lanes.
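  // E.g. with 8-bit lanes and ReverseWidth=32, the lanes in each group of
  // four are reversed: {0,1,2,3, 4,5,6,7, ...} -> {3,2,1,0, 7,6,5,4, ...}.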
9386 SmallVector<int, 16> Indices;
9387 unsigned LaneSize = V->getType()->getScalarSizeInBits();
9388 unsigned Elements = 128 / LaneSize;
9389 unsigned Mask = ReverseWidth / LaneSize - 1;
9390 for (unsigned i = 0; i < Elements; i++)
9391 Indices.push_back(i ^ Mask);
9392 return Builder.CreateShuffleVector(V, Indices);
9393 }
9394
9395 Value *CodeGenFunction::EmitARMMVEBuiltinExpr(unsigned BuiltinID,
9396 const CallExpr *E,
9397 ReturnValueSlot ReturnValue,
9398 llvm::Triple::ArchType Arch) {
9399 enum class CustomCodeGen { VLD24, VST24 } CustomCodeGenType;
9400 Intrinsic::ID IRIntr;
9401 unsigned NumVectors;
9402
9403 // Code autogenerated by Tablegen will handle all the simple builtins.
9404 switch (BuiltinID) {
9405 #include "clang/Basic/arm_mve_builtin_cg.inc"
9406
9407 // If we didn't match an MVE builtin id at all, go back to the
9408 // main EmitARMBuiltinExpr.
9409 default:
9410 return nullptr;
9411 }
9412
9413 // Anything that breaks from that switch is an MVE builtin that
9414 // needs handwritten code to generate.
9415
9416 switch (CustomCodeGenType) {
9417
9418 case CustomCodeGen::VLD24: {
9419 llvm::SmallVector<Value *, 4> Ops;
9420 llvm::SmallVector<llvm::Type *, 4> Tys;
9421
9422 auto MvecCType = E->getType();
9423 auto MvecLType = ConvertType(MvecCType);
9424 assert(MvecLType->isStructTy() &&
9425 "Return type for vld[24]q should be a struct");
9426 assert(MvecLType->getStructNumElements() == 1 &&
9427 "Return-type struct for vld[24]q should have one element");
9428 auto MvecLTypeInner = MvecLType->getStructElementType(0);
9429 assert(MvecLTypeInner->isArrayTy() &&
9430 "Return-type struct for vld[24]q should contain an array");
9431 assert(MvecLTypeInner->getArrayNumElements() == NumVectors &&
9432 "Array member of return-type struct vld[24]q has wrong length");
9433 auto VecLType = MvecLTypeInner->getArrayElementType();
9434
9435 Tys.push_back(VecLType);
9436
9437 auto Addr = E->getArg(0);
9438 Ops.push_back(EmitScalarExpr(Addr));
9439 Tys.push_back(ConvertType(Addr->getType()));
9440
9441 Function *F = CGM.getIntrinsic(IRIntr, ArrayRef(Tys));
9442 Value *LoadResult = Builder.CreateCall(F, Ops);
9443 Value *MvecOut = PoisonValue::get(MvecLType);
9444 for (unsigned i = 0; i < NumVectors; ++i) {
9445 Value *Vec = Builder.CreateExtractValue(LoadResult, i);
9446 MvecOut = Builder.CreateInsertValue(MvecOut, Vec, {0, i});
9447 }
9448
9449 if (ReturnValue.isNull())
9450 return MvecOut;
9451 else
9452 return Builder.CreateStore(MvecOut, ReturnValue.getAddress());
9453 }
9454
9455 case CustomCodeGen::VST24: {
9456 llvm::SmallVector<Value *, 4> Ops;
9457 llvm::SmallVector<llvm::Type *, 4> Tys;
9458
9459 auto Addr = E->getArg(0);
9460 Ops.push_back(EmitScalarExpr(Addr));
9461 Tys.push_back(ConvertType(Addr->getType()));
9462
9463 auto MvecCType = E->getArg(1)->getType();
9464 auto MvecLType = ConvertType(MvecCType);
9465 assert(MvecLType->isStructTy() && "Data type for vst2q should be a struct");
9466 assert(MvecLType->getStructNumElements() == 1 &&
9467 "Data-type struct for vst2q should have one element");
9468 auto MvecLTypeInner = MvecLType->getStructElementType(0);
9469 assert(MvecLTypeInner->isArrayTy() &&
9470 "Data-type struct for vst2q should contain an array");
9471 assert(MvecLTypeInner->getArrayNumElements() == NumVectors &&
9472 "Array member of return-type struct vld[24]q has wrong length");
9473 auto VecLType = MvecLTypeInner->getArrayElementType();
9474
9475 Tys.push_back(VecLType);
9476
9477 AggValueSlot MvecSlot = CreateAggTemp(MvecCType);
9478 EmitAggExpr(E->getArg(1), MvecSlot);
9479 auto Mvec = Builder.CreateLoad(MvecSlot.getAddress());
9480 for (unsigned i = 0; i < NumVectors; i++)
9481 Ops.push_back(Builder.CreateExtractValue(Mvec, {0, i}));
9482
9483 Function *F = CGM.getIntrinsic(IRIntr, ArrayRef(Tys));
9484 Value *ToReturn = nullptr;
9485 for (unsigned i = 0; i < NumVectors; i++) {
9486 Ops.push_back(llvm::ConstantInt::get(Int32Ty, i));
9487 ToReturn = Builder.CreateCall(F, Ops);
9488 Ops.pop_back();
9489 }
9490 return ToReturn;
9491 }
9492 }
9493 llvm_unreachable("unknown custom codegen type.");
9494 }
9495
9496 Value *CodeGenFunction::EmitARMCDEBuiltinExpr(unsigned BuiltinID,
9497 const CallExpr *E,
9498 ReturnValueSlot ReturnValue,
9499 llvm::Triple::ArchType Arch) {
9500 switch (BuiltinID) {
9501 default:
9502 return nullptr;
9503 #include "clang/Basic/arm_cde_builtin_cg.inc"
9504 }
9505 }
9506
9507 static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID,
9508 const CallExpr *E,
9509 SmallVectorImpl<Value *> &Ops,
9510 llvm::Triple::ArchType Arch) {
9511 unsigned int Int = 0;
9512 const char *s = nullptr;
9513
9514 switch (BuiltinID) {
9515 default:
9516 return nullptr;
9517 case NEON::BI__builtin_neon_vtbl1_v:
9518 case NEON::BI__builtin_neon_vqtbl1_v:
9519 case NEON::BI__builtin_neon_vqtbl1q_v:
9520 case NEON::BI__builtin_neon_vtbl2_v:
9521 case NEON::BI__builtin_neon_vqtbl2_v:
9522 case NEON::BI__builtin_neon_vqtbl2q_v:
9523 case NEON::BI__builtin_neon_vtbl3_v:
9524 case NEON::BI__builtin_neon_vqtbl3_v:
9525 case NEON::BI__builtin_neon_vqtbl3q_v:
9526 case NEON::BI__builtin_neon_vtbl4_v:
9527 case NEON::BI__builtin_neon_vqtbl4_v:
9528 case NEON::BI__builtin_neon_vqtbl4q_v:
9529 break;
9530 case NEON::BI__builtin_neon_vtbx1_v:
9531 case NEON::BI__builtin_neon_vqtbx1_v:
9532 case NEON::BI__builtin_neon_vqtbx1q_v:
9533 case NEON::BI__builtin_neon_vtbx2_v:
9534 case NEON::BI__builtin_neon_vqtbx2_v:
9535 case NEON::BI__builtin_neon_vqtbx2q_v:
9536 case NEON::BI__builtin_neon_vtbx3_v:
9537 case NEON::BI__builtin_neon_vqtbx3_v:
9538 case NEON::BI__builtin_neon_vqtbx3q_v:
9539 case NEON::BI__builtin_neon_vtbx4_v:
9540 case NEON::BI__builtin_neon_vqtbx4_v:
9541 case NEON::BI__builtin_neon_vqtbx4q_v:
9542 break;
9543 }
9544
9545 assert(E->getNumArgs() >= 3);
9546
9547 // Get the last argument, which specifies the vector type.
9548 const Expr *Arg = E->getArg(E->getNumArgs() - 1);
9549 std::optional<llvm::APSInt> Result =
9550 Arg->getIntegerConstantExpr(CGF.getContext());
9551 if (!Result)
9552 return nullptr;
9553
9554 // Determine the type of this overloaded NEON intrinsic.
9555 NeonTypeFlags Type = Result->getZExtValue();
9556 llvm::FixedVectorType *Ty = GetNeonType(&CGF, Type);
9557 if (!Ty)
9558 return nullptr;
9559
9560 CodeGen::CGBuilderTy &Builder = CGF.Builder;
9561
9562   // AArch64 scalar builtins are not overloaded; they do not have an extra
9563   // argument that specifies the vector type, so we need to handle each case.
9564 switch (BuiltinID) {
9565 case NEON::BI__builtin_neon_vtbl1_v: {
9566 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 1), nullptr, Ops[1],
9567 Ty, Intrinsic::aarch64_neon_tbl1, "vtbl1");
9568 }
9569 case NEON::BI__builtin_neon_vtbl2_v: {
9570 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 2), nullptr, Ops[2],
9571 Ty, Intrinsic::aarch64_neon_tbl1, "vtbl1");
9572 }
9573 case NEON::BI__builtin_neon_vtbl3_v: {
9574 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 3), nullptr, Ops[3],
9575 Ty, Intrinsic::aarch64_neon_tbl2, "vtbl2");
9576 }
9577 case NEON::BI__builtin_neon_vtbl4_v: {
9578 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 4), nullptr, Ops[4],
9579 Ty, Intrinsic::aarch64_neon_tbl2, "vtbl2");
9580 }
9581 case NEON::BI__builtin_neon_vtbx1_v: {
9582 Value *TblRes =
9583 packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 1), nullptr, Ops[2], Ty,
9584 Intrinsic::aarch64_neon_tbl1, "vtbl1");
9585
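    // vtbx1 must preserve the destination lane when the index is out of range
    // (>= 8 for a single 64-bit table), so select between the table result and
    // the original input with a compare-and-mask sequence.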
9586 llvm::Constant *EightV = ConstantInt::get(Ty, 8);
9587 Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[2], EightV);
9588 CmpRes = Builder.CreateSExt(CmpRes, Ty);
9589
9590 Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
9591 Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
9592 return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
9593 }
9594 case NEON::BI__builtin_neon_vtbx2_v: {
9595 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 2), Ops[0], Ops[3],
9596 Ty, Intrinsic::aarch64_neon_tbx1, "vtbx1");
9597 }
9598 case NEON::BI__builtin_neon_vtbx3_v: {
9599 Value *TblRes =
9600 packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 3), nullptr, Ops[4], Ty,
9601 Intrinsic::aarch64_neon_tbl2, "vtbl2");
9602
9603 llvm::Constant *TwentyFourV = ConstantInt::get(Ty, 24);
9604 Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[4],
9605 TwentyFourV);
9606 CmpRes = Builder.CreateSExt(CmpRes, Ty);
9607
9608 Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
9609 Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
9610 return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
9611 }
9612 case NEON::BI__builtin_neon_vtbx4_v: {
9613 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 4), Ops[0], Ops[5],
9614 Ty, Intrinsic::aarch64_neon_tbx2, "vtbx2");
9615 }
9616 case NEON::BI__builtin_neon_vqtbl1_v:
9617 case NEON::BI__builtin_neon_vqtbl1q_v:
9618 Int = Intrinsic::aarch64_neon_tbl1; s = "vtbl1"; break;
9619 case NEON::BI__builtin_neon_vqtbl2_v:
9620   case NEON::BI__builtin_neon_vqtbl2q_v:
9621 Int = Intrinsic::aarch64_neon_tbl2; s = "vtbl2"; break;
9622 case NEON::BI__builtin_neon_vqtbl3_v:
9623 case NEON::BI__builtin_neon_vqtbl3q_v:
9624 Int = Intrinsic::aarch64_neon_tbl3; s = "vtbl3"; break;
9625 case NEON::BI__builtin_neon_vqtbl4_v:
9626 case NEON::BI__builtin_neon_vqtbl4q_v:
9627 Int = Intrinsic::aarch64_neon_tbl4; s = "vtbl4"; break;
9628 case NEON::BI__builtin_neon_vqtbx1_v:
9629 case NEON::BI__builtin_neon_vqtbx1q_v:
9630 Int = Intrinsic::aarch64_neon_tbx1; s = "vtbx1"; break;
9631 case NEON::BI__builtin_neon_vqtbx2_v:
9632 case NEON::BI__builtin_neon_vqtbx2q_v:
9633 Int = Intrinsic::aarch64_neon_tbx2; s = "vtbx2"; break;
9634 case NEON::BI__builtin_neon_vqtbx3_v:
9635 case NEON::BI__builtin_neon_vqtbx3q_v:
9636 Int = Intrinsic::aarch64_neon_tbx3; s = "vtbx3"; break;
9637 case NEON::BI__builtin_neon_vqtbx4_v:
9638 case NEON::BI__builtin_neon_vqtbx4q_v:
9639 Int = Intrinsic::aarch64_neon_tbx4; s = "vtbx4"; break;
9640   }
9642
9643 if (!Int)
9644 return nullptr;
9645
9646 Function *F = CGF.CGM.getIntrinsic(Int, Ty);
9647 return CGF.EmitNeonCall(F, Ops, s);
9648 }
9649
9650 Value *CodeGenFunction::vectorWrapScalar16(Value *Op) {
9651 auto *VTy = llvm::FixedVectorType::get(Int16Ty, 4);
9652 Op = Builder.CreateBitCast(Op, Int16Ty);
9653 Value *V = PoisonValue::get(VTy);
9654 llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
9655 Op = Builder.CreateInsertElement(V, Op, CI);
9656 return Op;
9657 }
9658
9659 /// SVEBuiltinMemEltTy - Returns the memory element type for this memory
9660 /// access builtin. Only required if it can't be inferred from the base pointer
9661 /// operand.
9662 llvm::Type *CodeGenFunction::SVEBuiltinMemEltTy(const SVETypeFlags &TypeFlags) {
9663 switch (TypeFlags.getMemEltType()) {
9664 case SVETypeFlags::MemEltTyDefault:
9665 return getEltType(TypeFlags);
9666 case SVETypeFlags::MemEltTyInt8:
9667 return Builder.getInt8Ty();
9668 case SVETypeFlags::MemEltTyInt16:
9669 return Builder.getInt16Ty();
9670 case SVETypeFlags::MemEltTyInt32:
9671 return Builder.getInt32Ty();
9672 case SVETypeFlags::MemEltTyInt64:
9673 return Builder.getInt64Ty();
9674 }
9675 llvm_unreachable("Unknown MemEltType");
9676 }
9677
9678 llvm::Type *CodeGenFunction::getEltType(const SVETypeFlags &TypeFlags) {
9679 switch (TypeFlags.getEltType()) {
9680 default:
9681 llvm_unreachable("Invalid SVETypeFlag!");
9682
9683 case SVETypeFlags::EltTyInt8:
9684 return Builder.getInt8Ty();
9685 case SVETypeFlags::EltTyInt16:
9686 return Builder.getInt16Ty();
9687 case SVETypeFlags::EltTyInt32:
9688 return Builder.getInt32Ty();
9689 case SVETypeFlags::EltTyInt64:
9690 return Builder.getInt64Ty();
9691 case SVETypeFlags::EltTyInt128:
9692 return Builder.getInt128Ty();
9693
9694 case SVETypeFlags::EltTyFloat16:
9695 return Builder.getHalfTy();
9696 case SVETypeFlags::EltTyFloat32:
9697 return Builder.getFloatTy();
9698 case SVETypeFlags::EltTyFloat64:
9699 return Builder.getDoubleTy();
9700
9701 case SVETypeFlags::EltTyBFloat16:
9702 return Builder.getBFloatTy();
9703
9704 case SVETypeFlags::EltTyBool8:
9705 case SVETypeFlags::EltTyBool16:
9706 case SVETypeFlags::EltTyBool32:
9707 case SVETypeFlags::EltTyBool64:
9708 return Builder.getInt1Ty();
9709 }
9710 }
9711
9712 // Return the llvm predicate vector type corresponding to the specified element
9713 // TypeFlags.
9714 llvm::ScalableVectorType *
9715 CodeGenFunction::getSVEPredType(const SVETypeFlags &TypeFlags) {
9716 switch (TypeFlags.getEltType()) {
9717 default: llvm_unreachable("Unhandled SVETypeFlag!");
9718
9719 case SVETypeFlags::EltTyInt8:
9720 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);
9721 case SVETypeFlags::EltTyInt16:
9722 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
9723 case SVETypeFlags::EltTyInt32:
9724 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
9725 case SVETypeFlags::EltTyInt64:
9726 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
9727
9728 case SVETypeFlags::EltTyBFloat16:
9729 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
9730 case SVETypeFlags::EltTyFloat16:
9731 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
9732 case SVETypeFlags::EltTyFloat32:
9733 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
9734 case SVETypeFlags::EltTyFloat64:
9735 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
9736
9737 case SVETypeFlags::EltTyBool8:
9738 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);
9739 case SVETypeFlags::EltTyBool16:
9740 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
9741 case SVETypeFlags::EltTyBool32:
9742 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
9743 case SVETypeFlags::EltTyBool64:
9744 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
9745 }
9746 }
9747
9748 // Return the llvm vector type corresponding to the specified element TypeFlags.
9749 llvm::ScalableVectorType *
9750 CodeGenFunction::getSVEType(const SVETypeFlags &TypeFlags) {
9751 switch (TypeFlags.getEltType()) {
9752 default:
9753 llvm_unreachable("Invalid SVETypeFlag!");
9754
9755 case SVETypeFlags::EltTyInt8:
9756 return llvm::ScalableVectorType::get(Builder.getInt8Ty(), 16);
9757 case SVETypeFlags::EltTyInt16:
9758 return llvm::ScalableVectorType::get(Builder.getInt16Ty(), 8);
9759 case SVETypeFlags::EltTyInt32:
9760 return llvm::ScalableVectorType::get(Builder.getInt32Ty(), 4);
9761 case SVETypeFlags::EltTyInt64:
9762 return llvm::ScalableVectorType::get(Builder.getInt64Ty(), 2);
9763
9764 case SVETypeFlags::EltTyFloat16:
9765 return llvm::ScalableVectorType::get(Builder.getHalfTy(), 8);
9766 case SVETypeFlags::EltTyBFloat16:
9767 return llvm::ScalableVectorType::get(Builder.getBFloatTy(), 8);
9768 case SVETypeFlags::EltTyFloat32:
9769 return llvm::ScalableVectorType::get(Builder.getFloatTy(), 4);
9770 case SVETypeFlags::EltTyFloat64:
9771 return llvm::ScalableVectorType::get(Builder.getDoubleTy(), 2);
9772
9773 case SVETypeFlags::EltTyBool8:
9774 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);
9775 case SVETypeFlags::EltTyBool16:
9776 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
9777 case SVETypeFlags::EltTyBool32:
9778 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
9779 case SVETypeFlags::EltTyBool64:
9780 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
9781 }
9782 }
9783
9784 llvm::Value *
9785 CodeGenFunction::EmitSVEAllTruePred(const SVETypeFlags &TypeFlags) {
9786 Function *Ptrue =
9787 CGM.getIntrinsic(Intrinsic::aarch64_sve_ptrue, getSVEPredType(TypeFlags));
9788 return Builder.CreateCall(Ptrue, {Builder.getInt32(/*SV_ALL*/ 31)});
9789 }
9790
9791 constexpr unsigned SVEBitsPerBlock = 128;
9792
9793 static llvm::ScalableVectorType *getSVEVectorForElementType(llvm::Type *EltTy) {
9794 unsigned NumElts = SVEBitsPerBlock / EltTy->getScalarSizeInBits();
9795 return llvm::ScalableVectorType::get(EltTy, NumElts);
9796 }
9797
9798 // Reinterpret the input predicate so that it can be used to correctly isolate
9799 // the elements of the specified datatype.
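// For example, an svbool_t predicate (<vscale x 16 x i1>) paired with
// <vscale x 2 x i64> data is narrowed to <vscale x 2 x i1> via the
// llvm.aarch64.sve.convert.from.svbool intrinsic.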
9800 Value *CodeGenFunction::EmitSVEPredicateCast(Value *Pred,
9801 llvm::ScalableVectorType *VTy) {
9802
9803 if (isa<TargetExtType>(Pred->getType()) &&
9804 cast<TargetExtType>(Pred->getType())->getName() == "aarch64.svcount")
9805 return Pred;
9806
9807 auto *RTy = llvm::VectorType::get(IntegerType::get(getLLVMContext(), 1), VTy);
9808 if (Pred->getType() == RTy)
9809 return Pred;
9810
9811 unsigned IntID;
9812 llvm::Type *IntrinsicTy;
9813 switch (VTy->getMinNumElements()) {
9814 default:
9815 llvm_unreachable("unsupported element count!");
9816 case 1:
9817 case 2:
9818 case 4:
9819 case 8:
9820 IntID = Intrinsic::aarch64_sve_convert_from_svbool;
9821 IntrinsicTy = RTy;
9822 break;
9823 case 16:
9824 IntID = Intrinsic::aarch64_sve_convert_to_svbool;
9825 IntrinsicTy = Pred->getType();
9826 break;
9827 }
9828
9829 Function *F = CGM.getIntrinsic(IntID, IntrinsicTy);
9830 Value *C = Builder.CreateCall(F, Pred);
9831 assert(C->getType() == RTy && "Unexpected return type!");
9832 return C;
9833 }
9834
9835 Value *CodeGenFunction::EmitSVEGatherLoad(const SVETypeFlags &TypeFlags,
9836 SmallVectorImpl<Value *> &Ops,
9837 unsigned IntID) {
9838 auto *ResultTy = getSVEType(TypeFlags);
9839 auto *OverloadedTy =
9840 llvm::ScalableVectorType::get(SVEBuiltinMemEltTy(TypeFlags), ResultTy);
9841
9842 Function *F = nullptr;
9843 if (Ops[1]->getType()->isVectorTy())
9844 // This is the "vector base, scalar offset" case. In order to uniquely
9845 // map this built-in to an LLVM IR intrinsic, we need both the return type
9846 // and the type of the vector base.
9847 F = CGM.getIntrinsic(IntID, {OverloadedTy, Ops[1]->getType()});
9848 else
9849 // This is the "scalar base, vector offset case". The type of the offset
9850 // is encoded in the name of the intrinsic. We only need to specify the
9851 // return type in order to uniquely map this built-in to an LLVM IR
9852 // intrinsic.
9853 F = CGM.getIntrinsic(IntID, OverloadedTy);
9854
9855 // At the ACLE level there's only one predicate type, svbool_t, which is
9856 // mapped to <n x 16 x i1>. However, this might be incompatible with the
9857 // actual type being loaded. For example, when loading doubles (i64) the
9858 // predicate should be <n x 2 x i1> instead. At the IR level the type of
9859 // the predicate and the data being loaded must match. Cast to the type
9860 // expected by the intrinsic. The intrinsic itself should be defined in
9861   // a way that enforces relations between parameter types.
9862 Ops[0] = EmitSVEPredicateCast(
9863 Ops[0], cast<llvm::ScalableVectorType>(F->getArg(0)->getType()));
9864
9865 // Pass 0 when the offset is missing. This can only be applied when using
9866 // the "vector base" addressing mode for which ACLE allows no offset. The
9867 // corresponding LLVM IR always requires an offset.
9868 if (Ops.size() == 2) {
9869 assert(Ops[1]->getType()->isVectorTy() && "Scalar base requires an offset");
9870 Ops.push_back(ConstantInt::get(Int64Ty, 0));
9871 }
9872
9873 // For "vector base, scalar index" scale the index so that it becomes a
9874 // scalar offset.
9875 if (!TypeFlags.isByteIndexed() && Ops[1]->getType()->isVectorTy()) {
9876 unsigned BytesPerElt =
9877 OverloadedTy->getElementType()->getScalarSizeInBits() / 8;
9878 Ops[2] = Builder.CreateShl(Ops[2], Log2_32(BytesPerElt));
9879 }
9880
9881 Value *Call = Builder.CreateCall(F, Ops);
9882
9883 // The following sext/zext is only needed when ResultTy != OverloadedTy. In
9884 // other cases it's folded into a nop.
9885 return TypeFlags.isZExtReturn() ? Builder.CreateZExt(Call, ResultTy)
9886 : Builder.CreateSExt(Call, ResultTy);
9887 }
9888
9889 Value *CodeGenFunction::EmitSVEScatterStore(const SVETypeFlags &TypeFlags,
9890 SmallVectorImpl<Value *> &Ops,
9891 unsigned IntID) {
9892 auto *SrcDataTy = getSVEType(TypeFlags);
9893 auto *OverloadedTy =
9894 llvm::ScalableVectorType::get(SVEBuiltinMemEltTy(TypeFlags), SrcDataTy);
9895
9896 // In ACLE the source data is passed in the last argument, whereas in LLVM IR
9897 // it's the first argument. Move it accordingly.
9898 Ops.insert(Ops.begin(), Ops.pop_back_val());
9899
9900 Function *F = nullptr;
9901 if (Ops[2]->getType()->isVectorTy())
9902 // This is the "vector base, scalar offset" case. In order to uniquely
9903 // map this built-in to an LLVM IR intrinsic, we need both the return type
9904 // and the type of the vector base.
9905 F = CGM.getIntrinsic(IntID, {OverloadedTy, Ops[2]->getType()});
9906 else
9907 // This is the "scalar base, vector offset case". The type of the offset
9908 // is encoded in the name of the intrinsic. We only need to specify the
9909 // return type in order to uniquely map this built-in to an LLVM IR
9910 // intrinsic.
9911 F = CGM.getIntrinsic(IntID, OverloadedTy);
9912
9913 // Pass 0 when the offset is missing. This can only be applied when using
9914 // the "vector base" addressing mode for which ACLE allows no offset. The
9915 // corresponding LLVM IR always requires an offset.
9916 if (Ops.size() == 3) {
9917 assert(Ops[1]->getType()->isVectorTy() && "Scalar base requires an offset");
9918 Ops.push_back(ConstantInt::get(Int64Ty, 0));
9919 }
9920
9921 // Truncation is needed when SrcDataTy != OverloadedTy. In other cases it's
9922 // folded into a nop.
9923 Ops[0] = Builder.CreateTrunc(Ops[0], OverloadedTy);
9924
9925 // At the ACLE level there's only one predicate type, svbool_t, which is
9926 // mapped to <n x 16 x i1>. However, this might be incompatible with the
9927 // actual type being stored. For example, when storing doubles (i64) the
9928   // predicate should be <n x 2 x i1> instead. At the IR level the type of
9929 // the predicate and the data being stored must match. Cast to the type
9930 // expected by the intrinsic. The intrinsic itself should be defined in
9931 // a way that enforces relations between parameter types.
9932 Ops[1] = EmitSVEPredicateCast(
9933 Ops[1], cast<llvm::ScalableVectorType>(F->getArg(1)->getType()));
9934
9935 // For "vector base, scalar index" scale the index so that it becomes a
9936 // scalar offset.
9937 if (!TypeFlags.isByteIndexed() && Ops[2]->getType()->isVectorTy()) {
9938 unsigned BytesPerElt =
9939 OverloadedTy->getElementType()->getScalarSizeInBits() / 8;
9940 Ops[3] = Builder.CreateShl(Ops[3], Log2_32(BytesPerElt));
9941 }
9942
9943 return Builder.CreateCall(F, Ops);
9944 }
9945
9946 Value *CodeGenFunction::EmitSVEGatherPrefetch(const SVETypeFlags &TypeFlags,
9947 SmallVectorImpl<Value *> &Ops,
9948 unsigned IntID) {
9949 // The gather prefetches are overloaded on the vector input - this can either
9950 // be the vector of base addresses or vector of offsets.
9951 auto *OverloadedTy = dyn_cast<llvm::ScalableVectorType>(Ops[1]->getType());
9952 if (!OverloadedTy)
9953 OverloadedTy = cast<llvm::ScalableVectorType>(Ops[2]->getType());
9954
9955 // Cast the predicate from svbool_t to the right number of elements.
9956 Ops[0] = EmitSVEPredicateCast(Ops[0], OverloadedTy);
9957
9958 // vector + imm addressing modes
9959 if (Ops[1]->getType()->isVectorTy()) {
9960 if (Ops.size() == 3) {
9961 // Pass 0 for 'vector+imm' when the index is omitted.
9962 Ops.push_back(ConstantInt::get(Int64Ty, 0));
9963
9964 // The sv_prfop is the last operand in the builtin and IR intrinsic.
9965 std::swap(Ops[2], Ops[3]);
9966 } else {
9967 // Index needs to be passed as scaled offset.
9968 llvm::Type *MemEltTy = SVEBuiltinMemEltTy(TypeFlags);
9969 unsigned BytesPerElt = MemEltTy->getPrimitiveSizeInBits() / 8;
9970 if (BytesPerElt > 1)
9971 Ops[2] = Builder.CreateShl(Ops[2], Log2_32(BytesPerElt));
9972 }
9973 }
9974
9975 Function *F = CGM.getIntrinsic(IntID, OverloadedTy);
9976 return Builder.CreateCall(F, Ops);
9977 }
9978
9979 Value *CodeGenFunction::EmitSVEStructLoad(const SVETypeFlags &TypeFlags,
9980 SmallVectorImpl<Value*> &Ops,
9981 unsigned IntID) {
9982 llvm::ScalableVectorType *VTy = getSVEType(TypeFlags);
9983
9984 unsigned N;
9985 switch (IntID) {
9986 case Intrinsic::aarch64_sve_ld2_sret:
9987 case Intrinsic::aarch64_sve_ld1_pn_x2:
9988 case Intrinsic::aarch64_sve_ldnt1_pn_x2:
9989 case Intrinsic::aarch64_sve_ld2q_sret:
9990 N = 2;
9991 break;
9992 case Intrinsic::aarch64_sve_ld3_sret:
9993 case Intrinsic::aarch64_sve_ld3q_sret:
9994 N = 3;
9995 break;
9996 case Intrinsic::aarch64_sve_ld4_sret:
9997 case Intrinsic::aarch64_sve_ld1_pn_x4:
9998 case Intrinsic::aarch64_sve_ldnt1_pn_x4:
9999 case Intrinsic::aarch64_sve_ld4q_sret:
10000 N = 4;
10001 break;
10002 default:
10003 llvm_unreachable("unknown intrinsic!");
10004 }
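  // The sret-style IR intrinsics return N separate part vectors; repack them
  // into a single wide tuple vector of N * MinElts elements, which is how the
  // ACLE-level multi-vector result is represented.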
10005 auto RetTy = llvm::VectorType::get(VTy->getElementType(),
10006 VTy->getElementCount() * N);
10007
10008 Value *Predicate = EmitSVEPredicateCast(Ops[0], VTy);
10009 Value *BasePtr = Ops[1];
10010
10011 // Does the load have an offset?
10012 if (Ops.size() > 2)
10013 BasePtr = Builder.CreateGEP(VTy, BasePtr, Ops[2]);
10014
10015 Function *F = CGM.getIntrinsic(IntID, {VTy});
10016 Value *Call = Builder.CreateCall(F, {Predicate, BasePtr});
10017 unsigned MinElts = VTy->getMinNumElements();
10018 Value *Ret = llvm::PoisonValue::get(RetTy);
10019 for (unsigned I = 0; I < N; I++) {
10020 Value *Idx = ConstantInt::get(CGM.Int64Ty, I * MinElts);
10021 Value *SRet = Builder.CreateExtractValue(Call, I);
10022 Ret = Builder.CreateInsertVector(RetTy, Ret, SRet, Idx);
10023 }
10024 return Ret;
10025 }
10026
10027 Value *CodeGenFunction::EmitSVEStructStore(const SVETypeFlags &TypeFlags,
10028 SmallVectorImpl<Value*> &Ops,
10029 unsigned IntID) {
10030 llvm::ScalableVectorType *VTy = getSVEType(TypeFlags);
10031
10032 unsigned N;
10033 switch (IntID) {
10034 case Intrinsic::aarch64_sve_st2:
10035 case Intrinsic::aarch64_sve_st1_pn_x2:
10036 case Intrinsic::aarch64_sve_stnt1_pn_x2:
10037 case Intrinsic::aarch64_sve_st2q:
10038 N = 2;
10039 break;
10040 case Intrinsic::aarch64_sve_st3:
10041 case Intrinsic::aarch64_sve_st3q:
10042 N = 3;
10043 break;
10044 case Intrinsic::aarch64_sve_st4:
10045 case Intrinsic::aarch64_sve_st1_pn_x4:
10046 case Intrinsic::aarch64_sve_stnt1_pn_x4:
10047 case Intrinsic::aarch64_sve_st4q:
10048 N = 4;
10049 break;
10050 default:
10051 llvm_unreachable("unknown intrinsic!");
10052 }
10053
10054 Value *Predicate = EmitSVEPredicateCast(Ops[0], VTy);
10055 Value *BasePtr = Ops[1];
10056
10057 // Does the store have an offset?
10058 if (Ops.size() > (2 + N))
10059 BasePtr = Builder.CreateGEP(VTy, BasePtr, Ops[2]);
10060
10061 // The llvm.aarch64.sve.st2/3/4 intrinsics take legal part vectors, so we
10062 // need to break up the tuple vector.
10063 SmallVector<llvm::Value*, 5> Operands;
10064 for (unsigned I = Ops.size() - N; I < Ops.size(); ++I)
10065 Operands.push_back(Ops[I]);
10066 Operands.append({Predicate, BasePtr});
10067 Function *F = CGM.getIntrinsic(IntID, { VTy });
10068
10069 return Builder.CreateCall(F, Operands);
10070 }
10071
10072 // SVE2's svpmullb and svpmullt builtins are similar to the svpmullb_pair and
10073 // svpmullt_pair intrinsics, with the exception that their results are bitcast
10074 // to a wider type.
10075 Value *CodeGenFunction::EmitSVEPMull(const SVETypeFlags &TypeFlags,
10076 SmallVectorImpl<Value *> &Ops,
10077 unsigned BuiltinID) {
10078 // Splat scalar operand to vector (intrinsics with _n infix)
10079 if (TypeFlags.hasSplatOperand()) {
10080 unsigned OpNo = TypeFlags.getSplatOperand();
10081 Ops[OpNo] = EmitSVEDupX(Ops[OpNo]);
10082 }
10083
10084 // The pair-wise function has a narrower overloaded type.
10085 Function *F = CGM.getIntrinsic(BuiltinID, Ops[0]->getType());
10086 Value *Call = Builder.CreateCall(F, {Ops[0], Ops[1]});
10087
10088 // Now bitcast to the wider result type.
10089 llvm::ScalableVectorType *Ty = getSVEType(TypeFlags);
10090 return EmitSVEReinterpret(Call, Ty);
10091 }
10092
10093 Value *CodeGenFunction::EmitSVEMovl(const SVETypeFlags &TypeFlags,
10094 ArrayRef<Value *> Ops, unsigned BuiltinID) {
10095 llvm::Type *OverloadedTy = getSVEType(TypeFlags);
10096 Function *F = CGM.getIntrinsic(BuiltinID, OverloadedTy);
10097 return Builder.CreateCall(F, {Ops[0], Builder.getInt32(0)});
10098 }
10099
10100 Value *CodeGenFunction::EmitSVEPrefetchLoad(const SVETypeFlags &TypeFlags,
10101 SmallVectorImpl<Value *> &Ops,
10102 unsigned BuiltinID) {
10103 auto *MemEltTy = SVEBuiltinMemEltTy(TypeFlags);
10104 auto *VectorTy = getSVEVectorForElementType(MemEltTy);
10105 auto *MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy);
10106
10107 Value *Predicate = EmitSVEPredicateCast(Ops[0], MemoryTy);
10108 Value *BasePtr = Ops[1];
10109
10110   // Apply the index operand if it was not omitted.
10111 if (Ops.size() > 3)
10112 BasePtr = Builder.CreateGEP(MemoryTy, BasePtr, Ops[2]);
10113
10114 Value *PrfOp = Ops.back();
10115
10116 Function *F = CGM.getIntrinsic(BuiltinID, Predicate->getType());
10117 return Builder.CreateCall(F, {Predicate, BasePtr, PrfOp});
10118 }
10119
10120 Value *CodeGenFunction::EmitSVEMaskedLoad(const CallExpr *E,
10121 llvm::Type *ReturnTy,
10122 SmallVectorImpl<Value *> &Ops,
10123 unsigned IntrinsicID,
10124 bool IsZExtReturn) {
10125 QualType LangPTy = E->getArg(1)->getType();
10126 llvm::Type *MemEltTy = CGM.getTypes().ConvertType(
10127 LangPTy->castAs<PointerType>()->getPointeeType());
10128
10129 // The vector type that is returned may be different from the
10130 // eventual type loaded from memory.
10131 auto VectorTy = cast<llvm::ScalableVectorType>(ReturnTy);
10132 llvm::ScalableVectorType *MemoryTy = nullptr;
10133 llvm::ScalableVectorType *PredTy = nullptr;
10134 bool IsQuadLoad = false;
10135 switch (IntrinsicID) {
10136 case Intrinsic::aarch64_sve_ld1uwq:
10137 case Intrinsic::aarch64_sve_ld1udq:
10138 MemoryTy = llvm::ScalableVectorType::get(MemEltTy, 1);
10139 PredTy = llvm::ScalableVectorType::get(
10140 llvm::Type::getInt1Ty(getLLVMContext()), 1);
10141 IsQuadLoad = true;
10142 break;
10143 default:
10144 MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy);
10145 PredTy = MemoryTy;
10146 break;
10147 }
10148
10149 Value *Predicate = EmitSVEPredicateCast(Ops[0], PredTy);
10150 Value *BasePtr = Ops[1];
10151
10152 // Does the load have an offset?
10153 if (Ops.size() > 2)
10154 BasePtr = Builder.CreateGEP(MemoryTy, BasePtr, Ops[2]);
10155
10156 Function *F = CGM.getIntrinsic(IntrinsicID, IsQuadLoad ? VectorTy : MemoryTy);
10157 auto *Load =
10158 cast<llvm::Instruction>(Builder.CreateCall(F, {Predicate, BasePtr}));
10159 auto TBAAInfo = CGM.getTBAAAccessInfo(LangPTy->getPointeeType());
10160 CGM.DecorateInstructionWithTBAA(Load, TBAAInfo);
10161
10162 if (IsQuadLoad)
10163 return Load;
10164
10165 return IsZExtReturn ? Builder.CreateZExt(Load, VectorTy)
10166 : Builder.CreateSExt(Load, VectorTy);
10167 }
10168
10169 Value *CodeGenFunction::EmitSVEMaskedStore(const CallExpr *E,
10170 SmallVectorImpl<Value *> &Ops,
10171 unsigned IntrinsicID) {
10172 QualType LangPTy = E->getArg(1)->getType();
10173 llvm::Type *MemEltTy = CGM.getTypes().ConvertType(
10174 LangPTy->castAs<PointerType>()->getPointeeType());
10175
10176 // The vector type that is stored may be different from the
10177 // eventual type stored to memory.
10178 auto VectorTy = cast<llvm::ScalableVectorType>(Ops.back()->getType());
10179 auto MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy);
10180
10181 auto PredTy = MemoryTy;
10182 auto AddrMemoryTy = MemoryTy;
10183 bool IsQuadStore = false;
10184
10185 switch (IntrinsicID) {
10186 case Intrinsic::aarch64_sve_st1wq:
10187 case Intrinsic::aarch64_sve_st1dq:
10188 AddrMemoryTy = llvm::ScalableVectorType::get(MemEltTy, 1);
10189 PredTy =
10190 llvm::ScalableVectorType::get(IntegerType::get(getLLVMContext(), 1), 1);
10191 IsQuadStore = true;
10192 break;
10193 default:
10194 break;
10195 }
10196 Value *Predicate = EmitSVEPredicateCast(Ops[0], PredTy);
10197 Value *BasePtr = Ops[1];
10198
10199 // Does the store have an offset?
10200 if (Ops.size() == 4)
10201 BasePtr = Builder.CreateGEP(AddrMemoryTy, BasePtr, Ops[2]);
10202
10203 // Last value is always the data
10204 Value *Val =
10205 IsQuadStore ? Ops.back() : Builder.CreateTrunc(Ops.back(), MemoryTy);
10206
10207 Function *F =
10208 CGM.getIntrinsic(IntrinsicID, IsQuadStore ? VectorTy : MemoryTy);
10209 auto *Store =
10210 cast<llvm::Instruction>(Builder.CreateCall(F, {Val, Predicate, BasePtr}));
10211 auto TBAAInfo = CGM.getTBAAAccessInfo(LangPTy->getPointeeType());
10212 CGM.DecorateInstructionWithTBAA(Store, TBAAInfo);
10213 return Store;
10214 }
10215
10216 Value *CodeGenFunction::EmitSMELd1St1(const SVETypeFlags &TypeFlags,
10217 SmallVectorImpl<Value *> &Ops,
10218 unsigned IntID) {
10219 Ops[2] = EmitSVEPredicateCast(
10220 Ops[2], getSVEVectorForElementType(SVEBuiltinMemEltTy(TypeFlags)));
10221
10222 SmallVector<Value *> NewOps;
10223 NewOps.push_back(Ops[2]);
10224
10225 llvm::Value *BasePtr = Ops[3];
10226
10227 // If the intrinsic contains the vnum parameter, multiply it with the vector
10228 // size in bytes.
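  // I.e. the effective base becomes roughly ptr + vnum * svcntsb() bytes,
  // where svcntsb() is the streaming vector length in bytes.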
10229 if (Ops.size() == 5) {
10230 Function *StreamingVectorLength =
10231 CGM.getIntrinsic(Intrinsic::aarch64_sme_cntsb);
10232 llvm::Value *StreamingVectorLengthCall =
10233 Builder.CreateCall(StreamingVectorLength);
10234 llvm::Value *Mulvl =
10235 Builder.CreateMul(StreamingVectorLengthCall, Ops[4], "mulvl");
10236 // The type of the ptr parameter is void *, so use Int8Ty here.
10237 BasePtr = Builder.CreateGEP(Int8Ty, Ops[3], Mulvl);
10238 }
10239 NewOps.push_back(BasePtr);
10240 NewOps.push_back(Ops[0]);
10241 NewOps.push_back(Ops[1]);
10242 Function *F = CGM.getIntrinsic(IntID);
10243 return Builder.CreateCall(F, NewOps);
10244 }
10245
10246 Value *CodeGenFunction::EmitSMEReadWrite(const SVETypeFlags &TypeFlags,
10247 SmallVectorImpl<Value *> &Ops,
10248 unsigned IntID) {
10249 auto *VecTy = getSVEType(TypeFlags);
10250 Function *F = CGM.getIntrinsic(IntID, VecTy);
10251 if (TypeFlags.isReadZA())
10252 Ops[1] = EmitSVEPredicateCast(Ops[1], VecTy);
10253 else if (TypeFlags.isWriteZA())
10254 Ops[2] = EmitSVEPredicateCast(Ops[2], VecTy);
10255 return Builder.CreateCall(F, Ops);
10256 }
10257
10258 Value *CodeGenFunction::EmitSMEZero(const SVETypeFlags &TypeFlags,
10259 SmallVectorImpl<Value *> &Ops,
10260 unsigned IntID) {
10261   // The svzero_za() intrinsic zeros the entire ZA tile and has no parameters.
10262 if (Ops.size() == 0)
10263 Ops.push_back(llvm::ConstantInt::get(Int32Ty, 255));
10264 Function *F = CGM.getIntrinsic(IntID, {});
10265 return Builder.CreateCall(F, Ops);
10266 }
10267
10268 Value *CodeGenFunction::EmitSMELdrStr(const SVETypeFlags &TypeFlags,
10269 SmallVectorImpl<Value *> &Ops,
10270 unsigned IntID) {
10271 if (Ops.size() == 2)
10272 Ops.push_back(Builder.getInt32(0));
10273 else
10274 Ops[2] = Builder.CreateIntCast(Ops[2], Int32Ty, true);
10275 Function *F = CGM.getIntrinsic(IntID, {});
10276 return Builder.CreateCall(F, Ops);
10277 }
10278
10279 // Splat a scalar operand across all lanes of the given scalable vector type.
10280 // The IR-level splat is created with IRBuilder::CreateVectorSplat.
10281 Value *CodeGenFunction::EmitSVEDupX(Value *Scalar, llvm::Type *Ty) {
10282 return Builder.CreateVectorSplat(
10283 cast<llvm::VectorType>(Ty)->getElementCount(), Scalar);
10284 }
10285
10286 Value *CodeGenFunction::EmitSVEDupX(Value* Scalar) {
10287 return EmitSVEDupX(Scalar, getSVEVectorForElementType(Scalar->getType()));
10288 }
10289
10290 Value *CodeGenFunction::EmitSVEReinterpret(Value *Val, llvm::Type *Ty) {
10291 // FIXME: For big endian this needs an additional REV, or needs a separate
10292 // intrinsic that is code-generated as a no-op, because the LLVM bitcast
10293 // instruction is defined as 'bitwise' equivalent from memory point of
10294 // view (when storing/reloading), whereas the svreinterpret builtin
10295 // implements bitwise equivalent cast from register point of view.
10296 // LLVM CodeGen for a bitcast must add an explicit REV for big-endian.
10297 return Builder.CreateBitCast(Val, Ty);
10298 }
10299
10300 static void InsertExplicitZeroOperand(CGBuilderTy &Builder, llvm::Type *Ty,
10301 SmallVectorImpl<Value *> &Ops) {
10302 auto *SplatZero = Constant::getNullValue(Ty);
10303 Ops.insert(Ops.begin(), SplatZero);
10304 }
10305
10306 static void InsertExplicitUndefOperand(CGBuilderTy &Builder, llvm::Type *Ty,
10307 SmallVectorImpl<Value *> &Ops) {
10308 auto *SplatUndef = UndefValue::get(Ty);
10309 Ops.insert(Ops.begin(), SplatUndef);
10310 }
10311
10312 SmallVector<llvm::Type *, 2>
10313 CodeGenFunction::getSVEOverloadTypes(const SVETypeFlags &TypeFlags,
10314 llvm::Type *ResultType,
10315 ArrayRef<Value *> Ops) {
10316 if (TypeFlags.isOverloadNone())
10317 return {};
10318
10319 llvm::Type *DefaultType = getSVEType(TypeFlags);
10320
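  // The types returned here are used to mangle the name of the overloaded
  // LLVM intrinsic; which operand/result types participate depends on the
  // builtin's type flags.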
10321 if (TypeFlags.isOverloadWhileOrMultiVecCvt())
10322 return {DefaultType, Ops[1]->getType()};
10323
10324 if (TypeFlags.isOverloadWhileRW())
10325 return {getSVEPredType(TypeFlags), Ops[0]->getType()};
10326
10327 if (TypeFlags.isOverloadCvt())
10328 return {Ops[0]->getType(), Ops.back()->getType()};
10329
10330 if (TypeFlags.isReductionQV() && !ResultType->isScalableTy() &&
10331 ResultType->isVectorTy())
10332 return {ResultType, Ops[1]->getType()};
10333
10334 assert(TypeFlags.isOverloadDefault() && "Unexpected value for overloads");
10335 return {DefaultType};
10336 }
10337
10338 Value *CodeGenFunction::EmitSVETupleSetOrGet(const SVETypeFlags &TypeFlags,
10339 llvm::Type *Ty,
10340 ArrayRef<Value *> Ops) {
10341 assert((TypeFlags.isTupleSet() || TypeFlags.isTupleGet()) &&
10342          "Expects TypeFlags.isTupleSet() or TypeFlags.isTupleGet()");
10343
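  // Tuples are modelled as a single wide scalable vector, so a tuple get/set
  // is an extract/insert of the single-vector type at a multiple of its
  // minimum element count.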
10344 unsigned I = cast<ConstantInt>(Ops[1])->getSExtValue();
10345 auto *SingleVecTy = dyn_cast<llvm::ScalableVectorType>(
10346 TypeFlags.isTupleSet() ? Ops[2]->getType() : Ty);
10347
10348 if (!SingleVecTy)
10349 return nullptr;
10350
10351 Value *Idx = ConstantInt::get(CGM.Int64Ty,
10352 I * SingleVecTy->getMinNumElements());
10353
10354 if (TypeFlags.isTupleSet())
10355 return Builder.CreateInsertVector(Ty, Ops[0], Ops[2], Idx);
10356 return Builder.CreateExtractVector(Ty, Ops[0], Idx);
10357 }
10358
10359 Value *CodeGenFunction::EmitSVETupleCreate(const SVETypeFlags &TypeFlags,
10360 llvm::Type *Ty,
10361 ArrayRef<Value *> Ops) {
10362   assert(TypeFlags.isTupleCreate() && "Expects TypeFlags.isTupleCreate()");
10363
10364 auto *SrcTy = dyn_cast<llvm::ScalableVectorType>(Ops[0]->getType());
10365
10366 if (!SrcTy)
10367 return nullptr;
10368
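  // Build the tuple by inserting each single vector at consecutive sub-vector
  // offsets within the wide result type.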
10369 unsigned MinElts = SrcTy->getMinNumElements();
10370 Value *Call = llvm::PoisonValue::get(Ty);
10371 for (unsigned I = 0; I < Ops.size(); I++) {
10372 Value *Idx = ConstantInt::get(CGM.Int64Ty, I * MinElts);
10373 Call = Builder.CreateInsertVector(Ty, Call, Ops[I], Idx);
10374 }
10375
10376 return Call;
10377 }
10378
10379 Value *CodeGenFunction::FormSVEBuiltinResult(Value *Call) {
10380   // Intrinsics that return multiple vectors do so as a struct of scalable
10381   // vectors; repackage the parts into a single wide result vector.
10382 auto *StructTy = dyn_cast<StructType>(Call->getType());
10383 if (!StructTy)
10384 return Call;
10385
10386 auto *VTy = dyn_cast<ScalableVectorType>(StructTy->getTypeAtIndex(0U));
10387 if (!VTy)
10388 return Call;
10389 unsigned N = StructTy->getNumElements();
10390
10391   // Predicate parts may need a cast to the 16 x i1 svbool_t layout.
10392 bool IsPredTy = VTy->getElementType()->isIntegerTy(1);
10393 unsigned MinElts = IsPredTy ? 16 : VTy->getMinNumElements();
10394
10395 ScalableVectorType *WideVTy =
10396 ScalableVectorType::get(VTy->getElementType(), MinElts * N);
10397 Value *Ret = llvm::PoisonValue::get(WideVTy);
10398 for (unsigned I = 0; I < N; ++I) {
10399 Value *SRet = Builder.CreateExtractValue(Call, I);
10400 assert(SRet->getType() == VTy && "Unexpected type for result value");
10401 Value *Idx = ConstantInt::get(CGM.Int64Ty, I * MinElts);
10402
10403 if (IsPredTy)
10404 SRet = EmitSVEPredicateCast(
10405 SRet, ScalableVectorType::get(Builder.getInt1Ty(), 16));
10406
10407 Ret = Builder.CreateInsertVector(WideVTy, Ret, SRet, Idx);
10408 }
10409 Call = Ret;
10410
10411 return Call;
10412 }
10413
10414 void CodeGenFunction::GetAArch64SVEProcessedOperands(
10415 unsigned BuiltinID, const CallExpr *E, SmallVectorImpl<Value *> &Ops,
10416 SVETypeFlags TypeFlags) {
10417 // Find out if any arguments are required to be integer constant expressions.
10418 unsigned ICEArguments = 0;
10419 ASTContext::GetBuiltinTypeError Error;
10420 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
10421 assert(Error == ASTContext::GE_None && "Should not codegen an error");
10422
10423 // Tuple set/get only requires one insert/extract vector, which is
10424 // created by EmitSVETupleSetOrGet.
10425 bool IsTupleGetOrSet = TypeFlags.isTupleSet() || TypeFlags.isTupleGet();
10426
10427 for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
10428 bool IsICE = ICEArguments & (1 << i);
10429 Value *Arg = EmitScalarExpr(E->getArg(i));
10430
10431 if (IsICE) {
10432 // If this is required to be a constant, constant fold it so that we know
10433 // that the generated intrinsic gets a ConstantInt.
10434 std::optional<llvm::APSInt> Result =
10435 E->getArg(i)->getIntegerConstantExpr(getContext());
10436 assert(Result && "Expected argument to be a constant");
10437
10438       // Immediates for SVE LLVM intrinsics are always 32-bit. We can safely
10439 // truncate because the immediate has been range checked and no valid
10440 // immediate requires more than a handful of bits.
10441 *Result = Result->extOrTrunc(32);
10442 Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), *Result));
10443 continue;
10444 }
10445
10446 if (IsTupleGetOrSet || !isa<ScalableVectorType>(Arg->getType())) {
10447 Ops.push_back(Arg);
10448 continue;
10449 }
10450
10451 auto *VTy = cast<ScalableVectorType>(Arg->getType());
10452 unsigned MinElts = VTy->getMinNumElements();
10453 bool IsPred = VTy->getElementType()->isIntegerTy(1);
10454 unsigned N = (MinElts * VTy->getScalarSizeInBits()) / (IsPred ? 16 : 128);
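    // N is the number of legal-sized parts in a multi-vector argument: data
    // vectors are split into 128-bit chunks, predicates into 16 x i1 chunks.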
10455
10456 if (N == 1) {
10457 Ops.push_back(Arg);
10458 continue;
10459 }
10460
10461 for (unsigned I = 0; I < N; ++I) {
10462 Value *Idx = ConstantInt::get(CGM.Int64Ty, (I * MinElts) / N);
10463 auto *NewVTy =
10464 ScalableVectorType::get(VTy->getElementType(), MinElts / N);
10465 Ops.push_back(Builder.CreateExtractVector(NewVTy, Arg, Idx));
10466 }
10467 }
10468 }
10469
10470 Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID,
10471 const CallExpr *E) {
10472 llvm::Type *Ty = ConvertType(E->getType());
10473 if (BuiltinID >= SVE::BI__builtin_sve_reinterpret_s8_s8 &&
10474 BuiltinID <= SVE::BI__builtin_sve_reinterpret_f64_f64_x4) {
10475 Value *Val = EmitScalarExpr(E->getArg(0));
10476 return EmitSVEReinterpret(Val, Ty);
10477 }
10478
10479 auto *Builtin = findARMVectorIntrinsicInMap(AArch64SVEIntrinsicMap, BuiltinID,
10480 AArch64SVEIntrinsicsProvenSorted);
10481
10482 llvm::SmallVector<Value *, 4> Ops;
10483 SVETypeFlags TypeFlags(Builtin->TypeModifier);
10484 GetAArch64SVEProcessedOperands(BuiltinID, E, Ops, TypeFlags);
10485
10486 if (TypeFlags.isLoad())
10487 return EmitSVEMaskedLoad(E, Ty, Ops, Builtin->LLVMIntrinsic,
10488 TypeFlags.isZExtReturn());
10489 else if (TypeFlags.isStore())
10490 return EmitSVEMaskedStore(E, Ops, Builtin->LLVMIntrinsic);
10491 else if (TypeFlags.isGatherLoad())
10492 return EmitSVEGatherLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10493 else if (TypeFlags.isScatterStore())
10494 return EmitSVEScatterStore(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10495 else if (TypeFlags.isPrefetch())
10496 return EmitSVEPrefetchLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10497 else if (TypeFlags.isGatherPrefetch())
10498 return EmitSVEGatherPrefetch(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10499 else if (TypeFlags.isStructLoad())
10500 return EmitSVEStructLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10501 else if (TypeFlags.isStructStore())
10502 return EmitSVEStructStore(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10503 else if (TypeFlags.isTupleSet() || TypeFlags.isTupleGet())
10504 return EmitSVETupleSetOrGet(TypeFlags, Ty, Ops);
10505 else if (TypeFlags.isTupleCreate())
10506 return EmitSVETupleCreate(TypeFlags, Ty, Ops);
10507 else if (TypeFlags.isUndef())
10508 return UndefValue::get(Ty);
10509 else if (Builtin->LLVMIntrinsic != 0) {
10510 if (TypeFlags.getMergeType() == SVETypeFlags::MergeZeroExp)
10511 InsertExplicitZeroOperand(Builder, Ty, Ops);
10512
10513 if (TypeFlags.getMergeType() == SVETypeFlags::MergeAnyExp)
10514 InsertExplicitUndefOperand(Builder, Ty, Ops);
10515
10516 // Some ACLE builtins leave out the argument to specify the predicate
10517 // pattern, which is expected to be expanded to an SV_ALL pattern.
10518 if (TypeFlags.isAppendSVALL())
10519 Ops.push_back(Builder.getInt32(/*SV_ALL*/ 31));
10520 if (TypeFlags.isInsertOp1SVALL())
10521 Ops.insert(&Ops[1], Builder.getInt32(/*SV_ALL*/ 31));
10522
10523 // Predicates must match the main datatype.
10524 for (unsigned i = 0, e = Ops.size(); i != e; ++i)
10525 if (auto PredTy = dyn_cast<llvm::VectorType>(Ops[i]->getType()))
10526 if (PredTy->getElementType()->isIntegerTy(1))
10527 Ops[i] = EmitSVEPredicateCast(Ops[i], getSVEType(TypeFlags));
10528
10529 // Splat scalar operand to vector (intrinsics with _n infix)
10530 if (TypeFlags.hasSplatOperand()) {
10531 unsigned OpNo = TypeFlags.getSplatOperand();
10532 Ops[OpNo] = EmitSVEDupX(Ops[OpNo]);
10533 }
10534
10535 if (TypeFlags.isReverseCompare())
10536 std::swap(Ops[1], Ops[2]);
10537 else if (TypeFlags.isReverseUSDOT())
10538 std::swap(Ops[1], Ops[2]);
10539 else if (TypeFlags.isReverseMergeAnyBinOp() &&
10540 TypeFlags.getMergeType() == SVETypeFlags::MergeAny)
10541 std::swap(Ops[1], Ops[2]);
10542 else if (TypeFlags.isReverseMergeAnyAccOp() &&
10543 TypeFlags.getMergeType() == SVETypeFlags::MergeAny)
10544 std::swap(Ops[1], Ops[3]);
10545
10546 // Predicated intrinsics with _z suffix need a select w/ zeroinitializer.
10547 if (TypeFlags.getMergeType() == SVETypeFlags::MergeZero) {
10548 llvm::Type *OpndTy = Ops[1]->getType();
10549 auto *SplatZero = Constant::getNullValue(OpndTy);
10550 Ops[1] = Builder.CreateSelect(Ops[0], Ops[1], SplatZero);
10551 }
10552
10553 Function *F = CGM.getIntrinsic(Builtin->LLVMIntrinsic,
10554 getSVEOverloadTypes(TypeFlags, Ty, Ops));
10555 Value *Call = Builder.CreateCall(F, Ops);
10556
10557 // Predicate results must be converted to svbool_t.
10558 if (auto PredTy = dyn_cast<llvm::VectorType>(Call->getType()))
10559 if (PredTy->getScalarType()->isIntegerTy(1))
10560 Call = EmitSVEPredicateCast(Call, cast<llvm::ScalableVectorType>(Ty));
10561
10562 return FormSVEBuiltinResult(Call);
10563 }
10564
10565 switch (BuiltinID) {
10566 default:
10567 return nullptr;
10568
10569 case SVE::BI__builtin_sve_svreinterpret_b: {
10570 auto SVCountTy =
10571 llvm::TargetExtType::get(getLLVMContext(), "aarch64.svcount");
10572 Function *CastFromSVCountF =
10573 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool, SVCountTy);
10574 return Builder.CreateCall(CastFromSVCountF, Ops[0]);
10575 }
10576 case SVE::BI__builtin_sve_svreinterpret_c: {
10577 auto SVCountTy =
10578 llvm::TargetExtType::get(getLLVMContext(), "aarch64.svcount");
10579 Function *CastToSVCountF =
10580 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool, SVCountTy);
10581 return Builder.CreateCall(CastToSVCountF, Ops[0]);
10582 }
10583
10584 case SVE::BI__builtin_sve_svpsel_lane_b8:
10585 case SVE::BI__builtin_sve_svpsel_lane_b16:
10586 case SVE::BI__builtin_sve_svpsel_lane_b32:
10587 case SVE::BI__builtin_sve_svpsel_lane_b64:
10588 case SVE::BI__builtin_sve_svpsel_lane_c8:
10589 case SVE::BI__builtin_sve_svpsel_lane_c16:
10590 case SVE::BI__builtin_sve_svpsel_lane_c32:
10591 case SVE::BI__builtin_sve_svpsel_lane_c64: {
10592 bool IsSVCount = isa<TargetExtType>(Ops[0]->getType());
10593 assert(((!IsSVCount || cast<TargetExtType>(Ops[0]->getType())->getName() ==
10594 "aarch64.svcount")) &&
10595 "Unexpected TargetExtType");
10596 auto SVCountTy =
10597 llvm::TargetExtType::get(getLLVMContext(), "aarch64.svcount");
10598 Function *CastFromSVCountF =
10599 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool, SVCountTy);
10600 Function *CastToSVCountF =
10601 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool, SVCountTy);
10602
10603 auto OverloadedTy = getSVEType(SVETypeFlags(Builtin->TypeModifier));
10604 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_psel, OverloadedTy);
10605 llvm::Value *Ops0 =
10606 IsSVCount ? Builder.CreateCall(CastFromSVCountF, Ops[0]) : Ops[0];
10607 llvm::Value *Ops1 = EmitSVEPredicateCast(Ops[1], OverloadedTy);
10608 llvm::Value *PSel = Builder.CreateCall(F, {Ops0, Ops1, Ops[2]});
10609 return IsSVCount ? Builder.CreateCall(CastToSVCountF, PSel) : PSel;
10610 }
10611 case SVE::BI__builtin_sve_svmov_b_z: {
10612 // svmov_b_z(pg, op) <=> svand_b_z(pg, op, op)
10613 SVETypeFlags TypeFlags(Builtin->TypeModifier);
10614 llvm::Type* OverloadedTy = getSVEType(TypeFlags);
10615 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_and_z, OverloadedTy);
10616 return Builder.CreateCall(F, {Ops[0], Ops[1], Ops[1]});
10617 }
10618
10619 case SVE::BI__builtin_sve_svnot_b_z: {
10620 // svnot_b_z(pg, op) <=> sveor_b_z(pg, op, pg)
10621 SVETypeFlags TypeFlags(Builtin->TypeModifier);
10622 llvm::Type* OverloadedTy = getSVEType(TypeFlags);
10623 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_eor_z, OverloadedTy);
10624 return Builder.CreateCall(F, {Ops[0], Ops[1], Ops[0]});
10625 }
10626
10627 case SVE::BI__builtin_sve_svmovlb_u16:
10628 case SVE::BI__builtin_sve_svmovlb_u32:
10629 case SVE::BI__builtin_sve_svmovlb_u64:
10630 return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_ushllb);
10631
10632 case SVE::BI__builtin_sve_svmovlb_s16:
10633 case SVE::BI__builtin_sve_svmovlb_s32:
10634 case SVE::BI__builtin_sve_svmovlb_s64:
10635 return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_sshllb);
10636
10637 case SVE::BI__builtin_sve_svmovlt_u16:
10638 case SVE::BI__builtin_sve_svmovlt_u32:
10639 case SVE::BI__builtin_sve_svmovlt_u64:
10640 return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_ushllt);
10641
10642 case SVE::BI__builtin_sve_svmovlt_s16:
10643 case SVE::BI__builtin_sve_svmovlt_s32:
10644 case SVE::BI__builtin_sve_svmovlt_s64:
10645 return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_sshllt);
10646
10647 case SVE::BI__builtin_sve_svpmullt_u16:
10648 case SVE::BI__builtin_sve_svpmullt_u64:
10649 case SVE::BI__builtin_sve_svpmullt_n_u16:
10650 case SVE::BI__builtin_sve_svpmullt_n_u64:
10651 return EmitSVEPMull(TypeFlags, Ops, Intrinsic::aarch64_sve_pmullt_pair);
10652
10653 case SVE::BI__builtin_sve_svpmullb_u16:
10654 case SVE::BI__builtin_sve_svpmullb_u64:
10655 case SVE::BI__builtin_sve_svpmullb_n_u16:
10656 case SVE::BI__builtin_sve_svpmullb_n_u64:
10657 return EmitSVEPMull(TypeFlags, Ops, Intrinsic::aarch64_sve_pmullb_pair);
10658
10659 case SVE::BI__builtin_sve_svdup_n_b8:
10660 case SVE::BI__builtin_sve_svdup_n_b16:
10661 case SVE::BI__builtin_sve_svdup_n_b32:
10662 case SVE::BI__builtin_sve_svdup_n_b64: {
10663 Value *CmpNE =
10664 Builder.CreateICmpNE(Ops[0], Constant::getNullValue(Ops[0]->getType()));
10665 llvm::ScalableVectorType *OverloadedTy = getSVEType(TypeFlags);
10666 Value *Dup = EmitSVEDupX(CmpNE, OverloadedTy);
10667 return EmitSVEPredicateCast(Dup, cast<llvm::ScalableVectorType>(Ty));
10668 }
10669
10670 case SVE::BI__builtin_sve_svdupq_n_b8:
10671 case SVE::BI__builtin_sve_svdupq_n_b16:
10672 case SVE::BI__builtin_sve_svdupq_n_b32:
10673 case SVE::BI__builtin_sve_svdupq_n_b64:
10674 case SVE::BI__builtin_sve_svdupq_n_u8:
10675 case SVE::BI__builtin_sve_svdupq_n_s8:
10676 case SVE::BI__builtin_sve_svdupq_n_u64:
10677 case SVE::BI__builtin_sve_svdupq_n_f64:
10678 case SVE::BI__builtin_sve_svdupq_n_s64:
10679 case SVE::BI__builtin_sve_svdupq_n_u16:
10680 case SVE::BI__builtin_sve_svdupq_n_f16:
10681 case SVE::BI__builtin_sve_svdupq_n_bf16:
10682 case SVE::BI__builtin_sve_svdupq_n_s16:
10683 case SVE::BI__builtin_sve_svdupq_n_u32:
10684 case SVE::BI__builtin_sve_svdupq_n_f32:
10685 case SVE::BI__builtin_sve_svdupq_n_s32: {
10686     // These builtins are implemented by building a fixed-length vector from
10687     // the scalar operands and replicating its 128-bit segment with dupq_lane.
10688 unsigned NumOpnds = Ops.size();
10689
10690 bool IsBoolTy =
10691 cast<llvm::VectorType>(Ty)->getElementType()->isIntegerTy(1);
10692
10693     // For svdupq_n_b* the element type is an integer of width 128/NumOpnds
10694     // bits, so that the compare can use the width that is natural for the
10695     // expected number of predicate lanes.
10696 llvm::Type *EltTy = Ops[0]->getType();
10697 if (IsBoolTy)
10698 EltTy = IntegerType::get(getLLVMContext(), SVEBitsPerBlock / NumOpnds);
10699
10700 SmallVector<llvm::Value *, 16> VecOps;
10701 for (unsigned I = 0; I < NumOpnds; ++I)
10702 VecOps.push_back(Builder.CreateZExt(Ops[I], EltTy));
10703 Value *Vec = BuildVector(VecOps);
10704
10705 llvm::Type *OverloadedTy = getSVEVectorForElementType(EltTy);
10706 Value *InsertSubVec = Builder.CreateInsertVector(
10707 OverloadedTy, PoisonValue::get(OverloadedTy), Vec, Builder.getInt64(0));
10708
10709 Function *F =
10710 CGM.getIntrinsic(Intrinsic::aarch64_sve_dupq_lane, OverloadedTy);
10711 Value *DupQLane =
10712 Builder.CreateCall(F, {InsertSubVec, Builder.getInt64(0)});
10713
10714 if (!IsBoolTy)
10715 return DupQLane;
10716
10717 SVETypeFlags TypeFlags(Builtin->TypeModifier);
10718 Value *Pred = EmitSVEAllTruePred(TypeFlags);
10719
10720 // For svdupq_n_b* we need to add an additional 'cmpne' with '0'.
10721 F = CGM.getIntrinsic(NumOpnds == 2 ? Intrinsic::aarch64_sve_cmpne
10722 : Intrinsic::aarch64_sve_cmpne_wide,
10723 OverloadedTy);
10724 Value *Call = Builder.CreateCall(
10725 F, {Pred, DupQLane, EmitSVEDupX(Builder.getInt64(0))});
10726 return EmitSVEPredicateCast(Call, cast<llvm::ScalableVectorType>(Ty));
10727 }
10728
10729 case SVE::BI__builtin_sve_svpfalse_b:
10730 return ConstantInt::getFalse(Ty);
10731
10732 case SVE::BI__builtin_sve_svpfalse_c: {
10733 auto SVBoolTy = ScalableVectorType::get(Builder.getInt1Ty(), 16);
10734 Function *CastToSVCountF =
10735 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool, Ty);
10736 return Builder.CreateCall(CastToSVCountF, ConstantInt::getFalse(SVBoolTy));
10737 }
10738
10739 case SVE::BI__builtin_sve_svlen_bf16:
10740 case SVE::BI__builtin_sve_svlen_f16:
10741 case SVE::BI__builtin_sve_svlen_f32:
10742 case SVE::BI__builtin_sve_svlen_f64:
10743 case SVE::BI__builtin_sve_svlen_s8:
10744 case SVE::BI__builtin_sve_svlen_s16:
10745 case SVE::BI__builtin_sve_svlen_s32:
10746 case SVE::BI__builtin_sve_svlen_s64:
10747 case SVE::BI__builtin_sve_svlen_u8:
10748 case SVE::BI__builtin_sve_svlen_u16:
10749 case SVE::BI__builtin_sve_svlen_u32:
10750 case SVE::BI__builtin_sve_svlen_u64: {
10751 SVETypeFlags TF(Builtin->TypeModifier);
10752 auto VTy = cast<llvm::VectorType>(getSVEType(TF));
10753 auto *NumEls =
10754 llvm::ConstantInt::get(Ty, VTy->getElementCount().getKnownMinValue());
10755
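    // svlen returns the runtime element count: vscale multiplied by the
    // minimum element count of the corresponding SVE vector type.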
10756 Function *F = CGM.getIntrinsic(Intrinsic::vscale, Ty);
10757 return Builder.CreateMul(NumEls, Builder.CreateCall(F));
10758 }
10759
10760 case SVE::BI__builtin_sve_svtbl2_u8:
10761 case SVE::BI__builtin_sve_svtbl2_s8:
10762 case SVE::BI__builtin_sve_svtbl2_u16:
10763 case SVE::BI__builtin_sve_svtbl2_s16:
10764 case SVE::BI__builtin_sve_svtbl2_u32:
10765 case SVE::BI__builtin_sve_svtbl2_s32:
10766 case SVE::BI__builtin_sve_svtbl2_u64:
10767 case SVE::BI__builtin_sve_svtbl2_s64:
10768 case SVE::BI__builtin_sve_svtbl2_f16:
10769 case SVE::BI__builtin_sve_svtbl2_bf16:
10770 case SVE::BI__builtin_sve_svtbl2_f32:
10771 case SVE::BI__builtin_sve_svtbl2_f64: {
10772 SVETypeFlags TF(Builtin->TypeModifier);
10773 auto VTy = cast<llvm::ScalableVectorType>(getSVEType(TF));
10774 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_tbl2, VTy);
10775 return Builder.CreateCall(F, Ops);
10776 }
10777
10778 case SVE::BI__builtin_sve_svset_neonq_s8:
10779 case SVE::BI__builtin_sve_svset_neonq_s16:
10780 case SVE::BI__builtin_sve_svset_neonq_s32:
10781 case SVE::BI__builtin_sve_svset_neonq_s64:
10782 case SVE::BI__builtin_sve_svset_neonq_u8:
10783 case SVE::BI__builtin_sve_svset_neonq_u16:
10784 case SVE::BI__builtin_sve_svset_neonq_u32:
10785 case SVE::BI__builtin_sve_svset_neonq_u64:
10786 case SVE::BI__builtin_sve_svset_neonq_f16:
10787 case SVE::BI__builtin_sve_svset_neonq_f32:
10788 case SVE::BI__builtin_sve_svset_neonq_f64:
10789 case SVE::BI__builtin_sve_svset_neonq_bf16: {
10790 return Builder.CreateInsertVector(Ty, Ops[0], Ops[1], Builder.getInt64(0));
10791 }
10792
10793 case SVE::BI__builtin_sve_svget_neonq_s8:
10794 case SVE::BI__builtin_sve_svget_neonq_s16:
10795 case SVE::BI__builtin_sve_svget_neonq_s32:
10796 case SVE::BI__builtin_sve_svget_neonq_s64:
10797 case SVE::BI__builtin_sve_svget_neonq_u8:
10798 case SVE::BI__builtin_sve_svget_neonq_u16:
10799 case SVE::BI__builtin_sve_svget_neonq_u32:
10800 case SVE::BI__builtin_sve_svget_neonq_u64:
10801 case SVE::BI__builtin_sve_svget_neonq_f16:
10802 case SVE::BI__builtin_sve_svget_neonq_f32:
10803 case SVE::BI__builtin_sve_svget_neonq_f64:
10804 case SVE::BI__builtin_sve_svget_neonq_bf16: {
10805 return Builder.CreateExtractVector(Ty, Ops[0], Builder.getInt64(0));
10806 }
10807
10808 case SVE::BI__builtin_sve_svdup_neonq_s8:
10809 case SVE::BI__builtin_sve_svdup_neonq_s16:
10810 case SVE::BI__builtin_sve_svdup_neonq_s32:
10811 case SVE::BI__builtin_sve_svdup_neonq_s64:
10812 case SVE::BI__builtin_sve_svdup_neonq_u8:
10813 case SVE::BI__builtin_sve_svdup_neonq_u16:
10814 case SVE::BI__builtin_sve_svdup_neonq_u32:
10815 case SVE::BI__builtin_sve_svdup_neonq_u64:
10816 case SVE::BI__builtin_sve_svdup_neonq_f16:
10817 case SVE::BI__builtin_sve_svdup_neonq_f32:
10818 case SVE::BI__builtin_sve_svdup_neonq_f64:
10819 case SVE::BI__builtin_sve_svdup_neonq_bf16: {
10820 Value *Insert = Builder.CreateInsertVector(Ty, PoisonValue::get(Ty), Ops[0],
10821 Builder.getInt64(0));
10822 return Builder.CreateIntrinsic(Intrinsic::aarch64_sve_dupq_lane, {Ty},
10823 {Insert, Builder.getInt64(0)});
10824 }
10825 }
10826
10827 /// Should not happen
10828 return nullptr;
10829 }
10830
10831 static void swapCommutativeSMEOperands(unsigned BuiltinID,
10832 SmallVectorImpl<Value *> &Ops) {
10833 unsigned MultiVec;
10834 switch (BuiltinID) {
10835 default:
10836 return;
10837 case SME::BI__builtin_sme_svsumla_za32_s8_vg4x1:
10838 MultiVec = 1;
10839 break;
10840 case SME::BI__builtin_sme_svsumla_za32_s8_vg4x2:
10841 case SME::BI__builtin_sme_svsudot_za32_s8_vg1x2:
10842 MultiVec = 2;
10843 break;
10844 case SME::BI__builtin_sme_svsudot_za32_s8_vg1x4:
10845 case SME::BI__builtin_sme_svsumla_za32_s8_vg4x4:
10846 MultiVec = 4;
10847 break;
10848 }
10849
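  // Exchange each operand in the first multi-vector group (Ops[1..MultiVec])
  // with its counterpart in the second group; Ops[0] is left untouched.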
10850 if (MultiVec > 0)
10851 for (unsigned I = 0; I < MultiVec; ++I)
10852 std::swap(Ops[I + 1], Ops[I + 1 + MultiVec]);
10853 }
10854
10855 Value *CodeGenFunction::EmitAArch64SMEBuiltinExpr(unsigned BuiltinID,
10856 const CallExpr *E) {
10857 auto *Builtin = findARMVectorIntrinsicInMap(AArch64SMEIntrinsicMap, BuiltinID,
10858 AArch64SMEIntrinsicsProvenSorted);
10859
10860 llvm::SmallVector<Value *, 4> Ops;
10861 SVETypeFlags TypeFlags(Builtin->TypeModifier);
10862 GetAArch64SVEProcessedOperands(BuiltinID, E, Ops, TypeFlags);
10863
10864 if (TypeFlags.isLoad() || TypeFlags.isStore())
10865 return EmitSMELd1St1(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10866 else if (TypeFlags.isReadZA() || TypeFlags.isWriteZA())
10867 return EmitSMEReadWrite(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10868 else if (BuiltinID == SME::BI__builtin_sme_svzero_mask_za ||
10869 BuiltinID == SME::BI__builtin_sme_svzero_za)
10870 return EmitSMEZero(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10871 else if (BuiltinID == SME::BI__builtin_sme_svldr_vnum_za ||
10872 BuiltinID == SME::BI__builtin_sme_svstr_vnum_za ||
10873 BuiltinID == SME::BI__builtin_sme_svldr_za ||
10874 BuiltinID == SME::BI__builtin_sme_svstr_za)
10875 return EmitSMELdrStr(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10876
10877   // Handle builtins that require their multi-vector operands to be swapped.
10878 swapCommutativeSMEOperands(BuiltinID, Ops);
10879
10880 // Should not happen!
10881 if (Builtin->LLVMIntrinsic == 0)
10882 return nullptr;
10883
10884 // Predicates must match the main datatype.
10885 for (unsigned i = 0, e = Ops.size(); i != e; ++i)
10886 if (auto PredTy = dyn_cast<llvm::VectorType>(Ops[i]->getType()))
10887 if (PredTy->getElementType()->isIntegerTy(1))
10888 Ops[i] = EmitSVEPredicateCast(Ops[i], getSVEType(TypeFlags));
10889
10890 Function *F =
10891 TypeFlags.isOverloadNone()
10892 ? CGM.getIntrinsic(Builtin->LLVMIntrinsic)
10893 : CGM.getIntrinsic(Builtin->LLVMIntrinsic, {getSVEType(TypeFlags)});
10894 Value *Call = Builder.CreateCall(F, Ops);
10895
10896 return FormSVEBuiltinResult(Call);
10897 }
10898
10899 Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
10900 const CallExpr *E,
10901 llvm::Triple::ArchType Arch) {
10902 if (BuiltinID >= clang::AArch64::FirstSVEBuiltin &&
10903 BuiltinID <= clang::AArch64::LastSVEBuiltin)
10904 return EmitAArch64SVEBuiltinExpr(BuiltinID, E);
10905
10906 if (BuiltinID >= clang::AArch64::FirstSMEBuiltin &&
10907 BuiltinID <= clang::AArch64::LastSMEBuiltin)
10908 return EmitAArch64SMEBuiltinExpr(BuiltinID, E);
10909
10910 if (BuiltinID == Builtin::BI__builtin_cpu_supports)
10911 return EmitAArch64CpuSupports(E);
10912
10913 unsigned HintID = static_cast<unsigned>(-1);
10914 switch (BuiltinID) {
10915 default: break;
10916 case clang::AArch64::BI__builtin_arm_nop:
10917 HintID = 0;
10918 break;
10919 case clang::AArch64::BI__builtin_arm_yield:
10920 case clang::AArch64::BI__yield:
10921 HintID = 1;
10922 break;
10923 case clang::AArch64::BI__builtin_arm_wfe:
10924 case clang::AArch64::BI__wfe:
10925 HintID = 2;
10926 break;
10927 case clang::AArch64::BI__builtin_arm_wfi:
10928 case clang::AArch64::BI__wfi:
10929 HintID = 3;
10930 break;
10931 case clang::AArch64::BI__builtin_arm_sev:
10932 case clang::AArch64::BI__sev:
10933 HintID = 4;
10934 break;
10935 case clang::AArch64::BI__builtin_arm_sevl:
10936 case clang::AArch64::BI__sevl:
10937 HintID = 5;
10938 break;
10939 }
10940
10941 if (HintID != static_cast<unsigned>(-1)) {
10942 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_hint);
10943 return Builder.CreateCall(F, llvm::ConstantInt::get(Int32Ty, HintID));
10944 }
10945
10946 if (BuiltinID == clang::AArch64::BI__builtin_arm_trap) {
10947 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_break);
10948 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
10949 return Builder.CreateCall(F, Builder.CreateZExt(Arg, CGM.Int32Ty));
10950 }
10951
10952 if (BuiltinID == clang::AArch64::BI__builtin_arm_get_sme_state) {
10953 // Create call to __arm_sme_state and store the results to the two pointers.
10954 CallInst *CI = EmitRuntimeCall(CGM.CreateRuntimeFunction(
10955 llvm::FunctionType::get(StructType::get(CGM.Int64Ty, CGM.Int64Ty), {},
10956 false),
10957 "__arm_sme_state"));
10958 auto Attrs = AttributeList().addFnAttribute(getLLVMContext(),
10959 "aarch64_pstate_sm_compatible");
10960 CI->setAttributes(Attrs);
10961 CI->setCallingConv(
10962 llvm::CallingConv::
10963 AArch64_SME_ABI_Support_Routines_PreserveMost_From_X2);
10964 Builder.CreateStore(Builder.CreateExtractValue(CI, 0),
10965 EmitPointerWithAlignment(E->getArg(0)));
10966 return Builder.CreateStore(Builder.CreateExtractValue(CI, 1),
10967 EmitPointerWithAlignment(E->getArg(1)));
10968 }
10969
10970 if (BuiltinID == clang::AArch64::BI__builtin_arm_rbit) {
10971 assert((getContext().getTypeSize(E->getType()) == 32) &&
10972 "rbit of unusual size!");
10973 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
10974 return Builder.CreateCall(
10975 CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
10976 }
10977 if (BuiltinID == clang::AArch64::BI__builtin_arm_rbit64) {
10978 assert((getContext().getTypeSize(E->getType()) == 64) &&
10979 "rbit of unusual size!");
10980 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
10981 return Builder.CreateCall(
10982 CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
10983 }
10984
10985 if (BuiltinID == clang::AArch64::BI__builtin_arm_clz ||
10986 BuiltinID == clang::AArch64::BI__builtin_arm_clz64) {
10987 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
10988 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Arg->getType());
10989 Value *Res = Builder.CreateCall(F, {Arg, Builder.getInt1(false)});
10990 if (BuiltinID == clang::AArch64::BI__builtin_arm_clz64)
10991 Res = Builder.CreateTrunc(Res, Builder.getInt32Ty());
10992 return Res;
10993 }
10994
10995 if (BuiltinID == clang::AArch64::BI__builtin_arm_cls) {
10996 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
10997 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_cls), Arg,
10998 "cls");
10999 }
11000 if (BuiltinID == clang::AArch64::BI__builtin_arm_cls64) {
11001 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
11002 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_cls64), Arg,
11003 "cls");
11004 }
11005
11006 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint32zf ||
11007 BuiltinID == clang::AArch64::BI__builtin_arm_rint32z) {
11008 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
11009 llvm::Type *Ty = Arg->getType();
11010 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint32z, Ty),
11011 Arg, "frint32z");
11012 }
11013
11014 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint64zf ||
11015 BuiltinID == clang::AArch64::BI__builtin_arm_rint64z) {
11016 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
11017 llvm::Type *Ty = Arg->getType();
11018 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint64z, Ty),
11019 Arg, "frint64z");
11020 }
11021
11022 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint32xf ||
11023 BuiltinID == clang::AArch64::BI__builtin_arm_rint32x) {
11024 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
11025 llvm::Type *Ty = Arg->getType();
11026 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint32x, Ty),
11027 Arg, "frint32x");
11028 }
11029
11030 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint64xf ||
11031 BuiltinID == clang::AArch64::BI__builtin_arm_rint64x) {
11032 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
11033 llvm::Type *Ty = Arg->getType();
11034 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint64x, Ty),
11035 Arg, "frint64x");
11036 }
11037
11038 if (BuiltinID == clang::AArch64::BI__builtin_arm_jcvt) {
11039 assert((getContext().getTypeSize(E->getType()) == 32) &&
11040 "__jcvt of unusual size!");
11041 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
11042 return Builder.CreateCall(
11043 CGM.getIntrinsic(Intrinsic::aarch64_fjcvtzs), Arg);
11044 }
11045
11046 if (BuiltinID == clang::AArch64::BI__builtin_arm_ld64b ||
11047 BuiltinID == clang::AArch64::BI__builtin_arm_st64b ||
11048 BuiltinID == clang::AArch64::BI__builtin_arm_st64bv ||
11049 BuiltinID == clang::AArch64::BI__builtin_arm_st64bv0) {
11050 llvm::Value *MemAddr = EmitScalarExpr(E->getArg(0));
11051 llvm::Value *ValPtr = EmitScalarExpr(E->getArg(1));
11052
11053 if (BuiltinID == clang::AArch64::BI__builtin_arm_ld64b) {
11054 // Load from the address via an LLVM intrinsic, receiving a
11055 // tuple of 8 i64 words, and store each one to ValPtr.
11056 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_ld64b);
11057 llvm::Value *Val = Builder.CreateCall(F, MemAddr);
11058 llvm::Value *ToRet;
11059 for (size_t i = 0; i < 8; i++) {
11060 llvm::Value *ValOffsetPtr =
11061 Builder.CreateGEP(Int64Ty, ValPtr, Builder.getInt32(i));
11062 Address Addr =
11063 Address(ValOffsetPtr, Int64Ty, CharUnits::fromQuantity(8));
11064 ToRet = Builder.CreateStore(Builder.CreateExtractValue(Val, i), Addr);
11065 }
11066 return ToRet;
11067 } else {
11068 // Load 8 i64 words from ValPtr, and store them to the address
11069 // via an LLVM intrinsic.
11070 SmallVector<llvm::Value *, 9> Args;
11071 Args.push_back(MemAddr);
11072 for (size_t i = 0; i < 8; i++) {
11073 llvm::Value *ValOffsetPtr =
11074 Builder.CreateGEP(Int64Ty, ValPtr, Builder.getInt32(i));
11075 Address Addr =
11076 Address(ValOffsetPtr, Int64Ty, CharUnits::fromQuantity(8));
11077 Args.push_back(Builder.CreateLoad(Addr));
11078 }
11079
11080 auto Intr = (BuiltinID == clang::AArch64::BI__builtin_arm_st64b
11081 ? Intrinsic::aarch64_st64b
11082 : BuiltinID == clang::AArch64::BI__builtin_arm_st64bv
11083 ? Intrinsic::aarch64_st64bv
11084 : Intrinsic::aarch64_st64bv0);
11085 Function *F = CGM.getIntrinsic(Intr);
11086 return Builder.CreateCall(F, Args);
11087 }
11088 }
11089
11090 if (BuiltinID == clang::AArch64::BI__builtin_arm_rndr ||
11091 BuiltinID == clang::AArch64::BI__builtin_arm_rndrrs) {
11092
11093 auto Intr = (BuiltinID == clang::AArch64::BI__builtin_arm_rndr
11094 ? Intrinsic::aarch64_rndr
11095 : Intrinsic::aarch64_rndrrs);
11096 Function *F = CGM.getIntrinsic(Intr);
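    // Both intrinsics return a {random value, status} pair: store the value
    // through the pointer argument and return the zero-extended status.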
11097 llvm::Value *Val = Builder.CreateCall(F);
11098 Value *RandomValue = Builder.CreateExtractValue(Val, 0);
11099 Value *Status = Builder.CreateExtractValue(Val, 1);
11100
11101 Address MemAddress = EmitPointerWithAlignment(E->getArg(0));
11102 Builder.CreateStore(RandomValue, MemAddress);
11103 Status = Builder.CreateZExt(Status, Int32Ty);
11104 return Status;
11105 }
11106
11107 if (BuiltinID == clang::AArch64::BI__clear_cache) {
11108 assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
11109 const FunctionDecl *FD = E->getDirectCallee();
11110 Value *Ops[2];
11111 for (unsigned i = 0; i < 2; i++)
11112 Ops[i] = EmitScalarExpr(E->getArg(i));
11113 llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
11114 llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
11115 StringRef Name = FD->getName();
11116 return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
11117 }
11118
11119 if ((BuiltinID == clang::AArch64::BI__builtin_arm_ldrex ||
11120 BuiltinID == clang::AArch64::BI__builtin_arm_ldaex) &&
11121 getContext().getTypeSize(E->getType()) == 128) {
11122 Function *F =
11123 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_ldaex
11124 ? Intrinsic::aarch64_ldaxp
11125 : Intrinsic::aarch64_ldxp);
11126
11127 Value *LdPtr = EmitScalarExpr(E->getArg(0));
11128 Value *Val = Builder.CreateCall(F, LdPtr, "ldxp");
11129
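    // The intrinsic returns the pair as two i64s; element 1 forms the high
    // half and element 0 the low half of the reconstructed 128-bit value.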
11130 Value *Val0 = Builder.CreateExtractValue(Val, 1);
11131 Value *Val1 = Builder.CreateExtractValue(Val, 0);
11132 llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
11133 Val0 = Builder.CreateZExt(Val0, Int128Ty);
11134 Val1 = Builder.CreateZExt(Val1, Int128Ty);
11135
11136 Value *ShiftCst = llvm::ConstantInt::get(Int128Ty, 64);
11137 Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
11138 Val = Builder.CreateOr(Val, Val1);
11139 return Builder.CreateBitCast(Val, ConvertType(E->getType()));
11140 } else if (BuiltinID == clang::AArch64::BI__builtin_arm_ldrex ||
11141 BuiltinID == clang::AArch64::BI__builtin_arm_ldaex) {
11142 Value *LoadAddr = EmitScalarExpr(E->getArg(0));
11143
11144 QualType Ty = E->getType();
11145 llvm::Type *RealResTy = ConvertType(Ty);
11146 llvm::Type *IntTy =
11147 llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));
11148
11149 Function *F =
11150 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_ldaex
11151 ? Intrinsic::aarch64_ldaxr
11152 : Intrinsic::aarch64_ldxr,
11153 UnqualPtrTy);
11154 CallInst *Val = Builder.CreateCall(F, LoadAddr, "ldxr");
11155 Val->addParamAttr(
11156 0, Attribute::get(getLLVMContext(), Attribute::ElementType, IntTy));
11157
11158 if (RealResTy->isPointerTy())
11159 return Builder.CreateIntToPtr(Val, RealResTy);
11160
11161 llvm::Type *IntResTy = llvm::IntegerType::get(
11162 getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy));
11163 return Builder.CreateBitCast(Builder.CreateTruncOrBitCast(Val, IntResTy),
11164 RealResTy);
11165 }
11166
11167 if ((BuiltinID == clang::AArch64::BI__builtin_arm_strex ||
11168 BuiltinID == clang::AArch64::BI__builtin_arm_stlex) &&
11169 getContext().getTypeSize(E->getArg(0)->getType()) == 128) {
11170 Function *F =
11171 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_stlex
11172 ? Intrinsic::aarch64_stlxp
11173 : Intrinsic::aarch64_stxp);
11174 llvm::Type *STy = llvm::StructType::get(Int64Ty, Int64Ty);
11175
11176 Address Tmp = CreateMemTemp(E->getArg(0)->getType());
11177 EmitAnyExprToMem(E->getArg(0), Tmp, Qualifiers(), /*init*/ true);
11178
11179 Tmp = Tmp.withElementType(STy);
11180 llvm::Value *Val = Builder.CreateLoad(Tmp);
11181
11182 Value *Arg0 = Builder.CreateExtractValue(Val, 0);
11183 Value *Arg1 = Builder.CreateExtractValue(Val, 1);
11184 Value *StPtr = EmitScalarExpr(E->getArg(1));
11185 return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "stxp");
11186 }
11187
11188 if (BuiltinID == clang::AArch64::BI__builtin_arm_strex ||
11189 BuiltinID == clang::AArch64::BI__builtin_arm_stlex) {
11190 Value *StoreVal = EmitScalarExpr(E->getArg(0));
11191 Value *StoreAddr = EmitScalarExpr(E->getArg(1));
11192
11193 QualType Ty = E->getArg(0)->getType();
11194 llvm::Type *StoreTy =
11195 llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));
11196
11197 if (StoreVal->getType()->isPointerTy())
11198 StoreVal = Builder.CreatePtrToInt(StoreVal, Int64Ty);
11199 else {
11200 llvm::Type *IntTy = llvm::IntegerType::get(
11201 getLLVMContext(),
11202 CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType()));
11203 StoreVal = Builder.CreateBitCast(StoreVal, IntTy);
11204 StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int64Ty);
11205 }
11206
11207 Function *F =
11208 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_stlex
11209 ? Intrinsic::aarch64_stlxr
11210 : Intrinsic::aarch64_stxr,
11211 StoreAddr->getType());
11212 CallInst *CI = Builder.CreateCall(F, {StoreVal, StoreAddr}, "stxr");
11213 CI->addParamAttr(
11214 1, Attribute::get(getLLVMContext(), Attribute::ElementType, StoreTy));
11215 return CI;
11216 }
11217
11218 if (BuiltinID == clang::AArch64::BI__getReg) {
11219 Expr::EvalResult Result;
11220 if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext()))
11221 llvm_unreachable("Sema will ensure that the parameter is constant");
11222
11223 llvm::APSInt Value = Result.Val.getInt();
11224 LLVMContext &Context = CGM.getLLVMContext();
11225 std::string Reg = Value == 31 ? "sp" : "x" + toString(Value, 10);
11226
11227 llvm::Metadata *Ops[] = {llvm::MDString::get(Context, Reg)};
11228 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
11229 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
11230
11231 llvm::Function *F =
11232 CGM.getIntrinsic(llvm::Intrinsic::read_register, {Int64Ty});
11233 return Builder.CreateCall(F, Metadata);
11234 }
11235
11236 if (BuiltinID == clang::AArch64::BI__break) {
11237 Expr::EvalResult Result;
11238 if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext()))
11239 llvm_unreachable("Sema will ensure that the parameter is constant");
11240
11241 llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::aarch64_break);
11242 return Builder.CreateCall(F, {EmitScalarExpr(E->getArg(0))});
11243 }
11244
11245 if (BuiltinID == clang::AArch64::BI__builtin_arm_clrex) {
11246 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_clrex);
11247 return Builder.CreateCall(F);
11248 }
11249
11250 if (BuiltinID == clang::AArch64::BI_ReadWriteBarrier)
11251 return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
11252 llvm::SyncScope::SingleThread);
11253
11254 // CRC32
11255 Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
11256 switch (BuiltinID) {
11257 case clang::AArch64::BI__builtin_arm_crc32b:
11258 CRCIntrinsicID = Intrinsic::aarch64_crc32b; break;
11259 case clang::AArch64::BI__builtin_arm_crc32cb:
11260 CRCIntrinsicID = Intrinsic::aarch64_crc32cb; break;
11261 case clang::AArch64::BI__builtin_arm_crc32h:
11262 CRCIntrinsicID = Intrinsic::aarch64_crc32h; break;
11263 case clang::AArch64::BI__builtin_arm_crc32ch:
11264 CRCIntrinsicID = Intrinsic::aarch64_crc32ch; break;
11265 case clang::AArch64::BI__builtin_arm_crc32w:
11266 CRCIntrinsicID = Intrinsic::aarch64_crc32w; break;
11267 case clang::AArch64::BI__builtin_arm_crc32cw:
11268 CRCIntrinsicID = Intrinsic::aarch64_crc32cw; break;
11269 case clang::AArch64::BI__builtin_arm_crc32d:
11270 CRCIntrinsicID = Intrinsic::aarch64_crc32x; break;
11271 case clang::AArch64::BI__builtin_arm_crc32cd:
11272 CRCIntrinsicID = Intrinsic::aarch64_crc32cx; break;
11273 }
11274
11275 if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
11276 Value *Arg0 = EmitScalarExpr(E->getArg(0));
11277 Value *Arg1 = EmitScalarExpr(E->getArg(1));
11278 Function *F = CGM.getIntrinsic(CRCIntrinsicID);
11279
11280 llvm::Type *DataTy = F->getFunctionType()->getParamType(1);
11281 Arg1 = Builder.CreateZExtOrBitCast(Arg1, DataTy);
11282
11283 return Builder.CreateCall(F, {Arg0, Arg1});
11284 }
11285
11286 // Memory Operations (MOPS)
11287 if (BuiltinID == AArch64::BI__builtin_arm_mops_memset_tag) {
11288 Value *Dst = EmitScalarExpr(E->getArg(0));
11289 Value *Val = EmitScalarExpr(E->getArg(1));
11290 Value *Size = EmitScalarExpr(E->getArg(2));
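    // Normalize the arguments to the intrinsic signature: pointer destination,
    // i8 value and i64 size.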
11291 Dst = Builder.CreatePointerCast(Dst, Int8PtrTy);
11292 Val = Builder.CreateTrunc(Val, Int8Ty);
11293 Size = Builder.CreateIntCast(Size, Int64Ty, false);
11294 return Builder.CreateCall(
11295 CGM.getIntrinsic(Intrinsic::aarch64_mops_memset_tag), {Dst, Val, Size});
11296 }
11297
11298 // Memory Tagging Extensions (MTE) Intrinsics
11299 Intrinsic::ID MTEIntrinsicID = Intrinsic::not_intrinsic;
11300 switch (BuiltinID) {
11301 case clang::AArch64::BI__builtin_arm_irg:
11302 MTEIntrinsicID = Intrinsic::aarch64_irg; break;
11303 case clang::AArch64::BI__builtin_arm_addg:
11304 MTEIntrinsicID = Intrinsic::aarch64_addg; break;
11305 case clang::AArch64::BI__builtin_arm_gmi:
11306 MTEIntrinsicID = Intrinsic::aarch64_gmi; break;
11307 case clang::AArch64::BI__builtin_arm_ldg:
11308 MTEIntrinsicID = Intrinsic::aarch64_ldg; break;
11309 case clang::AArch64::BI__builtin_arm_stg:
11310 MTEIntrinsicID = Intrinsic::aarch64_stg; break;
11311 case clang::AArch64::BI__builtin_arm_subp:
11312 MTEIntrinsicID = Intrinsic::aarch64_subp; break;
11313 }
11314
11315 if (MTEIntrinsicID != Intrinsic::not_intrinsic) {
11316 llvm::Type *T = ConvertType(E->getType());
11317
11318 if (MTEIntrinsicID == Intrinsic::aarch64_irg) {
11319 Value *Pointer = EmitScalarExpr(E->getArg(0));
11320 Value *Mask = EmitScalarExpr(E->getArg(1));
11321
11322 Pointer = Builder.CreatePointerCast(Pointer, Int8PtrTy);
11323 Mask = Builder.CreateZExt(Mask, Int64Ty);
11324 Value *RV = Builder.CreateCall(
11325 CGM.getIntrinsic(MTEIntrinsicID), {Pointer, Mask});
11326 return Builder.CreatePointerCast(RV, T);
11327 }
11328 if (MTEIntrinsicID == Intrinsic::aarch64_addg) {
11329 Value *Pointer = EmitScalarExpr(E->getArg(0));
11330 Value *TagOffset = EmitScalarExpr(E->getArg(1));
11331
11332 Pointer = Builder.CreatePointerCast(Pointer, Int8PtrTy);
11333 TagOffset = Builder.CreateZExt(TagOffset, Int64Ty);
11334 Value *RV = Builder.CreateCall(
11335 CGM.getIntrinsic(MTEIntrinsicID), {Pointer, TagOffset});
11336 return Builder.CreatePointerCast(RV, T);
11337 }
11338 if (MTEIntrinsicID == Intrinsic::aarch64_gmi) {
11339 Value *Pointer = EmitScalarExpr(E->getArg(0));
11340 Value *ExcludedMask = EmitScalarExpr(E->getArg(1));
11341
11342 ExcludedMask = Builder.CreateZExt(ExcludedMask, Int64Ty);
11343 Pointer = Builder.CreatePointerCast(Pointer, Int8PtrTy);
11344 return Builder.CreateCall(
11345 CGM.getIntrinsic(MTEIntrinsicID), {Pointer, ExcludedMask});
11346 }
11347     // Although it is possible to supply a different return address (first
11348     // arg) to this intrinsic, for now we set the return address to be the
11349     // same as the input address.
11350 if (MTEIntrinsicID == Intrinsic::aarch64_ldg) {
11351 Value *TagAddress = EmitScalarExpr(E->getArg(0));
11352 TagAddress = Builder.CreatePointerCast(TagAddress, Int8PtrTy);
11353 Value *RV = Builder.CreateCall(
11354 CGM.getIntrinsic(MTEIntrinsicID), {TagAddress, TagAddress});
11355 return Builder.CreatePointerCast(RV, T);
11356 }
11357     // Although it is possible to supply a different tag (to set) to this
11358     // intrinsic (as the first arg), for now we supply the tag that is in the
11359     // input address argument (the common use case).
11360 if (MTEIntrinsicID == Intrinsic::aarch64_stg) {
11361 Value *TagAddress = EmitScalarExpr(E->getArg(0));
11362 TagAddress = Builder.CreatePointerCast(TagAddress, Int8PtrTy);
11363 return Builder.CreateCall(
11364 CGM.getIntrinsic(MTEIntrinsicID), {TagAddress, TagAddress});
11365 }
11366 if (MTEIntrinsicID == Intrinsic::aarch64_subp) {
11367 Value *PointerA = EmitScalarExpr(E->getArg(0));
11368 Value *PointerB = EmitScalarExpr(E->getArg(1));
11369 PointerA = Builder.CreatePointerCast(PointerA, Int8PtrTy);
11370 PointerB = Builder.CreatePointerCast(PointerB, Int8PtrTy);
11371 return Builder.CreateCall(
11372 CGM.getIntrinsic(MTEIntrinsicID), {PointerA, PointerB});
11373 }
11374 }
11375
11376 if (BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
11377 BuiltinID == clang::AArch64::BI__builtin_arm_rsr64 ||
11378 BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
11379 BuiltinID == clang::AArch64::BI__builtin_arm_rsrp ||
11380 BuiltinID == clang::AArch64::BI__builtin_arm_wsr ||
11381 BuiltinID == clang::AArch64::BI__builtin_arm_wsr64 ||
11382 BuiltinID == clang::AArch64::BI__builtin_arm_wsr128 ||
11383 BuiltinID == clang::AArch64::BI__builtin_arm_wsrp) {
11384
11385 SpecialRegisterAccessKind AccessKind = Write;
11386 if (BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
11387 BuiltinID == clang::AArch64::BI__builtin_arm_rsr64 ||
11388 BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
11389 BuiltinID == clang::AArch64::BI__builtin_arm_rsrp)
11390 AccessKind = VolatileRead;
11391
11392 bool IsPointerBuiltin = BuiltinID == clang::AArch64::BI__builtin_arm_rsrp ||
11393 BuiltinID == clang::AArch64::BI__builtin_arm_wsrp;
11394
11395 bool Is32Bit = BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
11396 BuiltinID == clang::AArch64::BI__builtin_arm_wsr;
11397
11398 bool Is128Bit = BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
11399 BuiltinID == clang::AArch64::BI__builtin_arm_wsr128;
11400
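    // System registers are accessed as 64-bit values except for the 128-bit
    // variants; ValueType reflects the width the builtin itself exposes.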
11401 llvm::Type *ValueType;
11402 llvm::Type *RegisterType = Int64Ty;
11403 if (Is32Bit) {
11404 ValueType = Int32Ty;
11405 } else if (Is128Bit) {
11406 llvm::Type *Int128Ty =
11407 llvm::IntegerType::getInt128Ty(CGM.getLLVMContext());
11408 ValueType = Int128Ty;
11409 RegisterType = Int128Ty;
11410 } else if (IsPointerBuiltin) {
11411 ValueType = VoidPtrTy;
11412 } else {
11413 ValueType = Int64Ty;
11414     }
11415
11416 return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType,
11417 AccessKind);
11418 }
11419
11420 if (BuiltinID == clang::AArch64::BI_ReadStatusReg ||
11421 BuiltinID == clang::AArch64::BI_WriteStatusReg) {
11422 LLVMContext &Context = CGM.getLLVMContext();
11423
11424 unsigned SysReg =
11425 E->getArg(0)->EvaluateKnownConstInt(getContext()).getZExtValue();
11426
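    // Format the packed immediate as the generic "op0:op1:CRn:CRm:op2" system
    // register name understood by llvm.read_register/llvm.write_register.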
11427 std::string SysRegStr;
11428 llvm::raw_string_ostream(SysRegStr) <<
11429 ((1 << 1) | ((SysReg >> 14) & 1)) << ":" <<
11430 ((SysReg >> 11) & 7) << ":" <<
11431 ((SysReg >> 7) & 15) << ":" <<
11432 ((SysReg >> 3) & 15) << ":" <<
11433 ( SysReg & 7);
11434
11435 llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysRegStr) };
11436 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
11437 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
11438
11439 llvm::Type *RegisterType = Int64Ty;
11440 llvm::Type *Types[] = { RegisterType };
11441
11442 if (BuiltinID == clang::AArch64::BI_ReadStatusReg) {
11443 llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::read_register, Types);
11444
11445 return Builder.CreateCall(F, Metadata);
11446 }
11447
11448 llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types);
11449 llvm::Value *ArgValue = EmitScalarExpr(E->getArg(1));
11450
11451 return Builder.CreateCall(F, { Metadata, ArgValue });
11452 }
11453
11454 if (BuiltinID == clang::AArch64::BI_AddressOfReturnAddress) {
11455 llvm::Function *F =
11456 CGM.getIntrinsic(Intrinsic::addressofreturnaddress, AllocaInt8PtrTy);
11457 return Builder.CreateCall(F);
11458 }
11459
11460 if (BuiltinID == clang::AArch64::BI__builtin_sponentry) {
11461 llvm::Function *F = CGM.getIntrinsic(Intrinsic::sponentry, AllocaInt8PtrTy);
11462 return Builder.CreateCall(F);
11463 }
11464
11465 if (BuiltinID == clang::AArch64::BI__mulh ||
11466 BuiltinID == clang::AArch64::BI__umulh) {
11467 llvm::Type *ResType = ConvertType(E->getType());
11468 llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
11469
11470 bool IsSigned = BuiltinID == clang::AArch64::BI__mulh;
11471 Value *LHS =
11472 Builder.CreateIntCast(EmitScalarExpr(E->getArg(0)), Int128Ty, IsSigned);
11473 Value *RHS =
11474 Builder.CreateIntCast(EmitScalarExpr(E->getArg(1)), Int128Ty, IsSigned);
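    // Widen both operands to 128 bits, multiply, and take the top 64 bits of
    // the product as the result.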
11475
11476 Value *MulResult, *HigherBits;
11477 if (IsSigned) {
11478 MulResult = Builder.CreateNSWMul(LHS, RHS);
11479 HigherBits = Builder.CreateAShr(MulResult, 64);
11480 } else {
11481 MulResult = Builder.CreateNUWMul(LHS, RHS);
11482 HigherBits = Builder.CreateLShr(MulResult, 64);
11483 }
11484 HigherBits = Builder.CreateIntCast(HigherBits, ResType, IsSigned);
11485
11486 return HigherBits;
11487 }
11488
11489 if (BuiltinID == AArch64::BI__writex18byte ||
11490 BuiltinID == AArch64::BI__writex18word ||
11491 BuiltinID == AArch64::BI__writex18dword ||
11492 BuiltinID == AArch64::BI__writex18qword) {
11493 // Read x18 as i8*
11494 LLVMContext &Context = CGM.getLLVMContext();
11495 llvm::Metadata *Ops[] = {llvm::MDString::get(Context, "x18")};
11496 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
11497 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
11498 llvm::Function *F =
11499 CGM.getIntrinsic(llvm::Intrinsic::read_register, {Int64Ty});
11500 llvm::Value *X18 = Builder.CreateCall(F, Metadata);
11501 X18 = Builder.CreateIntToPtr(X18, Int8PtrTy);
11502
11503 // Store val at x18 + offset
11504 Value *Offset = Builder.CreateZExt(EmitScalarExpr(E->getArg(0)), Int64Ty);
11505 Value *Ptr = Builder.CreateGEP(Int8Ty, X18, Offset);
11506 Value *Val = EmitScalarExpr(E->getArg(1));
11507 StoreInst *Store = Builder.CreateAlignedStore(Val, Ptr, CharUnits::One());
11508 return Store;
11509 }
11510
11511 if (BuiltinID == AArch64::BI__readx18byte ||
11512 BuiltinID == AArch64::BI__readx18word ||
11513 BuiltinID == AArch64::BI__readx18dword ||
11514 BuiltinID == AArch64::BI__readx18qword) {
11515 llvm::Type *IntTy = ConvertType(E->getType());
11516
11517 // Read x18 as i8*
11518 LLVMContext &Context = CGM.getLLVMContext();
11519 llvm::Metadata *Ops[] = {llvm::MDString::get(Context, "x18")};
11520 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
11521 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
11522 llvm::Function *F =
11523 CGM.getIntrinsic(llvm::Intrinsic::read_register, {Int64Ty});
11524 llvm::Value *X18 = Builder.CreateCall(F, Metadata);
11525 X18 = Builder.CreateIntToPtr(X18, Int8PtrTy);
11526
11527 // Load x18 + offset
11528 Value *Offset = Builder.CreateZExt(EmitScalarExpr(E->getArg(0)), Int64Ty);
11529 Value *Ptr = Builder.CreateGEP(Int8Ty, X18, Offset);
11530 LoadInst *Load = Builder.CreateAlignedLoad(IntTy, Ptr, CharUnits::One());
11531 return Load;
11532 }
11533
11534 if (BuiltinID == AArch64::BI_CopyDoubleFromInt64 ||
11535 BuiltinID == AArch64::BI_CopyFloatFromInt32 ||
11536 BuiltinID == AArch64::BI_CopyInt32FromFloat ||
11537 BuiltinID == AArch64::BI_CopyInt64FromDouble) {
11538 Value *Arg = EmitScalarExpr(E->getArg(0));
11539 llvm::Type *RetTy = ConvertType(E->getType());
11540 return Builder.CreateBitCast(Arg, RetTy);
11541 }
11542
11543 if (BuiltinID == AArch64::BI_CountLeadingOnes ||
11544 BuiltinID == AArch64::BI_CountLeadingOnes64 ||
11545 BuiltinID == AArch64::BI_CountLeadingZeros ||
11546 BuiltinID == AArch64::BI_CountLeadingZeros64) {
11547 Value *Arg = EmitScalarExpr(E->getArg(0));
11548 llvm::Type *ArgType = Arg->getType();
11549
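    // Leading ones are counted by inverting the argument and counting leading
    // zeros; the 64-bit variants truncate the result to the 32-bit return type.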
11550 if (BuiltinID == AArch64::BI_CountLeadingOnes ||
11551 BuiltinID == AArch64::BI_CountLeadingOnes64)
11552 Arg = Builder.CreateXor(Arg, Constant::getAllOnesValue(ArgType));
11553
11554 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
11555 Value *Result = Builder.CreateCall(F, {Arg, Builder.getInt1(false)});
11556
11557 if (BuiltinID == AArch64::BI_CountLeadingOnes64 ||
11558 BuiltinID == AArch64::BI_CountLeadingZeros64)
11559 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
11560 return Result;
11561 }
11562
11563 if (BuiltinID == AArch64::BI_CountLeadingSigns ||
11564 BuiltinID == AArch64::BI_CountLeadingSigns64) {
11565 Value *Arg = EmitScalarExpr(E->getArg(0));
11566
11567 Function *F = (BuiltinID == AArch64::BI_CountLeadingSigns)
11568 ? CGM.getIntrinsic(Intrinsic::aarch64_cls)
11569 : CGM.getIntrinsic(Intrinsic::aarch64_cls64);
11570
11571 Value *Result = Builder.CreateCall(F, Arg, "cls");
11572 if (BuiltinID == AArch64::BI_CountLeadingSigns64)
11573 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
11574 return Result;
11575 }
11576
11577 if (BuiltinID == AArch64::BI_CountOneBits ||
11578 BuiltinID == AArch64::BI_CountOneBits64) {
11579 Value *ArgValue = EmitScalarExpr(E->getArg(0));
11580 llvm::Type *ArgType = ArgValue->getType();
11581 Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
11582
11583 Value *Result = Builder.CreateCall(F, ArgValue);
11584 if (BuiltinID == AArch64::BI_CountOneBits64)
11585 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
11586 return Result;
11587 }
11588
11589 if (BuiltinID == AArch64::BI__prefetch) {
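    // __prefetch maps to llvm.prefetch with a read access (0), maximal
    // temporal locality (3) and the data cache (1).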
11590 Value *Address = EmitScalarExpr(E->getArg(0));
11591 Value *RW = llvm::ConstantInt::get(Int32Ty, 0);
11592 Value *Locality = ConstantInt::get(Int32Ty, 3);
11593 Value *Data = llvm::ConstantInt::get(Int32Ty, 1);
11594 Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
11595 return Builder.CreateCall(F, {Address, RW, Locality, Data});
11596 }
11597
11598 if (BuiltinID == AArch64::BI__hlt) {
11599 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_hlt);
11600 Builder.CreateCall(F, {EmitScalarExpr(E->getArg(0))});
11601
11602 // Return 0 for convenience, even though MSVC returns some other undefined
11603 // value.
11604 return ConstantInt::get(Builder.getInt32Ty(), 0);
11605 }
11606
11607 // Handle MSVC intrinsics before argument evaluation to prevent double
11608 // evaluation.
11609 if (std::optional<MSVCIntrin> MsvcIntId =
11610 translateAarch64ToMsvcIntrin(BuiltinID))
11611 return EmitMSVCBuiltinExpr(*MsvcIntId, E);
11612
11613   // Some intrinsics are equivalent; if they are, use the base intrinsic ID.
11614 auto It = llvm::find_if(NEONEquivalentIntrinsicMap, [BuiltinID](auto &P) {
11615 return P.first == BuiltinID;
11616 });
11617 if (It != end(NEONEquivalentIntrinsicMap))
11618 BuiltinID = It->second;
11619
11620 // Find out if any arguments are required to be integer constant
11621 // expressions.
11622 unsigned ICEArguments = 0;
11623 ASTContext::GetBuiltinTypeError Error;
11624 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
11625 assert(Error == ASTContext::GE_None && "Should not codegen an error");
11626
11627 llvm::SmallVector<Value*, 4> Ops;
11628 Address PtrOp0 = Address::invalid();
11629 for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) {
11630 if (i == 0) {
11631 switch (BuiltinID) {
11632 case NEON::BI__builtin_neon_vld1_v:
11633 case NEON::BI__builtin_neon_vld1q_v:
11634 case NEON::BI__builtin_neon_vld1_dup_v:
11635 case NEON::BI__builtin_neon_vld1q_dup_v:
11636 case NEON::BI__builtin_neon_vld1_lane_v:
11637 case NEON::BI__builtin_neon_vld1q_lane_v:
11638 case NEON::BI__builtin_neon_vst1_v:
11639 case NEON::BI__builtin_neon_vst1q_v:
11640 case NEON::BI__builtin_neon_vst1_lane_v:
11641 case NEON::BI__builtin_neon_vst1q_lane_v:
11642 case NEON::BI__builtin_neon_vldap1_lane_s64:
11643 case NEON::BI__builtin_neon_vldap1q_lane_s64:
11644 case NEON::BI__builtin_neon_vstl1_lane_s64:
11645 case NEON::BI__builtin_neon_vstl1q_lane_s64:
11646 // Get the alignment for the argument in addition to the value;
11647 // we'll use it later.
11648 PtrOp0 = EmitPointerWithAlignment(E->getArg(0));
11649 Ops.push_back(PtrOp0.emitRawPointer(*this));
11650 continue;
11651 }
11652 }
11653 Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));
11654 }
11655
11656 auto SISDMap = ArrayRef(AArch64SISDIntrinsicMap);
11657 const ARMVectorIntrinsicInfo *Builtin = findARMVectorIntrinsicInMap(
11658 SISDMap, BuiltinID, AArch64SISDIntrinsicsProvenSorted);
11659
11660 if (Builtin) {
11661 Ops.push_back(EmitScalarExpr(E->getArg(E->getNumArgs() - 1)));
11662 Value *Result = EmitCommonNeonSISDBuiltinExpr(*this, *Builtin, Ops, E);
11663 assert(Result && "SISD intrinsic should have been handled");
11664 return Result;
11665 }
11666
11667 const Expr *Arg = E->getArg(E->getNumArgs()-1);
11668 NeonTypeFlags Type(0);
11669 if (std::optional<llvm::APSInt> Result =
11670 Arg->getIntegerConstantExpr(getContext()))
11671 // Determine the type of this overloaded NEON intrinsic.
11672 Type = NeonTypeFlags(Result->getZExtValue());
11673
11674 bool usgn = Type.isUnsigned();
11675 bool quad = Type.isQuad();
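  // For the overloaded "_v" builtins the trailing constant encodes a
  // NeonTypeFlags value; usgn/quad extracted above choose between the
  // signed/unsigned intrinsic variants and between 64-bit and 128-bit ("q")
  // vector types in the cases that follow.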
11676
11677 // Handle non-overloaded intrinsics first.
11678 switch (BuiltinID) {
11679 default: break;
11680 case NEON::BI__builtin_neon_vabsh_f16:
11681 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11682 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, HalfTy), Ops, "vabs");
11683 case NEON::BI__builtin_neon_vaddq_p128: {
11684 llvm::Type *Ty = GetNeonType(this, NeonTypeFlags::Poly128);
11685 Ops.push_back(EmitScalarExpr(E->getArg(1)));
11686 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
11687 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
11688 Ops[0] = Builder.CreateXor(Ops[0], Ops[1]);
11689 llvm::Type *Int128Ty = llvm::Type::getIntNTy(getLLVMContext(), 128);
11690 return Builder.CreateBitCast(Ops[0], Int128Ty);
11691 }
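  // Illustrative sketch: polynomial (poly128) addition is carry-less, so
  // vaddq_p128(a, b) reduces to an XOR of the two 128-bit values, returned
  // as an i128 bitcast as shown above.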
11692 case NEON::BI__builtin_neon_vldrq_p128: {
11693 llvm::Type *Int128Ty = llvm::Type::getIntNTy(getLLVMContext(), 128);
11694 Value *Ptr = EmitScalarExpr(E->getArg(0));
11695 return Builder.CreateAlignedLoad(Int128Ty, Ptr,
11696 CharUnits::fromQuantity(16));
11697 }
11698 case NEON::BI__builtin_neon_vstrq_p128: {
11699 Value *Ptr = Ops[0];
11700 return Builder.CreateDefaultAlignedStore(EmitScalarExpr(E->getArg(1)), Ptr);
11701 }
11702 case NEON::BI__builtin_neon_vcvts_f32_u32:
11703 case NEON::BI__builtin_neon_vcvtd_f64_u64:
11704 usgn = true;
11705 [[fallthrough]];
11706 case NEON::BI__builtin_neon_vcvts_f32_s32:
11707 case NEON::BI__builtin_neon_vcvtd_f64_s64: {
11708 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11709 bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64;
11710 llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty;
11711 llvm::Type *FTy = Is64 ? DoubleTy : FloatTy;
11712 Ops[0] = Builder.CreateBitCast(Ops[0], InTy);
11713 if (usgn)
11714 return Builder.CreateUIToFP(Ops[0], FTy);
11715 return Builder.CreateSIToFP(Ops[0], FTy);
11716 }
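  // Illustrative sketch: vcvtd_f64_u64(x) above is simply
  //   %f = uitofp i64 %x to double
  // and the signed variants use sitofp; no target-specific intrinsic is
  // needed for these scalar conversions.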
11717 case NEON::BI__builtin_neon_vcvth_f16_u16:
11718 case NEON::BI__builtin_neon_vcvth_f16_u32:
11719 case NEON::BI__builtin_neon_vcvth_f16_u64:
11720 usgn = true;
11721 [[fallthrough]];
11722 case NEON::BI__builtin_neon_vcvth_f16_s16:
11723 case NEON::BI__builtin_neon_vcvth_f16_s32:
11724 case NEON::BI__builtin_neon_vcvth_f16_s64: {
11725 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11726 llvm::Type *FTy = HalfTy;
11727 llvm::Type *InTy;
11728 if (Ops[0]->getType()->getPrimitiveSizeInBits() == 64)
11729 InTy = Int64Ty;
11730 else if (Ops[0]->getType()->getPrimitiveSizeInBits() == 32)
11731 InTy = Int32Ty;
11732 else
11733 InTy = Int16Ty;
11734 Ops[0] = Builder.CreateBitCast(Ops[0], InTy);
11735 if (usgn)
11736 return Builder.CreateUIToFP(Ops[0], FTy);
11737 return Builder.CreateSIToFP(Ops[0], FTy);
11738 }
11739 case NEON::BI__builtin_neon_vcvtah_u16_f16:
11740 case NEON::BI__builtin_neon_vcvtmh_u16_f16:
11741 case NEON::BI__builtin_neon_vcvtnh_u16_f16:
11742 case NEON::BI__builtin_neon_vcvtph_u16_f16:
11743 case NEON::BI__builtin_neon_vcvth_u16_f16:
11744 case NEON::BI__builtin_neon_vcvtah_s16_f16:
11745 case NEON::BI__builtin_neon_vcvtmh_s16_f16:
11746 case NEON::BI__builtin_neon_vcvtnh_s16_f16:
11747 case NEON::BI__builtin_neon_vcvtph_s16_f16:
11748 case NEON::BI__builtin_neon_vcvth_s16_f16: {
11749 unsigned Int;
11750 llvm::Type* InTy = Int32Ty;
11751 llvm::Type* FTy = HalfTy;
11752 llvm::Type *Tys[2] = {InTy, FTy};
11753 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11754 switch (BuiltinID) {
11755 default: llvm_unreachable("missing builtin ID in switch!");
11756 case NEON::BI__builtin_neon_vcvtah_u16_f16:
11757 Int = Intrinsic::aarch64_neon_fcvtau; break;
11758 case NEON::BI__builtin_neon_vcvtmh_u16_f16:
11759 Int = Intrinsic::aarch64_neon_fcvtmu; break;
11760 case NEON::BI__builtin_neon_vcvtnh_u16_f16:
11761 Int = Intrinsic::aarch64_neon_fcvtnu; break;
11762 case NEON::BI__builtin_neon_vcvtph_u16_f16:
11763 Int = Intrinsic::aarch64_neon_fcvtpu; break;
11764 case NEON::BI__builtin_neon_vcvth_u16_f16:
11765 Int = Intrinsic::aarch64_neon_fcvtzu; break;
11766 case NEON::BI__builtin_neon_vcvtah_s16_f16:
11767 Int = Intrinsic::aarch64_neon_fcvtas; break;
11768 case NEON::BI__builtin_neon_vcvtmh_s16_f16:
11769 Int = Intrinsic::aarch64_neon_fcvtms; break;
11770 case NEON::BI__builtin_neon_vcvtnh_s16_f16:
11771 Int = Intrinsic::aarch64_neon_fcvtns; break;
11772 case NEON::BI__builtin_neon_vcvtph_s16_f16:
11773 Int = Intrinsic::aarch64_neon_fcvtps; break;
11774 case NEON::BI__builtin_neon_vcvth_s16_f16:
11775 Int = Intrinsic::aarch64_neon_fcvtzs; break;
11776 }
11777 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvt");
11778 return Builder.CreateTrunc(Ops[0], Int16Ty);
11779 }
11780 case NEON::BI__builtin_neon_vcaleh_f16:
11781 case NEON::BI__builtin_neon_vcalth_f16:
11782 case NEON::BI__builtin_neon_vcageh_f16:
11783 case NEON::BI__builtin_neon_vcagth_f16: {
11784 unsigned Int;
11785 llvm::Type* InTy = Int32Ty;
11786 llvm::Type* FTy = HalfTy;
11787 llvm::Type *Tys[2] = {InTy, FTy};
11788 Ops.push_back(EmitScalarExpr(E->getArg(1)));
11789 switch (BuiltinID) {
11790 default: llvm_unreachable("missing builtin ID in switch!");
11791 case NEON::BI__builtin_neon_vcageh_f16:
11792 Int = Intrinsic::aarch64_neon_facge; break;
11793 case NEON::BI__builtin_neon_vcagth_f16:
11794 Int = Intrinsic::aarch64_neon_facgt; break;
11795 case NEON::BI__builtin_neon_vcaleh_f16:
11796 Int = Intrinsic::aarch64_neon_facge; std::swap(Ops[0], Ops[1]); break;
11797 case NEON::BI__builtin_neon_vcalth_f16:
11798 Int = Intrinsic::aarch64_neon_facgt; std::swap(Ops[0], Ops[1]); break;
11799 }
11800 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "facg");
11801 return Builder.CreateTrunc(Ops[0], Int16Ty);
11802 }
11803 case NEON::BI__builtin_neon_vcvth_n_s16_f16:
11804 case NEON::BI__builtin_neon_vcvth_n_u16_f16: {
11805 unsigned Int;
11806 llvm::Type* InTy = Int32Ty;
11807 llvm::Type* FTy = HalfTy;
11808 llvm::Type *Tys[2] = {InTy, FTy};
11809 Ops.push_back(EmitScalarExpr(E->getArg(1)));
11810 switch (BuiltinID) {
11811 default: llvm_unreachable("missing builtin ID in switch!");
11812 case NEON::BI__builtin_neon_vcvth_n_s16_f16:
11813 Int = Intrinsic::aarch64_neon_vcvtfp2fxs; break;
11814 case NEON::BI__builtin_neon_vcvth_n_u16_f16:
11815 Int = Intrinsic::aarch64_neon_vcvtfp2fxu; break;
11816 }
11817 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvth_n");
11818 return Builder.CreateTrunc(Ops[0], Int16Ty);
11819 }
11820 case NEON::BI__builtin_neon_vcvth_n_f16_s16:
11821 case NEON::BI__builtin_neon_vcvth_n_f16_u16: {
11822 unsigned Int;
11823 llvm::Type* FTy = HalfTy;
11824 llvm::Type* InTy = Int32Ty;
11825 llvm::Type *Tys[2] = {FTy, InTy};
11826 Ops.push_back(EmitScalarExpr(E->getArg(1)));
11827 switch (BuiltinID) {
11828 default: llvm_unreachable("missing builtin ID in switch!");
11829 case NEON::BI__builtin_neon_vcvth_n_f16_s16:
11830 Int = Intrinsic::aarch64_neon_vcvtfxs2fp;
11831 Ops[0] = Builder.CreateSExt(Ops[0], InTy, "sext");
11832 break;
11833 case NEON::BI__builtin_neon_vcvth_n_f16_u16:
11834 Int = Intrinsic::aarch64_neon_vcvtfxu2fp;
11835 Ops[0] = Builder.CreateZExt(Ops[0], InTy);
11836 break;
11837 }
11838 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvth_n");
11839 }
11840 case NEON::BI__builtin_neon_vpaddd_s64: {
11841 auto *Ty = llvm::FixedVectorType::get(Int64Ty, 2);
11842 Value *Vec = EmitScalarExpr(E->getArg(0));
11843 // The vector is v2i64, so make sure it's bitcast to that.
11844 Vec = Builder.CreateBitCast(Vec, Ty, "v2i64");
11845 llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
11846 llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
11847 Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
11848 Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
11849 // Pairwise addition of a v2i64 into a scalar i64.
11850 return Builder.CreateAdd(Op0, Op1, "vpaddd");
11851 }
11852 case NEON::BI__builtin_neon_vpaddd_f64: {
11853 auto *Ty = llvm::FixedVectorType::get(DoubleTy, 2);
11854 Value *Vec = EmitScalarExpr(E->getArg(0));
11855 // The vector is v2f64, so make sure it's bitcast to that.
11856 Vec = Builder.CreateBitCast(Vec, Ty, "v2f64");
11857 llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
11858 llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
11859 Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
11860 Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
11861 // Pairwise addition of a v2f64 into a scalar f64.
11862 return Builder.CreateFAdd(Op0, Op1, "vpaddd");
11863 }
11864 case NEON::BI__builtin_neon_vpadds_f32: {
11865 auto *Ty = llvm::FixedVectorType::get(FloatTy, 2);
11866 Value *Vec = EmitScalarExpr(E->getArg(0));
11867 // The vector is v2f32, so make sure it's bitcast to that.
11868 Vec = Builder.CreateBitCast(Vec, Ty, "v2f32");
11869 llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
11870 llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
11871 Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
11872 Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
11873 // Pairwise addition of a v2f32 into a scalar f32.
11874 return Builder.CreateFAdd(Op0, Op1, "vpaddd");
11875 }
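  // Illustrative sketch: the scalar pairwise adds above extract both lanes
  // and add them directly, e.g. vpaddd_f64(v) is roughly
  //   %a = extractelement <2 x double> %v, i64 0
  //   %b = extractelement <2 x double> %v, i64 1
  //   %r = fadd double %a, %b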
11876 case NEON::BI__builtin_neon_vceqzd_s64:
11877 case NEON::BI__builtin_neon_vceqzd_f64:
11878 case NEON::BI__builtin_neon_vceqzs_f32:
11879 case NEON::BI__builtin_neon_vceqzh_f16:
11880 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11881 return EmitAArch64CompareBuiltinExpr(
11882 Ops[0], ConvertType(E->getCallReturnType(getContext())),
11883 ICmpInst::FCMP_OEQ, ICmpInst::ICMP_EQ, "vceqz");
11884 case NEON::BI__builtin_neon_vcgezd_s64:
11885 case NEON::BI__builtin_neon_vcgezd_f64:
11886 case NEON::BI__builtin_neon_vcgezs_f32:
11887 case NEON::BI__builtin_neon_vcgezh_f16:
11888 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11889 return EmitAArch64CompareBuiltinExpr(
11890 Ops[0], ConvertType(E->getCallReturnType(getContext())),
11891 ICmpInst::FCMP_OGE, ICmpInst::ICMP_SGE, "vcgez");
11892 case NEON::BI__builtin_neon_vclezd_s64:
11893 case NEON::BI__builtin_neon_vclezd_f64:
11894 case NEON::BI__builtin_neon_vclezs_f32:
11895 case NEON::BI__builtin_neon_vclezh_f16:
11896 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11897 return EmitAArch64CompareBuiltinExpr(
11898 Ops[0], ConvertType(E->getCallReturnType(getContext())),
11899 ICmpInst::FCMP_OLE, ICmpInst::ICMP_SLE, "vclez");
11900 case NEON::BI__builtin_neon_vcgtzd_s64:
11901 case NEON::BI__builtin_neon_vcgtzd_f64:
11902 case NEON::BI__builtin_neon_vcgtzs_f32:
11903 case NEON::BI__builtin_neon_vcgtzh_f16:
11904 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11905 return EmitAArch64CompareBuiltinExpr(
11906 Ops[0], ConvertType(E->getCallReturnType(getContext())),
11907 ICmpInst::FCMP_OGT, ICmpInst::ICMP_SGT, "vcgtz");
11908 case NEON::BI__builtin_neon_vcltzd_s64:
11909 case NEON::BI__builtin_neon_vcltzd_f64:
11910 case NEON::BI__builtin_neon_vcltzs_f32:
11911 case NEON::BI__builtin_neon_vcltzh_f16:
11912 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11913 return EmitAArch64CompareBuiltinExpr(
11914 Ops[0], ConvertType(E->getCallReturnType(getContext())),
11915 ICmpInst::FCMP_OLT, ICmpInst::ICMP_SLT, "vcltz");
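  // The vceqz/vcgez/vclez/vcgtz/vcltz scalar forms above produce an
  // all-ones/all-zero mask of the result width, roughly sext(cmp x, 0) with
  // the FP or integer predicate shown; the zero operand and the sign-extend
  // are materialized by the shared EmitAArch64CompareBuiltinExpr helper
  // (not shown here).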
11916
11917 case NEON::BI__builtin_neon_vceqzd_u64: {
11918 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11919 Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
11920 Ops[0] =
11921 Builder.CreateICmpEQ(Ops[0], llvm::Constant::getNullValue(Int64Ty));
11922 return Builder.CreateSExt(Ops[0], Int64Ty, "vceqzd");
11923 }
11924 case NEON::BI__builtin_neon_vceqd_f64:
11925 case NEON::BI__builtin_neon_vcled_f64:
11926 case NEON::BI__builtin_neon_vcltd_f64:
11927 case NEON::BI__builtin_neon_vcged_f64:
11928 case NEON::BI__builtin_neon_vcgtd_f64: {
11929 llvm::CmpInst::Predicate P;
11930 switch (BuiltinID) {
11931 default: llvm_unreachable("missing builtin ID in switch!");
11932 case NEON::BI__builtin_neon_vceqd_f64: P = llvm::FCmpInst::FCMP_OEQ; break;
11933 case NEON::BI__builtin_neon_vcled_f64: P = llvm::FCmpInst::FCMP_OLE; break;
11934 case NEON::BI__builtin_neon_vcltd_f64: P = llvm::FCmpInst::FCMP_OLT; break;
11935 case NEON::BI__builtin_neon_vcged_f64: P = llvm::FCmpInst::FCMP_OGE; break;
11936 case NEON::BI__builtin_neon_vcgtd_f64: P = llvm::FCmpInst::FCMP_OGT; break;
11937 }
11938 Ops.push_back(EmitScalarExpr(E->getArg(1)));
11939 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
11940 Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
11941 if (P == llvm::FCmpInst::FCMP_OEQ)
11942 Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
11943 else
11944 Ops[0] = Builder.CreateFCmpS(P, Ops[0], Ops[1]);
11945 return Builder.CreateSExt(Ops[0], Int64Ty, "vcmpd");
11946 }
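  // Illustrative sketch: vcged_f64(a, b) above becomes roughly
  //   sext(fcmp oge double %a, %b) to i64
  // Only the equality form uses an ordinary quiet compare; the ordering
  // forms go through CreateFCmpS so a signalling compare is emitted when
  // strict FP semantics are in effect.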
11947 case NEON::BI__builtin_neon_vceqs_f32:
11948 case NEON::BI__builtin_neon_vcles_f32:
11949 case NEON::BI__builtin_neon_vclts_f32:
11950 case NEON::BI__builtin_neon_vcges_f32:
11951 case NEON::BI__builtin_neon_vcgts_f32: {
11952 llvm::CmpInst::Predicate P;
11953 switch (BuiltinID) {
11954 default: llvm_unreachable("missing builtin ID in switch!");
11955 case NEON::BI__builtin_neon_vceqs_f32: P = llvm::FCmpInst::FCMP_OEQ; break;
11956 case NEON::BI__builtin_neon_vcles_f32: P = llvm::FCmpInst::FCMP_OLE; break;
11957 case NEON::BI__builtin_neon_vclts_f32: P = llvm::FCmpInst::FCMP_OLT; break;
11958 case NEON::BI__builtin_neon_vcges_f32: P = llvm::FCmpInst::FCMP_OGE; break;
11959 case NEON::BI__builtin_neon_vcgts_f32: P = llvm::FCmpInst::FCMP_OGT; break;
11960 }
11961 Ops.push_back(EmitScalarExpr(E->getArg(1)));
11962 Ops[0] = Builder.CreateBitCast(Ops[0], FloatTy);
11963 Ops[1] = Builder.CreateBitCast(Ops[1], FloatTy);
11964 if (P == llvm::FCmpInst::FCMP_OEQ)
11965 Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
11966 else
11967 Ops[0] = Builder.CreateFCmpS(P, Ops[0], Ops[1]);
11968 return Builder.CreateSExt(Ops[0], Int32Ty, "vcmpd");
11969 }
11970 case NEON::BI__builtin_neon_vceqh_f16:
11971 case NEON::BI__builtin_neon_vcleh_f16:
11972 case NEON::BI__builtin_neon_vclth_f16:
11973 case NEON::BI__builtin_neon_vcgeh_f16:
11974 case NEON::BI__builtin_neon_vcgth_f16: {
11975 llvm::CmpInst::Predicate P;
11976 switch (BuiltinID) {
11977 default: llvm_unreachable("missing builtin ID in switch!");
11978 case NEON::BI__builtin_neon_vceqh_f16: P = llvm::FCmpInst::FCMP_OEQ; break;
11979 case NEON::BI__builtin_neon_vcleh_f16: P = llvm::FCmpInst::FCMP_OLE; break;
11980 case NEON::BI__builtin_neon_vclth_f16: P = llvm::FCmpInst::FCMP_OLT; break;
11981 case NEON::BI__builtin_neon_vcgeh_f16: P = llvm::FCmpInst::FCMP_OGE; break;
11982 case NEON::BI__builtin_neon_vcgth_f16: P = llvm::FCmpInst::FCMP_OGT; break;
11983 }
11984 Ops.push_back(EmitScalarExpr(E->getArg(1)));
11985 Ops[0] = Builder.CreateBitCast(Ops[0], HalfTy);
11986 Ops[1] = Builder.CreateBitCast(Ops[1], HalfTy);
11987 if (P == llvm::FCmpInst::FCMP_OEQ)
11988 Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
11989 else
11990 Ops[0] = Builder.CreateFCmpS(P, Ops[0], Ops[1]);
11991 return Builder.CreateSExt(Ops[0], Int16Ty, "vcmpd");
11992 }
11993 case NEON::BI__builtin_neon_vceqd_s64:
11994 case NEON::BI__builtin_neon_vceqd_u64:
11995 case NEON::BI__builtin_neon_vcgtd_s64:
11996 case NEON::BI__builtin_neon_vcgtd_u64:
11997 case NEON::BI__builtin_neon_vcltd_s64:
11998 case NEON::BI__builtin_neon_vcltd_u64:
11999 case NEON::BI__builtin_neon_vcged_u64:
12000 case NEON::BI__builtin_neon_vcged_s64:
12001 case NEON::BI__builtin_neon_vcled_u64:
12002 case NEON::BI__builtin_neon_vcled_s64: {
12003 llvm::CmpInst::Predicate P;
12004 switch (BuiltinID) {
12005 default: llvm_unreachable("missing builtin ID in switch!");
12006 case NEON::BI__builtin_neon_vceqd_s64:
12007 case NEON::BI__builtin_neon_vceqd_u64:P = llvm::ICmpInst::ICMP_EQ;break;
12008 case NEON::BI__builtin_neon_vcgtd_s64:P = llvm::ICmpInst::ICMP_SGT;break;
12009 case NEON::BI__builtin_neon_vcgtd_u64:P = llvm::ICmpInst::ICMP_UGT;break;
12010 case NEON::BI__builtin_neon_vcltd_s64:P = llvm::ICmpInst::ICMP_SLT;break;
12011 case NEON::BI__builtin_neon_vcltd_u64:P = llvm::ICmpInst::ICMP_ULT;break;
12012 case NEON::BI__builtin_neon_vcged_u64:P = llvm::ICmpInst::ICMP_UGE;break;
12013 case NEON::BI__builtin_neon_vcged_s64:P = llvm::ICmpInst::ICMP_SGE;break;
12014 case NEON::BI__builtin_neon_vcled_u64:P = llvm::ICmpInst::ICMP_ULE;break;
12015 case NEON::BI__builtin_neon_vcled_s64:P = llvm::ICmpInst::ICMP_SLE;break;
12016 }
12017 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12018 Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
12019 Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
12020 Ops[0] = Builder.CreateICmp(P, Ops[0], Ops[1]);
12021 return Builder.CreateSExt(Ops[0], Int64Ty, "vceqd");
12022 }
12023 case NEON::BI__builtin_neon_vtstd_s64:
12024 case NEON::BI__builtin_neon_vtstd_u64: {
12025 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12026 Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
12027 Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
12028 Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
12029 Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
12030 llvm::Constant::getNullValue(Int64Ty));
12031 return Builder.CreateSExt(Ops[0], Int64Ty, "vtstd");
12032 }
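  // Illustrative sketch: vtstd_u64(a, b) above is the classic "test bits"
  // idiom, roughly sext((a & b) != 0) to i64, yielding all-ones when the
  // operands share any set bit and zero otherwise.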
12033 case NEON::BI__builtin_neon_vset_lane_i8:
12034 case NEON::BI__builtin_neon_vset_lane_i16:
12035 case NEON::BI__builtin_neon_vset_lane_i32:
12036 case NEON::BI__builtin_neon_vset_lane_i64:
12037 case NEON::BI__builtin_neon_vset_lane_bf16:
12038 case NEON::BI__builtin_neon_vset_lane_f32:
12039 case NEON::BI__builtin_neon_vsetq_lane_i8:
12040 case NEON::BI__builtin_neon_vsetq_lane_i16:
12041 case NEON::BI__builtin_neon_vsetq_lane_i32:
12042 case NEON::BI__builtin_neon_vsetq_lane_i64:
12043 case NEON::BI__builtin_neon_vsetq_lane_bf16:
12044 case NEON::BI__builtin_neon_vsetq_lane_f32:
12045 Ops.push_back(EmitScalarExpr(E->getArg(2)));
12046 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
12047 case NEON::BI__builtin_neon_vset_lane_f64:
12048 // The vector type needs a cast for the v1f64 variant.
12049 Ops[1] =
12050 Builder.CreateBitCast(Ops[1], llvm::FixedVectorType::get(DoubleTy, 1));
12051 Ops.push_back(EmitScalarExpr(E->getArg(2)));
12052 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
12053 case NEON::BI__builtin_neon_vsetq_lane_f64:
12054 // The vector type needs a cast for the v2f64 variant.
12055 Ops[1] =
12056 Builder.CreateBitCast(Ops[1], llvm::FixedVectorType::get(DoubleTy, 2));
12057 Ops.push_back(EmitScalarExpr(E->getArg(2)));
12058 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
12059
12060 case NEON::BI__builtin_neon_vget_lane_i8:
12061 case NEON::BI__builtin_neon_vdupb_lane_i8:
12062 Ops[0] =
12063 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int8Ty, 8));
12064 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12065 "vget_lane");
12066 case NEON::BI__builtin_neon_vgetq_lane_i8:
12067 case NEON::BI__builtin_neon_vdupb_laneq_i8:
12068 Ops[0] =
12069 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int8Ty, 16));
12070 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12071 "vgetq_lane");
12072 case NEON::BI__builtin_neon_vget_lane_i16:
12073 case NEON::BI__builtin_neon_vduph_lane_i16:
12074 Ops[0] =
12075 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int16Ty, 4));
12076 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12077 "vget_lane");
12078 case NEON::BI__builtin_neon_vgetq_lane_i16:
12079 case NEON::BI__builtin_neon_vduph_laneq_i16:
12080 Ops[0] =
12081 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int16Ty, 8));
12082 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12083 "vgetq_lane");
12084 case NEON::BI__builtin_neon_vget_lane_i32:
12085 case NEON::BI__builtin_neon_vdups_lane_i32:
12086 Ops[0] =
12087 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int32Ty, 2));
12088 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12089 "vget_lane");
12090 case NEON::BI__builtin_neon_vdups_lane_f32:
12091 Ops[0] =
12092 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(FloatTy, 2));
12093 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12094 "vdups_lane");
12095 case NEON::BI__builtin_neon_vgetq_lane_i32:
12096 case NEON::BI__builtin_neon_vdups_laneq_i32:
12097 Ops[0] =
12098 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int32Ty, 4));
12099 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12100 "vgetq_lane");
12101 case NEON::BI__builtin_neon_vget_lane_i64:
12102 case NEON::BI__builtin_neon_vdupd_lane_i64:
12103 Ops[0] =
12104 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int64Ty, 1));
12105 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12106 "vget_lane");
12107 case NEON::BI__builtin_neon_vdupd_lane_f64:
12108 Ops[0] =
12109 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(DoubleTy, 1));
12110 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12111 "vdupd_lane");
12112 case NEON::BI__builtin_neon_vgetq_lane_i64:
12113 case NEON::BI__builtin_neon_vdupd_laneq_i64:
12114 Ops[0] =
12115 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int64Ty, 2));
12116 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12117 "vgetq_lane");
12118 case NEON::BI__builtin_neon_vget_lane_f32:
12119 Ops[0] =
12120 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(FloatTy, 2));
12121 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12122 "vget_lane");
12123 case NEON::BI__builtin_neon_vget_lane_f64:
12124 Ops[0] =
12125 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(DoubleTy, 1));
12126 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12127 "vget_lane");
12128 case NEON::BI__builtin_neon_vgetq_lane_f32:
12129 case NEON::BI__builtin_neon_vdups_laneq_f32:
12130 Ops[0] =
12131 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(FloatTy, 4));
12132 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12133 "vgetq_lane");
12134 case NEON::BI__builtin_neon_vgetq_lane_f64:
12135 case NEON::BI__builtin_neon_vdupd_laneq_f64:
12136 Ops[0] =
12137 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(DoubleTy, 2));
12138 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12139 "vgetq_lane");
12140 case NEON::BI__builtin_neon_vaddh_f16:
12141 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12142 return Builder.CreateFAdd(Ops[0], Ops[1], "vaddh");
12143 case NEON::BI__builtin_neon_vsubh_f16:
12144 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12145 return Builder.CreateFSub(Ops[0], Ops[1], "vsubh");
12146 case NEON::BI__builtin_neon_vmulh_f16:
12147 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12148 return Builder.CreateFMul(Ops[0], Ops[1], "vmulh");
12149 case NEON::BI__builtin_neon_vdivh_f16:
12150 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12151 return Builder.CreateFDiv(Ops[0], Ops[1], "vdivh");
12152 case NEON::BI__builtin_neon_vfmah_f16:
12153 // NEON intrinsic puts accumulator first, unlike the LLVM fma.
12154 return emitCallMaybeConstrainedFPBuiltin(
12155 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, HalfTy,
12156 {EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2)), Ops[0]});
12157 case NEON::BI__builtin_neon_vfmsh_f16: {
12158 Value* Neg = Builder.CreateFNeg(EmitScalarExpr(E->getArg(1)), "vsubh");
12159
12160 // NEON intrinsic puts accumulator first, unlike the LLVM fma.
12161 return emitCallMaybeConstrainedFPBuiltin(
12162 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, HalfTy,
12163 {Neg, EmitScalarExpr(E->getArg(2)), Ops[0]});
12164 }
12165 case NEON::BI__builtin_neon_vaddd_s64:
12166 case NEON::BI__builtin_neon_vaddd_u64:
12167 return Builder.CreateAdd(Ops[0], EmitScalarExpr(E->getArg(1)), "vaddd");
12168 case NEON::BI__builtin_neon_vsubd_s64:
12169 case NEON::BI__builtin_neon_vsubd_u64:
12170 return Builder.CreateSub(Ops[0], EmitScalarExpr(E->getArg(1)), "vsubd");
12171 case NEON::BI__builtin_neon_vqdmlalh_s16:
12172 case NEON::BI__builtin_neon_vqdmlslh_s16: {
12173 SmallVector<Value *, 2> ProductOps;
12174 ProductOps.push_back(vectorWrapScalar16(Ops[1]));
12175 ProductOps.push_back(vectorWrapScalar16(EmitScalarExpr(E->getArg(2))));
12176 auto *VTy = llvm::FixedVectorType::get(Int32Ty, 4);
12177 Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
12178 ProductOps, "vqdmlXl");
12179 Constant *CI = ConstantInt::get(SizeTy, 0);
12180 Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
12181
12182 unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlalh_s16
12183 ? Intrinsic::aarch64_neon_sqadd
12184 : Intrinsic::aarch64_neon_sqsub;
12185 return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int32Ty), Ops, "vqdmlXl");
12186 }
12187 case NEON::BI__builtin_neon_vqshlud_n_s64: {
12188 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12189 Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
12190 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqshlu, Int64Ty),
12191 Ops, "vqshlu_n");
12192 }
12193 case NEON::BI__builtin_neon_vqshld_n_u64:
12194 case NEON::BI__builtin_neon_vqshld_n_s64: {
12195 unsigned Int = BuiltinID == NEON::BI__builtin_neon_vqshld_n_u64
12196 ? Intrinsic::aarch64_neon_uqshl
12197 : Intrinsic::aarch64_neon_sqshl;
12198 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12199 Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
12200 return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vqshl_n");
12201 }
12202 case NEON::BI__builtin_neon_vrshrd_n_u64:
12203 case NEON::BI__builtin_neon_vrshrd_n_s64: {
12204 unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrshrd_n_u64
12205 ? Intrinsic::aarch64_neon_urshl
12206 : Intrinsic::aarch64_neon_srshl;
12207 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12208 int SV = cast<ConstantInt>(Ops[1])->getSExtValue();
12209 Ops[1] = ConstantInt::get(Int64Ty, -SV);
12210 return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vrshr_n");
12211 }
12212 case NEON::BI__builtin_neon_vrsrad_n_u64:
12213 case NEON::BI__builtin_neon_vrsrad_n_s64: {
12214 unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrsrad_n_u64
12215 ? Intrinsic::aarch64_neon_urshl
12216 : Intrinsic::aarch64_neon_srshl;
12217 Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
12218 Ops.push_back(Builder.CreateNeg(EmitScalarExpr(E->getArg(2))));
12219 Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Int64Ty),
12220 {Ops[1], Builder.CreateSExt(Ops[2], Int64Ty)});
12221 return Builder.CreateAdd(Ops[0], Builder.CreateBitCast(Ops[1], Int64Ty));
12222 }
12223 case NEON::BI__builtin_neon_vshld_n_s64:
12224 case NEON::BI__builtin_neon_vshld_n_u64: {
12225 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
12226 return Builder.CreateShl(
12227 Ops[0], ConstantInt::get(Int64Ty, Amt->getZExtValue()), "shld_n");
12228 }
12229 case NEON::BI__builtin_neon_vshrd_n_s64: {
12230 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
12231 return Builder.CreateAShr(
12232 Ops[0], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
12233 Amt->getZExtValue())),
12234 "shrd_n");
12235 }
12236 case NEON::BI__builtin_neon_vshrd_n_u64: {
12237 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
12238 uint64_t ShiftAmt = Amt->getZExtValue();
12239 // Right-shifting an unsigned value by its size yields 0.
12240 if (ShiftAmt == 64)
12241 return ConstantInt::get(Int64Ty, 0);
12242 return Builder.CreateLShr(Ops[0], ConstantInt::get(Int64Ty, ShiftAmt),
12243 "shrd_n");
12244 }
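  // Note on the two cases above: the shift amount is an integer constant
  // expression. The signed form clamps the amount to 63, and an unsigned
  // shift by 64 (which would be poison as an lshr) is folded directly to
  // the architectural result of 0.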
12245 case NEON::BI__builtin_neon_vsrad_n_s64: {
12246 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
12247 Ops[1] = Builder.CreateAShr(
12248 Ops[1], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
12249 Amt->getZExtValue())),
12250 "shrd_n");
12251 return Builder.CreateAdd(Ops[0], Ops[1]);
12252 }
12253 case NEON::BI__builtin_neon_vsrad_n_u64: {
12254 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
12255 uint64_t ShiftAmt = Amt->getZExtValue();
12256 // Right-shifting an unsigned value by its size yields 0.
12257 // As Op + 0 = Op, return Ops[0] directly.
12258 if (ShiftAmt == 64)
12259 return Ops[0];
12260 Ops[1] = Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, ShiftAmt),
12261 "shrd_n");
12262 return Builder.CreateAdd(Ops[0], Ops[1]);
12263 }
12264 case NEON::BI__builtin_neon_vqdmlalh_lane_s16:
12265 case NEON::BI__builtin_neon_vqdmlalh_laneq_s16:
12266 case NEON::BI__builtin_neon_vqdmlslh_lane_s16:
12267 case NEON::BI__builtin_neon_vqdmlslh_laneq_s16: {
12268 Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)),
12269 "lane");
12270 SmallVector<Value *, 2> ProductOps;
12271 ProductOps.push_back(vectorWrapScalar16(Ops[1]));
12272 ProductOps.push_back(vectorWrapScalar16(Ops[2]));
12273 auto *VTy = llvm::FixedVectorType::get(Int32Ty, 4);
12274 Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
12275 ProductOps, "vqdmlXl");
12276 Constant *CI = ConstantInt::get(SizeTy, 0);
12277 Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
12278 Ops.pop_back();
12279
12280 unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlalh_lane_s16 ||
12281 BuiltinID == NEON::BI__builtin_neon_vqdmlalh_laneq_s16)
12282 ? Intrinsic::aarch64_neon_sqadd
12283 : Intrinsic::aarch64_neon_sqsub;
12284 return EmitNeonCall(CGM.getIntrinsic(AccInt, Int32Ty), Ops, "vqdmlXl");
12285 }
12286 case NEON::BI__builtin_neon_vqdmlals_s32:
12287 case NEON::BI__builtin_neon_vqdmlsls_s32: {
12288 SmallVector<Value *, 2> ProductOps;
12289 ProductOps.push_back(Ops[1]);
12290 ProductOps.push_back(EmitScalarExpr(E->getArg(2)));
12291 Ops[1] =
12292 EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
12293 ProductOps, "vqdmlXl");
12294
12295 unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlals_s32
12296 ? Intrinsic::aarch64_neon_sqadd
12297 : Intrinsic::aarch64_neon_sqsub;
12298 return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int64Ty), Ops, "vqdmlXl");
12299 }
12300 case NEON::BI__builtin_neon_vqdmlals_lane_s32:
12301 case NEON::BI__builtin_neon_vqdmlals_laneq_s32:
12302 case NEON::BI__builtin_neon_vqdmlsls_lane_s32:
12303 case NEON::BI__builtin_neon_vqdmlsls_laneq_s32: {
12304 Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)),
12305 "lane");
12306 SmallVector<Value *, 2> ProductOps;
12307 ProductOps.push_back(Ops[1]);
12308 ProductOps.push_back(Ops[2]);
12309 Ops[1] =
12310 EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
12311 ProductOps, "vqdmlXl");
12312 Ops.pop_back();
12313
12314 unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlals_lane_s32 ||
12315 BuiltinID == NEON::BI__builtin_neon_vqdmlals_laneq_s32)
12316 ? Intrinsic::aarch64_neon_sqadd
12317 : Intrinsic::aarch64_neon_sqsub;
12318 return EmitNeonCall(CGM.getIntrinsic(AccInt, Int64Ty), Ops, "vqdmlXl");
12319 }
12320 case NEON::BI__builtin_neon_vget_lane_bf16:
12321 case NEON::BI__builtin_neon_vduph_lane_bf16:
12322 case NEON::BI__builtin_neon_vduph_lane_f16: {
12323 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12324 "vget_lane");
12325 }
12326 case NEON::BI__builtin_neon_vgetq_lane_bf16:
12327 case NEON::BI__builtin_neon_vduph_laneq_bf16:
12328 case NEON::BI__builtin_neon_vduph_laneq_f16: {
12329 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12330 "vgetq_lane");
12331 }
12332
12333 case clang::AArch64::BI_InterlockedAdd:
12334 case clang::AArch64::BI_InterlockedAdd64: {
12335 Address DestAddr = CheckAtomicAlignment(*this, E);
12336 Value *Val = EmitScalarExpr(E->getArg(1));
12337 AtomicRMWInst *RMWI =
12338 Builder.CreateAtomicRMW(AtomicRMWInst::Add, DestAddr, Val,
12339 llvm::AtomicOrdering::SequentiallyConsistent);
12340 return Builder.CreateAdd(RMWI, Val);
12341 }
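  // Illustrative note: atomicrmw add returns the value the memory location
  // held *before* the update, so the CreateAdd above reconstructs the
  // post-increment value that _InterlockedAdd is expected to return.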
12342 }
12343
12344 llvm::FixedVectorType *VTy = GetNeonType(this, Type);
12345 llvm::Type *Ty = VTy;
12346 if (!Ty)
12347 return nullptr;
12348
12349 // Not all intrinsics handled by the common case work for AArch64 yet, so only
12350 // defer to common code if it's been added to our special map.
12351 Builtin = findARMVectorIntrinsicInMap(AArch64SIMDIntrinsicMap, BuiltinID,
12352 AArch64SIMDIntrinsicsProvenSorted);
12353
12354 if (Builtin)
12355 return EmitCommonNeonBuiltinExpr(
12356 Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
12357 Builtin->NameHint, Builtin->TypeModifier, E, Ops,
12358 /*never use addresses*/ Address::invalid(), Address::invalid(), Arch);
12359
12360 if (Value *V = EmitAArch64TblBuiltinExpr(*this, BuiltinID, E, Ops, Arch))
12361 return V;
12362
12363 unsigned Int;
12364 switch (BuiltinID) {
12365 default: return nullptr;
12366 case NEON::BI__builtin_neon_vbsl_v:
12367 case NEON::BI__builtin_neon_vbslq_v: {
12368 llvm::Type *BitTy = llvm::VectorType::getInteger(VTy);
12369 Ops[0] = Builder.CreateBitCast(Ops[0], BitTy, "vbsl");
12370 Ops[1] = Builder.CreateBitCast(Ops[1], BitTy, "vbsl");
12371 Ops[2] = Builder.CreateBitCast(Ops[2], BitTy, "vbsl");
12372
12373 Ops[1] = Builder.CreateAnd(Ops[0], Ops[1], "vbsl");
12374 Ops[2] = Builder.CreateAnd(Builder.CreateNot(Ops[0]), Ops[2], "vbsl");
12375 Ops[0] = Builder.CreateOr(Ops[1], Ops[2], "vbsl");
12376 return Builder.CreateBitCast(Ops[0], Ty);
12377 }
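  // Illustrative sketch: the bit-select above computes, on the integer bit
  // pattern of the vectors,
  //   vbslq_*(mask, a, b) == (mask & a) | (~mask & b)
  // which is why all three operands are first bitcast to an integer vector.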
12378 case NEON::BI__builtin_neon_vfma_lane_v:
12379 case NEON::BI__builtin_neon_vfmaq_lane_v: { // Only used for FP types
12380 // The ARM builtins (and instructions) have the addend as the first
12381 // operand, but the 'fma' intrinsics have it last. Swap it around here.
12382 Value *Addend = Ops[0];
12383 Value *Multiplicand = Ops[1];
12384 Value *LaneSource = Ops[2];
12385 Ops[0] = Multiplicand;
12386 Ops[1] = LaneSource;
12387 Ops[2] = Addend;
12388
12389 // Now adjust things to handle the lane access.
12390 auto *SourceTy = BuiltinID == NEON::BI__builtin_neon_vfmaq_lane_v
12391 ? llvm::FixedVectorType::get(VTy->getElementType(),
12392 VTy->getNumElements() / 2)
12393 : VTy;
12394 llvm::Constant *cst = cast<Constant>(Ops[3]);
12395 Value *SV = llvm::ConstantVector::getSplat(VTy->getElementCount(), cst);
12396 Ops[1] = Builder.CreateBitCast(Ops[1], SourceTy);
12397 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV, "lane");
12398
12399 Ops.pop_back();
12400 Int = Builder.getIsFPConstrained() ? Intrinsic::experimental_constrained_fma
12401 : Intrinsic::fma;
12402 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fmla");
12403 }
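  // Note on the case above: besides moving the accumulator to the last
  // operand of llvm.fma, the lane operand is turned into a splat shuffle of
  // the multiplicand; under strict FP the experimental.constrained.fma
  // intrinsic is selected instead.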
12404 case NEON::BI__builtin_neon_vfma_laneq_v: {
12405 auto *VTy = cast<llvm::FixedVectorType>(Ty);
12406 // v1f64 fma should be mapped to Neon scalar f64 fma
12407 if (VTy && VTy->getElementType() == DoubleTy) {
12408 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
12409 Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
12410 llvm::FixedVectorType *VTy =
12411 GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, true));
12412 Ops[2] = Builder.CreateBitCast(Ops[2], VTy);
12413 Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
12414 Value *Result;
12415 Result = emitCallMaybeConstrainedFPBuiltin(
12416 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma,
12417 DoubleTy, {Ops[1], Ops[2], Ops[0]});
12418 return Builder.CreateBitCast(Result, Ty);
12419 }
12420 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
12421 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
12422
12423 auto *STy = llvm::FixedVectorType::get(VTy->getElementType(),
12424 VTy->getNumElements() * 2);
12425 Ops[2] = Builder.CreateBitCast(Ops[2], STy);
12426 Value *SV = llvm::ConstantVector::getSplat(VTy->getElementCount(),
12427 cast<ConstantInt>(Ops[3]));
12428 Ops[2] = Builder.CreateShuffleVector(Ops[2], Ops[2], SV, "lane");
12429
12430 return emitCallMaybeConstrainedFPBuiltin(
12431 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
12432 {Ops[2], Ops[1], Ops[0]});
12433 }
12434 case NEON::BI__builtin_neon_vfmaq_laneq_v: {
12435 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
12436 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
12437
12438 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
12439 Ops[2] = EmitNeonSplat(Ops[2], cast<ConstantInt>(Ops[3]));
12440 return emitCallMaybeConstrainedFPBuiltin(
12441 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
12442 {Ops[2], Ops[1], Ops[0]});
12443 }
12444 case NEON::BI__builtin_neon_vfmah_lane_f16:
12445 case NEON::BI__builtin_neon_vfmas_lane_f32:
12446 case NEON::BI__builtin_neon_vfmah_laneq_f16:
12447 case NEON::BI__builtin_neon_vfmas_laneq_f32:
12448 case NEON::BI__builtin_neon_vfmad_lane_f64:
12449 case NEON::BI__builtin_neon_vfmad_laneq_f64: {
12450 Ops.push_back(EmitScalarExpr(E->getArg(3)));
12451 llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
12452 Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
12453 return emitCallMaybeConstrainedFPBuiltin(
12454 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
12455 {Ops[1], Ops[2], Ops[0]});
12456 }
12457 case NEON::BI__builtin_neon_vmull_v:
12458 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
12459 Int = usgn ? Intrinsic::aarch64_neon_umull : Intrinsic::aarch64_neon_smull;
12460 if (Type.isPoly()) Int = Intrinsic::aarch64_neon_pmull;
12461 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
12462 case NEON::BI__builtin_neon_vmax_v:
12463 case NEON::BI__builtin_neon_vmaxq_v:
12464 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
12465 Int = usgn ? Intrinsic::aarch64_neon_umax : Intrinsic::aarch64_neon_smax;
12466 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmax;
12467 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmax");
12468 case NEON::BI__builtin_neon_vmaxh_f16: {
12469 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12470 Int = Intrinsic::aarch64_neon_fmax;
12471 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmax");
12472 }
12473 case NEON::BI__builtin_neon_vmin_v:
12474 case NEON::BI__builtin_neon_vminq_v:
12475 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
12476 Int = usgn ? Intrinsic::aarch64_neon_umin : Intrinsic::aarch64_neon_smin;
12477 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmin;
12478 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmin");
12479 case NEON::BI__builtin_neon_vminh_f16: {
12480 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12481 Int = Intrinsic::aarch64_neon_fmin;
12482 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmin");
12483 }
12484 case NEON::BI__builtin_neon_vabd_v:
12485 case NEON::BI__builtin_neon_vabdq_v:
12486 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
12487 Int = usgn ? Intrinsic::aarch64_neon_uabd : Intrinsic::aarch64_neon_sabd;
12488 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fabd;
12489 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vabd");
12490 case NEON::BI__builtin_neon_vpadal_v:
12491 case NEON::BI__builtin_neon_vpadalq_v: {
12492 unsigned ArgElts = VTy->getNumElements();
12493 llvm::IntegerType *EltTy = cast<IntegerType>(VTy->getElementType());
12494 unsigned BitWidth = EltTy->getBitWidth();
12495 auto *ArgTy = llvm::FixedVectorType::get(
12496 llvm::IntegerType::get(getLLVMContext(), BitWidth / 2), 2 * ArgElts);
12497 llvm::Type* Tys[2] = { VTy, ArgTy };
12498 Int = usgn ? Intrinsic::aarch64_neon_uaddlp : Intrinsic::aarch64_neon_saddlp;
12499 SmallVector<llvm::Value*, 1> TmpOps;
12500 TmpOps.push_back(Ops[1]);
12501 Function *F = CGM.getIntrinsic(Int, Tys);
12502 llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vpadal");
12503 llvm::Value *addend = Builder.CreateBitCast(Ops[0], tmp->getType());
12504 return Builder.CreateAdd(tmp, addend);
12505 }
12506 case NEON::BI__builtin_neon_vpmin_v:
12507 case NEON::BI__builtin_neon_vpminq_v:
12508 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
12509 Int = usgn ? Intrinsic::aarch64_neon_uminp : Intrinsic::aarch64_neon_sminp;
12510 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fminp;
12511 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmin");
12512 case NEON::BI__builtin_neon_vpmax_v:
12513 case NEON::BI__builtin_neon_vpmaxq_v:
12514 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
12515 Int = usgn ? Intrinsic::aarch64_neon_umaxp : Intrinsic::aarch64_neon_smaxp;
12516 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmaxp;
12517 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmax");
12518 case NEON::BI__builtin_neon_vminnm_v:
12519 case NEON::BI__builtin_neon_vminnmq_v:
12520 Int = Intrinsic::aarch64_neon_fminnm;
12521 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vminnm");
12522 case NEON::BI__builtin_neon_vminnmh_f16:
12523 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12524 Int = Intrinsic::aarch64_neon_fminnm;
12525 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vminnm");
12526 case NEON::BI__builtin_neon_vmaxnm_v:
12527 case NEON::BI__builtin_neon_vmaxnmq_v:
12528 Int = Intrinsic::aarch64_neon_fmaxnm;
12529 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmaxnm");
12530 case NEON::BI__builtin_neon_vmaxnmh_f16:
12531 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12532 Int = Intrinsic::aarch64_neon_fmaxnm;
12533 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmaxnm");
12534 case NEON::BI__builtin_neon_vrecpss_f32: {
12535 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12536 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, FloatTy),
12537 Ops, "vrecps");
12538 }
12539 case NEON::BI__builtin_neon_vrecpsd_f64:
12540 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12541 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, DoubleTy),
12542 Ops, "vrecps");
12543 case NEON::BI__builtin_neon_vrecpsh_f16:
12544 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12545 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, HalfTy),
12546 Ops, "vrecps");
12547 case NEON::BI__builtin_neon_vqshrun_n_v:
12548 Int = Intrinsic::aarch64_neon_sqshrun;
12549 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrun_n");
12550 case NEON::BI__builtin_neon_vqrshrun_n_v:
12551 Int = Intrinsic::aarch64_neon_sqrshrun;
12552 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrun_n");
12553 case NEON::BI__builtin_neon_vqshrn_n_v:
12554 Int = usgn ? Intrinsic::aarch64_neon_uqshrn : Intrinsic::aarch64_neon_sqshrn;
12555 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n");
12556 case NEON::BI__builtin_neon_vrshrn_n_v:
12557 Int = Intrinsic::aarch64_neon_rshrn;
12558 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshrn_n");
12559 case NEON::BI__builtin_neon_vqrshrn_n_v:
12560 Int = usgn ? Intrinsic::aarch64_neon_uqrshrn : Intrinsic::aarch64_neon_sqrshrn;
12561 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n");
12562 case NEON::BI__builtin_neon_vrndah_f16: {
12563 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12564 Int = Builder.getIsFPConstrained()
12565 ? Intrinsic::experimental_constrained_round
12566 : Intrinsic::round;
12567 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrnda");
12568 }
12569 case NEON::BI__builtin_neon_vrnda_v:
12570 case NEON::BI__builtin_neon_vrndaq_v: {
12571 Int = Builder.getIsFPConstrained()
12572 ? Intrinsic::experimental_constrained_round
12573 : Intrinsic::round;
12574 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnda");
12575 }
12576 case NEON::BI__builtin_neon_vrndih_f16: {
12577 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12578 Int = Builder.getIsFPConstrained()
12579 ? Intrinsic::experimental_constrained_nearbyint
12580 : Intrinsic::nearbyint;
12581 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndi");
12582 }
12583 case NEON::BI__builtin_neon_vrndmh_f16: {
12584 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12585 Int = Builder.getIsFPConstrained()
12586 ? Intrinsic::experimental_constrained_floor
12587 : Intrinsic::floor;
12588 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndm");
12589 }
12590 case NEON::BI__builtin_neon_vrndm_v:
12591 case NEON::BI__builtin_neon_vrndmq_v: {
12592 Int = Builder.getIsFPConstrained()
12593 ? Intrinsic::experimental_constrained_floor
12594 : Intrinsic::floor;
12595 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndm");
12596 }
12597 case NEON::BI__builtin_neon_vrndnh_f16: {
12598 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12599 Int = Builder.getIsFPConstrained()
12600 ? Intrinsic::experimental_constrained_roundeven
12601 : Intrinsic::roundeven;
12602 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndn");
12603 }
12604 case NEON::BI__builtin_neon_vrndn_v:
12605 case NEON::BI__builtin_neon_vrndnq_v: {
12606 Int = Builder.getIsFPConstrained()
12607 ? Intrinsic::experimental_constrained_roundeven
12608 : Intrinsic::roundeven;
12609 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndn");
12610 }
12611 case NEON::BI__builtin_neon_vrndns_f32: {
12612 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12613 Int = Builder.getIsFPConstrained()
12614 ? Intrinsic::experimental_constrained_roundeven
12615 : Intrinsic::roundeven;
12616 return EmitNeonCall(CGM.getIntrinsic(Int, FloatTy), Ops, "vrndn");
12617 }
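  // Note on the vrndn* cases above: round-to-nearest-with-ties-to-even maps
  // to llvm.roundeven (or its constrained counterpart under strict FP); the
  // other rounding variants in this group map to round, floor, ceil, rint
  // and trunc in the same way.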
12618 case NEON::BI__builtin_neon_vrndph_f16: {
12619 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12620 Int = Builder.getIsFPConstrained()
12621 ? Intrinsic::experimental_constrained_ceil
12622 : Intrinsic::ceil;
12623 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndp");
12624 }
12625 case NEON::BI__builtin_neon_vrndp_v:
12626 case NEON::BI__builtin_neon_vrndpq_v: {
12627 Int = Builder.getIsFPConstrained()
12628 ? Intrinsic::experimental_constrained_ceil
12629 : Intrinsic::ceil;
12630 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndp");
12631 }
12632 case NEON::BI__builtin_neon_vrndxh_f16: {
12633 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12634 Int = Builder.getIsFPConstrained()
12635 ? Intrinsic::experimental_constrained_rint
12636 : Intrinsic::rint;
12637 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndx");
12638 }
12639 case NEON::BI__builtin_neon_vrndx_v:
12640 case NEON::BI__builtin_neon_vrndxq_v: {
12641 Int = Builder.getIsFPConstrained()
12642 ? Intrinsic::experimental_constrained_rint
12643 : Intrinsic::rint;
12644 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndx");
12645 }
12646 case NEON::BI__builtin_neon_vrndh_f16: {
12647 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12648 Int = Builder.getIsFPConstrained()
12649 ? Intrinsic::experimental_constrained_trunc
12650 : Intrinsic::trunc;
12651 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndz");
12652 }
12653 case NEON::BI__builtin_neon_vrnd32x_f32:
12654 case NEON::BI__builtin_neon_vrnd32xq_f32:
12655 case NEON::BI__builtin_neon_vrnd32x_f64:
12656 case NEON::BI__builtin_neon_vrnd32xq_f64: {
12657 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12658 Int = Intrinsic::aarch64_neon_frint32x;
12659 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd32x");
12660 }
12661 case NEON::BI__builtin_neon_vrnd32z_f32:
12662 case NEON::BI__builtin_neon_vrnd32zq_f32:
12663 case NEON::BI__builtin_neon_vrnd32z_f64:
12664 case NEON::BI__builtin_neon_vrnd32zq_f64: {
12665 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12666 Int = Intrinsic::aarch64_neon_frint32z;
12667 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd32z");
12668 }
12669 case NEON::BI__builtin_neon_vrnd64x_f32:
12670 case NEON::BI__builtin_neon_vrnd64xq_f32:
12671 case NEON::BI__builtin_neon_vrnd64x_f64:
12672 case NEON::BI__builtin_neon_vrnd64xq_f64: {
12673 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12674 Int = Intrinsic::aarch64_neon_frint64x;
12675 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd64x");
12676 }
12677 case NEON::BI__builtin_neon_vrnd64z_f32:
12678 case NEON::BI__builtin_neon_vrnd64zq_f32:
12679 case NEON::BI__builtin_neon_vrnd64z_f64:
12680 case NEON::BI__builtin_neon_vrnd64zq_f64: {
12681 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12682 Int = Intrinsic::aarch64_neon_frint64z;
12683 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd64z");
12684 }
12685 case NEON::BI__builtin_neon_vrnd_v:
12686 case NEON::BI__builtin_neon_vrndq_v: {
12687 Int = Builder.getIsFPConstrained()
12688 ? Intrinsic::experimental_constrained_trunc
12689 : Intrinsic::trunc;
12690 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndz");
12691 }
12692 case NEON::BI__builtin_neon_vcvt_f64_v:
12693 case NEON::BI__builtin_neon_vcvtq_f64_v:
12694 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
12695 Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, quad));
12696 return usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
12697 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
12698 case NEON::BI__builtin_neon_vcvt_f64_f32: {
12699 assert(Type.getEltType() == NeonTypeFlags::Float64 && quad &&
12700 "unexpected vcvt_f64_f32 builtin");
12701 NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float32, false, false);
12702 Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
12703
12704 return Builder.CreateFPExt(Ops[0], Ty, "vcvt");
12705 }
12706 case NEON::BI__builtin_neon_vcvt_f32_f64: {
12707 assert(Type.getEltType() == NeonTypeFlags::Float32 &&
12708 "unexpected vcvt_f32_f64 builtin");
12709 NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float64, false, true);
12710 Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
12711
12712 return Builder.CreateFPTrunc(Ops[0], Ty, "vcvt");
12713 }
12714 case NEON::BI__builtin_neon_vcvt_s32_v:
12715 case NEON::BI__builtin_neon_vcvt_u32_v:
12716 case NEON::BI__builtin_neon_vcvt_s64_v:
12717 case NEON::BI__builtin_neon_vcvt_u64_v:
12718 case NEON::BI__builtin_neon_vcvt_s16_f16:
12719 case NEON::BI__builtin_neon_vcvt_u16_f16:
12720 case NEON::BI__builtin_neon_vcvtq_s32_v:
12721 case NEON::BI__builtin_neon_vcvtq_u32_v:
12722 case NEON::BI__builtin_neon_vcvtq_s64_v:
12723 case NEON::BI__builtin_neon_vcvtq_u64_v:
12724 case NEON::BI__builtin_neon_vcvtq_s16_f16:
12725 case NEON::BI__builtin_neon_vcvtq_u16_f16: {
12726 Int =
12727 usgn ? Intrinsic::aarch64_neon_fcvtzu : Intrinsic::aarch64_neon_fcvtzs;
12728 llvm::Type *Tys[2] = {Ty, GetFloatNeonType(this, Type)};
12729 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtz");
12730 }
12731 case NEON::BI__builtin_neon_vcvta_s16_f16:
12732 case NEON::BI__builtin_neon_vcvta_u16_f16:
12733 case NEON::BI__builtin_neon_vcvta_s32_v:
12734 case NEON::BI__builtin_neon_vcvtaq_s16_f16:
12735 case NEON::BI__builtin_neon_vcvtaq_s32_v:
12736 case NEON::BI__builtin_neon_vcvta_u32_v:
12737 case NEON::BI__builtin_neon_vcvtaq_u16_f16:
12738 case NEON::BI__builtin_neon_vcvtaq_u32_v:
12739 case NEON::BI__builtin_neon_vcvta_s64_v:
12740 case NEON::BI__builtin_neon_vcvtaq_s64_v:
12741 case NEON::BI__builtin_neon_vcvta_u64_v:
12742 case NEON::BI__builtin_neon_vcvtaq_u64_v: {
12743 Int = usgn ? Intrinsic::aarch64_neon_fcvtau : Intrinsic::aarch64_neon_fcvtas;
12744 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
12745 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvta");
12746 }
12747 case NEON::BI__builtin_neon_vcvtm_s16_f16:
12748 case NEON::BI__builtin_neon_vcvtm_s32_v:
12749 case NEON::BI__builtin_neon_vcvtmq_s16_f16:
12750 case NEON::BI__builtin_neon_vcvtmq_s32_v:
12751 case NEON::BI__builtin_neon_vcvtm_u16_f16:
12752 case NEON::BI__builtin_neon_vcvtm_u32_v:
12753 case NEON::BI__builtin_neon_vcvtmq_u16_f16:
12754 case NEON::BI__builtin_neon_vcvtmq_u32_v:
12755 case NEON::BI__builtin_neon_vcvtm_s64_v:
12756 case NEON::BI__builtin_neon_vcvtmq_s64_v:
12757 case NEON::BI__builtin_neon_vcvtm_u64_v:
12758 case NEON::BI__builtin_neon_vcvtmq_u64_v: {
12759 Int = usgn ? Intrinsic::aarch64_neon_fcvtmu : Intrinsic::aarch64_neon_fcvtms;
12760 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
12761 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtm");
12762 }
12763 case NEON::BI__builtin_neon_vcvtn_s16_f16:
12764 case NEON::BI__builtin_neon_vcvtn_s32_v:
12765 case NEON::BI__builtin_neon_vcvtnq_s16_f16:
12766 case NEON::BI__builtin_neon_vcvtnq_s32_v:
12767 case NEON::BI__builtin_neon_vcvtn_u16_f16:
12768 case NEON::BI__builtin_neon_vcvtn_u32_v:
12769 case NEON::BI__builtin_neon_vcvtnq_u16_f16:
12770 case NEON::BI__builtin_neon_vcvtnq_u32_v:
12771 case NEON::BI__builtin_neon_vcvtn_s64_v:
12772 case NEON::BI__builtin_neon_vcvtnq_s64_v:
12773 case NEON::BI__builtin_neon_vcvtn_u64_v:
12774 case NEON::BI__builtin_neon_vcvtnq_u64_v: {
12775 Int = usgn ? Intrinsic::aarch64_neon_fcvtnu : Intrinsic::aarch64_neon_fcvtns;
12776 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
12777 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtn");
12778 }
12779 case NEON::BI__builtin_neon_vcvtp_s16_f16:
12780 case NEON::BI__builtin_neon_vcvtp_s32_v:
12781 case NEON::BI__builtin_neon_vcvtpq_s16_f16:
12782 case NEON::BI__builtin_neon_vcvtpq_s32_v:
12783 case NEON::BI__builtin_neon_vcvtp_u16_f16:
12784 case NEON::BI__builtin_neon_vcvtp_u32_v:
12785 case NEON::BI__builtin_neon_vcvtpq_u16_f16:
12786 case NEON::BI__builtin_neon_vcvtpq_u32_v:
12787 case NEON::BI__builtin_neon_vcvtp_s64_v:
12788 case NEON::BI__builtin_neon_vcvtpq_s64_v:
12789 case NEON::BI__builtin_neon_vcvtp_u64_v:
12790 case NEON::BI__builtin_neon_vcvtpq_u64_v: {
12791 Int = usgn ? Intrinsic::aarch64_neon_fcvtpu : Intrinsic::aarch64_neon_fcvtps;
12792 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
12793 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtp");
12794 }
12795 case NEON::BI__builtin_neon_vmulx_v:
12796 case NEON::BI__builtin_neon_vmulxq_v: {
12797 Int = Intrinsic::aarch64_neon_fmulx;
12798 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmulx");
12799 }
12800 case NEON::BI__builtin_neon_vmulxh_lane_f16:
12801 case NEON::BI__builtin_neon_vmulxh_laneq_f16: {
12802 // vmulx_lane should be mapped to Neon scalar mulx after
12803 // extracting the scalar element
12804 Ops.push_back(EmitScalarExpr(E->getArg(2)));
12805 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");
12806 Ops.pop_back();
12807 Int = Intrinsic::aarch64_neon_fmulx;
12808 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmulx");
12809 }
12810 case NEON::BI__builtin_neon_vmul_lane_v:
12811 case NEON::BI__builtin_neon_vmul_laneq_v: {
12812 // v1f64 vmul_lane should be mapped to Neon scalar mul lane
12813 bool Quad = false;
12814 if (BuiltinID == NEON::BI__builtin_neon_vmul_laneq_v)
12815 Quad = true;
12816 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
12817 llvm::FixedVectorType *VTy =
12818 GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, Quad));
12819 Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
12820 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");
12821 Value *Result = Builder.CreateFMul(Ops[0], Ops[1]);
12822 return Builder.CreateBitCast(Result, Ty);
12823 }
12824 case NEON::BI__builtin_neon_vnegd_s64:
12825 return Builder.CreateNeg(EmitScalarExpr(E->getArg(0)), "vnegd");
12826 case NEON::BI__builtin_neon_vnegh_f16:
12827 return Builder.CreateFNeg(EmitScalarExpr(E->getArg(0)), "vnegh");
12828 case NEON::BI__builtin_neon_vpmaxnm_v:
12829 case NEON::BI__builtin_neon_vpmaxnmq_v: {
12830 Int = Intrinsic::aarch64_neon_fmaxnmp;
12831 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmaxnm");
12832 }
12833 case NEON::BI__builtin_neon_vpminnm_v:
12834 case NEON::BI__builtin_neon_vpminnmq_v: {
12835 Int = Intrinsic::aarch64_neon_fminnmp;
12836 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpminnm");
12837 }
12838 case NEON::BI__builtin_neon_vsqrth_f16: {
12839 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12840 Int = Builder.getIsFPConstrained()
12841 ? Intrinsic::experimental_constrained_sqrt
12842 : Intrinsic::sqrt;
12843 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vsqrt");
12844 }
12845 case NEON::BI__builtin_neon_vsqrt_v:
12846 case NEON::BI__builtin_neon_vsqrtq_v: {
12847 Int = Builder.getIsFPConstrained()
12848 ? Intrinsic::experimental_constrained_sqrt
12849 : Intrinsic::sqrt;
12850 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
12851 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqrt");
12852 }
12853 case NEON::BI__builtin_neon_vrbit_v:
12854 case NEON::BI__builtin_neon_vrbitq_v: {
12855 Int = Intrinsic::bitreverse;
12856 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrbit");
12857 }
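  // The vaddv/vaddvq cases below lower to the aarch64.neon.[us]addv intrinsics,
  // which always produce an i32 result; that result is truncated back to the
  // element width of the source vector.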
12858 case NEON::BI__builtin_neon_vaddv_u8:
12859 // FIXME: These are handled by the AArch64 scalar code.
12860 usgn = true;
12861 [[fallthrough]];
12862 case NEON::BI__builtin_neon_vaddv_s8: {
12863 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
12864 Ty = Int32Ty;
12865 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
12866 llvm::Type *Tys[2] = { Ty, VTy };
12867 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12868 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
12869 return Builder.CreateTrunc(Ops[0], Int8Ty);
12870 }
12871 case NEON::BI__builtin_neon_vaddv_u16:
12872 usgn = true;
12873 [[fallthrough]];
12874 case NEON::BI__builtin_neon_vaddv_s16: {
12875 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
12876 Ty = Int32Ty;
12877 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
12878 llvm::Type *Tys[2] = { Ty, VTy };
12879 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12880 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
12881 return Builder.CreateTrunc(Ops[0], Int16Ty);
12882 }
12883 case NEON::BI__builtin_neon_vaddvq_u8:
12884 usgn = true;
12885 [[fallthrough]];
12886 case NEON::BI__builtin_neon_vaddvq_s8: {
12887 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
12888 Ty = Int32Ty;
12889 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
12890 llvm::Type *Tys[2] = { Ty, VTy };
12891 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12892 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
12893 return Builder.CreateTrunc(Ops[0], Int8Ty);
12894 }
12895 case NEON::BI__builtin_neon_vaddvq_u16:
12896 usgn = true;
12897 [[fallthrough]];
12898 case NEON::BI__builtin_neon_vaddvq_s16: {
12899 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
12900 Ty = Int32Ty;
12901 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
12902 llvm::Type *Tys[2] = { Ty, VTy };
12903 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12904 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
12905 return Builder.CreateTrunc(Ops[0], Int16Ty);
12906 }
12907 case NEON::BI__builtin_neon_vmaxv_u8: {
12908 Int = Intrinsic::aarch64_neon_umaxv;
12909 Ty = Int32Ty;
12910 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
12911 llvm::Type *Tys[2] = { Ty, VTy };
12912 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12913 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
12914 return Builder.CreateTrunc(Ops[0], Int8Ty);
12915 }
12916 case NEON::BI__builtin_neon_vmaxv_u16: {
12917 Int = Intrinsic::aarch64_neon_umaxv;
12918 Ty = Int32Ty;
12919 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
12920 llvm::Type *Tys[2] = { Ty, VTy };
12921 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12922 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
12923 return Builder.CreateTrunc(Ops[0], Int16Ty);
12924 }
12925 case NEON::BI__builtin_neon_vmaxvq_u8: {
12926 Int = Intrinsic::aarch64_neon_umaxv;
12927 Ty = Int32Ty;
12928 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
12929 llvm::Type *Tys[2] = { Ty, VTy };
12930 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12931 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
12932 return Builder.CreateTrunc(Ops[0], Int8Ty);
12933 }
12934 case NEON::BI__builtin_neon_vmaxvq_u16: {
12935 Int = Intrinsic::aarch64_neon_umaxv;
12936 Ty = Int32Ty;
12937 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
12938 llvm::Type *Tys[2] = { Ty, VTy };
12939 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12940 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
12941 return Builder.CreateTrunc(Ops[0], Int16Ty);
12942 }
12943 case NEON::BI__builtin_neon_vmaxv_s8: {
12944 Int = Intrinsic::aarch64_neon_smaxv;
12945 Ty = Int32Ty;
12946 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
12947 llvm::Type *Tys[2] = { Ty, VTy };
12948 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12949 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
12950 return Builder.CreateTrunc(Ops[0], Int8Ty);
12951 }
12952 case NEON::BI__builtin_neon_vmaxv_s16: {
12953 Int = Intrinsic::aarch64_neon_smaxv;
12954 Ty = Int32Ty;
12955 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
12956 llvm::Type *Tys[2] = { Ty, VTy };
12957 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12958 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
12959 return Builder.CreateTrunc(Ops[0], Int16Ty);
12960 }
12961 case NEON::BI__builtin_neon_vmaxvq_s8: {
12962 Int = Intrinsic::aarch64_neon_smaxv;
12963 Ty = Int32Ty;
12964 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
12965 llvm::Type *Tys[2] = { Ty, VTy };
12966 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12967 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
12968 return Builder.CreateTrunc(Ops[0], Int8Ty);
12969 }
12970 case NEON::BI__builtin_neon_vmaxvq_s16: {
12971 Int = Intrinsic::aarch64_neon_smaxv;
12972 Ty = Int32Ty;
12973 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
12974 llvm::Type *Tys[2] = { Ty, VTy };
12975 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12976 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
12977 return Builder.CreateTrunc(Ops[0], Int16Ty);
12978 }
12979 case NEON::BI__builtin_neon_vmaxv_f16: {
12980 Int = Intrinsic::aarch64_neon_fmaxv;
12981 Ty = HalfTy;
12982 VTy = llvm::FixedVectorType::get(HalfTy, 4);
12983 llvm::Type *Tys[2] = { Ty, VTy };
12984 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12985 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
12986 return Builder.CreateTrunc(Ops[0], HalfTy);
12987 }
12988 case NEON::BI__builtin_neon_vmaxvq_f16: {
12989 Int = Intrinsic::aarch64_neon_fmaxv;
12990 Ty = HalfTy;
12991 VTy = llvm::FixedVectorType::get(HalfTy, 8);
12992 llvm::Type *Tys[2] = { Ty, VTy };
12993 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12994 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
12995 return Builder.CreateTrunc(Ops[0], HalfTy);
12996 }
12997 case NEON::BI__builtin_neon_vminv_u8: {
12998 Int = Intrinsic::aarch64_neon_uminv;
12999 Ty = Int32Ty;
13000 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
13001 llvm::Type *Tys[2] = { Ty, VTy };
13002 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13003 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
13004 return Builder.CreateTrunc(Ops[0], Int8Ty);
13005 }
13006 case NEON::BI__builtin_neon_vminv_u16: {
13007 Int = Intrinsic::aarch64_neon_uminv;
13008 Ty = Int32Ty;
13009 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
13010 llvm::Type *Tys[2] = { Ty, VTy };
13011 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13012 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
13013 return Builder.CreateTrunc(Ops[0], Int16Ty);
13014 }
13015 case NEON::BI__builtin_neon_vminvq_u8: {
13016 Int = Intrinsic::aarch64_neon_uminv;
13017 Ty = Int32Ty;
13018 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
13019 llvm::Type *Tys[2] = { Ty, VTy };
13020 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13021 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
13022 return Builder.CreateTrunc(Ops[0], Int8Ty);
13023 }
13024 case NEON::BI__builtin_neon_vminvq_u16: {
13025 Int = Intrinsic::aarch64_neon_uminv;
13026 Ty = Int32Ty;
13027 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
13028 llvm::Type *Tys[2] = { Ty, VTy };
13029 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13030 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
13031 return Builder.CreateTrunc(Ops[0], Int16Ty);
13032 }
13033 case NEON::BI__builtin_neon_vminv_s8: {
13034 Int = Intrinsic::aarch64_neon_sminv;
13035 Ty = Int32Ty;
13036 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
13037 llvm::Type *Tys[2] = { Ty, VTy };
13038 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13039 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
13040 return Builder.CreateTrunc(Ops[0], Int8Ty);
13041 }
13042 case NEON::BI__builtin_neon_vminv_s16: {
13043 Int = Intrinsic::aarch64_neon_sminv;
13044 Ty = Int32Ty;
13045 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
13046 llvm::Type *Tys[2] = { Ty, VTy };
13047 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13048 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
13049 return Builder.CreateTrunc(Ops[0], Int16Ty);
13050 }
13051 case NEON::BI__builtin_neon_vminvq_s8: {
13052 Int = Intrinsic::aarch64_neon_sminv;
13053 Ty = Int32Ty;
13054 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
13055 llvm::Type *Tys[2] = { Ty, VTy };
13056 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13057 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
13058 return Builder.CreateTrunc(Ops[0], Int8Ty);
13059 }
13060 case NEON::BI__builtin_neon_vminvq_s16: {
13061 Int = Intrinsic::aarch64_neon_sminv;
13062 Ty = Int32Ty;
13063 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
13064 llvm::Type *Tys[2] = { Ty, VTy };
13065 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13066 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
13067 return Builder.CreateTrunc(Ops[0], Int16Ty);
13068 }
13069 case NEON::BI__builtin_neon_vminv_f16: {
13070 Int = Intrinsic::aarch64_neon_fminv;
13071 Ty = HalfTy;
13072 VTy = llvm::FixedVectorType::get(HalfTy, 4);
13073 llvm::Type *Tys[2] = { Ty, VTy };
13074 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13075 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
13076 return Builder.CreateTrunc(Ops[0], HalfTy);
13077 }
13078 case NEON::BI__builtin_neon_vminvq_f16: {
13079 Int = Intrinsic::aarch64_neon_fminv;
13080 Ty = HalfTy;
13081 VTy = llvm::FixedVectorType::get(HalfTy, 8);
13082 llvm::Type *Tys[2] = { Ty, VTy };
13083 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13084 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
13085 return Builder.CreateTrunc(Ops[0], HalfTy);
13086 }
13087 case NEON::BI__builtin_neon_vmaxnmv_f16: {
13088 Int = Intrinsic::aarch64_neon_fmaxnmv;
13089 Ty = HalfTy;
13090 VTy = llvm::FixedVectorType::get(HalfTy, 4);
13091 llvm::Type *Tys[2] = { Ty, VTy };
13092 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13093 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxnmv");
13094 return Builder.CreateTrunc(Ops[0], HalfTy);
13095 }
13096 case NEON::BI__builtin_neon_vmaxnmvq_f16: {
13097 Int = Intrinsic::aarch64_neon_fmaxnmv;
13098 Ty = HalfTy;
13099 VTy = llvm::FixedVectorType::get(HalfTy, 8);
13100 llvm::Type *Tys[2] = { Ty, VTy };
13101 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13102 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxnmv");
13103 return Builder.CreateTrunc(Ops[0], HalfTy);
13104 }
13105 case NEON::BI__builtin_neon_vminnmv_f16: {
13106 Int = Intrinsic::aarch64_neon_fminnmv;
13107 Ty = HalfTy;
13108 VTy = llvm::FixedVectorType::get(HalfTy, 4);
13109 llvm::Type *Tys[2] = { Ty, VTy };
13110 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13111 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminnmv");
13112 return Builder.CreateTrunc(Ops[0], HalfTy);
13113 }
13114 case NEON::BI__builtin_neon_vminnmvq_f16: {
13115 Int = Intrinsic::aarch64_neon_fminnmv;
13116 Ty = HalfTy;
13117 VTy = llvm::FixedVectorType::get(HalfTy, 8);
13118 llvm::Type *Tys[2] = { Ty, VTy };
13119 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13120 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminnmv");
13121 return Builder.CreateTrunc(Ops[0], HalfTy);
13122 }
13123 case NEON::BI__builtin_neon_vmul_n_f64: {
13124 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
13125 Value *RHS = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), DoubleTy);
13126 return Builder.CreateFMul(Ops[0], RHS);
13127 }
13128 case NEON::BI__builtin_neon_vaddlv_u8: {
13129 Int = Intrinsic::aarch64_neon_uaddlv;
13130 Ty = Int32Ty;
13131 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
13132 llvm::Type *Tys[2] = { Ty, VTy };
13133 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13134 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
13135 return Builder.CreateTrunc(Ops[0], Int16Ty);
13136 }
13137 case NEON::BI__builtin_neon_vaddlv_u16: {
13138 Int = Intrinsic::aarch64_neon_uaddlv;
13139 Ty = Int32Ty;
13140 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
13141 llvm::Type *Tys[2] = { Ty, VTy };
13142 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13143 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
13144 }
13145 case NEON::BI__builtin_neon_vaddlvq_u8: {
13146 Int = Intrinsic::aarch64_neon_uaddlv;
13147 Ty = Int32Ty;
13148 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
13149 llvm::Type *Tys[2] = { Ty, VTy };
13150 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13151 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
13152 return Builder.CreateTrunc(Ops[0], Int16Ty);
13153 }
13154 case NEON::BI__builtin_neon_vaddlvq_u16: {
13155 Int = Intrinsic::aarch64_neon_uaddlv;
13156 Ty = Int32Ty;
13157 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
13158 llvm::Type *Tys[2] = { Ty, VTy };
13159 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13160 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
13161 }
13162 case NEON::BI__builtin_neon_vaddlv_s8: {
13163 Int = Intrinsic::aarch64_neon_saddlv;
13164 Ty = Int32Ty;
13165 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
13166 llvm::Type *Tys[2] = { Ty, VTy };
13167 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13168 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
13169 return Builder.CreateTrunc(Ops[0], Int16Ty);
13170 }
13171 case NEON::BI__builtin_neon_vaddlv_s16: {
13172 Int = Intrinsic::aarch64_neon_saddlv;
13173 Ty = Int32Ty;
13174 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
13175 llvm::Type *Tys[2] = { Ty, VTy };
13176 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13177 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
13178 }
13179 case NEON::BI__builtin_neon_vaddlvq_s8: {
13180 Int = Intrinsic::aarch64_neon_saddlv;
13181 Ty = Int32Ty;
13182 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
13183 llvm::Type *Tys[2] = { Ty, VTy };
13184 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13185 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
13186 return Builder.CreateTrunc(Ops[0], Int16Ty);
13187 }
13188 case NEON::BI__builtin_neon_vaddlvq_s16: {
13189 Int = Intrinsic::aarch64_neon_saddlv;
13190 Ty = Int32Ty;
13191 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
13192 llvm::Type *Tys[2] = { Ty, VTy };
13193 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13194 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
13195 }
13196 case NEON::BI__builtin_neon_vsri_n_v:
13197 case NEON::BI__builtin_neon_vsriq_n_v: {
13198 Int = Intrinsic::aarch64_neon_vsri;
13199 llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
13200 return EmitNeonCall(Intrin, Ops, "vsri_n");
13201 }
13202 case NEON::BI__builtin_neon_vsli_n_v:
13203 case NEON::BI__builtin_neon_vsliq_n_v: {
13204 Int = Intrinsic::aarch64_neon_vsli;
13205 llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
13206 return EmitNeonCall(Intrin, Ops, "vsli_n");
13207 }
13208 case NEON::BI__builtin_neon_vsra_n_v:
13209 case NEON::BI__builtin_neon_vsraq_n_v:
13210 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
13211 Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
13212 return Builder.CreateAdd(Ops[0], Ops[1]);
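  // vrsra_n: emit the rounding right shift via the [us]rshl intrinsic (the
  // shift amount is negated by EmitNeonCall so the shift goes right), then
  // accumulate the result into the first operand.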
13213 case NEON::BI__builtin_neon_vrsra_n_v:
13214 case NEON::BI__builtin_neon_vrsraq_n_v: {
13215 Int = usgn ? Intrinsic::aarch64_neon_urshl : Intrinsic::aarch64_neon_srshl;
13216 SmallVector<llvm::Value*,2> TmpOps;
13217 TmpOps.push_back(Ops[1]);
13218 TmpOps.push_back(Ops[2]);
13219 Function* F = CGM.getIntrinsic(Int, Ty);
13220 llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vrshr_n", 1, true);
13221 Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
13222 return Builder.CreateAdd(Ops[0], tmp);
13223 }
13224 case NEON::BI__builtin_neon_vld1_v:
13225 case NEON::BI__builtin_neon_vld1q_v: {
13226 return Builder.CreateAlignedLoad(VTy, Ops[0], PtrOp0.getAlignment());
13227 }
13228 case NEON::BI__builtin_neon_vst1_v:
13229 case NEON::BI__builtin_neon_vst1q_v:
13230 Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
13231 return Builder.CreateAlignedStore(Ops[1], Ops[0], PtrOp0.getAlignment());
13232 case NEON::BI__builtin_neon_vld1_lane_v:
13233 case NEON::BI__builtin_neon_vld1q_lane_v: {
13234 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13235 Ops[0] = Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0],
13236 PtrOp0.getAlignment());
13237 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vld1_lane");
13238 }
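  // vldap1(q)_lane: like vld1_lane, but the scalar element is loaded with
  // acquire ordering to match the LDAP1 load-acquire lane instruction.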
13239 case NEON::BI__builtin_neon_vldap1_lane_s64:
13240 case NEON::BI__builtin_neon_vldap1q_lane_s64: {
13241 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13242 llvm::LoadInst *LI = Builder.CreateAlignedLoad(
13243 VTy->getElementType(), Ops[0], PtrOp0.getAlignment());
13244 LI->setAtomic(llvm::AtomicOrdering::Acquire);
13245 Ops[0] = LI;
13246 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vldap1_lane");
13247 }
13248 case NEON::BI__builtin_neon_vld1_dup_v:
13249 case NEON::BI__builtin_neon_vld1q_dup_v: {
13250 Value *V = PoisonValue::get(Ty);
13251 Ops[0] = Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0],
13252 PtrOp0.getAlignment());
13253 llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
13254 Ops[0] = Builder.CreateInsertElement(V, Ops[0], CI);
13255 return EmitNeonSplat(Ops[0], CI);
13256 }
13257 case NEON::BI__builtin_neon_vst1_lane_v:
13258 case NEON::BI__builtin_neon_vst1q_lane_v:
13259 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13260 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
13261 return Builder.CreateAlignedStore(Ops[1], Ops[0], PtrOp0.getAlignment());
13262 case NEON::BI__builtin_neon_vstl1_lane_s64:
13263 case NEON::BI__builtin_neon_vstl1q_lane_s64: {
13264 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13265 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
13266 llvm::StoreInst *SI =
13267 Builder.CreateAlignedStore(Ops[1], Ops[0], PtrOp0.getAlignment());
13268 SI->setAtomic(llvm::AtomicOrdering::Release);
13269 return SI;
13270 }
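  // The vldN/vldN_dup cases below call the aarch64.neon.ldN(r) intrinsics,
  // which return a struct of N vectors; the struct is stored through the
  // result pointer in Ops[0].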
13271 case NEON::BI__builtin_neon_vld2_v:
13272 case NEON::BI__builtin_neon_vld2q_v: {
13273 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
13274 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2, Tys);
13275 Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
13276 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13277 }
13278 case NEON::BI__builtin_neon_vld3_v:
13279 case NEON::BI__builtin_neon_vld3q_v: {
13280 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
13281 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3, Tys);
13282 Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
13283 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13284 }
13285 case NEON::BI__builtin_neon_vld4_v:
13286 case NEON::BI__builtin_neon_vld4q_v: {
13287 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
13288 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4, Tys);
13289 Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
13290 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13291 }
13292 case NEON::BI__builtin_neon_vld2_dup_v:
13293 case NEON::BI__builtin_neon_vld2q_dup_v: {
13294 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
13295 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2r, Tys);
13296 Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
13297 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13298 }
13299 case NEON::BI__builtin_neon_vld3_dup_v:
13300 case NEON::BI__builtin_neon_vld3q_dup_v: {
13301 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
13302 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3r, Tys);
13303 Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
13304 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13305 }
13306 case NEON::BI__builtin_neon_vld4_dup_v:
13307 case NEON::BI__builtin_neon_vld4q_dup_v: {
13308 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
13309 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4r, Tys);
13310 Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
13311 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13312 }
13313 case NEON::BI__builtin_neon_vld2_lane_v:
13314 case NEON::BI__builtin_neon_vld2q_lane_v: {
13315 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
13316 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2lane, Tys);
13317 std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
13318 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13319 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
13320 Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
13321 Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), "vld2_lane");
13322 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13323 }
13324 case NEON::BI__builtin_neon_vld3_lane_v:
13325 case NEON::BI__builtin_neon_vld3q_lane_v: {
13326 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
13327 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3lane, Tys);
13328 std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
13329 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13330 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
13331 Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
13332 Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
13333 Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), "vld3_lane");
13334 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13335 }
13336 case NEON::BI__builtin_neon_vld4_lane_v:
13337 case NEON::BI__builtin_neon_vld4q_lane_v: {
13338 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
13339 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4lane, Tys);
13340 std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
13341 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13342 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
13343 Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
13344 Ops[4] = Builder.CreateBitCast(Ops[4], Ty);
13345 Ops[5] = Builder.CreateZExt(Ops[5], Int64Ty);
13346 Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), "vld4_lane");
13347 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13348 }
13349 case NEON::BI__builtin_neon_vst2_v:
13350 case NEON::BI__builtin_neon_vst2q_v: {
13351 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
13352 llvm::Type *Tys[2] = { VTy, Ops[2]->getType() };
13353 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2, Tys),
13354 Ops, "");
13355 }
13356 case NEON::BI__builtin_neon_vst2_lane_v:
13357 case NEON::BI__builtin_neon_vst2q_lane_v: {
13358 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
13359 Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
13360 llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
13361 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2lane, Tys),
13362 Ops, "");
13363 }
13364 case NEON::BI__builtin_neon_vst3_v:
13365 case NEON::BI__builtin_neon_vst3q_v: {
13366 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
13367 llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
13368 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3, Tys),
13369 Ops, "");
13370 }
13371 case NEON::BI__builtin_neon_vst3_lane_v:
13372 case NEON::BI__builtin_neon_vst3q_lane_v: {
13373 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
13374 Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
13375 llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
13376 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3lane, Tys),
13377 Ops, "");
13378 }
13379 case NEON::BI__builtin_neon_vst4_v:
13380 case NEON::BI__builtin_neon_vst4q_v: {
13381 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
13382 llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
13383 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4, Tys),
13384 Ops, "");
13385 }
13386 case NEON::BI__builtin_neon_vst4_lane_v:
13387 case NEON::BI__builtin_neon_vst4q_lane_v: {
13388 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
13389 Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
13390 llvm::Type *Tys[2] = { VTy, Ops[5]->getType() };
13391 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4lane, Tys),
13392 Ops, "");
13393 }
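  // vtrn/vuzp/vzip return two vectors: each half is built with a
  // shufflevector and stored to consecutive vector slots behind the result
  // pointer in Ops[0].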
13394 case NEON::BI__builtin_neon_vtrn_v:
13395 case NEON::BI__builtin_neon_vtrnq_v: {
13396 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13397 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
13398 Value *SV = nullptr;
13399
13400 for (unsigned vi = 0; vi != 2; ++vi) {
13401 SmallVector<int, 16> Indices;
13402 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
13403 Indices.push_back(i+vi);
13404 Indices.push_back(i+e+vi);
13405 }
13406 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
13407 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
13408 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
13409 }
13410 return SV;
13411 }
13412 case NEON::BI__builtin_neon_vuzp_v:
13413 case NEON::BI__builtin_neon_vuzpq_v: {
13414 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13415 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
13416 Value *SV = nullptr;
13417
13418 for (unsigned vi = 0; vi != 2; ++vi) {
13419 SmallVector<int, 16> Indices;
13420 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
13421 Indices.push_back(2*i+vi);
13422
13423 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
13424 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
13425 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
13426 }
13427 return SV;
13428 }
13429 case NEON::BI__builtin_neon_vzip_v:
13430 case NEON::BI__builtin_neon_vzipq_v: {
13431 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13432 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
13433 Value *SV = nullptr;
13434
13435 for (unsigned vi = 0; vi != 2; ++vi) {
13436 SmallVector<int, 16> Indices;
13437 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
13438 Indices.push_back((i + vi*e) >> 1);
13439 Indices.push_back(((i + vi*e) >> 1)+e);
13440 }
13441 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
13442 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
13443 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
13444 }
13445 return SV;
13446 }
13447 case NEON::BI__builtin_neon_vqtbl1q_v: {
13448 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl1, Ty),
13449 Ops, "vtbl1");
13450 }
13451 case NEON::BI__builtin_neon_vqtbl2q_v: {
13452 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl2, Ty),
13453 Ops, "vtbl2");
13454 }
13455 case NEON::BI__builtin_neon_vqtbl3q_v: {
13456 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl3, Ty),
13457 Ops, "vtbl3");
13458 }
13459 case NEON::BI__builtin_neon_vqtbl4q_v: {
13460 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl4, Ty),
13461 Ops, "vtbl4");
13462 }
13463 case NEON::BI__builtin_neon_vqtbx1q_v: {
13464 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx1, Ty),
13465 Ops, "vtbx1");
13466 }
13467 case NEON::BI__builtin_neon_vqtbx2q_v: {
13468 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx2, Ty),
13469 Ops, "vtbx2");
13470 }
13471 case NEON::BI__builtin_neon_vqtbx3q_v: {
13472 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx3, Ty),
13473 Ops, "vtbx3");
13474 }
13475 case NEON::BI__builtin_neon_vqtbx4q_v: {
13476 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx4, Ty),
13477 Ops, "vtbx4");
13478 }
13479 case NEON::BI__builtin_neon_vsqadd_v:
13480 case NEON::BI__builtin_neon_vsqaddq_v: {
13481 Int = Intrinsic::aarch64_neon_usqadd;
13482 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqadd");
13483 }
13484 case NEON::BI__builtin_neon_vuqadd_v:
13485 case NEON::BI__builtin_neon_vuqaddq_v: {
13486 Int = Intrinsic::aarch64_neon_suqadd;
13487 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vuqadd");
13488 }
13489 }
13490 }
13491
13492 Value *CodeGenFunction::EmitBPFBuiltinExpr(unsigned BuiltinID,
13493 const CallExpr *E) {
13494 assert((BuiltinID == BPF::BI__builtin_preserve_field_info ||
13495 BuiltinID == BPF::BI__builtin_btf_type_id ||
13496 BuiltinID == BPF::BI__builtin_preserve_type_info ||
13497 BuiltinID == BPF::BI__builtin_preserve_enum_value) &&
13498 "unexpected BPF builtin");
13499
13500 // A sequence number, injected into IR builtin call sites, to prevent
13501 // CSE when the only difference between otherwise-identical calls may be
13502 // the debuginfo metadata.
13503 static uint32_t BuiltinSeqNum;
13504
13505 switch (BuiltinID) {
13506 default:
13507 llvm_unreachable("Unexpected BPF builtin");
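  // __builtin_preserve_field_info(field_access, kind) lowers to the
  // llvm.bpf.preserve.field.info intrinsic on the (possibly bit-field) field
  // address; the second argument selects which piece of field information the
  // BPF backend preserves for CO-RE relocation.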
13508 case BPF::BI__builtin_preserve_field_info: {
13509 const Expr *Arg = E->getArg(0);
13510 bool IsBitField = Arg->IgnoreParens()->getObjectKind() == OK_BitField;
13511
13512 if (!getDebugInfo()) {
13513 CGM.Error(E->getExprLoc(),
13514 "using __builtin_preserve_field_info() without -g");
13515 return IsBitField ? EmitLValue(Arg).getRawBitFieldPointer(*this)
13516 : EmitLValue(Arg).emitRawPointer(*this);
13517 }
13518
13519 // Enable underlying preserve_*_access_index() generation.
13520 bool OldIsInPreservedAIRegion = IsInPreservedAIRegion;
13521 IsInPreservedAIRegion = true;
13522 Value *FieldAddr = IsBitField ? EmitLValue(Arg).getRawBitFieldPointer(*this)
13523 : EmitLValue(Arg).emitRawPointer(*this);
13524 IsInPreservedAIRegion = OldIsInPreservedAIRegion;
13525
13526 ConstantInt *C = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
13527 Value *InfoKind = ConstantInt::get(Int64Ty, C->getSExtValue());
13528
13529 // Build the IR for the preserve_field_info intrinsic.
13530 llvm::Function *FnGetFieldInfo = llvm::Intrinsic::getDeclaration(
13531 &CGM.getModule(), llvm::Intrinsic::bpf_preserve_field_info,
13532 {FieldAddr->getType()});
13533 return Builder.CreateCall(FnGetFieldInfo, {FieldAddr, InfoKind});
13534 }
13535 case BPF::BI__builtin_btf_type_id:
13536 case BPF::BI__builtin_preserve_type_info: {
13537 if (!getDebugInfo()) {
13538 CGM.Error(E->getExprLoc(), "using builtin function without -g");
13539 return nullptr;
13540 }
13541
13542 const Expr *Arg0 = E->getArg(0);
13543 llvm::DIType *DbgInfo = getDebugInfo()->getOrCreateStandaloneType(
13544 Arg0->getType(), Arg0->getExprLoc());
13545
13546 ConstantInt *Flag = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
13547 Value *FlagValue = ConstantInt::get(Int64Ty, Flag->getSExtValue());
13548 Value *SeqNumVal = ConstantInt::get(Int32Ty, BuiltinSeqNum++);
13549
13550 llvm::Function *FnDecl;
13551 if (BuiltinID == BPF::BI__builtin_btf_type_id)
13552 FnDecl = llvm::Intrinsic::getDeclaration(
13553 &CGM.getModule(), llvm::Intrinsic::bpf_btf_type_id, {});
13554 else
13555 FnDecl = llvm::Intrinsic::getDeclaration(
13556 &CGM.getModule(), llvm::Intrinsic::bpf_preserve_type_info, {});
13557 CallInst *Fn = Builder.CreateCall(FnDecl, {SeqNumVal, FlagValue});
13558 Fn->setMetadata(LLVMContext::MD_preserve_access_index, DbgInfo);
13559 return Fn;
13560 }
13561 case BPF::BI__builtin_preserve_enum_value: {
13562 if (!getDebugInfo()) {
13563 CGM.Error(E->getExprLoc(), "using builtin function without -g");
13564 return nullptr;
13565 }
13566
13567 const Expr *Arg0 = E->getArg(0);
13568 llvm::DIType *DbgInfo = getDebugInfo()->getOrCreateStandaloneType(
13569 Arg0->getType(), Arg0->getExprLoc());
13570
13571 // Find the enumerator being referenced.
13572 const auto *UO = cast<UnaryOperator>(Arg0->IgnoreParens());
13573 const auto *CE = cast<CStyleCastExpr>(UO->getSubExpr());
13574 const auto *DR = cast<DeclRefExpr>(CE->getSubExpr());
13575 const auto *Enumerator = cast<EnumConstantDecl>(DR->getDecl());
13576
13577 auto InitVal = Enumerator->getInitVal();
13578 std::string InitValStr;
13579 if (InitVal.isNegative() || InitVal > uint64_t(INT64_MAX))
13580 InitValStr = std::to_string(InitVal.getSExtValue());
13581 else
13582 InitValStr = std::to_string(InitVal.getZExtValue());
13583 std::string EnumStr = Enumerator->getNameAsString() + ":" + InitValStr;
13584 Value *EnumStrVal = Builder.CreateGlobalStringPtr(EnumStr);
13585
13586 ConstantInt *Flag = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
13587 Value *FlagValue = ConstantInt::get(Int64Ty, Flag->getSExtValue());
13588 Value *SeqNumVal = ConstantInt::get(Int32Ty, BuiltinSeqNum++);
13589
13590 llvm::Function *IntrinsicFn = llvm::Intrinsic::getDeclaration(
13591 &CGM.getModule(), llvm::Intrinsic::bpf_preserve_enum_value, {});
13592 CallInst *Fn =
13593 Builder.CreateCall(IntrinsicFn, {SeqNumVal, EnumStrVal, FlagValue});
13594 Fn->setMetadata(LLVMContext::MD_preserve_access_index, DbgInfo);
13595 return Fn;
13596 }
13597 }
13598 }
13599
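// Build a vector value from individual scalar operands: a ConstantVector when
// every operand is a constant, otherwise a chain of insertelement instructions
// starting from poison.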
13600 llvm::Value *CodeGenFunction::
13601 BuildVector(ArrayRef<llvm::Value*> Ops) {
13602 assert((Ops.size() & (Ops.size() - 1)) == 0 &&
13603 "Not a power-of-two sized vector!");
13604 bool AllConstants = true;
13605 for (unsigned i = 0, e = Ops.size(); i != e && AllConstants; ++i)
13606 AllConstants &= isa<Constant>(Ops[i]);
13607
13608 // If this is a constant vector, create a ConstantVector.
13609 if (AllConstants) {
13610 SmallVector<llvm::Constant*, 16> CstOps;
13611 for (unsigned i = 0, e = Ops.size(); i != e; ++i)
13612 CstOps.push_back(cast<Constant>(Ops[i]));
13613 return llvm::ConstantVector::get(CstOps);
13614 }
13615
13616 // Otherwise, insertelement the values to build the vector.
13617 Value *Result = llvm::PoisonValue::get(
13618 llvm::FixedVectorType::get(Ops[0]->getType(), Ops.size()));
13619
13620 for (unsigned i = 0, e = Ops.size(); i != e; ++i)
13621 Result = Builder.CreateInsertElement(Result, Ops[i], Builder.getInt64(i));
13622
13623 return Result;
13624 }
13625
13626 // Convert the mask from an integer type to a vector of i1.
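// For example, an i8 mask used with NumElts == 4 is bitcast to <8 x i1> and
// then shuffled down to <4 x i1> with indices {0, 1, 2, 3}, keeping only the
// low mask bits.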
13627 static Value *getMaskVecValue(CodeGenFunction &CGF, Value *Mask,
13628 unsigned NumElts) {
13629
13630 auto *MaskTy = llvm::FixedVectorType::get(
13631 CGF.Builder.getInt1Ty(),
13632 cast<IntegerType>(Mask->getType())->getBitWidth());
13633 Value *MaskVec = CGF.Builder.CreateBitCast(Mask, MaskTy);
13634
13635 // If we have fewer than 8 elements, the incoming mask was an i8 and
13636 // we need to extract down to the right number of elements.
13637 if (NumElts < 8) {
13638 int Indices[4];
13639 for (unsigned i = 0; i != NumElts; ++i)
13640 Indices[i] = i;
13641 MaskVec = CGF.Builder.CreateShuffleVector(
13642 MaskVec, MaskVec, ArrayRef(Indices, NumElts), "extract");
13643 }
13644 return MaskVec;
13645 }
13646
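// For the masked store/load helpers below, Ops[0] is the pointer, Ops[1] the
// data (or pass-through) vector, and Ops[2] the integer mask, which is
// converted to a vector of i1 for llvm.masked.store / llvm.masked.load.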
13647 static Value *EmitX86MaskedStore(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
13648 Align Alignment) {
13649 Value *Ptr = Ops[0];
13650
13651 Value *MaskVec = getMaskVecValue(
13652 CGF, Ops[2],
13653 cast<llvm::FixedVectorType>(Ops[1]->getType())->getNumElements());
13654
13655 return CGF.Builder.CreateMaskedStore(Ops[1], Ptr, Alignment, MaskVec);
13656 }
13657
13658 static Value *EmitX86MaskedLoad(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
13659 Align Alignment) {
13660 llvm::Type *Ty = Ops[1]->getType();
13661 Value *Ptr = Ops[0];
13662
13663 Value *MaskVec = getMaskVecValue(
13664 CGF, Ops[2], cast<llvm::FixedVectorType>(Ty)->getNumElements());
13665
13666 return CGF.Builder.CreateMaskedLoad(Ty, Ptr, Alignment, MaskVec, Ops[1]);
13667 }
13668
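// llvm.masked.expandload reads consecutive elements from memory into the
// enabled lanes of the result (disabled lanes take the pass-through value),
// while llvm.masked.compressstore writes the enabled lanes out contiguously.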
13669 static Value *EmitX86ExpandLoad(CodeGenFunction &CGF,
13670 ArrayRef<Value *> Ops) {
13671 auto *ResultTy = cast<llvm::VectorType>(Ops[1]->getType());
13672 Value *Ptr = Ops[0];
13673
13674 Value *MaskVec = getMaskVecValue(
13675 CGF, Ops[2], cast<FixedVectorType>(ResultTy)->getNumElements());
13676
13677 llvm::Function *F = CGF.CGM.getIntrinsic(Intrinsic::masked_expandload,
13678 ResultTy);
13679 return CGF.Builder.CreateCall(F, { Ptr, MaskVec, Ops[1] });
13680 }
13681
13682 static Value *EmitX86CompressExpand(CodeGenFunction &CGF,
13683 ArrayRef<Value *> Ops,
13684 bool IsCompress) {
13685 auto *ResultTy = cast<llvm::FixedVectorType>(Ops[1]->getType());
13686
13687 Value *MaskVec = getMaskVecValue(CGF, Ops[2], ResultTy->getNumElements());
13688
13689 Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
13690 : Intrinsic::x86_avx512_mask_expand;
13691 llvm::Function *F = CGF.CGM.getIntrinsic(IID, ResultTy);
13692 return CGF.Builder.CreateCall(F, { Ops[0], Ops[1], MaskVec });
13693 }
13694
13695 static Value *EmitX86CompressStore(CodeGenFunction &CGF,
13696 ArrayRef<Value *> Ops) {
13697 auto *ResultTy = cast<llvm::FixedVectorType>(Ops[1]->getType());
13698 Value *Ptr = Ops[0];
13699
13700 Value *MaskVec = getMaskVecValue(CGF, Ops[2], ResultTy->getNumElements());
13701
13702 llvm::Function *F = CGF.CGM.getIntrinsic(Intrinsic::masked_compressstore,
13703 ResultTy);
13704 return CGF.Builder.CreateCall(F, { Ops[1], Ptr, MaskVec });
13705 }
13706
13707 static Value *EmitX86MaskLogic(CodeGenFunction &CGF, Instruction::BinaryOps Opc,
13708 ArrayRef<Value *> Ops,
13709 bool InvertLHS = false) {
13710 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
13711 Value *LHS = getMaskVecValue(CGF, Ops[0], NumElts);
13712 Value *RHS = getMaskVecValue(CGF, Ops[1], NumElts);
13713
13714 if (InvertLHS)
13715 LHS = CGF.Builder.CreateNot(LHS);
13716
13717 return CGF.Builder.CreateBitCast(CGF.Builder.CreateBinOp(Opc, LHS, RHS),
13718 Ops[0]->getType());
13719 }
13720
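// Lower the X86 funnel shift/rotate builtins to the generic llvm.fshl/llvm.fshr
// intrinsics. For reference, fshl(a, b, amt) shifts the concatenation a:b left
// by amt (modulo the bit width) and returns the high half; fshr shifts right
// and returns the low half.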
13721 static Value *EmitX86FunnelShift(CodeGenFunction &CGF, Value *Op0, Value *Op1,
13722 Value *Amt, bool IsRight) {
13723 llvm::Type *Ty = Op0->getType();
13724
13725 // The amount may be a scalar immediate, in which case create a splat vector.
13726 // Funnel shift amounts are taken modulo the bit width, and the types are all
13727 // powers of two, so we only care about the lowest log2(BitWidth) bits anyway.
13728 if (Amt->getType() != Ty) {
13729 unsigned NumElts = cast<llvm::FixedVectorType>(Ty)->getNumElements();
13730 Amt = CGF.Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
13731 Amt = CGF.Builder.CreateVectorSplat(NumElts, Amt);
13732 }
13733
13734 unsigned IID = IsRight ? Intrinsic::fshr : Intrinsic::fshl;
13735 Function *F = CGF.CGM.getIntrinsic(IID, Ty);
13736 return CGF.Builder.CreateCall(F, {Op0, Op1, Amt});
13737 }
13738
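// XOP vpcom/vpcomu: the immediate selects the predicate (0 lt, 1 le, 2 gt,
// 3 ge, 4 eq, 5 ne, 6 always-false, 7 always-true); the i1 comparison result
// is sign-extended so each lane becomes all-ones or all-zeros.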
13739 static Value *EmitX86vpcom(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
13740 bool IsSigned) {
13741 Value *Op0 = Ops[0];
13742 Value *Op1 = Ops[1];
13743 llvm::Type *Ty = Op0->getType();
13744 uint64_t Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
13745
13746 CmpInst::Predicate Pred;
13747 switch (Imm) {
13748 case 0x0:
13749 Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
13750 break;
13751 case 0x1:
13752 Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
13753 break;
13754 case 0x2:
13755 Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
13756 break;
13757 case 0x3:
13758 Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
13759 break;
13760 case 0x4:
13761 Pred = ICmpInst::ICMP_EQ;
13762 break;
13763 case 0x5:
13764 Pred = ICmpInst::ICMP_NE;
13765 break;
13766 case 0x6:
13767 return llvm::Constant::getNullValue(Ty); // FALSE
13768 case 0x7:
13769 return llvm::Constant::getAllOnesValue(Ty); // TRUE
13770 default:
13771 llvm_unreachable("Unexpected XOP vpcom/vpcomu predicate");
13772 }
13773
13774 Value *Cmp = CGF.Builder.CreateICmp(Pred, Op0, Op1);
13775 Value *Res = CGF.Builder.CreateSExt(Cmp, Ty);
13776 return Res;
13777 }
13778
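// AVX-512 masked select: the integer mask is converted to a vector of i1 and
// used to choose, lane by lane, between the new result (Op0) and the
// pass-through value (Op1).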
13779 static Value *EmitX86Select(CodeGenFunction &CGF,
13780 Value *Mask, Value *Op0, Value *Op1) {
13781
13782 // If the mask is all ones, just return the first argument.
13783 if (const auto *C = dyn_cast<Constant>(Mask))
13784 if (C->isAllOnesValue())
13785 return Op0;
13786
13787 Mask = getMaskVecValue(
13788 CGF, Mask, cast<llvm::FixedVectorType>(Op0->getType())->getNumElements());
13789
13790 return CGF.Builder.CreateSelect(Mask, Op0, Op1);
13791 }
13792
13793 static Value *EmitX86ScalarSelect(CodeGenFunction &CGF,
13794 Value *Mask, Value *Op0, Value *Op1) {
13795 // If the mask is all ones, just return the first argument.
13796 if (const auto *C = dyn_cast<Constant>(Mask))
13797 if (C->isAllOnesValue())
13798 return Op0;
13799
13800 auto *MaskTy = llvm::FixedVectorType::get(
13801 CGF.Builder.getInt1Ty(), Mask->getType()->getIntegerBitWidth());
13802 Mask = CGF.Builder.CreateBitCast(Mask, MaskTy);
13803 Mask = CGF.Builder.CreateExtractElement(Mask, (uint64_t)0);
13804 return CGF.Builder.CreateSelect(Mask, Op0, Op1);
13805 }
13806
13807 static Value *EmitX86MaskedCompareResult(CodeGenFunction &CGF, Value *Cmp,
13808 unsigned NumElts, Value *MaskIn) {
13809 if (MaskIn) {
13810 const auto *C = dyn_cast<Constant>(MaskIn);
13811 if (!C || !C->isAllOnesValue())
13812 Cmp = CGF.Builder.CreateAnd(Cmp, getMaskVecValue(CGF, MaskIn, NumElts));
13813 }
13814
13815 if (NumElts < 8) {
13816 int Indices[8];
13817 for (unsigned i = 0; i != NumElts; ++i)
13818 Indices[i] = i;
13819 for (unsigned i = NumElts; i != 8; ++i)
13820 Indices[i] = i % NumElts + NumElts;
13821 Cmp = CGF.Builder.CreateShuffleVector(
13822 Cmp, llvm::Constant::getNullValue(Cmp->getType()), Indices);
13823 }
13824
13825 return CGF.Builder.CreateBitCast(Cmp,
13826 IntegerType::get(CGF.getLLVMContext(),
13827 std::max(NumElts, 8U)));
13828 }
13829
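// The 3-bit CC value encodes the predicate: 0 eq, 1 lt, 2 le, 3 always-false,
// 4 ne, 5 ge, 6 gt, 7 always-true. Signed selects between the signed and
// unsigned forms of the ordered predicates.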
13830 static Value *EmitX86MaskedCompare(CodeGenFunction &CGF, unsigned CC,
13831 bool Signed, ArrayRef<Value *> Ops) {
13832 assert((Ops.size() == 2 || Ops.size() == 4) &&
13833 "Unexpected number of arguments");
13834 unsigned NumElts =
13835 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
13836 Value *Cmp;
13837
13838 if (CC == 3) {
13839 Cmp = Constant::getNullValue(
13840 llvm::FixedVectorType::get(CGF.Builder.getInt1Ty(), NumElts));
13841 } else if (CC == 7) {
13842 Cmp = Constant::getAllOnesValue(
13843 llvm::FixedVectorType::get(CGF.Builder.getInt1Ty(), NumElts));
13844 } else {
13845 ICmpInst::Predicate Pred;
13846 switch (CC) {
13847 default: llvm_unreachable("Unknown condition code");
13848 case 0: Pred = ICmpInst::ICMP_EQ; break;
13849 case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
13850 case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
13851 case 4: Pred = ICmpInst::ICMP_NE; break;
13852 case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
13853 case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
13854 }
13855 Cmp = CGF.Builder.CreateICmp(Pred, Ops[0], Ops[1]);
13856 }
13857
13858 Value *MaskIn = nullptr;
13859 if (Ops.size() == 4)
13860 MaskIn = Ops[3];
13861
13862 return EmitX86MaskedCompareResult(CGF, Cmp, NumElts, MaskIn);
13863 }
13864
13865 static Value *EmitX86ConvertToMask(CodeGenFunction &CGF, Value *In) {
13866 Value *Zero = Constant::getNullValue(In->getType());
13867 return EmitX86MaskedCompare(CGF, 1, true, { In, Zero });
13868 }
13869
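// Ops are (source, pass-through, mask, rounding). A non-default rounding mode
// (!= _MM_FROUND_CUR_DIRECTION, i.e. != 4) goes through the AVX-512 rounding
// intrinsics; otherwise a plain sitofp/uitofp is emitted. The result is then
// blended with the pass-through value under the mask.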
13870 static Value *EmitX86ConvertIntToFp(CodeGenFunction &CGF, const CallExpr *E,
13871 ArrayRef<Value *> Ops, bool IsSigned) {
13872 unsigned Rnd = cast<llvm::ConstantInt>(Ops[3])->getZExtValue();
13873 llvm::Type *Ty = Ops[1]->getType();
13874
13875 Value *Res;
13876 if (Rnd != 4) {
13877 Intrinsic::ID IID = IsSigned ? Intrinsic::x86_avx512_sitofp_round
13878 : Intrinsic::x86_avx512_uitofp_round;
13879 Function *F = CGF.CGM.getIntrinsic(IID, { Ty, Ops[0]->getType() });
13880 Res = CGF.Builder.CreateCall(F, { Ops[0], Ops[3] });
13881 } else {
13882 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
13883 Res = IsSigned ? CGF.Builder.CreateSIToFP(Ops[0], Ty)
13884 : CGF.Builder.CreateUIToFP(Ops[0], Ty);
13885 }
13886
13887 return EmitX86Select(CGF, Ops[2], Res, Ops[1]);
13888 }
13889
13890 // Lowers X86 FMA intrinsics to IR.
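// The _mask variants blend with the first source on a clear mask bit, _maskz
// with zero, and _mask3 with the accumulator (third source). The *sub*
// variants are handled by negating the accumulator before the FMA.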
13891 static Value *EmitX86FMAExpr(CodeGenFunction &CGF, const CallExpr *E,
13892 ArrayRef<Value *> Ops, unsigned BuiltinID,
13893 bool IsAddSub) {
13894
13895 bool Subtract = false;
13896 Intrinsic::ID IID = Intrinsic::not_intrinsic;
13897 switch (BuiltinID) {
13898 default: break;
13899 case clang::X86::BI__builtin_ia32_vfmsubph512_mask3:
13900 Subtract = true;
13901 [[fallthrough]];
13902 case clang::X86::BI__builtin_ia32_vfmaddph512_mask:
13903 case clang::X86::BI__builtin_ia32_vfmaddph512_maskz:
13904 case clang::X86::BI__builtin_ia32_vfmaddph512_mask3:
13905 IID = llvm::Intrinsic::x86_avx512fp16_vfmadd_ph_512;
13906 break;
13907 case clang::X86::BI__builtin_ia32_vfmsubaddph512_mask3:
13908 Subtract = true;
13909 [[fallthrough]];
13910 case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask:
13911 case clang::X86::BI__builtin_ia32_vfmaddsubph512_maskz:
13912 case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask3:
13913 IID = llvm::Intrinsic::x86_avx512fp16_vfmaddsub_ph_512;
13914 break;
13915 case clang::X86::BI__builtin_ia32_vfmsubps512_mask3:
13916 Subtract = true;
13917 [[fallthrough]];
13918 case clang::X86::BI__builtin_ia32_vfmaddps512_mask:
13919 case clang::X86::BI__builtin_ia32_vfmaddps512_maskz:
13920 case clang::X86::BI__builtin_ia32_vfmaddps512_mask3:
13921 IID = llvm::Intrinsic::x86_avx512_vfmadd_ps_512; break;
13922 case clang::X86::BI__builtin_ia32_vfmsubpd512_mask3:
13923 Subtract = true;
13924 [[fallthrough]];
13925 case clang::X86::BI__builtin_ia32_vfmaddpd512_mask:
13926 case clang::X86::BI__builtin_ia32_vfmaddpd512_maskz:
13927 case clang::X86::BI__builtin_ia32_vfmaddpd512_mask3:
13928 IID = llvm::Intrinsic::x86_avx512_vfmadd_pd_512; break;
13929 case clang::X86::BI__builtin_ia32_vfmsubaddps512_mask3:
13930 Subtract = true;
13931 [[fallthrough]];
13932 case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask:
13933 case clang::X86::BI__builtin_ia32_vfmaddsubps512_maskz:
13934 case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask3:
13935 IID = llvm::Intrinsic::x86_avx512_vfmaddsub_ps_512;
13936 break;
13937 case clang::X86::BI__builtin_ia32_vfmsubaddpd512_mask3:
13938 Subtract = true;
13939 [[fallthrough]];
13940 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask:
13941 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_maskz:
13942 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask3:
13943 IID = llvm::Intrinsic::x86_avx512_vfmaddsub_pd_512;
13944 break;
13945 }
13946
13947 Value *A = Ops[0];
13948 Value *B = Ops[1];
13949 Value *C = Ops[2];
13950
13951 if (Subtract)
13952 C = CGF.Builder.CreateFNeg(C);
13953
13954 Value *Res;
13955
13956 // Only take the plain FMA path below for _MM_FROUND_CUR_DIRECTION/4 (no
13957 // rounding); otherwise, or for the add/sub variants, call the target intrinsic.
13957 if (IID != Intrinsic::not_intrinsic &&
13958 (cast<llvm::ConstantInt>(Ops.back())->getZExtValue() != (uint64_t)4 ||
13959 IsAddSub)) {
13960 Function *Intr = CGF.CGM.getIntrinsic(IID);
13961 Res = CGF.Builder.CreateCall(Intr, {A, B, C, Ops.back() });
13962 } else {
13963 llvm::Type *Ty = A->getType();
13964 Function *FMA;
13965 if (CGF.Builder.getIsFPConstrained()) {
13966 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
13967 FMA = CGF.CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, Ty);
13968 Res = CGF.Builder.CreateConstrainedFPCall(FMA, {A, B, C});
13969 } else {
13970 FMA = CGF.CGM.getIntrinsic(Intrinsic::fma, Ty);
13971 Res = CGF.Builder.CreateCall(FMA, {A, B, C});
13972 }
13973 }
13974
13975 // Handle any required masking.
13976 Value *MaskFalseVal = nullptr;
13977 switch (BuiltinID) {
13978 case clang::X86::BI__builtin_ia32_vfmaddph512_mask:
13979 case clang::X86::BI__builtin_ia32_vfmaddps512_mask:
13980 case clang::X86::BI__builtin_ia32_vfmaddpd512_mask:
13981 case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask:
13982 case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask:
13983 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask:
13984 MaskFalseVal = Ops[0];
13985 break;
13986 case clang::X86::BI__builtin_ia32_vfmaddph512_maskz:
13987 case clang::X86::BI__builtin_ia32_vfmaddps512_maskz:
13988 case clang::X86::BI__builtin_ia32_vfmaddpd512_maskz:
13989 case clang::X86::BI__builtin_ia32_vfmaddsubph512_maskz:
13990 case clang::X86::BI__builtin_ia32_vfmaddsubps512_maskz:
13991 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_maskz:
13992 MaskFalseVal = Constant::getNullValue(Ops[0]->getType());
13993 break;
13994 case clang::X86::BI__builtin_ia32_vfmsubph512_mask3:
13995 case clang::X86::BI__builtin_ia32_vfmaddph512_mask3:
13996 case clang::X86::BI__builtin_ia32_vfmsubps512_mask3:
13997 case clang::X86::BI__builtin_ia32_vfmaddps512_mask3:
13998 case clang::X86::BI__builtin_ia32_vfmsubpd512_mask3:
13999 case clang::X86::BI__builtin_ia32_vfmaddpd512_mask3:
14000 case clang::X86::BI__builtin_ia32_vfmsubaddph512_mask3:
14001 case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask3:
14002 case clang::X86::BI__builtin_ia32_vfmsubaddps512_mask3:
14003 case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask3:
14004 case clang::X86::BI__builtin_ia32_vfmsubaddpd512_mask3:
14005 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask3:
14006 MaskFalseVal = Ops[2];
14007 break;
14008 }
14009
14010 if (MaskFalseVal)
14011 return EmitX86Select(CGF, Ops[3], Res, MaskFalseVal);
14012
14013 return Res;
14014 }
14015
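// Scalar FMA (ss/sd/sh forms): the computation is performed on element 0 only,
// optionally through the rounding-mode intrinsics, then masked against the
// pass-through value and re-inserted into the upper source vector.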
14016 static Value *EmitScalarFMAExpr(CodeGenFunction &CGF, const CallExpr *E,
14017 MutableArrayRef<Value *> Ops, Value *Upper,
14018 bool ZeroMask = false, unsigned PTIdx = 0,
14019 bool NegAcc = false) {
14020 unsigned Rnd = 4;
14021 if (Ops.size() > 4)
14022 Rnd = cast<llvm::ConstantInt>(Ops[4])->getZExtValue();
14023
14024 if (NegAcc)
14025 Ops[2] = CGF.Builder.CreateFNeg(Ops[2]);
14026
14027 Ops[0] = CGF.Builder.CreateExtractElement(Ops[0], (uint64_t)0);
14028 Ops[1] = CGF.Builder.CreateExtractElement(Ops[1], (uint64_t)0);
14029 Ops[2] = CGF.Builder.CreateExtractElement(Ops[2], (uint64_t)0);
14030 Value *Res;
14031 if (Rnd != 4) {
14032 Intrinsic::ID IID;
14033
14034 switch (Ops[0]->getType()->getPrimitiveSizeInBits()) {
14035 case 16:
14036 IID = Intrinsic::x86_avx512fp16_vfmadd_f16;
14037 break;
14038 case 32:
14039 IID = Intrinsic::x86_avx512_vfmadd_f32;
14040 break;
14041 case 64:
14042 IID = Intrinsic::x86_avx512_vfmadd_f64;
14043 break;
14044 default:
14045 llvm_unreachable("Unexpected size");
14046 }
14047 Res = CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IID),
14048 {Ops[0], Ops[1], Ops[2], Ops[4]});
14049 } else if (CGF.Builder.getIsFPConstrained()) {
14050 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
14051 Function *FMA = CGF.CGM.getIntrinsic(
14052 Intrinsic::experimental_constrained_fma, Ops[0]->getType());
14053 Res = CGF.Builder.CreateConstrainedFPCall(FMA, Ops.slice(0, 3));
14054 } else {
14055 Function *FMA = CGF.CGM.getIntrinsic(Intrinsic::fma, Ops[0]->getType());
14056 Res = CGF.Builder.CreateCall(FMA, Ops.slice(0, 3));
14057 }
14058 // If we have more than 3 arguments, we need to do masking.
14059 if (Ops.size() > 3) {
14060 Value *PassThru = ZeroMask ? Constant::getNullValue(Res->getType())
14061 : Ops[PTIdx];
14062
14063 // If we negated the accumulator and it is also the PassThru value, we need
14064 // to bypass the negate. Conveniently, Upper should be the same thing in this
14065 // case.
14066 if (NegAcc && PTIdx == 2)
14067 PassThru = CGF.Builder.CreateExtractElement(Upper, (uint64_t)0);
14068
14069 Res = EmitX86ScalarSelect(CGF, Ops[3], Res, PassThru);
14070 }
14071 return CGF.Builder.CreateInsertElement(Upper, Res, (uint64_t)0);
14072 }
14073
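// pmuldq/pmuludq multiply the low 32 bits of each 64-bit lane: the operands
// are bitcast to vXi64 and the low halves are sign-extended (shl + ashr) or
// zero-extended (mask) before the full 64-bit multiply.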
14074 static Value *EmitX86Muldq(CodeGenFunction &CGF, bool IsSigned,
14075 ArrayRef<Value *> Ops) {
14076 llvm::Type *Ty = Ops[0]->getType();
14077 // Arguments have a vXi32 type so cast to vXi64.
14078 Ty = llvm::FixedVectorType::get(CGF.Int64Ty,
14079 Ty->getPrimitiveSizeInBits() / 64);
14080 Value *LHS = CGF.Builder.CreateBitCast(Ops[0], Ty);
14081 Value *RHS = CGF.Builder.CreateBitCast(Ops[1], Ty);
14082
14083 if (IsSigned) {
14084 // Shift left then arithmetic shift right.
14085 Constant *ShiftAmt = ConstantInt::get(Ty, 32);
14086 LHS = CGF.Builder.CreateShl(LHS, ShiftAmt);
14087 LHS = CGF.Builder.CreateAShr(LHS, ShiftAmt);
14088 RHS = CGF.Builder.CreateShl(RHS, ShiftAmt);
14089 RHS = CGF.Builder.CreateAShr(RHS, ShiftAmt);
14090 } else {
14091 // Clear the upper bits.
14092 Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
14093 LHS = CGF.Builder.CreateAnd(LHS, Mask);
14094 RHS = CGF.Builder.CreateAnd(RHS, Mask);
14095 }
14096
14097 return CGF.Builder.CreateMul(LHS, RHS);
14098 }
14099
14100 // Emit a masked pternlog intrinsic. This only exists because the header has to
14101 // use a macro and we aren't able to pass the input argument to a pternlog
14102 // builtin and a select builtin without evaluating it twice.
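// For illustration, an imm8 of 0xCA implements the bitwise select
// (A & B) | (~A & C) over the three source operands.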
14103 static Value *EmitX86Ternlog(CodeGenFunction &CGF, bool ZeroMask,
14104 ArrayRef<Value *> Ops) {
14105 llvm::Type *Ty = Ops[0]->getType();
14106
14107 unsigned VecWidth = Ty->getPrimitiveSizeInBits();
14108 unsigned EltWidth = Ty->getScalarSizeInBits();
14109 Intrinsic::ID IID;
14110 if (VecWidth == 128 && EltWidth == 32)
14111 IID = Intrinsic::x86_avx512_pternlog_d_128;
14112 else if (VecWidth == 256 && EltWidth == 32)
14113 IID = Intrinsic::x86_avx512_pternlog_d_256;
14114 else if (VecWidth == 512 && EltWidth == 32)
14115 IID = Intrinsic::x86_avx512_pternlog_d_512;
14116 else if (VecWidth == 128 && EltWidth == 64)
14117 IID = Intrinsic::x86_avx512_pternlog_q_128;
14118 else if (VecWidth == 256 && EltWidth == 64)
14119 IID = Intrinsic::x86_avx512_pternlog_q_256;
14120 else if (VecWidth == 512 && EltWidth == 64)
14121 IID = Intrinsic::x86_avx512_pternlog_q_512;
14122 else
14123 llvm_unreachable("Unexpected intrinsic");
14124
14125 Value *Ternlog = CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IID),
14126 Ops.drop_back());
14127 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty) : Ops[0];
14128 return EmitX86Select(CGF, Ops[4], Ternlog, PassThru);
14129 }
14130
14131 static Value *EmitX86SExtMask(CodeGenFunction &CGF, Value *Op,
14132 llvm::Type *DstTy) {
14133 unsigned NumberOfElements =
14134 cast<llvm::FixedVectorType>(DstTy)->getNumElements();
14135 Value *Mask = getMaskVecValue(CGF, Op, NumberOfElements);
14136 return CGF.Builder.CreateSExt(Mask, DstTy, "vpmovm2");
14137 }
14138
14139 Value *CodeGenFunction::EmitX86CpuIs(const CallExpr *E) {
14140 const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts();
14141 StringRef CPUStr = cast<clang::StringLiteral>(CPUExpr)->getString();
14142 return EmitX86CpuIs(CPUStr);
14143 }
14144
14145 // Convert F16 halves to floats.
14146 static Value *EmitX86CvtF16ToFloatExpr(CodeGenFunction &CGF,
14147 ArrayRef<Value *> Ops,
14148 llvm::Type *DstTy) {
14149 assert((Ops.size() == 1 || Ops.size() == 3 || Ops.size() == 4) &&
14150 "Unknown cvtph2ps intrinsic");
14151
14152 // If the SAE intrinsic doesn't use default rounding then we can't upgrade.
14153 if (Ops.size() == 4 && cast<llvm::ConstantInt>(Ops[3])->getZExtValue() != 4) {
14154 Function *F =
14155 CGF.CGM.getIntrinsic(Intrinsic::x86_avx512_mask_vcvtph2ps_512);
14156 return CGF.Builder.CreateCall(F, {Ops[0], Ops[1], Ops[2], Ops[3]});
14157 }
14158
14159 unsigned NumDstElts = cast<llvm::FixedVectorType>(DstTy)->getNumElements();
14160 Value *Src = Ops[0];
14161
14162 // Extract the subvector.
14163 if (NumDstElts !=
14164 cast<llvm::FixedVectorType>(Src->getType())->getNumElements()) {
14165 assert(NumDstElts == 4 && "Unexpected vector size");
14166 Src = CGF.Builder.CreateShuffleVector(Src, ArrayRef<int>{0, 1, 2, 3});
14167 }
14168
14169 // Bitcast from vXi16 to vXf16.
14170 auto *HalfTy = llvm::FixedVectorType::get(
14171 llvm::Type::getHalfTy(CGF.getLLVMContext()), NumDstElts);
14172 Src = CGF.Builder.CreateBitCast(Src, HalfTy);
14173
14174 // Perform the fp-extension.
14175 Value *Res = CGF.Builder.CreateFPExt(Src, DstTy, "cvtph2ps");
14176
14177 if (Ops.size() >= 3)
14178 Res = EmitX86Select(CGF, Ops[2], Res, Ops[1]);
14179 return Res;
14180 }
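// Illustrative sketch (assuming a 128-bit conversion with a <8 x i16> source
// and a <4 x float> destination): the low four i16 elements are extracted with
// a shufflevector, bitcast to <4 x half>, fpext'ed to <4 x float>, and, for
// the masked forms, blended with the pass-through via EmitX86Select.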
14181
14182 Value *CodeGenFunction::EmitX86CpuIs(StringRef CPUStr) {
14183
14184 llvm::Type *Int32Ty = Builder.getInt32Ty();
14185
14186 // Matching the struct layout from the compiler-rt/libgcc structure that is
14187 // filled in:
14188 // unsigned int __cpu_vendor;
14189 // unsigned int __cpu_type;
14190 // unsigned int __cpu_subtype;
14191 // unsigned int __cpu_features[1];
14192 llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty,
14193 llvm::ArrayType::get(Int32Ty, 1));
14194
14195 // Grab the global __cpu_model.
14196 llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model");
14197 cast<llvm::GlobalValue>(CpuModel)->setDSOLocal(true);
14198
14199 // Calculate the index needed to access the correct field based on the
14200 // range. Also adjust the expected value.
14201 unsigned Index;
14202 unsigned Value;
14203 std::tie(Index, Value) = StringSwitch<std::pair<unsigned, unsigned>>(CPUStr)
14204 #define X86_VENDOR(ENUM, STRING) \
14205 .Case(STRING, {0u, static_cast<unsigned>(llvm::X86::ENUM)})
14206 #define X86_CPU_TYPE_ALIAS(ENUM, ALIAS) \
14207 .Case(ALIAS, {1u, static_cast<unsigned>(llvm::X86::ENUM)})
14208 #define X86_CPU_TYPE(ENUM, STR) \
14209 .Case(STR, {1u, static_cast<unsigned>(llvm::X86::ENUM)})
14210 #define X86_CPU_SUBTYPE_ALIAS(ENUM, ALIAS) \
14211 .Case(ALIAS, {2u, static_cast<unsigned>(llvm::X86::ENUM)})
14212 #define X86_CPU_SUBTYPE(ENUM, STR) \
14213 .Case(STR, {2u, static_cast<unsigned>(llvm::X86::ENUM)})
14214 #include "llvm/TargetParser/X86TargetParser.def"
14215 .Default({0, 0});
14216 assert(Value != 0 && "Invalid CPUStr passed to CpuIs");
14217
14218 // Grab the appropriate field from __cpu_model.
14219 llvm::Value *Idxs[] = {ConstantInt::get(Int32Ty, 0),
14220 ConstantInt::get(Int32Ty, Index)};
14221 llvm::Value *CpuValue = Builder.CreateInBoundsGEP(STy, CpuModel, Idxs);
14222 CpuValue = Builder.CreateAlignedLoad(Int32Ty, CpuValue,
14223 CharUnits::fromQuantity(4));
14224
14225 // Check the value of the field against the requested value.
14226 return Builder.CreateICmpEQ(CpuValue,
14227 llvm::ConstantInt::get(Int32Ty, Value));
14228 }
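// Illustrative sketch of the IR emitted for __builtin_cpu_is("<cpu>"); the
// field index and expected value depend on X86TargetParser.def:
//   %p = getelementptr inbounds { i32, i32, i32, [1 x i32] },
//            ptr @__cpu_model, i32 0, i32 <Index>
//   %v = load i32, ptr %p, align 4
//   %r = icmp eq i32 %v, <Value>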
14229
14230 Value *CodeGenFunction::EmitX86CpuSupports(const CallExpr *E) {
14231 const Expr *FeatureExpr = E->getArg(0)->IgnoreParenCasts();
14232 StringRef FeatureStr = cast<StringLiteral>(FeatureExpr)->getString();
14233 if (!getContext().getTargetInfo().validateCpuSupports(FeatureStr))
14234 return Builder.getFalse();
14235 return EmitX86CpuSupports(FeatureStr);
14236 }
14237
14238 Value *CodeGenFunction::EmitX86CpuSupports(ArrayRef<StringRef> FeatureStrs) {
14239 return EmitX86CpuSupports(llvm::X86::getCpuSupportsMask(FeatureStrs));
14240 }
14241
14242 llvm::Value *
14243 CodeGenFunction::EmitX86CpuSupports(std::array<uint32_t, 4> FeatureMask) {
14244 Value *Result = Builder.getTrue();
14245 if (FeatureMask[0] != 0) {
14246 // Matching the struct layout from the compiler-rt/libgcc structure that is
14247 // filled in:
14248 // unsigned int __cpu_vendor;
14249 // unsigned int __cpu_type;
14250 // unsigned int __cpu_subtype;
14251 // unsigned int __cpu_features[1];
14252 llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty,
14253 llvm::ArrayType::get(Int32Ty, 1));
14254
14255 // Grab the global __cpu_model.
14256 llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model");
14257 cast<llvm::GlobalValue>(CpuModel)->setDSOLocal(true);
14258
14259 // Grab the first (0th) element from the field __cpu_features off of the
14260 // global in the struct STy.
14261 Value *Idxs[] = {Builder.getInt32(0), Builder.getInt32(3),
14262 Builder.getInt32(0)};
14263 Value *CpuFeatures = Builder.CreateInBoundsGEP(STy, CpuModel, Idxs);
14264 Value *Features = Builder.CreateAlignedLoad(Int32Ty, CpuFeatures,
14265 CharUnits::fromQuantity(4));
14266
14267 // Check the value of the bit corresponding to the feature requested.
14268 Value *Mask = Builder.getInt32(FeatureMask[0]);
14269 Value *Bitset = Builder.CreateAnd(Features, Mask);
14270 Value *Cmp = Builder.CreateICmpEQ(Bitset, Mask);
14271 Result = Builder.CreateAnd(Result, Cmp);
14272 }
14273
14274 llvm::Type *ATy = llvm::ArrayType::get(Int32Ty, 3);
14275 llvm::Constant *CpuFeatures2 =
14276 CGM.CreateRuntimeVariable(ATy, "__cpu_features2");
14277 cast<llvm::GlobalValue>(CpuFeatures2)->setDSOLocal(true);
14278 for (int i = 1; i != 4; ++i) {
14279 const uint32_t M = FeatureMask[i];
14280 if (!M)
14281 continue;
14282 Value *Idxs[] = {Builder.getInt32(0), Builder.getInt32(i - 1)};
14283 Value *Features = Builder.CreateAlignedLoad(
14284 Int32Ty, Builder.CreateInBoundsGEP(ATy, CpuFeatures2, Idxs),
14285 CharUnits::fromQuantity(4));
14286 // Check the value of the bit corresponding to the feature requested.
14287 Value *Mask = Builder.getInt32(M);
14288 Value *Bitset = Builder.CreateAnd(Features, Mask);
14289 Value *Cmp = Builder.CreateICmpEQ(Bitset, Mask);
14290 Result = Builder.CreateAnd(Result, Cmp);
14291 }
14292
14293 return Result;
14294 }
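// Illustrative sketch: features in the first 32-bit mask word are tested
// against __cpu_model.__cpu_features[0], and the remaining three words against
// __cpu_features2[0..2]; each non-zero word contributes roughly
//   %f = load i32, ptr getelementptr inbounds ({ i32, i32, i32, [1 x i32] },
//            ptr @__cpu_model, i32 0, i32 3, i32 0), align 4
//   %b = and i32 %f, <MaskWord>
//   %c = icmp eq i32 %b, <MaskWord>
// with the individual comparisons and'ed together into the final result.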
14295
14296 Value *CodeGenFunction::EmitAArch64CpuInit() {
14297 llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false);
14298 llvm::FunctionCallee Func =
14299 CGM.CreateRuntimeFunction(FTy, "__init_cpu_features_resolver");
14300 cast<llvm::GlobalValue>(Func.getCallee())->setDSOLocal(true);
14301 cast<llvm::GlobalValue>(Func.getCallee())
14302 ->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass);
14303 return Builder.CreateCall(Func);
14304 }
14305
14306 Value *CodeGenFunction::EmitX86CpuInit() {
14307 llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy,
14308 /*Variadic*/ false);
14309 llvm::FunctionCallee Func =
14310 CGM.CreateRuntimeFunction(FTy, "__cpu_indicator_init");
14311 cast<llvm::GlobalValue>(Func.getCallee())->setDSOLocal(true);
14312 cast<llvm::GlobalValue>(Func.getCallee())
14313 ->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass);
14314 return Builder.CreateCall(Func);
14315 }
14316
14317 Value *CodeGenFunction::EmitAArch64CpuSupports(const CallExpr *E) {
14318 const Expr *ArgExpr = E->getArg(0)->IgnoreParenCasts();
14319 StringRef ArgStr = cast<StringLiteral>(ArgExpr)->getString();
14320 llvm::SmallVector<StringRef, 8> Features;
14321 ArgStr.split(Features, "+");
14322 for (auto &Feature : Features) {
14323 Feature = Feature.trim();
14324 if (!llvm::AArch64::parseFMVExtension(Feature))
14325 return Builder.getFalse();
14326 if (Feature != "default")
14327 Features.push_back(Feature);
14328 }
14329 return EmitAArch64CpuSupports(Features);
14330 }
14331
14332 llvm::Value *
14333 CodeGenFunction::EmitAArch64CpuSupports(ArrayRef<StringRef> FeaturesStrs) {
14334 uint64_t FeaturesMask = llvm::AArch64::getCpuSupportsMask(FeaturesStrs);
14335 Value *Result = Builder.getTrue();
14336 if (FeaturesMask != 0) {
14337 // Get the features from the structure in the runtime library:
14338 // struct {
14339 // unsigned long long features;
14340 // } __aarch64_cpu_features;
14341 llvm::Type *STy = llvm::StructType::get(Int64Ty);
14342 llvm::Constant *AArch64CPUFeatures =
14343 CGM.CreateRuntimeVariable(STy, "__aarch64_cpu_features");
14344 cast<llvm::GlobalValue>(AArch64CPUFeatures)->setDSOLocal(true);
14345 llvm::Value *CpuFeatures = Builder.CreateGEP(
14346 STy, AArch64CPUFeatures,
14347 {ConstantInt::get(Int32Ty, 0), ConstantInt::get(Int32Ty, 0)});
14348 Value *Features = Builder.CreateAlignedLoad(Int64Ty, CpuFeatures,
14349 CharUnits::fromQuantity(8));
14350 Value *Mask = Builder.getInt64(FeaturesMask);
14351 Value *Bitset = Builder.CreateAnd(Features, Mask);
14352 Value *Cmp = Builder.CreateICmpEQ(Bitset, Mask);
14353 Result = Builder.CreateAnd(Result, Cmp);
14354 }
14355 return Result;
14356 }
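// Illustrative sketch of the emitted check (the mask value depends on the
// requested features); the zero-index GEP just addresses the single field:
//   %f = load i64, ptr @__aarch64_cpu_features, align 8
//   %b = and i64 %f, <FeaturesMask>
//   %c = icmp eq i64 %b, <FeaturesMask>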
14357
14358 Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
14359 const CallExpr *E) {
14360 if (BuiltinID == Builtin::BI__builtin_cpu_is)
14361 return EmitX86CpuIs(E);
14362 if (BuiltinID == Builtin::BI__builtin_cpu_supports)
14363 return EmitX86CpuSupports(E);
14364 if (BuiltinID == Builtin::BI__builtin_cpu_init)
14365 return EmitX86CpuInit();
14366
14367 // Handle MSVC intrinsics before argument evaluation to prevent double
14368 // evaluation.
14369 if (std::optional<MSVCIntrin> MsvcIntId = translateX86ToMsvcIntrin(BuiltinID))
14370 return EmitMSVCBuiltinExpr(*MsvcIntId, E);
14371
14372 SmallVector<Value*, 4> Ops;
14373 bool IsMaskFCmp = false;
14374 bool IsConjFMA = false;
14375
14376 // Find out if any arguments are required to be integer constant expressions.
14377 unsigned ICEArguments = 0;
14378 ASTContext::GetBuiltinTypeError Error;
14379 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
14380 assert(Error == ASTContext::GE_None && "Should not codegen an error");
14381
14382 for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
14383 Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));
14384 }
14385
14386   // These exist so that the builtin that takes an immediate can be bounds
14387   // checked by clang to avoid passing bad immediates to the backend. Since
14388   // AVX has a larger immediate than SSE, we would need separate builtins to
14389   // do the different bounds checking. Rather than create a clang-specific
14390   // SSE-only builtin, this implements eight separate builtins to match the
14391   // gcc implementation.
14392 auto getCmpIntrinsicCall = [this, &Ops](Intrinsic::ID ID, unsigned Imm) {
14393 Ops.push_back(llvm::ConstantInt::get(Int8Ty, Imm));
14394 llvm::Function *F = CGM.getIntrinsic(ID);
14395 return Builder.CreateCall(F, Ops);
14396 };
14397
14398 // For the vector forms of FP comparisons, translate the builtins directly to
14399 // IR.
14400 // TODO: The builtins could be removed if the SSE header files used vector
14401 // extension comparisons directly (vector ordered/unordered may need
14402 // additional support via __builtin_isnan()).
14403 auto getVectorFCmpIR = [this, &Ops, E](CmpInst::Predicate Pred,
14404 bool IsSignaling) {
14405 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
14406 Value *Cmp;
14407 if (IsSignaling)
14408 Cmp = Builder.CreateFCmpS(Pred, Ops[0], Ops[1]);
14409 else
14410 Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]);
14411 llvm::VectorType *FPVecTy = cast<llvm::VectorType>(Ops[0]->getType());
14412 llvm::VectorType *IntVecTy = llvm::VectorType::getInteger(FPVecTy);
14413 Value *Sext = Builder.CreateSExt(Cmp, IntVecTy);
14414 return Builder.CreateBitCast(Sext, FPVecTy);
14415 };
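  // Illustrative sketch: for an ordered-equal compare on <4 x float> operands
  // this lambda emits roughly
  //   %cmp  = fcmp oeq <4 x float> %a, %b
  //   %sext = sext <4 x i1> %cmp to <4 x i32>
  //   %res  = bitcast <4 x i32> %sext to <4 x float>
  // i.e. each lane becomes all-ones on a true compare and all-zeros otherwise.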
14416
14417 switch (BuiltinID) {
14418 default: return nullptr;
14419 case X86::BI_mm_prefetch: {
14420 Value *Address = Ops[0];
14421 ConstantInt *C = cast<ConstantInt>(Ops[1]);
14422 Value *RW = ConstantInt::get(Int32Ty, (C->getZExtValue() >> 2) & 0x1);
14423 Value *Locality = ConstantInt::get(Int32Ty, C->getZExtValue() & 0x3);
14424 Value *Data = ConstantInt::get(Int32Ty, 1);
14425 Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
14426 return Builder.CreateCall(F, {Address, RW, Locality, Data});
14427 }
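  // Illustrative note: with the usual xmmintrin.h hint encoding, _MM_HINT_T0
  // (3) decodes to read/write = 0 and locality = 3, while _MM_HINT_ET0 (7)
  // decodes to read/write = 1 and locality = 3; the trailing constant 1 marks
  // this as a data (rather than instruction) prefetch for llvm.prefetch.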
14428 case X86::BI_mm_clflush: {
14429 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_clflush),
14430 Ops[0]);
14431 }
14432 case X86::BI_mm_lfence: {
14433 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_lfence));
14434 }
14435 case X86::BI_mm_mfence: {
14436 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_mfence));
14437 }
14438 case X86::BI_mm_sfence: {
14439 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_sfence));
14440 }
14441 case X86::BI_mm_pause: {
14442 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_pause));
14443 }
14444 case X86::BI__rdtsc: {
14445 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_rdtsc));
14446 }
14447 case X86::BI__builtin_ia32_rdtscp: {
14448 Value *Call = Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_rdtscp));
14449 Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 1),
14450 Ops[0]);
14451 return Builder.CreateExtractValue(Call, 0);
14452 }
14453 case X86::BI__builtin_ia32_lzcnt_u16:
14454 case X86::BI__builtin_ia32_lzcnt_u32:
14455 case X86::BI__builtin_ia32_lzcnt_u64: {
14456 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType());
14457 return Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)});
14458 }
14459 case X86::BI__builtin_ia32_tzcnt_u16:
14460 case X86::BI__builtin_ia32_tzcnt_u32:
14461 case X86::BI__builtin_ia32_tzcnt_u64: {
14462 Function *F = CGM.getIntrinsic(Intrinsic::cttz, Ops[0]->getType());
14463 return Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)});
14464 }
14465 case X86::BI__builtin_ia32_undef128:
14466 case X86::BI__builtin_ia32_undef256:
14467 case X86::BI__builtin_ia32_undef512:
14468 // The x86 definition of "undef" is not the same as the LLVM definition
14469 // (PR32176). We leave optimizing away an unnecessary zero constant to the
14470 // IR optimizer and backend.
14471 // TODO: If we had a "freeze" IR instruction to generate a fixed undef
14472 // value, we should use that here instead of a zero.
14473 return llvm::Constant::getNullValue(ConvertType(E->getType()));
14474 case X86::BI__builtin_ia32_vec_init_v8qi:
14475 case X86::BI__builtin_ia32_vec_init_v4hi:
14476 case X86::BI__builtin_ia32_vec_init_v2si:
14477 return Builder.CreateBitCast(BuildVector(Ops),
14478 llvm::Type::getX86_MMXTy(getLLVMContext()));
14479 case X86::BI__builtin_ia32_vec_ext_v2si:
14480 case X86::BI__builtin_ia32_vec_ext_v16qi:
14481 case X86::BI__builtin_ia32_vec_ext_v8hi:
14482 case X86::BI__builtin_ia32_vec_ext_v4si:
14483 case X86::BI__builtin_ia32_vec_ext_v4sf:
14484 case X86::BI__builtin_ia32_vec_ext_v2di:
14485 case X86::BI__builtin_ia32_vec_ext_v32qi:
14486 case X86::BI__builtin_ia32_vec_ext_v16hi:
14487 case X86::BI__builtin_ia32_vec_ext_v8si:
14488 case X86::BI__builtin_ia32_vec_ext_v4di: {
14489 unsigned NumElts =
14490 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
14491 uint64_t Index = cast<ConstantInt>(Ops[1])->getZExtValue();
14492 Index &= NumElts - 1;
14493 // These builtins exist so we can ensure the index is an ICE and in range.
14494 // Otherwise we could just do this in the header file.
14495 return Builder.CreateExtractElement(Ops[0], Index);
14496 }
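  // Illustrative sketch: __builtin_ia32_vec_ext_v4si(v, 2) becomes
  //   extractelement <4 x i32> %v, i64 2
  // with the index masked by NumElts - 1 so it is always in range.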
14497 case X86::BI__builtin_ia32_vec_set_v16qi:
14498 case X86::BI__builtin_ia32_vec_set_v8hi:
14499 case X86::BI__builtin_ia32_vec_set_v4si:
14500 case X86::BI__builtin_ia32_vec_set_v2di:
14501 case X86::BI__builtin_ia32_vec_set_v32qi:
14502 case X86::BI__builtin_ia32_vec_set_v16hi:
14503 case X86::BI__builtin_ia32_vec_set_v8si:
14504 case X86::BI__builtin_ia32_vec_set_v4di: {
14505 unsigned NumElts =
14506 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
14507 unsigned Index = cast<ConstantInt>(Ops[2])->getZExtValue();
14508 Index &= NumElts - 1;
14509 // These builtins exist so we can ensure the index is an ICE and in range.
14510 // Otherwise we could just do this in the header file.
14511 return Builder.CreateInsertElement(Ops[0], Ops[1], Index);
14512 }
14513 case X86::BI_mm_setcsr:
14514 case X86::BI__builtin_ia32_ldmxcsr: {
14515 RawAddress Tmp = CreateMemTemp(E->getArg(0)->getType());
14516 Builder.CreateStore(Ops[0], Tmp);
14517 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_ldmxcsr),
14518 Tmp.getPointer());
14519 }
14520 case X86::BI_mm_getcsr:
14521 case X86::BI__builtin_ia32_stmxcsr: {
14522 RawAddress Tmp = CreateMemTemp(E->getType());
14523 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_stmxcsr),
14524 Tmp.getPointer());
14525 return Builder.CreateLoad(Tmp, "stmxcsr");
14526 }
14527 case X86::BI__builtin_ia32_xsave:
14528 case X86::BI__builtin_ia32_xsave64:
14529 case X86::BI__builtin_ia32_xrstor:
14530 case X86::BI__builtin_ia32_xrstor64:
14531 case X86::BI__builtin_ia32_xsaveopt:
14532 case X86::BI__builtin_ia32_xsaveopt64:
14533 case X86::BI__builtin_ia32_xrstors:
14534 case X86::BI__builtin_ia32_xrstors64:
14535 case X86::BI__builtin_ia32_xsavec:
14536 case X86::BI__builtin_ia32_xsavec64:
14537 case X86::BI__builtin_ia32_xsaves:
14538 case X86::BI__builtin_ia32_xsaves64:
14539 case X86::BI__builtin_ia32_xsetbv:
14540 case X86::BI_xsetbv: {
14541 Intrinsic::ID ID;
14542 #define INTRINSIC_X86_XSAVE_ID(NAME) \
14543 case X86::BI__builtin_ia32_##NAME: \
14544 ID = Intrinsic::x86_##NAME; \
14545 break
14546 switch (BuiltinID) {
14547 default: llvm_unreachable("Unsupported intrinsic!");
14548 INTRINSIC_X86_XSAVE_ID(xsave);
14549 INTRINSIC_X86_XSAVE_ID(xsave64);
14550 INTRINSIC_X86_XSAVE_ID(xrstor);
14551 INTRINSIC_X86_XSAVE_ID(xrstor64);
14552 INTRINSIC_X86_XSAVE_ID(xsaveopt);
14553 INTRINSIC_X86_XSAVE_ID(xsaveopt64);
14554 INTRINSIC_X86_XSAVE_ID(xrstors);
14555 INTRINSIC_X86_XSAVE_ID(xrstors64);
14556 INTRINSIC_X86_XSAVE_ID(xsavec);
14557 INTRINSIC_X86_XSAVE_ID(xsavec64);
14558 INTRINSIC_X86_XSAVE_ID(xsaves);
14559 INTRINSIC_X86_XSAVE_ID(xsaves64);
14560 INTRINSIC_X86_XSAVE_ID(xsetbv);
14561 case X86::BI_xsetbv:
14562 ID = Intrinsic::x86_xsetbv;
14563 break;
14564 }
14565 #undef INTRINSIC_X86_XSAVE_ID
14566 Value *Mhi = Builder.CreateTrunc(
14567 Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, 32)), Int32Ty);
14568 Value *Mlo = Builder.CreateTrunc(Ops[1], Int32Ty);
14569 Ops[1] = Mhi;
14570 Ops.push_back(Mlo);
14571 return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
14572 }
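  // Illustrative note: the 64-bit mask (or, for xsetbv, the 64-bit XCR value)
  // in Ops[1] is split into its high and low 32-bit halves because the
  // underlying instructions take that operand in EDX:EAX, so the intrinsic is
  // ultimately called as (Ops[0], mask_hi, mask_lo).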
14573 case X86::BI__builtin_ia32_xgetbv:
14574 case X86::BI_xgetbv:
14575 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_xgetbv), Ops);
14576 case X86::BI__builtin_ia32_storedqudi128_mask:
14577 case X86::BI__builtin_ia32_storedqusi128_mask:
14578 case X86::BI__builtin_ia32_storedquhi128_mask:
14579 case X86::BI__builtin_ia32_storedquqi128_mask:
14580 case X86::BI__builtin_ia32_storeupd128_mask:
14581 case X86::BI__builtin_ia32_storeups128_mask:
14582 case X86::BI__builtin_ia32_storedqudi256_mask:
14583 case X86::BI__builtin_ia32_storedqusi256_mask:
14584 case X86::BI__builtin_ia32_storedquhi256_mask:
14585 case X86::BI__builtin_ia32_storedquqi256_mask:
14586 case X86::BI__builtin_ia32_storeupd256_mask:
14587 case X86::BI__builtin_ia32_storeups256_mask:
14588 case X86::BI__builtin_ia32_storedqudi512_mask:
14589 case X86::BI__builtin_ia32_storedqusi512_mask:
14590 case X86::BI__builtin_ia32_storedquhi512_mask:
14591 case X86::BI__builtin_ia32_storedquqi512_mask:
14592 case X86::BI__builtin_ia32_storeupd512_mask:
14593 case X86::BI__builtin_ia32_storeups512_mask:
14594 return EmitX86MaskedStore(*this, Ops, Align(1));
14595
14596 case X86::BI__builtin_ia32_storesh128_mask:
14597 case X86::BI__builtin_ia32_storess128_mask:
14598 case X86::BI__builtin_ia32_storesd128_mask:
14599 return EmitX86MaskedStore(*this, Ops, Align(1));
14600
14601 case X86::BI__builtin_ia32_vpopcntb_128:
14602 case X86::BI__builtin_ia32_vpopcntd_128:
14603 case X86::BI__builtin_ia32_vpopcntq_128:
14604 case X86::BI__builtin_ia32_vpopcntw_128:
14605 case X86::BI__builtin_ia32_vpopcntb_256:
14606 case X86::BI__builtin_ia32_vpopcntd_256:
14607 case X86::BI__builtin_ia32_vpopcntq_256:
14608 case X86::BI__builtin_ia32_vpopcntw_256:
14609 case X86::BI__builtin_ia32_vpopcntb_512:
14610 case X86::BI__builtin_ia32_vpopcntd_512:
14611 case X86::BI__builtin_ia32_vpopcntq_512:
14612 case X86::BI__builtin_ia32_vpopcntw_512: {
14613 llvm::Type *ResultType = ConvertType(E->getType());
14614 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
14615 return Builder.CreateCall(F, Ops);
14616 }
14617 case X86::BI__builtin_ia32_cvtmask2b128:
14618 case X86::BI__builtin_ia32_cvtmask2b256:
14619 case X86::BI__builtin_ia32_cvtmask2b512:
14620 case X86::BI__builtin_ia32_cvtmask2w128:
14621 case X86::BI__builtin_ia32_cvtmask2w256:
14622 case X86::BI__builtin_ia32_cvtmask2w512:
14623 case X86::BI__builtin_ia32_cvtmask2d128:
14624 case X86::BI__builtin_ia32_cvtmask2d256:
14625 case X86::BI__builtin_ia32_cvtmask2d512:
14626 case X86::BI__builtin_ia32_cvtmask2q128:
14627 case X86::BI__builtin_ia32_cvtmask2q256:
14628 case X86::BI__builtin_ia32_cvtmask2q512:
14629 return EmitX86SExtMask(*this, Ops[0], ConvertType(E->getType()));
14630
14631 case X86::BI__builtin_ia32_cvtb2mask128:
14632 case X86::BI__builtin_ia32_cvtb2mask256:
14633 case X86::BI__builtin_ia32_cvtb2mask512:
14634 case X86::BI__builtin_ia32_cvtw2mask128:
14635 case X86::BI__builtin_ia32_cvtw2mask256:
14636 case X86::BI__builtin_ia32_cvtw2mask512:
14637 case X86::BI__builtin_ia32_cvtd2mask128:
14638 case X86::BI__builtin_ia32_cvtd2mask256:
14639 case X86::BI__builtin_ia32_cvtd2mask512:
14640 case X86::BI__builtin_ia32_cvtq2mask128:
14641 case X86::BI__builtin_ia32_cvtq2mask256:
14642 case X86::BI__builtin_ia32_cvtq2mask512:
14643 return EmitX86ConvertToMask(*this, Ops[0]);
14644
14645 case X86::BI__builtin_ia32_cvtdq2ps512_mask:
14646 case X86::BI__builtin_ia32_cvtqq2ps512_mask:
14647 case X86::BI__builtin_ia32_cvtqq2pd512_mask:
14648 case X86::BI__builtin_ia32_vcvtw2ph512_mask:
14649 case X86::BI__builtin_ia32_vcvtdq2ph512_mask:
14650 case X86::BI__builtin_ia32_vcvtqq2ph512_mask:
14651 return EmitX86ConvertIntToFp(*this, E, Ops, /*IsSigned*/ true);
14652 case X86::BI__builtin_ia32_cvtudq2ps512_mask:
14653 case X86::BI__builtin_ia32_cvtuqq2ps512_mask:
14654 case X86::BI__builtin_ia32_cvtuqq2pd512_mask:
14655 case X86::BI__builtin_ia32_vcvtuw2ph512_mask:
14656 case X86::BI__builtin_ia32_vcvtudq2ph512_mask:
14657 case X86::BI__builtin_ia32_vcvtuqq2ph512_mask:
14658 return EmitX86ConvertIntToFp(*this, E, Ops, /*IsSigned*/ false);
14659
14660 case X86::BI__builtin_ia32_vfmaddss3:
14661 case X86::BI__builtin_ia32_vfmaddsd3:
14662 case X86::BI__builtin_ia32_vfmaddsh3_mask:
14663 case X86::BI__builtin_ia32_vfmaddss3_mask:
14664 case X86::BI__builtin_ia32_vfmaddsd3_mask:
14665 return EmitScalarFMAExpr(*this, E, Ops, Ops[0]);
14666 case X86::BI__builtin_ia32_vfmaddss:
14667 case X86::BI__builtin_ia32_vfmaddsd:
14668 return EmitScalarFMAExpr(*this, E, Ops,
14669 Constant::getNullValue(Ops[0]->getType()));
14670 case X86::BI__builtin_ia32_vfmaddsh3_maskz:
14671 case X86::BI__builtin_ia32_vfmaddss3_maskz:
14672 case X86::BI__builtin_ia32_vfmaddsd3_maskz:
14673 return EmitScalarFMAExpr(*this, E, Ops, Ops[0], /*ZeroMask*/ true);
14674 case X86::BI__builtin_ia32_vfmaddsh3_mask3:
14675 case X86::BI__builtin_ia32_vfmaddss3_mask3:
14676 case X86::BI__builtin_ia32_vfmaddsd3_mask3:
14677 return EmitScalarFMAExpr(*this, E, Ops, Ops[2], /*ZeroMask*/ false, 2);
14678 case X86::BI__builtin_ia32_vfmsubsh3_mask3:
14679 case X86::BI__builtin_ia32_vfmsubss3_mask3:
14680 case X86::BI__builtin_ia32_vfmsubsd3_mask3:
14681 return EmitScalarFMAExpr(*this, E, Ops, Ops[2], /*ZeroMask*/ false, 2,
14682 /*NegAcc*/ true);
14683 case X86::BI__builtin_ia32_vfmaddph:
14684 case X86::BI__builtin_ia32_vfmaddps:
14685 case X86::BI__builtin_ia32_vfmaddpd:
14686 case X86::BI__builtin_ia32_vfmaddph256:
14687 case X86::BI__builtin_ia32_vfmaddps256:
14688 case X86::BI__builtin_ia32_vfmaddpd256:
14689 case X86::BI__builtin_ia32_vfmaddph512_mask:
14690 case X86::BI__builtin_ia32_vfmaddph512_maskz:
14691 case X86::BI__builtin_ia32_vfmaddph512_mask3:
14692 case X86::BI__builtin_ia32_vfmaddps512_mask:
14693 case X86::BI__builtin_ia32_vfmaddps512_maskz:
14694 case X86::BI__builtin_ia32_vfmaddps512_mask3:
14695 case X86::BI__builtin_ia32_vfmsubps512_mask3:
14696 case X86::BI__builtin_ia32_vfmaddpd512_mask:
14697 case X86::BI__builtin_ia32_vfmaddpd512_maskz:
14698 case X86::BI__builtin_ia32_vfmaddpd512_mask3:
14699 case X86::BI__builtin_ia32_vfmsubpd512_mask3:
14700 case X86::BI__builtin_ia32_vfmsubph512_mask3:
14701 return EmitX86FMAExpr(*this, E, Ops, BuiltinID, /*IsAddSub*/ false);
14702 case X86::BI__builtin_ia32_vfmaddsubph512_mask:
14703 case X86::BI__builtin_ia32_vfmaddsubph512_maskz:
14704 case X86::BI__builtin_ia32_vfmaddsubph512_mask3:
14705 case X86::BI__builtin_ia32_vfmsubaddph512_mask3:
14706 case X86::BI__builtin_ia32_vfmaddsubps512_mask:
14707 case X86::BI__builtin_ia32_vfmaddsubps512_maskz:
14708 case X86::BI__builtin_ia32_vfmaddsubps512_mask3:
14709 case X86::BI__builtin_ia32_vfmsubaddps512_mask3:
14710 case X86::BI__builtin_ia32_vfmaddsubpd512_mask:
14711 case X86::BI__builtin_ia32_vfmaddsubpd512_maskz:
14712 case X86::BI__builtin_ia32_vfmaddsubpd512_mask3:
14713 case X86::BI__builtin_ia32_vfmsubaddpd512_mask3:
14714 return EmitX86FMAExpr(*this, E, Ops, BuiltinID, /*IsAddSub*/ true);
14715
14716 case X86::BI__builtin_ia32_movdqa32store128_mask:
14717 case X86::BI__builtin_ia32_movdqa64store128_mask:
14718 case X86::BI__builtin_ia32_storeaps128_mask:
14719 case X86::BI__builtin_ia32_storeapd128_mask:
14720 case X86::BI__builtin_ia32_movdqa32store256_mask:
14721 case X86::BI__builtin_ia32_movdqa64store256_mask:
14722 case X86::BI__builtin_ia32_storeaps256_mask:
14723 case X86::BI__builtin_ia32_storeapd256_mask:
14724 case X86::BI__builtin_ia32_movdqa32store512_mask:
14725 case X86::BI__builtin_ia32_movdqa64store512_mask:
14726 case X86::BI__builtin_ia32_storeaps512_mask:
14727 case X86::BI__builtin_ia32_storeapd512_mask:
14728 return EmitX86MaskedStore(
14729 *this, Ops,
14730 getContext().getTypeAlignInChars(E->getArg(1)->getType()).getAsAlign());
14731
14732 case X86::BI__builtin_ia32_loadups128_mask:
14733 case X86::BI__builtin_ia32_loadups256_mask:
14734 case X86::BI__builtin_ia32_loadups512_mask:
14735 case X86::BI__builtin_ia32_loadupd128_mask:
14736 case X86::BI__builtin_ia32_loadupd256_mask:
14737 case X86::BI__builtin_ia32_loadupd512_mask:
14738 case X86::BI__builtin_ia32_loaddquqi128_mask:
14739 case X86::BI__builtin_ia32_loaddquqi256_mask:
14740 case X86::BI__builtin_ia32_loaddquqi512_mask:
14741 case X86::BI__builtin_ia32_loaddquhi128_mask:
14742 case X86::BI__builtin_ia32_loaddquhi256_mask:
14743 case X86::BI__builtin_ia32_loaddquhi512_mask:
14744 case X86::BI__builtin_ia32_loaddqusi128_mask:
14745 case X86::BI__builtin_ia32_loaddqusi256_mask:
14746 case X86::BI__builtin_ia32_loaddqusi512_mask:
14747 case X86::BI__builtin_ia32_loaddqudi128_mask:
14748 case X86::BI__builtin_ia32_loaddqudi256_mask:
14749 case X86::BI__builtin_ia32_loaddqudi512_mask:
14750 return EmitX86MaskedLoad(*this, Ops, Align(1));
14751
14752 case X86::BI__builtin_ia32_loadsh128_mask:
14753 case X86::BI__builtin_ia32_loadss128_mask:
14754 case X86::BI__builtin_ia32_loadsd128_mask:
14755 return EmitX86MaskedLoad(*this, Ops, Align(1));
14756
14757 case X86::BI__builtin_ia32_loadaps128_mask:
14758 case X86::BI__builtin_ia32_loadaps256_mask:
14759 case X86::BI__builtin_ia32_loadaps512_mask:
14760 case X86::BI__builtin_ia32_loadapd128_mask:
14761 case X86::BI__builtin_ia32_loadapd256_mask:
14762 case X86::BI__builtin_ia32_loadapd512_mask:
14763 case X86::BI__builtin_ia32_movdqa32load128_mask:
14764 case X86::BI__builtin_ia32_movdqa32load256_mask:
14765 case X86::BI__builtin_ia32_movdqa32load512_mask:
14766 case X86::BI__builtin_ia32_movdqa64load128_mask:
14767 case X86::BI__builtin_ia32_movdqa64load256_mask:
14768 case X86::BI__builtin_ia32_movdqa64load512_mask:
14769 return EmitX86MaskedLoad(
14770 *this, Ops,
14771 getContext().getTypeAlignInChars(E->getArg(1)->getType()).getAsAlign());
14772
14773 case X86::BI__builtin_ia32_expandloaddf128_mask:
14774 case X86::BI__builtin_ia32_expandloaddf256_mask:
14775 case X86::BI__builtin_ia32_expandloaddf512_mask:
14776 case X86::BI__builtin_ia32_expandloadsf128_mask:
14777 case X86::BI__builtin_ia32_expandloadsf256_mask:
14778 case X86::BI__builtin_ia32_expandloadsf512_mask:
14779 case X86::BI__builtin_ia32_expandloaddi128_mask:
14780 case X86::BI__builtin_ia32_expandloaddi256_mask:
14781 case X86::BI__builtin_ia32_expandloaddi512_mask:
14782 case X86::BI__builtin_ia32_expandloadsi128_mask:
14783 case X86::BI__builtin_ia32_expandloadsi256_mask:
14784 case X86::BI__builtin_ia32_expandloadsi512_mask:
14785 case X86::BI__builtin_ia32_expandloadhi128_mask:
14786 case X86::BI__builtin_ia32_expandloadhi256_mask:
14787 case X86::BI__builtin_ia32_expandloadhi512_mask:
14788 case X86::BI__builtin_ia32_expandloadqi128_mask:
14789 case X86::BI__builtin_ia32_expandloadqi256_mask:
14790 case X86::BI__builtin_ia32_expandloadqi512_mask:
14791 return EmitX86ExpandLoad(*this, Ops);
14792
14793 case X86::BI__builtin_ia32_compressstoredf128_mask:
14794 case X86::BI__builtin_ia32_compressstoredf256_mask:
14795 case X86::BI__builtin_ia32_compressstoredf512_mask:
14796 case X86::BI__builtin_ia32_compressstoresf128_mask:
14797 case X86::BI__builtin_ia32_compressstoresf256_mask:
14798 case X86::BI__builtin_ia32_compressstoresf512_mask:
14799 case X86::BI__builtin_ia32_compressstoredi128_mask:
14800 case X86::BI__builtin_ia32_compressstoredi256_mask:
14801 case X86::BI__builtin_ia32_compressstoredi512_mask:
14802 case X86::BI__builtin_ia32_compressstoresi128_mask:
14803 case X86::BI__builtin_ia32_compressstoresi256_mask:
14804 case X86::BI__builtin_ia32_compressstoresi512_mask:
14805 case X86::BI__builtin_ia32_compressstorehi128_mask:
14806 case X86::BI__builtin_ia32_compressstorehi256_mask:
14807 case X86::BI__builtin_ia32_compressstorehi512_mask:
14808 case X86::BI__builtin_ia32_compressstoreqi128_mask:
14809 case X86::BI__builtin_ia32_compressstoreqi256_mask:
14810 case X86::BI__builtin_ia32_compressstoreqi512_mask:
14811 return EmitX86CompressStore(*this, Ops);
14812
14813 case X86::BI__builtin_ia32_expanddf128_mask:
14814 case X86::BI__builtin_ia32_expanddf256_mask:
14815 case X86::BI__builtin_ia32_expanddf512_mask:
14816 case X86::BI__builtin_ia32_expandsf128_mask:
14817 case X86::BI__builtin_ia32_expandsf256_mask:
14818 case X86::BI__builtin_ia32_expandsf512_mask:
14819 case X86::BI__builtin_ia32_expanddi128_mask:
14820 case X86::BI__builtin_ia32_expanddi256_mask:
14821 case X86::BI__builtin_ia32_expanddi512_mask:
14822 case X86::BI__builtin_ia32_expandsi128_mask:
14823 case X86::BI__builtin_ia32_expandsi256_mask:
14824 case X86::BI__builtin_ia32_expandsi512_mask:
14825 case X86::BI__builtin_ia32_expandhi128_mask:
14826 case X86::BI__builtin_ia32_expandhi256_mask:
14827 case X86::BI__builtin_ia32_expandhi512_mask:
14828 case X86::BI__builtin_ia32_expandqi128_mask:
14829 case X86::BI__builtin_ia32_expandqi256_mask:
14830 case X86::BI__builtin_ia32_expandqi512_mask:
14831 return EmitX86CompressExpand(*this, Ops, /*IsCompress*/false);
14832
14833 case X86::BI__builtin_ia32_compressdf128_mask:
14834 case X86::BI__builtin_ia32_compressdf256_mask:
14835 case X86::BI__builtin_ia32_compressdf512_mask:
14836 case X86::BI__builtin_ia32_compresssf128_mask:
14837 case X86::BI__builtin_ia32_compresssf256_mask:
14838 case X86::BI__builtin_ia32_compresssf512_mask:
14839 case X86::BI__builtin_ia32_compressdi128_mask:
14840 case X86::BI__builtin_ia32_compressdi256_mask:
14841 case X86::BI__builtin_ia32_compressdi512_mask:
14842 case X86::BI__builtin_ia32_compresssi128_mask:
14843 case X86::BI__builtin_ia32_compresssi256_mask:
14844 case X86::BI__builtin_ia32_compresssi512_mask:
14845 case X86::BI__builtin_ia32_compresshi128_mask:
14846 case X86::BI__builtin_ia32_compresshi256_mask:
14847 case X86::BI__builtin_ia32_compresshi512_mask:
14848 case X86::BI__builtin_ia32_compressqi128_mask:
14849 case X86::BI__builtin_ia32_compressqi256_mask:
14850 case X86::BI__builtin_ia32_compressqi512_mask:
14851 return EmitX86CompressExpand(*this, Ops, /*IsCompress*/true);
14852
14853 case X86::BI__builtin_ia32_gather3div2df:
14854 case X86::BI__builtin_ia32_gather3div2di:
14855 case X86::BI__builtin_ia32_gather3div4df:
14856 case X86::BI__builtin_ia32_gather3div4di:
14857 case X86::BI__builtin_ia32_gather3div4sf:
14858 case X86::BI__builtin_ia32_gather3div4si:
14859 case X86::BI__builtin_ia32_gather3div8sf:
14860 case X86::BI__builtin_ia32_gather3div8si:
14861 case X86::BI__builtin_ia32_gather3siv2df:
14862 case X86::BI__builtin_ia32_gather3siv2di:
14863 case X86::BI__builtin_ia32_gather3siv4df:
14864 case X86::BI__builtin_ia32_gather3siv4di:
14865 case X86::BI__builtin_ia32_gather3siv4sf:
14866 case X86::BI__builtin_ia32_gather3siv4si:
14867 case X86::BI__builtin_ia32_gather3siv8sf:
14868 case X86::BI__builtin_ia32_gather3siv8si:
14869 case X86::BI__builtin_ia32_gathersiv8df:
14870 case X86::BI__builtin_ia32_gathersiv16sf:
14871 case X86::BI__builtin_ia32_gatherdiv8df:
14872 case X86::BI__builtin_ia32_gatherdiv16sf:
14873 case X86::BI__builtin_ia32_gathersiv8di:
14874 case X86::BI__builtin_ia32_gathersiv16si:
14875 case X86::BI__builtin_ia32_gatherdiv8di:
14876 case X86::BI__builtin_ia32_gatherdiv16si: {
14877 Intrinsic::ID IID;
14878 switch (BuiltinID) {
14879 default: llvm_unreachable("Unexpected builtin");
14880 case X86::BI__builtin_ia32_gather3div2df:
14881 IID = Intrinsic::x86_avx512_mask_gather3div2_df;
14882 break;
14883 case X86::BI__builtin_ia32_gather3div2di:
14884 IID = Intrinsic::x86_avx512_mask_gather3div2_di;
14885 break;
14886 case X86::BI__builtin_ia32_gather3div4df:
14887 IID = Intrinsic::x86_avx512_mask_gather3div4_df;
14888 break;
14889 case X86::BI__builtin_ia32_gather3div4di:
14890 IID = Intrinsic::x86_avx512_mask_gather3div4_di;
14891 break;
14892 case X86::BI__builtin_ia32_gather3div4sf:
14893 IID = Intrinsic::x86_avx512_mask_gather3div4_sf;
14894 break;
14895 case X86::BI__builtin_ia32_gather3div4si:
14896 IID = Intrinsic::x86_avx512_mask_gather3div4_si;
14897 break;
14898 case X86::BI__builtin_ia32_gather3div8sf:
14899 IID = Intrinsic::x86_avx512_mask_gather3div8_sf;
14900 break;
14901 case X86::BI__builtin_ia32_gather3div8si:
14902 IID = Intrinsic::x86_avx512_mask_gather3div8_si;
14903 break;
14904 case X86::BI__builtin_ia32_gather3siv2df:
14905 IID = Intrinsic::x86_avx512_mask_gather3siv2_df;
14906 break;
14907 case X86::BI__builtin_ia32_gather3siv2di:
14908 IID = Intrinsic::x86_avx512_mask_gather3siv2_di;
14909 break;
14910 case X86::BI__builtin_ia32_gather3siv4df:
14911 IID = Intrinsic::x86_avx512_mask_gather3siv4_df;
14912 break;
14913 case X86::BI__builtin_ia32_gather3siv4di:
14914 IID = Intrinsic::x86_avx512_mask_gather3siv4_di;
14915 break;
14916 case X86::BI__builtin_ia32_gather3siv4sf:
14917 IID = Intrinsic::x86_avx512_mask_gather3siv4_sf;
14918 break;
14919 case X86::BI__builtin_ia32_gather3siv4si:
14920 IID = Intrinsic::x86_avx512_mask_gather3siv4_si;
14921 break;
14922 case X86::BI__builtin_ia32_gather3siv8sf:
14923 IID = Intrinsic::x86_avx512_mask_gather3siv8_sf;
14924 break;
14925 case X86::BI__builtin_ia32_gather3siv8si:
14926 IID = Intrinsic::x86_avx512_mask_gather3siv8_si;
14927 break;
14928 case X86::BI__builtin_ia32_gathersiv8df:
14929 IID = Intrinsic::x86_avx512_mask_gather_dpd_512;
14930 break;
14931 case X86::BI__builtin_ia32_gathersiv16sf:
14932 IID = Intrinsic::x86_avx512_mask_gather_dps_512;
14933 break;
14934 case X86::BI__builtin_ia32_gatherdiv8df:
14935 IID = Intrinsic::x86_avx512_mask_gather_qpd_512;
14936 break;
14937 case X86::BI__builtin_ia32_gatherdiv16sf:
14938 IID = Intrinsic::x86_avx512_mask_gather_qps_512;
14939 break;
14940 case X86::BI__builtin_ia32_gathersiv8di:
14941 IID = Intrinsic::x86_avx512_mask_gather_dpq_512;
14942 break;
14943 case X86::BI__builtin_ia32_gathersiv16si:
14944 IID = Intrinsic::x86_avx512_mask_gather_dpi_512;
14945 break;
14946 case X86::BI__builtin_ia32_gatherdiv8di:
14947 IID = Intrinsic::x86_avx512_mask_gather_qpq_512;
14948 break;
14949 case X86::BI__builtin_ia32_gatherdiv16si:
14950 IID = Intrinsic::x86_avx512_mask_gather_qpi_512;
14951 break;
14952 }
14953
14954 unsigned MinElts = std::min(
14955 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements(),
14956 cast<llvm::FixedVectorType>(Ops[2]->getType())->getNumElements());
14957 Ops[3] = getMaskVecValue(*this, Ops[3], MinElts);
14958 Function *Intr = CGM.getIntrinsic(IID);
14959 return Builder.CreateCall(Intr, Ops);
14960 }
14961
14962 case X86::BI__builtin_ia32_scattersiv8df:
14963 case X86::BI__builtin_ia32_scattersiv16sf:
14964 case X86::BI__builtin_ia32_scatterdiv8df:
14965 case X86::BI__builtin_ia32_scatterdiv16sf:
14966 case X86::BI__builtin_ia32_scattersiv8di:
14967 case X86::BI__builtin_ia32_scattersiv16si:
14968 case X86::BI__builtin_ia32_scatterdiv8di:
14969 case X86::BI__builtin_ia32_scatterdiv16si:
14970 case X86::BI__builtin_ia32_scatterdiv2df:
14971 case X86::BI__builtin_ia32_scatterdiv2di:
14972 case X86::BI__builtin_ia32_scatterdiv4df:
14973 case X86::BI__builtin_ia32_scatterdiv4di:
14974 case X86::BI__builtin_ia32_scatterdiv4sf:
14975 case X86::BI__builtin_ia32_scatterdiv4si:
14976 case X86::BI__builtin_ia32_scatterdiv8sf:
14977 case X86::BI__builtin_ia32_scatterdiv8si:
14978 case X86::BI__builtin_ia32_scattersiv2df:
14979 case X86::BI__builtin_ia32_scattersiv2di:
14980 case X86::BI__builtin_ia32_scattersiv4df:
14981 case X86::BI__builtin_ia32_scattersiv4di:
14982 case X86::BI__builtin_ia32_scattersiv4sf:
14983 case X86::BI__builtin_ia32_scattersiv4si:
14984 case X86::BI__builtin_ia32_scattersiv8sf:
14985 case X86::BI__builtin_ia32_scattersiv8si: {
14986 Intrinsic::ID IID;
14987 switch (BuiltinID) {
14988 default: llvm_unreachable("Unexpected builtin");
14989 case X86::BI__builtin_ia32_scattersiv8df:
14990 IID = Intrinsic::x86_avx512_mask_scatter_dpd_512;
14991 break;
14992 case X86::BI__builtin_ia32_scattersiv16sf:
14993 IID = Intrinsic::x86_avx512_mask_scatter_dps_512;
14994 break;
14995 case X86::BI__builtin_ia32_scatterdiv8df:
14996 IID = Intrinsic::x86_avx512_mask_scatter_qpd_512;
14997 break;
14998 case X86::BI__builtin_ia32_scatterdiv16sf:
14999 IID = Intrinsic::x86_avx512_mask_scatter_qps_512;
15000 break;
15001 case X86::BI__builtin_ia32_scattersiv8di:
15002 IID = Intrinsic::x86_avx512_mask_scatter_dpq_512;
15003 break;
15004 case X86::BI__builtin_ia32_scattersiv16si:
15005 IID = Intrinsic::x86_avx512_mask_scatter_dpi_512;
15006 break;
15007 case X86::BI__builtin_ia32_scatterdiv8di:
15008 IID = Intrinsic::x86_avx512_mask_scatter_qpq_512;
15009 break;
15010 case X86::BI__builtin_ia32_scatterdiv16si:
15011 IID = Intrinsic::x86_avx512_mask_scatter_qpi_512;
15012 break;
15013 case X86::BI__builtin_ia32_scatterdiv2df:
15014 IID = Intrinsic::x86_avx512_mask_scatterdiv2_df;
15015 break;
15016 case X86::BI__builtin_ia32_scatterdiv2di:
15017 IID = Intrinsic::x86_avx512_mask_scatterdiv2_di;
15018 break;
15019 case X86::BI__builtin_ia32_scatterdiv4df:
15020 IID = Intrinsic::x86_avx512_mask_scatterdiv4_df;
15021 break;
15022 case X86::BI__builtin_ia32_scatterdiv4di:
15023 IID = Intrinsic::x86_avx512_mask_scatterdiv4_di;
15024 break;
15025 case X86::BI__builtin_ia32_scatterdiv4sf:
15026 IID = Intrinsic::x86_avx512_mask_scatterdiv4_sf;
15027 break;
15028 case X86::BI__builtin_ia32_scatterdiv4si:
15029 IID = Intrinsic::x86_avx512_mask_scatterdiv4_si;
15030 break;
15031 case X86::BI__builtin_ia32_scatterdiv8sf:
15032 IID = Intrinsic::x86_avx512_mask_scatterdiv8_sf;
15033 break;
15034 case X86::BI__builtin_ia32_scatterdiv8si:
15035 IID = Intrinsic::x86_avx512_mask_scatterdiv8_si;
15036 break;
15037 case X86::BI__builtin_ia32_scattersiv2df:
15038 IID = Intrinsic::x86_avx512_mask_scattersiv2_df;
15039 break;
15040 case X86::BI__builtin_ia32_scattersiv2di:
15041 IID = Intrinsic::x86_avx512_mask_scattersiv2_di;
15042 break;
15043 case X86::BI__builtin_ia32_scattersiv4df:
15044 IID = Intrinsic::x86_avx512_mask_scattersiv4_df;
15045 break;
15046 case X86::BI__builtin_ia32_scattersiv4di:
15047 IID = Intrinsic::x86_avx512_mask_scattersiv4_di;
15048 break;
15049 case X86::BI__builtin_ia32_scattersiv4sf:
15050 IID = Intrinsic::x86_avx512_mask_scattersiv4_sf;
15051 break;
15052 case X86::BI__builtin_ia32_scattersiv4si:
15053 IID = Intrinsic::x86_avx512_mask_scattersiv4_si;
15054 break;
15055 case X86::BI__builtin_ia32_scattersiv8sf:
15056 IID = Intrinsic::x86_avx512_mask_scattersiv8_sf;
15057 break;
15058 case X86::BI__builtin_ia32_scattersiv8si:
15059 IID = Intrinsic::x86_avx512_mask_scattersiv8_si;
15060 break;
15061 }
15062
15063 unsigned MinElts = std::min(
15064 cast<llvm::FixedVectorType>(Ops[2]->getType())->getNumElements(),
15065 cast<llvm::FixedVectorType>(Ops[3]->getType())->getNumElements());
15066 Ops[1] = getMaskVecValue(*this, Ops[1], MinElts);
15067 Function *Intr = CGM.getIntrinsic(IID);
15068 return Builder.CreateCall(Intr, Ops);
15069 }
15070
15071 case X86::BI__builtin_ia32_vextractf128_pd256:
15072 case X86::BI__builtin_ia32_vextractf128_ps256:
15073 case X86::BI__builtin_ia32_vextractf128_si256:
15074 case X86::BI__builtin_ia32_extract128i256:
15075 case X86::BI__builtin_ia32_extractf64x4_mask:
15076 case X86::BI__builtin_ia32_extractf32x4_mask:
15077 case X86::BI__builtin_ia32_extracti64x4_mask:
15078 case X86::BI__builtin_ia32_extracti32x4_mask:
15079 case X86::BI__builtin_ia32_extractf32x8_mask:
15080 case X86::BI__builtin_ia32_extracti32x8_mask:
15081 case X86::BI__builtin_ia32_extractf32x4_256_mask:
15082 case X86::BI__builtin_ia32_extracti32x4_256_mask:
15083 case X86::BI__builtin_ia32_extractf64x2_256_mask:
15084 case X86::BI__builtin_ia32_extracti64x2_256_mask:
15085 case X86::BI__builtin_ia32_extractf64x2_512_mask:
15086 case X86::BI__builtin_ia32_extracti64x2_512_mask: {
15087 auto *DstTy = cast<llvm::FixedVectorType>(ConvertType(E->getType()));
15088 unsigned NumElts = DstTy->getNumElements();
15089 unsigned SrcNumElts =
15090 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
15091 unsigned SubVectors = SrcNumElts / NumElts;
15092 unsigned Index = cast<ConstantInt>(Ops[1])->getZExtValue();
15093 assert(llvm::isPowerOf2_32(SubVectors) && "Expected power of 2 subvectors");
15094 Index &= SubVectors - 1; // Remove any extra bits.
15095 Index *= NumElts;
15096
15097 int Indices[16];
15098 for (unsigned i = 0; i != NumElts; ++i)
15099 Indices[i] = i + Index;
15100
15101 Value *Res = Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),
15102 "extract");
15103
15104 if (Ops.size() == 4)
15105 Res = EmitX86Select(*this, Ops[3], Res, Ops[2]);
15106
15107 return Res;
15108 }
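  // Illustrative sketch: __builtin_ia32_extractf32x4_mask on a <16 x float>
  // source with an index of 1 becomes a shufflevector selecting elements
  // <4, 5, 6, 7>, followed by a masked select against the pass-through
  // operand.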
15109 case X86::BI__builtin_ia32_vinsertf128_pd256:
15110 case X86::BI__builtin_ia32_vinsertf128_ps256:
15111 case X86::BI__builtin_ia32_vinsertf128_si256:
15112 case X86::BI__builtin_ia32_insert128i256:
15113 case X86::BI__builtin_ia32_insertf64x4:
15114 case X86::BI__builtin_ia32_insertf32x4:
15115 case X86::BI__builtin_ia32_inserti64x4:
15116 case X86::BI__builtin_ia32_inserti32x4:
15117 case X86::BI__builtin_ia32_insertf32x8:
15118 case X86::BI__builtin_ia32_inserti32x8:
15119 case X86::BI__builtin_ia32_insertf32x4_256:
15120 case X86::BI__builtin_ia32_inserti32x4_256:
15121 case X86::BI__builtin_ia32_insertf64x2_256:
15122 case X86::BI__builtin_ia32_inserti64x2_256:
15123 case X86::BI__builtin_ia32_insertf64x2_512:
15124 case X86::BI__builtin_ia32_inserti64x2_512: {
15125 unsigned DstNumElts =
15126 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
15127 unsigned SrcNumElts =
15128 cast<llvm::FixedVectorType>(Ops[1]->getType())->getNumElements();
15129 unsigned SubVectors = DstNumElts / SrcNumElts;
15130 unsigned Index = cast<ConstantInt>(Ops[2])->getZExtValue();
15131 assert(llvm::isPowerOf2_32(SubVectors) && "Expected power of 2 subvectors");
15132 Index &= SubVectors - 1; // Remove any extra bits.
15133 Index *= SrcNumElts;
15134
15135 int Indices[16];
15136 for (unsigned i = 0; i != DstNumElts; ++i)
15137 Indices[i] = (i >= SrcNumElts) ? SrcNumElts + (i % SrcNumElts) : i;
15138
15139 Value *Op1 = Builder.CreateShuffleVector(
15140 Ops[1], ArrayRef(Indices, DstNumElts), "widen");
15141
15142 for (unsigned i = 0; i != DstNumElts; ++i) {
15143 if (i >= Index && i < (Index + SrcNumElts))
15144 Indices[i] = (i - Index) + DstNumElts;
15145 else
15146 Indices[i] = i;
15147 }
15148
15149 return Builder.CreateShuffleVector(Ops[0], Op1,
15150 ArrayRef(Indices, DstNumElts), "insert");
15151 }
15152 case X86::BI__builtin_ia32_pmovqd512_mask:
15153 case X86::BI__builtin_ia32_pmovwb512_mask: {
15154 Value *Res = Builder.CreateTrunc(Ops[0], Ops[1]->getType());
15155 return EmitX86Select(*this, Ops[2], Res, Ops[1]);
15156 }
15157 case X86::BI__builtin_ia32_pmovdb512_mask:
15158 case X86::BI__builtin_ia32_pmovdw512_mask:
15159 case X86::BI__builtin_ia32_pmovqw512_mask: {
15160 if (const auto *C = dyn_cast<Constant>(Ops[2]))
15161 if (C->isAllOnesValue())
15162 return Builder.CreateTrunc(Ops[0], Ops[1]->getType());
15163
15164 Intrinsic::ID IID;
15165 switch (BuiltinID) {
15166 default: llvm_unreachable("Unsupported intrinsic!");
15167 case X86::BI__builtin_ia32_pmovdb512_mask:
15168 IID = Intrinsic::x86_avx512_mask_pmov_db_512;
15169 break;
15170 case X86::BI__builtin_ia32_pmovdw512_mask:
15171 IID = Intrinsic::x86_avx512_mask_pmov_dw_512;
15172 break;
15173 case X86::BI__builtin_ia32_pmovqw512_mask:
15174 IID = Intrinsic::x86_avx512_mask_pmov_qw_512;
15175 break;
15176 }
15177
15178 Function *Intr = CGM.getIntrinsic(IID);
15179 return Builder.CreateCall(Intr, Ops);
15180 }
15181 case X86::BI__builtin_ia32_pblendw128:
15182 case X86::BI__builtin_ia32_blendpd:
15183 case X86::BI__builtin_ia32_blendps:
15184 case X86::BI__builtin_ia32_blendpd256:
15185 case X86::BI__builtin_ia32_blendps256:
15186 case X86::BI__builtin_ia32_pblendw256:
15187 case X86::BI__builtin_ia32_pblendd128:
15188 case X86::BI__builtin_ia32_pblendd256: {
15189 unsigned NumElts =
15190 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
15191 unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
15192
15193 int Indices[16];
15194 // If there are more than 8 elements, the immediate is used twice so make
15195 // sure we handle that.
15196 for (unsigned i = 0; i != NumElts; ++i)
15197 Indices[i] = ((Imm >> (i % 8)) & 0x1) ? NumElts + i : i;
15198
15199 return Builder.CreateShuffleVector(Ops[0], Ops[1],
15200 ArrayRef(Indices, NumElts), "blend");
15201 }
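  // Illustrative example: for __builtin_ia32_pblendd128 (used by the AVX2
  // _mm_blend_epi32(a, b, imm)) with an immediate of 0x5, bits 0 and 2 are
  // set, so the shuffle picks {b0, a1, b2, a3}: a set immediate bit selects
  // the corresponding element from the second operand.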
15202 case X86::BI__builtin_ia32_pshuflw:
15203 case X86::BI__builtin_ia32_pshuflw256:
15204 case X86::BI__builtin_ia32_pshuflw512: {
15205 uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
15206 auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
15207 unsigned NumElts = Ty->getNumElements();
15208
15209     // Splat the 8 bits of the immediate 4 times to help the loop wrap around.
15210 Imm = (Imm & 0xff) * 0x01010101;
15211
15212 int Indices[32];
15213 for (unsigned l = 0; l != NumElts; l += 8) {
15214 for (unsigned i = 0; i != 4; ++i) {
15215 Indices[l + i] = l + (Imm & 3);
15216 Imm >>= 2;
15217 }
15218 for (unsigned i = 4; i != 8; ++i)
15219 Indices[l + i] = l + i;
15220 }
15221
15222 return Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),
15223 "pshuflw");
15224 }
15225 case X86::BI__builtin_ia32_pshufhw:
15226 case X86::BI__builtin_ia32_pshufhw256:
15227 case X86::BI__builtin_ia32_pshufhw512: {
15228 uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
15229 auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
15230 unsigned NumElts = Ty->getNumElements();
15231
15232     // Splat the 8 bits of the immediate 4 times to help the loop wrap around.
15233 Imm = (Imm & 0xff) * 0x01010101;
15234
15235 int Indices[32];
15236 for (unsigned l = 0; l != NumElts; l += 8) {
15237 for (unsigned i = 0; i != 4; ++i)
15238 Indices[l + i] = l + i;
15239 for (unsigned i = 4; i != 8; ++i) {
15240 Indices[l + i] = l + 4 + (Imm & 3);
15241 Imm >>= 2;
15242 }
15243 }
15244
15245 return Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),
15246 "pshufhw");
15247 }
15248 case X86::BI__builtin_ia32_pshufd:
15249 case X86::BI__builtin_ia32_pshufd256:
15250 case X86::BI__builtin_ia32_pshufd512:
15251 case X86::BI__builtin_ia32_vpermilpd:
15252 case X86::BI__builtin_ia32_vpermilps:
15253 case X86::BI__builtin_ia32_vpermilpd256:
15254 case X86::BI__builtin_ia32_vpermilps256:
15255 case X86::BI__builtin_ia32_vpermilpd512:
15256 case X86::BI__builtin_ia32_vpermilps512: {
15257 uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
15258 auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
15259 unsigned NumElts = Ty->getNumElements();
15260 unsigned NumLanes = Ty->getPrimitiveSizeInBits() / 128;
15261 unsigned NumLaneElts = NumElts / NumLanes;
15262
15263     // Splat the 8 bits of the immediate 4 times to help the loop wrap around.
15264 Imm = (Imm & 0xff) * 0x01010101;
15265
15266 int Indices[16];
15267 for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
15268 for (unsigned i = 0; i != NumLaneElts; ++i) {
15269 Indices[i + l] = (Imm % NumLaneElts) + l;
15270 Imm /= NumLaneElts;
15271 }
15272 }
15273
15274 return Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),
15275 "permil");
15276 }
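  // Illustrative example: pshufd with an immediate of 0x1B (0b00011011)
  // decodes to per-lane indices {3, 2, 1, 0}, i.e. it reverses the four
  // elements of each 128-bit lane.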
15277 case X86::BI__builtin_ia32_shufpd:
15278 case X86::BI__builtin_ia32_shufpd256:
15279 case X86::BI__builtin_ia32_shufpd512:
15280 case X86::BI__builtin_ia32_shufps:
15281 case X86::BI__builtin_ia32_shufps256:
15282 case X86::BI__builtin_ia32_shufps512: {
15283 uint32_t Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
15284 auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
15285 unsigned NumElts = Ty->getNumElements();
15286 unsigned NumLanes = Ty->getPrimitiveSizeInBits() / 128;
15287 unsigned NumLaneElts = NumElts / NumLanes;
15288
15289     // Splat the 8 bits of the immediate 4 times to help the loop wrap around.
15290 Imm = (Imm & 0xff) * 0x01010101;
15291
15292 int Indices[16];
15293 for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
15294 for (unsigned i = 0; i != NumLaneElts; ++i) {
15295 unsigned Index = Imm % NumLaneElts;
15296 Imm /= NumLaneElts;
15297 if (i >= (NumLaneElts / 2))
15298 Index += NumElts;
15299 Indices[l + i] = l + Index;
15300 }
15301 }
15302
15303 return Builder.CreateShuffleVector(Ops[0], Ops[1],
15304 ArrayRef(Indices, NumElts), "shufp");
15305 }
15306 case X86::BI__builtin_ia32_permdi256:
15307 case X86::BI__builtin_ia32_permdf256:
15308 case X86::BI__builtin_ia32_permdi512:
15309 case X86::BI__builtin_ia32_permdf512: {
15310 unsigned Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
15311 auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
15312 unsigned NumElts = Ty->getNumElements();
15313
15314 // These intrinsics operate on 256-bit lanes of four 64-bit elements.
15315 int Indices[8];
15316 for (unsigned l = 0; l != NumElts; l += 4)
15317 for (unsigned i = 0; i != 4; ++i)
15318 Indices[l + i] = l + ((Imm >> (2 * i)) & 0x3);
15319
15320 return Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),
15321 "perm");
15322 }
15323 case X86::BI__builtin_ia32_palignr128:
15324 case X86::BI__builtin_ia32_palignr256:
15325 case X86::BI__builtin_ia32_palignr512: {
15326 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0xff;
15327
15328 unsigned NumElts =
15329 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
15330 assert(NumElts % 16 == 0);
15331
15332 // If palignr is shifting the pair of vectors more than the size of two
15333 // lanes, emit zero.
15334 if (ShiftVal >= 32)
15335 return llvm::Constant::getNullValue(ConvertType(E->getType()));
15336
15337 // If palignr is shifting the pair of input vectors more than one lane,
15338 // but less than two lanes, convert to shifting in zeroes.
15339 if (ShiftVal > 16) {
15340 ShiftVal -= 16;
15341 Ops[1] = Ops[0];
15342 Ops[0] = llvm::Constant::getNullValue(Ops[0]->getType());
15343 }
15344
15345 int Indices[64];
15346 // 256-bit palignr operates on 128-bit lanes so we need to handle that
15347 for (unsigned l = 0; l != NumElts; l += 16) {
15348 for (unsigned i = 0; i != 16; ++i) {
15349 unsigned Idx = ShiftVal + i;
15350 if (Idx >= 16)
15351 Idx += NumElts - 16; // End of lane, switch operand.
15352 Indices[l + i] = Idx + l;
15353 }
15354 }
15355
15356 return Builder.CreateShuffleVector(Ops[1], Ops[0],
15357 ArrayRef(Indices, NumElts), "palignr");
15358 }
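  // Illustrative example: for the 128-bit form behind _mm_alignr_epi8(a, b, 4)
  // the shuffle produces bytes b[4..15] followed by a[0..3], matching a 4-byte
  // right shift of the 32-byte concatenation a:b.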
15359 case X86::BI__builtin_ia32_alignd128:
15360 case X86::BI__builtin_ia32_alignd256:
15361 case X86::BI__builtin_ia32_alignd512:
15362 case X86::BI__builtin_ia32_alignq128:
15363 case X86::BI__builtin_ia32_alignq256:
15364 case X86::BI__builtin_ia32_alignq512: {
15365 unsigned NumElts =
15366 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
15367 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0xff;
15368
15369 // Mask the shift amount to width of a vector.
15370 ShiftVal &= NumElts - 1;
15371
15372 int Indices[16];
15373 for (unsigned i = 0; i != NumElts; ++i)
15374 Indices[i] = i + ShiftVal;
15375
15376 return Builder.CreateShuffleVector(Ops[1], Ops[0],
15377 ArrayRef(Indices, NumElts), "valign");
15378 }
15379 case X86::BI__builtin_ia32_shuf_f32x4_256:
15380 case X86::BI__builtin_ia32_shuf_f64x2_256:
15381 case X86::BI__builtin_ia32_shuf_i32x4_256:
15382 case X86::BI__builtin_ia32_shuf_i64x2_256:
15383 case X86::BI__builtin_ia32_shuf_f32x4:
15384 case X86::BI__builtin_ia32_shuf_f64x2:
15385 case X86::BI__builtin_ia32_shuf_i32x4:
15386 case X86::BI__builtin_ia32_shuf_i64x2: {
15387 unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
15388 auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
15389 unsigned NumElts = Ty->getNumElements();
15390 unsigned NumLanes = Ty->getPrimitiveSizeInBits() == 512 ? 4 : 2;
15391 unsigned NumLaneElts = NumElts / NumLanes;
15392
15393 int Indices[16];
15394 for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
15395 unsigned Index = (Imm % NumLanes) * NumLaneElts;
15396 Imm /= NumLanes; // Discard the bits we just used.
15397 if (l >= (NumElts / 2))
15398 Index += NumElts; // Switch to other source.
15399 for (unsigned i = 0; i != NumLaneElts; ++i) {
15400 Indices[l + i] = Index + i;
15401 }
15402 }
15403
15404 return Builder.CreateShuffleVector(Ops[0], Ops[1],
15405 ArrayRef(Indices, NumElts), "shuf");
15406 }
15407
15408 case X86::BI__builtin_ia32_vperm2f128_pd256:
15409 case X86::BI__builtin_ia32_vperm2f128_ps256:
15410 case X86::BI__builtin_ia32_vperm2f128_si256:
15411 case X86::BI__builtin_ia32_permti256: {
15412 unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
15413 unsigned NumElts =
15414 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
15415
15416 // This takes a very simple approach since there are two lanes and a
15417 // shuffle can have 2 inputs. So we reserve the first input for the first
15418 // lane and the second input for the second lane. This may result in
15419 // duplicate sources, but this can be dealt with in the backend.
15420
15421 Value *OutOps[2];
15422 int Indices[8];
15423 for (unsigned l = 0; l != 2; ++l) {
15424 // Determine the source for this lane.
15425 if (Imm & (1 << ((l * 4) + 3)))
15426 OutOps[l] = llvm::ConstantAggregateZero::get(Ops[0]->getType());
15427 else if (Imm & (1 << ((l * 4) + 1)))
15428 OutOps[l] = Ops[1];
15429 else
15430 OutOps[l] = Ops[0];
15431
15432 for (unsigned i = 0; i != NumElts/2; ++i) {
15433 // Start with ith element of the source for this lane.
15434 unsigned Idx = (l * NumElts) + i;
15435 // If bit 0 of the immediate half is set, switch to the high half of
15436 // the source.
15437 if (Imm & (1 << (l * 4)))
15438 Idx += NumElts/2;
15439 Indices[(l * (NumElts/2)) + i] = Idx;
15440 }
15441 }
15442
15443 return Builder.CreateShuffleVector(OutOps[0], OutOps[1],
15444 ArrayRef(Indices, NumElts), "vperm");
15445 }
15446
15447 case X86::BI__builtin_ia32_pslldqi128_byteshift:
15448 case X86::BI__builtin_ia32_pslldqi256_byteshift:
15449 case X86::BI__builtin_ia32_pslldqi512_byteshift: {
15450 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
15451 auto *ResultType = cast<llvm::FixedVectorType>(Ops[0]->getType());
15452 // Builtin type is vXi64 so multiply by 8 to get bytes.
15453 unsigned NumElts = ResultType->getNumElements() * 8;
15454
15455 // If pslldq is shifting the vector more than 15 bytes, emit zero.
15456 if (ShiftVal >= 16)
15457 return llvm::Constant::getNullValue(ResultType);
15458
15459 int Indices[64];
15460 // 256/512-bit pslldq operates on 128-bit lanes so we need to handle that
15461 for (unsigned l = 0; l != NumElts; l += 16) {
15462 for (unsigned i = 0; i != 16; ++i) {
15463 unsigned Idx = NumElts + i - ShiftVal;
15464 if (Idx < NumElts) Idx -= NumElts - 16; // end of lane, switch operand.
15465 Indices[l + i] = Idx + l;
15466 }
15467 }
15468
15469 auto *VecTy = llvm::FixedVectorType::get(Int8Ty, NumElts);
15470 Value *Cast = Builder.CreateBitCast(Ops[0], VecTy, "cast");
15471 Value *Zero = llvm::Constant::getNullValue(VecTy);
15472 Value *SV = Builder.CreateShuffleVector(
15473 Zero, Cast, ArrayRef(Indices, NumElts), "pslldq");
15474 return Builder.CreateBitCast(SV, Ops[0]->getType(), "cast");
15475 }
15476 case X86::BI__builtin_ia32_psrldqi128_byteshift:
15477 case X86::BI__builtin_ia32_psrldqi256_byteshift:
15478 case X86::BI__builtin_ia32_psrldqi512_byteshift: {
15479 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
15480 auto *ResultType = cast<llvm::FixedVectorType>(Ops[0]->getType());
15481 // Builtin type is vXi64 so multiply by 8 to get bytes.
15482 unsigned NumElts = ResultType->getNumElements() * 8;
15483
15484 // If psrldq is shifting the vector more than 15 bytes, emit zero.
15485 if (ShiftVal >= 16)
15486 return llvm::Constant::getNullValue(ResultType);
15487
15488 int Indices[64];
15489 // 256/512-bit psrldq operates on 128-bit lanes, so handle each lane separately.
15490 for (unsigned l = 0; l != NumElts; l += 16) {
15491 for (unsigned i = 0; i != 16; ++i) {
15492 unsigned Idx = i + ShiftVal;
15493 if (Idx >= 16) Idx += NumElts - 16; // end of lane, switch operand.
15494 Indices[l + i] = Idx + l;
15495 }
15496 }
15497
15498 auto *VecTy = llvm::FixedVectorType::get(Int8Ty, NumElts);
15499 Value *Cast = Builder.CreateBitCast(Ops[0], VecTy, "cast");
15500 Value *Zero = llvm::Constant::getNullValue(VecTy);
15501 Value *SV = Builder.CreateShuffleVector(
15502 Cast, Zero, ArrayRef(Indices, NumElts), "psrldq");
15503 return Builder.CreateBitCast(SV, ResultType, "cast");
15504 }
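// kshiftl/kshiftr operate on AVX-512 mask registers. The integer mask is first
// converted to a vXi1 vector, the shift is expressed as a shuffle with an
// all-zero vector, and the result is bitcast back to the integer mask type.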
15505 case X86::BI__builtin_ia32_kshiftliqi:
15506 case X86::BI__builtin_ia32_kshiftlihi:
15507 case X86::BI__builtin_ia32_kshiftlisi:
15508 case X86::BI__builtin_ia32_kshiftlidi: {
15509 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
15510 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
15511
15512 if (ShiftVal >= NumElts)
15513 return llvm::Constant::getNullValue(Ops[0]->getType());
15514
15515 Value *In = getMaskVecValue(*this, Ops[0], NumElts);
15516
15517 int Indices[64];
15518 for (unsigned i = 0; i != NumElts; ++i)
15519 Indices[i] = NumElts + i - ShiftVal;
15520
15521 Value *Zero = llvm::Constant::getNullValue(In->getType());
15522 Value *SV = Builder.CreateShuffleVector(
15523 Zero, In, ArrayRef(Indices, NumElts), "kshiftl");
15524 return Builder.CreateBitCast(SV, Ops[0]->getType());
15525 }
15526 case X86::BI__builtin_ia32_kshiftriqi:
15527 case X86::BI__builtin_ia32_kshiftrihi:
15528 case X86::BI__builtin_ia32_kshiftrisi:
15529 case X86::BI__builtin_ia32_kshiftridi: {
15530 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
15531 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
15532
15533 if (ShiftVal >= NumElts)
15534 return llvm::Constant::getNullValue(Ops[0]->getType());
15535
15536 Value *In = getMaskVecValue(*this, Ops[0], NumElts);
15537
15538 int Indices[64];
15539 for (unsigned i = 0; i != NumElts; ++i)
15540 Indices[i] = i + ShiftVal;
15541
15542 Value *Zero = llvm::Constant::getNullValue(In->getType());
15543 Value *SV = Builder.CreateShuffleVector(
15544 In, Zero, ArrayRef(Indices, NumElts), "kshiftr");
15545 return Builder.CreateBitCast(SV, Ops[0]->getType());
15546 }
15547 case X86::BI__builtin_ia32_movnti:
15548 case X86::BI__builtin_ia32_movnti64:
15549 case X86::BI__builtin_ia32_movntsd:
15550 case X86::BI__builtin_ia32_movntss: {
15551 llvm::MDNode *Node = llvm::MDNode::get(
15552 getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1)));
15553
15554 Value *Ptr = Ops[0];
15555 Value *Src = Ops[1];
15556
15557 // Extract the 0'th element of the source vector.
15558 if (BuiltinID == X86::BI__builtin_ia32_movntsd ||
15559 BuiltinID == X86::BI__builtin_ia32_movntss)
15560 Src = Builder.CreateExtractElement(Src, (uint64_t)0, "extract");
15561
15562 // Unaligned nontemporal store of the scalar value.
15563 StoreInst *SI = Builder.CreateDefaultAlignedStore(Src, Ptr);
15564 SI->setMetadata(llvm::LLVMContext::MD_nontemporal, Node);
15565 SI->setAlignment(llvm::Align(1));
15566 return SI;
15567 }
15568 // Rotate is a special case of funnel shift - 1st 2 args are the same.
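// Both data operands passed to EmitX86FunnelShift below are Ops[0], so the
// funnel shift degenerates to a rotate by Ops[1].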
15569 case X86::BI__builtin_ia32_vprotb:
15570 case X86::BI__builtin_ia32_vprotw:
15571 case X86::BI__builtin_ia32_vprotd:
15572 case X86::BI__builtin_ia32_vprotq:
15573 case X86::BI__builtin_ia32_vprotbi:
15574 case X86::BI__builtin_ia32_vprotwi:
15575 case X86::BI__builtin_ia32_vprotdi:
15576 case X86::BI__builtin_ia32_vprotqi:
15577 case X86::BI__builtin_ia32_prold128:
15578 case X86::BI__builtin_ia32_prold256:
15579 case X86::BI__builtin_ia32_prold512:
15580 case X86::BI__builtin_ia32_prolq128:
15581 case X86::BI__builtin_ia32_prolq256:
15582 case X86::BI__builtin_ia32_prolq512:
15583 case X86::BI__builtin_ia32_prolvd128:
15584 case X86::BI__builtin_ia32_prolvd256:
15585 case X86::BI__builtin_ia32_prolvd512:
15586 case X86::BI__builtin_ia32_prolvq128:
15587 case X86::BI__builtin_ia32_prolvq256:
15588 case X86::BI__builtin_ia32_prolvq512:
15589 return EmitX86FunnelShift(*this, Ops[0], Ops[0], Ops[1], false);
15590 case X86::BI__builtin_ia32_prord128:
15591 case X86::BI__builtin_ia32_prord256:
15592 case X86::BI__builtin_ia32_prord512:
15593 case X86::BI__builtin_ia32_prorq128:
15594 case X86::BI__builtin_ia32_prorq256:
15595 case X86::BI__builtin_ia32_prorq512:
15596 case X86::BI__builtin_ia32_prorvd128:
15597 case X86::BI__builtin_ia32_prorvd256:
15598 case X86::BI__builtin_ia32_prorvd512:
15599 case X86::BI__builtin_ia32_prorvq128:
15600 case X86::BI__builtin_ia32_prorvq256:
15601 case X86::BI__builtin_ia32_prorvq512:
15602 return EmitX86FunnelShift(*this, Ops[0], Ops[0], Ops[1], true);
15603 case X86::BI__builtin_ia32_selectb_128:
15604 case X86::BI__builtin_ia32_selectb_256:
15605 case X86::BI__builtin_ia32_selectb_512:
15606 case X86::BI__builtin_ia32_selectw_128:
15607 case X86::BI__builtin_ia32_selectw_256:
15608 case X86::BI__builtin_ia32_selectw_512:
15609 case X86::BI__builtin_ia32_selectd_128:
15610 case X86::BI__builtin_ia32_selectd_256:
15611 case X86::BI__builtin_ia32_selectd_512:
15612 case X86::BI__builtin_ia32_selectq_128:
15613 case X86::BI__builtin_ia32_selectq_256:
15614 case X86::BI__builtin_ia32_selectq_512:
15615 case X86::BI__builtin_ia32_selectph_128:
15616 case X86::BI__builtin_ia32_selectph_256:
15617 case X86::BI__builtin_ia32_selectph_512:
15618 case X86::BI__builtin_ia32_selectpbf_128:
15619 case X86::BI__builtin_ia32_selectpbf_256:
15620 case X86::BI__builtin_ia32_selectpbf_512:
15621 case X86::BI__builtin_ia32_selectps_128:
15622 case X86::BI__builtin_ia32_selectps_256:
15623 case X86::BI__builtin_ia32_selectps_512:
15624 case X86::BI__builtin_ia32_selectpd_128:
15625 case X86::BI__builtin_ia32_selectpd_256:
15626 case X86::BI__builtin_ia32_selectpd_512:
15627 return EmitX86Select(*this, Ops[0], Ops[1], Ops[2]);
15628 case X86::BI__builtin_ia32_selectsh_128:
15629 case X86::BI__builtin_ia32_selectsbf_128:
15630 case X86::BI__builtin_ia32_selectss_128:
15631 case X86::BI__builtin_ia32_selectsd_128: {
15632 Value *A = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
15633 Value *B = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
15634 A = EmitX86ScalarSelect(*this, Ops[0], A, B);
15635 return Builder.CreateInsertElement(Ops[1], A, (uint64_t)0);
15636 }
15637 case X86::BI__builtin_ia32_cmpb128_mask:
15638 case X86::BI__builtin_ia32_cmpb256_mask:
15639 case X86::BI__builtin_ia32_cmpb512_mask:
15640 case X86::BI__builtin_ia32_cmpw128_mask:
15641 case X86::BI__builtin_ia32_cmpw256_mask:
15642 case X86::BI__builtin_ia32_cmpw512_mask:
15643 case X86::BI__builtin_ia32_cmpd128_mask:
15644 case X86::BI__builtin_ia32_cmpd256_mask:
15645 case X86::BI__builtin_ia32_cmpd512_mask:
15646 case X86::BI__builtin_ia32_cmpq128_mask:
15647 case X86::BI__builtin_ia32_cmpq256_mask:
15648 case X86::BI__builtin_ia32_cmpq512_mask: {
15649 unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
15650 return EmitX86MaskedCompare(*this, CC, true, Ops);
15651 }
15652 case X86::BI__builtin_ia32_ucmpb128_mask:
15653 case X86::BI__builtin_ia32_ucmpb256_mask:
15654 case X86::BI__builtin_ia32_ucmpb512_mask:
15655 case X86::BI__builtin_ia32_ucmpw128_mask:
15656 case X86::BI__builtin_ia32_ucmpw256_mask:
15657 case X86::BI__builtin_ia32_ucmpw512_mask:
15658 case X86::BI__builtin_ia32_ucmpd128_mask:
15659 case X86::BI__builtin_ia32_ucmpd256_mask:
15660 case X86::BI__builtin_ia32_ucmpd512_mask:
15661 case X86::BI__builtin_ia32_ucmpq128_mask:
15662 case X86::BI__builtin_ia32_ucmpq256_mask:
15663 case X86::BI__builtin_ia32_ucmpq512_mask: {
15664 unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
15665 return EmitX86MaskedCompare(*this, CC, false, Ops);
15666 }
15667 case X86::BI__builtin_ia32_vpcomb:
15668 case X86::BI__builtin_ia32_vpcomw:
15669 case X86::BI__builtin_ia32_vpcomd:
15670 case X86::BI__builtin_ia32_vpcomq:
15671 return EmitX86vpcom(*this, Ops, true);
15672 case X86::BI__builtin_ia32_vpcomub:
15673 case X86::BI__builtin_ia32_vpcomuw:
15674 case X86::BI__builtin_ia32_vpcomud:
15675 case X86::BI__builtin_ia32_vpcomuq:
15676 return EmitX86vpcom(*this, Ops, false);
15677
15678 case X86::BI__builtin_ia32_kortestcqi:
15679 case X86::BI__builtin_ia32_kortestchi:
15680 case X86::BI__builtin_ia32_kortestcsi:
15681 case X86::BI__builtin_ia32_kortestcdi: {
15682 Value *Or = EmitX86MaskLogic(*this, Instruction::Or, Ops);
15683 Value *C = llvm::Constant::getAllOnesValue(Ops[0]->getType());
15684 Value *Cmp = Builder.CreateICmpEQ(Or, C);
15685 return Builder.CreateZExt(Cmp, ConvertType(E->getType()));
15686 }
15687 case X86::BI__builtin_ia32_kortestzqi:
15688 case X86::BI__builtin_ia32_kortestzhi:
15689 case X86::BI__builtin_ia32_kortestzsi:
15690 case X86::BI__builtin_ia32_kortestzdi: {
15691 Value *Or = EmitX86MaskLogic(*this, Instruction::Or, Ops);
15692 Value *C = llvm::Constant::getNullValue(Ops[0]->getType());
15693 Value *Cmp = Builder.CreateICmpEQ(Or, C);
15694 return Builder.CreateZExt(Cmp, ConvertType(E->getType()));
15695 }
15696
15697 case X86::BI__builtin_ia32_ktestcqi:
15698 case X86::BI__builtin_ia32_ktestzqi:
15699 case X86::BI__builtin_ia32_ktestchi:
15700 case X86::BI__builtin_ia32_ktestzhi:
15701 case X86::BI__builtin_ia32_ktestcsi:
15702 case X86::BI__builtin_ia32_ktestzsi:
15703 case X86::BI__builtin_ia32_ktestcdi:
15704 case X86::BI__builtin_ia32_ktestzdi: {
15705 Intrinsic::ID IID;
15706 switch (BuiltinID) {
15707 default: llvm_unreachable("Unsupported intrinsic!");
15708 case X86::BI__builtin_ia32_ktestcqi:
15709 IID = Intrinsic::x86_avx512_ktestc_b;
15710 break;
15711 case X86::BI__builtin_ia32_ktestzqi:
15712 IID = Intrinsic::x86_avx512_ktestz_b;
15713 break;
15714 case X86::BI__builtin_ia32_ktestchi:
15715 IID = Intrinsic::x86_avx512_ktestc_w;
15716 break;
15717 case X86::BI__builtin_ia32_ktestzhi:
15718 IID = Intrinsic::x86_avx512_ktestz_w;
15719 break;
15720 case X86::BI__builtin_ia32_ktestcsi:
15721 IID = Intrinsic::x86_avx512_ktestc_d;
15722 break;
15723 case X86::BI__builtin_ia32_ktestzsi:
15724 IID = Intrinsic::x86_avx512_ktestz_d;
15725 break;
15726 case X86::BI__builtin_ia32_ktestcdi:
15727 IID = Intrinsic::x86_avx512_ktestc_q;
15728 break;
15729 case X86::BI__builtin_ia32_ktestzdi:
15730 IID = Intrinsic::x86_avx512_ktestz_q;
15731 break;
15732 }
15733
15734 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
15735 Value *LHS = getMaskVecValue(*this, Ops[0], NumElts);
15736 Value *RHS = getMaskVecValue(*this, Ops[1], NumElts);
15737 Function *Intr = CGM.getIntrinsic(IID);
15738 return Builder.CreateCall(Intr, {LHS, RHS});
15739 }
15740
15741 case X86::BI__builtin_ia32_kaddqi:
15742 case X86::BI__builtin_ia32_kaddhi:
15743 case X86::BI__builtin_ia32_kaddsi:
15744 case X86::BI__builtin_ia32_kadddi: {
15745 Intrinsic::ID IID;
15746 switch (BuiltinID) {
15747 default: llvm_unreachable("Unsupported intrinsic!");
15748 case X86::BI__builtin_ia32_kaddqi:
15749 IID = Intrinsic::x86_avx512_kadd_b;
15750 break;
15751 case X86::BI__builtin_ia32_kaddhi:
15752 IID = Intrinsic::x86_avx512_kadd_w;
15753 break;
15754 case X86::BI__builtin_ia32_kaddsi:
15755 IID = Intrinsic::x86_avx512_kadd_d;
15756 break;
15757 case X86::BI__builtin_ia32_kadddi:
15758 IID = Intrinsic::x86_avx512_kadd_q;
15759 break;
15760 }
15761
15762 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
15763 Value *LHS = getMaskVecValue(*this, Ops[0], NumElts);
15764 Value *RHS = getMaskVecValue(*this, Ops[1], NumElts);
15765 Function *Intr = CGM.getIntrinsic(IID);
15766 Value *Res = Builder.CreateCall(Intr, {LHS, RHS});
15767 return Builder.CreateBitCast(Res, Ops[0]->getType());
15768 }
15769 case X86::BI__builtin_ia32_kandqi:
15770 case X86::BI__builtin_ia32_kandhi:
15771 case X86::BI__builtin_ia32_kandsi:
15772 case X86::BI__builtin_ia32_kanddi:
15773 return EmitX86MaskLogic(*this, Instruction::And, Ops);
15774 case X86::BI__builtin_ia32_kandnqi:
15775 case X86::BI__builtin_ia32_kandnhi:
15776 case X86::BI__builtin_ia32_kandnsi:
15777 case X86::BI__builtin_ia32_kandndi:
15778 return EmitX86MaskLogic(*this, Instruction::And, Ops, true);
15779 case X86::BI__builtin_ia32_korqi:
15780 case X86::BI__builtin_ia32_korhi:
15781 case X86::BI__builtin_ia32_korsi:
15782 case X86::BI__builtin_ia32_kordi:
15783 return EmitX86MaskLogic(*this, Instruction::Or, Ops);
15784 case X86::BI__builtin_ia32_kxnorqi:
15785 case X86::BI__builtin_ia32_kxnorhi:
15786 case X86::BI__builtin_ia32_kxnorsi:
15787 case X86::BI__builtin_ia32_kxnordi:
15788 return EmitX86MaskLogic(*this, Instruction::Xor, Ops, true);
15789 case X86::BI__builtin_ia32_kxorqi:
15790 case X86::BI__builtin_ia32_kxorhi:
15791 case X86::BI__builtin_ia32_kxorsi:
15792 case X86::BI__builtin_ia32_kxordi:
15793 return EmitX86MaskLogic(*this, Instruction::Xor, Ops);
15794 case X86::BI__builtin_ia32_knotqi:
15795 case X86::BI__builtin_ia32_knothi:
15796 case X86::BI__builtin_ia32_knotsi:
15797 case X86::BI__builtin_ia32_knotdi: {
15798 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
15799 Value *Res = getMaskVecValue(*this, Ops[0], NumElts);
15800 return Builder.CreateBitCast(Builder.CreateNot(Res),
15801 Ops[0]->getType());
15802 }
15803 case X86::BI__builtin_ia32_kmovb:
15804 case X86::BI__builtin_ia32_kmovw:
15805 case X86::BI__builtin_ia32_kmovd:
15806 case X86::BI__builtin_ia32_kmovq: {
15807 // Bitcast to vXi1 type and then back to integer. This gets the mask
15808 // register type into the IR, but might be optimized out depending on
15809 // what's around it.
15810 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
15811 Value *Res = getMaskVecValue(*this, Ops[0], NumElts);
15812 return Builder.CreateBitCast(Res, Ops[0]->getType());
15813 }
15814
15815 case X86::BI__builtin_ia32_kunpckdi:
15816 case X86::BI__builtin_ia32_kunpcksi:
15817 case X86::BI__builtin_ia32_kunpckhi: {
15818 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
15819 Value *LHS = getMaskVecValue(*this, Ops[0], NumElts);
15820 Value *RHS = getMaskVecValue(*this, Ops[1], NumElts);
15821 int Indices[64];
15822 for (unsigned i = 0; i != NumElts; ++i)
15823 Indices[i] = i;
15824
15825 // First extract half of each vector. This gives better codegen than
15826 // doing it in a single shuffle.
15827 LHS = Builder.CreateShuffleVector(LHS, LHS, ArrayRef(Indices, NumElts / 2));
15828 RHS = Builder.CreateShuffleVector(RHS, RHS, ArrayRef(Indices, NumElts / 2));
15829 // Concat the vectors.
15830 // NOTE: Operands are swapped to match the intrinsic definition.
15831 Value *Res =
15832 Builder.CreateShuffleVector(RHS, LHS, ArrayRef(Indices, NumElts));
15833 return Builder.CreateBitCast(Res, Ops[0]->getType());
15834 }
15835
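// vplzcnt maps directly onto llvm.ctlz with is_zero_poison set to false, so a
// zero element yields the element bit width rather than poison.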
15836 case X86::BI__builtin_ia32_vplzcntd_128:
15837 case X86::BI__builtin_ia32_vplzcntd_256:
15838 case X86::BI__builtin_ia32_vplzcntd_512:
15839 case X86::BI__builtin_ia32_vplzcntq_128:
15840 case X86::BI__builtin_ia32_vplzcntq_256:
15841 case X86::BI__builtin_ia32_vplzcntq_512: {
15842 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType());
15843 return Builder.CreateCall(F, {Ops[0],Builder.getInt1(false)});
15844 }
15845 case X86::BI__builtin_ia32_sqrtss:
15846 case X86::BI__builtin_ia32_sqrtsd: {
15847 Value *A = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
15848 Function *F;
15849 if (Builder.getIsFPConstrained()) {
15850 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
15851 F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt,
15852 A->getType());
15853 A = Builder.CreateConstrainedFPCall(F, {A});
15854 } else {
15855 F = CGM.getIntrinsic(Intrinsic::sqrt, A->getType());
15856 A = Builder.CreateCall(F, {A});
15857 }
15858 return Builder.CreateInsertElement(Ops[0], A, (uint64_t)0);
15859 }
15860 case X86::BI__builtin_ia32_sqrtsh_round_mask:
15861 case X86::BI__builtin_ia32_sqrtsd_round_mask:
15862 case X86::BI__builtin_ia32_sqrtss_round_mask: {
15863 unsigned CC = cast<llvm::ConstantInt>(Ops[4])->getZExtValue();
15864 // Lower to a generic sqrt only if the rounding mode is 4 (AKA CUR_DIRECTION);
15865 // otherwise keep the target-specific intrinsic.
15866 if (CC != 4) {
15867 Intrinsic::ID IID;
15868
15869 switch (BuiltinID) {
15870 default:
15871 llvm_unreachable("Unsupported intrinsic!");
15872 case X86::BI__builtin_ia32_sqrtsh_round_mask:
15873 IID = Intrinsic::x86_avx512fp16_mask_sqrt_sh;
15874 break;
15875 case X86::BI__builtin_ia32_sqrtsd_round_mask:
15876 IID = Intrinsic::x86_avx512_mask_sqrt_sd;
15877 break;
15878 case X86::BI__builtin_ia32_sqrtss_round_mask:
15879 IID = Intrinsic::x86_avx512_mask_sqrt_ss;
15880 break;
15881 }
15882 return Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
15883 }
15884 Value *A = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
15885 Function *F;
15886 if (Builder.getIsFPConstrained()) {
15887 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
15888 F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt,
15889 A->getType());
15890 A = Builder.CreateConstrainedFPCall(F, A);
15891 } else {
15892 F = CGM.getIntrinsic(Intrinsic::sqrt, A->getType());
15893 A = Builder.CreateCall(F, A);
15894 }
15895 Value *Src = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
15896 A = EmitX86ScalarSelect(*this, Ops[3], A, Src);
15897 return Builder.CreateInsertElement(Ops[0], A, (uint64_t)0);
15898 }
15899 case X86::BI__builtin_ia32_sqrtpd256:
15900 case X86::BI__builtin_ia32_sqrtpd:
15901 case X86::BI__builtin_ia32_sqrtps256:
15902 case X86::BI__builtin_ia32_sqrtps:
15903 case X86::BI__builtin_ia32_sqrtph256:
15904 case X86::BI__builtin_ia32_sqrtph:
15905 case X86::BI__builtin_ia32_sqrtph512:
15906 case X86::BI__builtin_ia32_sqrtps512:
15907 case X86::BI__builtin_ia32_sqrtpd512: {
15908 if (Ops.size() == 2) {
15909 unsigned CC = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
15910 // Lower to a generic sqrt only if the rounding mode is 4 (AKA CUR_DIRECTION);
15911 // otherwise keep the target-specific intrinsic.
15912 if (CC != 4) {
15913 Intrinsic::ID IID;
15914
15915 switch (BuiltinID) {
15916 default:
15917 llvm_unreachable("Unsupported intrinsic!");
15918 case X86::BI__builtin_ia32_sqrtph512:
15919 IID = Intrinsic::x86_avx512fp16_sqrt_ph_512;
15920 break;
15921 case X86::BI__builtin_ia32_sqrtps512:
15922 IID = Intrinsic::x86_avx512_sqrt_ps_512;
15923 break;
15924 case X86::BI__builtin_ia32_sqrtpd512:
15925 IID = Intrinsic::x86_avx512_sqrt_pd_512;
15926 break;
15927 }
15928 return Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
15929 }
15930 }
15931 if (Builder.getIsFPConstrained()) {
15932 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
15933 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt,
15934 Ops[0]->getType());
15935 return Builder.CreateConstrainedFPCall(F, Ops[0]);
15936 } else {
15937 Function *F = CGM.getIntrinsic(Intrinsic::sqrt, Ops[0]->getType());
15938 return Builder.CreateCall(F, Ops[0]);
15939 }
15940 }
15941
15942 case X86::BI__builtin_ia32_pmuludq128:
15943 case X86::BI__builtin_ia32_pmuludq256:
15944 case X86::BI__builtin_ia32_pmuludq512:
15945 return EmitX86Muldq(*this, /*IsSigned*/false, Ops);
15946
15947 case X86::BI__builtin_ia32_pmuldq128:
15948 case X86::BI__builtin_ia32_pmuldq256:
15949 case X86::BI__builtin_ia32_pmuldq512:
15950 return EmitX86Muldq(*this, /*IsSigned*/true, Ops);
15951
15952 case X86::BI__builtin_ia32_pternlogd512_mask:
15953 case X86::BI__builtin_ia32_pternlogq512_mask:
15954 case X86::BI__builtin_ia32_pternlogd128_mask:
15955 case X86::BI__builtin_ia32_pternlogd256_mask:
15956 case X86::BI__builtin_ia32_pternlogq128_mask:
15957 case X86::BI__builtin_ia32_pternlogq256_mask:
15958 return EmitX86Ternlog(*this, /*ZeroMask*/false, Ops);
15959
15960 case X86::BI__builtin_ia32_pternlogd512_maskz:
15961 case X86::BI__builtin_ia32_pternlogq512_maskz:
15962 case X86::BI__builtin_ia32_pternlogd128_maskz:
15963 case X86::BI__builtin_ia32_pternlogd256_maskz:
15964 case X86::BI__builtin_ia32_pternlogq128_maskz:
15965 case X86::BI__builtin_ia32_pternlogq256_maskz:
15966 return EmitX86Ternlog(*this, /*ZeroMask*/true, Ops);
15967
15968 case X86::BI__builtin_ia32_vpshldd128:
15969 case X86::BI__builtin_ia32_vpshldd256:
15970 case X86::BI__builtin_ia32_vpshldd512:
15971 case X86::BI__builtin_ia32_vpshldq128:
15972 case X86::BI__builtin_ia32_vpshldq256:
15973 case X86::BI__builtin_ia32_vpshldq512:
15974 case X86::BI__builtin_ia32_vpshldw128:
15975 case X86::BI__builtin_ia32_vpshldw256:
15976 case X86::BI__builtin_ia32_vpshldw512:
15977 return EmitX86FunnelShift(*this, Ops[0], Ops[1], Ops[2], false);
15978
15979 case X86::BI__builtin_ia32_vpshrdd128:
15980 case X86::BI__builtin_ia32_vpshrdd256:
15981 case X86::BI__builtin_ia32_vpshrdd512:
15982 case X86::BI__builtin_ia32_vpshrdq128:
15983 case X86::BI__builtin_ia32_vpshrdq256:
15984 case X86::BI__builtin_ia32_vpshrdq512:
15985 case X86::BI__builtin_ia32_vpshrdw128:
15986 case X86::BI__builtin_ia32_vpshrdw256:
15987 case X86::BI__builtin_ia32_vpshrdw512:
15988 // Ops 0 and 1 are swapped.
15989 return EmitX86FunnelShift(*this, Ops[1], Ops[0], Ops[2], true);
15990
15991 case X86::BI__builtin_ia32_vpshldvd128:
15992 case X86::BI__builtin_ia32_vpshldvd256:
15993 case X86::BI__builtin_ia32_vpshldvd512:
15994 case X86::BI__builtin_ia32_vpshldvq128:
15995 case X86::BI__builtin_ia32_vpshldvq256:
15996 case X86::BI__builtin_ia32_vpshldvq512:
15997 case X86::BI__builtin_ia32_vpshldvw128:
15998 case X86::BI__builtin_ia32_vpshldvw256:
15999 case X86::BI__builtin_ia32_vpshldvw512:
16000 return EmitX86FunnelShift(*this, Ops[0], Ops[1], Ops[2], false);
16001
16002 case X86::BI__builtin_ia32_vpshrdvd128:
16003 case X86::BI__builtin_ia32_vpshrdvd256:
16004 case X86::BI__builtin_ia32_vpshrdvd512:
16005 case X86::BI__builtin_ia32_vpshrdvq128:
16006 case X86::BI__builtin_ia32_vpshrdvq256:
16007 case X86::BI__builtin_ia32_vpshrdvq512:
16008 case X86::BI__builtin_ia32_vpshrdvw128:
16009 case X86::BI__builtin_ia32_vpshrdvw256:
16010 case X86::BI__builtin_ia32_vpshrdvw512:
16011 // Ops 0 and 1 are swapped.
16012 return EmitX86FunnelShift(*this, Ops[1], Ops[0], Ops[2], true);
16013
16014 // Reductions
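// The fadd/fmul reductions take a start value in Ops[0] and the vector in
// Ops[1]. The reassoc fast-math flag is set on the call so the reduction may
// be evaluated in any association order.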
16015 case X86::BI__builtin_ia32_reduce_fadd_pd512:
16016 case X86::BI__builtin_ia32_reduce_fadd_ps512:
16017 case X86::BI__builtin_ia32_reduce_fadd_ph512:
16018 case X86::BI__builtin_ia32_reduce_fadd_ph256:
16019 case X86::BI__builtin_ia32_reduce_fadd_ph128: {
16020 Function *F =
16021 CGM.getIntrinsic(Intrinsic::vector_reduce_fadd, Ops[1]->getType());
16022 IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
16023 Builder.getFastMathFlags().setAllowReassoc();
16024 return Builder.CreateCall(F, {Ops[0], Ops[1]});
16025 }
16026 case X86::BI__builtin_ia32_reduce_fmul_pd512:
16027 case X86::BI__builtin_ia32_reduce_fmul_ps512:
16028 case X86::BI__builtin_ia32_reduce_fmul_ph512:
16029 case X86::BI__builtin_ia32_reduce_fmul_ph256:
16030 case X86::BI__builtin_ia32_reduce_fmul_ph128: {
16031 Function *F =
16032 CGM.getIntrinsic(Intrinsic::vector_reduce_fmul, Ops[1]->getType());
16033 IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
16034 Builder.getFastMathFlags().setAllowReassoc();
16035 return Builder.CreateCall(F, {Ops[0], Ops[1]});
16036 }
16037 case X86::BI__builtin_ia32_reduce_fmax_pd512:
16038 case X86::BI__builtin_ia32_reduce_fmax_ps512:
16039 case X86::BI__builtin_ia32_reduce_fmax_ph512:
16040 case X86::BI__builtin_ia32_reduce_fmax_ph256:
16041 case X86::BI__builtin_ia32_reduce_fmax_ph128: {
16042 Function *F =
16043 CGM.getIntrinsic(Intrinsic::vector_reduce_fmax, Ops[0]->getType());
16044 IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
16045 Builder.getFastMathFlags().setNoNaNs();
16046 return Builder.CreateCall(F, {Ops[0]});
16047 }
16048 case X86::BI__builtin_ia32_reduce_fmin_pd512:
16049 case X86::BI__builtin_ia32_reduce_fmin_ps512:
16050 case X86::BI__builtin_ia32_reduce_fmin_ph512:
16051 case X86::BI__builtin_ia32_reduce_fmin_ph256:
16052 case X86::BI__builtin_ia32_reduce_fmin_ph128: {
16053 Function *F =
16054 CGM.getIntrinsic(Intrinsic::vector_reduce_fmin, Ops[0]->getType());
16055 IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
16056 Builder.getFastMathFlags().setNoNaNs();
16057 return Builder.CreateCall(F, {Ops[0]});
16058 }
16059
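// The rdrand/rdseed "step" builtins store the generated value through the
// pointer argument and return the success flag: element 0 of the intrinsic's
// result aggregate is the random value, element 1 is the flag.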
16060 case X86::BI__builtin_ia32_rdrand16_step:
16061 case X86::BI__builtin_ia32_rdrand32_step:
16062 case X86::BI__builtin_ia32_rdrand64_step:
16063 case X86::BI__builtin_ia32_rdseed16_step:
16064 case X86::BI__builtin_ia32_rdseed32_step:
16065 case X86::BI__builtin_ia32_rdseed64_step: {
16066 Intrinsic::ID ID;
16067 switch (BuiltinID) {
16068 default: llvm_unreachable("Unsupported intrinsic!");
16069 case X86::BI__builtin_ia32_rdrand16_step:
16070 ID = Intrinsic::x86_rdrand_16;
16071 break;
16072 case X86::BI__builtin_ia32_rdrand32_step:
16073 ID = Intrinsic::x86_rdrand_32;
16074 break;
16075 case X86::BI__builtin_ia32_rdrand64_step:
16076 ID = Intrinsic::x86_rdrand_64;
16077 break;
16078 case X86::BI__builtin_ia32_rdseed16_step:
16079 ID = Intrinsic::x86_rdseed_16;
16080 break;
16081 case X86::BI__builtin_ia32_rdseed32_step:
16082 ID = Intrinsic::x86_rdseed_32;
16083 break;
16084 case X86::BI__builtin_ia32_rdseed64_step:
16085 ID = Intrinsic::x86_rdseed_64;
16086 break;
16087 }
16088
16089 Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID));
16090 Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 0),
16091 Ops[0]);
16092 return Builder.CreateExtractValue(Call, 1);
16093 }
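// The addcarry/subborrow intrinsics return an aggregate of
// {carry/borrow-out, result}: the wide result is stored through Ops[3] and the
// carry/borrow-out byte is returned to the caller.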
16094 case X86::BI__builtin_ia32_addcarryx_u32:
16095 case X86::BI__builtin_ia32_addcarryx_u64:
16096 case X86::BI__builtin_ia32_subborrow_u32:
16097 case X86::BI__builtin_ia32_subborrow_u64: {
16098 Intrinsic::ID IID;
16099 switch (BuiltinID) {
16100 default: llvm_unreachable("Unsupported intrinsic!");
16101 case X86::BI__builtin_ia32_addcarryx_u32:
16102 IID = Intrinsic::x86_addcarry_32;
16103 break;
16104 case X86::BI__builtin_ia32_addcarryx_u64:
16105 IID = Intrinsic::x86_addcarry_64;
16106 break;
16107 case X86::BI__builtin_ia32_subborrow_u32:
16108 IID = Intrinsic::x86_subborrow_32;
16109 break;
16110 case X86::BI__builtin_ia32_subborrow_u64:
16111 IID = Intrinsic::x86_subborrow_64;
16112 break;
16113 }
16114
16115 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID),
16116 { Ops[0], Ops[1], Ops[2] });
16117 Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 1),
16118 Ops[3]);
16119 return Builder.CreateExtractValue(Call, 0);
16120 }
16121
16122 case X86::BI__builtin_ia32_fpclassps128_mask:
16123 case X86::BI__builtin_ia32_fpclassps256_mask:
16124 case X86::BI__builtin_ia32_fpclassps512_mask:
16125 case X86::BI__builtin_ia32_fpclassph128_mask:
16126 case X86::BI__builtin_ia32_fpclassph256_mask:
16127 case X86::BI__builtin_ia32_fpclassph512_mask:
16128 case X86::BI__builtin_ia32_fpclasspd128_mask:
16129 case X86::BI__builtin_ia32_fpclasspd256_mask:
16130 case X86::BI__builtin_ia32_fpclasspd512_mask: {
16131 unsigned NumElts =
16132 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
16133 Value *MaskIn = Ops[2];
16134 Ops.erase(&Ops[2]);
16135
16136 Intrinsic::ID ID;
16137 switch (BuiltinID) {
16138 default: llvm_unreachable("Unsupported intrinsic!");
16139 case X86::BI__builtin_ia32_fpclassph128_mask:
16140 ID = Intrinsic::x86_avx512fp16_fpclass_ph_128;
16141 break;
16142 case X86::BI__builtin_ia32_fpclassph256_mask:
16143 ID = Intrinsic::x86_avx512fp16_fpclass_ph_256;
16144 break;
16145 case X86::BI__builtin_ia32_fpclassph512_mask:
16146 ID = Intrinsic::x86_avx512fp16_fpclass_ph_512;
16147 break;
16148 case X86::BI__builtin_ia32_fpclassps128_mask:
16149 ID = Intrinsic::x86_avx512_fpclass_ps_128;
16150 break;
16151 case X86::BI__builtin_ia32_fpclassps256_mask:
16152 ID = Intrinsic::x86_avx512_fpclass_ps_256;
16153 break;
16154 case X86::BI__builtin_ia32_fpclassps512_mask:
16155 ID = Intrinsic::x86_avx512_fpclass_ps_512;
16156 break;
16157 case X86::BI__builtin_ia32_fpclasspd128_mask:
16158 ID = Intrinsic::x86_avx512_fpclass_pd_128;
16159 break;
16160 case X86::BI__builtin_ia32_fpclasspd256_mask:
16161 ID = Intrinsic::x86_avx512_fpclass_pd_256;
16162 break;
16163 case X86::BI__builtin_ia32_fpclasspd512_mask:
16164 ID = Intrinsic::x86_avx512_fpclass_pd_512;
16165 break;
16166 }
16167
16168 Value *Fpclass = Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
16169 return EmitX86MaskedCompareResult(*this, Fpclass, NumElts, MaskIn);
16170 }
16171
16172 case X86::BI__builtin_ia32_vp2intersect_q_512:
16173 case X86::BI__builtin_ia32_vp2intersect_q_256:
16174 case X86::BI__builtin_ia32_vp2intersect_q_128:
16175 case X86::BI__builtin_ia32_vp2intersect_d_512:
16176 case X86::BI__builtin_ia32_vp2intersect_d_256:
16177 case X86::BI__builtin_ia32_vp2intersect_d_128: {
16178 unsigned NumElts =
16179 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
16180 Intrinsic::ID ID;
16181
16182 switch (BuiltinID) {
16183 default: llvm_unreachable("Unsupported intrinsic!");
16184 case X86::BI__builtin_ia32_vp2intersect_q_512:
16185 ID = Intrinsic::x86_avx512_vp2intersect_q_512;
16186 break;
16187 case X86::BI__builtin_ia32_vp2intersect_q_256:
16188 ID = Intrinsic::x86_avx512_vp2intersect_q_256;
16189 break;
16190 case X86::BI__builtin_ia32_vp2intersect_q_128:
16191 ID = Intrinsic::x86_avx512_vp2intersect_q_128;
16192 break;
16193 case X86::BI__builtin_ia32_vp2intersect_d_512:
16194 ID = Intrinsic::x86_avx512_vp2intersect_d_512;
16195 break;
16196 case X86::BI__builtin_ia32_vp2intersect_d_256:
16197 ID = Intrinsic::x86_avx512_vp2intersect_d_256;
16198 break;
16199 case X86::BI__builtin_ia32_vp2intersect_d_128:
16200 ID = Intrinsic::x86_avx512_vp2intersect_d_128;
16201 break;
16202 }
16203
16204 Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID), {Ops[0], Ops[1]});
16205 Value *Result = Builder.CreateExtractValue(Call, 0);
16206 Result = EmitX86MaskedCompareResult(*this, Result, NumElts, nullptr);
16207 Builder.CreateDefaultAlignedStore(Result, Ops[2]);
16208
16209 Result = Builder.CreateExtractValue(Call, 1);
16210 Result = EmitX86MaskedCompareResult(*this, Result, NumElts, nullptr);
16211 return Builder.CreateDefaultAlignedStore(Result, Ops[3]);
16212 }
16213
16214 case X86::BI__builtin_ia32_vpmultishiftqb128:
16215 case X86::BI__builtin_ia32_vpmultishiftqb256:
16216 case X86::BI__builtin_ia32_vpmultishiftqb512: {
16217 Intrinsic::ID ID;
16218 switch (BuiltinID) {
16219 default: llvm_unreachable("Unsupported intrinsic!");
16220 case X86::BI__builtin_ia32_vpmultishiftqb128:
16221 ID = Intrinsic::x86_avx512_pmultishift_qb_128;
16222 break;
16223 case X86::BI__builtin_ia32_vpmultishiftqb256:
16224 ID = Intrinsic::x86_avx512_pmultishift_qb_256;
16225 break;
16226 case X86::BI__builtin_ia32_vpmultishiftqb512:
16227 ID = Intrinsic::x86_avx512_pmultishift_qb_512;
16228 break;
16229 }
16230
16231 return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
16232 }
16233
16234 case X86::BI__builtin_ia32_vpshufbitqmb128_mask:
16235 case X86::BI__builtin_ia32_vpshufbitqmb256_mask:
16236 case X86::BI__builtin_ia32_vpshufbitqmb512_mask: {
16237 unsigned NumElts =
16238 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
16239 Value *MaskIn = Ops[2];
16240 Ops.erase(&Ops[2]);
16241
16242 Intrinsic::ID ID;
16243 switch (BuiltinID) {
16244 default: llvm_unreachable("Unsupported intrinsic!");
16245 case X86::BI__builtin_ia32_vpshufbitqmb128_mask:
16246 ID = Intrinsic::x86_avx512_vpshufbitqmb_128;
16247 break;
16248 case X86::BI__builtin_ia32_vpshufbitqmb256_mask:
16249 ID = Intrinsic::x86_avx512_vpshufbitqmb_256;
16250 break;
16251 case X86::BI__builtin_ia32_vpshufbitqmb512_mask:
16252 ID = Intrinsic::x86_avx512_vpshufbitqmb_512;
16253 break;
16254 }
16255
16256 Value *Shufbit = Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
16257 return EmitX86MaskedCompareResult(*this, Shufbit, NumElts, MaskIn);
16258 }
16259
16260 // packed comparison intrinsics
16261 case X86::BI__builtin_ia32_cmpeqps:
16262 case X86::BI__builtin_ia32_cmpeqpd:
16263 return getVectorFCmpIR(CmpInst::FCMP_OEQ, /*IsSignaling*/false);
16264 case X86::BI__builtin_ia32_cmpltps:
16265 case X86::BI__builtin_ia32_cmpltpd:
16266 return getVectorFCmpIR(CmpInst::FCMP_OLT, /*IsSignaling*/true);
16267 case X86::BI__builtin_ia32_cmpleps:
16268 case X86::BI__builtin_ia32_cmplepd:
16269 return getVectorFCmpIR(CmpInst::FCMP_OLE, /*IsSignaling*/true);
16270 case X86::BI__builtin_ia32_cmpunordps:
16271 case X86::BI__builtin_ia32_cmpunordpd:
16272 return getVectorFCmpIR(CmpInst::FCMP_UNO, /*IsSignaling*/false);
16273 case X86::BI__builtin_ia32_cmpneqps:
16274 case X86::BI__builtin_ia32_cmpneqpd:
16275 return getVectorFCmpIR(CmpInst::FCMP_UNE, /*IsSignaling*/false);
16276 case X86::BI__builtin_ia32_cmpnltps:
16277 case X86::BI__builtin_ia32_cmpnltpd:
16278 return getVectorFCmpIR(CmpInst::FCMP_UGE, /*IsSignaling*/true);
16279 case X86::BI__builtin_ia32_cmpnleps:
16280 case X86::BI__builtin_ia32_cmpnlepd:
16281 return getVectorFCmpIR(CmpInst::FCMP_UGT, /*IsSignaling*/true);
16282 case X86::BI__builtin_ia32_cmpordps:
16283 case X86::BI__builtin_ia32_cmpordpd:
16284 return getVectorFCmpIR(CmpInst::FCMP_ORD, /*IsSignaling*/false);
16285 case X86::BI__builtin_ia32_cmpph128_mask:
16286 case X86::BI__builtin_ia32_cmpph256_mask:
16287 case X86::BI__builtin_ia32_cmpph512_mask:
16288 case X86::BI__builtin_ia32_cmpps128_mask:
16289 case X86::BI__builtin_ia32_cmpps256_mask:
16290 case X86::BI__builtin_ia32_cmpps512_mask:
16291 case X86::BI__builtin_ia32_cmppd128_mask:
16292 case X86::BI__builtin_ia32_cmppd256_mask:
16293 case X86::BI__builtin_ia32_cmppd512_mask:
16294 IsMaskFCmp = true;
16295 [[fallthrough]];
16296 case X86::BI__builtin_ia32_cmpps:
16297 case X86::BI__builtin_ia32_cmpps256:
16298 case X86::BI__builtin_ia32_cmppd:
16299 case X86::BI__builtin_ia32_cmppd256: {
16300 // Lower vector comparisons to fcmp instructions, while ignoring both the
16301 // requested signalling behaviour and the requested rounding mode. This is
16302 // only possible if the fp-model is not strict and FENV_ACCESS is off.
16303 
16304 // The third argument is the comparison condition, an integer in the
16305 // range [0, 31].
16306 
16307 unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x1f;
16308
16309 // Lower to an IR fcmp instruction, ignoring the requested signaling
16310 // behaviour: e.g. both _CMP_GT_OS and _CMP_GT_OQ are translated to
16311 // FCMP_OGT.
16312 FCmpInst::Predicate Pred;
16313 bool IsSignaling;
16314 // Predicates for 16-31 repeat the 0-15 predicates. Only the signalling
16315 // behavior is inverted. We'll handle that after the switch.
16316 switch (CC & 0xf) {
16317 case 0x00: Pred = FCmpInst::FCMP_OEQ; IsSignaling = false; break;
16318 case 0x01: Pred = FCmpInst::FCMP_OLT; IsSignaling = true; break;
16319 case 0x02: Pred = FCmpInst::FCMP_OLE; IsSignaling = true; break;
16320 case 0x03: Pred = FCmpInst::FCMP_UNO; IsSignaling = false; break;
16321 case 0x04: Pred = FCmpInst::FCMP_UNE; IsSignaling = false; break;
16322 case 0x05: Pred = FCmpInst::FCMP_UGE; IsSignaling = true; break;
16323 case 0x06: Pred = FCmpInst::FCMP_UGT; IsSignaling = true; break;
16324 case 0x07: Pred = FCmpInst::FCMP_ORD; IsSignaling = false; break;
16325 case 0x08: Pred = FCmpInst::FCMP_UEQ; IsSignaling = false; break;
16326 case 0x09: Pred = FCmpInst::FCMP_ULT; IsSignaling = true; break;
16327 case 0x0a: Pred = FCmpInst::FCMP_ULE; IsSignaling = true; break;
16328 case 0x0b: Pred = FCmpInst::FCMP_FALSE; IsSignaling = false; break;
16329 case 0x0c: Pred = FCmpInst::FCMP_ONE; IsSignaling = false; break;
16330 case 0x0d: Pred = FCmpInst::FCMP_OGE; IsSignaling = true; break;
16331 case 0x0e: Pred = FCmpInst::FCMP_OGT; IsSignaling = true; break;
16332 case 0x0f: Pred = FCmpInst::FCMP_TRUE; IsSignaling = false; break;
16333 default: llvm_unreachable("Unhandled CC");
16334 }
16335
16336 // Invert the signalling behavior for 16-31.
16337 if (CC & 0x10)
16338 IsSignaling = !IsSignaling;
16339
16340 // If the predicate is true or false and we're using constrained intrinsics,
16341 // we don't have a compare intrinsic we can use. Just use the legacy X86
16342 // specific intrinsic.
16343 // If the intrinsic is mask enabled and we're using constrained intrinsics,
16344 // use the legacy X86 specific intrinsic.
16345 if (Builder.getIsFPConstrained() &&
16346 (Pred == FCmpInst::FCMP_TRUE || Pred == FCmpInst::FCMP_FALSE ||
16347 IsMaskFCmp)) {
16348
16349 Intrinsic::ID IID;
16350 switch (BuiltinID) {
16351 default: llvm_unreachable("Unexpected builtin");
16352 case X86::BI__builtin_ia32_cmpps:
16353 IID = Intrinsic::x86_sse_cmp_ps;
16354 break;
16355 case X86::BI__builtin_ia32_cmpps256:
16356 IID = Intrinsic::x86_avx_cmp_ps_256;
16357 break;
16358 case X86::BI__builtin_ia32_cmppd:
16359 IID = Intrinsic::x86_sse2_cmp_pd;
16360 break;
16361 case X86::BI__builtin_ia32_cmppd256:
16362 IID = Intrinsic::x86_avx_cmp_pd_256;
16363 break;
16364 case X86::BI__builtin_ia32_cmpph128_mask:
16365 IID = Intrinsic::x86_avx512fp16_mask_cmp_ph_128;
16366 break;
16367 case X86::BI__builtin_ia32_cmpph256_mask:
16368 IID = Intrinsic::x86_avx512fp16_mask_cmp_ph_256;
16369 break;
16370 case X86::BI__builtin_ia32_cmpph512_mask:
16371 IID = Intrinsic::x86_avx512fp16_mask_cmp_ph_512;
16372 break;
16373 case X86::BI__builtin_ia32_cmpps512_mask:
16374 IID = Intrinsic::x86_avx512_mask_cmp_ps_512;
16375 break;
16376 case X86::BI__builtin_ia32_cmppd512_mask:
16377 IID = Intrinsic::x86_avx512_mask_cmp_pd_512;
16378 break;
16379 case X86::BI__builtin_ia32_cmpps128_mask:
16380 IID = Intrinsic::x86_avx512_mask_cmp_ps_128;
16381 break;
16382 case X86::BI__builtin_ia32_cmpps256_mask:
16383 IID = Intrinsic::x86_avx512_mask_cmp_ps_256;
16384 break;
16385 case X86::BI__builtin_ia32_cmppd128_mask:
16386 IID = Intrinsic::x86_avx512_mask_cmp_pd_128;
16387 break;
16388 case X86::BI__builtin_ia32_cmppd256_mask:
16389 IID = Intrinsic::x86_avx512_mask_cmp_pd_256;
16390 break;
16391 }
16392
16393 Function *Intr = CGM.getIntrinsic(IID);
16394 if (IsMaskFCmp) {
16395 unsigned NumElts =
16396 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
16397 Ops[3] = getMaskVecValue(*this, Ops[3], NumElts);
16398 Value *Cmp = Builder.CreateCall(Intr, Ops);
16399 return EmitX86MaskedCompareResult(*this, Cmp, NumElts, nullptr);
16400 }
16401
16402 return Builder.CreateCall(Intr, Ops);
16403 }
16404
16405 // Builtins with the _mask suffix produce a mask result; builtins without it
16406 // return a vector of integers of the same width as the input vectors.
16407 if (IsMaskFCmp) {
16408 // We ignore SAE if strict FP is disabled. We only keep precise
16409 // exception behavior under strict FP.
16410 // NOTE: If strict FP does ever go through here a CGFPOptionsRAII
16411 // object will be required.
16412 unsigned NumElts =
16413 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
16414 Value *Cmp;
16415 if (IsSignaling)
16416 Cmp = Builder.CreateFCmpS(Pred, Ops[0], Ops[1]);
16417 else
16418 Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]);
16419 return EmitX86MaskedCompareResult(*this, Cmp, NumElts, Ops[3]);
16420 }
16421
16422 return getVectorFCmpIR(Pred, IsSignaling);
16423 }
16424
16425 // SSE scalar comparison intrinsics
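// The second argument to getCmpIntrinsicCall is the immediate predicate for
// cmpss/cmpsd: 0=EQ, 1=LT, 2=LE, 3=UNORD, 4=NEQ, 5=NLT, 6=NLE, 7=ORD.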
16426 case X86::BI__builtin_ia32_cmpeqss:
16427 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 0);
16428 case X86::BI__builtin_ia32_cmpltss:
16429 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 1);
16430 case X86::BI__builtin_ia32_cmpless:
16431 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 2);
16432 case X86::BI__builtin_ia32_cmpunordss:
16433 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 3);
16434 case X86::BI__builtin_ia32_cmpneqss:
16435 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 4);
16436 case X86::BI__builtin_ia32_cmpnltss:
16437 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 5);
16438 case X86::BI__builtin_ia32_cmpnless:
16439 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 6);
16440 case X86::BI__builtin_ia32_cmpordss:
16441 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 7);
16442 case X86::BI__builtin_ia32_cmpeqsd:
16443 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 0);
16444 case X86::BI__builtin_ia32_cmpltsd:
16445 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 1);
16446 case X86::BI__builtin_ia32_cmplesd:
16447 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 2);
16448 case X86::BI__builtin_ia32_cmpunordsd:
16449 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 3);
16450 case X86::BI__builtin_ia32_cmpneqsd:
16451 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 4);
16452 case X86::BI__builtin_ia32_cmpnltsd:
16453 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 5);
16454 case X86::BI__builtin_ia32_cmpnlesd:
16455 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 6);
16456 case X86::BI__builtin_ia32_cmpordsd:
16457 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 7);
16458
16459 // f16c half2float intrinsics
16460 case X86::BI__builtin_ia32_vcvtph2ps:
16461 case X86::BI__builtin_ia32_vcvtph2ps256:
16462 case X86::BI__builtin_ia32_vcvtph2ps_mask:
16463 case X86::BI__builtin_ia32_vcvtph2ps256_mask:
16464 case X86::BI__builtin_ia32_vcvtph2ps512_mask: {
16465 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
16466 return EmitX86CvtF16ToFloatExpr(*this, Ops, ConvertType(E->getType()));
16467 }
16468
16469 // AVX512 bf16 intrinsics
16470 case X86::BI__builtin_ia32_cvtneps2bf16_128_mask: {
16471 Ops[2] = getMaskVecValue(
16472 *this, Ops[2],
16473 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements());
16474 Intrinsic::ID IID = Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128;
16475 return Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
16476 }
16477 case X86::BI__builtin_ia32_cvtsbf162ss_32:
16478 return Builder.CreateFPExt(Ops[0], Builder.getFloatTy());
16479
16480 case X86::BI__builtin_ia32_cvtneps2bf16_256_mask:
16481 case X86::BI__builtin_ia32_cvtneps2bf16_512_mask: {
16482 Intrinsic::ID IID;
16483 switch (BuiltinID) {
16484 default: llvm_unreachable("Unsupported intrinsic!");
16485 case X86::BI__builtin_ia32_cvtneps2bf16_256_mask:
16486 IID = Intrinsic::x86_avx512bf16_cvtneps2bf16_256;
16487 break;
16488 case X86::BI__builtin_ia32_cvtneps2bf16_512_mask:
16489 IID = Intrinsic::x86_avx512bf16_cvtneps2bf16_512;
16490 break;
16491 }
16492 Value *Res = Builder.CreateCall(CGM.getIntrinsic(IID), Ops[0]);
16493 return EmitX86Select(*this, Ops[2], Res, Ops[1]);
16494 }
16495
16496 case X86::BI__cpuid:
16497 case X86::BI__cpuidex: {
16498 Value *FuncId = EmitScalarExpr(E->getArg(1));
16499 Value *SubFuncId = BuiltinID == X86::BI__cpuidex
16500 ? EmitScalarExpr(E->getArg(2))
16501 : llvm::ConstantInt::get(Int32Ty, 0);
16502
16503 llvm::StructType *CpuidRetTy =
16504 llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty, Int32Ty);
16505 llvm::FunctionType *FTy =
16506 llvm::FunctionType::get(CpuidRetTy, {Int32Ty, Int32Ty}, false);
16507
16508 StringRef Asm, Constraints;
16509 if (getTarget().getTriple().getArch() == llvm::Triple::x86) {
16510 Asm = "cpuid";
16511 Constraints = "={ax},={bx},={cx},={dx},{ax},{cx}";
16512 } else {
16513 // x86-64 uses %rbx as the base register, so preserve it.
16514 Asm = "xchgq %rbx, ${1:q}\n"
16515 "cpuid\n"
16516 "xchgq %rbx, ${1:q}";
16517 Constraints = "={ax},=r,={cx},={dx},0,2";
16518 }
16519
16520 llvm::InlineAsm *IA = llvm::InlineAsm::get(FTy, Asm, Constraints,
16521 /*hasSideEffects=*/false);
16522 Value *IACall = Builder.CreateCall(IA, {FuncId, SubFuncId});
16523 Value *BasePtr = EmitScalarExpr(E->getArg(0));
16524 Value *Store = nullptr;
16525 for (unsigned i = 0; i < 4; i++) {
16526 Value *Extracted = Builder.CreateExtractValue(IACall, i);
16527 Value *StorePtr = Builder.CreateConstInBoundsGEP1_32(Int32Ty, BasePtr, i);
16528 Store = Builder.CreateAlignedStore(Extracted, StorePtr, getIntAlign());
16529 }
16530
16531 // Return the last store instruction to signal that we have emitted
16532 // the intrinsic.
16533 return Store;
16534 }
16535
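// __emul/__emulu widen their 32-bit operands to 64 bits and return the full
// product. __mulh/__umulh and _mul128/_umul128 below widen to 128 bits: the
// *mulh forms return the high 64 bits, while the *mul128 forms store the high
// half through the third argument and return the low half.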
16536 case X86::BI__emul:
16537 case X86::BI__emulu: {
16538 llvm::Type *Int64Ty = llvm::IntegerType::get(getLLVMContext(), 64);
16539 bool isSigned = (BuiltinID == X86::BI__emul);
16540 Value *LHS = Builder.CreateIntCast(Ops[0], Int64Ty, isSigned);
16541 Value *RHS = Builder.CreateIntCast(Ops[1], Int64Ty, isSigned);
16542 return Builder.CreateMul(LHS, RHS, "", !isSigned, isSigned);
16543 }
16544 case X86::BI__mulh:
16545 case X86::BI__umulh:
16546 case X86::BI_mul128:
16547 case X86::BI_umul128: {
16548 llvm::Type *ResType = ConvertType(E->getType());
16549 llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
16550
16551 bool IsSigned = (BuiltinID == X86::BI__mulh || BuiltinID == X86::BI_mul128);
16552 Value *LHS = Builder.CreateIntCast(Ops[0], Int128Ty, IsSigned);
16553 Value *RHS = Builder.CreateIntCast(Ops[1], Int128Ty, IsSigned);
16554
16555 Value *MulResult, *HigherBits;
16556 if (IsSigned) {
16557 MulResult = Builder.CreateNSWMul(LHS, RHS);
16558 HigherBits = Builder.CreateAShr(MulResult, 64);
16559 } else {
16560 MulResult = Builder.CreateNUWMul(LHS, RHS);
16561 HigherBits = Builder.CreateLShr(MulResult, 64);
16562 }
16563 HigherBits = Builder.CreateIntCast(HigherBits, ResType, IsSigned);
16564
16565 if (BuiltinID == X86::BI__mulh || BuiltinID == X86::BI__umulh)
16566 return HigherBits;
16567
16568 Address HighBitsAddress = EmitPointerWithAlignment(E->getArg(2));
16569 Builder.CreateStore(HigherBits, HighBitsAddress);
16570 return Builder.CreateIntCast(MulResult, ResType, IsSigned);
16571 }
16572
16573 case X86::BI__faststorefence: {
16574 return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
16575 llvm::SyncScope::System);
16576 }
16577 case X86::BI__shiftleft128:
16578 case X86::BI__shiftright128: {
16579 llvm::Function *F = CGM.getIntrinsic(
16580 BuiltinID == X86::BI__shiftleft128 ? Intrinsic::fshl : Intrinsic::fshr,
16581 Int64Ty);
16582 // Flip low/high ops and zero-extend amount to matching type.
16583 // shiftleft128(Low, High, Amt) -> fshl(High, Low, Amt)
16584 // shiftright128(Low, High, Amt) -> fshr(High, Low, Amt)
16585 std::swap(Ops[0], Ops[1]);
16586 Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
16587 return Builder.CreateCall(F, Ops);
16588 }
16589 case X86::BI_ReadWriteBarrier:
16590 case X86::BI_ReadBarrier:
16591 case X86::BI_WriteBarrier: {
16592 return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
16593 llvm::SyncScope::SingleThread);
16594 }
16595
16596 case X86::BI_AddressOfReturnAddress: {
16597 Function *F =
16598 CGM.getIntrinsic(Intrinsic::addressofreturnaddress, AllocaInt8PtrTy);
16599 return Builder.CreateCall(F);
16600 }
16601 case X86::BI__stosb: {
16602 // We treat __stosb as a volatile memset - it may not generate a "rep stosb"
16603 // instruction, but it will create a memset that won't be optimized away.
16604 return Builder.CreateMemSet(Ops[0], Ops[1], Ops[2], Align(1), true);
16605 }
16606 case X86::BI__ud2:
16607 // llvm.trap makes a ud2a instruction on x86.
16608 return EmitTrapCall(Intrinsic::trap);
16609 case X86::BI__int2c: {
16610 // This syscall signals a driver assertion failure in x86 NT kernels.
16611 llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false);
16612 llvm::InlineAsm *IA =
16613 llvm::InlineAsm::get(FTy, "int $$0x2c", "", /*hasSideEffects=*/true);
16614 llvm::AttributeList NoReturnAttr = llvm::AttributeList::get(
16615 getLLVMContext(), llvm::AttributeList::FunctionIndex,
16616 llvm::Attribute::NoReturn);
16617 llvm::CallInst *CI = Builder.CreateCall(IA);
16618 CI->setAttributes(NoReturnAttr);
16619 return CI;
16620 }
16621 case X86::BI__readfsbyte:
16622 case X86::BI__readfsword:
16623 case X86::BI__readfsdword:
16624 case X86::BI__readfsqword: {
16625 llvm::Type *IntTy = ConvertType(E->getType());
16626 Value *Ptr = Builder.CreateIntToPtr(
16627 Ops[0], llvm::PointerType::get(getLLVMContext(), 257));
16628 LoadInst *Load = Builder.CreateAlignedLoad(
16629 IntTy, Ptr, getContext().getTypeAlignInChars(E->getType()));
16630 Load->setVolatile(true);
16631 return Load;
16632 }
16633 case X86::BI__readgsbyte:
16634 case X86::BI__readgsword:
16635 case X86::BI__readgsdword:
16636 case X86::BI__readgsqword: {
16637 llvm::Type *IntTy = ConvertType(E->getType());
16638 Value *Ptr = Builder.CreateIntToPtr(
16639 Ops[0], llvm::PointerType::get(getLLVMContext(), 256));
16640 LoadInst *Load = Builder.CreateAlignedLoad(
16641 IntTy, Ptr, getContext().getTypeAlignInChars(E->getType()));
16642 Load->setVolatile(true);
16643 return Load;
16644 }
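// encodekey128/256 return an aggregate whose element 0 is the 32-bit value
// handed back to the caller; the remaining 128-bit pieces of the key handle
// are stored, 16 bytes apart and unaligned, through the output pointer.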
16645 case X86::BI__builtin_ia32_encodekey128_u32: {
16646 Intrinsic::ID IID = Intrinsic::x86_encodekey128;
16647
16648 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), {Ops[0], Ops[1]});
16649
16650 for (int i = 0; i < 3; ++i) {
16651 Value *Extract = Builder.CreateExtractValue(Call, i + 1);
16652 Value *Ptr = Builder.CreateConstGEP1_32(Int8Ty, Ops[2], i * 16);
16653 Builder.CreateAlignedStore(Extract, Ptr, Align(1));
16654 }
16655
16656 return Builder.CreateExtractValue(Call, 0);
16657 }
16658 case X86::BI__builtin_ia32_encodekey256_u32: {
16659 Intrinsic::ID IID = Intrinsic::x86_encodekey256;
16660
16661 Value *Call =
16662 Builder.CreateCall(CGM.getIntrinsic(IID), {Ops[0], Ops[1], Ops[2]});
16663
16664 for (int i = 0; i < 4; ++i) {
16665 Value *Extract = Builder.CreateExtractValue(Call, i + 1);
16666 Value *Ptr = Builder.CreateConstGEP1_32(Int8Ty, Ops[3], i * 16);
16667 Builder.CreateAlignedStore(Extract, Ptr, Align(1));
16668 }
16669
16670 return Builder.CreateExtractValue(Call, 0);
16671 }
16672 case X86::BI__builtin_ia32_aesenc128kl_u8:
16673 case X86::BI__builtin_ia32_aesdec128kl_u8:
16674 case X86::BI__builtin_ia32_aesenc256kl_u8:
16675 case X86::BI__builtin_ia32_aesdec256kl_u8: {
16676 Intrinsic::ID IID;
16677 StringRef BlockName;
16678 switch (BuiltinID) {
16679 default:
16680 llvm_unreachable("Unexpected builtin");
16681 case X86::BI__builtin_ia32_aesenc128kl_u8:
16682 IID = Intrinsic::x86_aesenc128kl;
16683 BlockName = "aesenc128kl";
16684 break;
16685 case X86::BI__builtin_ia32_aesdec128kl_u8:
16686 IID = Intrinsic::x86_aesdec128kl;
16687 BlockName = "aesdec128kl";
16688 break;
16689 case X86::BI__builtin_ia32_aesenc256kl_u8:
16690 IID = Intrinsic::x86_aesenc256kl;
16691 BlockName = "aesenc256kl";
16692 break;
16693 case X86::BI__builtin_ia32_aesdec256kl_u8:
16694 IID = Intrinsic::x86_aesdec256kl;
16695 BlockName = "aesdec256kl";
16696 break;
16697 }
16698
16699 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), {Ops[1], Ops[2]});
16700
16701 BasicBlock *NoError =
16702 createBasicBlock(BlockName + "_no_error", this->CurFn);
16703 BasicBlock *Error = createBasicBlock(BlockName + "_error", this->CurFn);
16704 BasicBlock *End = createBasicBlock(BlockName + "_end", this->CurFn);
16705
16706 Value *Ret = Builder.CreateExtractValue(Call, 0);
16707 Value *Succ = Builder.CreateTrunc(Ret, Builder.getInt1Ty());
16708 Value *Out = Builder.CreateExtractValue(Call, 1);
16709 Builder.CreateCondBr(Succ, NoError, Error);
16710
16711 Builder.SetInsertPoint(NoError);
16712 Builder.CreateDefaultAlignedStore(Out, Ops[0]);
16713 Builder.CreateBr(End);
16714
16715 Builder.SetInsertPoint(Error);
16716 Constant *Zero = llvm::Constant::getNullValue(Out->getType());
16717 Builder.CreateDefaultAlignedStore(Zero, Ops[0]);
16718 Builder.CreateBr(End);
16719
16720 Builder.SetInsertPoint(End);
16721 return Builder.CreateExtractValue(Call, 0);
16722 }
16723 case X86::BI__builtin_ia32_aesencwide128kl_u8:
16724 case X86::BI__builtin_ia32_aesdecwide128kl_u8:
16725 case X86::BI__builtin_ia32_aesencwide256kl_u8:
16726 case X86::BI__builtin_ia32_aesdecwide256kl_u8: {
16727 Intrinsic::ID IID;
16728 StringRef BlockName;
16729 switch (BuiltinID) {
16730 case X86::BI__builtin_ia32_aesencwide128kl_u8:
16731 IID = Intrinsic::x86_aesencwide128kl;
16732 BlockName = "aesencwide128kl";
16733 break;
16734 case X86::BI__builtin_ia32_aesdecwide128kl_u8:
16735 IID = Intrinsic::x86_aesdecwide128kl;
16736 BlockName = "aesdecwide128kl";
16737 break;
16738 case X86::BI__builtin_ia32_aesencwide256kl_u8:
16739 IID = Intrinsic::x86_aesencwide256kl;
16740 BlockName = "aesencwide256kl";
16741 break;
16742 case X86::BI__builtin_ia32_aesdecwide256kl_u8:
16743 IID = Intrinsic::x86_aesdecwide256kl;
16744 BlockName = "aesdecwide256kl";
16745 break;
16746 }
16747
16748 llvm::Type *Ty = FixedVectorType::get(Builder.getInt64Ty(), 2);
16749 Value *InOps[9];
16750 InOps[0] = Ops[2];
16751 for (int i = 0; i != 8; ++i) {
16752 Value *Ptr = Builder.CreateConstGEP1_32(Ty, Ops[1], i);
16753 InOps[i + 1] = Builder.CreateAlignedLoad(Ty, Ptr, Align(16));
16754 }
16755
16756 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), InOps);
16757
16758 BasicBlock *NoError =
16759 createBasicBlock(BlockName + "_no_error", this->CurFn);
16760 BasicBlock *Error = createBasicBlock(BlockName + "_error", this->CurFn);
16761 BasicBlock *End = createBasicBlock(BlockName + "_end", this->CurFn);
16762
16763 Value *Ret = Builder.CreateExtractValue(Call, 0);
16764 Value *Succ = Builder.CreateTrunc(Ret, Builder.getInt1Ty());
16765 Builder.CreateCondBr(Succ, NoError, Error);
16766
16767 Builder.SetInsertPoint(NoError);
16768 for (int i = 0; i != 8; ++i) {
16769 Value *Extract = Builder.CreateExtractValue(Call, i + 1);
16770 Value *Ptr = Builder.CreateConstGEP1_32(Extract->getType(), Ops[0], i);
16771 Builder.CreateAlignedStore(Extract, Ptr, Align(16));
16772 }
16773 Builder.CreateBr(End);
16774
16775 Builder.SetInsertPoint(Error);
16776 for (int i = 0; i != 8; ++i) {
16777 Value *Out = Builder.CreateExtractValue(Call, i + 1);
16778 Constant *Zero = llvm::Constant::getNullValue(Out->getType());
16779 Value *Ptr = Builder.CreateConstGEP1_32(Out->getType(), Ops[0], i);
16780 Builder.CreateAlignedStore(Zero, Ptr, Align(16));
16781 }
16782 Builder.CreateBr(End);
16783
16784 Builder.SetInsertPoint(End);
16785 return Builder.CreateExtractValue(Call, 0);
16786 }
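// IsConjFMA selects the conjugating complex-FMA intrinsic variant; the _mask
// forms then blend the intrinsic result with the passthrough operand via
// EmitX86Select (only bit 0 of the mask is relevant for the scalar forms).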
16787 case X86::BI__builtin_ia32_vfcmaddcph512_mask:
16788 IsConjFMA = true;
16789 [[fallthrough]];
16790 case X86::BI__builtin_ia32_vfmaddcph512_mask: {
16791 Intrinsic::ID IID = IsConjFMA
16792 ? Intrinsic::x86_avx512fp16_mask_vfcmadd_cph_512
16793 : Intrinsic::x86_avx512fp16_mask_vfmadd_cph_512;
16794 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
16795 return EmitX86Select(*this, Ops[3], Call, Ops[0]);
16796 }
16797 case X86::BI__builtin_ia32_vfcmaddcsh_round_mask:
16798 IsConjFMA = true;
16799 [[fallthrough]];
16800 case X86::BI__builtin_ia32_vfmaddcsh_round_mask: {
16801 Intrinsic::ID IID = IsConjFMA ? Intrinsic::x86_avx512fp16_mask_vfcmadd_csh
16802 : Intrinsic::x86_avx512fp16_mask_vfmadd_csh;
16803 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
16804 Value *And = Builder.CreateAnd(Ops[3], llvm::ConstantInt::get(Int8Ty, 1));
16805 return EmitX86Select(*this, And, Call, Ops[0]);
16806 }
16807 case X86::BI__builtin_ia32_vfcmaddcsh_round_mask3:
16808 IsConjFMA = true;
16809 [[fallthrough]];
16810 case X86::BI__builtin_ia32_vfmaddcsh_round_mask3: {
16811 Intrinsic::ID IID = IsConjFMA ? Intrinsic::x86_avx512fp16_mask_vfcmadd_csh
16812 : Intrinsic::x86_avx512fp16_mask_vfmadd_csh;
16813 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
16814 static constexpr int Mask[] = {0, 5, 6, 7};
16815 return Builder.CreateShuffleVector(Call, Ops[2], Mask);
16816 }
16817 case X86::BI__builtin_ia32_prefetchi:
16818 return Builder.CreateCall(
16819 CGM.getIntrinsic(Intrinsic::prefetch, Ops[0]->getType()),
16820 {Ops[0], llvm::ConstantInt::get(Int32Ty, 0), Ops[1],
16821 llvm::ConstantInt::get(Int32Ty, 0)});
16822 }
16823 }
16824
16825 Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
16826 const CallExpr *E) {
16827   // Do not emit the builtin arguments directly as the arguments of a function
16828   // call, because the evaluation order of function arguments is not specified
16829   // in C++. This is important when testing to ensure the arguments are emitted
16830   // in the same order every time. E.g.:
16831 // Instead of:
16832 // return Builder.CreateFDiv(EmitScalarExpr(E->getArg(0)),
16833 // EmitScalarExpr(E->getArg(1)), "swdiv");
16834 // Use:
16835 // Value *Op0 = EmitScalarExpr(E->getArg(0));
16836 // Value *Op1 = EmitScalarExpr(E->getArg(1));
16837 // return Builder.CreateFDiv(Op0, Op1, "swdiv")
16838
16839 Intrinsic::ID ID = Intrinsic::not_intrinsic;
16840
16841 #include "llvm/TargetParser/PPCTargetParser.def"
16842 auto GenAIXPPCBuiltinCpuExpr = [&](unsigned SupportMethod, unsigned FieldIdx,
16843 unsigned Mask, CmpInst::Predicate CompOp,
16844 unsigned OpValue) -> Value * {
16845 if (SupportMethod == BUILTIN_PPC_FALSE)
16846 return llvm::ConstantInt::getFalse(ConvertType(E->getType()));
16847
16848 if (SupportMethod == BUILTIN_PPC_TRUE)
16849 return llvm::ConstantInt::getTrue(ConvertType(E->getType()));
16850
16851 assert(SupportMethod <= SYS_CALL && "Invalid value for SupportMethod.");
16852
16853 llvm::Value *FieldValue = nullptr;
16854 if (SupportMethod == USE_SYS_CONF) {
16855 llvm::Type *STy = llvm::StructType::get(PPC_SYSTEMCONFIG_TYPE);
16856 llvm::Constant *SysConf =
16857 CGM.CreateRuntimeVariable(STy, "_system_configuration");
16858
16859 // Grab the appropriate field from _system_configuration.
16860 llvm::Value *Idxs[] = {ConstantInt::get(Int32Ty, 0),
16861 ConstantInt::get(Int32Ty, FieldIdx)};
16862
16863 FieldValue = Builder.CreateInBoundsGEP(STy, SysConf, Idxs);
16864 FieldValue = Builder.CreateAlignedLoad(Int32Ty, FieldValue,
16865 CharUnits::fromQuantity(4));
16866 } else if (SupportMethod == SYS_CALL) {
16867 llvm::FunctionType *FTy =
16868 llvm::FunctionType::get(Int64Ty, Int32Ty, false);
16869 llvm::FunctionCallee Func =
16870 CGM.CreateRuntimeFunction(FTy, "getsystemcfg");
16871
16872 FieldValue =
16873 Builder.CreateCall(Func, {ConstantInt::get(Int32Ty, FieldIdx)});
16874 }
16875 assert(FieldValue &&
16876 "SupportMethod value is not defined in PPCTargetParser.def.");
16877
16878 if (Mask)
16879 FieldValue = Builder.CreateAnd(FieldValue, Mask);
16880
16881 llvm::Type *ValueType = FieldValue->getType();
16882 bool IsValueType64Bit = ValueType->isIntegerTy(64);
16883 assert(
16884 (IsValueType64Bit || ValueType->isIntegerTy(32)) &&
16885 "Only 32/64-bit integers are supported in GenAIXPPCBuiltinCpuExpr().");
16886
16887 return Builder.CreateICmp(
16888 CompOp, FieldValue,
16889 ConstantInt::get(IsValueType64Bit ? Int64Ty : Int32Ty, OpValue));
16890 };
16891
16892 switch (BuiltinID) {
16893 default: return nullptr;
16894
16895 case Builtin::BI__builtin_cpu_is: {
16896 const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts();
16897 StringRef CPUStr = cast<clang::StringLiteral>(CPUExpr)->getString();
16898 llvm::Triple Triple = getTarget().getTriple();
16899
16900 unsigned LinuxSupportMethod, LinuxIDValue, AIXSupportMethod, AIXIDValue;
16901 typedef std::tuple<unsigned, unsigned, unsigned, unsigned> CPUInfo;
16902
16903 std::tie(LinuxSupportMethod, LinuxIDValue, AIXSupportMethod, AIXIDValue) =
16904 static_cast<CPUInfo>(StringSwitch<CPUInfo>(CPUStr)
16905 #define PPC_CPU(NAME, Linux_SUPPORT_METHOD, LinuxID, AIX_SUPPORT_METHOD, \
16906 AIXID) \
16907 .Case(NAME, {Linux_SUPPORT_METHOD, LinuxID, AIX_SUPPORT_METHOD, AIXID})
16908 #include "llvm/TargetParser/PPCTargetParser.def"
16909 .Default({BUILTIN_PPC_UNSUPPORTED, 0,
16910 BUILTIN_PPC_UNSUPPORTED, 0}));
16911
16912 if (Triple.isOSAIX()) {
16913 assert((AIXSupportMethod != BUILTIN_PPC_UNSUPPORTED) &&
16914 "Invalid CPU name. Missed by SemaChecking?");
16915 return GenAIXPPCBuiltinCpuExpr(AIXSupportMethod, AIX_SYSCON_IMPL_IDX, 0,
16916 ICmpInst::ICMP_EQ, AIXIDValue);
16917 }
16918
16919 assert(Triple.isOSLinux() &&
16920 "__builtin_cpu_is() is only supported for AIX and Linux.");
16921
16922 assert((LinuxSupportMethod != BUILTIN_PPC_UNSUPPORTED) &&
16923 "Invalid CPU name. Missed by SemaChecking?");
16924
16925 if (LinuxSupportMethod == BUILTIN_PPC_FALSE)
16926 return llvm::ConstantInt::getFalse(ConvertType(E->getType()));
16927
16928 Value *Op0 = llvm::ConstantInt::get(Int32Ty, PPC_FAWORD_CPUID);
16929 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_fixed_addr_ld);
16930 Value *TheCall = Builder.CreateCall(F, {Op0}, "cpu_is");
16931 return Builder.CreateICmpEQ(TheCall,
16932 llvm::ConstantInt::get(Int32Ty, LinuxIDValue));
16933 }
16934 case Builtin::BI__builtin_cpu_supports: {
16935 llvm::Triple Triple = getTarget().getTriple();
16936 const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts();
16937 StringRef CPUStr = cast<clang::StringLiteral>(CPUExpr)->getString();
16938 if (Triple.isOSAIX()) {
16939 unsigned SupportMethod, FieldIdx, Mask, Value;
16940 CmpInst::Predicate CompOp;
16941 typedef std::tuple<unsigned, unsigned, unsigned, CmpInst::Predicate,
16942 unsigned>
16943 CPUSupportType;
16944 std::tie(SupportMethod, FieldIdx, Mask, CompOp, Value) =
16945 static_cast<CPUSupportType>(StringSwitch<CPUSupportType>(CPUStr)
16946 #define PPC_AIX_FEATURE(NAME, DESC, SUPPORT_METHOD, INDEX, MASK, COMP_OP, \
16947 VALUE) \
16948 .Case(NAME, {SUPPORT_METHOD, INDEX, MASK, COMP_OP, VALUE})
16949 #include "llvm/TargetParser/PPCTargetParser.def"
16950 .Default({BUILTIN_PPC_FALSE, 0, 0,
16951 CmpInst::Predicate(), 0}));
16952 return GenAIXPPCBuiltinCpuExpr(SupportMethod, FieldIdx, Mask, CompOp,
16953 Value);
16954 }
16955
16956 assert(Triple.isOSLinux() &&
16957 "__builtin_cpu_supports() is only supported for AIX and Linux.");
16958 unsigned FeatureWord;
16959 unsigned BitMask;
16960 std::tie(FeatureWord, BitMask) =
16961 StringSwitch<std::pair<unsigned, unsigned>>(CPUStr)
16962 #define PPC_LNX_FEATURE(Name, Description, EnumName, Bitmask, FA_WORD) \
16963 .Case(Name, {FA_WORD, Bitmask})
16964 #include "llvm/TargetParser/PPCTargetParser.def"
16965 .Default({0, 0});
16966 if (!BitMask)
16967 return Builder.getFalse();
16968 Value *Op0 = llvm::ConstantInt::get(Int32Ty, FeatureWord);
16969 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_fixed_addr_ld);
16970 Value *TheCall = Builder.CreateCall(F, {Op0}, "cpu_supports");
16971 Value *Mask =
16972 Builder.CreateAnd(TheCall, llvm::ConstantInt::get(Int32Ty, BitMask));
16973 return Builder.CreateICmpNE(Mask, llvm::Constant::getNullValue(Int32Ty));
16974 #undef PPC_FAWORD_HWCAP
16975 #undef PPC_FAWORD_HWCAP2
16976 #undef PPC_FAWORD_CPUID
16977 }
16978
16979 // __builtin_ppc_get_timebase is GCC 4.8+'s PowerPC-specific name for what we
16980 // call __builtin_readcyclecounter.
16981 case PPC::BI__builtin_ppc_get_timebase:
16982 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::readcyclecounter));
16983
16984 // vec_ld, vec_xl_be, vec_lvsl, vec_lvsr
16985 case PPC::BI__builtin_altivec_lvx:
16986 case PPC::BI__builtin_altivec_lvxl:
16987 case PPC::BI__builtin_altivec_lvebx:
16988 case PPC::BI__builtin_altivec_lvehx:
16989 case PPC::BI__builtin_altivec_lvewx:
16990 case PPC::BI__builtin_altivec_lvsl:
16991 case PPC::BI__builtin_altivec_lvsr:
16992 case PPC::BI__builtin_vsx_lxvd2x:
16993 case PPC::BI__builtin_vsx_lxvw4x:
16994 case PPC::BI__builtin_vsx_lxvd2x_be:
16995 case PPC::BI__builtin_vsx_lxvw4x_be:
16996 case PPC::BI__builtin_vsx_lxvl:
16997 case PPC::BI__builtin_vsx_lxvll:
16998 {
16999 SmallVector<Value *, 2> Ops;
17000 Ops.push_back(EmitScalarExpr(E->getArg(0)));
17001 Ops.push_back(EmitScalarExpr(E->getArg(1)));
17002 if (!(BuiltinID == PPC::BI__builtin_vsx_lxvl ||
17003 BuiltinID == PPC::BI__builtin_vsx_lxvll)) {
17004 Ops[0] = Builder.CreateGEP(Int8Ty, Ops[1], Ops[0]);
17005 Ops.pop_back();
17006 }
17007
17008 switch (BuiltinID) {
17009 default: llvm_unreachable("Unsupported ld/lvsl/lvsr intrinsic!");
17010 case PPC::BI__builtin_altivec_lvx:
17011 ID = Intrinsic::ppc_altivec_lvx;
17012 break;
17013 case PPC::BI__builtin_altivec_lvxl:
17014 ID = Intrinsic::ppc_altivec_lvxl;
17015 break;
17016 case PPC::BI__builtin_altivec_lvebx:
17017 ID = Intrinsic::ppc_altivec_lvebx;
17018 break;
17019 case PPC::BI__builtin_altivec_lvehx:
17020 ID = Intrinsic::ppc_altivec_lvehx;
17021 break;
17022 case PPC::BI__builtin_altivec_lvewx:
17023 ID = Intrinsic::ppc_altivec_lvewx;
17024 break;
17025 case PPC::BI__builtin_altivec_lvsl:
17026 ID = Intrinsic::ppc_altivec_lvsl;
17027 break;
17028 case PPC::BI__builtin_altivec_lvsr:
17029 ID = Intrinsic::ppc_altivec_lvsr;
17030 break;
17031 case PPC::BI__builtin_vsx_lxvd2x:
17032 ID = Intrinsic::ppc_vsx_lxvd2x;
17033 break;
17034 case PPC::BI__builtin_vsx_lxvw4x:
17035 ID = Intrinsic::ppc_vsx_lxvw4x;
17036 break;
17037 case PPC::BI__builtin_vsx_lxvd2x_be:
17038 ID = Intrinsic::ppc_vsx_lxvd2x_be;
17039 break;
17040 case PPC::BI__builtin_vsx_lxvw4x_be:
17041 ID = Intrinsic::ppc_vsx_lxvw4x_be;
17042 break;
17043 case PPC::BI__builtin_vsx_lxvl:
17044 ID = Intrinsic::ppc_vsx_lxvl;
17045 break;
17046 case PPC::BI__builtin_vsx_lxvll:
17047 ID = Intrinsic::ppc_vsx_lxvll;
17048 break;
17049 }
17050 llvm::Function *F = CGM.getIntrinsic(ID);
17051 return Builder.CreateCall(F, Ops, "");
17052 }
17053
17054 // vec_st, vec_xst_be
17055 case PPC::BI__builtin_altivec_stvx:
17056 case PPC::BI__builtin_altivec_stvxl:
17057 case PPC::BI__builtin_altivec_stvebx:
17058 case PPC::BI__builtin_altivec_stvehx:
17059 case PPC::BI__builtin_altivec_stvewx:
17060 case PPC::BI__builtin_vsx_stxvd2x:
17061 case PPC::BI__builtin_vsx_stxvw4x:
17062 case PPC::BI__builtin_vsx_stxvd2x_be:
17063 case PPC::BI__builtin_vsx_stxvw4x_be:
17064 case PPC::BI__builtin_vsx_stxvl:
17065 case PPC::BI__builtin_vsx_stxvll:
17066 {
17067 SmallVector<Value *, 3> Ops;
17068 Ops.push_back(EmitScalarExpr(E->getArg(0)));
17069 Ops.push_back(EmitScalarExpr(E->getArg(1)));
17070 Ops.push_back(EmitScalarExpr(E->getArg(2)));
17071 if (!(BuiltinID == PPC::BI__builtin_vsx_stxvl ||
17072 BuiltinID == PPC::BI__builtin_vsx_stxvll)) {
17073 Ops[1] = Builder.CreateGEP(Int8Ty, Ops[2], Ops[1]);
17074 Ops.pop_back();
17075 }
17076
17077 switch (BuiltinID) {
17078 default: llvm_unreachable("Unsupported st intrinsic!");
17079 case PPC::BI__builtin_altivec_stvx:
17080 ID = Intrinsic::ppc_altivec_stvx;
17081 break;
17082 case PPC::BI__builtin_altivec_stvxl:
17083 ID = Intrinsic::ppc_altivec_stvxl;
17084 break;
17085 case PPC::BI__builtin_altivec_stvebx:
17086 ID = Intrinsic::ppc_altivec_stvebx;
17087 break;
17088 case PPC::BI__builtin_altivec_stvehx:
17089 ID = Intrinsic::ppc_altivec_stvehx;
17090 break;
17091 case PPC::BI__builtin_altivec_stvewx:
17092 ID = Intrinsic::ppc_altivec_stvewx;
17093 break;
17094 case PPC::BI__builtin_vsx_stxvd2x:
17095 ID = Intrinsic::ppc_vsx_stxvd2x;
17096 break;
17097 case PPC::BI__builtin_vsx_stxvw4x:
17098 ID = Intrinsic::ppc_vsx_stxvw4x;
17099 break;
17100 case PPC::BI__builtin_vsx_stxvd2x_be:
17101 ID = Intrinsic::ppc_vsx_stxvd2x_be;
17102 break;
17103 case PPC::BI__builtin_vsx_stxvw4x_be:
17104 ID = Intrinsic::ppc_vsx_stxvw4x_be;
17105 break;
17106 case PPC::BI__builtin_vsx_stxvl:
17107 ID = Intrinsic::ppc_vsx_stxvl;
17108 break;
17109 case PPC::BI__builtin_vsx_stxvll:
17110 ID = Intrinsic::ppc_vsx_stxvll;
17111 break;
17112 }
17113 llvm::Function *F = CGM.getIntrinsic(ID);
17114 return Builder.CreateCall(F, Ops, "");
17115 }
17116 case PPC::BI__builtin_vsx_ldrmb: {
17117 // This essentially boils down to performing an unaligned VMX load sequence
17118 // so as to avoid crossing a page boundary, and then shuffling the elements
17119 // into the right side of the vector register.
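// As a rough sketch (illustrative only, not the exact emitted IR; %p_plus_6 is
// an invented name), a 7-byte __builtin_vsx_ldrmb on a big-endian target emits
// something like:
//   %lo   = call <4 x i32> @llvm.ppc.altivec.lvx(ptr %p)
//   %hi   = call <4 x i32> @llvm.ppc.altivec.lvx(ptr %p_plus_6)
//   %mask = call <16 x i8> @llvm.ppc.altivec.lvsl(ptr %p)
//   %all  = call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> %lo, <4 x i32> %hi,
//                                                  <16 x i8> %mask)
// followed by a second vperm against zero that moves the 7 loaded bytes into
// the rightmost lanes of the result.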
17120 Value *Op0 = EmitScalarExpr(E->getArg(0));
17121 Value *Op1 = EmitScalarExpr(E->getArg(1));
17122 int64_t NumBytes = cast<ConstantInt>(Op1)->getZExtValue();
17123 llvm::Type *ResTy = ConvertType(E->getType());
17124 bool IsLE = getTarget().isLittleEndian();
17125
17126 // If the user wants the entire vector, just load the entire vector.
17127 if (NumBytes == 16) {
17128 Value *LD =
17129 Builder.CreateLoad(Address(Op0, ResTy, CharUnits::fromQuantity(1)));
17130 if (!IsLE)
17131 return LD;
17132
17133 // Reverse the bytes on LE.
17134 SmallVector<int, 16> RevMask;
17135 for (int Idx = 0; Idx < 16; Idx++)
17136 RevMask.push_back(15 - Idx);
17137 return Builder.CreateShuffleVector(LD, LD, RevMask);
17138 }
17139
17140 llvm::Function *Lvx = CGM.getIntrinsic(Intrinsic::ppc_altivec_lvx);
17141 llvm::Function *Lvs = CGM.getIntrinsic(IsLE ? Intrinsic::ppc_altivec_lvsr
17142 : Intrinsic::ppc_altivec_lvsl);
17143 llvm::Function *Vperm = CGM.getIntrinsic(Intrinsic::ppc_altivec_vperm);
17144 Value *HiMem = Builder.CreateGEP(
17145 Int8Ty, Op0, ConstantInt::get(Op1->getType(), NumBytes - 1));
17146 Value *LoLd = Builder.CreateCall(Lvx, Op0, "ld.lo");
17147 Value *HiLd = Builder.CreateCall(Lvx, HiMem, "ld.hi");
17148 Value *Mask1 = Builder.CreateCall(Lvs, Op0, "mask1");
17149
17150 Op0 = IsLE ? HiLd : LoLd;
17151 Op1 = IsLE ? LoLd : HiLd;
17152 Value *AllElts = Builder.CreateCall(Vperm, {Op0, Op1, Mask1}, "shuffle1");
17153 Constant *Zero = llvm::Constant::getNullValue(IsLE ? ResTy : AllElts->getType());
17154
17155 if (IsLE) {
17156 SmallVector<int, 16> Consts;
17157 for (int Idx = 0; Idx < 16; Idx++) {
17158 int Val = (NumBytes - Idx - 1 >= 0) ? (NumBytes - Idx - 1)
17159 : 16 - (NumBytes - Idx);
17160 Consts.push_back(Val);
17161 }
17162 return Builder.CreateShuffleVector(Builder.CreateBitCast(AllElts, ResTy),
17163 Zero, Consts);
17164 }
17165 SmallVector<Constant *, 16> Consts;
17166 for (int Idx = 0; Idx < 16; Idx++)
17167 Consts.push_back(Builder.getInt8(NumBytes + Idx));
17168 Value *Mask2 = ConstantVector::get(Consts);
17169 return Builder.CreateBitCast(
17170 Builder.CreateCall(Vperm, {Zero, AllElts, Mask2}, "shuffle2"), ResTy);
17171 }
17172 case PPC::BI__builtin_vsx_strmb: {
17173 Value *Op0 = EmitScalarExpr(E->getArg(0));
17174 Value *Op1 = EmitScalarExpr(E->getArg(1));
17175 Value *Op2 = EmitScalarExpr(E->getArg(2));
17176 int64_t NumBytes = cast<ConstantInt>(Op1)->getZExtValue();
17177 bool IsLE = getTarget().isLittleEndian();
17178 auto StoreSubVec = [&](unsigned Width, unsigned Offset, unsigned EltNo) {
17179 // When storing the whole vector, store it directly on BE; on LE, reverse
17180 // the bytes first and then store.
17181 if (Width == 16) {
17182 Value *StVec = Op2;
17183 if (IsLE) {
17184 SmallVector<int, 16> RevMask;
17185 for (int Idx = 0; Idx < 16; Idx++)
17186 RevMask.push_back(15 - Idx);
17187 StVec = Builder.CreateShuffleVector(Op2, Op2, RevMask);
17188 }
17189 return Builder.CreateStore(
17190 StVec, Address(Op0, Op2->getType(), CharUnits::fromQuantity(1)));
17191 }
17192 auto *ConvTy = Int64Ty;
17193 unsigned NumElts = 0;
17194 switch (Width) {
17195 default:
17196 llvm_unreachable("width for stores must be a power of 2");
17197 case 8:
17198 ConvTy = Int64Ty;
17199 NumElts = 2;
17200 break;
17201 case 4:
17202 ConvTy = Int32Ty;
17203 NumElts = 4;
17204 break;
17205 case 2:
17206 ConvTy = Int16Ty;
17207 NumElts = 8;
17208 break;
17209 case 1:
17210 ConvTy = Int8Ty;
17211 NumElts = 16;
17212 break;
17213 }
17214 Value *Vec = Builder.CreateBitCast(
17215 Op2, llvm::FixedVectorType::get(ConvTy, NumElts));
17216 Value *Ptr =
17217 Builder.CreateGEP(Int8Ty, Op0, ConstantInt::get(Int64Ty, Offset));
17218 Value *Elt = Builder.CreateExtractElement(Vec, EltNo);
17219 if (IsLE && Width > 1) {
17220 Function *F = CGM.getIntrinsic(Intrinsic::bswap, ConvTy);
17221 Elt = Builder.CreateCall(F, Elt);
17222 }
17223 return Builder.CreateStore(
17224 Elt, Address(Ptr, ConvTy, CharUnits::fromQuantity(1)));
17225 };
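// The sequence of checks below decomposes the store into power-of-two
// sub-stores. For example (a sketch of the behavior), __builtin_vsx_strmb(p, 7, v)
// on a big-endian target emits roughly:
//   store i32 (element 3 of the <4 x i32> view of v)  at p+3
//   store i16 (element 5 of the <8 x i16> view of v)  at p+1
//   store i8  (element 9 of the <16 x i8> view of v)  at p+0
// with the sub-elements additionally byte-swapped on LE.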
17226 unsigned Stored = 0;
17227 unsigned RemainingBytes = NumBytes;
17228 Value *Result;
17229 if (NumBytes == 16)
17230 return StoreSubVec(16, 0, 0);
17231 if (NumBytes >= 8) {
17232 Result = StoreSubVec(8, NumBytes - 8, IsLE ? 0 : 1);
17233 RemainingBytes -= 8;
17234 Stored += 8;
17235 }
17236 if (RemainingBytes >= 4) {
17237 Result = StoreSubVec(4, NumBytes - Stored - 4,
17238 IsLE ? (Stored >> 2) : 3 - (Stored >> 2));
17239 RemainingBytes -= 4;
17240 Stored += 4;
17241 }
17242 if (RemainingBytes >= 2) {
17243 Result = StoreSubVec(2, NumBytes - Stored - 2,
17244 IsLE ? (Stored >> 1) : 7 - (Stored >> 1));
17245 RemainingBytes -= 2;
17246 Stored += 2;
17247 }
17248 if (RemainingBytes)
17249 Result =
17250 StoreSubVec(1, NumBytes - Stored - 1, IsLE ? Stored : 15 - Stored);
17251 return Result;
17252 }
17253 // Square root
17254 case PPC::BI__builtin_vsx_xvsqrtsp:
17255 case PPC::BI__builtin_vsx_xvsqrtdp: {
17256 llvm::Type *ResultType = ConvertType(E->getType());
17257 Value *X = EmitScalarExpr(E->getArg(0));
17258 if (Builder.getIsFPConstrained()) {
17259 llvm::Function *F = CGM.getIntrinsic(
17260 Intrinsic::experimental_constrained_sqrt, ResultType);
17261 return Builder.CreateConstrainedFPCall(F, X);
17262 } else {
17263 llvm::Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType);
17264 return Builder.CreateCall(F, X);
17265 }
17266 }
17267 // Count leading zeros
17268 case PPC::BI__builtin_altivec_vclzb:
17269 case PPC::BI__builtin_altivec_vclzh:
17270 case PPC::BI__builtin_altivec_vclzw:
17271 case PPC::BI__builtin_altivec_vclzd: {
17272 llvm::Type *ResultType = ConvertType(E->getType());
17273 Value *X = EmitScalarExpr(E->getArg(0));
17274 Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
17275 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType);
17276 return Builder.CreateCall(F, {X, Undef});
17277 }
17278 case PPC::BI__builtin_altivec_vctzb:
17279 case PPC::BI__builtin_altivec_vctzh:
17280 case PPC::BI__builtin_altivec_vctzw:
17281 case PPC::BI__builtin_altivec_vctzd: {
17282 llvm::Type *ResultType = ConvertType(E->getType());
17283 Value *X = EmitScalarExpr(E->getArg(0));
17284 Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
17285 Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType);
17286 return Builder.CreateCall(F, {X, Undef});
17287 }
17288 case PPC::BI__builtin_altivec_vinsd:
17289 case PPC::BI__builtin_altivec_vinsw:
17290 case PPC::BI__builtin_altivec_vinsd_elt:
17291 case PPC::BI__builtin_altivec_vinsw_elt: {
17292 llvm::Type *ResultType = ConvertType(E->getType());
17293 Value *Op0 = EmitScalarExpr(E->getArg(0));
17294 Value *Op1 = EmitScalarExpr(E->getArg(1));
17295 Value *Op2 = EmitScalarExpr(E->getArg(2));
17296
17297 bool IsUnaligned = (BuiltinID == PPC::BI__builtin_altivec_vinsw ||
17298 BuiltinID == PPC::BI__builtin_altivec_vinsd);
17299
17300 bool Is32bit = (BuiltinID == PPC::BI__builtin_altivec_vinsw ||
17301 BuiltinID == PPC::BI__builtin_altivec_vinsw_elt);
17302
17303 // The third argument must be a compile time constant.
17304 ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op2);
17305 assert(ArgCI &&
17306 "Third Arg to vinsw/vinsd intrinsic must be a constant integer!");
17307
17308 // The valid range for the third argument depends on the input type and the
17309 // builtin being called.
17310 int ValidMaxValue = 0;
17311 if (IsUnaligned)
17312 ValidMaxValue = (Is32bit) ? 12 : 8;
17313 else
17314 ValidMaxValue = (Is32bit) ? 3 : 1;
17315
17316 // Get value of third argument.
17317 int64_t ConstArg = ArgCI->getSExtValue();
17318
17319 // Compose range checking error message.
17320 std::string RangeErrMsg = IsUnaligned ? "byte" : "element";
17321 RangeErrMsg += " number " + llvm::to_string(ConstArg);
17322 RangeErrMsg += " is outside of the valid range [0, ";
17323 RangeErrMsg += llvm::to_string(ValidMaxValue) + "]";
17324
17325 // Issue error if third argument is not within the valid range.
17326 if (ConstArg < 0 || ConstArg > ValidMaxValue)
17327 CGM.Error(E->getExprLoc(), RangeErrMsg);
17328
17329 // Input to vec_replace_elt is an element index, convert to byte index.
17330 if (!IsUnaligned) {
17331 ConstArg *= Is32bit ? 4 : 8;
17332 // Fix the constant according to endianness.
17333 if (getTarget().isLittleEndian())
17334 ConstArg = (Is32bit ? 12 : 8) - ConstArg;
17335 }
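// For example (illustrative), vec_replace_elt on a vector of four 32-bit
// elements with element index 1 gives ConstArg = 1 * 4 = 4, which on a
// little-endian target is then remapped to 12 - 4 = 8.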
17336
17337 ID = Is32bit ? Intrinsic::ppc_altivec_vinsw : Intrinsic::ppc_altivec_vinsd;
17338 Op2 = ConstantInt::getSigned(Int32Ty, ConstArg);
17339 // Casting input to vector int as per intrinsic definition.
17340 Op0 =
17341 Is32bit
17342 ? Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int32Ty, 4))
17343 : Builder.CreateBitCast(Op0,
17344 llvm::FixedVectorType::get(Int64Ty, 2));
17345 return Builder.CreateBitCast(
17346 Builder.CreateCall(CGM.getIntrinsic(ID), {Op0, Op1, Op2}), ResultType);
17347 }
17348 case PPC::BI__builtin_altivec_vpopcntb:
17349 case PPC::BI__builtin_altivec_vpopcnth:
17350 case PPC::BI__builtin_altivec_vpopcntw:
17351 case PPC::BI__builtin_altivec_vpopcntd: {
17352 llvm::Type *ResultType = ConvertType(E->getType());
17353 Value *X = EmitScalarExpr(E->getArg(0));
17354 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
17355 return Builder.CreateCall(F, X);
17356 }
17357 case PPC::BI__builtin_altivec_vadduqm:
17358 case PPC::BI__builtin_altivec_vsubuqm: {
17359 Value *Op0 = EmitScalarExpr(E->getArg(0));
17360 Value *Op1 = EmitScalarExpr(E->getArg(1));
17361 llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
17362 Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int128Ty, 1));
17363 Op1 = Builder.CreateBitCast(Op1, llvm::FixedVectorType::get(Int128Ty, 1));
17364 if (BuiltinID == PPC::BI__builtin_altivec_vadduqm)
17365 return Builder.CreateAdd(Op0, Op1, "vadduqm");
17366 else
17367 return Builder.CreateSub(Op0, Op1, "vsubuqm");
17368 }
17369 case PPC::BI__builtin_altivec_vaddcuq_c:
17370 case PPC::BI__builtin_altivec_vsubcuq_c: {
17371 SmallVector<Value *, 2> Ops;
17372 Value *Op0 = EmitScalarExpr(E->getArg(0));
17373 Value *Op1 = EmitScalarExpr(E->getArg(1));
17374 llvm::Type *V1I128Ty = llvm::FixedVectorType::get(
17375 llvm::IntegerType::get(getLLVMContext(), 128), 1);
17376 Ops.push_back(Builder.CreateBitCast(Op0, V1I128Ty));
17377 Ops.push_back(Builder.CreateBitCast(Op1, V1I128Ty));
17378 ID = (BuiltinID == PPC::BI__builtin_altivec_vaddcuq_c)
17379 ? Intrinsic::ppc_altivec_vaddcuq
17380 : Intrinsic::ppc_altivec_vsubcuq;
17381 return Builder.CreateCall(CGM.getIntrinsic(ID), Ops, "");
17382 }
17383 case PPC::BI__builtin_altivec_vaddeuqm_c:
17384 case PPC::BI__builtin_altivec_vaddecuq_c:
17385 case PPC::BI__builtin_altivec_vsubeuqm_c:
17386 case PPC::BI__builtin_altivec_vsubecuq_c: {
17387 SmallVector<Value *, 3> Ops;
17388 Value *Op0 = EmitScalarExpr(E->getArg(0));
17389 Value *Op1 = EmitScalarExpr(E->getArg(1));
17390 Value *Op2 = EmitScalarExpr(E->getArg(2));
17391 llvm::Type *V1I128Ty = llvm::FixedVectorType::get(
17392 llvm::IntegerType::get(getLLVMContext(), 128), 1);
17393 Ops.push_back(Builder.CreateBitCast(Op0, V1I128Ty));
17394 Ops.push_back(Builder.CreateBitCast(Op1, V1I128Ty));
17395 Ops.push_back(Builder.CreateBitCast(Op2, V1I128Ty));
17396 switch (BuiltinID) {
17397 default:
17398 llvm_unreachable("Unsupported intrinsic!");
17399 case PPC::BI__builtin_altivec_vaddeuqm_c:
17400 ID = Intrinsic::ppc_altivec_vaddeuqm;
17401 break;
17402 case PPC::BI__builtin_altivec_vaddecuq_c:
17403 ID = Intrinsic::ppc_altivec_vaddecuq;
17404 break;
17405 case PPC::BI__builtin_altivec_vsubeuqm_c:
17406 ID = Intrinsic::ppc_altivec_vsubeuqm;
17407 break;
17408 case PPC::BI__builtin_altivec_vsubecuq_c:
17409 ID = Intrinsic::ppc_altivec_vsubecuq;
17410 break;
17411 }
17412 return Builder.CreateCall(CGM.getIntrinsic(ID), Ops, "");
17413 }
17414 case PPC::BI__builtin_ppc_rldimi:
17415 case PPC::BI__builtin_ppc_rlwimi: {
17416 Value *Op0 = EmitScalarExpr(E->getArg(0));
17417 Value *Op1 = EmitScalarExpr(E->getArg(1));
17418 Value *Op2 = EmitScalarExpr(E->getArg(2));
17419 Value *Op3 = EmitScalarExpr(E->getArg(3));
17420 // rldimi is a 64-bit instruction; on 32-bit targets, expand the intrinsic
17421 // before isel to leverage the peephole optimizer and avoid legalization effort.
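// As a sketch, on a 32-bit target __builtin_ppc_rldimi(a, b, sh, mask)
// expands to roughly: (rotl64(a, sh) & mask) | (b & ~mask).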
17422 if (BuiltinID == PPC::BI__builtin_ppc_rldimi &&
17423 !getTarget().getTriple().isPPC64()) {
17424 Function *F = CGM.getIntrinsic(Intrinsic::fshl, Op0->getType());
17425 Op2 = Builder.CreateZExt(Op2, Int64Ty);
17426 Value *Shift = Builder.CreateCall(F, {Op0, Op0, Op2});
17427 return Builder.CreateOr(Builder.CreateAnd(Shift, Op3),
17428 Builder.CreateAnd(Op1, Builder.CreateNot(Op3)));
17429 }
17430 return Builder.CreateCall(
17431 CGM.getIntrinsic(BuiltinID == PPC::BI__builtin_ppc_rldimi
17432 ? Intrinsic::ppc_rldimi
17433 : Intrinsic::ppc_rlwimi),
17434 {Op0, Op1, Op2, Op3});
17435 }
17436 case PPC::BI__builtin_ppc_rlwnm: {
17437 Value *Op0 = EmitScalarExpr(E->getArg(0));
17438 Value *Op1 = EmitScalarExpr(E->getArg(1));
17439 Value *Op2 = EmitScalarExpr(E->getArg(2));
17440 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_rlwnm),
17441 {Op0, Op1, Op2});
17442 }
17443 case PPC::BI__builtin_ppc_poppar4:
17444 case PPC::BI__builtin_ppc_poppar8: {
17445 Value *Op0 = EmitScalarExpr(E->getArg(0));
17446 llvm::Type *ArgType = Op0->getType();
17447 Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
17448 Value *Tmp = Builder.CreateCall(F, Op0);
17449
17450 llvm::Type *ResultType = ConvertType(E->getType());
17451 Value *Result = Builder.CreateAnd(Tmp, llvm::ConstantInt::get(ArgType, 1));
17452 if (Result->getType() != ResultType)
17453 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
17454 "cast");
17455 return Result;
17456 }
17457 case PPC::BI__builtin_ppc_cmpb: {
17458 Value *Op0 = EmitScalarExpr(E->getArg(0));
17459 Value *Op1 = EmitScalarExpr(E->getArg(1));
17460 if (getTarget().getTriple().isPPC64()) {
17461 Function *F =
17462 CGM.getIntrinsic(Intrinsic::ppc_cmpb, {Int64Ty, Int64Ty, Int64Ty});
17463 return Builder.CreateCall(F, {Op0, Op1}, "cmpb");
17464 }
17465 // For 32 bit, emit the code as below:
17466 // %conv = trunc i64 %a to i32
17467 // %conv1 = trunc i64 %b to i32
17468 // %shr = lshr i64 %a, 32
17469 // %conv2 = trunc i64 %shr to i32
17470 // %shr3 = lshr i64 %b, 32
17471 // %conv4 = trunc i64 %shr3 to i32
17472 // %0 = tail call i32 @llvm.ppc.cmpb32(i32 %conv, i32 %conv1)
17473 // %conv5 = zext i32 %0 to i64
17474 // %1 = tail call i32 @llvm.ppc.cmpb32(i32 %conv2, i32 %conv4)
17475 // %conv614 = zext i32 %1 to i64
17476 // %shl = shl nuw i64 %conv614, 32
17477 // %or = or i64 %shl, %conv5
17478 // ret i64 %or
17479 Function *F =
17480 CGM.getIntrinsic(Intrinsic::ppc_cmpb, {Int32Ty, Int32Ty, Int32Ty});
17481 Value *ArgOneLo = Builder.CreateTrunc(Op0, Int32Ty);
17482 Value *ArgTwoLo = Builder.CreateTrunc(Op1, Int32Ty);
17483 Constant *ShiftAmt = ConstantInt::get(Int64Ty, 32);
17484 Value *ArgOneHi =
17485 Builder.CreateTrunc(Builder.CreateLShr(Op0, ShiftAmt), Int32Ty);
17486 Value *ArgTwoHi =
17487 Builder.CreateTrunc(Builder.CreateLShr(Op1, ShiftAmt), Int32Ty);
17488 Value *ResLo = Builder.CreateZExt(
17489 Builder.CreateCall(F, {ArgOneLo, ArgTwoLo}, "cmpb"), Int64Ty);
17490 Value *ResHiShift = Builder.CreateZExt(
17491 Builder.CreateCall(F, {ArgOneHi, ArgTwoHi}, "cmpb"), Int64Ty);
17492 Value *ResHi = Builder.CreateShl(ResHiShift, ShiftAmt);
17493 return Builder.CreateOr(ResLo, ResHi);
17494 }
17495 // Copy sign
17496 case PPC::BI__builtin_vsx_xvcpsgnsp:
17497 case PPC::BI__builtin_vsx_xvcpsgndp: {
17498 llvm::Type *ResultType = ConvertType(E->getType());
17499 Value *X = EmitScalarExpr(E->getArg(0));
17500 Value *Y = EmitScalarExpr(E->getArg(1));
17501 ID = Intrinsic::copysign;
17502 llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
17503 return Builder.CreateCall(F, {X, Y});
17504 }
17505 // Rounding/truncation
17506 case PPC::BI__builtin_vsx_xvrspip:
17507 case PPC::BI__builtin_vsx_xvrdpip:
17508 case PPC::BI__builtin_vsx_xvrdpim:
17509 case PPC::BI__builtin_vsx_xvrspim:
17510 case PPC::BI__builtin_vsx_xvrdpi:
17511 case PPC::BI__builtin_vsx_xvrspi:
17512 case PPC::BI__builtin_vsx_xvrdpic:
17513 case PPC::BI__builtin_vsx_xvrspic:
17514 case PPC::BI__builtin_vsx_xvrdpiz:
17515 case PPC::BI__builtin_vsx_xvrspiz: {
17516 llvm::Type *ResultType = ConvertType(E->getType());
17517 Value *X = EmitScalarExpr(E->getArg(0));
17518 if (BuiltinID == PPC::BI__builtin_vsx_xvrdpim ||
17519 BuiltinID == PPC::BI__builtin_vsx_xvrspim)
17520 ID = Builder.getIsFPConstrained()
17521 ? Intrinsic::experimental_constrained_floor
17522 : Intrinsic::floor;
17523 else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpi ||
17524 BuiltinID == PPC::BI__builtin_vsx_xvrspi)
17525 ID = Builder.getIsFPConstrained()
17526 ? Intrinsic::experimental_constrained_round
17527 : Intrinsic::round;
17528 else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpic ||
17529 BuiltinID == PPC::BI__builtin_vsx_xvrspic)
17530 ID = Builder.getIsFPConstrained()
17531 ? Intrinsic::experimental_constrained_rint
17532 : Intrinsic::rint;
17533 else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpip ||
17534 BuiltinID == PPC::BI__builtin_vsx_xvrspip)
17535 ID = Builder.getIsFPConstrained()
17536 ? Intrinsic::experimental_constrained_ceil
17537 : Intrinsic::ceil;
17538 else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpiz ||
17539 BuiltinID == PPC::BI__builtin_vsx_xvrspiz)
17540 ID = Builder.getIsFPConstrained()
17541 ? Intrinsic::experimental_constrained_trunc
17542 : Intrinsic::trunc;
17543 llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
17544 return Builder.getIsFPConstrained() ? Builder.CreateConstrainedFPCall(F, X)
17545 : Builder.CreateCall(F, X);
17546 }
17547
17548 // Absolute value
17549 case PPC::BI__builtin_vsx_xvabsdp:
17550 case PPC::BI__builtin_vsx_xvabssp: {
17551 llvm::Type *ResultType = ConvertType(E->getType());
17552 Value *X = EmitScalarExpr(E->getArg(0));
17553 llvm::Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
17554 return Builder.CreateCall(F, X);
17555 }
17556
17557 // Fastmath by default
17558 case PPC::BI__builtin_ppc_recipdivf:
17559 case PPC::BI__builtin_ppc_recipdivd:
17560 case PPC::BI__builtin_ppc_rsqrtf:
17561 case PPC::BI__builtin_ppc_rsqrtd: {
17562 FastMathFlags FMF = Builder.getFastMathFlags();
17563 Builder.getFastMathFlags().setFast();
17564 llvm::Type *ResultType = ConvertType(E->getType());
17565 Value *X = EmitScalarExpr(E->getArg(0));
17566
17567 if (BuiltinID == PPC::BI__builtin_ppc_recipdivf ||
17568 BuiltinID == PPC::BI__builtin_ppc_recipdivd) {
17569 Value *Y = EmitScalarExpr(E->getArg(1));
17570 Value *FDiv = Builder.CreateFDiv(X, Y, "recipdiv");
17571 Builder.getFastMathFlags() &= (FMF);
17572 return FDiv;
17573 }
17574 auto *One = ConstantFP::get(ResultType, 1.0);
17575 llvm::Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType);
17576 Value *FDiv = Builder.CreateFDiv(One, Builder.CreateCall(F, X), "rsqrt");
17577 Builder.getFastMathFlags() &= (FMF);
17578 return FDiv;
17579 }
17580 case PPC::BI__builtin_ppc_alignx: {
17581 Value *Op0 = EmitScalarExpr(E->getArg(0));
17582 Value *Op1 = EmitScalarExpr(E->getArg(1));
17583 ConstantInt *AlignmentCI = cast<ConstantInt>(Op0);
17584 if (AlignmentCI->getValue().ugt(llvm::Value::MaximumAlignment))
17585 AlignmentCI = ConstantInt::get(AlignmentCI->getIntegerType(),
17586 llvm::Value::MaximumAlignment);
17587
17588 emitAlignmentAssumption(Op1, E->getArg(1),
17589 /*The expr loc is sufficient.*/ SourceLocation(),
17590 AlignmentCI, nullptr);
17591 return Op1;
17592 }
17593 case PPC::BI__builtin_ppc_rdlam: {
17594 Value *Op0 = EmitScalarExpr(E->getArg(0));
17595 Value *Op1 = EmitScalarExpr(E->getArg(1));
17596 Value *Op2 = EmitScalarExpr(E->getArg(2));
17597 llvm::Type *Ty = Op0->getType();
17598 Value *ShiftAmt = Builder.CreateIntCast(Op1, Ty, false);
17599 Function *F = CGM.getIntrinsic(Intrinsic::fshl, Ty);
17600 Value *Rotate = Builder.CreateCall(F, {Op0, Op0, ShiftAmt});
17601 return Builder.CreateAnd(Rotate, Op2);
17602 }
17603 case PPC::BI__builtin_ppc_load2r: {
17604 Function *F = CGM.getIntrinsic(Intrinsic::ppc_load2r);
17605 Value *Op0 = EmitScalarExpr(E->getArg(0));
17606 Value *LoadIntrinsic = Builder.CreateCall(F, {Op0});
17607 return Builder.CreateTrunc(LoadIntrinsic, Int16Ty);
17608 }
17609 // FMA variations
17610 case PPC::BI__builtin_ppc_fnmsub:
17611 case PPC::BI__builtin_ppc_fnmsubs:
17612 case PPC::BI__builtin_vsx_xvmaddadp:
17613 case PPC::BI__builtin_vsx_xvmaddasp:
17614 case PPC::BI__builtin_vsx_xvnmaddadp:
17615 case PPC::BI__builtin_vsx_xvnmaddasp:
17616 case PPC::BI__builtin_vsx_xvmsubadp:
17617 case PPC::BI__builtin_vsx_xvmsubasp:
17618 case PPC::BI__builtin_vsx_xvnmsubadp:
17619 case PPC::BI__builtin_vsx_xvnmsubasp: {
17620 llvm::Type *ResultType = ConvertType(E->getType());
17621 Value *X = EmitScalarExpr(E->getArg(0));
17622 Value *Y = EmitScalarExpr(E->getArg(1));
17623 Value *Z = EmitScalarExpr(E->getArg(2));
17624 llvm::Function *F;
17625 if (Builder.getIsFPConstrained())
17626 F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType);
17627 else
17628 F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
17629 switch (BuiltinID) {
17630 case PPC::BI__builtin_vsx_xvmaddadp:
17631 case PPC::BI__builtin_vsx_xvmaddasp:
17632 if (Builder.getIsFPConstrained())
17633 return Builder.CreateConstrainedFPCall(F, {X, Y, Z});
17634 else
17635 return Builder.CreateCall(F, {X, Y, Z});
17636 case PPC::BI__builtin_vsx_xvnmaddadp:
17637 case PPC::BI__builtin_vsx_xvnmaddasp:
17638 if (Builder.getIsFPConstrained())
17639 return Builder.CreateFNeg(
17640 Builder.CreateConstrainedFPCall(F, {X, Y, Z}), "neg");
17641 else
17642 return Builder.CreateFNeg(Builder.CreateCall(F, {X, Y, Z}), "neg");
17643 case PPC::BI__builtin_vsx_xvmsubadp:
17644 case PPC::BI__builtin_vsx_xvmsubasp:
17645 if (Builder.getIsFPConstrained())
17646 return Builder.CreateConstrainedFPCall(
17647 F, {X, Y, Builder.CreateFNeg(Z, "neg")});
17648 else
17649 return Builder.CreateCall(F, {X, Y, Builder.CreateFNeg(Z, "neg")});
17650 case PPC::BI__builtin_ppc_fnmsub:
17651 case PPC::BI__builtin_ppc_fnmsubs:
17652 case PPC::BI__builtin_vsx_xvnmsubadp:
17653 case PPC::BI__builtin_vsx_xvnmsubasp:
17654 if (Builder.getIsFPConstrained())
17655 return Builder.CreateFNeg(
17656 Builder.CreateConstrainedFPCall(
17657 F, {X, Y, Builder.CreateFNeg(Z, "neg")}),
17658 "neg");
17659 else
17660 return Builder.CreateCall(
17661 CGM.getIntrinsic(Intrinsic::ppc_fnmsub, ResultType), {X, Y, Z});
17662 }
17663 llvm_unreachable("Unknown FMA operation");
17664 return nullptr; // Suppress no-return warning
17665 }
17666
17667 case PPC::BI__builtin_vsx_insertword: {
17668 Value *Op0 = EmitScalarExpr(E->getArg(0));
17669 Value *Op1 = EmitScalarExpr(E->getArg(1));
17670 Value *Op2 = EmitScalarExpr(E->getArg(2));
17671 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxinsertw);
17672
17673 // Third argument is a compile time constant int. It must be clamped to
17674 // the range [0, 12].
17675 ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op2);
17676 assert(ArgCI &&
17677 "Third arg to xxinsertw intrinsic must be constant integer");
17678 const int64_t MaxIndex = 12;
17679 int64_t Index = std::clamp(ArgCI->getSExtValue(), (int64_t)0, MaxIndex);
17680
17681 // The builtin semantics don't exactly match the xxinsertw instruction's
17682 // semantics (which ppc_vsx_xxinsertw follows). The builtin extracts the
17683 // word from the first argument, and inserts it in the second argument. The
17684 // instruction extracts the word from its second input register and inserts
17685 // it into its first input register, so swap the first and second arguments.
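// As an illustration (a sketch of the argument massaging, not exact output),
// __builtin_vsx_insertword(a, b, 4) becomes a call to ppc_vsx_xxinsertw(b, a, 4)
// on BE; on LE the doublewords of b are additionally swapped and the index is
// remapped to 12 - 4 = 8 (see the code below).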
17686 std::swap(Op0, Op1);
17687
17688 // Need to cast the second argument from a vector of unsigned int to a
17689 // vector of long long.
17690 Op1 = Builder.CreateBitCast(Op1, llvm::FixedVectorType::get(Int64Ty, 2));
17691
17692 if (getTarget().isLittleEndian()) {
17693 // Reverse the double words in the vector we will extract from.
17694 Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int64Ty, 2));
17695 Op0 = Builder.CreateShuffleVector(Op0, Op0, ArrayRef<int>{1, 0});
17696
17697 // Reverse the index.
17698 Index = MaxIndex - Index;
17699 }
17700
17701 // Intrinsic expects the first arg to be a vector of int.
17702 Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int32Ty, 4));
17703 Op2 = ConstantInt::getSigned(Int32Ty, Index);
17704 return Builder.CreateCall(F, {Op0, Op1, Op2});
17705 }
17706
17707 case PPC::BI__builtin_vsx_extractuword: {
17708 Value *Op0 = EmitScalarExpr(E->getArg(0));
17709 Value *Op1 = EmitScalarExpr(E->getArg(1));
17710 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxextractuw);
17711
17712 // Intrinsic expects the first argument to be a vector of doublewords.
17713 Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int64Ty, 2));
17714
17715 // The second argument is a compile time constant int that needs to
17716 // be clamped to the range [0, 12].
17717 ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op1);
17718 assert(ArgCI &&
17719 "Second Arg to xxextractuw intrinsic must be a constant integer!");
17720 const int64_t MaxIndex = 12;
17721 int64_t Index = std::clamp(ArgCI->getSExtValue(), (int64_t)0, MaxIndex);
17722
17723 if (getTarget().isLittleEndian()) {
17724 // Reverse the index.
17725 Index = MaxIndex - Index;
17726 Op1 = ConstantInt::getSigned(Int32Ty, Index);
17727
17728 // Emit the call, then reverse the double words of the results vector.
17729 Value *Call = Builder.CreateCall(F, {Op0, Op1});
17730
17731 Value *ShuffleCall =
17732 Builder.CreateShuffleVector(Call, Call, ArrayRef<int>{1, 0});
17733 return ShuffleCall;
17734 } else {
17735 Op1 = ConstantInt::getSigned(Int32Ty, Index);
17736 return Builder.CreateCall(F, {Op0, Op1});
17737 }
17738 }
17739
17740 case PPC::BI__builtin_vsx_xxpermdi: {
17741 Value *Op0 = EmitScalarExpr(E->getArg(0));
17742 Value *Op1 = EmitScalarExpr(E->getArg(1));
17743 Value *Op2 = EmitScalarExpr(E->getArg(2));
17744 ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op2);
17745 assert(ArgCI && "Third arg must be constant integer!");
17746
17747 unsigned Index = ArgCI->getZExtValue();
17748 Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int64Ty, 2));
17749 Op1 = Builder.CreateBitCast(Op1, llvm::FixedVectorType::get(Int64Ty, 2));
17750
17751 // Account for endianness by treating this as just a shuffle. So we use the
17752 // same indices for both LE and BE in order to produce expected results in
17753 // both cases.
17754 int ElemIdx0 = (Index & 2) >> 1;
17755 int ElemIdx1 = 2 + (Index & 1);
17756
17757 int ShuffleElts[2] = {ElemIdx0, ElemIdx1};
17758 Value *ShuffleCall = Builder.CreateShuffleVector(Op0, Op1, ShuffleElts);
17759 QualType BIRetType = E->getType();
17760 auto RetTy = ConvertType(BIRetType);
17761 return Builder.CreateBitCast(ShuffleCall, RetTy);
17762 }
17763
17764 case PPC::BI__builtin_vsx_xxsldwi: {
17765 Value *Op0 = EmitScalarExpr(E->getArg(0));
17766 Value *Op1 = EmitScalarExpr(E->getArg(1));
17767 Value *Op2 = EmitScalarExpr(E->getArg(2));
17768 ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op2);
17769 assert(ArgCI && "Third argument must be a compile time constant");
17770 unsigned Index = ArgCI->getZExtValue() & 0x3;
17771 Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int32Ty, 4));
17772 Op1 = Builder.CreateBitCast(Op1, llvm::FixedVectorType::get(Int32Ty, 4));
17773
17774 // Create a shuffle mask
17775 int ElemIdx0;
17776 int ElemIdx1;
17777 int ElemIdx2;
17778 int ElemIdx3;
17779 if (getTarget().isLittleEndian()) {
17780 // Little endian element N comes from element 8+N-Index of the
17781 // concatenated wide vector (of course, using modulo arithmetic on
17782 // the total number of elements).
17783 ElemIdx0 = (8 - Index) % 8;
17784 ElemIdx1 = (9 - Index) % 8;
17785 ElemIdx2 = (10 - Index) % 8;
17786 ElemIdx3 = (11 - Index) % 8;
17787 } else {
17788 // Big endian ElemIdx<N> = Index + N
17789 ElemIdx0 = Index;
17790 ElemIdx1 = Index + 1;
17791 ElemIdx2 = Index + 2;
17792 ElemIdx3 = Index + 3;
17793 }
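// For example (illustrative), Index == 1 yields the shuffle mask {1, 2, 3, 4}
// on BE and {7, 0, 1, 2} on LE.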
17794
17795 int ShuffleElts[4] = {ElemIdx0, ElemIdx1, ElemIdx2, ElemIdx3};
17796 Value *ShuffleCall = Builder.CreateShuffleVector(Op0, Op1, ShuffleElts);
17797 QualType BIRetType = E->getType();
17798 auto RetTy = ConvertType(BIRetType);
17799 return Builder.CreateBitCast(ShuffleCall, RetTy);
17800 }
17801
17802 case PPC::BI__builtin_pack_vector_int128: {
17803 Value *Op0 = EmitScalarExpr(E->getArg(0));
17804 Value *Op1 = EmitScalarExpr(E->getArg(1));
17805 bool isLittleEndian = getTarget().isLittleEndian();
17806 Value *PoisonValue =
17807 llvm::PoisonValue::get(llvm::FixedVectorType::get(Op0->getType(), 2));
17808 Value *Res = Builder.CreateInsertElement(
17809 PoisonValue, Op0, (uint64_t)(isLittleEndian ? 1 : 0));
17810 Res = Builder.CreateInsertElement(Res, Op1,
17811 (uint64_t)(isLittleEndian ? 0 : 1));
17812 return Builder.CreateBitCast(Res, ConvertType(E->getType()));
17813 }
17814
17815 case PPC::BI__builtin_unpack_vector_int128: {
17816 Value *Op0 = EmitScalarExpr(E->getArg(0));
17817 Value *Op1 = EmitScalarExpr(E->getArg(1));
17818 ConstantInt *Index = cast<ConstantInt>(Op1);
17819 Value *Unpacked = Builder.CreateBitCast(
17820 Op0, llvm::FixedVectorType::get(ConvertType(E->getType()), 2));
17821
17822 if (getTarget().isLittleEndian())
17823 Index =
17824 ConstantInt::get(Index->getIntegerType(), 1 - Index->getZExtValue());
17825
17826 return Builder.CreateExtractElement(Unpacked, Index);
17827 }
17828
17829 case PPC::BI__builtin_ppc_sthcx: {
17830 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_sthcx);
17831 Value *Op0 = EmitScalarExpr(E->getArg(0));
17832 Value *Op1 = Builder.CreateSExt(EmitScalarExpr(E->getArg(1)), Int32Ty);
17833 return Builder.CreateCall(F, {Op0, Op1});
17834 }
17835
17836 // The PPC MMA builtins take a pointer to a __vector_quad as an argument.
17837 // Some of the MMA instructions accumulate their result into an existing
17838 // accumulator whereas the others generate a new accumulator. So we need
17839 // custom code generation to expand a builtin call with a pointer into a load
17840 // (if the corresponding instruction accumulates its result), followed by the
17841 // call to the intrinsic and a store of the result.
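// For example (a sketch; the exact operand handling is done below),
// __builtin_mma_xvf64gerpp(&acc, p, v) is expanded into a load of the
// accumulator from &acc, a call to the ppc_mma_xvf64gerpp intrinsic, and a
// store of the returned accumulator back through &acc.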
17842 #define CUSTOM_BUILTIN(Name, Intr, Types, Accumulate, Feature) \
17843 case PPC::BI__builtin_##Name:
17844 #include "clang/Basic/BuiltinsPPC.def"
17845 {
17846 SmallVector<Value *, 4> Ops;
17847 for (unsigned i = 0, e = E->getNumArgs(); i != e; i++)
17848 if (E->getArg(i)->getType()->isArrayType())
17849 Ops.push_back(
17850 EmitArrayToPointerDecay(E->getArg(i)).emitRawPointer(*this));
17851 else
17852 Ops.push_back(EmitScalarExpr(E->getArg(i)));
17853 // The first argument of these builtins is a pointer used to store their
17854 // result. However, the LLVM intrinsics return their result in multiple
17855 // return values. So, here we emit code extracting these values from the
17856 // intrinsic results and storing them through that pointer.
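// For example (illustrative), __builtin_mma_disassemble_acc(vp, &acc) calls
// the ppc_mma_disassemble_acc intrinsic and stores its four 16-byte results
// to vp[0], vp[1], vp[2], and vp[3].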
17857 if (BuiltinID == PPC::BI__builtin_mma_disassemble_acc ||
17858 BuiltinID == PPC::BI__builtin_vsx_disassemble_pair ||
17859 BuiltinID == PPC::BI__builtin_mma_disassemble_pair) {
17860 unsigned NumVecs = 2;
17861 auto Intrinsic = Intrinsic::ppc_vsx_disassemble_pair;
17862 if (BuiltinID == PPC::BI__builtin_mma_disassemble_acc) {
17863 NumVecs = 4;
17864 Intrinsic = Intrinsic::ppc_mma_disassemble_acc;
17865 }
17866 llvm::Function *F = CGM.getIntrinsic(Intrinsic);
17867 Address Addr = EmitPointerWithAlignment(E->getArg(1));
17868 Value *Vec = Builder.CreateLoad(Addr);
17869 Value *Call = Builder.CreateCall(F, {Vec});
17870 llvm::Type *VTy = llvm::FixedVectorType::get(Int8Ty, 16);
17871 Value *Ptr = Ops[0];
17872 for (unsigned i = 0; i < NumVecs; i++) {
17873 Value *Vec = Builder.CreateExtractValue(Call, i);
17874 llvm::ConstantInt *Index = llvm::ConstantInt::get(IntTy, i);
17875 Value *GEP = Builder.CreateInBoundsGEP(VTy, Ptr, Index);
17876 Builder.CreateAlignedStore(Vec, GEP, MaybeAlign(16));
17877 }
17878 return Call;
17879 }
17880 if (BuiltinID == PPC::BI__builtin_vsx_build_pair ||
17881 BuiltinID == PPC::BI__builtin_mma_build_acc) {
17882 // Reverse the order of the operands for LE, so the
17883 // same builtin call can be used on both LE and BE
17884 // without the need for the programmer to swap operands.
17885 // The operands are reversed starting from the second argument;
17886 // the first operand is the pointer to the pair/accumulator
17887 // that is being built.
17888 if (getTarget().isLittleEndian())
17889 std::reverse(Ops.begin() + 1, Ops.end());
17890 }
17891 bool Accumulate;
17892 switch (BuiltinID) {
17893 #define CUSTOM_BUILTIN(Name, Intr, Types, Acc, Feature) \
17894 case PPC::BI__builtin_##Name: \
17895 ID = Intrinsic::ppc_##Intr; \
17896 Accumulate = Acc; \
17897 break;
17898 #include "clang/Basic/BuiltinsPPC.def"
17899 }
17900 if (BuiltinID == PPC::BI__builtin_vsx_lxvp ||
17901 BuiltinID == PPC::BI__builtin_vsx_stxvp ||
17902 BuiltinID == PPC::BI__builtin_mma_lxvp ||
17903 BuiltinID == PPC::BI__builtin_mma_stxvp) {
17904 if (BuiltinID == PPC::BI__builtin_vsx_lxvp ||
17905 BuiltinID == PPC::BI__builtin_mma_lxvp) {
17906 Ops[0] = Builder.CreateGEP(Int8Ty, Ops[1], Ops[0]);
17907 } else {
17908 Ops[1] = Builder.CreateGEP(Int8Ty, Ops[2], Ops[1]);
17909 }
17910 Ops.pop_back();
17911 llvm::Function *F = CGM.getIntrinsic(ID);
17912 return Builder.CreateCall(F, Ops, "");
17913 }
17914 SmallVector<Value*, 4> CallOps;
17915 if (Accumulate) {
17916 Address Addr = EmitPointerWithAlignment(E->getArg(0));
17917 Value *Acc = Builder.CreateLoad(Addr);
17918 CallOps.push_back(Acc);
17919 }
17920 for (unsigned i = 1; i < Ops.size(); i++)
17921 CallOps.push_back(Ops[i]);
17922 llvm::Function *F = CGM.getIntrinsic(ID);
17923 Value *Call = Builder.CreateCall(F, CallOps);
17924 return Builder.CreateAlignedStore(Call, Ops[0], MaybeAlign(64));
17925 }
17926
17927 case PPC::BI__builtin_ppc_compare_and_swap:
17928 case PPC::BI__builtin_ppc_compare_and_swaplp: {
17929 Address Addr = EmitPointerWithAlignment(E->getArg(0));
17930 Address OldValAddr = EmitPointerWithAlignment(E->getArg(1));
17931 Value *OldVal = Builder.CreateLoad(OldValAddr);
17932 QualType AtomicTy = E->getArg(0)->getType()->getPointeeType();
17933 LValue LV = MakeAddrLValue(Addr, AtomicTy);
17934 Value *Op2 = EmitScalarExpr(E->getArg(2));
17935 auto Pair = EmitAtomicCompareExchange(
17936 LV, RValue::get(OldVal), RValue::get(Op2), E->getExprLoc(),
17937 llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Monotonic, true);
17938 // Unlike C11's atomic_compare_exchange, according to
17939 // https://www.ibm.com/docs/en/xl-c-and-cpp-aix/16.1?topic=functions-compare-swap-compare-swaplp
17940 // > In either case, the contents of the memory location specified by addr
17941 // > are copied into the memory location specified by old_val_addr.
17942 // The documentation does not specify whether the store to OldValAddr is
17943 // atomic or which ordering it should use, so, following XL's codegen, treat
17944 // it as a normal store.
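// As a sketch of the emitted sequence (illustrative, not exact IR):
//   %pair = cmpxchg ptr %addr, %old, %new monotonic monotonic
//   store the loaded value (element 0 of %pair) to %old_val_addr
//   zext the success bit (element 1 of %pair) to i32 and return it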
17945 Value *LoadedVal = Pair.first.getScalarVal();
17946 Builder.CreateStore(LoadedVal, OldValAddr);
17947 return Builder.CreateZExt(Pair.second, Builder.getInt32Ty());
17948 }
17949 case PPC::BI__builtin_ppc_fetch_and_add:
17950 case PPC::BI__builtin_ppc_fetch_and_addlp: {
17951 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E,
17952 llvm::AtomicOrdering::Monotonic);
17953 }
17954 case PPC::BI__builtin_ppc_fetch_and_and:
17955 case PPC::BI__builtin_ppc_fetch_and_andlp: {
17956 return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E,
17957 llvm::AtomicOrdering::Monotonic);
17958 }
17959
17960 case PPC::BI__builtin_ppc_fetch_and_or:
17961 case PPC::BI__builtin_ppc_fetch_and_orlp: {
17962 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E,
17963 llvm::AtomicOrdering::Monotonic);
17964 }
17965 case PPC::BI__builtin_ppc_fetch_and_swap:
17966 case PPC::BI__builtin_ppc_fetch_and_swaplp: {
17967 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E,
17968 llvm::AtomicOrdering::Monotonic);
17969 }
17970 case PPC::BI__builtin_ppc_ldarx:
17971 case PPC::BI__builtin_ppc_lwarx:
17972 case PPC::BI__builtin_ppc_lharx:
17973 case PPC::BI__builtin_ppc_lbarx:
17974 return emitPPCLoadReserveIntrinsic(*this, BuiltinID, E);
17975 case PPC::BI__builtin_ppc_mfspr: {
17976 Value *Op0 = EmitScalarExpr(E->getArg(0));
17977 llvm::Type *RetType = CGM.getDataLayout().getTypeSizeInBits(VoidPtrTy) == 32
17978 ? Int32Ty
17979 : Int64Ty;
17980 Function *F = CGM.getIntrinsic(Intrinsic::ppc_mfspr, RetType);
17981 return Builder.CreateCall(F, {Op0});
17982 }
17983 case PPC::BI__builtin_ppc_mtspr: {
17984 Value *Op0 = EmitScalarExpr(E->getArg(0));
17985 Value *Op1 = EmitScalarExpr(E->getArg(1));
17986 llvm::Type *RetType = CGM.getDataLayout().getTypeSizeInBits(VoidPtrTy) == 32
17987 ? Int32Ty
17988 : Int64Ty;
17989 Function *F = CGM.getIntrinsic(Intrinsic::ppc_mtspr, RetType);
17990 return Builder.CreateCall(F, {Op0, Op1});
17991 }
17992 case PPC::BI__builtin_ppc_popcntb: {
17993 Value *ArgValue = EmitScalarExpr(E->getArg(0));
17994 llvm::Type *ArgType = ArgValue->getType();
17995 Function *F = CGM.getIntrinsic(Intrinsic::ppc_popcntb, {ArgType, ArgType});
17996 return Builder.CreateCall(F, {ArgValue}, "popcntb");
17997 }
17998 case PPC::BI__builtin_ppc_mtfsf: {
17999 // The builtin takes a uint32 that needs to be cast to an
18000 // f64 to be passed to the intrinsic.
18001 Value *Op0 = EmitScalarExpr(E->getArg(0));
18002 Value *Op1 = EmitScalarExpr(E->getArg(1));
18003 Value *Cast = Builder.CreateUIToFP(Op1, DoubleTy);
18004 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_mtfsf);
18005 return Builder.CreateCall(F, {Op0, Cast}, "");
18006 }
18007
18008 case PPC::BI__builtin_ppc_swdiv_nochk:
18009 case PPC::BI__builtin_ppc_swdivs_nochk: {
18010 Value *Op0 = EmitScalarExpr(E->getArg(0));
18011 Value *Op1 = EmitScalarExpr(E->getArg(1));
18012 FastMathFlags FMF = Builder.getFastMathFlags();
18013 Builder.getFastMathFlags().setFast();
18014 Value *FDiv = Builder.CreateFDiv(Op0, Op1, "swdiv_nochk");
18015 Builder.getFastMathFlags() &= (FMF);
18016 return FDiv;
18017 }
18018 case PPC::BI__builtin_ppc_fric:
18019 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
18020 *this, E, Intrinsic::rint,
18021 Intrinsic::experimental_constrained_rint))
18022 .getScalarVal();
18023 case PPC::BI__builtin_ppc_frim:
18024 case PPC::BI__builtin_ppc_frims:
18025 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
18026 *this, E, Intrinsic::floor,
18027 Intrinsic::experimental_constrained_floor))
18028 .getScalarVal();
18029 case PPC::BI__builtin_ppc_frin:
18030 case PPC::BI__builtin_ppc_frins:
18031 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
18032 *this, E, Intrinsic::round,
18033 Intrinsic::experimental_constrained_round))
18034 .getScalarVal();
18035 case PPC::BI__builtin_ppc_frip:
18036 case PPC::BI__builtin_ppc_frips:
18037 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
18038 *this, E, Intrinsic::ceil,
18039 Intrinsic::experimental_constrained_ceil))
18040 .getScalarVal();
18041 case PPC::BI__builtin_ppc_friz:
18042 case PPC::BI__builtin_ppc_frizs:
18043 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
18044 *this, E, Intrinsic::trunc,
18045 Intrinsic::experimental_constrained_trunc))
18046 .getScalarVal();
18047 case PPC::BI__builtin_ppc_fsqrt:
18048 case PPC::BI__builtin_ppc_fsqrts:
18049 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
18050 *this, E, Intrinsic::sqrt,
18051 Intrinsic::experimental_constrained_sqrt))
18052 .getScalarVal();
18053 case PPC::BI__builtin_ppc_test_data_class: {
18054 Value *Op0 = EmitScalarExpr(E->getArg(0));
18055 Value *Op1 = EmitScalarExpr(E->getArg(1));
18056 return Builder.CreateCall(
18057 CGM.getIntrinsic(Intrinsic::ppc_test_data_class, Op0->getType()),
18058 {Op0, Op1}, "test_data_class");
18059 }
18060 case PPC::BI__builtin_ppc_maxfe: {
18061 Value *Op0 = EmitScalarExpr(E->getArg(0));
18062 Value *Op1 = EmitScalarExpr(E->getArg(1));
18063 Value *Op2 = EmitScalarExpr(E->getArg(2));
18064 Value *Op3 = EmitScalarExpr(E->getArg(3));
18065 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_maxfe),
18066 {Op0, Op1, Op2, Op3});
18067 }
18068 case PPC::BI__builtin_ppc_maxfl: {
18069 Value *Op0 = EmitScalarExpr(E->getArg(0));
18070 Value *Op1 = EmitScalarExpr(E->getArg(1));
18071 Value *Op2 = EmitScalarExpr(E->getArg(2));
18072 Value *Op3 = EmitScalarExpr(E->getArg(3));
18073 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_maxfl),
18074 {Op0, Op1, Op2, Op3});
18075 }
18076 case PPC::BI__builtin_ppc_maxfs: {
18077 Value *Op0 = EmitScalarExpr(E->getArg(0));
18078 Value *Op1 = EmitScalarExpr(E->getArg(1));
18079 Value *Op2 = EmitScalarExpr(E->getArg(2));
18080 Value *Op3 = EmitScalarExpr(E->getArg(3));
18081 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_maxfs),
18082 {Op0, Op1, Op2, Op3});
18083 }
18084 case PPC::BI__builtin_ppc_minfe: {
18085 Value *Op0 = EmitScalarExpr(E->getArg(0));
18086 Value *Op1 = EmitScalarExpr(E->getArg(1));
18087 Value *Op2 = EmitScalarExpr(E->getArg(2));
18088 Value *Op3 = EmitScalarExpr(E->getArg(3));
18089 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_minfe),
18090 {Op0, Op1, Op2, Op3});
18091 }
18092 case PPC::BI__builtin_ppc_minfl: {
18093 Value *Op0 = EmitScalarExpr(E->getArg(0));
18094 Value *Op1 = EmitScalarExpr(E->getArg(1));
18095 Value *Op2 = EmitScalarExpr(E->getArg(2));
18096 Value *Op3 = EmitScalarExpr(E->getArg(3));
18097 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_minfl),
18098 {Op0, Op1, Op2, Op3});
18099 }
18100 case PPC::BI__builtin_ppc_minfs: {
18101 Value *Op0 = EmitScalarExpr(E->getArg(0));
18102 Value *Op1 = EmitScalarExpr(E->getArg(1));
18103 Value *Op2 = EmitScalarExpr(E->getArg(2));
18104 Value *Op3 = EmitScalarExpr(E->getArg(3));
18105 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_minfs),
18106 {Op0, Op1, Op2, Op3});
18107 }
18108 case PPC::BI__builtin_ppc_swdiv:
18109 case PPC::BI__builtin_ppc_swdivs: {
18110 Value *Op0 = EmitScalarExpr(E->getArg(0));
18111 Value *Op1 = EmitScalarExpr(E->getArg(1));
18112 return Builder.CreateFDiv(Op0, Op1, "swdiv");
18113 }
18114 case PPC::BI__builtin_ppc_set_fpscr_rn:
18115 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_setrnd),
18116 {EmitScalarExpr(E->getArg(0))});
18117 case PPC::BI__builtin_ppc_mffs:
18118 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_readflm));
18119 }
18120 }
18121
18122 namespace {
18123 // If \p E is not a null pointer, insert an address space cast to match the
18124 // return type of \p E if necessary.
18125 Value *EmitAMDGPUDispatchPtr(CodeGenFunction &CGF,
18126 const CallExpr *E = nullptr) {
18127 auto *F = CGF.CGM.getIntrinsic(Intrinsic::amdgcn_dispatch_ptr);
18128 auto *Call = CGF.Builder.CreateCall(F);
18129 Call->addRetAttr(
18130 Attribute::getWithDereferenceableBytes(Call->getContext(), 64));
18131 Call->addRetAttr(Attribute::getWithAlignment(Call->getContext(), Align(4)));
18132 if (!E)
18133 return Call;
18134 QualType BuiltinRetType = E->getType();
18135 auto *RetTy = cast<llvm::PointerType>(CGF.ConvertType(BuiltinRetType));
18136 if (RetTy == Call->getType())
18137 return Call;
18138 return CGF.Builder.CreateAddrSpaceCast(Call, RetTy);
18139 }
18140
18141 Value *EmitAMDGPUImplicitArgPtr(CodeGenFunction &CGF) {
18142 auto *F = CGF.CGM.getIntrinsic(Intrinsic::amdgcn_implicitarg_ptr);
18143 auto *Call = CGF.Builder.CreateCall(F);
18144 Call->addRetAttr(
18145 Attribute::getWithDereferenceableBytes(Call->getContext(), 256));
18146 Call->addRetAttr(Attribute::getWithAlignment(Call->getContext(), Align(8)));
18147 return Call;
18148 }
18149
18150 // \p Index is 0, 1, and 2 for x, y, and z dimension, respectively.
18151 /// Emit code based on Code Object ABI version.
18152 /// COV_4 : Emit code to use dispatch ptr
18153 /// COV_5+ : Emit code to use implicitarg ptr
18154 /// COV_NONE : Emit code to load a global variable "__oclc_ABI_version"
18155 /// and use its value to select the COV_4 or COV_5+ approach. It is used for
18156 /// compiling device libraries in an ABI-agnostic way.
18157 ///
18158 /// Note: "__oclc_ABI_version" is supposed to be emitted and initialized by
18159 /// clang during compilation of user code.
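/// As a sketch (illustrative IR only, value names invented), the COV_NONE path
/// emits roughly:
///   %abi = load i32, ptr @__oclc_ABI_version
///   %is5 = icmp sge i32 %abi, COV_5
///   %ptr = select i1 %is5, ptr %implicitarg_gep, ptr %dispatch_gep
///   %val = load i16, ptr %ptr   ; annotated with !range, !noundef, !invariant.load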
18160 Value *EmitAMDGPUWorkGroupSize(CodeGenFunction &CGF, unsigned Index) {
18161 llvm::LoadInst *LD;
18162
18163 auto Cov = CGF.getTarget().getTargetOpts().CodeObjectVersion;
18164
18165 if (Cov == CodeObjectVersionKind::COV_None) {
18166 StringRef Name = "__oclc_ABI_version";
18167 auto *ABIVersionC = CGF.CGM.getModule().getNamedGlobal(Name);
18168 if (!ABIVersionC)
18169 ABIVersionC = new llvm::GlobalVariable(
18170 CGF.CGM.getModule(), CGF.Int32Ty, false,
18171 llvm::GlobalValue::ExternalLinkage, nullptr, Name, nullptr,
18172 llvm::GlobalVariable::NotThreadLocal,
18173 CGF.CGM.getContext().getTargetAddressSpace(LangAS::opencl_constant));
18174
18175 // This load will be eliminated by the IPSCCP because it is constant
18176 // weak_odr without externally_initialized. Either changing it to weak or
18177 // adding externally_initialized will keep the load.
18178 Value *ABIVersion = CGF.Builder.CreateAlignedLoad(CGF.Int32Ty, ABIVersionC,
18179 CGF.CGM.getIntAlign());
18180
18181 Value *IsCOV5 = CGF.Builder.CreateICmpSGE(
18182 ABIVersion,
18183 llvm::ConstantInt::get(CGF.Int32Ty, CodeObjectVersionKind::COV_5));
18184
18185 // Indexing the implicit kernarg segment.
18186 Value *ImplicitGEP = CGF.Builder.CreateConstGEP1_32(
18187 CGF.Int8Ty, EmitAMDGPUImplicitArgPtr(CGF), 12 + Index * 2);
18188
18189 // Indexing the HSA kernel_dispatch_packet struct.
18190 Value *DispatchGEP = CGF.Builder.CreateConstGEP1_32(
18191 CGF.Int8Ty, EmitAMDGPUDispatchPtr(CGF), 4 + Index * 2);
18192
18193 auto Result = CGF.Builder.CreateSelect(IsCOV5, ImplicitGEP, DispatchGEP);
18194 LD = CGF.Builder.CreateLoad(
18195 Address(Result, CGF.Int16Ty, CharUnits::fromQuantity(2)));
18196 } else {
18197 Value *GEP = nullptr;
18198 if (Cov >= CodeObjectVersionKind::COV_5) {
18199 // Indexing the implicit kernarg segment.
18200 GEP = CGF.Builder.CreateConstGEP1_32(
18201 CGF.Int8Ty, EmitAMDGPUImplicitArgPtr(CGF), 12 + Index * 2);
18202 } else {
18203 // Indexing the HSA kernel_dispatch_packet struct.
18204 GEP = CGF.Builder.CreateConstGEP1_32(
18205 CGF.Int8Ty, EmitAMDGPUDispatchPtr(CGF), 4 + Index * 2);
18206 }
18207 LD = CGF.Builder.CreateLoad(
18208 Address(GEP, CGF.Int16Ty, CharUnits::fromQuantity(2)));
18209 }
18210
18211 llvm::MDBuilder MDHelper(CGF.getLLVMContext());
18212 llvm::MDNode *RNode = MDHelper.createRange(APInt(16, 1),
18213 APInt(16, CGF.getTarget().getMaxOpenCLWorkGroupSize() + 1));
18214 LD->setMetadata(llvm::LLVMContext::MD_range, RNode);
18215 LD->setMetadata(llvm::LLVMContext::MD_noundef,
18216 llvm::MDNode::get(CGF.getLLVMContext(), std::nullopt));
18217 LD->setMetadata(llvm::LLVMContext::MD_invariant_load,
18218 llvm::MDNode::get(CGF.getLLVMContext(), std::nullopt));
18219 return LD;
18220 }
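// Illustrative IR sketch, assuming a fixed code object version of COV_5+ and
// Index == 0 (made-up value names): __builtin_amdgcn_workgroup_size_x() lowers
// roughly to
//   %implicitarg = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
//   %gep = getelementptr i8, ptr addrspace(4) %implicitarg, i32 12
//   %size.x = load i16, ptr addrspace(4) %gep, align 2,
//             !range !R, !invariant.load !{}, !noundef !{}
// where !R spans [1, getMaxOpenCLWorkGroupSize() + 1). For COV_4 the load
// instead reads offset 4 + Index * 2 of the HSA kernel_dispatch_packet via
// @llvm.amdgcn.dispatch.ptr(), and for COV_NONE both addresses are computed
// and selected on the runtime ABI version as above.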
18221
18222 // \p Index is 0, 1, and 2 for the x, y, and z dimensions, respectively.
18223 Value *EmitAMDGPUGridSize(CodeGenFunction &CGF, unsigned Index) {
18224 const unsigned XOffset = 12;
18225 auto *DP = EmitAMDGPUDispatchPtr(CGF);
18226 // Indexing the HSA kernel_dispatch_packet struct.
18227 auto *Offset = llvm::ConstantInt::get(CGF.Int32Ty, XOffset + Index * 4);
18228 auto *GEP = CGF.Builder.CreateGEP(CGF.Int8Ty, DP, Offset);
18229 auto *LD = CGF.Builder.CreateLoad(
18230 Address(GEP, CGF.Int32Ty, CharUnits::fromQuantity(4)));
18231 LD->setMetadata(llvm::LLVMContext::MD_invariant_load,
18232 llvm::MDNode::get(CGF.getLLVMContext(), std::nullopt));
18233 return LD;
18234 }
18235 } // namespace
18236
18237 // For processing memory ordering and memory scope arguments of various
18238 // amdgcn builtins.
18239 // \p Order takes a C++11-compatible memory-ordering specifier and converts
18240 // it into LLVM's memory ordering specifier using the atomic C ABI, writing
18241 // the result to \p AO. \p Scope takes a const char * and converts it into an
18242 // AMDGCN-specific SyncScopeID written to \p SSID (sketch after the function).
18243 void CodeGenFunction::ProcessOrderScopeAMDGCN(Value *Order, Value *Scope,
18244 llvm::AtomicOrdering &AO,
18245 llvm::SyncScope::ID &SSID) {
18246 int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
18247
18248 // Map C11/C++11 memory ordering to LLVM memory ordering
18249 assert(llvm::isValidAtomicOrderingCABI(ord));
18250 switch (static_cast<llvm::AtomicOrderingCABI>(ord)) {
18251 case llvm::AtomicOrderingCABI::acquire:
18252 case llvm::AtomicOrderingCABI::consume:
18253 AO = llvm::AtomicOrdering::Acquire;
18254 break;
18255 case llvm::AtomicOrderingCABI::release:
18256 AO = llvm::AtomicOrdering::Release;
18257 break;
18258 case llvm::AtomicOrderingCABI::acq_rel:
18259 AO = llvm::AtomicOrdering::AcquireRelease;
18260 break;
18261 case llvm::AtomicOrderingCABI::seq_cst:
18262 AO = llvm::AtomicOrdering::SequentiallyConsistent;
18263 break;
18264 case llvm::AtomicOrderingCABI::relaxed:
18265 AO = llvm::AtomicOrdering::Monotonic;
18266 break;
18267 }
18268
18269 // Some of the atomic builtins take the scope as a string name.
18270 StringRef scp;
18271 if (llvm::getConstantStringInfo(Scope, scp)) {
18272 SSID = getLLVMContext().getOrInsertSyncScopeID(scp);
18273 return;
18274 }
18275
18276 // Older builtins had an enum argument for the memory scope.
18277 int scope = cast<llvm::ConstantInt>(Scope)->getZExtValue();
18278 switch (scope) {
18279 case 0: // __MEMORY_SCOPE_SYSTEM
18280 SSID = llvm::SyncScope::System;
18281 break;
18282 case 1: // __MEMORY_SCOPE_DEVICE
18283 SSID = getLLVMContext().getOrInsertSyncScopeID("agent");
18284 break;
18285 case 2: // __MEMORY_SCOPE_WRKGRP
18286 SSID = getLLVMContext().getOrInsertSyncScopeID("workgroup");
18287 break;
18288 case 3: // __MEMORY_SCOPE_WVFRNT
18289 SSID = getLLVMContext().getOrInsertSyncScopeID("wavefront");
18290 break;
18291 case 4: // __MEMORY_SCOPE_SINGLE
18292 SSID = llvm::SyncScope::SingleThread;
18293 break;
18294 default:
18295 SSID = llvm::SyncScope::System;
18296 break;
18297 }
18298 }
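// Illustrative sketch of the mapping above (not emitted verbatim): for
//   __builtin_amdgcn_fence(__ATOMIC_ACQUIRE, "workgroup");
// this helper yields AO == llvm::AtomicOrdering::Acquire and SSID equal to the
// "workgroup" sync scope, so the resulting instruction prints as
//   fence syncscope("workgroup") acquire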
18299
18300 llvm::Value *CodeGenFunction::EmitScalarOrConstFoldImmArg(unsigned ICEArguments,
18301 unsigned Idx,
18302 const CallExpr *E) {
18303 llvm::Value *Arg = nullptr;
18304 if ((ICEArguments & (1 << Idx)) == 0) {
18305 Arg = EmitScalarExpr(E->getArg(Idx));
18306 } else {
18307 // If this is required to be a constant, constant fold it so that we
18308 // know that the generated intrinsic gets a ConstantInt.
18309 std::optional<llvm::APSInt> Result =
18310 E->getArg(Idx)->getIntegerConstantExpr(getContext());
18311 assert(Result && "Expected argument to be a constant");
18312 Arg = llvm::ConstantInt::get(getLLVMContext(), *Result);
18313 }
18314 return Arg;
18315 }
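// Illustrative usage sketch (hypothetical mask value): with ICEArguments ==
// 0b10,
//   EmitScalarOrConstFoldImmArg(ICEArguments, /*Idx=*/0, E) // emits a Value
//   EmitScalarOrConstFoldImmArg(ICEArguments, /*Idx=*/1, E) // folds to a
//                                                           // ConstantInt
// which is how the update_dpp case below obtains its immediate operands.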
18316
18317 Intrinsic::ID getDotProductIntrinsic(QualType QT, int elementCount) {
18318 if (QT->hasFloatingRepresentation()) {
18319 switch (elementCount) {
18320 case 2:
18321 return Intrinsic::dx_dot2;
18322 case 3:
18323 return Intrinsic::dx_dot3;
18324 case 4:
18325 return Intrinsic::dx_dot4;
18326 }
18327 }
18328 if (QT->hasSignedIntegerRepresentation())
18329 return Intrinsic::dx_sdot;
18330
18331 assert(QT->hasUnsignedIntegerRepresentation());
18332 return Intrinsic::dx_udot;
18333 }
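// Illustrative mapping for the helper above (hypothetical input types):
//   getDotProductIntrinsic(float3Ty, /*elementCount=*/3) -> Intrinsic::dx_dot3
//   getDotProductIntrinsic(int2Ty,   /*elementCount=*/2) -> Intrinsic::dx_sdot
//   getDotProductIntrinsic(uint4Ty,  /*elementCount=*/4) -> Intrinsic::dx_udot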
18334
18335 Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
18336 const CallExpr *E) {
18337 if (!getLangOpts().HLSL)
18338 return nullptr;
18339
18340 switch (BuiltinID) {
18341 case Builtin::BI__builtin_hlsl_elementwise_all: {
18342 Value *Op0 = EmitScalarExpr(E->getArg(0));
18343 return Builder.CreateIntrinsic(
18344 /*ReturnType=*/llvm::Type::getInt1Ty(getLLVMContext()),
18345 CGM.getHLSLRuntime().getAllIntrinsic(), ArrayRef<Value *>{Op0}, nullptr,
18346 "hlsl.all");
18347 }
18348 case Builtin::BI__builtin_hlsl_elementwise_any: {
18349 Value *Op0 = EmitScalarExpr(E->getArg(0));
18350 return Builder.CreateIntrinsic(
18351 /*ReturnType=*/llvm::Type::getInt1Ty(getLLVMContext()),
18352 CGM.getHLSLRuntime().getAnyIntrinsic(), ArrayRef<Value *>{Op0}, nullptr,
18353 "hlsl.any");
18354 }
18355 case Builtin::BI__builtin_hlsl_elementwise_clamp: {
18356 Value *OpX = EmitScalarExpr(E->getArg(0));
18357 Value *OpMin = EmitScalarExpr(E->getArg(1));
18358 Value *OpMax = EmitScalarExpr(E->getArg(2));
18359
18360 QualType Ty = E->getArg(0)->getType();
18361 bool IsUnsigned = false;
18362 if (auto *VecTy = Ty->getAs<VectorType>())
18363 Ty = VecTy->getElementType();
18364 IsUnsigned = Ty->isUnsignedIntegerType();
18365 return Builder.CreateIntrinsic(
18366 /*ReturnType=*/OpX->getType(),
18367 IsUnsigned ? Intrinsic::dx_uclamp : Intrinsic::dx_clamp,
18368 ArrayRef<Value *>{OpX, OpMin, OpMax}, nullptr, "dx.clamp");
18369 }
18370 case Builtin::BI__builtin_hlsl_dot: {
18371 Value *Op0 = EmitScalarExpr(E->getArg(0));
18372 Value *Op1 = EmitScalarExpr(E->getArg(1));
18373 llvm::Type *T0 = Op0->getType();
18374 llvm::Type *T1 = Op1->getType();
18375 if (!T0->isVectorTy() && !T1->isVectorTy()) {
18376 if (T0->isFloatingPointTy())
18377 return Builder.CreateFMul(Op0, Op1, "dx.dot");
18378
18379 if (T0->isIntegerTy())
18380 return Builder.CreateMul(Op0, Op1, "dx.dot");
18381
18382 // Bools should have been promoted
18383 llvm_unreachable(
18384 "Scalar dot product is only supported on ints and floats.");
18385 }
18386 // A VectorSplat should have happened
18387 assert(T0->isVectorTy() && T1->isVectorTy() &&
18388 "Dot product of vector and scalar is not supported.");
18389
18390 // A vector sext or sitofp should have happened
18391 assert(T0->getScalarType() == T1->getScalarType() &&
18392 "Dot product of vectors needs the same element types.");
18393
18394 auto *VecTy0 = E->getArg(0)->getType()->getAs<VectorType>();
18395 [[maybe_unused]] auto *VecTy1 =
18396 E->getArg(1)->getType()->getAs<VectorType>();
18397 // An HLSLVectorTruncation should have happened
18398 assert(VecTy0->getNumElements() == VecTy1->getNumElements() &&
18399 "Dot product requires vectors to be of the same size.");
18400
18401 return Builder.CreateIntrinsic(
18402 /*ReturnType=*/T0->getScalarType(),
18403 getDotProductIntrinsic(E->getArg(0)->getType(),
18404 VecTy0->getNumElements()),
18405 ArrayRef<Value *>{Op0, Op1}, nullptr, "dx.dot");
18406 } break;
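// Illustrative sketch for the vector path above: an HLSL dot(float3, float3)
// ends up as a call to the llvm.dx.dot3 intrinsic returning the scalar element
// type (float here), while the scalar overloads fold to a plain fmul/mul as
// handled first.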
18407 case Builtin::BI__builtin_hlsl_lerp: {
18408 Value *X = EmitScalarExpr(E->getArg(0));
18409 Value *Y = EmitScalarExpr(E->getArg(1));
18410 Value *S = EmitScalarExpr(E->getArg(2));
18411 if (!E->getArg(0)->getType()->hasFloatingRepresentation())
18412 llvm_unreachable("lerp operand must have a float representation");
18413 return Builder.CreateIntrinsic(
18414 /*ReturnType=*/X->getType(), CGM.getHLSLRuntime().getLerpIntrinsic(),
18415 ArrayRef<Value *>{X, Y, S}, nullptr, "hlsl.lerp");
18416 }
18417 case Builtin::BI__builtin_hlsl_elementwise_frac: {
18418 Value *Op0 = EmitScalarExpr(E->getArg(0));
18419 if (!E->getArg(0)->getType()->hasFloatingRepresentation())
18420 llvm_unreachable("frac operand must have a float representation");
18421 return Builder.CreateIntrinsic(
18422 /*ReturnType=*/Op0->getType(), Intrinsic::dx_frac,
18423 ArrayRef<Value *>{Op0}, nullptr, "dx.frac");
18424 }
18425 case Builtin::BI__builtin_hlsl_elementwise_isinf: {
18426 Value *Op0 = EmitScalarExpr(E->getArg(0));
18427 llvm::Type *Xty = Op0->getType();
18428 llvm::Type *retType = llvm::Type::getInt1Ty(this->getLLVMContext());
18429 if (Xty->isVectorTy()) {
18430 auto *XVecTy = E->getArg(0)->getType()->getAs<VectorType>();
18431 retType = llvm::VectorType::get(
18432 retType, ElementCount::getFixed(XVecTy->getNumElements()));
18433 }
18434 if (!E->getArg(0)->getType()->hasFloatingRepresentation())
18435 llvm_unreachable("isinf operand must have a float representation");
18436 return Builder.CreateIntrinsic(retType, Intrinsic::dx_isinf,
18437 ArrayRef<Value *>{Op0}, nullptr, "dx.isinf");
18438 }
18439 case Builtin::BI__builtin_hlsl_mad: {
18440 Value *M = EmitScalarExpr(E->getArg(0));
18441 Value *A = EmitScalarExpr(E->getArg(1));
18442 Value *B = EmitScalarExpr(E->getArg(2));
18443 if (E->getArg(0)->getType()->hasFloatingRepresentation())
18444 return Builder.CreateIntrinsic(
18445 /*ReturnType*/ M->getType(), Intrinsic::fmuladd,
18446 ArrayRef<Value *>{M, A, B}, nullptr, "hlsl.fmad");
18447
18448 if (E->getArg(0)->getType()->hasSignedIntegerRepresentation()) {
18449 if (CGM.getTarget().getTriple().getArch() == llvm::Triple::dxil)
18450 return Builder.CreateIntrinsic(
18451 /*ReturnType*/ M->getType(), Intrinsic::dx_imad,
18452 ArrayRef<Value *>{M, A, B}, nullptr, "dx.imad");
18453
18454 Value *Mul = Builder.CreateNSWMul(M, A);
18455 return Builder.CreateNSWAdd(Mul, B);
18456 }
18457 assert(E->getArg(0)->getType()->hasUnsignedIntegerRepresentation());
18458 if (CGM.getTarget().getTriple().getArch() == llvm::Triple::dxil)
18459 return Builder.CreateIntrinsic(
18460 /*ReturnType=*/M->getType(), Intrinsic::dx_umad,
18461 ArrayRef<Value *>{M, A, B}, nullptr, "dx.umad");
18462
18463 Value *Mul = Builder.CreateNUWMul(M, A);
18464 return Builder.CreateNUWAdd(Mul, B);
18465 }
18466 case Builtin::BI__builtin_hlsl_elementwise_rcp: {
18467 Value *Op0 = EmitScalarExpr(E->getArg(0));
18468 if (!E->getArg(0)->getType()->hasFloatingRepresentation())
18469 llvm_unreachable("rcp operand must have a float representation");
18470 llvm::Type *Ty = Op0->getType();
18471 llvm::Type *EltTy = Ty->getScalarType();
18472 Constant *One = Ty->isVectorTy()
18473 ? ConstantVector::getSplat(
18474 ElementCount::getFixed(
18475 cast<FixedVectorType>(Ty)->getNumElements()),
18476 ConstantFP::get(EltTy, 1.0))
18477 : ConstantFP::get(EltTy, 1.0);
18478 return Builder.CreateFDiv(One, Op0, "hlsl.rcp");
18479 }
18480 case Builtin::BI__builtin_hlsl_elementwise_rsqrt: {
18481 Value *Op0 = EmitScalarExpr(E->getArg(0));
18482 if (!E->getArg(0)->getType()->hasFloatingRepresentation())
18483 llvm_unreachable("rsqrt operand must have a float representation");
18484 return Builder.CreateIntrinsic(
18485 /*ReturnType=*/Op0->getType(), CGM.getHLSLRuntime().getRsqrtIntrinsic(),
18486 ArrayRef<Value *>{Op0}, nullptr, "hlsl.rsqrt");
18487 }
18488 case Builtin::BI__builtin_hlsl_wave_get_lane_index: {
18489 return EmitRuntimeCall(CGM.CreateRuntimeFunction(
18490 llvm::FunctionType::get(IntTy, {}, false), "__hlsl_wave_get_lane_index",
18491 {}, false, true));
18492 }
18493 }
18494 return nullptr;
18495 }
18496
18497 void CodeGenFunction::AddAMDGPUFenceAddressSpaceMMRA(llvm::Instruction *Inst,
18498 const CallExpr *E) {
18499 constexpr const char *Tag = "amdgpu-as";
18500
18501 LLVMContext &Ctx = Inst->getContext();
18502 SmallVector<MMRAMetadata::TagT, 3> MMRAs;
18503 for (unsigned K = 2; K < E->getNumArgs(); ++K) {
18504 llvm::Value *V = EmitScalarExpr(E->getArg(K));
18505 StringRef AS;
18506 if (llvm::getConstantStringInfo(V, AS)) {
18507 MMRAs.push_back({Tag, AS});
18508 // TODO: Delete the resulting unused constant?
18509 continue;
18510 }
18511 CGM.Error(E->getExprLoc(),
18512 "expected an address space name as a string literal");
18513 }
18514
18515 llvm::sort(MMRAs);
18516 MMRAs.erase(llvm::unique(MMRAs), MMRAs.end());
18517 Inst->setMetadata(LLVMContext::MD_mmra, MMRAMetadata::getMD(Ctx, MMRAs));
18518 }
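// Illustrative sketch (not emitted verbatim): for
//   __builtin_amdgcn_fence(__ATOMIC_RELEASE, "agent", "local", "global");
// the helper above attaches !mmra metadata carrying the tags
// {"amdgpu-as", "local"} and {"amdgpu-as", "global"} to the emitted fence.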
18519
18520 Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
18521 const CallExpr *E) {
18522 llvm::AtomicOrdering AO = llvm::AtomicOrdering::SequentiallyConsistent;
18523 llvm::SyncScope::ID SSID;
18524 switch (BuiltinID) {
18525 case AMDGPU::BI__builtin_amdgcn_div_scale:
18526 case AMDGPU::BI__builtin_amdgcn_div_scalef: {
18527 // Translate from the intrinsic's struct return to the builtin's out
18528 // argument.
18529
18530 Address FlagOutPtr = EmitPointerWithAlignment(E->getArg(3));
18531
18532 llvm::Value *X = EmitScalarExpr(E->getArg(0));
18533 llvm::Value *Y = EmitScalarExpr(E->getArg(1));
18534 llvm::Value *Z = EmitScalarExpr(E->getArg(2));
18535
18536 llvm::Function *Callee = CGM.getIntrinsic(Intrinsic::amdgcn_div_scale,
18537 X->getType());
18538
18539 llvm::Value *Tmp = Builder.CreateCall(Callee, {X, Y, Z});
18540
18541 llvm::Value *Result = Builder.CreateExtractValue(Tmp, 0);
18542 llvm::Value *Flag = Builder.CreateExtractValue(Tmp, 1);
18543
18544 llvm::Type *RealFlagType = FlagOutPtr.getElementType();
18545
18546 llvm::Value *FlagExt = Builder.CreateZExt(Flag, RealFlagType);
18547 Builder.CreateStore(FlagExt, FlagOutPtr);
18548 return Result;
18549 }
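// Illustrative sketch of the struct-return translation above (f32 variant,
// made-up value names):
//   %pair = call { float, i1 } @llvm.amdgcn.div.scale.f32(...)
//   %res  = extractvalue { float, i1 } %pair, 0
//   %flag = extractvalue { float, i1 } %pair, 1
// followed by a zext of %flag to the out pointer's element type and a store
// through the flag-out pointer; %res is the value returned to the caller.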
18550 case AMDGPU::BI__builtin_amdgcn_div_fmas:
18551 case AMDGPU::BI__builtin_amdgcn_div_fmasf: {
18552 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
18553 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
18554 llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
18555 llvm::Value *Src3 = EmitScalarExpr(E->getArg(3));
18556
18557 llvm::Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_div_fmas,
18558 Src0->getType());
18559 llvm::Value *Src3ToBool = Builder.CreateIsNotNull(Src3);
18560 return Builder.CreateCall(F, {Src0, Src1, Src2, Src3ToBool});
18561 }
18562
18563 case AMDGPU::BI__builtin_amdgcn_ds_swizzle:
18564 return emitBuiltinWithOneOverloadedType<2>(*this, E,
18565 Intrinsic::amdgcn_ds_swizzle);
18566 case AMDGPU::BI__builtin_amdgcn_mov_dpp8:
18567 return emitBuiltinWithOneOverloadedType<2>(*this, E,
18568 Intrinsic::amdgcn_mov_dpp8);
18569 case AMDGPU::BI__builtin_amdgcn_mov_dpp:
18570 case AMDGPU::BI__builtin_amdgcn_update_dpp: {
18571 llvm::SmallVector<llvm::Value *, 6> Args;
18572 // Find out if any arguments are required to be integer constant
18573 // expressions.
18574 unsigned ICEArguments = 0;
18575 ASTContext::GetBuiltinTypeError Error;
18576 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
18577 assert(Error == ASTContext::GE_None && "Should not codegen an error");
18578 for (unsigned I = 0; I != E->getNumArgs(); ++I) {
18579 Args.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, I, E));
18580 }
18581 assert(Args.size() == 5 || Args.size() == 6);
18582 if (Args.size() == 5)
18583 Args.insert(Args.begin(), llvm::PoisonValue::get(Args[0]->getType()));
18584 Function *F =
18585 CGM.getIntrinsic(Intrinsic::amdgcn_update_dpp, Args[0]->getType());
18586 return Builder.CreateCall(F, Args);
18587 }
18588 case AMDGPU::BI__builtin_amdgcn_permlane16:
18589 case AMDGPU::BI__builtin_amdgcn_permlanex16:
18590 return emitBuiltinWithOneOverloadedType<6>(
18591 *this, E,
18592 BuiltinID == AMDGPU::BI__builtin_amdgcn_permlane16
18593 ? Intrinsic::amdgcn_permlane16
18594 : Intrinsic::amdgcn_permlanex16);
18595 case AMDGPU::BI__builtin_amdgcn_permlane64:
18596 return emitBuiltinWithOneOverloadedType<1>(*this, E,
18597 Intrinsic::amdgcn_permlane64);
18598 case AMDGPU::BI__builtin_amdgcn_readlane:
18599 return emitBuiltinWithOneOverloadedType<2>(*this, E,
18600 Intrinsic::amdgcn_readlane);
18601 case AMDGPU::BI__builtin_amdgcn_readfirstlane:
18602 return emitBuiltinWithOneOverloadedType<1>(*this, E,
18603 Intrinsic::amdgcn_readfirstlane);
18604 case AMDGPU::BI__builtin_amdgcn_div_fixup:
18605 case AMDGPU::BI__builtin_amdgcn_div_fixupf:
18606 case AMDGPU::BI__builtin_amdgcn_div_fixuph:
18607 return emitBuiltinWithOneOverloadedType<3>(*this, E,
18608 Intrinsic::amdgcn_div_fixup);
18609 case AMDGPU::BI__builtin_amdgcn_trig_preop:
18610 case AMDGPU::BI__builtin_amdgcn_trig_preopf:
18611 return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_trig_preop);
18612 case AMDGPU::BI__builtin_amdgcn_rcp:
18613 case AMDGPU::BI__builtin_amdgcn_rcpf:
18614 case AMDGPU::BI__builtin_amdgcn_rcph:
18615 return emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::amdgcn_rcp);
18616 case AMDGPU::BI__builtin_amdgcn_sqrt:
18617 case AMDGPU::BI__builtin_amdgcn_sqrtf:
18618 case AMDGPU::BI__builtin_amdgcn_sqrth:
18619 return emitBuiltinWithOneOverloadedType<1>(*this, E,
18620 Intrinsic::amdgcn_sqrt);
18621 case AMDGPU::BI__builtin_amdgcn_rsq:
18622 case AMDGPU::BI__builtin_amdgcn_rsqf:
18623 case AMDGPU::BI__builtin_amdgcn_rsqh:
18624 return emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::amdgcn_rsq);
18625 case AMDGPU::BI__builtin_amdgcn_rsq_clamp:
18626 case AMDGPU::BI__builtin_amdgcn_rsq_clampf:
18627 return emitBuiltinWithOneOverloadedType<1>(*this, E,
18628 Intrinsic::amdgcn_rsq_clamp);
18629 case AMDGPU::BI__builtin_amdgcn_sinf:
18630 case AMDGPU::BI__builtin_amdgcn_sinh:
18631 return emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::amdgcn_sin);
18632 case AMDGPU::BI__builtin_amdgcn_cosf:
18633 case AMDGPU::BI__builtin_amdgcn_cosh:
18634 return emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::amdgcn_cos);
18635 case AMDGPU::BI__builtin_amdgcn_dispatch_ptr:
18636 return EmitAMDGPUDispatchPtr(*this, E);
18637 case AMDGPU::BI__builtin_amdgcn_logf:
18638 return emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::amdgcn_log);
18639 case AMDGPU::BI__builtin_amdgcn_exp2f:
18640 return emitBuiltinWithOneOverloadedType<1>(*this, E,
18641 Intrinsic::amdgcn_exp2);
18642 case AMDGPU::BI__builtin_amdgcn_log_clampf:
18643 return emitBuiltinWithOneOverloadedType<1>(*this, E,
18644 Intrinsic::amdgcn_log_clamp);
18645 case AMDGPU::BI__builtin_amdgcn_ldexp:
18646 case AMDGPU::BI__builtin_amdgcn_ldexpf: {
18647 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
18648 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
18649 llvm::Function *F =
18650 CGM.getIntrinsic(Intrinsic::ldexp, {Src0->getType(), Src1->getType()});
18651 return Builder.CreateCall(F, {Src0, Src1});
18652 }
18653 case AMDGPU::BI__builtin_amdgcn_ldexph: {
18654 // The raw instruction has different behavior for out-of-bounds exponent
18655 // values (implicit truncation instead of saturation to short_min/short_max).
18656 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
18657 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
18658 llvm::Function *F =
18659 CGM.getIntrinsic(Intrinsic::ldexp, {Src0->getType(), Int16Ty});
18660 return Builder.CreateCall(F, {Src0, Builder.CreateTrunc(Src1, Int16Ty)});
18661 }
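// Illustrative sketch for the f16 case above (made-up value names):
//   %e16 = trunc i32 %e to i16
//   %r   = call half @llvm.ldexp.f16.i16(half %h, i16 %e16)
// whereas the f32/f64 variants above pass the exponent through unmodified.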
18662 case AMDGPU::BI__builtin_amdgcn_frexp_mant:
18663 case AMDGPU::BI__builtin_amdgcn_frexp_mantf:
18664 case AMDGPU::BI__builtin_amdgcn_frexp_manth:
18665 return emitBuiltinWithOneOverloadedType<1>(*this, E,
18666 Intrinsic::amdgcn_frexp_mant);
18667 case AMDGPU::BI__builtin_amdgcn_frexp_exp:
18668 case AMDGPU::BI__builtin_amdgcn_frexp_expf: {
18669 Value *Src0 = EmitScalarExpr(E->getArg(0));
18670 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp,
18671 { Builder.getInt32Ty(), Src0->getType() });
18672 return Builder.CreateCall(F, Src0);
18673 }
18674 case AMDGPU::BI__builtin_amdgcn_frexp_exph: {
18675 Value *Src0 = EmitScalarExpr(E->getArg(0));
18676 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp,
18677 { Builder.getInt16Ty(), Src0->getType() });
18678 return Builder.CreateCall(F, Src0);
18679 }
18680 case AMDGPU::BI__builtin_amdgcn_fract:
18681 case AMDGPU::BI__builtin_amdgcn_fractf:
18682 case AMDGPU::BI__builtin_amdgcn_fracth:
18683 return emitBuiltinWithOneOverloadedType<1>(*this, E,
18684 Intrinsic::amdgcn_fract);
18685 case AMDGPU::BI__builtin_amdgcn_lerp:
18686 return emitBuiltinWithOneOverloadedType<3>(*this, E,
18687 Intrinsic::amdgcn_lerp);
18688 case AMDGPU::BI__builtin_amdgcn_ubfe:
18689 return emitBuiltinWithOneOverloadedType<3>(*this, E,
18690 Intrinsic::amdgcn_ubfe);
18691 case AMDGPU::BI__builtin_amdgcn_sbfe:
18692 return emitBuiltinWithOneOverloadedType<3>(*this, E,
18693 Intrinsic::amdgcn_sbfe);
18694 case AMDGPU::BI__builtin_amdgcn_ballot_w32:
18695 case AMDGPU::BI__builtin_amdgcn_ballot_w64: {
18696 llvm::Type *ResultType = ConvertType(E->getType());
18697 llvm::Value *Src = EmitScalarExpr(E->getArg(0));
18698 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, { ResultType });
18699 return Builder.CreateCall(F, { Src });
18700 }
18701 case AMDGPU::BI__builtin_amdgcn_uicmp:
18702 case AMDGPU::BI__builtin_amdgcn_uicmpl:
18703 case AMDGPU::BI__builtin_amdgcn_sicmp:
18704 case AMDGPU::BI__builtin_amdgcn_sicmpl: {
18705 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
18706 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
18707 llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
18708
18709 // FIXME-GFX10: How should 32 bit mask be handled?
18710 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_icmp,
18711 { Builder.getInt64Ty(), Src0->getType() });
18712 return Builder.CreateCall(F, { Src0, Src1, Src2 });
18713 }
18714 case AMDGPU::BI__builtin_amdgcn_fcmp:
18715 case AMDGPU::BI__builtin_amdgcn_fcmpf: {
18716 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
18717 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
18718 llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
18719
18720 // FIXME-GFX10: How should 32 bit mask be handled?
18721 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_fcmp,
18722 { Builder.getInt64Ty(), Src0->getType() });
18723 return Builder.CreateCall(F, { Src0, Src1, Src2 });
18724 }
18725 case AMDGPU::BI__builtin_amdgcn_class:
18726 case AMDGPU::BI__builtin_amdgcn_classf:
18727 case AMDGPU::BI__builtin_amdgcn_classh:
18728 return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_class);
18729 case AMDGPU::BI__builtin_amdgcn_fmed3f:
18730 case AMDGPU::BI__builtin_amdgcn_fmed3h:
18731 return emitBuiltinWithOneOverloadedType<3>(*this, E,
18732 Intrinsic::amdgcn_fmed3);
18733 case AMDGPU::BI__builtin_amdgcn_ds_append:
18734 case AMDGPU::BI__builtin_amdgcn_ds_consume: {
18735 Intrinsic::ID Intrin = BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_append ?
18736 Intrinsic::amdgcn_ds_append : Intrinsic::amdgcn_ds_consume;
18737 Value *Src0 = EmitScalarExpr(E->getArg(0));
18738 Function *F = CGM.getIntrinsic(Intrin, { Src0->getType() });
18739 return Builder.CreateCall(F, { Src0, Builder.getFalse() });
18740 }
18741 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64:
18742 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32:
18743 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16:
18744 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64:
18745 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64:
18746 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64:
18747 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64:
18748 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64:
18749 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32:
18750 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16: {
18751 Intrinsic::ID IID;
18752 llvm::Type *ArgTy = llvm::Type::getDoubleTy(getLLVMContext());
18753 switch (BuiltinID) {
18754 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32:
18755 ArgTy = llvm::Type::getFloatTy(getLLVMContext());
18756 IID = Intrinsic::amdgcn_global_atomic_fadd;
18757 break;
18758 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16:
18759 ArgTy = llvm::FixedVectorType::get(
18760 llvm::Type::getHalfTy(getLLVMContext()), 2);
18761 IID = Intrinsic::amdgcn_global_atomic_fadd;
18762 break;
18763 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64:
18764 IID = Intrinsic::amdgcn_global_atomic_fadd;
18765 break;
18766 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64:
18767 IID = Intrinsic::amdgcn_global_atomic_fmin;
18768 break;
18769 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64:
18770 IID = Intrinsic::amdgcn_global_atomic_fmax;
18771 break;
18772 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64:
18773 IID = Intrinsic::amdgcn_flat_atomic_fadd;
18774 break;
18775 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64:
18776 IID = Intrinsic::amdgcn_flat_atomic_fmin;
18777 break;
18778 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64:
18779 IID = Intrinsic::amdgcn_flat_atomic_fmax;
18780 break;
18781 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32:
18782 ArgTy = llvm::Type::getFloatTy(getLLVMContext());
18783 IID = Intrinsic::amdgcn_flat_atomic_fadd;
18784 break;
18785 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16:
18786 ArgTy = llvm::FixedVectorType::get(
18787 llvm::Type::getHalfTy(getLLVMContext()), 2);
18788 IID = Intrinsic::amdgcn_flat_atomic_fadd;
18789 break;
18790 }
18791 llvm::Value *Addr = EmitScalarExpr(E->getArg(0));
18792 llvm::Value *Val = EmitScalarExpr(E->getArg(1));
18793 llvm::Function *F =
18794 CGM.getIntrinsic(IID, {ArgTy, Addr->getType(), Val->getType()});
18795 return Builder.CreateCall(F, {Addr, Val});
18796 }
18797 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16:
18798 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16: {
18799 Intrinsic::ID IID;
18800 switch (BuiltinID) {
18801 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16:
18802 IID = Intrinsic::amdgcn_global_atomic_fadd_v2bf16;
18803 break;
18804 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16:
18805 IID = Intrinsic::amdgcn_flat_atomic_fadd_v2bf16;
18806 break;
18807 }
18808 llvm::Value *Addr = EmitScalarExpr(E->getArg(0));
18809 llvm::Value *Val = EmitScalarExpr(E->getArg(1));
18810 llvm::Function *F = CGM.getIntrinsic(IID, {Addr->getType()});
18811 return Builder.CreateCall(F, {Addr, Val});
18812 }
18813 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_i32:
18814 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_v2i32:
18815 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4i16:
18816 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4f16:
18817 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4bf16:
18818 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8i16:
18819 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8f16:
18820 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8bf16: {
18821
18822 Intrinsic::ID IID;
18823 switch (BuiltinID) {
18824 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_i32:
18825 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_v2i32:
18826 IID = Intrinsic::amdgcn_global_load_tr_b64;
18827 break;
18828 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4i16:
18829 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4f16:
18830 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4bf16:
18831 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8i16:
18832 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8f16:
18833 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8bf16:
18834 IID = Intrinsic::amdgcn_global_load_tr_b128;
18835 break;
18836 }
18837 llvm::Type *LoadTy = ConvertType(E->getType());
18838 llvm::Value *Addr = EmitScalarExpr(E->getArg(0));
18839 llvm::Function *F = CGM.getIntrinsic(IID, {LoadTy});
18840 return Builder.CreateCall(F, {Addr});
18841 }
18842 case AMDGPU::BI__builtin_amdgcn_get_fpenv: {
18843 Function *F = CGM.getIntrinsic(Intrinsic::get_fpenv,
18844 {llvm::Type::getInt64Ty(getLLVMContext())});
18845 return Builder.CreateCall(F);
18846 }
18847 case AMDGPU::BI__builtin_amdgcn_set_fpenv: {
18848 Function *F = CGM.getIntrinsic(Intrinsic::set_fpenv,
18849 {llvm::Type::getInt64Ty(getLLVMContext())});
18850 llvm::Value *Env = EmitScalarExpr(E->getArg(0));
18851 return Builder.CreateCall(F, {Env});
18852 }
18853 case AMDGPU::BI__builtin_amdgcn_read_exec:
18854 return EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty, false);
18855 case AMDGPU::BI__builtin_amdgcn_read_exec_lo:
18856 return EmitAMDGCNBallotForExec(*this, E, Int32Ty, Int32Ty, false);
18857 case AMDGPU::BI__builtin_amdgcn_read_exec_hi:
18858 return EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty, true);
18859 case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray:
18860 case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_h:
18861 case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_l:
18862 case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_lh: {
18863 llvm::Value *NodePtr = EmitScalarExpr(E->getArg(0));
18864 llvm::Value *RayExtent = EmitScalarExpr(E->getArg(1));
18865 llvm::Value *RayOrigin = EmitScalarExpr(E->getArg(2));
18866 llvm::Value *RayDir = EmitScalarExpr(E->getArg(3));
18867 llvm::Value *RayInverseDir = EmitScalarExpr(E->getArg(4));
18868 llvm::Value *TextureDescr = EmitScalarExpr(E->getArg(5));
18869
18870 // The builtins take these arguments as vec4 where the last element is
18871 // ignored. The intrinsic takes them as vec3.
18872 RayOrigin = Builder.CreateShuffleVector(RayOrigin, RayOrigin,
18873 ArrayRef<int>{0, 1, 2});
18874 RayDir =
18875 Builder.CreateShuffleVector(RayDir, RayDir, ArrayRef<int>{0, 1, 2});
18876 RayInverseDir = Builder.CreateShuffleVector(RayInverseDir, RayInverseDir,
18877 ArrayRef<int>{0, 1, 2});
18878
18879 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_image_bvh_intersect_ray,
18880 {NodePtr->getType(), RayDir->getType()});
18881 return Builder.CreateCall(F, {NodePtr, RayExtent, RayOrigin, RayDir,
18882 RayInverseDir, TextureDescr});
18883 }
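// Illustrative sketch of the vec4 -> vec3 narrowing above (float variant,
// made-up value names):
//   %dir3 = shufflevector <4 x float> %dir, <4 x float> %dir,
//                         <3 x i32> <i32 0, i32 1, i32 2>
// The same shuffle is applied to the ray origin and inverse direction before
// calling @llvm.amdgcn.image.bvh.intersect.ray; the _h variants use half
// vectors for the direction operands instead.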
18884
18885 case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_rtn: {
18886 SmallVector<Value *, 4> Args;
18887 for (int i = 0, e = E->getNumArgs(); i != e; ++i)
18888 Args.push_back(EmitScalarExpr(E->getArg(i)));
18889
18890 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ds_bvh_stack_rtn);
18891 Value *Call = Builder.CreateCall(F, Args);
18892 Value *Rtn = Builder.CreateExtractValue(Call, 0);
18893 Value *A = Builder.CreateExtractValue(Call, 1);
18894 llvm::Type *RetTy = ConvertType(E->getType());
18895 Value *I0 = Builder.CreateInsertElement(PoisonValue::get(RetTy), Rtn,
18896 (uint64_t)0);
18897 return Builder.CreateInsertElement(I0, A, 1);
18898 }
18899
18900 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32:
18901 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w32:
18902 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64:
18903 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w64:
18904 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32:
18905 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w32:
18906 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64:
18907 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w64:
18908 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32:
18909 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64:
18910 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32:
18911 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64:
18912 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32:
18913 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64:
18914 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32:
18915 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64:
18916 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32_gfx12:
18917 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64_gfx12:
18918 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32_gfx12:
18919 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64_gfx12:
18920 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32_gfx12:
18921 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64_gfx12:
18922 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32_gfx12:
18923 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64_gfx12:
18924 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32_gfx12:
18925 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64_gfx12:
18926 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32_gfx12:
18927 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64_gfx12:
18928 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w32_gfx12:
18929 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w64_gfx12:
18930 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w32_gfx12:
18931 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w64_gfx12:
18932 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w32_gfx12:
18933 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w64_gfx12:
18934 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w32_gfx12:
18935 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w64_gfx12:
18936 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w32_gfx12:
18937 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w64_gfx12:
18938 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w32:
18939 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w64:
18940 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w32:
18941 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w64:
18942 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w32:
18943 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w64:
18944 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w32:
18945 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w64:
18946 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w32:
18947 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w64:
18948 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w32:
18949 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w64:
18950 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w32:
18951 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w64:
18952 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w32:
18953 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w64:
18954 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w32:
18955 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w64:
18956 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w32:
18957 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w64:
18958 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w32:
18959 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w64: {
18960
18961 // These operations perform a matrix multiplication and accumulation of
18962 // the form:
18963 // D = A * B + C
18964 // We need to specify one type for the A/B matrices and one for the C/D
18965 // matrices. Sparse matrix operations can have different types for A and B,
18966 // as well as an additional type for the sparsity index.
18967 // The destination type must come before the types used for source operands.
18968 SmallVector<unsigned, 2> ArgsForMatchingMatrixTypes;
18969 // On GFX12, the intrinsics with a 16-bit accumulator use a packed layout,
18970 // so there is no need for the variable opsel argument; always set it to
18971 // "false".
18972 bool AppendFalseForOpselArg = false;
18973 unsigned BuiltinWMMAOp;
18974
18975 switch (BuiltinID) {
18976 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32:
18977 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64:
18978 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32_gfx12:
18979 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64_gfx12:
18980 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
18981 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_f16;
18982 break;
18983 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32:
18984 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64:
18985 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32_gfx12:
18986 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64_gfx12:
18987 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
18988 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_bf16;
18989 break;
18990 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32_gfx12:
18991 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64_gfx12:
18992 AppendFalseForOpselArg = true;
18993 [[fallthrough]];
18994 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32:
18995 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64:
18996 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
18997 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x16_f16;
18998 break;
18999 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32_gfx12:
19000 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64_gfx12:
19001 AppendFalseForOpselArg = true;
19002 [[fallthrough]];
19003 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32:
19004 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64:
19005 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
19006 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_bf16_16x16x16_bf16;
19007 break;
19008 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w32:
19009 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w64:
19010 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
19011 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x16_f16_tied;
19012 break;
19013 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w32:
19014 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w64:
19015 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
19016 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_bf16_16x16x16_bf16_tied;
19017 break;
19018 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32:
19019 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64:
19020 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32_gfx12:
19021 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64_gfx12:
19022 ArgsForMatchingMatrixTypes = {4, 1}; // CD, AB
19023 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x16_iu8;
19024 break;
19025 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32:
19026 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64:
19027 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32_gfx12:
19028 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64_gfx12:
19029 ArgsForMatchingMatrixTypes = {4, 1}; // CD, AB
19030 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x16_iu4;
19031 break;
19032 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w32_gfx12:
19033 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w64_gfx12:
19034 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
19035 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_fp8_fp8;
19036 break;
19037 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w32_gfx12:
19038 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w64_gfx12:
19039 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
19040 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_fp8_bf8;
19041 break;
19042 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w32_gfx12:
19043 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w64_gfx12:
19044 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
19045 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_bf8_fp8;
19046 break;
19047 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w32_gfx12:
19048 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w64_gfx12:
19049 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
19050 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_bf8_bf8;
19051 break;
19052 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w32_gfx12:
19053 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w64_gfx12:
19054 ArgsForMatchingMatrixTypes = {4, 1}; // CD, AB
19055 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x32_iu4;
19056 break;
19057 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w32:
19058 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w64:
19059 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
19060 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_f16;
19061 break;
19062 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w32:
19063 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w64:
19064 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
19065 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_bf16;
19066 break;
19067 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w32:
19068 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w64:
19069 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
19070 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f16_16x16x32_f16;
19071 break;
19072 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w32:
19073 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w64:
19074 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
19075 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_bf16_16x16x32_bf16;
19076 break;
19077 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w32:
19078 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w64:
19079 ArgsForMatchingMatrixTypes = {4, 1, 3, 5}; // CD, A, B, Index
19080 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_i32_16x16x32_iu8;
19081 break;
19082 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w32:
19083 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w64:
19084 ArgsForMatchingMatrixTypes = {4, 1, 3, 5}; // CD, A, B, Index
19085 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_i32_16x16x32_iu4;
19086 break;
19087 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w32:
19088 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w64:
19089 ArgsForMatchingMatrixTypes = {4, 1, 3, 5}; // CD, A, B, Index
19090 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_i32_16x16x64_iu4;
19091 break;
19092 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w32:
19093 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w64:
19094 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
19095 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_fp8_fp8;
19096 break;
19097 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w32:
19098 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w64:
19099 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
19100 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_fp8_bf8;
19101 break;
19102 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w32:
19103 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w64:
19104 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
19105 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_bf8_fp8;
19106 break;
19107 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w32:
19108 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w64:
19109 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
19110 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_bf8_bf8;
19111 break;
19112 }
19113
19114 SmallVector<Value *, 6> Args;
19115 for (int i = 0, e = E->getNumArgs(); i != e; ++i)
19116 Args.push_back(EmitScalarExpr(E->getArg(i)));
19117 if (AppendFalseForOpselArg)
19118 Args.push_back(Builder.getFalse());
19119
19120 SmallVector<llvm::Type *, 6> ArgTypes;
19121 for (auto ArgIdx : ArgsForMatchingMatrixTypes)
19122 ArgTypes.push_back(Args[ArgIdx]->getType());
19123
19124 Function *F = CGM.getIntrinsic(BuiltinWMMAOp, ArgTypes);
19125 return Builder.CreateCall(F, Args);
19126 }
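// Illustrative sketch, assuming the wave32 f32/f16 WMMA builtin: for
//   __builtin_amdgcn_wmma_f32_16x16x16_f16_w32(a, b, c)
// ArgsForMatchingMatrixTypes is {2, 0}, so the intrinsic is mangled first on
// the C/D type (argument 2) and then on the A/B type (argument 0), giving
// roughly
//   call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.f16.v8f32.v16f16(
//            <16 x half> %a, <16 x half> %b, <8 x float> %c)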
19127
19128 // amdgcn workitem
19129 case AMDGPU::BI__builtin_amdgcn_workitem_id_x:
19130 return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_x, 0, 1024);
19131 case AMDGPU::BI__builtin_amdgcn_workitem_id_y:
19132 return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_y, 0, 1024);
19133 case AMDGPU::BI__builtin_amdgcn_workitem_id_z:
19134 return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_z, 0, 1024);
19135
19136 // amdgcn workgroup size
19137 case AMDGPU::BI__builtin_amdgcn_workgroup_size_x:
19138 return EmitAMDGPUWorkGroupSize(*this, 0);
19139 case AMDGPU::BI__builtin_amdgcn_workgroup_size_y:
19140 return EmitAMDGPUWorkGroupSize(*this, 1);
19141 case AMDGPU::BI__builtin_amdgcn_workgroup_size_z:
19142 return EmitAMDGPUWorkGroupSize(*this, 2);
19143
19144 // amdgcn grid size
19145 case AMDGPU::BI__builtin_amdgcn_grid_size_x:
19146 return EmitAMDGPUGridSize(*this, 0);
19147 case AMDGPU::BI__builtin_amdgcn_grid_size_y:
19148 return EmitAMDGPUGridSize(*this, 1);
19149 case AMDGPU::BI__builtin_amdgcn_grid_size_z:
19150 return EmitAMDGPUGridSize(*this, 2);
19151
19152 // r600 intrinsics
19153 case AMDGPU::BI__builtin_r600_recipsqrt_ieee:
19154 case AMDGPU::BI__builtin_r600_recipsqrt_ieeef:
19155 return emitBuiltinWithOneOverloadedType<1>(*this, E,
19156 Intrinsic::r600_recipsqrt_ieee);
19157 case AMDGPU::BI__builtin_r600_read_tidig_x:
19158 return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_x, 0, 1024);
19159 case AMDGPU::BI__builtin_r600_read_tidig_y:
19160 return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_y, 0, 1024);
19161 case AMDGPU::BI__builtin_r600_read_tidig_z:
19162 return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_z, 0, 1024);
19163 case AMDGPU::BI__builtin_amdgcn_alignbit: {
19164 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
19165 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
19166 llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
19167 Function *F = CGM.getIntrinsic(Intrinsic::fshr, Src0->getType());
19168 return Builder.CreateCall(F, { Src0, Src1, Src2 });
19169 }
19170 case AMDGPU::BI__builtin_amdgcn_fence: {
19171 ProcessOrderScopeAMDGCN(EmitScalarExpr(E->getArg(0)),
19172 EmitScalarExpr(E->getArg(1)), AO, SSID);
19173 FenceInst *Fence = Builder.CreateFence(AO, SSID);
19174 if (E->getNumArgs() > 2)
19175 AddAMDGPUFenceAddressSpaceMMRA(Fence, E);
19176 return Fence;
19177 }
19178 case AMDGPU::BI__builtin_amdgcn_atomic_inc32:
19179 case AMDGPU::BI__builtin_amdgcn_atomic_inc64:
19180 case AMDGPU::BI__builtin_amdgcn_atomic_dec32:
19181 case AMDGPU::BI__builtin_amdgcn_atomic_dec64:
19182 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f64:
19183 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f32:
19184 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2f16:
19185 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16:
19186 case AMDGPU::BI__builtin_amdgcn_ds_faddf:
19187 case AMDGPU::BI__builtin_amdgcn_ds_fminf:
19188 case AMDGPU::BI__builtin_amdgcn_ds_fmaxf: {
19189 llvm::AtomicRMWInst::BinOp BinOp;
19190 switch (BuiltinID) {
19191 case AMDGPU::BI__builtin_amdgcn_atomic_inc32:
19192 case AMDGPU::BI__builtin_amdgcn_atomic_inc64:
19193 BinOp = llvm::AtomicRMWInst::UIncWrap;
19194 break;
19195 case AMDGPU::BI__builtin_amdgcn_atomic_dec32:
19196 case AMDGPU::BI__builtin_amdgcn_atomic_dec64:
19197 BinOp = llvm::AtomicRMWInst::UDecWrap;
19198 break;
19199 case AMDGPU::BI__builtin_amdgcn_ds_faddf:
19200 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f64:
19201 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f32:
19202 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2f16:
19203 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16:
19204 BinOp = llvm::AtomicRMWInst::FAdd;
19205 break;
19206 case AMDGPU::BI__builtin_amdgcn_ds_fminf:
19207 BinOp = llvm::AtomicRMWInst::FMin;
19208 break;
19209 case AMDGPU::BI__builtin_amdgcn_ds_fmaxf:
19210 BinOp = llvm::AtomicRMWInst::FMax;
19211 break;
19212 }
19213
19214 Address Ptr = CheckAtomicAlignment(*this, E);
19215 Value *Val = EmitScalarExpr(E->getArg(1));
19216 llvm::Type *OrigTy = Val->getType();
19217 QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
19218
19219 bool Volatile;
19220
19221 if (BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_faddf ||
19222 BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_fminf ||
19223 BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_fmaxf) {
19224 // __builtin_amdgcn_ds_faddf/fminf/fmaxf have an explicit volatile argument
19225 Volatile =
19226 cast<ConstantInt>(EmitScalarExpr(E->getArg(4)))->getZExtValue();
19227 } else {
19228 // Infer volatile from the passed type.
19229 Volatile =
19230 PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
19231 }
19232
19233 if (E->getNumArgs() >= 4) {
19234 // Some of the builtins have explicit ordering and scope arguments.
19235 ProcessOrderScopeAMDGCN(EmitScalarExpr(E->getArg(2)),
19236 EmitScalarExpr(E->getArg(3)), AO, SSID);
19237 } else {
19238 // The ds_atomic_fadd_* builtins do not have syncscope/order arguments.
19239 SSID = llvm::SyncScope::System;
19240 AO = AtomicOrdering::SequentiallyConsistent;
19241
19242 // The v2bf16 builtin uses i16 instead of a natural bfloat type.
19243 if (BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16) {
19244 llvm::Type *V2BF16Ty = FixedVectorType::get(
19245 llvm::Type::getBFloatTy(Builder.getContext()), 2);
19246 Val = Builder.CreateBitCast(Val, V2BF16Ty);
19247 }
19248 }
19249
19250 llvm::AtomicRMWInst *RMW =
19251 Builder.CreateAtomicRMW(BinOp, Ptr, Val, AO, SSID);
19252 if (Volatile)
19253 RMW->setVolatile(true);
19254 return Builder.CreateBitCast(RMW, OrigTy);
19255 }
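// Illustrative sketch for the LDS float-add path above (f32 variant, made-up
// value names): __builtin_amdgcn_ds_atomic_fadd_f32(p, v) becomes roughly
//   %old = atomicrmw fadd ptr addrspace(3) %p, float %v seq_cst
// with the ordering/scope instead taken from the explicit arguments for the
// builtins that carry them.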
19256 case AMDGPU::BI__builtin_amdgcn_s_sendmsg_rtn:
19257 case AMDGPU::BI__builtin_amdgcn_s_sendmsg_rtnl: {
19258 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
19259 llvm::Type *ResultType = ConvertType(E->getType());
19260 // s_sendmsg_rtn is mangled using return type only.
19261 Function *F =
19262 CGM.getIntrinsic(Intrinsic::amdgcn_s_sendmsg_rtn, {ResultType});
19263 return Builder.CreateCall(F, {Arg});
19264 }
19265 case AMDGPU::BI__builtin_amdgcn_make_buffer_rsrc:
19266 return emitBuiltinWithOneOverloadedType<4>(
19267 *this, E, Intrinsic::amdgcn_make_buffer_rsrc);
19268 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b8:
19269 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b16:
19270 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b32:
19271 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b64:
19272 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b96:
19273 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b128:
19274 return emitBuiltinWithOneOverloadedType<5>(
19275 *this, E, Intrinsic::amdgcn_raw_ptr_buffer_store);
19276 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b8:
19277 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b16:
19278 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b32:
19279 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b64:
19280 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b96:
19281 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b128: {
19282 llvm::Type *RetTy = nullptr;
19283 switch (BuiltinID) {
19284 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b8:
19285 RetTy = Int8Ty;
19286 break;
19287 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b16:
19288 RetTy = Int16Ty;
19289 break;
19290 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b32:
19291 RetTy = Int32Ty;
19292 break;
19293 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b64:
19294 RetTy = llvm::FixedVectorType::get(Int32Ty, /*NumElements=*/2);
19295 break;
19296 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b96:
19297 RetTy = llvm::FixedVectorType::get(Int32Ty, /*NumElements=*/3);
19298 break;
19299 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b128:
19300 RetTy = llvm::FixedVectorType::get(Int32Ty, /*NumElements=*/4);
19301 break;
19302 }
19303 Function *F =
19304 CGM.getIntrinsic(Intrinsic::amdgcn_raw_ptr_buffer_load, RetTy);
19305 return Builder.CreateCall(
19306 F, {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1)),
19307 EmitScalarExpr(E->getArg(2)), EmitScalarExpr(E->getArg(3))});
19308 }
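// Illustrative sketch for the buffer-load cases above (b64 variant, made-up
// value names): __builtin_amdgcn_raw_buffer_load_b64(rsrc, off, soff, 0)
// lowers roughly to
//   %v = call <2 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v2i32(
//            ptr addrspace(8) %rsrc, i32 %off, i32 %soff, i32 0)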
19309 default:
19310 return nullptr;
19311 }
19312 }
19313
19314 /// Handle a SystemZ function in which the final argument is a pointer
19315 /// to an int that receives the post-instruction CC value. At the LLVM level
19316 /// this is represented as a function that returns a {result, cc} pair.
19317 static Value *EmitSystemZIntrinsicWithCC(CodeGenFunction &CGF,
19318 unsigned IntrinsicID,
19319 const CallExpr *E) {
19320 unsigned NumArgs = E->getNumArgs() - 1;
19321 SmallVector<Value *, 8> Args(NumArgs);
19322 for (unsigned I = 0; I < NumArgs; ++I)
19323 Args[I] = CGF.EmitScalarExpr(E->getArg(I));
19324 Address CCPtr = CGF.EmitPointerWithAlignment(E->getArg(NumArgs));
19325 Function *F = CGF.CGM.getIntrinsic(IntrinsicID);
19326 Value *Call = CGF.Builder.CreateCall(F, Args);
19327 Value *CC = CGF.Builder.CreateExtractValue(Call, 1);
19328 CGF.Builder.CreateStore(CC, CCPtr);
19329 return CGF.Builder.CreateExtractValue(Call, 0);
19330 }
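// Illustrative sketch of the CC translation above (types chosen only for the
// example; the intrinsic name is elided):
//   %pair = call { <16 x i8>, i32 } @llvm.s390.<op>(...)
//   %res  = extractvalue { <16 x i8>, i32 } %pair, 0
//   %cc   = extractvalue { <16 x i8>, i32 } %pair, 1
//   store i32 %cc, ptr %cc.out
// with %res returned as the builtin's value and *cc receiving the condition
// code.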
19331
19332 Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID,
19333 const CallExpr *E) {
19334 switch (BuiltinID) {
19335 case SystemZ::BI__builtin_tbegin: {
19336 Value *TDB = EmitScalarExpr(E->getArg(0));
19337 Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c);
19338 Function *F = CGM.getIntrinsic(Intrinsic::s390_tbegin);
19339 return Builder.CreateCall(F, {TDB, Control});
19340 }
19341 case SystemZ::BI__builtin_tbegin_nofloat: {
19342 Value *TDB = EmitScalarExpr(E->getArg(0));
19343 Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c);
19344 Function *F = CGM.getIntrinsic(Intrinsic::s390_tbegin_nofloat);
19345 return Builder.CreateCall(F, {TDB, Control});
19346 }
19347 case SystemZ::BI__builtin_tbeginc: {
19348 Value *TDB = llvm::ConstantPointerNull::get(Int8PtrTy);
19349 Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff08);
19350 Function *F = CGM.getIntrinsic(Intrinsic::s390_tbeginc);
19351 return Builder.CreateCall(F, {TDB, Control});
19352 }
19353 case SystemZ::BI__builtin_tabort: {
19354 Value *Data = EmitScalarExpr(E->getArg(0));
19355 Function *F = CGM.getIntrinsic(Intrinsic::s390_tabort);
19356 return Builder.CreateCall(F, Builder.CreateSExt(Data, Int64Ty, "tabort"));
19357 }
19358 case SystemZ::BI__builtin_non_tx_store: {
19359 Value *Address = EmitScalarExpr(E->getArg(0));
19360 Value *Data = EmitScalarExpr(E->getArg(1));
19361 Function *F = CGM.getIntrinsic(Intrinsic::s390_ntstg);
19362 return Builder.CreateCall(F, {Data, Address});
19363 }
19364
19365 // Vector builtins. Note that most vector builtins are mapped automatically
19366 // to target-specific LLVM intrinsics. The ones handled specially here can
19367 // be represented via standard LLVM IR, which is preferable because it
19368 // enables common LLVM optimizations.
19369
19370 case SystemZ::BI__builtin_s390_vpopctb:
19371 case SystemZ::BI__builtin_s390_vpopcth:
19372 case SystemZ::BI__builtin_s390_vpopctf:
19373 case SystemZ::BI__builtin_s390_vpopctg: {
19374 llvm::Type *ResultType = ConvertType(E->getType());
19375 Value *X = EmitScalarExpr(E->getArg(0));
19376 Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
19377 return Builder.CreateCall(F, X);
19378 }
19379
19380 case SystemZ::BI__builtin_s390_vclzb:
19381 case SystemZ::BI__builtin_s390_vclzh:
19382 case SystemZ::BI__builtin_s390_vclzf:
19383 case SystemZ::BI__builtin_s390_vclzg: {
19384 llvm::Type *ResultType = ConvertType(E->getType());
19385 Value *X = EmitScalarExpr(E->getArg(0));
19386 Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
19387 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType);
19388 return Builder.CreateCall(F, {X, Undef});
19389 }
19390
19391 case SystemZ::BI__builtin_s390_vctzb:
19392 case SystemZ::BI__builtin_s390_vctzh:
19393 case SystemZ::BI__builtin_s390_vctzf:
19394 case SystemZ::BI__builtin_s390_vctzg: {
19395 llvm::Type *ResultType = ConvertType(E->getType());
19396 Value *X = EmitScalarExpr(E->getArg(0));
19397 Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
19398 Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType);
19399 return Builder.CreateCall(F, {X, Undef});
19400 }
19401
19402 case SystemZ::BI__builtin_s390_verllb:
19403 case SystemZ::BI__builtin_s390_verllh:
19404 case SystemZ::BI__builtin_s390_verllf:
19405 case SystemZ::BI__builtin_s390_verllg: {
19406 llvm::Type *ResultType = ConvertType(E->getType());
19407 llvm::Value *Src = EmitScalarExpr(E->getArg(0));
19408 llvm::Value *Amt = EmitScalarExpr(E->getArg(1));
19409 // Splat scalar rotate amount to vector type.
19410 unsigned NumElts = cast<llvm::FixedVectorType>(ResultType)->getNumElements();
19411 Amt = Builder.CreateIntCast(Amt, ResultType->getScalarType(), false);
19412 Amt = Builder.CreateVectorSplat(NumElts, Amt);
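// fshl(Src, Src, Amt) is a left rotate of each element by Amt; the shift
// amount is interpreted modulo the element width.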
19413 Function *F = CGM.getIntrinsic(Intrinsic::fshl, ResultType);
19414 return Builder.CreateCall(F, { Src, Src, Amt });
19415 }
19416
19417 case SystemZ::BI__builtin_s390_verllvb:
19418 case SystemZ::BI__builtin_s390_verllvh:
19419 case SystemZ::BI__builtin_s390_verllvf:
19420 case SystemZ::BI__builtin_s390_verllvg: {
19421 llvm::Type *ResultType = ConvertType(E->getType());
19422 llvm::Value *Src = EmitScalarExpr(E->getArg(0));
19423 llvm::Value *Amt = EmitScalarExpr(E->getArg(1));
19424 Function *F = CGM.getIntrinsic(Intrinsic::fshl, ResultType);
19425 return Builder.CreateCall(F, { Src, Src, Amt });
19426 }
19427
19428 case SystemZ::BI__builtin_s390_vfsqsb:
19429 case SystemZ::BI__builtin_s390_vfsqdb: {
19430 llvm::Type *ResultType = ConvertType(E->getType());
19431 Value *X = EmitScalarExpr(E->getArg(0));
19432 if (Builder.getIsFPConstrained()) {
19433 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt, ResultType);
19434 return Builder.CreateConstrainedFPCall(F, { X });
19435 } else {
19436 Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType);
19437 return Builder.CreateCall(F, X);
19438 }
19439 }
19440 case SystemZ::BI__builtin_s390_vfmasb:
19441 case SystemZ::BI__builtin_s390_vfmadb: {
19442 llvm::Type *ResultType = ConvertType(E->getType());
19443 Value *X = EmitScalarExpr(E->getArg(0));
19444 Value *Y = EmitScalarExpr(E->getArg(1));
19445 Value *Z = EmitScalarExpr(E->getArg(2));
19446 if (Builder.getIsFPConstrained()) {
19447 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType);
19448 return Builder.CreateConstrainedFPCall(F, {X, Y, Z});
19449 } else {
19450 Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
19451 return Builder.CreateCall(F, {X, Y, Z});
19452 }
19453 }
19454 case SystemZ::BI__builtin_s390_vfmssb:
19455 case SystemZ::BI__builtin_s390_vfmsdb: {
19456 llvm::Type *ResultType = ConvertType(E->getType());
19457 Value *X = EmitScalarExpr(E->getArg(0));
19458 Value *Y = EmitScalarExpr(E->getArg(1));
19459 Value *Z = EmitScalarExpr(E->getArg(2));
19460 if (Builder.getIsFPConstrained()) {
19461 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType);
19462 return Builder.CreateConstrainedFPCall(F, {X, Y, Builder.CreateFNeg(Z, "neg")});
19463 } else {
19464 Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
19465 return Builder.CreateCall(F, {X, Y, Builder.CreateFNeg(Z, "neg")});
19466 }
19467 }
19468 case SystemZ::BI__builtin_s390_vfnmasb:
19469 case SystemZ::BI__builtin_s390_vfnmadb: {
19470 llvm::Type *ResultType = ConvertType(E->getType());
19471 Value *X = EmitScalarExpr(E->getArg(0));
19472 Value *Y = EmitScalarExpr(E->getArg(1));
19473 Value *Z = EmitScalarExpr(E->getArg(2));
19474 if (Builder.getIsFPConstrained()) {
19475 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType);
19476 return Builder.CreateFNeg(Builder.CreateConstrainedFPCall(F, {X, Y, Z}), "neg");
19477 } else {
19478 Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
19479 return Builder.CreateFNeg(Builder.CreateCall(F, {X, Y, Z}), "neg");
19480 }
19481 }
19482 case SystemZ::BI__builtin_s390_vfnmssb:
19483 case SystemZ::BI__builtin_s390_vfnmsdb: {
19484 llvm::Type *ResultType = ConvertType(E->getType());
19485 Value *X = EmitScalarExpr(E->getArg(0));
19486 Value *Y = EmitScalarExpr(E->getArg(1));
19487 Value *Z = EmitScalarExpr(E->getArg(2));
19488 if (Builder.getIsFPConstrained()) {
19489 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType);
19490 Value *NegZ = Builder.CreateFNeg(Z, "neg");
19491 return Builder.CreateFNeg(Builder.CreateConstrainedFPCall(F, {X, Y, NegZ}));
19492 } else {
19493 Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
19494 Value *NegZ = Builder.CreateFNeg(Z, "neg");
19495 return Builder.CreateFNeg(Builder.CreateCall(F, {X, Y, NegZ}));
19496 }
19497 }
19498 case SystemZ::BI__builtin_s390_vflpsb:
19499 case SystemZ::BI__builtin_s390_vflpdb: {
19500 llvm::Type *ResultType = ConvertType(E->getType());
19501 Value *X = EmitScalarExpr(E->getArg(0));
19502 Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
19503 return Builder.CreateCall(F, X);
19504 }
19505 case SystemZ::BI__builtin_s390_vflnsb:
19506 case SystemZ::BI__builtin_s390_vflndb: {
19507 llvm::Type *ResultType = ConvertType(E->getType());
19508 Value *X = EmitScalarExpr(E->getArg(0));
19509 Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
19510 return Builder.CreateFNeg(Builder.CreateCall(F, X), "neg");
19511 }
19512 case SystemZ::BI__builtin_s390_vfisb:
19513 case SystemZ::BI__builtin_s390_vfidb: {
19514 llvm::Type *ResultType = ConvertType(E->getType());
19515 Value *X = EmitScalarExpr(E->getArg(0));
19516 // Constant-fold the M4 and M5 mask arguments.
19517 llvm::APSInt M4 = *E->getArg(1)->getIntegerConstantExpr(getContext());
19518 llvm::APSInt M5 = *E->getArg(2)->getIntegerConstantExpr(getContext());
19519 // Check whether this instance can be represented via an LLVM standard
19520 // intrinsic. We only support some combinations of M4 and M5.
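// For example, (M4, M5) == (4, 7) -- IEEE-inexact suppressed, round toward
// minus infinity -- maps to llvm.floor (or its constrained equivalent).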
19521 Intrinsic::ID ID = Intrinsic::not_intrinsic;
19522 Intrinsic::ID CI;
19523 switch (M4.getZExtValue()) {
19524 default: break;
19525 case 0: // IEEE-inexact exception allowed
19526 switch (M5.getZExtValue()) {
19527 default: break;
19528 case 0: ID = Intrinsic::rint;
19529 CI = Intrinsic::experimental_constrained_rint; break;
19530 }
19531 break;
19532 case 4: // IEEE-inexact exception suppressed
19533 switch (M5.getZExtValue()) {
19534 default: break;
19535 case 0: ID = Intrinsic::nearbyint;
19536 CI = Intrinsic::experimental_constrained_nearbyint; break;
19537 case 1: ID = Intrinsic::round;
19538 CI = Intrinsic::experimental_constrained_round; break;
19539 case 5: ID = Intrinsic::trunc;
19540 CI = Intrinsic::experimental_constrained_trunc; break;
19541 case 6: ID = Intrinsic::ceil;
19542 CI = Intrinsic::experimental_constrained_ceil; break;
19543 case 7: ID = Intrinsic::floor;
19544 CI = Intrinsic::experimental_constrained_floor; break;
19545 }
19546 break;
19547 }
19548 if (ID != Intrinsic::not_intrinsic) {
19549 if (Builder.getIsFPConstrained()) {
19550 Function *F = CGM.getIntrinsic(CI, ResultType);
19551 return Builder.CreateConstrainedFPCall(F, X);
19552 } else {
19553 Function *F = CGM.getIntrinsic(ID, ResultType);
19554 return Builder.CreateCall(F, X);
19555 }
19556 }
19557 switch (BuiltinID) { // FIXME: constrained version?
19558 case SystemZ::BI__builtin_s390_vfisb: ID = Intrinsic::s390_vfisb; break;
19559 case SystemZ::BI__builtin_s390_vfidb: ID = Intrinsic::s390_vfidb; break;
19560 default: llvm_unreachable("Unknown BuiltinID");
19561 }
19562 Function *F = CGM.getIntrinsic(ID);
19563 Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
19564 Value *M5Value = llvm::ConstantInt::get(getLLVMContext(), M5);
19565 return Builder.CreateCall(F, {X, M4Value, M5Value});
19566 }
19567 case SystemZ::BI__builtin_s390_vfmaxsb:
19568 case SystemZ::BI__builtin_s390_vfmaxdb: {
19569 llvm::Type *ResultType = ConvertType(E->getType());
19570 Value *X = EmitScalarExpr(E->getArg(0));
19571 Value *Y = EmitScalarExpr(E->getArg(1));
19572 // Constant-fold the M4 mask argument.
19573 llvm::APSInt M4 = *E->getArg(2)->getIntegerConstantExpr(getContext());
19574 // Check whether this instance can be represented via an LLVM standard
19575 // intrinsic. We only support some values of M4.
19576 Intrinsic::ID ID = Intrinsic::not_intrinsic;
19577 Intrinsic::ID CI;
19578 switch (M4.getZExtValue()) {
19579 default: break;
19580 case 4: ID = Intrinsic::maxnum;
19581 CI = Intrinsic::experimental_constrained_maxnum; break;
19582 }
19583 if (ID != Intrinsic::not_intrinsic) {
19584 if (Builder.getIsFPConstrained()) {
19585 Function *F = CGM.getIntrinsic(CI, ResultType);
19586 return Builder.CreateConstrainedFPCall(F, {X, Y});
19587 } else {
19588 Function *F = CGM.getIntrinsic(ID, ResultType);
19589 return Builder.CreateCall(F, {X, Y});
19590 }
19591 }
19592 switch (BuiltinID) {
19593 case SystemZ::BI__builtin_s390_vfmaxsb: ID = Intrinsic::s390_vfmaxsb; break;
19594 case SystemZ::BI__builtin_s390_vfmaxdb: ID = Intrinsic::s390_vfmaxdb; break;
19595 default: llvm_unreachable("Unknown BuiltinID");
19596 }
19597 Function *F = CGM.getIntrinsic(ID);
19598 Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
19599 return Builder.CreateCall(F, {X, Y, M4Value});
19600 }
19601 case SystemZ::BI__builtin_s390_vfminsb:
19602 case SystemZ::BI__builtin_s390_vfmindb: {
19603 llvm::Type *ResultType = ConvertType(E->getType());
19604 Value *X = EmitScalarExpr(E->getArg(0));
19605 Value *Y = EmitScalarExpr(E->getArg(1));
19606 // Constant-fold the M4 mask argument.
19607 llvm::APSInt M4 = *E->getArg(2)->getIntegerConstantExpr(getContext());
19608 // Check whether this instance can be represented via an LLVM standard
19609 // intrinsic. We only support some values of M4.
19610 Intrinsic::ID ID = Intrinsic::not_intrinsic;
19611 Intrinsic::ID CI;
19612 switch (M4.getZExtValue()) {
19613 default: break;
19614 case 4: ID = Intrinsic::minnum;
19615 CI = Intrinsic::experimental_constrained_minnum; break;
19616 }
19617 if (ID != Intrinsic::not_intrinsic) {
19618 if (Builder.getIsFPConstrained()) {
19619 Function *F = CGM.getIntrinsic(CI, ResultType);
19620 return Builder.CreateConstrainedFPCall(F, {X, Y});
19621 } else {
19622 Function *F = CGM.getIntrinsic(ID, ResultType);
19623 return Builder.CreateCall(F, {X, Y});
19624 }
19625 }
19626 switch (BuiltinID) {
19627 case SystemZ::BI__builtin_s390_vfminsb: ID = Intrinsic::s390_vfminsb; break;
19628 case SystemZ::BI__builtin_s390_vfmindb: ID = Intrinsic::s390_vfmindb; break;
19629 default: llvm_unreachable("Unknown BuiltinID");
19630 }
19631 Function *F = CGM.getIntrinsic(ID);
19632 Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
19633 return Builder.CreateCall(F, {X, Y, M4Value});
19634 }
19635
19636 case SystemZ::BI__builtin_s390_vlbrh:
19637 case SystemZ::BI__builtin_s390_vlbrf:
19638 case SystemZ::BI__builtin_s390_vlbrg: {
19639 llvm::Type *ResultType = ConvertType(E->getType());
19640 Value *X = EmitScalarExpr(E->getArg(0));
19641 Function *F = CGM.getIntrinsic(Intrinsic::bswap, ResultType);
19642 return Builder.CreateCall(F, X);
19643 }
19644
19645 // Vector intrinsics that output the post-instruction CC value.
19646
19647 #define INTRINSIC_WITH_CC(NAME) \
19648 case SystemZ::BI__builtin_##NAME: \
19649 return EmitSystemZIntrinsicWithCC(*this, Intrinsic::NAME, E)
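// For example, INTRINSIC_WITH_CC(s390_vpkshs); expands to:
//   case SystemZ::BI__builtin_s390_vpkshs:
//     return EmitSystemZIntrinsicWithCC(*this, Intrinsic::s390_vpkshs, E);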
19650
19651 INTRINSIC_WITH_CC(s390_vpkshs);
19652 INTRINSIC_WITH_CC(s390_vpksfs);
19653 INTRINSIC_WITH_CC(s390_vpksgs);
19654
19655 INTRINSIC_WITH_CC(s390_vpklshs);
19656 INTRINSIC_WITH_CC(s390_vpklsfs);
19657 INTRINSIC_WITH_CC(s390_vpklsgs);
19658
19659 INTRINSIC_WITH_CC(s390_vceqbs);
19660 INTRINSIC_WITH_CC(s390_vceqhs);
19661 INTRINSIC_WITH_CC(s390_vceqfs);
19662 INTRINSIC_WITH_CC(s390_vceqgs);
19663
19664 INTRINSIC_WITH_CC(s390_vchbs);
19665 INTRINSIC_WITH_CC(s390_vchhs);
19666 INTRINSIC_WITH_CC(s390_vchfs);
19667 INTRINSIC_WITH_CC(s390_vchgs);
19668
19669 INTRINSIC_WITH_CC(s390_vchlbs);
19670 INTRINSIC_WITH_CC(s390_vchlhs);
19671 INTRINSIC_WITH_CC(s390_vchlfs);
19672 INTRINSIC_WITH_CC(s390_vchlgs);
19673
19674 INTRINSIC_WITH_CC(s390_vfaebs);
19675 INTRINSIC_WITH_CC(s390_vfaehs);
19676 INTRINSIC_WITH_CC(s390_vfaefs);
19677
19678 INTRINSIC_WITH_CC(s390_vfaezbs);
19679 INTRINSIC_WITH_CC(s390_vfaezhs);
19680 INTRINSIC_WITH_CC(s390_vfaezfs);
19681
19682 INTRINSIC_WITH_CC(s390_vfeebs);
19683 INTRINSIC_WITH_CC(s390_vfeehs);
19684 INTRINSIC_WITH_CC(s390_vfeefs);
19685
19686 INTRINSIC_WITH_CC(s390_vfeezbs);
19687 INTRINSIC_WITH_CC(s390_vfeezhs);
19688 INTRINSIC_WITH_CC(s390_vfeezfs);
19689
19690 INTRINSIC_WITH_CC(s390_vfenebs);
19691 INTRINSIC_WITH_CC(s390_vfenehs);
19692 INTRINSIC_WITH_CC(s390_vfenefs);
19693
19694 INTRINSIC_WITH_CC(s390_vfenezbs);
19695 INTRINSIC_WITH_CC(s390_vfenezhs);
19696 INTRINSIC_WITH_CC(s390_vfenezfs);
19697
19698 INTRINSIC_WITH_CC(s390_vistrbs);
19699 INTRINSIC_WITH_CC(s390_vistrhs);
19700 INTRINSIC_WITH_CC(s390_vistrfs);
19701
19702 INTRINSIC_WITH_CC(s390_vstrcbs);
19703 INTRINSIC_WITH_CC(s390_vstrchs);
19704 INTRINSIC_WITH_CC(s390_vstrcfs);
19705
19706 INTRINSIC_WITH_CC(s390_vstrczbs);
19707 INTRINSIC_WITH_CC(s390_vstrczhs);
19708 INTRINSIC_WITH_CC(s390_vstrczfs);
19709
19710 INTRINSIC_WITH_CC(s390_vfcesbs);
19711 INTRINSIC_WITH_CC(s390_vfcedbs);
19712 INTRINSIC_WITH_CC(s390_vfchsbs);
19713 INTRINSIC_WITH_CC(s390_vfchdbs);
19714 INTRINSIC_WITH_CC(s390_vfchesbs);
19715 INTRINSIC_WITH_CC(s390_vfchedbs);
19716
19717 INTRINSIC_WITH_CC(s390_vftcisb);
19718 INTRINSIC_WITH_CC(s390_vftcidb);
19719
19720 INTRINSIC_WITH_CC(s390_vstrsb);
19721 INTRINSIC_WITH_CC(s390_vstrsh);
19722 INTRINSIC_WITH_CC(s390_vstrsf);
19723
19724 INTRINSIC_WITH_CC(s390_vstrszb);
19725 INTRINSIC_WITH_CC(s390_vstrszh);
19726 INTRINSIC_WITH_CC(s390_vstrszf);
19727
19728 #undef INTRINSIC_WITH_CC
19729
19730 default:
19731 return nullptr;
19732 }
19733 }
19734
19735 namespace {
19736 // Helper classes for mapping MMA builtins to a particular LLVM intrinsic variant.
19737 struct NVPTXMmaLdstInfo {
19738 unsigned NumResults; // Number of elements to load/store
19739 // Intrinsic IDs for row/col variants. 0 if particular layout is unsupported.
19740 unsigned IID_col;
19741 unsigned IID_row;
19742 };
19743
19744 #define MMA_INTR(geom_op_type, layout) \
19745 Intrinsic::nvvm_wmma_##geom_op_type##_##layout##_stride
19746 #define MMA_LDST(n, geom_op_type) \
19747 { n, MMA_INTR(geom_op_type, col), MMA_INTR(geom_op_type, row) }
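// For example, MMA_LDST(8, m16n16k16_load_a_f16) expands to
//   { 8, Intrinsic::nvvm_wmma_m16n16k16_load_a_f16_col_stride,
//        Intrinsic::nvvm_wmma_m16n16k16_load_a_f16_row_stride }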
19748
19749 static NVPTXMmaLdstInfo getNVPTXMmaLdstInfo(unsigned BuiltinID) {
19750 switch (BuiltinID) {
19751 // FP MMA loads
19752 case NVPTX::BI__hmma_m16n16k16_ld_a:
19753 return MMA_LDST(8, m16n16k16_load_a_f16);
19754 case NVPTX::BI__hmma_m16n16k16_ld_b:
19755 return MMA_LDST(8, m16n16k16_load_b_f16);
19756 case NVPTX::BI__hmma_m16n16k16_ld_c_f16:
19757 return MMA_LDST(4, m16n16k16_load_c_f16);
19758 case NVPTX::BI__hmma_m16n16k16_ld_c_f32:
19759 return MMA_LDST(8, m16n16k16_load_c_f32);
19760 case NVPTX::BI__hmma_m32n8k16_ld_a:
19761 return MMA_LDST(8, m32n8k16_load_a_f16);
19762 case NVPTX::BI__hmma_m32n8k16_ld_b:
19763 return MMA_LDST(8, m32n8k16_load_b_f16);
19764 case NVPTX::BI__hmma_m32n8k16_ld_c_f16:
19765 return MMA_LDST(4, m32n8k16_load_c_f16);
19766 case NVPTX::BI__hmma_m32n8k16_ld_c_f32:
19767 return MMA_LDST(8, m32n8k16_load_c_f32);
19768 case NVPTX::BI__hmma_m8n32k16_ld_a:
19769 return MMA_LDST(8, m8n32k16_load_a_f16);
19770 case NVPTX::BI__hmma_m8n32k16_ld_b:
19771 return MMA_LDST(8, m8n32k16_load_b_f16);
19772 case NVPTX::BI__hmma_m8n32k16_ld_c_f16:
19773 return MMA_LDST(4, m8n32k16_load_c_f16);
19774 case NVPTX::BI__hmma_m8n32k16_ld_c_f32:
19775 return MMA_LDST(8, m8n32k16_load_c_f32);
19776
19777 // Integer MMA loads
19778 case NVPTX::BI__imma_m16n16k16_ld_a_s8:
19779 return MMA_LDST(2, m16n16k16_load_a_s8);
19780 case NVPTX::BI__imma_m16n16k16_ld_a_u8:
19781 return MMA_LDST(2, m16n16k16_load_a_u8);
19782 case NVPTX::BI__imma_m16n16k16_ld_b_s8:
19783 return MMA_LDST(2, m16n16k16_load_b_s8);
19784 case NVPTX::BI__imma_m16n16k16_ld_b_u8:
19785 return MMA_LDST(2, m16n16k16_load_b_u8);
19786 case NVPTX::BI__imma_m16n16k16_ld_c:
19787 return MMA_LDST(8, m16n16k16_load_c_s32);
19788 case NVPTX::BI__imma_m32n8k16_ld_a_s8:
19789 return MMA_LDST(4, m32n8k16_load_a_s8);
19790 case NVPTX::BI__imma_m32n8k16_ld_a_u8:
19791 return MMA_LDST(4, m32n8k16_load_a_u8);
19792 case NVPTX::BI__imma_m32n8k16_ld_b_s8:
19793 return MMA_LDST(1, m32n8k16_load_b_s8);
19794 case NVPTX::BI__imma_m32n8k16_ld_b_u8:
19795 return MMA_LDST(1, m32n8k16_load_b_u8);
19796 case NVPTX::BI__imma_m32n8k16_ld_c:
19797 return MMA_LDST(8, m32n8k16_load_c_s32);
19798 case NVPTX::BI__imma_m8n32k16_ld_a_s8:
19799 return MMA_LDST(1, m8n32k16_load_a_s8);
19800 case NVPTX::BI__imma_m8n32k16_ld_a_u8:
19801 return MMA_LDST(1, m8n32k16_load_a_u8);
19802 case NVPTX::BI__imma_m8n32k16_ld_b_s8:
19803 return MMA_LDST(4, m8n32k16_load_b_s8);
19804 case NVPTX::BI__imma_m8n32k16_ld_b_u8:
19805 return MMA_LDST(4, m8n32k16_load_b_u8);
19806 case NVPTX::BI__imma_m8n32k16_ld_c:
19807 return MMA_LDST(8, m8n32k16_load_c_s32);
19808
19809 // Sub-integer MMA loads.
19810 // Only row/col layout is supported by A/B fragments.
19811 case NVPTX::BI__imma_m8n8k32_ld_a_s4:
19812 return {1, 0, MMA_INTR(m8n8k32_load_a_s4, row)};
19813 case NVPTX::BI__imma_m8n8k32_ld_a_u4:
19814 return {1, 0, MMA_INTR(m8n8k32_load_a_u4, row)};
19815 case NVPTX::BI__imma_m8n8k32_ld_b_s4:
19816 return {1, MMA_INTR(m8n8k32_load_b_s4, col), 0};
19817 case NVPTX::BI__imma_m8n8k32_ld_b_u4:
19818 return {1, MMA_INTR(m8n8k32_load_b_u4, col), 0};
19819 case NVPTX::BI__imma_m8n8k32_ld_c:
19820 return MMA_LDST(2, m8n8k32_load_c_s32);
19821 case NVPTX::BI__bmma_m8n8k128_ld_a_b1:
19822 return {1, 0, MMA_INTR(m8n8k128_load_a_b1, row)};
19823 case NVPTX::BI__bmma_m8n8k128_ld_b_b1:
19824 return {1, MMA_INTR(m8n8k128_load_b_b1, col), 0};
19825 case NVPTX::BI__bmma_m8n8k128_ld_c:
19826 return MMA_LDST(2, m8n8k128_load_c_s32);
19827
19828 // Double MMA loads
19829 case NVPTX::BI__dmma_m8n8k4_ld_a:
19830 return MMA_LDST(1, m8n8k4_load_a_f64);
19831 case NVPTX::BI__dmma_m8n8k4_ld_b:
19832 return MMA_LDST(1, m8n8k4_load_b_f64);
19833 case NVPTX::BI__dmma_m8n8k4_ld_c:
19834 return MMA_LDST(2, m8n8k4_load_c_f64);
19835
19836 // Alternate float MMA loads
19837 case NVPTX::BI__mma_bf16_m16n16k16_ld_a:
19838 return MMA_LDST(4, m16n16k16_load_a_bf16);
19839 case NVPTX::BI__mma_bf16_m16n16k16_ld_b:
19840 return MMA_LDST(4, m16n16k16_load_b_bf16);
19841 case NVPTX::BI__mma_bf16_m8n32k16_ld_a:
19842 return MMA_LDST(2, m8n32k16_load_a_bf16);
19843 case NVPTX::BI__mma_bf16_m8n32k16_ld_b:
19844 return MMA_LDST(8, m8n32k16_load_b_bf16);
19845 case NVPTX::BI__mma_bf16_m32n8k16_ld_a:
19846 return MMA_LDST(8, m32n8k16_load_a_bf16);
19847 case NVPTX::BI__mma_bf16_m32n8k16_ld_b:
19848 return MMA_LDST(2, m32n8k16_load_b_bf16);
19849 case NVPTX::BI__mma_tf32_m16n16k8_ld_a:
19850 return MMA_LDST(4, m16n16k8_load_a_tf32);
19851 case NVPTX::BI__mma_tf32_m16n16k8_ld_b:
19852 return MMA_LDST(4, m16n16k8_load_b_tf32);
19853 case NVPTX::BI__mma_tf32_m16n16k8_ld_c:
19854 return MMA_LDST(8, m16n16k8_load_c_f32);
19855
19856 // NOTE: We need to follow the inconsistent naming scheme used by NVCC. Unlike
19857 // PTX and LLVM IR, where stores always use fragment D, NVCC builtins always
19858 // use fragment C for both loads and stores.
19859 // FP MMA stores.
19860 case NVPTX::BI__hmma_m16n16k16_st_c_f16:
19861 return MMA_LDST(4, m16n16k16_store_d_f16);
19862 case NVPTX::BI__hmma_m16n16k16_st_c_f32:
19863 return MMA_LDST(8, m16n16k16_store_d_f32);
19864 case NVPTX::BI__hmma_m32n8k16_st_c_f16:
19865 return MMA_LDST(4, m32n8k16_store_d_f16);
19866 case NVPTX::BI__hmma_m32n8k16_st_c_f32:
19867 return MMA_LDST(8, m32n8k16_store_d_f32);
19868 case NVPTX::BI__hmma_m8n32k16_st_c_f16:
19869 return MMA_LDST(4, m8n32k16_store_d_f16);
19870 case NVPTX::BI__hmma_m8n32k16_st_c_f32:
19871 return MMA_LDST(8, m8n32k16_store_d_f32);
19872
19873 // Integer and sub-integer MMA stores.
19874 // Another naming quirk. Unlike other MMA builtins that use PTX types in the
19875 // name, integer loads/stores use LLVM's i32.
19876 case NVPTX::BI__imma_m16n16k16_st_c_i32:
19877 return MMA_LDST(8, m16n16k16_store_d_s32);
19878 case NVPTX::BI__imma_m32n8k16_st_c_i32:
19879 return MMA_LDST(8, m32n8k16_store_d_s32);
19880 case NVPTX::BI__imma_m8n32k16_st_c_i32:
19881 return MMA_LDST(8, m8n32k16_store_d_s32);
19882 case NVPTX::BI__imma_m8n8k32_st_c_i32:
19883 return MMA_LDST(2, m8n8k32_store_d_s32);
19884 case NVPTX::BI__bmma_m8n8k128_st_c_i32:
19885 return MMA_LDST(2, m8n8k128_store_d_s32);
19886
19887 // Double MMA store
19888 case NVPTX::BI__dmma_m8n8k4_st_c_f64:
19889 return MMA_LDST(2, m8n8k4_store_d_f64);
19890
19891 // Alternate float MMA store
19892 case NVPTX::BI__mma_m16n16k8_st_c_f32:
19893 return MMA_LDST(8, m16n16k8_store_d_f32);
19894
19895 default:
19896 llvm_unreachable("Unknown MMA builtin");
19897 }
19898 }
19899 #undef MMA_LDST
19900 #undef MMA_INTR
19901
19902
19903 struct NVPTXMmaInfo {
19904 unsigned NumEltsA;
19905 unsigned NumEltsB;
19906 unsigned NumEltsC;
19907 unsigned NumEltsD;
19908
19909 // Variants are ordered by layout-A/layout-B/satf, where 'row' has priority
19910 // over 'col' for layout. The index of non-satf variants is expected to match
19911 // the undocumented layout constants used by CUDA's mma.hpp.
19912 std::array<unsigned, 8> Variants;
19913
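  // Index layout: 0 = row/row, 1 = row/col, 2 = col/row, 3 = col/col;
  // adding 4 selects the .satfinite variant when Satf is set.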
19914 unsigned getMMAIntrinsic(int Layout, bool Satf) {
19915 unsigned Index = Layout + 4 * Satf;
19916 if (Index >= Variants.size())
19917 return 0;
19918 return Variants[Index];
19919 }
19920 };
19921
19922 // Returns the variant table for an MMA builtin; getMMAIntrinsic() then yields
19923 // the intrinsic matching Layout and Satf, or 0 for invalid combinations.
19924 static NVPTXMmaInfo getNVPTXMmaInfo(unsigned BuiltinID) {
19925 // clang-format off
19926 #define MMA_VARIANTS(geom, type) \
19927 Intrinsic::nvvm_wmma_##geom##_mma_row_row_##type, \
19928 Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type, \
19929 Intrinsic::nvvm_wmma_##geom##_mma_col_row_##type, \
19930 Intrinsic::nvvm_wmma_##geom##_mma_col_col_##type
19931 #define MMA_SATF_VARIANTS(geom, type) \
19932 MMA_VARIANTS(geom, type), \
19933 Intrinsic::nvvm_wmma_##geom##_mma_row_row_##type##_satfinite, \
19934 Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type##_satfinite, \
19935 Intrinsic::nvvm_wmma_##geom##_mma_col_row_##type##_satfinite, \
19936 Intrinsic::nvvm_wmma_##geom##_mma_col_col_##type##_satfinite
19937 // Sub-integer MMA only supports row.col layout.
19938 #define MMA_VARIANTS_I4(geom, type) \
19939 0, \
19940 Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type, \
19941 0, \
19942 0, \
19943 0, \
19944 Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type##_satfinite, \
19945 0, \
19946 0
19947 // b1 MMA does not support .satfinite.
19948 #define MMA_VARIANTS_B1_XOR(geom, type) \
19949 0, \
19950 Intrinsic::nvvm_wmma_##geom##_mma_xor_popc_row_col_##type, \
19951 0, \
19952 0, \
19953 0, \
19954 0, \
19955 0, \
19956 0
19957 #define MMA_VARIANTS_B1_AND(geom, type) \
19958 0, \
19959 Intrinsic::nvvm_wmma_##geom##_mma_and_popc_row_col_##type, \
19960 0, \
19961 0, \
19962 0, \
19963 0, \
19964 0, \
19965 0
19966 // clang-format on
19967 switch (BuiltinID) {
19968 // FP MMA
19969 // Note that the 'type' argument of MMA_SATF_VARIANTS uses D_C notation, while
19970 // the NumElts fields of the return value are ordered as A, B, C, D.
19971 case NVPTX::BI__hmma_m16n16k16_mma_f16f16:
19972 return {8, 8, 4, 4, {{MMA_SATF_VARIANTS(m16n16k16, f16_f16)}}};
19973 case NVPTX::BI__hmma_m16n16k16_mma_f32f16:
19974 return {8, 8, 4, 8, {{MMA_SATF_VARIANTS(m16n16k16, f32_f16)}}};
19975 case NVPTX::BI__hmma_m16n16k16_mma_f16f32:
19976 return {8, 8, 8, 4, {{MMA_SATF_VARIANTS(m16n16k16, f16_f32)}}};
19977 case NVPTX::BI__hmma_m16n16k16_mma_f32f32:
19978 return {8, 8, 8, 8, {{MMA_SATF_VARIANTS(m16n16k16, f32_f32)}}};
19979 case NVPTX::BI__hmma_m32n8k16_mma_f16f16:
19980 return {8, 8, 4, 4, {{MMA_SATF_VARIANTS(m32n8k16, f16_f16)}}};
19981 case NVPTX::BI__hmma_m32n8k16_mma_f32f16:
19982 return {8, 8, 4, 8, {{MMA_SATF_VARIANTS(m32n8k16, f32_f16)}}};
19983 case NVPTX::BI__hmma_m32n8k16_mma_f16f32:
19984 return {8, 8, 8, 4, {{MMA_SATF_VARIANTS(m32n8k16, f16_f32)}}};
19985 case NVPTX::BI__hmma_m32n8k16_mma_f32f32:
19986 return {8, 8, 8, 8, {{MMA_SATF_VARIANTS(m32n8k16, f32_f32)}}};
19987 case NVPTX::BI__hmma_m8n32k16_mma_f16f16:
19988 return {8, 8, 4, 4, {{MMA_SATF_VARIANTS(m8n32k16, f16_f16)}}};
19989 case NVPTX::BI__hmma_m8n32k16_mma_f32f16:
19990 return {8, 8, 4, 8, {{MMA_SATF_VARIANTS(m8n32k16, f32_f16)}}};
19991 case NVPTX::BI__hmma_m8n32k16_mma_f16f32:
19992 return {8, 8, 8, 4, {{MMA_SATF_VARIANTS(m8n32k16, f16_f32)}}};
19993 case NVPTX::BI__hmma_m8n32k16_mma_f32f32:
19994 return {8, 8, 8, 8, {{MMA_SATF_VARIANTS(m8n32k16, f32_f32)}}};
19995
19996 // Integer MMA
19997 case NVPTX::BI__imma_m16n16k16_mma_s8:
19998 return {2, 2, 8, 8, {{MMA_SATF_VARIANTS(m16n16k16, s8)}}};
19999 case NVPTX::BI__imma_m16n16k16_mma_u8:
20000 return {2, 2, 8, 8, {{MMA_SATF_VARIANTS(m16n16k16, u8)}}};
20001 case NVPTX::BI__imma_m32n8k16_mma_s8:
20002 return {4, 1, 8, 8, {{MMA_SATF_VARIANTS(m32n8k16, s8)}}};
20003 case NVPTX::BI__imma_m32n8k16_mma_u8:
20004 return {4, 1, 8, 8, {{MMA_SATF_VARIANTS(m32n8k16, u8)}}};
20005 case NVPTX::BI__imma_m8n32k16_mma_s8:
20006 return {1, 4, 8, 8, {{MMA_SATF_VARIANTS(m8n32k16, s8)}}};
20007 case NVPTX::BI__imma_m8n32k16_mma_u8:
20008 return {1, 4, 8, 8, {{MMA_SATF_VARIANTS(m8n32k16, u8)}}};
20009
20010 // Sub-integer MMA
20011 case NVPTX::BI__imma_m8n8k32_mma_s4:
20012 return {1, 1, 2, 2, {{MMA_VARIANTS_I4(m8n8k32, s4)}}};
20013 case NVPTX::BI__imma_m8n8k32_mma_u4:
20014 return {1, 1, 2, 2, {{MMA_VARIANTS_I4(m8n8k32, u4)}}};
20015 case NVPTX::BI__bmma_m8n8k128_mma_xor_popc_b1:
20016 return {1, 1, 2, 2, {{MMA_VARIANTS_B1_XOR(m8n8k128, b1)}}};
20017 case NVPTX::BI__bmma_m8n8k128_mma_and_popc_b1:
20018 return {1, 1, 2, 2, {{MMA_VARIANTS_B1_AND(m8n8k128, b1)}}};
20019
20020 // Double MMA
20021 case NVPTX::BI__dmma_m8n8k4_mma_f64:
20022 return {1, 1, 2, 2, {{MMA_VARIANTS(m8n8k4, f64)}}};
20023
20024 // Alternate FP MMA
20025 case NVPTX::BI__mma_bf16_m16n16k16_mma_f32:
20026 return {4, 4, 8, 8, {{MMA_VARIANTS(m16n16k16, bf16)}}};
20027 case NVPTX::BI__mma_bf16_m8n32k16_mma_f32:
20028 return {2, 8, 8, 8, {{MMA_VARIANTS(m8n32k16, bf16)}}};
20029 case NVPTX::BI__mma_bf16_m32n8k16_mma_f32:
20030 return {8, 2, 8, 8, {{MMA_VARIANTS(m32n8k16, bf16)}}};
20031 case NVPTX::BI__mma_tf32_m16n16k8_mma_f32:
20032 return {4, 4, 8, 8, {{MMA_VARIANTS(m16n16k8, tf32)}}};
20033 default:
20034 llvm_unreachable("Unexpected builtin ID.");
20035 }
20036 #undef MMA_VARIANTS
20037 #undef MMA_SATF_VARIANTS
20038 #undef MMA_VARIANTS_I4
20039 #undef MMA_VARIANTS_B1_AND
20040 #undef MMA_VARIANTS_B1_XOR
20041 }
20042
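// Emit a call to an NVVM ldg/ldu load intrinsic. The intrinsic is overloaded
// on the loaded type and the pointer type, and takes the natural alignment of
// the pointee type as an extra i32 operand.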
20043 static Value *MakeLdgLdu(unsigned IntrinsicID, CodeGenFunction &CGF,
20044 const CallExpr *E) {
20045 Value *Ptr = CGF.EmitScalarExpr(E->getArg(0));
20046 QualType ArgType = E->getArg(0)->getType();
20047 clang::CharUnits Align = CGF.CGM.getNaturalPointeeTypeAlignment(ArgType);
20048 llvm::Type *ElemTy = CGF.ConvertTypeForMem(ArgType->getPointeeType());
20049 return CGF.Builder.CreateCall(
20050 CGF.CGM.getIntrinsic(IntrinsicID, {ElemTy, Ptr->getType()}),
20051 {Ptr, ConstantInt::get(CGF.Builder.getInt32Ty(), Align.getQuantity())});
20052 }
20053
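// Emit a call to a cta- or sys-scoped NVVM atomic intrinsic on a generic
// pointer. The intrinsic is overloaded on the element type and the pointer
// type, and takes the pointer plus a single value operand.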
20054 static Value *MakeScopedAtomic(unsigned IntrinsicID, CodeGenFunction &CGF,
20055 const CallExpr *E) {
20056 Value *Ptr = CGF.EmitScalarExpr(E->getArg(0));
20057 llvm::Type *ElemTy =
20058 CGF.ConvertTypeForMem(E->getArg(0)->getType()->getPointeeType());
20059 return CGF.Builder.CreateCall(
20060 CGF.CGM.getIntrinsic(IntrinsicID, {ElemTy, Ptr->getType()}),
20061 {Ptr, CGF.EmitScalarExpr(E->getArg(1))});
20062 }
20063
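// Emit a cp.async copy. If the builtin was called with the optional third
// (source-size) argument, emit the IntrinsicIDS variant that accepts it;
// otherwise emit the plain two-operand IntrinsicID form.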
20064 static Value *MakeCpAsync(unsigned IntrinsicID, unsigned IntrinsicIDS,
20065 CodeGenFunction &CGF, const CallExpr *E,
20066 int SrcSize) {
20067 return E->getNumArgs() == 3
20068 ? CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IntrinsicIDS),
20069 {CGF.EmitScalarExpr(E->getArg(0)),
20070 CGF.EmitScalarExpr(E->getArg(1)),
20071 CGF.EmitScalarExpr(E->getArg(2))})
20072 : CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IntrinsicID),
20073 {CGF.EmitScalarExpr(E->getArg(0)),
20074 CGF.EmitScalarExpr(E->getArg(1))});
20075 }
20076
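// Emit an NVVM intrinsic operating on half/half2 values. Reports an error
// when native half type support is unavailable, and routes the ldg/ldu forms
// through MakeLdgLdu so they receive the extra alignment operand.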
20077 static Value *MakeHalfType(unsigned IntrinsicID, unsigned BuiltinID,
20078 const CallExpr *E, CodeGenFunction &CGF) {
20079 auto &C = CGF.CGM.getContext();
20080 if (!(C.getLangOpts().NativeHalfType ||
20081 !C.getTargetInfo().useFP16ConversionIntrinsics())) {
20082 CGF.CGM.Error(E->getExprLoc(), C.BuiltinInfo.getName(BuiltinID).str() +
20083 " requires native half type support.");
20084 return nullptr;
20085 }
20086
20087 if (IntrinsicID == Intrinsic::nvvm_ldg_global_f ||
20088 IntrinsicID == Intrinsic::nvvm_ldu_global_f)
20089 return MakeLdgLdu(IntrinsicID, CGF, E);
20090
20091 SmallVector<Value *, 16> Args;
20092 auto *F = CGF.CGM.getIntrinsic(IntrinsicID);
20093 auto *FTy = F->getFunctionType();
20094 unsigned ICEArguments = 0;
20095 ASTContext::GetBuiltinTypeError Error;
20096 C.GetBuiltinType(BuiltinID, Error, &ICEArguments);
20097 assert(Error == ASTContext::GE_None && "Should not codegen an error");
20098 for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) {
20099 assert((ICEArguments & (1 << i)) == 0);
20100 auto *ArgValue = CGF.EmitScalarExpr(E->getArg(i));
20101 auto *PTy = FTy->getParamType(i);
20102 if (PTy != ArgValue->getType())
20103 ArgValue = CGF.Builder.CreateBitCast(ArgValue, PTy);
20104 Args.push_back(ArgValue);
20105 }
20106
20107 return CGF.Builder.CreateCall(F, Args);
20108 }
20109 } // namespace
20110
20111 Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID,
20112 const CallExpr *E) {
20113 switch (BuiltinID) {
20114 case NVPTX::BI__nvvm_atom_add_gen_i:
20115 case NVPTX::BI__nvvm_atom_add_gen_l:
20116 case NVPTX::BI__nvvm_atom_add_gen_ll:
20117 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Add, E);
20118
20119 case NVPTX::BI__nvvm_atom_sub_gen_i:
20120 case NVPTX::BI__nvvm_atom_sub_gen_l:
20121 case NVPTX::BI__nvvm_atom_sub_gen_ll:
20122 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Sub, E);
20123
20124 case NVPTX::BI__nvvm_atom_and_gen_i:
20125 case NVPTX::BI__nvvm_atom_and_gen_l:
20126 case NVPTX::BI__nvvm_atom_and_gen_ll:
20127 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::And, E);
20128
20129 case NVPTX::BI__nvvm_atom_or_gen_i:
20130 case NVPTX::BI__nvvm_atom_or_gen_l:
20131 case NVPTX::BI__nvvm_atom_or_gen_ll:
20132 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Or, E);
20133
20134 case NVPTX::BI__nvvm_atom_xor_gen_i:
20135 case NVPTX::BI__nvvm_atom_xor_gen_l:
20136 case NVPTX::BI__nvvm_atom_xor_gen_ll:
20137 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xor, E);
20138
20139 case NVPTX::BI__nvvm_atom_xchg_gen_i:
20140 case NVPTX::BI__nvvm_atom_xchg_gen_l:
20141 case NVPTX::BI__nvvm_atom_xchg_gen_ll:
20142 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xchg, E);
20143
20144 case NVPTX::BI__nvvm_atom_max_gen_i:
20145 case NVPTX::BI__nvvm_atom_max_gen_l:
20146 case NVPTX::BI__nvvm_atom_max_gen_ll:
20147 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Max, E);
20148
20149 case NVPTX::BI__nvvm_atom_max_gen_ui:
20150 case NVPTX::BI__nvvm_atom_max_gen_ul:
20151 case NVPTX::BI__nvvm_atom_max_gen_ull:
20152 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMax, E);
20153
20154 case NVPTX::BI__nvvm_atom_min_gen_i:
20155 case NVPTX::BI__nvvm_atom_min_gen_l:
20156 case NVPTX::BI__nvvm_atom_min_gen_ll:
20157 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Min, E);
20158
20159 case NVPTX::BI__nvvm_atom_min_gen_ui:
20160 case NVPTX::BI__nvvm_atom_min_gen_ul:
20161 case NVPTX::BI__nvvm_atom_min_gen_ull:
20162 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMin, E);
20163
20164 case NVPTX::BI__nvvm_atom_cas_gen_i:
20165 case NVPTX::BI__nvvm_atom_cas_gen_l:
20166 case NVPTX::BI__nvvm_atom_cas_gen_ll:
20167 // __nvvm_atom_cas_gen_* should return the old value rather than the
20168 // success flag.
20169 return MakeAtomicCmpXchgValue(*this, E, /*ReturnBool=*/false);
20170
20171 case NVPTX::BI__nvvm_atom_add_gen_f:
20172 case NVPTX::BI__nvvm_atom_add_gen_d: {
20173 Address DestAddr = EmitPointerWithAlignment(E->getArg(0));
20174 Value *Val = EmitScalarExpr(E->getArg(1));
20175
20176 return Builder.CreateAtomicRMW(llvm::AtomicRMWInst::FAdd, DestAddr, Val,
20177 AtomicOrdering::SequentiallyConsistent);
20178 }
20179
20180 case NVPTX::BI__nvvm_atom_inc_gen_ui: {
20181 Value *Ptr = EmitScalarExpr(E->getArg(0));
20182 Value *Val = EmitScalarExpr(E->getArg(1));
20183 Function *FnALI32 =
20184 CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_inc_32, Ptr->getType());
20185 return Builder.CreateCall(FnALI32, {Ptr, Val});
20186 }
20187
20188 case NVPTX::BI__nvvm_atom_dec_gen_ui: {
20189 Value *Ptr = EmitScalarExpr(E->getArg(0));
20190 Value *Val = EmitScalarExpr(E->getArg(1));
20191 Function *FnALD32 =
20192 CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_dec_32, Ptr->getType());
20193 return Builder.CreateCall(FnALD32, {Ptr, Val});
20194 }
20195
20196 case NVPTX::BI__nvvm_ldg_c:
20197 case NVPTX::BI__nvvm_ldg_sc:
20198 case NVPTX::BI__nvvm_ldg_c2:
20199 case NVPTX::BI__nvvm_ldg_sc2:
20200 case NVPTX::BI__nvvm_ldg_c4:
20201 case NVPTX::BI__nvvm_ldg_sc4:
20202 case NVPTX::BI__nvvm_ldg_s:
20203 case NVPTX::BI__nvvm_ldg_s2:
20204 case NVPTX::BI__nvvm_ldg_s4:
20205 case NVPTX::BI__nvvm_ldg_i:
20206 case NVPTX::BI__nvvm_ldg_i2:
20207 case NVPTX::BI__nvvm_ldg_i4:
20208 case NVPTX::BI__nvvm_ldg_l:
20209 case NVPTX::BI__nvvm_ldg_l2:
20210 case NVPTX::BI__nvvm_ldg_ll:
20211 case NVPTX::BI__nvvm_ldg_ll2:
20212 case NVPTX::BI__nvvm_ldg_uc:
20213 case NVPTX::BI__nvvm_ldg_uc2:
20214 case NVPTX::BI__nvvm_ldg_uc4:
20215 case NVPTX::BI__nvvm_ldg_us:
20216 case NVPTX::BI__nvvm_ldg_us2:
20217 case NVPTX::BI__nvvm_ldg_us4:
20218 case NVPTX::BI__nvvm_ldg_ui:
20219 case NVPTX::BI__nvvm_ldg_ui2:
20220 case NVPTX::BI__nvvm_ldg_ui4:
20221 case NVPTX::BI__nvvm_ldg_ul:
20222 case NVPTX::BI__nvvm_ldg_ul2:
20223 case NVPTX::BI__nvvm_ldg_ull:
20224 case NVPTX::BI__nvvm_ldg_ull2:
20225 // PTX Interoperability section 2.2: "For a vector with an even number of
20226 // elements, its alignment is set to number of elements times the alignment
20227 // of its member: n*alignof(t)."
20228 return MakeLdgLdu(Intrinsic::nvvm_ldg_global_i, *this, E);
20229 case NVPTX::BI__nvvm_ldg_f:
20230 case NVPTX::BI__nvvm_ldg_f2:
20231 case NVPTX::BI__nvvm_ldg_f4:
20232 case NVPTX::BI__nvvm_ldg_d:
20233 case NVPTX::BI__nvvm_ldg_d2:
20234 return MakeLdgLdu(Intrinsic::nvvm_ldg_global_f, *this, E);
20235
20236 case NVPTX::BI__nvvm_ldu_c:
20237 case NVPTX::BI__nvvm_ldu_sc:
20238 case NVPTX::BI__nvvm_ldu_c2:
20239 case NVPTX::BI__nvvm_ldu_sc2:
20240 case NVPTX::BI__nvvm_ldu_c4:
20241 case NVPTX::BI__nvvm_ldu_sc4:
20242 case NVPTX::BI__nvvm_ldu_s:
20243 case NVPTX::BI__nvvm_ldu_s2:
20244 case NVPTX::BI__nvvm_ldu_s4:
20245 case NVPTX::BI__nvvm_ldu_i:
20246 case NVPTX::BI__nvvm_ldu_i2:
20247 case NVPTX::BI__nvvm_ldu_i4:
20248 case NVPTX::BI__nvvm_ldu_l:
20249 case NVPTX::BI__nvvm_ldu_l2:
20250 case NVPTX::BI__nvvm_ldu_ll:
20251 case NVPTX::BI__nvvm_ldu_ll2:
20252 case NVPTX::BI__nvvm_ldu_uc:
20253 case NVPTX::BI__nvvm_ldu_uc2:
20254 case NVPTX::BI__nvvm_ldu_uc4:
20255 case NVPTX::BI__nvvm_ldu_us:
20256 case NVPTX::BI__nvvm_ldu_us2:
20257 case NVPTX::BI__nvvm_ldu_us4:
20258 case NVPTX::BI__nvvm_ldu_ui:
20259 case NVPTX::BI__nvvm_ldu_ui2:
20260 case NVPTX::BI__nvvm_ldu_ui4:
20261 case NVPTX::BI__nvvm_ldu_ul:
20262 case NVPTX::BI__nvvm_ldu_ul2:
20263 case NVPTX::BI__nvvm_ldu_ull:
20264 case NVPTX::BI__nvvm_ldu_ull2:
20265 return MakeLdgLdu(Intrinsic::nvvm_ldu_global_i, *this, E);
20266 case NVPTX::BI__nvvm_ldu_f:
20267 case NVPTX::BI__nvvm_ldu_f2:
20268 case NVPTX::BI__nvvm_ldu_f4:
20269 case NVPTX::BI__nvvm_ldu_d:
20270 case NVPTX::BI__nvvm_ldu_d2:
20271 return MakeLdgLdu(Intrinsic::nvvm_ldu_global_f, *this, E);
20272
20273 case NVPTX::BI__nvvm_atom_cta_add_gen_i:
20274 case NVPTX::BI__nvvm_atom_cta_add_gen_l:
20275 case NVPTX::BI__nvvm_atom_cta_add_gen_ll:
20276 return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_cta, *this, E);
20277 case NVPTX::BI__nvvm_atom_sys_add_gen_i:
20278 case NVPTX::BI__nvvm_atom_sys_add_gen_l:
20279 case NVPTX::BI__nvvm_atom_sys_add_gen_ll:
20280 return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_sys, *this, E);
20281 case NVPTX::BI__nvvm_atom_cta_add_gen_f:
20282 case NVPTX::BI__nvvm_atom_cta_add_gen_d:
20283 return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_cta, *this, E);
20284 case NVPTX::BI__nvvm_atom_sys_add_gen_f:
20285 case NVPTX::BI__nvvm_atom_sys_add_gen_d:
20286 return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_sys, *this, E);
20287 case NVPTX::BI__nvvm_atom_cta_xchg_gen_i:
20288 case NVPTX::BI__nvvm_atom_cta_xchg_gen_l:
20289 case NVPTX::BI__nvvm_atom_cta_xchg_gen_ll:
20290 return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_cta, *this, E);
20291 case NVPTX::BI__nvvm_atom_sys_xchg_gen_i:
20292 case NVPTX::BI__nvvm_atom_sys_xchg_gen_l:
20293 case NVPTX::BI__nvvm_atom_sys_xchg_gen_ll:
20294 return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_sys, *this, E);
20295 case NVPTX::BI__nvvm_atom_cta_max_gen_i:
20296 case NVPTX::BI__nvvm_atom_cta_max_gen_ui:
20297 case NVPTX::BI__nvvm_atom_cta_max_gen_l:
20298 case NVPTX::BI__nvvm_atom_cta_max_gen_ul:
20299 case NVPTX::BI__nvvm_atom_cta_max_gen_ll:
20300 case NVPTX::BI__nvvm_atom_cta_max_gen_ull:
20301 return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_cta, *this, E);
20302 case NVPTX::BI__nvvm_atom_sys_max_gen_i:
20303 case NVPTX::BI__nvvm_atom_sys_max_gen_ui:
20304 case NVPTX::BI__nvvm_atom_sys_max_gen_l:
20305 case NVPTX::BI__nvvm_atom_sys_max_gen_ul:
20306 case NVPTX::BI__nvvm_atom_sys_max_gen_ll:
20307 case NVPTX::BI__nvvm_atom_sys_max_gen_ull:
20308 return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_sys, *this, E);
20309 case NVPTX::BI__nvvm_atom_cta_min_gen_i:
20310 case NVPTX::BI__nvvm_atom_cta_min_gen_ui:
20311 case NVPTX::BI__nvvm_atom_cta_min_gen_l:
20312 case NVPTX::BI__nvvm_atom_cta_min_gen_ul:
20313 case NVPTX::BI__nvvm_atom_cta_min_gen_ll:
20314 case NVPTX::BI__nvvm_atom_cta_min_gen_ull:
20315 return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_cta, *this, E);
20316 case NVPTX::BI__nvvm_atom_sys_min_gen_i:
20317 case NVPTX::BI__nvvm_atom_sys_min_gen_ui:
20318 case NVPTX::BI__nvvm_atom_sys_min_gen_l:
20319 case NVPTX::BI__nvvm_atom_sys_min_gen_ul:
20320 case NVPTX::BI__nvvm_atom_sys_min_gen_ll:
20321 case NVPTX::BI__nvvm_atom_sys_min_gen_ull:
20322 return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_sys, *this, E);
20323 case NVPTX::BI__nvvm_atom_cta_inc_gen_ui:
20324 return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_cta, *this, E);
20325 case NVPTX::BI__nvvm_atom_cta_dec_gen_ui:
20326 return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_cta, *this, E);
20327 case NVPTX::BI__nvvm_atom_sys_inc_gen_ui:
20328 return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_sys, *this, E);
20329 case NVPTX::BI__nvvm_atom_sys_dec_gen_ui:
20330 return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_sys, *this, E);
20331 case NVPTX::BI__nvvm_atom_cta_and_gen_i:
20332 case NVPTX::BI__nvvm_atom_cta_and_gen_l:
20333 case NVPTX::BI__nvvm_atom_cta_and_gen_ll:
20334 return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_cta, *this, E);
20335 case NVPTX::BI__nvvm_atom_sys_and_gen_i:
20336 case NVPTX::BI__nvvm_atom_sys_and_gen_l:
20337 case NVPTX::BI__nvvm_atom_sys_and_gen_ll:
20338 return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_sys, *this, E);
20339 case NVPTX::BI__nvvm_atom_cta_or_gen_i:
20340 case NVPTX::BI__nvvm_atom_cta_or_gen_l:
20341 case NVPTX::BI__nvvm_atom_cta_or_gen_ll:
20342 return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_cta, *this, E);
20343 case NVPTX::BI__nvvm_atom_sys_or_gen_i:
20344 case NVPTX::BI__nvvm_atom_sys_or_gen_l:
20345 case NVPTX::BI__nvvm_atom_sys_or_gen_ll:
20346 return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_sys, *this, E);
20347 case NVPTX::BI__nvvm_atom_cta_xor_gen_i:
20348 case NVPTX::BI__nvvm_atom_cta_xor_gen_l:
20349 case NVPTX::BI__nvvm_atom_cta_xor_gen_ll:
20350 return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_cta, *this, E);
20351 case NVPTX::BI__nvvm_atom_sys_xor_gen_i:
20352 case NVPTX::BI__nvvm_atom_sys_xor_gen_l:
20353 case NVPTX::BI__nvvm_atom_sys_xor_gen_ll:
20354 return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_sys, *this, E);
20355 case NVPTX::BI__nvvm_atom_cta_cas_gen_i:
20356 case NVPTX::BI__nvvm_atom_cta_cas_gen_l:
20357 case NVPTX::BI__nvvm_atom_cta_cas_gen_ll: {
20358 Value *Ptr = EmitScalarExpr(E->getArg(0));
20359 llvm::Type *ElemTy =
20360 ConvertTypeForMem(E->getArg(0)->getType()->getPointeeType());
20361 return Builder.CreateCall(
20362 CGM.getIntrinsic(
20363 Intrinsic::nvvm_atomic_cas_gen_i_cta, {ElemTy, Ptr->getType()}),
20364 {Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))});
20365 }
20366 case NVPTX::BI__nvvm_atom_sys_cas_gen_i:
20367 case NVPTX::BI__nvvm_atom_sys_cas_gen_l:
20368 case NVPTX::BI__nvvm_atom_sys_cas_gen_ll: {
20369 Value *Ptr = EmitScalarExpr(E->getArg(0));
20370 llvm::Type *ElemTy =
20371 ConvertTypeForMem(E->getArg(0)->getType()->getPointeeType());
20372 return Builder.CreateCall(
20373 CGM.getIntrinsic(
20374 Intrinsic::nvvm_atomic_cas_gen_i_sys, {ElemTy, Ptr->getType()}),
20375 {Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))});
20376 }
20377 case NVPTX::BI__nvvm_match_all_sync_i32p:
20378 case NVPTX::BI__nvvm_match_all_sync_i64p: {
20379 Value *Mask = EmitScalarExpr(E->getArg(0));
20380 Value *Val = EmitScalarExpr(E->getArg(1));
20381 Address PredOutPtr = EmitPointerWithAlignment(E->getArg(2));
20382 Value *ResultPair = Builder.CreateCall(
20383 CGM.getIntrinsic(BuiltinID == NVPTX::BI__nvvm_match_all_sync_i32p
20384 ? Intrinsic::nvvm_match_all_sync_i32p
20385 : Intrinsic::nvvm_match_all_sync_i64p),
20386 {Mask, Val});
20387 Value *Pred = Builder.CreateZExt(Builder.CreateExtractValue(ResultPair, 1),
20388 PredOutPtr.getElementType());
20389 Builder.CreateStore(Pred, PredOutPtr);
20390 return Builder.CreateExtractValue(ResultPair, 0);
20391 }
20392
20393 // FP MMA loads
20394 case NVPTX::BI__hmma_m16n16k16_ld_a:
20395 case NVPTX::BI__hmma_m16n16k16_ld_b:
20396 case NVPTX::BI__hmma_m16n16k16_ld_c_f16:
20397 case NVPTX::BI__hmma_m16n16k16_ld_c_f32:
20398 case NVPTX::BI__hmma_m32n8k16_ld_a:
20399 case NVPTX::BI__hmma_m32n8k16_ld_b:
20400 case NVPTX::BI__hmma_m32n8k16_ld_c_f16:
20401 case NVPTX::BI__hmma_m32n8k16_ld_c_f32:
20402 case NVPTX::BI__hmma_m8n32k16_ld_a:
20403 case NVPTX::BI__hmma_m8n32k16_ld_b:
20404 case NVPTX::BI__hmma_m8n32k16_ld_c_f16:
20405 case NVPTX::BI__hmma_m8n32k16_ld_c_f32:
20406 // Integer MMA loads.
20407 case NVPTX::BI__imma_m16n16k16_ld_a_s8:
20408 case NVPTX::BI__imma_m16n16k16_ld_a_u8:
20409 case NVPTX::BI__imma_m16n16k16_ld_b_s8:
20410 case NVPTX::BI__imma_m16n16k16_ld_b_u8:
20411 case NVPTX::BI__imma_m16n16k16_ld_c:
20412 case NVPTX::BI__imma_m32n8k16_ld_a_s8:
20413 case NVPTX::BI__imma_m32n8k16_ld_a_u8:
20414 case NVPTX::BI__imma_m32n8k16_ld_b_s8:
20415 case NVPTX::BI__imma_m32n8k16_ld_b_u8:
20416 case NVPTX::BI__imma_m32n8k16_ld_c:
20417 case NVPTX::BI__imma_m8n32k16_ld_a_s8:
20418 case NVPTX::BI__imma_m8n32k16_ld_a_u8:
20419 case NVPTX::BI__imma_m8n32k16_ld_b_s8:
20420 case NVPTX::BI__imma_m8n32k16_ld_b_u8:
20421 case NVPTX::BI__imma_m8n32k16_ld_c:
20422 // Sub-integer MMA loads.
20423 case NVPTX::BI__imma_m8n8k32_ld_a_s4:
20424 case NVPTX::BI__imma_m8n8k32_ld_a_u4:
20425 case NVPTX::BI__imma_m8n8k32_ld_b_s4:
20426 case NVPTX::BI__imma_m8n8k32_ld_b_u4:
20427 case NVPTX::BI__imma_m8n8k32_ld_c:
20428 case NVPTX::BI__bmma_m8n8k128_ld_a_b1:
20429 case NVPTX::BI__bmma_m8n8k128_ld_b_b1:
20430 case NVPTX::BI__bmma_m8n8k128_ld_c:
20431 // Double MMA loads.
20432 case NVPTX::BI__dmma_m8n8k4_ld_a:
20433 case NVPTX::BI__dmma_m8n8k4_ld_b:
20434 case NVPTX::BI__dmma_m8n8k4_ld_c:
20435 // Alternate float MMA loads.
20436 case NVPTX::BI__mma_bf16_m16n16k16_ld_a:
20437 case NVPTX::BI__mma_bf16_m16n16k16_ld_b:
20438 case NVPTX::BI__mma_bf16_m8n32k16_ld_a:
20439 case NVPTX::BI__mma_bf16_m8n32k16_ld_b:
20440 case NVPTX::BI__mma_bf16_m32n8k16_ld_a:
20441 case NVPTX::BI__mma_bf16_m32n8k16_ld_b:
20442 case NVPTX::BI__mma_tf32_m16n16k8_ld_a:
20443 case NVPTX::BI__mma_tf32_m16n16k8_ld_b:
20444 case NVPTX::BI__mma_tf32_m16n16k8_ld_c: {
20445 Address Dst = EmitPointerWithAlignment(E->getArg(0));
20446 Value *Src = EmitScalarExpr(E->getArg(1));
20447 Value *Ldm = EmitScalarExpr(E->getArg(2));
20448 std::optional<llvm::APSInt> isColMajorArg =
20449 E->getArg(3)->getIntegerConstantExpr(getContext());
20450 if (!isColMajorArg)
20451 return nullptr;
20452 bool isColMajor = isColMajorArg->getSExtValue();
20453 NVPTXMmaLdstInfo II = getNVPTXMmaLdstInfo(BuiltinID);
20454 unsigned IID = isColMajor ? II.IID_col : II.IID_row;
20455 if (IID == 0)
20456 return nullptr;
20457
20458 Value *Result =
20459 Builder.CreateCall(CGM.getIntrinsic(IID, Src->getType()), {Src, Ldm});
20460
20461 // Save returned values.
20462 assert(II.NumResults);
20463 if (II.NumResults == 1) {
20464 Builder.CreateAlignedStore(Result, Dst.emitRawPointer(*this),
20465 CharUnits::fromQuantity(4));
20466 } else {
20467 for (unsigned i = 0; i < II.NumResults; ++i) {
20468 Builder.CreateAlignedStore(
20469 Builder.CreateBitCast(Builder.CreateExtractValue(Result, i),
20470 Dst.getElementType()),
20471 Builder.CreateGEP(Dst.getElementType(), Dst.emitRawPointer(*this),
20472 llvm::ConstantInt::get(IntTy, i)),
20473 CharUnits::fromQuantity(4));
20474 }
20475 }
20476 return Result;
20477 }
20478
20479 case NVPTX::BI__hmma_m16n16k16_st_c_f16:
20480 case NVPTX::BI__hmma_m16n16k16_st_c_f32:
20481 case NVPTX::BI__hmma_m32n8k16_st_c_f16:
20482 case NVPTX::BI__hmma_m32n8k16_st_c_f32:
20483 case NVPTX::BI__hmma_m8n32k16_st_c_f16:
20484 case NVPTX::BI__hmma_m8n32k16_st_c_f32:
20485 case NVPTX::BI__imma_m16n16k16_st_c_i32:
20486 case NVPTX::BI__imma_m32n8k16_st_c_i32:
20487 case NVPTX::BI__imma_m8n32k16_st_c_i32:
20488 case NVPTX::BI__imma_m8n8k32_st_c_i32:
20489 case NVPTX::BI__bmma_m8n8k128_st_c_i32:
20490 case NVPTX::BI__dmma_m8n8k4_st_c_f64:
20491 case NVPTX::BI__mma_m16n16k8_st_c_f32: {
20492 Value *Dst = EmitScalarExpr(E->getArg(0));
20493 Address Src = EmitPointerWithAlignment(E->getArg(1));
20494 Value *Ldm = EmitScalarExpr(E->getArg(2));
20495 std::optional<llvm::APSInt> isColMajorArg =
20496 E->getArg(3)->getIntegerConstantExpr(getContext());
20497 if (!isColMajorArg)
20498 return nullptr;
20499 bool isColMajor = isColMajorArg->getSExtValue();
20500 NVPTXMmaLdstInfo II = getNVPTXMmaLdstInfo(BuiltinID);
20501 unsigned IID = isColMajor ? II.IID_col : II.IID_row;
20502 if (IID == 0)
20503 return nullptr;
20504 Function *Intrinsic =
20505 CGM.getIntrinsic(IID, Dst->getType());
20506 llvm::Type *ParamType = Intrinsic->getFunctionType()->getParamType(1);
20507 SmallVector<Value *, 10> Values = {Dst};
20508 for (unsigned i = 0; i < II.NumResults; ++i) {
20509 Value *V = Builder.CreateAlignedLoad(
20510 Src.getElementType(),
20511 Builder.CreateGEP(Src.getElementType(), Src.emitRawPointer(*this),
20512 llvm::ConstantInt::get(IntTy, i)),
20513 CharUnits::fromQuantity(4));
20514 Values.push_back(Builder.CreateBitCast(V, ParamType));
20515 }
20516 Values.push_back(Ldm);
20517 Value *Result = Builder.CreateCall(Intrinsic, Values);
20518 return Result;
20519 }
20520
20521 // BI__hmma_m16n16k16_mma_<Dtype><CType>(d, a, b, c, layout, satf) -->
20522 // Intrinsic::nvvm_wmma_m16n16k16_mma_sync<layout A,B><DType><CType><Satf>
20523 case NVPTX::BI__hmma_m16n16k16_mma_f16f16:
20524 case NVPTX::BI__hmma_m16n16k16_mma_f32f16:
20525 case NVPTX::BI__hmma_m16n16k16_mma_f32f32:
20526 case NVPTX::BI__hmma_m16n16k16_mma_f16f32:
20527 case NVPTX::BI__hmma_m32n8k16_mma_f16f16:
20528 case NVPTX::BI__hmma_m32n8k16_mma_f32f16:
20529 case NVPTX::BI__hmma_m32n8k16_mma_f32f32:
20530 case NVPTX::BI__hmma_m32n8k16_mma_f16f32:
20531 case NVPTX::BI__hmma_m8n32k16_mma_f16f16:
20532 case NVPTX::BI__hmma_m8n32k16_mma_f32f16:
20533 case NVPTX::BI__hmma_m8n32k16_mma_f32f32:
20534 case NVPTX::BI__hmma_m8n32k16_mma_f16f32:
20535 case NVPTX::BI__imma_m16n16k16_mma_s8:
20536 case NVPTX::BI__imma_m16n16k16_mma_u8:
20537 case NVPTX::BI__imma_m32n8k16_mma_s8:
20538 case NVPTX::BI__imma_m32n8k16_mma_u8:
20539 case NVPTX::BI__imma_m8n32k16_mma_s8:
20540 case NVPTX::BI__imma_m8n32k16_mma_u8:
20541 case NVPTX::BI__imma_m8n8k32_mma_s4:
20542 case NVPTX::BI__imma_m8n8k32_mma_u4:
20543 case NVPTX::BI__bmma_m8n8k128_mma_xor_popc_b1:
20544 case NVPTX::BI__bmma_m8n8k128_mma_and_popc_b1:
20545 case NVPTX::BI__dmma_m8n8k4_mma_f64:
20546 case NVPTX::BI__mma_bf16_m16n16k16_mma_f32:
20547 case NVPTX::BI__mma_bf16_m8n32k16_mma_f32:
20548 case NVPTX::BI__mma_bf16_m32n8k16_mma_f32:
20549 case NVPTX::BI__mma_tf32_m16n16k8_mma_f32: {
20550 Address Dst = EmitPointerWithAlignment(E->getArg(0));
20551 Address SrcA = EmitPointerWithAlignment(E->getArg(1));
20552 Address SrcB = EmitPointerWithAlignment(E->getArg(2));
20553 Address SrcC = EmitPointerWithAlignment(E->getArg(3));
20554 std::optional<llvm::APSInt> LayoutArg =
20555 E->getArg(4)->getIntegerConstantExpr(getContext());
20556 if (!LayoutArg)
20557 return nullptr;
20558 int Layout = LayoutArg->getSExtValue();
20559 if (Layout < 0 || Layout > 3)
20560 return nullptr;
20561 llvm::APSInt SatfArg;
20562 if (BuiltinID == NVPTX::BI__bmma_m8n8k128_mma_xor_popc_b1 ||
20563 BuiltinID == NVPTX::BI__bmma_m8n8k128_mma_and_popc_b1)
20564 SatfArg = 0; // .b1 does not have satf argument.
20565 else if (std::optional<llvm::APSInt> OptSatfArg =
20566 E->getArg(5)->getIntegerConstantExpr(getContext()))
20567 SatfArg = *OptSatfArg;
20568 else
20569 return nullptr;
20570 bool Satf = SatfArg.getSExtValue();
20571 NVPTXMmaInfo MI = getNVPTXMmaInfo(BuiltinID);
20572 unsigned IID = MI.getMMAIntrinsic(Layout, Satf);
20573 if (IID == 0) // Unsupported combination of Layout/Satf.
20574 return nullptr;
20575
20576 SmallVector<Value *, 24> Values;
20577 Function *Intrinsic = CGM.getIntrinsic(IID);
20578 llvm::Type *AType = Intrinsic->getFunctionType()->getParamType(0);
20579 // Load A
20580 for (unsigned i = 0; i < MI.NumEltsA; ++i) {
20581 Value *V = Builder.CreateAlignedLoad(
20582 SrcA.getElementType(),
20583 Builder.CreateGEP(SrcA.getElementType(), SrcA.emitRawPointer(*this),
20584 llvm::ConstantInt::get(IntTy, i)),
20585 CharUnits::fromQuantity(4));
20586 Values.push_back(Builder.CreateBitCast(V, AType));
20587 }
20588 // Load B
20589 llvm::Type *BType = Intrinsic->getFunctionType()->getParamType(MI.NumEltsA);
20590 for (unsigned i = 0; i < MI.NumEltsB; ++i) {
20591 Value *V = Builder.CreateAlignedLoad(
20592 SrcB.getElementType(),
20593 Builder.CreateGEP(SrcB.getElementType(), SrcB.emitRawPointer(*this),
20594 llvm::ConstantInt::get(IntTy, i)),
20595 CharUnits::fromQuantity(4));
20596 Values.push_back(Builder.CreateBitCast(V, BType));
20597 }
20598 // Load C
20599 llvm::Type *CType =
20600 Intrinsic->getFunctionType()->getParamType(MI.NumEltsA + MI.NumEltsB);
20601 for (unsigned i = 0; i < MI.NumEltsC; ++i) {
20602 Value *V = Builder.CreateAlignedLoad(
20603 SrcC.getElementType(),
20604 Builder.CreateGEP(SrcC.getElementType(), SrcC.emitRawPointer(*this),
20605 llvm::ConstantInt::get(IntTy, i)),
20606 CharUnits::fromQuantity(4));
20607 Values.push_back(Builder.CreateBitCast(V, CType));
20608 }
20609 Value *Result = Builder.CreateCall(Intrinsic, Values);
20610 llvm::Type *DType = Dst.getElementType();
20611 for (unsigned i = 0; i < MI.NumEltsD; ++i)
20612 Builder.CreateAlignedStore(
20613 Builder.CreateBitCast(Builder.CreateExtractValue(Result, i), DType),
20614 Builder.CreateGEP(Dst.getElementType(), Dst.emitRawPointer(*this),
20615 llvm::ConstantInt::get(IntTy, i)),
20616 CharUnits::fromQuantity(4));
20617 return Result;
20618 }
20619 // The following builtins require half type support
20620 case NVPTX::BI__nvvm_ex2_approx_f16:
20621 return MakeHalfType(Intrinsic::nvvm_ex2_approx_f16, BuiltinID, E, *this);
20622 case NVPTX::BI__nvvm_ex2_approx_f16x2:
20623 return MakeHalfType(Intrinsic::nvvm_ex2_approx_f16x2, BuiltinID, E, *this);
20624 case NVPTX::BI__nvvm_ff2f16x2_rn:
20625 return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rn, BuiltinID, E, *this);
20626 case NVPTX::BI__nvvm_ff2f16x2_rn_relu:
20627 return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rn_relu, BuiltinID, E, *this);
20628 case NVPTX::BI__nvvm_ff2f16x2_rz:
20629 return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rz, BuiltinID, E, *this);
20630 case NVPTX::BI__nvvm_ff2f16x2_rz_relu:
20631 return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rz_relu, BuiltinID, E, *this);
20632 case NVPTX::BI__nvvm_fma_rn_f16:
20633 return MakeHalfType(Intrinsic::nvvm_fma_rn_f16, BuiltinID, E, *this);
20634 case NVPTX::BI__nvvm_fma_rn_f16x2:
20635 return MakeHalfType(Intrinsic::nvvm_fma_rn_f16x2, BuiltinID, E, *this);
20636 case NVPTX::BI__nvvm_fma_rn_ftz_f16:
20637 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_f16, BuiltinID, E, *this);
20638 case NVPTX::BI__nvvm_fma_rn_ftz_f16x2:
20639 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_f16x2, BuiltinID, E, *this);
20640 case NVPTX::BI__nvvm_fma_rn_ftz_relu_f16:
20641 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_relu_f16, BuiltinID, E,
20642 *this);
20643 case NVPTX::BI__nvvm_fma_rn_ftz_relu_f16x2:
20644 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_relu_f16x2, BuiltinID, E,
20645 *this);
20646 case NVPTX::BI__nvvm_fma_rn_ftz_sat_f16:
20647 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_sat_f16, BuiltinID, E,
20648 *this);
20649 case NVPTX::BI__nvvm_fma_rn_ftz_sat_f16x2:
20650 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_sat_f16x2, BuiltinID, E,
20651 *this);
20652 case NVPTX::BI__nvvm_fma_rn_relu_f16:
20653 return MakeHalfType(Intrinsic::nvvm_fma_rn_relu_f16, BuiltinID, E, *this);
20654 case NVPTX::BI__nvvm_fma_rn_relu_f16x2:
20655 return MakeHalfType(Intrinsic::nvvm_fma_rn_relu_f16x2, BuiltinID, E, *this);
20656 case NVPTX::BI__nvvm_fma_rn_sat_f16:
20657 return MakeHalfType(Intrinsic::nvvm_fma_rn_sat_f16, BuiltinID, E, *this);
20658 case NVPTX::BI__nvvm_fma_rn_sat_f16x2:
20659 return MakeHalfType(Intrinsic::nvvm_fma_rn_sat_f16x2, BuiltinID, E, *this);
20660 case NVPTX::BI__nvvm_fmax_f16:
20661 return MakeHalfType(Intrinsic::nvvm_fmax_f16, BuiltinID, E, *this);
20662 case NVPTX::BI__nvvm_fmax_f16x2:
20663 return MakeHalfType(Intrinsic::nvvm_fmax_f16x2, BuiltinID, E, *this);
20664 case NVPTX::BI__nvvm_fmax_ftz_f16:
20665 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_f16, BuiltinID, E, *this);
20666 case NVPTX::BI__nvvm_fmax_ftz_f16x2:
20667 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_f16x2, BuiltinID, E, *this);
20668 case NVPTX::BI__nvvm_fmax_ftz_nan_f16:
20669 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_f16, BuiltinID, E, *this);
20670 case NVPTX::BI__nvvm_fmax_ftz_nan_f16x2:
20671 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_f16x2, BuiltinID, E,
20672 *this);
20673 case NVPTX::BI__nvvm_fmax_ftz_nan_xorsign_abs_f16:
20674 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f16, BuiltinID,
20675 E, *this);
20676 case NVPTX::BI__nvvm_fmax_ftz_nan_xorsign_abs_f16x2:
20677 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f16x2,
20678 BuiltinID, E, *this);
20679 case NVPTX::BI__nvvm_fmax_ftz_xorsign_abs_f16:
20680 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_xorsign_abs_f16, BuiltinID, E,
20681 *this);
20682 case NVPTX::BI__nvvm_fmax_ftz_xorsign_abs_f16x2:
20683 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_xorsign_abs_f16x2, BuiltinID,
20684 E, *this);
20685 case NVPTX::BI__nvvm_fmax_nan_f16:
20686 return MakeHalfType(Intrinsic::nvvm_fmax_nan_f16, BuiltinID, E, *this);
20687 case NVPTX::BI__nvvm_fmax_nan_f16x2:
20688 return MakeHalfType(Intrinsic::nvvm_fmax_nan_f16x2, BuiltinID, E, *this);
20689 case NVPTX::BI__nvvm_fmax_nan_xorsign_abs_f16:
20690 return MakeHalfType(Intrinsic::nvvm_fmax_nan_xorsign_abs_f16, BuiltinID, E,
20691 *this);
20692 case NVPTX::BI__nvvm_fmax_nan_xorsign_abs_f16x2:
20693 return MakeHalfType(Intrinsic::nvvm_fmax_nan_xorsign_abs_f16x2, BuiltinID,
20694 E, *this);
20695 case NVPTX::BI__nvvm_fmax_xorsign_abs_f16:
20696 return MakeHalfType(Intrinsic::nvvm_fmax_xorsign_abs_f16, BuiltinID, E,
20697 *this);
20698 case NVPTX::BI__nvvm_fmax_xorsign_abs_f16x2:
20699 return MakeHalfType(Intrinsic::nvvm_fmax_xorsign_abs_f16x2, BuiltinID, E,
20700 *this);
20701 case NVPTX::BI__nvvm_fmin_f16:
20702 return MakeHalfType(Intrinsic::nvvm_fmin_f16, BuiltinID, E, *this);
20703 case NVPTX::BI__nvvm_fmin_f16x2:
20704 return MakeHalfType(Intrinsic::nvvm_fmin_f16x2, BuiltinID, E, *this);
20705 case NVPTX::BI__nvvm_fmin_ftz_f16:
20706 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_f16, BuiltinID, E, *this);
20707 case NVPTX::BI__nvvm_fmin_ftz_f16x2:
20708 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_f16x2, BuiltinID, E, *this);
20709 case NVPTX::BI__nvvm_fmin_ftz_nan_f16:
20710 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_f16, BuiltinID, E, *this);
20711 case NVPTX::BI__nvvm_fmin_ftz_nan_f16x2:
20712 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_f16x2, BuiltinID, E,
20713 *this);
20714 case NVPTX::BI__nvvm_fmin_ftz_nan_xorsign_abs_f16:
20715 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_f16, BuiltinID,
20716 E, *this);
20717 case NVPTX::BI__nvvm_fmin_ftz_nan_xorsign_abs_f16x2:
20718 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_f16x2,
20719 BuiltinID, E, *this);
20720 case NVPTX::BI__nvvm_fmin_ftz_xorsign_abs_f16:
20721 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_xorsign_abs_f16, BuiltinID, E,
20722 *this);
20723 case NVPTX::BI__nvvm_fmin_ftz_xorsign_abs_f16x2:
20724 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_xorsign_abs_f16x2, BuiltinID,
20725 E, *this);
20726 case NVPTX::BI__nvvm_fmin_nan_f16:
20727 return MakeHalfType(Intrinsic::nvvm_fmin_nan_f16, BuiltinID, E, *this);
20728 case NVPTX::BI__nvvm_fmin_nan_f16x2:
20729 return MakeHalfType(Intrinsic::nvvm_fmin_nan_f16x2, BuiltinID, E, *this);
20730 case NVPTX::BI__nvvm_fmin_nan_xorsign_abs_f16:
20731 return MakeHalfType(Intrinsic::nvvm_fmin_nan_xorsign_abs_f16, BuiltinID, E,
20732 *this);
20733 case NVPTX::BI__nvvm_fmin_nan_xorsign_abs_f16x2:
20734 return MakeHalfType(Intrinsic::nvvm_fmin_nan_xorsign_abs_f16x2, BuiltinID,
20735 E, *this);
20736 case NVPTX::BI__nvvm_fmin_xorsign_abs_f16:
20737 return MakeHalfType(Intrinsic::nvvm_fmin_xorsign_abs_f16, BuiltinID, E,
20738 *this);
20739 case NVPTX::BI__nvvm_fmin_xorsign_abs_f16x2:
20740 return MakeHalfType(Intrinsic::nvvm_fmin_xorsign_abs_f16x2, BuiltinID, E,
20741 *this);
20742 case NVPTX::BI__nvvm_ldg_h:
20743 return MakeHalfType(Intrinsic::nvvm_ldg_global_f, BuiltinID, E, *this);
20744 case NVPTX::BI__nvvm_ldg_h2:
20745 return MakeHalfType(Intrinsic::nvvm_ldg_global_f, BuiltinID, E, *this);
20746 case NVPTX::BI__nvvm_ldu_h:
20747 return MakeHalfType(Intrinsic::nvvm_ldu_global_f, BuiltinID, E, *this);
20748 case NVPTX::BI__nvvm_ldu_h2: {
20749 return MakeHalfType(Intrinsic::nvvm_ldu_global_f, BuiltinID, E, *this);
20750 }
20751 case NVPTX::BI__nvvm_cp_async_ca_shared_global_4:
20752 return MakeCpAsync(Intrinsic::nvvm_cp_async_ca_shared_global_4,
20753 Intrinsic::nvvm_cp_async_ca_shared_global_4_s, *this, E,
20754 4);
20755 case NVPTX::BI__nvvm_cp_async_ca_shared_global_8:
20756 return MakeCpAsync(Intrinsic::nvvm_cp_async_ca_shared_global_8,
20757 Intrinsic::nvvm_cp_async_ca_shared_global_8_s, *this, E,
20758 8);
20759 case NVPTX::BI__nvvm_cp_async_ca_shared_global_16:
20760 return MakeCpAsync(Intrinsic::nvvm_cp_async_ca_shared_global_16,
20761 Intrinsic::nvvm_cp_async_ca_shared_global_16_s, *this, E,
20762 16);
20763 case NVPTX::BI__nvvm_cp_async_cg_shared_global_16:
20764 return MakeCpAsync(Intrinsic::nvvm_cp_async_cg_shared_global_16,
20765 Intrinsic::nvvm_cp_async_cg_shared_global_16_s, *this, E,
20766 16);
20767 case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_x:
20768 return Builder.CreateCall(
20769 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_x));
20770 case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_y:
20771 return Builder.CreateCall(
20772 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_y));
20773 case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_z:
20774 return Builder.CreateCall(
20775 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_z));
20776 case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_w:
20777 return Builder.CreateCall(
20778 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_w));
20779 case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_x:
20780 return Builder.CreateCall(
20781 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_x));
20782 case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_y:
20783 return Builder.CreateCall(
20784 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_y));
20785 case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_z:
20786 return Builder.CreateCall(
20787 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_z));
20788 case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_w:
20789 return Builder.CreateCall(
20790 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_w));
20791 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_x:
20792 return Builder.CreateCall(
20793 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_x));
20794 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_y:
20795 return Builder.CreateCall(
20796 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_y));
20797 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_z:
20798 return Builder.CreateCall(
20799 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_z));
20800 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_w:
20801 return Builder.CreateCall(
20802 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_w));
20803 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_x:
20804 return Builder.CreateCall(
20805 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_x));
20806 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_y:
20807 return Builder.CreateCall(
20808 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_y));
20809 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_z:
20810 return Builder.CreateCall(
20811 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_z));
20812 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_w:
20813 return Builder.CreateCall(
20814 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_w));
20815 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctarank:
20816 return Builder.CreateCall(
20817 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctarank));
20818 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctarank:
20819 return Builder.CreateCall(
20820 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctarank));
20821 case NVPTX::BI__nvvm_is_explicit_cluster:
20822 return Builder.CreateCall(
20823 CGM.getIntrinsic(Intrinsic::nvvm_is_explicit_cluster));
20824 case NVPTX::BI__nvvm_isspacep_shared_cluster:
20825 return Builder.CreateCall(
20826 CGM.getIntrinsic(Intrinsic::nvvm_isspacep_shared_cluster),
20827 EmitScalarExpr(E->getArg(0)));
20828 case NVPTX::BI__nvvm_mapa:
20829 return Builder.CreateCall(
20830 CGM.getIntrinsic(Intrinsic::nvvm_mapa),
20831 {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1))});
20832 case NVPTX::BI__nvvm_mapa_shared_cluster:
20833 return Builder.CreateCall(
20834 CGM.getIntrinsic(Intrinsic::nvvm_mapa_shared_cluster),
20835 {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1))});
20836 case NVPTX::BI__nvvm_getctarank:
20837 return Builder.CreateCall(
20838 CGM.getIntrinsic(Intrinsic::nvvm_getctarank),
20839 EmitScalarExpr(E->getArg(0)));
20840 case NVPTX::BI__nvvm_getctarank_shared_cluster:
20841 return Builder.CreateCall(
20842 CGM.getIntrinsic(Intrinsic::nvvm_getctarank_shared_cluster),
20843 EmitScalarExpr(E->getArg(0)));
20844 case NVPTX::BI__nvvm_barrier_cluster_arrive:
20845 return Builder.CreateCall(
20846 CGM.getIntrinsic(Intrinsic::nvvm_barrier_cluster_arrive));
20847 case NVPTX::BI__nvvm_barrier_cluster_arrive_relaxed:
20848 return Builder.CreateCall(
20849 CGM.getIntrinsic(Intrinsic::nvvm_barrier_cluster_arrive_relaxed));
20850 case NVPTX::BI__nvvm_barrier_cluster_wait:
20851 return Builder.CreateCall(
20852 CGM.getIntrinsic(Intrinsic::nvvm_barrier_cluster_wait));
20853 case NVPTX::BI__nvvm_fence_sc_cluster:
20854 return Builder.CreateCall(
20855 CGM.getIntrinsic(Intrinsic::nvvm_fence_sc_cluster));
20856 default:
20857 return nullptr;
20858 }
20859 }
20860
20861 namespace {
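// Gathers the operands common to the alignment builtins
// (__builtin_is_aligned, __builtin_align_up, __builtin_align_down): the
// evaluated pointer or integer source, the integer type used for the
// arithmetic (the index-sized type for pointers), the alignment zext/trunc'd
// to that type, and the precomputed mask Alignment - 1.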
20862 struct BuiltinAlignArgs {
20863 llvm::Value *Src = nullptr;
20864 llvm::Type *SrcType = nullptr;
20865 llvm::Value *Alignment = nullptr;
20866 llvm::Value *Mask = nullptr;
20867 llvm::IntegerType *IntType = nullptr;
20868
20869   BuiltinAlignArgs(const CallExpr *E, CodeGenFunction &CGF) {
20870 QualType AstType = E->getArg(0)->getType();
20871 if (AstType->isArrayType())
20872 Src = CGF.EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(CGF);
20873 else
20874 Src = CGF.EmitScalarExpr(E->getArg(0));
20875 SrcType = Src->getType();
20876 if (SrcType->isPointerTy()) {
20877 IntType = IntegerType::get(
20878 CGF.getLLVMContext(),
20879 CGF.CGM.getDataLayout().getIndexTypeSizeInBits(SrcType));
20880 } else {
20881 assert(SrcType->isIntegerTy());
20882 IntType = cast<llvm::IntegerType>(SrcType);
20883 }
20884 Alignment = CGF.EmitScalarExpr(E->getArg(1));
20885 Alignment = CGF.Builder.CreateZExtOrTrunc(Alignment, IntType, "alignment");
20886 auto *One = llvm::ConstantInt::get(IntType, 1);
20887 Mask = CGF.Builder.CreateSub(Alignment, One, "mask");
20888 }
20889 };
20890 } // namespace
20891
20892 /// Generate (x & (y-1)) == 0.
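/// Illustrative sketch (variable name hypothetical): for a pointer `p`,
/// __builtin_is_aligned(p, 64) is emitted as a cast of `p` to the index-sized
/// integer type, an `and` with the mask 63, and an `icmp eq` of that result
/// against zero.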
20893 RValue CodeGenFunction::EmitBuiltinIsAligned(const CallExpr *E) {
20894 BuiltinAlignArgs Args(E, *this);
20895 llvm::Value *SrcAddress = Args.Src;
20896 if (Args.SrcType->isPointerTy())
20897 SrcAddress =
20898 Builder.CreateBitOrPointerCast(Args.Src, Args.IntType, "src_addr");
20899 return RValue::get(Builder.CreateICmpEQ(
20900 Builder.CreateAnd(SrcAddress, Args.Mask, "set_bits"),
20901 llvm::Constant::getNullValue(Args.IntType), "is_aligned"));
20902 }
20903
20904 /// Generate (x & ~(y-1)) to align down or ((x+(y-1)) & ~(y-1)) to align up.
20905 /// Note: For pointer types we can avoid ptrtoint/inttoptr pairs by using the
20906 /// llvm.ptrmask intrinsic (with a GEP before in the align_up case).
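/// Illustrative sketch (variable name hypothetical): for a pointer `p`,
/// __builtin_align_up(p, 64) first offsets `p` by the mask 63 with a GEP (so
/// an already aligned pointer maps back to itself after masking) and then
/// clears the low bits with llvm.ptrmask; __builtin_align_down(p, 64) skips
/// the GEP and applies llvm.ptrmask directly.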
20907 RValue CodeGenFunction::EmitBuiltinAlignTo(const CallExpr *E, bool AlignUp) {
20908 BuiltinAlignArgs Args(E, *this);
20909 llvm::Value *SrcForMask = Args.Src;
20910 if (AlignUp) {
20911 // When aligning up we have to first add the mask to ensure we go over the
20912 // next alignment value and then align down to the next valid multiple.
20913 // By adding the mask, we ensure that align_up on an already aligned
20914 // value will not change the value.
20915 if (Args.Src->getType()->isPointerTy()) {
20916 if (getLangOpts().isSignedOverflowDefined())
20917 SrcForMask =
20918 Builder.CreateGEP(Int8Ty, SrcForMask, Args.Mask, "over_boundary");
20919 else
20920 SrcForMask = EmitCheckedInBoundsGEP(Int8Ty, SrcForMask, Args.Mask,
20921 /*SignedIndices=*/true,
20922 /*isSubtraction=*/false,
20923 E->getExprLoc(), "over_boundary");
20924 } else {
20925 SrcForMask = Builder.CreateAdd(SrcForMask, Args.Mask, "over_boundary");
20926 }
20927 }
20928 // Invert the mask to only clear the lower bits.
20929 llvm::Value *InvertedMask = Builder.CreateNot(Args.Mask, "inverted_mask");
20930 llvm::Value *Result = nullptr;
20931 if (Args.Src->getType()->isPointerTy()) {
20932 Result = Builder.CreateIntrinsic(
20933 Intrinsic::ptrmask, {Args.SrcType, Args.IntType},
20934 {SrcForMask, InvertedMask}, nullptr, "aligned_result");
20935 } else {
20936 Result = Builder.CreateAnd(SrcForMask, InvertedMask, "aligned_result");
20937 }
20938 assert(Result->getType() == Args.SrcType);
20939 return RValue::get(Result);
20940 }
20941
20942 Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
20943 const CallExpr *E) {
20944 switch (BuiltinID) {
20945 case WebAssembly::BI__builtin_wasm_memory_size: {
20946 llvm::Type *ResultType = ConvertType(E->getType());
20947 Value *I = EmitScalarExpr(E->getArg(0));
20948 Function *Callee =
20949 CGM.getIntrinsic(Intrinsic::wasm_memory_size, ResultType);
20950 return Builder.CreateCall(Callee, I);
20951 }
20952 case WebAssembly::BI__builtin_wasm_memory_grow: {
20953 llvm::Type *ResultType = ConvertType(E->getType());
20954 Value *Args[] = {EmitScalarExpr(E->getArg(0)),
20955 EmitScalarExpr(E->getArg(1))};
20956 Function *Callee =
20957 CGM.getIntrinsic(Intrinsic::wasm_memory_grow, ResultType);
20958 return Builder.CreateCall(Callee, Args);
20959 }
20960 case WebAssembly::BI__builtin_wasm_tls_size: {
20961 llvm::Type *ResultType = ConvertType(E->getType());
20962 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_tls_size, ResultType);
20963 return Builder.CreateCall(Callee);
20964 }
20965 case WebAssembly::BI__builtin_wasm_tls_align: {
20966 llvm::Type *ResultType = ConvertType(E->getType());
20967 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_tls_align, ResultType);
20968 return Builder.CreateCall(Callee);
20969 }
20970 case WebAssembly::BI__builtin_wasm_tls_base: {
20971 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_tls_base);
20972 return Builder.CreateCall(Callee);
20973 }
20974 case WebAssembly::BI__builtin_wasm_throw: {
20975 Value *Tag = EmitScalarExpr(E->getArg(0));
20976 Value *Obj = EmitScalarExpr(E->getArg(1));
20977 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_throw);
20978 return Builder.CreateCall(Callee, {Tag, Obj});
20979 }
20980 case WebAssembly::BI__builtin_wasm_rethrow: {
20981 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_rethrow);
20982 return Builder.CreateCall(Callee);
20983 }
20984 case WebAssembly::BI__builtin_wasm_memory_atomic_wait32: {
20985 Value *Addr = EmitScalarExpr(E->getArg(0));
20986 Value *Expected = EmitScalarExpr(E->getArg(1));
20987 Value *Timeout = EmitScalarExpr(E->getArg(2));
20988 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_atomic_wait32);
20989 return Builder.CreateCall(Callee, {Addr, Expected, Timeout});
20990 }
20991 case WebAssembly::BI__builtin_wasm_memory_atomic_wait64: {
20992 Value *Addr = EmitScalarExpr(E->getArg(0));
20993 Value *Expected = EmitScalarExpr(E->getArg(1));
20994 Value *Timeout = EmitScalarExpr(E->getArg(2));
20995 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_atomic_wait64);
20996 return Builder.CreateCall(Callee, {Addr, Expected, Timeout});
20997 }
20998 case WebAssembly::BI__builtin_wasm_memory_atomic_notify: {
20999 Value *Addr = EmitScalarExpr(E->getArg(0));
21000 Value *Count = EmitScalarExpr(E->getArg(1));
21001 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_atomic_notify);
21002 return Builder.CreateCall(Callee, {Addr, Count});
21003 }
21004 case WebAssembly::BI__builtin_wasm_trunc_s_i32_f32:
21005 case WebAssembly::BI__builtin_wasm_trunc_s_i32_f64:
21006 case WebAssembly::BI__builtin_wasm_trunc_s_i64_f32:
21007 case WebAssembly::BI__builtin_wasm_trunc_s_i64_f64: {
21008 Value *Src = EmitScalarExpr(E->getArg(0));
21009 llvm::Type *ResT = ConvertType(E->getType());
21010 Function *Callee =
21011 CGM.getIntrinsic(Intrinsic::wasm_trunc_signed, {ResT, Src->getType()});
21012 return Builder.CreateCall(Callee, {Src});
21013 }
21014 case WebAssembly::BI__builtin_wasm_trunc_u_i32_f32:
21015 case WebAssembly::BI__builtin_wasm_trunc_u_i32_f64:
21016 case WebAssembly::BI__builtin_wasm_trunc_u_i64_f32:
21017 case WebAssembly::BI__builtin_wasm_trunc_u_i64_f64: {
21018 Value *Src = EmitScalarExpr(E->getArg(0));
21019 llvm::Type *ResT = ConvertType(E->getType());
21020 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_trunc_unsigned,
21021 {ResT, Src->getType()});
21022 return Builder.CreateCall(Callee, {Src});
21023 }
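  // The saturating truncations below are emitted with the generic
  // llvm.fptosi.sat / llvm.fptoui.sat intrinsics rather than a
  // WebAssembly-specific intrinsic.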
21024 case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32_f32:
21025 case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32_f64:
21026 case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i64_f32:
21027 case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i64_f64:
21028 case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32x4_f32x4: {
21029 Value *Src = EmitScalarExpr(E->getArg(0));
21030 llvm::Type *ResT = ConvertType(E->getType());
21031 Function *Callee =
21032 CGM.getIntrinsic(Intrinsic::fptosi_sat, {ResT, Src->getType()});
21033 return Builder.CreateCall(Callee, {Src});
21034 }
21035 case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32_f32:
21036 case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32_f64:
21037 case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i64_f32:
21038 case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i64_f64:
21039 case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32x4_f32x4: {
21040 Value *Src = EmitScalarExpr(E->getArg(0));
21041 llvm::Type *ResT = ConvertType(E->getType());
21042 Function *Callee =
21043 CGM.getIntrinsic(Intrinsic::fptoui_sat, {ResT, Src->getType()});
21044 return Builder.CreateCall(Callee, {Src});
21045 }
21046 case WebAssembly::BI__builtin_wasm_min_f32:
21047 case WebAssembly::BI__builtin_wasm_min_f64:
21048 case WebAssembly::BI__builtin_wasm_min_f16x8:
21049 case WebAssembly::BI__builtin_wasm_min_f32x4:
21050 case WebAssembly::BI__builtin_wasm_min_f64x2: {
21051 Value *LHS = EmitScalarExpr(E->getArg(0));
21052 Value *RHS = EmitScalarExpr(E->getArg(1));
21053 Function *Callee =
21054 CGM.getIntrinsic(Intrinsic::minimum, ConvertType(E->getType()));
21055 return Builder.CreateCall(Callee, {LHS, RHS});
21056 }
21057 case WebAssembly::BI__builtin_wasm_max_f32:
21058 case WebAssembly::BI__builtin_wasm_max_f64:
21059 case WebAssembly::BI__builtin_wasm_max_f16x8:
21060 case WebAssembly::BI__builtin_wasm_max_f32x4:
21061 case WebAssembly::BI__builtin_wasm_max_f64x2: {
21062 Value *LHS = EmitScalarExpr(E->getArg(0));
21063 Value *RHS = EmitScalarExpr(E->getArg(1));
21064 Function *Callee =
21065 CGM.getIntrinsic(Intrinsic::maximum, ConvertType(E->getType()));
21066 return Builder.CreateCall(Callee, {LHS, RHS});
21067 }
21068 case WebAssembly::BI__builtin_wasm_pmin_f16x8:
21069 case WebAssembly::BI__builtin_wasm_pmin_f32x4:
21070 case WebAssembly::BI__builtin_wasm_pmin_f64x2: {
21071 Value *LHS = EmitScalarExpr(E->getArg(0));
21072 Value *RHS = EmitScalarExpr(E->getArg(1));
21073 Function *Callee =
21074 CGM.getIntrinsic(Intrinsic::wasm_pmin, ConvertType(E->getType()));
21075 return Builder.CreateCall(Callee, {LHS, RHS});
21076 }
21077 case WebAssembly::BI__builtin_wasm_pmax_f16x8:
21078 case WebAssembly::BI__builtin_wasm_pmax_f32x4:
21079 case WebAssembly::BI__builtin_wasm_pmax_f64x2: {
21080 Value *LHS = EmitScalarExpr(E->getArg(0));
21081 Value *RHS = EmitScalarExpr(E->getArg(1));
21082 Function *Callee =
21083 CGM.getIntrinsic(Intrinsic::wasm_pmax, ConvertType(E->getType()));
21084 return Builder.CreateCall(Callee, {LHS, RHS});
21085 }
21086 case WebAssembly::BI__builtin_wasm_ceil_f32x4:
21087 case WebAssembly::BI__builtin_wasm_floor_f32x4:
21088 case WebAssembly::BI__builtin_wasm_trunc_f32x4:
21089 case WebAssembly::BI__builtin_wasm_nearest_f32x4:
21090 case WebAssembly::BI__builtin_wasm_ceil_f64x2:
21091 case WebAssembly::BI__builtin_wasm_floor_f64x2:
21092 case WebAssembly::BI__builtin_wasm_trunc_f64x2:
21093 case WebAssembly::BI__builtin_wasm_nearest_f64x2: {
21094 unsigned IntNo;
21095 switch (BuiltinID) {
21096 case WebAssembly::BI__builtin_wasm_ceil_f32x4:
21097 case WebAssembly::BI__builtin_wasm_ceil_f64x2:
21098 IntNo = Intrinsic::ceil;
21099 break;
21100 case WebAssembly::BI__builtin_wasm_floor_f32x4:
21101 case WebAssembly::BI__builtin_wasm_floor_f64x2:
21102 IntNo = Intrinsic::floor;
21103 break;
21104 case WebAssembly::BI__builtin_wasm_trunc_f32x4:
21105 case WebAssembly::BI__builtin_wasm_trunc_f64x2:
21106 IntNo = Intrinsic::trunc;
21107 break;
21108 case WebAssembly::BI__builtin_wasm_nearest_f32x4:
21109 case WebAssembly::BI__builtin_wasm_nearest_f64x2:
21110 IntNo = Intrinsic::nearbyint;
21111 break;
21112 default:
21113 llvm_unreachable("unexpected builtin ID");
21114 }
21115 Value *Value = EmitScalarExpr(E->getArg(0));
21116 Function *Callee = CGM.getIntrinsic(IntNo, ConvertType(E->getType()));
21117 return Builder.CreateCall(Callee, Value);
21118 }
21119 case WebAssembly::BI__builtin_wasm_ref_null_extern: {
21120 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_ref_null_extern);
21121 return Builder.CreateCall(Callee);
21122 }
21123 case WebAssembly::BI__builtin_wasm_ref_null_func: {
21124 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_ref_null_func);
21125 return Builder.CreateCall(Callee);
21126 }
21127 case WebAssembly::BI__builtin_wasm_swizzle_i8x16: {
21128 Value *Src = EmitScalarExpr(E->getArg(0));
21129 Value *Indices = EmitScalarExpr(E->getArg(1));
21130 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_swizzle);
21131 return Builder.CreateCall(Callee, {Src, Indices});
21132 }
21133 case WebAssembly::BI__builtin_wasm_add_sat_s_i8x16:
21134 case WebAssembly::BI__builtin_wasm_add_sat_u_i8x16:
21135 case WebAssembly::BI__builtin_wasm_add_sat_s_i16x8:
21136 case WebAssembly::BI__builtin_wasm_add_sat_u_i16x8:
21137 case WebAssembly::BI__builtin_wasm_sub_sat_s_i8x16:
21138 case WebAssembly::BI__builtin_wasm_sub_sat_u_i8x16:
21139 case WebAssembly::BI__builtin_wasm_sub_sat_s_i16x8:
21140 case WebAssembly::BI__builtin_wasm_sub_sat_u_i16x8: {
21141 unsigned IntNo;
21142 switch (BuiltinID) {
21143 case WebAssembly::BI__builtin_wasm_add_sat_s_i8x16:
21144 case WebAssembly::BI__builtin_wasm_add_sat_s_i16x8:
21145 IntNo = Intrinsic::sadd_sat;
21146 break;
21147 case WebAssembly::BI__builtin_wasm_add_sat_u_i8x16:
21148 case WebAssembly::BI__builtin_wasm_add_sat_u_i16x8:
21149 IntNo = Intrinsic::uadd_sat;
21150 break;
21151 case WebAssembly::BI__builtin_wasm_sub_sat_s_i8x16:
21152 case WebAssembly::BI__builtin_wasm_sub_sat_s_i16x8:
21153 IntNo = Intrinsic::wasm_sub_sat_signed;
21154 break;
21155 case WebAssembly::BI__builtin_wasm_sub_sat_u_i8x16:
21156 case WebAssembly::BI__builtin_wasm_sub_sat_u_i16x8:
21157 IntNo = Intrinsic::wasm_sub_sat_unsigned;
21158 break;
21159 default:
21160 llvm_unreachable("unexpected builtin ID");
21161 }
21162 Value *LHS = EmitScalarExpr(E->getArg(0));
21163 Value *RHS = EmitScalarExpr(E->getArg(1));
21164 Function *Callee = CGM.getIntrinsic(IntNo, ConvertType(E->getType()));
21165 return Builder.CreateCall(Callee, {LHS, RHS});
21166 }
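  // Integer abs is open-coded here as a compare-and-select,
  // select(x < 0, -x, x), rather than calling a target intrinsic; the backend
  // is expected to pattern-match this back into the SIMD abs instructions.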
21167 case WebAssembly::BI__builtin_wasm_abs_i8x16:
21168 case WebAssembly::BI__builtin_wasm_abs_i16x8:
21169 case WebAssembly::BI__builtin_wasm_abs_i32x4:
21170 case WebAssembly::BI__builtin_wasm_abs_i64x2: {
21171 Value *Vec = EmitScalarExpr(E->getArg(0));
21172 Value *Neg = Builder.CreateNeg(Vec, "neg");
21173 Constant *Zero = llvm::Constant::getNullValue(Vec->getType());
21174 Value *ICmp = Builder.CreateICmpSLT(Vec, Zero, "abscond");
21175 return Builder.CreateSelect(ICmp, Neg, Vec, "abs");
21176 }
21177 case WebAssembly::BI__builtin_wasm_min_s_i8x16:
21178 case WebAssembly::BI__builtin_wasm_min_u_i8x16:
21179 case WebAssembly::BI__builtin_wasm_max_s_i8x16:
21180 case WebAssembly::BI__builtin_wasm_max_u_i8x16:
21181 case WebAssembly::BI__builtin_wasm_min_s_i16x8:
21182 case WebAssembly::BI__builtin_wasm_min_u_i16x8:
21183 case WebAssembly::BI__builtin_wasm_max_s_i16x8:
21184 case WebAssembly::BI__builtin_wasm_max_u_i16x8:
21185 case WebAssembly::BI__builtin_wasm_min_s_i32x4:
21186 case WebAssembly::BI__builtin_wasm_min_u_i32x4:
21187 case WebAssembly::BI__builtin_wasm_max_s_i32x4:
21188 case WebAssembly::BI__builtin_wasm_max_u_i32x4: {
21189 Value *LHS = EmitScalarExpr(E->getArg(0));
21190 Value *RHS = EmitScalarExpr(E->getArg(1));
21191 Value *ICmp;
21192 switch (BuiltinID) {
21193 case WebAssembly::BI__builtin_wasm_min_s_i8x16:
21194 case WebAssembly::BI__builtin_wasm_min_s_i16x8:
21195 case WebAssembly::BI__builtin_wasm_min_s_i32x4:
21196 ICmp = Builder.CreateICmpSLT(LHS, RHS);
21197 break;
21198 case WebAssembly::BI__builtin_wasm_min_u_i8x16:
21199 case WebAssembly::BI__builtin_wasm_min_u_i16x8:
21200 case WebAssembly::BI__builtin_wasm_min_u_i32x4:
21201 ICmp = Builder.CreateICmpULT(LHS, RHS);
21202 break;
21203 case WebAssembly::BI__builtin_wasm_max_s_i8x16:
21204 case WebAssembly::BI__builtin_wasm_max_s_i16x8:
21205 case WebAssembly::BI__builtin_wasm_max_s_i32x4:
21206 ICmp = Builder.CreateICmpSGT(LHS, RHS);
21207 break;
21208 case WebAssembly::BI__builtin_wasm_max_u_i8x16:
21209 case WebAssembly::BI__builtin_wasm_max_u_i16x8:
21210 case WebAssembly::BI__builtin_wasm_max_u_i32x4:
21211 ICmp = Builder.CreateICmpUGT(LHS, RHS);
21212 break;
21213 default:
21214 llvm_unreachable("unexpected builtin ID");
21215 }
21216 return Builder.CreateSelect(ICmp, LHS, RHS);
21217 }
21218 case WebAssembly::BI__builtin_wasm_avgr_u_i8x16:
21219 case WebAssembly::BI__builtin_wasm_avgr_u_i16x8: {
21220 Value *LHS = EmitScalarExpr(E->getArg(0));
21221 Value *RHS = EmitScalarExpr(E->getArg(1));
21222 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_avgr_unsigned,
21223 ConvertType(E->getType()));
21224 return Builder.CreateCall(Callee, {LHS, RHS});
21225 }
21226 case WebAssembly::BI__builtin_wasm_q15mulr_sat_s_i16x8: {
21227 Value *LHS = EmitScalarExpr(E->getArg(0));
21228 Value *RHS = EmitScalarExpr(E->getArg(1));
21229 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_q15mulr_sat_signed);
21230 return Builder.CreateCall(Callee, {LHS, RHS});
21231 }
21232 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i8x16_s_i16x8:
21233 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i8x16_u_i16x8:
21234 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i16x8_s_i32x4:
21235 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i16x8_u_i32x4: {
21236 Value *Vec = EmitScalarExpr(E->getArg(0));
21237 unsigned IntNo;
21238 switch (BuiltinID) {
21239 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i8x16_s_i16x8:
21240 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i16x8_s_i32x4:
21241 IntNo = Intrinsic::wasm_extadd_pairwise_signed;
21242 break;
21243 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i8x16_u_i16x8:
21244 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i16x8_u_i32x4:
21245 IntNo = Intrinsic::wasm_extadd_pairwise_unsigned;
21246 break;
21247 default:
21248 llvm_unreachable("unexpected builtin ID");
21249 }
21250
21251 Function *Callee = CGM.getIntrinsic(IntNo, ConvertType(E->getType()));
21252 return Builder.CreateCall(Callee, Vec);
21253 }
21254 case WebAssembly::BI__builtin_wasm_bitselect: {
21255 Value *V1 = EmitScalarExpr(E->getArg(0));
21256 Value *V2 = EmitScalarExpr(E->getArg(1));
21257 Value *C = EmitScalarExpr(E->getArg(2));
21258 Function *Callee =
21259 CGM.getIntrinsic(Intrinsic::wasm_bitselect, ConvertType(E->getType()));
21260 return Builder.CreateCall(Callee, {V1, V2, C});
21261 }
21262 case WebAssembly::BI__builtin_wasm_dot_s_i32x4_i16x8: {
21263 Value *LHS = EmitScalarExpr(E->getArg(0));
21264 Value *RHS = EmitScalarExpr(E->getArg(1));
21265 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_dot);
21266 return Builder.CreateCall(Callee, {LHS, RHS});
21267 }
21268 case WebAssembly::BI__builtin_wasm_popcnt_i8x16: {
21269 Value *Vec = EmitScalarExpr(E->getArg(0));
21270 Function *Callee =
21271 CGM.getIntrinsic(Intrinsic::ctpop, ConvertType(E->getType()));
21272 return Builder.CreateCall(Callee, {Vec});
21273 }
21274 case WebAssembly::BI__builtin_wasm_any_true_v128:
21275 case WebAssembly::BI__builtin_wasm_all_true_i8x16:
21276 case WebAssembly::BI__builtin_wasm_all_true_i16x8:
21277 case WebAssembly::BI__builtin_wasm_all_true_i32x4:
21278 case WebAssembly::BI__builtin_wasm_all_true_i64x2: {
21279 unsigned IntNo;
21280 switch (BuiltinID) {
21281 case WebAssembly::BI__builtin_wasm_any_true_v128:
21282 IntNo = Intrinsic::wasm_anytrue;
21283 break;
21284 case WebAssembly::BI__builtin_wasm_all_true_i8x16:
21285 case WebAssembly::BI__builtin_wasm_all_true_i16x8:
21286 case WebAssembly::BI__builtin_wasm_all_true_i32x4:
21287 case WebAssembly::BI__builtin_wasm_all_true_i64x2:
21288 IntNo = Intrinsic::wasm_alltrue;
21289 break;
21290 default:
21291 llvm_unreachable("unexpected builtin ID");
21292 }
21293 Value *Vec = EmitScalarExpr(E->getArg(0));
21294 Function *Callee = CGM.getIntrinsic(IntNo, Vec->getType());
21295 return Builder.CreateCall(Callee, {Vec});
21296 }
21297 case WebAssembly::BI__builtin_wasm_bitmask_i8x16:
21298 case WebAssembly::BI__builtin_wasm_bitmask_i16x8:
21299 case WebAssembly::BI__builtin_wasm_bitmask_i32x4:
21300 case WebAssembly::BI__builtin_wasm_bitmask_i64x2: {
21301 Value *Vec = EmitScalarExpr(E->getArg(0));
21302 Function *Callee =
21303 CGM.getIntrinsic(Intrinsic::wasm_bitmask, Vec->getType());
21304 return Builder.CreateCall(Callee, {Vec});
21305 }
21306 case WebAssembly::BI__builtin_wasm_abs_f32x4:
21307 case WebAssembly::BI__builtin_wasm_abs_f64x2: {
21308 Value *Vec = EmitScalarExpr(E->getArg(0));
21309 Function *Callee = CGM.getIntrinsic(Intrinsic::fabs, Vec->getType());
21310 return Builder.CreateCall(Callee, {Vec});
21311 }
21312 case WebAssembly::BI__builtin_wasm_sqrt_f32x4:
21313 case WebAssembly::BI__builtin_wasm_sqrt_f64x2: {
21314 Value *Vec = EmitScalarExpr(E->getArg(0));
21315 Function *Callee = CGM.getIntrinsic(Intrinsic::sqrt, Vec->getType());
21316 return Builder.CreateCall(Callee, {Vec});
21317 }
21318 case WebAssembly::BI__builtin_wasm_narrow_s_i8x16_i16x8:
21319 case WebAssembly::BI__builtin_wasm_narrow_u_i8x16_i16x8:
21320 case WebAssembly::BI__builtin_wasm_narrow_s_i16x8_i32x4:
21321 case WebAssembly::BI__builtin_wasm_narrow_u_i16x8_i32x4: {
21322 Value *Low = EmitScalarExpr(E->getArg(0));
21323 Value *High = EmitScalarExpr(E->getArg(1));
21324 unsigned IntNo;
21325 switch (BuiltinID) {
21326 case WebAssembly::BI__builtin_wasm_narrow_s_i8x16_i16x8:
21327 case WebAssembly::BI__builtin_wasm_narrow_s_i16x8_i32x4:
21328 IntNo = Intrinsic::wasm_narrow_signed;
21329 break;
21330 case WebAssembly::BI__builtin_wasm_narrow_u_i8x16_i16x8:
21331 case WebAssembly::BI__builtin_wasm_narrow_u_i16x8_i32x4:
21332 IntNo = Intrinsic::wasm_narrow_unsigned;
21333 break;
21334 default:
21335 llvm_unreachable("unexpected builtin ID");
21336 }
21337 Function *Callee =
21338 CGM.getIntrinsic(IntNo, {ConvertType(E->getType()), Low->getType()});
21339 return Builder.CreateCall(Callee, {Low, High});
21340 }
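  // The "_zero" saturating truncations convert the two f64 lanes with
  // llvm.fptosi.sat / llvm.fptoui.sat and then widen the <2 x i32> result to
  // <4 x i32> by shuffling in zeros for the two high lanes.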
21341 case WebAssembly::BI__builtin_wasm_trunc_sat_s_zero_f64x2_i32x4:
21342 case WebAssembly::BI__builtin_wasm_trunc_sat_u_zero_f64x2_i32x4: {
21343 Value *Vec = EmitScalarExpr(E->getArg(0));
21344 unsigned IntNo;
21345 switch (BuiltinID) {
21346 case WebAssembly::BI__builtin_wasm_trunc_sat_s_zero_f64x2_i32x4:
21347 IntNo = Intrinsic::fptosi_sat;
21348 break;
21349 case WebAssembly::BI__builtin_wasm_trunc_sat_u_zero_f64x2_i32x4:
21350 IntNo = Intrinsic::fptoui_sat;
21351 break;
21352 default:
21353 llvm_unreachable("unexpected builtin ID");
21354 }
21355 llvm::Type *SrcT = Vec->getType();
21356 llvm::Type *TruncT = SrcT->getWithNewType(Builder.getInt32Ty());
21357 Function *Callee = CGM.getIntrinsic(IntNo, {TruncT, SrcT});
21358 Value *Trunc = Builder.CreateCall(Callee, Vec);
21359 Value *Splat = Constant::getNullValue(TruncT);
21360 return Builder.CreateShuffleVector(Trunc, Splat, ArrayRef<int>{0, 1, 2, 3});
21361 }
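  // i8x16.shuffle takes the two input vectors followed by 16 lane indices,
  // which must be integer constant expressions.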
21362 case WebAssembly::BI__builtin_wasm_shuffle_i8x16: {
21363 Value *Ops[18];
21364 size_t OpIdx = 0;
21365 Ops[OpIdx++] = EmitScalarExpr(E->getArg(0));
21366 Ops[OpIdx++] = EmitScalarExpr(E->getArg(1));
21367 while (OpIdx < 18) {
21368 std::optional<llvm::APSInt> LaneConst =
21369 E->getArg(OpIdx)->getIntegerConstantExpr(getContext());
21370 assert(LaneConst && "Constant arg isn't actually constant?");
21371 Ops[OpIdx++] = llvm::ConstantInt::get(getLLVMContext(), *LaneConst);
21372 }
21373 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_shuffle);
21374 return Builder.CreateCall(Callee, Ops);
21375 }
21376 case WebAssembly::BI__builtin_wasm_relaxed_madd_f16x8:
21377 case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f16x8:
21378 case WebAssembly::BI__builtin_wasm_relaxed_madd_f32x4:
21379 case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f32x4:
21380 case WebAssembly::BI__builtin_wasm_relaxed_madd_f64x2:
21381 case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f64x2: {
21382 Value *A = EmitScalarExpr(E->getArg(0));
21383 Value *B = EmitScalarExpr(E->getArg(1));
21384 Value *C = EmitScalarExpr(E->getArg(2));
21385 unsigned IntNo;
21386 switch (BuiltinID) {
21387 case WebAssembly::BI__builtin_wasm_relaxed_madd_f16x8:
21388 case WebAssembly::BI__builtin_wasm_relaxed_madd_f32x4:
21389 case WebAssembly::BI__builtin_wasm_relaxed_madd_f64x2:
21390 IntNo = Intrinsic::wasm_relaxed_madd;
21391 break;
21392 case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f16x8:
21393 case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f32x4:
21394 case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f64x2:
21395 IntNo = Intrinsic::wasm_relaxed_nmadd;
21396 break;
21397 default:
21398 llvm_unreachable("unexpected builtin ID");
21399 }
21400 Function *Callee = CGM.getIntrinsic(IntNo, A->getType());
21401 return Builder.CreateCall(Callee, {A, B, C});
21402 }
21403 case WebAssembly::BI__builtin_wasm_relaxed_laneselect_i8x16:
21404 case WebAssembly::BI__builtin_wasm_relaxed_laneselect_i16x8:
21405 case WebAssembly::BI__builtin_wasm_relaxed_laneselect_i32x4:
21406 case WebAssembly::BI__builtin_wasm_relaxed_laneselect_i64x2: {
21407 Value *A = EmitScalarExpr(E->getArg(0));
21408 Value *B = EmitScalarExpr(E->getArg(1));
21409 Value *C = EmitScalarExpr(E->getArg(2));
21410 Function *Callee =
21411 CGM.getIntrinsic(Intrinsic::wasm_relaxed_laneselect, A->getType());
21412 return Builder.CreateCall(Callee, {A, B, C});
21413 }
21414 case WebAssembly::BI__builtin_wasm_relaxed_swizzle_i8x16: {
21415 Value *Src = EmitScalarExpr(E->getArg(0));
21416 Value *Indices = EmitScalarExpr(E->getArg(1));
21417 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_relaxed_swizzle);
21418 return Builder.CreateCall(Callee, {Src, Indices});
21419 }
21420 case WebAssembly::BI__builtin_wasm_relaxed_min_f32x4:
21421 case WebAssembly::BI__builtin_wasm_relaxed_max_f32x4:
21422 case WebAssembly::BI__builtin_wasm_relaxed_min_f64x2:
21423 case WebAssembly::BI__builtin_wasm_relaxed_max_f64x2: {
21424 Value *LHS = EmitScalarExpr(E->getArg(0));
21425 Value *RHS = EmitScalarExpr(E->getArg(1));
21426 unsigned IntNo;
21427 switch (BuiltinID) {
21428 case WebAssembly::BI__builtin_wasm_relaxed_min_f32x4:
21429 case WebAssembly::BI__builtin_wasm_relaxed_min_f64x2:
21430 IntNo = Intrinsic::wasm_relaxed_min;
21431 break;
21432 case WebAssembly::BI__builtin_wasm_relaxed_max_f32x4:
21433 case WebAssembly::BI__builtin_wasm_relaxed_max_f64x2:
21434 IntNo = Intrinsic::wasm_relaxed_max;
21435 break;
21436 default:
21437 llvm_unreachable("unexpected builtin ID");
21438 }
21439 Function *Callee = CGM.getIntrinsic(IntNo, LHS->getType());
21440 return Builder.CreateCall(Callee, {LHS, RHS});
21441 }
21442 case WebAssembly::BI__builtin_wasm_relaxed_trunc_s_i32x4_f32x4:
21443 case WebAssembly::BI__builtin_wasm_relaxed_trunc_u_i32x4_f32x4:
21444 case WebAssembly::BI__builtin_wasm_relaxed_trunc_s_zero_i32x4_f64x2:
21445 case WebAssembly::BI__builtin_wasm_relaxed_trunc_u_zero_i32x4_f64x2: {
21446 Value *Vec = EmitScalarExpr(E->getArg(0));
21447 unsigned IntNo;
21448 switch (BuiltinID) {
21449 case WebAssembly::BI__builtin_wasm_relaxed_trunc_s_i32x4_f32x4:
21450 IntNo = Intrinsic::wasm_relaxed_trunc_signed;
21451 break;
21452 case WebAssembly::BI__builtin_wasm_relaxed_trunc_u_i32x4_f32x4:
21453 IntNo = Intrinsic::wasm_relaxed_trunc_unsigned;
21454 break;
21455 case WebAssembly::BI__builtin_wasm_relaxed_trunc_s_zero_i32x4_f64x2:
21456 IntNo = Intrinsic::wasm_relaxed_trunc_signed_zero;
21457 break;
21458 case WebAssembly::BI__builtin_wasm_relaxed_trunc_u_zero_i32x4_f64x2:
21459 IntNo = Intrinsic::wasm_relaxed_trunc_unsigned_zero;
21460 break;
21461 default:
21462 llvm_unreachable("unexpected builtin ID");
21463 }
21464 Function *Callee = CGM.getIntrinsic(IntNo);
21465 return Builder.CreateCall(Callee, {Vec});
21466 }
21467 case WebAssembly::BI__builtin_wasm_relaxed_q15mulr_s_i16x8: {
21468 Value *LHS = EmitScalarExpr(E->getArg(0));
21469 Value *RHS = EmitScalarExpr(E->getArg(1));
21470 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_relaxed_q15mulr_signed);
21471 return Builder.CreateCall(Callee, {LHS, RHS});
21472 }
21473 case WebAssembly::BI__builtin_wasm_relaxed_dot_i8x16_i7x16_s_i16x8: {
21474 Value *LHS = EmitScalarExpr(E->getArg(0));
21475 Value *RHS = EmitScalarExpr(E->getArg(1));
21476 Function *Callee =
21477 CGM.getIntrinsic(Intrinsic::wasm_relaxed_dot_i8x16_i7x16_signed);
21478 return Builder.CreateCall(Callee, {LHS, RHS});
21479 }
21480 case WebAssembly::BI__builtin_wasm_relaxed_dot_i8x16_i7x16_add_s_i32x4: {
21481 Value *LHS = EmitScalarExpr(E->getArg(0));
21482 Value *RHS = EmitScalarExpr(E->getArg(1));
21483 Value *Acc = EmitScalarExpr(E->getArg(2));
21484 Function *Callee =
21485 CGM.getIntrinsic(Intrinsic::wasm_relaxed_dot_i8x16_i7x16_add_signed);
21486 return Builder.CreateCall(Callee, {LHS, RHS, Acc});
21487 }
21488 case WebAssembly::BI__builtin_wasm_relaxed_dot_bf16x8_add_f32_f32x4: {
21489 Value *LHS = EmitScalarExpr(E->getArg(0));
21490 Value *RHS = EmitScalarExpr(E->getArg(1));
21491 Value *Acc = EmitScalarExpr(E->getArg(2));
21492 Function *Callee =
21493 CGM.getIntrinsic(Intrinsic::wasm_relaxed_dot_bf16x8_add_f32);
21494 return Builder.CreateCall(Callee, {LHS, RHS, Acc});
21495 }
21496 case WebAssembly::BI__builtin_wasm_loadf16_f32: {
21497 Value *Addr = EmitScalarExpr(E->getArg(0));
21498 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_loadf16_f32);
21499 return Builder.CreateCall(Callee, {Addr});
21500 }
21501 case WebAssembly::BI__builtin_wasm_storef16_f32: {
21502 Value *Val = EmitScalarExpr(E->getArg(0));
21503 Value *Addr = EmitScalarExpr(E->getArg(1));
21504 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_storef16_f32);
21505 return Builder.CreateCall(Callee, {Val, Addr});
21506 }
21507 case WebAssembly::BI__builtin_wasm_splat_f16x8: {
21508 Value *Val = EmitScalarExpr(E->getArg(0));
21509 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_splat_f16x8);
21510 return Builder.CreateCall(Callee, {Val});
21511 }
21512 case WebAssembly::BI__builtin_wasm_extract_lane_f16x8: {
21513 Value *Vector = EmitScalarExpr(E->getArg(0));
21514 Value *Index = EmitScalarExpr(E->getArg(1));
21515 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_extract_lane_f16x8);
21516 return Builder.CreateCall(Callee, {Vector, Index});
21517 }
21518 case WebAssembly::BI__builtin_wasm_table_get: {
21519 assert(E->getArg(0)->getType()->isArrayType());
21520 Value *Table = EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(*this);
21521 Value *Index = EmitScalarExpr(E->getArg(1));
21522 Function *Callee;
21523 if (E->getType().isWebAssemblyExternrefType())
21524 Callee = CGM.getIntrinsic(Intrinsic::wasm_table_get_externref);
21525 else if (E->getType().isWebAssemblyFuncrefType())
21526 Callee = CGM.getIntrinsic(Intrinsic::wasm_table_get_funcref);
21527 else
21528 llvm_unreachable(
21529 "Unexpected reference type for __builtin_wasm_table_get");
21530 return Builder.CreateCall(Callee, {Table, Index});
21531 }
21532 case WebAssembly::BI__builtin_wasm_table_set: {
21533 assert(E->getArg(0)->getType()->isArrayType());
21534 Value *Table = EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(*this);
21535 Value *Index = EmitScalarExpr(E->getArg(1));
21536 Value *Val = EmitScalarExpr(E->getArg(2));
21537 Function *Callee;
21538 if (E->getArg(2)->getType().isWebAssemblyExternrefType())
21539 Callee = CGM.getIntrinsic(Intrinsic::wasm_table_set_externref);
21540 else if (E->getArg(2)->getType().isWebAssemblyFuncrefType())
21541 Callee = CGM.getIntrinsic(Intrinsic::wasm_table_set_funcref);
21542 else
21543 llvm_unreachable(
21544 "Unexpected reference type for __builtin_wasm_table_set");
21545 return Builder.CreateCall(Callee, {Table, Index, Val});
21546 }
21547 case WebAssembly::BI__builtin_wasm_table_size: {
21548 assert(E->getArg(0)->getType()->isArrayType());
21549 Value *Value = EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(*this);
21550 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_table_size);
21551 return Builder.CreateCall(Callee, Value);
21552 }
21553 case WebAssembly::BI__builtin_wasm_table_grow: {
21554 assert(E->getArg(0)->getType()->isArrayType());
21555 Value *Table = EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(*this);
21556 Value *Val = EmitScalarExpr(E->getArg(1));
21557 Value *NElems = EmitScalarExpr(E->getArg(2));
21558
21559 Function *Callee;
21560 if (E->getArg(1)->getType().isWebAssemblyExternrefType())
21561 Callee = CGM.getIntrinsic(Intrinsic::wasm_table_grow_externref);
21562 else if (E->getArg(2)->getType().isWebAssemblyFuncrefType())
21563 Callee = CGM.getIntrinsic(Intrinsic::wasm_table_fill_funcref);
21564 else
21565 llvm_unreachable(
21566 "Unexpected reference type for __builtin_wasm_table_grow");
21567
21568 return Builder.CreateCall(Callee, {Table, Val, NElems});
21569 }
21570 case WebAssembly::BI__builtin_wasm_table_fill: {
21571 assert(E->getArg(0)->getType()->isArrayType());
21572 Value *Table = EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(*this);
21573 Value *Index = EmitScalarExpr(E->getArg(1));
21574 Value *Val = EmitScalarExpr(E->getArg(2));
21575 Value *NElems = EmitScalarExpr(E->getArg(3));
21576
21577 Function *Callee;
21578 if (E->getArg(2)->getType().isWebAssemblyExternrefType())
21579 Callee = CGM.getIntrinsic(Intrinsic::wasm_table_fill_externref);
21580 else if (E->getArg(2)->getType().isWebAssemblyFuncrefType())
21581 Callee = CGM.getIntrinsic(Intrinsic::wasm_table_fill_funcref);
21582 else
21583 llvm_unreachable(
21584 "Unexpected reference type for __builtin_wasm_table_fill");
21585
21586 return Builder.CreateCall(Callee, {Table, Index, Val, NElems});
21587 }
21588 case WebAssembly::BI__builtin_wasm_table_copy: {
21589 assert(E->getArg(0)->getType()->isArrayType());
21590 Value *TableX = EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(*this);
21591 Value *TableY = EmitArrayToPointerDecay(E->getArg(1)).emitRawPointer(*this);
21592 Value *DstIdx = EmitScalarExpr(E->getArg(2));
21593 Value *SrcIdx = EmitScalarExpr(E->getArg(3));
21594 Value *NElems = EmitScalarExpr(E->getArg(4));
21595
21596 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_table_copy);
21597
21598 return Builder.CreateCall(Callee, {TableX, TableY, SrcIdx, DstIdx, NElems});
21599 }
21600 default:
21601 return nullptr;
21602 }
21603 }
21604
21605 static std::pair<Intrinsic::ID, unsigned>
21606 getIntrinsicForHexagonNonClangBuiltin(unsigned BuiltinID) {
21607 struct Info {
21608 unsigned BuiltinID;
21609 Intrinsic::ID IntrinsicID;
21610 unsigned VecLen;
21611 };
21612 static Info Infos[] = {
21613 #define CUSTOM_BUILTIN_MAPPING(x,s) \
21614 { Hexagon::BI__builtin_HEXAGON_##x, Intrinsic::hexagon_##x, s },
21615 CUSTOM_BUILTIN_MAPPING(L2_loadrub_pci, 0)
21616 CUSTOM_BUILTIN_MAPPING(L2_loadrb_pci, 0)
21617 CUSTOM_BUILTIN_MAPPING(L2_loadruh_pci, 0)
21618 CUSTOM_BUILTIN_MAPPING(L2_loadrh_pci, 0)
21619 CUSTOM_BUILTIN_MAPPING(L2_loadri_pci, 0)
21620 CUSTOM_BUILTIN_MAPPING(L2_loadrd_pci, 0)
21621 CUSTOM_BUILTIN_MAPPING(L2_loadrub_pcr, 0)
21622 CUSTOM_BUILTIN_MAPPING(L2_loadrb_pcr, 0)
21623 CUSTOM_BUILTIN_MAPPING(L2_loadruh_pcr, 0)
21624 CUSTOM_BUILTIN_MAPPING(L2_loadrh_pcr, 0)
21625 CUSTOM_BUILTIN_MAPPING(L2_loadri_pcr, 0)
21626 CUSTOM_BUILTIN_MAPPING(L2_loadrd_pcr, 0)
21627 CUSTOM_BUILTIN_MAPPING(S2_storerb_pci, 0)
21628 CUSTOM_BUILTIN_MAPPING(S2_storerh_pci, 0)
21629 CUSTOM_BUILTIN_MAPPING(S2_storerf_pci, 0)
21630 CUSTOM_BUILTIN_MAPPING(S2_storeri_pci, 0)
21631 CUSTOM_BUILTIN_MAPPING(S2_storerd_pci, 0)
21632 CUSTOM_BUILTIN_MAPPING(S2_storerb_pcr, 0)
21633 CUSTOM_BUILTIN_MAPPING(S2_storerh_pcr, 0)
21634 CUSTOM_BUILTIN_MAPPING(S2_storerf_pcr, 0)
21635 CUSTOM_BUILTIN_MAPPING(S2_storeri_pcr, 0)
21636 CUSTOM_BUILTIN_MAPPING(S2_storerd_pcr, 0)
21637 // Legacy builtins that take a vector in place of a vector predicate.
21638 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstoreq, 64)
21639 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorenq, 64)
21640 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentq, 64)
21641 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentnq, 64)
21642 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstoreq_128B, 128)
21643 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorenq_128B, 128)
21644 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentq_128B, 128)
21645 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentnq_128B, 128)
21646 #include "clang/Basic/BuiltinsHexagonMapCustomDep.def"
21647 #undef CUSTOM_BUILTIN_MAPPING
21648 };
21649
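  // Sort the table once, on first use, so that lower_bound below can
  // binary-search it by builtin ID.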
21650 auto CmpInfo = [] (Info A, Info B) { return A.BuiltinID < B.BuiltinID; };
21651 static const bool SortOnce = (llvm::sort(Infos, CmpInfo), true);
21652 (void)SortOnce;
21653
21654 const Info *F = llvm::lower_bound(Infos, Info{BuiltinID, 0, 0}, CmpInfo);
21655 if (F == std::end(Infos) || F->BuiltinID != BuiltinID)
21656 return {Intrinsic::not_intrinsic, 0};
21657
21658 return {F->IntrinsicID, F->VecLen};
21659 }
21660
21661 Value *CodeGenFunction::EmitHexagonBuiltinExpr(unsigned BuiltinID,
21662 const CallExpr *E) {
21663 Intrinsic::ID ID;
21664 unsigned VecLen;
21665 std::tie(ID, VecLen) = getIntrinsicForHexagonNonClangBuiltin(BuiltinID);
21666
21667 auto MakeCircOp = [this, E](unsigned IntID, bool IsLoad) {
21668 // The base pointer is passed by address, so it needs to be loaded.
21669 Address A = EmitPointerWithAlignment(E->getArg(0));
21670 Address BP = Address(A.emitRawPointer(*this), Int8PtrTy, A.getAlignment());
21671 llvm::Value *Base = Builder.CreateLoad(BP);
21672 // The treatment of both loads and stores is the same: the arguments for
21673 // the builtin are the same as the arguments for the intrinsic.
21674 // Load:
21675 // builtin(Base, Inc, Mod, Start) -> intr(Base, Inc, Mod, Start)
21676 // builtin(Base, Mod, Start) -> intr(Base, Mod, Start)
21677 // Store:
21678 // builtin(Base, Inc, Mod, Val, Start) -> intr(Base, Inc, Mod, Val, Start)
21679 // builtin(Base, Mod, Val, Start) -> intr(Base, Mod, Val, Start)
21680 SmallVector<llvm::Value*,5> Ops = { Base };
21681 for (unsigned i = 1, e = E->getNumArgs(); i != e; ++i)
21682 Ops.push_back(EmitScalarExpr(E->getArg(i)));
21683
21684 llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(IntID), Ops);
21685 // The load intrinsics generate two results (Value, NewBase), stores
21686 // generate one (NewBase). The new base address needs to be stored.
21687 llvm::Value *NewBase = IsLoad ? Builder.CreateExtractValue(Result, 1)
21688 : Result;
21689 llvm::Value *LV = EmitScalarExpr(E->getArg(0));
21690 Address Dest = EmitPointerWithAlignment(E->getArg(0));
21691 llvm::Value *RetVal =
21692 Builder.CreateAlignedStore(NewBase, LV, Dest.getAlignment());
21693 if (IsLoad)
21694 RetVal = Builder.CreateExtractValue(Result, 0);
21695 return RetVal;
21696 };
21697
21698 // Handle the conversion of bit-reverse load builtins to bitcode.
21699 // The emitted intrinsic call only reads from memory; the write to memory is
21700 // handled by the explicit store instruction.
21701 auto MakeBrevLd = [this, E](unsigned IntID, llvm::Type *DestTy) {
21702 // The intrinsic produces the loaded value together with an updated base
21703 // pointer, and the builtin returns the updated base. The destination for the
21704 // loaded value is passed to the builtin by address, so it must be stored.
21705 llvm::Value *BaseAddress = EmitScalarExpr(E->getArg(0));
21706
21707 // Expressions like &(*pt++) are incremented on each evaluation;
21708 // EmitPointerWithAlignment and EmitScalarExpr each evaluate the expression
21709 // once per call, so the destination argument is evaluated exactly once here.
21710 Address DestAddr = EmitPointerWithAlignment(E->getArg(1));
21711 DestAddr = DestAddr.withElementType(Int8Ty);
21712 llvm::Value *DestAddress = DestAddr.emitRawPointer(*this);
21713
21714 // Operands are Base, Dest, Modifier.
21715 // The intrinsic format in LLVM IR is defined as
21716 // { ValueType, i8* } (i8*, i32).
21717 llvm::Value *Result = Builder.CreateCall(
21718 CGM.getIntrinsic(IntID), {BaseAddress, EmitScalarExpr(E->getArg(2))});
21719
21720 // The value needs to be stored as the variable is passed by reference.
21721 llvm::Value *DestVal = Builder.CreateExtractValue(Result, 0);
21722
21723 // The stored value needs to be truncated to fit the destination type.
21724 // While i32 and i64 are natively supported on Hexagon, i8 and i16 need
21725 // to be handled with stores of the respective destination type.
21726 DestVal = Builder.CreateTrunc(DestVal, DestTy);
21727
21728 Builder.CreateAlignedStore(DestVal, DestAddress, DestAddr.getAlignment());
21729 // The updated value of the base pointer is returned.
21730 return Builder.CreateExtractValue(Result, 1);
21731 };
21732
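  // V2Q and Q2V convert between HVX byte vectors and vector predicates via the
  // vandvrt / vandqrt intrinsics with an all-ones scalar mask, selecting the
  // 128B variants for 128-byte vectors.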
21733 auto V2Q = [this, VecLen] (llvm::Value *Vec) {
21734 Intrinsic::ID ID = VecLen == 128 ? Intrinsic::hexagon_V6_vandvrt_128B
21735 : Intrinsic::hexagon_V6_vandvrt;
21736 return Builder.CreateCall(CGM.getIntrinsic(ID),
21737 {Vec, Builder.getInt32(-1)});
21738 };
21739 auto Q2V = [this, VecLen] (llvm::Value *Pred) {
21740 Intrinsic::ID ID = VecLen == 128 ? Intrinsic::hexagon_V6_vandqrt_128B
21741 : Intrinsic::hexagon_V6_vandqrt;
21742 return Builder.CreateCall(CGM.getIntrinsic(ID),
21743 {Pred, Builder.getInt32(-1)});
21744 };
21745
21746 switch (BuiltinID) {
21747 // These intrinsics return a tuple {Vector, VectorPred} in LLVM IR,
21748 // and the corresponding C/C++ builtins use loads/stores to update
21749 // the predicate.
21750 case Hexagon::BI__builtin_HEXAGON_V6_vaddcarry:
21751 case Hexagon::BI__builtin_HEXAGON_V6_vaddcarry_128B:
21752 case Hexagon::BI__builtin_HEXAGON_V6_vsubcarry:
21753 case Hexagon::BI__builtin_HEXAGON_V6_vsubcarry_128B: {
21754 // Get the type from the 0-th argument.
21755 llvm::Type *VecType = ConvertType(E->getArg(0)->getType());
21756 Address PredAddr =
21757 EmitPointerWithAlignment(E->getArg(2)).withElementType(VecType);
21758 llvm::Value *PredIn = V2Q(Builder.CreateLoad(PredAddr));
21759 llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(ID),
21760 {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1)), PredIn});
21761
21762 llvm::Value *PredOut = Builder.CreateExtractValue(Result, 1);
21763 Builder.CreateAlignedStore(Q2V(PredOut), PredAddr.emitRawPointer(*this),
21764 PredAddr.getAlignment());
21765 return Builder.CreateExtractValue(Result, 0);
21766 }
21767 // These are identical to the builtins above, except they don't consume
21768 // input carry, only generate carry-out. Since they still produce two
21769 // outputs, generate the store of the predicate, but no load.
21770 case Hexagon::BI__builtin_HEXAGON_V6_vaddcarryo:
21771 case Hexagon::BI__builtin_HEXAGON_V6_vaddcarryo_128B:
21772 case Hexagon::BI__builtin_HEXAGON_V6_vsubcarryo:
21773 case Hexagon::BI__builtin_HEXAGON_V6_vsubcarryo_128B: {
21774 // Get the type from the 0-th argument.
21775 llvm::Type *VecType = ConvertType(E->getArg(0)->getType());
21776 Address PredAddr =
21777 EmitPointerWithAlignment(E->getArg(2)).withElementType(VecType);
21778 llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(ID),
21779 {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1))});
21780
21781 llvm::Value *PredOut = Builder.CreateExtractValue(Result, 1);
21782 Builder.CreateAlignedStore(Q2V(PredOut), PredAddr.emitRawPointer(*this),
21783 PredAddr.getAlignment());
21784 return Builder.CreateExtractValue(Result, 0);
21785 }
21786
21787 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstoreq:
21788 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorenq:
21789 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorentq:
21790 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorentnq:
21791 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstoreq_128B:
21792 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorenq_128B:
21793 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorentq_128B:
21794 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorentnq_128B: {
21795 SmallVector<llvm::Value*,4> Ops;
21796 const Expr *PredOp = E->getArg(0);
21797 // There will be an implicit cast to a boolean vector. Strip it.
21798 if (auto *Cast = dyn_cast<ImplicitCastExpr>(PredOp)) {
21799 if (Cast->getCastKind() == CK_BitCast)
21800 PredOp = Cast->getSubExpr();
21801 Ops.push_back(V2Q(EmitScalarExpr(PredOp)));
21802 }
21803 for (int i = 1, e = E->getNumArgs(); i != e; ++i)
21804 Ops.push_back(EmitScalarExpr(E->getArg(i)));
21805 return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
21806 }
21807
21808 case Hexagon::BI__builtin_HEXAGON_L2_loadrub_pci:
21809 case Hexagon::BI__builtin_HEXAGON_L2_loadrb_pci:
21810 case Hexagon::BI__builtin_HEXAGON_L2_loadruh_pci:
21811 case Hexagon::BI__builtin_HEXAGON_L2_loadrh_pci:
21812 case Hexagon::BI__builtin_HEXAGON_L2_loadri_pci:
21813 case Hexagon::BI__builtin_HEXAGON_L2_loadrd_pci:
21814 case Hexagon::BI__builtin_HEXAGON_L2_loadrub_pcr:
21815 case Hexagon::BI__builtin_HEXAGON_L2_loadrb_pcr:
21816 case Hexagon::BI__builtin_HEXAGON_L2_loadruh_pcr:
21817 case Hexagon::BI__builtin_HEXAGON_L2_loadrh_pcr:
21818 case Hexagon::BI__builtin_HEXAGON_L2_loadri_pcr:
21819 case Hexagon::BI__builtin_HEXAGON_L2_loadrd_pcr:
21820 return MakeCircOp(ID, /*IsLoad=*/true);
21821 case Hexagon::BI__builtin_HEXAGON_S2_storerb_pci:
21822 case Hexagon::BI__builtin_HEXAGON_S2_storerh_pci:
21823 case Hexagon::BI__builtin_HEXAGON_S2_storerf_pci:
21824 case Hexagon::BI__builtin_HEXAGON_S2_storeri_pci:
21825 case Hexagon::BI__builtin_HEXAGON_S2_storerd_pci:
21826 case Hexagon::BI__builtin_HEXAGON_S2_storerb_pcr:
21827 case Hexagon::BI__builtin_HEXAGON_S2_storerh_pcr:
21828 case Hexagon::BI__builtin_HEXAGON_S2_storerf_pcr:
21829 case Hexagon::BI__builtin_HEXAGON_S2_storeri_pcr:
21830 case Hexagon::BI__builtin_HEXAGON_S2_storerd_pcr:
21831 return MakeCircOp(ID, /*IsLoad=*/false);
21832 case Hexagon::BI__builtin_brev_ldub:
21833 return MakeBrevLd(Intrinsic::hexagon_L2_loadrub_pbr, Int8Ty);
21834 case Hexagon::BI__builtin_brev_ldb:
21835 return MakeBrevLd(Intrinsic::hexagon_L2_loadrb_pbr, Int8Ty);
21836 case Hexagon::BI__builtin_brev_lduh:
21837 return MakeBrevLd(Intrinsic::hexagon_L2_loadruh_pbr, Int16Ty);
21838 case Hexagon::BI__builtin_brev_ldh:
21839 return MakeBrevLd(Intrinsic::hexagon_L2_loadrh_pbr, Int16Ty);
21840 case Hexagon::BI__builtin_brev_ldw:
21841 return MakeBrevLd(Intrinsic::hexagon_L2_loadri_pbr, Int32Ty);
21842 case Hexagon::BI__builtin_brev_ldd:
21843 return MakeBrevLd(Intrinsic::hexagon_L2_loadrd_pbr, Int64Ty);
21844 } // switch
21845
21846 return nullptr;
21847 }
21848
21849 Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID,
21850 const CallExpr *E,
21851 ReturnValueSlot ReturnValue) {
21852 SmallVector<Value *, 4> Ops;
21853 llvm::Type *ResultType = ConvertType(E->getType());
21854
21855 // Find out if any arguments are required to be integer constant expressions.
21856 unsigned ICEArguments = 0;
21857 ASTContext::GetBuiltinTypeError Error;
21858 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
21859 if (Error == ASTContext::GE_Missing_type) {
21860 // Vector intrinsics don't have a type string.
21861 assert(BuiltinID >= clang::RISCV::FirstRVVBuiltin &&
21862 BuiltinID <= clang::RISCV::LastRVVBuiltin);
21863 ICEArguments = 0;
21864 if (BuiltinID == RISCVVector::BI__builtin_rvv_vget_v ||
21865 BuiltinID == RISCVVector::BI__builtin_rvv_vset_v)
21866 ICEArguments = 1 << 1;
21867 } else {
21868 assert(Error == ASTContext::GE_None && "Unexpected error");
21869 }
21870
21871 if (BuiltinID == RISCV::BI__builtin_riscv_ntl_load)
21872 ICEArguments |= (1 << 1);
21873 if (BuiltinID == RISCV::BI__builtin_riscv_ntl_store)
21874 ICEArguments |= (1 << 2);
21875
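  // ICEArguments is a bitmask: bit i being set means argument i must be an
  // integer constant expression, in which case it is constant-folded instead
  // of being emitted as a scalar expression.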
21876 for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
21877 // Handle aggregate arguments, namely RVV tuple types in segment loads/stores.
21878 if (hasAggregateEvaluationKind(E->getArg(i)->getType())) {
21879 LValue L = EmitAggExprToLValue(E->getArg(i));
21880 llvm::Value *AggValue = Builder.CreateLoad(L.getAddress());
21881 Ops.push_back(AggValue);
21882 continue;
21883 }
21884 Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));
21885 }

  Intrinsic::ID ID = Intrinsic::not_intrinsic;
  unsigned NF = 1;
  // The 0th bit simulates the `vta` (tail-agnostic) bit of RVV;
  // the 1st bit simulates the `vma` (mask-agnostic) bit.
  constexpr unsigned RVV_VTA = 0x1;
  constexpr unsigned RVV_VMA = 0x2;
  int PolicyAttrs = 0;
  bool IsMasked = false;
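  // NF, PolicyAttrs and IsMasked, together with Ops and IntrinsicTypes, are
  // consumed by the generated RVV codegen fragments included at the bottom of
  // the switch below.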

  // Required for overloaded intrinsics.
  llvm::SmallVector<llvm::Type *, 2> IntrinsicTypes;
  switch (BuiltinID) {
  default: llvm_unreachable("unexpected builtin ID");
  case RISCV::BI__builtin_riscv_orc_b_32:
  case RISCV::BI__builtin_riscv_orc_b_64:
  case RISCV::BI__builtin_riscv_clz_32:
  case RISCV::BI__builtin_riscv_clz_64:
  case RISCV::BI__builtin_riscv_ctz_32:
  case RISCV::BI__builtin_riscv_ctz_64:
  case RISCV::BI__builtin_riscv_clmul_32:
  case RISCV::BI__builtin_riscv_clmul_64:
  case RISCV::BI__builtin_riscv_clmulh_32:
  case RISCV::BI__builtin_riscv_clmulh_64:
  case RISCV::BI__builtin_riscv_clmulr_32:
  case RISCV::BI__builtin_riscv_clmulr_64:
  case RISCV::BI__builtin_riscv_xperm4_32:
  case RISCV::BI__builtin_riscv_xperm4_64:
  case RISCV::BI__builtin_riscv_xperm8_32:
  case RISCV::BI__builtin_riscv_xperm8_64:
  case RISCV::BI__builtin_riscv_brev8_32:
  case RISCV::BI__builtin_riscv_brev8_64:
  case RISCV::BI__builtin_riscv_zip_32:
  case RISCV::BI__builtin_riscv_unzip_32: {
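    // All of these builtins lower to a single intrinsic overloaded on the
    // result type, e.g. __builtin_riscv_orc_b_32 becomes a call to
    // llvm.riscv.orc.b with an i32 overload. The exceptions are clz/ctz,
    // which are emitted directly as the generic ctlz/cttz intrinsics.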
    switch (BuiltinID) {
    default: llvm_unreachable("unexpected builtin ID");
    // Zbb
    case RISCV::BI__builtin_riscv_orc_b_32:
    case RISCV::BI__builtin_riscv_orc_b_64:
      ID = Intrinsic::riscv_orc_b;
      break;
    case RISCV::BI__builtin_riscv_clz_32:
    case RISCV::BI__builtin_riscv_clz_64: {
      Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType());
      Value *Result = Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)});
      if (Result->getType() != ResultType)
        Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
                                       "cast");
      return Result;
    }
    case RISCV::BI__builtin_riscv_ctz_32:
    case RISCV::BI__builtin_riscv_ctz_64: {
      Function *F = CGM.getIntrinsic(Intrinsic::cttz, Ops[0]->getType());
      Value *Result = Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)});
      if (Result->getType() != ResultType)
        Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
                                       "cast");
      return Result;
    }

    // Zbc
    case RISCV::BI__builtin_riscv_clmul_32:
    case RISCV::BI__builtin_riscv_clmul_64:
      ID = Intrinsic::riscv_clmul;
      break;
    case RISCV::BI__builtin_riscv_clmulh_32:
    case RISCV::BI__builtin_riscv_clmulh_64:
      ID = Intrinsic::riscv_clmulh;
      break;
    case RISCV::BI__builtin_riscv_clmulr_32:
    case RISCV::BI__builtin_riscv_clmulr_64:
      ID = Intrinsic::riscv_clmulr;
      break;

    // Zbkx
    case RISCV::BI__builtin_riscv_xperm8_32:
    case RISCV::BI__builtin_riscv_xperm8_64:
      ID = Intrinsic::riscv_xperm8;
      break;
    case RISCV::BI__builtin_riscv_xperm4_32:
    case RISCV::BI__builtin_riscv_xperm4_64:
      ID = Intrinsic::riscv_xperm4;
      break;

    // Zbkb
    case RISCV::BI__builtin_riscv_brev8_32:
    case RISCV::BI__builtin_riscv_brev8_64:
      ID = Intrinsic::riscv_brev8;
      break;
    case RISCV::BI__builtin_riscv_zip_32:
      ID = Intrinsic::riscv_zip;
      break;
    case RISCV::BI__builtin_riscv_unzip_32:
      ID = Intrinsic::riscv_unzip;
      break;
    }

    IntrinsicTypes = {ResultType};
    break;
  }

  // Zk builtins

  // Zknh
  case RISCV::BI__builtin_riscv_sha256sig0:
    ID = Intrinsic::riscv_sha256sig0;
    break;
  case RISCV::BI__builtin_riscv_sha256sig1:
    ID = Intrinsic::riscv_sha256sig1;
    break;
  case RISCV::BI__builtin_riscv_sha256sum0:
    ID = Intrinsic::riscv_sha256sum0;
    break;
  case RISCV::BI__builtin_riscv_sha256sum1:
    ID = Intrinsic::riscv_sha256sum1;
    break;

  // Zksed
  case RISCV::BI__builtin_riscv_sm4ks:
    ID = Intrinsic::riscv_sm4ks;
    break;
  case RISCV::BI__builtin_riscv_sm4ed:
    ID = Intrinsic::riscv_sm4ed;
    break;

  // Zksh
  case RISCV::BI__builtin_riscv_sm3p0:
    ID = Intrinsic::riscv_sm3p0;
    break;
  case RISCV::BI__builtin_riscv_sm3p1:
    ID = Intrinsic::riscv_sm3p1;
    break;

  // Zihintntl
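  // The Zihintntl builtins lower to ordinary loads/stores that carry
  // !nontemporal metadata plus a "riscv-nontemporal-domain" node holding the
  // locality-domain hint. For example, __builtin_riscv_ntl_load(p) with no
  // explicit domain operand is emitted with the default domain 5
  // (__RISCV_NTLH_ALL).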
  case RISCV::BI__builtin_riscv_ntl_load: {
    llvm::Type *ResTy = ConvertType(E->getType());
    unsigned DomainVal = 5; // Default __RISCV_NTLH_ALL
    if (Ops.size() == 2)
      DomainVal = cast<ConstantInt>(Ops[1])->getZExtValue();

    llvm::MDNode *RISCVDomainNode = llvm::MDNode::get(
        getLLVMContext(),
        llvm::ConstantAsMetadata::get(Builder.getInt32(DomainVal)));
    llvm::MDNode *NontemporalNode = llvm::MDNode::get(
        getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1)));

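    // Compute the access width in bits: for scalable vector results use the
    // known-minimum vector size, otherwise the primitive size. Width/8 then
    // serves as the assumed alignment of the load.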
    int Width;
    if (ResTy->isScalableTy()) {
      const ScalableVectorType *SVTy = cast<ScalableVectorType>(ResTy);
      llvm::Type *ScalarTy = ResTy->getScalarType();
      Width = ScalarTy->getPrimitiveSizeInBits() *
              SVTy->getElementCount().getKnownMinValue();
    } else {
      Width = ResTy->getPrimitiveSizeInBits();
    }
    LoadInst *Load = Builder.CreateLoad(
        Address(Ops[0], ResTy, CharUnits::fromQuantity(Width / 8)));

    Load->setMetadata(llvm::LLVMContext::MD_nontemporal, NontemporalNode);
    Load->setMetadata(CGM.getModule().getMDKindID("riscv-nontemporal-domain"),
                      RISCVDomainNode);

    return Load;
  }
  case RISCV::BI__builtin_riscv_ntl_store: {
    unsigned DomainVal = 5; // Default __RISCV_NTLH_ALL
    if (Ops.size() == 3)
      DomainVal = cast<ConstantInt>(Ops[2])->getZExtValue();

    llvm::MDNode *RISCVDomainNode = llvm::MDNode::get(
        getLLVMContext(),
        llvm::ConstantAsMetadata::get(Builder.getInt32(DomainVal)));
    llvm::MDNode *NontemporalNode = llvm::MDNode::get(
        getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1)));

    StoreInst *Store = Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
    Store->setMetadata(llvm::LLVMContext::MD_nontemporal, NontemporalNode);
    Store->setMetadata(CGM.getModule().getMDKindID("riscv-nontemporal-domain"),
                       RISCVDomainNode);

    return Store;
  }

  // Vector builtins are handled from here.
#include "clang/Basic/riscv_vector_builtin_cg.inc"
  // SiFive Vector builtins are handled from here.
#include "clang/Basic/riscv_sifive_vector_builtin_cg.inc"
  }

  assert(ID != Intrinsic::not_intrinsic);

  llvm::Function *F = CGM.getIntrinsic(ID, IntrinsicTypes);
  return Builder.CreateCall(F, Ops, "");
}
