xref: /freebsd/contrib/llvm-project/clang/lib/CodeGen/TargetBuiltins/X86.cpp (revision 700637cbb5e582861067a11aaca4d053546871d2)
1*700637cbSDimitry Andric //===---------- X86.cpp - Emit LLVM Code for builtins ---------------------===//
2*700637cbSDimitry Andric //
3*700637cbSDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4*700637cbSDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5*700637cbSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6*700637cbSDimitry Andric //
7*700637cbSDimitry Andric //===----------------------------------------------------------------------===//
8*700637cbSDimitry Andric //
9*700637cbSDimitry Andric // This contains code to emit Builtin calls as LLVM code.
10*700637cbSDimitry Andric //
11*700637cbSDimitry Andric //===----------------------------------------------------------------------===//
12*700637cbSDimitry Andric 
13*700637cbSDimitry Andric #include "CGBuiltin.h"
14*700637cbSDimitry Andric #include "clang/Basic/TargetBuiltins.h"
15*700637cbSDimitry Andric #include "llvm/IR/InlineAsm.h"
16*700637cbSDimitry Andric #include "llvm/IR/IntrinsicsX86.h"
17*700637cbSDimitry Andric #include "llvm/TargetParser/X86TargetParser.h"
18*700637cbSDimitry Andric 
19*700637cbSDimitry Andric using namespace clang;
20*700637cbSDimitry Andric using namespace CodeGen;
21*700637cbSDimitry Andric using namespace llvm;
22*700637cbSDimitry Andric 
23*700637cbSDimitry Andric static std::optional<CodeGenFunction::MSVCIntrin>
translateX86ToMsvcIntrin(unsigned BuiltinID)24*700637cbSDimitry Andric translateX86ToMsvcIntrin(unsigned BuiltinID) {
25*700637cbSDimitry Andric   using MSVCIntrin = CodeGenFunction::MSVCIntrin;
26*700637cbSDimitry Andric   switch (BuiltinID) {
27*700637cbSDimitry Andric   default:
28*700637cbSDimitry Andric     return std::nullopt;
29*700637cbSDimitry Andric   case clang::X86::BI_BitScanForward:
30*700637cbSDimitry Andric   case clang::X86::BI_BitScanForward64:
31*700637cbSDimitry Andric     return MSVCIntrin::_BitScanForward;
32*700637cbSDimitry Andric   case clang::X86::BI_BitScanReverse:
33*700637cbSDimitry Andric   case clang::X86::BI_BitScanReverse64:
34*700637cbSDimitry Andric     return MSVCIntrin::_BitScanReverse;
35*700637cbSDimitry Andric   case clang::X86::BI_InterlockedAnd64:
36*700637cbSDimitry Andric     return MSVCIntrin::_InterlockedAnd;
37*700637cbSDimitry Andric   case clang::X86::BI_InterlockedCompareExchange128:
38*700637cbSDimitry Andric     return MSVCIntrin::_InterlockedCompareExchange128;
39*700637cbSDimitry Andric   case clang::X86::BI_InterlockedExchange64:
40*700637cbSDimitry Andric     return MSVCIntrin::_InterlockedExchange;
41*700637cbSDimitry Andric   case clang::X86::BI_InterlockedExchangeAdd64:
42*700637cbSDimitry Andric     return MSVCIntrin::_InterlockedExchangeAdd;
43*700637cbSDimitry Andric   case clang::X86::BI_InterlockedExchangeSub64:
44*700637cbSDimitry Andric     return MSVCIntrin::_InterlockedExchangeSub;
45*700637cbSDimitry Andric   case clang::X86::BI_InterlockedOr64:
46*700637cbSDimitry Andric     return MSVCIntrin::_InterlockedOr;
47*700637cbSDimitry Andric   case clang::X86::BI_InterlockedXor64:
48*700637cbSDimitry Andric     return MSVCIntrin::_InterlockedXor;
49*700637cbSDimitry Andric   case clang::X86::BI_InterlockedDecrement64:
50*700637cbSDimitry Andric     return MSVCIntrin::_InterlockedDecrement;
51*700637cbSDimitry Andric   case clang::X86::BI_InterlockedIncrement64:
52*700637cbSDimitry Andric     return MSVCIntrin::_InterlockedIncrement;
53*700637cbSDimitry Andric   }
54*700637cbSDimitry Andric   llvm_unreachable("must return from switch");
55*700637cbSDimitry Andric }
56*700637cbSDimitry Andric 
57*700637cbSDimitry Andric // Convert the mask from an integer type to a vector of i1.
getMaskVecValue(CodeGenFunction & CGF,Value * Mask,unsigned NumElts)58*700637cbSDimitry Andric static Value *getMaskVecValue(CodeGenFunction &CGF, Value *Mask,
59*700637cbSDimitry Andric                               unsigned NumElts) {
60*700637cbSDimitry Andric 
61*700637cbSDimitry Andric   auto *MaskTy = llvm::FixedVectorType::get(
62*700637cbSDimitry Andric       CGF.Builder.getInt1Ty(),
63*700637cbSDimitry Andric       cast<IntegerType>(Mask->getType())->getBitWidth());
64*700637cbSDimitry Andric   Value *MaskVec = CGF.Builder.CreateBitCast(Mask, MaskTy);
65*700637cbSDimitry Andric 
66*700637cbSDimitry Andric   // If we have less than 8 elements, then the starting mask was an i8 and
67*700637cbSDimitry Andric   // we need to extract down to the right number of elements.
68*700637cbSDimitry Andric   if (NumElts < 8) {
69*700637cbSDimitry Andric     int Indices[4];
70*700637cbSDimitry Andric     for (unsigned i = 0; i != NumElts; ++i)
71*700637cbSDimitry Andric       Indices[i] = i;
72*700637cbSDimitry Andric     MaskVec = CGF.Builder.CreateShuffleVector(
73*700637cbSDimitry Andric         MaskVec, MaskVec, ArrayRef(Indices, NumElts), "extract");
74*700637cbSDimitry Andric   }
75*700637cbSDimitry Andric   return MaskVec;
76*700637cbSDimitry Andric }
77*700637cbSDimitry Andric 
EmitX86MaskedStore(CodeGenFunction & CGF,ArrayRef<Value * > Ops,Align Alignment)78*700637cbSDimitry Andric static Value *EmitX86MaskedStore(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
79*700637cbSDimitry Andric                                  Align Alignment) {
80*700637cbSDimitry Andric   Value *Ptr = Ops[0];
81*700637cbSDimitry Andric 
82*700637cbSDimitry Andric   Value *MaskVec = getMaskVecValue(
83*700637cbSDimitry Andric       CGF, Ops[2],
84*700637cbSDimitry Andric       cast<llvm::FixedVectorType>(Ops[1]->getType())->getNumElements());
85*700637cbSDimitry Andric 
86*700637cbSDimitry Andric   return CGF.Builder.CreateMaskedStore(Ops[1], Ptr, Alignment, MaskVec);
87*700637cbSDimitry Andric }
88*700637cbSDimitry Andric 
EmitX86MaskedLoad(CodeGenFunction & CGF,ArrayRef<Value * > Ops,Align Alignment)89*700637cbSDimitry Andric static Value *EmitX86MaskedLoad(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
90*700637cbSDimitry Andric                                 Align Alignment) {
91*700637cbSDimitry Andric   llvm::Type *Ty = Ops[1]->getType();
92*700637cbSDimitry Andric   Value *Ptr = Ops[0];
93*700637cbSDimitry Andric 
94*700637cbSDimitry Andric   Value *MaskVec = getMaskVecValue(
95*700637cbSDimitry Andric       CGF, Ops[2], cast<llvm::FixedVectorType>(Ty)->getNumElements());
96*700637cbSDimitry Andric 
97*700637cbSDimitry Andric   return CGF.Builder.CreateMaskedLoad(Ty, Ptr, Alignment, MaskVec, Ops[1]);
98*700637cbSDimitry Andric }
99*700637cbSDimitry Andric 
EmitX86ExpandLoad(CodeGenFunction & CGF,ArrayRef<Value * > Ops)100*700637cbSDimitry Andric static Value *EmitX86ExpandLoad(CodeGenFunction &CGF,
101*700637cbSDimitry Andric                                 ArrayRef<Value *> Ops) {
102*700637cbSDimitry Andric   auto *ResultTy = cast<llvm::VectorType>(Ops[1]->getType());
103*700637cbSDimitry Andric   Value *Ptr = Ops[0];
104*700637cbSDimitry Andric 
105*700637cbSDimitry Andric   Value *MaskVec = getMaskVecValue(
106*700637cbSDimitry Andric       CGF, Ops[2], cast<FixedVectorType>(ResultTy)->getNumElements());
107*700637cbSDimitry Andric 
108*700637cbSDimitry Andric   llvm::Function *F = CGF.CGM.getIntrinsic(Intrinsic::masked_expandload,
109*700637cbSDimitry Andric                                            ResultTy);
110*700637cbSDimitry Andric   return CGF.Builder.CreateCall(F, { Ptr, MaskVec, Ops[1] });
111*700637cbSDimitry Andric }
112*700637cbSDimitry Andric 
EmitX86CompressExpand(CodeGenFunction & CGF,ArrayRef<Value * > Ops,bool IsCompress)113*700637cbSDimitry Andric static Value *EmitX86CompressExpand(CodeGenFunction &CGF,
114*700637cbSDimitry Andric                                     ArrayRef<Value *> Ops,
115*700637cbSDimitry Andric                                     bool IsCompress) {
116*700637cbSDimitry Andric   auto *ResultTy = cast<llvm::FixedVectorType>(Ops[1]->getType());
117*700637cbSDimitry Andric 
118*700637cbSDimitry Andric   Value *MaskVec = getMaskVecValue(CGF, Ops[2], ResultTy->getNumElements());
119*700637cbSDimitry Andric 
120*700637cbSDimitry Andric   Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
121*700637cbSDimitry Andric                                  : Intrinsic::x86_avx512_mask_expand;
122*700637cbSDimitry Andric   llvm::Function *F = CGF.CGM.getIntrinsic(IID, ResultTy);
123*700637cbSDimitry Andric   return CGF.Builder.CreateCall(F, { Ops[0], Ops[1], MaskVec });
124*700637cbSDimitry Andric }
125*700637cbSDimitry Andric 
EmitX86CompressStore(CodeGenFunction & CGF,ArrayRef<Value * > Ops)126*700637cbSDimitry Andric static Value *EmitX86CompressStore(CodeGenFunction &CGF,
127*700637cbSDimitry Andric                                    ArrayRef<Value *> Ops) {
128*700637cbSDimitry Andric   auto *ResultTy = cast<llvm::FixedVectorType>(Ops[1]->getType());
129*700637cbSDimitry Andric   Value *Ptr = Ops[0];
130*700637cbSDimitry Andric 
131*700637cbSDimitry Andric   Value *MaskVec = getMaskVecValue(CGF, Ops[2], ResultTy->getNumElements());
132*700637cbSDimitry Andric 
133*700637cbSDimitry Andric   llvm::Function *F = CGF.CGM.getIntrinsic(Intrinsic::masked_compressstore,
134*700637cbSDimitry Andric                                            ResultTy);
135*700637cbSDimitry Andric   return CGF.Builder.CreateCall(F, { Ops[1], Ptr, MaskVec });
136*700637cbSDimitry Andric }
137*700637cbSDimitry Andric 
EmitX86MaskLogic(CodeGenFunction & CGF,Instruction::BinaryOps Opc,ArrayRef<Value * > Ops,bool InvertLHS=false)138*700637cbSDimitry Andric static Value *EmitX86MaskLogic(CodeGenFunction &CGF, Instruction::BinaryOps Opc,
139*700637cbSDimitry Andric                               ArrayRef<Value *> Ops,
140*700637cbSDimitry Andric                               bool InvertLHS = false) {
141*700637cbSDimitry Andric   unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
142*700637cbSDimitry Andric   Value *LHS = getMaskVecValue(CGF, Ops[0], NumElts);
143*700637cbSDimitry Andric   Value *RHS = getMaskVecValue(CGF, Ops[1], NumElts);
144*700637cbSDimitry Andric 
145*700637cbSDimitry Andric   if (InvertLHS)
146*700637cbSDimitry Andric     LHS = CGF.Builder.CreateNot(LHS);
147*700637cbSDimitry Andric 
148*700637cbSDimitry Andric   return CGF.Builder.CreateBitCast(CGF.Builder.CreateBinOp(Opc, LHS, RHS),
149*700637cbSDimitry Andric                                    Ops[0]->getType());
150*700637cbSDimitry Andric }
151*700637cbSDimitry Andric 
EmitX86FunnelShift(CodeGenFunction & CGF,Value * Op0,Value * Op1,Value * Amt,bool IsRight)152*700637cbSDimitry Andric static Value *EmitX86FunnelShift(CodeGenFunction &CGF, Value *Op0, Value *Op1,
153*700637cbSDimitry Andric                                  Value *Amt, bool IsRight) {
154*700637cbSDimitry Andric   llvm::Type *Ty = Op0->getType();
155*700637cbSDimitry Andric 
156*700637cbSDimitry Andric   // Amount may be scalar immediate, in which case create a splat vector.
157*700637cbSDimitry Andric   // Funnel shifts amounts are treated as modulo and types are all power-of-2 so
158*700637cbSDimitry Andric   // we only care about the lowest log2 bits anyway.
159*700637cbSDimitry Andric   if (Amt->getType() != Ty) {
160*700637cbSDimitry Andric     unsigned NumElts = cast<llvm::FixedVectorType>(Ty)->getNumElements();
161*700637cbSDimitry Andric     Amt = CGF.Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
162*700637cbSDimitry Andric     Amt = CGF.Builder.CreateVectorSplat(NumElts, Amt);
163*700637cbSDimitry Andric   }
164*700637cbSDimitry Andric 
165*700637cbSDimitry Andric   unsigned IID = IsRight ? Intrinsic::fshr : Intrinsic::fshl;
166*700637cbSDimitry Andric   Function *F = CGF.CGM.getIntrinsic(IID, Ty);
167*700637cbSDimitry Andric   return CGF.Builder.CreateCall(F, {Op0, Op1, Amt});
168*700637cbSDimitry Andric }
169*700637cbSDimitry Andric 
EmitX86vpcom(CodeGenFunction & CGF,ArrayRef<Value * > Ops,bool IsSigned)170*700637cbSDimitry Andric static Value *EmitX86vpcom(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
171*700637cbSDimitry Andric                            bool IsSigned) {
172*700637cbSDimitry Andric   Value *Op0 = Ops[0];
173*700637cbSDimitry Andric   Value *Op1 = Ops[1];
174*700637cbSDimitry Andric   llvm::Type *Ty = Op0->getType();
175*700637cbSDimitry Andric   uint64_t Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
176*700637cbSDimitry Andric 
177*700637cbSDimitry Andric   CmpInst::Predicate Pred;
178*700637cbSDimitry Andric   switch (Imm) {
179*700637cbSDimitry Andric   case 0x0:
180*700637cbSDimitry Andric     Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
181*700637cbSDimitry Andric     break;
182*700637cbSDimitry Andric   case 0x1:
183*700637cbSDimitry Andric     Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
184*700637cbSDimitry Andric     break;
185*700637cbSDimitry Andric   case 0x2:
186*700637cbSDimitry Andric     Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
187*700637cbSDimitry Andric     break;
188*700637cbSDimitry Andric   case 0x3:
189*700637cbSDimitry Andric     Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
190*700637cbSDimitry Andric     break;
191*700637cbSDimitry Andric   case 0x4:
192*700637cbSDimitry Andric     Pred = ICmpInst::ICMP_EQ;
193*700637cbSDimitry Andric     break;
194*700637cbSDimitry Andric   case 0x5:
195*700637cbSDimitry Andric     Pred = ICmpInst::ICMP_NE;
196*700637cbSDimitry Andric     break;
197*700637cbSDimitry Andric   case 0x6:
198*700637cbSDimitry Andric     return llvm::Constant::getNullValue(Ty); // FALSE
199*700637cbSDimitry Andric   case 0x7:
200*700637cbSDimitry Andric     return llvm::Constant::getAllOnesValue(Ty); // TRUE
201*700637cbSDimitry Andric   default:
202*700637cbSDimitry Andric     llvm_unreachable("Unexpected XOP vpcom/vpcomu predicate");
203*700637cbSDimitry Andric   }
204*700637cbSDimitry Andric 
205*700637cbSDimitry Andric   Value *Cmp = CGF.Builder.CreateICmp(Pred, Op0, Op1);
206*700637cbSDimitry Andric   Value *Res = CGF.Builder.CreateSExt(Cmp, Ty);
207*700637cbSDimitry Andric   return Res;
208*700637cbSDimitry Andric }
209*700637cbSDimitry Andric 
EmitX86Select(CodeGenFunction & CGF,Value * Mask,Value * Op0,Value * Op1)210*700637cbSDimitry Andric static Value *EmitX86Select(CodeGenFunction &CGF,
211*700637cbSDimitry Andric                             Value *Mask, Value *Op0, Value *Op1) {
212*700637cbSDimitry Andric 
213*700637cbSDimitry Andric   // If the mask is all ones just return first argument.
214*700637cbSDimitry Andric   if (const auto *C = dyn_cast<Constant>(Mask))
215*700637cbSDimitry Andric     if (C->isAllOnesValue())
216*700637cbSDimitry Andric       return Op0;
217*700637cbSDimitry Andric 
218*700637cbSDimitry Andric   Mask = getMaskVecValue(
219*700637cbSDimitry Andric       CGF, Mask, cast<llvm::FixedVectorType>(Op0->getType())->getNumElements());
220*700637cbSDimitry Andric 
221*700637cbSDimitry Andric   return CGF.Builder.CreateSelect(Mask, Op0, Op1);
222*700637cbSDimitry Andric }
223*700637cbSDimitry Andric 
EmitX86ScalarSelect(CodeGenFunction & CGF,Value * Mask,Value * Op0,Value * Op1)224*700637cbSDimitry Andric static Value *EmitX86ScalarSelect(CodeGenFunction &CGF,
225*700637cbSDimitry Andric                                   Value *Mask, Value *Op0, Value *Op1) {
226*700637cbSDimitry Andric   // If the mask is all ones just return first argument.
227*700637cbSDimitry Andric   if (const auto *C = dyn_cast<Constant>(Mask))
228*700637cbSDimitry Andric     if (C->isAllOnesValue())
229*700637cbSDimitry Andric       return Op0;
230*700637cbSDimitry Andric 
231*700637cbSDimitry Andric   auto *MaskTy = llvm::FixedVectorType::get(
232*700637cbSDimitry Andric       CGF.Builder.getInt1Ty(), Mask->getType()->getIntegerBitWidth());
233*700637cbSDimitry Andric   Mask = CGF.Builder.CreateBitCast(Mask, MaskTy);
234*700637cbSDimitry Andric   Mask = CGF.Builder.CreateExtractElement(Mask, (uint64_t)0);
235*700637cbSDimitry Andric   return CGF.Builder.CreateSelect(Mask, Op0, Op1);
236*700637cbSDimitry Andric }
237*700637cbSDimitry Andric 
EmitX86MaskedCompareResult(CodeGenFunction & CGF,Value * Cmp,unsigned NumElts,Value * MaskIn)238*700637cbSDimitry Andric static Value *EmitX86MaskedCompareResult(CodeGenFunction &CGF, Value *Cmp,
239*700637cbSDimitry Andric                                          unsigned NumElts, Value *MaskIn) {
240*700637cbSDimitry Andric   if (MaskIn) {
241*700637cbSDimitry Andric     const auto *C = dyn_cast<Constant>(MaskIn);
242*700637cbSDimitry Andric     if (!C || !C->isAllOnesValue())
243*700637cbSDimitry Andric       Cmp = CGF.Builder.CreateAnd(Cmp, getMaskVecValue(CGF, MaskIn, NumElts));
244*700637cbSDimitry Andric   }
245*700637cbSDimitry Andric 
246*700637cbSDimitry Andric   if (NumElts < 8) {
247*700637cbSDimitry Andric     int Indices[8];
248*700637cbSDimitry Andric     for (unsigned i = 0; i != NumElts; ++i)
249*700637cbSDimitry Andric       Indices[i] = i;
250*700637cbSDimitry Andric     for (unsigned i = NumElts; i != 8; ++i)
251*700637cbSDimitry Andric       Indices[i] = i % NumElts + NumElts;
252*700637cbSDimitry Andric     Cmp = CGF.Builder.CreateShuffleVector(
253*700637cbSDimitry Andric         Cmp, llvm::Constant::getNullValue(Cmp->getType()), Indices);
254*700637cbSDimitry Andric   }
255*700637cbSDimitry Andric 
256*700637cbSDimitry Andric   return CGF.Builder.CreateBitCast(Cmp,
257*700637cbSDimitry Andric                                    IntegerType::get(CGF.getLLVMContext(),
258*700637cbSDimitry Andric                                                     std::max(NumElts, 8U)));
259*700637cbSDimitry Andric }
260*700637cbSDimitry Andric 
EmitX86MaskedCompare(CodeGenFunction & CGF,unsigned CC,bool Signed,ArrayRef<Value * > Ops)261*700637cbSDimitry Andric static Value *EmitX86MaskedCompare(CodeGenFunction &CGF, unsigned CC,
262*700637cbSDimitry Andric                                    bool Signed, ArrayRef<Value *> Ops) {
263*700637cbSDimitry Andric   assert((Ops.size() == 2 || Ops.size() == 4) &&
264*700637cbSDimitry Andric          "Unexpected number of arguments");
265*700637cbSDimitry Andric   unsigned NumElts =
266*700637cbSDimitry Andric       cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
267*700637cbSDimitry Andric   Value *Cmp;
268*700637cbSDimitry Andric 
269*700637cbSDimitry Andric   if (CC == 3) {
270*700637cbSDimitry Andric     Cmp = Constant::getNullValue(
271*700637cbSDimitry Andric         llvm::FixedVectorType::get(CGF.Builder.getInt1Ty(), NumElts));
272*700637cbSDimitry Andric   } else if (CC == 7) {
273*700637cbSDimitry Andric     Cmp = Constant::getAllOnesValue(
274*700637cbSDimitry Andric         llvm::FixedVectorType::get(CGF.Builder.getInt1Ty(), NumElts));
275*700637cbSDimitry Andric   } else {
276*700637cbSDimitry Andric     ICmpInst::Predicate Pred;
277*700637cbSDimitry Andric     switch (CC) {
278*700637cbSDimitry Andric     default: llvm_unreachable("Unknown condition code");
279*700637cbSDimitry Andric     case 0: Pred = ICmpInst::ICMP_EQ;  break;
280*700637cbSDimitry Andric     case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
281*700637cbSDimitry Andric     case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
282*700637cbSDimitry Andric     case 4: Pred = ICmpInst::ICMP_NE;  break;
283*700637cbSDimitry Andric     case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
284*700637cbSDimitry Andric     case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
285*700637cbSDimitry Andric     }
286*700637cbSDimitry Andric     Cmp = CGF.Builder.CreateICmp(Pred, Ops[0], Ops[1]);
287*700637cbSDimitry Andric   }
288*700637cbSDimitry Andric 
289*700637cbSDimitry Andric   Value *MaskIn = nullptr;
290*700637cbSDimitry Andric   if (Ops.size() == 4)
291*700637cbSDimitry Andric     MaskIn = Ops[3];
292*700637cbSDimitry Andric 
293*700637cbSDimitry Andric   return EmitX86MaskedCompareResult(CGF, Cmp, NumElts, MaskIn);
294*700637cbSDimitry Andric }
295*700637cbSDimitry Andric 
EmitX86ConvertToMask(CodeGenFunction & CGF,Value * In)296*700637cbSDimitry Andric static Value *EmitX86ConvertToMask(CodeGenFunction &CGF, Value *In) {
297*700637cbSDimitry Andric   Value *Zero = Constant::getNullValue(In->getType());
298*700637cbSDimitry Andric   return EmitX86MaskedCompare(CGF, 1, true, { In, Zero });
299*700637cbSDimitry Andric }
300*700637cbSDimitry Andric 
EmitX86ConvertIntToFp(CodeGenFunction & CGF,const CallExpr * E,ArrayRef<Value * > Ops,bool IsSigned)301*700637cbSDimitry Andric static Value *EmitX86ConvertIntToFp(CodeGenFunction &CGF, const CallExpr *E,
302*700637cbSDimitry Andric                                     ArrayRef<Value *> Ops, bool IsSigned) {
303*700637cbSDimitry Andric   unsigned Rnd = cast<llvm::ConstantInt>(Ops[3])->getZExtValue();
304*700637cbSDimitry Andric   llvm::Type *Ty = Ops[1]->getType();
305*700637cbSDimitry Andric 
306*700637cbSDimitry Andric   Value *Res;
307*700637cbSDimitry Andric   if (Rnd != 4) {
308*700637cbSDimitry Andric     Intrinsic::ID IID = IsSigned ? Intrinsic::x86_avx512_sitofp_round
309*700637cbSDimitry Andric                                  : Intrinsic::x86_avx512_uitofp_round;
310*700637cbSDimitry Andric     Function *F = CGF.CGM.getIntrinsic(IID, { Ty, Ops[0]->getType() });
311*700637cbSDimitry Andric     Res = CGF.Builder.CreateCall(F, { Ops[0], Ops[3] });
312*700637cbSDimitry Andric   } else {
313*700637cbSDimitry Andric     CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
314*700637cbSDimitry Andric     Res = IsSigned ? CGF.Builder.CreateSIToFP(Ops[0], Ty)
315*700637cbSDimitry Andric                    : CGF.Builder.CreateUIToFP(Ops[0], Ty);
316*700637cbSDimitry Andric   }
317*700637cbSDimitry Andric 
318*700637cbSDimitry Andric   return EmitX86Select(CGF, Ops[2], Res, Ops[1]);
319*700637cbSDimitry Andric }
320*700637cbSDimitry Andric 
321*700637cbSDimitry Andric // Lowers X86 FMA intrinsics to IR.
EmitX86FMAExpr(CodeGenFunction & CGF,const CallExpr * E,ArrayRef<Value * > Ops,unsigned BuiltinID,bool IsAddSub)322*700637cbSDimitry Andric static Value *EmitX86FMAExpr(CodeGenFunction &CGF, const CallExpr *E,
323*700637cbSDimitry Andric                              ArrayRef<Value *> Ops, unsigned BuiltinID,
324*700637cbSDimitry Andric                              bool IsAddSub) {
325*700637cbSDimitry Andric 
326*700637cbSDimitry Andric   bool Subtract = false;
327*700637cbSDimitry Andric   Intrinsic::ID IID = Intrinsic::not_intrinsic;
328*700637cbSDimitry Andric   switch (BuiltinID) {
329*700637cbSDimitry Andric   default: break;
330*700637cbSDimitry Andric   case clang::X86::BI__builtin_ia32_vfmsubph512_mask3:
331*700637cbSDimitry Andric     Subtract = true;
332*700637cbSDimitry Andric     [[fallthrough]];
333*700637cbSDimitry Andric   case clang::X86::BI__builtin_ia32_vfmaddph512_mask:
334*700637cbSDimitry Andric   case clang::X86::BI__builtin_ia32_vfmaddph512_maskz:
335*700637cbSDimitry Andric   case clang::X86::BI__builtin_ia32_vfmaddph512_mask3:
336*700637cbSDimitry Andric     IID = Intrinsic::x86_avx512fp16_vfmadd_ph_512;
337*700637cbSDimitry Andric     break;
338*700637cbSDimitry Andric   case clang::X86::BI__builtin_ia32_vfmsubaddph512_mask3:
339*700637cbSDimitry Andric     Subtract = true;
340*700637cbSDimitry Andric     [[fallthrough]];
341*700637cbSDimitry Andric   case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask:
342*700637cbSDimitry Andric   case clang::X86::BI__builtin_ia32_vfmaddsubph512_maskz:
343*700637cbSDimitry Andric   case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask3:
344*700637cbSDimitry Andric     IID = Intrinsic::x86_avx512fp16_vfmaddsub_ph_512;
345*700637cbSDimitry Andric     break;
346*700637cbSDimitry Andric   case clang::X86::BI__builtin_ia32_vfmsubps512_mask3:
347*700637cbSDimitry Andric     Subtract = true;
348*700637cbSDimitry Andric     [[fallthrough]];
349*700637cbSDimitry Andric   case clang::X86::BI__builtin_ia32_vfmaddps512_mask:
350*700637cbSDimitry Andric   case clang::X86::BI__builtin_ia32_vfmaddps512_maskz:
351*700637cbSDimitry Andric   case clang::X86::BI__builtin_ia32_vfmaddps512_mask3:
352*700637cbSDimitry Andric     IID = Intrinsic::x86_avx512_vfmadd_ps_512; break;
353*700637cbSDimitry Andric   case clang::X86::BI__builtin_ia32_vfmsubpd512_mask3:
354*700637cbSDimitry Andric     Subtract = true;
355*700637cbSDimitry Andric     [[fallthrough]];
356*700637cbSDimitry Andric   case clang::X86::BI__builtin_ia32_vfmaddpd512_mask:
357*700637cbSDimitry Andric   case clang::X86::BI__builtin_ia32_vfmaddpd512_maskz:
358*700637cbSDimitry Andric   case clang::X86::BI__builtin_ia32_vfmaddpd512_mask3:
359*700637cbSDimitry Andric     IID = Intrinsic::x86_avx512_vfmadd_pd_512; break;
360*700637cbSDimitry Andric   case clang::X86::BI__builtin_ia32_vfmsubaddps512_mask3:
361*700637cbSDimitry Andric     Subtract = true;
362*700637cbSDimitry Andric     [[fallthrough]];
363*700637cbSDimitry Andric   case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask:
364*700637cbSDimitry Andric   case clang::X86::BI__builtin_ia32_vfmaddsubps512_maskz:
365*700637cbSDimitry Andric   case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask3:
366*700637cbSDimitry Andric     IID = Intrinsic::x86_avx512_vfmaddsub_ps_512;
367*700637cbSDimitry Andric     break;
368*700637cbSDimitry Andric   case clang::X86::BI__builtin_ia32_vfmsubaddpd512_mask3:
369*700637cbSDimitry Andric     Subtract = true;
370*700637cbSDimitry Andric     [[fallthrough]];
371*700637cbSDimitry Andric   case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask:
372*700637cbSDimitry Andric   case clang::X86::BI__builtin_ia32_vfmaddsubpd512_maskz:
373*700637cbSDimitry Andric   case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask3:
374*700637cbSDimitry Andric     IID = Intrinsic::x86_avx512_vfmaddsub_pd_512;
375*700637cbSDimitry Andric     break;
376*700637cbSDimitry Andric   }
377*700637cbSDimitry Andric 
378*700637cbSDimitry Andric   Value *A = Ops[0];
379*700637cbSDimitry Andric   Value *B = Ops[1];
380*700637cbSDimitry Andric   Value *C = Ops[2];
381*700637cbSDimitry Andric 
382*700637cbSDimitry Andric   if (Subtract)
383*700637cbSDimitry Andric     C = CGF.Builder.CreateFNeg(C);
384*700637cbSDimitry Andric 
385*700637cbSDimitry Andric   Value *Res;
386*700637cbSDimitry Andric 
387*700637cbSDimitry Andric   // Only handle in case of _MM_FROUND_CUR_DIRECTION/4 (no rounding).
388*700637cbSDimitry Andric   if (IID != Intrinsic::not_intrinsic &&
389*700637cbSDimitry Andric       (cast<llvm::ConstantInt>(Ops.back())->getZExtValue() != (uint64_t)4 ||
390*700637cbSDimitry Andric        IsAddSub)) {
391*700637cbSDimitry Andric     Function *Intr = CGF.CGM.getIntrinsic(IID);
392*700637cbSDimitry Andric     Res = CGF.Builder.CreateCall(Intr, {A, B, C, Ops.back() });
393*700637cbSDimitry Andric   } else {
394*700637cbSDimitry Andric     llvm::Type *Ty = A->getType();
395*700637cbSDimitry Andric     Function *FMA;
396*700637cbSDimitry Andric     if (CGF.Builder.getIsFPConstrained()) {
397*700637cbSDimitry Andric       CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
398*700637cbSDimitry Andric       FMA = CGF.CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, Ty);
399*700637cbSDimitry Andric       Res = CGF.Builder.CreateConstrainedFPCall(FMA, {A, B, C});
400*700637cbSDimitry Andric     } else {
401*700637cbSDimitry Andric       FMA = CGF.CGM.getIntrinsic(Intrinsic::fma, Ty);
402*700637cbSDimitry Andric       Res = CGF.Builder.CreateCall(FMA, {A, B, C});
403*700637cbSDimitry Andric     }
404*700637cbSDimitry Andric   }
405*700637cbSDimitry Andric 
406*700637cbSDimitry Andric   // Handle any required masking.
407*700637cbSDimitry Andric   Value *MaskFalseVal = nullptr;
408*700637cbSDimitry Andric   switch (BuiltinID) {
409*700637cbSDimitry Andric   case clang::X86::BI__builtin_ia32_vfmaddph512_mask:
410*700637cbSDimitry Andric   case clang::X86::BI__builtin_ia32_vfmaddps512_mask:
411*700637cbSDimitry Andric   case clang::X86::BI__builtin_ia32_vfmaddpd512_mask:
412*700637cbSDimitry Andric   case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask:
413*700637cbSDimitry Andric   case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask:
414*700637cbSDimitry Andric   case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask:
415*700637cbSDimitry Andric     MaskFalseVal = Ops[0];
416*700637cbSDimitry Andric     break;
417*700637cbSDimitry Andric   case clang::X86::BI__builtin_ia32_vfmaddph512_maskz:
418*700637cbSDimitry Andric   case clang::X86::BI__builtin_ia32_vfmaddps512_maskz:
419*700637cbSDimitry Andric   case clang::X86::BI__builtin_ia32_vfmaddpd512_maskz:
420*700637cbSDimitry Andric   case clang::X86::BI__builtin_ia32_vfmaddsubph512_maskz:
421*700637cbSDimitry Andric   case clang::X86::BI__builtin_ia32_vfmaddsubps512_maskz:
422*700637cbSDimitry Andric   case clang::X86::BI__builtin_ia32_vfmaddsubpd512_maskz:
423*700637cbSDimitry Andric     MaskFalseVal = Constant::getNullValue(Ops[0]->getType());
424*700637cbSDimitry Andric     break;
425*700637cbSDimitry Andric   case clang::X86::BI__builtin_ia32_vfmsubph512_mask3:
426*700637cbSDimitry Andric   case clang::X86::BI__builtin_ia32_vfmaddph512_mask3:
427*700637cbSDimitry Andric   case clang::X86::BI__builtin_ia32_vfmsubps512_mask3:
428*700637cbSDimitry Andric   case clang::X86::BI__builtin_ia32_vfmaddps512_mask3:
429*700637cbSDimitry Andric   case clang::X86::BI__builtin_ia32_vfmsubpd512_mask3:
430*700637cbSDimitry Andric   case clang::X86::BI__builtin_ia32_vfmaddpd512_mask3:
431*700637cbSDimitry Andric   case clang::X86::BI__builtin_ia32_vfmsubaddph512_mask3:
432*700637cbSDimitry Andric   case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask3:
433*700637cbSDimitry Andric   case clang::X86::BI__builtin_ia32_vfmsubaddps512_mask3:
434*700637cbSDimitry Andric   case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask3:
435*700637cbSDimitry Andric   case clang::X86::BI__builtin_ia32_vfmsubaddpd512_mask3:
436*700637cbSDimitry Andric   case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask3:
437*700637cbSDimitry Andric     MaskFalseVal = Ops[2];
438*700637cbSDimitry Andric     break;
439*700637cbSDimitry Andric   }
440*700637cbSDimitry Andric 
441*700637cbSDimitry Andric   if (MaskFalseVal)
442*700637cbSDimitry Andric     return EmitX86Select(CGF, Ops[3], Res, MaskFalseVal);
443*700637cbSDimitry Andric 
444*700637cbSDimitry Andric   return Res;
445*700637cbSDimitry Andric }
446*700637cbSDimitry Andric 
EmitScalarFMAExpr(CodeGenFunction & CGF,const CallExpr * E,MutableArrayRef<Value * > Ops,Value * Upper,bool ZeroMask=false,unsigned PTIdx=0,bool NegAcc=false)447*700637cbSDimitry Andric static Value *EmitScalarFMAExpr(CodeGenFunction &CGF, const CallExpr *E,
448*700637cbSDimitry Andric                                 MutableArrayRef<Value *> Ops, Value *Upper,
449*700637cbSDimitry Andric                                 bool ZeroMask = false, unsigned PTIdx = 0,
450*700637cbSDimitry Andric                                 bool NegAcc = false) {
451*700637cbSDimitry Andric   unsigned Rnd = 4;
452*700637cbSDimitry Andric   if (Ops.size() > 4)
453*700637cbSDimitry Andric     Rnd = cast<llvm::ConstantInt>(Ops[4])->getZExtValue();
454*700637cbSDimitry Andric 
455*700637cbSDimitry Andric   if (NegAcc)
456*700637cbSDimitry Andric     Ops[2] = CGF.Builder.CreateFNeg(Ops[2]);
457*700637cbSDimitry Andric 
458*700637cbSDimitry Andric   Ops[0] = CGF.Builder.CreateExtractElement(Ops[0], (uint64_t)0);
459*700637cbSDimitry Andric   Ops[1] = CGF.Builder.CreateExtractElement(Ops[1], (uint64_t)0);
460*700637cbSDimitry Andric   Ops[2] = CGF.Builder.CreateExtractElement(Ops[2], (uint64_t)0);
461*700637cbSDimitry Andric   Value *Res;
462*700637cbSDimitry Andric   if (Rnd != 4) {
463*700637cbSDimitry Andric     Intrinsic::ID IID;
464*700637cbSDimitry Andric 
465*700637cbSDimitry Andric     switch (Ops[0]->getType()->getPrimitiveSizeInBits()) {
466*700637cbSDimitry Andric     case 16:
467*700637cbSDimitry Andric       IID = Intrinsic::x86_avx512fp16_vfmadd_f16;
468*700637cbSDimitry Andric       break;
469*700637cbSDimitry Andric     case 32:
470*700637cbSDimitry Andric       IID = Intrinsic::x86_avx512_vfmadd_f32;
471*700637cbSDimitry Andric       break;
472*700637cbSDimitry Andric     case 64:
473*700637cbSDimitry Andric       IID = Intrinsic::x86_avx512_vfmadd_f64;
474*700637cbSDimitry Andric       break;
475*700637cbSDimitry Andric     default:
476*700637cbSDimitry Andric       llvm_unreachable("Unexpected size");
477*700637cbSDimitry Andric     }
478*700637cbSDimitry Andric     Res = CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IID),
479*700637cbSDimitry Andric                                  {Ops[0], Ops[1], Ops[2], Ops[4]});
480*700637cbSDimitry Andric   } else if (CGF.Builder.getIsFPConstrained()) {
481*700637cbSDimitry Andric     CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
482*700637cbSDimitry Andric     Function *FMA = CGF.CGM.getIntrinsic(
483*700637cbSDimitry Andric         Intrinsic::experimental_constrained_fma, Ops[0]->getType());
484*700637cbSDimitry Andric     Res = CGF.Builder.CreateConstrainedFPCall(FMA, Ops.slice(0, 3));
485*700637cbSDimitry Andric   } else {
486*700637cbSDimitry Andric     Function *FMA = CGF.CGM.getIntrinsic(Intrinsic::fma, Ops[0]->getType());
487*700637cbSDimitry Andric     Res = CGF.Builder.CreateCall(FMA, Ops.slice(0, 3));
488*700637cbSDimitry Andric   }
489*700637cbSDimitry Andric   // If we have more than 3 arguments, we need to do masking.
490*700637cbSDimitry Andric   if (Ops.size() > 3) {
491*700637cbSDimitry Andric     Value *PassThru = ZeroMask ? Constant::getNullValue(Res->getType())
492*700637cbSDimitry Andric                                : Ops[PTIdx];
493*700637cbSDimitry Andric 
494*700637cbSDimitry Andric     // If we negated the accumulator and the its the PassThru value we need to
495*700637cbSDimitry Andric     // bypass the negate. Conveniently Upper should be the same thing in this
496*700637cbSDimitry Andric     // case.
497*700637cbSDimitry Andric     if (NegAcc && PTIdx == 2)
498*700637cbSDimitry Andric       PassThru = CGF.Builder.CreateExtractElement(Upper, (uint64_t)0);
499*700637cbSDimitry Andric 
500*700637cbSDimitry Andric     Res = EmitX86ScalarSelect(CGF, Ops[3], Res, PassThru);
501*700637cbSDimitry Andric   }
502*700637cbSDimitry Andric   return CGF.Builder.CreateInsertElement(Upper, Res, (uint64_t)0);
503*700637cbSDimitry Andric }
504*700637cbSDimitry Andric 
EmitX86Muldq(CodeGenFunction & CGF,bool IsSigned,ArrayRef<Value * > Ops)505*700637cbSDimitry Andric static Value *EmitX86Muldq(CodeGenFunction &CGF, bool IsSigned,
506*700637cbSDimitry Andric                            ArrayRef<Value *> Ops) {
507*700637cbSDimitry Andric   llvm::Type *Ty = Ops[0]->getType();
508*700637cbSDimitry Andric   // Arguments have a vXi32 type so cast to vXi64.
509*700637cbSDimitry Andric   Ty = llvm::FixedVectorType::get(CGF.Int64Ty,
510*700637cbSDimitry Andric                                   Ty->getPrimitiveSizeInBits() / 64);
511*700637cbSDimitry Andric   Value *LHS = CGF.Builder.CreateBitCast(Ops[0], Ty);
512*700637cbSDimitry Andric   Value *RHS = CGF.Builder.CreateBitCast(Ops[1], Ty);
513*700637cbSDimitry Andric 
514*700637cbSDimitry Andric   if (IsSigned) {
515*700637cbSDimitry Andric     // Shift left then arithmetic shift right.
516*700637cbSDimitry Andric     Constant *ShiftAmt = ConstantInt::get(Ty, 32);
517*700637cbSDimitry Andric     LHS = CGF.Builder.CreateShl(LHS, ShiftAmt);
518*700637cbSDimitry Andric     LHS = CGF.Builder.CreateAShr(LHS, ShiftAmt);
519*700637cbSDimitry Andric     RHS = CGF.Builder.CreateShl(RHS, ShiftAmt);
520*700637cbSDimitry Andric     RHS = CGF.Builder.CreateAShr(RHS, ShiftAmt);
521*700637cbSDimitry Andric   } else {
522*700637cbSDimitry Andric     // Clear the upper bits.
523*700637cbSDimitry Andric     Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
524*700637cbSDimitry Andric     LHS = CGF.Builder.CreateAnd(LHS, Mask);
525*700637cbSDimitry Andric     RHS = CGF.Builder.CreateAnd(RHS, Mask);
526*700637cbSDimitry Andric   }
527*700637cbSDimitry Andric 
528*700637cbSDimitry Andric   return CGF.Builder.CreateMul(LHS, RHS);
529*700637cbSDimitry Andric }
530*700637cbSDimitry Andric 
531*700637cbSDimitry Andric // Emit a masked pternlog intrinsic. This only exists because the header has to
532*700637cbSDimitry Andric // use a macro and we aren't able to pass the input argument to a pternlog
533*700637cbSDimitry Andric // builtin and a select builtin without evaluating it twice.
EmitX86Ternlog(CodeGenFunction & CGF,bool ZeroMask,ArrayRef<Value * > Ops)534*700637cbSDimitry Andric static Value *EmitX86Ternlog(CodeGenFunction &CGF, bool ZeroMask,
535*700637cbSDimitry Andric                              ArrayRef<Value *> Ops) {
536*700637cbSDimitry Andric   llvm::Type *Ty = Ops[0]->getType();
537*700637cbSDimitry Andric 
538*700637cbSDimitry Andric   unsigned VecWidth = Ty->getPrimitiveSizeInBits();
539*700637cbSDimitry Andric   unsigned EltWidth = Ty->getScalarSizeInBits();
540*700637cbSDimitry Andric   Intrinsic::ID IID;
541*700637cbSDimitry Andric   if (VecWidth == 128 && EltWidth == 32)
542*700637cbSDimitry Andric     IID = Intrinsic::x86_avx512_pternlog_d_128;
543*700637cbSDimitry Andric   else if (VecWidth == 256 && EltWidth == 32)
544*700637cbSDimitry Andric     IID = Intrinsic::x86_avx512_pternlog_d_256;
545*700637cbSDimitry Andric   else if (VecWidth == 512 && EltWidth == 32)
546*700637cbSDimitry Andric     IID = Intrinsic::x86_avx512_pternlog_d_512;
547*700637cbSDimitry Andric   else if (VecWidth == 128 && EltWidth == 64)
548*700637cbSDimitry Andric     IID = Intrinsic::x86_avx512_pternlog_q_128;
549*700637cbSDimitry Andric   else if (VecWidth == 256 && EltWidth == 64)
550*700637cbSDimitry Andric     IID = Intrinsic::x86_avx512_pternlog_q_256;
551*700637cbSDimitry Andric   else if (VecWidth == 512 && EltWidth == 64)
552*700637cbSDimitry Andric     IID = Intrinsic::x86_avx512_pternlog_q_512;
553*700637cbSDimitry Andric   else
554*700637cbSDimitry Andric     llvm_unreachable("Unexpected intrinsic");
555*700637cbSDimitry Andric 
556*700637cbSDimitry Andric   Value *Ternlog = CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IID),
557*700637cbSDimitry Andric                                           Ops.drop_back());
558*700637cbSDimitry Andric   Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty) : Ops[0];
559*700637cbSDimitry Andric   return EmitX86Select(CGF, Ops[4], Ternlog, PassThru);
560*700637cbSDimitry Andric }
561*700637cbSDimitry Andric 
EmitX86SExtMask(CodeGenFunction & CGF,Value * Op,llvm::Type * DstTy)562*700637cbSDimitry Andric static Value *EmitX86SExtMask(CodeGenFunction &CGF, Value *Op,
563*700637cbSDimitry Andric                               llvm::Type *DstTy) {
564*700637cbSDimitry Andric   unsigned NumberOfElements =
565*700637cbSDimitry Andric       cast<llvm::FixedVectorType>(DstTy)->getNumElements();
566*700637cbSDimitry Andric   Value *Mask = getMaskVecValue(CGF, Op, NumberOfElements);
567*700637cbSDimitry Andric   return CGF.Builder.CreateSExt(Mask, DstTy, "vpmovm2");
568*700637cbSDimitry Andric }
569*700637cbSDimitry Andric 
EmitX86CpuIs(const CallExpr * E)570*700637cbSDimitry Andric Value *CodeGenFunction::EmitX86CpuIs(const CallExpr *E) {
571*700637cbSDimitry Andric   const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts();
572*700637cbSDimitry Andric   StringRef CPUStr = cast<clang::StringLiteral>(CPUExpr)->getString();
573*700637cbSDimitry Andric   return EmitX86CpuIs(CPUStr);
574*700637cbSDimitry Andric }
575*700637cbSDimitry Andric 
576*700637cbSDimitry Andric // Convert F16 halfs to floats.
EmitX86CvtF16ToFloatExpr(CodeGenFunction & CGF,ArrayRef<Value * > Ops,llvm::Type * DstTy)577*700637cbSDimitry Andric static Value *EmitX86CvtF16ToFloatExpr(CodeGenFunction &CGF,
578*700637cbSDimitry Andric                                        ArrayRef<Value *> Ops,
579*700637cbSDimitry Andric                                        llvm::Type *DstTy) {
580*700637cbSDimitry Andric   assert((Ops.size() == 1 || Ops.size() == 3 || Ops.size() == 4) &&
581*700637cbSDimitry Andric          "Unknown cvtph2ps intrinsic");
582*700637cbSDimitry Andric 
583*700637cbSDimitry Andric   // If the SAE intrinsic doesn't use default rounding then we can't upgrade.
584*700637cbSDimitry Andric   if (Ops.size() == 4 && cast<llvm::ConstantInt>(Ops[3])->getZExtValue() != 4) {
585*700637cbSDimitry Andric     Function *F =
586*700637cbSDimitry Andric         CGF.CGM.getIntrinsic(Intrinsic::x86_avx512_mask_vcvtph2ps_512);
587*700637cbSDimitry Andric     return CGF.Builder.CreateCall(F, {Ops[0], Ops[1], Ops[2], Ops[3]});
588*700637cbSDimitry Andric   }
589*700637cbSDimitry Andric 
590*700637cbSDimitry Andric   unsigned NumDstElts = cast<llvm::FixedVectorType>(DstTy)->getNumElements();
591*700637cbSDimitry Andric   Value *Src = Ops[0];
592*700637cbSDimitry Andric 
593*700637cbSDimitry Andric   // Extract the subvector.
594*700637cbSDimitry Andric   if (NumDstElts !=
595*700637cbSDimitry Andric       cast<llvm::FixedVectorType>(Src->getType())->getNumElements()) {
596*700637cbSDimitry Andric     assert(NumDstElts == 4 && "Unexpected vector size");
597*700637cbSDimitry Andric     Src = CGF.Builder.CreateShuffleVector(Src, {0, 1, 2, 3});
598*700637cbSDimitry Andric   }
599*700637cbSDimitry Andric 
600*700637cbSDimitry Andric   // Bitcast from vXi16 to vXf16.
601*700637cbSDimitry Andric   auto *HalfTy = llvm::FixedVectorType::get(
602*700637cbSDimitry Andric       llvm::Type::getHalfTy(CGF.getLLVMContext()), NumDstElts);
603*700637cbSDimitry Andric   Src = CGF.Builder.CreateBitCast(Src, HalfTy);
604*700637cbSDimitry Andric 
605*700637cbSDimitry Andric   // Perform the fp-extension.
606*700637cbSDimitry Andric   Value *Res = CGF.Builder.CreateFPExt(Src, DstTy, "cvtph2ps");
607*700637cbSDimitry Andric 
608*700637cbSDimitry Andric   if (Ops.size() >= 3)
609*700637cbSDimitry Andric     Res = EmitX86Select(CGF, Ops[2], Res, Ops[1]);
610*700637cbSDimitry Andric   return Res;
611*700637cbSDimitry Andric }
612*700637cbSDimitry Andric 
EmitX86CpuIs(StringRef CPUStr)613*700637cbSDimitry Andric Value *CodeGenFunction::EmitX86CpuIs(StringRef CPUStr) {
614*700637cbSDimitry Andric 
615*700637cbSDimitry Andric   llvm::Type *Int32Ty = Builder.getInt32Ty();
616*700637cbSDimitry Andric 
617*700637cbSDimitry Andric   // Matching the struct layout from the compiler-rt/libgcc structure that is
618*700637cbSDimitry Andric   // filled in:
619*700637cbSDimitry Andric   // unsigned int __cpu_vendor;
620*700637cbSDimitry Andric   // unsigned int __cpu_type;
621*700637cbSDimitry Andric   // unsigned int __cpu_subtype;
622*700637cbSDimitry Andric   // unsigned int __cpu_features[1];
623*700637cbSDimitry Andric   llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty,
624*700637cbSDimitry Andric                                           llvm::ArrayType::get(Int32Ty, 1));
625*700637cbSDimitry Andric 
626*700637cbSDimitry Andric   // Grab the global __cpu_model.
627*700637cbSDimitry Andric   llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model");
628*700637cbSDimitry Andric   cast<llvm::GlobalValue>(CpuModel)->setDSOLocal(true);
629*700637cbSDimitry Andric 
630*700637cbSDimitry Andric   // Calculate the index needed to access the correct field based on the
631*700637cbSDimitry Andric   // range. Also adjust the expected value.
632*700637cbSDimitry Andric   auto [Index, Value] = StringSwitch<std::pair<unsigned, unsigned>>(CPUStr)
633*700637cbSDimitry Andric #define X86_VENDOR(ENUM, STRING)                                               \
634*700637cbSDimitry Andric   .Case(STRING, {0u, static_cast<unsigned>(llvm::X86::ENUM)})
635*700637cbSDimitry Andric #define X86_CPU_TYPE_ALIAS(ENUM, ALIAS)                                        \
636*700637cbSDimitry Andric   .Case(ALIAS, {1u, static_cast<unsigned>(llvm::X86::ENUM)})
637*700637cbSDimitry Andric #define X86_CPU_TYPE(ENUM, STR)                                                \
638*700637cbSDimitry Andric   .Case(STR, {1u, static_cast<unsigned>(llvm::X86::ENUM)})
639*700637cbSDimitry Andric #define X86_CPU_SUBTYPE_ALIAS(ENUM, ALIAS)                                     \
640*700637cbSDimitry Andric   .Case(ALIAS, {2u, static_cast<unsigned>(llvm::X86::ENUM)})
641*700637cbSDimitry Andric #define X86_CPU_SUBTYPE(ENUM, STR)                                             \
642*700637cbSDimitry Andric   .Case(STR, {2u, static_cast<unsigned>(llvm::X86::ENUM)})
643*700637cbSDimitry Andric #include "llvm/TargetParser/X86TargetParser.def"
644*700637cbSDimitry Andric                                .Default({0, 0});
645*700637cbSDimitry Andric   assert(Value != 0 && "Invalid CPUStr passed to CpuIs");
646*700637cbSDimitry Andric 
647*700637cbSDimitry Andric   // Grab the appropriate field from __cpu_model.
648*700637cbSDimitry Andric   llvm::Value *Idxs[] = {ConstantInt::get(Int32Ty, 0),
649*700637cbSDimitry Andric                          ConstantInt::get(Int32Ty, Index)};
650*700637cbSDimitry Andric   llvm::Value *CpuValue = Builder.CreateInBoundsGEP(STy, CpuModel, Idxs);
651*700637cbSDimitry Andric   CpuValue = Builder.CreateAlignedLoad(Int32Ty, CpuValue,
652*700637cbSDimitry Andric                                        CharUnits::fromQuantity(4));
653*700637cbSDimitry Andric 
654*700637cbSDimitry Andric   // Check the value of the field against the requested value.
655*700637cbSDimitry Andric   return Builder.CreateICmpEQ(CpuValue,
656*700637cbSDimitry Andric                                   llvm::ConstantInt::get(Int32Ty, Value));
657*700637cbSDimitry Andric }
658*700637cbSDimitry Andric 
EmitX86CpuSupports(const CallExpr * E)659*700637cbSDimitry Andric Value *CodeGenFunction::EmitX86CpuSupports(const CallExpr *E) {
660*700637cbSDimitry Andric   const Expr *FeatureExpr = E->getArg(0)->IgnoreParenCasts();
661*700637cbSDimitry Andric   StringRef FeatureStr = cast<StringLiteral>(FeatureExpr)->getString();
662*700637cbSDimitry Andric   if (!getContext().getTargetInfo().validateCpuSupports(FeatureStr))
663*700637cbSDimitry Andric     return Builder.getFalse();
664*700637cbSDimitry Andric   return EmitX86CpuSupports(FeatureStr);
665*700637cbSDimitry Andric }
666*700637cbSDimitry Andric 
EmitX86CpuSupports(ArrayRef<StringRef> FeatureStrs)667*700637cbSDimitry Andric Value *CodeGenFunction::EmitX86CpuSupports(ArrayRef<StringRef> FeatureStrs) {
668*700637cbSDimitry Andric   return EmitX86CpuSupports(llvm::X86::getCpuSupportsMask(FeatureStrs));
669*700637cbSDimitry Andric }
670*700637cbSDimitry Andric 
671*700637cbSDimitry Andric llvm::Value *
EmitX86CpuSupports(std::array<uint32_t,4> FeatureMask)672*700637cbSDimitry Andric CodeGenFunction::EmitX86CpuSupports(std::array<uint32_t, 4> FeatureMask) {
673*700637cbSDimitry Andric   Value *Result = Builder.getTrue();
674*700637cbSDimitry Andric   if (FeatureMask[0] != 0) {
675*700637cbSDimitry Andric     // Matching the struct layout from the compiler-rt/libgcc structure that is
676*700637cbSDimitry Andric     // filled in:
677*700637cbSDimitry Andric     // unsigned int __cpu_vendor;
678*700637cbSDimitry Andric     // unsigned int __cpu_type;
679*700637cbSDimitry Andric     // unsigned int __cpu_subtype;
680*700637cbSDimitry Andric     // unsigned int __cpu_features[1];
681*700637cbSDimitry Andric     llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty,
682*700637cbSDimitry Andric                                             llvm::ArrayType::get(Int32Ty, 1));
683*700637cbSDimitry Andric 
684*700637cbSDimitry Andric     // Grab the global __cpu_model.
685*700637cbSDimitry Andric     llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model");
686*700637cbSDimitry Andric     cast<llvm::GlobalValue>(CpuModel)->setDSOLocal(true);
687*700637cbSDimitry Andric 
688*700637cbSDimitry Andric     // Grab the first (0th) element from the field __cpu_features off of the
689*700637cbSDimitry Andric     // global in the struct STy.
690*700637cbSDimitry Andric     Value *Idxs[] = {Builder.getInt32(0), Builder.getInt32(3),
691*700637cbSDimitry Andric                      Builder.getInt32(0)};
692*700637cbSDimitry Andric     Value *CpuFeatures = Builder.CreateInBoundsGEP(STy, CpuModel, Idxs);
693*700637cbSDimitry Andric     Value *Features = Builder.CreateAlignedLoad(Int32Ty, CpuFeatures,
694*700637cbSDimitry Andric                                                 CharUnits::fromQuantity(4));
695*700637cbSDimitry Andric 
696*700637cbSDimitry Andric     // Check the value of the bit corresponding to the feature requested.
697*700637cbSDimitry Andric     Value *Mask = Builder.getInt32(FeatureMask[0]);
698*700637cbSDimitry Andric     Value *Bitset = Builder.CreateAnd(Features, Mask);
699*700637cbSDimitry Andric     Value *Cmp = Builder.CreateICmpEQ(Bitset, Mask);
700*700637cbSDimitry Andric     Result = Builder.CreateAnd(Result, Cmp);
701*700637cbSDimitry Andric   }
702*700637cbSDimitry Andric 
703*700637cbSDimitry Andric   llvm::Type *ATy = llvm::ArrayType::get(Int32Ty, 3);
704*700637cbSDimitry Andric   llvm::Constant *CpuFeatures2 =
705*700637cbSDimitry Andric       CGM.CreateRuntimeVariable(ATy, "__cpu_features2");
706*700637cbSDimitry Andric   cast<llvm::GlobalValue>(CpuFeatures2)->setDSOLocal(true);
707*700637cbSDimitry Andric   for (int i = 1; i != 4; ++i) {
708*700637cbSDimitry Andric     const uint32_t M = FeatureMask[i];
709*700637cbSDimitry Andric     if (!M)
710*700637cbSDimitry Andric       continue;
711*700637cbSDimitry Andric     Value *Idxs[] = {Builder.getInt32(0), Builder.getInt32(i - 1)};
712*700637cbSDimitry Andric     Value *Features = Builder.CreateAlignedLoad(
713*700637cbSDimitry Andric         Int32Ty, Builder.CreateInBoundsGEP(ATy, CpuFeatures2, Idxs),
714*700637cbSDimitry Andric         CharUnits::fromQuantity(4));
715*700637cbSDimitry Andric     // Check the value of the bit corresponding to the feature requested.
716*700637cbSDimitry Andric     Value *Mask = Builder.getInt32(M);
717*700637cbSDimitry Andric     Value *Bitset = Builder.CreateAnd(Features, Mask);
718*700637cbSDimitry Andric     Value *Cmp = Builder.CreateICmpEQ(Bitset, Mask);
719*700637cbSDimitry Andric     Result = Builder.CreateAnd(Result, Cmp);
720*700637cbSDimitry Andric   }
721*700637cbSDimitry Andric 
722*700637cbSDimitry Andric   return Result;
723*700637cbSDimitry Andric }
724*700637cbSDimitry Andric 
EmitX86CpuInit()725*700637cbSDimitry Andric Value *CodeGenFunction::EmitX86CpuInit() {
726*700637cbSDimitry Andric   llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy,
727*700637cbSDimitry Andric                                                     /*Variadic*/ false);
728*700637cbSDimitry Andric   llvm::FunctionCallee Func =
729*700637cbSDimitry Andric       CGM.CreateRuntimeFunction(FTy, "__cpu_indicator_init");
730*700637cbSDimitry Andric   cast<llvm::GlobalValue>(Func.getCallee())->setDSOLocal(true);
731*700637cbSDimitry Andric   cast<llvm::GlobalValue>(Func.getCallee())
732*700637cbSDimitry Andric       ->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass);
733*700637cbSDimitry Andric   return Builder.CreateCall(Func);
734*700637cbSDimitry Andric }
735*700637cbSDimitry Andric 
736*700637cbSDimitry Andric 
EmitX86BuiltinExpr(unsigned BuiltinID,const CallExpr * E)737*700637cbSDimitry Andric Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
738*700637cbSDimitry Andric                                            const CallExpr *E) {
739*700637cbSDimitry Andric   if (BuiltinID == Builtin::BI__builtin_cpu_is)
740*700637cbSDimitry Andric     return EmitX86CpuIs(E);
741*700637cbSDimitry Andric   if (BuiltinID == Builtin::BI__builtin_cpu_supports)
742*700637cbSDimitry Andric     return EmitX86CpuSupports(E);
743*700637cbSDimitry Andric   if (BuiltinID == Builtin::BI__builtin_cpu_init)
744*700637cbSDimitry Andric     return EmitX86CpuInit();
745*700637cbSDimitry Andric 
746*700637cbSDimitry Andric   // Handle MSVC intrinsics before argument evaluation to prevent double
747*700637cbSDimitry Andric   // evaluation.
748*700637cbSDimitry Andric   if (std::optional<MSVCIntrin> MsvcIntId = translateX86ToMsvcIntrin(BuiltinID))
749*700637cbSDimitry Andric     return EmitMSVCBuiltinExpr(*MsvcIntId, E);
750*700637cbSDimitry Andric 
751*700637cbSDimitry Andric   SmallVector<Value*, 4> Ops;
752*700637cbSDimitry Andric   bool IsMaskFCmp = false;
753*700637cbSDimitry Andric   bool IsConjFMA = false;
754*700637cbSDimitry Andric 
755*700637cbSDimitry Andric   // Find out if any arguments are required to be integer constant expressions.
756*700637cbSDimitry Andric   unsigned ICEArguments = 0;
757*700637cbSDimitry Andric   ASTContext::GetBuiltinTypeError Error;
758*700637cbSDimitry Andric   getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
759*700637cbSDimitry Andric   assert(Error == ASTContext::GE_None && "Should not codegen an error");
760*700637cbSDimitry Andric 
761*700637cbSDimitry Andric   for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
762*700637cbSDimitry Andric     Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));
763*700637cbSDimitry Andric   }
764*700637cbSDimitry Andric 
765*700637cbSDimitry Andric   // These exist so that the builtin that takes an immediate can be bounds
766*700637cbSDimitry Andric   // checked by clang to avoid passing bad immediates to the backend. Since
767*700637cbSDimitry Andric   // AVX has a larger immediate than SSE we would need separate builtins to
768*700637cbSDimitry Andric   // do the different bounds checking. Rather than create a clang specific
769*700637cbSDimitry Andric   // SSE only builtin, this implements eight separate builtins to match gcc
770*700637cbSDimitry Andric   // implementation.
771*700637cbSDimitry Andric   auto getCmpIntrinsicCall = [this, &Ops](Intrinsic::ID ID, unsigned Imm) {
772*700637cbSDimitry Andric     Ops.push_back(llvm::ConstantInt::get(Int8Ty, Imm));
773*700637cbSDimitry Andric     llvm::Function *F = CGM.getIntrinsic(ID);
774*700637cbSDimitry Andric     return Builder.CreateCall(F, Ops);
775*700637cbSDimitry Andric   };
776*700637cbSDimitry Andric 
777*700637cbSDimitry Andric   // For the vector forms of FP comparisons, translate the builtins directly to
778*700637cbSDimitry Andric   // IR.
779*700637cbSDimitry Andric   // TODO: The builtins could be removed if the SSE header files used vector
780*700637cbSDimitry Andric   // extension comparisons directly (vector ordered/unordered may need
781*700637cbSDimitry Andric   // additional support via __builtin_isnan()).
782*700637cbSDimitry Andric   auto getVectorFCmpIR = [this, &Ops, E](CmpInst::Predicate Pred,
783*700637cbSDimitry Andric                                          bool IsSignaling) {
784*700637cbSDimitry Andric     CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
785*700637cbSDimitry Andric     Value *Cmp;
786*700637cbSDimitry Andric     if (IsSignaling)
787*700637cbSDimitry Andric       Cmp = Builder.CreateFCmpS(Pred, Ops[0], Ops[1]);
788*700637cbSDimitry Andric     else
789*700637cbSDimitry Andric       Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]);
790*700637cbSDimitry Andric     llvm::VectorType *FPVecTy = cast<llvm::VectorType>(Ops[0]->getType());
791*700637cbSDimitry Andric     llvm::VectorType *IntVecTy = llvm::VectorType::getInteger(FPVecTy);
792*700637cbSDimitry Andric     Value *Sext = Builder.CreateSExt(Cmp, IntVecTy);
793*700637cbSDimitry Andric     return Builder.CreateBitCast(Sext, FPVecTy);
794*700637cbSDimitry Andric   };
795*700637cbSDimitry Andric 
796*700637cbSDimitry Andric   switch (BuiltinID) {
797*700637cbSDimitry Andric   default: return nullptr;
798*700637cbSDimitry Andric   case X86::BI_mm_prefetch: {
799*700637cbSDimitry Andric     Value *Address = Ops[0];
800*700637cbSDimitry Andric     ConstantInt *C = cast<ConstantInt>(Ops[1]);
801*700637cbSDimitry Andric     Value *RW = ConstantInt::get(Int32Ty, (C->getZExtValue() >> 2) & 0x1);
802*700637cbSDimitry Andric     Value *Locality = ConstantInt::get(Int32Ty, C->getZExtValue() & 0x3);
803*700637cbSDimitry Andric     Value *Data = ConstantInt::get(Int32Ty, 1);
804*700637cbSDimitry Andric     Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
805*700637cbSDimitry Andric     return Builder.CreateCall(F, {Address, RW, Locality, Data});
806*700637cbSDimitry Andric   }
807*700637cbSDimitry Andric   case X86::BI_m_prefetch:
808*700637cbSDimitry Andric   case X86::BI_m_prefetchw: {
809*700637cbSDimitry Andric     Value *Address = Ops[0];
810*700637cbSDimitry Andric     // The 'w' suffix implies write.
811*700637cbSDimitry Andric     Value *RW =
812*700637cbSDimitry Andric         ConstantInt::get(Int32Ty, BuiltinID == X86::BI_m_prefetchw ? 1 : 0);
813*700637cbSDimitry Andric     Value *Locality = ConstantInt::get(Int32Ty, 0x3);
814*700637cbSDimitry Andric     Value *Data = ConstantInt::get(Int32Ty, 1);
815*700637cbSDimitry Andric     Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
816*700637cbSDimitry Andric     return Builder.CreateCall(F, {Address, RW, Locality, Data});
817*700637cbSDimitry Andric   }
818*700637cbSDimitry Andric   case X86::BI_mm_clflush: {
819*700637cbSDimitry Andric     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_clflush),
820*700637cbSDimitry Andric                               Ops[0]);
821*700637cbSDimitry Andric   }
822*700637cbSDimitry Andric   case X86::BI_mm_lfence: {
823*700637cbSDimitry Andric     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_lfence));
824*700637cbSDimitry Andric   }
825*700637cbSDimitry Andric   case X86::BI_mm_mfence: {
826*700637cbSDimitry Andric     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_mfence));
827*700637cbSDimitry Andric   }
828*700637cbSDimitry Andric   case X86::BI_mm_sfence: {
829*700637cbSDimitry Andric     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_sfence));
830*700637cbSDimitry Andric   }
831*700637cbSDimitry Andric   case X86::BI_mm_pause: {
832*700637cbSDimitry Andric     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_pause));
833*700637cbSDimitry Andric   }
834*700637cbSDimitry Andric   case X86::BI__rdtsc: {
835*700637cbSDimitry Andric     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_rdtsc));
836*700637cbSDimitry Andric   }
837*700637cbSDimitry Andric   case X86::BI__builtin_ia32_rdtscp: {
838*700637cbSDimitry Andric     Value *Call = Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_rdtscp));
839*700637cbSDimitry Andric     Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 1),
840*700637cbSDimitry Andric                                       Ops[0]);
841*700637cbSDimitry Andric     return Builder.CreateExtractValue(Call, 0);
842*700637cbSDimitry Andric   }
843*700637cbSDimitry Andric   case X86::BI__builtin_ia32_lzcnt_u16:
844*700637cbSDimitry Andric   case X86::BI__builtin_ia32_lzcnt_u32:
845*700637cbSDimitry Andric   case X86::BI__builtin_ia32_lzcnt_u64: {
846*700637cbSDimitry Andric     Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType());
847*700637cbSDimitry Andric     return Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)});
848*700637cbSDimitry Andric   }
849*700637cbSDimitry Andric   case X86::BI__builtin_ia32_tzcnt_u16:
850*700637cbSDimitry Andric   case X86::BI__builtin_ia32_tzcnt_u32:
851*700637cbSDimitry Andric   case X86::BI__builtin_ia32_tzcnt_u64: {
852*700637cbSDimitry Andric     Function *F = CGM.getIntrinsic(Intrinsic::cttz, Ops[0]->getType());
853*700637cbSDimitry Andric     return Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)});
854*700637cbSDimitry Andric   }
855*700637cbSDimitry Andric   case X86::BI__builtin_ia32_undef128:
856*700637cbSDimitry Andric   case X86::BI__builtin_ia32_undef256:
857*700637cbSDimitry Andric   case X86::BI__builtin_ia32_undef512:
858*700637cbSDimitry Andric     // The x86 definition of "undef" is not the same as the LLVM definition
859*700637cbSDimitry Andric     // (PR32176). We leave optimizing away an unnecessary zero constant to the
860*700637cbSDimitry Andric     // IR optimizer and backend.
861*700637cbSDimitry Andric     // TODO: If we had a "freeze" IR instruction to generate a fixed undef
862*700637cbSDimitry Andric     // value, we should use that here instead of a zero.
863*700637cbSDimitry Andric     return llvm::Constant::getNullValue(ConvertType(E->getType()));
864*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vec_ext_v4hi:
865*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vec_ext_v16qi:
866*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vec_ext_v8hi:
867*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vec_ext_v4si:
868*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vec_ext_v4sf:
869*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vec_ext_v2di:
870*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vec_ext_v32qi:
871*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vec_ext_v16hi:
872*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vec_ext_v8si:
873*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vec_ext_v4di: {
874*700637cbSDimitry Andric     unsigned NumElts =
875*700637cbSDimitry Andric         cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
876*700637cbSDimitry Andric     uint64_t Index = cast<ConstantInt>(Ops[1])->getZExtValue();
877*700637cbSDimitry Andric     Index &= NumElts - 1;
878*700637cbSDimitry Andric     // These builtins exist so we can ensure the index is an ICE and in range.
879*700637cbSDimitry Andric     // Otherwise we could just do this in the header file.
880*700637cbSDimitry Andric     return Builder.CreateExtractElement(Ops[0], Index);
881*700637cbSDimitry Andric   }
882*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vec_set_v4hi:
883*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vec_set_v16qi:
884*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vec_set_v8hi:
885*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vec_set_v4si:
886*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vec_set_v2di:
887*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vec_set_v32qi:
888*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vec_set_v16hi:
889*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vec_set_v8si:
890*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vec_set_v4di: {
891*700637cbSDimitry Andric     unsigned NumElts =
892*700637cbSDimitry Andric         cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
893*700637cbSDimitry Andric     unsigned Index = cast<ConstantInt>(Ops[2])->getZExtValue();
894*700637cbSDimitry Andric     Index &= NumElts - 1;
895*700637cbSDimitry Andric     // These builtins exist so we can ensure the index is an ICE and in range.
896*700637cbSDimitry Andric     // Otherwise we could just do this in the header file.
897*700637cbSDimitry Andric     return Builder.CreateInsertElement(Ops[0], Ops[1], Index);
898*700637cbSDimitry Andric   }
899*700637cbSDimitry Andric   case X86::BI_mm_setcsr:
900*700637cbSDimitry Andric   case X86::BI__builtin_ia32_ldmxcsr: {
901*700637cbSDimitry Andric     RawAddress Tmp = CreateMemTemp(E->getArg(0)->getType());
902*700637cbSDimitry Andric     Builder.CreateStore(Ops[0], Tmp);
903*700637cbSDimitry Andric     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_ldmxcsr),
904*700637cbSDimitry Andric                               Tmp.getPointer());
905*700637cbSDimitry Andric   }
906*700637cbSDimitry Andric   case X86::BI_mm_getcsr:
907*700637cbSDimitry Andric   case X86::BI__builtin_ia32_stmxcsr: {
908*700637cbSDimitry Andric     RawAddress Tmp = CreateMemTemp(E->getType());
909*700637cbSDimitry Andric     Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_stmxcsr),
910*700637cbSDimitry Andric                        Tmp.getPointer());
911*700637cbSDimitry Andric     return Builder.CreateLoad(Tmp, "stmxcsr");
912*700637cbSDimitry Andric   }
913*700637cbSDimitry Andric   case X86::BI__builtin_ia32_xsave:
914*700637cbSDimitry Andric   case X86::BI__builtin_ia32_xsave64:
915*700637cbSDimitry Andric   case X86::BI__builtin_ia32_xrstor:
916*700637cbSDimitry Andric   case X86::BI__builtin_ia32_xrstor64:
917*700637cbSDimitry Andric   case X86::BI__builtin_ia32_xsaveopt:
918*700637cbSDimitry Andric   case X86::BI__builtin_ia32_xsaveopt64:
919*700637cbSDimitry Andric   case X86::BI__builtin_ia32_xrstors:
920*700637cbSDimitry Andric   case X86::BI__builtin_ia32_xrstors64:
921*700637cbSDimitry Andric   case X86::BI__builtin_ia32_xsavec:
922*700637cbSDimitry Andric   case X86::BI__builtin_ia32_xsavec64:
923*700637cbSDimitry Andric   case X86::BI__builtin_ia32_xsaves:
924*700637cbSDimitry Andric   case X86::BI__builtin_ia32_xsaves64:
925*700637cbSDimitry Andric   case X86::BI__builtin_ia32_xsetbv:
926*700637cbSDimitry Andric   case X86::BI_xsetbv: {
927*700637cbSDimitry Andric     Intrinsic::ID ID;
928*700637cbSDimitry Andric #define INTRINSIC_X86_XSAVE_ID(NAME) \
929*700637cbSDimitry Andric     case X86::BI__builtin_ia32_##NAME: \
930*700637cbSDimitry Andric       ID = Intrinsic::x86_##NAME; \
931*700637cbSDimitry Andric       break
932*700637cbSDimitry Andric     switch (BuiltinID) {
933*700637cbSDimitry Andric     default: llvm_unreachable("Unsupported intrinsic!");
934*700637cbSDimitry Andric     INTRINSIC_X86_XSAVE_ID(xsave);
935*700637cbSDimitry Andric     INTRINSIC_X86_XSAVE_ID(xsave64);
936*700637cbSDimitry Andric     INTRINSIC_X86_XSAVE_ID(xrstor);
937*700637cbSDimitry Andric     INTRINSIC_X86_XSAVE_ID(xrstor64);
938*700637cbSDimitry Andric     INTRINSIC_X86_XSAVE_ID(xsaveopt);
939*700637cbSDimitry Andric     INTRINSIC_X86_XSAVE_ID(xsaveopt64);
940*700637cbSDimitry Andric     INTRINSIC_X86_XSAVE_ID(xrstors);
941*700637cbSDimitry Andric     INTRINSIC_X86_XSAVE_ID(xrstors64);
942*700637cbSDimitry Andric     INTRINSIC_X86_XSAVE_ID(xsavec);
943*700637cbSDimitry Andric     INTRINSIC_X86_XSAVE_ID(xsavec64);
944*700637cbSDimitry Andric     INTRINSIC_X86_XSAVE_ID(xsaves);
945*700637cbSDimitry Andric     INTRINSIC_X86_XSAVE_ID(xsaves64);
946*700637cbSDimitry Andric     INTRINSIC_X86_XSAVE_ID(xsetbv);
947*700637cbSDimitry Andric     case X86::BI_xsetbv:
948*700637cbSDimitry Andric       ID = Intrinsic::x86_xsetbv;
949*700637cbSDimitry Andric       break;
950*700637cbSDimitry Andric     }
951*700637cbSDimitry Andric #undef INTRINSIC_X86_XSAVE_ID
952*700637cbSDimitry Andric     Value *Mhi = Builder.CreateTrunc(
953*700637cbSDimitry Andric       Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, 32)), Int32Ty);
954*700637cbSDimitry Andric     Value *Mlo = Builder.CreateTrunc(Ops[1], Int32Ty);
955*700637cbSDimitry Andric     Ops[1] = Mhi;
956*700637cbSDimitry Andric     Ops.push_back(Mlo);
957*700637cbSDimitry Andric     return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
958*700637cbSDimitry Andric   }
959*700637cbSDimitry Andric   case X86::BI__builtin_ia32_xgetbv:
960*700637cbSDimitry Andric   case X86::BI_xgetbv:
961*700637cbSDimitry Andric     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_xgetbv), Ops);
962*700637cbSDimitry Andric   case X86::BI__builtin_ia32_storedqudi128_mask:
963*700637cbSDimitry Andric   case X86::BI__builtin_ia32_storedqusi128_mask:
964*700637cbSDimitry Andric   case X86::BI__builtin_ia32_storedquhi128_mask:
965*700637cbSDimitry Andric   case X86::BI__builtin_ia32_storedquqi128_mask:
966*700637cbSDimitry Andric   case X86::BI__builtin_ia32_storeupd128_mask:
967*700637cbSDimitry Andric   case X86::BI__builtin_ia32_storeups128_mask:
968*700637cbSDimitry Andric   case X86::BI__builtin_ia32_storedqudi256_mask:
969*700637cbSDimitry Andric   case X86::BI__builtin_ia32_storedqusi256_mask:
970*700637cbSDimitry Andric   case X86::BI__builtin_ia32_storedquhi256_mask:
971*700637cbSDimitry Andric   case X86::BI__builtin_ia32_storedquqi256_mask:
972*700637cbSDimitry Andric   case X86::BI__builtin_ia32_storeupd256_mask:
973*700637cbSDimitry Andric   case X86::BI__builtin_ia32_storeups256_mask:
974*700637cbSDimitry Andric   case X86::BI__builtin_ia32_storedqudi512_mask:
975*700637cbSDimitry Andric   case X86::BI__builtin_ia32_storedqusi512_mask:
976*700637cbSDimitry Andric   case X86::BI__builtin_ia32_storedquhi512_mask:
977*700637cbSDimitry Andric   case X86::BI__builtin_ia32_storedquqi512_mask:
978*700637cbSDimitry Andric   case X86::BI__builtin_ia32_storeupd512_mask:
979*700637cbSDimitry Andric   case X86::BI__builtin_ia32_storeups512_mask:
980*700637cbSDimitry Andric     return EmitX86MaskedStore(*this, Ops, Align(1));
981*700637cbSDimitry Andric 
982*700637cbSDimitry Andric   case X86::BI__builtin_ia32_storesbf16128_mask:
983*700637cbSDimitry Andric   case X86::BI__builtin_ia32_storesh128_mask:
984*700637cbSDimitry Andric   case X86::BI__builtin_ia32_storess128_mask:
985*700637cbSDimitry Andric   case X86::BI__builtin_ia32_storesd128_mask:
986*700637cbSDimitry Andric     return EmitX86MaskedStore(*this, Ops, Align(1));
987*700637cbSDimitry Andric 
988*700637cbSDimitry Andric   case X86::BI__builtin_ia32_cvtmask2b128:
989*700637cbSDimitry Andric   case X86::BI__builtin_ia32_cvtmask2b256:
990*700637cbSDimitry Andric   case X86::BI__builtin_ia32_cvtmask2b512:
991*700637cbSDimitry Andric   case X86::BI__builtin_ia32_cvtmask2w128:
992*700637cbSDimitry Andric   case X86::BI__builtin_ia32_cvtmask2w256:
993*700637cbSDimitry Andric   case X86::BI__builtin_ia32_cvtmask2w512:
994*700637cbSDimitry Andric   case X86::BI__builtin_ia32_cvtmask2d128:
995*700637cbSDimitry Andric   case X86::BI__builtin_ia32_cvtmask2d256:
996*700637cbSDimitry Andric   case X86::BI__builtin_ia32_cvtmask2d512:
997*700637cbSDimitry Andric   case X86::BI__builtin_ia32_cvtmask2q128:
998*700637cbSDimitry Andric   case X86::BI__builtin_ia32_cvtmask2q256:
999*700637cbSDimitry Andric   case X86::BI__builtin_ia32_cvtmask2q512:
1000*700637cbSDimitry Andric     return EmitX86SExtMask(*this, Ops[0], ConvertType(E->getType()));
1001*700637cbSDimitry Andric 
1002*700637cbSDimitry Andric   case X86::BI__builtin_ia32_cvtb2mask128:
1003*700637cbSDimitry Andric   case X86::BI__builtin_ia32_cvtb2mask256:
1004*700637cbSDimitry Andric   case X86::BI__builtin_ia32_cvtb2mask512:
1005*700637cbSDimitry Andric   case X86::BI__builtin_ia32_cvtw2mask128:
1006*700637cbSDimitry Andric   case X86::BI__builtin_ia32_cvtw2mask256:
1007*700637cbSDimitry Andric   case X86::BI__builtin_ia32_cvtw2mask512:
1008*700637cbSDimitry Andric   case X86::BI__builtin_ia32_cvtd2mask128:
1009*700637cbSDimitry Andric   case X86::BI__builtin_ia32_cvtd2mask256:
1010*700637cbSDimitry Andric   case X86::BI__builtin_ia32_cvtd2mask512:
1011*700637cbSDimitry Andric   case X86::BI__builtin_ia32_cvtq2mask128:
1012*700637cbSDimitry Andric   case X86::BI__builtin_ia32_cvtq2mask256:
1013*700637cbSDimitry Andric   case X86::BI__builtin_ia32_cvtq2mask512:
1014*700637cbSDimitry Andric     return EmitX86ConvertToMask(*this, Ops[0]);
1015*700637cbSDimitry Andric 
1016*700637cbSDimitry Andric   case X86::BI__builtin_ia32_cvtdq2ps512_mask:
1017*700637cbSDimitry Andric   case X86::BI__builtin_ia32_cvtqq2ps512_mask:
1018*700637cbSDimitry Andric   case X86::BI__builtin_ia32_cvtqq2pd512_mask:
1019*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vcvtw2ph512_mask:
1020*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vcvtdq2ph512_mask:
1021*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vcvtqq2ph512_mask:
1022*700637cbSDimitry Andric     return EmitX86ConvertIntToFp(*this, E, Ops, /*IsSigned*/ true);
1023*700637cbSDimitry Andric   case X86::BI__builtin_ia32_cvtudq2ps512_mask:
1024*700637cbSDimitry Andric   case X86::BI__builtin_ia32_cvtuqq2ps512_mask:
1025*700637cbSDimitry Andric   case X86::BI__builtin_ia32_cvtuqq2pd512_mask:
1026*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vcvtuw2ph512_mask:
1027*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vcvtudq2ph512_mask:
1028*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vcvtuqq2ph512_mask:
1029*700637cbSDimitry Andric     return EmitX86ConvertIntToFp(*this, E, Ops, /*IsSigned*/ false);
1030*700637cbSDimitry Andric 
1031*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vfmaddss3:
1032*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vfmaddsd3:
1033*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vfmaddsh3_mask:
1034*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vfmaddss3_mask:
1035*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vfmaddsd3_mask:
1036*700637cbSDimitry Andric     return EmitScalarFMAExpr(*this, E, Ops, Ops[0]);
1037*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vfmaddss:
1038*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vfmaddsd:
1039*700637cbSDimitry Andric     return EmitScalarFMAExpr(*this, E, Ops,
1040*700637cbSDimitry Andric                              Constant::getNullValue(Ops[0]->getType()));
1041*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vfmaddsh3_maskz:
1042*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vfmaddss3_maskz:
1043*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vfmaddsd3_maskz:
1044*700637cbSDimitry Andric     return EmitScalarFMAExpr(*this, E, Ops, Ops[0], /*ZeroMask*/ true);
1045*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vfmaddsh3_mask3:
1046*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vfmaddss3_mask3:
1047*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vfmaddsd3_mask3:
1048*700637cbSDimitry Andric     return EmitScalarFMAExpr(*this, E, Ops, Ops[2], /*ZeroMask*/ false, 2);
1049*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vfmsubsh3_mask3:
1050*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vfmsubss3_mask3:
1051*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vfmsubsd3_mask3:
1052*700637cbSDimitry Andric     return EmitScalarFMAExpr(*this, E, Ops, Ops[2], /*ZeroMask*/ false, 2,
1053*700637cbSDimitry Andric                              /*NegAcc*/ true);
1054*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vfmaddph:
1055*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vfmaddps:
1056*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vfmaddpd:
1057*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vfmaddph256:
1058*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vfmaddps256:
1059*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vfmaddpd256:
1060*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vfmaddph512_mask:
1061*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vfmaddph512_maskz:
1062*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vfmaddph512_mask3:
1063*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vfmaddbf16128:
1064*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vfmaddbf16256:
1065*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vfmaddbf16512:
1066*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vfmaddps512_mask:
1067*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vfmaddps512_maskz:
1068*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vfmaddps512_mask3:
1069*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vfmsubps512_mask3:
1070*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vfmaddpd512_mask:
1071*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vfmaddpd512_maskz:
1072*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vfmaddpd512_mask3:
1073*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vfmsubpd512_mask3:
1074*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vfmsubph512_mask3:
1075*700637cbSDimitry Andric     return EmitX86FMAExpr(*this, E, Ops, BuiltinID, /*IsAddSub*/ false);
1076*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vfmaddsubph512_mask:
1077*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vfmaddsubph512_maskz:
1078*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vfmaddsubph512_mask3:
1079*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vfmsubaddph512_mask3:
1080*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vfmaddsubps512_mask:
1081*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vfmaddsubps512_maskz:
1082*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vfmaddsubps512_mask3:
1083*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vfmsubaddps512_mask3:
1084*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vfmaddsubpd512_mask:
1085*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vfmaddsubpd512_maskz:
1086*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vfmaddsubpd512_mask3:
1087*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vfmsubaddpd512_mask3:
1088*700637cbSDimitry Andric     return EmitX86FMAExpr(*this, E, Ops, BuiltinID, /*IsAddSub*/ true);
1089*700637cbSDimitry Andric 
1090*700637cbSDimitry Andric   case X86::BI__builtin_ia32_movdqa32store128_mask:
1091*700637cbSDimitry Andric   case X86::BI__builtin_ia32_movdqa64store128_mask:
1092*700637cbSDimitry Andric   case X86::BI__builtin_ia32_storeaps128_mask:
1093*700637cbSDimitry Andric   case X86::BI__builtin_ia32_storeapd128_mask:
1094*700637cbSDimitry Andric   case X86::BI__builtin_ia32_movdqa32store256_mask:
1095*700637cbSDimitry Andric   case X86::BI__builtin_ia32_movdqa64store256_mask:
1096*700637cbSDimitry Andric   case X86::BI__builtin_ia32_storeaps256_mask:
1097*700637cbSDimitry Andric   case X86::BI__builtin_ia32_storeapd256_mask:
1098*700637cbSDimitry Andric   case X86::BI__builtin_ia32_movdqa32store512_mask:
1099*700637cbSDimitry Andric   case X86::BI__builtin_ia32_movdqa64store512_mask:
1100*700637cbSDimitry Andric   case X86::BI__builtin_ia32_storeaps512_mask:
1101*700637cbSDimitry Andric   case X86::BI__builtin_ia32_storeapd512_mask:
1102*700637cbSDimitry Andric     return EmitX86MaskedStore(
1103*700637cbSDimitry Andric         *this, Ops,
1104*700637cbSDimitry Andric         getContext().getTypeAlignInChars(E->getArg(1)->getType()).getAsAlign());
1105*700637cbSDimitry Andric 
1106*700637cbSDimitry Andric   case X86::BI__builtin_ia32_loadups128_mask:
1107*700637cbSDimitry Andric   case X86::BI__builtin_ia32_loadups256_mask:
1108*700637cbSDimitry Andric   case X86::BI__builtin_ia32_loadups512_mask:
1109*700637cbSDimitry Andric   case X86::BI__builtin_ia32_loadupd128_mask:
1110*700637cbSDimitry Andric   case X86::BI__builtin_ia32_loadupd256_mask:
1111*700637cbSDimitry Andric   case X86::BI__builtin_ia32_loadupd512_mask:
1112*700637cbSDimitry Andric   case X86::BI__builtin_ia32_loaddquqi128_mask:
1113*700637cbSDimitry Andric   case X86::BI__builtin_ia32_loaddquqi256_mask:
1114*700637cbSDimitry Andric   case X86::BI__builtin_ia32_loaddquqi512_mask:
1115*700637cbSDimitry Andric   case X86::BI__builtin_ia32_loaddquhi128_mask:
1116*700637cbSDimitry Andric   case X86::BI__builtin_ia32_loaddquhi256_mask:
1117*700637cbSDimitry Andric   case X86::BI__builtin_ia32_loaddquhi512_mask:
1118*700637cbSDimitry Andric   case X86::BI__builtin_ia32_loaddqusi128_mask:
1119*700637cbSDimitry Andric   case X86::BI__builtin_ia32_loaddqusi256_mask:
1120*700637cbSDimitry Andric   case X86::BI__builtin_ia32_loaddqusi512_mask:
1121*700637cbSDimitry Andric   case X86::BI__builtin_ia32_loaddqudi128_mask:
1122*700637cbSDimitry Andric   case X86::BI__builtin_ia32_loaddqudi256_mask:
1123*700637cbSDimitry Andric   case X86::BI__builtin_ia32_loaddqudi512_mask:
1124*700637cbSDimitry Andric     return EmitX86MaskedLoad(*this, Ops, Align(1));
1125*700637cbSDimitry Andric 
1126*700637cbSDimitry Andric   case X86::BI__builtin_ia32_loadsbf16128_mask:
1127*700637cbSDimitry Andric   case X86::BI__builtin_ia32_loadsh128_mask:
1128*700637cbSDimitry Andric   case X86::BI__builtin_ia32_loadss128_mask:
1129*700637cbSDimitry Andric   case X86::BI__builtin_ia32_loadsd128_mask:
1130*700637cbSDimitry Andric     return EmitX86MaskedLoad(*this, Ops, Align(1));
1131*700637cbSDimitry Andric 
1132*700637cbSDimitry Andric   case X86::BI__builtin_ia32_loadaps128_mask:
1133*700637cbSDimitry Andric   case X86::BI__builtin_ia32_loadaps256_mask:
1134*700637cbSDimitry Andric   case X86::BI__builtin_ia32_loadaps512_mask:
1135*700637cbSDimitry Andric   case X86::BI__builtin_ia32_loadapd128_mask:
1136*700637cbSDimitry Andric   case X86::BI__builtin_ia32_loadapd256_mask:
1137*700637cbSDimitry Andric   case X86::BI__builtin_ia32_loadapd512_mask:
1138*700637cbSDimitry Andric   case X86::BI__builtin_ia32_movdqa32load128_mask:
1139*700637cbSDimitry Andric   case X86::BI__builtin_ia32_movdqa32load256_mask:
1140*700637cbSDimitry Andric   case X86::BI__builtin_ia32_movdqa32load512_mask:
1141*700637cbSDimitry Andric   case X86::BI__builtin_ia32_movdqa64load128_mask:
1142*700637cbSDimitry Andric   case X86::BI__builtin_ia32_movdqa64load256_mask:
1143*700637cbSDimitry Andric   case X86::BI__builtin_ia32_movdqa64load512_mask:
1144*700637cbSDimitry Andric     return EmitX86MaskedLoad(
1145*700637cbSDimitry Andric         *this, Ops,
1146*700637cbSDimitry Andric         getContext().getTypeAlignInChars(E->getArg(1)->getType()).getAsAlign());
1147*700637cbSDimitry Andric 
1148*700637cbSDimitry Andric   case X86::BI__builtin_ia32_expandloaddf128_mask:
1149*700637cbSDimitry Andric   case X86::BI__builtin_ia32_expandloaddf256_mask:
1150*700637cbSDimitry Andric   case X86::BI__builtin_ia32_expandloaddf512_mask:
1151*700637cbSDimitry Andric   case X86::BI__builtin_ia32_expandloadsf128_mask:
1152*700637cbSDimitry Andric   case X86::BI__builtin_ia32_expandloadsf256_mask:
1153*700637cbSDimitry Andric   case X86::BI__builtin_ia32_expandloadsf512_mask:
1154*700637cbSDimitry Andric   case X86::BI__builtin_ia32_expandloaddi128_mask:
1155*700637cbSDimitry Andric   case X86::BI__builtin_ia32_expandloaddi256_mask:
1156*700637cbSDimitry Andric   case X86::BI__builtin_ia32_expandloaddi512_mask:
1157*700637cbSDimitry Andric   case X86::BI__builtin_ia32_expandloadsi128_mask:
1158*700637cbSDimitry Andric   case X86::BI__builtin_ia32_expandloadsi256_mask:
1159*700637cbSDimitry Andric   case X86::BI__builtin_ia32_expandloadsi512_mask:
1160*700637cbSDimitry Andric   case X86::BI__builtin_ia32_expandloadhi128_mask:
1161*700637cbSDimitry Andric   case X86::BI__builtin_ia32_expandloadhi256_mask:
1162*700637cbSDimitry Andric   case X86::BI__builtin_ia32_expandloadhi512_mask:
1163*700637cbSDimitry Andric   case X86::BI__builtin_ia32_expandloadqi128_mask:
1164*700637cbSDimitry Andric   case X86::BI__builtin_ia32_expandloadqi256_mask:
1165*700637cbSDimitry Andric   case X86::BI__builtin_ia32_expandloadqi512_mask:
1166*700637cbSDimitry Andric     return EmitX86ExpandLoad(*this, Ops);
1167*700637cbSDimitry Andric 
1168*700637cbSDimitry Andric   case X86::BI__builtin_ia32_compressstoredf128_mask:
1169*700637cbSDimitry Andric   case X86::BI__builtin_ia32_compressstoredf256_mask:
1170*700637cbSDimitry Andric   case X86::BI__builtin_ia32_compressstoredf512_mask:
1171*700637cbSDimitry Andric   case X86::BI__builtin_ia32_compressstoresf128_mask:
1172*700637cbSDimitry Andric   case X86::BI__builtin_ia32_compressstoresf256_mask:
1173*700637cbSDimitry Andric   case X86::BI__builtin_ia32_compressstoresf512_mask:
1174*700637cbSDimitry Andric   case X86::BI__builtin_ia32_compressstoredi128_mask:
1175*700637cbSDimitry Andric   case X86::BI__builtin_ia32_compressstoredi256_mask:
1176*700637cbSDimitry Andric   case X86::BI__builtin_ia32_compressstoredi512_mask:
1177*700637cbSDimitry Andric   case X86::BI__builtin_ia32_compressstoresi128_mask:
1178*700637cbSDimitry Andric   case X86::BI__builtin_ia32_compressstoresi256_mask:
1179*700637cbSDimitry Andric   case X86::BI__builtin_ia32_compressstoresi512_mask:
1180*700637cbSDimitry Andric   case X86::BI__builtin_ia32_compressstorehi128_mask:
1181*700637cbSDimitry Andric   case X86::BI__builtin_ia32_compressstorehi256_mask:
1182*700637cbSDimitry Andric   case X86::BI__builtin_ia32_compressstorehi512_mask:
1183*700637cbSDimitry Andric   case X86::BI__builtin_ia32_compressstoreqi128_mask:
1184*700637cbSDimitry Andric   case X86::BI__builtin_ia32_compressstoreqi256_mask:
1185*700637cbSDimitry Andric   case X86::BI__builtin_ia32_compressstoreqi512_mask:
1186*700637cbSDimitry Andric     return EmitX86CompressStore(*this, Ops);
1187*700637cbSDimitry Andric 
1188*700637cbSDimitry Andric   case X86::BI__builtin_ia32_expanddf128_mask:
1189*700637cbSDimitry Andric   case X86::BI__builtin_ia32_expanddf256_mask:
1190*700637cbSDimitry Andric   case X86::BI__builtin_ia32_expanddf512_mask:
1191*700637cbSDimitry Andric   case X86::BI__builtin_ia32_expandsf128_mask:
1192*700637cbSDimitry Andric   case X86::BI__builtin_ia32_expandsf256_mask:
1193*700637cbSDimitry Andric   case X86::BI__builtin_ia32_expandsf512_mask:
1194*700637cbSDimitry Andric   case X86::BI__builtin_ia32_expanddi128_mask:
1195*700637cbSDimitry Andric   case X86::BI__builtin_ia32_expanddi256_mask:
1196*700637cbSDimitry Andric   case X86::BI__builtin_ia32_expanddi512_mask:
1197*700637cbSDimitry Andric   case X86::BI__builtin_ia32_expandsi128_mask:
1198*700637cbSDimitry Andric   case X86::BI__builtin_ia32_expandsi256_mask:
1199*700637cbSDimitry Andric   case X86::BI__builtin_ia32_expandsi512_mask:
1200*700637cbSDimitry Andric   case X86::BI__builtin_ia32_expandhi128_mask:
1201*700637cbSDimitry Andric   case X86::BI__builtin_ia32_expandhi256_mask:
1202*700637cbSDimitry Andric   case X86::BI__builtin_ia32_expandhi512_mask:
1203*700637cbSDimitry Andric   case X86::BI__builtin_ia32_expandqi128_mask:
1204*700637cbSDimitry Andric   case X86::BI__builtin_ia32_expandqi256_mask:
1205*700637cbSDimitry Andric   case X86::BI__builtin_ia32_expandqi512_mask:
1206*700637cbSDimitry Andric     return EmitX86CompressExpand(*this, Ops, /*IsCompress*/false);
1207*700637cbSDimitry Andric 
1208*700637cbSDimitry Andric   case X86::BI__builtin_ia32_compressdf128_mask:
1209*700637cbSDimitry Andric   case X86::BI__builtin_ia32_compressdf256_mask:
1210*700637cbSDimitry Andric   case X86::BI__builtin_ia32_compressdf512_mask:
1211*700637cbSDimitry Andric   case X86::BI__builtin_ia32_compresssf128_mask:
1212*700637cbSDimitry Andric   case X86::BI__builtin_ia32_compresssf256_mask:
1213*700637cbSDimitry Andric   case X86::BI__builtin_ia32_compresssf512_mask:
1214*700637cbSDimitry Andric   case X86::BI__builtin_ia32_compressdi128_mask:
1215*700637cbSDimitry Andric   case X86::BI__builtin_ia32_compressdi256_mask:
1216*700637cbSDimitry Andric   case X86::BI__builtin_ia32_compressdi512_mask:
1217*700637cbSDimitry Andric   case X86::BI__builtin_ia32_compresssi128_mask:
1218*700637cbSDimitry Andric   case X86::BI__builtin_ia32_compresssi256_mask:
1219*700637cbSDimitry Andric   case X86::BI__builtin_ia32_compresssi512_mask:
1220*700637cbSDimitry Andric   case X86::BI__builtin_ia32_compresshi128_mask:
1221*700637cbSDimitry Andric   case X86::BI__builtin_ia32_compresshi256_mask:
1222*700637cbSDimitry Andric   case X86::BI__builtin_ia32_compresshi512_mask:
1223*700637cbSDimitry Andric   case X86::BI__builtin_ia32_compressqi128_mask:
1224*700637cbSDimitry Andric   case X86::BI__builtin_ia32_compressqi256_mask:
1225*700637cbSDimitry Andric   case X86::BI__builtin_ia32_compressqi512_mask:
1226*700637cbSDimitry Andric     return EmitX86CompressExpand(*this, Ops, /*IsCompress*/true);
1227*700637cbSDimitry Andric 
1228*700637cbSDimitry Andric   case X86::BI__builtin_ia32_gather3div2df:
1229*700637cbSDimitry Andric   case X86::BI__builtin_ia32_gather3div2di:
1230*700637cbSDimitry Andric   case X86::BI__builtin_ia32_gather3div4df:
1231*700637cbSDimitry Andric   case X86::BI__builtin_ia32_gather3div4di:
1232*700637cbSDimitry Andric   case X86::BI__builtin_ia32_gather3div4sf:
1233*700637cbSDimitry Andric   case X86::BI__builtin_ia32_gather3div4si:
1234*700637cbSDimitry Andric   case X86::BI__builtin_ia32_gather3div8sf:
1235*700637cbSDimitry Andric   case X86::BI__builtin_ia32_gather3div8si:
1236*700637cbSDimitry Andric   case X86::BI__builtin_ia32_gather3siv2df:
1237*700637cbSDimitry Andric   case X86::BI__builtin_ia32_gather3siv2di:
1238*700637cbSDimitry Andric   case X86::BI__builtin_ia32_gather3siv4df:
1239*700637cbSDimitry Andric   case X86::BI__builtin_ia32_gather3siv4di:
1240*700637cbSDimitry Andric   case X86::BI__builtin_ia32_gather3siv4sf:
1241*700637cbSDimitry Andric   case X86::BI__builtin_ia32_gather3siv4si:
1242*700637cbSDimitry Andric   case X86::BI__builtin_ia32_gather3siv8sf:
1243*700637cbSDimitry Andric   case X86::BI__builtin_ia32_gather3siv8si:
1244*700637cbSDimitry Andric   case X86::BI__builtin_ia32_gathersiv8df:
1245*700637cbSDimitry Andric   case X86::BI__builtin_ia32_gathersiv16sf:
1246*700637cbSDimitry Andric   case X86::BI__builtin_ia32_gatherdiv8df:
1247*700637cbSDimitry Andric   case X86::BI__builtin_ia32_gatherdiv16sf:
1248*700637cbSDimitry Andric   case X86::BI__builtin_ia32_gathersiv8di:
1249*700637cbSDimitry Andric   case X86::BI__builtin_ia32_gathersiv16si:
1250*700637cbSDimitry Andric   case X86::BI__builtin_ia32_gatherdiv8di:
1251*700637cbSDimitry Andric   case X86::BI__builtin_ia32_gatherdiv16si: {
1252*700637cbSDimitry Andric     Intrinsic::ID IID;
1253*700637cbSDimitry Andric     switch (BuiltinID) {
1254*700637cbSDimitry Andric     default: llvm_unreachable("Unexpected builtin");
1255*700637cbSDimitry Andric     case X86::BI__builtin_ia32_gather3div2df:
1256*700637cbSDimitry Andric       IID = Intrinsic::x86_avx512_mask_gather3div2_df;
1257*700637cbSDimitry Andric       break;
1258*700637cbSDimitry Andric     case X86::BI__builtin_ia32_gather3div2di:
1259*700637cbSDimitry Andric       IID = Intrinsic::x86_avx512_mask_gather3div2_di;
1260*700637cbSDimitry Andric       break;
1261*700637cbSDimitry Andric     case X86::BI__builtin_ia32_gather3div4df:
1262*700637cbSDimitry Andric       IID = Intrinsic::x86_avx512_mask_gather3div4_df;
1263*700637cbSDimitry Andric       break;
1264*700637cbSDimitry Andric     case X86::BI__builtin_ia32_gather3div4di:
1265*700637cbSDimitry Andric       IID = Intrinsic::x86_avx512_mask_gather3div4_di;
1266*700637cbSDimitry Andric       break;
1267*700637cbSDimitry Andric     case X86::BI__builtin_ia32_gather3div4sf:
1268*700637cbSDimitry Andric       IID = Intrinsic::x86_avx512_mask_gather3div4_sf;
1269*700637cbSDimitry Andric       break;
1270*700637cbSDimitry Andric     case X86::BI__builtin_ia32_gather3div4si:
1271*700637cbSDimitry Andric       IID = Intrinsic::x86_avx512_mask_gather3div4_si;
1272*700637cbSDimitry Andric       break;
1273*700637cbSDimitry Andric     case X86::BI__builtin_ia32_gather3div8sf:
1274*700637cbSDimitry Andric       IID = Intrinsic::x86_avx512_mask_gather3div8_sf;
1275*700637cbSDimitry Andric       break;
1276*700637cbSDimitry Andric     case X86::BI__builtin_ia32_gather3div8si:
1277*700637cbSDimitry Andric       IID = Intrinsic::x86_avx512_mask_gather3div8_si;
1278*700637cbSDimitry Andric       break;
1279*700637cbSDimitry Andric     case X86::BI__builtin_ia32_gather3siv2df:
1280*700637cbSDimitry Andric       IID = Intrinsic::x86_avx512_mask_gather3siv2_df;
1281*700637cbSDimitry Andric       break;
1282*700637cbSDimitry Andric     case X86::BI__builtin_ia32_gather3siv2di:
1283*700637cbSDimitry Andric       IID = Intrinsic::x86_avx512_mask_gather3siv2_di;
1284*700637cbSDimitry Andric       break;
1285*700637cbSDimitry Andric     case X86::BI__builtin_ia32_gather3siv4df:
1286*700637cbSDimitry Andric       IID = Intrinsic::x86_avx512_mask_gather3siv4_df;
1287*700637cbSDimitry Andric       break;
1288*700637cbSDimitry Andric     case X86::BI__builtin_ia32_gather3siv4di:
1289*700637cbSDimitry Andric       IID = Intrinsic::x86_avx512_mask_gather3siv4_di;
1290*700637cbSDimitry Andric       break;
1291*700637cbSDimitry Andric     case X86::BI__builtin_ia32_gather3siv4sf:
1292*700637cbSDimitry Andric       IID = Intrinsic::x86_avx512_mask_gather3siv4_sf;
1293*700637cbSDimitry Andric       break;
1294*700637cbSDimitry Andric     case X86::BI__builtin_ia32_gather3siv4si:
1295*700637cbSDimitry Andric       IID = Intrinsic::x86_avx512_mask_gather3siv4_si;
1296*700637cbSDimitry Andric       break;
1297*700637cbSDimitry Andric     case X86::BI__builtin_ia32_gather3siv8sf:
1298*700637cbSDimitry Andric       IID = Intrinsic::x86_avx512_mask_gather3siv8_sf;
1299*700637cbSDimitry Andric       break;
1300*700637cbSDimitry Andric     case X86::BI__builtin_ia32_gather3siv8si:
1301*700637cbSDimitry Andric       IID = Intrinsic::x86_avx512_mask_gather3siv8_si;
1302*700637cbSDimitry Andric       break;
1303*700637cbSDimitry Andric     case X86::BI__builtin_ia32_gathersiv8df:
1304*700637cbSDimitry Andric       IID = Intrinsic::x86_avx512_mask_gather_dpd_512;
1305*700637cbSDimitry Andric       break;
1306*700637cbSDimitry Andric     case X86::BI__builtin_ia32_gathersiv16sf:
1307*700637cbSDimitry Andric       IID = Intrinsic::x86_avx512_mask_gather_dps_512;
1308*700637cbSDimitry Andric       break;
1309*700637cbSDimitry Andric     case X86::BI__builtin_ia32_gatherdiv8df:
1310*700637cbSDimitry Andric       IID = Intrinsic::x86_avx512_mask_gather_qpd_512;
1311*700637cbSDimitry Andric       break;
1312*700637cbSDimitry Andric     case X86::BI__builtin_ia32_gatherdiv16sf:
1313*700637cbSDimitry Andric       IID = Intrinsic::x86_avx512_mask_gather_qps_512;
1314*700637cbSDimitry Andric       break;
1315*700637cbSDimitry Andric     case X86::BI__builtin_ia32_gathersiv8di:
1316*700637cbSDimitry Andric       IID = Intrinsic::x86_avx512_mask_gather_dpq_512;
1317*700637cbSDimitry Andric       break;
1318*700637cbSDimitry Andric     case X86::BI__builtin_ia32_gathersiv16si:
1319*700637cbSDimitry Andric       IID = Intrinsic::x86_avx512_mask_gather_dpi_512;
1320*700637cbSDimitry Andric       break;
1321*700637cbSDimitry Andric     case X86::BI__builtin_ia32_gatherdiv8di:
1322*700637cbSDimitry Andric       IID = Intrinsic::x86_avx512_mask_gather_qpq_512;
1323*700637cbSDimitry Andric       break;
1324*700637cbSDimitry Andric     case X86::BI__builtin_ia32_gatherdiv16si:
1325*700637cbSDimitry Andric       IID = Intrinsic::x86_avx512_mask_gather_qpi_512;
1326*700637cbSDimitry Andric       break;
1327*700637cbSDimitry Andric     }
1328*700637cbSDimitry Andric 
1329*700637cbSDimitry Andric     unsigned MinElts = std::min(
1330*700637cbSDimitry Andric         cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements(),
1331*700637cbSDimitry Andric         cast<llvm::FixedVectorType>(Ops[2]->getType())->getNumElements());
1332*700637cbSDimitry Andric     Ops[3] = getMaskVecValue(*this, Ops[3], MinElts);
1333*700637cbSDimitry Andric     Function *Intr = CGM.getIntrinsic(IID);
1334*700637cbSDimitry Andric     return Builder.CreateCall(Intr, Ops);
1335*700637cbSDimitry Andric   }
1336*700637cbSDimitry Andric 
1337*700637cbSDimitry Andric   case X86::BI__builtin_ia32_scattersiv8df:
1338*700637cbSDimitry Andric   case X86::BI__builtin_ia32_scattersiv16sf:
1339*700637cbSDimitry Andric   case X86::BI__builtin_ia32_scatterdiv8df:
1340*700637cbSDimitry Andric   case X86::BI__builtin_ia32_scatterdiv16sf:
1341*700637cbSDimitry Andric   case X86::BI__builtin_ia32_scattersiv8di:
1342*700637cbSDimitry Andric   case X86::BI__builtin_ia32_scattersiv16si:
1343*700637cbSDimitry Andric   case X86::BI__builtin_ia32_scatterdiv8di:
1344*700637cbSDimitry Andric   case X86::BI__builtin_ia32_scatterdiv16si:
1345*700637cbSDimitry Andric   case X86::BI__builtin_ia32_scatterdiv2df:
1346*700637cbSDimitry Andric   case X86::BI__builtin_ia32_scatterdiv2di:
1347*700637cbSDimitry Andric   case X86::BI__builtin_ia32_scatterdiv4df:
1348*700637cbSDimitry Andric   case X86::BI__builtin_ia32_scatterdiv4di:
1349*700637cbSDimitry Andric   case X86::BI__builtin_ia32_scatterdiv4sf:
1350*700637cbSDimitry Andric   case X86::BI__builtin_ia32_scatterdiv4si:
1351*700637cbSDimitry Andric   case X86::BI__builtin_ia32_scatterdiv8sf:
1352*700637cbSDimitry Andric   case X86::BI__builtin_ia32_scatterdiv8si:
1353*700637cbSDimitry Andric   case X86::BI__builtin_ia32_scattersiv2df:
1354*700637cbSDimitry Andric   case X86::BI__builtin_ia32_scattersiv2di:
1355*700637cbSDimitry Andric   case X86::BI__builtin_ia32_scattersiv4df:
1356*700637cbSDimitry Andric   case X86::BI__builtin_ia32_scattersiv4di:
1357*700637cbSDimitry Andric   case X86::BI__builtin_ia32_scattersiv4sf:
1358*700637cbSDimitry Andric   case X86::BI__builtin_ia32_scattersiv4si:
1359*700637cbSDimitry Andric   case X86::BI__builtin_ia32_scattersiv8sf:
1360*700637cbSDimitry Andric   case X86::BI__builtin_ia32_scattersiv8si: {
1361*700637cbSDimitry Andric     Intrinsic::ID IID;
1362*700637cbSDimitry Andric     switch (BuiltinID) {
1363*700637cbSDimitry Andric     default: llvm_unreachable("Unexpected builtin");
1364*700637cbSDimitry Andric     case X86::BI__builtin_ia32_scattersiv8df:
1365*700637cbSDimitry Andric       IID = Intrinsic::x86_avx512_mask_scatter_dpd_512;
1366*700637cbSDimitry Andric       break;
1367*700637cbSDimitry Andric     case X86::BI__builtin_ia32_scattersiv16sf:
1368*700637cbSDimitry Andric       IID = Intrinsic::x86_avx512_mask_scatter_dps_512;
1369*700637cbSDimitry Andric       break;
1370*700637cbSDimitry Andric     case X86::BI__builtin_ia32_scatterdiv8df:
1371*700637cbSDimitry Andric       IID = Intrinsic::x86_avx512_mask_scatter_qpd_512;
1372*700637cbSDimitry Andric       break;
1373*700637cbSDimitry Andric     case X86::BI__builtin_ia32_scatterdiv16sf:
1374*700637cbSDimitry Andric       IID = Intrinsic::x86_avx512_mask_scatter_qps_512;
1375*700637cbSDimitry Andric       break;
1376*700637cbSDimitry Andric     case X86::BI__builtin_ia32_scattersiv8di:
1377*700637cbSDimitry Andric       IID = Intrinsic::x86_avx512_mask_scatter_dpq_512;
1378*700637cbSDimitry Andric       break;
1379*700637cbSDimitry Andric     case X86::BI__builtin_ia32_scattersiv16si:
1380*700637cbSDimitry Andric       IID = Intrinsic::x86_avx512_mask_scatter_dpi_512;
1381*700637cbSDimitry Andric       break;
1382*700637cbSDimitry Andric     case X86::BI__builtin_ia32_scatterdiv8di:
1383*700637cbSDimitry Andric       IID = Intrinsic::x86_avx512_mask_scatter_qpq_512;
1384*700637cbSDimitry Andric       break;
1385*700637cbSDimitry Andric     case X86::BI__builtin_ia32_scatterdiv16si:
1386*700637cbSDimitry Andric       IID = Intrinsic::x86_avx512_mask_scatter_qpi_512;
1387*700637cbSDimitry Andric       break;
1388*700637cbSDimitry Andric     case X86::BI__builtin_ia32_scatterdiv2df:
1389*700637cbSDimitry Andric       IID = Intrinsic::x86_avx512_mask_scatterdiv2_df;
1390*700637cbSDimitry Andric       break;
1391*700637cbSDimitry Andric     case X86::BI__builtin_ia32_scatterdiv2di:
1392*700637cbSDimitry Andric       IID = Intrinsic::x86_avx512_mask_scatterdiv2_di;
1393*700637cbSDimitry Andric       break;
1394*700637cbSDimitry Andric     case X86::BI__builtin_ia32_scatterdiv4df:
1395*700637cbSDimitry Andric       IID = Intrinsic::x86_avx512_mask_scatterdiv4_df;
1396*700637cbSDimitry Andric       break;
1397*700637cbSDimitry Andric     case X86::BI__builtin_ia32_scatterdiv4di:
1398*700637cbSDimitry Andric       IID = Intrinsic::x86_avx512_mask_scatterdiv4_di;
1399*700637cbSDimitry Andric       break;
1400*700637cbSDimitry Andric     case X86::BI__builtin_ia32_scatterdiv4sf:
1401*700637cbSDimitry Andric       IID = Intrinsic::x86_avx512_mask_scatterdiv4_sf;
1402*700637cbSDimitry Andric       break;
1403*700637cbSDimitry Andric     case X86::BI__builtin_ia32_scatterdiv4si:
1404*700637cbSDimitry Andric       IID = Intrinsic::x86_avx512_mask_scatterdiv4_si;
1405*700637cbSDimitry Andric       break;
1406*700637cbSDimitry Andric     case X86::BI__builtin_ia32_scatterdiv8sf:
1407*700637cbSDimitry Andric       IID = Intrinsic::x86_avx512_mask_scatterdiv8_sf;
1408*700637cbSDimitry Andric       break;
1409*700637cbSDimitry Andric     case X86::BI__builtin_ia32_scatterdiv8si:
1410*700637cbSDimitry Andric       IID = Intrinsic::x86_avx512_mask_scatterdiv8_si;
1411*700637cbSDimitry Andric       break;
1412*700637cbSDimitry Andric     case X86::BI__builtin_ia32_scattersiv2df:
1413*700637cbSDimitry Andric       IID = Intrinsic::x86_avx512_mask_scattersiv2_df;
1414*700637cbSDimitry Andric       break;
1415*700637cbSDimitry Andric     case X86::BI__builtin_ia32_scattersiv2di:
1416*700637cbSDimitry Andric       IID = Intrinsic::x86_avx512_mask_scattersiv2_di;
1417*700637cbSDimitry Andric       break;
1418*700637cbSDimitry Andric     case X86::BI__builtin_ia32_scattersiv4df:
1419*700637cbSDimitry Andric       IID = Intrinsic::x86_avx512_mask_scattersiv4_df;
1420*700637cbSDimitry Andric       break;
1421*700637cbSDimitry Andric     case X86::BI__builtin_ia32_scattersiv4di:
1422*700637cbSDimitry Andric       IID = Intrinsic::x86_avx512_mask_scattersiv4_di;
1423*700637cbSDimitry Andric       break;
1424*700637cbSDimitry Andric     case X86::BI__builtin_ia32_scattersiv4sf:
1425*700637cbSDimitry Andric       IID = Intrinsic::x86_avx512_mask_scattersiv4_sf;
1426*700637cbSDimitry Andric       break;
1427*700637cbSDimitry Andric     case X86::BI__builtin_ia32_scattersiv4si:
1428*700637cbSDimitry Andric       IID = Intrinsic::x86_avx512_mask_scattersiv4_si;
1429*700637cbSDimitry Andric       break;
1430*700637cbSDimitry Andric     case X86::BI__builtin_ia32_scattersiv8sf:
1431*700637cbSDimitry Andric       IID = Intrinsic::x86_avx512_mask_scattersiv8_sf;
1432*700637cbSDimitry Andric       break;
1433*700637cbSDimitry Andric     case X86::BI__builtin_ia32_scattersiv8si:
1434*700637cbSDimitry Andric       IID = Intrinsic::x86_avx512_mask_scattersiv8_si;
1435*700637cbSDimitry Andric       break;
1436*700637cbSDimitry Andric     }
1437*700637cbSDimitry Andric 
1438*700637cbSDimitry Andric     unsigned MinElts = std::min(
1439*700637cbSDimitry Andric         cast<llvm::FixedVectorType>(Ops[2]->getType())->getNumElements(),
1440*700637cbSDimitry Andric         cast<llvm::FixedVectorType>(Ops[3]->getType())->getNumElements());
1441*700637cbSDimitry Andric     Ops[1] = getMaskVecValue(*this, Ops[1], MinElts);
1442*700637cbSDimitry Andric     Function *Intr = CGM.getIntrinsic(IID);
1443*700637cbSDimitry Andric     return Builder.CreateCall(Intr, Ops);
1444*700637cbSDimitry Andric   }
1445*700637cbSDimitry Andric 
1446*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vextractf128_pd256:
1447*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vextractf128_ps256:
1448*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vextractf128_si256:
1449*700637cbSDimitry Andric   case X86::BI__builtin_ia32_extract128i256:
1450*700637cbSDimitry Andric   case X86::BI__builtin_ia32_extractf64x4_mask:
1451*700637cbSDimitry Andric   case X86::BI__builtin_ia32_extractf32x4_mask:
1452*700637cbSDimitry Andric   case X86::BI__builtin_ia32_extracti64x4_mask:
1453*700637cbSDimitry Andric   case X86::BI__builtin_ia32_extracti32x4_mask:
1454*700637cbSDimitry Andric   case X86::BI__builtin_ia32_extractf32x8_mask:
1455*700637cbSDimitry Andric   case X86::BI__builtin_ia32_extracti32x8_mask:
1456*700637cbSDimitry Andric   case X86::BI__builtin_ia32_extractf32x4_256_mask:
1457*700637cbSDimitry Andric   case X86::BI__builtin_ia32_extracti32x4_256_mask:
1458*700637cbSDimitry Andric   case X86::BI__builtin_ia32_extractf64x2_256_mask:
1459*700637cbSDimitry Andric   case X86::BI__builtin_ia32_extracti64x2_256_mask:
1460*700637cbSDimitry Andric   case X86::BI__builtin_ia32_extractf64x2_512_mask:
1461*700637cbSDimitry Andric   case X86::BI__builtin_ia32_extracti64x2_512_mask: {
1462*700637cbSDimitry Andric     auto *DstTy = cast<llvm::FixedVectorType>(ConvertType(E->getType()));
1463*700637cbSDimitry Andric     unsigned NumElts = DstTy->getNumElements();
1464*700637cbSDimitry Andric     unsigned SrcNumElts =
1465*700637cbSDimitry Andric         cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
1466*700637cbSDimitry Andric     unsigned SubVectors = SrcNumElts / NumElts;
1467*700637cbSDimitry Andric     unsigned Index = cast<ConstantInt>(Ops[1])->getZExtValue();
1468*700637cbSDimitry Andric     assert(llvm::isPowerOf2_32(SubVectors) && "Expected power of 2 subvectors");
1469*700637cbSDimitry Andric     Index &= SubVectors - 1; // Remove any extra bits.
1470*700637cbSDimitry Andric     Index *= NumElts;
1471*700637cbSDimitry Andric 
1472*700637cbSDimitry Andric     int Indices[16];
1473*700637cbSDimitry Andric     for (unsigned i = 0; i != NumElts; ++i)
1474*700637cbSDimitry Andric       Indices[i] = i + Index;
1475*700637cbSDimitry Andric 
1476*700637cbSDimitry Andric     Value *Res = Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),
1477*700637cbSDimitry Andric                                              "extract");
1478*700637cbSDimitry Andric 
1479*700637cbSDimitry Andric     if (Ops.size() == 4)
1480*700637cbSDimitry Andric       Res = EmitX86Select(*this, Ops[3], Res, Ops[2]);
1481*700637cbSDimitry Andric 
1482*700637cbSDimitry Andric     return Res;
1483*700637cbSDimitry Andric   }
1484*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vinsertf128_pd256:
1485*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vinsertf128_ps256:
1486*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vinsertf128_si256:
1487*700637cbSDimitry Andric   case X86::BI__builtin_ia32_insert128i256:
1488*700637cbSDimitry Andric   case X86::BI__builtin_ia32_insertf64x4:
1489*700637cbSDimitry Andric   case X86::BI__builtin_ia32_insertf32x4:
1490*700637cbSDimitry Andric   case X86::BI__builtin_ia32_inserti64x4:
1491*700637cbSDimitry Andric   case X86::BI__builtin_ia32_inserti32x4:
1492*700637cbSDimitry Andric   case X86::BI__builtin_ia32_insertf32x8:
1493*700637cbSDimitry Andric   case X86::BI__builtin_ia32_inserti32x8:
1494*700637cbSDimitry Andric   case X86::BI__builtin_ia32_insertf32x4_256:
1495*700637cbSDimitry Andric   case X86::BI__builtin_ia32_inserti32x4_256:
1496*700637cbSDimitry Andric   case X86::BI__builtin_ia32_insertf64x2_256:
1497*700637cbSDimitry Andric   case X86::BI__builtin_ia32_inserti64x2_256:
1498*700637cbSDimitry Andric   case X86::BI__builtin_ia32_insertf64x2_512:
1499*700637cbSDimitry Andric   case X86::BI__builtin_ia32_inserti64x2_512: {
1500*700637cbSDimitry Andric     unsigned DstNumElts =
1501*700637cbSDimitry Andric         cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
1502*700637cbSDimitry Andric     unsigned SrcNumElts =
1503*700637cbSDimitry Andric         cast<llvm::FixedVectorType>(Ops[1]->getType())->getNumElements();
1504*700637cbSDimitry Andric     unsigned SubVectors = DstNumElts / SrcNumElts;
1505*700637cbSDimitry Andric     unsigned Index = cast<ConstantInt>(Ops[2])->getZExtValue();
1506*700637cbSDimitry Andric     assert(llvm::isPowerOf2_32(SubVectors) && "Expected power of 2 subvectors");
1507*700637cbSDimitry Andric     Index &= SubVectors - 1; // Remove any extra bits.
1508*700637cbSDimitry Andric     Index *= SrcNumElts;
1509*700637cbSDimitry Andric 
1510*700637cbSDimitry Andric     int Indices[16];
1511*700637cbSDimitry Andric     for (unsigned i = 0; i != DstNumElts; ++i)
1512*700637cbSDimitry Andric       Indices[i] = (i >= SrcNumElts) ? SrcNumElts + (i % SrcNumElts) : i;
1513*700637cbSDimitry Andric 
1514*700637cbSDimitry Andric     Value *Op1 = Builder.CreateShuffleVector(
1515*700637cbSDimitry Andric         Ops[1], ArrayRef(Indices, DstNumElts), "widen");
1516*700637cbSDimitry Andric 
1517*700637cbSDimitry Andric     for (unsigned i = 0; i != DstNumElts; ++i) {
1518*700637cbSDimitry Andric       if (i >= Index && i < (Index + SrcNumElts))
1519*700637cbSDimitry Andric         Indices[i] = (i - Index) + DstNumElts;
1520*700637cbSDimitry Andric       else
1521*700637cbSDimitry Andric         Indices[i] = i;
1522*700637cbSDimitry Andric     }
1523*700637cbSDimitry Andric 
1524*700637cbSDimitry Andric     return Builder.CreateShuffleVector(Ops[0], Op1,
1525*700637cbSDimitry Andric                                        ArrayRef(Indices, DstNumElts), "insert");
1526*700637cbSDimitry Andric   }
1527*700637cbSDimitry Andric   case X86::BI__builtin_ia32_pmovqd512_mask:
1528*700637cbSDimitry Andric   case X86::BI__builtin_ia32_pmovwb512_mask: {
1529*700637cbSDimitry Andric     Value *Res = Builder.CreateTrunc(Ops[0], Ops[1]->getType());
1530*700637cbSDimitry Andric     return EmitX86Select(*this, Ops[2], Res, Ops[1]);
1531*700637cbSDimitry Andric   }
1532*700637cbSDimitry Andric   case X86::BI__builtin_ia32_pmovdb512_mask:
1533*700637cbSDimitry Andric   case X86::BI__builtin_ia32_pmovdw512_mask:
1534*700637cbSDimitry Andric   case X86::BI__builtin_ia32_pmovqw512_mask: {
1535*700637cbSDimitry Andric     if (const auto *C = dyn_cast<Constant>(Ops[2]))
1536*700637cbSDimitry Andric       if (C->isAllOnesValue())
1537*700637cbSDimitry Andric         return Builder.CreateTrunc(Ops[0], Ops[1]->getType());
1538*700637cbSDimitry Andric 
1539*700637cbSDimitry Andric     Intrinsic::ID IID;
1540*700637cbSDimitry Andric     switch (BuiltinID) {
1541*700637cbSDimitry Andric     default: llvm_unreachable("Unsupported intrinsic!");
1542*700637cbSDimitry Andric     case X86::BI__builtin_ia32_pmovdb512_mask:
1543*700637cbSDimitry Andric       IID = Intrinsic::x86_avx512_mask_pmov_db_512;
1544*700637cbSDimitry Andric       break;
1545*700637cbSDimitry Andric     case X86::BI__builtin_ia32_pmovdw512_mask:
1546*700637cbSDimitry Andric       IID = Intrinsic::x86_avx512_mask_pmov_dw_512;
1547*700637cbSDimitry Andric       break;
1548*700637cbSDimitry Andric     case X86::BI__builtin_ia32_pmovqw512_mask:
1549*700637cbSDimitry Andric       IID = Intrinsic::x86_avx512_mask_pmov_qw_512;
1550*700637cbSDimitry Andric       break;
1551*700637cbSDimitry Andric     }
1552*700637cbSDimitry Andric 
1553*700637cbSDimitry Andric     Function *Intr = CGM.getIntrinsic(IID);
1554*700637cbSDimitry Andric     return Builder.CreateCall(Intr, Ops);
1555*700637cbSDimitry Andric   }
1556*700637cbSDimitry Andric   case X86::BI__builtin_ia32_pblendw128:
1557*700637cbSDimitry Andric   case X86::BI__builtin_ia32_blendpd:
1558*700637cbSDimitry Andric   case X86::BI__builtin_ia32_blendps:
1559*700637cbSDimitry Andric   case X86::BI__builtin_ia32_blendpd256:
1560*700637cbSDimitry Andric   case X86::BI__builtin_ia32_blendps256:
1561*700637cbSDimitry Andric   case X86::BI__builtin_ia32_pblendw256:
1562*700637cbSDimitry Andric   case X86::BI__builtin_ia32_pblendd128:
1563*700637cbSDimitry Andric   case X86::BI__builtin_ia32_pblendd256: {
1564*700637cbSDimitry Andric     unsigned NumElts =
1565*700637cbSDimitry Andric         cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
1566*700637cbSDimitry Andric     unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
1567*700637cbSDimitry Andric 
1568*700637cbSDimitry Andric     int Indices[16];
1569*700637cbSDimitry Andric     // If there are more than 8 elements, the immediate is used twice so make
1570*700637cbSDimitry Andric     // sure we handle that.
1571*700637cbSDimitry Andric     for (unsigned i = 0; i != NumElts; ++i)
1572*700637cbSDimitry Andric       Indices[i] = ((Imm >> (i % 8)) & 0x1) ? NumElts + i : i;
1573*700637cbSDimitry Andric 
1574*700637cbSDimitry Andric     return Builder.CreateShuffleVector(Ops[0], Ops[1],
1575*700637cbSDimitry Andric                                        ArrayRef(Indices, NumElts), "blend");
1576*700637cbSDimitry Andric   }
1577*700637cbSDimitry Andric   case X86::BI__builtin_ia32_pshuflw:
1578*700637cbSDimitry Andric   case X86::BI__builtin_ia32_pshuflw256:
1579*700637cbSDimitry Andric   case X86::BI__builtin_ia32_pshuflw512: {
1580*700637cbSDimitry Andric     uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
1581*700637cbSDimitry Andric     auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
1582*700637cbSDimitry Andric     unsigned NumElts = Ty->getNumElements();
1583*700637cbSDimitry Andric 
1584*700637cbSDimitry Andric     // Splat the 8-bits of immediate 4 times to help the loop wrap around.
1585*700637cbSDimitry Andric     Imm = (Imm & 0xff) * 0x01010101;
1586*700637cbSDimitry Andric 
1587*700637cbSDimitry Andric     int Indices[32];
1588*700637cbSDimitry Andric     for (unsigned l = 0; l != NumElts; l += 8) {
1589*700637cbSDimitry Andric       for (unsigned i = 0; i != 4; ++i) {
1590*700637cbSDimitry Andric         Indices[l + i] = l + (Imm & 3);
1591*700637cbSDimitry Andric         Imm >>= 2;
1592*700637cbSDimitry Andric       }
1593*700637cbSDimitry Andric       for (unsigned i = 4; i != 8; ++i)
1594*700637cbSDimitry Andric         Indices[l + i] = l + i;
1595*700637cbSDimitry Andric     }
1596*700637cbSDimitry Andric 
1597*700637cbSDimitry Andric     return Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),
1598*700637cbSDimitry Andric                                        "pshuflw");
1599*700637cbSDimitry Andric   }
1600*700637cbSDimitry Andric   case X86::BI__builtin_ia32_pshufhw:
1601*700637cbSDimitry Andric   case X86::BI__builtin_ia32_pshufhw256:
1602*700637cbSDimitry Andric   case X86::BI__builtin_ia32_pshufhw512: {
1603*700637cbSDimitry Andric     uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
1604*700637cbSDimitry Andric     auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
1605*700637cbSDimitry Andric     unsigned NumElts = Ty->getNumElements();
1606*700637cbSDimitry Andric 
1607*700637cbSDimitry Andric     // Splat the 8-bits of immediate 4 times to help the loop wrap around.
1608*700637cbSDimitry Andric     Imm = (Imm & 0xff) * 0x01010101;
1609*700637cbSDimitry Andric 
1610*700637cbSDimitry Andric     int Indices[32];
1611*700637cbSDimitry Andric     for (unsigned l = 0; l != NumElts; l += 8) {
1612*700637cbSDimitry Andric       for (unsigned i = 0; i != 4; ++i)
1613*700637cbSDimitry Andric         Indices[l + i] = l + i;
1614*700637cbSDimitry Andric       for (unsigned i = 4; i != 8; ++i) {
1615*700637cbSDimitry Andric         Indices[l + i] = l + 4 + (Imm & 3);
1616*700637cbSDimitry Andric         Imm >>= 2;
1617*700637cbSDimitry Andric       }
1618*700637cbSDimitry Andric     }
1619*700637cbSDimitry Andric 
1620*700637cbSDimitry Andric     return Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),
1621*700637cbSDimitry Andric                                        "pshufhw");
1622*700637cbSDimitry Andric   }
1623*700637cbSDimitry Andric   case X86::BI__builtin_ia32_pshufd:
1624*700637cbSDimitry Andric   case X86::BI__builtin_ia32_pshufd256:
1625*700637cbSDimitry Andric   case X86::BI__builtin_ia32_pshufd512:
1626*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vpermilpd:
1627*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vpermilps:
1628*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vpermilpd256:
1629*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vpermilps256:
1630*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vpermilpd512:
1631*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vpermilps512: {
1632*700637cbSDimitry Andric     uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
1633*700637cbSDimitry Andric     auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
1634*700637cbSDimitry Andric     unsigned NumElts = Ty->getNumElements();
1635*700637cbSDimitry Andric     unsigned NumLanes = Ty->getPrimitiveSizeInBits() / 128;
1636*700637cbSDimitry Andric     unsigned NumLaneElts = NumElts / NumLanes;
1637*700637cbSDimitry Andric 
1638*700637cbSDimitry Andric     // Splat the 8-bits of immediate 4 times to help the loop wrap around.
1639*700637cbSDimitry Andric     Imm = (Imm & 0xff) * 0x01010101;
1640*700637cbSDimitry Andric 
1641*700637cbSDimitry Andric     int Indices[16];
1642*700637cbSDimitry Andric     for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
1643*700637cbSDimitry Andric       for (unsigned i = 0; i != NumLaneElts; ++i) {
1644*700637cbSDimitry Andric         Indices[i + l] = (Imm % NumLaneElts) + l;
1645*700637cbSDimitry Andric         Imm /= NumLaneElts;
1646*700637cbSDimitry Andric       }
1647*700637cbSDimitry Andric     }
1648*700637cbSDimitry Andric 
1649*700637cbSDimitry Andric     return Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),
1650*700637cbSDimitry Andric                                        "permil");
1651*700637cbSDimitry Andric   }
1652*700637cbSDimitry Andric   case X86::BI__builtin_ia32_shufpd:
1653*700637cbSDimitry Andric   case X86::BI__builtin_ia32_shufpd256:
1654*700637cbSDimitry Andric   case X86::BI__builtin_ia32_shufpd512:
1655*700637cbSDimitry Andric   case X86::BI__builtin_ia32_shufps:
1656*700637cbSDimitry Andric   case X86::BI__builtin_ia32_shufps256:
1657*700637cbSDimitry Andric   case X86::BI__builtin_ia32_shufps512: {
1658*700637cbSDimitry Andric     uint32_t Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
1659*700637cbSDimitry Andric     auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
1660*700637cbSDimitry Andric     unsigned NumElts = Ty->getNumElements();
1661*700637cbSDimitry Andric     unsigned NumLanes = Ty->getPrimitiveSizeInBits() / 128;
1662*700637cbSDimitry Andric     unsigned NumLaneElts = NumElts / NumLanes;
1663*700637cbSDimitry Andric 
1664*700637cbSDimitry Andric     // Splat the 8-bits of immediate 4 times to help the loop wrap around.
1665*700637cbSDimitry Andric     Imm = (Imm & 0xff) * 0x01010101;
1666*700637cbSDimitry Andric 
1667*700637cbSDimitry Andric     int Indices[16];
1668*700637cbSDimitry Andric     for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
1669*700637cbSDimitry Andric       for (unsigned i = 0; i != NumLaneElts; ++i) {
1670*700637cbSDimitry Andric         unsigned Index = Imm % NumLaneElts;
1671*700637cbSDimitry Andric         Imm /= NumLaneElts;
1672*700637cbSDimitry Andric         if (i >= (NumLaneElts / 2))
1673*700637cbSDimitry Andric           Index += NumElts;
1674*700637cbSDimitry Andric         Indices[l + i] = l + Index;
1675*700637cbSDimitry Andric       }
1676*700637cbSDimitry Andric     }
1677*700637cbSDimitry Andric 
1678*700637cbSDimitry Andric     return Builder.CreateShuffleVector(Ops[0], Ops[1],
1679*700637cbSDimitry Andric                                        ArrayRef(Indices, NumElts), "shufp");
1680*700637cbSDimitry Andric   }
1681*700637cbSDimitry Andric   case X86::BI__builtin_ia32_permdi256:
1682*700637cbSDimitry Andric   case X86::BI__builtin_ia32_permdf256:
1683*700637cbSDimitry Andric   case X86::BI__builtin_ia32_permdi512:
1684*700637cbSDimitry Andric   case X86::BI__builtin_ia32_permdf512: {
1685*700637cbSDimitry Andric     unsigned Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
1686*700637cbSDimitry Andric     auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
1687*700637cbSDimitry Andric     unsigned NumElts = Ty->getNumElements();
1688*700637cbSDimitry Andric 
1689*700637cbSDimitry Andric     // These intrinsics operate on 256-bit lanes of four 64-bit elements.
1690*700637cbSDimitry Andric     int Indices[8];
1691*700637cbSDimitry Andric     for (unsigned l = 0; l != NumElts; l += 4)
1692*700637cbSDimitry Andric       for (unsigned i = 0; i != 4; ++i)
1693*700637cbSDimitry Andric         Indices[l + i] = l + ((Imm >> (2 * i)) & 0x3);
1694*700637cbSDimitry Andric 
1695*700637cbSDimitry Andric     return Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),
1696*700637cbSDimitry Andric                                        "perm");
1697*700637cbSDimitry Andric   }
1698*700637cbSDimitry Andric   case X86::BI__builtin_ia32_palignr128:
1699*700637cbSDimitry Andric   case X86::BI__builtin_ia32_palignr256:
1700*700637cbSDimitry Andric   case X86::BI__builtin_ia32_palignr512: {
1701*700637cbSDimitry Andric     unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0xff;
1702*700637cbSDimitry Andric 
1703*700637cbSDimitry Andric     unsigned NumElts =
1704*700637cbSDimitry Andric         cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
1705*700637cbSDimitry Andric     assert(NumElts % 16 == 0);
1706*700637cbSDimitry Andric 
1707*700637cbSDimitry Andric     // If palignr is shifting the pair of vectors more than the size of two
1708*700637cbSDimitry Andric     // lanes, emit zero.
1709*700637cbSDimitry Andric     if (ShiftVal >= 32)
1710*700637cbSDimitry Andric       return llvm::Constant::getNullValue(ConvertType(E->getType()));
1711*700637cbSDimitry Andric 
1712*700637cbSDimitry Andric     // If palignr is shifting the pair of input vectors more than one lane,
1713*700637cbSDimitry Andric     // but less than two lanes, convert to shifting in zeroes.
1714*700637cbSDimitry Andric     if (ShiftVal > 16) {
1715*700637cbSDimitry Andric       ShiftVal -= 16;
1716*700637cbSDimitry Andric       Ops[1] = Ops[0];
1717*700637cbSDimitry Andric       Ops[0] = llvm::Constant::getNullValue(Ops[0]->getType());
1718*700637cbSDimitry Andric     }
1719*700637cbSDimitry Andric 
1720*700637cbSDimitry Andric     int Indices[64];
1721*700637cbSDimitry Andric     // 256-bit palignr operates on 128-bit lanes so we need to handle that
1722*700637cbSDimitry Andric     for (unsigned l = 0; l != NumElts; l += 16) {
1723*700637cbSDimitry Andric       for (unsigned i = 0; i != 16; ++i) {
1724*700637cbSDimitry Andric         unsigned Idx = ShiftVal + i;
1725*700637cbSDimitry Andric         if (Idx >= 16)
1726*700637cbSDimitry Andric           Idx += NumElts - 16; // End of lane, switch operand.
1727*700637cbSDimitry Andric         Indices[l + i] = Idx + l;
1728*700637cbSDimitry Andric       }
1729*700637cbSDimitry Andric     }
1730*700637cbSDimitry Andric 
1731*700637cbSDimitry Andric     return Builder.CreateShuffleVector(Ops[1], Ops[0],
1732*700637cbSDimitry Andric                                        ArrayRef(Indices, NumElts), "palignr");
1733*700637cbSDimitry Andric   }
1734*700637cbSDimitry Andric   case X86::BI__builtin_ia32_alignd128:
1735*700637cbSDimitry Andric   case X86::BI__builtin_ia32_alignd256:
1736*700637cbSDimitry Andric   case X86::BI__builtin_ia32_alignd512:
1737*700637cbSDimitry Andric   case X86::BI__builtin_ia32_alignq128:
1738*700637cbSDimitry Andric   case X86::BI__builtin_ia32_alignq256:
1739*700637cbSDimitry Andric   case X86::BI__builtin_ia32_alignq512: {
1740*700637cbSDimitry Andric     unsigned NumElts =
1741*700637cbSDimitry Andric         cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
1742*700637cbSDimitry Andric     unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0xff;
1743*700637cbSDimitry Andric 
1744*700637cbSDimitry Andric     // Mask the shift amount to width of a vector.
1745*700637cbSDimitry Andric     ShiftVal &= NumElts - 1;
1746*700637cbSDimitry Andric 
1747*700637cbSDimitry Andric     int Indices[16];
1748*700637cbSDimitry Andric     for (unsigned i = 0; i != NumElts; ++i)
1749*700637cbSDimitry Andric       Indices[i] = i + ShiftVal;
1750*700637cbSDimitry Andric 
1751*700637cbSDimitry Andric     return Builder.CreateShuffleVector(Ops[1], Ops[0],
1752*700637cbSDimitry Andric                                        ArrayRef(Indices, NumElts), "valign");
1753*700637cbSDimitry Andric   }
1754*700637cbSDimitry Andric   case X86::BI__builtin_ia32_shuf_f32x4_256:
1755*700637cbSDimitry Andric   case X86::BI__builtin_ia32_shuf_f64x2_256:
1756*700637cbSDimitry Andric   case X86::BI__builtin_ia32_shuf_i32x4_256:
1757*700637cbSDimitry Andric   case X86::BI__builtin_ia32_shuf_i64x2_256:
1758*700637cbSDimitry Andric   case X86::BI__builtin_ia32_shuf_f32x4:
1759*700637cbSDimitry Andric   case X86::BI__builtin_ia32_shuf_f64x2:
1760*700637cbSDimitry Andric   case X86::BI__builtin_ia32_shuf_i32x4:
1761*700637cbSDimitry Andric   case X86::BI__builtin_ia32_shuf_i64x2: {
1762*700637cbSDimitry Andric     unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
1763*700637cbSDimitry Andric     auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
1764*700637cbSDimitry Andric     unsigned NumElts = Ty->getNumElements();
1765*700637cbSDimitry Andric     unsigned NumLanes = Ty->getPrimitiveSizeInBits() == 512 ? 4 : 2;
1766*700637cbSDimitry Andric     unsigned NumLaneElts = NumElts / NumLanes;
1767*700637cbSDimitry Andric 
1768*700637cbSDimitry Andric     int Indices[16];
1769*700637cbSDimitry Andric     for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
1770*700637cbSDimitry Andric       unsigned Index = (Imm % NumLanes) * NumLaneElts;
1771*700637cbSDimitry Andric       Imm /= NumLanes; // Discard the bits we just used.
1772*700637cbSDimitry Andric       if (l >= (NumElts / 2))
1773*700637cbSDimitry Andric         Index += NumElts; // Switch to other source.
1774*700637cbSDimitry Andric       for (unsigned i = 0; i != NumLaneElts; ++i) {
1775*700637cbSDimitry Andric         Indices[l + i] = Index + i;
1776*700637cbSDimitry Andric       }
1777*700637cbSDimitry Andric     }
1778*700637cbSDimitry Andric 
1779*700637cbSDimitry Andric     return Builder.CreateShuffleVector(Ops[0], Ops[1],
1780*700637cbSDimitry Andric                                        ArrayRef(Indices, NumElts), "shuf");
1781*700637cbSDimitry Andric   }
1782*700637cbSDimitry Andric 
1783*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vperm2f128_pd256:
1784*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vperm2f128_ps256:
1785*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vperm2f128_si256:
1786*700637cbSDimitry Andric   case X86::BI__builtin_ia32_permti256: {
1787*700637cbSDimitry Andric     unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
1788*700637cbSDimitry Andric     unsigned NumElts =
1789*700637cbSDimitry Andric         cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
1790*700637cbSDimitry Andric 
1791*700637cbSDimitry Andric     // This takes a very simple approach since there are two lanes and a
1792*700637cbSDimitry Andric     // shuffle can have 2 inputs. So we reserve the first input for the first
1793*700637cbSDimitry Andric     // lane and the second input for the second lane. This may result in
1794*700637cbSDimitry Andric     // duplicate sources, but this can be dealt with in the backend.
1795*700637cbSDimitry Andric 
1796*700637cbSDimitry Andric     Value *OutOps[2];
1797*700637cbSDimitry Andric     int Indices[8];
1798*700637cbSDimitry Andric     for (unsigned l = 0; l != 2; ++l) {
1799*700637cbSDimitry Andric       // Determine the source for this lane.
1800*700637cbSDimitry Andric       if (Imm & (1 << ((l * 4) + 3)))
1801*700637cbSDimitry Andric         OutOps[l] = llvm::ConstantAggregateZero::get(Ops[0]->getType());
1802*700637cbSDimitry Andric       else if (Imm & (1 << ((l * 4) + 1)))
1803*700637cbSDimitry Andric         OutOps[l] = Ops[1];
1804*700637cbSDimitry Andric       else
1805*700637cbSDimitry Andric         OutOps[l] = Ops[0];
1806*700637cbSDimitry Andric 
1807*700637cbSDimitry Andric       for (unsigned i = 0; i != NumElts/2; ++i) {
1808*700637cbSDimitry Andric         // Start with ith element of the source for this lane.
1809*700637cbSDimitry Andric         unsigned Idx = (l * NumElts) + i;
1810*700637cbSDimitry Andric         // If bit 0 of the immediate half is set, switch to the high half of
1811*700637cbSDimitry Andric         // the source.
1812*700637cbSDimitry Andric         if (Imm & (1 << (l * 4)))
1813*700637cbSDimitry Andric           Idx += NumElts/2;
1814*700637cbSDimitry Andric         Indices[(l * (NumElts/2)) + i] = Idx;
1815*700637cbSDimitry Andric       }
1816*700637cbSDimitry Andric     }
1817*700637cbSDimitry Andric 
1818*700637cbSDimitry Andric     return Builder.CreateShuffleVector(OutOps[0], OutOps[1],
1819*700637cbSDimitry Andric                                        ArrayRef(Indices, NumElts), "vperm");
1820*700637cbSDimitry Andric   }
1821*700637cbSDimitry Andric 
1822*700637cbSDimitry Andric   case X86::BI__builtin_ia32_pslldqi128_byteshift:
1823*700637cbSDimitry Andric   case X86::BI__builtin_ia32_pslldqi256_byteshift:
1824*700637cbSDimitry Andric   case X86::BI__builtin_ia32_pslldqi512_byteshift: {
    // Lowers the immediate byte-shift-left builtins to a shufflevector between
    // an all-zero byte vector and the source reinterpreted as bytes.
    // Ops[0] is the source vector and Ops[1] the constant shift amount, of
    // which only the low 8 bits are used.
1825*700637cbSDimitry Andric     unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
1826*700637cbSDimitry Andric     auto *ResultType = cast<llvm::FixedVectorType>(Ops[0]->getType());
1827*700637cbSDimitry Andric     // Builtin type is vXi64 so multiply by 8 to get bytes.
1828*700637cbSDimitry Andric     unsigned NumElts = ResultType->getNumElements() * 8;
1829*700637cbSDimitry Andric 
1830*700637cbSDimitry Andric     // If pslldq is shifting the vector more than 15 bytes, emit zero.
1831*700637cbSDimitry Andric     if (ShiftVal >= 16)
1832*700637cbSDimitry Andric       return llvm::Constant::getNullValue(ResultType);
1833*700637cbSDimitry Andric 
    // NOTE(review): 64 entries presumably covers the widest (512-bit, i.e.
    // 64-byte) variant; only the first NumElts entries are used.
1834*700637cbSDimitry Andric     int Indices[64];
1835*700637cbSDimitry Andric     // 256/512-bit pslldq operates on 128-bit lanes so we need to handle that
1836*700637cbSDimitry Andric     for (unsigned l = 0; l != NumElts; l += 16) {
1837*700637cbSDimitry Andric       for (unsigned i = 0; i != 16; ++i) {
        // The shuffle below is (Zero, Cast): indices below NumElts pick a zero
        // byte, indices at or above NumElts pick a source byte.
        // For i >= ShiftVal this is source byte (i - ShiftVal) of the lane.
        // For i < ShiftVal the adjustment maps it to 16 + i - ShiftVal, which
        // is below 16 and so (after adding l) lands in the zero operand.
1838*700637cbSDimitry Andric         unsigned Idx = NumElts + i - ShiftVal;
1839*700637cbSDimitry Andric         if (Idx < NumElts) Idx -= NumElts - 16; // end of lane, switch operand.
1840*700637cbSDimitry Andric         Indices[l + i] = Idx + l;
1841*700637cbSDimitry Andric       }
1842*700637cbSDimitry Andric     }
1843*700637cbSDimitry Andric 
    // Shuffle at byte granularity, then cast back to the operand's own type.
1844*700637cbSDimitry Andric     auto *VecTy = llvm::FixedVectorType::get(Int8Ty, NumElts);
1845*700637cbSDimitry Andric     Value *Cast = Builder.CreateBitCast(Ops[0], VecTy, "cast");
1846*700637cbSDimitry Andric     Value *Zero = llvm::Constant::getNullValue(VecTy);
1847*700637cbSDimitry Andric     Value *SV = Builder.CreateShuffleVector(
1848*700637cbSDimitry Andric         Zero, Cast, ArrayRef(Indices, NumElts), "pslldq");
1849*700637cbSDimitry Andric     return Builder.CreateBitCast(SV, Ops[0]->getType(), "cast");
1850*700637cbSDimitry Andric   }
1851*700637cbSDimitry Andric   case X86::BI__builtin_ia32_psrldqi128_byteshift:
1852*700637cbSDimitry Andric   case X86::BI__builtin_ia32_psrldqi256_byteshift:
1853*700637cbSDimitry Andric   case X86::BI__builtin_ia32_psrldqi512_byteshift: {
    // Lowers the immediate byte-shift-right builtins to a shufflevector between
    // the source reinterpreted as bytes and an all-zero byte vector.
    // Ops[0] is the source vector and Ops[1] the constant shift amount, of
    // which only the low 8 bits are used.
1854*700637cbSDimitry Andric     unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
1855*700637cbSDimitry Andric     auto *ResultType = cast<llvm::FixedVectorType>(Ops[0]->getType());
1856*700637cbSDimitry Andric     // Builtin type is vXi64 so multiply by 8 to get bytes.
1857*700637cbSDimitry Andric     unsigned NumElts = ResultType->getNumElements() * 8;
1858*700637cbSDimitry Andric 
1859*700637cbSDimitry Andric     // If psrldq is shifting the vector more than 15 bytes, emit zero.
1860*700637cbSDimitry Andric     if (ShiftVal >= 16)
1861*700637cbSDimitry Andric       return llvm::Constant::getNullValue(ResultType);
1862*700637cbSDimitry Andric 
    // NOTE(review): 64 entries presumably covers the widest (512-bit, i.e.
    // 64-byte) variant; only the first NumElts entries are used.
1863*700637cbSDimitry Andric     int Indices[64];
1864*700637cbSDimitry Andric     // 256/512-bit psrldq operates on 128-bit lanes so we need to handle that
1865*700637cbSDimitry Andric     for (unsigned l = 0; l != NumElts; l += 16) {
1866*700637cbSDimitry Andric       for (unsigned i = 0; i != 16; ++i) {
        // The shuffle below is (Cast, Zero): indices below NumElts pick a
        // source byte, indices at or above NumElts pick a zero byte.
        // While i + ShiftVal stays inside the 16-byte lane it selects that
        // source byte; once it runs past the lane it is moved up by
        // NumElts - 16 so that (after adding l) it lands in the zero operand.
1867*700637cbSDimitry Andric         unsigned Idx = i + ShiftVal;
1868*700637cbSDimitry Andric         if (Idx >= 16) Idx += NumElts - 16; // end of lane, switch operand.
1869*700637cbSDimitry Andric         Indices[l + i] = Idx + l;
1870*700637cbSDimitry Andric       }
1871*700637cbSDimitry Andric     }
1872*700637cbSDimitry Andric 
    // Shuffle at byte granularity, then cast back to the original vector type.
1873*700637cbSDimitry Andric     auto *VecTy = llvm::FixedVectorType::get(Int8Ty, NumElts);
1874*700637cbSDimitry Andric     Value *Cast = Builder.CreateBitCast(Ops[0], VecTy, "cast");
1875*700637cbSDimitry Andric     Value *Zero = llvm::Constant::getNullValue(VecTy);
1876*700637cbSDimitry Andric     Value *SV = Builder.CreateShuffleVector(
1877*700637cbSDimitry Andric         Cast, Zero, ArrayRef(Indices, NumElts), "psrldq");
1878*700637cbSDimitry Andric     return Builder.CreateBitCast(SV, ResultType, "cast");
1879*700637cbSDimitry Andric   }
1880*700637cbSDimitry Andric   case X86::BI__builtin_ia32_kshiftliqi:
1881*700637cbSDimitry Andric   case X86::BI__builtin_ia32_kshiftlihi:
1882*700637cbSDimitry Andric   case X86::BI__builtin_ia32_kshiftlisi:
1883*700637cbSDimitry Andric   case X86::BI__builtin_ia32_kshiftlidi: {
    // Mask shift-left by an immediate, expressed as a shuffle of the mask
    // vector with zeros. Ops[0] is the integer mask, Ops[1] the constant shift
    // amount (low 8 bits only).
1884*700637cbSDimitry Andric     unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
    // One shuffle element per bit of the integer mask type.
1885*700637cbSDimitry Andric     unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
1886*700637cbSDimitry Andric 
    // Shifting by the full width or more leaves nothing: return zero.
1887*700637cbSDimitry Andric     if (ShiftVal >= NumElts)
1888*700637cbSDimitry Andric       return llvm::Constant::getNullValue(Ops[0]->getType());
1889*700637cbSDimitry Andric 
    // NOTE(review): getMaskVecValue is defined elsewhere; presumably it yields
    // the mask as a vector of NumElts i1 elements -- confirm.
1890*700637cbSDimitry Andric     Value *In = getMaskVecValue(*this, Ops[0], NumElts);
1891*700637cbSDimitry Andric 
    // The shuffle below is (Zero, In): indices below NumElts pick a zero
    // element, indices at or above NumElts pick In[i - ShiftVal].
1892*700637cbSDimitry Andric     int Indices[64];
1893*700637cbSDimitry Andric     for (unsigned i = 0; i != NumElts; ++i)
1894*700637cbSDimitry Andric       Indices[i] = NumElts + i - ShiftVal;
1895*700637cbSDimitry Andric 
1896*700637cbSDimitry Andric     Value *Zero = llvm::Constant::getNullValue(In->getType());
1897*700637cbSDimitry Andric     Value *SV = Builder.CreateShuffleVector(
1898*700637cbSDimitry Andric         Zero, In, ArrayRef(Indices, NumElts), "kshiftl");
    // Convert the shuffled vector back to the builtin's integer mask type.
1899*700637cbSDimitry Andric     return Builder.CreateBitCast(SV, Ops[0]->getType());
1900*700637cbSDimitry Andric   }
1901*700637cbSDimitry Andric   case X86::BI__builtin_ia32_kshiftriqi:
1902*700637cbSDimitry Andric   case X86::BI__builtin_ia32_kshiftrihi:
1903*700637cbSDimitry Andric   case X86::BI__builtin_ia32_kshiftrisi:
1904*700637cbSDimitry Andric   case X86::BI__builtin_ia32_kshiftridi: {
    // Mask shift-right by an immediate, expressed as a shuffle of the mask
    // vector with zeros. Ops[0] is the integer mask, Ops[1] the constant shift
    // amount (low 8 bits only).
1905*700637cbSDimitry Andric     unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
    // One shuffle element per bit of the integer mask type.
1906*700637cbSDimitry Andric     unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
1907*700637cbSDimitry Andric 
    // Shifting by the full width or more leaves nothing: return zero.
1908*700637cbSDimitry Andric     if (ShiftVal >= NumElts)
1909*700637cbSDimitry Andric       return llvm::Constant::getNullValue(Ops[0]->getType());
1910*700637cbSDimitry Andric 
    // NOTE(review): getMaskVecValue is defined elsewhere; presumably it yields
    // the mask as a vector of NumElts i1 elements -- confirm.
1911*700637cbSDimitry Andric     Value *In = getMaskVecValue(*this, Ops[0], NumElts);
1912*700637cbSDimitry Andric 
    // The shuffle below is (In, Zero): index i + ShiftVal picks an element of
    // In while it is below NumElts, and a zero element once it goes past.
1913*700637cbSDimitry Andric     int Indices[64];
1914*700637cbSDimitry Andric     for (unsigned i = 0; i != NumElts; ++i)
1915*700637cbSDimitry Andric       Indices[i] = i + ShiftVal;
1916*700637cbSDimitry Andric 
1917*700637cbSDimitry Andric     Value *Zero = llvm::Constant::getNullValue(In->getType());
1918*700637cbSDimitry Andric     Value *SV = Builder.CreateShuffleVector(
1919*700637cbSDimitry Andric         In, Zero, ArrayRef(Indices, NumElts), "kshiftr");
    // Convert the shuffled vector back to the builtin's integer mask type.
1920*700637cbSDimitry Andric     return Builder.CreateBitCast(SV, Ops[0]->getType());
1921*700637cbSDimitry Andric   }
1922*700637cbSDimitry Andric   case X86::BI__builtin_ia32_movnti:
1923*700637cbSDimitry Andric   case X86::BI__builtin_ia32_movnti64:
1924*700637cbSDimitry Andric   case X86::BI__builtin_ia32_movntsd:
1925*700637cbSDimitry Andric   case X86::BI__builtin_ia32_movntss: {
    // Emits a scalar store of Ops[1] to the address Ops[0], tagged with
    // !nontemporal metadata and alignment 1.
    // Metadata node wrapping the i32 constant 1; attached to the store below.
1926*700637cbSDimitry Andric     llvm::MDNode *Node = llvm::MDNode::get(
1927*700637cbSDimitry Andric         getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1)));
1928*700637cbSDimitry Andric 
1929*700637cbSDimitry Andric     Value *Ptr = Ops[0];
1930*700637cbSDimitry Andric     Value *Src = Ops[1];
1931*700637cbSDimitry Andric 
    // Only the movntsd/movntss forms take a vector source; for the other two
    // builtins Src is stored as-is.
1932*700637cbSDimitry Andric     // Extract the 0'th element of the source vector.
1933*700637cbSDimitry Andric     if (BuiltinID == X86::BI__builtin_ia32_movntsd ||
1934*700637cbSDimitry Andric         BuiltinID == X86::BI__builtin_ia32_movntss)
1935*700637cbSDimitry Andric       Src = Builder.CreateExtractElement(Src, (uint64_t)0, "extract");
1936*700637cbSDimitry Andric 
    // The store is created with default alignment and then explicitly
    // overridden to 1 after the metadata is attached.
1937*700637cbSDimitry Andric     // Unaligned nontemporal store of the scalar value.
1938*700637cbSDimitry Andric     StoreInst *SI = Builder.CreateDefaultAlignedStore(Src, Ptr);
1939*700637cbSDimitry Andric     SI->setMetadata(llvm::LLVMContext::MD_nontemporal, Node);
1940*700637cbSDimitry Andric     SI->setAlignment(llvm::Align(1));
1941*700637cbSDimitry Andric     return SI;
1942*700637cbSDimitry Andric   }
1943*700637cbSDimitry Andric   // Rotate is a special case of funnel shift - 1st 2 args are the same.
1944*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vprotb:
1945*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vprotw:
1946*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vprotd:
1947*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vprotq:
1948*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vprotbi:
1949*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vprotwi:
1950*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vprotdi:
1951*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vprotqi:
1952*700637cbSDimitry Andric   case X86::BI__builtin_ia32_prold128:
1953*700637cbSDimitry Andric   case X86::BI__builtin_ia32_prold256:
1954*700637cbSDimitry Andric   case X86::BI__builtin_ia32_prold512:
1955*700637cbSDimitry Andric   case X86::BI__builtin_ia32_prolq128:
1956*700637cbSDimitry Andric   case X86::BI__builtin_ia32_prolq256:
1957*700637cbSDimitry Andric   case X86::BI__builtin_ia32_prolq512:
1958*700637cbSDimitry Andric   case X86::BI__builtin_ia32_prolvd128:
1959*700637cbSDimitry Andric   case X86::BI__builtin_ia32_prolvd256:
1960*700637cbSDimitry Andric   case X86::BI__builtin_ia32_prolvd512:
1961*700637cbSDimitry Andric   case X86::BI__builtin_ia32_prolvq128:
1962*700637cbSDimitry Andric   case X86::BI__builtin_ia32_prolvq256:
1963*700637cbSDimitry Andric   case X86::BI__builtin_ia32_prolvq512:
    // Ops[0] is passed as both funnel inputs and Ops[1] as the amount.
    // NOTE(review): the trailing `false` presumably selects the left-shift
    // direction (the pror* group passes `true`) -- confirm against
    // EmitX86FunnelShift.
1964*700637cbSDimitry Andric     return EmitX86FunnelShift(*this, Ops[0], Ops[0], Ops[1], false);
1965*700637cbSDimitry Andric   case X86::BI__builtin_ia32_prord128:
1966*700637cbSDimitry Andric   case X86::BI__builtin_ia32_prord256:
1967*700637cbSDimitry Andric   case X86::BI__builtin_ia32_prord512:
1968*700637cbSDimitry Andric   case X86::BI__builtin_ia32_prorq128:
1969*700637cbSDimitry Andric   case X86::BI__builtin_ia32_prorq256:
1970*700637cbSDimitry Andric   case X86::BI__builtin_ia32_prorq512:
1971*700637cbSDimitry Andric   case X86::BI__builtin_ia32_prorvd128:
1972*700637cbSDimitry Andric   case X86::BI__builtin_ia32_prorvd256:
1973*700637cbSDimitry Andric   case X86::BI__builtin_ia32_prorvd512:
1974*700637cbSDimitry Andric   case X86::BI__builtin_ia32_prorvq128:
1975*700637cbSDimitry Andric   case X86::BI__builtin_ia32_prorvq256:
1976*700637cbSDimitry Andric   case X86::BI__builtin_ia32_prorvq512:
    // Rotate expressed as a funnel shift: Ops[0] is passed as both funnel
    // inputs and Ops[1] as the amount.
    // NOTE(review): the trailing `true` presumably selects the right-shift
    // direction -- confirm against EmitX86FunnelShift.
1977*700637cbSDimitry Andric     return EmitX86FunnelShift(*this, Ops[0], Ops[0], Ops[1], true);
1978*700637cbSDimitry Andric   case X86::BI__builtin_ia32_selectb_128:
1979*700637cbSDimitry Andric   case X86::BI__builtin_ia32_selectb_256:
1980*700637cbSDimitry Andric   case X86::BI__builtin_ia32_selectb_512:
1981*700637cbSDimitry Andric   case X86::BI__builtin_ia32_selectw_128:
1982*700637cbSDimitry Andric   case X86::BI__builtin_ia32_selectw_256:
1983*700637cbSDimitry Andric   case X86::BI__builtin_ia32_selectw_512:
1984*700637cbSDimitry Andric   case X86::BI__builtin_ia32_selectd_128:
1985*700637cbSDimitry Andric   case X86::BI__builtin_ia32_selectd_256:
1986*700637cbSDimitry Andric   case X86::BI__builtin_ia32_selectd_512:
1987*700637cbSDimitry Andric   case X86::BI__builtin_ia32_selectq_128:
1988*700637cbSDimitry Andric   case X86::BI__builtin_ia32_selectq_256:
1989*700637cbSDimitry Andric   case X86::BI__builtin_ia32_selectq_512:
1990*700637cbSDimitry Andric   case X86::BI__builtin_ia32_selectph_128:
1991*700637cbSDimitry Andric   case X86::BI__builtin_ia32_selectph_256:
1992*700637cbSDimitry Andric   case X86::BI__builtin_ia32_selectph_512:
1993*700637cbSDimitry Andric   case X86::BI__builtin_ia32_selectpbf_128:
1994*700637cbSDimitry Andric   case X86::BI__builtin_ia32_selectpbf_256:
1995*700637cbSDimitry Andric   case X86::BI__builtin_ia32_selectpbf_512:
1996*700637cbSDimitry Andric   case X86::BI__builtin_ia32_selectps_128:
1997*700637cbSDimitry Andric   case X86::BI__builtin_ia32_selectps_256:
1998*700637cbSDimitry Andric   case X86::BI__builtin_ia32_selectps_512:
1999*700637cbSDimitry Andric   case X86::BI__builtin_ia32_selectpd_128:
2000*700637cbSDimitry Andric   case X86::BI__builtin_ia32_selectpd_256:
2001*700637cbSDimitry Andric   case X86::BI__builtin_ia32_selectpd_512:
    // All element types and widths share one helper; the three operands are
    // forwarded in order.
    // NOTE(review): presumably Ops[0] is the mask and Ops[1]/Ops[2] the two
    // candidate vectors -- confirm against EmitX86Select.
2002*700637cbSDimitry Andric     return EmitX86Select(*this, Ops[0], Ops[1], Ops[2]);
2003*700637cbSDimitry Andric   case X86::BI__builtin_ia32_selectsh_128:
2004*700637cbSDimitry Andric   case X86::BI__builtin_ia32_selectsbf_128:
2005*700637cbSDimitry Andric   case X86::BI__builtin_ia32_selectss_128:
2006*700637cbSDimitry Andric   case X86::BI__builtin_ia32_selectsd_128: {
    // Scalar select: only element 0 is chosen between Ops[1] and Ops[2]
    // (via EmitX86ScalarSelect with Ops[0]); the result is written back into
    // element 0 of Ops[1], so the remaining elements come from Ops[1].
2007*700637cbSDimitry Andric     Value *A = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
2008*700637cbSDimitry Andric     Value *B = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
2009*700637cbSDimitry Andric     A = EmitX86ScalarSelect(*this, Ops[0], A, B);
2010*700637cbSDimitry Andric     return Builder.CreateInsertElement(Ops[1], A, (uint64_t)0);
2011*700637cbSDimitry Andric   }
2012*700637cbSDimitry Andric   case X86::BI__builtin_ia32_cmpb128_mask:
2013*700637cbSDimitry Andric   case X86::BI__builtin_ia32_cmpb256_mask:
2014*700637cbSDimitry Andric   case X86::BI__builtin_ia32_cmpb512_mask:
2015*700637cbSDimitry Andric   case X86::BI__builtin_ia32_cmpw128_mask:
2016*700637cbSDimitry Andric   case X86::BI__builtin_ia32_cmpw256_mask:
2017*700637cbSDimitry Andric   case X86::BI__builtin_ia32_cmpw512_mask:
2018*700637cbSDimitry Andric   case X86::BI__builtin_ia32_cmpd128_mask:
2019*700637cbSDimitry Andric   case X86::BI__builtin_ia32_cmpd256_mask:
2020*700637cbSDimitry Andric   case X86::BI__builtin_ia32_cmpd512_mask:
2021*700637cbSDimitry Andric   case X86::BI__builtin_ia32_cmpq128_mask:
2022*700637cbSDimitry Andric   case X86::BI__builtin_ia32_cmpq256_mask:
2023*700637cbSDimitry Andric   case X86::BI__builtin_ia32_cmpq512_mask: {
    // The comparison code is the low 3 bits of the constant operand Ops[2].
    // NOTE(review): the `true` argument presumably requests a signed compare
    // (the ucmp* group passes `false`) -- confirm against
    // EmitX86MaskedCompare.
2024*700637cbSDimitry Andric     unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
2025*700637cbSDimitry Andric     return EmitX86MaskedCompare(*this, CC, true, Ops);
2026*700637cbSDimitry Andric   }
2027*700637cbSDimitry Andric   case X86::BI__builtin_ia32_ucmpb128_mask:
2028*700637cbSDimitry Andric   case X86::BI__builtin_ia32_ucmpb256_mask:
2029*700637cbSDimitry Andric   case X86::BI__builtin_ia32_ucmpb512_mask:
2030*700637cbSDimitry Andric   case X86::BI__builtin_ia32_ucmpw128_mask:
2031*700637cbSDimitry Andric   case X86::BI__builtin_ia32_ucmpw256_mask:
2032*700637cbSDimitry Andric   case X86::BI__builtin_ia32_ucmpw512_mask:
2033*700637cbSDimitry Andric   case X86::BI__builtin_ia32_ucmpd128_mask:
2034*700637cbSDimitry Andric   case X86::BI__builtin_ia32_ucmpd256_mask:
2035*700637cbSDimitry Andric   case X86::BI__builtin_ia32_ucmpd512_mask:
2036*700637cbSDimitry Andric   case X86::BI__builtin_ia32_ucmpq128_mask:
2037*700637cbSDimitry Andric   case X86::BI__builtin_ia32_ucmpq256_mask:
2038*700637cbSDimitry Andric   case X86::BI__builtin_ia32_ucmpq512_mask: {
    // The comparison code is the low 3 bits of the constant operand Ops[2].
    // NOTE(review): the `false` argument presumably requests an unsigned
    // compare -- confirm against EmitX86MaskedCompare.
2039*700637cbSDimitry Andric     unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
2040*700637cbSDimitry Andric     return EmitX86MaskedCompare(*this, CC, false, Ops);
2041*700637cbSDimitry Andric   }
2042*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vpcomb:
2043*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vpcomw:
2044*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vpcomd:
2045*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vpcomq:
    // NOTE(review): the boolean presumably selects signed (true) versus
    // unsigned (false, vpcomu* below) comparison -- confirm against
    // EmitX86vpcom.
2046*700637cbSDimitry Andric     return EmitX86vpcom(*this, Ops, true);
2047*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vpcomub:
2048*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vpcomuw:
2049*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vpcomud:
2050*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vpcomuq:
2051*700637cbSDimitry Andric     return EmitX86vpcom(*this, Ops, false);
2052*700637cbSDimitry Andric 
2053*700637cbSDimitry Andric   case X86::BI__builtin_ia32_kortestcqi:
2054*700637cbSDimitry Andric   case X86::BI__builtin_ia32_kortestchi:
2055*700637cbSDimitry Andric   case X86::BI__builtin_ia32_kortestcsi:
2056*700637cbSDimitry Andric   case X86::BI__builtin_ia32_kortestcdi: {
    // ORs the mask operands (via EmitX86MaskLogic), tests whether the result
    // equals the all-ones value of the mask's integer type, and zero-extends
    // that i1 to the builtin call's result type.
2057*700637cbSDimitry Andric     Value *Or = EmitX86MaskLogic(*this, Instruction::Or, Ops);
2058*700637cbSDimitry Andric     Value *C = llvm::Constant::getAllOnesValue(Ops[0]->getType());
2059*700637cbSDimitry Andric     Value *Cmp = Builder.CreateICmpEQ(Or, C);
2060*700637cbSDimitry Andric     return Builder.CreateZExt(Cmp, ConvertType(E->getType()));
2061*700637cbSDimitry Andric   }
2062*700637cbSDimitry Andric   case X86::BI__builtin_ia32_kortestzqi:
2063*700637cbSDimitry Andric   case X86::BI__builtin_ia32_kortestzhi:
2064*700637cbSDimitry Andric   case X86::BI__builtin_ia32_kortestzsi:
2065*700637cbSDimitry Andric   case X86::BI__builtin_ia32_kortestzdi: {
    // ORs the mask operands (via EmitX86MaskLogic), tests whether the result
    // equals zero, and zero-extends that i1 to the builtin call's result
    // type.
2066*700637cbSDimitry Andric     Value *Or = EmitX86MaskLogic(*this, Instruction::Or, Ops);
2067*700637cbSDimitry Andric     Value *C = llvm::Constant::getNullValue(Ops[0]->getType());
2068*700637cbSDimitry Andric     Value *Cmp = Builder.CreateICmpEQ(Or, C);
2069*700637cbSDimitry Andric     return Builder.CreateZExt(Cmp, ConvertType(E->getType()));
2070*700637cbSDimitry Andric   }
2071*700637cbSDimitry Andric 
2072*700637cbSDimitry Andric   case X86::BI__builtin_ia32_ktestcqi:
2073*700637cbSDimitry Andric   case X86::BI__builtin_ia32_ktestzqi:
2074*700637cbSDimitry Andric   case X86::BI__builtin_ia32_ktestchi:
2075*700637cbSDimitry Andric   case X86::BI__builtin_ia32_ktestzhi:
2076*700637cbSDimitry Andric   case X86::BI__builtin_ia32_ktestcsi:
2077*700637cbSDimitry Andric   case X86::BI__builtin_ia32_ktestzsi:
2078*700637cbSDimitry Andric   case X86::BI__builtin_ia32_ktestcdi:
2079*700637cbSDimitry Andric   case X86::BI__builtin_ia32_ktestzdi: {
    // Map each builtin to the x86_avx512_ktest{c,z}_{b,w,d,q} intrinsic with
    // the same c/z flavor; the qi/hi/si/di suffixes map to b/w/d/q.
2080*700637cbSDimitry Andric     Intrinsic::ID IID;
2081*700637cbSDimitry Andric     switch (BuiltinID) {
2082*700637cbSDimitry Andric     default: llvm_unreachable("Unsupported intrinsic!");
2083*700637cbSDimitry Andric     case X86::BI__builtin_ia32_ktestcqi:
2084*700637cbSDimitry Andric       IID = Intrinsic::x86_avx512_ktestc_b;
2085*700637cbSDimitry Andric       break;
2086*700637cbSDimitry Andric     case X86::BI__builtin_ia32_ktestzqi:
2087*700637cbSDimitry Andric       IID = Intrinsic::x86_avx512_ktestz_b;
2088*700637cbSDimitry Andric       break;
2089*700637cbSDimitry Andric     case X86::BI__builtin_ia32_ktestchi:
2090*700637cbSDimitry Andric       IID = Intrinsic::x86_avx512_ktestc_w;
2091*700637cbSDimitry Andric       break;
2092*700637cbSDimitry Andric     case X86::BI__builtin_ia32_ktestzhi:
2093*700637cbSDimitry Andric       IID = Intrinsic::x86_avx512_ktestz_w;
2094*700637cbSDimitry Andric       break;
2095*700637cbSDimitry Andric     case X86::BI__builtin_ia32_ktestcsi:
2096*700637cbSDimitry Andric       IID = Intrinsic::x86_avx512_ktestc_d;
2097*700637cbSDimitry Andric       break;
2098*700637cbSDimitry Andric     case X86::BI__builtin_ia32_ktestzsi:
2099*700637cbSDimitry Andric       IID = Intrinsic::x86_avx512_ktestz_d;
2100*700637cbSDimitry Andric       break;
2101*700637cbSDimitry Andric     case X86::BI__builtin_ia32_ktestcdi:
2102*700637cbSDimitry Andric       IID = Intrinsic::x86_avx512_ktestc_q;
2103*700637cbSDimitry Andric       break;
2104*700637cbSDimitry Andric     case X86::BI__builtin_ia32_ktestzdi:
2105*700637cbSDimitry Andric       IID = Intrinsic::x86_avx512_ktestz_q;
2106*700637cbSDimitry Andric       break;
2107*700637cbSDimitry Andric     }
2108*700637cbSDimitry Andric 
    // Both integer masks are converted with getMaskVecValue (element count =
    // bit width of Ops[0]'s integer type) before the call. The intrinsic's
    // result is returned directly, without a cast back.
2109*700637cbSDimitry Andric     unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
2110*700637cbSDimitry Andric     Value *LHS = getMaskVecValue(*this, Ops[0], NumElts);
2111*700637cbSDimitry Andric     Value *RHS = getMaskVecValue(*this, Ops[1], NumElts);
2112*700637cbSDimitry Andric     Function *Intr = CGM.getIntrinsic(IID);
2113*700637cbSDimitry Andric     return Builder.CreateCall(Intr, {LHS, RHS});
2114*700637cbSDimitry Andric   }
2115*700637cbSDimitry Andric 
2116*700637cbSDimitry Andric   case X86::BI__builtin_ia32_kaddqi:
2117*700637cbSDimitry Andric   case X86::BI__builtin_ia32_kaddhi:
2118*700637cbSDimitry Andric   case X86::BI__builtin_ia32_kaddsi:
2119*700637cbSDimitry Andric   case X86::BI__builtin_ia32_kadddi: {
    // Map each builtin to the x86_avx512_kadd_{b,w,d,q} intrinsic; the
    // qi/hi/si/di suffixes map to b/w/d/q.
2120*700637cbSDimitry Andric     Intrinsic::ID IID;
2121*700637cbSDimitry Andric     switch (BuiltinID) {
2122*700637cbSDimitry Andric     default: llvm_unreachable("Unsupported intrinsic!");
2123*700637cbSDimitry Andric     case X86::BI__builtin_ia32_kaddqi:
2124*700637cbSDimitry Andric       IID = Intrinsic::x86_avx512_kadd_b;
2125*700637cbSDimitry Andric       break;
2126*700637cbSDimitry Andric     case X86::BI__builtin_ia32_kaddhi:
2127*700637cbSDimitry Andric       IID = Intrinsic::x86_avx512_kadd_w;
2128*700637cbSDimitry Andric       break;
2129*700637cbSDimitry Andric     case X86::BI__builtin_ia32_kaddsi:
2130*700637cbSDimitry Andric       IID = Intrinsic::x86_avx512_kadd_d;
2131*700637cbSDimitry Andric       break;
2132*700637cbSDimitry Andric     case X86::BI__builtin_ia32_kadddi:
2133*700637cbSDimitry Andric       IID = Intrinsic::x86_avx512_kadd_q;
2134*700637cbSDimitry Andric       break;
2135*700637cbSDimitry Andric     }
2136*700637cbSDimitry Andric 
    // Convert both integer masks with getMaskVecValue, call the intrinsic on
    // them, then bitcast the result back to Ops[0]'s integer type.
2137*700637cbSDimitry Andric     unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
2138*700637cbSDimitry Andric     Value *LHS = getMaskVecValue(*this, Ops[0], NumElts);
2139*700637cbSDimitry Andric     Value *RHS = getMaskVecValue(*this, Ops[1], NumElts);
2140*700637cbSDimitry Andric     Function *Intr = CGM.getIntrinsic(IID);
2141*700637cbSDimitry Andric     Value *Res = Builder.CreateCall(Intr, {LHS, RHS});
2142*700637cbSDimitry Andric     return Builder.CreateBitCast(Res, Ops[0]->getType());
2143*700637cbSDimitry Andric   }
2144*700637cbSDimitry Andric   case X86::BI__builtin_ia32_kandqi:
2145*700637cbSDimitry Andric   case X86::BI__builtin_ia32_kandhi:
2146*700637cbSDimitry Andric   case X86::BI__builtin_ia32_kandsi:
2147*700637cbSDimitry Andric   case X86::BI__builtin_ia32_kanddi:
    // The mask logic builtins in this run all go through EmitX86MaskLogic
    // with the matching binary opcode.
2148*700637cbSDimitry Andric     return EmitX86MaskLogic(*this, Instruction::And, Ops);
2149*700637cbSDimitry Andric   case X86::BI__builtin_ia32_kandnqi:
2150*700637cbSDimitry Andric   case X86::BI__builtin_ia32_kandnhi:
2151*700637cbSDimitry Andric   case X86::BI__builtin_ia32_kandnsi:
2152*700637cbSDimitry Andric   case X86::BI__builtin_ia32_kandndi:
    // NOTE(review): the extra `true` (also used for kxnor) presumably asks
    // the helper to invert an operand first -- confirm against
    // EmitX86MaskLogic.
2153*700637cbSDimitry Andric     return EmitX86MaskLogic(*this, Instruction::And, Ops, true);
2154*700637cbSDimitry Andric   case X86::BI__builtin_ia32_korqi:
2155*700637cbSDimitry Andric   case X86::BI__builtin_ia32_korhi:
2156*700637cbSDimitry Andric   case X86::BI__builtin_ia32_korsi:
2157*700637cbSDimitry Andric   case X86::BI__builtin_ia32_kordi:
2158*700637cbSDimitry Andric     return EmitX86MaskLogic(*this, Instruction::Or, Ops);
2159*700637cbSDimitry Andric   case X86::BI__builtin_ia32_kxnorqi:
2160*700637cbSDimitry Andric   case X86::BI__builtin_ia32_kxnorhi:
2161*700637cbSDimitry Andric   case X86::BI__builtin_ia32_kxnorsi:
2162*700637cbSDimitry Andric   case X86::BI__builtin_ia32_kxnordi:
2163*700637cbSDimitry Andric     return EmitX86MaskLogic(*this, Instruction::Xor, Ops, true);
2164*700637cbSDimitry Andric   case X86::BI__builtin_ia32_kxorqi:
2165*700637cbSDimitry Andric   case X86::BI__builtin_ia32_kxorhi:
2166*700637cbSDimitry Andric   case X86::BI__builtin_ia32_kxorsi:
2167*700637cbSDimitry Andric   case X86::BI__builtin_ia32_kxordi:
2168*700637cbSDimitry Andric     return EmitX86MaskLogic(*this, Instruction::Xor,  Ops);
2169*700637cbSDimitry Andric   case X86::BI__builtin_ia32_knotqi:
2170*700637cbSDimitry Andric   case X86::BI__builtin_ia32_knothi:
2171*700637cbSDimitry Andric   case X86::BI__builtin_ia32_knotsi:
2172*700637cbSDimitry Andric   case X86::BI__builtin_ia32_knotdi: {
    // Mask NOT: convert the integer mask with getMaskVecValue, negate it, and
    // bitcast the result back to the original integer type.
2173*700637cbSDimitry Andric     unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
2174*700637cbSDimitry Andric     Value *Res = getMaskVecValue(*this, Ops[0], NumElts);
2175*700637cbSDimitry Andric     return Builder.CreateBitCast(Builder.CreateNot(Res),
2176*700637cbSDimitry Andric                                  Ops[0]->getType());
2177*700637cbSDimitry Andric   }
2178*700637cbSDimitry Andric   case X86::BI__builtin_ia32_kmovb:
2179*700637cbSDimitry Andric   case X86::BI__builtin_ia32_kmovw:
2180*700637cbSDimitry Andric   case X86::BI__builtin_ia32_kmovd:
2181*700637cbSDimitry Andric   case X86::BI__builtin_ia32_kmovq: {
2182*700637cbSDimitry Andric     // Bitcast to vXi1 type and then back to integer. This gets the mask
2183*700637cbSDimitry Andric     // register type into the IR, but might be optimized out depending on
2184*700637cbSDimitry Andric     // what's around it.
    // The round trip does not change the value: no operation is applied
    // between the two conversions.
2185*700637cbSDimitry Andric     unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
2186*700637cbSDimitry Andric     Value *Res = getMaskVecValue(*this, Ops[0], NumElts);
2187*700637cbSDimitry Andric     return Builder.CreateBitCast(Res, Ops[0]->getType());
2188*700637cbSDimitry Andric   }
2189*700637cbSDimitry Andric 
2190*700637cbSDimitry Andric   case X86::BI__builtin_ia32_kunpckdi:
2191*700637cbSDimitry Andric   case X86::BI__builtin_ia32_kunpcksi:
2192*700637cbSDimitry Andric   case X86::BI__builtin_ia32_kunpckhi: {
    // Builds a mask whose low half is the low half of Ops[1] and whose high
    // half is the low half of Ops[0].
2193*700637cbSDimitry Andric     unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
2194*700637cbSDimitry Andric     Value *LHS = getMaskVecValue(*this, Ops[0], NumElts);
2195*700637cbSDimitry Andric     Value *RHS = getMaskVecValue(*this, Ops[1], NumElts);
    // Identity index list 0..NumElts-1; a prefix of it is reused for the
    // half-extraction shuffles and the whole of it for the concatenation.
2196*700637cbSDimitry Andric     int Indices[64];
2197*700637cbSDimitry Andric     for (unsigned i = 0; i != NumElts; ++i)
2198*700637cbSDimitry Andric       Indices[i] = i;
2199*700637cbSDimitry Andric 
2200*700637cbSDimitry Andric     // First extract half of each vector. This gives better codegen than
2201*700637cbSDimitry Andric     // doing it in a single shuffle.
2202*700637cbSDimitry Andric     LHS = Builder.CreateShuffleVector(LHS, LHS, ArrayRef(Indices, NumElts / 2));
2203*700637cbSDimitry Andric     RHS = Builder.CreateShuffleVector(RHS, RHS, ArrayRef(Indices, NumElts / 2));
2204*700637cbSDimitry Andric     // Concat the vectors.
2205*700637cbSDimitry Andric     // NOTE: Operands are swapped to match the intrinsic definition.
    // With (RHS, LHS) as shuffle operands, indices 0..NumElts/2-1 select the
    // RHS half and NumElts/2..NumElts-1 select the LHS half.
2206*700637cbSDimitry Andric     Value *Res =
2207*700637cbSDimitry Andric         Builder.CreateShuffleVector(RHS, LHS, ArrayRef(Indices, NumElts));
2208*700637cbSDimitry Andric     return Builder.CreateBitCast(Res, Ops[0]->getType());
2209*700637cbSDimitry Andric   }
2210*700637cbSDimitry Andric 
2211*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vplzcntd_128:
2212*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vplzcntd_256:
2213*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vplzcntd_512:
2214*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vplzcntq_128:
2215*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vplzcntq_256:
2216*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vplzcntq_512: {
    // Lowered to the generic llvm.ctlz intrinsic, overloaded on the operand's
    // vector type. The second argument is the i1 constant false; per the LLVM
    // LangRef that is the "zero is poison" flag, so a zero input element
    // yields a defined result.
2217*700637cbSDimitry Andric     Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType());
2218*700637cbSDimitry Andric     return Builder.CreateCall(F, {Ops[0],Builder.getInt1(false)});
2219*700637cbSDimitry Andric   }
2220*700637cbSDimitry Andric   case X86::BI__builtin_ia32_sqrtss:
2221*700637cbSDimitry Andric   case X86::BI__builtin_ia32_sqrtsd: {
    // Scalar sqrt: take element 0 of Ops[0], compute its square root with the
    // generic sqrt intrinsic (the constrained form when the builder is in
    // constrained-FP mode), and insert it back as element 0 of Ops[0].
2222*700637cbSDimitry Andric     Value *A = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
2223*700637cbSDimitry Andric     Function *F;
2224*700637cbSDimitry Andric     if (Builder.getIsFPConstrained()) {
      // NOTE(review): FPOptsRAII presumably applies E's floating-point options
      // for the duration of this scope -- confirm against CGFPOptionsRAII.
2225*700637cbSDimitry Andric       CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
2226*700637cbSDimitry Andric       F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt,
2227*700637cbSDimitry Andric                            A->getType());
2228*700637cbSDimitry Andric       A = Builder.CreateConstrainedFPCall(F, {A});
2229*700637cbSDimitry Andric     } else {
2230*700637cbSDimitry Andric       F = CGM.getIntrinsic(Intrinsic::sqrt, A->getType());
2231*700637cbSDimitry Andric       A = Builder.CreateCall(F, {A});
2232*700637cbSDimitry Andric     }
2233*700637cbSDimitry Andric     return Builder.CreateInsertElement(Ops[0], A, (uint64_t)0);
2234*700637cbSDimitry Andric   }
2235*700637cbSDimitry Andric   case X86::BI__builtin_ia32_sqrtsh_round_mask:
2236*700637cbSDimitry Andric   case X86::BI__builtin_ia32_sqrtsd_round_mask:
2237*700637cbSDimitry Andric   case X86::BI__builtin_ia32_sqrtss_round_mask: {
    // Masked scalar sqrt with a rounding-mode immediate in Ops[4].
2238*700637cbSDimitry Andric     unsigned CC = cast<llvm::ConstantInt>(Ops[4])->getZExtValue();
2239*700637cbSDimitry Andric     // Support only if the rounding mode is 4 (AKA CUR_DIRECTION),
2240*700637cbSDimitry Andric     // otherwise keep the intrinsic.
2241*700637cbSDimitry Andric     if (CC != 4) {
2242*700637cbSDimitry Andric       Intrinsic::ID IID;
2243*700637cbSDimitry Andric 
2244*700637cbSDimitry Andric       switch (BuiltinID) {
2245*700637cbSDimitry Andric       default:
2246*700637cbSDimitry Andric         llvm_unreachable("Unsupported intrinsic!");
2247*700637cbSDimitry Andric       case X86::BI__builtin_ia32_sqrtsh_round_mask:
2248*700637cbSDimitry Andric         IID = Intrinsic::x86_avx512fp16_mask_sqrt_sh;
2249*700637cbSDimitry Andric         break;
2250*700637cbSDimitry Andric       case X86::BI__builtin_ia32_sqrtsd_round_mask:
2251*700637cbSDimitry Andric         IID = Intrinsic::x86_avx512_mask_sqrt_sd;
2252*700637cbSDimitry Andric         break;
2253*700637cbSDimitry Andric       case X86::BI__builtin_ia32_sqrtss_round_mask:
2254*700637cbSDimitry Andric         IID = Intrinsic::x86_avx512_mask_sqrt_ss;
2255*700637cbSDimitry Andric         break;
2256*700637cbSDimitry Andric       }
      // All operands, including the rounding immediate, are forwarded
      // unchanged to the target intrinsic.
2257*700637cbSDimitry Andric       return Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
2258*700637cbSDimitry Andric     }
    // Rounding mode 4: compute sqrt of element 0 of Ops[1] with the generic
    // (or constrained) sqrt intrinsic.
2259*700637cbSDimitry Andric     Value *A = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
2260*700637cbSDimitry Andric     Function *F;
2261*700637cbSDimitry Andric     if (Builder.getIsFPConstrained()) {
2262*700637cbSDimitry Andric       CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
2263*700637cbSDimitry Andric       F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt,
2264*700637cbSDimitry Andric                            A->getType());
2265*700637cbSDimitry Andric       A = Builder.CreateConstrainedFPCall(F, A);
2266*700637cbSDimitry Andric     } else {
2267*700637cbSDimitry Andric       F = CGM.getIntrinsic(Intrinsic::sqrt, A->getType());
2268*700637cbSDimitry Andric       A = Builder.CreateCall(F, A);
2269*700637cbSDimitry Andric     }
    // Choose between the computed value and element 0 of Ops[2] using Ops[3]
    // (via EmitX86ScalarSelect), then write the result into element 0 of
    // Ops[0]; the remaining elements come from Ops[0].
2270*700637cbSDimitry Andric     Value *Src = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
2271*700637cbSDimitry Andric     A = EmitX86ScalarSelect(*this, Ops[3], A, Src);
2272*700637cbSDimitry Andric     return Builder.CreateInsertElement(Ops[0], A, (uint64_t)0);
2273*700637cbSDimitry Andric   }
2274*700637cbSDimitry Andric   case X86::BI__builtin_ia32_sqrtpd256:
2275*700637cbSDimitry Andric   case X86::BI__builtin_ia32_sqrtpd:
2276*700637cbSDimitry Andric   case X86::BI__builtin_ia32_sqrtps256:
2277*700637cbSDimitry Andric   case X86::BI__builtin_ia32_sqrtps:
2278*700637cbSDimitry Andric   case X86::BI__builtin_ia32_sqrtph256:
2279*700637cbSDimitry Andric   case X86::BI__builtin_ia32_sqrtph:
2280*700637cbSDimitry Andric   case X86::BI__builtin_ia32_sqrtph512:
2281*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vsqrtbf16256:
2282*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vsqrtbf16:
2283*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vsqrtbf16512:
2284*700637cbSDimitry Andric   case X86::BI__builtin_ia32_sqrtps512:
2285*700637cbSDimitry Andric   case X86::BI__builtin_ia32_sqrtpd512: {
    // Packed sqrt. A second operand, when present, is the rounding-mode
    // immediate.
2286*700637cbSDimitry Andric     if (Ops.size() == 2) {
2287*700637cbSDimitry Andric       unsigned CC = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
2288*700637cbSDimitry Andric       // Support only if the rounding mode is 4 (AKA CUR_DIRECTION),
2289*700637cbSDimitry Andric       // otherwise keep the intrinsic.
2290*700637cbSDimitry Andric       if (CC != 4) {
2291*700637cbSDimitry Andric         Intrinsic::ID IID;
2292*700637cbSDimitry Andric 
        // Only the ph512/ps512/pd512 builtins are mapped here; any other
        // builtin reaching this switch hits llvm_unreachable.
        // NOTE(review): presumably only those three carry a rounding operand
        // -- confirm against the builtin definitions.
2293*700637cbSDimitry Andric         switch (BuiltinID) {
2294*700637cbSDimitry Andric         default:
2295*700637cbSDimitry Andric           llvm_unreachable("Unsupported intrinsic!");
2296*700637cbSDimitry Andric         case X86::BI__builtin_ia32_sqrtph512:
2297*700637cbSDimitry Andric           IID = Intrinsic::x86_avx512fp16_sqrt_ph_512;
2298*700637cbSDimitry Andric           break;
2299*700637cbSDimitry Andric         case X86::BI__builtin_ia32_sqrtps512:
2300*700637cbSDimitry Andric           IID = Intrinsic::x86_avx512_sqrt_ps_512;
2301*700637cbSDimitry Andric           break;
2302*700637cbSDimitry Andric         case X86::BI__builtin_ia32_sqrtpd512:
2303*700637cbSDimitry Andric           IID = Intrinsic::x86_avx512_sqrt_pd_512;
2304*700637cbSDimitry Andric           break;
2305*700637cbSDimitry Andric         }
2306*700637cbSDimitry Andric         return Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
2307*700637cbSDimitry Andric       }
2308*700637cbSDimitry Andric     }
    // No rounding operand, or rounding mode 4: use the generic sqrt intrinsic
    // on the whole vector (constrained form in constrained-FP mode). Only
    // Ops[0] is passed; the rounding immediate, if any, is dropped.
2309*700637cbSDimitry Andric     if (Builder.getIsFPConstrained()) {
2310*700637cbSDimitry Andric       CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
2311*700637cbSDimitry Andric       Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt,
2312*700637cbSDimitry Andric                                      Ops[0]->getType());
2313*700637cbSDimitry Andric       return Builder.CreateConstrainedFPCall(F, Ops[0]);
2314*700637cbSDimitry Andric     } else {
2315*700637cbSDimitry Andric       Function *F = CGM.getIntrinsic(Intrinsic::sqrt, Ops[0]->getType());
2316*700637cbSDimitry Andric       return Builder.CreateCall(F, Ops[0]);
2317*700637cbSDimitry Andric     }
2318*700637cbSDimitry Andric   }
2319*700637cbSDimitry Andric 
2320*700637cbSDimitry Andric   case X86::BI__builtin_ia32_pmuludq128:
2321*700637cbSDimitry Andric   case X86::BI__builtin_ia32_pmuludq256:
2322*700637cbSDimitry Andric   case X86::BI__builtin_ia32_pmuludq512:
2323*700637cbSDimitry Andric     return EmitX86Muldq(*this, /*IsSigned*/false, Ops);
2324*700637cbSDimitry Andric 
2325*700637cbSDimitry Andric   case X86::BI__builtin_ia32_pmuldq128:
2326*700637cbSDimitry Andric   case X86::BI__builtin_ia32_pmuldq256:
2327*700637cbSDimitry Andric   case X86::BI__builtin_ia32_pmuldq512:
2328*700637cbSDimitry Andric     return EmitX86Muldq(*this, /*IsSigned*/true, Ops);
2329*700637cbSDimitry Andric 
2330*700637cbSDimitry Andric   case X86::BI__builtin_ia32_pternlogd512_mask:
2331*700637cbSDimitry Andric   case X86::BI__builtin_ia32_pternlogq512_mask:
2332*700637cbSDimitry Andric   case X86::BI__builtin_ia32_pternlogd128_mask:
2333*700637cbSDimitry Andric   case X86::BI__builtin_ia32_pternlogd256_mask:
2334*700637cbSDimitry Andric   case X86::BI__builtin_ia32_pternlogq128_mask:
2335*700637cbSDimitry Andric   case X86::BI__builtin_ia32_pternlogq256_mask:
2336*700637cbSDimitry Andric     return EmitX86Ternlog(*this, /*ZeroMask*/false, Ops);
2337*700637cbSDimitry Andric 
2338*700637cbSDimitry Andric   case X86::BI__builtin_ia32_pternlogd512_maskz:
2339*700637cbSDimitry Andric   case X86::BI__builtin_ia32_pternlogq512_maskz:
2340*700637cbSDimitry Andric   case X86::BI__builtin_ia32_pternlogd128_maskz:
2341*700637cbSDimitry Andric   case X86::BI__builtin_ia32_pternlogd256_maskz:
2342*700637cbSDimitry Andric   case X86::BI__builtin_ia32_pternlogq128_maskz:
2343*700637cbSDimitry Andric   case X86::BI__builtin_ia32_pternlogq256_maskz:
2344*700637cbSDimitry Andric     return EmitX86Ternlog(*this, /*ZeroMask*/true, Ops);
2345*700637cbSDimitry Andric 
2346*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vpshldd128:
2347*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vpshldd256:
2348*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vpshldd512:
2349*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vpshldq128:
2350*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vpshldq256:
2351*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vpshldq512:
2352*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vpshldw128:
2353*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vpshldw256:
2354*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vpshldw512:
2355*700637cbSDimitry Andric     return EmitX86FunnelShift(*this, Ops[0], Ops[1], Ops[2], false);
2356*700637cbSDimitry Andric 
2357*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vpshrdd128:
2358*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vpshrdd256:
2359*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vpshrdd512:
2360*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vpshrdq128:
2361*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vpshrdq256:
2362*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vpshrdq512:
2363*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vpshrdw128:
2364*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vpshrdw256:
2365*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vpshrdw512:
2366*700637cbSDimitry Andric     // Ops 0 and 1 are swapped.
2367*700637cbSDimitry Andric     return EmitX86FunnelShift(*this, Ops[1], Ops[0], Ops[2], true);
2368*700637cbSDimitry Andric 
2369*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vpshldvd128:
2370*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vpshldvd256:
2371*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vpshldvd512:
2372*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vpshldvq128:
2373*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vpshldvq256:
2374*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vpshldvq512:
2375*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vpshldvw128:
2376*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vpshldvw256:
2377*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vpshldvw512:
2378*700637cbSDimitry Andric     return EmitX86FunnelShift(*this, Ops[0], Ops[1], Ops[2], false);
2379*700637cbSDimitry Andric 
2380*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vpshrdvd128:
2381*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vpshrdvd256:
2382*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vpshrdvd512:
2383*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vpshrdvq128:
2384*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vpshrdvq256:
2385*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vpshrdvq512:
2386*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vpshrdvw128:
2387*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vpshrdvw256:
2388*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vpshrdvw512:
2389*700637cbSDimitry Andric     // Ops 0 and 1 are swapped.
2390*700637cbSDimitry Andric     return EmitX86FunnelShift(*this, Ops[1], Ops[0], Ops[2], true);
2391*700637cbSDimitry Andric 
2392*700637cbSDimitry Andric   // Reductions
2393*700637cbSDimitry Andric   case X86::BI__builtin_ia32_reduce_fadd_pd512:
2394*700637cbSDimitry Andric   case X86::BI__builtin_ia32_reduce_fadd_ps512:
2395*700637cbSDimitry Andric   case X86::BI__builtin_ia32_reduce_fadd_ph512:
2396*700637cbSDimitry Andric   case X86::BI__builtin_ia32_reduce_fadd_ph256:
2397*700637cbSDimitry Andric   case X86::BI__builtin_ia32_reduce_fadd_ph128: {
2398*700637cbSDimitry Andric     Function *F =
2399*700637cbSDimitry Andric         CGM.getIntrinsic(Intrinsic::vector_reduce_fadd, Ops[1]->getType());
2400*700637cbSDimitry Andric     IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
2401*700637cbSDimitry Andric     Builder.getFastMathFlags().setAllowReassoc();
2402*700637cbSDimitry Andric     return Builder.CreateCall(F, {Ops[0], Ops[1]});
2403*700637cbSDimitry Andric   }
2404*700637cbSDimitry Andric   case X86::BI__builtin_ia32_reduce_fmul_pd512:
2405*700637cbSDimitry Andric   case X86::BI__builtin_ia32_reduce_fmul_ps512:
2406*700637cbSDimitry Andric   case X86::BI__builtin_ia32_reduce_fmul_ph512:
2407*700637cbSDimitry Andric   case X86::BI__builtin_ia32_reduce_fmul_ph256:
2408*700637cbSDimitry Andric   case X86::BI__builtin_ia32_reduce_fmul_ph128: {
2409*700637cbSDimitry Andric     Function *F =
2410*700637cbSDimitry Andric         CGM.getIntrinsic(Intrinsic::vector_reduce_fmul, Ops[1]->getType());
2411*700637cbSDimitry Andric     IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
2412*700637cbSDimitry Andric     Builder.getFastMathFlags().setAllowReassoc();
2413*700637cbSDimitry Andric     return Builder.CreateCall(F, {Ops[0], Ops[1]});
2414*700637cbSDimitry Andric   }
2415*700637cbSDimitry Andric   case X86::BI__builtin_ia32_reduce_fmax_pd512:
2416*700637cbSDimitry Andric   case X86::BI__builtin_ia32_reduce_fmax_ps512:
2417*700637cbSDimitry Andric   case X86::BI__builtin_ia32_reduce_fmax_ph512:
2418*700637cbSDimitry Andric   case X86::BI__builtin_ia32_reduce_fmax_ph256:
2419*700637cbSDimitry Andric   case X86::BI__builtin_ia32_reduce_fmax_ph128: {
2420*700637cbSDimitry Andric     Function *F =
2421*700637cbSDimitry Andric         CGM.getIntrinsic(Intrinsic::vector_reduce_fmax, Ops[0]->getType());
2422*700637cbSDimitry Andric     IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
2423*700637cbSDimitry Andric     Builder.getFastMathFlags().setNoNaNs();
2424*700637cbSDimitry Andric     return Builder.CreateCall(F, {Ops[0]});
2425*700637cbSDimitry Andric   }
2426*700637cbSDimitry Andric   case X86::BI__builtin_ia32_reduce_fmin_pd512:
2427*700637cbSDimitry Andric   case X86::BI__builtin_ia32_reduce_fmin_ps512:
2428*700637cbSDimitry Andric   case X86::BI__builtin_ia32_reduce_fmin_ph512:
2429*700637cbSDimitry Andric   case X86::BI__builtin_ia32_reduce_fmin_ph256:
2430*700637cbSDimitry Andric   case X86::BI__builtin_ia32_reduce_fmin_ph128: {
2431*700637cbSDimitry Andric     Function *F =
2432*700637cbSDimitry Andric         CGM.getIntrinsic(Intrinsic::vector_reduce_fmin, Ops[0]->getType());
2433*700637cbSDimitry Andric     IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
2434*700637cbSDimitry Andric     Builder.getFastMathFlags().setNoNaNs();
2435*700637cbSDimitry Andric     return Builder.CreateCall(F, {Ops[0]});
2436*700637cbSDimitry Andric   }
2437*700637cbSDimitry Andric 
2438*700637cbSDimitry Andric   case X86::BI__builtin_ia32_rdrand16_step:
2439*700637cbSDimitry Andric   case X86::BI__builtin_ia32_rdrand32_step:
2440*700637cbSDimitry Andric   case X86::BI__builtin_ia32_rdrand64_step:
2441*700637cbSDimitry Andric   case X86::BI__builtin_ia32_rdseed16_step:
2442*700637cbSDimitry Andric   case X86::BI__builtin_ia32_rdseed32_step:
2443*700637cbSDimitry Andric   case X86::BI__builtin_ia32_rdseed64_step: {
2444*700637cbSDimitry Andric     Intrinsic::ID ID;
2445*700637cbSDimitry Andric     switch (BuiltinID) {
2446*700637cbSDimitry Andric     default: llvm_unreachable("Unsupported intrinsic!");
2447*700637cbSDimitry Andric     case X86::BI__builtin_ia32_rdrand16_step:
2448*700637cbSDimitry Andric       ID = Intrinsic::x86_rdrand_16;
2449*700637cbSDimitry Andric       break;
2450*700637cbSDimitry Andric     case X86::BI__builtin_ia32_rdrand32_step:
2451*700637cbSDimitry Andric       ID = Intrinsic::x86_rdrand_32;
2452*700637cbSDimitry Andric       break;
2453*700637cbSDimitry Andric     case X86::BI__builtin_ia32_rdrand64_step:
2454*700637cbSDimitry Andric       ID = Intrinsic::x86_rdrand_64;
2455*700637cbSDimitry Andric       break;
2456*700637cbSDimitry Andric     case X86::BI__builtin_ia32_rdseed16_step:
2457*700637cbSDimitry Andric       ID = Intrinsic::x86_rdseed_16;
2458*700637cbSDimitry Andric       break;
2459*700637cbSDimitry Andric     case X86::BI__builtin_ia32_rdseed32_step:
2460*700637cbSDimitry Andric       ID = Intrinsic::x86_rdseed_32;
2461*700637cbSDimitry Andric       break;
2462*700637cbSDimitry Andric     case X86::BI__builtin_ia32_rdseed64_step:
2463*700637cbSDimitry Andric       ID = Intrinsic::x86_rdseed_64;
2464*700637cbSDimitry Andric       break;
2465*700637cbSDimitry Andric     }
2466*700637cbSDimitry Andric 
2467*700637cbSDimitry Andric     Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID));
2468*700637cbSDimitry Andric     Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 0),
2469*700637cbSDimitry Andric                                       Ops[0]);
2470*700637cbSDimitry Andric     return Builder.CreateExtractValue(Call, 1);
2471*700637cbSDimitry Andric   }
2472*700637cbSDimitry Andric   case X86::BI__builtin_ia32_addcarryx_u32:
2473*700637cbSDimitry Andric   case X86::BI__builtin_ia32_addcarryx_u64:
2474*700637cbSDimitry Andric   case X86::BI__builtin_ia32_subborrow_u32:
2475*700637cbSDimitry Andric   case X86::BI__builtin_ia32_subborrow_u64: {
2476*700637cbSDimitry Andric     Intrinsic::ID IID;
2477*700637cbSDimitry Andric     switch (BuiltinID) {
2478*700637cbSDimitry Andric     default: llvm_unreachable("Unsupported intrinsic!");
2479*700637cbSDimitry Andric     case X86::BI__builtin_ia32_addcarryx_u32:
2480*700637cbSDimitry Andric       IID = Intrinsic::x86_addcarry_32;
2481*700637cbSDimitry Andric       break;
2482*700637cbSDimitry Andric     case X86::BI__builtin_ia32_addcarryx_u64:
2483*700637cbSDimitry Andric       IID = Intrinsic::x86_addcarry_64;
2484*700637cbSDimitry Andric       break;
2485*700637cbSDimitry Andric     case X86::BI__builtin_ia32_subborrow_u32:
2486*700637cbSDimitry Andric       IID = Intrinsic::x86_subborrow_32;
2487*700637cbSDimitry Andric       break;
2488*700637cbSDimitry Andric     case X86::BI__builtin_ia32_subborrow_u64:
2489*700637cbSDimitry Andric       IID = Intrinsic::x86_subborrow_64;
2490*700637cbSDimitry Andric       break;
2491*700637cbSDimitry Andric     }
2492*700637cbSDimitry Andric 
2493*700637cbSDimitry Andric     Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID),
2494*700637cbSDimitry Andric                                      { Ops[0], Ops[1], Ops[2] });
2495*700637cbSDimitry Andric     Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 1),
2496*700637cbSDimitry Andric                                       Ops[3]);
2497*700637cbSDimitry Andric     return Builder.CreateExtractValue(Call, 0);
2498*700637cbSDimitry Andric   }
2499*700637cbSDimitry Andric 
2500*700637cbSDimitry Andric   case X86::BI__builtin_ia32_fpclassps128_mask:
2501*700637cbSDimitry Andric   case X86::BI__builtin_ia32_fpclassps256_mask:
2502*700637cbSDimitry Andric   case X86::BI__builtin_ia32_fpclassps512_mask:
2503*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vfpclassbf16128_mask:
2504*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vfpclassbf16256_mask:
2505*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vfpclassbf16512_mask:
2506*700637cbSDimitry Andric   case X86::BI__builtin_ia32_fpclassph128_mask:
2507*700637cbSDimitry Andric   case X86::BI__builtin_ia32_fpclassph256_mask:
2508*700637cbSDimitry Andric   case X86::BI__builtin_ia32_fpclassph512_mask:
2509*700637cbSDimitry Andric   case X86::BI__builtin_ia32_fpclasspd128_mask:
2510*700637cbSDimitry Andric   case X86::BI__builtin_ia32_fpclasspd256_mask:
2511*700637cbSDimitry Andric   case X86::BI__builtin_ia32_fpclasspd512_mask: {
2512*700637cbSDimitry Andric     unsigned NumElts =
2513*700637cbSDimitry Andric         cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
2514*700637cbSDimitry Andric     Value *MaskIn = Ops[2];
2515*700637cbSDimitry Andric     Ops.erase(&Ops[2]);
2516*700637cbSDimitry Andric 
2517*700637cbSDimitry Andric     Intrinsic::ID ID;
2518*700637cbSDimitry Andric     switch (BuiltinID) {
2519*700637cbSDimitry Andric     default: llvm_unreachable("Unsupported intrinsic!");
2520*700637cbSDimitry Andric     case X86::BI__builtin_ia32_vfpclassbf16128_mask:
2521*700637cbSDimitry Andric       ID = Intrinsic::x86_avx10_fpclass_bf16_128;
2522*700637cbSDimitry Andric       break;
2523*700637cbSDimitry Andric     case X86::BI__builtin_ia32_vfpclassbf16256_mask:
2524*700637cbSDimitry Andric       ID = Intrinsic::x86_avx10_fpclass_bf16_256;
2525*700637cbSDimitry Andric       break;
2526*700637cbSDimitry Andric     case X86::BI__builtin_ia32_vfpclassbf16512_mask:
2527*700637cbSDimitry Andric       ID = Intrinsic::x86_avx10_fpclass_bf16_512;
2528*700637cbSDimitry Andric       break;
2529*700637cbSDimitry Andric     case X86::BI__builtin_ia32_fpclassph128_mask:
2530*700637cbSDimitry Andric       ID = Intrinsic::x86_avx512fp16_fpclass_ph_128;
2531*700637cbSDimitry Andric       break;
2532*700637cbSDimitry Andric     case X86::BI__builtin_ia32_fpclassph256_mask:
2533*700637cbSDimitry Andric       ID = Intrinsic::x86_avx512fp16_fpclass_ph_256;
2534*700637cbSDimitry Andric       break;
2535*700637cbSDimitry Andric     case X86::BI__builtin_ia32_fpclassph512_mask:
2536*700637cbSDimitry Andric       ID = Intrinsic::x86_avx512fp16_fpclass_ph_512;
2537*700637cbSDimitry Andric       break;
2538*700637cbSDimitry Andric     case X86::BI__builtin_ia32_fpclassps128_mask:
2539*700637cbSDimitry Andric       ID = Intrinsic::x86_avx512_fpclass_ps_128;
2540*700637cbSDimitry Andric       break;
2541*700637cbSDimitry Andric     case X86::BI__builtin_ia32_fpclassps256_mask:
2542*700637cbSDimitry Andric       ID = Intrinsic::x86_avx512_fpclass_ps_256;
2543*700637cbSDimitry Andric       break;
2544*700637cbSDimitry Andric     case X86::BI__builtin_ia32_fpclassps512_mask:
2545*700637cbSDimitry Andric       ID = Intrinsic::x86_avx512_fpclass_ps_512;
2546*700637cbSDimitry Andric       break;
2547*700637cbSDimitry Andric     case X86::BI__builtin_ia32_fpclasspd128_mask:
2548*700637cbSDimitry Andric       ID = Intrinsic::x86_avx512_fpclass_pd_128;
2549*700637cbSDimitry Andric       break;
2550*700637cbSDimitry Andric     case X86::BI__builtin_ia32_fpclasspd256_mask:
2551*700637cbSDimitry Andric       ID = Intrinsic::x86_avx512_fpclass_pd_256;
2552*700637cbSDimitry Andric       break;
2553*700637cbSDimitry Andric     case X86::BI__builtin_ia32_fpclasspd512_mask:
2554*700637cbSDimitry Andric       ID = Intrinsic::x86_avx512_fpclass_pd_512;
2555*700637cbSDimitry Andric       break;
2556*700637cbSDimitry Andric     }
2557*700637cbSDimitry Andric 
2558*700637cbSDimitry Andric     Value *Fpclass = Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
2559*700637cbSDimitry Andric     return EmitX86MaskedCompareResult(*this, Fpclass, NumElts, MaskIn);
2560*700637cbSDimitry Andric   }
2561*700637cbSDimitry Andric 
2562*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vp2intersect_q_512:
2563*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vp2intersect_q_256:
2564*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vp2intersect_q_128:
2565*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vp2intersect_d_512:
2566*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vp2intersect_d_256:
2567*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vp2intersect_d_128: {
2568*700637cbSDimitry Andric     unsigned NumElts =
2569*700637cbSDimitry Andric         cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
2570*700637cbSDimitry Andric     Intrinsic::ID ID;
2571*700637cbSDimitry Andric 
2572*700637cbSDimitry Andric     switch (BuiltinID) {
2573*700637cbSDimitry Andric     default: llvm_unreachable("Unsupported intrinsic!");
2574*700637cbSDimitry Andric     case X86::BI__builtin_ia32_vp2intersect_q_512:
2575*700637cbSDimitry Andric       ID = Intrinsic::x86_avx512_vp2intersect_q_512;
2576*700637cbSDimitry Andric       break;
2577*700637cbSDimitry Andric     case X86::BI__builtin_ia32_vp2intersect_q_256:
2578*700637cbSDimitry Andric       ID = Intrinsic::x86_avx512_vp2intersect_q_256;
2579*700637cbSDimitry Andric       break;
2580*700637cbSDimitry Andric     case X86::BI__builtin_ia32_vp2intersect_q_128:
2581*700637cbSDimitry Andric       ID = Intrinsic::x86_avx512_vp2intersect_q_128;
2582*700637cbSDimitry Andric       break;
2583*700637cbSDimitry Andric     case X86::BI__builtin_ia32_vp2intersect_d_512:
2584*700637cbSDimitry Andric       ID = Intrinsic::x86_avx512_vp2intersect_d_512;
2585*700637cbSDimitry Andric       break;
2586*700637cbSDimitry Andric     case X86::BI__builtin_ia32_vp2intersect_d_256:
2587*700637cbSDimitry Andric       ID = Intrinsic::x86_avx512_vp2intersect_d_256;
2588*700637cbSDimitry Andric       break;
2589*700637cbSDimitry Andric     case X86::BI__builtin_ia32_vp2intersect_d_128:
2590*700637cbSDimitry Andric       ID = Intrinsic::x86_avx512_vp2intersect_d_128;
2591*700637cbSDimitry Andric       break;
2592*700637cbSDimitry Andric     }
2593*700637cbSDimitry Andric 
2594*700637cbSDimitry Andric     Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID), {Ops[0], Ops[1]});
2595*700637cbSDimitry Andric     Value *Result = Builder.CreateExtractValue(Call, 0);
2596*700637cbSDimitry Andric     Result = EmitX86MaskedCompareResult(*this, Result, NumElts, nullptr);
2597*700637cbSDimitry Andric     Builder.CreateDefaultAlignedStore(Result, Ops[2]);
2598*700637cbSDimitry Andric 
2599*700637cbSDimitry Andric     Result = Builder.CreateExtractValue(Call, 1);
2600*700637cbSDimitry Andric     Result = EmitX86MaskedCompareResult(*this, Result, NumElts, nullptr);
2601*700637cbSDimitry Andric     return Builder.CreateDefaultAlignedStore(Result, Ops[3]);
2602*700637cbSDimitry Andric   }
2603*700637cbSDimitry Andric 
2604*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vpmultishiftqb128:
2605*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vpmultishiftqb256:
2606*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vpmultishiftqb512: {
2607*700637cbSDimitry Andric     Intrinsic::ID ID;
2608*700637cbSDimitry Andric     switch (BuiltinID) {
2609*700637cbSDimitry Andric     default: llvm_unreachable("Unsupported intrinsic!");
2610*700637cbSDimitry Andric     case X86::BI__builtin_ia32_vpmultishiftqb128:
2611*700637cbSDimitry Andric       ID = Intrinsic::x86_avx512_pmultishift_qb_128;
2612*700637cbSDimitry Andric       break;
2613*700637cbSDimitry Andric     case X86::BI__builtin_ia32_vpmultishiftqb256:
2614*700637cbSDimitry Andric       ID = Intrinsic::x86_avx512_pmultishift_qb_256;
2615*700637cbSDimitry Andric       break;
2616*700637cbSDimitry Andric     case X86::BI__builtin_ia32_vpmultishiftqb512:
2617*700637cbSDimitry Andric       ID = Intrinsic::x86_avx512_pmultishift_qb_512;
2618*700637cbSDimitry Andric       break;
2619*700637cbSDimitry Andric     }
2620*700637cbSDimitry Andric 
2621*700637cbSDimitry Andric     return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
2622*700637cbSDimitry Andric   }
2623*700637cbSDimitry Andric 
2624*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vpshufbitqmb128_mask:
2625*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vpshufbitqmb256_mask:
2626*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vpshufbitqmb512_mask: {
2627*700637cbSDimitry Andric     unsigned NumElts =
2628*700637cbSDimitry Andric         cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
2629*700637cbSDimitry Andric     Value *MaskIn = Ops[2];
2630*700637cbSDimitry Andric     Ops.erase(&Ops[2]);
2631*700637cbSDimitry Andric 
2632*700637cbSDimitry Andric     Intrinsic::ID ID;
2633*700637cbSDimitry Andric     switch (BuiltinID) {
2634*700637cbSDimitry Andric     default: llvm_unreachable("Unsupported intrinsic!");
2635*700637cbSDimitry Andric     case X86::BI__builtin_ia32_vpshufbitqmb128_mask:
2636*700637cbSDimitry Andric       ID = Intrinsic::x86_avx512_vpshufbitqmb_128;
2637*700637cbSDimitry Andric       break;
2638*700637cbSDimitry Andric     case X86::BI__builtin_ia32_vpshufbitqmb256_mask:
2639*700637cbSDimitry Andric       ID = Intrinsic::x86_avx512_vpshufbitqmb_256;
2640*700637cbSDimitry Andric       break;
2641*700637cbSDimitry Andric     case X86::BI__builtin_ia32_vpshufbitqmb512_mask:
2642*700637cbSDimitry Andric       ID = Intrinsic::x86_avx512_vpshufbitqmb_512;
2643*700637cbSDimitry Andric       break;
2644*700637cbSDimitry Andric     }
2645*700637cbSDimitry Andric 
2646*700637cbSDimitry Andric     Value *Shufbit = Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
2647*700637cbSDimitry Andric     return EmitX86MaskedCompareResult(*this, Shufbit, NumElts, MaskIn);
2648*700637cbSDimitry Andric   }
2649*700637cbSDimitry Andric 
2650*700637cbSDimitry Andric   // packed comparison intrinsics
2651*700637cbSDimitry Andric   case X86::BI__builtin_ia32_cmpeqps:
2652*700637cbSDimitry Andric   case X86::BI__builtin_ia32_cmpeqpd:
2653*700637cbSDimitry Andric     return getVectorFCmpIR(CmpInst::FCMP_OEQ, /*IsSignaling*/false);
2654*700637cbSDimitry Andric   case X86::BI__builtin_ia32_cmpltps:
2655*700637cbSDimitry Andric   case X86::BI__builtin_ia32_cmpltpd:
2656*700637cbSDimitry Andric     return getVectorFCmpIR(CmpInst::FCMP_OLT, /*IsSignaling*/true);
2657*700637cbSDimitry Andric   case X86::BI__builtin_ia32_cmpleps:
2658*700637cbSDimitry Andric   case X86::BI__builtin_ia32_cmplepd:
2659*700637cbSDimitry Andric     return getVectorFCmpIR(CmpInst::FCMP_OLE, /*IsSignaling*/true);
2660*700637cbSDimitry Andric   case X86::BI__builtin_ia32_cmpunordps:
2661*700637cbSDimitry Andric   case X86::BI__builtin_ia32_cmpunordpd:
2662*700637cbSDimitry Andric     return getVectorFCmpIR(CmpInst::FCMP_UNO, /*IsSignaling*/false);
2663*700637cbSDimitry Andric   case X86::BI__builtin_ia32_cmpneqps:
2664*700637cbSDimitry Andric   case X86::BI__builtin_ia32_cmpneqpd:
2665*700637cbSDimitry Andric     return getVectorFCmpIR(CmpInst::FCMP_UNE, /*IsSignaling*/false);
2666*700637cbSDimitry Andric   case X86::BI__builtin_ia32_cmpnltps:
2667*700637cbSDimitry Andric   case X86::BI__builtin_ia32_cmpnltpd:
2668*700637cbSDimitry Andric     return getVectorFCmpIR(CmpInst::FCMP_UGE, /*IsSignaling*/true);
2669*700637cbSDimitry Andric   case X86::BI__builtin_ia32_cmpnleps:
2670*700637cbSDimitry Andric   case X86::BI__builtin_ia32_cmpnlepd:
2671*700637cbSDimitry Andric     return getVectorFCmpIR(CmpInst::FCMP_UGT, /*IsSignaling*/true);
2672*700637cbSDimitry Andric   case X86::BI__builtin_ia32_cmpordps:
2673*700637cbSDimitry Andric   case X86::BI__builtin_ia32_cmpordpd:
2674*700637cbSDimitry Andric     return getVectorFCmpIR(CmpInst::FCMP_ORD, /*IsSignaling*/false);
2675*700637cbSDimitry Andric   case X86::BI__builtin_ia32_cmpph128_mask:
2676*700637cbSDimitry Andric   case X86::BI__builtin_ia32_cmpph256_mask:
2677*700637cbSDimitry Andric   case X86::BI__builtin_ia32_cmpph512_mask:
2678*700637cbSDimitry Andric   case X86::BI__builtin_ia32_cmpps128_mask:
2679*700637cbSDimitry Andric   case X86::BI__builtin_ia32_cmpps256_mask:
2680*700637cbSDimitry Andric   case X86::BI__builtin_ia32_cmpps512_mask:
2681*700637cbSDimitry Andric   case X86::BI__builtin_ia32_cmppd128_mask:
2682*700637cbSDimitry Andric   case X86::BI__builtin_ia32_cmppd256_mask:
2683*700637cbSDimitry Andric   case X86::BI__builtin_ia32_cmppd512_mask:
2684*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vcmpbf16512_mask:
2685*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vcmpbf16256_mask:
2686*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vcmpbf16128_mask:
2687*700637cbSDimitry Andric     IsMaskFCmp = true;
2688*700637cbSDimitry Andric     [[fallthrough]];
2689*700637cbSDimitry Andric   case X86::BI__builtin_ia32_cmpps:
2690*700637cbSDimitry Andric   case X86::BI__builtin_ia32_cmpps256:
2691*700637cbSDimitry Andric   case X86::BI__builtin_ia32_cmppd:
2692*700637cbSDimitry Andric   case X86::BI__builtin_ia32_cmppd256: {
2693*700637cbSDimitry Andric     // Lowering vector comparisons to fcmp instructions, while
2694*700637cbSDimitry Andric     // ignoring signalling behaviour requested
2695*700637cbSDimitry Andric     // ignoring rounding mode requested
2696*700637cbSDimitry Andric     // This is only possible if fp-model is not strict and FENV_ACCESS is off.
2697*700637cbSDimitry Andric 
2698*700637cbSDimitry Andric     // The third argument is the comparison condition, an integer in the
2699*700637cbSDimitry Andric     // range [0, 31].
2700*700637cbSDimitry Andric     unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x1f;
2701*700637cbSDimitry Andric 
2702*700637cbSDimitry Andric     // Lowering to IR fcmp instruction.
2703*700637cbSDimitry Andric     // Ignoring requested signaling behaviour,
2704*700637cbSDimitry Andric     // e.g. both _CMP_GT_OS & _CMP_GT_OQ are translated to FCMP_OGT.
2705*700637cbSDimitry Andric     FCmpInst::Predicate Pred;
2706*700637cbSDimitry Andric     bool IsSignaling;
2707*700637cbSDimitry Andric     // Predicates for 16-31 repeat the 0-15 predicates. Only the signalling
2708*700637cbSDimitry Andric     // behavior is inverted. We'll handle that after the switch.
2709*700637cbSDimitry Andric     switch (CC & 0xf) {
2710*700637cbSDimitry Andric     case 0x00: Pred = FCmpInst::FCMP_OEQ;   IsSignaling = false; break;
2711*700637cbSDimitry Andric     case 0x01: Pred = FCmpInst::FCMP_OLT;   IsSignaling = true;  break;
2712*700637cbSDimitry Andric     case 0x02: Pred = FCmpInst::FCMP_OLE;   IsSignaling = true;  break;
2713*700637cbSDimitry Andric     case 0x03: Pred = FCmpInst::FCMP_UNO;   IsSignaling = false; break;
2714*700637cbSDimitry Andric     case 0x04: Pred = FCmpInst::FCMP_UNE;   IsSignaling = false; break;
2715*700637cbSDimitry Andric     case 0x05: Pred = FCmpInst::FCMP_UGE;   IsSignaling = true;  break;
2716*700637cbSDimitry Andric     case 0x06: Pred = FCmpInst::FCMP_UGT;   IsSignaling = true;  break;
2717*700637cbSDimitry Andric     case 0x07: Pred = FCmpInst::FCMP_ORD;   IsSignaling = false; break;
2718*700637cbSDimitry Andric     case 0x08: Pred = FCmpInst::FCMP_UEQ;   IsSignaling = false; break;
2719*700637cbSDimitry Andric     case 0x09: Pred = FCmpInst::FCMP_ULT;   IsSignaling = true;  break;
2720*700637cbSDimitry Andric     case 0x0a: Pred = FCmpInst::FCMP_ULE;   IsSignaling = true;  break;
2721*700637cbSDimitry Andric     case 0x0b: Pred = FCmpInst::FCMP_FALSE; IsSignaling = false; break;
2722*700637cbSDimitry Andric     case 0x0c: Pred = FCmpInst::FCMP_ONE;   IsSignaling = false; break;
2723*700637cbSDimitry Andric     case 0x0d: Pred = FCmpInst::FCMP_OGE;   IsSignaling = true;  break;
2724*700637cbSDimitry Andric     case 0x0e: Pred = FCmpInst::FCMP_OGT;   IsSignaling = true;  break;
2725*700637cbSDimitry Andric     case 0x0f: Pred = FCmpInst::FCMP_TRUE;  IsSignaling = false; break;
2726*700637cbSDimitry Andric     default: llvm_unreachable("Unhandled CC");
2727*700637cbSDimitry Andric     }
2728*700637cbSDimitry Andric 
2729*700637cbSDimitry Andric     // Invert the signalling behavior for 16-31.
2730*700637cbSDimitry Andric     if (CC & 0x10)
2731*700637cbSDimitry Andric       IsSignaling = !IsSignaling;
2732*700637cbSDimitry Andric 
2733*700637cbSDimitry Andric     // If the predicate is true or false and we're using constrained intrinsics,
2734*700637cbSDimitry Andric     // we don't have a compare intrinsic we can use. Just use the legacy X86
2735*700637cbSDimitry Andric     // specific intrinsic.
2736*700637cbSDimitry Andric     // If the intrinsic is mask enabled and we're using constrained intrinsics,
2737*700637cbSDimitry Andric     // use the legacy X86 specific intrinsic.
2738*700637cbSDimitry Andric     if (Builder.getIsFPConstrained() &&
2739*700637cbSDimitry Andric         (Pred == FCmpInst::FCMP_TRUE || Pred == FCmpInst::FCMP_FALSE ||
2740*700637cbSDimitry Andric          IsMaskFCmp)) {
2741*700637cbSDimitry Andric 
2742*700637cbSDimitry Andric       Intrinsic::ID IID;
2743*700637cbSDimitry Andric       switch (BuiltinID) {
2744*700637cbSDimitry Andric       default: llvm_unreachable("Unexpected builtin");
2745*700637cbSDimitry Andric       case X86::BI__builtin_ia32_cmpps:
2746*700637cbSDimitry Andric         IID = Intrinsic::x86_sse_cmp_ps;
2747*700637cbSDimitry Andric         break;
2748*700637cbSDimitry Andric       case X86::BI__builtin_ia32_cmpps256:
2749*700637cbSDimitry Andric         IID = Intrinsic::x86_avx_cmp_ps_256;
2750*700637cbSDimitry Andric         break;
2751*700637cbSDimitry Andric       case X86::BI__builtin_ia32_cmppd:
2752*700637cbSDimitry Andric         IID = Intrinsic::x86_sse2_cmp_pd;
2753*700637cbSDimitry Andric         break;
2754*700637cbSDimitry Andric       case X86::BI__builtin_ia32_cmppd256:
2755*700637cbSDimitry Andric         IID = Intrinsic::x86_avx_cmp_pd_256;
2756*700637cbSDimitry Andric         break;
2757*700637cbSDimitry Andric       case X86::BI__builtin_ia32_cmpph128_mask:
2758*700637cbSDimitry Andric         IID = Intrinsic::x86_avx512fp16_mask_cmp_ph_128;
2759*700637cbSDimitry Andric         break;
2760*700637cbSDimitry Andric       case X86::BI__builtin_ia32_cmpph256_mask:
2761*700637cbSDimitry Andric         IID = Intrinsic::x86_avx512fp16_mask_cmp_ph_256;
2762*700637cbSDimitry Andric         break;
2763*700637cbSDimitry Andric       case X86::BI__builtin_ia32_cmpph512_mask:
2764*700637cbSDimitry Andric         IID = Intrinsic::x86_avx512fp16_mask_cmp_ph_512;
2765*700637cbSDimitry Andric         break;
2766*700637cbSDimitry Andric       case X86::BI__builtin_ia32_cmpps512_mask:
2767*700637cbSDimitry Andric         IID = Intrinsic::x86_avx512_mask_cmp_ps_512;
2768*700637cbSDimitry Andric         break;
2769*700637cbSDimitry Andric       case X86::BI__builtin_ia32_cmppd512_mask:
2770*700637cbSDimitry Andric         IID = Intrinsic::x86_avx512_mask_cmp_pd_512;
2771*700637cbSDimitry Andric         break;
2772*700637cbSDimitry Andric       case X86::BI__builtin_ia32_cmpps128_mask:
2773*700637cbSDimitry Andric         IID = Intrinsic::x86_avx512_mask_cmp_ps_128;
2774*700637cbSDimitry Andric         break;
2775*700637cbSDimitry Andric       case X86::BI__builtin_ia32_cmpps256_mask:
2776*700637cbSDimitry Andric         IID = Intrinsic::x86_avx512_mask_cmp_ps_256;
2777*700637cbSDimitry Andric         break;
2778*700637cbSDimitry Andric       case X86::BI__builtin_ia32_cmppd128_mask:
2779*700637cbSDimitry Andric         IID = Intrinsic::x86_avx512_mask_cmp_pd_128;
2780*700637cbSDimitry Andric         break;
2781*700637cbSDimitry Andric       case X86::BI__builtin_ia32_cmppd256_mask:
2782*700637cbSDimitry Andric         IID = Intrinsic::x86_avx512_mask_cmp_pd_256;
2783*700637cbSDimitry Andric         break;
2784*700637cbSDimitry Andric       }
2785*700637cbSDimitry Andric 
2786*700637cbSDimitry Andric       Function *Intr = CGM.getIntrinsic(IID);
2787*700637cbSDimitry Andric       if (IsMaskFCmp) {
2788*700637cbSDimitry Andric         unsigned NumElts =
2789*700637cbSDimitry Andric             cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
2790*700637cbSDimitry Andric         Ops[3] = getMaskVecValue(*this, Ops[3], NumElts);
2791*700637cbSDimitry Andric         Value *Cmp = Builder.CreateCall(Intr, Ops);
2792*700637cbSDimitry Andric         return EmitX86MaskedCompareResult(*this, Cmp, NumElts, nullptr);
2793*700637cbSDimitry Andric       }
2794*700637cbSDimitry Andric 
2795*700637cbSDimitry Andric       return Builder.CreateCall(Intr, Ops);
2796*700637cbSDimitry Andric     }
2797*700637cbSDimitry Andric 
2798*700637cbSDimitry Andric     // Builtins without the _mask suffix return a vector of integers
2799*700637cbSDimitry Andric     // of the same width as the input vectors
2800*700637cbSDimitry Andric     if (IsMaskFCmp) {
2801*700637cbSDimitry Andric       // We ignore SAE if strict FP is disabled. We only keep precise
2802*700637cbSDimitry Andric       // exception behavior under strict FP.
2803*700637cbSDimitry Andric       // NOTE: If strict FP does ever go through here a CGFPOptionsRAII
2804*700637cbSDimitry Andric       // object will be required.
2805*700637cbSDimitry Andric       unsigned NumElts =
2806*700637cbSDimitry Andric           cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
2807*700637cbSDimitry Andric       Value *Cmp;
2808*700637cbSDimitry Andric       if (IsSignaling)
2809*700637cbSDimitry Andric         Cmp = Builder.CreateFCmpS(Pred, Ops[0], Ops[1]);
2810*700637cbSDimitry Andric       else
2811*700637cbSDimitry Andric         Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]);
2812*700637cbSDimitry Andric       return EmitX86MaskedCompareResult(*this, Cmp, NumElts, Ops[3]);
2813*700637cbSDimitry Andric     }
2814*700637cbSDimitry Andric 
2815*700637cbSDimitry Andric     return getVectorFCmpIR(Pred, IsSignaling);
2816*700637cbSDimitry Andric   }
2817*700637cbSDimitry Andric 
2818*700637cbSDimitry Andric   // SSE scalar comparison intrinsics
2819*700637cbSDimitry Andric   case X86::BI__builtin_ia32_cmpeqss:
2820*700637cbSDimitry Andric     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 0);
2821*700637cbSDimitry Andric   case X86::BI__builtin_ia32_cmpltss:
2822*700637cbSDimitry Andric     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 1);
2823*700637cbSDimitry Andric   case X86::BI__builtin_ia32_cmpless:
2824*700637cbSDimitry Andric     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 2);
2825*700637cbSDimitry Andric   case X86::BI__builtin_ia32_cmpunordss:
2826*700637cbSDimitry Andric     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 3);
2827*700637cbSDimitry Andric   case X86::BI__builtin_ia32_cmpneqss:
2828*700637cbSDimitry Andric     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 4);
2829*700637cbSDimitry Andric   case X86::BI__builtin_ia32_cmpnltss:
2830*700637cbSDimitry Andric     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 5);
2831*700637cbSDimitry Andric   case X86::BI__builtin_ia32_cmpnless:
2832*700637cbSDimitry Andric     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 6);
2833*700637cbSDimitry Andric   case X86::BI__builtin_ia32_cmpordss:
2834*700637cbSDimitry Andric     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 7);
2835*700637cbSDimitry Andric   case X86::BI__builtin_ia32_cmpeqsd:
2836*700637cbSDimitry Andric     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 0);
2837*700637cbSDimitry Andric   case X86::BI__builtin_ia32_cmpltsd:
2838*700637cbSDimitry Andric     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 1);
2839*700637cbSDimitry Andric   case X86::BI__builtin_ia32_cmplesd:
2840*700637cbSDimitry Andric     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 2);
2841*700637cbSDimitry Andric   case X86::BI__builtin_ia32_cmpunordsd:
2842*700637cbSDimitry Andric     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 3);
2843*700637cbSDimitry Andric   case X86::BI__builtin_ia32_cmpneqsd:
2844*700637cbSDimitry Andric     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 4);
2845*700637cbSDimitry Andric   case X86::BI__builtin_ia32_cmpnltsd:
2846*700637cbSDimitry Andric     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 5);
2847*700637cbSDimitry Andric   case X86::BI__builtin_ia32_cmpnlesd:
2848*700637cbSDimitry Andric     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 6);
2849*700637cbSDimitry Andric   case X86::BI__builtin_ia32_cmpordsd:
2850*700637cbSDimitry Andric     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 7);
2851*700637cbSDimitry Andric 
2852*700637cbSDimitry Andric   // f16c half2float intrinsics
2853*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vcvtph2ps:
2854*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vcvtph2ps256:
2855*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vcvtph2ps_mask:
2856*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vcvtph2ps256_mask:
2857*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vcvtph2ps512_mask: {
2858*700637cbSDimitry Andric     CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
2859*700637cbSDimitry Andric     return EmitX86CvtF16ToFloatExpr(*this, Ops, ConvertType(E->getType()));
2860*700637cbSDimitry Andric   }
2861*700637cbSDimitry Andric 
2862*700637cbSDimitry Andric   // AVX512 bf16 intrinsics
2863*700637cbSDimitry Andric   case X86::BI__builtin_ia32_cvtneps2bf16_128_mask: {
2864*700637cbSDimitry Andric     Ops[2] = getMaskVecValue(
2865*700637cbSDimitry Andric         *this, Ops[2],
2866*700637cbSDimitry Andric         cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements());
2867*700637cbSDimitry Andric     Intrinsic::ID IID = Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128;
2868*700637cbSDimitry Andric     return Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
2869*700637cbSDimitry Andric   }
2870*700637cbSDimitry Andric   case X86::BI__builtin_ia32_cvtsbf162ss_32:
2871*700637cbSDimitry Andric     return Builder.CreateFPExt(Ops[0], Builder.getFloatTy());
2872*700637cbSDimitry Andric 
2873*700637cbSDimitry Andric   case X86::BI__builtin_ia32_cvtneps2bf16_256_mask:
2874*700637cbSDimitry Andric   case X86::BI__builtin_ia32_cvtneps2bf16_512_mask: {
2875*700637cbSDimitry Andric     Intrinsic::ID IID;
2876*700637cbSDimitry Andric     switch (BuiltinID) {
2877*700637cbSDimitry Andric     default: llvm_unreachable("Unsupported intrinsic!");
2878*700637cbSDimitry Andric     case X86::BI__builtin_ia32_cvtneps2bf16_256_mask:
2879*700637cbSDimitry Andric       IID = Intrinsic::x86_avx512bf16_cvtneps2bf16_256;
2880*700637cbSDimitry Andric       break;
2881*700637cbSDimitry Andric     case X86::BI__builtin_ia32_cvtneps2bf16_512_mask:
2882*700637cbSDimitry Andric       IID = Intrinsic::x86_avx512bf16_cvtneps2bf16_512;
2883*700637cbSDimitry Andric       break;
2884*700637cbSDimitry Andric     }
2885*700637cbSDimitry Andric     Value *Res = Builder.CreateCall(CGM.getIntrinsic(IID), Ops[0]);
2886*700637cbSDimitry Andric     return EmitX86Select(*this, Ops[2], Res, Ops[1]);
2887*700637cbSDimitry Andric   }
2888*700637cbSDimitry Andric 
2889*700637cbSDimitry Andric   case X86::BI__cpuid:
2890*700637cbSDimitry Andric   case X86::BI__cpuidex: {
2891*700637cbSDimitry Andric     Value *FuncId = EmitScalarExpr(E->getArg(1));
2892*700637cbSDimitry Andric     Value *SubFuncId = BuiltinID == X86::BI__cpuidex
2893*700637cbSDimitry Andric                            ? EmitScalarExpr(E->getArg(2))
2894*700637cbSDimitry Andric                            : llvm::ConstantInt::get(Int32Ty, 0);
2895*700637cbSDimitry Andric 
2896*700637cbSDimitry Andric     llvm::StructType *CpuidRetTy =
2897*700637cbSDimitry Andric         llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty, Int32Ty);
2898*700637cbSDimitry Andric     llvm::FunctionType *FTy =
2899*700637cbSDimitry Andric         llvm::FunctionType::get(CpuidRetTy, {Int32Ty, Int32Ty}, false);
2900*700637cbSDimitry Andric 
2901*700637cbSDimitry Andric     StringRef Asm, Constraints;
2902*700637cbSDimitry Andric     if (getTarget().getTriple().getArch() == llvm::Triple::x86) {
2903*700637cbSDimitry Andric       Asm = "cpuid";
2904*700637cbSDimitry Andric       Constraints = "={ax},={bx},={cx},={dx},{ax},{cx}";
2905*700637cbSDimitry Andric     } else {
2906*700637cbSDimitry Andric       // x86-64 uses %rbx as the base register, so preserve it.
2907*700637cbSDimitry Andric       Asm = "xchgq %rbx, ${1:q}\n"
2908*700637cbSDimitry Andric             "cpuid\n"
2909*700637cbSDimitry Andric             "xchgq %rbx, ${1:q}";
2910*700637cbSDimitry Andric       Constraints = "={ax},=r,={cx},={dx},0,2";
2911*700637cbSDimitry Andric     }
2912*700637cbSDimitry Andric 
2913*700637cbSDimitry Andric     llvm::InlineAsm *IA = llvm::InlineAsm::get(FTy, Asm, Constraints,
2914*700637cbSDimitry Andric                                                /*hasSideEffects=*/false);
2915*700637cbSDimitry Andric     Value *IACall = Builder.CreateCall(IA, {FuncId, SubFuncId});
2916*700637cbSDimitry Andric     Value *BasePtr = EmitScalarExpr(E->getArg(0));
2917*700637cbSDimitry Andric     Value *Store = nullptr;
2918*700637cbSDimitry Andric     for (unsigned i = 0; i < 4; i++) {
2919*700637cbSDimitry Andric       Value *Extracted = Builder.CreateExtractValue(IACall, i);
2920*700637cbSDimitry Andric       Value *StorePtr = Builder.CreateConstInBoundsGEP1_32(Int32Ty, BasePtr, i);
2921*700637cbSDimitry Andric       Store = Builder.CreateAlignedStore(Extracted, StorePtr, getIntAlign());
2922*700637cbSDimitry Andric     }
2923*700637cbSDimitry Andric 
2924*700637cbSDimitry Andric     // Return the last store instruction to signal that we have emitted the
2925*700637cbSDimitry Andric     // intrinsic.
2926*700637cbSDimitry Andric     return Store;
2927*700637cbSDimitry Andric   }
2928*700637cbSDimitry Andric 
2929*700637cbSDimitry Andric   case X86::BI__emul:
2930*700637cbSDimitry Andric   case X86::BI__emulu: {
2931*700637cbSDimitry Andric     llvm::Type *Int64Ty = llvm::IntegerType::get(getLLVMContext(), 64);
2932*700637cbSDimitry Andric     bool isSigned = (BuiltinID == X86::BI__emul);
2933*700637cbSDimitry Andric     Value *LHS = Builder.CreateIntCast(Ops[0], Int64Ty, isSigned);
2934*700637cbSDimitry Andric     Value *RHS = Builder.CreateIntCast(Ops[1], Int64Ty, isSigned);
2935*700637cbSDimitry Andric     return Builder.CreateMul(LHS, RHS, "", !isSigned, isSigned);
2936*700637cbSDimitry Andric   }
2937*700637cbSDimitry Andric   case X86::BI__mulh:
2938*700637cbSDimitry Andric   case X86::BI__umulh:
2939*700637cbSDimitry Andric   case X86::BI_mul128:
2940*700637cbSDimitry Andric   case X86::BI_umul128: {
2941*700637cbSDimitry Andric     llvm::Type *ResType = ConvertType(E->getType());
2942*700637cbSDimitry Andric     llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
2943*700637cbSDimitry Andric 
2944*700637cbSDimitry Andric     bool IsSigned = (BuiltinID == X86::BI__mulh || BuiltinID == X86::BI_mul128);
2945*700637cbSDimitry Andric     Value *LHS = Builder.CreateIntCast(Ops[0], Int128Ty, IsSigned);
2946*700637cbSDimitry Andric     Value *RHS = Builder.CreateIntCast(Ops[1], Int128Ty, IsSigned);
2947*700637cbSDimitry Andric 
2948*700637cbSDimitry Andric     Value *MulResult, *HigherBits;
2949*700637cbSDimitry Andric     if (IsSigned) {
2950*700637cbSDimitry Andric       MulResult = Builder.CreateNSWMul(LHS, RHS);
2951*700637cbSDimitry Andric       HigherBits = Builder.CreateAShr(MulResult, 64);
2952*700637cbSDimitry Andric     } else {
2953*700637cbSDimitry Andric       MulResult = Builder.CreateNUWMul(LHS, RHS);
2954*700637cbSDimitry Andric       HigherBits = Builder.CreateLShr(MulResult, 64);
2955*700637cbSDimitry Andric     }
2956*700637cbSDimitry Andric     HigherBits = Builder.CreateIntCast(HigherBits, ResType, IsSigned);
2957*700637cbSDimitry Andric 
2958*700637cbSDimitry Andric     if (BuiltinID == X86::BI__mulh || BuiltinID == X86::BI__umulh)
2959*700637cbSDimitry Andric       return HigherBits;
2960*700637cbSDimitry Andric 
2961*700637cbSDimitry Andric     Address HighBitsAddress = EmitPointerWithAlignment(E->getArg(2));
2962*700637cbSDimitry Andric     Builder.CreateStore(HigherBits, HighBitsAddress);
2963*700637cbSDimitry Andric     return Builder.CreateIntCast(MulResult, ResType, IsSigned);
2964*700637cbSDimitry Andric   }
2965*700637cbSDimitry Andric 
2966*700637cbSDimitry Andric   case X86::BI__faststorefence: {
2967*700637cbSDimitry Andric     return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
2968*700637cbSDimitry Andric                                llvm::SyncScope::System);
2969*700637cbSDimitry Andric   }
2970*700637cbSDimitry Andric   case X86::BI__shiftleft128:
2971*700637cbSDimitry Andric   case X86::BI__shiftright128: {
2972*700637cbSDimitry Andric     llvm::Function *F = CGM.getIntrinsic(
2973*700637cbSDimitry Andric         BuiltinID == X86::BI__shiftleft128 ? Intrinsic::fshl : Intrinsic::fshr,
2974*700637cbSDimitry Andric         Int64Ty);
2975*700637cbSDimitry Andric     // Flip low/high ops and zero-extend amount to matching type.
2976*700637cbSDimitry Andric     // shiftleft128(Low, High, Amt) -> fshl(High, Low, Amt)
2977*700637cbSDimitry Andric     // shiftright128(Low, High, Amt) -> fshr(High, Low, Amt)
2978*700637cbSDimitry Andric     std::swap(Ops[0], Ops[1]);
2979*700637cbSDimitry Andric     Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
2980*700637cbSDimitry Andric     return Builder.CreateCall(F, Ops);
2981*700637cbSDimitry Andric   }
2982*700637cbSDimitry Andric   case X86::BI_ReadWriteBarrier:
2983*700637cbSDimitry Andric   case X86::BI_ReadBarrier:
2984*700637cbSDimitry Andric   case X86::BI_WriteBarrier: {
2985*700637cbSDimitry Andric     return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
2986*700637cbSDimitry Andric                                llvm::SyncScope::SingleThread);
2987*700637cbSDimitry Andric   }
2988*700637cbSDimitry Andric 
2989*700637cbSDimitry Andric   case X86::BI_AddressOfReturnAddress: {
2990*700637cbSDimitry Andric     Function *F =
2991*700637cbSDimitry Andric         CGM.getIntrinsic(Intrinsic::addressofreturnaddress, AllocaInt8PtrTy);
2992*700637cbSDimitry Andric     return Builder.CreateCall(F);
2993*700637cbSDimitry Andric   }
2994*700637cbSDimitry Andric   case X86::BI__stosb: {
2995*700637cbSDimitry Andric     // We treat __stosb as a volatile memset - it may not generate "rep stosb"
2996*700637cbSDimitry Andric     // instruction, but it will create a memset that won't be optimized away.
2997*700637cbSDimitry Andric     return Builder.CreateMemSet(Ops[0], Ops[1], Ops[2], Align(1), true);
2998*700637cbSDimitry Andric   }
2999*700637cbSDimitry Andric   // Corresponds to intrinsics that return 2 tiles (tile0_tile1).
3000*700637cbSDimitry Andric   case X86::BI__builtin_ia32_t2rpntlvwz0_internal:
3001*700637cbSDimitry Andric   case X86::BI__builtin_ia32_t2rpntlvwz0rs_internal:
3002*700637cbSDimitry Andric   case X86::BI__builtin_ia32_t2rpntlvwz0t1_internal:
3003*700637cbSDimitry Andric   case X86::BI__builtin_ia32_t2rpntlvwz0rst1_internal:
3004*700637cbSDimitry Andric   case X86::BI__builtin_ia32_t2rpntlvwz1_internal:
3005*700637cbSDimitry Andric   case X86::BI__builtin_ia32_t2rpntlvwz1rs_internal:
3006*700637cbSDimitry Andric   case X86::BI__builtin_ia32_t2rpntlvwz1t1_internal:
3007*700637cbSDimitry Andric   case X86::BI__builtin_ia32_t2rpntlvwz1rst1_internal: {
3008*700637cbSDimitry Andric     Intrinsic::ID IID;
3009*700637cbSDimitry Andric     switch (BuiltinID) {
3010*700637cbSDimitry Andric     default:
3011*700637cbSDimitry Andric       llvm_unreachable("Unsupported intrinsic!");
3012*700637cbSDimitry Andric     case X86::BI__builtin_ia32_t2rpntlvwz0_internal:
3013*700637cbSDimitry Andric       IID = Intrinsic::x86_t2rpntlvwz0_internal;
3014*700637cbSDimitry Andric       break;
3015*700637cbSDimitry Andric     case X86::BI__builtin_ia32_t2rpntlvwz0rs_internal:
3016*700637cbSDimitry Andric       IID = Intrinsic::x86_t2rpntlvwz0rs_internal;
3017*700637cbSDimitry Andric       break;
3018*700637cbSDimitry Andric     case X86::BI__builtin_ia32_t2rpntlvwz0t1_internal:
3019*700637cbSDimitry Andric       IID = Intrinsic::x86_t2rpntlvwz0t1_internal;
3020*700637cbSDimitry Andric       break;
3021*700637cbSDimitry Andric     case X86::BI__builtin_ia32_t2rpntlvwz0rst1_internal:
3022*700637cbSDimitry Andric       IID = Intrinsic::x86_t2rpntlvwz0rst1_internal;
3023*700637cbSDimitry Andric       break;
3024*700637cbSDimitry Andric     case X86::BI__builtin_ia32_t2rpntlvwz1_internal:
3025*700637cbSDimitry Andric       IID = Intrinsic::x86_t2rpntlvwz1_internal;
3026*700637cbSDimitry Andric       break;
3027*700637cbSDimitry Andric     case X86::BI__builtin_ia32_t2rpntlvwz1rs_internal:
3028*700637cbSDimitry Andric       IID = Intrinsic::x86_t2rpntlvwz1rs_internal;
3029*700637cbSDimitry Andric       break;
3030*700637cbSDimitry Andric     case X86::BI__builtin_ia32_t2rpntlvwz1t1_internal:
3031*700637cbSDimitry Andric       IID = Intrinsic::x86_t2rpntlvwz1t1_internal;
3032*700637cbSDimitry Andric       break;
3033*700637cbSDimitry Andric     case X86::BI__builtin_ia32_t2rpntlvwz1rst1_internal:
3034*700637cbSDimitry Andric       IID = Intrinsic::x86_t2rpntlvwz1rst1_internal;
3035*700637cbSDimitry Andric       break;
3036*700637cbSDimitry Andric     }
3037*700637cbSDimitry Andric 
3038*700637cbSDimitry Andric     // Ops = (Row0, Col0, Col1, DstPtr0, DstPtr1, SrcPtr, Stride)
3039*700637cbSDimitry Andric     Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID),
3040*700637cbSDimitry Andric                                      {Ops[0], Ops[1], Ops[2], Ops[5], Ops[6]});
3041*700637cbSDimitry Andric 
3042*700637cbSDimitry Andric     auto *PtrTy = E->getArg(3)->getType()->getAs<PointerType>();
3043*700637cbSDimitry Andric     assert(PtrTy && "arg3 must be of pointer type");
3044*700637cbSDimitry Andric     QualType PtreeTy = PtrTy->getPointeeType();
3045*700637cbSDimitry Andric     llvm::Type *TyPtee = ConvertType(PtreeTy);
3046*700637cbSDimitry Andric 
3047*700637cbSDimitry Andric     // Bitcast amx type (x86_amx) to vector type (256 x i32)
3048*700637cbSDimitry Andric     // Then store tile0 into DstPtr0
3049*700637cbSDimitry Andric     Value *T0 = Builder.CreateExtractValue(Call, 0);
3050*700637cbSDimitry Andric     Value *VecT0 = Builder.CreateIntrinsic(Intrinsic::x86_cast_tile_to_vector,
3051*700637cbSDimitry Andric                                            {TyPtee}, {T0});
3052*700637cbSDimitry Andric     Builder.CreateDefaultAlignedStore(VecT0, Ops[3]);
3053*700637cbSDimitry Andric 
3054*700637cbSDimitry Andric     // Then store tile1 into DstPtr1
3055*700637cbSDimitry Andric     Value *T1 = Builder.CreateExtractValue(Call, 1);
3056*700637cbSDimitry Andric     Value *VecT1 = Builder.CreateIntrinsic(Intrinsic::x86_cast_tile_to_vector,
3057*700637cbSDimitry Andric                                            {TyPtee}, {T1});
3058*700637cbSDimitry Andric     Value *Store = Builder.CreateDefaultAlignedStore(VecT1, Ops[4]);
3059*700637cbSDimitry Andric 
3060*700637cbSDimitry Andric     // Note: We avoid using x86_tilestored64_internal directly to store the
3061*700637cbSDimitry Andric     // results because it cannot guarantee the scope of the memory written.
3062*700637cbSDimitry Andric     // That may cause shape reloads after the first AMX intrinsic, which the
3063*700637cbSDimitry Andric     // current AMX register allocation is unable to handle.
3064*700637cbSDimitry Andric 
3065*700637cbSDimitry Andric     return Store;
3066*700637cbSDimitry Andric   }
3067*700637cbSDimitry Andric   case X86::BI__ud2:
3068*700637cbSDimitry Andric     // llvm.trap makes a ud2a instruction on x86.
3069*700637cbSDimitry Andric     return EmitTrapCall(Intrinsic::trap);
3070*700637cbSDimitry Andric   case X86::BI__int2c: {
3071*700637cbSDimitry Andric     // This syscall signals a driver assertion failure in x86 NT kernels.
3072*700637cbSDimitry Andric     llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false);
3073*700637cbSDimitry Andric     llvm::InlineAsm *IA =
3074*700637cbSDimitry Andric         llvm::InlineAsm::get(FTy, "int $$0x2c", "", /*hasSideEffects=*/true);
3075*700637cbSDimitry Andric     llvm::AttributeList NoReturnAttr = llvm::AttributeList::get(
3076*700637cbSDimitry Andric         getLLVMContext(), llvm::AttributeList::FunctionIndex,
3077*700637cbSDimitry Andric         llvm::Attribute::NoReturn);
3078*700637cbSDimitry Andric     llvm::CallInst *CI = Builder.CreateCall(IA);
3079*700637cbSDimitry Andric     CI->setAttributes(NoReturnAttr);
3080*700637cbSDimitry Andric     return CI;
3081*700637cbSDimitry Andric   }
3082*700637cbSDimitry Andric   case X86::BI__readfsbyte:
3083*700637cbSDimitry Andric   case X86::BI__readfsword:
3084*700637cbSDimitry Andric   case X86::BI__readfsdword:
3085*700637cbSDimitry Andric   case X86::BI__readfsqword: {
3086*700637cbSDimitry Andric     llvm::Type *IntTy = ConvertType(E->getType());
3087*700637cbSDimitry Andric     Value *Ptr = Builder.CreateIntToPtr(
3088*700637cbSDimitry Andric         Ops[0], llvm::PointerType::get(getLLVMContext(), 257));
3089*700637cbSDimitry Andric     LoadInst *Load = Builder.CreateAlignedLoad(
3090*700637cbSDimitry Andric         IntTy, Ptr, getContext().getTypeAlignInChars(E->getType()));
3091*700637cbSDimitry Andric     Load->setVolatile(true);
3092*700637cbSDimitry Andric     return Load;
3093*700637cbSDimitry Andric   }
3094*700637cbSDimitry Andric   case X86::BI__readgsbyte:
3095*700637cbSDimitry Andric   case X86::BI__readgsword:
3096*700637cbSDimitry Andric   case X86::BI__readgsdword:
3097*700637cbSDimitry Andric   case X86::BI__readgsqword: {
3098*700637cbSDimitry Andric     llvm::Type *IntTy = ConvertType(E->getType());
3099*700637cbSDimitry Andric     Value *Ptr = Builder.CreateIntToPtr(
3100*700637cbSDimitry Andric         Ops[0], llvm::PointerType::get(getLLVMContext(), 256));
3101*700637cbSDimitry Andric     LoadInst *Load = Builder.CreateAlignedLoad(
3102*700637cbSDimitry Andric         IntTy, Ptr, getContext().getTypeAlignInChars(E->getType()));
3103*700637cbSDimitry Andric     Load->setVolatile(true);
3104*700637cbSDimitry Andric     return Load;
3105*700637cbSDimitry Andric   }
3106*700637cbSDimitry Andric   case X86::BI__builtin_ia32_encodekey128_u32: {
3107*700637cbSDimitry Andric     Intrinsic::ID IID = Intrinsic::x86_encodekey128;
3108*700637cbSDimitry Andric 
3109*700637cbSDimitry Andric     Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), {Ops[0], Ops[1]});
3110*700637cbSDimitry Andric 
3111*700637cbSDimitry Andric     for (int i = 0; i < 3; ++i) {
3112*700637cbSDimitry Andric       Value *Extract = Builder.CreateExtractValue(Call, i + 1);
3113*700637cbSDimitry Andric       Value *Ptr = Builder.CreateConstGEP1_32(Int8Ty, Ops[2], i * 16);
3114*700637cbSDimitry Andric       Builder.CreateAlignedStore(Extract, Ptr, Align(1));
3115*700637cbSDimitry Andric     }
3116*700637cbSDimitry Andric 
3117*700637cbSDimitry Andric     return Builder.CreateExtractValue(Call, 0);
3118*700637cbSDimitry Andric   }
3119*700637cbSDimitry Andric   case X86::BI__builtin_ia32_encodekey256_u32: {
3120*700637cbSDimitry Andric     Intrinsic::ID IID = Intrinsic::x86_encodekey256;
3121*700637cbSDimitry Andric 
3122*700637cbSDimitry Andric     Value *Call =
3123*700637cbSDimitry Andric         Builder.CreateCall(CGM.getIntrinsic(IID), {Ops[0], Ops[1], Ops[2]});
3124*700637cbSDimitry Andric 
3125*700637cbSDimitry Andric     for (int i = 0; i < 4; ++i) {
3126*700637cbSDimitry Andric       Value *Extract = Builder.CreateExtractValue(Call, i + 1);
3127*700637cbSDimitry Andric       Value *Ptr = Builder.CreateConstGEP1_32(Int8Ty, Ops[3], i * 16);
3128*700637cbSDimitry Andric       Builder.CreateAlignedStore(Extract, Ptr, Align(1));
3129*700637cbSDimitry Andric     }
3130*700637cbSDimitry Andric 
3131*700637cbSDimitry Andric     return Builder.CreateExtractValue(Call, 0);
3132*700637cbSDimitry Andric   }
3133*700637cbSDimitry Andric   case X86::BI__builtin_ia32_aesenc128kl_u8:
3134*700637cbSDimitry Andric   case X86::BI__builtin_ia32_aesdec128kl_u8:
3135*700637cbSDimitry Andric   case X86::BI__builtin_ia32_aesenc256kl_u8:
3136*700637cbSDimitry Andric   case X86::BI__builtin_ia32_aesdec256kl_u8: {
3137*700637cbSDimitry Andric     Intrinsic::ID IID;
3138*700637cbSDimitry Andric     StringRef BlockName;
3139*700637cbSDimitry Andric     switch (BuiltinID) {
3140*700637cbSDimitry Andric     default:
3141*700637cbSDimitry Andric       llvm_unreachable("Unexpected builtin");
3142*700637cbSDimitry Andric     case X86::BI__builtin_ia32_aesenc128kl_u8:
3143*700637cbSDimitry Andric       IID = Intrinsic::x86_aesenc128kl;
3144*700637cbSDimitry Andric       BlockName = "aesenc128kl";
3145*700637cbSDimitry Andric       break;
3146*700637cbSDimitry Andric     case X86::BI__builtin_ia32_aesdec128kl_u8:
3147*700637cbSDimitry Andric       IID = Intrinsic::x86_aesdec128kl;
3148*700637cbSDimitry Andric       BlockName = "aesdec128kl";
3149*700637cbSDimitry Andric       break;
3150*700637cbSDimitry Andric     case X86::BI__builtin_ia32_aesenc256kl_u8:
3151*700637cbSDimitry Andric       IID = Intrinsic::x86_aesenc256kl;
3152*700637cbSDimitry Andric       BlockName = "aesenc256kl";
3153*700637cbSDimitry Andric       break;
3154*700637cbSDimitry Andric     case X86::BI__builtin_ia32_aesdec256kl_u8:
3155*700637cbSDimitry Andric       IID = Intrinsic::x86_aesdec256kl;
3156*700637cbSDimitry Andric       BlockName = "aesdec256kl";
3157*700637cbSDimitry Andric       break;
3158*700637cbSDimitry Andric     }
3159*700637cbSDimitry Andric 
3160*700637cbSDimitry Andric     Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), {Ops[1], Ops[2]});
3161*700637cbSDimitry Andric 
3162*700637cbSDimitry Andric     BasicBlock *NoError =
3163*700637cbSDimitry Andric         createBasicBlock(BlockName + "_no_error", this->CurFn);
3164*700637cbSDimitry Andric     BasicBlock *Error = createBasicBlock(BlockName + "_error", this->CurFn);
3165*700637cbSDimitry Andric     BasicBlock *End = createBasicBlock(BlockName + "_end", this->CurFn);
3166*700637cbSDimitry Andric 
3167*700637cbSDimitry Andric     Value *Ret = Builder.CreateExtractValue(Call, 0);
3168*700637cbSDimitry Andric     Value *Succ = Builder.CreateTrunc(Ret, Builder.getInt1Ty());
3169*700637cbSDimitry Andric     Value *Out = Builder.CreateExtractValue(Call, 1);
3170*700637cbSDimitry Andric     Builder.CreateCondBr(Succ, NoError, Error);
3171*700637cbSDimitry Andric 
3172*700637cbSDimitry Andric     Builder.SetInsertPoint(NoError);
3173*700637cbSDimitry Andric     Builder.CreateDefaultAlignedStore(Out, Ops[0]);
3174*700637cbSDimitry Andric     Builder.CreateBr(End);
3175*700637cbSDimitry Andric 
3176*700637cbSDimitry Andric     Builder.SetInsertPoint(Error);
3177*700637cbSDimitry Andric     Constant *Zero = llvm::Constant::getNullValue(Out->getType());
3178*700637cbSDimitry Andric     Builder.CreateDefaultAlignedStore(Zero, Ops[0]);
3179*700637cbSDimitry Andric     Builder.CreateBr(End);
3180*700637cbSDimitry Andric 
3181*700637cbSDimitry Andric     Builder.SetInsertPoint(End);
3182*700637cbSDimitry Andric     return Builder.CreateExtractValue(Call, 0);
3183*700637cbSDimitry Andric   }
3184*700637cbSDimitry Andric   case X86::BI__builtin_ia32_aesencwide128kl_u8:
3185*700637cbSDimitry Andric   case X86::BI__builtin_ia32_aesdecwide128kl_u8:
3186*700637cbSDimitry Andric   case X86::BI__builtin_ia32_aesencwide256kl_u8:
3187*700637cbSDimitry Andric   case X86::BI__builtin_ia32_aesdecwide256kl_u8: {
3188*700637cbSDimitry Andric     Intrinsic::ID IID;
3189*700637cbSDimitry Andric     StringRef BlockName;
3190*700637cbSDimitry Andric     switch (BuiltinID) {
3191*700637cbSDimitry Andric     case X86::BI__builtin_ia32_aesencwide128kl_u8:
3192*700637cbSDimitry Andric       IID = Intrinsic::x86_aesencwide128kl;
3193*700637cbSDimitry Andric       BlockName = "aesencwide128kl";
3194*700637cbSDimitry Andric       break;
3195*700637cbSDimitry Andric     case X86::BI__builtin_ia32_aesdecwide128kl_u8:
3196*700637cbSDimitry Andric       IID = Intrinsic::x86_aesdecwide128kl;
3197*700637cbSDimitry Andric       BlockName = "aesdecwide128kl";
3198*700637cbSDimitry Andric       break;
3199*700637cbSDimitry Andric     case X86::BI__builtin_ia32_aesencwide256kl_u8:
3200*700637cbSDimitry Andric       IID = Intrinsic::x86_aesencwide256kl;
3201*700637cbSDimitry Andric       BlockName = "aesencwide256kl";
3202*700637cbSDimitry Andric       break;
3203*700637cbSDimitry Andric     case X86::BI__builtin_ia32_aesdecwide256kl_u8:
3204*700637cbSDimitry Andric       IID = Intrinsic::x86_aesdecwide256kl;
3205*700637cbSDimitry Andric       BlockName = "aesdecwide256kl";
3206*700637cbSDimitry Andric       break;
3207*700637cbSDimitry Andric     }
3208*700637cbSDimitry Andric 
3209*700637cbSDimitry Andric     llvm::Type *Ty = FixedVectorType::get(Builder.getInt64Ty(), 2);
3210*700637cbSDimitry Andric     Value *InOps[9];
3211*700637cbSDimitry Andric     InOps[0] = Ops[2];
3212*700637cbSDimitry Andric     for (int i = 0; i != 8; ++i) {
3213*700637cbSDimitry Andric       Value *Ptr = Builder.CreateConstGEP1_32(Ty, Ops[1], i);
3214*700637cbSDimitry Andric       InOps[i + 1] = Builder.CreateAlignedLoad(Ty, Ptr, Align(16));
3215*700637cbSDimitry Andric     }
3216*700637cbSDimitry Andric 
3217*700637cbSDimitry Andric     Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), InOps);
3218*700637cbSDimitry Andric 
3219*700637cbSDimitry Andric     BasicBlock *NoError =
3220*700637cbSDimitry Andric         createBasicBlock(BlockName + "_no_error", this->CurFn);
3221*700637cbSDimitry Andric     BasicBlock *Error = createBasicBlock(BlockName + "_error", this->CurFn);
3222*700637cbSDimitry Andric     BasicBlock *End = createBasicBlock(BlockName + "_end", this->CurFn);
3223*700637cbSDimitry Andric 
3224*700637cbSDimitry Andric     Value *Ret = Builder.CreateExtractValue(Call, 0);
3225*700637cbSDimitry Andric     Value *Succ = Builder.CreateTrunc(Ret, Builder.getInt1Ty());
3226*700637cbSDimitry Andric     Builder.CreateCondBr(Succ, NoError, Error);
3227*700637cbSDimitry Andric 
3228*700637cbSDimitry Andric     Builder.SetInsertPoint(NoError);
3229*700637cbSDimitry Andric     for (int i = 0; i != 8; ++i) {
3230*700637cbSDimitry Andric       Value *Extract = Builder.CreateExtractValue(Call, i + 1);
3231*700637cbSDimitry Andric       Value *Ptr = Builder.CreateConstGEP1_32(Extract->getType(), Ops[0], i);
3232*700637cbSDimitry Andric       Builder.CreateAlignedStore(Extract, Ptr, Align(16));
3233*700637cbSDimitry Andric     }
3234*700637cbSDimitry Andric     Builder.CreateBr(End);
3235*700637cbSDimitry Andric 
3236*700637cbSDimitry Andric     Builder.SetInsertPoint(Error);
3237*700637cbSDimitry Andric     for (int i = 0; i != 8; ++i) {
3238*700637cbSDimitry Andric       Value *Out = Builder.CreateExtractValue(Call, i + 1);
3239*700637cbSDimitry Andric       Constant *Zero = llvm::Constant::getNullValue(Out->getType());
3240*700637cbSDimitry Andric       Value *Ptr = Builder.CreateConstGEP1_32(Out->getType(), Ops[0], i);
3241*700637cbSDimitry Andric       Builder.CreateAlignedStore(Zero, Ptr, Align(16));
3242*700637cbSDimitry Andric     }
3243*700637cbSDimitry Andric     Builder.CreateBr(End);
3244*700637cbSDimitry Andric 
3245*700637cbSDimitry Andric     Builder.SetInsertPoint(End);
3246*700637cbSDimitry Andric     return Builder.CreateExtractValue(Call, 0);
3247*700637cbSDimitry Andric   }
3248*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vfcmaddcph512_mask:
3249*700637cbSDimitry Andric     IsConjFMA = true;
3250*700637cbSDimitry Andric     [[fallthrough]];
3251*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vfmaddcph512_mask: {
3252*700637cbSDimitry Andric     Intrinsic::ID IID = IsConjFMA
3253*700637cbSDimitry Andric                             ? Intrinsic::x86_avx512fp16_mask_vfcmadd_cph_512
3254*700637cbSDimitry Andric                             : Intrinsic::x86_avx512fp16_mask_vfmadd_cph_512;
3255*700637cbSDimitry Andric     Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
3256*700637cbSDimitry Andric     return EmitX86Select(*this, Ops[3], Call, Ops[0]);
3257*700637cbSDimitry Andric   }
3258*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vfcmaddcsh_round_mask:
3259*700637cbSDimitry Andric     IsConjFMA = true;
3260*700637cbSDimitry Andric     [[fallthrough]];
3261*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vfmaddcsh_round_mask: {
3262*700637cbSDimitry Andric     Intrinsic::ID IID = IsConjFMA ? Intrinsic::x86_avx512fp16_mask_vfcmadd_csh
3263*700637cbSDimitry Andric                                   : Intrinsic::x86_avx512fp16_mask_vfmadd_csh;
3264*700637cbSDimitry Andric     Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
3265*700637cbSDimitry Andric     Value *And = Builder.CreateAnd(Ops[3], llvm::ConstantInt::get(Int8Ty, 1));
3266*700637cbSDimitry Andric     return EmitX86Select(*this, And, Call, Ops[0]);
3267*700637cbSDimitry Andric   }
3268*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vfcmaddcsh_round_mask3:
3269*700637cbSDimitry Andric     IsConjFMA = true;
3270*700637cbSDimitry Andric     [[fallthrough]];
3271*700637cbSDimitry Andric   case X86::BI__builtin_ia32_vfmaddcsh_round_mask3: {
3272*700637cbSDimitry Andric     Intrinsic::ID IID = IsConjFMA ? Intrinsic::x86_avx512fp16_mask_vfcmadd_csh
3273*700637cbSDimitry Andric                                   : Intrinsic::x86_avx512fp16_mask_vfmadd_csh;
3274*700637cbSDimitry Andric     Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
3275*700637cbSDimitry Andric     static constexpr int Mask[] = {0, 5, 6, 7};
3276*700637cbSDimitry Andric     return Builder.CreateShuffleVector(Call, Ops[2], Mask);
3277*700637cbSDimitry Andric   }
3278*700637cbSDimitry Andric   case X86::BI__builtin_ia32_prefetchi:
3279*700637cbSDimitry Andric     return Builder.CreateCall(
3280*700637cbSDimitry Andric         CGM.getIntrinsic(Intrinsic::prefetch, Ops[0]->getType()),
3281*700637cbSDimitry Andric         {Ops[0], llvm::ConstantInt::get(Int32Ty, 0), Ops[1],
3282*700637cbSDimitry Andric          llvm::ConstantInt::get(Int32Ty, 0)});
3283*700637cbSDimitry Andric   }
3284*700637cbSDimitry Andric }
3285