xref: /freebsd/contrib/llvm-project/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp (revision 81ad626541db97eb356e2c1d4a20eb2a26a766ab)
1e8d8bef9SDimitry Andric //===-- X86InstCombineIntrinsic.cpp - X86 specific InstCombine pass -------===//
2e8d8bef9SDimitry Andric //
3e8d8bef9SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4e8d8bef9SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5e8d8bef9SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6e8d8bef9SDimitry Andric //
7e8d8bef9SDimitry Andric //===----------------------------------------------------------------------===//
8e8d8bef9SDimitry Andric /// \file
9e8d8bef9SDimitry Andric /// This file implements X86-specific InstCombine simplifications: it folds
10e8d8bef9SDimitry Andric /// calls to X86 target intrinsics into simpler, target-independent IR where
11e8d8bef9SDimitry Andric /// possible, using the target's detailed knowledge of the intrinsics'
12e8d8bef9SDimitry Andric /// semantics, while letting generic InstCombine handle the rest.
13e8d8bef9SDimitry Andric ///
14e8d8bef9SDimitry Andric //===----------------------------------------------------------------------===//
15e8d8bef9SDimitry Andric 
16e8d8bef9SDimitry Andric #include "X86TargetTransformInfo.h"
17e8d8bef9SDimitry Andric #include "llvm/IR/IntrinsicInst.h"
18e8d8bef9SDimitry Andric #include "llvm/IR/IntrinsicsX86.h"
19e8d8bef9SDimitry Andric #include "llvm/Support/KnownBits.h"
20e8d8bef9SDimitry Andric #include "llvm/Transforms/InstCombine/InstCombiner.h"
21e8d8bef9SDimitry Andric 
22e8d8bef9SDimitry Andric using namespace llvm;
23e8d8bef9SDimitry Andric 
24e8d8bef9SDimitry Andric #define DEBUG_TYPE "x86tti"
25e8d8bef9SDimitry Andric 
26e8d8bef9SDimitry Andric /// Return a constant boolean vector that has true elements in all positions
27e8d8bef9SDimitry Andric /// where the input constant data vector has an element with the sign bit set.
28e8d8bef9SDimitry Andric static Constant *getNegativeIsTrueBoolVec(Constant *V) {
29e8d8bef9SDimitry Andric   VectorType *IntTy = VectorType::getInteger(cast<VectorType>(V->getType()));
30e8d8bef9SDimitry Andric   V = ConstantExpr::getBitCast(V, IntTy);
31e8d8bef9SDimitry Andric   V = ConstantExpr::getICmp(CmpInst::ICMP_SGT, Constant::getNullValue(IntTy),
32e8d8bef9SDimitry Andric                             V);
33e8d8bef9SDimitry Andric   return V;
34e8d8bef9SDimitry Andric }
35e8d8bef9SDimitry Andric 
36e8d8bef9SDimitry Andric /// Convert the x86 XMM integer vector mask to a vector of bools based on
37e8d8bef9SDimitry Andric /// each element's most significant bit (the sign bit).
38e8d8bef9SDimitry Andric static Value *getBoolVecFromMask(Value *Mask) {
39e8d8bef9SDimitry Andric   // Fold Constant Mask.
40e8d8bef9SDimitry Andric   if (auto *ConstantMask = dyn_cast<ConstantDataVector>(Mask))
41e8d8bef9SDimitry Andric     return getNegativeIsTrueBoolVec(ConstantMask);
42e8d8bef9SDimitry Andric 
43e8d8bef9SDimitry Andric   // Mask was extended from a boolean vector.
44e8d8bef9SDimitry Andric   Value *ExtMask;
45e8d8bef9SDimitry Andric   if (PatternMatch::match(
46e8d8bef9SDimitry Andric           Mask, PatternMatch::m_SExt(PatternMatch::m_Value(ExtMask))) &&
47e8d8bef9SDimitry Andric       ExtMask->getType()->isIntOrIntVectorTy(1))
48e8d8bef9SDimitry Andric     return ExtMask;
49e8d8bef9SDimitry Andric 
50e8d8bef9SDimitry Andric   return nullptr;
51e8d8bef9SDimitry Andric }
52e8d8bef9SDimitry Andric 
53e8d8bef9SDimitry Andric // TODO: If the x86 backend knew how to convert a bool vector mask back to an
54e8d8bef9SDimitry Andric // XMM register mask efficiently, we could transform all x86 masked intrinsics
55e8d8bef9SDimitry Andric // to LLVM masked intrinsics and remove the x86 masked intrinsic defs.
56e8d8bef9SDimitry Andric static Instruction *simplifyX86MaskedLoad(IntrinsicInst &II, InstCombiner &IC) {
57e8d8bef9SDimitry Andric   Value *Ptr = II.getOperand(0);
58e8d8bef9SDimitry Andric   Value *Mask = II.getOperand(1);
59e8d8bef9SDimitry Andric   Constant *ZeroVec = Constant::getNullValue(II.getType());
60e8d8bef9SDimitry Andric 
61e8d8bef9SDimitry Andric   // Zero Mask - masked load instruction creates a zero vector.
62e8d8bef9SDimitry Andric   if (isa<ConstantAggregateZero>(Mask))
63e8d8bef9SDimitry Andric     return IC.replaceInstUsesWith(II, ZeroVec);
64e8d8bef9SDimitry Andric 
65e8d8bef9SDimitry Andric   // The mask is constant or extended from a bool vector. Convert this x86
66e8d8bef9SDimitry Andric   // intrinsic to the LLVM intrinsic to allow target-independent optimizations.
67e8d8bef9SDimitry Andric   if (Value *BoolMask = getBoolVecFromMask(Mask)) {
68e8d8bef9SDimitry Andric     // First, cast the x86 intrinsic scalar pointer to a vector pointer to match
69e8d8bef9SDimitry Andric     // the LLVM intrinsic definition for the pointer argument.
70e8d8bef9SDimitry Andric     unsigned AddrSpace = cast<PointerType>(Ptr->getType())->getAddressSpace();
71e8d8bef9SDimitry Andric     PointerType *VecPtrTy = PointerType::get(II.getType(), AddrSpace);
72e8d8bef9SDimitry Andric     Value *PtrCast = IC.Builder.CreateBitCast(Ptr, VecPtrTy, "castvec");
73e8d8bef9SDimitry Andric 
74e8d8bef9SDimitry Andric     // The pass-through vector for an x86 masked load is a zero vector.
75fe6060f1SDimitry Andric     CallInst *NewMaskedLoad = IC.Builder.CreateMaskedLoad(
76fe6060f1SDimitry Andric         II.getType(), PtrCast, Align(1), BoolMask, ZeroVec);
77e8d8bef9SDimitry Andric     return IC.replaceInstUsesWith(II, NewMaskedLoad);
78e8d8bef9SDimitry Andric   }
79e8d8bef9SDimitry Andric 
80e8d8bef9SDimitry Andric   return nullptr;
81e8d8bef9SDimitry Andric }
82e8d8bef9SDimitry Andric 
83e8d8bef9SDimitry Andric // TODO: If the x86 backend knew how to convert a bool vector mask back to an
84e8d8bef9SDimitry Andric // XMM register mask efficiently, we could transform all x86 masked intrinsics
85e8d8bef9SDimitry Andric // to LLVM masked intrinsics and remove the x86 masked intrinsic defs.
86e8d8bef9SDimitry Andric static bool simplifyX86MaskedStore(IntrinsicInst &II, InstCombiner &IC) {
87e8d8bef9SDimitry Andric   Value *Ptr = II.getOperand(0);
88e8d8bef9SDimitry Andric   Value *Mask = II.getOperand(1);
89e8d8bef9SDimitry Andric   Value *Vec = II.getOperand(2);
90e8d8bef9SDimitry Andric 
91e8d8bef9SDimitry Andric   // Zero Mask - this masked store instruction does nothing.
92e8d8bef9SDimitry Andric   if (isa<ConstantAggregateZero>(Mask)) {
93e8d8bef9SDimitry Andric     IC.eraseInstFromFunction(II);
94e8d8bef9SDimitry Andric     return true;
95e8d8bef9SDimitry Andric   }
96e8d8bef9SDimitry Andric 
97e8d8bef9SDimitry Andric   // The SSE2 version is too weird (eg, unaligned but non-temporal) to do
98e8d8bef9SDimitry Andric   // anything else at this level.
99e8d8bef9SDimitry Andric   if (II.getIntrinsicID() == Intrinsic::x86_sse2_maskmov_dqu)
100e8d8bef9SDimitry Andric     return false;
101e8d8bef9SDimitry Andric 
102e8d8bef9SDimitry Andric   // The mask is constant or extended from a bool vector. Convert this x86
103e8d8bef9SDimitry Andric   // intrinsic to the LLVM intrinsic to allow target-independent optimizations.
104e8d8bef9SDimitry Andric   if (Value *BoolMask = getBoolVecFromMask(Mask)) {
105e8d8bef9SDimitry Andric     unsigned AddrSpace = cast<PointerType>(Ptr->getType())->getAddressSpace();
106e8d8bef9SDimitry Andric     PointerType *VecPtrTy = PointerType::get(Vec->getType(), AddrSpace);
107e8d8bef9SDimitry Andric     Value *PtrCast = IC.Builder.CreateBitCast(Ptr, VecPtrTy, "castvec");
108e8d8bef9SDimitry Andric 
109e8d8bef9SDimitry Andric     IC.Builder.CreateMaskedStore(Vec, PtrCast, Align(1), BoolMask);
110e8d8bef9SDimitry Andric 
111e8d8bef9SDimitry Andric     // 'Replace uses' doesn't work for stores. Erase the original masked store.
112e8d8bef9SDimitry Andric     IC.eraseInstFromFunction(II);
113e8d8bef9SDimitry Andric     return true;
114e8d8bef9SDimitry Andric   }
115e8d8bef9SDimitry Andric 
116e8d8bef9SDimitry Andric   return false;
117e8d8bef9SDimitry Andric }
118e8d8bef9SDimitry Andric 
/// Attempt to fold an x86 vector shift intrinsic with an immediate (i32) or
/// bottom-64-bits-of-xmm scalar shift count (PSLL/PSRL/PSRA families) into a
/// generic IR shift. Unlike generic IR shifts, these intrinsics have defined
/// behaviour for out-of-range counts: logical shifts produce zero and
/// arithmetic shifts clamp the count to (BitWidth - 1).
/// Returns the replacement value, or nullptr if no simplification applies.
static Value *simplifyX86immShift(const IntrinsicInst &II,
                                  InstCombiner::BuilderTy &Builder) {
  bool LogicalShift = false;
  bool ShiftLeft = false;
  bool IsImm = false; // True for the shift-by-immediate (i32 count) forms.

  switch (II.getIntrinsicID()) {
  default:
    llvm_unreachable("Unexpected intrinsic!");
  // Arithmetic shift right, immediate count.
  case Intrinsic::x86_sse2_psrai_d:
  case Intrinsic::x86_sse2_psrai_w:
  case Intrinsic::x86_avx2_psrai_d:
  case Intrinsic::x86_avx2_psrai_w:
  case Intrinsic::x86_avx512_psrai_q_128:
  case Intrinsic::x86_avx512_psrai_q_256:
  case Intrinsic::x86_avx512_psrai_d_512:
  case Intrinsic::x86_avx512_psrai_q_512:
  case Intrinsic::x86_avx512_psrai_w_512:
    IsImm = true;
    LLVM_FALLTHROUGH;
  // Arithmetic shift right, scalar (xmm) count.
  case Intrinsic::x86_sse2_psra_d:
  case Intrinsic::x86_sse2_psra_w:
  case Intrinsic::x86_avx2_psra_d:
  case Intrinsic::x86_avx2_psra_w:
  case Intrinsic::x86_avx512_psra_q_128:
  case Intrinsic::x86_avx512_psra_q_256:
  case Intrinsic::x86_avx512_psra_d_512:
  case Intrinsic::x86_avx512_psra_q_512:
  case Intrinsic::x86_avx512_psra_w_512:
    LogicalShift = false;
    ShiftLeft = false;
    break;
  // Logical shift right, immediate count.
  case Intrinsic::x86_sse2_psrli_d:
  case Intrinsic::x86_sse2_psrli_q:
  case Intrinsic::x86_sse2_psrli_w:
  case Intrinsic::x86_avx2_psrli_d:
  case Intrinsic::x86_avx2_psrli_q:
  case Intrinsic::x86_avx2_psrli_w:
  case Intrinsic::x86_avx512_psrli_d_512:
  case Intrinsic::x86_avx512_psrli_q_512:
  case Intrinsic::x86_avx512_psrli_w_512:
    IsImm = true;
    LLVM_FALLTHROUGH;
  // Logical shift right, scalar (xmm) count.
  case Intrinsic::x86_sse2_psrl_d:
  case Intrinsic::x86_sse2_psrl_q:
  case Intrinsic::x86_sse2_psrl_w:
  case Intrinsic::x86_avx2_psrl_d:
  case Intrinsic::x86_avx2_psrl_q:
  case Intrinsic::x86_avx2_psrl_w:
  case Intrinsic::x86_avx512_psrl_d_512:
  case Intrinsic::x86_avx512_psrl_q_512:
  case Intrinsic::x86_avx512_psrl_w_512:
    LogicalShift = true;
    ShiftLeft = false;
    break;
  // Shift left, immediate count.
  case Intrinsic::x86_sse2_pslli_d:
  case Intrinsic::x86_sse2_pslli_q:
  case Intrinsic::x86_sse2_pslli_w:
  case Intrinsic::x86_avx2_pslli_d:
  case Intrinsic::x86_avx2_pslli_q:
  case Intrinsic::x86_avx2_pslli_w:
  case Intrinsic::x86_avx512_pslli_d_512:
  case Intrinsic::x86_avx512_pslli_q_512:
  case Intrinsic::x86_avx512_pslli_w_512:
    IsImm = true;
    LLVM_FALLTHROUGH;
  // Shift left, scalar (xmm) count.
  case Intrinsic::x86_sse2_psll_d:
  case Intrinsic::x86_sse2_psll_q:
  case Intrinsic::x86_sse2_psll_w:
  case Intrinsic::x86_avx2_psll_d:
  case Intrinsic::x86_avx2_psll_q:
  case Intrinsic::x86_avx2_psll_w:
  case Intrinsic::x86_avx512_psll_d_512:
  case Intrinsic::x86_avx512_psll_q_512:
  case Intrinsic::x86_avx512_psll_w_512:
    LogicalShift = true;
    ShiftLeft = true;
    break;
  }
  assert((LogicalShift || !ShiftLeft) && "Only logical shifts can shift left");

  Value *Vec = II.getArgOperand(0);
  Value *Amt = II.getArgOperand(1);
  auto *VT = cast<FixedVectorType>(Vec->getType());
  Type *SVT = VT->getElementType();
  Type *AmtVT = Amt->getType();
  unsigned VWidth = VT->getNumElements();
  unsigned BitWidth = SVT->getPrimitiveSizeInBits();

  // If the shift amount is guaranteed to be in-range we can replace it with a
  // generic shift. If its guaranteed to be out of range, logical shifts combine
  // to zero and arithmetic shifts are clamped to (BitWidth - 1).
  if (IsImm) {
    assert(AmtVT->isIntegerTy(32) && "Unexpected shift-by-immediate type");
    KnownBits KnownAmtBits =
        llvm::computeKnownBits(Amt, II.getModule()->getDataLayout());
    if (KnownAmtBits.getMaxValue().ult(BitWidth)) {
      // In-range count: splat it to every lane and emit a generic shift.
      Amt = Builder.CreateZExtOrTrunc(Amt, SVT);
      Amt = Builder.CreateVectorSplat(VWidth, Amt);
      return (LogicalShift ? (ShiftLeft ? Builder.CreateShl(Vec, Amt)
                                        : Builder.CreateLShr(Vec, Amt))
                           : Builder.CreateAShr(Vec, Amt));
    }
    if (KnownAmtBits.getMinValue().uge(BitWidth)) {
      // Out-of-range count: zero for logical shifts, sign-splat (shift by
      // BitWidth - 1) for arithmetic shifts.
      if (LogicalShift)
        return ConstantAggregateZero::get(VT);
      Amt = ConstantInt::get(SVT, BitWidth - 1);
      return Builder.CreateAShr(Vec, Builder.CreateVectorSplat(VWidth, Amt));
    }
  } else {
    // Ensure the first element has an in-range value and the rest of the
    // elements in the bottom 64 bits are zero.
    assert(AmtVT->isVectorTy() && AmtVT->getPrimitiveSizeInBits() == 128 &&
           cast<VectorType>(AmtVT)->getElementType() == SVT &&
           "Unexpected shift-by-scalar type");
    unsigned NumAmtElts = cast<FixedVectorType>(AmtVT)->getNumElements();
    APInt DemandedLower = APInt::getOneBitSet(NumAmtElts, 0);
    APInt DemandedUpper = APInt::getBitsSet(NumAmtElts, 1, NumAmtElts / 2);
    KnownBits KnownLowerBits = llvm::computeKnownBits(
        Amt, DemandedLower, II.getModule()->getDataLayout());
    KnownBits KnownUpperBits = llvm::computeKnownBits(
        Amt, DemandedUpper, II.getModule()->getDataLayout());
    if (KnownLowerBits.getMaxValue().ult(BitWidth) &&
        (DemandedUpper.isZero() || KnownUpperBits.isZero())) {
      // Broadcast element 0 of the count vector to all lanes and emit a
      // generic shift.
      SmallVector<int, 16> ZeroSplat(VWidth, 0);
      Amt = Builder.CreateShuffleVector(Amt, ZeroSplat);
      return (LogicalShift ? (ShiftLeft ? Builder.CreateShl(Vec, Amt)
                                        : Builder.CreateLShr(Vec, Amt))
                           : Builder.CreateAShr(Vec, Amt));
    }
  }

  // Simplify if count is constant vector.
  auto *CDV = dyn_cast<ConstantDataVector>(Amt);
  if (!CDV)
    return nullptr;

  // SSE2/AVX2 uses all the first 64-bits of the 128-bit vector
  // operand to compute the shift amount.
  assert(AmtVT->isVectorTy() && AmtVT->getPrimitiveSizeInBits() == 128 &&
         cast<VectorType>(AmtVT)->getElementType() == SVT &&
         "Unexpected shift-by-scalar type");

  // Concatenate the sub-elements to create the 64-bit value.
  APInt Count(64, 0);
  for (unsigned i = 0, NumSubElts = 64 / BitWidth; i != NumSubElts; ++i) {
    unsigned SubEltIdx = (NumSubElts - 1) - i;
    auto *SubElt = cast<ConstantInt>(CDV->getElementAsConstant(SubEltIdx));
    Count <<= BitWidth;
    Count |= SubElt->getValue().zextOrTrunc(64);
  }

  // If shift-by-zero then just return the original value.
  if (Count.isZero())
    return Vec;

  // Handle cases when Shift >= BitWidth.
  if (Count.uge(BitWidth)) {
    // If LogicalShift - just return zero.
    if (LogicalShift)
      return ConstantAggregateZero::get(VT);

    // If ArithmeticShift - clamp Shift to (BitWidth - 1).
    Count = APInt(64, BitWidth - 1);
  }

  // Get a constant vector of the same type as the first operand.
  auto ShiftAmt = ConstantInt::get(SVT, Count.zextOrTrunc(BitWidth));
  auto ShiftVec = Builder.CreateVectorSplat(VWidth, ShiftAmt);

  if (ShiftLeft)
    return Builder.CreateShl(Vec, ShiftVec);

  if (LogicalShift)
    return Builder.CreateLShr(Vec, ShiftVec);

  return Builder.CreateAShr(Vec, ShiftVec);
}
297e8d8bef9SDimitry Andric 
// Attempt to simplify AVX2 per-element shift intrinsics to a generic IR shift.
// Unlike the generic IR shifts, the intrinsics have defined behaviour for out
// of range shift amounts (logical - set to zero, arithmetic - splat sign bit).
// Returns the replacement value, or nullptr if no simplification applies.
static Value *simplifyX86varShift(const IntrinsicInst &II,
                                  InstCombiner::BuilderTy &Builder) {
  bool LogicalShift = false;
  bool ShiftLeft = false;

  switch (II.getIntrinsicID()) {
  default:
    llvm_unreachable("Unexpected intrinsic!");
  // Per-element arithmetic shift right.
  case Intrinsic::x86_avx2_psrav_d:
  case Intrinsic::x86_avx2_psrav_d_256:
  case Intrinsic::x86_avx512_psrav_q_128:
  case Intrinsic::x86_avx512_psrav_q_256:
  case Intrinsic::x86_avx512_psrav_d_512:
  case Intrinsic::x86_avx512_psrav_q_512:
  case Intrinsic::x86_avx512_psrav_w_128:
  case Intrinsic::x86_avx512_psrav_w_256:
  case Intrinsic::x86_avx512_psrav_w_512:
    LogicalShift = false;
    ShiftLeft = false;
    break;
  // Per-element logical shift right.
  case Intrinsic::x86_avx2_psrlv_d:
  case Intrinsic::x86_avx2_psrlv_d_256:
  case Intrinsic::x86_avx2_psrlv_q:
  case Intrinsic::x86_avx2_psrlv_q_256:
  case Intrinsic::x86_avx512_psrlv_d_512:
  case Intrinsic::x86_avx512_psrlv_q_512:
  case Intrinsic::x86_avx512_psrlv_w_128:
  case Intrinsic::x86_avx512_psrlv_w_256:
  case Intrinsic::x86_avx512_psrlv_w_512:
    LogicalShift = true;
    ShiftLeft = false;
    break;
  // Per-element shift left.
  case Intrinsic::x86_avx2_psllv_d:
  case Intrinsic::x86_avx2_psllv_d_256:
  case Intrinsic::x86_avx2_psllv_q:
  case Intrinsic::x86_avx2_psllv_q_256:
  case Intrinsic::x86_avx512_psllv_d_512:
  case Intrinsic::x86_avx512_psllv_q_512:
  case Intrinsic::x86_avx512_psllv_w_128:
  case Intrinsic::x86_avx512_psllv_w_256:
  case Intrinsic::x86_avx512_psllv_w_512:
    LogicalShift = true;
    ShiftLeft = true;
    break;
  }
  assert((LogicalShift || !ShiftLeft) && "Only logical shifts can shift left");

  Value *Vec = II.getArgOperand(0);
  Value *Amt = II.getArgOperand(1);
  auto *VT = cast<FixedVectorType>(II.getType());
  Type *SVT = VT->getElementType();
  int NumElts = VT->getNumElements();
  int BitWidth = SVT->getIntegerBitWidth();

  // If the shift amount is guaranteed to be in-range we can replace it with a
  // generic shift.
  KnownBits KnownAmt =
      llvm::computeKnownBits(Amt, II.getModule()->getDataLayout());
  if (KnownAmt.getMaxValue().ult(BitWidth)) {
    return (LogicalShift ? (ShiftLeft ? Builder.CreateShl(Vec, Amt)
                                      : Builder.CreateLShr(Vec, Amt))
                         : Builder.CreateAShr(Vec, Amt));
  }

  // Simplify if all shift amounts are constant/undef.
  auto *CShift = dyn_cast<Constant>(Amt);
  if (!CShift)
    return nullptr;

  // Collect each element's shift amount.
  // We also collect special cases: UNDEF = -1, OUT-OF-RANGE = BitWidth.
  bool AnyOutOfRange = false;
  SmallVector<int, 8> ShiftAmts;
  for (int I = 0; I < NumElts; ++I) {
    auto *CElt = CShift->getAggregateElement(I);
    if (isa_and_nonnull<UndefValue>(CElt)) {
      ShiftAmts.push_back(-1);
      continue;
    }

    auto *COp = dyn_cast_or_null<ConstantInt>(CElt);
    if (!COp)
      return nullptr;

    // Handle out of range shifts.
    // If LogicalShift - set to BitWidth (special case).
    // If ArithmeticShift - set to (BitWidth - 1) (sign splat).
    APInt ShiftVal = COp->getValue();
    if (ShiftVal.uge(BitWidth)) {
      AnyOutOfRange = LogicalShift;
      ShiftAmts.push_back(LogicalShift ? BitWidth : BitWidth - 1);
      continue;
    }

    ShiftAmts.push_back((int)ShiftVal.getZExtValue());
  }

  // If all elements out of range or UNDEF, return vector of zeros/undefs.
  // ArithmeticShift should only hit this if they are all UNDEF.
  auto OutOfRange = [&](int Idx) { return (Idx < 0) || (BitWidth <= Idx); };
  if (llvm::all_of(ShiftAmts, OutOfRange)) {
    SmallVector<Constant *, 8> ConstantVec;
    for (int Idx : ShiftAmts) {
      if (Idx < 0) {
        ConstantVec.push_back(UndefValue::get(SVT));
      } else {
        assert(LogicalShift && "Logical shift expected");
        ConstantVec.push_back(ConstantInt::getNullValue(SVT));
      }
    }
    return ConstantVector::get(ConstantVec);
  }

  // We can't handle only some out of range values with generic logical shifts.
  if (AnyOutOfRange)
    return nullptr;

  // Build the shift amount constant vector.
  SmallVector<Constant *, 8> ShiftVecAmts;
  for (int Idx : ShiftAmts) {
    if (Idx < 0)
      ShiftVecAmts.push_back(UndefValue::get(SVT));
    else
      ShiftVecAmts.push_back(ConstantInt::get(SVT, Idx));
  }
  auto ShiftVec = ConstantVector::get(ShiftVecAmts);

  if (ShiftLeft)
    return Builder.CreateShl(Vec, ShiftVec);

  if (LogicalShift)
    return Builder.CreateLShr(Vec, ShiftVec);

  return Builder.CreateAShr(Vec, ShiftVec);
}
436e8d8bef9SDimitry Andric 
437e8d8bef9SDimitry Andric static Value *simplifyX86pack(IntrinsicInst &II,
438e8d8bef9SDimitry Andric                               InstCombiner::BuilderTy &Builder, bool IsSigned) {
439e8d8bef9SDimitry Andric   Value *Arg0 = II.getArgOperand(0);
440e8d8bef9SDimitry Andric   Value *Arg1 = II.getArgOperand(1);
441e8d8bef9SDimitry Andric   Type *ResTy = II.getType();
442e8d8bef9SDimitry Andric 
443e8d8bef9SDimitry Andric   // Fast all undef handling.
444e8d8bef9SDimitry Andric   if (isa<UndefValue>(Arg0) && isa<UndefValue>(Arg1))
445e8d8bef9SDimitry Andric     return UndefValue::get(ResTy);
446e8d8bef9SDimitry Andric 
447e8d8bef9SDimitry Andric   auto *ArgTy = cast<FixedVectorType>(Arg0->getType());
448e8d8bef9SDimitry Andric   unsigned NumLanes = ResTy->getPrimitiveSizeInBits() / 128;
449e8d8bef9SDimitry Andric   unsigned NumSrcElts = ArgTy->getNumElements();
450e8d8bef9SDimitry Andric   assert(cast<FixedVectorType>(ResTy)->getNumElements() == (2 * NumSrcElts) &&
451e8d8bef9SDimitry Andric          "Unexpected packing types");
452e8d8bef9SDimitry Andric 
453e8d8bef9SDimitry Andric   unsigned NumSrcEltsPerLane = NumSrcElts / NumLanes;
454e8d8bef9SDimitry Andric   unsigned DstScalarSizeInBits = ResTy->getScalarSizeInBits();
455e8d8bef9SDimitry Andric   unsigned SrcScalarSizeInBits = ArgTy->getScalarSizeInBits();
456e8d8bef9SDimitry Andric   assert(SrcScalarSizeInBits == (2 * DstScalarSizeInBits) &&
457e8d8bef9SDimitry Andric          "Unexpected packing types");
458e8d8bef9SDimitry Andric 
459e8d8bef9SDimitry Andric   // Constant folding.
460e8d8bef9SDimitry Andric   if (!isa<Constant>(Arg0) || !isa<Constant>(Arg1))
461e8d8bef9SDimitry Andric     return nullptr;
462e8d8bef9SDimitry Andric 
463e8d8bef9SDimitry Andric   // Clamp Values - signed/unsigned both use signed clamp values, but they
464e8d8bef9SDimitry Andric   // differ on the min/max values.
465e8d8bef9SDimitry Andric   APInt MinValue, MaxValue;
466e8d8bef9SDimitry Andric   if (IsSigned) {
467e8d8bef9SDimitry Andric     // PACKSS: Truncate signed value with signed saturation.
468e8d8bef9SDimitry Andric     // Source values less than dst minint are saturated to minint.
469e8d8bef9SDimitry Andric     // Source values greater than dst maxint are saturated to maxint.
470e8d8bef9SDimitry Andric     MinValue =
471e8d8bef9SDimitry Andric         APInt::getSignedMinValue(DstScalarSizeInBits).sext(SrcScalarSizeInBits);
472e8d8bef9SDimitry Andric     MaxValue =
473e8d8bef9SDimitry Andric         APInt::getSignedMaxValue(DstScalarSizeInBits).sext(SrcScalarSizeInBits);
474e8d8bef9SDimitry Andric   } else {
475e8d8bef9SDimitry Andric     // PACKUS: Truncate signed value with unsigned saturation.
476e8d8bef9SDimitry Andric     // Source values less than zero are saturated to zero.
477e8d8bef9SDimitry Andric     // Source values greater than dst maxuint are saturated to maxuint.
478349cc55cSDimitry Andric     MinValue = APInt::getZero(SrcScalarSizeInBits);
479e8d8bef9SDimitry Andric     MaxValue = APInt::getLowBitsSet(SrcScalarSizeInBits, DstScalarSizeInBits);
480e8d8bef9SDimitry Andric   }
481e8d8bef9SDimitry Andric 
482e8d8bef9SDimitry Andric   auto *MinC = Constant::getIntegerValue(ArgTy, MinValue);
483e8d8bef9SDimitry Andric   auto *MaxC = Constant::getIntegerValue(ArgTy, MaxValue);
484e8d8bef9SDimitry Andric   Arg0 = Builder.CreateSelect(Builder.CreateICmpSLT(Arg0, MinC), MinC, Arg0);
485e8d8bef9SDimitry Andric   Arg1 = Builder.CreateSelect(Builder.CreateICmpSLT(Arg1, MinC), MinC, Arg1);
486e8d8bef9SDimitry Andric   Arg0 = Builder.CreateSelect(Builder.CreateICmpSGT(Arg0, MaxC), MaxC, Arg0);
487e8d8bef9SDimitry Andric   Arg1 = Builder.CreateSelect(Builder.CreateICmpSGT(Arg1, MaxC), MaxC, Arg1);
488e8d8bef9SDimitry Andric 
489e8d8bef9SDimitry Andric   // Shuffle clamped args together at the lane level.
490e8d8bef9SDimitry Andric   SmallVector<int, 32> PackMask;
491e8d8bef9SDimitry Andric   for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
492e8d8bef9SDimitry Andric     for (unsigned Elt = 0; Elt != NumSrcEltsPerLane; ++Elt)
493e8d8bef9SDimitry Andric       PackMask.push_back(Elt + (Lane * NumSrcEltsPerLane));
494e8d8bef9SDimitry Andric     for (unsigned Elt = 0; Elt != NumSrcEltsPerLane; ++Elt)
495e8d8bef9SDimitry Andric       PackMask.push_back(Elt + (Lane * NumSrcEltsPerLane) + NumSrcElts);
496e8d8bef9SDimitry Andric   }
497e8d8bef9SDimitry Andric   auto *Shuffle = Builder.CreateShuffleVector(Arg0, Arg1, PackMask);
498e8d8bef9SDimitry Andric 
499e8d8bef9SDimitry Andric   // Truncate to dst size.
500e8d8bef9SDimitry Andric   return Builder.CreateTrunc(Shuffle, ResTy);
501e8d8bef9SDimitry Andric }
502e8d8bef9SDimitry Andric 
503e8d8bef9SDimitry Andric static Value *simplifyX86movmsk(const IntrinsicInst &II,
504e8d8bef9SDimitry Andric                                 InstCombiner::BuilderTy &Builder) {
505e8d8bef9SDimitry Andric   Value *Arg = II.getArgOperand(0);
506e8d8bef9SDimitry Andric   Type *ResTy = II.getType();
507e8d8bef9SDimitry Andric 
508e8d8bef9SDimitry Andric   // movmsk(undef) -> zero as we must ensure the upper bits are zero.
509e8d8bef9SDimitry Andric   if (isa<UndefValue>(Arg))
510e8d8bef9SDimitry Andric     return Constant::getNullValue(ResTy);
511e8d8bef9SDimitry Andric 
512e8d8bef9SDimitry Andric   auto *ArgTy = dyn_cast<FixedVectorType>(Arg->getType());
513e8d8bef9SDimitry Andric   // We can't easily peek through x86_mmx types.
514e8d8bef9SDimitry Andric   if (!ArgTy)
515e8d8bef9SDimitry Andric     return nullptr;
516e8d8bef9SDimitry Andric 
517e8d8bef9SDimitry Andric   // Expand MOVMSK to compare/bitcast/zext:
518e8d8bef9SDimitry Andric   // e.g. PMOVMSKB(v16i8 x):
519e8d8bef9SDimitry Andric   // %cmp = icmp slt <16 x i8> %x, zeroinitializer
520e8d8bef9SDimitry Andric   // %int = bitcast <16 x i1> %cmp to i16
521e8d8bef9SDimitry Andric   // %res = zext i16 %int to i32
522e8d8bef9SDimitry Andric   unsigned NumElts = ArgTy->getNumElements();
523e8d8bef9SDimitry Andric   Type *IntegerTy = Builder.getIntNTy(NumElts);
524e8d8bef9SDimitry Andric 
525*81ad6265SDimitry Andric   Value *Res = Builder.CreateBitCast(Arg, VectorType::getInteger(ArgTy));
526*81ad6265SDimitry Andric   Res = Builder.CreateIsNeg(Res);
527e8d8bef9SDimitry Andric   Res = Builder.CreateBitCast(Res, IntegerTy);
528e8d8bef9SDimitry Andric   Res = Builder.CreateZExtOrTrunc(Res, ResTy);
529e8d8bef9SDimitry Andric   return Res;
530e8d8bef9SDimitry Andric }
531e8d8bef9SDimitry Andric 
532e8d8bef9SDimitry Andric static Value *simplifyX86addcarry(const IntrinsicInst &II,
533e8d8bef9SDimitry Andric                                   InstCombiner::BuilderTy &Builder) {
534e8d8bef9SDimitry Andric   Value *CarryIn = II.getArgOperand(0);
535e8d8bef9SDimitry Andric   Value *Op1 = II.getArgOperand(1);
536e8d8bef9SDimitry Andric   Value *Op2 = II.getArgOperand(2);
537e8d8bef9SDimitry Andric   Type *RetTy = II.getType();
538e8d8bef9SDimitry Andric   Type *OpTy = Op1->getType();
539e8d8bef9SDimitry Andric   assert(RetTy->getStructElementType(0)->isIntegerTy(8) &&
540e8d8bef9SDimitry Andric          RetTy->getStructElementType(1) == OpTy && OpTy == Op2->getType() &&
541e8d8bef9SDimitry Andric          "Unexpected types for x86 addcarry");
542e8d8bef9SDimitry Andric 
543e8d8bef9SDimitry Andric   // If carry-in is zero, this is just an unsigned add with overflow.
544e8d8bef9SDimitry Andric   if (match(CarryIn, PatternMatch::m_ZeroInt())) {
545e8d8bef9SDimitry Andric     Value *UAdd = Builder.CreateIntrinsic(Intrinsic::uadd_with_overflow, OpTy,
546e8d8bef9SDimitry Andric                                           {Op1, Op2});
547e8d8bef9SDimitry Andric     // The types have to be adjusted to match the x86 call types.
548e8d8bef9SDimitry Andric     Value *UAddResult = Builder.CreateExtractValue(UAdd, 0);
549e8d8bef9SDimitry Andric     Value *UAddOV = Builder.CreateZExt(Builder.CreateExtractValue(UAdd, 1),
550e8d8bef9SDimitry Andric                                        Builder.getInt8Ty());
551e8d8bef9SDimitry Andric     Value *Res = UndefValue::get(RetTy);
552e8d8bef9SDimitry Andric     Res = Builder.CreateInsertValue(Res, UAddOV, 0);
553e8d8bef9SDimitry Andric     return Builder.CreateInsertValue(Res, UAddResult, 1);
554e8d8bef9SDimitry Andric   }
555e8d8bef9SDimitry Andric 
556e8d8bef9SDimitry Andric   return nullptr;
557e8d8bef9SDimitry Andric }
558e8d8bef9SDimitry Andric 
559e8d8bef9SDimitry Andric static Value *simplifyX86insertps(const IntrinsicInst &II,
560e8d8bef9SDimitry Andric                                   InstCombiner::BuilderTy &Builder) {
561e8d8bef9SDimitry Andric   auto *CInt = dyn_cast<ConstantInt>(II.getArgOperand(2));
562e8d8bef9SDimitry Andric   if (!CInt)
563e8d8bef9SDimitry Andric     return nullptr;
564e8d8bef9SDimitry Andric 
565e8d8bef9SDimitry Andric   auto *VecTy = cast<FixedVectorType>(II.getType());
566e8d8bef9SDimitry Andric   assert(VecTy->getNumElements() == 4 && "insertps with wrong vector type");
567e8d8bef9SDimitry Andric 
568e8d8bef9SDimitry Andric   // The immediate permute control byte looks like this:
569e8d8bef9SDimitry Andric   //    [3:0] - zero mask for each 32-bit lane
570e8d8bef9SDimitry Andric   //    [5:4] - select one 32-bit destination lane
571e8d8bef9SDimitry Andric   //    [7:6] - select one 32-bit source lane
572e8d8bef9SDimitry Andric 
573e8d8bef9SDimitry Andric   uint8_t Imm = CInt->getZExtValue();
574e8d8bef9SDimitry Andric   uint8_t ZMask = Imm & 0xf;
575e8d8bef9SDimitry Andric   uint8_t DestLane = (Imm >> 4) & 0x3;
576e8d8bef9SDimitry Andric   uint8_t SourceLane = (Imm >> 6) & 0x3;
577e8d8bef9SDimitry Andric 
578e8d8bef9SDimitry Andric   ConstantAggregateZero *ZeroVector = ConstantAggregateZero::get(VecTy);
579e8d8bef9SDimitry Andric 
580e8d8bef9SDimitry Andric   // If all zero mask bits are set, this was just a weird way to
581e8d8bef9SDimitry Andric   // generate a zero vector.
582e8d8bef9SDimitry Andric   if (ZMask == 0xf)
583e8d8bef9SDimitry Andric     return ZeroVector;
584e8d8bef9SDimitry Andric 
585e8d8bef9SDimitry Andric   // Initialize by passing all of the first source bits through.
586e8d8bef9SDimitry Andric   int ShuffleMask[4] = {0, 1, 2, 3};
587e8d8bef9SDimitry Andric 
588e8d8bef9SDimitry Andric   // We may replace the second operand with the zero vector.
589e8d8bef9SDimitry Andric   Value *V1 = II.getArgOperand(1);
590e8d8bef9SDimitry Andric 
591e8d8bef9SDimitry Andric   if (ZMask) {
592e8d8bef9SDimitry Andric     // If the zero mask is being used with a single input or the zero mask
593e8d8bef9SDimitry Andric     // overrides the destination lane, this is a shuffle with the zero vector.
594e8d8bef9SDimitry Andric     if ((II.getArgOperand(0) == II.getArgOperand(1)) ||
595e8d8bef9SDimitry Andric         (ZMask & (1 << DestLane))) {
596e8d8bef9SDimitry Andric       V1 = ZeroVector;
597e8d8bef9SDimitry Andric       // We may still move 32-bits of the first source vector from one lane
598e8d8bef9SDimitry Andric       // to another.
599e8d8bef9SDimitry Andric       ShuffleMask[DestLane] = SourceLane;
600e8d8bef9SDimitry Andric       // The zero mask may override the previous insert operation.
601e8d8bef9SDimitry Andric       for (unsigned i = 0; i < 4; ++i)
602e8d8bef9SDimitry Andric         if ((ZMask >> i) & 0x1)
603e8d8bef9SDimitry Andric           ShuffleMask[i] = i + 4;
604e8d8bef9SDimitry Andric     } else {
605e8d8bef9SDimitry Andric       // TODO: Model this case as 2 shuffles or a 'logical and' plus shuffle?
606e8d8bef9SDimitry Andric       return nullptr;
607e8d8bef9SDimitry Andric     }
608e8d8bef9SDimitry Andric   } else {
609e8d8bef9SDimitry Andric     // Replace the selected destination lane with the selected source lane.
610e8d8bef9SDimitry Andric     ShuffleMask[DestLane] = SourceLane + 4;
611e8d8bef9SDimitry Andric   }
612e8d8bef9SDimitry Andric 
613e8d8bef9SDimitry Andric   return Builder.CreateShuffleVector(II.getArgOperand(0), V1, ShuffleMask);
614e8d8bef9SDimitry Andric }
615e8d8bef9SDimitry Andric 
616e8d8bef9SDimitry Andric /// Attempt to simplify SSE4A EXTRQ/EXTRQI instructions using constant folding
617e8d8bef9SDimitry Andric /// or conversion to a shuffle vector.
618e8d8bef9SDimitry Andric static Value *simplifyX86extrq(IntrinsicInst &II, Value *Op0,
619e8d8bef9SDimitry Andric                                ConstantInt *CILength, ConstantInt *CIIndex,
620e8d8bef9SDimitry Andric                                InstCombiner::BuilderTy &Builder) {
621e8d8bef9SDimitry Andric   auto LowConstantHighUndef = [&](uint64_t Val) {
622e8d8bef9SDimitry Andric     Type *IntTy64 = Type::getInt64Ty(II.getContext());
623e8d8bef9SDimitry Andric     Constant *Args[] = {ConstantInt::get(IntTy64, Val),
624e8d8bef9SDimitry Andric                         UndefValue::get(IntTy64)};
625e8d8bef9SDimitry Andric     return ConstantVector::get(Args);
626e8d8bef9SDimitry Andric   };
627e8d8bef9SDimitry Andric 
628e8d8bef9SDimitry Andric   // See if we're dealing with constant values.
629fe6060f1SDimitry Andric   auto *C0 = dyn_cast<Constant>(Op0);
630fe6060f1SDimitry Andric   auto *CI0 =
631e8d8bef9SDimitry Andric       C0 ? dyn_cast_or_null<ConstantInt>(C0->getAggregateElement((unsigned)0))
632e8d8bef9SDimitry Andric          : nullptr;
633e8d8bef9SDimitry Andric 
634e8d8bef9SDimitry Andric   // Attempt to constant fold.
635e8d8bef9SDimitry Andric   if (CILength && CIIndex) {
636e8d8bef9SDimitry Andric     // From AMD documentation: "The bit index and field length are each six
637e8d8bef9SDimitry Andric     // bits in length other bits of the field are ignored."
638e8d8bef9SDimitry Andric     APInt APIndex = CIIndex->getValue().zextOrTrunc(6);
639e8d8bef9SDimitry Andric     APInt APLength = CILength->getValue().zextOrTrunc(6);
640e8d8bef9SDimitry Andric 
641e8d8bef9SDimitry Andric     unsigned Index = APIndex.getZExtValue();
642e8d8bef9SDimitry Andric 
643e8d8bef9SDimitry Andric     // From AMD documentation: "a value of zero in the field length is
644e8d8bef9SDimitry Andric     // defined as length of 64".
645e8d8bef9SDimitry Andric     unsigned Length = APLength == 0 ? 64 : APLength.getZExtValue();
646e8d8bef9SDimitry Andric 
647e8d8bef9SDimitry Andric     // From AMD documentation: "If the sum of the bit index + length field
648e8d8bef9SDimitry Andric     // is greater than 64, the results are undefined".
649e8d8bef9SDimitry Andric     unsigned End = Index + Length;
650e8d8bef9SDimitry Andric 
651e8d8bef9SDimitry Andric     // Note that both field index and field length are 8-bit quantities.
652e8d8bef9SDimitry Andric     // Since variables 'Index' and 'Length' are unsigned values
653e8d8bef9SDimitry Andric     // obtained from zero-extending field index and field length
654e8d8bef9SDimitry Andric     // respectively, their sum should never wrap around.
655e8d8bef9SDimitry Andric     if (End > 64)
656e8d8bef9SDimitry Andric       return UndefValue::get(II.getType());
657e8d8bef9SDimitry Andric 
658e8d8bef9SDimitry Andric     // If we are inserting whole bytes, we can convert this to a shuffle.
659e8d8bef9SDimitry Andric     // Lowering can recognize EXTRQI shuffle masks.
660e8d8bef9SDimitry Andric     if ((Length % 8) == 0 && (Index % 8) == 0) {
661e8d8bef9SDimitry Andric       // Convert bit indices to byte indices.
662e8d8bef9SDimitry Andric       Length /= 8;
663e8d8bef9SDimitry Andric       Index /= 8;
664e8d8bef9SDimitry Andric 
665e8d8bef9SDimitry Andric       Type *IntTy8 = Type::getInt8Ty(II.getContext());
666e8d8bef9SDimitry Andric       auto *ShufTy = FixedVectorType::get(IntTy8, 16);
667e8d8bef9SDimitry Andric 
668e8d8bef9SDimitry Andric       SmallVector<int, 16> ShuffleMask;
669e8d8bef9SDimitry Andric       for (int i = 0; i != (int)Length; ++i)
670e8d8bef9SDimitry Andric         ShuffleMask.push_back(i + Index);
671e8d8bef9SDimitry Andric       for (int i = Length; i != 8; ++i)
672e8d8bef9SDimitry Andric         ShuffleMask.push_back(i + 16);
673e8d8bef9SDimitry Andric       for (int i = 8; i != 16; ++i)
674e8d8bef9SDimitry Andric         ShuffleMask.push_back(-1);
675e8d8bef9SDimitry Andric 
676e8d8bef9SDimitry Andric       Value *SV = Builder.CreateShuffleVector(
677e8d8bef9SDimitry Andric           Builder.CreateBitCast(Op0, ShufTy),
678e8d8bef9SDimitry Andric           ConstantAggregateZero::get(ShufTy), ShuffleMask);
679e8d8bef9SDimitry Andric       return Builder.CreateBitCast(SV, II.getType());
680e8d8bef9SDimitry Andric     }
681e8d8bef9SDimitry Andric 
682e8d8bef9SDimitry Andric     // Constant Fold - shift Index'th bit to lowest position and mask off
683e8d8bef9SDimitry Andric     // Length bits.
684e8d8bef9SDimitry Andric     if (CI0) {
685e8d8bef9SDimitry Andric       APInt Elt = CI0->getValue();
686e8d8bef9SDimitry Andric       Elt.lshrInPlace(Index);
687e8d8bef9SDimitry Andric       Elt = Elt.zextOrTrunc(Length);
688e8d8bef9SDimitry Andric       return LowConstantHighUndef(Elt.getZExtValue());
689e8d8bef9SDimitry Andric     }
690e8d8bef9SDimitry Andric 
691e8d8bef9SDimitry Andric     // If we were an EXTRQ call, we'll save registers if we convert to EXTRQI.
692e8d8bef9SDimitry Andric     if (II.getIntrinsicID() == Intrinsic::x86_sse4a_extrq) {
693e8d8bef9SDimitry Andric       Value *Args[] = {Op0, CILength, CIIndex};
694e8d8bef9SDimitry Andric       Module *M = II.getModule();
695e8d8bef9SDimitry Andric       Function *F = Intrinsic::getDeclaration(M, Intrinsic::x86_sse4a_extrqi);
696e8d8bef9SDimitry Andric       return Builder.CreateCall(F, Args);
697e8d8bef9SDimitry Andric     }
698e8d8bef9SDimitry Andric   }
699e8d8bef9SDimitry Andric 
700e8d8bef9SDimitry Andric   // Constant Fold - extraction from zero is always {zero, undef}.
701e8d8bef9SDimitry Andric   if (CI0 && CI0->isZero())
702e8d8bef9SDimitry Andric     return LowConstantHighUndef(0);
703e8d8bef9SDimitry Andric 
704e8d8bef9SDimitry Andric   return nullptr;
705e8d8bef9SDimitry Andric }
706e8d8bef9SDimitry Andric 
707e8d8bef9SDimitry Andric /// Attempt to simplify SSE4A INSERTQ/INSERTQI instructions using constant
708e8d8bef9SDimitry Andric /// folding or conversion to a shuffle vector.
709e8d8bef9SDimitry Andric static Value *simplifyX86insertq(IntrinsicInst &II, Value *Op0, Value *Op1,
710e8d8bef9SDimitry Andric                                  APInt APLength, APInt APIndex,
711e8d8bef9SDimitry Andric                                  InstCombiner::BuilderTy &Builder) {
712e8d8bef9SDimitry Andric   // From AMD documentation: "The bit index and field length are each six bits
713e8d8bef9SDimitry Andric   // in length other bits of the field are ignored."
714e8d8bef9SDimitry Andric   APIndex = APIndex.zextOrTrunc(6);
715e8d8bef9SDimitry Andric   APLength = APLength.zextOrTrunc(6);
716e8d8bef9SDimitry Andric 
717e8d8bef9SDimitry Andric   // Attempt to constant fold.
718e8d8bef9SDimitry Andric   unsigned Index = APIndex.getZExtValue();
719e8d8bef9SDimitry Andric 
720e8d8bef9SDimitry Andric   // From AMD documentation: "a value of zero in the field length is
721e8d8bef9SDimitry Andric   // defined as length of 64".
722e8d8bef9SDimitry Andric   unsigned Length = APLength == 0 ? 64 : APLength.getZExtValue();
723e8d8bef9SDimitry Andric 
724e8d8bef9SDimitry Andric   // From AMD documentation: "If the sum of the bit index + length field
725e8d8bef9SDimitry Andric   // is greater than 64, the results are undefined".
726e8d8bef9SDimitry Andric   unsigned End = Index + Length;
727e8d8bef9SDimitry Andric 
728e8d8bef9SDimitry Andric   // Note that both field index and field length are 8-bit quantities.
729e8d8bef9SDimitry Andric   // Since variables 'Index' and 'Length' are unsigned values
730e8d8bef9SDimitry Andric   // obtained from zero-extending field index and field length
731e8d8bef9SDimitry Andric   // respectively, their sum should never wrap around.
732e8d8bef9SDimitry Andric   if (End > 64)
733e8d8bef9SDimitry Andric     return UndefValue::get(II.getType());
734e8d8bef9SDimitry Andric 
735e8d8bef9SDimitry Andric   // If we are inserting whole bytes, we can convert this to a shuffle.
736e8d8bef9SDimitry Andric   // Lowering can recognize INSERTQI shuffle masks.
737e8d8bef9SDimitry Andric   if ((Length % 8) == 0 && (Index % 8) == 0) {
738e8d8bef9SDimitry Andric     // Convert bit indices to byte indices.
739e8d8bef9SDimitry Andric     Length /= 8;
740e8d8bef9SDimitry Andric     Index /= 8;
741e8d8bef9SDimitry Andric 
742e8d8bef9SDimitry Andric     Type *IntTy8 = Type::getInt8Ty(II.getContext());
743e8d8bef9SDimitry Andric     auto *ShufTy = FixedVectorType::get(IntTy8, 16);
744e8d8bef9SDimitry Andric 
745e8d8bef9SDimitry Andric     SmallVector<int, 16> ShuffleMask;
746e8d8bef9SDimitry Andric     for (int i = 0; i != (int)Index; ++i)
747e8d8bef9SDimitry Andric       ShuffleMask.push_back(i);
748e8d8bef9SDimitry Andric     for (int i = 0; i != (int)Length; ++i)
749e8d8bef9SDimitry Andric       ShuffleMask.push_back(i + 16);
750e8d8bef9SDimitry Andric     for (int i = Index + Length; i != 8; ++i)
751e8d8bef9SDimitry Andric       ShuffleMask.push_back(i);
752e8d8bef9SDimitry Andric     for (int i = 8; i != 16; ++i)
753e8d8bef9SDimitry Andric       ShuffleMask.push_back(-1);
754e8d8bef9SDimitry Andric 
755e8d8bef9SDimitry Andric     Value *SV = Builder.CreateShuffleVector(Builder.CreateBitCast(Op0, ShufTy),
756e8d8bef9SDimitry Andric                                             Builder.CreateBitCast(Op1, ShufTy),
757e8d8bef9SDimitry Andric                                             ShuffleMask);
758e8d8bef9SDimitry Andric     return Builder.CreateBitCast(SV, II.getType());
759e8d8bef9SDimitry Andric   }
760e8d8bef9SDimitry Andric 
761e8d8bef9SDimitry Andric   // See if we're dealing with constant values.
762fe6060f1SDimitry Andric   auto *C0 = dyn_cast<Constant>(Op0);
763fe6060f1SDimitry Andric   auto *C1 = dyn_cast<Constant>(Op1);
764fe6060f1SDimitry Andric   auto *CI00 =
765e8d8bef9SDimitry Andric       C0 ? dyn_cast_or_null<ConstantInt>(C0->getAggregateElement((unsigned)0))
766e8d8bef9SDimitry Andric          : nullptr;
767fe6060f1SDimitry Andric   auto *CI10 =
768e8d8bef9SDimitry Andric       C1 ? dyn_cast_or_null<ConstantInt>(C1->getAggregateElement((unsigned)0))
769e8d8bef9SDimitry Andric          : nullptr;
770e8d8bef9SDimitry Andric 
771e8d8bef9SDimitry Andric   // Constant Fold - insert bottom Length bits starting at the Index'th bit.
772e8d8bef9SDimitry Andric   if (CI00 && CI10) {
773e8d8bef9SDimitry Andric     APInt V00 = CI00->getValue();
774e8d8bef9SDimitry Andric     APInt V10 = CI10->getValue();
775e8d8bef9SDimitry Andric     APInt Mask = APInt::getLowBitsSet(64, Length).shl(Index);
776e8d8bef9SDimitry Andric     V00 = V00 & ~Mask;
777e8d8bef9SDimitry Andric     V10 = V10.zextOrTrunc(Length).zextOrTrunc(64).shl(Index);
778e8d8bef9SDimitry Andric     APInt Val = V00 | V10;
779e8d8bef9SDimitry Andric     Type *IntTy64 = Type::getInt64Ty(II.getContext());
780e8d8bef9SDimitry Andric     Constant *Args[] = {ConstantInt::get(IntTy64, Val.getZExtValue()),
781e8d8bef9SDimitry Andric                         UndefValue::get(IntTy64)};
782e8d8bef9SDimitry Andric     return ConstantVector::get(Args);
783e8d8bef9SDimitry Andric   }
784e8d8bef9SDimitry Andric 
785e8d8bef9SDimitry Andric   // If we were an INSERTQ call, we'll save demanded elements if we convert to
786e8d8bef9SDimitry Andric   // INSERTQI.
787e8d8bef9SDimitry Andric   if (II.getIntrinsicID() == Intrinsic::x86_sse4a_insertq) {
788e8d8bef9SDimitry Andric     Type *IntTy8 = Type::getInt8Ty(II.getContext());
789e8d8bef9SDimitry Andric     Constant *CILength = ConstantInt::get(IntTy8, Length, false);
790e8d8bef9SDimitry Andric     Constant *CIIndex = ConstantInt::get(IntTy8, Index, false);
791e8d8bef9SDimitry Andric 
792e8d8bef9SDimitry Andric     Value *Args[] = {Op0, Op1, CILength, CIIndex};
793e8d8bef9SDimitry Andric     Module *M = II.getModule();
794e8d8bef9SDimitry Andric     Function *F = Intrinsic::getDeclaration(M, Intrinsic::x86_sse4a_insertqi);
795e8d8bef9SDimitry Andric     return Builder.CreateCall(F, Args);
796e8d8bef9SDimitry Andric   }
797e8d8bef9SDimitry Andric 
798e8d8bef9SDimitry Andric   return nullptr;
799e8d8bef9SDimitry Andric }
800e8d8bef9SDimitry Andric 
801e8d8bef9SDimitry Andric /// Attempt to convert pshufb* to shufflevector if the mask is constant.
802e8d8bef9SDimitry Andric static Value *simplifyX86pshufb(const IntrinsicInst &II,
803e8d8bef9SDimitry Andric                                 InstCombiner::BuilderTy &Builder) {
804fe6060f1SDimitry Andric   auto *V = dyn_cast<Constant>(II.getArgOperand(1));
805e8d8bef9SDimitry Andric   if (!V)
806e8d8bef9SDimitry Andric     return nullptr;
807e8d8bef9SDimitry Andric 
808e8d8bef9SDimitry Andric   auto *VecTy = cast<FixedVectorType>(II.getType());
809e8d8bef9SDimitry Andric   unsigned NumElts = VecTy->getNumElements();
810e8d8bef9SDimitry Andric   assert((NumElts == 16 || NumElts == 32 || NumElts == 64) &&
811e8d8bef9SDimitry Andric          "Unexpected number of elements in shuffle mask!");
812e8d8bef9SDimitry Andric 
813e8d8bef9SDimitry Andric   // Construct a shuffle mask from constant integers or UNDEFs.
814e8d8bef9SDimitry Andric   int Indexes[64];
815e8d8bef9SDimitry Andric 
816e8d8bef9SDimitry Andric   // Each byte in the shuffle control mask forms an index to permute the
817e8d8bef9SDimitry Andric   // corresponding byte in the destination operand.
818e8d8bef9SDimitry Andric   for (unsigned I = 0; I < NumElts; ++I) {
819e8d8bef9SDimitry Andric     Constant *COp = V->getAggregateElement(I);
820e8d8bef9SDimitry Andric     if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))
821e8d8bef9SDimitry Andric       return nullptr;
822e8d8bef9SDimitry Andric 
823e8d8bef9SDimitry Andric     if (isa<UndefValue>(COp)) {
824e8d8bef9SDimitry Andric       Indexes[I] = -1;
825e8d8bef9SDimitry Andric       continue;
826e8d8bef9SDimitry Andric     }
827e8d8bef9SDimitry Andric 
828e8d8bef9SDimitry Andric     int8_t Index = cast<ConstantInt>(COp)->getValue().getZExtValue();
829e8d8bef9SDimitry Andric 
830e8d8bef9SDimitry Andric     // If the most significant bit (bit[7]) of each byte of the shuffle
831e8d8bef9SDimitry Andric     // control mask is set, then zero is written in the result byte.
832e8d8bef9SDimitry Andric     // The zero vector is in the right-hand side of the resulting
833e8d8bef9SDimitry Andric     // shufflevector.
834e8d8bef9SDimitry Andric 
835e8d8bef9SDimitry Andric     // The value of each index for the high 128-bit lane is the least
836e8d8bef9SDimitry Andric     // significant 4 bits of the respective shuffle control byte.
837e8d8bef9SDimitry Andric     Index = ((Index < 0) ? NumElts : Index & 0x0F) + (I & 0xF0);
838e8d8bef9SDimitry Andric     Indexes[I] = Index;
839e8d8bef9SDimitry Andric   }
840e8d8bef9SDimitry Andric 
841e8d8bef9SDimitry Andric   auto V1 = II.getArgOperand(0);
842e8d8bef9SDimitry Andric   auto V2 = Constant::getNullValue(VecTy);
843e8d8bef9SDimitry Andric   return Builder.CreateShuffleVector(V1, V2, makeArrayRef(Indexes, NumElts));
844e8d8bef9SDimitry Andric }
845e8d8bef9SDimitry Andric 
846e8d8bef9SDimitry Andric /// Attempt to convert vpermilvar* to shufflevector if the mask is constant.
847e8d8bef9SDimitry Andric static Value *simplifyX86vpermilvar(const IntrinsicInst &II,
848e8d8bef9SDimitry Andric                                     InstCombiner::BuilderTy &Builder) {
849fe6060f1SDimitry Andric   auto *V = dyn_cast<Constant>(II.getArgOperand(1));
850e8d8bef9SDimitry Andric   if (!V)
851e8d8bef9SDimitry Andric     return nullptr;
852e8d8bef9SDimitry Andric 
853e8d8bef9SDimitry Andric   auto *VecTy = cast<FixedVectorType>(II.getType());
854e8d8bef9SDimitry Andric   unsigned NumElts = VecTy->getNumElements();
855e8d8bef9SDimitry Andric   bool IsPD = VecTy->getScalarType()->isDoubleTy();
856e8d8bef9SDimitry Andric   unsigned NumLaneElts = IsPD ? 2 : 4;
857e8d8bef9SDimitry Andric   assert(NumElts == 16 || NumElts == 8 || NumElts == 4 || NumElts == 2);
858e8d8bef9SDimitry Andric 
859e8d8bef9SDimitry Andric   // Construct a shuffle mask from constant integers or UNDEFs.
860e8d8bef9SDimitry Andric   int Indexes[16];
861e8d8bef9SDimitry Andric 
862e8d8bef9SDimitry Andric   // The intrinsics only read one or two bits, clear the rest.
863e8d8bef9SDimitry Andric   for (unsigned I = 0; I < NumElts; ++I) {
864e8d8bef9SDimitry Andric     Constant *COp = V->getAggregateElement(I);
865e8d8bef9SDimitry Andric     if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))
866e8d8bef9SDimitry Andric       return nullptr;
867e8d8bef9SDimitry Andric 
868e8d8bef9SDimitry Andric     if (isa<UndefValue>(COp)) {
869e8d8bef9SDimitry Andric       Indexes[I] = -1;
870e8d8bef9SDimitry Andric       continue;
871e8d8bef9SDimitry Andric     }
872e8d8bef9SDimitry Andric 
873e8d8bef9SDimitry Andric     APInt Index = cast<ConstantInt>(COp)->getValue();
874e8d8bef9SDimitry Andric     Index = Index.zextOrTrunc(32).getLoBits(2);
875e8d8bef9SDimitry Andric 
876e8d8bef9SDimitry Andric     // The PD variants uses bit 1 to select per-lane element index, so
877e8d8bef9SDimitry Andric     // shift down to convert to generic shuffle mask index.
878e8d8bef9SDimitry Andric     if (IsPD)
879e8d8bef9SDimitry Andric       Index.lshrInPlace(1);
880e8d8bef9SDimitry Andric 
881e8d8bef9SDimitry Andric     // The _256 variants are a bit trickier since the mask bits always index
882e8d8bef9SDimitry Andric     // into the corresponding 128 half. In order to convert to a generic
883e8d8bef9SDimitry Andric     // shuffle, we have to make that explicit.
884e8d8bef9SDimitry Andric     Index += APInt(32, (I / NumLaneElts) * NumLaneElts);
885e8d8bef9SDimitry Andric 
886e8d8bef9SDimitry Andric     Indexes[I] = Index.getZExtValue();
887e8d8bef9SDimitry Andric   }
888e8d8bef9SDimitry Andric 
889e8d8bef9SDimitry Andric   auto V1 = II.getArgOperand(0);
890e8d8bef9SDimitry Andric   return Builder.CreateShuffleVector(V1, makeArrayRef(Indexes, NumElts));
891e8d8bef9SDimitry Andric }
892e8d8bef9SDimitry Andric 
893e8d8bef9SDimitry Andric /// Attempt to convert vpermd/vpermps to shufflevector if the mask is constant.
894e8d8bef9SDimitry Andric static Value *simplifyX86vpermv(const IntrinsicInst &II,
895e8d8bef9SDimitry Andric                                 InstCombiner::BuilderTy &Builder) {
896e8d8bef9SDimitry Andric   auto *V = dyn_cast<Constant>(II.getArgOperand(1));
897e8d8bef9SDimitry Andric   if (!V)
898e8d8bef9SDimitry Andric     return nullptr;
899e8d8bef9SDimitry Andric 
900e8d8bef9SDimitry Andric   auto *VecTy = cast<FixedVectorType>(II.getType());
901e8d8bef9SDimitry Andric   unsigned Size = VecTy->getNumElements();
902e8d8bef9SDimitry Andric   assert((Size == 4 || Size == 8 || Size == 16 || Size == 32 || Size == 64) &&
903e8d8bef9SDimitry Andric          "Unexpected shuffle mask size");
904e8d8bef9SDimitry Andric 
905e8d8bef9SDimitry Andric   // Construct a shuffle mask from constant integers or UNDEFs.
906e8d8bef9SDimitry Andric   int Indexes[64];
907e8d8bef9SDimitry Andric 
908e8d8bef9SDimitry Andric   for (unsigned I = 0; I < Size; ++I) {
909e8d8bef9SDimitry Andric     Constant *COp = V->getAggregateElement(I);
910e8d8bef9SDimitry Andric     if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))
911e8d8bef9SDimitry Andric       return nullptr;
912e8d8bef9SDimitry Andric 
913e8d8bef9SDimitry Andric     if (isa<UndefValue>(COp)) {
914e8d8bef9SDimitry Andric       Indexes[I] = -1;
915e8d8bef9SDimitry Andric       continue;
916e8d8bef9SDimitry Andric     }
917e8d8bef9SDimitry Andric 
918e8d8bef9SDimitry Andric     uint32_t Index = cast<ConstantInt>(COp)->getZExtValue();
919e8d8bef9SDimitry Andric     Index &= Size - 1;
920e8d8bef9SDimitry Andric     Indexes[I] = Index;
921e8d8bef9SDimitry Andric   }
922e8d8bef9SDimitry Andric 
923e8d8bef9SDimitry Andric   auto V1 = II.getArgOperand(0);
924e8d8bef9SDimitry Andric   return Builder.CreateShuffleVector(V1, makeArrayRef(Indexes, Size));
925e8d8bef9SDimitry Andric }
926e8d8bef9SDimitry Andric 
927e8d8bef9SDimitry Andric Optional<Instruction *>
928e8d8bef9SDimitry Andric X86TTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
929e8d8bef9SDimitry Andric   auto SimplifyDemandedVectorEltsLow = [&IC](Value *Op, unsigned Width,
930e8d8bef9SDimitry Andric                                              unsigned DemandedWidth) {
931e8d8bef9SDimitry Andric     APInt UndefElts(Width, 0);
932e8d8bef9SDimitry Andric     APInt DemandedElts = APInt::getLowBitsSet(Width, DemandedWidth);
933e8d8bef9SDimitry Andric     return IC.SimplifyDemandedVectorElts(Op, DemandedElts, UndefElts);
934e8d8bef9SDimitry Andric   };
935e8d8bef9SDimitry Andric 
936e8d8bef9SDimitry Andric   Intrinsic::ID IID = II.getIntrinsicID();
937e8d8bef9SDimitry Andric   switch (IID) {
938e8d8bef9SDimitry Andric   case Intrinsic::x86_bmi_bextr_32:
939e8d8bef9SDimitry Andric   case Intrinsic::x86_bmi_bextr_64:
940e8d8bef9SDimitry Andric   case Intrinsic::x86_tbm_bextri_u32:
941e8d8bef9SDimitry Andric   case Intrinsic::x86_tbm_bextri_u64:
942e8d8bef9SDimitry Andric     // If the RHS is a constant we can try some simplifications.
943e8d8bef9SDimitry Andric     if (auto *C = dyn_cast<ConstantInt>(II.getArgOperand(1))) {
944e8d8bef9SDimitry Andric       uint64_t Shift = C->getZExtValue();
945e8d8bef9SDimitry Andric       uint64_t Length = (Shift >> 8) & 0xff;
946e8d8bef9SDimitry Andric       Shift &= 0xff;
947e8d8bef9SDimitry Andric       unsigned BitWidth = II.getType()->getIntegerBitWidth();
948e8d8bef9SDimitry Andric       // If the length is 0 or the shift is out of range, replace with zero.
949e8d8bef9SDimitry Andric       if (Length == 0 || Shift >= BitWidth) {
950e8d8bef9SDimitry Andric         return IC.replaceInstUsesWith(II, ConstantInt::get(II.getType(), 0));
951e8d8bef9SDimitry Andric       }
952e8d8bef9SDimitry Andric       // If the LHS is also a constant, we can completely constant fold this.
953e8d8bef9SDimitry Andric       if (auto *InC = dyn_cast<ConstantInt>(II.getArgOperand(0))) {
954e8d8bef9SDimitry Andric         uint64_t Result = InC->getZExtValue() >> Shift;
955e8d8bef9SDimitry Andric         if (Length > BitWidth)
956e8d8bef9SDimitry Andric           Length = BitWidth;
957e8d8bef9SDimitry Andric         Result &= maskTrailingOnes<uint64_t>(Length);
958e8d8bef9SDimitry Andric         return IC.replaceInstUsesWith(II,
959e8d8bef9SDimitry Andric                                       ConstantInt::get(II.getType(), Result));
960e8d8bef9SDimitry Andric       }
961e8d8bef9SDimitry Andric       // TODO should we turn this into 'and' if shift is 0? Or 'shl' if we
962e8d8bef9SDimitry Andric       // are only masking bits that a shift already cleared?
963e8d8bef9SDimitry Andric     }
964e8d8bef9SDimitry Andric     break;
965e8d8bef9SDimitry Andric 
966e8d8bef9SDimitry Andric   case Intrinsic::x86_bmi_bzhi_32:
967e8d8bef9SDimitry Andric   case Intrinsic::x86_bmi_bzhi_64:
968e8d8bef9SDimitry Andric     // If the RHS is a constant we can try some simplifications.
969e8d8bef9SDimitry Andric     if (auto *C = dyn_cast<ConstantInt>(II.getArgOperand(1))) {
970e8d8bef9SDimitry Andric       uint64_t Index = C->getZExtValue() & 0xff;
971e8d8bef9SDimitry Andric       unsigned BitWidth = II.getType()->getIntegerBitWidth();
972e8d8bef9SDimitry Andric       if (Index >= BitWidth) {
973e8d8bef9SDimitry Andric         return IC.replaceInstUsesWith(II, II.getArgOperand(0));
974e8d8bef9SDimitry Andric       }
975e8d8bef9SDimitry Andric       if (Index == 0) {
976e8d8bef9SDimitry Andric         return IC.replaceInstUsesWith(II, ConstantInt::get(II.getType(), 0));
977e8d8bef9SDimitry Andric       }
978e8d8bef9SDimitry Andric       // If the LHS is also a constant, we can completely constant fold this.
979e8d8bef9SDimitry Andric       if (auto *InC = dyn_cast<ConstantInt>(II.getArgOperand(0))) {
980e8d8bef9SDimitry Andric         uint64_t Result = InC->getZExtValue();
981e8d8bef9SDimitry Andric         Result &= maskTrailingOnes<uint64_t>(Index);
982e8d8bef9SDimitry Andric         return IC.replaceInstUsesWith(II,
983e8d8bef9SDimitry Andric                                       ConstantInt::get(II.getType(), Result));
984e8d8bef9SDimitry Andric       }
985e8d8bef9SDimitry Andric       // TODO should we convert this to an AND if the RHS is constant?
986e8d8bef9SDimitry Andric     }
987e8d8bef9SDimitry Andric     break;
988e8d8bef9SDimitry Andric   case Intrinsic::x86_bmi_pext_32:
989e8d8bef9SDimitry Andric   case Intrinsic::x86_bmi_pext_64:
990e8d8bef9SDimitry Andric     if (auto *MaskC = dyn_cast<ConstantInt>(II.getArgOperand(1))) {
991e8d8bef9SDimitry Andric       if (MaskC->isNullValue()) {
992e8d8bef9SDimitry Andric         return IC.replaceInstUsesWith(II, ConstantInt::get(II.getType(), 0));
993e8d8bef9SDimitry Andric       }
994e8d8bef9SDimitry Andric       if (MaskC->isAllOnesValue()) {
995e8d8bef9SDimitry Andric         return IC.replaceInstUsesWith(II, II.getArgOperand(0));
996e8d8bef9SDimitry Andric       }
997e8d8bef9SDimitry Andric 
998*81ad6265SDimitry Andric       unsigned MaskIdx, MaskLen;
999*81ad6265SDimitry Andric       if (MaskC->getValue().isShiftedMask(MaskIdx, MaskLen)) {
1000e8d8bef9SDimitry Andric         // Any single contiguous sequence of 1s anywhere in the mask simply
1001e8d8bef9SDimitry Andric         // describes a subset of the input bits shifted to the appropriate
1002e8d8bef9SDimitry Andric         // position.  Replace with the straightforward IR.
1003e8d8bef9SDimitry Andric         Value *Input = II.getArgOperand(0);
1004e8d8bef9SDimitry Andric         Value *Masked = IC.Builder.CreateAnd(Input, II.getArgOperand(1));
1005*81ad6265SDimitry Andric         Value *ShiftAmt = ConstantInt::get(II.getType(), MaskIdx);
1006*81ad6265SDimitry Andric         Value *Shifted = IC.Builder.CreateLShr(Masked, ShiftAmt);
1007e8d8bef9SDimitry Andric         return IC.replaceInstUsesWith(II, Shifted);
1008e8d8bef9SDimitry Andric       }
1009e8d8bef9SDimitry Andric 
1010e8d8bef9SDimitry Andric       if (auto *SrcC = dyn_cast<ConstantInt>(II.getArgOperand(0))) {
1011e8d8bef9SDimitry Andric         uint64_t Src = SrcC->getZExtValue();
1012e8d8bef9SDimitry Andric         uint64_t Mask = MaskC->getZExtValue();
1013e8d8bef9SDimitry Andric         uint64_t Result = 0;
1014e8d8bef9SDimitry Andric         uint64_t BitToSet = 1;
1015e8d8bef9SDimitry Andric 
1016e8d8bef9SDimitry Andric         while (Mask) {
1017e8d8bef9SDimitry Andric           // Isolate lowest set bit.
1018e8d8bef9SDimitry Andric           uint64_t BitToTest = Mask & -Mask;
1019e8d8bef9SDimitry Andric           if (BitToTest & Src)
1020e8d8bef9SDimitry Andric             Result |= BitToSet;
1021e8d8bef9SDimitry Andric 
1022e8d8bef9SDimitry Andric           BitToSet <<= 1;
1023e8d8bef9SDimitry Andric           // Clear lowest set bit.
1024e8d8bef9SDimitry Andric           Mask &= Mask - 1;
1025e8d8bef9SDimitry Andric         }
1026e8d8bef9SDimitry Andric 
1027e8d8bef9SDimitry Andric         return IC.replaceInstUsesWith(II,
1028e8d8bef9SDimitry Andric                                       ConstantInt::get(II.getType(), Result));
1029e8d8bef9SDimitry Andric       }
1030e8d8bef9SDimitry Andric     }
1031e8d8bef9SDimitry Andric     break;
1032e8d8bef9SDimitry Andric   case Intrinsic::x86_bmi_pdep_32:
1033e8d8bef9SDimitry Andric   case Intrinsic::x86_bmi_pdep_64:
1034e8d8bef9SDimitry Andric     if (auto *MaskC = dyn_cast<ConstantInt>(II.getArgOperand(1))) {
1035e8d8bef9SDimitry Andric       if (MaskC->isNullValue()) {
1036e8d8bef9SDimitry Andric         return IC.replaceInstUsesWith(II, ConstantInt::get(II.getType(), 0));
1037e8d8bef9SDimitry Andric       }
1038e8d8bef9SDimitry Andric       if (MaskC->isAllOnesValue()) {
1039e8d8bef9SDimitry Andric         return IC.replaceInstUsesWith(II, II.getArgOperand(0));
1040e8d8bef9SDimitry Andric       }
1041*81ad6265SDimitry Andric 
1042*81ad6265SDimitry Andric       unsigned MaskIdx, MaskLen;
1043*81ad6265SDimitry Andric       if (MaskC->getValue().isShiftedMask(MaskIdx, MaskLen)) {
1044e8d8bef9SDimitry Andric         // Any single contiguous sequence of 1s anywhere in the mask simply
1045e8d8bef9SDimitry Andric         // describes a subset of the input bits shifted to the appropriate
1046e8d8bef9SDimitry Andric         // position.  Replace with the straightforward IR.
1047e8d8bef9SDimitry Andric         Value *Input = II.getArgOperand(0);
1048*81ad6265SDimitry Andric         Value *ShiftAmt = ConstantInt::get(II.getType(), MaskIdx);
1049*81ad6265SDimitry Andric         Value *Shifted = IC.Builder.CreateShl(Input, ShiftAmt);
1050e8d8bef9SDimitry Andric         Value *Masked = IC.Builder.CreateAnd(Shifted, II.getArgOperand(1));
1051e8d8bef9SDimitry Andric         return IC.replaceInstUsesWith(II, Masked);
1052e8d8bef9SDimitry Andric       }
1053e8d8bef9SDimitry Andric 
1054e8d8bef9SDimitry Andric       if (auto *SrcC = dyn_cast<ConstantInt>(II.getArgOperand(0))) {
1055e8d8bef9SDimitry Andric         uint64_t Src = SrcC->getZExtValue();
1056e8d8bef9SDimitry Andric         uint64_t Mask = MaskC->getZExtValue();
1057e8d8bef9SDimitry Andric         uint64_t Result = 0;
1058e8d8bef9SDimitry Andric         uint64_t BitToTest = 1;
1059e8d8bef9SDimitry Andric 
1060e8d8bef9SDimitry Andric         while (Mask) {
1061e8d8bef9SDimitry Andric           // Isolate lowest set bit.
1062e8d8bef9SDimitry Andric           uint64_t BitToSet = Mask & -Mask;
1063e8d8bef9SDimitry Andric           if (BitToTest & Src)
1064e8d8bef9SDimitry Andric             Result |= BitToSet;
1065e8d8bef9SDimitry Andric 
1066e8d8bef9SDimitry Andric           BitToTest <<= 1;
1067e8d8bef9SDimitry Andric           // Clear lowest set bit.
1068e8d8bef9SDimitry Andric           Mask &= Mask - 1;
1069e8d8bef9SDimitry Andric         }
1070e8d8bef9SDimitry Andric 
1071e8d8bef9SDimitry Andric         return IC.replaceInstUsesWith(II,
1072e8d8bef9SDimitry Andric                                       ConstantInt::get(II.getType(), Result));
1073e8d8bef9SDimitry Andric       }
1074e8d8bef9SDimitry Andric     }
1075e8d8bef9SDimitry Andric     break;
1076e8d8bef9SDimitry Andric 
1077e8d8bef9SDimitry Andric   case Intrinsic::x86_sse_cvtss2si:
1078e8d8bef9SDimitry Andric   case Intrinsic::x86_sse_cvtss2si64:
1079e8d8bef9SDimitry Andric   case Intrinsic::x86_sse_cvttss2si:
1080e8d8bef9SDimitry Andric   case Intrinsic::x86_sse_cvttss2si64:
1081e8d8bef9SDimitry Andric   case Intrinsic::x86_sse2_cvtsd2si:
1082e8d8bef9SDimitry Andric   case Intrinsic::x86_sse2_cvtsd2si64:
1083e8d8bef9SDimitry Andric   case Intrinsic::x86_sse2_cvttsd2si:
1084e8d8bef9SDimitry Andric   case Intrinsic::x86_sse2_cvttsd2si64:
1085e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_vcvtss2si32:
1086e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_vcvtss2si64:
1087e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_vcvtss2usi32:
1088e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_vcvtss2usi64:
1089e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_vcvtsd2si32:
1090e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_vcvtsd2si64:
1091e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_vcvtsd2usi32:
1092e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_vcvtsd2usi64:
1093e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_cvttss2si:
1094e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_cvttss2si64:
1095e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_cvttss2usi:
1096e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_cvttss2usi64:
1097e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_cvttsd2si:
1098e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_cvttsd2si64:
1099e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_cvttsd2usi:
1100e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_cvttsd2usi64: {
1101e8d8bef9SDimitry Andric     // These intrinsics only demand the 0th element of their input vectors. If
1102e8d8bef9SDimitry Andric     // we can simplify the input based on that, do so now.
1103e8d8bef9SDimitry Andric     Value *Arg = II.getArgOperand(0);
1104e8d8bef9SDimitry Andric     unsigned VWidth = cast<FixedVectorType>(Arg->getType())->getNumElements();
1105e8d8bef9SDimitry Andric     if (Value *V = SimplifyDemandedVectorEltsLow(Arg, VWidth, 1)) {
1106e8d8bef9SDimitry Andric       return IC.replaceOperand(II, 0, V);
1107e8d8bef9SDimitry Andric     }
1108e8d8bef9SDimitry Andric     break;
1109e8d8bef9SDimitry Andric   }
1110e8d8bef9SDimitry Andric 
1111e8d8bef9SDimitry Andric   case Intrinsic::x86_mmx_pmovmskb:
1112e8d8bef9SDimitry Andric   case Intrinsic::x86_sse_movmsk_ps:
1113e8d8bef9SDimitry Andric   case Intrinsic::x86_sse2_movmsk_pd:
1114e8d8bef9SDimitry Andric   case Intrinsic::x86_sse2_pmovmskb_128:
1115e8d8bef9SDimitry Andric   case Intrinsic::x86_avx_movmsk_pd_256:
1116e8d8bef9SDimitry Andric   case Intrinsic::x86_avx_movmsk_ps_256:
1117e8d8bef9SDimitry Andric   case Intrinsic::x86_avx2_pmovmskb:
1118e8d8bef9SDimitry Andric     if (Value *V = simplifyX86movmsk(II, IC.Builder)) {
1119e8d8bef9SDimitry Andric       return IC.replaceInstUsesWith(II, V);
1120e8d8bef9SDimitry Andric     }
1121e8d8bef9SDimitry Andric     break;
1122e8d8bef9SDimitry Andric 
1123e8d8bef9SDimitry Andric   case Intrinsic::x86_sse_comieq_ss:
1124e8d8bef9SDimitry Andric   case Intrinsic::x86_sse_comige_ss:
1125e8d8bef9SDimitry Andric   case Intrinsic::x86_sse_comigt_ss:
1126e8d8bef9SDimitry Andric   case Intrinsic::x86_sse_comile_ss:
1127e8d8bef9SDimitry Andric   case Intrinsic::x86_sse_comilt_ss:
1128e8d8bef9SDimitry Andric   case Intrinsic::x86_sse_comineq_ss:
1129e8d8bef9SDimitry Andric   case Intrinsic::x86_sse_ucomieq_ss:
1130e8d8bef9SDimitry Andric   case Intrinsic::x86_sse_ucomige_ss:
1131e8d8bef9SDimitry Andric   case Intrinsic::x86_sse_ucomigt_ss:
1132e8d8bef9SDimitry Andric   case Intrinsic::x86_sse_ucomile_ss:
1133e8d8bef9SDimitry Andric   case Intrinsic::x86_sse_ucomilt_ss:
1134e8d8bef9SDimitry Andric   case Intrinsic::x86_sse_ucomineq_ss:
1135e8d8bef9SDimitry Andric   case Intrinsic::x86_sse2_comieq_sd:
1136e8d8bef9SDimitry Andric   case Intrinsic::x86_sse2_comige_sd:
1137e8d8bef9SDimitry Andric   case Intrinsic::x86_sse2_comigt_sd:
1138e8d8bef9SDimitry Andric   case Intrinsic::x86_sse2_comile_sd:
1139e8d8bef9SDimitry Andric   case Intrinsic::x86_sse2_comilt_sd:
1140e8d8bef9SDimitry Andric   case Intrinsic::x86_sse2_comineq_sd:
1141e8d8bef9SDimitry Andric   case Intrinsic::x86_sse2_ucomieq_sd:
1142e8d8bef9SDimitry Andric   case Intrinsic::x86_sse2_ucomige_sd:
1143e8d8bef9SDimitry Andric   case Intrinsic::x86_sse2_ucomigt_sd:
1144e8d8bef9SDimitry Andric   case Intrinsic::x86_sse2_ucomile_sd:
1145e8d8bef9SDimitry Andric   case Intrinsic::x86_sse2_ucomilt_sd:
1146e8d8bef9SDimitry Andric   case Intrinsic::x86_sse2_ucomineq_sd:
1147e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_vcomi_ss:
1148e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_vcomi_sd:
1149e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_mask_cmp_ss:
1150e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_mask_cmp_sd: {
1151e8d8bef9SDimitry Andric     // These intrinsics only demand the 0th element of their input vectors. If
1152e8d8bef9SDimitry Andric     // we can simplify the input based on that, do so now.
1153e8d8bef9SDimitry Andric     bool MadeChange = false;
1154e8d8bef9SDimitry Andric     Value *Arg0 = II.getArgOperand(0);
1155e8d8bef9SDimitry Andric     Value *Arg1 = II.getArgOperand(1);
1156e8d8bef9SDimitry Andric     unsigned VWidth = cast<FixedVectorType>(Arg0->getType())->getNumElements();
1157e8d8bef9SDimitry Andric     if (Value *V = SimplifyDemandedVectorEltsLow(Arg0, VWidth, 1)) {
1158e8d8bef9SDimitry Andric       IC.replaceOperand(II, 0, V);
1159e8d8bef9SDimitry Andric       MadeChange = true;
1160e8d8bef9SDimitry Andric     }
1161e8d8bef9SDimitry Andric     if (Value *V = SimplifyDemandedVectorEltsLow(Arg1, VWidth, 1)) {
1162e8d8bef9SDimitry Andric       IC.replaceOperand(II, 1, V);
1163e8d8bef9SDimitry Andric       MadeChange = true;
1164e8d8bef9SDimitry Andric     }
1165e8d8bef9SDimitry Andric     if (MadeChange) {
1166e8d8bef9SDimitry Andric       return &II;
1167e8d8bef9SDimitry Andric     }
1168e8d8bef9SDimitry Andric     break;
1169e8d8bef9SDimitry Andric   }
1170e8d8bef9SDimitry Andric 
1171e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_add_ps_512:
1172e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_div_ps_512:
1173e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_mul_ps_512:
1174e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_sub_ps_512:
1175e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_add_pd_512:
1176e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_div_pd_512:
1177e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_mul_pd_512:
1178e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_sub_pd_512:
1179e8d8bef9SDimitry Andric     // If the rounding mode is CUR_DIRECTION(4) we can turn these into regular
1180e8d8bef9SDimitry Andric     // IR operations.
1181e8d8bef9SDimitry Andric     if (auto *R = dyn_cast<ConstantInt>(II.getArgOperand(2))) {
1182e8d8bef9SDimitry Andric       if (R->getValue() == 4) {
1183e8d8bef9SDimitry Andric         Value *Arg0 = II.getArgOperand(0);
1184e8d8bef9SDimitry Andric         Value *Arg1 = II.getArgOperand(1);
1185e8d8bef9SDimitry Andric 
1186e8d8bef9SDimitry Andric         Value *V;
1187e8d8bef9SDimitry Andric         switch (IID) {
1188e8d8bef9SDimitry Andric         default:
1189e8d8bef9SDimitry Andric           llvm_unreachable("Case stmts out of sync!");
1190e8d8bef9SDimitry Andric         case Intrinsic::x86_avx512_add_ps_512:
1191e8d8bef9SDimitry Andric         case Intrinsic::x86_avx512_add_pd_512:
1192e8d8bef9SDimitry Andric           V = IC.Builder.CreateFAdd(Arg0, Arg1);
1193e8d8bef9SDimitry Andric           break;
1194e8d8bef9SDimitry Andric         case Intrinsic::x86_avx512_sub_ps_512:
1195e8d8bef9SDimitry Andric         case Intrinsic::x86_avx512_sub_pd_512:
1196e8d8bef9SDimitry Andric           V = IC.Builder.CreateFSub(Arg0, Arg1);
1197e8d8bef9SDimitry Andric           break;
1198e8d8bef9SDimitry Andric         case Intrinsic::x86_avx512_mul_ps_512:
1199e8d8bef9SDimitry Andric         case Intrinsic::x86_avx512_mul_pd_512:
1200e8d8bef9SDimitry Andric           V = IC.Builder.CreateFMul(Arg0, Arg1);
1201e8d8bef9SDimitry Andric           break;
1202e8d8bef9SDimitry Andric         case Intrinsic::x86_avx512_div_ps_512:
1203e8d8bef9SDimitry Andric         case Intrinsic::x86_avx512_div_pd_512:
1204e8d8bef9SDimitry Andric           V = IC.Builder.CreateFDiv(Arg0, Arg1);
1205e8d8bef9SDimitry Andric           break;
1206e8d8bef9SDimitry Andric         }
1207e8d8bef9SDimitry Andric 
1208e8d8bef9SDimitry Andric         return IC.replaceInstUsesWith(II, V);
1209e8d8bef9SDimitry Andric       }
1210e8d8bef9SDimitry Andric     }
1211e8d8bef9SDimitry Andric     break;
1212e8d8bef9SDimitry Andric 
1213e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_mask_add_ss_round:
1214e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_mask_div_ss_round:
1215e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_mask_mul_ss_round:
1216e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_mask_sub_ss_round:
1217e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_mask_add_sd_round:
1218e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_mask_div_sd_round:
1219e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_mask_mul_sd_round:
1220e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_mask_sub_sd_round:
1221e8d8bef9SDimitry Andric     // If the rounding mode is CUR_DIRECTION(4) we can turn these into regular
1222e8d8bef9SDimitry Andric     // IR operations.
1223e8d8bef9SDimitry Andric     if (auto *R = dyn_cast<ConstantInt>(II.getArgOperand(4))) {
1224e8d8bef9SDimitry Andric       if (R->getValue() == 4) {
1225e8d8bef9SDimitry Andric         // Extract the element as scalars.
1226e8d8bef9SDimitry Andric         Value *Arg0 = II.getArgOperand(0);
1227e8d8bef9SDimitry Andric         Value *Arg1 = II.getArgOperand(1);
1228e8d8bef9SDimitry Andric         Value *LHS = IC.Builder.CreateExtractElement(Arg0, (uint64_t)0);
1229e8d8bef9SDimitry Andric         Value *RHS = IC.Builder.CreateExtractElement(Arg1, (uint64_t)0);
1230e8d8bef9SDimitry Andric 
1231e8d8bef9SDimitry Andric         Value *V;
1232e8d8bef9SDimitry Andric         switch (IID) {
1233e8d8bef9SDimitry Andric         default:
1234e8d8bef9SDimitry Andric           llvm_unreachable("Case stmts out of sync!");
1235e8d8bef9SDimitry Andric         case Intrinsic::x86_avx512_mask_add_ss_round:
1236e8d8bef9SDimitry Andric         case Intrinsic::x86_avx512_mask_add_sd_round:
1237e8d8bef9SDimitry Andric           V = IC.Builder.CreateFAdd(LHS, RHS);
1238e8d8bef9SDimitry Andric           break;
1239e8d8bef9SDimitry Andric         case Intrinsic::x86_avx512_mask_sub_ss_round:
1240e8d8bef9SDimitry Andric         case Intrinsic::x86_avx512_mask_sub_sd_round:
1241e8d8bef9SDimitry Andric           V = IC.Builder.CreateFSub(LHS, RHS);
1242e8d8bef9SDimitry Andric           break;
1243e8d8bef9SDimitry Andric         case Intrinsic::x86_avx512_mask_mul_ss_round:
1244e8d8bef9SDimitry Andric         case Intrinsic::x86_avx512_mask_mul_sd_round:
1245e8d8bef9SDimitry Andric           V = IC.Builder.CreateFMul(LHS, RHS);
1246e8d8bef9SDimitry Andric           break;
1247e8d8bef9SDimitry Andric         case Intrinsic::x86_avx512_mask_div_ss_round:
1248e8d8bef9SDimitry Andric         case Intrinsic::x86_avx512_mask_div_sd_round:
1249e8d8bef9SDimitry Andric           V = IC.Builder.CreateFDiv(LHS, RHS);
1250e8d8bef9SDimitry Andric           break;
1251e8d8bef9SDimitry Andric         }
1252e8d8bef9SDimitry Andric 
1253e8d8bef9SDimitry Andric         // Handle the masking aspect of the intrinsic.
1254e8d8bef9SDimitry Andric         Value *Mask = II.getArgOperand(3);
1255e8d8bef9SDimitry Andric         auto *C = dyn_cast<ConstantInt>(Mask);
1256e8d8bef9SDimitry Andric         // We don't need a select if we know the mask bit is a 1.
1257e8d8bef9SDimitry Andric         if (!C || !C->getValue()[0]) {
1258e8d8bef9SDimitry Andric           // Cast the mask to an i1 vector and then extract the lowest element.
1259e8d8bef9SDimitry Andric           auto *MaskTy = FixedVectorType::get(
1260e8d8bef9SDimitry Andric               IC.Builder.getInt1Ty(),
1261e8d8bef9SDimitry Andric               cast<IntegerType>(Mask->getType())->getBitWidth());
1262e8d8bef9SDimitry Andric           Mask = IC.Builder.CreateBitCast(Mask, MaskTy);
1263e8d8bef9SDimitry Andric           Mask = IC.Builder.CreateExtractElement(Mask, (uint64_t)0);
1264e8d8bef9SDimitry Andric           // Extract the lowest element from the passthru operand.
1265e8d8bef9SDimitry Andric           Value *Passthru =
1266e8d8bef9SDimitry Andric               IC.Builder.CreateExtractElement(II.getArgOperand(2), (uint64_t)0);
1267e8d8bef9SDimitry Andric           V = IC.Builder.CreateSelect(Mask, V, Passthru);
1268e8d8bef9SDimitry Andric         }
1269e8d8bef9SDimitry Andric 
1270e8d8bef9SDimitry Andric         // Insert the result back into the original argument 0.
1271e8d8bef9SDimitry Andric         V = IC.Builder.CreateInsertElement(Arg0, V, (uint64_t)0);
1272e8d8bef9SDimitry Andric 
1273e8d8bef9SDimitry Andric         return IC.replaceInstUsesWith(II, V);
1274e8d8bef9SDimitry Andric       }
1275e8d8bef9SDimitry Andric     }
1276e8d8bef9SDimitry Andric     break;
1277e8d8bef9SDimitry Andric 
1278e8d8bef9SDimitry Andric   // Constant fold ashr( <A x Bi>, Ci ).
1279e8d8bef9SDimitry Andric   // Constant fold lshr( <A x Bi>, Ci ).
1280e8d8bef9SDimitry Andric   // Constant fold shl( <A x Bi>, Ci ).
1281e8d8bef9SDimitry Andric   case Intrinsic::x86_sse2_psrai_d:
1282e8d8bef9SDimitry Andric   case Intrinsic::x86_sse2_psrai_w:
1283e8d8bef9SDimitry Andric   case Intrinsic::x86_avx2_psrai_d:
1284e8d8bef9SDimitry Andric   case Intrinsic::x86_avx2_psrai_w:
1285e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_psrai_q_128:
1286e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_psrai_q_256:
1287e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_psrai_d_512:
1288e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_psrai_q_512:
1289e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_psrai_w_512:
1290e8d8bef9SDimitry Andric   case Intrinsic::x86_sse2_psrli_d:
1291e8d8bef9SDimitry Andric   case Intrinsic::x86_sse2_psrli_q:
1292e8d8bef9SDimitry Andric   case Intrinsic::x86_sse2_psrli_w:
1293e8d8bef9SDimitry Andric   case Intrinsic::x86_avx2_psrli_d:
1294e8d8bef9SDimitry Andric   case Intrinsic::x86_avx2_psrli_q:
1295e8d8bef9SDimitry Andric   case Intrinsic::x86_avx2_psrli_w:
1296e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_psrli_d_512:
1297e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_psrli_q_512:
1298e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_psrli_w_512:
1299e8d8bef9SDimitry Andric   case Intrinsic::x86_sse2_pslli_d:
1300e8d8bef9SDimitry Andric   case Intrinsic::x86_sse2_pslli_q:
1301e8d8bef9SDimitry Andric   case Intrinsic::x86_sse2_pslli_w:
1302e8d8bef9SDimitry Andric   case Intrinsic::x86_avx2_pslli_d:
1303e8d8bef9SDimitry Andric   case Intrinsic::x86_avx2_pslli_q:
1304e8d8bef9SDimitry Andric   case Intrinsic::x86_avx2_pslli_w:
1305e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_pslli_d_512:
1306e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_pslli_q_512:
1307e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_pslli_w_512:
1308e8d8bef9SDimitry Andric     if (Value *V = simplifyX86immShift(II, IC.Builder)) {
1309e8d8bef9SDimitry Andric       return IC.replaceInstUsesWith(II, V);
1310e8d8bef9SDimitry Andric     }
1311e8d8bef9SDimitry Andric     break;
1312e8d8bef9SDimitry Andric 
1313e8d8bef9SDimitry Andric   case Intrinsic::x86_sse2_psra_d:
1314e8d8bef9SDimitry Andric   case Intrinsic::x86_sse2_psra_w:
1315e8d8bef9SDimitry Andric   case Intrinsic::x86_avx2_psra_d:
1316e8d8bef9SDimitry Andric   case Intrinsic::x86_avx2_psra_w:
1317e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_psra_q_128:
1318e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_psra_q_256:
1319e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_psra_d_512:
1320e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_psra_q_512:
1321e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_psra_w_512:
1322e8d8bef9SDimitry Andric   case Intrinsic::x86_sse2_psrl_d:
1323e8d8bef9SDimitry Andric   case Intrinsic::x86_sse2_psrl_q:
1324e8d8bef9SDimitry Andric   case Intrinsic::x86_sse2_psrl_w:
1325e8d8bef9SDimitry Andric   case Intrinsic::x86_avx2_psrl_d:
1326e8d8bef9SDimitry Andric   case Intrinsic::x86_avx2_psrl_q:
1327e8d8bef9SDimitry Andric   case Intrinsic::x86_avx2_psrl_w:
1328e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_psrl_d_512:
1329e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_psrl_q_512:
1330e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_psrl_w_512:
1331e8d8bef9SDimitry Andric   case Intrinsic::x86_sse2_psll_d:
1332e8d8bef9SDimitry Andric   case Intrinsic::x86_sse2_psll_q:
1333e8d8bef9SDimitry Andric   case Intrinsic::x86_sse2_psll_w:
1334e8d8bef9SDimitry Andric   case Intrinsic::x86_avx2_psll_d:
1335e8d8bef9SDimitry Andric   case Intrinsic::x86_avx2_psll_q:
1336e8d8bef9SDimitry Andric   case Intrinsic::x86_avx2_psll_w:
1337e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_psll_d_512:
1338e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_psll_q_512:
1339e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_psll_w_512: {
1340e8d8bef9SDimitry Andric     if (Value *V = simplifyX86immShift(II, IC.Builder)) {
1341e8d8bef9SDimitry Andric       return IC.replaceInstUsesWith(II, V);
1342e8d8bef9SDimitry Andric     }
1343e8d8bef9SDimitry Andric 
1344e8d8bef9SDimitry Andric     // SSE2/AVX2 uses only the first 64-bits of the 128-bit vector
1345e8d8bef9SDimitry Andric     // operand to compute the shift amount.
1346e8d8bef9SDimitry Andric     Value *Arg1 = II.getArgOperand(1);
1347e8d8bef9SDimitry Andric     assert(Arg1->getType()->getPrimitiveSizeInBits() == 128 &&
1348e8d8bef9SDimitry Andric            "Unexpected packed shift size");
1349e8d8bef9SDimitry Andric     unsigned VWidth = cast<FixedVectorType>(Arg1->getType())->getNumElements();
1350e8d8bef9SDimitry Andric 
1351e8d8bef9SDimitry Andric     if (Value *V = SimplifyDemandedVectorEltsLow(Arg1, VWidth, VWidth / 2)) {
1352e8d8bef9SDimitry Andric       return IC.replaceOperand(II, 1, V);
1353e8d8bef9SDimitry Andric     }
1354e8d8bef9SDimitry Andric     break;
1355e8d8bef9SDimitry Andric   }
1356e8d8bef9SDimitry Andric 
1357e8d8bef9SDimitry Andric   case Intrinsic::x86_avx2_psllv_d:
1358e8d8bef9SDimitry Andric   case Intrinsic::x86_avx2_psllv_d_256:
1359e8d8bef9SDimitry Andric   case Intrinsic::x86_avx2_psllv_q:
1360e8d8bef9SDimitry Andric   case Intrinsic::x86_avx2_psllv_q_256:
1361e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_psllv_d_512:
1362e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_psllv_q_512:
1363e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_psllv_w_128:
1364e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_psllv_w_256:
1365e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_psllv_w_512:
1366e8d8bef9SDimitry Andric   case Intrinsic::x86_avx2_psrav_d:
1367e8d8bef9SDimitry Andric   case Intrinsic::x86_avx2_psrav_d_256:
1368e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_psrav_q_128:
1369e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_psrav_q_256:
1370e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_psrav_d_512:
1371e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_psrav_q_512:
1372e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_psrav_w_128:
1373e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_psrav_w_256:
1374e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_psrav_w_512:
1375e8d8bef9SDimitry Andric   case Intrinsic::x86_avx2_psrlv_d:
1376e8d8bef9SDimitry Andric   case Intrinsic::x86_avx2_psrlv_d_256:
1377e8d8bef9SDimitry Andric   case Intrinsic::x86_avx2_psrlv_q:
1378e8d8bef9SDimitry Andric   case Intrinsic::x86_avx2_psrlv_q_256:
1379e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_psrlv_d_512:
1380e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_psrlv_q_512:
1381e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_psrlv_w_128:
1382e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_psrlv_w_256:
1383e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_psrlv_w_512:
1384e8d8bef9SDimitry Andric     if (Value *V = simplifyX86varShift(II, IC.Builder)) {
1385e8d8bef9SDimitry Andric       return IC.replaceInstUsesWith(II, V);
1386e8d8bef9SDimitry Andric     }
1387e8d8bef9SDimitry Andric     break;
1388e8d8bef9SDimitry Andric 
1389e8d8bef9SDimitry Andric   case Intrinsic::x86_sse2_packssdw_128:
1390e8d8bef9SDimitry Andric   case Intrinsic::x86_sse2_packsswb_128:
1391e8d8bef9SDimitry Andric   case Intrinsic::x86_avx2_packssdw:
1392e8d8bef9SDimitry Andric   case Intrinsic::x86_avx2_packsswb:
1393e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_packssdw_512:
1394e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_packsswb_512:
1395e8d8bef9SDimitry Andric     if (Value *V = simplifyX86pack(II, IC.Builder, true)) {
1396e8d8bef9SDimitry Andric       return IC.replaceInstUsesWith(II, V);
1397e8d8bef9SDimitry Andric     }
1398e8d8bef9SDimitry Andric     break;
1399e8d8bef9SDimitry Andric 
1400e8d8bef9SDimitry Andric   case Intrinsic::x86_sse2_packuswb_128:
1401e8d8bef9SDimitry Andric   case Intrinsic::x86_sse41_packusdw:
1402e8d8bef9SDimitry Andric   case Intrinsic::x86_avx2_packusdw:
1403e8d8bef9SDimitry Andric   case Intrinsic::x86_avx2_packuswb:
1404e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_packusdw_512:
1405e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_packuswb_512:
1406e8d8bef9SDimitry Andric     if (Value *V = simplifyX86pack(II, IC.Builder, false)) {
1407e8d8bef9SDimitry Andric       return IC.replaceInstUsesWith(II, V);
1408e8d8bef9SDimitry Andric     }
1409e8d8bef9SDimitry Andric     break;
1410e8d8bef9SDimitry Andric 
1411e8d8bef9SDimitry Andric   case Intrinsic::x86_pclmulqdq:
1412e8d8bef9SDimitry Andric   case Intrinsic::x86_pclmulqdq_256:
1413e8d8bef9SDimitry Andric   case Intrinsic::x86_pclmulqdq_512: {
1414e8d8bef9SDimitry Andric     if (auto *C = dyn_cast<ConstantInt>(II.getArgOperand(2))) {
1415e8d8bef9SDimitry Andric       unsigned Imm = C->getZExtValue();
1416e8d8bef9SDimitry Andric 
1417e8d8bef9SDimitry Andric       bool MadeChange = false;
1418e8d8bef9SDimitry Andric       Value *Arg0 = II.getArgOperand(0);
1419e8d8bef9SDimitry Andric       Value *Arg1 = II.getArgOperand(1);
1420e8d8bef9SDimitry Andric       unsigned VWidth =
1421e8d8bef9SDimitry Andric           cast<FixedVectorType>(Arg0->getType())->getNumElements();
1422e8d8bef9SDimitry Andric 
1423e8d8bef9SDimitry Andric       APInt UndefElts1(VWidth, 0);
1424e8d8bef9SDimitry Andric       APInt DemandedElts1 =
1425e8d8bef9SDimitry Andric           APInt::getSplat(VWidth, APInt(2, (Imm & 0x01) ? 2 : 1));
1426e8d8bef9SDimitry Andric       if (Value *V =
1427e8d8bef9SDimitry Andric               IC.SimplifyDemandedVectorElts(Arg0, DemandedElts1, UndefElts1)) {
1428e8d8bef9SDimitry Andric         IC.replaceOperand(II, 0, V);
1429e8d8bef9SDimitry Andric         MadeChange = true;
1430e8d8bef9SDimitry Andric       }
1431e8d8bef9SDimitry Andric 
1432e8d8bef9SDimitry Andric       APInt UndefElts2(VWidth, 0);
1433e8d8bef9SDimitry Andric       APInt DemandedElts2 =
1434e8d8bef9SDimitry Andric           APInt::getSplat(VWidth, APInt(2, (Imm & 0x10) ? 2 : 1));
1435e8d8bef9SDimitry Andric       if (Value *V =
1436e8d8bef9SDimitry Andric               IC.SimplifyDemandedVectorElts(Arg1, DemandedElts2, UndefElts2)) {
1437e8d8bef9SDimitry Andric         IC.replaceOperand(II, 1, V);
1438e8d8bef9SDimitry Andric         MadeChange = true;
1439e8d8bef9SDimitry Andric       }
1440e8d8bef9SDimitry Andric 
1441e8d8bef9SDimitry Andric       // If either input elements are undef, the result is zero.
1442e8d8bef9SDimitry Andric       if (DemandedElts1.isSubsetOf(UndefElts1) ||
1443e8d8bef9SDimitry Andric           DemandedElts2.isSubsetOf(UndefElts2)) {
1444e8d8bef9SDimitry Andric         return IC.replaceInstUsesWith(II,
1445e8d8bef9SDimitry Andric                                       ConstantAggregateZero::get(II.getType()));
1446e8d8bef9SDimitry Andric       }
1447e8d8bef9SDimitry Andric 
1448e8d8bef9SDimitry Andric       if (MadeChange) {
1449e8d8bef9SDimitry Andric         return &II;
1450e8d8bef9SDimitry Andric       }
1451e8d8bef9SDimitry Andric     }
1452e8d8bef9SDimitry Andric     break;
1453e8d8bef9SDimitry Andric   }
1454e8d8bef9SDimitry Andric 
1455e8d8bef9SDimitry Andric   case Intrinsic::x86_sse41_insertps:
1456e8d8bef9SDimitry Andric     if (Value *V = simplifyX86insertps(II, IC.Builder)) {
1457e8d8bef9SDimitry Andric       return IC.replaceInstUsesWith(II, V);
1458e8d8bef9SDimitry Andric     }
1459e8d8bef9SDimitry Andric     break;
1460e8d8bef9SDimitry Andric 
1461e8d8bef9SDimitry Andric   case Intrinsic::x86_sse4a_extrq: {
1462e8d8bef9SDimitry Andric     Value *Op0 = II.getArgOperand(0);
1463e8d8bef9SDimitry Andric     Value *Op1 = II.getArgOperand(1);
1464e8d8bef9SDimitry Andric     unsigned VWidth0 = cast<FixedVectorType>(Op0->getType())->getNumElements();
1465e8d8bef9SDimitry Andric     unsigned VWidth1 = cast<FixedVectorType>(Op1->getType())->getNumElements();
1466e8d8bef9SDimitry Andric     assert(Op0->getType()->getPrimitiveSizeInBits() == 128 &&
1467e8d8bef9SDimitry Andric            Op1->getType()->getPrimitiveSizeInBits() == 128 && VWidth0 == 2 &&
1468e8d8bef9SDimitry Andric            VWidth1 == 16 && "Unexpected operand sizes");
1469e8d8bef9SDimitry Andric 
1470e8d8bef9SDimitry Andric     // See if we're dealing with constant values.
1471fe6060f1SDimitry Andric     auto *C1 = dyn_cast<Constant>(Op1);
1472fe6060f1SDimitry Andric     auto *CILength =
1473e8d8bef9SDimitry Andric         C1 ? dyn_cast_or_null<ConstantInt>(C1->getAggregateElement((unsigned)0))
1474e8d8bef9SDimitry Andric            : nullptr;
1475fe6060f1SDimitry Andric     auto *CIIndex =
1476e8d8bef9SDimitry Andric         C1 ? dyn_cast_or_null<ConstantInt>(C1->getAggregateElement((unsigned)1))
1477e8d8bef9SDimitry Andric            : nullptr;
1478e8d8bef9SDimitry Andric 
1479e8d8bef9SDimitry Andric     // Attempt to simplify to a constant, shuffle vector or EXTRQI call.
1480e8d8bef9SDimitry Andric     if (Value *V = simplifyX86extrq(II, Op0, CILength, CIIndex, IC.Builder)) {
1481e8d8bef9SDimitry Andric       return IC.replaceInstUsesWith(II, V);
1482e8d8bef9SDimitry Andric     }
1483e8d8bef9SDimitry Andric 
1484e8d8bef9SDimitry Andric     // EXTRQ only uses the lowest 64-bits of the first 128-bit vector
1485e8d8bef9SDimitry Andric     // operands and the lowest 16-bits of the second.
1486e8d8bef9SDimitry Andric     bool MadeChange = false;
1487e8d8bef9SDimitry Andric     if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth0, 1)) {
1488e8d8bef9SDimitry Andric       IC.replaceOperand(II, 0, V);
1489e8d8bef9SDimitry Andric       MadeChange = true;
1490e8d8bef9SDimitry Andric     }
1491e8d8bef9SDimitry Andric     if (Value *V = SimplifyDemandedVectorEltsLow(Op1, VWidth1, 2)) {
1492e8d8bef9SDimitry Andric       IC.replaceOperand(II, 1, V);
1493e8d8bef9SDimitry Andric       MadeChange = true;
1494e8d8bef9SDimitry Andric     }
1495e8d8bef9SDimitry Andric     if (MadeChange) {
1496e8d8bef9SDimitry Andric       return &II;
1497e8d8bef9SDimitry Andric     }
1498e8d8bef9SDimitry Andric     break;
1499e8d8bef9SDimitry Andric   }
1500e8d8bef9SDimitry Andric 
1501e8d8bef9SDimitry Andric   case Intrinsic::x86_sse4a_extrqi: {
1502e8d8bef9SDimitry Andric     // EXTRQI: Extract Length bits starting from Index. Zero pad the remaining
1503e8d8bef9SDimitry Andric     // bits of the lower 64-bits. The upper 64-bits are undefined.
1504e8d8bef9SDimitry Andric     Value *Op0 = II.getArgOperand(0);
1505e8d8bef9SDimitry Andric     unsigned VWidth = cast<FixedVectorType>(Op0->getType())->getNumElements();
1506e8d8bef9SDimitry Andric     assert(Op0->getType()->getPrimitiveSizeInBits() == 128 && VWidth == 2 &&
1507e8d8bef9SDimitry Andric            "Unexpected operand size");
1508e8d8bef9SDimitry Andric 
1509e8d8bef9SDimitry Andric     // See if we're dealing with constant values.
1510fe6060f1SDimitry Andric     auto *CILength = dyn_cast<ConstantInt>(II.getArgOperand(1));
1511fe6060f1SDimitry Andric     auto *CIIndex = dyn_cast<ConstantInt>(II.getArgOperand(2));
1512e8d8bef9SDimitry Andric 
1513e8d8bef9SDimitry Andric     // Attempt to simplify to a constant or shuffle vector.
1514e8d8bef9SDimitry Andric     if (Value *V = simplifyX86extrq(II, Op0, CILength, CIIndex, IC.Builder)) {
1515e8d8bef9SDimitry Andric       return IC.replaceInstUsesWith(II, V);
1516e8d8bef9SDimitry Andric     }
1517e8d8bef9SDimitry Andric 
1518e8d8bef9SDimitry Andric     // EXTRQI only uses the lowest 64-bits of the first 128-bit vector
1519e8d8bef9SDimitry Andric     // operand.
1520e8d8bef9SDimitry Andric     if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth, 1)) {
1521e8d8bef9SDimitry Andric       return IC.replaceOperand(II, 0, V);
1522e8d8bef9SDimitry Andric     }
1523e8d8bef9SDimitry Andric     break;
1524e8d8bef9SDimitry Andric   }
1525e8d8bef9SDimitry Andric 
1526e8d8bef9SDimitry Andric   case Intrinsic::x86_sse4a_insertq: {
1527e8d8bef9SDimitry Andric     Value *Op0 = II.getArgOperand(0);
1528e8d8bef9SDimitry Andric     Value *Op1 = II.getArgOperand(1);
1529e8d8bef9SDimitry Andric     unsigned VWidth = cast<FixedVectorType>(Op0->getType())->getNumElements();
1530e8d8bef9SDimitry Andric     assert(Op0->getType()->getPrimitiveSizeInBits() == 128 &&
1531e8d8bef9SDimitry Andric            Op1->getType()->getPrimitiveSizeInBits() == 128 && VWidth == 2 &&
1532e8d8bef9SDimitry Andric            cast<FixedVectorType>(Op1->getType())->getNumElements() == 2 &&
1533e8d8bef9SDimitry Andric            "Unexpected operand size");
1534e8d8bef9SDimitry Andric 
1535e8d8bef9SDimitry Andric     // See if we're dealing with constant values.
1536fe6060f1SDimitry Andric     auto *C1 = dyn_cast<Constant>(Op1);
1537fe6060f1SDimitry Andric     auto *CI11 =
1538e8d8bef9SDimitry Andric         C1 ? dyn_cast_or_null<ConstantInt>(C1->getAggregateElement((unsigned)1))
1539e8d8bef9SDimitry Andric            : nullptr;
1540e8d8bef9SDimitry Andric 
1541e8d8bef9SDimitry Andric     // Attempt to simplify to a constant, shuffle vector or INSERTQI call.
1542e8d8bef9SDimitry Andric     if (CI11) {
1543e8d8bef9SDimitry Andric       const APInt &V11 = CI11->getValue();
1544e8d8bef9SDimitry Andric       APInt Len = V11.zextOrTrunc(6);
1545e8d8bef9SDimitry Andric       APInt Idx = V11.lshr(8).zextOrTrunc(6);
1546e8d8bef9SDimitry Andric       if (Value *V = simplifyX86insertq(II, Op0, Op1, Len, Idx, IC.Builder)) {
1547e8d8bef9SDimitry Andric         return IC.replaceInstUsesWith(II, V);
1548e8d8bef9SDimitry Andric       }
1549e8d8bef9SDimitry Andric     }
1550e8d8bef9SDimitry Andric 
1551e8d8bef9SDimitry Andric     // INSERTQ only uses the lowest 64-bits of the first 128-bit vector
1552e8d8bef9SDimitry Andric     // operand.
1553e8d8bef9SDimitry Andric     if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth, 1)) {
1554e8d8bef9SDimitry Andric       return IC.replaceOperand(II, 0, V);
1555e8d8bef9SDimitry Andric     }
1556e8d8bef9SDimitry Andric     break;
1557e8d8bef9SDimitry Andric   }
1558e8d8bef9SDimitry Andric 
1559e8d8bef9SDimitry Andric   case Intrinsic::x86_sse4a_insertqi: {
1560e8d8bef9SDimitry Andric     // INSERTQI: Extract lowest Length bits from lower half of second source and
1561e8d8bef9SDimitry Andric     // insert over first source starting at Index bit. The upper 64-bits are
1562e8d8bef9SDimitry Andric     // undefined.
1563e8d8bef9SDimitry Andric     Value *Op0 = II.getArgOperand(0);
1564e8d8bef9SDimitry Andric     Value *Op1 = II.getArgOperand(1);
1565e8d8bef9SDimitry Andric     unsigned VWidth0 = cast<FixedVectorType>(Op0->getType())->getNumElements();
1566e8d8bef9SDimitry Andric     unsigned VWidth1 = cast<FixedVectorType>(Op1->getType())->getNumElements();
1567e8d8bef9SDimitry Andric     assert(Op0->getType()->getPrimitiveSizeInBits() == 128 &&
1568e8d8bef9SDimitry Andric            Op1->getType()->getPrimitiveSizeInBits() == 128 && VWidth0 == 2 &&
1569e8d8bef9SDimitry Andric            VWidth1 == 2 && "Unexpected operand sizes");
1570e8d8bef9SDimitry Andric 
1571e8d8bef9SDimitry Andric     // See if we're dealing with constant values.
1572fe6060f1SDimitry Andric     auto *CILength = dyn_cast<ConstantInt>(II.getArgOperand(2));
1573fe6060f1SDimitry Andric     auto *CIIndex = dyn_cast<ConstantInt>(II.getArgOperand(3));
1574e8d8bef9SDimitry Andric 
1575e8d8bef9SDimitry Andric     // Attempt to simplify to a constant or shuffle vector.
1576e8d8bef9SDimitry Andric     if (CILength && CIIndex) {
1577e8d8bef9SDimitry Andric       APInt Len = CILength->getValue().zextOrTrunc(6);
1578e8d8bef9SDimitry Andric       APInt Idx = CIIndex->getValue().zextOrTrunc(6);
1579e8d8bef9SDimitry Andric       if (Value *V = simplifyX86insertq(II, Op0, Op1, Len, Idx, IC.Builder)) {
1580e8d8bef9SDimitry Andric         return IC.replaceInstUsesWith(II, V);
1581e8d8bef9SDimitry Andric       }
1582e8d8bef9SDimitry Andric     }
1583e8d8bef9SDimitry Andric 
1584e8d8bef9SDimitry Andric     // INSERTQI only uses the lowest 64-bits of the first two 128-bit vector
1585e8d8bef9SDimitry Andric     // operands.
1586e8d8bef9SDimitry Andric     bool MadeChange = false;
1587e8d8bef9SDimitry Andric     if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth0, 1)) {
1588e8d8bef9SDimitry Andric       IC.replaceOperand(II, 0, V);
1589e8d8bef9SDimitry Andric       MadeChange = true;
1590e8d8bef9SDimitry Andric     }
1591e8d8bef9SDimitry Andric     if (Value *V = SimplifyDemandedVectorEltsLow(Op1, VWidth1, 1)) {
1592e8d8bef9SDimitry Andric       IC.replaceOperand(II, 1, V);
1593e8d8bef9SDimitry Andric       MadeChange = true;
1594e8d8bef9SDimitry Andric     }
1595e8d8bef9SDimitry Andric     if (MadeChange) {
1596e8d8bef9SDimitry Andric       return &II;
1597e8d8bef9SDimitry Andric     }
1598e8d8bef9SDimitry Andric     break;
1599e8d8bef9SDimitry Andric   }
1600e8d8bef9SDimitry Andric 
1601e8d8bef9SDimitry Andric   case Intrinsic::x86_sse41_pblendvb:
1602e8d8bef9SDimitry Andric   case Intrinsic::x86_sse41_blendvps:
1603e8d8bef9SDimitry Andric   case Intrinsic::x86_sse41_blendvpd:
1604e8d8bef9SDimitry Andric   case Intrinsic::x86_avx_blendv_ps_256:
1605e8d8bef9SDimitry Andric   case Intrinsic::x86_avx_blendv_pd_256:
1606e8d8bef9SDimitry Andric   case Intrinsic::x86_avx2_pblendvb: {
1607e8d8bef9SDimitry Andric     // fold (blend A, A, Mask) -> A
1608e8d8bef9SDimitry Andric     Value *Op0 = II.getArgOperand(0);
1609e8d8bef9SDimitry Andric     Value *Op1 = II.getArgOperand(1);
1610e8d8bef9SDimitry Andric     Value *Mask = II.getArgOperand(2);
1611e8d8bef9SDimitry Andric     if (Op0 == Op1) {
1612e8d8bef9SDimitry Andric       return IC.replaceInstUsesWith(II, Op0);
1613e8d8bef9SDimitry Andric     }
1614e8d8bef9SDimitry Andric 
1615e8d8bef9SDimitry Andric     // Zero Mask - select 1st argument.
1616e8d8bef9SDimitry Andric     if (isa<ConstantAggregateZero>(Mask)) {
1617e8d8bef9SDimitry Andric       return IC.replaceInstUsesWith(II, Op0);
1618e8d8bef9SDimitry Andric     }
1619e8d8bef9SDimitry Andric 
1620e8d8bef9SDimitry Andric     // Constant Mask - select 1st/2nd argument lane based on top bit of mask.
1621e8d8bef9SDimitry Andric     if (auto *ConstantMask = dyn_cast<ConstantDataVector>(Mask)) {
1622e8d8bef9SDimitry Andric       Constant *NewSelector = getNegativeIsTrueBoolVec(ConstantMask);
1623e8d8bef9SDimitry Andric       return SelectInst::Create(NewSelector, Op1, Op0, "blendv");
1624e8d8bef9SDimitry Andric     }
1625e8d8bef9SDimitry Andric 
1626e8d8bef9SDimitry Andric     // Convert to a vector select if we can bypass casts and find a boolean
1627e8d8bef9SDimitry Andric     // vector condition value.
1628e8d8bef9SDimitry Andric     Value *BoolVec;
1629e8d8bef9SDimitry Andric     Mask = InstCombiner::peekThroughBitcast(Mask);
1630e8d8bef9SDimitry Andric     if (match(Mask, PatternMatch::m_SExt(PatternMatch::m_Value(BoolVec))) &&
1631e8d8bef9SDimitry Andric         BoolVec->getType()->isVectorTy() &&
1632e8d8bef9SDimitry Andric         BoolVec->getType()->getScalarSizeInBits() == 1) {
1633e8d8bef9SDimitry Andric       assert(Mask->getType()->getPrimitiveSizeInBits() ==
1634e8d8bef9SDimitry Andric                  II.getType()->getPrimitiveSizeInBits() &&
1635e8d8bef9SDimitry Andric              "Not expecting mask and operands with different sizes");
1636e8d8bef9SDimitry Andric 
1637e8d8bef9SDimitry Andric       unsigned NumMaskElts =
1638e8d8bef9SDimitry Andric           cast<FixedVectorType>(Mask->getType())->getNumElements();
1639e8d8bef9SDimitry Andric       unsigned NumOperandElts =
1640e8d8bef9SDimitry Andric           cast<FixedVectorType>(II.getType())->getNumElements();
1641e8d8bef9SDimitry Andric       if (NumMaskElts == NumOperandElts) {
1642e8d8bef9SDimitry Andric         return SelectInst::Create(BoolVec, Op1, Op0);
1643e8d8bef9SDimitry Andric       }
1644e8d8bef9SDimitry Andric 
1645e8d8bef9SDimitry Andric       // If the mask has less elements than the operands, each mask bit maps to
1646e8d8bef9SDimitry Andric       // multiple elements of the operands. Bitcast back and forth.
1647e8d8bef9SDimitry Andric       if (NumMaskElts < NumOperandElts) {
1648e8d8bef9SDimitry Andric         Value *CastOp0 = IC.Builder.CreateBitCast(Op0, Mask->getType());
1649e8d8bef9SDimitry Andric         Value *CastOp1 = IC.Builder.CreateBitCast(Op1, Mask->getType());
1650e8d8bef9SDimitry Andric         Value *Sel = IC.Builder.CreateSelect(BoolVec, CastOp1, CastOp0);
1651e8d8bef9SDimitry Andric         return new BitCastInst(Sel, II.getType());
1652e8d8bef9SDimitry Andric       }
1653e8d8bef9SDimitry Andric     }
1654e8d8bef9SDimitry Andric 
1655e8d8bef9SDimitry Andric     break;
1656e8d8bef9SDimitry Andric   }
1657e8d8bef9SDimitry Andric 
1658e8d8bef9SDimitry Andric   case Intrinsic::x86_ssse3_pshuf_b_128:
1659e8d8bef9SDimitry Andric   case Intrinsic::x86_avx2_pshuf_b:
1660e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_pshuf_b_512:
1661e8d8bef9SDimitry Andric     if (Value *V = simplifyX86pshufb(II, IC.Builder)) {
1662e8d8bef9SDimitry Andric       return IC.replaceInstUsesWith(II, V);
1663e8d8bef9SDimitry Andric     }
1664e8d8bef9SDimitry Andric     break;
1665e8d8bef9SDimitry Andric 
1666e8d8bef9SDimitry Andric   case Intrinsic::x86_avx_vpermilvar_ps:
1667e8d8bef9SDimitry Andric   case Intrinsic::x86_avx_vpermilvar_ps_256:
1668e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_vpermilvar_ps_512:
1669e8d8bef9SDimitry Andric   case Intrinsic::x86_avx_vpermilvar_pd:
1670e8d8bef9SDimitry Andric   case Intrinsic::x86_avx_vpermilvar_pd_256:
1671e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_vpermilvar_pd_512:
1672e8d8bef9SDimitry Andric     if (Value *V = simplifyX86vpermilvar(II, IC.Builder)) {
1673e8d8bef9SDimitry Andric       return IC.replaceInstUsesWith(II, V);
1674e8d8bef9SDimitry Andric     }
1675e8d8bef9SDimitry Andric     break;
1676e8d8bef9SDimitry Andric 
1677e8d8bef9SDimitry Andric   case Intrinsic::x86_avx2_permd:
1678e8d8bef9SDimitry Andric   case Intrinsic::x86_avx2_permps:
1679e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_permvar_df_256:
1680e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_permvar_df_512:
1681e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_permvar_di_256:
1682e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_permvar_di_512:
1683e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_permvar_hi_128:
1684e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_permvar_hi_256:
1685e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_permvar_hi_512:
1686e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_permvar_qi_128:
1687e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_permvar_qi_256:
1688e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_permvar_qi_512:
1689e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_permvar_sf_512:
1690e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_permvar_si_512:
1691e8d8bef9SDimitry Andric     if (Value *V = simplifyX86vpermv(II, IC.Builder)) {
1692e8d8bef9SDimitry Andric       return IC.replaceInstUsesWith(II, V);
1693e8d8bef9SDimitry Andric     }
1694e8d8bef9SDimitry Andric     break;
1695e8d8bef9SDimitry Andric 
1696e8d8bef9SDimitry Andric   case Intrinsic::x86_avx_maskload_ps:
1697e8d8bef9SDimitry Andric   case Intrinsic::x86_avx_maskload_pd:
1698e8d8bef9SDimitry Andric   case Intrinsic::x86_avx_maskload_ps_256:
1699e8d8bef9SDimitry Andric   case Intrinsic::x86_avx_maskload_pd_256:
1700e8d8bef9SDimitry Andric   case Intrinsic::x86_avx2_maskload_d:
1701e8d8bef9SDimitry Andric   case Intrinsic::x86_avx2_maskload_q:
1702e8d8bef9SDimitry Andric   case Intrinsic::x86_avx2_maskload_d_256:
1703e8d8bef9SDimitry Andric   case Intrinsic::x86_avx2_maskload_q_256:
1704e8d8bef9SDimitry Andric     if (Instruction *I = simplifyX86MaskedLoad(II, IC)) {
1705e8d8bef9SDimitry Andric       return I;
1706e8d8bef9SDimitry Andric     }
1707e8d8bef9SDimitry Andric     break;
1708e8d8bef9SDimitry Andric 
1709e8d8bef9SDimitry Andric   case Intrinsic::x86_sse2_maskmov_dqu:
1710e8d8bef9SDimitry Andric   case Intrinsic::x86_avx_maskstore_ps:
1711e8d8bef9SDimitry Andric   case Intrinsic::x86_avx_maskstore_pd:
1712e8d8bef9SDimitry Andric   case Intrinsic::x86_avx_maskstore_ps_256:
1713e8d8bef9SDimitry Andric   case Intrinsic::x86_avx_maskstore_pd_256:
1714e8d8bef9SDimitry Andric   case Intrinsic::x86_avx2_maskstore_d:
1715e8d8bef9SDimitry Andric   case Intrinsic::x86_avx2_maskstore_q:
1716e8d8bef9SDimitry Andric   case Intrinsic::x86_avx2_maskstore_d_256:
1717e8d8bef9SDimitry Andric   case Intrinsic::x86_avx2_maskstore_q_256:
1718e8d8bef9SDimitry Andric     if (simplifyX86MaskedStore(II, IC)) {
1719e8d8bef9SDimitry Andric       return nullptr;
1720e8d8bef9SDimitry Andric     }
1721e8d8bef9SDimitry Andric     break;
1722e8d8bef9SDimitry Andric 
1723e8d8bef9SDimitry Andric   case Intrinsic::x86_addcarry_32:
1724e8d8bef9SDimitry Andric   case Intrinsic::x86_addcarry_64:
1725e8d8bef9SDimitry Andric     if (Value *V = simplifyX86addcarry(II, IC.Builder)) {
1726e8d8bef9SDimitry Andric       return IC.replaceInstUsesWith(II, V);
1727e8d8bef9SDimitry Andric     }
1728e8d8bef9SDimitry Andric     break;
1729e8d8bef9SDimitry Andric 
1730e8d8bef9SDimitry Andric   default:
1731e8d8bef9SDimitry Andric     break;
1732e8d8bef9SDimitry Andric   }
1733e8d8bef9SDimitry Andric   return None;
1734e8d8bef9SDimitry Andric }
1735e8d8bef9SDimitry Andric 
1736e8d8bef9SDimitry Andric Optional<Value *> X86TTIImpl::simplifyDemandedUseBitsIntrinsic(
1737e8d8bef9SDimitry Andric     InstCombiner &IC, IntrinsicInst &II, APInt DemandedMask, KnownBits &Known,
1738e8d8bef9SDimitry Andric     bool &KnownBitsComputed) const {
1739e8d8bef9SDimitry Andric   switch (II.getIntrinsicID()) {
1740e8d8bef9SDimitry Andric   default:
1741e8d8bef9SDimitry Andric     break;
1742e8d8bef9SDimitry Andric   case Intrinsic::x86_mmx_pmovmskb:
1743e8d8bef9SDimitry Andric   case Intrinsic::x86_sse_movmsk_ps:
1744e8d8bef9SDimitry Andric   case Intrinsic::x86_sse2_movmsk_pd:
1745e8d8bef9SDimitry Andric   case Intrinsic::x86_sse2_pmovmskb_128:
1746e8d8bef9SDimitry Andric   case Intrinsic::x86_avx_movmsk_ps_256:
1747e8d8bef9SDimitry Andric   case Intrinsic::x86_avx_movmsk_pd_256:
1748e8d8bef9SDimitry Andric   case Intrinsic::x86_avx2_pmovmskb: {
1749e8d8bef9SDimitry Andric     // MOVMSK copies the vector elements' sign bits to the low bits
1750e8d8bef9SDimitry Andric     // and zeros the high bits.
1751e8d8bef9SDimitry Andric     unsigned ArgWidth;
1752e8d8bef9SDimitry Andric     if (II.getIntrinsicID() == Intrinsic::x86_mmx_pmovmskb) {
1753e8d8bef9SDimitry Andric       ArgWidth = 8; // Arg is x86_mmx, but treated as <8 x i8>.
1754e8d8bef9SDimitry Andric     } else {
1755fe6060f1SDimitry Andric       auto *ArgType = cast<FixedVectorType>(II.getArgOperand(0)->getType());
1756e8d8bef9SDimitry Andric       ArgWidth = ArgType->getNumElements();
1757e8d8bef9SDimitry Andric     }
1758e8d8bef9SDimitry Andric 
1759e8d8bef9SDimitry Andric     // If we don't need any of low bits then return zero,
1760e8d8bef9SDimitry Andric     // we know that DemandedMask is non-zero already.
1761e8d8bef9SDimitry Andric     APInt DemandedElts = DemandedMask.zextOrTrunc(ArgWidth);
1762e8d8bef9SDimitry Andric     Type *VTy = II.getType();
1763349cc55cSDimitry Andric     if (DemandedElts.isZero()) {
1764e8d8bef9SDimitry Andric       return ConstantInt::getNullValue(VTy);
1765e8d8bef9SDimitry Andric     }
1766e8d8bef9SDimitry Andric 
1767e8d8bef9SDimitry Andric     // We know that the upper bits are set to zero.
1768e8d8bef9SDimitry Andric     Known.Zero.setBitsFrom(ArgWidth);
1769e8d8bef9SDimitry Andric     KnownBitsComputed = true;
1770e8d8bef9SDimitry Andric     break;
1771e8d8bef9SDimitry Andric   }
1772e8d8bef9SDimitry Andric   }
1773e8d8bef9SDimitry Andric   return None;
1774e8d8bef9SDimitry Andric }
1775e8d8bef9SDimitry Andric 
1776e8d8bef9SDimitry Andric Optional<Value *> X86TTIImpl::simplifyDemandedVectorEltsIntrinsic(
1777e8d8bef9SDimitry Andric     InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
1778e8d8bef9SDimitry Andric     APInt &UndefElts2, APInt &UndefElts3,
1779e8d8bef9SDimitry Andric     std::function<void(Instruction *, unsigned, APInt, APInt &)>
1780e8d8bef9SDimitry Andric         simplifyAndSetOp) const {
1781e8d8bef9SDimitry Andric   unsigned VWidth = cast<FixedVectorType>(II.getType())->getNumElements();
1782e8d8bef9SDimitry Andric   switch (II.getIntrinsicID()) {
1783e8d8bef9SDimitry Andric   default:
1784e8d8bef9SDimitry Andric     break;
1785e8d8bef9SDimitry Andric   case Intrinsic::x86_xop_vfrcz_ss:
1786e8d8bef9SDimitry Andric   case Intrinsic::x86_xop_vfrcz_sd:
1787e8d8bef9SDimitry Andric     // The instructions for these intrinsics are speced to zero upper bits not
1788e8d8bef9SDimitry Andric     // pass them through like other scalar intrinsics. So we shouldn't just
1789e8d8bef9SDimitry Andric     // use Arg0 if DemandedElts[0] is clear like we do for other intrinsics.
1790e8d8bef9SDimitry Andric     // Instead we should return a zero vector.
1791e8d8bef9SDimitry Andric     if (!DemandedElts[0]) {
1792e8d8bef9SDimitry Andric       IC.addToWorklist(&II);
1793e8d8bef9SDimitry Andric       return ConstantAggregateZero::get(II.getType());
1794e8d8bef9SDimitry Andric     }
1795e8d8bef9SDimitry Andric 
1796e8d8bef9SDimitry Andric     // Only the lower element is used.
1797e8d8bef9SDimitry Andric     DemandedElts = 1;
1798e8d8bef9SDimitry Andric     simplifyAndSetOp(&II, 0, DemandedElts, UndefElts);
1799e8d8bef9SDimitry Andric 
1800e8d8bef9SDimitry Andric     // Only the lower element is undefined. The high elements are zero.
1801e8d8bef9SDimitry Andric     UndefElts = UndefElts[0];
1802e8d8bef9SDimitry Andric     break;
1803e8d8bef9SDimitry Andric 
1804e8d8bef9SDimitry Andric   // Unary scalar-as-vector operations that work column-wise.
1805e8d8bef9SDimitry Andric   case Intrinsic::x86_sse_rcp_ss:
1806e8d8bef9SDimitry Andric   case Intrinsic::x86_sse_rsqrt_ss:
1807e8d8bef9SDimitry Andric     simplifyAndSetOp(&II, 0, DemandedElts, UndefElts);
1808e8d8bef9SDimitry Andric 
1809e8d8bef9SDimitry Andric     // If lowest element of a scalar op isn't used then use Arg0.
1810e8d8bef9SDimitry Andric     if (!DemandedElts[0]) {
1811e8d8bef9SDimitry Andric       IC.addToWorklist(&II);
1812e8d8bef9SDimitry Andric       return II.getArgOperand(0);
1813e8d8bef9SDimitry Andric     }
1814e8d8bef9SDimitry Andric     // TODO: If only low elt lower SQRT to FSQRT (with rounding/exceptions
1815e8d8bef9SDimitry Andric     // checks).
1816e8d8bef9SDimitry Andric     break;
1817e8d8bef9SDimitry Andric 
1818e8d8bef9SDimitry Andric   // Binary scalar-as-vector operations that work column-wise. The high
1819e8d8bef9SDimitry Andric   // elements come from operand 0. The low element is a function of both
1820e8d8bef9SDimitry Andric   // operands.
1821e8d8bef9SDimitry Andric   case Intrinsic::x86_sse_min_ss:
1822e8d8bef9SDimitry Andric   case Intrinsic::x86_sse_max_ss:
1823e8d8bef9SDimitry Andric   case Intrinsic::x86_sse_cmp_ss:
1824e8d8bef9SDimitry Andric   case Intrinsic::x86_sse2_min_sd:
1825e8d8bef9SDimitry Andric   case Intrinsic::x86_sse2_max_sd:
1826e8d8bef9SDimitry Andric   case Intrinsic::x86_sse2_cmp_sd: {
1827e8d8bef9SDimitry Andric     simplifyAndSetOp(&II, 0, DemandedElts, UndefElts);
1828e8d8bef9SDimitry Andric 
1829e8d8bef9SDimitry Andric     // If lowest element of a scalar op isn't used then use Arg0.
1830e8d8bef9SDimitry Andric     if (!DemandedElts[0]) {
1831e8d8bef9SDimitry Andric       IC.addToWorklist(&II);
1832e8d8bef9SDimitry Andric       return II.getArgOperand(0);
1833e8d8bef9SDimitry Andric     }
1834e8d8bef9SDimitry Andric 
1835e8d8bef9SDimitry Andric     // Only lower element is used for operand 1.
1836e8d8bef9SDimitry Andric     DemandedElts = 1;
1837e8d8bef9SDimitry Andric     simplifyAndSetOp(&II, 1, DemandedElts, UndefElts2);
1838e8d8bef9SDimitry Andric 
1839e8d8bef9SDimitry Andric     // Lower element is undefined if both lower elements are undefined.
1840e8d8bef9SDimitry Andric     // Consider things like undef&0.  The result is known zero, not undef.
1841e8d8bef9SDimitry Andric     if (!UndefElts2[0])
1842e8d8bef9SDimitry Andric       UndefElts.clearBit(0);
1843e8d8bef9SDimitry Andric 
1844e8d8bef9SDimitry Andric     break;
1845e8d8bef9SDimitry Andric   }
1846e8d8bef9SDimitry Andric 
1847e8d8bef9SDimitry Andric   // Binary scalar-as-vector operations that work column-wise. The high
1848e8d8bef9SDimitry Andric   // elements come from operand 0 and the low element comes from operand 1.
1849e8d8bef9SDimitry Andric   case Intrinsic::x86_sse41_round_ss:
1850e8d8bef9SDimitry Andric   case Intrinsic::x86_sse41_round_sd: {
1851e8d8bef9SDimitry Andric     // Don't use the low element of operand 0.
1852e8d8bef9SDimitry Andric     APInt DemandedElts2 = DemandedElts;
1853e8d8bef9SDimitry Andric     DemandedElts2.clearBit(0);
1854e8d8bef9SDimitry Andric     simplifyAndSetOp(&II, 0, DemandedElts2, UndefElts);
1855e8d8bef9SDimitry Andric 
1856e8d8bef9SDimitry Andric     // If lowest element of a scalar op isn't used then use Arg0.
1857e8d8bef9SDimitry Andric     if (!DemandedElts[0]) {
1858e8d8bef9SDimitry Andric       IC.addToWorklist(&II);
1859e8d8bef9SDimitry Andric       return II.getArgOperand(0);
1860e8d8bef9SDimitry Andric     }
1861e8d8bef9SDimitry Andric 
1862e8d8bef9SDimitry Andric     // Only lower element is used for operand 1.
1863e8d8bef9SDimitry Andric     DemandedElts = 1;
1864e8d8bef9SDimitry Andric     simplifyAndSetOp(&II, 1, DemandedElts, UndefElts2);
1865e8d8bef9SDimitry Andric 
1866e8d8bef9SDimitry Andric     // Take the high undef elements from operand 0 and take the lower element
1867e8d8bef9SDimitry Andric     // from operand 1.
1868e8d8bef9SDimitry Andric     UndefElts.clearBit(0);
1869e8d8bef9SDimitry Andric     UndefElts |= UndefElts2[0];
1870e8d8bef9SDimitry Andric     break;
1871e8d8bef9SDimitry Andric   }
1872e8d8bef9SDimitry Andric 
1873e8d8bef9SDimitry Andric   // Three input scalar-as-vector operations that work column-wise. The high
1874e8d8bef9SDimitry Andric   // elements come from operand 0 and the low element is a function of all
1875e8d8bef9SDimitry Andric   // three inputs.
1876e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_mask_add_ss_round:
1877e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_mask_div_ss_round:
1878e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_mask_mul_ss_round:
1879e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_mask_sub_ss_round:
1880e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_mask_max_ss_round:
1881e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_mask_min_ss_round:
1882e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_mask_add_sd_round:
1883e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_mask_div_sd_round:
1884e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_mask_mul_sd_round:
1885e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_mask_sub_sd_round:
1886e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_mask_max_sd_round:
1887e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_mask_min_sd_round:
1888e8d8bef9SDimitry Andric     simplifyAndSetOp(&II, 0, DemandedElts, UndefElts);
1889e8d8bef9SDimitry Andric 
1890e8d8bef9SDimitry Andric     // If lowest element of a scalar op isn't used then use Arg0.
1891e8d8bef9SDimitry Andric     if (!DemandedElts[0]) {
1892e8d8bef9SDimitry Andric       IC.addToWorklist(&II);
1893e8d8bef9SDimitry Andric       return II.getArgOperand(0);
1894e8d8bef9SDimitry Andric     }
1895e8d8bef9SDimitry Andric 
1896e8d8bef9SDimitry Andric     // Only lower element is used for operand 1 and 2.
1897e8d8bef9SDimitry Andric     DemandedElts = 1;
1898e8d8bef9SDimitry Andric     simplifyAndSetOp(&II, 1, DemandedElts, UndefElts2);
1899e8d8bef9SDimitry Andric     simplifyAndSetOp(&II, 2, DemandedElts, UndefElts3);
1900e8d8bef9SDimitry Andric 
1901e8d8bef9SDimitry Andric     // Lower element is undefined if all three lower elements are undefined.
1902e8d8bef9SDimitry Andric     // Consider things like undef&0.  The result is known zero, not undef.
1903e8d8bef9SDimitry Andric     if (!UndefElts2[0] || !UndefElts3[0])
1904e8d8bef9SDimitry Andric       UndefElts.clearBit(0);
1905e8d8bef9SDimitry Andric     break;
1906e8d8bef9SDimitry Andric 
1907e8d8bef9SDimitry Andric   // TODO: Add fmaddsub support?
1908e8d8bef9SDimitry Andric   case Intrinsic::x86_sse3_addsub_pd:
1909e8d8bef9SDimitry Andric   case Intrinsic::x86_sse3_addsub_ps:
1910e8d8bef9SDimitry Andric   case Intrinsic::x86_avx_addsub_pd_256:
1911e8d8bef9SDimitry Andric   case Intrinsic::x86_avx_addsub_ps_256: {
1912e8d8bef9SDimitry Andric     // If none of the even or none of the odd lanes are required, turn this
1913e8d8bef9SDimitry Andric     // into a generic FP math instruction.
1914e8d8bef9SDimitry Andric     APInt SubMask = APInt::getSplat(VWidth, APInt(2, 0x1));
1915e8d8bef9SDimitry Andric     APInt AddMask = APInt::getSplat(VWidth, APInt(2, 0x2));
1916e8d8bef9SDimitry Andric     bool IsSubOnly = DemandedElts.isSubsetOf(SubMask);
1917e8d8bef9SDimitry Andric     bool IsAddOnly = DemandedElts.isSubsetOf(AddMask);
1918e8d8bef9SDimitry Andric     if (IsSubOnly || IsAddOnly) {
1919e8d8bef9SDimitry Andric       assert((IsSubOnly ^ IsAddOnly) && "Can't be both add-only and sub-only");
1920e8d8bef9SDimitry Andric       IRBuilderBase::InsertPointGuard Guard(IC.Builder);
1921e8d8bef9SDimitry Andric       IC.Builder.SetInsertPoint(&II);
1922e8d8bef9SDimitry Andric       Value *Arg0 = II.getArgOperand(0), *Arg1 = II.getArgOperand(1);
1923e8d8bef9SDimitry Andric       return IC.Builder.CreateBinOp(
1924e8d8bef9SDimitry Andric           IsSubOnly ? Instruction::FSub : Instruction::FAdd, Arg0, Arg1);
1925e8d8bef9SDimitry Andric     }
1926e8d8bef9SDimitry Andric 
1927e8d8bef9SDimitry Andric     simplifyAndSetOp(&II, 0, DemandedElts, UndefElts);
1928e8d8bef9SDimitry Andric     simplifyAndSetOp(&II, 1, DemandedElts, UndefElts2);
1929e8d8bef9SDimitry Andric     UndefElts &= UndefElts2;
1930e8d8bef9SDimitry Andric     break;
1931e8d8bef9SDimitry Andric   }
1932e8d8bef9SDimitry Andric 
1933*81ad6265SDimitry Andric   // General per-element vector operations.
1934*81ad6265SDimitry Andric   case Intrinsic::x86_avx2_psllv_d:
1935*81ad6265SDimitry Andric   case Intrinsic::x86_avx2_psllv_d_256:
1936*81ad6265SDimitry Andric   case Intrinsic::x86_avx2_psllv_q:
1937*81ad6265SDimitry Andric   case Intrinsic::x86_avx2_psllv_q_256:
1938*81ad6265SDimitry Andric   case Intrinsic::x86_avx2_psrlv_d:
1939*81ad6265SDimitry Andric   case Intrinsic::x86_avx2_psrlv_d_256:
1940*81ad6265SDimitry Andric   case Intrinsic::x86_avx2_psrlv_q:
1941*81ad6265SDimitry Andric   case Intrinsic::x86_avx2_psrlv_q_256:
1942*81ad6265SDimitry Andric   case Intrinsic::x86_avx2_psrav_d:
1943*81ad6265SDimitry Andric   case Intrinsic::x86_avx2_psrav_d_256: {
1944*81ad6265SDimitry Andric     simplifyAndSetOp(&II, 0, DemandedElts, UndefElts);
1945*81ad6265SDimitry Andric     simplifyAndSetOp(&II, 1, DemandedElts, UndefElts2);
1946*81ad6265SDimitry Andric     UndefElts &= UndefElts2;
1947*81ad6265SDimitry Andric     break;
1948*81ad6265SDimitry Andric   }
1949*81ad6265SDimitry Andric 
1950e8d8bef9SDimitry Andric   case Intrinsic::x86_sse2_packssdw_128:
1951e8d8bef9SDimitry Andric   case Intrinsic::x86_sse2_packsswb_128:
1952e8d8bef9SDimitry Andric   case Intrinsic::x86_sse2_packuswb_128:
1953e8d8bef9SDimitry Andric   case Intrinsic::x86_sse41_packusdw:
1954e8d8bef9SDimitry Andric   case Intrinsic::x86_avx2_packssdw:
1955e8d8bef9SDimitry Andric   case Intrinsic::x86_avx2_packsswb:
1956e8d8bef9SDimitry Andric   case Intrinsic::x86_avx2_packusdw:
1957e8d8bef9SDimitry Andric   case Intrinsic::x86_avx2_packuswb:
1958e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_packssdw_512:
1959e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_packsswb_512:
1960e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_packusdw_512:
1961e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_packuswb_512: {
1962e8d8bef9SDimitry Andric     auto *Ty0 = II.getArgOperand(0)->getType();
1963e8d8bef9SDimitry Andric     unsigned InnerVWidth = cast<FixedVectorType>(Ty0)->getNumElements();
1964e8d8bef9SDimitry Andric     assert(VWidth == (InnerVWidth * 2) && "Unexpected input size");
1965e8d8bef9SDimitry Andric 
1966e8d8bef9SDimitry Andric     unsigned NumLanes = Ty0->getPrimitiveSizeInBits() / 128;
1967e8d8bef9SDimitry Andric     unsigned VWidthPerLane = VWidth / NumLanes;
1968e8d8bef9SDimitry Andric     unsigned InnerVWidthPerLane = InnerVWidth / NumLanes;
1969e8d8bef9SDimitry Andric 
1970e8d8bef9SDimitry Andric     // Per lane, pack the elements of the first input and then the second.
1971e8d8bef9SDimitry Andric     // e.g.
1972e8d8bef9SDimitry Andric     // v8i16 PACK(v4i32 X, v4i32 Y) - (X[0..3],Y[0..3])
1973e8d8bef9SDimitry Andric     // v32i8 PACK(v16i16 X, v16i16 Y) - (X[0..7],Y[0..7]),(X[8..15],Y[8..15])
1974e8d8bef9SDimitry Andric     for (int OpNum = 0; OpNum != 2; ++OpNum) {
1975e8d8bef9SDimitry Andric       APInt OpDemandedElts(InnerVWidth, 0);
1976e8d8bef9SDimitry Andric       for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
1977e8d8bef9SDimitry Andric         unsigned LaneIdx = Lane * VWidthPerLane;
1978e8d8bef9SDimitry Andric         for (unsigned Elt = 0; Elt != InnerVWidthPerLane; ++Elt) {
1979e8d8bef9SDimitry Andric           unsigned Idx = LaneIdx + Elt + InnerVWidthPerLane * OpNum;
1980e8d8bef9SDimitry Andric           if (DemandedElts[Idx])
1981e8d8bef9SDimitry Andric             OpDemandedElts.setBit((Lane * InnerVWidthPerLane) + Elt);
1982e8d8bef9SDimitry Andric         }
1983e8d8bef9SDimitry Andric       }
1984e8d8bef9SDimitry Andric 
1985e8d8bef9SDimitry Andric       // Demand elements from the operand.
1986e8d8bef9SDimitry Andric       APInt OpUndefElts(InnerVWidth, 0);
1987e8d8bef9SDimitry Andric       simplifyAndSetOp(&II, OpNum, OpDemandedElts, OpUndefElts);
1988e8d8bef9SDimitry Andric 
1989e8d8bef9SDimitry Andric       // Pack the operand's UNDEF elements, one lane at a time.
1990e8d8bef9SDimitry Andric       OpUndefElts = OpUndefElts.zext(VWidth);
1991e8d8bef9SDimitry Andric       for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
1992e8d8bef9SDimitry Andric         APInt LaneElts = OpUndefElts.lshr(InnerVWidthPerLane * Lane);
1993e8d8bef9SDimitry Andric         LaneElts = LaneElts.getLoBits(InnerVWidthPerLane);
1994e8d8bef9SDimitry Andric         LaneElts <<= InnerVWidthPerLane * (2 * Lane + OpNum);
1995e8d8bef9SDimitry Andric         UndefElts |= LaneElts;
1996e8d8bef9SDimitry Andric       }
1997e8d8bef9SDimitry Andric     }
1998e8d8bef9SDimitry Andric     break;
1999e8d8bef9SDimitry Andric   }
2000e8d8bef9SDimitry Andric 
2001e8d8bef9SDimitry Andric   // PSHUFB
2002e8d8bef9SDimitry Andric   case Intrinsic::x86_ssse3_pshuf_b_128:
2003e8d8bef9SDimitry Andric   case Intrinsic::x86_avx2_pshuf_b:
2004e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_pshuf_b_512:
2005e8d8bef9SDimitry Andric   // PERMILVAR
2006e8d8bef9SDimitry Andric   case Intrinsic::x86_avx_vpermilvar_ps:
2007e8d8bef9SDimitry Andric   case Intrinsic::x86_avx_vpermilvar_ps_256:
2008e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_vpermilvar_ps_512:
2009e8d8bef9SDimitry Andric   case Intrinsic::x86_avx_vpermilvar_pd:
2010e8d8bef9SDimitry Andric   case Intrinsic::x86_avx_vpermilvar_pd_256:
2011e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_vpermilvar_pd_512:
2012e8d8bef9SDimitry Andric   // PERMV
2013e8d8bef9SDimitry Andric   case Intrinsic::x86_avx2_permd:
2014e8d8bef9SDimitry Andric   case Intrinsic::x86_avx2_permps: {
2015e8d8bef9SDimitry Andric     simplifyAndSetOp(&II, 1, DemandedElts, UndefElts);
2016e8d8bef9SDimitry Andric     break;
2017e8d8bef9SDimitry Andric   }
2018e8d8bef9SDimitry Andric 
2019e8d8bef9SDimitry Andric   // SSE4A instructions leave the upper 64-bits of the 128-bit result
2020e8d8bef9SDimitry Andric   // in an undefined state.
2021e8d8bef9SDimitry Andric   case Intrinsic::x86_sse4a_extrq:
2022e8d8bef9SDimitry Andric   case Intrinsic::x86_sse4a_extrqi:
2023e8d8bef9SDimitry Andric   case Intrinsic::x86_sse4a_insertq:
2024e8d8bef9SDimitry Andric   case Intrinsic::x86_sse4a_insertqi:
2025e8d8bef9SDimitry Andric     UndefElts.setHighBits(VWidth / 2);
2026e8d8bef9SDimitry Andric     break;
2027e8d8bef9SDimitry Andric   }
2028e8d8bef9SDimitry Andric   return None;
2029e8d8bef9SDimitry Andric }
2030